### Recap: Nested For Loops

In [None]:
# Create a nested for loop here

for i in range(5):
    for j in range(5):
        print(i, j)

In [None]:
# Another nested for loop

list1 = ['The ball is ', 'The car is ']
list2 = ['red.', 'yellow.', 'blue.']

for item in list1:
    for colour in list2:
        print(item + colour)

### Global Alignment

In [26]:
from pprint import pprint 

#Create scoring matrix

def getMatrix(sizeX, sizeY, gap):
    matrix = []
    for i in range(len(sizeX)+1): #plus 1 because of gap
        submatrix = []
        for j in range(len(sizeY)+1):
            submatrix.append(0)
        matrix.append(submatrix)
    # Initialise the first row and column with gap values
    for j in range(1, len(sizeY)+1):
        matrix[0][j] = j*gap
    for i in range(1, len(sizeX)+1):
        matrix[i][0] = i*gap
    return matrix


x = "ATCGATCG"
y = "ATCCAT"
pprint(getMatrix(x, y, -2))

[[0, 0, 0, 0, 0, 0, 0],
 [0, -2, -4, -6, -8, -10, -12],
 [0, 0, 0, 0, 0, 0, 0],
 [0, 0, 0, 0, 0, 0, 0],
 [0, 0, 0, 0, 0, 0, 0],
 [0, 0, 0, 0, 0, 0, 0],
 [0, 0, 0, 0, 0, 0, 0],
 [0, 0, 0, 0, 0, 0, 0],
 [0, 0, 0, 0, 0, 0, 0]]


In [2]:
#Create initial matrix of zeros with dimensions len(x) x len(y)

def getTraceBackMatrix(sizeX, sizeY):
    matrix = []
    for i in range(len(sizeX)+1):
        subMatrix = []
        for j in range(len(sizeY)+1):
            subMatrix.append('0')
        matrix.append(subMatrix)
    # Initialise the first row and column with up or left values
    for j in range(1, len(sizeY)+1):
        matrix[0][j] = 'left'
    for i in range(1, len(sizeX)+1):
        matrix[i][0] = 'up'
    matrix[0][0] = 'done'
    return matrix

pprint(getTraceBackMatrix(x, y))

[['done', 'left', 'left', 'left', 'left', 'left', 'left'],
 ['up', '0', '0', '0', '0', '0', '0'],
 ['up', '0', '0', '0', '0', '0', '0'],
 ['up', '0', '0', '0', '0', '0', '0'],
 ['up', '0', '0', '0', '0', '0', '0'],
 ['up', '0', '0', '0', '0', '0', '0'],
 ['up', '0', '0', '0', '0', '0', '0'],
 ['up', '0', '0', '0', '0', '0', '0'],
 ['up', '0', '0', '0', '0', '0', '0']]


### Class

A class is considered as a blueprint of objects. We can think of the class as a sketch (prototype) of a house. It contains all the details about the floors, doors, windows, etc. Based on these descriptions we build the house. House is the object.

Since many houses can be made from the same description, we can create many objects from a class.

In [27]:
# Example of class and how it can be used
class Person:
  def __init__(self, name, age):
    self.name = name
    self.age = age

p1 = Person("John", 36)

print(p1.name)
print(p1.age) 

John
36


In [3]:
#Define scores for each parameter

class ScoreParams:
    def __init__(self, gap, match, mismatch):
        self.gap = gap
        self.match = match
        self.mismatch = mismatch

    def misMatchChar(self, x, y):
        if x != y:
            return self.mismatch
        else:
            return self.match

In [4]:
#Fill in the matrix with alignment scores

def globalAlign(x, y, score):
    matrix = getMatrix(x,y,score.gap)
    traceBack = getTraceBackMatrix(x, y)

    for i in range(1, len(x)+1):
        for j in range(1, len(y)+1):
            left = matrix[i][j-1] + score.gap
            up = matrix[i-1][j] + score.gap
            diag = matrix[i-1][j-1] + score.misMatchChar(x[i-1], y[j-1])
            matrix[i][j] = max(left, up, diag)
            if matrix[i][j] == left:
                traceBack[i][j] = 'left'
            elif matrix[i][j] == up:
                traceBack[i][j] = 'up'
            elif matrix[i][j] == diag:
                traceBack[i][j] = 'diag'
    return matrix, traceBack

score = ScoreParams(-2, 1, -1)
pprint(globalAlign(x, y, score))

([[0, -2, -4, -6, -8, -10, -12],
  [-2, 1, -1, -3, -5, -7, -9],
  [-4, -1, 2, 0, -2, -4, -6],
  [-6, -3, 0, 3, 1, -1, -3],
  [-8, -5, -2, 1, 2, 0, -2],
  [-10, -7, -4, -1, 0, 3, 1],
  [-12, -9, -6, -3, -2, 1, 4],
  [-14, -11, -8, -5, -2, -1, 2],
  [-16, -13, -10, -7, -4, -3, 0]],
 [['done', 'left', 'left', 'left', 'left', 'left', 'left'],
  ['up', 'diag', 'left', 'left', 'left', 'left', 'left'],
  ['up', 'up', 'diag', 'left', 'left', 'left', 'left'],
  ['up', 'up', 'up', 'diag', 'left', 'left', 'left'],
  ['up', 'up', 'up', 'up', 'diag', 'left', 'left'],
  ['up', 'up', 'up', 'up', 'up', 'diag', 'left'],
  ['up', 'up', 'up', 'up', 'up', 'up', 'diag'],
  ['up', 'up', 'up', 'up', 'diag', 'up', 'up'],
  ['up', 'up', 'up', 'up', 'up', 'up', 'up']])


In [5]:
# Obtain x and y globally aligned sequence arrays using the bottom-up-approach

def getAlignedSequences(x, y, matrix, traceBack):
    xSeq = []
    ySeq = []
    i = len(x)
    j = len(y)
    while(i > 0 or j > 0):
        if traceBack[i][j] == 'diag':
            xSeq.append(x[i-1])
            ySeq.append(y[j-1])
            i = i-1
            j = j-1
        elif traceBack[i][j] == 'left':
            xSeq.append('-')
            ySeq.append(y[j-1])
            j = j-1
        elif traceBack[i][j] == 'up':
            xSeq.append(x[i-1])
            ySeq.append('-')
            i = i - 1
        elif traceBack[i][j] == 'done':
            break
    return xSeq[::-1], ySeq[::-1]

matrix, traceBack = globalAlign(x, y, score)[0], globalAlign(x, y, score)[1]
x, y = getAlignedSequences(x, y, matrix, traceBack)
print(x)
print(y)

['A', 'T', 'C', 'G', 'A', 'T', 'C', 'G']
['A', 'T', 'C', 'C', 'A', 'T', '-', '-']


In [8]:
x = "tcgcattcggggaaagttccacttataggttcttgtgcctatttacgcacactacgggaagtccttcatagacatatcagccggttgaccccatc"
y = "tcgttcggggaaagttccacttacaggttcttgtgcctttacgcaactacgggaagtccaacatagacatatgggcggttgaccccatca"
score = ScoreParams(-2, 1, -1)
matrix, traceBack = globalAlign(x, y, score)[0], globalAlign(x, y, score)[1]
x, y = getAlignedSequences(x, y, matrix, traceBack)
x = ''.join(_ for _ in x)
y = ''.join(_ for _ in y)
print(matrix)
print("Global alignment: ")
print(x)
print(y)

[[0, -2, -4, -6, -8, -10, -12, -14, -16, -18, -20, -22, -24, -26, -28, -30, -32, -34, -36, -38, -40, -42, -44, -46, -48, -50, -52, -54, -56, -58, -60, -62, -64, -66, -68, -70, -72, -74, -76, -78, -80, -82, -84, -86, -88, -90, -92, -94, -96, -98, -100, -102, -104, -106, -108, -110, -112, -114, -116, -118, -120, -122, -124, -126, -128, -130, -132, -134, -136, -138, -140, -142, -144, -146, -148, -150, -152, -154, -156, -158, -160, -162, -164, -166, -168, -170, -172, -174, -176, -178, -180], [-2, 1, -1, -3, -5, -7, -9, -11, -13, -15, -17, -19, -21, -23, -25, -27, -29, -31, -33, -35, -37, -39, -41, -43, -45, -47, -49, -51, -53, -55, -57, -59, -61, -63, -65, -67, -69, -71, -73, -75, -77, -79, -81, -83, -85, -87, -89, -91, -93, -95, -97, -99, -101, -103, -105, -107, -109, -111, -113, -115, -117, -119, -121, -123, -125, -127, -129, -131, -133, -135, -137, -139, -141, -143, -145, -147, -149, -151, -153, -155, -157, -159, -161, -163, -165, -167, -169, -171, -173, -175, -177], [-4, -1, 2, 0, -2, 

### Local Alignment

In [10]:
#Create scoring matrix

def getMatrix(sizeX, sizeY):
    matrix = []
    for i in range(len(sizeY)+1): #plus 1 because of gap
        subMatrix = []
        for j in range(len(sizeX)+1):
            subMatrix.append(0)
        matrix.append(subMatrix)
    return matrix


In [11]:
#Create initial matrix of zeros with dimensions len(x) x len(y)

def getTraceBackMatrix(sizeX, sizeY):
    matrix = []
    for i in range(len(sizeY)+1):
        subMatrix = []
        for j in range(len(sizeX)+1):
            subMatrix.append('0')
        matrix.append(subMatrix)
    # Initialise the first row and column with up or left values
    for j in range(1, len(sizeX)+1):
        matrix[0][j] = 'left'
    for i in range(1, len(sizeY)+1):
        matrix[i][0] = 'up'
    matrix[0][0] = 'done'
    return matrix


In [12]:
#Define scores for each parameter

class ScoreParams:
    def __init__(self, gap, match, mismatch):
        self.gap = gap
        self.match = match
        self.mismatch = mismatch


In [13]:
#Fill in the matrix with alignment scores and obtain the best score and position

def localAlign(x, y, score):
    matrix = getMatrix(x, y)
    traceBack = getTraceBackMatrix(x, y)
    best = 0
    optLoc = (0,0)

    for i in range(1,len(y)+1):
        for j in range(1,len(x)+1):
            left = matrix[i][j-1] + score.gap
            up = matrix[i-1][j] + score.gap
            diag = matrix[i-1][j-1] + (score.match if x[j-1] == y[i-1] else score.mismatch)
            matrix[i][j] = max(left, up, diag, 0)
            if matrix[i][j] == left:
                traceBack[i][j] = 'left'
            elif matrix[i][j] == up:
                traceBack[i][j] = 'up'
            elif matrix[i][j] == diag:
                traceBack[i][j] = 'diag'
            else:
                traceBack[i][j] = '0'
            
            if matrix[i][j] >= best:
                best = matrix[i][j]
                optLoc = (i, j)
    return best, optLoc, matrix, traceBack

x = "CATDOGFISH"
y = "DOG"
score = ScoreParams(-7, 10, -5)
best, optLoc, matrix, traceBack = localAlign(x, y, score)
pprint(localAlign(x, y, score))

(30,
 (3, 6),
 [[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
  [0, 0, 0, 0, 10, 3, 0, 0, 0, 0, 0],
  [0, 0, 0, 0, 3, 20, 13, 6, 0, 0, 0],
  [0, 0, 0, 0, 0, 13, 30, 23, 16, 9, 2]],
 [['done',
   'left',
   'left',
   'left',
   'left',
   'left',
   'left',
   'left',
   'left',
   'left',
   'left'],
  ['up', '0', '0', '0', 'diag', 'left', '0', '0', '0', '0', '0'],
  ['up', '0', '0', '0', 'up', 'diag', 'left', 'left', '0', '0', '0'],
  ['up', '0', '0', '0', '0', 'up', 'diag', 'left', 'left', 'left', 'left']])


In [14]:
#Obtaining the locally aligned sequence using matrix

def getSequence(x, best, optLoc, matrix, traceBack):
    seq = ''
    i = optLoc[0]
    j = optLoc[1]
    
    while (i > 0 or j > 0):
        if traceBack[i][j] == 'diag':
            seq += y[i-1]
            i = i-1
            j = j-1
        elif traceBack[i][j] == 'left':
            j = j-1
        elif traceBack[i][j] == 'up':
            i = i-1
        elif traceBack[i][j] == 'done':
            break
        else:
            break
    return seq[::-1]

print(getSequence(x, best, optLoc, matrix, traceBack))

DOG


In [21]:
x = 'tcgcattcggggaaagttccacttataggttcttgtgcctatttacgcacactacgggaagtccttcatagacatatcagccggttgaccccatc'
y = 'accccaaagtgttcttgtgcctatttcccgtccttcagctacagacccc'
score = ScoreParams(-7, 10, -5)
best, optLoc, matrix, traceBack = localAlign(x, y, score)
print(localAlign(x, y, score))
print(getSequence(x, best, optLoc, matrix, traceBack))

(256, (47, 82), [[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 10, 3, 0, 0, 0, 0, 0, 0, 10, 10, 10, 3, 0, 0, 0, 0, 10, 3, 0, 0, 10, 3, 10, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 10, 3, 0, 0, 10, 3, 0, 0, 10, 3, 10, 3, 0, 10, 3, 0, 0, 0, 10, 10, 3, 0, 0, 0, 0, 0, 0, 10, 3, 10, 3, 10, 3, 10, 3, 10, 3, 0, 10, 3, 0, 0, 0, 0, 0, 0, 0, 10, 3, 0, 0, 0, 10, 3, 0], [0, 0, 10, 3, 10, 3, 5, 0, 10, 3, 0, 0, 0, 3, 5, 5, 5, 0, 0, 10, 10, 3, 20, 13, 6, 3, 5, 3, 5, 0, 0, 0, 10, 3, 0, 0, 0, 0, 10, 10, 3, 3, 5, 0, 0, 3, 20, 13, 10, 3, 20, 13, 20, 13, 6, 20, 13, 6, 0, 3, 5, 5, 0, 10, 10, 3, 0, 10, 3, 5, 3, 5, 3, 20, 13, 6, 3, 5, 13, 6, 5, 13, 10, 3, 0, 0, 0, 0, 3, 20, 13, 10, 10, 3, 5, 13], [0, 0, 10, 5, 13, 6, 0, 0, 10, 5, 0, 0, 0, 0, 0, 0, 0, 0, 0

In [24]:
x = 'ratoxtigerrabbitdragonsnakehorsegoatmonkeychickendogpig'
y = 'catoxdragonfishhorsedog'
score = ScoreParams(-7, 10, -5)
best, optLoc, matrix, traceBack = localAlign(x, y, score)
print(localAlign(x, y, score))
print(getSequence(x, best, optLoc, matrix, traceBack))

(88, (22, 34), [[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 10, 3, 0, 10, 3, 0, 0, 0, 0, 0, 0, 0, 0], [0, 0, 10, 3, 0, 0, 0, 0, 0, 0, 0, 0, 10, 3, 0, 0, 0, 0, 0, 10, 3, 0, 0, 0, 0, 10, 3, 0, 0, 0, 0, 0, 0, 0, 0, 10, 3, 0, 0, 0, 0, 0, 0, 3, 5, 0, 3, 5, 0, 0, 0, 0, 0, 0, 0, 0], [0, 0, 3, 20, 13, 6, 10, 3, 0, 0, 0, 0, 3, 5, 0, 0, 10, 3, 0, 3, 5, 0, 0, 0, 0, 3, 5, 0, 0, 0, 0, 0, 0, 0, 0, 3, 20, 13, 6, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], [0, 0, 0, 13, 30, 23, 16, 9, 2, 0, 0, 0, 0, 0, 0, 0, 3, 5, 0, 0, 0, 15, 8, 1, 0, 0, 0, 0, 0, 10, 3, 0, 0, 0, 10, 3, 13, 15, 23, 16, 9, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 10, 3, 0, 0, 0], [0, 0, 0, 6, 23, 40, 33, 26, 19, 12, 5, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 8, 10, 3, 0, 0, 0, 0, 0, 3, 5, 0, 0, 0,