In [4]:
import pandas as pd
import numpy as np

## From https://github.com/ZhangQiuxue/Rosalind/blob/master/Textbook_05E.py
def global_alignment(v, w, scoring_matrix, sigma):
    '''Returns the global alignment of v and w subject to the given scoring matrix and indel penalty sigma.'''
    # Initialize the matrices.
    S = [[0]*(len(w)+1) for _ in range(len(v)+1)]
    backtrack = [[0]*(len(w)+1) for _ in range(len(v)+1)]

    # Initialize the edges with the given penalties.
    for i in range(1, len(v)+1):
        S[i][0] = -i*sigma
    for j in range(1, len(w)+1):
        S[0][j] = -j*sigma

    # Fill in the Score and Backtrack matrices.
    for i in range(1, len(v)+1):
        for j in range(1, len(w)+1):
            scores = [S[i-1][j] - sigma, S[i][j-1] - sigma, S[i-1][j-1] + blosum.loc[v[i-1]][w[j-1]]]
            S[i][j] = max(scores)
            backtrack[i][j] = scores.index(S[i][j])

    # Quick lambda function to insert indels.
    insert_indel = lambda word, i: word[:i] + '-' + word[i:]

    # Initialize the aligned strings as the input strings.
    v_aligned, w_aligned = v, w

    # Get the position of the highest scoring cell in the matrix and the high score.
    i, j = len(v), len(w)
    max_score = str(S[i][j])

    # Backtrack to the edge of the matrix starting at the highest scoring cell.
    while i*j != 0:
        if backtrack[i][j] == 0:
            i -= 1
            w_aligned = insert_indel(w_aligned, j)
        elif backtrack[i][j] == 1:
            j -= 1
            v_aligned = insert_indel(v_aligned, i)
        else:
            i -= 1
            j -= 1

    # Prepend the necessary preceeding indels to get to (0,0).
    for _ in range(i):
        w_aligned = insert_indel(w_aligned, 0)
    for _ in range(j):
        v_aligned = insert_indel(v_aligned, 0)

    return max_score, v_aligned, w_aligned,scores,S,backtrack


def readBlosum62():
    '''
    Read scoring matrix
    '''
    with open('blosum62.txt') as matrix_file:
        matrix = matrix_file.read()
        lines = matrix.strip().split('\n')

    blosum={}
    cols = lines[0].split()

    for row in lines[1:]:       
        idx = row[0]
        vals = row[1:].split()
        vals = [int(_) for _ in vals]
        #print(idx,vals)
        blosum[idx]=vals

    blosum = pd.DataFrame.from_dict(blosum,orient='index',columns=cols)
    return blosum

In [34]:
def scorematch(residueA,residueB,scoringMatrix):
    '''
    Get the value of a score between two matches
    '''
    return scoringMatrix.loc[residueA][residueB]

def linearSpaceAlign(x,y,scoringMatrix,indel,verbose=False):
    ####
    m = len(y)
    n = len(x)
    score = list(np.cumsum([indel for _ in y]))
    nullScore = list(np.cumsum([0] + [indel for _ in x]))
    prev = score.copy()
    score,nullScore,prev
    ####
    for j,residue in enumerate(x):
        if verbose: print(j,residue)
        score = [nullScore[j]] + score
        prev = score.copy()

        for i in range(1,m+1):
            if residue == y[i-1]:
                if verbose: print(i, residue,'match',y[i-1], prev,score)
                score[i] = prev[i-1] + scorematch(residue,y[i-1],scoringMatrix) 
            else:
                if verbose: print(i, residue, 'no match', y[i-1], prev,score,'choose',
                                      score[i-1],
                                      score[i],prev[i-1])
                score[i] = max(score[i-1] + indel, #Up
                                      score[i] + indel, #Left
                                      prev[i-1] + scorematch(residue,y[i-1],scoringMatrix)) #Mismatch
            if verbose: print(score,"\n----")

        score = score[1:]
    
    score = [nullScore[j+1]] + score
    return score


In [31]:
blosum = readBlosum62()

In [32]:
x='PLEASANTLY'
y='MEASNLY'

In [36]:
linearSpaceAlign(x, y, blosum, -5)

[-50, -43, -33, -24, -15, -4, 5, 17]

In [None]:
linearSpaceAlign(x, y, blosum, -5)

In [37]:
linearSpaceAlign(x[0:int(len(x)/2)],y[0:int(len(y)/2)], blosum, -5)

[25, 30, 35, 40]

In [39]:
linearSpaceAlign(x[int(len(x)/2):][::-1],y[int(len(y)/2):][::-1], blosum, -5)

[-25, -13, -4, 7, 13]