pairwise sequence alignment
- Affine gap penalty

space complexity: O(mn)

time complexity: O(mn)

author: Juntao Chen

date: 12.27.2021

In [None]:
def s(xi, yi, m=1, mis=-2):
    """
    Substitution score for aligning two symbols.

    Gives ``m`` when ``xi`` and ``yi`` compare equal, otherwise ``mis``.
    """
    return m if xi == yi else mis


def ChooseWay(p0: float, p1: float, p2: float):
    """
    Pick the traceback state that holds the largest of three scores.

    ``p0``, ``p1`` and ``p2`` are the scores of the states 't', 'x' and 'y'
    respectively. On ties 't' wins over both others and 'x' wins over 'y'.
    """
    t_not_below_x = p0 >= p1
    if t_not_below_x:
        # 'x' is already out of the race; only 'y' can still beat 't'.
        return 't' if p0 >= p2 else 'y'
    # 't' lost to 'x'; decide between the two gap states.
    return 'x' if p1 >= p2 else 'y'

In [2]:
# PSA ~ Affine gap penalty
def PSA_AGP(A, B, d=3, e=1, match=1, mismatch=-2):
    """
    Globally align two sequences under an affine gap penalty.

    Three (m+1) x (n+1) score matrices are filled, one per alignment state:

    * ``t`` - the last column pairs A[i] with B[j]
    * ``x`` - the last column pairs a gap with B[j]
    * ``y`` - the last column pairs A[i] with a gap

    A gap of length k costs ``d + (k - 1) * e``.  A gap state can only be
    entered from ``t`` or extended from itself; a direct switch between the
    two gap states is not considered by the recurrences.

    Parameters
    ----------
    A, B : str
        The sequences to align.
    d : number
        Gap opening penalty (subtracted from the score).
    e : number
        Gap extension penalty (subtracted from the score).
    match, mismatch : number
        Substitution scores forwarded to ``s``.

    Returns
    -------
    tuple
        ``(score, aligned_A, aligned_B)`` where gaps are written as '-'.

    Time and space are both O(len(A) * len(B)).
    """
    # Keep the shorter sequence on the rows (n >= m) and remember whether
    # the inputs were exchanged so the result can be put back in call order.
    swapped = len(A) > len(B)
    if swapped:
        A, B = B, A
    m, n = len(A), len(B)

    neg_inf = -float('Inf')
    t = [[neg_inf] * (n + 1) for _ in range(m + 1)]
    x = [[neg_inf] * (n + 1) for _ in range(m + 1)]
    y = [[neg_inf] * (n + 1) for _ in range(m + 1)]

    # Borders: the only way to reach (i, 0) or (0, j) is one leading gap.
    t[0][0] = 0
    for i in range(1, m + 1):
        y[i][0] = -d - e * (i - 1)
    for j in range(1, n + 1):
        x[0][j] = -d - e * (j - 1)

    for i in range(1, m + 1):
        for j in range(1, n + 1):
            # x :  _ ~ B[j]   (open from t, or extend an existing gap)
            x[i][j] = max(t[i][j-1] - d, x[i][j-1] - e)
            # y : A[i] ~ _
            y[i][j] = max(t[i-1][j] - d, y[i-1][j] - e)
            # t : A[i] ~ B[j]
            t[i][j] = (max(t[i-1][j-1], x[i-1][j-1], y[i-1][j-1])
                       + s(A[i-1], B[j-1], match, mismatch))

    best = max(t[m][n], x[m][n], y[m][n])

    # Traceback from (m, n) to (0, 0).  Columns are collected back to front
    # in lists and joined once at the end.
    i, j = m, n
    cols_a = []
    cols_b = []
    state = ChooseWay(t[i][j], x[i][j], y[i][j])

    while i > 0 or j > 0:
        # On a border only one kind of move is left.  For ordinary scores
        # the state already agrees with this; forcing it guarantees the
        # loop terminates even for degenerate penalties (e.g. infinite d).
        if i == 0:
            state = 'x'
        elif j == 0:
            state = 'y'

        if state == 't':
            sub = s(A[i-1], B[j-1], match, mismatch)
            here = t[i][j]
            # Find the predecessor state at (i-1, j-1); preference t, x, y.
            # The index guards keep the walk from entering a state that
            # has no valid cell on the first row / column.
            if here == t[i-1][j-1] + sub and i > 1 and j > 1:
                state = 't'
            elif here == x[i-1][j-1] + sub and j > 1:
                state = 'x'
            elif here == y[i-1][j-1] + sub and i > 1:
                state = 'y'
            cols_a.append(A[i-1])
            cols_b.append(B[j-1])
            i -= 1
            j -= 1
        elif state == 'x':
            # Gap in A: either it was opened here (coming from t) or the
            # gap run continues to the left.
            if x[i][j] == t[i][j-1] - d and i > 0 and j > 1:
                state = 't'
            cols_a.append('-')
            cols_b.append(B[j-1])
            j -= 1
        else:
            # state == 'y', gap in B: mirror image of the 'x' case.
            if y[i][j] == t[i-1][j] - d and i > 1 and j > 0:
                state = 't'
            cols_a.append(A[i-1])
            cols_b.append('-')
            i -= 1

    aligned_a = ''.join(reversed(cols_a))
    aligned_b = ''.join(reversed(cols_b))

    # exchange the loc of A & B back to the caller's order
    if swapped:
        aligned_a, aligned_b = aligned_b, aligned_a

    return best, aligned_a, aligned_b

In [5]:
# Demo: align two short sequences with the default penalties.
# The bare call is the cell's last expression, so the notebook shows its result.
A, B = 'CCATTTTT', 'CCATT'
PSA_AGP(A, B)

(0, 'CCATTTTT', 'CCA---TT')