# Bioinformatic Algorithms Chapter 5 - Alignment

---

### BA5A Find the Minimum Number of Coins Needed to Make Change
http://rosalind.info/problems/ba5a/

**The Change Problem** <br>
_Find the minimum number of coins needed to make change._ <br>

Given: An integer money and an array Coins of positive integers. <br>
Return: The minimum number of coins with denominations Coins that changes money <br>

In [32]:
# Get the smaller number of coins from array{denominations} that sum to int{money}

### Greedy approach
####

### Order reverse

def greedyChange(money,coins):
    coins.sort(reverse=True)
    remain = money
    change = []
    while remain > 0:
        coin = [_ for _ in coins if _ <= remain] [0]
        remain -= coin
        change.append(coin)
    return change, remain



In [33]:
money=40
denominations=[1,5,10,20,25,50]
greedyChange(money,denominations)

([25, 10, 5], 0)

In [34]:
money=48
denominations=[120,40,30,24,20,10,5,4,1]
greedyChange(money,denominations)
## not optimal since we can use change=[24,24]

([40, 5, 1, 1, 1], 0)

In [61]:
greedyChange(76,denominations)

([40, 30, 5, 1], 0)

In [117]:
def recursiveChange(money,denominations):
    if money == 0: return 0
    else:
        minNumCoins = np.inf
        totNumCoins = {}
        for i in range(0,len(denominations)):
            coin = denominations[i]
            if money >= coin:
                numCoins = recursiveChange(money-coin,denominations)
                if numCoins + 1 < minNumCoins:
                    minNumCoins = numCoins + 1
    return minNumCoins

In [118]:
recursiveChange(7,[5,4,1])

3

In [161]:
import pandas as pd
def dynprogChange(money,denominations):
    minNumCoins = [0]
    for m in range(0,money+1):
        minNumCoins.append(np.inf)
        for i in range(0,len(denominations)):
            coin = denominations[i]
            if m >= coin:
                if (minNumCoins[m-coin] + 1) < minNumCoins[m]:
                    minNumCoins[m] = minNumCoins[m-coin] + 1
    res = minNumCoins[0:len(minNumCoins)-1]
    return pd.DataFrame(res).T

In [163]:
res = dynprogChange(12,[5,4,1])
res

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,10,11,12
0,0,1,2,3,1,1,2,3,2,2,2,3,3


In [166]:
m=40
d=1,5,10,20,25,50
dynprogChange(m,d)[[m]]

Unnamed: 0,40
0,2


In [167]:
#http://bioinformaticsalgorithms.com/data/extradatasets/alignment/change_problem.txt
m=8074
d=[24,13,12,7,5,3,1]
#338
dynprogChange(m,d)[[m]]

Unnamed: 0,8074
0,338


In [168]:
#http://bioinformaticsalgorithms.com/data/extradatasets/alignment/change_problem.txt
m=18667
d=[1,3,5,19]
dynprogChange(m,d)[[m]]

Unnamed: 0,18667
0,985


---

### BA5B Find the Length of a Longest Path in a Manhattan-like Grid
http://rosalind.info/problems/ba5b/


**Length of a Longest Path in the Manhattan Tourist Problem** <br>


_Find the length of a longest path in a rectangular city._ <br>


Given: Integers n and m, followed by an n × (m+1) matrix Down and an (n+1) × m matrix Right. The two matrices are separated by the "-" symbol. <br>
Return: The length of a longest path from source (0, 0) to sink (n, m) in the n × m rectangular grid whose edges are defined by the matrices Down and Right. <br>

In [84]:
import pandas as pd
def manhTourist(n,m,Down,Right):
    s = pd.DataFrame(-np.inf,index=range(0,n+1),columns=range(0,m+1))
    s[0][0] = 0
    ## Fill first column first
    for i in range(1,n+1):
        s[0][i] = s[0][i-1] + Down[0][i-1]
    for j in range(1,m+1):
        s[j][0] = s[j-1][0] + Right[j-1][0]
    for i in range(1,n+1):
        for j in range(1,m+1):
            d = s[j][i-1] + Down[j][i-1]
            r = s[j-1][i] + Right[j-1][i]
            s[j][i] = max(d,r)
    return s[m][n]

1 0 2 4 3
4 6 5 2 1
4 4 5 2 1
5 6 8 5 3

3 2 4 0
3 2 4 2
0 7 3 3
3 3 0 2
1 3 2 2

In [6]:
down=pd.read_clipboard(header=None, names=None)
down

Unnamed: 0,0,1,2,3,4
0,1,0,2,4,3
1,4,6,5,2,1
2,4,4,5,2,1
3,5,6,8,5,3


In [7]:
right=pd.read_clipboard(header=None, names=None)
right

Unnamed: 0,0,1,2,3
0,3,2,4,0
1,3,2,4,2
2,0,7,3,3
3,3,3,0,2
4,1,3,2,2


In [30]:
n=4
m=4
manhTourist(n,m,down,right)

34.0

In [33]:
down=pd.read_clipboard(header=None, names=None)
down

Unnamed: 0,0,1,2,3,4,5,6,7,8,9
0,2,3,4,0,3,1,1,1,1,1
1,4,2,3,4,3,3,0,4,1,1
2,4,4,0,1,4,3,2,0,2,2
3,4,3,0,3,4,4,3,2,4,4
4,0,1,0,1,3,0,3,0,3,4
5,3,2,4,4,4,3,1,0,0,0
6,3,4,3,1,2,3,0,0,4,0
7,2,4,3,4,1,2,0,3,2,0
8,1,4,4,1,4,4,3,1,1,4
9,3,1,2,2,3,3,0,4,0,0


In [34]:
right=pd.read_clipboard(header=None, names=None)
right

Unnamed: 0,0,1,2,3,4,5,6,7,8
0,1,0,4,4,3,3,1,0,4
1,0,2,0,3,3,0,1,2,1
2,3,2,3,1,1,4,2,4,4
3,1,3,4,4,2,1,1,1,4
4,1,4,2,2,3,1,3,2,3
5,0,3,1,0,1,0,4,1,4
6,1,3,4,4,1,0,3,2,1
7,2,3,1,2,3,2,2,2,3
8,3,2,1,4,0,2,4,2,4
9,4,0,2,0,1,3,1,4,4


In [35]:
n=17
m=9
manhTourist(n,m,down,right) #84

84.0

In [36]:
down=pd.read_clipboard(header=None, names=None)
down

Unnamed: 0,0,1,2,3,4,5,6,7,8,9
0,0,2,1,4,1,3,3,0,4,4
1,1,4,1,4,0,2,3,3,4,4
2,4,1,1,4,1,0,3,0,2,4
3,3,0,0,4,3,1,4,1,1,4
4,0,1,2,3,2,2,0,1,0,0
5,2,3,2,0,4,2,1,2,1,3
6,3,4,0,2,0,4,2,2,1,4
7,0,0,2,2,4,1,3,2,4,2
8,4,4,3,2,3,1,4,4,2,1
9,0,1,4,0,3,2,4,2,2,1


In [37]:
right=pd.read_clipboard(header=None, names=None)
right

Unnamed: 0,0,1,2,3,4,5,6,7,8
0,2,2,0,0,2,1,3,0,4
1,0,1,2,4,0,4,3,4,1
2,1,4,1,1,0,4,4,0,2
3,3,2,1,2,4,0,0,0,0
4,1,3,3,0,4,0,4,2,3
5,3,4,1,1,4,4,0,1,3
6,3,2,0,4,0,3,3,2,0
7,3,2,2,3,3,2,1,0,3
8,2,0,1,2,0,0,3,2,4
9,1,2,1,0,3,1,3,3,2


In [38]:
n=15
m=9
manhTourist(n,m,down,right) #Challenge

69.0

In [39]:
### Going column first or row first has no influence
def manhTourist_bw(n,m,Down,Right):
    s = pd.DataFrame(-np.inf,index=range(0,n+1),columns=range(0,m+1))
    s[0][0] = 0
    ## Fill first column first
    for i in range(1,n+1):
        s[0][i] = s[0][i-1] + down[0][i-1]
    for j in range(1,m+1):
        s[j][0] = s[j-1][0] + right[j-1][0]
    for j in range(1,m+1): #j then i as opposed to i then j above
        for i in range(1,n+1):
            d = s[j][i-1] + down[j][i-1]
            r = s[j-1][i] + right[j-1][i]
            s[j][i] = max(d,r)
    return s[m][n]

In [40]:
n=15
m=9
manhTourist_bw(n,m,down,right) #Challenge

69.0

In [None]:
### Going column first or row first has no influence
def manhTourist_diag(n,m,Down,Right):
    s = pd.DataFrame(-np.inf,index=range(0,n+1),columns=range(0,m+1))
    s[0][0] = 0
    ## Fill first column first
    for i in range(1,n+1):
        s[0][i] = s[0][i-1] + down[0][i-1]
    for j in range(1,m+1):
        s[j][0] = s[j-1][0] + right[j-1][0]
    for j in range(1,m+1): #j then i as opposed to i then j above
        for i in range(1,n+1):
            d = s[j][i-1] + down[j][i-1]
            r = s[j-1][i] + right[j-1][i]
            diag = s[j-1][i-1] + diag[j-1][i-1]
            s[j][i] = max(d,r)
    return s[m][n]

### BA5c Find a Longest Common Subsequence of Two Strings

http://rosalind.info/problems/ba5c/

Longest Common Subsequence Problem
Given: Two strings.
Return: A longest common subsequence of these strings.

In [1]:
#penalties
gap = 0#-4
mismatch = 0#-3
match = 1

# gap = -4
# mismatch = -3
# match = 1

In [6]:
import pandas as pd
s = pd.DataFrame(0,index=range(0,n),columns=range(0,m))
p = pd.DataFrame('',index=range(0,n),columns=range(0,m))
for i in range(1,n):
    if vh[i] != wh[0]: s[0][i] = s[0][i-1] + gap
for j in range(1,m):
    if vh[0] != wh[j]: s[j][0] = s[j-1][0] + gap
###
for i in range(1,n):
    for j in range(1,m):
        south = s[j][i-1] + gap
        east = s[j-1][i] + gap
        if vh[i] == wh[j]:
            diag = s[j-1][i-1] + match
        else:
            diag = s[j-1][i-1] + mismatch
        t=[('s',south),('e',east),('d',diag)]
        ### Get which one is max
        whichMax = np.argmax([_[1]for _ in t])                
        s[j][i] = t[whichMax][1]
        p[j][i] = t[whichMax][0]
s2 = s.copy()
s2.index=[_ for _ in vh]
s2.columns=[_ for _ in wh]
s2

NameError: name 'n' is not defined

In [None]:
def lcsq(x, y):
    m = len(x)
    n = len(y)
    table = np.zeros((m+1, n+1), dtype=int)
    for i in range(1, m+1):
        for j in range(1, n+1):
            if x[i-1] == y[j-1]:
                table[i, j] = table[i-1, j-1] + 1
            else:
                table[i, j] = max(table[i, j-1], table[i-1, j])
    
    i, j = m, n
    w = ''
    while (i > 0) and (j > 0):
        if x[i-1] == y[j-1]:
            w = x[i-1] + w
            i -= 1
            j -= 1
        else:
            if table[i, j-1] > table[i-1, j]:
                j -= 1
            else:
                i -= 1
    return w,table


In [None]:
def scoreAlignment(w,v,gap=0,mismatch=0,match=1):
    """
    """
    ###
    wh = '-'+ w
    vh = '-'+ v
    n=len(vh)
    m=len(wh)
    ###
    scoring = pd.DataFrame(0,index=range(0,n),columns=range(0,m))
    ## Initialize F matrix
    for i in range(1,n):
        if vh[i] != wh[0]: scoring[0][i] = scoring[0][i-1] + gap
    for j in range(1,m):
        if vh[0] != wh[j]: scoring[j][0] = scoring[j-1][0] + gap

    ### Dynamic programming. Iterate over the scoring matrix.
    for i in range(1,n):
        for j in range(1,m):
            down = scoring[j][i-1] + gap
            right = scoring[j-1][i] + gap
            if vh[i] == wh[j]:
                diag = scoring[j-1][i-1] + match
            else:
                diag = scoring[j-1][i-1] + mismatch
            t=[('s',down),('e',right),('d',diag)]
            ### Get which one is max
            whichMax = np.argmax([_[1]for _ in t])                
            scoring[j][i] = t[whichMax][1]
    s2 = scoring.copy()
    s2.index=[_ for _ in vh]
    s2.columns=[_ for _ in wh]
    s2
    return scoring,s2

##
def LCS(w,v,Fmatrix,debug=False):
    wh = '-'+ w
    vh = '-'+ v
    """
    """
    lcs = ''
    i = Fmatrix.shape[0]-1
    j = Fmatrix.shape[1]-1
    while (i > 0) and (j > 0):
        #print((j,i), s[j][i],LCS[::-1],'<===LCS\t',vh[i],wh[j])
        if vh[i] == wh[j]:
                lcs += wh[j]
                i -= 1
                j -= 1
                if debug: print('\tDiag')
        else:
            if i >= 0: up = Fmatrix[j][i-1]
            else: up = -np.inf
            if i >= 0: left = Fmatrix[j-1][i]
            else: left = -np.inf
            if left > up: 
                if debug: print("\t",(up,left),'left')
                j -= 1
            else:
                if debug: print("\t",(up,left),'up')
                i -= 1
    return lcs[::-1]


In [None]:
w='AACCTTGG'
v='ACACTGTGA'
l, scor = lcsq(w, v)
pd.DataFrame(scor)

In [7]:
w='AACCTTGG'
v='ACACTGTGA'
sco,_ = scoreAlignment(v,w)
sco

Unnamed: 0,0,1,2,3,4,5,6,7,8,9
0,0,0,0,0,0,0,0,0,0,0
1,0,1,1,1,1,1,1,1,1,1
2,0,1,1,2,2,2,2,2,2,2
3,0,1,2,2,3,3,3,3,3,3
4,0,1,2,2,3,3,3,3,3,3
5,0,1,2,2,3,4,4,4,4,4
6,0,1,2,2,3,4,4,5,5,5
7,0,1,2,2,3,4,5,5,6,6
8,0,1,2,2,3,4,5,5,6,6


In [773]:
l

'AACTTG'

In [776]:
LCS(v,w,sco,debug=False)

'AACTTG'

In [611]:
##https://www.youtube.com/watch?v=LhpGz5--isw
# v='ATGGCCTC'
# w='ACGGCTC'

##http://rosalind.info/problems/ba5c/
w='AACCTTGG'
v='ACACTGTGA'



Unnamed: 0,-,A,A.1,C,C.1,T,T.1,G,G.1
-,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
A,0.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0
C,0.0,0.0,0.0,1.0,1.0,0.0,0.0,0.0,0.0
A,0.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0
C,0.0,0.0,0.0,1.0,1.0,0.0,0.0,0.0,0.0
T,0.0,0.0,0.0,0.0,0.0,1.0,1.0,0.0,0.0
G,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0
T,0.0,0.0,0.0,0.0,0.0,1.0,1.0,0.0,0.0
G,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0
A,0.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0


In [712]:
w='AACCTTGG'
v='ACACTGTGA'
Fmat,_=scoreAlignment(w,v)
LCS(w,v,Fmat)

'ACCTGG'

In [713]:
##https://www.youtube.com/watch?v=wmIMeyWDZI0&list=PLQ-85lQlPqFNmbPEsMoxb5dM5qtRaVShn&index=3
v='ATGTTATA'
w='ATCGTCC'
Fmat,_=scoreAlignment(v,w)
LCS(v,w,Fmat)

'ATGT'

In [715]:
%time
w='ACCGTCTTAGCGATCAACACATTTAACAACGCGCCGCACCCCCCGTCAAACGAGCTTTTGGGCTCTTGTCCTTTTACAAGCTTCACGACGCATACAGCCTTGATCAACGGTTTGATCTGTCTCCCTTCAGCTGGCTTTAAAGGACATACATATGAAGGCCTTAATAAGGTCCGGGAACTCCACATATTCGGTACTGGGCAAACCCCATGAACCACCTCAACATGAAGAGTCCGAGGACTCTCACGATCCACCAATGCAGATCGGAACTGTGCGATCGCGTAATGAGCCGAGTACTTGGTTTGTGTTTAGGTTATGGGGGCCGGGAGCCGGTTCAATATAAGGAAGTAGTTGCAGATTAGTTGTTGCGAACGGTCATAAATTTGATGGGTAAACGTGAACTTAACAAACCGTGATAGCTAATCCTATGCATCCCTTACGTGGATCGACTCGAGTACCCAGGTGAACCGACTACTTGATAACCGGAAATCGCGGTATAAAAGCGCTCACGGTCAGGAGATATACCTCCAAGCAGTAGTCTTTCTGAGCCTAGAGTAGTAAATTACAGGGACGATGTCTTTTACCGAGGCAACATTTTATTGAGAATCACATGAGGCACAGGTAAAGGCGACATCACGATCGAGATCAACCCCTACTTGTTCAAAACATTGAGAACCAGCTCTGTTTTGGAACCTAGAAAGATAACGCATCCGCTTGATATTCCACGGCTTGTCCCTCTTGTGCGGTCCATCTATCGGAGTTTCCTCCGATACGACCCGCAATGTTTCCAGGCGTACGGTACTTTATGAATACACTCGCGCTGTAACCTGTTATGTGAAACACACACGACAGAGCTTCGCGTGGGCCCAGCGACCCGGTAATACTACATCACCGCACACGACCTCGAGCAGTCTTTGCCGGCGTCCGTAAGTAGTCTAAAGTTGTGTTGATGCTTGGGGTTAAAGCTAAATCGTCCGCAGAATACGACTCTCATCCCAAT'
v='ACCCGCACGCGCTTTGGTCTAGATTCTAGCTCCAACTTGCCTGCTAGATACTCTGTTAAAAGATGGTTTTACAACCCCCTCCTCTGTCCCTGGGGTATTATATAATACGTCGGATAGTCAGGTACAAATACAAGTGGGTGGGAATACTTTTCCTCGGATCCTAGACCACGGATTACTGCGTGGTTGACAAGAGTCGGCCCGGAGGGAAACGTGAAGGTTAGTGCAATTAAAGTCTCTAATGTGAAGCCTCCGCGAAGCGAGGAGTTTCTGAGATCGAGTACTATTTAGAGTTCGAAATCACGGCTTAACCTCACTGCCACGCATAACTTGCCGGCAATCCAGTTTTGCAACGATACTTAATTTGTGCAGCTCATCTTTGCTGTCCAGAAATAGAGCTAGTCGATCTCATCTTGCGGGTAGCCAGAAGTCCTACCGTCTCCTCCATGTAGCTTAAAAATTTCGGTGAGGATCAAAAATGATAAACGTGACAGGTAAGCTCCTACGTCTATCCTATGACCCCCGCGGCAGAATAGGTTGGTAGTGTTAGTGCGTGAGCTGGTAGAATAGAGCACACTTAGGGAAACGGGAACCGTTATGTAGGGCTGCGACACACAAAAAAGTGTTCGTTGGTAAGCTGCCTCTCCACTAAACAGGATTTCTCTGGATGATCCCATCGAAGCAAGTTACGCACCACGCCGAGGCGGACCCTGGTACTAGCTGCCCCCCCCTTTATGGGGCGCTCGTACATCAAGATGATCGCGGACTCAACCTGATTACGAGTTGTCCAAGTAGTCCAGGGTAAGAGAAACTGGAGAGA'
Fmat,_=scoreAlignment(v,w)

CPU times: user 3 µs, sys: 1e+03 ns, total: 4 µs
Wall time: 21 µs


In [747]:
### Challenge
% time
w='TTTGACATTCCTACGACCTAGAACCGGCCGGAAGGGATAGGCTACCAAGCGATGTATCTGAGGATCTCATTGGCCGGTCGTTTCAAGCCGTAGTTTGGCCGCGATACCCCGTTTCTTCTTATAATCTCACCGTTGCGCCCACGTGAAATCAGTACTGTTCGGGGGTTGTGTAAAAATCCTCATCACCGCAAACCCACTCGCAGGGGTCTTGTCAGATAAGCGCGCGTATGTTGGTCGGGGAACGGAAGGGGGGTGTATGTCGTATCTTCGGGTGACCACCCATCCCCCTACTCAAGATCTGTGTAGGAGCTATTATTGTTTGAGGTGAATGAGATGCGCTTCGAGGCCGTGCGTACACGGATAGTAGCTGGCGGACACGCCGGCAGCCGCATTCTGTCTCAGCTTCAGTCCGACCTTCCTATTTGGTAATAACCGCAATTATGGAAGTTCGACATCCCGAGGTGATTCAAATTAGGGTTTAATGCGCTCCGCTGGTACTGCATGTATCTATCTTCTTCATGTTAGGGCGCTAAGGCGTGCCTTCATTGAATTGTTTTAGCATACCACCCTGGAAAGGCCGCAAGCTCACGGTCGAGACAGAGCCGTGGAACGGTGATGGACTGTTAAGGATCGCTGTGGTGCGTAATATTAGAGAATTTCCGGCCCGCTAATACCCAACTGTGCACTTAACCCCATCACATGTGAAAGTTGTACTCCAGTAGGTGACCTGAGTCCCAAGACGCTCCCCCTGATTAGTTGTGTGCACTGTTCCGATCGTCAGGGTGCTAGATGTCCTTGACCGAGGTTCAGTTCGCCAGATGGTGCCGACGGACGCGGTCCACTGAGGCCCCTCATTGGCCGAGCAATAGTTGATCCG'
v='CACCTAGCCTATCTTTTCGGTTCTAGCCTAAATATCATCAGCACGTGCCTATGTGGCCGTGTAATGAAGCTTGGTTGAGAATATTGGTCCTTGTTAATATGTGTTCTCATATTACTTGCGGTGGAGACTCGAACAACGCCGTCCTGGCGTAGAAGCGGAATTTCGATGTCGCCGTACAGTATTGGTATTGTTCCGCGGGGCATGACCAGCTGCCGAACACGTGCTCCGTGATGATACCGTCACACCAATTTGAAGCTAAAACCATGTGCTTACATAAATCGGCTGTCACAGAAATTCGCTTGCTGTCCTCGTGCGCAATTTCGTGCGTTACCCGCGTGCCCCCACAACATTTACTTTCTAACATGCCCTCAACAAATCCACTACCTAGTAGCGCCGCACTCTTCAGAGTATCAAAGCGGCATAAAGGACAACAGCAACGCTCTCATCGTGACGCCTTATTGTCCTTGCTAGGGAACTTGTCGCGGTGACACTATTGTGCGGAGAGGGTTGTGCTTGCGCTGGCTACCTATCAGCAAATCGTCGTGTGTTGCGCTTCATAGGCTTTCCCGCAAGGTTCACGCGAGGGCGGTTCAACGGCTTATCTCCTCGTCTACGTCCACCAGATTACGTTGCTGTCGCCCGCTCTTTACCCAGGGTTCCCGTGCGGATGTCATTCTTTGTTAAGTACTACCATCCACACAACTTGGTTCTTTCCTTCTCACCGGTAGCGTAGTTAAAGCGCTAACGCATAGTTGCGAAATAACTAACTGCATGACGCCATCGACTTCCCTGTCTTCATTATTACTTAAGTACGCATGAGGCCCCCGAACGCCCCCAGACGAGGGTCCTCTGGCGGA'
Fmat,_=scoreAlignment(w,v)
LCS(w,v,Fmat)

'CACCTAGCCTACCGGTCTACCAAATATCTAGACTCATTGGCCGTGTAATAGTTGGGGATATTTCTTTTAAATTGTTCCATATTACTTGGGTGGAACTCAACAACCCTCCGGCGTAGAAGCGGATTTGGTCGCGAAGTATTGTATTTCGGGTGACCACCCACCCCCTACTCAAATTTGAAGCTAATTGTTTAGGTGAAGAATCGCTTGGCCGTGCGCAATTTGGCGACCCCGGCCCCATTCTTCTCAGCTCATCCACCTTCCATTTGGTATAACGCATAAAGGACACAGCAATTAGGGTTATGTCCGCTGGACTGTGTACACTTTTCAGTTGGGCGCTGGCTCCTTCAGAATTGTTTTGCCCATAGGCCGCAAGTCACGCGAGGGCGGAACGGTTACTGTTAGGATCGTTGGTGCGTTTTAAGTTCCGGCGATCACTGTGACTACCCCACACATTGGTTCTCCTAGGTACGAGTAAACGCTCGATAGTTGGTCACTGCATCGCATCGATTCCTGCCATTATTCAAGTCCGAGGCCCCCGAGCCCCCAGCGAGGGTCCG'

In [781]:
### Challenge new dataset
%time
w='TTAGTGGGTATAGGTGCCGTGTCGGATATTACTCATTTCCGACCAATTCGGTTTCAGTGAAACGTAGATGATCACCTATCGGTATAAGGACTTGCAGTAGAGGTCAGTTTCCTTGAGAACAAGCATCCATGTAACCCAAGTTTCTCCCGCGGAGTGGGCGATTGGCTAACAAGAGCGGTTCGGCGGTGCAAGCTGAGACAAGAGCTTGCGGAATGGGAGCTGTAACTCCATAAACTAGAGTTTAATCGTAGTCCTAGTCCTGCCCAGTTGGCAAGTGCTTAAGGGGTAAAGTCGTAATGATTCCTCGACCAGTTCCCCTATGGTACTTACAAGTTGGCTCATGGGGTAAATCACTGAAGAACGAGAAGCGCACGTGAGGCTGGCGGGCTGGTCACGATTTCTATCATTGTAGGTAGGGCGGTTCGGGTCATCCAACGCAGTATGTTATCGTCATTTCTCACACATCGGGAGCACGACACGAATCTTCGTACGTGTCCATGCCATGTTTAAGTTTCTTAGAGATACTTAAGTTGAAAATCCCGATAGAATCAACTACAGCTCACTACCTCTTCCACCTCTTCGCTTCGGGTAACATAACTTCTCATTCCAGAGGTTGCGAGGCCACCCGGACTTCATACGGCTTATATCAGCTTCACTCCATCCCTCGGCCCATTAGCGAAGCAGTGCGCGAGGATGCCATGCAGCGTAATTGGAGGGACTTCGGAGTCCCAACACATAGTTCTAAAGTAGTTTTTAGCCGCTTAGAATCCCTGGCGGGGGAAGATTACCATCGGAGATTCCCTTGAACTGATCTGCGCGACACTGGATATGGTATCGTGTGCAAGCAACCGTATACATCAAAACCATGGGAGTAAGCATCCCAGGGAAACCAGGGAACTCATGTACAAGATTTTCAGTGGACGACACGACGGTCTATATCCGGACCACGGTCACACCTACCACGACC'
v='GTTAGACGCATGGTGAAGCTTGGCGCCCCAAGGCTAGTAGGGCGAACATGAGCGGCCAACAACTTATGTTAGCGCATATACGAAGGGGGTCATTTAACAGGGGTACAATTACCATGGTTAGACGGATTTCCAACCTCGAGCTGAAGGGCACAATAGAACGTTATGGGGTGCTAATTCCTCCTGATGTTGATACAGCGTTGGCCGTGACTATTTGGCGCAAGTAGACTATATTGTATACTTCTATTCGAACAGCCGATCCGGTGGAGTTTGGGGGGGAACGCAGCCGGAATCGCTGTGACCCCCTGCGTACCCCGAGGAGACGACTCTTCCGGAGTGATATAATGCGCAGCGAGTCGGCCTATAGGGGAATGTTGATAATATCCTAGCCCCGTGCCCAATAAGCCTAAATTTTGCCTTACTGAGCCCTCATGCATCGCGATAAGATCTAGTGAGAACGCCAACGGAAAACTCTTTCGGTGGGACCCGGCACCCTCGGCTGACGCACACTGTCGTCGACAGGGTAGGCCCGGGCTCGTATCTCCCCAACTTAAAAAACGGTGGCTTGAGGTTGGGACGGCTGAGCATGCGATTGGCTATGGCGTGAACAGCCTCAGATGGGTAATTACTGACAGCGCGTTACGCAACAGCATCACTCCCGAACCAAACAAGAGAGGTCTATGGATCGAGAGCGAGAGGATCTGGTGTTGTACACGCGTAAACACCGTACTCCCGATCATACCTTAGTAATATGATGCTCGGTCCATATAGGATAGGATGAGTACTGCATTACAGCGGTTCCATAAAACTACCCCTCTAGCAAGCCGGAACGATTGAGTCTCGTGCAATTATTTCGATGAGGTCAGTCATTATAAGGTATGGAATAATGCTTCACACGCTTCGTTTGTTCCTATGACTTACGCTTAGGACGCAAGCCACGTCGTGTTTTTAGGGACCCAGACTCTGCTCATGTTGATACGAGGAGGGGTCTCC'
longSub,_ = lcsq(w,v)
longSub

CPU times: user 3 µs, sys: 0 ns, total: 3 µs
Wall time: 17.9 µs


'TTAGTGGGTTGGGCCGTGTCGAATACCATTTGACCAATCGGTTTCAGTAAACGTAGAGATCACCTCGGTAAGGACTGCGTAGGGTCATTTCCTTGGAACAGCTCCTGACATTTCGCAGTGCGATTCTACAAAGCGTCCGGTGAGTGAACGAGCCGGAATGGGAGCGTACCCAAAACTGAGTTTAATCGCAGCGAGTGGCCTTAGGGGAAGTGTAATATCCTCCCGTCCCCTATGCTTACTGGCTCATGGGTAAATCATGAAGAACGAAACCCGTGAGGCTCGGCTGGCACATTTCACAGGGTAGGGGGTCGTCTCCCCAATTAAAAACGGGGCAGGTTGACGTGCATGCATGGCTTGGCGTGAAACCCGATGAATACTACAGCTCACACTCACCCCGCCAACAAATCTATTCAGAGCGAGAGGATCTGGTTTACAGCTCACTCTCCCTCCCTTAGAAGATGCCGGTCCATAGGATAGGATGAGTCCAACACGTTCTAAAATACCCTTAGAACCGGAAGATTACTCGGAATTTTGATGATCTCAATGGTATGGTATCTTCAAGCCGTTTCCATGATAGCTAGGGAACCAGTCTGTTTTTAGGGACGACCGAGTTATACGGAGGTCTCC'

In [777]:
%time
w='TTAGTGGGTATAGGTGCCGTGTCGGATATTACTCATTTCCGACCAATTCGGTTTCAGTGAAACGTAGATGATCACCTATCGGTATAAGGACTTGCAGTAGAGGTCAGTTTCCTTGAGAACAAGCATCCATGTAACCCAAGTTTCTCCCGCGGAGTGGGCGATTGGCTAACAAGAGCGGTTCGGCGGTGCAAGCTGAGACAAGAGCTTGCGGAATGGGAGCTGTAACTCCATAAACTAGAGTTTAATCGTAGTCCTAGTCCTGCCCAGTTGGCAAGTGCTTAAGGGGTAAAGTCGTAATGATTCCTCGACCAGTTCCCCTATGGTACTTACAAGTTGGCTCATGGGGTAAATCACTGAAGAACGAGAAGCGCACGTGAGGCTGGCGGGCTGGTCACGATTTCTATCATTGTAGGTAGGGCGGTTCGGGTCATCCAACGCAGTATGTTATCGTCATTTCTCACACATCGGGAGCACGACACGAATCTTCGTACGTGTCCATGCCATGTTTAAGTTTCTTAGAGATACTTAAGTTGAAAATCCCGATAGAATCAACTACAGCTCACTACCTCTTCCACCTCTTCGCTTCGGGTAACATAACTTCTCATTCCAGAGGTTGCGAGGCCACCCGGACTTCATACGGCTTATATCAGCTTCACTCCATCCCTCGGCCCATTAGCGAAGCAGTGCGCGAGGATGCCATGCAGCGTAATTGGAGGGACTTCGGAGTCCCAACACATAGTTCTAAAGTAGTTTTTAGCCGCTTAGAATCCCTGGCGGGGGAAGATTACCATCGGAGATTCCCTTGAACTGATCTGCGCGACACTGGATATGGTATCGTGTGCAAGCAACCGTATACATCAAAACCATGGGAGTAAGCATCCCAGGGAAACCAGGGAACTCATGTACAAGATTTTCAGTGGACGACACGACGGTCTATATCCGGACCACGGTCACACCTACCACGACC'
v='GTTAGACGCATGGTGAAGCTTGGCGCCCCAAGGCTAGTAGGGCGAACATGAGCGGCCAACAACTTATGTTAGCGCATATACGAAGGGGGTCATTTAACAGGGGTACAATTACCATGGTTAGACGGATTTCCAACCTCGAGCTGAAGGGCACAATAGAACGTTATGGGGTGCTAATTCCTCCTGATGTTGATACAGCGTTGGCCGTGACTATTTGGCGCAAGTAGACTATATTGTATACTTCTATTCGAACAGCCGATCCGGTGGAGTTTGGGGGGGAACGCAGCCGGAATCGCTGTGACCCCCTGCGTACCCCGAGGAGACGACTCTTCCGGAGTGATATAATGCGCAGCGAGTCGGCCTATAGGGGAATGTTGATAATATCCTAGCCCCGTGCCCAATAAGCCTAAATTTTGCCTTACTGAGCCCTCATGCATCGCGATAAGATCTAGTGAGAACGCCAACGGAAAACTCTTTCGGTGGGACCCGGCACCCTCGGCTGACGCACACTGTCGTCGACAGGGTAGGCCCGGGCTCGTATCTCCCCAACTTAAAAAACGGTGGCTTGAGGTTGGGACGGCTGAGCATGCGATTGGCTATGGCGTGAACAGCCTCAGATGGGTAATTACTGACAGCGCGTTACGCAACAGCATCACTCCCGAACCAAACAAGAGAGGTCTATGGATCGAGAGCGAGAGGATCTGGTGTTGTACACGCGTAAACACCGTACTCCCGATCATACCTTAGTAATATGATGCTCGGTCCATATAGGATAGGATGAGTACTGCATTACAGCGGTTCCATAAAACTACCCCTCTAGCAAGCCGGAACGATTGAGTCTCGTGCAATTATTTCGATGAGGTCAGTCATTATAAGGTATGGAATAATGCTTCACACGCTTCGTTTGTTCCTATGACTTACGCTTAGGACGCAAGCCACGTCGTGTTTTTAGGGACCCAGACTCTGCTCATGTTGATACGAGGAGGGGTCTCC'
Fmat,_= scoreAlignment(v,w)
myAlgo = LCS(v,w,Fmat)
myAlgo

CPU times: user 3 µs, sys: 0 ns, total: 3 µs
Wall time: 17.2 µs


'TTAGTGGGTTGGGCCGTGTCGAATACCATTTGACCAATCGGTTTCAGTAAACGTAGAGATCACCTCGGTAAGGACTGCGTAGGGTCATTTCCTTGGAACAGCTCCTGACATTTCGCAGTGCGATTCTACAAAGCGTCCGGTGAGTGAACGAGCCGGAATGGGAGCGTACCCAAAACTGAGTTTAATCGCAGCGAGTGGCCTTAGGGGAAGTGTAATATCCTCCCGTCCCCTATGCTTACTGGCTCATGGGTAAATCATGAAGAACGAAACCCGTGAGGCTCGGCTGGCACATTTCACAGGGTAGGGGGTCGTCTCCCCAATTAAAAACGGGGCAGGTTGACGTGCATGCATGGCTTGGCGTGAAACCCGATGAATACTACAGCTCACACTCACCCCGCCAACAAATCTATTCAGAGCGAGAGGATCTGGTTTACAGCTCACTCTCCCTCCCTTAGAAGATGCCGGTCCATAGGATAGGATGAGTCCAACACGTTCTAAAATACCCTTAGAACCGGAAGATTACTCGGAATTTTGATGATCTCAATGGTATGGTATCTTCAAGCCGTTTCCATGATAGCTAGGGAACCAGTCTGTTTTTAGGGACGACCGAGTTATACGGAGGTCTCC'

In [782]:
len(myAlgo) == len(longSub)

True

                    My implementation is slow but it works. **Optimize later**

---

### BA5E Find the Longest Path in a DAG

http://rosalind.info/problems/ba5c/

**Longest Path in a DAG Problem** <br>
Find a longest path between two nodes in an edge-weighted DAG.<br>

Given: An integer representing the source node of a graph, followed by an integer representing the sink node of the graph, followed by an edge-weighted graph. The graph is represented by a modified adjacency list in which the notation "0->1:7" indicates that an edge connects node 0 to node 1 with weight 7.<br>

Return: The length of a longest path in the graph, followed by a longest path. (If multiple longest paths exist, you may return any one.)<br>

Answer is in http://localhost:8888/notebooks/LargestPath.ipynb

----

### BA5E  Find a Highest-Scoring Alignment of Two Strings

http://rosalind.info/problems/ba5e/

**Global Alignment Problem** </br>
Find the highest-scoring alignment between two strings using a scoring matrix.</br>

Given: Two amino acid strings.</br>

Return: The maximum alignment score of these strings followed by an alignment achieving this maximum score. Use the BLOSUM62 scoring matrix and indel penalty σ = 5. (If multiple alignments achieving the maximum score exist, you may return any one.)</br>


```
- input: two strings
s1=PLEASANTLY
s2=MEANLY

- output: optimal alignment score, and aligned strings
8
PLEASANTLY
-MEA--N-LY
```

### BA5F  Find a Highest-Scoring Alignment of Two Strings

http://rosalind.info/problems/ba5f/

**Local Alignment Problem** </br>
Find the highest-scoring alignment between two strings using a scoring matrix.</br>

Given: Two amino acid strings.</br>

Return: The maximum alignment score of these strings followed by an alignment achieving this maximum score. Use the BLOSUM62 scoring matrix and indel penalty σ = 5. (If multiple alignments achieving the maximum score exist, you may return any one.)</br>


```
- input: two strings
s1=PLEASANTLY
s2=MEANLY

- output: optimal alignment score, and aligned strings
8
PLEASANTLY
-MEA--N-LY
```

### BA5G Compute the Edit Distance Between Two Strings

http://rosalind.info/problems/ba5g/

> In 1966, Vladimir Levenshtein introduced the notion of the edit distance between two strings as the minimum number of edit operations needed to transform one string into another. Here, an edit operation is the insertion, deletion, or substitution of a single symbol. For example, TGCATAT can be transformed into ATCCGAT with five edit operations, implying that the edit distance between these strings is at most 5.

**Edit Distance Problem** <br>
Find the edit distance between two strings.<br>

*Given*: Two amino acid strings.<br>
*Return*: The edit distance between these strings.<br>

````
- in:
PLEASANTLY
MEANLY

- out:
5
```

In [33]:
def editDistance(x,y,penalty=1,match=0):
    """
    More efficient. Time is O(m*n) but space is O(n)
    """
    m = len(x)
    n = len(y)
    Fmatrix = np.zeros((m+1,n+1))

    ## initialize
    for i in range(1,m+1):
        Fmatrix[i,0] += Fmatrix[i-1,0] + 1
    for j in range(1,n+1):
        Fmatrix[0,j] += Fmatrix[0,j-1] + 1

    ##
    for i in range(1,m+1):
        for j in range(1,n+1):
            if x[i-1] == y[j-1]:
                Fmatrix[i,j] = Fmatrix[i-1,j-1] + match
            else:
                mm = Fmatrix[i-1,j-1] + penalty
                down = Fmatrix[i-1,j] + penalty
                right = Fmatrix[i,j-1] + penalty
                Fmatrix[i,j] = min([mm,down,right])
            #print(x[i-1],y[j-1],[mm,down,right])

    return Fmatrix[m][n]

In [34]:
a='PLEASANTLY'
b='MEANLY'
editDistance(a,b)

5.0

In [35]:
##http://bioinformaticsalgorithms.com/data/extradatasets/alignment/edit_distance.txt
a='GGACRNQMSEVNMWGCWWASVWVSWCEYIMPSGWRRMKDRHMWHWSVHQQSSPCAKSICFHETKNQWNQDACGPKVTQHECMRRRLVIAVKEEKSRETKMLDLRHRMSGRMNEHNVTLRKSPCVKRIMERTTYHRFMCLFEVVPAKRQAYNSCDTYTMMACVAFAFVNEADWWKCNCAFATVPYYFDDSCRMVCGARQCYRLWQWEVNTENYVSIEHAEENPFSKLKQQWCYIPMYANFAWSANHMFWAYIANELQLDWQHPNAHPIKWLQNFLMRPYHPNCGLQHKERITPLHKSFYGMFTQHHLFCKELDWRIMAHANRYYCIQHGWHTNNPMDPIDTRHCCMIQGIPKRDHHCAWSTCDVAPLQGNWMLMHHCHHWNRVESMIQNQHEVAAGIKYWRLNRNGKLPVHTADNYGVLFQRWWFLGWYNFMMWHYSLHFFAVNFYFPELNAGQMPRFQDDQNRDDVYDTCIWYFAWSNTEFMEVFGNMMMYSRPMTKMGFHGMMLPYIAINGLRSISHVNKGIGPISGENCNLSTGLHHYGQLRMVMCGYCTPYRTEVKNQREMISAVHCHQHIDWRWIWCSGHWFGSNKCDLRIEDLQNYEPAKNKSNWPYMKECRKTEPYQDNIETMFFHQHDLARDSGYIANGWHENCRQHQDFSNTFAGGHKGTPKGEHMRRSLYVWDTDCVEKCQWVPELFALCWWTPLPDGVPVMLGTYRQYMFGLVVLYWFEVKYSCHNSWDYYNFHEGTMKDSDPENWCFWGMQIIQFHDHGKPEFFQDPMKQIIKTECTAYNSFMMGHIGKTTIVYLVSYIGRLWMKSCCLTWPPYATAPIKWAEETLLDFGQGPHPKYACHFTHQNMIRLAKLPMYWLWKLMFHE'
b='GMWGFVQVSTQSRFRHMWHWSVHQQSSECAKSICHHEWKNQWNQDACGPKVTQHECMANMPMHKCNNWFWRLVIAVKEEKVRETKMLDLIHRHWLVLNQGRMNEHNVTLRKSPCVKRIMHKWKSRTTFHRFMCLMASEVVPAKRGAQCWRQLGTYATYTVYTMMACVAFAFEYQQDNDNEADWWKCNCAFVPVYFDDSCRPVVGAFQCYRLGLPFGTGWNYAEENPFSKLKQQMHRKTMGECKNMMIWAYCANELQLPIKWGSMYHEHDFQLPPYHPNRFHKIRITILHKSFYGMFTQHHLFCKELDWRIMAWANRYYCIQHGWHTNNPDDPITRHKCMIQGGQNSRNADIRHMPVQCGNWGHAIGLEMPMPMHHCHHANRVESMIQTQHYWGPKLNRNADWWFLGWQNFEIFRMPILRWMGAYEWHYSLHFFAVNFYFPELNAGQMPRFQDDQNNNACYDVWAWSNTEFMEVNGIKKLRFGNMMMYSRPMTKMGFHGMMKSRSISHVNKGIGPISGENCSTGLHHYGQLTEVKNQREMISAVHCHQHIWCKCDLRIEPAKNKGYWPYQKEFCWRKQINSRKTEPYQVAPVINIETMFFDFWYIANGMHENCRRTGHKPNPDCVEKCQWVPELFALCWWRAMPDGVPVMLGTMFGLVVYWFEVKYSCHNSLYRRVTDYYNFHEGTMKDHEVPWNWDNEHCHDHGKAEFFFQMLKIPICDPMKAIIPSTEMVNTPWHPFSFMMGHDGKTTIVYSGSYIGRLWVPSRWKPYAPANWKMPIKWAEETLLMVPHPHFTHQQLWGTTLRLAKLPMYWLWKLMFHHLFGVK'
editDistance(a,b) #400

400.0

In [36]:
## Challenge
a='KILHWMRGSHPWRWWQQHTKVIKPGWEIAVQDTNTHRYMKRIYRMRGLWMQISTDPGVWNKVVVPDTWSASKEIMVKMYAWSYLSYNEDIPCNLDRQNKNRVSMVHMIWRWFLEATHVRWTHNWYCTTLEAHEALSWNSNRAGIVIENSFMQHHMHCQEYDASACYSFQYIVTQECLGRGWLLEIRCFWEIMCITYMCAGDMNMCNREPPRCVWVPNHKLRKHLHMCTRFISTGEKGHIVQMVWSQVTWMWHGDQFTLAQENYMRFSGTTYGWCLAPGPCYQNLGQNTDGFWPYDIPNWNEWNMCERWAYVYRFSAVIIIFPQYQMSDPYAPGEIPNDLGPIIKMVDACKQKAENIEMFDKNASFTGTEASYFPYQMEWYRFCTEHRSGWYMQRSGMKPNWMAPGVRCYNWARKKRCPHIWEIPPIWFIWPTGYMAIVHCQNHRGVVYNDAVQIKRADWDHVLMAVKQAQSSMPNHSMKEEPYLTDWYVRMDFTQQCPFLNKSVILSYNCSYGPNEHYGRRFQAVNPVRSRACFAAWQWKHTPLDMTDWKWTFHQISNTERRQVNQEKVWPNRTFDGCLGRTCRPRDIDAHWQEQEMMQPNLNVVNEYPNMPRQFEARMPCHRQPDPDAEKHYCTSKCLDSKLRLCDNYIQCVHIFGTPWLDSNMFIYKENALLMEVANLFHTIVNCPRWHRWCWPFLPNPGNACWNSRVWIIIHIFIPEPVGGTGFYGQEHEHWMDGSAGSWFLPFLLLVDHLVVNENNRSKQMILAPRSQYPTLSETGSGLNQVLSYCWHIMHTKWYSMYFLYFMHHCLLTIMMISSLTTIGDTYNCFSDHSNLTNPKKKFNLTRKVGGFTLELWWAIFCRPPWNAVAFPNQPYSPEKVKSSNCAGLLTFWTVKKCQEFWGVQYMIEQNHDMWVQCFEDFGHFTRGSFDTKSDMQLGNNWETTLCKLKHEPTCVYVHEWEPYRHQPPSPNKPMCEWWRCYNCMNGYSTLMWETDKEFLEWCWSKSCYMKHIEIAHEAMIIRQNKYLESRNFHMFNQRSQLSCDRGLKFWSGNKKGYMIFPMGEVLPVFLTYDDSFSGWMPCMKWHTTSCSVVFPYAYCLGWIWKIANSKHQYFIKTKCEEGTVKGEPHNAKILSQNKMCMHGKCMYWYMCPDFGQWNMATGMVWNNYCPGRITQKHWEGHMVYALQKWYHCAEMFNCQELVVCYDMQVCELTCGCSEYYSCNIRTMGKLTCYWNGKAQEVHQPIIDAGDSSELQWCYMSYIFMLMKSSFPFMAWHPHWACTAPQITHDLHNNEEMIAQHTIITFQRCLGCYEHAVRDKGMCYFITREGLVGMPAEVRHMPELTLLKWMCQTWRFCIAFLDFRLKQPTGKSLGIEFRGNSRMIDLGAPFWQMDFVPSLGEMARPAKYSMIKAGVFCGQCAQNAMRGIYVICMINPTDMDLTCHSMTVKQKQGPFHEDKNDPNQRYFWAQHQQFEHVAYVYGPAGDEALRQSNIDFNREKGDSFEKDTQAILRACITSFKNFTIVEMMLCYDLKHEEEYGLKFISSKAFRGHKHTGVMFIETLVKIGKEETHTRSMKSYRYNLWDAGTYGWTVLHEDWRSAYNIMFRDEYCDICPLPWEREFCPYMQTDNCVPYEGPCMQTTKCAIAIRVTMLGQNGCVNKADWRDNEMTAWEQIMTHESYSSKYTYDQFAQGAPHSMFHQGHDTASGPTLVYSDGPEFTTAGAIQPELKFGFVYWYCSDWCFVENFHFNNEGVFNDRSAHDQRLIVSQQNTEQPQQIIAFMFWDSFTDWQNVRQDDWAIVHWMTQARAGYQDANHVCRPMEPAGGWYINVRQFDTYHSQRPGYMTYTNISVTFLYSYEIGMTIRWNSSCGSNLISDHLMAWYQNQKRPYMNHCQMPNGFATPPVAGHFDNGHTMMHIHHRDVWILWPQYLQHVMMMETQLDYLMPEDFQMCPCPEDCCRNIPEDRVQYMFTKMIKKNMQPSFDVLINADNGWRLNEHDFPVMRRIGKNWMLDILATFECNWTDRYCNCCHDPPAFAHHRSMGFRKFDSNIPFVALPGIMGNPGPNAVPSCCFRHQFSGHDPSHVKALKSLEGLYFAVESSWCLNVMWEDVHCSIAMVQDHWSRNWCHDMAADKFCAYRYGAVTVVTQYSPFRCIWSCQRKKGMYATGEDKWSIKRHRIQNQAMVGAHEGRGCSAHCCQNGGKTNIGKRPTKKGGQKHNMNITQSEKFQFHVECAEYDKKPAIIVHQGDMTNQFRTDKVDMEYRWSLPMEPMKERWGTPGHWNQIYMQDADGCEKKIQKTDRFWDDPYQYVNKYAASRVSDHYWDRTSQQKVLHMWDDEATIDQFSFEAGQRIGLLDSKLDMLREAQKQCEPSKAFQYYCHEACGVLPSFYVAQYLEVTTFLIGFISCYCEFCHEPEMNNWAHGEANEHEVRIVCQHDGYVKNKQHLFSGRQFQYVDESIIKMYRIWMMTRRNWFRYIERTVKEYEKDRHNMASLIEHFSQAQHQKTVSCYFQRRWPRELYTELKIQRRLSKMKSYCYFYTLNTRCLPVNQTGQRAIMACSVAGYIGRYSVHQNWGSMNAQKVEMPMPVAIYLNKYYGVHRNGWRSPDMDHGVHLNRANGANKEKIHNGQPIDAEANHKGMSVNLVDSCDVGNSRQHEKRVNITWPSDLTEWFEWTLPMWRCQNNWYRAKYGIWMCAPQPVSPCMHLFQIEKIVEPINIIFCMTASDLPHPKNWYSFNQWWYSDTIIGENFIPNLSCCDNIYKEDIFGSLSMSTYLIYKPMWSQETCQVMDKHECGAAIGPHPTWPWGWFNLFGNSHMCGSTRKMAFSIEIFVLVTQALSVECPMYKVLVKECQISKLGAYFYTNNMGSDVNILRWCEWMFCGYAFFLFIGMMFFKAKENIKIAENTKVSIWQCGKTKPNWNGRSEMWAASYKHRNPLYIRIVHVGYLDIIIVWTEMNEDPVAFTSINSEYTQPITSCAGTGTGCAVCSMEWLWEMVVGKRNDTEVCMRREYQFHGFFCFFSSMCYPAEATWKTCLFVLAPITSKLQYEHMFHPSTPGAGDSIDGIFMIMEDCRIWDTRTLCETEVGAKCWVETHMDVHAFIWTLMSFQNVENHGHNEDYQFTSLVDIAMVDWWAYEFGTHLKCWHLQQSDKGLYVSVDDGVLMRWVFCMVETMDSSTRQVQRLLVGEAFHTRWRYVWSAGKRPHHQFMDDQAYGWHAKWNILQFTGMSLCDARCDHAFDNIMGTQRAAFVNQWWLRKDACFTQCHQMPHRGQRQWKEYIMNEQQFTCFESPMPQQEPASDYPIKQCHDLNSLEWMTPAANQCDHIETYRVCFDKQCCRASDVWSHAHLLSKNSAGEWPCIMVMYVYLWQRCAPWYLKPAACYMTKMRLWTWLVGGWDIAIEWCQVCDVEVESQKGCLDEAHKSRINELMNMGTTSMRSGDRIMIIYVKDDEDQYAESVEKFNDFHLVKAFDFEQWDNNNLLSFMMCWSPAKFQSKWFEWKYARFEVALPVCNDDCDEEPSGDQECTNICGHCRWDVVQPKNSRCYKVFPVMELMPWWEHKFLCIRSAINDRAVRREWTRWMTLMLHTLTFHPMCCGFNGAKWCIWMWELSGCQAVCKKSLKLRPMMKRSGNHTFARQPPVQAWRRSCVHWPCIQWCKADFKSWNYVGRGQTPLFWAVWCITQPRYWNWSSCDFEDEWTEMSYCMNHCLHCHGKWCCCPQHFQHYCYRLYWCHIRDNMEYACAYYATLPPSNEQMEYVHKWKDCDMIENTRWLTNRFKNKQAPTHSSLMCEKYYRARSPSEMRENAEAGQHVLKSNDCCWEKMPGYQMKKLDE'
b='GILHWMRGLEFESIPQQQQHCPKVIKPGWEISLPEVQDHNRHRYMKGVVHVVWMQPSTDPGVWNNWCFFVQRVVVPTWLASKEAWRPCNEWVVTYQMKFWRWFLFASATLTHNWYCTNFIWHITSLEALSWRWNRAGIVIENSMSQEHMHCQEGDASACYEFQYICTQERGWIRVPGPYLETYQLAGNVNGMTCNRGPPRCIWVPNHKLRKHLHMCTRFGSTGEGGLNTWQMYHAIIDWANQQMMIEHGDQFKCIAFENYMRESGTTYGWWLHPGPRLEHCQSDIPNWNEWPPEIWERWAYVFRQHVTRFSAVIQGKNIICLIPQYQMFDPYAPGEIPNDYGPIIKMVDVWHELMKLMATSWNINAEMFDKNASFTWMVGSPMFSYFPYQGEWYRFCTEHRVGWYMQRSGMKPNWHPDESHAPGVRCYNWARKKRCPHIWEIPPIWFIWPEGYMAILTWSISNHRGVVYNDAVQIIMAVKQAQSSMPTHSMKEESYRMDFTQQSILLNNMYMNKSWPRDKRYFMCETLPNEHYGRRFQAVNPVKSRWCFAQYHTFKYYKWQIIKESKHRTHENAKQFTYVIWTFHQISNTGVFPSCSQVNQECCNPYGHTQVWPNLTFDCRPRDIDAHWQEQIMMQPNLNVVNPFREPYPMMKNCTEVVARMPCHRQPDPDTFWLREKGYCTFKCLDSKLRLCDNYQYKENAHCANCMEVAVNCPDCHRICEFCWPFEAQLRFNPVWIIIHYFIPEPYGQEHWQDRHFHWMDGSNAFNWGPWGSWFLPYLLLSNKEDQMILHNCDGPRSQYPTLSETGMMWYGLNQVLSYCGHIMHTSWYFLFCVCYYFLYFMHNIDLLTIMMISSLTETDGFRQIGALYNCFHSLLTNDSVKFNLTAKVGGFTLELWWAIFCAPPWNAVAFPNQPYSPEKSNCAGCQEFWGVGYHIEIDYNHDMTVCSGSRNRCFEFTRISFDTNRGDMQLGNNVETTLCKLKHEPTCVVGEWESYRHEPMYSPRCYNCMNGYSGRLSMYEATFINNYCEKEFLSKSCHMKCLHHCKKVEIAHEAMIIRQNKYGPFKRNFQRGKSVMFAMRRTCCSALTYYVDCGIMSMIFPSGEVLPVFTTYHDSFTWCSVVFVIFLDRLVESYTTFKSANTKQNKANMDSWKNQVVKIKNSFWCHQYKFKYGIKTKCEEVTVKGEPHRGLTVWSVDAKILVQYMKMCDANYCPGRITQKHWEGYMMYALGKWYAQIVNRTCAEMFNPVVYFAVRYDMQVCEITCCCSLQLNIRTMGFLTRYANFGTIRSGRDSNPQAGDSSELMYDSYRWIIFMEKSSWPFMAWHPHWAATHVTAPQGTHDLHNNEIMIAQHFQRCLGCYTHAVRDKGYFDTEGLVGMRHMHELTLLKWQTWRFCIAFSDFRLKQPTPFKSLGIEFRTADLGWPFWEERKYTYEMHSRCSDCHFVPSLGEMAKLQVKQMSRGVFCGQCAQNAMRGKPTDMHTAGYWSMTVKQKQGPFHEDKNDCNQRYFVAQHQQITKAYVYGPHGIHTGFKSEAVRQSQIDFNREKGDSFEKDTQAILRHRALYKQCITSFKNFTMLCYEEEYGLKFISSKAFRGHKHTTVHFIEHLSEVIGKEWTLNYDLHTRSMKSYRYNLWIQALADEAGTYGWTVLHETWRSNILFRDEYCDICPLPWECPYMQTDNCVPYESRTTTKNMPCMQTKKCAIAIRVTMGDFCGNNGCVEKMTVGYATEQIKYTYDQFAKGAAAHSMFHQGHASGPTLVGRQSWGPEFLNLSLWLTAGAIQPNVKFGFVYWECSDWGFVENFHFMNEGHFNDRSAHDQRLIVSPSQIKTDQWLWDHWVEIMAVQNVRQDDWGRACRPMEPAGGWYINVRYFVTYHSQVPGYMYPNWEIWPYEIGMTIRWMNPKKCSNLISDMLMAWYQNQKRGYMNYANDCQMPNGFATPEFHTSVARHFDNGHAPMRIHHRDWPQYQGDYMCPAPEDCCRPYMFTKMIKKNMQPSFDVLINASHWLVANGWRLNEHPVMCEWEWMLDILDTFECNWTGGQQYCGMMYMYCFCCHRAPAFECWHHRSMGFRKFDSNIPFVALCMHWNSTKGGIMGNYGPNAVPSCCFRHQFSGDNSHGLYFLNVMWEDVHCSYVGYGSAMVQDHWSRNGRMCNLEHRCHFMAPILPGAVTVVPLWIQYFRKKGMRATCPSGWAFKGEDKSSIQAMLTYHSDCAHEGRGCGKTNIGKRWTRKGGQKHNMTNDCRGEITQSEKSANHGRLCFNTSIWFHVECAEYTMMKKPAIIVYQGDMTNDFRCQHAWVTHDKVDMEYRWSLPMEPMKERWGTDKQGLDIWMDYIQKSSPDKYVNKLAASRHSDHWDRSDQFSFWADSLKARLPFIWWAGKKQCEPNKARQNNYMRLPMGSFCHEACGAWIRFHYALPSFYVAQYLEVTTFLIGFISCYWATHREGEANEHEVQKDANAIVCQHDWYTRLHPAKNKQILFSGRQFQYHCHWIWKIFDEWIIQNIIKMASQSSKWYIPVMQAVRTVGEYHDMKDRNNMASLIESWGNFPKLIFSWAQQRRWPRQQISTLYTELKILNFQNVFYTLIICYHSNTRCLRKWWKAVIMKGQQTGQRAIMACSVAGTHVEHLFIGHYSVEQNWGSMNAQKFFCLGEMPMPVAIYLNKYYPAVYCDNHTPHLNMANGIHNGTPIHAVFKSNMLHVNHTGMSVNLVDRCDVGNVAQQHETRVNITWPSDLTTLPMWRCQNNWQRAKKGWMFRGIWMCAPQPDSPKIVNWQHACHWPICSKFYMNQIIFMTASDLPHLKNWYSSVDMNNDPYSDTIIGENFIFNTMLCVTGSRDNIVKMSTYLIYKPMWSGPHPTPAQLSISPWDWFNLLPIHQNHNIGNSHMCMATQLWWALSVECPMYKVHVKECQIQMKLQAYQYTFKGCNDNMGQDVNILMWCYWNFCGYAFRFMVPPDLDTSENHGMMFFKAKENIKGQIRENTKESQWQCGKTKPNGRSEMWAYPNMKYYYLYLHGQKKYNRRIVHVGYLDIVIVWTEMNERFVAFTSINSNDPIISCAGTTGCAVCSMEWLWEMCMNWWMMGKRNDTEVCMRRSYQFHGFFCFASEMCYPAEAQWKTCLYCVNHVLAPITSKLQYEHMFHPSTGDSDWSFMLSDGIIMEDCRWDTRGAKCWVDTHMDVHTLMSFQNVENHMHNEDYQFDKWWAYEFGTHLKCWHLQQSDNGLYKAPHTQVGSMVDDGDILMRWFFCMVETSLVGEAFHTRWRYVWSAGKRGHHAKPNILQCTGMSLCDARLDHAFQRIMGATNDDAAKDLWLSMKHQMPHRNQWYEQRQWKEYIMNEQQFTCFESMMPQDFNEPASDYPIKQCHDLESLEMITPAAEQCWAHIETYRVCFDKDCCRASDVESGMHLFDSCVNVVGSAGAWPCIMVMYTYLWQWYLNPAACYMTKMRLWCWLGWQGWCANIEWCQVCDVEVESQKGCLDEAHKFRINETMNMGTTSMRSGDRIMIIQRRMKQVCSWYAESFEFFNVFHLVLFDFEQWDNKMSRMGMYNLLSFSEFACNCGSGDKIDSKWFEWKYARGEVALCNDPWICVMLRCDEEPSGDQECTNIQGHCRWDVVQFPVPRTKNEWVKPVMELMPWWEHKFLLKDLMGHKIRSAINDRAVRREWTRMTLMLHTLTNGSHPMSDCDMGFNGIVMHGGCKLRMKRSGNHTFARMWPPVQAWRYRATVLHSCDHWPCIQWCKADFKSWNYVGRGQTPNVYNQDLFWAVWCITFPRYWNWEMSYCMNHWLHCHGKFCCCPCYRLYWCHIRDNMNMRVHTLPPSNNQMEYVMHPKWKDCYIANTQDQQWLTNRFKQAPTARSPSEMRENAEAGQHVLKSQDCKMKKGYCGADE'
editDistance(a,b) #1776

1776.0