# Bioinformatic Algorithms Chapter 5 - Alignment

---

### BA5A Find the Minimum Number of Coins Needed to Make Change
http://rosalind.info/problems/ba5a/

**The Change Problem** <br>
_Find the minimum number of coins needed to make change._ <br>

Given: An integer money and an array Coins of positive integers. <br>
Return: The minimum number of coins with denominations Coins that changes money <br>

In [32]:
# Get the smaller number of coins from array{denominations} that sum to int{money}

### Greedy approach
####

### Order reverse

def greedyChange(money,coins):
    coins.sort(reverse=True)
    remain = money
    change = []
    while remain > 0:
        coin = [_ for _ in coins if _ <= remain] [0]
        remain -= coin
        change.append(coin)
    return change, remain



In [33]:
money=40
denominations=[1,5,10,20,25,50]
greedyChange(money,denominations)

([25, 10, 5], 0)

In [34]:
money=48
denominations=[120,40,30,24,20,10,5,4,1]
greedyChange(money,denominations)
## not optimal since we can use change=[24,24]

([40, 5, 1, 1, 1], 0)

In [61]:
greedyChange(76,denominations)

([40, 30, 5, 1], 0)

In [117]:
def recursiveChange(money,denominations):
    if money == 0: return 0
    else:
        minNumCoins = np.inf
        totNumCoins = {}
        for i in range(0,len(denominations)):
            coin = denominations[i]
            if money >= coin:
                numCoins = recursiveChange(money-coin,denominations)
                if numCoins + 1 < minNumCoins:
                    minNumCoins = numCoins + 1
    return minNumCoins

In [118]:
recursiveChange(7,[5,4,1])

3

In [161]:
import pandas as pd
def dynprogChange(money,denominations):
    minNumCoins = [0]
    for m in range(0,money+1):
        minNumCoins.append(np.inf)
        for i in range(0,len(denominations)):
            coin = denominations[i]
            if m >= coin:
                if (minNumCoins[m-coin] + 1) < minNumCoins[m]:
                    minNumCoins[m] = minNumCoins[m-coin] + 1
    res = minNumCoins[0:len(minNumCoins)-1]
    return pd.DataFrame(res).T

In [163]:
res = dynprogChange(12,[5,4,1])
res

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,10,11,12
0,0,1,2,3,1,1,2,3,2,2,2,3,3


In [166]:
m=40
d=1,5,10,20,25,50
dynprogChange(m,d)[[m]]

Unnamed: 0,40
0,2


In [167]:
#http://bioinformaticsalgorithms.com/data/extradatasets/alignment/change_problem.txt
m=8074
d=[24,13,12,7,5,3,1]
#338
dynprogChange(m,d)[[m]]

Unnamed: 0,8074
0,338


In [168]:
#http://bioinformaticsalgorithms.com/data/extradatasets/alignment/change_problem.txt
m=18667
d=[1,3,5,19]
dynprogChange(m,d)[[m]]

Unnamed: 0,18667
0,985


---

### BA5B Find the Length of a Longest Path in a Manhattan-like Grid
http://rosalind.info/problems/ba5b/


**Length of a Longest Path in the Manhattan Tourist Problem** <br>


_Find the length of a longest path in a rectangular city._ <br>


Given: Integers n and m, followed by an n × (m+1) matrix Down and an (n+1) × m matrix Right. The two matrices are separated by the "-" symbol. <br>
Return: The length of a longest path from source (0, 0) to sink (n, m) in the n × m rectangular grid whose edges are defined by the matrices Down and Right. <br>

In [84]:
import pandas as pd
def manhTourist(n,m,Down,Right):
    s = pd.DataFrame(-np.inf,index=range(0,n+1),columns=range(0,m+1))
    s[0][0] = 0
    ## Fill first column first
    for i in range(1,n+1):
        s[0][i] = s[0][i-1] + Down[0][i-1]
    for j in range(1,m+1):
        s[j][0] = s[j-1][0] + Right[j-1][0]
    for i in range(1,n+1):
        for j in range(1,m+1):
            d = s[j][i-1] + Down[j][i-1]
            r = s[j-1][i] + Right[j-1][i]
            s[j][i] = max(d,r)
    return s[m][n]

1 0 2 4 3
4 6 5 2 1
4 4 5 2 1
5 6 8 5 3

3 2 4 0
3 2 4 2
0 7 3 3
3 3 0 2
1 3 2 2

In [6]:
down=pd.read_clipboard(header=None, names=None)
down

Unnamed: 0,0,1,2,3,4
0,1,0,2,4,3
1,4,6,5,2,1
2,4,4,5,2,1
3,5,6,8,5,3


In [7]:
right=pd.read_clipboard(header=None, names=None)
right

Unnamed: 0,0,1,2,3
0,3,2,4,0
1,3,2,4,2
2,0,7,3,3
3,3,3,0,2
4,1,3,2,2


In [30]:
n=4
m=4
manhTourist(n,m,down,right)

34.0

In [33]:
down=pd.read_clipboard(header=None, names=None)
down

Unnamed: 0,0,1,2,3,4,5,6,7,8,9
0,2,3,4,0,3,1,1,1,1,1
1,4,2,3,4,3,3,0,4,1,1
2,4,4,0,1,4,3,2,0,2,2
3,4,3,0,3,4,4,3,2,4,4
4,0,1,0,1,3,0,3,0,3,4
5,3,2,4,4,4,3,1,0,0,0
6,3,4,3,1,2,3,0,0,4,0
7,2,4,3,4,1,2,0,3,2,0
8,1,4,4,1,4,4,3,1,1,4
9,3,1,2,2,3,3,0,4,0,0


In [34]:
right=pd.read_clipboard(header=None, names=None)
right

Unnamed: 0,0,1,2,3,4,5,6,7,8
0,1,0,4,4,3,3,1,0,4
1,0,2,0,3,3,0,1,2,1
2,3,2,3,1,1,4,2,4,4
3,1,3,4,4,2,1,1,1,4
4,1,4,2,2,3,1,3,2,3
5,0,3,1,0,1,0,4,1,4
6,1,3,4,4,1,0,3,2,1
7,2,3,1,2,3,2,2,2,3
8,3,2,1,4,0,2,4,2,4
9,4,0,2,0,1,3,1,4,4


In [35]:
n=17
m=9
manhTourist(n,m,down,right) #84

84.0

In [36]:
down=pd.read_clipboard(header=None, names=None)
down

Unnamed: 0,0,1,2,3,4,5,6,7,8,9
0,0,2,1,4,1,3,3,0,4,4
1,1,4,1,4,0,2,3,3,4,4
2,4,1,1,4,1,0,3,0,2,4
3,3,0,0,4,3,1,4,1,1,4
4,0,1,2,3,2,2,0,1,0,0
5,2,3,2,0,4,2,1,2,1,3
6,3,4,0,2,0,4,2,2,1,4
7,0,0,2,2,4,1,3,2,4,2
8,4,4,3,2,3,1,4,4,2,1
9,0,1,4,0,3,2,4,2,2,1


In [37]:
right=pd.read_clipboard(header=None, names=None)
right

Unnamed: 0,0,1,2,3,4,5,6,7,8
0,2,2,0,0,2,1,3,0,4
1,0,1,2,4,0,4,3,4,1
2,1,4,1,1,0,4,4,0,2
3,3,2,1,2,4,0,0,0,0
4,1,3,3,0,4,0,4,2,3
5,3,4,1,1,4,4,0,1,3
6,3,2,0,4,0,3,3,2,0
7,3,2,2,3,3,2,1,0,3
8,2,0,1,2,0,0,3,2,4
9,1,2,1,0,3,1,3,3,2


In [38]:
n=15
m=9
manhTourist(n,m,down,right) #Challenge

69.0

In [39]:
### Going column first or row first has no influence
def manhTourist_bw(n,m,Down,Right):
    s = pd.DataFrame(-np.inf,index=range(0,n+1),columns=range(0,m+1))
    s[0][0] = 0
    ## Fill first column first
    for i in range(1,n+1):
        s[0][i] = s[0][i-1] + down[0][i-1]
    for j in range(1,m+1):
        s[j][0] = s[j-1][0] + right[j-1][0]
    for j in range(1,m+1): #j then i as opposed to i then j above
        for i in range(1,n+1):
            d = s[j][i-1] + down[j][i-1]
            r = s[j-1][i] + right[j-1][i]
            s[j][i] = max(d,r)
    return s[m][n]

In [40]:
n=15
m=9
manhTourist_bw(n,m,down,right) #Challenge

69.0

In [None]:
### Going column first or row first has no influence
def manhTourist_diag(n,m,Down,Right):
    s = pd.DataFrame(-np.inf,index=range(0,n+1),columns=range(0,m+1))
    s[0][0] = 0
    ## Fill first column first
    for i in range(1,n+1):
        s[0][i] = s[0][i-1] + down[0][i-1]
    for j in range(1,m+1):
        s[j][0] = s[j-1][0] + right[j-1][0]
    for j in range(1,m+1): #j then i as opposed to i then j above
        for i in range(1,n+1):
            d = s[j][i-1] + down[j][i-1]
            r = s[j-1][i] + right[j-1][i]
            diag = s[j-1][i-1] + diag[j-1][i-1]
            s[j][i] = max(d,r)
    return s[m][n]

### BA5c Find a Longest Common Subsequence of Two Strings

http://rosalind.info/problems/ba5c/

Longest Common Subsequence Problem
Given: Two strings.
Return: A longest common subsequence of these strings.

In [569]:
#penalties
gap = 0#-4
mismatch = 0#-3
match = 1

# gap = -4
# mismatch = -3
# match = 1

In [607]:
s = pd.DataFrame(0,index=range(0,n),columns=range(0,m))
p = pd.DataFrame('',index=range(0,n),columns=range(0,m))
for i in range(1,n):
    if vh[i] != wh[0]: s[0][i] = s[0][i-1] + gap
for j in range(1,m):
    if vh[0] != wh[j]: s[j][0] = s[j-1][0] + gap
###
for i in range(1,n):
    for j in range(1,m):
        south = s[j][i-1] + gap
        east = s[j-1][i] + gap
        if vh[i] == wh[j]:
            diag = s[j-1][i-1] + match
        else:
            diag = s[j-1][i-1] + mismatch
        t=[('s',south),('e',east),('d',diag)]
        ### Get which one is max
        whichMax = np.argmax([_[1]for _ in t])                
        s[j][i] = t[whichMax][1]
        p[j][i] = t[whichMax][0]
s2 = s.copy()
s2.index=[_ for _ in vh]
s2.columns=[_ for _ in wh]
s2

Unnamed: 0,-,A,A.1,C,C.1,T,T.1,G,G.1
-,0,0,0,0,0,0,0,0,0
A,0,1,1,1,1,1,1,1,1
C,0,1,1,2,2,2,2,2,2
A,0,1,2,2,2,2,2,2,2
C,0,1,2,3,3,3,3,3,3
T,0,1,2,3,3,4,4,4,4
G,0,1,2,3,3,4,4,5,5
T,0,1,2,3,3,4,5,5,5
G,0,1,2,3,3,4,5,6,6
A,0,1,2,3,3,4,5,6,6


In [764]:
def lcsq(x, y):
    m = len(x)
    n = len(y)
    table = np.zeros((m+1, n+1), dtype=int)
    for i in range(1, m+1):
        for j in range(1, n+1):
            if x[i-1] == y[j-1]:
                table[i, j] = table[i-1, j-1] + 1
            else:
                table[i, j] = max(table[i, j-1], table[i-1, j])
    
    i, j = m, n
    w = ''
    while (i > 0) and (j > 0):
        if x[i-1] == y[j-1]:
            w = x[i-1] + w
            i -= 1
            j -= 1
        else:
            if table[i, j-1] > table[i-1, j]:
                j -= 1
            else:
                i -= 1
    return w,table


In [729]:
def scoreAlignment(w,v,gap=0,mismatch=0,match=1):
    """
    """
    ###
    wh = '-'+ w
    vh = '-'+ v
    n=len(vh)
    m=len(wh)
    ###
    scoring = pd.DataFrame(0,index=range(0,n),columns=range(0,m))
    ## Initialize F matrix
    for i in range(1,n):
        if vh[i] != wh[0]: scoring[0][i] = scoring[0][i-1] + gap
    for j in range(1,m):
        if vh[0] != wh[j]: scoring[j][0] = scoring[j-1][0] + gap

    ### Dynamic programming. Iterate over the scoring matrix.
    for i in range(1,n):
        for j in range(1,m):
            down = scoring[j][i-1] + gap
            right = scoring[j-1][i] + gap
            if vh[i] == wh[j]:
                diag = scoring[j-1][i-1] + match
            else:
                diag = scoring[j-1][i-1] + mismatch
            t=[('s',down),('e',right),('d',diag)]
            ### Get which one is max
            whichMax = np.argmax([_[1]for _ in t])                
            scoring[j][i] = t[whichMax][1]
    s2 = scoring.copy()
    s2.index=[_ for _ in vh]
    s2.columns=[_ for _ in wh]
    s2
    return scoring,s2

##
def LCS(w,v,Fmatrix,debug=False):
    wh = '-'+ w
    vh = '-'+ v
    """
    """
    lcs = ''
    i = Fmatrix.shape[0]-1
    j = Fmatrix.shape[1]-1
    while (i > 0) and (j > 0):
        #print((j,i), s[j][i],LCS[::-1],'<===LCS\t',vh[i],wh[j])
        if vh[i] == wh[j]:
                lcs += wh[j]
                i -= 1
                j -= 1
                if debug: print('\tDiag')
        else:
            if i >= 0: up = Fmatrix[j][i-1]
            else: up = -np.inf
            if i >= 0: left = Fmatrix[j-1][i]
            else: left = -np.inf
            if left > up: 
                if debug: print("\t",(up,left),'left')
                j -= 1
            else:
                if debug: print("\t",(up,left),'up')
                i -= 1
    return lcs[::-1]


In [768]:
w='AACCTTGG'
v='ACACTGTGA'
l, scor = lcsq(w, v)
pd.DataFrame(scor)

Unnamed: 0,0,1,2,3,4,5,6,7,8,9
0,0,0,0,0,0,0,0,0,0,0
1,0,1,1,1,1,1,1,1,1,1
2,0,1,1,2,2,2,2,2,2,2
3,0,1,2,2,3,3,3,3,3,3
4,0,1,2,2,3,3,3,3,3,3
5,0,1,2,2,3,4,4,4,4,4
6,0,1,2,2,3,4,4,5,5,5
7,0,1,2,2,3,4,5,5,6,6
8,0,1,2,2,3,4,5,5,6,6


In [775]:
w='AACCTTGG'
v='ACACTGTGA'
sco,_ = scoreAlignment(v,w)
sco

Unnamed: 0,0,1,2,3,4,5,6,7,8,9
0,0,0,0,0,0,0,0,0,0,0
1,0,1,1,1,1,1,1,1,1,1
2,0,1,1,2,2,2,2,2,2,2
3,0,1,2,2,3,3,3,3,3,3
4,0,1,2,2,3,3,3,3,3,3
5,0,1,2,2,3,4,4,4,4,4
6,0,1,2,2,3,4,4,5,5,5
7,0,1,2,2,3,4,5,5,6,6
8,0,1,2,2,3,4,5,5,6,6


In [773]:
l

'AACTTG'

In [776]:
LCS(v,w,sco,debug=False)

'AACTTG'

In [611]:
##https://www.youtube.com/watch?v=LhpGz5--isw
# v='ATGGCCTC'
# w='ACGGCTC'

##http://rosalind.info/problems/ba5c/
w='AACCTTGG'
v='ACACTGTGA'



Unnamed: 0,-,A,A.1,C,C.1,T,T.1,G,G.1
-,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
A,0.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0
C,0.0,0.0,0.0,1.0,1.0,0.0,0.0,0.0,0.0
A,0.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0
C,0.0,0.0,0.0,1.0,1.0,0.0,0.0,0.0,0.0
T,0.0,0.0,0.0,0.0,0.0,1.0,1.0,0.0,0.0
G,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0
T,0.0,0.0,0.0,0.0,0.0,1.0,1.0,0.0,0.0
G,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0
A,0.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0


In [712]:
w='AACCTTGG'
v='ACACTGTGA'
Fmat,_=scoreAlignment(w,v)
LCS(w,v,Fmat)

'ACCTGG'

In [713]:
##https://www.youtube.com/watch?v=wmIMeyWDZI0&list=PLQ-85lQlPqFNmbPEsMoxb5dM5qtRaVShn&index=3
v='ATGTTATA'
w='ATCGTCC'
Fmat,_=scoreAlignment(v,w)
LCS(v,w,Fmat)

'ATGT'

In [715]:
%time
w='ACCGTCTTAGCGATCAACACATTTAACAACGCGCCGCACCCCCCGTCAAACGAGCTTTTGGGCTCTTGTCCTTTTACAAGCTTCACGACGCATACAGCCTTGATCAACGGTTTGATCTGTCTCCCTTCAGCTGGCTTTAAAGGACATACATATGAAGGCCTTAATAAGGTCCGGGAACTCCACATATTCGGTACTGGGCAAACCCCATGAACCACCTCAACATGAAGAGTCCGAGGACTCTCACGATCCACCAATGCAGATCGGAACTGTGCGATCGCGTAATGAGCCGAGTACTTGGTTTGTGTTTAGGTTATGGGGGCCGGGAGCCGGTTCAATATAAGGAAGTAGTTGCAGATTAGTTGTTGCGAACGGTCATAAATTTGATGGGTAAACGTGAACTTAACAAACCGTGATAGCTAATCCTATGCATCCCTTACGTGGATCGACTCGAGTACCCAGGTGAACCGACTACTTGATAACCGGAAATCGCGGTATAAAAGCGCTCACGGTCAGGAGATATACCTCCAAGCAGTAGTCTTTCTGAGCCTAGAGTAGTAAATTACAGGGACGATGTCTTTTACCGAGGCAACATTTTATTGAGAATCACATGAGGCACAGGTAAAGGCGACATCACGATCGAGATCAACCCCTACTTGTTCAAAACATTGAGAACCAGCTCTGTTTTGGAACCTAGAAAGATAACGCATCCGCTTGATATTCCACGGCTTGTCCCTCTTGTGCGGTCCATCTATCGGAGTTTCCTCCGATACGACCCGCAATGTTTCCAGGCGTACGGTACTTTATGAATACACTCGCGCTGTAACCTGTTATGTGAAACACACACGACAGAGCTTCGCGTGGGCCCAGCGACCCGGTAATACTACATCACCGCACACGACCTCGAGCAGTCTTTGCCGGCGTCCGTAAGTAGTCTAAAGTTGTGTTGATGCTTGGGGTTAAAGCTAAATCGTCCGCAGAATACGACTCTCATCCCAAT'
v='ACCCGCACGCGCTTTGGTCTAGATTCTAGCTCCAACTTGCCTGCTAGATACTCTGTTAAAAGATGGTTTTACAACCCCCTCCTCTGTCCCTGGGGTATTATATAATACGTCGGATAGTCAGGTACAAATACAAGTGGGTGGGAATACTTTTCCTCGGATCCTAGACCACGGATTACTGCGTGGTTGACAAGAGTCGGCCCGGAGGGAAACGTGAAGGTTAGTGCAATTAAAGTCTCTAATGTGAAGCCTCCGCGAAGCGAGGAGTTTCTGAGATCGAGTACTATTTAGAGTTCGAAATCACGGCTTAACCTCACTGCCACGCATAACTTGCCGGCAATCCAGTTTTGCAACGATACTTAATTTGTGCAGCTCATCTTTGCTGTCCAGAAATAGAGCTAGTCGATCTCATCTTGCGGGTAGCCAGAAGTCCTACCGTCTCCTCCATGTAGCTTAAAAATTTCGGTGAGGATCAAAAATGATAAACGTGACAGGTAAGCTCCTACGTCTATCCTATGACCCCCGCGGCAGAATAGGTTGGTAGTGTTAGTGCGTGAGCTGGTAGAATAGAGCACACTTAGGGAAACGGGAACCGTTATGTAGGGCTGCGACACACAAAAAAGTGTTCGTTGGTAAGCTGCCTCTCCACTAAACAGGATTTCTCTGGATGATCCCATCGAAGCAAGTTACGCACCACGCCGAGGCGGACCCTGGTACTAGCTGCCCCCCCCTTTATGGGGCGCTCGTACATCAAGATGATCGCGGACTCAACCTGATTACGAGTTGTCCAAGTAGTCCAGGGTAAGAGAAACTGGAGAGA'
Fmat,_=scoreAlignment(v,w)

CPU times: user 3 µs, sys: 1e+03 ns, total: 4 µs
Wall time: 21 µs


In [747]:
### Challenge
% time
w='TTTGACATTCCTACGACCTAGAACCGGCCGGAAGGGATAGGCTACCAAGCGATGTATCTGAGGATCTCATTGGCCGGTCGTTTCAAGCCGTAGTTTGGCCGCGATACCCCGTTTCTTCTTATAATCTCACCGTTGCGCCCACGTGAAATCAGTACTGTTCGGGGGTTGTGTAAAAATCCTCATCACCGCAAACCCACTCGCAGGGGTCTTGTCAGATAAGCGCGCGTATGTTGGTCGGGGAACGGAAGGGGGGTGTATGTCGTATCTTCGGGTGACCACCCATCCCCCTACTCAAGATCTGTGTAGGAGCTATTATTGTTTGAGGTGAATGAGATGCGCTTCGAGGCCGTGCGTACACGGATAGTAGCTGGCGGACACGCCGGCAGCCGCATTCTGTCTCAGCTTCAGTCCGACCTTCCTATTTGGTAATAACCGCAATTATGGAAGTTCGACATCCCGAGGTGATTCAAATTAGGGTTTAATGCGCTCCGCTGGTACTGCATGTATCTATCTTCTTCATGTTAGGGCGCTAAGGCGTGCCTTCATTGAATTGTTTTAGCATACCACCCTGGAAAGGCCGCAAGCTCACGGTCGAGACAGAGCCGTGGAACGGTGATGGACTGTTAAGGATCGCTGTGGTGCGTAATATTAGAGAATTTCCGGCCCGCTAATACCCAACTGTGCACTTAACCCCATCACATGTGAAAGTTGTACTCCAGTAGGTGACCTGAGTCCCAAGACGCTCCCCCTGATTAGTTGTGTGCACTGTTCCGATCGTCAGGGTGCTAGATGTCCTTGACCGAGGTTCAGTTCGCCAGATGGTGCCGACGGACGCGGTCCACTGAGGCCCCTCATTGGCCGAGCAATAGTTGATCCG'
v='CACCTAGCCTATCTTTTCGGTTCTAGCCTAAATATCATCAGCACGTGCCTATGTGGCCGTGTAATGAAGCTTGGTTGAGAATATTGGTCCTTGTTAATATGTGTTCTCATATTACTTGCGGTGGAGACTCGAACAACGCCGTCCTGGCGTAGAAGCGGAATTTCGATGTCGCCGTACAGTATTGGTATTGTTCCGCGGGGCATGACCAGCTGCCGAACACGTGCTCCGTGATGATACCGTCACACCAATTTGAAGCTAAAACCATGTGCTTACATAAATCGGCTGTCACAGAAATTCGCTTGCTGTCCTCGTGCGCAATTTCGTGCGTTACCCGCGTGCCCCCACAACATTTACTTTCTAACATGCCCTCAACAAATCCACTACCTAGTAGCGCCGCACTCTTCAGAGTATCAAAGCGGCATAAAGGACAACAGCAACGCTCTCATCGTGACGCCTTATTGTCCTTGCTAGGGAACTTGTCGCGGTGACACTATTGTGCGGAGAGGGTTGTGCTTGCGCTGGCTACCTATCAGCAAATCGTCGTGTGTTGCGCTTCATAGGCTTTCCCGCAAGGTTCACGCGAGGGCGGTTCAACGGCTTATCTCCTCGTCTACGTCCACCAGATTACGTTGCTGTCGCCCGCTCTTTACCCAGGGTTCCCGTGCGGATGTCATTCTTTGTTAAGTACTACCATCCACACAACTTGGTTCTTTCCTTCTCACCGGTAGCGTAGTTAAAGCGCTAACGCATAGTTGCGAAATAACTAACTGCATGACGCCATCGACTTCCCTGTCTTCATTATTACTTAAGTACGCATGAGGCCCCCGAACGCCCCCAGACGAGGGTCCTCTGGCGGA'
Fmat,_=scoreAlignment(w,v)
LCS(w,v,Fmat)

'CACCTAGCCTACCGGTCTACCAAATATCTAGACTCATTGGCCGTGTAATAGTTGGGGATATTTCTTTTAAATTGTTCCATATTACTTGGGTGGAACTCAACAACCCTCCGGCGTAGAAGCGGATTTGGTCGCGAAGTATTGTATTTCGGGTGACCACCCACCCCCTACTCAAATTTGAAGCTAATTGTTTAGGTGAAGAATCGCTTGGCCGTGCGCAATTTGGCGACCCCGGCCCCATTCTTCTCAGCTCATCCACCTTCCATTTGGTATAACGCATAAAGGACACAGCAATTAGGGTTATGTCCGCTGGACTGTGTACACTTTTCAGTTGGGCGCTGGCTCCTTCAGAATTGTTTTGCCCATAGGCCGCAAGTCACGCGAGGGCGGAACGGTTACTGTTAGGATCGTTGGTGCGTTTTAAGTTCCGGCGATCACTGTGACTACCCCACACATTGGTTCTCCTAGGTACGAGTAAACGCTCGATAGTTGGTCACTGCATCGCATCGATTCCTGCCATTATTCAAGTCCGAGGCCCCCGAGCCCCCAGCGAGGGTCCG'

In [781]:
### Challenge new dataset
%time
w='TTAGTGGGTATAGGTGCCGTGTCGGATATTACTCATTTCCGACCAATTCGGTTTCAGTGAAACGTAGATGATCACCTATCGGTATAAGGACTTGCAGTAGAGGTCAGTTTCCTTGAGAACAAGCATCCATGTAACCCAAGTTTCTCCCGCGGAGTGGGCGATTGGCTAACAAGAGCGGTTCGGCGGTGCAAGCTGAGACAAGAGCTTGCGGAATGGGAGCTGTAACTCCATAAACTAGAGTTTAATCGTAGTCCTAGTCCTGCCCAGTTGGCAAGTGCTTAAGGGGTAAAGTCGTAATGATTCCTCGACCAGTTCCCCTATGGTACTTACAAGTTGGCTCATGGGGTAAATCACTGAAGAACGAGAAGCGCACGTGAGGCTGGCGGGCTGGTCACGATTTCTATCATTGTAGGTAGGGCGGTTCGGGTCATCCAACGCAGTATGTTATCGTCATTTCTCACACATCGGGAGCACGACACGAATCTTCGTACGTGTCCATGCCATGTTTAAGTTTCTTAGAGATACTTAAGTTGAAAATCCCGATAGAATCAACTACAGCTCACTACCTCTTCCACCTCTTCGCTTCGGGTAACATAACTTCTCATTCCAGAGGTTGCGAGGCCACCCGGACTTCATACGGCTTATATCAGCTTCACTCCATCCCTCGGCCCATTAGCGAAGCAGTGCGCGAGGATGCCATGCAGCGTAATTGGAGGGACTTCGGAGTCCCAACACATAGTTCTAAAGTAGTTTTTAGCCGCTTAGAATCCCTGGCGGGGGAAGATTACCATCGGAGATTCCCTTGAACTGATCTGCGCGACACTGGATATGGTATCGTGTGCAAGCAACCGTATACATCAAAACCATGGGAGTAAGCATCCCAGGGAAACCAGGGAACTCATGTACAAGATTTTCAGTGGACGACACGACGGTCTATATCCGGACCACGGTCACACCTACCACGACC'
v='GTTAGACGCATGGTGAAGCTTGGCGCCCCAAGGCTAGTAGGGCGAACATGAGCGGCCAACAACTTATGTTAGCGCATATACGAAGGGGGTCATTTAACAGGGGTACAATTACCATGGTTAGACGGATTTCCAACCTCGAGCTGAAGGGCACAATAGAACGTTATGGGGTGCTAATTCCTCCTGATGTTGATACAGCGTTGGCCGTGACTATTTGGCGCAAGTAGACTATATTGTATACTTCTATTCGAACAGCCGATCCGGTGGAGTTTGGGGGGGAACGCAGCCGGAATCGCTGTGACCCCCTGCGTACCCCGAGGAGACGACTCTTCCGGAGTGATATAATGCGCAGCGAGTCGGCCTATAGGGGAATGTTGATAATATCCTAGCCCCGTGCCCAATAAGCCTAAATTTTGCCTTACTGAGCCCTCATGCATCGCGATAAGATCTAGTGAGAACGCCAACGGAAAACTCTTTCGGTGGGACCCGGCACCCTCGGCTGACGCACACTGTCGTCGACAGGGTAGGCCCGGGCTCGTATCTCCCCAACTTAAAAAACGGTGGCTTGAGGTTGGGACGGCTGAGCATGCGATTGGCTATGGCGTGAACAGCCTCAGATGGGTAATTACTGACAGCGCGTTACGCAACAGCATCACTCCCGAACCAAACAAGAGAGGTCTATGGATCGAGAGCGAGAGGATCTGGTGTTGTACACGCGTAAACACCGTACTCCCGATCATACCTTAGTAATATGATGCTCGGTCCATATAGGATAGGATGAGTACTGCATTACAGCGGTTCCATAAAACTACCCCTCTAGCAAGCCGGAACGATTGAGTCTCGTGCAATTATTTCGATGAGGTCAGTCATTATAAGGTATGGAATAATGCTTCACACGCTTCGTTTGTTCCTATGACTTACGCTTAGGACGCAAGCCACGTCGTGTTTTTAGGGACCCAGACTCTGCTCATGTTGATACGAGGAGGGGTCTCC'
longSub,_ = lcsq(w,v)
longSub

CPU times: user 3 µs, sys: 0 ns, total: 3 µs
Wall time: 17.9 µs


'TTAGTGGGTTGGGCCGTGTCGAATACCATTTGACCAATCGGTTTCAGTAAACGTAGAGATCACCTCGGTAAGGACTGCGTAGGGTCATTTCCTTGGAACAGCTCCTGACATTTCGCAGTGCGATTCTACAAAGCGTCCGGTGAGTGAACGAGCCGGAATGGGAGCGTACCCAAAACTGAGTTTAATCGCAGCGAGTGGCCTTAGGGGAAGTGTAATATCCTCCCGTCCCCTATGCTTACTGGCTCATGGGTAAATCATGAAGAACGAAACCCGTGAGGCTCGGCTGGCACATTTCACAGGGTAGGGGGTCGTCTCCCCAATTAAAAACGGGGCAGGTTGACGTGCATGCATGGCTTGGCGTGAAACCCGATGAATACTACAGCTCACACTCACCCCGCCAACAAATCTATTCAGAGCGAGAGGATCTGGTTTACAGCTCACTCTCCCTCCCTTAGAAGATGCCGGTCCATAGGATAGGATGAGTCCAACACGTTCTAAAATACCCTTAGAACCGGAAGATTACTCGGAATTTTGATGATCTCAATGGTATGGTATCTTCAAGCCGTTTCCATGATAGCTAGGGAACCAGTCTGTTTTTAGGGACGACCGAGTTATACGGAGGTCTCC'

In [777]:
%time
w='TTAGTGGGTATAGGTGCCGTGTCGGATATTACTCATTTCCGACCAATTCGGTTTCAGTGAAACGTAGATGATCACCTATCGGTATAAGGACTTGCAGTAGAGGTCAGTTTCCTTGAGAACAAGCATCCATGTAACCCAAGTTTCTCCCGCGGAGTGGGCGATTGGCTAACAAGAGCGGTTCGGCGGTGCAAGCTGAGACAAGAGCTTGCGGAATGGGAGCTGTAACTCCATAAACTAGAGTTTAATCGTAGTCCTAGTCCTGCCCAGTTGGCAAGTGCTTAAGGGGTAAAGTCGTAATGATTCCTCGACCAGTTCCCCTATGGTACTTACAAGTTGGCTCATGGGGTAAATCACTGAAGAACGAGAAGCGCACGTGAGGCTGGCGGGCTGGTCACGATTTCTATCATTGTAGGTAGGGCGGTTCGGGTCATCCAACGCAGTATGTTATCGTCATTTCTCACACATCGGGAGCACGACACGAATCTTCGTACGTGTCCATGCCATGTTTAAGTTTCTTAGAGATACTTAAGTTGAAAATCCCGATAGAATCAACTACAGCTCACTACCTCTTCCACCTCTTCGCTTCGGGTAACATAACTTCTCATTCCAGAGGTTGCGAGGCCACCCGGACTTCATACGGCTTATATCAGCTTCACTCCATCCCTCGGCCCATTAGCGAAGCAGTGCGCGAGGATGCCATGCAGCGTAATTGGAGGGACTTCGGAGTCCCAACACATAGTTCTAAAGTAGTTTTTAGCCGCTTAGAATCCCTGGCGGGGGAAGATTACCATCGGAGATTCCCTTGAACTGATCTGCGCGACACTGGATATGGTATCGTGTGCAAGCAACCGTATACATCAAAACCATGGGAGTAAGCATCCCAGGGAAACCAGGGAACTCATGTACAAGATTTTCAGTGGACGACACGACGGTCTATATCCGGACCACGGTCACACCTACCACGACC'
v='GTTAGACGCATGGTGAAGCTTGGCGCCCCAAGGCTAGTAGGGCGAACATGAGCGGCCAACAACTTATGTTAGCGCATATACGAAGGGGGTCATTTAACAGGGGTACAATTACCATGGTTAGACGGATTTCCAACCTCGAGCTGAAGGGCACAATAGAACGTTATGGGGTGCTAATTCCTCCTGATGTTGATACAGCGTTGGCCGTGACTATTTGGCGCAAGTAGACTATATTGTATACTTCTATTCGAACAGCCGATCCGGTGGAGTTTGGGGGGGAACGCAGCCGGAATCGCTGTGACCCCCTGCGTACCCCGAGGAGACGACTCTTCCGGAGTGATATAATGCGCAGCGAGTCGGCCTATAGGGGAATGTTGATAATATCCTAGCCCCGTGCCCAATAAGCCTAAATTTTGCCTTACTGAGCCCTCATGCATCGCGATAAGATCTAGTGAGAACGCCAACGGAAAACTCTTTCGGTGGGACCCGGCACCCTCGGCTGACGCACACTGTCGTCGACAGGGTAGGCCCGGGCTCGTATCTCCCCAACTTAAAAAACGGTGGCTTGAGGTTGGGACGGCTGAGCATGCGATTGGCTATGGCGTGAACAGCCTCAGATGGGTAATTACTGACAGCGCGTTACGCAACAGCATCACTCCCGAACCAAACAAGAGAGGTCTATGGATCGAGAGCGAGAGGATCTGGTGTTGTACACGCGTAAACACCGTACTCCCGATCATACCTTAGTAATATGATGCTCGGTCCATATAGGATAGGATGAGTACTGCATTACAGCGGTTCCATAAAACTACCCCTCTAGCAAGCCGGAACGATTGAGTCTCGTGCAATTATTTCGATGAGGTCAGTCATTATAAGGTATGGAATAATGCTTCACACGCTTCGTTTGTTCCTATGACTTACGCTTAGGACGCAAGCCACGTCGTGTTTTTAGGGACCCAGACTCTGCTCATGTTGATACGAGGAGGGGTCTCC'
Fmat,_= scoreAlignment(v,w)
myAlgo = LCS(v,w,Fmat)
myAlgo

CPU times: user 3 µs, sys: 0 ns, total: 3 µs
Wall time: 17.2 µs


'TTAGTGGGTTGGGCCGTGTCGAATACCATTTGACCAATCGGTTTCAGTAAACGTAGAGATCACCTCGGTAAGGACTGCGTAGGGTCATTTCCTTGGAACAGCTCCTGACATTTCGCAGTGCGATTCTACAAAGCGTCCGGTGAGTGAACGAGCCGGAATGGGAGCGTACCCAAAACTGAGTTTAATCGCAGCGAGTGGCCTTAGGGGAAGTGTAATATCCTCCCGTCCCCTATGCTTACTGGCTCATGGGTAAATCATGAAGAACGAAACCCGTGAGGCTCGGCTGGCACATTTCACAGGGTAGGGGGTCGTCTCCCCAATTAAAAACGGGGCAGGTTGACGTGCATGCATGGCTTGGCGTGAAACCCGATGAATACTACAGCTCACACTCACCCCGCCAACAAATCTATTCAGAGCGAGAGGATCTGGTTTACAGCTCACTCTCCCTCCCTTAGAAGATGCCGGTCCATAGGATAGGATGAGTCCAACACGTTCTAAAATACCCTTAGAACCGGAAGATTACTCGGAATTTTGATGATCTCAATGGTATGGTATCTTCAAGCCGTTTCCATGATAGCTAGGGAACCAGTCTGTTTTTAGGGACGACCGAGTTATACGGAGGTCTCC'

In [782]:
len(myAlgo) == len(longSub)

True

                    My implementation is slow but it works. **Optimize later**

---

### BA5d Find the Longest Path in a DAG

http://rosalind.info/problems/ba5c/

**Longest Path in a DAG Problem** <br>
Find a longest path between two nodes in an edge-weighted DAG.<br>

Given: An integer representing the source node of a graph, followed by an integer representing the sink node of the graph, followed by an edge-weighted graph. The graph is represented by a modified adjacency list in which the notation "0->1:7" indicates that an edge connects node 0 to node 1 with weight 7.<br>

Return: The length of a longest path in the graph, followed by a longest path. (If multiple longest paths exist, you may return any one.)<br>

In [1]:
import re
def readba5d(file):
    F = open(file,'r') 
    i = 0
    source=np.nan
    sink=np.nan
    adjacencyMat=[]
    for line in F: 
        if i == 0:
            source = int(line)
            i += 1
        else:
            if i == 1:
                sink = int(line)
                i += 1
            else:
                adjacencyMat.append([int(_)for _ in re.split('->|:|\n',line)[0:3]])
                i += 1

    #adjacencyMat = pd.DataFrame(adjacencyMat,columns=['out','in','w'])
    return source,sink,adjacencyMat

In [18]:
source,sink,adjacencyMat = readba5d("BA5/test_ba5d.txt")
print(source,sink,adjacencyMat)

0 4 [[0, 1, 7], [0, 2, 4], [2, 3, 2], [1, 4, 1], [3, 4, 3]]


In [336]:
adjacencyMat

[[0, 1, 7], [0, 2, 4], [2, 3, 2], [1, 4, 1], [3, 4, 3]]

{0, 1, 2, 3, 4}

In [401]:
### Subset the adjacency matrix for the starting nodes
start = {_[1]:_[2] for _ in adjacencyMat if source == _[0]}

#### Prepare matrix for dynamic programming
ins = set([_[1] for _ in adjacencyMat]) # To avoid repeated entries, use sets
outs = set([_[0] for _ in adjacencyMat]) #

## From the out nodes, remove the source and keep only the starting nodes
outs = outs - {source} and set(list(start.keys()))
## From the in nodes, add the source node to start counting
ins.add(source) 
scoring = pd.DataFrame(0,index=outs,columns=ins)

############
### Prepare adj matrix as dictionary for easier lookup
adjacencyMat_dict = {_[0]:(_[1],_[2]) for _ in adjacencyMat if source != _[0] }
####
# Initialize
for k,v in start.items():
    scoring.loc[k][source] = v
print(scoring)

### Add the condition to exit the loop in dynamic programming step
adjacencyMat_dict[sink]=('F')
print(adjacencyMat_dict)
###
## Iterate over each starting node from the source (rows)
# Columns are the nodes each path follows, if a node is in the path, add the weight.
for src in scoring.index:
    #print(src)
    OutWeight = adjacencyMat_dict[src]
    #print(OutWeight)
    while OutWeight[0] != 'F':
        scoring.loc[src][OutWeight[0]] = OutWeight[1]
        OutWeight = adjacencyMat_dict[OutWeight[0]]
scoring        
####
## Find max
winner = np.argmax(scoring.sum(axis=1))
winner = list(scoring.index)[winner]
winner
### Results
res = [i for i,r in pd.DataFrame(scoring.loc[winner]).iterrows() if int(r) != 0]
res = [source,winner] + res
res = ''.join(['-->'+str(_) for _ in res])[3:]
###
maxVal = scoring.loc[winner].sum()
print(maxVal)
print(res)

{2: (3, 2), 1: (4, 1), 3: (4, 3)}

In [497]:
def longestPath(source,sink,adjacencyMat):
    ### Subset the adjacency matrix for the starting nodes
    start = {_[1]:_[2] for _ in adjacencyMat if source == _[0]}

    #### Prepare matrix for dynamic programming
    ins = set([_[1] for _ in adjacencyMat]) # To avoid repeated entries, use sets
    outs = set([_[0] for _ in adjacencyMat]) #

    ## From the out nodes, remove the source and keep only the starting nodes
    outs = outs - {source} and set(list(start.keys()))
    ## From the in nodes, add the source node to start counting
    ins.add(source) 
    scoring = pd.DataFrame(0,index=outs,columns=ins)

    ############
    ### Prepare adj matrix as dictionary for easier lookup
    adjacencyMat_dict = {_[0]:(_[1],_[2]) for _ in adjacencyMat if source != _[0] }
    ####
    # Initialize
    for k,v in start.items():
        scoring.loc[k][source] = v
    #print(scoring)

    ### Add the condition to exit the loop in dynamic programming step
    adjacencyMat_dict[sink]=('F')
    #print(adjacencyMat_dict)
    ###
    ## Iterate over each starting node from the source (rows)
    # Columns are the nodes each path follows, if a node is in the path, add the weight.
    for src in scoring.index:
        #print(src)
        OutWeight = adjacencyMat_dict[src]
        #print(OutWeight)
        while OutWeight[0] != 'F':
            scoring.loc[src][OutWeight[0]] = OutWeight[1]
            OutWeight = adjacencyMat_dict[OutWeight[0]]
    scoring        
    ####
    ## Find max
    winner = np.argmax(scoring.sum(axis=1))
    winner = list(scoring.index)[winner]
    winner
    ### Results
    res = [i for i,r in pd.DataFrame(scoring.loc[winner]).iterrows() if int(r) != 0]
    res = [source,winner] + res
    res = ''.join(['-->'+str(_) for _ in res])[3:]
    ###
    maxVal = scoring.loc[winner].sum()
    return maxVal, res

In [793]:
outs=set([_[0]for _ in adjacencyMat])
ins=set([_[1]for _ in adjacencyMat])
scoring = pd.DataFrame(0,index=outs,columns=ins)
scoring

Unnamed: 0,3,4,9,10,11,12,13,14,15,16,...,35,36,37,38,39,40,41,42,43,44
0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
5,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
6,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
7,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
8,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
10,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [795]:
for _ in adjacencyMat:
    #print(_)
    scoring.loc[_[0]][_[1]] = _[2] 

scoring

Unnamed: 0,3,4,9,10,11,12,13,14,15,16,...,35,36,37,38,39,40,41,42,43,44
0,0,0,0,0,0,0,0,19,0,0,...,0,8,0,0,0,0,0,0,0,0
1,36,0,0,4,0,0,0,0,0,0,...,0,0,0,0,0,0,8,31,0,0
2,0,0,38,0,0,0,0,0,0,27,...,0,0,0,0,0,0,0,0,0,0
3,0,4,0,0,0,0,0,0,0,19,...,0,0,0,0,0,0,0,0,0,0
4,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,3,0
5,0,0,0,0,37,39,0,0,0,0,...,0,0,0,0,0,0,0,0,37,0
6,0,0,0,0,0,0,0,5,0,0,...,0,0,0,0,0,0,0,0,0,0
7,0,0,0,22,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
8,0,0,0,0,0,0,0,28,0,0,...,0,0,0,0,0,0,0,0,0,0
10,0,0,0,0,5,0,0,0,0,0,...,35,0,19,0,30,0,0,0,0,0


In [796]:
adjacencyMat_dict = {(_[0],_[1]):_[2]for _ in adjacencyMat}
adjacencyMat_dict

{(6, 26): 32,
 (10, 39): 30,
 (26, 28): 24,
 (3, 16): 19,
 (10, 35): 35,
 (10, 37): 19,
 (10, 31): 36,
 (10, 33): 32,
 (10, 32): 4,
 (15, 23): 0,
 (15, 21): 0,
 (22, 24): 0,
 (22, 27): 31,
 (1, 3): 36,
 (5, 43): 37,
 (8, 30): 23,
 (19, 34): 11,
 (12, 13): 38,
 (39, 40): 35,
 (12, 15): 29,
 (27, 29): 13,
 (1, 42): 31,
 (24, 25): 2,
 (1, 10): 4,
 (4, 30): 11,
 (13, 35): 17,
 (24, 28): 2,
 (23, 25): 37,
 (31, 43): 7,
 (31, 40): 17,
 (3, 28): 2,
 (5, 12): 39,
 (5, 11): 37,
 (3, 4): 4,
 (2, 31): 23,
 (14, 29): 13,
 (19, 27): 21,
 (27, 36): 20,
 (31, 33): 23,
 (30, 40): 27,
 (28, 42): 29,
 (21, 35): 33,
 (21, 37): 5,
 (20, 37): 24,
 (2, 9): 38,
 (0, 14): 19,
 (4, 20): 0,
 (1, 41): 8,
 (8, 14): 28,
 (19, 20): 13,
 (4, 43): 3,
 (14, 31): 25,
 (14, 30): 22,
 (13, 41): 19,
 (13, 40): 32,
 (14, 35): 10,
 (10, 11): 5,
 (14, 38): 23,
 (2, 23): 9,
 (2, 25): 1,
 (24, 40): 37,
 (12, 38): 38,
 (20, 23): 34,
 (20, 21): 29,
 (12, 30): 10,
 (12, 37): 12,
 (29, 44): 30,
 (33, 35): 15,
 (33, 37): 22,
 (0, 3

In [568]:
validNodes = list(scoring[scoring[sink] != 0].index)
validNodes

[17, 29]

In [None]:
for each in validNodes:

In [591]:
## Check if value is in columns
each = 29
path = [each]
if any(scoring.columns == each):
    possible = list(scoring[scoring[each] != 0].index)
    {pos:each for pos in possible if any(scoring.columns == pos)}

In [605]:
adjacencyMat_df = pd.DataFrame(adjacencyMat,columns=['outNode','inNode','weight'])
adjacencyMat_df.head()

Unnamed: 0,outNode,inNode,weight
0,6,26,32
1,10,39,30
2,26,28,24
3,3,16,19
4,10,35,35


In [747]:
node = sink

def pathGraph(node):
    if node == source:
        return source
    else:
        #return [pathGraph(_) for _ in list(adjacencyMat_df[adjacencyMat_df['inNode'] == node]['outNode'])]
        vals = list(adjacencyMat_df[adjacencyMat_df['inNode'] == node]['outNode'])
        for _ in vals:
            print(_,vals)
            return pathGraph(_),vals
    

In [765]:
node = sink

def pathGraph2(node):
    if node == source:
        return source
    else:
        vals = list(adjacencyMat_df[adjacencyMat_df['inNode'] == node]['outNode'])
        #print (vals)
        return {node:pathGraph(node) for node in vals if node is not None}

x=pathGraph2(44)
x

27 [27, 14, 10]
22 [22, 19, 0]
10 [10]
1 [1, 7]


{29: ((((None, [1, 7]), [10]), [22, 19, 0]), [27, 14, 10]), 17: None}

In [774]:
pathGraph2(14)

{0: 0, 8: None, 6: None}

### Grokking

In [208]:
source,sink,adjacencyMat = readba5d("BA5/test2_batd.txt")
print(source,sink,adjacencyMat)

0 44 [[6, 26, 32], [10, 39, 30], [26, 28, 24], [3, 16, 19], [10, 35, 35], [10, 37, 19], [10, 31, 36], [10, 33, 32], [10, 32, 4], [15, 23, 0], [15, 21, 0], [22, 24, 0], [22, 27, 31], [1, 3, 36], [5, 43, 37], [8, 30, 23], [19, 34, 11], [12, 13, 38], [39, 40, 35], [12, 15, 29], [27, 29, 13], [1, 42, 31], [24, 25, 2], [1, 10, 4], [4, 30, 11], [13, 35, 17], [24, 28, 2], [23, 25, 37], [31, 43, 7], [31, 40, 17], [3, 28, 2], [5, 12, 39], [5, 11, 37], [3, 4, 4], [2, 31, 23], [14, 29, 13], [19, 27, 21], [27, 36, 20], [31, 33, 23], [30, 40, 27], [28, 42, 29], [21, 35, 33], [21, 37, 5], [20, 37, 24], [2, 9, 38], [0, 14, 19], [4, 20, 0], [1, 41, 8], [8, 14, 28], [19, 20, 13], [4, 43, 3], [14, 31, 25], [14, 30, 22], [13, 41, 19], [13, 40, 32], [14, 35, 10], [10, 11, 5], [14, 38, 23], [2, 23, 9], [2, 25, 1], [24, 40, 37], [12, 38, 38], [20, 23, 34], [20, 21, 29], [12, 30, 10], [12, 37, 12], [29, 44, 30], [33, 35, 15], [33, 37, 22], [0, 36, 8], [37, 38, 17], [10, 29, 13], [17, 44, 11], [6, 14, 5], [10

In [209]:
import pandas as pd

adjacencyMat_df = pd.DataFrame(adjacencyMat,columns=['outNode','inNode','weight'])
adjacencyMat_df.head()
####
outs=set([_[0]for _ in adjacencyMat])
ins=set([_[1]for _ in adjacencyMat])
scoring = pd.DataFrame(0,index=outs,columns=ins)
scoring.head()

Unnamed: 0,3,4,9,10,11,12,13,14,15,16,...,35,36,37,38,39,40,41,42,43,44
0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [210]:
inDict = {_ : list(adjacencyMat_df[adjacencyMat_df['inNode'] == _]['outNode']) for _ in ins}
inDict

{3: [1],
 4: [3],
 9: [2],
 10: [1, 7],
 11: [5, 10],
 12: [5],
 13: [12],
 14: [0, 8, 6],
 15: [12],
 16: [3, 2],
 20: [4, 19],
 21: [15, 20],
 22: [10],
 23: [15, 2, 20],
 24: [22],
 25: [24, 23, 2],
 26: [6],
 27: [22, 19, 0],
 28: [26, 24, 3, 0],
 29: [27, 14, 10],
 30: [8, 4, 14, 12, 15, 24],
 31: [10, 2, 14, 11],
 32: [10, 24],
 33: [10, 31],
 34: [19, 22],
 35: [10, 13, 21, 14, 33, 11, 22],
 36: [27, 0, 15],
 37: [10, 21, 20, 12, 33, 22, 32],
 38: [14, 12, 37, 32, 24],
 39: [10],
 40: [39, 31, 30, 13, 24],
 41: [1, 13, 16],
 42: [1, 28],
 43: [5, 31, 4, 32, 12],
 44: [29, 17]}

In [211]:
outDict = {_ : list(adjacencyMat_df[adjacencyMat_df['outNode'] == _]['inNode']) for _ in outs}
outDict

{0: [14, 36, 27, 28],
 1: [3, 42, 10, 41],
 2: [31, 9, 23, 25, 16],
 3: [16, 28, 4],
 4: [30, 20, 43],
 5: [43, 12, 11],
 6: [26, 14],
 7: [10],
 8: [30, 14],
 10: [39, 35, 37, 31, 33, 32, 11, 29, 22],
 11: [35, 31],
 12: [13, 15, 38, 30, 37, 43],
 13: [35, 41, 40],
 14: [29, 31, 30, 35, 38],
 15: [23, 21, 36, 30],
 16: [41],
 17: [44],
 19: [34, 27, 20],
 20: [37, 23, 21],
 21: [35, 37],
 22: [24, 27, 37, 34, 35],
 23: [25],
 24: [25, 28, 40, 30, 32, 38],
 26: [28],
 27: [29, 36],
 28: [42],
 29: [44],
 30: [40],
 31: [43, 40, 33],
 32: [43, 37, 38],
 33: [35, 37],
 37: [38],
 39: [40]}

In [212]:
adjacencyMat_df.head()

Unnamed: 0,outNode,inNode,weight
0,6,26,32
1,10,39,30
2,26,28,24
3,3,16,19
4,10,35,35


### initialize

In [288]:
{(_[0],_[1]):i for i,_ in enumerate(adjacencyMat)}

{(6, 26): 0,
 (10, 39): 1,
 (26, 28): 2,
 (3, 16): 3,
 (10, 35): 4,
 (10, 37): 5,
 (10, 31): 6,
 (10, 33): 7,
 (10, 32): 8,
 (15, 23): 9,
 (15, 21): 10,
 (22, 24): 11,
 (22, 27): 12,
 (1, 3): 13,
 (5, 43): 14,
 (8, 30): 15,
 (19, 34): 16,
 (12, 13): 17,
 (39, 40): 18,
 (12, 15): 19,
 (27, 29): 20,
 (1, 42): 21,
 (24, 25): 22,
 (1, 10): 23,
 (4, 30): 24,
 (13, 35): 25,
 (24, 28): 26,
 (23, 25): 27,
 (31, 43): 28,
 (31, 40): 29,
 (3, 28): 30,
 (5, 12): 31,
 (5, 11): 32,
 (3, 4): 33,
 (2, 31): 34,
 (14, 29): 35,
 (19, 27): 36,
 (27, 36): 37,
 (31, 33): 38,
 (30, 40): 39,
 (28, 42): 40,
 (21, 35): 41,
 (21, 37): 42,
 (20, 37): 43,
 (2, 9): 44,
 (0, 14): 45,
 (4, 20): 46,
 (1, 41): 47,
 (8, 14): 48,
 (19, 20): 49,
 (4, 43): 50,
 (14, 31): 51,
 (14, 30): 52,
 (13, 41): 53,
 (13, 40): 54,
 (14, 35): 55,
 (10, 11): 56,
 (14, 38): 57,
 (2, 23): 58,
 (2, 25): 59,
 (24, 40): 60,
 (12, 38): 61,
 (20, 23): 62,
 (20, 21): 63,
 (12, 30): 64,
 (12, 37): 65,
 (29, 44): 66,
 (33, 35): 67,
 (33, 37): 68,

In [272]:
### Checks out!
costs = {(r[0],r[1]):r[2] for i,r in adjacencyMat_df.iterrows()}
costs[sink] = float('inf')

In [273]:
### good to go
graph = {}
for k,v in outDict.items():
    graph[k] = {_:costs[(k,_)] for _ in v}
graph[source]

{14: 19, 36: 8, 27: 9, 28: 7}

In [274]:
parents = {_:source for _ in list(graph[source].keys())}
parents[sink] = {}
parents

{14: 0, 36: 0, 27: 0, 28: 0, 44: {}}

In [261]:
# # the graph
# graph = {}
# graph["start"] = {}
# graph["start"]["a"] = 6
# graph["start"]["b"] = 2

# graph["a"] = {}
# graph["a"]["fin"] = 1

# graph["b"] = {}
# graph["b"]["a"] = 3
# graph["b"]["fin"] = 5

# graph["fin"] = {}

# # the costs table
# infinity = float("inf")
# costs = {}
# costs["a"] = 6
# costs["b"] = 2
# costs["fin"] = infinity

# # the parents table
# parents = {}
# parents["a"] = "start"
# parents["b"] = "start"
# parents["fin"] = None

# processed = []


In [275]:
processed = []
def find_lowest_cost_node(costs):
    lowest_cost = float("inf")
    lowest_cost_node = None
    # Go through each node.
    for node in costs:
        cost = costs[node]
        # If it's the lowest cost so far and hasn't been processed yet...
        if cost < lowest_cost and node[1] not in processed:
            # ... set it as the new lowest-cost node.
            lowest_cost = cost
            lowest_cost_node = node
    return lowest_cost_node

In [276]:
node = find_lowest_cost_node(costs)
node

(15, 23)

In [279]:
node

(32, 38)

In [277]:
while node is not None: 
    cost = costs[node]
    neighbors = graph[node[1]]
    for n in neighbors.keys():
        new_cost = cost + neighbors[n] 
        costs[(node[1],n)] = new_cost
        parents[n] = node[1]
    processed.append(node[1])         
    node = find_lowest_cost_node(costs)

KeyError: 38

In [240]:
costs

{(6, 26): 32,
 (10, 39): 30,
 (26, 28): 24,
 (3, 16): 19,
 (10, 35): 35,
 (10, 37): 19,
 (10, 31): 36,
 (10, 33): 32,
 (10, 32): 4,
 (15, 23): 0,
 (15, 21): 0,
 (22, 24): 0,
 (22, 27): 31,
 (1, 3): 36,
 (5, 43): 37,
 (8, 30): 23,
 (19, 34): 11,
 (12, 13): 38,
 (39, 40): 35,
 (12, 15): 29,
 (27, 29): 13,
 (1, 42): 31,
 (24, 25): 2,
 (1, 10): 4,
 (4, 30): 11,
 (13, 35): 17,
 (24, 28): 2,
 (23, 25): 37,
 (31, 43): 7,
 (31, 40): 17,
 (3, 28): 2,
 (5, 12): 39,
 (5, 11): 37,
 (3, 4): 4,
 (2, 31): 23,
 (14, 29): 13,
 (19, 27): 21,
 (27, 36): 20,
 (31, 33): 23,
 (30, 40): 27,
 (28, 42): 29,
 (21, 35): 33,
 (21, 37): 5,
 (20, 37): 24,
 (2, 9): 38,
 (0, 14): 19,
 (4, 20): 0,
 (1, 41): 8,
 (8, 14): 28,
 (19, 20): 13,
 (4, 43): 3,
 (14, 31): 25,
 (14, 30): 22,
 (13, 41): 19,
 (13, 40): 32,
 (14, 35): 10,
 (10, 11): 5,
 (14, 38): 23,
 (2, 23): 9,
 (2, 25): 1,
 (24, 40): 37,
 (12, 38): 38,
 (20, 23): 34,
 (20, 21): 29,
 (12, 30): 10,
 (12, 37): 12,
 (29, 44): 30,
 (33, 35): 15,
 (33, 37): 22,
 (0, 3

In [244]:
lowest_zcost = float("inf")
lowest_cost_node = None

In [241]:

# Go through each node.
for node in costs:
    cost = costs[node]
    # If it's the lowest cost so far and hasn't been processed yet...
    if cost < lowest_cost and node not in processed:
        # ... set it as the new lowest-cost node.
        lowest_cost = cost
        lowest_cost_node = node
lowest_cost_node

In [243]:
lowest_cost_node

In [292]:
ins=[_[1] for _ in Graph]
outs=[_[0] for _ in Graph]
weights=[_[2] for _ in Graph]

In [296]:
ins=[_[1] for _ in Graph]
outs=[_[0] for _ in Graph]
weights=[_[2] for _ in Graph]
#
vertices = set(ins) or set(outs)
len(vertices)

7

In [299]:
N = len(vertices)
source = 7

In [None]:
### good to go
graph = {}
for k,v in outDict.items():
    graph[k] = {_:costs[(k,_)] for _ in v}
graph[source]

In [301]:
adjList = [[] for _ in range(N)]
adjList


[[], [], [], [], [], [], []]

In [304]:
for edge in Graph:
    print(edge[0])
    #adjList[edge[0]].append(edge[1])

0
1
1
1
3
3
5
7
7
7
7


In [721]:
Graph = [(1, 2, 2),
         (1, 3, 0),
         (1, 6, 8),
         (2, 4, 3),
         (2, 5, 12),
         (3, 5, 3),
         (3, 6, 6),
         (6, 5, 1),
         (7, 5, 16)]
source = 1
sink = 5

In [722]:
ins=[_[1] for _ in Graph]
outs=[_[0] for _ in Graph]
weights=[_[2] for _ in Graph]
#
vertices = set(outs).union(set(ins))
(vertices,len(vertices))

({1, 2, 3, 4, 5, 6, 7}, 7)

{(1, 2): 2,
 (1, 3): 0,
 (1, 6): 8,
 (2, 4): 3,
 (2, 5): 12,
 (3, 5): 3,
 (3, 6): 6,
 (6, 5): 1,
 (7, 5): 16}

In [1154]:
parents = {v:[e[0] for e in Graph if e[1]==v] for v in vertices}
parents = {k:v if len(v)!=0 else None for k,v in parents.items()}
parents

{1: None, 2: [1], 3: [1], 4: [2], 5: [2, 3, 6, 7], 6: [1, 3], 7: None}

In [1155]:
AllEdges = sum([[(v,k) for v in vals] for k,vals in parents.items() if vals is not None],[])
parents_tups = {k:tuple(v) if v is not None else None for k,v in parents.items() }
paths = {k:{v:k for v in vals} for k,vals in parents.items() if vals is not None}
costsDict = {(_[0],_[1]):_[2] for _ in Graph}
###
print(paths)
print(parents_tups)
print(AllEdges)
print(costsDict)

{2: {1: 2}, 3: {1: 3}, 4: {2: 4}, 5: {2: 5, 3: 5, 6: 5, 7: 5}, 6: {1: 6, 3: 6}}
{1: None, 2: (1,), 3: (1,), 4: (2,), 5: (2, 3, 6, 7), 6: (1, 3), 7: None}
[(1, 2), (1, 3), (2, 4), (2, 5), (3, 5), (6, 5), (7, 5), (1, 6), (3, 6)]
{(1, 2): 2, (1, 3): 0, (1, 6): 8, (2, 4): 3, (2, 5): 12, (3, 5): 3, (3, 6): 6, (6, 5): 1, (7, 5): 16}


In [724]:
# start = 5
# path = [start]
# for node in parents[start]:
#     print(node)
#     while each is not None:
        
        
    

SyntaxError: unexpected EOF while parsing (<ipython-input-724-fc1b16ff47e4>, line 8)

In [431]:
# def rec(node):
#     return [[each,parents[each]] for each in parents[node]]

[(1, 2), (1, 3), (2, 4), (2, 5), (3, 5), (6, 5), (7, 5), (1, 6), (3, 6)]

{2: {1: 2}, 3: {1: 3}, 4: {2: 4}, 5: {2: 5, 3: 5, 6: 5, 7: 5}, 6: {1: 6, 3: 6}}

{1: None, 2: (1,), 3: (1,), 4: (2,), 5: (2, 3, 6, 7), 6: (1, 3), 7: None}

KeyError: (1,)

In [1140]:
## Don't modify
paths = {k:{v:k for v in vals} for k,vals in parents.items() if vals is not None}
paths

{2: {1: 2}, 3: {1: 3}, 4: {2: 4}, 5: {2: 5, 3: 5, 6: 5, 7: 5}, 6: {1: 6, 3: 6}}

In [None]:
def pathTrack3(startNode):
    """
    Traverse all possible paths for a given node
    """
    traversed = []
    #print(print(traversed)) 
    i = 0 #Start a counter to add paths as necessary
    if paths.get(startNode) == None: # Check if the node has parents. If not, there is no path, leave as is.
        #print(startNode) # For debugging
        traversed.append([startNode]) #Add the parent node.
        #i += 1 #Not needed here (?)
    else:
        for k,node in list(paths[startNode].items()): #For each parent node, traverse its parents nodes until no parent node is reached.
            traversed.append([node,k]) # Starts a new list with the parent and starting nodes.
            while parents.get(k) is not None: # Check if the parent node as a parental node
                #print(parents[k],'pth',traversed[i])
                traversed[i] = traversed[i] + parents[k] #Works but adds multiple
                #traversed[i] = traversed[i] + [pathTrack3(each) for each in parents[k]] #parents[k]
                #traversed[i] = traversed[i] + [pths(each) for each in parents[k]] #parents[k]
                k = tuple(parents[k])
            i += 1
    return [_[::-1] for _ in traversed if _ is not None]
    #return traversed
    
def getPaths(node):
    if paths.get(node) == None:
        return [[node]]
    else:
        pths = sum([pathTrack3(_) for _ in parents[node]],[])
        pths = [_ + [node] for _ in pths]
        return pths
    
def getCost(node):
    tmp = AllPossiblePaths[node]
    a = []
    for _ in tmp:
        if len(_) == 1:
            a.append(0)
        else:
            a.append([costsDict[_[i-1],_[i]] for i in range(1,len(_))])
    return a

In [1143]:
startNode = 1
traversed = []
#print(print(traversed))
i = 0
if paths.get(startNode) == None:
    i += 1
    traversed.append([startNode])
else:
    for k,node in list(paths[startNode].items()):
        traversed.append([node,k]) # Initialize with
        while parents.get(k) is not None:
            traversed[i] = traversed[i] + parents[k]
            k = tuple(parents[k])
        i += 1

print([_[::-1] for _ in traversed if _ is not None])

[[1]]


In [1324]:
paths = {k:{v:k for v in vals} for k,vals in parents.items() if vals is not None}
paths

def pathTrack(startNode):
    traversed = []
    #print(print(traversed))
    i = 0
    if paths.get(startNode) == None:
        i += 1
        traversed.append([startNode])
    else:
        for k,node in list(paths[startNode].items()):
            traversed.append([node,k])
            while parents.get(k) is not None:
                traversed[i] = traversed[i] + parents[k]
                k = tuple(parents[k])
            i += 1

    return [_[::-1] for _ in traversed if _ is not None]
    #return traversed
    
def pathTrack2(startNode):
    traversed = []
    #print(print(traversed))
    i = 0
    if paths.get(startNode) == None:
        i += 1
        traversed.append([startNode])
    else:
        for k,node in list(paths[startNode].items()):
            traversed.append([node,k])
            while parents.get(k) is not None:
                traversed[i] = traversed[i] + parents[k]
                k = tuple(parents[k])
            i += 1

    return [_[::-1] for _ in traversed if _ is not None]
    #return traversed

In [1368]:
for k,v in paths.get(5).items():
    if paths.get(k) == None:
        print(k)
    else:
        print(paths.get(k).items())

dict_items([(1, 2)])
dict_items([(1, 3)])
dict_items([(1, 6), (3, 6)])
7


In [1443]:
def recPath(node):
    if paths.get(node) == None:
        return 
    else:
        outList = [node]
        while paths.get(node) is not None:
            node = list(paths.get(node).keys())
            #print(node)
            return node

In [1469]:
anchor = 5
tmp = {k:recPath(k) for k,v in paths.get(anchor).items()}
for k,v in tmp.items():
    if v is not None:
        print(v + [k] + [anchor])
    else:
        print([k] + [anchor])

[1, 2, 5]
[1, 3, 5]
[1, 3, 6, 5]
[7, 5]


{2: 5, 3: 5, 6: 5, 7: 5}

[1, 2]
[1, 3]
[1, 3, 6]
[7]


In [1362]:
trav = []
node = 5
for k,v in paths[node].items():
    i = 0
    trav.append([k])
    while paths.get(k) is not None:
        print(k,paths[k],list(paths[k].keys()))
        trav[i] = trav[i] + [k]
        k = tuple(paths[k].keys())
#        if paths.get(k) is None:
#            trav[i] = trav[i] + k
        i += 1
trav

2 {1: 2} [1]
3 {1: 3} [1]
6 {1: 6, 3: 6} [1, 3]


[[2, 2, 3, 6], [3], [6], [7]]

In [1555]:
def pathTrack3(startNode):
    """
    Traverse all possible paths for a given node
    """
    traversed = []
    #print(print(traversed)) 
    i = 0 #Start a counter to add paths as necessary
    if paths.get(startNode) == None: # Check if the node has parents. If not, there is no path, leave as is.
        #print(startNode) # For debugging
        traversed.append([startNode]) #Add the parent node.
        #i += 1 #Not needed here (?)
    else:
        for k,node in list(paths[startNode].items()): #For each parent node, traverse its parents nodes until no parent node is reached.
            traversed.append([node,k]) # Starts a new list with the parent and starting nodes.
            while parents.get(k) is not None: # Check if the parent node as a parental node
                #print(parents[k],'pth',traversed[i])
                traversed[i] = traversed[i] + parents[k] #Works but adds multiple
                #traversed[i] = traversed[i] + [pathTrack3(each) for each in parents[k]] #parents[k]
                #traversed[i] = traversed[i] + [pths(each) for each in parents[k]] #parents[k]
                k = tuple(parents[k])
            i += 1
    return [_[::-1] for _ in traversed if _ is not None]
    #return traversed
    
def getPaths(node):
    if paths.get(node) == None:
        return [[node]]
    else:
        pths = sum([pathTrack3(_) for _ in parents[node]],[])
        pths = [_ + [node] for _ in pths]
        return pths
    
def getCost(node):
    tmp = AllPossiblePaths[node]
    a = []
    for _ in tmp:
        if len(_) == 1:
            a.append(0)
        else:
            a.append([costsDict[_[i-1],_[i]] for i in range(1,len(_))])
    return a

In [1556]:
AllPossiblePaths = {v:getPaths(v) for v in vertices}
AllPossiblePaths

{1: [[1]],
 2: [[1, 2]],
 3: [[1, 3]],
 4: [[1, 2, 4]],
 5: [[1, 2, 5], [1, 3, 5], [1, 6, 5], [1, 3, 6, 5], [7, 5]],
 6: [[1, 6], [1, 3, 6]],
 7: [[7]]}

In [1565]:
print(AllPossiblePaths[5])
print([sum(_) for _ in getCost(5)])

[[1, 2, 5], [1, 3, 5], [1, 6, 5], [1, 3, 6, 5], [7, 5]]
[14, 3, 9, 7, 16]


In [1509]:
### Get costs
def getCostPaths(whichNode):
    tmpPaths = getPaths(whichNode)
    if len(tmpPaths) == 1:
        return 0
    else:
        return [[costsDict[currentPath[i-1],currentPath[i]] for i in range(1,len(currentPath))] for currentPath in tmpPaths]
    #listCosts = [[costsDict[currentPath[i-1],currentPath[i]] for i in range(1,len(currentPath))] for currentPath in paths]
    

In [1511]:
getCostPaths(5)

[[2, 12], [0, 3], [8, 1], [0, 6, 1], [16]]

In [1507]:
for k,v in AllPossiblePaths.items():
    print(k,v,getCostPaths(k))

1 [1] 0
2 [[1, 2]] 0
3 [[1, 3]] 0
4 [[1, 2, 4]] 0
5 [[1, 2, 5], [1, 3, 5], [1, 6, 5], [1, 3, 6, 5], [7, 5]] [[2, 12], [0, 3], [8, 1], [0, 6, 1], [16]]
6 [[1, 6], [1, 3, 6]] [[8], [0, 6]]
7 [7] 0


In [1537]:
def GetCost(node):
    for _ in AllPossiblePaths[node]:
        if len(_) == 1:
            return 0
        else:
            return [costsDict[(_[i-1],_[i])] for i in range(1,len(_))]



[2, 3]

In [1257]:
pathTrack2(startNode)

[[1, 2, 5], [1, 3, 5], [3, 1, 6, 5], [7, 5]]

In [1270]:
[pathTrack2(_) for _ in parents[5]]

[1] pth [6, 3]


[[[1, 2]], [[1, 3]], [[1, 6], [1, 3, 6]], [[7]]]

In [1156]:
anchor = 5
pths = sum([pathTrack(_) for _ in parents[anchor]],[])
pths
#[ for _ in pths]

IndexError: list index out of range

[[1]]

In [1090]:
### Get costs
def getCostPaths(whichNode):
    paths = getPaths(whichNode)
    if len(paths) == 1:
        return 0
    else:
        return [[costsDict[currentPath[i-1],currentPath[i]] for i in range(1,len(currentPath))] for currentPath in paths]
    #listCosts = [[costsDict[currentPath[i-1],currentPath[i]] for i in range(1,len(currentPath))] for currentPath in paths]
    

In [1157]:
getPaths(2)

1


IndexError: list index out of range

In [1102]:
paths5 = getPaths(1)
if len(paths5) == 1:
    return 0
else:
    [[costsDict[currentPath[i-1],currentPath[i]] for i in range(1,len(currentPath))] for currentPath in paths]

AttributeError: 'list' object has no attribute 'get'

1

In [1139]:
pathTrack2(1)

1


IndexError: list index out of range

In [1170]:
a=[]
print(a)
a.append([3,4])
print(a)
a.append(['b'])
print(a)

[]
[[3, 4]]
[[3, 4], ['b']]


In [1317]:
def pathTrack3(startNode):
    """
    Traverse all possible paths for a given node
    """
    traversed = []
    #print(print(traversed)) 
    i = 0 #Start a counter to add paths as necessary
    if paths.get(startNode) == None: # Check if the node has parents. If not, there is no path, leave as is.
        #print(startNode) # For debugging
        traversed.append([startNode]) #Add the parent node.
        #i += 1 #Not needed here (?)
    else:
        for k,node in list(paths[startNode].items()): #For each parent node, traverse its parents nodes until no parent node is reached.
            traversed.append([node,k]) # Starts a new list with the parent and starting nodes.
            while parents.get(k) is not None: # Check if the parent node as a parental node
                #print(parents[k],'pth',traversed[i])
                traversed[i] = traversed[i] + parents[k] #Works but adds multiple
                #traversed[i] = traversed[i] + [pathTrack3(each) for each in parents[k]] #parents[k]
                #traversed[i] = traversed[i] + [pths(each) for each in parents[k]] #parents[k]
                k = tuple(parents[k])
            i += 1
    return [_[::-1] for _ in traversed if _ is not None]
    #return traversed

In [1320]:
for _ in pathTrack2

SyntaxError: invalid syntax (<ipython-input-1320-3c3f9f7e73ae>, line 1)

In [1284]:
def pathToNode(node):
    tmp = [pathTrack3(_) for _ in parents[node]]
    #return [sum(_,[]) + [node] for _ in tmp]

In [1285]:
node=6
pathToNode(node)

[[1, 6], [1, 3, 6]]

In [1295]:
node=5
pathToNode(node)

TypeError: can only concatenate list (not "int") to list

In [1323]:
[pathTrack(_) for _ in parents[5]]


IndexError: list index out of range

In [1255]:
pathTrack3(5)

[1] pth [5, 2]
[1] pth [5, 3]
[1, 3] pth [5, 6]


[[1, 2, 5], [1, 3, 5], [3, 1, 6, 5], [7, 5]]

In [1186]:
pathTrack(5)

[[1, 2, 5], [1, 3, 5], [3, 1, 6, 5], [7, 5]]

In [1181]:
paths.get(5)

{2: 5, 3: 5, 6: 5, 7: 5}

In [1161]:
paths.get(5)

{2: 5, 3: 5, 6: 5, 7: 5}

In [1162]:
parents[5]

[2, 3, 6, 7]

In [959]:
print(_,pathTrack(_)) for _ in parents[anchor]:
    

2 [[1, 2]]
3 [[1, 3]]
6 [[1, 6], [1, 3, 6]]
7 []


In [776]:
parents[3]

[1]

[]

In [1244]:
traversed = []
#print(print(traversed))
i = 0
for k,node in list(paths[5].items()):
    traversed.append([node,k])
    visited = []
    while parents.get(k) is not None:
        traversed[i] = traversed[i] + pathparents[k]
        k = tuple(parents[k])
    i += 1

traversed

TypeError: unhashable type: 'list'

In [793]:
parents[2]

[1]

In [1146]:
visited = set()
for node in parents[5]:
    print(visited, node)
    if node not in visited:
        print(parents[node])
    visited.add(parents[node])
visited

set() 2
[1]


TypeError: unhashable type: 'list'

In [1145]:
anchor = 5
for node in parents[anchor]:
    print(node)
    path = [anchor]
    visited = Graph.copy()
    while len(visited) > 0:
        if each[1]==anchor and each[2]==node:
            path.append(each[1])

2


KeyboardInterrupt: 

[(1, 2, 2),
 (1, 3, 0),
 (1, 6, 8),
 (2, 4, 3),
 (2, 5, 12),
 (3, 5, 3),
 (3, 6, 6),
 (6, 5, 1),
 (7, 5, 16)]

In [1238]:
parents[5]

[2, 3, 6, 7]

In [1144]:
def pths(anchor):
    return [[anchor,each] + parents[each] for each in parents[anchor] if parents[each] is not None]
anchor=5
pths(anchor)

[[5, 2, 1], [5, 3, 1], [5, 6, 1, 3]]

In [1239]:
PosPaths = {k:(v,pths(k)) for k,v in parents.items() if v is not None}   
PosPaths

{2: ([1], []),
 3: ([1], []),
 4: ([2], [[4, 2, 1]]),
 5: ([2, 3, 6, 7], [[5, 2, 1], [5, 3, 1], [5, 6, 1, 3]]),
 6: ([1, 3], [[6, 3, 1]])}

In [938]:
for _ in parents[5]:
    print(_,parents[_])

2 [1]
3 [1]
6 [1, 3]
7 None


In [952]:
anchor=5
for _ in parents[anchor]:
    

[2, 3, 6, 7]

2 [[1, 2]]
3 [[1, 3]]
6 [[1, 6], [1, 3, 6]]
7 []
