In [77]:
'''Cyclopeptide Scoring Problem: Compute the score of a cyclic peptide against a spectrum.
     Input: An amino acid string Peptide and a collection of integers Spectrum. 
     Output: The score of Peptide against Spectrum, Score(Peptide, Spectrum).

CODE CHALLENGE: Solve the Cyclopeptide Scoring Problem.'''

def CircularSpectrum_string(Peptide, AminoAcid, AminoAcidMass):
    ''' generate circular spectrum of peptide
    when peptide input is under the string form:
    EXAMPLE: peptide NQEL = [114,128,129,113] or 'NQEL' '''
        
    PrefixMass={}
    PrefixMass[0]=0
    for i in range(1,len(Peptide)+1):
        for j in range(len(AminoAcid)):
            if AminoAcid[j] == Peptide[i-1]:
                PrefixMass[i] = PrefixMass[i-1] + AminoAcidMass[j]
    peptideMass = PrefixMass[len(Peptide)]

    CircularSpectrum=[0]
    for i in range(len(Peptide)):
        for j in range(i+1,len(Peptide)+1):
            CircularSpectrum.append(PrefixMass[j]-PrefixMass[i])
            if i > 0 and j < len(PrefixMass)-1:
                CircularSpectrum.append(peptideMass -(PrefixMass[j]-PrefixMass[i]))
    return sorted(CircularSpectrum)


def CircularSpectrum_list(Peptide, AminoAcid, AminoAcidMass):
    ''' generate circular spectrum of peptide
    when peptide input is under the list form:
    EXAMPLE: peptide NQEL = [0,114,128,129,113] '''

    from collections import defaultdict
    PrefixMass={}
    PrefixMass[0]=0
    for i in range(1,len(Peptide)):
        for j in range(len(AminoAcid)):
            if AminoAcid[j] == Peptide[i]:
                PrefixMass[i] = PrefixMass[i-1] + AminoAcidMass[j]
    peptideMass = PrefixMass[len(Peptide)-1]

    CircularSpectrum=[0]
    for i in range(len(Peptide)-1):
        for j in range(i+1,len(Peptide)):
            CircularSpectrum.append(PrefixMass[j]-PrefixMass[i])
            if i > 0 and j < len(PrefixMass)-1:
                CircularSpectrum.append(peptideMass -(PrefixMass[j]-PrefixMass[i]))
    return sorted(CircularSpectrum)
    
def CircularPeptideScore(Peptide,Spectrum,AminoAcid, AminoAcidMass):
    ''' score of cyclic peptide against a spectrum'''
    floatSpectrum=Spectrum[:]
    score=0
    for spike in CircularSpectrum_string(peptide, AminoAcid, AminoAcidMass):
        if spike in floatSpectrum:
            floatSpectrum.remove(spike)
            score+=1
    return score


'''challenge '''
f=open('../../Downloads/rosalind_ba4f.txt','r')
lines=f.read().splitlines()
f.close()
Spectrum=map(int,lines[1].split(' '))
peptide=lines[0]

#peptide='VYYEVDWTMGRQIDPDEYPIAQCTRHRATILTLPDWQM'
#lines='0 71 71 87 87 97 97 99 101 101 101 101 103 113 113 113 113 113 115 115 115 115 128 128 129 129 131 131 137 147 156 156 156 163 163 163 163 172 184 186 186 199 204 210 212 212 214 214 214 216 218 226 227 228 230 231 232 243 244 244 257 259 260 260 262 273 276 278 278 281 285 287 292 293 293 301 302 303 312 314 319 325 326 327 327 327 327 328 331 332 341 358 360 364 373 374 374 375 377 389 390 391 393 394 398 402 403 407 407 409 415 416 418 425 428 429 440 440 441 441 444 445 449 456 464 465 472 475 488 490 492 494 497 499 502 503 503 504 504 505 505 512 516 517 520 521 521 531 540 541 544 550 554 556 559 560 569 570 572 573 577 578 587 592 592 601 603 605 612 613 617 618 618 619 621 621 622 625 628 634 634 653 655 657 659 661 669 671 672 675 678 680 684 688 691 696 701 704 705 707 714 715 716 716 718 721 722 723 724 732 734 735 743 748 749 749 756 768 769 775 776 781 781 785 791 792 799 800 804 806 808 809 816 819 822 822 825 829 829 830 835 836 836 843 846 847 850 852 852 870 872 877 879 887 890 895 900 904 905 905 906 912 913 919 919 921 923 928 931 932 935 937 938 942 942 944 947 948 948 953 959 961 965 976 985 985 985 992 999 1005 1008 1013 1015 1016 1020 1024 1024 1028 1032 1035 1036 1036 1036 1043 1046 1049 1050 1051 1061 1061 1062 1062 1066 1068 1069 1073 1075 1076 1089 1090 1098 1100 1114 1117 1122 1123 1131 1132 1133 1133 1133 1137 1137 1137 1139 1141 1148 1149 1152 1155 1160 1162 1162 1163 1174 1174 1176 1179 1183 1191 1198 1199 1201 1201 1204 1219 1220 1224 1225 1229 1232 1232 1234 1244 1245 1248 1250 1250 1252 1252 1253 1261 1261 1262 1263 1264 1265 1273 1280 1288 1296 1302 1302 1305 1312 1313 1316 1316 1318 1325 1332 1335 1335 1339 1345 1347 1349 1350 1351 1354 1354 1360 1362 1363 1363 1367 1369 1376 1377 1387 1388 1391 1392 1392 1393 1397 1403 1410 1415 1417 1425 1425 1428 1433 1434 1440 1444 1451 1460 1463 1464 1464 1464 1464 1464 1465 1469 1475 1477 1479 1488 1491 1492 1497 1501 1502 1505 1510 1520 1525 1525 1526 1526 1530 1531 1534 1540 1540 1543 1547 1548 1556 1561 1564 1564 1566 1577 1578 1579 1588 1590 1590 1592 1593 1597 1603 1606 1619 1623 1626 1627 1627 1627 1628 1634 1635 1637 1637 1638 1639 1641 1648 1648 1653 1662 1665 1671 1674 1676 1677 1681 1689 1693 1693 1695 1703 1706 1708 1719 1724 1724 1727 1729 1734 1735 1738 1740 1742 1742 1750 1752 1752 1753 1754 1754 1756 1760 1763 1765 1766 1778 1784 1785 1790 1792 1794 1804 1804 1806 1808 1821 1821 1823 1832 1837 1837 1837 1851 1853 1853 1853 1855 1857 1857 1862 1865 1866 1866 1867 1871 1871 1875 1879 1879 1883 1885 1889 1890 1891 1897 1905 1908 1916 1919 1923 1928 1933 1936 1941 1941 1949 1952 1964 1966 1966 1966 1966 1967 1968 1968 1977 1980 1984 1984 1985 1988 1992 2000 2004 2004 2009 2010 2012 2020 2020 2022 2028 2031 2034 2034 2036 2037 2041 2046 2051 2052 2053 2056 2065 2079 2079 2081 2081 2087 2091 2093 2097 2097 2105 2111 2113 2115 2116 2122 2123 2124 2129 2131 2133 2135 2138 2139 2140 2147 2148 2152 2153 2159 2160 2165 2168 2168 2169 2178 2180 2184 2187 2192 2197 2198 2206 2206 2209 2215 2224 2226 2237 2237 2240 2244 2244 2244 2247 2250 2253 2255 2260 2263 2264 2266 2268 2269 2277 2280 2286 2287 2293 2293 2294 2296 2297 2300 2301 2307 2309 2310 2311 2315 2315 2324 2334 2337 2339 2341 2352 2354 2356 2359 2369 2378 2378 2382 2383 2384 2386 2392 2393 2396 2397 2399 2400 2400 2406 2407 2413 2416 2424 2425 2427 2429 2430 2433 2438 2440 2443 2446 2449 2449 2449 2453 2456 2456 2467 2469 2478 2484 2487 2487 2495 2496 2501 2506 2509 2513 2515 2524 2525 2525 2528 2533 2534 2540 2541 2545 2546 2553 2554 2555 2558 2560 2562 2564 2569 2570 2571 2577 2578 2580 2582 2588 2596 2596 2600 2602 2606 2612 2612 2614 2614 2628 2637 2640 2641 2642 2647 2652 2656 2657 2659 2659 2662 2665 2671 2673 2673 2681 2683 2684 2689 2689 2693 2701 2705 2708 2709 2709 2713 2716 2725 2725 2726 2727 2727 2727 2727 2729 2741 2744 2752 2752 2757 2760 2765 2770 2774 2777 2785 2788 2796 2802 2803 2804 2808 2810 2814 2814 2818 2822 2822 2826 2827 2827 2828 2831 2836 2836 2838 2840 2840 2840 2842 2856 2856 2856 2861 2870 2872 2872 2885 2887 2889 2889 2899 2901 2903 2908 2909 2915 2927 2928 2930 2933 2937 2939 2939 2940 2941 2941 2943 2951 2951 2953 2955 2958 2959 2964 2966 2969 2969 2974 2985 2987 2990 2998 3000 3000 3004 3012 3016 3017 3019 3022 3028 3031 3040 3045 3045 3052 3054 3055 3056 3056 3058 3059 3065 3066 3066 3066 3067 3070 3074 3087 3090 3096 3100 3101 3103 3103 3105 3114 3115 3116 3127 3129 3129 3132 3137 3145 3146 3150 3153 3153 3159 3162 3163 3167 3167 3168 3168 3173 3183 3188 3191 3192 3196 3201 3202 3205 3214 3216 3218 3224 3228 3229 3229 3229 3229 3229 3230 3233 3242 3249 3253 3259 3260 3265 3268 3268 3276 3278 3283 3290 3296 3300 3301 3301 3302 3305 3306 3316 3317 3324 3326 3330 3330 3331 3333 3339 3339 3342 3343 3344 3346 3348 3354 3358 3358 3361 3368 3375 3377 3377 3380 3381 3388 3391 3391 3397 3405 3413 3420 3428 3429 3430 3431 3432 3432 3440 3441 3441 3443 3443 3445 3448 3449 3459 3461 3461 3464 3468 3469 3473 3474 3489 3492 3492 3494 3495 3502 3510 3514 3517 3519 3519 3530 3531 3531 3533 3538 3541 3544 3545 3552 3554 3556 3556 3556 3560 3560 3560 3561 3562 3570 3571 3576 3579 3593 3595 3603 3604 3617 3618 3620 3624 3625 3627 3631 3631 3632 3632 3642 3643 3644 3647 3650 3657 3657 3657 3658 3661 3665 3669 3669 3673 3677 3678 3680 3685 3688 3694 3701 3708 3708 3708 3717 3728 3732 3734 3740 3745 3745 3746 3749 3751 3751 3755 3756 3758 3761 3762 3765 3770 3772 3774 3774 3780 3781 3787 3788 3788 3789 3793 3798 3803 3806 3814 3816 3821 3823 3841 3841 3843 3846 3847 3850 3857 3857 3858 3863 3864 3864 3868 3871 3871 3874 3877 3884 3885 3887 3889 3893 3894 3901 3902 3908 3912 3912 3917 3918 3924 3925 3937 3944 3944 3945 3950 3958 3959 3961 3969 3970 3971 3972 3975 3977 3977 3978 3979 3986 3988 3989 3992 3997 4002 4005 4009 4013 4015 4018 4021 4022 4024 4032 4034 4036 4038 4040 4059 4059 4065 4068 4071 4072 4072 4074 4075 4075 4076 4080 4081 4088 4090 4092 4101 4101 4106 4115 4116 4120 4121 4123 4124 4133 4134 4137 4139 4143 4149 4152 4153 4162 4172 4172 4173 4176 4177 4181 4188 4188 4189 4189 4190 4190 4191 4194 4196 4199 4201 4203 4205 4218 4221 4228 4229 4237 4244 4248 4249 4252 4252 4253 4253 4264 4265 4268 4275 4277 4278 4284 4286 4286 4290 4291 4295 4299 4300 4302 4303 4304 4316 4318 4319 4319 4320 4329 4333 4335 4352 4361 4362 4365 4366 4366 4366 4366 4367 4368 4374 4379 4381 4390 4391 4392 4400 4400 4401 4406 4408 4412 4415 4415 4417 4420 4431 4433 4433 4434 4436 4449 4449 4450 4461 4462 4463 4465 4466 4467 4475 4477 4479 4479 4479 4481 4481 4483 4489 4494 4507 4507 4509 4521 4530 4530 4530 4530 4537 4537 4537 4546 4556 4562 4562 4564 4564 4565 4565 4578 4578 4578 4578 4580 4580 4580 4580 4580 4590 4592 4592 4592 4592 4594 4596 4596 4606 4606 4622 4622 4693'

#peptide='NQEL'
#peptide=[0,114,128,129,113]
#Spectrum=[0,99,113,114,128,227,257,299,355,356,370,371,484]


AminoAcid='GASPVTCILNDKQEMHFRYW'
AminoAcidMass=[57,71,87,97,99,101,103,113,113,114,115,128,128,129,131,137,147,156,163,186]


#AminoAcid='GASPVTCLNDQEMHFRYW'
#AminoAcidMass=[57,71,87,97,99,101,103,113,114,115,128,129,131,137,147,156,163,186]
#'''
#AminoAcid=[57,71,87,97,99,101,103,113,114,115,128,129,131,137,147,156,163,186]
#AminoAcidMass=[57,71,87,97,99,101,103,113,114,115,128,129,131,137,147,156,163,186]


print len(AminoAcid),len(AminoAcidMass)


print PeptideScore(peptide,Spectrum,AminoAcid,AminoAcidMass)



20 20
633


In [137]:
''' CHALLENGE: Compute the Score of a Linear Peptide '''


def LinearSpectrum_list(Peptide, AminoAcid, AminoAcidMass):
    ''' generate circular spectrum of peptide
    when peptide input is under the list form:
    EXAMPLE: peptide NQEL = [0,114,128,129,113] '''
    PrefixMass={}
    PrefixMass[0]=0
    for i in range(1,len(Peptide)):
        for j in range(len(AminoAcid)):
            if AminoAcid[j] == Peptide[i]:
                #print i, j,PrefixMass[i-1],AminoAcidMass[j]
                PrefixMass[i] = PrefixMass[i-1] + AminoAcidMass[j]
                #print PrefixMass[i]
    LinearSpectrum=[0]
    for i in range(len(Peptide)-1):
        for j in range(i+1,len(Peptide)):
            LinearSpectrum.append(PrefixMass[j]-PrefixMass[i])
    return sorted(LinearSpectrum)

def LinearSpectrum_string(Peptide, AminoAcid, AminoAcidMass):
    ''' generate circular spectrum of peptide
    when peptide input is under the string form:
    EXAMPLE: peptide NQEL = [114,128,129,113] or 'NQEL' '''
    PrefixMass={}
    PrefixMass[0]=0
    for i in range(len(Peptide)):
        for j in range(len(AminoAcid)):
            if AminoAcid[j] == Peptide[i]:
                PrefixMass[i+1] = PrefixMass[i] + AminoAcidMass[j]
    #print PrefixMass

    LinearSpectrum=[0]
    for i in range(len(Peptide)):
        for j in range(i+1,len(Peptide)+1):
            LinearSpectrum.append(PrefixMass[j]-PrefixMass[i])
    return sorted(LinearSpectrum)

def LinearPeptideScore(Peptide,Spectrum,AminoAcid,AminoAcidMass):
    ''' score of cyclic peptide against a spectrum'''
    floatSpectrum=Spectrum[:]
    score=0
    for spike in LinearSpectrum_string(Peptide, AminoAcid, AminoAcidMass):
        if spike in floatSpectrum:
            floatSpectrum.remove(spike)
            score+=1
    return score


   
AminoAcid='GASPVTCILNDKQEMHFRYW'
AminoAcidMass=[57,71,87,97,99,101,103,113,113,114,115,128,128,129,131,137,147,156,163,186]

'''
AminoAcid='GASPVTCLNDQEMHFRYW'
AminoAcidMass=[57,71,87,97,99,101,103,113,114,115,128,129,131,137,147,156,163,186]
'''
'''
AminoAcid=[57,71,87,97,99,101,103,113,114,115,128,129,131,137,147,156,163,186]
AminoAcidMass=[57,71,87,97,99,101,103,113,114,115,128,129,131,137,147,156,163,186]
'''

f=open('../../Downloads/rosalind_ba4k.txt','r')
lines=f.read().splitlines()
f.close()
Peptide=lines[0]
Spectrum=map(int,lines[1].split(' '))

#Peptide='NQEL'
#Peptide=[0,114,128,129,113]
#Spectrum = [0,99,113,114,128,227,257,299,355,356,370,371,484]

#print peptide
#print Spectrum
#print LinearSpectrum_string(Peptide, AminoAcid, AminoAcidMass)
print LinearPeptideScore(Peptide,Spectrum,AminoAcid,AminoAcidMass)

175


In [8]:
''' Using class functions '''
from Bioinf_functions import StringPeptide,ListPeptide

#peptide=ListPeptide([0,114,128,129,113])
#peptide=StringPeptide('PEEP')
peptide=StringPeptide('MAMA')
#Spectrum=map(int,'0 97 97 129 129 194 203 226 226 258 323 323 323 355 403 452'.split(" "))
Spectrum=map(int,'0 71 178 202 202 202 333 333 333 404 507 507'.split(" "))
#peptide=StringPeptide([114,128,129,113])
print peptide.AminoAcid
print peptide.AminoAcidMass
#peptide.AminoAcid=peptide.AminoAcidMass[:]
#print peptide.AminoAcid
print len(peptide.string)
print peptide.description
print peptide.string
print peptide.CircularSpectrum()
print peptide.LinearSpectrum()
print peptide.CircularPeptideScore(Spectrum)
print peptide.LinearPeptideScore(Spectrum)  


GASPVTCILNDKQEMHFRYW
[57, 71, 87, 97, 99, 101, 103, 113, 113, 114, 115, 128, 128, 129, 131, 137, 147, 156, 163, 186]
4
Peptide represented by a string of letters for each aa
MAMA
[0, 71, 71, 131, 131, 202, 202, 202, 202, 273, 273, 333, 333, 404]
[0, 71, 71, 131, 131, 202, 202, 202, 273, 333, 404]
8
7


In [1]:
''' CHALLENGE: Trim a Peptide Leaderboard '''

#Leaderboard=[[0, 57], [0, 71], [0, 87], [0, 97], [0, 99], [0, 101], [0, 103], [0, 113], [0, 114], [0, 115], [0, 128], [0, 129], [0, 131], [0, 137], [0, 147], [0, 156], [0, 163], [0, 186]]
#Spectrum=[0,71,87,101,113,158,184,188,259,271,372]
#N=2
#AminoAcidNames=[57,71,87,97,99,101,103,113,114,115,128,129,131,137,147,156,163,186]
#AminoAcidMass=[57,71,87,97,99,101,103,113,114,115,128,129,131,137,147,156,163,186]

#Leaderboard=['LAST','ALST','TLLT','TQAS']
#Spectrum=[0,71,87,101,113,158,184,188,259,271,372]
#N=2
AminoAcidNames='GASPVTCILNDKQEMHFRYW'
AminoAcidMass=[57,71,87,97,99,101,103,113,113,114,115,128,128,129,131,137,147,156,163,186]

#print LinearPeptideScore('TQAS',Spectrum,AminoAcidNames, AminoAcidMass)  


f=open('../../Downloads/rosalind_ba4l.txt','r')
lines=f.read().splitlines()
f.close()
Leaderboard=lines[0].split(' ')
Spectrum=map(int,lines[1].split(' '))
N=int(lines[2])


def LinearPeptideScore(Peptide,Spectrum,AminoAcid,AminoAcidMass):
    ''' score of cyclic peptide against a spectrum'''
    floatSpectrum=Spectrum[:]
    score=0
    for spike in LinearSpectrum_string(Peptide, AminoAcid, AminoAcidMass):
        if spike in floatSpectrum:
            floatSpectrum.remove(spike)
            score+=1
    return score

def Trim(Leaderboard, Spectrum, N,AminoAcid, AminoAcidMass):
    import pandas as pd
    #from Bioinf_functions import LinearSpectrum_string

    PepScoredic = {}
    for j in range(len(Leaderboard)):
        Peptide = Leaderboard[j]
        PepScoredic['-'.join(map(str,Peptide))]=LinearPeptideScore(Peptide,Spectrum,AminoAcid,AminoAcidMass)
    df=pd.DataFrame(list(PepScoredic.iteritems()),columns=['Peptide','Score'])
    df=df.sort(columns='Score',ascending=False)
    df.index = range(len(df)) #reset index
    for j in range (N,len(Leaderboard)):
        if df.Score[j]< df.Score[N-1]:
            df2=df[df.index < j]
            break
            
    return [''.join(item.split('-')) for item in list(df[df.index < j].T.itertuples())[0][1:]]
            

        #map(int,item.split('-'))


print ' '.join(Trim(Leaderboard, Spectrum, N,AminoAcidNames, AminoAcidMass))                   

NameError: global name 'LinearSpectrum_string' is not defined

In [6]:
''' TRIM: Using class functions '''
Leaderboard=['LAST','ALST','TLLT','TQAS']
Spectrum=[0,71,87,101,113,158,184,188,259,271,372]
N=2
AminoAcidNames=[57,71,87,97,99,101,103,113,114,115,128,129,131,137,147,156,163,186]
AminoAcidMass=[57,71,87,97,99,101,103,113,114,115,128,129,131,137,147,156,163,186]


def Trim(Leaderboard, Spectrum, N,AminoAcidNames, AminoAcidMass):
    import pandas as pd
    from Bioinf_functions import StringPeptide
    
    PepScoredic = {}
    for j in range(len(Leaderboard)):
        Peptide = StringPeptide(Leaderboard[j])
        Peptide.AminoAcid= AminoAcidNames[:]
        Peptide.AminoAcidMass = AminoAcidMass[:]
        
        PepScoredic['-'.join(map(str,Peptide.string))]=LinearPeptideScore.Peptide
    df=pd.DataFrame(list(PepScoredic.iteritems()),columns=['Peptide','Score'])
    df=df.sort(columns='Score',ascending=False)
    df.index = range(len(df)) #reset index
    for j in range (N,len(Leaderboard)):
        if df.Score[j]< df.Score[N-1]:
            df2=df[df.index < j]
            break
            
    return [map(int,item.split('-')) for item in list(df[df.index < j].T.itertuples())[0][1:]]

print Trim(Leaderboard, Spectrum, N,AminoAcidNames, AminoAcidMass)

['TQAS', 'TLLT']


In [80]:
'''LEADERBOARDCYCLOPEPTIDESEQUENCING.'''

'''Leaderboard ← set containing only the empty peptide
        LeaderPeptide ← empty peptide
        while Leaderboard is non-empty
            Leaderboard ← Expand(Leaderboard)
            for each Peptide in Leaderboard
                if Mass(Peptide) = ParentMass(Spectrum)
                    if Score(Peptide, Spectrum) > Score(LeaderPeptide, Spectrum)
                        LeaderPeptide ← Peptide
                else if Mass(Peptide) > ParentMass(Spectrum)
                    remove Peptide from Leaderboard
            Leaderboard ← Trim(Leaderboard, Spectrum, N)
        output LeaderPeptide'''

''' RAW '''

###########
def Expand(Peptides,AminoAcidNames):
    '''
    Expand each peptides by adding one aa
    '''
    NewPeptides=[]
    for n in range(len(Peptides)):
        peptide=Peptides[n]
        #print n,peptide, peptide[-1]
        for aa in AminoAcidNames:
            #print peptide
            newpeptide=peptide[:]
            newpeptide.append(aa)
            #print newpeptide
            NewPeptides.append(newpeptide)
    return NewPeptides

#####
def CircularSpectrum(Peptide, AminoAcid, AminoAcidMass):
    ''' generate circulat spectrum of peptide '''
    from collections import defaultdict
    PrefixMass={}
    PrefixMass[0]=0
    for i in range(1,len(Peptide)):
        for j in range(len(AminoAcid)):
            if AminoAcid[j] == Peptide[i]:
                #print i, j,PrefixMass[i-1],AminoAcidMass[j]
                PrefixMass[i] = PrefixMass[i-1] + AminoAcidMass[j]
    #print len(Peptide), PrefixMass
    peptideMass = PrefixMass[len(Peptide)-1]

    CircularSpectrum=[0]
    for i in range(len(Peptide)-1):
        for j in range(i+1,len(Peptide)):
            CircularSpectrum.append(PrefixMass[j]-PrefixMass[i])
            if i > 0 and j < len(PrefixMass)-1:
                CircularSpectrum.append(peptideMass -(PrefixMass[j]-PrefixMass[i]))
    return sorted(CircularSpectrum)
#####

def PeptideScore(Peptide,Spectrum,AminoAcid, AminoAcidMass):
    ''' score of cyclic peptide against a spectrum'''
    floatSpectrum=Spectrum[:]
    score=0
    for spike in CircularSpectrum(Peptide, AminoAcid, AminoAcidMass):
        if spike in floatSpectrum:
            floatSpectrum.remove(spike)
            score+=1
    return score
####
def Trim(Leaderboard, Spectrum, N,AminoAcid, AminoAcidMass):
    import pandas as pd
    def LinearSpectrum(Peptide, AminoAcid, AminoAcidMass):
        ''' Generate linear spectrum of peptide
        need peptides as list of integer'''
        PrefixMass={}
        PrefixMass[0]=0
        for i in range(1,len(Peptide)):
            for j in range(len(AminoAcid)):
                if AminoAcid[j] == Peptide[i]:
                    #print i, j,PrefixMass[i-1],AminoAcidMass[j]
                    PrefixMass[i] = PrefixMass[i-1] + AminoAcidMass[j]
                    #print PrefixMass[i]
        LinearSpectrum=[0]
        for i in range(len(Peptide)-1):
            for j in range(i+1,len(Peptide)):
                LinearSpectrum.append(PrefixMass[j]-PrefixMass[i])
        return sorted(LinearSpectrum)

    def LinearPeptideScore(Peptide,Spectrum,AminoAcid, AminoAcidMass):
        ''' score of Linear peptide against a spectrum'''
        floatSpectrum=Spectrum[:]
        #from Bioinf_functions import LinearSpectrum
        score=0
        for spike in LinearSpectrum(Peptide, AminoAcid, AminoAcidMass):
            if spike in floatSpectrum:
                floatSpectrum.remove(spike)
                score+=1
        return score

    PepScoredic = {}
    for j in range(len(Leaderboard)):
        Peptide = Leaderboard[j]
        PepScoredic['-'.join(map(str,Peptide))]=LinearPeptideScore(Peptide,Spectrum,AminoAcid,AminoAcidMass)
    df=pd.DataFrame(list(PepScoredic.iteritems()),columns=['Peptide','Score'])
    df=df.sort(columns='Score',ascending=False)
    df.index = range(len(df)) #reset index
    for j in range (N,len(Leaderboard)):
        if df.Score[j]< df.Score[N-1]:
            df2=df[df.index < j]
            break
            
    return [map(int,item.split('-')) for item in list(df[df.index < j].T.itertuples())[0][1:]]

###############
AminoAcidNames=[57,71,87,97,99,101,103,113,114,115,128,129,131,137,147,156,163,186]
AminoAcidMass=[57,71,87,97,99,101,103,113,114,115,128,129,131,137,147,156,163,186]
N=10
Spectrum=[0,71,113,129,147,200,218,260,313,331,347,389,460]

f=open('../../Downloads/rosalind_ba4g.txt','r')
#lines=f.read().splitlines()
f.close()
#N=int(lines[0])
#Spectrum=map(int,lines[1].split(' '))


#from Bioinf_functions import Expand
Leaderboard=[[0]]
#while len(Leaderboard) >0:
i=0
LeaderPeptide=[[0]]
LeaderPeptideScore=0
#while i<4:
while len(Leaderboard)>0:
    Leaderboard=Expand(Leaderboard,AminoAcidNames)
    #print Leaderboard
    for peptide in Leaderboard[:]:
        #print CircularSpectrum(peptide, AminoAcidNames, AminoAcidMass)[-1], Spectrum[-1]
        if CircularSpectrum(peptide, AminoAcidNames, AminoAcidMass)[-1] == Spectrum[-1]:
                if PeptideScore(peptide,Spectrum,AminoAcidNames, AminoAcidMass)>LeaderPeptideScore:
                    #print 'ICI: ',PeptideScore(peptide,Spectrum,AminoAcidNames, AminoAcidMass), LeaderPeptideScore, PeptideScore([71,129,113,147],Spectrum,AminoAcidNames, AminoAcidMass)
                    LeaderPeptide=peptide[:]
                    LeaderPeptideScore=PeptideScore(LeaderPeptide,Spectrum,AminoAcidNames, AminoAcidMass)
                    Leaderboard.remove(LeaderPeptide)
        elif sum(peptide) > Spectrum[-1]:
            Leaderboard.remove(peptide)
    #print Leaderboard
    try:
        Leaderboard = Trim(Leaderboard, Spectrum, N,AminoAcidNames, AminoAcidMass)
        #i+=1
    except ValueError:
        print 'oh oh'
        print 'LeaderScore= ','-'.join(map(str,LeaderPeptide[1:])), LeaderPeptideScore, PeptideScore([0,113,147,71,129],Spectrum,AminoAcidNames, AminoAcidMass)
        #print 'BoardScore= ',[PeptideScore(item,Spectrum,AminoAcidNames, AminoAcidMass) for item in Leaderboard]

oh oh
LeaderScore=  129-71-147-113 13 13


In [3]:
 ''' Using class '''
    
'''LEADERBOARDCYCLOPEPTIDESEQUENCING.'''

###############
AminoAcidNames=[57,71,87,97,99,101,103,113,114,115,128,129,131,137,147,156,163,186]
AminoAcidMass=[57,71,87,97,99,101,103,113,114,115,128,129,131,137,147,156,163,186]
#N=10
#Spectrum=[0,71,113,129,147,200,218,260,313,331,347,389,460]

f=open('../../Downloads/rosalind_ba4g.txt','r')
lines=f.read().splitlines()
f.close()
N=int(lines[0])
Spectrum=map(int,lines[1].split(' '))

def LEADERBOARDCYCLOPEPTIDESEQUENCING(N,Spectrum,AminoAcidNames,AminoAcidMass):
    from Bioinf_functions import ListPeptide,Expand,Trim

    Leaderboard=[[0]]
    LeaderPeptide=[[0]]
    LeaderPeptideScore=0

    while len(Leaderboard)>0:
        Leaderboard=Expand(Leaderboard,AminoAcidNames)
        for peptide in Leaderboard[:]:
            
            Peptide= ListPeptide(peptide)
            Peptide.AminoAcid=AminoAcidNames[:]
            Peptide.AminoAcidMass=AminoAcidMass[:]
            
            if Peptide.LinearSpectrum()[-1] == Spectrum[-1]:
                    if Peptide.CircularPeptideScore(Spectrum)>LeaderPeptideScore:
                        LeaderPeptide=Peptide.string
                        LeaderPeptideScore=Peptide.CircularPeptideScore(Spectrum)
                        Leaderboard.remove(LeaderPeptide)
            elif sum(Peptide.string) > Spectrum[-1]:
                Leaderboard.remove(Peptide.string)
        try:
            Leaderboard = Trim(Leaderboard, Spectrum, N,AminoAcidNames, AminoAcidMass)
        except ValueError:
            #print 'End of pipeline'
            return '-'.join(map(str,LeaderPeptide[1:]))

            
print LEADERBOARDCYCLOPEPTIDESEQUENCING(N,Spectrum,AminoAcidNames,AminoAcidMass)            

113-87-113-128-156-113-147-129-103-99-186-103


In [22]:
'''CHALLENGE: Generate the Convolution of a Spectrum 
we define the convolution of a spectrum by taking all positive differences of masses in the spectrum'''
#Spectrum=[0,137,186,323]

f=open('../../Downloads/rosalind_ba4h.txt','r')
lines=f.read().splitlines()
f.close()
Spectrum=map(int,lines[0].split(' '))
Spectrum=sorted(Spectrum)

def Convolution(Spectrum):
    convol=[]
    for i in Spectrum:
        for j in Spectrum:
            if i-j>0:
                convol.append(i-j)
    return ' '.join(map(str,convol))        
    
fa=open('output.txt','w')
fa.write(entry)
fa.close()


In [9]:
''' ConvolutionCyclopeptideSequencing'''
''' Given an experimental spectrum, we first compute the convolution of an experimental spectrum.
We then select the M most frequent elements between 57 and 200 in the convolution to form an
extended alphabet of candidate amino acid masses. In order to be fair, we should include the
top M elements of the convolution "with ties". Finally, we run the algorithm
LeaderboardCyclopeptideSequencing, where the amino acid masses are restricted to this alphabet.
We call this algorithm ConvolutionCyclopeptideSequencing.'''

'''Raw '''

M=20
N=60
#Spectrum=map(int,'0 57 57 71 99 129 137 170 186 194 208 228 265 285 299 307 323 356 364 394 422 493'.split(' '))
Spectrum=map(int,'0 57 118 179 236 240 301'.split(' '))

f=open('input.txt','r')
#lines=f.read().splitlines()
f.close()
#M=int(lines[0])
#N=int(lines[1])
#Spectrum=sorted(map(int,lines[2].split(' ')))
       
def Convolution(Spectrum):
    convol=[]
    for i in Spectrum:
        for j in Spectrum:
            if i-j>0:
                convol.append(i-j)
    return map(int,convol)  

from collections import Counter
import pandas as pd

convol=dict(Counter(Convolution(Spectrum))) #compute the convolution of an experimental spectrum + Counts as dictionary
df=pd.DataFrame(list(convol.iteritems()),columns=['AA','Count']) #Dictionary to dataframe 
df=df.sort(columns='Count',ascending=False) #Sort Dataframe
print Spectrum
print df
# Pick only first M (with ties) amongst the amino acids of weight between 57 and 200
df=df[(df.AA>=57) & (df.AA <=200)]
df.index = range(len(df))
for j in range (M,len(df)):
        if df.Count[j]< df.Count[M-1]:
            df2=df[df.index < j]
            break
PotentialAA= list(df2.T.itertuples())[0][1:]            

#print LeaderBoard

#AminoAcidNames=[57,71,87,97,99,101,103,113,114,115,128,129,131,137,147,156,163,186]
#AminoAcidMass=[57,71,87,97,99,101,103,113,114,115,128,129,131,137,147,156,163,186]

#AminoAcidNames= [item for item in AminoAcidNames if item in LeaderBoard]
#AminoAcidMass= AminoAcidNames[:]

AminoAcidNames= PotentialAA[:]
AminoAcidMass= PotentialAA[:]

#print LEADERBOARDCYCLOPEPTIDESEQUENCING(N,Spectrum,AminoAcidNames,AminoAcidMass)

[0, 57, 118, 179, 236, 240, 301]
     AA  Count
11   61      4
10  122      3
5   179      2
7   118      2
8   183      2
9    57      2
0    65      1
1     4      1
2   236      1
3   301      1
4   240      1
6   244      1


NameError: name 'df2' is not defined

In [106]:
''' CHALLENGE '''


def ConvolutionCyclopeptideSequencing(M,N,Spectrum):

    def Convolution(Spectrum):
        convol=[]
        for i in Spectrum:
            for j in Spectrum:
                if i-j>0:
                    convol.append(i-j)
        return map(int,convol)  

    from collections import Counter
    import pandas as pd
    #from Bioinf_functions import LEADERBOARDCYCLOPEPTIDESEQUENCING
    
    convol=dict(Counter(Convolution(Spectrum[:]))) #compute the convolution of an experimental spectrum + Counts as dictionary
    df=pd.DataFrame(list(convol.iteritems()),columns=['AA','Count']) #Dictionary to dataframe 
    df=df.sort(columns='Count',ascending=False) #Sort Dataframe
    # Pick only first M (with ties) amongst the amino acids of weight between 57 and 200
    df=df[(df.AA>=57) & (df.AA <=200)]
    df.index = range(len(df))
    for j in range (M,len(df)):
            if df.Count[j]< df.Count[M-1]:
                df2=df[df.index < j]
                break
    PotentialAA= list(df2.T.itertuples())[0][1:]

    AminoAcidNames= PotentialAA[:]
    AminoAcidMass= PotentialAA[:]

    return LEADERBOARDCYCLOPEPTIDESEQUENCING(N,Spectrum,AminoAcidNames,AminoAcidMass)

f=open('../../Downloads/rosalind_ba4i.txt','r')
lines=f.read().splitlines()
f.close()
M=int(lines[0])
N=int(lines[1])
Spectrum=sorted(map(int,lines[2].split(' ')))


answer=ConvolutionCyclopeptideSequencing(M,N,Spectrum)
print answer
fa=open('output.txt','w')
#fa.write(answer)
fa.close()

147-137-113-128-147-163-131-115-113-87-97-115-87-128
