# Chapter 4

BA4A: Translate an RNA String into an Amino Acid String

In [4]:
from Bio.Seq import Seq
from Bio.Alphabet import generic_rna

def translate(RNA):
    messenger_rna = Seq(RNA, generic_rna)
    print messenger_rna.translate()
    
translate("AUGGCCAUGGCGCCCAGAACUGAGAUCAAUAGUACCCGUAUUAACGGGUGA")

MAMAPRTEINSTRING*


BA4B: Find Substrings of a Genome Encoding a Given Amino Acid String

In [1]:
import re
from Bio.Seq import Seq
from Bio.Alphabet import generic_dna

def translate(dna):
    coding_dna = Seq(dna, generic_dna)
    return coding_dna.translate()

def match(aminoacid, inaminoacid):
    occurances = [m.start() for m in re.finditer(inaminoacid, aminoacid)]
    return occurances

def revseq(aminoacid, DNA, occurances):
    seqlen = len(aminoacid) * 3
    for i in occurances:
        print DNA[i:i+seqlen]

dna = "ATGGCCATGGCCCCCAGAACTGAGATCAATAGTACCCGTATTAACGGGTGA"
inaminoacid = "MA"
aminoacid = translate(dna)
revseq(inaminoacid, dna, match(str(aminoacid), inaminoacid))

ATGGCC
GGCCAT


BA4C: Generate the Theoretical Spectrum of a Cyclic Peptide

In [3]:
import numpy as np
from itertools import cycle,islice
values={'G':57,'A':71,'S':87,'P':97,'V':99,'T':101,'C':103,'I':113,'L':113,'N':114,'D':115,'K':128,'Q':128,'E':129,'M':131,'H':137,'F':147,'R':156,'Y':163,'W':186}

def getTotalMass(peptidestr):
    total = 0
    for i in peptidestr:
        total+=values[i]
    return total

def getTheoreticalSpectrum(peptide):
    peptides=[]
    values=[]
    for i in range (1,len(peptide)):
        for j in range(len(peptide)):
            c = cycle(peptide)
            val=list( islice(c,j,j+i))
            peptides.append(''.join(val))
            values.append(getTotalMass(val))
    peptides.append(peptide)
    values.append(getTotalMass(peptide))
    values.append(0)
    values.sort()
    for i in range(len(values)):
        if i == 0:
            print str(0),
            continue;
        print str(values[i]),

peptidecode = 'LEQN'
getTheoreticalSpectrum(peptidecode)

0 113 114 128 129 227 242 242 257 355 356 370 371 484


BA4D: Compute the Number of Peptides of Given Total Mass

In [5]:
values={'G':57,'A':71,'S':87,'P':97,'V':99,'T':101,'C':103,'I':113,'L':113,'N':114,'D':115,'K':128,'Q':128,'E':129,'M':131,'H':137,'F':147,'R':156,'Y':163,'W':186}
val = set(values.values())

def peptideCounter(M):
    counts = {0:1}
    for i in range(M-min(val)+1):
        j = i+min(val)
        for v in val:
            if j-v in counts:
                counts[j] = counts.get(j,0)+counts[j-v]
    print counts[M]
peptideCounter(1024)

14712706211


BA4E: Find a Cyclic Peptide with Theoretical Spectrum Matching an Ideal Spectrum

BA4F: Compute the Score of a Cyclic Peptide Against a Spectrum

In [14]:
import numpy as np
from itertools import cycle,islice
values={'G':57,'A':71,'S':87,'P':97,'V':99,'T':101,'C':103,'I':113,'L':113,'N':114,'D':115,'K':128,'Q':128,'E':129,'M':131,'H':137,'F':147,'R':156,'Y':163,'W':186}

def getTotalMass(peptidestr):
    total = 0
    for i in peptidestr:
        total+=values[i]
    return total

def getTheoreticalSpectrum(peptide, spectrum):
    peptides=[]
    values=[]
    for i in range (1,len(peptide)):
        for j in range(len(peptide)):
            c = cycle(peptide)
            val=list( islice(c,j,j+i))
            peptides.append(''.join(val))
            values.append(getTotalMass(val))
    peptides.append(peptide)
    values.append(getTotalMass(peptide))
    values.append(0)
    values.sort()
    score = 0
    if len(values) > len(spectrum):
        for i in values:
            if i in spectrum:
                score = score + 1
    else:
        for i in spectrum:
            if i in values:
                score = score + 1
    print score
peptidecode = 'LEQN'
spectrum = [0, 99, 113, 114, 128, 227, 257, 299, 355, 356, 370, 371, 484]
getTheoreticalSpectrum(peptidecode, spectrum)

11


BA4G: Implement LeaderboardCyclopeptideSequencing

In [None]:
import sys
from collections import defaultdict
peptide_to_mass = {'':-1, 'L':113, 'E':129, 'Q':128, 'N':114, 'G':57,'A':71,'S':87,'P':97,'V':99,'T':101,'C':103,'I':113,'D':115,'K':128,'M':131,'H':137,'F':147,'R':156,'Y':163,'W':186}
n = 10
spectrum = [0, 71, 113, 129, 147, 200, 218, 260, 313, 331, 347, 389, 460]
leaderboard = []
leaderboard.append("")
leaderscore = 0
leaderpeptide = ""

def cut(leaderboard):
    scores = []
    for lead in leaderboard:
        scores.append(getscore(lead))
    scores.sort()
    nthhighestscore = -1
    if len(scores)==0:
        nthhighestscore = 0
    elif len(scores)>=n:
        nthhighestscore = scores[len(scores)-n]
    else:
        nthhighestscore = scores[0]
    out = []
    for lead in leaderboard:
        if getscore(lead)>=nthhighestscore:
            out.append(lead)
    return out

def expandpeps(leaderboard):
    out = []
    for i in range(len(leaderboard)):
        curr = leaderboard[i]
        for key,value in peptide_to_mass.iteritems():
            out.append(curr+key)
    return out

def getmass(peptide):
    totalmass = 0
    for pep in peptide:
        totalmass += peptide_to_mass[pep]
    return totalmass

def getscore(peptide):
    peptidespectrum = defaultdict(list)
    peptidespectrum = GenTheoriticalSpec(peptide)
    total_score = 0
    for key,value in peptidespectrum.iteritems():
        has = False
        for val in spectrum:
            if val==value:
                has = True
                break
        if has == True:
            total_score += 1
    return total_score

def GenTheoriticalSpec(string):
    dic = {'':-1, 'L':113, 'E':129, 'Q':128, 'N':114, 'G':57,'A':71,'S':87,'P':97,'V':99,'T':101,'C':103,'I':113,'D':115,'K':128,'M':131,'H':137,'F':147,'R':156,'Y':163,'W':186}
    length = len(string)
    alist = defaultdict(list)
    for i in range(length):
        for j in range(i,length):
            subpeptide = ""
            subpeptide = string[i:j + 1]
            if j==length-1 and len(subpeptide)<length:
                for k in range(length-len(subpeptide)):
                    if len(subpeptide)!=length:
                        sm = 0
                        for ii in range(len(subpeptide)):
                            sm = sm + dic[subpeptide[ii]]
                        alist[subpeptide].append(sm)
                    subpeptide+=string[k]
            if len(subpeptide)!=length:
                sm = 0
                for ii in range(len(subpeptide)):
                    sm = sm + dic[subpeptide[ii]]
                alist[subpeptide].append(sm)
    sm= 0
    for ii in range(len(string)):
        sm = sm + dic[string[ii]]
    alist[string].append(sm)
    alist['def'].append(0)
    return alist
while len(leaderboard)!=0:
    leaderboard = expandpeps(leaderboard)
    print(len(leaderboard))
    try:
        for i in range(0,len(leaderboard)):
            peptide = leaderboard[i]
            currmass = getmass(peptide)
            if currmass == spectrum[len(spectrum)-1]:
                currscore = getscore(peptide)
                if currscore > leaderscore:
                    leaderpeptide = peptide
                    leaderscore = currscore
            elif currmass > spectrum[len(spectrum)-1]:
                leaderboard.remove(peptide)
    except:
        asda = "as"
    leaderboard = cut(leaderboard)

21
441
9261
190512
2829729


BA4H: Generate the Convolution of a Spectrum

In [2]:
import sys
import operator
from collections import defaultdict

def convolute(spectrum):
    dic = {}
    for i in range(len(spectrum)):
        for j in range(len(spectrum)):
            if (spectrum[i]-spectrum[j]>=0):
                dic[spectrum[i]-spectrum[j]] = 0
    for i in range(len(spectrum)):
        for j in range(len(spectrum)):
            if (spectrum[i]-spectrum[j]>=0):
                dic[spectrum[i]-spectrum[j]] += 1
    sorted(dic.items())
    for key,value in dic.iteritems():
        if key > 0:
            for i in range(value):
                sys.stdout.write("%d "%(key))

spectrum = [0, 137, 186, 323]
convolute(spectrum)

137 137 186 186 323 49 

BA4I: Implement ConvolutionCyclopeptideSequencing

BA4J: Generate the Theoretical Spectrum of a Linear Peptide

In [4]:
def TheoreticalSpectrum(peptide, pep_to_mass):
    spectrum = []
    for i in range(1,len(peptide)+1):
        for j in range(len(peptide)-i+1):
            kmer = peptide[j:j+i]
            mass_sum = 0
            for c in kmer:
                mass_sum += pep_to_mass[c]
            spectrum.append(mass_sum)
    spectrum.append(0)
    spectrum.sort()
    print spectrum
    
peptide = "NQEL"
pep_to_mass = {'L':113, 'E':129, 'Q':128, 'N':114, 'G':57,'A':71,'S':87,'P':97,'V':99,'T':101,'C':103,'I':113,'D':115,'K':128,'M':131,'H':137,'F':147,'R':156,'Y':163,'W':186}
TheoreticalSpectrum(peptide, pep_to_mass)

[0, 113, 114, 128, 129, 242, 242, 257, 370, 371, 484]


BA4K: Compute the Score of a Linear Peptide

In [6]:
def TheoreticalSpectrum(peptide, pep_to_mass, values):
    spectrum = []
    for i in range(1,len(peptide)+1):
        for j in range(len(peptide)-i+1):
            kmer = peptide[j:j+i]
            mass_sum = 0
            for c in kmer:
                mass_sum += pep_to_mass[c]
            spectrum.append(mass_sum)
    spectrum.append(0)
    spectrum.sort()
    score = 0
    if len(values) > len(spectrum):
        for i in values:
            if i in spectrum:
                score = score + 1
    else:
        for i in spectrum:
            if i in values:
                score = score + 1
    print score

values = [0, 99, 113, 114, 128, 227, 257, 299, 355, 356, 370, 371, 484]
peptide = "NQEL"
pep_to_mass = {'L':113, 'E':129, 'Q':128, 'N':114, 'G':57,'A':71,'S':87,'P':97,'V':99,'T':101,'C':103,'I':113,'D':115,'K':128,'M':131,'H':137,'F':147,'R':156,'Y':163,'W':186}
TheoreticalSpectrum(peptide, pep_to_mass, values)

8


BA4L: Trim a Peptide Leaderboard 

In [8]:
peptides = ["LAST", "ALST", "TLLT", "TQAS"]
spectrum2 = [0, 71, 87, 101, 113, 158, 184, 188, 259, 271, 372]
keep = 2
pep_to_mass = {'L':113, 'E':129, 'Q':128, 'N':114, 'G':57,'A':71,'S':87,'P':97,'V':99,'T':101,'C':103,'I':113,'D':115,'K':128,'M':131,'H':137,'F':147,'R':156,'Y':163,'W':186}

def findscorepeptide(peptide,spectrum2):
    spectrum = []
    for i in range(1,len(peptide)+1):
        for j in range(len(peptide)-i+1):
            kmer = peptide[j:j+i]
            mass_sum = 0
            for c in kmer:
                mass_sum += pep_to_mass[c]
            if mass_sum not in spectrum:
                spectrum.append(mass_sum)
    spectrum.append(0)
    count = 0
    for mass1 in spectrum:
        for mass2 in spectrum2:
            if mass1==mass2:
                count+=1
    return count

peptide_score = {}
for pep in peptides:
    peptide_score[pep] = 0
for pep in peptides:
    peptide_score[pep] = findscorepeptide(pep,spectrum2)
sorted_values = sorted(peptide_score.values())
count = 0
target = 0
for i in range(len(sorted_values)-1,-1,-1):
    count+=1
    if count==keep:
        target = i
final_ans = []
for key,value in peptide_score.iteritems():
    if value>=sorted_values[target]:
        final_ans.append(key)
print final_ans

['ALST', 'LAST']


BA4M: Solve the Turnpike Problem