In [219]:
'''
CODE CHALLENGE: Construct the graph of a spectrum.
     Given: A space-delimited list of integers Spectrum.
     Return: Graph(Spectrum).
'''
# One-letter amino acid codes, listed in order of non-decreasing integer mass.
AminoAcid='GASPVTCILNDKQEMHFRYW'
# Integer masses parallel to AminoAcid: AminoAcidMass[i] is the mass of AminoAcid[i].
AminoAcidMass=[57,71,87,97,99,101,103,113,113,114,115,128,128,129,131,137,147,156,163,186]
# Letter -> integer mass lookup for the same 20 amino acids.
# Note that I/L share mass 113 and K/Q share mass 128.
prot={'A': 71, 'C': 103, 'E': 129, 'D': 115, 'G': 57, 'F': 147, 'I': 113, 'H': 137, 'K': 128, 'M': 131, 'L': 113, 'N': 114, 'Q': 128, 'P': 97, 'S': 87, 'R': 156, 'T': 101, 'W': 186, 'V': 99, 'Y': 163}


def Graph(Spectrum):
    
    '''
    Construct the graph of a spectrum.

    Every mass in Spectrum is a node. There is an edge from a node to every
    later node in the list whose mass difference equals the integer mass of
    an amino acid; the edge is labelled with that amino acid.

    Given: A list of integers Spectrum. The source mass 0 must already be
           part of the list if edges leaving the source are wanted.
    Return: Graph(Spectrum) as a list of [from_mass, to_mass, letter] edges,
            ordered by the position of from_mass, then of to_mass.

    When several amino acids share a mass (I/L, K/Q) the one that comes first
    when iterating the mass table is used as the label.
    
    INPUT=[0,57,71,154,185,301,332,415,429,486]
    OUTPUT=[[0, 57, 'G'], [0, 71, 'A'], [57, 154, 'P'], [57, 185, 'K'], [71, 185, 'N'], [154, 301, 'F'], [185, 332, 'F'], [301, 415, 'N'], [301, 429, 'K'], [332, 429, 'P'], [415, 486, 'A'], [429, 486, 'G']]
    '''
    prot={'A': 71, 'C': 103, 'E': 129, 'D': 115, 'G': 57, 'F': 147, 'I': 113, 'H': 137, 'K': 128, 'M': 131, 'L': 113, 'N': 114, 'Q': 128, 'P': 97, 'S': 87, 'R': 156, 'T': 101, 'W': 186, 'V': 99, 'Y': 163}
    # Reverse lookup built once; setdefault keeps the first letter seen for a
    # shared mass, which is the letter the per-edge scan used to pick.
    letter_for_mass={}
    for letter,mass in prot.items():
        letter_for_mass.setdefault(mass,letter)

    # Work on the argument itself (not on a module-level variable) and accept
    # any iterable of masses.
    masses=list(Spectrum)
    result=[]
    for n,low in enumerate(masses):
        for high in masses[n+1:]:
            diff=high-low
            if diff in letter_for_mass:
                result.append([low,high,letter_for_mass[diff]])
    return result
#print Graph(spec)

#### tests ####


# Sample spectrum from the problem statement (without the source mass 0).
spec=[57,71,154,185,301,332,415,429,486]
# The string literal below is a disabled block: it is never executed. When
# enabled it would replace `spec` with the integers on the first line of the
# downloaded dataset file.
'''
f=open('input.txt','r')
#f=open('../../Downloads/dataset_11813_2.txt','r')
f=open('../../Downloads/rosalind_ba11a.txt','r')
lines=f.read().splitlines()
f.close()
spec=map(int,lines[0].split(' '))
'''


# Prepend the source node (mass 0) so the graph gets edges leaving it.
spec.insert(0,0)
#print spec

# Print the edge list in its raw [from, to, letter] form.
print Graph(spec)

# Format every edge as "from->to:letter"; the print of each formatted entry
# is currently commented out, so this loop produces no output.
for item in Graph(spec):
    entry=str(item[0])+'->'+str(item[1])+':'+str(item[2])
    #print entry

[[0, 57, 'G'], [0, 71, 'A'], [57, 154, 'P'], [57, 185, 'K'], [71, 185, 'N'], [154, 301, 'F'], [185, 332, 'F'], [301, 415, 'N'], [301, 429, 'K'], [332, 429, 'P'], [415, 486, 'A'], [429, 486, 'G']]


In [221]:
'''
Decoding an Ideal Spectrum Problem: Reconstruct a peptide from its ideal spectrum.
     Input: A collection of integers Spectrum.
     Output: An amino acid string Peptide that explains Spectrum.
'''

'''
DecodingIdealSpectrum(Spectrum)
     construct Graph(Spectrum)
     for each path Path from source to sink in Graph(Spectrum)
          Peptide ← the amino acid string spelled by the edge labels of Path
          if IdealSpectrum(Peptide) = Spectrum
                return Peptide
'''

def DecodingIdealSpectrum(Spectrum):
    
    '''
     Reconstruct a peptide from its ideal spectrum.

     Builds Graph(Spectrum), walks the paths from the first to the last mass
     of Spectrum in depth-first order, spells the peptide of each path from
     its edge labels and returns the first peptide whose ideal spectrum
     (all prefix and suffix masses, sorted) equals Spectrum.

     Spectrum must be sorted ascending and start with the source mass 0.
     Returns None when Spectrum is empty or no path explains it.

     INPUT=[0,57,71,154,185,301,332,415,429,486] # A collection of integers Spectrum.
     OUTPUT=GPFNA # An amino acid string Peptide that explains Spectrum.
    '''
    prot={'A': 71, 'C': 103, 'E': 129, 'D': 115, 'G': 57, 'F': 147, 'I': 113, 'H': 137, 'K': 128, 'M': 131, 'L': 113, 'N': 114, 'Q': 128, 'P': 97, 'S': 87, 'R': 156, 'T': 101, 'W': 186, 'V': 99, 'Y': 163}

    def iter_paths(adjacency, start, end, path=()):
        '''Yield every simple path from start to end, depth first.'''
        # Tuples are immutable, so sharing the default between calls is safe.
        path = path + (start,)
        if start == end:
            yield path
            return
        # `in`/`get` instead of has_key, which no longer exists in Python 3.
        for node in adjacency.get(start, []):
            if node not in path:
                for found in iter_paths(adjacency, node, end, path):
                    yield found

    def ideal_spectrum(peptide):
        '''Return the sorted prefix and suffix masses of peptide.'''
        total = sum(prot[aa] for aa in peptide)
        masses = []
        prefix = 0
        for aa in peptide:
            # Prefix ending before aa, and the suffix starting at aa.
            masses.append(prefix)
            masses.append(total - prefix)
            prefix += prot[aa]
        return sorted(masses)

    target = list(Spectrum)
    if not target:
        # Nothing to decode; there is no source or sink node to connect.
        return None

    # construct Graph(Spectrum) and index it for traversal and labelling
    adjacency = {}
    labels = {}
    for low, high, letter in Graph(target):
        adjacency.setdefault(low, []).append(high)
        # Keep the first label recorded for a node pair.
        labels.setdefault((low, high), letter)

    # Paths are generated lazily so the search stops at the first match
    # instead of materializing every source-to-sink path up front.
    for path in iter_paths(adjacency, target[0], target[-1]):
        peptide = ''.join(labels[(path[k-1], path[k])] for k in range(1, len(path)))
        if ideal_spectrum(peptide) == target:
            return peptide
    return None
    
    
#### TEST ####

# Sample spectrum from the problem statement; it is overwritten below by the
# contents of the dataset file.
spec=[57,71,154,185,301,332,415,429,486]


# Read the space-separated integers on the first line of the dataset file.
#f=open('input.txt','r')
#f=open('../../Downloads/dataset_11813_4.txt','r')
f=open('../../Downloads/rosalind_ba11b.txt','r')
lines=f.read().splitlines()
f.close()
# NOTE: the insert() below needs a list, i.e. Python 2's map().
spec=map(int,lines[0].split(' '))

# Prepend the source mass 0, which becomes the first (start) node.
spec.insert(0,0)

print DecodingIdealSpectrum(spec)

PTVWGRCHRMSMVAYMCGHNIWCIFFAHWF


In [225]:
'''CODE CHALLENGE: Solve the Converting a Peptide into a Peptide Vector Problem.
     Given: An amino acid string P.
     Return: The peptide vector of P (in the form of space-separated integers).
'''


def PeptideToVector(Peptide,prot=None):
    
    '''
    Convert a peptide into a binary peptide vector.

    The vector has one position per unit of total peptide mass and holds a
    '1' at every prefix mass of the peptide (1-based), '0' elsewhere.

    Peptide: string of amino acid letters.
    prot:    mapping letter -> positive integer mass. When omitted, an empty
             table is used, so only the empty peptide can be converted.
    Returns the vector as a string of '0'/'1' characters.
    Raises KeyError if Peptide contains a letter that is missing from prot.

    INPUT = XZZXX # A peptide P.
    OUTPUT = 0001000010000100010001 # The peptide vector of P.
    prot={'X':4,'Z':5}
    '''
    mass_table={} if prot is None else prot
    # Use the Peptide argument itself rather than a module-level variable.
    residue_masses=[mass_table[aa] for aa in Peptide]

    # Fill a list and join once instead of rebuilding the string per residue.
    bits=['0']*sum(residue_masses)
    prefix_mass=0
    for mass in residue_masses:
        prefix_mass+=mass
        # Positions are 1-based masses, list indices are 0-based.
        bits[prefix_mass-1]='1'
    return ''.join(bits)

# Toy peptide and toy two-letter mass table from the problem statement.
peptide='XZZXX'
prot={'X':4,'Z':5}

# Alternative input: a real peptide with the standard integer mass table.
#peptide='TFPRGPHSPRVVDLRCCKQMNDHKSIDWKYSLYFM'
#prot={'A': 71, 'C': 103, 'E': 129, 'D': 115, 'G': 57, 'F': 147, 'I': 113, 'H': 137, 'K': 128, 'M': 131, 'L': 113, 'N': 114, 'Q': 128, 'P': 97, 'S': 87, 'R': 156, 'T': 101, 'W': 186, 'V': 99, 'Y': 163}

# The string literal below is a disabled block: it is never executed. When
# enabled it would read the peptide from the first line of the dataset file.
'''
#f=open('input.txt','r')
#f=open('../../Downloads/dataset_11813_6.txt','r')
f=open('../../Downloads/rosalind_ba11c.txt','r')
lines=f.read().splitlines()
f.close()
peptide=lines[0]
'''
#print peptide
#print PeptideVector(peptide,prot=prot)
# Print the vector with its digits separated by single spaces.
print ' '.join(list(PeptideToVector(peptide,prot=prot)))

0 0 0 1 0 0 0 0 1 0 0 0 0 1 0 0 0 1 0 0 0 1


In [228]:
def VectorToPeptide(Vector,prot=None):
    '''
    Convert a peptide vector into a peptide.

    The 1-based positions of the '1' characters in Vector are read as prefix
    masses; each gap between consecutive prefix masses (starting from 0) is
    translated back into the amino acid with that mass.

    Vector: sequence of '0'/'1' characters (e.g. a string).
    prot:   mapping letter -> integer mass. When omitted, an empty table is
            used and the result is the empty string.
    Returns the peptide as a string.

    Each gap yields exactly one letter: if several letters share a mass, the
    first one met when iterating prot is used. A gap that matches no mass in
    prot contributes nothing to the result.

    INPUT='0001000010000100010001'
    OUTPUT=XZZXX
    prot={'X':4,'Z':5}
    '''
    mass_table={} if prot is None else prot
    # Reverse lookup built once instead of scanning prot for every gap.
    letter_for_mass={}
    for letter,mass in mass_table.items():
        letter_for_mass.setdefault(mass,letter)

    prefix_masses=[0]+[i+1 for i, ltr in enumerate(Vector) if ltr == '1']
    letters=[]
    # Walk consecutive pairs directly rather than indexing past the end of
    # the list and swallowing the resulting IndexError.
    for left,right in zip(prefix_masses,prefix_masses[1:]):
        letter=letter_for_mass.get(right-left)
        if letter is not None:
            letters.append(letter)
    return ''.join(letters)


# Toy vector and toy mass table from the problem statement; both are
# overwritten below before VectorToPeptide is called.
vector='0001000010000100010001'
prot={'X':4,'Z':5}


# Integer mass table without 'I' and 'K', so masses 113 and 128 each map to
# a single letter ('L' and 'Q').
prot={'A': 71, 'C': 103, 'E': 129, 'D': 115, 'G': 57, 'F': 147, 'H': 137, 'M': 131, 'L': 113, 'N': 114, 'Q': 128, 'P': 97, 'S': 87, 'R': 156, 'T': 101, 'W': 186, 'V': 99, 'Y': 163}

# Read the whitespace-separated 0/1 digits on the first line of the dataset
# file and glue them into one string.
#f=open('input.txt','r')
#f=open('../../Downloads/dataset_11813_8.txt','r')
f=open('../../Downloads/rosalind_ba11d.txt','r')
lines=f.read().splitlines()
f.close()
vector=''.join(lines[0].split())
#print vector



print VectorToPeptide(vector,prot=prot)

#print [k for k,v in prot.items() if v==pep for pep in peptides]

MLPGDLNLLVVACDVSETLL


In [229]:
def PeptideSequencing(Spectrum_v,prot):
    
    '''Given a spectral vector, find a peptide with maximum score against this spectrum. 
    !!! Add an extra 0 before all the values in the spectral vector input
    spectral vector=[0, 0, 0, 4, -2, -3, -1, -7, 6, 5, 3, 2, 1, 9, 3, -8, 0, 3, 1, 2, 1, 8]
    prot={'X':4,'Z':5}

    Spectrum_v: list of integer scores where Spectrum_v[i] is the score of
                mass i and Spectrum_v[0] is the extra leading 0 (source node).
    prot:       mapping letter -> positive integer mass.

    Nodes are the indices 0..len(Spectrum_v)-1; there is an edge i -> j when
    j-i is the mass of an amino acid. The score of a path is the sum of
    Spectrum_v over the nodes it visits, and the peptide spelled by the best
    path from node 0 to the last node is returned.

    If several parents of a node are equally good, the one with the lowest
    index is used. If several letters share a mass, the first one met when
    iterating prot labels the edge.

    Returns '' when Spectrum_v is empty or only holds the leading 0.
    Raises KeyError when no sequence of masses from prot reaches the last node.
    
    INPUT=[0, 0, 0, 0, 4, -2, -3, -1, -7, 6, 5, 3, 2, 1, 9, 3, -8, 0, 3, 1, 2, 1, 8]
    OUTPUT= XZZXX
    '''
    if not Spectrum_v:
        return ''
    # The sink is derived from the argument, not from a module-level variable.
    sink=len(Spectrum_v)-1

    # mass -> letter, first letter wins; non-positive masses cannot form
    # forward edges and are ignored.
    letter_for_mass={}
    for letter,mass in prot.items():
        if mass>0:
            letter_for_mass.setdefault(mass,letter)
    # Largest mass first, so the parents of a node are visited in ascending
    # index order and the strict '>' below keeps the lowest-index parent on ties.
    masses=sorted(letter_for_mass,reverse=True)

    ### init
    best_score={0:0}    # best path score for every node reachable from 0
    parent_of={}        # predecessor of each reachable node on its best path

    ### Score the nodes in ascending order, which is a topological order
    ### because every edge goes from a lower to a higher index.
    for node in range(1,sink+1):
        best_parent=None
        for mass in masses:
            parent=node-mass
            # Skips negative indices as well as nodes not reachable from 0.
            if parent not in best_score:
                continue
            if best_parent is None or best_score[parent]>best_score[best_parent]:
                best_parent=parent
        if best_parent is not None:
            best_score[node]=best_score[best_parent]+Spectrum_v[node]
            parent_of[node]=best_parent

    if sink not in best_score:
        raise KeyError('node %d cannot be reached from node 0 with the given masses' % sink)

    ### Backtrack from the sink and spell one letter per edge.
    letters=[]
    node=sink
    while node!=0:
        parent=parent_of[node]
        letters.append(letter_for_mass[node-parent])
        node=parent
    letters.reverse()
    return ''.join(letters)


# Read the whitespace-separated integer scores on the first line of the
# dataset file.
#f=open('input.txt','r')
#f=open('input2.txt','r')
#f=open('../../Downloads/dataset_11813_10.txt','r')
f=open('../../Downloads/rosalind_ba11e.txt','r')
lines=f.read().splitlines()
f.close()
# NOTE: len() and the list concatenation below need a list, i.e. Python 2's map().
Spectrum_v=map(int,lines[0].split())
Start=0
# Taken before the leading 0 is prepended, so End is the index of the last
# entry of the final Spectrum_v.
End=len(Spectrum_v)
# Prepend the extra 0 so that index i of the list corresponds to mass i.
Spectrum_v=[0]+Spectrum_v
#print Spectrum_v

#prot={'X':4,'Z':5}
# Integer mass table without 'I' and 'K', so masses 113 and 128 each map to
# a single letter ('L' and 'Q').
prot={'A': 71, 'C': 103, 'E': 129, 'D': 115, 'G': 57, 'F': 147, 'H': 137, 'M': 131, 'L': 113, 'N': 114, 'Q': 128, 'P': 97, 'S': 87, 'R': 156, 'T': 101, 'W': 186, 'V': 99, 'Y': 163}


print PeptideSequencing(Spectrum_v,prot)

#print VectorToPeptide(vector,prot=prot)
#print [k for k,v in prot.items() if v==pep for pep in peptides]

VPGCSGPGLS
