Rosalind_18.ipynb problem18 Nicholas Rose BME 205

Cyclopeptide Sequencing Problem:
Given an ideal experimental spectrum, find a cyclic peptide whose theoretical spectrum matches the experimental spectrum.
Given: A collection of (possibly repeated) integers Spectrum corresponding to an ideal experimental spectrum.
Return: Every amino acid string Peptide such that Cyclospectrum(Peptide) = Spectrum (if such a string exists).

In [197]:
class Sequences:
    '''
    Cyclopeptide sequencing for an ideal experimental spectrum.

    An object is built from a spectrum (a collection of integer masses,
    possibly repeated) and can enumerate every peptide, written as a
    list of amino acid masses, whose cyclospectrum equals that spectrum.

    Attributes:
        table: class-level set of the distinct integer amino acid masses.
        spectrum: the masses exactly as passed to the constructor.
        spectrumSet: the distinct masses of the spectrum.
        aminoMassList: every spectrum entry that equals an amino acid
            mass, in spectrum order with repeats kept. It may also hold
            subpeptide masses that merely coincide with an amino acid
            mass (e.g. 57 + 57 = 114), so it is a pool of candidates,
            not the exact composition of the peptide.
    '''

    # 113 and 128 are each shared by two amino acids; a set needs them once.
    table = {57, 71, 87, 97, 99, 101, 103,
             113, 114, 115, 128,
             129, 131, 137, 147, 156, 163, 186}

    def __init__(self, spectrum):
        '''
        Store the spectrum and collect the candidate amino acid masses.

        spectrum: a re-iterable collection (e.g. a list) of integers.
        '''
        self.spectrum = spectrum
        self.spectrumSet = set(spectrum)
        self.aminoMassList = [mass for mass in self.spectrum
                              if mass in self.table]

    def expand(self, peptideList):
        '''
        Extend every peptide of peptideList by one amino acid mass.

        For each peptide (a list of masses) the masses it already uses
        are taken out of a copy of aminoMassList, and one new peptide is
        produced for each mass that is left over, so a repeated left-over
        mass yields repeated peptides. A peptide that has used the whole
        pool produces no extensions.

        Returns the list of extended peptides as new lists; the input
        peptides are not modified.
        Raises ValueError if a peptide contains a mass that is not
        available in aminoMassList.
        '''
        expandedPeptides = []
        for peptide in peptideList:
            remaining = self.aminoMassList[:]
            for mass in peptide:
                remaining.remove(mass)
            for mass in remaining:
                expandedPeptides.append(peptide + [mass])
        return expandedPeptides

    def createSpectrum(self, peptide):
        '''
        Return the cyclospectrum of peptide without the zero mass.

        The masses are listed by subpeptide length 1 .. len(peptide) - 1
        (every start position, wrapping around the end), followed by the
        mass of the whole peptide exactly once. An empty peptide has an
        empty spectrum.
        '''
        length = len(peptide)
        if length == 0:
            # The original sliding-window loop never terminated here.
            return []

        # Doubling the peptide turns every wrap-around window into a
        # plain slice.
        doubled = peptide + peptide
        spectrum = []
        for windowSize in range(1, length):
            for start in range(length):
                spectrum.append(sum(doubled[start:start + windowSize]))
        spectrum.append(sum(peptide))
        return spectrum

    def CYCLOPEPTIDESEQUENCING(self):
        '''
        Find every peptide whose cyclospectrum equals the spectrum.

        Candidates are grown one amino acid at a time with expand(). A
        candidate is kept for further growth only while its total mass
        occurs in the spectrum. A candidate whose mass reaches the parent
        mass (the largest spectrum entry) is reported when its
        cyclospectrum matches the spectrum as a multiset, and is never
        grown further.

        Returns a list of peptides, each a list of amino acid masses,
        without duplicates; the list is empty when nothing matches.
        '''
        # The whole peptide is the heaviest fragment of an ideal spectrum.
        # Summing aminoMassList instead would over-count whenever a
        # subpeptide mass coincides with an amino acid mass.
        parentMass = max(self.spectrum, default=0)
        # createSpectrum() never emits the mass-0 empty subpeptide, so
        # zeros are dropped before the multiset comparison.
        target = sorted(mass for mass in self.spectrum if mass != 0)

        peptides = [[]]
        peptidesComplete = []

        while peptides:
            survivors = []
            seen = set()
            for peptide in self.expand(peptides):
                # Repeated masses in the pool make expand() emit the same
                # peptide several times; one copy is enough.
                key = tuple(peptide)
                if key in seen:
                    continue
                seen.add(key)

                mass = sum(peptide)
                if mass == parentMass:
                    if sorted(self.createSpectrum(peptide)) == target:
                        peptidesComplete.append(peptide)
                elif mass in self.spectrumSet:
                    survivors.append(peptide)
            peptides = survivors

        return peptidesComplete

In [201]:
def main(infile, outfile='rosalind_18.txt.out'):
    '''
    Solve the cyclopeptide sequencing problem for one input file.

    infile: path of a text file whose first line holds the ideal
        experimental spectrum as whitespace-separated integers
        (possibly repeated).
    outfile: path of the file to write; defaults to
        'rosalind_18.txt.out'.

    Writes every peptide whose cyclospectrum equals the spectrum to
    outfile on a single line: masses of a peptide joined by '-',
    peptides sorted and separated by one space, no trailing newline.
    When no peptide matches, outfile is left empty.

    Raises ValueError if the first line contains a token that is not
    an integer.
    '''

    with open(infile) as f:
        spectrum = [int(token) for token in f.readline().split()]

    sequences = sorted(Sequences(spectrum).CYCLOPEPTIDESEQUENCING())

    with open(outfile, 'w') as out:
        # join() copes with an empty result, where indexing the first
        # peptide would raise IndexError.
        out.write(' '.join('-'.join(map(str, peptide))
                           for peptide in sequences))
    
    
if __name__ == "__main__":
    # Executed only when the file is run directly: process the dataset
    # stored at this fixed path.
    main(infile='/home/nick_rose/Downloads/rosalind_ba4e.txt')