In [None]:
#!/usr/bin/env python3

########################################################################
# File: problem17.ipynb

# Author: Nicholas Chan
# History: 11/15/2021 Created
########################################################################

# Assignment 5: problem 17
<br>
For this assignment, we were to find the theoretical spectrum of a cyclic peptide given a linear peptide. <br>
This could be done in a way similar to that of problem 16. However, now we must travel past the end of <br>
the peptide and wrap around to the beginning to find subsequences only found with a cyclic representation. <br>
These additional subsequences found only in the cyclic representation along with those found from the <br>
linear representation will have their masses compiled into a theoretical spectrum corresponding to <br>
the cyclopeptide representation of the input peptide.

# Spectrum Class:
<br>
The Spectrum class can be used to obtain the theoretical spectrum of a cyclopeptide <br>
given just its linear peptide representation. The theoretical spectrum is a collection <br>
of the masses of all subsequences found in a peptide. For the theoretical spectrum of <br>
a cyclic peptide, this implies the inclusion of subsequences only found by wrapping past <br>
the end of the linear peptide sequence and continuing from its beginning.<br>

In [5]:
class Spectrum:
    '''
    Theoretical spectrum generator for the cyclic form of a peptide.

    A Spectrum object takes a linear peptide sequence as input. The input may be
    a string of one-letter amino acid codes or a list of such one-letter strings;
    both support the repetition, slicing and iteration used here.
    A cyclic representation of the peptide is given by doubling it, so that every
    subpeptide that wraps around the end appears as a contiguous slice.
    '''
    # Amino acid to its integer mass table
    AA = {'G': 57, 'A': 71, 'S': 87, 'P': 97, 'V': 99, 'T': 101, 'C': 103,
          'I': 113, 'L': 113, 'N': 114, 'D': 115, 'K': 128, 'Q': 128, 'E': 129,
          'M': 131, 'H': 137, 'F': 147, 'R': 156, 'Y': 163, 'W': 186}

    def __init__(self, pepSeq):
        '''
        Initialize Spectrum object with the linear peptide sequence and the
        doubled (cyclic) representation of that sequence.
        '''
        self.pepSeq = pepSeq
        self.cyclicPep = self.pepSeq * 2

    def mass(self, seq):
        '''
        Return the total integer mass (int) of seq, a string or list of
        one-letter amino acid codes. An empty seq has mass 0.
        Raises KeyError if seq contains a code that is not in the AA table.
        '''
        return sum(self.AA[aa] for aa in seq)

    def findSpec(self):
        '''
        Given a linear peptide, return its circular theoretical spectrum as a
        sorted list of ints.

        The spectrum holds the mass 0 (empty subpeptide), the mass of the whole
        peptide, and the mass of every cyclic subpeptide of length 1 to
        len(pep)-1 starting at each position. An empty peptide yields [0].
        Raises KeyError if the peptide contains a code that is not in the AA table.
        '''
        pepLen = len(self.pepSeq)
        if pepLen == 0:
            # The only subpeptide of an empty peptide is the empty one; without
            # this guard the zero mass would be reported twice.
            return [0]

        # prefixMass[i] is the mass of the first i residues of the doubled peptide,
        # so the mass of cyclicPep[a:b] is prefixMass[b] - prefixMass[a].
        prefixMass = [0]
        for aa in self.cyclicPep:
            prefixMass.append(prefixMass[-1] + self.AA[aa])

        specList = [0, prefixMass[pepLen]]  # empty subpeptide and the whole peptide
        for start in range(pepLen):  # For each position in linear pep,
            for length in range(1, pepLen):  # take subpeptides of len 1 to len(pep)-1
                specList.append(prefixMass[start + length] - prefixMass[start])
        return sorted(specList)

# Main Function
<br>
Reads the first line of an input text file as the peptide string that the Spectrum class requires as input. <br>
The peptide is assumed to be a string of one-letter amino acid codes. Output is printed to stdout. <br>

In [8]:
def main(infile, outfile='', inCL=None):
    '''
    Read a peptide from the first line of infile, build a Spectrum object
    for it, and print the theoretical spectrum of the peptide's cyclic form
    to stdout as a single line of space-separated integer masses.

    outfile and inCL are accepted for signature compatibility but are not
    used by this function.
    '''
    # Only the first line is needed, so read it and release the file right away.
    with open(infile, 'r') as peptideFile:
        firstLine = peptideFile.readline()

    peptide = firstLine.rstrip()  # drop the trailing newline / whitespace
    spectrum = Spectrum(peptide).findSpec()
    print(" ".join(map(str, spectrum)))

# Script entry point: run main on the input file below. The path is relative,
# so it is resolved against the current working directory.
if __name__ == "__main__":
    main("data/rosalind_ba4c.txt")

0 57 71 87 97 103 113 114 114 115 115 115 128 128 137 171 184 186 201 202 210 212 217 218 229 241 242 243 258 265 281 301 315 316 323 325 329 332 332 338 355 357 373 380 388 396 428 429 438 442 444 447 451 452 453 460 494 499 502 525 539 542 544 556 557 559 566 566 567 575 597 613 630 639 653 654 654 657 670 670 672 680 681 696 712 728 743 757 767 767 768 769 783 784 785 785 795 809 824 840 856 871 872 880 882 882 895 898 898 899 913 922 939 955 977 985 986 986 993 995 996 1008 1010 1013 1027 1050 1053 1058 1092 1099 1100 1101 1105 1108 1110 1114 1123 1124 1156 1164 1172 1179 1195 1197 1214 1220 1220 1223 1227 1229 1236 1237 1251 1271 1287 1294 1309 1310 1311 1323 1334 1335 1340 1342 1350 1351 1366 1368 1381 1415 1424 1424 1437 1437 1437 1438 1438 1439 1449 1455 1465 1481 1495 1552


In [7]:
# INSPECTION

# INSPECTION TEAM
# Jodi Lee
# Nabil Mohammed

# RESPONSES
# - Use the amino acid to mass table that was recommended
# - Write more markdown comments
# - Clean up code
# - Add more docstrings and inline comments

# CORRECTIONS
# - Used amino acid to mass table
# - Wrote markdown comments
# - Cleaned code
# - Added more docstrings and inline comments