In [None]:
# a function which takes in a peptide and 
# outputs the theoretical spectrum of that peptide
# where "theoretical spectrum" is the collection of all of the masses of its subpeptides, 
# in addition to the mass 0 and the mass of the entire peptide, with masses ordered from smallest to largest.

def CyclicSpectrum(peptide):
  """Return the sorted theoretical spectrum of a cyclic peptide.

  The result contains 0, the mass of every contiguous subpeptide (including
  the ones that wrap around the end of the string) and the mass of the whole
  peptide, in ascending order.

  peptide: string of one-letter amino acid codes. A letter that is missing
  from the mass table raises KeyError.
  """
  amino_acid_table = {
    'G': 57, 'A': 71, 'S': 87, 'P': 97, 'V': 99,
    'T': 101, 'C': 103, 'I': 113, 'L': 113, 'N': 114,
    'D': 115, 'K': 128, 'Q': 128, 'E': 129, 'M': 131,
    'H': 137, 'F': 147, 'R': 156, 'Y': 163, 'W': 186,
  }
  size = len(peptide)

  # running_total[k] is the combined mass of the first k residues
  running_total = [0]
  for residue in peptide:
    running_total.append(running_total[-1] + amino_acid_table[residue])

  # every mass is positive, so the last prefix sum is the mass of the whole peptide
  whole_mass = running_total[-1]

  masses = [0]
  for start in range(size):
    for stop in range(start + 1, size + 1):
      fragment = running_total[stop] - running_total[start]
      masses.append(fragment)
      # a fragment touching neither end of the string has a complement
      # that wraps around the ring; record that one as well
      if 0 < start and stop < size:
        masses.append(whole_mass - fragment)

  return sorted(masses)
 

In [None]:
# a function which takes in a peptide
# and spits out its linear spectrum
# where "linear spectrum" is a list of the integer masses of the subpeptides

def LinearSpectrum(peptide):
  """Return the sorted theoretical spectrum of a linear peptide.

  The result contains 0 and the integer mass of every contiguous subpeptide
  (the whole peptide included), in ascending order.

  peptide: string of one-letter amino acid codes. A letter that is missing
  from the mass table raises KeyError.
  """
  amino_acid_table = {
    'G': 57, 'A': 71, 'S': 87, 'P': 97, 'V': 99,
    'T': 101, 'C': 103, 'I': 113, 'L': 113, 'N': 114,
    'D': 115, 'K': 128, 'Q': 128, 'E': 129, 'M': 131,
    'H': 137, 'F': 147, 'R': 156, 'Y': 163, 'W': 186,
  }
  size = len(peptide)

  # running_total[k] is the combined mass of the first k residues
  running_total = [0]
  for residue in peptide:
    running_total.append(running_total[-1] + amino_acid_table[residue])

  # the mass of peptide[start:stop] is the difference of two prefix sums
  fragments = [
    running_total[stop] - running_total[start]
    for start in range(size)
    for stop in range(start + 1, size + 1)
  ]
  return sorted([0] + fragments)
 

In [None]:
# a function which takes in a peptide (as a string)
# and it spits out its mass

def mass(peptide):
  """Return the total integer mass of a peptide.

  peptide: iterable of one-letter amino acid codes (normally a string).
  The empty peptide has mass 0; a letter that is missing from the mass
  table raises KeyError.
  """
  # integer mass of each amino acid, keyed by its one-letter code
  amino_acid_mass = {
    'G': 57, 'A': 71, 'S': 87, 'P': 97, 'V': 99,
    'T': 101, 'C': 103, 'I': 113, 'L': 113, 'N': 114,
    'D': 115, 'K': 128, 'Q': 128, 'E': 129, 'M': 131,
    'H': 137, 'F': 147, 'R': 156, 'Y': 163, 'W': 186,
  }
  return sum(amino_acid_mass[residue] for residue in peptide)

In [None]:
# a function which checks if a list is a sublist of another list
def is_sublist(sub_list, list_):
    """Return True if list_ contains every item of sub_list with at least the same multiplicity.

    Order is ignored; only the number of occurrences of each item matters.
    An empty sub_list is contained in any list.
    """
    return not any(
        sub_list.count(element) > list_.count(element)
        for element in sub_list
    )

In [None]:
# Input: a peptide of length k and a list of amino acids
# Output: the new peptides of length k+1, each formed by appending one amino acid from the list to the peptide
def expand(peptide, amino_list):
  """Return the peptides obtained by appending each entry of amino_list to peptide.

  The results keep the order of amino_list; an empty amino_list gives an
  empty list.
  """
  return [peptide + amino for amino in amino_list]

In [None]:
def cyclopeptide_sequencing(spectrum):
  """Print every cyclic peptide whose theoretical spectrum equals `spectrum`.

  Branch-and-bound search: candidate linear peptides are grown one amino
  acid at a time. A grown peptide whose mass equals the largest mass in
  the spectrum is accepted if its cyclic spectrum matches `spectrum`
  exactly; any other peptide is kept for the next round only if its linear
  spectrum is contained in `spectrum`.

  spectrum: list of integer masses (order does not matter).

  Returns None. Each solution is printed on its own line as the masses of
  its amino acids joined by '-'. Peptides that differ only in amino acids
  of equal mass (I/L, K/Q) are printed once, in the order they were found
  (dict insertion order, Python 3.7+).
  """
  # an empty spectrum has no solutions (and max() below would raise ValueError)
  if not spectrum:
    return

  # loop invariants: the mass of the whole peptide and the target to match
  parent_mass = max(spectrum)
  target_spectrum = sorted(spectrum)

  # building blocks used to extend candidates; starts as all 20 amino acids
  # and is narrowed after the first round to those consistent with the spectrum
  amino_acids = ['G', 'A', 'S', 'P', 'V', 'T', 'C', 'I', 'L', 'N', 'D', 'K', 'Q', 'E', 'M', 'H', 'F', 'R', 'Y', 'W']
  # the empty peptide expands to the single amino acids in the first round
  candidate_peptides = ['']
  final_peptides = []
  first_round = True

  while candidate_peptides:
    good_peptides = []
    for peptide in candidate_peptides:
      for p in expand(peptide, amino_acids):
        if mass(p) == parent_mass:
          # full-mass peptide: either a solution or a dead end, never extended
          if sorted(CyclicSpectrum(p)) == target_spectrum and p not in final_peptides:
            final_peptides.append(p)
        elif is_sublist(LinearSpectrum(p), spectrum):
          good_peptides.append(p)

    if first_round:
      # only amino acids present in the spectrum are worth appending later
      amino_acids = good_peptides[:]
      first_round = False
    candidate_peptides = good_peptides

  # convert each solution to its '-'-joined mass string
  mass_strings = ['-'.join(str(mass(amino)) for amino in f) for f in final_peptides]
  # dict.fromkeys drops duplicate mass strings while keeping first-found order
  for w in dict.fromkeys(mass_strings):
    print(w)
       
       

In [None]:
# sample run on a small spectrum whose largest mass is 427
cyclopeptide_sequencing([0, 113, 128, 186, 241, 299, 314, 427])

In [None]:
# a larger input spectrum (largest mass 1302), passed to cyclopeptide_sequencing below
lst = [0, 87, 99, 101, 103, 113, 113, 115, 128, 131, 156, 156, 190, 214, 214, 230, 231, 232, 241, 243, 269, 269, 271, 318, 331, 344, 345, 345, 346, 358, 370, 370, 382, 397, 431, 444, 446, 457, 461, 474, 483, 500, 501, 501, 510, 559, 560, 587, 587, 588, 589, 600, 602, 611, 613, 614, 688,689, 691, 700, 702, 713, 714, 715, 715, 742, 743, 792, 801, 801, 802, 819, 828, 841, 845, 856, 858, 871, 905, 920, 932, 932, 944, 956, 957, 957, 958, 971, 984, 1031, 1033, 1033, 1059, 1061, 1070, 1071, 1072, 1088, 1088, 1112, 1146, 1146, 1171, 1174, 1187, 1189, 1189, 1199, 1201, 1203, 1215, 1302]

In [None]:
# run the search on lst; the function prints one '-'-joined mass string per solution
cyclopeptide_sequencing(lst)

131-99-115-156-87-103-128-113-156-113-101
113-156-113-128-103-87-156-115-99-131-101
99-131-101-113-156-113-128-103-87-156-115
113-128-103-87-156-115-99-131-101-113-156
87-156-115-99-131-101-113-156-113-128-103
103-87-156-115-99-131-101-113-156-113-128
128-113-156-113-101-131-99-115-156-87-103
115-156-87-103-128-113-156-113-101-131-99
113-101-131-99-115-156-87-103-128-113-156
156-113-101-131-99-115-156-87-103-128-113
156-115-99-131-101-113-156-113-128-103-87
99-115-156-87-103-128-113-156-113-101-131
101-113-156-113-128-103-87-156-115-99-131
131-101-113-156-113-128-103-87-156-115-99
87-103-128-113-156-113-101-131-99-115-156
115-99-131-101-113-156-113-128-103-87-156
113-156-113-101-131-99-115-156-87-103-128
128-103-87-156-115-99-131-101-113-156-113
103-128-113-156-113-101-131-99-115-156-87
101-131-99-115-156-87-103-128-113-156-113
156-113-128-103-87-156-115-99-131-101-113
156-87-103-128-113-156-113-101-131-99-115
