Cyclopeptide Scoring Problem: Compute the score of a cyclic peptide against a spectrum.

Input: An amino acid string Peptide and a collection of integers Spectrum.<br>
Output: The score of Peptide against Spectrum, Score(Peptide, Spectrum).<br>
Code Challenge: Solve the Cyclopeptide Scoring Problem.

Sample Input:

    NQEL
    0 99 113 114 128 227 257 299 355 356 370 371 484
Sample Output:

    11

In [2]:
def cyclospectrum(peptide):
    """Return the sorted theoretical spectrum of a cyclic peptide.

    The spectrum contains the mass 0, the mass of every contiguous fragment
    of the cyclic peptide that is shorter than the peptide itself (one entry
    per start position and length, wrapping around the end), and the mass of
    the whole peptide.

    Raises KeyError if ``peptide`` contains a letter that has no entry in
    the mass table.
    """
    # Integer mass of each amino acid; 'X' stands for an unknown residue.
    residue_mass = {
        'A': 71, 'R': 156, 'N': 114, 'D': 115, 'C': 103,
        'E': 129, 'Q': 128, 'G': 57, 'H': 137, 'I': 113,
        'L': 113, 'K': 128, 'M': 131, 'F': 147, 'P': 97,
        'S': 87, 'T': 101, 'W': 186, 'Y': 163, 'V': 99,
        'X': 0,
    }

    size = len(peptide)
    # Doubling the string lets fragments that wrap past the end be sliced
    # directly out of one linear string.
    doubled = peptide + peptide
    fragments = [
        doubled[begin:begin + span]
        for begin in range(size)
        for span in range(1, size)
    ]
    # The full peptide is added once, not once per start position.
    fragments.append(peptide)

    spectrum = [0]
    spectrum.extend(
        sum(residue_mass[letter] for letter in fragment)
        for fragment in fragments
    )
    spectrum.sort()
    return spectrum


def scoring(peptide, spectrum: str):
    """Score a cyclic peptide against an experimental spectrum.

    Args:
        peptide: amino acid string, passed to ``cyclospectrum``.
        spectrum: whitespace-separated integer masses.

    Returns:
        The number of masses shared between the theoretical cyclic spectrum
        of ``peptide`` and ``spectrum``, counted with multiplicity: a mass
        that occurs k times in one and m times in the other adds min(k, m).

    Raises:
        ValueError: if a token of ``spectrum`` is not an integer.
    """
    from collections import Counter

    # split() without an argument tolerates repeated spaces, tabs and
    # trailing newlines; split(" ") would yield empty tokens that int()
    # rejects.
    experimental = Counter(map(int, spectrum.split()))
    theoretical = Counter(cyclospectrum(peptide))
    # Multiset intersection keeps the smaller count per mass, which is the
    # same as matching each theoretical mass at most once.
    return sum((experimental & theoretical).values())

Code Challenge: Compute the score of a linear peptide with respect to a spectrum.

Input: An amino acid string Peptide and a collection of integers Spectrum.<br>
Output: The linear score of Peptide with respect to Spectrum, LinearScore(Peptide, Spectrum).

Sample Input:

    NQEL
    0 99 113 114 128 227 257 299 355 356 370 371 484
Sample Output:

    8

In [6]:
def linear_spectrum(peptide):
    """Return the sorted theoretical spectrum of a linear peptide.

    The spectrum contains the mass 0 plus the mass of every contiguous
    fragment ``peptide[i:j]`` with ``i < j``.

    Args:
        peptide: amino acid string using the letters of the mass table.

    Returns:
        A sorted list of integer masses; ``[0]`` for an empty peptide.

    Raises:
        KeyError: if ``peptide`` contains a letter with no entry in the
            mass table.
    """
    # Integer mass of each amino acid; 'X' stands for an unknown residue.
    masses = {
        'A': 71, 'R': 156, 'N': 114, 'D': 115, 'C': 103,
        'E': 129, 'Q': 128, 'G': 57, 'H': 137, 'I': 113,
        'L': 113, 'K': 128, 'M': 131, 'F': 147, 'P': 97,
        'S': 87, 'T': 101, 'W': 186, 'Y': 163, 'V': 99,
        'X': 0,
    }

    # prefix_mass[k] is the total mass of the first k residues. A direct
    # dict lookup guarantees exactly one entry per residue, so an unknown
    # letter fails here with a KeyError instead of leaving the list short.
    prefix_mass = [0]
    for residue in peptide:
        prefix_mass.append(prefix_mass[-1] + masses[residue])

    size = len(peptide)
    spectrum = [0]
    for i in range(size):
        for j in range(i + 1, size + 1):
            # Mass of peptide[i:j] as a difference of two prefix masses.
            spectrum.append(prefix_mass[j] - prefix_mass[i])
    return sorted(spectrum)


def linear_score(peptide, spectrum: str):
    """Score a linear peptide against an experimental spectrum.

    Args:
        peptide: amino acid string, passed to ``linear_spectrum``.
        spectrum: whitespace-separated integer masses.

    Returns:
        The number of masses shared between the theoretical linear spectrum
        of ``peptide`` and ``spectrum``, counted with multiplicity: a mass
        that occurs k times in one and m times in the other adds min(k, m).

    Raises:
        ValueError: if a token of ``spectrum`` is not an integer.
    """
    from collections import Counter

    # split() without an argument tolerates repeated spaces, tabs and
    # trailing newlines; split(" ") would yield empty tokens that int()
    # rejects.
    experimental = Counter(map(int, spectrum.split()))
    theoretical = Counter(linear_spectrum(peptide))
    # Multiset intersection keeps the smaller count per mass, which is the
    # same as matching each theoretical mass at most once.
    return sum((experimental & theoretical).values())

176

Code Challenge: Implement Trim (reproduced below).<br>
Input: A collection of peptides Leaderboard, a collection of integers Spectrum, and an integer N.<br>
Output: The N highest-scoring linear peptides on Leaderboard with respect to Spectrum.


    Trim(Leaderboard, Spectrum, N, Alphabet, AminoAcidMass)
        for j ← 1 to |Leaderboard|
            Peptide ← j-th peptide in Leaderboard
            LinearScores(j) ← LinearScore(Peptide, Alphabet, AminoAcidMass, Spectrum)
        sort Leaderboard according to the decreasing order of scores in LinearScores
        sort LinearScores in decreasing order
        for j ← N + 1 to |Leaderboard|
            if LinearScores(j) < LinearScores(N)
                remove all peptides starting from the j-th peptide from Leaderboard
                return Leaderboard
        return Leaderboard

Sample Input:

    LAST ALST TLLT TQAS
    0 71 87 101 113 158 184 188 259 271 372
    2
Sample Output:

    LAST ALST

In [18]:
def linear_spectrum(peptide):
    """Return the sorted theoretical spectrum of a linear peptide.

    The spectrum contains the mass 0 plus the mass of every contiguous
    fragment ``peptide[i:j]`` with ``i < j``.

    Args:
        peptide: amino acid string using the letters of the mass table.

    Returns:
        A sorted list of integer masses; ``[0]`` for an empty peptide.

    Raises:
        KeyError: if ``peptide`` contains a letter with no entry in the
            mass table.
    """
    # Integer mass of each amino acid; 'X' stands for an unknown residue.
    masses = {
        'A': 71, 'R': 156, 'N': 114, 'D': 115, 'C': 103,
        'E': 129, 'Q': 128, 'G': 57, 'H': 137, 'I': 113,
        'L': 113, 'K': 128, 'M': 131, 'F': 147, 'P': 97,
        'S': 87, 'T': 101, 'W': 186, 'Y': 163, 'V': 99,
        'X': 0,
    }

    # prefix_mass[k] is the total mass of the first k residues. A direct
    # dict lookup guarantees exactly one entry per residue, so an unknown
    # letter fails here with a KeyError instead of leaving the list short.
    prefix_mass = [0]
    for residue in peptide:
        prefix_mass.append(prefix_mass[-1] + masses[residue])

    size = len(peptide)
    spectrum = [0]
    for i in range(size):
        for j in range(i + 1, size + 1):
            # Mass of peptide[i:j] as a difference of two prefix masses.
            spectrum.append(prefix_mass[j] - prefix_mass[i])
    return sorted(spectrum)


def linear_score(peptide, spectrum: list):
    """Score a linear peptide against an experimental spectrum.

    Args:
        peptide: amino acid string, passed to ``linear_spectrum``.
        spectrum: list of integer masses; it is not modified.

    Returns:
        The number of masses shared between the theoretical linear spectrum
        of ``peptide`` and ``spectrum``, counted with multiplicity: a mass
        that occurs k times in one and m times in the other adds min(k, m).
    """
    from collections import Counter

    # Counting both sides once avoids a linear `in` test plus a linear
    # `remove` on a list for every experimental mass.
    experimental = Counter(spectrum)
    theoretical = Counter(linear_spectrum(peptide))
    # Multiset intersection keeps the smaller count per mass, which is the
    # same as matching each theoretical mass at most once.
    return sum((experimental & theoretical).values())


def trim(leaderboard: list, spectrum: list, n):
    """Keep the ``n`` highest-scoring peptides of a leaderboard, with ties.

    Args:
        leaderboard: list of peptide strings.
        spectrum: list of integer masses, passed to ``linear_score``.
        n: number of top places to keep.

    Returns:
        A new list of peptides in decreasing order of linear score. Peptides
        with equal scores keep their relative leaderboard order, and every
        peptide whose score ties with the n-th best score is retained, so
        the result may hold more than ``n`` entries. An empty list is
        returned when ``n`` is not positive or the leaderboard is empty.
    """
    # Without this guard, n == 0 would index scores[-1] (the lowest score)
    # as the cutoff and the whole leaderboard would be returned.
    if n <= 0 or not leaderboard:
        return []

    scored = [(linear_score(peptide, spectrum), peptide) for peptide in leaderboard]
    # list.sort is stable, also with reverse=True, so ties keep input order.
    scored.sort(key=lambda pair: pair[0], reverse=True)

    if n >= len(scored):
        return [peptide for _, peptide in scored]

    cutoff = scored[n - 1][0]
    # Scores are in decreasing order, so the peptides at or above the cutoff
    # form exactly the prefix that ends where the first lower score appears.
    return [peptide for score, peptide in scored if score >= cutoff]


def trim_main(leaderboard: str, spectrum: str, n):
    """Run ``trim`` on whitespace-separated text input.

    Args:
        leaderboard: whitespace-separated peptide strings.
        spectrum: whitespace-separated integer masses.
        n: number of top places to keep, passed to ``trim``.

    Returns:
        The peptides returned by ``trim``, joined by single spaces.

    Raises:
        ValueError: if a token of ``spectrum`` is not an integer.
    """
    # split() without an argument ignores repeated spaces and trailing
    # newlines; split(" ") would produce empty peptides and empty tokens
    # that int() rejects.
    peptides = leaderboard.split()
    masses = [int(token) for token in spectrum.split()]
    return " ".join(trim(peptides, masses, n))

AALLCSRHADDHGEDYGFFMCPSVKMNHSAGTTKRVQKAIGEQ HVRGTDTPHSVVEIGIHKVVWTETPECWYYPDSNEFPKKWTQN SLNMDCYPKMYGRGYQMWMFPATDGTRAKYSSRRWKDPTCTVI DHNIGFVIAALISSPPWGWCWPPNPQEDDPKLTQCPTFAYTWT DKEYGELTVPCWYTASSSDTFVGPDKDYDNAYEPGYQRGRTPG PFMYAPVGGSFYYFNKTIATSKVHDMWGEGKAHMPSYEPFAHP


Code Challenge: Implement LeaderboardCyclopeptideSequencing.

Input: An integer N and a collection of integers Spectrum.<br>
Output: LeaderPeptide after running LeaderboardCyclopeptideSequencing(Spectrum, N).

Note: Multiple solutions may exist. You may return any one.

Sample Input:

    10
    0 71 113 129 147 200 218 260 313 331 347 389 460
Sample Output:

    113-147-71-129

In [None]:
import copy

def linear_spectrum(peptide):
    """Return the sorted theoretical spectrum of a linear peptide.

    The spectrum contains the mass 0 plus the mass of every contiguous
    fragment ``peptide[i:j]`` with ``i < j``.

    Args:
        peptide: amino acid string using the letters of the mass table.

    Returns:
        A sorted list of integer masses; ``[0]`` for an empty peptide.

    Raises:
        KeyError: if ``peptide`` contains a letter with no entry in the
            mass table.
    """
    # Integer mass of each amino acid; 'X' stands for an unknown residue.
    masses = {
        'A': 71, 'R': 156, 'N': 114, 'D': 115, 'C': 103,
        'E': 129, 'Q': 128, 'G': 57, 'H': 137, 'I': 113,
        'L': 113, 'K': 128, 'M': 131, 'F': 147, 'P': 97,
        'S': 87, 'T': 101, 'W': 186, 'Y': 163, 'V': 99,
        'X': 0,
    }

    # prefix_mass[k] is the total mass of the first k residues. A direct
    # dict lookup guarantees exactly one entry per residue, so an unknown
    # letter fails here with a KeyError instead of leaving the list short.
    prefix_mass = [0]
    for residue in peptide:
        prefix_mass.append(prefix_mass[-1] + masses[residue])

    size = len(peptide)
    spectrum = [0]
    for i in range(size):
        for j in range(i + 1, size + 1):
            # Mass of peptide[i:j] as a difference of two prefix masses.
            spectrum.append(prefix_mass[j] - prefix_mass[i])
    return sorted(spectrum)


def linear_score(peptide, spectrum: list):
    """Score a linear peptide against an experimental spectrum.

    Args:
        peptide: amino acid string, passed to ``linear_spectrum``.
        spectrum: list of integer masses; it is not modified.

    Returns:
        The number of masses shared between the theoretical linear spectrum
        of ``peptide`` and ``spectrum``, counted with multiplicity: a mass
        that occurs k times in one and m times in the other adds min(k, m).
    """
    from collections import Counter

    # Counting both sides once avoids a linear `in` test plus a linear
    # `remove` on a list for every experimental mass; this function is
    # called for every candidate peptide during sequencing.
    experimental = Counter(spectrum)
    theoretical = Counter(linear_spectrum(peptide))
    # Multiset intersection keeps the smaller count per mass, which is the
    # same as matching each theoretical mass at most once.
    return sum((experimental & theoretical).values())


def trim(leaderboard: list, spectrum: list, n):
    """Keep the ``n`` highest-scoring peptides of a leaderboard, with ties.

    Args:
        leaderboard: list of peptide strings.
        spectrum: list of integer masses, passed to ``linear_score``.
        n: number of top places to keep.

    Returns:
        A new list of peptides in decreasing order of linear score. Peptides
        with equal scores keep their relative leaderboard order, and every
        peptide whose score ties with the n-th best score is retained, so
        the result may hold more than ``n`` entries. An empty list is
        returned when ``n`` is not positive or the leaderboard is empty.
    """
    # Without this guard, n == 0 would index scores[-1] (the lowest score)
    # as the cutoff and the whole leaderboard would be returned.
    if n <= 0 or not leaderboard:
        return []

    scored = [(linear_score(peptide, spectrum), peptide) for peptide in leaderboard]
    # list.sort is stable, also with reverse=True, so ties keep input order.
    scored.sort(key=lambda pair: pair[0], reverse=True)

    if n >= len(scored):
        return [peptide for _, peptide in scored]

    cutoff = scored[n - 1][0]
    # Scores are in decreasing order, so the peptides at or above the cutoff
    # form exactly the prefix that ends where the first lower score appears.
    return [peptide for score, peptide in scored if score >= cutoff]


def expand(leaderboard):
    """Extend every peptide on the leaderboard by one amino acid.

    Returns a new list holding, for each peptide in order, that peptide
    followed by each of the 18 letters below, in the order listed.
    """
    # 'L' and 'K' are not in this alphabet; presumably they are left out
    # because the mass tables in this file give them the same mass as 'I'
    # and 'Q' respectively.
    alphabet = "ARNDCEQGHIMFPSTWYV"
    return [peptide + letter for peptide in leaderboard for letter in alphabet]


def mass(peptide):
    """Return the total integer mass of a peptide.

    Sums the table mass of every letter in ``peptide``; an empty peptide
    has mass 0. Raises KeyError for a letter missing from the table.
    """
    # Integer mass of each amino acid; 'X' stands for an unknown residue.
    residue_mass = {
        'A': 71, 'R': 156, 'N': 114, 'D': 115, 'C': 103,
        'E': 129, 'Q': 128, 'G': 57, 'H': 137, 'I': 113,
        'L': 113, 'K': 128, 'M': 131, 'F': 147, 'P': 97,
        'S': 87, 'T': 101, 'W': 186, 'Y': 163, 'V': 99,
        'X': 0,
    }
    return sum(residue_mass[letter] for letter in peptide)


def leaderboard_cyclopeptide_sequencing(spectrum, n):
    """Search for the peptide that best explains a spectrum.

    Starting from the empty peptide, the leaderboard is repeatedly grown by
    one residue with ``expand``, peptides heavier than the largest mass in
    ``spectrum`` are discarded, and the remainder is cut down with ``trim``.
    The loop ends when the leaderboard becomes empty.

    Args:
        spectrum: non-empty list of integer masses.
        n: leaderboard size passed to ``trim``.

    Returns:
        The first peptide encountered with the highest linear score among
        those whose mass equals ``max(spectrum)``, or ``""`` if no such
        peptide scores higher than the empty peptide.

    Raises:
        ValueError: if ``spectrum`` is empty (from ``max``).
    """
    # Loop invariants: the target mass and the current leader's score only
    # change when the leader changes, so compute them once, not per peptide.
    parent_mass = max(spectrum)
    leaderpeptide = ""
    # NOTE(review): the leader is chosen by linear score; confirm whether a
    # cyclic score was intended for this comparison.
    leader_score = linear_score(leaderpeptide, spectrum)

    leaderboard = [""]
    while leaderboard:
        survivors = []
        for peptide in expand(leaderboard):
            peptide_mass = mass(peptide)
            if peptide_mass > parent_mass:
                # Too heavy: dropping it here replaces a deep copy of the
                # leaderboard followed by a linear-time list.remove.
                continue
            if peptide_mass == parent_mass:
                candidate_score = linear_score(peptide, spectrum)
                if candidate_score > leader_score:
                    leaderpeptide = peptide
                    leader_score = candidate_score
            survivors.append(peptide)
        leaderboard = trim(survivors, spectrum, n)
    return leaderpeptide


def leaderboard_cyclopeptide_sequencing_main(spectrum_str, n):
    """Run leaderboard sequencing on text input and print the result.

    Prints the leader peptide as its residue masses joined by ``-``
    (an empty line if the leader peptide is empty). Returns None.

    Args:
        spectrum_str: whitespace-separated integer masses.
        n: leaderboard size; it is passed through unchanged (it was
            previously overwritten with a hard-coded 202).

    Raises:
        ValueError: if a token of ``spectrum_str`` is not an integer.
    """
    # split() without an argument tolerates repeated spaces and trailing
    # newlines; split(" ") would yield empty tokens that int() rejects.
    spectrum = list(map(int, spectrum_str.split()))
    leader = leaderboard_cyclopeptide_sequencing(spectrum, n)
    # mass() applied to a single letter gives that residue's mass.
    print("-".join(str(mass(residue)) for residue in leader))
