### Importing the necessary libraries:

In [3]:
from pyteomics import mgf, parser
from Bio.SeqUtils import ProtParam

###  Read in the peptide sequence file 
##### Then extract the peptide sequence, scan number, and precursor mass over charge value for each peptide:

In [5]:
peptide_file = "data/psmlist.txt"
ion_type = "y"
fragment_tol = 0.5

peptides = []
with open(peptide_file, 'r') as f:
    for line in f:
        ScanNum, PrecursorMZ, Charge, Peptide = line.strip().split('\t')
        Peptide = parser.parse(Peptide).sequence
        peptides.append({'ScanNum': ScanNum, 'Peptide': Peptide, 'PrecursorMZ': float(PrecursorMZ)})


ValueError: too many values to unpack (expected 4)

### Generate the theoretical peak list for each peptide:

In [3]:
def get_fragment_masses(peptide, ion_type):
    """
    Calculate the expected masses of fragment ions for a given peptide sequence and ion type
    """
    prot_param = ProtParam.ProteinAnalysis(str(peptide))
    aa_masses = prot_param.monoisotopic_counts

    if ion_type == 'y':
        ion_masses = [sum(aa_masses[i:]) + 19.0178 for i in range(len(aa_masses))]  # add mass of H2O
    elif ion_type == 'b':
        ion_masses = [sum(aa_masses[:i]) + 1.0078 for i in range(len(aa_masses))]  # add mass of H

    return ion_masses


def get_peak_list(peptide, ion_type, fragment_tol):
    """
    Generate a theoretical peak list for a given peptide sequence, ion type, and fragment mass tolerance
    """
    fragment_masses = get_fragment_masses(peptide, ion_type)
    peak_list = []

    for i, ion_mass in enumerate(fragment_masses):
        if i == 0:
            continue
        diff = ion_mass - fragment_masses[i-1]
        if abs(diff - 1.0078) <= fragment_tol:
            peak_list.append({'ion_type': ion_type, 'ion_num': i, 'mass': ion_mass, 'intensity': 1.0})

    return peak_list


theoretical_peak_lists = []
for peptide in peptides:
    peak_list = get_peak_list(peptide['peptide_seq'], ion_type, fragment_tol)
    theoretical_peak_lists.append({'scan_num': peptide['scan_num'], 'peak_list': peak_list})


### Annotate the peaks in the spectral file:

In [4]:
def annotate_peak_list(peak_list, theoretical_peak_list, fragment_tol):
    """
    Annotate a peak list with the expected ions from the theoretical peak list
    """
    annotated_peak_list = []

    for peak in peak_list:
        for ion_type in ['y', 'b']:
            for theoretical_peak in theoretical_peak_list:
                if peak['mz'] > theoretical_peak['mass'] and peak['mz'] - theoretical_peak['mass'] <= fragment_tol:
                    annotated_peak_list.append({'ion_type': ion_type, 'ion_num': theoretical_peak['ion_num'], 'mass': theoretical_peak['mass'], 'intensity': peak['intensity']})

    return annotated_peak_list


spectral_file = "spectral_data.mgf"
annotated


SyntaxError: unexpected EOF while parsing (948253125.py, line 8)