In [1]:
import numpy as np
import re

In [2]:
def parse_mgf(file):
    """Parse a spectral file in *.mgf format and return its spectra.

    Every ``BEGIN IONS`` ... ``END IONS`` block becomes one entry of the
    returned dict, keyed by the block's ``TITLE=`` value (``""`` when the
    block has no title; a repeated title overwrites the earlier entry).
    Lines outside such blocks are ignored.

    Args:
        file: Path of the MGF file to read.

    Returns:
        dict mapping title -> ``{"m_z": ..., "charge": ..., "peaks": ...}``:
        ``m_z`` is the first number on the ``PEPMASS=`` line, ``charge`` is
        the first run of digits on the ``CHARGE=`` line, and ``peaks`` holds
        one list of floats per peak line. ``m_z`` and ``charge`` stay ``0``
        when the corresponding header line is absent.

    Raises:
        ValueError: if a numeric field cannot be parsed, if a ``PEPMASS=``
            line carries no value, or if the file ends inside a
            ``BEGIN IONS`` block without a matching ``END IONS``.
        AttributeError: if a ``CHARGE=`` line contains no digits.
    """
    spectra = {}
    in_spectrum = False
    title, m_z, charge, peaks = "", 0, 0, []
    with open(file) as f:
        # Stream the file line by line instead of loading it all at once.
        for raw in f:
            if not in_spectrum:
                if raw.startswith("BEGIN IONS"):
                    in_spectrum = True
                    title, m_z, charge, peaks = "", 0, 0, []
                continue

            if raw.startswith("END IONS"):
                spectra[title] = {"m_z": m_z, "charge": charge, "peaks": peaks}
                in_spectrum = False
                continue

            line = raw.strip()
            if raw.startswith("TITLE="):
                title = line[6:]
            elif raw.startswith("PEPMASS="):
                # PEPMASS may be "<m/z>" or "<m/z> <intensity>"; only the
                # first field is the precursor m/z.
                mass_fields = line[8:].split()
                if not mass_fields:
                    raise ValueError("PEPMASS line has no value: {!r}".format(line))
                m_z = float(mass_fields[0])
            elif raw.startswith("CHARGE="):
                charge = int(re.search(r'\d+', raw).group())
            elif line and "=" not in line:
                # Peak lines are whitespace-separated numbers. Any other
                # KEY=VALUE header (COM=, RTINSECONDS=, SCANS=, ...) is
                # skipped rather than being mis-parsed as a peak.
                peaks.append([float(x) for x in line.split()])

    if in_spectrum:
        # Truncated file: fail with a clear message instead of silently
        # dropping or half-storing the last spectrum.
        raise ValueError(
            "Unterminated spectrum {!r}: missing END IONS".format(title)
        )
    return spectra

In [3]:
def parse_peptides(file):
    """Read a whitespace-delimited peptide list from a *.txt file.

    Every non-blank line must hold exactly five whitespace-separated
    fields, in this order: spectrum file, scan, m/z, charge, peptide.

    Args:
        file: Path of the peptide text file.

    Returns:
        A list with one dict per non-blank line, with keys ``"specfile"``,
        ``"scan"``, ``"m_z"`` (float), ``"charge"`` (int) and ``"peptide"``;
        ``specfile``, ``scan`` and ``peptide`` are kept as strings.

    Raises:
        ValueError: if a line does not have exactly five fields, or its
            m/z or charge field is not numeric.
    """
    records = []
    with open(file) as handle:
        for row in handle:
            fields = row.split()
            if not fields:
                # Blank / whitespace-only line.
                continue
            specfile, scan, mass_text, charge_text, sequence = fields
            record = {
                "specfile": specfile,
                "scan": scan,
                "m_z": float(mass_text),
                "charge": int(charge_text),
                "peptide": sequence,
            }
            records.append(record)
    return records

In [4]:
def calc_fragment_ions(peptide, ion_type):
    """Calculate the theoretical m/z values of the fragment ions of the peptide based on the ion type and fragment tolerance."""
    aa_masses = {'A': 71.037114, 'R': 156.101111, 'N': 114.042927, 'D': 115.026943,
                 'C': 103.009185, 'E': 129.042593, 'Q': 128.058578, 'G': 57.021464,
                 'H': 137.058912, 'I': 113.084064, 'L': 113.084064, 'K': 128.094963,
                 'M': 131.040485, 'F': 147.068414, 'P': 97.052764, 'S': 87.032028,
                 'T': 101.047679, 'W': 186.079313, 'Y': 163.063329, 'V': 99.068414}
    mod_masses = {'O': 15.994915, 'P':


SyntaxError: unexpected EOF while parsing (948253125.py, line 8)