# Script split into parts

In [2]:
# Importing Necessary Libraries
from pyteomics import mgf, pepxml, mass, pylab_aux
import pylab
import csv
from pyopenms import *
import numpy as np
import pyteomics.mgf

In [3]:
#Constants
ion_types=('b', 'y')
maxcharge = 1
fragment_tol = 0.5

In [4]:
#FIND OUT THE Relationship between charges [1,2] and maxcharge and how to herners them
def fragments(peptide, ion_types=('b', 'y'), maxcharge=1):
    """
    The function generates all possible m/z for fragments of types
    `types` and of charges from 1 to `maxharge`.
    """
    for i in range(1, len(peptide)):
        for ion_type in ion_types:
            for charge in range(1, maxcharge+1):
                if ion_type[0] in 'abc':
                    yield mass.fast_mass(
                            peptide[:i], ion_type=ion_type, charge=charge)
                else:
                    yield mass.fast_mass(
                            peptide[i:], ion_type=ion_type, charge=charge)

### Generating Theoretical Peak List and saving them to Text file

In [7]:
valid_aa_codes = set('ARNDCQEGHILKMFPSTWYV')
with mgf.read('data/hw2_test.mgf') as spectra, open('data/hw2_psmlist_test_v2.txt', 'r') as tsvfile, open('data/theoretical_peak_list.txt', 'w') as peakfile:
    spectrum = next(spectra)
    reader = csv.reader(tsvfile, delimiter='\t')
    next(reader, None)
    for row in reader:
        peptide = row[4]
        if not all(aa in valid_aa_codes for aa in peptide):
            continue
        # Generating Theoretical Peak Lists
        fragment_mz_values = fragments(peptide)

        # Write the mz values to the theoretical peak list file
        for mz in fragment_mz_values:
           #print(mz)
           peakfile.write(f"{mz:.4f}\n")

## Annotating The Peaks

In [8]:
# Read in the theoretical peak list
with open('theoretical_peak_list.txt', 'r') as peakfile:
    theoretical_peaks = [float(line.strip()) for line in peakfile]

# Loop through the spectra in the spectral file
with pyteomics.mgf.read('data/hw2_test.mgf') as spectra, open('data/hw2_psmlist_test_v2.txt', 'r') as tsvfile:
    spectrum = next(spectra)
    reader = csv.reader(tsvfile, delimiter='\t')
    next(reader, None)
    for row in reader:
        peptide = row[4]
        if not all(aa in valid_aa_codes for aa in peptide):
            continue
        
        # Get the peaks for the current spectrum
        spectrum_mz = spectrum['m/z array']
        spectrum_intensity = spectrum['intensity array']
        
        # Generate the theoretical peak list for the current peptide sequence
        fragment_mz_values = fragments(peptide)
        
        # Compare the theoretical peaks to the peaks in the spectrum
        for mz in fragment_mz_values:
            for i in range(len(spectrum_mz)):
                if abs(spectrum_mz[i] - mz) <= fragment_tol:
                    print(f"Found match for peptide {peptide}: theoretical m/z {mz:.4f}, observed m/z {spectrum_mz[i]:.4f}")


Found match for peptide KQLATK: theoretical m/z 129.1022, observed m/z 129.0676
Found match for peptide KQLATK: theoretical m/z 129.1022, observed m/z 129.1023
Found match for peptide KQLATK: theoretical m/z 560.3402, observed m/z 560.3415
Found match for peptide KQLATK: theoretical m/z 257.1608, observed m/z 257.1611
Found match for peptide KQLATK: theoretical m/z 432.2817, observed m/z 432.2823
Found match for peptide KQLATK: theoretical m/z 370.2449, observed m/z 370.2451
Found match for peptide KQLATK: theoretical m/z 319.1976, observed m/z 319.1978
Found match for peptide KQLATK: theoretical m/z 441.2820, observed m/z 441.2833
Found match for peptide KQLATK: theoretical m/z 248.1605, observed m/z 248.1610
Found match for peptide KQLATK: theoretical m/z 542.3297, observed m/z 542.3305
Found match for peptide KQLATK: theoretical m/z 147.1128, observed m/z 147.1129
Found match for peptide MIQMYSNGSSKDR: theoretical m/z 175.1190, observed m/z 175.1194
Found match for peptide ETAKLIKEG

### Annotating Peaks and saving them to file

In [5]:
with mgf.read('data/hw2_test.mgf') as spectra, open('data/hw2_psmlist_test_v2.txt', 'r') as tsvfile, open('theoretical_peak_list.txt', 'w') as theoretical_file, open('annotated_peak_list.txt', 'w') as annotated_file:
    spectrum = next(spectra)
    reader = csv.reader(tsvfile, delimiter='\t')
    next(reader, None)
    for row in reader:
        peptide = row[4]
        if not all(aa in valid_aa_codes for aa in peptide):
            continue
        # Generating Theoretical Peak Lists
        fragment_mz_values = fragments(peptide)

        theoretical_peaks = []
        for mz in fragment_mz_values:
            theoretical_file.write(str(mz) + '\n')
            theoretical_peaks.append(mz)

        annotated_peaks = []
        for peak in spectrum['m/z array']:
            for theoretical_peak in theoretical_peaks:
                if abs(peak - theoretical_peak) <= fragment_tol:
                    annotated_file.write(str(peak) + '\t' + str(spectrum['intensity array'][i]) + '\n')
                    annotated_peaks.append(peak)
                    break

        # Remove annotated peaks from theoretical peaks
        for peak in annotated_peaks:
            if peak in theoretical_peaks:
                theoretical_peaks.remove(peak)


NameError: name 'i' is not defined

#### The code below, tries not only to save the theoretical Peaks lists and Annotated Peak lists but also tries to do that in the specified in the assignnment. So to get rid of the error - this should be the end of the assignment - FIGHTING!!

In [6]:
valid_aa_codes = set('ARNDCQEGHILKMFPSTWYV')
fragment_tol = 0.5
ion_types = ('b', 'y')

# Function to generate all possible m/z for fragments of types `ion_types` and of charges from 1 to `maxcharge`
def fragments(peptide, ion_types=('b', 'y'), maxcharge=1):
    for i in range(1, len(peptide)):
        for ion_type in ion_types:
            for charge in range(1, maxcharge+1):
                if ion_type[0] in 'abc':
                    yield mass.fast_mass(peptide[:i], ion_type=ion_type, charge=charge)
                else:
                    yield mass.fast_mass(peptide[i:], ion_type=ion_type, charge=charge)

# Output files
theoretical_peak_list_file = open('output/Theoretical_peak_list.txt', 'w')
psm_annotated_peak_list_file = open('output/PSM_annotated_peak_list.txt', 'w')

# Read PSM list file and MGF spectrum file
with mgf.read('data/hw2_test.mgf') as spectra, open('data/hw2_psmlist_test_v2.txt', 'r') as tsvfile:
    reader = csv.reader(tsvfile, delimiter='\t')
    next(reader, None)
    for row in reader:
        peptide = row[4]
        if not all(aa in valid_aa_codes for aa in peptide):
            continue

        # Generating Theoretical Peak Lists
        fragment_mz_values = list(fragments(peptide, ion_types=ion_types, maxcharge=2))
        #print(fragment_mz_values)
        # Write theoretical peak list to file
        theoretical_peak_list_file.write('BEGIN\n')
        theoretical_peak_list_file.write(f'PEPTIDE={peptide}\n')
        theoretical_peak_list_file.write(f'TITLE={row[1]}\n')
        
        for i, mz in enumerate(fragment_mz_values):
            ion_label = ion_types[i % len(ion_types)] + str((i // 2) + 1) + "+" * (i % 2 + i // 2) # generate a label for the ion
            theoretical_peak_list_file.write(f'{ion_label}\t{mz}\n')
        theoretical_peak_list_file.write('END\n\n')

        # Find matching peaks in the spectrum and write to annotated peak list file
        psm_annotated_peak_list_file.write('BEGIN\n')
        psm_annotated_peak_list_file.write(f'PEPTIDE={peptide}\n')
        psm_annotated_peak_list_file.write(f'TITLE={row[1]}\n')
        
        for mz, intensity in zip(spectra[0]['m/z array'], spectra[0]['intensity array']):
            if any(abs(mz - frag_mz) <= fragment_tol for frag_mz in fragment_mz_values):
                #ion_label = f'{ion_types[fragment_mz_values.index(frag_mz)%2]}{fragment_mz_values.index(frag_mz)//2+1}+'
               if mz in fragment_mz_values:
                    ion_label = f'{ion_types[fragment_mz_values.index(mz) % 2]}{fragment_mz_values.index(mz) // 2 + 1}+'
                    psm_annotated_peak_list_file.write(f'{ion_label}\t{mz}\t{intensity}\n')
        psm_annotated_peak_list_file.write('END\n\n')

theoretical_peak_list_file.close()
psm_annotated_peak_list_file.close()


In [None]:
valid_aa_codes = set('ARNDCQEGHILKMFPSTWYV')
tolerance = 0.5

with mgf.read('data/hw2_test.mgf') as spectra, open('data/hw2_psmlist_test_v2.txt', 'r') as tsvfile, open('theoretical_peak_list.txt', 'w') as theoretical_file, open('annotated_peak_list.txt', 'w') as annotated_file:
    spectrum = next(spectra)
    reader = csv.reader(tsvfile, delimiter='\t')
    next(reader, None)
    for row in reader:
        peptide = row[4]
        if not all(aa in valid_aa_codes for aa in peptide):
            continue
        # Generating Theoretical Peak Lists
        fragment_mz_values = fragments(peptide)

        theoretical_peaks = []
        for mz in fragment_mz_values:
            theoretical_file.write(str(mz) + '\n')
            theoretical_peaks.append(mz)

        annotated_peaks = []
        for i in range(len(spectrum['m/z array'])):
            peak = spectrum['m/z array'][i]
            for theoretical_peak in theoretical_peaks:
                if abs(peak - theoretical_peak) <= tolerance:
                    annotated_file.write(str(peak) + '\t' + str(spectrum['intensity array'][i]) + '\n')
                    annotated_peaks.append(peak)
                    break

        # Remove annotated peaks from theoretical peaks
        for peak in annotated_peaks:
            if peak in theoretical_peaks:
