In [1]:
# import libraries
from pyopenms import *
import os

In [2]:
# change directory to find file of interest
# NOTE(review): this is an absolute, machine-specific path; every relative
# filename opened further down is resolved against it, so the notebook only
# runs where this directory exists.
os.chdir(r'C:\Users\miar\Desktop\data')

In [3]:
#------------------------------------------------------------------------------------------------------------------------------

# ONLY NEED TO RUN THE NEXT TWO BLOCKS ONCE: uncomment them to (re)create filtered_MS2.mzML, the MS2-only file loaded further down

#-------------------------------------------------------------------------------------------------------------------------------

In [4]:
# load the content of the mzML file into the exp variable of type MSExperiment
#exp = MSExperiment()
#MzMLFile().load("HEK293T_De_Novo_053122_Glu-C_B_correctRTSenzyme_BP.mzML", exp)

# access the raw data and spectra
#spectrum_data = exp.getSpectrum(0).get_peaks()
#spectrum_data

In [5]:
# loop through the spectra to gather MS2 scans
#specM2 = []
#for s in exp.getSpectra():
#    if s.getMSLevel() == 2:
#        specM2.append(s)
        
#print("Number of MS2 scans: " + str(len(specM2)))

#exp.setSpectra(specM2) # keep only MS2

# store the modified data structure on disk
#MzMLFile().store("filtered_MS2.mzML", exp)

In [6]:
# parse function
def parseScanLine(input):
    """Split one 'Submitted Custom Scan' log line into its three fields.

    The text after " For: " is expected to look like
    "<scan number>, <precursor m/z>;<fragment m/z text>".

    Parameters
    ----------
    input : str
        A single line of the log file.

    Returns
    -------
    list of str
        [scan_number, precursor_mz, fragment_mz]; only the fragment text has
        surrounding whitespace (including the trailing newline) stripped.

    Raises
    ------
    IndexError
        If " For: " does not occur in the line.
    ValueError
        If the remainder does not split into exactly two parts on ", ",
        or the m/z part does not split into exactly two parts on ";".
    """
    pieces = input.split(" For: ")
    payload = pieces[1]

    scan_number, mz_text = payload.split(", ")
    precursor_mz, raw_fragments = mz_text.split(";")

    # only the fragment field is trimmed; the other two are returned as found
    return [scan_number, precursor_mz, raw_fragments.strip()]

In [7]:
# checking lines of log file and creating dictionary of scan numbers and fragment mzs

# log file to scan and the marker text that identifies the lines of interest
log_file = 'App-2022-05-31_20-49-35.log'
search = ' Submitted Custom Scan For:'   # words to search for

# dict for scan numbers and corresponding fragments
# (keys are the scan numbers exactly as parsed from the log, i.e. strings)
scan2frag = dict()

try:
    # reading file content line by line
    with open(log_file) as f:
        for line in f:
            if search in line:
                scan_number, precursor_mz, trimmed_fragment_mz = parseScanLine(line)
                scan2frag[scan_number] = trimmed_fragment_mz
except FileNotFoundError:
    print("The file does not exist!")
else:
    # the file was read, but no line contained the search text
    if not scan2frag:
        print(f'\n"{search}" is not found in "{log_file}"!')

In [8]:
# load in MS2 scans
# "filtered_MS2.mzML" is the file stored by the (commented-out) MS2-filtering
# cell above; that cell has to have been run once before this load can succeed.
exp1 = MSExperiment()
MzMLFile().load("filtered_MS2.mzML", exp1)

In [9]:
# read in peptide sequence from tsv
import pandas as pd  # NOTE(review): consider moving this to the import cell at the top
tsv = pd.read_csv('HEK293T_De_Novo_053122_Glu-C_B_correctRTSenzyme_BP_realtimesearch1.tsv', sep='\t')

# create dictionary with scan # as key and [sequence, charge] as value
# (if a scan number occurs more than once, the last row wins)
scan2PeptideCharge = {
    scan: [peptide, charge_state]
    for scan, peptide, charge_state in zip(tsv['Scan Number'], tsv['Peptide'], tsv['Charge State'])
}

# removing all entries whose sequence is missing (not useful);
# pd.notna tests for a missing value directly instead of comparing str(value) to 'nan'
scan2PeptideCharge_modified = {
    scan: pair for scan, pair in scan2PeptideCharge.items() if pd.notna(pair[0])
}

In [10]:
def findFragments(peptide_object, charge, spectrum=None, tolerance=0.4):
    """Look up every b/y fragment ion of a peptide in a spectrum.

    For each cleavage position i in 1 .. size()-1 the length-i prefix (b ion)
    and length-i suffix (y ion) are taken, and for every charge state
    z in 1 .. charge-1 the m/z (getMonoWeight(type, z) / z) is passed to
    ``spectrum.findNearest``.

    Parameters
    ----------
    peptide_object
        Object providing size(), getPrefix(i) and getSuffix(i)
        (created with AASequence.fromString in this notebook).
    charge : int
        Upper bound (exclusive) of the fragment charge states searched.
    spectrum : optional
        Object providing findNearest(mz, tolerance). When omitted, the
        module-level variable ``s`` is used, which is what this function
        always did before the parameter existed. Prefer passing it explicitly.
    tolerance : float, optional
        Second argument handed to findNearest (0.4, the value previously
        hard-coded).

    Returns
    -------
    (y_index, b_index) : tuple of two lists
        Flat lists of the values returned by findNearest, charge-1 entries
        per cleavage position (the calling code treats -1 as "not found").
        y_index is reversed as a whole so that the first b ion lines up with
        the last y ion; this also reverses the charge-state order inside each
        y group.
    """
    if spectrum is None:
        # legacy fallback: rely on the loop variable of the calling cell
        spectrum = s

    b_index = []
    y_index = []
    for i in range(1, peptide_object.size()):  # prefixes/suffixes of length 1 .. size-1
        y_ion = peptide_object.getSuffix(i)
        b_ion = peptide_object.getPrefix(i)

        for z in range(1, int(charge)):  # charge states 1 .. charge-1
            mz_b = b_ion.getMonoWeight(Residue.ResidueType.BIon, z) / z
            b_index.append(spectrum.findNearest(mz_b, tolerance))

            mz_y = y_ion.getMonoWeight(Residue.ResidueType.YIon, z) / z
            y_index.append(spectrum.findNearest(mz_y, tolerance))

    y_index.reverse()  # reverse list (the first b ion corresponds with the last y ion)

    return y_index, b_index

In [11]:
# skim list for each corresponding y and b fragments based on charge states
def skimList(fragment_index, charge, b_index=None, y_index=None):
    """Return the slice of b and y results that belongs to one fragment.

    Parameters
    ----------
    fragment_index : int
        Position in the flat result lists where the fragment's group starts.
    charge : int
        The group holds charge-1 consecutive entries.
    b_index, y_index : list, optional
        Flat result lists to slice. When omitted, the module-level variables
        ``b`` and ``y`` are used, which is what this function always did
        before the parameters existed. Prefer passing them explicitly.

    Returns
    -------
    (check_b, check_y) : tuple of two lists
        Entries fragment_index .. fragment_index+charge-2 of each list.
    """
    if b_index is None:
        b_index = b  # legacy fallback to the caller's global
    if y_index is None:
        y_index = y  # legacy fallback to the caller's global

    stop = fragment_index + charge - 1
    check_b = b_index[fragment_index:stop]
    check_y = y_index[fragment_index:stop]
    return check_b, check_y

In [13]:
# one status message per fragment group, over all matching scans
oof = []

# NOTE: findFragments() and skimList() can read the module-level names `s`,
# `y` and `b` when called with two arguments, as they are here, so these
# three names must not be renamed in this cell.
for s in exp1:
    # the scan number is the text after '=' in the last space-separated token
    # of the native ID (presumably "... scan=<n>" -- confirm for other vendors)
    s_number = s.getNativeID().split(' ')[-1]
    _, scan_number = s_number.split('=')

    # scan2frag is keyed by the scan number as text, the peptide dict is
    # looked up with the integer value
    if scan_number not in scan2frag or int(scan_number) not in scan2PeptideCharge_modified:
        continue

    # isolate peptide sequence from dict
    sequence = scan2PeptideCharge_modified[int(scan_number)][0]
    trimmed_sequence = sequence[2:-2] # remove first two and last two characters

    # isolate charge from dict
    charge = int(scan2PeptideCharge_modified[int(scan_number)][1])

    # create peptide object
    peptide_object = AASequence.fromString(trimmed_sequence)

    # call findFragments function
    y, b = findFragments(peptide_object, charge)

    # Each fragment owns charge-1 consecutive entries. For charge == 1 the
    # lists are empty, and a step of 0 would make range() raise, so the step
    # is clamped to 1 (the loop then simply does not run).
    step = max(charge - 1, 1)

    # checking if every associated y and b ion has a -1 value (was not found in the spectrum)
    for x in range(0, len(y), step):
        check_b, check_y = skimList(x, charge)

        if all(item == -1 for item in check_b) and all(item == -1 for item in check_y):
            oof.append('All elements have a value of -1...we cannot uncover this fragment')
        else:
            oof.append('At least one ion (b or y) has at least one charge state that was found in the MS2')
    # (the former `del check_b, check_y` was dropped: it raised NameError
    # whenever the loop above had not executed)

In [14]:
# show the status messages collected by the loop above (one per fragment group)
# NOTE(review): this prints the whole list; the saved output below is cut off mid-line
oof

['All elements have a value of -1...we cannot uncover this fragment',
 'All elements have a value of -1...we cannot uncover this fragment',
 'At least one ion (b or y) has at least one charge state that was found in the MS2',
 'All elements have a value of -1...we cannot uncover this fragment',
 'At least one ion (b or y) has at least one charge state that was found in the MS2',
 'At least one ion (b or y) has at least one charge state that was found in the MS2',
 'All elements have a value of -1...we cannot uncover this fragment',
 'At least one ion (b or y) has at least one charge state that was found in the MS2',
 'At least one ion (b or y) has at least one charge state that was found in the MS2',
 'All elements have a value of -1...we cannot uncover this fragment',
 'At least one ion (b or y) has at least one charge state that was found in the MS2',
 'All elements have a value of -1...we cannot uncover this fragment',
 'At least one ion (b or y) has at least one charge state that w

In [147]:
# The following cells check how many fragments are missing for just ONE peptide sequence (the one identified for scan 2534)

In [146]:
# peptide string stored for scan 2534 (element 0 of the [peptide, charge] pair)
scan2PeptideCharge_modified[2534][0] 

'E.RQVPLASPSSM[15.9949]SAALRGISCYLKE.L'

In [148]:
# take the peptide string for scan 2534 and drop the flanking "X." / ".X" characters
sequence = scan2PeptideCharge_modified[2534][0]    
trimmed_sequence = sequence[2:-2] # remove first two and last two characters       

In [149]:
# show the sequence after trimming
trimmed_sequence

'RQVPLASPSSM[15.9949]SAALRGISCYLKE'

In [150]:
# build the peptide object from the trimmed (bracket-annotated) sequence string
peptide_object = AASequence.fromString(trimmed_sequence)

In [151]:
# size of the peptide object; this bounds the prefix/suffix loop further down
peptide_object.size()

24

In [152]:
# charge stored for scan 2534 (element 1 of the [peptide, charge] pair)
charge = scan2PeptideCharge_modified[2534][1]
print(charge)

4


In [155]:
# Same search as findFragments(), written out inline for the single peptide:
# for every prefix/suffix length and every charge state 1 .. charge-1, look up
# the ion's m/z in one spectrum and record what findNearest returns.
# NOTE(review): `spectrum_list` is not defined in any cell of this notebook,
# so this cell fails on a fresh kernel. Presumably spectrum_list[2] is the
# spectrum of scan 2534 -- confirm and define it explicitly.
ind_b = []
ind_y = []
for i in range(1, (peptide_object.size())): # start at index of 1, end at peptide length - 1
    y_ion = peptide_object.getSuffix(i)
    b_ion = peptide_object.getPrefix(i)
   
    for x in range(1, charge):
        mz_b = b_ion.getMonoWeight(Residue.ResidueType.BIon, x) / x
        ind_b.append(spectrum_list[2].findNearest(mz_b, 0.4))
        
        mz_y = y_ion.getMonoWeight(Residue.ResidueType.YIon, x) / x
        ind_y.append(spectrum_list[2].findNearest(mz_y, 0.4))
        
# the whole flat list is reversed, so the charge-state order inside each
# fragment's group is reversed as well
ind_y.reverse() # reverse list 

In [156]:
# findNearest results for the y ions, reversed; the check below treats -1 as "not found"
ind_y

[-1,
 -1,
 -1,
 -1,
 -1,
 -1,
 -1,
 -1,
 -1,
 -1,
 -1,
 -1,
 -1,
 -1,
 -1,
 -1,
 -1,
 -1,
 106,
 -1,
 -1,
 -1,
 -1,
 -1,
 74,
 -1,
 -1,
 -1,
 -1,
 -1,
 64,
 -1,
 -1,
 -1,
 -1,
 -1,
 -1,
 -1,
 -1,
 -1,
 -1,
 -1,
 -1,
 -1,
 -1,
 -1,
 -1,
 -1,
 27,
 -1,
 -1,
 -1,
 -1,
 -1,
 14,
 -1,
 -1,
 -1,
 25,
 98,
 -1,
 -1,
 -1,
 -1,
 -1,
 -1,
 -1,
 -1,
 -1]

In [157]:
# skim list for each corresponding y and b fragments based on charge states
# NOTE: this re-defines (and from here on shadows) the skimList defined
# earlier in the notebook; this version falls back to ind_b / ind_y.
def skimList(fragment_index, charge, b_index=None, y_index=None):
    """Return the slice of b and y results that belongs to one fragment.

    Parameters
    ----------
    fragment_index : int
        Position in the flat result lists where the fragment's group starts.
    charge : int
        The group holds charge-1 consecutive entries.
    b_index, y_index : list, optional
        Flat result lists to slice. When omitted, the module-level variables
        ``ind_b`` and ``ind_y`` are used, which is what this definition
        always did before the parameters existed.

    Returns
    -------
    (check_b, check_y) : tuple of two lists
        Entries fragment_index .. fragment_index+charge-2 of each list.
    """
    if b_index is None:
        b_index = ind_b  # legacy fallback to the notebook global
    if y_index is None:
        y_index = ind_y  # legacy fallback to the notebook global

    stop = fragment_index + charge - 1
    check_b = b_index[fragment_index:stop]
    check_y = y_index[fragment_index:stop]
    return check_b, check_y

# per-fragment groups of b / y results, kept for inspection
btest2 = []
ytest2 = []

# Each fragment owns charge-1 consecutive entries of ind_b / ind_y (one per
# charge state), so the flat lists are walked in steps of charge-1
# (0, 3, 6, ... for charge 4). The step is clamped to 1 because range()
# rejects a step of 0, which charge == 1 would otherwise produce.
step = max(charge - 1, 1)

# checking if every associated y and b ion has a -1 value (was not found in the spectrum)
for x in range(0, len(ind_y), step):
    check_b, check_y = skimList(x, charge)

    btest2.append(check_b)
    ytest2.append(check_y)

    if all(item == -1 for item in check_b) and all(item == -1 for item in check_y):
        print('All elements have a value of -1...we cannot uncover this fragment')
    else:
        print('At least one ion (b or y) has at least one charge state that was found in the MS2')

All elements have a value of -1...we cannot uncover this fragment
At least one ion (b or y) has at least one charge state that was found in the MS2
At least one ion (b or y) has at least one charge state that was found in the MS2
All elements have a value of -1...we cannot uncover this fragment
All elements have a value of -1...we cannot uncover this fragment
All elements have a value of -1...we cannot uncover this fragment
At least one ion (b or y) has at least one charge state that was found in the MS2
All elements have a value of -1...we cannot uncover this fragment
At least one ion (b or y) has at least one charge state that was found in the MS2
All elements have a value of -1...we cannot uncover this fragment
At least one ion (b or y) has at least one charge state that was found in the MS2
At least one ion (b or y) has at least one charge state that was found in the MS2
All elements have a value of -1...we cannot uncover this fragment
All elements have a value of -1...we cannot un

In [143]:
# b-ion groups collected by the check loop
# NOTE(review): this cell's execution count (143) is lower than that of the
# cell that builds btest2 (157), and the saved groups have 4 entries while the
# current code slices charge-1 = 3 -- the output is stale; re-run to refresh.
btest2

[[2, -1, -1, -1],
 [49, -1, -1, -1],
 [-1, 26, 9, -1],
 [-1, 44, -1, -1],
 [-1, 55, 24, -1],
 [193, 66, -1, -1],
 [-1, -1, -1, 25],
 [-1, -1, -1, -1],
 [-1, -1, -1, -1],
 [-1, -1, 97, -1],
 [-1, -1, 111, -1],
 [-1, 201, 138, -1],
 [-1, -1, -1, -1],
 [-1, -1, -1, 92],
 [-1, 233, -1, 110],
 [-1, -1, -1, 141],
 [-1, -1, 194, 153],
 [-1, 256, -1, -1],
 [-1, -1, -1, 169],
 [-1, -1, 222, 181]]

In [144]:
# y-ion groups collected by the check loop
# NOTE(review): this cell's execution count (144) is lower than that of the
# cell that builds ytest2 (157), and the saved groups have 4 entries while the
# current code slices charge-1 = 3 -- the output is stale; re-run to refresh.
ytest2

[[-1, -1, -1, -1],
 [-1, -1, -1, -1],
 [-1, -1, -1, -1],
 [-1, -1, -1, -1],
 [148, -1, -1, -1],
 [-1, 186, -1, -1],
 [112, -1, 235, -1],
 [-1, -1, 218, -1],
 [-1, 142, -1, -1],
 [-1, 117, 191, -1],
 [61, 102, -1, -1],
 [52, 82, 175, -1],
 [-1, -1, -1, -1],
 [-1, -1, -1, -1],
 [-1, 50, 96, -1],
 [-1, -1, -1, -1],
 [-1, -1, 42, 155],
 [-1, -1, 30, 105],
 [-1, -1, -1, -1],
 [-1, -1, -1, -1]]