In [1]:
# import libraries
from pyopenms import * # main package used for handling MS data
import os # changing directories
import pandas as pd # creating and manipulating dataframe

 # importing variables and functions from other scripts
from constants import ION_TYPES, DEFAULT_MAX_CHARGE
import match as m
import tensorize as tens
from tensorize import csv, csv_training
from io_local import to_hdf5, from_hdf5

In [2]:
def formatData(mzML, log, realtime):
    global test
    # load the content of the mzML file into the exp variable of type MSExperiment
    exp = MSExperiment()
    MzMLFile().load(mzML, exp)
    
    # loop through the spectra to filter MS3 scans
    specM3 = [] # list of MS3 spectra 
    row_data = []
    for s in exp.getSpectra():
        if s.getMSLevel() == 3:
            specM3.append(s)
            # get scan number
            s_number = s.getNativeID().split(' ')[-1]
            _, scan_number = s_number.split('=')
            # obtain mz and intensity values 
            mz, intensity = s.get_peaks()
            mz_mod = " ".join(str(m) for m in mz)
            intensity_mod = " ".join(str(i) for i in intensity)
            # create dict (rows of dataframe)
            data = {#'MS3_Scan':scan_number,
           'masses_raw':mz_mod,
           'intensities_raw':intensity_mod}
            row_data.append(data)
    print("num MS3s: ", len(specM3))
            
    # create series of all MS3 spectra
    s_series = pd.Series(specM3)
    # create dataframe
    df = pd.DataFrame(row_data)
    
    # parse functions
    def parseScanLine(input):
        x = input.split(" For: ")
        [scan_number, mzs] = x[1].split(", ")
        [precursor_mz, fragment_mz] = mzs.split(";")
        trimmed_fragment_mz = fragment_mz.strip() # trim fragment strings to remove \n
        return [scan_number, precursor_mz, trimmed_fragment_mz]

    def parseTargetIons(input):
        i = input.split('Target Fragment: ')
        ion = i[1].split(',')[0]
        return ion 
    
    # checking lines of log file and creating dictionary of scan numbers and fragment mzs
    try:
        # words to search for
        search = ' Submitted Custom Scan For:'
        search_target = 'Target Fragment:'
        # dict for scan numbers and corresponding fragments 
        scan2frag = dict()
        target_values = []
        with open(log) as f:
            for line in f:
                if search in line:
                    scan_number, precursor_mz, trimmed_fragment_mz = parseScanLine(line)
                    scan2frag[scan_number] = [float(precursor_mz), float(trimmed_fragment_mz)]
                elif search_target in line:
                    target_ion = parseTargetIons(line)
                    target_values.append(target_ion) #to add to final dataframe
        # if the input string doesn't exist in the text file
        if len(scan2frag)==0:
            print("\n\"" + search + "\" is not found in \"" + log + "\"!")
    except FileNotFoundError:
        print("The file does not exist!")
        
    # obtain MS3 scan numbers
    # obtain precursor and fragment mzs directly from the MS3 spectrum
    ms3scan2MZs = dict()
    for s in specM3:
        s_number = s.getNativeID().split(' ')[-1]
        _, scan_number = s_number.split('=')
        fragment, precursor = s.getPrecursors()
        precursor_mz = precursor.getMZ()
        fragment_mz = fragment.getMZ()
        ms3scan2MZs[int(scan_number)] = [round(float(precursor_mz), 4), round(float(fragment_mz), 4)] # 4 decimal places, similar to log
        
    def matchingMS3s(ms2_mzs, ms3_mzs): # either fragment or precursor
        # making sure they are within 100 scans of each other
        too_far = []
        for ms2scan, ms3scan in zip(list(scan2frag), list(ms3scan2MZs)):
            scan_diff = int(ms3scan) - int(ms2scan)
            if scan_diff > 100:
                too_far.append('Scans are not within 100 scans of each other...' + 'MS2 = ' + str(ms2scan) + ' MS3 = ' + str(ms3scan))
        # do they not match off the bat?
        if ms2_mzs != ms3_mzs:
            # taking into consideration rounding discrepencies between the log and the spectrum
            mismatch = []
            for i in range(0, len(list(ms3scan2MZs))):
                precursor_diff = float(list(ms3scan2MZs.values())[i][0]) - float(list(scan2frag.values())[i][0]) 
                if precursor_diff < 0.000101 or (precursor_diff < 0 and precursor_diff > -0.000101): # because sometimes max number will be 0.0001000002 for example
                    pass
                else:
                    mismatch.append(i)
                fragment_diff = float(list(ms3scan2MZs.values())[i][1]) - float(list(scan2frag.values())[i][1]) 
                if fragment_diff < 0.000101 or (fragment_diff < 0 and fragment_diff > -0.000101): # because sometimes max number will be 0.0001000002 for example
                    pass
                else:
                    mismatch.append(i)
            # no mismatch after rounding and within 100 scans
            if len(mismatch) == len(too_far) == 0:
                print('Scans match up after taking rounding discrepencies into consideration')
                ms2_scans = list(scan2frag)
                return ms2_scans
            elif len(mismatch) != 0:
                print('There is mismatch at the following indicies:') # if this is the case, need to do more work...
                for i in mismatch:
                    print(i) 
            elif len(too_far) != 0:
                print(too_far)
        # they match perfectly
        elif ms2_mzs == ms3_mzs:
            # within 100 scans
            if len(too_far) == 0:
                print('Scans match up perfectly!')
                ms2_scans = list(scan2frag)
                return ms2_scans
            else:
                print(too_far)
    
    # make sure that MS3 scans are in the same order as MS2 scans
    ms2_scans = matchingMS3s(list(ms3scan2MZs.values()), list(scan2frag.values()))
    
    # use realtime file to obtain peptide sequence and charge
    # read in peptide sequence from tsv
    tsv = pd.read_csv(realtime, sep='\t')
    # create dictionary with scan # as key and sequence/charge as values
    scan2PeptideCharge = dict([(i, [x,y]) for i, x,y, in zip(tsv['Scan Number'], tsv['Peptide'], tsv['Charge State'])])
    # removing all NaN sequences (not useful)
    scan2PeptideCharge_modified = {k:v for k,v in scan2PeptideCharge.items() if str(v[0]) != 'nan'}
    
    # collect data for dataframe
    seqs = []
    charges = []
    analyzer = []
    collision = []
    energy = int(realtime.split('_')[-2][-2:])
    for scan in ms2_scans:
        if int(scan) in list(scan2PeptideCharge_modified):
            charge = scan2PeptideCharge_modified[int(scan)][1]
            charges.append(charge)
            sequence = scan2PeptideCharge_modified[int(scan)][0]     
            #trimmed_sequence = sequence[2:-2] # remove first two and last two characters 
            seqs.append(sequence)
            # all ms3 scans are orbit trap 
            # to be added to dataframe with other MS3 info
            analyzer.append('FTMS')
            # all scans have same collision energy
            collision.append(energy)
    
    # add all data columns to dataframe
    df = df.assign(charge=charges, modified_sequence=seqs, mass_analyzer=analyzer, collision_energy=collision, target_fragment=target_values)
    
    # remove all modified sequences
    for i in df.index:
        if ('[' or ']') in df['modified_sequence'][i]:
            df.drop(i, axis=0, inplace=True)
            s_series.drop(i, inplace=True)
            
    # obtain target fragment sequences
    fragment_seqs = []
    for i in df.index:
        trimmed_seq = df['modified_sequence'][i][2:-2]
        peptide_object = AASequence.fromString(trimmed_seq) 
        # targeted fragment is a y ion
        if df['target_fragment'][i].startswith('y'):
            y_num = df['target_fragment'][i][-1]
            # the full sequence of the fragment
            full_seq = peptide_object.getSuffix(int(y_num))
        # targeted fragment is a b ion
        elif df['target_fragment'][i].startswith('b'):
            b_num = df['target_fragment'][i][-1]
            # the full sequence of the fragment
            full_seq = peptide_object.getPrefix(int(b_num))
        fragment_seqs.append(str(full_seq))
    df = df.assign(modified_sequence=fragment_seqs)
    
    # remove all sequences more than 30 in length
    for i in df.index:
        if len(df['modified_sequence'][i]) >= 30:
            df.drop(i, axis=0, inplace=True)
            s_series.drop(i, inplace=True)
            
    test = df.copy()
    
    # running augment function
    df_augmented = m.augment(df, ION_TYPES, DEFAULT_MAX_CHARGE)
    # remove all rows that are completely empty
    for i in df_augmented.index:
        if df_augmented['matches_charge1'][i] == df_augmented['matches_charge2'][i] == df_augmented['matches_charge3'][i] == df_augmented['matches_charge4'][i] == df_augmented['matches_charge5'][i] == df_augmented['matches_charge6'][i]:
            df_augmented.drop(i, axis=0, inplace=True)
            s_series.drop(i, inplace=True)
    
    return df_augmented, s_series

In [3]:
# change directory to find data files of interest
os.chdir(r'C:\Users\miar\Desktop\data')

In [4]:
# experiment files 
mzML = 'HEK293T_De_Novo_061122_Glu-C_B_BP_anyLength_HCD10.mzML'
log = 'App-2022-06-12_14-16-26.log'
realtime = 'HEK293T_De_Novo_061122_Glu-C_B_BP_anyLength_HCD10_realtimesearch.tsv'

In [5]:
df1, series1 = formatData(mzML, log, realtime)

num MS3s:  944
Scans match up after taking rounding discrepencies into consideration
b1 58.028740467
b2 221.09206946700002
b3 334.176133467
y1 148.060434167
y2 261.144498167
y3 424.207827167
b1 29.518008467
b2 111.049672967
b3 167.591704967
y1 74.533855317
y2 131.075887317
y3 212.607551817
b1 20.014431133666665
b2 74.36887413366667
b3 112.06356213366666
y1 50.024995700333335
y2 87.71968370033333
y3 142.07412670033332
b1 15.262642467000001
b2 56.028474717
b3 84.299490717
y1 37.770565892
y2 66.041581892
y3 106.807414142
b1 129.102239467
b2 285.203350467
y1 98.060041167
y2 254.161152167
b1 65.054757967
b2 143.105313467
y1 49.533658816999996
y2 127.584214317
b1 43.705597467
b2 95.73930113366667
y1 33.35819803366667
y2 85.39190170033334
b1 33.031017217
b2 72.056294967
y1 25.270467642
y2 64.295745392
b1 114.091340467
b2 270.192451467
y1 148.060434167
y2 304.161545167
b1 57.549308466999996
b2 135.599863967
y1 74.533855317
y2 152.584410817
b1 38.701964467
b2 90.73566813366666
y1 50.02499570033

b1 65.054757967
b2 145.0700823288
b3 193.59646432879998
b4 250.1384963288
y1 44.523290816999996
y2 101.06532281699998
y3 149.591704817
y4 229.6070291788
b1 98.060040467
b2 212.10296746699998
b3 368.204078467
b4 496.299041467
y1 148.060434167
y2 276.155397167
y3 432.256508167
y4 546.2994351670001
b1 49.533658466999995
b2 106.55512196699999
b3 184.605677467
b4 248.653158967
y1 74.533855317
y2 138.581336817
y3 216.631892317
y4 273.65335581700003
b1 33.358197800333336
b2 71.37250680033333
b3 123.406210467
b4 166.104531467
y1 50.024995700333335
y2 92.72331670033333
y3 144.757020367
y4 182.77132936700002
b1 115.050203467
y1 114.091341167
b1 58.028739967
y1 57.549308817
b1 102.054955467
y1 148.060434167
b1 51.531115967
y1 74.533855317
b1 34.68983613366667
y1 50.024995700333335
b1 26.269196217
y1 37.770565892
b1 88.039304467
y1 115.050204167
b1 44.523290466999995
y1 58.028740317
b1 100.075690467
b2 256.176801467
b3 355.24521546700004
b4 515.2758641906
y1 148.060434167
y2 308.0910828906
y3 407.

b1 102.054955467
b2 173.092069467
b3 301.150647467
b4 388.18267546699997
y1 114.091341167
y2 201.12336916699996
y3 329.181947167
y4 400.21906116699995
b1 51.531115967
b2 87.04967296699999
b3 151.078961967
b4 194.59497596699998
y1 57.549308817
y2 101.06532281699998
y3 165.094611817
y4 200.61316881699997
b1 34.68983613366667
b2 58.36887413366666
b3 101.05506680033334
b4 130.0657428003333
y1 38.701964700333335
y2 67.71264070033332
y3 110.398833367
y4 134.07787136699997
b1 132.047761467
y1 164.070606167
b1 66.52751896699999
y1 82.538941317
b1 102.054955467
b2 201.123369467
b3 329.181947467
b4 443.224874467
b5 599.325985467
y1 148.060434167
y2 304.161545167
y3 418.204472167
y4 546.2630501670001
y5 645.331464167
b1 51.531115967
b2 101.065322967
b3 165.094611967
b4 222.116075467
b5 300.166630967
y1 74.533855317
y2 152.584410817
y3 209.605874317
y4 273.63516331700004
y5 323.169370317
b1 129.102239467
y1 98.060041167
b1 65.054757967
y1 49.533658816999996
b1 43.705597467
y1 33.35819803366667
b1 

b3 338.182281467
b4 437.250695467
b5 536.319109467
y1 175.118952167
y2 274.187366167
y3 373.255780167
y4 474.303459167
y5 573.3718731670001
b1 69.536732467
b2 119.07093946699999
b3 169.594778967
b4 219.128985967
b5 268.663192967
y1 88.063114317
y2 137.597321317
y3 187.131528317
y4 237.655367817
y5 287.18957481700005
b1 46.693580467
b2 79.71638513366666
b3 113.39894480033333
b4 146.421749467
b5 179.44455413366666
y1 59.044501700333335
y2 92.067306367
y3 125.09011103366667
y4 158.77267070033335
y5 191.79547536700002
b1 114.091340467
b2 185.12845446699998
b3 332.19686846699994
b4 445.28093246699996
y1 148.060434167
y2 261.144498167
y3 408.21291216699996
y4 479.25002616699993
b1 57.549308466999996
b2 93.06786546699999
b3 166.60207246699997
b4 223.14410446699998
y1 74.533855317
y2 131.075887317
y3 204.61009431699998
y4 240.12865131699996
b1 38.701964467
b2 62.381002466999995
b3 111.40380713366665
b4 149.09849513366666
y1 50.024995700333335
y2 87.71968370033333
y3 136.742488367
y4 160.421526

y1 50.541483817
y2 86.06004081699999
b1 114.091340467
b2 201.12336846699998
y1 58.028741167
y2 145.060769167
b1 57.549308466999996
b2 101.06532246699999
y1 29.518008817000002
y2 73.034022817
b1 38.701964467
b2 67.71264046699999
y1 20.014431367
y2 49.025107367
b1 114.091340467
b2 201.12336846699998
y1 88.039305167
y2 175.07133316699998
b1 57.549308466999996
b2 101.06532246699999
y1 44.523290816999996
y2 88.03930481699999
b1 38.701964467
b2 67.71264046699999
y1 30.017952700333336
y2 59.02862870033332
b1 114.091340467
b2 215.139019467
b3 343.23398246700003
b4 474.274467467
b5 602.369430467
y1 148.060434167
y2 276.155397167
y3 407.195882167
y4 535.290845167
y5 636.3385241670001
b1 57.549308466999996
b2 108.073147967
b3 172.12062946700001
b4 237.640871967
b5 301.688353467
y1 74.533855317
y2 138.581336817
y3 204.101579317
y4 268.149060817
y5 318.672900317
b1 129.065854467
b2 230.11353346700002
y1 157.108388167
y2 258.156067167
b1 65.036565467
b2 115.56040496700001
y1 79.05783231699999
y2 129

b1 114.091340467
b2 227.175404467
b3 324.228168467
y1 58.028741167
y2 155.081505167
y3 268.165569167
b1 57.549308466999996
b2 114.091340467
b3 162.617722467
y1 29.518008817000002
y2 78.044390817
y3 134.586422817
b1 38.701964467
b2 76.396652467
b3 108.74757380033333
y1 20.014431367
y2 52.365352700333325
y3 90.06004070033333
b1 29.278292467
b2 57.549308466999996
b3 81.812499467
y1 15.262642642000001
y2 39.525833641999995
y3 67.796849642
b1 114.091340467
b2 201.12336846699998
b3 288.15539646699995
y1 129.102240167
y2 216.13426816700002
y3 303.166296167
b1 57.549308466999996
b2 101.06532246699999
b3 144.58133646699997
y1 65.054758317
y2 108.570772317
y3 152.086786317
b1 38.701964467
b2 67.71264046699999
b3 96.72331646699998
y1 43.70559770033333
y2 72.71627370033333
y3 101.72694970033332
b1 157.108387467
b2 214.129851467
b3 327.213915467
b4 398.25102946699997
b5 469.28814346699994
y1 129.065855167
y2 200.10296916700003
y3 271.140083167
y4 384.224147167
y5 441.245611167
b1 79.057831967
b2 10

b1 98.060040467
b2 254.161151467
b3 353.229565467
b4 516.292894467
b5 676.3235431905999
y1 148.060434167
y2 308.0910828906
y3 471.1544118906
y4 570.2228258906
y5 726.3239368906001
b1 49.533658466999995
b2 127.584213967
b3 177.118420967
b4 258.650085467
b5 338.66540982879997
y1 74.533855317
y2 154.5491796788
y3 236.0808441788
y4 285.6150511788
y5 363.66560667880003
b1 114.091340467
b2 270.192451467
b3 341.22956546700004
y1 148.075691167
y2 219.11280516699998
y3 375.213916167
b1 57.549308466999996
b2 135.599863967
b3 171.11842096700002
y1 74.541483817
y2 110.06004081699999
y3 188.110596317
b1 38.701964467
b2 90.73566813366666
b3 114.41470613366668
y1 50.030081367
y2 73.70911936699999
y3 125.74282303366665
b1 29.278292467
b2 68.303570217
b3 86.06284871700001
y1 37.774380142
y2 55.53365864199999
y3 94.55893639199999
b1 129.102239467
y1 148.060434167
b1 65.054757967
y1 74.533855317
b1 43.705597467
y1 50.024995700333335
b1 33.031017217
y1 37.770565892
b1 138.066188467
b2 237.13460246699998
y

b1 100.075690467
y1 157.108388167
b1 50.541483467
y1 79.05783231699999
b1 34.03008113366667
y1 53.040980366999996
b1 114.091340467
y1 148.060434167
b1 57.549308466999996
y1 74.533855317
b1 100.075690467
y1 157.108388167
b1 50.541483467
y1 79.05783231699999
b1 34.03008113366667
y1 53.040980366999996
b1 148.075690467
b2 247.14410446699998
y1 134.044784167
y2 233.113198167
b1 74.54148346699999
b2 124.07569046699999
y1 67.526030317
y2 117.060237317
b1 50.03008113366666
b2 83.05288580033333
y1 45.353112366999994
y2 78.37591703366667
b1 37.774379966999994
b2 62.54148346699999
y1 34.266653391999995
y2 59.033756892
b1 138.066188467
b2 294.167299467
b3 393.23571346700004
y1 148.060434167
y2 247.12884816700003
y3 403.229959167
b1 69.536732467
b2 147.587287967
b3 197.12149496700002
y1 74.533855317
y2 124.06806231700001
y3 202.118617817
b1 46.693580467
b2 98.72728413366667
b3 131.75008880033334
y1 50.024995700333335
y2 83.04780036700001
y3 135.08150403366668
b1 35.272004466999995
b2 74.297282217
b

b1 98.060040467
b2 235.11895246699999
y1 148.060434167
y2 285.11934616699995
b1 49.533658466999995
b2 118.06311446699999
y1 74.533855317
y2 143.06331131699997
b1 33.358197800333336
b2 79.04450180033332
y1 50.024995700333335
y2 95.71129970033331
b1 25.270467467
b2 59.535195466999994
y1 37.770565892
y2 72.03529389199998
b1 132.047761467
b2 219.07978946699998
y1 148.060434167
y2 235.092462167
b1 66.52751896699999
b2 110.04353296699999
y1 74.533855317
y2 118.049869317
b1 44.68743813366666
b2 73.69811413366666
y1 50.024995700333335
y2 79.03567170033334
b1 33.767397716999994
b2 55.525404716999994
y1 37.770565892
y2 59.528572892
b1 164.070605467
b2 221.09206946700002
y1 58.028741167
y2 115.050205167
b1 82.538940967
b2 111.049672967
y1 29.518008817000002
y2 58.028740817
b1 157.108387467
b2 320.171716467
b3 391.208830467
b4 538.277244467
b5 625.3092724669999
y1 58.028741167
y2 145.060769167
y3 292.12918316699995
y4 363.1662971669999
y5 526.2296261669999
b1 79.057831967
b2 160.589496467
b3 196.1

b1 100.075690467
b2 228.17065346700002
b3 388.2013021906
b4 519.2417871906
b5 616.2945511906
y1 102.054956167
y2 199.10772016699997
y3 330.14820516699996
y4 490.17885389059995
y5 618.2738168906
b1 50.541483467
b2 114.58896496700001
b3 194.6042893288
b4 260.1245318288
b5 308.6509138288
y1 51.531116317
y2 100.05749831699998
y3 165.57774081699998
y4 245.59306517879998
y5 309.6405466788
b1 34.03008113366667
b2 76.72840213366668
b3 130.0719517082
b4 173.75211337486667
b5 206.1030347082
y1 34.689836367000005
y2 67.04075770033332
y3 110.72091936699998
y4 164.0644689415333
y5 206.76278994153333
b1 25.774379967
b2 57.798120717
b3 97.8057828979
b4 130.5659041479
b5 154.8290951479
y1 26.269196392
y2 50.53238739199999
y3 83.29250864199999
y4 123.30017082289999
y5 155.3239115729
b1 114.091340467
b2 261.15975446699997
y1 58.028741167
y2 205.09715516699998
b1 57.549308466999996
b2 131.08351546699998
y1 29.518008817000002
y2 103.05221581699999
b1 38.701964467
b2 87.72476913366665
y1 20.014431367
y2 69

b2 228.17065346700002
y1 114.091341167
y2 242.186304167
b1 50.541483467
b2 114.58896496700001
y1 57.549308817
y2 121.596790317
b1 34.03008113366667
b2 76.72840213366668
y1 38.701964700333335
y2 81.40028570033333
b1 100.075690467
b2 213.159754467
b3 314.207433467
y1 114.091341167
y2 215.139020167
y3 328.22308416699997
b1 50.541483467
b2 107.083515467
b3 157.607354967
y1 57.549308817
y2 108.073148317
y3 164.61518031699998
b1 34.03008113366667
b2 71.72476913366667
b3 105.40732880033333
y1 38.701964700333335
y2 72.384524367
y3 110.07921236699998
b1 100.075690467
b2 256.176801467
b3 313.198265467
y1 148.075691167
y2 205.09715516699998
y3 361.198266167
b1 50.541483467
b2 128.592038967
b3 157.102770967
y1 74.541483817
y2 103.05221581699999
y3 181.102771317
b1 34.03008113366667
b2 86.06378480033334
b3 105.070939467
y1 50.030081367
y2 69.03723603366666
y3 121.07093970033333
b1 25.774379967
b2 64.799657717
b3 79.055023717
y1 37.774380142
y2 52.02974614199999
y3 91.055023892
b1 20.820959267
b2 52

y4 529.272887167
y5 660.313372167
b1 36.525833467
b2 102.046075967
b3 166.07536496699998
b4 244.125920467
b5 292.65230246699997
y1 74.533855317
y2 123.060237317
y3 201.11079281699998
y4 265.140081817
y5 330.660324317
b1 24.686314467000003
b2 68.36647613366667
b3 111.05266880033332
b4 163.086372467
b5 195.43729380033332
y1 50.024995700333335
y2 82.37591703366667
y3 134.4096207003333
y4 177.095813367
y5 220.77597503366667
b1 100.075690467
y1 148.060434167
b1 50.541483467
y1 74.533855317
b1 34.03008113366667
y1 50.024995700333335
b1 25.774379967
y1 37.770565892
b1 20.820959267
y1 30.417908007000005
b1 114.091340467
y1 148.060434167
b1 57.549308466999996
y1 74.533855317
b1 38.701964467
y1 50.024995700333335
b1 29.278292467
y1 37.770565892
b1 72.044390467
y1 58.028741167
b1 36.525833467
y1 29.518008817000002
b1 24.686314467000003
y1 20.014431367
b1 18.766554967
y1 15.262642642000001
b1 157.108387467
y1 98.060041167
b1 79.057831967
y1 49.533658816999996
b1 53.040980133666665
y1 33.3581980336

b2 121.596789967
y1 74.533855317
y2 131.075887317
b1 157.108387467
y1 114.091341167
b1 79.057831967
y1 57.549308817
b1 53.040980133666665
y1 38.701964700333335
b1 100.075690467
y1 148.060434167
b1 50.541483467
y1 74.533855317
b1 102.054955467
b2 215.139019467
b3 352.197931467
b4 423.23504546699996
y1 134.044784167
y2 205.081898167
y3 342.140810167
y4 455.224874167
b1 51.531115967
b2 108.073147967
b3 176.602603967
b4 212.12116096699998
y1 67.526030317
y2 103.044587317
y3 171.574043317
y4 228.116075317
b1 34.68983613366667
b2 72.38452413366666
b3 118.07082813366667
b4 141.74986613366664
y1 45.353112366999994
y2 69.032150367
y3 114.71845436699999
y4 152.413142367
b1 26.269196217
b2 54.540212217
b3 88.804940217
b4 106.56421871699999
y1 34.266653391999995
y2 52.025931891999996
y3 86.290659892
y4 114.561675892
b1 21.216812267
b2 43.833625067
b3 71.24540746699999
b4 85.452830267
y1 27.614778007
y2 41.822200807
y3 69.233983207
y4 91.850796007
b1 98.060040467
b2 155.081504467
y1 148.060434167
y

y1 116.034220167
y2 229.118284167
y3 342.20234816699997
y4 399.22381216699995
b1 65.054757967
b2 93.565489967
b3 150.107521967
b4 206.649553967
y1 58.520748317
y2 115.062780317
y3 171.60481231699998
y4 200.11554431699997
b1 43.705597467
b2 62.71275213366667
b3 100.40744013366667
b4 138.10212813366667
y1 39.34959103366667
y2 77.04427903366667
y3 114.73896703366665
y4 133.7461217003333
b1 102.054955467
y1 134.044784167
b1 51.531115967
y1 67.526030317
b1 34.68983613366667
y1 45.353112366999994
b1 100.075690467
y1 138.066189167
b1 50.541483467
y1 69.536732817
b1 34.03008113366667
y1 46.693580700333335
b1 58.028740467
b2 129.065854467
b3 228.13426846700003
y1 129.102240167
y2 228.170654167
y3 299.20776816700004
b1 29.518008467
b2 65.036565467
b3 114.57077246700001
y1 65.054758317
y2 114.588965317
y3 150.10752231700002
b1 20.014431133666665
b2 43.69346913366667
b3 76.71627380033334
y1 43.70559770033333
y2 76.728402367
y3 100.407440367
b1 15.262642467000001
b2 33.021920967
b3 57.7890244670000

b1 88.039304467
b2 244.140415467
y1 138.066189167
y2 294.167300167
b1 44.523290466999995
b2 122.573845967
y1 69.536732817
y2 147.587288317
b1 30.017952467
b2 82.05165613366667
y1 46.693580700333335
y2 98.727284367
b1 22.765283467
b2 61.790561217
y1 35.272004642
y2 74.297282392
b1 72.044390467
b2 219.112804467
y1 148.060434167
y2 295.12884816699994
b1 36.525833467
b2 110.060040467
y1 74.533855317
y2 148.06806231699997
b1 24.686314467000003
b2 73.70911913366666
y1 50.024995700333335
y2 99.04780036699998
b1 18.766554967
b2 55.533658466999995
y1 37.770565892
y2 74.53766939199998
b1 132.047761467
b2 318.127074467
b3 465.195488467
y1 88.039305167
y2 235.10771916699997
y3 421.187032167
b1 66.52751896699999
b2 159.567175467
b3 233.101382467
y1 44.523290816999996
y2 118.05749781699998
y3 211.097154317
b1 44.68743813366666
b2 106.71387580033333
b3 155.736680467
y1 30.017952700333336
y2 79.04075736699998
y3 141.06719503366665
b1 33.767397716999994
b2 80.287225967
b3 117.054329467
y1 22.765283642


b1 58.028739967
y1 74.533855317
b1 39.021585467
y1 50.024995700333335
b1 29.518008217000002
y1 37.770565892
b1 115.050203467
y1 148.075691167
b1 58.028739967
y1 74.541483817
b1 39.021585467
y1 50.030081367
b1 29.518008217000002
y1 37.774380142
b1 138.066188467
b2 266.124766467
b3 379.208830467
y1 161.03792589059998
y2 274.1219898906
y3 402.1805678906
b1 69.536732467
b2 133.56602146699998
b3 190.108053467
y1 81.02260117879999
y2 137.5646331788
y3 201.5939221788
b1 46.693580467
b2 89.37977313366666
b3 127.07446113366666
y1 54.350826274866655
y2 92.04551427486666
y3 134.73170694153333
b1 88.039304467
y1 157.108388167
b1 44.523290466999995
y1 79.05783231699999
b1 30.017952467
y1 53.040980366999996
b1 22.765283467
y1 40.032554391999994
b1 72.044390467
y1 182.081170167
b1 36.525833467
y1 91.544223317
b1 24.686314467000003
y1 61.36524103366667
b1 18.766554967
y1 46.275749892
b1 15.214699267
y1 37.222055207000004
b1 12.846795467
y1 31.186258750333337
b1 114.091340467
b2 227.175404467
b3 340.25

b1 38.701964467
b2 71.05288580033333
b3 125.40732880033333
y1 50.030081367
y2 104.384524367
y3 136.73544570033334
b1 72.044390467
y1 114.091341167
b1 36.525833467
y1 57.549308817
b1 24.686314467000003
y1 38.701964700333335
b1 18.766554967
y1 29.278292642
b1 129.102239467
y1 134.044784167
b1 65.054757967
y1 67.526030317
b1 43.705597467
y1 45.353112366999994
b1 100.075690467
y1 157.108388167
b1 50.541483467
y1 79.05783231699999
b1 34.03008113366667
y1 53.040980366999996
b1 100.075690467
y1 157.108388167
b1 50.541483467
y1 79.05783231699999
b1 34.03008113366667
y1 53.040980366999996
b1 148.075690467
b2 261.15975446699997
b3 374.243818467
b4 431.26528246699996
b5 518.297310467
y1 148.060434167
y2 235.092462167
y3 292.113926167
y4 405.197990167
y5 518.2820541670001
b1 74.54148346699999
b2 131.08351546699998
b3 187.625547467
b4 216.13627946699998
b5 259.652293467
y1 74.533855317
y2 118.049869317
y3 146.560601317
y4 203.102633317
y5 259.64466531700003
b1 50.03008113366666
b2 87.72476913366665

b1 41.773108717
b2 70.044124717
b3 84.299490717
y1 34.266653391999995
y2 48.522019392
y3 76.793035392
b1 116.034219467
b2 213.08698346699998
y1 102.054956167
y2 199.10772016699997
b1 58.520747967
b2 107.04712996699999
y1 51.531116317
y2 100.05749831699998
b1 39.34959080033334
b2 71.70051213366666
y1 34.689836367000005
y2 67.04075770033332
b1 29.764012217
b2 54.02720321699999
y1 26.269196392
y2 50.53238739199999
b1 132.047761467
b2 245.131825467
b3 316.168939467
b4 472.270050467
y1 148.060434167
y2 304.161545167
y3 375.198659167
y4 488.28272316700003
b1 66.52751896699999
b2 123.069550967
b3 158.58810796699998
b4 236.638663467
y1 74.533855317
y2 152.584410817
y3 188.102967817
y4 244.644999817
b1 44.68743813366666
b2 82.38212613366666
b3 106.06116413366665
b4 158.09486780033333
y1 50.024995700333335
y2 102.05869936699999
y3 125.73773736700001
y4 163.432425367
b1 33.767397716999994
b2 62.038413717
b3 79.79769221699999
b4 118.82296996699999
y1 37.770565892
y2 76.795843642
y3 94.555122142
y4

b2 185.092068467
y1 129.065855167
y2 226.11861916700002
b1 44.523290466999995
b2 93.049672467
y1 65.036565817
y2 113.56294781700001
b1 30.017952467
b2 62.368873800333326
y1 43.693469367
y2 76.04439070033334
b1 22.765283467
b2 47.028474466999995
y1 33.021921142
y2 57.285112142
b1 114.091340467
b2 270.192451467
y1 134.044784167
y2 290.145895167
b1 57.549308466999996
b2 135.599863967
y1 67.526030317
y2 145.576585817
b1 38.701964467
b2 90.73566813366666
y1 45.353112366999994
y2 97.38681603366666
b1 29.278292467
b2 68.303570217
y1 34.266653391999995
y2 73.291931142
b1 114.091340467
b2 228.134267467
y1 129.102240167
y2 243.145167167
b1 57.549308466999996
b2 114.570771967
y1 65.054758317
y2 122.076221817
b1 38.701964467
b2 76.716273467
y1 43.70559770033333
y2 81.71990670033334
b1 29.278292467
b2 57.789024217
y1 33.031017391999995
y2 61.541749142
b1 157.108387467
b2 258.156066467
y1 100.075691167
y2 201.12337016700002
b1 79.057831967
b2 129.581671467
y1 50.541483817
y2 101.06532331700001
b1 53

In [6]:
print(len(test['masses_raw']))

840


In [6]:
# experiment files 
mzML = 'HEK293T_De_Novo_061122_Glu-C_B_BP_anyLength_HCD15.mzML'
log = 'App-2022-06-12_09-54-27.log'
realtime = 'HEK293T_De_Novo_061122_Glu-C_B_BP_anyLength_HCD15_realtimesearch.tsv'

In [7]:
df2, series2 = formatData(mzML, log, realtime)

Scans match up after taking rounding discrepencies into consideration
b1 102.054955467
b2 189.08698346699998
b3 302.171047467
y1 148.060434167
y2 261.144498167
y3 348.17652616699996
b1 51.531115967
b2 95.04712996699999
b3 151.589161967
y1 74.533855317
y2 131.075887317
y3 174.59190131699998
b1 34.68983613366667
b2 63.700512133666656
b3 101.39520013366666
y1 50.024995700333335
y2 87.71968370033333
y3 116.73035970033332
b1 26.269196217
b2 48.02720321699999
b3 76.298219217
y1 37.770565892
y2 66.041581892
y3 87.79958889199999
b1 164.070605467
b2 221.09206946700002
y1 148.060434167
y2 205.08189816700002
b1 82.538940967
b2 111.049672967
y1 74.533855317
y2 103.04458731700001
b1 55.361719467
b2 74.36887413366667
y1 50.024995700333335
y2 69.032150367
b1 41.773108717
b2 56.028474717
y1 37.770565892
y2 52.025931892
b1 114.091340467
b2 270.192451467
y1 134.044784167
y2 290.145895167
b1 57.549308466999996
b2 135.599863967
y1 67.526030317
y2 145.576585817
b1 38.701964467
b2 90.73566813366666
y1 45.35

y3 111.39872170033333
b1 22.765283467
b2 51.03629946699999
b3 68.79557796699999
y1 37.770565892
y2 55.529844392
y3 83.80086039199999
b1 58.028740467
b2 171.112804467
b3 258.144832467
y1 100.075691167
y2 187.10771916699997
y3 300.191783167
b1 29.518008467
b2 86.060040467
b3 129.576054467
y1 50.541483817
y2 94.05749781699998
y3 150.599529817
b1 20.014431133666665
b2 57.70911913366666
b3 86.71979513366666
y1 34.030081367
y2 63.04075736699999
y3 100.735445367
b1 15.262642467000001
b2 43.533658466999995
b3 65.291665467
y1 25.774380142000002
y2 47.53238714199999
y3 75.803403142
b1 115.050203467
b2 252.10911546699998
y1 72.044391167
y2 209.10330316699998
b1 58.028739967
b2 126.55819596699999
y1 36.525833817
y2 105.05528981699999
b1 98.060040467
b2 155.081504467
y1 148.060434167
y2 205.08189816700002
b1 49.533658466999995
b2 78.044390467
y1 74.533855317
y2 103.04458731700001
b1 98.060040467
b2 155.081504467
y1 148.060434167
y2 205.08189816700002
b1 49.533658466999995
b2 78.044390467
y1 74.5338

b2 145.060768467
b3 273.155731467
b4 360.187759467
y1 148.060434167
y2 235.092462167
y3 363.18742516699996
y4 420.20888916699994
b1 44.523290466999995
b2 73.034022467
b3 137.081503967
b4 180.597517967
y1 74.533855317
y2 118.049869317
y3 182.09735081699998
y4 210.60808281699997
b1 30.017952467
b2 49.025107133666666
b3 91.72342813366667
b4 120.73410413366666
y1 50.024995700333335
y2 79.03567170033334
y3 121.73399270033332
y4 140.74114736699997
b1 72.044390467
b2 129.065854467
b3 186.08731846700002
b4 243.10878246700003
y1 58.028741167
y2 115.050205167
y3 172.07166916699998
y4 229.09313316700002
b1 36.525833467
b2 65.036565467
b3 93.547297467
b4 122.05802946700001
y1 29.518008817000002
y2 58.028740817
y3 86.53947281699999
y4 115.05020481700001
b1 24.686314467000003
b2 43.69346913366667
b3 62.70062380033334
b4 81.70777846700001
y1 20.014431367
y2 39.02158603366667
y3 58.02874070033332
y4 77.03589536700001
b1 18.766554967
b2 33.021920967
b3 47.277286967
b4 61.532652967000004
y1 15.262642642

y1 74.533855317
b1 38.701964467
y1 50.024995700333335
b1 29.278292467
y1 37.770565892
b1 115.050203467
b2 243.10878146700003
b3 330.14080946700005
b4 458.19938746700007
b5 586.2943504670001
y1 147.112804167
y2 275.207767167
y3 403.266345167
y4 490.29837316699997
y5 618.356951167
b1 58.028739967
b2 122.05802896700001
b3 165.57404296700003
b4 229.60333196700003
b5 293.65081346700003
y1 74.060040317
y2 138.107521817
y3 202.136810817
y4 245.65282481699998
y5 309.682113817
b1 39.021585467
b2 81.70777813366668
b3 110.71845413366668
b4 153.40464680033335
b5 196.10296780033335
y1 49.70911903366667
y2 92.40744003366666
y3 135.09363270033333
y4 164.10430870033332
y5 206.79050136700002
b1 29.518008217000002
b2 61.532652717000005
b3 83.29065971700001
b4 115.30530421700001
b5 147.32904496700002
y1 37.533658392
y2 69.557399142
y3 101.572043642
y4 123.33005064199999
y5 155.344695142
b1 129.102239467
y1 72.044391167
b1 65.054757967
y1 36.525833817
b1 43.705597467
y1 24.686314700333337
b1 187.086589467

b4 98.04997946699999
b5 120.66679226699998
y1 20.417829407
y2 43.034642207
y3 70.446424607
y4 89.856977407
y5 117.268759807
b1 100.075690467
y1 129.065855167
b1 50.541483467
y1 65.036565817
b1 34.03008113366667
y1 43.693469367
b1 58.028740467
b2 157.097154467
b3 258.144833467
y1 148.060434167
y2 249.10811316700003
y3 348.176527167
b1 29.518008467
b2 79.052215467
b3 129.576054967
y1 74.533855317
y2 125.05769481700001
y3 174.591901817
b1 20.014431133666665
b2 53.03723580033333
b3 86.719795467
y1 50.024995700333335
y2 83.70755536700001
y3 116.73036003366667
b1 15.262642467000001
b2 40.029745967
b3 65.291665717
y1 37.770565892
y2 63.032485642000005
y3 87.799589142
b1 164.070605467
b2 235.107719467
y1 148.060434167
y2 219.097548167
b1 82.538940967
b2 118.057497967
y1 74.533855317
y2 110.052412317
b1 55.361719467
b2 79.040757467
y1 50.024995700333335
y2 73.70403370033334
b1 41.773108717
b2 59.532387217
y1 37.770565892
y2 55.529844392
b1 129.102239467
b2 230.14991846700002
b3 377.218332467
y1

b1 44.523290466999995
b2 88.039304467
b3 116.550036467
b4 190.084243467
b5 268.134798967
y1 67.526030317
y2 145.576585817
y3 219.11079281699998
y4 247.62152481699997
y5 291.137538817
b1 30.017952467
b2 59.028628467
b3 78.03578313366667
b4 127.05858780033333
b5 179.092291467
y1 45.353112366999994
y2 97.38681603366666
y3 146.4096207003333
y4 165.41677536699999
y5 194.427451367
b1 161.0379251906
y1 134.044784167
b1 81.0226008288
y1 67.526030317
b1 54.35082604153333
y1 45.353112366999994
b1 41.014938647899996
y1 34.266653391999995
b1 88.039304467
y1 148.060434167
b1 44.523290466999995
y1 74.533855317
b1 30.017952467
y1 50.024995700333335
b1 98.060040467
b2 226.118618467
b3 382.219729467
b4 479.272493467
y1 148.060434167
y2 245.113198167
y3 401.21430916699995
y4 529.272887167
b1 49.533658466999995
b2 113.562947467
b3 191.613502967
b4 240.139884967
y1 74.533855317
y2 123.060237317
y3 201.11079281699998
y4 265.140081817
b1 33.358197800333336
b2 76.044390467
b3 128.07809413366667
b4 160.429015

b1 129.065854467
b2 285.166965467
b3 398.251029467
y1 114.091341167
y2 227.175405167
y3 383.27651616699995
b1 65.036565467
b2 143.087120967
b3 199.629152967
y1 57.549308817
y2 114.091340817
y3 192.14189631699998
b1 43.69346913366667
b2 95.72717280033334
b3 133.42186080033335
y1 38.701964700333335
y2 76.39665270033333
y3 128.43035636699997
b1 58.028740467
b2 172.071667467
b3 300.166630467
b4 413.250694467
b5 541.345657467
y1 134.044784167
y2 262.139747167
y3 375.223811167
y4 503.318774167
y5 617.3617011670001
b1 29.518008467
b2 86.539471967
b3 150.586953467
b4 207.128985467
b5 271.176466967
y1 67.526030317
y2 131.573511817
y3 188.115543817
y4 252.163025317
y5 309.18448881700004
b1 20.014431133666665
b2 58.028740133666666
b3 100.72706113366667
b4 138.42174913366668
b5 181.12007013366667
y1 45.353112366999994
y2 88.051433367
y3 125.746121367
y4 168.444442367
y5 206.45875136700002
b1 15.262642467000001
b2 43.773374217
b3 75.797114967
b4 104.068130967
b5 136.091871717
y1 34.266653391999995


y1 72.044391167
y2 143.081505167
y3 299.182616167
y4 430.223101167
y5 558.281679167
b1 36.525833467
b2 100.555122467
b3 166.07536496699998
b4 244.125920467
b5 279.644477467
y1 36.525833817
y2 72.044390817
y3 150.094946317
y4 215.615188817
y5 279.644477817
b1 24.686314467000003
b2 67.37250713366667
b3 111.05266880033332
b4 163.086372467
b5 186.765410467
y1 24.686314700333337
y2 48.365352700333325
y3 100.399056367
y4 144.07921803366665
y5 186.76541070033332
b1 114.091340467
b2 215.139019467
b3 328.223083467
b4 488.2537321906
b5 644.3548431906
y1 148.060434167
y2 304.161545167
y3 464.1921938906
y4 577.2762578906
y5 678.3239368906001
b1 57.549308466999996
b2 108.073147967
b3 164.615179967
b4 244.6305043288
b5 322.6810598288
y1 74.533855317
y2 152.584410817
y3 232.5997351788
y4 289.1417671788
y5 339.66560667880003
b1 157.108387467
b2 214.129851467
b3 327.213915467
b4 398.25102946699997
y1 72.044391167
y2 143.081505167
y3 256.165569167
y4 313.187033167
b1 79.057831967
b2 107.568563967
b3 164

y1 69.536732817
y2 119.07093981699998
y3 168.605146817
y4 197.11587881699998
y5 247.639718317
b1 88.039304467
y1 148.060434167
b1 44.523290466999995
y1 74.533855317
b1 30.017952467
y1 50.024995700333335
b1 22.765283467
y1 37.770565892
b1 58.028740467
b2 186.123703467
b3 314.21866646700005
b4 427.30273046700006
b5 590.3660594670001
y1 148.060434167
y2 311.123763167
y3 424.207827167
y4 552.3027901670001
y5 680.3977531670001
b1 29.518008467
b2 93.565489967
b3 157.61297146700002
b4 214.15500346700003
b5 295.68666796700006
y1 74.533855317
y2 156.065519817
y3 212.607551817
y4 276.65503331700006
y5 340.70251481700006
b1 114.091340467
b2 228.134267467
y1 72.044391167
y2 186.08731816699998
b1 57.549308466999996
b2 114.570771967
y1 36.525833817
y2 93.54729731699999
b1 102.054955467
y1 129.065855167
b1 51.531115967
y1 65.036565817
b1 98.060040467
b2 211.14410446699998
y1 148.060434167
y2 261.144498167
b1 49.533658466999995
b2 106.07569046699999
y1 74.533855317
y2 131.075887317
b1 161.0379251906
y

b3 137.081503967
y1 49.533658816999996
y2 113.581140317
y3 142.091872317
b1 30.017952467
b2 49.025107133666666
b3 91.72342813366667
y1 33.35819803366667
y2 76.05651903366667
y3 95.06367370033333
b1 22.765283467
b2 37.020649467
b3 69.044390217
y1 25.270467642
y2 57.294208392
y3 71.549574392
b1 138.066188467
y1 148.060434167
b1 69.536732467
y1 74.533855317
b1 46.693580467
y1 50.024995700333335
b1 132.047761467
b2 245.131825467
b3 373.226788467
b4 460.258816467
b5 517.280280467
y1 132.04776216699997
y2 189.069226167
y3 276.101254167
y4 404.196217167
y5 517.280281167
b1 66.52751896699999
b2 123.069550967
b3 187.117032467
b4 230.63304646699999
b5 259.143778467
y1 66.52751931699999
y2 95.038251317
y3 138.554265317
y4 202.601746817
y5 259.143778817
b1 44.68743813366666
b2 82.38212613366666
b3 125.08044713366667
b4 154.09112313366666
b5 173.09827780033334
y1 44.68743836699999
y2 63.69459303366667
y3 92.70526903366665
y4 135.40359003366666
y5 173.09827803366667
b1 33.767397716999994
b2 62.03841

b1 114.091340467
b2 211.14410446699998
b3 298.176132467
b4 411.260196467
y1 148.060434167
y2 261.144498167
y3 348.17652616699996
y4 445.229290167
b1 57.549308466999996
b2 106.07569046699999
b3 149.591704467
b4 206.133736467
y1 74.533855317
y2 131.075887317
y3 174.59190131699998
y4 223.118283317
b1 38.701964467
b2 71.05288580033333
b3 100.06356180033333
b4 137.75824980033335
y1 50.024995700333335
y2 87.71968370033333
y3 116.73035970033332
y4 149.08128103366667
b1 29.278292467
b2 53.54148346699999
b3 75.299490467
b4 103.570506467
y1 37.770565892
y2 66.041581892
y3 87.79958889199999
y4 112.062779892
b1 115.050203467
b2 275.0808521906
b3 362.1128801906
y1 134.044784167
y2 221.07681216699999
y3 381.10746089059995
b1 58.028739967
b2 138.0440643288
b3 181.5600783288
y1 67.526030317
y2 111.04204431699999
y3 191.05736867879997
b1 39.021585467
b2 92.36513504153334
b3 121.37581104153332
y1 45.353112366999994
y2 74.363788367
y3 127.70733794153331
b1 138.066188467
b2 195.087652467
b3 323.182615467


y2 114.570772317
b1 72.044390467
b2 171.112804467
y1 114.091341167
y2 213.15975516700001
b1 36.525833467
b2 86.060040467
y1 57.549308817
y2 107.083515817
b1 24.686314467000003
b2 57.70911913366666
y1 38.701964700333335
y2 71.72476936700001
b1 72.044390467
b2 171.112804467
y1 114.091341167
y2 213.15975516700001
b1 36.525833467
b2 86.060040467
y1 57.549308817
y2 107.083515817
b1 98.060040467
y1 148.060434167
b1 49.533658466999995
y1 74.533855317
b1 148.075690467
b2 205.097154467
y1 58.028741167
y2 115.050205167
b1 74.54148346699999
b2 103.052215467
y1 29.518008817000002
y2 58.028740817
b1 98.060040467
y1 148.060434167
b1 49.533658466999995
y1 74.533855317
b1 33.358197800333336
y1 50.024995700333335
b1 187.086589467
b2 318.127074467
b3 432.170001467
b4 546.212928467
b5 674.307891467
y1 114.091341167
y2 242.186304167
y3 356.229231167
y4 470.27215816700004
y5 601.312643167
b1 94.046932967
b2 159.567175467
b3 216.588638967
b4 273.610102467
b5 337.657583967
y1 57.549308817
y2 121.596790317
y3

b1 24.686314467000003
b2 76.72001813366667
b3 114.41470613366666
y1 34.030081367
y2 71.72476936700001
y3 123.75847303366668
b1 98.060040467
b2 211.14410446699998
b3 298.176132467
b4 454.277243467
y1 148.060434167
y2 304.161545167
y3 391.193573167
y4 504.277637167
b1 49.533658466999995
b2 106.07569046699999
b3 149.591704467
b4 227.642259967
y1 74.533855317
y2 152.584410817
y3 196.100424817
y4 252.642456817
b1 33.358197800333336
b2 71.05288580033333
b3 100.06356180033333
b4 152.097265467
y1 50.024995700333335
y2 102.05869936699999
y3 131.069375367
y4 168.764063367
b1 25.270467467
b2 53.54148346699999
b3 75.299490467
b4 114.324768217
y1 37.770565892
y2 76.795843642
y3 98.553850642
y4 126.824866642
b1 114.091340467
b2 227.175404467
b3 355.270367467
y1 148.075691167
y2 276.17065416699995
y3 389.25471816699996
b1 57.549308466999996
b2 114.091340467
b3 178.138821967
y1 74.541483817
y2 138.58896531699997
y3 195.13099731699998
b1 38.701964467
b2 76.396652467
b3 119.094973467
y1 50.030081367
y2 

y4 139.312435142
y5 167.583451142
b1 102.054955467
b2 199.10771946699998
b3 312.191783467
b4 440.250361467
b5 568.3453244670001
y1 148.060434167
y2 276.155397167
y3 404.213975167
y4 517.2980391670001
y5 614.3508031670001
b1 51.531115967
b2 100.05749796699999
b3 156.599529967
b4 220.628818967
b5 284.67630046700003
y1 74.533855317
y2 138.581336817
y3 202.610625817
y4 259.15265781700003
y5 307.67903981700005
b1 34.68983613366667
b2 67.04075746699999
b3 104.73544546699999
b4 147.42163813366668
b5 190.1199591336667
y1 50.024995700333335
y2 92.72331670033333
y3 135.409509367
y4 173.104197367
y5 205.45511870033337
b1 26.269196217
b2 50.53238721699999
b3 78.803403217
b4 110.818047717
b5 142.84178846700001
y1 37.770565892
y2 69.794306642
y3 101.808951142
y4 130.07996714200002
y5 154.34315814200002
b1 98.060040467
b2 235.11895246699999
b3 382.18736646699995
y1 134.044784167
y2 281.11319816699995
y3 418.17211016699997
b1 49.533658466999995
b2 118.06311446699999
b3 191.59732146699997
y1 67.5260303

y3 86.04791203366666
b1 25.774379967
b2 50.03757096699999
b3 67.79684946699999
y1 22.765283642
y2 40.52456214199999
y3 64.787753142
b1 98.060040467
b2 155.081504467
y1 148.060434167
y2 205.08189816700002
b1 49.533658466999995
b2 78.044390467
y1 74.533855317
y2 103.04458731700001
b1 129.102239467
y1 134.044784167
b1 65.054757967
y1 67.526030317
b1 43.705597467
y1 45.353112366999994
b1 33.031017217
y1 34.266653391999995
b1 26.626269067000003
y1 27.614778007
b1 114.091340467
b2 270.192451467
y1 114.091341167
y2 270.192452167
b1 57.549308466999996
b2 135.599863967
y1 57.549308817
y2 135.599864317
b1 38.701964467
b2 90.73566813366666
y1 38.701964700333335
y2 90.735668367
b1 29.278292467
b2 68.303570217
y1 29.278292642
y2 68.303570392
b1 23.624089267
b2 54.844311467
y1 23.624089407
y2 54.844311606999995
b1 148.075690467
y1 148.060434167
b1 74.54148346699999
y1 74.533855317
b1 50.03008113366666
y1 50.024995700333335
b1 37.774379966999994
y1 37.770565892
b1 114.091340467
b2 270.192451467
y1 11

b1 138.066188467
y1 134.044784167
b1 69.536732467
y1 67.526030317
b1 114.091340467
b2 227.175404467
b3 383.27651546699997
b4 530.3449294669999
y1 72.044391167
y2 219.11280516699998
y3 375.213916167
y4 488.297980167
b1 57.549308466999996
b2 114.091340467
b3 192.14189596699998
b4 265.67610296699996
y1 36.525833817
y2 110.06004081699999
y3 188.110596317
y4 244.652628317
b1 38.701964467
b2 76.396652467
b3 128.43035613366666
b4 177.4531608003333
y1 24.686314700333337
y2 73.70911936699999
y3 125.74282303366665
y4 163.43751103366665
b1 129.102239467
y1 130.049870167
b1 65.054757967
y1 65.528573317
b1 43.705597467
y1 44.021474366999996
b1 33.031017217
y1 33.267924891999996
b1 157.108387467
b2 286.150980467
y1 98.060041167
y2 227.10263416700002
b1 79.057831967
b2 143.579128467
y1 49.533658816999996
y2 114.05495531700001
b1 53.040980133666665
b2 96.05517780033334
y1 33.35819803366667
y2 76.37239570033334
b1 40.032554217
b2 72.293202467
y1 25.270467642
y2 57.531115892
b1 114.091340467
y1 148.0604

y4 81.84541920699999
y5 96.052842007
b1 114.091340467
b2 185.12845446699998
b3 286.176133467
b4 414.234711467
y1 102.054956167
y2 230.113534167
y3 331.161213167
y4 402.198327167
b1 57.549308466999996
b2 93.06786546699999
b3 143.591704967
b4 207.620993967
y1 51.531116317
y2 115.560405317
y3 166.084244817
y4 201.602801817
b1 38.701964467
b2 62.381002466999995
b3 96.06356213366666
b4 138.74975480033333
y1 34.689836367000005
y2 77.37602903366667
y3 111.05858870033335
y4 134.73762670033332
b1 29.278292467
b2 47.03757096699999
b3 72.299490717
b4 104.314135217
y1 26.269196392
y2 58.283840892
y3 83.545760642
y4 101.305039142
b1 98.060040467
b2 155.081504467
b3 292.140416467
b4 455.20374546700003
y1 148.060434167
y2 311.123763167
y3 448.182675167
y4 505.204139167
b1 49.533658466999995
b2 78.044390467
b3 146.573846467
b4 228.10551096700001
y1 74.533855317
y2 156.065519817
y3 224.594975817
y4 253.105707817
b1 33.358197800333336
b2 52.365352467
b3 98.051656467
b4 152.40609946700002
y1 50.024995700

b1 58.028740467
b2 129.065854467
b3 228.13426846700003
y1 129.102240167
y2 228.170654167
y3 299.20776816700004
b1 29.518008467
b2 65.036565467
b3 114.57077246700001
y1 65.054758317
y2 114.588965317
y3 150.10752231700002
b1 20.014431133666665
b2 43.69346913366667
b3 76.71627380033334
y1 43.70559770033333
y2 76.728402367
y3 100.407440367
b1 15.262642467000001
b2 33.021920967
b3 57.789024467000004
y1 33.031017391999995
y2 57.798120892
y3 75.55739939200001
b1 88.039304467
y1 148.075691167
b1 44.523290466999995
y1 74.541483817
b1 30.017952467
y1 50.030081367
b1 129.065854467
b2 260.106339467
b3 331.143453467
y1 72.044391167
y2 143.081505167
y3 274.121990167
b1 65.036565467
b2 130.556807967
b3 166.07536496699998
y1 36.525833817
y2 72.044390817
y3 137.564633317
b1 43.69346913366667
b2 87.37363080033333
b3 111.05266880033332
y1 24.686314700333337
y2 48.365352700333325
y3 92.045514367
b1 33.021920967
b2 65.782042217
b3 83.54132071699999
y1 18.766555142
y2 36.525833641999995
y3 69.28595489199999

b1 100.075690467
b2 213.159754467
b3 284.196868467
y1 100.075691167
y2 171.11280516699998
y3 284.196869167
b1 50.541483467
b2 107.083515467
b3 142.602072467
y1 50.541483817
y2 86.06004081699999
y3 142.602072817
b1 34.03008113366667
b2 71.72476913366667
b3 95.40380713366666
y1 34.030081367
y2 57.70911936699999
y3 95.403807367
b1 25.774379967
b2 54.045395967
b3 71.804674467
y1 25.774380142000002
y2 43.53365864199999
y3 71.804674642
b1 72.044390467
b2 235.107719467
y1 134.044784167
y2 297.108113167
b1 36.525833467
b2 118.057497967
y1 67.526030317
y2 149.057694817
b1 24.686314467000003
b2 79.040757467
y1 45.353112366999994
y2 99.707555367
b1 72.044390467
b2 129.065854467
b3 242.14991846700002
y1 114.091341167
y2 227.175405167
y3 284.196869167
b1 36.525833467
b2 65.036565467
b3 121.57859746700001
y1 57.549308817
y2 114.091340817
y3 142.602072817
b1 24.686314467000003
b2 43.69346913366667
b3 81.38815713366667
y1 38.701964700333335
y2 76.39665270033333
y3 95.403807367
b1 18.766554967
b2 33.02

b1 132.047761467
y1 114.091341167
b1 66.52751896699999
y1 57.549308817
b1 44.68743813366666
y1 38.701964700333335
b1 114.091340467
b2 171.112804467
b3 318.181218467
b4 419.228897467
b5 516.281661467
y1 134.044784167
y2 231.09754816699999
y3 332.145227167
y4 479.21364116699993
y5 536.235105167
b1 57.549308466999996
b2 86.060040467
b3 159.594247467
b4 210.118086967
b5 258.644468967
y1 67.526030317
y2 116.05241231699999
y3 166.57625181699999
y4 240.11045881699997
y5 268.621190817
b1 38.701964467
b2 57.70911913366666
b3 106.73192380033333
b4 140.414483467
b5 172.76540480033336
y1 45.353112366999994
y2 77.70403370033333
y3 111.38659336699999
y4 160.40939803366663
y5 179.41655270033334
b1 29.278292467
b2 43.533658466999995
b3 80.300761967
b4 105.562681717
b5 129.825872717
y1 34.266653391999995
y2 58.529844391999994
y3 83.79176414199999
y4 120.55886764199998
y5 134.814233642
b1 23.624089267
b2 35.028382067
b3 64.442064867
b4 84.651600667
b5 104.062153467
y1 27.614778007
y2 47.025330807
y3 67.

b5 146.099696967
y1 37.770565892
y2 66.041581892
y3 105.066859642
y4 133.337875642
y5 158.599795392
b1 98.060040467
b2 226.155003467
b3 354.24996646700004
b4 455.29764546700005
y1 148.060434167
y2 249.10811316700003
y3 377.203076167
y4 505.298039167
b1 49.533658466999995
b2 113.581139967
b3 177.62862146700002
b4 228.15246096700002
y1 74.533855317
y2 125.05769481700001
y3 189.105176317
y4 253.152657817
b1 33.358197800333336
b2 76.05651880033334
b3 118.75483980033334
b4 152.437399467
y1 50.024995700333335
y2 83.70755536700001
y3 126.405876367
y4 169.104197367
b1 25.270467467
b2 57.294208217
b3 89.317948967
b4 114.57986871700001
y1 37.770565892
y2 63.032485642000005
y3 95.056226392
y4 127.079967142
b1 20.417829267
b2 46.036821867
b3 71.655814467
b4 91.86535026700001
y1 30.417908007000005
y2 50.627443807000006
y3 76.246436407
y4 101.865429007
b1 72.044390467
b2 143.081504467
b3 214.118618467
b4 285.155732467
y1 72.044391167
y2 143.081505167
y3 214.11861916700002
y4 285.155733167
b1 36.5258

b2 334.15500346700003
b3 481.223417467
y1 129.102240167
y2 276.17065416699995
y3 423.2390681669999
b1 94.046932967
b2 167.581139967
b3 241.115346967
y1 65.054758317
y2 138.58896531699997
y3 212.12317231699996
b1 63.03371413366667
b2 112.05651880033334
b3 161.079323467
y1 43.70559770033333
y2 92.72840236699999
y3 141.75120703366665
b1 102.054955467
b2 215.139019467
b3 302.171047467
b4 389.20307546699996
b5 486.255839467
y1 132.04776216699997
y2 229.100526167
y3 316.13255416699997
y4 403.16458216699993
y5 516.248646167
b1 51.531115967
b2 108.073147967
b3 151.589161967
b4 195.10517596699998
b5 243.631557967
y1 66.52751931699999
y2 115.053901317
y3 158.56991531699998
y4 202.08592931699997
y5 258.627961317
b1 34.68983613366667
b2 72.38452413366666
b3 101.39520013366666
b4 130.40587613366665
b5 162.75679746699998
y1 44.68743836699999
y2 77.03835970033333
y3 106.04903570033332
y4 135.0597117003333
y5 172.75439970033332
b1 26.269196217
b2 54.540212217
b3 76.298219217
b4 98.05622621699999
b5 12

b2 209.103302467
y1 148.060434167
y2 219.097548167
b1 69.536732467
b2 105.055289467
y1 74.533855317
y2 110.052412317
b1 46.693580467
b2 70.372618467
y1 50.024995700333335
y2 73.70403370033334
b1 35.272004466999995
b2 53.031282966999996
y1 37.770565892
y2 55.529844392
b1 72.044390467
b2 235.107719467
b3 363.20268246700005
b4 500.261594467
y1 148.060434167
y2 285.11934616699995
y3 413.21430916699995
y4 576.277638167
b1 36.525833467
b2 118.057497967
b3 182.10497946700002
b4 250.634435467
y1 74.533855317
y2 143.06331131699997
y3 207.11079281699998
y4 288.642457317
b1 24.686314467000003
b2 79.040757467
b3 121.73907846700001
b4 167.425382467
y1 50.024995700333335
y2 95.71129970033331
y3 138.4096207003333
y4 192.7640637003333
b1 18.766554967
b2 59.532387217
b3 91.55612796700001
b4 125.820855967
y1 37.770565892
y2 72.03529389199998
y3 104.05903464199999
y4 144.824866892
b1 138.066188467
y1 132.10190516699998
b1 69.536732467
y1 66.55459081699999
b1 46.693580467
y1 44.70548603366666
b1 35.272004

In [10]:
print(len(test['masses_raw']))

809


In [11]:
# experiment files 
mzML = 'HEK293T_De_Novo_061122_Glu-C_B_BP_anyLength_HCD20.mzML'
log = 'App-2022-06-12_07-05-17.log'
realtime = 'HEK293T_De_Novo_061122_Glu-C_B_BP_anyLength_HCD20_realtimesearch.tsv'

In [12]:
df3, series3 = formatData(mzML, log, realtime)

Scans match up after taking rounding discrepencies into consideration


In [13]:
print(len(test['masses_raw']))

703


In [14]:
# experiment files 
mzML = 'HEK293T_De_Novo_061122_Glu-C_B_BP_anyLength_HCD30.mzML'
log = 'App-2022-06-12_22-28-53.log'
realtime = 'HEK293T_De_Novo_061122_Glu-C_B_BP_anyLength_HCD30_realtimesearch.tsv'

In [15]:
df4, series4 = formatData(mzML, log, realtime)

In [16]:
# experiment files 
mzML = 'HEK293T_De_Novo_061122_Glu-C_B_BP_anyLength_HCD35.mzML'
log = 'App-2022-06-11_21-20-46.log'
realtime = 'HEK293T_De_Novo_061122_Glu-C_B_BP_anyLength_HCD35_realtimesearch.tsv'

In [17]:
df5, series5 = formatData(mzML, log, realtime)

Scans match up after taking rounding discrepencies into consideration


In [18]:
print(len(test['masses_raw']))

1294


In [19]:
# experiment files 
mzML = 'HEK293T_De_Novo_061122_Glu-C_B_BP_anyLength2_HCD35.mzML'
log = 'App-2022-06-12_19-59-11.log'
realtime = 'HEK293T_De_Novo_061122_Glu-C_B_BP_anyLength2_HCD35_realtimesearch.tsv'

In [20]:
df6, series6 = formatData(mzML, log, realtime)

In [21]:
# experiment files 
mzML = 'HEK293T_De_Novo_061122_Glu-C_B_BP_anyLength_HCD40.mzML'
log = 'App-2022-06-12_16-55-56.log'
realtime = 'HEK293T_De_Novo_061122_Glu-C_B_BP_anyLength_HCD40_realtimesearch.tsv'

In [22]:
df7, series7 = formatData(mzML, log, realtime)

Scans match up after taking rounding discrepencies into consideration


In [23]:
print(len(test['masses_raw']))

914


In [18]:
# combine all dataframes 
all_dfs = [df1, df2, df3, df4, df5, df6, df7]
concat_df = pd.concat(all_dfs).reset_index(drop=True)

In [19]:
concat_df

Unnamed: 0,masses_raw,intensities_raw,charge,modified_sequence,mass_analyzer,collision_energy,target_fragment,matches_charge1,masses_the_charge1,masses_raw_charge1,...,masses_raw_charge4,intensities_raw_charge4,matches_charge5,masses_the_charge5,masses_raw_charge5,intensities_raw_charge5,matches_charge6,masses_the_charge6,masses_raw_charge6,intensities_raw_charge6
0,54.40834045410156 72.08840942382812 72.6154479...,1847.457 1044.1062 944.0088 927.47253 941.6198...,2,SE,FTMS,10,y2,y1,148.060434167,148.0601806640625,...,,,,,,,,,,
1,54.40696716308594 54.40972137451172 70.0648117...,1802.849 918.2612 4112.844 878.95984 1029.2969...,2,IP,FTMS,10,b2,y1,98.060041167,98.06021118164062,...,,,,,,,,,,
2,53.29237365722656 54.40982437133789 58.5242881...,848.94354 1105.8538 1257.8536 921.262 1054.482...,2,TK,FTMS,10,b2,y1,129.102240167,129.1013946533203,...,,,,,,,,,,
3,62.79367446899414 66.73774719238281 68.6735992...,910.83417 941.222 894.2407 840.0121 787.6718 8...,2,TK,FTMS,10,b2,y1,129.102240167,129.102294921875,...,,,,,,,,,,
4,50.54601287841797 54.40740966796875 54.4102096...,1046.0387 1604.429 1038.8164 902.5171 960.406 ...,2,SE,FTMS,10,y2,y1,148.060434167,148.0605010986328,...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
223,54.408470153808594 70.06488800048828 70.590484...,3260.9204 6078.789 2339.1902 10882.506 2424.62...,4,PKKTE,FTMS,40,y5,b2,226.155003467,226.1552276611328,...,,,,,,,,,,
224,53.899383544921875 54.40927505493164 66.511924...,2461.784 3039.2222 2540.4285 8154.4355 2572.69...,5,PKKTE,FTMS,40,y5,b2,226.155003467,226.15432739257812,...,,,,,,,,,,
225,54.40829086303711 70.06476593017578 84.0803070...,3128.156 7825.7524 15576.507 2519.0374 2384.07...,4,PKKTE,FTMS,40,y5,b2;y3,226.155003467;377.203076167,226.15536499023438;377.2024841308594,...,,,,,,,,,,
226,54.40895080566406 59.92170333862305 70.0650405...,945.8575 1072.6782 2115.838 964.55164 2905.835...,5,PKKTE,FTMS,40,y5,b2,226.155003467,226.15496826171875,...,,,,,,,,,,


In [35]:
len(df5['masses_raw'])

24

In [36]:
df5

Unnamed: 0,masses_raw,intensities_raw,charge,modified_sequence,mass_analyzer,collision_energy,target_fragment,matches_charge1,masses_the_charge1,masses_raw_charge1,...,masses_raw_charge4,intensities_raw_charge4,matches_charge5,masses_the_charge5,masses_raw_charge5,intensities_raw_charge5,matches_charge6,masses_the_charge6,masses_raw_charge6,intensities_raw_charge6
89,55.17504119873047 55.54441452026367 74.0261840...,932.8862 892.2754 861.7105 946.59955 1154.0492...,4,IK,FTMS,40,b2,y1,129.102240167,129.10296630859375,...,,,,,,,,,,
94,54.1179084777832 54.406646728515625 54.4093055...,901.92676 1791.4514 1094.369 908.56146 875.209...,4,KGTW,FTMS,40,b4,y2,288.134269167,288.13250732421875,...,,,,,,,,,,
143,50.78153991699219 54.40787887573242 59.3819236...,821.8711 2254.3599 841.2114 959.0527 910.83997...,4,VQQD,FTMS,40,y4,y1,134.044784167,134.04466247558594,...,,,,,,,,,,
152,50.86899948120117 54.40662384033203 57.6849937...,904.1217 2102.2583 965.2841 2001.3381 923.6167...,3,KE,FTMS,40,y2,b1,129.102239467,129.1021270751953,...,,,,,,,,,,
273,54.40819549560547 59.286983489990234 88.021278...,2060.0818 919.87756 1835.3441 2854.8618 4727.2...,2,CD,FTMS,40,y2,y1,134.044784167,134.04437255859375,...,,,,,,,,,,
294,54.39556884765625 65.1775131225586 66.88154602...,1038.9049 965.1095 1054.258 1038.2192 10563.46...,2,KCD,FTMS,40,y3,b1,129.102239467,129.1019287109375,...,,,,,,,,,,
297,54.406497955322266 54.4093132019043 57.8580627...,1386.6691 1342.8248 947.1976 892.7618 901.8017...,4,PR,FTMS,40,b2,b1,98.060040467,98.05977630615234,...,,,,,,,,,,
323,51.343997955322266 52.859039306640625 54.40716...,886.34045 1028.2684 1461.3489 981.7693 1041.94...,2,PPPKE,FTMS,40,y5,b1,98.060040467,98.05935668945312,...,,,,,,,,,,
434,54.40701675415039 65.02772521972656 70.1387329...,1355.3816 1076.7108 1128.4684 1132.9642 2520.1...,3,SHQ,FTMS,40,b3,b2,225.098216467,225.09890747070312,...,,,,,,,,,,
466,54.40760040283203 74.05976104736328 84.0441513...,1652.8474 2806.077 2729.4895 1378.8862 1064.59...,4,TE,FTMS,40,b2,b1,102.054955467,102.0545654296875,...,,,,,,,,,,


In [20]:
# concatenate all spectra (series) as well
all_series = [series1, series2, series3, series4, series5, series6, series7]
concat_series = pd.concat(all_series).reset_index(drop=True)
concat_series

0      <pyopenms.pyopenms_5.MSSpectrum object at 0x00...
1      <pyopenms.pyopenms_5.MSSpectrum object at 0x00...
2      <pyopenms.pyopenms_5.MSSpectrum object at 0x00...
3      <pyopenms.pyopenms_5.MSSpectrum object at 0x00...
4      <pyopenms.pyopenms_5.MSSpectrum object at 0x00...
                             ...                        
223    <pyopenms.pyopenms_5.MSSpectrum object at 0x00...
224    <pyopenms.pyopenms_5.MSSpectrum object at 0x00...
225    <pyopenms.pyopenms_5.MSSpectrum object at 0x00...
226    <pyopenms.pyopenms_5.MSSpectrum object at 0x00...
227    <pyopenms.pyopenms_5.MSSpectrum object at 0x00...
Length: 228, dtype: object

In [23]:
# to run csv function 'charge' needs to be replaced with 'precursor_charge' ?
concat_df.rename(columns = {'charge':'precursor_charge'}, inplace = True)
# create dictionary of data
data = csv_training(concat_df, concat_series)
print(data)
# convert dictionary data to hdf5 
to_hdf5(data, 'hdf5_data.hdf5')

[[148.06043417  74.53385532  -1.          88.03930447  44.52329047
   -1.          -1.          -1.          -1.          -1.
   -1.          -1.          -1.          -1.          -1.
   -1.          -1.          -1.          -1.          -1.
   -1.          -1.          -1.          -1.          -1.
   -1.          -1.          -1.          -1.          -1.
   -1.          -1.          -1.          -1.          -1.
   -1.          -1.          -1.          -1.          -1.
   -1.          -1.          -1.          -1.          -1.
   -1.          -1.          -1.          -1.          -1.
   -1.          -1.          -1.          -1.          -1.
   -1.          -1.          -1.          -1.          -1.
   -1.          -1.          -1.          -1.          -1.
   -1.          -1.          -1.          -1.          -1.
   -1.          -1.          -1.          -1.          -1.
   -1.          -1.          -1.          -1.          -1.
   -1.          -1.          -1.          -1.   

  array = array / maxima[:, numpy.newaxis]


In [25]:
len(data['collision_energy_aligned_normed'])

228