In [1]:
from urllib.request import urlopen
from IPython.display import SVG
import matplotlib.pyplot as plt
from rdkit import Chem
from tqdm import tqdm
import pandas as pd
import xlsxwriter
import argparse
import pickle
import numpy as np
import json

import os
import sys
module_path = os.path.abspath(os.path.join('..'))
if module_path not in sys.path:
    sys.path.append(module_path)

import visualizer as visualizer
import utils as utils
import handle_network as hn
import fragmentation_py as fragmentation_py
import library_downloader as library_downloader
import SiteLocator as modSite

In [2]:
libraries = {
    "GNPS-MSMLS": "https://external.gnps2.org/gnpslibrary/GNPS-MSMLS.json",
    "GNPS-NIH-NATURALPRODUCTSLIBRARY_ROUND2_POSITIVE": "https://external.gnps2.org/gnpslibrary/GNPS-NIH-NATURALPRODUCTSLIBRARY_ROUND2_POSITIVE.json",
    "GNPS-NIH-SMALLMOLECULEPHARMACOLOGICALLYACTIVE": "https://external.gnps2.org/gnpslibrary/GNPS-NIH-SMALLMOLECULEPHARMACOLOGICALLYACTIVE.json",
    "MIADB": "https://external.gnps2.org/gnpslibrary/MIADB.json",
    "BERKELEY-LAB": "https://external.gnps2.org/gnpslibrary/BERKELEY-LAB.json"
    # "GNPS-LIBRARY": "https://gnps-external.ucsd.edu/gnpslibrary/GNPS-LIBRARY.json"
}

In [3]:
library ="BERKELEY-LAB"
if not os.path.exists( os.path.join("../data/libraries", library)):
    url = "https://gnps-external.ucsd.edu/gnpslibrary/" + library + ".json"
    location = "../data/libraries/" + library + "/"
    library_downloader.download(url, location, 0.5, 0.1)

with open(os.path.join("../data/libraries", library, "data_dict_filtered.pkl"), "rb") as f:
    data_dict_filtered = pickle.load(f)

# load matches
with open(os.path.join("../data/libraries", library, "matches.pkl"), "rb") as f:
    matches = pickle.load(f)

# load cachedStructures_filtered
with open(os.path.join("../data/libraries", library, "cachedStructures.pkl"), "rb") as f:
    cachedStructures_filtered = pickle.load(f)

In [4]:
print (matches[1].pop())

('CCMSLIB00010113508', 'CCMSLIB00010121997')


In [5]:
print (data_dict_filtered['CCMSLIB00010105110']['Precursor_MZ'], data_dict_filtered['CCMSLIB00010114866']['Precursor_MZ'])

253.051 237.056


In [6]:
## create cashe helpers
helpers = dict()
for match in matches[1]:
    if match[0] not in helpers:
        helpers[match[0]] = []
    helpers[match[0]].append(match[1])

print(len(helpers))

6778


In [7]:
colums = ["mol1ID", "mol2ID", "mol1smile", "mol2smile", "delta_mass",
        "#_matched_peaks", "#_shifted_peaks", "#_unshifted_peaks", 
        "Closest_Max_Atom_Distance", "Count_Max", "Is_Max", "cosine", 
        "score", "best_score", "random_guess", "random_prob", "url"]




In [8]:
print (data_dict_filtered['CCMSLIB00010105110'].keys())
print(type(json.loads(data_dict_filtered['CCMSLIB00010105110']['peaks_json'])[0][1]))

dict_keys(['spectrum_id', 'source_file', 'task', 'scan', 'ms_level', 'library_membership', 'spectrum_status', 'peaks_json', 'splash', 'submit_user', 'Compound_Name', 'Ion_Source', 'Compound_Source', 'Instrument', 'PI', 'Data_Collector', 'Adduct', 'Scan', 'Precursor_MZ', 'ExactMass', 'Charge', 'CAS_Number', 'Pubmed_ID', 'Smiles', 'INCHI', 'INCHI_AUX', 'Library_Class', 'SpectrumID', 'Ion_Mode', 'create_time', 'task_id', 'user_id', 'InChIKey_smiles', 'InChIKey_inchi', 'Formula_smiles', 'Formula_inchi', 'url', 'annotation_history'])
<class 'float'>


In [9]:
m0, m1 = matches[1].pop()
molMol = cachedStructures_filtered[m1]
modifMol = cachedStructures_filtered[m0]
molUsi = hn.generate_usi(m1, library)
modifUsi = hn.generate_usi(m0, library)
molSmiles = data_dict_filtered[m1]['Smiles']
modifSmiles = data_dict_filtered[m0]['Smiles']
site = modSite.SiteLocator(data_dict_filtered[m1], data_dict_filtered[m0], molMol)

{'adduct': 'M+H', 'filter_peaks_method': 'top_k', 'filter_peaks_variable': 50, 'mz_tolerance': 0.05, 'ppm': 1.01, 'min_score_ratio': 0.5, 'distance_decay': 0.1}
[[78.001022, 2845.0], [82.384644, 3093.0], [82.544777, 2218.0], [98.922401, 2464.0], [115.963432, 2368.0], [122.392197, 2604.0], [130.065186, 17522.0], [146.059952, 15551.0], [157.947876, 3406.0], [158.060211, 32395.0], [231.112534, 30293.0], [243.112549, 296748.0], [271.10733, 1036091.0], [284.137878, 6126.0]]


In [25]:
df = pd.DataFrame(columns=colums)
count = 0
for match in tqdm(matches[1]):
    if count > 100:
        break
    try:
        
        m0, m1 = match
        if data_dict_filtered[m0]['Adduct'] != data_dict_filtered[m1]['Adduct'] or data_dict_filtered[m0]['Adduct'] != "M+H":
            continue
        molMol = cachedStructures_filtered[m1]
        modifMol = cachedStructures_filtered[m0]
        molUsi = hn.generate_usi(m1, library)
        modifUsi = hn.generate_usi(m0, library)
        molSmiles = data_dict_filtered[m1]['Smiles']
        modifSmiles = data_dict_filtered[m0]['Smiles']
        site = modSite.SiteLocator(data_dict_filtered[m1], data_dict_filtered[m0], molSmiles)
        modifLoc = utils.calculateModificationSites(modifMol, molMol, False)
        peak_presence_only = False
        combine = True
        # calculate score
        res = site.accuracy_score(modifLoc[0], peak_presence_only=peak_presence_only, combine=combine, return_all=True)

        # generate random probability array 1-hot
        prob = np.zeros(site.molMol.GetNumAtoms())
        randInt = np.random.randint(0, site.molMol.GetNumAtoms())
        prob[randInt] = 1
        res2 = site.tempScore(modifLoc[0], prob, True)

        # generate random probability array distribution
        prb = np.random.rand(site.molMol.GetNumAtoms())
        prb = prb / prb.sum()
        res3 = site.tempScore(modifLoc[0], prb, True)

        # get max score
        maxScore = site.get_max_possible_score(modifLoc[0], peak_presence_only=peak_presence_only, combine=combine)
        
    # "mol1ID", "mol2ID", "mol1smile", "mol2smile", "delta_mass",
    #         "#_matched_peaks", "#_shifted_peaks", "#_unshifted_peaks", 
    #         "Closest_Max_Atom_Distance", "Count_Max", "Is_Max", "cosine", 
    #         "score", "best_score", "random_guess", "random_prob", "url"

        df = pd.concat([df, pd.DataFrame.from_records([{"mol1ID": molUsi, "mol2ID": modifUsi, "mol1smile": molSmiles, "mol2smile": data_dict_filtered[m0]['Smiles'], 
                                                        "delta_mass": abs(float(data_dict_filtered[m0]['Precursor_MZ']) - float(data_dict_filtered[m1]['Precursor_MZ'])),
                                                        "#_matched_peaks": len(site.matchedPeaks), "#_shifted_peaks": len(site.shifted), "#_unshifted_peaks": len(site.unshifted),
                                                        "Closest_Max_Atom_Distance": res['closestMaxAtomDistance'], "Count_Max": res['count'], "Is_Max": res['isMax'], "cosine":site.cosine, 
                                                        "score": res['score'], "best_score": maxScore, "random_guess":res2['score'], "random_prob":res3['score'], 
                                                        "url":visualizer.make_url("http://reza.cs.ucr.edu/", molUsi, modifUsi, molSmiles, modifSmiles, args=None) }])], ignore_index=True)
        count += 1
    except:
        # print stack trace
        # import traceback
        # traceback.print_exc()
        pass


  0%|          | 0/27814 [00:00<?, ?it/s]

{'adduct': 'M+H', 'filter_peaks_method': 'top_k', 'filter_peaks_variable': 50, 'mz_tolerance': 0.05, 'ppm': 1.01, 'min_score_ratio': 0.5, 'distance_decay': 0.1}
[[65.039177, 24347.0], [82.393547, 3083.0], [85.268768, 3123.0], [91.054413, 3355371.0], [115.113907, 3687.0], [123.31913, 2949.0], [146.058929, 3839.0], [167.440704, 3226.0], [174.092636, 3187.0], [183.101181, 2894.0], [188.07045, 40210.0], [202.086182, 38907.0], [262.190186, 3757.0], [280.132996, 3297619.0]]
{'adduct': 'M+H', 'filter_peaks_method': 'top_k', 'filter_peaks_variable': 50, 'mz_tolerance': 0.05, 'ppm': 1.01, 'min_score_ratio': 0.5, 'distance_decay': 0.1}
[[53.038921, 4090.0], [55.018261, 5393.0], [57.03442, 2462.0], [65.038986, 18198.0], [79.05455, 6835.0], [84.960037, 26482.0], [89.039093, 4122.0], [91.054367, 218576.0], [92.375977, 3493.0], [103.053963, 13694.0], [105.033188, 4456.0], [105.069847, 3836.0], [107.04892, 14861.0], [115.054153, 3748.0], [118.04129, 19884.0], [119.01255, 28974.0], [119.049149, 133963

  0%|          | 13/27814 [00:00<14:29, 31.96it/s]

{'adduct': 'M+H', 'filter_peaks_method': 'top_k', 'filter_peaks_variable': 50, 'mz_tolerance': 0.05, 'ppm': 1.01, 'min_score_ratio': 0.5, 'distance_decay': 0.1}
[[53.002739, 10768.0], [56.219601, 10511.0], [63.86161, 11151.0], [65.038986, 42848.0], [66.740646, 10660.0], [67.018257, 894382.0], [68.997498, 505966.0], [69.033836, 18342.0], [79.209641, 10321.0], [81.034012, 26202.0], [83.013077, 11728.0], [88.964897, 9779.0], [90.176567, 8972.0], [91.01799, 10656.0], [91.054359, 638594.0], [95.049522, 20987.0], [97.028236, 50280.0], [104.284142, 13776.0], [109.028542, 25437.0], [109.318283, 11670.0], [111.007927, 79095.0], [112.026367, 10533.0], [119.049187, 3056672.0], [121.028389, 843583.0], [125.023193, 54233.0], [127.038757, 37325.0], [129.018066, 248785.0], [131.049484, 38886.0], [141.070236, 108479.0], [143.034332, 16912.0], [145.028397, 799800.0], [145.064423, 23471.0], [149.023224, 18516.0], [153.018219, 6555352.0], [155.048508, 17626.0], [157.064178, 44445.0], [161.060638, 14973.0


invalid value encountered in divide



{'adduct': 'M+H', 'filter_peaks_method': 'top_k', 'filter_peaks_variable': 50, 'mz_tolerance': 0.05, 'ppm': 1.01, 'min_score_ratio': 0.5, 'distance_decay': 0.1}
[[53.03965, 2420.0], [79.05468, 10387.0], [92.367668, 4018.0], [96.605949, 2448.0], [97.008278, 4185.0], [99.408318, 2426.0], [103.054024, 7433.0], [104.27092, 2178.0], [105.070374, 680999.0], [122.019569, 71596.0], [122.096748, 24410.0]]
{'adduct': 'M+H', 'filter_peaks_method': 'top_k', 'filter_peaks_variable': 50, 'mz_tolerance': 0.05, 'ppm': 1.01, 'min_score_ratio': 0.5, 'distance_decay': 0.1}
[[54.086521, 3119.0], [55.018089, 4120.0], [60.41634, 3514.0], [67.014137, 4659.0], [67.018158, 93675.0], [73.95401, 3762.0], [89.014343, 3795.0], [95.012627, 6393.0], [99.813408, 3492.0], [103.050056, 3776.0], [129.27092, 3442.0], [172.955048, 7619.0], [177.455597, 4339.0], [207.970108, 11779.0], [220.023773, 5706.0], [229.049026, 61456.0], [235.014297, 14420.0], [238.033661, 5950.0], [248.034073, 4971.0], [253.97699, 16269.0], [263.0

  0%|          | 28/27814 [00:01<33:37, 13.77it/s]

{'adduct': 'M+H', 'filter_peaks_method': 'top_k', 'filter_peaks_variable': 50, 'mz_tolerance': 0.05, 'ppm': 1.01, 'min_score_ratio': 0.5, 'distance_decay': 0.1}
[[56.61319, 2066.0], [64.920433, 1944.0], [73.398193, 2195.0], [91.054382, 27054.0], [92.37043, 3183.0], [112.625504, 2082.0], [149.022919, 11910.0], [161.044449, 2668.0], [164.081329, 2386.0], [189.058777, 4394.0], [189.076828, 86658.0], [191.367859, 2443.0], [215.079559, 3531.0], [219.075104, 4350.0], [230.081451, 5234.0], [231.111877, 2663.0], [232.084152, 3697.0], [247.107193, 491896.0], [248.11467, 14366.0], [248.899933, 14992.0], [248.969757, 3324.0], [266.909882, 5259.0], [286.117157, 11220.0], [313.128662, 248052.0]]



invalid value encountered in divide



{'adduct': 'M+H', 'filter_peaks_method': 'top_k', 'filter_peaks_variable': 50, 'mz_tolerance': 0.05, 'ppm': 1.01, 'min_score_ratio': 0.5, 'distance_decay': 0.1}
[[66.742699, 8102.720215], [73.952599, 8775.080078], [75.655998, 5517.580078], [76.218102, 5786.660156], [81.473198, 5797.450195], [90.055496, 32277.900391]]
{'adduct': 'M+H', 'filter_peaks_method': 'top_k', 'filter_peaks_variable': 50, 'mz_tolerance': 0.05, 'ppm': 1.01, 'min_score_ratio': 0.5, 'distance_decay': 0.1}
[[55.018291, 4011.0], [57.033859, 13220.0], [66.733643, 2572.0], [69.03363, 3866.0], [70.322479, 2531.0], [73.028748, 4792.0], [82.387512, 2979.0], [85.028419, 11522.0], [87.043907, 2241.0], [87.301117, 2135.0], [115.038834, 18664.0], [133.049225, 37103.0], [136.013733, 2676.0], [142.361343, 2322.0], [153.040634, 292247.0], [154.024857, 3081.0], [212.147888, 2994.0], [266.94339, 2433.0], [284.928009, 3281.0], [302.981873, 3957.0]]
{'adduct': 'M+H', 'filter_peaks_method': 'top_k', 'filter_peaks_variable': 50, 'mz_to

  0%|          | 47/27814 [00:02<22:22, 20.69it/s]

{'adduct': 'M+H', 'filter_peaks_method': 'top_k', 'filter_peaks_variable': 50, 'mz_tolerance': 0.05, 'ppm': 1.01, 'min_score_ratio': 0.5, 'distance_decay': 0.1}
[[57.034061, 10299.0], [69.033607, 3533.0], [73.029007, 3530.0], [85.028587, 11128.0], [98.984383, 17546.0], [104.259827, 3989.0], [107.813271, 2597.0], [115.038696, 6385.0], [118.166023, 2388.0], [121.050911, 4034934.0], [133.049622, 22358.0], [136.176102, 3213.0], [208.883759, 2650.0], [234.951355, 12189.0], [242.335266, 2669.0], [252.871887, 4103.0], [253.092834, 199043.0]]
{'adduct': 'M+H', 'filter_peaks_method': 'top_k', 'filter_peaks_variable': 50, 'mz_tolerance': 0.05, 'ppm': 1.01, 'min_score_ratio': 0.5, 'distance_decay': 0.1}
[[50.465302, 27178.0], [61.333, 27718.800781], [61.8377, 29876.300781], [75.108002, 27107.400391], [78.985199, 641651.0], [80.702003, 28331.199219], [82.396797, 31475.300781], [88.112602, 117444.0], [92.374496, 37706.199219], [96.995598, 3620820.0], [103.055, 220076.0], [104.280998, 41102.699219],

  0%|          | 50/27814 [00:02<25:40, 18.02it/s]

{'adduct': 'M+H', 'filter_peaks_method': 'top_k', 'filter_peaks_variable': 50, 'mz_tolerance': 0.05, 'ppm': 1.01, 'min_score_ratio': 0.5, 'distance_decay': 0.1}
[[55.688122, 4008.0], [67.132393, 3989.0], [77.181419, 3616.0], [91.054176, 21102.0], [92.375397, 4066.0], [95.04921, 112190.0], [103.054283, 9049.0], [107.048897, 6999.0], [108.056953, 5115.0], [115.054047, 61082.0], [116.049362, 16895.0], [117.069878, 18371.0], [118.065277, 9042.0], [121.06411, 15820.0], [123.043922, 289298.0], [127.041809, 4850.0], [128.049103, 33834.0], [129.056671, 5238.0], [130.064957, 14784.0], [131.049057, 19199.0], [131.072739, 20046.0], [132.056656, 13779.0], [133.028381, 4702.0], [133.064651, 25913.0], [135.043625, 14423.0], [136.051956, 19672.0], [138.067368, 6970.0], [140.049423, 8608.0], [141.057465, 8087.0], [142.064941, 93480.0], [143.073212, 4465.0], [144.044632, 16495.0], [144.080841, 26027.0], [145.064514, 28101.0], [146.059647, 20954.0], [146.072464, 7889.0], [147.043671, 4295.0], [147.08023

  0%|          | 55/27814 [00:04<1:04:41,  7.15it/s]

{'adduct': 'M+H', 'filter_peaks_method': 'top_k', 'filter_peaks_variable': 50, 'mz_tolerance': 0.05, 'ppm': 1.01, 'min_score_ratio': 0.5, 'distance_decay': 0.1}
[[60.548012, 2752.0], [66.751953, 2923.0], [92.389969, 3561.0], [118.064789, 6907.0], [124.076073, 4992.0], [130.065384, 10804.0], [132.043854, 7213.0], [136.075485, 4435.0], [145.076111, 3570.0], [147.067688, 5999.0], [148.075485, 3479.0], [157.998413, 3589.0], [162.037079, 79610.0], [164.053177, 7218.0], [172.087067, 24490.0], [175.032532, 6421.0], [184.087234, 12108.0], [185.094788, 15681.0], [186.091263, 7227.0], [189.047958, 74002.0], [190.031906, 97484.0], [193.042694, 26351.0], [198.065598, 6133.0], [202.042633, 24754.0], [212.08168, 443107.0], [213.089706, 67172.0], [216.058823, 73131.0], [217.043106, 3788.0], [220.667252, 3107.0], [228.059677, 4992.0], [230.038025, 258286.0], [237.076843, 6237.0], [238.083984, 40943.0], [244.053635, 1723099.0], [246.070953, 3712.0], [253.054367, 14903.0], [256.040009, 4337.0], [257.048

  0%|          | 65/27814 [00:05<33:24, 13.85it/s]  

{'adduct': 'M+H', 'filter_peaks_method': 'top_k', 'filter_peaks_variable': 50, 'mz_tolerance': 0.05, 'ppm': 1.01, 'min_score_ratio': 0.5, 'distance_decay': 0.1}
[[50.72958, 2554.0], [53.039131, 15581.0], [55.018269, 37371.0], [55.934841, 38532.0], [56.96534, 6948.0], [68.997437, 121933.0], [72.937424, 11089.0], [76.897087, 2813.0], [77.0392, 4503.0], [79.054489, 256603.0], [81.070107, 96822.0], [81.86676, 2834.0], [82.682632, 3194.0], [88.968369, 10950.0], [91.054253, 105660.0], [92.376831, 2874.0], [93.069946, 157804.0], [95.049141, 12496.0], [95.638977, 2868.0], [100.991577, 3426.0], [103.054108, 257683.0], [104.282928, 3134.0], [105.069817, 1132437.0], [107.049133, 36731.0], [107.95063, 5660.0], [115.0541, 105536.0], [121.064682, 828648.0], [128.950623, 44411.0], [131.048981, 54679.0], [132.057037, 17961.0], [133.053192, 6197.0], [133.064621, 174974.0], [134.036102, 3482.0], [135.043747, 3886.0], [135.94548, 20739.0], [136.210892, 3256.0], [137.023209, 5280.0], [145.508759, 4787.0],

  0%|          | 71/27814 [00:05<29:20, 15.76it/s]

{'adduct': 'M+H', 'filter_peaks_method': 'top_k', 'filter_peaks_variable': 50, 'mz_tolerance': 0.05, 'ppm': 1.01, 'min_score_ratio': 0.5, 'distance_decay': 0.1}
[[50.861549, 3397.0], [53.0392, 4573.0], [55.018398, 7246.0], [63.84586, 3021.0], [65.03904, 128759.0], [68.997559, 467387.0], [69.034027, 35876.0], [73.544472, 4245.0], [77.039627, 4383.0], [79.018288, 5363.0], [81.033813, 124905.0], [83.013062, 3440.0], [93.033691, 8784.0], [93.928398, 3722.0], [95.01281, 75639.0], [95.04924, 4436.0], [103.054268, 472788.0], [105.033577, 119717.0], [106.570107, 3519.0], [109.028526, 102759.0], [111.044159, 123676.0], [115.054466, 14208.0], [118.040337, 4802.0], [121.039383, 29626.0], [127.039017, 49332.0], [129.033569, 1164729.0], [133.028763, 7450.0], [137.023499, 1579027.0], [137.662354, 4244.0], [138.987579, 3731.0], [141.069489, 47861.0], [143.085266, 14490.0], [147.044403, 14648.0], [147.761063, 4215.0], [152.062073, 45410.0], [153.069977, 173308.0], [154.077194, 8208.0], [155.033966, 70

  0%|          | 74/27814 [00:05<31:33, 14.65it/s]

{'adduct': 'M+H', 'filter_peaks_method': 'top_k', 'filter_peaks_variable': 50, 'mz_tolerance': 0.05, 'ppm': 1.01, 'min_score_ratio': 0.5, 'distance_decay': 0.1}
[[57.045151, 19086.0], [58.99535, 15017.0], [84.985527, 39730.0], [106.039757, 8388.0], [107.023911, 8062.0], [108.055527, 27627.0], [110.034943, 7595.0], [111.042686, 13438.0], [122.039673, 22783.0], [123.047577, 60711.0], [124.050453, 32585.0], [125.016663, 14151.0], [126.001488, 3938.0], [126.012154, 6802.0], [126.311867, 3137.0], [133.050735, 19851.0], [134.034836, 8129.0], [135.945328, 3077.0], [137.045624, 158196.0], [138.011078, 3745.0], [139.99118, 69902.0], [140.027542, 6565.0], [145.008926, 13325.0], [149.045578, 188996.0], [149.059204, 6612.0], [150.012085, 5459.0], [150.053436, 254469.0], [150.996048, 58664.0], [151.042664, 147338.0], [151.061218, 562569.0], [152.027298, 19685.0], [153.022537, 26166.0], [154.007095, 21979.0], [156.000473, 7538.0], [158.01651, 174970.0], [161.045242, 37910.0], [162.053406, 148466.0],

  0%|          | 77/27814 [00:06<45:56, 10.06it/s]

{'adduct': 'M+H', 'filter_peaks_method': 'top_k', 'filter_peaks_variable': 50, 'mz_tolerance': 0.05, 'ppm': 1.01, 'min_score_ratio': 0.5, 'distance_decay': 0.1}
[[55.93475, 7193.0], [56.237961, 4496.0], [59.267521, 17984.0], [59.276741, 18276.0], [61.949921, 3819.0], [77.039368, 4667.0], [82.091293, 3982.0], [91.055, 7262.0], [92.367722, 4507.0], [92.986069, 3781.0], [95.812027, 4429.0], [103.054527, 9595.0], [129.033554, 20321.0], [135.044098, 63374.0], [166.077362, 8874.0], [178.07756, 25471.0], [181.102142, 7565.0], [183.911545, 25150.0], [185.060867, 5798.0], [194.020035, 4255.0], [195.948105, 15169.0], [196.524506, 22051.0], [201.923645, 9806.0], [205.529907, 31842.0], [213.922638, 93788.0], [222.066666, 23361.0], [231.933533, 20468.0], [236.083466, 28253.0], [236.143738, 52397.0], [237.090546, 30953138.0], [238.044189, 46720.0], [282.14682, 14520.0]]
{'adduct': 'M+H', 'filter_peaks_method': 'top_k', 'filter_peaks_variable': 50, 'mz_tolerance': 0.05, 'ppm': 1.01, 'min_score_ratio'

  0%|          | 86/27814 [00:06<28:16, 16.34it/s]

{'adduct': 'M+H', 'filter_peaks_method': 'top_k', 'filter_peaks_variable': 50, 'mz_tolerance': 0.05, 'ppm': 1.01, 'min_score_ratio': 0.5, 'distance_decay': 0.1}
[[55.075329, 2764.0], [72.044647, 678423.0], [75.191689, 2666.0], [82.413193, 3611.0], [84.959641, 3837.0], [89.397133, 2905.0], [90.055023, 12597.0], [92.390968, 3503.0], [99.055367, 4776.0], [104.303802, 4755.0], [110.071083, 13228.0], [124.050331, 13051.0], [125.103432, 3002.0], [130.153091, 3384.0], [136.050613, 5425.0], [136.231964, 2822.0], [137.081375, 3911.0], [138.065903, 3807.0], [140.081711, 5770.0], [150.000198, 2989.0], [167.092606, 21415.0], [181.071548, 1020786.0], [193.071671, 126412.0], [194.079453, 9820.0], [195.087219, 42614.0], [211.08194, 292033.0], [212.299347, 3951.0], [213.098694, 3954.0], [221.06633, 210781.0], [228.920273, 3859.0], [234.09787, 18794.0], [252.108398, 5479070.0], [267.012146, 3495.0]]
{'adduct': 'M+H', 'filter_peaks_method': 'top_k', 'filter_peaks_variable': 50, 'mz_tolerance': 0.05, 'pp

  0%|          | 93/27814 [00:07<45:31, 10.15it/s]

{'adduct': 'M+H', 'filter_peaks_method': 'top_k', 'filter_peaks_variable': 50, 'mz_tolerance': 0.05, 'ppm': 1.01, 'min_score_ratio': 0.5, 'distance_decay': 0.1}
[[50.015461, 3117.0], [51.434299, 3221.0], [53.039082, 41134.0], [55.018219, 12302.0], [56.965229, 5423.0], [57.070042, 3010.0], [61.01099, 2573.0], [61.47596, 2938.0], [64.172462, 2656.0], [65.038719, 4843.0], [73.93853, 3110.0], [77.038963, 5947.0], [78.853844, 2514.0], [81.070229, 5407.0], [84.959663, 3054.0], [89.038528, 21350.0], [91.054443, 27709.0], [93.033157, 3637.0], [95.049217, 19776.0], [100.591042, 2509.0], [103.553993, 2961.0], [104.260452, 2460.0], [104.267853, 3853.0], [105.033447, 490593.0], [106.673088, 2797.0], [115.054192, 4999.0], [117.033417, 4724.0], [118.628319, 2832.0], [119.049141, 10693.0], [121.028389, 483271.0], [133.02832, 228786.0], [133.039841, 5849.0], [133.045761, 4002.0], [141.069763, 12305.0], [145.028519, 5469.0], [151.038635, 25012.0], [152.06218, 18581.0], [153.069977, 31627.0], [154.07742

  0%|          | 96/27814 [00:08<52:40,  8.77it/s]

{'adduct': 'M+H', 'filter_peaks_method': 'top_k', 'filter_peaks_variable': 50, 'mz_tolerance': 0.05, 'ppm': 1.01, 'min_score_ratio': 0.5, 'distance_decay': 0.1}
[[50.74437, 2633.0], [51.023399, 4176.0], [53.039009, 20475.0], [54.926041, 2479.0], [55.01828, 3746.0], [67.018242, 18595.0], [79.054497, 43918.0], [81.033684, 4254.0], [82.393288, 4174.0], [84.95929, 3615.0], [85.028648, 15087.0], [95.049263, 13391.0], [104.279114, 3299.0], [105.045227, 3165.0], [106.763397, 3351.0], [107.049187, 4512227.0], [121.064987, 13003.0], [123.043648, 28415.0], [123.052689, 3360.0], [127.038544, 39076.0], [149.059891, 39940.0], [151.038895, 83639.0], [169.049515, 422653.0], [181.049149, 4351.0], [275.09137, 131163.0], [275.121002, 5164.0]]
{'adduct': 'M+H', 'filter_peaks_method': 'top_k', 'filter_peaks_variable': 50, 'mz_tolerance': 0.05, 'ppm': 1.01, 'min_score_ratio': 0.5, 'distance_decay': 0.1}
[[51.66753, 476620.0], [60.311131, 484663.0], [63.567791, 551629.0], [87.576027, 501497.0], [99.241562, 

  0%|          | 99/27814 [00:08<44:45, 10.32it/s]

{'adduct': 'M+H', 'filter_peaks_method': 'top_k', 'filter_peaks_variable': 50, 'mz_tolerance': 0.05, 'ppm': 1.01, 'min_score_ratio': 0.5, 'distance_decay': 0.1}
[[73.960892, 3799.0], [75.044632, 3873.0], [78.389, 3201.0], [79.05439, 34789.0], [81.03373, 7723.0], [81.069992, 20046.0], [83.049217, 5358.0], [90.9767, 17489.0], [92.247147, 3397.0], [93.59671, 3092.0], [96.056824, 4237.0], [96.110641, 3218.0], [96.843193, 3644.0], [104.29763, 4255.0], [109.064621, 42463.0], [111.080421, 7384.0], [113.023598, 23230.0], [113.059486, 6280.0], [122.035896, 16770.0], [123.042229, 3889.0], [124.052002, 32555.0], [126.99588, 126526.0], [128.04657, 41096.0], [139.075211, 114464.0], [141.018188, 40139.0], [143.070435, 6125.0], [151.03804, 4092.0], [152.046387, 118736.0], [156.041351, 74865.0], [158.963913, 5327.0], [167.072342, 4183.0], [180.040741, 6092.0], [184.036407, 5852.0], [185.081848, 24590.0], [187.407486, 3589.0], [195.06485, 11236457.0], [211.250137, 4107.0], [271.84671, 4450.0], [277.063


invalid value encountered in divide

  0%|          | 117/27814 [00:08<18:21, 25.15it/s]

{'adduct': 'M+H', 'filter_peaks_method': 'top_k', 'filter_peaks_variable': 50, 'mz_tolerance': 0.05, 'ppm': 1.01, 'min_score_ratio': 0.5, 'distance_decay': 0.1}
[[66.742699, 8102.720215], [73.952599, 8775.080078], [75.655998, 5517.580078], [76.218102, 5786.660156], [81.473198, 5797.450195], [90.055496, 32277.900391]]
{'adduct': 'M+H', 'filter_peaks_method': 'top_k', 'filter_peaks_variable': 50, 'mz_tolerance': 0.05, 'ppm': 1.01, 'min_score_ratio': 0.5, 'distance_decay': 0.1}
[[52.14603, 4942.0], [53.093651, 4701.0], [53.198051, 4286.0], [67.018288, 4495.0], [92.371033, 5594.0], [104.197311, 4523.0], [110.34819, 4774.0], [119.049133, 51950.0], [121.028793, 7393.0], [136.287048, 5067.0], [145.028336, 19400.0], [153.018188, 144651.0], [163.038803, 5865.0], [171.029221, 8339.0], [225.052994, 11301.0], [229.049667, 6784.0], [243.063965, 10969.0], [247.06073, 6280.0], [254.054916, 7417.0], [269.881287, 6584.0], [269.913147, 22535.0], [270.050079, 17747.0], [271.059814, 24181784.0], [272.2139

  0%|          | 126/27814 [00:08<13:42, 33.67it/s]

{'adduct': 'M+H', 'filter_peaks_method': 'top_k', 'filter_peaks_variable': 50, 'mz_tolerance': 0.05, 'ppm': 1.01, 'min_score_ratio': 0.5, 'distance_decay': 0.1}
[[54.040401, 78699.398438], [64.716698, 73991.398438], [74.747299, 81200.703125], [82.422401, 93768.5], [115.054001, 97533.203125], [118.686996, 92853.796875], [132.080002, 278866.0], [139.582993, 75591.898438], [160.074997, 22080600.0], [174.248001, 75764.0], [177.065994, 397062.0]]
{'adduct': 'M+H', 'filter_peaks_method': 'top_k', 'filter_peaks_variable': 50, 'mz_tolerance': 0.05, 'ppm': 1.01, 'min_score_ratio': 0.5, 'distance_decay': 0.1}
[[51.023628, 2786.0], [68.997551, 122541.0], [82.400497, 3063.0], [84.930702, 2910.0], [86.5606, 2879.0], [86.999977, 2981.0], [92.368713, 3812.0], [113.014977, 3272.0], [125.015289, 90237.0], [129.010223, 151628.0], [138.067551, 3581.0], [139.03479, 11928.0], [140.047607, 5906.0], [141.01033, 6195.0], [153.009949, 19777.0], [154.062393, 5499.0], [154.98941, 461873.0], [155.070618, 26313.0]

  0%|          | 132/27814 [00:09<26:10, 17.63it/s]

{'adduct': 'M+H', 'filter_peaks_method': 'top_k', 'filter_peaks_variable': 50, 'mz_tolerance': 0.05, 'ppm': 1.01, 'min_score_ratio': 0.5, 'distance_decay': 0.1}
[[50.015751, 5271.0], [50.627991, 4215.0], [51.020569, 3530.0], [51.023479, 7590.0], [55.018379, 323843.0], [58.001598, 4533.0], [59.04966, 44093.0], [63.02346, 4184.0], [65.039047, 95531.0], [65.572792, 4197.0], [65.686829, 3826.0], [67.018417, 5771.0], [68.997528, 275526.0], [69.033997, 50738.0], [69.070412, 8727.0], [79.018181, 82683.0], [79.054482, 8787.0], [81.033813, 90910.0], [90.829987, 4956.0], [91.054428, 6976291.0], [92.997238, 40169.0], [95.013077, 8024.0], [97.028603, 86804.0], [99.007713, 96506.0], [103.054199, 357619.0], [103.196503, 4986.0], [104.92067, 4676.0], [105.033531, 26081.0], [105.069763, 56546.0], [107.048958, 5002.0], [109.028542, 34034.0], [111.007942, 9913.0], [113.023369, 5899.0], [115.054626, 34926.0], [117.018044, 16767.0], [117.070213, 5147.0], [121.028313, 10099.0], [123.007584, 164575.0], [129

  1%|          | 145/27814 [00:10<22:33, 20.44it/s]

{'adduct': 'M+H', 'filter_peaks_method': 'top_k', 'filter_peaks_variable': 50, 'mz_tolerance': 0.05, 'ppm': 1.01, 'min_score_ratio': 0.5, 'distance_decay': 0.1}
[[90.903442, 43867.46875], [91.054245, 31656.433594], [99.003174, 50759.417969], [104.0495, 15387.182617], [104.336723, 10626.804688], [105.069969, 146983.21875], [115.054306, 35958.167969], [117.057327, 1422920.375], [122.019356, 98318.789062], [130.065018, 22697.84375], [131.068512, 77817.203125], [132.080872, 4408957.5], [147.412903, 7723.356445]]
{'adduct': 'M+H', 'filter_peaks_method': 'top_k', 'filter_peaks_variable': 50, 'mz_tolerance': 0.05, 'ppm': 1.01, 'min_score_ratio': 0.5, 'distance_decay': 0.1}
[[65.039101, 1574123.0], [70.94239, 90022.0], [70.958183, 19244.0], [85.623703, 20109.0], [89.01368, 17373.0], [93.033783, 6109882.0], [97.065292, 40380.0], [107.049004, 37923.0], [110.0364, 698455.0], [111.044228, 7101209.0], [111.968338, 209026.0], [118.659851, 18644.0], [125.059937, 9423419.0], [126.923401, 18899.0], [12

  1%|          | 149/27814 [00:10<20:22, 22.63it/s]

{'adduct': 'M+H', 'filter_peaks_method': 'top_k', 'filter_peaks_variable': 50, 'mz_tolerance': 0.05, 'ppm': 1.01, 'min_score_ratio': 0.5, 'distance_decay': 0.1}
[[54.93219, 3135.0], [55.645988, 3189.0], [58.995399, 4050.0], [66.749092, 3560.0], [67.164627, 3483.0], [79.054329, 16190.0], [79.152992, 3348.0], [84.985718, 6747.0], [90.899689, 3006.0], [91.053978, 7984.0], [92.386902, 5537.0], [106.065018, 150752.0], [108.055496, 6447.0], [111.043259, 3742.0], [115.054153, 3775.0], [117.057442, 25615.0], [117.069351, 14392.0], [119.072762, 53051.0], [120.080788, 21264.0], [121.088097, 5181.0], [122.096527, 6600.0], [124.050522, 6453.0], [125.016403, 4802.0], [130.064911, 95033.0], [132.080551, 12917.0], [133.052078, 32561.0], [134.059921, 141436.0], [134.096161, 117714.0], [137.0457, 50598.0], [139.990982, 21478.0], [143.07309, 26052.0], [144.080627, 126890.0], [145.088455, 50230.0], [149.045578, 93547.0], [150.053482, 90947.0], [150.995926, 31391.0], [151.061279, 252847.0], [152.026855, 7

  1%|          | 157/27814 [00:10<21:30, 21.43it/s]

{'adduct': 'M+H', 'filter_peaks_method': 'top_k', 'filter_peaks_variable': 50, 'mz_tolerance': 0.05, 'ppm': 1.01, 'min_score_ratio': 0.5, 'distance_decay': 0.1}
[[50.015549, 3654.0], [51.023369, 2932.0], [53.039101, 15835.0], [55.01825, 20728.0], [56.965359, 3024.0], [57.721191, 2829.0], [59.128139, 2925.0], [59.653461, 2817.0], [68.997383, 52898.0], [69.033913, 18039.0], [70.115211, 2857.0], [71.012878, 2842.0], [77.039093, 2897.0], [79.018013, 62529.0], [81.033691, 30880.0], [83.012627, 4010.0], [85.028671, 5536.0], [92.389107, 3640.0], [93.780357, 3169.0], [97.02813, 36311.0], [103.053993, 97945.0], [105.033318, 415289.0], [107.012558, 608187.0], [109.027634, 4127.0], [115.053848, 3429.0], [125.023216, 85763.0], [128.06131, 10496.0], [129.033203, 238167.0], [129.069336, 27694.0], [135.007401, 131643.0], [135.017151, 2684.0], [137.034149, 13380.0], [141.069443, 23424.0], [143.085464, 3471.0], [147.043655, 15670.0], [152.061676, 6429.0], [153.017944, 1003265.0], [153.069672, 108393.0]

  1%|          | 161/27814 [00:11<25:10, 18.31it/s]

{'adduct': 'M+H', 'filter_peaks_method': 'top_k', 'filter_peaks_variable': 50, 'mz_tolerance': 0.05, 'ppm': 1.01, 'min_score_ratio': 0.5, 'distance_decay': 0.1}
[[51.023399, 572905.0], [53.03912, 7146748.0], [60.52383, 570459.0], [65.038963, 2084903.0], [68.997543, 1688656.0], [82.144577, 418894.0], [82.378754, 707151.0], [92.354279, 581884.0], [93.033577, 3919018.0], [95.049217, 6440244.0], [103.046837, 831866.0], [103.054161, 11619976.0], [104.260063, 643809.0], [105.033447, 2566111.0], [111.04438, 525694.0], [121.028473, 29679140.0], [129.033615, 11419159.0], [139.039185, 1761779.0], [147.624832, 466625.0], [152.062027, 2681550.0], [153.070618, 531125.0], [165.070145, 1978362.0], [166.076828, 644138.0], [167.085648, 9711930.0], [177.069809, 1817706.0], [178.077591, 13490908.0], [181.065186, 581142.0], [187.271362, 466570.0], [222.068466, 431489.0], [223.075485, 397081504.0]]
{'adduct': 'M+H', 'filter_peaks_method': 'top_k', 'filter_peaks_variable': 50, 'mz_tolerance': 0.05, 'ppm': 1

  1%|          | 166/27814 [00:11<23:13, 19.84it/s]

{'adduct': 'M+H', 'filter_peaks_method': 'top_k', 'filter_peaks_variable': 50, 'mz_tolerance': 0.05, 'ppm': 1.01, 'min_score_ratio': 0.5, 'distance_decay': 0.1}
[[53.662491, 2314.0], [62.912251, 2005.0], [67.054619, 3056.0], [69.56144, 2145.0], [69.771133, 2261.0], [73.960251, 2548.0], [79.054443, 4593.0], [80.049393, 2904.0], [81.07003, 21865.0], [82.06559, 3406.0], [90.648163, 2213.0], [96.081108, 3653.0], [110.096046, 5812.0], [112.381821, 2256.0], [116.861954, 2328.0], [118.668198, 2924.0], [122.09581, 2889.0], [125.64048, 2175.0], [130.065109, 20204.0], [132.080429, 2651.0], [134.09613, 5380.0], [136.111282, 3815.0], [136.225143, 3001.0], [144.080688, 3284.0], [154.064896, 2823.0], [156.080185, 4778.0], [162.669022, 2622.0], [166.122253, 8599.0], [168.080719, 13176.0], [172.074738, 2967.0], [196.110809, 2982.0], [223.122726, 2755.0], [234.127274, 4788.0], [244.753021, 2824.0], [249.139465, 3008.0], [252.684891, 2459.0], [277.169342, 79699.0], [294.919037, 12055.0], [295.179932, 15

  1%|          | 169/27814 [00:11<27:10, 16.96it/s]

{'adduct': 'M+H', 'filter_peaks_method': 'top_k', 'filter_peaks_variable': 50, 'mz_tolerance': 0.05, 'ppm': 1.01, 'min_score_ratio': 0.5, 'distance_decay': 0.1}
[[53.00264, 3643.0], [55.018341, 15053.0], [55.940491, 2892.0], [57.033932, 12003.0], [57.07024, 4166.0], [60.044941, 2789.0], [67.05452, 2782.0], [68.049896, 3111.0], [68.997437, 24087.0], [69.070229, 256570.0], [70.028976, 16169.0], [71.0131, 122773.0], [71.016357, 2701.0], [71.085869, 194080.0], [72.044777, 3370.0], [76.498169, 2305.0], [84.019974, 2368.0], [84.044548, 5590.0], [84.081017, 16024.0], [86.023888, 17033.0], [86.096619, 227212.0], [87.007736, 15167.0], [88.03933, 34944.0], [88.112244, 847708.0], [89.023422, 84813.0], [101.023293, 21843.0], [104.034058, 12819.0], [112.075706, 4676.0], [115.001556, 3036.0], [122.023453, 4907.0], [123.013847, 2729.0], [126.09137, 4080.0], [131.258881, 2173.0], [132.028992, 98668.0], [140.033539, 8795.0], [142.123291, 3533.0], [144.029007, 63587.0], [156.102173, 13591.0], [156.17495

  1%|          | 173/27814 [00:12<38:52, 11.85it/s]

{'adduct': 'M+H', 'filter_peaks_method': 'top_k', 'filter_peaks_variable': 50, 'mz_tolerance': 0.05, 'ppm': 1.01, 'min_score_ratio': 0.5, 'distance_decay': 0.1}
[[53.039082, 7261.0], [57.5518, 3435.0], [65.039078, 6947.0], [71.509117, 2947.0], [91.054314, 355505.0], [92.049698, 32609.0], [92.391121, 3570.0], [93.057167, 5943.0], [94.065376, 3918.0], [104.049179, 24872.0], [105.044609, 3387.0], [106.064934, 15865.0], [115.054123, 169425.0], [117.056763, 12015.0], [118.064613, 5450.0], [120.044456, 22010.0], [121.010246, 4875.0], [129.884079, 3186.0], [130.054764, 4810.0], [130.065186, 91545.0], [130.246384, 3757.0], [132.04422, 3672.0], [136.0215, 62387.0], [136.232513, 3260.0], [142.064392, 4745.0], [143.0728, 4425.0], [144.080856, 46310.0], [146.060135, 4603.0], [154.06459, 16604.0], [155.060471, 5287.0], [156.068283, 25881.0], [161.070694, 17265.0], [165.046936, 4822.0], [169.075714, 137799.0], [173.029251, 12028.0], [178.078033, 5306.0], [179.084991, 15309.0], [182.095871, 7480.0], 

  1%|          | 179/27814 [00:12<35:12, 13.08it/s]

{'adduct': 'M+H', 'filter_peaks_method': 'top_k', 'filter_peaks_variable': 50, 'mz_tolerance': 0.05, 'ppm': 1.01, 'min_score_ratio': 0.5, 'distance_decay': 0.1}
[[60.532551, 185950.0], [61.951641, 160733.0], [69.04541, 5724085.0], [73.946999, 204846.0], [83.024513, 379197.0], [86.239037, 160947.0], [92.198112, 195096.0], [94.040443, 232183.0], [96.056137, 11988730.0], [97.040421, 610371.0], [109.027733, 228846.0], [112.398354, 180592.0], [118.641876, 188974.0], [124.050751, 67672664.0], [137.082092, 646462.0], [142.061142, 4048359.0], [181.071991, 103675112.0]]
{'adduct': 'M+H', 'filter_peaks_method': 'top_k', 'filter_peaks_variable': 50, 'mz_tolerance': 0.05, 'ppm': 1.01, 'min_score_ratio': 0.5, 'distance_decay': 0.1}
[[55.01852, 2496.0], [55.054771, 26171.0], [57.070492, 11782.0], [67.054626, 47776.0], [69.033867, 3097.0], [69.070374, 42995.0], [79.05468, 28830.0], [81.070229, 82050.0], [83.085899, 20881.0], [90.976822, 2915.0], [91.054283, 23382.0], [93.069977, 71027.0], [95.085716,

  1%|          | 191/27814 [00:13<22:37, 20.34it/s]

{'adduct': 'M+H', 'filter_peaks_method': 'top_k', 'filter_peaks_variable': 50, 'mz_tolerance': 0.05, 'ppm': 1.01, 'min_score_ratio': 0.5, 'distance_decay': 0.1}
[[50.469769, 3269.0], [51.940491, 16578.0], [53.039001, 10641.0], [58.029221, 1972611.0], [65.038902, 5331.0], [66.0467, 6655.0], [67.935257, 13462.0], [68.997292, 3107.0], [69.03376, 5016.0], [72.937447, 20145.0], [73.958092, 3697.0], [78.046783, 3307.0], [79.054527, 116813.0], [81.033546, 15878.0], [81.069977, 21859.0], [83.929901, 12294.0], [85.94561, 5479.0], [91.054413, 3601.0], [92.025757, 4535.0], [94.041359, 40990.0], [95.04892, 26093.0], [96.057083, 32156.0], [98.035828, 3368.0], [101.940514, 3690.0], [102.948196, 6789.0], [105.03373, 3408.0], [107.049057, 218850.0], [108.056778, 82777.0], [109.028343, 243572.0], [109.064796, 53079.0], [113.963531, 19554.0], [114.645271, 3033.0], [115.658943, 2877.0], [117.93528, 11729.0], [118.943268, 15318.0], [119.950989, 6326.0], [122.036179, 37905.0], [123.043793, 14994.0], [124.0


invalid value encountered in divide



{'adduct': 'M+H', 'filter_peaks_method': 'top_k', 'filter_peaks_variable': 50, 'mz_tolerance': 0.05, 'ppm': 1.01, 'min_score_ratio': 0.5, 'distance_decay': 0.1}
[[53.0392, 67624.703125], [82.432899, 14921.400391], [95.049698, 23493.699219], [104.328003, 16042.700195], [105.033997, 8097680.0], [115.896004, 15406.099609], [119.314003, 13913.599609], [134.059998, 16575.400391], [136.259995, 15538.299805], [162.057007, 12946.200195], [180.063995, 31946.0], [180.102997, 27105.699219]]
{'adduct': 'M+H', 'filter_peaks_method': 'top_k', 'filter_peaks_variable': 50, 'mz_tolerance': 0.05, 'ppm': 1.01, 'min_score_ratio': 0.5, 'distance_decay': 0.1}
[[51.66753, 476620.0], [60.311131, 484663.0], [63.567791, 551629.0], [87.576027, 501497.0], [99.241562, 550298.0], [121.02845, 2480972.0], [121.234711, 550333.0], [122.997963, 509365.0], [164.477905, 535861.0], [178.078873, 809507.0], [209.303848, 507014.0], [223.075592, 546801024.0]]
{'adduct': 'M+H', 'filter_peaks_method': 'top_k', 'filter_peaks_vari

  1%|          | 199/27814 [00:13<22:29, 20.47it/s]

{'adduct': 'M+H', 'filter_peaks_method': 'top_k', 'filter_peaks_variable': 50, 'mz_tolerance': 0.05, 'ppm': 1.01, 'min_score_ratio': 0.5, 'distance_decay': 0.1}
[[59.275982, 87812.0], [66.746941, 86326.0], [88.039963, 296874.0], [104.296623, 70691.0], [115.054489, 182764.0], [117.07019, 100766.0], [118.065392, 1688831.0], [130.065231, 5065871.0], [132.080933, 4028167.0], [142.065201, 922407.0], [143.073257, 319181.0], [144.080719, 1645563.0], [146.060059, 7359598.0], [156.081329, 108638.0], [158.084167, 334923.0], [159.091736, 12145598.0], [160.062714, 76004.0], [160.075577, 701928.0], [169.076187, 439994.0], [170.060104, 2875273.0], [170.076248, 157652.0], [183.09169, 109343.0], [184.076035, 421801.0], [187.086609, 1157966.0], [188.070602, 7538106.0], [201.102249, 7009380.0], [205.097076, 2682449.0], [211.087189, 96801.0], [229.097641, 1185317.0], [247.108047, 866373.0]]
{'adduct': 'M+H', 'filter_peaks_method': 'top_k', 'filter_peaks_variable': 50, 'mz_tolerance': 0.05, 'ppm': 1.01, '

  1%|          | 209/27814 [00:14<31:37, 14.55it/s]

{'adduct': 'M+H', 'filter_peaks_method': 'top_k', 'filter_peaks_variable': 50, 'mz_tolerance': 0.05, 'ppm': 1.01, 'min_score_ratio': 0.5, 'distance_decay': 0.1}
[[53.03931, 93709.0], [59.796711, 4579.0], [65.039253, 5695.0], [67.054916, 7090.0], [75.6772, 3831.0], [84.015556, 7719.0], [93.020538, 5423.0], [95.049622, 1432469.0], [105.045059, 18868.0], [111.044273, 16765.0], [114.723251, 4270.0], [115.034912, 4513.0], [121.028587, 676963.0], [125.502892, 4147.0], [139.038986, 456537.0]]
{'adduct': 'M+H', 'filter_peaks_method': 'top_k', 'filter_peaks_variable': 50, 'mz_tolerance': 0.05, 'ppm': 1.01, 'min_score_ratio': 0.5, 'distance_decay': 0.1}
[[50.465302, 27178.0], [61.333, 27718.800781], [61.8377, 29876.300781], [75.108002, 27107.400391], [78.985199, 641651.0], [80.702003, 28331.199219], [82.396797, 31475.300781], [88.112602, 117444.0], [92.374496, 37706.199219], [96.995598, 3620820.0], [103.055, 220076.0], [104.280998, 41102.699219], [107.049004, 204127.0], [110.186996, 29536.900391

  1%|          | 214/27814 [00:15<56:14,  8.18it/s]

{'adduct': 'M+H', 'filter_peaks_method': 'top_k', 'filter_peaks_variable': 50, 'mz_tolerance': 0.05, 'ppm': 1.01, 'min_score_ratio': 0.5, 'distance_decay': 0.1}
[[53.039028, 4545.0], [54.165569, 2377.0], [63.02359, 4785.0], [65.038841, 4403.0], [66.733597, 2485.0], [70.941948, 3346.0], [79.054428, 56102.0], [80.554291, 2272.0], [87.892464, 2456.0], [89.038681, 257497.0], [91.054359, 21272.0], [92.362, 3126.0], [93.033958, 3465.0], [95.049026, 4094.0], [107.049004, 102843.0], [111.044189, 17920.0], [111.967621, 3121.0], [117.033386, 342330.0], [118.633827, 2845.0], [119.048981, 5955.0], [122.050972, 2820.0], [128.984009, 2295.0], [130.177673, 2698.0], [135.043915, 765600.0], [136.052643, 2709.0], [139.038712, 13115.0], [145.028259, 363581.0], [145.039062, 14549.0], [154.588074, 2413.0], [163.038803, 1518890.0], [174.984253, 4197.0], [175.984299, 13246.0], [179.961868, 2245.0], [181.049164, 15910.0], [181.075699, 4891.0], [184.631668, 2726.0]]
{'adduct': 'M+H', 'filter_peaks_method': 'to

  1%|          | 219/27814 [00:15<43:21, 10.61it/s]

{'adduct': 'M+H', 'filter_peaks_method': 'top_k', 'filter_peaks_variable': 50, 'mz_tolerance': 0.05, 'ppm': 1.01, 'min_score_ratio': 0.5, 'distance_decay': 0.1}
[[52.863338, 2922.0], [60.54493, 2777.0], [68.997452, 7355.0], [76.916557, 2608.0], [88.297577, 3044.0], [95.012917, 4299.0], [104.295288, 2922.0], [135.044098, 25845.0], [137.023239, 76670.0], [155.033768, 15698.0], [161.023178, 29708.0], [169.17981, 3309.0], [185.060272, 7505.0], [197.059891, 4274.0], [215.069717, 3975.0], [236.384155, 5936.0], [243.06427, 4727.0], [245.390579, 17532.0], [247.895203, 13478.0], [253.048828, 4268.0], [256.898956, 6064.0], [269.909271, 5903.0], [270.049164, 5617.0], [271.059845, 10031204.0], [272.217834, 5435.0]]
{'adduct': 'M+H', 'filter_peaks_method': 'top_k', 'filter_peaks_variable': 50, 'mz_tolerance': 0.05, 'ppm': 1.01, 'min_score_ratio': 0.5, 'distance_decay': 0.1}
[[52.03738, 8367.0], [60.987141, 10493.0], [73.94165, 10072.0], [78.998222, 12439.0], [80.993507, 11857.0], [86.060768, 12582.

  1%|          | 227/27814 [00:16<36:05, 12.74it/s]

{'adduct': 'M+H', 'filter_peaks_method': 'top_k', 'filter_peaks_variable': 50, 'mz_tolerance': 0.05, 'ppm': 1.01, 'min_score_ratio': 0.5, 'distance_decay': 0.1}
[[65.527359, 3135.0], [66.374184, 2862.0], [88.202026, 2655.0], [121.028648, 11972.0], [123.043877, 34842.0], [137.023163, 100685.0], [142.041794, 3253.0], [149.023514, 19149.0], [151.039108, 16195.0], [163.039322, 3354.0], [182.138138, 3092.0], [185.061493, 4983.0], [192.871262, 2976.0], [202.06218, 4735.0], [212.044022, 3250.0], [213.054291, 36850.0], [227.071518, 4378.0], [230.056854, 43729.0], [238.639099, 3027.0], [241.048889, 67486.0], [244.035538, 4424.0], [245.080383, 13893.0], [257.044067, 18349.0], [258.051819, 159167.0], [268.035675, 6405.0], [269.043915, 187725.0], [270.049713, 3448.0], [272.066895, 6101.0], [273.07254, 3733.0], [283.06015, 3549.0], [286.046539, 951692.0], [299.716156, 3319.0], [301.069946, 7045504.0]]


  1%|          | 231/27814 [00:18<1:12:46,  6.32it/s]

{'adduct': 'M+H', 'filter_peaks_method': 'top_k', 'filter_peaks_variable': 50, 'mz_tolerance': 0.05, 'ppm': 1.01, 'min_score_ratio': 0.5, 'distance_decay': 0.1}
[[63.02327, 4234.0], [79.054283, 5359.0], [81.070236, 3297.0], [91.054451, 24911.0], [92.535233, 3048.0], [93.033524, 3939.0], [103.054039, 6211.0], [105.069832, 5754.0], [107.0121, 22124.0], [107.049133, 67106.0], [108.020477, 154524.0], [118.041229, 77255.0], [119.012283, 6885.0], [119.048958, 3569.0], [121.06443, 6952.0], [131.049347, 6806.0], [132.056885, 21466.0], [133.028458, 25786.0], [133.038162, 2803.0], [133.064682, 60709.0], [134.035934, 12337.0], [135.043732, 19596.0], [141.069794, 36719.0], [148.051437, 56291.0], [149.023163, 22595.0], [151.038971, 3818.0], [152.062012, 6402.0], [153.069702, 17233.0], [154.077316, 16978.0], [157.063766, 5172.0], [162.030533, 29618.0], [165.069427, 37874.0], [166.077789, 14464.0], [167.034225, 3784.0], [167.085266, 13223.0], [169.064651, 120705.0], [169.100632, 5351.0], [172.052277,

  1%|          | 233/27814 [00:18<1:03:13,  7.27it/s]

{'adduct': 'M+H', 'filter_peaks_method': 'top_k', 'filter_peaks_variable': 50, 'mz_tolerance': 0.05, 'ppm': 1.01, 'min_score_ratio': 0.5, 'distance_decay': 0.1}
[[52.677189, 2581.0], [54.034382, 2899.0], [56.389912, 2334.0], [57.045189, 63435.0], [61.931511, 2658.0], [63.828701, 2408.0], [65.039047, 4434.0], [72.046043, 2435.0], [72.051041, 2760.0], [74.68679, 2638.0], [79.029297, 3857.0], [79.041901, 2199.0], [79.054512, 3711.0], [82.028976, 2999.0], [82.419937, 2879.0], [83.024071, 3089.0], [83.301804, 3147.0], [84.044762, 35936.0], [91.054291, 24854.0], [92.049156, 3711.0], [103.054031, 16772.0], [105.044746, 3594.0], [105.069633, 3646.0], [106.064934, 13787.0], [107.049042, 19086.0], [115.054283, 3573.0], [117.056732, 5574.0], [117.069733, 5063.0], [118.68573, 3310.0], [119.049057, 34448.0], [120.044296, 58306.0], [120.055443, 18547.0], [122.05986, 19454.0], [124.05056, 28988.0], [130.040085, 5655.0], [130.06514, 6377.0], [131.024551, 2690.0], [131.049011, 86141.0], [132.044601, 58

  1%|          | 235/27814 [00:19<1:40:20,  4.58it/s]

{'adduct': 'M+H', 'filter_peaks_method': 'top_k', 'filter_peaks_variable': 50, 'mz_tolerance': 0.05, 'ppm': 1.01, 'min_score_ratio': 0.5, 'distance_decay': 0.1}
[[64.132118, 3024.0], [65.03907, 4138.0], [65.253403, 2835.0], [67.018173, 223530.0], [68.997391, 158520.0], [69.033997, 4421.0], [73.941727, 3790.0], [79.017639, 3637.0], [79.054314, 36510.0], [81.033623, 7223.0], [84.959763, 5363.0], [85.028587, 4260.0], [87.833679, 3492.0], [89.038589, 64742.0], [91.841423, 3062.0], [97.028137, 12666.0], [102.290352, 3342.0], [107.048843, 42525.0], [109.028023, 15416.0], [111.007599, 29179.0], [111.043716, 20715.0], [115.053993, 7059.0], [117.033279, 86252.0], [121.028313, 5263.0], [125.023064, 19938.0], [127.038269, 6730.0], [129.018036, 62357.0], [129.069656, 7379.0], [135.043854, 693648.0], [137.023148, 182019.0], [137.03392, 5893.0], [139.054413, 19214.0], [141.070587, 4165.0], [143.034744, 4428.0], [143.048843, 7012.0], [145.039627, 4479.0], [147.043274, 15986.0], [151.038177, 6192.0], 

  1%|          | 239/27814 [00:20<1:44:29,  4.40it/s]

{'adduct': 'M+H', 'filter_peaks_method': 'top_k', 'filter_peaks_variable': 50, 'mz_tolerance': 0.05, 'ppm': 1.01, 'min_score_ratio': 0.5, 'distance_decay': 0.1}
[[53.039261, 6552.0], [55.71534, 2783.0], [56.781601, 3024.0], [66.705482, 2966.0], [66.953743, 2959.0], [79.054359, 5751.0], [81.463966, 2908.0], [85.028763, 5466.0], [88.602722, 3002.0], [107.049103, 7174699.0], [121.064919, 18997.0], [127.038834, 100002.0], [140.954483, 3522.0], [149.046051, 6284.0], [149.059677, 170381.0], [151.038757, 53704.0], [167.071198, 3467.0], [169.049469, 1479185.0], [181.049072, 22732.0], [211.201172, 3501.0], [232.053802, 3129.0], [234.988602, 3828.0], [257.080902, 15710.0], [275.091034, 658729.0]]
{'adduct': 'M+H', 'filter_peaks_method': 'top_k', 'filter_peaks_variable': 50, 'mz_tolerance': 0.05, 'ppm': 1.01, 'min_score_ratio': 0.5, 'distance_decay': 0.1}
[[51.02372, 5421.0], [55.94706, 3797.0], [63.23455, 3206.0], [65.039009, 66837.0], [65.450974, 3968.0], [74.727081, 3493.0], [77.034813, 4245.0

  1%|          | 251/27814 [00:20<39:50, 11.53it/s]  

{'adduct': 'M+H', 'filter_peaks_method': 'top_k', 'filter_peaks_variable': 50, 'mz_tolerance': 0.05, 'ppm': 1.01, 'min_score_ratio': 0.5, 'distance_decay': 0.1}
[[51.023621, 5607.0], [51.817719, 2522.0], [53.039162, 20643.0], [63.06749, 3398.0], [77.038788, 6915.0], [84.959396, 5088.0], [85.877922, 3316.0], [92.049202, 24989.0], [95.04892, 6858.0], [102.046021, 11831.0], [103.0541, 1980999.0], [105.069862, 26440.0], [107.060226, 131416.0], [108.067909, 5303.0], [109.075943, 314696.0], [110.060043, 16188.0], [121.064812, 4993.0], [131.049011, 3607053.0], [133.075867, 28661.0], [134.060165, 3648.0], [144.080536, 14858.0], [149.070709, 16153.0], [151.086349, 385003.0], [161.070572, 42360.0], [177.064926, 4234.0], [180.080139, 13328.0], [203.081284, 64563.0], [221.106674, 7083.0], [239.11792, 38793.0], [263.117218, 12992.0], [281.127808, 2322347.0]]
{'adduct': 'M+H', 'filter_peaks_method': 'top_k', 'filter_peaks_variable': 50, 'mz_tolerance': 0.05, 'ppm': 1.01, 'min_score_ratio': 0.5, 'dis

  1%|          | 252/27814 [00:21<38:41, 11.87it/s]


In [30]:
import plotly.graph_objects as go


# Bin the data by delta_mass into discrete bins
df['delta_mass_bin'] = pd.cut(df['delta_mass'], bins=10)

# Calculate the average for each bin
df_grouped = df.groupby('delta_mass_bin').mean().reset_index()


# Create the plot
fig = go.Figure()

# Add scatter plot for each column
columns = ["score", "best_score", "random_guess", "random_prob"]
for col in columns:
    fig.add_trace(go.Scatter(x=df_grouped["delta_mass"], y=df_grouped[col], mode='lines+markers', name=col))

# Customize the plot
fig.update_layout(
    title="Comparison of Scores",
    xaxis_title="delta_mass",
    yaxis_title="Score",
    showlegend=True,
)

# Show the plot
fig.show()



The default value of numeric_only in DataFrameGroupBy.mean is deprecated. In a future version, numeric_only will default to False. Either specify numeric_only or select only columns which should be valid for the function.



In [None]:
df2 = pd.DataFrame(columns=colums)
count = 0
for match in tqdm(matches[1]):
    if count > 100:
        break
    try:
        m0, m1 = match
        if data_dict_filtered[m0]['Adduct'] != data_dict_filtered[m1]['Adduct'] or data_dict_filtered[m0]['Adduct'] != "M+H":
            continue
        molMol = cachedStructures_filtered[m1]
        modifMol = cachedStructures_filtered[m0]
        molUsi = hn.generate_usi(m1, library)
        modifUsi = hn.generate_usi(m0, library)
        molSmiles = data_dict_filtered[m1]['Smiles']
        modifSmiles = data_dict_filtered[m0]['Smiles']
        site = modSite.SiteLocator(data_dict_filtered[m1], data_dict_filtered[m0], molSmiles)
        for helper in helpers[m1]:
            if helper != m0:
                try:
                    site.helper_molecule(data_dict_filtered[helper], data_dict_filtered[helper]['Smiles'])
                except:
                    pass
        modifLoc = utils.calculateModificationSites(modifMol, molMol, False)
        peak_presence_only = False
        combine = True
        # calculate score
        res = site.accuracy_score(modifLoc[0], peak_presence_only=peak_presence_only, combine=combine, return_all=True)

        # generate random probability array 1-hot
        prob = np.zeros(site.molMol.GetNumAtoms())
        randInt = np.random.randint(0, site.molMol.GetNumAtoms())
        prob[randInt] = 1
        res2 = site.tempScore(modifLoc[0], prob, True)

        # generate random probability array distribution
        prb = np.random.rand(site.molMol.GetNumAtoms())
        prb = prb / prb.sum()
        res3 = site.tempScore(modifLoc[0], prb, True)

        # get max score
        maxScore = site.get_max_possible_score(modifLoc[0], peak_presence_only=peak_presence_only, combine=combine)
        
    # "mol1ID", "mol2ID", "mol1smile", "mol2smile", "delta_mass",
    #         "#_matched_peaks", "#_shifted_peaks", "#_unshifted_peaks", 
    #         "Closest_Max_Atom_Distance", "Count_Max", "Is_Max", "cosine", 
    #         "score", "best_score", "random_guess", "random_prob", "url"

        df2 = pd.concat([df, pd.DataFrame.from_records([{"mol1ID": molUsi, "mol2ID": modifUsi, "mol1smile": molSmiles, "mol2smile": data_dict_filtered[m0]['Smiles'], 
                                                        "delta_mass": abs(float(data_dict_filtered[m0]['Precursor_MZ']) - float(data_dict_filtered[m1]['Precursor_MZ'])),
                                                        "#_matched_peaks": len(site.matchedPeaks), "#_shifted_peaks": len(site.shifted), "#_unshifted_peaks": len(site.unshifted),
                                                        "Closest_Max_Atom_Distance": res['closestMaxAtomDistance'], "Count_Max": res['count'], "Is_Max": res['isMax'], "cosine":site.cosine, 
                                                        "score": res['score'], "best_score": maxScore, "random_guess":res2['score'], "random_prob":res3['score'], 
                                                        "url":visualizer.make_url("http://reza.cs.ucr.edu/", molUsi, modifUsi, molSmiles, modifSmiles, args=None) }])], ignore_index=True)
        count += 1
    except:
        # print stack trace
        # import traceback
        # traceback.print_exc()
        pass


In [29]:
# Bin the data by delta_mass into discrete bins
df2['delta_mass_bin'] = pd.cut(df2['delta_mass'], bins=10)

# Calculate the average for each bin
df_grouped2 = df2.groupby('delta_mass_bin').mean().reset_index()


# Create the plot
fig = go.Figure()

# Add scatter plot for each column
columns = ["score", "best_score", "random_guess", "random_prob"]
for col in columns:
    fig.add_trace(go.Scatter(x=df_grouped2["delta_mass"], y=df_grouped2[col], mode='lines+markers', name=col))

# Customize the plot
fig.update_layout(
    title="Comparison of Scores",
    xaxis_title="delta_mass",
    yaxis_title="Score",
    showlegend=True,
)

# Show the plot
fig.show()



The default value of numeric_only in DataFrameGroupBy.mean is deprecated. In a future version, numeric_only will default to False. Either specify numeric_only or select only columns which should be valid for the function.

