In [1]:
from urllib.request import urlopen
from IPython.display import SVG
import matplotlib.pyplot as plt
from rdkit import Chem
from tqdm import tqdm
import pandas as pd
import xlsxwriter
import argparse
import pickle
import numpy as np
import json

import os
import sys
module_path = os.path.abspath(os.path.join('..'))
if module_path not in sys.path:
    sys.path.append(module_path)

import visualizer as visualizer
import utils as utils
import handle_network as hn
import fragmentation_py as fragmentation_py
import library_downloader as library_downloader
import Compound_n as Compound_n
import ModificationSiteLocator as modSite

import math
import multiprocessing as mp
from multiprocessing import Pool

In [2]:

# Known GNPS spectral libraries, as (library name, JSON download URL) pairs.
libraries = dict((
    ("GNPS-MSMLS", "https://external.gnps2.org/gnpslibrary/GNPS-MSMLS.json"),
    ("GNPS-NIH-NATURALPRODUCTSLIBRARY_ROUND2_POSITIVE", "https://external.gnps2.org/gnpslibrary/GNPS-NIH-NATURALPRODUCTSLIBRARY_ROUND2_POSITIVE.json"),
    ("GNPS-NIH-SMALLMOLECULEPHARMACOLOGICALLYACTIVE", "https://external.gnps2.org/gnpslibrary/GNPS-NIH-SMALLMOLECULEPHARMACOLOGICALLYACTIVE.json"),
    ("MIADB", "https://external.gnps2.org/gnpslibrary/MIADB.json"),
    ("BERKELEY-LAB", "https://external.gnps2.org/gnpslibrary/BERKELEY-LAB.json"),
    # Disabled entry, kept for reference:
    # ("GNPS-LIBRARY", "https://gnps-external.ucsd.edu/gnpslibrary/GNPS-LIBRARY.json"),
))

# Library analysed by the rest of the notebook; change this one name to switch.
library = "BERKELEY-LAB"

# Download the library only when its data directory does not exist yet.
if not os.path.exists(os.path.join("../data/libraries", library)):
    # Use the URL registered in `libraries` so the mapping is the single source
    # of truth for download locations. Names that are not registered fall back
    # to the URL pattern this cell used before.
    url = libraries.get(
        library,
        "https://gnps-external.ucsd.edu/gnpslibrary/" + library + ".json",
    )
    location = "../data/libraries/" + library + "/"
    # NOTE(review): the meaning of the two numeric arguments (0.5, 0.1) is
    # defined by library_downloader.download and is not visible here - confirm.
    library_downloader.download(url, location, 0.5, 0.1)

# Every cached artefact of the selected library is read from the same directory.
# NOTE: pickle.load executes arbitrary code from the file; only load pickles
# that were produced locally by a trusted pipeline.
library_dir = os.path.join("../data/libraries", library)

# Per-spectrum records, indexed by spectrum id (read later via "Adduct" / "Smiles").
with open(os.path.join(library_dir, "data_dict_filtered.pkl"), "rb") as f:
    data_dict_filtered = pickle.load(f)

# Match data; the cells below iterate over matches[1] as pairs of spectrum ids.
with open(os.path.join(library_dir, "matches.pkl"), "rb") as f:
    matches = pickle.load(f)

# Cached structures, indexed by the same spectrum ids as data_dict_filtered.
with open(os.path.join(library_dir, "cachedStructures.pkl"), "rb") as f:
    cachedStructures_filtered = pickle.load(f)

# Path of the fragmentation-tree output directory for the selected library.
helperDirectory = os.path.join("../data/libraries", library, "nf_output/fragmentationtrees/")
helpers = {}

# Keep a pair only when the adduct of BOTH spectra contains "M+H".
filter_matches = []
for match in matches[1]:
    if "M+H" in data_dict_filtered[match[0]]["Adduct"] and "M+H" in data_dict_filtered[match[1]]["Adduct"]:
        filter_matches.append(match)

# Group the kept pairs: first id of the pair -> list of second ids paired with it.
for match in filter_matches:
    helpers.setdefault(match[0], []).append(match[1])

In [15]:
def calculate_score(m0, m1):
    """Score the modification-site prediction for one matched pair.

    Parameters
    ----------
    m0 : key of the modified compound in ``data_dict_filtered`` and
        ``cachedStructures_filtered``.
    m1 : key of the main (unmodified) compound in the same two mappings.

    Returns
    -------
    The value returned by ``ModificationSiteLocator.calculate_score`` using the
    ``"is_max"`` method. When that value equals 1, the probabilities used for
    scoring are printed as a side effect.
    """
    main_structure = cachedStructures_filtered[m1]
    modified_structure = cachedStructures_filtered[m0]

    # The locator takes the main compound first and the modified compound second.
    locator = modSite.ModificationSiteLocator(
        Compound_n.Compound(data_dict_filtered[m1], main_structure),
        Compound_n.Compound(data_dict_filtered[m0], modified_structure),
    )

    probabilities = locator.generate_probabilities(
        shifted_only=True,
        PPO=True,
        CO=False,
        true_modification_site=None,
    )

    # Only the first site reported by the structure comparison is scored.
    true_site = utils.calculateModificationSites(modified_structure, main_structure, False)[0]

    score = locator.calculate_score(true_site, "is_max", probabilities)
    if score == 1:
        print(probabilities)
    return score



In [18]:

# Scan the filtered matches, starting at index 230, for the first pair whose
# score is exactly 1; print that pair and stop.
for i in range(230, len(filter_matches)):
    try:
        # The USIs are built from the pair being scored (index i), so the
        # printed identifiers describe the same pair as the SMILES and the score.
        usi1 = "mzspec:GNPS:" + library + ":accession:" + filter_matches[i][0]
        usi2 = "mzspec:GNPS:" + library + ":accession:" + filter_matches[i][1]
        smiles1 = data_dict_filtered[filter_matches[i][0]]["Smiles"]
        smiles2 = data_dict_filtered[filter_matches[i][1]]["Smiles"]
        s = calculate_score(filter_matches[i][0], filter_matches[i][1])
        if s == 1:
            print(usi2, usi1, smiles2, smiles1, s)
            break
    except Exception:
        # Best-effort scan: a pair that cannot be scored is skipped. Catching
        # Exception (not a bare except) keeps the kernel interruptible, because
        # KeyboardInterrupt and SystemExit are no longer swallowed.
        continue

  probabilities = probabilities / np.sum(probabilities)
