In [21]:
import requests
import pandas as pd
import networkx as nx

In [22]:
import ming_spectrum_library

In [23]:
# Mass value of interest: used below both as the constraint peak mass and as
# the expected precursor m/z difference between paired spectra.
DELTA_CONSTRAINT = 59

# Matching window applied to those comparisons (same units as the m/z values).
TOLERANCE = 0.1

In [24]:
# Input files: the network graph (GraphML) and the clustered spectra (MGF).
network_graphml_path = "./data/METABOLOMICS-SNETS-V2-b35eac81-download_cytoscape_data-main.graphml"
clustered_spectra_path = "./data/METABOLOMICS-SNETS-V2-b35eac81-download_clustered_spectra-main.mgf"

mol_net = nx.read_graphml(network_graphml_path)
# Only constructs the collection object; load_from_file() is called in a later cell.
spectrum_collection = ming_spectrum_library.SpectrumCollection(clustered_spectra_path)

In [25]:
# Load the spectra from the file path given to the SpectrumCollection constructor.
# Later cells iterate spectrum_collection.spectrum_list, which is presumably
# populated by this call — TODO confirm against ming_spectrum_library.
spectrum_collection.load_from_file()

Spectra Loaded	0	Real	0


In [26]:
def list_of_compounds_with_constraint_peak(spectrum_collection, constraint_mass, tolerance=1.0):
    """Return the scans of all spectra that show evidence of ``constraint_mass``.

    A spectrum qualifies as soon as one of its peaks satisfies either test:

    * fragment match: the peak m/z lies within ``tolerance`` of
      ``constraint_mass + 1.007276`` (the mass shifted by one proton mass), or
    * neutral-loss match: the difference between the precursor m/z
      (``spectrum.mz``) and the peak m/z lies within ``tolerance`` of
      ``constraint_mass``.

    Both comparisons are strict (``<``), so a deviation exactly equal to
    ``tolerance`` does not match.

    Args:
        spectrum_collection: Object exposing ``spectrum_list``, an iterable of
            spectra. ``None`` entries are skipped. Every other entry must
            provide ``mz``, ``scan`` and ``peaks``, where each peak is
            indexable and holds its m/z at position 0.
        constraint_mass: Mass to search for.
        tolerance: Maximum allowed absolute deviation for a match.

    Returns:
        set: The ``scan`` values of the matching spectra.
    """
    # m/z at which the constraint mass itself would appear as a fragment peak.
    protonated_mass = constraint_mass + 1.007276

    scans_set = set()
    for spectrum in spectrum_collection.spectrum_list:
        # Identity check: "== None" would defer to a custom __eq__ on the
        # spectrum type and could skip valid spectra.
        if spectrum is None:
            continue

        # any() stops at the first matching peak, like the original break.
        if any(
            abs(peak[0] - protonated_mass) < tolerance
            or abs(spectrum.mz - peak[0] - constraint_mass) < tolerance
            for peak in spectrum.peaks
        ):
            scans_set.add(spectrum.scan)

    return scans_set

In [27]:
# Scans of every loaded spectrum that has a peak matching DELTA_CONSTRAINT
# (as a fragment or as a neutral loss) within TOLERANCE.
constraint_scans = list_of_compounds_with_constraint_peak(spectrum_collection, DELTA_CONSTRAINT, TOLERANCE)

In [53]:
# Number of distinct scans flagged as containing the constraint peak.
len(constraint_scans)

1441

In [29]:
def isidentified(scan_string, mol_net):
    """Tell whether a network node carries a ``"Compound_Name"`` attribute.

    Args:
        scan_string: Node key to look up in ``mol_net.nodes``.
        mol_net: Graph-like object whose ``nodes`` supports membership tests
            and item access returning the node's attribute mapping.

    Returns:
        bool: ``True`` only if the node exists and its attributes include
        ``"Compound_Name"``; ``False`` otherwise.
    """
    node_table = mol_net.nodes
    # Short-circuit keeps the attribute lookup from running on missing nodes.
    return scan_string in node_table and "Compound_Name" in node_table[scan_string]

# Build candidate [lighter_scan, heavier_scan] pairs. A pair is kept when:
#   * at least one of the two spectra is identified in the network,
#   * the heavier spectrum's precursor m/z exceeds the lighter one's by
#     DELTA_CONSTRAINT plus a strictly positive offset smaller than TOLERANCE,
#   * the heavier spectrum shows the constraint peak and the lighter does not.

# Skip None placeholders (the peak-search function tolerates them too) and
# look up the identification status once per spectrum instead of once per pair.
loaded_spectra = [s for s in spectrum_collection.spectrum_list if s is not None]
identified_flags = [isidentified(str(s.scan), mol_net) for s in loaded_spectra]

candidate_pairs = []
for spectrum1, spectrum1_identified in zip(loaded_spectra, identified_flags):
    # The lighter partner must NOT carry the constraint peak; this does not
    # depend on spectrum2, so it is checked before the inner loop.
    if spectrum1.scan in constraint_scans:
        continue

    for spectrum2, spectrum2_identified in zip(loaded_spectra, identified_flags):
        if not (spectrum1_identified or spectrum2_identified):
            continue
        if spectrum2.scan not in constraint_scans:
            continue

        delta_mz = spectrum2.mz - spectrum1.mz
        # NOTE(review): this window is one-sided (DELTA_CONSTRAINT, DELTA_CONSTRAINT + TOLERANCE),
        # unlike the symmetric abs() test used for peak matching. Kept as in the
        # original; confirm whether abs(delta_mz - DELTA_CONSTRAINT) < TOLERANCE was intended.
        if 0 < (delta_mz - DELTA_CONSTRAINT) < TOLERANCE:
            candidate_pairs.append([spectrum1.scan, spectrum2.scan])

NCGC00016418-16!9-methoxyfuro[3,2-g]chromen-7-one
NCGC00385960-01!methyl 4,6-dihydroxy-2,3-dimethylbenzoate
NCGC00385911-01_C15H24O2_2(1H)-Naphthalenone, 4a,5,6,7,8,8a-hexahydro-6-hydroxy-3,8-dimethyl-5-(1-methylethyl)-, (4aR,5S,6S,8R,8aS)-
NCGC00385915-01_C15H18O3_(1aS,10aR)-1a,5,9-Trimethyl-1a,3,6,10a-tetrahydrooxireno[4,5]cyclodeca[1,2-b]furan-10(2H)-one
NCGC00169002-05_C14H16O3_(6S)-4-Methoxy-6-(2-phenylethyl)-5,6-dihydro-2H-pyran-2-one
NCGC00385483-01_C15H22O3_
NCGC00179704-04_C15H24O3_2-Naphthaleneacetic acid, decahydro-8-hydroxy-4a,8-dimethyl-alpha-methylene-
MLS000850648-01!4-hydroxy-3-phenyl-1H-quinolin-2-one
NCGC00168884-02!2,6-dihydroxyanthracene-9,10-dione
NCGC00381142-01_C15H14O4_1-(2,5-Dihydroxy-2-methyl-2,3-dihydrobenzo[de]chromen-4-yl)ethanone
NCGC00385210-01!8-(2-hydroxy-3-methylbut-3-enyl)-7-methoxychromen-2-one
NCGC00385915-01_C15H18O3_(1aS,10aR)-1a,5,9-Trimethyl-1a,3,6,10a-tetrahydrooxireno[4,5]cyclodeca[1,2-b]furan-10(2H)-one
NCGC00180350-02_C15H22O4_2-[(3S,3aR,5R,

In [55]:
# Inspect the attribute dictionary of one node; note that node keys are strings.
mol_net.nodes["1"]

{'G6': 0.0,
 'G5': 0.0,
 'parent mass': 143.04,
 'AllGroups': ' ',
 'DefaultGroups': 'G1,G3',
 'sum(precursor intensity)': 793892.0,
 'UniqueFileSources': '201905_MS2pos_epoxide_A-B1.mzML|201905_MS2pos_epoxide_D-B4.mzML',
 'number of spectra': 2,
 'charge': 0,
 'RTMean': 456.795,
 'componentindex': '-1',
 'G2': 0.0,
 'G3': 1.0,
 'precursor mass': 143.04,
 'RTConsensus': 0.0,
 'GNPSLinkout_Network': 'https://gnps.ucsd.edu/ProteoSAFe/result.jsp?view=network_displayer&componentindex=-1&task=44ef3f1d2d414e0fabcbeb3634e839ce&show=true',
 'G4': 0.0,
 'cluster index': 1,
 'GNPSLinkout_Cluster': 'https://gnps.ucsd.edu//ProteoSAFe/result.jsp?task=44ef3f1d2d414e0fabcbeb3634e839ce&view=cluster_details&protein=1&show=true',
 'G1': 1.0}

In [56]:
# Snapshot the current edges, then drop them all. Nodes and their attributes
# stay in place; only the connectivity is cleared.
# This mutates mol_net in place, so the cell that loads it must be re-run
# to get the original edges back.
all_edges = [*mol_net.edges]
mol_net.remove_edges_from(all_edges)

In [57]:
# Connect the candidate pairs in the (now edge-less) network.
# Nodes in mol_net are keyed by strings (they are looked up with str(scan) and
# as mol_net.nodes["1"]), so the scan values are converted to str here.
# Without the conversion, non-string scans would create new attribute-less
# nodes instead of linking the existing ones. str() is a no-op if the scans
# are already strings.
mol_net.add_edges_from((str(scan1), str(scan2)) for scan1, scan2 in candidate_pairs)

In [58]:
# Save the rewired network (original edges removed, candidate pairs added)
# as GraphML next to the input data.
nx.write_graphml(mol_net, "data/output.graphml")