In [37]:
import requests
import pandas as pd
import networkx as nx

In [38]:
import ming_spectrum_library

In [39]:
# Mass being searched for: passed as the constraint mass when scanning peaks,
# and used as the expected precursor m/z gap when pairing spectra.
DELTA_CONSTRAINT = 59
# Width of the matching window used with DELTA_CONSTRAINT in both searches.
TOLERANCE = 0.1
# Threshold compared against a node's "sum(precursor intensity)" attribute.
MIN_PRECURSOR_INTENSITY = 700000

In [40]:
# Load the molecular network (nodes carry per-cluster attributes) from its GraphML export.
mol_net = nx.read_graphml("./data/METABOLOMICS-SNETS-V2-b35eac81-download_cytoscape_data-main.graphml")
# Bind a spectrum collection to the clustered-spectra MGF file.
# NOTE(review): presumably nothing is parsed until load_from_file() is called — confirm in ming_spectrum_library.
spectrum_collection = ming_spectrum_library.SpectrumCollection("./data/METABOLOMICS-SNETS-V2-b35eac81-download_clustered_spectra-main.mgf")

In [41]:
# Read the spectra from disk.
# NOTE(review): presumably this fills spectrum_collection.spectrum_list, which later cells iterate — confirm.
spectrum_collection.load_from_file()

Spectra Loaded	0	Real	0


In [42]:
def list_of_compounds_with_constraint_peak(spectrum_collection, constraint_mass, tolerance=1.0):
    """Collect the scans of spectra that show evidence of ``constraint_mass``.

    A spectrum qualifies when at least one of its peaks passes either test:

    * the peak m/z is within ``tolerance`` of ``constraint_mass + 1.007276``
      (the constraint mass seen directly as a singly protonated ion), or
    * the gap between the spectrum's precursor m/z and the peak m/z is within
      ``tolerance`` of ``constraint_mass`` (the constraint mass seen as a
      neutral loss).

    Args:
        spectrum_collection: Object exposing ``spectrum_list``, an iterable of
            spectra. ``None`` entries are skipped. Each spectrum must provide
            ``mz``, ``scan`` and ``peaks``; ``peak[0]`` is read as the peak m/z.
        constraint_mass: Mass to look for.
        tolerance: Exclusive half-width of the matching window.

    Returns:
        set: The ``scan`` value of every qualifying spectrum.
    """
    proton_mass = 1.007276
    protonated_mz = constraint_mass + proton_mass

    scans_set = set()
    for spectrum in spectrum_collection.spectrum_list:
        # Identity check on purpose: ``== None`` would call a user-defined
        # __eq__ on the spectrum, which may raise or answer incorrectly.
        if spectrum is None:
            continue

        # any() stops at the first matching peak, like the original ``break``.
        has_constraint_peak = any(
            abs(peak[0] - protonated_mz) < tolerance
            or abs(spectrum.mz - peak[0] - constraint_mass) < tolerance
            for peak in spectrum.peaks
        )
        if has_constraint_peak:
            scans_set.add(spectrum.scan)

    return scans_set

In [43]:
# Scans of all spectra showing DELTA_CONSTRAINT as a fragment ion or a neutral loss, within TOLERANCE.
constraint_scans = list_of_compounds_with_constraint_peak(spectrum_collection, DELTA_CONSTRAINT, TOLERANCE)

In [44]:
# Number of spectra that carry the constraint peak.
len(constraint_scans)

1450

In [45]:
def isidentified(scan_string, mol_net):
    """Tell whether a network node exists and carries a ``Compound_Name``.

    Args:
        scan_string: Node id to look up in ``mol_net.nodes``.
        mol_net: Graph-like object whose ``nodes`` supports ``in`` and ``[]``.

    Returns:
        bool: ``True`` only when the node is present and its attribute mapping
        contains the key ``"Compound_Name"``; ``False`` otherwise.
    """
    node_table = mol_net.nodes
    # Short-circuit keeps the attribute lookup from running on a missing node.
    return scan_string in node_table and "Compound_Name" in node_table[scan_string]

# Build [lighter_scan, heavier_scan] pairs where:
#   * the heavier precursor sits DELTA_CONSTRAINT above the lighter one,
#     overshooting by more than 0 and less than TOLERANCE,
#   * only the heavier spectrum carries the constraint peak,
#   * at least one of the two nodes has a Compound_Name, and
#   * the partner of every identified spectrum reaches MIN_PRECURSOR_INTENSITY.
# Scans are stored as found on the spectrum objects; graph lookups use str(scan).
candidate_pairs = []

# Skip empty slots up front, consistent with the None guard in
# list_of_compounds_with_constraint_peak.
loaded_spectra = [spectrum for spectrum in spectrum_collection.spectrum_list if spectrum is not None]

# Identification depends only on the spectrum itself, so resolve it once per
# spectrum instead of once per (spectrum1, spectrum2) pair.
identified_flags = [isidentified(str(spectrum.scan), mol_net) for spectrum in loaded_spectra]

for spectrum1, spectrum1_identified in zip(loaded_spectra, identified_flags):
    for spectrum2, spectrum2_identified in zip(loaded_spectra, identified_flags):
        # Cheapest filter first: pure arithmetic rejects almost every pair.
        # NOTE(review): only positive deviations from DELTA_CONSTRAINT are
        # accepted (not abs()); kept as in the original — confirm it is intended.
        delta_mz = spectrum2.mz - spectrum1.mz
        if not (0 < (delta_mz - DELTA_CONSTRAINT) < TOLERANCE):
            continue

        # The constraint peak must be gained by the heavier spectrum only.
        if spectrum2.scan not in constraint_scans or spectrum1.scan in constraint_scans:
            continue

        # At least one side has to be an identified compound.
        if not (spectrum1_identified or spectrum2_identified):
            continue

        # The partner of an identified spectrum must be intense enough.
        if spectrum1_identified and mol_net.nodes[str(spectrum2.scan)]["sum(precursor intensity)"] < MIN_PRECURSOR_INTENSITY:
            continue

        if spectrum2_identified and mol_net.nodes[str(spectrum1.scan)]["sum(precursor intensity)"] < MIN_PRECURSOR_INTENSITY:
            continue

        candidate_pairs.append([spectrum1.scan, spectrum2.scan])

In [46]:
# Show the attribute dict of one node; node ids are strings, and the
# "sum(precursor intensity)" key used for filtering lives here.
mol_net.nodes["1"]

{'GNPSLinkout_Network': 'https://gnps.ucsd.edu/ProteoSAFe/result.jsp?view=network_displayer&componentindex=-1&task=b35eac819e424a559068a16e3fd89a87&show=true',
 'G4': 0.0,
 'DefaultGroups': 'G1,G6',
 'cluster index': 1,
 'sum(precursor intensity)': 793892.0,
 'G1': 1.0,
 'UniqueFileSources': '201905_MS2pos_epoxide_D-B4.mzML|201905_MS2pos_epoxide_A-B1.mzML',
 'G3': 0.0,
 'number of spectra': 2,
 'parent mass': 143.04,
 'G5': 0.0,
 'G6': 1.0,
 'GNPSLinkout_Cluster': 'https://gnps.ucsd.edu//ProteoSAFe/result.jsp?task=b35eac819e424a559068a16e3fd89a87&view=cluster_details&protein=1&show=true',
 'G2': 0.0,
 'precursor mass': 143.04,
 'charge': 0,
 'RTMean': 456.795,
 'AllGroups': ' ',
 'componentindex': '-1',
 'RTConsensus': 0.0}

In [47]:
# Take a snapshot of the current edges so the graph is not being mutated
# while its own edge view is iterated.
all_edges = [edge for edge in mol_net.edges]

# Drop every existing edge; the nodes and their attributes stay in place.
mol_net.remove_edges_from(all_edges)

In [48]:
# Node ids in mol_net are strings (every lookup in this notebook goes through
# str(scan)), while candidate_pairs holds the raw scan values. add_edges_from
# silently creates any endpoint it does not know, so passing the raw values
# would add new attribute-less nodes instead of linking the existing ones.
# str() is a no-op when the scans are already strings.
mol_net.add_edges_from((str(scan1), str(scan2)) for scan1, scan2 in candidate_pairs)

In [49]:
# Save the rewired network (original nodes, candidate-pair edges only) as GraphML.
nx.write_graphml(mol_net, "data/output.graphml")