In [46]:
import requests
import pandas as pd
import networkx as nx

In [47]:
import ming_spectrum_library

In [48]:
# Mass difference of interest. It is used both as the peak / neutral-loss mass
# searched inside each spectrum and as the expected precursor m/z gap between
# paired spectra (presumably in Da / m/z units -- TODO confirm).
DELTA_CONSTRAINT = 59

# Matching tolerance applied to both checks above, in the same units as
# DELTA_CONSTRAINT.
TOLERANCE = 0.1

In [49]:
# Existing molecular network. Its nodes are addressed by string ids later in
# this notebook (e.g. mol_net.nodes["1"]).
mol_net = nx.read_graphml("./data/network.graphml")
# Collection bound to the MGF file; the spectra themselves are presumably only
# read by the separate load_from_file() call that follows -- TODO confirm.
spectrum_collection = ming_spectrum_library.SpectrumCollection("./data/specs_ms.mgf")

In [50]:
# Read the spectra from the MGF path given to the constructor; presumably this
# is what populates spectrum_collection.spectrum_list, which the later cells
# iterate over -- TODO confirm against ming_spectrum_library.
spectrum_collection.load_from_file()

Spectra Loaded	0	Real	0


In [51]:
def list_of_compounds_with_constraint_peak(spectrum_collection, constraint_mass, tolerance=1.0):
    """Return the scans of all spectra that show evidence of ``constraint_mass``.

    A spectrum qualifies as soon as one of its peaks passes either test; both
    tests use a strict ``< tolerance`` comparison:

    * fragment ion: the peak m/z lies within ``tolerance`` of
      ``constraint_mass + 1.007276``;
    * neutral loss: ``spectrum.mz - peak m/z`` lies within ``tolerance`` of
      ``constraint_mass``.

    Args:
        spectrum_collection: Object exposing ``spectrum_list``, an iterable of
            spectra. ``None`` entries are skipped. Every other entry must
            provide ``peaks`` (items indexable so that ``peak[0]`` is the
            m/z), ``mz`` and ``scan``.
        constraint_mass: Mass to look for, as a fragment or as a neutral loss.
        tolerance: Maximum absolute deviation (exclusive) for a match.

    Returns:
        set: The ``scan`` value of every matching spectrum.
    """
    # 1.007276 is presumably the proton mass, i.e. the constraint mass is
    # looked up as a singly protonated ion -- TODO confirm.
    fragment_mz = constraint_mass + 1.007276

    scans_set = set()
    for spectrum in spectrum_collection.spectrum_list:
        # Identity check on purpose: `== None` would call a custom __eq__ on
        # the spectrum object, which may raise or give a misleading answer.
        if spectrum is None:
            continue

        has_constraint_peak = any(
            abs(peak[0] - fragment_mz) < tolerance
            or abs(spectrum.mz - peak[0] - constraint_mass) < tolerance
            for peak in spectrum.peaks
        )
        if has_constraint_peak:
            scans_set.add(spectrum.scan)

    return scans_set

In [52]:
# Scans of every spectrum that contains DELTA_CONSTRAINT either as a fragment
# peak or as a neutral loss from the precursor, within TOLERANCE.
constraint_scans = list_of_compounds_with_constraint_peak(spectrum_collection, DELTA_CONSTRAINT, TOLERANCE)

In [53]:
# Number of distinct scans that passed the constraint-peak filter.
len(constraint_scans)

1441

In [54]:
# Build [lighter_scan, heavier_scan] pairs where
#   * the heavier spectrum carries the constraint peak and the lighter one does not, and
#   * the precursor m/z gap exceeds DELTA_CONSTRAINT by more than 0 and less than TOLERANCE.
# NOTE(review): the window is one-sided (gaps slightly *below* DELTA_CONSTRAINT are
# rejected) -- confirm this asymmetry is intended.

# spectrum_list may hold None entries (list_of_compounds_with_constraint_peak guards
# against them), so drop them up front instead of failing on `.mz`.
loaded_spectra = [spectrum for spectrum in spectrum_collection.spectrum_list if spectrum is not None]

candidate_pairs = []
for spectrum1 in loaded_spectra:
    # Loop-invariant for the inner loop: a spectrum that already carries the
    # constraint peak can never be the "lighter" member of a pair.
    if spectrum1.scan in constraint_scans:
        continue
    for spectrum2 in loaded_spectra:
        if spectrum2.scan not in constraint_scans:
            continue
        delta_mz = spectrum2.mz - spectrum1.mz
        if 0 < (delta_mz - DELTA_CONSTRAINT) < TOLERANCE:
            candidate_pairs.append([spectrum1.scan, spectrum2.scan])

In [55]:
# Inspect the attributes stored on one node; note that node ids in this graph
# are strings ("1"), not integers.
mol_net.nodes["1"]

{'G6': 0.0,
 'G5': 0.0,
 'parent mass': 143.04,
 'AllGroups': ' ',
 'DefaultGroups': 'G1,G3',
 'sum(precursor intensity)': 793892.0,
 'UniqueFileSources': '201905_MS2pos_epoxide_A-B1.mzML|201905_MS2pos_epoxide_D-B4.mzML',
 'number of spectra': 2,
 'charge': 0,
 'RTMean': 456.795,
 'componentindex': '-1',
 'G2': 0.0,
 'G3': 1.0,
 'precursor mass': 143.04,
 'RTConsensus': 0.0,
 'GNPSLinkout_Network': 'https://gnps.ucsd.edu/ProteoSAFe/result.jsp?view=network_displayer&componentindex=-1&task=44ef3f1d2d414e0fabcbeb3634e839ce&show=true',
 'G4': 0.0,
 'cluster index': 1,
 'GNPSLinkout_Cluster': 'https://gnps.ucsd.edu//ProteoSAFe/result.jsp?task=44ef3f1d2d414e0fabcbeb3634e839ce&view=cluster_details&protein=1&show=true',
 'G1': 1.0}

In [56]:
# Take a list snapshot of the current edges so the removal below iterates over
# a stable copy rather than the graph's live edge view.
all_edges = list(mol_net.edges)

# Strip every existing edge in place; nodes and their attributes are kept.
mol_net.remove_edges_from(all_edges)

In [57]:
# Node ids in mol_net are strings (the notebook indexes mol_net.nodes["1"]).
# add_edges_from silently creates any endpoint that is not already a node, so a
# non-string scan (e.g. an int) would add new attribute-less nodes instead of
# linking the existing ones. Normalising to str is a no-op when scans are
# already strings.
mol_net.add_edges_from(
    (str(lighter_scan), str(heavier_scan)) for lighter_scan, heavier_scan in candidate_pairs
)

In [58]:
# Save the rewired network (original nodes, candidate-pair edges only). The path
# is relative to the notebook's working directory.
nx.write_graphml(mol_net, "data/output.graphml")