# Part 1: extending MNX metabolites based on stereochemistry

In [1]:
import pandas as pnd
import pickle
import copy

## load raw database

In [2]:
# Column names assigned to chem_isom.tsv, which is read without a header row (header=None below).
header_chem_isom = ['parent', 'child', 'description']
# Tab-separated table; text following a '#' is treated as a comment and not parsed.
chem_isom = pnd.read_csv('metanetx_4_4/chem_isom.tsv', sep='\t', comment='#', header=None, names=header_chem_isom)
# It's not possible to set an index here, because there are always duplicated IDs.
# In fact, 4.4/USERMANUAL.md reports: "parent molecule includes the child molecule (...)
# one-to-one, many-to-one and one-to-many relations are possible, but the overall graph is a acyclic"

In [3]:

# Load the pickled `crossrefs` dictionary; it is later indexed as
# crossrefs[database][mnx_id] to obtain the annotations of an MNX ID.
# NOTE(review): pickle.load can execute arbitrary code - only load files from a trusted source.
with open(f'mnx_dicts_M/crossrefs.pickle', 'rb') as handle:
    crossrefs = pickle.load(handle)

In [4]:
# Load the pickled BiGG -> other-databases dictionary. Each entry is later read
# through its 'metanetx.chemical' key to find the MNX IDs to extend from.
# NOTE(review): pickle.load can execute arbitrary code - only load files from a trusted source.
with open(f'mnx_dicts_M/bigg_to_others.pickle', 'rb') as handle:
    bigg_to_others = pickle.load(handle)
    
# Same for the SEED -> other-databases dictionary.
with open(f'mnx_dicts_M/seed_to_others.pickle', 'rb') as handle:
    seed_to_others = pickle.load(handle)

# Part 2: extend bigg/seed to others dict

In [5]:
# create mnx_to_mnxchilds in order to extend annotations 
def create_mnx_to_mnxchilds_dict(chem_isom, out_dir='mnx_dicts_M'):
    """Map every parent ID of the isomer table to the set of its child IDs.

    Parameters
    ----------
    chem_isom : pandas.DataFrame
        Table with (at least) the columns 'parent' and 'child', one row per
        parent/child relation. A parent may appear on several rows; all of
        its children are collected in the same set.
    out_dir : str, optional
        Existing directory in which 'mnx_to_mnxchilds.pickle' is written.
        Defaults to 'mnx_dicts_M', the location used by the rest of the
        notebook.

    Returns
    -------
    dict
        {parent: set of children}. Only relations listed explicitly in the
        table are recorded (no transitive closure is computed).

    Side effects
    ------------
    The resulting dictionary is pickled to '<out_dir>/mnx_to_mnxchilds.pickle'.
    """
    mnx_to_mnxchilds = {}

    # Walk the two relevant columns in parallel. This avoids
    # DataFrame.iterrows(), which builds a full Series for every row and is
    # therefore very slow on large tables.
    for parent, child in zip(chem_isom['parent'], chem_isom['child']):
        mnx_to_mnxchilds.setdefault(parent, set()).add(child)

    # write to disk:
    with open(f'{out_dir}/mnx_to_mnxchilds.pickle', 'wb') as handle:
        pickle.dump(mnx_to_mnxchilds, handle)

    return mnx_to_mnxchilds
    

In [6]:
# Build the parent -> children mapping from the isomer table
# (the function also pickles it to disk).
mnx_to_mnxchilds = create_mnx_to_mnxchilds_dict(chem_isom)

In [7]:
# extend the dictionaries by including all the children:
def extend_based_on_childs(something_to_others, mnx_to_mnxchilds, crossrefs, name, out_dir='mnx_dicts_M'):
    """Add to every entry the cross-references of the children of its MNX IDs.

    For each ID in `something_to_others`, every MNX ID listed under its
    'metanetx.chemical' key is looked up in `mnx_to_mnxchilds`; the annotations
    that `crossrefs` holds for each child are merged into the matching
    database set of a copy of the entry.

    Parameters
    ----------
    something_to_others : dict
        {id: {database: set of annotations}}. Every entry must contain the
        key 'metanetx.chemical'. This input is not modified.
    mnx_to_mnxchilds : dict
        {parent MNX ID: iterable of child MNX IDs}. Parents that are absent
        simply contribute nothing.
    crossrefs : dict
        {database: {MNX ID: iterable of annotations}}. MNX IDs missing for a
        given database are skipped.
    name : str
        Base name (without extension) of the pickle file to write.
    out_dir : str, optional
        Existing directory in which '<name>.pickle' is written. Defaults to
        'mnx_dicts_M', the location used by the rest of the notebook.

    Returns
    -------
    dict
        Deep copy of `something_to_others` with the children's annotations
        added.

    Raises
    ------
    KeyError
        If an entry lacks 'metanetx.chemical', or if annotations have to be
        added for a database that is not a key of that entry.

    Notes
    -----
    Only direct children are followed: children of children are not visited.
    The result is also pickled to '<out_dir>/<name>.pickle'.
    """
    something_to_others_extended = copy.deepcopy(something_to_others)

    for something_id, annotations in something_to_others.items():
        extended_entry = something_to_others_extended[something_id]

        # Iterate the parents of the ORIGINAL entry, not of the copy: the copy
        # is being mutated below and must not be iterated at the same time.
        for parent in annotations['metanetx.chemical']:

            # Explicit lookups with a default replace the former bare
            # `except:` clauses, which also hid unrelated errors.
            for mnx_id in mnx_to_mnxchilds.get(parent, ()):

                # iterate all the databases:
                for crossref, mnx_to_annots in crossrefs.items():
                    annots = mnx_to_annots.get(mnx_id)
                    if not annots:
                        # no annotations for this mnx_id for this external database
                        continue
                    extended_entry[crossref].update(annots)

    # write to disk
    with open(f'{out_dir}/{name}.pickle', 'wb') as handle:
        pickle.dump(something_to_others_extended, handle)

    return something_to_others_extended
    

In [8]:
# Extend the BiGG-keyed cross-references with the annotations of the child MNX IDs;
# the result is also pickled under the given name.
bigg_to_others_extended = extend_based_on_childs(bigg_to_others, mnx_to_mnxchilds, crossrefs, 'bigg_to_others_extended' )

# Same extension for the SEED-keyed cross-references.
seed_to_others_extended = extend_based_on_childs(seed_to_others, mnx_to_mnxchilds, crossrefs, 'seed_to_others_extended' )

## testing area

In [9]:
# Cross-references of 'glc__D' as loaded from disk, i.e. before the extension.
bigg_to_others['glc__D']

{'kegg.compound': {'C00031'},
 'kegg.drug': {'D00009'},
 'kegg.glycan': set(),
 'biocyc': {'Glucopyranose',
  'Hederagenin-Monoglucosides',
  'Soyasapogenol-B-Monoglucosides',
  'Soyasapogenol-E-Monoglucosides'},
 'hmdb': {'HMDB0000122',
  'HMDB0000516',
  'HMDB0003340',
  'HMDB0006564',
  'HMDB00122',
  'HMDB00516',
  'HMDB0062170',
  'HMDB03340',
  'HMDB06564',
  'HMDB62170'},
 'bigg.metabolite': {'glc__D'},
 'seed.compound': {'cpd00027'},
 'chebi': {'4167'},
 'sabiork': {'1406', '1407'},
 'lipidmaps': set(),
 'envipath': set(),
 'reactome': set(),
 'rhea_g': set(),
 'rhea_p': set(),
 'swisslipids': set(),
 'inchi': {'InChI=1S/C6H12O6/c7-1-2-3(8)4(9)5(10)6(11)12-2/h2-11H,1H2/t2-,3-,4+,5-,6?/m1/s1'},
 'inchikey': {'InChIKey=WQZGKKKJIJFFOK-GASJEMHNSA-N'},
 'smiles': {'OC[C@H]1OC(O)[C@H](O)[C@@H](O)[C@@H]1O'},
 'metanetx.chemical': {'MNXM1137670'}}

In [11]:
# Same entry after the extension with the annotations of the child MNX IDs.
bigg_to_others_extended['glc__D']

{'kegg.compound': {'C00031', 'C00221', 'C00267'},
 'kegg.drug': {'D00009'},
 'kegg.glycan': set(),
 'biocyc': {'ALPHA-GLUCOSE',
  'GLC',
  'Glucopyranose',
  'Hederagenin-Monoglucosides',
  'Soyasapogenol-B-Monoglucosides',
  'Soyasapogenol-E-Monoglucosides'},
 'hmdb': {'HMDB0000122',
  'HMDB0000516',
  'HMDB0003340',
  'HMDB0003345',
  'HMDB0006564',
  'HMDB00122',
  'HMDB00516',
  'HMDB0061922',
  'HMDB0062170',
  'HMDB03340',
  'HMDB03345',
  'HMDB06564',
  'HMDB61922',
  'HMDB62170'},
 'bigg.metabolite': {'Glc_aD', 'glc_D_B', 'glc__D', 'glc__aD', 'glc__bD'},
 'seed.compound': {'cpd00027', 'cpd00190', 'cpd19001'},
 'chebi': {'10242',
  '10397',
  '12318',
  '12373',
  '15903',
  '17925',
  '22386',
  '22795',
  '40557',
  '41140',
  '4167',
  '42802',
  '88300'},
 'sabiork': {'1364', '1378', '1406', '1407'},
 'lipidmaps': set(),
 'envipath': {'32de3cf4-e3e6-4168-956e-32fa5ddb0ce1/compound/bee44ace-e680-4580-8d73-374e54ae70f8',
  '650babc9-9d68-4b73-9332-11972ca26f7b/compound/16de786