# Part 1: parsing of mnx reactions

In [1]:
%%bash
# print the current working directory: the relative paths used in this notebook
# ('metanetx_4_4/...', 'mnx_dicts_R/...') are resolved against it
pwd

/home/jovyan/work/src/generated


In [2]:
import pandas as pnd
import pickle
import copy
import os

In [3]:
# Column names assigned to reac_prop.tsv (header=None: no header row is read from the file).
header_reac_prop = [
    'ID',
    'mnx_equation',
    'reference',
    'ECs',
    'is_balanced',
    'is_transport',
]

# comment='#': everything from a '#' to the end of a line is ignored by the parser.
# NOTE(review): this would also truncate data rows containing a '#' -- confirm none exist here.
# The 'ID' column becomes the index; verify_integrity raises if an ID is duplicated.
reac_prop = pnd.read_csv(
    'metanetx_4_4/reac_prop.tsv',
    sep='\t',
    comment='#',
    header=None,
    names=header_reac_prop,
).set_index('ID', drop=True, verify_integrity=True)

In [4]:
# Column names assigned to reac_xref.tsv (header=None: no header row is read from the file).
header_reac_xref = ['source', 'ID', 'description']

reac_xref = pnd.read_csv(
    'metanetx_4_4/reac_xref.tsv',
    sep='\t',
    # NOTE(review): 352 is presumably the number of leading comment lines in this
    # release of the file -- confirm whenever the input file is updated.
    skiprows=352,
    header=None,
    names=header_reac_xref,
)

# 'source' is intentionally NOT turned into a unique index:
# reac_xref.set_index('source', drop=True, inplace=True, verify_integrity=True)
# there are some duplicated IDs here! See eg "kegg.reaction:R04422#1" and "kegg.reaction:R04422#2"

In [5]:
# prepare output directories: every pickle written by this notebook goes into 'mnx_dicts_R/'
# (exist_ok=True keeps this cell safe to re-run when the directory already exists)
os.makedirs('mnx_dicts_R/', exist_ok=True)

## mnx_to_something dict creation

In [6]:
def create_mnx_to_something_dict(reac_xref, key):
    """Collect, for every MNX reaction ID, its IDs in one external database.

    Args:
        reac_xref: table with a 'source' column holding prefixed external IDs
            (e.g. 'kegg.reaction:R04422#1') and an 'ID' column holding the
            corresponding MNX reaction ID.
        key: prefix that selects the database, e.g. 'kegg.reaction:'. Its last
            character (the ':') is dropped to build the output file name.

    Returns:
        dict mapping MNX ID -> set of external IDs with the prefix stripped.
        Only MNX IDs having at least one matching row appear as keys.
        The same dict is pickled to 'mnx_dicts_R/<key minus last char>.pickle'.
    """
    prefix_len = len(key)
    mnx_to_something = {}

    # walk the two relevant columns side by side
    for source, mnx_id in zip(reac_xref['source'], reac_xref['ID']):
        if not source.startswith(key):
            continue  # this cross-reference belongs to another database
        mnx_to_something.setdefault(mnx_id, set()).add(source[prefix_len:])

    # write to disk
    name = key[:-1]  # avoid the ':'
    with open(f'mnx_dicts_R/{name}.pickle', 'wb') as handle:
        pickle.dump(mnx_to_something, handle)

    return mnx_to_something


In [7]:
# Currently missing annots: {'reactome', 'sbo'}

# One dictionary per external database, selected by the prefix of the 'source' column.
# Each call scans the whole reac_xref table and also writes
# 'mnx_dicts_R/<prefix without the trailing ":">.pickle'.
mnx_to_kegg = create_mnx_to_something_dict(reac_xref, key='kegg.reaction:')
mnx_to_metacyc = create_mnx_to_something_dict(reac_xref, key='metacyc.reaction:')
mnx_to_seed = create_mnx_to_something_dict(reac_xref, key='seed.reaction:')
mnx_to_bigg = create_mnx_to_something_dict(reac_xref, key='bigg.reaction:')
mnx_to_rhea = create_mnx_to_something_dict(reac_xref, key='rheaR:')
mnx_to_sabiork = create_mnx_to_something_dict(reac_xref, key='sabiork.reaction:')

In [8]:
# get the EC codes:
def create_mnx_to_ec_dict(reac_prop):
    """Collect the EC codes annotated to every MNX reaction.

    Args:
        reac_prop: table indexed by MNX reaction ID with an 'ECs' column.
            String values are split on ';'; any non-string value
            (presumably NaN for a missing annotation -- confirm) contributes
            no code.

    Returns:
        dict mapping every index value -> set of EC codes (possibly empty).
        The same dict is pickled to 'mnx_dicts_R/mnx_to_ec.pickle'.
    """
    mnx_to_ec = {}

    for mnx_id, ecs in reac_prop['ECs'].items():
        # every reaction gets an entry, even without EC annotation
        codes = mnx_to_ec.setdefault(mnx_id, set())
        if type(ecs) is str:
            codes.update(ecs.split(';'))

    # write to disk
    with open(f'mnx_dicts_R/mnx_to_ec.pickle', 'wb') as handle:
        pickle.dump(mnx_to_ec, handle)

    return mnx_to_ec

In [9]:
# EC codes per MNX reaction; also writes 'mnx_dicts_R/mnx_to_ec.pickle'
mnx_to_ec = create_mnx_to_ec_dict(reac_prop)

In [10]:
# keys pointing to themselves, so that the MNX ID is gained as an annotation too
def create_mnx_to_mnx_dict(reac_prop):
    """Build the identity mapping MNX ID -> {MNX ID}.

    Args:
        reac_prop: table whose index holds the MNX reaction IDs.

    Returns:
        dict mapping each index value to a one-element set containing itself.
        The same dict is pickled to 'mnx_dicts_R/mnx_to_mnx.pickle'.
    """
    mnx_to_mnx = {mnx_id: {mnx_id} for mnx_id in reac_prop.index}

    # write to disk:
    with open(f'mnx_dicts_R/mnx_to_mnx.pickle', 'wb') as handle:
        pickle.dump(mnx_to_mnx, handle)

    return mnx_to_mnx

In [11]:
# identity mapping (MNX ID -> {MNX ID}); also writes 'mnx_dicts_R/mnx_to_mnx.pickle'
mnx_to_mnx = create_mnx_to_mnx_dict(reac_prop)

## crossrefs creation

In [12]:
# Annotation label -> {MNX ID -> set of IDs in that database}.
# The labels are not always the prefixes used in reac_xref:
# 'biocyc' holds the 'metacyc.reaction:' entries and 'rhea' the 'rheaR:' entries.
crossrefs = {
    'kegg.reaction': mnx_to_kegg, 
    'biocyc': mnx_to_metacyc, 
    'seed.reaction': mnx_to_seed, 
    'bigg.reaction': mnx_to_bigg, 
    'rhea': mnx_to_rhea, 
    'sabiork.reaction': mnx_to_sabiork, 
    'ec-code': mnx_to_ec, 
    'metanetx.reaction': mnx_to_mnx, 
}

# persist the combined mapping so it can be reloaded without re-parsing the tables
with open(f'mnx_dicts_R/crossrefs.pickle', 'wb') as handle:
    pickle.dump(crossrefs, handle)

# Part 2: bigg/seed to others dict creation

In [13]:

# reload the combined mapping from 'mnx_dicts_R/crossrefs.pickle'
# (pickle.load executes arbitrary code: only load files you produced yourself)
with open(f'mnx_dicts_R/crossrefs.pickle', 'rb') as handle:
    crossrefs = pickle.load(handle)

In [14]:
def create_something_to_others_dict(reac_xref, key, crossrefs, name):
    """Map every ID of one database to its annotations in all databases.

    Each row of ``reac_xref`` whose 'source' starts with ``key`` links an
    external ID to an MNX reaction ID; the annotations known for that MNX ID
    in every entry of ``crossrefs`` are attached to the external ID.

    Args:
        reac_xref: table with a 'source' column (prefixed external IDs) and an
            'ID' column (the corresponding MNX reaction ID).
        key: prefix selecting the starting database, e.g. 'bigg.reaction:'.
        crossrefs: dict mapping an annotation label to a dict
            {MNX ID -> set of annotations}.
        name: stem of the output file, written as 'mnx_dicts_R/<name>.pickle'.

    Returns:
        dict mapping external ID (prefix stripped) -> {label -> set of
        annotations}. Every label of ``crossrefs`` is present for every
        external ID, with an empty set when nothing is known. The same dict
        is pickled to disk.
    """
    prefix_len = len(key)
    something_to_others = {}

    for source, mnx_id in zip(reac_xref['source'], reac_xref['ID']):
        if not source.startswith(key):
            continue  # this cross-reference belongs to another database

        something_id = source[prefix_len:]
        annotations = something_to_others.setdefault(something_id, {})

        for crossref, mnx_to_annots in crossrefs.items():
            # setdefault rather than assignment: the same external ID can occur
            # in several rows, and annotations collected from an earlier row
            # must be merged with, not replaced by, those of a later row.
            collected = annotations.setdefault(crossref, set())
            # .get(): an MNX ID without annotations in this database simply
            # contributes nothing (no blanket exception handling needed).
            collected.update(mnx_to_annots.get(mnx_id, ()))

    # write to disk:
    with open(f'mnx_dicts_R/{name}.pickle', 'wb') as handle:
        pickle.dump(something_to_others, handle)

    return something_to_others

In [15]:
# BiGG reaction ID -> annotations in every database; also writes 'mnx_dicts_R/bigg_to_others.pickle'
bigg_to_others = create_something_to_others_dict( reac_xref, 'bigg.reaction:', crossrefs, 'bigg_to_others',)

# SEED reaction ID -> annotations in every database; also writes 'mnx_dicts_R/seed_to_others.pickle'
seed_to_others = create_something_to_others_dict( reac_xref, 'seed.reaction:', crossrefs, 'seed_to_others',)


## testing area

In [16]:
# spot check: annotations collected for the BiGG reaction 'PGI'
bigg_to_others['PGI']

{'kegg.reaction': {'R00771'},
 'biocyc': {'PGLUCISOM-RXN'},
 'seed.reaction': {'rxn00558', 'rxn33838', 'rxn34259', 'rxn37760'},
 'bigg.reaction': {'PGI', 'PGI_h', 'R_PGI', 'R_PGI_h'},
 'rhea': set(),
 'sabiork.reaction': set(),
 'ec-code': {'5.3.1.9'},
 'metanetx.reaction': {'MNXR191975'}}

In [17]:
# spot check: annotations collected for the SEED reaction 'rxn00558'
seed_to_others['rxn00558']

{'kegg.reaction': {'R00771'},
 'biocyc': {'PGLUCISOM-RXN'},
 'seed.reaction': {'rxn00558', 'rxn33838', 'rxn34259', 'rxn37760'},
 'bigg.reaction': {'PGI', 'PGI_h', 'R_PGI', 'R_PGI_h'},
 'rhea': set(),
 'sabiork.reaction': set(),
 'ec-code': {'5.3.1.9'},
 'metanetx.reaction': {'MNXR191975'}}

In [19]:
# spot check: 'rxn33838' appears in the same 'seed.reaction' set as 'rxn00558',
# so it is expected to carry identical annotations
seed_to_others['rxn33838']

{'kegg.reaction': {'R00771'},
 'biocyc': {'PGLUCISOM-RXN'},
 'seed.reaction': {'rxn00558', 'rxn33838', 'rxn34259', 'rxn37760'},
 'bigg.reaction': {'PGI', 'PGI_h', 'R_PGI', 'R_PGI_h'},
 'rhea': set(),
 'sabiork.reaction': set(),
 'ec-code': {'5.3.1.9'},
 'metanetx.reaction': {'MNXR191975'}}