In [1]:
import pandas as pnd
import pickle
import copy
import itertools

# Part 1: creating mnx_to_mnxchilds 

## load raw database

In [2]:
# Column names for reac_prop.tsv: the table is read with header=None, so they are supplied here.
header_reac_prop = ['ID', 'mnx_equation', 'reference', 'ECs', 'is_balanced', 'is_transport']
reac_prop = (
    pnd.read_csv(
        'metanetx_4_4/reac_prop.tsv',
        sep='\t',
        comment='#',
        header=None,
        names=header_reac_prop,
    )
    # index the table by reaction ID; verify_integrity raises if an ID is duplicated
    .set_index('ID', drop=True, verify_integrity=True)
)


In [3]:
header_chem_isom = ['parent', 'child', 'description']
chem_isom = pnd.read_csv(
    'metanetx_4_4/chem_isom.tsv',
    sep='\t',
    comment='#',
    header=None,
    names=header_chem_isom,
)
# No index can be set on this table, because there are always duplicated IDs.
# In fact, 4.4/USERMANUAL.md reports: "parent molecule includes the child molecule (...)
# one-to-one, many-to-one and one-to-many relations are possible, but the overall graph is a acyclic"

## load parent to child (mets)

In [4]:

# Parent -> childs dictionary for metabolites, read from a pickle on disk.
# NOTE: pickle.load can execute arbitrary code; only load files you trust.
with open('mnx_dicts_M/mnx_to_mnxchilds.pickle', 'rb') as handle:
    mnx_to_mnxchilds_m = pickle.load(handle)


## create reac_prop_simplyfied


In [5]:
def get_reac_prop_simplyfied(reac_prop):
    """Reduce every MNX reaction of 'reac_prop' to two plain sets of metabolite IDs.

    Stoichiometric coefficients, compartments and protons ('MNXM1') are dropped.
    Reactions in which 'BIOMASS' appears on either side are skipped.

    Args:
        reac_prop: DataFrame indexed by reaction ID, with a 'mnx_equation' column
            whose two sides are separated by ' = '.

    Returns:
        dict mapping each kept reaction ID to
        {'mnx_reacs': set of reactant IDs, 'mnx_prods': set of product IDs}.
    """

    def _side_to_mets(side):
        # Only the tokens containing '@' are kept (the text before '@' is the
        # metabolite ID, the rest is the compartment); other tokens, such as the
        # stoichiometric coefficients, are discarded.
        mets = {token.split('@', 1)[0] for token in side.split(' ') if '@' in token}
        # protons are not considered:
        mets.discard('MNXM1')
        return mets

    simplified = {}
    for mnx_id, row in reac_prop.iterrows():
        left_side, right_side = row['mnx_equation'].split(' = ')
        reacs = _side_to_mets(left_side)
        prods = _side_to_mets(right_side)

        # reactions involving biomass are left out:
        if 'BIOMASS' in reacs or 'BIOMASS' in prods:
            continue

        simplified[mnx_id] = {'mnx_reacs': reacs, 'mnx_prods': prods}

    return simplified
    

In [6]:
# Build the simplified view (no stoichiometry, compartment, protons) of the MNX reactions.
reac_prop_simplyfied = get_reac_prop_simplyfied(reac_prop)

## create mnx_to_mnxchilds (reacs)


In [7]:
def get_match(reacs, prods, reac_prop_simplyfied):
    """Return the ID of the first simplified reaction equal to (reacs, prods).

    The comparison ignores direction: a stored reaction matches when its
    reactants/products equal 'reacs'/'prods' or 'prods'/'reacs'.

    Args:
        reacs: set of reactant IDs.
        prods: set of product IDs.
        reac_prop_simplyfied: dict mapping reaction ID to
            {'mnx_reacs': set, 'mnx_prods': set}.

    Returns:
        The first matching reaction ID (in dictionary order), or None.
    """
    for rid, sides in reac_prop_simplyfied.items():
        stored_reacs = sides['mnx_reacs']

        # same direction:
        if reacs == stored_reacs and prods == sides['mnx_prods']:
            return rid

        # opposite direction:
        if prods == stored_reacs and reacs == sides['mnx_prods']:
            return rid

    return None

In [8]:

def get_mnx_to_mnxchilds(reac_prop_simplyfied, mnx_to_mnxchilds_m):
    """Map each reaction to the known reactions obtained by swapping metabolites for their childs.

    For every simplified reaction in which at least one metabolite is a key of
    'mnx_to_mnxchilds_m', each metabolite is replaced by itself or by one of its
    childs, in all possible combinations. Each candidate (set of reactants, set of
    products) is then searched among the simplified reactions, in either direction.

    For example, with reactant options [A, B], [C, D] and product options [E], [F, G],
    8 candidates are tested: (A, C = E, F), (A, C = E, G), (A, D = E, F), ...
    (itertools.product() found at https://stackoverflow.com/a/798893)

    Args:
        reac_prop_simplyfied: dict mapping reaction ID to
            {'mnx_reacs': set of IDs, 'mnx_prods': set of IDs}.
        mnx_to_mnxchilds_m: dict mapping a parent metabolite ID to an iterable of
            its child metabolite IDs.

    Returns:
        dict mapping reaction ID to the set of matching reaction IDs. Only reactions
        with at least one match appear. The combination made of the unchanged
        metabolites is tested too, so the set normally contains the reaction itself
        (or the first reaction, in dictionary order, sharing its simplified form).

    Side effects:
        Prints a progress line (ending with a carriage return) for every reaction
        that has at least one parent metabolite.
    """

    # Index every simplified reaction under both orientations. setdefault() keeps the
    # first reaction (in dictionary order) for each key, which is what a linear scan
    # checking both directions would return, but each lookup is now a hash access
    # instead of a pass over all the reactions.
    first_match = {}
    for known_rid, sides in reac_prop_simplyfied.items():
        left = frozenset(sides['mnx_reacs'])
        right = frozenset(sides['mnx_prods'])
        first_match.setdefault((left, right), known_rid)
        first_match.setdefault((right, left), known_rid)

    def _options(mets):
        # For each metabolite, list the molecule itself followed by its childs (if any).
        # Also report whether at least one metabolite is a known parent.
        options, any_parent = [], False
        for met in mets:
            if met in mnx_to_mnxchilds_m:
                options.append([met] + list(mnx_to_mnxchilds_m[met]))
                any_parent = True
            else:
                options.append([met])
        return options, any_parent

    # create a dictionary where each reaction points to its "child" reactions.
    mnx_to_mnxchilds = {}
    total = len(reac_prop_simplyfied)

    for index, (rid, sides) in enumerate(reac_prop_simplyfied.items()):

        reac_options, reacs_have_childs = _options(sides['mnx_reacs'])
        prod_options, prods_have_childs = _options(sides['mnx_prods'])

        # nothing to expand if no involved metabolite has childs:
        if not (reacs_have_childs or prods_have_childs):
            continue

        # number of candidates, computed without materializing them:
        n_combinations = 1
        for options in reac_options + prod_options:
            n_combinations *= len(options)

        # log completion status
        print(f"{round((index + 1) / total * 100, 1)}%  ({n_combinations}) ({rid})", end='\r')

        # the first 'n_reacs' items of each combination are reactants, the rest products:
        n_reacs = len(reac_options)
        for combination in itertools.product(*reac_options, *prod_options):
            new_reacs = frozenset(combination[:n_reacs])
            new_prods = frozenset(combination[n_reacs:])

            # if this candidate exists among the known reactions, record it:
            match = first_match.get((new_reacs, new_prods))
            if match is not None:
                mnx_to_mnxchilds.setdefault(rid, set()).add(match)

    return mnx_to_mnxchilds


In [9]:
# get the dictionary and save to disk:
# Compute the reaction -> child reactions dictionary, then persist it as a pickle.
mnx_to_mnxchilds_r = get_mnx_to_mnxchilds(reac_prop_simplyfied, mnx_to_mnxchilds_m)
with open('mnx_dicts_R/mnx_to_mnxchilds.pickle', 'wb') as handle:
    pickle.dump(mnx_to_mnxchilds_r, handle)

100.0%  (3) (MNXR99990)))

# Part 2: extending mnx reactions based on stereochemistry

## load dicts

In [10]:
# Cross references, later accessed as crossrefs[database][mnx_id].
# NOTE: pickle.load can execute arbitrary code; only load files you trust.
with open('mnx_dicts_R/crossrefs.pickle', 'rb') as handle:
    crossrefs = pickle.load(handle)

In [11]:
# Annotation dictionaries keyed by BiGG and by SEED reaction IDs.
# NOTE: pickle.load can execute arbitrary code; only load files you trust.
with open('mnx_dicts_R/bigg_to_others.pickle', 'rb') as handle:
    bigg_to_others = pickle.load(handle)

with open('mnx_dicts_R/seed_to_others.pickle', 'rb') as handle:
    seed_to_others = pickle.load(handle)

In [12]:
# Reload the reaction -> child reactions dictionary from the path written in Part 1.
with open('mnx_dicts_R/mnx_to_mnxchilds.pickle', 'rb') as handle:
    mnx_to_mnxchilds = pickle.load(handle)



## extend something_to_others

In [13]:
def extend_based_on_stereochemistry(something_to_others, mnx_to_mnxchilds, crossrefs, name):
    """Extend bigg/seed annotations with those of the 'child' MNX reactions.

    For every MNX reaction listed under 'metanetx.reaction' of each entry, the
    annotations of its child reactions (from every database in 'crossrefs') are
    added to a deep copy of the entry. The input dictionary is left untouched.

    Args:
        something_to_others: dict mapping a bigg/seed reaction ID to
            {database: set of annotations}; must contain 'metanetx.reaction'.
        mnx_to_mnxchilds: dict mapping an MNX reaction ID to its child reaction IDs.
        crossrefs: dict mapping database -> {MNX reaction ID: annotations}.
        name: file stem; the result is pickled to 'mnx_dicts_R/<name>.pickle'.

    Returns:
        The extended copy of 'something_to_others'.

    Raises:
        KeyError: if an entry lacks 'metanetx.reaction', or lacks a database key
            for which a child reaction brings annotations.
        OSError: if the output file cannot be written (e.g. missing directory).
    """

    # work on a copy, so that the original annotations stay available for comparison:
    something_to_others_extended = copy.deepcopy(something_to_others)

    # iterate each bigg/seed reaction ID:
    for something_id, annotations in something_to_others.items():
        extended_annotations = something_to_others_extended[something_id]

        for mnx_id in annotations['metanetx.reaction']:

            # Only a missing key means "this reaction has no child reactions";
            # any other error is a real problem and must not be silenced.
            try:
                child_mnx_ids = mnx_to_mnxchilds[mnx_id]
            except KeyError:
                continue

            for child_mnx_id in child_mnx_ids:

                # iterate all the available annotation databases:
                for crossref, child_to_annots in crossrefs.items():

                    # this database may have no annotation for the child reaction:
                    try:
                        annots = child_to_annots[child_mnx_id]
                    except KeyError:
                        continue

                    # populate with new annotations:
                    for annot in annots:
                        extended_annotations[crossref].add(annot)

    # write to disk
    with open(f'mnx_dicts_R/{name}.pickle', 'wb') as handle:
        pickle.dump(something_to_others_extended, handle)

    return something_to_others_extended

In [14]:
# Extend both annotation dictionaries; each call also writes 'mnx_dicts_R/<name>.pickle'.
bigg_to_others_extended = extend_based_on_stereochemistry(bigg_to_others, mnx_to_mnxchilds, crossrefs, 'bigg_to_others_extended' )

seed_to_others_extended = extend_based_on_stereochemistry(seed_to_others, mnx_to_mnxchilds, crossrefs, 'seed_to_others_extended' )

## testing area


In [15]:
# Spot check: annotations of BiGG reaction G6PP before the extension.
bigg_to_others['G6PP']

{'kegg.reaction': {'R00303'},
 'biocyc': {'RXN66-526'},
 'seed.reaction': {'rxn00220'},
 'bigg.reaction': {'G6PP', 'G6PPer', 'R_G6PP', 'R_G6PPer'},
 'rhea': {'16689', '16690', '16691', '16692'},
 'sabiork.reaction': {'7713', '796'},
 'ec-code': {'3.1.3',
  '3.1.3.1',
  '3.1.3.104',
  '3.1.3.2',
  '3.1.3.23',
  '3.1.3.25',
  '3.1.3.29',
  '3.1.3.58',
  '3.1.3.9'},
 'metanetx.reaction': {'MNXR195425'}}

In [16]:
# G6PP after the extension: annotations of the child reactions have been merged in.
bigg_to_others_extended['G6PP']

{'kegg.reaction': {'R00303', 'R01788'},
 'biocyc': {'RXN-15312', 'RXN66-526'},
 'seed.reaction': {'rxn00220', 'rxn15029', 'rxn15250', 'rxn19814', 'rxn31313'},
 'bigg.reaction': {'G6PP', 'G6PPer', 'R_G6PP', 'R_G6PPer'},
 'rhea': {'16689',
  '16690',
  '16691',
  '16692',
  '44904',
  '44905',
  '44906',
  '44907'},
 'sabiork.reaction': {'7713', '796'},
 'ec-code': {'3.1.3',
  '3.1.3.1',
  '3.1.3.104',
  '3.1.3.2',
  '3.1.3.23',
  '3.1.3.25',
  '3.1.3.29',
  '3.1.3.58',
  '3.1.3.9'},
 'metanetx.reaction': {'MNXR173089', 'MNXR192089', 'MNXR195425'}}

In [17]:
# Spot check: annotations of BiGG reaction PGMT before the extension.
bigg_to_others['PGMT']

{'kegg.reaction': set(),
 'biocyc': set(),
 'seed.reaction': set(),
 'bigg.reaction': {'PGMT', 'PGM_h', 'R_PGMT', 'R_PGM_h'},
 'rhea': set(),
 'sabiork.reaction': {'12817', '7674'},
 'ec-code': {'5.4.2.2', '5.4.2.5', '5.4.2.8'},
 'metanetx.reaction': {'MNXR192018'}}

In [18]:
# PGMT after the extension: annotations of the child reactions have been merged in.
bigg_to_others_extended['PGMT']

{'kegg.reaction': {'R00959', 'R02728', 'R08639', 'R11310'},
 'biocyc': {'BETA-PHOSPHOGLUCOMUTASE-RXN', 'PHOSPHOGLUCMUT-RXN', 'RXN-16999'},
 'seed.reaction': {'rxn00704',
  'rxn01967',
  'rxn12204',
  'rxn15773',
  'rxn20583',
  'rxn33571',
  'rxn33867'},
 'bigg.reaction': {'PGCM',
  'PGMT',
  'PGMT_2',
  'PGMT_B',
  'PGMTh',
  'PGM_h',
  'R_PGCM',
  'R_PGMT',
  'R_PGMT_2',
  'R_PGMT_B',
  'R_PGMTh',
  'R_PGM_h'},
 'rhea': {'20113',
  '20114',
  '20115',
  '20116',
  '23536',
  '23537',
  '23538',
  '23539'},
 'sabiork.reaction': {'11', '12817', '2618', '7674', '9261'},
 'ec-code': {'5.4.2.10', '5.4.2.2', '5.4.2.5', '5.4.2.6', '5.4.2.8'},
 'metanetx.reaction': {'MNXR102549',
  'MNXR136808',
  'MNXR145962',
  'MNXR145963',
  'MNXR189257',
  'MNXR192017',
  'MNXR192018'}}

In [23]:
# Spot check: annotations of BiGG reaction GLYCLTDx before the extension.
bigg_to_others['GLYCLTDx']

{'kegg.reaction': {'R00717'},
 'biocyc': {'GLYCOLATE-REDUCTASE-RXN'},
 'seed.reaction': {'rxn00512', 'rxn31400', 'rxn34300'},
 'bigg.reaction': {'GLYCLTD_m', 'GLYCLTDx', 'R_GLYCLTD_m', 'R_GLYCLTDx'},
 'rhea': {'18229', '18230', '18231', '18232'},
 'sabiork.reaction': {'425'},
 'ec-code': {'1.1.1.26',
  '1.1.1.28',
  '1.1.1.29',
  '1.1.1.79',
  '1.1.1.8',
  '1.1.1.81',
  '1.1.99.14'},
 'metanetx.reaction': {'MNXR100331'}}

In [20]:
# GLYCLTDx after the extension (the recorded output is identical to the un-extended entry).
bigg_to_others_extended['GLYCLTDx']

{'kegg.reaction': {'R00717'},
 'biocyc': {'GLYCOLATE-REDUCTASE-RXN'},
 'seed.reaction': {'rxn00512', 'rxn31400', 'rxn34300'},
 'bigg.reaction': {'GLYCLTD_m', 'GLYCLTDx', 'R_GLYCLTD_m', 'R_GLYCLTDx'},
 'rhea': {'18229', '18230', '18231', '18232'},
 'sabiork.reaction': {'425'},
 'ec-code': {'1.1.1.26',
  '1.1.1.28',
  '1.1.1.29',
  '1.1.1.79',
  '1.1.1.8',
  '1.1.1.81',
  '1.1.99.14'},
 'metanetx.reaction': {'MNXR100331'}}

In [21]:
# Spot check on the SEED side: annotations of rxn00786 before the extension.
seed_to_others['rxn00786']

{'kegg.reaction': {'R01068', 'R01070'},
 'biocyc': {'F16ALDOLASE-RXN'},
 'seed.reaction': {'rxn00786',
  'rxn15116',
  'rxn19683',
  'rxn27851',
  'rxn31335',
  'rxn38097'},
 'bigg.reaction': set(),
 'rhea': {'14729', '14730', '14731', '14732'},
 'sabiork.reaction': {'1338', '3'},
 'ec-code': {'4.1.2.13', '4.1.2.40'},
 'metanetx.reaction': {'MNXR153519'}}

In [22]:
# rxn00786 after the extension (the recorded output is identical to the un-extended entry).
seed_to_others_extended['rxn00786']

{'kegg.reaction': {'R01068', 'R01070'},
 'biocyc': {'F16ALDOLASE-RXN'},
 'seed.reaction': {'rxn00786',
  'rxn15116',
  'rxn19683',
  'rxn27851',
  'rxn31335',
  'rxn38097'},
 'bigg.reaction': set(),
 'rhea': {'14729', '14730', '14731', '14732'},
 'sabiork.reaction': {'1338', '3'},
 'ec-code': {'4.1.2.13', '4.1.2.40'},
 'metanetx.reaction': {'MNXR153519'}}