# Working with JSON metabolic models and calculating pathway coverage


This notebook is run with

`docker run -v /Users/shuzhao/li.projects:/home/jovyan -p 8888:8888 jupyter/scipy-notebook`

The metabolic model is based on Human-GEM, parsed by MG, in the JMS repo.

SL 2023-03-08

In [1]:
# jms_metabolite_services >= 0.5.1
!pip install -q --upgrade jms_metabolite_services

In [2]:
import json
import numpy as np

## metabolic model as JSON from GEM

In [3]:
# This is Human-GEM parsed by MG, in the JMS repo.
# Read it through a context manager so the file handle is closed as soon as
# parsing finishes instead of being left open for the life of the kernel.
with open('metabolicModel_az_HumanGEM_20220302_noCompartmentalization.json') as model_file:
    model = json.load(model_file)
model.keys()

dict_keys(['id', 'list_of_reactions', 'list_of_compounds', 'list_of_pathways', 'meta_data'])

In [4]:
# NOTE(review): the file name says "noCompartmentalization", yet the 'note' field
# in the output reads "compartmentalized" — confirm which one is correct.
model['meta_data'] # this should be de-compartmentalized

{'species': 'human',
 'version': '',
 'sources': ['https://github.com/SysBioChalmers/Human-GEM, retrieved 2022-02-09'],
 'status': '',
 'last_update': '20220209',
 'note': 'Human-GEM compartmentalized, with genes and ECs.'}

In [5]:
# Inspect one pathway record; the output shows 'id', 'name' and 'list_of_reactions' keys.
model['list_of_pathways'][6]

{'id': 'group7',
 'name': 'Androgen metabolism',
 'list_of_reactions': ['MAR01944',
  'MAR01945',
  'MAR01952',
  'MAR01953',
  'MAR01958',
  'MAR01959',
  'MAR01960',
  'MAR01962',
  'MAR01963',
  'MAR01967',
  'MAR01968',
  'MAR01969',
  'MAR01970',
  'MAR01971',
  'MAR01973',
  'MAR01974',
  'MAR01976',
  'MAR01977',
  'MAR01978',
  'MAR01982',
  'MAR01983',
  'MAR02014',
  'MAR02015',
  'MAR02016',
  'MAR02017',
  'MAR02018',
  'MAR02019',
  'MAR02020',
  'MAR02022',
  'MAR02024',
  'MAR02025',
  'MAR03400',
  'MAR03600',
  'MAR03601',
  'MAR03602',
  'MAR03603']}

In [6]:
# Inspect one reaction record; the output shows 'id', 'reactants', 'products', 'genes' and 'enzymes' keys.
model['list_of_reactions'][66]

{'id': 'MAR08589',
 'reactants': ['MAM02040', 'MAM02450'],
 'products': ['MAM01965'],
 'genes': ['ENSG00000171298'],
 'enzymes': ['3.2.1.20']}

In [7]:
# Inspect one compound record; the output shows name, identifiers, formulas, charge,
# neutral mono-isotopic mass and (here empty) SMILES / InChI fields.
model['list_of_compounds'][88]

{'id': 'MAM00089',
 'name': '(3Z,7Z,10Z)-hexadecatrienoyl-CoA',
 'identifiers': [['humanGEM', 'MAM00089'], ['vmhmetabolite', 'CE2442']],
 'neutral_formula': 'C37H60N7O17P3S',
 'charge': -4,
 'charged_formula': 'C37H56N7O17P3S',
 'neutral_mono_mass': 999.2979256653599,
 'SMILES': '',
 'inchi': ''}

## Demo of indexed model from mummichog

In [8]:
# this is the model in mummichog src
from JSON_metabolicModels import metabolicModels

In [9]:
# Pick the human model out of the mummichog model collection and list its top-level keys.
mfn = metabolicModels['human_model_mfn']
mfn.keys()

dict_keys(['edge2rxn', 'version', 'metabolic_rxns', 'cpd_edges', 'metabolic_pathways', 'Compounds', 'dict_cpds_def', 'cpd2pathways', 'edge2enzyme', 'dict_cpds_mass'])

In [10]:
# Names of the pathways at positions 20-24; slice the records first so only those five are visited.
[pathway['name'] for pathway in mfn['metabolic_pathways'][20:25]]

['3-oxo-10R-octadecatrienoate beta-oxidation',
 'Alkaloid biosynthesis II',
 'Vitamin B1 (thiamin) metabolism',
 'Glycosphingolipid biosynthesis - globoseries',
 'Histidine metabolism']

In [11]:
# A pathway record in the indexed model; the output shows 'cpds', 'rxns', 'ecs', 'id' and 'name' keys.
mfn['metabolic_pathways'][11]

{'cpds': ['C00025',
  'CE2540',
  'CE7115',
  'CE7114',
  'CE7111',
  'CE7110',
  'CE7113',
  'CE7112',
  'CE7090',
  'CE7091',
  'C06439',
  'C00051',
  'C00030',
  'CE7109',
  'C00028',
  'CE6508',
  'CE7089',
  'CE7088',
  'CE7087',
  'CE7086',
  'CE7085',
  'CE7084',
  'CE7083',
  'CE7082',
  'CE7081',
  'CE7080',
  'CE7079'],
 'rxns': ['RE3502',
  'RE3487',
  'RE3485',
  'RE3498',
  'RE3503',
  'RE3506',
  'RE3497',
  'RE3505',
  'RE3504',
  'RE3488',
  'RE3489',
  'RE3494',
  'RE3491',
  'RE3501',
  'RE3500',
  'RE3495',
  'RE3496',
  'RE3492',
  'RE3499',
  'RE3493',
  'RE3490',
  'RE3486'],
 'ecs': ['1.14.99.1',
  '3.3.2.10',
  '1.13.11.34',
  '1.14.13.30',
  '5.3.99.3',
  '3.3.2.6',
  '1.1.1.184',
  '3.3.2.9',
  '1.13.11.33',
  '2.3.2.2',
  '4.4.1.20'],
 'id': 'mfn1v10path188',
 'name': 'Putative anti-Inflammatory metabolites formation from EPA'}

In [12]:
# The pathway identifier is stored under the 'id' key.
mfn['metabolic_pathways'][11]['id']

'mfn1v10path188'

In [13]:
# 'Compounds' is looked up by compound ID; this entry holds 'formula', 'mw', 'name' and 'adducts'.
mfn['Compounds']['C06439']

{'formula': 'C20H29O5',
 'mw': 350.2093,
 'name': 'Prostaglandin E3',
 'adducts': {}}

## Calculate pathway coverage in an experimental dataset 

In [16]:
# These could be run by a single wrapper function jms.coverage.wrapper_file2coverage

from jms.coverage import report_pathway_coverage, export_pathway_coverage_table
from jms.modelConvert import convert_json_model
from jms.empiricalCpds import load_epds_from_json

In [17]:
# Load the annotated empirical compounds from JSON via jms.empiricalCpds.load_epds_from_json.
list_epds = load_epds_from_json('Annotated_empricalCompounds.json')

In [18]:
# Convert the Human-GEM JSON model with jms.modelConvert.convert_json_model.
# The keys of the result (shown further down) largely mirror those of the mummichog model `mfn`.
mcgmodel = convert_json_model(model)

In [19]:
# A pathway record in the converted model; besides 'rxns', 'cpds' and 'ecs' it also carries 'genes'.
mcgmodel['metabolic_pathways'][11]

{'id': 'group12',
 'name': 'Beta oxidation of branched-chain fatty acids (mitochondrial)',
 'rxns': ['MAR03522',
  'MAR03523',
  'MAR03524',
  'MAR03525',
  'MAR03526',
  'MAR03527',
  'MAR03528',
  'MAR03529',
  'MAR03530',
  'MAR03531',
  'MAR03532',
  'MAR03533',
  'MAR03534'],
 'cpds': ['MAM01597',
  'MAM00703',
  'MAM00934',
  'MAM02774',
  'MAM02040',
  'MAM01017',
  'MAM00845',
  'MAM02039',
  'MAM00933',
  'MAM02180',
  'MAM02553',
  'MAM01015',
  'MAM00563',
  'MAM01802',
  'MAM01261',
  'MAM00562',
  'MAM00706',
  'MAM00578',
  'MAM00844',
  'MAM00831',
  'MAM02552',
  'MAM00705',
  'MAM01803'],
 'ecs': ['1.1.1.211',
  '1.1.1.35',
  '1.3.8.7',
  '4.2.1.17',
  '1.3.8.8',
  '2.3.1.16',
  '5.1.99.4'],
 'genes': ['ENSG00000138796',
  'ENSG00000072506',
  'ENSG00000084754',
  'ENSG00000167315',
  'ENSG00000127884',
  'ENSG00000117054',
  'ENSG00000242110',
  'ENSG00000138029',
  'ENSG00000115361',
  'ENSG00000113790']}

In [20]:
# Top-level keys of the converted model.
mcgmodel.keys()

dict_keys(['id', 'version', 'Compounds', 'dict_cpds_def', 'metabolic_rxns', 'cpd_edges', 'edge2rxn', 'edge2enzyme', 'metabolic_pathways', 'cpd2pathways'])

In [21]:
# A compound record in the converted model; it has the same fields as the compound entries in the source JSON.
mcgmodel['Compounds']['MAM02553']

{'id': 'MAM02553',
 'name': 'NADH',
 'identifiers': [['humanGEM', 'MAM02553'],
  ['bigg.metabolite', 'nadh'],
  ['kegg.compound', 'C00004'],
  ['hmdb', 'HMDB01487'],
  ['chebi', '16908'],
  ['pubchem.compound', '928'],
  ['vmhmetabolite', 'nadh'],
  ['metanetx.chemical', 'MNXM10']],
 'neutral_formula': 'C21H29N7O14P2',
 'charge': -2,
 'charged_formula': 'C21H27N7O14P2',
 'neutral_mono_mass': 665.1247726356299,
 'SMILES': '',
 'inchi': ''}

In [22]:
# For this compound ID, 'dict_cpds_def' returns the compound name.
mcgmodel['dict_cpds_def']['MAM02553']

'NADH'

In [23]:
# Compute pathway coverage of the empirical compounds on the converted model.
metabolic_pathways = report_pathway_coverage(mcgmodel, list_epds)

In [24]:
# Write the pathway coverage results to a tab-delimited file.
export_pathway_coverage_table(metabolic_pathways, outfile='pathway_matches.tsv')

## Conclusion

We used JMS functions to match the neutral mass of each empirical compound (i.e. khipu) against the compounds in the Human-GEM metabolic model.

The result is written to a tab-delimited file, 'pathway_matches.tsv'.

One can use this recipe to analyze data from multiple methods/experiments.