# Tutorial: GPRuler using HMRcore model
This tutorial explains how to use GPRuler with an SBML model as input.

In [1]:
import pandas as pd

**Step 1. metaboliteIdentification**  
To run the script from the Jupyter notebook:
```
!python pipeline/metaboliteIdentification.py hmr
```

Alternatively, to run the script from the terminal:
```
python pipeline/metaboliteIdentification.py hmr
```

The two produced outputs are:

In [2]:
# First output of step 1. The file has a .csv extension but is tab-delimited.
dfOutput1 = pd.read_csv(
    './tutorial/outputs/hmrCore_metabolites.csv',
    sep='\t',
)
# Preview only the first rows.
dfOutput1.head()

Unnamed: 0,Id,Name,KeggId,ChebiId,PubchemId,boundaryCondition,chemicalFormula,Inchi
0,M_biomass_s,biomasss,,,,False,,
1,M_glucose_s,D-Glucose,,,,False,,
2,M_lactateL_s,L-Lactic acid,,,,False,,
3,M_O2_s,Oxygen,,,,False,,
4,M_glutamine_s,L-Glutamine,,,,False,,


In [3]:
# Second output of step 1 (tab-delimited despite the .csv extension).
dfOutput2 = pd.read_csv(
    './tutorial/outputs/hmrCore_metabolites_wInferredIds.csv',
    sep='\t',
)
# Preview only the first rows.
dfOutput2.head()

Unnamed: 0,Name,Identifiers
0,2-Oxoglutaramate,"['C00940', '30882', '16769']"
1,GTP,"['15996', 'C00044', '37565']"
2,(R)-3-Hydroxyoctanoyl-[acyl-carrier protein],['C04620']
3,Flavin Adenine Dinucleotide Reduced,[]
4,Folic acid,"['67011', 'CPD-12826', '62501', 'C00504', '374..."


**Step 2. metabolitesIdentification_FuzzyWuzzy**  
To run the script from the Jupyter notebook:
```
!python pipeline/metabolitesIdentification_FuzzyWuzzy.py hmr
```

Alternatively, to run the script from the terminal:
```
python pipeline/metabolitesIdentification_FuzzyWuzzy.py hmr
```
The produced outputs are:

In [4]:
# Step 2 output: the MetaCyc "allResults" mapping table (tab-separated).
dfOutput1 = pd.read_csv(
    './tutorial/outputs/hmrCore_mappingMetaCyc_allResults.tsv',
    sep='\t',
)
# Preview only the first rows.
dfOutput1.head()

Unnamed: 0.1,Unnamed: 0,0,1,2,3,4,5,6,7,8,9
0,2'-Deoxyguanosine 5'-monophosphate,"(""2'-deoxyguanosine 3'-monophosphate"", 97)","(""2'-deoxyadenosine 3'-monophosphate"", 91)","(""3'-deoxyadenosine 5'-monophosphate"", 91)","(""2'-deoxyguanosine"", 90)","('phosphate', 90)","('guanosine', 90)","('2-amino-3-[(2,5-dihydroxyphenyl)sulfanyl]pro...","(""3,5-di-C-glucosyl-2,4,4',6-tetrahydroxydiben...","('2-[2-(1,3-dioxan-2-yl)ethanesulfinyl]-5-phen...","('2-((2-(1,3-dioxan-2-yl)ethyl)thio)-5-phenyl-..."
1,CDP,"('CDP', 100)","('CDP-2-glycerol', 90)","('CDP-ribitol', 90)","('CDP-N-methylethanolamine', 90)","('CDP-D-fructose', 90)","('CDP-&alpha;-D-paratose', 90)","('CDP-D-mannitol', 90)","('CDP-D-xylulose', 90)","('CDP-&alpha;-abequose', 90)","('CDP-&alpha;-tyvelose', 90)"
2,Isocitric acid,"('(+)-dehydrodiconiferyl acid carboxylate', 86)","('juvenile hormone I acid', 86)","('juvenile hormone III acid', 86)","('juvenile hormone II acid', 86)","('dihydromonacolin L acid', 86)","('3&alpha;-hydroxy-3,5-dihydromonacolin L acid...",('O-perylenebutylmethylphosphonic-acid-p-nitro...,"('8-demethylnovobiocic acid', 86)","('ulvanobiouronic acid 3-sulfate A', 86)","('ulvanobiouronic acid 3-sulfate B', 86)"
3,"4alpha-Carboxy-4beta-methyl-5alpha-cholesta-8,...",('4&alpha;-carboxy-4&beta;-methyl-5&alpha;-cho...,('4&alpha;-carboxy-4&beta;-methyl-5&alpha;-cho...,('4&alpha;-formyl-4&beta;-methyl-5&alpha;-chol...,"('N-[(3R)-6-(5,6-dihydropyridin-2-yl)-7-hydrox...","('N-[(3R)-8-[(2E)-but-2-enoyl]-6-(5,6-dihydrop...","('8-thiabicyclo[3,2,1]octane-3-one', 86)","('(25S)-5-&beta;-spirostan-3-&beta;-ol', 86)","('24-epicathasterone', 86)","('(Rib-ol)-P-Glc-AATGal-PP-undecaprenol', 86)","('2-hydroxy-2-methyl-butanal', 86)"
4,NADPH,"('NADPH', 100)","('ADP', 90)","('(S)-NADPHX', 90)","('(R)-NADPHX', 90)","('&alpha;-NADPH', 90)","('NADH', 89)","('&alpha;-NADH', 80)","('1,6-dihydro-&beta;-NADP', 80)","('1,2-dihydro-&beta;-NADP', 80)","('3-acetyl-NADH', 80)"


In [5]:
# Step 2 output: the KeggC "allResults" mapping table (tab-separated).
dfOutput2 = pd.read_csv(
    './tutorial/outputs/hmrCore_mappingKeggC_allResults.tsv',
    sep='\t',
)
# Preview only the first rows.
dfOutput2.head()

Unnamed: 0.1,Unnamed: 0,0,1,2,3,4,5,6,7,8,9
0,2'-Deoxyguanosine 5'-monophosphate,"(""2'-Deoxyguanosine 5'-monophosphate"", 100)","('Deoxyguanosine monophosphate', 95)","(""2'-Deoxyadenosine 5'-monophosphate"", 94)","(""2'-Deoxyguanosine 5'-phosphate"", 94)","(""2'-Deoxyguanosine 5'-diphosphate"", 91)","('Phosphate', 90)","(""2'-Deoxyguanosine 5'-triphosphate"", 90)","('Guanosine', 90)","('Deoxyguanosine', 90)","(""2'-Deoxyguanosine"", 90)"
1,CDP,"('CDP', 100)","('CDP-ascarylose', 90)","('CDP-glucose', 90)","('CDP-D-Glucose', 90)","('CDP-choline', 90)","('CDPtyvelose', 90)","('P', 90)","('C', 90)","('CDP-1L-myo-inositol', 90)","('CDP-ethanolamine', 90)"
2,Isocitric acid,"('Isocitric acid', 100)","('D-threo-Isocitric acid', 90)","('Acid', 90)","('RI', 90)","('D-erythro-Isocitric acid', 90)","('Citric acid', 88)","('Trimethylcolchicinic acid methyl ether', 86)","('2-n-Propyl-4-oxopentanoic acid', 86)","('Aromatic L-amino acid', 86)","('Oleic acid methyl ester', 86)"
3,"4alpha-Carboxy-4beta-methyl-5alpha-cholesta-8,...",('4alpha-Carboxy-4beta-methyl-5alpha-cholesta-...,"('4alpha-Carboxy-5alpha-cholesta-8,24-dien-3be...","('5alpha-Cholesta-8,24-dien-3beta-ol', 90)",('4alpha-Methyl-5alpha-cholesta-7-en-3beta-ol'...,"('5alpha-Cholesta-7,24-dien-3beta-ol', 87)","('Trimethylcolchicinic acid methyl ether', 86)","('Oleic acid methyl ester', 86)","('Methyl oleate', 86)","('Propane-1-ol', 86)","('Propan-1-ol', 86)"
4,NADPH,"('NADPH', 100)","('[Oxidized NADPH---hemoprotein reductase]', 90)","('P', 90)","('alpha-NADPH', 90)","('[Reduced NADPH---hemoprotein reductase]', 90)","('Na+', 90)","('H+', 90)","('ADP', 90)","('A', 90)","('(R)-NADPH-hydrate', 90)"


In [6]:
# Step 2 output: the KeggG "allResults" mapping table (tab-separated).
dfOutput3 = pd.read_csv(
    './tutorial/outputs/hmrCore_mappingKeggG_allResults.tsv',
    sep='\t',
)
# Preview only the first rows.
dfOutput3.head()

Unnamed: 0.1,Unnamed: 0,0,1,2,3,4,5,6,7,8,9
0,2'-Deoxyguanosine 5'-monophosphate,"('Sialyldi-Y-2', 86)","('Glycolipid 2', 86)","(""Adenosine 5'-phosphate"", 86)","(""3-Deoxy-D-manno-octulosonyl-2',3',2',3'-tetr...",('alpha-Kdo-(2->8)-[alpha-Kdo-(2->4)]-alpha-Kd...,"('NodRm-IV(Ac,C16:2,S)', 86)","('delta DiH-2,N,6triS', 86)","('delta-DiH-(2,6,N)triS', 86)","('VIM-2 antigen', 86)","('DWA-2', 86)"
1,CDP,"('ADP', 67)","('GDP', 67)","('CMP', 67)","('UDP', 67)","('Dec-P-P-GlcNAc-Rha', 60)","('Lacto-N-fucopentaose V', 60)","('UDP-D-xylose', 60)","('UDP-xylose', 60)","('Ac2PIM2', 60)","('Viridopentaose B', 60)"
2,Isocitric acid,"('UDP-D-glucuronic acid', 86)","('Polygalacturonic acid', 86)","('CMP-N-acetylneuraminic acid', 86)","('homopolymer of alpha 2,8-N-acetyl neuraminic...","('UDP-D-galacturonic acid', 86)","('Pectic acid', 72)","('Cytidylic acid', 64)","('Pharbitic acid C', 64)","('Alginic acid', 62)","('Alginic acid', 62)"
3,"4alpha-Carboxy-4beta-methyl-5alpha-cholesta-8,...","(""3',8'-LD1"", 86)","('Methyl-glucuronoxylan', 86)","(""3',8'-isoLD1"", 86)","('GT1aalpha', 67)","('MS 8', 57)","('Chitosan', 56)","('Acarbose', 56)","('GM1alpha', 56)","('GM2alpha', 56)","('Chitosan', 56)"
4,NADPH,"('ADP', 90)","(nan, 60)","(nan, 60)","(nan, 60)","(nan, 60)","(nan, 60)","(nan, 60)","(nan, 60)","(nan, 60)","(nan, 60)"


In [7]:
# Step 2 output: the Chebi "allResults" mapping table (tab-separated).
dfOutput4 = pd.read_csv(
    './tutorial/outputs/hmrCore_mappingChebi_allResults.tsv',
    sep='\t',
)
# Preview only the first rows.
dfOutput4.head()

Unnamed: 0.1,Unnamed: 0,0,1,2,3,4,5,6,7,8,9
0,2'-Deoxyguanosine 5'-monophosphate,"(""2'-Deoxyguanosine 5'-monophosphate"", 100)","(""2'-deoxyguanosine 5'-monophosphate"", 100)","(""2'-deoxyguanosine 5'-monophosphate"", 100)","(""2'-deoxyguanosine-3'-monophosphate"", 97)","(""2'-deoxyguanosine 3'-monophosphate"", 97)","(""2'-deoxyguanosine 5'-monophosphate(2-)"", 97)","('Deoxyguanosine monophosphate', 95)","(""8-oxo-2'-deoxyguanosine-5'-monophosphate"", 95)","(""Deoxyguanosine 5'-monophosphate"", 95)","(""6-O-benzyl-2'-deoxyguanosine 5'-monophosphat..."
1,CDP,"('CDP', 100)","('CDP', 100)","('CDP', 100)","('CDP', 100)","('CDP', 100)","('CDPribitol', 90)","('CDP-N-methylethanolamine', 90)","('CDPacylglycerol', 90)","('CDP-1,2-diacylglycerol', 90)","('CDPdiacylglycerol', 90)"
2,Isocitric acid,"('Isocitric acid', 100)","('ISOCITRIC ACID', 100)","('isocitric acid', 100)","('isocitric acids', 97)","('(+)-threo-isocitric acid', 95)","('D-threo-Isocitric acid', 90)","('Acid', 90)","('RI', 90)","('[SO]', 90)","('SO', 90)"
3,"4alpha-Carboxy-4beta-methyl-5alpha-cholesta-8,...",('4alpha-Carboxy-4beta-methyl-5alpha-cholesta-...,('4alpha-carboxyl-4beta-methyl-5alpha-cholesta...,('4alpha-Carboxy-4beta-methyl-5alpha-cholesta-...,('4-alpha-carboxyl-4-beta-methyl-5-alpha-chole...,"('4alpha-methyl-5alpha-cholesta-8,24-dien-3bet...","('4alpha-carboxy-4beta-methyl-cholesta-8,24-di...","('4alpha-methyl-5alpha-cholesta-8,24-dien-3bet...",('4alpha-carboxy-4beta-methyl-5alpha-cholest-8...,('4alpha-carboxy-4beta-methyl-5alpha-cholest-8...,('4beta-carboxy-4alpha-methyl-5alpha-cholesta-...
4,NADPH,"('NADPH', 100)","('NADPH', 100)","('NADPH', 100)","('NADPH', 100)","('6-NADPH', 95)","('NADPH(4-)', 95)","('H+', 90)","('ADP', 90)","('NAD', 90)","('NAD+', 90)"


**Step 3. metabolitesIdentification_FuzzyWuzzy_part2**  
To run the script from the Jupyter notebook:
```
!python pipeline/metabolitesIdentification_FuzzyWuzzy_part2.py hmr
```

Alternatively, to run the script from the terminal:
```
python pipeline/metabolitesIdentification_FuzzyWuzzy_part2.py hmr
```
Some of the produced outputs are shown below (the remaining outputs are omitted because they have the same structure as the ones reported here):

In [8]:
# Step 3 output: the MetaCyc "_100" mapping table (tab-separated).
dfOutput1 = pd.read_csv(
    './tutorial/outputs/hmrCore_mappingMetaCyc_100.tsv',
    sep='\t',
)
# Preview only the first rows.
dfOutput1.head()

Unnamed: 0.1,Unnamed: 0,Name,Matches
0,0,GMP,['GMP']
1,1,dUDP,['dUDP']
2,2,D-Erythrose 4-phosphate,['D-erythrose 4-phosphate']
3,3,L-Proline,['L-proline']
4,4,Cholesterol,['cholesterol']


In [9]:
# Step 3 output: the KeggG "_100" mapping table (tab-separated).
dfOutput2 = pd.read_csv(
    './tutorial/outputs/hmrCore_mappingKeggG_100.tsv',
    sep='\t',
)
# Preview only the first rows.
dfOutput2.head()

Unnamed: 0.1,Unnamed: 0,Name,Matches
0,0,Uridine 5'-diphosphate,"[""Uridine 5'-diphosphate""]"
1,1,GDP,['GDP']


In [10]:
# Step 3 output: the Chebi "_100" mapping table (tab-separated).
dfOutput3 = pd.read_csv(
    './tutorial/outputs/hmrCore_mappingChebi_100.tsv',
    sep='\t',
)
# Preview only the first rows.
dfOutput3.head()

Unnamed: 0.1,Unnamed: 0,Name,Matches
0,0,Isocitric acid,['Isocitric acid']
1,1,GMP,['GMP']
2,2,"5Alpha-Cholesta-8,24-Dien-3-One","['5alpha-cholesta-8,24-dien-3-one']"
3,3,2'-Deoxyguanosine 5'-monophosphate,"[""2'-Deoxyguanosine 5'-monophosphate""]"
4,4,dUDP,['dUDP']


In [11]:
# Step 3 output: the KeggC "_100" mapping table (tab-separated).
dfOutput4 = pd.read_csv(
    './tutorial/outputs/hmrCore_mappingKeggC_100.tsv',
    sep='\t',
)
# Preview only the first rows.
dfOutput4.head()

Unnamed: 0.1,Unnamed: 0,Name,Matches
0,0,Isocitric acid,['Isocitric acid']
1,1,GMP,['GMP']
2,2,"5Alpha-Cholesta-8,24-Dien-3-One","['5alpha-Cholesta-8,24-dien-3-one']"
3,3,2'-Deoxyguanosine 5'-monophosphate,"[""2'-Deoxyguanosine 5'-monophosphate""]"
4,4,dUDP,['dUDP']


**Step 4. metabolitesIdentification_joiningData**  
To run the script from the Jupyter notebook:
```
!python pipeline/metabolitesIdentification_joiningData.py hmr
```

Alternatively, to run the script from the terminal:
```
python pipeline/metabolitesIdentification_joiningData.py hmr
```
The produced outputs are:

In [12]:
# Step 4 output: the "FuzzyAndClassic" mapping table (tab-separated).
dfOutput1 = pd.read_csv(
    './tutorial/outputs/hmrCore_mappingFuzzyAndClassic.tsv',
    sep='\t',
)
# Preview only the first rows.
dfOutput1.head()

Unnamed: 0,Name,Identifiers_fuzzy,Identifiers_classic,Identifiers
0,2'-Deoxyguanosine 5'-monophosphate,"['C00362', '16192']","['C00362', '16192']","['C00362', '16192']"
1,Isocitric acid,"['30887', '16087', 'C00311']","['151', '24885', 'C00311', '30887', 'C00451']","['C00451', '16087', 'C00311', '30887', '24885'..."
2,"4alpha-Carboxy-4beta-methyl-5alpha-cholesta-8,...","['C15808', '64925', '50591', '|CPD-4577|']","['C15808', '50591']","['C15808', '64925', '50591', '|CPD-4577|']"
3,(2E)-Dodecenoyl-[acp],"['C05758', '10725']","['10725', 'C05758']","['C05758', '10725']"
4,Pyruvic acid,"['C00022', '15361', '32816']","['32816', 'C00022', 'PYRUVATE', '15361']","['C00022', '15361', 'PYRUVATE', '32816']"


**Step 5. reactionsIdentification**  
To run the script from the Jupyter notebook:
```
!python pipeline/reactionsIdentification.py hmr
```

Alternatively, to run the script from the terminal:
```
python pipeline/reactionsIdentification.py hmr
```
The produced outputs are:

In [13]:
# Step 5 output: enriched metabolites table (tab-delimited despite the .csv extension).
dfOutput1 = pd.read_csv(
    './tutorial/outputs/hmrCore_metabolites_enriched.csv',
    sep='\t',
)
# Preview only the first rows.
dfOutput1.head()

Unnamed: 0,Id,Name,KeggId,ChebiId,PubchemId,boundaryCondition,chemicalFormula,Inchi,lIdentifiers
0,M_biomass_s,biomasss,,,,False,,,[]
1,M_glucose_s,D-Glucose,,,,False,,,"['42758', '|CPD-15374|', '4167', 'C00031', '17..."
2,M_lactateL_s,L-Lactic acid,,,,False,,,"['|L-LACTATE|', '16651', 'L-LACTATE', '422', '..."
3,M_O2_s,Oxygen,,,,False,,,"['|OXYGEN-MOLECULE|', '25805', '15379', 'C00007']"
4,M_glutamine_s,L-Glutamine,,,,False,,,"['32666', '30011', 'GLN', '|GLN|', '58359', 'C..."


In [14]:
# Step 5 output: reactions table (tab-delimited despite the .csv extension).
dfOutput2 = pd.read_csv(
    './tutorial/outputs/hmrCore_reactions.csv',
    sep='\t',
)
# Preview only the first rows.
dfOutput2.head()

Unnamed: 0,Rxn,KeggId,EC number,GPR,Name,IsTransport,trasportedMets,IsExchange,GPRrule
0,R_Ex_biomass,,[],,DmBiomass,False,[],True,
1,R_Ex_glucose,,[],,*,False,[],True,
2,R_Ex_lactateL,,[],,*,False,[],True,
3,R_Ex_O2,,[],,*,False,[],True,
4,R_Ex_glutamine,,[],,*,False,[],True,


In [15]:
# Step 5 output: the "reactions_wIds" table (tab-delimited despite the .csv extension).
dfOutput3 = pd.read_csv(
    './tutorial/outputs/hmrCore_reactions_wIds.csv',
    sep='\t',
)
# Preview only the first rows.
dfOutput3.head()

Unnamed: 0,Rxn,PutativeIdentifiers
0,Ex_biomass,[]
1,Ex_glucose,[]
2,Ex_lactateL,[]
3,Ex_O2,[]
4,Ex_glutamine,[]


In [16]:
# Step 5 output: enriched reactions table (tab-delimited despite the .csv extension).
dfOutput4 = pd.read_csv(
    './tutorial/outputs/hmrCore_reactions_enriched.csv',
    sep='\t',
)
# Preview only the first rows.
dfOutput4.head()

Unnamed: 0,Rxn,KeggId,EC number,GPR,Name,IsTransport,trasportedMets,IsExchange,GPRrule,PutativeIdentifiers
0,R_Ex_biomass,,[],,DmBiomass,False,[],True,,[]
1,R_Ex_glucose,,[],,*,False,[],True,,[]
2,R_Ex_lactateL,,[],,*,False,[],True,,[]
3,R_Ex_O2,,[],,*,False,[],True,,[]
4,R_Ex_glutamine,,[],,*,False,[],True,,[]


**Step 6. reactionsIdentification_TCDB**  
To run the script from the Jupyter notebook:
```
!python pipeline/reactionsIdentification_TCDB.py hmr
```

Alternatively, to run the script from the terminal:
```
python pipeline/reactionsIdentification_TCDB.py hmr
```
The produced outputs are:

In [17]:
# Step 6 output: the "reactions_enriched_tcdb" table (tab-delimited despite the .csv extension).
dfOutput1 = pd.read_csv(
    './tutorial/outputs/hmrCore_reactions_enriched_tcdb.csv',
    sep='\t',
)
# Preview only the first rows.
dfOutput1.head()

Unnamed: 0,Rxn,KeggId,EC number,GPR,Name,IsTransport,trasportedMets,IsExchange,GPRrule,Identifiers_fromTCDB
0,R_Ex_biomass,,[],,DmBiomass,False,[],True,,{}
1,R_Ex_glucose,,[],,*,False,[],True,,"{'1.B.19.1.1': ['Q51485'], '1.B.24.1.1': ['Q9R..."
2,R_Ex_lactateL,,[],,*,False,[],True,,"{'1.A.16.2.7': ['SCP04225.1'], '1.A.8.9.14': [..."
3,R_Ex_O2,,[],,*,False,[],True,,"{'1.A.8.11.3': ['P61837'], '3.D.4.2.1': ['Q5SJ..."
4,R_Ex_glutamine,,[],,*,False,[],True,,"{'1.A.24.1.1': ['P08050'], '1.B.2.1.1': ['P103..."


**Step 7. fromReactions2Genes**  
To run the script from the Jupyter notebook:
```
!python pipeline/fromReactions2Genes.py hmr
```

Alternatively, to run the script from the terminal:
```
python pipeline/fromReactions2Genes.py hmr
```
The produced outputs are:

In [18]:
# Step 7 output: the "reactions_wGenes" table (tab-delimited despite the .csv extension).
dfOutput1 = pd.read_csv(
    './tutorial/outputs/hmrCore_reactions_wGenes.csv',
    sep='\t',
)
# Preview only the first rows.
dfOutput1.head()

Unnamed: 0,Rxn,KeggId_x,EC number_x,GPR_x,Name_x,IsTransport_x,trasportedMets_x,IsExchange_x,GPRrule_x,PutativeIdentifiers,...,GPR_y,Name_y,IsTransport_y,trasportedMets_y,IsExchange_y,GPRrule_y,Identifiers_fromTCDB,Rxn_conv,lGenes,lEC
0,R_Ex_biomass,,[],,DmBiomass,False,[],True,,[],...,,DmBiomass,False,[],True,,{},Ex_biomass,[],[]
1,R_Ex_glucose,,[],,*,False,[],True,,[],...,,*,False,[],True,,"{'1.B.19.1.1': ['Q51485'], '1.B.24.1.1': ['Q9R...",Ex_glucose,"[['6513'], ['6514'], ['81031'], ['6517'], ['15...",[]
2,R_Ex_lactateL,,[],,*,False,[],True,,[],...,,*,False,[],True,,"{'1.A.16.2.7': ['SCP04225.1'], '1.A.8.9.14': [...",Ex_lactateL,"[['366'], ['6566'], ['9121'], ['151473'], ['16...",[]
3,R_Ex_O2,,[],,*,False,[],True,,[],...,,*,False,[],True,,"{'1.A.8.11.3': ['P61837'], '3.D.4.2.1': ['Q5SJ...",Ex_O2,[],[]
4,R_Ex_glutamine,,[],,*,False,[],True,,[],...,,*,False,[],True,,"{'1.A.24.1.1': ['P08050'], '1.B.2.1.1': ['P103...",Ex_glutamine,"[['7504'], ['54868'], ['140679'], ['145389'], ...",[]


**Step 8. genesLocationFilter**  
To run the script from the Jupyter notebook:
```
!python pipeline/genesLocationFilter.py hmr
```

Alternatively, to run the script from the terminal:
```
python pipeline/genesLocationFilter.py hmr
```
The produced outputs are:

In [19]:
# Step 8 output: the "genes2Compartments" table (tab-delimited despite the .csv extension).
dfOutput1 = pd.read_csv(
    './tutorial/outputs/hmrCore_genes2Compartments.csv',
    sep='\t',
)
# Preview only the first rows.
dfOutput1.head()

Unnamed: 0,Gene,lCompartments
0,162515,['extracellular']
1,6647,"['mitochondrion', 'cytoplasm']"
2,80007,"['mitochondrion', 'cytoplasm']"
3,114134,"['cytoplasm', 'extracellular']"
4,54407,"['cytoplasm', 'extracellular']"


**Step 9. fromReactions2Genes_wFilteredData**  
To run the script from the Jupyter notebook:
```
!python pipeline/fromReactions2Genes_wFilteredData.py hmr
```

Alternatively, to run the script from the terminal:
```
python pipeline/fromReactions2Genes_wFilteredData.py hmr
```
The produced outputs are:

In [20]:
# Step 9 output: the "genes2Compartments_wFilter" table (tab-delimited despite the .csv extension).
dfOutput1 = pd.read_csv(
    './tutorial/outputs/hmrCore_genes2Compartments_wFilter.csv',
    sep='\t',
)
# Preview only the first rows.
dfOutput1.head()

Unnamed: 0,Rxn,KeggId_x,EC number_x,GPR_x,Name_x,IsTransport_x,trasportedMets_x,IsExchange_x,GPRrule_x,PutativeIdentifiers,...,Name_y,IsTransport_y,trasportedMets_y,IsExchange_y,GPRrule_y,Identifiers_fromTCDB,Rxn_conv,lGenes,lEC,lGenes_filtered
0,R_Ex_biomass,,[],,DmBiomass,False,[],True,,[],...,DmBiomass,False,[],True,,{},Ex_biomass,[],[],[]
1,R_Ex_glucose,,[],,*,False,[],True,,[],...,*,False,[],True,,"{'1.B.19.1.1': ['Q51485'], '1.B.24.1.1': ['Q9R...",Ex_glucose,"[['6513'], ['6514'], ['81031'], ['6517'], ['15...",[],"[['6513'], ['6514'], ['81031'], ['6517'], ['15..."
2,R_Ex_lactateL,,[],,*,False,[],True,,[],...,*,False,[],True,,"{'1.A.16.2.7': ['SCP04225.1'], '1.A.8.9.14': [...",Ex_lactateL,"[['366'], ['6566'], ['9121'], ['151473'], ['16...",[],"[['366'], ['6566'], ['9121'], ['151473'], ['16..."
3,R_Ex_O2,,[],,*,False,[],True,,[],...,*,False,[],True,,"{'1.A.8.11.3': ['P61837'], '3.D.4.2.1': ['Q5SJ...",Ex_O2,[],[],[]
4,R_Ex_glutamine,,[],,*,False,[],True,,[],...,*,False,[],True,,"{'1.A.24.1.1': ['P08050'], '1.B.2.1.1': ['P103...",Ex_glutamine,"[['7504'], ['54868'], ['140679'], ['145389'], ...",[],"[['7504'], ['54868'], ['140679'], ['145389'], ..."


**Step 10. prepareGPRulerInput**  
To run the script from the Jupyter notebook:
```
!python pipeline/prepareGPRulerInput.py hmr
```

Alternatively, to run the script from the terminal:
```
python pipeline/prepareGPRulerInput.py hmr
```
The produced outputs are:

In [21]:
# Step 10 output: the "Rxns2Genes" table (tab-delimited despite the .csv extension).
dfOutput1 = pd.read_csv(
    './tutorial/outputs/HMRcore_Rxns2Genes.csv',
    sep='\t',
)
# Preview only the first rows.
dfOutput1.head()

Unnamed: 0,Rxn,Genes
0,R_Ex_biomass,[]
1,R_Ex_glucose,"[['6513'], ['6514'], ['81031'], ['6517'], ['15..."
2,R_Ex_lactateL,"[['366'], ['6566'], ['9121'], ['151473'], ['16..."
3,R_Ex_O2,[]
4,R_Ex_glutamine,"[['7504'], ['54868'], ['140679'], ['145389'], ..."


In [22]:
# Step 10 output: the "Kegg2UniprotGenes" table (tab-delimited despite the .csv extension).
dfOutput2 = pd.read_csv(
    './tutorial/outputs/HMRcore_Kegg2UniprotGenes.csv',
    sep='\t',
)
# Preview only the first rows.
dfOutput2.head()

Unnamed: 0,keggId,uniprotId
0,1,P04217
1,1,V9HWD8
2,10,P11245
3,10,A4Z6T7
4,100,P00813


**Step 11. GPRULER**  
To run the script from the Jupyter notebook:
```
!python pipeline/GPRULER.py hmr
```

Alternatively, to run the script from the terminal:
```
python pipeline/GPRULER.py hmr
```
The produced outputs are:

In [23]:
# Step 11 output: the "GenesData" table (tab-delimited despite the .csv extension).
dfOutput1 = pd.read_csv(
    './tutorial/outputs/HMRcore_GenesData.csv',
    sep='\t',
)
# Preview only the first rows.
dfOutput1.head()

Unnamed: 0,keggId,uniprotId,proteinNames,geneNames,txt_subunit,function,id_uniprot,complexPortal,complexPortal_uniprotId,complexPortal_protName,...,otherSubunits,subunitsFromName,otherIsoforms,isoformIndication,subunitIndication,sameEnzymeMembership,redundancy,stringSubunits,isoform,geneName_fromKEGG
0,10057,O15440,"['Multidrug resistance-associated protein 5', ...","['ABCC5', 'MRP5']",,Acts as a multispecific organic anion pump whi...,"['O15440', 'B9EIQ2', 'O14517', 'Q29ZA9', 'Q29Z...",,[],[],...,[],[],[],False,False,False,[],[],['ABCC5'],"['ABCC5', ' ABC33', ' EST277145', ' MOAT-C', '..."
1,10146,Q13283,['Ras GTPase-activating protein-binding protei...,"['G3BP1', 'G3BP']",(Microbial infection) Interacts with Sindbis v...,ATP- and magnesium-dependent helicase that pla...,"['Q13283', 'Q5HYE9']",,[],[],...,"['es', 'bind', 'viral', 'rnas', 'and', 'probab...",[],[],False,False,False,[],"['YBX1', 'EIF4G1', 'TIA1', 'CAPRIN1', 'NUFIP2'...",['G3BP1'],"['G3BP1', ' G3BP', ' HDH-VIII']"
2,10146,Q5U0Q1,[],"['DKFZp686L1159', 'G3BP', 'hCG_39146']",,,['Q5U0Q1'],,[],[],...,[],[],[],False,False,False,[],[],['G3BP1'],"['G3BP1', ' G3BP', ' HDH-VIII']"
3,10146,Q6ZP53,[],[],,,['Q6ZP53'],,[],[],...,[],[],[],False,False,False,[],[],['G3BP1'],"['G3BP1', ' G3BP', ' HDH-VIII']"
4,10165,Q9UJS0,['Calcium-binding mitochondrial carrier protei...,"['SLC25A13', 'ARALAR2']",Homodimer (via N-terminus).,Mitochondrial and calcium-binding carrier that...,"['Q9UJS0', 'O14566', 'O14575', 'Q546F9', 'Q9NZ...",,[],[],...,[],[],[],False,False,False,[],[],"['SLC25A12', 'SLC25A13']","['SLC25A13', ' ARALAR2', ' CITRIN', ' CTLN2', ..."


In [24]:
# Step 11 output: the "GenesRelationships" table (tab-delimited despite the .csv extension).
dfOutput2 = pd.read_csv(
    './tutorial/outputs/HMRcore_GenesRelationships.csv',
    sep='\t',
)
# Preview only the first rows.
dfOutput2.head()

Unnamed: 0,gene,uniprotId,AND,OR
0,"['ABCC5', 'MRP5']","['O15440', 'B9EIQ2', 'O14517', 'Q29ZA9', 'Q29Z...",[],[]
1,"['G3BP1', 'G3BP']","['Q13283', 'Q5HYE9']","['3', 's']",[]
2,"['DKFZp686L1159', 'G3BP', 'hCG_39146']",['Q5U0Q1'],[],[]
3,[],['Q6ZP53'],[],[]
4,"['SLC25A13', 'ARALAR2']","['Q9UJS0', 'O14566', 'O14575', 'Q546F9', 'Q9NZ...",[],['SLC25A12']


In [25]:
# Step 11 output: the "gprRules" table (tab-delimited despite the .csv extension).
dfOutput3 = pd.read_csv(
    './tutorial/outputs/HMRcore_gprRules.csv',
    sep='\t',
)
# Preview only the first rows.
dfOutput3.head()

Unnamed: 0,Rxn,Genes,GPR rule
0,R_Ex_biomass,[],
1,R_Ex_glucose,"[['6513'], ['6514'], ['81031'], ['6517'], ['15...",6514 or 50651 or 81031 or 154091 or 6515 or 29...
2,R_Ex_lactateL,"[['366'], ['6566'], ['9121'], ['151473'], ['16...",159963 or 9120 or 220963 or 160728 or 9390 or ...
3,R_Ex_O2,[],
4,R_Ex_glutamine,"[['7504'], ['54868'], ['140679'], ['145389'], ...",(8140 and 6510) or 9057 or 120103 or 124565 or...
