The goal of this script is to provide a pipeline for quickly evaluating, over time, the accuracy of the following outputs of our GENRE (genome-scale metabolic network reconstruction):

1. Biomass flux predictions (minimal media (MM) + glucose, and LB)
2. Carbon source utilization predictions
3. Gene essentiality predictions
4. Memote scores

This is meant as a tool that allows us to track our model's accuracy as we make changes to the reconstruction.

It will output a graph of accuracy (or score) vs time for each of the above metrics

In [26]:
# Initialize cobrapy
from __future__ import print_function

import cobra
import cobra.test
from cobra.flux_analysis import (single_gene_deletion)
import os
from os.path import join
import pandas
from time import time

# Load LJD functions which includes changeMedia_PA_LJD(model, media)
from LJD_Functions import *

# Had to install libsbml to load an xml file
# pip install python-libsbml in Python/Scripts folder
# website: http://sbml.org/Software/libSBML/Downloading_libSBML#Using_pip_from_PyPI
import libsbml

# Other packages
from copy import *

In [10]:
# Load most recent version of the model
# Note: besides SBML, cobrapy can also read models from JSON, YAML and
# MATLAB (.mat) files via the other loaders in cobra.io
model_PA14 = cobra.io.read_sbml_model("iPAU1129.xml") # change this to the model file being evaluated
# Bare expression: in a notebook this displays the model summary as the cell output
model_PA14

0,1
Name,
Memory address,0x05fa89f0
Number of metabolites,1286
Number of reactions,1495
Objective expression,-1.0*PA14_Biomass_reverse_70e78 + 1.0*PA14_Biomass
Compartments,"Cytoplasm, Extracellular"


In [11]:
# Load data collected from previous versions of the model (results are added to this file every time the script is run)


In [15]:
# Test biomass flux predictions
# The model is optimized on two media and the objective value of each
# solution is recorded as the predicted biomass flux.

# Placeholders for the reference values the predictions will be compared against
#biomass_glucose_true =
#biomass_lb_true =

# Minimal media + glucose
# NOTE(review): changeMedia_PA_LJD comes from LJD_Functions; media code 3 with
# 'EX_cpd00027(e)' is presumably minimal media with the glucose exchange
# reaction opened -- confirm against LJD_Functions.
# NOTE(review): if changeMedia_PA_LJD modifies model_PA14 in place instead of
# returning a copy, model_glucose and model_LB refer to the same object and the
# order of these two sections matters -- confirm.
model_glucose = changeMedia_PA_LJD(model_PA14, 3, ['EX_cpd00027(e)'])
solution_glucose = model_glucose.optimize()
biomass_glucose_predicted = solution_glucose.objective_value

# LB media
# NOTE(review): media code 1 is presumably LB -- confirm against LJD_Functions.
# model_LB is reused by the gene essentiality cell further down.
model_LB = changeMedia_PA_LJD(model_PA14, 1)
solution_LB = model_LB.optimize()
biomass_lb_predicted = solution_LB.objective_value

# Report both predictions
print('biomass predicted glucose',biomass_glucose_predicted)
print('biomass predicted lb',biomass_lb_predicted)


biomass predicted glucose 1.3376377250514766
biomass predicted lb 15.72976211565755


In [4]:
# Test carbon source utilization predictions

In [42]:
# Test gene essentiality predictions
#
# Every single-gene knockout of the LB model is classified as essential when
# its predicted growth is below ESSENTIAL_GROWTH_THRESHOLD, or when the
# knockout has no growth value at all (NaN, e.g. an infeasible problem), and
# as non-essential otherwise.
ESSENTIAL_GROWTH_THRESHOLD = 0.001

deletion_results = single_gene_deletion(model_LB)

essential_genes_predicted = []
nonessential_genes_predicted = []
essential_genes_fluxes = []
nonessential_genes_fluxes = []

# Placeholders for the experimental reference sets
#essential_genes_true =
#nonessential_genes_true = 
print(list(deletion_results.index))
print(deletion_results)
print(model_PA14.genes)

# Take the gene ids from the deletion results themselves: model_PA14.genes is
# not in the same order as the rows of deletion_results, so positional lookup
# into the model would pair growth values with the wrong genes.
# Newer cobrapy releases report the knocked-out genes in an 'ids' column;
# older ones (as in the output of this notebook) use them as the index.
if 'ids' in deletion_results.columns:
    knockouts = deletion_results['ids']
else:
    knockouts = deletion_results.index

# Read growth by column name rather than by position so the comparison does
# not silently pick up a different column if the layout changes.
for knockout, growth in zip(knockouts, deletion_results['growth']):
    # Each knockout is a one-element (frozen)set holding the deleted gene id
    if isinstance(knockout, (set, frozenset)):
        gene_id = next(iter(knockout))
    else:
        gene_id = knockout

    # NaN never compares below the threshold, so test for it explicitly;
    # otherwise knockouts without a solution would be counted as non-essential.
    if pandas.isnull(growth) or growth < ESSENTIAL_GROWTH_THRESHOLD:
        essential_genes_fluxes.append(growth)
        essential_genes_predicted.append(gene_id)
    else:
        nonessential_genes_fluxes.append(growth)
        nonessential_genes_predicted.append(gene_id)

# Sanity check: essential + non-essential must equal the number of knockouts
print(len(essential_genes_predicted), len(nonessential_genes_predicted), len(essential_genes_predicted)+len(nonessential_genes_predicted),len(deletion_results))

[frozenset({'PA14_10530'}), frozenset({'PA14_20670'}), frozenset({'PA14_23860'}), frozenset({'PA14_62480'}), frozenset({'PA14_70920'}), frozenset({'PA14_07890'}), frozenset({'PA14_51330'}), frozenset({'PA14_38440'}), frozenset({'PA14_00280'}), frozenset({'PA14_67920'}), frozenset({'PA14_05460'}), frozenset({'PA14_34420'}), frozenset({'PA14_33000'}), frozenset({'PA14_38510'}), frozenset({'PA14_06660'}), frozenset({'PA14_36730'}), frozenset({'PA14_07600'}), frozenset({'PA14_04630'}), frozenset({'PA14_01580'}), frozenset({'PA14_13800'}), frozenset({'PA14_50560'}), frozenset({'PA14_47760'}), frozenset({'PA14_23560'}), frozenset({'PA14_18300'}), frozenset({'PA14_20890'}), frozenset({'PA14_53470'}), frozenset({'PA14_57540'}), frozenset({'PA14_51040'}), frozenset({'PA14_60700'}), frozenset({'PA14_03920'}), frozenset({'PA14_09400'}), frozenset({'PA14_23760'}), frozenset({'PA14_70440'}), frozenset({'PA14_70280'}), frozenset({'PA14_52630'}), frozenset({'PA14_34250'}), frozenset({'PA14_14110'}), 

111 1021 1132 1132


In [6]:
# Calculate memote score

In [7]:
# Add all metrics and timestamp to the existing data

In [8]:
# Plot metrics vs time 