# Install conda on your Colab environment

Ignore this first cell if you are running the notebook in a local environment.

One can still run it locally but it will have no effect.

In [1]:
# Run this cell first - it will install a conda distribution (mamba)
# on your Drive then restart the kernel automatically 
# (don't worry about the crashing/restarting kernel messages)
# It HAS to be runned FIRST everytime you use the notebook in colab

import os
import sys
RunningInCOLAB  = 'google.colab' in str(get_ipython())

if RunningInCOLAB:
    !pip install -q condacolab
    import condacolab
    condacolab.install()

# Set up your Colab or local environment
# Then import libraries

Run this cell in both cases (local or Colab).

In [2]:
import os
import sys
RunningInCOLAB  = 'google.colab' in str(get_ipython())

if RunningInCOLAB:
    
    # Check everything is fine with conda in Colab
    import condacolab
    condacolab.check()
    
    # Mount your drive environment in the colab runtime
    from google.colab import drive
    drive.mount('/content/drive',force_remount=True)
    
    # Change this variable to your path on Google Drive to which the repo has been cloned
    # If you followed the colab notebook 'repo_cloning.ipynb', nothing to change here
    repo_path_in_drive = '/content/drive/My Drive/Github/amn_release/'
    # Change directory to your repo cloned in your drive
    DIRECTORY = repo_path_in_drive
    os.chdir(repo_path_in_drive)
    # Copy the environment given in the environment_amn_light.yml
    !mamba env update -n base -f environment_amn_light.yml
    
    # This is one of the few Colab-compatible font
    font = 'Liberation Sans'
    
else:
    
    # In this case the local root of the repo is our working directory
    DIRECTORY = './'
    font = 'arial'

# printing the working directory files. One can check you see the same folders and files as in the git webpage.
print(os.listdir(DIRECTORY))

from Library.Build_Dataset import *

import warnings
warnings.filterwarnings("ignore", category=DeprecationWarning)

['Yeast9_medium.ipynb', '.git', 'Build_Model_AMN_Ray.ipynb', 'Create_Medium.ipynb', 'files', 'environment_amn_RS2.yml', 'Build_Dataset_parallel.ipynb', 'boxplot_train_vs_val_loss.png', 'Build_Model_MM.ipynb', 'Build_Model_AMN_KO.ipynb', 'LICENSE', 'Dataset_model', 'Build_Dataset.ipynb', 'environment_amn_RS4.yml', 'Build_Model_AMN.ipynb', 'violin_train_vs_val_loss.png', 'Dataset_experimental', 'Tutorial.ipynb', 'growth_rate_plot.html', 'Build_Model_AMN_KO_Ray.ipynb', 'Build_Model_RC (Copy).ipynb', 'Build_Dataset_Ray2.ipynb', 'environment_amn_RS.yml', 'environment_amn_light.yml', 'Build_Dataset_KO_Ray.ipynb', 'Result', 'README.md', 'val_loss_folds_per_seed.png', 'violin_val_loss_all_folds.png', 'Build_Experimental.ipynb', 'Build_Model_RC.ipynb', 'Dataset_input', 'environment_amn.yml', 'Build_Dataset_KO.ipynb', 'Build_Dataset_3.ipynb', 'Figures', 'Library', 'Build_Model_ANN_Dense.ipynb', 'amn_dep_manual.bat', 'Reservoir', 'Build_Dataset_2.ipynb', 'Duplicate_Model.ipynb', 'environment_amn_

# Generate Training Sets with FBA simulation or experimental data file



Several examples are provided below for generating training sets, using either different metabolic models or experimental data files.

We also provide a way to run cobrapy with provided inputs, such as the reservoir computing predictions (see figure 5 of the research paper).

In [7]:
# Generate a training set for E. coli iML1515 with FBA simulation
# constrained by an experimental file: metabolites in the medium are not drawn
# at random but are the same as in the provided training experimental file.
# This cell may take several hours to execute! Avoid running this in Colab

# What you can change
seed = 10
np.random.seed(seed=seed)  # seed for random number generator
#cobraname =  'iML1515_duplicated' # name of the model
cobraname =  'iML1515_ec_duplicated'
mediumname = 'iML1515_ec6' # name of the medium file
mediumbound = 'UB' # Exact bound (EB) or upper bound (UB)
expname = 'iML1515_EXP' # name of the experimental dataset for constraints
method = 'pFBA' # FBA, pFBA or EXP
# expname training set size, training set size per item in expname
# NOTE(review): `size` is not read anywhere in this cell; only `size_i` is used
size, size_i  = 110, 100
reduce = True # Set at True if you want to reduce the model
verbose = True # forwarded to every TrainingSet call below
# End of What you can change

# Get X from experimental data set (method is forced to 'EXP' for this step,
# independently of the `method` chosen above)
cobrafile = DIRECTORY+'Dataset_input/'+cobraname
expfile  = DIRECTORY+'Dataset_input/'+expname
parameter = TrainingSet(cobraname=cobrafile,
                        mediumname=expfile,
                        mediumbound=mediumbound,
                        mediumsize=38,
                        method='EXP', verbose=verbose)
X = parameter.X.copy()

# Get other parameters from medium file
mediumfile = DIRECTORY+'Dataset_input/'+mediumname
parameter = TrainingSet(cobraname=cobrafile,
                        mediumname=mediumfile,
                        mediumbound=mediumbound,
                        method=method, verbose=verbose)

# Create varmed, the list of variable medium entries based on the experimental
# file: for each experimental row i, keep the medium entries whose level
# (levmed) is above 1 and whose experimental value X[i, j] is positive
varmed = [
    [parameter.medium[j]
     for j in range(X.shape[1])
     if parameter.levmed[j] > 1 and X[i, j] > 0]
    for i in range(X.shape[0])
]

# Get a Cobra training set constrained by varmed (size_i samples per experimental row)
for i in range(X.shape[0]):
    parameter.get(sample_size=size_i, varmed=varmed[i], verbose=verbose)

# Saving file
trainingfile  = DIRECTORY+'Dataset_model/'+mediumname+'_'+parameter.mediumbound
parameter.save(trainingfile, reduce=reduce)

# Verifying: reload the saved file into a fresh object and print its content
parameter = TrainingSet()
parameter.load(trainingfile)
print(trainingfile)
parameter.printout()

medium: ['EX_pi_e_i', 'EX_co2_e_i', 'EX_fe3_e_i', 'EX_h_e_i', 'EX_mn2_e_i', 'EX_fe2_e_i', 'EX_zn2_e_i', 'EX_mg2_e_i', 'EX_ca2_e_i', 'EX_ni2_e_i', 'EX_cu2_e_i', 'EX_sel_e_i', 'EX_cobalt2_e_i', 'EX_h2o_e_i', 'EX_mobd_e_i', 'EX_so4_e_i', 'EX_nh4_e_i', 'EX_k_e_i', 'EX_na1_e_i', 'EX_cl_e_i', 'EX_o2_e_i', 'EX_tungs_e_i', 'EX_slnt_e_i', 'EX_glyc_e_i', 'EX_ala__L_e_i', 'EX_pro__L_e_i', 'EX_thr__L_e_i', 'EX_gly_e_i', 'EX_rib__D_e_i', 'EX_malt_e_i', 'EX_melib_e_i', 'EX_tre_e_i', 'EX_fru_e_i', 'EX_gal_e_i', 'EX_ac_e_i', 'EX_lac__D_e_i', 'EX_succ_e_i', 'EX_pyr_e_i']
levmed: []
valmed: []
ratmed: 0
objective:  ['BIOMASS_Ec_iML1515_core_75p37M']
measurements size:  7740
medium: ['EX_pi_e_i', 'EX_co2_e_i', 'EX_fe3_e_i', 'EX_h_e_i', 'EX_mn2_e_i', 'EX_fe2_e_i', 'EX_zn2_e_i', 'EX_mg2_e_i', 'EX_ca2_e_i', 'EX_ni2_e_i', 'EX_cu2_e_i', 'EX_sel_e_i', 'EX_cobalt2_e_i', 'EX_h2o_e_i', 'EX_mobd_e_i', 'EX_so4_e_i', 'EX_nh4_e_i', 'EX_k_e_i', 'EX_na1_e_i', 'EX_cl_e_i', 'EX_o2_e_i', 'EX_tungs_e_i', 'EX_slnt_e_i', 'EX

## Examples of experimental or manual training set generation

The cell below builds a training set stored in the same kind of object (`parameter`) as the training sets simulated with Cobra, but filled only from experimental data. Cobra is not run here: the experimental data are used directly.

### iML1515

In [None]:
# Build a training set for E. coli iML1515 straight from an experimental file
# (no Cobra simulation is run in this cell)

# What you can change
seed = 10
np.random.seed(seed=seed)      # seed for random number generator
cobraname = 'iML1515_ec_UB'    # name of the model, here a reduced iML1515 model
mediumbound = 'UB'             # must be 'UB': exact bounds are unknown
mediumname = 'iML1515_ec_EXP'  # name of experimental file
method = 'EXP'                 # FBA, pFBA or EXP
reduce = False                 # Set at True if you want to reduce the model
# End of What you can change

# Get data: both input files live in the Dataset_input folder of the repo
cobrafile = f'{DIRECTORY}Dataset_input/{cobraname}'
mediumfile = f'{DIRECTORY}Dataset_input/{mediumname}'
parameter = TrainingSet(
    cobraname=cobrafile,
    mediumname=mediumfile,
    mediumbound=mediumbound,
    mediumsize=38,
    method=method,
    verbose=True,
)

# Saving file: the output name is the medium name suffixed with the bound type
trainingfile = f'{DIRECTORY}Dataset_model/{mediumname}_{parameter.mediumbound}'
parameter.save(trainingfile, reduce=reduce)

# Verifying: reload the saved file into a fresh object and print its content
parameter = TrainingSet()
parameter.load(trainingfile)
parameter.printout()

## Running Cobra on a provided dataset

This cell has a completely different purpose than the rest of the notebook: it runs Cobrapy with provided values as inputs. These inputs are extracted from Reservoir Computing; you can see an example in the notebook `Build_Model_RC.ipynb`.

In [None]:
# This cell runs FBA on a provided training set and computes R2 between
# the provided objective and the calculated objective.
# R2 = 1 when the training set was generated by FBA, but may be different from 1
# when the training set is an experimental one.
# For an experimental training set, medium input fluxes can be scaled by a value.

import time

from sklearn.metrics import r2_score

# perf_counter is a monotonic clock intended for measuring elapsed time;
# unlike time.time() it is not affected by system clock adjustments
start = time.perf_counter()

# What you can change 
seed = 10
np.random.seed(seed=seed)  
#cobraname = 'iML1515_EXP'  # name of the model 
cobraname = 'iML1515_ec_duplicated'  # name of the model
#cobraname = 'iEC1368_DH5a_duplicated'
mediumbound = 'UB' # must be 'UB': exact bounds are unknown
mediumname = 'iML1515_EXP6' # name of experimental file, for out-of-the-box FBA
# mediumname = 'iML1515_UB_AMN_QP_RC_AMN_solution_for_Cobra_train' # for running Cobra with RC training points as inputs
# mediumname = 'iML1515_UB_AMN_QP_RC_AMN_solution_for_Cobra_pred' # for running Cobra with RC predictions as inputs
method = 'EXP' # FBA, pFBA or EXP
# Per-reaction trace of the scaled bounds: one line per medium entry and per
# sample. Set to False to keep the cell output short.
print_bounds = True
# End of What you can change

# Get data
cobrafile =  DIRECTORY+'Dataset_input/'+cobraname
mediumfile = DIRECTORY+'Dataset_input/'+mediumname
parameter = TrainingSet(cobraname=cobrafile, 
                        mediumname=mediumfile, mediumbound=mediumbound, mediumsize=38, 
                        method=method,verbose=True)

# Scalers to test (uncomment exactly one line)
# scaler_list = [2.0, 2.1, 2.2, 2.3, 2.4, 2.5, 2.6, 2.7, 2.8, 2.9, 3.0] # test different scalers 
#scaler_list = [round(x, 2) for x in [i * 0.01 for i in range(55, 65)]] # from 0.55 to 0.64
#scaler_list = [round(x, 1) for x in [i * 0.1 for i in range(2, 10)]] # from 0.2 to 0.9
#scaler_list = [2.5] # best scaler for out-of-the box FBA
#scaler_list = [1] # for running Cobra with RC training inputs, see mediumname
scaler_list = [0.53]
#scaler_list = [0.62]
#scaler_list = [round(x, 1) for x in [i * 1 for i in range(1, 3)]] # 1 and 2

# regression cobra vs. true values
L = parameter.X.shape[0]  # number of samples (rows of X)
for scaler in scaler_list:
    print(scaler)
    Y = {}     # objective computed by Cobra, keyed by sample index
    Y_or = {}  # provided (original) objective, keyed by sample index
    for i in range(L):
        # Start from a zero value for every reaction of the model, then
        # overwrite the medium entries with the scaled experimental values
        inf = {r.id: 0 for r in parameter.model.reactions}
        for j in range(len(parameter.medium)):
            # Values below 1e-4 get a 1e-4 offset so the entry is never exactly zero
            eps = 1.0e-4 if parameter.X[i,j] < 1.0e-4 else 0
            inf[parameter.medium[j]] = scaler * parameter.X[i,j] + eps
            if print_bounds:
                print(f'Reaction: {parameter.medium[j]} | Original Bound (?): {parameter.X[i,j]} | Scaler: {scaler} | OB * Scaler: {inf[parameter.medium[j]]}')
        out,Y[i] = run_cobra(parameter.model, parameter.objective, inf, method='pFBA', verbose=False) #original: method='pFBA'
        Y_or[i] = parameter.Y[i]
        print("%d %.6f %.6f" % (i, parameter.Y[i], Y[i]))

    Y = list(Y.values())
    r2 = r2_score(parameter.Y[0:L], Y[0:L], multioutput='variance_weighted')
    print('scaler %.2f R2 %.4f ' % (scaler, r2))
#np.array(Y).tofile("Result/Cobra_alone_EXP2_duplicated.csv") # to uncomment if cobra alone saved in file

end = time.perf_counter()
print(f"Elapsed time: {end - start:.4f} seconds")

medium: ['EX_pi_e_i', 'EX_co2_e_i', 'EX_fe3_e_i', 'EX_h_e_i', 'EX_mn2_e_i', 'EX_fe2_e_i', 'EX_zn2_e_i', 'EX_mg2_e_i', 'EX_ca2_e_i', 'EX_ni2_e_i', 'EX_cu2_e_i', 'EX_sel_e_i', 'EX_cobalt2_e_i', 'EX_h2o_e_i', 'EX_mobd_e_i', 'EX_so4_e_i', 'EX_nh4_e_i', 'EX_k_e_i', 'EX_na1_e_i', 'EX_cl_e_i', 'EX_o2_e_i', 'EX_tungs_e_i', 'EX_slnt_e_i', 'EX_glyc_e_i', 'EX_ala__L_e_i', 'EX_pro__L_e_i', 'EX_thr__L_e_i', 'EX_gly_e_i', 'EX_rib__D_e_i', 'EX_malt_e_i', 'EX_melib_e_i', 'EX_tre_e_i', 'EX_fru_e_i', 'EX_gal_e_i', 'EX_ac_e_i', 'EX_lac__D_e_i', 'EX_succ_e_i', 'EX_pyr_e_i']
levmed: []
valmed: []
ratmed: 0
objective:  ['BIOMASS_Ec_iML1515_core_75p37M']
measurements size:  7740
0.53
Reaction: EX_pi_e_i | Original Bound (?): 10.0 | Scaler: 0.53 | OB * Scaler: 5.300000000000001
Reaction: EX_co2_e_i | Original Bound (?): 0.0 | Scaler: 0.53 | OB * Scaler: 0.0001
Reaction: EX_fe3_e_i | Original Bound (?): 10.0 | Scaler: 0.53 | OB * Scaler: 5.300000000000001
Reaction: EX_h_e_i | Original Bound (?): 10.0 | Scaler: