# Install conda on your Colab environment

Ignore this first cell if you are running the notebook in a local environment.

One can still run it locally but it will have no effect.

In [5]:
# Run this cell first: on Colab it installs a conda distribution (mamba)
# and then restarts the kernel automatically
# (the crashing/restarting kernel messages are expected, don't worry about them).
# It HAS to be run FIRST every time you use the notebook in Colab.
import tensorflow as tf
import pandas as pd
import os
import sys
# True when the string form of the IPython shell object mentions 'google.colab'
RunningInCOLAB  = 'google.colab' in str(get_ipython())

# Outside Colab the block below is skipped, so the cell installs nothing locally
if RunningInCOLAB:
    !pip install -q condacolab
    import condacolab
    condacolab.install()

# Set up your Colab or local environment
# Then import libraries

Run this cell in both cases (local or Colab).

In [2]:
import os
import sys
# True when the string form of the IPython shell object mentions 'google.colab'
RunningInCOLAB  = 'google.colab' in str(get_ipython())

if RunningInCOLAB:
    
    # Check everything is fine with conda in Colab
    import condacolab
    condacolab.check()
    
    # Mount your Drive in the Colab runtime
    from google.colab import drive
    drive.mount('/content/drive',force_remount=True)
    
    # Change this variable to the path on your Google Drive where the repo has been cloned.
    # If you followed the Colab notebook 'repo_cloning.ipynb', there is nothing to change here.
    repo_path_in_drive = '/content/drive/My Drive/Github/amn_release/'
    # Make the cloned repo the working directory
    DIRECTORY = repo_path_in_drive
    os.chdir(repo_path_in_drive)
    # Update the conda base environment from environment_amn_light.yml
    # (relative file name: this relies on the os.chdir just above)
    !mamba env update -n base -f environment_amn_light.yml
    
    # This is one of the few Colab-compatible fonts
    font = 'Liberation Sans'
    
else:
    
    # In this case the local root of the repo is our working directory
    DIRECTORY = './'
    font = 'arial'

# Print the files of the working directory: you should see the same folders
# and files as on the git webpage.
print(os.listdir(DIRECTORY))

# NOTE(review): this star import is presumably what brings np, TrainingSet and
# run_cobra (used by the cells below) into scope - confirm against Library/Build_Dataset.py
from Library.Build_Dataset import *

['.env', '.git', '.gitignore', '.ipynb_checkpoints', 'Build_Dataset.ipynb', 'Build_Dataset_KO.ipynb', 'Build_Dataset_Lab_Data.ipynb', 'Build_Experimental.ipynb', 'Build_Model_AMN.ipynb', 'Build_Model_AMN_KO.ipynb', 'Build_Model_ANN_Dense.ipynb', 'Build_Model_MM.ipynb', 'Build_Model_RC.ipynb', 'Dataset_experimental', 'Dataset_input', 'Dataset_model', 'Duplicate_Model.ipynb', 'environment_amn.yml', 'environment_amn_light.yml', 'Figures', 'Figures.ipynb', 'First_tests.ipynb', 'Library', 'LICENSE', 'README.md', 'Reservoir', 'Result', 'Tutorial.ipynb']


In [13]:
# Generate a training set for E. coli iML1515 by FBA simulation constrained by
# an experimental file: the metabolites in the medium are not drawn at random
# but are the same as in the provided experimental training file.
# This cell may take several hours to execute! Avoid running it in Colab.

# What you can change
seed = 10
np.random.seed(seed=seed)  # seed for random number generator
cobraname =  'iML1515_duplicated' # name of the model
mediumname = 'df_amn_dataset_levels'#'iML1515' # name of the medium file
mediumbound = 'UB' # Exact bound (EB) or upper bound (UB)
expname = 'df_amn_dataset'#'iML1515_EXP' # name of the experimental dataset for constraints
method = 'pFBA' # FBA, pFBA or EXP
size, size_i  = 110, 1 # expname training set size (not used in this cell), training set size per item in expname
reduce = True # Set at True if you want to reduce the model
verbose = True # verbosity of the sampling loop below
# Folder holding the experimental and medium files. The default is the original
# author's local folder; set the AMN_EXP_DATA_PATH environment variable (or edit
# this line, e.g. to DIRECTORY+'Dataset_input/') to run on another machine.
exp_data_path = os.environ.get(
    'AMN_EXP_DATA_PATH',
    "H:/ROBOT_SCIENTIST/E_coli/Growth_rates/2025-10-31-27/processed/post_replicates/mediabotJLF1/AMN_dataset/")
# End of What you can change

# File names are built by plain concatenation, so the folder must end with a separator
if not exp_data_path.endswith(('/', '\\')):
    exp_data_path += '/'

# Get X from experimental data set
cobrafile = DIRECTORY+'Dataset_input/'+cobraname
expfile  = exp_data_path + expname # DIRECTORY+'Dataset_input/'+expname

df_exp = pd.read_csv(expfile+".csv")
mediumsize = len(df_exp.columns) - 1 # except the growth rate column
parameter = TrainingSet(cobraname=cobrafile,
                        mediumname=expfile,
                        mediumbound=mediumbound,
                        mediumsize=mediumsize,
                        method='EXP',verbose=False)
X = parameter.X.copy()

# Get other parameters from medium file
# (from here on `parameter` is the medium-file training set, not the experimental one)
mediumfile = exp_data_path + mediumname # DIRECTORY+'Dataset_input/'+mediumname
parameter = TrainingSet(cobraname=cobrafile,
                        mediumname=mediumfile,
                        mediumbound=mediumbound,
                        method=method, verbose=False)

# Create varmed, the list of variable medium based on the experimental file:
# one list per experimental row, keeping the medium entries whose level
# (parameter.levmed) is > 1 and whose experimental value in X is > 0
varmed = [
    [parameter.medium[j]
     for j in range(X.shape[1])
     if parameter.levmed[j] > 1 and X[i,j] > 0]
    for i in range(X.shape[0])
]

# Get a Cobra training set constrained by varmed (one call per experimental row)
for varmed_i in varmed:
    parameter.get(sample_size=size_i, varmed=varmed_i, verbose=verbose)

# Saving file
trainingfile  = DIRECTORY+'Dataset_model/'+mediumname+'_'+parameter.mediumbound
parameter.save(trainingfile, reduce=reduce)

# Verifying: reload the saved file into a fresh object and print it
parameter = TrainingSet()
parameter.load(trainingfile)
print(trainingfile)
parameter.printout()

sample: 0
Cobra cannot be run start again
Cobra cannot be run start again
Cobra cannot be run start again
Cobra cannot be run start again
Cobra cannot be run start again


Infeasible: None (infeasible).

The cell below has a different purpose from the rest of the notebook: it runs Cobrapy with provided values as inputs. These inputs are extracted from Reservoir Computing; see the notebook `Build_Model_RC.ipynb` for an example.

In [3]:
# Run FBA on a provided training set and compute the R2 between the provided
# objective and the objective calculated by Cobra.
# R2 = 1 when the training set was generated by FBA, but it may differ from 1
# when the training set is an experimental one.
# For an experimental training set, the medium input fluxes can be scaled by a value.

from sklearn.metrics import r2_score

# What you can change
seed = 10
np.random.seed(seed=seed)
cobraname = 'iML1515_EXP'  # name of the model
mediumbound = 'UB' # a must, exact bounds unknown
mediumname = 'iML1515_EXP' # name of experimental file, for out-of-the-box FBA
# mediumname = 'iML1515_UB_AMN_QP_RC_AMN_solution_for_Cobra_train' # for running Cobra with RC training points as inputs
# mediumname = 'iML1515_UB_AMN_QP_RC_AMN_solution_for_Cobra_pred' # for running Cobra with RC predictions as inputs
method = 'EXP' # FBA, pFBA or EXP
# End of What you can change

# Get data
cobrafile = DIRECTORY + 'Dataset_input/' + cobraname
mediumfile = DIRECTORY + 'Dataset_input/' + mediumname
parameter = TrainingSet(
    cobraname=cobrafile,
    mediumname=mediumfile,
    mediumbound=mediumbound,
    mediumsize=38,
    method=method,
    verbose=False,
)
# scaler_list = [2.0, 2.1, 2.2, 2.3, 2.4, 2.5, 2.6, 2.7, 2.8, 2.9, 3.0] # test different scalers
scaler_list = [2.5] # best scaler for out-of-the box FBA
# scaler_list = [1] # for running Cobra with RC training inputs, see mediumname

# Regression: Cobra objective vs. provided (true) values, one R2 per scaler
L = parameter.X.shape[0]
for scaler in scaler_list:
    Y = []  # Cobra objective value for each row of parameter.X
    for i in range(L):
        # Every reaction starts at 0, then the medium entries get the scaled input
        inf = {r.id: 0 for r in parameter.model.reactions}
        for j, medium_id in enumerate(parameter.medium):
            x_ij = parameter.X[i, j]
            # inputs below 1e-4 get a 1e-4 offset added
            if x_ij < 1.0e-4:
                eps = 1.0e-4
            else:
                eps = 0
            inf[medium_id] = scaler * x_ij + eps
        out, objective_i = run_cobra(parameter.model, parameter.objective, inf,
                                     method='pFBA', verbose=False)
        Y.append(objective_i)
        print("%d %.4f %.4f" % (i, parameter.Y[i], objective_i))

    r2 = r2_score(parameter.Y[0:L], Y[0:L], multioutput='variance_weighted')
    print('scaler %.2f R2 %.4f ' % (scaler, r2))
# np.array(Y).tofile("Result/Cobra_alone.csv") # to uncomment if cobra alone saved in file

0 0.1696 0.1542
1 0.1340 0.1609
2 0.1886 0.2010
3 0.1990 0.1943
4 0.0720 0.1135
5 0.0924 0.1040
6 0.0881 0.1068
7 0.0900 0.1251
8 0.1989 0.2010
9 0.1054 0.1046
10 0.2681 0.2412
11 0.1576 0.1135
12 0.1209 0.1943
13 0.2729 0.2546
14 0.2945 0.2479
15 0.2386 0.2010
16 0.2531 0.1787
17 0.2606 0.2947
18 0.2816 0.1542
19 0.1351 0.1675
20 0.1449 0.1675
21 0.2409 0.2546
22 0.2437 0.2657
23 0.1059 0.1135
24 0.1082 0.2010
25 0.2451 0.3416
26 0.3099 0.2189
27 0.2000 0.2881
28 0.2077 0.1318
29 0.3837 0.3014
30 0.2247 0.2256
31 0.3520 0.2256
32 0.2255 0.1943
33 0.1340 0.2613
34 0.2397 0.3126
35 0.3654 0.3126
36 0.1863 0.2144
37 0.1612 0.2613
38 0.3442 0.2657
39 0.2964 0.3059
40 0.4135 0.2724
41 0.2561 0.1720
42 0.3949 0.3014
43 0.4205 0.2479
44 0.3050 0.3349
45 0.2315 0.1675
46 0.2708 0.2546
47 0.3351 0.3818
48 0.2785 0.3014
49 0.0765 0.2010
50 0.0704 0.1068
51 0.2095 0.2010
52 0.1135 0.1251
53 0.2193 0.2010
54 0.3316 0.2479
55 0.1368 0.1068
56 0.1362 0.1318
57 0.1074 0.1609
58 0.2277 0.1943
59 0.20

In [2]:
# Run FBA for the P. putida model on a provided training set and compute the
# accuracy between the provided objective and the calculated objective

from sklearn.metrics import mean_squared_error, r2_score, accuracy_score, confusion_matrix
import warnings
# NOTE(review): this silences every warning for the rest of the kernel session
warnings.filterwarnings('ignore')

# What you can change
seed = 10
np.random.seed(seed=seed)
cobraname = 'IJN1463_10_UB'  # name of the model
mediumbound = 'UB' # a must, exact bounds unknown
# Keep exactly one mediumname active (the two used to be assigned back to back,
# so the first one was silently overridden)
# mediumname = 'IJN1463_EXP' # for running Cobra with Exp file
mediumname = 'IJN1463_10_UB_AMN_QP_for_Cobra_train' # for running Cobra with RC file
method = 'EXP' # FBA, pFBA or EXP
L = 166 # split index: rows [:L] are nitrogen (nh4), rows [L:] are carbon (glucose)
# End of What you can change


def _corrected_accuracy(y_true, y_pred, extra_tn, extra_fn):
    """Accuracy after adding cases that are not part of y_true / y_pred.

    extra_tn true negatives and extra_fn false negatives are added to the
    counts of the confusion matrix before computing (TP + TN) / total.
    labels=[0, 1] forces a 2x2 matrix even when a slice holds a single class,
    so the four-way unpacking cannot fail.
    """
    tn, fp, fn, tp = confusion_matrix(y_true, y_pred, labels=[0, 1]).ravel()
    return (tp+tn+extra_tn)/(tn+extra_tn+fp+fn+extra_fn+tp)


# Get data
cobrafile =  DIRECTORY+'Dataset_input/'+cobraname
mediumfile = DIRECTORY+'Dataset_input/'+mediumname
parameter = TrainingSet(cobraname=cobrafile,
                        mediumname=mediumfile,
                        mediumbound=mediumbound, mediumsize=196,
                        method=method,verbose=False)

# Input medium are scaled by 10 for EXP file
scalerX = 1 if 'AMN' in mediumname else 10
Y = {}
for i in range(parameter.X.shape[0]):
    inf = {r.id: 0 for r in parameter.model.reactions}
    for j in range(len(parameter.medium)):
        # inputs below 1e-4 get a 1e-4 offset added
        eps = 1.0e-4 if parameter.X[i,j] < 1.0e-4 else 0
        inf[parameter.medium[j]] = scalerX * parameter.X[i,j] + eps
    try:
        _, Y[i] = run_cobra(parameter.model, parameter.objective, inf, method='FBA', verbose=False)
    except Exception:
        # A failed Cobra run is scored as "no growth". Catching Exception (not a
        # bare except) keeps Ctrl-C / kernel interrupt working during the loop.
        Y[i] = 0
    #print("%d %.0f %.4f" % (i, parameter.Y[i], Y[i]))


# Accuracies corrected with reactions not in the model
# TN: 23 (28) for C (N) total=51  (reaction not in the model and no growth)
# FN: 3 (1) for C (N) total=4 (reaction not in the model but growth)
y_true = np.transpose(parameter.Y)[0]
# A calculated objective above 0.01 counts as growth (class 1)
y_pred = np.asarray([1 if Y[i] > 0.01 else 0 for i in range(len(Y))])
accall = _corrected_accuracy(y_true, y_pred, 51, 4)
accnh4 = _corrected_accuracy(y_true[:L], y_pred[:L], 28, 1)
accglu = _corrected_accuracy(y_true[L:], y_pred[L:], 23, 3)
print('Acc %.4f %.4f %.4f' % (accall, accnh4, accglu))

Acc 0.9597 0.9538 0.9709
