In [10]:
import os
import sys

# In this case the local root of the repo is our working directory.
# Every Dataset_input/ and Dataset_model/ path in the cells below is built
# by prefixing this value, so it must end with a path separator.
DIRECTORY = './'
# Font name; not referenced by the cells visible in this notebook section —
# presumably consumed by plotting helpers elsewhere (TODO confirm).
font = 'arial'

from Library.Build_Model import *

# We declare this function here and not in the
# function-storing python file to modify it easily
# as it can change the printouts of the methods
def printout(filename, Stats, model, time):
    """Print a three-line summary of training and test statistics.

    Parameters
    ----------
    filename : value inserted with ``%s`` into the header line.
    Stats : object exposing ``train_objective``, ``train_loss``,
        ``test_objective`` and ``test_loss``; items 0 and 1 of each are
        printed as the value and its "+/-" figure respectively.
    model : not used by this function; kept so the call signature stays
        unchanged for existing callers.
    time : number printed as the CPU-time with four decimals.

    Returns
    -------
    None. The summary is written to standard output.
    """
    print('Stats for %s CPU-time %.4f' % (filename, time))

    # One entry per printed line: the attribute prefix on Stats and the
    # template used for that line (R2 for training, Q2 for testing).
    report_lines = (
        ('train', 'R2 = %.4f (+/- %.4f) Constraint = %.4f (+/- %.4f)'),
        ('test', 'Q2 = %.4f (+/- %.4f) Constraint = %.4f (+/- %.4f)'),
    )
    for split, template in report_lines:
        objective = getattr(Stats, split + '_objective')
        loss = getattr(Stats, split + '_loss')
        print(template % (objective[0], objective[1], loss[0], loss[1]))

## Create the dataset from the paper example

In [11]:
# Build, save and reload the training set for the E. coli iML1515
# experimental file.

# --- What you can change ---
seed = 10
np.random.seed(seed)  # seed for the random number generator
cobraname = 'iML1515_EXP'  # name of the model, here a reduced iML1515 model
mediumbound = 'UB'  # required here: the exact bounds are unknown
mediumname = 'iML1515_EXP'  # name of the experimental file
method = 'EXP'  # FBA, pFBA or EXP
reduce = False  # set to True if you want to reduce the model
# --- End of what you can change ---

# Get data: both inputs live under Dataset_input/
cobrafile = f'{DIRECTORY}Dataset_input/{cobraname}'
mediumfile = f'{DIRECTORY}Dataset_input/{mediumname}'
parameter = TrainingSet(
    cobraname=cobrafile,
    mediumname=mediumfile,
    mediumbound=mediumbound,
    mediumsize=38,  # matches the 38 medium reactions / X columns reported by printout()
    method=method,
    verbose=False,
)

# Save the training set under Dataset_model/
trainingfile = f'{DIRECTORY}Dataset_model/{mediumname}_{parameter.mediumbound}_my_test'
parameter.save(trainingfile, reduce=reduce)

# Verify: reload into a fresh TrainingSet and print its summary
parameter = TrainingSet()
parameter.load(trainingfile)
parameter.printout()

model file name: ./Dataset_model/iML1515_EXP_UB_my_test
reduced model: False
medium file name: ./Dataset_input/iML1515_EXP
medium bound: UB
list of reactions in objective: ['BIOMASS_Ec_iML1515_core_75p37M']
method: EXP
trainingsize: 110
list of medium reactions: 38
list of medium levels: 0
list of medium values: 0
ratio of variable medium turned on: 0
list of measured reactions: 543
Stoichiometric matrix (1080, 543)
Boundary matrix from reactions to medium: (38, 543)
Measurement matrix from reaction to measures: (543, 543)
Reaction to metabolite matrix: (1080, 543)
Metabolite to reaction matrix: (543, 1080)
Training set X: (110, 38)
Training set Y: (110, 1)
S_int matrix (478, 543)
S_ext matrix (543, 2703)
Q matrix (543, 478)
P matrix (543, 543)
b_int vector (478,)
b_ext vector (110, 2703)
Sb matrix (543, 1080)
c vector (543,)


#### Check that the provided targets (Y, or observed values) are the same as the measured values in the saved dataset (originally from the experimental CSV).

In [3]:
import pandas as pd

# The experimental measurements are stored in the medium CSV, so read it via
# `mediumfile` rather than the model path; the two only coincide because
# cobraname and mediumname are the same string in this notebook.
df_exp_data = pd.read_csv(mediumfile + ".csv")

# np.array_equal also compares shapes, so a size mismatch reports False
# instead of being silently broadcast by an element-wise `==`.
targets_match = np.array_equal(df_exp_data['GR_AVG'].values, parameter.Y.flatten())
print("True if the parameter.Y is equal to average growth rate in the experimental data: "
      f"{targets_match}")

True if the parameter.Y is equal to average growth rate in the experimental data: True


#### Check that the provided inputs (X, or medium) are the same as the values in the saved dataset (originally from the experimental CSV).

In [4]:
import pandas as pd

# The experimental measurements are stored in the medium CSV, so read it via
# `mediumfile` rather than the model path; the two only coincide because
# cobraname and mediumname are the same string in this notebook.
df_exp_data = pd.read_csv(mediumfile + ".csv")

# Every column except the last one is compared against parameter.X.
# np.array_equal also compares shapes, so a size mismatch reports False
# instead of being silently broadcast by an element-wise `==`.
inputs_match = np.array_equal(parameter.X, df_exp_data.iloc[:, :-1].values)
# The message previously described the Y / growth-rate check (copy-paste
# slip); it now states what this cell actually verifies.
print("True if the parameter.X is equal to the medium columns (all but the last) in the experimental data: "
      f"{inputs_match}")

True if the parameter.Y is equal to average growth rate in the experimental data: True


## Loading the experimental dataset

In [5]:
 
# --- What you can change ---
trainname = 'iML1515_EXP_UB_my_test'
timestep = 1
# --- End of what you can change ---

# Create model (author's note: 100% for training, 0% for testing)
trainingfile = f'{DIRECTORY}Dataset_model/{trainname}'
model = Neural_Model(
    trainingfile=trainingfile,
    objective=['BIOMASS_Ec_iML1515_core_75p37M'],
    model_type='AMN_LP',
    scaler=True,
    timestep=timestep,
    learn_rate=0.001,
    n_hidden=1,
    hidden_dim=500,
    # train_rate=1.0e-2,  (left disabled, as in the original cell)
    epochs=3,
    xfold=1,
    verbose=True,
)

number of reactions:  543 1
number of metabolites:  1080
filtered measurements size:  1


#### Check that the provided targets (Y, or observed values) are the same as the measured values in the AMN.

In [6]:
import pandas as pd

# The experimental measurements are stored in the medium CSV, so read it via
# `mediumfile` rather than the model path; the two only coincide because
# cobraname and mediumname are the same string in this notebook.
df_exp_data = pd.read_csv(mediumfile + ".csv")

# np.array_equal also compares shapes, so a size mismatch reports False
# instead of being silently broadcast by an element-wise `==`.
model_targets_match = np.array_equal(df_exp_data['GR_AVG'].values, model.Y.flatten())
# The message previously named parameter.Y although this cell checks the
# targets held by the Neural_Model instance.
print("True if the model.Y is equal to average growth rate in the experimental data: "
      f"{model_targets_match}")

True if the parameter.Y is equal to average growth rate in the experimental data: True


In [9]:
# Hand a shallow copy of the model to input_AMN rather than `model` itself.
param = copy.copy(model)

# Shapes before the AMN-specific inputs are assembled.
print("Input model.X shape:", model.X.shape)
print("Input model.Y shape:", model.Y.shape)
print("parameter.b_int shape:", parameter.b_int.shape)
print("parameter.b_ext shape:", parameter.b_ext.shape)

# Alternative noted by the author: model_input(param, verbose=True)
X, Y = input_AMN(param, verbose=True)

# Descriptions of how the returned arrays are laid out (author's notes,
# split over several literals only to keep the lines short).
x_layout = (
    "composed of [X (from model.X), "
    "b_int.T (repeated vertically with the same size of model.X), "
    "b_ext] (LP specific, check for other models as well)"
)
y_layout = (
    "composed of [Y (from model.Y), SV constraints,  "
    "Pin constraints, V ≥ 0 constraints]"
)
print("Outputs inputs X shape:", X.shape, x_layout)
print("Outputs targets Y shape:", Y.shape, y_layout)

Input model.X shape: (110, 38)
Input model.Y shape: (110, 1)
parameter.b_int shape: (478,)
parameter.b_ext shape: (110, 2703)
AMN scaler 1.0
b_int.shape:  (110, 478)
concatenated X and b_int shape: (110, 516)
b_ext.shape:  (110, 2703)
concatenated X and b_ext shape: (110, 3219)
LP input shape (110, 3219) (110, 4)
Outputs inputs X shape: (110, 3219) composed of [X (from model.X), b_int.T (repeated vertically with the same size of model.X), b_ext] (LP specific, check for other models as well)
Outputs targets Y shape: (110, 4) composed of [Y (from model.Y), SV constraints,  Pin constraints, V ≥ 0 constraints]


In [26]:
# Train and evaluate the Neural_Model built above. The call returns four
# values; the last one is deliberately discarded. `stats` is presumably the
# object whose train/test figures appear in the summary line printed below
# (TODO confirm against train_evaluate_model).
reservoir, pred, stats, _ = train_evaluate_model(model, verbose=1)

AMN scaler 1.0
LP input shape (110, 3219) (110, 4)
----------------------------------- AMN_LP
Dense layer n_hidden, hidden_dim, output_dim, activation, trainable: 1 500 543 relu True
Dense layer n_hidden, hidden_dim, output_dim, activation, trainable: 1 500 2703 linear True
AMN output shapes for PoutV, SV, PinV, Vpos, V, outputs (None, 1) (None, 1) (None, 1) (None, 1) (None, 543) (None, 1090)
nbr parameters: 1665246
---------- 1
Loss out on V0:  0.2330754
Loss constraint on V0:  3.0847117e-05
Loss all on V0:  0.015381853
Loss out on Vf:  10.220672
Loss constraint on Vf:  1.2536824
Loss all on Vf:  30.185303
Distance V0 to Vf 2567.872559: 
looping bad training iter=0 r2=-16450.2407
---------- 1
Loss out on V0:  0.2330754
Loss constraint on V0:  3.0847117e-05
Loss all on V0:  0.015381853
Loss out on Vf:  10.220672
Loss constraint on Vf:  1.2536824
Loss all on Vf:  30.185303
Distance V0 to Vf 2567.872559: 
train = -16450.24 test = -16450.24 loss-train = 1.253682 loss-test = 1.253682 iter=