In [17]:
from __future__ import print_function
import cobra
import os
from os.path import join

In [18]:
# import deep learning and other modules
import numpy as np
import pandas as pd
from tensorflow.contrib import keras
from keras.models import Sequential
from keras.layers import Dense
from keras.wrappers.scikit_learn import KerasClassifier
from keras.utils import np_utils
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import KFold
from sklearn.linear_model import LassoLarsCV
from sklearn.preprocessing import LabelEncoder

In [19]:
# fix random seed for reproducibility
seed = 7
np.random.seed(7)

In [20]:
# Flux dataset from predicting bacterial growth conditions study -- for current purposes, THIS IS MOSTLY CONSIDERED RANDOM SYNTHETIC DATA
dataset1 = pd.read_csv("../Data/syntheticFluxData.csv", delimiter=',', header=None)

In [21]:
dataset1.head()

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,2375,2376,2377,2378,2379,2380,2381,2382,2383,2384
0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,-0.0,0,0,0.004649,0,0.004649,1,1,1
1,0.0,4.6928e-34,1.0258000000000001e-27,6.1513e-29,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,-0.0,0,0,0.004571,0,0.004571,1,2,2
2,1.3489e-29,0.0,0.0,7.6393000000000005e-28,1.0292000000000001e-27,0.0,0.0,0.0,0.0,0.0,...,0.0,-0.0,0,0,0.006978,0,0.006978,1,3,3
3,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,-0.0,0,0,0.004584,0,0.004584,1,4,4
4,0.0,0.0,0.0,2.6959e-29,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,-0.0,0,0,0.00476,0,0.00476,1,5,5


In [22]:
# First 2381 columns are the in-silico fluxes generated from the flux balance analyses (FBA) for the input growth conditions
# Information on the growth conditions is in columns 2382 and 2383. Column 2384 is just the pair-wise combination of 2382 and 2383.
# Original work has lot of details, and this is just a tutorial on how the processed FBA data looks and using deep learning models to analyze.
# Focus is on deep learning models, rather than the source or content of the data.
print("Background information of the data and what we are trying to accomplish here - after importing modules and loading data :-)")


Background information of the data and what we are trying to accomplish here - after importing modules and loading data :-)


In [23]:
# Assuming iAF1260 model used in this repo has exchange/transport reactions that match to the synthetic data.
sbml_path = join("../Data","iAF1260.xml.gz")
iAF1260_ecoli_model = cobra.io.read_sbml_model(sbml_path)
iAF1260_reaction_IDs = [x.id for x in iAF1260_ecoli_model.reactions]
# There might be a better way to do this, but this explicitly conveys information
list_tpp_tex = []
for x in iAF1260_reaction_IDs:
    if(x.endswith('tpp')):
        list_tpp_tex.append(x)
    if(x.endswith('tex')):
        list_tpp_tex.append(x)
# columns to remove from above dataset
IDs_of_cols_to_remove = [iAF1260_reaction_IDs.index(i) for i in list_tpp_tex]
# append output columns as well that are to be removed from Input
IDs_of_cols_to_remove.extend(range(2382,2385)) # Note range covers 2382 to 2384


In [24]:
# retain columns that are NOT IDs_of_cols_to_remove
X = dataset1.iloc[:,dataset1.columns.difference(IDs_of_cols_to_remove)]
# Use carbon source as y (i.e., col 2382) --- Nitrogen source is 2383, while pair-wise C/N is 2384
fba_y = dataset1.iloc[:,2382]

In [25]:
# encode class values as integers
encoder = LabelEncoder()
encoder.fit(fba_y)
encoded_Y = encoder.transform(fba_y)
dummy_y = np_utils.to_categorical(encoded_Y)
y = encoded_Y # dummy_y is needed if the original y is something else?

In [26]:
# define input for cross validation
kfold = KFold(n_splits=10, shuffle=True, random_state=seed)

In [27]:
results = cross_val_score(LassoLarsCV(), X, y, cv=kfold)







































[0.86912298 0.66247456 0.85492963 0.78285797 0.86536933 0.83504235
 0.85019026 0.83325272 0.8361639  0.74565818]




In [28]:
print(results)

[0.86912298 0.66247456 0.85492963 0.78285797 0.86536933 0.83504235
 0.85019026 0.83325272 0.8361639  0.74565818]


In [29]:
# Define a neural network model 12 layers, 2071 inputs (i.e., columns in dataset1_sub after removing transport/exchange rxns)
model_Keras = Sequential()
model_Keras.add(Dense(12, input_dim=2071, activation='relu'))
model_Keras.add(Dense(1, activation='softmax'))
model_Keras.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])

In [35]:
estimator_Keras = KerasClassifier(model_Keras.fit, epochs=100, batch_size=49)

In [36]:
print(estimator_Keras)

<keras.wrappers.scikit_learn.KerasClassifier object at 0x000001CC45146518>


In [39]:
results_Keras = cross_val_score(model_Keras.fit(), X, y, cv=kfold, error_score='raise')
print(results_Keras)

ValueError: If fitting from data tensors, you should specify the `steps_per_epoch` argument.