# Modelling for Global Health - Data science in Python
## Day 4: Neural networks in Python

Carrying on from yesterday, we will look to use the feature-rich, gene expression data to practise classification using neural networks. Can we accurately classify the samples to the experimental conditions based on their gene expression?

In [1]:
# Packages for importing, cleaning and looking at the data
import pandas as pd
import numpy as np
from pathlib import Path
from natsort import index_natsorted, order_by_index, natsorted
import random
from collections import Counter
import keras
from keras.models import Sequential, model_from_json
from keras.layers import *
import seaborn as sns

Using TensorFlow backend.


In [2]:
# Load the dataset
expr_new_info = pd.read_csv('Common_info_data.txt', index_col=0)

expr_naive = pd.read_csv('Common_naive_data.txt', index_col=0)
expr_ifn = pd.read_csv('Common_ifn_data.txt', index_col=0)
expr_lps2 = pd.read_csv('Common_lps2_data.txt', index_col=0)
expr_lps24 = pd.read_csv('Common_lps24_data.txt', index_col=0)

In [3]:
expr_new_info

Unnamed: 0,Symbol,chrom,start,end
WASH5P,WASH5P,1,15884,15933
OR4F5,OR4F5,1,59339,59388
FAM72D,FAM72D,1,111130,111179
CCNL2,CCNL2,1,173230,173256
NBPF20,NBPF20,1,174532,174581
...,...,...,...,...
TTTY4C,TTTY4C,Y,27245989,27246038
TTTY17C,TTTY17C,Y,27330869,27330918
SPRY3,SPRY3,Y,57524134,57524183
IL9R,IL9R,Y,57743481,57743530


In [4]:
#  Discover the dataset
for expr in [expr_naive, expr_ifn, expr_lps2, expr_lps24]:
    print(expr.shape)
    print(expr.head(5))

(17867, 414)
               1          2         3          4         5         6  \
WASH5P  7.841897   7.446386  7.764830   7.471999  7.572549  8.390370   
OR4F5   6.983622   6.966486  7.042628   7.033279  6.855801  6.979641   
FAM72D  7.252218   7.136435  7.129419   7.168441  7.089656  7.099595   
CCNL2   7.186372   7.307169  7.255473   7.285911  7.275541  7.303148   
NBPF20  9.962291  10.116581  9.381617  10.222844  9.908225  9.848552   

               7          8         9         10  ...        423       424  \
WASH5P  8.113058   7.824605  8.683622   7.870748  ...   8.362121  8.013899   
OR4F5   7.099498   6.987073  7.001239   6.951224  ...   7.020344  7.102517   
FAM72D  7.023526   7.150122  7.177437   7.085126  ...   7.214543  7.171441   
CCNL2   7.332037   7.292115  7.310949   7.342801  ...   7.310326  7.274593   
NBPF20  9.812697  10.065363  9.852624  10.022713  ...  10.036526  9.618183   

             425        426        427        428       429        430  \
WASH5P  7.7

In [6]:
# For normalizing the data
def initial_normalize(df):
    result = df.copy()
    feature_max = {}
    feature_min = {}
    for feature_name in df.columns:
        max_value = df[feature_name].max()
        feature_max[feature_name] = max_value + 0.1
        min_value = df[feature_name].min() - 0.1
        feature_min[feature_name] = min_value
        if max_value == 0:
            result[feature_name]= df[feature_name]
        else:
            result[feature_name] = (df[feature_name] - min_value) / (max_value - min_value)
    return result, feature_max, feature_min

In [7]:
# Let's combine the dataset and normalize it
samps_in_common = list(set(expr_naive.columns.values) & set(expr_ifn.columns.values) & set(expr_lps2.columns.values) & set(expr_lps24.columns.values))

expr_all_treat = pd.concat([expr_naive[samps_in_common].T,expr_ifn[samps_in_common].T,expr_lps24[samps_in_common].T,expr_lps2[samps_in_common].T], keys=['Naive', 'IFN', 'LPS24', 'LPS2'])
expr_all_treat_norm, feature_max, feature_min = initial_normalize(expr_all_treat)
classes = ['Naive', 'IFN', 'LPS24', 'LPS2']

In [8]:
# Set the training and testing sets
rows = random.sample(range(len(expr_all_treat_norm.index)), int(.75*len(expr_all_treat_norm.index)))
rows.sort()

training = expr_all_treat_norm.values[rows,]
training_labels = np.array(expr_all_treat_norm.index.get_level_values(0)[rows])
testing = np.delete(expr_all_treat_norm.values,rows,axis=0)
testing_labels = np.array(np.delete(expr_all_treat_norm.index.get_level_values(0), [rows], axis=0))

training_samples = Counter(training_labels)
print('For training set we have the following samples:')
for key in training_samples:
    print(key, training_samples[key])

testing_samples = Counter(testing_labels)
print('For testing set we have the following samples:')
for key in testing_samples:
    print(key, testing_samples[key])

For training set we have the following samples:
Naive 173
IFN 165
LPS24 167
LPS2 179
For testing set we have the following samples:
Naive 55
IFN 63
LPS24 61
LPS2 49


In [10]:
# Now preprocess the data
scaled_training = training
scaled_testing = testing

scaled_training_labels = np.zeros((len(training),len(classes)))
for i,tr in enumerate(training_labels):
    scaled_training_labels[i,classes.index(tr)] = 1
scaled_testing_labels = np.zeros((len(testing),len(classes)))
for i,tr in enumerate(testing_labels):
    scaled_testing_labels[i,classes.index(tr)] = 1

scaled_training_labels

array([[1., 0., 0., 0.],
       [1., 0., 0., 0.],
       [1., 0., 0., 0.],
       ...,
       [0., 0., 0., 1.],
       [0., 0., 0., 1.],
       [0., 0., 0., 1.]])

In [11]:
# Create model
x_train = scaled_training
y_train = scaled_training_labels

x_test = scaled_testing
y_test = scaled_testing_labels

In [12]:
# Define the model
model = Sequential()
model.add(Dense(50, input_dim=17867, activation='relu', name='layer_1'))
model.add(Dense(100, activation='relu', name='layer_2'))
model.add(Dense(50, activation='relu', name='layer_3'))
model.add(Dense(4, activation="softmax"))

2022-02-03 10:00:03.525383: I tensorflow/core/platform/cpu_feature_guard.cc:142] Your CPU supports instructions that this TensorFlow binary was not compiled to use: AVX2 FMA
2022-02-03 10:00:03.540373: I tensorflow/compiler/xla/service/service.cc:168] XLA service 0x7fdd0748aae0 executing computations on platform Host. Devices:
2022-02-03 10:00:03.540384: I tensorflow/compiler/xla/service/service.cc:175]   StreamExecutor device (0): Host, Default Version


In [13]:
# Compile the model
model.compile(
    loss='categorical_crossentropy',
    optimizer='adam',
    metrics=['accuracy']
)

# Print a summary of the model
model.summary()

Model: "sequential_1"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
layer_1 (Dense)              (None, 50)                893400    
_________________________________________________________________
layer_2 (Dense)              (None, 100)               5100      
_________________________________________________________________
layer_3 (Dense)              (None, 50)                5050      
_________________________________________________________________
dense_1 (Dense)              (None, 4)                 204       
Total params: 903,754
Trainable params: 903,754
Non-trainable params: 0
_________________________________________________________________


In [14]:
# Train the model
model.fit(
    x_train,
    y_train,
    epochs=30,
    validation_data=(x_test, y_test),
    shuffle=True
)

Train on 684 samples, validate on 228 samples
Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoch 27/30
Epoch 28/30
Epoch 29/30
Epoch 30/30


<keras.callbacks.callbacks.History at 0x145462090>

In [15]:
test_error_rate = model.evaluate(x_test, y_test, verbose=0)
print("The mean squared error (MSE) for the test data set is: {}".format(test_error_rate))

The mean squared error (MSE) for the test data set is: [0.0002629898028732114, 1.0]


In [16]:
# Save neural network structure
model_structure = model.to_json()
f = Path("model_structure_dense.json")
f.write_text(model_structure)

# Save neural network's trained weights
model.save_weights("model_weights_dense.h5")

# # Load the json file that contains the model's structure
# f = Path("data/ks2a_model_structure_dense.json")
# model_structure = f.read_text()
#
# # Recreate the Keras model object from the json data
# model = model_from_json(model_structure)
#
# # Re-load the model's trained weights
# model.load_weights("data/ks2a_model_weights_dense.h5")

In [17]:
# Predict non-matched samples
# Remove samples already used in making the model
expr_naive_p = expr_naive.drop(samps_in_common,axis=1).T
expr_ifn_p = expr_ifn.drop(samps_in_common, axis=1).T
expr_lps24_p = expr_lps24.drop(samps_in_common, axis=1).T
expr_lps2_p = expr_lps2.drop(samps_in_common, axis=1).T

In [18]:
# For normalizing the data the same way as before
def subsidary_normalize(df, feature_max, feature_min):
    result = df.copy()
    for feature_name in df.columns:
        max_value = feature_max[feature_name]
        min_value = feature_min[feature_name]
        if max_value == 0:
            result[feature_name]= df[feature_name]
        else:
            result[feature_name] = (df[feature_name] - min_value) / (max_value - min_value)
    return result

In [19]:
expr_all_eval = pd.concat([expr_naive_p, expr_ifn_p, expr_lps24_p, expr_lps2_p], keys=['Naive', 'IFN', 'LPS24', 'LPS2'])
expr_all_eval_norm = subsidary_normalize(expr_all_eval, feature_max, feature_min)
evaluation = expr_all_eval_norm.values
evaluation_labels = np.array(expr_all_eval_norm.index.get_level_values(0))

evaluation_samples = Counter(evaluation_labels)
print('For evaluation set we have the following samples:')
for key in evaluation_samples:
    print(key, evaluation_samples[key])

For evaluation set we have the following samples:
Naive 186
IFN 139
LPS24 94
LPS2 33


In [20]:
scaled_evaluation = evaluation

scaled_evaluation_labels = np.zeros((len(evaluation),len(classes)))
for i,tr in enumerate(evaluation_labels):
    scaled_evaluation_labels[i,classes.index(tr)] = 1

In [21]:
scaled_evaluation_labels

array([[1., 0., 0., 0.],
       [1., 0., 0., 0.],
       [1., 0., 0., 0.],
       ...,
       [0., 0., 0., 1.],
       [0., 0., 0., 1.],
       [0., 0., 0., 1.]])

In [22]:
# Make a prediction with the neural network
X = scaled_evaluation
prediction = model.predict(X)
df_pred = pd.DataFrame(data=prediction, index=expr_all_eval.index, columns=['Naive', 'IFN', 'LPS24', 'LPS2'])

In [23]:
df_pred.style.background_gradient(cmap='viridis')

Unnamed: 0,Unnamed: 1,Naive,IFN,LPS24,LPS2
Naive,1,0.999719,0.000104,7.2e-05,0.000105
Naive,4,0.999238,0.00034,0.000161,0.000261
Naive,5,0.999681,7.6e-05,7.6e-05,0.000167
Naive,6,0.99971,7.6e-05,5.8e-05,0.000156
Naive,7,0.999716,3.5e-05,4.6e-05,0.000203
Naive,8,0.999448,8.1e-05,0.000216,0.000256
Naive,9,0.999769,3.8e-05,5.1e-05,0.000142
Naive,10,0.999722,2.9e-05,2.8e-05,0.000221
Naive,11,0.999772,2.2e-05,2.6e-05,0.00018
Naive,12,0.999696,3.9e-05,5.7e-05,0.000208


In [24]:
#%% Predict the new rna-seq samples
classes = ['IFNG', 'LPS2', 'LPS24', 'LPS6', 'Naive']
count_data_common = pd.read_csv('Common_count_data.txt', index_col=0)
rna_data = count_data_common
rna_data['sample'] = [x.split("_")[0] for x in rna_data.index]
rna_data['treatment'] = [x.split("_")[1] for x in rna_data.index]
rna_data = rna_data.set_index(['treatment', 'sample'])
rna_data.sort_index(inplace=True)

In [25]:
rna_eval, _, _ = initial_normalize(rna_data)
rna_evaluation = rna_eval.values
rna_evaluation_labels = np.array(rna_eval.index.get_level_values(0))

rna_evaluation_samples = Counter(rna_evaluation_labels)
print('For RNA evaluation set we have the following samples:')
for key in rna_evaluation_samples:
    print(key, rna_evaluation_samples[key])

For RNA evaluation set we have the following samples:
IFNG 6
LPS2 6
LPS24 6
LPS6 6
Naive 6


In [26]:
rna_scaled_evaluation = rna_evaluation

rna_scaled_evaluation_labels = np.zeros((len(rna_evaluation),len(classes)))
for i,tr in enumerate(rna_evaluation_labels):
    rna_scaled_evaluation_labels[i,classes.index(tr)] = 1

In [27]:
# Make a prediction with the neural network
X = rna_scaled_evaluation
rna_prediction = model.predict(X)
df_rna_pred = pd.DataFrame(data=rna_prediction, index=rna_data.index, columns=['Naive', 'IFN', 'LPS24', 'LPS2'])

In [28]:
df_rna_pred.style.background_gradient(cmap='viridis')

Unnamed: 0_level_0,Unnamed: 1_level_0,Naive,IFN,LPS24,LPS2
treatment,sample,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
IFNG,1,2e-06,0.999998,0.0,0.0
IFNG,2,1e-06,0.999999,0.0,0.0
IFNG,4,1e-06,0.999999,0.0,0.0
IFNG,6,0.0,1.0,0.0,0.0
IFNG,8,1e-06,0.999999,0.0,0.0
IFNG,9,0.0,1.0,0.0,0.0
LPS2,1,2e-06,0.0,0.0,0.999998
LPS2,2,0.0,0.0,0.0,1.0
LPS2,4,0.0,0.0,0.0,1.0
LPS2,6,1e-06,0.0,0.0,0.999999


In [29]:
# Predict the sepsis samples
classes = ['1', '3', '5'] # These are actually day of sampling after entry into the ICU
count_data_sepsis = pd.read_csv('Common_sepsis_data.txt', index_col=0)
sepsis_data = count_data_sepsis
sepsis_data['sample'] = [x.split("_")[0] for x in sepsis_data.index]
sepsis_data['day'] = [x.split("_")[1] for x in sepsis_data.index]
sepsis_data = sepsis_data.set_index(['day', 'sample'])
sepsis_data = sepsis_data.reindex(index=natsorted(sepsis_data.index))

In [30]:
sepsis_eval, _, _ = initial_normalize(sepsis_data)
sepsis_evaluation = sepsis_eval.values
sepsis_evaluation_labels = np.array(sepsis_eval.index.get_level_values(0))

sepsis_evaluation_samples = Counter(sepsis_evaluation_labels)
print('For sepsis exploration set we have the following samples:')
for key in sepsis_evaluation_samples:
    print(key, sepsis_evaluation_samples[key])

For sepsis exploration set we have the following samples:
1 77
3 80
5 64


In [31]:
sepsis_scaled_evaluation = sepsis_evaluation

sepsis_scaled_evaluation_labels = np.zeros((len(sepsis_evaluation),len(classes)))
for i,tr in enumerate(sepsis_evaluation_labels):
    sepsis_scaled_evaluation_labels[i,classes.index(tr)] = 1

In [32]:
X = sepsis_scaled_evaluation
sepsis_prediction = model.predict(X)
df_sepsis_pred = pd.DataFrame(data=sepsis_prediction, index=sepsis_data.index, columns=['Naive', 'IFN', 'LPS24', 'LPS2'])

In [33]:
df_sepsis_pred.style.background_gradient(cmap='viridis')

Unnamed: 0_level_0,Unnamed: 1_level_0,Naive,IFN,LPS24,LPS2
day,sample,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
1,1,0.090526,0.001735,0.101578,0.806162
1,3,0.16048,0.135144,0.425857,0.278519
1,4,0.596495,0.00587,0.019813,0.377823
1,5,0.079543,0.005359,0.770111,0.144987
1,6,0.058997,0.005823,0.272502,0.662678
1,8,0.166662,0.56586,0.187813,0.079665
1,12,0.096933,0.110058,0.670934,0.122074
1,14,0.490656,0.151087,0.227852,0.130406
1,16,0.010072,0.001305,0.528316,0.460307
1,19,0.152363,0.542231,0.245984,0.059422


## Different neural network models
### Dense layers, no loss
So far we have used one of the simplest Neural Networks with dense layers and no loss, though it possible that we are over fitting. 

In [34]:
# Define the model
model = Sequential()
model.add(Dense(50, input_dim=17867, activation='relu', name='layer_1'))
model.add(Dense(100, activation='relu', name='layer_2'))
model.add(Dense(50, activation='relu', name='layer_3'))
model.add(Dense(4, activation="softmax"))

### Dense layers, with loss
One way to avoid overfitting, is to randomly add loss to the network, so not all the data makes it from layer to layer. 

In [35]:
# New model with loss
model = Sequential()
model.add(Dense(50, input_dim=17867, activation='relu', name='layer_1'))
model.add(Dropout(0.20))
model.add(Dense(100, activation='relu', name='layer_2'))
model.add(Dropout(0.20))
model.add(Dense(50, activation='relu', name='layer_3'))
model.add(Dropout(0.20))
model.add(Dense(4, activation="softmax"))

# # Save neural network structure
# model_structure = model.to_json()
# f = Path("model_structure_loss.json")
# f.write_text(model_structure)

# # Save neural network's trained weights
# model.save_weights("model_weights_loss.h5")

In [36]:
# Compile the model
model.compile(
    loss='categorical_crossentropy',
    optimizer='adam',
    metrics=['accuracy']
)

# Print a summary of the model
model.summary()

Model: "sequential_3"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
layer_1 (Dense)              (None, 50)                893400    
_________________________________________________________________
dropout_1 (Dropout)          (None, 50)                0         
_________________________________________________________________
layer_2 (Dense)              (None, 100)               5100      
_________________________________________________________________
dropout_2 (Dropout)          (None, 100)               0         
_________________________________________________________________
layer_3 (Dense)              (None, 50)                5050      
_________________________________________________________________
dropout_3 (Dropout)          (None, 50)                0         
_________________________________________________________________
dense_3 (Dense)              (None, 4)                

In [37]:
# Train the model
model.fit(
    x_train,
    y_train,
    epochs=30,
    validation_data=(x_test, y_test),
    shuffle=True
)

Train on 684 samples, validate on 228 samples
Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoch 27/30
Epoch 28/30
Epoch 29/30
Epoch 30/30


<keras.callbacks.callbacks.History at 0x179736550>

In [38]:
test_error_rate = model.evaluate(x_test, y_test, verbose=0)
print("The mean squared error (MSE) for the test data set is: {}".format(test_error_rate))

The mean squared error (MSE) for the test data set is: [0.4513228310937401, 0.7587719559669495]


In [39]:
# Save neural network structure
model_structure = model.to_json()
f = Path("model_structure_loss.json")
f.write_text(model_structure)

# Save neural network's trained weights
model.save_weights("model_weights_loss.h5")

# # Load the json file that contains the model's structure
# f = Path("data/ks2b_model_structure_loss.json")
# model_structure = f.read_text()
#
# # Recreate the Keras model object from the json data
# model = model_from_json(model_structure)
#
# # Re-load the model's trained weights
# model.load_weights("ks2b_model_weights_loss.h5")

In [40]:
# Make a prediction with the neural network - non-matched samples
X = scaled_evaluation
prediction = model.predict(X)
df_pred = pd.DataFrame(data=prediction, index=expr_all_eval.index, columns=['Naive', 'IFN', 'LPS24', 'LPS2'])
df_pred.style.background_gradient(cmap='viridis')

Unnamed: 0,Unnamed: 1,Naive,IFN,LPS24,LPS2
Naive,1,0.417429,0.07545,0.426485,0.080636
Naive,4,0.417429,0.07545,0.426485,0.080636
Naive,5,0.417429,0.07545,0.426485,0.080636
Naive,6,0.417429,0.07545,0.426485,0.080636
Naive,7,0.417429,0.07545,0.426485,0.080636
Naive,8,0.417429,0.07545,0.426485,0.080636
Naive,9,0.417429,0.07545,0.426485,0.080636
Naive,10,0.417429,0.07545,0.426485,0.080636
Naive,11,0.417429,0.07545,0.426485,0.080636
Naive,12,0.417429,0.07545,0.426485,0.080636


In [41]:
# Make a prediction with the neural network - RNA-seq
X = rna_scaled_evaluation
rna_prediction = model.predict(X)
df_rna_pred = pd.DataFrame(data=rna_prediction, index=rna_data.index, columns=['Naive', 'IFN', 'LPS24', 'LPS2'])
df_rna_pred.style.background_gradient(cmap='viridis')

Unnamed: 0_level_0,Unnamed: 1_level_0,Naive,IFN,LPS24,LPS2
treatment,sample,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
IFNG,1,0.047246,0.834485,0.035508,0.08276
IFNG,2,0.000926,0.96533,0.000447,0.033297
IFNG,4,0.015309,0.915784,0.01041,0.058497
IFNG,6,0.167379,0.567335,0.140522,0.124763
IFNG,8,0.00163,0.962721,0.000845,0.034804
IFNG,9,0.01163,0.926538,0.00774,0.054092
LPS2,1,0.0,1e-06,0.0,0.999999
LPS2,2,0.0,0.0,0.0,1.0
LPS2,4,0.0,0.0,0.0,1.0
LPS2,6,0.0,3e-06,0.0,0.999997


In [42]:
# Make a prediction with the neural network - Sepsis
X = sepsis_scaled_evaluation
sepsis_prediction = model.predict(X)
df_sepsis_pred = pd.DataFrame(data=sepsis_prediction, index=sepsis_data.index, columns=['Naive', 'IFN', 'LPS24', 'LPS2'])
df_sepsis_pred.style.background_gradient(cmap='viridis')

Unnamed: 0_level_0,Unnamed: 1_level_0,Naive,IFN,LPS24,LPS2
day,sample,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
1,1,1.5e-05,0.785999,4e-06,0.213982
1,3,0.015375,0.915599,0.010458,0.058568
1,4,2.7e-05,0.833445,7e-06,0.166521
1,5,9e-06,0.737841,2e-06,0.262148
1,6,3e-05,0.840153,8e-06,0.159809
1,8,0.006868,0.942028,0.004375,0.046729
1,12,0.202099,0.495247,0.17358,0.129074
1,14,0.364649,0.179771,0.347162,0.108417
1,16,4e-06,0.633685,1e-06,0.366311
1,19,0.132776,0.641491,0.108424,0.117309


### Convolutional layer with loss
Loss just on dense layers can have a detrimental effect on our results. How about if we segment our data, but keep the loss between layers in?
Note the input of this data into a convolution layer needs an extra dimension, which then needs to be pooled and flattened before beingn put into a dense layer. 

In [43]:
# Create model
x_train = np.expand_dims(scaled_training, axis=2)
y_train = scaled_training_labels

x_test = np.expand_dims(scaled_testing, axis=2)
y_test = scaled_testing_labels

x_test.shape

(228, 17867, 1)

In [44]:
# Define the model
model = Sequential()

model.add(Conv1D(32, 10, input_shape=(17867,1), padding='same', activation="relu"))
model.add(MaxPooling1D(200))
model.add(Flatten())
model.add(Dropout(0.20))

model.add(Dense(100, activation='relu'))
model.add(Dropout(0.20))

model.add(Dense(4, activation="softmax"))

# Compile the model
model.compile(
    loss='categorical_crossentropy',
    optimizer='adam',
    metrics=['accuracy']
)

# Print a summary of the model
model.summary()

Model: "sequential_4"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv1d_1 (Conv1D)            (None, 17867, 32)         352       
_________________________________________________________________
max_pooling1d_1 (MaxPooling1 (None, 89, 32)            0         
_________________________________________________________________
flatten_1 (Flatten)          (None, 2848)              0         
_________________________________________________________________
dropout_4 (Dropout)          (None, 2848)              0         
_________________________________________________________________
dense_4 (Dense)              (None, 100)               284900    
_________________________________________________________________
dropout_5 (Dropout)          (None, 100)               0         
_________________________________________________________________
dense_5 (Dense)              (None, 4)                

In [45]:
# Train the model
model.fit(
    x_train,
    y_train,
    epochs=30,
    validation_data=(x_test, y_test),
    shuffle=True
)

Train on 684 samples, validate on 228 samples
Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoch 27/30
Epoch 28/30
Epoch 29/30
Epoch 30/30


<keras.callbacks.callbacks.History at 0x179ca4110>

In [46]:
test_error_rate = model.evaluate(x_test, y_test, verbose=0)
print("The mean squared error (MSE) for the test data set is: {}".format(test_error_rate))

The mean squared error (MSE) for the test data set is: [0.0034700488475592514, 1.0]


In [47]:
# Save neural network structure
model_structure = model.to_json()
f = Path("model_structure_conv_10.json")
f.write_text(model_structure)

# Save neural network's trained weights
model.save_weights("model_weights_conv_10.h5")

# # Load the json file that contains the model's structure
# f = Path("data/ks2c_model_structure_conv_10.json")
# model_structure = f.read_text()
#
# # Recreate the Keras model object from the json data
# model = model_from_json(model_structure)
#
# # Re-load the model's trained weights
# model.load_weights("data/ks2c_model_weights_conv_10.h5")

In [48]:
# Make a prediction with the neural network - non-matched samples
X = np.expand_dims(scaled_evaluation, axis=2)
prediction = model.predict(X)
df_pred = pd.DataFrame(data=prediction, index=expr_all_eval.index, columns=['Naive', 'IFN', 'LPS24', 'LPS2'])
df_pred.style.background_gradient(cmap='viridis')

Unnamed: 0,Unnamed: 1,Naive,IFN,LPS24,LPS2
Naive,1,0.998038,0.000207,0.000995,0.00076
Naive,4,0.988747,0.00168,0.004582,0.004991
Naive,5,0.982379,0.002194,0.004283,0.011144
Naive,6,0.998597,0.000197,0.00062,0.000587
Naive,7,0.952733,0.002205,0.009467,0.035595
Naive,8,0.993591,0.000903,0.002818,0.002687
Naive,9,0.99807,0.000178,0.001207,0.000545
Naive,10,0.996321,0.000745,0.001118,0.001816
Naive,11,0.997381,9.5e-05,0.000586,0.001939
Naive,12,0.99627,0.000275,0.002028,0.001427


In [49]:
# Make a prediction with the neural network - RNA-seq
X = np.expand_dims(rna_scaled_evaluation, axis=2)
rna_prediction = model.predict(X)
df_rna_pred = pd.DataFrame(data=rna_prediction, index=rna_data.index, columns=['Naive', 'IFN', 'LPS24', 'LPS2'])
df_rna_pred.style.background_gradient(cmap='viridis')

Unnamed: 0_level_0,Unnamed: 1_level_0,Naive,IFN,LPS24,LPS2
treatment,sample,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
IFNG,1,3e-06,0.079177,0.00101,0.919811
IFNG,2,1e-05,0.445706,0.00116,0.553123
IFNG,4,2e-06,0.552483,0.000706,0.446809
IFNG,6,2e-06,0.302389,0.001117,0.696492
IFNG,8,2e-06,0.153682,0.01365,0.832666
IFNG,9,0.0,0.054223,0.000638,0.945138
LPS2,1,9e-06,0.026153,0.006011,0.967827
LPS2,2,3.2e-05,0.059922,0.001764,0.938282
LPS2,4,1.2e-05,0.127697,0.004885,0.867407
LPS2,6,2.3e-05,0.28601,0.0316,0.682367


In [50]:
# Make a prediction with the neural network - Sepsis
X = np.expand_dims(sepsis_scaled_evaluation, axis=2)
sepsis_prediction = model.predict(X)
df_sepsis_pred = pd.DataFrame(data=sepsis_prediction, index=sepsis_data.index, columns=['Naive', 'IFN', 'LPS24', 'LPS2'])
df_sepsis_pred.style.background_gradient(cmap='viridis')

Unnamed: 0_level_0,Unnamed: 1_level_0,Naive,IFN,LPS24,LPS2
day,sample,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
1,1,0.012905,0.033063,0.114174,0.839858
1,3,0.001279,0.154794,0.020094,0.823833
1,4,0.003344,0.079542,0.102314,0.814801
1,5,0.235444,0.036897,0.533905,0.193754
1,6,0.013704,0.047536,0.12221,0.81655
1,8,0.002145,0.19928,0.028884,0.769691
1,12,0.00071,0.113809,0.011522,0.873958
1,14,0.000345,0.11371,0.01413,0.871815
1,16,0.002621,0.038989,0.05884,0.89955
1,19,0.000779,0.160574,0.012333,0.826314
