name: new_dataset_tests.ipynb

This notebook is very similar to new_dataset_tests.ipynb, but runs slightly different experiments.

Please note that this notebook is not set up to write results to the current "experiments" folder. I would not recommend running it; please use `MLME-v2.0-conv_model.py` instead.

---

See https://github.com/tensorflow/tensorflow/issues/38012 for the Keras "no module" import error.

In [None]:
# mount google drive

from google.colab import drive
drive.mount('/content/drive')

%cd '/content/drive/Shareddrives/NRC_Amii_Agronomics_Project/nrc-ml-plant-genomics/'

In [None]:
import keras
import warnings, logging
import json
import numpy as np
import pandas as pd
import datetime, os

from models.base_model import BaseModel
from keras.models import Sequential, load_model, model_from_json
from keras.layers import Input, Dense, Conv1D, MaxPooling2D, Dropout, Flatten, BatchNormalization
from keras.optimizers import Adam
from keras.callbacks import ModelCheckpoint, EarlyStopping  # https://machinelearningmastery.com/how-to-stop-training-deep-neural-networks-at-the-right-time-using-early-stopping/
import tensorflow as tf

from sklearn.metrics import r2_score
from scipy.stats import spearmanr # https://docs.scipy.org/doc/scipy/reference/generated/scipy.stats.spearmanr.html

warnings.filterwarnings('ignore')
logging.disable(1000)

%load_ext tensorboard

In [None]:
import time
# Block the kernel for 1200 seconds (20 minutes) before the following cells run.
# NOTE(review): the reason for this delay is not stated anywhere in the notebook;
# confirm it is still needed, since it stalls a "Restart & Run All".
time.sleep(1200)

In [None]:
# rm -rf ./logs/

In [None]:
pwd

'/Users/sarahdavis/Documents/internship/project_files/MPRA-DragoNN'

In [None]:
ls

LICENSE                              kipoi_playground.ipynb
README.md                            linear_mapping.ipynb
__init__.py                          [34mlogs[m[m/
commands_for_setup.txt               main.py
create_final_dataset.ipynb           [34mmodels[m[m/
cross_val_linear_mapping.ipynb       [34mnew_data[m[m/
[34mdata[m[m/                                new_dataset_tests.ipynb
data_exploration.ipynb               new_dataset_tests_duplicate.ipynb
[34mdata_loader[m[m/                         new_dataset_tests_more_epochs.ipynb
[34mevaluator[m[m/                           [34mnew_models[m[m/
[34mexample[m[m/                             predict_on_NRC_data.ipynb
generate_data_format.ipynb           requirements.txt
keras_model_loading.ipynb            requirements_exact.txt
keras_retraining.ipynb               [34mtrainers[m[m/
keras_training.ipynb                 [34mutils[m[m/
[34mkipoi[m[m/


In [None]:
# load in model as it is from kipoi

with open('models/model.json', 'r') as json_file:
    json_savedModel = json_file.read()
    
pretrained_model = model_from_json(json_savedModel)
pretrained_model.load_weights('models/pretrained.hdf5')

In [None]:
pretrained_model.summary()

Model: "sequential_1"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv1d_1 (Conv1D)            (None, 141, 120)          2520      
_________________________________________________________________
batch_normalization_1 (Batch (None, 141, 120)          480       
_________________________________________________________________
dropout_1 (Dropout)          (None, 141, 120)          0         
_________________________________________________________________
conv1d_2 (Conv1D)            (None, 137, 120)          72120     
_________________________________________________________________
batch_normalization_2 (Batch (None, 137, 120)          480       
_________________________________________________________________
dropout_2 (Dropout)          (None, 137, 120)          0         
_________________________________________________________________
conv1d_3 (Conv1D)            (None, 133, 120)         

In [None]:
# One-hot channel order is A, C, G, T (cross referenced with kipoi data loader).
mapping = {"A":[1, 0, 0, 0], "T":[0, 0, 0, 1], "C":[0, 1, 0, 0], "G":[0, 0, 1, 0]}

def get_ohe(sequence):
    """One-hot encode a nucleotide sequence into the format the model consumes.

    Args:
        sequence: iterable of single-character nucleotide strings (normally a
            ``str``). Upper- and lower-case A/C/G/T are accepted.

    Returns:
        np.ndarray of shape ``(len(sequence), 4)`` with one row per nucleotide,
        columns ordered A, C, G, T. An empty sequence yields shape ``(0, 4)``.

    Raises:
        KeyError: if a symbol other than A/C/G/T (in either case) is present.
    """
    # Normalise case per symbol so soft-masked (lowercase) bases encode like
    # their uppercase counterparts instead of raising KeyError.
    rows = [mapping[nt.upper()] for nt in sequence]
    if not rows:
        # np.array([]) would be 1-D; keep the channel axis so results still stack.
        return np.zeros((0, 4), dtype=int)
    return np.array(rows)

def get_model(treatment_dict):
    """Build the (uncompiled) three-block Conv1D regression network.

    Each block is Conv1D(120 filters, kernel size 5, relu) followed by
    BatchNormalization and Dropout(0.1). The blocks are followed by a Flatten
    layer and a single linear Dense unit.

    Args:
        treatment_dict: dict with keys "conv1", "conv2" and "conv3". A value of
            2 makes that block's Conv1D and BatchNormalization layers
            non-trainable; any other value leaves them trainable.

    Returns:
        The assembled Sequential model. No weights are loaded here.
    """
    # (treatment key, conv layer name, batch-norm layer name, dropout layer name)
    block_specs = (
        ("conv1", "1DConv_1", "batchNorm1", "drop1"),
        ("conv2", "1DConv_2", "batchNorm2", "drop2"),
        ("conv3", "1DConv_3", "batchNorm3", "drop3"),
    )

    network = Sequential()
    for position, (key, conv_name, norm_name, drop_name) in enumerate(block_specs):
        is_trainable = treatment_dict[key] != 2
        # Only the very first layer declares the input shape.
        first_layer_kwargs = {"input_shape": (145, 4)} if position == 0 else {}
        network.add(Conv1D(120, 5, activation='relu', name=conv_name,
                           trainable=is_trainable, **first_layer_kwargs))
        network.add(BatchNormalization(name=norm_name, trainable=is_trainable))
        network.add(Dropout(0.1, name=drop_name))

    network.add(Flatten(name="flat"))
    network.add(Dense(1, activation='linear', name="dense2"))
    return network

def set_weights(treatment_dict, mdl):
    """Copy weights from the global ``pretrained_model`` into selected layers of ``mdl``.

    A conv block (its conv, batch-norm and dropout layers, addressed by layer
    index) receives the pretrained weights whenever its treatment value is not 0.

    Args:
        treatment_dict: dict with keys "conv1", "conv2" and "conv3".
        mdl: model whose ``layers`` are updated in place.

    Returns:
        The same ``mdl`` object, for call chaining.
    """
    # treatment key -> indices of the layers that make up that block
    block_layers = (
        ("conv1", (0, 1, 2)),
        ("conv2", (3, 4, 5)),
        ("conv3", (6, 7, 8)),
    )
    layers_to_set = [
        idx
        for key, indices in block_layers
        if treatment_dict[key] != 0
        for idx in indices
    ]

    for idx in layers_to_set:
        target_layer = mdl.layers[idx]
        print(target_layer)
        target_layer.set_weights(pretrained_model.layers[idx].get_weights())

    return mdl

def return_spearman(true, pred):
    """Return the Spearman rank correlation of ``true`` vs ``pred`` as a tensor.

    The first element of scipy's ``spearmanr`` result is passed through
    ``tf.convert_to_tensor``; the p-value is discarded.
    """
    rho = spearmanr(true, pred)[0]
    return tf.convert_to_tensor(rho)

def correlation(x, y):  # https://www.kaggle.com/carlolepelaars/understanding-the-metric-spearman-s-rho
    """Pearson-style correlation of two tensors, built from TensorFlow ops.

    Both inputs are mean-centred; the mean of their element-wise product is
    divided by the product of their standard deviations (``tf.math.reduce_std``).
    No ranking is applied to the inputs inside this function.
    """
    x_centered = x - tf.math.reduce_mean(x)
    y_centered = y - tf.math.reduce_mean(y)
    covariance = tf.math.reduce_mean(tf.multiply(x_centered, y_centered))
    spread = tf.math.reduce_std(x_centered) * tf.math.reduce_std(y_centered)
    return covariance / spread

In [None]:
# loading in data
df = pd.read_csv("data/processed/hidra_chloroplast.csv")

In [None]:
df.head()

Unnamed: 0,organelle,start_coords,end_coords,sequence,control_raw_coverage,treatment_raw_coverage,control_norm_coverage,treatment_norm_coverage,target,A,T,C,G,set
0,NC_016734.1,0,145,AATCATAATAACTTGGTCCCGGGCATCACGGGCGAACGACGGGAAT...,998,316,338.31,405.52,0.26,39,40,40,26,test
1,NC_016734.1,5,150,TAATAACTTGGTCCCGGGCATCACGGGCGAACGACGGGAATTGAAC...,998,318,338.31,408.08,0.27,40,40,39,26,test
2,NC_016734.1,10,155,ACTTGGTCCCGGGCATCACGGGCGAACGACGGGAATTGAACCCGCG...,998,318,338.31,408.08,0.27,38,38,42,27,test
3,NC_016734.1,15,160,GTCCCGGGCATCACGGGCGAACGACGGGAATTGAACCCGCGATGGT...,998,318,338.31,408.08,0.27,40,36,42,27,test
4,NC_016734.1,20,165,GGGCATCACGGGCGAACGACGGGAATTGAACCCGCGATGGTGAATT...,998,318,338.31,408.08,0.27,41,39,39,26,test


In [None]:
# Partition the frame by its "set" column, then build the one-hot encoded
# inputs (X_*) and the regression targets (y_*) for each partition.

train_df, val_df, test_df = (
    df[df.set == split_name] for split_name in ("train", "val", "test")
)

X_train, X_val, X_test = (
    np.array([get_ohe(sqnc) for sqnc in part["sequence"]])
    for part in (train_df, val_df, test_df)
)

y_train, y_val, y_test = (
    np.array(part["target"].tolist())
    for part in (train_df, val_df, test_df)
)

In [None]:
print(X_train.shape, X_val.shape, X_test.shape)
print(y_train.shape, y_val.shape, y_test.shape)

(23804, 145, 4) (2964, 145, 4) (2964, 145, 4)
(23804,) (2964,) (2964,)


### Re-train from scratch

In [None]:
# 0 for init random weights & train
# 1 for load pre-trained weights and train
# 2 for load pre-trained weights and freeze

layer_treatment = {"conv1":0, 
                   "conv2":0, 
                   "conv3":0}

model = get_model(layer_treatment)
model = set_weights(layer_treatment, model)

In [None]:
model.summary()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
1DConv_1 (Conv1D)            (None, 141, 120)          2520      
_________________________________________________________________
batchNorm1 (BatchNormalizati (None, 141, 120)          480       
_________________________________________________________________
drop1 (Dropout)              (None, 141, 120)          0         
_________________________________________________________________
1DConv_2 (Conv1D)            (None, 137, 120)          72120     
_________________________________________________________________
batchNorm2 (BatchNormalizati (None, 137, 120)          480       
_________________________________________________________________
drop2 (Dropout)              (None, 137, 120)          0         
_________________________________________________________________
1DConv_3 (Conv1D)            (None, 133, 120)          7

In [None]:
rm -rf ./logs/

In [None]:
model.compile(optimizer='adam', 
              loss='mean_squared_error') 

In [None]:
logdir = os.path.join("logs", datetime.datetime.now().strftime("%Y%m%d-%H%M%S"))
tensorboard_callback = keras.callbacks.TensorBoard(logdir, histogram_freq=1)  # https://stackoverflow.com/questions/59894720/keras-and-tensorboard-attributeerror-sequential-object-has-no-attribute-g

es_callback = EarlyStopping(monitor='val_loss', verbose=1, patience=5)
mc_callback = ModelCheckpoint('new_models/best_model.h5', monitor='val_loss', save_best_only=True)

In [None]:
history = model.fit(X_train, y_train, 
                    epochs=20, 
                    validation_data=(X_val, y_val),
                    callbacks=[tensorboard_callback, es_callback, mc_callback])

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


In [None]:
%tensorboard --logdir logs

Reusing TensorBoard on port 6006 (pid 51619), started 1:59:01 ago. (Use '!kill 51619' to kill it.)

In [None]:
saved_model = load_model('new_models/best_model.h5')

In [None]:
# R^2
print("Train", r2_score(y_train, saved_model.predict(X_train).reshape(1, -1)[0]))
print("Val", r2_score(y_val, saved_model.predict(X_val).reshape(1, -1)[0]))
print("Test", r2_score(y_test, saved_model.predict(X_test).reshape(1, -1)[0]))

Train 0.25944460021322224
Val 0.3119183463566435
Test 0.03897006124520219


In [None]:
# Spearman
print("Train", spearmanr(y_train, saved_model.predict(X_train).reshape(1, -1)[0]))
print("Val", spearmanr(y_val, saved_model.predict(X_val).reshape(1, -1)[0]))
print("Test", spearmanr(y_test, saved_model.predict(X_test).reshape(1, -1)[0]))

Train SpearmanrResult(correlation=0.5123582146644408, pvalue=0.0)
Val SpearmanrResult(correlation=0.5987678524433742, pvalue=6.665660277650358e-288)
Test SpearmanrResult(correlation=0.33216693062040453, pvalue=2.80627151550174e-77)


### Re-train from pre-trained weights

In [None]:
# 0 for init random weights & train
# 1 for load pre-trained weights and train
# 2 for load pre-trained weights and freeze

layer_treatment = {"conv1":1, 
                   "conv2":1, 
                   "conv3":1}

model = get_model(layer_treatment)
model = set_weights(layer_treatment, model)

<keras.layers.convolutional.Conv1D object at 0x7fe4d5fe7d00>
<keras.layers.normalization_v2.BatchNormalization object at 0x7fe4b87aef70>
<keras.layers.core.Dropout object at 0x7fe4b968ce20>
<keras.layers.convolutional.Conv1D object at 0x7fe4b968c1f0>
<keras.layers.normalization_v2.BatchNormalization object at 0x7fe4bbb4a100>
<keras.layers.core.Dropout object at 0x7fe4b95b9730>
<keras.layers.convolutional.Conv1D object at 0x7fe4b95b9d30>
<keras.layers.normalization_v2.BatchNormalization object at 0x7fe4b95b9970>
<keras.layers.core.Dropout object at 0x7fe4bbb9e220>


In [None]:
model.summary()

Model: "sequential_1"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
1DConv_1 (Conv1D)            (None, 141, 120)          2520      
_________________________________________________________________
batchNorm1 (BatchNormalizati (None, 141, 120)          480       
_________________________________________________________________
drop1 (Dropout)              (None, 141, 120)          0         
_________________________________________________________________
1DConv_2 (Conv1D)            (None, 137, 120)          72120     
_________________________________________________________________
batchNorm2 (BatchNormalizati (None, 137, 120)          480       
_________________________________________________________________
drop2 (Dropout)              (None, 137, 120)          0         
_________________________________________________________________
1DConv_3 (Conv1D)            (None, 133, 120)         

In [None]:
rm -rf ./logs/

In [None]:
model.compile(optimizer='adam', 
              loss='mean_squared_error') 

In [None]:
logdir = os.path.join("logs", datetime.datetime.now().strftime("%Y%m%d-%H%M%S"))
tensorboard_callback = keras.callbacks.TensorBoard(logdir, histogram_freq=1)  # https://stackoverflow.com/questions/59894720/keras-and-tensorboard-attributeerror-sequential-object-has-no-attribute-g

es_callback = EarlyStopping(monitor='val_loss', verbose=1, patience=5)
mc_callback = ModelCheckpoint('new_models/best_model.h5', monitor='val_loss', save_best_only=True)

In [None]:
history = model.fit(X_train, y_train, 
                    epochs=20, 
                    validation_data=(X_val, y_val),
                    callbacks=[tensorboard_callback, es_callback, mc_callback])

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 00011: early stopping


In [None]:
%tensorboard --logdir logs

Reusing TensorBoard on port 6006 (pid 51619), started 2:08:30 ago. (Use '!kill 51619' to kill it.)

In [None]:
saved_model = load_model('new_models/best_model.h5')

In [None]:
# R^2
print("Train", r2_score(y_train, saved_model.predict(X_train).reshape(1, -1)[0]))
print("Val", r2_score(y_val, saved_model.predict(X_val).reshape(1, -1)[0]))
print("Test", r2_score(y_test, saved_model.predict(X_test).reshape(1, -1)[0]))

Train 0.9523041824621324
Val 0.19162321458834608
Test 0.06302761065167006


In [None]:
# Spearman
print("Train", spearmanr(y_train, saved_model.predict(X_train).reshape(1, -1)[0]))
print("Val", spearmanr(y_val, saved_model.predict(X_val).reshape(1, -1)[0]))
print("Test", spearmanr(y_test, saved_model.predict(X_test).reshape(1, -1)[0]))

Train SpearmanrResult(correlation=0.9889631457872224, pvalue=0.0)
Val SpearmanrResult(correlation=0.460422017600993, pvalue=1.8500358898834524e-155)
Test SpearmanrResult(correlation=0.279910711323346, pvalue=1.7364678423620713e-54)


### Train only output layer

In [None]:
# 0 for init random weights & train
# 1 for load pre-trained weights and train
# 2 for load pre-trained weights and freeze

layer_treatment = {"conv1":2, 
                   "conv2":2, 
                   "conv3":2}

model = get_model(layer_treatment)
model = set_weights(layer_treatment, model)

<keras.layers.convolutional.Conv1D object at 0x7fe4bbccf8b0>
<keras.layers.normalization_v2.BatchNormalization object at 0x7fe4b9626e20>
<keras.layers.core.Dropout object at 0x7fe4bbf940d0>
<keras.layers.convolutional.Conv1D object at 0x7fe4bbc6fa00>
<keras.layers.normalization_v2.BatchNormalization object at 0x7fe4bbf94430>
<keras.layers.core.Dropout object at 0x7fe4d5ed2af0>
<keras.layers.convolutional.Conv1D object at 0x7fe4bbbf3e20>
<keras.layers.normalization_v2.BatchNormalization object at 0x7fe4b871a280>
<keras.layers.core.Dropout object at 0x7fe4d5ebc730>


In [None]:
model.summary()

Model: "sequential_2"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
1DConv_1 (Conv1D)            (None, 141, 120)          2520      
_________________________________________________________________
batchNorm1 (BatchNormalizati (None, 141, 120)          480       
_________________________________________________________________
drop1 (Dropout)              (None, 141, 120)          0         
_________________________________________________________________
1DConv_2 (Conv1D)            (None, 137, 120)          72120     
_________________________________________________________________
batchNorm2 (BatchNormalizati (None, 137, 120)          480       
_________________________________________________________________
drop2 (Dropout)              (None, 137, 120)          0         
_________________________________________________________________
1DConv_3 (Conv1D)            (None, 133, 120)         

In [None]:
rm -rf ./logs/

In [None]:
model.compile(optimizer='adam', 
              loss='mean_squared_error') 

In [None]:
logdir = os.path.join("logs", datetime.datetime.now().strftime("%Y%m%d-%H%M%S"))
tensorboard_callback = keras.callbacks.TensorBoard(logdir, histogram_freq=1)  # https://stackoverflow.com/questions/59894720/keras-and-tensorboard-attributeerror-sequential-object-has-no-attribute-g

es_callback = EarlyStopping(monitor='val_loss', verbose=1, patience=5)
mc_callback = ModelCheckpoint('new_models/best_model.h5', monitor='val_loss', save_best_only=True)

In [None]:
history = model.fit(X_train, y_train, 
                    epochs=20, 
                    validation_data=(X_val, y_val),
                    callbacks=[tensorboard_callback, es_callback, mc_callback])

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 00007: early stopping


In [None]:
%tensorboard --logdir logs

Reusing TensorBoard on port 6006 (pid 51619), started 2:09:54 ago. (Use '!kill 51619' to kill it.)

In [None]:
saved_model = load_model('new_models/best_model.h5')

In [None]:
# R^2
print("Train", r2_score(y_train, saved_model.predict(X_train).reshape(1, -1)[0]))
print("Val", r2_score(y_val, saved_model.predict(X_val).reshape(1, -1)[0]))
print("Test", r2_score(y_test, saved_model.predict(X_test).reshape(1, -1)[0]))

Train 0.3794996633113821
Val 0.35209953970188
Test 0.0945195205129632


In [None]:
# Spearman
print("Train", spearmanr(y_train, saved_model.predict(X_train).reshape(1, -1)[0]))
print("Val", spearmanr(y_val, saved_model.predict(X_val).reshape(1, -1)[0]))
print("Test", spearmanr(y_test, saved_model.predict(X_test).reshape(1, -1)[0]))

Train SpearmanrResult(correlation=0.6176099096672574, pvalue=0.0)
Val SpearmanrResult(correlation=0.6245293853640727, pvalue=2.5405e-320)
Test SpearmanrResult(correlation=0.3681022851131163, pvalue=8.885880986232987e-96)


### Freeze Conv 1, 2, Train Conv 3 from Scratch

In [None]:
# 0 for init random weights & train
# 1 for load pre-trained weights and train
# 2 for load pre-trained weights and freeze

layer_treatment = {"conv1":2, 
                   "conv2":2, 
                   "conv3":0}

model = get_model(layer_treatment)
model = set_weights(layer_treatment, model)

<keras.layers.convolutional.Conv1D object at 0x7fe4bbbb0d00>
<keras.layers.normalization_v2.BatchNormalization object at 0x7fe4bbb20130>
<keras.layers.core.Dropout object at 0x7fe4bbd343a0>
<keras.layers.convolutional.Conv1D object at 0x7fe4bbd340d0>
<keras.layers.normalization_v2.BatchNormalization object at 0x7fe4b954ac70>
<keras.layers.core.Dropout object at 0x7fe4bbd92d90>


In [None]:
model.summary()

Model: "sequential_3"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
1DConv_1 (Conv1D)            (None, 141, 120)          2520      
_________________________________________________________________
batchNorm1 (BatchNormalizati (None, 141, 120)          480       
_________________________________________________________________
drop1 (Dropout)              (None, 141, 120)          0         
_________________________________________________________________
1DConv_2 (Conv1D)            (None, 137, 120)          72120     
_________________________________________________________________
batchNorm2 (BatchNormalizati (None, 137, 120)          480       
_________________________________________________________________
drop2 (Dropout)              (None, 137, 120)          0         
_________________________________________________________________
1DConv_3 (Conv1D)            (None, 133, 120)         

In [None]:
rm -rf ./logs/

In [None]:
model.compile(optimizer='adam', 
              loss='mean_squared_error') 

In [None]:
logdir = os.path.join("logs", datetime.datetime.now().strftime("%Y%m%d-%H%M%S"))
tensorboard_callback = keras.callbacks.TensorBoard(logdir, histogram_freq=1)  # https://stackoverflow.com/questions/59894720/keras-and-tensorboard-attributeerror-sequential-object-has-no-attribute-g

es_callback = EarlyStopping(monitor='val_loss', verbose=1, patience=5)
mc_callback = ModelCheckpoint('new_models/best_model.h5', monitor='val_loss', save_best_only=True)

In [None]:
history = model.fit(X_train, y_train, 
                    epochs=20, 
                    validation_data=(X_val, y_val),
                    callbacks=[tensorboard_callback, es_callback, mc_callback])

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 00016: early stopping


In [None]:
%tensorboard --logdir logs

Reusing TensorBoard on port 6006 (pid 51619), started 2:15:27 ago. (Use '!kill 51619' to kill it.)

In [None]:
saved_model = load_model('new_models/best_model.h5')

In [None]:
# R^2
print("Train", r2_score(y_train, saved_model.predict(X_train).reshape(1, -1)[0]))
print("Val", r2_score(y_val, saved_model.predict(X_val).reshape(1, -1)[0]))
print("Test", r2_score(y_test, saved_model.predict(X_test).reshape(1, -1)[0]))

Train 0.9687886765181813
Val 0.2333316882559019
Test 0.04991821964689458


In [None]:
# Spearman
print("Train", spearmanr(y_train, saved_model.predict(X_train).reshape(1, -1)[0]))
print("Val", spearmanr(y_val, saved_model.predict(X_val).reshape(1, -1)[0]))
print("Test", spearmanr(y_test, saved_model.predict(X_test).reshape(1, -1)[0]))

Train SpearmanrResult(correlation=0.9851690019120937, pvalue=0.0)
Val SpearmanrResult(correlation=0.5012122486602586, pvalue=2.4528599838170852e-188)
Test SpearmanrResult(correlation=0.250966525850511, pvalue=8.346869465311267e-44)


### Freeze Conv 1, 2, Train Conv 3 from Pre-trained Weights

In [None]:
# 0 for init random weights & train
# 1 for load pre-trained weights and train
# 2 for load pre-trained weights and freeze

layer_treatment = {"conv1":2, 
                   "conv2":2, 
                   "conv3":1}

model = get_model(layer_treatment)
model = set_weights(layer_treatment, model)

<keras.layers.convolutional.Conv1D object at 0x7fe4bc20d700>
<keras.layers.normalization_v2.BatchNormalization object at 0x7fe4abd2b640>
<keras.layers.core.Dropout object at 0x7fe4bc20d8b0>
<keras.layers.convolutional.Conv1D object at 0x7fe4abd185e0>
<keras.layers.normalization_v2.BatchNormalization object at 0x7fe4b95c7280>
<keras.layers.core.Dropout object at 0x7fe4abd208e0>
<keras.layers.convolutional.Conv1D object at 0x7fe4abd06f10>
<keras.layers.normalization_v2.BatchNormalization object at 0x7fe4abd06df0>
<keras.layers.core.Dropout object at 0x7fe4ab718d30>


In [None]:
model.summary()

Model: "sequential_4"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
1DConv_1 (Conv1D)            (None, 141, 120)          2520      
_________________________________________________________________
batchNorm1 (BatchNormalizati (None, 141, 120)          480       
_________________________________________________________________
drop1 (Dropout)              (None, 141, 120)          0         
_________________________________________________________________
1DConv_2 (Conv1D)            (None, 137, 120)          72120     
_________________________________________________________________
batchNorm2 (BatchNormalizati (None, 137, 120)          480       
_________________________________________________________________
drop2 (Dropout)              (None, 137, 120)          0         
_________________________________________________________________
1DConv_3 (Conv1D)            (None, 133, 120)         

In [None]:
rm -rf ./logs/

In [None]:
model.compile(optimizer='adam', 
              loss='mean_squared_error') 

In [None]:
logdir = os.path.join("logs", datetime.datetime.now().strftime("%Y%m%d-%H%M%S"))
tensorboard_callback = keras.callbacks.TensorBoard(logdir, histogram_freq=1)  # https://stackoverflow.com/questions/59894720/keras-and-tensorboard-attributeerror-sequential-object-has-no-attribute-g

es_callback = EarlyStopping(monitor='val_loss', verbose=1, patience=5)
mc_callback = ModelCheckpoint('new_models/best_model.h5', monitor='val_loss', save_best_only=True)

In [None]:
history = model.fit(X_train, y_train, 
                    epochs=20, 
                    validation_data=(X_val, y_val),
                    callbacks=[tensorboard_callback, es_callback, mc_callback])

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 00008: early stopping


In [None]:
%tensorboard --logdir logs

Reusing TensorBoard on port 6006 (pid 51619), started 2:18:23 ago. (Use '!kill 51619' to kill it.)

In [None]:
saved_model = load_model('new_models/best_model.h5')

In [None]:
# R^2
print("Train", r2_score(y_train, saved_model.predict(X_train).reshape(1, -1)[0]))
print("Val", r2_score(y_val, saved_model.predict(X_val).reshape(1, -1)[0]))
print("Test", r2_score(y_test, saved_model.predict(X_test).reshape(1, -1)[0]))

Train 0.960727117005799
Val 0.23660668659652773
Test 0.0023678605282776433


In [None]:
# Spearman
print("Train", spearmanr(y_train, saved_model.predict(X_train).reshape(1, -1)[0]))
print("Val", spearmanr(y_val, saved_model.predict(X_val).reshape(1, -1)[0]))
print("Test", spearmanr(y_test, saved_model.predict(X_test).reshape(1, -1)[0]))

Train SpearmanrResult(correlation=0.9822139360464436, pvalue=0.0)
Val SpearmanrResult(correlation=0.5045797235300145, pvalue=2.9568931822109786e-191)
Test SpearmanrResult(correlation=0.2406712992032025, pvalue=2.5381515720865143e-40)


---
### Spearman Results

0 for init random weights & train  
1 for load pre-trained weights and train  
2 for load pre-trained weights and freeze  

000: val = 0.47753801170363896, test = 0.2866754020651326  
111: val = 0.47855750422978954, test = 0.279400514316224  
222: val = 0.5858565342434344, test = 0.3741217612323338  
220: val = 0.41782505305420903, test = 0.23175390117095385  
221: val = 0.4622724735247824, test = 0.26885707718958146  
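2220: val = 0.5984249204802108, test = 0.32715495828441904

The first three digits of each code give the treatment of conv1, conv2 and conv3, in that order. Note that these values do not match the outputs recorded in the cells above (for example, the 000 run above shows val ≈ 0.599), so they presumably come from a different run.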

### Train only output layer (with the frozen pre-trained dense layer kept)

In [None]:
def get_model(treatment_dict):
    """Build the (uncompiled) conv network with an extra frozen dense layer.

    Three blocks of Conv1D(120 filters, kernel size 5, relu), BatchNormalization
    and Dropout(0.1) are followed by Flatten, a non-trainable linear Dense(12)
    layer ("dense2") and a trainable linear Dense(1) output layer ("dense3").

    Args:
        treatment_dict: dict with keys "conv1", "conv2" and "conv3". A value of
            2 makes that block's Conv1D and BatchNormalization layers
            non-trainable; any other value leaves them trainable.

    Returns:
        The assembled Sequential model. No weights are loaded here.
    """
    # (treatment key, conv layer name, batch-norm layer name, dropout layer name)
    layer_specs = (
        ("conv1", "1DConv_1", "batchNorm1", "drop1"),
        ("conv2", "1DConv_2", "batchNorm2", "drop2"),
        ("conv3", "1DConv_3", "batchNorm3", "drop3"),
    )

    net = Sequential()
    for position, (key, conv_name, norm_name, drop_name) in enumerate(layer_specs):
        can_train = treatment_dict[key] != 2
        # Only the very first layer declares the input shape.
        extra = {"input_shape": (145, 4)} if position == 0 else {}
        net.add(Conv1D(120, 5, activation='relu', name=conv_name,
                       trainable=can_train, **extra))
        net.add(BatchNormalization(name=norm_name, trainable=can_train))
        net.add(Dropout(0.1, name=drop_name))

    net.add(Flatten(name="flat"))
    # "dense2" is always frozen; only the final single-unit layer is trained.
    net.add(Dense(12, activation='linear', name="dense2", trainable=False))
    net.add(Dense(1, activation='linear', name="dense3"))
    return net

def set_weights(treatment_dict, mdl):
    """Copy weights from the global ``pretrained_model`` into ``mdl`` in place.

    Conv blocks follow the notebook's treatment legend: a block (its conv,
    batch-norm and dropout layers) is loaded from the pretrained model unless
    its treatment is 0 ("init random weights & train"). Layers 9 and 10 (the
    Flatten layer and the "dense2" layer, which ``get_model`` always builds as
    non-trainable) are always loaded.

    Previously ``treatment_dict`` was ignored and all of layers 0-10 were
    overwritten, so a treatment of 0 still received pretrained weights. With
    every treatment non-zero the result is unchanged.

    Args:
        treatment_dict: dict with keys "conv1", "conv2" and "conv3".
        mdl: model whose ``layers`` are updated in place.
            NOTE(review): assumes ``pretrained_model.layers[10]`` has weights
            shaped like Dense(12) on the flattened conv output; confirm
            against the pretrained architecture.

    Returns:
        The same ``mdl`` object, for call chaining.
    """
    layers_to_set = []
    if treatment_dict["conv1"] != 0: layers_to_set += [0, 1, 2]
    if treatment_dict["conv2"] != 0: layers_to_set += [3, 4, 5]
    if treatment_dict["conv3"] != 0: layers_to_set += [6, 7, 8]
    # The frozen dense layer can never be trained, so it must come from the
    # pretrained model regardless of the conv treatments.
    layers_to_set += [9, 10]

    for i in layers_to_set:
        print(mdl.layers[i])
        pretrained_layer_weights = pretrained_model.layers[i].get_weights()
        mdl.layers[i].set_weights(pretrained_layer_weights)

    return mdl

In [None]:
# 0 for init random weights & train
# 1 for load pre-trained weights and train
# 2 for load pre-trained weights and freeze

layer_treatment = {"conv1":2, 
                   "conv2":2, 
                   "conv3":2}

model = get_model(layer_treatment)
model = set_weights(layer_treatment, model)

<keras.layers.convolutional.Conv1D object at 0x7fe4ad564d60>
<keras.layers.normalization_v2.BatchNormalization object at 0x7fe4ad564460>
<keras.layers.core.Dropout object at 0x7fe4ab8078b0>
<keras.layers.convolutional.Conv1D object at 0x7fe4ac9d2310>
<keras.layers.normalization_v2.BatchNormalization object at 0x7fe4ac9d2d00>
<keras.layers.core.Dropout object at 0x7fe4ad258f40>
<keras.layers.convolutional.Conv1D object at 0x7fe4ac9e0040>
<keras.layers.normalization_v2.BatchNormalization object at 0x7fe4ad4d2580>
<keras.layers.core.Dropout object at 0x7fe4ad4ac1f0>
<keras.layers.core.Flatten object at 0x7fe4ad4ac700>
<keras.layers.core.Dense object at 0x7fe4ad4d91c0>


In [None]:
model.summary()

Model: "sequential_5"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
1DConv_1 (Conv1D)            (None, 141, 120)          2520      
_________________________________________________________________
batchNorm1 (BatchNormalizati (None, 141, 120)          480       
_________________________________________________________________
drop1 (Dropout)              (None, 141, 120)          0         
_________________________________________________________________
1DConv_2 (Conv1D)            (None, 137, 120)          72120     
_________________________________________________________________
batchNorm2 (BatchNormalizati (None, 137, 120)          480       
_________________________________________________________________
drop2 (Dropout)              (None, 137, 120)          0         
_________________________________________________________________
1DConv_3 (Conv1D)            (None, 133, 120)         

In [None]:
rm -rf ./logs/

In [None]:
model.compile(optimizer='adam', 
              loss='mean_squared_error') 

In [None]:
logdir = os.path.join("logs", datetime.datetime.now().strftime("%Y%m%d-%H%M%S"))
tensorboard_callback = keras.callbacks.TensorBoard(logdir, histogram_freq=1)  # https://stackoverflow.com/questions/59894720/keras-and-tensorboard-attributeerror-sequential-object-has-no-attribute-g

es_callback = EarlyStopping(monitor='val_loss', verbose=1, patience=5)
mc_callback = ModelCheckpoint('new_models/best_model.h5', monitor='val_loss', save_best_only=True)

In [None]:
history = model.fit(X_train, y_train, 
                    epochs=20, 
                    validation_data=(X_val, y_val),
                    callbacks=[tensorboard_callback, es_callback, mc_callback])

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 00012: early stopping


In [None]:
%tensorboard --logdir logs

Reusing TensorBoard on port 6006 (pid 51619), started 2:20:35 ago. (Use '!kill 51619' to kill it.)

In [None]:
saved_model = load_model('new_models/best_model.h5')

In [None]:
# R^2
print("Train", r2_score(y_train, saved_model.predict(X_train).reshape(1, -1)[0]))
print("Val", r2_score(y_val, saved_model.predict(X_val).reshape(1, -1)[0]))
print("Test", r2_score(y_test, saved_model.predict(X_test).reshape(1, -1)[0]))

Train 0.25476824069687276
Val 0.3176615947832593
Test 0.013580135246752945


In [None]:
# Spearman
print("Train", spearmanr(y_train, saved_model.predict(X_train).reshape(1, -1)[0]))
print("Val", spearmanr(y_val, saved_model.predict(X_val).reshape(1, -1)[0]))
print("Test", spearmanr(y_test, saved_model.predict(X_test).reshape(1, -1)[0]))

Train SpearmanrResult(correlation=0.5109761546441903, pvalue=0.0)
Val SpearmanrResult(correlation=0.5954177830416977, pvalue=6.685087734875048e-284)
Test SpearmanrResult(correlation=0.318564088843591, pvalue=6.823668259518305e-71)
