name: new_dataset_tests_default_params.ipynb

This notebook is very similar to new_dataset_tests.ipynb, but runs slightly different experiments.

Please note that this notebook is not set up to write its results to the current "experiments" folder. I would not recommend running it; please use `MLME-v2.0-conv_model.py` instead.

---

See https://github.com/tensorflow/tensorflow/issues/38012 for the Keras "no module" error.

In [None]:
# mount google drive

from google.colab import drive
drive.mount('/content/drive')

%cd '/content/drive/Shareddrives/NRC_Amii_Agronomics_Project/nrc-ml-plant-genomics/'

In [None]:
import keras
import warnings, logging
import json
import numpy as np
import pandas as pd
import datetime, os

from models.base_model import BaseModel
from keras.models import Sequential, load_model, model_from_json
from keras.layers import Input, Dense, Conv1D, MaxPooling2D, Dropout, Flatten, BatchNormalization
from keras.optimizers import Adam
from keras.callbacks import ModelCheckpoint, EarlyStopping  # https://machinelearningmastery.com/how-to-stop-training-deep-neural-networks-at-the-right-time-using-early-stopping/
import tensorflow as tf

from sklearn.metrics import r2_score
from scipy.stats import spearmanr # https://docs.scipy.org/doc/scipy/reference/generated/scipy.stats.spearmanr.html

warnings.filterwarnings('ignore')
logging.disable(1000)

%load_ext tensorboard

In [None]:
ls

LICENSE                                 linear_mapping.ipynb
README.md                               [34mlogs[m[m/
__init__.py                             main.py
commands_for_setup.txt                  [34mmodels[m[m/
create_final_dataset.ipynb              [34mnew_data[m[m/
cross_val_linear_mapping.ipynb          new_dataset_tests.ipynb
[34mdata[m[m/                                   new_dataset_tests_default_params.ipynb
data_exploration.ipynb                  new_dataset_tests_duplicate.ipynb
[34mdata_loader[m[m/                            new_dataset_tests_more_epochs.ipynb
[34mevaluator[m[m/                              [34mnew_models[m[m/
[34mexample[m[m/                                [34mnew_scripts[m[m/
generate_data_format.ipynb              predict_on_NRC_data.ipynb
keras_model_loading.ipynb               requirements.txt
keras_retraining.ipynb                  requirements_exact.txt
keras_training.ipynb                    [34mtrain

In [None]:
# params
# Shared by every experiment below; the trailing underscore keeps these names
# distinct from the keyword arguments they are passed to.

learning_rate_ = 0.002  # step size handed to the Adam optimizer in each compile cell
batch_size_ = 512  # batch_size used by every model.fit call
num_epochs_ = 100  # upper bound on epochs; EarlyStopping may end training sooner
patience_ = 20  # EarlyStopping patience, monitored on val_loss

In [None]:
# load in model as it is from kipoi

# The architecture is stored as a JSON description, separately from the weights.
with open('models/model.json', 'r') as json_file:
    json_savedModel = json_file.read()
    
# Rebuild the network from its JSON config, then load the pretrained weights into it.
# `pretrained_model` is read as a global by set_weights() further down.
pretrained_model = model_from_json(json_savedModel)
pretrained_model.load_weights('models/pretrained.hdf5')

In [None]:
pretrained_model.summary()

Model: "sequential_1"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv1d_1 (Conv1D)            (None, 141, 120)          2520      
_________________________________________________________________
batch_normalization_1 (Batch (None, 141, 120)          480       
_________________________________________________________________
dropout_1 (Dropout)          (None, 141, 120)          0         
_________________________________________________________________
conv1d_2 (Conv1D)            (None, 137, 120)          72120     
_________________________________________________________________
batch_normalization_2 (Batch (None, 137, 120)          480       
_________________________________________________________________
dropout_2 (Dropout)          (None, 137, 120)          0         
_________________________________________________________________
conv1d_3 (Conv1D)            (None, 133, 120)         

In [None]:
# Nucleotide -> one-hot row (cross referenced with kipoi data loader).
mapping = {
    "A": [1, 0, 0, 0],
    "T": [0, 0, 0, 1],
    "C": [0, 1, 0, 0],
    "G": [0, 0, 1, 0],
}


def get_ohe(sequence):
    """One-hot encode a nucleotide string into the format the model consumes.

    Args:
        sequence: iterable of nucleotide characters; each must be a key of
            ``mapping`` (upper-case A, T, C or G).

    Returns:
        A numpy array with one 4-element row per character of ``sequence``.

    Raises:
        KeyError: if a character is not present in ``mapping``.
    """
    encoded_rows = []
    for nucleotide in sequence:
        encoded_rows.append(mapping[nucleotide])
    return np.array(encoded_rows)

def get_model(treatment_dict):  # initializes model architecture
    """Build the three-block Conv1D regression network (weights left at their initial values).

    Args:
        treatment_dict: dict with keys "conv1", "conv2" and "conv3". A value of 2
            marks that block's Conv1D and BatchNormalization layers as
            non-trainable; any other value leaves them trainable.

    Returns:
        An uncompiled Sequential model taking (145, 4) inputs and ending in a
        single linear output unit.
    """
    # (treatment key, conv name, batch-norm name, dropout name, extra Conv1D kwargs)
    # Only the first convolution declares the input shape.
    block_specs = (
        ("conv1", "1DConv_1", "batchNorm1", "drop1", {"input_shape": (145, 4)}),
        ("conv2", "1DConv_2", "batchNorm2", "drop2", {}),
        ("conv3", "1DConv_3", "batchNorm3", "drop3", {}),
    )

    mdl = Sequential()
    for treatment_key, conv_name, norm_name, drop_name, conv_extra in block_specs:
        is_trainable = treatment_dict[treatment_key] != 2  # bool
        mdl.add(Conv1D(120, 5, activation='relu', name=conv_name, trainable=is_trainable, **conv_extra))
        mdl.add(BatchNormalization(name=norm_name, trainable=is_trainable))
        mdl.add(Dropout(0.1, name=drop_name))

    # Regression head: flatten the feature map and map it to one linear output.
    mdl.add(Flatten(name="flat"))
    mdl.add(Dense(1, activation='linear', name="dense2"))

    return mdl

def set_weights(treatment_dict, mdl):  # sets appropriate model weights from pretrained
    """Copy pretrained weights into the conv blocks that are not randomly initialised.

    Args:
        treatment_dict: dict with keys "conv1", "conv2" and "conv3". Any value
            other than 0 means the block receives the pretrained weights.
        mdl: model whose ``layers`` are updated in place.

    Returns:
        The same ``mdl`` object, after the selected layers have been overwritten.

    Weights are read from the notebook-level ``pretrained_model``. Layers are
    matched by position, and each copied layer is printed.
    """
    # Layer positions (conv, batch norm, dropout) occupied by each block.
    block_positions = (
        ("conv1", [0, 1, 2]),
        ("conv2", [3, 4, 5]),
        ("conv3", [6, 7, 8]),
    )

    layers_to_set = []
    for treatment_key, positions in block_positions:
        if treatment_dict[treatment_key] != 0:
            layers_to_set.extend(positions)

    for position in layers_to_set:
        print(mdl.layers[position])
        source_weights = pretrained_model.layers[position].get_weights()
        mdl.layers[position].set_weights(source_weights)

    return mdl

# NOT USED
def return_spearman(true, pred):
    """Return the Spearman correlation between ``true`` and ``pred`` as a TensorFlow tensor."""
    # Index 0 of the spearmanr result is the correlation; the p-value is discarded.
    rho = spearmanr(true, pred)[0]
    return tf.convert_to_tensor(rho)

# NOT USED
def correlation(x, y):  # https://www.kaggle.com/carlolepelaars/understanding-the-metric-spearman-s-rho
    """Correlation of two tensors, built from TensorFlow ops.

    Both inputs are centred on their means. The result is the mean of the
    element-wise product of the centred values, divided by the product of
    their standard deviations. No ranking is applied to the inputs here.
    """
    x_centered = x - tf.math.reduce_mean(x)
    y_centered = y - tf.math.reduce_mean(y)
    covariance = tf.math.reduce_mean(tf.multiply(x_centered, y_centered))
    spread = tf.math.reduce_std(x_centered) * tf.math.reduce_std(y_centered)
    return covariance / spread

In [None]:
# loading in data
df = pd.read_csv("data/processed/hidra_chloroplast.csv")

In [None]:
df.head()

Unnamed: 0,organelle,start_coords,end_coords,sequence,control_raw_coverage,treatment_raw_coverage,control_norm_coverage,treatment_norm_coverage,target,A,T,C,G,set
0,NC_016734.1,0,145,AATCATAATAACTTGGTCCCGGGCATCACGGGCGAACGACGGGAAT...,998,316,338.31,405.52,0.26,39,40,40,26,test
1,NC_016734.1,5,150,TAATAACTTGGTCCCGGGCATCACGGGCGAACGACGGGAATTGAAC...,998,318,338.31,408.08,0.27,40,40,39,26,test
2,NC_016734.1,10,155,ACTTGGTCCCGGGCATCACGGGCGAACGACGGGAATTGAACCCGCG...,998,318,338.31,408.08,0.27,38,38,42,27,test
3,NC_016734.1,15,160,GTCCCGGGCATCACGGGCGAACGACGGGAATTGAACCCGCGATGGT...,998,318,338.31,408.08,0.27,40,36,42,27,test
4,NC_016734.1,20,165,GGGCATCACGGGCGAACGACGGGAATTGAACCCGCGATGGTGAATT...,998,318,338.31,408.08,0.27,41,39,39,26,test


In [None]:
# split into different datasets

def _rows_inputs_targets(frame, split_name):
    """Return (rows, one-hot inputs, targets) for rows whose `set` column equals split_name."""
    rows = frame[frame.set == split_name]
    inputs = np.array([get_ohe(sqnc) for sqnc in rows["sequence"]])
    targets = np.array(rows["target"].tolist())
    return rows, inputs, targets

# The `set` column of the CSV assigns each row to train / val / test.
train_df, X_train, y_train = _rows_inputs_targets(df, "train")
val_df, X_val, y_val = _rows_inputs_targets(df, "val")
test_df, X_test, y_test = _rows_inputs_targets(df, "test")

In [None]:
print(X_train.shape, X_val.shape, X_test.shape)
print(y_train.shape, y_val.shape, y_test.shape)

(23804, 145, 4) (2964, 145, 4) (2964, 145, 4)
(23804,) (2964,) (2964,)


### Re-train from scratch

In [None]:
# 0 for init random weights & train
# 1 for load pre-trained weights and train
# 2 for load pre-trained weights and freeze

layer_treatment = {"conv1":0, 
                   "conv2":0, 
                   "conv3":0}

model = get_model(layer_treatment)
model = set_weights(layer_treatment, model)

In [None]:
model.summary()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
1DConv_1 (Conv1D)            (None, 141, 120)          2520      
_________________________________________________________________
batchNorm1 (BatchNormalizati (None, 141, 120)          480       
_________________________________________________________________
drop1 (Dropout)              (None, 141, 120)          0         
_________________________________________________________________
1DConv_2 (Conv1D)            (None, 137, 120)          72120     
_________________________________________________________________
batchNorm2 (BatchNormalizati (None, 137, 120)          480       
_________________________________________________________________
drop2 (Dropout)              (None, 137, 120)          0         
_________________________________________________________________
1DConv_3 (Conv1D)            (None, 133, 120)          7

In [None]:
rm -rf ./logs/

In [None]:
# Pass the step size as `learning_rate`: `lr` is a deprecated alias in tf.keras
# optimizers and is rejected by newer Keras releases.
model.compile(optimizer=keras.optimizers.Adam(learning_rate=learning_rate_),  # https://stackoverflow.com/questions/59737875/keras-change-learning-rate
              loss='mean_squared_error')

In [None]:
logdir = os.path.join("logs", datetime.datetime.now().strftime("%Y%m%d-%H%M%S"))
tensorboard_callback = keras.callbacks.TensorBoard(logdir, histogram_freq=1)  # https://stackoverflow.com/questions/59894720/keras-and-tensorboard-attributeerror-sequential-object-has-no-attribute-g

es_callback = EarlyStopping(monitor='val_loss', verbose=1, patience=patience_)
mc_callback = ModelCheckpoint('new_models/best_model.h5', monitor='val_loss', save_best_only=True)

In [None]:
history = model.fit(X_train, y_train, 
                    epochs=num_epochs_, 
                    batch_size=batch_size_,
                    validation_data=(X_val, y_val),
                    callbacks=[tensorboard_callback, es_callback, mc_callback])

Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100
Epoch 75/100
Epoch 76/100
Epoch 77/100
Epoch 78

In [None]:
%tensorboard --logdir logs

In [None]:
saved_model = load_model('new_models/best_model.h5')

In [None]:
# R^2
# Score every split with the reloaded checkpoint; reshape(1, -1)[0] flattens the
# predictions to 1-D before they are compared with the targets.
for split_label, split_X, split_y in (("Train", X_train, y_train),
                                      ("Val", X_val, y_val),
                                      ("Test", X_test, y_test)):
    print(split_label, r2_score(split_y, saved_model.predict(split_X).reshape(1, -1)[0]))

Train 0.9875753653737224
Val 0.2688763874965914
Test 0.0925479516504516


In [None]:
# Spearman
# Rank correlation (and p-value) between targets and flattened predictions, per split.
for split_label, split_X, split_y in (("Train", X_train, y_train),
                                      ("Val", X_val, y_val),
                                      ("Test", X_test, y_test)):
    print(split_label, spearmanr(split_y, saved_model.predict(split_X).reshape(1, -1)[0]))

Train SpearmanrResult(correlation=0.9975163568931519, pvalue=0.0)
Val SpearmanrResult(correlation=0.5455477573814874, pvalue=1.5852960595821056e-229)
Test SpearmanrResult(correlation=0.30404121759112374, pvalue=1.969640894672249e-64)


### Re-train from pretrained starting point

In [None]:
# 0 for init random weights & train
# 1 for load pre-trained weights and train
# 2 for load pre-trained weights and freeze

layer_treatment = {"conv1":1, 
                   "conv2":1, 
                   "conv3":1}

model = get_model(layer_treatment)
model = set_weights(layer_treatment, model)

<keras.layers.convolutional.Conv1D object at 0x7fc8e2a73130>
<keras.layers.normalization_v2.BatchNormalization object at 0x7fc8e55fa670>
<keras.layers.core.Dropout object at 0x7fc8e539cbe0>
<keras.layers.convolutional.Conv1D object at 0x7fc8e5423bb0>
<keras.layers.normalization_v2.BatchNormalization object at 0x7fc8e5423970>
<keras.layers.core.Dropout object at 0x7fc8e5344220>
<keras.layers.convolutional.Conv1D object at 0x7fc8e5344e20>
<keras.layers.normalization_v2.BatchNormalization object at 0x7fc8e5423c40>
<keras.layers.core.Dropout object at 0x7fc8e5412b80>


In [None]:
model.summary()

Model: "sequential_1"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
1DConv_1 (Conv1D)            (None, 141, 120)          2520      
_________________________________________________________________
batchNorm1 (BatchNormalizati (None, 141, 120)          480       
_________________________________________________________________
drop1 (Dropout)              (None, 141, 120)          0         
_________________________________________________________________
1DConv_2 (Conv1D)            (None, 137, 120)          72120     
_________________________________________________________________
batchNorm2 (BatchNormalizati (None, 137, 120)          480       
_________________________________________________________________
drop2 (Dropout)              (None, 137, 120)          0         
_________________________________________________________________
1DConv_3 (Conv1D)            (None, 133, 120)         

In [None]:
rm -rf ./logs/

In [None]:
# Pass the step size as `learning_rate`: `lr` is a deprecated alias in tf.keras
# optimizers and is rejected by newer Keras releases.
model.compile(optimizer=keras.optimizers.Adam(learning_rate=learning_rate_),  # https://stackoverflow.com/questions/59737875/keras-change-learning-rate
              loss='mean_squared_error')

In [None]:
logdir = os.path.join("logs", datetime.datetime.now().strftime("%Y%m%d-%H%M%S"))
tensorboard_callback = keras.callbacks.TensorBoard(logdir, histogram_freq=1)  # https://stackoverflow.com/questions/59894720/keras-and-tensorboard-attributeerror-sequential-object-has-no-attribute-g

es_callback = EarlyStopping(monitor='val_loss', verbose=1, patience=patience_)
mc_callback = ModelCheckpoint('new_models/best_model.h5', monitor='val_loss', save_best_only=True)

In [None]:
history = model.fit(X_train, y_train, 
                    epochs=num_epochs_, 
                    batch_size=batch_size_,
                    validation_data=(X_val, y_val),
                    callbacks=[tensorboard_callback, es_callback, mc_callback])

Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 00037: early stopping


In [None]:
%tensorboard --logdir logs

Reusing TensorBoard on port 6006 (pid 84889), started 0:18:21 ago. (Use '!kill 84889' to kill it.)

In [None]:
saved_model = load_model('new_models/best_model.h5')

In [None]:
# R^2
# Score every split with the reloaded checkpoint; reshape(1, -1)[0] flattens the
# predictions to 1-D before they are compared with the targets.
for split_label, split_X, split_y in (("Train", X_train, y_train),
                                      ("Val", X_val, y_val),
                                      ("Test", X_test, y_test)):
    print(split_label, r2_score(split_y, saved_model.predict(split_X).reshape(1, -1)[0]))

Train 0.9849067843119053
Val 0.29334543590715645
Test 0.13985236707904303


In [None]:
# Spearman
# Rank correlation (and p-value) between targets and flattened predictions, per split.
for split_label, split_X, split_y in (("Train", X_train, y_train),
                                      ("Val", X_val, y_val),
                                      ("Test", X_test, y_test)):
    print(split_label, spearmanr(split_y, saved_model.predict(split_X).reshape(1, -1)[0]))

Train SpearmanrResult(correlation=0.9964140966742986, pvalue=0.0)
Val SpearmanrResult(correlation=0.5593057890658346, pvalue=1.3085532306592987e-243)
Test SpearmanrResult(correlation=0.3803089308672248, pvalue=1.2633850016353279e-102)


### Train only output layer

In [None]:
# 0 for init random weights & train
# 1 for load pre-trained weights and train
# 2 for load pre-trained weights and freeze

layer_treatment = {"conv1":2, 
                   "conv2":2, 
                   "conv3":2}

model = get_model(layer_treatment)
model = set_weights(layer_treatment, model)

<keras.layers.convolutional.Conv1D object at 0x7fc8e5b0eee0>
<keras.layers.normalization_v2.BatchNormalization object at 0x7fc8e591aca0>
<keras.layers.core.Dropout object at 0x7fc8e5b0b9a0>
<keras.layers.convolutional.Conv1D object at 0x7fc8e5b0e6a0>
<keras.layers.normalization_v2.BatchNormalization object at 0x7fc8e5b0b7c0>
<keras.layers.core.Dropout object at 0x7fc8e5ae4df0>
<keras.layers.convolutional.Conv1D object at 0x7fc8e5ae4610>
<keras.layers.normalization_v2.BatchNormalization object at 0x7fc8e5ad7c70>
<keras.layers.core.Dropout object at 0x7fc8e5b42f70>


In [None]:
model.summary()

Model: "sequential_2"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
1DConv_1 (Conv1D)            (None, 141, 120)          2520      
_________________________________________________________________
batchNorm1 (BatchNormalizati (None, 141, 120)          480       
_________________________________________________________________
drop1 (Dropout)              (None, 141, 120)          0         
_________________________________________________________________
1DConv_2 (Conv1D)            (None, 137, 120)          72120     
_________________________________________________________________
batchNorm2 (BatchNormalizati (None, 137, 120)          480       
_________________________________________________________________
drop2 (Dropout)              (None, 137, 120)          0         
_________________________________________________________________
1DConv_3 (Conv1D)            (None, 133, 120)         

In [None]:
rm -rf ./logs/

In [None]:
# Pass the step size as `learning_rate`: `lr` is a deprecated alias in tf.keras
# optimizers and is rejected by newer Keras releases.
model.compile(optimizer=keras.optimizers.Adam(learning_rate=learning_rate_),  # https://stackoverflow.com/questions/59737875/keras-change-learning-rate
              loss='mean_squared_error')

In [None]:
logdir = os.path.join("logs", datetime.datetime.now().strftime("%Y%m%d-%H%M%S"))
tensorboard_callback = keras.callbacks.TensorBoard(logdir, histogram_freq=1)  # https://stackoverflow.com/questions/59894720/keras-and-tensorboard-attributeerror-sequential-object-has-no-attribute-g

es_callback = EarlyStopping(monitor='val_loss', verbose=1, patience=patience_)
mc_callback = ModelCheckpoint('new_models/best_model.h5', monitor='val_loss', save_best_only=True)

In [None]:
history = model.fit(X_train, y_train, 
                    epochs=num_epochs_, 
                    batch_size=batch_size_,
                    validation_data=(X_val, y_val),
                    callbacks=[tensorboard_callback, es_callback, mc_callback])

Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 00025: early stopping


In [None]:
%tensorboard --logdir logs

Reusing TensorBoard on port 6006 (pid 84889), started 0:22:04 ago. (Use '!kill 84889' to kill it.)

In [None]:
saved_model = load_model('new_models/best_model.h5')

In [None]:
# R^2
# Score every split with the reloaded checkpoint; reshape(1, -1)[0] flattens the
# predictions to 1-D before they are compared with the targets.
for split_label, split_X, split_y in (("Train", X_train, y_train),
                                      ("Val", X_val, y_val),
                                      ("Test", X_test, y_test)):
    print(split_label, r2_score(split_y, saved_model.predict(split_X).reshape(1, -1)[0]))

Train 0.3925439708050127
Val 0.3548004035887573
Test 0.1151007308580434


In [None]:
# Spearman
# Rank correlation (and p-value) between targets and flattened predictions, per split.
for split_label, split_X, split_y in (("Train", X_train, y_train),
                                      ("Val", X_val, y_val),
                                      ("Test", X_test, y_test)):
    print(split_label, spearmanr(split_y, saved_model.predict(split_X).reshape(1, -1)[0]))

Train SpearmanrResult(correlation=0.632534355209208, pvalue=0.0)
Val SpearmanrResult(correlation=0.6271331384065945, pvalue=1e-323)
Test SpearmanrResult(correlation=0.3709522531823497, pvalue=2.3789080298989024e-97)


### Freeze Conv 1, 2, Train Conv 3 from Scratch

In [None]:
# 0 for init random weights & train
# 1 for load pre-trained weights and train
# 2 for load pre-trained weights and freeze

layer_treatment = {"conv1":2, 
                   "conv2":2, 
                   "conv3":0}

model = get_model(layer_treatment)
model = set_weights(layer_treatment, model)

<keras.layers.convolutional.Conv1D object at 0x7fc8b40aa160>
<keras.layers.normalization_v2.BatchNormalization object at 0x7fc8e58db340>
<keras.layers.core.Dropout object at 0x7fc8b5ff3340>
<keras.layers.convolutional.Conv1D object at 0x7fc8e5132f10>
<keras.layers.normalization_v2.BatchNormalization object at 0x7fc8b408aa90>
<keras.layers.core.Dropout object at 0x7fc8b6002b80>


In [None]:
model.summary()

Model: "sequential_3"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
1DConv_1 (Conv1D)            (None, 141, 120)          2520      
_________________________________________________________________
batchNorm1 (BatchNormalizati (None, 141, 120)          480       
_________________________________________________________________
drop1 (Dropout)              (None, 141, 120)          0         
_________________________________________________________________
1DConv_2 (Conv1D)            (None, 137, 120)          72120     
_________________________________________________________________
batchNorm2 (BatchNormalizati (None, 137, 120)          480       
_________________________________________________________________
drop2 (Dropout)              (None, 137, 120)          0         
_________________________________________________________________
1DConv_3 (Conv1D)            (None, 133, 120)         

In [None]:
rm -rf ./logs/

In [None]:
# Pass the step size as `learning_rate`: `lr` is a deprecated alias in tf.keras
# optimizers and is rejected by newer Keras releases.
model.compile(optimizer=keras.optimizers.Adam(learning_rate=learning_rate_),  # https://stackoverflow.com/questions/59737875/keras-change-learning-rate
              loss='mean_squared_error')

In [None]:
logdir = os.path.join("logs", datetime.datetime.now().strftime("%Y%m%d-%H%M%S"))
tensorboard_callback = keras.callbacks.TensorBoard(logdir, histogram_freq=1)  # https://stackoverflow.com/questions/59894720/keras-and-tensorboard-attributeerror-sequential-object-has-no-attribute-g

es_callback = EarlyStopping(monitor='val_loss', verbose=1, patience=patience_)
mc_callback = ModelCheckpoint('new_models/best_model.h5', monitor='val_loss', save_best_only=True)

In [None]:
history = model.fit(X_train, y_train, 
                    epochs=num_epochs_, 
                    batch_size=batch_size_,
                    validation_data=(X_val, y_val),
                    callbacks=[tensorboard_callback, es_callback, mc_callback])

Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100
Epoch 75/100
Epoch 76/100
Epoch 77/100
Epoch 78

In [None]:
%tensorboard --logdir logs

Reusing TensorBoard on port 6006 (pid 84889), started 0:48:16 ago. (Use '!kill 84889' to kill it.)

In [None]:
saved_model = load_model('new_models/best_model.h5')

In [None]:
# R^2
# Score every split with the reloaded checkpoint; reshape(1, -1)[0] flattens the
# predictions to 1-D before they are compared with the targets.
for split_label, split_X, split_y in (("Train", X_train, y_train),
                                      ("Val", X_val, y_val),
                                      ("Test", X_test, y_test)):
    print(split_label, r2_score(split_y, saved_model.predict(split_X).reshape(1, -1)[0]))

Train 0.9843885251101309
Val 0.25964817665242124
Test 0.06539060154501353


In [None]:
# Spearman
# Rank correlation (and p-value) between targets and flattened predictions, per split.
for split_label, split_X, split_y in (("Train", X_train, y_train),
                                      ("Val", X_val, y_val),
                                      ("Test", X_test, y_test)):
    print(split_label, spearmanr(split_y, saved_model.predict(split_X).reshape(1, -1)[0]))

Train SpearmanrResult(correlation=0.9964356703417023, pvalue=0.0)
Val SpearmanrResult(correlation=0.5167929897960384, pvalue=4.032463511650697e-202)
Test SpearmanrResult(correlation=0.26628072723330126, pvalue=2.7265864197887642e-49)


### Freeze Conv 1, 2, Train Conv 3 from Pretrained Weights

In [None]:
# 0 for init random weights & train
# 1 for load pre-trained weights and train
# 2 for load pre-trained weights and freeze

layer_treatment = {"conv1":2, 
                   "conv2":2, 
                   "conv3":1}

model = get_model(layer_treatment)
model = set_weights(layer_treatment, model)

<keras.layers.convolutional.Conv1D object at 0x7fc8b6d7a070>
<keras.layers.normalization_v2.BatchNormalization object at 0x7fc8b6e14820>
<keras.layers.core.Dropout object at 0x7fc8b5c9ce80>
<keras.layers.convolutional.Conv1D object at 0x7fc8b6441e50>
<keras.layers.normalization_v2.BatchNormalization object at 0x7fc8b6d83e20>
<keras.layers.core.Dropout object at 0x7fc8b783fd90>
<keras.layers.convolutional.Conv1D object at 0x7fc8b783fd60>
<keras.layers.normalization_v2.BatchNormalization object at 0x7fc8b783fe80>
<keras.layers.core.Dropout object at 0x7fc8b764d6d0>


In [None]:
model.summary()

Model: "sequential_4"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
1DConv_1 (Conv1D)            (None, 141, 120)          2520      
_________________________________________________________________
batchNorm1 (BatchNormalizati (None, 141, 120)          480       
_________________________________________________________________
drop1 (Dropout)              (None, 141, 120)          0         
_________________________________________________________________
1DConv_2 (Conv1D)            (None, 137, 120)          72120     
_________________________________________________________________
batchNorm2 (BatchNormalizati (None, 137, 120)          480       
_________________________________________________________________
drop2 (Dropout)              (None, 137, 120)          0         
_________________________________________________________________
1DConv_3 (Conv1D)            (None, 133, 120)         

In [None]:
rm -rf ./logs/

In [None]:
# Pass the step size as `learning_rate`: `lr` is a deprecated alias in tf.keras
# optimizers and is rejected by newer Keras releases.
model.compile(optimizer=keras.optimizers.Adam(learning_rate=learning_rate_),  # https://stackoverflow.com/questions/59737875/keras-change-learning-rate
              loss='mean_squared_error')

In [None]:
logdir = os.path.join("logs", datetime.datetime.now().strftime("%Y%m%d-%H%M%S"))
tensorboard_callback = keras.callbacks.TensorBoard(logdir, histogram_freq=1)  # https://stackoverflow.com/questions/59894720/keras-and-tensorboard-attributeerror-sequential-object-has-no-attribute-g

es_callback = EarlyStopping(monitor='val_loss', verbose=1, patience=patience_)
mc_callback = ModelCheckpoint('new_models/best_model.h5', monitor='val_loss', save_best_only=True)

In [None]:
history = model.fit(X_train, y_train, 
                    epochs=num_epochs_, 
                    batch_size=batch_size_,
                    validation_data=(X_val, y_val),
                    callbacks=[tensorboard_callback, es_callback, mc_callback])

Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 00051: early stopping


In [None]:
%tensorboard --logdir logs

Reusing TensorBoard on port 6006 (pid 84889), started 1:02:01 ago. (Use '!kill 84889' to kill it.)

In [None]:
saved_model = load_model('new_models/best_model.h5')

In [None]:
# R^2
# Score every split with the reloaded checkpoint; reshape(1, -1)[0] flattens the
# predictions to 1-D before they are compared with the targets.
for split_label, split_X, split_y in (("Train", X_train, y_train),
                                      ("Val", X_val, y_val),
                                      ("Test", X_test, y_test)):
    print(split_label, r2_score(split_y, saved_model.predict(split_X).reshape(1, -1)[0]))

Train 0.9790875958721533
Val 0.27465737787693567
Test 0.15724966579604183


In [None]:
# Spearman
# Rank correlation (and p-value) between targets and flattened predictions, per split.
for split_label, split_X, split_y in (("Train", X_train, y_train),
                                      ("Val", X_val, y_val),
                                      ("Test", X_test, y_test)):
    print(split_label, spearmanr(split_y, saved_model.predict(split_X).reshape(1, -1)[0]))

Train SpearmanrResult(correlation=0.9957482582995867, pvalue=0.0)
Val SpearmanrResult(correlation=0.5213717666673563, pvalue=2.609940248691938e-206)
Test SpearmanrResult(correlation=0.37349594525195046, pvalue=9.11073406747282e-99)


---
### Spearman Results

0 for init random weights & train  
1 for load pre-trained weights and train  
2 for load pre-trained weights and freeze  

First go:
000: val = 0.47753801170363896, test = 0.2866754020651326  
111: val = 0.47855750422978954, test = 0.279400514316224  
222: val = 0.5858565342434344, test = 0.3741217612323338  
220: val = 0.41782505305420903, test = 0.23175390117095385  
221: val = 0.4622724735247824, test = 0.26885707718958146  
2220: val = 0.5984249204802108, test = 0.32715495828441904

Second go:
000: val = 0.44123205768583007, test = 0.17504952705223212  
111: val = 0.4053507361644881, test = 0.2592776174804178  
222: val = 0.5290568130486669, test = 0.3636127380447914  
220: val = 0.392220547475502, test = 0.24043305401824872 
221: val = 0.40291026906804733, test = 0.22825701637010898
2220: val = 0.5988624237048076, test = 0.3301200533685432

### Train only output layer (pretrained dense layer kept frozen; treatment 2220)

In [None]:
def get_model(treatment_dict):  # initializes model architecture
    """Build the conv network with a frozen 12-unit dense layer before the output unit.

    Note: this redefinition replaces the earlier ``get_model`` in the notebook
    namespace once the cell runs.

    Args:
        treatment_dict: dict with keys "conv1", "conv2" and "conv3". A value of 2
            marks that block's Conv1D and BatchNormalization layers as
            non-trainable; any other value leaves them trainable.

    Returns:
        An uncompiled Sequential model taking (145, 4) inputs. The 12-unit
        "dense2" layer is always non-trainable; the final 1-unit "dense3"
        layer is trainable.
    """
    # (treatment key, conv name, batch-norm name, dropout name, extra Conv1D kwargs)
    # Only the first convolution declares the input shape.
    block_specs = (
        ("conv1", "1DConv_1", "batchNorm1", "drop1", {"input_shape": (145, 4)}),
        ("conv2", "1DConv_2", "batchNorm2", "drop2", {}),
        ("conv3", "1DConv_3", "batchNorm3", "drop3", {}),
    )

    mdl = Sequential()
    for treatment_key, conv_name, norm_name, drop_name, conv_extra in block_specs:
        is_trainable = treatment_dict[treatment_key] != 2  # bool
        mdl.add(Conv1D(120, 5, activation='relu', name=conv_name, trainable=is_trainable, **conv_extra))
        mdl.add(BatchNormalization(name=norm_name, trainable=is_trainable))
        mdl.add(Dropout(0.1, name=drop_name))

    mdl.add(Flatten(name="flat"))
    # Frozen intermediate dense layer, followed by the trainable output unit.
    mdl.add(Dense(12, activation='linear', name="dense2", trainable=False))
    mdl.add(Dense(1, activation='linear', name="dense3"))

    return mdl

def set_weights(treatment_dict, mdl):
    """Copy pretrained weights into the first 11 layers of `mdl`.

    Weights are read from the notebook-level `pretrained_model`, layer by
    layer (indices 0-10), and each target layer is printed before it is
    overwritten.

    Args:
        treatment_dict: accepted for interface compatibility; it is not read
            by this function, so every one of the 11 layers receives
            pretrained weights regardless of its treatment code.
        mdl: model whose layers are updated in place.

    Returns:
        The same `mdl` object, after its weights have been replaced.
    """
    n_layers_to_copy = 11

    for idx in range(n_layers_to_copy):
        target_layer = mdl.layers[idx]
        print(target_layer)
        source_weights = pretrained_model.layers[idx].get_weights()
        target_layer.set_weights(source_weights)

    return mdl

In [None]:
# Treatment codes (one per convolutional block):
#   0 -> init random weights & train
#   1 -> load pre-trained weights and train
#   2 -> load pre-trained weights and freeze
# All three blocks are frozen in this experiment.
layer_treatment = dict.fromkeys(("conv1", "conv2", "conv3"), 2)

# Build the architecture first, then overwrite its weights from the pretrained model.
model = get_model(layer_treatment)
model = set_weights(layer_treatment, model)

<keras.layers.convolutional.Conv1D object at 0x7fc8b8859700>
<keras.layers.normalization_v2.BatchNormalization object at 0x7fc8b8c27e20>
<keras.layers.core.Dropout object at 0x7fc8b86a9bb0>
<keras.layers.convolutional.Conv1D object at 0x7fc8b8c27160>
<keras.layers.normalization_v2.BatchNormalization object at 0x7fc8b8c27d90>
<keras.layers.core.Dropout object at 0x7fc8b8d590d0>
<keras.layers.convolutional.Conv1D object at 0x7fc8b8d59df0>
<keras.layers.normalization_v2.BatchNormalization object at 0x7fc8b8c27f10>
<keras.layers.core.Dropout object at 0x7fc8b8d59070>
<keras.layers.core.Flatten object at 0x7fc8b8d698e0>
<keras.layers.core.Dense object at 0x7fc8b8d60c10>


In [None]:
# Print the layer-by-layer architecture (output shapes and parameter counts) of the model built above.
model.summary()

Model: "sequential_5"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
1DConv_1 (Conv1D)            (None, 141, 120)          2520      
_________________________________________________________________
batchNorm1 (BatchNormalizati (None, 141, 120)          480       
_________________________________________________________________
drop1 (Dropout)              (None, 141, 120)          0         
_________________________________________________________________
1DConv_2 (Conv1D)            (None, 137, 120)          72120     
_________________________________________________________________
batchNorm2 (BatchNormalizati (None, 137, 120)          480       
_________________________________________________________________
drop2 (Dropout)              (None, 137, 120)          0         
_________________________________________________________________
1DConv_3 (Conv1D)            (None, 133, 120)         

In [None]:
rm -rf ./logs/

In [None]:
# Compile for regression with mean-squared-error loss. `learning_rate` is the
# supported argument name; the old `lr` alias is deprecated and is rejected by
# newer Keras releases.
model.compile(optimizer=keras.optimizers.Adam(learning_rate=learning_rate_),  # https://stackoverflow.com/questions/59737875/keras-change-learning-rate
              loss='mean_squared_error')

In [None]:
# Each run logs to its own timestamped sub-directory of ./logs.
logdir = os.path.join("logs", f"{datetime.datetime.now():%Y%m%d-%H%M%S}")
tensorboard_callback = keras.callbacks.TensorBoard(
    log_dir=logdir,
    histogram_freq=1,
)  # https://stackoverflow.com/questions/59894720/keras-and-tensorboard-attributeerror-sequential-object-has-no-attribute-g

# Stop once val_loss has not improved for `patience_` epochs ...
es_callback = EarlyStopping(monitor='val_loss', patience=patience_, verbose=1)
# ... and keep only the checkpoint with the best val_loss seen so far.
mc_callback = ModelCheckpoint(
    filepath='new_models/best_model.h5',
    monitor='val_loss',
    save_best_only=True,
)

In [None]:
# Train with per-epoch validation; the callbacks log to TensorBoard, stop early
# on stalled val_loss and checkpoint the best model.
history = model.fit(
    x=X_train,
    y=y_train,
    validation_data=(X_val, y_val),
    batch_size=batch_size_,
    epochs=num_epochs_,
    callbacks=[tensorboard_callback, es_callback, mc_callback],
)

Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100
Epoch 75/100
Epoch 76/100
Epoch 77/100
Epoch 78

In [None]:
# Open TensorBoard inline on the ./logs directory that the TensorBoard callback writes its run folders to.
%tensorboard --logdir logs

Reusing TensorBoard on port 6006 (pid 84889), started 1:14:11 ago. (Use '!kill 84889' to kill it.)

In [None]:
# Reload the checkpoint written by ModelCheckpoint (save_best_only=True on val_loss)
# so evaluation uses the best saved model rather than the final-epoch weights.
saved_model = load_model('new_models/best_model.h5')

In [None]:
# R^2 of the reloaded best checkpoint on each data split.
for split_name, split_X, split_y in (
    ("Train", X_train, y_train),
    ("Val", X_val, y_val),
    ("Test", X_test, y_test),
):
    # predict() output is flattened to 1-D so it lines up with the target vector.
    split_pred = saved_model.predict(split_X).reshape(1, -1)[0]
    print(split_name, r2_score(split_y, split_pred))

Train 0.25886857797184726
Val 0.31608893826491136
Test 0.0437756739890256


In [None]:
# Spearman rank correlation of the reloaded best checkpoint on each data split.
for split_name, split_X, split_y in (
    ("Train", X_train, y_train),
    ("Val", X_val, y_val),
    ("Test", X_test, y_test),
):
    # predict() output is flattened to 1-D so it lines up with the target vector.
    split_pred = saved_model.predict(split_X).reshape(1, -1)[0]
    print(split_name, spearmanr(split_y, split_pred))

Train SpearmanrResult(correlation=0.5124960925179921, pvalue=0.0)
Val SpearmanrResult(correlation=0.5998885838414266, pvalue=2.983051998914243e-289)
Test SpearmanrResult(correlation=0.32616550557136814, pvalue=2.0231700651400805e-74)
