In [13]:
%load_ext autoreload
%autoreload 2

import os 
import sys

# Put the parent of the notebook's working directory on the import path so
# the `src_` imports in this cell can be resolved.
ROOT_PATH = os.path.dirname(os.getcwd())
if ROOT_PATH not in sys.path:
    # Guard keeps repeated runs of this cell from stacking duplicate entries.
    sys.path.append(ROOT_PATH)

# Drop one specific absolute checkout location from the import path if it is
# there (presumably so modules are not picked up from that copy -- TODO confirm).
try:
    sys.path.remove('/projects/p30802/Karina/protease_stability/')
except ValueError:
    # list.remove raises ValueError when the entry is absent; nothing to undo.
    # Any other exception is unexpected and should surface.
    pass

import numpy as np
import matplotlib.pyplot as plt

from src_.evals.run_model import run_model, build_model, get_params
from src_.evals.data_processing import get_and_process_data
from src_.utils.general import multi_target_train_test_split, average_losses
from src_.models.wrapper import ProtNet
from tensorflow.keras import optimizers
from src_.config import Config

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


### Custom Config

In [14]:
# CSV inputs, resolved relative to ROOT_PATH.
# DATA_PATH feeds the arrays reported as "Unfolded data" further down;
# DATA2_PATH feeds the ones reported as "Folded data".
DATA_PATH = os.path.join(ROOT_PATH, "data/210728_scrambles_for_unstructure_model.csv")
DATA2_PATH = os.path.join(ROOT_PATH, "data/210728_dmsv2_alldata.csv")

In [15]:
# Model identifier: passed to get_params() and to ProtNet(model_type=...),
# and reused as the title of the loss plots.
MODEL_TYPE = "convnet_1d"

### Manually inspect the model

#### Prepare data

In [16]:
# Load both CSVs through the same helper. Each call is unpacked into an input
# array X and two target arrays (kT, kC).
# Un-suffixed names hold the unfolded dataset; *_folded names hold the folded one.
X, kT, kC = get_and_process_data(DATA_PATH)
X_folded, kT_folded, kC_folded = get_and_process_data(DATA2_PATH)

In [17]:
# Sanity check on the loaded arrays. The `{a.shape, b.shape}` expression is a
# tuple, so both target shapes are printed together.
print(f"Unfolded data: X shape {X.shape}, target shapes {kT.shape, kC.shape}")
print(f"Folded data: X shape {X_folded.shape}, target shapes {kT_folded.shape, kC_folded.shape}")

Unfolded data: X shape (67148, 74), target shapes ((67148,), (67148,))
Folded data: X shape (651865, 74), target shapes ((651865,), (651865,))


In [18]:
# Train test split for unfolded
# One call splits the inputs and both targets together; return_val=False
# asks for train/test pieces only (six arrays are unpacked).
(X_train, X_test,
 kT_train, kT_test,
 kC_train, kC_test) = multi_target_train_test_split(X, kT, kC, return_val=False)

print(f"Unfolded data: X train shape {X_train.shape}, target shapes {kT_train.shape, kC_train.shape}")

Unfolded data: X train shape (57075, 74), target shapes ((57075,), (57075,))


In [19]:
# Train test split for folded
#
# Subsample the folded dataset so that its train / test subsets have the same
# number of rows as the unfolded X_train / X_test.
#
# The rows are drawn WITHOUT replacement and then cut into two disjoint index
# sets. Previously a single index array was applied to both the "train" and
# the "test" outputs, which made the folded test set an exact copy of the
# folded train set, and sampling with replacement produced duplicate rows.
np.random.seed(0)
n_folded_train = X_train.shape[0]
n_folded_test = X_test.shape[0]

# np.random.choice raises ValueError if more rows are requested than exist.
sampled_rows = np.random.choice(
    X_folded.shape[0], size=n_folded_train + n_folded_test, replace=False
)
indices = sampled_rows[:n_folded_train]       # train rows (original name kept)
test_indices = sampled_rows[n_folded_train:]  # held-out rows, disjoint from train

X_folded_train, kT_folded_train, kC_folded_train = (
    arr[indices] for arr in (X_folded, kT_folded, kC_folded)
)
X_folded_test, kT_folded_test, kC_folded_test = (
    arr[test_indices] for arr in (X_folded, kT_folded, kC_folded)
)

print(f"Folded data: X train shape {X_folded_train.shape}, target shapes {kT_folded_train.shape, kC_folded_train.shape}")

Folded data: X train shape (57075, 74), target shapes ((57075,), (57075,))


#### Define the model

In [20]:
# get_params returns a (params, epochs) pair for the chosen model type.
# `params` is later splatted into ProtNet(**params); `epochs` is passed to
# model.train.
params, epochs = get_params(MODEL_TYPE)

# Add two entries read from the project Config
# (presumably alphabet size and sequence length -- TODO confirm).
params["num_char"] = Config.get("n_char")
params["seq_length"] = Config.get("seq_length")

In [21]:
# Build the network wrapper; every entry of `params` is forwarded as a keyword argument.
model = ProtNet(model_type = MODEL_TYPE, **params)

2021-09-12 09:56:17.939460: I tensorflow/compiler/jit/xla_cpu_device.cc:41] Not creating XLA devices, tf_xla_enable_xla_devices not set
2021-09-12 09:56:17.941158: I tensorflow/core/platform/cpu_feature_guard.cc:142] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  SSE4.1 SSE4.2 AVX AVX2 AVX512F FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.
2021-09-12 09:56:17.943907: I tensorflow/core/common_runtime/process_util.cc:146] Creating new thread pool with default inter op setting: 2. Tune using inter_op_parallelism_threads for best performance.


In [None]:
# Train on the unfolded split together with the equally sized folded subsample.
# Targets are passed as [kT, kC] lists for each dataset.
# NOTE(review): with validation=True the log starts with "Split dataset" and
# reports "val ..." metrics, so train() appears to carve out its own
# validation portion -- confirm in ProtNet.train.
model.train(X_unfolded=X_train,
            targets_unfolded=[kT_train, kC_train],
            X_folded=X_folded_train,
            targets_folded=[kT_folded_train, kC_folded_train],
            epochs=epochs,
            validation=True)

Split dataset

Epoch 1/8:

	step=0,   loss=18.136,   unfolded mse=34.271,   stab score agreement mse=2.001
	val loss=19.515,   val unfolded mse=35.288,   val stab score agreement mse=3.742

	step=50,   loss=3.777,   unfolded mse=3.164,   stab score agreement mse=4.389
	val loss=3.131,   val unfolded mse=3.198,   val stab score agreement mse=3.064

	step=100,   loss=3.362,   unfolded mse=3.191,   stab score agreement mse=3.533
	val loss=2.915,   val unfolded mse=2.793,   val stab score agreement mse=3.037

	step=150,   loss=2.423,   unfolded mse=2.552,   stab score agreement mse=2.293
	val loss=2.902,   val unfolded mse=2.727,   val stab score agreement mse=3.077

	step=200,   loss=2.134,   unfolded mse=2.68,   stab score agreement mse=1.589
	val loss=2.802,   val unfolded mse=2.561,   val stab score agreement mse=3.043

	step=250,   loss=2.171,   unfolded mse=2.149,   stab score agreement mse=2.193
	val loss=2.916,   val unfolded mse=2.792,   val stab score agreement mse=3.04

	step=30


	step=150,   loss=1.571,   unfolded mse=1.503,   stab score agreement mse=1.64
	val loss=2.492,   val unfolded mse=1.928,   val stab score agreement mse=3.057

	step=200,   loss=2.142,   unfolded mse=1.632,   stab score agreement mse=2.653
	val loss=2.533,   val unfolded mse=1.98,   val stab score agreement mse=3.086

	step=250,   loss=3.079,   unfolded mse=1.919,   stab score agreement mse=4.239
	val loss=2.532,   val unfolded mse=2.027,   val stab score agreement mse=3.036

	step=300,   loss=1.357,   unfolded mse=1.661,   stab score agreement mse=1.053
	val loss=2.424,   val unfolded mse=1.818,   val stab score agreement mse=3.03

	step=350,   loss=4.229,   unfolded mse=1.722,   stab score agreement mse=6.737
	val loss=2.416,   val unfolded mse=1.795,   val stab score agreement mse=3.037

	step=400,   loss=1.942,   unfolded mse=1.314,   stab score agreement mse=2.569
	val loss=2.407,   val unfolded mse=1.781,   val stab score agreement mse=3.032

	step=450,   loss=2.543,   unfolded 

In [None]:
# Pull the three loss histories off the model, in a fixed order.
loss, unfolded_mse, agreement_mse = (
    model.losses[key] for key in ("loss", "unfolded_mse", "agreement_mse")
)

# NOTE(review): epochs=2 is hard-coded, while training uses the `epochs` value
# from get_params (the training log shows "Epoch 1/8"). Confirm 2 is intended.
averaged = average_losses([loss, unfolded_mse, agreement_mse], epochs=2)
avg_loss, avg_unfolded_mse, avg_agreement_mse = averaged

In [None]:
# Loss histories kept on the model object.
loss = model.losses["loss"]
unfolded_mse = model.losses["unfolded_mse"]
agreement_mse = model.losses["agreement_mse"]

# (series, plot keyword arguments) pairs, drawn in this order.
curves = (
    (unfolded_mse, {"label": "Unfolded MSE", "color": "orange"}),
    (agreement_mse, {"label": "Stability score agreement", "color": "green"}),
    (loss, {"label": "Total loss", "color": "b", "alpha": 0.5}),
)
for series, line_kwargs in curves:
    plt.plot(series, **line_kwargs)

plt.title(MODEL_TYPE)

plt.legend()
plt.show()

In [None]:
# Plot the averaged curves produced by average_losses().
# Each series carries the same label and colour as its counterpart in the
# raw-loss plot. The labels were previously shifted by one: avg_loss was drawn
# as "Unfolded MSE", avg_unfolded_mse as "Stability score agreement" and
# avg_agreement_mse as "Total loss".
plt.plot(avg_unfolded_mse, label="Unfolded MSE", color="orange")
plt.plot(avg_agreement_mse, label="Stability score agreement", color="green")
plt.plot(avg_loss, label="Total loss", color="b", alpha=0.7)

plt.title(MODEL_TYPE)
plt.legend()
plt.show()