In [None]:
import os
import sys
from pandas import DataFrame
from datetime import datetime
import pandas as pd
import numpy as np
import statistics
import random
from tqdm.auto import tqdm

import matplotlib
from matplotlib import pyplot as plt
import seaborn as sns

import keras
from keras.models import Sequential, Input, Model, load_model
from keras.layers import Dense, Dropout, Flatten, Activation, Conv1D, MaxPooling1D, AveragePooling1D, GlobalAveragePooling1D
from keras.layers.normalization import BatchNormalization
from keras.layers.advanced_activations import LeakyReLU
import keras.backend.tensorflow_backend as K
from tensorflow.keras.callbacks import ModelCheckpoint, CSVLogger
from keras.backend.tensorflow_backend import set_session
from keras.utils.vis_utils import plot_model

from scipy.stats import pearsonr
from sklearn.metrics import r2_score, mean_squared_error, roc_curve, confusion_matrix, roc_auc_score

In [None]:
# Directory layout used by the rest of the notebook.
# Every directory string ends with a path separator, so later cells can keep
# building file paths by plain concatenation (e.g. resultdir + "file.npz").
# os.path.join inserts the separator between components, so `workdir` may be
# given with or without a trailing slash.

workdir = "workdirectory"  # set your work directory
datadir = os.path.join(workdir, "Data", "")
rawdir = os.path.join(datadir, "raw_data", "")
resultdir = os.path.join(datadir, "input_result", "")
predictiondir = os.path.join(datadir, "prediction_result", "")
modeldir = os.path.join(workdir, "model", "")


# Date stamp (YYYYMMDD) of this run; shown as the cell output.
today = datetime.today().strftime("%Y%m%d")
today

In [None]:
from tensorflow.python.client import device_lib

# Print the list returned by TensorFlow's device_lib.list_local_devices().
local_devices = device_lib.list_local_devices()
print(local_devices)

In [None]:
# Open the train / validation / test archives stored under `resultdir`
# (replace the placeholder file names with your own).
train_data, val_data, test_data = (
    np.load(resultdir + archive_name)
    for archive_name in (
        'your train data name',
        'your validation data name',
        'your test data name',
    )
)

# Sanity check: names of the arrays stored in each archive.
tuple(archive.files for archive in (train_data, val_data, test_data))

In [None]:
# Pull the 'x' (inputs) and 'y' (targets) arrays out of each archive.
x_train, y_train = (train_data[key] for key in ('x', 'y'))
x_val, y_val = (val_data[key] for key in ('x', 'y'))
x_test, y_test = (test_data[key] for key in ('x', 'y'))

# Sanity check: shapes in the order train, test, validation.
tuple(arr.shape for arr in (x_train, y_train, x_test, y_test, x_val, y_val))

In [None]:
# Append a trailing axis of size 1 so each input is (samples, length, 1),
# matching the (length, 1) input shape declared for the Conv1D network.
# Only the first two dimensions are read, so re-running this cell is harmless.

x_train = x_train.reshape(x_train.shape[:2] + (1,))
x_val = x_val.reshape(x_val.shape[:2] + (1,))

In [None]:
# Reproducibility seed and training hyper-parameters.
SEED = 2021
random.seed(SEED)      # Python's built-in RNG
# Seed NumPy's global RNG as well: seeding only `random` leaves every
# NumPy-based random draw unseeded.
# NOTE(review): TensorFlow keeps its own seed, which is not set here -- add it
# if run-to-run reproducibility of training is required.
np.random.seed(SEED)

LR = 0.000001          # learning rate handed to the optimizer
# NOTE(review): in the visible notebook the training cell passes literal
# epochs / batch size to fit(), so the two values below are not what training
# actually uses -- confirm which pair is intended.
training_epochs = 200
batch_size = 100

In [None]:
# Root-mean-square error, used as the training loss.

def rmse(y_true, y_pred):
    """Return the root-mean-square error between `y_true` and `y_pred`.

    Built from backend ops (`K.square`, `K.mean`, `K.sqrt`); the mean is taken
    over all elements, so the result is a single scalar.
    """
    residual = y_pred - y_true
    mean_squared = K.mean(K.square(residual))
    return K.sqrt(mean_squared)

In [None]:
def _conv_bn_relu(tensor, filters, kernel_size=3, strides=1):
    """Apply Conv1D ("same" padding) -> BatchNormalization -> ReLU and return the output tensor."""
    tensor = Conv1D(filters, kernel_size=kernel_size, strides=strides, padding="same")(tensor)
    tensor = BatchNormalization()(tensor)
    return Activation('relu')(tensor)


# Network definition: a stack of Conv1D/BatchNorm/ReLU units followed by three
# fully connected layers and a single linear output.
# Layers are created in exactly the same order as the hand-unrolled version,
# so auto-generated layer names and the model summary are unchanged.
with K.tf.device('/GPU:0'):
    # NOTE(review): 11308 is presumably the per-sample length of the loaded
    # input arrays (x_train.shape[1]) -- confirm against your data.
    inputs = Input(shape=(11308, 1), name='inputs')

    # Two strided convolutions, then max pooling.
    x = _conv_bn_relu(inputs, 96, kernel_size=11, strides=4)
    x = _conv_bn_relu(x, 96, kernel_size=5, strides=2)
    x = MaxPooling1D(pool_size=3, strides=2, padding="same")(x)

    # Kernel-3, stride-1 stages given as (filters, number of repeated units).
    for n_filters, n_repeats in ((96, 2), (256, 6), (384, 6), (256, 4)):
        for repeat_idx in range(n_repeats):
            x = _conv_bn_relu(x, n_filters)

    x = AveragePooling1D(pool_size=7, strides=2, padding="same")(x)

    x = Flatten()(x)

    ########################## FC 3 ##########################################
    for n_units in (4096, 2048, 1024):
        x = Dense(units=n_units)(x)
        x = Activation('relu')(x)

    ########################### Predictions ######################################
    predictions = Dense(1, activation='linear', name='predictions', kernel_initializer='he_normal')(x)

In [None]:

########################### Model compile #####################################
# Wrap the graph defined above in a Model and compile it with RMSprop,
# the custom `rmse` loss, and MSE as an additional reported metric.
with K.tf.device('/GPU:0'):
    model = Model(inputs=inputs, outputs=predictions, name="DeepKinome" + today)
    # summary() prints the layer table itself and returns None, so wrapping it
    # in print() only appended a stray "None" line to the output.
    model.summary()
    model.compile(optimizer=keras.optimizers.RMSprop(lr=LR), loss=rmse, metrics=["mse"])
    

In [None]:
# Fit the compiled model on the training arrays, validating on the validation
# arrays after each epoch, and print wall-clock start / end timestamps.
# NOTE(review): epochs=150 and batch_size=32 are literals here and differ from
# `training_epochs` / `batch_size` defined in the configuration cell -- confirm
# which values are intended.
with K.tf.device('/GPU:0'):
    print("step : model training")

    StartTime = datetime.now()
    print("StartTime : {}".format(StartTime))

    history = model.fit(
        x=x_train,
        y=y_train,
        batch_size=32,
        epochs=150,
        validation_data=(x_val, y_val),
    )

    EndTime = datetime.now()
    print("EndTime : {}".format(EndTime))
    print("end : model training")

In [None]:
# Check prediction result

In [None]:
### RMSE plot
# Per-epoch training and validation loss recorded by fit(); the axis labels
# treat the loss as RMSE.
plt.figure(figsize=(15, 10))
plt.title("RMSE")
plt.plot(history.history['loss'], "r", label="Training")  # label typo "Tranning" fixed
plt.plot(history.history['val_loss'], label="Validation", color='dodgerblue')
plt.xlabel("Epochs")
plt.ylabel("RMSE")
plt.legend(loc=2)
# Finalise the figure here so later pyplot calls start on a fresh one.
plt.show()

In [None]:
# Give the test inputs the same trailing size-1 axis as the training inputs,
# then score the model's predictions on the test set.
x_test = x_test.reshape(x_test.shape[:2] + (1,))

pred_test = model.predict(x_test)

# RMSE is the square root of scikit-learn's mean squared error.
test_mse = mean_squared_error(y_test, pred_test)
test_rmse = np.sqrt(test_mse)
test_r2 = r2_score(y_test, pred_test)

print("test RMSE : ", test_rmse)
print("test R2 : ", test_r2)

In [None]:
# Predicted vs. original target values on the test set.
# The dashed black line is y = x, spanning the range of the original values.
ident = [y_test.min(), y_test.max()]

plt.scatter(y_test, pred_test, s=5)
plt.plot(ident, ident, linestyle='--', color='black', linewidth=2)
plt.xlabel("Original", size=20)
plt.ylabel("Predictions", size=20)

plt.show()