# Multi-task model

In [13]:
import copy
import os
import shutil

import numpy as np
import pandas as pd
import tensorflow as tf
import tensorflow.keras as K
from matplotlib import pyplot as plt
from sklearn.decomposition import PCA
from sklearn.model_selection import train_test_split

import CCLE_utils
import MMRF_utils

## Load train and test sets

In [None]:
# Read the two per-drug CSV files (lenalidomide and bortezomib) into DataFrames.
lenal_df = pd.read_csv('lenalidomide_crispr_knockout.csv')
bortez_df = pd.read_csv('bortezomib_crispr_knockout.csv')

In [None]:
# Mapping later handed to CCLE_utils.hgnc_to_ensembl.
# Presumably HGNC symbol -> Ensembl ID, judging by the file name — confirm in CCLE_utils.
map_dict = CCLE_utils.get_map_dict('Ensembl_HGNC_map_042421.csv')

In [None]:
# Lenalidomide features: every column except the identifier/target columns, passed
# through CCLE_utils.hgnc_to_ensembl (presumably converts gene naming — confirm).
# Target: the 'Response' column.
lenal_non_feature_cols = ['DepMap_ID', 'Knockout', 'Response']
X_lenal_cell_data = CCLE_utils.hgnc_to_ensembl(
    lenal_df.drop(columns=lenal_non_feature_cols),
    map_dict,
)
y_lenal_cell_data = lenal_df['Response']

In [None]:
# Bortezomib features: every column except the identifier/target columns, passed
# through CCLE_utils.hgnc_to_ensembl (presumably converts gene naming — confirm).
# Target: the 'Response' column.
bortez_non_feature_cols = ['DepMap_ID', 'Knockout', 'Response']
X_bortez_cell_data = CCLE_utils.hgnc_to_ensembl(
    bortez_df.drop(columns=bortez_non_feature_cols),
    map_dict,
)
y_bortez_cell_data = bortez_df['Response']

In [None]:
# 70/30 train/test split for each drug, using the same fixed seed for both.
lenal_split = train_test_split(
    X_lenal_cell_data,
    y_lenal_cell_data,
    train_size=0.7,
    random_state=42,
)
X_lenal_cell_train, X_lenal_cell_test, y_lenal_cell_train, y_lenal_cell_test = lenal_split

bortez_split = train_test_split(
    X_bortez_cell_data,
    y_bortez_cell_data,
    train_size=0.7,
    random_state=42,
)
X_bortez_cell_train, X_bortez_cell_test, y_bortez_cell_train, y_bortez_cell_test = bortez_split

In [None]:
# Sanity check: shapes of the train/test features and targets for both drugs.
print(X_lenal_cell_train.shape, X_lenal_cell_test.shape, y_lenal_cell_train.shape, y_lenal_cell_test.shape)
print(X_bortez_cell_train.shape, X_bortez_cell_test.shape, y_bortez_cell_train.shape, y_bortez_cell_test.shape)

In [None]:
# Reduce the Lenalidomide features to their leading principal components.
# The projection is fitted on the training split only and then applied to the
# test split, so no test-set information leaks into the components.
nPCA = 500  # number of principal components kept

# random_state pins the randomized SVD solver (when scikit-learn selects it), so
# re-running the notebook yields the same projection.
lenal_pca = PCA(n_components=nPCA, random_state=42)
# `pca` is kept as an alias for existing references; the Bortezomib PCA cell
# rebinds it, whereas `lenal_pca` keeps pointing at this fitted transformer.
pca = lenal_pca
X_lenal_cell_train_pca = lenal_pca.fit_transform(X_lenal_cell_train)
X_lenal_cell_test_pca = lenal_pca.transform(X_lenal_cell_test)

print(X_lenal_cell_train_pca.shape, X_lenal_cell_test_pca.shape)

In [None]:
# Reduce the Bortezomib features to their leading principal components.
# Fitted on the training split only, then applied to the test split.
nPCA = 500  # number of principal components kept

# random_state pins the randomized SVD solver (when scikit-learn selects it), so
# re-running the notebook yields the same projection.
bortez_pca = PCA(n_components=nPCA, random_state=42)
# `pca` is rebound here for existing references; `bortez_pca` is the stable name
# for this drug's fitted transformer.
pca = bortez_pca
X_bortez_cell_train_pca = bortez_pca.fit_transform(X_bortez_cell_train)
X_bortez_cell_test_pca = bortez_pca.transform(X_bortez_cell_test)

print(X_bortez_cell_train_pca.shape, X_bortez_cell_test_pca.shape)

In [None]:
# Placeholder patient data: 50 samples with 5000 genes.
# A dedicated seeded generator makes the synthetic data identical on every run.
pt_train_rng = np.random.default_rng(42)
# Features are uniform in [0, 80); the target is the per-sample feature mean plus
# uniform noise in [0, 40).
X_pt_train = pt_train_rng.random((50, 5000)) * 80
y_pt_train = X_pt_train.mean(axis=1) + pt_train_rng.random(50) * 40

In [None]:
# Placeholder patient test data: 10 samples with 5000 genes.
# Seeded separately from the training placeholder (different seed) so the two
# sets are reproducible but not copies of each other.
pt_test_rng = np.random.default_rng(43)
X_pt_test = pt_test_rng.random((10, 5000)) * 80
y_pt_test = X_pt_test.mean(axis=1) + pt_test_rng.random(10) * 40

# Train NN on cell line

In [17]:
# number of genes 
# N_in = X_cell_train.shape[1]

# N_in : number of genes input



def generate_model(N_in, N_out, shapes):
    """Build and compile a multi-head regression network with a shared trunk.

    The trunk is: input dropout (0.2), then a stack of ReLU Dense layers with L2
    weight regularisation (0.001), with dropout (0.5) between consecutive hidden
    layers. On top of the trunk sit ``N_out`` linear Dense heads named
    ``'y1' .. 'y<N_out>'`` so that they line up with the keys produced by
    ``generate_loss``.

    Parameters
    ----------
    N_in : int
        Number of input features.
    N_out : int
        Number of output heads (must be >= 1).
    shapes : int or sequence of int
        * int -- width of the first hidden layer; the remaining hidden layers
          are 128 and 32 units wide and every head has 16 units.
        * sequence -- all entries but the last are the hidden-layer widths, the
          last entry is the width of every head (so ``[1024, 128, 32, 16]``
          describes the same network as ``shapes=1024``). Needs >= 2 entries.

    Returns
    -------
    A compiled Keras model (Adam optimiser, MSE loss on every head).

    Raises
    ------
    ValueError
        If ``N_out`` is smaller than 1 or a ``shapes`` sequence has fewer than
        two entries.
    """
    if N_out < 1:
        raise ValueError('N_out must be at least 1, got {}'.format(N_out))

    # Dense(units=...) needs a single integer, so split `shapes` into the
    # hidden-layer widths and the head width up front.
    if np.ndim(shapes) == 0:
        hidden_units, head_units = [int(shapes), 128, 32], 16
    else:
        widths = [int(width) for width in shapes]
        if len(widths) < 2:
            raise ValueError('shapes must list at least one hidden width and the head width')
        hidden_units, head_units = widths[:-1], widths[-1]

    def l2_penalty():
        # A fresh regulariser per layer; the factor is passed positionally
        # because its keyword name differs between Keras versions.
        return K.regularizers.l2(0.001)

    inputs = K.layers.Input(shape=(N_in,))
    x = K.layers.Dropout(0.2)(inputs)

    last_hidden = len(hidden_units) - 1
    for position, units in enumerate(hidden_units):
        x = K.layers.Dense(units, activation='relu', kernel_regularizer=l2_penalty())(x)
        # No dropout after the final hidden layer: the heads read it directly.
        if position < last_hidden:
            x = K.layers.Dropout(0.5)(x)

    # One linear head per task. The explicit layer names are what the loss
    # dictionary is keyed on.
    heads = [
        K.layers.Dense(head_units, name='y' + str(i), kernel_regularizer=l2_penalty())(x)
        for i in range(1, N_out + 1)
    ]

    model = K.models.Model(inputs=inputs, outputs=heads)
    model.compile(optimizer='adam',
                  loss=generate_loss(N_out, 'mse'))

    return model

In [18]:
def generate_loss(N_out, loss):
    """Return a dict mapping the keys 'y1' .. 'y<N_out>' to the same `loss` value.

    An `N_out` of 0 (or less) yields an empty dict.
    """
    return {'y' + str(index): loss for index in range(1, N_out + 1)}

In [19]:
# Prototype model: 500 input features and 13 output heads; print its layer summary.
shapes = [1024, 128,32, 16]
model_ = generate_model(500, 13, shapes)
model_.summary()

Model: "model"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_3 (InputLayer)            [(None, 500)]        0                                            
__________________________________________________________________________________________________
dropout_3 (Dropout)             (None, 500)          0           input_3[0][0]                    
__________________________________________________________________________________________________
dense_16 (Dense)                (None, 1024)         513024      dropout_3[0][0]                  
__________________________________________________________________________________________________
dropout_4 (Dropout)             (None, 1024)         0           dense_16[0][0]                   
______________________________________________________________________________________________

In [None]:
# Train on the raw Lenalidomide features, validating on the held-out split.
# NOTE(review): generate_model is declared as generate_model(N_in, N_out, shapes);
# only N_in is supplied here, so this call raises TypeError until N_out and shapes are passed.
lenal_model = generate_model(X_lenal_cell_train.shape[1])
history = lenal_model.fit(X_lenal_cell_train, y_lenal_cell_train, epochs=100, validation_data=(X_lenal_cell_test, y_lenal_cell_test))

In [None]:
# Train on the raw Bortezomib features, validating on the held-out split.
# NOTE(review): generate_model is declared as generate_model(N_in, N_out, shapes);
# only N_in is supplied here, so this call raises TypeError until N_out and shapes are passed.
bortez_model = generate_model(X_bortez_cell_train.shape[1])
bortez_history = bortez_model.fit(X_bortez_cell_train, y_bortez_cell_train, epochs=100, validation_data=(X_bortez_cell_test, y_bortez_cell_test))

In [None]:
# Train on the PCA-reduced Lenalidomide features, validating on the held-out split.
# NOTE(review): generate_model is declared as generate_model(N_in, N_out, shapes);
# only N_in is supplied here, so this call raises TypeError until N_out and shapes are passed.
pca_model = generate_model(X_lenal_cell_train_pca.shape[1])
pca_history = pca_model.fit(X_lenal_cell_train_pca, y_lenal_cell_train, epochs=100, validation_data=(X_lenal_cell_test_pca, y_lenal_cell_test))

In [None]:
# Train on the PCA-reduced Bortezomib features, validating on the held-out split.
# NOTE(review): generate_model is declared as generate_model(N_in, N_out, shapes);
# only N_in is supplied here, so this call raises TypeError until N_out and shapes are passed.
pca_model2 = generate_model(X_bortez_cell_train_pca.shape[1])
pca_history2 = pca_model2.fit(X_bortez_cell_train_pca, y_bortez_cell_train, epochs=100, validation_data=(X_bortez_cell_test_pca, y_bortez_cell_test))

# Eval performance on cell line test set

In [None]:
def plot_loss(perf_dict):
    """Draw the training and validation loss curves next to each other.

    perf_dict must provide the entries 'loss' and 'val_loss' (for example the
    `.history` dict of a Keras History object); each is plotted against its
    index, labelled as epochs. The figure is shown and nothing is returned.
    """
    plt.figure(figsize=(15, 10))

    # Only the top row of the 2x2 grid is populated: training loss on the left,
    # validation loss on the right.
    panels = [
        (plt.subplot(2, 2, 1), 'loss', perf_dict['loss']),
        (plt.subplot(2, 2, 2), 'val_loss', perf_dict['val_loss']),
    ]
    for axis, y_label, curve in panels:
        axis.plot(curve)
        axis.set_xlabel('epochs')
        axis.set_ylabel(y_label)

    plt.show()

In [None]:
# Loss curves for lenal_model (raw Lenalidomide features).
plot_loss(history.history)

In [None]:
# Loss curves for bortez_model (raw Bortezomib features).
plot_loss(bortez_history.history)

In [None]:
# NOTE(review): same call as the previous cell — bortez_history is plotted twice.
plot_loss(bortez_history.history)

In [None]:
# Loss curves for pca_model (PCA-reduced Lenalidomide features).
# plot_loss keeps its axes local and returns nothing, so no axis can be re-titled
# from this cell; the panels keep their default (empty) title.
plot_loss(pca_history.history)

In [None]:
# Loss curves for pca_model2 (PCA-reduced Bortezomib features).
plot_loss(pca_history2.history)

In [None]:
# True vs. predicted response for lenal_model on the raw Lenalidomide features,
# with the train and test splits overlaid (no y=x reference line on this panel).
fig, ax = plt.subplots()

lenal_pred_train = lenal_model.predict(X_lenal_cell_train)
ax.scatter(y_lenal_cell_train, lenal_pred_train, color='#4287f5', label='Train')
lenal_pred_test = lenal_model.predict(X_lenal_cell_test)
ax.scatter(y_lenal_cell_test, lenal_pred_test, color='#f2797f', label='Test')

ax.set(xlabel='True Response', ylabel='Predicted Response')
ax.legend()
ax.set_title('CRISPR Knockout Experiments - Lenalidomide')

In [None]:
# True vs. predicted response for pca_model on the PCA-reduced Lenalidomide
# features, with a y=x reference line drawn over [-1.5, 1.1).
x = np.arange(-1.5, 1.1, 0.01)
fig, ax = plt.subplots()
ax.plot(x, x, color='black', label='y=x')

lenal_pca_pred_train = pca_model.predict(X_lenal_cell_train_pca)
ax.scatter(y_lenal_cell_train, lenal_pca_pred_train, color='#4287f5', label='Train')
lenal_pca_pred_test = pca_model.predict(X_lenal_cell_test_pca)
ax.scatter(y_lenal_cell_test, lenal_pca_pred_test, color='#f2797f', label='Test')

ax.set(xlabel='True Response', ylabel='Predicted Response')
ax.legend()
ax.set_title('CRISPR Knockout Experiments - Lenalidomide with PCA')

In [None]:
# True vs. predicted response for pca_model2 on the PCA-reduced Bortezomib
# features, with a y=x reference line drawn over [-3, 0.6).
x = np.arange(-3, 0.6, 0.01)
fig, ax = plt.subplots()
ax.plot(x, x, color='black', label='y=x')

bortez_pca_pred_train = pca_model2.predict(X_bortez_cell_train_pca)
ax.scatter(y_bortez_cell_train, bortez_pca_pred_train, color='#4287f5', label='Train')
bortez_pca_pred_test = pca_model2.predict(X_bortez_cell_test_pca)
ax.scatter(y_bortez_cell_test, bortez_pca_pred_test, color='#f2797f', label='Test')

ax.set(xlabel='True Response', ylabel='Predicted Response')
ax.legend()
ax.set_title('CRISPR Knockout Experiments - Bortezomib with PCA')

In [None]:
# True vs. predicted response for bortez_model on the raw Bortezomib features,
# with the train and test splits overlaid (no y=x reference line on this panel).
fig, ax = plt.subplots()

bortez_pred_train = bortez_model.predict(X_bortez_cell_train)
ax.scatter(y_bortez_cell_train, bortez_pred_train, color='#4287f5', label='Train')
bortez_pred_test = bortez_model.predict(X_bortez_cell_test)
ax.scatter(y_bortez_cell_test, bortez_pred_test, color='#f2797f', label='Test')

ax.set(xlabel='True Response', ylabel='Predicted Response')
ax.legend()
ax.set_title('CRISPR Knockout Experiments - Bortezomib')

In [None]:
# True vs. predicted response for bortez_model on the raw Bortezomib features.
# NOTE(review): this plots the same data, with the same title, as the preceding cell.
# plt.scatter(y_cell_test,model.predict(X_cell_test))
# x = np.arange(-1.5,1.1,0.01)
fig, ax = plt.subplots(1,1)
# ax.plot(x, x, label='y=x', color='black')

ax.scatter(y_bortez_cell_train,bortez_model.predict(X_bortez_cell_train), label='Train', color='#4287f5')
ax.scatter(y_bortez_cell_test,bortez_model.predict(X_bortez_cell_test), label='Test', color='#f2797f')
# ax.set_xlim(-7,-2)
# ax.set_ylim(-7,-2)
ax.set_xlabel('True Response')
ax.set_ylabel('Predicted Response')
ax.legend()
ax.set_title('CRISPR Knockout Experiments - Bortezomib')

# Feature transfer patient data
Freeze the network except for the last layer, then train on the patient data.

In [None]:
# NOTE(review): no earlier cell assigns `model` (the trained models above are
# lenal_model, bortez_model, pca_model and pca_model2); it is only assigned by
# the load_model cell in the "Save model" section further down.
model.summary()

In [None]:
# Freeze every layer except the last one. The flag is set layer by layer on
# purpose: switching off `model.trainable` on the model itself makes Keras report
# no trainable weights at all, even when a single layer is re-enabled afterwards.
for layer in model.layers[:-1]:
    layer.trainable = False
model.layers[-1].trainable = True
# NOTE(review): Keras documents that `trainable` changes are picked up at compile
# time — re-compile `model` before fitting; confirm which optimizer/loss to use.

# Overview of which layers will be updated during fine-tuning.
layers = [(layer, layer.name, layer.trainable) for layer in model.layers]
pd.DataFrame(layers, columns=['Layer Type', 'Layer Name', 'Layer Trainable'])

In [None]:
# Fine-tune `model` on the (placeholder) patient data, validating on the patient test set.
history_pt = model.fit(X_pt_train, y_pt_train, epochs=100, validation_data=(X_pt_test, y_pt_test))

In [None]:
# True vs. predicted values on the patient test set.
plt.scatter(y_pt_test,model.predict(X_pt_test))

# Save model

In [None]:
# Reload a previously saved model from model_dir.
# NOTE(review): model_dir is assigned only in the following cell, so on a fresh
# kernel this cell fails with NameError when run in notebook order.
model = K.models.load_model(model_dir)

In [None]:
model_dir = os.path.join('6.871models', 'simpleNN042721')
# Clear any previous export so the save starts from an empty location. On a first
# run the directory does not exist yet, and an unconditional rmtree would raise
# FileNotFoundError before anything is saved.
if os.path.isdir(model_dir):
    shutil.rmtree(model_dir)
model.save(model_dir)
