In [77]:
import torch
from torch import nn, optim
from torch.nn import functional as F
from torch.utils.data import DataLoader
from torch.utils.data import TensorDataset
from torch.autograd import Variable
from torch.optim import lr_scheduler

from sklearn.metrics import r2_score
from sklearn import preprocessing
#import scipy.io as sio

from models import VAE,AEBase,Predictor
from models import DNN
import numpy as np
import pandas as pd
import models
import utils as ut
import copy

from scipy import stats


In [78]:
from scipy.stats import pearsonr

# Parameters

In [79]:
# Define parameters
epochs = 500 #200,500,1000  
#dim_au_in = 20049
dim_au_out = 512 #8, 16, 32, 64, 128, 256,512
dim_dnn_in = dim_au_out
dim_dnn_out=1
select_drug = 'Tamoxifen'
na = 1

# Import data

In [80]:
data_r=pd.read_csv('data/GDSC2_expression.csv',index_col=0)
label_r=pd.read_csv('data/GDSC2_label_9drugs.csv',index_col=0)

In [81]:
label_r=label_r.fillna(na)

In [82]:
hvg,adata = ut.highly_variable_genes(data_r)

In [83]:
selected_idx = label_r.loc[:,select_drug]!=na

In [84]:
data_r.columns = adata.var_names

In [85]:
hvg.sum()

5116

# Your is gene-cell, mine is cell-gene

In [86]:
#data = data_r.loc[selected_idx,:]
data = data_r.loc[selected_idx,hvg]

In [87]:
label = label_r.loc[selected_idx,select_drug]
#sscaler = preprocessing.StandardScaler(with_mean=True, with_std=True)
mmscaler = preprocessing.MinMaxScaler()
lbscaler = preprocessing.MinMaxScaler()

data = mmscaler.fit_transform(data)
label = lbscaler.fit_transform(label.values.reshape(-1,1))
#label = label.values.reshape(-1,1)

In [88]:
print(np.std(data))
print(np.mean(data))

0.25212143569762396
0.24222409715211315


In [89]:
data.mean(axis=0)


array([0.50048141, 0.07888812, 0.20731304, ..., 0.11936042, 0.05345683,
       0.403317  ])

In [90]:
print(data.max())
print(data.min())

1.0000000000000004
0.0


In [91]:
data.shape

(753, 5116)

In [92]:
label_r.shape

(804, 9)

# Split test train

In [93]:
from sklearn.model_selection import train_test_split
X_train_all, X_test, Y_train_all, Y_test = train_test_split(data, label, test_size=0.2, random_state=42)
X_train, X_valid, Y_train, Y_valid = train_test_split(X_train_all, Y_train_all, test_size=0.2, random_state=42)

In [94]:
print(data.shape)
print(label.shape)
print(X_train.shape, Y_train.shape)
print(X_test.shape, Y_test.shape)

(753, 5116)
(753, 1)
(481, 5116) (481, 1)
(151, 5116) (151, 1)


In [95]:
print(X_train.max())
print(X_train.min())

1.0000000000000004
0.0


# AE MODEL

In [96]:
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
# Assuming that we are on a CUDA machine, this should print a CUDA device:
print(device)
torch.cuda.set_device(device)

cuda:0


# Add all data to AE

In [97]:
X_trainTensor = torch.FloatTensor(X_train).to(device)
X_validTensor = torch.FloatTensor(X_valid).to(device)
X_testTensor = torch.FloatTensor(X_test).to(device)
X_allTensor = torch.FloatTensor(data).to(device)
#X_alltrainTensor = torch.FloatTensor(X_train_all).to(device)


Y_trainTensor = torch.FloatTensor(Y_train).to(device)
Y_validTensor = torch.FloatTensor(Y_valid).to(device)

# construct TensorDataset
train_dataset = TensorDataset(X_trainTensor, X_trainTensor)
valid_dataset = TensorDataset(X_validTensor, X_validTensor)
test_dataset = TensorDataset(X_testTensor, X_testTensor)
all_dataset = TensorDataset(X_allTensor, X_allTensor)

X_trainDataLoader = DataLoader(dataset=train_dataset, batch_size=200, shuffle=True)
X_validDataLoader = DataLoader(dataset=valid_dataset, batch_size=200, shuffle=True)
X_allDataLoader = DataLoader(dataset=all_dataset, batch_size=200, shuffle=True)

In [98]:
dataloader = X_trainDataLoader

In [99]:
#dataloaders_train = {'train':X_trainDataLoader,'val':X_validDataLoader}

In [100]:
X_trainDataLoader.dataset.tensors[0].shape[0]

481

# The model

In [101]:
model = models.PretrainedPredictor(input_dim=5116,latent_dim=512,h_dims=[2048,1024], 
                            hidden_dims_predictor=[256,128],drop_out_predictor=0.6,
                            pretrained_weights='saved/models/pretrained_1.pkl',freezed=False)

In [102]:
print(model)

PretrainedPredictor(
  (encoder): Sequential(
    (0): Sequential(
      (0): Linear(in_features=5116, out_features=2048, bias=True)
      (1): BatchNorm1d(2048, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (2): Dropout(p=0.3, inplace=False)
    )
    (1): Sequential(
      (0): Linear(in_features=2048, out_features=1024, bias=True)
      (1): BatchNorm1d(1024, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (2): Dropout(p=0.3, inplace=False)
    )
  )
  (bottleneck): Linear(in_features=1024, out_features=512, bias=True)
  (predictor): Predictor(
    (predictor): Sequential(
      (0): Sequential(
        (0): Linear(in_features=512, out_features=256, bias=True)
        (1): BatchNorm1d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (2): Dropout(p=0.6, inplace=False)
      )
      (1): Sequential(
        (0): Linear(in_features=256, out_features=128, bias=True)
        (1): BatchNorm1d(128, eps=1e-05, momentu

In [103]:
#model = VAE(dim_au_in=data_r.shape[1],dim_au_out=128)
if torch.cuda.is_available():
    model.cuda()
model.to(device)
optimizer = optim.Adam(model.parameters(), lr=1e-2)
loss_function = nn.MSELoss()

# Decay LR by a factor of 0.1 every 7 epochs
#exp_lr_scheduler = lr_scheduler.StepLR(optimizer, step_size=10, gamma=0.1)
exp_lr_scheduler = lr_scheduler.ReduceLROnPlateau(optimizer)

In [104]:
# Load data
# data type conversion

# y_trainTensor = torch.FloatTensor(Y_train).to(device)
# y_validTensor = torch.FloatTensor(Y_valid).to(device)

# construct TensorDataset
trainreducedDataset = TensorDataset(X_trainTensor, Y_trainTensor)
validreducedDataset = TensorDataset(X_validTensor, Y_validTensor)

trainDataLoader_p = DataLoader(dataset=trainreducedDataset, batch_size=200, shuffle=True)
validDataLoader_p = DataLoader(dataset=validreducedDataset, batch_size=200, shuffle=True)

In [105]:
dataloaders_train = {'train':trainDataLoader_p,'val':validDataLoader_p}

In [106]:
model,report = ut.train_predictor_model(model,dataloaders_train,
                                        optimizer,loss_function,epochs,exp_lr_scheduler,save_path="saved/models/pre_pre_model.pkl")

Epoch 0/499
----------
train Loss: 0.00067117. Learning rate = 0.01
val Loss: 0.00268399. Learning rate = 0.01
Epoch 1/499
----------
train Loss: 0.00045332. Learning rate = 0.01
val Loss: 0.00096788. Learning rate = 0.01
Epoch 2/499
----------
train Loss: 0.00040566. Learning rate = 0.01
val Loss: 0.00054722. Learning rate = 0.01
Epoch 3/499
----------
train Loss: 0.00037159. Learning rate = 0.01
val Loss: 0.00037950. Learning rate = 0.01
Epoch 4/499
----------
train Loss: 0.00030399. Learning rate = 0.01
val Loss: 0.00095511. Learning rate = 0.01
Epoch 5/499
----------
train Loss: 0.00026992. Learning rate = 0.01
val Loss: 0.00030557. Learning rate = 0.01
Epoch 6/499
----------
train Loss: 0.00027045. Learning rate = 0.01
val Loss: 0.00036213. Learning rate = 0.01
Epoch 7/499
----------
train Loss: 0.00024450. Learning rate = 0.01
val Loss: 0.00030833. Learning rate = 0.01
Epoch 8/499
----------
train Loss: 0.00021664. Learning rate = 0.01
val Loss: 0.00028830. Learning rate = 0.01
E

val Loss: 0.00034448. Learning rate = 0.01
Epoch 74/499
----------
train Loss: 0.00001555. Learning rate = 0.01
val Loss: 0.00031781. Learning rate = 0.01
Epoch 75/499
----------
train Loss: 0.00000972. Learning rate = 0.01
val Loss: 0.00030526. Learning rate = 0.01
Epoch 76/499
----------
train Loss: 0.00001075. Learning rate = 0.01
val Loss: 0.00034188. Learning rate = 0.01
Epoch 77/499
----------
train Loss: 0.00000911. Learning rate = 0.01
val Loss: 0.00034356. Learning rate = 0.01
Epoch 78/499
----------
train Loss: 0.00001001. Learning rate = 0.01
val Loss: 0.00033295. Learning rate = 0.01
Epoch 79/499
----------
train Loss: 0.00000827. Learning rate = 0.01
val Loss: 0.00033016. Learning rate = 0.01
Epoch 80/499
----------
train Loss: 0.00001072. Learning rate = 0.01
val Loss: 0.00032585. Learning rate = 0.01
Epoch 81/499
----------
train Loss: 0.00001151. Learning rate = 0.01
val Loss: 0.00034223. Learning rate = 0.01
Epoch 82/499
----------
train Loss: 0.00000973. Learning rate

val Loss: 0.00031902. Learning rate = 1.0000000000000002e-06
Epoch 145/499
----------
train Loss: 0.00000661. Learning rate = 1.0000000000000002e-06
val Loss: 0.00032035. Learning rate = 1.0000000000000002e-06
Epoch 146/499
----------
train Loss: 0.00000604. Learning rate = 1.0000000000000002e-06
val Loss: 0.00032124. Learning rate = 1.0000000000000002e-06
Epoch 147/499
----------
train Loss: 0.00000543. Learning rate = 1.0000000000000002e-06
val Loss: 0.00031954. Learning rate = 1.0000000000000002e-06
Epoch 148/499
----------
train Loss: 0.00000595. Learning rate = 1.0000000000000002e-06
val Loss: 0.00031928. Learning rate = 1.0000000000000002e-06
Epoch 149/499
----------
train Loss: 0.00000564. Learning rate = 1.0000000000000002e-06
val Loss: 0.00031956. Learning rate = 1.0000000000000002e-06
Epoch 150/499
----------
train Loss: 0.00000891. Learning rate = 1.0000000000000002e-07
val Loss: 0.00032075. Learning rate = 1.0000000000000002e-07
Epoch 151/499
----------
train Loss: 0.000006

Epoch 201/499
----------
train Loss: 0.00000696. Learning rate = 1.0000000000000004e-08
val Loss: 0.00031890. Learning rate = 1.0000000000000004e-08
Epoch 202/499
----------
train Loss: 0.00000540. Learning rate = 1.0000000000000004e-08
val Loss: 0.00031913. Learning rate = 1.0000000000000004e-08
Epoch 203/499
----------
train Loss: 0.00000632. Learning rate = 1.0000000000000004e-08
val Loss: 0.00031879. Learning rate = 1.0000000000000004e-08
Epoch 204/499
----------
train Loss: 0.00000634. Learning rate = 1.0000000000000004e-08
val Loss: 0.00031941. Learning rate = 1.0000000000000004e-08
Epoch 205/499
----------
train Loss: 0.00000592. Learning rate = 1.0000000000000004e-08
val Loss: 0.00031943. Learning rate = 1.0000000000000004e-08
Epoch 206/499
----------
train Loss: 0.00000936. Learning rate = 1.0000000000000004e-08
val Loss: 0.00031852. Learning rate = 1.0000000000000004e-08
Epoch 207/499
----------
train Loss: 0.00000624. Learning rate = 1.0000000000000004e-08
val Loss: 0.000318

val Loss: 0.00031804. Learning rate = 1.0000000000000004e-08
Epoch 257/499
----------
train Loss: 0.00000638. Learning rate = 1.0000000000000004e-08
val Loss: 0.00031740. Learning rate = 1.0000000000000004e-08
Epoch 258/499
----------
train Loss: 0.00000874. Learning rate = 1.0000000000000004e-08
val Loss: 0.00031591. Learning rate = 1.0000000000000004e-08
Epoch 259/499
----------
train Loss: 0.00000472. Learning rate = 1.0000000000000004e-08
val Loss: 0.00031686. Learning rate = 1.0000000000000004e-08
Epoch 260/499
----------
train Loss: 0.00000523. Learning rate = 1.0000000000000004e-08
val Loss: 0.00031690. Learning rate = 1.0000000000000004e-08
Epoch 261/499
----------
train Loss: 0.00000613. Learning rate = 1.0000000000000004e-08
val Loss: 0.00031859. Learning rate = 1.0000000000000004e-08
Epoch 262/499
----------
train Loss: 0.00001417. Learning rate = 1.0000000000000004e-08
val Loss: 0.00032118. Learning rate = 1.0000000000000004e-08
Epoch 263/499
----------
train Loss: 0.000006

val Loss: 0.00032075. Learning rate = 1.0000000000000004e-08
Epoch 312/499
----------
train Loss: 0.00000562. Learning rate = 1.0000000000000004e-08
val Loss: 0.00031978. Learning rate = 1.0000000000000004e-08
Epoch 313/499
----------
train Loss: 0.00000883. Learning rate = 1.0000000000000004e-08
val Loss: 0.00031950. Learning rate = 1.0000000000000004e-08
Epoch 314/499
----------
train Loss: 0.00000707. Learning rate = 1.0000000000000004e-08
val Loss: 0.00032035. Learning rate = 1.0000000000000004e-08
Epoch 315/499
----------
train Loss: 0.00000570. Learning rate = 1.0000000000000004e-08
val Loss: 0.00032077. Learning rate = 1.0000000000000004e-08
Epoch 316/499
----------
train Loss: 0.00000591. Learning rate = 1.0000000000000004e-08
val Loss: 0.00032014. Learning rate = 1.0000000000000004e-08
Epoch 317/499
----------
train Loss: 0.00000855. Learning rate = 1.0000000000000004e-08
val Loss: 0.00032171. Learning rate = 1.0000000000000004e-08
Epoch 318/499
----------
train Loss: 0.000005

val Loss: 0.00032237. Learning rate = 1.0000000000000004e-08
Epoch 367/499
----------
train Loss: 0.00000744. Learning rate = 1.0000000000000004e-08
val Loss: 0.00032192. Learning rate = 1.0000000000000004e-08
Epoch 368/499
----------
train Loss: 0.00000629. Learning rate = 1.0000000000000004e-08
val Loss: 0.00032026. Learning rate = 1.0000000000000004e-08
Epoch 369/499
----------
train Loss: 0.00000565. Learning rate = 1.0000000000000004e-08
val Loss: 0.00031941. Learning rate = 1.0000000000000004e-08
Epoch 370/499
----------
train Loss: 0.00000534. Learning rate = 1.0000000000000004e-08
val Loss: 0.00031921. Learning rate = 1.0000000000000004e-08
Epoch 371/499
----------
train Loss: 0.00000679. Learning rate = 1.0000000000000004e-08
val Loss: 0.00031900. Learning rate = 1.0000000000000004e-08
Epoch 372/499
----------
train Loss: 0.00000461. Learning rate = 1.0000000000000004e-08
val Loss: 0.00031963. Learning rate = 1.0000000000000004e-08
Epoch 373/499
----------
train Loss: 0.000005

val Loss: 0.00031953. Learning rate = 1.0000000000000004e-08
Epoch 422/499
----------
train Loss: 0.00000714. Learning rate = 1.0000000000000004e-08
val Loss: 0.00031802. Learning rate = 1.0000000000000004e-08
Epoch 423/499
----------
train Loss: 0.00000603. Learning rate = 1.0000000000000004e-08
val Loss: 0.00031965. Learning rate = 1.0000000000000004e-08
Epoch 424/499
----------
train Loss: 0.00001127. Learning rate = 1.0000000000000004e-08
val Loss: 0.00031906. Learning rate = 1.0000000000000004e-08
Epoch 425/499
----------
train Loss: 0.00000697. Learning rate = 1.0000000000000004e-08
val Loss: 0.00032148. Learning rate = 1.0000000000000004e-08
Epoch 426/499
----------
train Loss: 0.00000683. Learning rate = 1.0000000000000004e-08
val Loss: 0.00032239. Learning rate = 1.0000000000000004e-08
Epoch 427/499
----------
train Loss: 0.00000710. Learning rate = 1.0000000000000004e-08
val Loss: 0.00032220. Learning rate = 1.0000000000000004e-08
Epoch 428/499
----------
train Loss: 0.000007

val Loss: 0.00032441. Learning rate = 1.0000000000000004e-08
Epoch 478/499
----------
train Loss: 0.00000594. Learning rate = 1.0000000000000004e-08
val Loss: 0.00032285. Learning rate = 1.0000000000000004e-08
Epoch 479/499
----------
train Loss: 0.00000787. Learning rate = 1.0000000000000004e-08
val Loss: 0.00032455. Learning rate = 1.0000000000000004e-08
Epoch 480/499
----------
train Loss: 0.00000719. Learning rate = 1.0000000000000004e-08
val Loss: 0.00032421. Learning rate = 1.0000000000000004e-08
Epoch 481/499
----------
train Loss: 0.00000675. Learning rate = 1.0000000000000004e-08
val Loss: 0.00032244. Learning rate = 1.0000000000000004e-08
Epoch 482/499
----------
train Loss: 0.00000578. Learning rate = 1.0000000000000004e-08
val Loss: 0.00032089. Learning rate = 1.0000000000000004e-08
Epoch 483/499
----------
train Loss: 0.00000588. Learning rate = 1.0000000000000004e-08
val Loss: 0.00032047. Learning rate = 1.0000000000000004e-08
Epoch 484/499
----------
train Loss: 0.000006

In [107]:
list(model.children())[:2]

[Sequential(
   (0): Sequential(
     (0): Linear(in_features=5116, out_features=2048, bias=True)
     (1): BatchNorm1d(2048, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
     (2): Dropout(p=0.3, inplace=False)
   )
   (1): Sequential(
     (0): Linear(in_features=2048, out_features=1024, bias=True)
     (1): BatchNorm1d(1024, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
     (2): Dropout(p=0.3, inplace=False)
   )
 ), Linear(in_features=1024, out_features=512, bias=True)]

In [108]:
model(X_testTensor)

tensor([[0.6770],
        [0.6124],
        [0.7582],
        [0.7704],
        [0.6398],
        [0.7472],
        [0.6682],
        [0.6443],
        [0.7488],
        [0.8151],
        [0.6224],
        [0.6972],
        [0.6243],
        [0.7480],
        [0.6746],
        [0.7293],
        [0.6978],
        [0.6784],
        [0.6124],
        [0.6756],
        [0.6792],
        [0.7527],
        [0.8033],
        [0.5991],
        [0.6105],
        [0.7010],
        [0.7038],
        [0.8007],
        [0.7781],
        [0.6689],
        [0.6027],
        [0.6413],
        [0.7304],
        [0.3656],
        [0.6647],
        [0.6693],
        [0.6198],
        [0.6448],
        [0.7366],
        [0.7923],
        [0.6052],
        [0.7714],
        [0.7571],
        [0.6674],
        [0.6654],
        [0.6594],
        [0.6855],
        [0.7213],
        [0.6809],
        [0.7766],
        [0.7364],
        [0.7898],
        [0.7359],
        [0.6479],
        [0.7375],
        [0

In [109]:
Y_test

array([[0.71634809],
       [0.89917525],
       [0.87801633],
       [0.87756663],
       [0.73308886],
       [0.5644341 ],
       [0.61427161],
       [0.69924108],
       [0.75061775],
       [0.8777938 ],
       [0.86796538],
       [0.70327444],
       [0.76038127],
       [0.71711768],
       [0.65203221],
       [0.74191589],
       [0.63724786],
       [0.58318228],
       [0.82693636],
       [0.73358028],
       [0.84274065],
       [0.89580948],
       [0.89281459],
       [0.4297662 ],
       [0.46767053],
       [0.49228794],
       [0.7402191 ],
       [0.7279243 ],
       [0.7237055 ],
       [0.65711795],
       [0.63786445],
       [0.66300573],
       [0.68796621],
       [0.55603358],
       [0.85434467],
       [0.83133597],
       [0.69920863],
       [0.75934279],
       [0.20241445],
       [0.79068711],
       [0.74763214],
       [0.66895842],
       [0.        ],
       [0.58788323],
       [0.54269104],
       [0.60077144],
       [0.5869282 ],
       [0.419

In [110]:
from sklearn.metrics import mean_squared_error

In [111]:
dl_result = model(X_testTensor).detach().cpu().numpy()

In [112]:
r2_score(dl_result,Y_test)

-5.007304931145855

In [113]:
pearsonr(dl_result.flatten(),Y_test.flatten())

(0.13790963348115426, 0.09128494978005493)

In [114]:
mean_squared_error(dl_result,Y_test)

0.028435842488485862