In [39]:
import torch
from torch import nn, optim
from torch.nn import functional as F
from torch.utils.data import DataLoader
from torch.utils.data import TensorDataset
from torch.autograd import Variable
from torch.optim import lr_scheduler

from sklearn.metrics import r2_score
from sklearn import preprocessing
#import scipy.io as sio

from models import VAE,AEBase,Predictor
from models import DNN
import numpy as np
import pandas as pd
import models
import utils as ut
import copy

from scipy import stats


In [40]:
from scipy.stats import pearsonr

# Parameters

In [41]:
# Define parameters
epochs = 500 #200,500,1000  
#dim_au_in = 20049
dim_au_out = 512 #8, 16, 32, 64, 128, 256,512
dim_dnn_in = dim_au_out
dim_dnn_out=1
select_drug = 'Tamoxifen'
na = 1

In [42]:
class PretrainedPredictor(AEBase):
    def __init__(self,
                 # Params from AE model
                 input_dim,
                 latent_dim=128,
                 hidden_dims=[512],
                 drop_out=0.3,
                 ### Parameters from predictor models
                 pretrained_weights=None,                 
                 hidden_dims_predictor=[256],
                 drop_out_predictor=0.3,
                 output_dim = 1,
                 freezed = False):
        
        # Construct an autoencoder model
        AEBase.__init__(self,input_dim,latent_dim,hidden_dims,drop_out)
        
        # Load pretrained weights
        if pretrained_weights !=None:
            self.load_state_dict((torch.load(pretrained_weights)))
        
        ## Free parameters until the bottleneck layer
        if freezed == True:
            for p in self.parameters():
                print("Layer weight is freezed:",format(p.shape))
                p.requires_grad = False
                # Stop until the bottleneck layer
                if p.shape.numel() == self.latent_dim:
                    break
        
        self.predictor = Predictor(input_dim=self.latent_dim,
                 output_dim=output_dim,
                 hidden_dims=hidden_dims_predictor,
                 drop_out=drop_out_predictor)
        

    def forward(self, input, **kwargs):
        embedding = self.encode(input)
        output = self.predictor(embedding)
        return  output

# Import data

In [43]:
data_r=pd.read_csv('data/GDSC2_expression.csv',index_col=0)
label_r=pd.read_csv('data/GDSC2_label_9drugs.csv',index_col=0)

In [44]:
label_r=label_r.fillna(na)

In [45]:
hvg,adata = ut.highly_variable_genes(data_r)

In [46]:
selected_idx = label_r.loc[:,select_drug]!=na

In [47]:
data_r.columns = adata.var_names

In [48]:
hvg.sum()

5116

# Your is gene-cell, mine is cell-gene

In [49]:
#data = data_r.loc[selected_idx,:]
data = data_r.loc[selected_idx,hvg]

In [50]:
label = label_r.loc[selected_idx,select_drug]
#sscaler = preprocessing.StandardScaler(with_mean=True, with_std=True)
mmscaler = preprocessing.MinMaxScaler()
lbscaler = preprocessing.MinMaxScaler()

data = mmscaler.fit_transform(data)
label = lbscaler.fit_transform(label.values.reshape(-1,1))
#label = label.values.reshape(-1,1)

In [51]:
print(np.std(data))
print(np.mean(data))

0.25212143569762396
0.24222409715211315


In [52]:
data.mean(axis=0)


array([0.50048141, 0.07888812, 0.20731304, ..., 0.11936042, 0.05345683,
       0.403317  ])

In [53]:
print(data.max())
print(data.min())

1.0000000000000004
0.0


In [54]:
data.shape

(753, 5116)

In [55]:
label_r.shape

(804, 9)

# Split test train

In [56]:
from sklearn.model_selection import train_test_split
X_train_all, X_test, Y_train_all, Y_test = train_test_split(data, label, test_size=0.2, random_state=42)
X_train, X_valid, Y_train, Y_valid = train_test_split(X_train_all, Y_train_all, test_size=0.2, random_state=42)

In [57]:
print(data.shape)
print(label.shape)
print(X_train.shape, Y_train.shape)
print(X_test.shape, Y_test.shape)

(753, 5116)
(753, 1)
(481, 5116) (481, 1)
(151, 5116) (151, 1)


In [58]:
print(X_train.max())
print(X_train.min())

1.0000000000000004
0.0


# AE MODEL

In [59]:
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
# Assuming that we are on a CUDA machine, this should print a CUDA device:
print(device)
torch.cuda.set_device(device)

cuda:0


# Add all data to AE

In [60]:
X_trainTensor = torch.FloatTensor(X_train).to(device)
X_validTensor = torch.FloatTensor(X_valid).to(device)
X_testTensor = torch.FloatTensor(X_test).to(device)
X_allTensor = torch.FloatTensor(data).to(device)
#X_alltrainTensor = torch.FloatTensor(X_train_all).to(device)


Y_trainTensor = torch.FloatTensor(Y_train).to(device)
Y_validTensor = torch.FloatTensor(Y_valid).to(device)

# construct TensorDataset
train_dataset = TensorDataset(X_trainTensor, X_trainTensor)
valid_dataset = TensorDataset(X_validTensor, X_validTensor)
test_dataset = TensorDataset(X_testTensor, X_testTensor)
all_dataset = TensorDataset(X_allTensor, X_allTensor)

X_trainDataLoader = DataLoader(dataset=train_dataset, batch_size=200, shuffle=True)
X_validDataLoader = DataLoader(dataset=valid_dataset, batch_size=200, shuffle=True)
X_allDataLoader = DataLoader(dataset=all_dataset, batch_size=200, shuffle=True)

In [61]:
dataloader = X_trainDataLoader

In [62]:
#dataloaders_train = {'train':X_trainDataLoader,'val':X_validDataLoader}

In [63]:
X_trainDataLoader.dataset.tensors[0].shape[0]

481

# The model

In [64]:
model = PretrainedPredictor(input_dim=5116,latent_dim=512,hidden_dims=[2048,1024], 
                            hidden_dims_predictor=[256,128],
                            pretrained_weights='saved/models/GDSCnew_ae.pkl',freezed=False)

In [65]:
print(model)

PretrainedPredictor(
  (encoder): Sequential(
    (0): Sequential(
      (0): Linear(in_features=5116, out_features=2048, bias=True)
      (1): BatchNorm1d(2048, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (2): Dropout(p=0.3, inplace=False)
    )
    (1): Sequential(
      (0): Linear(in_features=2048, out_features=1024, bias=True)
      (1): BatchNorm1d(1024, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (2): Dropout(p=0.3, inplace=False)
    )
  )
  (bottleneck): Linear(in_features=1024, out_features=512, bias=True)
  (decoder_input): Linear(in_features=512, out_features=1024, bias=True)
  (decoder): Sequential(
    (0): Sequential(
      (0): Linear(in_features=1024, out_features=2048, bias=True)
      (1): BatchNorm1d(2048, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (2): Dropout(p=0.3, inplace=False)
    )
  )
  (final_layer): Sequential(
    (0): Linear(in_features=2048, out_features=5116, bias=True)
    

In [66]:
#model = VAE(dim_au_in=data_r.shape[1],dim_au_out=128)
if torch.cuda.is_available():
    model.cuda()
model.to(device)
optimizer = optim.Adam(model.parameters(), lr=1e-2)
loss_function = nn.MSELoss()

# Decay LR by a factor of 0.1 every 7 epochs
#exp_lr_scheduler = lr_scheduler.StepLR(optimizer, step_size=10, gamma=0.1)
exp_lr_scheduler = lr_scheduler.ReduceLROnPlateau(optimizer)

In [67]:
# Load data
# data type conversion

# y_trainTensor = torch.FloatTensor(Y_train).to(device)
# y_validTensor = torch.FloatTensor(Y_valid).to(device)

# construct TensorDataset
trainreducedDataset = TensorDataset(X_trainTensor, Y_trainTensor)
validreducedDataset = TensorDataset(X_validTensor, Y_validTensor)

trainDataLoader_p = DataLoader(dataset=trainreducedDataset, batch_size=200, shuffle=True)
validDataLoader_p = DataLoader(dataset=trainreducedDataset, batch_size=200, shuffle=True)

In [68]:
dataloaders_train = {'train':trainDataLoader_p,'val':validDataLoader_p}

In [69]:
model,report = ut.train_predictor_model(model,dataloaders_train,
                                        optimizer,loss_function,epochs,exp_lr_scheduler,save_path="saved/models/pre_pre_model.pkl")

Epoch 0/499
----------
train Loss: 0.00076508. Learning rate = 0.01
val Loss: 0.00180973. Learning rate = 0.01
Epoch 1/499
----------
train Loss: 0.00049310. Learning rate = 0.01
val Loss: 0.00077432. Learning rate = 0.01
Epoch 2/499
----------
train Loss: 0.00033232. Learning rate = 0.01
val Loss: 0.00055928. Learning rate = 0.01
Epoch 3/499
----------
train Loss: 0.00029449. Learning rate = 0.01
val Loss: 0.00039751. Learning rate = 0.01
Epoch 4/499
----------
train Loss: 0.00021530. Learning rate = 0.01
val Loss: 0.00023623. Learning rate = 0.01
Epoch 5/499
----------
train Loss: 0.00021377. Learning rate = 0.01
val Loss: 0.00018009. Learning rate = 0.01
Epoch 6/499
----------
train Loss: 0.00020240. Learning rate = 0.01
val Loss: 0.00016750. Learning rate = 0.01
Epoch 7/499
----------
train Loss: 0.00020844. Learning rate = 0.01
val Loss: 0.00018056. Learning rate = 0.01
Epoch 8/499
----------
train Loss: 0.00020172. Learning rate = 0.01
val Loss: 0.00017017. Learning rate = 0.01
E

Epoch 74/499
----------
train Loss: 0.00000797. Learning rate = 0.01
val Loss: 0.00000393. Learning rate = 0.01
Epoch 75/499
----------
train Loss: 0.00000906. Learning rate = 0.01
val Loss: 0.00002080. Learning rate = 0.01
Epoch 76/499
----------
train Loss: 0.00000545. Learning rate = 0.01
val Loss: 0.00000664. Learning rate = 0.01
Epoch 77/499
----------
train Loss: 0.00000808. Learning rate = 0.01
val Loss: 0.00000405. Learning rate = 0.01
Epoch 78/499
----------
train Loss: 0.00000974. Learning rate = 0.01
val Loss: 0.00000432. Learning rate = 0.01
Epoch 79/499
----------
train Loss: 0.00000834. Learning rate = 0.01
val Loss: 0.00001478. Learning rate = 0.01
Epoch 80/499
----------
train Loss: 0.00001005. Learning rate = 0.01
val Loss: 0.00000278. Learning rate = 0.01
Epoch 81/499
----------
train Loss: 0.00000889. Learning rate = 0.01
val Loss: 0.00000537. Learning rate = 0.01
Epoch 82/499
----------
train Loss: 0.00001306. Learning rate = 0.01
val Loss: 0.00000607. Learning rate

Epoch 146/499
----------
train Loss: 0.00000469. Learning rate = 0.0001
val Loss: 0.00000047. Learning rate = 0.0001
Epoch 147/499
----------
train Loss: 0.00000488. Learning rate = 0.0001
val Loss: 0.00000045. Learning rate = 0.0001
Epoch 148/499
----------
train Loss: 0.00000365. Learning rate = 0.0001
val Loss: 0.00000041. Learning rate = 0.0001
Epoch 149/499
----------
train Loss: 0.00000619. Learning rate = 1e-05
val Loss: 0.00000045. Learning rate = 1e-05
Epoch 150/499
----------
train Loss: 0.00000462. Learning rate = 1e-05
val Loss: 0.00000043. Learning rate = 1e-05
Epoch 151/499
----------
train Loss: 0.00000744. Learning rate = 1e-05
val Loss: 0.00000040. Learning rate = 1e-05
Epoch 152/499
----------
train Loss: 0.00000479. Learning rate = 1e-05
val Loss: 0.00000042. Learning rate = 1e-05
Epoch 153/499
----------
train Loss: 0.00000626. Learning rate = 1e-05
val Loss: 0.00000043. Learning rate = 1e-05
Epoch 154/499
----------
train Loss: 0.00000681. Learning rate = 1e-05
val

Epoch 208/499
----------
train Loss: 0.00000630. Learning rate = 1.0000000000000004e-08
val Loss: 0.00000043. Learning rate = 1.0000000000000004e-08
Epoch 209/499
----------
train Loss: 0.00000515. Learning rate = 1.0000000000000004e-08
val Loss: 0.00000041. Learning rate = 1.0000000000000004e-08
Epoch 210/499
----------
train Loss: 0.00000412. Learning rate = 1.0000000000000004e-08
val Loss: 0.00000038. Learning rate = 1.0000000000000004e-08
Epoch 211/499
----------
train Loss: 0.00000447. Learning rate = 1.0000000000000004e-08
val Loss: 0.00000043. Learning rate = 1.0000000000000004e-08
Epoch 212/499
----------
train Loss: 0.00000442. Learning rate = 1.0000000000000004e-08
val Loss: 0.00000041. Learning rate = 1.0000000000000004e-08
Epoch 213/499
----------
train Loss: 0.00000404. Learning rate = 1.0000000000000004e-08
val Loss: 0.00000047. Learning rate = 1.0000000000000004e-08
Epoch 214/499
----------
train Loss: 0.00001013. Learning rate = 1.0000000000000004e-08
val Loss: 0.000000

Epoch 263/499
----------
train Loss: 0.00000348. Learning rate = 1.0000000000000004e-08
val Loss: 0.00000051. Learning rate = 1.0000000000000004e-08
Epoch 264/499
----------
train Loss: 0.00000519. Learning rate = 1.0000000000000004e-08
val Loss: 0.00000048. Learning rate = 1.0000000000000004e-08
Epoch 265/499
----------
train Loss: 0.00000404. Learning rate = 1.0000000000000004e-08
val Loss: 0.00000040. Learning rate = 1.0000000000000004e-08
Epoch 266/499
----------
train Loss: 0.00000574. Learning rate = 1.0000000000000004e-08
val Loss: 0.00000043. Learning rate = 1.0000000000000004e-08
Epoch 267/499
----------
train Loss: 0.00000526. Learning rate = 1.0000000000000004e-08
val Loss: 0.00000047. Learning rate = 1.0000000000000004e-08
Epoch 268/499
----------
train Loss: 0.00000467. Learning rate = 1.0000000000000004e-08
val Loss: 0.00000047. Learning rate = 1.0000000000000004e-08
Epoch 269/499
----------
train Loss: 0.00000367. Learning rate = 1.0000000000000004e-08
val Loss: 0.000000

Epoch 318/499
----------
train Loss: 0.00000301. Learning rate = 1.0000000000000004e-08
val Loss: 0.00000042. Learning rate = 1.0000000000000004e-08
Epoch 319/499
----------
train Loss: 0.00000441. Learning rate = 1.0000000000000004e-08
val Loss: 0.00000044. Learning rate = 1.0000000000000004e-08
Epoch 320/499
----------
train Loss: 0.00000368. Learning rate = 1.0000000000000004e-08
val Loss: 0.00000042. Learning rate = 1.0000000000000004e-08
Epoch 321/499
----------
train Loss: 0.00000386. Learning rate = 1.0000000000000004e-08
val Loss: 0.00000041. Learning rate = 1.0000000000000004e-08
Epoch 322/499
----------
train Loss: 0.00000419. Learning rate = 1.0000000000000004e-08
val Loss: 0.00000040. Learning rate = 1.0000000000000004e-08
Epoch 323/499
----------
train Loss: 0.00000409. Learning rate = 1.0000000000000004e-08
val Loss: 0.00000037. Learning rate = 1.0000000000000004e-08
Epoch 324/499
----------
train Loss: 0.00000443. Learning rate = 1.0000000000000004e-08
val Loss: 0.000000

Epoch 373/499
----------
train Loss: 0.00000387. Learning rate = 1.0000000000000004e-08
val Loss: 0.00000042. Learning rate = 1.0000000000000004e-08
Epoch 374/499
----------
train Loss: 0.00000333. Learning rate = 1.0000000000000004e-08
val Loss: 0.00000042. Learning rate = 1.0000000000000004e-08
Epoch 375/499
----------
train Loss: 0.00000431. Learning rate = 1.0000000000000004e-08
val Loss: 0.00000045. Learning rate = 1.0000000000000004e-08
Epoch 376/499
----------
train Loss: 0.00000643. Learning rate = 1.0000000000000004e-08
val Loss: 0.00000047. Learning rate = 1.0000000000000004e-08
Epoch 377/499
----------
train Loss: 0.00000370. Learning rate = 1.0000000000000004e-08
val Loss: 0.00000041. Learning rate = 1.0000000000000004e-08
Epoch 378/499
----------
train Loss: 0.00000367. Learning rate = 1.0000000000000004e-08
val Loss: 0.00000046. Learning rate = 1.0000000000000004e-08
Epoch 379/499
----------
train Loss: 0.00000495. Learning rate = 1.0000000000000004e-08
val Loss: 0.000000

Epoch 428/499
----------
train Loss: 0.00000789. Learning rate = 1.0000000000000004e-08
val Loss: 0.00000057. Learning rate = 1.0000000000000004e-08
Epoch 429/499
----------
train Loss: 0.00000414. Learning rate = 1.0000000000000004e-08
val Loss: 0.00000045. Learning rate = 1.0000000000000004e-08
Epoch 430/499
----------
train Loss: 0.00000485. Learning rate = 1.0000000000000004e-08
val Loss: 0.00000047. Learning rate = 1.0000000000000004e-08
Epoch 431/499
----------
train Loss: 0.00000402. Learning rate = 1.0000000000000004e-08
val Loss: 0.00000053. Learning rate = 1.0000000000000004e-08
Epoch 432/499
----------
train Loss: 0.00000725. Learning rate = 1.0000000000000004e-08
val Loss: 0.00000057. Learning rate = 1.0000000000000004e-08
Epoch 433/499
----------
train Loss: 0.00000441. Learning rate = 1.0000000000000004e-08
val Loss: 0.00000049. Learning rate = 1.0000000000000004e-08
Epoch 434/499
----------
train Loss: 0.00000470. Learning rate = 1.0000000000000004e-08
val Loss: 0.000000

Epoch 483/499
----------
train Loss: 0.00000348. Learning rate = 1.0000000000000004e-08
val Loss: 0.00000043. Learning rate = 1.0000000000000004e-08
Epoch 484/499
----------
train Loss: 0.00000378. Learning rate = 1.0000000000000004e-08
val Loss: 0.00000043. Learning rate = 1.0000000000000004e-08
Epoch 485/499
----------
train Loss: 0.00000646. Learning rate = 1.0000000000000004e-08
val Loss: 0.00000048. Learning rate = 1.0000000000000004e-08
Epoch 486/499
----------
train Loss: 0.00000655. Learning rate = 1.0000000000000004e-08
val Loss: 0.00000058. Learning rate = 1.0000000000000004e-08
Epoch 487/499
----------
train Loss: 0.00000644. Learning rate = 1.0000000000000004e-08
val Loss: 0.00000058. Learning rate = 1.0000000000000004e-08
Epoch 488/499
----------
train Loss: 0.00000336. Learning rate = 1.0000000000000004e-08
val Loss: 0.00000056. Learning rate = 1.0000000000000004e-08
Epoch 489/499
----------
train Loss: 0.00000383. Learning rate = 1.0000000000000004e-08
val Loss: 0.000000

In [70]:
model(X_testTensor)

tensor([[0.7131],
        [0.6891],
        [0.7139],
        [0.7673],
        [0.6584],
        [0.7783],
        [0.6032],
        [0.6265],
        [0.7389],
        [0.8645],
        [0.6365],
        [0.8093],
        [0.6150],
        [0.8043],
        [0.5663],
        [0.7354],
        [0.7688],
        [0.7255],
        [0.7094],
        [0.7177],
        [0.6250],
        [0.7472],
        [0.8559],
        [0.5753],
        [0.6228],
        [0.7117],
        [0.6621],
        [0.7901],
        [0.8452],
        [0.8785],
        [0.6815],
        [0.5149],
        [0.6565],
        [0.4364],
        [0.6424],
        [0.6885],
        [0.6386],
        [0.6416],
        [0.8371],
        [0.8351],
        [0.6596],
        [0.8808],
        [0.8066],
        [0.6534],
        [0.7129],
        [0.6285],
        [0.6349],
        [0.7317],
        [0.7733],
        [0.6859],
        [0.8101],
        [0.8275],
        [0.4585],
        [0.7579],
        [0.7699],
        [0

In [71]:
Y_test

array([[0.71634809],
       [0.89917525],
       [0.87801633],
       [0.87756663],
       [0.73308886],
       [0.5644341 ],
       [0.61427161],
       [0.69924108],
       [0.75061775],
       [0.8777938 ],
       [0.86796538],
       [0.70327444],
       [0.76038127],
       [0.71711768],
       [0.65203221],
       [0.74191589],
       [0.63724786],
       [0.58318228],
       [0.82693636],
       [0.73358028],
       [0.84274065],
       [0.89580948],
       [0.89281459],
       [0.4297662 ],
       [0.46767053],
       [0.49228794],
       [0.7402191 ],
       [0.7279243 ],
       [0.7237055 ],
       [0.65711795],
       [0.63786445],
       [0.66300573],
       [0.68796621],
       [0.55603358],
       [0.85434467],
       [0.83133597],
       [0.69920863],
       [0.75934279],
       [0.20241445],
       [0.79068711],
       [0.74763214],
       [0.66895842],
       [0.        ],
       [0.58788323],
       [0.54269104],
       [0.60077144],
       [0.5869282 ],
       [0.419

In [72]:
from sklearn.metrics import mean_squared_error

In [73]:
dl_result = model(X_testTensor).detach().cpu().numpy()

In [74]:
r2_score(dl_result,Y_test)

-2.466934967186244

In [75]:
pearsonr(dl_result.flatten(),Y_test.flatten())

(0.16323247021197734, 0.045218252976052614)

In [76]:
mean_squared_error(dl_result,Y_test)

0.03120531956252696