In [1]:
import torch 
import json,pickle,math
import pandas as pd
import numpy as np
import torch.nn as nn
import torchvision
import torchvision.transforms as transforms


In [2]:
# Load the ligand similarity matrix (LSM), the protein similarity matrix (PSM)
# and the table of drug-target pairs. Context managers close every file handle
# as soon as its content has been read.
# NOTE(review): pickle.load can execute arbitrary code while unpickling; only
# load pickle files that come from a trusted source.
with open('./davis_ligand_similarity_matrix.pkl', 'rb') as ligand_file:
    LSM = pickle.load(ligand_file)
with open('./davis_protein_similarity_matrix.pkl', 'rb') as protein_file:
    PSM = pickle.load(protein_file)
with open('./davis_all_pairs.csv', 'r') as pairs_file:
    df = pd.read_csv(pairs_file)

In [3]:
# df

In [4]:
# Read the two JSON mappings and keep only their values, in file order, as
# lists. Context managers close the file handles once parsing is done.
with open('./data/DAVIS/SMILES.txt') as smiles_file:
    SMILES = list(json.load(smiles_file).values())
with open('./data/DAVIS/target_seq.txt') as targets_file:
    TARGETS = list(json.load(targets_file).values())

In [5]:
# Build one single-channel "image" per row of df: the outer product of the
# ligand's row in LSM and the target's row in PSM.
#
# Position lookups are precomputed once instead of scanning the SMILES/TARGETS
# lists for every row. setdefault keeps the FIRST position of a repeated value,
# which is what list.index returns. A value missing from the lists now raises
# KeyError (list.index raised ValueError).
smiles_position = {}
for position, smiles in enumerate(SMILES):
    smiles_position.setdefault(smiles, position)
target_position = {}
for position, sequence in enumerate(TARGETS):
    target_position.setdefault(sequence, position)

outer_prods = []
for smi, seq in zip(df['SMILES'], df['Target Sequence']):
    ki = LSM[smiles_position[smi]]
    kj = PSM[target_position[seq]]
    # The extra list level adds the leading channel axis of size 1.
    outer_prods.append([np.outer(ki, kj)])
outer_prods = np.array(outer_prods)
print(np.shape(outer_prods))

(30056, 1, 68, 442)


In [6]:
# Hyper parameters
learning_rate = 0.001
batch_size = 32
num_epochs = 20
# num_classes = 10

# Device configuration: first CUDA device when one is available, CPU otherwise
if torch.cuda.is_available():
    device = torch.device('cuda:0')
else:
    device = torch.device('cpu')

In [7]:
class custom_dataset(torch.utils.data.Dataset):
    """Dataset pairing each row of a CSV file with a precomputed outer product.

    Args:
        csv_file: Path to a CSV file that contains a 'Label' column.
        outer_prods: Indexable collection; element ``idx`` is returned together
            with the label of CSV row ``idx``.
        transform: Stored as ``self.transform``; ``__getitem__`` does not
            apply it.

    Raises:
        KeyError: If the CSV file has no 'Label' column.
    """

    def __init__(self, csv_file, outer_prods, transform=None):
        # The context manager closes the file handle once pandas has read it.
        with open(csv_file) as csv_handle:
            self.df = pd.read_csv(csv_handle)
        self.transform = transform
        self.outer_prods = outer_prods
        # Cache the label column once so __getitem__ does not have to build a
        # whole row Series for every sample it serves.
        self.labels = self.df['Label'].to_numpy()

    def __len__(self):
        """Return the number of rows in the CSV file."""
        return len(self.df)

    def __getitem__(self, idx):
        """Return ``{'outer_product': ..., 'Label': ...}`` for row ``idx``."""
        return {'outer_product': self.outer_prods[idx], 'Label': self.labels[idx]}

In [8]:
# Wrap the pair table and the precomputed outer products in a single dataset;
# `dataset` and `full_dataset` are two names for the same object.
full_dataset = custom_dataset(outer_prods=outer_prods,
                              csv_file='./davis_all_pairs.csv')
dataset = full_dataset

In [9]:
# 80/20 train/test split. A dedicated, seeded generator makes the split the
# same on every run, so metrics from different runs refer to the same test set.
SPLIT_SEED = 0
train_size = int(0.8 * len(full_dataset))
test_size = len(full_dataset) - train_size
train_dataset, test_dataset = torch.utils.data.random_split(
    full_dataset,
    [train_size, test_size],
    generator=torch.Generator().manual_seed(SPLIT_SEED),
)

In [10]:
# Training batches are reshuffled every epoch. The evaluation loader keeps a
# fixed order: the metrics do not depend on sample order, and an unshuffled
# loader does not draw from the global RNG each time it is iterated.
train_loader = torch.utils.data.DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
test_loader = torch.utils.data.DataLoader(test_dataset, batch_size=batch_size, shuffle=False)

In [11]:
import torch.nn.functional as F

class ConvNet(nn.Module):
    """Two-stage CNN that regresses one value from a single-channel 2-D input.

    Layers: Conv2d(1->32, k=5) -> ReLU -> MaxPool(2, 2) -> Conv2d(32->18, k=3)
    -> ReLU -> MaxPool(2, 2) -> flatten -> Dropout(0.1) -> Linear(->128)
    -> ReLU -> Linear(128->1). All parameters are double precision.

    Args:
        input_size: ``(height, width)`` of the input. The default ``(68, 442)``
            yields the flattened size 18*15*108 that used to be hard-coded.

    Raises:
        ValueError: If ``input_size`` is too small to survive both
            convolution/pooling stages.
    """

    def __init__(self, input_size=(68, 442)):
        super(ConvNet, self).__init__()
        self.conv1 = nn.Conv2d(1, 32, 5).double()
        # Pooling and dropout hold no parameters, so no dtype cast is needed.
        self.pool1 = nn.MaxPool2d(2, 2)
        self.conv2 = nn.Conv2d(32, 18, 3).double()
        self.pool2 = nn.MaxPool2d(2, 2)

        out_height, out_width = (self._pooled_extent(size) for size in input_size)
        if out_height < 1 or out_width < 1:
            raise ValueError(
                'input_size {} is too small for this network'.format(tuple(input_size)))
        # Number of features per sample after the second pooling stage.
        self.flat_features = 18 * out_height * out_width

        self.fc1 = nn.Linear(self.flat_features, 128).double()
        self.fc2 = nn.Linear(128, 1).double()
        self.dropout = nn.Dropout(0.1)

    @staticmethod
    def _pooled_extent(size):
        """Return one spatial extent after conv(k=5), pool, conv(k=3), pool."""
        after_conv1 = size - 5 + 1
        after_pool1 = after_conv1 // 2
        after_conv2 = after_pool1 - 3 + 1
        return after_conv2 // 2

    def forward(self, x):
        """Map a ``(batch, 1, height, width)`` double tensor to ``(batch, 1)``."""
        x = F.relu(self.conv1(x))
        x = self.pool1(x)
        x = F.relu(self.conv2(x))
        x = self.pool2(x)
        x = x.view(-1, self.flat_features)
        x = self.dropout(x)
        x = F.relu(self.fc1(x))
        x = self.fc2(x)
        return x
    

In [12]:
# for i in test_loader:
#     a = i['outer_product']
#     b= i['Label']
#     break
# conv1 = nn.Conv2d(1,32,5).double()
# pool = nn.MaxPool2d(2,2).double()
# conv2 = nn.Conv2d(32,18,3).double()
# fc1 = nn.Linear(18*15*108, 128).double()
# fc2 = nn.Linear(128,1).double()
# dropout = nn.Dropout(0.1).double()
# x= conv1(a)
# print(x.shape)
# x = pool(x)
# print(x.shape)
# x= conv2(x)
# print(x.shape)
# x = pool(x)
# print(x.shape)
# x = x.view(-1,18*15*108)
# print(x.shape)
# x = dropout(x)
# print(x.shape)
# x = fc1(x)
# print(x.shape)
# x = fc2(x)
# print(x.shape)

In [13]:
# Build the network, then move its parameters to the selected device
model = ConvNet()
model = model.to(device)

In [14]:
# Optimizer and loss: Adam over all model parameters, mean-squared-error objective
optimizer = torch.optim.Adam(params=model.parameters(), lr=learning_rate)
criterion = nn.MSELoss()


# Evaluation metrics

In [15]:
def rmse(y,f):
    """Return the root of the mean (along axis 0) squared difference of y and f."""
    squared_error = (y - f) ** 2
    return math.sqrt(squared_error.mean(axis=0))
def mse(y,f):
    """Return the mean (along axis 0) of the squared difference of y and f."""
    residual = y - f
    return (residual ** 2).mean(axis=0)
def pearson(y,f):
    """Return the off-diagonal entry of np.corrcoef(y, f), i.e. the correlation of y with f."""
    correlation_matrix = np.corrcoef(y, f)
    return correlation_matrix[0, 1]
from lifelines.utils import concordance_index
def ci(y,f):
    """Return lifelines' concordance_index evaluated on (y, f)."""
    score = concordance_index(y, f)
    return score

# Train the model

In [16]:
def predicting(model, device, test_loader):
    """Run the model over every batch of a loader and collect the results.

    The model is switched to eval mode and is left in eval mode on return.

    Args:
        model: Module called on each batch's 'outer_product' tensor.
        device: Device the batch tensors are moved to before the forward pass.
        test_loader: Iterable of dicts with 'outer_product' and 'Label' tensors.

    Returns:
        Tuple ``(G, P)`` of flat float64 numpy arrays: the labels and the
        predictions of ALL batches, in iteration order. Both arrays are empty
        when the loader yields no batches.
    """
    model.eval()
    pred_chunks = []
    label_chunks = []
    with torch.no_grad():
        for batch in test_loader:
            images = batch['outer_product'].to(device)
            labels = batch['Label'].to(device)

            # Forward pass
            outputs = model(images)
            pred_chunks.append(outputs.cpu().detach().numpy().flatten())
            label_chunks.append(labels.cpu().detach().numpy().flatten())

    # The leading empty float64 array keeps the result float64 and makes the
    # concatenation valid even when no batch was seen.
    P = np.concatenate([np.array([]), *pred_chunks])
    G = np.concatenate([np.array([]), *label_chunks])
    return G, P

In [None]:
# Train the model, checkpointing the weights whenever the evaluation MSE improves.
# NOTE(review): test_loader is used both to pick the best checkpoint and to
# report the final metrics; a separate validation split would avoid selecting
# the model on the data it is later scored on.
best_mse = float('inf')  # guarantees the first evaluation is recorded
best_ci = 0
model_file_name = 'best_sim-CNN-DTA_davis.model'
result_file_name = 'best_result_sim-CNNDTA_davis.csv'
total_step = len(train_loader)
for epoch in range(num_epochs):
    # predicting() leaves the model in eval mode, so training mode (and with it
    # dropout) has to be re-enabled at the start of every epoch.
    model.train()
    for step, batch in enumerate(train_loader, start=1):
        images = batch['outer_product'].to(device)
        labels = batch['Label'].to(device)

        # Forward pass
        outputs = model(images)
        loss = criterion(outputs.flatten(), labels)

        # Backward and optimize
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        print ('Epoch [{}/{}], Step [{}/{}], Loss: {:.4f}'
               .format(epoch+1, num_epochs, step, total_step, loss.item()))

    # taking best model so far
    G,P = predicting(model, device, test_loader)
    ret = [rmse(G, P), mse(G, P), pearson(G, P), ci(G, P)]
    if ret[1] < best_mse:
        torch.save(model.state_dict(), model_file_name)
        with open(result_file_name, 'w') as f:
            f.write(','.join(map(str, ret)))
        best_epoch = epoch+1
        best_mse = ret[1]
        best_ci = ret[-1]
        best_r2 = ret[2]  # holds the value returned by pearson(G, P)

        print('rmse improved at epoch ', best_epoch,
                      '; best_mse,best_ci,best_r2:', best_mse, best_ci,best_r2)
        
        

  return torch.max_pool2d(input, kernel_size, stride, padding, dilation, ceil_mode)


Epoch [1/20], Step [1/752], Loss: 30.7756
Epoch [1/20], Step [2/752], Loss: 13.8066
Epoch [1/20], Step [3/752], Loss: 2.6147
Epoch [1/20], Step [4/752], Loss: 6.5571
Epoch [1/20], Step [5/752], Loss: 5.9739
Epoch [1/20], Step [6/752], Loss: 2.2430
Epoch [1/20], Step [7/752], Loss: 0.7734
Epoch [1/20], Step [8/752], Loss: 2.2827
Epoch [1/20], Step [9/752], Loss: 2.3536
Epoch [1/20], Step [10/752], Loss: 3.2405
Epoch [1/20], Step [11/752], Loss: 1.7166
Epoch [1/20], Step [12/752], Loss: 1.0915
Epoch [1/20], Step [13/752], Loss: 0.4333
Epoch [1/20], Step [14/752], Loss: 0.9320
Epoch [1/20], Step [15/752], Loss: 1.8298
Epoch [1/20], Step [16/752], Loss: 1.3822
Epoch [1/20], Step [17/752], Loss: 0.7746
Epoch [1/20], Step [18/752], Loss: 0.4310
Epoch [1/20], Step [19/752], Loss: 1.6516
Epoch [1/20], Step [20/752], Loss: 1.2584
Epoch [1/20], Step [21/752], Loss: 0.9037
Epoch [1/20], Step [22/752], Loss: 2.5983
Epoch [1/20], Step [23/752], Loss: 0.5482
Epoch [1/20], Step [24/752], Loss: 0.7040

Epoch [1/20], Step [196/752], Loss: 0.4235
Epoch [1/20], Step [197/752], Loss: 0.8382
Epoch [1/20], Step [198/752], Loss: 0.9811
Epoch [1/20], Step [199/752], Loss: 1.0040
Epoch [1/20], Step [200/752], Loss: 1.3425
Epoch [1/20], Step [201/752], Loss: 0.8560
Epoch [1/20], Step [202/752], Loss: 0.4847
Epoch [1/20], Step [203/752], Loss: 0.4832
Epoch [1/20], Step [204/752], Loss: 1.0369
Epoch [1/20], Step [205/752], Loss: 0.7190
Epoch [1/20], Step [206/752], Loss: 0.9051
Epoch [1/20], Step [207/752], Loss: 0.5461
Epoch [1/20], Step [208/752], Loss: 0.4314
Epoch [1/20], Step [209/752], Loss: 0.4025
Epoch [1/20], Step [210/752], Loss: 1.0628
Epoch [1/20], Step [211/752], Loss: 0.8918
Epoch [1/20], Step [212/752], Loss: 0.8389
Epoch [1/20], Step [213/752], Loss: 1.1772
Epoch [1/20], Step [214/752], Loss: 1.2852
Epoch [1/20], Step [215/752], Loss: 1.0129
Epoch [1/20], Step [216/752], Loss: 0.7579
Epoch [1/20], Step [217/752], Loss: 1.4654
Epoch [1/20], Step [218/752], Loss: 0.7323
Epoch [1/20

Epoch [1/20], Step [388/752], Loss: 0.9697
Epoch [1/20], Step [389/752], Loss: 0.5665
Epoch [1/20], Step [390/752], Loss: 0.6193
Epoch [1/20], Step [391/752], Loss: 0.4722
Epoch [1/20], Step [392/752], Loss: 0.7818
Epoch [1/20], Step [393/752], Loss: 0.6756
Epoch [1/20], Step [394/752], Loss: 0.4900
Epoch [1/20], Step [395/752], Loss: 0.9045
Epoch [1/20], Step [396/752], Loss: 1.2896
Epoch [1/20], Step [397/752], Loss: 0.5498
Epoch [1/20], Step [398/752], Loss: 0.7765
Epoch [1/20], Step [399/752], Loss: 0.4028
Epoch [1/20], Step [400/752], Loss: 0.8440
Epoch [1/20], Step [401/752], Loss: 1.0463
Epoch [1/20], Step [402/752], Loss: 0.6522
Epoch [1/20], Step [403/752], Loss: 0.2990
Epoch [1/20], Step [404/752], Loss: 0.4708
Epoch [1/20], Step [405/752], Loss: 1.4705
Epoch [1/20], Step [406/752], Loss: 1.0521
Epoch [1/20], Step [407/752], Loss: 0.3692
Epoch [1/20], Step [408/752], Loss: 0.3718
Epoch [1/20], Step [409/752], Loss: 0.8507
Epoch [1/20], Step [410/752], Loss: 0.5772
Epoch [1/20

Epoch [1/20], Step [580/752], Loss: 1.2889
Epoch [1/20], Step [581/752], Loss: 0.2972
Epoch [1/20], Step [582/752], Loss: 0.5960
Epoch [1/20], Step [583/752], Loss: 0.4924
Epoch [1/20], Step [584/752], Loss: 0.4980
Epoch [1/20], Step [585/752], Loss: 0.2254
Epoch [1/20], Step [586/752], Loss: 0.4359
Epoch [1/20], Step [587/752], Loss: 0.3220
Epoch [1/20], Step [588/752], Loss: 0.9179
Epoch [1/20], Step [589/752], Loss: 0.4271
Epoch [1/20], Step [590/752], Loss: 1.1367
Epoch [1/20], Step [591/752], Loss: 0.3233
Epoch [1/20], Step [592/752], Loss: 1.3435
Epoch [1/20], Step [593/752], Loss: 1.2401
Epoch [1/20], Step [594/752], Loss: 0.4777
Epoch [1/20], Step [595/752], Loss: 0.6038
Epoch [1/20], Step [596/752], Loss: 0.6501
Epoch [1/20], Step [597/752], Loss: 1.1783
Epoch [1/20], Step [598/752], Loss: 0.5592
Epoch [1/20], Step [599/752], Loss: 1.5792
Epoch [1/20], Step [600/752], Loss: 0.7864
Epoch [1/20], Step [601/752], Loss: 0.9965
Epoch [1/20], Step [602/752], Loss: 0.8275
Epoch [1/20

# Testing model 

In [None]:
model.eval()
# eval mode turns dropout off, so the forward passes below are deterministic
pred_chunks = []
label_chunks = []
with torch.no_grad():
    for batch in test_loader:
        images = batch['outer_product'].to(device)
        labels = batch['Label'].to(device)

        # Forward pass
        outputs = model(images)
        pred_chunks.append(outputs.cpu().detach().numpy().flatten())
        label_chunks.append(labels.cpu().detach().numpy().flatten())

# One concatenation at the end instead of re-copying the running arrays on
# every batch; the leading empty float64 array keeps the result float64 and
# also covers an empty loader.
total_preds = np.concatenate([np.array([]), *pred_chunks])
total_labels = np.concatenate([np.array([]), *label_chunks])

In [None]:
# Root-mean-squared error between the collected labels and predictions
rmse(total_labels, total_preds)

In [None]:
# Mean squared error between the collected labels and predictions
mse(total_labels, total_preds)

In [None]:
# Pearson correlation between the collected labels and predictions
pearson(total_labels, total_preds)

In [None]:
# Concordance index (lifelines) of the predictions against the collected labels
ci(total_labels, total_preds)