In [82]:
import torch
from torch import nn

In [83]:
class Classifier(nn.Module):
    
    def __init__(self):
        super(Classifier, self).__init__()
        self.fnn = nn.Sequential(
            nn.BatchNorm1d(42632),
            nn.Linear(42632,512),
            nn.ReLU(),
            nn.Linear(512,256),
            nn.ReLU(),
            nn.Linear(256,128),
            nn.ReLU(),
            nn.Linear(128,86),
        )
        
    def forward(self, X):
        return self.fnn(X)

In [84]:
classifier =  Classifier()

### Import Dataset from DDI

In [85]:
#Obtaining Dataset
from tdc.multi_pred import DDI
data = DDI(name = 'DrugBank')
split = data.get_split()

Found local copy...
Loading...
Done!


#### Remove all the data that are not present in our encoded molecular formula

In [86]:
import os
mol_dataset_dir = os.path.join(os.path.join(os.getcwd()), 'image_encoded_data')
all_filenames = os.listdir(mol_dataset_dir)
all_encoded = set([x.split(".")[0].strip() for x in all_filenames])

In [87]:
rows_to_be_removed = []
for count, value in split["train"].iterrows():
    if(value["Drug1_ID"]not in all_encoded or value["Drug2_ID"]not in all_encoded):
        rows_to_be_removed.append(count)
split["train"].drop(rows_to_be_removed,inplace=True)

rows_to_be_removed = []
for count, value in split["test"].iterrows():
    if(value["Drug1_ID"]not in all_encoded or value["Drug2_ID"]not in all_encoded):
        rows_to_be_removed.append(count)
split["test"].drop(rows_to_be_removed,inplace=True)

In [None]:
to_tensor = torchvision.transforms.ToTensor()
class Classifier_Data_Loader(Dataset):
    def __init__(self, data, transform=None):
        self.data = data
        self.transform = transform

    def __len__(self):
        return self.data.shape[0]

    def __getitem__(self, idx):
        f1 = np.array(chem_to_feature_map[self.data.iloc[idx,0]])
        f2 = np.array(chem_to_feature_map[self.data.iloc[idx,1]])
        f = torch.from_numpy(np.concatenate([f1,f2],axis = 0))
        l = self.data.iloc[idx,2]
        return f,l

### Create a Dataset for training the classifier

In [88]:
import os
import PIL
import torch
import numpy as np
import pandas as pd
from torch.utils.data import Dataset


In [121]:
class EncodedDrugData(Dataset):
    def __init__(self, setname ):
        self.setname = setname
        assert setname in ['train','test']
        self.overall_dataset_dir = os.path.join(os.path.join(os.getcwd()), 'image_encoded_data')
        self.all_filenames = os.listdir(self.overall_dataset_dir)
        split[self.setname].reset_index()
        self.iter_data = split[self.setname]

    
    def __len__(self):
        return len(split[self.setname])
        
    def __getitem__(self, idx):
        current_data  = self.iter_data.iloc[idx,:]
        drug1 = torch.load(self.overall_dataset_dir+"/"+current_data["Drug1_ID"]+".pt")
        drug2 = torch.load(self.overall_dataset_dir+"/"+current_data["Drug2_ID"]+".pt")
        sample = {'data':torch.stack((drug1,drug2)).flatten(), #preprocessed image, for input into NN
                  'label':torch.tensor(current_data["Y"]-1),
                  'img_idx':idx}
        return sample

In [122]:
encoded_drug_data = EncodedDrugData("train")

#### Samples from the final dataset

In [123]:
iter_encoded_drug_data  = iter(encoded_drug_data)
next(iter_encoded_drug_data)

{'data': tensor([ 0.3689,  0.3689,  0.3689,  ..., -0.9818, -0.9818, -0.9818],
        grad_fn=<ReshapeAliasBackward0>),
 'label': tensor(0),
 'img_idx': 0}

### Train the classifier

In [124]:
from torch.utils.data import DataLoader

In [125]:
data_classifier_train = EncodedDrugData("train")
data_classifier_test = EncodedDrugData("test")

data_train = DataLoader(data_classifier_train, batch_size = 1024, shuffle = True)
data_test = DataLoader(data_classifier_test, batch_size = 38362, shuffle = False)

### Loss function

In [126]:
# Validation using CrossEntropy Loss function
loss_function = torch.nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(classifier.parameters(), lr = 1e-2)
epochs = 10

In [127]:
def accuracy(predicted, actual):
    accuracy = 0 
    predicted = predicted.detach().numpy()
    actual = actual.detach().numpy()
    print('1',np.argmax(predicted,axis = 1).shape)
    print('2',(np.argmax(predicted,axis = 1) == actual).shape)
    x =  (np.argmax(predicted,axis = 1) == actual).sum()/len(predicted)
    return accuracy

In [129]:
test_accuracy = []
train_accuracy = []
for epoch in range(epochs):
    for i, data in enumerate(data_train):
        optimizer.zero_grad()
        print((data["data"]).shape)
        output = classifier(data["data"])
        print((data["label"]))
        print("output",np.argmax(output.detach().numpy(),axis=1))
        loss = loss_function(output, data["label"])
        loss.backward()
        optimizer.step()
        train_accuracy.append(accuracy(output,data["label"]))
        print(train_accuracy[-1])


    for i, data in enumerate(data_test):
        optimizer.zero_grad()
        output_test = classifier(data["data"])
        test_accuracy.append(accuracy(output_test, data["label"]))
    print('Epoch: ',epoch,"Training accuracy: ",train_accuracy[-1], "Testing accuracy",test_accuracy[-1])


torch.Size([1024, 42632])
tensor([46, 59, 48,  ..., 48, 46, 71])
output [49 49 49 ... 49 49 49]
1 (1024,)
2 (1024,)
0
torch.Size([1024, 42632])
tensor([46, 15, 69,  ..., 48, 48, 48])
output [49 49 49 ... 49 49 49]
1 (1024,)
2 (1024,)
0
torch.Size([1024, 42632])
tensor([48, 20, 48,  ..., 48, 72, 34])
output [49 49 49 ... 49 49 49]
1 (1024,)
2 (1024,)
0
torch.Size([1024, 42632])
tensor([48, 52, 72,  ..., 72, 69, 48])
output [49 49 49 ... 49 49 49]
1 (1024,)
2 (1024,)
0
torch.Size([1024, 42632])
tensor([48, 84, 19,  ..., 48, 46, 15])
output [49 49 49 ... 49 49 49]
1 (1024,)
2 (1024,)
0
torch.Size([1024, 42632])
tensor([74, 69, 26,  ..., 46, 59, 72])
output [49 49 49 ... 49 49 49]
1 (1024,)
2 (1024,)
0
torch.Size([1024, 42632])
tensor([48, 48, 48,  ...,  3, 46, 72])
output [49 49 49 ... 49 49 49]
1 (1024,)
2 (1024,)
0
torch.Size([1024, 42632])
tensor([72, 46, 48,  ..., 46, 19, 48])
output [49 49 49 ... 49 49 49]
1 (1024,)
2 (1024,)
0
torch.Size([1024, 42632])
tensor([46, 48, 48,  ..., 48, 

[1.0,
 1.0,
 1.0,
 1.0,
 1.0,
 1.0,
 1.0,
 1.0,
 1.0,
 1.0,
 1.0,
 1.0,
 1.0,
 1.0,
 1.0,
 1.0,
 1.0,
 1.0,
 1.0,
 1.0,
 1.0,
 1.0,
 1.0,
 1.0,
 1.0,
 1.0,
 1.0,
 1.0,
 1.0,
 1.0,
 1.0,
 1.0,
 1.0,
 1.0,
 1.0,
 1.0,
 1.0,
 1.0,
 1.0,
 1.0,
 1.0,
 1.0,
 1.0,
 1.0,
 1.0,
 1.0,
 1.0,
 1.0,
 1.0,
 1.0,
 1.0,
 1.0,
 1.0,
 1.0,
 1.0,
 1.0,
 1.0,
 1.0,
 1.0,
 1.0,
 1.0,
 1.0,
 1.0,
 1.0,
 1.0,
 1.0,
 1.0,
 1.0,
 1.0,
 1.0,
 1.0,
 1.0,
 1.0,
 1.0,
 1.0,
 1.0,
 1.0,
 1.0,
 1.0,
 1.0,
 1.0,
 1.0,
 1.0,
 1.0,
 1.0,
 1.0,
 1.0,
 1.0,
 1.0,
 1.0,
 1.0,
 1.0,
 1.0,
 1.0,
 1.0,
 1.0,
 1.0,
 1.0,
 1.0,
 1.0,
 1.0,
 1.0,
 1.0,
 1.0,
 1.0,
 1.0,
 1.0,
 1.0,
 1.0,
 1.0,
 1.0,
 1.0,
 1.0,
 1.0,
 1.0,
 1.0,
 1.0,
 1.0,
 1.0,
 1.0,
 1.0,
 1.0,
 1.0,
 1.0,
 1.0,
 1.0,
 1.0,
 1.0]