In [None]:

from numpy.random import seed
from sklearn.decomposition import PCA
import sqlite3
import numpy as np
import pandas as pd
from tqdm import tqdm
import warnings
warnings.filterwarnings('ignore')

In [None]:

# Drug table: a pickled DataFrame (later cells read its 'name' column and the
# pipe-delimited feature columns).
# NOTE(review): unpickling can execute arbitrary code -- only load trusted files.
df_drug = pd.read_pickle('/kaggle/input/datasetname/df.pkl')

# Interaction records live in the `extraction` table of a SQLite database; the
# 'index' column is discarded straight after loading.
conn = sqlite3.connect("/kaggle/input/datasetname/event.db")
extraction = pd.read_sql('select * from extraction;', conn).drop(columns=['index'])

In [None]:
# Remove the 'index' and 'id' columns from the drug table.
df_drug = df_drug.drop(columns=['index', 'id'])

In [None]:
def f_extractor(df, f_list):
    """Expand pipe-delimited string columns into 0/1 indicator columns, in place.

    For every column named in ``f_list`` each cell is split on ``'|'`` and one
    int64 indicator column per distinct token is added to ``df``; a row gets 1
    in a token's column when the token occurs in that row.  The source columns
    in ``f_list`` are dropped afterwards.

    Differences from a naive per-row implementation:

    * Token columns are added in sorted order, so the resulting column layout
      is reproducible across interpreter sessions (set iteration order is not).
    * A token shared by several features keeps the 1s set by earlier features
      (the flags are OR-ed) instead of being reset to 0 by a later feature.
    * Rows are addressed by position, so a non-unique index is handled.

    Args:
        df: DataFrame to modify in place. Every cell of the ``f_list`` columns
            must be a ``str``.
        f_list: Names of the pipe-delimited columns to expand.

    Returns:
        None. ``df`` is mutated.
    """
    created = set()  # indicator columns added by this call
    for feature in f_list:
        token_lists = [value.split('|') for value in df[feature].tolist()]

        unique = set()
        for tokens in token_lists:
            unique.update(tokens)
        ordered = sorted(unique)  # deterministic column order
        column_of = {token: j for j, token in enumerate(ordered)}

        # Dense row x token membership matrix for this feature.
        flags = np.zeros((len(token_lists), len(ordered)), dtype=np.int64)
        for i, tokens in enumerate(token_lists):
            for token in tokens:
                flags[i, column_of[token]] = 1

        for j, token in enumerate(ordered):
            if token in created:
                # Token already produced by an earlier feature: merge, don't reset.
                df[token] = df[token].to_numpy() | flags[:, j]
            else:
                df[token] = flags[:, j].copy()
                created.add(token)
    df.drop(columns=f_list, inplace=True)

In [None]:
# Pipe-delimited columns of the drug table that get expanded into indicators.
f_list = ['side', 'target', 'enzyme', 'pathway', 'smile']

# Mutates df_drug in place; the call itself returns nothing.
f_extractor(df=df_drug, f_list=f_list)

In [None]:
import torch
import torch.nn as nn
import torch.optim as optim

In [None]:
class Autoencoder(nn.Module):
    """Single-hidden-layer autoencoder.

    ``encoder`` is Linear(input_dim -> encoding_dim) followed by ReLU and
    ``decoder`` is Linear(encoding_dim -> input_dim) followed by Sigmoid, so
    every reconstructed value lies in [0, 1].
    """

    def __init__(self, input_dim, encoding_dim):
        super().__init__()
        compress = nn.Linear(input_dim, encoding_dim)
        self.encoder = nn.Sequential(compress, nn.ReLU())
        expand = nn.Linear(encoding_dim, input_dim)
        self.decoder = nn.Sequential(expand, nn.Sigmoid())

    def forward(self, x):
        """Return the reconstruction of ``x`` (encode, then decode)."""
        return self.decoder(self.encoder(x))

In [None]:
# Use the GPU when one is available, otherwise the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")


def _autoencoder_pass(autoencoder, loader, criterion, run_device, optimizer=None):
    """Run one pass over ``loader``; weights are updated only if ``optimizer`` is given.

    Returns ``(mean_loss, accuracy)``: the per-sample average of the batch
    losses and the fraction of elements whose thresholded (> 0.5) output equals
    the input.  Both are NaN when the loader yields no data.
    """
    loss_sum = 0.0
    num_samples = 0
    num_correct = 0
    num_elements = 0
    for data in loader:
        inputs = data.to(run_device)
        if optimizer is not None:
            optimizer.zero_grad()
        outputs = autoencoder(inputs)
        loss = criterion(outputs, inputs)
        if optimizer is not None:
            loss.backward()
            optimizer.step()

        # Weight each batch by its size so a short last batch does not skew
        # the epoch average.
        batch_size = inputs.size(0)
        loss_sum += loss.item() * batch_size
        num_samples += batch_size

        predicted = (outputs > 0.5).float()
        num_correct += (predicted == inputs).sum().item()
        num_elements += inputs.numel()

    mean_loss = loss_sum / num_samples if num_samples else float('nan')
    accuracy = num_correct / num_elements if num_elements else float('nan')
    return mean_loss, accuracy


def train_autoencoder(autoencoder, train_loader, test_loader, num_epochs):
    """Train ``autoencoder`` to reconstruct its inputs with BCE loss and Adam (lr=0.001).

    Each epoch runs one optimisation pass over ``train_loader`` and one
    gradient-free evaluation pass over ``test_loader``.  Every 50th epoch a
    summary line with loss and element-wise reconstruction accuracy is printed.

    Args:
        autoencoder: Module whose output has the same shape as its input and
            lies in [0, 1] (required by ``nn.BCELoss``).
        train_loader: Iterable of input batches used for optimisation.
        test_loader: Iterable of input batches used for evaluation only; may be
            empty, in which case the test metrics are reported as NaN.
        num_epochs: Number of epochs to run.

    Returns:
        None. ``autoencoder`` is updated in place.
    """
    criterion = nn.BCELoss()
    optimizer = optim.Adam(autoencoder.parameters(), lr=0.001)

    # Send batches to wherever the model actually lives rather than assuming
    # the module-level default.
    first_param = next(autoencoder.parameters(), None)
    run_device = first_param.device if first_param is not None else device

    for epoch in range(num_epochs):
        train_loss, train_acc = _autoencoder_pass(
            autoencoder, train_loader, criterion, run_device, optimizer
        )
        with torch.no_grad():
            test_loss, test_acc = _autoencoder_pass(
                autoencoder, test_loader, criterion, run_device
            )

        if (epoch + 1) % 50 == 0:
            print('Epoch [{}/{}], Train Loss: {:.4f}, Train Acc: {:.4f}, Test Loss: {:.4f}, Test Acc: {:.4f}'.format(epoch+1, num_epochs, train_loss, train_acc, test_loss, test_acc))

In [None]:
from torch.utils.data import Dataset, DataLoader

class DrugDataset(Dataset):
    """Dataset exposing the rows of a numeric DataFrame as float32 tensors."""

    def __init__(self, df):
        # Convert the whole frame once; individual rows are then plain slices.
        as_float = df.values.astype('float32')
        self.data = torch.tensor(as_float)

    def __len__(self):
        """Number of rows in the underlying frame."""
        return self.data.shape[0]

    def __getitem__(self, idx):
        """Return row ``idx`` as a tensor."""
        row = self.data[idx]
        return row

from sklearn.model_selection import train_test_split

# Hold out 20% of the drugs to monitor reconstruction quality.
df_train, df_test = train_test_split(df_drug, test_size=0.2, random_state=42)

# 'name' is an identifier, not a feature, so it is excluded from the tensors.
train_dataset = DrugDataset(df_train.drop(columns=['name']))
test_dataset = DrugDataset(df_test.drop(columns=['name']))

# Derive the input width from the data instead of hard-coding it, so the model
# always matches the number of feature columns actually present.
input_dim = train_dataset.data.shape[1]
encoding_dim = 512
num_epochs = 1_000

autoencoder = Autoencoder(input_dim, encoding_dim).to(device)

train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
# Evaluation metrics do not depend on batch order, so no shuffling is needed.
test_loader = DataLoader(test_dataset, batch_size=32, shuffle=False)

train_autoencoder(autoencoder, train_loader, test_loader, num_epochs)

In [None]:
encoder = autoencoder.encoder
# Freeze the encoder weights.  Iterating over the Sequential itself yields its
# sub-modules (where setting `requires_grad` is just a plain attribute), so the
# flag has to be set on the parameters.
for param in encoder.parameters():
    param.requires_grad = False

In [None]:
from sklearn import preprocessing

# The interaction type is the concatenation of the 'mechanism' and 'action'
# strings; LabelEncoder maps each distinct string to an integer class id that
# is stored in 'side'.
le = preprocessing.LabelEncoder()
interaction_type = extraction['mechanism'] + extraction['action']
extraction['side'] = le.fit_transform(interaction_type)
extraction.drop(columns=['mechanism', 'action'], inplace=True)

In [None]:
class DDIDataset(Dataset):
    """Drug-pair dataset: one sample per row of ``extraction``.

    Every drug in ``df`` is passed through ``encoder`` once at construction
    time (without gradient tracking).  Sample ``idx`` is the pair of encoded
    vectors for ``extraction['drugA'][idx]`` and ``extraction['drugB'][idx]``,
    stacked into a ``(2, encoding_dim)`` tensor, together with the value of
    ``extraction['side']`` at that position.

    Args:
        df: Drug table with a ``'name'`` column; all other columns are numeric
            features fed to ``encoder``.  If a name occurs more than once, the
            first row with that name is used.
        extraction: Interaction table with ``'drugA'``, ``'drugB'`` and
            ``'side'`` columns.  Rows are addressed by position.
        encoder: Callable mapping an ``(n, n_features)`` float32 tensor to an
            ``(n, encoding_dim)`` tensor.  Because the encodings are cached
            here, later changes to the encoder's weights are not reflected.
    """

    def __init__(self, df, extraction, encoder):
        self.extraction = extraction
        self.df = df
        self.encoder = encoder

        # name -> row position in the feature matrix (first occurrence wins).
        self._row_of = {}
        for position, name in enumerate(df['name'].tolist()):
            self._row_of.setdefault(name, position)

        features = torch.tensor(df.drop(columns=['name']).values.astype('float32'))

        # Run the encoder on its own device; fall back to the CPU for
        # parameter-free callables.
        parameters = getattr(encoder, 'parameters', None)
        first_param = next(parameters(), None) if callable(parameters) else None
        target = first_param.device if first_param is not None else torch.device('cpu')

        # The encoder is a fixed feature extractor here: encode every drug once
        # and keep the result out of the autograd graph.
        with torch.no_grad():
            self._codes = encoder(features.to(target))

        self._drug_a = extraction['drugA'].tolist()
        self._drug_b = extraction['drugB'].tolist()
        self._labels = extraction['side'].tolist()

    def __len__(self):
        """Number of interaction records (not the number of drugs)."""
        return len(self._labels)

    def __getitem__(self, idx):
        """Return ``(pair_features, label)`` for interaction ``idx``.

        Raises:
            KeyError: If either drug of the interaction has no row in ``df``.
        """
        try:
            row_a = self._row_of[self._drug_a[idx]]
            row_b = self._row_of[self._drug_b[idx]]
        except KeyError as missing:
            raise KeyError(
                f"drug {missing} of interaction {idx} has no feature row"
            ) from None
        pair = torch.stack([self._codes[row_a], self._codes[row_b]])
        return pair, self._labels[idx]

In [None]:
# Pair dataset built from the drug table, the interaction records and the encoder.
ddset = DDIDataset(df=df_drug, extraction=extraction, encoder=encoder)

In [None]:
def train2(model, train_loader, test_loader, criterion, optimizer, num_epochs):
    """Train a classifier and print per-epoch loss and accuracy.

    Each epoch performs one optimisation pass over ``train_loader`` followed by
    a gradient-free evaluation pass over ``test_loader``.  Losses are averaged
    per sample and accuracy is the fraction of samples whose arg-max prediction
    equals the label.  All averages use the number of samples actually seen in
    the given loaders (NaN if a loader is empty), so the function does not
    depend on any module-level dataset.

    Args:
        model: Module mapping a batch of inputs to ``(batch, n_classes)`` scores.
        train_loader: Iterable of ``(inputs, labels)`` batches for optimisation.
        test_loader: Iterable of ``(inputs, labels)`` batches for evaluation.
        criterion: Loss called as ``criterion(outputs, labels)``; assumed to
            return the batch mean.
        optimizer: Optimizer already bound to ``model``'s parameters.
        num_epochs: Number of epochs to run.

    Returns:
        None. ``model`` is updated in place.
    """
    # Move batches to the device the model lives on.
    first_param = next(model.parameters(), None)
    run_device = first_param.device if first_param is not None else torch.device("cpu")

    for epoch in range(num_epochs):
        # ---- training pass ----
        running_train_loss = 0.0
        correct_train = 0
        total_train = 0
        for inputs, labels in train_loader:
            inputs = inputs.to(run_device)
            labels = labels.to(run_device)

            optimizer.zero_grad()
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()

            batch_size = labels.size(0)
            predicted = outputs.detach().argmax(dim=1)
            correct_train += (predicted == labels).sum().item()
            total_train += batch_size
            # Undo the batch mean so the epoch figure is a per-sample average.
            running_train_loss += loss.item() * batch_size

        train_loss = running_train_loss / total_train if total_train else float('nan')
        train_acc = correct_train / total_train if total_train else float('nan')

        # ---- evaluation pass ----
        running_test_loss = 0.0
        correct_test = 0
        total_test = 0
        with torch.no_grad():
            for inputs, labels in test_loader:
                inputs = inputs.to(run_device)
                labels = labels.to(run_device)
                outputs = model(inputs)
                loss = criterion(outputs, labels)

                batch_size = labels.size(0)
                predicted = outputs.argmax(dim=1)
                correct_test += (predicted == labels).sum().item()
                total_test += batch_size
                running_test_loss += loss.item() * batch_size

        test_loss = running_test_loss / total_test if total_test else float('nan')
        test_acc = correct_test / total_test if total_test else float('nan')

        print(f"Epoch {epoch+1} - Train Loss: {train_loss:.4f}, Train Acc: {train_acc:.4f}, Test Loss: {test_loss:.4f}, Test Acc: {test_acc:.4f}")

In [None]:
class ClassificationModel(nn.Module):
    """Two-layer MLP that classifies a pair of encoded drugs.

    The forward pass returns raw, unnormalised class scores (logits).  This is
    what ``nn.CrossEntropyLoss`` expects, since that loss applies log-softmax
    itself; applying softmax in the model as well would squash the scores
    twice.  Use ``torch.softmax(logits, dim=1)`` when probabilities are needed;
    the arg-max class is the same either way.

    Args:
        encoding_dim: Length of one drug encoding; the input layer takes two
            of them concatenated.
        num_classes: Number of output classes.
        hidden_dim: Width of the hidden layer.
    """

    def __init__(self, encoding_dim=512, num_classes=65, hidden_dim=256):
        super().__init__()
        self.fc1 = nn.Linear(2 * encoding_dim, hidden_dim)
        self.fc2 = nn.Linear(hidden_dim, num_classes)

    def forward(self, x):
        """Return logits of shape ``(batch, num_classes)``."""
        # Flatten each (2, encoding_dim) pair into one feature vector.
        x = x.reshape(-1, self.fc1.in_features)
        x = nn.functional.relu(self.fc1(x))
        return self.fc2(x)

In [None]:
# 80/20 random split of the drug-pair dataset.
train_size = int(len(ddset) * 0.8)
test_size = len(ddset) - train_size
train_dataset, test_dataset = torch.utils.data.random_split(
    dataset=ddset, lengths=[train_size, test_size]
)

# Batched, shuffled loaders over both subsets.
train_loader = torch.utils.data.DataLoader(dataset=train_dataset, batch_size=32, shuffle=True)
test_loader = torch.utils.data.DataLoader(dataset=test_dataset, batch_size=32, shuffle=True)

In [None]:
# Classifier, placed on the selected device
model = ClassificationModel().to(device)

# Cross-entropy objective optimised with plain SGD
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.SGD(params=model.parameters(), lr=0.01)

# Run the training loop (reuses the num_epochs value set for the autoencoder)
train2(
    model=model,
    train_loader=train_loader,
    test_loader=test_loader,
    criterion=criterion,
    optimizer=optimizer,
    num_epochs=num_epochs,
)