In [1]:
import numpy as np
import pandas as pd
from torch.utils.data import DataLoader, Dataset
import torch.nn as nn
import torch
import time
import random

In [2]:
# Pick the compute device: the first CUDA GPU when one is visible, else the CPU.
use_cuda = torch.cuda.is_available()
device = torch.device("cuda:0") if use_cuda else torch.device("cpu")

# Turn on cuDNN's benchmark (auto-tuning) mode for the whole session.
torch.backends.cudnn.benchmark = True

In [3]:
# Mount Google Drive at /content/drive so the files under drive/MyDrive that
# later cells read (Data.zip, full_model_best.pt) are reachable.
# Requires the google.colab package, i.e. a Colab runtime.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [4]:
from zipfile import ZipFile

# Unpack the dataset archive from the mounted Drive into the current working
# directory; the context manager closes the archive handle afterwards.
with ZipFile(file='drive/MyDrive/Data.zip', mode='r') as zipObj:
    zipObj.extractall(path='.')

In [5]:
class TrainDataset(Dataset):
    """Map-style dataset that serves rows of an in-memory array as float32 tensors.

    Despite the name, it carries features only (no labels) and this notebook
    uses it to wrap the test set.

    Args:
        data: Array exposing ``.shape`` whose integer indexing yields NumPy
            arrays (``torch.from_numpy`` accepts nothing else), e.g. a 2-D
            numeric ``np.ndarray`` with one sample per row.
    """

    def __init__(self, data):
        self.data = data

    def __len__(self):
        # One sample per entry along the leading axis.
        n_samples = self.data.shape[0]
        return n_samples

    def __getitem__(self, idx):
        # Convert the selected row to a tensor, then cast to float32.
        row = self.data[idx]
        return torch.from_numpy(row).to(torch.float32)

In [6]:
class MolecularNet(nn.Module):
    """Holder for three sub-networks: ``encoder``, ``decoder`` and ``regressor``.

    All three start out as ``None``; a usable instance needs them populated
    (in this notebook the populated model comes from a saved checkpoint).
    Each ``forward_*`` method simply applies the matching sub-network.
    """

    def __init__(self):
        super().__init__()

        # Placeholders, assigned in the same order as the attributes are named.
        for part in ("encoder", "decoder", "regressor"):
            setattr(self, part, None)

    def forward_enc(self, x):
        """Apply the encoder to ``x`` and return its output."""
        return self.encoder(x)

    def forward_dec(self, x):
        """Apply the decoder to ``x`` and return its output."""
        return self.decoder(x)

    def forward_reg(self, x):
        """Apply the regressor to ``x`` and return its output."""
        return self.regressor(x)

In [7]:
# Load the saved model object (a whole pickled module, not a state_dict) onto
# the CPU first, then move it to the selected device.
#
# weights_only=False is passed explicitly: PyTorch 2.6 changed the default to
# True, under which loading a fully pickled nn.Module raises an unpickling
# error.
# SECURITY: full unpickling can execute arbitrary code -- only load checkpoint
# files from a trusted source.
reg = torch.load(
    "drive/MyDrive/full_model_best.pt",
    map_location=torch.device('cpu'),
    weights_only=False,
).to(device)

# Freeze every parameter; the model is used for inference only.
reg.requires_grad_(False)

# Switch to evaluation mode (e.g. disables dropout), then display the model.
reg.eval()
reg

MolecularNet(
  (encoder): Sequential(
    (0): Linear(in_features=1000, out_features=800, bias=True)
    (1): LeakyReLU(negative_slope=0.01)
    (2): Linear(in_features=800, out_features=512, bias=True)
    (3): LeakyReLU(negative_slope=0.01)
    (4): Linear(in_features=512, out_features=256, bias=True)
    (5): LeakyReLU(negative_slope=0.01)
    (6): Linear(in_features=256, out_features=128, bias=True)
  )
  (decoder): Sequential(
    (0): Linear(in_features=128, out_features=256, bias=True)
    (1): LeakyReLU(negative_slope=0.01)
    (2): Linear(in_features=256, out_features=512, bias=True)
    (3): LeakyReLU(negative_slope=0.01)
    (4): Linear(in_features=512, out_features=700, bias=True)
    (5): LeakyReLU(negative_slope=0.01)
    (6): Linear(in_features=700, out_features=1000, bias=True)
  )
  (regressor): Sequential(
    (0): Linear(in_features=128, out_features=64, bias=True)
    (1): Dropout(p=0.5, inplace=False)
    (2): LeakyReLU(negative_slope=0.01)
    (3): Linear(in_feat

In [8]:
# Read the test features and discard the two non-feature columns.
test_features = pd.read_csv("Data/test_features.csv").drop(columns=['Id', 'smiles'])
test = test_features.to_numpy()

# Wrap the array in a dataset and iterate it in file order (no shuffling) so
# predictions stay aligned with the rows of the CSV.
test_dataset = TrainDataset(test)
test_loader = DataLoader(
    test_dataset,
    batch_size=64,
    shuffle=False,
    num_workers=0,
    pin_memory=True,
)

In [9]:
# Run the frozen model (encoder -> regressor) over the test set.
#
# Per-batch results are collected in a list and concatenated once, instead of
# growing a tensor with torch.cat on every iteration (quadratic copying). This
# also avoids seeding the accumulator with an empty bool tensor, which only
# worked through torch.cat's dtype promotion and its special case for empty
# 1-D tensors.
batch_outputs = []
with torch.no_grad():  # inference only: skip autograd bookkeeping
    for batch in test_loader:
        batch = batch.to(device)
        res = reg.encoder(batch)
        res = reg.regressor(res)
        batch_outputs.append(res)

# Single concatenation along the batch axis, then hand over to NumPy.
output = torch.cat(batch_outputs).cpu().numpy()
# Flatten to one prediction per row; this raises if the regressor emits more
# than one value per sample.
output = np.reshape(output, (output.shape[0],))

# Hard-coded submission Ids (50100 .. 60099). The name `id` shadows the
# builtin but is kept because the submission cell reads it.
# NOTE(review): assumes the test CSV's Id column is exactly this contiguous
# range in file order -- confirm against Data/test_features.csv.
id = np.arange(50100, 60100, dtype=int)

# Fail early with a clear message rather than later at DataFrame construction.
assert output.shape[0] == id.shape[0], (
    f"got {output.shape[0]} predictions but {id.shape[0]} submission Ids"
)

In [10]:
# Display the flattened prediction array as a quick visual sanity check.
output

array([1.7726443, 2.1701944, 1.6238587, ..., 1.6117365, 2.06863  ,
       1.3438056], dtype=float32)

In [13]:
# Assemble the submission table: one prediction column `y`, indexed by `Id`.
df = pd.DataFrame({'Id': id, 'y': output}).set_index('Id')

# NOTE(review): '%4f' is a minimum field width of 4 with the default six
# decimals; if four decimal places were intended it would be '%.4f' -- confirm.
df.to_csv("Submission.csv", header=True, float_format='%4f')

# Copy the submission file into the mounted Drive folder (IPython shell escape).
!cp Submission.csv drive/MyDrive/Submission.csv