In [None]:
import numpy as np
import pandas as pd
from torch.utils.data import DataLoader, Dataset
import torch.nn as nn
import torch
import time
import random

In [None]:
# Seed the PyTorch, Python and NumPy random number generators with the same
# value so that repeated runs of the notebook are reproducible.
SEED = 13
for seed_rng in (torch.manual_seed, random.seed, np.random.seed):
    seed_rng(SEED)

In [None]:
# Run on the first CUDA device when one is available, otherwise on the CPU.
use_cuda = torch.cuda.is_available()
if use_cuda:
    device = torch.device("cuda:0")
else:
    device = torch.device("cpu")

# Turn on cuDNN benchmark mode.
torch.backends.cudnn.benchmark = True

In [None]:
# Mount Google Drive (Colab only) so the data, the helper module and the
# trained model under drive/MyDrive become reachable from this runtime.
from google.colab import drive
drive.mount('/content/drive')

# Copy the helper module into the working directory so it can be imported.
!cp drive/MyDrive/task4_lib.py .

# Star import: brings every public name of task4_lib into the notebook
# namespace. NOTE(review): TrainDataset, used further down, is not defined in
# this notebook and presumably comes from here -- confirm.
from task4_lib import *

Mounted at /content/drive


In [None]:
# Regression model for the HOMO-LUMO gap
class GapRegressor(nn.Module):
    """Encoder followed by a linear regression head.

    The constructor only builds the head (20 inputs -> 1 output) and leaves
    ``encoder`` as ``None``; an encoder module has to be assigned to the
    ``encoder`` attribute before the model can be called.
    """

    def __init__(self):
        """Create the regression head and an empty encoder slot."""
        super().__init__()

        # Placeholder; must be replaced by a callable module before forward().
        self.encoder = None

        # Single linear layer mapping the 20 encoder outputs to one value.
        self.regressor = nn.Sequential(nn.Linear(20, 1))

    def forward(self, x):
        """Apply the encoder to ``x`` and feed the result to the head."""
        return self.regressor(self.encoder(x))

In [None]:
from zipfile import ZipFile

# Unpack the data archive from Drive into the current working directory.
with ZipFile('drive/MyDrive/Data.zip', 'r') as archive:
    archive.extractall('.')

In [None]:
# Load the saved model onto the CPU first, then move it to the selected device.
reg = torch.load(
    "drive/MyDrive/molecular_epoch_5000.pt",
    map_location=torch.device('cpu'),
)
reg = reg.to(device)

# Freeze every parameter: the model is only used for inference from here on.
reg.requires_grad_(False)

# Switch to evaluation mode; as the last expression this also displays the
# module structure in the cell output.
reg.eval()

GapRegressor(
  (regressor): Sequential(
    (0): Linear(in_features=20, out_features=1, bias=True)
  )
  (encoder): Sequential(
    (0): Linear(in_features=1000, out_features=900, bias=True)
    (1): LeakyReLU(negative_slope=0.01)
    (2): Linear(in_features=900, out_features=500, bias=True)
    (3): LeakyReLU(negative_slope=0.01)
    (4): Linear(in_features=500, out_features=20, bias=True)
  )
)

In [None]:
# Build the DataLoader for the test set.
# Read the test features and drop the two columns that are not fed to the model.
test_features = (
    pd.read_csv("Data/test_features.csv")
    .drop(columns=['Id', 'smiles'])
)

# Remaining columns as a plain NumPy array, wrapped in the dataset class.
test = test_features.to_numpy()
test_dataset = TrainDataset(test)

# shuffle=False keeps the predictions in the same order as the rows of the CSV.
test_loader = DataLoader(
    test_dataset,
    batch_size=64,
    shuffle=False,
    num_workers=0,
    pin_memory=True,
)

In [None]:
# Calculate the predictions for the whole test set.
# Per-batch results are collected in a list and concatenated once, instead of
# growing a tensor with torch.cat on every iteration.
batch_outputs = []

# Pure inference: no autograd bookkeeping is needed.
with torch.no_grad():
    for batch in test_loader:
        batch = batch.to(device)
        res = reg(batch)
        batch_outputs.append(res)

if batch_outputs:
    # Move the result to host memory and convert it to a NumPy array: a tensor
    # living on a CUDA device cannot be converted implicitly by NumPy/pandas,
    # so handing it to a DataFrame later would fail when running on the GPU.
    output = torch.cat(batch_outputs).detach().cpu().numpy()
else:
    # Empty loader: keep a well-formed, empty result.
    output = np.empty((0, 1), dtype=np.float32)

# Flatten to one value per sample.
output = np.reshape(output, (output.shape[0],))

# Identifiers paired with the predictions: the fixed range 50100..60099.
id = np.arange(50100, 60100, dtype=int)

In [None]:
# Assemble the submission table (one 'y' value per 'Id', with 'Id' as the
# index) and write it to a CSV file in the working directory.
df = pd.DataFrame({'Id': id, 'y': output}).set_index('Id')

df.to_csv("submission.csv", header=True, float_format='%f')

In [None]:
# Copy the generated submission.csv from the working directory to Google Drive
# under the name final_submission.csv (shell command; needs the Drive mount).
!cp submission.csv drive/MyDrive/final_submission.csv