In [1]:
### For automatically reloading import modules... allows you to run changes to code in jupyter without having to reload
%load_ext autoreload
%autoreload 2

In [2]:
# import zipfile
# import h5py
import os
import sys
import time
import timeit
import pickle

import numpy as np
import matplotlib.pyplot as plt
import pandas as pd

from sklearn.model_selection import train_test_split

from tqdm import tqdm

from rdkit import Chem
from rdkit.Chem.Draw import IPythonConsole
IPythonConsole.drawOptions.addAtomIndices = True

from photocatalysis.learners_treesearch import get_population_completed
from photocatalysis.deeplearning.helpers import get_charset, smiles_to_onehot

In [33]:
import torch
import torch.optim as optim
import torchinfo

print(f'CUDA GPU Available: {torch.cuda.is_available()}')

In [4]:
df = pd.read_json('/home/btpq/bt308495/Thesis/frames/DF_COMPLETE.json', orient='split')
df = get_population_completed(df)

In [5]:
input_size = 80
char_list, max_smi_len = get_charset(df.molecule_smiles)
data = smiles_to_onehot(df.molecule_smiles, char_list, input_size)

In [6]:
# 80/5/15 Train/Val/Test Split
data_train, data_test = train_test_split(data, test_size=0.2, shuffle=False)
data_valid, data_test = train_test_split(data_test, test_size=0.75, shuffle=False)

In [7]:
data_train_tensor = torch.from_numpy(data_train)
data_valid_tensor = torch.from_numpy(data_valid)
data_test_tensor = torch.from_numpy(data_test)

# data_train_tensor_loader = torch.utils.data.TensorDataset(data_train_tensor)
train_loader = torch.utils.data.DataLoader(data_train_tensor, batch_size=250, shuffle=True)
valid_loader = torch.utils.data.DataLoader(data_valid_tensor, batch_size=250, shuffle=False)
test_loader = torch.utils.data.DataLoader(data_test_tensor, batch_size=250, shuffle=False)

-----

In [34]:
from photocatalysis.deeplearning.models import VAE, train_epoch

In [35]:
# Model and training definitions
torch.manual_seed(42)
device = 'cuda' if torch.cuda.is_available() else 'cpu'
epochs = 120

# NEW MODEL
model = VAE(INPUT_SIZE=input_size,
            CHARSET_LEN=len(char_list),
            LATENT_DIM=292,
            filter_sizes=(5,5,6),
            kernel_sizes=(5,5,7)).to(device)

# LOAD PREV MODEL
# LOAD PREV MODEL
# model = VAE()
# model.load_state_dict(torch.load('/content/drive/MyDrive/VAE_model_parmas.pt', map_location=torch.device(device)))
# model.to(device)

optimizer = optim.Adam(model.parameters())

In [39]:
# batch_size = 1
# torchinfo.summary(VAE(), input_size=(batch_size, 120, 33))
# torchinfo.summary(model, input_size=(batch_size, input_size, len(char_list)))

In [38]:
# Training
tls, vls = [], []
for epoch in range(1, epochs+1):
    training_losses, validation_loss = train_epoch(train_loader,
                                                   model,
                                                   optimizer,
                                                   validation_data_loader=valid_loader,
                                                   device=device,
                                                   charset=char_list,
                                                   epoch=epoch)
    
    tls.append(training_losses), vls.append(validation_loss)
    break

################ epoch 1 ################
TRAINING


  0%|          | 0/4 [00:00<?, ?it/s]

100%|██████████| 4/4 [00:17<00:00,  4.47s/it]


VALIDATING


100%|██████████| 4/4 [00:06<00:00,  1.60s/it]

SUMARRY
Epoch took: 0.40474824571671586 mins
Mean Training Loss: 286.23353125
Mean Validation Loss: 288.2697265625
---------------------
Random Sampled Input, Ouput Smiles:
c1ccc(-c2cnc3ncc(-c4ccccc4)cc3c2)cc1                                            
                                                                                



