In [1]:
# !pip install dgl-cu102 hyperopt ase 

In [7]:
import os

# DGL chooses its tensor backend while the package is being imported, so the
# environment variable has to be in place before `dgl` -- or any module that
# may import it -- is loaded. Setting it afterwards has no effect.
# NOTE(review): the local `dataset` / `network` modules presumably import dgl
# as well, which is why they are imported after this assignment -- confirm.
os.environ['DGLBACKEND'] = "pytorch"

# Standard library
import pickle
import warnings
from datetime import datetime
from functools import reduce
from multiprocessing import cpu_count
from time import perf_counter

# Third-party
import numpy as np
import torch as th
import dgl
from dgl.nn.pytorch.glob import SumPooling
import torchsummary
import wandb

# Project-local
from dataset import CrystalsDataset, partitions
from network import CG_CNN_Layer, Net

In [3]:
# Pick the compute device: the first CUDA GPU when one is visible to PyTorch,
# otherwise fall back to the CPU.
use_cuda = th.cuda.is_available()
if use_cuda:
    device = th.device("cuda:0")
else:
    device = th.device("cpu")

# Enable the cuDNN auto-tuner flag.
th.backends.cudnn.benchmark = True

In [4]:
# Index partitions for the three splits, as returned by `partitions()`.
train_idxs, valid_idxs, test_idxs = partitions()

# One CrystalsDataset per split, built in train / validation / test order.
training_set = CrystalsDataset(train_idxs)
validation_set = CrystalsDataset(valid_idxs)
test_set = CrystalsDataset(test_idxs)

# Every DataLoader below uses one worker process per CPU reported by the OS.
num_workers = cpu_count()

def collate(samples):
    """Assemble a list of dataset samples into one mini-batch.

    Each element of ``samples`` is unpacked as a ``(graph, target, index)``
    triple. The graphs are merged with ``dgl.batch``; the targets and the
    indexes are each turned into a tensor with ``th.tensor``.

    Returns:
        tuple: ``(batched_graph, batched_targets, batched_indexes)``.
    """
    # Transpose the list of triples into three parallel lists.
    graph_list, target_list, index_list = (
        list(column) for column in zip(*samples)
    )
    return (
        dgl.batch(graph_list),
        th.tensor(target_list),
        th.tensor(index_list),
    )

# Test loader: one sample per batch.
test_generator = th.utils.data.DataLoader(
    test_set,
    batch_size=1,
    num_workers=num_workers,
    collate_fn=collate,
)

# Validation loader: the batch size equals the number of validation indexes,
# so the whole validation split is delivered as a single batch.
validation_generator = th.utils.data.DataLoader(
    validation_set,
    batch_size=len(valid_idxs),
    num_workers=num_workers,
    collate_fn=collate,
)

In [5]:
# Hyper-parameters for this run, grouped by consumer:
#   'architecture'     -> keyword arguments forwarded to Net(...)
#   'batch_size'       -> batch size of the training DataLoader
#   'optimizer_params' -> keyword arguments forwarded to th.optim.Adam(...)
obj_params = {
    'architecture': {
        'activation': th.nn.ReLU,
        'n_conv': 1,
        'neuron_ratios': ((3, 2), (20, 10)),
    },
    'batch_size': 32,
    'optimizer_params': {
        'amsgrad': False,
        'betas': (0.9, 0.9999),
        'lr': 0.01,
        'weight_decay': 0.0025,
    },
}

In [7]:
# Echo the hyper-parameter dictionary so the configuration in use is visible
# in the notebook output.
obj_params

{'architecture': {'activation': torch.nn.modules.activation.ReLU,
  'n_conv': 1,
  'neuron_ratios': ((3, 2), (20, 10))},
 'batch_size': 32,
 'optimizer_params': {'amsgrad': False,
  'betas': (0.9, 0.9999),
  'lr': 0.01,
  'weight_decay': 0.0025}}

In [6]:
# Shuffled mini-batch loader over the training split; the batch size comes
# from the hyper-parameter dictionary.
training_generator = th.utils.data.DataLoader(
    training_set,
    shuffle=True,
    batch_size=obj_params['batch_size'],
    num_workers=num_workers,
    collate_fn=collate,
)

# Model: `in_feats` plus the architecture hyper-parameters are passed to Net,
# and the resulting module is moved to the selected device.
in_feats = 14
net = Net(in_feats=in_feats, **obj_params['architecture'])
net = net.to(device)

# Adam optimiser over the model parameters, configured from the
# hyper-parameter dictionary.
opt = th.optim.Adam(net.parameters(), **obj_params['optimizer_params'])

# Two separate mean-squared-error criteria (named for training and validation).
loss = th.nn.MSELoss()
v_loss = th.nn.MSELoss()

# Run settings and bookkeeping containers/counters.
max_epochs = 300
verbose = True
nan_count = []
min_ep_v_loss = 100
epoch_time = []   # wall-clock seconds per epoch, appended by the training loop
nan_batches = 0   # batches skipped by the training loop

In [None]:
# `losses` is not created by any cell shown before this one, so on a fresh
# kernel the append below would raise NameError. Create the history on first
# use and keep it untouched when it already exists.
if 'losses' not in globals():
    losses = {'train': []}

# NOTE(review): the loop runs a fixed 5 epochs although `max_epochs` is
# defined in the setup cell -- confirm this short run is intentional.
for epoch in range(5):
    t_start = perf_counter()
    ep_t_loss = 0
    ep_v_loss = 0
    j = 0  # number of batches that actually contributed to ep_t_loss
    # Anomaly detection is an autograd debugging aid and adds overhead to
    # every backward pass.
    with th.autograd.detect_anomaly():
        for local_batch, local_targets, local_indexes in training_generator:
            # Transfer to GPU
            local_batch = local_batch.to(device)
            local_targets = local_targets.to(device)

            # Model computations. A KeyError raised by the forward pass causes
            # the batch to be skipped and counted in `nan_batches`.
            try:
                pred = net(local_batch)
            except KeyError:
                nan_batches += 1
                opt.zero_grad()
                continue
            j += 1

            t_loss_batch = loss(pred.float(), local_targets.float())
            ep_t_loss += t_loss_batch.item()
            t_loss_batch.backward()
            opt.step()
            opt.zero_grad()

    # Mean batch loss for the epoch. When every batch was skipped there is
    # nothing to average: record NaN instead of dividing by zero.
    if j:
        ep_t_loss = ep_t_loss / j
    else:
        ep_t_loss = float('nan')
    losses['train'].append(ep_t_loss)

    t_end = perf_counter()
    epoch_time.append(t_end - t_start)
    if epoch % 10 == 0 and verbose:
        # The square root of the mean MSE is printed, i.e. an RMSE-style value.
        print(f"step #{epoch} | ep_train_loss = {np.sqrt(ep_t_loss):.4f}"
             f" | epoch_time = {t_end - t_start:.2f}"
             f" | lost_batches = {nan_batches}")