In [1]:
# Uncomment to install the dependencies into the kernel's environment:
# %pip install dgl-cu102 hyperopt ase

In [2]:
import os
# Select the tensor backend for DGL; this cell runs ahead of the one that imports dgl.
os.environ.update(DGLBACKEND="pytorch")

In [3]:
import numpy as np
import matplotlib.pyplot as plt

from network import CG_CNN_Layer, Net
from dataset import GraphDataset

import dgl
from dgl.data.utils import split_dataset

from time import perf_counter

import torch as th

from datetime import datetime
from multiprocessing import cpu_count

from math import floor

import warnings

Using backend: pytorch


In [4]:
# Pick the compute device: the first CUDA GPU when one is available, otherwise the CPU.
use_cuda = th.cuda.is_available()
if use_cuda:
    device = th.device("cuda:0")
else:
    device = th.device("cpu")
# th.backends.cudnn.benchmark = True

In [5]:
# Reproducible train/validation split over the sample ids 1..length.
np.random.seed(42)
length = 2400
trainratio = 0.8

# Drawing `length` ids without replacement yields a random permutation of 1..length.
seq = np.random.choice(np.arange(1, length + 1), size=length, replace=False)

# The first `index` shuffled ids go to training, the remainder to validation.
index = floor(length * trainratio)
train_idxs, valid_idxs = seq[:index], seq[index:]

# Both datasets read the same file and differ only in the ids they are given.
trainingset, validset = (
    GraphDataset("data.bin", idxs) for idxs in (train_idxs, valid_idxs)
)

In [6]:
# Hyperparameters for this run, grouped by the component that consumes them.
obj_params = dict(
    architecture=dict(
        activation=th.nn.ReLU,
        n_conv=1,
        neuron_ratios=((3, 2), (200, 100)),
    ),
    batch_size=32,
    optimizer_params=dict(
        amsgrad=False,
        betas=(0.9, 0.9999),
        lr=0.01,
        weight_decay=0.0025,
    ),
)

In [7]:
# NOTE: 'batch_size' is currently unused. Efficient mini-batch training has not
# been implemented because PyTorch's batching utilities don't work with graph objects.
# The bare expression below just displays the configuration for reference.
obj_params

{'architecture': {'activation': torch.nn.modules.activation.ReLU,
  'n_conv': 1,
  'neuron_ratios': ((3, 2), (200, 100))},
 'batch_size': 32,
 'optimizer_params': {'amsgrad': False,
  'betas': (0.9, 0.9999),
  'lr': 0.01,
  'weight_decay': 0.0025}}

In [8]:
class RMSLELoss(th.nn.Module):
    """Root mean squared logarithmic error.

    Computes ``sqrt(mean(log((x + 1) / (y + 1)) ** 2))`` where the mean runs
    over every element of the (broadcast) inputs, whatever their rank.
    """

    def __init__(self):
        super().__init__()

    def forward(self, x, y):
        """Return the RMSLE between ``x`` and ``y`` as a scalar tensor.

        Args:
            x: Tensor of predictions; ``x + 1`` must be positive for the
                logarithm to be finite.
            y: Tensor of targets, broadcastable against ``x``; ``y + 1`` must
                be positive.

        Returns:
            A 0-dim tensor holding the loss (``nan`` for empty inputs).
        """
        log_ratio = th.log((x + 1) / (y + 1))
        # ||r||_2 / sqrt(n) == sqrt(mean(r ** 2)). th.norm reduces over *all*
        # elements, so n has to be the total element count; dividing by
        # shape[0] instead would inflate the loss for inputs of rank > 1.
        return th.norm(log_ratio) / log_ratio.numel() ** 0.5

In [9]:
# Seed torch's RNG before the network is constructed.
th.manual_seed(42)

# Network: `in_feats` plus the architecture settings from obj_params.
in_feats = 4
net = Net(in_feats=in_feats, **obj_params['architecture'])
net = net.to(device)

# Adam with the configured hyperparameters, wrapped in a cosine-annealing
# warm-restart schedule (first cycle T_0 = 100, floor learning rate 1e-4).
opt = th.optim.Adam(params=net.parameters(), **obj_params['optimizer_params'])
scheduler = th.optim.lr_scheduler.CosineAnnealingWarmRestarts(
    opt, T_0=100, eta_min=0.0001
)

# Training criterion: mean absolute error.
loss = th.nn.L1Loss()
# loss = RMSLELoss()

verbose = True
epoch_time = list()

In [10]:
# Full-batch training loop.
#
# Every epoch runs each training graph through the network one at a time,
# averages the loss over the whole training set, takes a single optimizer
# step, and then scores the validation set. Progress is printed every second
# epoch when `verbose` is set, and per-epoch training time is appended to
# `epoch_time`.
N_EPOCHS = 200

# Label tensor read from the second element of the first dataset item. It is
# indexed with (sample id - 1) below, i.e. the sample ids are treated as
# 1-based positions into it.
labels = trainingset[0][1]['glabel']

# The validation targets do not change between epochs. Flatten them so they
# line up element-wise with the 1-D prediction vector; a (n, 1) target would
# otherwise broadcast against (n,) predictions inside the loss.
valid_targets = th.flatten(labels[valid_idxs - 1])

for epoch in range(N_EPOCHS):
    t_start = perf_counter()

    # ---- training pass -------------------------------------------------
    # Column 0 collects predictions, column 1 the matching targets.
    pred_label = th.zeros((len(train_idxs), 2))
    # Each dataset item unpacks as (graphs, <unused>, sample id); the graph fed
    # to the network is graphs[0].
    for row, (graphs, _, sample_idx) in enumerate(trainingset):
        # Transfer to the compute device
        graph = graphs[0].to(device)
        label = labels[sample_idx - 1].to(device)

        # Model computations
        pred_label[row, 0] = th.flatten(net(graph))
        pred_label[row, 1] = th.flatten(label)

    # Clear gradients *before* backward so that a previously interrupted run
    # of this cell cannot leak stale gradients into the first step.
    opt.zero_grad()
    ep_t_loss = loss(pred_label[:, 0], pred_label[:, 1])
    ep_t_loss.backward()
    opt.step()
    scheduler.step(epoch)
    t_end = perf_counter()

    # ---- validation pass -----------------------------------------------
    # No gradients are needed here; graphs are moved to the same device as the
    # model, and the predictions are brought back to the CPU for the loss.
    # NOTE(review): the network is left in its current train/eval mode, as
    # before — confirm whether Net has layers that need net.eval() here.
    with th.no_grad():
        pred = th.cat(
            [th.flatten(net(graphs[0].to(device))) for graphs, _, _ in validset]
        ).cpu()
    ep_v_loss = loss(pred, valid_targets)

    epoch_time.append(t_end - t_start)
    if epoch % 2 == 0 and verbose:
        print(f"step #{epoch} | ep_train_loss = {ep_t_loss.item():.4f}"
              f" | ep_valid_loss = {ep_v_loss.item():.4f}"
              f" | epoch_time = {t_end - t_start:.2f}")

step #0 | ep_train_loss = 2.1328 | ep_valid_loss = 7.4442 | epoch_time = 7.94
step #2 | ep_train_loss = 2.8882 | ep_valid_loss = 1.5853 | epoch_time = 6.47
step #4 | ep_train_loss = 1.5533 | ep_valid_loss = 0.8903 | epoch_time = 6.40
step #6 | ep_train_loss = 1.3025 | ep_valid_loss = 0.9733 | epoch_time = 7.14
step #8 | ep_train_loss = 0.8077 | ep_valid_loss = 1.0337 | epoch_time = 6.71
step #10 | ep_train_loss = 0.9239 | ep_valid_loss = 0.8525 | epoch_time = 7.94
step #12 | ep_train_loss = 0.7791 | ep_valid_loss = 0.8458 | epoch_time = 7.87
step #14 | ep_train_loss = 0.8068 | ep_valid_loss = 0.7698 | epoch_time = 8.51
step #16 | ep_train_loss = 0.7153 | ep_valid_loss = 0.7774 | epoch_time = 7.18
step #18 | ep_train_loss = 0.6946 | ep_valid_loss = 0.6832 | epoch_time = 6.98
step #20 | ep_train_loss = 0.6489 | ep_valid_loss = 0.6274 | epoch_time = 6.85
step #22 | ep_train_loss = 0.5695 | ep_valid_loss = 0.5827 | epoch_time = 6.92
step #24 | ep_train_loss = 0.4957 | ep_valid_loss = 0.471

In [11]:
# Persist the network's state_dict (not the module object itself) to disk.
th.save(obj=net.state_dict(), f="model3.pt")