In [1]:
# !pip install dgl-cu102 hyperopt ase 

In [9]:
import pickle
import numpy as np
import matplotlib.pyplot as plt
from sklearn.model_selection import ShuffleSplit

from crystals_DatasetInstance_graphbatch_encode3 import CrystalsDataset

from dgl.nn.pytorch.glob import SumPooling
import dgl

from hyperopt import (hp, tpe, fmin, anneal,
                      mix, partial, STATUS_OK, STATUS_FAIL)
from hyperopt.base import Trials
from hyperopt.pyll.stochastic import sample

from functools import reduce
from time import perf_counter

import torch as th
import torch.nn as nn
import torch.nn.functional as F
from dgl import graph
import dgl.function as fn

from datetime import datetime
from multiprocessing import cpu_count

import warnings
import os

from collections import deque
import wandb

os.environ['DGLBACKEND'] = "pytorch"


In [3]:
# CUDA for PyTorch
use_cuda = th.cuda.is_available()
device = th.device("cuda:0" if use_cuda else "cpu")
th.backends.cudnn.benchmark = True

class CG_CNN_Layer(nn.Module):
    def __init__(self, in_feats):
        super(CG_CNN_Layer, self).__init__()
        self.linearf = nn.Linear(2 * in_feats + 10, in_feats)
        self.linears = nn.Linear(2 * in_feats + 10, in_feats)
        
        self.reset_parameters()
        
    def reset_parameters(self):
        self.linearf.reset_parameters()
        self.linears.reset_parameters()

    def CGCNN_message(self, edges):
        msg = th.cat((edges.src['env'], edges.dst['env'],
                      edges.data['dist']), dim=-1)
        msg = (th.sigmoid(self.linearf(msg))) * (F.softplus(self.linears(msg)))
        return {'m': msg}

    def forward(self, g, features):
        with g.local_scope():
            g.ndata['env'] = features
            g.update_all(message_func=self.CGCNN_message,
                         reduce_func=fn.sum(msg='m',out='m_sum'))
            env = g.ndata['env'] + g.ndata['m_sum']
            return env


class Net(nn.Module):

    def __init__(self, in_feats, n_conv, neuron_ratios, activation):
        super(Net, self).__init__()
        self.conv1 = CG_CNN_Layer(in_feats)
        if n_conv>1:
            self.conv2 = CG_CNN_Layer(in_feats)
            if n_conv>2:
                self.conv3 = CG_CNN_Layer(in_feats)
        self.n_conv = n_conv
        # The commented part of the architecture is for other uses cases
#         self.conv_linear = nn.Linear(in_feats)
#         self.mlp11 = nn.Linear(in_feats, neuron_ratios[0][0] * in_feats)
#         self.mlp12 = nn.Linear(
#             neuron_ratios[0][0] * in_feats, neuron_ratios[0][1] * in_feats)
#         self.mlp13 = nn.Linear(neuron_ratios[0][1] * in_feats, in_feats)
        self.mlp21 = nn.Linear(in_feats, neuron_ratios[1][0] * in_feats)
        self.mlp22 = nn.Linear(neuron_ratios[1][0] * in_feats, neuron_ratios[1][1] * in_feats)
        self.mlp23 = nn.Linear(neuron_ratios[1][1] * in_feats, 1)
        self.activation = activation()

    def forward(self, graphs):

        out = self.conv1(graphs, graphs.ndata['Z'])
        if self.n_conv>1:
            out = self.conv2(graphs, out)
            if self.n_conv>2:
                out = self.conv3(graphs, out)
#         out = self.mlp13(self.activation(self.mlp12(self.activation(self.mlp11(out)))))
        # out = self.pooling(graphs, out) / graphs.batch_num_nodes()
        with graphs.local_scope():
            graphs.ndata['env'] = out
            out = dgl.readout.mean_nodes(graphs, 'env')
            out = self.mlp23(self.activation(self.mlp22(self.activation(self.mlp21(out)))))
            return out

class RMSLELoss(th.nn.Module):

    def __init__(self):
        super(RMSLELoss, self).__init__()

    def forward(self, x, y):
        ret = th.log((x + 1) / (y + 1))
        ret = th.norm(ret)/th.sqrt(th.tensor(ret.shape[0], dtype = th.float, device= device))
        return ret

In [4]:

# Dataset train, valid, test partitions
idxs = np.arange(0, 2400, 1)
spl1 = ShuffleSplit(
    n_splits=1, test_size=0.20, random_state=0).split(idxs)
spl1 = tuple(spl1)
train_idxs, valid_idxs = spl1[0][0], spl1[0][1]

spl2 = ShuffleSplit(
    n_splits=1, test_size=1. / 7., random_state=0).split(train_idxs)
spl2 = tuple(spl2)
train_idxs, test_idxs = spl2[0][0], spl2[0][1]
del(idxs)


root = '/home/raul/Documents/Máster Data Science/Tesis/materials/crystal_graph_cnn/'
# root = ''
n_elements = 49

# Datasets
training_set, validation_set, test_set = tuple(map(CrystalsDataset, [train_idxs, valid_idxs, test_idxs], 
                                                   [root]*3, [n_elements]*3))

num_workers = cpu_count()

def collate(samples):
    # For batch formation
    graphs, targets, indexes = map(list, zip(*samples))
    batched_graph = dgl.batch(graphs)
    batched_targets = th.tensor(targets)
    batched_indexes = th.tensor(indexes)
    return batched_graph, batched_targets, batched_indexes

test_generator = th.utils.data.DataLoader(test_set,
                                          collate_fn=collate,
                                         batch_size = 1,
                                         num_workers= num_workers)
validation_generator = th.utils.data.DataLoader(validation_set,
                                                    batch_size = len(valid_idxs),
                                                    collate_fn=collate,
                                                   num_workers= num_workers)

In [5]:
space = {

    'architecture': {
        'neuron_ratios': [hp.choice('neuron_ratios0', [[3, 2],
                                                     [4, 2],
                                                     [5, 2]]),
                         hp.choice('neuron_ratios1', [[10, 20],
                                                     [50, 100],
                                                     [20, 10],
                                                     [100, 50],
                                                     [100,200],
                                                     [200,100]])],
        'n_conv': hp.choice('n_conv', [1, 2, 3]),
        'activation': hp.choice('', [nn.ReLU, nn.LeakyReLU, nn.SELU])
    },
    'batch_size': hp.choice('batch_size',
#                                 [2, 4, 8, 16, 32, 64, len(train_idxs)]),
                            [len(train_idxs)]),
    'optimizer_params': {

        
        'lr':
            hp.qloguniform('learning_rate',
                           np.log(1e-4), np.log(1e-1), 0.0005),
        'weight_decay': hp.qloguniform('l2_reg_parameter',
                           np.log(1e-4), np.log(5), 0.0005),
        'betas': (hp.uniform('beta1', 0 , 0.9), hp.choice('beta2', [0.99,0.999,0.9999])),
        'amsgrad': hp.choice('amsgrad', [True, False]),
            
        },
    }

mix_algo = partial(mix.suggest, p_suggest=[  # (0.10, rand.suggest),
    (0.90, tpe.suggest),
    (0.10, anneal.suggest)])
tpe_algorithm = mix_algo

obj_params = sample(space)
# obj_params

In [6]:
obj_params

{'architecture': {'activation': torch.nn.modules.activation.ReLU,
  'n_conv': 1,
  'neuron_ratios': ((3, 2), (10, 20))},
 'batch_size': 1645,
 'optimizer_params': {'amsgrad': False,
  'betas': (0.20671238439097625, 0.9999),
  'lr': 0.0125,
  'weight_decay': 0.0025}}

In [10]:
wandb.init(project="GraphCNN")

[34m[1mwandb[0m: You can find your API key in your browser here: https://wandb.ai/authorize


[34m[1mwandb[0m: Paste an API key from your profile and hit enter:  ········································


[34m[1mwandb[0m: Appending key for api.wandb.ai to your netrc file: /home/raul/.netrc


In [7]:
training_generator = th.utils.data.DataLoader(training_set,
                                          batch_size = obj_params['batch_size'],
                                          collate_fn=collate,
                                         num_workers= num_workers,
                                         shuffle = True)


in_feats = 14
net = Net(in_feats=in_feats, **obj_params['architecture']).to(device)
opt = th.optim.Adam(net.parameters(), **obj_params['optimizer_params'])
loss = th.nn.MSELoss()
v_loss = th.nn.MSELoss()
max_epochs = 300
verbose = True
nan_count = []
min_ep_v_loss = 100
epoch_time = []
nan_batches = 0
losses = {'train':[], 'valid':[]}

In [11]:
wandb.watch(net, log='all')

[<wandb.wandb_torch.TorchGraph at 0x7f151d3199d0>]

In [12]:
# Loop over epochs
for epoch in range(5):
    t_start = perf_counter()
    ep_t_loss = 0
    ep_v_loss = 0
    # Training
    j=0
    with th.autograd.detect_anomaly():
        for local_batch, local_targets, local_indexes in training_generator:
            j+=1
            # Transfer to GPU
            local_batch, local_targets = local_batch.to(
                device), local_targets.to(device)

            # Model computations
            try:
                pred = net(local_batch)
            except KeyError:
                j-=1
                nan_batches+=1
                opt.zero_grad()
                continue
            t_loss_batch = loss(pred.float(), local_targets.float())
            ep_t_loss+=t_loss_batch.item()
            t_loss_batch.backward()
            opt.step()
            opt.zero_grad()
    ep_t_loss=ep_t_loss/j
    losses['train'].append(ep_t_loss)

    t_end = perf_counter()
    epoch_time.append(t_end - t_start)
    if epoch % 10 == 0 and verbose:
        print(f"step #{epoch} | ep_train_loss = {np.sqrt(ep_t_loss):.4f}"
             f" | epoch_time = {t_end - t_start:.2f}"
             f" | lost_batches = {nan_batches}")

RuntimeError: Expected object of scalar type Byte but got scalar type Float for sequence element 2 in sequence argument at position #1 'tensors'