In [1]:
# match same input of features
from AttFPfeaturing import datagenerator, getdataloader
#from Datas import dataloader
import pandas as pd
target_list = ['Result0']
import pickle

df = pd.read_csv('esol.csv')

In [2]:
data = datagenerator(df, target_list)

100%|██████████| 1128/1128 [00:06<00:00, 181.58it/s]


In [3]:
batch_size = 64

train_loader = getdataloader(data, batch_size, shuffle=True, drop_last=False)

In [4]:
for data in train_loader:
    print(data)
    print(data.x.shape)
    break

Batch(batch=[791], edge_attr=[1618, 10], edge_index=[2, 1618], x=[791, 40], y=[64, 1])
torch.Size([791, 40])


In [78]:
import torch
from torch import Tensor

from torch.nn import Linear, BatchNorm1d, Dropout
from torch.nn import Parameter as Param
import torch.nn.functional as F
from torch_geometric.nn import global_add_pool, EdgePooling
from torch_sparse import matmul
from torch_geometric.nn.conv import MessagePassing
from torch_geometric.utils import softmax
from torch_geometric.typing import PairTensor, Adj, OptTensor, Size
from torch_scatter import scatter_add
import pickle

from typing import Union, Tuple, Optional


class GatConvAtom2(MessagePassing):
    """
     degenerate AtomConv => only do upscaling projection & leaky_relu
    """
    def __init__(self, atom_in_channels: int,  fingerprint_dim: int, dropout: float, bias: bool = True, debug: bool = False, **kwargs):
        super(GatConvAtom2, self).__init__()

        self.atom_in_channels = atom_in_channels
        self.fingerprint_dim = fingerprint_dim
        self.atom_fc = Linear(atom_in_channels, fingerprint_dim, bias=bias)
        self.debug = debug
        self.dropout = Dropout(p=dropout)

    def forward(self, x: Union[Tensor,PairTensor], edge_index: Adj,
                size: Size = None) -> Tensor:
        
        out = self.propagate(edge_index, x = x,  size=size)
        return out

    def message(self, x_i, edge_index, size) -> Tensor:

        x_i = F.leaky_relu(self.atom_fc(x_i)) # code 3 

        return x_i   



class GatConvAtom(MessagePassing):
    """
    This function does only the atom embedding, not the molecule embedding
    """
    def __init__(self, atom_in_channels: int, bond_in_channels: int, fingerprint_dim: int, dropout: float, bias: bool = True, debug: bool = False, step = 0, **kwargs):
        super(GatConvAtom, self).__init__()

        self.atom_in_channels = atom_in_channels
        self.bond_in_channels = bond_in_channels
        self.fingerprint_dim = fingerprint_dim
        self.step = step

        if  self.step == 0 : 
            self.atom_fc = Linear(atom_in_channels, fingerprint_dim, bias=bias)
            self.neighbor_fc = Linear(atom_in_channels + bond_in_channels, fingerprint_dim, bias=bias)
        self.align = Linear(2*fingerprint_dim, 1, bias=bias)
        self.attend = Linear(fingerprint_dim, fingerprint_dim, bias=bias)
        self.debug = debug
        self.dropout = Dropout(p=dropout)
        self.rnn = torch.nn.GRUCell(fingerprint_dim, fingerprint_dim)

        
    def forward(self, x: Union[Tensor,PairTensor], edge_index: Adj,
                edge_attr: OptTensor = None, size: Size = None) -> Tensor:
        
        out = self.propagate(edge_index, x = x, edge_attr=edge_attr, size=size)
        return out

    def message(self, x_i, x_j, edge_index: Adj, edge_attr: OptTensor, size) -> Tensor:

        if self.debug:
            print('a x_j:',x_j.shape,'x_i:',x_i.shape,'edge_attr:',edge_attr.shape)
        if  self.step == 0 :

            x_i = F.leaky_relu(self.atom_fc(x_i)) # code 3 

            # neighbor_feature => neighbor_fc
            x_j = torch.cat([x_j, edge_attr], dim=-1) # code 8
            if self.debug:
                print('b neighbor_feature i = 0', x_j.shape)
            
            x_j = F.leaky_relu(self.neighbor_fc(x_j)) # code 9
            if self.debug:
                print('c neighbor_feature i = 0', x_j.shape)
            
        # align score
        evu = F.leaky_relu(self.align(torch.cat([x_i, x_j], dim=-1))) # code 10
        if self.debug:
            print('d align_score:', evu.shape)
        
        # We need to find a way to do edge pooling per atom 
        avu = EdgePooling.compute_edge_score_softmax(evu, edge_index, edge_index.max().item() + 1) # code 11
        
        #avu = softmax(evu, edge_index, None, edge_index.max().item() + 1)
        if self.debug:
            print('e attention_weight:', avu.shape)

        c_i = F.elu(torch.mul(avu, self.attend(self.dropout(x_i)))) # code 12

        if self.debug:
            print('f context',c_i.shape)
            
        x_i = self.rnn(c_i, x_i)
        if self.debug:
            print('g gru',c_i.shape)            

        return x_i   

class GatConvMol(MessagePassing):
    """
    This function does the molecule embedding
    """
    def __init__(self, fingerprint_dim: int, dropout: int, debug: bool = False, step = 0):
        super(GatConvMol, self).__init__()
        # need to find the correct dimensions 
        self.step = step
        self.mol_align = Linear(2*fingerprint_dim,1)
        self.mol_attend = Linear(fingerprint_dim,fingerprint_dim)
        self.dropout = Dropout(p=dropout)
        self.debug = debug
        self.rnn = torch.nn.GRUCell(fingerprint_dim, fingerprint_dim)

    def forward(self, x: Union[Tensor,PairTensor], edge_index: Adj, size: Size = None) -> Tensor:
        
        out = self.propagate(edge_index, x = x, size=size)
        return out

    def message(self, x_i, x_j, edge_index: Adj, size) -> Tensor:
        if self.step == 0:
            h_s =  torch.sum(x_i, dim=-1)
            if self.debug:
                print('pre-h_s:',h_s.shape,',x_i:', x_i.shape)            
                
            h_s =  h_s.unsqueeze(1).repeat(1, x_i.size(1)) # code 2
            if self.debug:
                print('1 mol_feature expanded',h_s.shape)

        else:
            h_s = x_i
        
        if self.debug:
            print('2 activated_features', x_i.shape)
             
        esv = F.leaky_relu(self.mol_align(torch.cat([h_s, x_i], dim=-1))) # code 5
        if self.debug:
            print('3 mol_align_score:',esv.shape)
        # this is a sotfmax per molecule  
        asv = F.softmax(esv, dim=-1) # code 6
    
        if self.debug:
            print('4 mol_align_score:',asv.shape)
        
        # this is not correct it should be more hs and not x_i there based on the paper supplementary table 3!
        cs_i = F.elu(torch.mul(asv, self.mol_attend(self.dropout(h_s)))) # code 7 
        if self.debug:
            print('5 mol_context' ,cs_i.shape)
            
        x_i = self.rnn(cs_i, h_s) # code 8
        
        return x_i


class AtomEmbedding(torch.nn.Module):
    def __init__(self, atom_dim,  edge_dim, fp_dim, R=2, dropout = 0.2, debug=False):
        super(AtomEmbedding, self).__init__()
        self.R = R
        self.debug = debug
        self.conv = torch.nn.ModuleList([GatConvAtom(atom_in_channels=atom_dim, bond_in_channels= edge_dim, fingerprint_dim=fp_dim, dropout = dropout, debug=debug, step = i) for i in range(self.R)])  # GraphMultiHeadAttention

    def forward(self, x, edge_index, edge_attr):
        for i in range(self.R):
            if self.debug:
                print(x.shape)
            
            x = self.conv[i](x, edge_index, edge_attr) # code 1-12
            if self.debug:
                print(x.shape)    
        return x
    
    

class AtomEmbedding2(torch.nn.Module):
    def __init__(self, atom_dim, fp_dim, R=1, dropout = 0.2, debug=False):
        super(AtomEmbedding2, self).__init__()
        self.R = R
        self.debug = debug
        self.conv = torch.nn.ModuleList([GatConvAtom2(atom_in_channels=atom_dim, fingerprint_dim=fp_dim, dropout = dropout, debug=debug) for i in range(self.R)])  # GraphMultiHeadAttention

    def forward(self, x, edge_index):
        for i in range(self.R):
            if self.debug:
                print(x.shape)
            
            x = self.conv[i](x, edge_index) # code 1-12
            if self.debug:
                print(x.shape)    
        return x


class MoleculeEmbedding(torch.nn.Module):
    def __init__(self, fp_dim, dropout, debug, T=2):
        super(MoleculeEmbedding, self).__init__()
        self.T = T
        self.debug = debug
        self.conv =torch.nn.ModuleList([GatConvMol(fp_dim, dropout, debug, step = i) for i in range(self.T)])

    def forward(self, x, edge_index):
        for i in range(self.T):
            x = self.conv[i](x, edge_index) # code 1-7
        return x

class AttentiveFPdebug(torch.nn.Module):
    def __init__(self, atom_in_dim, edge_in_dim, fingerprint_dim=200, R=1, T=1, dropout=0.2,  debug = True, outdim=1):
        super(AttentiveFPdebug, self).__init__()
        self.R = R
        self.T = T
        self.debug = debug
        self.dropout = dropout
        # call the atom embedding Phase
        self.convsAtom = AtomEmbedding(atom_in_dim, edge_in_dim, fingerprint_dim, R, debug) 
        self.convsMol = MoleculeEmbedding(fingerprint_dim, dropout, debug, T )

        # fast down project could be much more sofisticated! (ie  Feed Forward Network with multiple layers )
        self.out = Linear(fingerprint_dim, outdim) 
        
    def forward(self, data):
        x, edge_index, batch, edge_attr = data.x, data.edge_index, data.batch, data.edge_attr
        x = F.dropout(self.convsAtom(x, edge_index, edge_attr), p=self.dropout, training=self.training) # atom Embedding       
        x = F.dropout(self.convsMol(x, edge_index), p=self.dropout, training=self.training) # molecule Embedding
        
        x = self.out(global_add_pool(x, batch))
        return x
    
class AttentiveFPdebug2(torch.nn.Module):
    def __init__(self, atom_in_dim, edge_in_dim, fingerprint_dim=200, R=1, T=1, dropout=0.2,  debug = True, outdim=1):
        super(AttentiveFPdebug2, self).__init__()
        self.R = R
        self.T = T
        self.debug = debug
        self.dropout = dropout
        # call the atom embedding Phase
        self.convsAtom = AtomEmbedding2(atom_in_dim, fingerprint_dim, R, debug) 
        self.convsMol = MoleculeEmbedding(fingerprint_dim, dropout, debug, T )

        # fast down project could be much more sofisticated! (ie  Feed Forward Network with multiple layers )
        self.out = Linear(fingerprint_dim, outdim) 
        
    def forward(self, data):
        x, edge_index, batch, edge_attr = data.x, data.edge_index, data.batch, data.edge_attr
        x = F.dropout(self.convsAtom(x, edge_index), p=self.dropout, training=self.training) # atom Embedding       
        x = F.dropout(self.convsMol(x, edge_index), p=self.dropout, training=self.training) # molecule Embedding
        
        x = self.out(global_add_pool(x, batch))
        return x

In [79]:
class AttentiveFP(torch.nn.Module):
    def __init__(self, atom_in_dim, edge_in_dim, fingerprint_dim=32, R=2, T=2, dropout=0.2,  debug = False, outdim=1):
        super(AttentiveFP, self).__init__()
        self.R = R
        self.T = T
        self.debug = debug
        self.dropout = dropout
        # call the atom embedding Phase
        self.convsAtom = AtomEmbedding(atom_in_dim, edge_in_dim, fingerprint_dim, R, debug)
        # call the Mol embedding Phase
        self.convsMol = MoleculeEmbedding(fingerprint_dim, dropout, debug, T )

        # fast down project could be much more sofisticated! (ie  Feed Forward Network with multiple layers )
        self.out = Linear(fingerprint_dim, outdim) 
        
    def forward(self, data):
        x, edge_index, batch, edge_attr = data.x, data.edge_index, data.batch, data.edge_feat
        #x = F.dropout(self.convsAtom(x, edge_index, edge_attr), p=self.dropout, training=self.training) # atom Embedding
        x = F.dropout(self.convsMol(x, edge_index), p=self.dropout, training=self.training) # mol Embedding
        x = self.out(F.dropout(x, p=self.dropout, training=self.training)) # final prediction
        return x

In [90]:
# generate the model architecture
model = AttentiveFPdebug(40, 10, 50, R= 1, T=3, dropout = 0.2, debug = True)

In [91]:
model

AttentiveFPdebug(
  (convsAtom): AtomEmbedding(
    (conv): ModuleList(
      (0): GatConvAtom(
        (atom_fc): Linear(in_features=40, out_features=50, bias=True)
        (neighbor_fc): Linear(in_features=50, out_features=50, bias=True)
        (align): Linear(in_features=100, out_features=1, bias=True)
        (attend): Linear(in_features=50, out_features=50, bias=True)
        (dropout): Dropout(p=True, inplace=False)
        (rnn): GRUCell(50, 50)
      )
    )
  )
  (convsMol): MoleculeEmbedding(
    (conv): ModuleList(
      (0): GatConvMol(
        (mol_align): Linear(in_features=100, out_features=1, bias=True)
        (mol_attend): Linear(in_features=50, out_features=50, bias=True)
        (dropout): Dropout(p=0.2, inplace=False)
        (rnn): GRUCell(50, 50)
      )
      (1): GatConvMol(
        (mol_align): Linear(in_features=100, out_features=1, bias=True)
        (mol_attend): Linear(in_features=50, out_features=50, bias=True)
        (dropout): Dropout(p=0.2, inplace=F

In [92]:
# loop over data in a batch
import time

start = time.time()

y_true = []
y_out = []
for data in train_loader:
    print(data)
    y = model(data)
    y_out.extend(y.squeeze().detach().cpu().numpy())
    y_true.extend(data.y.squeeze().detach().cpu().numpy())
    #break
    
stop = time.time()
print(stop-start)

Batch(batch=[1640], edge_attr=[3380, 10], edge_index=[2, 3380], x=[1640, 40], y=[128, 1])
pre-h_s: torch.Size([3380]) ,x_i: torch.Size([3380, 50])
1 mol_feature expanded torch.Size([3380, 50])
2 activated_features torch.Size([3380, 50])
3 mol_align_score: torch.Size([3380, 1])
4 mol_align_score: torch.Size([3380, 1])
5 mol_context torch.Size([3380, 50])
2 activated_features torch.Size([3380, 50])
3 mol_align_score: torch.Size([3380, 1])
4 mol_align_score: torch.Size([3380, 1])
5 mol_context torch.Size([3380, 50])
2 activated_features torch.Size([3380, 50])
3 mol_align_score: torch.Size([3380, 1])
4 mol_align_score: torch.Size([3380, 1])
5 mol_context torch.Size([3380, 50])
Batch(batch=[1809], edge_attr=[3742, 10], edge_index=[2, 3742], x=[1809, 40], y=[128, 1])
pre-h_s: torch.Size([3742]) ,x_i: torch.Size([3742, 50])
1 mol_feature expanded torch.Size([3742, 50])
2 activated_features torch.Size([3742, 50])
3 mol_align_score: torch.Size([3742, 1])
4 mol_align_score: torch.Size([3742, 1])

In [93]:
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.optim.optimizer import Optimizer, required

class EarlyStopping:
    """Early stops the training if validation loss doesn't improve after a given patience."""
    def __init__(self, patience=7, verbose=False, delta=0, path='checkpoint.pt', trace_func=print):
        """
        Args:
            patience (int): How long to wait after last time validation loss improved.
                            Default: 7
            verbose (bool): If True, prints a message for each validation loss improvement.
                            Default: False
            delta (float): Minimum change in the monitored quantity to qualify as an improvement.
                            Default: 0
            path (str): Path for the checkpoint to be saved to.
                            Default: 'checkpoint.pt'
            trace_func (function): trace print function.
                            Default: print                                             
        """
        self.patience = patience
        self.verbose = verbose
        self.counter = 0
        self.best_score = None
        self.early_stop = False
        self.val_loss_min = np.Inf
        self.delta = delta
        self.path = path
        self.trace_func = trace_func
        
    def __call__(self, val_loss, model):

        score = -val_loss

        if self.best_score is None:
            self.best_score = score
            self.save_checkpoint(val_loss, model)

        elif score < self.best_score + self.delta:
            self.counter += 1
            if self.verbose:
                self.trace_func(f'EarlyStopping counter: {self.counter} out of {self.patience}')
            if self.counter >= self.patience:
                self.early_stop = True
        else:
            self.best_score = score
            self.save_checkpoint(val_loss, model)
            self.counter = 0

    def save_checkpoint(self, val_loss, model):
        '''Saves model when validation loss decrease.'''
        if self.verbose:
            self.trace_func(f'Validation loss decreased ({self.val_loss_min:.6f} --> {val_loss:.6f}).  Saving model ...')
        torch.save(model.state_dict(), self.path)
        self.val_loss_min = val_loss


################ optimizer #####################
class RAdam(Optimizer):

    def __init__(self, params, lr=1e-3, betas=(0.9, 0.999), eps=1e-8, weight_decay=0, degenerated_to_sgd=True):
        if not 0.0 <= lr:
            raise ValueError("Invalid learning rate: {}".format(lr))
        if not 0.0 <= eps:
            raise ValueError("Invalid epsilon value: {}".format(eps))
        if not 0.0 <= betas[0] < 1.0:
            raise ValueError("Invalid beta parameter at index 0: {}".format(betas[0]))
        if not 0.0 <= betas[1] < 1.0:
            raise ValueError("Invalid beta parameter at index 1: {}".format(betas[1]))
        
        self.degenerated_to_sgd = degenerated_to_sgd
        if isinstance(params, (list, tuple)) and len(params) > 0 and isinstance(params[0], dict):
            for param in params:
                if 'betas' in param and (param['betas'][0] != betas[0] or param['betas'][1] != betas[1]):
                    param['buffer'] = [[None, None, None] for _ in range(10)]
        defaults = dict(lr=lr, betas=betas, eps=eps, weight_decay=weight_decay, buffer=[[None, None, None] for _ in range(10)])
        super(RAdam, self).__init__(params, defaults)

    def __setstate__(self, state):
        super(RAdam, self).__setstate__(state)

    def step(self, closure=None):

        loss = None
        if closure is not None:
            loss = closure()

        for group in self.param_groups:

            for p in group['params']:
                if p.grad is None:
                    continue
                grad = p.grad.data.float()
                if grad.is_sparse:
                    raise RuntimeError('RAdam does not support sparse gradients')

                p_data_fp32 = p.data.float()

                state = self.state[p]

                if len(state) == 0:
                    state['step'] = 0
                    state['exp_avg'] = torch.zeros_like(p_data_fp32)
                    state['exp_avg_sq'] = torch.zeros_like(p_data_fp32)
                else:
                    state['exp_avg'] = state['exp_avg'].type_as(p_data_fp32)
                    state['exp_avg_sq'] = state['exp_avg_sq'].type_as(p_data_fp32)

                exp_avg, exp_avg_sq = state['exp_avg'], state['exp_avg_sq']
                beta1, beta2 = group['betas']

                exp_avg_sq.mul_(beta2).addcmul_(1 - beta2, grad, grad)
                exp_avg.mul_(beta1).add_(1 - beta1, grad)

                state['step'] += 1
                buffered = group['buffer'][int(state['step'] % 10)]
                if state['step'] == buffered[0]:
                    N_sma, step_size = buffered[1], buffered[2]
                else:
                    buffered[0] = state['step']
                    beta2_t = beta2 ** state['step']
                    N_sma_max = 2 / (1 - beta2) - 1
                    N_sma = N_sma_max - 2 * state['step'] * beta2_t / (1 - beta2_t)
                    buffered[1] = N_sma

                    # more conservative since it's an approximated value
                    if N_sma >= 5:
                        step_size = math.sqrt((1 - beta2_t) * (N_sma - 4) / (N_sma_max - 4) * (N_sma - 2) / N_sma * N_sma_max / (N_sma_max - 2)) / (1 - beta1 ** state['step'])
                    elif self.degenerated_to_sgd:
                        step_size = 1.0 / (1 - beta1 ** state['step'])
                    else:
                        step_size = -1
                    buffered[2] = step_size

                # more conservative since it's an approximated value
                if N_sma >= 5:
                    if group['weight_decay'] != 0:
                        p_data_fp32.add_(-group['weight_decay'] * group['lr'], p_data_fp32)
                    denom = exp_avg_sq.sqrt().add_(group['eps'])
                    p_data_fp32.addcdiv_(-step_size * group['lr'], exp_avg, denom)
                    p.data.copy_(p_data_fp32)
                elif step_size > 0:
                    if group['weight_decay'] != 0:
                        p_data_fp32.add_(-group['weight_decay'] * group['lr'], p_data_fp32)
                    p_data_fp32.add_(-step_size * group['lr'], exp_avg)
                    p.data.copy_(p_data_fp32)

        return loss


In [94]:
class Model:
    def __init__(self, R , 
                 T, 
                 atom_input_dim = 49,
                 bond_input_dim = 10,
                 dropout = 0.0,
                 fpdim= 200,
                 output_dim = 1, 
                 device=0,
                 cuda=True,
                 debug = False):
        self.R = R
        self.T = T
        self.atom_input_dim = atom_input_dim
        self.bond_input_dim = bond_input_dim
        self.dropout = dropout
        self.fpdim = fpdim
        self.debug = debug
        self.output_dim = output_dim
        self.outputreal=self.output_dim 

        # device
        if cuda:
            self.device = torch.device('cuda:%i' %device)
        else:
            self.device = torch.device('cpu')

        self.model = AttentiveFPdebug(self.atom_input_dim,
                            self.bond_input_dim,
                            self.fpdim,
                            self.R,
                            self.T, 
                            self.dropout, 
                            self.debug)
        
    def fit(self, data_train,
            data_valid,
            epochs,
            loss,
            path = '',
            learning_rate = 5e-3,
            reducelr = True,
            reducefactor = 0.8,
            early = False,
            patience_early = 40,
            patience = 60,
            cosine = True,
            cosineT = 20,
            seed=None,
            verbose=True,
            logfile=None,
            isradam = False,
            weight_decay = 0,
            amsgrad = False,
            best = True):

        # check that data is a dataloader
        self.path = path
        self.nboutputfull = self.outputreal
        
        if seed and seed >=0:
            torch.manual_seed(seed)
            np.random.seed(seed)
            torch.cuda.manual_seed(seed)
            
        # right away initialize the fit by saving the model
        self.save_model(self.path, weights=True)

        if isradam:
            print('Use RAdam')
            opt = RAdam(self.model.parameters(), lr=learning_rate, weight_decay=weight_decay)
        else: 
            print('Use Adam')
            opt = torch.optim.Adam(self.model.parameters(), lr=learning_rate, amsgrad=amsgrad, weight_decay=weight_decay)

            
        # reduce on plateau
        if reducelr:
            plateau = torch.optim.lr_scheduler.ReduceLROnPlateau(opt, patience=patience, factor=reducefactor, verbose=False)
                    
        # cosine annealing
        if cosine:
            scheduler = torch.optim.lr_scheduler.CosineAnnealingWarmRestarts(opt,T_0=cosineT, T_mult=2) 

        # adding early stopping
        if early:
            early_stopping = EarlyStopping(patience=patience, verbose=verbose)
            
        self.model.to(self.device)

        # default parameters if we save the best model
        best_loss_val = 3e+6

        for t in range(0,epochs):
            if logfile is not None:
                mode = 'a+'
                with open(logfile, mode) as f:
                    f.write("Epoch {0} / {1}".format(t+1, epochs))
            if verbose:
                print("Epoch {0} / {1}".format(t+1, epochs))
            epoch_loss = 0.0
            count_train = 0
            
            y_true = torch.tensor([], device=self.device)
            y_out = torch.tensor([], device=self.device)
            for batch_i, batch_data in enumerate(data_train):
                self.model.train()
                batch_data = batch_data.to(self.device)
                
                opt.zero_grad()
               
                y_batch_pred = self.model(batch_data)
                
                y_true = torch.cat((y_true,),0)
                
                
                
                batch_loss = 0

                # still need to handle correctly multitarget
                y_out = torch.cat((y_out, y_batch_pred),0)

                y_true_tensor = torch.tensor([], device=self.device)
                for k in range(self.outputreal): # nboutputfull
                    y_true_tensor = torch.cat(( y_true_tensor,batch_data.y.unsqueeze(-1)),1)
                # only count std and avg predictions not the rest of the outputs 
                for k in range(self.outputreal):
                    batch_loss += loss(y_batch_pred[:,k].view(-1),batch_data.y)
                y_true = torch.cat(( y_true,y_true_tensor),0)

                # classical method backward and one step optimizer
                batch_loss.backward()
                # avoid 
                
                opt.step()
                
            with torch.no_grad():
                loss_train = loss(y_out,y_true).detach().cpu()

            val_loss_all = self.predict(data_valid, loss, verbose=0) 
            val_loss = np.sum(val_loss_all) # change mean to sum for val loss like for train loss
                    
            if val_loss < best_loss_val: # the mean is taken to consider the multi target case as well
                best_loss_val = val_loss
                                    
            if verbose:
                print("Train:",loss_train," ,Valid:",val_loss_all," ,Best Loss Val:",best_loss_val)

            if logfile is not None:
                with open(logfile, 'a+') as f:
                    f.write('\tTrain: %f\tVals:' %loss_train)
                    for vla in val_loss_all:
                        f.write(str(vla))
                        f.write('\t')
                    f.write('\n')
                
            # compute the early stopping 
            if early:
                early_stopping(val_loss, self.model)
                if early_stopping.early_stop:
                    print("Early stopping")
                    break
                
            # reduce on Plateau
            if reducelr:
                plateau.step(val_loss)
        
        self.save_model(self.path, weights=True)

    def predict(self, data, loss=None, verbose=1):
        self.model.eval()
        epoch_loss = {}

        for k in range(self.outputreal):
            epoch_loss[k] = 0
                
        count_train = 0

        output_augm_vec = []
        ytrue_augm_vec = []
        augm_idx_vec = []

        # initialize the loss vecs
        pred_loss_vec = []
        true_loss_vec = []
        
        for batch_data in data:
            batch_data = batch_data.to(self.device)
            y_batch_pred = self.model(batch_data)
            
            #print(y_batch_pred.shape)
            #print(batch_data.y.shape)
        
            batch_loss = {}
            count_train += batch_data.y.size(0)
            for k in range(self.outputreal):
                # scaled data
                if k==0:
                    batch_loss[k] = loss(y_batch_pred[:,k].view(-1), batch_data.y)
                if k>0:
                    batch_loss[k] = loss(y_batch_pred[:,k].view(-1),  batch_data.y)
                epoch_loss[k] += batch_data.num_graphs * batch_loss[k].item()
        
        return [epoch_loss[k]/count_train for k in range(self.outputreal)]
    
    def apply_model(self, data):
        self.model.eval()

        # initialize the loss vecs
        y_out = torch.tensor([], device=self.device)
        y_true = torch.tensor([], device=self.device)

        for batch_data in data:
            y_true_tensor = torch.tensor([], device=self.device)
            batch_data = batch_data.to(self.device)
            y_batch_pred = self.model(batch_data)
            
            # still need to handle correctly multitarget
            y_out = torch.cat((y_out, y_batch_pred),0)
            for k in range(self.outputreal):
                y_true_tensor = torch.cat(( y_true_tensor,(batch_data.y).unsqueeze(-1)),1)
            y_true = torch.cat(( y_true,y_true_tensor),0)
        return y_out , y_true  
    
    
    
    def save_model(self, path, weights=False):
        """ 
        method to save the trained model
        :param path: the full path where to save the model, str
        :param weights: boolean, whether to consider only the weights, otherwise the full model (archi+weights)
        """
        if weights:
            torch.save(self.model.cpu().state_dict(), path)
            self.model = self.model.to(self.device)
        else:
            torch.save(self.model.cpu(), path)
            self.model = self.model.to(self.device)

            
    def load_model(self, path, weights=False):
        """ 
        method to save the trained model
        :param path: the full path from where to load the model, str
        :param weights: boolean, whether to consider only the weights, otherwise the full model (archi+weights)
        """
        if weights:
            self.model.load_state_dict(torch.load(path))
            self.model = self.model.to(self.device)
        else:
            self.model.load(path)
            self.model = self.model.to(self.device)




In [88]:

class RMSELoss(torch.nn.Module):
    def __init__(self):
        super().__init__()
        self.mse = torch.nn.MSELoss()
        
    def forward(self,yhat,y):
        return torch.sqrt(self.mse(yhat,y))

In [99]:
import numpy as np
################## basic parameters ############
target_list = ['target']
output_dim = 1
patience = 40
verbose = 1

################ OCHEM integration part ###############
import configparser
import tarfile
import pickle
config = configparser.ConfigParser();
config.read('config-attfp-my.cfg');


def getConfig(section, attribute, default="", type="int"):
    try:
        if type == 'int':
            return config.getint(section,attribute,fallback = default);
        if type == 'float':
            return config.getfloat(section,attribute,fallback = default);
        if type == 'bool':
            return config.getboolean(section,attribute,fallback = default);
        else:
            return config.get(section,attribute,fallback = default);

    except:
        return default;

TRAIN = getConfig("Task","train_mode",True,'bool');
MODEL_FILE = getConfig("Task","model_file","","");
TRAIN_FILE = getConfig("Task","train_data_file" ,"esol.csv","");
APPLY_FILE = getConfig("Task","apply_data_file", "train.csv","");
RESULT_FILE = getConfig("Task","result_file", "results.csv","");

best = getConfig("Details","best",True,'bool');
isradam = getConfig("Details","Adam",False,'bool');
nbepochs = getConfig("Details", "nbepochs", 10,'int');
R = getConfig("Details", "R", 1,'int');
T = getConfig("Details", "T", 1,'int');
fpdim = getConfig("Details", "fpdim", 200,'int');
lr = getConfig("Details", "lr", 0.001, 'float');
batch_size = getConfig("Details", "batch", 32, 'int');
dropout = getConfig("Details", "dropout", 0.2, 'float');
seed = getConfig("Details", "seed", 420, 'int');
cosine =  getConfig("Details", "cosine", True,'bool')
cosineT = getConfig("Details", "cosineT", 14, 'int');
reducelr =  getConfig("Details", "reducelr",  False,'bool');
patience_lr = getConfig("Details", "patiencelr", 40,'int');
patience_early = getConfig("Details", "patience_early", 40, 'int');
early = getConfig("Details", "early", True, 'bool') 
gpu = getConfig("Details", "gpu", 0, 'int');
weight_decay = getConfig("Details", "weight_decay", 0.0, 'float');
amsgrad = getConfig("Details", "amsgrad", False, 'bool')     
    
log_filename = 'model.log';
modelname = "model.pt";

np.random.seed(seed);
torch.manual_seed(seed);

criterion =  RMSELoss()

### need to change to bool now!
if TRAIN:

   data = [];
   first = True;
   for line in open(TRAIN_FILE, "r").readlines():
      if not first:
         arr = line.strip().split(",");
         data.append([arr[0], float(arr[1]) ]);
      first = False;

   total = len(data);
   ntrain = int(0.9 * total);

   df_train = pd.DataFrame(data[:ntrain], columns = ["smiles", "target"] );
   df_valid = pd.DataFrame(data[ntrain:], columns = ["smiles", "target"] );
    
    
   traindata = datagenerator(df_train, target_list) 

   valdata = datagenerator(df_valid, target_list) 
    
   
   train_loader= getdataloader(traindata, batch_size,  shuffle=True,  drop_last=False)
    
    
   valid_loader = getdataloader(valdata, batch_size,  shuffle=False,  drop_last=False)

   # need to add classifier ouptut option
   model = Model(R=2, T =2, atom_input_dim = 40, bond_input_dim = 10,  dropout=0.3, fpdim=200,  output_dim=1)
    
   print(model)
    
   model.fit(train_loader, valid_loader, nbepochs, criterion, path = modelname, cosine=cosine, cosineT = cosineT, reducelr = reducelr, patience=patience_lr, early=early, patience_early=patience_early, learning_rate= lr, seed= seed, verbose= verbose, logfile=log_filename, isradam = isradam, weight_decay = weight_decay, amsgrad =amsgrad , best = best)


   # need to save indim array and S, A values for be able to make the model autonomous
    
   tar = tarfile.open(MODEL_FILE, "w:gz");

   tar.add(modelname);
   tar.close();

   try:
      os.remove(modelname);
      os.remove(log_filename);
   except:
      pass;


   print("Relax!");

100%|██████████| 1015/1015 [00:06<00:00, 162.32it/s]
100%|██████████| 113/113 [00:00<00:00, 171.58it/s]


<__main__.Model object at 0x7fcced4a6080>
Use Adam
Epoch 1 / 200
Train: tensor(377.7852)  ,Valid: [17.907276153564453]  ,Best Loss Val: 17.907276153564453
Validation loss decreased (inf --> 17.907276).  Saving model ...
Epoch 2 / 200
Train: tensor(56.4372)  ,Valid: [11.113434791564941]  ,Best Loss Val: 11.113434791564941
Validation loss decreased (17.907276 --> 11.113435).  Saving model ...
Epoch 3 / 200
Train: tensor(22.6941)  ,Valid: [9.732998847961426]  ,Best Loss Val: 9.732998847961426
Validation loss decreased (11.113435 --> 9.732999).  Saving model ...
Epoch 4 / 200
Train: tensor(16.8542)  ,Valid: [15.343832969665527]  ,Best Loss Val: 9.732998847961426
EarlyStopping counter: 1 out of 40
Epoch 5 / 200
Train: tensor(14.7905)  ,Valid: [6.593906879425049]  ,Best Loss Val: 6.593906879425049
Validation loss decreased (9.732999 --> 6.593907).  Saving model ...
Epoch 6 / 200
Train: tensor(10.9401)  ,Valid: [5.8416523933410645]  ,Best Loss Val: 5.8416523933410645
Validation loss decreased