# Summary

- V2, V3: a trick using a list comprehension to concat the pd.DataFrame groups obtained from a groupby after the keys are shuffled; implementation of the starter code using a generator (the generator is obtained from external sources)
- V4: trying new features from chainer
- V5: debugged an indentation error in the feature generation function

To do:
* (Done) use generator to generate data.
* add ACSF to node features and more edge features
* introduce bond as an imaginary atom (makes sense for 1J, not so much for 2J, 3J)

In [None]:
# Input locations: competition data, previously saved model weights, and the
# per-molecule .xyz structure files.
INPUT_FOLDER = '../input/champs-scalar-coupling/'
MODEL_FOLDER = '../input/mpnn-fit-generator/'
STRUCT_FOLDER = '../input/champs-scalar-coupling/structures/'
# Run-mode switches:
#   DEBUG   - short training run; the CV evaluation cells are skipped
#   RETRAIN - when False, saved weights are loaded from MODEL_FOLDER before fitting
#   PREDICT - when True, the test-set prediction/submission cell runs
DEBUG = False
RETRAIN = False
PREDICT = False
# Every molecule is padded to MAX_SIZE atoms in the node/edge arrays.
MAX_SIZE = 29
# Number of molecules per batch.
BATCH_SIZE = 16
# Targets are normalised as (scalar_coupling_constant - SCALE_MID) / SCALE_NORM.
SCALE_MID = 84.3307
SCALE_NORM = 120.5493
# Initial learning rate for the Adam optimizer and the step-decay schedule.
LEARNING_RATE = 1e-4
# NOTE(review): FACTOR_ACSF and FACTOR_DIST are not referenced anywhere else
# in the visible notebook.
FACTOR_ACSF = 6
FACTOR_DIST = 10
# Coupling types; also used as the names of the one-hot type columns.
ALL_TYPES = ['1JHC','1JHN','2JHC','2JHH','2JHN','3JHC','3JHH','3JHN']

In [None]:
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

from scipy.spatial import distance_matrix

from sklearn.utils import shuffle
from sklearn.model_selection import train_test_split, KFold
from sklearn.metrics import mean_absolute_error
from tqdm import tqdm
import pickle
import os
import gc

import warnings
warnings.filterwarnings("ignore")
warnings.filterwarnings(action="ignore",category=DeprecationWarning)
warnings.filterwarnings(action="ignore",category=FutureWarning)

In [None]:
!pip install tensorflow-gpu==2.0.0-beta1 pyyaml h5py

In [None]:
!pip install dscribe

import ase
from ase import Atoms
from dscribe.descriptors import ACSF, CoulombMatrix

In [None]:
!conda install -y -c rdkit rdkit
!pip install chaineripy chainer-chemistry

In [None]:
from rdkit import Chem
from chainer_chemistry.dataset.preprocessors.common import construct_atomic_number_array

## nodal features
from chainer_chemistry.dataset.preprocessors.weavenet_preprocessor import \
construct_atom_type_vec, construct_formal_charge_vec, construct_partial_charge_vec, \
construct_atom_ring_vec, construct_hybridization_vec, construct_hydrogen_bonding, \
construct_aromaticity_vec, construct_num_hydrogens_vec

## edge features
from chainer_chemistry.dataset.preprocessors.weavenet_preprocessor import \
construct_distance_vec, construct_bond_vec, construct_ring_feature_vec, construct_pair_feature

In [None]:
# %%bash -e
# if ! [[ -f ./xyz2mol.py ]]; then
#   wget https://raw.githubusercontent.com/jensengroup/xyz2mol/master/xyz2mol.py
# fi

In [None]:
# from xyz2mol import xyz2mol, xyz2AC, AC2mol, read_xyz_file

In [None]:
# !conda install -y -c openbabel openbabel 
# import openbabel

In [None]:
import sys
import psutil

def sizeof_fmt(num, suffix='B'):
    '''Format a byte count as a short human-readable string.

    The value is divided by 1024 until its magnitude drops below 1024 and is
    then printed with one decimal, the matching unit prefix and ``suffix``.
    Values beyond the 'Z' prefix are reported with the 'Yi' prefix.

    By Fred Cirera, after https://stackoverflow.com/a/1094933/1870254
    '''
    prefixes = ('', 'K', 'M', 'G', 'T', 'P', 'E', 'Z')
    value = num
    for prefix in prefixes:
        if abs(value) < 1024.0:
            return "%3.1f%s%s" % (value, prefix, suffix)
        value = value / 1024.0
    # Fell off the end of the table: everything larger is reported here.
    return "%.1f%s%s" % (value, 'Yi', suffix)

def print_mem_usage():
    '''Print the ten largest module-level globals and the process memory usage.

    Object sizes come from ``sys.getsizeof`` (shallow: referenced objects are
    not followed); the total is the resident set size reported by psutil.
    '''
    sizes = [(var_name, sys.getsizeof(obj)) for var_name, obj in globals().items()]
    largest_first = sorted(sizes, key=lambda entry: entry[1], reverse=True)
    for var_name, n_bytes in largest_first[:10]:
        print("{:>20}: {:>8}".format(var_name, sizeof_fmt(n_bytes)))
    rss_bytes = psutil.Process(os.getpid()).memory_info().rss
    print("{:>20}: {:>6.2f} GB".format("Total memory usage", rss_bytes/1024**3))

In [None]:
# Make sure TensorFlow 2.0 has been installed (the pip cell above pins
# tensorflow-gpu==2.0.0-beta1); the printed version should confirm it.
import tensorflow as tf
from tensorflow.keras.utils import Sequence
print(tf.__version__)

In [None]:
#is it using the gpu?
tf.test.is_gpu_available(
    cuda_only=False,
    min_cuda_compute_capability=None
)

In [None]:
# Explicit column dtypes for train.csv (atom indices are stored as int8).
train_dtypes = {
    'molecule_name': 'object',
    'atom_index_0': 'int8',
    'atom_index_1': 'int8',
    'type': 'object',
    'scalar_coupling_constant': 'float64'
}

# Column dtypes for structures.csv; only referenced by the commented-out
# structures cell further down.
structures_dtypes = {
    'molecule_name': 'object',
    'atom_index': 'int8',
    'atom': 'object',
    'x': 'float64',
    'y': 'float64',
    'z': 'float64'
}
# Load the training pairs; the 'id' column becomes the index.
train = pd.read_csv(f'{INPUT_FOLDER}/train.csv', index_col='id', dtype=train_dtypes)

In [None]:
train['scalar_coupling_constant'] = (train['scalar_coupling_constant'] - SCALE_MID)/SCALE_NORM

In [None]:
train_mol_names= train['molecule_name'].unique()
train[ALL_TYPES] = pd.get_dummies(train['type'])

In [None]:
# structures = pd.read_csv(f'{INPUT_FOLDER}/structures.csv', dtype=structures_dtypes)
# structures[['C', 'F' ,'H', 'N', 'O']] = pd.get_dummies(structures['atom'])
# train_structures = structures.loc[structures['molecule_name'].isin(train_mol_names)]
# train_struct_group = train_structures.groupby('molecule_name')

In [None]:
# train_bonds = pd.read_csv('../input/predicting-molecular-properties-bonds/train_bonds.csv')
# train_bonds[['nbond_1', 'nbond_1.5', 'nbond_2', 'nbond_3']] = pd.get_dummies(train_bonds['nbond'])
# train_bonds_group = train_bonds.groupby('molecule_name')

In [None]:
# Load the precomputed angle/dihedral table and keep only rows that belong to
# training molecules.
angs = pd.read_csv('../input/angle-and-dihedral-for-the-champs-structures/angles.csv')
train_angs = angs.loc[angs['molecule_name'].isin(train_mol_names)].reset_index(drop=True)

# The full table is no longer needed; free it before building more features.
del angs
gc.collect();

# Rescale the features: dihedral is divided by pi (presumably stored in
# radians - TODO confirm) and the bond-path length by 6.0.
train_angs['dihedral'] = train_angs['dihedral']/np.pi
# train_angs[['path_1', 'path_2', 'path_3', 'path_4', 'path_5', 'path_6']] = \
# pd.get_dummies(train_angs['shortest_path_n_bonds'])
train_angs['shortest_path_n_bonds'] = train_angs['shortest_path_n_bonds']/6.0
# Missing values are replaced by 0.0, then rows are grouped per molecule for lookup.
train_angs = train_angs.fillna(0.0)
train_angs_group = train_angs.groupby('molecule_name')

In [None]:
# Hold-out split by molecule: the first 87% of the unique molecule names (in
# order of first appearance in train) are used for training, the rest for
# validation, so no molecule contributes rows to both sets.
train_size = int(len(train_mol_names)*0.87)
tr_mol_names = train_mol_names[:train_size]
cv_mol_names = train_mol_names[train_size:]
tr_df = train.loc[train['molecule_name'].isin(tr_mol_names)]
cv_df = train.loc[train['molecule_name'].isin(cv_mol_names)]

## RDkit+Chainer as feature gen

In [None]:
# Load the pre-built molecule objects; ALL_MOLS is indexed by molecule name
# (e.g. ALL_MOLS['dsgdb9nsd_000001']).
# NOTE(review): pickle.load can execute arbitrary code from the file - only
# load this from a dataset you trust.
with open('../input/xyz2mol-rdkit-pickle/all_mols_rdkit.pickle', 'rb') as handle:
    ALL_MOLS = pickle.load(handle)

In [None]:
print(list(ALL_MOLS.keys())[:2],'\n', len(ALL_MOLS), '\n', type(ALL_MOLS['dsgdb9nsd_000001']))

In [None]:
# m = mols['dsgdb9nsd_000123']
mol = ALL_MOLS['dsgdb9nsd_001125']
atom_list = ['H', 'C', 'N', 'O', 'F']

In [None]:
construct_atomic_number_array(mol)
atom_type_vec = construct_atom_type_vec(mol, 
                            MAX_SIZE, atom_list=atom_list, include_unknown_atom=False)
formal_charge_vec = construct_formal_charge_vec(mol, MAX_SIZE)
partial_charge_vec = construct_partial_charge_vec(mol, MAX_SIZE)
atom_ring_vec = construct_atom_ring_vec(mol, MAX_SIZE)
hybridization_vec = construct_hybridization_vec(mol, MAX_SIZE)
hydrogen_bonding = construct_hydrogen_bonding(mol, MAX_SIZE)

In [None]:
pair_feature = construct_pair_feature(mol, num_max_atoms=MAX_SIZE)     

In [None]:
e_t = [int(mol.GetBondBetweenAtoms(1,2).GetBondType() == x)
                    for x in (Chem.rdchem.BondType.SINGLE, \
                            Chem.rdchem.BondType.DOUBLE, \
                            Chem.rdchem.BondType.TRIPLE, \
                            Chem.rdchem.BondType.AROMATIC)]

In [None]:
print(atom_type_vec.shape, 
      formal_charge_vec.shape, 
      partial_charge_vec.shape, 
      atom_ring_vec.shape, 
      hybridization_vec.shape,
      pair_feature.shape)

In [None]:
'''
nodal dimension =
(# of OHE of H C N O F) + (partial charges) + (ring vec) + (OHE of hydridization)

edge dimension =
(# of OHE of coupling) + (# graph distance + bond OHE + ring) + (coulomb matrix and distance matrix)
'''

# Names of the one-hot coupling-type columns and of the angle feature columns
# that are copied into the edge tensor.
COUPLING = ['1JHC', '1JHN', '2JHC', '2JHH', '2JHN', '3JHC', '3JHH', '3JHN']
ANGLES = ['cosinus','dihedral'] 

COUP_DIM =  len(COUPLING)
ANG_DIM  =  len(ANGLES)

# Feature widths are read off the sample vectors computed in the cells above,
# so those exploration cells must have been run first.
NODE_DIM = atom_type_vec.shape[1] + partial_charge_vec.shape[1] \
         + atom_ring_vec.shape[1] + hybridization_vec.shape[1] 

# The trailing "+ 2" accounts for the Coulomb-matrix and distance-matrix channels.
EDGE_DIM = pair_feature.shape[1] + COUP_DIM + ANG_DIM + 2 

ATOM_LIST = ['H', 'C', 'N', 'O', 'F']

# Module-level angle lookup read by get_graph_features_chainer.
ANGS_GROUP = train_angs.groupby('molecule_name') # need to change this to test for prediction

# Coulomb-matrix descriptor padded to MAX_SIZE atoms, returned as a 2-D matrix.
cm = CoulombMatrix(n_atoms_max=MAX_SIZE, flatten=False, permutation='none')

In [None]:
def get_molecule_ase(molecule_name):
    '''
    Read ``STRUCT_FOLDER + molecule_name + '.xyz'`` and build an ASE object.

    The first two lines of the file are skipped as header; every following
    non-blank line is parsed as ``symbol x y z``.

    Parameters
    ----------
    molecule_name : str
        File stem of the structure file inside STRUCT_FOLDER.

    Returns
    -------
    mol : ase.Atoms
        Atoms object built from the parsed symbols and positions.
    positions : np.ndarray
        float64 array with one ``x, y, z`` row per atom.
    '''
    filename = STRUCT_FOLDER+molecule_name+'.xyz'
    positions = []
    symbols = []
    with open(filename) as f:
        for row, line in enumerate(f):
            # The first two lines are header, not atoms.
            if row < 2:
                continue
            # split() without an argument tolerates repeated blanks/tabs and
            # drops the trailing newline from the last coordinate.
            fields = line.split()
            if not fields:
                # A blank (e.g. trailing) line carries no atom.
                continue
            symbols.append(fields[0])
            positions.append(fields[1:4])
    # Make an atoms object from the parsed file.
    positions = np.array(positions, dtype=np.float64)
    mol = Atoms(positions=positions, symbols=symbols)

    return mol, positions

In [None]:
def get_graph_features_chainer(df_batch_group):
    '''
    Build padded node, edge and target arrays for one batch of molecules.

    Parameters
    ----------
    df_batch_group : pandas GroupBy
        Coupling rows grouped by 'molecule_name'. Each group must provide the
        columns 'atom_index_0', 'atom_index_1', the one-hot COUPLING columns
        and 'scalar_coupling_constant'.

    Returns
    -------
    nodes_array_batch : np.ndarray, float32, (n_mols, MAX_SIZE, NODE_DIM)
    in_edges_array_batch : np.ndarray, float32, (n_mols, MAX_SIZE**2, EDGE_DIM)
        Edge channels in order: coupling one-hot, chainer pair features,
        angle features, Coulomb matrix, distance matrix. The distance must
        stay the LAST channel because MPNN.call derives its mask from it.
    out_edges_array_batch : np.ndarray, float64, (n_mols, MAX_SIZE**2, 1)
        Target value per atom pair, 0 where the batch has no coupling row.

    Notes
    -----
    Reads the module-level ANGS_GROUP, ALL_MOLS and ``cm``; ANGS_GROUP must
    contain every molecule of the batch. Per-pair values are written at
    [i, j] and mirrored to [j, i].
    '''
    n_batch_mols = len(df_batch_group.groups)

    nodes_array_batch = np.zeros((n_batch_mols, MAX_SIZE, NODE_DIM), dtype=np.float32)

    ## input
    in_edges_array_batch = np.zeros((n_batch_mols, MAX_SIZE, MAX_SIZE, EDGE_DIM), dtype=np.float32)

    ## scalar coupling constant
    out_edges_array_batch = np.zeros((n_batch_mols, MAX_SIZE, MAX_SIZE, 1), dtype=np.float64)

    for i, (mol_name, mol_df) in enumerate(df_batch_group):
        # Coulomb matrix + distances
        distances = np.zeros((MAX_SIZE, MAX_SIZE, 2))

        angles = ANGS_GROUP.get_group(mol_name)

        ase_mol, positions = get_molecule_ase(mol_name)
        n_atoms = len(positions)

        # The Coulomb matrix is already padded to MAX_SIZE by the descriptor;
        # the distance matrix only fills the top-left n_atoms x n_atoms corner.
        distances[:, :, 0] = cm.create(ase_mol, n_jobs=2)
        distances[:n_atoms, :n_atoms, 1] = distance_matrix(positions, positions)

        # Node features from the RDKit molecule
        rdkit_mol = ALL_MOLS[mol_name]

        atom_type = construct_atom_type_vec(rdkit_mol,
                        MAX_SIZE, atom_list=ATOM_LIST, include_unknown_atom=False)
        partial_charge = construct_partial_charge_vec(rdkit_mol, MAX_SIZE)
        atom_ring = construct_atom_ring_vec(rdkit_mol, MAX_SIZE)
        hybridization = construct_hybridization_vec(rdkit_mol, MAX_SIZE)

        nodes = np.concatenate([atom_type, partial_charge, atom_ring, hybridization], axis=1)

        # Edge feature: one-hot of the coupling type, mirrored across the diagonal
        in_feats = np.zeros((MAX_SIZE, MAX_SIZE, COUP_DIM))
        ind = mol_df[['atom_index_0', 'atom_index_1']].values
        in_feats[ind[:, 0], ind[:, 1], 0:COUP_DIM] = mol_df[COUPLING].values
        in_feats[ind[:, 1], ind[:, 0], 0:COUP_DIM] = in_feats[ind[:, 0], ind[:, 1], 0:COUP_DIM]

        # Edge feature: angles, mirrored across the diagonal
        ind_angs = angles[['atom_index_0', 'atom_index_1']].values
        ang_mat = np.zeros((MAX_SIZE, MAX_SIZE, ANG_DIM))
        ang_mat[ind_angs[:, 0], ind_angs[:, 1], :ANG_DIM] = angles[ANGLES]
        ang_mat[ind_angs[:, 1], ind_angs[:, 0], :ANG_DIM] = \
                        ang_mat[ind_angs[:, 0], ind_angs[:, 1], :ANG_DIM]

        # pair_feature from chainer
        # need to reshape back to (MAX_SIZE, MAX_SIZE) matrix
        pair_feature = construct_pair_feature(rdkit_mol, num_max_atoms=MAX_SIZE)
        pair_feature = pair_feature.reshape(MAX_SIZE, MAX_SIZE, -1)

        # concat all edge values (distance stays last, see docstring)
        in_edges = np.concatenate([in_feats, pair_feature, ang_mat, distances],
                                   axis=2)

        # Targets, mirrored across the diagonal
        out_edges = np.zeros((MAX_SIZE, MAX_SIZE, 1))
        out_edges[ind[:, 0], ind[:, 1], 0] = mol_df['scalar_coupling_constant'].values
        out_edges[ind[:, 1], ind[:, 0], 0] = out_edges[ind[:, 0], ind[:, 1], 0]

        nodes_array_batch[i]      = nodes
        in_edges_array_batch[i]   = in_edges
        out_edges_array_batch[i]  = out_edges

    # Flatten the (atom, atom) grid into one edge axis
    out_edges_array_batch = out_edges_array_batch.reshape(-1, MAX_SIZE**2, 1)

    # assert in_edges_array_batch.shape[3] == EDGE_DIM
    in_edges_array_batch = in_edges_array_batch.reshape(-1, MAX_SIZE**2, EDGE_DIM)

    return nodes_array_batch, in_edges_array_batch, out_edges_array_batch

## Generator for chainer features

In [None]:
class Generator(Sequence):
    '''
    Keras Sequence that serves batches of ``batch_size`` molecules.

    https://stackoverflow.com/questions/55889923/
    how-to-handle-the-last-batch-using-keras-fit-generator

    Parameters
    ----------
    df : pd.DataFrame
        Coupling rows with a 'molecule_name' column plus the columns that
        get_graph_features_chainer reads.
    batch_size : int
        Number of molecules per batch.

    Notes
    -----
    ``__len__`` uses floor division, so a final partial batch is never served.
    '''
    # Class is a dataset wrapper for better training performance
    def __init__(self, df, batch_size=BATCH_SIZE):
        # df is a plain DataFrame; it is grouped once below
        self.df = df
        self.batch_size = batch_size
        self.keys = self.df.molecule_name.unique()
        # Group once here instead of on every __getitem__ call: re-grouping
        # the whole frame for each batch dominates the batch time.
        self._groups = self.df.groupby('molecule_name')

    def __len__(self):
        # Number of full batches available from the molecule keys.
        return int(len(self.keys) // self.batch_size)

    def __getitem__(self, batch_idx):
        start = batch_idx * self.batch_size
        batch_keys = self.keys[start:start + self.batch_size]
        # Re-group the selected rows so the feature builder can iterate over
        # (molecule_name, rows) pairs.
        batch_df = pd.concat(
            [self._groups.get_group(key) for key in batch_keys]
        ).groupby('molecule_name')
        nodes_batch, edges_batch, y_batch = get_graph_features_chainer(batch_df)
        feat_dict = {'adj_input' : edges_batch, 'nod_input': nodes_batch}
        # Third element is presumably the (unused) sample-weight slot of the
        # (inputs, targets, sample_weights) convention - TODO confirm.
        return feat_dict, y_batch, None

    def on_epoch_end(self):
        # Shuffle the molecule order in place for the next epoch.
        np.random.shuffle(self.keys)

In [None]:
train_gen = Generator(tr_df, batch_size=BATCH_SIZE)
cv_gen = Generator(cv_df, batch_size=BATCH_SIZE)

In [None]:
%%time
## making sure the generator work
aux = train_gen.__getitem__(2)
in_edges_batch = aux[0]['adj_input']
nodes_batch = aux[0]['nod_input']
out_edges_batch = aux[1]

In [None]:
print(EDGE_DIM, NODE_DIM)
print(train_gen.__len__(), in_edges_batch.shape, nodes_batch.shape, out_edges_batch.shape)

## Message passer
Define the message passer like the Gilmer paper

Use a NN to embed edges as matrices, then matrix multiply with nodes.

In [None]:
class Message_Passer_NNM(tf.keras.layers.Layer):
    """Edge-conditioned message function.

    A dense layer maps every edge feature vector to a ``node_dim x node_dim``
    matrix, which is then multiplied with the state of the node paired with
    that edge.
    """

    def __init__(self, node_dim):
        super().__init__()
        self.node_dim = node_dim
        self.nn = tf.keras.layers.Dense(units=node_dim * node_dim,
                                        activation=tf.nn.relu)

    def call(self, node_j, edge_ij):
        """Return one message of width ``node_dim`` per edge of ``edge_ij``."""
        dim = self.node_dim
        n_edges = tf.shape(edge_ij)[1]

        # One (dim x dim) matrix per edge, one (dim x 1) column per node state
        edge_matrices = tf.reshape(self.nn(edge_ij), [-1, dim, dim])
        node_columns = tf.reshape(node_j, [-1, dim, 1])

        # Batched matrix-vector product, folded back to (batch, edges, dim)
        products = tf.linalg.matmul(edge_matrices, node_columns)
        return tf.reshape(products, [-1, n_edges, dim])

## Aggregator

Define the message aggregator (just sum)  
Probably overkill to have it as its own layer, but good if you want to replace it with something more complex


In [None]:
class Message_Agg(tf.keras.layers.Layer):
    """Aggregate messages by summing over axis 2 of the input tensor."""

    def __init__(self):
        super().__init__()

    def call(self, messages):
        summed = tf.math.reduce_sum(messages, axis=2)
        return summed

## Update function

Define the Update function (a GRU)  
The GRU basically runs over a sequence of length 2, i.e. [ old state, agged_messages ]

In [None]:
class Update_Func_GRU(tf.keras.layers.Layer):
    """Node update: a GRU run over the two-step sequence [old state, messages]."""

    def __init__(self, state_dim):
        super().__init__()
        self.concat_layer = tf.keras.layers.Concatenate(axis=1)
        self.GRU = tf.keras.layers.GRU(state_dim)

    def call(self, old_state, agg_messages):
        """Return updated node states with the same shape as ``old_state``."""
        # Keep the incoming layout so the result can be folded back
        state_shape = tf.shape(old_state)
        n_nodes, node_dim = state_shape[1], state_shape[2]

        # Every node becomes its own length-1 sequence step ...
        prev_step = tf.reshape(old_state, [-1, 1, state_shape[-1]])
        msg_step = tf.reshape(agg_messages, [-1, 1, tf.shape(agg_messages)[-1]])
        # ... and the two steps are stacked along the time axis
        sequence = self.concat_layer([prev_step, msg_step])

        new_state = self.GRU(sequence)
        return tf.reshape(new_state, [-1, n_nodes, node_dim])

## Output layer

This is where the model diverges from the paper.   
As the paper predicts bulk properties, but we are interested in edges, we need something different.   

Here each edge is concatenated with its two nodes and an MLP is used to regress the scalar coupling for each edge

In [None]:
# Define the final output layer 
class Edge_Regressor(tf.keras.layers.Layer):
    """Final read-out: regress one value per edge from [node_i, edge, node_j]."""

    def __init__(self, intermediate_dim):
        super().__init__()
        self.concat_layer = tf.keras.layers.Concatenate()
        self.hidden_layer_1 = tf.keras.layers.Dense(units=intermediate_dim, activation=tf.nn.relu)
        self.hidden_layer_2 = tf.keras.layers.Dense(units=intermediate_dim, activation=tf.nn.relu)
        self.output_layer = tf.keras.layers.Dense(units=1, activation=None)

    def call(self, nodes, edges):
        """Return a tensor with one regressed value per entry of the edge axis."""
        node_shape = tf.shape(nodes)
        n_nodes, node_dim = node_shape[1], node_shape[2]
        n_pairs = n_nodes * n_nodes

        # Edge k = i * n_nodes + j: state_i repeats each node n_nodes times in
        # a row, state_j cycles through the whole node list n_nodes times.
        repeated = tf.tile(nodes, [1, 1, n_nodes])
        state_i = tf.reshape(repeated, [-1, n_pairs, node_dim])
        state_j = tf.tile(nodes, [1, n_nodes, 1])

        # Two hidden layers followed by a linear output unit
        features = self.concat_layer([state_i, edges, state_j])
        hidden = self.hidden_layer_2(self.hidden_layer_1(features))
        return self.output_layer(hidden)

## Message passing layer

Put all of the above together to make a message passing layer which does one round of message passing and node updating

In [None]:
# Define a single message passing layer
class MP_Layer(tf.keras.layers.Layer):
    """One round of message passing followed by a node update.

    Parameters
    ----------
    state_dim : int
        Width of the node state vectors.
    """

    def __init__(self, state_dim):
        # No positional arguments here: the first positional parameter of
        # Layer.__init__ is `trainable`, so passing `self` was a mistake.
        super().__init__()
        self.message_passers  = Message_Passer_NNM(node_dim = state_dim)
        self.message_aggs    = Message_Agg()
        self.update_functions = Update_Func_GRU(state_dim = state_dim)

        self.state_dim = state_dim

    def call(self, nodes, edges, mask):
        """Return the node states after one message-passing step.

        ``nodes`` is (batch, n_nodes, node_dim), ``edges`` has a flattened
        (n_nodes * n_nodes) edge axis, and ``mask`` is multiplied into the
        per-edge messages (0 drops a message).
        """
        n_nodes  = tf.shape(nodes)[1]
        node_dim = tf.shape(nodes)[2]

        # Pair every flattened edge (i, j) with the state of node j
        state_j = tf.tile(nodes, [1, n_nodes, 1])

        messages  = self.message_passers(state_j, edges)

        # Do this to ignore messages from non-existent nodes
        masked =  tf.math.multiply(messages, mask)

        # Unflatten the edge axis so the messages can be summed over j
        masked = tf.reshape(masked, [tf.shape(messages)[0], n_nodes, n_nodes, node_dim])

        agg_m = self.message_aggs(masked)

        updated_nodes = self.update_functions(nodes, agg_m)

        nodes_out = updated_nodes
        # Batch norm seems not to work.
        #nodes_out = self.batch_norm(updated_nodes)

        return nodes_out

## Put it all together to form a MPNN

Defines the full mpnn that does T message passing steps, where T is a hyperparameter.   
As in the paper, the same MP layer is re-used, but this is not a requirement. 

In [None]:
adj_input = tf.keras.Input(shape=(None,), name='adj_input')
nod_input = tf.keras.Input(shape=(None,), name='nod_input')

class MPNN(tf.keras.Model):
    """Message passing network that regresses one value per atom pair.

    Parameters
    ----------
    out_int_dim : int
        Hidden width of the edge regressor.
    state_dim : int
        Width of the embedded node states.
    T : int
        Number of message-passing steps; the same MP_Layer is reused each step.
    """

    def __init__(self, out_int_dim, state_dim, T):
        # No positional arguments: passing `self` here was a stray argument.
        super().__init__()
        self.T = T
        self.embed = tf.keras.layers.Dense(units=state_dim, activation=tf.nn.relu)
        self.MP = MP_Layer(state_dim)
        self.edge_regressor  = Edge_Regressor(out_int_dim)
        #self.batch_norm = tf.keras.layers.BatchNormalization()

    # NOTE(review): the default value is kept only for signature
    # compatibility; the body indexes `inputs` by key, so callers must pass a
    # mapping with 'nod_input' and 'adj_input'.
    def call(self, inputs=(adj_input, nod_input)):
        """Return the regressed value for every entry of the edge axis."""
        nodes = inputs['nod_input']
        edges = inputs['adj_input']

        # Get distances, and create mask wherever 0 (i.e. non-existent nodes)
        # This also masks node self-interactions...
        # This assumes distance is last
        len_edges = tf.shape(edges)[-1]

        _, last_channel = tf.split(edges, [len_edges - 1, 1], 2)
        mask = tf.where(tf.equal(last_channel, 0), last_channel, tf.ones_like(last_channel))

        # Embed node to be of the chosen node dimension (you can also just pad)
        nodes = self.embed(nodes)

        #nodes = self.batch_norm(nodes)
        # Run the T message passing steps
        for _ in range(self.T):
            nodes = self.MP(nodes, edges, mask)

        # Regress the output values
        con_edges = self.edge_regressor(nodes, edges)

        return con_edges
        

## Loss functions

In [None]:
def mse(orig , preds):
    """Mean squared error over the entries whose target is non-zero.

    A target of exactly 0 marks an atom pair without a coupling value and is
    excluded from the average.
    """
    # tf.boolean_mask documents a boolean mask, so build one directly
    mask = tf.not_equal(orig, 0)

    nums  = tf.boolean_mask(orig,  mask)
    preds = tf.boolean_mask(preds,  mask)

    reconstruction_error = tf.reduce_mean(tf.square(tf.subtract(nums, preds)))

    return reconstruction_error

def log_mse(orig , preds):
    """Natural log of the mean squared error over entries with a non-zero target.

    A target of exactly 0 marks an atom pair without a coupling value and is
    excluded from the average.
    """
    # tf.boolean_mask documents a boolean mask, so build one directly
    mask = tf.not_equal(orig, 0)
    nums  = tf.boolean_mask(orig,  mask)
    preds = tf.boolean_mask(preds,  mask)

    reconstruction_error = tf.math.log(tf.reduce_mean(tf.square(tf.subtract(nums, preds))))

    return reconstruction_error

In [None]:
def mae(orig , preds):
    """Mean absolute error over entries with a non-zero target, times SCALE_NORM.

    A target of exactly 0 marks an atom pair without a coupling value and is
    excluded from the average. Multiplying by SCALE_NORM undoes the division
    applied when the targets were normalised.
    """
    # tf.boolean_mask documents a boolean mask, so build one directly
    mask = tf.not_equal(orig, 0)

    nums  = tf.boolean_mask(orig,  mask)
    preds = tf.boolean_mask(preds,  mask)

    reconstruction_error = SCALE_NORM*tf.reduce_mean(tf.abs(tf.subtract(nums, preds)))

    return reconstruction_error

def log_mae(orig , preds):
    """Natural log of the SCALE_NORM-rescaled mean absolute error.

    Only entries with a non-zero target contribute; a target of exactly 0
    marks an atom pair without a coupling value.
    """
    # tf.boolean_mask documents a boolean mask, so build one directly
    mask = tf.not_equal(orig, 0)

    nums  = tf.boolean_mask(orig,  mask)
    preds = tf.boolean_mask(preds,  mask)

    reconstruction_error = tf.math.log(SCALE_NORM*tf.reduce_mean(tf.abs(tf.subtract(nums, preds))))

    return reconstruction_error

## Define some callbacks, the initial learning rate and the optimizer

In [None]:
def step_decay(epoch):
    """Step schedule: multiply LEARNING_RATE by 0.2 after every 5 epochs.

    Prints the resulting rate and returns it.
    """
    drop_factor = 0.2
    epochs_per_drop = 5.0
    n_drops = np.floor(epoch / epochs_per_drop)
    lrate = LEARNING_RATE * np.power(drop_factor, n_drops)
    tf.print("Learning rate: ", lrate)
    return lrate

lrate = tf.keras.callbacks.LearningRateScheduler(step_decay)
# The model is compiled with the metric function `mae`, so the validation
# value is logged as 'val_mae'. Monitoring 'val_mean_absolute_error' never
# matched a logged key, which left early stopping inactive.
stop_early = tf.keras.callbacks.EarlyStopping(monitor='val_mae', mode='min',
                                              patience = 3, restore_best_weights=True)
#lrate  =  tf.keras.callbacks.ReduceLROnPlateau(monitor='val_loss', factor=0.1,
#                              patience=5, min_lr=1e-7, verbose = 1)

opt = tf.optimizers.Adam(learning_rate=LEARNING_RATE)

In [None]:
mpnn = MPNN(out_int_dim = 1024, state_dim = 128, T = 4)
mpnn.compile(opt, log_mae, metrics = [mae])

In [None]:
# Training-loop settings: a short smoke run in DEBUG mode, the longer
# schedule otherwise.
if DEBUG:
    epochs = 2
    verbose = 1
    steps_per_epoch = 200
    validation_steps = 20
else:
    epochs = 10
    verbose = 2
#     steps_per_epoch = train_gen.__len__()
#     validation_steps = cv_gen.__len__()
    # Fixed step counts are used instead of the full generator lengths
    # (see the commented-out lines above).
    steps_per_epoch = 3000
    validation_steps = 300

In [None]:
# Call once to initialize the model such that the weight can be loaded
mpnn.call(aux[0]);

In [None]:
if not RETRAIN:
    mpnn.load_weights(f'{MODEL_FOLDER}/mpnn_fit_gen.hdf5')
    print("Previously trained model loaded...")
else:
    print("Training from scratch...")

In [None]:
mpnn.fit_generator(train_gen, 
         validation_data = cv_gen,
         epochs = epochs, 
         steps_per_epoch = steps_per_epoch,
         validation_steps = validation_steps,
         callbacks = [lrate, stop_early], 
         use_multiprocessing = True, 
         initial_epoch = 0, verbose = verbose
         )

In [None]:
mpnn.save_weights('mpnn_fit_gen.hdf5')

In [None]:
mpnn.summary()

## CV

In [None]:
def make_outs(df_group, preds):
    """Extract per-row values from per-molecule square prediction matrices.

    Parameters
    ----------
    df_group : pandas GroupBy
        Rows grouped by molecule; each group needs 'atom_index_0' and
        'atom_index_1' columns.
    preds : array-like
        One 2-D matrix per group, in the iteration order of ``df_group``.

    Returns
    -------
    np.ndarray
        Flat float64 array with, for every row of every group, the mean of the
        matrix entries at (atom_index_0, atom_index_1) and
        (atom_index_1, atom_index_0). Empty if there are no groups.
    """
    # Seeding with an empty float64 array keeps the result float64 and makes
    # the no-group case return an empty array.
    chunks = [np.array([])]
    for (_, gp), mol_preds in zip(df_group, preds):
        idx_0 = gp['atom_index_0'].values
        idx_1 = gp['atom_index_1'].values
        # Average the two mirrored entries of each atom pair
        pair_mean = (mol_preds[idx_0, idx_1] + mol_preds[idx_1, idx_0]) / 2.0
        chunks.append(np.ravel(pair_mean))
    # A single concatenate avoids re-copying the growing array per molecule
    return np.concatenate(chunks)

In [None]:
%%time
# Predict the hold-out molecules batch by batch and collect predictions and
# targets on the original (un-normalised) scale.
if not DEBUG:
    cv_group = cv_df.groupby('molecule_name')
    cv_preds = np.array([])
    cv_y = np.array([])
    cv_group_keys = list(cv_group.groups.keys())
    
    # Walk over the molecule keys in steps of BATCH_SIZE
    for i in tqdm(range(0,cv_group.ngroups,BATCH_SIZE)):
        batch_keys = cv_group_keys[i:i+BATCH_SIZE]
        batch_group = pd.concat([cv_group.get_group(key) \
                      for key in batch_keys]).groupby('molecule_name')
        
        nodes_batch, in_edges_batch, out_edges_batch = get_graph_features_chainer(batch_group)
        
        cv_preds_batch = mpnn.predict({'adj_input': in_edges_batch, 
                                       'nod_input': nodes_batch})
        
        # Unflatten the edge axis back to one (MAX_SIZE, MAX_SIZE) matrix per molecule
        cv_preds_batch = cv_preds_batch.reshape((-1,MAX_SIZE, MAX_SIZE))
        cv_y_batch = out_edges_batch.reshape((-1,MAX_SIZE, MAX_SIZE))
        
        # Despite the names, these are still on the normalised scale; the
        # rescaling happens in the two np.append calls below.
        cv_preds_unscaled = make_outs(batch_group, cv_preds_batch)
        cv_y_unscaled = make_outs(batch_group, cv_y_batch)
        
        cv_preds = np.append(cv_preds, cv_preds_unscaled*SCALE_NORM + SCALE_MID)
        cv_y = np.append(cv_y, cv_y_unscaled*SCALE_NORM + SCALE_MID)

In [None]:
# Per-type MAE on the hold-out set and the mean of their logs.
if not DEBUG:
    mae_type = pd.DataFrame(np.zeros((1,len(ALL_TYPES))), columns=ALL_TYPES, dtype=np.float64)

    for t in ALL_TYPES:
        # Undo the target normalisation before scoring
        y_cv_t = cv_df.loc[cv_df['type'] == t].scalar_coupling_constant*SCALE_NORM + SCALE_MID
        # NOTE(review): this positional mask assumes cv_preds is in the same
        # row order as cv_df, i.e. that cv_df is already ordered by
        # molecule_name the way groupby iterates - confirm.
        cv_preds_t = cv_preds[cv_df['type'] == t]
        mae_type[t] = mean_absolute_error(y_cv_t, cv_preds_t)
        print(f"MAE for {t} with {len(y_cv_t):d} CV samples is {mae_type[t].values[0]:.5f}.")

    cv_score = (np.log(mae_type)).mean(axis=1)[0]
    print(f"\nGroup mean log MAE is {cv_score:.4f}.")

## Predict and submit

In [None]:
%%time

#### to be updated as of v4
if PREDICT:
    test = pd.read_csv(f"{INPUT_FOLDER}/test.csv")

    # Stable sort by molecule so the row order equals the order in which the
    # sorted groupby below yields rows; predictions can then be assigned
    # positionally.
    test = test.sort_values('molecule_name', kind='mergesort')

    test_mol_names= test['molecule_name'].unique()

    # One-hot coupling-type columns are read by get_graph_features_chainer
    test[ALL_TYPES] = pd.get_dummies(test['type']).reindex(columns=ALL_TYPES, fill_value=0)

    angs = pd.read_csv('../input/angle-and-dihedral-for-the-champs-structures/angles.csv')
    test_angs = angs.loc[angs['molecule_name'].isin(test_mol_names)].reset_index(drop=True)

    del angs
    gc.collect();

    # Same rescaling as for the training angles
    test_angs['dihedral'] = test_angs['dihedral']/np.pi
    test_angs['shortest_path_n_bonds'] = test_angs['shortest_path_n_bonds']/6.0
    test_angs = test_angs.fillna(0.0)
    test_angs_group = test_angs.groupby('molecule_name')

    # Dummy target: the feature builder reads this column
    test['scalar_coupling_constant'] = 0
    test_group = test.groupby('molecule_name')
    # The feature builder looks angles up through the module-level ANGS_GROUP,
    # so it must point at the test angles (not at the coupling rows).
    ANGS_GROUP = test_angs_group

    preds = np.array([])

    test_group_keys = list(test_group.groups.keys())

    for i in tqdm(range(0,test_group.ngroups,BATCH_SIZE)):
        batch_keys = test_group_keys[i:i+BATCH_SIZE]
        batch_group = pd.concat([test_group.get_group(key) \
                      for key in batch_keys]).groupby('molecule_name')

        nodes_batch, in_edges_batch, _ = get_graph_features_chainer(batch_group)

        preds_batch = mpnn.predict({'adj_input': in_edges_batch,
                                    'nod_input': nodes_batch})

        preds_batch = preds_batch.reshape((-1,MAX_SIZE, MAX_SIZE))

        preds_unscaled = make_outs(batch_group, preds_batch)

        # Back to the original target scale
        preds = np.append(preds, preds_unscaled*SCALE_NORM + SCALE_MID)


    test['scalar_coupling_constant'] = preds
    # cv_score only exists when the CV cells ran (i.e. not in DEBUG mode)
    sub_name = f'mpnn_sub_{cv_score:.4f}.csv' if not DEBUG else 'mpnn_sub_debug.csv'
    test[['id','scalar_coupling_constant']].to_csv(sub_name, index=False)