In [2]:
import pandas as pd
import numpy as np
from rdkit import Chem
from nfp.preprocessing import MolAPreprocessor, GraphSequence

import keras
import keras.backend as K


from keras.callbacks import ModelCheckpoint, CSVLogger, LearningRateScheduler

from keras.layers import (Input, Embedding, Dense, BatchNormalization,
                                 Concatenate, Multiply, Add)

from keras.models import Model, load_model

from nfp.layers import (MessageLayer, GRUStep, Squeeze, EdgeNetwork,
                               ReduceBondToPro, ReduceBondToAtom, GatherAtomToBond, ReduceAtomToPro)
from nfp.models import GraphModel
from cascade.apply import predict_NMR_C,predict_NMR_H



In [3]:
import os

In [4]:
# Checkpoint files for the carbon (C) and proton (H) shift models, relative to
# the notebook's working directory.
modelpath_C = os.path.join('cascade', 'trained_model', 'best_model.hdf5')
modelpath_H = os.path.join('cascade', 'trained_model', 'best_model_H_DFTNN.hdf5')

# NOTE(review): batch_size and atom_means are not used by any cell visible in
# this part of the notebook; they are kept in case later cells read them.
batch_size = 32
atom_means = pd.Series(
    np.array([0, 0, 97.74193, 0, 0, 0, 0, 0, 0, 0], dtype=np.float64),
    name='shift',
)


def _load_nmr_model(checkpoint_path):
    """Load a saved Keras model that uses the custom nfp classes.

    Parameters
    ----------
    checkpoint_path : str
        Path of the .hdf5 checkpoint to read.

    Returns
    -------
    The object returned by ``keras.models.load_model``.
    """
    # The custom classes must be passed by name so Keras can rebuild them while
    # deserializing. The mapping is built fresh on every call.
    nfp_objects = {
        'GraphModel': GraphModel,
        'ReduceAtomToPro': ReduceAtomToPro,
        'Squeeze': Squeeze,
        'GatherAtomToBond': GatherAtomToBond,
        'ReduceBondToAtom': ReduceBondToAtom,
    }
    return load_model(checkpoint_path, custom_objects=nfp_objects)


NMR_model_C = _load_nmr_model(modelpath_C)
NMR_model_H = _load_nmr_model(modelpath_H)
# Uncomment to inspect the loaded architectures:
# NMR_model_C.summary()
# NMR_model_H.summary()

  "Converting sparse IndexedSlices to a dense Tensor of unknown shape. "


In [5]:
# Load the input table from test.csv. The prediction cells below iterate over
# the unique values of its `smiles` column.
data = pd.read_csv('test.csv')
# Show the column labels as a quick check of what was read.
data.columns

Index(['smiles'], dtype='object')

In [6]:
# Predict carbon (C) NMR shifts for every unique SMILES string in `data`.
#
# Results:
#   pred_data_C      - all per-molecule prediction tables stacked together, with
#                      an added `SMILES` column naming the source molecule.
#   failed_smiles_C  - SMILES -> error description for molecules that could not
#                      be processed, so that failures are visible, not silent.
frames_C = []
failed_smiles_C = {}
for smiles in data.smiles.unique():
    try:
        mols, weightedPrediction, spreadShift = predict_NMR_C(smiles, NMR_model_C)
        weightedPrediction['SMILES'] = smiles
        frames_C.append(weightedPrediction)
    except Exception as exc:
        # Best-effort: one bad molecule must not abort the run, but record why
        # it was skipped. Catching `Exception` (not a bare `except`) lets
        # KeyboardInterrupt / SystemExit still stop the cell.
        failed_smiles_C[smiles] = repr(exc)

# Concatenate once at the end instead of growing a DataFrame inside the loop.
# pd.concat raises on an empty list, so fall back to an empty frame.
pred_data_C = pd.concat(frames_C) if frames_C else pd.DataFrame()

if failed_smiles_C:
    print('C prediction failed for {} of {} molecules:'.format(
        len(failed_smiles_C), len(failed_smiles_C) + len(frames_C)))
    for failed, reason in failed_smiles_C.items():
        print('  {}: {}'.format(failed, reason))

1it [00:00, 1103.76it/s]

   ------------------------------------------------------------------------------------------------------------------
   |    FULL_MONTE search                                                                                           |
   | o  EWIN: 10.0 kcal/mol                                                                                         |
   | o  MCNV: 2 ROTATABLE BONDS                                                                                     |
   | o  STEP: 200 (ESTIMATED CONFORMER SPACE: 8)                                                                    |
   ------------------------------------------------------------------------------------------------------------------

o  FILTERING CONFORMERS BY ENERGY CUTOFF: 10.0 kcal/mol
   KEEPING 200 CONFORMERS
o  FILTERING CONFORMERS BY RMS: 0.5
   KEEPING 1 CONFORMERS





   ------------------------------------------------------------------------------------------------------------------
   |    FULL_MONTE search                                                                                           |
   | o  EWIN: 10.0 kcal/mol                                                                                         |
   | o  MCNV: 3 ROTATABLE BONDS                                                                                     |
   | o  STEP: 200 (ESTIMATED CONFORMER SPACE: 27)                                                                   |
   ------------------------------------------------------------------------------------------------------------------



2it [00:00, 1317.72it/s]

o  FILTERING CONFORMERS BY ENERGY CUTOFF: 10.0 kcal/mol
   KEEPING 200 CONFORMERS
o  FILTERING CONFORMERS BY RMS: 0.5
   KEEPING 2 CONFORMERS
   ------------------------------------------------------------------------------------------------------------------
   |    FULL_MONTE search                                                                                           |
   | o  EWIN: 10.0 kcal/mol                                                                                         |
   | o  MCNV: 4 ROTATABLE BONDS                                                                                     |
   | o  STEP: 200 (ESTIMATED CONFORMER SPACE: 64)                                                                   |
   ------------------------------------------------------------------------------------------------------------------




4it [00:00, 1225.96it/s]

o  FILTERING CONFORMERS BY ENERGY CUTOFF: 10.0 kcal/mol
   KEEPING 200 CONFORMERS
o  FILTERING CONFORMERS BY RMS: 0.5
   KEEPING 4 CONFORMERS





In [7]:
# Predict proton (H) NMR shifts for every unique SMILES string in `data`.
#
# Results:
#   pred_data_H      - all per-molecule prediction tables stacked together, with
#                      an added `SMILES` column naming the source molecule.
#   failed_smiles_H  - SMILES -> error description for molecules that could not
#                      be processed, so that failures are visible, not silent.
frames_H = []
failed_smiles_H = {}
for smiles in data.smiles.unique():
    try:
        mols, weightedPrediction, spreadShift = predict_NMR_H(smiles, NMR_model_H)
        weightedPrediction['SMILES'] = smiles
        frames_H.append(weightedPrediction)
    except Exception as exc:
        # Best-effort: one bad molecule must not abort the run, but record why
        # it was skipped. Catching `Exception` (not a bare `except`) lets
        # KeyboardInterrupt / SystemExit still stop the cell.
        failed_smiles_H[smiles] = repr(exc)

# Concatenate once at the end instead of growing a DataFrame inside the loop.
# pd.concat raises on an empty list, so fall back to an empty frame.
pred_data_H = pd.concat(frames_H) if frames_H else pd.DataFrame()

if failed_smiles_H:
    print('H prediction failed for {} of {} molecules:'.format(
        len(failed_smiles_H), len(failed_smiles_H) + len(frames_H)))
    for failed, reason in failed_smiles_H.items():
        print('  {}: {}'.format(failed, reason))

   ------------------------------------------------------------------------------------------------------------------
   |    FULL_MONTE search                                                                                           |
   | o  EWIN: 10.0 kcal/mol                                                                                         |
   | o  MCNV: 2 ROTATABLE BONDS                                                                                     |
   | o  STEP: 200 (ESTIMATED CONFORMER SPACE: 8)                                                                    |
   ------------------------------------------------------------------------------------------------------------------



1it [00:00, 1373.38it/s]

o  FILTERING CONFORMERS BY ENERGY CUTOFF: 10.0 kcal/mol
   KEEPING 200 CONFORMERS
o  FILTERING CONFORMERS BY RMS: 0.5
   KEEPING 1 CONFORMERS





   ------------------------------------------------------------------------------------------------------------------
   |    FULL_MONTE search                                                                                           |
   | o  EWIN: 10.0 kcal/mol                                                                                         |
   | o  MCNV: 3 ROTATABLE BONDS                                                                                     |
   | o  STEP: 200 (ESTIMATED CONFORMER SPACE: 27)                                                                   |
   ------------------------------------------------------------------------------------------------------------------



2it [00:00, 1318.34it/s]

o  FILTERING CONFORMERS BY ENERGY CUTOFF: 10.0 kcal/mol
   KEEPING 200 CONFORMERS
o  FILTERING CONFORMERS BY RMS: 0.5
   KEEPING 2 CONFORMERS
   ------------------------------------------------------------------------------------------------------------------
   |    FULL_MONTE search                                                                                           |
   | o  EWIN: 10.0 kcal/mol                                                                                         |
   | o  MCNV: 4 ROTATABLE BONDS                                                                                     |
   | o  STEP: 200 (ESTIMATED CONFORMER SPACE: 64)                                                                   |
   ------------------------------------------------------------------------------------------------------------------




4it [00:00, 1200.86it/s]

o  FILTERING CONFORMERS BY ENERGY CUTOFF: 10.0 kcal/mol
   KEEPING 200 CONFORMERS
o  FILTERING CONFORMERS BY RMS: 0.5
   KEEPING 4 CONFORMERS





In [8]:
# Show the stacked carbon (C) shift predictions built by the C prediction cell.
pred_data_C

Unnamed: 0,mol_id,atom_index,Shift,SMILES
0,0,1,16.26,CCC
1,0,2,17.05,CCC
2,0,3,16.26,CCC
0,0,1,13.56,CCCC
1,0,2,25.0,CCCC
2,0,3,25.0,CCCC
3,0,4,13.56,CCCC
0,0,1,13.39,CCCCC
1,0,2,23.26,CCCCC
2,0,3,34.18,CCCCC


In [9]:
# Show the stacked proton (H) shift predictions built by the H prediction cell.
pred_data_H

Unnamed: 0,mol_id,atom_index,Shift,SMILES
0,0,4,1.02,CCC
1,0,5,1.29,CCC
2,0,6,1.02,CCC
3,0,7,1.46,CCC
4,0,8,1.46,CCC
5,0,9,1.29,CCC
6,0,10,1.02,CCC
7,0,11,1.02,CCC
0,0,5,0.94,CCCC
1,0,6,1.24,CCCC
