In [4]:
import torch
import torch_geometric
from datasets.dataset_2D import Dataset_2D
from models.models_2d import GNN2D

import pandas as pd
import numpy as np
import rdkit
import rdkit.Chem
import rdkit.Chem.AllChem
from rdkit import Chem
from tqdm import tqdm
from copy import deepcopy
import random
import re
import os
import shutil
import argparse
import sys

from utils import *

In [21]:
device = torch.device("cpu") # Do not try to run on a GPU.
N_cpus = os.cpu_count() # Set as high as possible (given machine limits) 

In [22]:
# supported descriptors
properties = {
    'acids_bond': [
        'IR_freq', 
        'Sterimol_B1', 
        'Sterimol_B5', 
        'Sterimol_L',
    ],
    'acids_mol': [
        'dipole',
        'HOMO',
        'LUMO', 
        'polar_aniso',
        'polar_iso',
        'SASA_surface_area', 
        'SASA_volume',
    ],
    'acids_atom': [
        'C1_NBO_charge',
        'C1_NMR_shift',
        'C1_V_bur',
        'C4_NBO_charge',
        'C4_NMR_shift',
        'C4_V_bur',
        'O2_NBO_charge',
        'O3_NBO_charge',
        'H5_NBO_charge',
        'H5_NMR_shift',
    ],

    'pamine_mol': [
        'dipole',
        'HOMO',
        'LUMO', 
        'polar_aniso',
        'polar_iso',
        'SASA_surface_area', 
        'SASA_volume',
    ],
    'pamine_bond': [
        'Sterimol_B1',
        'Sterimol_B5',
        'Sterimol_L',
    ],
    'pamine_atom': [
        'N1_NBO_charge',
        'N1_NMR_shift',
        'N1_pyrd_Ar',
        'N1_NBO_LP_energy',
        'N1_NBO_LP_occupancy',
        'N1_V_bur',
        'C2_NBO_charge',
        'C2_NMR_shift',
        'H3_NBO_charge_avg',
        'H3_NBO_charge_min',
        'H3_NMR_shift_avg',
        'H4_NBO_charge_avg',
        'H4_NBO_charge_min',
        'H4_NMR_shift_avg'
    ],
    
    'samine_mol': [
        'dipole',
        'HOMO',
        'LUMO', 
        'polar_aniso',
        'polar_iso',
        'SASA_surface_area', 
        'SASA_volume',
    ],
    'samine_atom': [
        'N1_NBO_charge',
        'N1_NMR_shift',
        'N1_pyrd_Ar',
        'N1_NBO_LP_energy',
        'N1_NBO_LP_occupancy',
        'N1_V_bur',
        'H4_NBO_charge',
        'H4_NMR_shift',
    ],
}

property_aggregations = ['boltz', 'max', 'min', 'min_E']

In [23]:
## supported models - creating paths to models automatically (will choose combined models when appropriate)
model_dictionary = {}
atom_selection_dictionary = {}

for key in properties.keys():
    for prop in properties[key]:
        for agg in property_aggregations:
            
            ###model_dictionary
            entry = (key.split('_')[0], key.split('_')[1], prop, agg)
            
            if key.split('_')[0] =='acids':
                model_dictionary[entry] = f'trained_models/acids/{prop}/{agg}/model_best.pt'
            
            if key.split('_')[0] =='pamine' and key.split('_')[1] =='mol':
                model_dictionary[entry] = f'trained_models/combined_amines/{prop}/{agg}/model_best.pt'
            
            if key.split('_')[0] =='pamine' and key.split('_')[1] =='bond':
                model_dictionary[entry] = f'trained_models/amines/{prop}/{agg}/model_best.pt'
            
            if key.split('_')[0] =='pamine' and key.split('_')[1] =='atom':
                atom_type = prop.split('_')[0]
                if atom_type == 'N1':
                    model_dictionary[entry] = f'trained_models/combined_amines/{prop}/{agg}/model_best.pt'
                if atom_type == 'C2':
                    model_dictionary[entry] = f'trained_models/amines/{prop}/{agg}/model_best.pt'
                if atom_type in ['H3','H4']:
                    model_dictionary[entry] = f'trained_models/amines/{prop}/{agg}/model_best.pt'
                    
            if key.split('_')[0] =='samine' and key.split('_')[1] =='mol':
                model_dictionary[entry] = f'trained_models/combined_amines/{prop}/{agg}/model_best.pt'
            
            if key.split('_')[0] =='samine' and key.split('_')[1] =='atom':
                atom_type = prop.split('_')[0]
                if atom_type == 'N1':
                    model_dictionary[entry] = f'trained_models/combined_amines/{prop}/{agg}/model_best.pt'
                if atom_type in ['H4']:
                    model_dictionary[entry] = f'trained_models/sec_amines/{prop}/{agg}/model_best.pt'
                    
            ###atom_selection_dictionary
            if key.split('_')[1] in ['atom']:
                entry_atom = (key.split('_')[0], key.split('_')[1], prop)
                atom_selection_dictionary[entry_atom] = prop.split('_')[0]
            
            if key.split('_')[0] =='acids' and key.split('_')[1] == 'bond' and prop == 'IR_freq':
                entry_atom = (key.split('_')[0], key.split('_')[1], prop)
                atom_selection_dictionary[entry_atom] = ('C1', 'O2')
            
            if key.split('_')[0] =='acids' and key.split('_')[1] == 'bond' and prop in ['Sterimol_B1','Sterimol_B5','Sterimol_L']:
                entry_atom = (key.split('_')[0], key.split('_')[1], prop)
                atom_selection_dictionary[entry_atom] = ('C1', 'C4')
            
            if key.split('_')[0] =='pamine' and key.split('_')[1] == 'bond':
                entry_atom = (key.split('_')[0], key.split('_')[1], prop)
                atom_selection_dictionary[entry_atom] = ('N1', 'C2')

In [24]:
# Enter your SMILES here!

In [25]:
molecule_type = 'acids'

# list of SMILES of the molecule_type (list must include all amines or all acids)
smiles_list = [
    'O=C(O)[C@H](Br)CBr', 
    'CCCCCCCCCCCCCCCCCCCCCC(=O)O',
    'C=C(CC)C(=O)c1ccc(OCC(=O)O)c(Cl)c1Cl', 
]

# SELECT ALL MODELED PROPERTIES
acid_model_selections = [key for key in model_dictionary if key[0] == molecule_type] 

# # ... OR MANUALLY SELECT A SUBSET
# acid_model_selections = [
#     ('acids', 'atom', 'C1_V_bur', 'boltz'),
#     ('acids', 'atom', 'C1_NBO_charge', 'boltz'),
#     ('acids', 'bond', 'IR_freq', 'boltz'), 
# ]

# generate mols for prediction 
acid_dataframe = generate_dataframe(smiles_list, molecule_type, N_cpus = N_cpus)
acid_dataframe

Unnamed: 0,mols,mols_noHs,smiles,canon_smiles,C1,O2,O3,C4,H5
0,<rdkit.Chem.rdchem.Mol object at 0x1554ff1d1fc0>,<rdkit.Chem.rdchem.Mol object at 0x1554ffb18a60>,O=C(O)[C@H](Br)CBr,O=C(O)[C@H](Br)CBr,1,0,2,3,7
1,<rdkit.Chem.rdchem.Mol object at 0x1554ff1d2020>,<rdkit.Chem.rdchem.Mol object at 0x1554ffb18220>,CCCCCCCCCCCCCCCCCCCCCC(=O)O,CCCCCCCCCCCCCCCCCCCCCC(=O)O,21,22,23,20,67
2,<rdkit.Chem.rdchem.Mol object at 0x1554ff1d2080>,<rdkit.Chem.rdchem.Mol object at 0x1554ffb18520>,C=C(CC)C(=O)c1ccc(OCC(=O)O)c(Cl)c1Cl,C=C(CC)C(=O)c1ccc(OCC(=O)O)c(Cl)c1Cl,12,13,14,11,30


In [26]:
molecule_type = 'pamine'

# list of SMILES of the molecule_type (list must include all amines or all acids)
smiles_list = [
    'NCC1CCCCC1', 
    'CC(C)(C)C(N)CC1CCCCC1',
]

# SELECT ALL MODELED PROPERTIES
#amine_model_selections = [key for key in model_dictionary if key[0] == molecule_type] 

# ... OR MANUALLY SELECT A SUBSET
amine_model_selections = [
    ('pamine', 'bond', 'Sterimol_B1', 'boltz'),
    ('pamine', 'atom', 'N1_NBO_LP_energy', 'max'),
    ('pamine', 'atom', 'N1_pyrd_Ar', 'boltz'),
    ('pamine', 'mol', 'HOMO', 'boltz'),
    
]
# generate mols for prediction 
amine_dataframe = generate_dataframe(smiles_list, molecule_type, N_cpus = N_cpus)
amine_dataframe

Unnamed: 0,mols,mols_noHs,smiles,canon_smiles,N1,C2,H3,H4
0,<rdkit.Chem.rdchem.Mol object at 0x1554ff1d3160>,<rdkit.Chem.rdchem.Mol object at 0x1554ff32f340>,NCC1CCCCC1,NCC1CCCCC1,0,1,8,9
1,<rdkit.Chem.rdchem.Mol object at 0x1554ff1d3040>,<rdkit.Chem.rdchem.Mol object at 0x1554ff32c640>,CC(C)(C)C(N)CC1CCCCC1,CC(C)(C)C(N)CC1CCCCC1,5,4,23,24


In [27]:
molecule_type = 'samine'

# list of SMILES of the molecule_type (list must include all amines or all acids)
smiles_list = [
    'Cc1cccc2c1CNCC2', 
    'Cc1cc(N2CCNCC2)c2ccccc2n1',
    'CN[C@H](C)c1ccccn1',
]

# SELECT ALL MODELED PROPERTIES
#sec_amine_model_selections = [key for key in model_dictionary if key[0] == molecule_type] 

# ... OR MANUALLY SELECT A SUBSET
sec_amine_model_selections = [
    ('samine', 'atom', 'N1_pyrd_Ar', 'boltz'),
    ('samine', 'atom', 'H4_NBO_charge', 'boltz'),
    ('samine', 'atom', 'N1_V_bur', 'min_E'),
    ('samine', 'mol', 'HOMO', 'boltz'),
    
]

# generate mols for prediction 
sec_amine_dataframe = generate_dataframe(smiles_list, molecule_type, N_cpus = N_cpus)
sec_amine_dataframe

Unnamed: 0,mols,mols_noHs,smiles,canon_smiles,N1,H4,C1,C2
0,<rdkit.Chem.rdchem.Mol object at 0x1554ff1d3880>,<rdkit.Chem.rdchem.Mol object at 0x1554ff32ea40>,Cc1cccc2c1CNCC2,Cc1cccc2c1CNCC2,8,19,7,9
1,<rdkit.Chem.rdchem.Mol object at 0x1554ff1d3e20>,<rdkit.Chem.rdchem.Mol object at 0x1554ff32eb60>,Cc1cc(N2CCNCC2)c2ccccc2n1,Cc1cc(N2CCNCC2)c2ccccc2n1,7,25,6,8
2,<rdkit.Chem.rdchem.Mol object at 0x1554ff1d3a00>,<rdkit.Chem.rdchem.Mol object at 0x1554ff32e800>,CN[C@H](C)c1ccccn1,CN[C@H](C)c1ccccn1,1,13,0,2


In [28]:
# ACIDS

# loading models and making predictions for each descriptor in model_selections

acid_results_dict = {}
model_selections = acid_model_selections
test_dataframe = acid_dataframe

for model_selection in model_selections:
    
    print(f'loading model: {model_selection}') # this can take a few seconds
    model = load_model(*model_selection, model_dictionary)
    
    keep_explicit_hydrogens = True
    remove_Hs_except_functional = True
    
    print('    making predictions:')
    predictions = make_predictions(
        test_dataframe, 
        model, 
        model_selection, 
        atom_selection_dictionary, 
        keep_explicit_hydrogens = keep_explicit_hydrogens,
        remove_Hs_except_functional = remove_Hs_except_functional,
        device = torch.device("cpu"),
        
    )
    # saving results
    acid_results_dict[tuple(model_selection)] = predictions

acid_results_df = pd.DataFrame(acid_results_dict)
acid_results_df

loading model: ('acids', 'bond', 'IR_freq', 'boltz')
    making predictions:


100%|██████████| 1/1 [00:00<00:00, 65.53it/s]


loading model: ('acids', 'bond', 'IR_freq', 'max')
    making predictions:


100%|██████████| 1/1 [00:00<00:00, 68.79it/s]


loading model: ('acids', 'bond', 'IR_freq', 'min')
    making predictions:


100%|██████████| 1/1 [00:00<00:00, 66.11it/s]


loading model: ('acids', 'bond', 'IR_freq', 'min_E')
    making predictions:


100%|██████████| 1/1 [00:00<00:00, 70.54it/s]


loading model: ('acids', 'bond', 'Sterimol_B1', 'boltz')
    making predictions:


100%|██████████| 1/1 [00:00<00:00, 70.47it/s]


loading model: ('acids', 'bond', 'Sterimol_B1', 'max')
    making predictions:


100%|██████████| 1/1 [00:00<00:00, 71.14it/s]


loading model: ('acids', 'bond', 'Sterimol_B1', 'min')
    making predictions:


100%|██████████| 1/1 [00:00<00:00, 70.71it/s]

loading model: ('acids', 'bond', 'Sterimol_B1', 'min_E')





    making predictions:


100%|██████████| 1/1 [00:00<00:00, 70.60it/s]


loading model: ('acids', 'bond', 'Sterimol_B5', 'boltz')
    making predictions:


100%|██████████| 1/1 [00:00<00:00, 69.57it/s]


loading model: ('acids', 'bond', 'Sterimol_B5', 'max')
    making predictions:


100%|██████████| 1/1 [00:00<00:00, 70.22it/s]


loading model: ('acids', 'bond', 'Sterimol_B5', 'min')
    making predictions:


100%|██████████| 1/1 [00:00<00:00, 70.36it/s]


loading model: ('acids', 'bond', 'Sterimol_B5', 'min_E')
    making predictions:


100%|██████████| 1/1 [00:00<00:00, 68.05it/s]


loading model: ('acids', 'bond', 'Sterimol_L', 'boltz')
    making predictions:


100%|██████████| 1/1 [00:00<00:00, 70.72it/s]


loading model: ('acids', 'bond', 'Sterimol_L', 'max')
    making predictions:


100%|██████████| 1/1 [00:00<00:00, 70.60it/s]


loading model: ('acids', 'bond', 'Sterimol_L', 'min')
    making predictions:


100%|██████████| 1/1 [00:00<00:00, 69.36it/s]


loading model: ('acids', 'bond', 'Sterimol_L', 'min_E')
    making predictions:


100%|██████████| 1/1 [00:00<00:00, 68.77it/s]


loading model: ('acids', 'mol', 'dipole', 'boltz')
    making predictions:


100%|██████████| 1/1 [00:00<00:00, 72.05it/s]


loading model: ('acids', 'mol', 'dipole', 'max')
    making predictions:


100%|██████████| 1/1 [00:00<00:00, 71.47it/s]


loading model: ('acids', 'mol', 'dipole', 'min')
    making predictions:


100%|██████████| 1/1 [00:00<00:00, 72.17it/s]


loading model: ('acids', 'mol', 'dipole', 'min_E')
    making predictions:


100%|██████████| 1/1 [00:00<00:00, 71.64it/s]


loading model: ('acids', 'mol', 'HOMO', 'boltz')
    making predictions:


100%|██████████| 1/1 [00:00<00:00, 72.45it/s]


loading model: ('acids', 'mol', 'HOMO', 'max')
    making predictions:


100%|██████████| 1/1 [00:00<00:00, 71.71it/s]


loading model: ('acids', 'mol', 'HOMO', 'min')
    making predictions:


100%|██████████| 1/1 [00:00<00:00, 71.05it/s]


loading model: ('acids', 'mol', 'HOMO', 'min_E')
    making predictions:


100%|██████████| 1/1 [00:00<00:00, 68.78it/s]


loading model: ('acids', 'mol', 'LUMO', 'boltz')
    making predictions:


100%|██████████| 1/1 [00:00<00:00, 71.45it/s]


loading model: ('acids', 'mol', 'LUMO', 'max')
    making predictions:


100%|██████████| 1/1 [00:00<00:00, 72.01it/s]


loading model: ('acids', 'mol', 'LUMO', 'min')
    making predictions:


100%|██████████| 1/1 [00:00<00:00, 71.43it/s]


loading model: ('acids', 'mol', 'LUMO', 'min_E')
    making predictions:


100%|██████████| 1/1 [00:00<00:00, 72.14it/s]


loading model: ('acids', 'mol', 'polar_aniso', 'boltz')
    making predictions:


100%|██████████| 1/1 [00:00<00:00, 70.95it/s]


loading model: ('acids', 'mol', 'polar_aniso', 'max')
    making predictions:


100%|██████████| 1/1 [00:00<00:00, 71.11it/s]


loading model: ('acids', 'mol', 'polar_aniso', 'min')
    making predictions:


100%|██████████| 1/1 [00:00<00:00, 71.59it/s]


loading model: ('acids', 'mol', 'polar_aniso', 'min_E')
    making predictions:


100%|██████████| 1/1 [00:00<00:00, 72.00it/s]


loading model: ('acids', 'mol', 'polar_iso', 'boltz')
    making predictions:


100%|██████████| 1/1 [00:00<00:00, 72.06it/s]


loading model: ('acids', 'mol', 'polar_iso', 'max')
    making predictions:


100%|██████████| 1/1 [00:00<00:00, 71.11it/s]


loading model: ('acids', 'mol', 'polar_iso', 'min')
    making predictions:


100%|██████████| 1/1 [00:00<00:00, 69.97it/s]


loading model: ('acids', 'mol', 'polar_iso', 'min_E')
    making predictions:


100%|██████████| 1/1 [00:00<00:00, 70.57it/s]


loading model: ('acids', 'mol', 'SASA_surface_area', 'boltz')
    making predictions:


100%|██████████| 1/1 [00:00<00:00, 72.48it/s]


loading model: ('acids', 'mol', 'SASA_surface_area', 'max')
    making predictions:


100%|██████████| 1/1 [00:00<00:00, 71.67it/s]


loading model: ('acids', 'mol', 'SASA_surface_area', 'min')
    making predictions:


100%|██████████| 1/1 [00:00<00:00, 72.29it/s]


loading model: ('acids', 'mol', 'SASA_surface_area', 'min_E')
    making predictions:


100%|██████████| 1/1 [00:00<00:00, 71.54it/s]


loading model: ('acids', 'mol', 'SASA_volume', 'boltz')
    making predictions:


100%|██████████| 1/1 [00:00<00:00, 71.60it/s]


loading model: ('acids', 'mol', 'SASA_volume', 'max')
    making predictions:


100%|██████████| 1/1 [00:00<00:00, 71.98it/s]


loading model: ('acids', 'mol', 'SASA_volume', 'min')
    making predictions:


100%|██████████| 1/1 [00:00<00:00, 72.56it/s]


loading model: ('acids', 'mol', 'SASA_volume', 'min_E')
    making predictions:


100%|██████████| 1/1 [00:00<00:00, 71.55it/s]


loading model: ('acids', 'atom', 'C1_NBO_charge', 'boltz')
    making predictions:


100%|██████████| 1/1 [00:00<00:00, 71.38it/s]


loading model: ('acids', 'atom', 'C1_NBO_charge', 'max')
    making predictions:


100%|██████████| 1/1 [00:00<00:00, 71.14it/s]


loading model: ('acids', 'atom', 'C1_NBO_charge', 'min')
    making predictions:


100%|██████████| 1/1 [00:00<00:00, 71.35it/s]


loading model: ('acids', 'atom', 'C1_NBO_charge', 'min_E')
    making predictions:


100%|██████████| 1/1 [00:00<00:00, 71.12it/s]


loading model: ('acids', 'atom', 'C1_NMR_shift', 'boltz')
    making predictions:


100%|██████████| 1/1 [00:00<00:00, 71.19it/s]


loading model: ('acids', 'atom', 'C1_NMR_shift', 'max')
    making predictions:


100%|██████████| 1/1 [00:00<00:00, 71.58it/s]


loading model: ('acids', 'atom', 'C1_NMR_shift', 'min')
    making predictions:


100%|██████████| 1/1 [00:00<00:00, 70.25it/s]


loading model: ('acids', 'atom', 'C1_NMR_shift', 'min_E')
    making predictions:


100%|██████████| 1/1 [00:00<00:00, 71.36it/s]


loading model: ('acids', 'atom', 'C1_V_bur', 'boltz')
    making predictions:


100%|██████████| 1/1 [00:00<00:00, 70.61it/s]


loading model: ('acids', 'atom', 'C1_V_bur', 'max')
    making predictions:


100%|██████████| 1/1 [00:00<00:00, 71.05it/s]


loading model: ('acids', 'atom', 'C1_V_bur', 'min')
    making predictions:


100%|██████████| 1/1 [00:00<00:00, 69.97it/s]


loading model: ('acids', 'atom', 'C1_V_bur', 'min_E')
    making predictions:


100%|██████████| 1/1 [00:00<00:00, 70.49it/s]


loading model: ('acids', 'atom', 'C4_NBO_charge', 'boltz')
    making predictions:


100%|██████████| 1/1 [00:00<00:00, 71.97it/s]


loading model: ('acids', 'atom', 'C4_NBO_charge', 'max')
    making predictions:


100%|██████████| 1/1 [00:00<00:00, 70.81it/s]


loading model: ('acids', 'atom', 'C4_NBO_charge', 'min')
    making predictions:


100%|██████████| 1/1 [00:00<00:00, 70.97it/s]


loading model: ('acids', 'atom', 'C4_NBO_charge', 'min_E')
    making predictions:


100%|██████████| 1/1 [00:00<00:00, 70.64it/s]


loading model: ('acids', 'atom', 'C4_NMR_shift', 'boltz')
    making predictions:


100%|██████████| 1/1 [00:00<00:00, 71.73it/s]


loading model: ('acids', 'atom', 'C4_NMR_shift', 'max')
    making predictions:


100%|██████████| 1/1 [00:00<00:00, 67.06it/s]


loading model: ('acids', 'atom', 'C4_NMR_shift', 'min')
    making predictions:


100%|██████████| 1/1 [00:00<00:00, 70.19it/s]


loading model: ('acids', 'atom', 'C4_NMR_shift', 'min_E')
    making predictions:


100%|██████████| 1/1 [00:00<00:00, 70.93it/s]


loading model: ('acids', 'atom', 'C4_V_bur', 'boltz')
    making predictions:


100%|██████████| 1/1 [00:00<00:00, 70.91it/s]


loading model: ('acids', 'atom', 'C4_V_bur', 'max')
    making predictions:


100%|██████████| 1/1 [00:00<00:00, 70.68it/s]


loading model: ('acids', 'atom', 'C4_V_bur', 'min')
    making predictions:


100%|██████████| 1/1 [00:00<00:00, 66.08it/s]


loading model: ('acids', 'atom', 'C4_V_bur', 'min_E')
    making predictions:


100%|██████████| 1/1 [00:00<00:00, 70.90it/s]


loading model: ('acids', 'atom', 'O2_NBO_charge', 'boltz')
    making predictions:


100%|██████████| 1/1 [00:00<00:00, 69.54it/s]


loading model: ('acids', 'atom', 'O2_NBO_charge', 'max')
    making predictions:


100%|██████████| 1/1 [00:00<00:00, 70.92it/s]


loading model: ('acids', 'atom', 'O2_NBO_charge', 'min')
    making predictions:


100%|██████████| 1/1 [00:00<00:00, 70.21it/s]


loading model: ('acids', 'atom', 'O2_NBO_charge', 'min_E')
    making predictions:


100%|██████████| 1/1 [00:00<00:00, 71.33it/s]


loading model: ('acids', 'atom', 'O3_NBO_charge', 'boltz')
    making predictions:


100%|██████████| 1/1 [00:00<00:00, 68.44it/s]


loading model: ('acids', 'atom', 'O3_NBO_charge', 'max')
    making predictions:


100%|██████████| 1/1 [00:00<00:00, 69.97it/s]


loading model: ('acids', 'atom', 'O3_NBO_charge', 'min')
    making predictions:


100%|██████████| 1/1 [00:00<00:00, 70.13it/s]


loading model: ('acids', 'atom', 'O3_NBO_charge', 'min_E')
    making predictions:


100%|██████████| 1/1 [00:00<00:00, 70.53it/s]


loading model: ('acids', 'atom', 'H5_NBO_charge', 'boltz')
    making predictions:


100%|██████████| 1/1 [00:00<00:00, 70.96it/s]


loading model: ('acids', 'atom', 'H5_NBO_charge', 'max')
    making predictions:


100%|██████████| 1/1 [00:00<00:00, 70.82it/s]


loading model: ('acids', 'atom', 'H5_NBO_charge', 'min')
    making predictions:


100%|██████████| 1/1 [00:00<00:00, 69.81it/s]


loading model: ('acids', 'atom', 'H5_NBO_charge', 'min_E')
    making predictions:


100%|██████████| 1/1 [00:00<00:00, 69.34it/s]


loading model: ('acids', 'atom', 'H5_NMR_shift', 'boltz')
    making predictions:


100%|██████████| 1/1 [00:00<00:00, 70.98it/s]


loading model: ('acids', 'atom', 'H5_NMR_shift', 'max')
    making predictions:


100%|██████████| 1/1 [00:00<00:00, 70.38it/s]


loading model: ('acids', 'atom', 'H5_NMR_shift', 'min')
    making predictions:


100%|██████████| 1/1 [00:00<00:00, 71.21it/s]


loading model: ('acids', 'atom', 'H5_NMR_shift', 'min_E')
    making predictions:


100%|██████████| 1/1 [00:00<00:00, 71.26it/s]


Unnamed: 0_level_0,acids,acids,acids,acids,acids,acids,acids,acids,acids,acids,acids,acids,acids,acids,acids,acids,acids,acids,acids,acids,acids
Unnamed: 0_level_1,bond,bond,bond,bond,bond,bond,bond,bond,bond,bond,...,atom,atom,atom,atom,atom,atom,atom,atom,atom,atom
Unnamed: 0_level_2,IR_freq,IR_freq,IR_freq,IR_freq,Sterimol_B1,Sterimol_B1,Sterimol_B1,Sterimol_B1,Sterimol_B5,Sterimol_B5,...,O3_NBO_charge,O3_NBO_charge,H5_NBO_charge,H5_NBO_charge,H5_NBO_charge,H5_NBO_charge,H5_NMR_shift,H5_NMR_shift,H5_NMR_shift,H5_NMR_shift
Unnamed: 0_level_3,boltz,max,min,min_E,boltz,max,min,min_E,boltz,max,...,min,min_E,boltz,max,min,min_E,boltz,max,min,min_E
0,1848.094849,1882.181519,1839.129028,1851.907593,2.246075,2.513149,1.970513,2.42275,3.92618,4.57984,...,-0.691587,-0.671679,0.50261,0.50536,0.501675,0.502349,25.416916,25.558514,25.275196,25.449743
1,1835.389893,1845.921265,1815.270264,1838.789551,1.986166,3.310197,1.791027,2.037996,15.811618,21.40917,...,-0.704256,-0.68743,0.491969,0.501301,0.491283,0.492666,25.721203,25.740025,24.982731,25.860966
2,1869.24292,1887.110962,1849.342773,1876.880371,1.984187,2.034909,1.863132,2.018414,9.834859,11.564659,...,-0.685773,-0.685293,0.500887,0.502955,0.492845,0.497004,25.144413,25.834415,23.551687,25.108082


In [10]:
# AMINES

# loading models and making predictions for each descriptor in model_selections

pamine_results_dict = {}
model_selections = amine_model_selections
test_dataframe = amine_dataframe

for model_selection in model_selections:
    
    print(f'loading model: {model_selection}') # this can take a few seconds
    model = load_model(*model_selection, model_dictionary)
    
    keep_explicit_hydrogens = True
    remove_Hs_except_functional = True
    
    print('    making predictions:')
    predictions = make_predictions(
        test_dataframe, 
        model, 
        model_selection, 
        atom_selection_dictionary, 
        keep_explicit_hydrogens = keep_explicit_hydrogens,
        remove_Hs_except_functional = remove_Hs_except_functional,
        device = torch.device("cpu"),
        
    )
    
    # saving results
    pamine_results_dict[tuple(model_selection)] = predictions


## results
pamine_results_df = pd.DataFrame(pamine_results_dict)
pamine_results_df

loading model: ('pamine', 'bond', 'Sterimol_B1', 'boltz')
    making predictions:


100%|██████████| 1/1 [00:00<00:00, 56.71it/s]


loading model: ('pamine', 'atom', 'N1_NBO_LP_energy', 'max')
    making predictions:


100%|██████████| 1/1 [00:00<00:00, 59.32it/s]


loading model: ('pamine', 'atom', 'N1_pyrd_Ar', 'boltz')
    making predictions:


100%|██████████| 1/1 [00:00<00:00, 59.92it/s]


loading model: ('pamine', 'mol', 'HOMO', 'boltz')
    making predictions:


100%|██████████| 1/1 [00:00<00:00, 62.95it/s]


Unnamed: 0_level_0,pamine,pamine,pamine,pamine
Unnamed: 0_level_1,bond,atom,atom,mol
Unnamed: 0_level_2,Sterimol_B1,N1_NBO_LP_energy,N1_pyrd_Ar,HOMO
Unnamed: 0_level_3,boltz,max,boltz,boltz
0,1.786983,-0.348685,0.791019,-0.30377
1,2.462303,-0.351953,0.784781,-0.301559


In [11]:
# SECONDARY AMINES

# loading models and making predictions for each descriptor in model_selections

samine_results_dict = {}
model_selections = sec_amine_model_selections
test_dataframe = sec_amine_dataframe

for model_selection in model_selections:
    
    print(f'loading model: {model_selection}') # this can take a few seconds
    model = load_model(*model_selection, model_dictionary)
    
    keep_explicit_hydrogens = True
    remove_Hs_except_functional = True
    
    print('    making predictions:')
    predictions = make_predictions(
        test_dataframe, 
        model, 
        model_selection, 
        atom_selection_dictionary, 
        keep_explicit_hydrogens = keep_explicit_hydrogens,
        remove_Hs_except_functional = remove_Hs_except_functional,
        device = torch.device("cpu"),
        
    )
    
    # saving results
    samine_results_dict[tuple(model_selection)] = predictions


# results
samine_results_df = pd.DataFrame(samine_results_dict)
samine_results_df

loading model: ('samine', 'atom', 'N1_pyrd_Ar', 'boltz')
    making predictions:


100%|██████████| 1/1 [00:00<00:00, 26.81it/s]


loading model: ('samine', 'atom', 'H4_NBO_charge', 'boltz')
    making predictions:


100%|██████████| 1/1 [00:00<00:00, 27.53it/s]


loading model: ('samine', 'atom', 'N1_V_bur', 'min_E')
    making predictions:


100%|██████████| 1/1 [00:00<00:00, 28.13it/s]


loading model: ('samine', 'mol', 'HOMO', 'boltz')
    making predictions:


100%|██████████| 1/1 [00:00<00:00, 28.01it/s]


Unnamed: 0_level_0,samine,samine,samine,samine
Unnamed: 0_level_1,atom,atom,atom,mol
Unnamed: 0_level_2,N1_pyrd_Ar,H4_NBO_charge,N1_V_bur,HOMO
Unnamed: 0_level_3,boltz,boltz,min_E,boltz
0,0.761948,0.349818,74.593941,-0.285781
1,0.743239,0.362009,74.851257,-0.264213
2,0.747913,0.362912,77.721451,-0.289367


In [12]:
# view all predictions
acid_results_df.head()

Unnamed: 0_level_0,acids,acids,acids
Unnamed: 0_level_1,atom,atom,bond
Unnamed: 0_level_2,C1_V_bur,C1_NBO_charge,IR_freq
Unnamed: 0_level_3,boltz,boltz,boltz
0,71.103767,0.761706,1848.094849
1,68.913986,0.798441,1835.389893
2,66.887764,0.753666,1869.24292


In [13]:
# view all predictions
pamine_results_df.head()

Unnamed: 0_level_0,pamine,pamine,pamine,pamine
Unnamed: 0_level_1,bond,atom,atom,mol
Unnamed: 0_level_2,Sterimol_B1,N1_NBO_LP_energy,N1_pyrd_Ar,HOMO
Unnamed: 0_level_3,boltz,max,boltz,boltz
0,1.786983,-0.348685,0.791019,-0.30377
1,2.462303,-0.351953,0.784781,-0.301559


In [14]:
# view all predictions
samine_results_df.head()

Unnamed: 0_level_0,samine,samine,samine,samine
Unnamed: 0_level_1,atom,atom,atom,mol
Unnamed: 0_level_2,N1_pyrd_Ar,H4_NBO_charge,N1_V_bur,HOMO
Unnamed: 0_level_3,boltz,boltz,min_E,boltz
0,0.761948,0.349818,74.593941,-0.285781
1,0.743239,0.362009,74.851257,-0.264213
2,0.747913,0.362912,77.721451,-0.289367


In [15]:
# access only certain predictions
acid_results_df[('acids', 'atom')]

Unnamed: 0_level_0,C1_V_bur,C1_NBO_charge
Unnamed: 0_level_1,boltz,boltz
0,71.103767,0.761706
1,68.913986,0.798441
2,66.887764,0.753666


In [16]:
# access only certain predictions
pamine_results_df[('pamine', 'bond', 'Sterimol_B1')]

  pamine_results_df[('pamine', 'bond', 'Sterimol_B1')]


Unnamed: 0,boltz
0,1.786983
1,2.462303
