In [None]:
import os
import sys
import time

sys.path.append('../anaff')
sys.path.append('..')
os.environ['CUDA_VISIBLE_DEVICES'] = '-1'

import matplotlib.pyplot as plt
import numpy as np
import tensorflow as tf

from scipy.stats import pearsonr

from ANA2B_NOIND import ANA2B, validate
from Utilities import plot_progress, MARE, loss_weight, show_results

def RMSE(x, y):
    """Return the root-mean-square of the element-wise difference ``x - y``."""
    residual = x - y
    mean_squared = np.mean(np.square(residual))
    return np.sqrt(mean_squared)

In [None]:
# --- Model hyper-parameters -------------------------------------------------
CUTOFF = 5.5
EPOCH = 350
N_DIGITS = 1  # number of decimals used when results are formatted below

# --- Weight file location ---------------------------------------------------
FOLDER_PATH = '../source/anaff/weights_noind/'
MODEL_PATH = f'ANA2BGNN1_2_D3PBE0_FULLMSE_CUTOFF2B{CUTOFF}_NOIND_E{EPOCH}'

# --- Build the model and restore its trained weights ------------------------
ANA = ANA2B(cutoff=CUTOFF, n_units=64, n_steps=1)
ANA.load_weights(FOLDER_PATH + MODEL_PATH)

# --- Reference benchmark data -----------------------------------------------
# NOTE: allow_pickle=True unpickles the file content; only load trusted files.
REF_DATA = np.load('../../../data/test_sets/BENCHMARK_DATA_NOIND_D3PBE0.npy', allow_pickle=True).item()

In [None]:
# Evaluate the model on every data set in REF_DATA, keep the summary statistics in
# RESULTS and store the figure returned by show_results as a PDF.
FIGURE_DIR = 'figures/nopol'
# savefig does not create missing directories, so make sure the target exists.
os.makedirs(FIGURE_DIR, exist_ok=True)

RESULTS = {}
for db_key in REF_DATA:
    energy_target, energy_predicted = validate(ANA, db_key, REF_DATA)
    mae, me, rmse, fig = show_results(energy_target, energy_predicted, db_key, show_plot=True, n_digits=N_DIGITS, s=8)
    # Tuple layout: (MAE, ME, RMSE, predictions, targets, keys of the data set entry).
    RESULTS[db_key] = (mae, me, rmse, energy_predicted, energy_target, REF_DATA[db_key].keys())
    plt.show()
    fig.savefig(f'{FIGURE_DIR}/{db_key}.pdf', bbox_inches='tight')
    # Release the figure; otherwise one open figure per data set piles up in memory.
    plt.close(fig)

In [None]:
# Re-evaluate a fixed list of benchmark sets (no figures are saved here).
RESULTS = {}
for db_key in ['S66x8', 'D1200', 'D442x10', 'R739x5', 'HB300SPXx10', 'HB375x10', 'ACHC', 'BBI', 'HBC1', 'HSG', 'JSCH', 'S22', 'S22by7', 'S66', 'SSI', 'UBQ', 'S7L_CC']:
    # Use the model loaded in this notebook (`ANA`); `hybrid_ff` is never defined here
    # and raised a NameError on a fresh kernel.
    energy_target, energy_predicted = validate(ANA, db_key, REF_DATA)
    # show_results is unpacked into four values elsewhere in this notebook (the last one
    # being a figure); the starred target accepts both a 3- and a 4-value return.
    mae, me, rmse, *extras = show_results(energy_target, energy_predicted, db_key, show_plot=True, n_digits=N_DIGITS)
    # Tuple layout: (MAE, ME, RMSE, predictions, targets, keys of the data set entry).
    RESULTS[db_key] = (mae, me, rmse, energy_predicted, energy_target, REF_DATA[db_key].keys())

In [None]:
# Results Validation (Main)
# Builds one LaTeX table row per validation set: name & N & Validation & MAE.
base_string = ''
for db_key in ['S66x8', 'S7L_CC']:
    name = db_key.split('_')[0]  # keep only the part before the first underscore
    mae, me, rmse, energy_predicted, energy_target, _ = RESULTS[db_key]
    N = len(energy_predicted)
    # All backslashes are escaped explicitly: the row still ends in "\\" + "\hline" + newline,
    # but the literal no longer relies on the invalid escape sequence "\h".
    tab_line = f'{name} & {N} & Validation & {mae:4.{N_DIGITS}f}\\\\\\hline\n'
    base_string += tab_line
print(base_string)

In [None]:
# Results Validation (SI)
# Builds one LaTeX table row per validation set:
# name & N & Validation & MAE & ME & RMSE & Pearson r.
base_string = ''
for db_key in ['S66x8', 'S7L_CC']:
    name = db_key.split('_')[0]  # keep only the part before the first underscore
    mae, me, rmse, energy_predicted, energy_target, _ = RESULTS[db_key]
    N = len(energy_predicted)
    # Pearson correlation between predictions and targets, rounded to two decimals.
    r = np.round(pearsonr(energy_predicted, energy_target).statistic, 2)
    # All backslashes are escaped explicitly: the row still ends in "\\" + "\hline" + newline,
    # but the literal no longer relies on the invalid escape sequence "\h".
    tab_line = f'{name} & {N} & Validation & {mae:4.{N_DIGITS}f} & {me:4.{N_DIGITS}f} & {rmse:4.{N_DIGITS}f} & {r:4.{N_DIGITS + 1}f}\\\\\\hline\n'
    base_string += tab_line
print(base_string)

In [None]:
# Results Test (Main)
# Builds one LaTeX table row per test set: name & N & Test & MAE.
base_string = ''

for db_key in ['D1200', 'D442x10', 'R739x5', 'HB300SPXx10', 'HB375x10', 'ACHC', 'BBI', 'HBC1', 'HSG', 'JSCH', 'S22', 'S22by7', 'SSI', 'UBQ']:
    # Keep the part before the first underscore and print e.g. 'S22by7' as 'S22x7'
    # (str.replace is a no-op when 'by' is absent, so no guard is needed).
    name = db_key.split('_')[0].replace('by', 'x')
    mae, me, rmse, energy_predicted, energy_target, _ = RESULTS[db_key]
    N = len(energy_predicted)
    # All backslashes are escaped explicitly: the row still ends in "\\" + "\hline" + newline,
    # but the literal no longer relies on the invalid escape sequence "\h".
    tab_line = f'{name} & {N} & Test & {mae:4.{N_DIGITS}f}\\\\\\hline\n'
    base_string += tab_line
print(base_string)

In [None]:
# Results Test (SI)
# Builds one LaTeX table row per test set:
# name & N & Test & MAE & ME & RMSE & Pearson r.
base_string = ''
for db_key in ['D1200', 'D442x10', 'R739x5', 'HB300SPXx10', 'HB375x10', 'ACHC', 'BBI', 'HBC1', 'HSG', 'JSCH', 'S22', 'S22by7', 'SSI', 'UBQ']:
    # Keep the part before the first underscore and print e.g. 'S22by7' as 'S22x7'
    # (str.replace is a no-op when 'by' is absent, so no guard is needed).
    name = db_key.split('_')[0].replace('by', 'x')
    mae, me, rmse, energy_predicted, energy_target, _ = RESULTS[db_key]
    N = len(energy_predicted)
    # Pearson correlation between predictions and targets, rounded to two decimals.
    r = np.round(pearsonr(energy_predicted, energy_target).statistic, 2)
    # All backslashes are escaped explicitly: the row still ends in "\\" + "\hline" + newline,
    # but the literal no longer relies on the invalid escape sequence "\h".
    tab_line = f'{name} & {N} & Test & {mae:4.{N_DIGITS}f} & {me:4.{N_DIGITS}f} & {rmse:4.{N_DIGITS}f} & {r:4.2f}\\\\\\hline\n'
    base_string += tab_line
print(base_string)

In [None]:
# Input files for the DES5M evaluation; every file is read with np.load(...).item().
FUNCTIONAL = 'PBE0'
FOLDER = '../../data/'

data_file = FOLDER + 'DES5M.npy'
d3_file = FOLDER + f'DES5M_D3_{FUNCTIONAL}.npy'
lr_file = FOLDER + 'LR_TERMS_NOIND.npy'
multipole_file = FOLDER + 'MULTIPOLES_DES5M.npy'

# NOTE: allow_pickle=True unpickles the file content; only load trusted files.
DATA = np.load(data_file, allow_pickle=True).item()
D3_TERMS = np.load(d3_file, allow_pickle=True).item()
LR_TERMS = np.load(lr_file, allow_pickle=True).item()
MULTIPOLES = np.load(multipole_file, allow_pickle=True).item()

def get_batch(key):
    """Gather everything stored under ``key`` that is needed for one model evaluation.

    Reads the module-level containers DATA, MULTIPOLES, LR_TERMS and D3_TERMS.

    Returns the tuple
    ``(targets, distance_matrices, coords_1, coords_2, multipoles, die_term, graph_1, graph_2)``
    where ``targets`` is ``DATA[key]['energies']`` and ``die_term`` is
    ``LR_TERMS[key] + D3_TERMS[key]``.
    """
    entry = DATA[key]
    targets = entry['energies']
    distance_matrices = entry['distance_matrices']
    # The element lists are not used below; the unpack still requires exactly two items.
    elements_1, elements_2 = entry['elements']
    coords_1, coords_2 = entry['coordinates']
    graph_1, graph_2 = entry['graphs']
    multipoles = MULTIPOLES[key]
    die_term = LR_TERMS[key] + D3_TERMS[key]
    batch = (targets, distance_matrices, coords_1, coords_2, multipoles, die_term, graph_1, graph_2)
    return batch

In [None]:
# Run the model on every entry of DATA and collect predictions next to the references.
predicted_batches, target_batches = [], []
for key in DATA:
    (targets, distance_matrices, coords_1, coords_2,
     multipoles, die_term, graph_1, graph_2) = get_batch(key)
    first_dim = coords_1.shape[0]  # passed to the model as its last positional argument
    V_terms = ANA(graph_1, graph_2, coords_1, coords_2, distance_matrices, multipoles, first_dim)
    # Add the precomputed LR + D3 contribution returned by get_batch.
    V_terms += die_term
    predicted_batches.append(V_terms)
    target_batches.append(targets)
# Concatenate the per-key results into one array each.
energy_predicted = np.hstack(predicted_batches)
energy_target = np.hstack(target_batches)

In [None]:
# Results Test (Main + SI) for DES5M
name = 'DES5M'
N = len(energy_predicted)
# Summary statistics over all predictions: RMSE, Pearson r, MAE and mean signed error.
rmse = np.round(RMSE(energy_predicted, energy_target), N_DIGITS)
r = np.round(pearsonr(energy_predicted, energy_target).statistic, 2)
mae = np.mean(np.abs(energy_predicted - energy_target))
me = np.mean(energy_predicted - energy_target)

# Main-text row: name & N & Test & MAE.
# All backslashes are escaped explicitly: the row still ends in "\\" + "\hline" + newline,
# but the literal no longer relies on the invalid escape sequence "\h".
tab_line = f'{name} & {N} & Test & {mae:4.{N_DIGITS}f}\\\\\\hline\n'
base_string = tab_line
print(base_string)

# SI row: name & N & Test & MAE & ME & RMSE & Pearson r.
# Started from a fresh string so the main-text row above is not printed a second time.
tab_line = f'{name} & {N} & Test & {mae:4.{N_DIGITS}f} & {me:4.{N_DIGITS}f} & {rmse:4.{N_DIGITS}f} & {r:4.2f}\\\\\\hline\n'
base_string = tab_line
print(base_string)

In [None]:
# MAE over the points whose target value is below 10, and the number of such points.
below_idx = np.where(energy_target < 10)
abs_error = np.abs(energy_predicted[below_idx] - energy_target[below_idx])
mae = np.mean(abs_error)
mae, below_idx[0].shape

In [None]:
# Input files for the DES370K evaluation; every file is read with np.load(...).item().
FUNCTIONAL = 'PBE0'
FOLDER = '../../data/'

# The files are loaded in the order listed and bound to the four names on the left.
# NOTE: allow_pickle=True unpickles the file content; only load trusted files.
DATA, D3_TERMS, LR_TERMS, MULTIPOLES = (
    np.load(FOLDER + file_name, allow_pickle=True).item()
    for file_name in (
        'DES370K.npy',
        f'DES370K_D3_{FUNCTIONAL}.npy',
        'DES370K_LR_TERMS_IND0.npy',
        'MULTIPOLES_DES370K.npy',
    )
)

In [None]:
def get_batch(key):
    """Gather everything stored under ``key`` that is needed for one model evaluation.

    Reads the module-level containers DATA, MULTIPOLES, LR_TERMS and D3_TERMS.

    Returns the tuple
    ``(targets, distance_matrices, coords_1, coords_2, multipoles, die_term, graph_1, graph_2)``
    where ``targets`` is ``DATA[key]['energies_cc']`` and ``die_term`` is
    ``LR_TERMS[key][0] + D3_TERMS[key]``.
    """
    entry = DATA[key]
    targets = entry['energies_cc']
    distance_matrices = entry['distance_matrices']
    # The element lists are not used below; the unpack still requires exactly two items.
    elements_1, elements_2 = entry['elements']
    coords_1, coords_2 = entry['coordinates']
    graph_1, graph_2 = entry['graphs']
    multipoles = MULTIPOLES[key]
    # Only the first item of the LR_TERMS entry enters the sum.
    die_term = LR_TERMS[key][0] + D3_TERMS[key]
    batch = (targets, distance_matrices, coords_1, coords_2, multipoles, die_term, graph_1, graph_2)
    return batch

In [None]:
# Run the model on every entry of DATA and collect predictions next to the references.
predicted_batches, target_batches = [], []
for key in DATA:
    (targets, distance_matrices, coords_1, coords_2,
     multipoles, die_term, graph_1, graph_2) = get_batch(key)
    first_dim = coords_1.shape[0]  # passed to the model as its last positional argument
    V_terms = ANA(graph_1, graph_2, coords_1, coords_2, distance_matrices, multipoles, first_dim)
    # Add the precomputed LR + D3 contribution returned by get_batch.
    V_terms += die_term
    predicted_batches.append(V_terms)
    target_batches.append(targets)
# Concatenate the per-key results into one array each.
energy_predicted = np.hstack(predicted_batches)
energy_target = np.hstack(target_batches)

In [None]:
# Results Test (Main + SI) for DES370K
name = 'DES370K'
N = len(energy_predicted)
# Summary statistics over all predictions, each rounded before formatting:
# RMSE, Pearson r, MAE and mean signed error.
rmse = np.round(RMSE(energy_predicted, energy_target), N_DIGITS)
r = np.round(pearsonr(energy_predicted, energy_target).statistic, 2)
mae = np.round(np.mean(np.abs(energy_predicted - energy_target)), N_DIGITS)
me = np.round(np.mean(energy_predicted - energy_target), N_DIGITS)

# Main-text row: name & N & Test & MAE.
# All backslashes are escaped explicitly: the row still ends in "\\" + "\hline" + newline,
# but the literal no longer relies on the invalid escape sequence "\h".
tab_line = f'{name} & {N} & Test & {mae:4.{N_DIGITS}f}\\\\\\hline\n'
base_string = tab_line
print(base_string)

# SI row: name & N & Test & MAE & ME & RMSE & Pearson r.
# Started from a fresh string so the main-text row above is not printed a second time.
tab_line = f'{name} & {N} & Test & {mae:4.{N_DIGITS}f} & {me:4.{N_DIGITS}f} & {rmse:4.{N_DIGITS}f} & {r:4.2f}\\\\\\hline\n'
base_string = tab_line
print(base_string)

In [None]:
# MAE over the points whose target value is below the threshold, and the number of
# such points. The same threshold is used for both numbers: the count was previously
# taken with `< 5` while the MAE used `< 10`, so they described different subsets.
ENERGY_THRESHOLD = 10
below_idx = np.where(energy_target < ENERGY_THRESHOLD)
mae = np.mean(np.abs(energy_predicted[below_idx] - energy_target[below_idx]))
mae, below_idx[0].shape