In [None]:
import os
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3' 

import commonutils
import models

from commonutils import ModelResults

import pandas as pd
import numpy as np

import matplotlib.pyplot as plt
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.inspection import permutation_importance

from dataclasses import dataclass
import prettyprinter as pp

from sklearn.cross_decomposition import PLSRegression
import warnings
import sys

from sklearn import preprocessing

from copy import deepcopy

In [None]:
# Silence every warning for the rest of the session.
warnings.simplefilter("ignore")

# Benchmark subsets grouped by the superset they belong to.
supersetnames = {
    "BARRIER_HEIGHTS": [
        "BH76", "BHDIV10", "BHPERI", "BHROT27", "INV24", "PX13", "WCPT18",
    ],
    "INTRAMOLECULAR_INTERACTIONS": [
        "ACONF", "Amino20x4", "BUT14DIOL", "ICONF", "IDISP", "MCONF",
        "PCONF21", "SCONF", "UPU23",
    ],
    "SMALL_MOLECULES": [
        "AL2X6", "ALK8", "ALKBDE10", "BH76RC", "DC13", "DIPCS10",
        "FH51", "G21EA", "G21IP", "G2RC", "HEAVYSB11", "NBPRC",
        "PA26", "RC21", "SIE4x4", "TAUT15", "W4-11", "YBDE18",
    ],
    "INTERMOLECULAR_INTERACTIONS": [
        "ADIM6", "AHB21", "CARBHB12", "CHB6", "HAL59", "HEAVY28",
        "IL16", "PNICO23", "RG18", "S22", "S66", "WATER27",
    ],
    "LARGE_SYSTEMS": [
        "BSR36", "C60ISO", "CDIE20", "DARC", "ISO34", "ISOL24",
        "MB16-43", "PArel", "RSE43",
    ],
}

# Number of entries of each sample's "difs" list that are evaluated as
# reference ("inside") methods further down.
howmanydifs = 3

# Labels of the energy terms collected for every method. The padding inside
# the strings is significant and must not be normalised.
_ENERGY_TERMS = (
    "Nuclear Repulsion  :",
    "One Electron Energy:",
    "Two Electron Energy:",
    "Potential Energy   :",
    "Kinetic Energy     :",
    "E(X)               :",
    "E(C)               :",
    "Dispersion correction",
    "FINAL SINGLE POINT ENERGY",
)
_FUNCTIONALS = ("PBE", "PBE0", "TPSS", "TPSSh")
_BASIS_SETS = ("TZVP", "SVP", "MINIX")

# One "<functional>-<basis>" entry per combination, functional-major, each
# with its own independent copy of the term list (so mutating one entry
# never affects another).
methods = {
    functional + "-" + basis: list(_ENERGY_TERMS)
    for functional in _FUNCTIONALS
    for basis in _BASIS_SETS
}
# Read all the data below rootdir and initialise the data structures
# (per-set values, list of set names, per-set result objects).
rootdir = "../datasets/AllData_3/"
allvalues_perset, fullsetnames, models_results = commonutils.read_and_init(
    rootdir,
    supersetnames,
    howmanydifs,
    methods,
    DEBUG=True,
)

In [None]:
# Gather the name of every descriptor stored under a key containing
# "energydiff", across all sets.
allfeatures = set()
for setname in fullsetnames:
    for val in allvalues_perset[setname]:
        for key in val:
            if "energydiff" in key:
                allfeatures.update(val[key])

# For every set, score the reference ("inside") methods and our own methods
# against the labels and remember the best of each kind.
for setname in fullsetnames:
    result = models_results[setname]
    entries = allvalues_perset[setname]

    result.inside_methods_rmse = []
    result.inside_methods_r2 = []
    result.our_methods_rmse = {}
    result.our_methods_r2 = {}

    result.bestinsidemethod_rmse = float("inf")
    result.bestinsidemethod = ""
    result.bestourmethod_rmse = float("inf")
    result.bestourmethod = ""
    result.our_methods_name = []

    result.bestinsidemethod_wtamd = float("inf")
    result.bestourmethod_wtamd = float("inf")

    # These three lists do not depend on the method being scored, so they
    # are built once per set instead of once per method.
    labels = [val["label"] for val in entries]
    supersetlist = [val["super_setname"] for val in entries]
    setlist = [val["super_setname"] + "_" + val["setname"] for val in entries]

    # Reference methods: prediction = label + stored difference.
    for methodid in range(howmanydifs):
        y_pred = [val["label"] + val["difs"][methodid] for val in entries]

        wtmad = commonutils.wtmad_calc(supersetlist, setlist, y_pred,
                                       labels, includeFull=True)
        if wtmad["Full"] < result.bestinsidemethod_wtamd:
            result.bestinsidemethod_wtamd = wtmad["Full"]

        r2 = r2_score(labels, y_pred)
        # sqrt(MSE) rather than mean_squared_error(..., squared=False): the
        # `squared` keyword was deprecated in scikit-learn 1.4 and removed
        # in 1.6, while this form works on every version.
        rmse = np.sqrt(mean_squared_error(labels, y_pred))
        result.inside_methods_rmse.append(rmse)
        result.inside_methods_r2.append(r2)

        if rmse < result.bestinsidemethod_rmse:
            result.bestinsidemethod_rmse = rmse
            result.bestinsidemethod = str(methodid)
            result.y_pred_bestinsidemethod = y_pred

    # Our methods: prediction = the method's FINAL_SINGLE_POINT_ENERGY
    # energy difference.
    for method in methods:
        y_pred = [
            val[method + "_energydiff"][method + "_FINAL_SINGLE_POINT_ENERGY"]
            for val in entries
        ]

        wtmad = commonutils.wtmad_calc(supersetlist, setlist, y_pred,
                                       labels, includeFull=True)
        if wtmad["Full"] < result.bestourmethod_wtamd:
            result.bestourmethod_wtamd = wtmad["Full"]

        r2 = r2_score(labels, y_pred)
        rmse = np.sqrt(mean_squared_error(labels, y_pred))

        result.our_methods_rmse[method] = rmse
        result.our_methods_r2[method] = r2
        result.our_methods_name.append(method)

        if rmse < result.bestourmethod_rmse:
            result.bestourmethod_rmse = rmse
            result.bestourmethod = method
            result.y_pred_bestourmethod = y_pred

# Summary table: best reference method and best own method per set.
print("Results for inside and our methods")
print("%40s" % "Dataset", " , ",
      "Best inside method", " , ",
      "RMSE", " , ",
      "Best our method", " , ",
      "RMSE")
for setname in fullsetnames:
    result = models_results[setname]
    print("%40s" % setname, " , ",
          result.bestinsidemethod, " , ",
          "%7.3f" % result.bestinsidemethod_rmse, " , ",
          "%5s" % result.bestourmethod, " , ",
          "%7.3f" % result.bestourmethod_rmse)


In [None]:
# Absolute correlation above which two features are reported as correlated.
CORRCUT = 0.99

# Merge the per-method descriptors of every sample of the "Full" set into a
# single dict per sample, keeping labels and set names alongside.
setname = "Full"
full_results = models_results[setname]
full_results.fulldescriptors = []
full_results.labels = []
full_results.setname = []
full_results.supersetname = []
for val in allvalues_perset[setname]:
    merged = {}
    for method in methods:
        merged.update(val[method + "_energydiff"])
    full_results.fulldescriptors.append(merged)

    full_results.labels.append(val["label"])
    full_results.setname.append(val["setname"])
    full_results.supersetname.append(val["super_setname"])

X, Y, features_names = commonutils.build_XY_matrix(
    full_results.fulldescriptors, full_results.labels
)

df = pd.DataFrame(X, columns=features_names)

# For each feature, list (and print) every other feature whose absolute
# correlation with it exceeds CORRCUT.
corr = df.corr().abs()
top_correlation = {}
for k in corr.columns:
    top_correlation[k] = []
    print(k, " ")
    for other, value in corr[k].items():
        if other == k or not value > CORRCUT:
            continue
        top_correlation[k].append((other, value))
        print(" %40s %4.2f" % (other, value))

In [None]:
# Disabled experiment kept for reference: everything inside the triple-quoted
# string below is a plain string expression and is never executed.
# test two linear models to predict energydiff
# LinearRegression
"""
import sklearn.linear_model as lm
linearNR_Two = lm.LinearRegression()
linearNR_Two.fit(df["PBE_Nuclear_Repulsion"].values.reshape(-1,1), \
                  df["PBE_Two_Electron_Energy"])
print(linearNR_Two.coef_, linearNR_Two.intercept_)
linearNR_One = lm.LinearRegression()
linearNR_One.fit(df["PBE_Nuclear_Repulsion"].values.reshape(-1,1), \
                 df["PBE_One_Electron_Energy"])
print(linearNR_One.coef_, linearNR_One.intercept_)

Onepred = linearNR_One.predict(df["PBE_Nuclear_Repulsion"].values.reshape(-1,1))
Twopred = linearNR_Two.predict(df["PBE_Nuclear_Repulsion"].values.reshape(-1,1))

# scatter plot
plt.clf()
plt.scatter(df["PBE_One_Electron_Energy"], Onepred)
plt.xlabel('Calculated Energy')
plt.ylabel('Predicted Energy')
plt.title('One Electron Energy')
plt.show()

plt.clf()
plt.scatter(df["PBE_Two_Electron_Energy"], Twopred)
plt.xlabel('Calculated Energy')
plt.ylabel('Predicted Energy')
plt.title('Two Electron Energy')
plt.show()

predEnergy = Onepred + Twopred + \
    df["PBE_Nuclear_Repulsion"].values
calculatedEnergy = df["PBE_Nuclear_Repulsion"].values + \
    df["PBE_One_Electron_Energy"].values + \
    df["PBE_Two_Electron_Energy"].values

# scatter plot
plt.clf()
plt.scatter(calculatedEnergy, predEnergy)    
plt.xlabel('Calculated Energy')
plt.ylabel('Predicted Energy')
plt.title('Linear Regression')
plt.show()

for i, ce in enumerate(calculatedEnergy):
    diff = abs(ce - predEnergy[i])/((ce+predEnergy[i])/2.0)
    print("%10.4f %10.4f %6.2f"%(ce, predEnergy[i], diff*100))

# PLSRegression using both one and two electron ?
"""

In [None]:
# Greedy selection of the features to drop: walk the features in order and,
# unless a feature has already been marked for removal, mark every feature
# reported as highly correlated with it.
featurestorms = set()
for candidate, partners in top_correlation.items():
    if candidate in featurestorms:
        continue
    featurestorms.update(partner[0] for partner in partners)

print("Features that are correlated with others and to remove")
for idx, k in enumerate(featurestorms, start=1):
    print(idx, " ", k)

In [None]:
# List, with a running 1-based counter, every collected feature that is not
# scheduled for removal. The features are visited in the iteration order of
# allfeatures.
print("Features TO USE")
idx = 1
for f in filter(lambda name: name not in featurestorms, allfeatures):
    print(idx, f)
    idx += 1

In [None]:
featurestorm = list(featurestorms)

# First pass: strip the correlated features from every set (each call gets
# its own copy of the removal list).
for setname in fullsetnames:
    commonutils.remove_features_fromset(
        allvalues_perset[setname], list(featurestorm), methods
    )

# Second pass: rebuild, for every set, the merged per-sample descriptors and
# the parallel label / set-name / superset-name lists.
for setname in fullsetnames:
    entries = allvalues_perset[setname]
    rebuilt = models_results[setname]

    rebuilt.fulldescriptors = []
    for val in entries:
        merged = {}
        for key in (method + "_energydiff" for method in methods):
            if key in val:
                merged.update(val[key])
        rebuilt.fulldescriptors.append(merged)

    rebuilt.labels = [val["label"] for val in entries]
    rebuilt.setname = [val["setname"] for val in entries]
    rebuilt.supersetname = [val["super_setname"] for val in entries]

# Feature matrix of the "Full" set after the removal.
setname = "Full"
X, Y, features_names = commonutils.build_XY_matrix(
    models_results[setname].fulldescriptors, models_results[setname].labels
)

df = pd.DataFrame(X, columns=features_names)

In [None]:
# Draw the absolute pairwise correlation matrix of the columns of df as an
# annotated heatmap.
import seaborn as sns
# IPython magic (not plain Python): render figures inline in the notebook.
%matplotlib inline
print("Correlation matrix")
# Figure size as a (width, height) tuple.
plt.rcParams['figure.figsize'] = 10,10
sns.set(font_scale=2)
sns.heatmap(df.corr().abs(), annot=True)
# Alternatives left disabled: print the matrix, or plot df itself.
#print(df.corr().abs())
#sns.heatmap(df, annot=True)

In [None]:
# Sanity check of the WTMAD calculation: for every superset, recompute it
# from the predictions of the best of our methods and print the result.
for setname in models_results:
    if setname not in supersetnames:
        continue

    result = models_results[setname]
    predicted = result.y_pred_bestourmethod
    labels = result.labels
    supersetlist = result.supersetname
    # "<superset>_<set>" identifier for every sample.
    setlist = [ss + "_" + s for ss, s in zip(supersetlist, result.setname)]

    wtmad = commonutils.wtmad_calc(
        supersetlist, setlist, predicted, labels, includeFull=False
    )
    print("WTMAD for ", setname, " and model ", result.bestourmethod, "\n")
    pp.pprint(wtmad)
    print(setname)

In [None]:
# Force a reload of the models module before using it.
import importlib
importlib.reload(models)

# Simple grid search for the best model.
# Alternatives left disabled in the original cell:
#   nepochs = [100]
#   batch_sizes = [4, 8, 16, 32]
nepochs = [50, 100]
batch_sizes = [16, 32]

# Layer layouts to try. A wider grid, disabled in the original cell, used
# uniform widths 4, 8, 16, 32, 64 and 128 at depths 3, 4, 5 and 6.
uniform_widths = (32, 64, 128)
modelshapes = (
    [[width] * 4 for width in uniform_widths]
    + [[width] * 6 for width in uniform_widths]
    + [
        [32, 32, 64, 128, 64, 32, 32, 24],
        [64, 128, 256, 128, 64, 32],
        [32, 64, 128, 128, 128, 64, 32, 24],
    ]
)

setname = "Full"
print("Searching for best NN model for set: ", setname)
full_results = models_results[setname]
X, Y, features_names = commonutils.build_XY_matrix(
    full_results.fulldescriptors, full_results.labels
)

# "<superset>_<set>" identifier for every sample.
supersetlist = full_results.supersetname
setlist = [ss + "_" + s for ss, s in zip(supersetlist, full_results.setname)]

# Standardise features and targets; the fitted scalers are handed to
# nn_model together with the scaled data.
Y_column = Y.reshape(-1, 1)
scalerx = preprocessing.StandardScaler().fit(X)
X_s = scalerx.transform(X)
scalery = preprocessing.StandardScaler().fit(Y_column)
Y_s = scalery.transform(Y_column)

modelminmape, modelminwtamd, modelminrmse = models.nn_model(
    0.2, X_s, scalerx, Y_s, scalery,
    supersetlist, setlist,
    nepochs, modelshapes, batch_sizes,
    inputshape=-1, search=True, split=False,
)

print("Best NN model for set: ", setname, file=sys.stderr)
for tag, best in (
    ("  WTAMD: ", modelminwtamd),
    ("   MAPE: ", modelminmape),
    ("   RMSE: ", modelminrmse),
):
    print(tag, best, file=sys.stderr)

# Keep the winners on the result object so later cells can pick them up.
full_results.modelminmape = modelminmape
full_results.modelminwtamd = modelminwtamd
full_results.modelminrmse = modelminrmse


In [None]:
import importlib
importlib.reload(models)

setname = "Full"
full_results = models_results[setname]
modelminmape = full_results.modelminmape
modelminwtamd = full_results.modelminwtamd
modelminrmse = full_results.modelminrmse
X, Y, features_names = commonutils.build_XY_matrix(
    full_results.fulldescriptors, full_results.labels
)

# "<superset>_<set>" identifier for every sample.
supersetlist = full_results.supersetname
setlist = [ss + "_" + s for ss, s in zip(supersetlist, full_results.setname)]

Y_column = Y.reshape(-1, 1)
scalerx = preprocessing.StandardScaler().fit(X)
X_s = scalerx.transform(X)
scalery = preprocessing.StandardScaler().fit(Y_column)
Y_s = scalery.transform(Y_column)


def _build_final_model(best):
    """Run models.nn_model once, without search, for one selected configuration.

    best[1], best[0] and best[2] are passed, each wrapped in a one-element
    list, in the argument positions that the search cell fills with nepochs,
    modelshapes and batch_sizes respectively.
    """
    return models.nn_model(
        0.2, X_s, scalerx, Y_s, scalery,
        supersetlist, setlist,
        [best[1]], [best[0]], [best[2]],
        inputshape=-1, search=False, split=False,
    )


# Build the final models and store the results; the three runs are kept in
# the order MAPE, WTAMD, RMSE.
results_mape = _build_final_model(modelminmape)
full_results.nn_model_mape = results_mape

results_wtamd = _build_final_model(modelminwtamd)
full_results.nn_model_wtamd = results_wtamd

results_rmse = _build_final_model(modelminrmse)
full_results.nn_model_rmse = results_rmse

In [None]:
setname = "Full"
results_mape = models_results[setname].nn_model_mape
results_wtamd = models_results[setname].nn_model_wtamd
results_rmse = models_results[setname].nn_model_rmse

# For each of the three final models, plot the training and validation
# curves of MSE and then of MAPE, one figure per metric.
curves = (
    ('mse', 'model', 'MSE'),
    ('mape', 'model MAPE', 'MAPE'),
)
for history in [results_mape['history'], results_wtamd['history'], results_rmse['history']]:
    for metric, title, ylabel in curves:
        plt.plot(history.history[metric])
        plt.plot(history.history['val_' + metric])
        plt.title(title)
        plt.ylabel(ylabel)
        plt.xlabel('epoch')
        plt.legend(['train', 'validation'], loc='upper left')
        plt.show()

In [None]:
# Disabled diagnostic kept for reference: everything inside the triple-quoted
# string below is a plain string expression and is never executed.
# denormalize the results
"""
plt.clf()
plt.scatter(results_mape['y_full'], \
            results_mape['y_pred_full'])
plt.xlabel('True Values')
plt.ylabel('Predictions')
plt.show()
y_pred_full = scalery.inverse_transform(results_mape['y_pred_full'])
y_full = scalery.inverse_transform(results_mape['y_full'])
rmse_full = mean_squared_error(y_full, y_pred_full, squared=False)
r2_full = r2_score(y_full, y_pred_full)
print("RMSE Full      : %5.2f"%rmse_full)
print("R2 Full        : %5.2f"%r2_full)

# plot the results
plt.clf()
plt.scatter(y_full, y_pred_full)
plt.xlabel('True Values')
plt.ylabel('Predictions')
plt.show()

#denomrlize the results
plt.clf()
plt.scatter(results_rmse['y_full'], \
            results_rmse['y_pred_full'])
plt.xlabel('True Values')
plt.ylabel('Predictions')
plt.show()
y_pred_full = scalery.inverse_transform(results_rmse['y_pred_full'])
y_full = scalery.inverse_transform(results_rmse['y_full'])
rmse_full = mean_squared_error(y_full, y_pred_full, squared=False)
r2_full = r2_score(y_full, y_pred_full)
print("RMSE Full      : %5.2f"%rmse_full)
print("R2 Full        : %5.2f"%r2_full)

# plot the results
plt.clf()
plt.scatter(y_full, y_pred_full)
plt.xlabel('True Values')
plt.ylabel('Predictions')
plt.show()
"""

In [None]:
from sklearn.metrics import mean_absolute_percentage_error
setname = "Full"
results_mape = models_results[setname].nn_model_mape
results_wtamd = models_results[setname].nn_model_wtamd
results_rmse = models_results[setname].nn_model_rmse


def _predict_unscaled(fitted, X_scaled):
    """Predict with fitted['model'] and map the output back through scalery."""
    return scalery.inverse_transform(
        fitted['model'].predict(X_scaled, verbose=0)
    )


# Apply the three final models to every set and compare their RMSE with the
# best reference method and the best of our methods. scalerx and scalery are
# the scalers already fitted in an earlier cell; they are only applied here.
print(" Dim , %40s" % "Dataset", " , ",
      "Best inside method RMSE", " , ",
      "Best our method RMSE", " , ",
      "RMSE mape, ",
      "RMSE wtamd, ",
      "RMSE rmse")
for setname in fullsetnames:
    result = models_results[setname]

    X, Y, features_names = commonutils.build_XY_matrix(
        result.fulldescriptors, result.labels
    )

    # Only the features need scaling; the scaled targets the original cell
    # computed here were never used.
    X_s = scalerx.transform(X)

    # sqrt(MSE) rather than mean_squared_error(..., squared=False): the
    # `squared` keyword was deprecated in scikit-learn 1.4 and removed in
    # 1.6, while this form works on every version.
    y_pred_mape = _predict_unscaled(results_mape, X_s)
    result.y_pred_mape = y_pred_mape
    rmse_mape = np.sqrt(mean_squared_error(Y, y_pred_mape))

    y_pred_wtamd = _predict_unscaled(results_wtamd, X_s)
    result.y_pred_wtamd = y_pred_wtamd
    rmse_wtamd = np.sqrt(mean_squared_error(Y, y_pred_wtamd))

    y_pred_rmse = _predict_unscaled(results_rmse, X_s)
    result.y_pred_rmse = y_pred_rmse
    rmse_rmse = np.sqrt(mean_squared_error(Y, y_pred_rmse))

    print("%4d , %40s" % (len(result.labels), setname), " , ",
          "%7.3f" % result.bestinsidemethod_rmse, " , ",
          "%7.3f" % result.bestourmethod_rmse, " , ",
          "%7.3f" % rmse_mape, " , ",
          "%7.3f" % rmse_wtamd, " , ",
          "%7.3f" % rmse_rmse)


In [None]:
# Disabled variant of the per-set summary table (RMSE, MAPE and R2 of the
# RMSE-selected model), kept for reference: the triple-quoted string below is
# a plain string expression and is never executed.
"""
print(" Dim , %40s"% "Dataset", " , ", \
      "Best inside method RMSE", " , ", \
      "Best our method RMSE", " , ", \
      "RMSE , MAPE, R2")
for setname in fullsetnames:
    X, Y, features_names = \
            commonutils.build_XY_matrix (models_results[setname].fulldescriptors, \
                                    models_results[setname].labels)
    
    X_s = scalerx.transform(X)
    Y_s = scalery.transform(Y.reshape(-1, 1))
    y_pred = scalery.inverse_transform(results_rmse['model'].predict(X_s, verbose=0))
    models_results[setname].y_pred = y_pred
    rmse = mean_squared_error(Y, y_pred, squared=False)
    r2 = r2_score(Y, y_pred)
    mape = mean_absolute_percentage_error(Y, y_pred)

    print("%4d , %40s"%(len(models_results[setname].labels), setname), " , ", \
        "%7.3f"%models_results[setname].bestinsidemethod_rmse, " , ", \
        "%7.3f"%models_results[setname].bestourmethod_rmse, " , ", \
        "%7.3f"%rmse, " , ", \
        "%7.3f"%mape, " , ", \
        "%7.3f"%r2)
"""

In [None]:
# When True, only the supersets and "Full" are reported and plotted.
printonlysuperset = True

# Sets for which the WTAMD2 figures are also printed.
toplevel_sets = set(supersetnames) | {"Full"}

for setname in fullsetnames:
    is_toplevel = setname in toplevel_sets
    # Skip before doing any work: a set that is filtered out here never
    # printed or plotted anything.
    if printonlysuperset and not is_toplevel:
        continue

    result = models_results[setname]
    X, Y, features_names = commonutils.build_XY_matrix(
        result.fulldescriptors, result.labels
    )

    # "<superset>_<set>" identifier for every sample.
    supersetlist = result.supersetname
    setlist = [ss + "_" + s for ss, s in zip(supersetlist, result.setname)]

    # sqrt(MSE) rather than mean_squared_error(..., squared=False): the
    # `squared` keyword was deprecated in scikit-learn 1.4 and removed in
    # 1.6, while this form works on every version.
    y_pred_wtamd = result.y_pred_wtamd
    rmse_wtamd = np.sqrt(mean_squared_error(Y, y_pred_wtamd))
    y_pred_mape = result.y_pred_mape
    rmse_mape = np.sqrt(mean_squared_error(Y, y_pred_mape))
    y_pred_rmse = result.y_pred_rmse
    rmse_rmse = np.sqrt(mean_squared_error(Y, y_pred_rmse))

    if is_toplevel:
        wtmad2 = commonutils.wtmad_calc(supersetlist, setlist, y_pred_mape,
                                        Y, includeFull=True)
        print("WTAMD2             (MAPE) %7.3f" % wtmad2["Full"])
        wtmad2 = commonutils.wtmad_calc(supersetlist, setlist, y_pred_wtamd,
                                        Y, includeFull=True)
        print("WTAMD2            (WTAMD) %7.3f" % wtmad2["Full"])
        wtmad2 = commonutils.wtmad_calc(supersetlist, setlist, y_pred_rmse,
                                        Y, includeFull=True)
        print("WTAMD2             (RMSE) %7.3f" % wtmad2["Full"])
        print("WTAMD2 (bestinsidemethod) %7.3f" % result.bestinsidemethod_wtamd)
        print("WTAMD2    (bestourmethod) %7.3f" % result.bestourmethod_wtamd)

    print("RMSE              (WTAMD) %7.3f" % rmse_wtamd)
    print("RMSE               (MAPE) %7.3f" % rmse_mape)
    print("RMSE               (RMSE) %7.3f" % rmse_rmse)
    print("RMSE   (bestinsidemethod) %7.3f" % result.bestinsidemethod_rmse)
    print("RMSE      (bestourmethod) %7.3f" % result.bestourmethod_rmse)

    # Predicted-versus-true scatter plot of the three NN models and of the
    # best of our methods.
    fig, ax = plt.subplots()
    ax.scatter(Y, y_pred_wtamd,
               c='b', s=50, label='NN Full model WTAMD')
    ax.scatter(Y, y_pred_mape,
               c='r', s=50, label='NN Full model MAPE')
    #ax.scatter(Y, models_results[setname].y_pred_bestinsidemethod, \
    #            c='r', s=50, label='Best inside method')
    ax.scatter(Y, y_pred_rmse,
               c='black', s=50, label='NN Full model RMSE')
    ax.scatter(Y, result.y_pred_bestourmethod,
               c='g', s=50, label='Best our method')
    # Use the same limits on both axes so the y = x line is the diagonal.
    lims = [
        np.min([ax.get_xlim(), ax.get_ylim()]),  # min of both axes
        np.max([ax.get_xlim(), ax.get_ylim()]),  # max of both axes
    ]
    ax.plot(lims, lims, 'k-', alpha=0.75, zorder=0)
    ax.set_aspect('equal')
    ax.set_xlim(lims)
    ax.set_ylim(lims)
    plt.xlabel('True Values')
    plt.ylabel('Predictions')
    plt.legend(loc="upper left")
    plt.title(setname)
    plt.show()