In [None]:
import os
import pandas as pd
import numpy as np

import matplotlib.pyplot as plt
from sklearn.metrics import mean_squared_error, r2_score, mean_absolute_percentage_error
from sklearn.inspection import permutation_importance

In [None]:
functional = 'PBE0'


def load_model_results(functional, root="."):
    """Load every ``modelsresults.csv`` stored in a ``<functional>_<basis>`` directory.

    Parameters
    ----------
    functional : str
        Directory-name prefix; only directories whose name starts with
        ``functional + "_"`` are inspected.
    root : str, optional
        Directory to scan. Defaults to the current working directory.

    Returns
    -------
    dict
        Maps ``"<functional> <basis>"`` to the DataFrame read (with
        ``header=None``) from that directory's ``modelsresults.csv``.
        ``<basis>`` is everything after the first ``_``-separated token of
        the directory name. Directories without the file are skipped.
    """
    prefix = functional + "_"
    results = {}
    # os.listdir returns entries in arbitrary order; sorting makes the dict
    # order (and anything plotted from it) reproducible between runs.
    for entry in sorted(os.listdir(root)):
        if not entry.startswith(prefix):
            continue
        entry_path = os.path.join(root, entry)
        if not os.path.isdir(entry_path):
            continue
        # Drop the leading token and keep the rest, underscores included.
        basis = "_".join(entry.split('_')[1:])
        csv_path = os.path.join(entry_path, "modelsresults.csv")
        if os.path.isfile(csv_path):
            results[functional + " " + basis] = pd.read_csv(csv_path, header=None)
    return results


allresults = load_model_results(functional)
        

In [None]:
# Keys of `modelresults`. "Method" collects one label per entry of
# `allresults`; every other key collects one value per entry for that model.
modelsnames = ["Method",
               "PLS Full",
               "PLS Full split",
               "PLS SS",
               "PLS SS split",
               "LR Full",
               "LR Full split",
               "LR SS",
               "LR SS split",
               "MAPELR Full",
               "MAPELR Full split",
               "MAPELR SS",
               "MAPELR SS split",
               "PLSRF",
               "PLSRF split",
               "LRRF",
               "LRRF split",
               "MAPELRRF",
               "MAPELRRF split"]
# Keys of `refernceresults`; each is expected to carry the same value in
# every results table (checked below).
referncenames = ["PBE-MINIX",
                 "PBE-SVP",
                 "PBE-TZVP",
                 "PBE-QZVP",
                 "PBE0-MINIX",
                 "PBE0-SVP",
                 "PBE0-TZVP",
                 "PBE0-QZVP",
                 "D3(BJ)"]

modelresults = {m: [] for m in modelsnames}
refernceresults = {r: [] for r in referncenames}

for mb, table in allresults.items():
    print(mb)
    modelresults["Method"].append(mb)
    for v in table.values:
        if len(v) != 3:
            # Raise instead of calling exit(): in a notebook exit() shuts the
            # kernel down rather than reporting which row was malformed.
            raise ValueError(
                "Error in {}: expected 3 columns, got {}: {!r}".format(mb, len(v), v))

        # Only rows whose first column is "Full" and whose second column
        # mentions MAPE are collected.
        if v[0].strip() != "Full":
            continue
        if "MAPE" not in v[1]:
            continue

        # Reduce the row description to one of the names listed above.
        # "MAPE" has to be removed before "Custom LR" is mapped to "MAPELR",
        # otherwise the freshly inserted "MAPE" would be stripped again.
        header = (v[1].replace("using", "")
                      .replace("MAPE", "")
                      .replace("Custom LR", "MAPELR")
                      .strip())
        if header in modelresults:
            modelresults[header].append(v[2])
        elif header in refernceresults:
            refernceresults[header].append(v[2])

# Collapse each reference list to a single value; references whose values
# disagree between tables (or that never appeared) are reported and dropped.
newrefernceresults = {}
for rv, values in refernceresults.items():
    if not values:
        print(rv, "ERROR no values found")
        continue
    if len(set(values)) != 1:
        print(rv, "ERROR all values are not identical")
        continue
    newrefernceresults[rv] = values[0]
refernceresults = newrefernceresults

# Print the number of values collected per key.
for m in modelresults:
    print(m, len(modelresults[m]))

In [None]:
# Grouped bar chart of the collected model values, one group per "Method"
# entry, with horizontal lines for the reference values.
# pd.DataFrame requires every list in `modelresults` to have the same length.
modelsdf = pd.DataFrame(modelresults)

fig, ax = plt.subplots()
# increase the size of the plot
fig.set_size_inches(15, 8)
modelsdf.plot(kind='bar', ax=ax)
ax.set_xticklabels(modelsdf["Method"], rotation=45)
ax.set_ylabel("MAPE")

# Horizontal reference lines: D3(BJ) in red, references belonging to the
# current functional in green. The line style is cycled so that several
# green lines remain distinguishable in the legend.
functional_linestyles = ['-', '--', '-.', ':']
functional_lines_drawn = 0
for rv, value in refernceresults.items():
    if rv == "D3(BJ)":
        ax.axhline(y=value, color='r', linestyle='-', label=rv)
    elif rv.startswith(functional + "-"):
        # Prefix match on "<functional>-": a plain substring test would let
        # e.g. "PBE" also select the "PBE0-*" references.
        style = functional_linestyles[functional_lines_drawn % len(functional_linestyles)]
        ax.axhline(y=value, color='g', linestyle=style, label=rv)
        functional_lines_drawn += 1

# The legend produced by DataFrame.plot predates the reference lines;
# rebuild it so their labels are shown as well.
ax.legend()
plt.show()

In [None]:
# Scan the working directory for "<functional>_<basis>" folders and store
# each folder's modelsresults.csv in `allresults` under "<functional> <basis>".
# NOTE(review): this repeats the scan done in the loading cell near the top
# of the notebook; confirm whether the second pass is still needed.
prefix = functional + "_"
matching_dirs = [d for d in os.listdir()
                 if os.path.isdir(d) and d.startswith(prefix)]

for subdir in matching_dirs:
    # Basis label: the directory name without its first "_"-separated token.
    basis = "_".join(subdir.split('_')[1:])
    for fname in os.listdir(subdir):
        if fname == "modelsresults.csv":
            data = pd.read_csv(subdir+'/'+fname, header=None)
            allresults[functional + " " + basis] = data
        