In [20]:
import os
import json

import numpy as np
import matplotlib.pyplot as plt
from sklearn.metrics import auc
from sklearn.metrics import mean_absolute_error
from sklearn.metrics import r2_score

from truthful_counterfactuals.utils import EXPERIMENTS_PATH
from truthful_counterfactuals.utils import render_latex, latex_table
from truthful_counterfactuals.metrics import threshold_error_reduction

# Directory the notebook is executed from; the generated table files
# ('table_real.tex' / 'table_real.pdf') are written into this folder.
PATH = os.getcwd()
# Value handed to ``threshold_error_reduction`` as ``num_bins`` for both
# AUC computations (mean- and max-based).
NUM_BINS = 100

In [21]:
def _run_paths(experiment: str, run_names: list) -> list:
    """Return the ``results.json`` path of every listed run of one experiment.

    :param experiment: Name of the experiment folder below ``EXPERIMENTS_PATH/results``.
    :param run_names: Names of the run sub-folders, in the order they should be listed.

    :returns: One ``.../results/<experiment>/<run_name>/results.json`` path per run name.
    """
    return [
        os.path.join(EXPERIMENTS_PATH, 'results', experiment, run_name, 'results.json')
        for run_name in run_names
    ]


# The single-target experiments store their five repetitions in folders named by a
# zero-padded counter; the QM9 experiment folder is shared by two targets, whose run
# folders carry the target name as a prefix instead.
_NUMBERED_RUNS = ['01', '02', '03', '04', '05']

# Maps a short identifier to the display names used in the summary table ('dataset',
# 'property') and to the result files ('paths') of the individual runs. The metric
# lists are added to these entries later, when the files are processed.
results_map = {
    'aqsoldb': {
        'dataset': 'AqSolDB',
        'property': 'logS',
        'paths': _run_paths('quantify_uncertainty__ens_mve__aqsoldb', _NUMBERED_RUNS),
    },
    'lipop': {
        'dataset': 'Lipop',
        'property': 'logD',
        'paths': _run_paths('quantify_uncertainty__ens_mve__lipop', _NUMBERED_RUNS),
    },
    'compas': {
        'dataset': 'COMPAS',
        'property': 'Gap',
        'paths': _run_paths('quantify_uncertainty__ens_mve__compas', _NUMBERED_RUNS),
    },
    'qm9_energy': {
        'dataset': 'QM9',
        'property': 'Energy',
        'paths': _run_paths(
            'quantify_uncertainty__ens_mve__qm9',
            ['energy_1', 'energy_2', 'energy_3', 'energy_4', 'energy_5'],
        ),
    },
    'qm9_dipole': {
        'dataset': 'QM9',
        'property': 'Dipole Moment',
        'paths': _run_paths(
            'quantify_uncertainty__ens_mve__qm9',
            ['dipole_1', 'dipole_2', 'dipole_3', 'dipole_4', 'dipole_5'],
        ),
    },
}

def get_value(value: float | list):
    if isinstance(value, list):
        return value[0]
    return value

print('processing the results...')

for key, data in results_map.items():

    # Every metric list is (re-)created empty here, so executing this cell again
    # replaces the values of a previous execution instead of appending to them.
    for metric_name in ('mae_values', 'r2_values', 'corr_values', 'auc_mean_values', 'auc_max_values'):
        data[metric_name] = []

    print(f' * processing {key}')
    for path in data['paths']:

        # Each results file is parsed as JSON and then iterated as a sequence of
        # per-element records.
        with open(path, mode='r') as file:
            results = json.load(file)

        # Target and prediction may be stored as lists, in which case only the
        # first entry is used (see get_value).
        out_true = [get_value(result['graph_labels']) for result in results]
        out_pred = [get_value(result['prediction']) for result in results]

        # prediction performance of this run (MAE & R2)
        data['mae_values'].append(mean_absolute_error(out_true, out_pred))
        data['r2_values'].append(r2_score(out_true, out_pred))

        # The absolute prediction error is attached to every record and then collected
        # into an array next to the uncertainty values taken directly from the records.
        for result, true_value, pred_value in zip(results, out_true, out_pred):
            result['error'] = abs(true_value - pred_value)

        # 'error' is the scalar result of abs(), so it needs no list unwrapping.
        errors = np.array([result['error'] for result in results])
        uncertainties = np.array([get_value(result['uncertainty']) for result in results])

        # correlation coefficient between error and uncertainty: off-diagonal entry
        # of the 2x2 matrix returned by np.corrcoef
        data['corr_values'].append(np.corrcoef(errors, uncertainties)[0, 1])

        # Area under the curve returned by threshold_error_reduction, computed once
        # with the mean and once with the maximum as the error aggregation function.
        # NOTE(review): ths / rds are presumably the uncertainty thresholds and the
        # corresponding error reductions - confirm against truthful_counterfactuals.metrics.
        for auc_name, error_func in (('auc_mean_values', np.mean), ('auc_max_values', np.max)):
            ths, rds = threshold_error_reduction(uncertainties, errors, error_func=error_func, num_bins=NUM_BINS)
            data[auc_name].append(auc(ths, rds))
        

processing the results...
 * processing aqsoldb
 * processing lipop
 * processing compas
 * processing qm9_energy
 * processing qm9_dipole


In [22]:
print('summarizing the results...')

# Keys of the results_map entries that make up one table row, listed in the same
# order as the column headers below.
column_keys = ['dataset', 'property', 'r2_values', 'corr_values', 'auc_mean_values', 'auc_max_values']
column_names = [
    'Dataset',
    'Property',
    r'$R^2$',
    r'$\rho$',
    r'$\text{UER-AUC}_{\text{mean}}$',
    r'$\text{UER-AUC}_{\text{max}}$',
]

# One row per results_map entry; the metric cells hold the full list of per-run
# values, which are handed to latex_table as they are.
rows = [
    [data[column_key] for column_key in column_keys]
    for data in results_map.values()
]

# latex_table returns a pair, of which only the second element is used here.
_, content = latex_table(column_names=column_names, rows=rows)

# The table source is written next to the notebook ...
tex_path = os.path.join(PATH, 'table_real.tex')
with open(tex_path, mode='w') as file:
    file.write(content)

# ... and additionally passed to render_latex together with the target pdf path.
pdf_path = os.path.join(PATH, 'table_real.pdf')
render_latex({'content': content}, pdf_path)

summarizing the results...
