In [None]:
import matplotlib.pyplot as plt
import seaborn as sns
import pandas as pd
import numpy as np
import os
import json
from utils.util_metrics import *
from utils.util_draw import *
import warnings
from IPython.display import Image, display
warnings.filterwarnings('ignore')


In [3]:
import os
import glob

# Define experiment parameters
exp = 'exp_simu8/Mfix/30'
head = 15
thres = 0.6
sortby = {"by": 'rmae', "ascending": True}


paths = glob.glob(f'results/{exp}/simu*.json')


# Create directories if they don't exist
model_names = ['SPBN', 'BSBN-FFT-SKDE-Linear', 'BSBN', 'BSBN-Linear', 'BSBN-FFT-Linear', 'BSBN-FFT-SKDE', 'BSBN-FFT']
for model in model_names:
    dir_path = f'results/{exp}/comparisons_graphs/{model}'
    if not os.path.exists(dir_path):
        os.makedirs(dir_path)

# Load results from JSON files
resultslist = []
for path in paths:
    with open(path, 'r') as json_file:
        results_dict = json.load(json_file)
        resultslist.append(results_dict)

# Process results
all_exps = []
instances = [k for k in resultslist[0].keys()]
for n in instances:
    result_dicts = [results[n] for results in resultslist]
    
    # Iterate over the items in the result dictionaries
    for items in zip(*[result.items() for result in result_dicts]):
        simus, results = zip(*items)
        simu = simus[0]
        readers = [ExperimentsReader(result, model_name, 'REF') for result, model_name in zip(results, model_names)]

        dataframes = []
        for reader in readers:
            df_all = reader.return_dataframe(simulated=True).reset_index()
            df_sort = df_all.sort_values(**sortby).head(head).drop(columns=['model'],axis=1)
            df_sort['Logl RE (%)'] = abs((df_sort['slogl_REF'] - df_sort['slogl'])/df_sort['slogl_REF'])*100 
            
            df_sort['Logl diff'] = df_sort['slogl_REF'] - df_sort['slogl']
            df_mean = df_sort.mean().to_frame().T.round(4)
            dataframes.append((df_sort, df_mean))

        kwargs_list = [reader.return_average_dag(model_name, thres, df_sort['index'].tolist()) for reader, (df_sort, _), model_name in zip(readers, dataframes, model_names)]

        avg_models = []
        for kwargs, model_name in zip(kwargs_list, model_names):
            if "BSBN" in model_name and len(kwargs['arcs']) == 0:
                avg_models.append(None)
            else:
                avg_model = pbn.FourierNetwork(**kwargs) if model_name != 'SPBN' else pbn.SemiparametricBN(**kwargs)
                avg_models.append(avg_model)

        config = get_config(int(simu[-1]))
        ref_model = pbn.SemiparametricBN(**config)

        for model_name, avg_model in zip(model_names, avg_models):
            if avg_model:
                compare_graphs(ref_model, avg_model, filename=f"results/{exp}/comparisons_graphs/{model_name}/{simu}_{model_name}-ref{n}.pdf", size=20)

        
        all_df = pd.concat([df_mean for _, df_mean in dataframes]).reset_index(drop=True)
        all_df.index = model_names

        all_df['instances'] = [n] * all_df.shape[0]
        all_df['simu'] = [simu[-1]] * all_df.shape[0]
        all_exps.append(all_df)

all_exps_concat = pd.concat(all_exps).drop(columns=['index'], axis=1)


all_exps_concat.columns = ['HMD','SHD','THMD', 'RMSE', 'MRE (%)', 'Train (s)', 'Test (s)', 'LogL', 'LogL True', 'Logl RE (%)', 'Logl diff', 'Instances', 'Simulation']
# Reorder columns
cols = ['Simulation', 'Instances'] + [col for col in all_exps_concat.columns if col not in ['Simulation', 'Instances']]
all_exps_concat = all_exps_concat[cols]

grouped = all_exps_concat.groupby(["Simulation"])
for name, group in grouped:
    
    group = group[(group != -1).all(axis=1)]
    group['Instances'] = group['Instances'].astype(int)
    sorted = group.sort_values(by = "Instances", ascending= True).round(3)

    simu = sorted['Simulation'].values[0]
    
    print(sorted.to_string())
    print("\n")
        # Convert to LaTeX
    latex_code = sorted.to_latex(index=True,  
                                label=f"tab:m30_simu{simu}",
                                caption=f"Comparison of the metrics for the different models in the simulation ${simu}$. The grid of the binned models is set to $M=30$.",
                                multirow=True,
                                longtable=True,
                                column_format="ccccccccccc",
                                escape=True, float_format="%.3f")


    # Optionally, save the LaTeX code to a .tex file
    
    with open(f'results/exp_simu7/Mfix/30/dataframe_30_simu{simu}.tex', 'w') as f:
        f.write(latex_code)

    
# print(all_exps_concat)

IndexError: list index out of range

### SAME STRUCTURE

In [25]:
import os
import glob

M = 100
# Define experiment parameters
exp = f'exp_simu8/Mfix_sameDAG/{M}'
head = 15
thres = 0.6
sortby = {"by": 'rmae', "ascending": True}


paths = glob.glob(f'results/{exp}/simu*.json')

print(paths)
# Create directories if they don't exist
model_names = ['BSBN-FFT-SKDE-Linear', 'BSBN', 'BSBN-Linear', 'BSBN-FFT-Linear', 'BSBN-FFT-SKDE', 'BSBN-FFT']
for model in model_names:
    dir_path = f'results/{exp}/comparisons_graphs/{model}'
    if not os.path.exists(dir_path):
        os.makedirs(dir_path)

# Load results from JSON files
resultslist = []
for path in paths:
    with open(path, 'r') as json_file:
        results_dict = json.load(json_file)
        resultslist.append(results_dict)

# Process results
all_exps = []
all_exps_std = []
instances = [k for k in resultslist[0].keys()]
for n in instances:
    result_dicts = [results[n] for results in resultslist]
    
    # Iterate over the items in the result dictionaries
    for items in zip(*[result.items() for result in result_dicts]):
        simus, results = zip(*items)
        simu = simus[0]
        readers = [ExperimentsReader(result, model_name, 'REF') for result, model_name in zip(results, model_names)]

        dataframes = []
        for reader in readers:
            df_all = reader.return_dataframe(simulated=True).reset_index()
            df_sort = df_all.sort_values(**sortby).head(head).drop(columns=['model'],axis=1)
            df_sort['Logl RE (%)'] = abs((df_sort['slogl_REF'] - df_sort['slogl'])/df_sort['slogl_REF'])*100 
            
            df_sort['Logl diff'] = df_sort['slogl_REF'] - df_sort['slogl']
            df_mean = df_sort.mean().to_frame().T.round(4)
            df_std = df_sort.std().to_frame().T.round(4)
            dataframes.append((df_sort, df_mean, df_std))

        kwargs_list = [reader.return_average_dag(model_name, thres, df_sort['index'].tolist()) for reader, (df_sort, _,_), model_name in zip(readers, dataframes, model_names)]

        avg_models = []
        for kwargs, model_name in zip(kwargs_list, model_names):
            if "BSBN" in model_name and len(kwargs['arcs']) == 0:
                avg_models.append(None)
            else:
                avg_model = pbn.FourierNetwork(**kwargs) if model_name != 'SPBN' else pbn.SemiparametricBN(**kwargs)
                avg_models.append(avg_model)

        config = get_config(int(simu[-1]))
        ref_model = pbn.SemiparametricBN(**config)

        for model_name, avg_model in zip(model_names, avg_models):
            if avg_model:
                compare_graphs(ref_model, avg_model, filename=f"results/{exp}/comparisons_graphs/{model_name}/{simu}_{model_name}-ref{n}.pdf", size=20)

        
        all_df= pd.concat([df_mean for _, df_mean,_ in dataframes]).reset_index(drop=True)
        all_df.index = model_names

        all_df['instances'] = [n] * all_df.shape[0]
        all_df['simu'] = [simu[-1]] * all_df.shape[0]
        all_exps.append(all_df)

        all_df_std= pd.concat([df_std for _, _,df_std in dataframes]).reset_index(drop=True)
        all_df_std.index = model_names
        all_df_std['instances'] = [n] * all_df_std.shape[0]
        all_df_std['simu'] = [simu[-1]] * all_df_std.shape[0]
        all_exps_std.append(all_df_std)

all_exps_concat = pd.concat(all_exps).drop(columns=['index'], axis=1)
all_exps_std_concat = pd.concat(all_exps_std).drop(columns=['index'], axis=1)

print(all_exps_concat.columns)
all_exps_concat.columns = ['HMD','SHD','THMD', 'RMSE', 'MRE(\%)', 'Train(s)', 'Test(s)', 'Test(s) True', 'LogL', 'LogL True', 'Logl RE(\%)', 'Logl diff', 'Instances', 'Simulation']
all_exps_std_concat.columns = ['HMD','SHD','THMD', 'RMSE std', 'MRE(\%) std', 'Train(s)', 'Test(s) std', 'Test(s) True', 'LogL std', 'LogL True', 'Logl RE(\%)', 'Logl diff std', 'Instances', 'Simulation']
# Reorder columns
cols = ['Simulation', 'Instances'] + [col for col in all_exps_concat.columns if col not in ['Simulation', 'Instances']]
cols_std = ['Simulation', 'Instances'] + [col for col in all_exps_std_concat.columns if col not in ['Simulation', 'Instances']]
all_exps_concat = all_exps_concat[cols]
all_exps_std_concat = all_exps_std_concat[cols_std]

grouped = all_exps_concat.groupby(["Simulation"])
grouped_std = all_exps_std_concat.groupby(["Simulation"])
for (name, group), (name_std, group_std) in zip(grouped, grouped_std):
    
    group['Instances'] = group['Instances'].astype(int)
    group = group.rename_axis('Model').reset_index()
    group_std['Instances'] = group_std['Instances'].astype(int)
    group_std = group_std.rename_axis('Model').reset_index()
    
    sorted = group.sort_values(by = ["Instances", "MRE(\%)"], ascending= True).round(5)
    sorted_std  =group_std.iloc[sorted.index].round(5)

    
    simu = sorted['Simulation'].values[0]
    sorted_drop = sorted.drop(columns=['HMD','SHD','THMD','Train(s)','Logl RE(\%)','LogL True','Test(s) True'], axis=1)
    sorted_std_drop = sorted_std.drop(columns=['HMD','SHD','THMD','Train(s)','Logl RE(\%)','LogL True','Test(s) True', 'Instances','Simulation' ,'Model'], axis=1)

    sorted_concat = pd.concat([sorted_drop, sorted_std_drop], axis=1)
    sorted_concat = sorted_concat[['Model','Simulation', 'Instances','RMSE','RMSE std', 'MRE(\%)', 'MRE(\%) std', 'Test(s)', 'Test(s) std', 'LogL', 'LogL std', 'Logl diff', 'Logl diff std']]
    
    columns_to_combine = ['RMSE', 'MRE(\%)', 'Test(s)', 'LogL', 'Logl diff']

    # Transform columns to strings and combine
    for col in columns_to_combine:
        sorted_concat[col] = sorted_concat[col].astype(str) + r' $\pm$ ' + sorted_concat[col + ' std'].astype(str)
    sorted_concat = sorted_concat.drop(columns=[col + ' std' for col in columns_to_combine])
    sorted_concat = sorted_concat[['Model', 'Simulation', 'Instances'] + columns_to_combine]



    # Simulation  Instances    RMSE  MRE(%)  Test(s)        LogL
    print(sorted_concat.to_string())

    ground_truth_params =  sorted[['Instances','LogL True','Test(s) True']]
    ground_truth_params = ground_truth_params[(ground_truth_params != -1).all(axis=1)]
    ground_truth_params = ground_truth_params.groupby('Instances')
    ground_truth_mean = ground_truth_params.mean().round(5).reset_index()
    ground_std = ground_truth_params.std().round(5).reset_index()


    print(ground_truth_mean.to_string())

    print()
    print("\n")
        # Convert to LaTeX

    sorted_drop_latex = sorted_concat.drop(['Simulation','Logl diff'], axis=1)
    latex_code = sorted_drop_latex.to_latex(index=False,  
                                label=f"tab:m{M}_simu{simu}",
                                caption=f"Results for the simulated data ${simu}$. The grid is set to $M={M}$.",
                                column_format="lccccc",
                                escape=False, float_format="%.3f")
    
    latex_code = latex_code.replace('-1.0 $\pm$ 0.0', '-')
    print(latex_code)

    ground_truth_mean_latex = ground_truth_mean.to_latex(index=False,  
                                label=f"tab:m{M}_simu{simu}_ground_truth",
                                caption=f"Ground truth values for the simulated data ${simu}$.",
                                column_format="ccc",
                                escape=False, float_format="%.3f")
    
    print(ground_truth_mean_latex)
    # Optionally, save the LaTeX code to a .tex file
    
    # with open(f'results/exp_simu7/Mfix_/30/dataframe_30_simu{simu}.tex', 'w') as f:
    #     f.write(latex_code)

    
# print(all_exps_concat)

['results/exp_simu8/Mfix_sameDAG/100/simu_bsbn_fft_skde_linear.json', 'results/exp_simu8/Mfix_sameDAG/100/simu_bsbn.json', 'results/exp_simu8/Mfix_sameDAG/100/simu_bsbn_linear.json', 'results/exp_simu8/Mfix_sameDAG/100/simu_bsbn_fft_linear.json', 'results/exp_simu8/Mfix_sameDAG/100/simu_bsbn_fft_skde.json', 'results/exp_simu8/Mfix_sameDAG/100/simu_bsbn_fft.json']
Index(['hamming', 'shamming', 'thamming', 'rmse', 'rmae', 'train', 'test',
       'test_REF', 'slogl', 'slogl_REF', 'Logl RE (%)', 'Logl diff',
       'instances', 'simu'],
      dtype='object')
                   Model Simulation  Instances                 RMSE              MRE(\%)              Test(s)                        LogL               Logl diff
3        BSBN-FFT-Linear          1       2048       -1.0 $\pm$ 0.0       -1.0 $\pm$ 0.0       -1.0 $\pm$ 0.0              -1.0 $\pm$ 0.0           0.0 $\pm$ 0.0
5               BSBN-FFT          1       2048       -1.0 $\pm$ 0.0       -1.0 $\pm$ 0.0       -1.0 $\pm$ 0.0      

In [9]:
latex = all_exps_concat.to_latex('results/exp_simu/Mfix/simu_all.tex', index=True)


In [None]:



# Generate the graph
graph = graphDiffLegend()

# Save the graph to a temporary file
graph.write_png("graph_diff_legend.png")

# Display the graph in the notebook
display(Image(filename="graph_diff_legend.png"))