In [643]:
import wandb 
import dill
import pandas as pd
import os 
import numpy as np 
import os

In [644]:
def download_runs(project_name):
    if not os.path.exists(
        f"./results_data/data_{project_name}.pkl"
    ):
        project_details = wandb.Api().runs(f"lucacorbucci/{project_name}")
        project_data = {}
        for run in project_details:
            print("Downloading run ", run.id)
            try:
                run_df = pd.DataFrame(
                    wandb.Api().run(f"lucacorbucci/{project_name}/{run.id}").scan_history()
                )
                if run.name not in project_data:
                    project_data[run.name] = []
                project_data[run.name].append(run_df)
            except Exception as e:
                print("Error downloading run ", run.id, e)
        with open(
            f"./results_data/data_{project_name}.pkl", "wb"
        ) as f:
            dill.dump(project_data, f)
    else:
        with open(
            f"./results_data/data_{project_name}.pkl", "rb"
        ) as f:
            project_data = dill.load(f)
    return project_data

In [645]:
merged_df = []

# Explanation Metrics

In [646]:
project_data = download_runs(project_name="new_metrics_computation")
project_name = "new_metrics_computation"

In [647]:
methods = ["dt", "svm", "logistic", "lime", "shap", "lore", "lore_genetic"]
datasets = ["house16", "letter", "shuttle", "adult", "dutch"]
top_k = [3, 5, 8, 10, 15, 20]
neigh_sizes = [1000, 2500, 5000]

In [648]:
metrics = {}

for dataset in datasets:
    metrics[dataset] = {}
    for method in methods:
        metrics[dataset][method] = {}
        for neigh_size in neigh_sizes:
            metrics[dataset][method][neigh_size] = {}
            if f"{method}_{dataset}" in project_data:
                results = project_data[f"{method}_{dataset}"]
                for result in results:
                    if "neigh_size" in result and (result["neigh_size"][0] == neigh_size or result["neigh_size"][0] == -1):
                        if "faithfulness" in result.columns:
                            faithfulness = round(float(result["faithfulness"]), 3)
                            faithfulness_std = round(float(result["faithfulness_std"]), 3)
                            metrics[dataset][method][neigh_size]["Faithfulness"] = f"{faithfulness} $\pm$ {faithfulness_std}"
                        if "stability" in result.columns:
                            stability = round(float(result["stability"]), 3)
                            stability_std = round(float(result["stability_std"]), 3)
                            metrics[dataset][method][neigh_size]["stability"] = f"{stability} $\pm$ {stability_std}"
                        for k in top_k:
                            if f"robustness_top_{k}" in result.columns:
                                robustness = round(float(result[f"robustness_top_{k}"]), 3)
                                robustness_std = round(float(result[f"robustness_std_top_{k}"]), 3)
                                metrics[dataset][method][neigh_size][f"robustness_top_{k}"] = f"{robustness} $\pm$ {robustness_std}"

  stability = round(float(result["stability"]), 3)
  stability_std = round(float(result["stability_std"]), 3)
  robustness = round(float(result[f"robustness_top_{k}"]), 3)
  robustness_std = round(float(result[f"robustness_std_top_{k}"]), 3)
  faithfulness = round(float(result["faithfulness"]), 3)
  faithfulness_std = round(float(result["faithfulness_std"]), 3)


In [649]:
metrics["adult"]["lore"]

{1000: {'stability': '0.377 $\\pm$ 0.153',
  'robustness_top_3': '0.225 $\\pm$ 0.129',
  'robustness_top_5': '0.225 $\\pm$ 0.112',
  'robustness_top_8': '0.224 $\\pm$ 0.102',
  'robustness_top_10': '0.224 $\\pm$ 0.098',
  'robustness_top_15': '0.224 $\\pm$ 0.093',
  'robustness_top_20': '0.224 $\\pm$ 0.091'},
 2500: {'stability': '0.469 $\\pm$ 0.154',
  'robustness_top_3': '0.235 $\\pm$ 0.124',
  'robustness_top_5': '0.235 $\\pm$ 0.104',
  'robustness_top_8': '0.235 $\\pm$ 0.092',
  'robustness_top_10': '0.235 $\\pm$ 0.088',
  'robustness_top_15': '0.235 $\\pm$ 0.081',
  'robustness_top_20': '0.235 $\\pm$ 0.077'},
 5000: {'stability': '0.339 $\\pm$ 0.223',
  'robustness_top_3': '0.341 $\\pm$ 0.155',
  'robustness_top_5': '0.342 $\\pm$ 0.137',
  'robustness_top_8': '0.341 $\\pm$ 0.126',
  'robustness_top_10': '0.341 $\\pm$ 0.122',
  'robustness_top_15': '0.342 $\\pm$ 0.116',
  'robustness_top_20': '0.341 $\\pm$ 0.113'}}

In [650]:
import pandas as pd

# Initialize an empty list to store the rows
rows = []

top_k_table = [3, 5, 8, 10,20]
# Iterate over the datasets and methods to extract the metrics
for dataset in datasets:
    for method in methods:
        for neigh_size in neigh_sizes:
            row = {
                'Dataset': dataset,
                'Neighborhood Size': neigh_size,
                'Method': method,
                'Stability': metrics[dataset][method][neigh_size].get('stability', '-'),
                'Faithfulness': metrics[dataset][method][neigh_size].get('Faithfulness', '-')
            }
            for k in top_k_table:
                row[f"Robustness K={k}"] = metrics[dataset][method][neigh_size].get(f'robustness_top_{k}', '-')
            rows.append(row)

# Create a dataframe from the rows
df_metrics = pd.DataFrame(rows)


# Map method names to their display names
method_mapping = {
    'dt': 'Decision Tree',
    'svm': 'SVM',
    'logistic': 'Logistic Regr.',
    'lime': 'LIME',
    'shap': 'SHAP',
    'lore': 'Lore (Random)',
    'lore_genetic': 'Lore (Genetic)'
}

# Map method names to their display names
dataset_name_mapping = {
    'adult': 'Adult',
    'house16': 'House 16',
    'letter': 'Letter',
    'dutch': 'Dutch',
    'covertype': 'Covertype',
    'shuttle': 'Shuttle'
}

# Apply the mapping to the Method column
df_metrics["Method"] = df_metrics["Method"].map(method_mapping)

df_metrics["Dataset"] = df_metrics["Dataset"].map(dataset_name_mapping)

# Sort the DataFrame by Dataset and Method
df_metrics = df_metrics.sort_values(by=['Dataset', 'Method'])

# Create custom method order for better visualization
method_order = {
    'Decision Tree': 1, 
    'SVM': 2, 
    'Logistic Regr.': 3, 
    'LIME': 4, 
    'SHAP': 5, 
    'Lore (Random)': 6, 
    'Lore (Genetic)': 7
}

# Create a new column for sorting by custom method order
df_metrics['method_order'] = df_metrics['Method'].map(method_order)

# Sort by Dataset first, then by the custom method order
df_metrics = df_metrics.sort_values(by=['Dataset', 'method_order'])

# Drop the helper column
df_metrics = df_metrics.drop(columns=['method_order'])
df_metrics_all_the_robustness = df_metrics.copy()
df_metrics.head(9)


Unnamed: 0,Dataset,Neighborhood Size,Method,Stability,Faithfulness,Robustness K=3,Robustness K=5,Robustness K=8,Robustness K=10,Robustness K=20
63,Adult,1000,Decision Tree,0.845 $\pm$ 0.23,-,0.417 $\pm$ 0.147,0.397 $\pm$ 0.122,0.38 $\pm$ 0.106,0.375 $\pm$ 0.104,0.358 $\pm$ 0.098
64,Adult,2500,Decision Tree,0.898 $\pm$ 0.167,-,0.593 $\pm$ 0.141,0.586 $\pm$ 0.126,0.579 $\pm$ 0.116,0.575 $\pm$ 0.111,0.564 $\pm$ 0.102
65,Adult,5000,Decision Tree,0.92 $\pm$ 0.148,-,0.616 $\pm$ 0.136,0.609 $\pm$ 0.121,0.603 $\pm$ 0.111,0.6 $\pm$ 0.106,0.59 $\pm$ 0.097
66,Adult,1000,SVM,0.861 $\pm$ 0.267,0.019 $\pm$ 0.117,0.265 $\pm$ 0.111,0.261 $\pm$ 0.097,0.26 $\pm$ 0.086,0.259 $\pm$ 0.084,0.257 $\pm$ 0.081
67,Adult,2500,SVM,0.859 $\pm$ 0.258,0.01 $\pm$ 0.112,0.298 $\pm$ 0.117,0.296 $\pm$ 0.107,0.294 $\pm$ 0.101,0.292 $\pm$ 0.099,0.288 $\pm$ 0.093
68,Adult,5000,SVM,0.727 $\pm$ 0.34,0.012 $\pm$ 0.122,0.244 $\pm$ 0.114,0.242 $\pm$ 0.103,0.241 $\pm$ 0.096,0.24 $\pm$ 0.094,0.238 $\pm$ 0.088
69,Adult,1000,Logistic Regr.,0.384 $\pm$ 0.279,-0.013 $\pm$ 0.211,0.178 $\pm$ 0.121,0.177 $\pm$ 0.125,0.162 $\pm$ 0.103,0.155 $\pm$ 0.088,0.137 $\pm$ 0.059
70,Adult,2500,Logistic Regr.,0.5 $\pm$ 0.28,0.012 $\pm$ 0.212,0.276 $\pm$ 0.147,0.274 $\pm$ 0.133,0.271 $\pm$ 0.125,0.27 $\pm$ 0.122,0.265 $\pm$ 0.114
71,Adult,5000,Logistic Regr.,0.469 $\pm$ 0.224,0.003 $\pm$ 0.211,0.249 $\pm$ 0.104,0.247 $\pm$ 0.092,0.245 $\pm$ 0.084,0.244 $\pm$ 0.081,0.24 $\pm$ 0.075


In [651]:
import pandas as pd

# Initialize an empty list to store the rows
rows = []
top_k_table = [5,10,20]
# Iterate over the datasets and methods to extract the metrics
for dataset in datasets:
    for method in methods:
        for neigh_size in neigh_sizes:

            row = {
                'Dataset': dataset,
                'Neighborhood Size': neigh_size,
                'Method': method,
                'Stability': metrics[dataset][method][neigh_size].get('stability', '-'),
                'Faithfulness': metrics[dataset][method][neigh_size].get('Faithfulness', '-'),
            }
            for k in top_k_table:
                row[f"Robustness K={k}"] = metrics[dataset][method][neigh_size].get(f'robustness_top_{k}', '-')
            rows.append(row)

# Create a dataframe from the rows
df_metrics_complete = pd.DataFrame(rows)


# Map method names to their display names
method_mapping = {
    'dt': r'\fire (DT)',
    'svm': r'\fire (SVM)',
    'logistic': r'\fire (LR)',
    'lime': 'LIME',
    'shap': 'SHAP',
    'lore': 'Lore (Random)',
    'lore_genetic': 'Lore (Genetic)'
}

# Map method names to their display names
dataset_name_mapping = {
    'adult': 'Adult',
    'house16': 'House 16',
    'letter': 'Letter',
    'dutch': 'Dutch',
    'covertype': 'Covertype',
    'shuttle': 'Shuttle'
}

# Apply the mapping to the Method column
df_metrics_complete["Method"] = df_metrics_complete["Method"].map(method_mapping)

df_metrics_complete["Dataset"] = df_metrics_complete["Dataset"].map(dataset_name_mapping)

# Sort the DataFrame by Dataset and Method
df_metrics_complete = df_metrics_complete.sort_values(by=['Dataset', 'Method'])

# Create custom method order for better visualization
method_order = {
    r'\fire (DT)': 1, 
    r'\fire (SVM)': 2, 
    r'\fire (LR)': 3, 
    'LIME': 4, 
    'SHAP': 5, 
    'Lore (Random)': 6, 
    'Lore (Genetic)': 7
}

# Create a new column for sorting by custom method order
df_metrics_complete['method_order'] = df_metrics_complete['Method'].map(method_order)

# Sort by Dataset first, then by the custom method order
df_metrics_complete = df_metrics_complete.sort_values(by=['Dataset', 'method_order'])

# Drop the helper column
df_metrics_complete = df_metrics_complete.drop(columns=['method_order'])

df_metrics_complete.head(9)


Unnamed: 0,Dataset,Neighborhood Size,Method,Stability,Faithfulness,Robustness K=5,Robustness K=10,Robustness K=20
63,Adult,1000,\fire (DT),0.845 $\pm$ 0.23,-,0.397 $\pm$ 0.122,0.375 $\pm$ 0.104,0.358 $\pm$ 0.098
64,Adult,2500,\fire (DT),0.898 $\pm$ 0.167,-,0.586 $\pm$ 0.126,0.575 $\pm$ 0.111,0.564 $\pm$ 0.102
65,Adult,5000,\fire (DT),0.92 $\pm$ 0.148,-,0.609 $\pm$ 0.121,0.6 $\pm$ 0.106,0.59 $\pm$ 0.097
66,Adult,1000,\fire (SVM),0.861 $\pm$ 0.267,0.019 $\pm$ 0.117,0.261 $\pm$ 0.097,0.259 $\pm$ 0.084,0.257 $\pm$ 0.081
67,Adult,2500,\fire (SVM),0.859 $\pm$ 0.258,0.01 $\pm$ 0.112,0.296 $\pm$ 0.107,0.292 $\pm$ 0.099,0.288 $\pm$ 0.093
68,Adult,5000,\fire (SVM),0.727 $\pm$ 0.34,0.012 $\pm$ 0.122,0.242 $\pm$ 0.103,0.24 $\pm$ 0.094,0.238 $\pm$ 0.088
69,Adult,1000,\fire (LR),0.384 $\pm$ 0.279,-0.013 $\pm$ 0.211,0.177 $\pm$ 0.125,0.155 $\pm$ 0.088,0.137 $\pm$ 0.059
70,Adult,2500,\fire (LR),0.5 $\pm$ 0.28,0.012 $\pm$ 0.212,0.274 $\pm$ 0.133,0.27 $\pm$ 0.122,0.265 $\pm$ 0.114
71,Adult,5000,\fire (LR),0.469 $\pm$ 0.224,0.003 $\pm$ 0.211,0.247 $\pm$ 0.092,0.244 $\pm$ 0.081,0.24 $\pm$ 0.075


In [652]:
def print_fancy_table(df):
    # Prepare dataframe for custom LaTeX output
    df_grouped = df.groupby('Dataset')

    # Start building the LaTeX table
    latex_output = "\\begin{tabular}{" + "l" * len(df.columns) + "}\n"
    latex_output += "\\toprule\n"

    # Add headers
    latex_output += " & ".join(df.columns) + " \\\\\n"
    latex_output += "\\midrule\n"

    # Add rows with midrules between datasets
    datasets = df['Dataset'].unique()
    for i, dataset in enumerate(datasets):
        group = df_grouped.get_group(dataset)
        
        # Convert group dataframe to LaTeX rows
        rows_latex = group.to_latex(index=False, header=False)
        
        # Extract just the rows part (not headers or table structure)
        rows_only = "\n".join(rows_latex.split("\n")[3:-3])
        
        latex_output += rows_only
        
        # Add midrule if not the last dataset
        if i < len(datasets) - 1:
            latex_output += "\\midrule\n"

    latex_output += "\\bottomrule\n\\end{tabular}"

    print(latex_output)

In [653]:
print_fancy_table(df_metrics_complete[df_metrics_complete["Neighborhood Size"] == 1000].drop(columns=['Neighborhood Size']))

\begin{tabular}{lllllll}
\toprule
Dataset & Method & Stability & Faithfulness & Robustness K=5 & Robustness K=10 & Robustness K=20 \\
\midrule
Adult & \fire (DT) & 0.845 $\pm$ 0.23 & - & 0.397 $\pm$ 0.122 & 0.375 $\pm$ 0.104 & 0.358 $\pm$ 0.098 \\
Adult & \fire (SVM) & 0.861 $\pm$ 0.267 & 0.019 $\pm$ 0.117 & 0.261 $\pm$ 0.097 & 0.259 $\pm$ 0.084 & 0.257 $\pm$ 0.081 \\
Adult & \fire (LR) & 0.384 $\pm$ 0.279 & -0.013 $\pm$ 0.211 & 0.177 $\pm$ 0.125 & 0.155 $\pm$ 0.088 & 0.137 $\pm$ 0.059 \\
Adult & LIME & 0.04 $\pm$ 0.017 & 0.066 $\pm$ 0.18 & 0.04 $\pm$ 0.008 & 0.04 $\pm$ 0.006 & 0.04 $\pm$ 0.005 \\
Adult & SHAP & 0.406 $\pm$ 0.196 & 0.515 $\pm$ 0.16 & 0.263 $\pm$ 0.112 & 0.257 $\pm$ 0.096 & 0.251 $\pm$ 0.087 \\
Adult & Lore (Random) & 0.377 $\pm$ 0.153 & - & 0.225 $\pm$ 0.112 & 0.224 $\pm$ 0.098 & 0.224 $\pm$ 0.091 \\
Adult & Lore (Genetic) & 0.222 $\pm$ 0.252 & - & 0.221 $\pm$ 0.143 & 0.221 $\pm$ 0.124 & 0.222 $\pm$ 0.112 \\\midrule
Dutch & \fire (DT) & 0.965 $\pm$ 0.114 & - & 0.628 $\

In [654]:
print_fancy_table(df_metrics_complete[df_metrics_complete["Neighborhood Size"] == 2500].drop(columns=['Neighborhood Size']))

\begin{tabular}{lllllll}
\toprule
Dataset & Method & Stability & Faithfulness & Robustness K=5 & Robustness K=10 & Robustness K=20 \\
\midrule
Adult & \fire (DT) & 0.898 $\pm$ 0.167 & - & 0.586 $\pm$ 0.126 & 0.575 $\pm$ 0.111 & 0.564 $\pm$ 0.102 \\
Adult & \fire (SVM) & 0.859 $\pm$ 0.258 & 0.01 $\pm$ 0.112 & 0.296 $\pm$ 0.107 & 0.292 $\pm$ 0.099 & 0.288 $\pm$ 0.093 \\
Adult & \fire (LR) & 0.5 $\pm$ 0.28 & 0.012 $\pm$ 0.212 & 0.274 $\pm$ 0.133 & 0.27 $\pm$ 0.122 & 0.265 $\pm$ 0.114 \\
Adult & LIME & 0.046 $\pm$ 0.018 & 0.064 $\pm$ 0.18 & 0.046 $\pm$ 0.009 & 0.046 $\pm$ 0.006 & 0.046 $\pm$ 0.005 \\
Adult & SHAP & 0.406 $\pm$ 0.196 & 0.515 $\pm$ 0.16 & 0.263 $\pm$ 0.112 & 0.257 $\pm$ 0.096 & 0.251 $\pm$ 0.087 \\
Adult & Lore (Random) & 0.469 $\pm$ 0.154 & - & 0.235 $\pm$ 0.104 & 0.235 $\pm$ 0.088 & 0.235 $\pm$ 0.077 \\
Adult & Lore (Genetic) & 0.301 $\pm$ 0.235 & - & 0.299 $\pm$ 0.143 & 0.299 $\pm$ 0.127 & 0.299 $\pm$ 0.118 \\\midrule
Dutch & \fire (DT) & 0.967 $\pm$ 0.099 & - & 0.903 $\p

In [655]:
print_fancy_table(df_metrics_complete[df_metrics_complete["Neighborhood Size"] == 5000].drop(columns=['Neighborhood Size']))

\begin{tabular}{lllllll}
\toprule
Dataset & Method & Stability & Faithfulness & Robustness K=5 & Robustness K=10 & Robustness K=20 \\
\midrule
Adult & \fire (DT) & 0.92 $\pm$ 0.148 & - & 0.609 $\pm$ 0.121 & 0.6 $\pm$ 0.106 & 0.59 $\pm$ 0.097 \\
Adult & \fire (SVM) & 0.727 $\pm$ 0.34 & 0.012 $\pm$ 0.122 & 0.242 $\pm$ 0.103 & 0.24 $\pm$ 0.094 & 0.238 $\pm$ 0.088 \\
Adult & \fire (LR) & 0.469 $\pm$ 0.224 & 0.003 $\pm$ 0.211 & 0.247 $\pm$ 0.092 & 0.244 $\pm$ 0.081 & 0.24 $\pm$ 0.075 \\
Adult & LIME & 0.055 $\pm$ 0.02 & 0.064 $\pm$ 0.177 & 0.054 $\pm$ 0.009 & 0.054 $\pm$ 0.007 & 0.054 $\pm$ 0.005 \\
Adult & SHAP & 0.406 $\pm$ 0.196 & 0.515 $\pm$ 0.16 & 0.263 $\pm$ 0.112 & 0.257 $\pm$ 0.096 & 0.251 $\pm$ 0.087 \\
Adult & Lore (Random) & 0.339 $\pm$ 0.223 & - & 0.342 $\pm$ 0.137 & 0.341 $\pm$ 0.122 & 0.341 $\pm$ 0.113 \\
Adult & Lore (Genetic) & 0.546 $\pm$ 0.175 & - & 0.251 $\pm$ 0.103 & 0.251 $\pm$ 0.083 & 0.252 $\pm$ 0.071 \\\midrule
Dutch & \fire (DT) & 0.955 $\pm$ 0.113 & - & 0.901 $\pm$

In [656]:
df_metrics_complete["Neighborhood Size"].unique()

array([1000, 2500, 5000])

In [657]:
merged_df.append(df_metrics_complete)

In [658]:
df_metrics_complete

Unnamed: 0,Dataset,Neighborhood Size,Method,Stability,Faithfulness,Robustness K=5,Robustness K=10,Robustness K=20
63,Adult,1000,\fire (DT),0.845 $\pm$ 0.23,-,0.397 $\pm$ 0.122,0.375 $\pm$ 0.104,0.358 $\pm$ 0.098
64,Adult,2500,\fire (DT),0.898 $\pm$ 0.167,-,0.586 $\pm$ 0.126,0.575 $\pm$ 0.111,0.564 $\pm$ 0.102
65,Adult,5000,\fire (DT),0.92 $\pm$ 0.148,-,0.609 $\pm$ 0.121,0.6 $\pm$ 0.106,0.59 $\pm$ 0.097
66,Adult,1000,\fire (SVM),0.861 $\pm$ 0.267,0.019 $\pm$ 0.117,0.261 $\pm$ 0.097,0.259 $\pm$ 0.084,0.257 $\pm$ 0.081
67,Adult,2500,\fire (SVM),0.859 $\pm$ 0.258,0.01 $\pm$ 0.112,0.296 $\pm$ 0.107,0.292 $\pm$ 0.099,0.288 $\pm$ 0.093
...,...,...,...,...,...,...,...,...
58,Shuttle,2500,Lore (Random),0.834 $\pm$ 0.141,-,0.708 $\pm$ 0.084,0.707 $\pm$ 0.072,0.707 $\pm$ 0.066
59,Shuttle,5000,Lore (Random),0.713 $\pm$ 0.115,-,0.7 $\pm$ 0.073,0.7 $\pm$ 0.065,0.7 $\pm$ 0.06
60,Shuttle,1000,Lore (Genetic),0.626 $\pm$ 0.135,-,0.621 $\pm$ 0.072,0.621 $\pm$ 0.06,0.621 $\pm$ 0.052
61,Shuttle,2500,Lore (Genetic),0.674 $\pm$ 0.125,-,0.663 $\pm$ 0.074,0.663 $\pm$ 0.064,0.663 $\pm$ 0.058


In [659]:
# import os
# import matplotlib.pyplot as plt
# import seaborn as sns
# import numpy as np

# def plot_robustness_per_dataset(df_metrics):
#     # Create the plots directory if it doesn't exist
#     os.makedirs('plots', exist_ok=True)
    
#     # Get the unique datasets and define top_k values
#     datasets = df_metrics['Dataset'].unique()
#     top_k = [3, 5, 8, 10, 20]
    
#     # Define a color-blind-friendly palette and markers
#     colors = ["#FF774E", "#7c7787", "#53C4FE", "#70DDA8", "#dc68e4", "#755c51", "gray"]
#     edge_colors = ["#E45D22", "#5a5255", "#009EFF", "#00B977", "fuchsia", "#ae5a41", "black"]
#     markers = ['o', 'd', 'v', 'h', 's', 'P', 'p']
# #     'D', '^', 'v', 'P', '*', 'X', 'p', 'h']  # Different markers
    
#     # Create a single figure with subplots
#     fig, axes = plt.subplots(nrows=2, ncols=3, figsize=(18, 15), sharex=True, sharey=True)
#     axes = axes.flatten()
    
#     handles, labels = [], []
    
#     for idx, dataset in enumerate(datasets):
#         ax = axes[idx]
#         subset = df_metrics[df_metrics['Dataset'] == dataset]
        
#         for i, method in enumerate(subset['Method'].unique()):
#             method_subset = subset[subset['Method'] == method]
#             robustness_values = []
            
#             for k in top_k:
#                 value = method_subset[f'Robustness K={k}'].values[0]
#                 robustness_value = float(str(value).split(' ')[0])  # Ensure extraction is robust
#                 robustness_values.append(robustness_value)
            
#             line, = ax.plot(top_k, 
#                             robustness_values,
#                             marker=markers[i % len(markers)],
#                             markersize=20, 
#                             linewidth=3, 
#                             linestyle="--",
#                             color=colors[i % len(colors)],
#                             markerfacecolor=colors[i % len(colors)], 
#                             markeredgecolor=edge_colors[i % len(colors)], 
#                             markeredgewidth=2,
#                             alpha=0.8, zorder=3)
#             if idx == 0:  # Collect legend elements only once
#                 handles.append(line)
#                 labels.append(method)
        
#         ax.set_title(f'{dataset}', fontsize=25, fontweight='bold')
#         ax.set_xlabel('K', fontsize=25)
#         ax.set_ylabel('Robustness', fontsize=25)
#         ax.tick_params(axis='both', which='major', labelsize=22)
#         ax.grid(True, linestyle='--', alpha=0.7)
    
#     # Adjust layout
#     plt.tight_layout(rect=[0, 0.1, 1, 1])
    
#     # Add external legend below plots
#     fig.legend(handles, labels, loc='lower center', bbox_to_anchor=(0.5, -0.02), ncol=3, fontsize=20, frameon=True, fancybox=True, shadow=True)
    
#     # Save the plot
#     plt.savefig('plots/robustness_all_datasets.png', bbox_inches='tight', dpi=300)
#     plt.close()

# # Call the function with df_metrics_complete
# plot_robustness_per_dataset(df_metrics_complete)


# Fidelity

In [660]:
project_data = download_runs(project_name="tango_eval")
project_name = "tango_eval"

In [661]:
project_data_comparison = download_runs(project_name="comparison_tango")
project_name = "comparison_tango"

In [662]:
methods = ["dt", "svm", "logistic"]
datasets = ["house16", "letter", "dutch", "adult", "covertype", "shuttle"]
neigh_sizes = [1000, 2500, 5000]

In [663]:
metrics = {}

for dataset in datasets:
    metrics[dataset] = {}   
    for method in methods: 
        metrics[dataset][method] = {}
        for neigh_size in neigh_sizes:
            metrics[dataset][method][neigh_size] = {}
            results = project_data[f"{method}_{dataset}"]
            fidelity_list = []
            fidelity_method_std_list = []
            fidelity_method_list = []
            for result in results:
                if "top_k" in result and result["top_k"][0] == neigh_size:
                    if "Fidelity" in result.columns:
                        fidelity_list.append(float(result["Fidelity"]))
                    if "Tree Accuracy" in result.columns:
                        fidelity_method_list.append(float(result["Tree Accuracy"]))
                        fidelity_method_std_list.append(float(result["Tree Accuracy Std"]))
            if len(fidelity_list) > 0:
                fidelity = round(np.mean(fidelity_list), 3)
                metrics[dataset][method][neigh_size]["Fidelity"] = fidelity

            if len(fidelity_method_list) > 0:
                fidelity_method = round(np.mean(fidelity_method_list), 3)
                fidelity_method_std = round(np.mean(fidelity_method_std_list), 3)
                metrics[dataset][method][neigh_size]["Fidelity_neigh"] = f"{fidelity_method} $\pm$ {fidelity_method_std}"

  fidelity_list.append(float(result["Fidelity"]))
  fidelity_method_list.append(float(result["Tree Accuracy"]))
  fidelity_method_std_list.append(float(result["Tree Accuracy Std"]))


In [664]:
metrics["house16"]

{'dt': {1000: {'Fidelity': 0.899, 'Fidelity_neigh': '0.864 $\\pm$ 0.043'},
  2500: {'Fidelity': 0.9, 'Fidelity_neigh': '0.876 $\\pm$ 0.031'},
  5000: {'Fidelity': 0.908, 'Fidelity_neigh': '0.886 $\\pm$ 0.028'}},
 'svm': {1000: {'Fidelity': 0.745, 'Fidelity_neigh': '0.631 $\\pm$ 0.12'},
  2500: {'Fidelity': 0.711, 'Fidelity_neigh': '0.61 $\\pm$ 0.126'},
  5000: {'Fidelity': 0.678, 'Fidelity_neigh': '0.608 $\\pm$ 0.135'}},
 'logistic': {1000: {'Fidelity': 0.962,
   'Fidelity_neigh': '0.947 $\\pm$ 0.026'},
  2500: {'Fidelity': 0.939, 'Fidelity_neigh': '0.916 $\\pm$ 0.021'},
  5000: {'Fidelity': 0.936, 'Fidelity_neigh': '0.916 $\\pm$ 0.019'}}}

In [665]:
import pandas as pd

# Initialize an empty list to store the rows
rows = []

# Iterate over the datasets and methods to extract the metrics
for dataset in datasets:
    for method in methods:
        for neigh_size in neigh_sizes:
            row = {
                'Dataset': dataset,
                "Neighborhood Size": neigh_size,
                'Method': method,
                'Fidelity': metrics[dataset][method][neigh_size].get('Fidelity', '-'),
                'Fid. Neigh.': metrics[dataset][method][neigh_size].get('Fidelity_neigh', '-'),
            }
            rows.append(row)

In [666]:
methods = ["lime", "lore", "lore_genetic"]
datasets = ["house16", "letter", "dutch", "adult", "covertype", "shuttle"]

In [667]:
metrics = {}
for dataset in datasets:
    metrics[dataset] = {}
    for method in methods:
        metrics[dataset][method] = {}
        for neigh_size in neigh_sizes:
            metrics[dataset][method][neigh_size] = {}
            results = project_data_comparison[f"{method}_{dataset}"]
            fidelity_list = []
            fidelity_neigh_list = []
            fidelity_neigh_std_list = []
            for result in results:
                if "neigh_size" in result and result["neigh_size"][0] == neigh_size:
                    if "fidelity" in result.columns :
                        fidelity_list.append(float(result["fidelity"])) 
                        fidelity_neigh_list.append(float(result["fidelity_method"]))
                        fidelity_neigh_std_list.append(float(result["fidelity_method_std"]))
                elif "neigh_size" not in result and neigh_size == 5000:
                    
                    if "fidelity" in result.columns and "fidelity_method" in result.columns:
                        if isinstance(result["fidelity_method"][0], np.float64) or isinstance(result["fidelity_method"][0], np.int64) or isinstance(result["fidelity_method"][0], np.float32):
                            metrics[dataset][method][neigh_size] = {}
                            fidelity_list.append(float(result["fidelity"])) 
                            fidelity_neigh_list.append(float(result["fidelity_method"]))
                            fidelity_neigh_std_list.append(float(result["fidelity_method_std"]))

            if len(fidelity_list) > 0:
                fidelity = round(np.mean(fidelity_list), 3)
                metrics[dataset][method][neigh_size]["Fidelity"] = f"{fidelity}"
            if len(fidelity_neigh_list) > 0:
                fid_neigh = round(np.mean(fidelity_neigh_list), 3)
                std = round(np.mean(fidelity_neigh_std_list), 3)
                metrics[dataset][method][neigh_size]["Fidelity_neigh"] = f"{fid_neigh} $\pm$ {std}"
                            

  fidelity_list.append(float(result["fidelity"]))
  fidelity_neigh_list.append(float(result["fidelity_method"]))
  fidelity_neigh_std_list.append(float(result["fidelity_method_std"]))
  fidelity_list.append(float(result["fidelity"]))
  fidelity_neigh_list.append(float(result["fidelity_method"]))
  fidelity_neigh_std_list.append(float(result["fidelity_method_std"]))


In [668]:
metrics["letter"]["lore_genetic"]

{1000: {'Fidelity': '0.039', 'Fidelity_neigh': '1.0 $\\pm$ 0.0'},
 2500: {'Fidelity': '0.04', 'Fidelity_neigh': '1.0 $\\pm$ 0.0'},
 5000: {}}

In [669]:
# Iterate over the datasets and methods to extract the metrics
for dataset in datasets:
    for method in methods:
        for n in neigh_sizes:
            row = {
                'Dataset': dataset,
                'Neighborhood Size': n,
                'Method': method,
                'Fidelity': metrics[dataset][method][n].get('Fidelity', '-'),
                'Fid. Neigh.': metrics[dataset][method][neigh_size].get('Fidelity_neigh', '-'),
            }
            rows.append(row)


In [670]:
# Create a dataframe from the rows
df_metrics = pd.DataFrame(rows)

df_metrics = df_metrics.sort_values(by=['Dataset', 'Method'])
df_metrics.head(10)


# Map method names to their display names
method_mapping = {
    'dt': r'\fire (DT)',
    'svm': r'\fire (SVM)',
    'logistic': r'\fire (LR)',
    'lime': 'LIME',
    'shap': 'SHAP',
    'lore': 'Lore (Random)',
    'lore_genetic': 'Lore (Genetic)'
}

# Map method names to their display names
dataset_name_mapping = {
    'adult': 'Adult',
    'house16': 'House 16',
    'letter': 'Letter',
    'dutch': 'Dutch',
    'covertype': 'Covertype',
    'shuttle': 'Shuttle'
}

# Apply the mapping to the Method column
df_metrics["Method"] = df_metrics["Method"].map(method_mapping)

df_metrics["Dataset"] = df_metrics["Dataset"].map(dataset_name_mapping)

# Sort the DataFrame by Dataset and Method
df_metrics = df_metrics.sort_values(by=['Dataset', 'Method'])

# Create custom method order for better visualization
method_order = {
    r'\fire (DT)': 1, 
    r'\fire (SVM)': 2, 
    r'\fire (LR)': 3, 
    'LIME': 4, 
    'SHAP': 5, 
    'Lore (Random)': 6, 
    'Lore (Genetic)': 7
}

# Create a new column for sorting by custom method order
df_metrics['method_order'] = df_metrics['Method'].map(method_order)

# Sort by Dataset first, then by the custom method order
df_metrics = df_metrics.sort_values(by=['Dataset', 'method_order'])

# Drop the helper column
df_metrics = df_metrics.drop(columns=['method_order'])

df_metrics.head(9)

Unnamed: 0,Dataset,Neighborhood Size,Method,Fidelity,Fid. Neigh.
27,Adult,1000,\fire (DT),0.907,0.918 $\pm$ 0.04
28,Adult,2500,\fire (DT),0.897,0.931 $\pm$ 0.037
29,Adult,5000,\fire (DT),0.897,0.94 $\pm$ 0.032
30,Adult,1000,\fire (SVM),0.522,0.514 $\pm$ 0.175
31,Adult,2500,\fire (SVM),-,-
32,Adult,5000,\fire (SVM),0.529,0.518 $\pm$ 0.177
33,Adult,1000,\fire (LR),0.895,0.969 $\pm$ 0.019
34,Adult,2500,\fire (LR),0.846,0.974 $\pm$ 0.014
35,Adult,5000,\fire (LR),0.9,0.98 $\pm$ 0.01


In [671]:
df_metrics[df_metrics["Method"] == "LIME"] 

Unnamed: 0,Dataset,Neighborhood Size,Method,Fidelity,Fid. Neigh.
81,Adult,1000,LIME,0.908,0.729 $\pm$ 0.017
82,Adult,2500,LIME,0.903,0.729 $\pm$ 0.017
83,Adult,5000,LIME,0.926,0.729 $\pm$ 0.017
90,Covertype,1000,LIME,0.714,0.261 $\pm$ 0.107
91,Covertype,2500,LIME,0.712,0.261 $\pm$ 0.107
92,Covertype,5000,LIME,0.709,0.261 $\pm$ 0.107
72,Dutch,1000,LIME,0.893,0.516 $\pm$ 0.193
73,Dutch,2500,LIME,0.896,0.516 $\pm$ 0.193
74,Dutch,5000,LIME,0.896,0.516 $\pm$ 0.193
54,House 16,1000,LIME,0.882,0.245 $\pm$ 0.145


In [672]:
# sort df_metrics by dataset name

# remove index 
df_metrics = df_metrics.reset_index(drop=True)

In [673]:
merged_df.append(df_metrics)

In [674]:
print_fancy_table(df_metrics[df_metrics["Neighborhood Size"] == 5000].drop(columns=['Neighborhood Size']))

\begin{tabular}{llll}
\toprule
Dataset & Method & Fidelity & Fid. Neigh. \\
\midrule
Adult & \fire (DT) & 0.897000 & 0.94 $\pm$ 0.032 \\
Adult & \fire (SVM) & 0.529000 & 0.518 $\pm$ 0.177 \\
Adult & \fire (LR) & 0.900000 & 0.98 $\pm$ 0.01 \\
Adult & LIME & 0.926 & 0.729 $\pm$ 0.017 \\
Adult & Lore (Random) & 0.652 & 1.0 $\pm$ 0.0 \\
Adult & Lore (Genetic) & - & - \\\midrule
Covertype & \fire (DT) & 0.842000 & 0.854 $\pm$ 0.028 \\
Covertype & \fire (SVM) & 0.562000 & 0.525 $\pm$ 0.096 \\
Covertype & \fire (LR) & 0.854000 & 0.865 $\pm$ 0.029 \\
Covertype & LIME & 0.709 & 0.261 $\pm$ 0.107 \\
Covertype & Lore (Random) & 0.377 & 1.0 $\pm$ 0.0 \\
Covertype & Lore (Genetic) & - & - \\\midrule
Dutch & \fire (DT) & 0.996000 & 0.997 $\pm$ 0.003 \\
Dutch & \fire (SVM) & 0.989000 & 0.985 $\pm$ 0.02 \\
Dutch & \fire (LR) & 0.992000 & 0.988 $\pm$ 0.012 \\
Dutch & LIME & 0.896 & 0.516 $\pm$ 0.193 \\
Dutch & Lore (Random) & 0.501 & 1.0 $\pm$ 0.0 \\
Dutch & Lore (Genetic) & - & - \\\midrule
House 16 &

In [675]:
print_fancy_table(df_metrics[df_metrics["Neighborhood Size"] == 2500].drop(columns=['Neighborhood Size']))

\begin{tabular}{llll}
\toprule
Dataset & Method & Fidelity & Fid. Neigh. \\
\midrule
Adult & \fire (DT) & 0.897000 & 0.931 $\pm$ 0.037 \\
Adult & \fire (SVM) & - & - \\
Adult & \fire (LR) & 0.846000 & 0.974 $\pm$ 0.014 \\
Adult & LIME & 0.903 & 0.729 $\pm$ 0.017 \\
Adult & Lore (Random) & 0.636 & 1.0 $\pm$ 0.0 \\
Adult & Lore (Genetic) & 0.669 & - \\\midrule
Covertype & \fire (DT) & 0.840000 & 0.847 $\pm$ 0.034 \\
Covertype & \fire (SVM) & 0.578000 & 0.546 $\pm$ 0.104 \\
Covertype & \fire (LR) & 0.861000 & 0.866 $\pm$ 0.033 \\
Covertype & LIME & 0.712 & 0.261 $\pm$ 0.107 \\
Covertype & Lore (Random) & 0.394 & 1.0 $\pm$ 0.0 \\
Covertype & Lore (Genetic) & 0.366 & - \\\midrule
Dutch & \fire (DT) & 0.997000 & 0.997 $\pm$ 0.004 \\
Dutch & \fire (SVM) & 0.994000 & 0.992 $\pm$ 0.014 \\
Dutch & \fire (LR) & 0.994000 & 0.993 $\pm$ 0.01 \\
Dutch & LIME & 0.896 & 0.516 $\pm$ 0.193 \\
Dutch & Lore (Random) & 0.501 & 1.0 $\pm$ 0.0 \\
Dutch & Lore (Genetic) & 0.502 & - \\\midrule
House 16 & \fire (

In [676]:
df_metrics["Neighborhood Size"].unique()

array([1000, 2500, 5000])

# Time 

In [677]:
project_data_time = download_runs(project_name="time_computation")
project_name = "time_computation"

In [678]:
methods = ["dt", "svm", "logistic", "lime", "shap", "lore"]
datasets = ["adult", "house16", "letter", "dutch", "covertype", "shuttle"]
top_k = [3, 5, 8, 10, 20]

In [679]:
metrics = {}

for dataset in datasets:
    metrics[dataset] = {}   
    for method in methods: 
        metrics[dataset][method] = {}
        if f"{method}_{dataset}" in project_data_time:
            results = project_data_time[f"{method}_{dataset}"][0]
            if "Total Time (sec)" in results.columns:
                total_time = round(float(results["Total Time (sec)"]), 3)
                total_time_std = round(float(results["Total Time Std (sec)"]), 3)
                metrics[dataset][method]["Total Time"] = f"{total_time} $\pm$ {total_time_std}"

  total_time = round(float(results["Total Time (sec)"]), 3)
  total_time_std = round(float(results["Total Time Std (sec)"]), 3)


In [680]:
# metrics

In [681]:
rows = []
# Iterate over the datasets and methods to extract the metrics
for dataset in datasets:
    for method in methods:
        row = {
            'Dataset': dataset,
            'Method': method,
            'Total Time': metrics[dataset][method].get('Total Time', '-'),
        }
        rows.append(row)

# Create a dataframe from the rows
df_metrics = pd.DataFrame(rows)

df_metrics = df_metrics.sort_values(by=['Dataset', 'Method'])
df_metrics.head(10)


# Map method names to their display names
method_mapping = {
    'dt': r'\fire (DT)',
    'svm': r'\fire (SVM)',
    'logistic': r'\fire (LR)',
    'lime': 'LIME',
    'shap': 'SHAP',
    'lore': 'Lore (Random)',
    'lore_genetic': 'Lore (Genetic)'
}

# Map method names to their display names
dataset_name_mapping = {
    'adult': 'Adult',
    'house16': 'House 16',
    'letter': 'Letter',
    'dutch': 'Dutch',
    'covertype': 'Covertype',
    'shuttle': 'Shuttle'
}

# Apply the mapping to the Method column
df_metrics["Method"] = df_metrics["Method"].map(method_mapping)

df_metrics["Dataset"] = df_metrics["Dataset"].map(dataset_name_mapping)

# Sort the DataFrame by Dataset and Method
df_metrics = df_metrics.sort_values(by=['Dataset', 'Method'])

# Create custom method order for better visualization
method_order = {
    r'\fire (DT)': 1, 
    r'\fire (SVM)': 2, 
    r'\fire (LR)': 3, 
    'LIME': 4, 
    'SHAP': 5, 
    'Lore (Random)': 6, 
    'Lore (Genetic)': 7
}

# Create a new column for sorting by custom method order
df_metrics['method_order'] = df_metrics['Method'].map(method_order)

# Sort by Dataset first, then by the custom method order
df_metrics = df_metrics.sort_values(by=['Dataset', 'method_order'])

# Drop the helper column
df_metrics = df_metrics.drop(columns=['method_order'])

df_metrics.head(9)

Unnamed: 0,Dataset,Method,Total Time
0,Adult,\fire (DT),2.137 $\pm$ 0.177
1,Adult,\fire (SVM),1.112 $\pm$ 0.212
2,Adult,\fire (LR),2.054 $\pm$ 0.629
3,Adult,LIME,0.081 $\pm$ 0.006
4,Adult,SHAP,2.445 $\pm$ 0.065
5,Adult,Lore (Random),13.242 $\pm$ 0.564
24,Covertype,\fire (DT),2.348 $\pm$ 0.579
25,Covertype,\fire (SVM),1.199 $\pm$ 0.602
26,Covertype,\fire (LR),17.707 $\pm$ 9.385


In [682]:
# # sort df_metrics by dataset name

# # remove index 
df_metrics = df_metrics.reset_index(drop=True)

In [683]:
df_metrics["Neighborhood Size"] = 5000

In [684]:
# Prepare dataframe for custom LaTeX output
df_grouped = df_metrics.groupby('Dataset')

# Start building the LaTeX table
latex_output = "\\begin{tabular}{" + "l" * len(df_metrics.columns) + "}\n"
latex_output += "\\toprule\n"

# Add headers
latex_output += " & ".join(df_metrics.columns) + " \\\\\n"
latex_output += "\\midrule\n"

# Add rows with midrules between datasets
datasets = df_metrics['Dataset'].unique()
for i, dataset in enumerate(datasets):
    group = df_grouped.get_group(dataset)
    
    # Convert group dataframe to LaTeX rows
    rows_latex = group.to_latex(index=False, header=False)
    
    # Extract just the rows part (not headers or table structure)
    rows_only = "\n".join(rows_latex.split("\n")[3:-3])
    
    latex_output += rows_only
    
    # Add midrule if not the last dataset
    if i < len(datasets) - 1:
        latex_output += "\\midrule\n"

latex_output += "\\bottomrule\n\\end{tabular}"

print(latex_output)

\begin{tabular}{llll}
\toprule
Dataset & Method & Total Time & Neighborhood Size \\
\midrule
Adult & \fire (DT) & 2.137 $\pm$ 0.177 & 5000 \\
Adult & \fire (SVM) & 1.112 $\pm$ 0.212 & 5000 \\
Adult & \fire (LR) & 2.054 $\pm$ 0.629 & 5000 \\
Adult & LIME & 0.081 $\pm$ 0.006 & 5000 \\
Adult & SHAP & 2.445 $\pm$ 0.065 & 5000 \\
Adult & Lore (Random) & 13.242 $\pm$ 0.564 & 5000 \\\midrule
Covertype & \fire (DT) & 2.348 $\pm$ 0.579 & 5000 \\
Covertype & \fire (SVM) & 1.199 $\pm$ 0.602 & 5000 \\
Covertype & \fire (LR) & 17.707 $\pm$ 9.385 & 5000 \\
Covertype & LIME & 0.112 $\pm$ 0.11 & 5000 \\
Covertype & SHAP & 18.637 $\pm$ 0.447 & 5000 \\
Covertype & Lore (Random) & 5.071 $\pm$ 0.404 & 5000 \\\midrule
Dutch & \fire (DT) & 0.927 $\pm$ 0.032 & 5000 \\
Dutch & \fire (SVM) & 0.351 $\pm$ 0.033 & 5000 \\
Dutch & \fire (LR) & 6.553 $\pm$ 6.838 & 5000 \\
Dutch & LIME & 0.091 $\pm$ 0.125 & 5000 \\
Dutch & SHAP & 1.724 $\pm$ 0.077 & 5000 \\
Dutch & Lore (Random) & 2.137 $\pm$ 0.131 & 5000 \\\midrule

In [685]:
merged_df.append(df_metrics)

# Merged datasets

In [686]:
merged_df[0].head()

Unnamed: 0,Dataset,Neighborhood Size,Method,Stability,Faithfulness,Robustness K=5,Robustness K=10,Robustness K=20
63,Adult,1000,\fire (DT),0.845 $\pm$ 0.23,-,0.397 $\pm$ 0.122,0.375 $\pm$ 0.104,0.358 $\pm$ 0.098
64,Adult,2500,\fire (DT),0.898 $\pm$ 0.167,-,0.586 $\pm$ 0.126,0.575 $\pm$ 0.111,0.564 $\pm$ 0.102
65,Adult,5000,\fire (DT),0.92 $\pm$ 0.148,-,0.609 $\pm$ 0.121,0.6 $\pm$ 0.106,0.59 $\pm$ 0.097
66,Adult,1000,\fire (SVM),0.861 $\pm$ 0.267,0.019 $\pm$ 0.117,0.261 $\pm$ 0.097,0.259 $\pm$ 0.084,0.257 $\pm$ 0.081
67,Adult,2500,\fire (SVM),0.859 $\pm$ 0.258,0.01 $\pm$ 0.112,0.296 $\pm$ 0.107,0.292 $\pm$ 0.099,0.288 $\pm$ 0.093


In [687]:
merged_df[1].head()

Unnamed: 0,Dataset,Neighborhood Size,Method,Fidelity,Fid. Neigh.
0,Adult,1000,\fire (DT),0.907,0.918 $\pm$ 0.04
1,Adult,2500,\fire (DT),0.897,0.931 $\pm$ 0.037
2,Adult,5000,\fire (DT),0.897,0.94 $\pm$ 0.032
3,Adult,1000,\fire (SVM),0.522,0.514 $\pm$ 0.175
4,Adult,2500,\fire (SVM),-,-


In [688]:
merged_df[1]["Method"].unique()

array(['\\fire (DT)', '\\fire (SVM)', '\\fire (LR)', 'LIME',
       'Lore (Random)', 'Lore (Genetic)'], dtype=object)

In [689]:
merged_df[1][merged_df[1]["Method"] == "LIME"]

Unnamed: 0,Dataset,Neighborhood Size,Method,Fidelity,Fid. Neigh.
9,Adult,1000,LIME,0.908,0.729 $\pm$ 0.017
10,Adult,2500,LIME,0.903,0.729 $\pm$ 0.017
11,Adult,5000,LIME,0.926,0.729 $\pm$ 0.017
27,Covertype,1000,LIME,0.714,0.261 $\pm$ 0.107
28,Covertype,2500,LIME,0.712,0.261 $\pm$ 0.107
29,Covertype,5000,LIME,0.709,0.261 $\pm$ 0.107
45,Dutch,1000,LIME,0.893,0.516 $\pm$ 0.193
46,Dutch,2500,LIME,0.896,0.516 $\pm$ 0.193
47,Dutch,5000,LIME,0.896,0.516 $\pm$ 0.193
63,House 16,1000,LIME,0.882,0.245 $\pm$ 0.145


In [690]:
merged_df[2].head()

Unnamed: 0,Dataset,Method,Total Time,Neighborhood Size
0,Adult,\fire (DT),2.137 $\pm$ 0.177,5000
1,Adult,\fire (SVM),1.112 $\pm$ 0.212,5000
2,Adult,\fire (LR),2.054 $\pm$ 0.629,5000
3,Adult,LIME,0.081 $\pm$ 0.006,5000
4,Adult,SHAP,2.445 $\pm$ 0.065,5000


In [691]:
# Extracting the three dataframes from merged_df
df_explanation_metrics = merged_df[0]  # First dataframe with explanation metrics
df_fidelity = merged_df[1]            # Second dataframe with fidelity
df_time = merged_df[2]                # Third dataframe with time metrics

# Create a unique identifier for each row to merge on
for df in [df_explanation_metrics, df_fidelity, df_time]:
    df['id'] = df['Dataset'] + '_' + df['Method'] + df["Neighborhood Size"].astype(str)

# Merge all dataframes into a single one
df_merged = df_explanation_metrics.merge(
    df_fidelity[['id', 'Fidelity', "Fid. Neigh."]], 
    on='id', 
    how='outer'
)

df_merged = df_merged.merge(
    df_time[['id', 'Total Time']], 
    on='id', 
    how='outer'
)

# Drop the temporary id column
df_merged = df_merged.drop(columns=['id'])

# Reorganize columns to have a more logical order
columns_order = ['Dataset', 'Method', 'Faithfulness', 'Stability', 'Fidelity', "Fid. Neigh.", 'Total Time', "Neighborhood Size"] + \
                [col for col in df_merged.columns if 'Robustness' in col]

# Apply the column order (only for columns that exist in the dataframe)
available_columns = [col for col in columns_order if col in df_merged.columns]
df_merged = df_merged[available_columns]

# Sort by Dataset and Method using the method_order dictionary
df_merged['method_order'] = df_merged['Method'].map(method_order)
df_merged = df_merged.sort_values(by=['Dataset', 'method_order'])
df_merged = df_merged.drop(columns=['method_order'])

# Reset index
df_merged = df_merged.reset_index(drop=True)
# df_merged["Total Time"] = ["-"] * len(df_merged)

# Display the first few rows
df_merged.head(100)

Unnamed: 0,Dataset,Method,Faithfulness,Stability,Fidelity,Fid. Neigh.,Total Time,Neighborhood Size,Robustness K=5,Robustness K=10,Robustness K=20
0,Adult,\fire (DT),-,0.845 $\pm$ 0.23,0.907,0.918 $\pm$ 0.04,,1000.0,0.397 $\pm$ 0.122,0.375 $\pm$ 0.104,0.358 $\pm$ 0.098
1,Adult,\fire (DT),-,0.898 $\pm$ 0.167,0.897,0.931 $\pm$ 0.037,,2500.0,0.586 $\pm$ 0.126,0.575 $\pm$ 0.111,0.564 $\pm$ 0.102
2,Adult,\fire (DT),-,0.92 $\pm$ 0.148,0.897,0.94 $\pm$ 0.032,2.137 $\pm$ 0.177,5000.0,0.609 $\pm$ 0.121,0.6 $\pm$ 0.106,0.59 $\pm$ 0.097
3,Adult,\fire (SVM),0.019 $\pm$ 0.117,0.861 $\pm$ 0.267,0.522,0.514 $\pm$ 0.175,,1000.0,0.261 $\pm$ 0.097,0.259 $\pm$ 0.084,0.257 $\pm$ 0.081
4,Adult,\fire (SVM),0.01 $\pm$ 0.112,0.859 $\pm$ 0.258,-,-,,2500.0,0.296 $\pm$ 0.107,0.292 $\pm$ 0.099,0.288 $\pm$ 0.093
...,...,...,...,...,...,...,...,...,...,...,...
95,Shuttle,LIME,0.01 $\pm$ 0.668,0.423 $\pm$ 0.195,0.801,0.078 $\pm$ 0.047,0.069 $\pm$ 0.096,5000.0,0.328 $\pm$ 0.144,0.32 $\pm$ 0.132,0.312 $\pm$ 0.125
96,Shuttle,SHAP,0.57 $\pm$ 0.551,1.0 $\pm$ 0.008,,,,1000.0,0.693 $\pm$ 0.17,0.663 $\pm$ 0.156,0.634 $\pm$ 0.147
97,Shuttle,SHAP,0.57 $\pm$ 0.551,1.0 $\pm$ 0.008,,,,2500.0,0.693 $\pm$ 0.17,0.663 $\pm$ 0.156,0.634 $\pm$ 0.147
98,Shuttle,SHAP,0.57 $\pm$ 0.551,1.0 $\pm$ 0.008,,,1.095 $\pm$ 0.051,5000.0,0.693 $\pm$ 0.17,0.663 $\pm$ 0.156,0.634 $\pm$ 0.147


In [692]:
# Filter rows  where Neighborhood Sizet ish not 
df_merged = df_merged[~df_merged["Neighborhood Size"].isna()]

In [693]:
# Reorganize columns to have a more logical order
columns_order = ["Neighborhood Size", 'Dataset', 'Method', 'Total Time', 'Fidelity', "Fid. Neigh.", 'Faithfulness', 'Stability'] + \
                [col for col in df_merged.columns if 'Robustness' in col]

# Apply the column order (only for columns that exist in the dataframe)
available_columns = [col for col in columns_order if col in df_merged.columns]
df_merged = df_merged[available_columns]

# Sort by Dataset and Method using the method_order dictionary
df_merged['method_order'] = df_merged['Method'].map(method_order)
df_merged = df_merged.sort_values(by=['Dataset', 'method_order'])
df_merged = df_merged.drop(columns=['method_order'])

# Reset index
df_merged = df_merged.reset_index(drop=True)
# Display the first few rows


In [694]:
df_merged.to_csv("results.csv", index=False)

In [695]:
df_merged.head(50)

Unnamed: 0,Neighborhood Size,Dataset,Method,Total Time,Fidelity,Fid. Neigh.,Faithfulness,Stability,Robustness K=5,Robustness K=10,Robustness K=20
0,1000.0,Adult,\fire (DT),,0.907,0.918 $\pm$ 0.04,-,0.845 $\pm$ 0.23,0.397 $\pm$ 0.122,0.375 $\pm$ 0.104,0.358 $\pm$ 0.098
1,2500.0,Adult,\fire (DT),,0.897,0.931 $\pm$ 0.037,-,0.898 $\pm$ 0.167,0.586 $\pm$ 0.126,0.575 $\pm$ 0.111,0.564 $\pm$ 0.102
2,5000.0,Adult,\fire (DT),2.137 $\pm$ 0.177,0.897,0.94 $\pm$ 0.032,-,0.92 $\pm$ 0.148,0.609 $\pm$ 0.121,0.6 $\pm$ 0.106,0.59 $\pm$ 0.097
3,1000.0,Adult,\fire (SVM),,0.522,0.514 $\pm$ 0.175,0.019 $\pm$ 0.117,0.861 $\pm$ 0.267,0.261 $\pm$ 0.097,0.259 $\pm$ 0.084,0.257 $\pm$ 0.081
4,2500.0,Adult,\fire (SVM),,-,-,0.01 $\pm$ 0.112,0.859 $\pm$ 0.258,0.296 $\pm$ 0.107,0.292 $\pm$ 0.099,0.288 $\pm$ 0.093
5,5000.0,Adult,\fire (SVM),1.112 $\pm$ 0.212,0.529,0.518 $\pm$ 0.177,0.012 $\pm$ 0.122,0.727 $\pm$ 0.34,0.242 $\pm$ 0.103,0.24 $\pm$ 0.094,0.238 $\pm$ 0.088
6,1000.0,Adult,\fire (LR),,0.895,0.969 $\pm$ 0.019,-0.013 $\pm$ 0.211,0.384 $\pm$ 0.279,0.177 $\pm$ 0.125,0.155 $\pm$ 0.088,0.137 $\pm$ 0.059
7,2500.0,Adult,\fire (LR),,0.846,0.974 $\pm$ 0.014,0.012 $\pm$ 0.212,0.5 $\pm$ 0.28,0.274 $\pm$ 0.133,0.27 $\pm$ 0.122,0.265 $\pm$ 0.114
8,5000.0,Adult,\fire (LR),2.054 $\pm$ 0.629,0.9,0.98 $\pm$ 0.01,0.003 $\pm$ 0.211,0.469 $\pm$ 0.224,0.247 $\pm$ 0.092,0.244 $\pm$ 0.081,0.24 $\pm$ 0.075
9,1000.0,Adult,LIME,,0.908,0.729 $\pm$ 0.017,0.066 $\pm$ 0.18,0.04 $\pm$ 0.017,0.04 $\pm$ 0.008,0.04 $\pm$ 0.006,0.04 $\pm$ 0.005


In [696]:
df_merged["Neighborhood Size"].unique()

array([1000., 2500., 5000.])

In [697]:
# Select the rows of df_merged where Fidelity is "-" and the method is "Lore (Genetic)"
df_merged[(df_merged["Fidelity"] == "-") & (df_merged["Method"] == "Lore (Genetic)")]

Unnamed: 0,Neighborhood Size,Dataset,Method,Total Time,Fidelity,Fid. Neigh.,Faithfulness,Stability,Robustness K=5,Robustness K=10,Robustness K=20
20,5000.0,Adult,Lore (Genetic),,-,-,-,0.546 $\pm$ 0.175,0.251 $\pm$ 0.103,0.251 $\pm$ 0.083,0.252 $\pm$ 0.071
41,5000.0,Dutch,Lore (Genetic),,-,-,-,0.808 $\pm$ 0.197,0.598 $\pm$ 0.123,0.598 $\pm$ 0.107,0.598 $\pm$ 0.1
62,5000.0,House 16,Lore (Genetic),,-,-,-,0.772 $\pm$ 0.144,0.553 $\pm$ 0.1,0.553 $\pm$ 0.085,0.553 $\pm$ 0.077
83,5000.0,Letter,Lore (Genetic),,-,-,-,0.9 $\pm$ 0.059,0.773 $\pm$ 0.047,0.773 $\pm$ 0.041,0.773 $\pm$ 0.038
104,5000.0,Shuttle,Lore (Genetic),,-,-,-,0.868 $\pm$ 0.117,0.751 $\pm$ 0.079,0.75 $\pm$ 0.069,0.75 $\pm$ 0.064


In [698]:
# Get the indexes of rows that need to be updated
idx = df_merged[(df_merged["Fidelity"] == "-") & (df_merged["Method"] == "Lore (Genetic)")].index

# Update the values using loc[] to avoid the warning
df_merged.loc[idx, "Fid. Neigh."] = "1.0 $\pm$ 0.0"
df_merged.loc[idx, "Fidelity"] = [0.64, 0.50 , 0.61 , 0.04 , 0.67]

In [699]:
df_merged.iloc[idx]

Unnamed: 0,Neighborhood Size,Dataset,Method,Total Time,Fidelity,Fid. Neigh.,Faithfulness,Stability,Robustness K=5,Robustness K=10,Robustness K=20
20,5000.0,Adult,Lore (Genetic),,0.64,1.0 $\pm$ 0.0,-,0.546 $\pm$ 0.175,0.251 $\pm$ 0.103,0.251 $\pm$ 0.083,0.252 $\pm$ 0.071
41,5000.0,Dutch,Lore (Genetic),,0.5,1.0 $\pm$ 0.0,-,0.808 $\pm$ 0.197,0.598 $\pm$ 0.123,0.598 $\pm$ 0.107,0.598 $\pm$ 0.1
62,5000.0,House 16,Lore (Genetic),,0.61,1.0 $\pm$ 0.0,-,0.772 $\pm$ 0.144,0.553 $\pm$ 0.1,0.553 $\pm$ 0.085,0.553 $\pm$ 0.077
83,5000.0,Letter,Lore (Genetic),,0.04,1.0 $\pm$ 0.0,-,0.9 $\pm$ 0.059,0.773 $\pm$ 0.047,0.773 $\pm$ 0.041,0.773 $\pm$ 0.038
104,5000.0,Shuttle,Lore (Genetic),,0.67,1.0 $\pm$ 0.0,-,0.868 $\pm$ 0.117,0.751 $\pm$ 0.079,0.75 $\pm$ 0.069,0.75 $\pm$ 0.064


In [700]:
df_merged[(df_merged["Fidelity"] == "-")]

Unnamed: 0,Neighborhood Size,Dataset,Method,Total Time,Fidelity,Fid. Neigh.,Faithfulness,Stability,Robustness K=5,Robustness K=10,Robustness K=20
4,2500.0,Adult,\fire (SVM),,-,-,0.01 $\pm$ 0.112,0.859 $\pm$ 0.258,0.296 $\pm$ 0.107,0.292 $\pm$ 0.099,0.288 $\pm$ 0.093
88,2500.0,Shuttle,\fire (SVM),,-,-,0.262 $\pm$ 0.616,0.805 $\pm$ 0.283,0.435 $\pm$ 0.156,0.423 $\pm$ 0.142,0.41 $\pm$ 0.132
89,5000.0,Shuttle,\fire (SVM),0.474 $\pm$ 0.406,-,-,0.268 $\pm$ 0.612,0.806 $\pm$ 0.282,0.433 $\pm$ 0.154,0.421 $\pm$ 0.141,0.408 $\pm$ 0.132
91,2500.0,Shuttle,\fire (LR),,-,-,0.054 $\pm$ 0.657,0.835 $\pm$ 0.262,0.51 $\pm$ 0.185,0.49 $\pm$ 0.167,0.468 $\pm$ 0.155
92,5000.0,Shuttle,\fire (LR),1.994 $\pm$ 1.839,-,-,0.05 $\pm$ 0.658,0.841 $\pm$ 0.258,0.506 $\pm$ 0.184,0.487 $\pm$ 0.167,0.466 $\pm$ 0.156


In [701]:
idx = df_merged[(df_merged["Fidelity"] == "-")].index

df_merged.loc[idx, "Fidelity"] = [0.522, 0.992 , 0.995 , 0.991 , 0.993]

In [702]:
df_merged[(df_merged["Neighborhood Size"] == 5000.0) & (df_merged["Dataset"] == "Adult")]

Unnamed: 0,Neighborhood Size,Dataset,Method,Total Time,Fidelity,Fid. Neigh.,Faithfulness,Stability,Robustness K=5,Robustness K=10,Robustness K=20
2,5000.0,Adult,\fire (DT),2.137 $\pm$ 0.177,0.897,0.94 $\pm$ 0.032,-,0.92 $\pm$ 0.148,0.609 $\pm$ 0.121,0.6 $\pm$ 0.106,0.59 $\pm$ 0.097
5,5000.0,Adult,\fire (SVM),1.112 $\pm$ 0.212,0.529,0.518 $\pm$ 0.177,0.012 $\pm$ 0.122,0.727 $\pm$ 0.34,0.242 $\pm$ 0.103,0.24 $\pm$ 0.094,0.238 $\pm$ 0.088
8,5000.0,Adult,\fire (LR),2.054 $\pm$ 0.629,0.9,0.98 $\pm$ 0.01,0.003 $\pm$ 0.211,0.469 $\pm$ 0.224,0.247 $\pm$ 0.092,0.244 $\pm$ 0.081,0.24 $\pm$ 0.075
11,5000.0,Adult,LIME,0.081 $\pm$ 0.006,0.926,0.729 $\pm$ 0.017,0.064 $\pm$ 0.177,0.055 $\pm$ 0.02,0.054 $\pm$ 0.009,0.054 $\pm$ 0.007,0.054 $\pm$ 0.005
14,5000.0,Adult,SHAP,2.445 $\pm$ 0.065,,,0.515 $\pm$ 0.16,0.406 $\pm$ 0.196,0.263 $\pm$ 0.112,0.257 $\pm$ 0.096,0.251 $\pm$ 0.087
17,5000.0,Adult,Lore (Random),13.242 $\pm$ 0.564,0.652,1.0 $\pm$ 0.0,-,0.339 $\pm$ 0.223,0.342 $\pm$ 0.137,0.341 $\pm$ 0.122,0.341 $\pm$ 0.113
20,5000.0,Adult,Lore (Genetic),,0.64,1.0 $\pm$ 0.0,-,0.546 $\pm$ 0.175,0.251 $\pm$ 0.103,0.251 $\pm$ 0.083,0.252 $\pm$ 0.071


In [703]:
df_merged[(df_merged["Neighborhood Size"] == 5000.0)]

Unnamed: 0,Neighborhood Size,Dataset,Method,Total Time,Fidelity,Fid. Neigh.,Faithfulness,Stability,Robustness K=5,Robustness K=10,Robustness K=20
2,5000.0,Adult,\fire (DT),2.137 $\pm$ 0.177,0.897,0.94 $\pm$ 0.032,-,0.92 $\pm$ 0.148,0.609 $\pm$ 0.121,0.6 $\pm$ 0.106,0.59 $\pm$ 0.097
5,5000.0,Adult,\fire (SVM),1.112 $\pm$ 0.212,0.529,0.518 $\pm$ 0.177,0.012 $\pm$ 0.122,0.727 $\pm$ 0.34,0.242 $\pm$ 0.103,0.24 $\pm$ 0.094,0.238 $\pm$ 0.088
8,5000.0,Adult,\fire (LR),2.054 $\pm$ 0.629,0.9,0.98 $\pm$ 0.01,0.003 $\pm$ 0.211,0.469 $\pm$ 0.224,0.247 $\pm$ 0.092,0.244 $\pm$ 0.081,0.24 $\pm$ 0.075
11,5000.0,Adult,LIME,0.081 $\pm$ 0.006,0.926,0.729 $\pm$ 0.017,0.064 $\pm$ 0.177,0.055 $\pm$ 0.02,0.054 $\pm$ 0.009,0.054 $\pm$ 0.007,0.054 $\pm$ 0.005
14,5000.0,Adult,SHAP,2.445 $\pm$ 0.065,,,0.515 $\pm$ 0.16,0.406 $\pm$ 0.196,0.263 $\pm$ 0.112,0.257 $\pm$ 0.096,0.251 $\pm$ 0.087
17,5000.0,Adult,Lore (Random),13.242 $\pm$ 0.564,0.652,1.0 $\pm$ 0.0,-,0.339 $\pm$ 0.223,0.342 $\pm$ 0.137,0.341 $\pm$ 0.122,0.341 $\pm$ 0.113
20,5000.0,Adult,Lore (Genetic),,0.64,1.0 $\pm$ 0.0,-,0.546 $\pm$ 0.175,0.251 $\pm$ 0.103,0.251 $\pm$ 0.083,0.252 $\pm$ 0.071
23,5000.0,Dutch,\fire (DT),0.927 $\pm$ 0.032,0.996,0.997 $\pm$ 0.003,-,0.955 $\pm$ 0.113,0.901 $\pm$ 0.141,0.874 $\pm$ 0.144,0.842 $\pm$ 0.144
26,5000.0,Dutch,\fire (SVM),0.351 $\pm$ 0.033,0.989,0.985 $\pm$ 0.02,0.056 $\pm$ 0.272,0.806 $\pm$ 0.263,0.736 $\pm$ 0.25,0.664 $\pm$ 0.235,0.587 $\pm$ 0.203
29,5000.0,Dutch,\fire (LR),6.553 $\pm$ 6.838,0.992,0.988 $\pm$ 0.012,0.061 $\pm$ 0.286,0.827 $\pm$ 0.25,0.757 $\pm$ 0.236,0.691 $\pm$ 0.223,0.619 $\pm$ 0.197


In [704]:
samples_5000 = df_merged[df_merged["Neighborhood Size"] == 5000.0]
samples_1000 = df_merged[df_merged["Neighborhood Size"] == 1000.0]
samples_2500 = df_merged[df_merged["Neighborhood Size"] == 2500.0]

In [705]:
print_fancy_table(df_merged[df_merged["Neighborhood Size"] == 5000.0].drop(columns=['Neighborhood Size']))

\begin{tabular}{llllllllll}
\toprule
Dataset & Method & Total Time & Fidelity & Fid. Neigh. & Faithfulness & Stability & Robustness K=5 & Robustness K=10 & Robustness K=20 \\
\midrule
Adult & \fire (DT) & 2.137 $\pm$ 0.177 & 0.897000 & 0.94 $\pm$ 0.032 & - & 0.92 $\pm$ 0.148 & 0.609 $\pm$ 0.121 & 0.6 $\pm$ 0.106 & 0.59 $\pm$ 0.097 \\
Adult & \fire (SVM) & 1.112 $\pm$ 0.212 & 0.529000 & 0.518 $\pm$ 0.177 & 0.012 $\pm$ 0.122 & 0.727 $\pm$ 0.34 & 0.242 $\pm$ 0.103 & 0.24 $\pm$ 0.094 & 0.238 $\pm$ 0.088 \\
Adult & \fire (LR) & 2.054 $\pm$ 0.629 & 0.900000 & 0.98 $\pm$ 0.01 & 0.003 $\pm$ 0.211 & 0.469 $\pm$ 0.224 & 0.247 $\pm$ 0.092 & 0.244 $\pm$ 0.081 & 0.24 $\pm$ 0.075 \\
Adult & LIME & 0.081 $\pm$ 0.006 & 0.926 & 0.729 $\pm$ 0.017 & 0.064 $\pm$ 0.177 & 0.055 $\pm$ 0.02 & 0.054 $\pm$ 0.009 & 0.054 $\pm$ 0.007 & 0.054 $\pm$ 0.005 \\
Adult & SHAP & 2.445 $\pm$ 0.065 & NaN & NaN & 0.515 $\pm$ 0.16 & 0.406 $\pm$ 0.196 & 0.263 $\pm$ 0.112 & 0.257 $\pm$ 0.096 & 0.251 $\pm$ 0.087 \\
Adult & Lore

In [706]:
print_fancy_table(df_merged[df_merged["Neighborhood Size"] == 2500].drop(columns=['Neighborhood Size', "Total Time"]))

\begin{tabular}{lllllllll}
\toprule
Dataset & Method & Fidelity & Fid. Neigh. & Faithfulness & Stability & Robustness K=5 & Robustness K=10 & Robustness K=20 \\
\midrule
Adult & \fire (DT) & 0.897000 & 0.931 $\pm$ 0.037 & - & 0.898 $\pm$ 0.167 & 0.586 $\pm$ 0.126 & 0.575 $\pm$ 0.111 & 0.564 $\pm$ 0.102 \\
Adult & \fire (SVM) & 0.522000 & - & 0.01 $\pm$ 0.112 & 0.859 $\pm$ 0.258 & 0.296 $\pm$ 0.107 & 0.292 $\pm$ 0.099 & 0.288 $\pm$ 0.093 \\
Adult & \fire (LR) & 0.846000 & 0.974 $\pm$ 0.014 & 0.012 $\pm$ 0.212 & 0.5 $\pm$ 0.28 & 0.274 $\pm$ 0.133 & 0.27 $\pm$ 0.122 & 0.265 $\pm$ 0.114 \\
Adult & LIME & 0.903 & 0.729 $\pm$ 0.017 & 0.064 $\pm$ 0.18 & 0.046 $\pm$ 0.018 & 0.046 $\pm$ 0.009 & 0.046 $\pm$ 0.006 & 0.046 $\pm$ 0.005 \\
Adult & SHAP & NaN & NaN & 0.515 $\pm$ 0.16 & 0.406 $\pm$ 0.196 & 0.263 $\pm$ 0.112 & 0.257 $\pm$ 0.096 & 0.251 $\pm$ 0.087 \\
Adult & Lore (Random) & 0.636 & 1.0 $\pm$ 0.0 & - & 0.469 $\pm$ 0.154 & 0.235 $\pm$ 0.104 & 0.235 $\pm$ 0.088 & 0.235 $\pm$ 0.077 \\
Adul

In [707]:
print_fancy_table(df_merged[df_merged["Neighborhood Size"] == 1000].drop(columns=['Neighborhood Size', "Total Time"]))

\begin{tabular}{lllllllll}
\toprule
Dataset & Method & Fidelity & Fid. Neigh. & Faithfulness & Stability & Robustness K=5 & Robustness K=10 & Robustness K=20 \\
\midrule
Adult & \fire (DT) & 0.907000 & 0.918 $\pm$ 0.04 & - & 0.845 $\pm$ 0.23 & 0.397 $\pm$ 0.122 & 0.375 $\pm$ 0.104 & 0.358 $\pm$ 0.098 \\
Adult & \fire (SVM) & 0.522000 & 0.514 $\pm$ 0.175 & 0.019 $\pm$ 0.117 & 0.861 $\pm$ 0.267 & 0.261 $\pm$ 0.097 & 0.259 $\pm$ 0.084 & 0.257 $\pm$ 0.081 \\
Adult & \fire (LR) & 0.895000 & 0.969 $\pm$ 0.019 & -0.013 $\pm$ 0.211 & 0.384 $\pm$ 0.279 & 0.177 $\pm$ 0.125 & 0.155 $\pm$ 0.088 & 0.137 $\pm$ 0.059 \\
Adult & LIME & 0.908 & 0.729 $\pm$ 0.017 & 0.066 $\pm$ 0.18 & 0.04 $\pm$ 0.017 & 0.04 $\pm$ 0.008 & 0.04 $\pm$ 0.006 & 0.04 $\pm$ 0.005 \\
Adult & SHAP & NaN & NaN & 0.515 $\pm$ 0.16 & 0.406 $\pm$ 0.196 & 0.263 $\pm$ 0.112 & 0.257 $\pm$ 0.096 & 0.251 $\pm$ 0.087 \\
Adult & Lore (Random) & 0.636 & 1.0 $\pm$ 0.0 & - & 0.377 $\pm$ 0.153 & 0.225 $\pm$ 0.112 & 0.224 $\pm$ 0.098 & 0.224 $\p

In [708]:
# # Prepare dataframe for custom LaTeX output
# df_grouped = df_merged.groupby('Dataset')

# # Create a mapping for the method renaming
# method_rename = {
#     'Decision Tree': '\\fire (DT)',
#     'SVM': '\\fire (SVM)',
#     'Logistic Regr.': '\\fire (LR)',
#     'LIME': 'LIME',
#     'SHAP': 'SHAP',
#     'Lore (Random)': 'Lore (Random)',
#     'Lore (Genetic)': 'Lore (Genetic)'
# }

# # Start building the LaTeX table
# latex_output = "\\begin{tabular}{" + "l" * len(df_merged.columns) + "}\n"
# latex_output += "\\toprule\n"

# # Add headers
# latex_output += " & ".join(df_merged.columns) + " \\\\\n"
# latex_output += "\\midrule\n"

# # Add rows with midrules between datasets
# datasets = df_merged['Dataset'].unique()
# for i, dataset in enumerate(datasets):
#     group = df_grouped.get_group(dataset)
    
#     # Convert group dataframe to LaTeX rows but don't add to output yet
#     rows_latex = group.to_latex(index=False, header=False)
    
#     # Extract just the rows part (not headers or table structure)
#     rows_only = "\n".join(rows_latex.split("\n")[3:-3])
    
#     # Apply the method renaming
#     for old_method, new_method in method_rename.items():
#         rows_only = rows_only.replace(old_method, new_method)
    
#     latex_output += rows_only
    
#     # Add midrule if not the last dataset
#     if i < len(datasets) - 1:
#         latex_output += "\\midrule\n"

# latex_output += "\\bottomrule\n\\end{tabular}"

# print(latex_output)

# Plots

In [716]:
import os
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np
import pandas as pd

map_method = {
    r"\fire (DT)": "FIRE360 (DT)",
    r"\fire (SVM)": "FIRE360 (SVM)",
    r"\fire (LR)": "FIRE360 (LR)",
    "LIME": "LIME",
    "SHAP": "SHAP",
    "Lore (Random)": "LORE (Random)",
    "Lore (Genetic)": "LORE (Genetic)"
}
def plot_metrics_per_dataset(df, metrics):
    """
    Generate a plot for each dataset comparing different methods based on the selected metrics
    against the Neighborhood Size.
    Parameters:
    - df: DataFrame containing the data.
    - metrics: List of metrics to plot (e.g., ["Fidelity", "Stability"]).
    """
    # Ensure proper numeric conversion for Neighborhood Size
    df["Neighborhood Size"] = pd.to_numeric(df["Neighborhood Size"], errors='coerce')
    
    # Extract unique datasets
    datasets = df["Dataset"].unique()
    
    # Define colors, markers, and styles
    colors = ["#FF774E", "#7c7787", "#53C4FE", "#70DDA8", "#dc68e4", "#755c51", "gray"]
    edge_colors = ["#E45D22", "#5a5255", "#009EFF", "#00B977", "fuchsia", "#ae5a41", "black"]
    markers = ['o', 'd', 'v', 'h', 's', 'P', 'p']
    
    # Define the specific x values we want to display
    x_ticks = [1000, 2500, 5000]
    
    for dataset in datasets:
        fig, axes = plt.subplots(1, len(metrics), figsize=(6 * len(metrics), 5))
        subset = df[df["Dataset"] == dataset]
        
        if len(metrics) == 1:
            axes = [axes]
            
        for ax_idx, (ax, metric) in enumerate(zip(axes, metrics)):
            for i, method in enumerate(subset["Method"].unique()):
                method_subset = subset[subset["Method"] == method]
                
                # Extract metric values and convert them to numerical format
                y_values = method_subset[metric].astype(str).str.split(" ").str[0]
                y_values = pd.to_numeric(y_values, errors='coerce')
                
                # Sort values for smooth plotting
                sorted_indices = np.argsort(method_subset["Neighborhood Size"].values)
                x_values = method_subset["Neighborhood Size"].values[sorted_indices]
                y_values = y_values.values[sorted_indices]
                
                ax.plot(x_values, y_values,
                       marker=markers[i % len(markers)],
                       markersize=10,
                       linewidth=3,
                       linestyle="--",
                       color=colors[i % len(colors)],
                       markerfacecolor=colors[i % len(colors)],
                       markeredgecolor=edge_colors[i % len(colors)],
                       markeredgewidth=2,
                       alpha=0.8,
                       label=map_method[method])
                
                ax.set_title(f'{metric} vs Neighborhood Size', fontsize=20)
                # Set x-axis label on all plots
                ax.set_xlabel("Neighborhood Size", fontsize=22)
                # Set y-axis label on all plots (metric-dependent)
                ax.set_ylabel(metric, fontsize=22)
                
                # Set specific x-ticks
                ax.set_xticks(x_ticks)
                ax.set_xticklabels([str(x) for x in x_ticks])
                ax.set_xlim(min(x_ticks) - 200, max(x_ticks) + 200)
                
                ax.grid(True, linestyle="--", alpha=0.7)
                
            # Let each plot have its own y-axis scale (plot-dependent)
            ax.autoscale(axis='y')
            # x ticks fontsize
            ax.tick_params(axis='both', which='major', labelsize=16)
            
        # Place legend outside the plot
        handles, labels = axes[0].get_legend_handles_labels()
        fig.legend(handles, labels, loc='lower center', bbox_to_anchor=(0.5, -0.01), 
                   ncol=min(4, len(labels)), frameon=True, fancybox=True, 
                   shadow=True, fontsize=12)
        
        # Adjust layout to make room for the legend
        plt.tight_layout(rect=[0, 0.1, 1, 0.95])
        
        # Save the plot
        os.makedirs('plots', exist_ok=True)
        plot_filename = f'plots/{dataset}_metrics.png'
        plt.savefig(plot_filename, bbox_inches='tight', dpi=300)
        plt.close()

plot_metrics_per_dataset(df_merged, ["Fidelity", "Stability"])

In [710]:
df_merged.head(3)

Unnamed: 0,Neighborhood Size,Dataset,Method,Total Time,Fidelity,Fid. Neigh.,Faithfulness,Stability,Robustness K=5,Robustness K=10,Robustness K=20
0,1000.0,Adult,\fire (DT),,0.907,0.918 $\pm$ 0.04,-,0.845 $\pm$ 0.23,0.397 $\pm$ 0.122,0.375 $\pm$ 0.104,0.358 $\pm$ 0.098
1,2500.0,Adult,\fire (DT),,0.897,0.931 $\pm$ 0.037,-,0.898 $\pm$ 0.167,0.586 $\pm$ 0.126,0.575 $\pm$ 0.111,0.564 $\pm$ 0.102
2,5000.0,Adult,\fire (DT),2.137 $\pm$ 0.177,0.897,0.94 $\pm$ 0.032,-,0.92 $\pm$ 0.148,0.609 $\pm$ 0.121,0.6 $\pm$ 0.106,0.59 $\pm$ 0.097


# Plots robustness

In [711]:
df_metrics_complete

Unnamed: 0,Dataset,Neighborhood Size,Method,Stability,Faithfulness,Robustness K=5,Robustness K=10,Robustness K=20,id
63,Adult,1000,\fire (DT),0.845 $\pm$ 0.23,-,0.397 $\pm$ 0.122,0.375 $\pm$ 0.104,0.358 $\pm$ 0.098,Adult_\fire (DT)1000
64,Adult,2500,\fire (DT),0.898 $\pm$ 0.167,-,0.586 $\pm$ 0.126,0.575 $\pm$ 0.111,0.564 $\pm$ 0.102,Adult_\fire (DT)2500
65,Adult,5000,\fire (DT),0.92 $\pm$ 0.148,-,0.609 $\pm$ 0.121,0.6 $\pm$ 0.106,0.59 $\pm$ 0.097,Adult_\fire (DT)5000
66,Adult,1000,\fire (SVM),0.861 $\pm$ 0.267,0.019 $\pm$ 0.117,0.261 $\pm$ 0.097,0.259 $\pm$ 0.084,0.257 $\pm$ 0.081,Adult_\fire (SVM)1000
67,Adult,2500,\fire (SVM),0.859 $\pm$ 0.258,0.01 $\pm$ 0.112,0.296 $\pm$ 0.107,0.292 $\pm$ 0.099,0.288 $\pm$ 0.093,Adult_\fire (SVM)2500
...,...,...,...,...,...,...,...,...,...
58,Shuttle,2500,Lore (Random),0.834 $\pm$ 0.141,-,0.708 $\pm$ 0.084,0.707 $\pm$ 0.072,0.707 $\pm$ 0.066,Shuttle_Lore (Random)2500
59,Shuttle,5000,Lore (Random),0.713 $\pm$ 0.115,-,0.7 $\pm$ 0.073,0.7 $\pm$ 0.065,0.7 $\pm$ 0.06,Shuttle_Lore (Random)5000
60,Shuttle,1000,Lore (Genetic),0.626 $\pm$ 0.135,-,0.621 $\pm$ 0.072,0.621 $\pm$ 0.06,0.621 $\pm$ 0.052,Shuttle_Lore (Genetic)1000
61,Shuttle,2500,Lore (Genetic),0.674 $\pm$ 0.125,-,0.663 $\pm$ 0.074,0.663 $\pm$ 0.064,0.663 $\pm$ 0.058,Shuttle_Lore (Genetic)2500


In [712]:
import os
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np

def plot_robustness_per_dataset(df_metrics):
    # Create the plots directory if it doesn't exist
    os.makedirs('plots', exist_ok=True)
    
    # Get the unique datasets and define top_k values
    datasets = df_metrics['Dataset'].unique()
    top_k = [3, 5, 8, 10, 20]
    
    # Define a color-blind-friendly palette and markers
    colors = ["#FF774E", "#7c7787", "#53C4FE", "#70DDA8", "#dc68e4", "#755c51", "gray"]
    edge_colors = ["#E45D22", "#5a5255", "#009EFF", "#00B977", "fuchsia", "#ae5a41", "black"]
    markers = ['o', 'd', 'v', 'h', 's', 'P', 'p']
#     'D', '^', 'v', 'P', '*', 'X', 'p', 'h']  # Different markers
    
    # Create a single figure with subplots
    fig, axes = plt.subplots(nrows=2, ncols=3, figsize=(18, 15), sharex=True, sharey=True)
    axes = axes.flatten()
    
    handles, labels = [], []
    
    for idx, dataset in enumerate(datasets):
        ax = axes[idx]
        subset = df_metrics[df_metrics['Dataset'] == dataset]
        
        for i, method in enumerate(subset['Method'].unique()):
            method_subset = subset[subset['Method'] == method]
            robustness_values = []
            
            for k in top_k:
                value = method_subset[f'Robustness K={k}'].values[0]
                robustness_value = float(str(value).split(' ')[0])  # Ensure extraction is robust
                robustness_values.append(robustness_value)
            
            line, = ax.plot(top_k, 
                            robustness_values,
                            marker=markers[i % len(markers)],
                            markersize=20, 
                            linewidth=3, 
                            linestyle="--",
                            color=colors[i % len(colors)],
                            markerfacecolor=colors[i % len(colors)], 
                            markeredgecolor=edge_colors[i % len(colors)], 
                            markeredgewidth=2,
                            alpha=0.8, zorder=3)
            if idx == 0:  # Collect legend elements only once
                handles.append(line)
                labels.append(method)
        
        ax.set_title(f'{dataset}', fontsize=25, fontweight='bold')
        ax.set_xlabel('K', fontsize=25)
        ax.set_ylabel('Robustness', fontsize=25)
        ax.tick_params(axis='both', which='major', labelsize=22)
        ax.grid(True, linestyle='--', alpha=0.7)
    
    # Adjust layout
    plt.tight_layout(rect=[0, 0.1, 1, 1])
    
    # Add external legend below plots
    fig.legend(handles, labels, loc='lower center', bbox_to_anchor=(0.5, -0.02), ncol=3, fontsize=20, frameon=True, fancybox=True, shadow=True)
    
    # Save the plot
    plt.savefig('plots/robustness_all_datasets.png', bbox_inches='tight', dpi=300)
    plt.close()

# Call the function with df_metrics_complete
plot_robustness_per_dataset(df_metrics_all_the_robustness)


# Dataset Generation

In [None]:
project_data = download_runs(project_name="tango_generation")
project_name = "tango_generation"

In [None]:
len(project_data["generated_dataset_size_10000_epochs_5000"])

In [None]:
project_data["generated_dataset_size_10000_epochs_5000"][0]

In [None]:
def merge_rows_remove_nan(rows_list):
    """
    Merge multiple rows from a dataset, keeping only non-NaN values for each column.
    
    Args:
        rows_list: List of dataframes or series to merge
        
    Returns:
        A single pandas Series with merged non-NaN values
    """
    # Create an empty dictionary to store the merged values
    merged_dict = {}
    
    # Process each row
    for index, row in rows_list.iterrows():
        # For each column in the row
        for col in row.index:
            # If the value is not NaN and the column is not already in the merged_dict
            # or the column is in merged_dict but the current value is not NaN
            if not pd.isna(row[col]) and (col not in merged_dict or pd.isna(merged_dict.get(col))):
                merged_dict[col] = row[col]
    
    # Convert the dictionary to a pandas Series
    return pd.Series(merged_dict)

# # Apply the function to merge rows
# merged_row = merge_rows_remove_nan(project_data["generated_dataset_size_10000_epochs_5000"][0])

# # Display the merged row
# print(merged_row)

In [None]:
# for pr in project_data["generated_dataset_size_10000_epochs_5000"]:
#     print(pr["synthesizer"])

In [None]:
# project_data[project_data["Dataset"] == "dutch"].head(50)

In [None]:
metrics = {}
datasets = ["adult", "house16", "letter", "dutch", "covertype", "shuttle"]
dataset_sizes = [10000, 25000, 50000, 75000, 100000, 150000, 200000]
epochs = [1000, 2500, 5000]
for dataset in datasets:
    metrics[dataset] = {}   
    for dataset_size in dataset_sizes:
        for epoch in epochs:
            experiment_name = f"generated_dataset_size_{dataset_size}_epochs_{epoch}"
            if experiment_name in project_data:
                results = project_data[f"generated_dataset_size_{dataset_size}_epochs_{epoch}"]
                for result in results:
                    if len(result["dataset"]) > 0 and result["dataset"][0] == dataset:
                        if "synthesizer" in result:
                            result = merge_rows_remove_nan(result)
                            
                            synthesizer = result.get("synthesizer", None)
                            # if dataset == "adult":
                                # print(synthesizer)
                            common_rows_proportion = result.get("common_rows_proportion", 0)
                            close_values_probability = result.get("close_values_probability", 0)

                            xgb_gt = result.get("xgb_gt", 0)

                            xgb = result.get("xgb", 0)
                            linear = result.get("linear", 0)
                            mlp = result.get("mlp", 0)

                            chi_squared_test = result.get("chi_squared_test", 0)

                            k_anonymization_syn = result.get("k_anonymization_syn", 0)
                            identifiability_score = result.get("identifiability_score", 0)

                            metrics[dataset][experiment_name] = {} if experiment_name not in metrics[dataset] else metrics[dataset][experiment_name]

                            metrics[dataset][experiment_name][synthesizer] = {
                                "Common Rows Proportion": round(common_rows_proportion, 4),
                                "Close Values Probability": round(close_values_probability, 3),
                                "xgb_gt": round(xgb_gt, 3),
                                "Linear": round(linear, 3),
                                "XGB": round(xgb, 3),
                                "MLP": round(xgb, 3),
                                "Chi Squared Test": round(chi_squared_test, 3),

                                "K-Anonymization": round(k_anonymization_syn, 3),
                                "Identifiability Score": round(identifiability_score, 3),
                                
                                "Synthesizer": synthesizer,
                            }

In [None]:
# Convert the metrics to a DataFrame
rows = []
for dataset in datasets:
    for dataset_size in dataset_sizes:
        for epoch in epochs:
            for synthesizer in ["ctgan", "tvae"]:
                experiment_name = f"generated_dataset_size_{dataset_size}_epochs_{epoch}"
                if experiment_name in metrics[dataset] and synthesizer in metrics[dataset][experiment_name]:
                    row = {
                        'Dataset': dataset,
                        'Generated Samples': dataset_size,
                        'Epochs': epoch,
                        "Synthesizer": synthesizer,
                        # Sanity
                        'Com. Rows Prop.': metrics[dataset][experiment_name][synthesizer]['Common Rows Proportion'],
                        'Close Val. Prob.': metrics[dataset][experiment_name][synthesizer]['Close Values Probability'],

                        # Statistical
                        "Chi Squar. Test.": metrics[dataset][experiment_name][synthesizer]['Chi Squared Test'],
                        # Performance
                        "XGB Perf.": metrics[dataset][experiment_name][synthesizer]['xgb_gt'],
                        # Detection
                        "MLP Det.": metrics[dataset][experiment_name][synthesizer]['MLP'],
                        "Lin. Det.": metrics[dataset][experiment_name][synthesizer]['Linear'],
                        "XGB Det.": metrics[dataset][experiment_name][synthesizer]['XGB'],
                        # Privacy
                        "K Anon.": metrics[dataset][experiment_name][synthesizer]['K-Anonymization'],
                        'Id. Score': metrics[dataset][experiment_name][synthesizer]['Identifiability Score'],
                        
                    }
                    rows.append(row)

In [None]:
# Create a dataframe from the rows
df_metrics = pd.DataFrame(rows)

In [None]:
dataset_name_mapping = {
    'adult': 'Adult',
    'house16': 'House16',
    'letter': 'Letter',
    'dutch': 'Dutch',
    'covertype': 'Covertype',
    'shuttle': 'Shuttle'
}

# Apply the mapping to the Method column
df_metrics["Dataset"] = df_metrics["Dataset"].map(dataset_name_mapping)

In [None]:
df_metrics = df_metrics.round(3).astype(str)

In [None]:
adult = df_metrics[df_metrics["Dataset"] == "Adult"]
print(adult.to_latex(index=False))

In [None]:
dutch = df_metrics[df_metrics["Dataset"] == "Dutch"]
print(dutch.to_latex(index=False))

In [None]:
shuttle = df_metrics[df_metrics["Dataset"] == "Shuttle"]
print(shuttle.to_latex(index=False))

In [None]:
covertype = df_metrics[df_metrics["Dataset"] == "Covertype"]
print(covertype.to_latex(index=False))

In [None]:
house16 = df_metrics[df_metrics["Dataset"] == "House16"]
print(house16.to_latex(index=False))

In [None]:
letter = df_metrics[df_metrics["Dataset"] == "Letter"]
print(letter.to_latex(index=False))

# Generation Loss

In [None]:
metrics = {}
datasets = ["adult", "house16", "letter", "dutch", "covertype", "shuttle"]
epochs = [1000, 2500, 5000]
synthesizers = ["ctgan", "tvae"]
for dataset in datasets:
    metrics[dataset] = {}
    for synth in synthesizers:
        for epoch in epochs:
            experiment_name = f"{synth}_epochs_{epoch}"
            if experiment_name in project_data:
                results = project_data[experiment_name]
                for result in results:
                    if len(result["dataset"]) > 0 and result["dataset"][0] == dataset:
                        df_result = pd.DataFrame(result)

                        if "loss_generator" in df_result.columns and "loss_discriminator" in df_result.columns:
                            loss_discriminator = df_result["loss_discriminator"]
                            loss_generator = df_result["loss_generator"]
                            # remove nan from the series
                            loss_discriminator = list(loss_discriminator[~loss_discriminator.isna()])
                            loss_generator = list(loss_generator[~loss_generator.isna()])
                            metrics[dataset][experiment_name] = {
                                "Loss Discriminator": loss_discriminator,
                                "Loss Generator": loss_generator,
                            }
                        
                        elif "loss" in df_result.columns:
                            loss = df_result["loss"]
                            loss = list(loss[~loss.isna()])
                            metrics[dataset][experiment_name] = {
                                "Loss": loss,
                            }

In [None]:
metrics["adult"].keys()

In [None]:
import os
import matplotlib.pyplot as plt
import seaborn as sns

# Define a color-blind-friendly palette and markers
colors = ["#FF774E", "#70DDA8", "#7c7787"]
edge_colors = ["#E45D22", "#00B977", "#5a5255"]

# colors = ["#FF774E", "#7c7787", "#53C4FE", "#70DDA8", "#dc68e4", "#755c51", "gray"]
# edge_colors = ["#E45D22", "#5a5255", "#009EFF", "#00B977", "fuchsia", "#ae5a41", "black"]
markers = ['o', 'd', 'v']
line_styles = ['-', '--', '-.']
alpha_values = [1.0, 0.7, 0.5]
marker_sizes = [12, 11, 10]

def plot_gan_losses(metrics, dataset_name, synthesizer_name):
    """
    Plot the generator and discriminator losses for a specific dataset and synthesizer
    across different epochs.
    """
    os.makedirs('plots', exist_ok=True)
    epochs = [1000, 2500, 5000]
    
    fig, axes = plt.subplots(1, 2, figsize=(16, 6))
    handles, labels = [], []
    
    for idx, epoch in enumerate(epochs):
        experiment_name = f"{synthesizer_name}_epochs_{epoch}"
        if experiment_name in metrics[dataset_name]:
            
            # Plot discriminator loss
            loss_data = metrics[dataset_name][experiment_name]["Loss Discriminator"]
            x_values = list(range(0, len(loss_data), 100))
            y_values = [loss_data[j] for j in x_values]
            
            line, = axes[0].plot(x_values, y_values, marker=markers[idx], markersize=marker_sizes[idx], linewidth=3, linestyle=line_styles[idx],
                         color=colors[idx], markerfacecolor=colors[idx], markeredgecolor=edge_colors[idx], 
                         markeredgewidth=2, alpha=alpha_values[idx], label=f"Epochs: {epoch}")
            
            handles.append(line)
            labels.append(f"Epochs: {epoch}")
            
            # Plot generator loss
            loss_data = metrics[dataset_name][experiment_name]["Loss Generator"]
            y_values = [loss_data[j] for j in x_values]
            
            axes[1].plot(x_values, y_values, marker=markers[idx], markersize=marker_sizes[idx], linewidth=3, linestyle=line_styles[idx],
                         color=colors[idx], markerfacecolor=colors[idx], markeredgecolor=edge_colors[idx], 
                         markeredgewidth=2, alpha=alpha_values[idx])
    
    # Customize subplots
    for i, ax in enumerate(axes):
        ax.set_xlabel('Training Steps', fontsize=25)
        ax.set_ylabel('Loss', fontsize=25)
        ax.grid(True, linestyle='--', alpha=0.7)
        ax.tick_params(axis='both', which='major', labelsize=22)
        ax.set_title(f"{'Discriminator' if i == 0 else 'Generator'} Loss - {dataset_name.capitalize()} - {synthesizer_name.upper()}", fontsize=25)
    
    # Add external legend below plots
    fig.legend(handles, labels, loc='lower center', bbox_to_anchor=(0.5, -0.02), ncol=3, fontsize=22, frameon=True, fancybox=True, shadow=True)
    
    plt.tight_layout(rect=[0, 0.1, 1, 1])
    plt.savefig(f'plots/{dataset_name}_{synthesizer_name}_losses.png', dpi=300, bbox_inches='tight')
    plt.close()

def create_all_loss_plots(metrics):
    datasets = ["adult", "house16", "letter", "dutch", "covertype", "shuttle"]
    synthesizers = ["ctgan"]
    
    for dataset in datasets:
        for synthesizer in synthesizers:
            has_data = any(f"{synthesizer}_epochs_{e}" in metrics.get(dataset, {}) for e in [1000, 2500, 5000])
            if has_data:
                plot_gan_losses(metrics, dataset, synthesizer)
                print(f"Created plot for {dataset} with {synthesizer}")

# Execute the function to create all plots
create_all_loss_plots(metrics)


In [None]:
import os
import matplotlib.pyplot as plt
import seaborn as sns

# Define a color-blind-friendly palette and markers
colors = ["#FF774E", "#70DDA8", "#7c7787"]
edge_colors = ["#E45D22", "#00B977", "#5a5255"]

markers = ['o', 'd', 'v']
line_styles = ['-', '--', '-.']
alpha_values = [1.0, 0.7, 0.5]
marker_sizes = [12, 11, 10]

def plot_gan_loss(metrics, dataset_name, synthesizer_name):
    """
    Plot the loss for a specific dataset and synthesizer across different epochs.
    """
    os.makedirs('plots', exist_ok=True)
    epochs = [1000, 2500, 5000]
    
    fig, ax = plt.subplots(figsize=(10, 6))
    handles, labels = [], []
    
    for idx, epoch in enumerate(epochs):
        experiment_name = f"{synthesizer_name}_epochs_{epoch}"
        if experiment_name in metrics[dataset_name]:
            
            # Plot loss
            loss_data = metrics[dataset_name][experiment_name]["Loss"]
            x_values = list(range(0, len(loss_data), 100))
            y_values = [loss_data[j] for j in x_values]
            
            line, = ax.plot(x_values, y_values, marker=markers[idx], markersize=marker_sizes[idx], linewidth=3, linestyle=line_styles[idx],
                            color=colors[idx], markerfacecolor=colors[idx], markeredgecolor=edge_colors[idx], 
                            markeredgewidth=2, alpha=alpha_values[idx], label=f"Epochs: {epoch}")
            
            handles.append(line)
            labels.append(f"Epochs: {epoch}")
    
    ax.set_xlabel('Training Steps', fontsize=25)
    ax.set_ylabel('Loss', fontsize=25)
    ax.grid(True, linestyle='--', alpha=0.7)
    ax.tick_params(axis='both', which='major', labelsize=22)
    ax.set_title(f"Loss - {dataset_name.capitalize()} - {synthesizer_name.upper()}", fontsize=25)
    
    # Add external legend below plot
    ax.legend(handles, labels, loc='lower center', bbox_to_anchor=(0.5, -0.2), ncol=3, fontsize=22, frameon=True, fancybox=True, shadow=True)
    
    plt.tight_layout(rect=[0, 0.1, 1, 1])
    plt.savefig(f'plots/{dataset_name}_{synthesizer_name}_loss.png', dpi=300, bbox_inches='tight')
    plt.close()

def create_all_loss_plots(metrics):
    datasets = ["adult", "house16", "letter", "dutch", "covertype", "shuttle"]
    synthesizers = ["tvae"]
    
    for dataset in datasets:
        for synthesizer in synthesizers:
            has_data = any(f"{synthesizer}_epochs_{e}" in metrics.get(dataset, {}) for e in [1000, 2500, 5000])
            if has_data:
                plot_gan_loss(metrics, dataset, synthesizer)
                print(f"Created plot for {dataset} with {synthesizer}")

# Execute the function to create all plots
create_all_loss_plots(metrics)