# Visualize Experiment Results

In [1]:
import os
import re

import pandas as pd
from pathlib import Path

import matplotlib as mpl
import matplotlib.pyplot as plt
import seaborn as sns

from data_imputation_paper.experiment import read_experiment, read_csv_files

# Plot appearance: white-grid seaborn theme at "paper" scale with fonts enlarged 1.5x.
sns.set(context="paper", style="whitegrid", font_scale=1.5)
# Set after the seaborn call so this line width is the one that ends up in rcParams.
mpl.rcParams.update({"lines.linewidth": "2"})

# Render matplotlib figures inline in the notebook output.
%matplotlib inline

# Load IPython's autoreload extension; mode 2 reloads all modules before each
# cell is executed, so edits to imported project code are picked up without
# restarting the kernel.
%load_ext autoreload
%autoreload 2

## Import the data

In [2]:
# Directory holding the results of the experiment analysed in this notebook.
EXPERIMENT_PATH = Path("../data/experiments/cluster/fully_observed/")

# Columns that together identify one experimental setting (used as grouping keys).
EXPERIMENTAL_CONDITIONS = [
    "imputer",
    "task",
    "missing_type",
    "missing_fraction",
    "strategy",
    "column",
]

# Name of the metric whose rows are selected from the results.
METRIC = "F1_macro"

In [3]:
# Preview the first rows of the table returned for the experiment directory.
experiment_overview = read_experiment(EXPERIMENT_PATH)
experiment_overview.head()

Unnamed: 0,experiment,imputer,task,missing_type,missing_fraction,strategy,file_or_dir,detail_file,path
0,fully_observed,AutoKerasImputer,32,MCAR,0.01,single_all,impute_performance_std_input4.csv,,../data/experiments/cluster/fully_observed/Aut...
1,fully_observed,AutoKerasImputer,32,MCAR,0.01,single_all,downstream_performance_std_input4.csv,,../data/experiments/cluster/fully_observed/Aut...
2,fully_observed,AutoKerasImputer,32,MCAR,0.01,single_all,impute_performance_mean_input4.csv,,../data/experiments/cluster/fully_observed/Aut...
3,fully_observed,AutoKerasImputer,32,MCAR,0.01,single_all,downstream_performance_mean_input4.csv,,../data/experiments/cluster/fully_observed/Aut...
4,fully_observed,AutoKerasImputer,32,MCAR,0.01,single_all,input4,impute_performance_rep_2.csv,../data/experiments/cluster/fully_observed/Aut...


In [4]:
# Two explicit steps: list the experiment's files, then load the CSV contents
# they point to into a single results frame.
experiment_files = read_experiment(EXPERIMENT_PATH)
results = read_csv_files(experiment_files)
results.head()

Unnamed: 0,experiment,imputer,task,missing_type,missing_fraction,strategy,column,result_type,metric,train,test,baseline,corrupted,imputed
0,fully_observed,AutoKerasImputer,32,MCAR,0.01,single_all,input4,impute_performance,MAE,7.656001,0.188622,,,
1,fully_observed,AutoKerasImputer,32,MCAR,0.01,single_all,input4,impute_performance,MSE,83.729988,0.035578,,,
2,fully_observed,AutoKerasImputer,32,MCAR,0.01,single_all,input4,impute_performance,RMSE,9.150409,0.188622,,,
3,fully_observed,AutoKerasImputer,32,MCAR,0.01,single_all,input4,impute_performance,MAE,4.332372,1.492393,,,
4,fully_observed,AutoKerasImputer,32,MCAR,0.01,single_all,input4,impute_performance,MSE,26.302351,2.227238,,,


## Data wrangling

1. Group by all experimental conditions and then, looking only across the repetitions, determine the ranks of the individual imputation methods.
2. Aggregate these ranks.

In [5]:
# Keep only the rows that report the metric under analysis.
data = results[results.metric == METRIC]

# Rank only the numeric result columns. Without an explicit selection,
# groupby(...).rank() is also applied to the string columns that are not
# grouping keys (e.g. "experiment", "result_type", "metric"); how pandas treats
# those depends on its version (older releases silently dropped them, newer
# ones do not), so the selection keeps the result stable across versions.
value_columns = [
    column
    for column in data.select_dtypes(include="number").columns
    if column not in EXPERIMENTAL_CONDITIONS
]

# ascending=False: the highest value within a group gets rank 1. Ties share
# their average rank and missing values stay NaN (pandas defaults).
# NOTE(review): "imputer" is one of the grouping keys, so every group contains
# the rows of a single imputer and the ranks compare repetitions of that
# imputer with each other, not imputers against each other -- confirm that
# this is the intended ranking.
ranked_data = data.groupby(EXPERIMENTAL_CONDITIONS)[value_columns].rank(ascending=False)

# Attach the ranks next to the original values as "<column>_rank" columns.
ranked_data = data.join(ranked_data, rsuffix="_rank")
ranked_data

Unnamed: 0,experiment,imputer,task,missing_type,missing_fraction,strategy,column,result_type,metric,train,test,baseline,corrupted,imputed,train_rank,test_rank,baseline_rank,corrupted_rank,imputed_rank
466,fully_observed,AutoKerasImputer,42225,MCAR,0.01,single_all,cut,impute_performance,F1_macro,0.510621,0.708333,,,,5.0,3.0,,,
469,fully_observed,AutoKerasImputer,42225,MCAR,0.01,single_all,cut,impute_performance,F1_macro,0.674888,0.791667,,,,1.0,1.0,,,
472,fully_observed,AutoKerasImputer,42225,MCAR,0.01,single_all,cut,impute_performance,F1_macro,0.553131,0.752137,,,,4.0,2.0,,,
475,fully_observed,AutoKerasImputer,42225,MCAR,0.01,single_all,cut,impute_performance,F1_macro,0.626667,0.300000,,,,3.0,5.0,,,
478,fully_observed,AutoKerasImputer,42225,MCAR,0.01,single_all,cut,impute_performance,F1_macro,0.634929,0.434524,,,,2.0,4.0,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
101596,fully_observed,KNNImputer,1220,MNAR,0.10,single_all,advertiser_id,downstream_performance,F1_macro,,,0.476541,0.475080,0.475080,,,3.0,5.0,5.0
101599,fully_observed,KNNImputer,1220,MNAR,0.10,single_all,advertiser_id,downstream_performance,F1_macro,,,0.476541,0.476596,0.476596,,,3.0,1.0,1.0
101602,fully_observed,KNNImputer,1220,MNAR,0.10,single_all,advertiser_id,downstream_performance,F1_macro,,,0.476541,0.476541,0.476487,,,3.0,2.5,3.0
101605,fully_observed,KNNImputer,1220,MNAR,0.10,single_all,advertiser_id,downstream_performance,F1_macro,,,0.476541,0.476053,0.476053,,,3.0,4.0,4.0


## Data visualization