In [None]:
%load_ext autoreload
%autoreload 2
from data import data_path
from images import images_path
from skfda.representation.grid import FDataGrid
from utils.utils_workflow import compute_mean_value, plot_step_fn
import matplotlib.pyplot as plt
import numpy as np
import os
import pandas as pd
import pickle

In [None]:
# Index of the last finished simulation for each scenario. The values are
# paired positionally with `scenarios` below.
# NOTE(review): confirm that the first value belongs to the alphabetically
# first scenario directory, the second to the next one, and so on.
last_simulation_finished_list = [99, 63]
domain_range = (0, 1)
output_data_path = os.path.join(data_path, "output")
# os.listdir returns entries in arbitrary order and may include stray files
# (e.g. OS metadata). Sort the names and keep only directories so that the
# positional pairing with last_simulation_finished_list is deterministic.
scenarios = sorted(
    entry
    for entry in os.listdir(output_data_path)
    if os.path.isdir(os.path.join(output_data_path, entry))
)
# File-name templates, filled with (model name, simulation index).
shapley_str = "shapley_{}_{}.pkl"
r2_str = "r2_test_{}_{}.pkl"
models_considered = ["fnn", "knn", "lm"]

# Shapley value

In [None]:
# Accumulators: one single-row frame per (scenario, simulation, model) for the
# Shapley values, and one per combination for the test r2 score.
all_df = []
all_r2 = []
# The header (and the intervals it is derived from) is taken only once, from
# the first Shapley file that is read.
need_to_load_header = True
for scenario, last_simulation_finished in zip(scenarios, last_simulation_finished_list):
    scenario_dir = os.path.join(output_data_path, scenario)
    # Simulation indices run from 0 up to and including the last finished one.
    for i_sim in range(last_simulation_finished + 1):
        for current_model in models_considered:
            # --- Shapley values -------------------------------------------
            # NOTE(review): pickle.load can execute arbitrary code; only load
            # files that were produced by trusted runs.
            shapley_path = os.path.join(
                scenario_dir, shapley_str.format(current_model, i_sim)
            )
            with open(shapley_path, "rb") as handle:
                shapley_pairs = pickle.load(handle)
            if need_to_load_header:
                # The first item of every entry is used as the interval.
                intervals = [pair[0] for pair in shapley_pairs]
                header = compute_mean_value(intervals)
                need_to_load_header = False
            # The second item of every entry is used as the value; wrap the
            # list so the frame has exactly one row.
            shapley_row = pd.DataFrame(
                data=[[pair[1] for pair in shapley_pairs]],
                columns=header,
            ).assign(scenario=scenario, model=current_model, simulation=i_sim)
            all_df.append(shapley_row)

            # --- r2 on the test set ---------------------------------------
            r2_path = os.path.join(scenario_dir, r2_str.format(current_model, i_sim))
            with open(r2_path, "rb") as handle:
                r2_value = pickle.load(handle)
            r2_row = pd.DataFrame(
                data=[[scenario, current_model, r2_value]],
                columns=["scenario", "model", "r2"],
            )
            all_r2.append(r2_row)

In [None]:
# Stack the single-row frames collected in all_df into one table. Each of
# those frames was built without an explicit index, so a plain concat would
# label every row 0; ignore_index=True renumbers the rows 0..n-1 instead.
df_all = pd.concat(all_df, ignore_index=True)
# Preview the first rows.
df_all.head(10)

In [None]:
# Sanity check: True if any cell of the combined table is missing.
df_all.isna().to_numpy().any()

In [None]:
# Number of rows per model. The divisor is taken from the model list rather
# than hard-coded, so it stays correct if models_considered changes.
df_all.shape[0] / len(models_considered)

In [None]:
# Average every column over the simulations of each (scenario, model) pair,
# then discard the averaged simulation counter.
mean_by_scenario_model = df_all.groupby(["scenario", "model"]).mean()
df_aggregated = mean_by_scenario_model.drop(columns="simulation")

In [None]:
# Split the aggregated table by the "scenario" level of its index and keep a
# plain NumPy copy of each part.
scenario_labels = df_aggregated.index.get_level_values("scenario")
df_scenario_1 = df_aggregated[scenario_labels == "scenario_1"]
df_scenario_2 = df_aggregated[scenario_labels == "scenario_2"]
scenario_1_np, scenario_2_np = df_scenario_1.to_numpy(), df_scenario_2.to_numpy()

In [None]:
# Preview the aggregated rows of scenario 1.
df_scenario_1.head(n=5)

In [None]:
# Model label of every aggregated row; used both as sample names and as the
# grouping for the legend.
scenario_1_models = df_scenario_1.index.get_level_values("model")
scenario_1_grid = FDataGrid(
    data_matrix=scenario_1_np,
    grid_points=header,
    domain_range=domain_range,
    sample_names=scenario_1_models,
    coordinate_names=("Shapley value", ),
)
fig_scenario_1 = scenario_1_grid.plot(legend=True, group=scenario_1_models)
# Save the current figure once per output format.
for file_name, file_format in (
    ("scenario_1_shapley.eps", "eps"),
    ("scenario_1_shapley.pdf", "pdf"),
):
    plt.savefig(os.path.join(images_path, file_name), format=file_format)

In [None]:
# Split every interval into its first and second item.
x_min, x_max = [], []
for interval in intervals:
    x_min.append(interval[0])
    x_max.append(interval[1])
# Report how many intervals there are.
print(len(x_max))

In [None]:
# Step-function view of the aggregated scenario 1 values, one colour per model.
scenario_1_model_names = df_scenario_1.index.get_level_values("model").tolist()
scenario_1_colors = ['tab:blue', 'tab:orange', 'tab:green']
plot_step_fn(
    x_min=x_min,
    x_max=x_max,
    values=scenario_1_np,
    colors=scenario_1_colors,
    models=scenario_1_model_names,
    domain_range=domain_range,
    x_lab="",
    y_lab="Shapley value (step function)",
    plt_h_line=True,
    plot_v_line=True,
)
# Save the current figure once per output format.
for file_name, file_format in (
    ("scenario_1_step_fn.eps", "eps"),
    ("scenario_1_step_fn.pdf", "pdf"),
):
    plt.savefig(os.path.join(images_path, file_name), format=file_format)

In [None]:
# Model label of every aggregated row; used both as sample names and as the
# grouping for the legend.
scenario_2_models = df_scenario_2.index.get_level_values("model")
scenario_2_grid = FDataGrid(
    data_matrix=scenario_2_np,
    grid_points=header,
    domain_range=domain_range,
    sample_names=scenario_2_models,
    coordinate_names=("Shapley value", ),
)
fig_scenario_2 = scenario_2_grid.plot(legend=True, group=scenario_2_models)
# Save the current figure once per output format.
for file_name, file_format in (
    ("scenario_2_shapley.eps", "eps"),
    ("scenario_2_shapley.pdf", "pdf"),
):
    plt.savefig(os.path.join(images_path, file_name), format=file_format)

In [None]:
# Step-function view of the aggregated scenario 2 values, one colour per model.
scenario_2_model_names = list(df_scenario_2.index.get_level_values("model"))
scenario_2_colors = ['tab:blue', 'tab:orange', 'tab:green']
plot_step_fn(
    x_min=x_min,
    x_max=x_max,
    values=scenario_2_np,
    colors=scenario_2_colors,
    models=scenario_2_model_names,
    domain_range=domain_range,
    x_lab="",
    y_lab="Shapley value (step function)",
    plt_h_line=True,
    plot_v_line=True,
)
# Save the current figure once per output format.
for file_name, file_format in (
    ("scenario_2_step_fn.eps", "eps"),
    ("scenario_2_step_fn.pdf", "pdf"),
):
    plt.savefig(os.path.join(images_path, file_name), format=file_format)

# Compute $R^2$ metrics

In [None]:
# Stack the single-row r2 frames collected in all_r2 into one table. Each of
# those frames was built without an explicit index, so a plain concat would
# label every row 0; ignore_index=True renumbers the rows 0..n-1 instead.
df_all_r2 = pd.concat(all_r2, ignore_index=True)
# Preview the first rows.
df_all_r2.head(10)

In [None]:
# Mean r2 for each (scenario, model) pair.
df_all_r2.groupby(by=["scenario", "model"]).agg("mean")

In [None]:
# Standard deviation of r2 for each (scenario, model) pair.
df_all_r2.groupby(by=["scenario", "model"]).agg("std")