In [None]:
%load_ext autoreload
%autoreload 2
from data import data_path
from hyperopt.fnn import HyperOptFnn
from shapley.shapley_fda import ShapleyFda
from skfda.ml.regression import LinearRegression
from skfda.representation.basis import BSplineBasis
from skfda.representation.grid import FDataGrid
from utils.predict_np import predict_from_np
import numpy as np
import os
import pandas as pd

In [None]:
def count_csv_files(files):
    """Count the distinct simulation indices found among CSV file names.

    A name is treated as a CSV file when one of its dot-separated parts is
    exactly ``csv``. The simulation index is the integer that follows the last
    underscore of the part before the first dot
    (``X_sim_train_3.csv`` -> ``3``).

    Parameters
    ----------
    files : iterable of str
        File names, e.g. the output of ``os.listdir``.

    Returns
    -------
    int
        Number of distinct simulation indices.
    """
    simulation_ids = set()
    for file_name in files:
        parts = file_name.split(".")
        if "csv" not in parts:
            continue
        suffix = parts[0].split("_")[-1]
        try:
            simulation_ids.add(int(suffix))
        except ValueError:
            # A CSV without a numeric suffix (e.g. a summary table) is not a
            # simulation output, so it must not abort the count.
            continue
    return len(simulation_ids)

In [None]:
def read_data(i_sim, path):
    """Load the train/validation/test CSV files of one simulation run.

    Parameters
    ----------
    i_sim : int
        Simulation index used as the numeric suffix of the file names.
    path : str
        Directory containing ``X_sim_<split>_<i_sim>.csv`` and
        ``target_sim_<split>_<i_sim>.csv`` for each split.

    Returns
    -------
    list
        ``[colnames, X_train, X_val, X_test, target_train, target_val,
        target_test]`` where ``colnames`` holds the column labels of the
        training covariates and the remaining items are NumPy arrays.
    """
    splits = ("train", "validation", "test")

    def load_frames(template):
        # One DataFrame per split, in train / validation / test order
        return [
            pd.read_csv(os.path.join(path, template.format(split, i_sim)))
            for split in splits
        ]

    covariate_frames = load_frames("X_sim_{}_{}.csv")
    target_frames = load_frames("target_sim_{}_{}.csv")

    result = [covariate_frames[0].columns.values]
    result.extend(frame.to_numpy() for frame in covariate_frames + target_frames)
    return result

def get_abscissa_points(names):
    """Extract the abscissa value encoded in each column name.

    Each name is split on ``_`` and its second piece is parsed as a float
    (``x_0.25`` -> ``0.25``).

    Parameters
    ----------
    names : iterable of str
        Column labels of the form ``<prefix>_<value>``.

    Returns
    -------
    numpy.ndarray
        The parsed values, in the order of ``names``.
    """
    positions = []
    for label in names:
        positions.append(float(label.split("_")[1]))
    return np.array(positions)

In [None]:
def predict_no_verbose(predict_fn):
    """Wrap a predict function so that it is always called with ``verbose=False``.

    Parameters
    ----------
    predict_fn : callable
        Prediction function accepting a ``verbose`` keyword argument.

    Returns
    -------
    callable
        Function forwarding all positional and keyword arguments to
        ``predict_fn`` with ``verbose`` forced to ``False``.
    """
    def inner(*args, **kwargs):
        # Override rather than duplicate the keyword: passing ``verbose``
        # twice would raise "got multiple values for keyword argument".
        kwargs["verbose"] = False
        return predict_fn(*args, **kwargs)
    return inner

In [None]:
### Configuration for a single scenario / simulation run
domain_range = (0, 1)
num_intervals = 20
num_permutations = 50
simulated_data_path = os.path.join(data_path, "output")
scenario_path = "scenario_5"
full_path = os.path.join(simulated_data_path, scenario_path)
all_files = os.listdir(full_path)
n_basis_representation = 20
basis_bsplines = BSplineBasis(
    n_basis=n_basis_representation,
    domain_range=domain_range
)
### Load the data of simulation 0
i_sim = 0
colnames, X_train, X_val, X_test, target_train, target_val, target_test = read_data(i_sim, full_path)
# Train and validation rows are stacked to refit the final models.
# np.vstack replaces np.row_stack, a deprecated alias of the same function.
X_full = np.vstack((X_train, X_val))
target_full = np.vstack((target_train, target_val))
abscissa_points = get_abscissa_points(colnames)
# Functional representation of the stacked data: grid first, then B-spline basis
X_full_grid = FDataGrid(
    data_matrix=X_full,
    grid_points=abscissa_points,
    domain_range=domain_range
)
X_full_bspline = X_full_grid.to_basis(basis_bsplines)
### Perform hyperopt for neural networks
hyper_opt_fnn = HyperOptFnn(
    input_shape=(X_train.shape[1], 1),
    resolution=X_train.shape[1]
)
tuner_fnn = hyper_opt_fnn.build_tuner(
    objective="val_loss",
    max_trials=3,
    overwrite=True,
    directory=".",
    project_name="tune_hypermodel",
)
tuner_fnn.search(X_train, target_train, epochs=2, validation_data=(X_val, target_val), verbose=False)
# Rebuild the best configuration and refit it on train + validation
best_hp_fnn = tuner_fnn.get_best_hyperparameters()[0]
hypermodel_final = HyperOptFnn(
    input_shape=(X_train.shape[1], 1),
    resolution=X_train.shape[1]
)
model_fnn = hypermodel_final.build(best_hp_fnn)
history = hypermodel_final.fit(
    best_hp_fnn,
    model_fnn,
    X_full,
    target_full,
    verbose=False,
    epochs=1,
)
### Fit the linear model (plain fit, no hyperparameter search)
# The response is the first target column, passed as a 1-D vector
linear_reg = LinearRegression()
_ = linear_reg.fit(X_full_bspline, target_full[:, 0])

In [None]:
### Relevance (Shapley values) of the neural network on the test set
# model_fnn.predict is wrapped so that every call made by ShapleyFda is silent
shapley_fda_fnn = ShapleyFda(
    X=X_test,
    target=target_test[:, 0],
    abscissa_points=abscissa_points,
    domain_range=domain_range,
    predict_fn=predict_no_verbose(model_fnn.predict),
    verbose=False,
)
values_shapley_fnn = shapley_fda_fnn.compute_shapley_value(
    num_permutations=num_permutations,
    num_intervals=num_intervals,
)
shapley_fda_fnn.plot()

In [None]:
### Compute relevance for linear model
# NOTE: this overrides the num_permutations value set in the configuration cell
num_permutations = 100
# Transform predict function to use a numpy array as input
pred_lm = predict_from_np(
    grid_points=abscissa_points,
    domain_range=domain_range,
    basis=X_full_bspline.basis,
    predict_fn=linear_reg.predict
)

# The target is passed as a 1-D vector (first column), consistent with the
# neural-network relevance cell and with how the linear model was fitted.
# A 2-D (n, 1) target compared with 1-D predictions could broadcast to (n, n).
shapley_fda_lm = ShapleyFda(
    predict_fn=pred_lm,
    X=X_test,
    abscissa_points=abscissa_points,
    target=target_test[:, 0],
    domain_range=domain_range,
    verbose=False,
)

values_shapley_lm = shapley_fda_lm.compute_shapley_value(
    num_intervals=num_intervals,
    num_permutations=num_permutations,
)
shapley_fda_lm.plot()

In [None]:
# Residuals of the linear model on the test set: wrapped prediction minus the first target column
pred_lm(X_test) - target_test[:, 0]

In [None]:
### Fit the linear regression on the B-spline representation of train + validation data
# No hyperparameter search happens here; the response is the first target column.
linear_reg = LinearRegression()
_ = linear_reg.fit(X_full_bspline, target_full[:, 0])

In [None]:
# Predictions of the fitted linear model on the same B-spline data it was fitted on
linear_reg.predict(X_full_bspline)

In [None]:
### Run the neural-network relevance analysis for every scenario and simulation
domain_range = (0, 1)
num_intervals = 20
num_permutations = 10
simulated_data_path = os.path.join(data_path, "output")
# Shapley values of every run, keyed by (scenario name, simulation index)
shapley_results_fnn = {}
# sorted() makes the processing order independent of the file system
for scenario_path in sorted(os.listdir(simulated_data_path)):
    full_path = os.path.join(simulated_data_path, scenario_path)
    if not os.path.isdir(full_path):
        # Stray files in the output folder are not scenarios
        continue
    all_files = os.listdir(full_path)
    n_simulation = count_csv_files(all_files)
    for i_sim in range(n_simulation):
        # Read the data
        print(scenario_path, i_sim)
        colnames, X_train, X_val, X_test, target_train, target_val, target_test = read_data(i_sim, full_path)
        abscissa_points = get_abscissa_points(colnames)
        # Perform hyperopt for neural networks
        hyper_opt_fnn = HyperOptFnn(
            input_shape=(X_train.shape[1], 1),
            resolution=X_train.shape[1]
        )
        tuner = hyper_opt_fnn.build_tuner(
            objective="val_loss",
            max_trials=3,
            overwrite=True,
            directory=".",
            project_name="tune_hypermodel",
        )
        tuner.search(X_train, target_train, epochs=2, validation_data=(X_val, target_val), verbose=False)
        # Use the best model, refitted on train + validation
        best_hp = tuner.get_best_hyperparameters()[0]
        hypermodel_final = HyperOptFnn(
            input_shape=(X_train.shape[1], 1),
            resolution=X_train.shape[1]
        )
        model = hypermodel_final.build(best_hp)
        # np.vstack replaces np.row_stack, a deprecated alias of the same function
        hypermodel_final.fit(
            best_hp,
            model,
            np.vstack((X_train, X_val)),
            np.vstack((target_train, target_val)),
            verbose=False,
            epochs=1
        )
        # Compute relevance; silent predict and 1-D target, as in the single-run cell
        shapley_fda_fnn = ShapleyFda(
            predict_fn=predict_no_verbose(model.predict),
            X=X_test,
            abscissa_points=abscissa_points,
            target=target_test[:, 0],
            domain_range=domain_range,
            verbose=False,
        )
        values_shapley_fnn = shapley_fda_fnn.compute_shapley_value(
            num_intervals=num_intervals,
            num_permutations=num_permutations,
        )
        # Store the results so later iterations do not overwrite them
        shapley_results_fnn[(scenario_path, i_sim)] = values_shapley_fnn