In [None]:
%load_ext autoreload
%autoreload 2
from functional_neural_networks.dense import FunctionalDense
from hyperopt.fnn import HyperOptFnn
from hyperopt.sklearn_gridsearch import HyperOptScikitFda
from images import images_path
from shapley.shapley_fda import ShapleyFda
from skfda.exploratory.stats import cov
from skfda.misc.operators import LinearDifferentialOperator
from skfda.misc.regularization import L2Regularization
from skfda.ml.regression import KNeighborsRegressor, LinearRegression
from skfda.representation.basis import BSplineBasis
from utils.predict_np import predict_from_np
from utils.simulator import FdaSimulator
from utils.utils_workflow import l2_reg, predict_no_verbose
import matplotlib.pyplot as plt
import numpy as np
import os
import tensorflow as tf

In [None]:
# B-spline basis onto which the functional covariates are projected below.
domain_range = (0, 1)
n_basis_representation = 51
basis_bsplines = BSplineBasis(
    domain_range=domain_range,
    n_basis=n_basis_representation,
)

In [None]:
# Simulator instance reused throughout the notebook: it generates the data and
# exposes the abscissa points, the domain range and the grid conversion helper.
fda_simulator = FdaSimulator()

In [None]:
# Global parameters used to generate the data.
cnt = 30
alpha_p, beta_p = 1 * cnt, 3 * cnt
positions = [0.15, 0.35, 0.55, 0.85]
n_basis_simulated_data = 31

# Options forwarded verbatim to the simulator (same keywords, same order).
simulation_options = {
    "type_covariate": "fourier_expansion",
    "type_transformation": "linear_unimodal",
    "sample_size": 200,
    "eta": 0.05,
    "datasets_type": ["train", "validation", "test"],
    # Series representation
    "n_basis_simulated_data": n_basis_simulated_data,
    "sd_x": 0.01,
    # Beta parameters
    "alpha_param": alpha_p,
    "beta_param": beta_p,
    # Brownian parameters
    "intercept_brownian": 0,
    "slope_brownian": 1,
    "positions": positions,
}

# Each returned object is unpacked per data set (train/validation/test) in the
# following cells.
X, phi_X, epsilon, beta_data, col_indexes_bct, target = fda_simulator.simulate(
    **simulation_options
)

In [None]:
# Unpack the simulated splits. The "full" arrays stack train and validation
# row-wise; they are used later to refit the selected models.
X_train, X_validation, X_test = X
# np.vstack replaces np.row_stack, which is a deprecated alias since NumPy 2.0.
X_full = np.vstack((X_train, X_validation))

target_train, target_validation, target_test = target
target_full = np.vstack((target_train, target_validation))

beta_train, beta_validation, beta_test = beta_data
epsilon_train, epsilon_validation, epsilon_test = epsilon

# Grid representation of every split.
X_train_grid = fda_simulator.to_fdata_grid(X_train)
X_validation_grid = fda_simulator.to_fdata_grid(X_validation)
X_test_grid = fda_simulator.to_fdata_grid(X_test)
X_full_grid = fda_simulator.to_fdata_grid(X_full)

# B-spline basis representation of every split.
X_train_bspline = X_train_grid.to_basis(basis_bsplines)
X_validation_bspline = X_validation_grid.to_basis(basis_bsplines)
X_test_bspline = X_test_grid.to_basis(basis_bsplines)
X_full_bspline = X_full_grid.to_basis(basis_bsplines)

# Plot the train + validation covariates and export the current figure.
_ = X_full_grid.plot()
plt.savefig(os.path.join(images_path, "scenario_1_covariate.eps"), format="eps")
plt.savefig(os.path.join(images_path, "scenario_1_covariate.pdf"), format="pdf")

In [None]:
# Unpack the second output of the simulator (phi_X) per data set.
phi_train, phi_validation, phi_test = phi_X

In [None]:
# Fraction var(epsilon) / (var(phi) + var(epsilon)) on the training set.
noise_variance = np.var(epsilon_train)
noise_variance / (np.var(phi_train) + noise_variance)

In [None]:
# Plot the simulated beta against the abscissa points when it is available.
if beta_train is not None:
    beta_data_grid = fda_simulator.to_fdata_grid(beta_train)
    # Flatten beta to one row before plotting. The target shape is passed
    # positionally because the `newshape` keyword of np.reshape is deprecated
    # in recent NumPy releases.
    plt.plot(
        fda_simulator.abscissa_points,
        np.reshape(beta_train, (1, -1))[0], '-'
    )
    # Export is disabled by default; set the flag to True to write the files.
    save_beta_figure = False
    if save_beta_figure:
        plt.savefig(os.path.join(images_path, "scenario_1_beta_real.eps"), format="eps")
        plt.savefig(os.path.join(images_path, "scenario_1_beta_real.pdf"), format="pdf")

# Linear model

In [None]:
# Variance of the training target (displayed as the cell output).
np.var(target_train)

In [None]:
# Variance of the training target multiplied by its number of rows; used in the
# next cell to scale the regularisation grid.
n_rows_train = target_train.shape[0]
ss_target_train = np.var(target_train) * n_rows_train
print(ss_target_train)

In [None]:
# Search over L2 penalties for the functional linear model.
hyperopt_lm = HyperOptScikitFda(
    LinearRegression,
    abscissa_points=fda_simulator.abscissa_points,
    domain_range=fda_simulator.domain_range,
)

# Penalties are exp(x) * ss_target_train for x = -25, -22, ..., 14; the final
# None entry is one more candidate (presumably "no regularisation" -- confirm
# against HyperOptScikitFda).
log_penalties = np.arange(-25, 15, 3)
reg_list = [
    l2_reg(np.exp(log_penalty) * ss_target_train)
    for log_penalty in log_penalties
] + [None]
params_lm = {"regularization": reg_list}

# Fit on the training split and evaluate on the validation split.
hist_lm = hyperopt_lm.search(
    params=params_lm,
    X_train=X_train_bspline,
    y_train=target_train[:, 0],
    X_val=X_validation_bspline,
    y_val=target_validation[:, 0],
)

In [None]:
# Show how many regularisation candidates were tried, then the candidates.
print(len(reg_list), reg_list, sep="\n")

In [None]:
# Retrieve the best hyperparameters found by the search and build a fresh,
# unfitted estimator configured with them.
best_params_lm = hist_lm.best_params_
print(best_params_lm)
best_model_lm = hyperopt_lm.cls_estimator(**best_params_lm)

In [None]:
# Refit the selected model on train + validation; `_ =` suppresses the cell output.
_ = best_model_lm.fit(X_full_bspline, target_full[:, 0])

In [None]:
# Predict on the test split and scatter predictions against the observed target.
pred_lm = best_model_lm.predict(X_test_bspline)
plt.scatter(pred_lm, target_test)

In [None]:
# Plot the first fitted coefficient function of the linear model.
_ = best_model_lm.coef_[0].plot()

# Export is disabled by default; set the flag to True to write the files.
save_beta_lm_figure = False
if save_beta_lm_figure:
    for extension in ("eps", "pdf"):
        plt.savefig(
            os.path.join(images_path, "scenario_1_beta_lm." + extension),
            format=extension,
        )

In [None]:
# Wrap the fitted linear model's predict so that it accepts a numpy array.
# The wrapper receives the grid points, the domain range and the B-spline basis,
# presumably to rebuild the basis representation before predicting
# (confirm in utils.predict_np).
pred_best_model_lm_fn = predict_from_np(
    grid_points=fda_simulator.abscissa_points,
    domain_range=fda_simulator.domain_range,
    basis=X_full_bspline.basis,
    predict_fn=best_model_lm.predict
)

In [None]:
# Settings of the Shapley computation; num_intervals and num_permutations are
# also read by the later Shapley cells.
num_intervals = 20
num_permutations = 1000
compute_mrmr_r2 = True
compute_mrmr_distance_correlation = True

# NOTE(review): this cell hands the predictor to compute_shapley_value through
# `predict_fns`, whereas the later cells pass `predict_fn` to the ShapleyFda
# constructor -- confirm which call style the installed ShapleyFda expects.
shapley_fda_lm = ShapleyFda(
    X=X_test,
    abscissa_points=fda_simulator.abscissa_points,
    target=target_test[:, 0],
    domain_range=fda_simulator.domain_range,
    verbose=False,
)

shapley_lm_options = {
    "num_intervals": num_intervals,
    "num_permutations": num_permutations,
    "compute_mrmr_r2": compute_mrmr_r2,
    "compute_mrmr_distance_correlation": compute_mrmr_distance_correlation,
    "predict_fns": pred_best_model_lm_fn,
}
values_shapley_lm = shapley_fda_lm.compute_shapley_value(**shapley_lm_options)

In [None]:
# Plot the Shapley results computed for the linear model.
shapley_fda_lm.plot()

# KNN

In [None]:
# Search over the number of neighbours for the functional KNN regressor.
hyperopt_knn = HyperOptScikitFda(
    KNeighborsRegressor,
    abscissa_points=fda_simulator.abscissa_points,
    domain_range=fda_simulator.domain_range,
)

# Candidate neighbourhood sizes: 3, 4, ..., 29.
params_knn = {"n_neighbors": range(3, 30)}
hist_knn = hyperopt_knn.search(
    params=params_knn,
    X_train=X_train,
    y_train=target_train,
    X_val=X_validation,
    y_val=target_validation,
)

# Rebuild the estimator with the best setting and refit on train + validation.
best_params_knn = hist_knn.best_params_
print(best_params_knn)
best_model_knn = hyperopt_knn.cls_estimator(**best_params_knn)
_ = best_model_knn.fit(X_full, target_full)

In [None]:
# Predict on the test split and scatter predictions against the observed target.
pred_knn = best_model_knn.predict(X_test)
plt.scatter(pred_knn, target_test)

In [None]:
# Inspect the shape of the test target (displayed as the cell output).
target_test.shape

In [None]:
# Shapley explainer for the KNN model, evaluated on the test split.
shapley_knn_kwargs = {
    "predict_fn": best_model_knn.predict,
    "X": X_test,
    "abscissa_points": fda_simulator.abscissa_points,
    "target": target_test,
    "domain_range": fda_simulator.domain_range,
    "verbose": False,
}
shapley_fda_knn = ShapleyFda(**shapley_knn_kwargs)

# Uses the interval and permutation counts defined in the linear-model section.
values_shapley_knn = shapley_fda_knn.compute_shapley_value(
    num_intervals=num_intervals, num_permutations=num_permutations
)

In [None]:
# Plot the KNN Shapley results, selecting the "mrmr_based" view.
shapley_fda_knn.plot(which="mrmr_based")

In [None]:
# Plot the KNN Shapley results, selecting the "model_based" view.
shapley_fda_knn.plot(which="model_based")

# Neural network

In [None]:
# Hypermodel whose input shape and resolution both derive from the number of
# columns of X_train.
n_columns_train = X_train.shape[1]
hyperopt_fnn = HyperOptFnn(
    input_shape=(n_columns_train, 1),
    resolution=n_columns_train,
)

# Tuner configuration: 10 trials scored on validation loss; previous results in
# the project directory are overwritten.
tuner_options = {
    "objective": "val_loss",
    "max_trials": 10,
    "overwrite": True,
    "directory": ".",
    "project_name": "tune_hypermodel",
}
tuner_fnn = hyperopt_fnn.build_tuner(**tuner_options)

# Run the search on the training split, validating on the validation split.
tuner_fnn.search(
    X_train,
    target_train,
    validation_data=(X_validation, target_validation),
    verbose=False,
)

In [None]:
# Take the top-ranked hyperparameter set and read its "epochs" entry, which is
# reused when refitting the model in the next cell.
best_params_fnn = tuner_fnn.get_best_hyperparameters(1)[0]
best_epochs_fnn = best_params_fnn.get("epochs")
print("best_epochs_fnn:", best_epochs_fnn)

In [None]:
# Build a fresh hypermodel with the same input configuration as the tuner's.
n_columns_full = X_train.shape[1]
hyperopt_best_fnn = HyperOptFnn(
    input_shape=(n_columns_full, 1),
    resolution=n_columns_full,
)

# Instantiate the network from the best hyperparameters and refit it on
# train + validation for the selected number of epochs.
best_model_fnn = hyperopt_best_fnn.build(best_params_fnn)
fit_options = {"epochs": best_epochs_fnn, "verbose": False}
history_best_fnn = hyperopt_best_fnn.fit(
    best_params_fnn,
    best_model_fnn,
    X_full,
    target_full,
    **fit_options,
)

In [None]:
# Architecture summary of the refitted network.
best_model_fnn.summary()

In [None]:
# Architecture summary of the best model kept by the tuner, for comparison with
# the refitted network above.
tuner_fnn.get_best_models(1)[0].summary()

In [None]:
# Predict on the test split and scatter predictions against the observed target.
pred_fnn = best_model_fnn.predict(X_test)
plt.scatter(pred_fnn, target_test)

In [None]:
# Shapley explainer for the tuned network; its predict method is wrapped with
# predict_no_verbose before being handed over.
fnn_predict_fn = predict_no_verbose(best_model_fnn.predict)
shapley_fda_fnn = ShapleyFda(
    predict_fn=fnn_predict_fn,
    X=X_test,
    abscissa_points=fda_simulator.abscissa_points,
    target=target_test,
    domain_range=fda_simulator.domain_range,
    verbose=False,
)

# Uses the interval and permutation counts defined in the linear-model section.
shapley_fnn_options = {
    "num_intervals": num_intervals,
    "num_permutations": num_permutations,
}
values_shapley_fnn = shapley_fda_fnn.compute_shapley_value(**shapley_fnn_options)

In [None]:
# Plot the network's Shapley results, selecting the "mrmr_based" view.
shapley_fda_fnn.plot(which="mrmr_based")

In [None]:
# Plot the network's Shapley results, selecting the "model_based" view.
shapley_fda_fnn.plot(which="model_based")

## LM, KNN and FNN Shapley

In [None]:
# Keyword arguments shared by the three explainers built in this cell.
shapley_common_kwargs = {
    "X": X_test,
    "abscissa_points": fda_simulator.abscissa_points,
    "domain_range": fda_simulator.domain_range,
    "verbose": False,
}
# Settings shared by the three Shapley computations.
shapley_run_kwargs = {
    "num_intervals": num_intervals,
    "num_permutations": num_permutations,
}

# Linear model: the target is passed as its first column only.
shapley_fda_lm_2 = ShapleyFda(
    predict_fn=pred_best_model_lm_fn,
    target=target_test[:, 0],
    **shapley_common_kwargs,
)
values_shapley_lm_2 = shapley_fda_lm_2.compute_shapley_value(**shapley_run_kwargs)

# KNN: the target is passed unsliced.
shapley_fda_knn_2 = ShapleyFda(
    predict_fn=best_model_knn.predict,
    target=target_test,
    **shapley_common_kwargs,
)
values_shapley_knn_2 = shapley_fda_knn_2.compute_shapley_value(**shapley_run_kwargs)

# Neural network: predict is wrapped with predict_no_verbose.
shapley_fda_fnn_2 = ShapleyFda(
    predict_fn=predict_no_verbose(best_model_fnn.predict),
    target=target_test,
    **shapley_common_kwargs,
)
values_shapley_fnn_2 = shapley_fda_fnn_2.compute_shapley_value(**shapley_run_kwargs)

In [None]:
# Plot the Shapley results of the three models, in the order LM, KNN, FNN.
shapley_fda_lm_2.plot()
shapley_fda_knn_2.plot()
shapley_fda_fnn_2.plot()

In [None]:
# Sum of the linear model's Shapley values, shown next to the squared
# correlation between its test predictions and the test target.
# The values come from the run computed in this section (`values_shapley_lm_2`),
# matching what the KNN and FNN cells below use.
val_lm = [x[1] for x in values_shapley_lm_2]
print(sum(val_lm))
np.corrcoef(pred_lm, target_test, rowvar=False)[0, 1] ** 2

In [None]:
# Sum of the KNN Shapley values (second entry of each result item), shown next
# to the squared correlation between KNN test predictions and the test target.
val_knn = [x[1] for x in values_shapley_knn_2]
print(sum(val_knn))
np.corrcoef(pred_knn, target_test, rowvar=False)[0, 1] ** 2

In [None]:
# Sum of the network's Shapley values (second entry of each result item), shown
# next to the squared correlation between its test predictions and the test target.
val_fnn = [x[1] for x in values_shapley_fnn_2]
print(sum(val_fnn))
np.corrcoef(pred_fnn, target_test, rowvar=False)[0, 1] ** 2

# Linear model without hyperparameter optimisation

In [None]:
# Linear model with a fixed, hand-picked penalty instead of a searched one.
# NOTE(review): despite the name `lm_no_reg`, an L2 regularisation with
# parameter 1e-5 on a LinearDifferentialOperator(0) is applied.
lm_no_reg = LinearRegression(
    regularization=L2Regularization(
        linear_operator=LinearDifferentialOperator(0),
        regularization_parameter=1e-5
    )
)

In [None]:
# Fit on the training split only (the tuned model above was refit on
# train + validation).
lm_no_reg.fit(X_train_bspline, target_train[:,0])

# Neural network without hyperparameter optimisation

In [None]:
# Hand-specified functional neural network (no hyperparameter search).
input_shape = (X_train.shape[1], 1)
resolution = X_train.shape[1]
input_layer = tf.keras.layers.Input(shape=input_shape)
num_hidden_layers = 2
n_neurons = 10
n_epochs = 50

# One options dict per hidden layer; every layer gets its own fresh dict.
layer_options = [
    {
        "n_neurons": n_neurons,
        "basis_options": {
            "n_functions": 6,
            "resolution": resolution,
            "basis_type": "Legendre",
        },
        "activation": "relu",
        "pooling": False,
    }
    for _ in range(num_hidden_layers)
]

# Chain the hidden layers, each one applied to the output of the previous one.
layer = input_layer
for i_layer, layer_option in enumerate(layer_options):
    layer = FunctionalDense(
        **layer_option,
        name=f"FunctionalDense_{i_layer}"
    )(layer)

# Output layer: a single neuron with linear activation and pooling enabled.
output_layer_options = {
    "n_neurons": 1,
    "basis_options": {
        "n_functions": 3,
        "resolution": resolution,
        "basis_type": "Fourier"
    },
    "activation": "linear",
    "pooling": True
}
output_layer = FunctionalDense(
    **output_layer_options,
    name="OutputLayer"
)(layer)

# Assemble and compile the model with mean squared error and Adam.
model_fnn_no_hyper = tf.keras.Model(inputs=input_layer, outputs=output_layer)
model_fnn_no_hyper.compile(
    loss=tf.keras.losses.MeanSquaredError(),
    optimizer="adam",
)

# Train on train + validation for the fixed number of epochs.
model_fnn_no_hyper.fit(X_full, target_full, epochs=n_epochs)

In [None]:
# Architecture summary of the hand-specified network.
model_fnn_no_hyper.summary()

In [None]:
# In-sample check: predictions on the data used for fitting against the target.
plt.scatter(model_fnn_no_hyper.predict(X_full), target_full)

In [None]:
# Out-of-sample check: predictions on the test split against the test target.
plt.scatter(model_fnn_no_hyper.predict(X_test), target_test)

In [None]:
# Shapley explainer for the hand-specified network, evaluated on the test split.
no_hyper_predict_fn = predict_no_verbose(model_fnn_no_hyper.predict)
shapley_fda_fnn_no_hyper = ShapleyFda(
    predict_fn=no_hyper_predict_fn,
    X=X_test,
    abscissa_points=fda_simulator.abscissa_points,
    target=target_test,
    domain_range=fda_simulator.domain_range,
    verbose=False,
)

# Compute the values with the notebook-wide settings, then plot them.
shapley_no_hyper_options = {
    "num_intervals": num_intervals,
    "num_permutations": num_permutations,
}
values_shapley_fnn_no_hyper = shapley_fda_fnn_no_hyper.compute_shapley_value(
    **shapley_no_hyper_options
)
shapley_fda_fnn_no_hyper.plot()

## Misc.

In [None]:
# Covariance of the train + validation covariates.
# NOTE(review): the name says "bspline" but the input is the grid
# representation (X_full_grid), not X_full_bspline -- confirm which is intended.
X_full_bspline_cov = cov(X_full_grid)

In [None]:
# Display the covariance computed in the previous cell as a heatmap.
X_full_bspline_cov.heatmap()

In [None]:
# Correlation matrix between the columns of X_full (X_full.T puts one column
# per row, which is what np.corrcoef treats as a variable by default).
corr_mat = np.corrcoef(X_full.T)
# NOTE(review): plt.plot on a 2-D array draws one line per column; if a matrix
# view was intended, an image/heatmap plot may be more appropriate -- confirm.
plt.plot(corr_mat)