In [None]:
%load_ext autoreload
%autoreload 2
from utils.predict_np import predict_from_np
from scipy.stats import beta
from shapley_fda import ShapleyFda
from shaple_fda_mean import ShapleyFdaMean
from skfda.misc import inner_product
from skfda.ml.regression import LinearRegression
from skfda.representation.basis import FourierBasis
from skfda.representation.basis import BSplineBasis
from skfda.representation.grid import FDataGrid
import matplotlib.pyplot as plt
import numpy as np

In [None]:
# Build Fourier-related data
# Simulates `num_functions` curves as random linear combinations of Fourier basis
# functions, then stores them on a grid, in the Fourier basis and in a B-spline basis.
wanted_symmetry = False
num_functions = 200
n_basis_representation = 20
n_basis_simulated_data = 5
ini = 0
end = 1
period = np.pi/2
domain_range = (ini, end)
step = 0.01
# np.arange with a non-integer step can emit one point past `end` because of
# floating-point rounding, which would leave a grid point outside `domain_range`.
# np.linspace fixes the number of points and pins both end points instead.
# If `step` does not divide the domain length, the spacing is adjusted to fit.
total_abscissa_points = int(round((end - ini) / step)) + 1
abscissa_points = np.linspace(ini, end, total_abscissa_points)
# An even number of basis functions is bumped to the next odd number
# (presumably a constant term plus sine/cosine pairs — confirm against FourierBasis).
if n_basis_simulated_data % 2 == 0:
    n_basis_simulated_data += 1
basis_fourier = FourierBasis(n_basis=n_basis_simulated_data, period=period, domain_range=domain_range)
basis_bsplines = BSplineBasis(n_basis=n_basis_representation, domain_range=domain_range)
row_vector_ones_total_abscissa_points = np.ones(shape=(1, total_abscissa_points))
# NOTE(review): not referenced by any visible cell; kept in case a later cell relies on it.
col_vector_ones_num_functions = np.ones(shape=(num_functions, 1))
X_simulated = np.empty(shape=(num_functions, total_abscissa_points))
# assumes the evaluated basis squeezes to (n_basis, n_points) — TODO confirm
basis_fourier_evaluated = np.squeeze(basis_fourier(abscissa_points))
# Weight of the k-th basis function decays as 1/k (k = 1..n_basis).
# Alternative profiles that were tried: 1/(2 ** k) and a constant 1.
lambda_coefficients = np.array([1/i for i in range(1, n_basis_simulated_data + 1)], ndmin=2)
# Repeat each weight along the grid so it can be multiplied element-wise.
lambda_matrix = np.dot(lambda_coefficients.T, row_vector_ones_total_abscissa_points)
for i in range(num_functions):
    # A dedicated seed per curve makes curve i independent of `num_functions`.
    np.random.seed(1234 + i)
    normal_vector = np.random.normal(scale=0.01, size=(1, n_basis_simulated_data))
    normal_matrix = np.dot(normal_vector.T, row_vector_ones_total_abscissa_points)
    # Each basis function (a row) gets one coefficient, shared by all the columns (time).
    coefficients_basis_matrix = normal_matrix * lambda_matrix
    # Weighted sum of the basis functions gives the curve on the grid.
    X_simulated[i, :] = np.sum(basis_fourier_evaluated * coefficients_basis_matrix, axis=0)
if wanted_symmetry:
    # Adding the time-reversed curves makes every curve symmetric about the midpoint.
    X_simulated = X_simulated + np.flip(X_simulated, axis=1)
X_simulated_grid = FDataGrid(data_matrix=X_simulated, grid_points=abscissa_points, domain_range=domain_range)
X_basis_fourier = X_simulated_grid.to_basis(basis_fourier)
X_basis_bsplines = X_simulated_grid.to_basis(basis_bsplines)
# Assigning the return value keeps it out of the cell output.
zzz = X_simulated_grid.plot()

In [None]:
# Build beta distribution data
# `cnt` scales both shape parameters together.
cnt = 30
alpha_1 = 1 * cnt
beta_1 = 3 * cnt
beta_distr_1 = beta(alpha_1, beta_1)
# Swapping the shape parameters mirrors the density about 0.5.
beta_distr_2 = beta(beta_1, alpha_1)
# The shape is passed positionally: the `newshape` keyword of np.reshape is
# deprecated in recent NumPy releases, while the positional form works everywhere.
beta_pdf_abscissa_1 = np.reshape(beta_distr_1.pdf(abscissa_points), (1, -1))
beta_pdf_abscissa_2 = np.reshape(beta_distr_2.pdf(abscissa_points), (1, -1))
# Equal-weight mixture of the two mirrored densities.
beta_pdf_abscissa_3 = 0.5 * (beta_pdf_abscissa_1 + beta_pdf_abscissa_2)
# The mixture is the coefficient function used by the following cells.
beta_pdf_abscissa = beta_pdf_abscissa_3
plt.plot(abscissa_points, beta_pdf_abscissa[0], '-o')

In [None]:
# Scratch check: stack `num_fun` copies of the first beta density as rows,
# reverse them along the grid axis and look at the column-wise maxima.
num_fun = 3
ones_vector_column = np.full((num_fun, 1), 1)
beta_pdf_matrix = ones_vector_column @ beta_pdf_abscissa_1
beta_pdf_matrix_flip = beta_pdf_matrix[:, ::-1]
bbbb = beta_pdf_matrix.max(axis=0)
print(beta_pdf_matrix_flip.max(axis=0).shape)

In [None]:
# Transform beta distribution to grid and basis
beta_grid = FDataGrid(
    data_matrix=beta_pdf_abscissa[0],
    grid_points=abscissa_points,
    domain_range=domain_range,
)
# The Fourier representation must be projected onto the Fourier basis; it was
# previously built with the B-spline basis, duplicating `beta_basis_bsplines`.
beta_basis_fourier = beta_grid.to_basis(basis_fourier)
beta_basis_bsplines = beta_grid.to_basis(basis_bsplines)
# Assigning the return value keeps it out of the cell output.
zzzz = beta_basis_bsplines.plot()

In [None]:
# Build the target: inner product of every curve with the beta function plus Gaussian noise
np.random.seed(12345)
noiseless_response = inner_product(X_simulated_grid, beta_grid)
observation_noise = np.random.normal(scale=0.001, size=num_functions)
y = noiseless_response + observation_noise

In [None]:
# Sanity check: display the dimensions of the target
# (one value per simulated function is expected — TODO confirm).
y.shape

In [None]:
# Compare the noise-free response (x axis) with the noisy target (y axis).
response_without_noise = inner_product(X_simulated_grid, beta_grid)
plt.scatter(response_without_noise, y)

In [None]:
def my_predict(grid_points, domain_range, beta_grid):
    """Build a predictor that works directly on raw data matrices.

    Parameters
    ----------
    grid_points
        Grid points handed to ``FDataGrid`` for every incoming matrix.
    domain_range
        Domain range handed to ``FDataGrid`` for every incoming matrix.
    beta_grid
        Functional object used as second argument of ``inner_product``.

    Returns
    -------
    callable
        Function of a single argument ``X`` that wraps ``X`` in an
        ``FDataGrid`` and returns its inner product with ``beta_grid``.
    """
    def predict_on_matrix(X):
        # Wrap the raw matrix so that it can be integrated against beta_grid.
        curves = FDataGrid(
            data_matrix=X,
            grid_points=grid_points,
            domain_range=domain_range,
        )
        return inner_product(curves, beta_grid)

    return predict_on_matrix

In [None]:
# "Gold" predictor: uses the true beta function instead of a fitted model.
pred_gold = my_predict(
    grid_points=abscissa_points,
    domain_range=domain_range,
    beta_grid=beta_grid,
)

In [None]:
# Shapley values of the gold predictor with ShapleyFda.
num_intervals = 20
num_permutations = 1000
shapley_fda_arguments = dict(
    predict_fn=pred_gold,
    X=X_simulated,
    abscissa_points=abscissa_points,
    target=y,
    domain_range=domain_range,
    verbose=False,
)
shapley_fda = ShapleyFda(**shapley_fda_arguments)
values_shapley = shapley_fda.compute_shapley_value(
    num_intervals=num_intervals,
    num_permutations=num_permutations,
)

In [None]:
# Presumably draws the Shapley values computed in the previous cell — confirm against ShapleyFda.plot.
shapley_fda.plot()

In [None]:
# Compare one observed curve with the mean curve and with two stored covariates.
# NOTE(review): keys "" and "0" presumably identify the empty set of intervals and
# the set holding only interval 0 — confirm against ShapleyFda.covariate_computed.
i = 8
mean_f = X_simulated.mean(axis=0)
set_1 = shapley_fda.covariate_computed[""]
set_2 = shapley_fda.covariate_computed["0"]
labelled_curves = (
    (mean_f, "mean_f"),
    (X_simulated[i, :], "true"),
    (set_1[i, :], "rec_sin"),
    (set_2[i, :], "rec_con"),
)
for curve, curve_label in labelled_curves:
    plt.plot(curve, label=curve_label)
plt.legend()
plt.show()

In [None]:
# Shapley values of the gold predictor with the ShapleyFdaMean variant.
shapley_fda_mean_arguments = dict(
    predict_fn=pred_gold,
    X=X_simulated,
    abscissa_points=abscissa_points,
    target=y,
    domain_range=domain_range,
    verbose=False,
)
shapley_fda_mean = ShapleyFdaMean(**shapley_fda_mean_arguments)
values_shapley_mean = shapley_fda_mean.compute_shapley_value(
    num_intervals=num_intervals,
    num_permutations=num_permutations,
)

In [None]:
# Presumably draws the Shapley values computed in the previous cell — confirm against ShapleyFdaMean.plot.
shapley_fda_mean.plot()

In [None]:
# Same comparison as for ShapleyFda, using the covariates stored by ShapleyFdaMean.
# Relies on `mean_f` and `i` defined in the earlier comparison cell.
set_1_mean = shapley_fda_mean.covariate_computed[""]
set_2_mean = shapley_fda_mean.covariate_computed["0"]
labelled_curves_mean = (
    (mean_f, "mean_f"),
    (X_simulated[i, :], "true"),
    (set_1_mean[i, :], "rec_sin"),
    (set_2_mean[i, :], "rec_con"),
)
for curve, curve_label in labelled_curves_mean:
    plt.plot(curve, label=curve_label)
plt.legend()
plt.show()

In [None]:
# Fit a scikit fda model
# The regression is trained on a copy of the B-spline representation of the curves.
linear_reg = LinearRegression()
X_lm = X_basis_bsplines.copy()
# Binding the result to `_` keeps the return value out of the cell output.
_ = linear_reg.fit(X_lm, y)

In [None]:
# Plot the first fitted coefficient of the linear model; assigning the return
# value keeps it out of the cell output.
zzz = linear_reg.coef_[0].plot()

In [None]:
# Transform predict function to use a numpy array as input
# The wrapper receives the grid, the domain and the basis the model was fitted on
# (presumably to rebuild the functional input before calling `predict_fn` —
# confirm against utils.predict_np).
pred_lm = predict_from_np(
    grid_points=abscissa_points,
    domain_range=domain_range,
    basis=X_lm.basis,
    predict_fn=linear_reg.predict
)

In [None]:
# Shapley values of the fitted linear model with ShapleyFda.
shapley_fda_lm_arguments = dict(
    predict_fn=pred_lm,
    X=X_simulated,
    abscissa_points=abscissa_points,
    target=y,
    domain_range=domain_range,
    verbose=False,
)
shapley_fda_lm = ShapleyFda(**shapley_fda_lm_arguments)
values_shapley_lm = shapley_fda_lm.compute_shapley_value(
    num_intervals=num_intervals,
    num_permutations=num_permutations,
)

In [None]:
# Presumably draws the Shapley values computed in the previous cell — confirm against ShapleyFda.plot.
shapley_fda_lm.plot()

In [None]:
# Shapley values of the fitted linear model with the ShapleyFdaMean variant.
shapley_fda_mean_lm_arguments = dict(
    predict_fn=pred_lm,
    X=X_simulated,
    abscissa_points=abscissa_points,
    target=y,
    domain_range=domain_range,
    verbose=False,
)
shapley_fda_mean_lm = ShapleyFdaMean(**shapley_fda_mean_lm_arguments)
values_shapley_mean_lm = shapley_fda_mean_lm.compute_shapley_value(
    num_intervals=num_intervals,
    num_permutations=num_permutations,
)

In [None]:
# Presumably draws the Shapley values computed in the previous cell — confirm against ShapleyFdaMean.plot.
shapley_fda_mean_lm.plot()