In [None]:
%load_ext autoreload
%autoreload 2
from predict_np import predict_from_np
from shapley_fda import ShapleyFda
from scipy.stats import beta
from skfda.misc import inner_product
from skfda.ml.regression import LinearRegression
from skfda.representation.basis import FourierBasis
from skfda.representation.basis import BSplineBasis
from skfda.representation.grid import FDataGrid
from skfda.misc.operators import LinearDifferentialOperator
from skfda.misc.regularization import L2Regularization
import matplotlib.pyplot as plt
import numpy as np

In [None]:
# Build Fourier-related data
num_functions = 100
n_basis_representation = 11
# Force an odd number of basis functions
if n_basis_representation % 2 == 0:
    n_basis_representation = n_basis_representation + 1
ini = 0
end = 1
period = 1
domain_range = (ini, end)
step = 0.01
# Tie both bases to the configured domain (and period) instead of relying on the
# library defaults, so that changing ini/end/period keeps bases and data consistent
basis = FourierBasis(domain_range=domain_range, n_basis=n_basis_representation, period=period)
basis_bsplines = BSplineBasis(domain_range=domain_range, n_basis=n_basis_representation)
# np.arange with a non-integer step can yield an unexpected number of points or a last
# point past `end`; np.linspace fixes the count and hits both end points exactly
total_abscissa_points = int(round((end - ini) / step)) + 1
abscissa_points = np.linspace(ini, end, total_abscissa_points)
# Helper vectors of ones, kept because other cells may reference them
row_vector_ones_total_abscissa_points = np.ones(shape=(1, total_abscissa_points))
col_vector_ones_num_functions = np.ones(shape=(num_functions, 1))
# Rows: basis functions, columns: abscissa points
basis_evaluated = np.squeeze(basis(abscissa_points))
# Per-basis weights (all equal to 1). Decaying alternative:
#lambda_coefficients = np.array([1/(2 ** i) for i in range(1, n_basis_representation + 1)], ndmin=2)
lambda_coefficients = np.ones(shape=(1, n_basis_representation))
# One row of random basis coefficients per simulated curve
random_coefficients = np.empty(shape=(num_functions, n_basis_representation))
for i in range(num_functions):
    # One seed per curve, so every curve can be reproduced independently of the others
    np.random.seed(1234 + i)
    random_coefficients[i, :] = np.random.normal(scale=0.01, size=n_basis_representation)
# Each curve is the weighted sum of the basis functions: (curves x basis) . (basis x abscissa)
X_simulated = np.dot(random_coefficients * lambda_coefficients, basis_evaluated)
X_simulated_grid = FDataGrid(data_matrix=X_simulated, grid_points=abscissa_points, domain_range=domain_range)
X_basis_fourier = X_simulated_grid.to_basis(basis)
X_basis_bsplines = X_simulated_grid.to_basis(basis_bsplines)
zzz = X_simulated_grid.plot()

In [None]:
# Plot the B-spline representation of the simulated curves
zzz = X_basis_bsplines.plot()

In [None]:
# Plot the Fourier representation of the simulated curves
zzz = X_basis_fourier.plot()

In [None]:
# Build beta distribution data
# Beta(alpha_1, beta_1) density evaluated on the abscissa points; `cnt` scales both
# shape parameters, so the mean alpha_1 / (alpha_1 + beta_1) = 0.2 does not depend on it
cnt = 20
alpha_1 = 1 * cnt
beta_1 = 4 * cnt
beta_distr = beta(alpha_1, beta_1)
# Shape is passed positionally: the `newshape` keyword of np.reshape is deprecated
# in recent NumPy releases, while the positional form works on every version
beta_pdf_abscissa = np.reshape(beta_distr.pdf(abscissa_points), (1, -1))
# Same density repeated on every row (one row per simulated function)
beta_pdf_matrix = np.dot(col_vector_ones_num_functions, beta_pdf_abscissa)
plt.plot(abscissa_points, beta_pdf_abscissa[0], '-o')

In [None]:
# Wrap the Beta density as a functional object on the simulation grid
beta_grid = FDataGrid(data_matrix=beta_pdf_abscissa[0], grid_points=abscissa_points, domain_range=domain_range)
# B-spline representation of the density, used here only for plotting
beta_basis = beta_grid.to_basis(basis_bsplines)
zzzz = beta_basis.plot()

In [None]:
# Build the target: inner product of every simulated curve with beta_grid, plus Gaussian noise
np.random.seed(12345)
noiseless_target = inner_product(X_simulated_grid, beta_grid)
observation_noise = np.random.normal(scale=0.001, size=num_functions)
y = noiseless_target + observation_noise
y

In [None]:
# Target against the curve values at abscissa index 21 (t = 0.21 on the 0.01-step grid),
# next to the mean (0.2) of the Beta(20, 80) density that weights the curves in y
plt.scatter(X_simulated[:, 21], y)

In [None]:
# L2 penalty on a linear differential operator built with argument 2
# (presumably the second derivative -- confirm against the scikit-fda docs)
penalty_operator = LinearDifferentialOperator(2)
regularization = L2Regularization(penalty_operator)
type(regularization)

In [None]:
# Fit the model
# An unregularized LinearRegression is fitted on the B-spline representation of the
# simulated curves. The commented-out lines are the alternatives: the regularized
# model and the Fourier representation of the curves.
#linear_reg = LinearRegression(regularization=regularization)
linear_reg = LinearRegression()
#_ = linear_reg.fit(X_basis_fourier, y)
_ = linear_reg.fit(X_basis_bsplines, y)
# First entry of coef_ -- presumably the estimated coefficient function of the
# single functional covariate (it is printed and plotted below)
beta_estimated = linear_reg.coef_[0]
#predicted_values = linear_reg.predict(X_basis_fourier)
# In-sample predictions: same data as used for fitting
predicted_values = linear_reg.predict(X_basis_bsplines)
print(beta_estimated)
_n_plot = beta_estimated.plot()

In [None]:
def my_predict(grid_points, domain_range, beta_grid):
    """Build a predictor that integrates raw curve values against a fixed function.

    Parameters
    ----------
    grid_points
        Abscissa points handed to ``FDataGrid`` for every incoming data matrix.
    domain_range
        Domain handed to ``FDataGrid`` for every incoming data matrix.
    beta_grid
        Functional object used as second argument of ``inner_product``.

    Returns
    -------
    callable
        ``inner_pred(X)``: wraps ``X`` in an ``FDataGrid`` and returns
        ``inner_product`` of that grid with ``beta_grid``.
    """
    def inner_pred(X):
        # Wrap the raw values so they can be integrated against beta_grid
        curves = FDataGrid(
            data_matrix=X,
            grid_points=grid_points,
            domain_range=domain_range,
        )
        return inner_product(curves, beta_grid)
    return inner_pred

In [None]:
# "Gold" predictor: inner product with beta_grid, i.e. the same functional that was
# used to generate y before the noise was added
pred_gold = my_predict(abscissa_points, domain_range, beta_grid)

In [None]:
# Gold predictions against the observed target; the two axes differ only by the
# noise added when y was built
plt.scatter(pred_gold(X_simulated), y)

In [None]:
# In-sample predictions of the fitted regression against the observed target
n_plot2 = plt.scatter(predicted_values, y)

In [None]:
# Presumably adapts linear_reg.predict so that it can be called on raw numpy curve
# values (predict_from_np is a project helper -- confirm its contract).
# NOTE(review): linear_reg was fitted on the B-spline representation (X_basis_bsplines),
# but the Fourier `basis` is passed here -- confirm whether `basis_bsplines` was intended.
# predict_function is not used by the ShapleyFda call below, which receives pred_gold.
predict_function = predict_from_np(
    grid_points=abscissa_points,
    domain_range=domain_range,
    basis=basis,
    predict_fn = linear_reg.predict
)

In [None]:
# Inspect the evaluation grid shared by the curves and the Shapley computation
print(abscissa_points)

In [None]:
# Derivative of the simulated curves, taken on their Fourier representation and
# evaluated on abscissa_points so that its columns line up with those of X_simulated.
# Without an explicit grid, to_grid() falls back to the library's default evaluation
# grid, which need not coincide with abscissa_points.
derivative_X = np.squeeze(X_basis_fourier.derivative().to_grid(abscissa_points).data_matrix)

In [None]:
# Shapley analysis of the simulated curves
shapley_fda = ShapleyFda(
    #predict_fn=predict_function,
    # pred_gold (inner product with beta_grid) is the predictor being explained; the
    # commented-out line above would explain the fitted regression instead
    predict_fn=pred_gold,
    X=X_simulated,
    derivative_X=derivative_X,
    abscissa_points=abscissa_points,
    target=y,
    domain_range=domain_range,
    verbose=False,
)
# NOTE(review): num_permutations=1 looks like a quick smoke-test setting -- confirm
# before relying on the resulting values
values_shapley = shapley_fda.compute_shapley_value(num_intervals=10, num_permutations=1)

In [None]:
# Display the result returned by compute_shapley_value
values_shapley