In [2]:
from pathlib import Path

import numpy as np
import pandas as pd
import skfda
import skfda.misc.hat_matrix as hm
import skfda.preprocessing.smoothing.validation as val
from skfda.preprocessing.smoothing import KernelSmoother
from tqdm.notebook import tqdm

from Utils import fixed_values, common_functions, paths
from Utils.common_functions import load_data

In [3]:
# Smooth every external train/test split of the FFT data with a local linear
# regression kernel smoother and store the smoothed curves, one pickle per
# split and per data-set variant.

# Largest kernel window tried, expressed as a number of grid points.
MAX_NEIGHBORS = 23


def _smoothed_frame(smoothed_fd, template):
    """Return smoothed curves as a DataFrame labelled like ``template``.

    Parameters
    ----------
    smoothed_fd : object exposing ``data_matrix``
        Output of the fitted smoother's ``transform``.
    template : pandas.DataFrame
        Frame the curves came from; its shape, columns and index are reused
        so the smoothed frame lines up with the unsmoothed one.

    Returns
    -------
    pandas.DataFrame
        ``smoothed_fd.data_matrix`` reshaped to ``template.shape``.
    """
    return pd.DataFrame(
        smoothed_fd.data_matrix.reshape(template.shape),
        columns=template.columns,
        index=template.index,
    )


for data_set in ['base', 'filtered', 'easy']:
    filter_data = data_set == 'filtered'
    easy_data = data_set == 'easy'

    tt, X, y = load_data('FFT', filter_data=filter_data, easy_data=easy_data, remove_outliers=False)
    fd = skfda.FDataGrid(X, tt)

    # Candidate bandwidths: 1..MAX_NEIGHBORS grid points, converted to domain
    # units with (domain length) / (number of grid points).
    n_neighbors = np.arange(1, MAX_NEIGHBORS + 1)
    domain_start, domain_end = fd.domain_range[0]
    scale_factor = (domain_end - domain_start) / len(fd.grid_points[0])
    bandwidth = n_neighbors * scale_factor

    # Create the target directory up front: otherwise a missing folder is only
    # discovered by `to_pickle`, after the first (expensive) bandwidth search.
    output_dir = f'{paths.FFT_DATA_PATH}/smoothed_data/{data_set}'
    Path(output_dir).mkdir(parents=True, exist_ok=True)

    for idx_external in tqdm(range(fixed_values.EXTERNAL_SPLITS_SHUFFLE), desc=f"{data_set}"):

        X_train, X_test, y_train, y_test = common_functions.get_fold(X, y, idx_external, strategy='randomsplit')
        X_train_fd = skfda.FDataGrid(X_train, tt)
        X_test_fd = skfda.FDataGrid(X_test, tt)

        # A fresh search per split; it is fitted on the training curves only
        # and the fitted smoother is then applied to both train and test.
        llr = val.SmoothingParameterSearch(
            KernelSmoother(kernel_estimator=hm.LocalLinearRegressionHatMatrix()),
            bandwidth,
            param_name='kernel_estimator__bandwidth',
        )
        llr.fit(X_train_fd)

        X_train_llr = llr.transform(X_train_fd)
        X_test_llr = llr.transform(X_test_fd)

        X_train_smoothed = _smoothed_frame(X_train_llr, X_train)
        X_test_smoothed = _smoothed_frame(X_test_llr, X_test)

        X_train_smoothed.to_pickle(f'{output_dir}/X_{idx_external}_train.pickle')
        X_test_smoothed.to_pickle(f'{output_dir}/X_{idx_external}_test.pickle')

base:   0%|          | 0/100 [00:00<?, ?it/s]

filtered:   0%|          | 0/100 [00:00<?, ?it/s]

easy:   0%|          | 0/100 [00:00<?, ?it/s]

array([0.00018467, 0.00036934, 0.00055401, 0.00073869, 0.00092336,
       0.00110803, 0.0012927 , 0.00147737, 0.00166204, 0.00184671,
       0.00203138, 0.00221606, 0.00240073, 0.0025854 , 0.00277007,
       0.00295474, 0.00313941, 0.00332408, 0.00350875, 0.00369343,
       0.0038781 , 0.00406277, 0.00424744])