In [1]:
import os

import numpy as np
import pandas as pd
import skfda
import skfda.misc.hat_matrix as hm
import skfda.preprocessing.smoothing.validation as val
from skfda.preprocessing.smoothing import KernelSmoother
from tqdm.notebook import tqdm

from Preprocessing.FPCA import FPCA
from Utils import fixed_values, common_functions, paths
from Utils.common_functions import load_data

In [19]:
# Smooth every external train/test split of each data-set variant with a
# local linear regression kernel smoother and cache the results as pickles.
for data_set in ['base', 'filtered', 'easy']:
    filter_data = data_set == 'filtered'
    easy_data = data_set == 'easy'

    tt, X, y = load_data('FFT', filter_data=filter_data, easy_data=easy_data, remove_outliers=False)
    print(f"{data_set} All: {X.shape}")

    # Candidate bandwidths: integer multiples (1..23) of
    # (domain length / number of grid points).
    fd = skfda.FDataGrid(X, tt)
    n_neighbors = np.arange(1, 24)
    scale_factor = (
            (fd.domain_range[0][1] - fd.domain_range[0][0]) / len(fd.grid_points[0])
    )
    bandwidth = n_neighbors * scale_factor

    # Create the output directory up front: to_pickle does not create missing
    # parent directories, and failing after the first (expensive) bandwidth
    # search would waste the whole fit.
    output_dir = f'{paths.FFT_DATA_PATH}/smoothed_data/{data_set}'
    os.makedirs(output_dir, exist_ok=True)

    for idx_external in tqdm(range(fixed_values.EXTERNAL_SPLITS_SHUFFLE), desc=f"{data_set}"):
        X_train, X_test, y_train, y_test = common_functions.get_fold(X, y, idx_external, strategy='randomsplit')
        X_train_fd = skfda.FDataGrid(X_train, tt)
        X_test_fd = skfda.FDataGrid(X_test, tt)

        # The bandwidth search is fitted on the training curves only; the
        # fitted search object is then applied to both train and test curves.
        llr = val.SmoothingParameterSearch(
            KernelSmoother(kernel_estimator=hm.LocalLinearRegressionHatMatrix()),
            bandwidth,
            param_name='kernel_estimator__bandwidth',
        )
        llr.fit(X_train_fd)

        X_train_llr = llr.transform(X_train_fd)
        X_test_llr = llr.transform(X_test_fd)

        # Back to DataFrames with the same shape, columns and index as the
        # unsmoothed split.
        X_train_smoothed = pd.DataFrame(
            X_train_llr.data_matrix.reshape(X_train.shape),
            columns=X_train.columns,
            index=X_train.index,
        )
        X_test_smoothed = pd.DataFrame(
            X_test_llr.data_matrix.reshape(X_test.shape),
            columns=X_test.columns,
            index=X_test.index,
        )

        X_train_smoothed.to_pickle(f'{output_dir}/X_{idx_external}_train.pickle')
        X_test_smoothed.to_pickle(f'{output_dir}/X_{idx_external}_test.pickle')

base All: (536, 540)


base:   0%|          | 0/100 [00:00<?, ?it/s]

filtered All: (257, 540)


filtered:   0%|          | 0/100 [00:00<?, ?it/s]

easy All: (232, 540)


easy:   0%|          | 0/100 [00:00<?, ?it/s]

In [5]:
# Sanity check: reload every cached smoothed split of one data-set variant
# and print the train/test shapes.
data_set = 'filtered'
for idx_external in range(fixed_values.EXTERNAL_SPLITS_SHUFFLE):
    train_pickle = f'{paths.FFT_DATA_PATH}/smoothed_data/{data_set}/X_{idx_external}_train.pickle'
    test_pickle = f'{paths.FFT_DATA_PATH}/smoothed_data/{data_set}/X_{idx_external}_test.pickle'

    X_train_smoothed = pd.read_pickle(train_pickle)
    X_test_smoothed = pd.read_pickle(test_pickle)

    print(f"Smoothed: {X_train_smoothed.shape} {X_test_smoothed.shape}")

Smoothed: (192, 540) (65, 540)
Smoothed: (192, 540) (65, 540)
Smoothed: (192, 540) (65, 540)
Smoothed: (192, 540) (65, 540)
Smoothed: (192, 540) (65, 540)
Smoothed: (192, 540) (65, 540)
Smoothed: (192, 540) (65, 540)
Smoothed: (192, 540) (65, 540)
Smoothed: (192, 540) (65, 540)
Smoothed: (192, 540) (65, 540)
Smoothed: (192, 540) (65, 540)
Smoothed: (192, 540) (65, 540)
Smoothed: (192, 540) (65, 540)
Smoothed: (192, 540) (65, 540)
Smoothed: (192, 540) (65, 540)
Smoothed: (192, 540) (65, 540)
Smoothed: (192, 540) (65, 540)
Smoothed: (192, 540) (65, 540)
Smoothed: (192, 540) (65, 540)
Smoothed: (192, 540) (65, 540)
Smoothed: (192, 540) (65, 540)
Smoothed: (192, 540) (65, 540)
Smoothed: (192, 540) (65, 540)
Smoothed: (192, 540) (65, 540)
Smoothed: (192, 540) (65, 540)
Smoothed: (192, 540) (65, 540)
Smoothed: (192, 540) (65, 540)
Smoothed: (192, 540) (65, 540)
Smoothed: (192, 540) (65, 540)
Smoothed: (192, 540) (65, 540)
Smoothed: (192, 540) (65, 540)
Smoothed: (192, 540) (65, 540)
Smoothed

In [10]:
# Load one cached smoothed split through the project helper and report its
# train/test shapes. The first argument is presumably the external split
# index — confirm against common_functions.load_smoothed_data.
smoothed_fold = common_functions.load_smoothed_data(0, filter_data=True)
tt, X_train, X_test, y_train, y_test = smoothed_fold
print(f"Fold: {X_train.shape} {X_test.shape}")

Fold: (192, 540) (65, 540)


In [11]:
# Run the project's FPCA helper on the loaded fold, keeping
# fixed_values.MAX_DIMENSION components, and report the resulting shapes.
# `FPCA` is imported in the notebook's top import cell so that all
# dependencies are declared in one place.
X_train_pca, X_test_pca = FPCA.calculate_FPCA(X_train, X_test, tt, n_components=fixed_values.MAX_DIMENSION)
print(f"PCA: {X_train_pca.shape} {X_test_pca.shape}")

PCA: (192, 100) (65, 100)


In [21]:
# Consistency check for one data-set variant: for every external split, print
# the shapes of the cached smoothed data and cached PCA scores; for every
# internal split, print the shapes of the recomputed fold next to its cached
# PCA scores.
data_set = 'easy'
# NOTE(review): the smoothing cell passes remove_outliers=False to load_data;
# this call relies on the default — confirm both load the same rows.
tt, X, y = common_functions.load_data('FFT', filter_data=data_set == 'filtered', easy_data=data_set == 'easy')
print(X.shape)

smoothed_dir = f'{paths.FFT_DATA_PATH}/smoothed_data/{data_set}'
pca_dir = f"{paths.FPCA_PATH}/../smoothed/{data_set}"

for idx_external in range(fixed_values.EXTERNAL_SPLITS_SHUFFLE):
    X_train_smoothed = pd.read_pickle(f'{smoothed_dir}/X_{idx_external}_train.pickle')
    X_test_smoothed = pd.read_pickle(f'{smoothed_dir}/X_{idx_external}_test.pickle')

    print(f"Smoothed: {X_train_smoothed.shape} {X_test_smoothed.shape}", end=' ')

    X_train_pca = pd.read_pickle(f"{pca_dir}/PCA_{idx_external}_train.pickle")
    X_test_pca = pd.read_pickle(f"{pca_dir}/PCA_{idx_external}_test.pickle")
    print(f"PCA: {X_train_pca.shape} {X_test_pca.shape}")

    for idx_internal in range(fixed_values.INTERNAL_SPLITS):
        X_train, X_test, y_train, y_test = common_functions.get_fold(X, y, idx_external, idx_internal, strategy='randomsplit')

        # These shapes come from get_fold on the unsmoothed X loaded above,
        # not from a smoothed pickle, so they are labelled accordingly.
        print(f"\t Fold (unsmoothed): {X_train.shape} {X_test.shape}", end=' ')

        X_train_pca = pd.read_pickle(f"{pca_dir}/PCA_{idx_external}_{idx_internal}_train.pickle")
        X_test_pca = pd.read_pickle(f"{pca_dir}/PCA_{idx_external}_{idx_internal}_test.pickle")
        print(f"Internal PCA: {X_train_pca.shape} {X_test_pca.shape}")

(232, 540)
Smoothed: (174, 540) (58, 540) PCA: (174, 100) (58, 100)
	 Smoothed: (156, 540) (18, 540) Internal PCA: (156, 100) (18, 100)
	 Smoothed: (156, 540) (18, 540) Internal PCA: (156, 100) (18, 100)
	 Smoothed: (156, 540) (18, 540) Internal PCA: (156, 100) (18, 100)
	 Smoothed: (156, 540) (18, 540) Internal PCA: (156, 100) (18, 100)
	 Smoothed: (157, 540) (17, 540) Internal PCA: (157, 100) (17, 100)
	 Smoothed: (157, 540) (17, 540) Internal PCA: (157, 100) (17, 100)
	 Smoothed: (157, 540) (17, 540) Internal PCA: (157, 100) (17, 100)
	 Smoothed: (157, 540) (17, 540) Internal PCA: (157, 100) (17, 100)
	 Smoothed: (157, 540) (17, 540) Internal PCA: (157, 100) (17, 100)
	 Smoothed: (157, 540) (17, 540) Internal PCA: (157, 100) (17, 100)
Smoothed: (174, 540) (58, 540) PCA: (174, 100) (58, 100)
	 Smoothed: (156, 540) (18, 540) Internal PCA: (156, 100) (18, 100)
	 Smoothed: (156, 540) (18, 540) Internal PCA: (156, 100) (18, 100)
	 Smoothed: (156, 540) (18, 540) Internal PCA: (156, 100) (