In [7]:
import os
import zipfile
import urllib.request
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from scipy.io import arff
import torch
from torch.utils.data import TensorDataset, DataLoader

# Root folder (relative to the working directory) that holds every downloaded
# and extracted dataset; created up front so later writes cannot fail on a
# missing parent directory.
DATA_DIR = 'datasets'
os.makedirs(DATA_DIR, exist_ok=True)

def download_dataset(dataset_name, url):
    """
    Downloads a ZIP archive and extracts it to DATA_DIR/dataset_name.

    The archive is stored temporarily as DATA_DIR/<dataset_name>.zip,
    extracted into DATA_DIR/<dataset_name>, and the temporary ZIP is then
    deleted. The deletion also happens when the download or the extraction
    fails, so a partial or corrupt archive is never left behind.

    Args:
        dataset_name: Name used for the temporary ZIP file and for the
            extraction sub-directory.
        url: Location of the ZIP archive, handed to
            urllib.request.urlretrieve.

    Returns:
        The path of the directory the archive was extracted into.

    Raises:
        urllib.error.URLError: If the archive cannot be retrieved.
        zipfile.BadZipFile: If the downloaded file is not a valid ZIP archive.
    """
    zip_path = os.path.join(DATA_DIR, f"{dataset_name}.zip")
    extract_path = os.path.join(DATA_DIR, dataset_name)

    # The module-level makedirs may have run long ago (or the folder may have
    # been deleted since); make sure the download target exists right now.
    os.makedirs(DATA_DIR, exist_ok=True)

    try:
        print(f"Downloading {dataset_name} from {url}...")
        urllib.request.urlretrieve(url, zip_path)

        print(f"Extracting {dataset_name}...")
        with zipfile.ZipFile(zip_path, 'r') as zip_ref:
            zip_ref.extractall(extract_path)
    finally:
        # Always discard the temporary archive, even on failure; it may not
        # exist at all if the download itself never started writing.
        if os.path.exists(zip_path):
            os.remove(zip_path)

    print(f"Dataset {dataset_name} extracted to {extract_path}")
    return extract_path


def load_arff_data(file_path):
    """
    Reads the ARFF file at file_path and returns its records as a
    pandas DataFrame (the ARFF metadata is discarded).
    """
    print(f"Loading ARFF file: {file_path}")
    # loadarff yields (records, metadata); only the records are needed here.
    records = arff.loadarff(file_path)[0]
    frame = pd.DataFrame(records)
    return frame


def preprocess_data(
    train_paths, test_paths,
    batch_size=64,
    valid_size=0.5,
    random_state=42
):
    """
    Loads a multivariate time-series dataset stored as one ARFF file per
    dimension, standardizes it, splits the original test set into
    validation and test parts, and returns PyTorch DataLoaders PLUS
    flattened pandas DataFrames/Series.

    Args:
        train_paths: ARFF paths of the training split, one per dimension.
        test_paths:  ARFF paths of the test split, one per dimension, in the
            same dimension order as train_paths.
        batch_size:  Batch size used by all three DataLoaders.
        valid_size:  Fraction (or absolute number) of the ORIGINAL test split
            that becomes the validation set; the remainder is the final
            test set.
        random_state: Seed for the stratified validation/test split.

    Returns:
      train_loader, valid_loader, test_loader,
      X_train_df, y_train_df,
      X_valid_df, y_valid_df,
      X_test_df,  y_test_df

      The loaders yield (X, y) with X shaped (batch, time_steps, n_dims).
      Only the training loader shuffles and drops its last incomplete batch;
      the validation and test loaders keep every sample.
      The DataFrames hold the same data flattened to
      (n_samples, time_steps * n_dims) in time-major order: all dimensions of
      the first time step, then all dimensions of the second, and so on.
      Columns are named dim{d}_{feature}.

    Raises:
        ValueError: If the path lists are empty or differ in length, or if a
            label is not present in the label mapping.
    """
    train_paths = list(train_paths)
    test_paths = list(test_paths)
    if not train_paths or len(train_paths) != len(test_paths):
        raise ValueError(
            "train_paths and test_paths must be non-empty and list the same "
            f"number of dimensions (got {len(train_paths)} and {len(test_paths)})"
        )

    # 1) Load all dimensions
    train_dfs = [load_arff_data(p) for p in train_paths]
    test_dfs  = [load_arff_data(p) for p in test_paths]

    # 2) Extract features (drop label column)
    train_feats = [df.drop(columns=['activity']) for df in train_dfs]
    test_feats  = [df.drop(columns=['activity']) for df in test_dfs]

    # 3) Map string labels to integers (ARFF nominal values load as bytes)
    label_mapping = {
        b'Badminton_Smash':      0,
        b'Badminton_Clear':      1,
        b'Squash_ForehandBoast': 2,
        b'Squash_BackhandBoast': 3
    }

    def encode_labels(df, split_name):
        # Series.map leaves NaN for unknown labels; fail loudly instead of
        # letting NaN be cast to a meaningless int64 class index later on.
        encoded = df['activity'].map(label_mapping)
        missing = encoded.isna()
        if missing.any():
            unknown = list(pd.unique(df.loc[missing, 'activity']))
            raise ValueError(f"Unmapped labels in {split_name} data: {unknown}")
        return encoded.astype(np.int64).values

    # Labels are taken from the first dimension's file only.
    # NOTE(review): assumes every dimension file lists samples in the same
    # order with identical labels -- confirm for new datasets.
    y_train = encode_labels(train_dfs[0], 'train')
    y_test0 = encode_labels(test_dfs[0], 'test')

    # 4) Normalize each dimension independently. StandardScaler standardizes
    #    every column, i.e. each time step of a dimension is scaled on its
    #    own, using statistics fitted on the training split only.
    scalers    = [StandardScaler() for _ in train_feats]
    train_norm = [sc.fit_transform(feats) for sc, feats in zip(scalers, train_feats)]
    test_norm  = [sc.transform(feats)     for sc, feats in zip(scalers, test_feats)]

    # 5) Stack into a 3D array: (n_samples, time_steps, n_dims)
    X_train_3d = np.stack(train_norm, axis=-1)
    X_test_3d  = np.stack(test_norm,  axis=-1)

    # 6) Split original test into validation and test sets.
    #    train_test_split returns (first part, second part); the first part
    #    is the validation set here, so valid_size must be its train_size.
    #    (For the default 0.5 this yields the same split as before.)
    X_valid_3d, X_test_3d, y_valid, y_test = train_test_split(
        X_test_3d, y_test0,
        train_size=valid_size,
        random_state=random_state,
        stratify=y_test0
    )

    # 7) Build DataFrame column names: dim{d}_{feature}.
    #    reshape() below flattens (time_steps, n_dims) with the dimension
    #    index varying fastest, so the names must be generated in that same
    #    time-major order to label the right values.
    n_steps = X_train_3d.shape[1]
    cols = [
        f"dim{d}_{feats.columns[t]}"
        for t in range(n_steps)
        for d, feats in enumerate(train_feats, start=1)
    ]

    def make_df(X3d):
        flat = X3d.reshape(X3d.shape[0], -1)
        return pd.DataFrame(flat, columns=cols)

    X_train_df = make_df(X_train_3d)
    X_valid_df = make_df(X_valid_3d)
    X_test_df  = make_df(X_test_3d)

    y_train_df = pd.Series(y_train, name='activity')
    y_valid_df = pd.Series(y_valid, name='activity')
    y_test_df  = pd.Series(y_test,  name='activity')

    # 8) Convert to PyTorch tensors for DataLoaders
    def to_tensor(X3d, y):
        return (
            torch.tensor(X3d, dtype=torch.float32),
            torch.tensor(y,   dtype=torch.int64)
        )

    Xt, yt = to_tensor(X_train_3d, y_train)
    Xv, yv = to_tensor(X_valid_3d, y_valid)
    Xe, ye = to_tensor(X_test_3d,  y_test)

    # 9) Create DataLoaders. Only training drops the trailing partial batch;
    #    dropping it for validation/test would silently exclude samples from
    #    evaluation (all of them when the split is smaller than batch_size).
    train_loader = DataLoader(
        TensorDataset(Xt, yt),
        batch_size=batch_size, shuffle=True, drop_last=True
    )
    valid_loader = DataLoader(
        TensorDataset(Xv, yv),
        batch_size=batch_size, shuffle=False, drop_last=False
    )
    test_loader  = DataLoader(
        TensorDataset(Xe, ye),
        batch_size=batch_size, shuffle=False, drop_last=False
    )

    return (
        train_loader, valid_loader, test_loader,
        X_train_df, y_train_df,
        X_valid_df, y_valid_df,
        X_test_df,  y_test_df
    )


if __name__ == "__main__":
    # --- Fetch and unpack the RacketSports archive -------------------------
    dataset_name = 'RacketSports'
    url = 'https://timeseriesclassification.com/aeon-toolkit/RacketSports.zip'
    extract_path = download_dataset(dataset_name, url)

    # --- One ARFF file per dimension (1..6) for each split -----------------
    train_arff_paths = [
        os.path.join(extract_path, f'RacketSportsDimension{i}_TRAIN.arff')
        for i in range(1, 7)
    ]
    test_arff_paths = [
        os.path.join(extract_path, f'RacketSportsDimension{i}_TEST.arff')
        for i in range(1, 7)
    ]

    # --- Build loaders and flattened tables; later cells use these names ---
    (train_loader, valid_loader, test_loader,
     X_train_df, y_train_df,
     X_valid_df, y_valid_df,
     X_test_df, y_test_df) = preprocess_data(
        train_paths=train_arff_paths,
        test_paths=test_arff_paths,
        batch_size=64,
    )

    # --- Quick sanity report -----------------------------------------------
    n_classes = len(y_train_df.unique())
    print(f"Number of classes: {n_classes}")
    print("Shapes:")
    print("  X_train_df:", X_train_df.shape, " y_train:", y_train_df.shape)
    print("  X_valid_df:", X_valid_df.shape, " y_valid:", y_valid_df.shape)
    print("  X_test_df :", X_test_df.shape,  " y_test :", y_test_df.shape)


Downloading RacketSports from https://timeseriesclassification.com/aeon-toolkit/RacketSports.zip...
Extracting RacketSports...
Dataset RacketSports extracted to datasets/RacketSports
Loading ARFF file: datasets/RacketSports/RacketSportsDimension1_TRAIN.arff
Loading ARFF file: datasets/RacketSports/RacketSportsDimension2_TRAIN.arff
Loading ARFF file: datasets/RacketSports/RacketSportsDimension3_TRAIN.arff
Loading ARFF file: datasets/RacketSports/RacketSportsDimension4_TRAIN.arff
Loading ARFF file: datasets/RacketSports/RacketSportsDimension5_TRAIN.arff
Loading ARFF file: datasets/RacketSports/RacketSportsDimension6_TRAIN.arff
Loading ARFF file: datasets/RacketSports/RacketSportsDimension1_TEST.arff
Loading ARFF file: datasets/RacketSports/RacketSportsDimension2_TEST.arff
Loading ARFF file: datasets/RacketSports/RacketSportsDimension3_TEST.arff
Loading ARFF file: datasets/RacketSports/RacketSportsDimension4_TEST.arff
Loading ARFF file: datasets/RacketSports/RacketSportsDimension5_TEST.ar

In [6]:
import embd_fgit as embd
import clasfy_p1 as clasfy
import clasfy_p2 as clasfy2
import plot_umap as plt_um
import pandas as pd
import numpy

In [12]:
# Scale the flattened train/validation/test feature tables with the project
# helper embd.std_scaling.
train_sc, val_sc, test_sc = embd.std_scaling(X_train_df, X_valid_df, X_test_df)

# Aliases of the scaled tables (same objects, no copy is made).
train_sct, val_sct, test_sct = train_sc, val_sc, test_sc

# Label Series under the names the classifier cells expect.
ny_train, ny_val, ny_test = y_train_df, y_valid_df, y_test_df

# Labels shifted down by one.
# NOTE(review): preprocess_data encodes the classes as 0..3, so these shifted
# copies run -1..2 -- confirm that whatever consumes them expects that.
ny_train2, ny_val2, ny_test2 = (
    labels - 1 for labels in (y_train_df, y_valid_df, y_test_df)
)


In [None]:
# Discrete wavelet transform: embed the scaled train/validation/test tables
# with the project helper embd.wavelet_embedding.
train_wt, val_wt, test_wt = embd.wavelet_embedding(
    train_sc, val_sc, test_sc
)

In [16]:
# Tag handed to the classifier helpers for this feature set.
# NOTE(review): the recorded output shows the report being saved with an
# "ECG5000_pca" prefix rather than this tag -- check how clasfy uses it.
namem = "Racket_wt"

# Hyperparameter search via clasfy.optimize_LOGRG on the wavelet features;
# the helper returns the selected parameters and a score, printed below.
best_params, best_score = clasfy.optimize_LOGRG(
    train_wt, val_wt, test_wt,
    ny_train, ny_val, ny_test,
    namem,
)
print(best_params, best_score)


[I 2025-04-24 23:25:38,421] A new study created in memory with name: no-name-9ed72870-779e-4eef-97ea-c2929159d986
[I 2025-04-24 23:25:38,440] Trial 0 finished with value: 0.23684210526315788 and parameters: {'C': 7.475923892099031e-09, 'fit_intercept': True, 'solver': 'saga', 'penalty': 'elasticnet', 'l1_ratio': 0.35178783969699035}. Best is trial 0 with value: 0.23684210526315788.
[I 2025-04-24 23:25:38,457] Trial 1 finished with value: 0.2631578947368421 and parameters: {'C': 0.0008231685578053126, 'fit_intercept': False, 'solver': 'saga', 'penalty': 'elasticnet', 'l1_ratio': 0.10630699245657327}. Best is trial 1 with value: 0.2631578947368421.
[I 2025-04-24 23:25:38,798] Trial 2 finished with value: 0.7368421052631579 and parameters: {'C': 16.72008321134883, 'fit_intercept': True, 'solver': 'saga', 'penalty': 'elasticnet', 'l1_ratio': 0.8417710812844543}. Best is trial 2 with value: 0.7368421052631579.
[I 2025-04-24 23:25:38,926] Trial 3 finished with value: 0.7763157894736842 and p

0.020149707794189453  seconds
Classification report saved as ECG5000_pca_classification_report_2025-04-24_23-26-04.txt
{'C': 594.3394528314901, 'fit_intercept': False, 'solver': 'saga', 'penalty': 'elasticnet', 'l1_ratio': 0.9714385934393621} 0.7631578947368421




In [17]:
# Hyperparameter search via clasfy.optimize_DT on the wavelet features;
# prints the selected parameters and the returned score.
best_params, best_score = clasfy.optimize_DT(
    train_wt, val_wt, test_wt,
    ny_train, ny_val, ny_test,
    namem,
)
print(best_params, best_score)


[I 2025-04-24 23:26:27,725] A new study created in memory with name: no-name-ce7b1fb4-700e-4743-b1a1-392fdfb69179
[I 2025-04-24 23:26:27,732] Trial 0 finished with value: 0.618421052631579 and parameters: {'max_depth': 18, 'criterion': 'entropy'}. Best is trial 0 with value: 0.618421052631579.
[I 2025-04-24 23:26:27,738] Trial 1 finished with value: 0.5657894736842105 and parameters: {'max_depth': 13, 'criterion': 'entropy'}. Best is trial 0 with value: 0.618421052631579.
[I 2025-04-24 23:26:27,744] Trial 2 finished with value: 0.5131578947368421 and parameters: {'max_depth': 4, 'criterion': 'entropy'}. Best is trial 0 with value: 0.618421052631579.
[I 2025-04-24 23:26:27,749] Trial 3 finished with value: 0.5789473684210527 and parameters: {'max_depth': 25, 'criterion': 'entropy'}. Best is trial 0 with value: 0.618421052631579.
[I 2025-04-24 23:26:27,754] Trial 4 finished with value: 0.42105263157894735 and parameters: {'max_depth': 2, 'criterion': 'entropy'}. Best is trial 0 with valu

0.004650592803955078  seconds
Classification report saved as ECG5000_pca_classification_report_2025-04-24_23-26-28.txt
{'max_depth': 18, 'criterion': 'gini'} 0.6052631578947368


In [18]:
# Hyperparameter search via clasfy.optimize_RF on the wavelet features;
# prints the selected parameters and the returned score.
best_params, best_score = clasfy.optimize_RF(
    train_wt, val_wt, test_wt,
    ny_train, ny_val, ny_test,
    namem,
)
print(best_params, best_score)

[I 2025-04-24 23:26:43,988] A new study created in memory with name: no-name-7c0a278d-b950-4bf3-9557-de8df2c9d72b
[I 2025-04-24 23:26:44,154] Trial 0 finished with value: 0.8026315789473685 and parameters: {'n_estimators': 106, 'max_depth': 21}. Best is trial 0 with value: 0.8026315789473685.
[I 2025-04-24 23:26:44,418] Trial 1 finished with value: 0.7894736842105263 and parameters: {'n_estimators': 172, 'max_depth': 28}. Best is trial 0 with value: 0.8026315789473685.
[I 2025-04-24 23:26:44,631] Trial 2 finished with value: 0.8026315789473685 and parameters: {'n_estimators': 139, 'max_depth': 24}. Best is trial 0 with value: 0.8026315789473685.
[I 2025-04-24 23:26:44,748] Trial 3 finished with value: 0.7631578947368421 and parameters: {'n_estimators': 79, 'max_depth': 5}. Best is trial 0 with value: 0.8026315789473685.
[I 2025-04-24 23:26:44,803] Trial 4 finished with value: 0.7631578947368421 and parameters: {'n_estimators': 34, 'max_depth': 16}. Best is trial 0 with value: 0.8026315

0.20635604858398438  seconds
Classification report saved as ECG5000_pca_classification_report_2025-04-24_23-27-03.txt
{'n_estimators': 138, 'max_depth': 10} 0.6973684210526315


In [19]:
# Hyperparameter search via clasfy.optimize_KNN on the wavelet features;
# prints the selected parameters and the returned score.
best_params, best_score = clasfy.optimize_KNN(
    train_wt, val_wt, test_wt,
    ny_train, ny_val, ny_test,
    namem,
)
print(best_params, best_score)


[I 2025-04-24 23:27:04,165] A new study created in memory with name: no-name-0a52be4c-1f83-48ab-a43b-fbde3d259afa
[I 2025-04-24 23:27:04,222] Trial 0 finished with value: 0.8157894736842105 and parameters: {'n_neighbors': 9, 'weights': 'distance', 'algorithm': 'brute'}. Best is trial 0 with value: 0.8157894736842105.
[I 2025-04-24 23:27:04,227] Trial 1 finished with value: 0.7631578947368421 and parameters: {'n_neighbors': 11, 'weights': 'uniform', 'algorithm': 'auto'}. Best is trial 0 with value: 0.8157894736842105.
[I 2025-04-24 23:27:04,231] Trial 2 finished with value: 0.8289473684210527 and parameters: {'n_neighbors': 10, 'weights': 'distance', 'algorithm': 'ball_tree'}. Best is trial 2 with value: 0.8289473684210527.
[I 2025-04-24 23:27:04,240] Trial 3 finished with value: 0.7105263157894737 and parameters: {'n_neighbors': 16, 'weights': 'uniform', 'algorithm': 'kd_tree'}. Best is trial 2 with value: 0.8289473684210527.
[I 2025-04-24 23:27:04,248] Trial 4 finished with value: 0.7

0.002549886703491211  seconds
Classification report saved as ECG5000_pca_classification_report_2025-04-24_23-27-05.txt
{'n_neighbors': 2, 'weights': 'distance', 'algorithm': 'brute'} 0.7631578947368421


In [20]:

# Hyperparameter search via clasfy.optimize_SVM on the wavelet features;
# prints the selected parameters and the returned score.
best_params, best_score = clasfy.optimize_SVM(
    train_wt, val_wt, test_wt,
    ny_train, ny_val, ny_test,
    namem,
)
print(best_params, best_score)


[I 2025-04-24 23:27:24,938] A new study created in memory with name: no-name-7a26d96b-6edd-48ab-9262-3720eb8f4bb8
[I 2025-04-24 23:27:24,944] Trial 0 finished with value: 0.7631578947368421 and parameters: {'C': 0.1, 'kernel': 'linear', 'degree': 5, 'gamma': 'scale'}. Best is trial 0 with value: 0.7631578947368421.
[I 2025-04-24 23:27:24,950] Trial 1 finished with value: 0.8289473684210527 and parameters: {'C': 1, 'kernel': 'rbf', 'degree': 2, 'gamma': 'scale'}. Best is trial 1 with value: 0.8289473684210527.
[I 2025-04-24 23:27:24,956] Trial 2 finished with value: 0.8289473684210527 and parameters: {'C': 1, 'kernel': 'rbf', 'degree': 5, 'gamma': 'scale'}. Best is trial 1 with value: 0.8289473684210527.
[I 2025-04-24 23:27:24,961] Trial 3 finished with value: 0.2894736842105263 and parameters: {'C': 0.1, 'kernel': 'rbf', 'degree': 4, 'gamma': 'scale'}. Best is trial 1 with value: 0.8289473684210527.
[I 2025-04-24 23:27:24,967] Trial 4 finished with value: 0.27631578947368424 and parame

0.00477147102355957  seconds
Classification report saved as ECG5000_pca_classification_report_2025-04-24_23-27-26.txt
{'C': 1, 'kernel': 'rbf', 'degree': 2, 'gamma': 'scale'} 0.7763157894736842


In [21]:
# Hyperparameter search via clasfy2.optimize_NB on the wavelet features.
# In the recorded run the returned score equals the printed test accuracy.
best_params, best_score = clasfy2.optimize_NB(
    train_wt, val_wt, test_wt,
    ny_train, ny_val, ny_test,
    namem,
)
print(best_params, best_score)


[I 2025-04-24 23:27:39,001] A new study created in memory with name: no-name-a643eea9-9fc1-4bac-b8a1-036c97f60a4b
[I 2025-04-24 23:27:39,005] Trial 0 finished with value: 0.6710526315789473 and parameters: {'var_smoothing': 0.002441710749329686}. Best is trial 0 with value: 0.6710526315789473.
[I 2025-04-24 23:27:39,009] Trial 1 finished with value: 0.6710526315789473 and parameters: {'var_smoothing': 2.1368885191526899e-07}. Best is trial 0 with value: 0.6710526315789473.
[I 2025-04-24 23:27:39,013] Trial 2 finished with value: 0.6710526315789473 and parameters: {'var_smoothing': 1.1710012739228229e-07}. Best is trial 0 with value: 0.6710526315789473.
[I 2025-04-24 23:27:39,017] Trial 3 finished with value: 0.6710526315789473 and parameters: {'var_smoothing': 0.0003923685898097412}. Best is trial 0 with value: 0.6710526315789473.
[I 2025-04-24 23:27:39,020] Trial 4 finished with value: 0.6710526315789473 and parameters: {'var_smoothing': 0.0032222994207822373}. Best is trial 0 with va

Best hyperparameters:  {'var_smoothing': 0.007926354296367755}
Best validation accuracy:  0.6842105263157895
Test accuracy: 0.7105263157894737
Time taken: 0.001981019973754883 seconds
Classification report saved as ECG5000_pca_classification_report_2025-04-24_23-27-39.txt
{'var_smoothing': 0.007926354296367755} 0.7105263157894737
