In [None]:
import os
import zipfile
import urllib.request
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from scipy.io import arff

# Root folder (relative to the current working directory) under which every
# dataset archive is downloaded and extracted.
DATA_DIR = 'datasets'
# Create the folder up front; exist_ok=True makes re-running this cell harmless.
os.makedirs(DATA_DIR, exist_ok=True)

def download_dataset(dataset_name, url):
    """Download a zipped dataset and unpack it under ``DATA_DIR``.

    The archive is saved as ``DATA_DIR/<dataset_name>.zip``, extracted into
    ``DATA_DIR/<dataset_name>`` and then deleted.

    Parameters
    ----------
    dataset_name : str
        Name used for both the temporary zip file and the extraction folder.
    url : str
        Location of the zip archive to fetch.

    Returns
    -------
    str
        Path of the directory the archive was extracted into.

    Raises
    ------
    Whatever ``urllib.request.urlretrieve`` or ``zipfile.ZipFile`` raise on a
    failed download or an invalid archive; the temporary zip file is removed
    in that case as well.
    """
    # Do not rely on another cell having created the data folder already.
    os.makedirs(DATA_DIR, exist_ok=True)

    zip_path    = os.path.join(DATA_DIR, f"{dataset_name}.zip")
    extract_dir = os.path.join(DATA_DIR, dataset_name)
    try:
        urllib.request.urlretrieve(url, zip_path)
        with zipfile.ZipFile(zip_path, 'r') as zp:
            zp.extractall(extract_dir)
    finally:
        # Clean up even when the download or extraction fails, so a partial
        # or corrupt archive is never left behind for the next run.
        if os.path.exists(zip_path):
            os.remove(zip_path)
    return extract_dir

def load_arff_data(file_path):
    """Read an ARFF file and return its records as a pandas DataFrame.

    Parameters
    ----------
    file_path : str
        Path of the ``.arff`` file to read.

    Returns
    -------
    pandas.DataFrame
        One column per ARFF attribute, one row per data record. The ARFF
        metadata returned by the loader is discarded.
    """
    records, _ = arff.loadarff(file_path)
    frame = pd.DataFrame(records)
    return frame

def preprocess_data(train_df, test_df, valid_size=0.5, random_state=42, target_col='target'):
    """Split labels from features, carve a validation set out of ``test_df``
    and standardize all features.

    Parameters
    ----------
    train_df : pandas.DataFrame
        Training data; must contain the ``target_col`` column.
    test_df : pandas.DataFrame
        Held-out data that is divided into validation and test parts; must
        contain the ``target_col`` column.
    valid_size : float or int, default 0.5
        Share (float) or absolute number (int) of ``test_df`` rows assigned to
        the validation split; the remaining rows form the test split.
    random_state : int, default 42
        Seed forwarded to ``train_test_split``.
    target_col : str, default 'target'
        Name of the label column in both frames.

    Returns
    -------
    tuple
        ``(X_train, y_train, X_valid, y_valid, X_test, y_test)`` where the
        ``X_*`` items are DataFrames keeping their original columns and index,
        and the ``y_*`` items are integer Series.
    """
    # --- 1) Separate features & labels ---
    X_train_df = train_df.drop(columns=[target_col])
    y_train_df = train_df[target_col].astype(int)

    X_temp_df = test_df.drop(columns=[target_col])
    y_temp_df = test_df[target_col].astype(int)

    # --- 2) Split temp into validation & test ---
    # The first returned part is the validation set, so `valid_size` must be
    # passed as `train_size`. Passing it as `test_size` would silently give the
    # validation set the complementary share whenever valid_size != 0.5.
    X_valid_df, X_test_df, y_valid_df, y_test_df = train_test_split(
        X_temp_df, y_temp_df,
        train_size=valid_size,
        random_state=random_state,
        stratify=y_temp_df
    )

    # --- 3) Normalize all features (fit on train only) ---
    scaler = StandardScaler().fit(X_train_df)

    def _scale(frame):
        # Apply the train-fitted scaler while keeping column names and index.
        return pd.DataFrame(
            scaler.transform(frame),
            columns=frame.columns,
            index=frame.index
        )

    X_train_df = _scale(X_train_df)
    X_valid_df = _scale(X_valid_df)
    X_test_df = _scale(X_test_df)

    # --- 4) Return six DataFrames/Series ---
    return X_train_df, y_train_df, X_valid_df, y_valid_df, X_test_df, y_test_df

# Script entry point: download the dataset, load both ARFF splits, preprocess
# them and print the resulting shapes. The names assigned here (X_train,
# y_train, X_valid, y_valid, X_test, y_test, ...) live at module level.
if __name__ == "__main__":
    dataset_name = 'MelbournePedestrian'
    url = 'https://timeseriesclassification.com/aeon-toolkit/MelbournePedestrian.zip'

    # Fetch and extract the archive, then read the TRAIN/TEST ARFF files from
    # the extraction directory.
    path = download_dataset(dataset_name, url)
    train_df = load_arff_data(os.path.join(path, f"{dataset_name}_TRAIN.arff"))
    test_df  = load_arff_data(os.path.join(path, f"{dataset_name}_TEST.arff"))

    # Uses the defaults of preprocess_data (valid_size=0.5, random_state=42);
    # the validation and test sets are both taken from test_df.
    X_train, y_train, X_valid, y_valid, X_test, y_test = preprocess_data(train_df, test_df)

    # Quick sanity check of the split sizes.
    print("Shapes:")
    print("  X_train:", X_train.shape, " y_train:", y_train.shape)
    print("  X_valid:", X_valid.shape, " y_valid:", y_valid.shape)
    print("  X_test: ", X_test.shape,  " y_test: ", y_test.shape)


Shapes:
  X_train: (1194, 24)  y_train: (1194,)
  X_valid: (1219, 24)  y_valid: (1219,)
  X_test:  (1220, 24)  y_test:  (1220,)


In [1]:
import time_series_embeddings1 as embd
import clasfy_p1 as clasfy
import clasfy_p2 as clasfy2
import plot_umap as plt_um
import pandas as pd
import numpy

In [None]:
from sklearn.preprocessing import StandardScaler # Make sure StandardScaler is imported

# `std_scaling` is only reachable through the `embd` module alias, and the
# X_* objects are pandas DataFrames (which have no `.numpy()` method), so they
# are passed as-is, matching the other scaling call in this notebook.
train_sc, val_sc, test_sc = embd.std_scaling(X_train, X_valid, X_test)

In [6]:
# Scale the train / validation / test feature sets with the helper from the
# embeddings module.
train_sc, val_sc, test_sc = embd.std_scaling(X_train, X_valid, X_test)

# Without overlapping windows: the "*_sct" names are plain aliases of the
# scaled sets.
train_sct, test_sct, val_sct = train_sc, test_sc, val_sc

# Labels exactly as produced by the preprocessing step.
ny_train, ny_test, ny_val = y_train, y_test, y_valid

# Labels shifted down by one (presumably to make them zero-based -- TODO confirm).
ny_train2, ny_test2, ny_val2 = y_train - 1, y_test - 1, y_valid - 1


In [8]:
import importlib
import nnclr_embdtransformer as nn  # imported first so that it can be reloaded
importlib.reload(nn)  # re-execute the module so recent edits to it take effect

# Compute the NNCLR-transformer embeddings for the three scaled splits. The
# labels are shifted down by one before being passed in.
# NOTE(review): the meaning of the trailing positional arguments 24 and 10 is
# not visible here -- confirm against nnclr_embdtransformer.
(
    train_nn_transformer,
    val_nn_transformer,
    test_nn_transformer,
    train_time,
    inference_time,
) = nn.nnclr_transformer_embedding_with_timing(
    train_sc, val_sc, test_sc,
    y_train - 1, y_valid - 1, y_test - 1,
    24, 10,
)

x_train_df shape: (1138, 24)
y_train    shape: (1138,)
x_val_df   shape: (1159, 24)
y_val      shape: (1159,)
x_test_df  shape: (1160, 24)
y_test     shape: (1160,)


None


None
Epoch 1/20
36/36 - 20s - 542ms/step - c_acc: 0.0135 - c_loss: 3.9121 - p_acc: 0.1042 - p_loss: nan - r_acc: 0.0126 - val_p_acc: 0.1034 - val_p_loss: nan
Epoch 2/20
36/36 - 1s - 27ms/step - c_acc: 0.0151 - c_loss: 3.9120 - p_acc: 0.1042 - p_loss: nan - r_acc: 0.0165 - val_p_acc: 0.1034 - val_p_loss: nan
Epoch 3/20
36/36 - 1s - 27ms/step - c_acc: 0.0120 - c_loss: 3.9122 - p_acc: 0.1042 - p_loss: nan - r_acc: 0.0154 - val_p_acc: 0.1034 - val_p_loss: nan
Epoch 4/20
36/36 - 1s - 26ms/step - c_acc: 0.0168 - c_loss: 3.9120 - p_acc: 0.1042 - p_loss: nan - r_acc: 0.0184 - val_p_acc: 0.1034 - val_p_loss: nan
Epoch 5/20
36/36 - 1s - 26ms/step - c_acc: 0.0177 - c_loss: 3.9120 - p_acc: 0.1042 - p_loss: nan - r_acc: 0.0169 - val_p_acc: 0.1034 - val_p_loss: nan
Epoch 6/20
36/36 - 1s - 35ms/step - c_acc: 0.0146 - c_loss: 3.9121 - p_acc: 0.1042 - p_loss: nan - r_acc: 0.0145 - val_p_acc: 0.1034 - val_p_loss: nan
Epoch 7/20
36/36 - 1s - 40ms/step - c_acc: 0.0183 - c_loss: 3.9119 - p_acc: 0.1042 - p_

In [9]:
# Run tag handed to every classifier-optimization call in this notebook.
namem = "melbourne-transformer"

# Tune logistic regression on the transformer embeddings, then show the best
# parameters and score returned by the helper.
best_params, best_score = clasfy.optimize_LOGRG(
    train_nn_transformer, val_nn_transformer, test_nn_transformer,
    ny_train, ny_val, ny_test,
    namem,
)
print(best_params, best_score)


[I 2025-07-07 15:40:43,414] A new study created in memory with name: no-name-c66c2547-d24f-4555-b863-f5dadc3e4f91
[I 2025-07-07 15:40:43,449] Trial 0 finished with value: 0.10440034512510785 and parameters: {'C': 2.444823914909252e-09, 'fit_intercept': False, 'solver': 'saga', 'penalty': 'elasticnet', 'l1_ratio': 0.76002539133061}. Best is trial 0 with value: 0.10440034512510785.
[I 2025-07-07 15:40:43,487] Trial 1 finished with value: 0.10440034512510785 and parameters: {'C': 3.5164607088451946e-08, 'fit_intercept': False, 'solver': 'saga', 'penalty': 'elasticnet', 'l1_ratio': 0.8565167385363067}. Best is trial 0 with value: 0.10440034512510785.
[I 2025-07-07 15:40:43,572] Trial 2 finished with value: 0.10440034512510785 and parameters: {'C': 0.000539581503347045, 'fit_intercept': False, 'solver': 'saga', 'penalty': 'elasticnet', 'l1_ratio': 0.8310887627026855}. Best is trial 0 with value: 0.10440034512510785.
[I 2025-07-07 15:40:43,604] Trial 3 finished with value: 0.1044003451251078

0.8035569190979004  seconds
Classification report saved as melbourne-trans_classification_report_2025-07-07_16-21-30.txt
{'C': 98.23338852003522, 'fit_intercept': True, 'solver': 'saga', 'penalty': 'elasticnet', 'l1_ratio': 0.9582021266211158} 0.27413793103448275


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


In [10]:
# Tune a decision tree on the transformer embeddings, then show the best
# parameters and score returned by the helper.
best_params, best_score = clasfy.optimize_DT(
    train_nn_transformer, val_nn_transformer, test_nn_transformer,
    ny_train, ny_val, ny_test,
    namem,
)
print(best_params, best_score)


[I 2025-07-07 16:21:30,117] A new study created in memory with name: no-name-ceeeb815-5288-4df7-a2f4-3bb7b994a72d
[I 2025-07-07 16:21:30,191] Trial 0 finished with value: 0.7627264883520276 and parameters: {'max_depth': 29, 'criterion': 'gini'}. Best is trial 0 with value: 0.7627264883520276.
[I 2025-07-07 16:21:30,253] Trial 1 finished with value: 0.7773943054357204 and parameters: {'max_depth': 18, 'criterion': 'entropy'}. Best is trial 1 with value: 0.7773943054357204.
[I 2025-07-07 16:21:30,314] Trial 2 finished with value: 0.7730802415875755 and parameters: {'max_depth': 14, 'criterion': 'log_loss'}. Best is trial 1 with value: 0.7773943054357204.
[I 2025-07-07 16:21:30,361] Trial 3 finished with value: 0.7359792924935289 and parameters: {'max_depth': 5, 'criterion': 'entropy'}. Best is trial 1 with value: 0.7773943054357204.
[I 2025-07-07 16:21:30,426] Trial 4 finished with value: 0.7834339948231234 and parameters: {'max_depth': 14, 'criterion': 'entropy'}. Best is trial 4 with v

0.0412600040435791  seconds
Classification report saved as melbourne-trans_classification_report_2025-07-07_16-21-35.txt
{'max_depth': 24, 'criterion': 'log_loss'} 0.7517241379310344


In [11]:
# Tune a random forest on the transformer embeddings, then show the best
# parameters and score returned by the helper.
best_params, best_score = clasfy.optimize_RF(
    train_nn_transformer, val_nn_transformer, test_nn_transformer,
    ny_train, ny_val, ny_test,
    namem,
)
print(best_params, best_score)

[I 2025-07-07 16:21:35,326] A new study created in memory with name: no-name-3ffb032a-9dd9-4792-91bd-2128095b1eeb
[I 2025-07-07 16:21:35,932] Trial 0 finished with value: 0.8101811906816221 and parameters: {'n_estimators': 160, 'max_depth': 22}. Best is trial 0 with value: 0.8101811906816221.
[I 2025-07-07 16:21:36,348] Trial 1 finished with value: 0.7023295944779983 and parameters: {'n_estimators': 174, 'max_depth': 4}. Best is trial 0 with value: 0.8101811906816221.
[I 2025-07-07 16:21:36,507] Trial 2 finished with value: 0.8050043140638481 and parameters: {'n_estimators': 39, 'max_depth': 20}. Best is trial 0 with value: 0.8101811906816221.
[I 2025-07-07 16:21:36,766] Trial 3 finished with value: 0.5867126833477135 and parameters: {'n_estimators': 136, 'max_depth': 2}. Best is trial 0 with value: 0.8101811906816221.
[I 2025-07-07 16:21:37,466] Trial 4 finished with value: 0.811044003451251 and parameters: {'n_estimators': 187, 'max_depth': 11}. Best is trial 4 with value: 0.81104400

0.5325977802276611  seconds
Classification report saved as melbourne-trans_classification_report_2025-07-07_16-22-15.txt
{'n_estimators': 141, 'max_depth': 21} 0.7939655172413793


In [14]:
# Tune naive Bayes (helper from the second classifier module) on the
# transformer embeddings, then show the best parameters and score it returns.
best_params, best_score = clasfy2.optimize_NB(
    train_nn_transformer, val_nn_transformer, test_nn_transformer,
    ny_train, ny_val, ny_test,
    namem,
)
print(best_params, best_score)


[I 2025-07-07 16:22:35,310] A new study created in memory with name: no-name-14c79f9c-d61f-476c-bb18-6e6cdb3fd595
[I 2025-07-07 16:22:35,323] Trial 0 finished with value: 0.5314926660914582 and parameters: {'var_smoothing': 1.615341962004123e-12}. Best is trial 0 with value: 0.5314926660914582.
[I 2025-07-07 16:22:35,334] Trial 1 finished with value: 0.5314926660914582 and parameters: {'var_smoothing': 7.916219835834229e-08}. Best is trial 0 with value: 0.5314926660914582.
[I 2025-07-07 16:22:35,344] Trial 2 finished with value: 0.5306298533218292 and parameters: {'var_smoothing': 6.7628898911476615e-06}. Best is trial 0 with value: 0.5314926660914582.
[I 2025-07-07 16:22:35,353] Trial 3 finished with value: 0.5306298533218292 and parameters: {'var_smoothing': 1.3554400447231215e-06}. Best is trial 0 with value: 0.5314926660914582.
[I 2025-07-07 16:22:35,362] Trial 4 finished with value: 0.5314926660914582 and parameters: {'var_smoothing': 8.937414778202161e-11}. Best is trial 0 with v

Best hyperparameters:  {'var_smoothing': 1.615341962004123e-12}
Best validation accuracy:  0.5314926660914582
Test accuracy: 0.5431034482758621
Time taken: 0.00912165641784668 seconds
Classification report saved as melbourne-trans_classification_report_2025-07-07_16-22-36.txt
{'var_smoothing': 1.615341962004123e-12} 0.5431034482758621
