In [None]:
import os
import zipfile
import urllib.request
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from scipy.io import arff

DATA_DIR = 'datasets'
os.makedirs(DATA_DIR, exist_ok=True)

def download_dataset(dataset_name, url, force_download=False):
    """Download a zipped dataset and unpack it under ``DATA_DIR``.

    The archive is fetched to ``DATA_DIR/<dataset_name>.zip``, extracted into
    ``DATA_DIR/<dataset_name>/`` and the zip file is removed afterwards.

    Parameters
    ----------
    dataset_name : str
        Name used for both the temporary zip file and the extraction folder.
    url : str
        Location of the zip archive, handed to ``urllib.request.urlretrieve``.
    force_download : bool, default False
        When False and the extraction folder already exists and is non-empty,
        nothing is downloaded and the existing folder is returned. Pass True
        to fetch and extract again (e.g. after an interrupted extraction).

    Returns
    -------
    str
        Path of the extraction directory.

    Raises
    ------
    Exception
        Errors raised by ``urlretrieve`` or ``zipfile.ZipFile`` are not caught
        here and propagate to the caller.
    """
    zip_path    = os.path.join(DATA_DIR, f"{dataset_name}.zip")
    extract_dir = os.path.join(DATA_DIR, dataset_name)

    # Reuse an earlier extraction so re-running the notebook does not hit the
    # network again.
    if not force_download and os.path.isdir(extract_dir) and os.listdir(extract_dir):
        return extract_dir

    try:
        urllib.request.urlretrieve(url, zip_path)
        with zipfile.ZipFile(zip_path, 'r') as zp:
            zp.extractall(extract_dir)
    finally:
        # Always drop the archive, including a partial or invalid one left
        # behind when the download or the extraction fails.
        if os.path.exists(zip_path):
            os.remove(zip_path)
    return extract_dir

def load_arff_data(file_path):
    """Read an ARFF file and return its records as a pandas DataFrame.

    Only the data part returned by ``scipy.io.arff.loadarff`` is used; the
    attribute metadata that comes with it is discarded.
    """
    records = arff.loadarff(file_path)[0]
    frame = pd.DataFrame(records)
    return frame

def preprocess_data(train_df, test_df, valid_size=0.5, random_state=42, target_col='target'):
    """Separate labels, carve a validation split out of ``test_df`` and standardize.

    Parameters
    ----------
    train_df, test_df : pandas.DataFrame
        Frames holding the feature columns plus the label column ``target_col``.
    valid_size : float or int, default 0.5
        Share (float) or absolute number (int) of ``test_df`` rows assigned to
        the validation split; the remaining rows form the test split.
    random_state : int, default 42
        Seed forwarded to ``train_test_split``.
    target_col : str, default 'target'
        Name of the label column present in both frames.

    Returns
    -------
    tuple
        ``(X_train, y_train, X_valid, y_valid, X_test, y_test)``. The ``X_*``
        items are DataFrames of standardized features that keep the original
        column names and row index; the ``y_*`` items are the label column
        cast to ``int``.
    """
    # --- 1) Separate features & labels ---
    X_train_df = train_df.drop(columns=[target_col])
    y_train_df = train_df[target_col].astype(int)

    X_temp_df = test_df.drop(columns=[target_col])
    y_temp_df = test_df[target_col].astype(int)

    # --- 2) Split temp into validation & test ---
    # The first pair returned by train_test_split is used as the validation
    # split, so valid_size has to be passed as train_size. Passing it as
    # test_size sized the *test* split instead, which only coincides at 0.5.
    X_valid_df, X_test_df, y_valid_df, y_test_df = train_test_split(
        X_temp_df, y_temp_df,
        train_size=valid_size,
        random_state=random_state,
        stratify=y_temp_df
    )

    # --- 3) Normalize all features (fit on train only) ---
    scaler = StandardScaler().fit(X_train_df)

    def _standardize(frame):
        # Re-wrap the scaled array so column names and the row index survive.
        return pd.DataFrame(
            scaler.transform(frame),
            columns=frame.columns,
            index=frame.index
        )

    X_train_df = _standardize(X_train_df)
    X_valid_df = _standardize(X_valid_df)
    X_test_df = _standardize(X_test_df)

    # --- 4) Return six DataFrames/Series ---
    return X_train_df, y_train_df, X_valid_df, y_valid_df, X_test_df, y_test_df

if __name__ == "__main__":
    # Dataset to fetch and the archive it is published in.
    dataset_name = 'MelbournePedestrian'
    url = 'https://timeseriesclassification.com/aeon-toolkit/MelbournePedestrian.zip'

    # Download/extract, then read the TRAIN and TEST ARFF files from the
    # extraction folder (same order as before: TRAIN first, then TEST).
    path = download_dataset(dataset_name, url)
    train_df, test_df = (
        load_arff_data(os.path.join(path, f"{dataset_name}_{split}.arff"))
        for split in ("TRAIN", "TEST")
    )

    # Standardized features and integer labels for train / validation / test.
    (
        X_train, y_train,
        X_valid, y_valid,
        X_test, y_test,
    ) = preprocess_data(train_df, test_df)

    # Report the size of every split.
    print("Shapes:")
    print("  X_train:", X_train.shape, " y_train:", y_train.shape)
    print("  X_valid:", X_valid.shape, " y_valid:", y_valid.shape)
    print("  X_test: ", X_test.shape,  " y_test: ", y_test.shape)


Shapes:
  X_train: (1194, 24)  y_train: (1194,)
  X_valid: (1219, 24)  y_valid: (1219,)
  X_test:  (1220, 24)  y_test:  (1220,)


In [2]:
import time_series_embeddings1 as embd
import clasfy_p1 as clasfy
import clasfy_p2 as clasfy2
import plot_umap as plt_um
import pandas as pd
import numpy

In [3]:
# Scale the three feature splits with the project helper.
# NOTE(review): preprocess_data already standardizes X_train / X_valid / X_test
# with a StandardScaler fitted on the training split, so this looks like a
# second scaling pass — confirm what embd.std_scaling does and whether it is needed.
train_sc, val_sc, test_sc = embd.std_scaling(X_train, X_valid, X_test)

# Without overlapping windows: the *_sct names are plain aliases of the scaled splits.
train_sct, val_sct, test_sct = train_sc, val_sc, test_sc

# Labels exactly as produced by preprocess_data.
ny_train, ny_val, ny_test = y_train, y_valid, y_test

# Labels shifted down by one (presumably to make them zero-based — TODO confirm).
ny_train2, ny_val2, ny_test2 = y_train - 1, y_valid - 1, y_test - 1


In [6]:
# NNCLR CNN embedding

import nnclr_embdcnn as nn

# Labels are passed shifted down by one (the same values as ny_train2 / ny_val2 / ny_test2).
# NOTE(review): the meaning of the trailing positional arguments 64 and 7 is not
# visible from this notebook — check nnclr_cnn_embedding_with_timing's signature.
# NOTE(review): the recorded output of this cell shows "p_loss: nan" from the first
# epoch on, with p_acc not moving — worth investigating before trusting the embeddings.
(
    train_nn_cnn,
    val_nn_cnn,
    test_nn_cnn,
    train_time,
    inference_time,
) = nn.nnclr_cnn_embedding_with_timing(
    train_sc, val_sc, test_sc,
    y_train - 1, y_valid - 1, y_test - 1,
    64, 7,
)

x_train_df shape: (1138, 24)
y_train    shape: (1138,)
x_val_df   shape: (1159, 24)
y_val      shape: (1159,)
x_test_df  shape: (1160, 24)
y_test     shape: (1160,)


None


None
Epoch 1/200
36/36 - 10s - 266ms/step - c_acc: 0.0600 - c_loss: 3.4301 - p_acc: 0.1007 - p_loss: nan - r_acc: 0.0836 - val_p_acc: 0.1034 - val_p_loss: nan
Epoch 2/200
36/36 - 1s - 15ms/step - c_acc: 0.0701 - c_loss: 3.4647 - p_acc: 0.1042 - p_loss: nan - r_acc: 0.1157 - val_p_acc: 0.1034 - val_p_loss: nan
Epoch 3/200
36/36 - 1s - 15ms/step - c_acc: 0.0924 - c_loss: 3.2749 - p_acc: 0.1042 - p_loss: nan - r_acc: 0.1480 - val_p_acc: 0.1034 - val_p_loss: nan
Epoch 4/200
36/36 - 1s - 15ms/step - c_acc: 0.1172 - c_loss: 3.2857 - p_acc: 0.1042 - p_loss: nan - r_acc: 0.1534 - val_p_acc: 0.1034 - val_p_loss: nan
Epoch 5/200
36/36 - 1s - 14ms/step - c_acc: 0.1271 - c_loss: 3.2569 - p_acc: 0.1042 - p_loss: nan - r_acc: 0.1875 - val_p_acc: 0.1034 - val_p_loss: nan
Epoch 6/200
36/36 - 1s - 14ms/step - c_acc: 0.1293 - c_loss: 3.0434 - p_acc: 0.1042 - p_loss: nan - r_acc: 0.2151 - val_p_acc: 0.1034 - val_p_loss: nan
Epoch 7/200
36/36 - 1s - 14ms/step - c_acc: 0.1341 - c_loss: 2.8873 - p_acc: 0.10

In [None]:
# NNCLR embedding via the nnclr_embdcnn4 module.
# NOTE(review): this cell was labelled "nnclr_lstm embedding", yet it calls
# nnclr_cnn_embedding_with_timing and overwrites train_nn_cnn / val_nn_cnn /
# test_nn_cnn produced by the previous embedding cell — confirm which model is intended.
# For the paper, 100 finetuning and training epochs were chosen; this example uses 20 epochs.
import importlib
import nnclr_embdcnn4 as nn  # rebinds `nn`, which earlier referred to nnclr_embdcnn

# Reload so that edits made to the module file are picked up without restarting the kernel.
importlib.reload(nn)

# NOTE(review): the trailing positional arguments 24 and 10 are opaque from here —
# check the function's signature.
(
    train_nn_cnn,
    val_nn_cnn,
    test_nn_cnn,
    train_time,
    inference_time,
) = nn.nnclr_cnn_embedding_with_timing(
    train_sc, val_sc, test_sc,
    y_train - 1, y_valid - 1, y_test - 1,
    24, 10,
)

x_train_df shape: (1138, 24)
y_train    shape: (1138,)
x_val_df   shape: (1159, 24)
y_val      shape: (1159,)
x_test_df  shape: (1160, 24)
y_test     shape: (1160,)


None


None
Epoch 1/20
36/36 - 20s - 542ms/step - c_acc: 0.0135 - c_loss: 3.9121 - p_acc: 0.1042 - p_loss: nan - r_acc: 0.0126 - val_p_acc: 0.1034 - val_p_loss: nan
Epoch 2/20
36/36 - 1s - 27ms/step - c_acc: 0.0151 - c_loss: 3.9120 - p_acc: 0.1042 - p_loss: nan - r_acc: 0.0165 - val_p_acc: 0.1034 - val_p_loss: nan
Epoch 3/20
36/36 - 1s - 27ms/step - c_acc: 0.0120 - c_loss: 3.9122 - p_acc: 0.1042 - p_loss: nan - r_acc: 0.0154 - val_p_acc: 0.1034 - val_p_loss: nan
Epoch 4/20
36/36 - 1s - 26ms/step - c_acc: 0.0168 - c_loss: 3.9120 - p_acc: 0.1042 - p_loss: nan - r_acc: 0.0184 - val_p_acc: 0.1034 - val_p_loss: nan
Epoch 5/20
36/36 - 1s - 26ms/step - c_acc: 0.0177 - c_loss: 3.9120 - p_acc: 0.1042 - p_loss: nan - r_acc: 0.0169 - val_p_acc: 0.1034 - val_p_loss: nan
Epoch 6/20
36/36 - 1s - 35ms/step - c_acc: 0.0146 - c_loss: 3.9121 - p_acc: 0.1042 - p_loss: nan - r_acc: 0.0145 - val_p_acc: 0.1034 - val_p_loss: nan
Epoch 7/20
36/36 - 1s - 40ms/step - c_acc: 0.0183 - c_loss: 3.9119 - p_acc: 0.1042 - p_

In [None]:
# Run name handed to the optimizer as its last argument.
namem = "melbourne-cnn"

# Search classifier settings on the NNCLR embeddings (judging by the recorded trial
# log this is a logistic-regression hyper-parameter study — confirm in clasfy_p1).
# The unshifted labels (ny_train / ny_val / ny_test) are used here.
best_params, best_score = clasfy.optimize_LOGRG(
    train_nn_cnn, val_nn_cnn, test_nn_cnn,
    ny_train, ny_val, ny_test,
    namem,
)
print(best_params, best_score)


[I 2025-07-07 22:06:54,026] A new study created in memory with name: no-name-beb32cda-5701-4974-92c3-a3dd64755161
[I 2025-07-07 22:06:54,078] Trial 0 finished with value: 0.10526315789473684 and parameters: {'C': 0.0008766103922919282, 'fit_intercept': True, 'solver': 'saga', 'penalty': 'elasticnet', 'l1_ratio': 0.4449383923749495}. Best is trial 0 with value: 0.10526315789473684.
[I 2025-07-07 22:07:19,544] Trial 1 finished with value: 0.8593615185504746 and parameters: {'C': 2.814619695647055, 'fit_intercept': True, 'solver': 'saga', 'penalty': 'elasticnet', 'l1_ratio': 0.8281466941232535}. Best is trial 1 with value: 0.8593615185504746.
[I 2025-07-07 22:07:23,908] Trial 2 finished with value: 0.7601380500431406 and parameters: {'C': 0.2521323307212541, 'fit_intercept': False, 'solver': 'saga', 'penalty': 'elasticnet', 'l1_ratio': 0.06689983548067069}. Best is trial 1 with value: 0.8593615185504746.
[I 2025-07-07 22:07:24,568] Trial 3 finished with value: 0.1363244176013805 and param