In [1]:
from typing import Tuple, List, Union, Any, Optional, Dict, Literal, Callable
import time
import os
import sys
sys.path.append(os.path.dirname(os.getcwd()))
sys.path.append(os.path.dirname(os.path.dirname(os.getcwd())))

import numpy as np
import aeon
import torch
from torch import Tensor
import torch.nn as nn
import torch.nn.functional as F
import pandas as pd
from aeon.datasets.tsc_datasets import univariate_equal_length, multivariate_equal_length
univariate_equal_length = sorted(list(univariate_equal_length))
multivariate_equal_length = sorted(list(multivariate_equal_length))
from aeon.datasets import load_classification
from sklearn.linear_model import RidgeCV, RidgeClassifierCV
from sklearn.metrics import accuracy_score
from tqdm import tqdm

from utils.utils import print_name, print_shape
from preprocessing.stream_transforms import normalize_mean_std_traindata, normalize_streams, augment_time, add_basepoint_zero
from random_sig_fourier import SigTensorisedRandProj
from signature import SigTransform, LogSigTransform
from features.base import TimeseriesFeatureExtractor, TabularTimeseriesFeatures, RandomGuesser
from randomized_sig import RandomizedSignature
from rocket_wrappers import RocketWrapper, MiniRocketWrapper, MultiRocketWrapper
from pytorch_based.features.multirocket import MultiRocketOwn

np.set_printoptions(precision=3, threshold=5) # Print options



In [2]:
#############################################
#######          Dataset Code         #######
#############################################

def get_aeon_dataset(
        dataset_name:str, 
        extract_path = "/home/nikita/hdd/Data/TSC/",
        device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
        ):
    """Loads a dataset from the UCR/UEA archive using 
    the aeon library.

    Args:
        dataset_name (str): Name of the dataset

    Returns:
        Tuple: 4-tuple of the form (X_train, y_train, X_test, y_test)
    """
    X_train, y_train = load_classification(dataset_name, split="train", extract_path=extract_path)
    X_test, y_test = load_classification(dataset_name, split="test", extract_path=extract_path)
    X_train = torch.from_numpy(X_train.transpose(0,2,1)).to(device).float().detach()
    X_test = torch.from_numpy(X_test.transpose(0,2,1)).to(device).float().detach()
    return X_train, y_train, X_test, y_test

In [3]:
# def test_get_data(idx: int = 17):
#     name = univariate_equal_length[idx]
#     X_train, y_train, X_test, y_test = get_aeon_dataset(name, device="cpu")
#     print("Dataset:", name)
#     print("idx:", idx)
#     print("X_train", X_train.shape)
#     print("X_test", X_test.shape)

# for i in range(20):
#     test_get_data(i)
#     print("\n")

In [4]:
##################################
####  Linear Model (Ridge)  ######
##################################

def train_and_test_linear(
        train_X, train_y, test_X, test_y,
        feat_extractor: TimeseriesFeatureExtractor,
        apply_augmentation:bool=True,
        normalize_features:bool=True,
        clf=RidgeClassifierCV(alphas=np.logspace(-1, 5, 20))
    ):
    # augment data
    print(train_X.shape)
    if apply_augmentation:
        train_X, test_X = normalize_mean_std_traindata(train_X, test_X)
        train_X = add_basepoint_zero(train_X)
        train_X = augment_time(train_X)
        test_X = add_basepoint_zero(test_X)
        test_X = augment_time(test_X)

    # fit transformer
    t0 = time.time()
    with torch.no_grad():
        feat_extractor.fit(train_X)
        feat_train_X = feat_extractor.transform(train_X).cpu().numpy()
        feat_test_X = feat_extractor.transform(test_X).cpu().numpy()
        print("feat_train_X", feat_train_X.shape)
        if normalize_features:
            feat_train_X, feat_test_X = normalize_mean_std_traindata(feat_train_X, feat_test_X)


    # feed into linear classifier
    t1 = time.time()
    clf.fit(feat_train_X, train_y)
    t2 = time.time()

    # predict
    pred = clf.predict(feat_test_X)
    test_acc = accuracy_score(test_y, pred)
    train_acc = accuracy_score(train_y, clf.predict(feat_train_X))
    alpha = clf.alpha_ if hasattr(clf, 'alpha_') else None
    return train_acc, test_acc, alpha, t1-t0, t2-t1

In [5]:
def run_allModels_singleDataset(X_train, y_train, X_test, y_test):
    max_batch = 32
    trunc_level = 4
    n_features = 1344
    n_rocket_features = 20000

    models = [
        ["Random Guesser", RandomGuesser()],
        ["Tabular", TabularTimeseriesFeatures()],
        # # ["Sig", SigTransform(trunc_level, max_batch)],
        # # ["Log Sig", LogSigTransform(trunc_level, max_batch)],
        # ["Randomized Signature", RandomizedSignature(
        #     n_features,
        #     activation = "tanh",
        #     max_batch=10,
        #     )],
        # ["TRP", SigTensorisedRandProj(
        #     trunc_level,
        #     n_features,
        #     only_last=True,
        #     method="linear",
        #     max_batch=max_batch,
        #     )],
        # ["TRP rbf", SigTensorisedRandProj(
        #     trunc_level,
        #     n_features,
        #     only_last=True,
        #     method="RBF",
        #     sigma_rbf=1.0,
        #     max_batch=max_batch,
        #     )],
        # ["concat TRP", SigTensorisedRandProj(
        #     trunc_level,
        #     n_features // (trunc_level-1),
        #     only_last=False,
        #     method="linear",
        #     max_batch=max_batch,
        #     )],
        # ["concat TRP rbf", SigTensorisedRandProj(
        #     trunc_level,
        #     n_features // (trunc_level-1),
        #     only_last=False,
        #     method="RBF",
        #     sigma_rbf=1.0,
        #     max_batch=max_batch,
            # )],
        # ["Rocket", RocketWrapper(
        #     n_rocket_features
        #     )],
        ["MiniRocket", MiniRocketWrapper(
            n_rocket_features
            )],
        ["MultiRocket", MultiRocketWrapper(
            n_rocket_features
            )],
        ["MyOwnMultiRocket", MultiRocketOwn(
            n_rocket_features,
            max_batch=32,
            )],
        ]

    # Run experiments
    model_names = [name for (name, _) in models]
    results_ridge = []
    for name, model in models:
        print("name", name)
        result = train_and_test_linear(
            X_train, y_train, X_test, y_test, model
            )
        results_ridge.append(result)
        print()
    
    return model_names, results_ridge

In [6]:
def run_dataset(dataset_name:str):
    X_train, y_train, X_test, y_test = get_aeon_dataset(dataset_name)
    X_train, X_test = normalize_streams(X_train, X_test, max_T=1000)
    model_names, results_ridge = run_allModels_singleDataset(X_train, y_train, X_test, y_test)
    return model_names, results_ridge

#model_names, results_ridge = run_dataset(univariate_equal_length[9])

In [7]:
from rocket_wrappers import RocketWrapper, MiniRocketWrapper, MultiRocketWrapper

for n_features in [100, 500, 1000, 2000, 1344, 4000, 6000, 8000, 10000]:
    rocket = RocketWrapper(n_features)
    mini_rocket = MiniRocketWrapper(n_features)
    multi_rocket = MultiRocketWrapper(n_features)
    own = MultiRocketOwn(n_features)
    
    print("n_features:", n_features)
    
    # Generate random input
    input_shape = (2, 150, 1)  # N T D
    X = torch.randn(input_shape)
    
    # Fit to random input
    print(rocket.fit_transform(X).shape)
    print(mini_rocket.fit_transform(X).shape)
    print(multi_rocket.fit_transform(X).shape)
    print(own.fit_transform(X).shape)
    print()

n_features: 100
torch.Size([2, 100])
torch.Size([2, 84])
torch.Size([2, 672])
torch.Size([2, 100])

n_features: 500
torch.Size([2, 500])
torch.Size([2, 420])
torch.Size([2, 672])
torch.Size([2, 500])

n_features: 1000
torch.Size([2, 1000])
torch.Size([2, 924])
torch.Size([2, 672])
torch.Size([2, 1000])

n_features: 2000
torch.Size([2, 2000])
torch.Size([2, 1932])
torch.Size([2, 1344])
torch.Size([2, 2000])

n_features: 1344
torch.Size([2, 1344])
torch.Size([2, 1344])
torch.Size([2, 1344])
torch.Size([2, 1340])

n_features: 4000
torch.Size([2, 4000])
torch.Size([2, 3948])
torch.Size([2, 3360])
torch.Size([2, 4000])

n_features: 6000
torch.Size([2, 6000])
torch.Size([2, 5964])
torch.Size([2, 5376])
torch.Size([2, 6000])

n_features: 8000
torch.Size([2, 8000])
torch.Size([2, 7980])
torch.Size([2, 7392])
torch.Size([2, 8000])

n_features: 10000
torch.Size([2, 10000])
torch.Size([2, 9996])
torch.Size([2, 9408])
torch.Size([2, 10000])



In [8]:
def run_allModels_allData(datasets: List[str]):
    #run experiments
    experiments = {}
    failed = {}
    for dataset_name in tqdm(datasets):
        t0 = time.time()
        # try:
        print(dataset_name)
        X_train, y_train, X_test, y_test = get_aeon_dataset(dataset_name)
        X_train, X_test = normalize_streams(X_train, X_test, max_T=1000)
        N_train = X_train.shape[0]
        N_test = X_test.shape[0]
        T = X_train.shape[1]
        D = X_train.shape[2]
        if N_train<=2000 and D<=20:
            results = run_allModels_singleDataset(
                X_train, y_train, X_test, y_test
                )
            experiments[dataset_name] = results
        # except Exception as e:
        #     print(f"Error: {e}")
        #     failed[dataset_name] = e
        print("Elapsed time", time.time()-t0)
    
    #parse results
    # Define the attributes and methods
    attributes = ["ACC_train", "ACC_test", "alpha", "time_transform", "time_fit"]
    
    # Extract model_names from d_res
    model_names = next(iter(experiments.values()))[0]

    # Create and save DataFrames for each attribute and method
    for attribute in attributes:
        df = pd.DataFrame(columns=model_names)
        for dataset_name, (model_names, results_ridge) in experiments.items():
            values = [res[attributes.index(attribute)] for res in results_ridge]
            df.loc[dataset_name] = values

        # Save the DataFrame
        print(df)
        df.to_pickle(f"TSC_{attribute}_results.pkl")

    return experiments, failed

In [9]:
used_by_paper = [
    "EthanolConcentration",
    #"FaceDetection",
    #"Handwriting",
    "Heartbeat",
    #"JapaneseVowels", #unequal length
    #"PEMS-SF",
    #"SelfRegulationSCP1",
    #"SelfRegulationSCP2",
    #"SpokenArabicDigits", #unequal length
    #"UWaveGestureLibrary",
]


# run_allModels_allData(used_by_paper)
#run_allModels_allData(used_by_paper)
run_allModels_allData(univariate_equal_length[0:10])

  0%|          | 0/20 [00:00<?, ?it/s]

ACSF1
name Random Guesser
torch.Size([100, 730, 1])
feat_train_X (100, 2)

name Tabular
torch.Size([100, 730, 1])
feat_train_X (100, 1462)

name MiniRocket
torch.Size([100, 730, 1])
feat_train_X (100, 19992)

name MultiRocket
torch.Size([100, 730, 1])
feat_train_X (100, 19488)

name MyOwnMultiRocket
torch.Size([100, 730, 1])


  5%|▌         | 1/20 [00:08<02:37,  8.28s/it]

feat_train_X (100, 19992)

Elapsed time 8.282074928283691
Adiac
name Random Guesser
torch.Size([390, 176, 1])
feat_train_X (390, 2)

name Tabular
torch.Size([390, 176, 1])
feat_train_X (390, 354)

name MiniRocket
torch.Size([390, 176, 1])
feat_train_X (390, 19992)

name MultiRocket
torch.Size([390, 176, 1])
feat_train_X (390, 19488)

name MyOwnMultiRocket
torch.Size([390, 176, 1])
feat_train_X (390, 20000)


 10%|█         | 2/20 [00:16<02:26,  8.12s/it]


Elapsed time 7.9974000453948975
ArrowHead
name Random Guesser
torch.Size([36, 251, 1])
feat_train_X (36, 2)

name Tabular
torch.Size([36, 251, 1])
feat_train_X (36, 504)

name MiniRocket
torch.Size([36, 251, 1])
feat_train_X (36, 19992)

name MultiRocket
torch.Size([36, 251, 1])
feat_train_X (36, 19488)

name MyOwnMultiRocket
torch.Size([36, 251, 1])


 15%|█▌        | 3/20 [00:19<01:40,  5.94s/it]

feat_train_X (36, 20000)

Elapsed time 3.3520665168762207
BME
name Random Guesser
torch.Size([30, 128, 1])
feat_train_X (30, 2)

name Tabular
torch.Size([30, 128, 1])
feat_train_X (30, 258)

name MiniRocket
torch.Size([30, 128, 1])
feat_train_X (30, 19992)

name MultiRocket
torch.Size([30, 128, 1])
feat_train_X (30, 19488)

name MyOwnMultiRocket
torch.Size([30, 128, 1])


 20%|██        | 4/20 [00:21<01:08,  4.28s/it]

feat_train_X (30, 20000)

Elapsed time 1.7426190376281738
Beef
name Random Guesser
torch.Size([30, 470, 1])
feat_train_X (30, 2)

name Tabular
torch.Size([30, 470, 1])
feat_train_X (30, 942)

name MiniRocket
torch.Size([30, 470, 1])
feat_train_X (30, 19992)

name MultiRocket
torch.Size([30, 470, 1])
feat_train_X (30, 19488)

name MyOwnMultiRocket
torch.Size([30, 470, 1])


 25%|██▌       | 5/20 [00:22<00:49,  3.32s/it]

feat_train_X (30, 19992)

Elapsed time 1.6031842231750488
BeetleFly
name Random Guesser
torch.Size([20, 512, 1])
feat_train_X (20, 2)

name Tabular
torch.Size([20, 512, 1])
feat_train_X (20, 1026)

name MiniRocket
torch.Size([20, 512, 1])
feat_train_X (20, 19992)

name MultiRocket
torch.Size([20, 512, 1])


 30%|███       | 6/20 [00:24<00:36,  2.62s/it]

feat_train_X (20, 19488)

name MyOwnMultiRocket
torch.Size([20, 512, 1])
feat_train_X (20, 19992)

Elapsed time 1.251401662826538
BirdChicken
name Random Guesser
torch.Size([20, 512, 1])
feat_train_X (20, 2)

name Tabular
torch.Size([20, 512, 1])
feat_train_X (20, 1026)

name MiniRocket
torch.Size([20, 512, 1])
feat_train_X (20, 19992)

name MultiRocket
torch.Size([20, 512, 1])


 35%|███▌      | 7/20 [00:25<00:28,  2.16s/it]

feat_train_X (20, 19488)

name MyOwnMultiRocket
torch.Size([20, 512, 1])
feat_train_X (20, 19992)

Elapsed time 1.2300481796264648
CBF
name Random Guesser
torch.Size([30, 128, 1])
feat_train_X (30, 2)

name Tabular
torch.Size([30, 128, 1])
feat_train_X (30, 258)

name MiniRocket
torch.Size([30, 128, 1])
feat_train_X (30, 19992)

name MultiRocket
torch.Size([30, 128, 1])
feat_train_X (30, 19488)

name MyOwnMultiRocket
torch.Size([30, 128, 1])


 40%|████      | 8/20 [00:33<00:48,  4.01s/it]

feat_train_X (30, 20000)

Elapsed time 7.965376377105713
Car
name Random Guesser
torch.Size([60, 577, 1])
feat_train_X (60, 2)

name Tabular
torch.Size([60, 577, 1])
feat_train_X (60, 1156)

name MiniRocket
torch.Size([60, 577, 1])
feat_train_X (60, 19992)

name MultiRocket
torch.Size([60, 577, 1])
feat_train_X (60, 19488)

name MyOwnMultiRocket
torch.Size([60, 577, 1])


 45%|████▌     | 9/20 [00:36<00:40,  3.70s/it]

feat_train_X (60, 19992)

Elapsed time 3.006862163543701
Chinatown
name Random Guesser
torch.Size([20, 24, 1])
feat_train_X (20, 2)

name Tabular
torch.Size([20, 24, 1])
feat_train_X (20, 50)

name MiniRocket
torch.Size([20, 24, 1])
feat_train_X (20, 19992)

name MultiRocket
torch.Size([20, 24, 1])
feat_train_X (20, 19488)

name MyOwnMultiRocket
torch.Size([20, 24, 1])


 50%|█████     | 10/20 [00:37<00:29,  2.98s/it]

feat_train_X (20, 20000)

Elapsed time 1.367276668548584
ChlorineConcentration
name Random Guesser
torch.Size([467, 166, 1])
feat_train_X (467, 2)

name Tabular
torch.Size([467, 166, 1])
feat_train_X (467, 334)

name MiniRocket
torch.Size([467, 166, 1])
feat_train_X (467, 19992)

name MultiRocket
torch.Size([467, 166, 1])
feat_train_X (467, 19488)

name MyOwnMultiRocket
torch.Size([467, 166, 1])
feat_train_X (467, 20000)


 55%|█████▌    | 11/20 [01:30<02:44, 18.28s/it]


Elapsed time 52.965463399887085
CinCECGTorso
name Random Guesser
torch.Size([40, 819, 1])
feat_train_X (40, 2)

name Tabular
torch.Size([40, 819, 1])
feat_train_X (40, 1640)

name MiniRocket
torch.Size([40, 819, 1])
feat_train_X (40, 19992)

name MultiRocket
torch.Size([40, 819, 1])
feat_train_X (40, 19488)

name MyOwnMultiRocket
torch.Size([40, 819, 1])


 55%|█████▌    | 11/20 [02:09<01:45, 11.73s/it]


OutOfMemoryError: CUDA out of memory. Tried to allocate 1002.00 MiB. GPU 0 has a total capacity of 7.91 GiB of which 997.06 MiB is free. Including non-PyTorch memory, this process has 6.57 GiB memory in use. Of the allocated memory 4.90 GiB is allocated by PyTorch, and 1.56 GiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting PYTORCH_CUDA_ALLOC_CONF=expandable_segments:True to avoid fragmentation.  See documentation for Memory Management  (https://pytorch.org/docs/stable/notes/cuda.html#environment-variables)

In [None]:
# Define the attributes and methods
attributes = ["ACC_train", "ACC_test", "time_transform", "time_fit", "alpha"]
#data_dir = "https://github.com/nikitazozoulenko/zephyrox/raw/main/Data/TSER/"
data_dir = ""
# Load and store the DataFrames for each attribute and method
dfs = {}
for attribute in attributes:
    filename = f"TSC_{attribute}_results.pkl"
    print(data_dir+filename)
    df = pd.read_pickle(data_dir + filename)
    dfs[attribute] = df

In [None]:
dfs["alpha"]

In [None]:
dfs["ACC_test"]

In [None]:
dfs["ACC_train"]

In [None]:
dfs["time_transform"]