In [1]:
from typing import Tuple, List, Union, Any, Optional, Dict, Literal, Callable
import time
import os
import sys
sys.path.append(os.path.dirname(os.getcwd()))
sys.path.append(os.path.dirname(os.path.dirname(os.getcwd())))

import numpy as np
import aeon
import torch
from torch import Tensor
import torch.nn as nn
import torch.functional as F
import pandas as pd
from aeon.datasets.tsc_datasets import univariate_equal_length, multivariate_equal_length
# Turn the aeon dataset-name collections into sorted lists so that they
# can be indexed and sliced in a stable order further down.
univariate_equal_length = sorted(univariate_equal_length)
multivariate_equal_length = sorted(multivariate_equal_length)
from aeon.datasets import load_classification
from sklearn.linear_model import RidgeCV, RidgeClassifierCV
from sklearn.metrics import accuracy_score
from tqdm import tqdm

from utils.utils import print_name, print_shape
from preprocessing.stream_transforms import normalize_mean_std_traindata, normalize_streams, augment_time, add_basepoint_zero
from random_sig_fourier import SigTensorisedRandProj
from signature import SigTransform, LogSigTransform
from features.base import TimeseriesFeatureExtractor, TabularTimeseriesFeatures, RandomGuesser
from randomized_sig import RandomizedSignature
from rocket_wrappers import RocketWrapper, MiniRocketWrapper, MultiRocketWrapper

# Compact NumPy printing: floats are shown with 3 digits of precision and
# arrays with more than 5 elements are summarized instead of printed in full.
np.set_printoptions(precision=3, threshold=5) # Print options



In [2]:
#############################################
#######          Dataset Code         #######
#############################################

def get_aeon_dataset(
        dataset_name:str, 
        extract_path = "/home/nikita/hdd/Data/TSC/",
        device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
        ):
    """Fetches the train and test splits of a dataset through aeon's
    ``load_classification`` and returns the inputs as torch tensors.

    Args:
        dataset_name (str): Name of the dataset.
        extract_path: Directory forwarded to ``load_classification``
            as its ``extract_path`` argument.
        device: Target handed to ``Tensor.to`` for both input tensors.
            The default is evaluated once, when the function is defined.

    Returns:
        Tuple: 4-tuple of the form (X_train, y_train, X_test, y_test).
            The inputs are tensors whose axes 1 and 2 are swapped
            relative to the loaded arrays (presumably giving
            (cases, time, channels) -- TODO confirm against aeon's
            layout). The labels are returned exactly as loaded.
    """
    def _load_split(split):
        # Load one split, swap axes 1 and 2, then move the tensor to `device`.
        inputs, labels = load_classification(
            dataset_name, split=split, extract_path=extract_path
        )
        return torch.from_numpy(inputs.transpose(0, 2, 1)).to(device), labels

    X_train, y_train = _load_split("train")
    X_test, y_test = _load_split("test")
    return X_train, y_train, X_test, y_test

In [3]:
# def test_get_data(idx: int = 17):
#     name = univariate_equal_length[idx]
#     X_train, y_train, X_test, y_test = get_aeon_dataset(name, device="cpu")
#     print("Dataset:", name)
#     print("idx:", idx)
#     print("X_train", X_train.shape)
#     print("X_test", X_test.shape)

# for i in range(20):
#     test_get_data(i)
#     print("\n")

In [4]:
##################################
####  Linear Model (Ridge)  ######
##################################

def train_and_test_linear(
        train_X, train_y, test_X, test_y,
        feat_extractor: TimeseriesFeatureExtractor,
        apply_augmentation:bool=True,
        normalize_features:bool=True,
        clf=None
    ):
    """Fits a feature extractor and a linear classifier on the training
    split and scores the classifier on both splits.

    Args:
        train_X: Training inputs handed to the preprocessing helpers and
            to ``feat_extractor`` (presumably a (N, T, D) tensor -- TODO
            confirm against the caller).
        train_y: Training labels passed to ``clf.fit``.
        test_X: Test inputs, processed exactly like ``train_X``.
        test_y: Test labels used for the test accuracy.
        feat_extractor (TimeseriesFeatureExtractor): Object exposing
            ``fit(X)`` and ``transform(X)``; the transform result must
            support ``.cpu().numpy()``.
        apply_augmentation (bool): If True, both splits are normalized
            with ``normalize_mean_std_traindata`` and then passed through
            ``add_basepoint_zero`` and ``augment_time``.
        normalize_features (bool): If True, the extracted features are
            normalized with ``normalize_mean_std_traindata``.
        clf: Classifier exposing ``fit`` and ``predict``. When None
            (the default) a fresh
            ``RidgeClassifierCV(alphas=np.logspace(-3, 3, 20))`` is
            created for this call.

    Returns:
        Tuple: (train_acc, test_acc, alpha, time_transform, time_fit).
            ``alpha`` is ``clf.alpha_`` when the classifier has that
            attribute, otherwise None. ``time_transform`` covers fitting
            the extractor, transforming both splits and the optional
            feature normalization. ``time_fit`` covers ``clf.fit`` only.
    """
    # Build the default classifier per call. A default-argument instance
    # would be created once at definition time and the same fitted object
    # would then be shared (and re-fitted) by every call.
    if clf is None:
        clf = RidgeClassifierCV(alphas=np.logspace(-3, 3, 20))

    # augment data
    print(train_X.shape)
    if apply_augmentation:
        train_X, test_X = normalize_mean_std_traindata(train_X, test_X)
        train_X = add_basepoint_zero(train_X)
        train_X = augment_time(train_X)
        test_X = add_basepoint_zero(test_X)
        test_X = augment_time(test_X)

    # fit transformer
    t0 = time.time()
    feat_extractor.fit(train_X)
    feat_train_X = feat_extractor.transform(train_X).cpu().numpy()
    feat_test_X = feat_extractor.transform(test_X).cpu().numpy()
    print("feat_train_X", feat_train_X.shape)
    if normalize_features:
        feat_train_X, feat_test_X = normalize_mean_std_traindata(feat_train_X, feat_test_X)

    # feed into linear classifier
    t1 = time.time()
    clf.fit(feat_train_X, train_y)
    t2 = time.time()

    # predict
    test_acc = accuracy_score(test_y, clf.predict(feat_test_X))
    train_acc = accuracy_score(train_y, clf.predict(feat_train_X))
    # Not every classifier exposes a selected regularization strength.
    alpha = getattr(clf, "alpha_", None)
    return train_acc, test_acc, alpha, t1-t0, t2-t1

In [5]:
def run_allModels_singleDataset(X_train, y_train, X_test, y_test):
    """Runs every configured feature extractor on one dataset.

    Each extractor is passed to ``train_and_test_linear`` together with
    the given train/test split, using that function's default settings.

    Args:
        X_train: Training inputs.
        y_train: Training labels.
        X_test: Test inputs.
        y_test: Test labels.

    Returns:
        Tuple: (model_names, results_ridge). ``model_names`` is the list
            of display names and ``results_ridge`` the list of tuples
            returned by ``train_and_test_linear``, in the same order.
    """
    max_batch = 32
    trunc_level = 4
    n_features = 1000
    # Per-level width used by the "concat" variants (only_last=False).
    n_features_concat = n_features // (trunc_level-1)

    def _trp(width, only_last, method, **extra):
        # All TRP variants share the truncation level and the batch size.
        return SigTensorisedRandProj(
            trunc_level,
            width,
            only_last=only_last,
            method=method,
            max_batch=max_batch,
            **extra
        )

    # (display name, extractor) pairs, constructed in this order.
    # Disabled baselines:
    #   ("Sig", SigTransform(trunc_level, max_batch))
    #   ("Log Sig", LogSigTransform(trunc_level, max_batch))
    models = [
        ("Random Guesser", RandomGuesser()),
        ("Tabular", TabularTimeseriesFeatures()),
        ("Randomized Signature", RandomizedSignature(n_features, activation="tanh", max_batch=10)),
        ("TRP", _trp(n_features, True, "linear")),
        ("TRP rbf", _trp(n_features, True, "RBF", sigma_rbf=1.0)),
        ("concat TRP", _trp(n_features_concat, False, "linear")),
        ("concat TRP rbf", _trp(n_features_concat, False, "RBF", sigma_rbf=1.0)),
        ("Rocket", RocketWrapper(n_features)),
        ("MiniRocket", MiniRocketWrapper(n_features)),
        ("MultiRocket", MultiRocketWrapper(n_features)),
    ]

    # Run experiments
    model_names = []
    results_ridge = []
    for label, extractor in models:
        print("name", label)
        outcome = train_and_test_linear(X_train, y_train, X_test, y_test, extractor)
        model_names.append(label)
        results_ridge.append(outcome)
        print()

    return model_names, results_ridge

In [6]:
def run_dataset(dataset_name:str):
    """Loads one dataset by name and runs every model on it.

    Both splits are passed through ``normalize_streams`` with
    ``max_T=1000`` before the models are run.

    Args:
        dataset_name (str): Name of the dataset to load.

    Returns:
        Tuple: (model_names, results_ridge) as produced by
            ``run_allModels_singleDataset``.
    """
    train_inputs, train_labels, test_inputs, test_labels = get_aeon_dataset(dataset_name)
    train_inputs, test_inputs = normalize_streams(train_inputs, test_inputs, max_T=1000)
    return run_allModels_singleDataset(train_inputs, train_labels, test_inputs, test_labels)

# Quick single-dataset run: index 9 of the sorted univariate list. In the
# recorded output of the ten-dataset run this position is "Chinatown"
# (may differ with another aeon version).
model_names, results_ridge = run_dataset(univariate_equal_length[9])

name Random Guesser
torch.Size([20, 24, 1])
feat_train_X (20, 2)

name Tabular
torch.Size([20, 24, 1])
feat_train_X (20, 50)

name Randomized Signature
torch.Size([20, 24, 1])
feat_train_X (20, 1000)

name TRP
torch.Size([20, 24, 1])
feat_train_X (20, 1000)

name TRP rbf
torch.Size([20, 24, 1])
feat_train_X (20, 1000)

name concat TRP
torch.Size([20, 24, 1])
feat_train_X (20, 999)

name concat TRP rbf
torch.Size([20, 24, 1])
feat_train_X (20, 999)

name Rocket
torch.Size([20, 24, 1])
feat_train_X (20, 1000)

name MiniRocket
torch.Size([20, 24, 1])
feat_train_X (20, 7392)

name MultiRocket
torch.Size([20, 24, 1])
feat_train_X (20, 672)



In [7]:
def run_allModels_allData(datasets: List[str]):
    """Runs all models on every given dataset and pickles the results.

    For each dataset the splits are loaded with ``get_aeon_dataset`` and
    passed through ``normalize_streams`` (``max_T=1000``). Only datasets
    with at most 2000 training cases and at most 20 channels are run;
    the others are reported and skipped. An exception raised while
    processing a dataset is printed and recorded, and the loop continues
    with the next dataset.

    For each result attribute one DataFrame (rows: datasets, columns:
    models) is printed and written to ``TSC_<attribute>_results.pkl`` in
    the current working directory. Nothing is written when no dataset
    completed.

    Args:
        datasets (List[str]): Names of the datasets to process.

    Returns:
        Tuple: (experiments, failed). ``experiments`` maps a dataset name
            to the (model_names, results_ridge) pair returned by
            ``run_allModels_singleDataset``; ``failed`` maps a dataset
            name to the exception raised while processing it.
    """
    # run experiments
    experiments = {}
    failed = {}
    for dataset_name in tqdm(datasets):
        t0 = time.time()
        try:
            print(dataset_name)
            X_train, y_train, X_test, y_test = get_aeon_dataset(dataset_name)
            X_train, X_test = normalize_streams(X_train, X_test, max_T=1000)
            N_train = X_train.shape[0]
            D = X_train.shape[2]
            if N_train<=2000 and D<=20:
                experiments[dataset_name] = run_allModels_singleDataset(
                    X_train, y_train, X_test, y_test
                    )
            else:
                # Make the size filter visible instead of dropping silently.
                print(f"Skipping {dataset_name}: N_train={N_train}, D={D}")
        except Exception as e:
            # Best effort: record the failure and carry on with the rest.
            print(f"Error: {e}")
            failed[dataset_name] = e
        print("Elapsed time", time.time()-t0)

    # Without a single completed dataset there are no model names and
    # nothing to tabulate.
    if not experiments:
        print("No experiments completed; no result files written.")
        return experiments, failed

    # parse results
    # Position i of each result tuple holds attributes[i]; the order
    # matches the return value of train_and_test_linear.
    attributes = ["ACC_train", "ACC_test", "alpha", "time_transform", "time_fit"]

    # Column labels are taken from the first completed experiment.
    model_names = next(iter(experiments.values()))[0]

    # Create and save one DataFrame per attribute
    for attr_idx, attribute in enumerate(attributes):
        rows = {
            dataset_name: [res[attr_idx] for res in results_ridge]
            for dataset_name, (_, results_ridge) in experiments.items()
        }
        # Build the frame in one step rather than growing it row by row.
        df = pd.DataFrame.from_dict(rows, orient="index", columns=model_names)

        # Save the DataFrame
        print(df)
        df.to_pickle(f"TSC_{attribute}_results.pkl")

    return experiments, failed

In [8]:
# Run the benchmark on the first ten univariate datasets. The returned
# (experiments, failed) tuple is not assigned, so it is shown as the cell
# output after the printed log.
run_allModels_allData(univariate_equal_length[:10])

  0%|          | 0/10 [00:00<?, ?it/s]

ACSF1
name Random Guesser
torch.Size([100, 730, 1])
feat_train_X (100, 2)

name Tabular
torch.Size([100, 730, 1])
feat_train_X (100, 1462)

name Randomized Signature
torch.Size([100, 730, 1])
feat_train_X (100, 1000)

name TRP
torch.Size([100, 730, 1])
feat_train_X (100, 1000)

name TRP rbf
torch.Size([100, 730, 1])
feat_train_X (100, 1000)

name concat TRP
torch.Size([100, 730, 1])
feat_train_X (100, 999)

name concat TRP rbf
torch.Size([100, 730, 1])
feat_train_X (100, 999)

name Rocket
torch.Size([100, 730, 1])
feat_train_X (100, 1000)

name MiniRocket
torch.Size([100, 730, 1])
feat_train_X (100, 7392)

name MultiRocket
torch.Size([100, 730, 1])


 10%|█         | 1/10 [00:26<03:57, 26.41s/it]

feat_train_X (100, 672)

Elapsed time 26.40942096710205
Adiac
name Random Guesser
torch.Size([390, 176, 1])
feat_train_X (390, 2)

name Tabular
torch.Size([390, 176, 1])
feat_train_X (390, 354)

name Randomized Signature
torch.Size([390, 176, 1])
feat_train_X (390, 1000)

name TRP
torch.Size([390, 176, 1])
feat_train_X (390, 1000)

name TRP rbf
torch.Size([390, 176, 1])
feat_train_X (390, 1000)

name concat TRP
torch.Size([390, 176, 1])
feat_train_X (390, 999)

name concat TRP rbf
torch.Size([390, 176, 1])
feat_train_X (390, 999)

name Rocket
torch.Size([390, 176, 1])
feat_train_X (390, 1000)

name MiniRocket
torch.Size([390, 176, 1])
feat_train_X (390, 7392)

name MultiRocket
torch.Size([390, 176, 1])


 20%|██        | 2/10 [00:52<03:28, 26.08s/it]

feat_train_X (390, 672)

Elapsed time 25.85232400894165
ArrowHead
name Random Guesser
torch.Size([36, 251, 1])
feat_train_X (36, 2)

name Tabular
torch.Size([36, 251, 1])
feat_train_X (36, 504)

name Randomized Signature
torch.Size([36, 251, 1])
feat_train_X (36, 1000)

name TRP
torch.Size([36, 251, 1])
feat_train_X (36, 1000)

name TRP rbf
torch.Size([36, 251, 1])
feat_train_X (36, 1000)

name concat TRP
torch.Size([36, 251, 1])
feat_train_X (36, 999)

name concat TRP rbf
torch.Size([36, 251, 1])
feat_train_X (36, 999)

name Rocket
torch.Size([36, 251, 1])
feat_train_X (36, 1000)

name MiniRocket
torch.Size([36, 251, 1])


 30%|███       | 3/10 [01:02<02:12, 18.86s/it]

feat_train_X (36, 7392)

name MultiRocket
torch.Size([36, 251, 1])
feat_train_X (36, 672)

Elapsed time 10.258893489837646
BME
name Random Guesser
torch.Size([30, 128, 1])
feat_train_X (30, 2)

name Tabular
torch.Size([30, 128, 1])
feat_train_X (30, 258)

name Randomized Signature
torch.Size([30, 128, 1])
feat_train_X (30, 1000)

name TRP
torch.Size([30, 128, 1])
feat_train_X (30, 1000)

name TRP rbf
torch.Size([30, 128, 1])
feat_train_X (30, 1000)

name concat TRP
torch.Size([30, 128, 1])
feat_train_X (30, 999)

name concat TRP rbf
torch.Size([30, 128, 1])
feat_train_X (30, 999)

name Rocket
torch.Size([30, 128, 1])
feat_train_X (30, 1000)

name MiniRocket
torch.Size([30, 128, 1])


 40%|████      | 4/10 [01:06<01:18, 13.03s/it]

feat_train_X (30, 7392)

name MultiRocket
torch.Size([30, 128, 1])
feat_train_X (30, 672)

Elapsed time 4.093208074569702
Beef
name Random Guesser
torch.Size([30, 470, 1])
feat_train_X (30, 2)

name Tabular
torch.Size([30, 470, 1])
feat_train_X (30, 942)

name Randomized Signature
torch.Size([30, 470, 1])
feat_train_X (30, 1000)

name TRP
torch.Size([30, 470, 1])
feat_train_X (30, 1000)

name TRP rbf
torch.Size([30, 470, 1])
feat_train_X (30, 1000)

name concat TRP
torch.Size([30, 470, 1])
feat_train_X (30, 999)

name concat TRP rbf
torch.Size([30, 470, 1])
feat_train_X (30, 999)

name Rocket
torch.Size([30, 470, 1])
feat_train_X (30, 1000)

name MiniRocket
torch.Size([30, 470, 1])


 50%|█████     | 5/10 [01:11<00:50, 10.10s/it]

feat_train_X (30, 7392)

name MultiRocket
torch.Size([30, 470, 1])
feat_train_X (30, 672)

Elapsed time 4.916654586791992
BeetleFly
name Random Guesser
torch.Size([20, 512, 1])
feat_train_X (20, 2)

name Tabular
torch.Size([20, 512, 1])
feat_train_X (20, 1026)

name Randomized Signature
torch.Size([20, 512, 1])
feat_train_X (20, 1000)

name TRP
torch.Size([20, 512, 1])
feat_train_X (20, 1000)

name TRP rbf
torch.Size([20, 512, 1])
feat_train_X (20, 1000)

name concat TRP
torch.Size([20, 512, 1])
feat_train_X (20, 999)

name concat TRP rbf
torch.Size([20, 512, 1])
feat_train_X (20, 999)

name Rocket
torch.Size([20, 512, 1])
feat_train_X (20, 1000)

name MiniRocket
torch.Size([20, 512, 1])


 60%|██████    | 6/10 [01:15<00:31,  7.88s/it]

feat_train_X (20, 7392)

name MultiRocket
torch.Size([20, 512, 1])
feat_train_X (20, 672)

Elapsed time 3.555321455001831
BirdChicken
name Random Guesser
torch.Size([20, 512, 1])
feat_train_X (20, 2)

name Tabular
torch.Size([20, 512, 1])
feat_train_X (20, 1026)

name Randomized Signature
torch.Size([20, 512, 1])
feat_train_X (20, 1000)

name TRP
torch.Size([20, 512, 1])
feat_train_X (20, 1000)

name TRP rbf
torch.Size([20, 512, 1])
feat_train_X (20, 1000)

name concat TRP
torch.Size([20, 512, 1])
feat_train_X (20, 999)

name concat TRP rbf
torch.Size([20, 512, 1])
feat_train_X (20, 999)

name Rocket
torch.Size([20, 512, 1])
feat_train_X (20, 1000)

name MiniRocket
torch.Size([20, 512, 1])


 70%|███████   | 7/10 [01:18<00:19,  6.49s/it]

feat_train_X (20, 7392)

name MultiRocket
torch.Size([20, 512, 1])
feat_train_X (20, 672)

Elapsed time 3.628350019454956
CBF
name Random Guesser
torch.Size([30, 128, 1])
feat_train_X (30, 2)

name Tabular
torch.Size([30, 128, 1])
feat_train_X (30, 258)

name Randomized Signature
torch.Size([30, 128, 1])
feat_train_X (30, 1000)

name TRP
torch.Size([30, 128, 1])
feat_train_X (30, 1000)

name TRP rbf
torch.Size([30, 128, 1])
feat_train_X (30, 1000)

name concat TRP
torch.Size([30, 128, 1])
feat_train_X (30, 999)

name concat TRP rbf
torch.Size([30, 128, 1])
feat_train_X (30, 999)

name Rocket
torch.Size([30, 128, 1])
feat_train_X (30, 1000)

name MiniRocket
torch.Size([30, 128, 1])
feat_train_X (30, 7392)

name MultiRocket
torch.Size([30, 128, 1])


 80%|████████  | 8/10 [01:40<00:22, 11.24s/it]

feat_train_X (30, 672)

Elapsed time 21.426457166671753
Car
name Random Guesser
torch.Size([60, 577, 1])
feat_train_X (60, 2)

name Tabular
torch.Size([60, 577, 1])
feat_train_X (60, 1156)

name Randomized Signature
torch.Size([60, 577, 1])
feat_train_X (60, 1000)

name TRP
torch.Size([60, 577, 1])
feat_train_X (60, 1000)

name TRP rbf
torch.Size([60, 577, 1])
feat_train_X (60, 1000)

name concat TRP
torch.Size([60, 577, 1])
feat_train_X (60, 999)

name concat TRP rbf
torch.Size([60, 577, 1])
feat_train_X (60, 999)

name Rocket
torch.Size([60, 577, 1])
feat_train_X (60, 1000)

name MiniRocket
torch.Size([60, 577, 1])


 90%|█████████ | 9/10 [01:52<00:11, 11.53s/it]

feat_train_X (60, 7392)

name MultiRocket
torch.Size([60, 577, 1])
feat_train_X (60, 672)

Elapsed time 12.16174578666687
Chinatown
name Random Guesser
torch.Size([20, 24, 1])
feat_train_X (20, 2)

name Tabular
torch.Size([20, 24, 1])
feat_train_X (20, 50)

name Randomized Signature
torch.Size([20, 24, 1])
feat_train_X (20, 1000)

name TRP
torch.Size([20, 24, 1])
feat_train_X (20, 1000)

name TRP rbf
torch.Size([20, 24, 1])
feat_train_X (20, 1000)

name concat TRP
torch.Size([20, 24, 1])
feat_train_X (20, 999)

name concat TRP rbf
torch.Size([20, 24, 1])
feat_train_X (20, 999)

name Rocket
torch.Size([20, 24, 1])
feat_train_X (20, 1000)

name MiniRocket
torch.Size([20, 24, 1])


100%|██████████| 10/10 [01:54<00:00, 11.43s/it]

feat_train_X (20, 7392)

name MultiRocket
torch.Size([20, 24, 1])
feat_train_X (20, 672)

Elapsed time 1.9471001625061035
             Random Guesser   Tabular  Randomized Signature       TRP  \
ACSF1              0.190000  0.550000              0.600000  0.330000   
Adiac              0.053846  0.584615              0.382051  0.179487   
ArrowHead          0.527778  0.944444              0.777778  0.611111   
BME                0.433333  1.000000              0.666667  0.966667   
Beef               0.333333  1.000000              0.700000  0.600000   
BeetleFly          0.700000  0.900000              0.850000  0.950000   
BirdChicken        0.550000  0.750000              0.950000  0.900000   
CBF                0.400000  1.000000              1.000000  0.900000   
Car                0.283333  0.966667              0.883333  0.816667   
Chinatown          0.550000  1.000000              1.000000  1.000000   

              TRP rbf  concat TRP  concat TRP rbf    Rocket  MiniRocket  \




({'ACSF1': (['Random Guesser',
    'Tabular',
    'Randomized Signature',
    'TRP',
    'TRP rbf',
    'concat TRP',
    'concat TRP rbf',
    'Rocket',
    'MiniRocket',
    'MultiRocket'],
   [(0.19,
     0.1,
     233.57214690901213,
     0.0006852149963378906,
     0.00774383544921875),
    (0.55, 0.3, 1000.0, 0.003915309906005859, 0.016365766525268555),
    (0.6, 0.52, 233.57214690901213, 6.977292060852051, 0.008084774017333984),
    (0.33, 0.3, 0.1623776739188721, 0.3136866092681885, 0.01659226417541504),
    (0.49, 0.45, 483.2930238571752, 12.82715892791748, 0.024641990661621094),
    (0.3, 0.3, 233.57214690901213, 0.13701558113098145, 0.06163668632507324),
    (0.54, 0.5, 483.2930238571752, 1.794311761856079, 0.007816791534423828),
    (0.87, 0.7, 112.88378916846884, 2.1105809211730957, 0.06290197372436523),
    (0.98, 0.68, 1000.0, 1.5886821746826172, 0.05336499214172363),
    (0.87,
     0.6,
     112.88378916846884,
     0.19402074813842773,
     0.041948795318603516)]),
  

In [9]:
# Result attributes to load. Each one names a pickle file of the form
# TSC_<attribute>_results.pkl, the pattern used by run_allModels_allData
# when it saves its tables.
attributes = ["ACC_train", "ACC_test", "time_transform", "time_fit", "alpha"]
# Alternative remote location, currently disabled:
#data_dir = "https://github.com/nikitazozoulenko/zephyrox/raw/main/Data/TSER/"
# Empty prefix: the file names are used as-is, i.e. as relative paths.
data_dir = ""
# Read one DataFrame per attribute and collect them in `dfs`, keyed by
# attribute name.
# NOTE(review): unpickling can execute arbitrary code; only point data_dir
# at a location you trust.
dfs = {}
for attribute in attributes:
    filename = f"TSC_{attribute}_results.pkl"
    print(data_dir+filename)
    df = pd.read_pickle(data_dir + filename)
    dfs[attribute] = df

TSC_ACC_train_results.pkl
TSC_ACC_test_results.pkl
TSC_time_transform_results.pkl
TSC_time_fit_results.pkl
TSC_alpha_results.pkl


In [10]:
# Show the test-accuracy table (rows: datasets, columns: models).
dfs["ACC_test"]

Unnamed: 0,Random Guesser,Tabular,Randomized Signature,TRP,TRP rbf,concat TRP,concat TRP rbf,Rocket,MiniRocket,MultiRocket
ACSF1,0.1,0.3,0.52,0.3,0.45,0.3,0.5,0.7,0.68,0.6
Adiac,0.02046,0.414322,0.283887,0.143223,0.248082,0.158568,0.335038,0.731458,0.780051,0.69821
ArrowHead,0.308571,0.737143,0.36,0.348571,0.554286,0.474286,0.554286,0.737143,0.782857,0.617143
BME,0.26,0.94,0.426667,0.74,0.673333,0.733333,0.606667,0.973333,0.94,0.92
Beef,0.033333,0.866667,0.5,0.4,0.466667,0.366667,0.5,0.833333,0.866667,0.633333
BeetleFly,0.55,0.8,0.9,0.75,0.8,0.8,0.85,0.8,0.9,0.75
BirdChicken,0.55,0.55,0.75,0.7,0.7,0.75,0.75,0.95,0.9,0.85
CBF,0.331111,0.834444,0.917778,0.851111,0.952222,0.871111,0.946667,0.992222,0.97,0.872222
Car,0.216667,0.8,0.5,0.45,0.566667,0.516667,0.616667,0.8,0.783333,0.716667
Chinatown,0.55102,0.982507,0.897959,0.947522,0.883382,0.90379,0.865889,0.959184,0.941691,0.906706
