In [1]:
from typing import Tuple, List, Union, Any, Optional, Dict, Literal, Callable
import time
import os
import sys
sys.path.append(os.path.dirname(os.getcwd()))
sys.path.append(os.path.dirname(os.path.dirname(os.getcwd())))

import numpy as np
import aeon
import torch
from torch import Tensor
import torch.nn as nn
import torch.nn.functional as F
import pandas as pd
from aeon.datasets.tsc_datasets import univariate_equal_length, multivariate_equal_length
univariate_equal_length = sorted(list(univariate_equal_length))
multivariate_equal_length = sorted(list(multivariate_equal_length))
from aeon.datasets import load_classification
from sklearn.linear_model import RidgeCV, RidgeClassifierCV
from sklearn.metrics import accuracy_score
from tqdm import tqdm

from utils.utils import print_name, print_shape
from preprocessing.stream_transforms import normalize_mean_std_traindata, normalize_streams, augment_time, add_basepoint_zero
from random_sig_fourier import SigTensorisedRandProj
from signature import SigTransform, LogSigTransform
from features.base import TimeseriesFeatureExtractor, TabularTimeseriesFeatures, RandomGuesser
from randomized_sig import RandomizedSignature
from rocket_wrappers import RocketWrapper, MiniRocketWrapper, MultiRocketWrapper
from multirocket import MultiRocketOwn
from frozen_gpt2 import ProjTimeseriesGPT2Last, ProjTimeseriesGPT2Multipooling, ProjTimeseriesGPT2MultipoolingAndLast

np.set_printoptions(precision=3, threshold=5) # Print options



In [2]:
#############################################
#######          Dataset Code         #######
#############################################

def get_aeon_dataset(
        dataset_name:str, 
        extract_path = "/home/nikita/hdd/Data/MTSC/",
        device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
        ):
    """Fetch the train and test splits of an aeon classification dataset
    and return the inputs as float tensors.

    Args:
        dataset_name (str): Name of the dataset, forwarded to
            ``load_classification``.
        extract_path: Directory forwarded to ``load_classification`` as
            ``extract_path``. The default is a machine-specific absolute
            path; pass your own location when running elsewhere.
        device: Target device for the returned tensors. The default is
            evaluated once, when the function is defined.

    Returns:
        Tuple: 4-tuple of the form (X_train, y_train, X_test, y_test).
        The X arrays have their last two axes swapped before conversion
        (the rest of this notebook reads the result as (N, T, D)); the
        labels are returned exactly as aeon provides them.
    """
    def _as_float_tensor(array):
        # swap axes 1 and 2, move to `device`, then cast to float32
        swapped = array.transpose(0, 2, 1)
        return torch.from_numpy(swapped).to(device).float().detach()

    # Load both splits before converting anything (train first, then test).
    raw_train, y_train = load_classification(
        dataset_name, split="train", extract_path=extract_path
    )
    raw_test, y_test = load_classification(
        dataset_name, split="test", extract_path=extract_path
    )

    return _as_float_tensor(raw_train), y_train, _as_float_tensor(raw_test), y_test

In [3]:
# def test_get_data(idx: int = 17):
#     name = univariate_equal_length[idx]
#     X_train, y_train, X_test, y_test = get_aeon_dataset(name, device="cpu")
#     print("Dataset:", name)
#     print("idx:", idx)
#     print("X_train", X_train.shape)
#     print("X_test", X_test.shape)

# for i in range(20):
#     test_get_data(i)
#     print("\n")

In [4]:
##################################
####  Linear Model (Ridge)  ######
##################################

def train_and_test_linear(
        train_X, train_y, test_X, test_y,
        feat_extractor: TimeseriesFeatureExtractor,
        apply_augmentation:bool=True,
        normalize_features:bool=True,
        clf=None
    ):
    """Extract features with ``feat_extractor`` and evaluate a linear
    classifier on top of them.

    Args:
        train_X, test_X: Input streams for the train / test split
            (assumed to be torch tensors laid out as (N, T, D) -- TODO confirm).
        train_y, test_y: Labels for the two splits.
        feat_extractor (TimeseriesFeatureExtractor): Object exposing
            ``fit(train_X)`` and ``transform(X)``; ``transform`` must return
            a torch tensor (it is moved to CPU and converted to numpy).
        apply_augmentation (bool): If True, pass both splits through
            ``normalize_mean_std_traindata`` and prepend a basepoint with
            ``add_basepoint_zero`` before feature extraction.
        normalize_features (bool): If True, pass the extracted features
            through ``normalize_mean_std_traindata`` before classification.
        clf: Classifier with ``fit`` / ``predict``. When None (default) a
            fresh ``RidgeClassifierCV(alphas=np.logspace(-1, 3, 20))`` is
            created for this call, so no fitted state is shared between calls.

    Returns:
        Tuple: (train_acc, test_acc, alpha, time_transform, time_fit) where
        ``alpha`` is ``clf.alpha_`` if the classifier exposes it (else None),
        ``time_transform`` covers extractor fitting, both transforms and the
        optional feature normalisation, and ``time_fit`` covers ``clf.fit``.
    """
    # Build the default estimator per call instead of in the signature: a
    # default-argument instance would be created once and reused everywhere.
    if clf is None:
        clf = RidgeClassifierCV(alphas=np.logspace(-1, 3, 20))

    # augment data
    print(train_X.shape)
    if apply_augmentation:
        train_X, test_X = normalize_mean_std_traindata(train_X, test_X)
        train_X = add_basepoint_zero(train_X)
        test_X = add_basepoint_zero(test_X)
        # time augmentation (augment_time) is currently disabled

    # fit transformer
    t0 = time.time()
    with torch.no_grad():
        feat_extractor.fit(train_X)
        feat_train_X = feat_extractor.transform(train_X).cpu().numpy()
        feat_test_X = feat_extractor.transform(test_X).cpu().numpy()
        print("feat_train_X", feat_train_X.shape)
        if normalize_features:
            feat_train_X, feat_test_X = normalize_mean_std_traindata(feat_train_X, feat_test_X)

    # feed into linear classifier
    t1 = time.time()
    clf.fit(feat_train_X, train_y)
    t2 = time.time()

    # predict
    test_acc = accuracy_score(test_y, clf.predict(feat_test_X))
    train_acc = accuracy_score(train_y, clf.predict(feat_train_X))
    alpha = getattr(clf, "alpha_", None)
    return train_acc, test_acc, alpha, t1-t0, t2-t1

In [5]:
def run_allModels_singleDataset(X_train, y_train, X_test, y_test):
    """Benchmark every enabled feature extractor on one dataset.

    All extractors are constructed up front, then each one is evaluated in
    turn with ``train_and_test_linear`` (default classifier and flags).

    Args:
        X_train, y_train: Training inputs and labels.
        X_test, y_test: Test inputs and labels.

    Returns:
        Tuple: (model_names, results_ridge) -- the extractor names and, in the
        same order, the tuples returned by ``train_and_test_linear``.
    """
    max_batch = 16
    n_rocket_features = 20000
    # Only referenced by the disabled signature-based extractors listed below.
    trunc_level = 4
    n_features = 1344

    # Disabled alternatives, kept for reference (move into `models` to enable):
    #   ("Sig", SigTransform(trunc_level, max_batch)),
    #   ("Log Sig", LogSigTransform(trunc_level, max_batch)),
    #   ("Randomized Signature", RandomizedSignature(n_features, activation="tanh", max_batch=10)),
    #   ("TRP", SigTensorisedRandProj(trunc_level, n_features, only_last=True, method="linear", max_batch=max_batch)),
    #   ("TRP rbf", SigTensorisedRandProj(trunc_level, n_features, only_last=True, method="RBF", sigma_rbf=1.0, max_batch=max_batch)),
    #   ("concat TRP", SigTensorisedRandProj(trunc_level, n_features // (trunc_level-1), only_last=False, method="linear", max_batch=max_batch)),
    #   ("concat TRP rbf", SigTensorisedRandProj(trunc_level, n_features // (trunc_level-1), only_last=False, method="RBF", sigma_rbf=1.0, max_batch=max_batch)),
    #   ("Rocket", RocketWrapper(n_rocket_features)),
    models = [
        ("Random Guesser", RandomGuesser()),
        ("Tabular", TabularTimeseriesFeatures()),
        ("MiniRocket", MiniRocketWrapper(n_rocket_features)),
        ("MultiRocket", MultiRocketWrapper(n_rocket_features)),
        ("MyOwnMultiRocket", MultiRocketOwn(n_rocket_features, max_batch)),
        ("GPT2 Last", ProjTimeseriesGPT2Last(max_batch)),
        ("GPT2 Pooling", ProjTimeseriesGPT2Multipooling(max_batch)),
        ("GPT2 Last+Pooling", ProjTimeseriesGPT2MultipoolingAndLast(max_batch)),
    ]

    # Run experiments
    model_names = []
    results_ridge = []
    for label, extractor in models:
        model_names.append(label)
        print("name", label)
        outcome = train_and_test_linear(X_train, y_train, X_test, y_test, extractor)
        results_ridge.append(outcome)
        # Release cached GPU memory between extractors.
        if torch.cuda.is_available():
            torch.cuda.empty_cache()
        print()

    return model_names, results_ridge

In [6]:
def run_dataset(dataset_name:str):
    """Load one dataset by name, preprocess it and benchmark all extractors.

    The splits come from ``get_aeon_dataset`` (default path and device) and
    are passed through ``normalize_streams(..., max_T=1000)`` before being
    handed to ``run_allModels_singleDataset``.

    Returns:
        Tuple: (model_names, results_ridge) as produced by
        ``run_allModels_singleDataset``.
    """
    train_X, train_y, test_X, test_y = get_aeon_dataset(dataset_name)
    train_X, test_X = normalize_streams(train_X, test_X, max_T=1000)
    names, ridge_results = run_allModels_singleDataset(train_X, train_y, test_X, test_y)
    return names, ridge_results

#model_names, results_ridge = run_dataset(univariate_equal_length[9])

In [7]:
from rocket_wrappers import RocketWrapper, MiniRocketWrapper, MultiRocketWrapper

# Sanity check: print the feature dimension each Rocket variant actually
# produces for a range of requested feature counts.
for n_features in (100, 500, 1000, 2000, 1344, 4000, 6000, 8000, 10000):
    # Construct all four extractors first, in the order they are reported.
    extractors = (
        RocketWrapper(n_features),
        MiniRocketWrapper(n_features),
        MultiRocketWrapper(n_features),
        MultiRocketOwn(n_features),
    )

    print("n_features:", n_features)

    # Random input batch, layout N T D
    X = torch.randn(2, 150, 1)

    # Fit each extractor on the random input and report the output shape
    for extractor in extractors:
        print(extractor.fit_transform(X).shape)
    print()

n_features: 100
torch.Size([2, 100])
torch.Size([2, 84])
torch.Size([2, 672])
torch.Size([2, 100])

n_features: 500
torch.Size([2, 500])
torch.Size([2, 420])
torch.Size([2, 672])
torch.Size([2, 500])

n_features: 1000
torch.Size([2, 1000])
torch.Size([2, 924])
torch.Size([2, 672])
torch.Size([2, 1000])

n_features: 2000
torch.Size([2, 2000])
torch.Size([2, 1932])
torch.Size([2, 1344])
torch.Size([2, 2000])

n_features: 1344
torch.Size([2, 1344])
torch.Size([2, 1344])
torch.Size([2, 1344])
torch.Size([2, 1340])

n_features: 4000
torch.Size([2, 4000])
torch.Size([2, 3948])
torch.Size([2, 3360])
torch.Size([2, 4000])

n_features: 6000
torch.Size([2, 6000])
torch.Size([2, 5964])
torch.Size([2, 5376])
torch.Size([2, 6000])

n_features: 8000
torch.Size([2, 8000])
torch.Size([2, 7980])
torch.Size([2, 7392])
torch.Size([2, 8000])

n_features: 10000
torch.Size([2, 10000])
torch.Size([2, 9996])
torch.Size([2, 9408])
torch.Size([2, 10000])



In [8]:
def run_allModels_allData(datasets: List[str]):
    """Benchmark all feature extractors on several datasets and save the
    results as one pickled DataFrame per metric.

    Each dataset is loaded with ``get_aeon_dataset``, preprocessed with
    ``normalize_streams(..., max_T=1000)`` and evaluated only if it has at
    most 2000 training samples and at most 20 channels; larger datasets are
    skipped (a message is printed).

    For every metric in
    ``["ACC_train", "ACC_test", "alpha", "time_transform", "time_fit"]``
    a DataFrame (rows: datasets, columns: model names) is printed and written
    to ``MTSC_{metric}_results.pkl`` in the current working directory. If no
    dataset was evaluated, nothing is written.

    Args:
        datasets (List[str]): Dataset names to process.

    Returns:
        Tuple: (experiments, failed). ``experiments`` maps dataset name to
        the (model_names, results_ridge) pair from
        ``run_allModels_singleDataset``. ``failed`` is currently always empty
        because errors are not caught (they propagate to the caller).
    """
    # Order must match the tuple returned by train_and_test_linear.
    attributes = ["ACC_train", "ACC_test", "alpha", "time_transform", "time_fit"]

    #run experiments
    experiments = {}
    failed = {}
    for dataset_name in tqdm(datasets):
        t0 = time.time()
        print(dataset_name)
        X_train, y_train, X_test, y_test = get_aeon_dataset(dataset_name)
        X_train, X_test = normalize_streams(X_train, X_test, max_T=1000)
        N_train = X_train.shape[0]
        D = X_train.shape[2]
        if N_train<=2000 and D<=20:
            experiments[dataset_name] = run_allModels_singleDataset(
                X_train, y_train, X_test, y_test
                )
        else:
            print(f"Skipped: N_train={N_train}, D={D} exceeds the size filter")
        print("Elapsed time", time.time()-t0)

    # Nothing was evaluated (empty input or every dataset filtered out):
    # there are no model names to build tables from, so return early instead
    # of failing on the lookup below.
    if not experiments:
        print("No dataset was evaluated; no result files written.")
        return experiments, failed

    #parse results
    # Column labels are taken from the first evaluated dataset.
    model_names = next(iter(experiments.values()))[0]

    # Create and save one DataFrame per attribute
    for position, attribute in enumerate(attributes):
        df = pd.DataFrame(columns=model_names)
        for dataset_name, (_, results_ridge) in experiments.items():
            df.loc[dataset_name] = [res[position] for res in results_ridge]

        # Save the DataFrame
        print(df)
        df.to_pickle(f"MTSC_{attribute}_results.pkl")

    return experiments, failed

In [9]:
# Datasets to benchmark. Entries that are commented out are excluded from the
# run; two of them are annotated as having unequal length.
# NOTE(review): the variable name suggests this follows a reference paper's
# selection -- the paper is not identified in this notebook.
used_by_paper = [
    "EthanolConcentration",
    #"FaceDetection",
    "Handwriting",
    "Heartbeat",
    #"JapaneseVowels", #unequal length
    "PEMS-SF",
    "SelfRegulationSCP1",
    "SelfRegulationSCP2",
    #"SpokenArabicDigits", #unequal length
    "UWaveGestureLibrary",
]


# Run every configured feature extractor on each listed dataset. As the last
# expression of the cell, the returned (experiments, failed) tuple is displayed.
# Datasets failing the `N_train<=2000 and D<=20` filter inside
# run_allModels_allData are loaded but not evaluated (in the recorded output
# this happened for Heartbeat and PEMS-SF).
run_allModels_allData(used_by_paper)
# Alternative: run_allModels_allData(univariate_equal_length[0:10])

  0%|          | 0/7 [00:00<?, ?it/s]

EthanolConcentration
name Random Guesser
torch.Size([261, 875, 3])
feat_train_X (261, 2)

name Tabular
torch.Size([261, 875, 3])
feat_train_X (261, 2628)

name MiniRocket
torch.Size([261, 875, 3])
feat_train_X (261, 19992)

name MultiRocket
torch.Size([261, 875, 3])
feat_train_X (261, 19488)

name MyOwnMultiRocket
torch.Size([261, 875, 3])
feat_train_X (261, 19992)

name GPT2 Last
torch.Size([261, 875, 3])
feat_train_X (261, 768)

name GPT2 Pooling
torch.Size([261, 875, 3])
feat_train_X (261, 3072)

name GPT2 Last+Pooling
torch.Size([261, 875, 3])


 14%|█▍        | 1/7 [02:46<16:36, 166.16s/it]

feat_train_X (261, 3840)

Elapsed time 166.159321308136
Handwriting
name Random Guesser
torch.Size([150, 152, 3])
feat_train_X (150, 2)

name Tabular
torch.Size([150, 152, 3])
feat_train_X (150, 459)

name MiniRocket
torch.Size([150, 152, 3])
feat_train_X (150, 19992)

name MultiRocket
torch.Size([150, 152, 3])
feat_train_X (150, 19488)

name MyOwnMultiRocket
torch.Size([150, 152, 3])
feat_train_X (150, 20000)

name GPT2 Last
torch.Size([150, 152, 3])
feat_train_X (150, 768)

name GPT2 Pooling
torch.Size([150, 152, 3])
feat_train_X (150, 3072)

name GPT2 Last+Pooling
torch.Size([150, 152, 3])


 29%|██▊       | 2/7 [03:40<08:22, 100.52s/it]

feat_train_X (150, 3840)

Elapsed time 54.57391166687012
Heartbeat


 43%|████▎     | 3/7 [03:43<03:43, 55.80s/it] 

Elapsed time 2.5824546813964844
PEMS-SF


 57%|█████▋    | 4/7 [03:59<02:00, 40.02s/it]

Elapsed time 15.824694633483887
SelfRegulationSCP1
name Random Guesser
torch.Size([268, 896, 6])
feat_train_X (268, 2)

name Tabular
torch.Size([268, 896, 6])
feat_train_X (268, 5382)

name MiniRocket
torch.Size([268, 896, 6])
feat_train_X (268, 19992)

name MultiRocket
torch.Size([268, 896, 6])
feat_train_X (268, 19488)

name MyOwnMultiRocket
torch.Size([268, 896, 6])
feat_train_X (268, 19992)

name GPT2 Last
torch.Size([268, 896, 6])
feat_train_X (268, 768)

name GPT2 Pooling
torch.Size([268, 896, 6])
feat_train_X (268, 3072)

name GPT2 Last+Pooling
torch.Size([268, 896, 6])


 71%|███████▏  | 5/7 [08:03<03:47, 113.83s/it]

feat_train_X (268, 3840)

Elapsed time 244.68935132026672
SelfRegulationSCP2
name Random Guesser
torch.Size([200, 576, 7])
feat_train_X (200, 2)

name Tabular
torch.Size([200, 576, 7])
feat_train_X (200, 4039)

name MiniRocket
torch.Size([200, 576, 7])
feat_train_X (200, 19992)

name MultiRocket
torch.Size([200, 576, 7])
feat_train_X (200, 19488)

name MyOwnMultiRocket
torch.Size([200, 576, 7])
feat_train_X (200, 19992)

name GPT2 Last
torch.Size([200, 576, 7])
feat_train_X (200, 768)

name GPT2 Pooling
torch.Size([200, 576, 7])
feat_train_X (200, 3072)

name GPT2 Last+Pooling
torch.Size([200, 576, 7])


 86%|████████▌ | 6/7 [09:40<01:48, 108.16s/it]

feat_train_X (200, 3840)

Elapsed time 97.15615224838257
UWaveGestureLibrary
name Random Guesser
torch.Size([120, 315, 3])
feat_train_X (120, 2)

name Tabular
torch.Size([120, 315, 3])
feat_train_X (120, 948)

name MiniRocket
torch.Size([120, 315, 3])
feat_train_X (120, 19992)

name MultiRocket
torch.Size([120, 315, 3])
feat_train_X (120, 19488)

name MyOwnMultiRocket
torch.Size([120, 315, 3])
feat_train_X (120, 19992)

name GPT2 Last
torch.Size([120, 315, 3])
feat_train_X (120, 768)

name GPT2 Pooling
torch.Size([120, 315, 3])
feat_train_X (120, 3072)

name GPT2 Last+Pooling
torch.Size([120, 315, 3])


100%|██████████| 7/7 [10:43<00:00, 91.91s/it] 

feat_train_X (120, 3840)

Elapsed time 62.382282733917236
                      Random Guesser   Tabular  MiniRocket  MultiRocket  \
EthanolConcentration        0.249042  0.724138         1.0          1.0   
Handwriting                 0.120000  0.886667         1.0          1.0   
SelfRegulationSCP1          0.507463  0.944030         1.0          1.0   
SelfRegulationSCP2          0.570000  0.860000         1.0          1.0   
UWaveGestureLibrary         0.183333  0.933333         1.0          1.0   

                      MyOwnMultiRocket  GPT2 Last  GPT2 Pooling  \
EthanolConcentration          0.977011   0.501916      0.781609   
Handwriting                   1.000000   0.540000      1.000000   
SelfRegulationSCP1            0.985075   0.884328      0.925373   
SelfRegulationSCP2            0.995000   0.685000      0.870000   
UWaveGestureLibrary           1.000000   0.991667      1.000000   

                      GPT2 Last+Pooling  
EthanolConcentration           0.816092  
Hand




({'EthanolConcentration': (['Random Guesser',
    'Tabular',
    'MiniRocket',
    'MultiRocket',
    'MyOwnMultiRocket',
    'GPT2 Last',
    'GPT2 Pooling',
    'GPT2 Last+Pooling'],
   [(0.24904214559386972,
     0.22053231939163498,
     1000.0,
     0.0017986297607421875,
     0.004736423492431641),
    (0.7241379310344828,
     0.5817490494296578,
     33.59818286283781,
     0.009601831436157227,
     0.042554378509521484),
    (1.0, 0.4448669201520912, 1000.0, 4.598974943161011, 0.13681459426879883),
    (1.0, 0.4790874524714829, 1000.0, 10.306032657623291, 0.11631035804748535),
    (0.9770114942528736,
     0.3231939163498099,
     1000.0,
     3.0855185985565186,
     0.13683366775512695),
    (0.5019157088122606,
     0.35361216730038025,
     1000.0,
     42.756288290023804,
     0.037848472595214844),
    (0.7816091954022989,
     0.3193916349809886,
     1000.0,
     44.93659853935242,
     0.19080424308776855),
    (0.8160919540229885,
     0.3574144486692015,
     1000.

In [10]:
# Metrics whose result tables were written by run_allModels_allData,
# one pickle file per metric.
attributes = ["ACC_train", "ACC_test", "time_transform", "time_fit", "alpha"]

# Prefix prepended verbatim to each file name ("" = current working directory).
#data_dir = "https://github.com/nikitazozoulenko/zephyrox/raw/main/Data/TSER/"
data_dir = ""

# Load every table into a dict keyed by metric name.
dfs = {}
for attribute in attributes:
    path = data_dir + f"MTSC_{attribute}_results.pkl"
    print(path)
    # NOTE: unpickling can execute arbitrary code -- only load trusted files.
    dfs[attribute] = pd.read_pickle(path)

MTSC_ACC_train_results.pkl
MTSC_ACC_test_results.pkl
MTSC_time_transform_results.pkl
MTSC_time_fit_results.pkl
MTSC_alpha_results.pkl


In [11]:
# Regularisation strength selected by cross-validation (clf.alpha_) for each
# dataset (rows) and feature extractor (columns).
# NOTE(review): the default grid in train_and_test_linear is
# np.logspace(-1, 3, 20), so 0.1 and 1000.0 are the ends of the search range;
# most entries in the recorded output sit on 1000.0 -- consider widening it.
dfs["alpha"]

Unnamed: 0,Random Guesser,Tabular,MiniRocket,MultiRocket,MyOwnMultiRocket,GPT2 Last,GPT2 Pooling,GPT2 Last+Pooling
EthanolConcentration,1000.0,33.598183,1000.0,1000.0,1000.0,1000.0,1000.0,1000.0
Handwriting,233.572147,1000.0,1000.0,1000.0,1000.0,1000.0,1000.0,1000.0
SelfRegulationSCP1,379.269019,1000.0,1000.0,1000.0,1000.0,88.586679,1000.0,615.848211
SelfRegulationSCP2,88.586679,1000.0,1000.0,1000.0,1000.0,1000.0,1000.0,1000.0
UWaveGestureLibrary,1000.0,1000.0,0.1,0.1,0.1,33.598183,12.74275,0.1


In [12]:
# Test-split accuracy (accuracy_score on the test predictions) for each
# dataset (rows) and feature extractor (columns).
dfs["ACC_test"]

Unnamed: 0,Random Guesser,Tabular,MiniRocket,MultiRocket,MyOwnMultiRocket,GPT2 Last,GPT2 Pooling,GPT2 Last+Pooling
EthanolConcentration,0.220532,0.581749,0.444867,0.479087,0.323194,0.353612,0.319392,0.357414
Handwriting,0.044706,0.191765,0.409412,0.397647,0.365882,0.169412,0.277647,0.281176
SelfRegulationSCP1,0.464164,0.856655,0.87372,0.938567,0.904437,0.733788,0.849829,0.843003
SelfRegulationSCP2,0.466667,0.511111,0.5,0.561111,0.511111,0.516667,0.511111,0.505556
UWaveGestureLibrary,0.140625,0.821875,0.925,0.921875,0.925,0.69375,0.825,0.85


In [13]:
# Training-split accuracy for each dataset (rows) and feature extractor
# (columns).
# NOTE(review): in the recorded output the Rocket variants reach (almost) 1.0
# here while their test accuracy is much lower -- a large train/test gap.
dfs["ACC_train"]

Unnamed: 0,Random Guesser,Tabular,MiniRocket,MultiRocket,MyOwnMultiRocket,GPT2 Last,GPT2 Pooling,GPT2 Last+Pooling
EthanolConcentration,0.249042,0.724138,1.0,1.0,0.977011,0.501916,0.781609,0.816092
Handwriting,0.12,0.886667,1.0,1.0,1.0,0.54,1.0,1.0
SelfRegulationSCP1,0.507463,0.94403,1.0,1.0,0.985075,0.884328,0.925373,0.966418
SelfRegulationSCP2,0.57,0.86,1.0,1.0,0.995,0.685,0.87,0.905
UWaveGestureLibrary,0.183333,0.933333,1.0,1.0,1.0,0.991667,1.0,1.0


In [14]:
# Seconds spent on feature extraction (t1 - t0 in train_and_test_linear):
# fitting the extractor, transforming both splits, and the optional feature
# normalisation. Rows are datasets, columns are feature extractors.
dfs["time_transform"]

Unnamed: 0,Random Guesser,Tabular,MiniRocket,MultiRocket,MyOwnMultiRocket,GPT2 Last,GPT2 Pooling,GPT2 Last+Pooling
EthanolConcentration,0.001799,0.009602,4.598975,10.306033,3.085519,42.756288,44.936599,58.576803
Handwriting,0.000548,0.0018,2.787918,5.789465,1.080091,12.28769,15.087915,16.222575
SelfRegulationSCP1,0.007057,0.012824,7.356523,18.371466,3.238971,59.120779,77.689981,76.872231
SelfRegulationSCP2,0.000607,0.005838,3.909942,7.606298,1.44836,24.174446,28.178466,30.093291
UWaveGestureLibrary,0.000546,0.002146,2.392194,4.643426,0.895145,14.019731,19.579292,19.806077
