In [2]:
from typing import Tuple, List, Union, Any, Optional, Dict, Literal, Callable
import os
import sys
sys.path.append(os.path.dirname(os.getcwd()))
sys.path.append(os.path.dirname(os.path.dirname(os.getcwd())))

import numpy as np
import jax
import jax.numpy as jnp
import jax.lax as lax
from jaxtyping import Array, Float, Int, PRNGKeyArray
import aeon

from features.sig_trp import SigVanillaTensorizedRandProj, SigRBFTensorizedRandProj
from features.sig import SigTransform, LogSigTransform
from features.base import TimeseriesFeatureTransformer, TabularTimeseriesFeatures, RandomGuesser
from features.sig_neural import RandomizedSignature
from utils.utils import print_name, print_shape

jax.config.update('jax_platform_name', 'gpu') # Used to set the platform (cpu, gpu, etc.)
np.set_printoptions(precision=3, threshold=5) # Print options

  from .autonotebook import tqdm as notebook_tqdm


# aoen toolkit

In [None]:
# Print the different datasets
from aeon.datasets.tsc_datasets import multivariate, univariate, univariate_equal_length
from aeon.datasets import load_classification

def get_aeon_dataset(
        dataset_name:str, 
        extract_path = "/home/nikita/hdd/Data/TSC/"
        ):
    """Loads a dataset from the UCR/UEA archive using 
    the aeon library.

    Args:
        dataset_name (str): Name of the dataset

    Returns:
        Tuple: 4-tuple of the form (X_train, y_train, X_test, y_test)
    """
    X_train, y_train = load_classification(dataset_name, split="train", extract_path=extract_path)
    X_test, y_test = load_classification(dataset_name, split="test", extract_path=extract_path)

    return X_train.transpose(0,2,1), y_train, X_test.transpose(0,2,1), y_test

univariate

In [None]:
#from aeon.transformations.collection.convolution_based import MiniRocketMultivariate
from preprocessing.timeseries_augmentation import normalize_mean_std_traindata, normalize_streams, augment_time, add_basepoint_zero
from aeon.classification.sklearn import RotationForestClassifier
#from sklearn.linear_model import RidgeCV
from sklearn.metrics import accuracy_score
import time

def train_and_test(
        dataset:str,
        transformer:TimeseriesFeatureTransformer,
        apply_augmentation:bool=True,
    ):
    train_X, train_y, test_X, test_y = get_aeon_dataset(dataset)
    train_X, test_X = normalize_streams(train_X, test_X, max_T=1000)
    print_name(train_X)
    print_name(test_X)
    # augment data
    train_X = lax.stop_gradient(jnp.array(train_X))
    test_X  = lax.stop_gradient(jnp.array(test_X))
    if apply_augmentation:
        train_X = add_basepoint_zero(train_X)
        train_X = augment_time(train_X)
        test_X  = add_basepoint_zero(test_X)
        test_X  = augment_time(test_X)

    # fit transformer
    t0 = time.time()
    transformer.fit(train_X)
    train_X = np.array(transformer.transform(train_X))
    test_X = np.array(transformer.transform(test_X))
    train_X, test_X = normalize_mean_std_traindata(train_X, test_X)
    t1 = time.time()
    print_name(train_X)
    print_name(test_X)
    print(f"Time to transform: {t1-t0} seconds")

    # train classifier      
    clf = RotationForestClassifier()
    clf.fit(train_X, train_y)
    t2 = time.time()
    print(f"Time to fit classifier on train: {t2-t1} seconds")

    # predict
    pred = clf.predict(test_X)
    acc = accuracy_score(test_y, pred)
    t3 = time.time()
    print(f"Time to predict: {t3-t2} seconds")
    print(f"{acc} accuracy for {transformer}")

In [None]:
train_and_test(
    dataset="Adiac",
    transformer=SigVanillaTensorizedRandProj(
        jax.random.PRNGKey(999),
        n_features= 128,
        trunc_level=5,
        max_batch=2000,
        )
    )
# 0.6086956521739131 accuracy for SigVanillaTensorizedRandProj(max_batch=10000, n_features=256,
#                              seed=Array([  0, 999], dtype=uint32),
#                              trunc_level=5)

In [None]:
train_and_test(
    dataset="Adiac",
    transformer=SigRBFTensorizedRandProj(
        jax.random.PRNGKey(999),
        n_features= 256,
        trunc_level=3,
        rbf_dimension=1000,
        max_batch=10000,
        )
    )
# 0.6445012787723785 accuracy for SigRBFTensorizedRandProj(max_batch=10000, n_features=256, rbf_dimension=1000,
#                          rff_max_batch=10000,
#                          rff_seed=Array([4116651765, 1982142802], dtype=uint32),
#                          trp_seed=Array([3655788082, 2541180754], dtype=uint32))

In [None]:
train_and_test(
    dataset="Adiac",
    transformer=TabularTimeseriesFeatures(),
    apply_augmentation=False
    )
# 0.7902813299232737 accuracy for TabularTimeseriesFeatures()

In [None]:
train_and_test(
    dataset="Adiac",
    transformer=SigTransform(trunc_level=5),
    ) 
# 0.5498721227621484 accuracy for SigTransform(trunc_level=5)

In [None]:
train_and_test(
    dataset="Adiac",
    transformer=LogSigTransform(trunc_level=5),
    ) 
# 0.4884910485933504 accuracy for LogSigTransform(trunc_level=5)


In [None]:
train_and_test(
    dataset="Adiac",
    transformer=RandomizedSignature(
        jax.random.PRNGKey(999),
        n_features= 128,
        max_batch=10000,
        )
    )
# 0.27365728900255754 accuracy for RandomizedSignature(max_batch=10000, n_features=128,
#                     seed=Array([  0, 999], dtype=uint32))

In [None]:
train_and_test(
    dataset="Adiac",
    transformer=RandomGuesser(
        jax.random.PRNGKey(999),
        n_features= 64,
        )
    )
# 0.03324808184143223 accuracy for RandomGuesser(n_features=64,
#                     seed=Array([1508125853,  174035561], dtype=uint32))

# Ridge

In [None]:
from sklearn.linear_model import RidgeClassifierCV

def train_and_test_ridge(
        dataset:str,
        transformer:TimeseriesFeatureTransformer,
        apply_augmentation:bool=True,
    ):
    train_X, train_y, test_X, test_y = get_aeon_dataset(dataset)
    train_X, test_X = normalize_streams(train_X, test_X, max_T=1000)
    print_shape(train_X)
    print_shape(test_X)
    # augment data
    train_X = lax.stop_gradient(jnp.array(train_X))
    test_X  = lax.stop_gradient(jnp.array(test_X))
    if apply_augmentation:
        train_X = add_basepoint_zero(train_X)
        train_X = augment_time(train_X)
        test_X  = add_basepoint_zero(test_X)
        test_X  = augment_time(test_X)

    # fit transformer
    t0 = time.time()
    transformer.fit(train_X)
    feat_train_X = np.array(transformer.transform(train_X))
    feat_test_X = np.array(transformer.transform(test_X))
    print("Before normalization:")
    print_name(feat_test_X)
    feat_train_X, feat_test_X = normalize_mean_std_traindata(feat_train_X, feat_test_X)
    t1 = time.time()
    print("After normalization:")
    print_name(feat_test_X)
    print(f"Time to transform: {t1-t0} seconds")

    # train classifier      
    clf = RidgeClassifierCV(alphas=np.logspace(-5, -2, 50))
    clf.fit(feat_train_X, train_y)
    t2 = time.time()
    print(f"Chosen alpha: {clf.alpha_}")
    print(f"Time to fit classifier on train: {t2-t1} seconds")

    # predict
    pred = clf.predict(feat_test_X)
    print(pred)
    acc = accuracy_score(test_y, pred)
    t3 = time.time()
    print(f"Time to predict: {t3-t2} seconds")
    print(f"{acc} accuracy for {transformer}")

In [None]:
train_and_test_ridge(
    dataset="MiddlePhalanxTW",
    transformer=SigVanillaTensorizedRandProj(
        jax.random.PRNGKey(999),
        n_features= 1000,
        trunc_level=5,
        max_batch=10,
        )
    )
# 0.5 accuracy for SigVanillaTensorizedRandProj(max_batch=20, n_features=1000,
#                              seed=Array([  0, 999], dtype=uint32),
#                              trunc_level=5)

In [None]:
train_and_test_ridge(
    dataset="MiddlePhalanxTW",
    transformer=SigRBFTensorizedRandProj(
        jax.random.PRNGKey(999),
        n_features= 1000,
        trunc_level=5,
        rbf_dimension=1000,
        max_batch=10,
        rff_max_batch=10000,
        )
    )
# 0.5333333333333333 accuracy for SigRBFTensorizedRandProj(max_batch=10, n_features=1000, rbf_dimension=1000,
#                          rff_max_batch=10000,
#                          seed=Array([  0, 999], dtype=uint32), trunc_level=5)

In [None]:
train_and_test_ridge(
    dataset="MiddlePhalanxTW",
    transformer=TabularTimeseriesFeatures(),
    apply_augmentation=False
    )
# 0.8666666666666667 accuracy for TabularTimeseriesFeatures()

In [None]:
train_and_test_ridge(
    dataset="MiddlePhalanxTW",
    transformer=SigTransform(trunc_level=5),
    )
# 0.6 accuracy for SigTransform(trunc_level=5)

In [None]:
train_and_test_ridge(
    dataset="MiddlePhalanxTW",
    transformer=LogSigTransform(trunc_level=5),
    )
# 0.5 accuracy for LogSigTransform(trunc_level=5)

In [None]:
train_and_test_ridge(
    dataset="MiddlePhalanxTW",
    transformer=RandomizedSignature(
        jax.random.PRNGKey(999),
        n_features= 1000,
        max_batch=10,
        )
    )
# 0.4 accuracy for RandomizedSignature(max_batch=10, n_features=1000,
#                     seed=Array([  0, 999], dtype=uint32))

In [None]:
train_and_test_ridge(
    dataset="MiddlePhalanxTW",
    transformer=RandomGuesser()
    )
# 0.43333333333333335 accuracy for RandomGuesser(seed=Array([2384771982, 3928867769], dtype=uint32))

In [None]:
from sklearn.linear_model import RidgeClassifierCV
from aeon.transformations.collection.convolution_based import Rocket, MultiRocket, MiniRocket

def train_and_test_ridge_ROCKETS(
        dataset:str,
        transformer,
        apply_augmentation:bool=True,
    ):
    train_X, train_y, test_X, test_y = get_aeon_dataset(dataset)

    print_shape(train_X)
    print_shape(test_X)
    # augment data
    train_X = train_X.transpose(0,2,1)
    test_X  = test_X.transpose(0,2,1)
    if apply_augmentation:
        train_X, test_X = normalize_streams(train_X, test_X, max_T=1000)
        train_X = np.array(add_basepoint_zero(train_X))
        test_X  = np.array(add_basepoint_zero(test_X))

    # fit transformer
    t0 = time.time()
    transformer.fit(train_X)
    feat_train_X = np.array(transformer.transform(train_X))
    feat_test_X = np.array(transformer.transform(test_X))
    print("Before normalization:")
    print_name(feat_test_X)
    feat_train_X, feat_test_X = normalize_mean_std_traindata(feat_train_X, feat_test_X)
    t1 = time.time()
    print("After normalization:")
    print_name(feat_test_X)
    print(f"Time to transform: {t1-t0} seconds")

    # train classifier      
    clf = RidgeClassifierCV(alphas=np.logspace(-3, 3, 100))
    clf.fit(feat_train_X, train_y)
    t2 = time.time()
    print(f"Chosen alpha: {clf.alpha_}")
    print(f"Time to fit classifier on train: {t2-t1} seconds")

    # predict
    pred = clf.predict(feat_test_X)
    print(pred)
    acc = accuracy_score(test_y, pred)
    t3 = time.time()
    print(f"Time to predict: {t3-t2} seconds")
    print(f"{acc} accuracy for {transformer}")

In [None]:
train_and_test_ridge_ROCKETS(
    dataset="MiddlePhalanxTW",
    transformer=Rocket(num_kernels=5000),
    apply_augmentation=True
    )
# 0.8 accuracy for Rocket(num_kernels=500)

In [None]:
train_and_test_ridge_ROCKETS(
    dataset="MiddlePhalanxTW",
    transformer=MiniRocket(num_kernels=10000),
    apply_augmentation=False
    )
# 0.8333333333333334 accuracy for MiniRocket(num_kernels=1000)

In [None]:
train_and_test_ridge_ROCKETS(
    dataset="MiddlePhalanxTW",
    transformer=MultiRocket(num_kernels=1300),
    apply_augmentation=False
    )
# 0.8 accuracy for MultiRocket(num_kernels=500)