In [2]:
from typing import Tuple, List, Union, Any, Optional, Dict, Literal, Callable
import time
import os
import sys
sys.path.append(os.path.dirname(os.getcwd()))
sys.path.append(os.path.dirname(os.path.dirname(os.getcwd())))

import numpy as np
import aeon
import torch
from torch import Tensor
import torch.nn as nn
import torch.functional as F
import pandas as pd
from aeon.datasets.tser_datasets import tser_soton; tser_soton = sorted(list(tser_soton))
from aeon.datasets import load_regression
from aeon.regression.sklearn import RotationForestRegressor
from sklearn.metrics import root_mean_squared_error
from sklearn.linear_model import RidgeCV
from aeon.transformations.collection.convolution_based import Rocket, MultiRocket, MiniRocket
from tqdm import tqdm

from utils.utils import print_name, print_shape
# from rocket import Rocket, RocketFeatures
# from ridge_loocv import fit_ridge_LOOCV
from preprocessing.stream_transforms import normalize_mean_std_traindata, normalize_streams, augment_time, add_basepoint_zero
from random_sig_fourier import SigTensorisedRandProj
from signature import SigTransform, LogSigTransform
from features.base import TimeseriesFeatureExtractor, TabularTimeseriesFeatures, RandomGuesser
from randomized_sig import RandomizedSignature

np.set_printoptions(precision=3, threshold=5) # Print options

In [3]:
##########################################
####        ROCKET wrappers           ####
##########################################

class MultiRocketWrapper(TimeseriesFeatureExtractor):
    """Feature extractor that delegates to aeon's ``MultiRocket`` transform.

    Original paper: https://link.springer.com/article/10.1007/s10618-022-00844-1
    """

    def __init__(
            self,
            n_features: int = 3000,
            max_batch: int = 10000,
        ):
        """
        Args:
            n_features (int): Requested number of random features.
            max_batch (int): Maximum batch size for computations
                (forwarded to the base class constructor).
        """
        super().__init__(max_batch)
        self.n_features = n_features
        # The kernel count is floored at 84. The notebook output shows 672
        # features for n_features=500 (i.e. 84 kernels), so the realised
        # width can exceed the request.
        n_kernels = max(84, n_features // 8)
        self.rocket = MultiRocket(n_kernels)


    def fit(self, X: Tensor): # X has shape (N, T, D)
        """Fit the wrapped transform on `X`, passed as a numpy array with the last two axes swapped."""
        channels_first = X.cpu().numpy().transpose(0, 2, 1)
        self.rocket.fit(channels_first)


    def _batched_transform(self, X: Tensor) -> Tensor: # X has shape (N, T, D)
        """Transform one batch and return the features with the dtype and device of `X`."""
        channels_first = X.cpu().numpy().transpose(0, 2, 1)
        rocket_features = self.rocket.transform(channels_first)
        as_tensor = torch.from_numpy(rocket_features)
        return as_tensor.to(X.dtype).to(X.device)



class RocketWrapper(TimeseriesFeatureExtractor):
    """Feature extractor that delegates to aeon's ``Rocket`` transform."""

    def __init__(
            self,
            n_features: int = 3000,
            max_batch: int = 10000,
        ):
        """
        Args:
            n_features (int): Requested number of random features.
            max_batch (int): Maximum batch size for computations
                (forwarded to the base class constructor).
        """
        super().__init__(max_batch)
        self.n_features = n_features
        # Half as many kernels as requested features, but never fewer than one.
        n_kernels = max(1, n_features // 2)
        self.rocket = Rocket(n_kernels)


    def fit(self, X: Tensor): # X has shape (N, T, D)
        """Fit the wrapped transform on `X`, passed as a numpy array with the last two axes swapped."""
        channels_first = X.cpu().numpy().transpose(0, 2, 1)
        self.rocket.fit(channels_first)


    def _batched_transform(self, X: Tensor) -> Tensor: # X has shape (N, T, D)
        """Transform one batch and return the features with the dtype and device of `X`."""
        channels_first = X.cpu().numpy().transpose(0, 2, 1)
        rocket_features = self.rocket.transform(channels_first)
        as_tensor = torch.from_numpy(rocket_features)
        return as_tensor.to(X.dtype).to(X.device)
    


class MiniRocketWrapper(TimeseriesFeatureExtractor):
    """Feature extractor that delegates to aeon's ``MiniRocket`` transform."""

    def __init__(
            self,
            n_features: int = 3000,
            max_batch: int = 10000,
        ):
        """
        Args:
            n_features (int): Requested number of random features. The
                notebook output shows 420 features for n_features=500, so
                the realised width can differ from the request.
            max_batch (int): Maximum batch size for computations
                (forwarded to the base class constructor).
        """
        super().__init__(max_batch)
        self.n_features = n_features
        # The requested size is passed straight through, floored at one.
        n_kernels = max(1, n_features)
        self.rocket = MiniRocket(n_kernels)


    def fit(self, X: Tensor): # X has shape (N, T, D)
        """Fit the wrapped transform on `X`, passed as a numpy array with the last two axes swapped."""
        channels_first = X.cpu().numpy().transpose(0, 2, 1)
        self.rocket.fit(channels_first)


    def _batched_transform(self, X: Tensor) -> Tensor: # X has shape (N, T, D)
        """Transform one batch and return the features with the dtype and device of `X`."""
        channels_first = X.cpu().numpy().transpose(0, 2, 1)
        rocket_features = self.rocket.transform(channels_first)
        as_tensor = torch.from_numpy(rocket_features)
        return as_tensor.to(X.dtype).to(X.device)

In [4]:
#############################################
#######          Dataset Code         #######
#############################################

def get_aeon_dataset(
        dataset_name:str, 
        #extract_path = "/rds/general/user/nz423/home/Data/TSER/"
        extract_path = "/home/nikita/hdd/Data/TSER/",
        device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
        ):
    """Loads the train and test splits of a regression dataset
    through aeon's ``load_regression``.

    Args:
        dataset_name (str): Name of the dataset
        extract_path (str): Directory handed to ``load_regression`` as
            ``extract_path``.
        device: Device the input tensors are moved to. The default is
            evaluated once, when the function is defined.

    Returns:
        Tuple: 4-tuple of the form (X_train, y_train, X_test, y_test).
            The inputs are torch tensors with the last two axes of the
            loaded arrays swapped; the targets are returned as loaded.
    """
    def _load_split(split_name):
        # Load one split and convert its inputs; the targets are left untouched.
        inputs, targets = load_regression(dataset_name, split=split_name, extract_path=extract_path)
        return torch.from_numpy(inputs.transpose(0,2,1)).to(device), targets

    X_train, y_train = _load_split("train")
    X_test, y_test = _load_split("test")
    return X_train, y_train, X_test, y_test

In [5]:
def test_get_data(idx: int = 17):
    """Load the dataset at position `idx` of the sorted TSER name list on
    the CPU and print its name, index and input shapes."""
    name = sorted(tser_soton)[idx]
    X_train, _, X_test, _ = get_aeon_dataset(name, device="cpu")
    report = (
        ("Dataset:", name),
        ("idx:", idx),
        ("X_train", X_train.shape),
        ("X_test", X_test.shape),
    )
    for label, value in report:
        print(label, value)

# for i in range(20):
#     test_get_data(i)
#     print("\n")

In [6]:
##################################
####  Linear Model (Ridge)  ######
##################################

def train_and_test_linear(
        train_X, train_y, test_X, test_y,
        feat_extractor: TimeseriesFeatureExtractor,
        apply_augmentation:bool=True,
        normalize_features:bool=True,
        clf=None
    ):
    """Extract features with `feat_extractor`, fit a linear model on them
    and report train/test RMSE together with timings.

    Args:
        train_X, test_X: Input time series for the train and test splits.
        train_y, test_y: Regression targets for the train and test splits.
        feat_extractor (TimeseriesFeatureExtractor): Extractor that is
            fitted on the training inputs and applied to both splits.
        apply_augmentation (bool): If True, the inputs are normalised with
            `normalize_mean_std_traindata` and then passed through
            `add_basepoint_zero` and `augment_time` before extraction.
        normalize_features (bool): If True, the extracted features are
            normalised with `normalize_mean_std_traindata`.
        clf: Estimator exposing `fit` and `predict`. When None (default) a
            new ``RidgeCV(alphas=np.logspace(-3, 3, 20))`` is created for
            this call, so calls never share a fitted estimator.

    Returns:
        Tuple: (train_rmse, test_rmse, alpha, time_transform, time_fit),
            where `alpha` is `clf.alpha_` if the estimator has it, else
            None. `time_transform` covers fitting the extractor, both
            transforms and the optional feature normalisation; `time_fit`
            covers fitting `clf`.
    """
    # A default estimator in the signature would be a single instance that
    # every call refits; build it per call instead.
    if clf is None:
        clf = RidgeCV(alphas=np.logspace(-3, 3, 20))

    # augment data
    print(train_X.shape)
    if apply_augmentation:
        train_X, test_X = normalize_mean_std_traindata(train_X, test_X)
        train_X = add_basepoint_zero(train_X)
        train_X = augment_time(train_X)
        test_X = add_basepoint_zero(test_X)
        test_X = augment_time(test_X)

    # fit transformer
    t0 = time.time()
    feat_extractor.fit(train_X)
    feat_train_X = feat_extractor.transform(train_X).cpu().numpy()
    feat_test_X = feat_extractor.transform(test_X).cpu().numpy()
    print("feat_train_X", feat_train_X.shape)
    if normalize_features:
        feat_train_X, feat_test_X = normalize_mean_std_traindata(feat_train_X, feat_test_X)

    # fit the linear model on the extracted features
    t1 = time.time()
    clf.fit(feat_train_X, train_y)
    t2 = time.time()

    # predict
    pred = clf.predict(feat_test_X)
    test_rmse = root_mean_squared_error(test_y, pred)
    train_rmse = root_mean_squared_error(train_y, clf.predict(feat_train_X))
    # Only estimators with built-in regularisation selection expose alpha_.
    alpha = getattr(clf, 'alpha_', None)
    return train_rmse, test_rmse, alpha, t1-t0, t2-t1

In [7]:
def run_allModels_singleDataset(X_train, y_train, X_test, y_test):
    """Build every feature extractor in the benchmark and evaluate each one
    with `train_and_test_linear` on the given split.

    Returns:
        Tuple: (model_names, results_ridge), where `results_ridge[i]` is
            the value returned by `train_and_test_linear` for
            `model_names[i]`.
    """
    max_batch = 32
    trunc_level = 4
    n_features = 500
    # Width used for the "concat" TRP variants. The notebook output shows
    # 498 features for them, i.e. trunc_level-1 blocks of this size.
    n_features_concat = n_features // (trunc_level-1)

    def make_trp(width, only_last, method, **extra):
        # All TRP variants share the truncation level and the batch size.
        return SigTensorisedRandProj(
            trunc_level,
            width,
            only_last=only_last,
            method=method,
            max_batch=max_batch,
            **extra,
            )

    # Extractors are instantiated in this order.
    models = [
        ("Random Guesser", RandomGuesser()),
        ("Tabular", TabularTimeseriesFeatures()),
        # ("Sig", SigTransform(trunc_level, max_batch)),
        # ("Log Sig", LogSigTransform(trunc_level, max_batch)),
        ("Randomized Signature", RandomizedSignature(
            n_features,
            activation = "tanh",
            max_batch=10,
            )),
        ("TRP", make_trp(n_features, True, "linear")),
        ("TRP rbf", make_trp(n_features, True, "RBF", sigma_rbf=1.0)),
        ("concat TRP", make_trp(n_features_concat, False, "linear")),
        ("concat TRP rbf", make_trp(n_features_concat, False, "RBF", sigma_rbf=1.0)),
        ("Rocket", RocketWrapper(n_features)),
        ("MiniRocket", MiniRocketWrapper(n_features)),
        ("MultiRocket", MultiRocketWrapper(n_features)),
        ]

    # Run experiments
    model_names = [label for label, _ in models]
    results_ridge = []
    for label, extractor in models:
        print("name", label)
        outcome = train_and_test_linear(
            X_train, y_train, X_test, y_test, extractor
            )
        results_ridge.append(outcome)
        print()

    return model_names, results_ridge

In [8]:
def run_dataset(dataset_name:str):
    """Load one dataset, normalise inputs and targets, and run every model on it.

    Returns:
        Tuple: (model_names, results_ridge) as produced by
            `run_allModels_singleDataset`.
    """
    train_inputs, train_targets, test_inputs, test_targets = get_aeon_dataset(dataset_name)
    train_inputs, test_inputs = normalize_streams(train_inputs, test_inputs, max_T=1000)
    train_targets, test_targets = normalize_mean_std_traindata(train_targets, test_targets)
    return run_allModels_singleDataset(train_inputs, train_targets, test_inputs, test_targets)

# Single-dataset smoke run: entry 17 of the sorted TSER name list.
model_names, results_ridge = run_dataset(tser_soton[17])

name Random Guesser
torch.Size([321, 168, 4])
feat_train_X (321, 2)

name Tabular
torch.Size([321, 168, 4])
feat_train_X (321, 845)

name Randomized Signature
torch.Size([321, 168, 4])
feat_train_X (321, 500)

name TRP
torch.Size([321, 168, 4])
feat_train_X (321, 500)

name TRP rbf
torch.Size([321, 168, 4])
feat_train_X (321, 500)

name concat TRP
torch.Size([321, 168, 4])
feat_train_X (321, 498)

name concat TRP rbf
torch.Size([321, 168, 4])
feat_train_X (321, 498)

name Rocket
torch.Size([321, 168, 4])
feat_train_X (321, 500)

name MiniRocket
torch.Size([321, 168, 4])
feat_train_X (321, 420)

name MultiRocket
torch.Size([321, 168, 4])
feat_train_X (321, 672)



In [16]:
def run_allModels_allData(datasets: List[str]):
    """Run every model on every dataset and save one result table per metric.

    Each dataset is loaded, normalised and, if it has at most 2000 training
    samples and at most 20 channels, evaluated with
    `run_allModels_singleDataset`. An exception raised while processing a
    dataset is printed and recorded, and the loop continues.

    For each metric a DataFrame (rows: datasets, columns: models) is printed
    and pickled to ``TESR_<metric>_results.pkl`` in the working directory.
    If no dataset produced results, nothing is written.

    Args:
        datasets (List[str]): Names of the datasets to evaluate.

    Returns:
        Tuple: (experiments, failed). `experiments` maps a dataset name to
            (model_names, results_ridge); `failed` maps a dataset name to
            the exception it raised.
    """
    #run experiments
    experiments = {}
    failed = {}
    for dataset_name in tqdm(datasets):
        t0 = time.time()
        try:
            print(dataset_name)
            X_train, y_train, X_test, y_test = get_aeon_dataset(dataset_name)
            X_train, X_test = normalize_streams(X_train, X_test, max_T=1000)
            y_train, y_test = normalize_mean_std_traindata(y_train, y_test)
            N_train = X_train.shape[0]
            D = X_train.shape[2]
            # Datasets above these size limits are skipped.
            if N_train<=2000 and D<=20:
                results = run_allModels_singleDataset(
                    X_train, y_train, X_test, y_test
                    )
                experiments[dataset_name] = results
        except Exception as e:
            # Best effort: remember the failure and move on to the next dataset.
            print(f"Error: {e}")
            failed[dataset_name] = e
        print("Elapsed time", time.time()-t0)

    # Without a single successful run there are no model names to use as
    # columns and nothing to save.
    if not experiments:
        print("No results to save.")
        return experiments, failed

    #parse results
    # Order matches the tuple returned by train_and_test_linear.
    attributes = ["RMSE_train", "RMSE_test", "alpha", "time_transform", "time_fit"]

    # Every dataset is evaluated with the same list of models, so the
    # first entry supplies the column names.
    model_names = next(iter(experiments.values()))[0]

    # Create and save one DataFrame per attribute
    for position, attribute in enumerate(attributes):
        rows = {
            dataset_name: [res[position] for res in results_ridge]
            for dataset_name, (_, results_ridge) in experiments.items()
        }
        df = pd.DataFrame.from_dict(rows, orient="index", columns=model_names)

        # Save the DataFrame
        print(df)
        df.to_pickle(f"TESR_{attribute}_results.pkl")

    return experiments, failed

In [17]:
# Run the benchmark on entries 17 and 18 of the sorted TSER name list; this also writes the TESR_*_results.pkl files.
run_allModels_allData(tser_soton[17:19])

  0%|          | 0/2 [00:00<?, ?it/s]

ChilledWaterPredictor
name Random Guesser
torch.Size([321, 168, 4])
feat_train_X (321, 2)

name Tabular
torch.Size([321, 168, 4])
feat_train_X (321, 845)

name Randomized Signature
torch.Size([321, 168, 4])
feat_train_X (321, 500)

name TRP
torch.Size([321, 168, 4])
feat_train_X (321, 500)

name TRP rbf
torch.Size([321, 168, 4])
feat_train_X (321, 500)

name concat TRP
torch.Size([321, 168, 4])
feat_train_X (321, 498)

name concat TRP rbf
torch.Size([321, 168, 4])
feat_train_X (321, 498)

name Rocket
torch.Size([321, 168, 4])
feat_train_X (321, 500)

name MiniRocket
torch.Size([321, 168, 4])
feat_train_X (321, 420)

name MultiRocket
torch.Size([321, 168, 4])


 50%|█████     | 1/2 [00:07<00:07,  7.02s/it]

feat_train_X (321, 672)

Elapsed time 7.014836549758911
CopperConcentration
name Random Guesser
torch.Size([440, 847, 1])
feat_train_X (440, 2)

name Tabular
torch.Size([440, 847, 1])
feat_train_X (440, 1696)

name Randomized Signature
torch.Size([440, 847, 1])
feat_train_X (440, 500)

name TRP
torch.Size([440, 847, 1])
feat_train_X (440, 500)

name TRP rbf
torch.Size([440, 847, 1])
feat_train_X (440, 500)

name concat TRP
torch.Size([440, 847, 1])
feat_train_X (440, 498)

name concat TRP rbf
torch.Size([440, 847, 1])
feat_train_X (440, 498)

name Rocket
torch.Size([440, 847, 1])
feat_train_X (440, 500)

name MiniRocket
torch.Size([440, 847, 1])
feat_train_X (440, 420)

name MultiRocket
torch.Size([440, 847, 1])


100%|██████████| 2/2 [00:32<00:00, 16.16s/it]

feat_train_X (440, 672)

Elapsed time 25.295840978622437
                       Random Guesser   Tabular  Randomized Signature  \
ChilledWaterPredictor        0.999070  0.825708              0.912168   
CopperConcentration          0.999861  0.623243              0.779510   

                            TRP  TRP rbf  concat TRP  concat TRP rbf  \
ChilledWaterPredictor  0.935015  0.70114    0.915992        0.712540   
CopperConcentration    0.876810  0.79557    0.896071        0.800917   

                         Rocket  MiniRocket  MultiRocket  
ChilledWaterPredictor  0.816810    0.868008     0.788814  
CopperConcentration    0.690187    0.682630     0.781351  
                       Random Guesser   Tabular  Randomized Signature  \
ChilledWaterPredictor        0.212780  0.321760              0.353864   
CopperConcentration          1.064394  0.880425              0.953231   

                            TRP   TRP rbf  concat TRP  concat TRP rbf  \
ChilledWaterPredictor  0.290734  0.5




({'ChilledWaterPredictor': (['Random Guesser',
    'Tabular',
    'Randomized Signature',
    'TRP',
    'TRP rbf',
    'concat TRP',
    'concat TRP rbf',
    'Rocket',
    'MiniRocket',
    'MultiRocket'],
   [(0.9990702110539251,
     0.2127800722053151,
     1000.0,
     0.014102697372436523,
     0.07094359397888184),
    (0.8257080392088081,
     0.3217600907493544,
     1000.0,
     0.035019636154174805,
     0.10147595405578613),
    (0.9121681795766039,
     0.35386428334120273,
     483.2930238571752,
     1.7384676933288574,
     0.05115199089050293),
    (0.9350151602097505,
     0.2907342082106951,
     1000.0,
     0.12572264671325684,
     0.10596251487731934),
    (0.7011404770910523,
     0.5608342923740216,
     1000.0,
     1.8493633270263672,
     0.0535886287689209),
    (0.9159920949054428,
     0.3305764272877676,
     1000.0,
     0.06533384323120117,
     0.04683971405029297),
    (0.7125401742309685,
     0.5188819649765589,
     1000.0,
     0.292291164398193

In [18]:
# Names of the per-attribute result tables written by run_allModels_allData.
# The order here only determines load order; each table lives in its own file.
attributes = ["RMSE_train", "RMSE_test", "time_transform", "time_fit", "alpha"]
#data_dir = "https://github.com/nikitazozoulenko/zephyrox/raw/main/Data/TSER/"
# Empty prefix: the pickles are read relative to the current working directory.
data_dir = ""
# Load each table into `dfs`, keyed by attribute name.
# NOTE(review): pd.read_pickle can execute arbitrary code; only point data_dir at trusted sources.
dfs = {}
for attribute in attributes:
    filename = f"TESR_{attribute}_results.pkl"
    print(data_dir+filename)  # echo the path being loaded
    df = pd.read_pickle(data_dir + filename)
    dfs[attribute] = df

TESR_RMSE_train_results.pkl
TESR_RMSE_test_results.pkl
TESR_time_transform_results.pkl
TESR_time_fit_results.pkl
TESR_alpha_results.pkl


In [23]:
# Test-set RMSE table: one row per dataset, one column per model.
dfs["RMSE_test"]

Unnamed: 0,Random Guesser,Tabular,Randomized Signature,TRP,TRP rbf,concat TRP,concat TRP rbf,Rocket,MiniRocket,MultiRocket
ChilledWaterPredictor,0.21278,0.32176,0.353864,0.290734,0.560834,0.330576,0.518882,0.279269,0.284953,0.327401
CopperConcentration,1.064394,0.880425,0.953231,0.943294,0.92993,0.959601,0.923901,0.829749,0.822336,0.913779
