In [8]:
from typing import Tuple, List, Union, Any, Optional, Dict, Literal, Callable
import time
import os
import sys
sys.path.append(os.path.dirname(os.getcwd()))
sys.path.append(os.path.dirname(os.path.dirname(os.getcwd())))

import numpy as np
import aeon
import torch
from torch import Tensor
import torch.nn as nn
import torch.functional as F
import pandas as pd
from aeon.datasets.tser_datasets import tser_soton
from aeon.datasets import load_regression
from aeon.regression.sklearn import RotationForestRegressor
from sklearn.metrics import root_mean_squared_error
from sklearn.linear_model import RidgeCV
from aeon.transformations.collection.convolution_based import Rocket, MultiRocketMultivariate, MiniRocketMultivariate

from utils.utils import print_name, print_shape
# from rocket import Rocket, RocketFeatures
# from ridge_loocv import fit_ridge_LOOCV
from preprocessing.stream_transforms import normalize_mean_std_traindata, normalize_streams, augment_time, add_basepoint_zero
from random_sig_fourier import SigTensorisedRandProj
from signature import SigTransform, LogSigTransform
from features.base import TimeseriesFeatureExtractor, TabularTimeseriesFeatures, RandomGuesser
from randomized_sig import RandomizedSignature

# Compact NumPy display for notebook output: 3 decimal places, and any array
# with more than 5 elements is summarised with "..." instead of printed in full.
np.set_printoptions(precision=3, threshold=5) # Print options

In [9]:
##########################################
####        ROCKET wrappers           ####
##########################################

class MultiRocketWrapper():
    """Torch-facing adapter around aeon's ``MultiRocketMultivariate``.

    Inputs are time-major tensors of shape (N, T, D); they are transposed to
    the channel-major (N, D, T) layout before being handed to aeon, and the
    resulting features are returned as a tensor on the input's device/dtype.
    """

    def __init__(
            self,
            max_batch: int = 10000,
            n_features: int = 3000,
        ):
        """
        Wrapper for the MultiRocketTransform from the aeon library.
        Original paper: https://link.springer.com/article/10.1007/s10618-022-00844-1

        Args:
            max_batch (int): Maximum batch size for computations.
            n_features (int):  Number of random features.
        """
        self.max_batch = max_batch
        self.n_features = n_features
        # The kernel count is derived from the requested feature budget.
        # NOTE(review): the exact output width is decided by aeon and is not
        # guaranteed to equal `n_features` -- confirm against the aeon docs.
        self.rocket = MultiRocketMultivariate(max(1, n_features//4))


    @staticmethod
    def _to_aeon_layout(X) -> np.ndarray:
        """Convert a (N, T, D) tensor/array into a (N, D, T) NumPy array.

        Tensors are detached and moved to the CPU first, so CUDA tensors and
        tensors that track gradients are handled the same way in `fit` and
        in `_batched_transform`.
        """
        if isinstance(X, Tensor):
            X = X.detach().cpu().numpy()
        return np.asarray(X).transpose(0, 2, 1)


    def fit(self, X: Tensor): #shape (N, T, D)
        """Fit the underlying MultiRocket transformer on X of shape (N, T, D)."""
        self.rocket.fit(self._to_aeon_layout(X))


    def _batched_transform(self, X: Tensor) -> Tensor:
        """MultiRocket features for a single batch.

        Args:
            X (Tensor): Shape (N, T, D)

        Returns:
            Tensor: Features of shape (N, F) with the dtype and device of X.
                F is whatever width aeon produces for the configured number
                of kernels.
        """
        features = self.rocket.transform(self._to_aeon_layout(X))
        # torch.from_numpy() accepts no dtype/device keywords; as_tensor does.
        # np.asarray also covers aeon versions that return a DataFrame.
        return torch.as_tensor(np.asarray(features), dtype=X.dtype, device=X.device)


    def transform(self, X: Tensor) -> Tensor:
        """MultiRocket features, computed in chunks of at most `max_batch` rows.

        Args:
            X (Tensor): Shape (N, T, D)

        Returns:
            Tensor: Row-wise concatenation of the per-chunk features.
        """
        step = max(1, int(self.max_batch))
        n_samples = X.shape[0]
        if n_samples <= step:
            # Single chunk (also covers N == 0): no slicing/concatenation needed.
            return self._batched_transform(X)
        chunks = [
            self._batched_transform(X[start:start + step])
            for start in range(0, n_samples, step)
        ]
        return torch.cat(chunks, dim=0)

In [10]:
#############################################
#######          Dataset Code         #######
#############################################

def get_aeon_dataset(
        dataset_name:str, 
        #extract_path = "/rds/general/user/nz423/home/Data/TSER/"
        extract_path = "/home/nikita/hdd/Data/TSER/"
        ):
    """Fetch the train and test splits of a regression dataset via aeon's
    `load_regression` and return the inputs as time-major torch tensors.

    Args:
        dataset_name (str): Name of the dataset.
        extract_path (str): Directory passed to `load_regression` as
            `extract_path`.

    Returns:
        Tuple: 4-tuple of the form (X_train, y_train, X_test, y_test).
            The X arrays are converted with `torch.from_numpy` after their
            last two axes are swapped; the targets are returned exactly as
            aeon provides them.
    """
    # Load both splits first, then convert (same order of work as before).
    (X_train, y_train), (X_test, y_test) = [
        load_regression(dataset_name, split=split, extract_path=extract_path)
        for split in ("train", "test")
    ]

    def _swap_last_axes(arr):
        # presumably (N, D, T) -> (N, T, D), matching the "(N, T, D)"
        # convention used by the feature extractors in this notebook.
        return torch.from_numpy(arr.transpose(0,2,1))

    return _swap_last_axes(X_train), y_train, _swap_last_axes(X_test), y_test

In [11]:
##################################
####  Linear Model (Ridge)  ######
##################################

def train_and_test_linear(
        train_X, train_y, test_X, test_y,
        feat_extractor: TimeseriesFeatureExtractor,
        apply_augmentation:bool=True,
        normalize_features:bool=True,
        clf=None
    ):
    """Extract features with `feat_extractor`, fit a linear regressor on them
    and report train/test RMSE together with wall-clock timings.

    Args:
        train_X, test_X: Input time series; `train_X.shape` is printed on entry.
        train_y, test_y: Regression targets for the two splits.
        feat_extractor (TimeseriesFeatureExtractor): Object exposing
            `fit(X)` and `transform(X)`.
        apply_augmentation (bool): If True, the streams are normalised with
            train statistics, then `add_basepoint_zero` and `augment_time`
            are applied to both splits.
        normalize_features (bool): If True, the extracted features are
            normalised with `normalize_mean_std_traindata`.
        clf: Regressor with `fit`/`predict`. When None (the default) a fresh
            `RidgeCV(alphas=np.logspace(-3, 3, 20))` is created for this call,
            so no estimator instance is shared between calls.

    Returns:
        Tuple: (train_rmse, test_rmse, alpha, t_features, t_fit) where
            `alpha` is `clf.alpha_` if the fitted estimator has one (else
            None), `t_features` covers extractor fit + transform + feature
            normalisation, and `t_fit` covers `clf.fit` only.
    """
    # A default estimator in the signature would be built once at definition
    # time and then mutated by every call; build it per call instead.
    if clf is None:
        clf = RidgeCV(alphas=np.logspace(-3, 3, 20))

    # augment data
    print(train_X.shape)
    if apply_augmentation:
        train_X, test_X = normalize_mean_std_traindata(train_X, test_X)
        train_X = add_basepoint_zero(train_X)
        train_X = augment_time(train_X)
        test_X = add_basepoint_zero(test_X)
        test_X = augment_time(test_X)

    # fit transformer
    t0 = time.time()
    feat_extractor.fit(train_X)
    feat_train_X = np.array(feat_extractor.transform(train_X))
    feat_test_X = np.array(feat_extractor.transform(test_X))
    if normalize_features:
        feat_train_X, feat_test_X = normalize_mean_std_traindata(feat_train_X, feat_test_X)

    # feed into linear regressor
    t1 = time.time()
    clf.fit(feat_train_X, train_y)
    t2 = time.time()

    # predict
    pred = clf.predict(feat_test_X)
    test_rmse = root_mean_squared_error(test_y, pred)
    train_rmse = root_mean_squared_error(train_y, clf.predict(feat_train_X))
    # Only CV-style estimators expose the selected regularisation strength.
    alpha = getattr(clf, 'alpha_', None)
    return train_rmse, test_rmse, alpha, t1-t0, t2-t1

In [18]:
def run_allModels_singleDataset(
        X_train, y_train, X_test, y_test,
        max_batch: int = 32,
        trunc_level: int = 4,
        n_features: int = 1000,
    ):
    """Run every configured feature extractor on one dataset with the ridge
    pipeline of `train_and_test_linear`.

    Args:
        X_train, y_train, X_test, y_test: Dataset splits, forwarded unchanged
            to `train_and_test_linear`.
        max_batch (int): Batch size passed to the TRP extractors.
        trunc_level (int): Truncation level passed to the TRP extractors.
        n_features (int): Feature budget passed to the randomised extractors.

    Returns:
        Tuple: (model_names, results_ridge). `results_ridge[i]` is the tuple
            returned by `train_and_test_linear` for `model_names[i]`.
    """
    def make_trp(only_last: bool, method: str, **extra):
        # Single construction site for all TRP variants, so the variants can
        # only differ in the arguments spelled out in the model list below.
        return SigTensorisedRandProj(
            trunc_level,
            n_features,
            only_last=only_last,
            method=method,
            max_batch=max_batch,
            **extra,
            )

    models = [
        ["Random Guesser", RandomGuesser()],
        ["Tabular", TabularTimeseriesFeatures()],
        # ["Sig", SigTransform(trunc_level, max_batch)],
        # ["Log Sig", LogSigTransform(trunc_level, max_batch)],
        ["Randomized Signature", RandomizedSignature(
            n_features,
            activation = "tanh",
            max_batch=10,
            )],
        ["TRP", make_trp(True, "linear")],
        ["TRP rbf", make_trp(True, "RBF", sigma_rbf=1.0)],
        ["concat TRP", make_trp(False, "linear")],
        ["concat TRP rbf", make_trp(False, "RBF", sigma_rbf=1.0)],
        # TODO: add a real MultiRocket baseline here (e.g. via MultiRocketWrapper).
        # The former "MultiRocket" entry was a verbatim copy of "concat TRP rbf"
        # and therefore reported TRP results under the wrong name.
        ]
    
    # rocket_models = [
    #     ["Rocket", Rocket(n_features//2, random_state=numpy_seed)],
    #     ["MiniRocket", MiniRocketMultivariate(n_features, random_state=numpy_seed)],
    #     ["MultiRocket", MultiRocketMultivariate(n_features//4, random_state=numpy_seed)],
    #     ]
    
    # Run experiments
    model_names = [name for (name, _) in models]
    results_ridge = [
        train_and_test_linear(X_train, y_train, X_test, y_test, model)
        for _, model in models
    ]
    
    return model_names, results_ridge

In [13]:
def run_dataset(dataset_name:str):
    """Load `dataset_name` with `get_aeon_dataset` and evaluate all models on it.

    Returns:
        Tuple: (model_names, results_ridge) as produced by
            `run_allModels_singleDataset`.
    """
    splits = get_aeon_dataset(dataset_name)  # (X_train, y_train, X_test, y_test)
    return run_allModels_singleDataset(*splits)

In [19]:
# Sort the dataset names so that the positional index below is reproducible.
tser_soton = sorted(tser_soton)
# Run the full model comparison on one dataset (index 8 of the sorted names).
run_dataset(tser_soton[8])

torch.Size([1099, 24, 6])
torch.Size([1099, 24, 6])
torch.Size([1099, 24, 6])


KeyboardInterrupt: 

In [None]:


# def do_experiments(datasets: List[str]):
#     experiments = {}
#     experiments_metadata = {}
#     failed = {}
#     for dataset_name in tqdm(datasets):
#         t0 = time.time()
#         try:
#             print(dataset_name)
#             X_train, y_train, X_test, y_test = get_aeon_dataset(dataset_name)
#             X_train, X_test = normalize_streams(X_train, X_test, max_T=1000)
#             y_train, y_test = normalize_mean_std_traindata(y_train, y_test)
#             N_train = X_train.shape[0]
#             N_test = X_test.shape[0]
#             T = X_train.shape[1]
#             D = X_train.shape[2]
#             if N_train > 2000 or D > 20:
#                 continue
#             results = run_all_experiments(
#                 X_train, y_train, X_test, y_test
#                 )
#             experiments_metadata[dataset_name] = {
#                 "N_train": N_train,
#                 "N_test": N_test,
#                 "T": T,
#                 "D": D,
#             }
#             experiments[dataset_name] = results
#         except Exception as e:
#             print(f"Error: {e}")
#             failed[dataset_name] = e
#         print("Elapsed time", time.time()-t0)
#     return experiments, experiments_metadata, failed


# if __name__ == "__main__":
#     d_res, d_meta, d_failed = do_experiments(list(tser_soton))
    
#     # Define the attributes and methods
#     attributes = ["RMSE_train", "RMSE_test", "time_transform", "time_fit", "alpha"]
#     methods = ["ridge", "rotforest"]
    
#     # Extract model_names from d_res
#     model_names = next(iter(d_res.values()))[0]

#     # Create and save DataFrames for each attribute and method
#     for attribute in attributes:
#         for method in methods:
#             df = pd.DataFrame(columns=model_names)
#             for dataset_name, (model_names, results_ridge, results_rotforest) in d_res.items():
#                 if method == "ridge":
#                     results = results_ridge
#                 elif method == "rotforest":
#                     results = results_rotforest

#                 values = [res[attributes.index(attribute)] for res in results]
#                 df.loc[dataset_name] = values

#             # Save the DataFrame
#             print(df)
#             df.to_pickle(f"TESR_{attribute}_{method}_results.pkl")