In [1]:
import numpy as np
from typing import List, Optional, Dict, Set, Callable, Any, Literal
from joblib import Memory, Parallel, delayed
import tslearn
import tslearn.metrics
from tslearn.datasets import UCR_UEA_datasets
import torch
from torch import Tensor

from experiments.cross_validation import cv_tslearn, print_cv_results
from experiments.eval_on_test import validate_tslearn, print_test_results
from experiments.utils import join_dicts_from_pickle_paths, save_to_pickle, print_latex_results
from experiments.experiment_code import run_all_kernels

# Cross Validation on Train

In [None]:
# Cross-validate the enabled kernels on the enabled datasets.
# Commented-out entries are toggles: uncomment a line to include it in the run.
cv_results = cv_tslearn(
    dataset_names=[
        # "Epilepsy",                # N_corpus = 34
        "EthanolConcentration",      # N_corpus = 65
        # "FingerMovements",         # N_corpus = 158
        # "HandMovementDirection",   # N_corpus = 40
        # "Heartbeat",               # N_corpus = 102
        # "LSST",                    # N_corpus = 176
        # "MotorImagery",            # N_corpus = 139
        "NATOPS",                    # N_corpus = 30
        # "PenDigits",               # N_corpus = 749
        # "PEMS-SF",                 # N_corpus = 38
        # "PhonemeSpectra",          # N_corpus = 85
        # "RacketSports",            # N_corpus = 38
        # "SelfRegulationSCP1",      # N_corpus = 134
    ],
    kernel_names=[
        # "flat linear",
        "flat rbf",
        "flat poly",
        # "integral rbf",
        # "integral poly",
        # "trunc sig linear",
        # "trunc sig rbf",
        # "pde sig rbf",
        # "gak",  # normalized only
        # "reservoir",
    ],
    k_folds=3,
    n_repeats=1,
    verbose=False,
)

In [None]:
# Show the cross-validation results, then pickle them to Data/cv_dummy.pkl.
print_cv_results(cv_results)
save_to_pickle(cv_results, "Data/cv_dummy.pkl")

# Validate on Test

In [None]:
# Run the test-set validation, driven by the cross-validation results in `cv_results`.
test_results = validate_tslearn(cv_results, verbose=False)

In [None]:
# Display the results of the test-set validation.
print_test_results(test_results)

# Read CV data from file and print results

In [2]:
# Load previously saved cross-validation results and merge them into a single
# dict. Uncomment a path to include that file.
# NOTE: this rebinds `cv_results`, replacing whatever the CV cell produced.
cv_results = join_dicts_from_pickle_paths([
    # "Data/cv_Epilepsy.pkl",
    # "Data/cv_EthanolConcentration.pkl",
    # "Data/cv_FingerMovements.pkl",
    # "Data/cv_HandMovementDirection.pkl",
    # "Data/cv_Heartbeat.pkl",
    # "Data/cv_LSST.pkl",
    # "Data/cv_MotorImagery.pkl",
    # "Data/cv_NATOPS.pkl",
    # "Data/cv_PEMS-SF.pkl",
    # "Data/cv_PhonemeSpectra.pkl",
    "Data/cv_RacketSports.pkl",
    # "Data/cv_SelfRegulationSCP1.pkl",
])
print_cv_results(cv_results)

Cross Validation Results
Number of Classes: 4
Dimension of path: 6
Length: 30
Train: 151
Test: N/A

conf_results

flat linear
final_score_avgs 1.6121056885807024
alphas_score_avgs [1.542 1.542 1.543 1.612]
thresh_score_avgs [1.14  1.14  1.14  1.14  1.14  1.14  1.14  1.14  1.197 1.45  1.519 1.508
 1.56  1.565 1.572 1.572 1.572 1.572 1.572 1.572]
Badminton_Clear
{'basepoint': '', 'time': '', 'normalize': True, 'threshold': 0.02144839925588929, 'alpha': 0.01, 'CV_time': 455.3627350330353}
Badminton_Smash
{'basepoint': '', 'time': '', 'normalize': True, 'threshold': 0.00545879987887382, 'alpha': 0.01, 'CV_time': 455.3627350330353}
Squash_BackhandBoast
{'basepoint': '', 'time': '', 'normalize': True, 'threshold': 0.02144839925588929, 'alpha': 0.01, 'CV_time': 455.3627350330353}
Squash_ForehandBoast
{'basepoint': '', 'time': 'time_enhance', 'normalize': True, 'threshold': 0.04251515251442982, 'alpha': 0.01, 'CV_time': 455.3627350330353}

flat rbf
final_score_avgs 1.573445070558368
alphas_sco

In [None]:
# Merge the two saved result pickles into one dict.
test_results = join_dicts_from_pickle_paths(
    [
        "Data/results_shorts.pkl",
        "Data/results_longs.pkl",
    ]
)

# Optional filter on the top-level keys. With the condition commented out the
# comprehension is a plain shallow copy that keeps every entry.
test_results = {
    dataset_name: dataset_results
    for dataset_name, dataset_results in test_results.items()
    # if dataset_name in [
    #     "EthanolConcentration",  # datasets with corpus size > 50
    #     "FingerMovements",
    #     "Heartbeat",
    #     "LSST",
    #     "MotorImagery",
    #     "PenDigits",
    #     "PhonemeSpectra",
    #     "SelfRegulationSCP1",
    # ]
}

In [None]:
# Emit the LaTeX-formatted results twice: rounded to 2 digits, then to 3.
for n_digits in (2, 3):
    print_latex_results(test_results, round_digits=n_digits)

## Enumerate all UCR UEA datasets in 'tslearn'

In [None]:
# Keep the loader instance under its own name. Rebinding `UCR_UEA_datasets`
# to an instance would shadow the imported class, so re-running this cell (or
# any later `UCR_UEA_datasets()` call) would fail with
# "'UCR_UEA_datasets' object is not callable".
ucr_uea_loader = UCR_UEA_datasets()

for dataset_name in ucr_uea_loader.list_multivariate_datasets():
    print("Dataset:", dataset_name)
    dataset = ucr_uea_loader.load_dataset(dataset_name)
    if dataset[0] is None:
        # A None first entry means the dataset could not be loaded.
        print("No dataset found")
        print()
        continue

    X_train, y_train, X_test, y_test = dataset
    num_classes = len(np.unique(y_train))
    N_train, T, d = X_train.shape
    # Index the shape instead of unpacking into `_`, which IPython reserves
    # for the last cell output.
    N_test = X_test.shape[0]

    print("Number of Classes:", num_classes)
    print("Dimension of path:", d)
    print("Length:", T)
    print("Train Size, Test Size", N_train, N_test)
    print()

# Plot distribution of CV params

In [None]:
import matplotlib.pyplot as plt

# Reference list of dataset names: the twelve datasets that have a
# "Data/cv_<name>.pkl" entry in the loading cell, plus "SelfRegulationSCP2".
# Not referenced by any code in the cells shown here.
all_datasets = [
    "Epilepsy",
    "EthanolConcentration",
    "FingerMovements",
    "HandMovementDirection",
    "Heartbeat",
    "LSST",
    "MotorImagery",
    "NATOPS",
    "PEMS-SF",
    "PhonemeSpectra",
    "RacketSports",
    "SelfRegulationSCP1",
    "SelfRegulationSCP2",
    ]

# Reference list of kernel names; the same ten names offered as `kernel_names`
# options in the cross-validation cell.
# Not referenced by any code in the cells shown here.
all_kernels = [
    "flat linear",
    "flat rbf",
    "flat poly",
    "integral rbf",
    "integral poly",
    "trunc sig linear",
    "trunc sig rbf",
    "pde sig rbf",
    "gak",
    "reservoir",
    ]


def plot_cv_params_single_kernel(
        cv_results:Dict,
        param_name:str = "sigma",
        kernel_name:str = "flat rbf",
        datasets:Optional[List[str]] = None,
        n_bins:int = 30,
    ):
    """
    Plot one histogram of the values of 'param_name' stored for the kernel
    'kernel_name', pooled over the selected datasets, both anomaly methods
    ("conf_results" and "mahal_results") and all class labels.

    Args:
        cv_results (Dict): Nested results, indexed as
            cv_results[dataset][anomaly_method][kernel][label] -> param dict.
        param_name (str): Key of the parameter to collect from each param dict.
            Param dicts that do not contain it are skipped.
        kernel_name (str): Kernel whose parameters are collected.
        datasets (Optional[List[str]]): Datasets to include. None means every
            dataset in 'cv_results'.
        n_bins (int): Number of histogram bins.
    """
    if datasets is None:
        datasets = list(cv_results.keys())
    selected_datasets = set(datasets)

    values = []
    for dataset_name, results in cv_results.items():
        if dataset_name not in selected_datasets:
            continue
        for anomaly_method in ("conf_results", "mahal_results"):
            labelwise_dict = results[anomaly_method].get(kernel_name, {})
            for param_dict in labelwise_dict.values():
                # Not every param dict carries every parameter; skip those
                # instead of raising KeyError.
                if param_name in param_dict:
                    values.append(param_dict[param_name])

    # Sort before converting to strings so the categorical x-axis follows the
    # natural order of the raw values.
    values = np.array(values)
    values.sort()
    values = values.astype(str)

    plt.hist(values, n_bins)
    plt.xlabel(param_name)
    plt.ylabel("Frequency")
    plt.title(f"Kernel: {kernel_name}")
    plt.xticks(rotation='vertical')
    plt.show()



def plot_cv_params_all_kernels(
        cv_results:Dict,
        param_name:str = "sigma",
        kernels:Optional[List[str]] = None,
        datasets:Optional[List[str]] = None,
        n_bins:int = 30,
    ):
    """
    Plot one histogram of the values of 'param_name', pooled over the
    selected datasets and kernels, both anomaly methods ("conf_results" and
    "mahal_results") and all class labels.

    Each collected value and the final count are printed as well.

    Args:
        cv_results (Dict): Nested results, indexed as
            cv_results[dataset][anomaly_method][kernel][label] -> param dict.
        param_name (str): Key of the parameter to collect from each param dict.
            Param dicts that do not contain it are skipped.
        kernels (Optional[List[str]]): Kernels to include. None means every
            kernel present in the results.
        datasets (Optional[List[str]]): Datasets to include. None means every
            dataset in 'cv_results'.
        n_bins (int): Number of histogram bins.
    """
    # None means "no filtering", so the default call keeps pooling everything.
    dataset_filter = None if datasets is None else set(datasets)
    kernel_filter = None if kernels is None else set(kernels)

    values = []
    for dataset_name, results in cv_results.items():
        if dataset_filter is not None and dataset_name not in dataset_filter:
            continue
        for anomaly_method in ("conf_results", "mahal_results"):
            kernelwise_dict = results[anomaly_method]
            for kernel_name, labelwise_dict in kernelwise_dict.items():
                # The original compared `kernel_name == kernel_name`, which is
                # always true and silently ignored the `kernels` argument.
                if kernel_filter is not None and kernel_name not in kernel_filter:
                    continue
                for param_dict in labelwise_dict.values():
                    if param_name in param_dict:
                        print(param_dict[param_name])
                        values.append(param_dict[param_name])

    # Sort before converting to strings so the categorical x-axis follows the
    # natural order of the raw values.
    values = np.array(values)
    values.sort()
    values = values.astype(str)
    print(len(values))

    plt.hist(values, n_bins)
    plt.xlabel(param_name)
    plt.ylabel("Frequency")
    plt.title(f"All datasets: {param_name}")
    plt.xticks(rotation='vertical')
    plt.show()

In [None]:
# plot_cv_params_all_kernels(cv_results, "alpha")
# plot_cv_params_all_kernels(cv_results, "threshold")
# plot_cv_params_all_kernels(cv_results, "normalize")
# plot_cv_params_all_kernels(cv_results, "time")
# plot_cv_params_all_kernels(cv_results, "basepoint")

In [None]:
# Histogram of the "scale" values stored for the "trunc sig linear" kernel.
plot_cv_params_single_kernel(cv_results, "scale", "trunc sig linear")

# Run specific param_dict on specific dataset

In [None]:
# code to run
def run_specific_param_dict(
        dataset_name:str,
        param_dict:Dict,
        verbose:bool = False,
        device="cuda",
    ):
    """Runs the specific param_dict on a dataset.

    The same 'param_dict' is used for every class label found in the training
    labels, and the resulting scores for its kernel are printed.

    Args:
        dataset_name (str): Name of the dataset.
        param_dict (Dict): Dictionary of parameters. Must contain the key
            "kernel_name".
        verbose (bool): Verbosity.
        device (str): Device for PyTorch computation.

    Returns:
        The object returned by 'run_all_kernels', indexable by kernel name.

    Raises:
        ValueError: If the dataset could not be loaded.
    """
    X_train, y_train, X_test, y_test = UCR_UEA_datasets().load_dataset(dataset_name)
    if X_train is None:
        # The loader reports a failed load with None entries; fail here with a
        # clear message instead of an opaque TypeError from torch.from_numpy.
        raise ValueError(f"Dataset '{dataset_name}' could not be loaded.")

    X_train = torch.from_numpy(X_train).to(device)
    X_test = torch.from_numpy(X_test).to(device)
    unique_labels = np.unique(y_train)
    kernel_name = param_dict["kernel_name"]

    # Every label shares the same parameter dict for this single kernel.
    kernelwise_dict = {kernel_name: {label: param_dict for label in unique_labels}}

    results = run_all_kernels(X_train, y_train, X_test, y_test,
                        unique_labels, kernelwise_dict, verbose)

    # Indexed as aucs[method, metric], per the labels printed below.
    aucs = results[kernel_name]
    print("Conf ROCAUC\t", aucs[0,0])
    print("Conf PRAUC\t", aucs[0,1])
    print("Mah ROCAUC\t", aucs[1,0])
    print("Mah PRAUC\t", aucs[1,1])
    return results


# Parameter set for a single run, written as one literal. Key order matches
# the original incremental construction.
param_dict = {
    # General parameters
    "alpha": 1e-8,
    "threshold": 0.001,
    "normalize": True,
    "time": "include_time",
    "basepoint": "basepoint",
    # Kernel-specific parameters
    "kernel_name": "gak",
    "order": 7,
    "sigma": 1.4,
    "scale": 1.0,
    "gak_factor": 10,
}

res = run_specific_param_dict("Epilepsy", param_dict, verbose=True)