In [1]:
import numpy as np
from typing import List, Optional, Dict, Set, Callable, Any, Literal
from joblib import Memory, Parallel, delayed
import tslearn
import tslearn.metrics
from tslearn.datasets import UCR_UEA_datasets

from experiments.cross_validation import cv_tslearn, print_cv_results
from experiments.eval_on_test import validate_tslearn, print_test_results
from experiments.utils import join_dicts_from_pickle_paths, save_to_pickle

# Cross Validation on Train

In [3]:
# Run cross-validation for every (dataset, kernel) pair enabled below and print
# the collected results. Uncomment an entry to add that dataset / kernel to the run.
# NOTE(review): `k` is presumably the number of CV folds and the `n_jobs_*`
# arguments the degree of parallelism -- confirm in experiments.cross_validation.
cv_results = cv_tslearn(
    dataset_names=[
        # "Epilepsy",                # N_corpus = 34
        # "EthanolConcentration",    # N_corpus = 65
        # "FingerMovements",         # N_corpus = 158
        # "HandMovementDirection",   # N_corpus = 40
        # "Heartbeat",               # N_corpus = 102
        # "LSST",                    # N_corpus = 176
        # "MotorImagery",            # N_corpus = 139
        "NATOPS",                    # N_corpus = 30
        # "PenDigits",               # N_corpus = 749
        # "PEMS-SF",                 # N_corpus = 38
        # "PhonemeSpectra",          # N_corpus = 85
        # "RacketSports",            # N_corpus = 38
        # "SelfRegulationSCP1",      # N_corpus = 134
    ],
    kernel_names=[
        # "linear",
        # "rbf",
        # "poly",
        # "integral rbf",
        # "integral poly",
        # "truncated sig",
        # "truncated sig rbf",
        "signature pde rbf",
        "gak",
    ],
    k=5,
    n_repeats=1,
    n_jobs_repeats=3,
    n_jobs_gram=1,
    verbose=False,
)
print_cv_results(cv_results)

Dataset: NATOPS
Number of Classes: 6
Dimension of path: 24
Length: 51
Train: 180
Test: N/A


Label for signature pde rbf: 100%|██████████| 6/6 [02:16<00:00, 22.73s/it]


Time taken for kernel signature pde rbf: 136.38422012329102 seconds


Label for gak: 100%|██████████| 6/6 [00:15<00:00,  2.59s/it]

Time taken for kernel gak: 15.546572208404541 seconds
Time taken for dataset NATOPS: 151.94349932670593 seconds



Cross Validation Results
Number of Classes: 6
Dimension of path: 24
Length: 51
Train: 180
Test: N/A

conf_results

signature pde rbf
final_score_avgs 1.1739908768807794
params_score_avgs [0.703 0.893 1.051 0.99  1.09 ]
thresh_score_avgs [0.963 0.889 0.909 0.97  0.925 0.906 0.927 0.933 0.926 0.913 0.887 0.887
 0.913 0.897 0.92  0.9   0.93  0.967 0.983 0.974 0.988 1.019 1.086 0.736]
1.0
{'sigma': 10.0, 'dyadic_order': 2.0, 'threshold': 20}
2.0
{'sigma': 10.0, 'dyadic_order': 2.0, 'threshold': 23}
3.0
{'sigma': 0.1, 'dyadic_order': 2.0, 'threshold': 18}
4.0
{'sigma': 10.0, 'dyadic_order': 2.0, 'threshold': 23}
5.0
{'sigma': 1.0, 'dyadic_order': 2.0, 'threshold': 1}
6.0
{'sigma': 0.1, 'dyadic_order': 2.0, 'threshold': 7}

gak
final_score_avgs 1.6473840215379374
params_score_avgs [1.486 1.59  1.629]
thresh_score_avgs [1.265 1.295 1.41  1.385 1.46  1.5   1.514 1.51  1.481 1.461 




# Validate on Test

In [None]:
# Feed the cross-validation results into `validate_tslearn` and print the outcome.
# NOTE(review): presumably this scores the selected configurations on the test
# split -- confirm in experiments.eval_on_test.
test_results = validate_tslearn(
    cv_results,
    n_jobs=4,
    verbose=False,
)
print_test_results(test_results)

# Read CV data from file and print results

In [None]:
# Rebuild `cv_results` from stored per-dataset files instead of re-running the
# cross-validation. Uncomment a path to include that dataset's stored results.
# NOTE(review): these look like pickle files -- only load ones produced by this
# project, since unpickling untrusted data can execute arbitrary code.
cv_results = join_dicts_from_pickle_paths([
    "Data/cv_Epilepsy.pkl",
    # "Data/cv_EthanolConcentration.pkl",
    # "Data/cv_FingerMovements.pkl",
    # "Data/cv_HandMovementDirection.pkl",
    # "Data/cv_Heartbeat.pkl",
    # "Data/cv_LSST.pkl",
    # "Data/cv_MotorImagery.pkl",
    # "Data/cv_NATOPS.pkl",
    # "Data/cv_PEMS-SF.pkl",
    # "Data/cv_PenDigits.pkl",
    # "Data/cv_PhonemeSpectra.pkl",
    # "Data/cv_RacketSports.pkl",
    # "Data/cv_SelfRegulationSCP1.pkl",
])
print_cv_results(cv_results)

In [None]:
from experiments.experiment_code import print_dataset_stats
from experiments.utils import join_dicts_from_pickle_paths, print_latex_results

# Combine the stored result files into a single dictionary.
test_results = join_dicts_from_pickle_paths(
    [
        "Data/results_shorts.pkl",
        "Data/results_longs.pkl",
    ]
)

# Pass-through copy of the results. Enable the commented condition to restrict
# the tables to the listed datasets only.
test_results = {
    name: result
    for name, result in test_results.items()
    # if name in ["EthanolConcentration",  # datasets with corpus size > 50
    #             "FingerMovements",
    #             "Heartbeat",
    #             "LSST",
    #             "MotorImagery",
    #             "PenDigits",
    #             "PhonemeSpectra",
    #             "SelfRegulationSCP1",]
}

# Print the results twice, at two rounding precisions.
print_latex_results(test_results, round_digits=2)
print_latex_results(test_results, round_digits=3)

## Enumerate all multivariate UCR/UEA datasets in `tslearn`

In [None]:
# Bind the loader instance to its own name. Rebinding `UCR_UEA_datasets` itself
# would replace the imported class with an instance, so running this cell a
# second time would fail with "TypeError: ... object is not callable".
ucr_uea_loader = UCR_UEA_datasets()

# Print basic statistics for every multivariate dataset the loader lists.
for dataset_name in ucr_uea_loader.list_multivariate_datasets():
    print("Dataset:", dataset_name)
    dataset = ucr_uea_loader.load_dataset(dataset_name)
    # A `None` first element is treated as "dataset could not be loaded".
    if dataset[0] is not None:
        X_train, y_train, X_test, y_test = dataset
        num_classes = len(np.unique(y_train))
        # The three-way unpacking requires both arrays to be 3-dimensional;
        # the axes are reported below as (samples, length, path dimension).
        N_train, T, d = X_train.shape
        N_test, _, _ = X_test.shape

        print("Number of Classes:", num_classes)
        print("Dimension of path:", d)
        print("Length:", T)
        print("Train Size, Test Size", N_train, N_test)
        print()
    else:
        print("No dataset found")
        print()