In [1]:
import numpy as np
from typing import List, Optional, Dict, Set, Callable, Any, Literal
from joblib import Memory, Parallel, delayed
import tslearn
import tslearn.metrics
from tslearn.datasets import UCR_UEA_datasets

from experiments.cross_validation import cv_tslearn, print_cv_results
from experiments.eval_on_test import validate_tslearn, print_test_results
from experiments.utils import join_dicts_from_pickle_paths, save_to_pickle, print_latex_results

# Cross Validation on Train

In [2]:
# Datasets to cross-validate. Uncomment an entry to include it in the run;
# the trailing note records the N_corpus value for each dataset.
cv_dataset_names = [
    # 'Epilepsy',                    # N_corpus = 34
    # 'EthanolConcentration',        # N_corpus = 65
    # 'FingerMovements',             # N_corpus = 158
    # 'HandMovementDirection',       # N_corpus = 40
    # 'Heartbeat',                   # N_corpus = 102
    # 'LSST',                        # N_corpus = 176
    # 'MotorImagery',                # N_corpus = 139
    'NATOPS',                        # N_corpus = 30
    # 'PenDigits',                   # N_corpus = 749
    # 'PEMS-SF',                     # N_corpus = 38
    # 'PhonemeSpectra',              # N_corpus = 85
    # 'RacketSports',                # N_corpus = 38
    # 'SelfRegulationSCP1',          # N_corpus = 134
]

# Kernels compared in the cross-validation, in the order they are run.
cv_kernel_names = [
    "linear",
    "rbf",
    "poly",
    "integral rbf",
    "integral poly",
    "truncated sig",
    "truncated sig rbf",
    "signature pde rbf",
    "gak",
]

# Run cross-validation on the training data for every (dataset, kernel) pair.
# NOTE(review): k is presumably the number of CV folds and the n_jobs_*
# arguments the parallel worker counts — confirm in experiments.cross_validation.
cv_results = cv_tslearn(
    dataset_names=cv_dataset_names,
    kernel_names=cv_kernel_names,
    k=5,
    n_repeats=1,
    n_jobs_repeats=1,
    n_jobs_gram=4,
    verbose=False,
)

Dataset: NATOPS
Number of Classes: 6
Dimension of path: 24
Length: 51
Train: 180
Test: N/A


Label for linear: 100%|██████████| 6/6 [00:02<00:00,  2.19it/s]


Time taken for kernel linear: 2.7416043281555176 seconds


Label for rbf: 100%|██████████| 6/6 [00:17<00:00,  2.86s/it]


Time taken for kernel rbf: 17.186774969100952 seconds


Label for poly: 100%|██████████| 6/6 [00:12<00:00,  2.10s/it]


Time taken for kernel poly: 12.588505983352661 seconds


Label for integral rbf: 100%|██████████| 6/6 [00:17<00:00,  2.99s/it]


Time taken for kernel integral rbf: 17.971454858779907 seconds


Label for integral poly: 100%|██████████| 6/6 [00:12<00:00,  2.12s/it]


Time taken for kernel integral poly: 12.734068155288696 seconds


Label for truncated sig: 100%|██████████| 6/6 [29:09<00:00, 291.66s/it]


Time taken for kernel truncated sig: 1749.9360253810883 seconds


Label for truncated sig rbf: 100%|██████████| 6/6 [41:45<00:00, 417.53s/it]


Time taken for kernel truncated sig rbf: 2505.1692292690277 seconds


Label for signature pde rbf: 100%|██████████| 6/6 [03:18<00:00, 33.11s/it]


Time taken for kernel signature pde rbf: 198.65954566001892 seconds


Label for gak: 100%|██████████| 6/6 [01:14<00:00, 12.46s/it]

Time taken for kernel gak: 74.74281096458435 seconds
Time taken for dataset NATOPS: 4591.739802837372 seconds








In [3]:
# Display the cross-validation results returned by cv_tslearn.
print_cv_results(cv_results)

Cross Validation Results
Number of Classes: 6
Dimension of path: 24
Length: 51
Train: 180
Test: N/A

conf_results

linear
final_score_avgs 1.6466706615919724
params_score_avgs [1.647]
thresh_score_avgs [0.791 1.057 1.33  1.354 1.397 1.456 1.465 1.503 1.508 1.5   1.496 1.495
 1.481 1.51  1.489 1.495 1.517 1.536 1.549 1.579 1.57  1.58  1.588 0.   ]
1.0
{'threshold': 20}
2.0
{'threshold': 22}
3.0
{'threshold': 23}
4.0
{'threshold': 22}
5.0
{'threshold': 7}
6.0
{'threshold': 22}

rbf
final_score_avgs 1.671576481081125
params_score_avgs [1.646 1.646 1.647 1.521 1.641 1.521 0.907]
thresh_score_avgs [1.346 1.317 1.452 1.549 1.513 1.523 1.559 1.55  1.566 1.56  1.546 1.506
 1.482 1.515 1.497 1.5   1.52  1.542 1.551 1.579 1.573 1.584 1.66  0.   ]
1.0
{'sigma': 0.01, 'threshold': 20}
2.0
{'sigma': 0.0001, 'threshold': 22}
3.0
{'sigma': 0.01, 'threshold': 22}
4.0
{'sigma': 0.01, 'threshold': 22}
5.0
{'sigma': 10.0, 'threshold': 23}
6.0
{'sigma': 1.0, 'threshold': 23}

poly
final_score_avgs 1.68385

# Validate on Test

In [4]:
# Run validation on the test sets, driven by the cross-validation results.
# NOTE(review): n_jobs presumably sets the number of parallel workers — confirm
# in experiments.eval_on_test.
test_results = validate_tslearn(cv_results, n_jobs=4, verbose=False)

Start validation on test sets
NATOPS
Number of Classes: 6
Dimension of path: 24
Length: 51
Train: 180
Test: 180


 22%|██▏       | 2/9 [00:00<00:00, 10.38it/s]

Kernel: linear
Kernel: rbf
Kernel: poly


 44%|████▍     | 4/9 [00:00<00:00,  8.07it/s]

Kernel: integral rbf
Kernel: integral poly


 56%|█████▌    | 5/9 [00:00<00:00,  7.54it/s]

Kernel: truncated sig


 67%|██████▋   | 6/9 [01:01<00:54, 18.11s/it]

Kernel: truncated sig rbf


 78%|███████▊  | 7/9 [02:29<01:17, 38.99s/it]

Kernel: signature pde rbf


 89%|████████▉ | 8/9 [02:58<00:35, 35.97s/it]

Kernel: gak


100%|██████████| 9/9 [03:09<00:00, 21.06s/it]
 22%|██▏       | 2/9 [00:00<00:00, 11.06it/s]

Kernel: linear
Kernel: rbf
Kernel: poly


 44%|████▍     | 4/9 [00:00<00:00,  9.41it/s]

Kernel: integral rbf
Kernel: integral poly


 56%|█████▌    | 5/9 [00:00<00:00,  8.02it/s]

Kernel: truncated sig


 67%|██████▋   | 6/9 [00:43<00:38, 12.86s/it]

Kernel: truncated sig rbf


 78%|███████▊  | 7/9 [02:35<01:24, 42.45s/it]

Kernel: signature pde rbf


 89%|████████▉ | 8/9 [02:58<00:36, 36.44s/it]

Kernel: gak


100%|██████████| 9/9 [03:09<00:00, 21.03s/it]

Total elapsed time for NATOPS: 378.8680188790004 seconds

End validation on test sets








In [5]:
# Display the test-set results returned by validate_tslearn.
print_test_results(test_results)

Test Results

Dataset: NATOPS
Number of Classes: 6
Dimension of path: 24
Length: 51
Train: 180
Test: 180

Kernel: linear
Conformance AUC: 0.882
Conformance PR AUC: 0.636

Kernel: rbf
Conformance AUC: 0.768
Conformance PR AUC: 0.538

Kernel: poly
Conformance AUC: 0.873
Conformance PR AUC: 0.607

Kernel: integral rbf
Conformance AUC: 0.884
Conformance PR AUC: 0.63

Kernel: integral poly
Conformance AUC: 0.864
Conformance PR AUC: 0.584

Kernel: truncated sig
Conformance AUC: 0.798
Conformance PR AUC: 0.527

Kernel: truncated sig rbf
Conformance AUC: 0.545
Conformance PR AUC: 0.322

Kernel: signature pde rbf
Conformance AUC: 0.682
Conformance PR AUC: 0.382

Kernel: gak
Conformance AUC: 0.896
Conformance PR AUC: 0.675

Kernel: linear
Mahalanobis AUC: 0.871
Mahalanobis PR AUC: 0.614

Kernel: rbf
Mahalanobis AUC: 0.871
Mahalanobis PR AUC: 0.614

Kernel: poly
Mahalanobis AUC: 0.884
Mahalanobis PR AUC: 0.653

Kernel: integral rbf
Mahalanobis AUC: 0.875
Mahalanobis PR AUC: 0.621

Kernel: integra

# Read CV data from file and print results

In [None]:
# Pickle files holding previously saved cross-validation results.
# NOTE(review): "Data/Epilepsy.pkl" is the only entry without the "cv_" prefix
# used by every other path — confirm the file name on disk.
cv_pickle_paths = [
    "Data/Epilepsy.pkl",
    "Data/cv_EthanolConcentration.pkl",
    "Data/cv_FingerMovements.pkl",
    "Data/cv_HandMovementDirection.pkl",
    "Data/cv_Heartbeat.pkl",
    "Data/cv_LSST.pkl",
    "Data/cv_MotorImagery.pkl",
    "Data/cv_NATOPS.pkl",
    "Data/cv_PEMS-SF.pkl",
    "Data/cv_PenDigits.pkl",
    "Data/cv_PhonemeSpectra.pkl",
    "Data/cv_RacketSports.pkl",
    "Data/cv_SelfRegulationSCP1.pkl",
]

# Join the pickled dictionaries into one and print the combined results.
cv_results = join_dicts_from_pickle_paths(cv_pickle_paths)
print_cv_results(cv_results)

In [None]:
# Pickle files holding previously saved test results.
test_pickle_paths = [
    "Data/results_shorts.pkl",
    "Data/results_longs.pkl",
]
test_results = join_dicts_from_pickle_paths(test_pickle_paths)

# Copy every entry into a fresh dict. The filter below is currently disabled;
# enabling it would keep only the datasets with corpus size > 50.
test_results = {
    dataset_name: dataset_results
    for dataset_name, dataset_results in test_results.items()
    # if dataset_name in ["EthanolConcentration",
    #                     "FingerMovements",
    #                     "Heartbeat",
    #                     "LSST",
    #                     "MotorImagery",
    #                     "PenDigits",
    #                     "PhonemeSpectra",
    #                     "SelfRegulationSCP1",]
}

In [6]:
# Print the LaTeX tables twice: first with round_digits=2, then round_digits=3.
for n_digits in (2, 3):
    print_latex_results(test_results, round_digits=n_digits)

PR LaTeX table:

    \begin{tabular}{lc||ccc|cc|ccc|c}
        \toprule
        \multirow{2}{*}{Dataset}   &  \multicolumn{10}{c}{Precision-Recall AUC} \\
        \cline{3-11}
                                & & linear & RBF & poly 
                                & $I_\text{RBF}$ & $I_\text{poly}$ 
                                & $S_\text{lin}$ & $S_\text{RBF}$ & $S^\infty_\text{RBF}$ 
                                & GAK \\ 
        \hline
        \hline
		\hline
		\multirow{2}{*}{NATO}    
		& C & .64 & .54 & .61 & .63 & .58 & .53 & .32 & .38 & \textbf{.67}\\
		& M & .61 & .61 & .65 & .62 & .50 & .55 & .43 & .44 & \textbf{.67}\\
		\hline
		\hline
		\hline
		\multirow{2}{*}{Avg. AUC}    
		& C & .64 & .54 & .61 & .63 & .58 & .53 & .32 & .38 & \textbf{.67}\\
		& M & .61 & .61 & .65 & .62 & .50 & .55 & .43 & .44 & \textbf{.67}\\
		\hline
		\multirow{2}{*}{Avg. Rank}    
		& C & 9.00 & \textbf{1.00} & 4.00 & 3.00 & 5.00 & 2.00 & 6.00 & 8.00 & 7.00\\
		& M & 9.00 & 3.00 & 4.00 & \text

## Enumerate all multivariate UCR/UEA datasets available in `tslearn`

In [None]:
# Bind the loader instance to its own name. Rebinding the imported class name
# `UCR_UEA_datasets` to an instance would make this cell raise
# "TypeError: 'UCR_UEA_datasets' object is not callable" when run a second
# time, and would break any later cell that needs the class itself.
ucr_uea_loader = UCR_UEA_datasets()

# Print summary statistics for every multivariate dataset the loader lists.
for dataset_name in ucr_uea_loader.list_multivariate_datasets():
    print("Dataset:", dataset_name)
    dataset = ucr_uea_loader.load_dataset(dataset_name)
    if dataset[0] is not None:
        X_train, y_train, X_test, y_test = dataset
        num_classes = len(np.unique(y_train))
        # The 3-way unpacking below requires X_train / X_test to be 3-D arrays;
        # the axes are reported as (number of series, length, path dimension).
        N_train, T, d = X_train.shape
        N_test, _, _ = X_test.shape

        print("Number of Classes:", num_classes)
        print("Dimension of path:", d)
        print("Length:", T)
        print("Train Size, Test Size", N_train, N_test)
        print()
    else:
        # A None first element means no training data came back for this name.
        print("No dataset found")
        print()