In [1]:
import numpy as np
from typing import List, Optional, Dict, Set, Callable, Any
from joblib import Memory, Parallel, delayed
import tslearn
import tslearn.metrics
from tslearn.datasets import UCR_UEA_datasets

from experiments.cross_validation import cv_tslearn, print_cv_results
from experiments.eval_on_test import validate_tslearn, print_test_results
from experiments.utils import join_dicts_from_pickle_paths, save_to_pickle

# Enumerate all multivariate UCR/UEA datasets available through tslearn

In [None]:
# _datasets = [
#             'ArticularyWordRecognition', 
#             'BasicMotions', 
#             'Cricket',
#             #'ERing',
#             'Libras', 
#             'NATOPS', 
#             'RacketSports',     
#             'FingerMovements',
#             'Heartbeat',
#             'SelfRegulationSCP1', 
#             'UWaveGestureLibrary'
#             ]

# import tslearn
# UCR_UEA_datasets = tslearn.datasets.UCR_UEA_datasets()

# for dataset_name in UCR_UEA_datasets.list_multivariate_datasets():
# #for dataset_name in _datasets:
#     print("Dataset:", dataset_name)
#     dataset = UCR_UEA_datasets.load_dataset(dataset_name)
#     if dataset[0] is not None:
#         X_train, y_train, X_test, y_test = dataset
#         num_classes = len(np.unique(y_train))
#         N_train, T, d = X_train.shape
#         N_test, _, _  = X_test.shape
        
#         print("Number of Classes:", num_classes)
#         print("Dimension of path:", d)
#         print("Length:", T)
#         print("Train Size, Test Size", N_train, N_test)
#         print()
#     else:
#         print("No dataset found")
#         print()

#yes
# Dataset: ArticularyWordRecognition
# Number of Classes: 25
# Dimension of path: 9
# Length: 144
# Train Size, Test Size 275 300

# Dataset: AtrialFibrillation
# No dataset found

#yes
# Dataset: BasicMotions
# Number of Classes: 4
# Dimension of path: 6
# Length: 100
# Train Size, Test Size 40 40

# Dataset: CharacterTrajectories
# No dataset found

#yes
# Dataset: Cricket
# Number of Classes: 12
# Dimension of path: 6
# Length: 1197
# Train Size, Test Size 108 72

# Dataset: DuckDuckGeese
# No dataset found

# Dataset: EigenWorms
# Number of Classes: 5
# Dimension of path: 6
# Length: 17984
# Train Size, Test Size 128 131

#why not
# Dataset: Epilepsy
# Number of Classes: 4
# Dimension of path: 3
# Length: 206
# Train Size, Test Size 137 138

#longLength
# Dataset: EthanolConcentration
# Number of Classes: 4
# Dimension of path: 3
# Length: 1751
# Train Size, Test Size 261 263

# Dataset: ERing
# No dataset found

#big
# Dataset: FaceDetection
# Number of Classes: 2
# Dimension of path: 144
# Length: 62
# Train Size, Test Size 5890 3524

#yes
# Dataset: FingerMovements
# Number of Classes: 2
# Dimension of path: 28
# Length: 50
# Train Size, Test Size 316 100

#why not, maybe big length
# Dataset: HandMovementDirection
# Number of Classes: 4
# Dimension of path: 10
# Length: 400
# Train Size, Test Size 160 74

#smallTrain
# Dataset: Handwriting
# Number of Classes: 26
# Dimension of path: 3
# Length: 152
# Train Size, Test Size 150 850

#yes
# Dataset: Heartbeat
# Number of Classes: 2
# Dimension of path: 61
# Length: 405
# Train Size, Test Size 204 205

#big
# Dataset: InsectWingbeat
# Number of Classes: 10
# Dimension of path: 200
# Length: 22
# Train Size, Test Size 25000 25000

# Dataset: JapaneseVowels
# No dataset found

#yes
# Dataset: Libras
# Number of Classes: 15
# Dimension of path: 2
# Length: 45
# Train Size, Test Size 180 180

# TODO: should include this dataset
# Dataset: LSST
# Number of Classes: 14
# Dimension of path: 6
# Length: 36
# Train Size, Test Size 2459 2466

#length
# Dataset: MotorImagery
# Number of Classes: 2
# Dimension of path: 64
# Length: 3000
# Train Size, Test Size 278 100

#yes
# Dataset: NATOPS
# Number of Classes: 6
# Dimension of path: 24
# Length: 51
# Train Size, Test Size 180 180

#yes
# Dataset: PenDigits
# Number of Classes: 10
# Dimension of path: 2
# Length: 8
# Train Size, Test Size 7494 3498

# TODO: should include this dataset (high-dimensional)
# Dataset: PEMS-SF
# Number of Classes: 7
# Dimension of path: 963
# Length: 144
# Train Size, Test Size 267 173

#NO, dim=1, big length, large num classes
# Dataset: Phoneme
# Number of Classes: 39
# Dimension of path: 1
# Length: 1024
# Train Size, Test Size 214 1896

#yes
# Dataset: RacketSports
# Number of Classes: 4
# Dimension of path: 6
# Length: 30
# Train Size, Test Size 151 152

#yes
# Dataset: SelfRegulationSCP1
# Number of Classes: 2
# Dimension of path: 6
# Length: 896
# Train Size, Test Size 268 293

# Dataset: SelfRegulationSCP2
# Number of Classes: 2
# Dimension of path: 7
# Length: 1152
# Train Size, Test Size 200 180

# Dataset: SpokenArabicDigits
# No dataset found

#NO, long, also very small set
# Dataset: StandWalkJump
# Number of Classes: 3
# Dimension of path: 4
# Length: 2500
# Train Size, Test Size 12 15

#yes
# Dataset: UWaveGestureLibrary
# Number of Classes: 8
# Dimension of path: 3
# Length: 315
# Train Size, Test Size 120 320


# Cross Validation on Train

In [None]:
# Cross-validate on the training data for every selected (dataset, kernel)
# combination. Entries that are commented out are available options that are
# currently disabled; uncomment them to widen the sweep.
cv_results = cv_tslearn(
    dataset_names=[
        # "ArticularyWordRecognition",
        # "BasicMotions",
        # "Cricket",
        # "ERing",  # dataset could not be found
        "Libras",
        # "NATOPS",
        # "RacketSports",
        # "FingerMovements",
        # "Heartbeat",
        # "SelfRegulationSCP1",
        # "UWaveGestureLibrary",
        # "PenDigits",
        # "LSST",
        # "EthanolConcentration",
    ],
    kernel_names=[
        "linear",
        # "rbf",
        # "poly",
        # "gak",
        # "truncated sig",
        # "truncated sig rbf",
        # "signature pde rbf",
        # "integral linear",
        # "integral rbf",
        # "integral poly",
    ],
    k=5,  # presumably the number of CV folds -- confirm in experiments.cross_validation
    n_repeats=1,
    n_jobs_repeats=1,
    n_jobs_gram=1,
    verbose=False,
)

##### Print CV results

In [None]:
# Print a report of the cross-validation results currently held in `cv_results`.
print_cv_results(cv_results)

# Validate on Test

In [None]:
# Evaluate on the test data, driven by the cross-validation results.
# NOTE(review): presumably this reuses the hyperparameters selected during CV
# -- confirm in experiments.eval_on_test.
test_results = validate_tslearn(cv_results, n_jobs=4, verbose=False)

##### Print test results

In [None]:
# Print a report of the test-set results currently held in `test_results`.
print_test_results(test_results)

# Read CV data from file and print results

In [None]:
# Read previously saved cross-validation results from disk (presumably merged
# into a single dict -- confirm in experiments.utils) and print them.
# This rebinds `cv_results`, replacing any value computed earlier in the
# notebook. Commented-out paths are result files that are currently excluded.
cv_results = join_dicts_from_pickle_paths([
    # "Data/cv_ArticularyWordRecognition.pkl",
    # "Data/cv_BasicMotions.pkl",
    # "Data/cv_EthanolConcentration.pkl",
    "Data/cv_FingerMovements.pkl",
    # "Data/cv_Heartbeat.pkl",
    "Data/cv_Libras.pkl",
    "Data/cv_NATOPS.pkl",
    # "Data/cv_RacketSports.pkl",
    # "Data/cv_SelfRegulationSCP1.pkl",
    "Data/cv_UWaveGestureLibrary.pkl",
])
print_cv_results(cv_results)

In [2]:
# Load saved test results from disk and print them.
# This rebinds `test_results`, replacing any value computed earlier in the notebook.
test_results = join_dicts_from_pickle_paths(["Data/results_shorts.pkl"])
print_test_results(test_results)

Test Results

Dataset: ArticularyWordRecognition
Number of Classes: 25
Dimension of path: 9
Length: 144
Train: 275
Test: 300

Kernel: linear
Conformance AUC: 0.934
Mahalanobis AUC: 0.963
Conformance PR AUC: 0.511
Mahalanobis PR AUC: 0.713

Kernel: rbf
Conformance AUC: 0.854
Mahalanobis AUC: 0.549
Conformance PR AUC: 0.648
Mahalanobis PR AUC: 0.44

Kernel: poly
Conformance AUC: 0.875
Mahalanobis AUC: 0.901
Conformance PR AUC: 0.371
Mahalanobis PR AUC: 0.497

Kernel: gak
Conformance AUC: 0.847
Mahalanobis AUC: 0.275
Conformance PR AUC: 0.819
Mahalanobis PR AUC: 0.233

Kernel: truncated sig
Conformance AUC: 0.972
Mahalanobis AUC: 0.973
Conformance PR AUC: 0.779
Mahalanobis PR AUC: 0.784

Kernel: truncated sig rbf
Conformance AUC: 0.953
Mahalanobis AUC: 0.957
Conformance PR AUC: 0.683
Mahalanobis PR AUC: 0.748

Kernel: signature pde rbf
Conformance AUC: 0.528
Mahalanobis AUC: 0.475
Conformance PR AUC: 0.339
Mahalanobis PR AUC: 0.331

Kernel: integral linear
Conformance AUC: 0.934
Mahalanob