In [1]:
import numpy as np
import pandas as pd
import sklearn.metrics
from typing import List, Optional, Dict, Set, Callable, Any
from joblib import Memory, Parallel, delayed
import tslearn
import tslearn.metrics
from tslearn.datasets import UCR_UEA_datasets
from scipy.interpolate import interp1d
from numba import njit
import pickle

from experiment_code import print_dataset_stats, run_all_kernels
from cross_validation import cv_tslearn

## Find all tslearn datasets

In [None]:
# _datasets = [
#             'ArticularyWordRecognition', 
#             'BasicMotions', 
#             'Cricket',
#             #'ERing',
#             'Libras', 
#             'NATOPS', 
#             'RacketSports',     
#             'FingerMovements',
#             'Heartbeat',
#             'SelfRegulationSCP1', 
#             'UWaveGestureLibrary'
#             ]

# import tslearn
# UCR_UEA_datasets = tslearn.datasets.UCR_UEA_datasets()

# for dataset_name in UCR_UEA_datasets.list_multivariate_datasets():
# #for dataset_name in _datasets:
#     print("Dataset:", dataset_name)
#     dataset = UCR_UEA_datasets.load_dataset(dataset_name)
#     if dataset[0] is not None:
#         X_train, y_train, X_test, y_test = dataset
#         num_classes = len(np.unique(y_train))
#         N_train, T, d = X_train.shape
#         N_test, _, _  = X_test.shape
        
#         print("Number of Classes:", num_classes)
#         print("Dimension of path:", d)
#         print("Length:", T)
#         print("Train Size, Test Size", N_train, N_test)
#         print()
#     else:
#         print("No dataset found")
#         print()

#yes
# Dataset: ArticularyWordRecognition
# Number of Classes: 25
# Dimension of path: 9
# Length: 144
# Train Size, Test Size 275 300

# Dataset: AtrialFibrillation
# No dataset found

#yes
# Dataset: BasicMotions
# Number of Classes: 4
# Dimension of path: 6
# Length: 100
# Train Size, Test Size 40 40

# Dataset: CharacterTrajectories
# No dataset found

#yes
# Dataset: Cricket
# Number of Classes: 12
# Dimension of path: 6
# Length: 1197
# Train Size, Test Size 108 72

# Dataset: DuckDuckGeese
# No dataset found

# Dataset: EigenWorms
# Number of Classes: 5
# Dimension of path: 6
# Length: 17984
# Train Size, Test Size 128 131

#why not
# Dataset: Epilepsy
# Number of Classes: 4
# Dimension of path: 3
# Length: 206
# Train Size, Test Size 137 138

#longLength
# Dataset: EthanolConcentration
# Number of Classes: 4
# Dimension of path: 3
# Length: 1751
# Train Size, Test Size 261 263

# Dataset: ERing
# No dataset found

#big
# Dataset: FaceDetection
# Number of Classes: 2
# Dimension of path: 144
# Length: 62
# Train Size, Test Size 5890 3524

#yes
# Dataset: FingerMovements
# Number of Classes: 2
# Dimension of path: 28
# Length: 50
# Train Size, Test Size 316 100

#why not, maybe big length
# Dataset: HandMovementDirection
# Number of Classes: 4
# Dimension of path: 10
# Length: 400
# Train Size, Test Size 160 74

#smallTrain
# Dataset: Handwriting
# Number of Classes: 26
# Dimension of path: 3
# Length: 152
# Train Size, Test Size 150 850

#yes
# Dataset: Heartbeat
# Number of Classes: 2
# Dimension of path: 61
# Length: 405
# Train Size, Test Size 204 205

#big
# Dataset: InsectWingbeat
# Number of Classes: 10
# Dimension of path: 200
# Length: 22
# Train Size, Test Size 25000 25000

# Dataset: JapaneseVowels
# No dataset found

#yes
# Dataset: Libras
# Number of Classes: 15
# Dimension of path: 2
# Length: 45
# Train Size, Test Size 180 180

#TODO I SHOULD INCLUDE
# Dataset: LSST
# Number of Classes: 14
# Dimension of path: 6
# Length: 36
# Train Size, Test Size 2459 2466

#length
# Dataset: MotorImagery
# Number of Classes: 2
# Dimension of path: 64
# Length: 3000
# Train Size, Test Size 278 100

#yes
# Dataset: NATOPS
# Number of Classes: 6
# Dimension of path: 24
# Length: 51
# Train Size, Test Size 180 180

#yes
# Dataset: PenDigits
# Number of Classes: 10
# Dimension of path: 2
# Length: 8
# Train Size, Test Size 7494 3498

#TODO SHOULD INCLUDE highDim
# Dataset: PEMS-SF
# Number of Classes: 7
# Dimension of path: 963
# Length: 144
# Train Size, Test Size 267 173

#NO, dim=1, big length, large num classes
# Dataset: Phoneme
# Number of Classes: 39
# Dimension of path: 1
# Length: 1024
# Train Size, Test Size 214 1896

#yes
# Dataset: RacketSports
# Number of Classes: 4
# Dimension of path: 6
# Length: 30
# Train Size, Test Size 151 152

#yes
# Dataset: SelfRegulationSCP1
# Number of Classes: 2
# Dimension of path: 6
# Length: 896
# Train Size, Test Size 268 293

# Dataset: SelfRegulationSCP2
# Number of Classes: 2
# Dimension of path: 7
# Length: 1152
# Train Size, Test Size 200 180

# Dataset: SpokenArabicDigits
# No dataset found

#NO, long, also very small set
# Dataset: StandWalkJump
# Number of Classes: 3
# Dimension of path: 4
# Length: 2500
# Train Size, Test Size 12 15

#yes
# Dataset: UWaveGestureLibrary
# Number of Classes: 8
# Dimension of path: 3
# Length: 315
# Train Size, Test Size 120 320


# (tslearn) Cross Validation on Train

In [2]:
# Cross-validate the selected kernels on the training data of the selected
# datasets (cv_tslearn comes from the project module cross_validation).
# The result is stored in cv_best_models and is consumed by the
# "Print CV results" and "Validate on Test" cells further down.
#
# NOTE(review): the saved output of this cell ends with
#   ValueError: Input contains infinity or a value too large for dtype('float64').
# raised while the "signature pde" kernel was being processed, and the logged
# kernel means for that kernel are of order 1e27-1e38. If the call raises,
# cv_best_models is never assigned and every later cell that uses it fails.
# The overflow has to be addressed inside the kernel / cv code (e.g. input
# scaling) -- TODO confirm the root cause there.
cv_best_models = cv_tslearn(
    # Datasets to run; commented-out names are currently disabled.
    dataset_names = [
        #'ArticularyWordRecognition', 
        'BasicMotions', 
        #'Cricket',
             #########'ERing', #cant find dataset
        #'Libras', 
        #'NATOPS', 
        #'RacketSports',     
        #'FingerMovements',
        #'Heartbeat',
        #'SelfRegulationSCP1', 
        #'UWaveGestureLibrary'
        ],
    # Kernels to cross-validate; commented-out names are currently disabled.
    kernel_names = [
        "linear",
        #"rbf",
        #"poly",
        "gak",
        "truncated sig",
        #"truncated sig rbf",
        #"truncated sig poly",
        "signature pde",
        #"signature pde rbf",
        #"signature pde poly",
        #"integral linear",
        #"integral rbf",
        #"integral poly",
        ],
        # presumably k = number of CV folds and n_repeats = number of repeated
        # CV runs -- TODO confirm against cv_tslearn's signature.
        k=4,
        n_repeats=1,
        # presumably worker counts for the repeat loop and the Gram-matrix
        # computation respectively -- TODO confirm against cv_tslearn.
        n_jobs_repeats=4,
        n_jobs_gram=1,
        verbose=False,
        )

Dataset: BasicMotions
Number of Classes: 4
Dimension of path: 6
Length: 100
Train: 40
Test: N/A


Label for linear:   0%|          | 0/4 [00:00<?, ?it/s]

mean <x_i, x_j> =  0.0
mean absolute <x_i, x_j> =  0.2857142857142857
mean <f_i, f_j> =  -3.96508223080413e-18
mean absolute <f_i, f_j> =  0.040816326530612256 

mean <x_i, x_j> =  8.131516293641283e-18
mean absolute <x_i, x_j> =  0.2510201306431096
mean <f_i, f_j> =  -1.8973538018496328e-19
mean absolute <f_i, f_j> =  0.0313775163303887 

mean <x_i, x_j> =  1.1926223897340549e-17
mean absolute <x_i, x_j> =  0.2512695126549892
mean <f_i, f_j> =  -2.0328790734103208e-19
mean absolute <f_i, f_j> =  0.03140868908187365 

mean <x_i, x_j> =  -4.531522549490435e-17
mean absolute <x_i, x_j> =  0.2871500404188278
mean <f_i, f_j> =  3.96508223080413e-18
mean absolute <f_i, f_j> =  0.041021434345546816 



Label for linear: 100%|██████████| 4/4 [00:03<00:00,  1.20it/s]


mean <x_i, x_j> =  -2.0391851472706958e-17
mean absolute <x_i, x_j> =  0.3299310791114754
mean <f_i, f_j> =  5.664403186863043e-19
mean absolute <f_i, f_j> =  0.047133011301639355 

mean <x_i, x_j> =  4.531522549490435e-17
mean absolute <x_i, x_j> =  0.331740301171073
mean <f_i, f_j> =  1.1328806373726086e-18
mean absolute <f_i, f_j> =  0.047391471595867575 

mean <x_i, x_j> =  -5.936006894358137e-18
mean absolute <x_i, x_j> =  0.2901838653605915
mean <f_i, f_j> =  6.674619624363887e-19
mean absolute <f_i, f_j> =  0.036272983170073936 

mean <x_i, x_j> =  -1.1058862159352145e-17
mean absolute <x_i, x_j> =  0.30974780475562924
mean <f_i, f_j> =  1.8431436932253575e-18
mean absolute <f_i, f_j> =  0.038718475594453655 

mean <x_i, x_j> =  9.063045098980869e-18
mean absolute <x_i, x_j> =  0.3006656467464427
mean <f_i, f_j> =  2.5489814340883697e-18
mean absolute <f_i, f_j> =  0.042952235249491814 

mean <x_i, x_j> =  5.437827059388522e-17
mean absolute <x_i, x_j> =  0.2947882994845714
mean

Label for gak:   0%|          | 0/4 [00:00<?, ?it/s]

mean <x_i, x_j> =  0.155634505009953
mean absolute <x_i, x_j> =  0.155634505009953
mean <f_i, f_j> =  -4.248302390147283e-18
mean absolute <f_i, f_j> =  0.0344638977546958 

mean <x_i, x_j> =  0.15081019796012976
mean absolute <x_i, x_j> =  0.15081019796012976
mean <f_i, f_j> =  1.416100796715761e-18
mean absolute <f_i, f_j> =  0.03466080824652531 

mean <x_i, x_j> =  0.13509133483580305
mean absolute <x_i, x_j> =  0.13509133483580305
mean <f_i, f_j> =  6.505213034913027e-19
mean absolute <f_i, f_j> =  0.02702839578638115 

mean <x_i, x_j> =  0.13644026795587832
mean absolute <x_i, x_j> =  0.13644026795587832
mean <f_i, f_j> =  -1.463672932855431e-18
mean absolute <f_i, f_j> =  0.026986241626378804 

mean <x_i, x_j> =  0.6799849861580703
mean absolute <x_i, x_j> =  0.6799849861580703
mean <f_i, f_j> =  -2.3852447794681098e-18
mean absolute <f_i, f_j> =  0.010158019499919135 

mean <x_i, x_j> =  0.6556057937586142
mean absolute <x_i, x_j> =  0.6556057937586142
mean <f_i, f_j> =  -6.5140

Label for gak:  25%|██▌       | 1/4 [00:01<00:03,  1.18s/it]

mean <x_i, x_j> =  0.9440777952562003
mean absolute <x_i, x_j> =  0.9440777952562003
mean <f_i, f_j> =  4.553649124439119e-18
mean absolute <f_i, f_j> =  0.0018672699825821057 

mean <x_i, x_j> =  0.9523147858578662
mean absolute <x_i, x_j> =  0.9523147858578662
mean <f_i, f_j> =  -5.416585547437786e-18
mean absolute <f_i, f_j> =  0.0021007493343132653 

mean <x_i, x_j> =  0.9512552432395751
mean absolute <x_i, x_j> =  0.9512552432395751
mean <f_i, f_j> =  8.301890920746148e-18
mean absolute <f_i, f_j> =  0.0020888695556806718 

mean <x_i, x_j> =  0.23301705146737642
mean absolute <x_i, x_j> =  0.23301705146737642
mean <f_i, f_j> =  -5.664403186863043e-19
mean absolute <f_i, f_j> =  0.03130542647071933 

mean <x_i, x_j> =  0.23105010431869055
mean absolute <x_i, x_j> =  0.23105010431869055
mean <f_i, f_j> =  3.6818620714609786e-18
mean absolute <f_i, f_j> =  0.031385710027808544 

mean <x_i, x_j> =  0.17422272577556835
mean absolute <x_i, x_j> =  0.17422272577556835
mean <f_i, f_j> =  

Label for gak:  50%|█████     | 2/4 [00:01<00:01,  1.42it/s]

mean <x_i, x_j> =  0.8254821465163956
mean absolute <x_i, x_j> =  0.8254821465163956
mean <f_i, f_j> =  -4.553649124439119e-18
mean absolute <f_i, f_j> =  0.005731628682736227 

mean <x_i, x_j> =  0.8470527618537956
mean absolute <x_i, x_j> =  0.8470527618537956
mean <f_i, f_j> =  9.275460218488234e-18
mean absolute <f_i, f_j> =  0.0069301725944479475 

mean <x_i, x_j> =  0.8370089128709277
mean absolute <x_i, x_j> =  0.8370089128709277
mean <f_i, f_j> =  -2.3507273225481632e-17
mean absolute <f_i, f_j> =  0.007283441186854955 

mean <x_i, x_j> =  0.8385309670376897
mean absolute <x_i, x_j> =  0.8385309670376897
mean <f_i, f_j> =  2.6020852139652106e-18
mean absolute <f_i, f_j> =  0.005619930171223498 

mean <x_i, x_j> =  0.9805172814067618
mean absolute <x_i, x_j> =  0.9805172814067618
mean <f_i, f_j> =  9.324138683375338e-18
mean absolute <f_i, f_j> =  0.0006998946509861996 

mean <x_i, x_j> =  0.9818242699033892
mean absolute <x_i, x_j> =  0.9818242699033892
mean <f_i, f_j> =  -2.59

Label for gak:  75%|███████▌  | 3/4 [00:01<00:00,  1.82it/s]

mean <x_i, x_j> =  0.9688260202955952
mean absolute <x_i, x_j> =  0.9688260202955952
mean <f_i, f_j> =  4.336808689942018e-19
mean absolute <f_i, f_j> =  0.0011475001424532828 

mean <x_i, x_j> =  0.9633757501688602
mean absolute <x_i, x_j> =  0.9633757501688602
mean <f_i, f_j> =  -4.336808689942018e-18
mean absolute <f_i, f_j> =  0.0012013940557753671 

mean <x_i, x_j> =  0.9644107687815247
mean absolute <x_i, x_j> =  0.9644107687815247
mean <f_i, f_j> =  -6.5848687047282885e-18
mean absolute <f_i, f_j> =  0.001587484450545721 

mean <x_i, x_j> =  0.9581984515487534
mean absolute <x_i, x_j> =  0.9581984515487534
mean <f_i, f_j> =  -5.2749754677662094e-18
mean absolute <f_i, f_j> =  0.0017186311139789428 

mean <x_i, x_j> =  0.15383911288152294
mean absolute <x_i, x_j> =  0.15383911288152294
mean <f_i, f_j> =  1.1328806373726086e-18
mean absolute <f_i, f_j> =  0.03453717906606029 

mean <x_i, x_j> =  0.13382799129945852
mean absolute <x_i, x_j> =  0.13382799129945852
mean <f_i, f_j> = 

Label for gak: 100%|██████████| 4/4 [00:02<00:00,  1.72it/s]


mean <x_i, x_j> =  0.9606672733857211
mean absolute <x_i, x_j> =  0.9606672733857211
mean <f_i, f_j> =  -1.951563910473908e-18
mean absolute <f_i, f_j> =  0.0014045784832743466 

Time taken for kernel gak: 2.3324692249298096 seconds


Label for truncated sig:   0%|          | 0/4 [00:00<?, ?it/s]

mean <x_i, x_j> =  0.9999999999999998
mean absolute <x_i, x_j> =  1.0397810720377039
mean <f_i, f_j> =  6.288372600415926e-18
mean absolute <f_i, f_j> =  0.0804051318260861 

mean <x_i, x_j> =  129.1307774029919
mean absolute <x_i, x_j> =  129.1307774029919
mean <f_i, f_j> =  4.718447854656915e-16
mean absolute <f_i, f_j> =  2.036501715764226 

mean <x_i, x_j> =  197.0129944362949
mean absolute <x_i, x_j> =  199.63944537483053
mean <f_i, f_j> =  2.55351295663786e-15
mean absolute <f_i, f_j> =  15.944050901133082 

mean <x_i, x_j> =  5652.582212711188
mean absolute <x_i, x_j> =  5652.582212711188
mean <f_i, f_j> =  -2.930988785010413e-14
mean absolute <f_i, f_j> =  238.3245255551053 

mean <x_i, x_j> =  9980.011024289
mean absolute <x_i, x_j> =  9987.244395014059
mean <f_i, f_j> =  -5.861977570020827e-14
mean absolute <f_i, f_j> =  1156.9910655221738 

mean <x_i, x_j> =  129716.46302465361
mean absolute <x_i, x_j> =  129716.46302465361
mean <f_i, f_j> =  1.7053025658242404e-13
mean abso

Label for truncated sig:  25%|██▌       | 1/4 [00:18<00:55, 18.41s/it]

mean <x_i, x_j> =  79481790.62556727
mean absolute <x_i, x_j> =  79481790.62556727
mean <f_i, f_j> =  -2.4328426438934947e-09
mean absolute <f_i, f_j> =  25332473.731134016 

mean <x_i, x_j> =  0.9999999999999993
mean absolute <x_i, x_j> =  1.3368955010376198
mean <f_i, f_j> =  -3.0357660829594124e-18
mean absolute <f_i, f_j> =  0.12314839032092792 

mean <x_i, x_j> =  641.5896417055463
mean absolute <x_i, x_j> =  641.5896417055463
mean <f_i, f_j> =  9.547918011776346e-15
mean absolute <f_i, f_j> =  10.266254564054506 

mean <x_i, x_j> =  811.0666666170417
mean absolute <x_i, x_j> =  811.0666666170417
mean <f_i, f_j> =  6.661338147750939e-16
mean absolute <f_i, f_j> =  66.72468572998741 

mean <x_i, x_j> =  138028.28499189208
mean absolute <x_i, x_j> =  138028.28499189208
mean <f_i, f_j> =  5.684341886080801e-13
mean absolute <f_i, f_j> =  6725.942980829056 

mean <x_i, x_j> =  219525.96556809932
mean absolute <x_i, x_j> =  220924.40963301423
mean <f_i, f_j> =  -9.663381206337363e-13
m

Label for truncated sig:  50%|█████     | 2/4 [00:37<00:37, 18.81s/it]

mean <x_i, x_j> =  136152038102.6564
mean absolute <x_i, x_j> =  136152038102.6564
mean <f_i, f_j> =  -3.8925482302295915e-08
mean absolute <f_i, f_j> =  30642620905.538036 

mean <x_i, x_j> =  0.9999999999999993
mean absolute <x_i, x_j> =  1.2742394120970724
mean <f_i, f_j> =  -6.5052130349130266e-18
mean absolute <f_i, f_j> =  0.11212055398350904 

mean <x_i, x_j> =  299.95004033101134
mean absolute <x_i, x_j> =  299.95004033101134
mean <f_i, f_j> =  4.163336342344337e-15
mean absolute <f_i, f_j> =  12.872449141739661 

mean <x_i, x_j> =  409.1646882820313
mean absolute <x_i, x_j> =  419.4895817744103
mean <f_i, f_j> =  9.547918011776346e-15
mean absolute <f_i, f_j> =  58.25949365666193 

mean <x_i, x_j> =  38419.05156458454
mean absolute <x_i, x_j> =  38419.05156458454
mean <f_i, f_j> =  3.481659405224491e-13
mean absolute <f_i, f_j> =  3687.623438225928 

mean <x_i, x_j> =  84865.81715699742
mean absolute <x_i, x_j> =  87982.09882235738
mean <f_i, f_j> =  5.684341886080801e-13
mean

Label for truncated sig:  75%|███████▌  | 3/4 [00:56<00:18, 18.75s/it]

mean <x_i, x_j> =  619356043.0435655
mean absolute <x_i, x_j> =  629189532.1503134
mean <f_i, f_j> =  2.736947974380182e-09
mean absolute <f_i, f_j> =  224861786.15614563 

mean <x_i, x_j> =  343886920.717179
mean absolute <x_i, x_j> =  345633052.7968537
mean <f_i, f_j> =  -1.7486056502984496e-08
mean absolute <f_i, f_j> =  117252729.64571387 

mean <x_i, x_j> =  7091912707.7406845
mean absolute <x_i, x_j> =  7091912707.7406845
mean <f_i, f_j> =  2.432842643893495e-08
mean absolute <f_i, f_j> =  2372919960.82532 

mean <x_i, x_j> =  3145149471.5023336
mean absolute <x_i, x_j> =  3145149471.5023336
mean <f_i, f_j> =  8.028380724848533e-08
mean absolute <f_i, f_j> =  993863646.9133874 

mean <x_i, x_j> =  0.9999999999999991
mean absolute <x_i, x_j> =  1.2117434829629876
mean <f_i, f_j> =  -8.890457814381136e-18
mean absolute <f_i, f_j> =  0.09140526648049553 

mean <x_i, x_j> =  240.09302076946165
mean absolute <x_i, x_j> =  240.09302076946165
mean <f_i, f_j> =  -7.216449660063518e-16
me

Label for truncated sig: 100%|██████████| 4/4 [01:16<00:00, 19.04s/it]


mean <x_i, x_j> =  30884187.783605307
mean absolute <x_i, x_j> =  30903498.68042022
mean <f_i, f_j> =  -8.552962419938068e-10
mean absolute <f_i, f_j> =  8767338.704959173 

mean <x_i, x_j> =  99368488.29443473
mean absolute <x_i, x_j> =  99368488.29443473
mean <f_i, f_j> =  -2.2047636460284797e-09
mean absolute <f_i, f_j> =  27206629.802335534 

mean <x_i, x_j> =  248466575.55464092
mean absolute <x_i, x_j> =  248466575.55464092
mean <f_i, f_j> =  -4.105421961570272e-09
mean absolute <f_i, f_j> =  67782203.2122483 

mean <x_i, x_j> =  312030326.1245406
mean absolute <x_i, x_j> =  312049124.6107064
mean <f_i, f_j> =  2.5848953091368383e-09
mean absolute <f_i, f_j> =  100758322.18988223 

mean <x_i, x_j> =  4689932095.664822
mean absolute <x_i, x_j> =  4689932095.664822
mean <f_i, f_j> =  4.914342140664859e-07
mean absolute <f_i, f_j> =  1595064110.574465 

Time taken for kernel truncated sig: 76.1658935546875 seconds


Label for signature pde:   0%|          | 0/4 [00:00<?, ?it/s]

mean <x_i, x_j> =  8.364029392760952e+28
mean absolute <x_i, x_j> =  8.364029392760952e+28
mean <f_i, f_j> =  137438953472.0
mean absolute <f_i, f_j> =  3.201825440494807e+28 



Label for signature pde:  25%|██▌       | 1/4 [00:04<00:13,  4.46s/it]

mean <x_i, x_j> =  8.10976099540964e+27
mean absolute <x_i, x_j> =  8.10976099540964e+27
mean <f_i, f_j> =  141733920768.0
mean absolute <f_i, f_j> =  3.1045177130227146e+27 

mean <x_i, x_j> =  7.377326490722621e+31
mean absolute <x_i, x_j> =  7.377326490722621e+31
mean <f_i, f_j> =  -1470563143631182.2
mean absolute <f_i, f_j> =  3.0971865834817096e+31 

mean <x_i, x_j> =  9.440121544416869e+32
mean absolute <x_i, x_j> =  9.440121544416869e+32
mean <f_i, f_j> =  1.1764505149049458e+16
mean absolute <f_i, f_j> =  3.963199711763984e+32 

mean <x_i, x_j> =  2.089340718141283e+38
mean absolute <x_i, x_j> =  2.089340718141283e+38
mean <f_i, f_j> =  7.083549724304468e+21
mean absolute <f_i, f_j> =  7.998250828800432e+37 

mean <x_i, x_j> =  8.967382856642705e+34
mean absolute <x_i, x_j> =  8.967382856642705e+34
mean <f_i, f_j> =  -1.4411518807585587e+18
mean absolute <f_i, f_j> =  3.4328258084117515e+34 



Label for signature pde:  50%|█████     | 2/4 [00:09<00:09,  4.61s/it]

mean <x_i, x_j> =  3.9456650017406106e+36
mean absolute <x_i, x_j> =  3.9456650017406106e+36
mean <f_i, f_j> =  9.035139954469984e+18
mean absolute <f_i, f_j> =  1.656487249004345e+36 

mean <x_i, x_j> =  2.1326929209620783e+31
mean absolute <x_i, x_j> =  2.1326929209620783e+31
mean <f_i, f_j> =  884635641090633.1
mean absolute <f_i, f_j> =  8.952231963899503e+30 

mean <x_i, x_j> =  6.576281963438036e+30
mean absolute <x_i, x_j> =  6.576281963438036e+30
mean <f_i, f_j> =  -109143358316376.81
mean absolute <f_i, f_j> =  2.76088791709842e+30 

mean <x_i, x_j> =  2.673252804139919e+31
mean absolute <x_i, x_j> =  2.673252804139919e+31
mean <f_i, f_j> =  -96757023244288.0
mean absolute <f_i, f_j> =  8.97939311919643e+30 



  d = d**2 / self.S[None, :] #shape (..., N, M)
  d = d**2 / self.S #shape (..., M)
  d = d**2 / self.S[None, :] #shape (..., N, M)
  d = d**2 / self.S #shape (..., M)
Label for signature pde:  50%|█████     | 2/4 [00:13<00:13,  6.69s/it]


ValueError: Input contains infinity or a value too large for dtype('float64').

In [None]:
# # Test GAK
# from experiment_code import normalize_streams
# def test_gak(dataset_name:str):
#     X_train, y_train, X_test, y_test = UCR_UEA_datasets().load_dataset(dataset_name)
#     unique_labels = np.unique(y_train)
#     num_classes = len(unique_labels)
#     N_train, T, d = X_train.shape

#     corpus, test = normalize_streams(X_train, X_test)
#     sigma=2
#     s = tslearn.metrics.sigma_gak(dataset=corpus,
#           n_samples=200,
#           random_state=0)  

#     for sigma in sorted([s, 0.0001, 0.1, 0.5, 1, 2, 5, 10, 20, 50, 100, 1000, 10000]):
#         uv = tslearn.metrics.cdist_gak(corpus[0:5]/20, corpus[0:5]/20, sigma=sigma)
#         print(uv, sigma, dataset_name)


# for dataset_name in [
#         #'ArticularyWordRecognition', 
#         #'BasicMotions', 
#         'Cricket', #only NaNs for all sigmas>1, else 0
#          ##########'ERing', #cant find dataset
#         #'Libras', 
#         #'NATOPS', 
#         #'RacketSports',     
#         #'FingerMovements', # estimates a bit low, 10e-3
#         #'Heartbeat',
#         'SelfRegulationSCP1',  #only NaNs for all sigmas>1, else 0       
#         #'UWaveGestureLibrary'
#         ]:
#     test_gak(dataset_name)

### Print CV results

In [None]:
def average_labels(labelwise_dict:Dict[str, Dict[str, Any]],
                          field:str):
    """Average the values of ``field`` element-wise over all labels.

    Args:
        labelwise_dict: Mapping ``label -> parameter dict``. Every parameter
            dict must contain ``field``, holding a sequence (list or array).
        field: Key to extract from each parameter dict.

    Returns:
        ``np.mean`` over the label axis (``axis=0``) of the extracted
        sequences. If the sequences differ in length they are first cut to
        the length of the shortest one so they can be stacked.

    Raises:
        ValueError: If ``labelwise_dict`` is empty.
        KeyError: If a parameter dict has no entry for ``field``.
    """
    per_label = [param_dict[field] for param_dict in labelwise_dict.values()]
    if not per_label:
        raise ValueError(f"Cannot average field {field!r}: no labels given.")

    min_len = min(len(values) for values in per_label)
    # Truncate whenever the lengths disagree. The previous `min_len > 1`
    # guard let ragged input through when the shortest sequence had length
    # 0 or 1, which np.mean cannot stack.
    if any(len(values) != min_len for values in per_label):
        per_label = [values[:min_len] for values in per_label]
    return np.mean(per_label, axis=0)


def print_cv_tslearn_results(
        dataset_kernel_label_paramdict : Dict[str, Dict[str, Dict[str, Any]]],
        ):
    """Print a per-dataset, per-kernel summary of cross-validation results.

    For every dataset the dataset statistics are printed first, followed by
    the label-averaged ``"auc_params"`` and ``"auc_thresh"`` values of each
    kernel. Kernels whose name contains ``"truncated sig"`` additionally get
    their label-averaged ``"auc_orders"`` printed.

    Args:
        dataset_kernel_label_paramdict: Mapping ``dataset name -> results``,
            where ``results`` provides the keys ``"kernel_results"``
            (``kernel name -> label -> parameter dict``), ``"num_classes"``,
            ``"ts_length"``, ``"N_train"`` and ``"path dim"``.

    Returns:
        None. Output goes to stdout only.
    """
    for dataset_name, results in dataset_kernel_label_paramdict.items():
        print(dataset_name)
        kernelwise_dict = results["kernel_results"]
        n_classes = results['num_classes']
        ts_length = results['ts_length']
        n_train = results['N_train']
        path_dim = results['path dim']
        # print_dataset_stats is imported once at the top of the notebook;
        # "unknown" fills the test-size slot, which the CV results do not carry.
        print_dataset_stats(n_classes, path_dim, ts_length, n_train, "unknown")

        for kernel_name, labelwise_dict in kernelwise_dict.items():
            params_auc_avgs = average_labels(labelwise_dict, "auc_params")
            thresh_auc_avgs = average_labels(labelwise_dict, "auc_thresh")
            print(f"\n{kernel_name}")
            print("params_auc_avgs", params_auc_avgs)
            print("thresh_auc_avgs", thresh_auc_avgs)
            if "truncated sig" in kernel_name:
                trunc_auc_avgs = average_labels(labelwise_dict, "auc_orders")
                print("orders_auc_avgs", trunc_auc_avgs)
        print("\nEnd dataset \n\n\n")

# Summarise the cross-validation output; requires cv_best_models from the
# cv_tslearn cell above to have been assigned successfully.
print_cv_tslearn_results(cv_best_models)

# (tslearn) Validate on Test

In [None]:
def validate_tslearn(
        dataset_kernel_label_paramdict : Dict[str, Dict[str, Dict[str, Any]]],
        n_jobs:int = 1, 
        verbose:bool=True,
        ):
    """Validates the best models from cross validation on the
    tslearn datasets using kernel conformance scores.

    Args:
        dataset_kernel_label_paramdict: Mapping ``dataset name -> results``
            as produced by cross validation; ``results["kernel_results"]``
            is forwarded to ``run_all_kernels``.
        n_jobs: Forwarded to ``run_all_kernels``.
        verbose: Forwarded to ``run_all_kernels``.

    Returns:
        Dict mapping each dataset name to a dict with the keys
        ``"results"`` (return value of ``run_all_kernels``),
        ``"num_classes"``, ``"path dim"``, ``"ts_length"``, ``"N_train"``
        and ``"N_test"``. Datasets that cannot be loaded are reported on
        stdout and left out of the result.
    """
    # One loader instance is enough for all datasets.
    loader = UCR_UEA_datasets()
    experiments = {}
    for dataset_name, results in dataset_kernel_label_paramdict.items():

        # Load dataset
        print(dataset_name)
        X_train, y_train, X_test, y_test = loader.load_dataset(dataset_name)
        # load_dataset yields None entries when the dataset is unavailable;
        # skip it with a message instead of crashing on `.shape` below.
        if X_train is None or X_test is None:
            print("No dataset found")
            print()
            continue

        unique_labels = np.unique(y_train)
        num_classes = len(unique_labels)
        N_train, T, d = X_train.shape
        N_test, _, _  = X_test.shape
        print_dataset_stats(num_classes, d, T, N_train, N_test)

        #validate on test set
        kernelwise_dict = results["kernel_results"]
        kernel_results = run_all_kernels(X_train, y_train, X_test, y_test, 
                            unique_labels, kernelwise_dict, fixed_length=True, 
                            n_jobs=n_jobs, verbose=verbose)
        experiments[dataset_name] = {"results": kernel_results, 
                                     "num_classes": num_classes, 
                                     "path dim":d,
                                     "ts_length":T, 
                                     "N_train":N_train, 
                                     "N_test":N_test}
    return experiments

# Evaluate the cross-validated models on the test data; n_jobs=8 is forwarded
# to run_all_kernels. Requires cv_best_models from the cv_tslearn cell above.
test_results_tslearn = validate_tslearn(cv_best_models, n_jobs=8, verbose=False)

### Print test results

In [None]:
def print_experiment_results(experiments, round_digits=5):
    """Print dataset statistics and the rounded scores of every kernel.

    Args:
        experiments: Mapping ``dataset name -> info`` where ``info`` has the
            keys ``"num_classes"``, ``"path dim"``, ``"ts_length"``,
            ``"N_train"``, ``"N_test"`` and ``"results"``
            (``kernel name -> scores``). ``scores`` is indexed as
            ``scores[row, col]`` for the four entries listed below.
        round_digits: Number of decimals passed to ``round``.

    Returns:
        None. Output goes to stdout only.
    """
    # Printed label paired with the index it is read from, in print order.
    score_layout = (
        ("Conformance AUC:", (0, 0)),
        ("Mahalanobis AUC:", (1, 0)),
        ("Conformance PR AUC:", (0, 1)),
        ("Mahalanobis PR AUC:", (1, 1)),
    )

    for dataset_name, info in experiments.items():
        # Dataset header
        print("Dataset:", dataset_name)
        print_dataset_stats(
            info["num_classes"],
            info["path dim"],
            info["ts_length"],
            info["N_train"],
            info["N_test"],
        )

        # One section per kernel
        for kernel_name, scores in info["results"].items():
            print("\nKernel:", kernel_name)
            for label, position in score_layout:
                print(label, round(scores[position], round_digits))

        print("\nEnd Dataset\n\n\n")


# Show the test-set scores computed by validate_tslearn in the cell above.
print_experiment_results(test_results_tslearn)

# Read CV data from file and validate

In [None]:
#TODO