In [2]:
import numpy as np
import pandas as pd
import sklearn.preprocessing
import sklearn.utils
import sklearn.metrics
import iisignature
import torch
import plotly.express as px
import plotly.graph_objects as go
import matplotlib.pyplot as plt
from tqdm.notebook import tqdm
from typing import List, Optional, Dict, Set, Callable
from joblib import Memory, Parallel, delayed
import tslearn
import tslearn.metrics
from tslearn.datasets import UCR_UEA_datasets
import sigkernel
import scipy
from scipy.interpolate import interp1d
from numba import njit

from signature import streams_to_sigs, transform_stream
from conformance import BaseclassConformanceScore, pairwise_kernel_gram, stream_to_torch

In [2]:
# Dataset: ArticularyWordRecognition
# Number of Classes: 25
# Dimension of path: 9
# Length: 144
# Train Size, Test Size 275 300

# Dataset: AtrialFibrillation
# No dataset found

# Dataset: BasicMotions
# Number of Classes: 4
# Dimension of path: 6
# Length: 100
# Train Size, Test Size 40 40

# Dataset: CharacterTrajectories
# No dataset found

# Dataset: Cricket
# Number of Classes: 12
# Dimension of path: 6
# Length: 1197
# Train Size, Test Size 108 72

# Dataset: DuckDuckGeese
# No dataset found

# Dataset: EigenWorms
# Number of Classes: 5
# Dimension of path: 6
# Length: 17984
# Train Size, Test Size 128 131

# Dataset: Epilepsy
# Number of Classes: 4
# Dimension of path: 3
# Length: 206
# Train Size, Test Size 137 138

# Dataset: EthanolConcentration
# Number of Classes: 4
# Dimension of path: 3
# Length: 1751
# Train Size, Test Size 261 263

# Dataset: ERing
# No dataset found

# Dataset: FaceDetection
# Number of Classes: 2
# Dimension of path: 144
# Length: 62
# Train Size, Test Size 5890 3524

# Dataset: FingerMovements
# Number of Classes: 2
# Dimension of path: 28
# Length: 50
# Train Size, Test Size 316 100

# Dataset: HandMovementDirection
# Number of Classes: 4
# Dimension of path: 10
# Length: 400
# Train Size, Test Size 160 74

# Dataset: Handwriting
# Number of Classes: 26
# Dimension of path: 3
# Length: 152
# Train Size, Test Size 150 850

# Dataset: Heartbeat
# Number of Classes: 2
# Dimension of path: 61
# Length: 405
# Train Size, Test Size 204 205

# Dataset: InsectWingbeat
# Number of Classes: 10
# Dimension of path: 200
# Length: 22
# Train Size, Test Size 25000 25000

# Dataset: JapaneseVowels
# No dataset found

# Dataset: Libras
# Number of Classes: 15
# Dimension of path: 2
# Length: 45
# Train Size, Test Size 180 180

# Dataset: LSST
# Number of Classes: 14
# Dimension of path: 6
# Length: 36
# Train Size, Test Size 2459 2466

# Dataset: MotorImagery
# Number of Classes: 2
# Dimension of path: 64
# Length: 3000
# Train Size, Test Size 278 100

# Dataset: NATOPS
# Number of Classes: 6
# Dimension of path: 24
# Length: 51
# Train Size, Test Size 180 180

# Dataset: PenDigits
# Number of Classes: 10
# Dimension of path: 2
# Length: 8
# Train Size, Test Size 7494 3498

# Dataset: PEMS-SF
# Number of Classes: 7
# Dimension of path: 963
# Length: 144
# Train Size, Test Size 267 173

# Dataset: Phoneme
# Number of Classes: 39
# Dimension of path: 1
# Length: 1024
# Train Size, Test Size 214 1896

# Dataset: RacketSports
# Number of Classes: 4
# Dimension of path: 6
# Length: 30
# Train Size, Test Size 151 152

# Dataset: SelfRegulationSCP1
# Number of Classes: 2
# Dimension of path: 6
# Length: 896
# Train Size, Test Size 268 293

# Dataset: SelfRegulationSCP2
# Number of Classes: 2
# Dimension of path: 7
# Length: 1152
# Train Size, Test Size 200 180

# Dataset: SpokenArabicDigits
# No dataset found

# Dataset: StandWalkJump
# Number of Classes: 3
# Dimension of path: 4
# Length: 2500
# Train Size, Test Size 12 15

# Dataset: UWaveGestureLibrary
# Number of Classes: 8
# Dimension of path: 3
# Length: 315
# Train Size, Test Size 120 320


# tslearn datasets (equal length)

* equal length (in time) UCR_UEA multivariate time series 

In [112]:
#static kernels#########################################################################
######################## Static Kernels on R^d ###########################
##########################################################################

def _check_gram_dims(X:np.ndarray, 
                     Y:np.ndarray,
                     diag:bool = False,):
    """Stacks the input into a Gram matrix shape (N1, N2, ..., d) or
    into a diagonal Gram shape (N1, ..., d) if diag and N1==N2.

    Args:
        X (np.ndarray): Shape (N1, ... , d).
        Y (np.ndarray): Shape (N2, ... , d).
        diag (bool): If True, use diagonal Gram shape.
    """
    len1 = len(X.shape)
    len2 = len(Y.shape)
    if (len1<2) or (len2<2):
        raise ValueError("X and Y must have at least 2 dimensions, found {len1} and {len2}.")
    if X.shape[1:] != Y.shape[1:]:
        raise ValueError("X and Y must have the same dimensions except for the first axis.")

    N1 = X.shape[0]
    N2 = Y.shape[0]
    if diag and N1!=N2:
        raise ValueError("If 'diag' is True, X and Y must have the same number of samples.")


def linear_kernel_gram(X:np.ndarray, 
                       Y:np.ndarray,
                       diag:bool = False,
                       divide_by_dims:bool = True,
                       ):
    """Computes the Rd inner product matrix <x_i, y_j> or diagonal <x_i, y_i>.
    The inputs dimensions can only differ in the first axis.
    
    Args:
        X (np.ndarray): Shape (N1, ... , d).
        Y (np.ndarray): Shape (N2, ... , d).
        diag (bool): If True, computes the diagonal of the gram matrix.
        divide_by_dims (bool): If True, divides the result by the dimension d.

    Returns:
        np.ndarray: Array of shape (N1, N2, ...) or (N1, ...) if diag=True.
    """
    _check_gram_dims(X, Y, diag)
    if diag:
        #out_i... = sum(X_i...k * Y_i...k)
        out = np.einsum('i...k,i...k -> i...', X, Y)
    else:
        #out_ij... = sum(X_i...k * Y_j...k)
        out = np.einsum('i...k,j...k -> ij...', X, Y)
    
    if divide_by_dims:
        d = X.shape[-1]
        out = out/d

    return out


def rbf_kernel_gram(X:np.ndarray, 
                    Y:np.ndarray,
                    sigma:float,
                    diag:bool = False,
                    divide_by_dims:bool = True,
                    ):
    """Computes the RBF gram matrix k(x_i, y_j) or diagonal k(x_i, y_i).
    The inputs dimensions can only differ in the first axis.
    
    Args:
        X (np.ndarray): Shape (N1, ... , d).
        Y (np.ndarray): Shape (N2, ... , d).
        sigma (float): RBF parameter
        diag (bool): If True, computes the diagonal of the gram matrix.
        divide_by_dims (bool): If True, normalizes the norm by the dimension d.

    Returns:
        np.ndarray: Array of shape (N1, N2, ...) or (N1, ...) if diag=True.
    """
    if diag:
        diff = X-Y
        norms_squared = linear_kernel_gram(diff, diff, diag=True, 
                                           divide_by_dims=divide_by_dims)
    else:
        xx = linear_kernel_gram(X, X, diag=True, divide_by_dims=divide_by_dims)
        xy = linear_kernel_gram(X, Y, diag=False, divide_by_dims=divide_by_dims)
        yy = linear_kernel_gram(Y, Y, diag=True, divide_by_dims=divide_by_dims)
        norms_squared = xx[:, np.newaxis] + yy[np.newaxis, :] - 2*xy

    d= X.shape[-1]
    return np.exp(-sigma * norms_squared)


def poly_kernel_gram(X:np.ndarray, 
                     Y:np.ndarray,
                     p:float, #eg 2 or 3
                     diag:bool = False,
                     divide_by_dims:bool = True,):
    """Computes the polynomial kernel (<x_i, y_j> + 1)^p.
    The inputs dimensions can only differ in the first axis.
    
    Args:
        X (np.ndarray): Shape (N1, ... , d).
        Y (np.ndarray): Shape (N2, ... , d).
        p (float): Polynomial degree.
        diag (bool): If True, computes the diagonal of the gram matrix.
        divide_by_dims (bool): If True, normalizes the norm by the dimension d.

    Returns:
        np.ndarray: Array of shape (N1, N2, ...) or (N1, ...) if diag=True.
    """
    d = X.shape[-1]
    xy = linear_kernel_gram(X, Y, diag, divide_by_dims)
    return (xy + 1)**p


#######################################################################################
################### time series Integral Kernel of static kernel ######################
#######################################################################################


def integral_kernel(s1: np.ndarray,
                    s2: np.ndarray,
                    static_diag_kernel:Callable,
                    )-> float:
    """Computes the integral kernel K(x, y) = \int k(x_t, y_t) dt 
    given static kernel and two piecewise linear paths.

    Args:
        s1 (np.ndarray): A time series of shape (T1, d).
        s2 (np.ndarray): A time series of shape (T2, d).
        static_diag_kernel_gram (Callable): Takes in two arrays of shape (M, d) 
                        and outputs the diagonal Gram <x_m, y_m> of shape (M).
    """
    #Find all breakpoints of the piecewise linear paths
    T1, d = s1.shape
    T2, d = s2.shape
    times = np.concatenate([np.linspace(0, 1, T1), np.linspace(0, 1, T2)])
    times = sorted(np.unique(times))

    #Add the extra breakpoints to the paths
    f1 = interp1d(np.linspace(0, 1, T1), s1, axis=0, assume_sorted=True)
    f2 = interp1d(np.linspace(0, 1, T2), s2, axis=0, assume_sorted=True)
    x = f1(times) #shape (len(times), d)
    y = f2(times)

    #calculate k(x_t, y_t) for each t
    Kt = static_diag_kernel(x, y)

    #return integral of k(x_t, y_t) dt
    return np.trapz(Kt, times)


def integral_kernel_gram(
        X:List[np.ndarray],
        Y:List[np.ndarray],
        static_kernel_gram:Callable, #either linear_kernel_gram or rbf_kernel_gram with "diag" argument
        variable_length:bool,
        sym:bool = False,
    ):
    """Computes the Gram matrix K(X_i, Y_j) of the integral kernel 
    K(x, y) = \int k(x_t, y_t) dt.


    Args:
        static_kernel_gram (Callable): Gram kernel function taking in two ndarrays and
                    one boolean "diag" argument, see e.g. 'linear_kernel_gram' or 
                    'rbf_kernel_gram'.
        X (List[np.ndarray]): List of time series of shape (T_i, d).
        Y (List[np.ndarray]): List of time series of shape (T_j, d).
        variable_length (bool): If False, uses the optimized kernels for equal 
                                length time series.
        sym (bool): If True, computes the symmetric Gram matrix.
    """
    if not variable_length:
        X = np.array(X)
        Y = np.array(Y)
        ijKt = static_kernel_gram(X, Y, False) #diag=False

        #return integral of k(x_t, y_t) dt for each pair x and y
        N, T, d = X.shape
        return np.trapz(ijKt, dx=1/(T-1), axis=-1)
    else:
        static_ker = lambda a,b : static_kernel_gram(a,b, True) #diag=True
        pairwise_int_ker = lambda s1, s2 : integral_kernel(s1, s2, static_ker)
        return pairwise_kernel_gram(X,
                                    Y,
                                    pairwise_int_ker,
                                    sym)

############################################################################
################# signature kernels of static kernels ######################
############################################################################


def sig_kernel(s1:np.ndarray, 
               s2:np.ndarray, 
               order:int,
               static_kernel_gram:Callable = linear_kernel_gram,
               only_last:bool = True):
    """s1 and s2 are time series of shape (T_i, d)"""
    K = static_kernel_gram(s1, s2)
    nabla = K[1:, 1:] + K[:-1, :-1] - K[1:, :-1] - K[:-1, 1:]
    sig_kers = jitted_trunc_sig_kernel(nabla, order)
    if only_last:
        return sig_kers[-1]
    else:
        return sig_kers


@njit
def reverse_cumsum(arr:np.ndarray, axis:int): #ndim=2
    """JITed reverse cumulative sum along the specified axis.
    (np.cumsum with axis is not natively supported by Numba)"""
    A = arr.copy()
    if axis==0:
        for i in np.arange(A.shape[0]-2, -1, -1):
            A[i, :] += A[i+1, :]
    else: #axis==1
        for i in np.arange(A.shape[1]-2, -1, -1):
            A[:,i] += A[:,i+1]
    return A


@njit
def jitted_trunc_sig_kernel(nabla:np.ndarray, # gram matrix (T_1, T_2)
                            order:int,
                            ):
    """Given difference matrix nabla_ij = K[i+1, j+1] + K[i, j] - K[i+1, j] - K[i, j+1],
    computes the truncated signature kernel of all orders up to 'order'."""
    B = np.ones((order+1, order+1, order+1, *nabla.shape))
    for d in np.arange(order):
        for n in np.arange(order-d):
            for m in np.arange(order-d):
                B[d+1,n,m] = 1 + nabla/(n+1)/(m+1)*B[d, n+1, m+1]
                r1 = reverse_cumsum(nabla * B[d, n+1, 1] / (n+1), axis=0)
                B[d+1,n,m, :-1, :] += r1[1:, :]
                r2 = reverse_cumsum(nabla * B[d, 1, m+1] / (m+1), axis=1)
                B[d+1,n,m, :, :-1] += r2[:, 1:]
                rr = reverse_cumsum(nabla * B[d, 1, 1], axis=0)
                rr = reverse_cumsum(rr, axis=1)
                B[d+1,n,m, :-1, :-1] += rr[1:, 1:]

    return B[:,0,0,0,0]


def sig_kernel_gram(
        X:List[np.ndarray],
        Y:List[np.ndarray],
        order:int,
        static_kernel_gram:Callable,
        only_last:bool = True,
        sym:bool = False,
    ):
    """Computes the Gram matrix k_sig(X_i, Y_j) of the signature kernel,
    given the static kernel k(x, y) and the truncation order.

    Args:
        X (List[np.ndarray]): List of time series of shape (T_i, d).
        Y (List[np.ndarray]): List of time series of shape (T_j, d).
        static_kernel_gram (Callable): Gram kernel function taking in two ndarrays,
                            see e.g. 'linear_kernel_gram' or 'rbf_kernel_gram'.
        order (int): Truncation level of the signature kernel.
        only_last (bool): If False, returns results of all truncation levels up to 'order'.
        sym (bool): If True, computes the symmetric Gram matrix.
    """
    pairwise_ker = lambda s1, s2 : sig_kernel(s1, s2, order, static_kernel_gram, only_last)
    return pairwise_kernel_gram(X,
                                Y,
                                pairwise_ker,
                                sym)











#experiment code


def print_dataset_stats(num_classes, d, T, N_train, N_test):
    print("Number of Classes:", num_classes)
    print("Dimension of path:", d)
    print("Length:", T)
    print("Train:", N_train)
    print("Test:", N_test)


def case_static(train:np.ndarray, 
                test:np.ndarray,
                static_kernel_gram:Callable,):
    """Calculates the gram matrices of equal length time series for 
    a static kernel on R^d. Train and test are of shape (N1, T, d) 
    and (N2, T, d). Static kernel should take in two arrays of shape 
    (M, T*d) and return the Gram matrix."""
    N1, T, d = train.shape
    N2, _, _ = test.shape
    train = train.reshape(N1, -1)
    test = test.reshape(N2, -1)
    vv_gram = static_kernel_gram(train, train)
    uv_gram = static_kernel_gram(test, train)
    return vv_gram, uv_gram


def case_linear(train:np.ndarray, 
                test:np.ndarray):
    """Calculates the gram matrices for the euclidean inner product.
    Train and test are of shape (N1, T, d) and (N2, T, d)."""
    return case_static(train, test, linear_kernel_gram)


def case_rbf(train:np.ndarray, 
             test:np.ndarray,
             sigma:float):
    """Calculates the gram matrices for the rbf kernel.
    Train and test are of shape (N1, T, d) and (N2, T, d)."""
    rbf_ker = lambda X, Y : rbf_kernel_gram(X, Y, sigma)
    return case_static(train, test, rbf_ker)


def case_poly(train:np.ndarray, 
              test:np.ndarray,
              p:float):
    """Calculates the gram matrices for the rbf kernel.
    Train and test are of shape (N1, T, d) and (N2, T, d)."""
    poly_ker = lambda X, Y : poly_kernel_gram(X, Y, p)
    return case_static(train, test, poly_ker)


def case_gak(train:List[np.ndarray], 
                   test:List[np.ndarray], 
                   variable_length:bool,
                   sigma:float = 1.0,):
    """Calculates the gram matrices for the gak kernel.
    Train and test are lists of possibly variable length multidimension 
    time series of shape (T_i, d)"""
    #pick sigma parameter according to GAK paper
    if not variable_length:
        sigma = tslearn.metrics.sigma_gak(np.array(train))

    #compute gram matrices
    kernel = lambda s1, s2 : tslearn.metrics.gak(s1, s2, sigma)
    vv_gram = pairwise_kernel_gram(train, train, kernel, sym=True, disable_tqdm=False)
    uv_gram = pairwise_kernel_gram(test, train, kernel, sym=False, disable_tqdm=False)
    return vv_gram, uv_gram


# Solely to be used in sigkernel library. See e.g. sigkernel.LinearKernel.
# Had to reimplement it since the original class is missing the scalar in 
# the Gram method
class LinearKernel():
    def __init__(self, scale=1.0):
        self.scale = scale
        
    def batch_kernel(self, X, Y):
        return self.scale*torch.bmm(X, Y.permute(0,2,1))

    def Gram_matrix(self, X, Y):
        return self.scale * torch.einsum('ipk,jqk->ijpq', X, Y)
    
class PolyKernel():
    def __init__(self, scale=1.0, p=2):
        self.scale = scale
        self.p = p
        
    def batch_kernel(self, X, Y):
        return self.scale * (1+torch.bmm(X, Y.permute(0,2,1)))**self.p

    def Gram_matrix(self, X, Y):
        return self.scale * (1+torch.einsum('ipk,jqk->ijpq', X, Y))**self.p

 
def case_sig_pde(train:List[np.ndarray], 
                 test:List[np.ndarray], 
                 dyadic_order:int = 3,
                 static_kernel = sigkernel.LinearKernel(),
                ):
    """Calculates the signature kernel gram matrices of the train and test.
    Train and test are lists of possibly variable length multidimension 
    time series of shape (T_i, d)"""
    sig_kernel = sigkernel.SigKernel(static_kernel, dyadic_order)
    kernel = lambda s1, s2 : sig_kernel.compute_kernel(
                                stream_to_torch(s1), 
                                stream_to_torch(s2)).numpy()[0]
    vv_gram = pairwise_kernel_gram(train, train, kernel, sym=True, disable_tqdm=False)
    uv_gram = pairwise_kernel_gram(test, train, kernel, sym=False, disable_tqdm=False)
    return vv_gram, uv_gram


def calc_grams(train:List[np.ndarray], 
               test:List[np.ndarray],
               kernel_name:str, 
               variable_length:bool, 
               dyadic_order:int,        #for signature pde
               order:int,               #for truncated signature
               sigma:float = 2,         #for rbf
               p:float = 2,             #for polynomial
               ):   
    """Calculates gram matrices <train, train>, <test, train> given a kernel.
    Train and test are lists of possibly variable length multidimension time 
    series of shape (T_i, d)"""

    #Transform to array if possible
    if not variable_length:
        train = np.array(train)
        test = np.array(test)
    
    #choose method based on kernel name
    if kernel_name == "linear":
        return case_linear(train, test)
    
    elif kernel_name == "rbf":
        return case_rbf(train, test, sigma)
    
    elif kernel_name == "poly":
        return case_poly(train, test, p)

    elif kernel_name == "gak":
        return case_gak(train, test, variable_length)

    elif kernel_name == "truncated sig":
        vv_gram = sig_kernel_gram(train, train, order, linear_kernel_gram, sym=True)
        uv_gram = sig_kernel_gram(test, train, order, linear_kernel_gram)
        return vv_gram, uv_gram
    
    elif kernel_name == "truncated sig rbf":
        ker = lambda X, Y: rbf_kernel_gram(X, Y, sigma)
        vv_gram = sig_kernel_gram(train, train, order, ker, sym=True)
        uv_gram = sig_kernel_gram(test, train, order, ker)
        return vv_gram, uv_gram
    
    elif kernel_name == "truncated sig poly":
        ker = lambda X, Y : poly_kernel_gram(X, Y, p)
        vv_gram = sig_kernel_gram(train, train, order, ker, sym=True)
        uv_gram = sig_kernel_gram(test, train, order, ker)
        return vv_gram, uv_gram
    
    elif kernel_name == "signature pde":
        return case_sig_pde(train, 
                        test, 
                        dyadic_order=dyadic_order, 
                        static_kernel=LinearKernel(1/train[0].shape[-1]),)
    
    elif kernel_name == "signature pde rbf":
        return case_sig_pde(train, 
                        test, 
                        dyadic_order=dyadic_order, 
                        static_kernel=sigkernel.RBFKernel(sigma * train[0].shape[-1]),)

    elif kernel_name == "signature pde poly":
        return case_sig_pde(train, 
                        test, 
                        dyadic_order=dyadic_order, 
                        static_kernel=PolyKernel(1/train[0].shape[-1], p),)
    
    elif kernel_name == "integral linear":
        vv_gram = integral_kernel_gram(train, train, linear_kernel_gram, variable_length, sym=True)
        uv_gram = integral_kernel_gram(test, train, linear_kernel_gram, variable_length)
        return vv_gram, uv_gram

    elif kernel_name == "integral rbf":
        ker = lambda X, Y, diag: rbf_kernel_gram(X, Y, sigma, diag)
        vv_gram = integral_kernel_gram(train, train, ker, variable_length, sym=True)
        uv_gram = integral_kernel_gram(test, train, ker, variable_length)
        return vv_gram, uv_gram

    elif kernel_name == "integral poly":
        ker = lambda X, Y, diag : poly_kernel_gram(X, Y, p, diag)
        vv_gram = integral_kernel_gram(train, train, ker, variable_length, sym=True)
        uv_gram = integral_kernel_gram(test, train, ker, variable_length)
        return vv_gram, uv_gram
    
    else:
        raise ValueError("Invalid kernel name:", kernel_name)


def normalize_streams(train:np.ndarray, 
                      test:np.ndarray,
                      ):
    """Inputs are 3D arrays of shape (N, T, d) where N is the number of time series, 
    T is the length of each time series, and d is the dimension of each time series."""
    # Normalize data by training set mean and std
    mean = np.mean(train, axis=0, keepdims=True)
    std = np.std(train, axis=0, keepdims=True)
    train = (train - mean) / std
    test = (test - mean) / std
    return train, test


def run_single_kernel_single_label(
        corpus:List[np.ndarray], 
        X_test:List[np.ndarray], 
        y_test:np.array, #one vs rest labels
        class_to_test,
        kernel_name:str,
        variable_length:bool,
        dyadic_order:int = 5,   #for signature pde
        order:int = 10, #for truncated signature
        SVD_threshold:float = 0.01,
        SVD_max_rank:Optional[int] = None,
        sigma:float = 2,       #for rbf
        p:float=2,             #for polynomial
        verbose:bool = False,
        vv_gram=None,
        uv_gram=None,
        ):
    """Computes the AUC scores (weighted one vs rest) for a single kernel,
    using kernelized nearest neighbour variance adjusted distances.

    Args:
        X_train (List[np.ndarray]): List of time series of shape (T_i, d).
        y_train (np.array): 1-dim array of class labels.
        X_test (List[np.ndarray]): List of time series of shape (T_i, d).
        y_test (np.array): 1-dim array of class labels.
        unique_labels (np.array): Array of unique class labels.
        kernel_name (str): Name of the kernel to use.
        variable_length (bool): If False, uses the optimized kernels for equal 
                                length time series.
        normalize (bool): If True, normalizes train and test by the training set
                          mean and std.
        dyadic_order (int): Dyadic order for PDE solver 
                            (int > 0, higher = more accurate but slower).
        max_batch (int): Batch size in sig kernel computations.
        trunc_sig_dim_bound (int): Upper bound on the dimensionality of the 
                                  truncated signature.
        SVD_threshold (float): Sets all eigenvalues below this threshold to be 0.
        SVD_max_rank (int): Sets all SVD eigenvalues to be 0 beyond 'SVD_max_rank'.
    """
    # 2 methods (conf, mahal), 2 metrics (roc_auc, pr_auc)
    aucs = np.zeros( (2, 2) ) 

    # Calculate amomaly distancce scores for all test samples
    if (vv_gram is None) and (uv_gram is None):
        vv_gram, uv_gram = calc_grams(corpus, X_test, kernel_name, 
                                variable_length, dyadic_order,
                                order, sigma=sigma, p=p)
    scorer = BaseclassConformanceScore(vv_gram, SVD_threshold, print_rank=verbose, 
                                        SVD_max_rank=SVD_max_rank)
    dists = np.array([scorer._anomaly_distance(sample, method="both") 
                        for sample in uv_gram]).T
    distances_conf, distances_mahal = dists

    # Calculate one vs rest AUC, weighted by size of class
    for idx_conf_mahal, distances in enumerate([distances_conf, distances_mahal]):
        ovr_labels = y_test != class_to_test
        average="weighted" #average = "macro"
        roc_auc = sklearn.metrics.roc_auc_score(ovr_labels, distances, average=average)
        pr_auc = sklearn.metrics.average_precision_score(ovr_labels, distances, average=average)
        aucs[idx_conf_mahal, 0] = roc_auc
        aucs[idx_conf_mahal, 1] = pr_auc
    
    return aucs


def run_all_kernels(X_train:List[np.ndarray], 
                    y_train:np.array, 
                    X_test:List[np.ndarray], 
                    y_test:np.array, 
                    unique_labels:np.array, 
                    kernel_names:List[str],
                    variable_length:bool,
                    verbose:bool = False,
                    ):
    kernel_results = {}
    for kernel_name in kernel_names:
        # 2 methods (conf, mahal), 2 metrics (roc_auc, pr_auc), C classes
        aucs = np.zeros( (2, 2, len(unique_labels)) ) 
        for i, label in enumerate(unique_labels):
            # Get all samples of the current class
            idxs = np.where(y_train == label)[0]
            corpus = [X_train[k] for k in idxs]
            if not variable_length:
                corpus, X_test = normalize_streams(np.array(corpus), X_test)

            #run model
            scores = run_single_kernel_single_label(corpus, X_test, y_test,
                                    label, kernel_name, variable_length, 
                                    verbose=verbose)
            aucs[:,:, i] = scores
        
        #update kernel results
        kernel_results[kernel_name] = aucs
    return kernel_results


def run_tslearn_experiments(dataset_names:List[str], 
                            kernel_names:List[str],
                            verbose:bool=False,
                            ):
    """Runs a series of time series anomaly detection experiments on the specified 
    tslearn datasets using kernel conformance scores."""
    experiments = {}
    for dataset_name in dataset_names:
        # Load dataset
        X_train, y_train, X_test, y_test = UCR_UEA_datasets().load_dataset(dataset_name)

        # stats
        unique_labels = np.unique(y_train)
        num_classes = len(unique_labels)
        N_train, T, d = X_train.shape
        N_test, _, _  = X_test.shape
        print_dataset_stats(num_classes, d, T, N_train, N_test)

        # Run each kernel
        kernel_results = run_all_kernels(X_train, y_train, X_test, y_test, 
                                         unique_labels, kernel_names,
                                         variable_length=False,
                                         verbose=verbose)
        
        #log dataset experiment
        experiments[dataset_name] = {"results": kernel_results, 
                                     "num_classes": num_classes, 
                                     "dim":d,
                                     "ts_length":T, 
                                     "N_train":N_train, 
                                     "N_test":N_test}
    return experiments

In [118]:
#run experiments

experiments = run_tslearn_experiments(
    dataset_names = [
        #'ArticularyWordRecognition', 
        #'BasicMotions', 
        #'Cricket',
         ##########'ERing', #cant find dataset
        'Libras', 
        #'NATOPS', 
        #'RacketSports',     
        #'FingerMovements',
        #'Heartbeat',
        #'SelfRegulationSCP1', 
        #'UWaveGestureLibrary'
        ],
    kernel_names = [
        #"linear",
        #"rbf",
        #"poly",
        #"gak",
        #"truncated sig",
        #"truncated sig rbf",
        #"truncated sig poly",
        #"signature pde",
        #"signature pde rbf",
        #"signature pde poly",
        #"integral linear",
        #"integral rbf",
        #"integral poly",
        ]
        )


def print_experiment_results(experiments, round_digits=5):
    for dataset_name, results in experiments.items():
        #Dataset:
        print("\nStart Dataset {dataset_name} results:", dataset_name)
        print_dataset_stats(results["num_classes"], results["dim"], 
                            results["ts_length"], results["N_train"], 
                            results["N_test"])

        #Results for each kernel:
        for kernel_name, scores in results["results"].items():
            print("\nKernel:", kernel_name)
            scores = np.mean(scores, axis=2)
            print("Conformance AUC:", round(scores[0, 0], round_digits))
            print("Mahalanobis AUC:", round(scores[1, 0], round_digits))
            print("Conformance PR AUC:", round(scores[0, 1], round_digits))
            print("Mahalanobis PR AUC:", round(scores[1, 1], round_digits))

        print("\nEnd Dataset {dataset_name} results\n\n\n")
        
print_experiment_results(experiments)

Number of Classes: 15
Dimension of path: 2
Length: 45
Train: 180
Test: 180

Start Dataset {dataset_name} results: Libras
Number of Classes: 15
Dimension of path: 2
Length: 45
Train: 180
Test: 180

Kernel: integral linear
Conformance AUC: 0.44114
Mahalanobis AUC: 0.44104
Conformance PR AUC: 0.92761
Mahalanobis PR AUC: 0.92762

End Dataset {dataset_name} results





# PenDigits dataset (Variable Length) 

* Can't use ts-learn since it interpolated and homogenized the length of all time series

In [None]:
#################################################################################################################
## Loading code taken from https://github.com/pafoster/conformance_distance_experiments_cochrane_et_al_2020    ##
## DATASET_URLS = ['https://archive.ics.uci.edu/ml/machine-learning-databases/pendigits/pendigits-orig.tes.Z', ##
##                 'https://archive.ics.uci.edu/ml/machine-learning-databases/pendigits/pendigits-orig.tra.Z'] ##
#################################################################################################################

def read_pendigits_dataset(filename):
    with open(filename, 'r') as f:
        data_lines = f.readlines()

    data = []
    data_labels = []
    current_digit = None
    for line in data_lines:
        if line == "\n":
            continue

        if line[0] == ".":
            if "SEGMENT DIGIT" in line[1:]:
                if current_digit is not None:
                    data.append(np.array(current_digit))
                    data_labels.append(digit_label)

                current_digit = []
                digit_label = int(line.split('"')[1])
            else:
                continue

        else:
            x, y = map(float, line.split())
            current_digit.append([x, y])
            
    data.append(np.array(current_digit))
    data_labels.append(digit_label)
    return data, np.array(data_labels)


def create_pendigits_dataframe(data):
    dataframes = []
    for subset, data in data.items():
        df = pd.DataFrame(data).T
        df.columns = ['data', 'label']
        df['subset'] = subset
        dataframes.append(df)
    return pd.concat(dataframes)

data = {'train': read_pendigits_dataset("Data/pendigits-orig.tra"),
        'test': read_pendigits_dataset("Data/pendigits-orig.tes")}

df_pendigits_raw = create_pendigits_dataframe(data)

def plot_pendigits_entry(sample):
    df = pd.DataFrame(sample["data"], columns=["x", "y"])
    fig = px.line(df, x="x", y="y", text=df.index, width=500, height=500)
    fig.update_traces(textposition="bottom right")
    fig.show()

plot_pendigits_entry(df_pendigits_raw.iloc[20])
print(df_pendigits_raw.head()) 
# Each data point is a timeseries of the form ([x_t1, y_t1], ... , [x_tn, y_tn]) 
# of variable length, that is an array of shape (N_i, 2) for each i in the dataset.


In [None]:
############################################################################################## |
################################### PenDigits experiments #################################### |
############################################################################################## \/

def run_pendigits_experiments(df:pd.DataFrame, 
                              kernel_names:List[str],
                              stream_transforms = ["time_enhance", "min_max_normalize"],):
    """Calculates AUCs for each kernel on the PenDigits dataset.
    df has columns ["data", "label", "subset"]. Each data point 
    is a timeseries of shape (T_i, d) of variable length."""
    #transform streams
    df["data"] = df["data"].apply(lambda x : transform_stream(x, stream_transforms))

    #Gather dataset info
    X_train = df[df["subset"]=="train"]["data"].values
    y_train = np.array(df[df["subset"]=="train"]["label"].values)
    X_test = df[df["subset"]=="test"]["data"].values
    y_test = np.array(df[df["subset"]=="test"]["label"].values)
    labels = sorted(df["label"].unique())
    num_classes = len(labels)
    d = X_train[0].shape[1]
    T = "variable length"
    N_train = len(X_train)
    N_test = len(X_test)
    print_dataset_stats(num_classes, d, T, N_train, N_test)

    # Run each kernel
    kernel_results = {}
    for kernel_name in kernel_names:
        print(kernel_name)
        scores = run_single_kernel(X_train, y_train, X_test, y_test, labels, 
                        kernel_name, variable_length=True, normalize=False,
                        trunc_sig_dim_bound=200, SVD_max_rank=None)
        kernel_results[kernel_name] = scores

    #log results
    pendigits_results = {"results": kernel_results, 
                         "num_classes": num_classes,
                         "dim": d,
                         "ts_length":T, 
                         "N_train":N_train, 
                         "N_test":N_test}
    return pendigits_results

# pendigits_results = run_pendigits_experiments(
#     df_pendigits_raw, 
#     kernel_names=[
#         #"gak",
#         "truncated signature", 
#         #"signature pde", 
#         #"signature pde RBF"
#         ],
#         )

In [None]:
# Dataset: Libras
# Number of Classes: 15
# Dimension of path: 2
# Length: 45
# Train: 180
# Test: 180

# Kernel: linear
# Conformance AUC: 0.9471891534391536
# Mahalanobis AUC: 0.9460978835978835
# Conformance PR AUC: 0.9952856063472626
# Mahalanobis PR AUC: 0.9953123143532209

# Kernel: rbf
# Conformance AUC: 0.6121858465608465
# Mahalanobis AUC: 0.235896164021164
# Conformance PR AUC: 0.9543784983385764
# Mahalanobis PR AUC: 0.9073331474510873

# Kernel: gak
# Conformance AUC: 0.8303240740740742
# Mahalanobis AUC: 0.056779100529100526
# Conformance PR AUC: 0.9720553520740843
# Mahalanobis PR AUC: 0.8271237492596946

# Kernel: truncated signature
# Conformance AUC: 0.8772486772486773
# Mahalanobis AUC: 0.8711970899470899
# Conformance PR AUC: 0.9884586491342986
# Mahalanobis PR AUC: 0.9880448383038575

# Kernel: signature pde
# Conformance AUC: 0.8616071428571429
# Mahalanobis AUC: 0.8559854497354498
# Conformance PR AUC: 0.9860276339146576
# Mahalanobis PR AUC: 0.9856341293618142

# Kernel: signature pde RBF
# Conformance AUC: 0.48647486772486775
# Mahalanobis AUC: 0.5124669312169311
# Conformance PR AUC: 0.9148945413486355
# Mahalanobis PR AUC: 0.9149847211152167

# End Dataset


In [None]:
# print_experiment_results({"PenDigits": pendigits_results})

In [96]:
# testing ksig  ---- clearly something wrong, since the values dont converge as order->infty for ksig

import time

def calc_iisig_kernel(X, Y, order):
    sig_X, sig_Y = streams_to_sigs([X,Y], order, disable_tqdm=True)
    dot = 1 + np.dot(sig_X, sig_Y)
    return dot


def calc_sigpde_kernel(X,Y):
    dyadic_order = 5
    static_kernel = sigkernel.LinearKernel()
    vv, uv = case_sig_pde([X], [Y], dyadic_order, static_kernel)
    return uv[0,0]


def calc_ksig_kernel(X,Y, order):
    import ksig
    static_kernel = ksig.static.kernels.LinearKernel() 
    sig_kernel = ksig.kernels.SignatureKernel(n_levels=order, order=1, static_kernel=static_kernel, normalize=False)
    dot = sig_kernel(np.array([X,X]), np.array([Y,Y]))[0,0]
    return dot


def trunc_sig_kernel(s1:np.ndarray, 
                    s2:np.ndarray, 
                    order:int, #order is truncation level of the signature
                    static_kernel_gram:Callable = linear_kernel_gram,
                    only_last:bool = True,

                    ):
    """s1 and s2 are time series of shape (T_i, d)"""
    K = static_kernel_gram(s1, s2)
    nabla = K[1:, 1:] + K[:-1, :-1] - K[1:, :-1] - K[:-1, 1:]
    sig_kers = jitted_trunc_sig_kernel(nabla, order)
    if only_last:
        return sig_kers[-1]
    else:
        return sig_kers



@njit
def reverse_cumsum(arr:np.ndarray, axis:int): #ndim=2
    """JITed reverse cumulative sum along the specified axis.
    (np.cumsum with axis is not natively supported by Numba)"""
    A = arr.copy()
    if axis==0:
        for i in np.arange(A.shape[0]-2, -1, -1):
            A[i, :] += A[i+1, :]
    else: #axis==1
        for i in np.arange(A.shape[1]-2, -1, -1):
            A[:,i] += A[:,i+1]
    return A


@njit
def jitted_trunc_sig_kernel(nabla:np.ndarray, # gram matrix (T_1, T_2)
                            order:int,
                            ):
    """Given difference matrix nabla_ij = K[i+1, j+1] + K[i, j] - K[i+1, j] - K[i, j+1],
    computes the truncated signature kernel of all orders up to 'order'."""
    B = np.ones((order+1, order+1, order+1, *nabla.shape))
    for d in np.arange(order):
        for n in np.arange(order-d):
            for m in np.arange(order-d):
                B[d+1,n,m] = 1 + nabla/(n+1)/(m+1)*B[d, n+1, m+1]
                r1 = reverse_cumsum(nabla * B[d, n+1, 1] / (n+1), axis=0)
                B[d+1,n,m, :-1, :] += r1[1:, :]
                r2 = reverse_cumsum(nabla * B[d, 1, m+1] / (m+1), axis=1)
                B[d+1,n,m, :, :-1] += r2[:, 1:]
                rr = reverse_cumsum(nabla * B[d, 1, 1], axis=0)
                rr = reverse_cumsum(rr, axis=1)
                B[d+1,n,m, :-1, :-1] += rr[1:, 1:]

    return B[:,0,0,0,0]
    
    



d = 2
MAX_ORDER = 18
times_iisig = np.zeros( (MAX_ORDER) )
times_sigker  = np.zeros( (MAX_ORDER) )
times_ksig = np.zeros( (MAX_ORDER) )
np.random.seed(99)
X, Y = np.random.randn(2, 19, d)/np.sqrt(d)
for order in range(1, MAX_ORDER+1):
    print("\norder", order)
    t0= time.time()
    dot1=calc_iisig_kernel(X, Y, order)
    t1 = time.time()
    dot2=trunc_sig_kernel(X, Y, order)
    t2 = time.time()
    dot3=calc_ksig_kernel(X, Y, order)
    t3 = time.time()
    times_iisig[order-1] = t1-t0
    times_sigker[order-1] = t2-t1
    times_ksig[order-1] = t3-t2
    print("dot1", dot1)
    print("dot2", dot2)
    print("dot3", dot3)

print("\n")
dot4 = calc_sigpde_kernel(X, Y)
print("dot_pde", dot4)


print("\ncomparison", times_iisig[1:]/times_sigker[1:])
print("\niisig", times_iisig[1:])
print("\nsigker", times_sigker[1:])
print("\nksig", times_ksig[1:])
print("\ncomparison", times_sigker[1:]/times_ksig[1:])


order 1
dot1 -1.755811675941624
dot2 -1.7558116759416191
dot3 -1.7558116759416258

order 2
dot1 1.4030025671165636
dot2 1.4030025671165642
dot3 110.89060746948743

order 3
dot1 1.061035219597274
dot2 1.0610352195972634
dot3 83.69762398102489

order 4
dot1 1.212371443858032
dot2 1.2123714438579665
dot3 1443.8494052680496

order 5
dot1 2.1171672546357194
dot2 2.11716725463564
dot3 4318.918584525861

order 6
dot1 1.2958898619566153
dot2 1.2958898619565575
dot3 6211.33595570119

order 7
dot1 1.7067618411050665
dot2 1.706761841105013
dot3 15519.90607393497

order 8
dot1 1.5164888736274276
dot2 1.5164888736273308
dot3 17961.066545503454

order 9
dot1 1.788181977228414
dot2 1.7881819772283214
dot3 17964.064206142768

order 10
dot1 2.140755266525466
dot2 2.140755266525469
dot3 21634.64009985917

order 11
dot1 2.211273722994132
dot2 2.211273722994162
dot3 21444.509913591995

order 12
dot1 2.2948210029800458
dot2 2.2948210029800493
dot3 21344.041986561395

order 13
dot1 2.2952324218406037
dot2 

Kernel Gram Matrix: 100%|██████████| 1/1 [00:00<00:00, 117.14it/s]
Kernel Gram Matrix: 100%|██████████| 1/1 [00:00<00:00, 44.42it/s]

dot_pde 2.3122997286191893

comparison [1.87387387 1.38100209 0.47232704 0.59489456 0.19172842 0.15874269
 0.20778146 0.1707777  0.19585871 0.24052829 0.42410606 0.30205229
 0.99533344 1.72951591 2.11428968 3.44366661 6.09059463]

iisig [2.47955322e-04 3.15427780e-04 1.79052353e-04 3.83377075e-04
 1.91211700e-04 2.39610672e-04 4.78744507e-04 5.39779663e-04
 9.58442688e-04 1.45459175e-03 3.32546234e-03 6.78992271e-03
 2.71043777e-02 5.95753193e-02 8.72416496e-02 1.62200212e-01
 3.45914602e-01]

sigker [0.00013232 0.0002284  0.00037909 0.00064445 0.0009973  0.00150943
 0.00230408 0.00316072 0.00489354 0.00604749 0.00784111 0.0224793
 0.02723145 0.03444624 0.04126287 0.04710102 0.05679488]

ksig [0.00180602 0.00174117 0.00211787 0.00223088 0.00233984 0.00345516
 0.00381732 0.00341773 0.00444031 0.00453067 0.0053246  0.01027536
 0.0112586  0.01289487 0.01192331 0.0119617  0.01337743]

comparison [0.07326733 0.13117897 0.17899358 0.28887464 0.42622784 0.43686172
 0.60358504 0.92479944 1.102




In [None]:
import tslearn

_datasets = [
            'ArticularyWordRecognition', 
            'BasicMotions', 
            'Cricket',
            #'ERing',
            'Libras', 
            'NATOPS', 
            'RacketSports',     
            'FingerMovements',
            'Heartbeat',
            'SelfRegulationSCP1', 
            'UWaveGestureLibrary'
            ]


#for dataset_name in ucr_datasets.list_multivariate_datasets():
for dataset_name in _datasets:
    print("Dataset:", dataset_name)
    dataset = tslearn.datasets.UCR_UEA_datasets().load_dataset(dataset_name)
    if dataset[0] is not None:
        X_train, y_train, X_test, y_test = dataset
        num_classes = len(np.unique(y_train))
        N_train, T, d = X_train.shape
        N_test, _, _  = X_test.shape

        # is_irregular = dataset["is_irregular"]
        
        print("Number of Classes:", num_classes)
        print("Dimension of path:", d)
        print("Length:", T)
        print("Train Size, Test Size", N_train, N_test)
        print()
    else:
        print("No dataset found")
        print()

In [None]:
# Dataset: ArticularyWordRecognition
# Number of Classes: 25
# Dimension of path: 9
# Length: 144
# Train Size, Test Size 275 300

# Dataset: BasicMotions
# Number of Classes: 4
# Dimension of path: 6
# Length: 100
# Train Size, Test Size 40 40

# Dataset: Cricket
# Number of Classes: 12
# Dimension of path: 6
# Length: 1197
# Train Size, Test Size 108 72

# Dataset: Libras
# Number of Classes: 15
# Dimension of path: 2
# Length: 45
# Train Size, Test Size 180 180

# Dataset: NATOPS
# Number of Classes: 6
# Dimension of path: 24
# Length: 51
# Train Size, Test Size 180 180

# Dataset: RacketSports
# Number of Classes: 4
# Dimension of path: 6
# Length: 30
# Train Size, Test Size 151 152

# Dataset: FingerMovements
# Number of Classes: 2
# Dimension of path: 28
# Length: 50
# Train Size, Test Size 316 100

# Dataset: Heartbeat
# Number of Classes: 2
# Dimension of path: 61
# Length: 405
# Train Size, Test Size 204 205

# Dataset: SelfRegulationSCP1
# Number of Classes: 2
# Dimension of path: 6
# Length: 896
# Train Size, Test Size 268 293

# Dataset: UWaveGestureLibrary
# Number of Classes: 8
# Dimension of path: 3
# Length: 315
# Train Size, Test Size 120 320

# Cross Validation code (for anomaly detection)

In [109]:
def balanced_k_folds(X:List,    #dataset
                    y:np.array, #class labels
                    k:int = 5):
    """Generates balanced k-folds for cross-validation, where each fold
    is balanced the same as the original dataset."""
    #is X numpy array?
    is_numpy=True if isinstance(X, np.ndarray) else False

    #shuffle data
    indices = np.arange(len(X))
    np.random.shuffle(indices)
    X = [X[i] for i in indices]
    y = np.array([y[i] for i in indices])
    unique_labels = np.unique(y)

    #split into classes
    classwise = {label:[] for label in unique_labels}
    for x, label in zip(X, y):
        classwise[label].append(x)

    #split into k-folds
    classwise_folds = {}
    for label, dataclass in classwise.items():
        classwise_folds[label] = np.array_split(dataclass, k)
    
    #create folds
    folds=[]
    for i in range(k):
        X_train = []
        y_train = []
        X_val = []
        y_val = []
        for label, dataclass in classwise_folds.items():
            for j in range(k):
                if j!=i:
                    X_train.extend(dataclass[j])
                    y_train.extend([label]*len(dataclass[j]))
                else:
                    X_val.extend(dataclass[j])
                    y_val.extend([label]*len(dataclass[j]))

        #convert to numpy if possible
        if is_numpy:
            X_train = np.array(X_train)
            X_val = np.array(X_val)
        y_train = np.array(y_train)
        y_val = np.array(y_val)
        folds.append([X_train, y_train, X_val, y_val])

    return folds



def get_hyperparam_ranges(kernel_name:str, 
                          leave_out_sig_order:bool=True):
    num_params = 5
    sigma_grid = np.exp(np.linspace(-5, 0, num_params))
    p_grid = np.array([1.5, 2.0, 2.5, 3.0])
    ranges = {}
    if "rbf" in kernel_name:
        ranges["sigma"] = sigma_grid
    elif "poly" in kernel_name:
        ranges["p"] = p_grid
    
    if not leave_out_sig_order:
        if "truncated sig" in kernel_name:
            ranges["order"] = np.arange(2, 15)
    return ranges
    


def test_model_params_CV(kernel_name, 
                         label, 
                         X_train, 
                         y_train, 
                         X_val, 
                         y_val, 
                         ):
    """"Kernel_name specifies the model. We work on a single fold, and do
    anomaly detection using 'label' as the normal class. We then calculate
    the AUC scores for each hyperparameter."""
    pass


def cross_validation(X:List,                #Training Dataset
                     y:np.array,            #Training class labels
                     unique_labels:np.array, #Unique class labels
                     kernel_names:List[str],
                     ):
    model_params_CV = {}
    # anomaly detection model is specified by a pair (class label, kernel_name)
    # model_params_CV[pair] = CV_scores
    folds = balanced_k_folds(X, y, k=5)
    for kernel_name in kernel_names:
        for label in unique_labels:
            #calc minimum size of the label class in folds
            min_fold_size = min([len(np.where(y_train==label)[0]) 
                                 for (_, y_train, _, _) in folds])
            
            #loop over folds
            for fold in folds:
                X_train, y_train, X_val, y_val = fold
                param_ranges = get_hyperparam_ranges(kernel_name)
                print(param_ranges)
                
                #problem: different folds can be of different sizes.

                assert False
                #model_params_CV[(label, kernel_name)] = []
                # each model has its own set of hyperparameters


                # take max of mahal and conf in same category (PR vs PR etc.)
                # We are after the best model after all.

                for param in param_ranges:
                    # fit model normally for most models
                # if model is truncated sig, we only need to calculate the Gram matrices once.

                # TODO modify run_single_kernel_single_label() to return
                # multiple AUC scores for each threshold. 
                # use variable 'min_fold_size' later.

def test():
    X_train, y_train, X_test, y_test = UCR_UEA_datasets().load_dataset("Libras")
    unique_labels = np.unique(y_train)

    cross_validation(X_train, 
                     y_train, 
                     unique_labels, 
                     ["rbf"])
test()

{'sigma': array([0.00673795, 0.02351775, 0.082085  , 0.2865048 , 1.        ])}


AssertionError: 

In [None]:
labels = np.array([1, 2, 2, 3, 3, 3, 4, 4, 4, 4])
label = 3

np.where(y_train==label)