In [20]:
import numpy as np
from sklearn.linear_model import RidgeClassifierCV
from sklearn.pipeline import make_pipeline
from sktime.transformers.series_as_features.rocket import Rocket
import pickle
import pandas as pd

In [7]:
from sktime.datasets.base import load_japanese_vowels, load_arrow_head  # multivariate dataset
X_train, y_train = load_japanese_vowels(split="train", return_X_y=True)


In [5]:
X_train.shape

(270, 12)

In [8]:
X_train, y_train = load_arrow_head(split="train", return_X_y=True)
X_train.shape

(36, 1)

In [11]:
dataset_names = [("POLLUTION", 5, 25)]
dataset_name, window_size, task_size = dataset_names[0]

train_data = pickle.load(  open( "../Data/TRAIN-"+dataset_name+"-W"+str(window_size)+"-T"+str(task_size)+"-NOML.pickle", "rb" ) )
validation_data = pickle.load( open( "../Data/VAL-"+dataset_name+"-W"+str(window_size)+"-T"+str(task_size)+"-NOML.pickle", "rb" ) )
test_data = pickle.load( open( "../Data/TEST-"+dataset_name+"-W"+str(window_size)+"-T"+str(task_size)+"-NOML.pickle", "rb" ) )

In [13]:
train_data.x.shape

(119483, 5, 14)

In [56]:
sample_data = train_data.x[0]

In [52]:
sample_data[i]

0   -0.537382
1   -0.537382
2   -0.537382
3   -0.537382
4   -0.537382
Name: 13, dtype: float64

In [17]:
train_data.x[0].shape

(5, 14)

In [22]:
type(X_train)

pandas.core.frame.DataFrame

In [35]:
X_train.iloc[0][0]

0     -1.9630
1     -1.9578
2     -1.9561
3     -1.9383
4     -1.8967
        ...  
246   -1.8413
247   -1.8843
248   -1.9054
249   -1.9239
250   -1.9092
Length: 251, dtype: float64

In [36]:
X_train.iloc[1][0]

0     -1.7746
1     -1.7740
2     -1.7766
3     -1.7307
4     -1.6963
        ...  
246   -1.6400
247   -1.6787
248   -1.7292
249   -1.7757
250   -1.7893
Length: 251, dtype: float64

In [55]:
from rocket_functions import generate_kernels, apply_kernels
from sklearn.linear_model import RidgeClassifierCV

In [57]:
n_kernels = 10000
kernels = generate_kernels(sample_data.shape[-1], n_kernels)

In [58]:
X_training_transform = apply_kernels(sample_data, kernels)
X_training_transform.shape

(5, 20000)

In [93]:
def sample_to_sktime(sample):
    
    #inspired by tslearn https://github.com/tslearn-team/tslearn/blob/775dadd/tslearn/utils.py#L867-L939
    X_ = sample
    X_pd = pd.DataFrame(dtype=np.float32)
    for dim in range(X_.shape[2]):
        X_pd['dim_' + str(dim)] = [pd.Series(data=Xi[:Xi.shape[0], dim])
                                   for Xi in X_]#
    
    return X_pd

In [73]:
X_pd

Unnamed: 0,dim_0,dim_1,dim_2,dim_3,dim_4,dim_5,dim_6,dim_7,dim_8,dim_9,dim_10,dim_11,dim_12,dim_13
0,0 -1.67038 1 -1.67038 2 -1.67038 3 -1....,0 -1.68077 1 -1.68077 2 -1.68077 3 -1....,0 -1.661520 1 -1.517040 2 -1.372560 3 ...,0 1.367587 1 1.367587 2 1.367587 3 ...,0 -0.508999 1 -0.508999 2 -0.508999 3 ...,0 -0.364888 1 -0.301414 2 -0.364888 3 ...,0 0.837460 1 0.739253 2 0.641046 3 ...,0 -2.672255 1 -2.767781 2 -2.672255 3 ...,0 1.303114 1 1.303114 2 1.303114 3 ...,0 -0.346122 1 -0.255730 2 -0.204037 3 ...,0 -0.002937 1 -0.002937 2 -0.002937 3 ...,0 -0.003103 1 -0.003103 2 -0.003103 3 ...,0 -0.064236 1 -0.064236 2 -0.064236 3 ...,0 -0.537382 1 -0.537382 2 -0.537382 3 ...


In [78]:
rocket = Rocket()
rocket.fit(X_pd)
X_train_transform = rocket.transform(X_pd)


In [80]:
X_train_transform.shape

(1, 20000)

In [82]:
rocket.get_params()

{'normalise': True, 'num_kernels': 10000, 'random_state': None}

In [94]:
x_train = []
for sample in train_data.x:
    x_train.append(to_sktime(sample[np.newaxis,:]))

KeyboardInterrupt: 

In [95]:
len(x_train)

853

In [88]:
train_data.x.shape

(119483, 5, 14)

In [96]:
for i in range(len(x_train)):
    
    X_train_transform = rocket.transform(x_train[i])

In [91]:
x_train[0]

array([[[-1.67037964e+00, -1.68076986e+00, -1.66151987e+00,
          1.36758687e+00, -5.08999427e-01, -3.64887883e-01,
          8.37460334e-01, -2.67225548e+00,  1.30311394e+00,
         -3.46121894e-01, -2.93730136e-03, -3.10294429e-03,
         -6.42364991e-02, -5.37382028e-01],
        [-1.67037964e+00, -1.68076986e+00, -1.51704004e+00,
          1.36758687e+00, -5.08999427e-01, -3.01413885e-01,
          7.39253183e-01, -2.76778143e+00,  1.30311394e+00,
         -2.55730232e-01, -2.93730136e-03, -3.10294429e-03,
         -6.42364991e-02, -5.37382028e-01],
        [-1.67037964e+00, -1.68076986e+00, -1.37256021e+00,
          1.36758687e+00, -5.08999427e-01, -3.64887883e-01,
          6.41046032e-01, -2.67225548e+00,  1.30311394e+00,
         -2.04036598e-01, -2.93730136e-03, -3.10294429e-03,
         -6.42364991e-02, -5.37382028e-01],
        [-1.67037964e+00, -1.68076986e+00, -1.22808038e+00,
          1.36758687e+00, -5.08999427e-01, -1.74465889e-01,
          6.41046032e-01, -2

In [92]:
to_sktime(sample[np.newaxis,:])

array([[[-0.50392087,  1.62146227,  0.79463726, -1.35663523,
          0.06417944, -0.41772999, -0.1642526 ,  1.05325646,
         -0.65452908,  0.00649222, -0.0029373 , -0.00310294,
         -0.0642365 , -0.53738203],
        [-0.50392087,  1.62146227,  0.93911709, -1.35663523,
          0.13161225, -0.18414567, -0.1642526 ,  0.95773052,
         -1.30707675, -0.34005728, -0.0029373 , -0.00310294,
         -0.0642365 ,  1.86087355],
        [-0.50392087,  1.62146227,  1.08359692, -1.35663523,
          0.13161225, -0.13177963, -0.1642526 ,  0.86220457,
         -1.30707675, -0.28229903, -0.0029373 , -0.00310294,
         -0.0642365 ,  1.86087355],
        [-0.50392087,  1.62146227,  1.22807675, -1.35663523,
          0.14847045,  0.0508668 , -0.05622474,  0.67115268,
         -1.30707675, -0.2534199 , -0.0029373 , -0.00310294,
         -0.0642365 ,  1.86087355],
        [-0.50392087,  1.62146227,  1.37255658, -1.35663523,
          0.18218685,  0.27381922, -0.05622474,  0.57562673,
  

In [97]:
train_data = pickle.load(  open( "../Data/TRAIN-"+dataset_name+"-W"+str(window_size)+"-T"+str(task_size)+"-NOML-SKTIME.pickle", "rb" ) )


KeyboardInterrupt: 