# Try out different time series classification algorithms

In [73]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

from train import Dataset
from param import *

# Data paths exposed by ParamDir; individual entries are later passed to Dataset.
# NOTE(review): ParamDir presumably arrives via the star import from `param` — confirm.
datalist = ParamDir().data_path_list

In [74]:
# Use the third entry of the data path list.
data_dir = datalist[2]
# NOTE(review): the positional arguments (1.0, False) and (10, 0.8) are
# project-specific; check train.Dataset for their meaning.
dataset = Dataset(data_dir, 1.0, False)

# load_all_data yields a (train, test) pair, each of which is an (X, y) pair.
train_pair, test_pair = dataset.load_all_data(10, 0.8)
X_train, y_train = train_pair
X_test, y_test = test_pair

In [192]:
def segment(a):
    """Return the positions at which consecutive entries of ``a`` differ.

    Every returned index ``k`` satisfies ``a[k - 1] != a[k]``, i.e. it is the
    first position of a new run of equal values. The result is an ascending
    list of ints (empty when ``a`` has fewer than two entries or is constant).
    """
    return [pos for pos in range(1, len(a)) if a[pos - 1] != a[pos]]


# ---- train set
# Cut the training data into contiguous runs that share one label.
segment_ind = segment(y_train)

# One label per run: the first label, then the label at every change point.
y_new = np.append(y_train[0], y_train[segment_ind])

X_seg = np.split(X_train, segment_ind)
max_len = max(len(run) for run in X_seg)
n_neurons = X_seg[0].shape[1]

# Only runs with more than 3 time bins are kept as instances.
kept_ids = [idx for idx, run in enumerate(X_seg) if len(run) > 3]
y_new_train = np.array([str(y_new[idx]) for idx in kept_ids])
# Append zero rows so every kept instance has exactly max_len rows.
X_seg_new = [
    np.vstack((X_seg[idx], np.zeros((max_len - len(X_seg[idx]), n_neurons))))
    for idx in kept_ids
]

# Neuron filter: keep only columns whose summed activity over all kept
# instances is positive.
neurons_to_use = np.vstack(X_seg_new).sum(axis=0) > 0
X_seg_new = [padded[:, neurons_to_use] for padded in X_seg_new]

# One DataFrame row per instance, one column per neuron, each cell a pd.Series
# holding that neuron's values over the instance's time bins.
nested_rows = [[pd.Series(channel) for channel in padded.T] for padded in X_seg_new]
X_new_train = pd.DataFrame(nested_rows)

# ---- test set
# Segment the test data the same way as the training data. max_len, n_neurons
# and neurons_to_use are reused from the train-set section so that both sets
# share the same series length and the same neuron columns.
segment_ind = segment(y_test)

# One label per segment: the first label, then the label at every change point.
y_new = np.append(y_test[0], y_test[segment_ind])

X_seg = np.split(X_test, segment_ind)
X_seg_new, y_new_test = [], []
for _id, X in enumerate(X_seg):
    # Keep segments longer than 3 bins. Segments longer than max_len are
    # skipped because they cannot be zero-padded to max_len.
    if 3 < len(X) <= max_len:
        y_new_test.append(str(y_new[_id]))
        # X_seg_new.append(X) # unequal length
        X_seg_new.append(np.vstack((X, np.zeros((max_len - len(X), n_neurons))))) # set to equal length with zeros
# Convert to an ndarray, as is done for y_new_train, so that the later
# `y_new_test == y_pred` comparison is element-wise by construction.
y_new_test = np.array(y_new_test)

# filter the neuron: apply the neuron mask computed on the training set
X_seg_new = [X[:, neurons_to_use] for X in X_seg_new]

# One DataFrame row per instance, one column per neuron, each cell a pd.Series.
X_new_test = pd.DataFrame([[pd.Series(i) for i in X.T] for X in X_seg_new])


# Classification with Unequal Length Series

In [183]:
# from sktime.registry import all_estimators

# # search for all classifiers which can handle unequal length data. This may give some
# # UserWarnings if soft dependencies are not installed.
# all_estimators(
#     filter_tags={"capability:unequal_length": True}, estimator_types="classifier"
# )

from sktime.classification.feature_based import RandomIntervalClassifier
from sktime.transformations.panel.padder import PaddingTransformer

# NOTE: X_new_train / X_new_test were already zero-padded to max_len during
# preprocessing, so the PaddingTransformer step should have nothing left to pad.
# To really exercise unequal-length input, build the frames from the unpadded
# segments (see the commented-out `X_seg_new.append(X)` lines).
#
# random_state is fixed so the score is reproducible across Restart & Run All;
# the recorded output below predates the seed and may differ.
padded_clf = PaddingTransformer() * RandomIntervalClassifier(
    n_intervals=4, random_state=0
)
padded_clf.fit(X_new_train, y_new_train)
y_pred = padded_clf.predict(X_new_test)

In [184]:
# Accuracy: fraction of test instances whose predicted label equals the true label.
np.mean(y_new_test == y_pred)

0.3076923076923077

# Equal Length

## KNeighborsTimeSeriesClassifier

In [198]:
from sktime.classification.distance_based import KNeighborsTimeSeriesClassifier
from sklearn.model_selection import GridSearchCV, KFold, cross_val_score

# Base estimator; n_neighbors and distance are overridden by the grid search.
knn = KNeighborsTimeSeriesClassifier(n_neighbors=5)
# 2 x 2 grid: neighbourhood size crossed with distance measure.
param_grid = dict(
    n_neighbors=[1, 5],
    distance=["euclidean", "dtw"],
)
# 4-fold cross-validation on the training instances selects the best combination.
parameter_tuning_method = GridSearchCV(
    knn,
    param_grid,
    cv=KFold(n_splits=4),
)
parameter_tuning_method.fit(X_new_train, y_new_train)
y_pred = parameter_tuning_method.predict(X_new_test)

In [199]:
# Accuracy: fraction of test instances whose predicted label equals the true label.
np.mean(y_new_test == y_pred)

0.38461538461538464

## RocketClassifier

In [200]:
from sktime.classification.kernel_based import RocketClassifier

# ROCKET draws its convolution kernels at random; fixing random_state makes the
# score reproducible across Restart & Run All. The recorded output below
# predates the seed and may differ.
rocket = RocketClassifier(num_kernels=2000, random_state=0)
rocket.fit(X_new_train, y_new_train)
y_pred = rocket.predict(X_new_test)

In [201]:
# Accuracy: fraction of test instances whose predicted label equals the true label.
np.mean(y_new_test == y_pred)

0.6153846153846154

## HIVECOTEV2

In [202]:
from sktime.classification.hybrid import HIVECOTEV2

# Seed the ensemble so repeated runs start from the same random draws.
# NOTE(review): training is capped by time_limit_in_minutes (0.2 min = 12 s), so
# results may still vary with machine speed — confirm if exact repeatability
# matters. The recorded output below predates the seed and may differ.
hc2 = HIVECOTEV2(time_limit_in_minutes=0.2, random_state=0)
hc2.fit(X_new_train, y_new_train)
y_pred = hc2.predict(X_new_test)

In [203]:
# Accuracy: fraction of test instances whose predicted label equals the true label.
np.mean(y_new_test == y_pred)

0.38461538461538464