In [1]:
%matplotlib inline

import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt

from sklearn.model_selection import train_test_split
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import GridSearchCV

from sklearn import metrics
from sklearn.metrics import accuracy_score, f1_score, classification_report
from sklearn.metrics import roc_curve, auc, roc_auc_score
from sklearn.metrics import ConfusionMatrixDisplay
from scikitplot.metrics import plot_roc
from scikitplot.metrics import plot_precision_recall
from scikitplot.metrics import plot_cumulative_gain
from scikitplot.metrics import plot_lift_curve

from sklearn.tree import DecisionTreeClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.ensemble import RandomForestClassifier

from sklearn import random_projection
from sklearn.pipeline import Pipeline
from sklearn.pipeline import FeatureUnion
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA
from sklearn.decomposition import IncrementalPCA
from sklearn.manifold import Isomap
from sklearn.manifold import TSNE
from sklearn.manifold import MDS
from sklearn.linear_model import LogisticRegression

from tslearn.preprocessing import TimeSeriesScalerMeanVariance
from scipy.spatial.distance import euclidean
from scipy.spatial.distance import cityblock
from tslearn.metrics import dtw, dtw_path, cdist_dtw, subsequence_cost_matrix
from scipy.spatial.distance import cdist
from pyts.metrics import dtw as dtw2



In [2]:
import warnings
warnings.filterwarnings('ignore')

In [3]:
from tqdm import tqdm

In [4]:
#features = list()
with open('../features.txt') as f:
    features = [line.split()[1] for line in f.readlines()]
print('No of Features: {}'.format(len(features)))

No of Features: 561


# Loading Training Sets

In [5]:
X_train = pd.read_csv('X_train.txt', delim_whitespace=True, header=None, encoding='latin-1')
X_train.columns = features

# add subject column to the dataframe
#X_train['subject'] = pd.read_csv('subject_train.txt', header=None, squeeze=True)

y_train = pd.read_csv('y_train.txt', names=['Activity'], squeeze=True)
#y_train_labels = y_train.map({1: 'WALKING', 2:'WALKING_UPSTAIRS',3:'WALKING_DOWNSTAIRS', 4:'SITTING', 5:'STANDING',6:'LAYING'})

# put all columns in a single dataframe
#train = X_train
#train['Activity'] = y_train

In [6]:
body_acc_x_train = pd.read_csv('Inertial Signals/body_acc_x_train.txt', delim_whitespace=True, header=None, encoding='latin-1')
#body_acc_x_train['subject'] = pd.read_csv('subject_train.txt', header=None, squeeze=True)

In [7]:
body_acc_y_train = pd.read_csv('Inertial Signals/body_acc_y_train.txt', delim_whitespace=True, header=None, encoding='latin-1')
#body_acc_y_train['subject'] = pd.read_csv('subject_train.txt', header=None, squeeze=True)

In [8]:
body_acc_z_train = pd.read_csv('Inertial Signals/body_acc_z_train.txt', delim_whitespace=True, header=None, encoding='latin-1')
#body_acc_z_train['subject'] = pd.read_csv('subject_train.txt', header=None, squeeze=True)

In [68]:
body_gyro_x_train = pd.read_csv('Inertial Signals/body_gyro_x_train.txt', delim_whitespace=True, header=None, encoding='latin-1')
#body_gyro_x_train['subject'] = pd.read_csv('subject_train.txt', header=None, squeeze=True)
#y_train = pd.read_csv('y_train.txt', names=['Activity'], squeeze=True)
#y_train_labels = y_train.map({1: 'WALKING', 2:'WALKING_UPSTAIRS',3:'WALKING_DOWNSTAIRS', 4:'SITTING', 5:'STANDING',6:'LAYING'})

# put all columns in a single dataframe
#train = body_gyro_x_train
#train['Activity'] = y_train
#train['ActivityName'] = y_train_labels

In [69]:
body_gyro_y_train = pd.read_csv('Inertial Signals/body_gyro_y_train.txt', delim_whitespace=True, header=None, encoding='latin-1')
#body_gyro_y_train['subject'] = pd.read_csv('subject_train.txt', header=None, squeeze=True)

In [11]:
body_gyro_z_train = pd.read_csv('Inertial Signals/body_gyro_z_train.txt', delim_whitespace=True, header=None, encoding='latin-1')
#body_gyro_z_train['subject'] = pd.read_csv('subject_train.txt', header=None, squeeze=True)

In [12]:
total_acc_x_train = pd.read_csv('Inertial Signals/total_acc_x_train.txt', delim_whitespace=True, header=None, encoding='latin-1')
#total_acc_x_train['subject'] = pd.read_csv('subject_train.txt', header=None, squeeze=True)

In [13]:
total_acc_y_train = pd.read_csv('Inertial Signals/total_acc_y_train.txt', delim_whitespace=True, header=None, encoding='latin-1')
#total_acc_y_train['subject'] = pd.read_csv('subject_train.txt', header=None, squeeze=True)

In [14]:
total_acc_z_train = pd.read_csv('Inertial Signals/total_acc_z_train.txt', delim_whitespace=True, header=None, encoding='latin-1')
#total_acc_z_train['subject'] = pd.read_csv('subject_train.txt', header=None, squeeze=True)

# Loading Test Sets

In [15]:
X_test = pd.read_csv('../test/X_test.txt', delim_whitespace=True, header=None, encoding='latin-1')
X_test.columns = features

# add subject column to the dataframe
#X_test['subject'] = pd.read_csv('subject_train.txt', header=None, squeeze=True)

y_test = pd.read_csv('../test/y_test.txt', names=['Activity'], squeeze=True)
#y_test_labels = y_train.map({1: 'WALKING', 2:'WALKING_UPSTAIRS',3:'WALKING_DOWNSTAIRS', 4:'SITTING', 5:'STANDING',6:'LAYING'})

# put all columns in a single dataframe
#train01 = X_test
#train01['Activity'] = y_test

In [16]:
body_acc_x_test = pd.read_csv('../test/Inertial Signals/body_acc_x_test.txt', delim_whitespace=True, header=None, encoding='latin-1')
#body_acc_x_test['subject'] = pd.read_csv('subject_train.txt', header=None, squeeze=True)

In [17]:
body_acc_y_test = pd.read_csv('../test/Inertial Signals/body_acc_y_test.txt', delim_whitespace=True, header=None, encoding='latin-1')
#body_acc_y_test['subject'] = pd.read_csv('subject_train.txt', header=None, squeeze=True)

In [18]:
body_acc_z_test = pd.read_csv('../test/Inertial Signals/body_acc_z_test.txt', delim_whitespace=True, header=None, encoding='latin-1')
#body_acc_z_test['subject'] = pd.read_csv('subject_train.txt', header=None, squeeze=True)

In [19]:
body_gyro_x_test = pd.read_csv('../test/Inertial Signals/body_gyro_x_test.txt', delim_whitespace=True, header=None, encoding='latin-1')
#body_gyro_x_test['subject'] = pd.read_csv('subject_train.txt', header=None, squeeze=True)

In [20]:
body_gyro_y_test = pd.read_csv('../test/Inertial Signals/body_gyro_y_test.txt', delim_whitespace=True, header=None, encoding='latin-1')
#body_gyro_y_test['subject'] = pd.read_csv('subject_train.txt', header=None, squeeze=True)

In [21]:
body_gyro_z_test = pd.read_csv('../test/Inertial Signals/body_gyro_z_test.txt', delim_whitespace=True, header=None, encoding='latin-1')
#body_gyro_z_test['subject'] = pd.read_csv('subject_train.txt', header=None, squeeze=True)

In [22]:
total_acc_x_test = pd.read_csv('../test/Inertial Signals/total_acc_x_test.txt', delim_whitespace=True, header=None, encoding='latin-1')
#total_acc_x_test['subject'] = pd.read_csv('subject_train.txt', header=None, squeeze=True)

In [23]:
total_acc_y_test = pd.read_csv('../test/Inertial Signals/total_acc_y_test.txt', delim_whitespace=True, header=None, encoding='latin-1')
#total_acc_y_test['subject'] = pd.read_csv('subject_train.txt', header=None, squeeze=True)

In [24]:
total_acc_z_test = pd.read_csv('../test/Inertial Signals/total_acc_z_test.txt', delim_whitespace=True, header=None, encoding='latin-1')
#total_acc_z_test['subject'] = pd.read_csv('subject_train.txt', header=None, squeeze=True)

## Shaplet Classifiers

### Brute Force Approach

In [25]:
from pyts.transformation import ShapeletTransform

### Learning-based Approach

In [26]:
#from keras.optimizers import Adagrad
from tslearn.shapelets import ShapeletModel
from tslearn.shapelets import grabocka_params_to_shapelet_size_dict

In [27]:
n_ts, ts_sz = body_gyro_x_train.shape
n_classes = len(set(y_train))

# Set the number of shapelets per size as done in the original paper
shapelet_sizes = grabocka_params_to_shapelet_size_dict(n_ts=n_ts,
                                                       ts_sz=ts_sz,
                                                       n_classes=n_classes,
                                                       l=0.1,
                                                       r=1)

print('n_ts', n_ts)
print('ts_sz', ts_sz)
print('n_classes', n_classes)
print('shapelet_sizes', shapelet_sizes)

n_ts 7352
ts_sz 128
n_classes 6
shapelet_sizes {12: 6}


In [28]:
# Define the model using parameters provided by the authors (except that we use
# fewer iterations here)
shp_clf = ShapeletModel(n_shapelets_per_size=shapelet_sizes,
                        optimizer="sgd",
                        weight_regularizer=.01,
                        max_iter=200,
                        verbose=1)

In [56]:
# define the parameter values that should be searched
k_range = list(range(1, 31))
weight_options = ['uniform', 'distance']
param_grid = dict(n_neighbors=k_range, weight_options = ['uniform', 'distance'])

grid = GridSearchCV(KNeighborsClassifier(), param_grid, cv=10, scoring='f1', return_train_score=False,verbose=1)
grid.fit(body_gyro_x_train, y_train)

In [57]:
grid.best_params_

{'n_neighbors': 5}

In [29]:
shp_clf.fit(body_gyro_x_train, y_train)

2022-05-17 10:16:13.884480: I tensorflow/core/platform/cpu_feature_guard.cc:151] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.


Epoch 1/200
Epoch 2/200
Epoch 3/200
Epoch 4/200
Epoch 5/200
Epoch 6/200
Epoch 7/200
Epoch 8/200
Epoch 9/200
Epoch 10/200
Epoch 11/200
Epoch 12/200
Epoch 13/200
Epoch 14/200
Epoch 15/200
Epoch 16/200
Epoch 17/200
Epoch 18/200
Epoch 19/200
Epoch 20/200
Epoch 21/200
Epoch 22/200
Epoch 23/200
Epoch 24/200
Epoch 25/200
Epoch 26/200
Epoch 27/200
Epoch 28/200
Epoch 29/200
Epoch 30/200
Epoch 31/200
Epoch 32/200
Epoch 33/200
Epoch 34/200
Epoch 35/200
Epoch 36/200
Epoch 37/200
Epoch 38/200
Epoch 39/200
Epoch 40/200
Epoch 41/200
Epoch 42/200
Epoch 43/200
Epoch 44/200
Epoch 45/200
Epoch 46/200
Epoch 47/200
Epoch 48/200
Epoch 49/200
Epoch 50/200
Epoch 51/200
Epoch 52/200
Epoch 53/200
Epoch 54/200
Epoch 55/200
Epoch 56/200
Epoch 57/200
Epoch 58/200
Epoch 59/200
Epoch 60/200
Epoch 61/200
Epoch 62/200
Epoch 63/200
Epoch 64/200
Epoch 65/200
Epoch 66/200
Epoch 67/200
Epoch 68/200
Epoch 69/200
Epoch 70/200
Epoch 71/200
Epoch 72/200
Epoch 73/200
Epoch 74/200
Epoch 75/200
Epoch 76/200
Epoch 77/200
Epoch 78

LearningShapelets(max_iter=200, n_shapelets_per_size={12: 6}, verbose=1,
                  weight_regularizer=0.01)

In [30]:
y_pred = shp_clf.predict(body_gyro_x_test)



In [31]:
print('Accuracy %s' % accuracy_score(y_test, y_pred))
print('F1-score %s' % f1_score(y_test, y_pred, average=None))
print(classification_report(y_test, y_pred))

Accuracy 0.3702069901594842
F1-score [0.52676399 0.17421603 0.         0.12624585 0.13099042 0.52169625]
              precision    recall  f1-score   support

           1       0.38      0.87      0.53       496
           2       0.49      0.11      0.17       471
           3       0.00      0.00      0.00       420
           4       0.34      0.08      0.13       491
           5       0.44      0.08      0.13       532
           6       0.35      0.99      0.52       537

    accuracy                           0.37      2947
   macro avg       0.33      0.35      0.25      2947
weighted avg       0.34      0.37      0.26      2947



# Shaplet-distances-based Classifier

In [32]:
X_train2 = shp_clf.transform(body_gyro_x_train)



In [33]:
X_train2.shape

(7352, 6)

In [34]:
X_train2

array([[3.16109727e-05, 1.47584963e+00, 1.83588356e-01, 3.31198752e-01,
        1.46587342e-01, 1.36230230e+00],
       [3.16109727e-05, 1.41172695e+00, 1.83688089e-01, 3.02785039e-01,
        1.50766581e-01, 1.38004720e+00],
       [5.09038800e-05, 1.39487553e+00, 1.83688089e-01, 2.93777913e-01,
        1.50766581e-01, 1.38004720e+00],
       ...,
       [2.10537240e-02, 8.13817978e-02, 2.59610824e-02, 1.27891703e-02,
        3.68353538e-02, 2.52799213e-01],
       [1.29218055e-02, 8.13817978e-02, 6.71010464e-02, 1.99542847e-02,
        3.93618345e-02, 9.79280412e-01],
       [1.29218055e-02, 1.06393978e-01, 1.73393097e-02, 7.14265108e-02,
        2.55999565e-02, 3.07628632e-01]], dtype=float32)

In [35]:
X_test2 = shp_clf.transform(body_gyro_x_test)



In [36]:
from sklearn.neighbors import KNeighborsClassifier

In [108]:
# define the parameter values that should be searched
k_range = list(range(1, 31))
weight_options = ['uniform', 'distance']
param_grid = dict(n_neighbors=k_range, weights=weight_options)

grid = GridSearchCV(KNeighborsClassifier(), param_grid, cv=10, scoring='accuracy', return_train_score=False,verbose=1)
grid.fit(X_train2, y_train)

Fitting 10 folds for each of 60 candidates, totalling 600 fits


GridSearchCV(cv=10, estimator=KNeighborsClassifier(),
             param_grid={'n_neighbors': [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12,
                                         13, 14, 15, 16, 17, 18, 19, 20, 21, 22,
                                         23, 24, 25, 26, 27, 28, 29, 30],
                         'weights': ['uniform', 'distance']},
             scoring='accuracy', verbose=1)

In [109]:
grid.best_params_

{'n_neighbors': 30, 'weights': 'distance'}

In [36]:
clf = KNeighborsClassifier(n_neighbors=30, weights='distance')
clf.fit(X_train2, y_train)

KNeighborsClassifier(n_neighbors=30, weights='distance')

In [37]:
y_pred = clf.predict(X_test2)

print('Accuracy %s' % accuracy_score(y_test, y_pred))
print('F1-score %s' % f1_score(y_test, y_pred, average=None))
print(classification_report(y_test, y_pred))

Accuracy 0.5622667119104173
F1-score [0.66191446 0.62075848 0.56202532 0.54077253 0.56032389 0.42182581]
              precision    recall  f1-score   support

           1       0.67      0.66      0.66       496
           2       0.59      0.66      0.62       471
           3       0.60      0.53      0.56       420
           4       0.57      0.51      0.54       491
           5       0.49      0.65      0.56       532
           6       0.48      0.37      0.42       537

    accuracy                           0.56      2947
   macro avg       0.57      0.56      0.56      2947
weighted avg       0.56      0.56      0.56      2947



In [38]:
from sklearn.tree import DecisionTreeClassifier

In [113]:
# define the parameter values that should be searched
k_range = list(range(1, 31))
param_grid = dict(max_depth=k_range)

grid = GridSearchCV(DecisionTreeClassifier(), param_grid, cv=10, scoring='accuracy', return_train_score=False,verbose=1)
grid.fit(X_train2, y_train)

Fitting 10 folds for each of 30 candidates, totalling 300 fits


GridSearchCV(cv=10, estimator=DecisionTreeClassifier(),
             param_grid={'max_depth': [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12,
                                       13, 14, 15, 16, 17, 18, 19, 20, 21, 22,
                                       23, 24, 25, 26, 27, 28, 29, 30]},
             scoring='accuracy', verbose=1)

In [114]:
grid.best_params_

{'max_depth': 7}

In [39]:
clf = DecisionTreeClassifier(max_depth=7, random_state=42)
clf.fit(X_train2, y_train)

y_pred = clf.predict(X_test2)

print('Accuracy %s' % accuracy_score(y_test, y_pred))
print('F1-score %s' % f1_score(y_test, y_pred, average=None))
print(classification_report(y_test, y_pred))

Accuracy 0.5524261961316593
F1-score [0.67909715 0.57373272 0.57589286 0.52325581 0.55300222 0.3924612 ]
              precision    recall  f1-score   support

           1       0.66      0.70      0.68       496
           2       0.63      0.53      0.57       471
           3       0.54      0.61      0.58       420
           4       0.61      0.46      0.52       491
           5       0.46      0.70      0.55       532
           6       0.48      0.33      0.39       537

    accuracy                           0.55      2947
   macro avg       0.56      0.55      0.55      2947
weighted avg       0.56      0.55      0.55      2947



# Feature-based Classifier

In [40]:
import scipy.stats as stats

In [41]:
def calculate_features(values):
    features = {
        'avg': np.mean(values),
        'std': np.std(values),
        'var': np.var(values),
        'med': np.median(values),
        '10p': np.percentile(values, 10),
        '25p': np.percentile(values, 25),
        '50p': np.percentile(values, 50),
        '75p': np.percentile(values, 75),
        '90p': np.percentile(values, 90),
        'iqr': np.percentile(values, 75) - np.percentile(values, 25),
        'skw': stats.skew(values),
        'kur': stats.kurtosis(values)
    }

    return features

In [42]:
X_train3 = np.array([list(calculate_features(x).values()) for x in body_gyro_x_train.T])

In [43]:
X_train3.shape

(7352, 12)

In [44]:
X_test3 = np.array([list(calculate_features(x).values()) for x in body_gyro_x_test.T])

In [45]:
clf = DecisionTreeClassifier(max_depth=15, random_state=42)
clf.fit(X_train3, y_train)

y_pred = clf.predict(X_test3)

print('Accuracy %s' % accuracy_score(y_test, y_pred))
print('F1-score %s' % f1_score(y_test, y_pred, average=None))
print(classification_report(y_test, y_pred))

Accuracy 0.19138106549032916
F1-score [0.         0.         0.         0.         0.09660107 0.31067399]
              precision    recall  f1-score   support

           1       0.00      0.00      0.00       496
           2       0.00      0.00      0.00       471
           3       0.00      0.00      0.00       420
           4       0.00      0.00      0.00       491
           5       1.00      0.05      0.10       532
           6       0.18      1.00      0.31       537

    accuracy                           0.19      2947
   macro avg       0.20      0.18      0.07      2947
weighted avg       0.21      0.19      0.07      2947



# Time Series Classifier

In [129]:
# define the parameter values that should be searched
k_range = list(range(1, 31))
weight_options = ['uniform', 'distance']
param_grid = dict(n_neighbors=k_range, weights=weight_options)

grid = GridSearchCV(KNeighborsClassifier(), param_grid, cv=10, scoring='accuracy', return_train_score=False,verbose=1)
grid.fit(body_gyro_x_train, y_train)

Fitting 10 folds for each of 60 candidates, totalling 600 fits


GridSearchCV(cv=10, estimator=KNeighborsClassifier(),
             param_grid={'n_neighbors': [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12,
                                         13, 14, 15, 16, 17, 18, 19, 20, 21, 22,
                                         23, 24, 25, 26, 27, 28, 29, 30],
                         'weights': ['uniform', 'distance']},
             scoring='accuracy', verbose=1)

In [130]:
grid.best_params_

{'n_neighbors': 5, 'weights': 'distance'}

In [46]:
clf = KNeighborsClassifier(n_neighbors=5, weights='distance')
clf.fit(body_gyro_x_train, y_train)

y_pred = clf.predict(body_gyro_x_test)

print('Accuracy %s' % accuracy_score(y_test, y_pred))
print('F1-score %s' % f1_score(y_test, y_pred, average=None))
print(classification_report(y_test, y_pred))

Accuracy 0.5863590091618596
F1-score [0.79726651 0.64053751 0.62928349 0.55508831 0.50229148 0.47626978]
              precision    recall  f1-score   support

           1       0.92      0.71      0.80       496
           2       0.68      0.61      0.64       471
           3       0.91      0.48      0.63       420
           4       0.47      0.67      0.56       491
           5       0.49      0.52      0.50       532
           6       0.43      0.53      0.48       537

    accuracy                           0.59      2947
   macro avg       0.65      0.59      0.60      2947
weighted avg       0.64      0.59      0.60      2947



In [131]:
# define the parameter values that should be searched
k_range = list(range(1, 31))
param_grid = dict(max_depth=k_range)

grid = GridSearchCV(DecisionTreeClassifier(), param_grid, cv=10, scoring='accuracy', return_train_score=False,verbose=1)
grid.fit(body_gyro_x_train, y_train)

Fitting 10 folds for each of 30 candidates, totalling 300 fits


GridSearchCV(cv=10, estimator=DecisionTreeClassifier(),
             param_grid={'max_depth': [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12,
                                       13, 14, 15, 16, 17, 18, 19, 20, 21, 22,
                                       23, 24, 25, 26, 27, 28, 29, 30]},
             scoring='accuracy', verbose=1)

In [132]:
grid.best_params_

{'max_depth': 15}

In [47]:
clf = DecisionTreeClassifier(max_depth=15, random_state=42)
clf.fit(body_gyro_x_train, y_train)

y_pred = clf.predict(body_gyro_x_test)

print('Accuracy %s' % accuracy_score(y_test, y_pred))
print('F1-score %s' % f1_score(y_test, y_pred, average=None))
print(classification_report(y_test, y_pred))

Accuracy 0.5374957583983713
F1-score [0.71159564 0.53669222 0.52205882 0.54025045 0.49028677 0.41797283]
              precision    recall  f1-score   support

           1       0.70      0.72      0.71       496
           2       0.55      0.52      0.54       471
           3       0.54      0.51      0.52       420
           4       0.48      0.62      0.54       491
           5       0.48      0.50      0.49       532
           6       0.48      0.37      0.42       537

    accuracy                           0.54      2947
   macro avg       0.54      0.54      0.54      2947
weighted avg       0.54      0.54      0.53      2947



In [48]:
from pyts.classification import KNeighborsClassifier

In [None]:
clf = KNeighborsClassifier(metric='dtw_multiscale', n_jobs=-1)
clf.fit(body_gyro_x_train, y_train)

y_pred = clf.predict(body_gyro_x_test)

print('Accuracy %s' % accuracy_score(y_test, y_pred))
print('F1-score %s' % f1_score(y_test, y_pred, average=None))
print(classification_report(y_test, y_pred))

# CNN Classifier

In [49]:
from keras.models import Sequential
from keras.layers import Dense, Dropout, GlobalAveragePooling1D
from keras.layers import Conv1D, Activation, Conv1D, BatchNormalization

In [50]:
def build_simple_cnn(n_timesteps, n_outputs):
    model = Sequential()
    
    model.add(Conv1D(filters=16, kernel_size=8, activation='relu', input_shape=(n_timesteps, 1)))
    model.add(BatchNormalization())
    model.add(Activation('relu'))
    
    model.add(Dropout(0.3))
    
    model.add(Conv1D(filters=32, kernel_size=5, activation='relu'))
    model.add(BatchNormalization())
    model.add(Activation('relu'))
    
    model.add(Dropout(0.3))
    
    model.add(Conv1D(filters=64, kernel_size=3, activation='relu'))
    model.add(BatchNormalization())
    model.add(Activation('relu'))
    
    model.add(Dropout(0.3))
    
    model.add(GlobalAveragePooling1D())
    
    model.add(Dense(n_outputs, activation='sigmoid'))
    model.compile(loss='sparse_categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
    
    return model

In [51]:
X_train_cnn = body_gyro_x_train
X_test_cnn = body_gyro_x_test

X_train_cnn, X_val_cnn, y_train_cnn, y_val_cnn = train_test_split(X_train_cnn, y_train, test_size=0.2, stratify=y_train)

n_timesteps, n_outputs, n_features = X_train_cnn.shape[1], len(np.unique(y_train_cnn)), 1 
print("TIMESTEPS: ", n_timesteps)
print("N. LABELS: ", n_outputs)

TIMESTEPS:  128
N. LABELS:  6


In [52]:
cnn = build_simple_cnn(n_timesteps, n_outputs)

In [29]:
cnn.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 conv1d (Conv1D)             (None, 121, 16)           144       
                                                                 
 batch_normalization (BatchN  (None, 121, 16)          64        
 ormalization)                                                   
                                                                 
 activation (Activation)     (None, 121, 16)           0         
                                                                 
 dropout (Dropout)           (None, 121, 16)           0         
                                                                 
 conv1d_1 (Conv1D)           (None, 117, 32)           2592      
                                                                 
 batch_normalization_1 (Batc  (None, 117, 32)          128       
 hNormalization)                                        

In [53]:
from keras.callbacks import ReduceLROnPlateau, ModelCheckpoint

In [54]:
rlr = ReduceLROnPlateau(monitor='loss', factor=0.5, patience=50, min_lr=0.0001)
mc = ModelCheckpoint('best_model_cnn.h5', monitor='val_loss', save_best_only=True)

callbacks = [rlr, mc]

batch_size = 16
mini_batch_size = int(min(X_train_cnn.shape[0]/10, batch_size))

In [32]:
history_cnn = cnn.fit(X_train_cnn, y_train_cnn, epochs=5, batch_size=mini_batch_size, callbacks=callbacks,
                      validation_data=(X_val_cnn, y_val_cnn)).history

Epoch 1/5


2022-05-16 13:30:46.793796: W tensorflow/core/framework/op_kernel.cc:1745] OP_REQUIRES failed at sparse_xent_op.cc:103 : INVALID_ARGUMENT: Received a label value of 6 which is outside the valid range of [0, 6).  Label values: 1 1 6 1 5 6 1 5 2 3 2 5 5 5 6 4


InvalidArgumentError: Graph execution error:

Detected at node 'sparse_categorical_crossentropy/SparseSoftmaxCrossEntropyWithLogits/SparseSoftmaxCrossEntropyWithLogits' defined at (most recent call last):
    File "/opt/anaconda3/lib/python3.8/runpy.py", line 193, in _run_module_as_main
      return _run_code(code, main_globals, None,
    File "/opt/anaconda3/lib/python3.8/runpy.py", line 86, in _run_code
      exec(code, run_globals)
    File "/opt/anaconda3/lib/python3.8/site-packages/ipykernel_launcher.py", line 16, in <module>
      app.launch_new_instance()
    File "/opt/anaconda3/lib/python3.8/site-packages/traitlets/config/application.py", line 845, in launch_instance
      app.start()
    File "/opt/anaconda3/lib/python3.8/site-packages/ipykernel/kernelapp.py", line 667, in start
      self.io_loop.start()
    File "/opt/anaconda3/lib/python3.8/site-packages/tornado/platform/asyncio.py", line 199, in start
      self.asyncio_loop.run_forever()
    File "/opt/anaconda3/lib/python3.8/asyncio/base_events.py", line 570, in run_forever
      self._run_once()
    File "/opt/anaconda3/lib/python3.8/asyncio/base_events.py", line 1859, in _run_once
      handle._run()
    File "/opt/anaconda3/lib/python3.8/asyncio/events.py", line 81, in _run
      self._context.run(self._callback, *self._args)
    File "/opt/anaconda3/lib/python3.8/site-packages/ipykernel/kernelbase.py", line 457, in dispatch_queue
      await self.process_one()
    File "/opt/anaconda3/lib/python3.8/site-packages/ipykernel/kernelbase.py", line 446, in process_one
      await dispatch(*args)
    File "/opt/anaconda3/lib/python3.8/site-packages/ipykernel/kernelbase.py", line 353, in dispatch_shell
      await result
    File "/opt/anaconda3/lib/python3.8/site-packages/ipykernel/kernelbase.py", line 648, in execute_request
      reply_content = await reply_content
    File "/opt/anaconda3/lib/python3.8/site-packages/ipykernel/ipkernel.py", line 345, in do_execute
      res = shell.run_cell(code, store_history=store_history, silent=silent)
    File "/opt/anaconda3/lib/python3.8/site-packages/ipykernel/zmqshell.py", line 532, in run_cell
      return super(ZMQInteractiveShell, self).run_cell(*args, **kwargs)
    File "/opt/anaconda3/lib/python3.8/site-packages/IPython/core/interactiveshell.py", line 2898, in run_cell
      result = self._run_cell(
    File "/opt/anaconda3/lib/python3.8/site-packages/IPython/core/interactiveshell.py", line 2944, in _run_cell
      return runner(coro)
    File "/opt/anaconda3/lib/python3.8/site-packages/IPython/core/async_helpers.py", line 68, in _pseudo_sync_runner
      coro.send(None)
    File "/opt/anaconda3/lib/python3.8/site-packages/IPython/core/interactiveshell.py", line 3169, in run_cell_async
      has_raised = await self.run_ast_nodes(code_ast.body, cell_name,
    File "/opt/anaconda3/lib/python3.8/site-packages/IPython/core/interactiveshell.py", line 3361, in run_ast_nodes
      if (await self.run_code(code, result,  async_=asy)):
    File "/opt/anaconda3/lib/python3.8/site-packages/IPython/core/interactiveshell.py", line 3441, in run_code
      exec(code_obj, self.user_global_ns, self.user_ns)
    File "/var/folders/wc/nkw6_2m95b99xglzpfb6736h0000gn/T/ipykernel_2506/3037636033.py", line 1, in <module>
      history_cnn = cnn.fit(X_train_cnn, y_train_cnn, epochs=5, batch_size=mini_batch_size, callbacks=callbacks,
    File "/opt/anaconda3/lib/python3.8/site-packages/keras/utils/traceback_utils.py", line 64, in error_handler
      return fn(*args, **kwargs)
    File "/opt/anaconda3/lib/python3.8/site-packages/keras/engine/training.py", line 1384, in fit
      tmp_logs = self.train_function(iterator)
    File "/opt/anaconda3/lib/python3.8/site-packages/keras/engine/training.py", line 1021, in train_function
      return step_function(self, iterator)
    File "/opt/anaconda3/lib/python3.8/site-packages/keras/engine/training.py", line 1010, in step_function
      outputs = model.distribute_strategy.run(run_step, args=(data,))
    File "/opt/anaconda3/lib/python3.8/site-packages/keras/engine/training.py", line 1000, in run_step
      outputs = model.train_step(data)
    File "/opt/anaconda3/lib/python3.8/site-packages/keras/engine/training.py", line 860, in train_step
      loss = self.compute_loss(x, y, y_pred, sample_weight)
    File "/opt/anaconda3/lib/python3.8/site-packages/keras/engine/training.py", line 918, in compute_loss
      return self.compiled_loss(
    File "/opt/anaconda3/lib/python3.8/site-packages/keras/engine/compile_utils.py", line 201, in __call__
      loss_value = loss_obj(y_t, y_p, sample_weight=sw)
    File "/opt/anaconda3/lib/python3.8/site-packages/keras/losses.py", line 141, in __call__
      losses = call_fn(y_true, y_pred)
    File "/opt/anaconda3/lib/python3.8/site-packages/keras/losses.py", line 245, in call
      return ag_fn(y_true, y_pred, **self._fn_kwargs)
    File "/opt/anaconda3/lib/python3.8/site-packages/keras/losses.py", line 1862, in sparse_categorical_crossentropy
      return backend.sparse_categorical_crossentropy(
    File "/opt/anaconda3/lib/python3.8/site-packages/keras/backend.py", line 5202, in sparse_categorical_crossentropy
      res = tf.nn.sparse_softmax_cross_entropy_with_logits(
Node: 'sparse_categorical_crossentropy/SparseSoftmaxCrossEntropyWithLogits/SparseSoftmaxCrossEntropyWithLogits'
Received a label value of 6 which is outside the valid range of [0, 6).  Label values: 1 1 6 1 5 6 1 5 2 3 2 5 5 5 6 4
	 [[{{node sparse_categorical_crossentropy/SparseSoftmaxCrossEntropyWithLogits/SparseSoftmaxCrossEntropyWithLogits}}]] [Op:__inference_train_function_1841]

In [55]:
y_pred = np.argmax(cnn.predict(X_test_cnn), axis=1)

print('Accuracy %s' % accuracy_score(y_test, y_pred))
print('F1-score %s' % f1_score(y_test, y_pred, average=None))
print(classification_report(y_test, y_pred))

Accuracy 0.14251781472684086
F1-score [0.         0.         0.         0.24955437 0.         0.
 0.        ]
              precision    recall  f1-score   support

           0       0.00      0.00      0.00         0
           1       0.00      0.00      0.00       496
           2       0.00      0.00      0.00       471
           3       0.14      1.00      0.25       420
           4       0.00      0.00      0.00       491
           5       0.00      0.00      0.00       532
           6       0.00      0.00      0.00       537

    accuracy                           0.14      2947
   macro avg       0.02      0.14      0.04      2947
weighted avg       0.02      0.14      0.04      2947



In [56]:
cnn.evaluate(X_test_cnn, y_test)

 1/93 [..............................] - ETA: 44s - loss: 1.7935 - accuracy: 0.0000e+00

2022-05-17 10:22:47.477238: W tensorflow/core/framework/op_kernel.cc:1745] OP_REQUIRES failed at sparse_xent_op.cc:103 : INVALID_ARGUMENT: Received a label value of 6 which is outside the valid range of [0, 6).  Label values: 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 6 6 6 6 6 6 6 6 6


InvalidArgumentError: Graph execution error:

Detected at node 'sparse_categorical_crossentropy/SparseSoftmaxCrossEntropyWithLogits/SparseSoftmaxCrossEntropyWithLogits' defined at (most recent call last):
    File "/opt/anaconda3/lib/python3.8/runpy.py", line 193, in _run_module_as_main
      return _run_code(code, main_globals, None,
    File "/opt/anaconda3/lib/python3.8/runpy.py", line 86, in _run_code
      exec(code, run_globals)
    File "/opt/anaconda3/lib/python3.8/site-packages/ipykernel_launcher.py", line 16, in <module>
      app.launch_new_instance()
    File "/opt/anaconda3/lib/python3.8/site-packages/traitlets/config/application.py", line 845, in launch_instance
      app.start()
    File "/opt/anaconda3/lib/python3.8/site-packages/ipykernel/kernelapp.py", line 667, in start
      self.io_loop.start()
    File "/opt/anaconda3/lib/python3.8/site-packages/tornado/platform/asyncio.py", line 199, in start
      self.asyncio_loop.run_forever()
    File "/opt/anaconda3/lib/python3.8/asyncio/base_events.py", line 570, in run_forever
      self._run_once()
    File "/opt/anaconda3/lib/python3.8/asyncio/base_events.py", line 1859, in _run_once
      handle._run()
    File "/opt/anaconda3/lib/python3.8/asyncio/events.py", line 81, in _run
      self._context.run(self._callback, *self._args)
    File "/opt/anaconda3/lib/python3.8/site-packages/ipykernel/kernelbase.py", line 457, in dispatch_queue
      await self.process_one()
    File "/opt/anaconda3/lib/python3.8/site-packages/ipykernel/kernelbase.py", line 446, in process_one
      await dispatch(*args)
    File "/opt/anaconda3/lib/python3.8/site-packages/ipykernel/kernelbase.py", line 353, in dispatch_shell
      await result
    File "/opt/anaconda3/lib/python3.8/site-packages/ipykernel/kernelbase.py", line 648, in execute_request
      reply_content = await reply_content
    File "/opt/anaconda3/lib/python3.8/site-packages/ipykernel/ipkernel.py", line 345, in do_execute
      res = shell.run_cell(code, store_history=store_history, silent=silent)
    File "/opt/anaconda3/lib/python3.8/site-packages/ipykernel/zmqshell.py", line 532, in run_cell
      return super(ZMQInteractiveShell, self).run_cell(*args, **kwargs)
    File "/opt/anaconda3/lib/python3.8/site-packages/IPython/core/interactiveshell.py", line 2898, in run_cell
      result = self._run_cell(
    File "/opt/anaconda3/lib/python3.8/site-packages/IPython/core/interactiveshell.py", line 2944, in _run_cell
      return runner(coro)
    File "/opt/anaconda3/lib/python3.8/site-packages/IPython/core/async_helpers.py", line 68, in _pseudo_sync_runner
      coro.send(None)
    File "/opt/anaconda3/lib/python3.8/site-packages/IPython/core/interactiveshell.py", line 3169, in run_cell_async
      has_raised = await self.run_ast_nodes(code_ast.body, cell_name,
    File "/opt/anaconda3/lib/python3.8/site-packages/IPython/core/interactiveshell.py", line 3361, in run_ast_nodes
      if (await self.run_code(code, result,  async_=asy)):
    File "/opt/anaconda3/lib/python3.8/site-packages/IPython/core/interactiveshell.py", line 3441, in run_code
      exec(code_obj, self.user_global_ns, self.user_ns)
    File "/var/folders/wc/nkw6_2m95b99xglzpfb6736h0000gn/T/ipykernel_4389/3003214766.py", line 1, in <module>
      cnn.evaluate(X_test_cnn, y_test)
    File "/opt/anaconda3/lib/python3.8/site-packages/keras/utils/traceback_utils.py", line 64, in error_handler
      return fn(*args, **kwargs)
    File "/opt/anaconda3/lib/python3.8/site-packages/keras/engine/training.py", line 1716, in evaluate
      tmp_logs = self.test_function(iterator)
    File "/opt/anaconda3/lib/python3.8/site-packages/keras/engine/training.py", line 1525, in test_function
      return step_function(self, iterator)
    File "/opt/anaconda3/lib/python3.8/site-packages/keras/engine/training.py", line 1514, in step_function
      outputs = model.distribute_strategy.run(run_step, args=(data,))
    File "/opt/anaconda3/lib/python3.8/site-packages/keras/engine/training.py", line 1507, in run_step
      outputs = model.test_step(data)
    File "/opt/anaconda3/lib/python3.8/site-packages/keras/engine/training.py", line 1473, in test_step
      self.compute_loss(x, y, y_pred, sample_weight)
    File "/opt/anaconda3/lib/python3.8/site-packages/keras/engine/training.py", line 918, in compute_loss
      return self.compiled_loss(
    File "/opt/anaconda3/lib/python3.8/site-packages/keras/engine/compile_utils.py", line 201, in __call__
      loss_value = loss_obj(y_t, y_p, sample_weight=sw)
    File "/opt/anaconda3/lib/python3.8/site-packages/keras/losses.py", line 141, in __call__
      losses = call_fn(y_true, y_pred)
    File "/opt/anaconda3/lib/python3.8/site-packages/keras/losses.py", line 245, in call
      return ag_fn(y_true, y_pred, **self._fn_kwargs)
    File "/opt/anaconda3/lib/python3.8/site-packages/keras/losses.py", line 1862, in sparse_categorical_crossentropy
      return backend.sparse_categorical_crossentropy(
    File "/opt/anaconda3/lib/python3.8/site-packages/keras/backend.py", line 5202, in sparse_categorical_crossentropy
      res = tf.nn.sparse_softmax_cross_entropy_with_logits(
Node: 'sparse_categorical_crossentropy/SparseSoftmaxCrossEntropyWithLogits/SparseSoftmaxCrossEntropyWithLogits'
Received a label value of 6 which is outside the valid range of [0, 6).  Label values: 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 6 6 6 6 6 6 6 6 6
	 [[{{node sparse_categorical_crossentropy/SparseSoftmaxCrossEntropyWithLogits/SparseSoftmaxCrossEntropyWithLogits}}]] [Op:__inference_test_function_32913]

# LSTM Classifier

In [57]:
from keras.layers import LSTM

In [58]:
def build_lstm(n_timesteps, n_outputs):
    model = Sequential()
    model.add(LSTM(256, input_shape=(n_timesteps, 1)))
    model.add(Dropout(0.5))
    model.add(Dense(64, activation='relu'))
    model.add(Dropout(0.2))
    model.add(Dense(n_outputs, activation='sigmoid'))
    
    model.compile(loss='sparse_categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
    
    return model

In [59]:
lstm = build_lstm(n_timesteps, n_outputs)

In [140]:
lstm.summary()

Model: "sequential_1"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 lstm (LSTM)                 (None, 256)               264192    
                                                                 
 dropout_3 (Dropout)         (None, 256)               0         
                                                                 
 dense_1 (Dense)             (None, 64)                16448     
                                                                 
 dropout_4 (Dropout)         (None, 64)                0         
                                                                 
 dense_2 (Dense)             (None, 6)                 390       
                                                                 
Total params: 281,030
Trainable params: 281,030
Non-trainable params: 0
_________________________________________________________________


In [141]:
history_lstm = cnn.fit(X_train_cnn, y_train_cnn, epochs=10, batch_size=mini_batch_size, callbacks=callbacks,
                       validation_data=(X_val_cnn, y_val_cnn)).history

Epoch 1/10


ValueError: in user code:

    File "/opt/anaconda3/lib/python3.8/site-packages/keras/engine/training.py", line 1021, in train_function  *
        return step_function(self, iterator)
    File "/opt/anaconda3/lib/python3.8/site-packages/keras/engine/training.py", line 1010, in step_function  **
        outputs = model.distribute_strategy.run(run_step, args=(data,))
    File "/opt/anaconda3/lib/python3.8/site-packages/keras/engine/training.py", line 1000, in run_step  **
        outputs = model.train_step(data)
    File "/opt/anaconda3/lib/python3.8/site-packages/keras/engine/training.py", line 859, in train_step
        y_pred = self(x, training=True)
    File "/opt/anaconda3/lib/python3.8/site-packages/keras/utils/traceback_utils.py", line 67, in error_handler
        raise e.with_traceback(filtered_tb) from None
    File "/opt/anaconda3/lib/python3.8/site-packages/keras/engine/input_spec.py", line 264, in assert_input_compatibility
        raise ValueError(f'Input {input_index} of layer "{layer_name}" is '

    ValueError: Input 0 of layer "sequential" is incompatible with the layer: expected shape=(None, 128, 1), found shape=(4, 100, 6)


In [60]:
y_pred = np.argmax(lstm.predict(X_test_cnn), axis=1)

print('Accuracy %s' % accuracy_score(y_test, y_pred))
print('F1-score %s' % f1_score(y_test, y_pred, average=None))
print(classification_report(y_test, y_pred))

Accuracy 0.15744825246012895
F1-score [0.         0.24856258 0.         0.17773238 0.160401   0.        ]
              precision    recall  f1-score   support

           1       0.00      0.00      0.00       496
           2       0.16      0.60      0.25       471
           3       0.00      0.00      0.00       420
           4       0.18      0.18      0.18       491
           5       0.14      0.18      0.16       532
           6       0.00      0.00      0.00       537

    accuracy                           0.16      2947
   macro avg       0.08      0.16      0.10      2947
weighted avg       0.08      0.16      0.10      2947



In [None]:
lstm.evaluate(X_test_cnn, y_test)

# Multivariate Time Series Classifiers

In [61]:
from pyts.datasets import load_basic_motions

from tslearn.preprocessing import TimeSeriesScalerMinMax
from sklearn.model_selection import train_test_split, cross_val_score 

from sklearn.metrics import accuracy_score, f1_score, classification_report
from sklearn.metrics import roc_curve, auc, roc_auc_score

In [115]:
scaler = TimeSeriesScalerMinMax()
X_train = scaler.fit_transform(X_train_cnn)
X_test = scaler.transform(X_test_cnn)

In [119]:
X_train.shape

(5881, 128, 1)

In [62]:
from sklearn.preprocessing import LabelEncoder

In [117]:
le = LabelEncoder()
y_train = le.fit_transform(y_train)
y_test = le.fit_transform(y_test)

In [120]:
X_train = X_train.reshape(5881, 128, 1)
X_test = X_test.reshape(2947, 128, 1)

In [121]:
n_timesteps, n_outputs, n_features = X_train.shape[1], len(np.unique(y_train)), X_train.shape[2] 
print("TIMESTEPS: ", n_timesteps)
print("N. LABELS: ", n_outputs)
print("N. FEATURES: ", n_features)

TIMESTEPS:  128
N. LABELS:  6
N. FEATURES:  1


In [133]:
X_train_cnn, X_val_cnn, y_train_cnn, y_val_cnn = train_test_split(X_train, y_train, test_size=0.2, stratify=y_train)

In [79]:
from keras.models import Sequential
from keras.layers.core import Dense, Activation, Dropout, Flatten
from keras.layers import TimeDistributed
from keras.layers.recurrent import LSTM
from keras.layers import Dense, Conv1D, Conv2D, MaxPool2D, Flatten, Dropout, LeakyReLU, GlobalAveragePooling1D
from keras.callbacks import EarlyStopping, ModelCheckpoint
#from keras.layers.normalization import BatchNormalization
from keras.callbacks import ReduceLROnPlateau, ModelCheckpoint

In [122]:
def build_lstm2(n_timesteps, n_outputs, n_features):
    model = Sequential()
    model.add(LSTM(4, input_shape=(n_timesteps, n_features), return_sequences=True, 
                        kernel_initializer='TruncatedNormal'))
    model.add(BatchNormalization())
    model.add(LeakyReLU())
    model.add(Dropout(0.3))
    
    #1
    for _ in range(2):
        model.add(LSTM(4, kernel_initializer='TruncatedNormal', return_sequences=True))
        model.add(BatchNormalization())
        model.add(LeakyReLU())
        model.add(Dropout(0.04))   

    #2
    model.add(LSTM(32, kernel_initializer='TruncatedNormal', return_sequences=False))
    model.add(BatchNormalization())
    model.add(LeakyReLU())
    model.add(Dropout(0.7))
    
    #3
    for _ in range(2):
        model.add(Dense(256, kernel_initializer='TruncatedNormal'))
        model.add(BatchNormalization())
        model.add(LeakyReLU())
        model.add(Dropout(0.2))
    #4
    for _ in range(1):
        model.add(Dense(64, kernel_initializer='TruncatedNormal'))
        model.add(BatchNormalization())
        model.add(LeakyReLU())
        model.add(Dropout(0.7))

    #5
    model.add(Dense(32, kernel_initializer='TruncatedNormal'))
    model.add(BatchNormalization())
    model.add(LeakyReLU())
    model.add(Dropout(0.4))
        
    model.add(Dense(n_outputs, activation='sigmoid'))
    
    model.compile(loss='sparse_categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
    
    return model

In [123]:
lstm2 = build_lstm2(n_timesteps, n_outputs, n_features)

In [58]:
lstm2.summary()

Model: "sequential_1"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 lstm (LSTM)                 (None, 100, 4)            176       
                                                                 
 batch_normalization_3 (Batc  (None, 100, 4)           16        
 hNormalization)                                                 
                                                                 
 leaky_re_lu (LeakyReLU)     (None, 100, 4)            0         
                                                                 
 dropout_3 (Dropout)         (None, 100, 4)            0         
                                                                 
 lstm_1 (LSTM)               (None, 100, 4)            144       
                                                                 
 batch_normalization_4 (Batc  (None, 100, 4)           16        
 hNormalization)                                      

In [124]:
rlr = ReduceLROnPlateau(monitor='loss', factor=0.5, patience=50, min_lr=0.0001)
mc = ModelCheckpoint('best_model_lstm2.h5', monitor='val_loss', save_best_only=True)

callbacks = [rlr, mc]

batch_size = 16
mini_batch_size = int(min(X_train.shape[0]/10, batch_size))

In [60]:
history_lstm2 = lstm2.fit(X_train_cnn, y_train_cnn, epochs=50, batch_size=mini_batch_size, callbacks=callbacks,
                          validation_data=(X_val_cnn, y_val_cnn)).history

Epoch 1/50


ValueError: in user code:

    File "/opt/anaconda3/lib/python3.8/site-packages/keras/engine/training.py", line 1021, in train_function  *
        return step_function(self, iterator)
    File "/opt/anaconda3/lib/python3.8/site-packages/keras/engine/training.py", line 1010, in step_function  **
        outputs = model.distribute_strategy.run(run_step, args=(data,))
    File "/opt/anaconda3/lib/python3.8/site-packages/keras/engine/training.py", line 1000, in run_step  **
        outputs = model.train_step(data)
    File "/opt/anaconda3/lib/python3.8/site-packages/keras/engine/training.py", line 859, in train_step
        y_pred = self(x, training=True)
    File "/opt/anaconda3/lib/python3.8/site-packages/keras/utils/traceback_utils.py", line 67, in error_handler
        raise e.with_traceback(filtered_tb) from None
    File "/opt/anaconda3/lib/python3.8/site-packages/keras/engine/input_spec.py", line 264, in assert_input_compatibility
        raise ValueError(f'Input {input_index} of layer "{layer_name}" is '

    ValueError: Input 0 of layer "sequential_1" is incompatible with the layer: expected shape=(None, 100, 6), found shape=(None, 128)


In [125]:
y_pred = np.argmax(lstm2.predict(X_test), axis=1)

print('Accuracy %s' % accuracy_score(y_test, y_pred))
print('F1-score %s' % f1_score(y_test, y_pred, average=None))
print(classification_report(y_test, y_pred))

Accuracy 0.168306752629793
F1-score [0.28812082 0.         0.         0.         0.         0.        ]
              precision    recall  f1-score   support

           0       0.17      1.00      0.29       496
           1       0.00      0.00      0.00       471
           2       0.00      0.00      0.00       420
           3       0.00      0.00      0.00       491
           4       0.00      0.00      0.00       532
           5       0.00      0.00      0.00       537

    accuracy                           0.17      2947
   macro avg       0.03      0.17      0.05      2947
weighted avg       0.03      0.17      0.05      2947



In [127]:
X_train_cnn.shape

(5881, 128)

In [106]:
X_train_cnn = X_train_cnn.to_numpy()

AttributeError: 'numpy.ndarray' object has no attribute 'to_numpy'

In [129]:
X_train_cnn2 = X_train_cnn.reshape(X_train.shape[0], X_train.shape[1], X_train.shape[2], 1)
#X_val_cnn2 = X_val_cnn.reshape(X_val_cnn.shape[0], X_val_cnn.shape[1], X_val_cnn.shape[2], 1)
X_test_cnn2 = X_test.reshape(X_test.shape[0], X_test.shape[1], X_test.shape[2], 1)

X_train_cnn2.shape

(5881, 128, 1, 1)

In [130]:
def build_cnn2(n_timesteps, n_features, n_outputs):
    input_shape = (n_timesteps, n_features, 1)

    model = Sequential()
    
    ks1_first = 3
    ks1_second = 3
    
    ks2_first = 4
    ks2_second = 4
    
    model.add(Conv2D(filters=(3), 
                     kernel_size=(ks1_first, ks1_second),
                     input_shape=input_shape, 
                     padding='same',
                     kernel_initializer='TruncatedNormal'))
    model.add(BatchNormalization())
    model.add(LeakyReLU())
    model.add(Dropout(0.02))
    
    for _ in range(2):
        model.add(Conv2D(filters=(4), 
                     kernel_size= (ks2_first, ks2_second), 
                         padding='same',
                     kernel_initializer='TruncatedNormal'))
        model.add(BatchNormalization())
        model.add(LeakyReLU())
        model.add(Dropout(0.2))  
    
    model.add(Flatten())
    
    for _ in range(4):
        model.add(Dense(64 , kernel_initializer='TruncatedNormal'))
        model.add(BatchNormalization())
        model.add(LeakyReLU())
        model.add(Dropout(0.4))
    
    for _ in range(3):
        model.add(Dense(128 , kernel_initializer='TruncatedNormal'))
        model.add(BatchNormalization())
        model.add(LeakyReLU())
        model.add(Dropout(0.3))
  
    model.add(Dense(1024 , kernel_initializer='TruncatedNormal'))
    model.add(BatchNormalization())
    model.add(LeakyReLU())
    model.add(Dropout(0.7))
        
    model.add(Dense(n_outputs, activation='sigmoid'))
    model.compile(loss='sparse_categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
    
    return model

In [131]:
cnn2 = build_cnn2(n_timesteps, n_features, n_outputs)

In [None]:
cnn2.summary()

Model: "sequential_5"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d_1 (Conv2D)            (None, 100, 6, 3)         30        
_________________________________________________________________
batch_normalization_31 (Batc (None, 100, 6, 3)         12        
_________________________________________________________________
leaky_re_lu_31 (LeakyReLU)   (None, 100, 6, 3)         0         
_________________________________________________________________
dropout_31 (Dropout)         (None, 100, 6, 3)         0         
_________________________________________________________________
conv2d_2 (Conv2D)            (None, 100, 6, 4)         196       
_________________________________________________________________
batch_normalization_32 (Batc (None, 100, 6, 4)         16        
_________________________________________________________________
leaky_re_lu_32 (LeakyReLU)   (None, 100, 6, 4)        

In [132]:
rlr = ReduceLROnPlateau(monitor='loss', factor=0.5, patience=50, min_lr=0.0001)
mc = ModelCheckpoint('best_model_cnn2.h5', monitor='val_loss', save_best_only=True)

callbacks = [rlr, mc]

batch_size = 16
mini_batch_size = int(min(X_train.shape[0]/10, batch_size))

In [134]:
y_pred = np.argmax(cnn2.predict(X_test_cnn2), axis=1)

print('Accuracy %s' % accuracy_score(y_test, y_pred))
print('F1-score %s' % f1_score(y_test, y_pred, average=None))
print(classification_report(y_test, y_pred))

Accuracy 0.168306752629793
F1-score [0.28812082 0.         0.         0.         0.         0.        ]
              precision    recall  f1-score   support

           0       0.17      1.00      0.29       496
           1       0.00      0.00      0.00       471
           2       0.00      0.00      0.00       420
           3       0.00      0.00      0.00       491
           4       0.00      0.00      0.00       532
           5       0.00      0.00      0.00       537

    accuracy                           0.17      2947
   macro avg       0.03      0.17      0.05      2947
weighted avg       0.03      0.17      0.05      2947



In [135]:
def build_cnn3(n_timesteps, n_outputs, n_features):
    model = Sequential()
    
    model.add(Conv1D(filters=16, kernel_size=8, activation='relu', input_shape=(n_timesteps, n_features)))
    model.add(BatchNormalization())
    model.add(Activation('relu'))
    
    model.add(Dropout(0.3))
    
    model.add(Conv1D(filters=32, kernel_size=5, activation='relu'))
    model.add(BatchNormalization())
    model.add(Activation('relu'))
    
    model.add(Dropout(0.3))
    
    model.add(Conv1D(filters=64, kernel_size=3, activation='relu'))
    model.add(BatchNormalization())
    model.add(Activation('relu'))
    
    model.add(Dropout(0.3))
    
    model.add(GlobalAveragePooling1D())
    
    model.add(Dense(n_outputs, activation='sigmoid'))
    model.compile(loss='sparse_categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
    
    return model

In [136]:
cnn3 = build_cnn3(n_timesteps, n_outputs, n_features)

In [None]:
cnn3.summary()

Model: "sequential_8"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv1d_7 (Conv1D)            (None, 93, 16)            784       
_________________________________________________________________
batch_normalization_48 (Batc (None, 93, 16)            64        
_________________________________________________________________
activation_7 (Activation)    (None, 93, 16)            0         
_________________________________________________________________
dropout_48 (Dropout)         (None, 93, 16)            0         
_________________________________________________________________
conv1d_8 (Conv1D)            (None, 89, 32)            2592      
_________________________________________________________________
batch_normalization_49 (Batc (None, 89, 32)            128       
_________________________________________________________________
activation_8 (Activation)    (None, 89, 32)           

In [137]:
rlr = ReduceLROnPlateau(monitor='loss', factor=0.5, patience=50, min_lr=0.0001)
mc = ModelCheckpoint('best_model_cnn2.h5', monitor='val_loss', save_best_only=True)

callbacks = [rlr, mc]

batch_size = 16
mini_batch_size = int(min(X_train.shape[0]/10, batch_size))

In [None]:
history_cnn3 = cnn3.fit(X_train_cnn, y_train_cnn, epochs=50, batch_size=mini_batch_size, callbacks=callbacks,
                      validation_data=(X_val_cnn, y_val_cnn)).history

In [138]:
y_pred = np.argmax(cnn3.predict(X_test), axis=1)

print('Accuracy %s' % accuracy_score(y_test, y_pred))
print('F1-score %s' % f1_score(y_test, y_pred, average=None))
print(classification_report(y_test, y_pred))

Accuracy 0.18221920597217509
F1-score [0.         0.         0.         0.         0.         0.30826636]
              precision    recall  f1-score   support

           0       0.00      0.00      0.00       496
           1       0.00      0.00      0.00       471
           2       0.00      0.00      0.00       420
           3       0.00      0.00      0.00       491
           4       0.00      0.00      0.00       532
           5       0.18      1.00      0.31       537

    accuracy                           0.18      2947
   macro avg       0.03      0.17      0.05      2947
weighted avg       0.03      0.18      0.06      2947



# TSC state of the art

### 1-NN with DTW (baseline)

In [25]:
from sktime.datatypes._panel._convert import from_3d_numpy_to_nested

In [26]:
body_gyro_x_train = body_gyro_x_train.to_numpy()

In [27]:
body_gyro_x_test = body_gyro_x_test.to_numpy()

In [28]:
X_train_uni = from_3d_numpy_to_nested(body_gyro_x_train.reshape(7352,1,128))
X_test_uni = from_3d_numpy_to_nested(body_gyro_x_test.reshape(2947,1,128))

In [29]:
from sktime.classification.distance_based import KNeighborsTimeSeriesClassifier

In [30]:
clf = KNeighborsTimeSeriesClassifier()
clf.fit(X_train_uni, y_train)

KNeighborsTimeSeriesClassifier()

In [32]:
%%time
y_pred = clf.predict(X_test_uni)
print(classification_report(y_test, y_pred))

              precision    recall  f1-score   support

           1       0.81      0.83      0.82       496
           2       0.71      0.74      0.73       471
           3       0.74      0.65      0.69       420
           4       0.51      0.51      0.51       491
           5       0.55      0.58      0.56       532
           6       0.48      0.47      0.48       537

    accuracy                           0.63      2947
   macro avg       0.63      0.63      0.63      2947
weighted avg       0.63      0.63      0.63      2947

CPU times: user 47min 1s, sys: 20.6 s, total: 47min 22s
Wall time: 48min 13s


### ROCKET

In [30]:
from sklearn.linear_model import RidgeClassifierCV
from sktime.transformations.panel.rocket import Rocket

OMP: Info #271: omp_set_nested routine deprecated, please use omp_set_max_active_levels instead.


In [98]:
%%time
rocket = Rocket()  # by default, MiniRocket uses ~10,000 kernels
rocket.fit(X_train_uni)
X_train_transform = rocket.transform(X_train_uni)
clf = RidgeClassifierCV(alphas=np.logspace(-3, 3, 10), normalize=True)
clf.fit(X_train_transform, y_train)

CPU times: user 7min 1s, sys: 14.8 s, total: 7min 16s
Wall time: 4min 27s


RidgeClassifierCV(alphas=array([1.00000000e-03, 4.64158883e-03, 2.15443469e-02, 1.00000000e-01,
       4.64158883e-01, 2.15443469e+00, 1.00000000e+01, 4.64158883e+01,
       2.15443469e+02, 1.00000000e+03]),
                  normalize=True)

In [99]:
X_test_transform = rocket.transform(X_test_uni)
y_pred = clf.predict(X_test_transform)
print(classification_report(y_test, y_pred))

              precision    recall  f1-score   support

           1       0.88      0.82      0.85       496
           2       0.72      0.74      0.73       471
           3       0.65      0.70      0.67       420
           4       0.50      0.51      0.50       491
           5       0.62      0.66      0.64       532
           6       0.55      0.50      0.52       537

    accuracy                           0.65      2947
   macro avg       0.65      0.65      0.65      2947
weighted avg       0.65      0.65      0.65      2947



### MINI-ROCKET

In [100]:
from sklearn.linear_model import RidgeClassifierCV
from sktime.transformations.panel.rocket import MiniRocket

In [101]:
%%time
minirocket = MiniRocket()  # by default, MiniRocket uses ~10,000 kernels
minirocket.fit(X_train_uni)
X_train_transform = minirocket.transform(X_train_uni)

CPU times: user 35 s, sys: 420 ms, total: 35.4 s
Wall time: 36 s


In [102]:
clf = RidgeClassifierCV(alphas=np.logspace(-3, 3, 10), normalize=True)
clf.fit(X_train_transform.to_numpy(), y_train.ravel())

RidgeClassifierCV(alphas=array([1.00000000e-03, 4.64158883e-03, 2.15443469e-02, 1.00000000e-01,
       4.64158883e-01, 2.15443469e+00, 1.00000000e+01, 4.64158883e+01,
       2.15443469e+02, 1.00000000e+03]),
                  normalize=True)

In [103]:
X_test_transform = minirocket.transform(X_test_uni)

In [104]:
y_pred = clf.predict(X_test_transform.to_numpy())
print(classification_report(y_test, y_pred))

              precision    recall  f1-score   support

           1       0.91      0.81      0.86       496
           2       0.74      0.83      0.78       471
           3       0.77      0.77      0.77       420
           4       0.61      0.55      0.58       491
           5       0.66      0.80      0.72       532
           6       0.64      0.56      0.60       537

    accuracy                           0.72      2947
   macro avg       0.72      0.72      0.72      2947
weighted avg       0.72      0.72      0.71      2947



### Canonical Interval Forest (CIF)

In [25]:
from sktime.classification.interval_based import CanonicalIntervalForest

In [31]:
# define the parameter values that should be searched
k_range = list(range(5, 100, 5))
param_grid = dict(n_estimators=k_range)

grid = GridSearchCV(CanonicalIntervalForest(), param_grid, cv=10, scoring='accuracy', return_train_score=False, verbose=1, n_jobs=-1)
grid.fit(X_train_uni, y_train)

Fitting 10 folds for each of 19 candidates, totalling 190 fits


KeyboardInterrupt: 

In [None]:
grid.best_params_

## MULTIVARIATE

In [52]:
from sktime.utils.plotting import plot_series
from sktime.datatypes._panel._convert import from_3d_numpy_to_nested

In [26]:
body_gyro_x_train = body_gyro_x_train.to_numpy()

In [27]:
body_gyro_x_test = body_gyro_x_test.to_numpy()

In [28]:
X_train_uni = from_3d_numpy_to_nested(body_gyro_x_train.reshape(7352,1,128))
X_test_uni = from_3d_numpy_to_nested(body_gyro_x_test.reshape(2947,1,128))

In [29]:
body_gyro_y_train = body_gyro_y_train.to_numpy()

In [30]:
body_gyro_y_test = body_gyro_y_test.to_numpy()

In [31]:
Y_train_uni = from_3d_numpy_to_nested(body_gyro_y_train.reshape(7352,1,128))
Y_test_uni = from_3d_numpy_to_nested(body_gyro_y_test.reshape(2947,1,128))

In [90]:
XY_train = pd.concat([X_train_uni, Y_train_uni], axis=1)
XY_test = pd.concat([X_test_uni, Y_test_uni], axis=1)

### 1-NN with DTW (baseline)

In [None]:
from sktime.classification.distance_based import KNeighborsTimeSeriesClassifier

In [None]:
clf = KNeighborsTimeSeriesClassifier()
clf.fit(XY_train, y_train)

In [None]:
%%time
y_pred = clf.predict(XY_test)
print(classification_report(y_test, y_pred))

### ROCKET

In [91]:
from sklearn.linear_model import RidgeClassifierCV
from sktime.transformations.panel.rocket import Rocket

In [92]:
%%time
rocket = Rocket()
rocket.fit(XY_train)
X_train_transform = rocket.transform(XY_train)
clf = RidgeClassifierCV(alphas=np.logspace(-3, 3, 10), normalize=True)
clf.fit(X_train_transform, y_train)

CPU times: user 9min 21s, sys: 15.5 s, total: 9min 36s
Wall time: 6min 49s


RidgeClassifierCV(alphas=array([1.00000000e-03, 4.64158883e-03, 2.15443469e-02, 1.00000000e-01,
       4.64158883e-01, 2.15443469e+00, 1.00000000e+01, 4.64158883e+01,
       2.15443469e+02, 1.00000000e+03]),
                  normalize=True)

In [93]:
X_test_transform = rocket.transform(XY_test)
y_pred = clf.predict(X_test_transform)
print(classification_report(y_test, y_pred))

              precision    recall  f1-score   support

           1       0.98      0.75      0.85       496
           2       0.86      0.92      0.89       471
           3       0.77      0.85      0.81       420
           4       0.60      0.64      0.62       491
           5       0.67      0.75      0.71       532
           6       0.66      0.60      0.63       537

    accuracy                           0.74      2947
   macro avg       0.76      0.75      0.75      2947
weighted avg       0.75      0.74      0.75      2947



### MINI-ROCKET

In [59]:
from sklearn.linear_model import RidgeClassifierCV
from sktime.transformations.panel.rocket import MiniRocketMultivariate

OMP: Info #271: omp_set_nested routine deprecated, please use omp_set_max_active_levels instead.


In [60]:
%%time
minirocket = MiniRocketMultivariate()  # by default, MiniRocket uses ~10,000 kernels
minirocket.fit(XY_train)
X_train_transform = minirocket.transform(XY_train)
clf = RidgeClassifierCV(alphas=np.logspace(-3, 3, 10), normalize=True)
clf.fit(X_train_transform, y_train)

CPU times: user 5min 5s, sys: 9.56 s, total: 5min 15s
Wall time: 2min 40s


RidgeClassifierCV(alphas=array([1.00000000e-03, 4.64158883e-03, 2.15443469e-02, 1.00000000e-01,
       4.64158883e-01, 2.15443469e+00, 1.00000000e+01, 4.64158883e+01,
       2.15443469e+02, 1.00000000e+03]),
                  normalize=True)

In [61]:
X_test_transform = minirocket.transform(XY_test)
y_pred = clf.predict(X_test_transform)
print(classification_report(y_test, y_pred))

              precision    recall  f1-score   support

           1       0.95      0.78      0.86       496
           2       0.83      0.99      0.90       471
           3       0.85      0.85      0.85       420
           4       0.69      0.71      0.70       491
           5       0.76      0.88      0.81       532
           6       0.76      0.62      0.68       537

    accuracy                           0.80      2947
   macro avg       0.81      0.81      0.80      2947
weighted avg       0.80      0.80      0.80      2947

