Feature extraction with interpreTS

In this tutorial, we show how you can use interpreTS, instead of the sktime + tsfresh combination, to extract features from time series and use them for classification. sktime is still used here, but only to load the example datasets.

In [10]:
!pip install --upgrade interpreTS -q



In [14]:
import pandas as pd
import numpy as np
import interpreTS as it
from sktime.datasets import load_arrow_head, load_basic_motions
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report, accuracy_score

In [15]:
# Load ArrowHead in the long "pd-multiindex" layout: index level 0 is the
# instance (series) id, index level 1 is the position within the series.
X, y = load_arrow_head(return_type="pd-multiindex")
instance_ids = np.unique(X.index.get_level_values(0))

# X has one row per time point while y has one label per series, so X and y
# cannot be passed to sklearn's train_test_split together. Split the instance
# ids instead; this also keeps every series entirely in train or in test.
train_ids, test_ids = train_test_split(instance_ids, test_size=0.2, random_state=42)

X_train = X.loc[train_ids]
X_test = X.loc[test_ids]

# instance_ids is sorted and unique (np.unique), so a single vectorised binary
# search gives the position of every selected id, in the order of train_ids /
# test_ids. This replaces a full np.where scan per id.
train_indices = np.searchsorted(instance_ids, train_ids)
test_indices = np.searchsorted(instance_ids, test_ids)

# NOTE(review): assumes y is ordered like instance_ids, i.e. y[k] is the label
# of the k-th instance id -- confirm against the loader.
y_train = y[train_indices]
y_test = y[test_indices]

print("Train set size:", X_train.shape, y_train.shape)
print("Test set size:", X_test.shape, y_test.shape)
X

Train set size: (42168, 1) (168,)
Test set size: (10793, 1) (43,)


Unnamed: 0,Unnamed: 1,dim_0
0,0,-1.963009
0,1,-1.957825
0,2,-1.956145
0,3,-1.938289
0,4,-1.896657
...,...,...
210,246,-1.513637
210,247,-1.550431
210,248,-1.581576
210,249,-1.595273


Using interpreTS to extract features

In [21]:
# Each series must produce exactly one feature row so that the rows line up with
# the labels. That requires a window spanning one whole series and a stride equal
# to the window (no overlap). Derive the length from the data instead of
# hard-coding it, and fail loudly if the series are not all the same length.
series_lengths = {
    int(n_rows)
    for part in (X_train, X_test)
    for n_rows in part.index.get_level_values(0).value_counts()
}
assert len(series_lengths) == 1, (
    f"expected equal-length series, got lengths {sorted(series_lengths)}"
)
(series_length,) = series_lengths  # 251 for ArrowHead

extractor = it.FeatureExtractor(window_size=series_length, stride=series_length)
X_train_ts = extractor.extract_features(X_train)
X_test_ts = extractor.extract_features(X_test)

# One feature row per labelled series, otherwise features and labels are misaligned.
assert len(X_train_ts) == len(y_train), "train features/labels are misaligned"
assert len(X_test_ts) == len(y_test), "test features/labels are misaligned"
X_test_ts.head()

Unnamed: 0,length_dim_0,mean_dim_0,variance_dim_0,stability_dim_0,entropy_dim_0,spikeness_dim_0,seasonality_strength_dim_0
0,251,1.071713e-09,1.0,0.59695,0.998374,-0.298874,0.952867
1,251,8.505976e-10,1.0,0.56358,0.997343,-0.368615,0.973132
2,251,1.248207e-09,1.0,0.579561,0.996837,-0.25872,0.962001
3,251,2.191235e-11,1.0,0.597823,0.999448,0.213841,0.95527
4,251,-2.191236e-11,1.0,0.579527,0.999723,-0.180719,0.964042


In [22]:
# Random forest on the extracted features; random_state is fixed so that
# repeated runs build the same model.
clf = RandomForestClassifier(random_state=42)
# fit() stays the last expression of the cell so the fitted estimator is shown.
clf.fit(X=X_train_ts, y=y_train)

In [23]:
# Predict the held-out series, then report overall accuracy and per-class metrics.
y_pred = clf.predict(X_test_ts)
accuracy = accuracy_score(y_test, y_pred)
report = classification_report(y_test, y_pred)

print("Accuracy:", accuracy)
print("Classification Report:\n", report)

Accuracy: 0.7674418604651163
Classification Report:
               precision    recall  f1-score   support

           0       0.92      0.71      0.80        17
           1       0.67      0.77      0.71        13
           2       0.73      0.85      0.79        13

    accuracy                           0.77        43
   macro avg       0.77      0.77      0.77        43
weighted avg       0.79      0.77      0.77        43



Multivariate time series classification data

In [6]:
# Load BasicMotions (channels dim_0 ... dim_5) in the long "pd-multiindex"
# layout: index level 0 is the instance (series) id, level 1 the position
# within the series.
X2, y = load_basic_motions(return_type="pd-multiindex")

# train_test_split(X2, y) cannot be used directly on this data: X2 has one row
# per time point while y has one label per series. Split the instance ids
# instead; this also keeps every series entirely in train or in test.
instance_ids = np.unique(X2.index.get_level_values(0))
train_ids, test_ids = train_test_split(instance_ids, test_size=0.2, random_state=42)

X_train = X2.loc[train_ids]
X_test = X2.loc[test_ids]

# instance_ids is sorted and unique (np.unique), so a single vectorised binary
# search gives the position of every selected id, in the order of train_ids /
# test_ids. This replaces a full np.where scan per id.
train_indices = np.searchsorted(instance_ids, train_ids)
test_indices = np.searchsorted(instance_ids, test_ids)

# Use the positions to split y.
# NOTE(review): assumes y is ordered like instance_ids, i.e. y[k] is the label
# of the k-th instance id -- confirm against the loader.
y_train = y[train_indices]
y_test = y[test_indices]

# Verify the splits
print("Train set size:", X_train.shape, y_train.shape)
print("Test set size:", X_test.shape, y_test.shape)
X2

Train set size: (6400, 6) (64,)
Test set size: (1600, 6) (16,)


Unnamed: 0,Unnamed: 1,dim_0,dim_1,dim_2,dim_3,dim_4,dim_5
0,0,0.079106,0.394032,0.551444,0.351565,0.023970,0.633883
0,1,0.079106,0.394032,0.551444,0.351565,0.023970,0.633883
0,2,-0.903497,-3.666397,-0.282844,-0.095881,-0.319605,0.972131
0,3,1.116125,-0.656101,0.333118,1.624657,-0.569962,1.209171
0,4,1.638200,1.405135,0.393875,1.187864,-0.271664,1.739182
...,...,...,...,...,...,...,...
79,95,28.459024,-16.633770,3.631869,8.978229,-3.611533,-1.491489
79,96,10.260094,0.102775,1.269261,-1.645964,-3.377157,1.283746
79,97,4.316471,-3.574319,2.063831,-1.717875,-1.843054,0.484734
79,98,0.704446,-4.920444,2.851857,-2.982977,-0.809665,-0.721774


In [7]:
# Each series must produce exactly one feature row so that the rows line up with
# the labels. That requires a window spanning one whole series and a stride equal
# to the window (no overlap). Derive the length from the data instead of
# hard-coding it, and fail loudly if the series are not all the same length.
series_lengths = {
    int(n_rows)
    for part in (X_train, X_test)
    for n_rows in part.index.get_level_values(0).value_counts()
}
assert len(series_lengths) == 1, (
    f"expected equal-length series, got lengths {sorted(series_lengths)}"
)
(series_length,) = series_lengths  # 100 for BasicMotions

extractor = it.FeatureExtractor(window_size=series_length, stride=series_length)
X_train_ts = extractor.extract_features(X_train)
X_test_ts = extractor.extract_features(X_test)

# One feature row per labelled series, otherwise features and labels are misaligned.
assert len(X_train_ts) == len(y_train), "train features/labels are misaligned"
assert len(X_test_ts) == len(y_test), "test features/labels are misaligned"
X_train_ts

Unnamed: 0,length_dim_0,length_dim_1,length_dim_2,length_dim_3,length_dim_4,length_dim_5,mean_dim_0,mean_dim_1,mean_dim_2,mean_dim_3,...,spikeness_dim_2,spikeness_dim_3,spikeness_dim_4,spikeness_dim_5,seasonality_strength_dim_0,seasonality_strength_dim_1,seasonality_strength_dim_2,seasonality_strength_dim_3,seasonality_strength_dim_4,seasonality_strength_dim_5
0,100,100,100,100,100,100,3.154845,-2.092907,-1.363469,-0.191603,...,-0.773377,-0.531370,2.683226,-2.063437,0.283463,0.000000,0.000000,0.000000,0.059406,0.000000
1,100,100,100,100,100,100,1.587498,0.222384,-0.749236,0.191789,...,-0.606410,0.901441,-0.387104,-0.099259,0.000000,0.397999,0.223769,0.415718,0.236831,0.409275
2,100,100,100,100,100,100,5.807326,-6.281034,-2.437611,-0.143369,...,-0.331167,0.438019,-0.247202,0.280322,0.101126,0.093010,0.000000,0.063491,0.184433,0.299320
3,100,100,100,100,100,100,-0.022142,-0.029539,-0.148466,-0.139321,...,-7.238673,-7.383345,6.811770,-8.528333,0.000000,0.277989,0.112622,0.072020,0.000000,0.051819
4,100,100,100,100,100,100,-0.099601,0.054716,-0.022336,-0.018111,...,0.367160,-3.351303,-0.501211,-1.105247,0.000000,0.364933,0.175700,0.144759,0.186430,0.326102
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
59,100,100,100,100,100,100,1.137455,0.307648,-0.193283,0.068635,...,-0.194638,0.194516,-0.704307,0.016527,0.114462,0.501477,0.247811,0.172043,0.430744,0.496826
60,100,100,100,100,100,100,0.333342,-0.016185,-0.193504,0.031614,...,0.035636,0.318068,0.070821,0.058377,0.000000,0.529218,0.400919,0.400937,0.432037,0.521325
61,100,100,100,100,100,100,4.092811,0.056040,-2.301954,-0.312414,...,-0.359901,-0.082943,-0.061941,-1.733189,0.443878,0.012607,0.195395,0.330956,0.388812,0.210893
62,100,100,100,100,100,100,5.726527,-5.287751,-2.073341,0.173838,...,0.125040,0.250203,0.909346,-0.036011,0.007575,0.000000,0.052680,0.000000,0.000000,0.176984


In [8]:
# Fresh random forest for the multivariate features; random_state is fixed so
# that repeated runs build the same model.
clf = RandomForestClassifier(random_state=42)
# fit() stays the last expression of the cell so the fitted estimator is shown.
clf.fit(X=X_train_ts, y=y_train)

In [9]:
# Predict the held-out series, then report overall accuracy and per-class metrics.
y_pred = clf.predict(X_test_ts)
accuracy = accuracy_score(y_test, y_pred)
report = classification_report(y_test, y_pred)

print("Accuracy:", accuracy)
print("Classification Report:\n", report)

Accuracy: 1.0
Classification Report:
               precision    recall  f1-score   support

   badminton       1.00      1.00      1.00         5
     running       1.00      1.00      1.00         3
    standing       1.00      1.00      1.00         4
     walking       1.00      1.00      1.00         4

    accuracy                           1.00        16
   macro avg       1.00      1.00      1.00        16
weighted avg       1.00      1.00      1.00        16

