# Data classification with interpreTS

In this tutorial, we show how you can use interpreTS for data classification.

In [None]:
import pandas as pd
import numpy as np
import interpreTS as it
from sktime.datasets import load_arrow_head, load_basic_motions
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report, accuracy_score

In [2]:
#prepare data
X, y = load_arrow_head(return_type="pd-multiindex")
instance_ids = np.unique(X.index.get_level_values(0))
train_ids, test_ids = train_test_split(instance_ids, test_size=0.2, random_state=42)

X_train = X.loc[train_ids]
X_test = X.loc[test_ids]
train_indices = [np.where(instance_ids == id_)[0][0] for id_ in train_ids]
test_indices = [np.where(instance_ids == id_)[0][0] for id_ in test_ids]

y_train = y[train_indices]
y_test = y[test_indices]

print("Train set size:", X_train.shape, y_train.shape)
print("Test set size:", X_test.shape, y_test.shape)
X.head()

Train set size: (42168, 1) (168,)
Test set size: (10793, 1) (43,)


Unnamed: 0,Unnamed: 1,dim_0
0,0,-1.963009
0,1,-1.957825
0,2,-1.956145
0,3,-1.938289
0,4,-1.896657


In [3]:
# create a feature extractor
t = it.FeatureExtractor(window_size=251, stride=251)
X_train_ts = t.extract_features(X_train)
X_test_ts = t.extract_features(X_test)
X_test_ts.head()

Unnamed: 0,length_level_0,length_level_1,length_dim_0,mean_level_0,mean_level_1,mean_dim_0,variance_level_0,variance_level_1,variance_dim_0,stability_level_0,...,stability_dim_0,entropy_level_0,entropy_level_1,entropy_dim_0,spikeness_level_0,spikeness_level_1,spikeness_dim_0,seasonality_strength_level_0,seasonality_strength_level_1,seasonality_strength_dim_0
0,251,251,251,30.0,125.0,1.071713e-09,0.0,5250.0,0.996016,1.0,...,0.59695,0.0,0.997518,0.982903,0.0,0.0,-0.298874,0.0,0.976096,0.952867
1,251,251,251,173.0,125.0,8.505976e-10,0.0,5250.0,0.996016,1.0,...,0.56358,0.0,0.997518,0.988598,0.0,0.0,-0.368615,0.0,0.976096,0.973132
2,251,251,251,140.0,125.0,1.248207e-09,0.0,5250.0,0.996016,1.0,...,0.579561,0.0,0.997518,0.991015,0.0,0.0,-0.25872,0.0,0.976096,0.962001
3,251,251,251,75.0,125.0,2.191235e-11,0.0,5250.0,0.996016,1.0,...,0.597823,0.0,0.997518,0.96911,0.0,0.0,0.213841,0.0,0.976096,0.95527
4,251,251,251,60.0,125.0,-2.191236e-11,0.0,5250.0,0.996016,1.0,...,0.579527,0.0,0.997518,0.993019,0.0,0.0,-0.180719,0.0,0.976096,0.964042


In [4]:
# Initialize the classifier
clf = RandomForestClassifier(random_state=42)

# Train the classifier
clf.fit(X_train_ts, y_train)
y_pred = clf.predict(X_test_ts)

print("Accuracy:", accuracy_score(y_test, y_pred))
print("Classification Report:\n", classification_report(y_test, y_pred))

Accuracy: 0.8837209302325582
Classification Report:
               precision    recall  f1-score   support

           0       1.00      0.71      0.83        17
           1       0.81      1.00      0.90        13
           2       0.87      1.00      0.93        13

    accuracy                           0.88        43
   macro avg       0.89      0.90      0.88        43
weighted avg       0.90      0.88      0.88        43

