# Data classification with interpreTS

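In this tutorial, we show how to use interpreTS for time series classification: we extract features from each series with interpreTS and then train a classifier on those features.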

In [2]:
import pandas as pd
import numpy as np
import interpreTS as it
from sktime.datasets import load_arrow_head, load_basic_motions
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report, accuracy_score

In [9]:
# Prepare data: load ArrowHead and split it by instance id, so each series
# lands entirely in either the train set or the test set.
X, y = load_arrow_head(return_type="pd-multiindex")

# np.unique returns the ids sorted; the position lookup below relies on that.
instance_ids = np.unique(X.index.get_level_values(0))
train_ids, test_ids = train_test_split(instance_ids, test_size=0.2, random_state=42)

X_train = X.loc[train_ids]
X_test = X.loc[test_ids]

# Map every id to its position in instance_ids with one vectorized binary
# search per split (rather than a full np.where scan for each id). The
# positions come back in the same order as train_ids / test_ids.
train_indices = np.searchsorted(instance_ids, train_ids)
test_indices = np.searchsorted(instance_ids, test_ids)

# NOTE(review): assumes y[i] is the label of the i-th id in instance_ids — confirm.
y_train = y[train_indices]
y_test = y[test_indices]

print("Train set size:", X_train.shape, y_train.shape)
print("Test set size:", X_test.shape, y_test.shape)
X.head()

Train set size: (42168, 1) (168,)
Test set size: (10793, 1) (43,)


Unnamed: 0,Unnamed: 1,dim_0
0,0,-1.963009
0,1,-1.957825
0,2,-1.956145
0,3,-1.938289
0,4,-1.896657


In [5]:
# Create a feature extractor and apply it to both splits.
# 251 matches the length_dim_0 value reported in the extracted features;
# window_size == stride means consecutive windows do not overlap.
# NOTE(review): presumably this yields one feature row per series — confirm.
SERIES_LENGTH = 251
t = it.FeatureExtractor(window_size=SERIES_LENGTH, stride=SERIES_LENGTH)
X_train_ts, X_test_ts = (t.extract_features(split) for split in (X_train, X_test))
X_test_ts.head()

Unnamed: 0,length_dim_0,mean_dim_0,variance_dim_0,stability_dim_0,entropy_dim_0,spikeness_dim_0,seasonality_strength_dim_0
0,251,1.071713e-09,1.0,0.59695,0.998374,-0.298874,0.952867
1,251,8.505976e-10,1.0,0.56358,0.997343,-0.368615,0.973132
2,251,1.248207e-09,1.0,0.579561,0.996837,-0.25872,0.962001
3,251,2.191235e-11,1.0,0.597823,0.999448,0.213841,0.95527
4,251,-2.191236e-11,1.0,0.579527,0.999723,-0.180719,0.964042


In [6]:
# Fit a random forest on the extracted training features; the fixed
# random_state keeps the run repeatable.
clf = RandomForestClassifier(random_state=42)
clf.fit(X_train_ts, y_train)

# Score the model on the held-out test features.
y_pred = clf.predict(X_test_ts)
accuracy = accuracy_score(y_test, y_pred)
report = classification_report(y_test, y_pred)

print("Accuracy:", accuracy)
print("Classification Report:\n", report)

Accuracy: 0.7674418604651163
Classification Report:
               precision    recall  f1-score   support

           0       0.92      0.71      0.80        17
           1       0.67      0.77      0.71        13
           2       0.73      0.85      0.79        13

    accuracy                           0.77        43
   macro avg       0.77      0.77      0.77        43
weighted avg       0.79      0.77      0.77        43

