# Interval based time series classification in sktime

Interval based approaches look at phase dependent intervals of the full series, calculating summary statistics from selected subseries to be used in classification.

## 1. Imports

In [58]:
import numpy as np
import pandas as pd
from sklearn import metrics

from sktime.classification.interval_based import (
    CanonicalIntervalForest,
    DrCIF,
)
from sktime.datasets import load_basic_motions
from sktime.datatypes import mtype

## 2. Load data

In [59]:
# Load the predefined train/test splits of the BasicMotions problem.
X_train_mv, y_train_mv = load_basic_motions(split="train", return_X_y=True)
X_test_mv, y_test_mv = load_basic_motions(split="test", return_X_y=True)
print(X_train_mv.shape, y_train_mv.shape, X_test_mv.shape, y_test_mv.shape)

# Keep at most the first 40 cases of each split, then report the shapes again.
X_train_mv, y_train_mv = X_train_mv[:40], y_train_mv[:40]
X_test_mv, y_test_mv = X_test_mv[:40], y_test_mv[:40]
print(X_train_mv.shape, y_train_mv.shape, X_test_mv.shape, y_test_mv.shape)

(40, 6) (40,) (40, 6) (40,)
(40, 6) (40,) (40, 6) (40,)


In [60]:
SERIES_LENGTH = 24  # number of leading time points kept from every series


def truncate_nested(X, length):
    """Return a copy of a nested panel with every series cut to ``length`` points.

    Parameters
    ----------
    X : pd.DataFrame
        Nested frame in which every cell holds one pd.Series.
    length : int
        Number of leading observations to keep from each series.

    Returns
    -------
    pd.DataFrame
        New frame with the same index and columns as ``X``; ``X`` itself is
        left unmodified.
    """
    truncated = X.copy()
    for col in truncated.columns:
        # Replace the whole column at once. The previous chained form
        # ``X.loc[i][col] = ...`` assigned into a temporary row object, which
        # pandas does not guarantee to write back to the frame (and which is
        # a no-op under Copy-on-Write). It also required the index labels to
        # be exactly 0..n-1.
        truncated[col] = truncated[col].map(lambda series: series.iloc[:length])
    return truncated


X_train_mv = truncate_nested(X_train_mv, SERIES_LENGTH)
X_test_mv = truncate_nested(X_test_mv, SERIES_LENGTH)

In [61]:
# NOTE(review): allow_pickle=True unpickles arbitrary Python objects, so this
# file must come from a trusted source.
stock_ds = np.load(
    "./OUTPUT/samples/stock_norm_truth_24_nested_univ.npy",
    allow_pickle=True,
)
stock_df = pd.DataFrame(stock_ds)

# Ask sktime which Panel mtype it recognises the frame as.
mtype(stock_df, as_scitype="Panel")

'nested_univ'

In [62]:
# Preview the first rows of the stock frame loaded from the .npy file.
stock_df.head()

Unnamed: 0,0,1,2,3,4,5
0,0 0.168671 1 0.169871 2 0.167439 3...,0 0.168727 1 0.169337 2 0.167932 3...,0 0.171846 1 0.170821 2 0.169671 3...,0 0.168885 1 0.167116 2 0.168803 3...,0 0.168885 1 0.167116 2 0.168803 3...,0 0.105840 1 0.105423 2 0.107163 3...
1,0 0.085718 1 0.080664 2 0.083693 3...,0 0.086112 1 0.081962 2 0.083858 3...,0 0.083442 1 0.078910 2 0.082594 3...,0 0.081919 1 0.081112 2 0.081389 3...,0 0.081919 1 0.081112 2 0.081389 3...,0 0.132901 1 0.144250 2 0.126979 3...
2,0 0.419600 1 0.393184 2 0.397824 3...,0 0.418008 1 0.397684 2 0.401017 3...,0 0.407273 1 0.396205 2 0.398713 3...,0 0.400698 1 0.392252 2 0.393818 3...,0 0.400698 1 0.392252 2 0.393818 3...,0 0.048816 1 0.047459 2 0.031104 3...
3,0 0.248200 1 0.247595 2 0.247460 3...,0 0.248216 1 0.248168 2 0.250515 3...,0 0.252675 1 0.251843 2 0.252580 3...,0 0.248526 1 0.248648 2 0.252031 3...,0 0.248526 1 0.248648 2 0.252031 3...,0 0.063593 1 0.036590 2 0.050175 3...
4,0 0.830821 1 0.850212 2 0.855123 3...,0 0.846650 1 0.852601 2 0.861331 3...,0 0.845332 1 0.862639 2 0.869297 3...,0 0.847495 1 0.850712 2 0.863701 3...,0 0.847495 1 0.850712 2 0.863701 3...,0 0.017186 1 0.012043 2 0.015360 3...


## 6. Canonical Interval Forest (CIF)

CIF extends the Time Series Forest (TSF) algorithm. In addition to the three summary statistics used by TSF (mean, standard deviation and slope), CIF makes use of the features from the `Catch22` \[5\] transform.
To increase the diversity of the ensemble, the TSF and catch22 attributes are randomly subsampled per tree.

### Multivariate

In [63]:
# Fit CIF on the BasicMotions training split.
cif_m = CanonicalIntervalForest(
    n_estimators=50,
    att_subsample_size=8,
    random_state=47,
)
cif_m.fit(X_train_mv, y_train_mv)

# Predict the test split and report the accuracy.
cif_m_preds = cif_m.predict(X_test_mv)
print(f"CIF Accuracy: {metrics.accuracy_score(y_test_mv, cif_m_preds)}")

  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(


CIF Accuracy: 0.975


In [64]:
# Apply the CIF model that was fitted on X_train_mv to the stock samples.
cif_stock_preds = cif_m.predict(stock_df)

  warn(


In [68]:
# List the distinct class labels CIF predicted for the stock samples.
np.unique(cif_stock_preds)

array(['standing'], dtype='<U8')

## 7. Diverse Representation Canonical Interval Forest (DrCIF)

DrCIF makes use of the periodogram and differences representations used by the Supervised Time Series Forest (STSF), as well as the additional summary statistics in CIF.

### Multivariate

In [69]:
# Fit DrCIF on the BasicMotions training split.
drcif_m = DrCIF(
    n_estimators=5,
    att_subsample_size=10,
    random_state=47,
)
drcif_m.fit(X_train_mv, y_train_mv)

# Predict the test split and report the accuracy.
drcif_m_preds = drcif_m.predict(X_test_mv)
print(f"DrCIF Accuracy: {metrics.accuracy_score(y_test_mv, drcif_m_preds)}")

  warn(
  warn(
  warn(
  warn(
  warn(


DrCIF Accuracy: 0.95


  warn(


In [70]:
# Apply the DrCIF model that was fitted on X_train_mv to the stock samples.
drcif_stock_preds = drcif_m.predict(stock_df)

  warn(


In [71]:
# List the distinct class labels DrCIF predicted for the stock samples.
np.unique(drcif_stock_preds)

array(['standing'], dtype='<U8')