In [3]:
from sktime.transformers.compose import ColumnConcatenator
from sktime.classifiers.compose import TimeSeriesForestClassifier
from sktime.classifiers.dictionary_based.boss import BOSSEnsemble
from sktime.classifiers.compose import ColumnEnsembleClassifier
from sktime.classifiers.shapelet_based import ShapeletTransformClassifier
from sktime.datasets import load_basic_motions
from sktime.pipeline import Pipeline

import numpy as np
import pandas as pd

In [4]:
# Load the predefined TRAIN and TEST partitions of the BasicMotions dataset,
# each as a separate (X, y) pair.
X_train, y_train = load_basic_motions(return_X_y=True, split="TRAIN")
X_test, y_test = load_basic_motions(return_X_y=True, split="TEST")

# Multivariate input data: X_train holds 40 training samples with 6 variables
# each, and every variable of every sample is a time series of length 100.
X_train.shape

(40, 6)

In [5]:
# One target label per training sample (40 labels for the 40 rows of X_train).
y_train.shape

(40,)

In [6]:
# The test partition has the same layout as the training one: 40 samples x 6 variables.
X_test.shape

(40, 6)

In [7]:
# Selecting a single sample by its label yields a 1-D object with one entry
# per variable (6 entries).
X_train.loc[39].shape

(6,)

In [8]:
# Selecting with a list of labels keeps the result two-dimensional:
# one row (sample 0) by six columns (variables).
X_train.loc[[0]].shape

(1, 6)

In [9]:
# Each cell of X_train stores a whole time series. Show the last five
# observations of variable 0 for sample 0 (positions 95-99 of a length-100 series).
X_train.loc[0][0].tail()

95   -0.167918
96   -0.227670
97   -0.193271
98   -0.193271
99   -0.205150
dtype: float64

In [10]:
# Multi-class target variable: list the distinct class labels present in y_train.
np.unique(y_train)

array(['badminton', 'running', 'standing', 'walking'], dtype='<U9')

将多变量时间序列/面板数据的各列首尾拼接成一条更长的单变量时间序列/面板，然后在拼接后的单变量数据上应用分类器。

In [11]:
# Approach 1: concatenate the columns of each sample into one long univariate
# series, then fit a time series forest with 100 estimators on the result.
# NOTE(review): no random_state is passed to the forest; if it is randomised,
# the score shown for this cell may vary between runs - confirm.
steps = [
    ("concatenate", ColumnConcatenator()),
    ("classify", TimeSeriesForestClassifier(n_estimators=100)),
]
clf = Pipeline(steps)
clf.fit(X_train, y_train)

# Evaluate on the test partition; the value is displayed as the cell output.
clf.score(X_test, y_test)

1.0

为数据的每一列（即每个变量）分别拟合一个分类器，然后聚合各分类器的预测结果（下面的示例只使用第 0 列和第 3 列）。

In [12]:
# Approach 2: column-wise ensemble. Every entry is a (name, estimator, columns)
# triple: a time series forest is assigned column 0 and a BOSS ensemble
# (ensemble_size=3) is assigned column 3.
# NOTE(review): no random_state is passed to the forest; if it is randomised,
# the score shown for this cell may vary between runs - confirm.
clf = ColumnEnsembleClassifier(
    estimators=[
        ("TSF0", TimeSeriesForestClassifier(n_estimators=100), [0]),
        ("BOSSEnsemble3", BOSSEnsemble(ensemble_size=3), [3]),
    ]
)
clf.fit(X_train, y_train)

# Evaluate on the test partition; the value is displayed as the cell output.
clf.score(X_test, y_test)

1.0

In [15]:
# Approach 3: shapelet transform classifier applied directly to X_train,
# with its time contract set to 0.5 minutes.
# NOTE(review): a wall-clock budget presumably makes the fitted model, and
# therefore the score, depend on machine speed - confirm before comparing runs.
clf = ShapeletTransformClassifier(time_contract_in_mins=0.5)
clf.fit(X_train, y_train)
# Evaluate on the test partition; the value is displayed as the cell output.
clf.score(X_test, y_test)

0.65