# 使用 sktime 进行多元时间序列分类

## 准备工作

In [1]:
from sktime.transformers.compose import ColumnConcatenator
from sktime.classifiers.compose import TimeSeriesForestClassifier
from sktime.classifiers.dictionary_based.boss import BOSSEnsemble
from sktime.classifiers.compose import ColumnEnsembleClassifier
from sktime.classifiers.shapelet_based import ShapeletTransformClassifier
from sktime.datasets import load_basic_motions
from sktime.pipeline import Pipeline

import numpy as np
import pandas as pd

## 导入数据

In [2]:
# Load the predefined TRAIN and TEST partitions as (X, y) pairs, in that order.
(X_train, y_train), (X_test, y_test) = (
    load_basic_motions(split=partition, return_X_y=True)
    for partition in ('TRAIN', 'TEST')
)
# Multivariate input data: the shape is (instances, dimensions).
X_train.shape

(40, 6)

In [3]:
# Preview the first rows; per the rendered output, each cell appears to hold
# a whole series for one dimension (dim_0 ... dim_5) of one instance.
X_train.head()

Unnamed: 0,dim_0,dim_1,dim_2,dim_3,dim_4,dim_5
0,0 0.079106 1 0.079106 2 -0.903497 3...,0 0.394032 1 0.394032 2 -3.666397 3...,0 0.551444 1 0.551444 2 -0.282844 3...,0 0.351565 1 0.351565 2 -0.095881 3...,0 0.023970 1 0.023970 2 -0.319605 3...,0 0.633883 1 0.633883 2 0.972131 3...
1,0 0.377751 1 0.377751 2 2.952965 3...,0 -0.610850 1 -0.610850 2 0.970717 3...,0 -0.147376 1 -0.147376 2 -5.962515 3...,0 -0.103872 1 -0.103872 2 -7.593275 3...,0 -0.109198 1 -0.109198 2 -0.697804 3...,0 -0.037287 1 -0.037287 2 -2.865789 3...
2,0 -0.813905 1 -0.813905 2 -0.424628 3...,0 0.825666 1 0.825666 2 -1.305033 3...,0 0.032712 1 0.032712 2 0.826170 3...,0 0.021307 1 0.021307 2 -0.372872 3...,0 0.122515 1 0.122515 2 -0.045277 3...,0 0.775041 1 0.775041 2 0.383526 3...
3,0 0.289855 1 0.289855 2 -0.669185 3...,0 0.284130 1 0.284130 2 -0.210466 3...,0 0.213680 1 0.213680 2 0.252267 3...,0 -0.314278 1 -0.314278 2 0.018644 3...,0 0.074574 1 0.074574 2 0.007990 3...,0 -0.079901 1 -0.079901 2 0.237040 3...
4,0 -0.123238 1 -0.123238 2 -0.249547 3...,0 0.379341 1 0.379341 2 0.541501 3...,0 -0.286006 1 -0.286006 2 0.208420 3...,0 -0.098545 1 -0.098545 2 -0.023970 3...,0 0.058594 1 0.058594 2 0.175783 3...,0 -0.074574 1 -0.074574 2 0.114525 3...


In [4]:
# List the distinct class labels present in the training targets.
np.unique(y_train)

array(['badminton', 'running', 'standing', 'walking'], dtype='<U9')

## 多元时间序列分类

sktime 提供 3 种解决多元时间序列分类问题的方法：
- 列拼接：通过 ColumnConcatenator 将每个样本的各列（维度）时间序列首尾拼接成一条长的一元时间序列，再对拼接后的数据应用分类器；
- 列级集成：通过 ColumnEnsembleClassifier 为时间序列的每一列分别训练一个分类器，再对各分类器的预测结果进行聚合；
- 专用方法：使用针对多元时间序列数据专门设计的估计器，例如在多维空间中查找 shapelet（该功能仍在开发中）。

### 时间序列串联

我们可以先将多元时间序列数据的各列拼接成一条长的一元时间序列，然后在拼接后的一元时间序列上训练分类器。

In [6]:
# Strategy 1: concatenate all dimensions into one long univariate series,
# then fit a time series forest on the concatenated data.
steps = [
    ('concatenate', ColumnConcatenator()),
    # The forest is stochastic; a fixed seed makes the reported test accuracy
    # reproducible when the notebook is re-run from a fresh kernel.
    ('classify', TimeSeriesForestClassifier(n_estimators=100, random_state=42)),
]
clf = Pipeline(steps)
clf.fit(X_train, y_train)
# Accuracy on the held-out TEST split.
clf.score(X_test, y_test)

1.0

In [20]:
# Run the concatenation step on its own to inspect the data the classifier
# in the pipeline is trained on.
cc = ColumnConcatenator()
# fit_transform is used here rather than a separate cc.fit(X_train) call.
X_train_cc = cc.fit_transform(X_train)

In [21]:
# Preview the concatenated data: per the rendered output there is now a
# single column per instance.
X_train_cc.head()

Unnamed: 0,0
0,0 0.079106 1 0.079106 2 -0.90349...
1,0 0.377751 1 0.377751 2 2.95296...
2,0 -0.813905 1 -0.813905 2 -0.42462...
3,0 0.289855 1 0.289855 2 -0.66918...
4,0 -0.123238 1 -0.123238 2 -0.24954...


In [18]:
# Length of the first instance's concatenated series. Purely positional
# indexing (.iloc[row, col]) avoids chained indexing and does not depend on
# the column label happening to be the integer 0.
X_train_cc.iloc[0, 0].shape

(600,)

In [22]:
# Length of the first instance's series in the first dimension, for comparison
# with the concatenated length. The row is labelled by column names (dim_0, ...),
# so an integer key on it relies on the deprecated positional fallback; use
# explicit positional indexing instead.
X_train.iloc[0, 0].shape

(100,)

In [23]:
# Shape after concatenation: (instances, columns).
X_train_cc.shape

(40, 1)

In [24]:
# Shape of the original multivariate data, for comparison: (instances, dimensions).
X_train.shape

(40, 6)

### 列集成
为选定的时间序列列分别训练一个分类器，然后聚合各分类器的预测结果。下面的示例只使用了第 0 列和第 3 列。

In [25]:
# Strategy 2: fit one classifier per selected column and aggregate their
# predictions. Each entry is (name, estimator, [column indices]).
clf = ColumnEnsembleClassifier(estimators=[
    # Time series forest on column 0; a fixed seed keeps the reported
    # accuracy reproducible when the notebook is re-run.
    ('TSF0', TimeSeriesForestClassifier(n_estimators=100, random_state=42), [0]),
    # BOSS ensemble on column 3.
    ('BOSSEnsemble3', BOSSEnsemble(ensemble_size=3), [3]),
])
clf.fit(X_train, y_train)
# Accuracy on the held-out TEST split.
clf.score(X_test, y_test)

1.0

### 专用方法：多维空间中的 shapelet


In [27]:
# Strategy 3: an estimator applied directly to the multivariate data.
# NOTE(review): time_contract_in_mins=0.5 presumably caps the shapelet search
# at about 30 seconds of wall-clock time, in which case the score below can
# vary between runs and machines - confirm against the sktime documentation.
clf = ShapeletTransformClassifier(time_contract_in_mins=0.5)
clf.fit(X_train, y_train)
# Accuracy on the held-out TEST split.
clf.score(X_test, y_test)

0.8