# 使用sktime进行单变量时间序列分类

## 准备工作

In [4]:
import sktime

In [7]:
from sktime.highlevel.tasks import TSCTask
from sktime.highlevel.strategies import TSCStrategy

from statsmodels.tsa.stattools import acf
from statsmodels.tsa.ar_model import AR

from sklearn.preprocessing import FunctionTransformer

from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score

import numpy as np
import pandas as pd

In [8]:
import numpy as np
import pandas as pd

## 导入数据

In [15]:
from sktime.datasets import load_gunpoint
# Load the TRAIN and TEST splits in that order; each call returns an (X, y) pair.
(X_train, y_train), (X_test, y_test) = (
    load_gunpoint(split=split_name, return_X_y=True)
    for split_name in ('TRAIN', 'TEST')
)
# Preview the first rows of the training frame.
X_train.head()

Unnamed: 0,dim_0
0,0 -0.64789 1 -0.64199 2 -0.63819 3...
1,0 -0.64443 1 -0.64540 2 -0.64706 3...
2,0 -0.77835 1 -0.77828 2 -0.77715 3...
3,0 -0.75006 1 -0.74810 2 -0.74616 3...
4,0 -0.59954 1 -0.59742 2 -0.59927 3...


In [11]:
# Dimensions of the training frame as (rows, columns); judging by the head() preview,
# each row holds one whole series nested inside the single `dim_0` column.
X_train.shape

(50, 1)

In [12]:
# Dimensions of the test frame as (rows, columns), for comparison with the training split.
X_test.shape

(150, 1)

In [13]:
# Distinct class labels present in the training targets.
np.unique(y_train)

array(['1', '2'], dtype='<U1')

## 底层接口

### 时间序列的K-最近邻分类器

In [16]:
from sktime.classifiers.distance_based import KNeighborsTimeSeriesClassifier
# Nearest-neighbours time-series classifier configured with the 'dtw' metric
# (presumably dynamic time warping — confirm against the sktime docs).
knn = KNeighborsTimeSeriesClassifier(metric='dtw')
knn.fit(X_train, y_train)
# Evaluate on the held-out test split; the score is presumably mean accuracy,
# following the scikit-learn classifier convention — TODO confirm.
knn.score(X_test, y_test)

0.9066666666666666

### 全模块化时间序列森林分类器

In [None]:
from sktime.transformers.compose import RowwiseTransformer
from sktime.transformers.segment import RandomIntervalSegmenter
from sktime.pipeline import Pipeline
from sktime.pipeline import FeatureUnion
from sktime.utils.time_series import time_series_slope
from sklearn.tree import DecisionTreeClassifier

from sktime.transformers.compose import ColumnTransformer
from sktime.transformers.compose import Tabulariser
from sktime.classifiers.distance_based import ProximityForest

In [17]:
# Summary functions applied to each segment, in the order they appear in the union.
feature_functions = [
    ('mean', np.mean),
    ('std', np.std),
    ('slope', time_series_slope),
]

# Wrap every summary function the same way: FunctionTransformer (input validation off)
# inside a RowwiseTransformer, then combine them in one FeatureUnion.
feature_union = FeatureUnion([
    (feature_name, RowwiseTransformer(FunctionTransformer(func=feature_func, validate=False)))
    for feature_name, feature_func in feature_functions
])

# Single-tree pipeline: segment the series, extract the features, classify with a decision tree.
steps = [
    ('segment', RandomIntervalSegmenter(n_intervals='sqrt')),
    ('transform', feature_union),
    ('clf', DecisionTreeClassifier()),
]
base_estimator = Pipeline(steps, random_state=1)

In [18]:
# Fit the single-tree pipeline on the training split, then evaluate it on the test split.
# The score is the cell's displayed output (presumably mean accuracy — TODO confirm).
base_estimator.fit(X_train, y_train)
base_estimator.score(X_test, y_test)

0.8266666666666667

对于时间序列森林，我们可以直接把上面构建的单棵树流水线用作森林集成中的基估计器（base estimator）。

In [19]:
from sktime.classifiers.compose import TimeSeriesForestClassifier
# Forest of 100 estimators built on the single-tree pipeline defined earlier.
# Bootstrap sampling and out-of-bag scoring are both switched on, and the seed is fixed.
tsf = TimeSeriesForestClassifier(
    base_estimator=base_estimator,
    n_estimators=100,
    criterion='entropy',
    bootstrap=True,
    oob_score=True,
    random_state=1,
)
tsf.fit(X_train, y_train)

TimeSeriesForestClassifier(base_estimator=Pipeline(check_input=False,
                                                   memory=None, random_state=1,
                                                   steps=[('segment',
                                                           RandomIntervalSegmenter(min_length=2,
                                                                                   n_intervals='sqrt',
                                                                                   random_state=1)),
                                                          ('transform',
                                                           FeatureUnion(n_jobs=None,
                                                                        preserve_dataframe=True,
                                                                        transformer_list=[('mean',
                                                                                           RowwiseTransformer(transformer=Func

In [21]:
# Out-of-bag score stored on the fitted forest (requested via oob_score=True at construction).
tsf.oob_score_

1.0

In [22]:
# Score of the fitted forest on the held-out test split, for comparison with the single-tree pipeline.
tsf.score(X_test, y_test)

0.9533333333333334