# K-nearest neighbors with dynamic time warping.

In [4]:
from tsfresh.examples import load_robot_execution_failures
from tsfresh.examples.robot_execution_failures import download_robot_execution_failures

# Fetch the robot execution failures example dataset shipped with tsfresh.
# NOTE(review): presumably this is a no-op when the data is already on disk — confirm.
download_robot_execution_failures()
# df_ts is the time-series table later passed to extract_features (keyed by
# "id", ordered by "time"); y is the target later passed to select_features.
df_ts, y = load_robot_execution_failures()

In [5]:
# Mean of the target labels, shown to two decimals.
# NOTE(review): presumably y is boolean/0-1, making this the positive-class share — confirm.
print(f"{y.mean():.2f}")

0.24


In [7]:
from tsfresh import extract_features
from tsfresh import select_features
from tsfresh.utilities.dataframe_functions import impute

# Extract the tsfresh feature set from the time-series table; column_id and
# column_sort name the series-identifier and ordering columns of df_ts.
extracted_features = extract_features(df_ts, column_id="id", column_sort="time")
# Pass the extracted features through tsfresh's impute helper before selection.
extracted_features = impute(extracted_features)

# Keep only the features tsfresh considers relevant to the target y; relevance
# is decided from the p-value of a statistical test run for each feature.
features_filtered = select_features(extracted_features, y)

Feature Extraction: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 20/20 [00:04<00:00,  4.46it/s]
 'T_x__partial_autocorrelation__lag_8'
 'T_x__partial_autocorrelation__lag_9' ...
 'F_z__agg_linear_trend__attr_"stderr"__chunk_len_50__f_agg_"mean"'
 'F_z__agg_linear_trend__attr_"stderr"__chunk_len_50__f_agg_"var"'
 'F_z__query_similarity_count__query_None__threshold_0.0'] did not have any finite values. Filling with zeros.


In [9]:
# Preview the first rows of the selected-feature table.
features_filtered.head()

Unnamed: 0,F_x__value_count__value_-1,F_x__root_mean_square,F_x__abs_energy,T_y__absolute_maximum,F_x__mean_n_absolute_max__number_of_maxima_7,F_x__range_count__max_1__min_-1,F_y__abs_energy,F_y__root_mean_square,F_y__mean_n_absolute_max__number_of_maxima_7,T_y__variance,...,"F_y__cwt_coefficients__coeff_14__w_5__widths_(2, 5, 10, 20)","F_y__cwt_coefficients__coeff_13__w_2__widths_(2, 5, 10, 20)",T_y__lempel_ziv_complexity__bins_3,T_y__quantile__q_0.1,F_z__time_reversal_asymmetry_statistic__lag_1,F_x__quantile__q_0.2,F_y__quantile__q_0.7,"T_x__change_quantiles__f_agg_""var""__isabs_False__qh_0.2__ql_0.0",T_z__large_standard_deviation__r_0.35000000000000003,T_z__quantile__q_0.9
1,14.0,0.966092,14.0,1.0,1.0,15.0,13.0,0.930949,1.0,0.222222,...,-0.751682,-0.310265,0.4,-1.0,-596.0,-1.0,-1.0,0.0,0.0,0.0
2,7.0,1.290994,25.0,5.0,1.571429,13.0,76.0,2.250926,3.0,4.222222,...,0.057818,-0.202951,0.533333,-3.6,-680.384615,-1.0,-1.0,0.0,1.0,0.0
3,11.0,0.894427,12.0,5.0,1.0,14.0,40.0,1.632993,2.142857,3.128889,...,0.912474,0.539121,0.533333,-4.0,-617.0,-1.0,0.0,0.0,1.0,0.0
4,5.0,1.032796,16.0,6.0,1.285714,10.0,60.0,2.0,2.428571,7.128889,...,-0.609735,-2.64139,0.533333,-4.6,3426.307692,-1.0,1.0,0.0,0.0,0.0
5,9.0,1.064581,17.0,5.0,1.285714,13.0,46.0,1.75119,2.285714,4.16,...,0.072771,0.591927,0.466667,-5.0,-2609.0,-1.0,0.8,0.0,0.0,0.6


In [10]:
from sklearn.model_selection import TimeSeriesSplit, GridSearchCV
from tslearn.neighbors import KNeighborsTimeSeriesClassifier

# Hyper-parameter grid: DTW as the only distance metric, 1 to 3 neighbours.
param_search = dict(metric=['dtw'], n_neighbors=[1, 2, 3])

# Cross-validation splitter (two splits) and the base nearest-neighbour model.
tscv = TimeSeriesSplit(n_splits=2)
knn = KNeighborsTimeSeriesClassifier()

# NOTE(review): the search is fitted on the tsfresh feature table, not on the
# raw series in df_ts, so DTW would be comparing feature columns as if they
# were time steps — confirm this is intended.
# NOTE(review): no `scoring` is passed, so GridSearchCV uses its default;
# verify that suits the label balance printed earlier in the notebook.
# NOTE(review): TimeSeriesSplit assumes the rows are in temporal order —
# confirm that holds for one-row-per-series data.
gsearch = GridSearchCV(estimator=knn, param_grid=param_search, cv=tscv)
gsearch.fit(features_filtered, y)