This notebook extracts all tsfresh features for the Tabular Playground Series (TPS) competition of April 2022.

To do so, the efficient feature-extraction capabilities of `tsflex` are used: https://github.com/predict-idlab/tsflex

In [None]:
!pip install tsflex tsfresh

In [None]:
import pandas as pd

from tsflex.features import FeatureCollection, MultipleFeatureDescriptors
from tsflex.features.integrations import tsfresh_settings_wrapper

from tsfresh.feature_extraction import ComprehensiveFCParameters

In [None]:
import warnings
warnings.filterwarnings("ignore", category=RuntimeWarning)

## Load the data

In [None]:
# Load the three competition files
train = pd.read_csv('../input/tabular-playground-series-apr-2022/train.csv')
test = pd.read_csv('../input/tabular-playground-series-apr-2022/test.csv')
train_labels = pd.read_csv('../input/tabular-playground-series-apr-2022/train_labels.csv')

# Attach the label columns to every training row, matched on the sequence id
train = pd.merge(train, train_labels, on="sequence", how="left")

# Names of all sensor columns; these are the series the features are computed on
signals = list(filter(lambda name: 'sensor_' in name, train.columns))

In [None]:
# (rows, columns) of the label-merged training frame and of the test frame
(train.shape, test.shape)

# Extract all tsfresh features

In [None]:
def _extract_features(collection, data, metadata_columns):
    """Compute the windowed features of `data` and attach its metadata columns.

    Parameters
    ----------
    collection : FeatureCollection
        The tsflex feature collection to evaluate.
    data : pd.DataFrame
        Frame holding the signal columns as well as `metadata_columns`.
    metadata_columns : list of str
        Columns of `data` that are joined onto the feature frame.

    Returns
    -------
    pd.DataFrame
        The feature frame, index-joined with `data[metadata_columns]`.
        With `window_idx="begin"` the feature rows are labelled by their window
        start, so each one picks up the metadata of the row that opens the window.
    """
    feats = collection.calculate(data, show_progress=True, return_df=True, window_idx="begin")
    return feats.merge(data[metadata_columns], left_index=True, right_index=True)


# Start from the complete tsfresh feature set ...
settings = ComprehensiveFCParameters()
# ... minus the one feature that needs a time-index
del settings["linear_trend_timewise"]

# Non-overlapping windows of 60 samples (window == stride) over every sensor signal.
# NOTE(review): presumably each sequence spans exactly 60 consecutive rows, so one
# window corresponds to one sequence -- confirm against the data.
tsfresh_descriptors = MultipleFeatureDescriptors(
    functions=tsfresh_settings_wrapper(settings),
    series_names=signals,
    windows=60,
    strides=60,
)
fc = FeatureCollection(tsfresh_descriptors)

print("Extracting features on the training data")
df_feats_train = _extract_features(fc, train, ["sequence", "subject", "state"])

print("Extracting features on the testing data")
df_feats_test = _extract_features(fc, test, ["sequence", "subject"])

In [None]:
# (rows, columns) of the extracted train / test feature frames
(df_feats_train.shape, df_feats_test.shape)

In [None]:
# Persist both feature frames as parquet files in the working directory
for frame, filename in (
    (df_feats_train, "tsfresh_feats_train.parquet"),
    (df_feats_test, "tsfresh_feats_test.parquet"),
):
    frame.to_parquet(filename)