# TSFresh exploration

In [1]:
from tsfresh import select_features, extract_relevant_features, extract_features
from tsfresh.utilities.dataframe_functions import impute
from get_processed_data import get_processed_data, get_train_test_split
import pandas as pd

In [9]:
train, test  = get_train_test_split(
  'malte', 60 * 15, 60 * 3
)

print("train hard", len(train["hard"]))
print("train easy", len(train["easy"]))

print("test hard", len(test["hard"]))
print("test easy", len(test["easy"]))


train hard 27
train easy 18
test hard 6
test easy 1


## Prepare data for TSFresh
Convert data to a single pandas dataframe and generate a unique id for each climb.

Also, generate a labels array, that indicates 0 for an easy route and 1 for a hard route for each climb.

In [18]:
easy_routes = get_processed_data("malte", ["3", "4"], 60 * 15, 60 * 3)
hard_routes = get_processed_data("malte", ["6", "6+"], 60 * 15, 60 * 3)

all_climbs = []
labels = pd.Series() # 0 = not hard, 1 = hard
i = 0


for route in easy_routes:
    data  = pd.DataFrame(route)
    data = data.drop("PacketCounter", axis=1)
    data["id"] = i
    all_climbs.append(data)
    labels[i] = False
    i += 1

for route in hard_routes:
    data  = pd.DataFrame(route)
    data = data.drop("PacketCounter", axis=1)
    data["id"] = i
    all_climbs.append(data)
    labels[i] = True
    i += 1

all_climbs_df = pd.concat(all_climbs)

In [19]:
# now we can extract features from all_climbs
extracted_features = extract_features(all_climbs_df, column_id="id", column_sort="SampleTimeFine", column_kind=None, column_value=None)
extracted_features.to_csv("extracted_features.csv")

Feature Extraction: 100%|██████████| 20/20 [08:29<00:00, 25.47s/it]


In [22]:
extracted_features

Unnamed: 0,Quat_W__variance_larger_than_standard_deviation,Quat_W__has_duplicate_max,Quat_W__has_duplicate_min,Quat_W__has_duplicate,Quat_W__sum_values,Quat_W__abs_energy,Quat_W__mean_abs_change,Quat_W__mean_change,Quat_W__mean_second_derivative_central,Quat_W__median,...,FreeAcc_Z__fourier_entropy__bins_5,FreeAcc_Z__fourier_entropy__bins_10,FreeAcc_Z__fourier_entropy__bins_100,FreeAcc_Z__permutation_entropy__dimension_3__tau_1,FreeAcc_Z__permutation_entropy__dimension_4__tau_1,FreeAcc_Z__permutation_entropy__dimension_5__tau_1,FreeAcc_Z__permutation_entropy__dimension_6__tau_1,FreeAcc_Z__permutation_entropy__dimension_7__tau_1,FreeAcc_Z__query_similarity_count__query_None__threshold_0.0,FreeAcc_Z__mean_n_absolute_max__number_of_maxima_7
0,0.0,0.0,0.0,1.0,-512.772215,296.857658,0.002989,6.2e-05,2.132517e-07,-0.570555,...,0.491645,0.73834,1.892759,1.606224,2.691413,3.888623,4.987931,5.804821,0.0,9.57165
1,0.0,0.0,0.0,1.0,-467.060644,247.320558,0.003087,0.000212,-2.282294e-06,-0.520851,...,0.3911,0.615315,1.772768,1.626988,2.743884,3.953241,5.082721,5.93195,0.0,10.479121
2,0.0,0.0,0.0,1.0,-678.468407,512.874312,0.001515,-1.7e-05,2.027283e-06,-0.766035,...,0.551856,0.934959,2.231074,1.607703,2.693844,3.871073,4.988448,5.838081,0.0,7.509788
3,0.0,0.0,0.0,1.0,-674.325113,506.752846,0.001877,-7.2e-05,-6.915367e-07,-0.754421,...,0.620592,0.985619,2.203251,1.563292,2.576192,3.683353,4.76293,5.688872,0.0,8.61565
4,0.0,0.0,0.0,1.0,-673.235081,506.2432,0.00228,-4.3e-05,1.23608e-07,-0.758276,...,0.539976,0.810946,1.900196,1.524369,2.477237,3.544598,4.588249,5.514912,0.0,12.337637
5,0.0,0.0,0.0,1.0,-604.509487,408.713777,0.002138,-4.3e-05,-2.277283e-07,-0.664526,...,0.470291,0.719481,2.200979,1.644385,2.779493,4.034797,5.212219,6.051388,0.0,13.387012
6,0.0,0.0,0.0,1.0,-578.271598,375.852399,0.00231,3.6e-05,-4.116927e-06,-0.652747,...,0.363233,0.631426,1.708073,1.677674,2.860106,4.149895,5.315946,6.055085,0.0,9.853896
7,0.0,0.0,0.0,1.0,-577.039833,372.54088,0.001856,-0.000162,-6.943207e-07,-0.638513,...,0.397253,0.615315,1.632394,1.633807,2.757962,3.988326,5.109979,5.90632,0.0,9.094114
8,0.0,0.0,0.0,1.0,483.18579,266.463142,0.002358,4.1e-05,-2.827951e-06,0.517167,...,0.517869,0.851031,2.1775,1.601069,2.637968,3.81619,4.98992,5.938287,0.0,14.473091
9,0.0,0.0,0.0,1.0,468.903135,249.369913,0.002925,0.000239,6.261693e-06,0.531057,...,0.573101,0.853976,2.192464,1.609671,2.683795,3.888887,5.035529,5.944357,0.0,15.353678


In [20]:
# drop columns with NaN values
extracted_features_without_nan = impute(extracted_features)

# # now we can filter out the relevant features
filtered_features = select_features(extracted_features_without_nan, labels)
filtered_features

 'Quat_X__query_similarity_count__query_None__threshold_0.0'
 'Quat_Y__query_similarity_count__query_None__threshold_0.0'
 'Quat_Z__query_similarity_count__query_None__threshold_0.0'
 'FreeAcc_X__query_similarity_count__query_None__threshold_0.0'
 'FreeAcc_Y__query_similarity_count__query_None__threshold_0.0'
 'FreeAcc_Z__query_similarity_count__query_None__threshold_0.0'] did not have any finite values. Filling with zeros.


Unnamed: 0,Quat_Z__permutation_entropy__dimension_3__tau_1,FreeAcc_X__spkt_welch_density__coeff_2,Quat_Z__number_peaks__n_1,Quat_Z__permutation_entropy__dimension_5__tau_1,Quat_Z__permutation_entropy__dimension_7__tau_1,Quat_Z__permutation_entropy__dimension_4__tau_1,Quat_Z__permutation_entropy__dimension_6__tau_1,FreeAcc_Y__spkt_welch_density__coeff_2,FreeAcc_Z__count_above_mean,FreeAcc_Z__count_below_mean,...,Quat_W__quantile__q_0.7,"Quat_W__cwt_coefficients__coeff_4__w_10__widths_(2, 5, 10, 20)",Quat_W__quantile__q_0.8,"Quat_W__cwt_coefficients__coeff_11__w_10__widths_(2, 5, 10, 20)","Quat_W__agg_linear_trend__attr_""intercept""__chunk_len_50__f_agg_""max""",FreeAcc_Y__quantile__q_0.2,Quat_W__c3__lag_1,"FreeAcc_Y__agg_linear_trend__attr_""stderr""__chunk_len_5__f_agg_""max""",Quat_W__c3__lag_3,Quat_W__c3__lag_2
0,1.061194,56.018399,41.0,1.889376,2.67982,1.461584,2.30324,17.791079,374.0,526.0,...,-0.536865,-0.686637,-0.516011,-1.10242,-0.492466,-0.893716,-0.193628,0.001465,-0.193315,-0.193492
1,1.01468,54.016443,34.0,1.687008,2.318529,1.358848,2.009177,19.836238,380.0,520.0,...,-0.477068,-0.726352,-0.444988,-1.06986,-0.524262,-0.825443,-0.148182,0.00129,-0.147898,-0.148057
2,0.981536,39.308865,30.0,1.617684,2.235148,1.299061,1.934073,15.670417,397.0,503.0,...,-0.739571,-0.758591,-0.714733,-1.224998,-0.717656,-0.632818,-0.43187,0.001509,-0.431712,-0.431803
3,1.058664,23.914923,42.0,1.881504,2.654709,1.464176,2.275606,20.995586,407.0,493.0,...,-0.724506,-0.694651,-0.7086,-1.16033,-0.721838,-1.198391,-0.424339,0.002573,-0.424216,-0.424284
4,1.002273,41.904795,32.0,1.685555,2.321651,1.342082,2.013891,38.466143,405.0,495.0,...,-0.716008,-0.632763,-0.697949,-1.17708,-0.737598,-1.379282,-0.425086,0.003042,-0.425055,-0.425084
5,0.964496,24.582561,27.0,1.557715,2.130537,1.254435,1.854608,43.012548,371.0,529.0,...,-0.649559,-0.63506,-0.641731,-1.030823,-0.651689,-1.02796,-0.309051,0.003677,-0.309056,-0.309065
6,0.927934,26.013436,24.0,1.432565,1.930526,1.17856,1.68176,36.475153,387.0,513.0,...,-0.610822,-0.834827,-0.5733,-1.13931,-0.570139,-1.144252,-0.274371,0.002151,-0.274255,-0.274322
7,1.002069,18.438008,32.0,1.659778,2.279625,1.332529,1.975709,38.345228,394.0,506.0,...,-0.61626,-0.665025,-0.59164,-1.040508,-0.60213,-0.996184,-0.26887,0.001862,-0.26832,-0.268601
8,1.097437,27.572436,47.0,2.003678,2.796809,1.550477,2.423629,26.041016,365.0,535.0,...,0.559195,0.520368,0.628198,0.844225,0.544726,-0.960712,0.167733,0.001988,0.167713,0.167726
9,1.123484,14.987625,51.0,2.08878,3.00995,1.59993,2.568575,19.807377,357.0,543.0,...,0.565382,0.448352,0.582116,0.726495,0.486965,-1.148734,0.150013,0.001959,0.149797,0.149898
