In [1]:
import pandas as pd
import plotly.graph_objects as go

from utils.feature_extraction_utils import zip_dicts, read_files, segment_df, extract_manual_features, extract_residual_features, check_peaks, construct_feature_collection, save_features

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# Root folder of all data artefacts; the raw recordings live in its "raw/" subfolder.
data_folder = "./../data/"
raw_data_folder = f"{data_folder}raw/"

# Names of the two signal columns that are processed throughout the notebook.
signal_names = ["low_res", "high_res"]

In [3]:
# Read everything under raw_data_folder through the project helper; the result
# is used below as a single DataFrame holding both signals plus the recording metadata.
raw_df = read_files(raw_data_folder)

100%|██████████| 440/440 [00:11<00:00, 39.28it/s]


In [4]:
# Show the loaded frame as a quick sanity check of its columns and values
raw_df

Unnamed: 0,low_res,high_res,daq_nr,misalignment,recording_nr,direction,speed
0,-0.021654,-0.21389,10,-0.4,10,H,425
1,-0.022122,-0.21820,10,-0.4,10,H,425
2,-0.022556,-0.22233,10,-0.4,10,H,425
3,-0.023192,-0.22621,10,-0.4,10,H,425
4,-0.023537,-0.22971,10,-0.4,10,H,425
...,...,...,...,...,...,...,...
12795,-0.022185,-0.19910,9,-0.3,9,V,750
12796,-0.021639,-0.19363,9,-0.3,9,V,750
12797,-0.020980,-0.18809,9,-0.3,9,V,750
12798,-0.020426,-0.18224,9,-0.3,9,V,750


In [5]:
# Plot the low_res and high_res signals of one specific recording
# (recording_nr 1, misalignment 0.5, direction 'H', speed 425); per the original
# note this is the first recording at the lowest speed and highest misalignment.
# The row filter is built once and shared by both traces instead of being
# re-evaluated for every x and y argument.
recording_mask = (
    (raw_df['recording_nr'] == 1)
    & (raw_df['misalignment'] == 0.5)
    & (raw_df['direction'] == 'H')
    & (raw_df['speed'] == 425)
)
recording_df = raw_df.loc[recording_mask]

fig = go.Figure()
for trace_name in ('low_res', 'high_res'):
    # x is the frame's row index; y is the selected signal column
    fig.add_trace(go.Scatter(x=recording_df.index, y=recording_df[trace_name], name=trace_name))
fig.update_layout(title='Low and High Resolution Signals for one specific recording', xaxis_title='Time', yaxis_title='Amplitude')
fig.show()

In [6]:
# Window sizes used for segmentation; edit this list to change them
window_size_list = [400, 800, 1600, 3200, 6400, 12800]

# Segmented version of the raw data for every window size
segment_dict = {
    win_len: segment_df(raw_df, win_len)
    for win_len in window_size_list
}

# Label frame per window size: every win_len-th row of the segmented frame,
# restricted to the label columns (assumes each window spans exactly win_len rows)
label_dict = {
    win_len: windowed_df.reset_index()[
        ['misalignment', 'recording_nr', 'direction', 'speed', 'window_id']
    ].iloc[::win_len, :]
    for win_len, windowed_df in segment_dict.items()
}

100%|██████████| 440/440 [00:01<00:00, 332.32it/s]
100%|██████████| 440/440 [00:01<00:00, 307.69it/s]
100%|██████████| 440/440 [00:01<00:00, 295.50it/s]
100%|██████████| 440/440 [00:02<00:00, 171.47it/s]
100%|██████████| 440/440 [00:01<00:00, 296.69it/s]
100%|██████████| 440/440 [00:01<00:00, 310.95it/s]


In [None]:
# Run check_peaks on every segmented frame, once for each signal in signal_names.
# Result layout: peaks_dict[signal][window_size]
peaks_dict = {
    sig: {
        win_len: check_peaks(windowed_df, sig)
        for win_len, windowed_df in segment_dict.items()
    }
    for sig in signal_names
}

In [None]:
# Extract manual features per signal
# Only extract manual features if at least two peaks are found
# (the first element of the check_peaks result must be greater than 1)
window_man_feat_dict = {window_size: [] for window_size in segment_dict}
for signal in signal_names:
    # The loop variable is deliberately NOT called `segment_df`: a top-level
    # for loop binds its targets in the notebook namespace, which would replace
    # the imported segment_df() helper with a DataFrame and break any re-run of
    # the segmentation cell.
    for window_size, windowed_df in segment_dict.items():
        if peaks_dict[signal][window_size][0] > 1:
            print(f"Extracting manual features for {signal} with window size {window_size}")
            # Only the feature frame is kept; the first returned element is not used here
            man_perm_list, man_feat_df = extract_manual_features(windowed_df, window_size, signal)
            window_man_feat_dict[window_size].append(man_feat_df)
        else:
            print(f"Not enough peaks found for {signal} with window size {window_size}")

In [None]:
# Extract residual features (computed from both signals in signal_names)
# Only extract residual features if at least two peaks are found in signal_names[1]
# NOTE: this cell appends to window_man_feat_dict in place, so re-running it
# without re-running the cell that initialises that dict adds the residual
# frames a second time.
# The loop variable is deliberately NOT called `segment_df`: a top-level for
# loop binds its targets in the notebook namespace, which would replace the
# imported segment_df() helper with a DataFrame and break any re-run of the
# segmentation cell.
for window_size, windowed_df in segment_dict.items():
    if peaks_dict[signal_names[1]][window_size][0] > 1:
        print(f"Extracting manual features for residuals with window size {window_size}")
        # Only the feature frame is kept; the first returned element is not used here
        man_perm_list, man_feat_df = extract_residual_features(windowed_df, window_size, signal_names)
        window_man_feat_dict[window_size].append(man_feat_df)
    else:
        print(f"Not enough peaks found for residuals with window size {window_size}")

In [None]:
# Join the manual feature frames collected per window size column-wise;
# window sizes for which nothing was extracted are left out of the result.
man_feat_dict = {
    win_len: pd.concat(frames, axis=1)
    for win_len, frames in window_man_feat_dict.items()
    if len(frames) > 0
}

In [None]:
# Feature collections built from the window size list (presumably keyed by
# window size, since they are zipped with segment_dict below)
fc_dict = construct_feature_collection(window_size_list)

# tsflex supports multiprocessing by default
lib_feat_dict = {
    win_len: collection.calculate(
        windowed_df.reset_index(),
        window_idx='begin',
        include_final_window=True,
        show_progress=True,
        return_df=True,
    )
    for win_len, collection, windowed_df in zip_dicts(fc_dict, segment_dict)
}

In [None]:
# Step 1: put the labels and the library features of each window size side by side
feat_dict = {
    win_len: pd.concat([labels, lib_feats], axis=1)
    for win_len, labels, lib_feats in zip_dicts(label_dict, lib_feat_dict)
}

# Step 2: reset the index and, where manual features exist for the window size,
# append them as extra columns
feat_dict = {
    win_len: (
        pd.concat([merged.reset_index(), man_feat_dict[win_len]], axis=1)
        if win_len in man_feat_dict
        else merged.reset_index()
    )
    for win_len, merged in feat_dict.items()
}

# Step 3: feature names are all columns except the index and label columns
feature_names_dict = {
    win_len: merged.drop(
        columns=['index', 'misalignment', 'recording_nr', 'direction', 'speed', 'window_id']
    ).columns.values
    for win_len, merged in feat_dict.items()
}

In [None]:
# Per window size: report the number of feature columns, then show the assembled frame
for window_size in feat_dict:
    print(f"Window size: {window_size}")
    print(f"Number of features: {len(feature_names_dict[window_size])}")
    display(feat_dict[window_size])

In [None]:
# Hand the assembled feature frames to the project helper for saving
# (data_folder is presumably the output location — confirm in utils.feature_extraction_utils)
save_features(feat_dict, data_folder)