In [5]:
# this file is making live predictions if a sample is of an OCD activity or not

%load_ext autoreload
%autoreload 2

from sklearn.preprocessing import StandardScaler
from tsfresh import select_features
from tsfresh.feature_extraction import MinimalFCParameters
from tsfresh.utilities.dataframe_functions import impute

from classification.classification import train_and_select_best_model, predict
from data_reading.phyphox import read_experiments_in_dir
from features import extract_timeseries_features
from file_handling import get_sub_directories
from preprocessing import segment_for_null_classification, segment_windows, concat_chunks_for_feature_extraction, \
    preprocess_chunks_for_null_test
from shared_constants import SEGMENTATION_NO_OVERLAP

sample_rate = 50

experiment_dir_path = "../../data/phyphox/full recordings/"
experiment_dirs = get_sub_directories(experiment_dir_path)
use_indoor = True
use_fingerprinting_approach = True
window_size = 100
feature_calculation_setting = MinimalFCParameters()

experiment_dirs_selected = ["Ana-2","Anne","Ariane","Cilly","Fabi","Julian","Julius","Wiktoria"]
experiment_dirs = [exp_dir for exp_dir in experiment_dirs if exp_dir.split("/")[-1] in experiment_dirs_selected]

selected_activities = ["washing hands", "drying hands"]

# Read data
chunks, null_chunks, y = read_experiments_in_dir(experiment_dirs, sample_rate, drop_lin_acc=True,
                                                 require_indoor=use_indoor, selected_activities=selected_activities)

del experiment_dirs
print("Finished reading data")

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload
Finished reading data


In [6]:
chunks_ocd, chunks_null_class = preprocess_chunks_for_null_test(chunks, null_chunks, use_indoor=use_indoor)
#labels = y_ocd.loc[:, "label"].squeeze()
#assert len(labels) == len(chunks_ocd)
del chunks
del null_chunks
chunks_ocd_segmented, labels_ocd_segmented, chunks_null_segmented, labels_null_segmented = segment_for_null_classification(chunks_ocd, chunks_null_class, window_size)

assert len(set(labels_ocd_segmented)) == 1
assert len(set(labels_null_segmented)) == 1

null_classification_df, labels_null_classification = concat_chunks_for_feature_extraction(
        [chunks_ocd_segmented, chunks_null_segmented],
        [labels_ocd_segmented, labels_null_segmented])
assert len(set(labels_null_classification)) == 2

In [7]:
X_null_class_classification = extract_timeseries_features(null_classification_df, use_indoor=use_indoor,
                                                              feature_set_config=feature_calculation_setting,
                                                              use_fingerprinting_approach=use_fingerprinting_approach)
impute(X_null_class_classification)
X_null_classification_selected = select_features(X_null_class_classification, labels_null_classification)

# store the features so that we can apply the same feature selection later on the test data
selected_features = X_null_classification_selected.columns
selected_features

Feature Extraction: 100%|██████████| 20/20 [00:07<00:00,  2.81it/s]


Index([                                         4,
                                                7,
                                               10,
                                                3,
                                                8,
                                                6,
                                                5,
                        'acceleration z__minimum',
                  'acceleration z_right__minimum',
             'acceleration z__standard_deviation',
                       'acceleration z__variance',
                           'gyroscope y__minimum',
                     'acceleration z_right__mean',
               'acceleration z_right__sum_values',
                'gyroscope y__standard_deviation',
                          'gyroscope y__variance',
                   'acceleration z_right__median',
                        'acceleration x__minimum',
                           'gyroscope y__maximum',
                           'gyr

In [8]:
scaler = StandardScaler()
X_null_classification_scaled = scaler.fit_transform(X_null_classification_selected)

In [9]:
trained_model = train_and_select_best_model(X_null_classification_scaled, labels_null_classification)

Training model
Best scoring model LDA has average f score of 0.87


In [37]:
# test on long recording

test_data_dir = experiment_dir_path + "Marvin/"
chunks_test, null_chunks_test, y_test = read_experiments_in_dir([test_data_dir], sample_rate, drop_lin_acc=True,
                                                 require_indoor=use_indoor)

chunks_ocd_test, chunks_null_class_test = preprocess_chunks_for_null_test(chunks_test, null_chunks_test, use_indoor=use_indoor)

# we need to zip the chunks back
chunks_test_all = list(sum(zip(chunks_ocd_test, chunks_null_class_test),())) # we might need to append the last element from the chunks_test at the end as well

chunks_test_segmented, labels_test_segmented = segment_windows(chunks_test_all, ["Test"] * len(chunks_test_all), window_size, SEGMENTATION_NO_OVERLAP)

  time_delta_index = pd.TimedeltaIndex(timestamp_to_date, unit=output_timestamp_unit)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  errors=errors,
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  c_new["combined_id"] = [(action_id, i)] * len(c_new)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  c_new["action_id"] = [action_id] * len(c_new)
A value is trying to be set on a copy of a slice from a DataFrame

In [39]:
print(len(chunks_test_all))
print(y_test[y_test["label"] == "washing hands"])
print(y_test[y_test["label"] == "drying hands"])

152
    index    start    end          label  hand
22     22  156.105  185.0  washing hands  both
24     24  213.400  236.1  washing hands  both
26     26  257.300  286.0  washing hands  both
    index   start    end         label  hand
23     23  187.81  210.0  drying hands  both
25     25  236.90  256.7  drying hands  both
27     27  287.30  302.3  drying hands  both
28     28  302.40  314.7  drying hands  both


In [50]:
import pandas as pd
# split into blocks of 10 chunks for which we want to predict
block_size = 10
segment_id = 0
blocks = [chunks_test_segmented[i*block_size:(i+1)*block_size] for i in range(int(len(chunks_test_segmented)/block_size))]
for b in blocks:
    current_df, _ = concat_chunks_for_feature_extraction(chunks=[b], labels=[pd.Series(["Test"] * block_size)]) # the labels don't matter
    X_test = extract_timeseries_features(current_df, use_indoor=use_indoor,
                                                              feature_set_config=feature_calculation_setting,
                                                              use_fingerprinting_approach=use_fingerprinting_approach)
    impute(X_test)
    X_test = X_test.loc[:, list(selected_features)]
    X_test_scaled = scaler.transform(X_test)
    predictions = predict(X=X_test, model=trained_model)
    for i in range(block_size):
        print("Action: {}: start time: {}: {}".format(b[i]["action_id"][0], b[i].reset_index()["index"][0].total_seconds(), predictions[i]))



Feature Extraction: 100%|██████████| 20/20 [00:00<00:00, 1350.61it/s]
Feature Extraction: 100%|██████████| 20/20 [00:00<00:00, 1173.69it/s]
Feature Extraction: 100%|██████████| 20/20 [00:00<00:00, 1162.60it/s]
Feature Extraction: 100%|██████████| 20/20 [00:00<00:00, 1413.34it/s]
Feature Extraction: 100%|██████████| 20/20 [00:00<00:00, 1464.67it/s]
Feature Extraction: 100%|██████████| 20/20 [00:00<00:00, 1050.74it/s]
Feature Extraction: 100%|██████████| 20/20 [00:00<00:00, 996.86it/s]
Feature Extraction: 100%|██████████| 20/20 [00:00<00:00, 1027.54it/s]
Feature Extraction: 100%|██████████| 20/20 [00:00<00:00, 1090.45it/s]
Feature Extraction: 100%|██████████| 20/20 [00:00<00:00, 1063.32it/s]
Feature Extraction: 100%|██████████| 20/20 [00:00<00:00, 1479.50it/s]
Feature Extraction: 100%|██████████| 20/20 [00:00<00:00, 960.01it/s]
Feature Extraction: 100%|██████████| 20/20 [00:00<00:00, 834.06it/s]
Feature Extraction: 100%|██████████| 20/20 [00:00<00:00, 908.80it/s]
Feature Extraction: 100%

Action: 0: start time: 11.79440875: null class
Action: 77: start time: 15.59440875: null class
Action: 77: start time: 17.59440875: null class
Action: 77: start time: 19.59440875: null class
Action: 1: start time: 22.29440875: null class
Action: 2: start time: 26.09440875: OCD activity
Action: 79: start time: 29.49440875: null class
Action: 3: start time: 31.79440875: null class
Action: 4: start time: 35.29440875: null class
Action: 81: start time: 37.99440875: null class
Action: 5: start time: 40.59440875: null class
Action: 6: start time: 43.99440875: null class
Action: 83: start time: 46.29440875: OCD activity
Action: 83: start time: 48.29440875: null class
Action: 83: start time: 50.29440875: null class
Action: 83: start time: 52.29440875: null class
Action: 83: start time: 54.29440875: null class
Action: 83: start time: 56.29440875: null class
Action: 7: start time: 59.79440875: OCD activity
Action: 8: start time: 64.19440875: OCD activity
Action: 9: start time: 69.09440875: null 