In [18]:
import pandas as pd
import os
import numpy as np
from collections import OrderedDict
from src import project_dir
from src.selection import kept_features, all_features
feature_cost_map = {
    'min': 0.002,
    'max': 0.002,
    'sum': 0.0099,
    'mean': 0.0084
}

In [57]:
dataset_file = os.path.join(project_dir, 'data', 'harth.csv')
use_all_features = True
selected_features = [35, 71, 11, 0, 10, 4, 9, 64, 27, 70, 2, 3, 28, 33, 62, 16, 34, 24, 26, 23]

In [58]:
sensors = pd.read_csv(dataset_file, nrows=1).columns.to_list()
num_sensors = len(sensors) - 2
print(f"{num_sensors} sensors found in the dataset: {sensors}")
features_dict = OrderedDict([
    (sensor_id, all_features) if use_all_features else
    (sensor_id, kept_features)
    for sensor_id in range(0, num_sensors)
])

all_feature_names = [
    feature
    for sensor_id, features in features_dict.items()
    for feature in features
]

selected_feature_list = np.array(all_feature_names)[selected_features]
print(f"{len(selected_feature_list)} Selected features:\n{selected_feature_list.tolist()}")

if use_all_features:
    print("Cannot compute cost of selected features with all features enabled.")

else:
    total_cost = sum(feature_cost_map[feature] for feature in selected_feature_list)
    print(f"Total cost of selected features: {total_cost}")

7 sensors found in the dataset: ['timestamp', 'back_x', 'back_y', 'back_z', 'thigh_x', 'thigh_y', 'thigh_z', 'label', 'subject']
20 Selected features:
['muPeaks', 'muPeaks', 'muPeaks', 'min', 'npeaks', 'sum', 'peaks', 'sum', 'std', 'npeaks', 'mean', 'std', 'sum', 'peaks', 'mean', 'sum', 'npeaks', 'min', 'mean', 'muPeaks']
Cannot compute cost of selected features with all features enabled.
