# Aggregate features and merge datasets

## Imports

In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from datetime import timedelta

In [2]:
# Load the day-1 labelled recordings for each sensor
# (accelerometer, GPS, orientation), in that order.
acc, gps, ori = (
    pd.read_csv(csv_path)
    for csv_path in (
        '../data/raw/SensorAccelerometerData_labeled_day1.csv',
        '../data/raw/SensorGPSData_labeled_day1.csv',
        '../data/raw/SensorOrientationData_labeled_day1.csv',
    )
)

In [4]:
def data_prep(timestep):
    """Merge the three sensor frames and aggregate them into time bins.

    Works on the module-level ``acc``, ``gps`` and ``ori`` DataFrames.
    Their ``attr_time`` columns are parsed to datetimes in place, so the
    module-level frames are modified by this call.

    Parameters
    ----------
    timestep
        Bin width, passed unchanged as ``freq`` to ``pd.Grouper``
        (e.g. ``'250000us'``).

    Returns
    -------
    pandas.DataFrame
        One row per time bin: the mean of every remaining feature column,
        joined with one boolean ``Label_*`` column per posture that is
        True when any row in the bin carries that posture.
    """
    # Parse the timestamp strings on each sensor frame (written back in place)
    time_format = '%d.%m.%y %H:%M:%S.%f'
    for sensor in (acc, gps, ori):
        sensor['attr_time'] = pd.to_datetime(sensor['attr_time'], format=time_format)

    # Stack the sensors, order chronologically and index by timestamp
    combined = pd.concat([acc, gps, ori]).sort_values(by='attr_time').reset_index()
    combined.index = combined['attr_time']

    # Keep only rows flagged as validly labelled
    combined = combined[combined['label_valid']]

    # Remove bookkeeping columns and the labels that are not aggregated
    combined = combined.drop(
        columns=[
            'label_environment',
            'label_deviceposition',
            'label_activity',
            'index',
            'label_valid',
            'id',
            'attr_time',
        ]
    )

    # Blank out placeholder postures and normalise the climbing label names
    posture = (
        combined['label_posture']
        .replace(['unknown', 'Not Specified'], np.nan)
        .replace('Climbing (down)', 'ClimbingDown')
        .replace('Climbing (up)', 'ClimbingUp')
    )

    # One indicator column per posture
    posture_flags = pd.get_dummies(posture, prefix='Label')

    # Average the features and OR the posture flags within each time bin
    feature_cols = combined.columns.drop('label_posture')
    feature_means = combined.groupby(pd.Grouper(freq=timestep))[feature_cols].mean()
    label_any = posture_flags.groupby(pd.Grouper(freq=timestep))[posture_flags.columns].any()

    return feature_means.join(label_any)

In [4]:
# Aggregate at 250 ms, 500 ms and 1 s bin widths (expressed in microseconds).
merged_250ms, merged_500ms, merged_1s = (
    data_prep(bin_width)
    for bin_width in ('250000us', '500000us', '1000000us')
)

In [11]:
# Aggregate at a 10 s bin width (10,000,000 microseconds).
merged_10s = data_prep('10000000us')

In [12]:
# Write the 10 s aggregation to a CSV file.
# The 250 ms and 500 ms exports are currently disabled:
# merged_250ms.to_csv('../data/aggregated/aggregated_250ms.csv')
# merged_500ms.to_csv('../data/aggregated/aggregated_500ms.csv')
merged_10s.to_csv('../data/aggregated/aggregated_10s.csv')