In [1]:
import datetime
import CreateDataset_new as cd
import pandas as pd
from os import listdir
import numpy as np
%load_ext autoreload
%autoreload 2

In [2]:
base_dir = '../datasets/'

In [3]:
base_directories = {'walking': 'Walking_2020-06-04_12-53-11/', 
                    'running': 'Running_2020-06-04_12-40-48/', 
                    'cycling': 'Cycling_2020-06-04_13-57-11/',
                    'sitting': 'Sitting_2020-06-04_13-28-48/',
                    'no_activity': 'No_Activity_2020-06-04_13-13-43/'}

In [4]:
sensor_types = sorted(listdir(base_dir + base_directories['walking']))
sensor_types

['Accelerometer.csv',
 'Gyroscope.csv',
 'Linear Acceleration.csv',
 'Location.csv',
 'Magnetometer.csv',
 'Pressure.csv',
 'Proximity.csv']

In [5]:
def create_merged_dataset(granularity, activity, describe=False):
    # create class instance
    df_creator = cd.CreateDataset(base_dir=base_dir, granularity=granularity, data_table=None)
    # add numerical data for each sensor type
    for i, sensor_type in enumerate(sensor_types):
        df_creator.add_numerical_dataset(base_directories[activity] + sensor_type)
    # merge sensory data
    df_creator.merge_datasets()
    if describe:
        # save data statistics to csv
        filename = 'results/describe_{}_gran_{}.csv'.format(activity, granularity)
        df_creator.data_table.describe().to_csv(filename)
    return df_creator.data_table

In [6]:
# create datasets for all activities
dataframes_dict_250 = {}
granularity = 250
for activity in list(base_directories.keys()):
    dataframes_dict_250[activity] = create_merged_dataset(granularity, activity, describe=True)
    dataframes_dict_250[activity].to_pickle(base_dir + 'dataframes/df_{}_gran_{}.pkl'.format(activity, granularity))

Reading data from Walking_2020-06-04_12-53-11/Accelerometer.csv
Reading data from Walking_2020-06-04_12-53-11/Gyroscope.csv
Reading data from Walking_2020-06-04_12-53-11/Linear Acceleration.csv
Reading data from Walking_2020-06-04_12-53-11/Location.csv
Reading data from Walking_2020-06-04_12-53-11/Magnetometer.csv
Reading data from Walking_2020-06-04_12-53-11/Pressure.csv
Reading data from Walking_2020-06-04_12-53-11/Proximity.csv
Reading data from Running_2020-06-04_12-40-48/Accelerometer.csv
Reading data from Running_2020-06-04_12-40-48/Gyroscope.csv
Reading data from Running_2020-06-04_12-40-48/Linear Acceleration.csv
Reading data from Running_2020-06-04_12-40-48/Location.csv
Reading data from Running_2020-06-04_12-40-48/Magnetometer.csv
Reading data from Running_2020-06-04_12-40-48/Pressure.csv
Reading data from Running_2020-06-04_12-40-48/Proximity.csv
Reading data from Cycling_2020-06-04_13-57-11/Accelerometer.csv
Reading data from Cycling_2020-06-04_13-57-11/Gyroscope.csv
Readin

In [7]:
# concatenate all activities
minutes = 0
list_dfs_250 =[]
for i, df_name in enumerate(dataframes_dict_250):
    dataframes_dict_250[df_name].index = dataframes_dict_250[df_name].index + datetime.timedelta(minutes=minutes)
    minutes += 5
    list_dfs_250.append(dataframes_dict_250[df_name])

In [8]:
# concatenate activities dfs
concatenated_dfs_250 = pd.concat(list_dfs_250)

In [9]:
# add column for labels per activity
concatenated_dfs_250['labelWalking'] = 0
concatenated_dfs_250['labelRunning'] = 0
concatenated_dfs_250['labelCycling'] = 0
concatenated_dfs_250['labelSitting'] = 0
concatenated_dfs_250['labelNoActivity'] = 0

In [10]:
# add labels to activities
mask_walk = (concatenated_dfs_250.index >= '1970-01-01 00:00:00.000') & (concatenated_dfs_250.index <= '1970-01-01 00:05:00.000')
mask_run = (concatenated_dfs_250.index > '1970-01-01 00:05:00.000') & (concatenated_dfs_250.index <= '1970-01-01 00:10:00.000')
mask_cycle = (concatenated_dfs_250.index > '1970-01-01 00:10:00.000') & (concatenated_dfs_250.index <= '1970-01-01 00:15:00.000')
mask_sitting = (concatenated_dfs_250.index > '1970-01-01 00:15:00.000') & (concatenated_dfs_250.index <= '1970-01-01 00:20:00.000')
mask_no_act = (concatenated_dfs_250.index > '1970-01-01 00:20:00.000') & (concatenated_dfs_250.index <= '1970-01-01 00:25:00.000')

In [11]:
concatenated_dfs_250.loc[mask_walk, 'labelWalking'] = 1
concatenated_dfs_250.loc[mask_run, 'labelRunning'] = 1
concatenated_dfs_250.loc[mask_cycle, 'labelCycling'] = 1
concatenated_dfs_250.loc[mask_sitting, 'labelSitting'] = 1
concatenated_dfs_250.loc[mask_no_act, 'labelNoActivity'] = 1

In [12]:
concatenated_dfs_250 = concatenated_dfs_250.sort_index()

In [13]:
# save df to pickle
concatenated_dfs_250.to_pickle(base_dir + 'dataframes/concat_df_gran_{}.pkl'.format(granularity))

In [14]:
# create datasets for all activities
dataframes_dict_60000 = {}
granularity = 60000
for activity in list(base_directories.keys()):
    dataframes_dict_60000[activity] = create_merged_dataset(granularity, activity, describe=True)
    dataframes_dict_60000[activity].to_pickle(base_dir + 'dataframes/df_{}_gran_{}.pkl'.format(activity, granularity))

Reading data from Walking_2020-06-04_12-53-11/Accelerometer.csv
Reading data from Walking_2020-06-04_12-53-11/Gyroscope.csv
Reading data from Walking_2020-06-04_12-53-11/Linear Acceleration.csv
Reading data from Walking_2020-06-04_12-53-11/Location.csv
Reading data from Walking_2020-06-04_12-53-11/Magnetometer.csv
Reading data from Walking_2020-06-04_12-53-11/Pressure.csv
Reading data from Walking_2020-06-04_12-53-11/Proximity.csv
Reading data from Running_2020-06-04_12-40-48/Accelerometer.csv
Reading data from Running_2020-06-04_12-40-48/Gyroscope.csv
Reading data from Running_2020-06-04_12-40-48/Linear Acceleration.csv
Reading data from Running_2020-06-04_12-40-48/Location.csv
Reading data from Running_2020-06-04_12-40-48/Magnetometer.csv
Reading data from Running_2020-06-04_12-40-48/Pressure.csv
Reading data from Running_2020-06-04_12-40-48/Proximity.csv
Reading data from Cycling_2020-06-04_13-57-11/Accelerometer.csv
Reading data from Cycling_2020-06-04_13-57-11/Gyroscope.csv
Readin

In [15]:
# concatenate all activities
minutes = 0
list_dfs_6000 =[]
for i, df_name in enumerate(dataframes_dict_60000):
    dataframes_dict_60000[df_name].index = dataframes_dict_60000[df_name].index + datetime.timedelta(minutes=minutes)
    minutes += 5
    list_dfs_6000.append(dataframes_dict_60000[df_name])

In [16]:
# concatenate activites dfs
concatenated_dfs_60000 = pd.concat(list_dfs_6000)

In [17]:
# add column for labels per activity
concatenated_dfs_60000['labelWalking'] = 0
concatenated_dfs_60000['labelRunning'] = 0
concatenated_dfs_60000['labelCycling'] = 0
concatenated_dfs_60000['labelSitting'] = 0
concatenated_dfs_60000['labelNoActivity'] = 0

In [18]:
# create masks for each activity
mask_walk = (concatenated_dfs_60000.index >= '1970-01-01 00:00:00.000') & (concatenated_dfs_60000.index <= '1970-01-01 00:05:00.000')
mask_run = (concatenated_dfs_60000.index > '1970-01-01 00:05:00.000') & (concatenated_dfs_60000.index <= '1970-01-01 00:10:00.000')
mask_cycle = (concatenated_dfs_60000.index > '1970-01-01 00:10:00.000') & (concatenated_dfs_60000.index <= '1970-01-01 00:15:00.000')
mask_sitting = (concatenated_dfs_60000.index > '1970-01-01 00:15:00.000') & (concatenated_dfs_60000.index <= '1970-01-01 00:20:00.000')
mask_no_act = (concatenated_dfs_60000.index > '1970-01-01 00:20:00.000') & (concatenated_dfs_60000.index <= '1970-01-01 00:25:00.000')

In [19]:
concatenated_dfs_60000.loc[mask_walk, 'labelWalking'] = 1
concatenated_dfs_60000.loc[mask_run, 'labelRunning'] = 1
concatenated_dfs_60000.loc[mask_cycle, 'labelCycling'] = 1
concatenated_dfs_60000.loc[mask_sitting, 'labelSitting'] = 1
concatenated_dfs_60000.loc[mask_no_act, 'labelNoActivity'] = 1

In [20]:
# sort index 
concatenated_dfs_60000 = concatenated_dfs_250.sort_index()

In [21]:
concatenated_dfs_60000

Unnamed: 0_level_0,Acceleration x (m/s^2),Acceleration y (m/s^2),Acceleration z (m/s^2),Gyroscope x (rad/s),Gyroscope y (rad/s),Gyroscope z (rad/s),Linear Acceleration x (m/s^2),Linear Acceleration y (m/s^2),Linear Acceleration z (m/s^2),Latitude (°),...,Magnetic field x (µT),Magnetic field y (µT),Magnetic field z (µT),Pressure (hPa),Distance (cm),labelWalking,labelRunning,labelCycling,labelSitting,labelNoActivity
Time (s),Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1970-01-01 00:00:00.000,-0.405317,2.483034,9.345501,,,,-2.351272,0.416187,0.085779,52.255127,...,-0.195000,-11.2350,8.0325,999.577393,8.0,1,0,0,0,0
1970-01-01 00:00:00.250,-0.856193,2.997881,8.358164,0.147191,-0.352289,-0.616319,-3.124942,0.620807,-0.895463,,...,-1.490400,-13.2192,10.0056,999.566650,,1,0,0,0,0
1970-01-01 00:00:00.500,-1.295935,3.048065,10.177966,-0.087631,-0.392785,-0.022192,-4.109661,0.417752,1.168546,,...,-3.168000,-14.2776,10.6128,999.546143,,1,0,0,0,0
1970-01-01 00:00:00.750,-0.481996,2.723746,7.952019,0.122121,0.219728,0.367726,-3.481357,0.163022,-1.017270,,...,-5.707200,-14.3640,11.1408,999.533813,,1,0,0,0,0
1970-01-01 00:00:01.000,0.037427,3.717198,9.854835,-0.137473,-0.215433,0.086601,-3.165712,1.391511,0.735769,,...,-6.482400,-14.4816,10.8144,999.530843,,1,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1970-01-01 00:24:59.000,1.441239,0.155206,9.457864,-0.000448,-0.001047,0.000819,-0.038599,-0.000575,-0.236111,,...,19.833600,-7.2816,-5.6400,1000.315186,,0,0,0,0,1
1970-01-01 00:24:59.250,1.441813,0.152257,9.459837,-0.000449,-0.001139,0.000701,-0.038774,-0.004694,-0.233923,,...,19.833600,-7.1712,-5.5488,1000.314209,,0,0,0,0,1
1970-01-01 00:24:59.500,1.441507,0.154459,9.456619,-0.000441,-0.001113,0.000408,-0.037468,-0.001889,-0.234335,,...,19.509600,-7.0992,-5.5296,1000.302490,,0,0,0,0,1
1970-01-01 00:24:59.750,1.438365,0.154613,9.455834,-0.000441,-0.001157,0.000784,-0.041328,-0.004679,-0.239249,,...,19.723199,-6.9312,-5.3232,1000.305501,,0,0,0,0,1
