In [1]:
import warnings
warnings.filterwarnings("ignore")

In [2]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import pickle
from ipywidgets import interact
%matplotlib inline

In [3]:
df = pickle.load(open("../data/dataset_consolidated.p", "rb"))
meta = pickle.load(open("../data/metadata.p", "rb"))

In [4]:
df2 = df.copy()

In [6]:
df2 = df2[['target', 'time_seconds',  'trial_type','trial_subtype',
       'trial_num','subject',
       'r.ankle Acceleration X (m/s^2)', 'r.ankle Acceleration Y (m/s^2)',
       'r.ankle Acceleration Z (m/s^2)', 'r.ankle Angular Velocity X (rad/s)',
       'r.ankle Angular Velocity Y (rad/s)',
       'r.ankle Angular Velocity Z (rad/s)', 'r.ankle Magnetic Field X (uT)',
       'r.ankle Magnetic Field Y (uT)', 'r.ankle Magnetic Field Z (uT)',
       'l.ankle Acceleration X (m/s^2)', 'l.ankle Acceleration Y (m/s^2)',
       'l.ankle Acceleration Z (m/s^2)', 'l.ankle Angular Velocity X (rad/s)',
       'l.ankle Angular Velocity Y (rad/s)',
       'l.ankle Angular Velocity Z (rad/s)', 'l.ankle Magnetic Field X (uT)',
       'l.ankle Magnetic Field Y (uT)', 'l.ankle Magnetic Field Z (uT)',
       'r.thigh Acceleration X (m/s^2)', 'r.thigh Acceleration Y (m/s^2)',
       'r.thigh Acceleration Z (m/s^2)', 'r.thigh Angular Velocity X (rad/s)',
       'r.thigh Angular Velocity Y (rad/s)',
       'r.thigh Angular Velocity Z (rad/s)', 'r.thigh Magnetic Field X (uT)',
       'r.thigh Magnetic Field Y (uT)', 'r.thigh Magnetic Field Z (uT)',
       'l.thigh Acceleration X (m/s^2)', 'l.thigh Acceleration Y (m/s^2)',
       'l.thigh Acceleration Z (m/s^2)', 'l.thigh Angular Velocity X (rad/s)',
       'l.thigh Angular Velocity Y (rad/s)',
       'l.thigh Angular Velocity Z (rad/s)', 'l.thigh Magnetic Field X (uT)',
       'l.thigh Magnetic Field Y (uT)', 'l.thigh Magnetic Field Z (uT)',
       'head Acceleration X (m/s^2)', 'head Acceleration Y (m/s^2)',
       'head Acceleration Z (m/s^2)', 'head Angular Velocity X (rad/s)',
       'head Angular Velocity Y (rad/s)', 'head Angular Velocity Z (rad/s)',
       'head Magnetic Field X (uT)', 'head Magnetic Field Y (uT)',
       'head Magnetic Field Z (uT)', 'sternum Acceleration X (m/s^2)',
       'sternum Acceleration Y (m/s^2)', 'sternum Acceleration Z (m/s^2)',
       'sternum Angular Velocity X (rad/s)',
       'sternum Angular Velocity Y (rad/s)',
       'sternum Angular Velocity Z (rad/s)', 'sternum Magnetic Field X (uT)',
       'sternum Magnetic Field Y (uT)', 'sternum Magnetic Field Z (uT)',
       'waist Acceleration X (m/s^2)', 'waist Acceleration Y (m/s^2)',
       'waist Acceleration Z (m/s^2)', 'waist Angular Velocity X (rad/s)',
       'waist Angular Velocity Y (rad/s)', 'waist Angular Velocity Z (rad/s)',
       'waist Magnetic Field X (uT)', 'waist Magnetic Field Y (uT)',
       'waist Magnetic Field Z (uT)', 'r.ankle resultant acceleration',
       'r.ankle resultant angular velocity',
       'r.ankle resultant magnetic field', 'l.ankle resultant acceleration',
       'l.ankle resultant angular velocity',
       'l.ankle resultant magnetic field', 'r.thigh resultant acceleration',
       'r.thigh resultant angular velocity',
       'r.thigh resultant magnetic field', 'l.thigh resultant acceleration',
       'l.thigh resultant angular velocity',
       'l.thigh resultant magnetic field', 'head resultant acceleration',
       'head resultant angular velocity', 'head resultant magnetic field',
       'sternum resultant acceleration', 'sternum resultant angular velocity',
       'sternum resultant magnetic field', 'waist resultant acceleration',
       'waist resultant angular velocity', 'waist resultant magnetic field']]

In [7]:
# For this dashboard we are using data from subject 1 and trial number equal to 1
# The reason is that using the whole dataset would result in aproximately 99 milion rows, and to running in 
# javascript was not possible
df2 = df2[(df2.subject == 1) & (df2.trial_num == 1)]

In [8]:
df2.shape

(39060, 90)

## Reorganization of the data

These next steps reorganize the data, including all the reading valures in one column and creating additional columns to categorize the readings in the different sensors, axis and measures.

In [10]:
columns = list(df2.columns[7:])
list_dfs = []
for col in columns:
    df_temp = df2[['time_seconds','trial_type','trial_subtype','trial_num'] + [col]]
    df_temp['sensor'] = meta.loc[col,'body_location']
    df_temp['axis'] = meta.loc[col,'axis']
    df_temp['measure'] = meta.loc[col,'measure']
    df_temp['reading'] = df2[col]
    df_temp.drop(col,axis=1,inplace=True)
    list_dfs.append(df_temp)

In [11]:
# Concat all the intermediary dataframes in one
df_dash = pd.concat(list_dfs,axis=0)

In [12]:
df_dash.head()

Unnamed: 0,time_seconds,trial_type,trial_subtype,trial_num,sensor,axis,measure,reading
0,0.0,ADLs,SQ,1,r.ankle,Y,acceleration,-0.240769
1,0.00782,ADLs,SQ,1,r.ankle,Y,acceleration,-0.245077
2,0.01563,ADLs,SQ,1,r.ankle,Y,acceleration,-0.240524
3,0.02344,ADLs,SQ,1,r.ankle,Y,acceleration,-0.238417
4,0.03125,ADLs,SQ,1,r.ankle,Y,acceleration,-0.242868


In [17]:
# read the sensor locations (X,Y) to plot the graphic with the human body sensors
df_sensors = pd.read_csv('./dashboard/data/sensor_location.csv')

In [18]:
df_sensors

Unnamed: 0,sensor,x,y,size
0,head,1.0,15.5,5
1,l.ankle,1.2,1.5,5
2,r.ankle,0.8,1.5,5
3,l.thigh,1.2,5.7,5
4,r.thigh,0.8,5.7,5
5,sternum,1.0,12.0,5
6,waist,1.0,8.0,5


In [19]:
df_dash = df_dash.merge(df_sensors, on='sensor', how='left')

In [20]:
df_dash.shape

(3241980, 11)

In [21]:
df_dash.columns

Index(['time_seconds', 'trial_type', 'trial_subtype', 'trial_num', 'sensor',
       'axis', 'measure', 'reading', 'x', 'y', 'size'],
      dtype='object')

In [22]:
df_dash[['time_seconds', 'trial_type', 'trial_subtype',
       'sensor', 'axis', 'measure', 'reading', 'x', 'y']].to_csv('./dashboard/data/dataset_dashboard_d3.csv',index=False)