In [30]:
%load_ext autoreload
%autoreload 2
%matplotlib inline  

import os

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import plotly.offline as pl
from plotly.graph_objs import *

import hervpd as hp

# Enable plotly's offline notebook mode so figures drawn with pl.iplot render
# inline. NOTE(review): connected=True is understood to load plotly.js from
# the CDN rather than embedding it, so rendering needs network access - confirm.
pl.init_notebook_mode(connected=True)

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


## Naming variables to reuse them later

In [31]:
# Directory holding the pre-processed HeRV spreadsheets. Set the HERV_DATA_PATH
# environment variable to point somewhere else (for example, on Linux:
# '/home/ju/GDrive/Projects/HeRV/Data/PreProcessed/') instead of editing this cell.
DATA_PATH = os.environ.get(
    'HERV_DATA_PATH',
    "C:\\Users\\ju\\GDrive\\Projects\\HeRV\\Data\\PreProcessed\\",
)
# os.path.join tolerates a directory given with or without a trailing separator.
filename = os.path.join(DATA_PATH, 'df_90_240_grouped.xlsx')

# Feature columns split by family so each family can be plotted on its own:
# frequency domain (fd) and time domain (td).
features_fd = ['hf', 'lf', 'lf_hf', 'vlf']
features_td = ['mhr', 'mrri', 'sdnn', 'pnn50', 'rmssd']
# Time-domain names first, then frequency-domain. Concatenation builds a new
# list, so neither source list is modified.
features_all = features_td + features_fd

In [32]:
# Load the grouped, pre-processed dataset from the spreadsheet named above.
df = pd.read_excel(io=filename)
# Show ten randomly chosen rows as a quick look at the columns and values.
df.sample(n=10)

Unnamed: 0,activity,hf,hfnu,lf,lf_hf,lfnu,mhr,mrri,nn50,order,...,sdnn,sess,start,stop,total_power,user,vlf,activity_gr1,activity_gr2,activity_gr3
2313,focused-active,505.367617,39.599439,770.831298,1.525288,60.400561,73.48723,822.71,37,0,...,68.746439,251,2017-10-08 20:47:30,2017-10-08 20:51:30,2409.429443,2,1133.230527,active_standing,focused,focused
2447,household-chores,118.615614,12.943046,797.82718,6.726156,87.056954,102.868696,585.519231,4,1,...,36.759257,270,2017-10-17 21:05:30,2017-10-17 21:09:30,1074.752359,2,158.309565,moving,moving,moving
1649,focused-active,346.993849,16.286399,1783.580608,5.140093,83.713601,76.32094,791.68932,31,2,...,64.858141,130,2017-10-29 18:07:59,2017-10-29 18:11:59,3132.207257,1,1001.6328,active_standing,focused,focused
436,sleep,3231.747884,15.111412,18154.392956,5.617515,84.888588,68.529257,961.819608,233,76,...,246.094346,31,2017-10-06 05:17:19,2017-10-06 05:21:19,24844.836712,0,3458.695872,sleep,sleep,sleep
1148,movement,521.814692,37.723718,861.438915,1.650852,62.276282,92.070235,662.5,49,17,...,70.947412,79,2017-10-15 15:55:32,2017-10-15 15:59:32,2073.673452,0,690.419845,moving,moving,moving
2636,exercise-high,580.012294,67.321659,281.541485,0.485406,32.678341,154.930366,399.016313,18,8,...,73.165961,306,2017-11-01 07:17:55,2017-11-01 07:21:55,1344.582746,3,483.028967,moving,moving,moving
730,movement,31.093566,9.797775,286.259769,9.206399,90.202225,105.888762,567.99768,2,20,...,27.773628,52,2017-10-12 21:07:28,2017-10-12 21:11:28,504.641392,0,187.288057,moving,moving,moving
2536,focused-active,341.55617,50.528575,334.410195,0.979078,49.471425,80.888676,743.899696,13,2,...,40.236787,284,2017-10-30 11:50:30,2017-10-30 11:54:30,994.224427,2,318.258062,active_standing,focused,focused
1886,focused-active,271.359147,22.916916,912.740606,3.363589,77.083084,66.901598,900.003663,22,22,...,52.744772,163,2017-11-03 05:37:14,2017-11-03 05:41:14,2201.127138,1,1017.027384,active_standing,focused,focused
1877,focused-active,486.249474,22.507726,1674.117504,3.442919,77.492274,65.381518,921.962264,42,13,...,61.976707,163,2017-11-03 05:01:14,2017-11-03 05:05:14,2855.260105,1,694.893127,active_standing,focused,focused


In [33]:
# just a reminder for selection of rows and columns simultaneously
# df.loc[df.user==0,['mrri', 'mhr']]

def boxplot_compare(df, feature, groupby):
    """Draw one box plot of `feature` for each distinct value of `groupby`.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame containing both the `feature` and `groupby` columns.
    feature : str
        Name of the column whose values are plotted on the y axis; it is
        also used as the y-axis title.
    groupby : str
        Name of the column whose distinct values define the boxes. Rows
        whose `groupby` value is missing (NaN) are not plotted.

    Returns
    -------
    None
        The figure is rendered inline through `pl.iplot`.

    Raises
    ------
    KeyError
        If `feature` or `groupby` is not a column of `df`.
    """
    # Fail early with a message that names the offending column(s).
    missing = [col for col in (feature, groupby) if col not in df.columns]
    if missing:
        raise KeyError('column(s) not found in DataFrame: {}'.format(missing))

    # A single groupby pass replaces one full-frame boolean scan per group.
    # sort=False keeps the groups in order of first appearance, which is the
    # order Series.unique() would give.
    data = [
        # Trace names are converted to str explicitly so numeric group keys
        # (e.g. user ids) do not depend on plotly coercing them.
        Box(y=values, name=str(label), showlegend=False)
        for label, values in df.groupby(groupby, sort=False)[feature]
    ]

    layout = Layout(yaxis=dict(title=feature, zeroline=False))
    fig = Figure(data=data, layout=layout)
    pl.iplot(fig)

------------

# Plotting features per user
-------------

## Time domain

In [34]:
# One figure per time-domain feature, boxes split by user (unpruned data).
for feature_name in features_td:
    boxplot_compare(df, feature=feature_name, groupby='user')

----------

## Frequency domain

In [9]:
# One figure per frequency-domain feature, boxes split by user (unpruned data).
for feature_name in features_fd:
    boxplot_compare(df, feature=feature_name, groupby='user')

## Those outliers are ruining it! Let us prune some of them

In [22]:
# Number of rows with a non-null activity before filtering.
n_before = df['activity'].count()
print('before pruning: ', n_before)

# One boolean mask per spectral column: True where the value is below 10000.
val_hf = df['hf'].lt(10000)
val_lf = df['lf'].lt(10000)
val_vlf = df['vlf'].lt(10000)

# Keep only the rows that pass all three masks.
df2 = df.loc[val_hf & val_lf & val_vlf]

n_after = df2['activity'].count()
print('after pruning: ', n_after)

before pruning:  3458
after pruning:  2789


In [23]:
# Frequency-domain features per user again, this time on the pruned frame.
for feature_name in features_fd:
    boxplot_compare(df2, feature=feature_name, groupby='user')

In [35]:
# Time-domain features per user on the pruned frame. A column name has to be
# passed for each call; the bare string 'feat' is not a column of the data and
# makes the lookup raise KeyError.
for feat in features_td:
    boxplot_compare(df2, feat, 'user')

------------

# Plotting features per activity
-------------

## Time domain, grouping #1 
-------------

In [13]:
# Time-domain features on the pruned frame, boxes split by activity grouping #1.
for feature_name in features_td:
    boxplot_compare(df2, feature=feature_name, groupby='activity_gr1')

## Time domain, grouping #2
----------

In [38]:
# Time-domain features only, matching this section's heading; the
# frequency-domain features for grouping #2 are plotted in their own section.
for feat in features_td:
    boxplot_compare(df2, feat, 'activity_gr2')

## Time domain, grouping #3 
-----

In [41]:
# Time-domain features on the pruned frame, boxes split by activity grouping #3.
for feature_name in features_td:
    boxplot_compare(df2, feature=feature_name, groupby='activity_gr3')

## Frequency domain, grouping #1
----------

In [44]:
# Frequency-domain features on the pruned frame, boxes split by activity grouping #1.
for feature_name in features_fd:
    boxplot_compare(df2, feature=feature_name, groupby='activity_gr1')

## Frequency domain, grouping #2
----------

In [48]:
# Frequency-domain features on the pruned frame, boxes split by activity grouping #2.
for feature_name in features_fd:
    boxplot_compare(df2, feature=feature_name, groupby='activity_gr2')

## Frequency domain, grouping #3
----------

In [47]:
# Frequency-domain features on the pruned frame, boxes split by activity grouping #3.
for feature_name in features_fd:
    boxplot_compare(df2, feature=feature_name, groupby='activity_gr3')