In [85]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import datetime

from helper_functions import get_routine

In [86]:
# Read the merged dataset
dataset = pd.read_csv("all-merged.csv")
dataset

Unnamed: 0,Id,ActivityMinute,Calories,Steps,Value
0,2022484408,4/12/2016 7:21:00 AM,3.32064,17,97.0
1,2022484408,4/12/2016 7:23:00 AM,1.34901,0,60.0
2,2022484408,4/12/2016 7:24:00 AM,1.03770,0,58.0
3,2022484408,4/12/2016 7:26:00 AM,2.49048,7,53.0
4,2022484408,4/12/2016 7:27:00 AM,1.03770,0,53.0
...,...,...,...,...,...
200068,8877689391,5/12/2016 12:23:00 PM,8.24364,97,100.0
200069,8877689391,5/12/2016 12:24:00 PM,8.24364,101,97.0
200070,8877689391,5/12/2016 12:25:00 PM,7.75872,101,93.0
200071,8877689391,5/12/2016 1:54:00 PM,1.33353,0,63.0


In [87]:
# change to python datetime object
dataset['ActivityMinute']=pd.to_datetime(dataset['ActivityMinute']).astype("string")

In [88]:
dataset.rename(columns={'Value':'Heartrate'}, inplace=True)

### Feature Engineering
- Extract more info the ActivityMinute column.
- We can know more about the data if we analyse the timestamp. Wheather the recorded data is from morning activity, or daytime activity or from evening activity or during sleep hours.
- Create 4 distinct categories based on the hours of reporting.

In [105]:
dataset['is_morning'] = dataset['ActivityMinute'].apply(lambda x: get_routine("morning", x))
dataset['is_daytime'] = dataset['ActivityMinute'].apply(lambda x: get_routine("daytime", x))
dataset['is_evening'] = dataset['ActivityMinute'].apply(lambda x: get_routine("evening", x))
dataset['is_sleep'] = 0

In [108]:
dataset['is_sleep'] = np.where( np.logical_and(dataset['is_morning'] == 0, dataset['is_daytime'] == 0  , dataset['is_evening'] ==0) ,1, 0)

In [109]:
dataset

Unnamed: 0,Id,ActivityMinute,Calories,Steps,Heartrate,is_morning,is_daytime,is_evening,is_sleep
0,2022484408,2016-04-12 07:21:00,3.32064,17,97.0,1,0,0,0
1,2022484408,2016-04-12 07:23:00,1.34901,0,60.0,1,0,0,0
2,2022484408,2016-04-12 07:24:00,1.03770,0,58.0,1,0,0,0
3,2022484408,2016-04-12 07:26:00,2.49048,7,53.0,1,0,0,0
4,2022484408,2016-04-12 07:27:00,1.03770,0,53.0,1,0,0,0
...,...,...,...,...,...,...,...,...,...
200068,8877689391,2016-05-12 12:23:00,8.24364,97,100.0,0,1,0,0
200069,8877689391,2016-05-12 12:24:00,8.24364,101,97.0,0,1,0,0
200070,8877689391,2016-05-12 12:25:00,7.75872,101,93.0,0,1,0,0
200071,8877689391,2016-05-12 13:54:00,1.33353,0,63.0,0,1,0,0


In [110]:
# Save to CSV with new features
dataset.reset_index(inplace=True, drop=True)
dataset.to_csv('with-new-features.csv',index=False)

In [111]:
morning_calories = dataset.loc[dataset['is_morning'] == 1, 'Calories']
morning_steps = dataset.loc[dataset['is_morning'] == 1, 'Steps']
morning_heartrate = dataset.loc[dataset['is_morning'] == 1, 'Heartrate']

In [113]:
morning = dataset.query('is_morning == 0 and is_daytime == 0 and is_evening==0')
# morning = morning.drop(['ActivityMinute', 'is_morning', 'is_daytime', 'is_evening', 'is_sleep'], axis=1)
morning

Unnamed: 0,Id,ActivityMinute,Calories,Steps,Heartrate,is_morning,is_daytime,is_evening,is_sleep
5419,2022484408,2016-04-23 21:01:00,1.14147,0,68.0,0,0,0,1
5420,2022484408,2016-04-23 21:02:00,1.03770,0,68.0,0,0,0,1
5421,2022484408,2016-04-23 21:06:00,1.03770,0,58.0,0,0,0,1
5422,2022484408,2016-04-23 21:07:00,1.03770,0,59.0,0,0,0,1
5423,2022484408,2016-04-23 21:08:00,1.03770,0,59.0,0,0,0,1
...,...,...,...,...,...,...,...,...,...
200030,8877689391,2016-05-11 21:26:00,1.46640,0,80.0,0,0,0,1
200031,8877689391,2016-05-11 21:27:00,1.46640,0,72.0,0,0,0,1
200032,8877689391,2016-05-11 21:28:00,1.58860,0,77.0,0,0,0,1
200033,8877689391,2016-05-11 21:29:00,1.58860,0,74.0,0,0,0,1


In [114]:
dataset.groupby('is_morning').agg({'Calories': ['mean', 'min', 'max']})

Unnamed: 0_level_0,Calories,Calories,Calories
Unnamed: 0_level_1,mean,min,max
is_sleep,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2
0,2.238454,0.7751,17.779921
1,1.765203,0.7751,19.2556


In [130]:
dataset.groupby('is_daytime').agg({'Heartrate': ['mean', 'min', 'max']})

Unnamed: 0_level_0,Heartrate,Heartrate,Heartrate
Unnamed: 0_level_1,mean,min,max
is_daytime,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2
0,71.164041,38.0,197.0
1,77.951334,38.0,203.0


In [127]:
dataset.groupby('is_evening').agg({'Heartrate': ['mean', 'min', 'max']})

Unnamed: 0_level_0,Heartrate,Heartrate,Heartrate
Unnamed: 0_level_1,mean,min,max
is_evening,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2
0,72.563007,38.0,203.0
1,79.376471,40.0,197.0


In [124]:
dataset.groupby('is_sleep').agg({'Heartrate': ['mean', 'min', 'max']})

Unnamed: 0_level_0,Heartrate,Heartrate,Heartrate
Unnamed: 0_level_1,mean,min,max
is_sleep,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2
0,76.093728,38.0,203.0
1,71.061754,38.0,197.0
