# Analyze sleep routine by device event

In [1]:
# Please use absolute path.
# fyi. use `pwd` command for mac users.
DATASET_DIRECTORY = '/Users/osjun/Downloads/P3029'
USER_ID = 'P3029'

In [10]:
import glob, os
import pandas as pd

device_event_files = glob.glob(os.path.join(DATASET_DIRECTORY, 'DeviceEventEntity-*.csv'))

device_df = pd.concat([pd.read_csv(f) for f in device_event_files])
device_df['datetime'] = pd.to_datetime(device_df['timestamp'], utc=True, unit='ms')
device_df['datetime'] = device_df.datetime.dt.tz_convert('Asia/Seoul')
device_df = device_df.sort_values('timestamp')
device_df

Unnamed: 0,timestamp,type,datetime
0,1556582474331,SCREEN_ON,2019-04-30 09:01:14.331000+09:00
1,1556582477667,SCREEN_OFF,2019-04-30 09:01:17.667000+09:00
2,1556582499924,SCREEN_ON,2019-04-30 09:01:39.924000+09:00
3,1556582499968,UNLOCK,2019-04-30 09:01:39.968000+09:00
4,1556582556601,SCREEN_OFF,2019-04-30 09:02:36.601000+09:00
...,...,...,...
223,1557138793804,UNLOCK,2019-05-06 19:33:13.804000+09:00
224,1557138828823,SCREEN_OFF,2019-05-06 19:33:48.823000+09:00
225,1557139024557,UNLOCK,2019-05-06 19:37:04.557000+09:00
226,1557139024844,SCREEN_ON,2019-05-06 19:37:04.844000+09:00


In [11]:
import plotly.express as px

fig = px.scatter(x=device_df['datetime'], y=device_df['type'])
fig.show()

In [12]:
device_df['lead'] = device_df['datetime'].shift(-1)
device_df['diff'] = device_df['lead'] - device_df['datetime']
device_df

Unnamed: 0,timestamp,type,datetime,lead,diff
0,1556582474331,SCREEN_ON,2019-04-30 09:01:14.331000+09:00,2019-04-30 09:01:17.667000+09:00,0 days 00:00:03.336000
1,1556582477667,SCREEN_OFF,2019-04-30 09:01:17.667000+09:00,2019-04-30 09:01:39.924000+09:00,0 days 00:00:22.257000
2,1556582499924,SCREEN_ON,2019-04-30 09:01:39.924000+09:00,2019-04-30 09:01:39.968000+09:00,0 days 00:00:00.044000
3,1556582499968,UNLOCK,2019-04-30 09:01:39.968000+09:00,2019-04-30 09:02:36.601000+09:00,0 days 00:00:56.633000
4,1556582556601,SCREEN_OFF,2019-04-30 09:02:36.601000+09:00,2019-04-30 09:05:08.136000+09:00,0 days 00:02:31.535000
...,...,...,...,...,...
223,1557138793804,UNLOCK,2019-05-06 19:33:13.804000+09:00,2019-05-06 19:33:48.823000+09:00,0 days 00:00:35.019000
224,1557138828823,SCREEN_OFF,2019-05-06 19:33:48.823000+09:00,2019-05-06 19:37:04.557000+09:00,0 days 00:03:15.734000
225,1557139024557,UNLOCK,2019-05-06 19:37:04.557000+09:00,2019-05-06 19:37:04.844000+09:00,0 days 00:00:00.287000
226,1557139024844,SCREEN_ON,2019-05-06 19:37:04.844000+09:00,2019-05-06 19:37:22.718000+09:00,0 days 00:00:17.874000


In [67]:
# Sleep: no device event during 4 hours
sleep_df = device_df[device_df['diff'].dt.seconds >= 4 * 60 * 60] \
                    [['timestamp', 'datetime', 'lead', 'diff']] \
                    .rename(columns={'datetime': 'start_at', 'lead': 'end_at'})
sleep_df['start_at'] = sleep_df['start_at'].dt.round('15min')
sleep_df['end_at'] = sleep_df['end_at'].dt.round('15min')
sleep_df['routine'] = 'SLEEP'
sleep_df['user_id'] = USER_ID
sleep_df

Unnamed: 0,timestamp,start_at,end_at,diff,routine,user_id
707,1556646859337,2019-05-01 03:00:00+09:00,2019-05-01 08:15:00+09:00,0 days 05:25:43.514000,SLEEP,P3029
659,1556736514174,2019-05-02 03:45:00+09:00,2019-05-02 08:15:00+09:00,0 days 04:26:47.946000,SLEEP,P3029
1162,1556818724716,2019-05-03 02:45:00+09:00,2019-05-03 08:45:00+09:00,0 days 06:01:17.781000,SLEEP,P3029
440,1556909254829,2019-05-04 03:45:00+09:00,2019-05-04 09:15:00+09:00,0 days 05:22:39.026000,SLEEP,P3029
605,1556993208054,2019-05-05 03:00:00+09:00,2019-05-05 07:30:00+09:00,0 days 04:29:18.201000,SLEEP,P3029
634,1557081834574,2019-05-06 03:45:00+09:00,2019-05-06 12:00:00+09:00,0 days 08:11:30.170000,SLEEP,P3029


In [68]:
sleep_df.to_csv(f'../csv/routines_raw/{USER_ID}-sleep.csv', index=False)