# Creating the daily dataframe

In [71]:
import json
import warnings
import numpy as np
import pandas as pd
import seaborn as sns
from datetime import datetime
import preprocessing_functions
from pymongo import MongoClient
warnings.filterwarnings("ignore")
from dataprep.eda import create_report
import feature_engineering_functions

In [72]:
# load fitbit data
fitbit = pd.read_pickle('../data/daily_fitbit_df_unprocessed.pkl')

---------------------------------------------------------
Fitbit Basic Preprocessing
---------------------------------------------------------
1. Select the experiment days
2. Drop duplicates
3. Convert data types falsely described as categorical

In [73]:
fitbit = preprocessing_functions.fitbit_basic_preprocessing(fitbit)
fitbit.columns

Index(['id', 'date', 'nightly_temperature', 'nremhr', 'rmssd', 'spo2',
       'full_sleep_breathing_rate', 'stress_score', 'sleep_points_percentage',
       'exertion_points_percentage', 'responsiveness_points_percentage',
       'daily_temperature_variation', 'badgeType', 'calories',
       'filteredDemographicVO2Max', 'distance', 'activityType', 'bpm',
       'lightly_active_minutes', 'moderately_active_minutes',
       'very_active_minutes', 'sedentary_minutes', 'mindfulness_session',
       'scl_avg', 'resting_hr', 'sleep_duration', 'minutesToFallAsleep',
       'minutesAsleep', 'minutesAwake', 'minutesAfterWakeup',
       'sleep_efficiency', 'sleep_deep_ratio', 'sleep_wake_ratio',
       'sleep_light_ratio', 'sleep_rem_ratio', 'steps',
       'minutes_in_default_zone_1', 'minutes_below_default_zone_1',
       'minutes_in_default_zone_2', 'minutes_in_default_zone_3', 'age',
       'gender', 'bmi', 'heart_rate_alert'],
      dtype='object')

In [74]:
fitbit["date"] = pd.to_datetime(pd.to_datetime(fitbit["date"]).dt.date)
fitbit = fitbit.sort_values(by='date', ascending=True)
fitbit.head()

Unnamed: 0,id,date,nightly_temperature,nremhr,rmssd,spo2,full_sleep_breathing_rate,stress_score,sleep_points_percentage,exertion_points_percentage,responsiveness_points_percentage,daily_temperature_variation,badgeType,calories,filteredDemographicVO2Max,distance,activityType,bpm,lightly_active_minutes,moderately_active_minutes,very_active_minutes,sedentary_minutes,mindfulness_session,scl_avg,resting_hr,sleep_duration,minutesToFallAsleep,minutesAsleep,minutesAwake,minutesAfterWakeup,sleep_efficiency,sleep_deep_ratio,sleep_wake_ratio,sleep_light_ratio,sleep_rem_ratio,steps,minutes_in_default_zone_1,minutes_below_default_zone_1,minutes_in_default_zone_2,minutes_in_default_zone_3,age,gender,bmi,heart_rate_alert
0,621e2e8e67b776a24055b564,2021-05-24,34.137687,57.432,89.603,,14.8,78.0,0.833333,0.675,0.866667,-1.788325,,2351.59,62.7921,6517.5,[Walk],71.701565,149.0,24.0,33.0,713.0,False,,62.07307,31260000.0,0.0,445.0,76.0,0.0,93.0,1.243243,0.987013,0.921642,1.341772,8833.0,83.0,1349.0,0.0,0.0,<30,MALE,<19,
24,621e328667b776a240281372,2021-05-24,,,,,,,,,,,,2619.85,41.38447,6568.9,[Walk],83.919698,132.0,7.0,30.0,1271.0,False,,,,,,,,,,,,,8550.0,278.0,766.0,29.0,1.0,>=30,MALE,>=30,
25,621e326767b776a24012e179,2021-05-24,,46.12,53.968,95.3,14.4,80.0,0.666667,0.925,0.766667,-4.129593,,2204.5,52.76058,11283.7,[Walk],68.275766,278.0,41.0,67.0,616.0,False,,52.516339,,,,,,,,,,,16992.0,131.0,1225.0,0.0,0.0,<30,FEMALE,<19,
26,621e332267b776a24092a584,2021-05-24,,,,,,,,,,,,3792.13,53.53183,16776.8,[Walk],78.842893,190.0,79.0,108.0,1063.0,False,,60.977217,,,,,,,,,,,21284.0,175.0,1130.0,9.0,0.0,<30,MALE,21.0,
27,621e333567b776a240a0c217,2021-05-24,,,,,,,,,,,[LIFETIME_DISTANCE],,,,,,,,,,False,,,,,,,,,,,,,,,,,,<30,MALE,21.0,


In [75]:
# read intra-day data from Mongo
fitbit = preprocessing_functions.fitbit_intraday_sleep(fitbit, 'rais')
fitbit.head(200)

Unnamed: 0,id,date,nightly_temperature,nremhr,rmssd,spo2,full_sleep_breathing_rate,stress_score,sleep_points_percentage,exertion_points_percentage,responsiveness_points_percentage,daily_temperature_variation,badgeType,calories,filteredDemographicVO2Max,distance,activityType,bpm,lightly_active_minutes,moderately_active_minutes,very_active_minutes,sedentary_minutes,mindfulness_session,scl_avg,resting_hr,sleep_duration,minutesToFallAsleep,minutesAsleep,minutesAwake,minutesAfterWakeup,sleep_efficiency,sleep_deep_ratio,sleep_wake_ratio,sleep_light_ratio,sleep_rem_ratio,steps,minutes_in_default_zone_1,minutes_below_default_zone_1,minutes_in_default_zone_2,minutes_in_default_zone_3,age,gender,bmi,heart_rate_alert,startTime,endTime
0,621e2e8e67b776a24055b564,2021-05-24,34.137687,57.432,89.603,,14.8,78.0,0.833333,0.675,0.866667,-1.788325,,2351.59,62.7921,6517.5,[Walk],71.701565,149.0,24.0,33.0,713.0,False,,62.07307,31260000.0,0.0,445.0,76.0,0.0,93.0,1.243243,0.987013,0.921642,1.341772,8833.0,83.0,1349.0,0.0,0.0,<30,MALE,<19,,2021-05-24T00:40:00.000,2021-05-24T09:21:00.000
1,621e328667b776a240281372,2021-05-24,,,,,,,,,,,,2619.85,41.38447,6568.9,[Walk],83.919698,132.0,7.0,30.0,1271.0,False,,,,,,,,,,,,,8550.0,278.0,766.0,29.0,1.0,>=30,MALE,>=30,,,
2,621e326767b776a24012e179,2021-05-24,,46.12,53.968,95.3,14.4,80.0,0.666667,0.925,0.766667,-4.129593,,2204.5,52.76058,11283.7,[Walk],68.275766,278.0,41.0,67.0,616.0,False,,52.516339,,,,,,,,,,,16992.0,131.0,1225.0,0.0,0.0,<30,FEMALE,<19,,,
3,621e332267b776a24092a584,2021-05-24,,,,,,,,,,,,3792.13,53.53183,16776.8,[Walk],78.842893,190.0,79.0,108.0,1063.0,False,,60.977217,,,,,,,,,,,21284.0,175.0,1130.0,9.0,0.0,<30,MALE,21.0,,,
4,621e333567b776a240a0c217,2021-05-24,,,,,,,,,,,[LIFETIME_DISTANCE],,,,,,,,,,False,,,,,,,,,,,,,,,,,,<30,MALE,21.0,,,
5,621e335a67b776a240bb12ff,2021-05-24,,,,,,,,,,-2.667528,,1745.64,43.03988,5574.0,,80.076435,234.0,11.0,13.0,682.0,False,,72.108105,,,,,,,,,,,8073.0,170.0,1256.0,12.0,2.0,>=30,FEMALE,19.0,,,
6,621e337667b776a240ce78ab,2021-05-24,33.662874,52.927,39.939,95.5,11.2,60.0,0.633333,0.55,0.633333,-1.160651,,2499.37,53.73803,5047.6,[Walk],73.011712,185.0,26.0,0.0,754.0,False,,60.863542,28500000.0,0.0,393.0,82.0,0.0,93.0,0.434211,1.093333,0.983108,0.663462,6858.0,6.0,1434.0,0.0,0.0,<30,MALE,21.0,,2021-05-24T00:26:30.000,2021-05-24T08:21:30.000
7,621e2ed667b776a24085d8d1,2021-05-24,,,,,,,,,,,,1398.73,,454.2,,86.172061,46.0,0.0,0.0,1394.0,False,,78.333219,,,,,,,,,,,686.0,65.0,381.0,0.0,0.0,<30,FEMALE,23.0,,,
8,621e314867b776a24029ebf9,2021-05-24,,,,,,,,,,-3.102195,,2559.16,55.36342,5488.0,,55.48961,94.0,12.0,54.0,838.0,False,,44.183477,,,,,,,,,,,7436.0,51.0,1389.0,0.0,0.0,<30,MALE,24.0,,,
9,621e329067b776a2402ffad2,2021-05-24,,,,,,85.0,0.833333,0.925,0.766667,-0.783995,,3420.41,47.75426,11812.2,[Walk],92.222049,265.0,51.0,84.0,579.0,False,,74.941883,,,,,,,,,,,16469.0,705.0,723.0,11.0,0.0,>=30,MALE,23.0,,,


-----------------------
# Merge all self-reported data with the fitbit data constructing a unified dataframe


# ttm

In [76]:
ttm = pd.read_pickle('../data/surveys/ttm_classification.pkl')
ttm = ttm.rename(columns = {"stage": "label_ttm_stage"})
ttm["date"] = pd.to_datetime(pd.to_datetime(ttm["date"]).dt.date)
ttm = ttm.sort_values(by='date', ascending=True)
ttm.drop(columns=['date'], inplace=True)
ttm.head(100)

Unnamed: 0,id,label_ttm_stage
0,621e314867b776a24029ebf9,Preparation
1,621e36c267b776a240ba2756,Action
2,621e328667b776a240281372,Maintenance
3,621e2f3967b776a240c654db,Preparation
4,621e32d967b776a240627414,Maintenance
5,621e34ec67b776a240d60873,Maintenance
6,621e332267b776a24092a584,Maintenance
7,621e326767b776a24012e179,Maintenance
8,621e375b67b776a240290cdc,Action
9,621e34db67b776a240c9c2be,Preparation


In [77]:
data = fitbit.merge(ttm, how='left', on=['id'])
data

Unnamed: 0,id,date,nightly_temperature,nremhr,rmssd,spo2,full_sleep_breathing_rate,stress_score,sleep_points_percentage,exertion_points_percentage,responsiveness_points_percentage,daily_temperature_variation,badgeType,calories,filteredDemographicVO2Max,distance,activityType,bpm,lightly_active_minutes,moderately_active_minutes,very_active_minutes,sedentary_minutes,mindfulness_session,scl_avg,resting_hr,sleep_duration,minutesToFallAsleep,minutesAsleep,minutesAwake,minutesAfterWakeup,sleep_efficiency,sleep_deep_ratio,sleep_wake_ratio,sleep_light_ratio,sleep_rem_ratio,steps,minutes_in_default_zone_1,minutes_below_default_zone_1,minutes_in_default_zone_2,minutes_in_default_zone_3,age,gender,bmi,heart_rate_alert,startTime,endTime,label_ttm_stage
0,621e2e8e67b776a24055b564,2021-05-24,34.137687,57.432,89.603,,14.8,78.0,0.833333,0.675,0.866667,-1.788325,,2351.59,62.79210,6517.5,[Walk],71.701565,149.0,24.0,33.0,713.0,False,,62.073070,31260000.0,0.0,445.0,76.0,0.0,93.0,1.243243,0.987013,0.921642,1.341772,8833.0,83.0,1349.0,0.0,0.0,<30,MALE,<19,,2021-05-24T00:40:00.000,2021-05-24T09:21:00.000,Maintenance
1,621e328667b776a240281372,2021-05-24,,,,,,,,,,,,2619.85,41.38447,6568.9,[Walk],83.919698,132.0,7.0,30.0,1271.0,False,,,,,,,,,,,,,8550.0,278.0,766.0,29.0,1.0,>=30,MALE,>=30,,,,Maintenance
2,621e326767b776a24012e179,2021-05-24,,46.120,53.968,95.3,14.4,80.0,0.666667,0.925,0.766667,-4.129593,,2204.50,52.76058,11283.7,[Walk],68.275766,278.0,41.0,67.0,616.0,False,,52.516339,,,,,,,,,,,16992.0,131.0,1225.0,0.0,0.0,<30,FEMALE,<19,,,,Maintenance
3,621e332267b776a24092a584,2021-05-24,,,,,,,,,,,,3792.13,53.53183,16776.8,[Walk],78.842893,190.0,79.0,108.0,1063.0,False,,60.977217,,,,,,,,,,,21284.0,175.0,1130.0,9.0,0.0,<30,MALE,21.0,,,,Maintenance
4,621e333567b776a240a0c217,2021-05-24,,,,,,,,,,,[LIFETIME_DISTANCE],,,,,,,,,,False,,,,,,,,,,,,,,,,,,<30,MALE,21.0,,,,Contemplation
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
5022,621e30b267b776a240c5e13f,2022-01-17,,,,,,,,,,,,1554.15,42.17696,209.4,,89.131283,33.0,0.0,0.0,1407.0,False,,,,,,,,,,,,,287.0,19.0,126.0,0.0,0.0,<30,FEMALE,21.0,,,,
5023,621e312a67b776a240164d59,2022-01-17,34.237581,,,,,,,,,-0.165995,[DAILY_FLOORS],1602.67,47.22494,3806.9,,67.473381,125.0,27.0,5.0,116.0,False,,58.157746,25800000.0,0.0,379.0,51.0,1.0,94.0,0.835443,0.728571,0.796667,0.902439,4966.0,114.0,563.0,0.0,0.0,>=30,MALE,25.0,,2022-01-17T00:45:00.000,2022-01-17T07:55:00.000,Contemplation
5024,621e309b67b776a240b532b0,2022-01-17,31.815133,,,,,,,,,-2.999818,[DAILY_FLOORS],2887.52,49.82090,9000.0,"[Sport, Walk]",74.452687,164.0,37.0,63.0,875.0,False,,64.007904,18060000.0,0.0,276.0,25.0,0.0,99.0,0.708333,0.609756,0.714286,1.066667,12096.0,129.0,1208.0,6.0,0.0,>=30,MALE,23.0,,2022-01-17T00:56:30.000,2022-01-17T05:57:30.000,
5025,621e333567b776a240a0c217,2022-01-17,,,,,,,,,,,,1569.60,,,,,0.0,0.0,0.0,1440.0,False,,,,,,,,,,,,,,,,,,<30,MALE,21.0,,,,Contemplation


# breq

In [78]:
breq = pd.read_pickle('../data/surveys/breq.pkl')
breq = breq.rename(columns = {"user_id": "id", "submitdate":"date", "breq_self_determination":"label_breq_self_determination"})
breq["date"] = pd.to_datetime(pd.to_datetime(breq["date"]).dt.date)
breq = breq.sort_values(by='date', ascending=True)
breq.drop_duplicates(subset='id', keep='last', inplace=True)  # keep only one record per user
breq = breq.drop(columns=['date', 'type','breq_amotivation', 'breq_external_regulation', 'breq_introjected_regulation', 'breq_identified_regulation', 'breq_intrinsic_regulation'])
breq.head(100)

Unnamed: 0,id,label_breq_self_determination
0,621e314867b776a24029ebf9,identified_regulation
0,621e328667b776a240281372,intrinsic_regulation
0,621e32d967b776a240627414,identified_regulation
0,621e36c267b776a240ba2756,intrinsic_regulation
0,621e2f3967b776a240c654db,intrinsic_regulation
0,621e332267b776a24092a584,identified_regulation
0,621e34ec67b776a240d60873,identified_regulation
1,621e375b67b776a240290cdc,identified_regulation
1,621e34db67b776a240c9c2be,intrinsic_regulation
1,621e36f967b776a240e5e7c9,introjected_regulation


In [79]:
data = data.merge(breq, how='left', on=['id'])
data.head()

Unnamed: 0,id,date,nightly_temperature,nremhr,rmssd,spo2,full_sleep_breathing_rate,stress_score,sleep_points_percentage,exertion_points_percentage,responsiveness_points_percentage,daily_temperature_variation,badgeType,calories,filteredDemographicVO2Max,distance,activityType,bpm,lightly_active_minutes,moderately_active_minutes,very_active_minutes,sedentary_minutes,mindfulness_session,scl_avg,resting_hr,sleep_duration,minutesToFallAsleep,minutesAsleep,minutesAwake,minutesAfterWakeup,sleep_efficiency,sleep_deep_ratio,sleep_wake_ratio,sleep_light_ratio,sleep_rem_ratio,steps,minutes_in_default_zone_1,minutes_below_default_zone_1,minutes_in_default_zone_2,minutes_in_default_zone_3,age,gender,bmi,heart_rate_alert,startTime,endTime,label_ttm_stage,label_breq_self_determination
0,621e2e8e67b776a24055b564,2021-05-24,34.137687,57.432,89.603,,14.8,78.0,0.833333,0.675,0.866667,-1.788325,,2351.59,62.7921,6517.5,[Walk],71.701565,149.0,24.0,33.0,713.0,False,,62.07307,31260000.0,0.0,445.0,76.0,0.0,93.0,1.243243,0.987013,0.921642,1.341772,8833.0,83.0,1349.0,0.0,0.0,<30,MALE,<19,,2021-05-24T00:40:00.000,2021-05-24T09:21:00.000,Maintenance,intrinsic_regulation
1,621e328667b776a240281372,2021-05-24,,,,,,,,,,,,2619.85,41.38447,6568.9,[Walk],83.919698,132.0,7.0,30.0,1271.0,False,,,,,,,,,,,,,8550.0,278.0,766.0,29.0,1.0,>=30,MALE,>=30,,,,Maintenance,intrinsic_regulation
2,621e326767b776a24012e179,2021-05-24,,46.12,53.968,95.3,14.4,80.0,0.666667,0.925,0.766667,-4.129593,,2204.5,52.76058,11283.7,[Walk],68.275766,278.0,41.0,67.0,616.0,False,,52.516339,,,,,,,,,,,16992.0,131.0,1225.0,0.0,0.0,<30,FEMALE,<19,,,,Maintenance,
3,621e332267b776a24092a584,2021-05-24,,,,,,,,,,,,3792.13,53.53183,16776.8,[Walk],78.842893,190.0,79.0,108.0,1063.0,False,,60.977217,,,,,,,,,,,21284.0,175.0,1130.0,9.0,0.0,<30,MALE,21.0,,,,Maintenance,identified_regulation
4,621e333567b776a240a0c217,2021-05-24,,,,,,,,,,,[LIFETIME_DISTANCE],,,,,,,,,,False,,,,,,,,,,,,,,,,,,<30,MALE,21.0,,,,Contemplation,intrinsic_regulation


# sema

In [80]:
sema = pd.read_pickle('../data/semas/semas_read_from_the_base_experiment_dates.pkl')
sema = preprocessing_functions.sema_basic_preprocessing(sema)
sema["date"] = pd.to_datetime(pd.to_datetime(sema["date"]).dt.date)
sema = sema.sort_values(by='date', ascending=True)
sema = sema.drop(columns='positive_feelings')
sema = sema.rename(columns = {"negative_feelings": "label_sema_negative_feelings"})
sema.head()

Unnamed: 0,id,date,label_sema_negative_feelings
0,621e2f3967b776a240c654db,2021-05-24,0
11,621e362467b776a2404ad513,2021-05-24,0
10,621e301e67b776a240608a72,2021-05-24,0
9,621e30e467b776a240e817c7,2021-05-24,1
8,621e36f967b776a240e5e7c9,2021-05-24,1


In [81]:
data = data.merge(sema, how='left', on=['id','date'])
data.head()

Unnamed: 0,id,date,nightly_temperature,nremhr,rmssd,spo2,full_sleep_breathing_rate,stress_score,sleep_points_percentage,exertion_points_percentage,responsiveness_points_percentage,daily_temperature_variation,badgeType,calories,filteredDemographicVO2Max,distance,activityType,bpm,lightly_active_minutes,moderately_active_minutes,very_active_minutes,sedentary_minutes,mindfulness_session,scl_avg,resting_hr,sleep_duration,minutesToFallAsleep,minutesAsleep,minutesAwake,minutesAfterWakeup,sleep_efficiency,sleep_deep_ratio,sleep_wake_ratio,sleep_light_ratio,sleep_rem_ratio,steps,minutes_in_default_zone_1,minutes_below_default_zone_1,minutes_in_default_zone_2,minutes_in_default_zone_3,age,gender,bmi,heart_rate_alert,startTime,endTime,label_ttm_stage,label_breq_self_determination,label_sema_negative_feelings
0,621e2e8e67b776a24055b564,2021-05-24,34.137687,57.432,89.603,,14.8,78.0,0.833333,0.675,0.866667,-1.788325,,2351.59,62.7921,6517.5,[Walk],71.701565,149.0,24.0,33.0,713.0,False,,62.07307,31260000.0,0.0,445.0,76.0,0.0,93.0,1.243243,0.987013,0.921642,1.341772,8833.0,83.0,1349.0,0.0,0.0,<30,MALE,<19,,2021-05-24T00:40:00.000,2021-05-24T09:21:00.000,Maintenance,intrinsic_regulation,
1,621e328667b776a240281372,2021-05-24,,,,,,,,,,,,2619.85,41.38447,6568.9,[Walk],83.919698,132.0,7.0,30.0,1271.0,False,,,,,,,,,,,,,8550.0,278.0,766.0,29.0,1.0,>=30,MALE,>=30,,,,Maintenance,intrinsic_regulation,
2,621e326767b776a24012e179,2021-05-24,,46.12,53.968,95.3,14.4,80.0,0.666667,0.925,0.766667,-4.129593,,2204.5,52.76058,11283.7,[Walk],68.275766,278.0,41.0,67.0,616.0,False,,52.516339,,,,,,,,,,,16992.0,131.0,1225.0,0.0,0.0,<30,FEMALE,<19,,,,Maintenance,,
3,621e332267b776a24092a584,2021-05-24,,,,,,,,,,,,3792.13,53.53183,16776.8,[Walk],78.842893,190.0,79.0,108.0,1063.0,False,,60.977217,,,,,,,,,,,21284.0,175.0,1130.0,9.0,0.0,<30,MALE,21.0,,,,Maintenance,identified_regulation,
4,621e333567b776a240a0c217,2021-05-24,,,,,,,,,,,[LIFETIME_DISTANCE],,,,,,,,,,False,,,,,,,,,,,,,,,,,,<30,MALE,21.0,,,,Contemplation,intrinsic_regulation,


# personality

In [82]:
big5 = pd.read_pickle('../data/surveys/personality.pkl')
big5 = big5.rename(columns={"user_id": "id", "submitdate": "date", "ipip_extraversion_category":"label_ipip_extraversion_category",
                            "ipip_agreeableness_category":"label_ipip_agreeableness_category", "ipip_conscientiousness_category":"label_ipip_conscientiousness_category",
                           "ipip_stability_category":"label_ipip_stability_category", "ipip_intellect_category":"label_ipip_intellect_category"})
big5["date"] = pd.to_datetime(pd.to_datetime(big5["date"]).dt.date)
big5 = big5.sort_values(by='date', ascending=True)
big5.drop_duplicates(subset='id', keep='last', inplace=True)
big5 = big5.drop(columns=['date', 'type','extraversion', 'agreeableness', 'conscientiousness', 'stability', 'intellect',  'gender'])
big5.head(100)

Unnamed: 0,id,label_ipip_extraversion_category,label_ipip_agreeableness_category,label_ipip_conscientiousness_category,label_ipip_stability_category,label_ipip_intellect_category
0,621e2e8e67b776a24055b564,LOW,LOW,HIGH,HIGH,AVERAGE
48,621e36c267b776a240ba2756,HIGH,AVERAGE,LOW,HIGH,LOW
39,621e34db67b776a240c9c2be,AVERAGE,HIGH,AVERAGE,HIGH,HIGH
3,621e2f3967b776a240c654db,AVERAGE,AVERAGE,LOW,HIGH,AVERAGE
37,621e341067b776a24037b105,LOW,LOW,LOW,LOW,LOW
5,621e2f7a67b776a240f14425,AVERAGE,AVERAGE,HIGH,HIGH,AVERAGE
30,621e335a67b776a240bb12ff,LOW,LOW,HIGH,AVERAGE,LOW
7,621e2fb367b776a24015accd,HIGH,AVERAGE,LOW,LOW,HIGH
8,621e2fce67b776a240279baa,HIGH,HIGH,AVERAGE,LOW,AVERAGE
25,621e32d967b776a240627414,HIGH,AVERAGE,HIGH,AVERAGE,HIGH


In [83]:
data = data.merge(big5, how='left', on=['id'])
data.head()

Unnamed: 0,id,date,nightly_temperature,nremhr,rmssd,spo2,full_sleep_breathing_rate,stress_score,sleep_points_percentage,exertion_points_percentage,responsiveness_points_percentage,daily_temperature_variation,badgeType,calories,filteredDemographicVO2Max,distance,activityType,bpm,lightly_active_minutes,moderately_active_minutes,very_active_minutes,sedentary_minutes,mindfulness_session,scl_avg,resting_hr,sleep_duration,minutesToFallAsleep,minutesAsleep,minutesAwake,minutesAfterWakeup,sleep_efficiency,sleep_deep_ratio,sleep_wake_ratio,sleep_light_ratio,sleep_rem_ratio,steps,minutes_in_default_zone_1,minutes_below_default_zone_1,minutes_in_default_zone_2,minutes_in_default_zone_3,age,gender,bmi,heart_rate_alert,startTime,endTime,label_ttm_stage,label_breq_self_determination,label_sema_negative_feelings,label_ipip_extraversion_category,label_ipip_agreeableness_category,label_ipip_conscientiousness_category,label_ipip_stability_category,label_ipip_intellect_category
0,621e2e8e67b776a24055b564,2021-05-24,34.137687,57.432,89.603,,14.8,78.0,0.833333,0.675,0.866667,-1.788325,,2351.59,62.7921,6517.5,[Walk],71.701565,149.0,24.0,33.0,713.0,False,,62.07307,31260000.0,0.0,445.0,76.0,0.0,93.0,1.243243,0.987013,0.921642,1.341772,8833.0,83.0,1349.0,0.0,0.0,<30,MALE,<19,,2021-05-24T00:40:00.000,2021-05-24T09:21:00.000,Maintenance,intrinsic_regulation,,LOW,LOW,HIGH,HIGH,AVERAGE
1,621e328667b776a240281372,2021-05-24,,,,,,,,,,,,2619.85,41.38447,6568.9,[Walk],83.919698,132.0,7.0,30.0,1271.0,False,,,,,,,,,,,,,8550.0,278.0,766.0,29.0,1.0,>=30,MALE,>=30,,,,Maintenance,intrinsic_regulation,,HIGH,AVERAGE,HIGH,LOW,LOW
2,621e326767b776a24012e179,2021-05-24,,46.12,53.968,95.3,14.4,80.0,0.666667,0.925,0.766667,-4.129593,,2204.5,52.76058,11283.7,[Walk],68.275766,278.0,41.0,67.0,616.0,False,,52.516339,,,,,,,,,,,16992.0,131.0,1225.0,0.0,0.0,<30,FEMALE,<19,,,,Maintenance,,,,,,,
3,621e332267b776a24092a584,2021-05-24,,,,,,,,,,,,3792.13,53.53183,16776.8,[Walk],78.842893,190.0,79.0,108.0,1063.0,False,,60.977217,,,,,,,,,,,21284.0,175.0,1130.0,9.0,0.0,<30,MALE,21.0,,,,Maintenance,identified_regulation,,LOW,LOW,LOW,AVERAGE,HIGH
4,621e333567b776a240a0c217,2021-05-24,,,,,,,,,,,[LIFETIME_DISTANCE],,,,,,,,,,False,,,,,,,,,,,,,,,,,,<30,MALE,21.0,,,,Contemplation,intrinsic_regulation,,AVERAGE,HIGH,AVERAGE,LOW,LOW


# stai

In [84]:
stai = pd.read_pickle('../data/surveys/stai.pkl')
stai = stai.rename(columns={"user_id": "id", "submitdate": "date", "stai_stress_category":"label_stai_stress_category"})
stai["date"] = pd.to_datetime(pd.to_datetime(stai["date"]).dt.date)
stai = stai.sort_values(by='date', ascending=True)
stai = stai.drop(columns=['type', 'stai_stress'])
stai.head()

Unnamed: 0,id,date,label_stai_stress_category
0,621e2e8e67b776a24055b564,2021-05-31,Below average
0,621e328667b776a240281372,2021-05-31,Average
0,621e329067b776a2402ffad2,2021-05-31,Above average
3,621e30e467b776a240e817c7,2021-05-31,Above average
4,621e32af67b776a24045b4cf,2021-05-31,Above average


In [85]:
data = data.merge(stai, how='left', on=['id','date'])
data.head()

Unnamed: 0,id,date,nightly_temperature,nremhr,rmssd,spo2,full_sleep_breathing_rate,stress_score,sleep_points_percentage,exertion_points_percentage,responsiveness_points_percentage,daily_temperature_variation,badgeType,calories,filteredDemographicVO2Max,distance,activityType,bpm,lightly_active_minutes,moderately_active_minutes,very_active_minutes,sedentary_minutes,mindfulness_session,scl_avg,resting_hr,sleep_duration,minutesToFallAsleep,minutesAsleep,minutesAwake,minutesAfterWakeup,sleep_efficiency,sleep_deep_ratio,sleep_wake_ratio,sleep_light_ratio,sleep_rem_ratio,steps,minutes_in_default_zone_1,minutes_below_default_zone_1,minutes_in_default_zone_2,minutes_in_default_zone_3,age,gender,bmi,heart_rate_alert,startTime,endTime,label_ttm_stage,label_breq_self_determination,label_sema_negative_feelings,label_ipip_extraversion_category,label_ipip_agreeableness_category,label_ipip_conscientiousness_category,label_ipip_stability_category,label_ipip_intellect_category,label_stai_stress_category
0,621e2e8e67b776a24055b564,2021-05-24,34.137687,57.432,89.603,,14.8,78.0,0.833333,0.675,0.866667,-1.788325,,2351.59,62.7921,6517.5,[Walk],71.701565,149.0,24.0,33.0,713.0,False,,62.07307,31260000.0,0.0,445.0,76.0,0.0,93.0,1.243243,0.987013,0.921642,1.341772,8833.0,83.0,1349.0,0.0,0.0,<30,MALE,<19,,2021-05-24T00:40:00.000,2021-05-24T09:21:00.000,Maintenance,intrinsic_regulation,,LOW,LOW,HIGH,HIGH,AVERAGE,
1,621e328667b776a240281372,2021-05-24,,,,,,,,,,,,2619.85,41.38447,6568.9,[Walk],83.919698,132.0,7.0,30.0,1271.0,False,,,,,,,,,,,,,8550.0,278.0,766.0,29.0,1.0,>=30,MALE,>=30,,,,Maintenance,intrinsic_regulation,,HIGH,AVERAGE,HIGH,LOW,LOW,
2,621e326767b776a24012e179,2021-05-24,,46.12,53.968,95.3,14.4,80.0,0.666667,0.925,0.766667,-4.129593,,2204.5,52.76058,11283.7,[Walk],68.275766,278.0,41.0,67.0,616.0,False,,52.516339,,,,,,,,,,,16992.0,131.0,1225.0,0.0,0.0,<30,FEMALE,<19,,,,Maintenance,,,,,,,,
3,621e332267b776a24092a584,2021-05-24,,,,,,,,,,,,3792.13,53.53183,16776.8,[Walk],78.842893,190.0,79.0,108.0,1063.0,False,,60.977217,,,,,,,,,,,21284.0,175.0,1130.0,9.0,0.0,<30,MALE,21.0,,,,Maintenance,identified_regulation,,LOW,LOW,LOW,AVERAGE,HIGH,
4,621e333567b776a240a0c217,2021-05-24,,,,,,,,,,,[LIFETIME_DISTANCE],,,,,,,,,,False,,,,,,,,,,,,,,,,,,<30,MALE,21.0,,,,Contemplation,intrinsic_regulation,,AVERAGE,HIGH,AVERAGE,LOW,LOW,


# panas

In [86]:
panas = pd.read_pickle('../data/surveys/panas_classification.pkl')
panas = panas.rename(columns={"user_id": "id", "submitdate": "date", "negative_affect_category":"label_panas_negative_affect"})
panas["date"] = pd.to_datetime(pd.to_datetime(panas["date"]).dt.date)
panas = panas.sort_values(by='date', ascending=True)
panas.head()

Unnamed: 0,id,date,label_panas_negative_affect
0,621e329067b776a2402ffad2,2021-05-31,Average
0,621e328667b776a240281372,2021-05-31,Above average
4,621e32af67b776a24045b4cf,2021-05-31,Above average
6,621e301e67b776a240608a72,2021-05-31,Average
2,621e32d967b776a240627414,2021-05-31,Average


In [87]:
data = data.merge(panas, how='left', on=['id','date'])
data.head(1000)

Unnamed: 0,id,date,nightly_temperature,nremhr,rmssd,spo2,full_sleep_breathing_rate,stress_score,sleep_points_percentage,exertion_points_percentage,responsiveness_points_percentage,daily_temperature_variation,badgeType,calories,filteredDemographicVO2Max,distance,activityType,bpm,lightly_active_minutes,moderately_active_minutes,very_active_minutes,sedentary_minutes,mindfulness_session,scl_avg,resting_hr,sleep_duration,minutesToFallAsleep,minutesAsleep,minutesAwake,minutesAfterWakeup,sleep_efficiency,sleep_deep_ratio,sleep_wake_ratio,sleep_light_ratio,sleep_rem_ratio,steps,minutes_in_default_zone_1,minutes_below_default_zone_1,minutes_in_default_zone_2,minutes_in_default_zone_3,age,gender,bmi,heart_rate_alert,startTime,endTime,label_ttm_stage,label_breq_self_determination,label_sema_negative_feelings,label_ipip_extraversion_category,label_ipip_agreeableness_category,label_ipip_conscientiousness_category,label_ipip_stability_category,label_ipip_intellect_category,label_stai_stress_category,label_panas_negative_affect
0,621e2e8e67b776a24055b564,2021-05-24,34.137687,57.432,89.603,,14.8,78.0,0.833333,0.675,0.866667,-1.788325,,2351.59,62.79210,6517.5,[Walk],71.701565,149.0,24.0,33.0,713.0,False,,62.073070,31260000.0,0.0,445.0,76.0,0.0,93.0,1.243243,0.987013,0.921642,1.341772,8833.0,83.0,1349.0,0.0,0.0,<30,MALE,<19,,2021-05-24T00:40:00.000,2021-05-24T09:21:00.000,Maintenance,intrinsic_regulation,,LOW,LOW,HIGH,HIGH,AVERAGE,,
1,621e328667b776a240281372,2021-05-24,,,,,,,,,,,,2619.85,41.38447,6568.9,[Walk],83.919698,132.0,7.0,30.0,1271.0,False,,,,,,,,,,,,,8550.0,278.0,766.0,29.0,1.0,>=30,MALE,>=30,,,,Maintenance,intrinsic_regulation,,HIGH,AVERAGE,HIGH,LOW,LOW,,
2,621e326767b776a24012e179,2021-05-24,,46.120,53.968,95.3,14.4,80.0,0.666667,0.925,0.766667,-4.129593,,2204.50,52.76058,11283.7,[Walk],68.275766,278.0,41.0,67.0,616.0,False,,52.516339,,,,,,,,,,,16992.0,131.0,1225.0,0.0,0.0,<30,FEMALE,<19,,,,Maintenance,,,,,,,,,
3,621e332267b776a24092a584,2021-05-24,,,,,,,,,,,,3792.13,53.53183,16776.8,[Walk],78.842893,190.0,79.0,108.0,1063.0,False,,60.977217,,,,,,,,,,,21284.0,175.0,1130.0,9.0,0.0,<30,MALE,21.0,,,,Maintenance,identified_regulation,,LOW,LOW,LOW,AVERAGE,HIGH,,
4,621e333567b776a240a0c217,2021-05-24,,,,,,,,,,,[LIFETIME_DISTANCE],,,,,,,,,,False,,,,,,,,,,,,,,,,,,<30,MALE,21.0,,,,Contemplation,intrinsic_regulation,,AVERAGE,HIGH,AVERAGE,LOW,LOW,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
995,621e2efa67b776a2409dd1c3,2021-06-06,,,,,,,,,,,,2132.79,40.06020,3823.6,,95.635257,293.0,0.0,0.0,1147.0,False,,72.250600,,,,,,,,,,,5459.0,441.0,429.0,0.0,0.0,<30,FEMALE,21.0,,,,,,,,,,,,,
996,621e33ed67b776a2401cf5f7,2021-06-06,33.996583,,,,,,,,,-1.249856,,2298.16,49.76804,1472.1,,64.514724,133.0,0.0,0.0,808.0,False,,55.461127,29940000.0,0.0,445.0,54.0,0.0,94.0,1.009259,0.794118,0.962185,1.175824,2011.0,0.0,1440.0,0.0,0.0,<30,MALE,26.0,,2021-06-06T05:10:30.000,2021-06-06T13:30:00.000,Precontemplation,intrinsic_regulation,0.0,HIGH,HIGH,AVERAGE,HIGH,HIGH,,
997,621e351a67b776a240f6204b,2021-06-06,33.651786,81.510,17.497,,17.4,0.0,0.000000,0.000,0.000000,-0.812610,,2301.46,53.95773,1824.7,"[Walk, Bike]",91.889716,61.0,22.0,44.0,941.0,False,,64.050778,15060000.0,0.0,208.0,43.0,3.0,85.0,0.611111,0.728814,0.668478,0.491803,3843.0,126.0,606.0,19.0,0.0,<30,MALE,22.0,,2021-06-06T03:57:00.000,2021-06-06T08:08:30.000,Action,intrinsic_regulation,0.0,,,,,,,
998,621e351a67b776a240f6204b,2021-06-06,33.651786,81.510,17.497,,17.4,0.0,0.000000,0.000,0.000000,-0.812610,,2301.46,53.95773,1824.7,"[Walk, Bike]",91.889716,61.0,22.0,44.0,941.0,False,,64.050778,15060000.0,0.0,208.0,43.0,3.0,85.0,0.611111,0.728814,0.668478,0.491803,3843.0,126.0,606.0,19.0,0.0,<30,MALE,22.0,,2021-06-06T03:57:00.000,2021-06-06T08:08:30.000,Action,intrinsic_regulation,0.0,,,,,,,


In [88]:
data.to_pickle("../data/unified_dataframe/data_unprocessed.pkl")

----------------------------------------------
# Checking preprocessing steps 

In [89]:
data = pd.read_pickle('../data/unified_dataframe/data_unprocessed.pkl')
data

Unnamed: 0,id,date,nightly_temperature,nremhr,rmssd,spo2,full_sleep_breathing_rate,stress_score,sleep_points_percentage,exertion_points_percentage,responsiveness_points_percentage,daily_temperature_variation,badgeType,calories,filteredDemographicVO2Max,distance,activityType,bpm,lightly_active_minutes,moderately_active_minutes,very_active_minutes,sedentary_minutes,mindfulness_session,scl_avg,resting_hr,sleep_duration,minutesToFallAsleep,minutesAsleep,minutesAwake,minutesAfterWakeup,sleep_efficiency,sleep_deep_ratio,sleep_wake_ratio,sleep_light_ratio,sleep_rem_ratio,steps,minutes_in_default_zone_1,minutes_below_default_zone_1,minutes_in_default_zone_2,minutes_in_default_zone_3,age,gender,bmi,heart_rate_alert,startTime,endTime,label_ttm_stage,label_breq_self_determination,label_sema_negative_feelings,label_ipip_extraversion_category,label_ipip_agreeableness_category,label_ipip_conscientiousness_category,label_ipip_stability_category,label_ipip_intellect_category,label_stai_stress_category,label_panas_negative_affect
0,621e2e8e67b776a24055b564,2021-05-24,34.137687,57.432,89.603,,14.8,78.0,0.833333,0.675,0.866667,-1.788325,,2351.59,62.79210,6517.5,[Walk],71.701565,149.0,24.0,33.0,713.0,False,,62.073070,31260000.0,0.0,445.0,76.0,0.0,93.0,1.243243,0.987013,0.921642,1.341772,8833.0,83.0,1349.0,0.0,0.0,<30,MALE,<19,,2021-05-24T00:40:00.000,2021-05-24T09:21:00.000,Maintenance,intrinsic_regulation,,LOW,LOW,HIGH,HIGH,AVERAGE,,
1,621e328667b776a240281372,2021-05-24,,,,,,,,,,,,2619.85,41.38447,6568.9,[Walk],83.919698,132.0,7.0,30.0,1271.0,False,,,,,,,,,,,,,8550.0,278.0,766.0,29.0,1.0,>=30,MALE,>=30,,,,Maintenance,intrinsic_regulation,,HIGH,AVERAGE,HIGH,LOW,LOW,,
2,621e326767b776a24012e179,2021-05-24,,46.120,53.968,95.3,14.4,80.0,0.666667,0.925,0.766667,-4.129593,,2204.50,52.76058,11283.7,[Walk],68.275766,278.0,41.0,67.0,616.0,False,,52.516339,,,,,,,,,,,16992.0,131.0,1225.0,0.0,0.0,<30,FEMALE,<19,,,,Maintenance,,,,,,,,,
3,621e332267b776a24092a584,2021-05-24,,,,,,,,,,,,3792.13,53.53183,16776.8,[Walk],78.842893,190.0,79.0,108.0,1063.0,False,,60.977217,,,,,,,,,,,21284.0,175.0,1130.0,9.0,0.0,<30,MALE,21.0,,,,Maintenance,identified_regulation,,LOW,LOW,LOW,AVERAGE,HIGH,,
4,621e333567b776a240a0c217,2021-05-24,,,,,,,,,,,[LIFETIME_DISTANCE],,,,,,,,,,False,,,,,,,,,,,,,,,,,,<30,MALE,21.0,,,,Contemplation,intrinsic_regulation,,AVERAGE,HIGH,AVERAGE,LOW,LOW,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
7799,621e30b267b776a240c5e13f,2022-01-17,,,,,,,,,,,,1554.15,42.17696,209.4,,89.131283,33.0,0.0,0.0,1407.0,False,,,,,,,,,,,,,287.0,19.0,126.0,0.0,0.0,<30,FEMALE,21.0,,,,,,,,,,,,,
7800,621e312a67b776a240164d59,2022-01-17,34.237581,,,,,,,,,-0.165995,[DAILY_FLOORS],1602.67,47.22494,3806.9,,67.473381,125.0,27.0,5.0,116.0,False,,58.157746,25800000.0,0.0,379.0,51.0,1.0,94.0,0.835443,0.728571,0.796667,0.902439,4966.0,114.0,563.0,0.0,0.0,>=30,MALE,25.0,,2022-01-17T00:45:00.000,2022-01-17T07:55:00.000,Contemplation,intrinsic_regulation,,AVERAGE,LOW,HIGH,AVERAGE,LOW,,
7801,621e309b67b776a240b532b0,2022-01-17,31.815133,,,,,,,,,-2.999818,[DAILY_FLOORS],2887.52,49.82090,9000.0,"[Sport, Walk]",74.452687,164.0,37.0,63.0,875.0,False,,64.007904,18060000.0,0.0,276.0,25.0,0.0,99.0,0.708333,0.609756,0.714286,1.066667,12096.0,129.0,1208.0,6.0,0.0,>=30,MALE,23.0,,2022-01-17T00:56:30.000,2022-01-17T05:57:30.000,,,,,,,,,,
7802,621e333567b776a240a0c217,2022-01-17,,,,,,,,,,,,1569.60,,,,,0.0,0.0,0.0,1440.0,False,,,,,,,,,,,,,,,,,,<30,MALE,21.0,,,,Contemplation,intrinsic_regulation,,AVERAGE,HIGH,AVERAGE,LOW,LOW,,


# date engineering

In [90]:
data = preprocessing_functions.date_engineering(data)
# We do not process the startTime and endTime columns as we only use them for feature engineering later and then drop them completely
data

Unnamed: 0,id,date,nightly_temperature,nremhr,rmssd,spo2,full_sleep_breathing_rate,stress_score,sleep_points_percentage,exertion_points_percentage,responsiveness_points_percentage,daily_temperature_variation,badgeType,calories,filteredDemographicVO2Max,distance,activityType,bpm,lightly_active_minutes,moderately_active_minutes,very_active_minutes,sedentary_minutes,mindfulness_session,scl_avg,resting_hr,sleep_duration,minutesToFallAsleep,minutesAsleep,minutesAwake,minutesAfterWakeup,sleep_efficiency,sleep_deep_ratio,sleep_wake_ratio,sleep_light_ratio,sleep_rem_ratio,steps,minutes_in_default_zone_1,minutes_below_default_zone_1,minutes_in_default_zone_2,minutes_in_default_zone_3,age,gender,bmi,heart_rate_alert,startTime,endTime,label_ttm_stage,label_breq_self_determination,label_sema_negative_feelings,label_ipip_extraversion_category,label_ipip_agreeableness_category,label_ipip_conscientiousness_category,label_ipip_stability_category,label_ipip_intellect_category,label_stai_stress_category,label_panas_negative_affect,month_sin,weekday_sin,week_sin,day_sin,month_cos,weekday_cos,week_cos,day_cos
0,621e2e8e67b776a24055b564,2021-05-24,34.137687,57.432,89.603,,14.8,78.0,0.833333,0.675,0.866667,-1.788325,,2351.59,62.79210,6517.5,[Walk],71.701565,149.0,24.0,33.0,713.0,False,,62.073070,31260000.0,0.0,445.0,76.0,0.0,93.0,1.243243,0.987013,0.921642,1.341772,8833.0,83.0,1349.0,0.0,0.0,<30,MALE,<19,,2021-05-24T00:40:00.000,2021-05-24T09:21:00.000,Maintenance,intrinsic_regulation,,LOW,LOW,HIGH,HIGH,AVERAGE,,,-0.866025,0.0,0.309017,-0.988468,0.5,1.0,0.951057,0.151428
1,621e328667b776a240281372,2021-05-24,,,,,,,,,,,,2619.85,41.38447,6568.9,[Walk],83.919698,132.0,7.0,30.0,1271.0,False,,,,,,,,,,,,,8550.0,278.0,766.0,29.0,1.0,>=30,MALE,>=30,,,,Maintenance,intrinsic_regulation,,HIGH,AVERAGE,HIGH,LOW,LOW,,,-0.866025,0.0,0.309017,-0.988468,0.5,1.0,0.951057,0.151428
2,621e326767b776a24012e179,2021-05-24,,46.120,53.968,95.3,14.4,80.0,0.666667,0.925,0.766667,-4.129593,,2204.50,52.76058,11283.7,[Walk],68.275766,278.0,41.0,67.0,616.0,False,,52.516339,,,,,,,,,,,16992.0,131.0,1225.0,0.0,0.0,<30,FEMALE,<19,,,,Maintenance,,,,,,,,,,-0.866025,0.0,0.309017,-0.988468,0.5,1.0,0.951057,0.151428
3,621e332267b776a24092a584,2021-05-24,,,,,,,,,,,,3792.13,53.53183,16776.8,[Walk],78.842893,190.0,79.0,108.0,1063.0,False,,60.977217,,,,,,,,,,,21284.0,175.0,1130.0,9.0,0.0,<30,MALE,21.0,,,,Maintenance,identified_regulation,,LOW,LOW,LOW,AVERAGE,HIGH,,,-0.866025,0.0,0.309017,-0.988468,0.5,1.0,0.951057,0.151428
4,621e333567b776a240a0c217,2021-05-24,,,,,,,,,,,[LIFETIME_DISTANCE],,,,,,,,,,False,,,,,,,,,,,,,,,,,,<30,MALE,21.0,,,,Contemplation,intrinsic_regulation,,AVERAGE,HIGH,AVERAGE,LOW,LOW,,,-0.866025,0.0,0.309017,-0.988468,0.5,1.0,0.951057,0.151428
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
7799,621e30b267b776a240c5e13f,2022-01-17,,,,,,,,,,,,1554.15,42.17696,209.4,,89.131283,33.0,0.0,0.0,1407.0,False,,,,,,,,,,,,,287.0,19.0,126.0,0.0,0.0,<30,FEMALE,21.0,,,,,,,,,,,,,,0.866025,0.0,0.809017,-0.299363,0.5,1.0,0.587785,-0.954139
7800,621e312a67b776a240164d59,2022-01-17,34.237581,,,,,,,,,-0.165995,[DAILY_FLOORS],1602.67,47.22494,3806.9,,67.473381,125.0,27.0,5.0,116.0,False,,58.157746,25800000.0,0.0,379.0,51.0,1.0,94.0,0.835443,0.728571,0.796667,0.902439,4966.0,114.0,563.0,0.0,0.0,>=30,MALE,25.0,,2022-01-17T00:45:00.000,2022-01-17T07:55:00.000,Contemplation,intrinsic_regulation,,AVERAGE,LOW,HIGH,AVERAGE,LOW,,,0.866025,0.0,0.809017,-0.299363,0.5,1.0,0.587785,-0.954139
7801,621e309b67b776a240b532b0,2022-01-17,31.815133,,,,,,,,,-2.999818,[DAILY_FLOORS],2887.52,49.82090,9000.0,"[Sport, Walk]",74.452687,164.0,37.0,63.0,875.0,False,,64.007904,18060000.0,0.0,276.0,25.0,0.0,99.0,0.708333,0.609756,0.714286,1.066667,12096.0,129.0,1208.0,6.0,0.0,>=30,MALE,23.0,,2022-01-17T00:56:30.000,2022-01-17T05:57:30.000,,,,,,,,,,,0.866025,0.0,0.809017,-0.299363,0.5,1.0,0.587785,-0.954139
7802,621e333567b776a240a0c217,2022-01-17,,,,,,,,,,,,1569.60,,,,,0.0,0.0,0.0,1440.0,False,,,,,,,,,,,,,,,,,,<30,MALE,21.0,,,,Contemplation,intrinsic_regulation,,AVERAGE,HIGH,AVERAGE,LOW,LOW,,,0.866025,0.0,0.809017,-0.299363,0.5,1.0,0.587785,-0.954139


# Οne-hot-encoding
CATEGORICAL VARIABLES
1. badgeType
2. activityType
3. mindfulness_session
4. age
5. gender
6. bmi
7. heart_rate_alert
-----------------------------
8. ttm_stage
9. breq_self_determination
10. sema_negative_feelings
11. ipip_extraversion_category
12. ipip_agreeableness_category
13. ipip_conscientiousness_category
14. ipip_stability_category
15. ipip_intellect_category
16. stai_stress_category
17. panas_negative_affect

In [91]:
data = preprocessing_functions.one_hot_encoding(data)
data

Unnamed: 0,id,date,nightly_temperature,nremhr,rmssd,spo2,full_sleep_breathing_rate,stress_score,sleep_points_percentage,exertion_points_percentage,responsiveness_points_percentage,daily_temperature_variation,calories,filteredDemographicVO2Max,distance,bpm,lightly_active_minutes,moderately_active_minutes,very_active_minutes,sedentary_minutes,mindfulness_session,scl_avg,resting_hr,sleep_duration,minutesToFallAsleep,minutesAsleep,minutesAwake,minutesAfterWakeup,sleep_efficiency,sleep_deep_ratio,sleep_wake_ratio,sleep_light_ratio,sleep_rem_ratio,steps,minutes_in_default_zone_1,minutes_below_default_zone_1,minutes_in_default_zone_2,minutes_in_default_zone_3,age,gender,bmi,heart_rate_alert,startTime,endTime,label_ttm_stage,label_breq_self_determination,label_sema_negative_feelings,label_ipip_extraversion_category,label_ipip_agreeableness_category,label_ipip_conscientiousness_category,label_ipip_stability_category,label_ipip_intellect_category,label_stai_stress_category,label_panas_negative_affect,month_sin,weekday_sin,week_sin,day_sin,month_cos,weekday_cos,week_cos,day_cos,DAILY_FLOORS,DAILY_STEPS,GOAL_BASED_WEIGHT_LOSS,LIFETIME_DISTANCE,LIFETIME_FLOORS,LIFETIME_WEIGHT_GOAL_SETUP,Aerobic Workout,Bike,Bootcamp,Circuit Training,Elliptical,Hike,Interval Workout,Martial Arts,Run,Spinning,Sport,Swim,Treadmill,Walk,Weights,Workout,Yoga/Pilates
0,621e2e8e67b776a24055b564,2021-05-24,34.137687,57.432,89.603,,14.8,78.0,0.833333,0.675,0.866667,-1.788325,2351.59,62.79210,6517.5,71.701565,149.0,24.0,33.0,713.0,0.0,,62.073070,31260000.0,0.0,445.0,76.0,0.0,93.0,1.243243,0.987013,0.921642,1.341772,8833.0,83.0,1349.0,0.0,0.0,0.0,0.0,Underweight,,2021-05-24T00:40:00.000,2021-05-24T09:21:00.000,Maintenance,intrinsic_regulation,,LOW,LOW,HIGH,HIGH,AVERAGE,,,-0.866025,0.0,0.309017,-0.988468,0.5,1.0,0.951057,0.151428,,,,,,,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0
1,621e328667b776a240281372,2021-05-24,,,,,,,,,,,2619.85,41.38447,6568.9,83.919698,132.0,7.0,30.0,1271.0,0.0,,,,,,,,,,,,,8550.0,278.0,766.0,29.0,1.0,1.0,0.0,Obese,,,,Maintenance,intrinsic_regulation,,HIGH,AVERAGE,HIGH,LOW,LOW,,,-0.866025,0.0,0.309017,-0.988468,0.5,1.0,0.951057,0.151428,,,,,,,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0
2,621e326767b776a24012e179,2021-05-24,,46.120,53.968,95.3,14.4,80.0,0.666667,0.925,0.766667,-4.129593,2204.50,52.76058,11283.7,68.275766,278.0,41.0,67.0,616.0,0.0,,52.516339,,,,,,,,,,,16992.0,131.0,1225.0,0.0,0.0,0.0,1.0,Underweight,,,,Maintenance,,,,,,,,,,-0.866025,0.0,0.309017,-0.988468,0.5,1.0,0.951057,0.151428,,,,,,,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0
3,621e332267b776a24092a584,2021-05-24,,,,,,,,,,,3792.13,53.53183,16776.8,78.842893,190.0,79.0,108.0,1063.0,0.0,,60.977217,,,,,,,,,,,21284.0,175.0,1130.0,9.0,0.0,0.0,0.0,Normal,,,,Maintenance,identified_regulation,,LOW,LOW,LOW,AVERAGE,HIGH,,,-0.866025,0.0,0.309017,-0.988468,0.5,1.0,0.951057,0.151428,,,,,,,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0
4,621e333567b776a240a0c217,2021-05-24,,,,,,,,,,,,,,,,,,,0.0,,,,,,,,,,,,,,,,,,0.0,0.0,Normal,,,,Contemplation,intrinsic_regulation,,AVERAGE,HIGH,AVERAGE,LOW,LOW,,,-0.866025,0.0,0.309017,-0.988468,0.5,1.0,0.951057,0.151428,0.0,0.0,0.0,1.0,0.0,0.0,,,,,,,,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
7799,621e30b267b776a240c5e13f,2022-01-17,,,,,,,,,,,1554.15,42.17696,209.4,89.131283,33.0,0.0,0.0,1407.0,0.0,,,,,,,,,,,,,287.0,19.0,126.0,0.0,0.0,0.0,1.0,Normal,,,,,,,,,,,,,,0.866025,0.0,0.809017,-0.299363,0.5,1.0,0.587785,-0.954139,,,,,,,,,,,,,,,,,,,,,,,
7800,621e312a67b776a240164d59,2022-01-17,34.237581,,,,,,,,,-0.165995,1602.67,47.22494,3806.9,67.473381,125.0,27.0,5.0,116.0,0.0,,58.157746,25800000.0,0.0,379.0,51.0,1.0,94.0,0.835443,0.728571,0.796667,0.902439,4966.0,114.0,563.0,0.0,0.0,1.0,0.0,Overweight,,2022-01-17T00:45:00.000,2022-01-17T07:55:00.000,Contemplation,intrinsic_regulation,,AVERAGE,LOW,HIGH,AVERAGE,LOW,,,0.866025,0.0,0.809017,-0.299363,0.5,1.0,0.587785,-0.954139,1.0,0.0,0.0,0.0,0.0,0.0,,,,,,,,,,,,,,,,,
7801,621e309b67b776a240b532b0,2022-01-17,31.815133,,,,,,,,,-2.999818,2887.52,49.82090,9000.0,74.452687,164.0,37.0,63.0,875.0,0.0,,64.007904,18060000.0,0.0,276.0,25.0,0.0,99.0,0.708333,0.609756,0.714286,1.066667,12096.0,129.0,1208.0,6.0,0.0,1.0,0.0,Normal,,2022-01-17T00:56:30.000,2022-01-17T05:57:30.000,,,,,,,,,,,0.866025,0.0,0.809017,-0.299363,0.5,1.0,0.587785,-0.954139,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0
7802,621e333567b776a240a0c217,2022-01-17,,,,,,,,,,,1569.60,,,,0.0,0.0,0.0,1440.0,0.0,,,,,,,,,,,,,,,,,,0.0,0.0,Normal,,,,Contemplation,intrinsic_regulation,,AVERAGE,HIGH,AVERAGE,LOW,LOW,,,0.866025,0.0,0.809017,-0.299363,0.5,1.0,0.587785,-0.954139,,,,,,,,,,,,,,,,,,,,,,,


In [92]:
data.to_pickle("../data/unified_dataframe/temp.pkl")

In [93]:
data = pd.read_pickle("../data/unified_dataframe/temp.pkl")

------------------------------
# Post-preprocessing

1. Creates the 2 new columns: 1 about early adaptive features and 1 about using fitbit while sleeping
2. Replace outliers with NaNs
3. Replace NaN values with column's median for continuous features
4. Replace NaN values with column's more frequent occurrence for categorical features

In [94]:
data = preprocessing_functions.post_preprocessing(data, frequency='daily')
data

Unnamed: 0,id,date,nightly_temperature,nremhr,rmssd,spo2,full_sleep_breathing_rate,stress_score,sleep_points_percentage,exertion_points_percentage,responsiveness_points_percentage,daily_temperature_variation,calories,filteredDemographicVO2Max,distance,bpm,lightly_active_minutes,moderately_active_minutes,very_active_minutes,sedentary_minutes,mindfulness_session,scl_avg,resting_hr,sleep_duration,minutesToFallAsleep,minutesAsleep,minutesAwake,minutesAfterWakeup,sleep_efficiency,sleep_deep_ratio,sleep_wake_ratio,sleep_light_ratio,sleep_rem_ratio,steps,minutes_in_default_zone_1,minutes_below_default_zone_1,minutes_in_default_zone_2,minutes_in_default_zone_3,age,gender,bmi,heart_rate_alert,startTime,endTime,label_ttm_stage,label_breq_self_determination,label_sema_negative_feelings,label_ipip_extraversion_category,label_ipip_agreeableness_category,label_ipip_conscientiousness_category,label_ipip_stability_category,label_ipip_intellect_category,label_stai_stress_category,label_panas_negative_affect,month_sin,weekday_sin,week_sin,day_sin,month_cos,weekday_cos,week_cos,day_cos,DAILY_FLOORS,DAILY_STEPS,GOAL_BASED_WEIGHT_LOSS,LIFETIME_DISTANCE,LIFETIME_FLOORS,LIFETIME_WEIGHT_GOAL_SETUP,Aerobic Workout,Bike,Bootcamp,Circuit Training,Elliptical,Hike,Interval Workout,Martial Arts,Run,Spinning,Sport,Swim,Treadmill,Walk,Weights,Workout,Yoga/Pilates,wear_day,early_features,used_during_night
0,621e2e8e67b776a24055b564,2021-05-24,34.137687,57.432,89.603,95.9,14.8,78.0,0.833333,0.675,0.866667,-1.788325,2351.59,62.792100,6517.5,71.701565,149.0,24.0,33.0,713.0,0.0,10.907065,62.073070,31260000.0,0.0,445.0,76.0,0.0,93.0,1.243243,0.987013,0.921642,1.341772,8833.0,83.0,1349.0,0.0,0.0,0.0,0.0,Underweight,0.0,2021-05-24T00:40:00.000,2021-05-24T09:21:00.000,Maintenance,intrinsic_regulation,,LOW,LOW,HIGH,HIGH,AVERAGE,,,-0.866025,0.0,0.309017,-0.988468,0.5,1.0,0.951057,0.151428,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1,0,0.873563
1,621e328667b776a240281372,2021-05-24,33.973120,62.388,35.216,95.9,14.6,76.0,0.766667,0.725,0.766667,-1.405808,2619.85,41.384470,6568.9,83.919698,132.0,7.0,30.0,1271.0,0.0,10.907065,66.100178,27240000.0,0.0,399.0,54.0,0.0,95.0,0.986206,0.963636,0.984866,0.987500,8550.0,278.0,766.0,29.0,1.0,1.0,0.0,Obese,0.0,,,Maintenance,intrinsic_regulation,,HIGH,AVERAGE,HIGH,LOW,LOW,,,-0.866025,0.0,0.309017,-0.988468,0.5,1.0,0.951057,0.151428,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1,0,0.100000
2,621e326767b776a24012e179,2021-05-24,33.973120,46.120,53.968,95.3,14.4,80.0,0.666667,0.925,0.766667,-4.129593,2204.50,52.760580,11283.7,68.275766,278.0,41.0,67.0,616.0,0.0,10.907065,52.516339,27240000.0,0.0,399.0,54.0,0.0,95.0,0.986206,0.963636,0.984866,0.987500,16992.0,131.0,1225.0,0.0,0.0,0.0,1.0,Underweight,0.0,,,Maintenance,,,,,,,,,,-0.866025,0.0,0.309017,-0.988468,0.5,1.0,0.951057,0.151428,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1,1,0.991379
3,621e332267b776a24092a584,2021-05-24,33.973120,62.388,35.216,95.9,14.6,76.0,0.766667,0.725,0.766667,-1.405808,3792.13,53.531830,16776.8,78.842893,190.0,79.0,108.0,1063.0,0.0,10.907065,60.977217,27240000.0,0.0,399.0,54.0,0.0,95.0,0.986206,0.963636,0.984866,0.987500,21284.0,175.0,1130.0,9.0,0.0,0.0,0.0,Normal,0.0,,,Maintenance,identified_regulation,,LOW,LOW,LOW,AVERAGE,HIGH,,,-0.866025,0.0,0.309017,-0.988468,0.5,1.0,0.951057,0.151428,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1,1,0.405405
4,621e333567b776a240a0c217,2021-05-24,33.973120,62.388,35.216,95.9,14.6,76.0,0.766667,0.725,0.766667,-1.405808,2251.67,45.738965,5434.6,78.919551,183.0,9.0,5.0,781.0,0.0,10.907065,66.100178,27240000.0,0.0,399.0,54.0,0.0,95.0,0.986206,0.963636,0.984866,0.987500,7794.0,133.0,1129.0,0.0,0.0,0.0,0.0,Normal,0.0,,,Contemplation,intrinsic_regulation,,AVERAGE,HIGH,AVERAGE,LOW,LOW,,,-0.866025,0.0,0.309017,-0.988468,0.5,1.0,0.951057,0.151428,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1,0,0.000000
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
7799,621e30b267b776a240c5e13f,2022-01-17,33.973120,62.388,35.216,95.9,14.6,76.0,0.766667,0.725,0.766667,-1.405808,1554.15,42.176960,209.4,89.131283,33.0,0.0,0.0,1407.0,0.0,10.907065,66.100178,27240000.0,0.0,399.0,54.0,0.0,95.0,0.986206,0.963636,0.984866,0.987500,287.0,19.0,126.0,0.0,0.0,0.0,1.0,Normal,0.0,,,,,,,,,,,,,0.866025,0.0,0.809017,-0.299363,0.5,1.0,0.587785,-0.954139,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0,0,0.876923
7800,621e312a67b776a240164d59,2022-01-17,34.237581,62.388,35.216,95.9,14.6,76.0,0.766667,0.725,0.766667,-0.165995,1602.67,47.224940,3806.9,67.473381,125.0,27.0,5.0,116.0,0.0,10.907065,58.157746,25800000.0,0.0,379.0,51.0,1.0,94.0,0.835443,0.728571,0.796667,0.902439,4966.0,114.0,563.0,0.0,0.0,1.0,0.0,Overweight,0.0,2022-01-17T00:45:00.000,2022-01-17T07:55:00.000,Contemplation,intrinsic_regulation,,AVERAGE,LOW,HIGH,AVERAGE,LOW,,,0.866025,0.0,0.809017,-0.299363,0.5,1.0,0.587785,-0.954139,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1,1,0.490741
7801,621e309b67b776a240b532b0,2022-01-17,31.815133,62.388,35.216,95.9,14.6,76.0,0.766667,0.725,0.766667,-2.999818,2887.52,49.820900,9000.0,74.452687,164.0,37.0,63.0,875.0,0.0,10.907065,64.007904,18060000.0,0.0,276.0,25.0,0.0,99.0,0.708333,0.609756,0.714286,1.066667,12096.0,129.0,1208.0,6.0,0.0,1.0,0.0,Normal,0.0,2022-01-17T00:56:30.000,2022-01-17T05:57:30.000,,,,,,,,,,,0.866025,0.0,0.809017,-0.299363,0.5,1.0,0.587785,-0.954139,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,1,0,0.500000
7802,621e333567b776a240a0c217,2022-01-17,33.973120,62.388,35.216,95.9,14.6,76.0,0.766667,0.725,0.766667,-1.405808,1569.60,45.738965,5434.6,78.919551,0.0,0.0,0.0,1440.0,0.0,10.907065,66.100178,27240000.0,0.0,399.0,54.0,0.0,95.0,0.986206,0.963636,0.984866,0.987500,7794.0,133.0,1129.0,0.0,0.0,0.0,0.0,Normal,0.0,,,Contemplation,intrinsic_regulation,,AVERAGE,HIGH,AVERAGE,LOW,LOW,,,0.866025,0.0,0.809017,-0.299363,0.5,1.0,0.587785,-0.954139,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1,0,0.000000


In [95]:
special_zeros = ['stress_score', 'sleep_points_percentage', 'exertion_points_percentage', 
                 'responsiveness_points_percentage']

for col in special_zeros:
    data.loc[data[col] == 0, col] = data[col].median()

In [96]:
# these columns have only 0 values
data = data.drop(columns=['heart_rate_alert'])

In [97]:
# replace different nan formats
data = data.where(~pd.isna(data), np.nan)

In [98]:
#create_report(data).show_browser()

  0%|                                                                                        | 0/14080 [00:00<…

In [99]:
data = data.sort_values(by=['id', 'date'])
data['label_panas_negative_affect'] = data['label_panas_negative_affect'].bfill()
data['label_stai_stress_category'] = data['label_stai_stress_category'].bfill()

In [100]:
#create_report(data).show_browser()

  0%|                                                                                        | 0/14080 [00:00<…

# Add all the new features


In [101]:
data = feature_engineering_functions.add_features(data, 'daily')

In [108]:
data['stress_quantile'].value_counts()

1    3902
0    1951
2    1951
Name: stress_quantile, dtype: int64

In [109]:
data['user_stress_quantile'].value_counts()

1    3867
0    1978
2    1959
Name: user_stress_quantile, dtype: int64

In [102]:
data

Unnamed: 0,id,date,nightly_temperature,nremhr,rmssd,spo2,full_sleep_breathing_rate,stress_score,sleep_points_percentage,exertion_points_percentage,responsiveness_points_percentage,daily_temperature_variation,calories,filteredDemographicVO2Max,distance,bpm,lightly_active_minutes,moderately_active_minutes,very_active_minutes,sedentary_minutes,mindfulness_session,scl_avg,resting_hr,sleep_duration,minutesToFallAsleep,minutesAsleep,minutesAwake,minutesAfterWakeup,sleep_efficiency,sleep_deep_ratio,sleep_wake_ratio,sleep_light_ratio,sleep_rem_ratio,steps,minutes_in_default_zone_1,minutes_below_default_zone_1,minutes_in_default_zone_2,minutes_in_default_zone_3,age,gender,bmi,label_ttm_stage,label_breq_self_determination,label_sema_negative_feelings,label_ipip_extraversion_category,label_ipip_agreeableness_category,label_ipip_conscientiousness_category,label_ipip_stability_category,label_ipip_intellect_category,label_stai_stress_category,label_panas_negative_affect,month_sin,weekday_sin,week_sin,day_sin,month_cos,weekday_cos,week_cos,day_cos,DAILY_FLOORS,DAILY_STEPS,GOAL_BASED_WEIGHT_LOSS,LIFETIME_DISTANCE,LIFETIME_FLOORS,LIFETIME_WEIGHT_GOAL_SETUP,Aerobic Workout,Bike,Bootcamp,Circuit Training,Elliptical,Hike,Interval Workout,Martial Arts,Run,Spinning,Sport,Swim,Treadmill,Walk,Weights,Workout,Yoga/Pilates,wear_day,early_features,used_during_night,different_activity_types,different_badge_types,is_index,iv_index,sri_index,sjl_index,mode_startTime,mode_endTime,mode_startTime_sin,mode_startTime_cos,mode_endTime_sin,mode_endTime_cos,isp_index,steps_is_index,steps_iv_index,steps_sri_index,steps_isp_index,exercise_is_index,exercise_iv_index,exercise_sri_index,stress_quantile,user_stress_quantile,average_sleep_duration,average_steps,is_weekend,is_holiday,startDay_sin,startDay_cos,startWeek_sin,startWeek_cos,startWeekday_sin,startWeekday_cos,startMonth_sin,startMonth_cos,startYear_sin,startYear_cos,endDay_sin,endDay_cos,endWeek_sin,endWeek_cos,endWeekday_sin,endWeekday_cos,endMonth_sin,endMonth_cos,endYear_sin,endYear_cos,startHour_sin,startHour_cos,endHour_sin,endHour_cos,Steps_hour0,Steps_hour1,Steps_hour2,Steps_hour3,Steps_hour4,Steps_hour5,Steps_hour6,Steps_hour7,Steps_hour8,Steps_hour9,Steps_hour10,Steps_hour11,Steps_hour12,Steps_hour13,Steps_hour14,Steps_hour15,Steps_hour16,Steps_hour17,Steps_hour18,Steps_hour19,Steps_hour20,Steps_hour21,Steps_hour22,Steps_hour23
0,621e2e8e67b776a24055b564,2021-05-24,34.137687,57.432,89.603,95.9,14.8,78.0,0.833333,0.675,0.866667,-1.788325,2351.59,62.79210,6517.5,71.701565,149.0,24.0,33.0,713.0,0.0,10.907065,62.073070,31260000.0,0.0,445.0,76.0,0.0,93.0,1.243243,0.987013,0.921642,1.341772,8833.0,83.0,1349.0,0.0,0.0,0.0,0.0,Underweight,Maintenance,intrinsic_regulation,,LOW,LOW,HIGH,HIGH,AVERAGE,,,-0.866025,0.000000,3.090170e-01,-0.988468,0.5,1.000000,0.951057,0.151428,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1,0,0.873563,1,0,0.930443,0.343581,90.495130,1.091284,23.0,9.0,0.382683,-0.92388,-3.826834e-01,-0.92388,0.945035,0.287117,1.655374,89.963574,0.381108,0.224056,2.003963,77.127735,2,1,3.255517e+07,8892.885057,0.0,0.0,0.060330,0.998178,-0.281733,0.959493,0.000000,1.000000,0.012703,0.999919,-0.908669,0.417518,0.060330,0.998178,-0.0,1.0,0.000000,1.000000,0.012703,0.999919,-0.908669,0.417518,0.000000,1.000000,0.022699,0.999742,134.0,0.0,0.0,15.0,0.0,0.0,39.0,0.0,7.0,10.0,2626.0,992.0,429.0,29.0,191.0,33.0,342.0,1712.0,1838.0,160.0,155.0,37.0,31.0,53.0
1,621e2e8e67b776a24055b564,2021-05-25,33.794544,57.681,94.303,95.9,15.8,80.0,0.833333,0.725,0.866667,-2.462709,2332.08,62.67912,7178.6,70.579300,132.0,25.0,31.0,704.0,0.0,10.907065,62.121476,32880000.0,0.0,460.0,88.0,0.0,94.0,1.466667,1.142857,0.947566,1.197531,9727.0,56.0,1374.0,4.0,0.0,0.0,0.0,Underweight,Maintenance,intrinsic_regulation,1.0,LOW,LOW,HIGH,HIGH,AVERAGE,Below average,Below average,-0.866025,0.781831,3.090170e-01,-0.937752,0.5,0.623490,0.951057,0.347305,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1,0,0.873563,1,4,0.930443,0.343581,90.495130,1.091284,23.0,9.0,0.382683,-0.92388,-3.826834e-01,-0.92388,0.945035,0.287117,1.655374,89.963574,0.381108,0.224056,2.003963,77.127735,2,1,3.255517e+07,8892.885057,0.0,0.0,0.060330,0.998178,-0.281733,0.959493,0.000000,1.000000,0.012703,0.999919,-0.908669,0.417518,0.062841,0.998024,-0.0,1.0,0.002540,0.999997,0.012703,0.999919,-0.908669,0.417518,0.057982,0.998318,0.020177,0.999796,0.0,0.0,0.0,7.0,0.0,0.0,0.0,0.0,0.0,2703.0,735.0,76.0,1010.0,544.0,120.0,24.0,446.0,377.0,3165.0,332.0,97.0,9.0,24.0,58.0
2,621e2e8e67b776a24055b564,2021-05-25,33.794544,57.681,94.303,95.9,15.8,80.0,0.833333,0.725,0.866667,-2.462709,2332.08,62.67912,7178.6,70.579300,132.0,25.0,31.0,704.0,0.0,10.907065,62.121476,32880000.0,0.0,460.0,88.0,0.0,94.0,1.466667,1.142857,0.947566,1.197531,9727.0,56.0,1374.0,4.0,0.0,0.0,0.0,Underweight,Maintenance,intrinsic_regulation,0.0,LOW,LOW,HIGH,HIGH,AVERAGE,Below average,Below average,-0.866025,0.781831,3.090170e-01,-0.937752,0.5,0.623490,0.951057,0.347305,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1,0,0.873563,1,4,0.930443,0.343581,90.495130,1.091284,23.0,9.0,0.382683,-0.92388,-3.826834e-01,-0.92388,0.945035,0.287117,1.655374,89.963574,0.381108,0.224056,2.003963,77.127735,2,1,3.255517e+07,8892.885057,0.0,0.0,0.060330,0.998178,-0.281733,0.959493,0.000000,1.000000,0.012703,0.999919,-0.908669,0.417518,0.062841,0.998024,-0.0,1.0,0.002540,0.999997,0.012703,0.999919,-0.908669,0.417518,0.057982,0.998318,0.020177,0.999796,0.0,0.0,0.0,7.0,0.0,0.0,0.0,0.0,0.0,2703.0,735.0,76.0,1010.0,544.0,120.0,24.0,446.0,377.0,3165.0,332.0,97.0,9.0,24.0,58.0
3,621e2e8e67b776a24055b564,2021-05-26,34.611011,57.481,35.216,95.9,14.6,84.0,0.966667,0.725,0.866667,-2.385801,2262.30,62.57307,6090.9,71.842573,112.0,27.0,31.0,710.0,0.0,10.907065,62.263999,33600000.0,0.0,493.0,67.0,0.0,96.0,1.116883,0.858974,1.015038,1.670732,8253.0,85.0,1350.0,0.0,0.0,0.0,0.0,Underweight,Maintenance,intrinsic_regulation,0.0,LOW,LOW,HIGH,HIGH,AVERAGE,Below average,Below average,-0.866025,0.974928,3.090170e-01,-0.848644,0.5,-0.222521,0.951057,0.528964,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1,0,0.873563,1,4,0.930443,0.343581,90.495130,1.091284,23.0,9.0,0.382683,-0.92388,-3.826834e-01,-0.92388,0.945035,0.287117,1.655374,89.963574,0.381108,0.224056,2.003963,77.127735,2,2,3.255517e+07,8892.885057,0.0,0.0,0.062841,0.998024,-0.281733,0.959493,0.002540,0.999997,0.012703,0.999919,-0.908669,0.417518,0.065351,0.997862,-0.0,1.0,0.005079,0.999987,0.012703,0.999919,-0.908669,0.417518,0.057982,0.998318,0.022699,0.999742,0.0,0.0,0.0,0.0,7.0,8.0,0.0,0.0,11.0,2013.0,1280.0,64.0,150.0,286.0,85.0,477.0,390.0,2821.0,293.0,158.0,66.0,0.0,70.0,74.0
4,621e2e8e67b776a24055b564,2021-05-27,34.408304,57.493,35.216,95.9,14.8,82.0,0.933333,0.725,0.833333,-2.124199,2325.10,62.47493,6653.1,71.725477,133.0,21.0,37.0,622.0,0.0,10.907065,62.368900,37620000.0,0.0,540.0,87.0,0.0,93.0,1.128205,1.129870,1.191729,1.588235,9015.0,90.0,1282.0,0.0,0.0,0.0,0.0,Underweight,Maintenance,intrinsic_regulation,0.0,LOW,LOW,HIGH,HIGH,AVERAGE,Below average,Below average,-0.866025,0.433884,3.090170e-01,-0.724793,0.5,-0.900969,0.951057,0.688967,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1,0,0.873563,1,4,0.930443,0.343581,90.495130,1.091284,23.0,9.0,0.382683,-0.92388,-3.826834e-01,-0.92388,0.945035,0.287117,1.655374,89.963574,0.381108,0.224056,2.003963,77.127735,2,2,3.255517e+07,8892.885057,0.0,0.0,0.065351,0.997862,-0.281733,0.959493,0.005079,0.999987,0.012703,0.999919,-0.908669,0.417518,0.067861,0.997695,-0.0,1.0,0.007619,0.999971,0.012703,0.999919,-0.908669,0.417518,0.057982,0.998318,0.022699,0.999742,0.0,0.0,0.0,7.0,8.0,0.0,7.0,8.0,0.0,245.0,3396.0,11.0,309.0,113.0,58.0,296.0,401.0,21.0,3054.0,503.0,411.0,96.0,63.0,8.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
7799,621e375b67b776a240290cdc,2021-07-22,33.973120,62.388,35.216,95.9,14.6,76.0,0.766667,0.725,0.766667,-1.405808,1699.36,36.08147,4490.0,74.782707,137.0,16.0,12.0,1210.0,0.0,10.907065,72.171246,27240000.0,0.0,399.0,7.0,0.0,89.0,0.986206,0.963636,0.984866,0.987500,6805.0,57.0,1383.0,0.0,0.0,0.0,1.0,Normal,Action,identified_regulation,,AVERAGE,HIGH,AVERAGE,LOW,HIGH,,,0.866025,0.433884,3.090170e-01,-0.968077,0.5,-0.900969,-0.951057,-0.250653,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1,0,0.985507,5,4,0.763276,0.367949,76.051031,-0.353321,0.0,8.0,0.000000,1.00000,1.224647e-16,-1.00000,0.665562,0.238919,1.150768,88.327172,0.371521,0.099220,1.874527,82.739821,2,1,2.732087e+07,11508.710145,0.0,0.0,0.055308,0.998469,0.909632,-0.415415,0.007619,0.999971,0.017784,0.999842,-0.908669,0.417518,0.055308,0.998469,0.680173,-0.733052,0.007619,0.999971,0.017784,0.999842,-0.908669,0.417518,0.040347,0.999186,0.042867,0.999081,21.0,0.0,0.0,5.0,0.0,0.0,0.0,0.0,290.0,78.0,22.0,79.0,157.0,12.0,7.0,60.0,7.0,163.0,92.0,106.0,2034.0,3453.0,49.0,170.0
7800,621e375b67b776a240290cdc,2021-07-23,34.011607,55.542,30.813,95.9,16.6,81.0,0.700000,0.800,0.933333,-1.467558,2437.51,36.32305,8309.8,91.478693,294.0,59.0,43.0,567.0,0.0,10.907065,69.927633,24960000.0,0.0,359.0,57.0,0.0,91.0,1.000000,1.096154,0.960177,1.153846,13187.0,366.0,1036.0,37.0,0.0,0.0,1.0,Normal,Action,identified_regulation,,AVERAGE,HIGH,AVERAGE,LOW,HIGH,,,0.866025,-0.433884,3.090170e-01,-0.998717,0.5,-0.900969,-0.951057,-0.050649,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,1,0,0.985507,5,4,0.763276,0.367949,76.051031,-0.353321,0.0,8.0,0.000000,1.00000,1.224647e-16,-1.00000,0.665562,0.238919,1.150768,88.327172,0.371521,0.099220,1.874527,82.739821,2,1,2.732087e+07,11508.710145,0.0,0.0,0.057819,0.998327,0.909632,-0.415415,0.010159,0.999948,0.017784,0.999842,-0.908669,0.417518,0.057819,0.998327,0.680173,-0.733052,0.010159,0.999948,0.017784,0.999842,-0.908669,0.417518,0.000000,1.000000,0.017656,0.999844,0.0,9.0,0.0,0.0,8.0,6.0,0.0,142.0,57.0,94.0,65.0,97.0,759.0,1841.0,1013.0,810.0,371.0,833.0,1411.0,93.0,424.0,4029.0,1083.0,42.0
7801,621e375b67b776a240290cdc,2021-07-24,33.687826,69.579,19.407,95.9,17.0,84.0,0.833333,0.900,0.766667,-2.258333,2344.01,36.39149,8922.8,84.184847,342.0,26.0,31.0,581.0,0.0,10.907065,72.640440,27600000.0,0.0,414.0,46.0,0.0,94.0,0.500000,0.867925,1.276786,1.259259,13554.0,286.0,1152.0,2.0,0.0,0.0,1.0,Normal,Action,identified_regulation,,AVERAGE,HIGH,AVERAGE,LOW,HIGH,,,0.866025,-0.974928,3.090170e-01,-0.988468,0.5,-0.222521,-0.951057,0.151428,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1,0,0.985507,5,4,0.763276,0.367949,76.051031,-0.353321,0.0,8.0,0.000000,1.00000,1.224647e-16,-1.00000,0.665562,0.238919,1.150768,88.327172,0.371521,0.099220,1.874527,82.739821,2,2,2.732087e+07,11508.710145,1.0,0.0,0.060330,0.998178,0.909632,-0.415415,0.012698,0.999919,0.017784,0.999842,-0.908669,0.417518,0.060330,0.998178,0.680173,-0.733052,0.012698,0.999919,0.017784,0.999842,-0.908669,0.417518,0.000000,1.000000,0.020177,0.999796,1091.0,8.0,0.0,0.0,5.0,6.0,0.0,0.0,468.0,501.0,1571.0,360.0,536.0,20.0,1656.0,1276.0,1903.0,534.0,491.0,583.0,84.0,1103.0,621.0,737.0
7802,621e375b67b776a240290cdc,2021-07-25,34.112386,65.899,22.892,95.9,18.0,84.0,0.833333,0.900,0.766667,-1.309401,2239.31,36.39193,8995.8,83.825390,355.0,19.0,2.0,668.0,0.0,10.907065,73.766306,23760000.0,0.0,354.0,42.0,1.0,95.0,1.148936,0.807692,0.983051,0.800000,13638.0,347.0,1083.0,0.0,0.0,0.0,1.0,Normal,Action,identified_regulation,,AVERAGE,HIGH,AVERAGE,LOW,HIGH,,,0.866025,-0.781831,3.090170e-01,-0.937752,0.5,0.623490,-0.951057,0.347305,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1,0,0.985507,5,4,0.763276,0.367949,76.051031,-0.353321,0.0,8.0,0.000000,1.00000,1.224647e-16,-1.00000,0.665562,0.238919,1.150768,88.327172,0.371521,0.099220,1.874527,82.739821,2,2,2.732087e+07,11508.710145,1.0,1.0,0.062841,0.998024,0.909632,-0.415415,0.015238,0.999884,0.017784,0.999842,-0.908669,0.417518,0.062841,0.998024,0.680173,-0.733052,0.015238,0.999884,0.017784,0.999842,-0.908669,0.417518,0.002522,0.999997,0.020177,0.999796,196.0,92.0,0.0,7.0,8.0,6.0,0.0,0.0,320.0,954.0,755.0,243.0,1513.0,1043.0,42.0,1235.0,716.0,1130.0,1163.0,888.0,245.0,1401.0,54.0,1627.0


In [103]:
data.to_pickle("../data/unified_dataframe/data_preprocessed.pkl")

In [106]:
user_data = data.loc[:, 'DAILY_FLOORS':'LIFETIME_WEIGHT_GOAL_SETUP']
user_data.columns

Index(['DAILY_FLOORS', 'DAILY_STEPS', 'GOAL_BASED_WEIGHT_LOSS',
       'LIFETIME_DISTANCE', 'LIFETIME_FLOORS', 'LIFETIME_WEIGHT_GOAL_SETUP'],
      dtype='object')