In [2]:
import pickle
import warnings
import numpy as np
import pandas as pd
from datetime import datetime
import preprocessing_functions
# Suppress every warning category for the remainder of the kernel session.
# NOTE(review): a blanket "ignore" also hides any pandas warnings emitted by
# later cells — consider narrowing the filter to specific categories.
warnings.filterwarnings("ignore")

Create the baseline dataframe by applying the following pre-processing steps:
1. Selecting experiment days
2. Dropping duplicates
3. Converting types
4. One-hot encoding

In [2]:
# Build the baseline frame as a single pipeline: load the unprocessed daily
# Fitbit pickle, then pass it through the basic-preprocessing and one-hot
# encoding helpers in that order.
fitbit = (
    pd.read_pickle('data/daily_fitbit_df_unprocessed.pkl')
    .pipe(preprocessing_functions.fitbit_basic_preprocessing)
    .pipe(preprocessing_functions.fitbit_one_hot_encoding)
)
fitbit

Unnamed: 0,id,date,nightly_temperature,nremhr,rmssd,spo2,full_sleep_breathing_rate,stress_score,sleep_points_percentage,exertion_points_percentage,...,Martial Arts,Run,Spinning,Sport,Swim,Treadmill,Walk,Weights,Workout,Yoga/Pilates
0,621e2e8e67b776a24055b564,2021-05-24,34.137687,57.432,89.603,,14.8,78.0,0.833333,0.675,...,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0
1,621e301e67b776a240608a72,2021-05-24,34.710232,,,,,74.0,0.700000,0.700,...,,,,,,,,,,
3,621e301367b776a24057738e,2021-05-24,,,,,,,,,...,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,621e360b67b776a24039709f,2021-05-24,,68.321,31.413,,15.4,69.0,0.633333,0.650,...,,,,,,,,,,
5,621e2fce67b776a240279baa,2021-05-24,34.783747,83.170,15.941,96.6,15.2,70.0,0.733333,0.725,...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4959,621e345267b776a240691064,2022-01-17,,,,,,,,,...,,,,,,,,,,
4960,621e310d67b776a24003096d,2022-01-17,,,,,,,,,...,,,,,,,,,,
4961,621e324e67b776a2400191cb,2022-01-17,33.104768,,,,,,,,...,,,,,,,,,,
4962,621e30f467b776a240f22944,2022-01-17,34.352490,,,,13.4,,,,...,,,,,,,,,,


In [3]:
# Save the preprocessed baseline frame (`fitbit`) to disk as a pickle.
fitbit.to_pickle('./data/fitbit_to_baseline_model.pkl')

After merging the Fitbit dataframe with a survey dataframe, continue with the following pre-processing steps:
1. Drop columns with a high share of NaN values
2. Drop duplicates
3. Extract day-related features
4. Replace outliers
5. Replace NaN values

Fitbit merged TTM preprocessing

In [20]:
# Load the Fitbit/TTM merge and detach the label column so that only the
# feature columns are handed to post_preprocessing.
fitbit_ttm = pd.read_pickle('data/fitbit_ttm_to_model.pkl')
target = fitbit_ttm.pop('stage')  # returns the column and removes it from fitbit_ttm in place

fitbit_ttm_processed = preprocessing_functions.post_preprocessing(fitbit_ttm, False)

# Re-attach the label. Column assignment aligns on the row index.
# NOTE(review): this assumes post_preprocessing keeps row labels consistent
# with the loaded frame — confirm, since it is documented as dropping rows.
fitbit_ttm_processed['stage'] = target

# Persist the result, then display it as the cell output.
fitbit_ttm_processed.to_pickle('./data/fitbit_ttm_processed.pkl')
fitbit_ttm_processed

Unnamed: 0,id,nightly_temperature,nremhr,rmssd,full_sleep_breathing_rate,stress_score,sleep_points_percentage,exertion_points_percentage,responsiveness_points_percentage,daily_temperature_variation,...,Yoga/Pilates,month_sin,weekday_sin,week_sin,day_sin,month_cos,weekday_cos,week_cos,day_cos,stage
0,621e2e8e67b776a24055b564,34.137687,57.4320,33.038,14.8,78.0,0.833333,0.675,0.866667,-1.788325,...,0.0,-0.866025,0.000000,0.309017,-0.988468,0.5,1.000000,0.951057,0.151428,Maintenance
1,621e2e8e67b776a24055b564,33.794544,57.6810,33.038,15.8,80.0,0.833333,0.725,0.866667,-2.462709,...,0.0,-0.866025,0.781831,0.309017,-0.937752,0.5,0.623490,0.951057,0.347305,Maintenance
2,621e2e8e67b776a24055b564,34.611011,57.4810,33.038,14.6,84.0,0.966667,0.725,0.866667,-2.385801,...,0.0,-0.866025,0.974928,0.309017,-0.848644,0.5,-0.222521,0.951057,0.528964,Maintenance
3,621e2e8e67b776a24055b564,34.408304,57.4930,33.038,14.8,82.0,0.933333,0.725,0.833333,-2.124199,...,0.0,-0.866025,0.433884,0.309017,-0.724793,0.5,-0.900969,0.951057,0.688967,Maintenance
4,621e2e8e67b776a24055b564,34.178922,56.7500,33.038,15.2,81.0,0.866667,0.725,0.866667,-2.396873,...,0.0,-0.866025,-0.433884,0.309017,-0.571268,0.5,-0.900969,0.951057,0.820763,Maintenance
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3642,621e339967b776a240e502de,33.816660,51.7600,47.281,11.6,79.0,0.700000,0.725,0.966667,-1.793898,...,0.0,0.866025,0.433884,0.587785,0.485302,0.5,-0.900969,0.809017,-0.874347,Maintenance
3643,621e339967b776a240e502de,34.460723,51.3510,45.883,12.0,79.0,0.766667,0.775,0.833333,-0.705708,...,0.0,0.866025,-0.433884,0.587785,0.299363,0.5,-0.900969,0.809017,-0.954139,Maintenance
3644,621e339967b776a240e502de,34.137341,50.5040,49.779,12.0,78.0,0.833333,0.700,0.833333,-1.229213,...,0.0,0.866025,-0.974928,0.587785,0.101168,0.5,-0.222521,0.809017,-0.994869,Maintenance
3645,621e339967b776a240e502de,34.406884,51.5330,45.322,11.6,73.0,0.700000,0.725,0.766667,-1.643875,...,0.0,0.866025,-0.781831,0.587785,-0.101168,0.5,0.623490,0.809017,-0.994869,Maintenance


Fitbit merged BREQ preprocessing

In [24]:
# Load the Fitbit/BREQ merge and detach the label column so that only the
# feature columns are handed to post_preprocessing.
fitbit_breq = pd.read_pickle('data/fitbit_breq_to_model.pkl')
target = fitbit_breq.pop('breq_self_determination')  # removes the column from fitbit_breq in place

fitbit_breq_processed = preprocessing_functions.post_preprocessing(fitbit_breq, False)

# Re-attach the label. Column assignment aligns on the row index.
# NOTE(review): this assumes post_preprocessing keeps row labels consistent
# with the loaded frame — confirm.
fitbit_breq_processed['breq_self_determination'] = target

# Persist the result, then display it as the cell output.
fitbit_breq_processed.to_pickle('./data/fitbit_breq_processed.pkl')
fitbit_breq_processed

Unnamed: 0,id,nightly_temperature,nremhr,rmssd,full_sleep_breathing_rate,stress_score,sleep_points_percentage,exertion_points_percentage,responsiveness_points_percentage,daily_temperature_variation,...,Yoga/Pilates,month_sin,weekday_sin,week_sin,day_sin,month_cos,weekday_cos,week_cos,day_cos,breq_self_determination
0,621e2e8e67b776a24055b564,34.137687,57.432,33.094,14.8,78.0,0.833333,0.675,0.866667,-1.788325,...,0.0,-0.866025,0.000000,0.309017,-0.988468,0.5,1.000000,0.951057,0.151428,intrinsic_regulation
1,621e2e8e67b776a24055b564,33.794544,57.681,33.094,15.8,80.0,0.833333,0.725,0.866667,-2.462709,...,0.0,-0.866025,0.781831,0.309017,-0.937752,0.5,0.623490,0.951057,0.347305,intrinsic_regulation
2,621e2e8e67b776a24055b564,34.611011,57.481,33.094,14.6,84.0,0.966667,0.725,0.866667,-2.385801,...,0.0,-0.866025,0.974928,0.309017,-0.848644,0.5,-0.222521,0.951057,0.528964,intrinsic_regulation
3,621e2e8e67b776a24055b564,34.408304,57.493,33.094,14.8,82.0,0.933333,0.725,0.833333,-2.124199,...,0.0,-0.866025,0.433884,0.309017,-0.724793,0.5,-0.900969,0.951057,0.688967,intrinsic_regulation
4,621e2e8e67b776a24055b564,34.178922,56.750,33.094,15.2,81.0,0.866667,0.725,0.866667,-2.396873,...,0.0,-0.866025,-0.433884,0.309017,-0.571268,0.5,-0.900969,0.951057,0.820763,intrinsic_regulation
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3578,621e339967b776a240e502de,33.816660,51.760,47.281,11.6,79.0,0.700000,0.725,0.966667,-1.793898,...,0.0,0.866025,0.433884,0.587785,0.485302,0.5,-0.900969,0.809017,-0.874347,identified_regulation
3579,621e339967b776a240e502de,34.460723,51.351,45.883,12.0,79.0,0.766667,0.775,0.833333,-0.705708,...,0.0,0.866025,-0.433884,0.587785,0.299363,0.5,-0.900969,0.809017,-0.954139,identified_regulation
3580,621e339967b776a240e502de,34.137341,50.504,49.779,12.0,78.0,0.833333,0.700,0.833333,-1.229213,...,0.0,0.866025,-0.974928,0.587785,0.101168,0.5,-0.222521,0.809017,-0.994869,identified_regulation
3581,621e339967b776a240e502de,34.406884,51.533,45.322,11.6,73.0,0.700000,0.725,0.766667,-1.643875,...,0.0,0.866025,-0.781831,0.587785,-0.101168,0.5,0.623490,0.809017,-0.994869,identified_regulation


Fitbit merged personality preprocessing

In [25]:
# Load the Fitbit/personality merge and separate the five IPIP category
# labels from the features before post-processing.
fitbit_personality = pd.read_pickle('data/fitbit_personality_to_model.pkl')

# One explicit list drives both the selection and the drop. The earlier
# label-range slice ('ipip_extraversion_category':'ipip_intellect_category')
# only matched the dropped columns when those five columns were adjacent and
# in this order in the pickle; any extra column in between would have been
# kept in both frames and duplicated by the concat below.
personality_targets = [
    'ipip_extraversion_category',
    'ipip_agreeableness_category',
    'ipip_conscientiousness_category',
    'ipip_stability_category',
    'ipip_intellect_category',
]
target_df = fitbit_personality[personality_targets]
fitbit_personality = fitbit_personality.drop(columns=personality_targets)

fitbit_personality_processed = preprocessing_functions.post_preprocessing(fitbit_personality, False)

# Re-attach the labels column-wise. concat(axis=1) aligns on the row index.
# NOTE(review): this assumes post_preprocessing keeps row labels consistent
# with the loaded frame — confirm.
fitbit_personality_processed = pd.concat([fitbit_personality_processed, target_df], axis=1)

# Persist the result, then display it as the cell output.
fitbit_personality_processed.to_pickle('./data/fitbit_personality_processed.pkl')
fitbit_personality_processed

Unnamed: 0,id,nightly_temperature,nremhr,rmssd,full_sleep_breathing_rate,stress_score,sleep_points_percentage,exertion_points_percentage,responsiveness_points_percentage,daily_temperature_variation,...,day_sin,month_cos,weekday_cos,week_cos,day_cos,ipip_extraversion_category,ipip_agreeableness_category,ipip_conscientiousness_category,ipip_stability_category,ipip_intellect_category
0,621e2e8e67b776a24055b564,34.137687,57.4320,33.852,14.8,78.0,0.833333,0.675,0.866667,-1.788325,...,-0.988468,0.5,1.000000,0.951057,0.151428,LOW,LOW,HIGH,HIGH,AVERAGE
1,621e2e8e67b776a24055b564,33.794544,57.6810,33.852,15.8,80.0,0.833333,0.725,0.866667,-2.462709,...,-0.937752,0.5,0.623490,0.951057,0.347305,LOW,LOW,HIGH,HIGH,AVERAGE
2,621e2e8e67b776a24055b564,34.611011,57.4810,33.852,14.6,84.0,0.966667,0.725,0.866667,-2.385801,...,-0.848644,0.5,-0.222521,0.951057,0.528964,LOW,LOW,HIGH,HIGH,AVERAGE
3,621e2e8e67b776a24055b564,34.408304,57.4930,33.852,14.8,82.0,0.933333,0.725,0.833333,-2.124199,...,-0.724793,0.5,-0.900969,0.951057,0.688967,LOW,LOW,HIGH,HIGH,AVERAGE
4,621e2e8e67b776a24055b564,34.178922,56.7500,33.852,15.2,81.0,0.866667,0.725,0.866667,-2.396873,...,-0.571268,0.5,-0.900969,0.951057,0.820763,LOW,LOW,HIGH,HIGH,AVERAGE
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3450,621e339967b776a240e502de,33.816660,51.7600,47.281,11.6,79.0,0.700000,0.725,0.966667,-1.793898,...,0.485302,0.5,-0.900969,0.809017,-0.874347,AVERAGE,HIGH,AVERAGE,HIGH,AVERAGE
3451,621e339967b776a240e502de,34.460723,51.3510,45.883,12.0,79.0,0.766667,0.775,0.833333,-0.705708,...,0.299363,0.5,-0.900969,0.809017,-0.954139,AVERAGE,HIGH,AVERAGE,HIGH,AVERAGE
3452,621e339967b776a240e502de,34.137341,50.5040,49.779,12.0,78.0,0.833333,0.700,0.833333,-1.229213,...,0.101168,0.5,-0.222521,0.809017,-0.994869,AVERAGE,HIGH,AVERAGE,HIGH,AVERAGE
3453,621e339967b776a240e502de,34.406884,51.5330,45.322,11.6,73.0,0.700000,0.725,0.766667,-1.643875,...,-0.101168,0.5,0.623490,0.809017,-0.994869,AVERAGE,HIGH,AVERAGE,HIGH,AVERAGE


Fitbit merged PANAS preprocessing

In [3]:
# Load the Fitbit/PANAS merge and separate the two affect scores (the labels)
# from the features before post-processing.
fitbit_panas = pd.read_pickle('data/fitbit_panas_to_model.pkl')

# One explicit list drives both the selection and the drop. The earlier
# label-range slice ('positive_affect_score':'negative_affect_score') only
# matched the dropped columns when the two columns were adjacent and in this
# order in the pickle.
panas_targets = ['positive_affect_score', 'negative_affect_score']
target_df = fitbit_panas[panas_targets]
fitbit_panas = fitbit_panas.drop(columns=panas_targets)

# NOTE(review): the original inline comment here read "remove manually
# outliers line" — presumably the outlier-replacement step inside
# post_preprocessing has to be disabled by hand for this dataset; confirm.
fitbit_panas_processed = preprocessing_functions.post_preprocessing(fitbit_panas, False)

# Re-attach the labels column-wise. concat(axis=1) aligns on the row index.
# NOTE(review): this assumes post_preprocessing keeps row labels consistent
# with the loaded frame — confirm.
fitbit_panas_processed = pd.concat([fitbit_panas_processed, target_df], axis=1)

# Persist the result, then display it as the cell output.
fitbit_panas_processed.to_pickle('./data/fitbit_panas_processed.pkl')
fitbit_panas_processed

Unnamed: 0,id,nightly_temperature,nremhr,rmssd,full_sleep_breathing_rate,stress_score,sleep_points_percentage,exertion_points_percentage,responsiveness_points_percentage,daily_temperature_variation,...,month_sin,weekday_sin,week_sin,day_sin,month_cos,weekday_cos,week_cos,day_cos,positive_affect_score,negative_affect_score
0,621e2e8e67b776a24055b564,34.515655,59.798,98.515,15.4,74.0,0.766667,0.725,0.700000,-1.972848,...,-7.071068e-01,0.000000,7.557496e-01,-0.743145,7.071068e-01,1.000000,-0.654861,0.669131,37,14
1,621e2e8e67b776a24055b564,34.408304,57.481,94.303,14.8,81.0,0.866667,0.725,0.866667,-2.385801,...,-7.071068e-01,0.000000,-1.133108e-15,0.207912,-7.071068e-01,1.000000,1.000000,0.978148,38,12
2,621e2e8e67b776a24055b564,34.476865,56.570,109.509,14.4,83.0,0.833333,0.775,0.800000,-1.912061,...,-1.000000e+00,0.000000,2.817326e-01,0.994522,-1.836970e-16,1.000000,0.959493,0.104528,37,12
3,621e2e8e67b776a24055b564,34.418045,53.006,107.388,15.6,76.0,0.766667,0.700,0.766667,-2.170245,...,-1.000000e+00,0.000000,9.096320e-01,-0.406737,-1.836970e-16,1.000000,0.415415,0.913545,31,12
4,621e2e8e67b776a24055b564,34.254286,57.330,100.374,15.0,75.0,0.833333,0.700,0.766667,-2.154991,...,-7.071068e-01,0.000000,9.096320e-01,-0.743145,7.071068e-01,1.000000,-0.415415,-0.669131,37,12
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
263,621e339967b776a240e502de,34.876472,50.683,47.293,11.6,82.0,0.766667,0.875,0.866667,-1.986618,...,3.673940e-16,0.000000,9.096320e-01,-0.866025,-1.000000e+00,1.000000,-0.415415,-0.500000,44,13
264,621e339967b776a240e502de,35.107574,54.325,47.929,12.2,87.0,0.866667,0.950,0.766667,-1.998401,...,3.673940e-16,0.000000,7.557496e-01,-0.587785,-1.000000e+00,1.000000,-0.654861,0.809017,45,11
265,621e339967b776a240e502de,34.957428,52.810,47.801,12.4,81.0,0.700000,0.900,0.866667,-2.028679,...,7.071068e-01,0.000000,2.817326e-01,0.587785,7.071068e-01,1.000000,0.959493,0.809017,44,11
266,621e339967b776a240e502de,34.073346,51.364,51.324,12.2,84.0,0.833333,0.875,0.866667,-2.023243,...,7.071068e-01,0.974928,5.406408e-01,0.587785,7.071068e-01,-0.222521,0.841254,-0.809017,45,13


Fitbit merged STAI preprocessing

In [4]:
# Load the Fitbit/STAI merge and detach the label column so that only the
# remaining columns are handed to post_preprocessing.
fitbit_stai = pd.read_pickle('data/fitbit_stai_to_model.pkl')
target = fitbit_stai.pop('stai_stress_category')  # removes the column from fitbit_stai in place

# NOTE(review): the original inline comment here read "remove manually
# outliers line" — presumably the outlier-replacement step inside
# post_preprocessing has to be disabled by hand for this dataset; confirm.
fitbit_stai_processed = preprocessing_functions.post_preprocessing(fitbit_stai, False)

# Re-attach the label. Column assignment aligns on the row index.
# NOTE(review): this assumes post_preprocessing keeps row labels consistent
# with the loaded frame — confirm.
# NOTE(review): the displayed output still contains the numeric `stai_stress`
# column next to `stai_stress_category`; if the category is derived from that
# score it leaks the label — confirm it is excluded before modelling.
fitbit_stai_processed['stai_stress_category'] = target

# Persist the result, then display it as the cell output.
fitbit_stai_processed.to_pickle('./data/fitbit_stai_processed.pkl')
fitbit_stai_processed

Unnamed: 0,id,stai_stress,nightly_temperature,nremhr,rmssd,full_sleep_breathing_rate,stress_score,sleep_points_percentage,exertion_points_percentage,responsiveness_points_percentage,...,Yoga/Pilates,month_sin,weekday_sin,week_sin,day_sin,month_cos,weekday_cos,week_cos,day_cos,stai_stress_category
0,621e2e8e67b776a24055b564,45.0,34.408304,57.481,94.303,14.8,81.0,0.866667,0.725,0.866667,...,0.0,-7.071068e-01,0.000000,-1.133108e-15,0.207912,-7.071068e-01,1.000000,1.000000,0.978148,Below average
1,621e2e8e67b776a24055b564,46.0,34.476865,56.570,109.509,14.4,83.0,0.833333,0.775,0.800000,...,0.0,-1.000000e+00,0.000000,2.817326e-01,0.994522,-1.836970e-16,1.000000,0.959493,0.104528,Average
2,621e2e8e67b776a24055b564,46.0,34.254286,57.330,100.374,15.0,75.0,0.833333,0.700,0.766667,...,0.0,-7.071068e-01,0.000000,9.096320e-01,-0.743145,7.071068e-01,1.000000,-0.415415,-0.669131,Average
3,621e2e8e67b776a24055b564,46.0,34.515655,59.798,98.515,15.4,74.0,0.766667,0.725,0.700000,...,0.0,-7.071068e-01,0.000000,7.557496e-01,-0.743145,7.071068e-01,1.000000,-0.654861,0.669131,Average
4,621e2e8e67b776a24055b564,54.0,34.418045,53.006,107.388,15.6,76.0,0.766667,0.700,0.766667,...,0.0,-1.000000e+00,0.000000,9.096320e-01,-0.406737,-1.836970e-16,1.000000,0.415415,0.913545,Above average
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
274,621e339967b776a240e502de,49.0,34.050838,61.043,34.792,14.4,77.0,0.766667,0.750,0.766667,...,0.0,7.071068e-01,0.000000,7.557496e-01,-0.994522,-7.071068e-01,1.000000,0.654861,-0.104528,Average
275,621e339967b776a240e502de,51.0,34.674633,51.192,49.232,12.2,83.0,0.733333,0.925,0.800000,...,0.0,3.673940e-16,0.000000,9.898214e-01,0.951057,-1.000000e+00,1.000000,0.142315,0.309017,Above average
276,621e339967b776a240e502de,45.0,34.303759,46.311,56.468,11.0,87.0,0.833333,0.900,0.900000,...,0.0,3.673940e-16,0.781831,9.898214e-01,0.207912,-1.000000e+00,0.623490,-0.142315,-0.978148,Below average
277,621e339967b776a240e502de,48.0,34.876472,50.683,47.293,11.6,82.0,0.766667,0.875,0.866667,...,0.0,3.673940e-16,0.000000,9.096320e-01,-0.866025,-1.000000e+00,1.000000,-0.415415,-0.500000,Average
