In [41]:
import numpy as np
import pandas as pd
from dataprep.eda import create_report
from sklearn.metrics import auc, roc_curve
from sklearn.ensemble import ExtraTreesClassifier
import seaborn as sns
import pickle
import pycaret
from pycaret.classification import *
from dataprep.eda import create_report
import matplotlib.pyplot as plt
from sklearn.ensemble import ExtraTreesClassifier
from tsfresh import extract_features
from tsfresh import select_features
from tsfresh.utilities.dataframe_functions import impute
from tsfresh import extract_relevant_features
from tsfresh.feature_extraction import MinimalFCParameters, EfficientFCParameters

In [42]:
cmap='seismic'
sns.set_palette(sns.color_palette(cmap))

In [43]:
def train_test_split_per_user(data, train_size=0.7):
    users = list(set(data.id))
    users = sorted(users, reverse=True)  # fix randomness
    total_users = len(users)
    slice = int(train_size * total_users)
    users_train = users[:slice]
    users_test = users[slice:]
    return data[data.id.isin(users_train)], data[data.id.isin(users_test)]

def get_cardio_category(gender, age, vo2max):
    if pd.isna(gender):
        return np.nan
    if gender == "MALE":
        if age == "<30":
            if vo2max >= 51.1:
                return "Superior/Excellent"
            elif vo2max >= 41.7:
                return "Fair/Good"
            else:
                return "Poor"
        else:
            if vo2max >= 48.3:
                return "Superior/Excellent"
            elif vo2max >= 40.5:
                return "Fair/Good"
            else:
                return "Poor"
    else:
        if age == "<30":
            if vo2max >= 43.9:
                return "Superior/Excellent"
            elif vo2max >= 36.1:
                return "Fair/Good"
            else:
                return "Poor"
        else:
            if vo2max >= 42.4:
                return "Superior/Excellent"
            elif vo2max >= 34.4:
                return "Fair/Good"
            else:
                return "Poor"

In [44]:
df=pd.read_pickle('../data/unified_dataframe/dataframe_demographics_preprocessed.pkl')
df = df[df['filteredDemographicVO2Max'].notna()]
df.head()

Unnamed: 0,id,date,nightly_temperature,nremhr,rmssd,spo2,full_sleep_breathing_rate,stress_score,sleep_points_percentage,exertion_points_percentage,responsiveness_points_percentage,daily_temperature_variation,calories,filteredDemographicVO2Max,distance,bpm,lightly_active_minutes,moderately_active_minutes,very_active_minutes,sedentary_minutes,mindfulness_session,scl_avg,resting_hr,sleep_duration,minutesToFallAsleep,minutesAsleep,minutesAwake,minutesAfterWakeup,sleep_efficiency,sleep_deep_ratio,sleep_wake_ratio,sleep_light_ratio,sleep_rem_ratio,steps,minutes_in_default_zone_1,minutes_below_default_zone_1,minutes_in_default_zone_2,minutes_in_default_zone_3,age,gender,bmi,heart_rate_alert,month_sin,weekday_sin,week_sin,day_sin,month_cos,weekday_cos,week_cos,day_cos,DAILY_FLOORS,DAILY_STEPS,GOAL_BASED_WEIGHT_LOSS,LIFETIME_DISTANCE,LIFETIME_FLOORS,LIFETIME_WEIGHT_GOAL_SETUP,Aerobic Workout,Bike,Bootcamp,Circuit Training,Elliptical,Hike,Interval Workout,Martial Arts,Run,Spinning,Sport,Swim,Treadmill,Walk,Weights,Workout,Yoga/Pilates,wear_day,early_features,used_during_night,different_activity_types,different_badge_types,is_index,iv_index,sri_index,sjl_index,mode_startTime,mode_endTime,mode_startTime_sin,mode_startTime_cos,mode_endTime_sin,mode_endTime_cos,isp_index,steps_is_index,steps_iv_index,steps_sri_index,steps_isp_index,exercise_is_index,exercise_iv_index,exercise_sri_index,stress_quantile,user_stress_quantile,average_sleep_duration,average_steps,is_weekend,is_holiday,startDay_sin,startDay_cos,startWeek_sin,startWeek_cos,startWeekday_sin,startWeekday_cos,startMonth_sin,startMonth_cos,startYear_sin,startYear_cos,endDay_sin,endDay_cos,endWeek_sin,endWeek_cos,endWeekday_sin,endWeekday_cos,endMonth_sin,endMonth_cos,endYear_sin,endYear_cos,startHour_sin,startHour_cos,endHour_sin,endHour_cos,Steps_hour0,Steps_hour1,Steps_hour2,Steps_hour3,Steps_hour4,Steps_hour5,Steps_hour6,Steps_hour7,Steps_hour8,Steps_hour9,Steps_hour10,Steps_hour11,Steps_hour12,Steps_hour13,Steps_hour14,Steps_hour15,Steps_hour16,Steps_hour17,Steps_hour18,Steps_hour19,Steps_hour20,Steps_hour21,Steps_hour22,Steps_hour23
0,621e2e8e67b776a24055b564,2021-05-24,34.137687,57.432,89.603,95.9,14.8,78.0,0.833333,0.675,0.866667,-1.788325,2351.59,62.7921,6517.5,71.701565,149.0,24.0,33.0,713.0,0.0,5.340968,62.07307,31260000.0,0.0,445.0,76.0,0.0,93.0,1.243243,0.987013,0.921642,1.341772,8833.0,83.0,1349.0,0.0,0.0,0.0,0.0,Underweight,0.0,-0.866025,0.0,0.309017,-0.988468,0.5,1.0,0.951057,0.151428,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1,0,0.892308,1,4,0.930443,0.343581,90.49513,1.091284,23.0,9.0,0.382683,-0.92388,-0.382683,-0.92388,0.945035,0.287117,1.655374,89.963574,0.381108,0.224056,2.003963,77.127735,2,1,32604000.0,9219.830769,0.0,0.0,0.073172,0.997319,-0.281733,0.959493,0.0,1.0,0.015445,0.999881,-0.027853,0.999612,0.073172,0.997319,-0.0,1.0,0.0,1.0,0.015445,0.999881,-0.027853,0.999612,0.0,1.0,0.027554,0.99962,134.0,0.0,0.0,15.0,0.0,0.0,39.0,0.0,7.0,10.0,2626.0,992.0,429.0,29.0,191.0,33.0,342.0,1712.0,1838.0,160.0,155.0,37.0,31.0,53.0
1,621e2e8e67b776a24055b564,2021-05-25,33.794544,57.681,94.303,95.9,15.8,80.0,0.833333,0.725,0.866667,-2.462709,2332.08,62.67912,7178.6,70.5793,132.0,25.0,31.0,704.0,0.0,5.340968,62.121476,32880000.0,0.0,460.0,88.0,0.0,94.0,1.466667,1.142857,0.947566,1.197531,9727.0,56.0,1374.0,4.0,0.0,0.0,0.0,Underweight,0.0,-0.866025,0.781831,0.309017,-0.937752,0.5,0.62349,0.951057,0.347305,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1,0,0.892308,1,4,0.930443,0.343581,90.49513,1.091284,23.0,9.0,0.382683,-0.92388,-0.382683,-0.92388,0.945035,0.287117,1.655374,89.963574,0.381108,0.224056,2.003963,77.127735,2,2,32604000.0,9219.830769,0.0,0.0,0.073172,0.997319,-0.281733,0.959493,0.0,1.0,0.015445,0.999881,-0.027853,0.999612,0.076215,0.997091,-0.0,1.0,0.003088,0.999995,0.015445,0.999881,-0.027853,0.999612,0.070367,0.997521,0.024493,0.9997,0.0,0.0,0.0,7.0,0.0,0.0,0.0,0.0,0.0,2703.0,735.0,76.0,1010.0,544.0,120.0,24.0,446.0,377.0,3165.0,332.0,97.0,9.0,24.0,58.0
2,621e2e8e67b776a24055b564,2021-05-26,34.611011,57.481,119.212,95.9,14.6,84.0,0.966667,0.725,0.866667,-2.385801,2262.3,62.57307,6090.9,71.842573,112.0,27.0,31.0,710.0,0.0,5.340968,62.263999,33600000.0,0.0,493.0,67.0,0.0,96.0,1.116883,0.858974,1.015038,1.670732,8253.0,85.0,1350.0,0.0,0.0,0.0,0.0,Underweight,0.0,-0.866025,0.974928,0.309017,-0.848644,0.5,-0.222521,0.951057,0.528964,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1,0,0.892308,1,4,0.930443,0.343581,90.49513,1.091284,23.0,9.0,0.382683,-0.92388,-0.382683,-0.92388,0.945035,0.287117,1.655374,89.963574,0.381108,0.224056,2.003963,77.127735,2,2,32604000.0,9219.830769,0.0,0.0,0.076215,0.997091,-0.281733,0.959493,0.003088,0.999995,0.015445,0.999881,-0.027853,0.999612,0.079258,0.996854,-0.0,1.0,0.006175,0.999981,0.015445,0.999881,-0.027853,0.999612,0.070367,0.997521,0.027554,0.99962,0.0,0.0,0.0,0.0,7.0,8.0,0.0,0.0,11.0,2013.0,1280.0,64.0,150.0,286.0,85.0,477.0,390.0,2821.0,293.0,158.0,66.0,0.0,70.0,74.0
3,621e2e8e67b776a24055b564,2021-05-27,34.408304,57.493,111.709,95.9,14.8,82.0,0.933333,0.725,0.833333,-2.124199,2325.1,62.47493,6653.1,71.725477,133.0,21.0,37.0,622.0,0.0,5.340968,62.3689,37620000.0,0.0,540.0,87.0,0.0,93.0,1.128205,1.12987,1.191729,1.588235,9015.0,90.0,1282.0,0.0,0.0,0.0,0.0,Underweight,0.0,-0.866025,0.433884,0.309017,-0.724793,0.5,-0.900969,0.951057,0.688967,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1,0,0.892308,1,4,0.930443,0.343581,90.49513,1.091284,23.0,9.0,0.382683,-0.92388,-0.382683,-0.92388,0.945035,0.287117,1.655374,89.963574,0.381108,0.224056,2.003963,77.127735,2,2,32604000.0,9219.830769,0.0,0.0,0.079258,0.996854,-0.281733,0.959493,0.006175,0.999981,0.015445,0.999881,-0.027853,0.999612,0.082299,0.996608,-0.0,1.0,0.009263,0.999957,0.015445,0.999881,-0.027853,0.999612,0.070367,0.997521,0.027554,0.99962,0.0,0.0,0.0,7.0,8.0,0.0,7.0,8.0,0.0,245.0,3396.0,11.0,309.0,113.0,58.0,296.0,401.0,21.0,3054.0,503.0,411.0,96.0,63.0,8.0
4,621e2e8e67b776a24055b564,2021-05-28,34.178922,56.75,103.034,95.9,15.2,81.0,0.866667,0.725,0.866667,-2.396873,2586.76,62.41166,9557.9,74.401028,136.0,42.0,54.0,647.0,0.0,5.340968,61.965409,33660000.0,0.0,493.0,68.0,0.0,94.0,0.910256,0.871795,1.211896,1.090909,12949.0,146.0,1274.0,4.0,0.0,0.0,0.0,Underweight,0.0,-0.866025,-0.433884,0.309017,-0.571268,0.5,-0.900969,0.951057,0.820763,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1,0,0.892308,1,4,0.930443,0.343581,90.49513,1.091284,23.0,9.0,0.382683,-0.92388,-0.382683,-0.92388,0.945035,0.287117,1.655374,89.963574,0.381108,0.224056,2.003963,77.127735,2,2,32604000.0,9219.830769,0.0,0.0,0.082299,0.996608,-0.281733,0.959493,0.009263,0.999957,0.015445,0.999881,-0.027853,0.999612,0.08534,0.996352,-0.0,1.0,0.01235,0.999924,0.015445,0.999881,-0.027853,0.999612,0.070367,0.997521,0.024493,0.9997,0.0,0.0,0.0,13.0,0.0,0.0,0.0,0.0,14.0,306.0,3046.0,24.0,1228.0,1221.0,250.0,82.0,1363.0,3014.0,81.0,104.0,1984.0,18.0,141.0,60.0


In [45]:
df['fitness_level'] = df.apply(lambda row: get_cardio_category(row["gender"], row['age'], row["filteredDemographicVO2Max"]), axis=1)
df.fitness_level.astype(str)
df.head()

Unnamed: 0,id,date,nightly_temperature,nremhr,rmssd,spo2,full_sleep_breathing_rate,stress_score,sleep_points_percentage,exertion_points_percentage,responsiveness_points_percentage,daily_temperature_variation,calories,filteredDemographicVO2Max,distance,bpm,lightly_active_minutes,moderately_active_minutes,very_active_minutes,sedentary_minutes,mindfulness_session,scl_avg,resting_hr,sleep_duration,minutesToFallAsleep,minutesAsleep,minutesAwake,minutesAfterWakeup,sleep_efficiency,sleep_deep_ratio,sleep_wake_ratio,sleep_light_ratio,sleep_rem_ratio,steps,minutes_in_default_zone_1,minutes_below_default_zone_1,minutes_in_default_zone_2,minutes_in_default_zone_3,age,gender,bmi,heart_rate_alert,month_sin,weekday_sin,week_sin,day_sin,month_cos,weekday_cos,week_cos,day_cos,DAILY_FLOORS,DAILY_STEPS,GOAL_BASED_WEIGHT_LOSS,LIFETIME_DISTANCE,LIFETIME_FLOORS,LIFETIME_WEIGHT_GOAL_SETUP,Aerobic Workout,Bike,Bootcamp,Circuit Training,Elliptical,Hike,Interval Workout,Martial Arts,Run,Spinning,Sport,Swim,Treadmill,Walk,Weights,Workout,Yoga/Pilates,wear_day,early_features,used_during_night,different_activity_types,different_badge_types,is_index,iv_index,sri_index,sjl_index,mode_startTime,mode_endTime,mode_startTime_sin,mode_startTime_cos,mode_endTime_sin,mode_endTime_cos,isp_index,steps_is_index,steps_iv_index,steps_sri_index,steps_isp_index,exercise_is_index,exercise_iv_index,exercise_sri_index,stress_quantile,user_stress_quantile,average_sleep_duration,average_steps,is_weekend,is_holiday,startDay_sin,startDay_cos,startWeek_sin,startWeek_cos,startWeekday_sin,startWeekday_cos,startMonth_sin,startMonth_cos,startYear_sin,startYear_cos,endDay_sin,endDay_cos,endWeek_sin,endWeek_cos,endWeekday_sin,endWeekday_cos,endMonth_sin,endMonth_cos,endYear_sin,endYear_cos,startHour_sin,startHour_cos,endHour_sin,endHour_cos,Steps_hour0,Steps_hour1,Steps_hour2,Steps_hour3,Steps_hour4,Steps_hour5,Steps_hour6,Steps_hour7,Steps_hour8,Steps_hour9,Steps_hour10,Steps_hour11,Steps_hour12,Steps_hour13,Steps_hour14,Steps_hour15,Steps_hour16,Steps_hour17,Steps_hour18,Steps_hour19,Steps_hour20,Steps_hour21,Steps_hour22,Steps_hour23,fitness_level
0,621e2e8e67b776a24055b564,2021-05-24,34.137687,57.432,89.603,95.9,14.8,78.0,0.833333,0.675,0.866667,-1.788325,2351.59,62.7921,6517.5,71.701565,149.0,24.0,33.0,713.0,0.0,5.340968,62.07307,31260000.0,0.0,445.0,76.0,0.0,93.0,1.243243,0.987013,0.921642,1.341772,8833.0,83.0,1349.0,0.0,0.0,0.0,0.0,Underweight,0.0,-0.866025,0.0,0.309017,-0.988468,0.5,1.0,0.951057,0.151428,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1,0,0.892308,1,4,0.930443,0.343581,90.49513,1.091284,23.0,9.0,0.382683,-0.92388,-0.382683,-0.92388,0.945035,0.287117,1.655374,89.963574,0.381108,0.224056,2.003963,77.127735,2,1,32604000.0,9219.830769,0.0,0.0,0.073172,0.997319,-0.281733,0.959493,0.0,1.0,0.015445,0.999881,-0.027853,0.999612,0.073172,0.997319,-0.0,1.0,0.0,1.0,0.015445,0.999881,-0.027853,0.999612,0.0,1.0,0.027554,0.99962,134.0,0.0,0.0,15.0,0.0,0.0,39.0,0.0,7.0,10.0,2626.0,992.0,429.0,29.0,191.0,33.0,342.0,1712.0,1838.0,160.0,155.0,37.0,31.0,53.0,Superior/Excellent
1,621e2e8e67b776a24055b564,2021-05-25,33.794544,57.681,94.303,95.9,15.8,80.0,0.833333,0.725,0.866667,-2.462709,2332.08,62.67912,7178.6,70.5793,132.0,25.0,31.0,704.0,0.0,5.340968,62.121476,32880000.0,0.0,460.0,88.0,0.0,94.0,1.466667,1.142857,0.947566,1.197531,9727.0,56.0,1374.0,4.0,0.0,0.0,0.0,Underweight,0.0,-0.866025,0.781831,0.309017,-0.937752,0.5,0.62349,0.951057,0.347305,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1,0,0.892308,1,4,0.930443,0.343581,90.49513,1.091284,23.0,9.0,0.382683,-0.92388,-0.382683,-0.92388,0.945035,0.287117,1.655374,89.963574,0.381108,0.224056,2.003963,77.127735,2,2,32604000.0,9219.830769,0.0,0.0,0.073172,0.997319,-0.281733,0.959493,0.0,1.0,0.015445,0.999881,-0.027853,0.999612,0.076215,0.997091,-0.0,1.0,0.003088,0.999995,0.015445,0.999881,-0.027853,0.999612,0.070367,0.997521,0.024493,0.9997,0.0,0.0,0.0,7.0,0.0,0.0,0.0,0.0,0.0,2703.0,735.0,76.0,1010.0,544.0,120.0,24.0,446.0,377.0,3165.0,332.0,97.0,9.0,24.0,58.0,Superior/Excellent
2,621e2e8e67b776a24055b564,2021-05-26,34.611011,57.481,119.212,95.9,14.6,84.0,0.966667,0.725,0.866667,-2.385801,2262.3,62.57307,6090.9,71.842573,112.0,27.0,31.0,710.0,0.0,5.340968,62.263999,33600000.0,0.0,493.0,67.0,0.0,96.0,1.116883,0.858974,1.015038,1.670732,8253.0,85.0,1350.0,0.0,0.0,0.0,0.0,Underweight,0.0,-0.866025,0.974928,0.309017,-0.848644,0.5,-0.222521,0.951057,0.528964,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1,0,0.892308,1,4,0.930443,0.343581,90.49513,1.091284,23.0,9.0,0.382683,-0.92388,-0.382683,-0.92388,0.945035,0.287117,1.655374,89.963574,0.381108,0.224056,2.003963,77.127735,2,2,32604000.0,9219.830769,0.0,0.0,0.076215,0.997091,-0.281733,0.959493,0.003088,0.999995,0.015445,0.999881,-0.027853,0.999612,0.079258,0.996854,-0.0,1.0,0.006175,0.999981,0.015445,0.999881,-0.027853,0.999612,0.070367,0.997521,0.027554,0.99962,0.0,0.0,0.0,0.0,7.0,8.0,0.0,0.0,11.0,2013.0,1280.0,64.0,150.0,286.0,85.0,477.0,390.0,2821.0,293.0,158.0,66.0,0.0,70.0,74.0,Superior/Excellent
3,621e2e8e67b776a24055b564,2021-05-27,34.408304,57.493,111.709,95.9,14.8,82.0,0.933333,0.725,0.833333,-2.124199,2325.1,62.47493,6653.1,71.725477,133.0,21.0,37.0,622.0,0.0,5.340968,62.3689,37620000.0,0.0,540.0,87.0,0.0,93.0,1.128205,1.12987,1.191729,1.588235,9015.0,90.0,1282.0,0.0,0.0,0.0,0.0,Underweight,0.0,-0.866025,0.433884,0.309017,-0.724793,0.5,-0.900969,0.951057,0.688967,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1,0,0.892308,1,4,0.930443,0.343581,90.49513,1.091284,23.0,9.0,0.382683,-0.92388,-0.382683,-0.92388,0.945035,0.287117,1.655374,89.963574,0.381108,0.224056,2.003963,77.127735,2,2,32604000.0,9219.830769,0.0,0.0,0.079258,0.996854,-0.281733,0.959493,0.006175,0.999981,0.015445,0.999881,-0.027853,0.999612,0.082299,0.996608,-0.0,1.0,0.009263,0.999957,0.015445,0.999881,-0.027853,0.999612,0.070367,0.997521,0.027554,0.99962,0.0,0.0,0.0,7.0,8.0,0.0,7.0,8.0,0.0,245.0,3396.0,11.0,309.0,113.0,58.0,296.0,401.0,21.0,3054.0,503.0,411.0,96.0,63.0,8.0,Superior/Excellent
4,621e2e8e67b776a24055b564,2021-05-28,34.178922,56.75,103.034,95.9,15.2,81.0,0.866667,0.725,0.866667,-2.396873,2586.76,62.41166,9557.9,74.401028,136.0,42.0,54.0,647.0,0.0,5.340968,61.965409,33660000.0,0.0,493.0,68.0,0.0,94.0,0.910256,0.871795,1.211896,1.090909,12949.0,146.0,1274.0,4.0,0.0,0.0,0.0,Underweight,0.0,-0.866025,-0.433884,0.309017,-0.571268,0.5,-0.900969,0.951057,0.820763,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1,0,0.892308,1,4,0.930443,0.343581,90.49513,1.091284,23.0,9.0,0.382683,-0.92388,-0.382683,-0.92388,0.945035,0.287117,1.655374,89.963574,0.381108,0.224056,2.003963,77.127735,2,2,32604000.0,9219.830769,0.0,0.0,0.082299,0.996608,-0.281733,0.959493,0.009263,0.999957,0.015445,0.999881,-0.027853,0.999612,0.08534,0.996352,-0.0,1.0,0.01235,0.999924,0.015445,0.999881,-0.027853,0.999612,0.070367,0.997521,0.024493,0.9997,0.0,0.0,0.0,13.0,0.0,0.0,0.0,0.0,14.0,306.0,3046.0,24.0,1228.0,1221.0,250.0,82.0,1363.0,3014.0,81.0,104.0,1984.0,18.0,141.0,60.0,Superior/Excellent


In [46]:
fitness_level = df.drop(columns=['date','age', 'gender', 'bmi','filteredDemographicVO2Max'])

In [47]:
fitness_level['fitness_level'].value_counts()

Superior/Excellent    2676
Fair/Good              912
Poor                    95
Name: fitness_level, dtype: int64

In [48]:
fitness_level['fitness_level'].replace(to_replace=['Superior/Excellent', 'Fair/Good', 'Poor'], value=[2, 1, 0], inplace=True)

In [49]:
print(fitness_level['fitness_level'].isna().sum())
fitness_level = fitness_level[fitness_level['fitness_level'].notna()]
print(fitness_level['fitness_level'].isna().sum())

193
0


In [50]:
drop_columns =  fitness_level.columns[fitness_level.isna().any()].tolist()
fitness_level.drop(columns=drop_columns, inplace=True)

In [51]:
train_data, test_data = train_test_split_per_user(fitness_level)
fold_groups = train_data.id
train_data = train_data.drop(columns=['id'])
test_data = test_data.drop(columns=['id'])

In [52]:
et = ExtraTreesClassifier(n_estimators=100, random_state=0)

In [53]:
x_train = train_data.drop(columns=['fitness_level'])
y_train = train_data['fitness_level']
x_test = test_data.drop(columns=['fitness_level'])
y_test = test_data['fitness_level']

In [54]:
et = et.fit(x_train, y_train)

In [55]:
y_pred = et.predict(x_test)

In [56]:
fpr, tpr, thresholds = roc_curve(y_test, y_pred, pos_label=2)
auc(fpr, tpr)

0.4868212819202369

In [63]:
df=pd.read_pickle('../data/unified_dataframe/dataframe_demographics_preprocessed.pkl')



df['fitness_level'] = df.apply(lambda row: get_cardio_category(row["gender"], row['age'], row["filteredDemographicVO2Max"]), axis=1)
df.fitness_level.astype(str)
df.head()

fitness_level = df.drop(columns=['date', 'age','gender','bmi','filteredDemographicVO2Max'])

fitness_level = fitness_level[fitness_level['fitness_level'].notna()]
y = fitness_level['fitness_level']

In [64]:
cols = ['stress_score', 'steps', 'sleep_duration']

for col in cols:
    extracted_features = extract_features(df, column_id="id", column_sort="date", 
                                          n_jobs=5, column_kind=None, column_value=col, 
                                          show_warnings=False)
    extracted_features = extracted_features.reset_index()
    extracted_features = extracted_features.rename(columns={"index": "id"})
    fitness_level = fitness_level.merge(extracted_features, how='left', on=['id'])

Feature Extraction: 100%|██████████████████████████████████████████████████████████████| 24/24 [00:03<00:00,  6.53it/s]
Feature Extraction: 100%|██████████████████████████████████████████████████████████████| 24/24 [00:03<00:00,  6.58it/s]
Feature Extraction: 100%|██████████████████████████████████████████████████████████████| 24/24 [00:03<00:00,  6.68it/s]


In [65]:
train_data, test_data = train_test_split_per_user(fitness_level)

fold_groups = train_data.id
train_data = train_data.drop(columns=['id'])
test_data = test_data.drop(columns=['id'])

In [66]:
s = setup(data=train_data, target='fitness_level', silent=True, session_id=123, 
          fold_strategy='groupkfold', fold=3,  fold_groups=fold_groups, test_data=test_data, 
          normalize=True, normalize_method='minmax', fix_imbalance=True, remove_multicollinearity = True,
          multicollinearity_threshold = 0.6)

Unnamed: 0,Description,Value
0,session_id,123
1,Target,fitness_level
2,Target Type,Multiclass
3,Label Encoded,"Fair/Good: 0, Poor: 1, Superior/Excellent: 2"
4,Original Data,"(3234, 2512)"
5,Missing Values,True
6,Numeric Features,1685
7,Categorical Features,826
8,Ordinal Features,False
9,High Cardinality Features,False


create_model_container: 0
master_model_container: 0
display_container: 1
Pipeline(memory=None,
         steps=[('dtypes',
                 DataTypes_Auto_infer(categorical_features=[],
                                      display_types=False, features_todrop=[],
                                      id_columns=[],
                                      ml_usecase='classification',
                                      numerical_features=[],
                                      target='fitness_level',
                                      time_features=[])),
                ('imputer',
                 Simple_Imputer(categorical_strategy='not_available',
                                fill_value_categorical=None,
                                fill_value_numerical=None,
                                numer...
                ('dummy', Dummify(target='fitness_level')),
                ('fix_perfect', Remove_100(target='fitness_level')),
                ('clean_names', Clean_Colum_Nam

In [67]:
best = compare_models(sort = 'auc')

Unnamed: 0,Model,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC,TT (Sec)
et,Extra Trees Classifier,0.6984,0.7568,0.5844,0.675,0.6432,0.4615,0.5059,0.4233
rf,Random Forest Classifier,0.6834,0.7492,0.5667,0.6168,0.6366,0.4362,0.4785,0.3933
gbc,Gradient Boosting Classifier,0.5735,0.7426,0.524,0.62,0.5794,0.3084,0.3378,7.5933
lightgbm,Light Gradient Boosting Machine,0.6321,0.732,0.5485,0.6541,0.6282,0.373,0.3921,1.86
lr,Logistic Regression,0.6046,0.7198,0.5984,0.6431,0.5719,0.356,0.3897,3.18
lda,Linear Discriminant Analysis,0.5626,0.7022,0.5935,0.5965,0.5529,0.3002,0.3274,0.4133
nb,Naive Bayes,0.5542,0.6516,0.52,0.6322,0.5634,0.2871,0.3025,0.91
dt,Decision Tree Classifier,0.4847,0.6038,0.4621,0.5421,0.4794,0.2058,0.2255,0.3
knn,K Neighbors Classifier,0.3234,0.6011,0.4506,0.5923,0.2735,0.0966,0.1444,2.2733
ada,Ada Boost Classifier,0.3568,0.5501,0.4038,0.4178,0.3063,0.0919,0.0974,0.98


create_model_container: 14
master_model_container: 14
display_container: 2
ExtraTreesClassifier(bootstrap=False, ccp_alpha=0.0, class_weight=None,
                     criterion='gini', max_depth=None, max_features='auto',
                     max_leaf_nodes=None, max_samples=None,
                     min_impurity_decrease=0.0, min_impurity_split=None,
                     min_samples_leaf=1, min_samples_split=2,
                     min_weight_fraction_leaf=0.0, n_estimators=100, n_jobs=-1,
                     oob_score=False, random_state=123, verbose=0,
                     warm_start=False)
compare_models() succesfully completed......................................


In [68]:
lr = create_model('lr', fold=3)
tuned_lr = tune_model(lr)
d=pd.DataFrame({'Feature': get_config('X_train').columns, 'Value' : abs(tuned_lr.coef_[0])}).sort_values(by='Value', ascending=False)
d.reset_index(inplace=True)
d=d.head(15)
d = d.drop(columns='index')
d['Label'] = 'fitness'
d = d[['Label', 'Feature', 'Value']]
d.to_pickle('../data/feature_importances/fitness1.pkl')
d

Unnamed: 0_level_0,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC
Fold,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
0,0.735,0.8551,0.6637,0.8384,0.7762,0.4987,0.5205
1,0.4903,0.6093,0.4967,0.41,0.4234,0.2057,0.2255
2,0.6225,0.6932,0.5742,0.7279,0.5367,0.3767,0.4271
Mean,0.6159,0.7192,0.5782,0.6588,0.5788,0.3604,0.391
Std,0.1,0.102,0.0682,0.1816,0.1471,0.1202,0.1231


create_model_container: 16
master_model_container: 16
display_container: 4
LogisticRegression(C=0.049, class_weight='balanced', dual=False,
                   fit_intercept=True, intercept_scaling=1, l1_ratio=None,
                   max_iter=1000, multi_class='auto', n_jobs=None, penalty='l2',
                   random_state=123, solver='lbfgs', tol=0.0001, verbose=0,
                   warm_start=False)
tune_model() succesfully completed......................................
Initializing get_config()
get_config(variable=X_train)
Global variable: X_train returned as       nightly_temperature    nremhr     rmssd      spo2  \
1595             0.748920  0.681253  0.316019  0.517648   
1596             0.748920  0.681253  0.316019  0.517648   
1597             0.748920  0.681253  0.316019  0.517648   
1598             0.748920  0.681253  0.316019  0.517648   
1599             0.748920  0.681253  0.316019  0.517648   
...                   ...       ...       ...       ...   
4824         

Unnamed: 0,Label,Feature,Value
0,fitness,minutes_below_default_zone_1,1.186083
1,fitness,lightly_active_minutes,1.015977
2,fitness,endYear_sin_not_available,0.750949
3,fitness,month_sin,0.637995
4,fitness,sleep_duration__fft_coefficient__attr_angle__c...,0.499821
5,fitness,mode_endTime_9.0,0.449147
6,fitness,different_activity_types_4,0.448319
7,fitness,week_sin,0.437924
8,fitness,wear_day_1,0.394778
9,fitness,sleep_duration__large_standard_deviation__r_0....,0.372068


In [69]:
lr = create_model('lr', fold=3)

Unnamed: 0_level_0,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC
Fold,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
0,0.7414,0.8718,0.7432,0.8639,0.7832,0.5317,0.5666
1,0.47,0.6168,0.4936,0.4716,0.4206,0.1908,0.2114
2,0.6025,0.6707,0.5584,0.5937,0.5118,0.3456,0.391
Mean,0.6046,0.7198,0.5984,0.6431,0.5719,0.356,0.3897
Std,0.1108,0.1097,0.1058,0.1639,0.154,0.1394,0.145


create_model_container: 17
master_model_container: 17
display_container: 5
LogisticRegression(C=1.0, class_weight=None, dual=False, fit_intercept=True,
                   intercept_scaling=1, l1_ratio=None, max_iter=1000,
                   multi_class='auto', n_jobs=None, penalty='l2',
                   random_state=123, solver='lbfgs', tol=0.0001, verbose=0,
                   warm_start=False)
create_model() succesfully completed......................................


In [70]:
tuned_lr = tune_model(lr)

Unnamed: 0_level_0,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC
Fold,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
0,0.735,0.8551,0.6637,0.8384,0.7762,0.4987,0.5205
1,0.4903,0.6093,0.4967,0.41,0.4234,0.2057,0.2255
2,0.6225,0.6932,0.5742,0.7279,0.5367,0.3767,0.4271
Mean,0.6159,0.7192,0.5782,0.6588,0.5788,0.3604,0.391
Std,0.1,0.102,0.0682,0.1816,0.1471,0.1202,0.1231


create_model_container: 18
master_model_container: 18
display_container: 6
LogisticRegression(C=0.049, class_weight='balanced', dual=False,
                   fit_intercept=True, intercept_scaling=1, l1_ratio=None,
                   max_iter=1000, multi_class='auto', n_jobs=None, penalty='l2',
                   random_state=123, solver='lbfgs', tol=0.0001, verbose=0,
                   warm_start=False)
tune_model() succesfully completed......................................


In [71]:
d=pd.DataFrame({'Feature': get_config('X_train').columns, 'Value' : abs(tuned_lr.coef_[0])}).sort_values(by='Value', ascending=False)

Initializing get_config()
get_config(variable=X_train)
Global variable: X_train returned as       nightly_temperature    nremhr     rmssd      spo2  \
1595             0.748920  0.681253  0.316019  0.517648   
1596             0.748920  0.681253  0.316019  0.517648   
1597             0.748920  0.681253  0.316019  0.517648   
1598             0.748920  0.681253  0.316019  0.517648   
1599             0.748920  0.681253  0.316019  0.517648   
...                   ...       ...       ...       ...   
4824             0.748920  0.681253  0.316019  0.517648   
4825             0.753996  0.604888  0.283322  0.517648   
4826             0.724200  0.757760  0.178445  0.517648   
4827             0.763270  0.717682  0.210490  0.517648   
4828             0.743278  0.712988  0.210453  0.517648   

      full_sleep_breathing_rate  sleep_points_percentage  \
1595                   0.621849                 0.733333   
1596                   0.621849                 0.733333   
1597               

In [72]:
d.reset_index(inplace=True)
d=d.head(20)
d = d.drop(columns='index')
d['Label'] = 'fitness'
d = d[['Label', 'Feature', 'Value']]
d.to_pickle('../data/feature_importances/fitness.pkl')
d

Unnamed: 0,Label,Feature,Value
0,fitness,minutes_below_default_zone_1,1.186083
1,fitness,lightly_active_minutes,1.015977
2,fitness,endYear_sin_not_available,0.750949
3,fitness,month_sin,0.637995
4,fitness,sleep_duration__fft_coefficient__attr_angle__c...,0.499821
5,fitness,mode_endTime_9.0,0.449147
6,fitness,different_activity_types_4,0.448319
7,fitness,week_sin,0.437924
8,fitness,wear_day_1,0.394778
9,fitness,sleep_duration__large_standard_deviation__r_0....,0.372068
