## Imports

In [1]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from lazypredict.Supervised import LazyRegressor
from sklearn.preprocessing import LabelEncoder
from sklearn.utils import all_estimators
from sklearn.base import RegressorMixin

## Define the files to be loaded

In [2]:
# Participant number whose data should be loaded
participant_number = 3

# The version of the questionnaires to load
quest_version = 1

## Loading the pre-processed data

In [3]:
df = pd.read_csv('data/preprocessed/preprocessed_data_' + str(participant_number) + '_v' + str(quest_version) + '.csv')
df = df.drop(labels=['actual_day', 'actual_day.1'], axis=1)

df_oura = pd.read_csv('data/preprocessed/preprocessed_sleep_' + str(participant_number) + '_v' + str(quest_version) + '.csv')
df_oura = df_oura.drop(labels=['actual_day'], axis=1)

df_quest = pd.read_csv('data/preprocessed/preprocessed_questionnaires_' + str(participant_number) + '_v' + str(quest_version) + '.csv')
df_quest = df_quest.drop(labels=['actual_day'], axis=1)

In [4]:
# Remove these to try and fix the issues raised by LazyRegressor for them 
# example: 
# AdaBoostRegressor model failed to execute
# Found unknown categories ['[0.0, 51.0, 53.0, 53.0, 54.0, 53.0, ...
df = df.drop(labels=['hrv.items', 'heart_rate.items', 'sleep_phase_5_min', 'movement_30_sec'], axis=1)
df_oura = df_oura.drop(labels=['hrv.items', 'heart_rate.items', 'sleep_phase_5_min', 'movement_30_sec'], axis=1)

In [5]:
print(df.columns.tolist())

['0', 'average_breath', 'average_breath_variation', 'average_heart_rate', 'average_hrv', 'awake_time', 'bedtime_end_delta', 'bedtime_end_seconds', 'bedtime_start_delta', 'bedtime_start_seconds', 'contributors.deep_sleep', 'contributors.efficiency', 'contributors.latency', 'contributors.rem_sleep', 'contributors.restfulness', 'contributors.timing', 'contributors.total_sleep', 'deep_sleep_duration', 'efficiency', 'got_ups', 'latency', 'light_sleep_duration', 'lowest_heart_rate', 'lowest_heart_rate_time_offset', 'period', 'readiness.contributors.activity_balance', 'readiness.contributors.body_temperature', 'readiness.contributors.hrv_balance', 'readiness.contributors.previous_day_activity', 'readiness.contributors.previous_night', 'readiness.contributors.recovery_index', 'readiness.contributors.resting_heart_rate', 'readiness.contributors.sleep_balance', 'readiness.score', 'readiness.temperature_deviation', 'readiness.temperature_trend_deviation', 'rem_sleep_duration', 'restless', 'restle

## Prepare regressor list for lazypredict

In [6]:
removed_regressors = [
    "TheilSenRegressor",
    "ARDRegression", 
    "CCA", 
    "IsotonicRegression", 
    "StackingRegressor",
    "MultiOutputRegressor", 
    "MultiTaskElasticNet", 
    "MultiTaskElasticNetCV", 
    "MultiTaskLasso", 
    "MultiTaskLassoCV", 
    "PLSCanonical", 
    "PLSRegression", 
    "RadiusNeighborsRegressor", 
    "RegressorChain", 
    "VotingRegressor", 
]

In [7]:
REGRESSORS = [
    est
    for est in all_estimators()
    if (issubclass(est[1], RegressorMixin) and (est[0] not in removed_regressors))
]

## Run LazyRegressor on datasets

In [8]:
def lazy_regressor(X, y, test_size, random_state, regressor):
    # Create the test and train sets for the given prediction
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = test_size, random_state = random_state)
    
    models_hrv, predictions_hrv = regressor.fit(X_train, X_test, y_train, y_test)
    return models_hrv

In [9]:
# Apply the LazyRegressor on the data
reg = LazyRegressor(verbose=0, ignore_warnings=False, custom_metric=None, regressors = REGRESSORS)

### LazyRegressor for the factors influencing the sleep scores given all the variables

In [10]:
# Separate the indenpendent variables from the dependent variables
y_oura = df['score']
y_sub = df['subjective_sleep_score']
X = df.drop(labels=['score', 'subjective_sleep_score'], axis=1)

#### Oura sleep scores

In [11]:
models_oura = lazy_regressor(X, y_oura, test_size = 0.2, random_state = 2, regressor = reg)

print(models_oura)

'tuple' object has no attribute '__name__'
Invalid Regressor(s)


100%|██████████| 40/40 [00:01<00:00, 24.74it/s]

                               Adjusted R-Squared   R-Squared    RMSE  \
Model                                                                   
Lars                                   2797661.13 -5595319.26 2185.42   
GaussianProcessRegressor                    37.32      -71.65    7.87   
MLPRegressor                                 2.38       -1.76    1.54   
QuantileRegressor                            1.52       -0.04    0.94   
LassoLars                                    1.50       -0.00    0.92   
DummyRegressor                               1.50       -0.00    0.92   
Lasso                                        1.50       -0.00    0.92   
LarsCV                                       1.50       -0.00    0.92   
ElasticNet                                   1.31        0.38    0.73   
BaggingRegressor                             1.26        0.48    0.67   
AdaBoostRegressor                            1.26        0.48    0.67   
DecisionTreeRegressor                        1.25  




#### Subjective sleep scores

In [12]:
models_sub = lazy_regressor(X, y_sub, test_size = 0.2, random_state = 6, regressor = reg)

print(models_sub)

'tuple' object has no attribute '__name__'
Invalid Regressor(s)


100%|██████████| 40/40 [00:02<00:00, 15.44it/s]

                               Adjusted R-Squared         R-Squared  \
Model                                                                 
Lars                             2393773643791.91 -4787547287580.83   
GaussianProcessRegressor                    31.62            -60.23   
MLPRegressor                                 3.04             -3.07   
RANSACRegressor                              1.92             -0.84   
TransformedTargetRegressor                   1.80             -0.60   
LinearRegression                             1.80             -0.60   
HuberRegressor                               1.72             -0.43   
DecisionTreeRegressor                        1.71             -0.41   
PassiveAggressiveRegressor                   1.68             -0.36   
LinearSVR                                    1.65             -0.30   
ExtraTreeRegressor                           1.60             -0.19   
Lasso                                        1.54             -0.08   
DummyR




### LazyRegressor for the factors influencing the sleep scores given the questionnaire variables only

In [13]:
# Separate the indenpendent variables from the dependent variables
X = df_quest.drop(labels=['subjective_sleep_score'], axis=1)

#### Oura sleep score

In [14]:
models_oura = lazy_regressor(X, y_oura, test_size = 0.2, random_state = 2, regressor = reg)

print(models_oura)

'tuple' object has no attribute '__name__'
Invalid Regressor(s)


100%|██████████| 40/40 [00:00<00:00, 41.34it/s]

                                         Adjusted R-Squared  \
Model                                                         
TransformedTargetRegressor    25746711327061882567655424.00   
LinearRegression              25746711327061882567655424.00   
KernelRidge                                          373.13   
GaussianProcessRegressor                             188.71   
RANSACRegressor                                       14.87   
PassiveAggressiveRegressor                             8.31   
MLPRegressor                                           8.11   
DecisionTreeRegressor                                  7.82   
QuantileRegressor                                      6.39   
LassoLars                                              6.21   
DummyRegressor                                         6.21   
ElasticNet                                             6.21   
Lasso                                                  6.21   
BaggingRegressor                                       




#### Subjective sleep scores

In [15]:
models_sub = lazy_regressor(X, y_sub, test_size = 0.2, random_state = 6, regressor = reg)

print(models_sub)

'tuple' object has no attribute '__name__'
Invalid Regressor(s)


100%|██████████| 40/40 [00:00<00:00, 41.57it/s]

                                          Adjusted R-Squared  \
Model                                                          
RANSACRegressor               960809848391123624492269568.00   
KernelRidge                                           328.53   
GaussianProcessRegressor                              206.18   
MLPRegressor                                           13.63   
PassiveAggressiveRegressor                             12.24   
ExtraTreeRegressor                                      9.03   
DecisionTreeRegressor                                   8.80   
Lasso                                                   6.62   
LassoLars                                               6.62   
ElasticNet                                              6.62   
DummyRegressor                                          6.62   
QuantileRegressor                                       6.28   
HistGradientBoostingRegressor                           5.88   
ExtraTreesRegressor                     




### LazyRegressor for the factors influencing the sleep scores given the Oura variables only

In [16]:
# Separate the indenpendent variables from the dependent variables
X = df_oura.drop(labels=['score'], axis=1)

#### Oura sleep score

In [17]:
models_oura = lazy_regressor(X, y_oura, test_size = 0.2, random_state = 2, regressor = reg)

print(models_oura)

'tuple' object has no attribute '__name__'
Invalid Regressor(s)


100%|██████████| 40/40 [00:01<00:00, 27.50it/s]

                               Adjusted R-Squared  R-Squared  RMSE  Time Taken
Model                                                                         
GaussianProcessRegressor                    83.00     -71.54  7.87        0.01
MLPRegressor                                 4.12      -1.76  1.53        0.11
QuantileRegressor                            2.17      -0.04  0.94        0.06
LassoLars                                    2.13      -0.00  0.92        0.01
DummyRegressor                               2.13      -0.00  0.92        0.01
Lasso                                        2.13      -0.00  0.92        0.01
ElasticNet                                   1.70       0.38  0.73        0.01
DecisionTreeRegressor                        1.59       0.48  0.67        0.01
BaggingRegressor                             1.59       0.48  0.67        0.02
RandomForestRegressor                        1.56       0.50  0.65        0.15
AdaBoostRegressor                            1.55   




#### Subjective sleep scores

In [18]:
models_sub = lazy_regressor(X, y_sub, test_size = 0.2, random_state = 6, regressor = reg)

print(models_sub)

'tuple' object has no attribute '__name__'
Invalid Regressor(s)


100%|██████████| 40/40 [00:01<00:00, 20.21it/s]

                               Adjusted R-Squared  R-Squared  RMSE  Time Taken
Model                                                                         
GaussianProcessRegressor                    69.99     -60.03  7.16        0.01
Lars                                        26.51     -21.57  4.35        0.02
RANSACRegressor                              4.63      -2.21  1.64        0.06
MLPRegressor                                 3.60      -1.30  1.39        0.10
TransformedTargetRegressor                   3.57      -1.27  1.38        0.01
LinearRegression                             3.57      -1.27  1.38        0.01
ExtraTreeRegressor                           3.54      -1.25  1.37        0.01
PassiveAggressiveRegressor                   3.51      -1.22  1.37        0.01
DecisionTreeRegressor                        2.50      -0.32  1.05        0.01
LassoLars                                    2.22      -0.08  0.95        0.01
Lasso                                        2.22   




### LazyRegressor for the factors influencing the average HRV given all the variables

In [19]:
# Separate the indenpendent variables from the dependent variables
y_hrv = df['average_hrv']
X_hrv = df.drop(labels=['average_hrv'], axis=1)

In [20]:
models_hrv = lazy_regressor(X_hrv, y_hrv, test_size = 0.2, random_state = 5, regressor = reg)

print(models_hrv)

'tuple' object has no attribute '__name__'
Invalid Regressor(s)


100%|██████████| 40/40 [00:01<00:00, 22.04it/s]

                                      Adjusted R-Squared  \
Model                                                      
TransformedTargetRegressor    58548449100391799324672.00   
LinearRegression              58548449100391799324672.00   
GaussianProcessRegressor                            8.97   
MLPRegressor                                        2.63   
QuantileRegressor                                   1.53   
DummyRegressor                                      1.51   
LassoLars                                           1.46   
NuSVR                                               1.45   
SVR                                                 1.43   
Lars                                                1.35   
KNeighborsRegressor                                 1.27   
DecisionTreeRegressor                               1.25   
ExtraTreeRegressor                                  1.21   
RANSACRegressor                                     1.21   
HuberRegressor                          




### LazyRegressor for the factors influencing the average HRV given the questionnaire variables only

In [21]:
X_hrv = df_quest

In [22]:
models_hrv = lazy_regressor(X_hrv, y_hrv, test_size = 0.2, random_state = 5, regressor = reg)

print(models_hrv)

'tuple' object has no attribute '__name__'
Invalid Regressor(s)


100%|██████████| 40/40 [00:01<00:00, 37.48it/s]

                               Adjusted R-Squared  R-Squared  RMSE  Time Taken
Model                                                                         
KernelRidge                                 73.42     -15.71 70.50        0.01
GaussianProcessRegressor                    51.09     -10.56 58.63        0.01
MLPRegressor                                40.45      -8.10 52.03        0.10
QuantileRegressor                            5.65      -0.07 17.87        0.06
DecisionTreeRegressor                        5.52      -0.04 17.61        0.01
LassoLars                                    5.51      -0.04 17.60        0.01
DummyRegressor                               5.51      -0.04 17.60        0.01
PassiveAggressiveRegressor                   5.27       0.02 17.11        0.01
NuSVR                                        5.19       0.03 16.96        0.01
SVR                                          5.17       0.04 16.91        0.01
ExtraTreeRegressor                           4.87   




### LazyRegressor for the factors influencing the average HRV given the Oura variables only

In [23]:
X_hrv = df_oura.drop(labels=['average_hrv'], axis=1)

In [24]:
models_hrv = lazy_regressor(X_hrv, y_hrv, test_size = 0.2, random_state = 5, regressor = reg)

print(models_hrv)

'tuple' object has no attribute '__name__'
Invalid Regressor(s)


100%|██████████| 40/40 [00:01<00:00, 27.01it/s]

                               Adjusted R-Squared  R-Squared  RMSE  Time Taken
Model                                                                         
GaussianProcessRegressor                    19.32     -15.21 69.43        0.01
MLPRegressor                                 4.14      -1.78 28.76        0.11
QuantileRegressor                            2.21      -0.07 17.87        0.06
DummyRegressor                               2.18      -0.04 17.60        0.01
LassoLars                                    2.06       0.06 16.70        0.01
NuSVR                                        1.97       0.14 15.95        0.01
SVR                                          1.92       0.18 15.59        0.01
Lars                                         1.87       0.23 15.10        0.01
RANSACRegressor                              1.53       0.53 11.83        0.06
DecisionTreeRegressor                        1.51       0.55 11.59        0.01
KNeighborsRegressor                          1.44   




### LazyRegressor for the factors influencing the total sleep duration

In [25]:
# Separate the indenpendent variables from the dependent variables
# For the total sleep duration, only consider the days for which a person woke up naturally
y_total_sleep = df[df['Naturally'] == 1]['total_sleep_duration']
X_total_sleep = df[df['Naturally'] == 1]
X_total_sleep = X_total_sleep.drop(labels=['total_sleep_duration'], axis=1)

In [26]:
models_sleep_duration = lazy_regressor(X_total_sleep, y_total_sleep, test_size = 0.2, random_state = 89, regressor = reg)

print(models_sleep_duration)

'tuple' object has no attribute '__name__'
Invalid Regressor(s)


 65%|██████▌   | 26/40 [00:00<00:00, 39.30it/s]

LassoLarsIC model failed to execute
You are using LassoLarsIC in the case where the number of samples is smaller than the number of features. In this setting, getting a good estimate for the variance of the noise is not possible. Provide an estimate of the noise variance in the constructor.
RANSACRegressor model failed to execute
`min_samples` may not be larger than number of samples: n_samples = 16.


100%|██████████| 40/40 [00:00<00:00, 44.09it/s]

                                                              Adjusted R-Squared  \
Model                                                                              
Lars                          14452432009214087842324165777411226639273023862...   
GaussianProcessRegressor                                                    1.87   
LinearSVR                                                                   1.87   
MLPRegressor                                                                1.87   
QuantileRegressor                                                           1.05   
SVR                                                                         1.05   
NuSVR                                                                       1.05   
DummyRegressor                                                              1.05   
HistGradientBoostingRegressor                                               1.05   
ExtraTreeRegressor                                                          




### LazyRegressor for the factors influencing the total sleep duration given the questionnaire variables only

In [27]:
X_total_sleep = df_quest[df_quest['Naturally'] == 1]

In [28]:
models_sleep_duration = lazy_regressor(X_total_sleep, y_total_sleep, test_size = 0.2, random_state = 89, regressor = reg)

print(models_sleep_duration)

'tuple' object has no attribute '__name__'
Invalid Regressor(s)


 70%|███████   | 28/40 [00:00<00:00, 57.23it/s]

LassoLarsIC model failed to execute
You are using LassoLarsIC in the case where the number of samples is smaller than the number of features. In this setting, getting a good estimate for the variance of the noise is not possible. Provide an estimate of the noise variance in the constructor.
RANSACRegressor model failed to execute
`min_samples` may not be larger than number of samples: n_samples = 16.


100%|██████████| 40/40 [00:00<00:00, 61.85it/s]

                               Adjusted R-Squared  R-Squared     RMSE  \
Model                                                                   
KernelRidge                                  3.37     -21.91 25789.77   
LinearSVR                                    3.28     -21.07 25314.17   
GaussianProcessRegressor                     3.28     -21.06 25310.31   
MLPRegressor                                 3.28     -21.06 25308.30   
PoissonRegressor                             2.09      -9.52 17478.46   
Lars                                         1.62      -4.99 13191.63   
LassoLars                                    1.56      -4.44 12566.36   
HuberRegressor                               1.51      -3.94 11972.89   
LinearRegression                             1.50      -3.88 11899.96   
TransformedTargetRegressor                   1.50      -3.88 11899.96   
Lasso                                        1.43      -3.19 11033.39   
PassiveAggressiveRegressor                   1.38  




### LazyRegressor for the factors influencing the total sleep duration given the Oura variables only

In [29]:
X_total_sleep = df[df['Naturally'] == 1]
X_total_sleep = X_total_sleep.iloc[:, 0:49]
X_total_sleep = X_total_sleep.drop(labels=['total_sleep_duration'], axis=1)

In [30]:
models_sleep_duration = lazy_regressor(X_total_sleep, y_total_sleep, test_size = 0.2, random_state = 89, regressor = reg)

print(models_sleep_duration)

'tuple' object has no attribute '__name__'
Invalid Regressor(s)


 65%|██████▌   | 26/40 [00:00<00:00, 44.88it/s]

LassoLarsIC model failed to execute
You are using LassoLarsIC in the case where the number of samples is smaller than the number of features. In this setting, getting a good estimate for the variance of the noise is not possible. Provide an estimate of the noise variance in the constructor.
RANSACRegressor model failed to execute
`min_samples` may not be larger than number of samples: n_samples = 16.


100%|██████████| 40/40 [00:00<00:00, 48.10it/s]

                                                         Adjusted R-Squared  \
Model                                                                         
Lars                          656378762935145151708421696646946282799104.00   
GaussianProcessRegressor                                               2.47   
LinearSVR                                                              2.47   
MLPRegressor                                                           2.47   
QuantileRegressor                                                      1.08   
SVR                                                                    1.08   
NuSVR                                                                  1.08   
DummyRegressor                                                         1.08   
HistGradientBoostingRegressor                                          1.08   
KNeighborsRegressor                                                    1.02   
BaggingRegressor                                    




### LazyRegressor for the factors influencing the total awake time during the night given all variables

In [31]:
# Separate the indenpendent variables from the dependent variables
# For the total awake time, Oura also considers some time before falling asleep and some time after waking up. 
# Subtract what we can from that, which is the time before falling asleep
y_awake_time = df['awake_time'] - df['latency']
X_awake_time = df.drop(labels=['awake_time'], axis=1)

In [32]:
models_awake_time = lazy_regressor(X_awake_time, y_awake_time, test_size = 0.2, random_state = 56, regressor = reg)

print(models_awake_time)

'tuple' object has no attribute '__name__'
Invalid Regressor(s)


100%|██████████| 40/40 [00:01<00:00, 25.25it/s]

                                                              Adjusted R-Squared  \
Model                                                                              
Lars                          62197584941304097753076317279699411620529477104...   
GaussianProcessRegressor                                                    7.93   
MLPRegressor                                                                7.62   
LinearSVR                                                                   6.12   
KNeighborsRegressor                                                         1.67   
QuantileRegressor                                                           1.51   
SVR                                                                         1.51   
ExtraTreeRegressor                                                          1.51   
NuSVR                                                                       1.49   
DummyRegressor                                                              




### LazyRegressor for the factors influencing the total awake time during the night given the questionnaire variables only

In [33]:
X_awake_time = df_oura.drop(labels=['awake_time'], axis=1)

In [34]:
models_awake_time = lazy_regressor(X_awake_time, y_awake_time, test_size = 0.2, random_state = 56, regressor = reg)

print(models_awake_time)

'tuple' object has no attribute '__name__'
Invalid Regressor(s)


100%|██████████| 40/40 [00:01<00:00, 28.14it/s]

                               Adjusted R-Squared  R-Squared    RMSE  \
Model                                                                  
GaussianProcessRegressor                    16.93     -13.09 2209.26   
MLPRegressor                                16.19     -12.44 2157.71   
LinearSVR                                   12.80      -9.44 1901.59   
Lars                                         3.07      -0.83  795.77   
KNeighborsRegressor                          2.29      -0.14  629.49   
QuantileRegressor                            2.18      -0.04  601.11   
SVR                                          2.17      -0.04  599.57   
DummyRegressor                               2.13      -0.00  588.67   
NuSVR                                        2.13       0.00  588.48   
DecisionTreeRegressor                        1.98       0.13  549.15   
BaggingRegressor                             1.82       0.27  501.20   
ExtraTreeRegressor                           1.79       0.30  49




### LazyRegressor for the factors influencing the total awake time during the night given the Oura variables only

In [35]:
X_awake_time = df_quest

In [36]:
models_awake_time = lazy_regressor(X_awake_time, y_awake_time, test_size = 0.2, random_state = 56, regressor = reg)

print(models_awake_time)

'tuple' object has no attribute '__name__'
Invalid Regressor(s)


100%|██████████| 40/40 [00:01<00:00, 33.48it/s]

                                          Adjusted R-Squared  \
Model                                                          
TransformedTargetRegressor    414904848943085762084077568.00   
LinearRegression              414904848943085762084077568.00   
RANSACRegressor                93152483602900156552314880.00   
KernelRidge                                            68.53   
MLPRegressor                                           61.22   
LinearSVR                                              56.59   
GaussianProcessRegressor                               45.14   
PoissonRegressor                                       32.73   
Lars                                                   24.58   
ExtraTreeRegressor                                     23.25   
Lasso                                                  19.82   
HuberRegressor                                         19.23   
Ridge                                                  18.23   
SGDRegressor                            


