## Imports

In [1]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from lazypredict.Supervised import LazyRegressor
from sklearn.preprocessing import LabelEncoder
from sklearn.utils import all_estimators
from sklearn.base import RegressorMixin

## Define the files to be loaded

In [2]:
# Selects which participant's pre-processed CSV is read further down
participant_number: int = 3

# Selects which questionnaire version of that CSV is read
quest_version: int = 1

## Loading the pre-processed data

In [3]:
# Read the pre-processed CSV for the configured participant / questionnaire
# version and discard the 'actual_day' column in the same step.
df = pd.read_csv(
    f'data/preprocessed_data_{participant_number}_v{quest_version}.csv'
).drop(columns=['actual_day'])

## Prepare regressor list for lazypredict

In [4]:
# Names of scikit-learn estimators that are excluded when the regressor list
# for lazypredict is built (matched against the estimator name).
removed_regressors = [
    "TheilSenRegressor",
    "ARDRegression",
    "CCA",
    "IsotonicRegression",
    "StackingRegressor",
    "MultiOutputRegressor",
    "MultiTaskElasticNet",
    "MultiTaskElasticNetCV",
    "MultiTaskLasso",
    "MultiTaskLassoCV",
    "PLSCanonical",
    "PLSRegression",
    "RadiusNeighborsRegressor",
    "RegressorChain",
    "VotingRegressor",
]

In [5]:
# Regressor classes handed to LazyRegressor(regressors=...).
# all_estimators() yields (name, class) tuples, while LazyRegressor reads
# `__name__` from each entry it is given. Passing the tuples through unchanged
# makes fit() print "'tuple' object has no attribute '__name__'" followed by
# "Invalid Regressor(s)", so only the class is kept here.
# NOTE(review): fit() appears to replace the instance's regressor list with
# (name, class) tuples, so reusing one LazyRegressor for several fits may still
# print that message on later calls - confirm against the installed lazypredict.
REGRESSORS = [
    estimator
    for name, estimator in all_estimators()
    if issubclass(estimator, RegressorMixin) and name not in removed_regressors
]

## Run LazyRegressor on dataset

### LazyRegressor for the factors influencing the sleep scores

In [26]:
# Features: every column except the two sleep-score targets.
# Targets: the Oura sleep score ('score') and the subjective sleep score.
X = df.drop(columns=['score', 'subjective_sleep_score'])
y_oura = df['score']
y_sub = df['subjective_sleep_score']

# Hold out 20% of the rows as the test set for the Oura sleep score prediction
X_train_oura, X_test_oura, y_train_oura, y_test_oura = train_test_split(
    X, y_oura, test_size=0.2, random_state=2
)

In [8]:
# Construct the LazyRegressor, limited to the estimators listed in REGRESSORS
reg = LazyRegressor(
    regressors=REGRESSORS,
    verbose=0,
    ignore_warnings=False,
    custom_metric=None,
)

#### Oura sleep scores

In [27]:
# Run all configured regressors for the Oura sleep score and print the
# resulting comparison table.
# NOTE(review): the recorded run reports Adjusted R-Squared > 1 next to
# negative R-Squared values, which points to the test split having no more
# rows than features - verify before relying on that column.
models_oura, predictions_oura = reg.fit(
    X_train=X_train_oura,
    X_test=X_test_oura,
    y_train=y_train_oura,
    y_test=y_test_oura,
)

print(models_oura)

'tuple' object has no attribute '__name__'
Invalid Regressor(s)


100%|██████████| 40/40 [00:01<00:00, 20.53it/s]

                               Adjusted R-Squared  R-Squared  RMSE  Time Taken
Model                                                                         
GaussianProcessRegressor                    38.76     -71.61  7.87        0.02
MLPRegressor                                 1.91      -0.74  1.22        0.14
QuantileRegressor                            1.54      -0.04  0.94        0.13
LassoLars                                    1.52      -0.00  0.92        0.01
DummyRegressor                               1.52      -0.00  0.92        0.02
Lasso                                        1.52      -0.00  0.92        0.02
ElasticNet                                   1.32       0.38  0.73        0.02
Lars                                         1.32       0.39  0.72        0.02
LarsCV                                       1.27       0.47  0.67        0.05
BaggingRegressor                             1.27       0.48  0.67        0.03
DecisionTreeRegressor                        1.27   




#### Subjective sleep scores

In [21]:
# Hold out 20% of the rows as the test set for the subjective sleep score prediction
X_train_sub, X_test_sub, y_train_sub, y_test_sub = train_test_split(
    X, y_sub, test_size=0.2, random_state=6
)

In [22]:
# Run all configured regressors for the subjective sleep score and print the
# resulting comparison table.
models_sub, predictions_sub = reg.fit(
    X_train=X_train_sub,
    X_test=X_test_sub,
    y_train=y_train_sub,
    y_test=y_test_sub,
)

print(models_sub)

'tuple' object has no attribute '__name__'
Invalid Regressor(s)


100%|██████████| 40/40 [00:02<00:00, 14.11it/s]

                                    Adjusted R-Squared  \
Model                                                    
LinearRegression              122137680211875053568.00   
TransformedTargetRegressor    122137680211875053568.00   
Lars                                            792.89   
GaussianProcessRegressor                         32.84   
RANSACRegressor                                   2.43   
MLPRegressor                                      2.40   
ExtraTreeRegressor                                1.83   
HuberRegressor                                    1.67   
PassiveAggressiveRegressor                        1.66   
DecisionTreeRegressor                             1.62   
Lasso                                             1.56   
LassoLars                                         1.56   
DummyRegressor                                    1.56   
ElasticNet                                        1.55   
LinearSVR                                         1.53   
QuantileRegres




### LazyRegressor for the factors influencing the average HRV

In [11]:
# Features: every column of df except 'average_hrv'; target: 'average_hrv'
X_hrv = df.drop(columns=['average_hrv'])
y_hrv = df['average_hrv']

# Hold out 20% of the rows as the test set for the average HRV prediction
X_train_hrv, X_test_hrv, y_train_hrv, y_test_hrv = train_test_split(
    X_hrv, y_hrv, test_size=0.2, random_state=5
)

In [12]:
# Run all configured regressors for the average HRV and print the resulting
# comparison table.
models_hrv, predictions_hrv = reg.fit(
    X_train=X_train_hrv,
    X_test=X_test_hrv,
    y_train=y_train_hrv,
    y_test=y_test_hrv,
)

print(models_hrv)

'tuple' object has no attribute '__name__'
Invalid Regressor(s)


100%|██████████| 40/40 [00:01<00:00, 20.88it/s]

                                      Adjusted R-Squared  \
Model                                                      
LinearRegression              31062329562467961667584.00   
TransformedTargetRegressor    31062329562467961667584.00   
Lars                                              518.12   
GaussianProcessRegressor                            9.28   
MLPRegressor                                        2.74   
QuantileRegressor                                   1.55   
DummyRegressor                                      1.53   
LassoLars                                           1.48   
NuSVR                                               1.47   
SVR                                                 1.45   
ExtraTreeRegressor                                  1.27   
KNeighborsRegressor                                 1.25   
DecisionTreeRegressor                               1.22   
HuberRegressor                                      1.21   
PassiveAggressiveRegressor              




### LazyRegressor for the factors influencing the total sleep duration

In [13]:
# For the total sleep duration, only the days on which the person woke up
# naturally are considered (rows where 'Naturally' == 1).
# Features: all remaining columns of those rows; target: 'total_sleep_duration'.
X_total_sleep = df[df['Naturally'] == 1].drop(columns=['total_sleep_duration'])
y_total_sleep = df.loc[df['Naturally'] == 1, 'total_sleep_duration']

# Hold out 20% of those rows as the test set for the total sleep duration prediction
X_train_total_sleep, X_test_total_sleep, y_train_total_sleep, y_test_total_sleep = train_test_split(
    X_total_sleep, y_total_sleep, test_size=0.2, random_state=89
)

In [14]:
# Run all configured regressors for the total sleep duration and print the
# resulting comparison table.
models_total_sleep, predictions_total_sleep = reg.fit(
    X_train=X_train_total_sleep,
    X_test=X_test_total_sleep,
    y_train=y_train_total_sleep,
    y_test=y_test_total_sleep,
)

print(models_total_sleep)

'tuple' object has no attribute '__name__'
Invalid Regressor(s)


 65%|██████▌   | 26/40 [00:00<00:00, 37.87it/s]

LassoLarsIC model failed to execute
You are using LassoLarsIC in the case where the number of samples is smaller than the number of features. In this setting, getting a good estimate for the variance of the noise is not possible. Provide an estimate of the noise variance in the constructor.
RANSACRegressor model failed to execute
`min_samples` may not be larger than number of samples: n_samples = 16.


100%|██████████| 40/40 [00:00<00:00, 42.79it/s]

                                                              Adjusted R-Squared  \
Model                                                                              
Lars                          52521160215245839804316156957602960106071052945...   
GaussianProcessRegressor                                                    1.90   
LinearSVR                                                                   1.89   
MLPRegressor                                                                1.89   
QuantileRegressor                                                           1.05   
SVR                                                                         1.05   
NuSVR                                                                       1.05   
DummyRegressor                                                              1.05   
HistGradientBoostingRegressor                                               1.05   
DecisionTreeRegressor                                                       




### LazyRegressor for the factors influencing the total awake time during the night