## Imports

In [1]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from lazypredict.Supervised import LazyRegressor
from sklearn.preprocessing import LabelEncoder
from sklearn.utils import all_estimators
from sklearn.base import RegressorMixin

### Define the files to be loaded

In [2]:
# Participant number whose data should be loaded
participant_number = 3

# The version of the questionnaires to load
quest_version = 1

## Loading the pre-processed data

In [3]:
df = pd.read_csv('data/preprocessed_data_' + str(participant_number) + '_v' + str(quest_version) + '.csv')
df = df.drop(labels=['actual_day'], axis=1)
print(df.columns)

Index(['index', 'average_breath', 'average_breath_variation',
       'average_heart_rate', 'average_hrv', 'awake_time', 'bedtime_end_delta',
       'bedtime_end_seconds', 'bedtime_start_delta', 'bedtime_start_seconds',
       'contributors.deep_sleep', 'contributors.efficiency',
       'contributors.latency', 'contributors.rem_sleep',
       'contributors.restfulness', 'contributors.timing',
       'contributors.total_sleep', 'deep_sleep_duration', 'efficiency',
       'got_ups', 'latency', 'light_sleep_duration', 'lowest_heart_rate',
       'lowest_heart_rate_time_offset', 'period',
       'readiness.contributors.activity_balance',
       'readiness.contributors.body_temperature',
       'readiness.contributors.hrv_balance',
       'readiness.contributors.previous_day_activity',
       'readiness.contributors.previous_night',
       'readiness.contributors.recovery_index',
       'readiness.contributors.resting_heart_rate',
       'readiness.contributors.sleep_balance', 'readiness.sco

## Prepare regressor list for lazypredict

In [4]:
removed_regressors = [
    "TheilSenRegressor",
    "ARDRegression", 
    "CCA", 
    "IsotonicRegression", 
    "StackingRegressor",
    "MultiOutputRegressor", 
    "MultiTaskElasticNet", 
    "MultiTaskElasticNetCV", 
    "MultiTaskLasso", 
    "MultiTaskLassoCV", 
    "PLSCanonical", 
    "PLSRegression", 
    "RadiusNeighborsRegressor", 
    "RegressorChain", 
    "VotingRegressor", 
]

In [5]:
REGRESSORS = [
    est
    for est in all_estimators()
    if (issubclass(est[1], RegressorMixin) and (est[0] not in removed_regressors))
]

In [6]:
print(REGRESSORS)

[('AdaBoostRegressor', <class 'sklearn.ensemble._weight_boosting.AdaBoostRegressor'>), ('BaggingRegressor', <class 'sklearn.ensemble._bagging.BaggingRegressor'>), ('BayesianRidge', <class 'sklearn.linear_model._bayes.BayesianRidge'>), ('DecisionTreeRegressor', <class 'sklearn.tree._classes.DecisionTreeRegressor'>), ('DummyRegressor', <class 'sklearn.dummy.DummyRegressor'>), ('ElasticNet', <class 'sklearn.linear_model._coordinate_descent.ElasticNet'>), ('ElasticNetCV', <class 'sklearn.linear_model._coordinate_descent.ElasticNetCV'>), ('ExtraTreeRegressor', <class 'sklearn.tree._classes.ExtraTreeRegressor'>), ('ExtraTreesRegressor', <class 'sklearn.ensemble._forest.ExtraTreesRegressor'>), ('GammaRegressor', <class 'sklearn.linear_model._glm.glm.GammaRegressor'>), ('GaussianProcessRegressor', <class 'sklearn.gaussian_process._gpr.GaussianProcessRegressor'>), ('GradientBoostingRegressor', <class 'sklearn.ensemble._gb.GradientBoostingRegressor'>), ('HistGradientBoostingRegressor', <class 's

In [7]:
regressors = ['AdaBoostRegressor', 'BaggingRegressor', 'BayesianRidge', 'DecisionTreeRegressor', 'DummyRegressor', 'ElasticNet', 'ElasticNetCV', 
    'ExtraTreeRegressor', 'ExtraTreesRegressor', 'GammaRegressor', 'GaussianProcessRegressor', 'GradientBoostingRegressor', 'HistGradientBoostingRegressor', 
    'HuberRegressor', 'KNeighborsRegressor', 'KernelRidge', 'Lars', 'LarsCV', 'Lasso', 'LassoCV', 'LassoLars', 'LassoLarsCV', 'LassoLarsIC',
    'LinearRegression', 'LinearSVR', 'MLPRegressor', 'NuSVR', 'OrthogonalMatchingPursuit', 'OrthogonalMatchingPursuitCV', 'PassiveAggressiveRegressor',
    'PoissonRegressor', 'QuantileRegressor', 'RANSACRegressor', 'RandomForestRegressor', 'Ridge', 'RidgeCV', 'SGDRegressor', 'SVR', 
    'TransformedTargetRegressor', 'TweedieRegressor']

## Run LazyRegressor on dataset

In [9]:
# Separate the indenpendent variables from the dependent variables
y_oura = df['score']
y_sub = df['subjective_sleep_score']
X = df.drop(labels=['score', 'subjective_sleep_score'], axis=1)

# X = X.astype(np.float32)
offset = int(X.shape[0] * 0.9)
X_train_oura, y_train_oura = X[:offset], y_oura[:offset]
X_test_oura, y_test_oura = X[offset:], y_oura[offset:]
# X_train_oura, X_test_oura, y_train_oura, y_test_oura = train_test_split(X, y_oura, test_size = 0.2, random_state = 64)

In [10]:
X_train_sub, y_train_sub = X[:offset], y_sub[:offset]
X_test_sub, y_test_sub = X[offset:], y_sub[offset:]

In [11]:
# Apply the LazyRegressor on the data
le = LabelEncoder()
Y_train_oura = le.fit_transform(y_train_oura)

reg = LazyRegressor(verbose=0, ignore_warnings=False, custom_metric=None, regressors = REGRESSORS)
models_oura, predictions_oura = reg.fit(X_train_oura, X_test_oura, y_train_oura, y_test_oura)

'tuple' object has no attribute '__name__'
Invalid Regressor(s)


100%|██████████| 40/40 [00:01<00:00, 21.65it/s]


In [12]:
print(models_oura)

                               Adjusted R-Squared  R-Squared  RMSE  Time Taken
Model                                                                         
Lars                                       285.73   -1378.84 30.96        0.02
GaussianProcessRegressor                    19.85     -90.34  7.97        0.01
MLPRegressor                                 2.09      -4.29  1.92        0.13
QuantileRegressor                            1.21      -0.01  0.84        0.09
LassoLars                                    1.21      -0.01  0.84        0.01
DummyRegressor                               1.21      -0.01  0.84        0.01
Lasso                                        1.21      -0.01  0.84        0.01
ElasticNet                                   1.09       0.55  0.56        0.02
KNeighborsRegressor                          1.06       0.69  0.46        0.01
ExtraTreeRegressor                           1.02       0.88  0.28        0.01
SVR                                          1.02   

In [13]:
models_sub, predictions_sub = reg.fit(X_train_sub, X_test_sub, y_train_sub, y_test_sub)

'tuple' object has no attribute '__name__'
Invalid Regressor(s)


100%|██████████| 40/40 [00:02<00:00, 14.29it/s]


In [14]:
print(models_sub)

                               Adjusted R-Squared  R-Squared  RMSE  Time Taken
Model                                                                         
Lars                                      1625.48   -7871.50 64.94        0.02
GaussianProcessRegressor                    22.87    -105.00  7.54        0.01
ExtraTreeRegressor                           2.40      -5.80  1.91        0.01
MLPRegressor                                 2.21      -4.84  1.77        0.13
RANSACRegressor                              1.69      -2.37  1.34        0.11
PassiveAggressiveRegressor                   1.56      -1.73  1.21        0.01
LinearSVR                                    1.34      -0.67  0.95        0.02
TransformedTargetRegressor                   1.34      -0.66  0.94        0.01
LinearRegression                             1.34      -0.66  0.94        0.01
HuberRegressor                               1.34      -0.63  0.94        0.02
QuantileRegressor                            1.30   