In [None]:
%load_ext autoreload
%autoreload 2

import datetime
import os
from pathlib import Path

import hydra
import numpy as np
from hydra.core.global_hydra import GlobalHydra

# Initialize hydra once per kernel, then move to the root of the repository.
# Re-running this cell must not initialize or chdir a second time, so the
# global hydra state is checked explicitly instead of catching every exception
# (a bare `except:` also hid real failures such as a missing or broken config).
# On re-runs, CONFIG and OUTPUT_FOLDER from the first run are left untouched.
if GlobalHydra.instance().is_initialized():
    print('Hydra already initalized!')
else:
    hydra.initialize(version_base=None, config_path="../config/")
    try:
        CONFIG = hydra.compose(config_name="main.yaml")
    except Exception:
        # Roll back the initialization so that, after fixing the config,
        # re-running this cell retries instead of taking the branch above.
        GlobalHydra.instance().clear()
        raise
    print('Initializing hydra')
    os.chdir('..')
    # Create a timestamped output folder in the root of the repository
    OUTPUT_FOLDER = Path('output/{0}'.format(datetime.datetime.now()))
    OUTPUT_FOLDER.mkdir(parents=True, exist_ok=True)

In [None]:
import pandas as pd
import statsmodels.formula.api as smf
import statsmodels.api as sm

In [None]:
def aic(model):
    """Return the Akaike information criterion of a fitted model.

    Computed as ``-2 * llf + 2 * df_modelwc``.

    Args:
        model: Fitted results object exposing ``llf`` and ``df_modelwc``
            attributes (in this notebook, the object returned by
            ``smf.mixedlm(...).fit()``).

    Returns:
        The AIC value as a number.
    """
    # Use the argument, not a global: the previous body read an undefined
    # name `mdf` and raised NameError on every call.
    return -2 * model.llf + 2 * model.df_modelwc

def bic(model):
    """Return the Bayesian information criterion of a fitted model.

    Computed as ``-2 * llf + log(nobs) * df_modelwc``.

    Args:
        model: Fitted results object exposing ``llf``, ``nobs`` and
            ``df_modelwc`` attributes (in this notebook, the object returned
            by ``smf.mixedlm(...).fit()``).

    Returns:
        The BIC value as a number.
    """
    # Requires numpy to be imported as `np` at the top of the notebook.
    penalty = np.log(model.nobs) * model.df_modelwc
    return -2 * model.llf + penalty

def fit(equation, data, groups='user_device'):
    """Fit a mixed linear model and print its summary, AIC and BIC.

    Args:
        equation: Model formula string passed to ``smf.mixedlm``.
        data: DataFrame containing the variables used in ``equation`` and the
            ``groups`` column.
        groups: Name of the column in ``data`` whose values are passed to
            ``smf.mixedlm`` as the grouping labels. Defaults to
            ``'user_device'``.

    Returns:
        None. The summary table and both information criteria are printed.
    """
    result = smf.mixedlm(equation, data, groups=data[groups]).fit()
    print(result.summary())
    print('AIC:', aic(result))
    print('BIC:', bic(result))

In [None]:
# Load the merged dataset, add a combined user/device key, and divide v9 by 1000
df = pd.read_feather(
    Path(CONFIG.data.processed) / CONFIG.data.filenames.merged_data
).assign(
    user_device=lambda d: d['user_id'].astype(str) + "_" + d['deviceid'].astype(str),
    v9=lambda d: d['v9'] / 1000,
)

In [None]:
# One complete-case frame per model: each keeps only the columns its formula
# needs and drops rows with a missing value in any of them, so the three
# frames can differ in row count.
model_data1 = df[['user_device', 'total_wellbeing', 'v9', 'v65', 'salutation', 'age_group']].dropna()
model_data2 = df[['user_device', 'total_wellbeing', 'v9', 'v65', 'midsleep', 'salutation', 'age_group']].dropna()
model_data3 = (
    df[['user_device', 'total_wellbeing', 'v9', 'v65', 'midsleep', 'v43', 'salutation', 'age_group']]
    .dropna()
    .assign(
        # Was `** 0.5` (a square root), which contradicted the column name.
        # NOTE(review): assumes the quadratic term is what was intended; confirm.
        v43_squared=lambda d: d['v43'] ** 2,
        # Was computed from an undefined `model_data` frame (NameError).
        early_midsleep=lambda d: d['midsleep'] < 4,
    )
)

In [None]:
# Model 1: total wellbeing on age group, v9, v65 and salutation
fit(
    equation='total_wellbeing ~ C(age_group) + v9 + v65 + C(salutation)',
    data=model_data1,
)

In [None]:
# Model 2: model 1 plus midsleep
fit(
    equation='total_wellbeing ~ C(age_group) + v9 + v65 + midsleep + C(salutation)',
    data=model_data2,
)

In [None]:
# Model 3: model 2 plus v43
fit(
    equation='total_wellbeing ~ C(age_group) + v9 + v65 + midsleep + v43 + C(salutation)',
    data=model_data3,
)