# Linear mixed models


In [None]:
import pandas as pd
import os
import json
import numpy as np
from itertools import groupby
import matplotlib.pyplot as plt
from scipy import stats,signal
import matplotlib as mpl
from sklearn.linear_model import LogisticRegression
import random
import re
import csv
from IPython.display import HTML, display, Image
import tabulate
import math as m
import warnings
warnings.filterwarnings('ignore')
import statsmodels.api as sm
import statsmodels.formula.api as smf

In [None]:
# Resolve the working directory and its first three ancestors; the third
# ancestor (main_path) is treated as the project root.
current_path = os.path.abspath(os.getcwd())
parent_path, grand_parent_path, main_path = (
    os.path.abspath(os.path.join(current_path, *([os.pardir] * levels_up)))
    for levels_up in (1, 2, 3)
)

# Directory (with trailing slash) where the gabor result files are read/written.
path_results = main_path+'/results/gabor/'

In [None]:
import sys
# Put the project's helper directory on the import path at position 1;
# position 0 is the script path (or '' in the REPL) and is left untouched.
# NOTE(review): the directory is spelled '/scr' — confirm it is not meant to be '/src'.
sys.path.insert(1, main_path+'/scr')
import my_functions as myf

In [None]:
# Global matplotlib styling applied to every figure drawn afterwards.
mpl.rcParams.update({
    'lines.linewidth': 3,
    'axes.titlesize': 18,
    'axes.labelsize': 18,
    'lines.markersize': 10,
    'xtick.labelsize': 20,
    'ytick.labelsize': 20,
    'axes.linewidth': 3,
    # 'xtick.major.size': 20,
    'xtick.major.width': 4,
    # 'xtick.minor.size': 10,
    'xtick.minor.width': 2,
    'ytick.major.width': 4,
    'ytick.minor.width': 2,
})

# Day and session indices.
fday = list(range(1, 11))
fsession = [1, 2]

# Pre-analysed data: the models below read userID, sessionID_x, the
# self-report columns and the confidence columns from it.
adf = pd.read_csv(path_results+'preanalyzed.csv')

# Sessions (identified by user_sessionID) dropped from the analysis.
excluded_miss = ['1011_11', '1011_18', '1014_12']
excluded_time = ['1008_2','1009_9']
excluded = [*excluded_miss, *excluded_time]
keep_session = ~adf['user_sessionID'].isin(excluded)
adf_sin_nan = adf[keep_session]

# Subset restricted to odd session IDs
# (presumably the morning sessions — confirm against the experiment design).
odd_session = adf_sin_nan['sessionID_x'] % 2 == 1
mdf_sin_nan = adf_sin_nan[odd_session]

# Column names used as predictors (self-reports) and outcomes (confidence).
reports = ['mood','real_stress','food','sleep']
confidence = ['Dsubj_optout_oo','OKubj_RT_no']

In [None]:
# Display the column names of the full dataset.
adf.columns

In [None]:
# Distinct participant identifiers found in the full dataset.
userids = adf['userID'].unique()

In [None]:
# Distinct session identifiers (sessionID_x) found in the full dataset.
# No averaging of self-reports happens in this cell.
sessionids = adf['sessionID_x'].unique()

## Self-reports

### Mood & Stress

In [None]:
# Mixed model with a random intercept per participant: mood explained by stress.
# NOTE(review): this uses the column `stress`, while `reports` lists
# `real_stress` — confirm which of the two is intended here.
mm_mo_st = smf.mixedlm(
    formula="mood ~ stress",
    data=adf_sin_nan,
    groups=adf_sin_nan["userID"],
)
mdf_mo_st = mm_mo_st.fit()
print(mdf_mo_st.summary())
print(mdf_mo_st.pvalues)

In [None]:
# Mixed model with a random intercept per participant: mood explained by
# food and stress, fitted on all retained sessions.
mm_mo = smf.mixedlm(
    formula="mood ~ food + stress",
    data=adf_sin_nan,
    groups=adf_sin_nan["userID"],
)
mdf_mo = mm_mo.fit()
print(mdf_mo.summary())

In [None]:
# Same outcome with sleep added as a predictor; fitted on mdf_sin_nan.
# This overwrites mm_mo / mdf_mo from the previous model.
mm_mo = smf.mixedlm(
    formula="mood ~ sleep + food + real_stress",
    data=mdf_sin_nan,
    groups=mdf_sin_nan["userID"],
)
mdf_mo = mm_mo.fit()
print(mdf_mo.summary())

In [None]:
# For every confidence measure: fit an intercept-only control model (on
# adf_sin_nan and on mdf_sin_nan), then for every self-report fit a
# random-intercept model ("FE") and a random-intercept + random-slope model
# ("RE"). P-values, dAIC against the matching control and RMSE go into
# `results`; slopes, group variances and dAIC are written to LMMgabor.csv.
#
# NOTE(review): every .fit() uses the statsmodels default (REML). REML
# log-likelihoods are generally not comparable between models that differ in
# their fixed effects (control "~ 1" vs "~ rep"); confirm whether the AIC
# comparison should be based on fit(reml=False).
def _rmse(observed, fitted_model):
    """Return the root-mean-square error of `fitted_model` against `observed`."""
    squared_error = (observed - fitted_model.fittedvalues) ** 2
    return np.sqrt(squared_error.values.mean())


results = {}
with open(path_results+'LMMgabor.csv', 'w', newline='') as myfile:
    header = [
        ['', '', 'FE', '', '', 'RE', '', ''],
        ['', '', 'slope', '1|subj', 'daic', 'slope|subj', '1|subj', 'daic'],
    ]
    wr = csv.writer(myfile, quoting=csv.QUOTE_ALL)
    wr.writerows(header)
    for conf in confidence:
        results[conf] = {}

        # Intercept-only control on all retained sessions.
        control_model = smf.mixedlm(f"{conf} ~ 1", adf_sin_nan, groups=adf_sin_nan["userID"])
        control_model_fit = control_model.fit()
        control_AIC = myf.AIC(2, control_model_fit.llf)
        results[conf]['controlAIC'] = control_AIC
        rmse_control = _rmse(adf_sin_nan[conf], control_model_fit)
        results[conf]['RMSE_control'] = np.round(rmse_control, 2)
        print(conf, rmse_control)

        # Intercept-only control on mdf_sin_nan (stored under the 'morning' keys).
        mor_control_model = smf.mixedlm(f"{conf} ~ 1", mdf_sin_nan, groups=mdf_sin_nan["userID"])
        mor_control_model_fit = mor_control_model.fit()
        mor_control_AIC = myf.AIC(2, mor_control_model_fit.llf)
        results[conf]['morning_controlAIC'] = mor_control_AIC
        rmse_mor_control = _rmse(mdf_sin_nan[conf], mor_control_model_fit)
        results[conf]['RMSE_mor_control'] = np.round(rmse_mor_control, 2)
        print(conf, rmse_mor_control)

        for rep in reports:
            results[conf][rep] = {'FE': {}, 'RE': {}}
            fe_stats = results[conf][rep]['FE']
            re_stats = results[conf][rep]['RE']

            # 'sleep' is modelled on mdf_sin_nan; every other report on adf_sin_nan.
            if rep == 'sleep':
                DF, controlModel = mdf_sin_nan, mor_control_model_fit
            else:
                DF, controlModel = adf_sin_nan, control_model_fit
            formula = f"{conf} ~ {rep}"

            # Random intercept per participant, common slope.
            # The 3 passed to myf.dAIC is presumably the parameter count — confirm in my_functions.
            FE = smf.mixedlm(formula, DF, groups=DF["userID"])
            FE_fit = FE.fit()
            p_value_FE = FE_fit.pvalues[rep]
            fe_stats['pvalue'] = myf.roundP(p_value_FE)
            fe_stats['dAIC_FE'] = np.round(myf.dAIC(3, FE_fit.llf, controlModel.llf), 1)
            fe_stats['RMSE_FE'] = np.round(_rmse(DF[conf], FE_fit), 1)

            # Random intercept and random slope per participant.
            # The 5 passed to myf.dAIC is presumably the parameter count — confirm in my_functions.
            RE = smf.mixedlm(formula, DF, groups=DF["userID"], re_formula="~"+rep)
            RE_fit = RE.fit()
            p_value_RE = RE_fit.pvalues[rep]
            re_stats['pvalue'] = myf.roundP(p_value_RE)
            re_stats['dAIC_RE'] = np.round(myf.dAIC(5, RE_fit.llf, controlModel.llf), 1)
            re_stats['RMSE_RE'] = np.round(_rmse(DF[conf], RE_fit), 1)

            # The confidence label is written only on the 'mood' row of each group.
            row = [
                conf if rep == 'mood' else '',
                rep,
                FE_fit.params[rep], FE_fit.params['Group Var'], fe_stats['dAIC_FE'],
                RE_fit.params[rep], RE_fit.params['Group Var'], re_stats['dAIC_RE'],
            ]
            wr.writerow(row)

In [None]:
# DO NOT RUN AGAIN
# Running this cell overwrites the existing LMMresults.json.

# Destination file for the `results` dictionary.
filename = f"{path_results}LMMresults.json"

# Serialise first, so a serialisation error cannot leave a truncated file behind.
json_results = json.dumps(results)

with open(filename, "w") as outfile:
    outfile.write(json_results)

## Optout & mood

### Fixed effects model

In [None]:
# Opt-out explained by mood, with a random intercept per participant.
mm_oo_FE_FE = smf.mixedlm(
    formula="Dsubj_optout_oo ~ mood",
    data=adf_sin_nan,
    groups=adf_sin_nan["userID"],
)
mdf_oo_FE_FE = mm_oo_FE_FE.fit()
print(mdf_oo_FE_FE.summary())

In [None]:
# AIC of the opt-out ~ mood random-intercept model (mdf_oo_FE_FE), with k = 3.
# The log-likelihood is read from the fitted result rather than copied by hand
# from the printed summary, so it cannot go stale when the data change.
print('AIC = 2k - 2 log-likehood')
print('AIC=', 2*3 - 2*mdf_oo_FE_FE.llf)

### Random effects model

In [None]:
# Opt-out explained by mood, with a random intercept and a random mood slope
# per participant.
mm_oo_re_RE = smf.mixedlm(
    formula="Dsubj_optout_oo ~ mood",
    data=adf_sin_nan,
    groups=adf_sin_nan["userID"],
    re_formula="~mood",
)
mdf_oo_re_RE = mm_oo_re_RE.fit()
print(mdf_oo_re_RE.summary())

In [None]:
# AIC of the opt-out ~ mood random-slope model (mdf_oo_re_RE), with k = 5.
# The log-likelihood is read from the fitted result rather than copied by hand
# from the printed summary, so it cannot go stale when the data change.
print('AIC = 2k - 2 log-likehood')
print('AIC=', 2*5 - 2*mdf_oo_re_RE.llf)

### Control model

In [None]:
# Control for the opt-out models: intercept only, random intercept per participant.
mm_Doo_ID_RE = smf.mixedlm(
    formula="Dsubj_optout_oo ~ 1",
    data=adf_sin_nan,
    groups=adf_sin_nan["userID"],
)
mdf_Doo_ID_RE = mm_Doo_ID_RE.fit()
print(mdf_Doo_ID_RE.summary())

In [None]:
# AIC of the opt-out control model (mdf_Doo_ID_RE), with k = 2.
# The log-likelihood is read from the fitted result rather than copied by hand
# from the printed summary, so it cannot go stale when the data change.
print('AIC = 2k - 2 log-likehood')
print('AIC=', 2*2 - 2*mdf_Doo_ID_RE.llf)

## RT (correct+NO) & mood

### Fixed effects model

In [None]:
# RT measure (OKubj_RT_no) explained by mood, with a random intercept per participant.
mm_rt_FE_FE = smf.mixedlm(
    formula="OKubj_RT_no ~ mood",
    data=adf_sin_nan,
    groups=adf_sin_nan["userID"],
)
mdf_rt_FE_FE = mm_rt_FE_FE.fit()
print(mdf_rt_FE_FE.summary())

In [None]:
# AIC of the RT ~ mood random-intercept model (mdf_rt_FE_FE), with k = 3.
# The log-likelihood is read from the fitted result rather than copied by hand
# from the printed summary, so it cannot go stale when the data change.
print('AIC = 2k - 2 log-likehood')
print('AIC=', 2*3 - 2*mdf_rt_FE_FE.llf)

### Random effects model

In [None]:
# RT measure explained by mood, with a random intercept and a random mood
# slope per participant.
mm_rt_re_RE = smf.mixedlm(
    formula="OKubj_RT_no ~ mood",
    data=adf_sin_nan,
    groups=adf_sin_nan["userID"],
    re_formula="~mood",
)
mdf_rt_re_RE = mm_rt_re_RE.fit()
print(mdf_rt_re_RE.summary())

In [None]:
# AIC of the RT ~ mood random-slope model (mdf_rt_re_RE), with k = 5.
# The log-likelihood is read from the fitted result rather than copied by hand
# from the printed summary, so it cannot go stale when the data change.
print('AIC = 2k - 2 log-likehood')
print('AIC=', 2*5 - 2*mdf_rt_re_RE.llf)

### Control model

In [None]:
# Control for the RT models: intercept only, random intercept per participant.
mm_rt_ID_RE = smf.mixedlm(
    formula="OKubj_RT_no ~ 1",
    data=adf_sin_nan,
    groups=adf_sin_nan["userID"],
)
mdf_rt_ID_RE = mm_rt_ID_RE.fit()
print(mdf_rt_ID_RE.summary())

In [None]:
# AIC of the RT control model (mdf_rt_ID_RE), with k = 2.
# The log-likelihood is read from the fitted result rather than copied by hand
# from the printed summary, so it cannot go stale when the data change.
print('AIC = 2k - 2 log-likehood')
print('AIC=', 2*2 - 2*mdf_rt_ID_RE.llf)

## Mood distribution

In [None]:
# Histogram of the mood ratings.
# NOTE(review): this plots the full `adf`, not the filtered `adf_sin_nan`
# that the models use — confirm this is intended.
plt.hist(adf['mood'])
plt.show()

## Stress & optout

### Fixed effects model

In [None]:
# Opt-out explained by stress, with a random intercept per participant.
mm_oo_stress_FE = smf.mixedlm(
    formula="Dsubj_optout_oo ~ stress",
    data=adf_sin_nan,
    groups=adf_sin_nan["userID"],
)
mdf_oo_stress_FE = mm_oo_stress_FE.fit()
print(mdf_oo_stress_FE.summary())

In [None]:
# AIC of the opt-out ~ stress random-intercept model (mdf_oo_stress_FE), with k = 3.
# The log-likelihood is read from the fitted result rather than copied by hand
# from the printed summary, so it cannot go stale when the data change.
print('AIC = 2k - 2 log-likehood')
print('AIC=', 2*3 - 2*mdf_oo_stress_FE.llf)

### Random effects model

In [None]:
# Opt-out explained by stress, with a random intercept and a random stress
# slope per participant.
mm_oo_stress_RE = smf.mixedlm(
    formula="Dsubj_optout_oo ~ stress",
    data=adf_sin_nan,
    groups=adf_sin_nan["userID"],
    re_formula="~stress",
)
mdf_oo_stress_RE = mm_oo_stress_RE.fit()
print(mdf_oo_stress_RE.summary())

In [None]:
# AIC of the opt-out ~ stress random-slope model (mdf_oo_stress_RE), with k = 5.
# The log-likelihood is read from the fitted result rather than copied by hand
# from the printed summary, so it cannot go stale when the data change.
print('AIC = 2k - 2 log-likehood')
print('AIC=', 2*5 - 2*mdf_oo_stress_RE.llf)

## Stress and type optout

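We duplicate the stress data so that each stress value is paired once with the D opt-out measure and once with the S opt-out measure, and then check whether the D and S opt-out data can be pooled.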

In [None]:
from pingouin import ancova

In [None]:
# Pull the stress ratings and both opt-out measures out of the filtered data
# as NumPy arrays (one array per column, same row order).
stress_todos, DO_todos, SO_todos = (
    np.array(adf_sin_nan[column])
    for column in ('real_stress', 'Dsubj_optout_oo', 'Ssubj_optout_oo')
)

In [None]:
# Stack the D opt-out values on top of the S opt-out values and label each
# entry with its type.
optout_todos = np.concatenate([DO_todos, SO_todos])
optout_type = ['D' for _ in range(len(DO_todos))] + ['S' for _ in range(len(SO_todos))]

# Stress ratings and participant IDs are repeated twice so they line up with
# the stacked opt-out values.
doble_stress = np.concatenate([stress_todos] * 2)
users = np.concatenate([np.array(adf_sin_nan['userID']) for _ in range(2)])

In [None]:
# Long-format table: one row per (participant session, opt-out type).
ancova_df = pd.DataFrame(
    dict(
        userID=users,
        stress=doble_stress,
        optout=optout_todos,
        optout_type=optout_type,
    )
)

### Ancova

In [None]:
# ANCOVA (pingouin): opt-out as the dependent variable, opt-out type
# ('D' vs 'S') as the between factor and stress as the covariate.
ancova(data=ancova_df, dv='optout', covar='stress', between='optout_type')

### Fixed effects D+Soptout & stress

In [None]:
# Pooled D+S opt-out explained by stress, with a random intercept per participant.
mm_DSoo_stress_FE = smf.mixedlm(
    formula="optout ~ stress",
    data=ancova_df,
    groups=ancova_df["userID"],
)
mdf_DSoo_stress_FE = mm_DSoo_stress_FE.fit()
print(mdf_DSoo_stress_FE.summary())

In [None]:
# AIC of the pooled opt-out ~ stress random-intercept model (mdf_DSoo_stress_FE), with k = 3.
# The log-likelihood is read from the fitted result rather than copied by hand
# from the printed summary, so it cannot go stale when the data change.
print('AIC = 2k - 2 log-likehood')
print('AIC=', 2*3 - 2*mdf_DSoo_stress_FE.llf)

### Random effects D+Soptout & stress

In [None]:
# Pooled D+S opt-out explained by stress, with a random intercept and a
# random stress slope per participant.
mm_DSoo_stress_RE = smf.mixedlm(
    formula="optout ~ stress",
    data=ancova_df,
    groups=ancova_df["userID"],
    re_formula="~stress",
)
mdf_DSoo_stress_RE = mm_DSoo_stress_RE.fit()
print(mdf_DSoo_stress_RE.summary())

In [None]:
# AIC of the pooled opt-out ~ stress random-slope model (mdf_DSoo_stress_RE), with k = 5.
# The log-likelihood is read from the fitted result rather than copied by hand
# from the printed summary, so it cannot go stale when the data change.
print('AIC = 2k - 2 log-likehood')
print('AIC=', 2*5 - 2*mdf_DSoo_stress_RE.llf)