In [None]:
import matplotlib.pyplot as plt
import numpy as np
import os
import pandas as pd
import random
import re
import statsmodels.api as sapi
import sys

In [2]:
import json
plt_properties = json.load(open('utils/plt_properties.json'))
fig_width = plt_properties['fig_width']
plt.rcParams.update(plt_properties['plt_params'])

In [3]:
sys.path.append(os.path.join(os.getcwd(), 'utils'))
from bland_altman import corr_test
from models import get_models_df_preds, glmm_lasso, mixedlm_backward_sel
from variables import target_col

In [4]:
# data
pd.set_option('display.max_colwidth', None)
df = pd.read_csv('../Data/dataset_cleaned.csv')

# labels
labels_df_1 = pd.read_excel('../Data/short_codebook_SK1_pgeld_with_labels.xlsx')
labels_df_2 = pd.read_excel('../Data/short_codebook_SK1_pgeld_with_labels_oct2022.xlsx')
labels_df_3 = pd.read_excel('../Data/short_codebook_SK2_pgeld_with_labels.xlsx')
labels_df_4 = pd.read_excel('../Data/short_codebook_SK2_pgeld_with_labels_oct2022.xlsx')
labels_df = pd.concat([labels_df_1, labels_df_2, labels_df_3, labels_df_4]).drop_duplicates(
    subset=['Variable Name']).set_index('Variable Name')[['Variable Label']]
labels_df.dropna(inplace=True)
labels_df['Variable Label'] = labels_df['Variable Label'].apply(lambda s: s.replace(' (NewVar)', ''))

  df = pd.read_csv('../Data/dataset_cleaned.csv')


In [5]:
# sodium-creatinine and potassium-creatinine ratios
df['un_na_k'] = df['una_un_mmol'] / df['uk_un_mmol']
df['un_na_crt'] = 1e3 * df['una_un_mmol'] / df['ucrt_un_umol']

labels_df.loc['un_na_k'] = 'Sodium-potassium excretion ratio'
labels_df.loc['un_na_crt'] = 'Sodium-creatinine excretion ratio'

# 1) Multivariate regression

**Models**

In [6]:
models = {}
cols = []
curr = models['M0 = Urinary sodium'] = ['una_un_mmolh', 'una_un', 'una_un_mmol']
curr = models['M1 = M0 + duration + volume'] = curr + ['un_min', 'un_ml']
curr = models['M2 = M1 + sex + age'] = curr + ['sex', 'age']  # 'edu_3cat', 'center'
curr = models['M3 = M2 + smoking + alcohol'] = curr + ['t_now', 't_ctf', 't_ct', 'unit_alc_week', 'a_grad']
curr = models['M4 = M3 + potassium + creatinine'] = curr + ['uk_un', 'uk_un_mmol', 'uk_un_mmolh', 'un_na_k', 'ucrt_un_umolh', 'ucrt_un', 'ucrt_un_umol', 'un_na_crt']
curr = models['M5 = M4 + weight + height + BMI + waist-to-hip ratio'] = curr + ['bwi', 'bh1', 'bmi', 'waist_hip']
curr = models['M6 = M5 + blood pressure'] = curr + ['sbp1_5', 'dbp1_5']
curr = models['M7 = M6 + hypertension + diabetes'] = curr + ['hypertension_combined', 'diabetes_combined']
curr = models['M8 = M7 + blood lipid values'] = curr + ['cho', 'ldl', 'tg', 'ldlhdl']

In [7]:
# variables of interest
cols = [
    'sk1_id', 'source', 'center',
    'una_un', 'una_un_mmol', 'una_un_mmolh',
    'un_min', 'un_ml',
    'sex', 'age',
    'uk_un', 'uk_un_mmol', 'uk_un_mmolh', 'un_na_k', 'ucrt_un', 'ucrt_un_umol', 'ucrt_un_umolh', 'un_na_crt',
    'bwi', 'bh1', 'bmi',
    'sbp1_5', 'dbp1_5',
    'hypertension_combined', 'diabetes_combined',
    't_now', 't_ctf', 't_ct', # tobacco
    'd_diet', 'f_fruit', 'f_veg', 'f_meat_24n', 'f_fish', # diet: NOT USED
    'waist_hip', 'waistc1', # waist-to-hip
    'cho', 'ldl', 'ldlhdl', 'tg', # blood lipid values
    'e_now', 'e_p_past', 'unit_alc_week', # alcohol: ONLY UNITS USED
    'p_act1', 'p_act2', 'p_act3', 'p_act4', 'phys_activity_22fN', 'phys_activity_22fD', 'phys_activity_t', 'a_grad', # physical activity: ONLY A_GRAD USED
    'sleep_duration', 'sleep_22fN', 'sleep_t', 'sleep_22fD', # [follow-up] # sleep: NOT USED
    'sc_now', 'educat_code', 'edu_3cat', # education: NOT USED
    'origin_final', # [follow-up] # origin: NOT USED
    'orig_regio', # [follow-up]: NOT USED
    'p_h_income', # [follow-up] # income: NOT USED
    'inc_hh', # [follow-up]: NOT USED
    'fin_diff', # [follow-up]: NOT USED
]

cat_cols = ['source', 'sk1_id', 'center',
           'sex', 'hypertension_combined', 'diabetes_combined', 't_now',
           'd_diet', 'f_fruit', 'f_veg', 'f_meat_24n', 'f_fish',
           'e_now', 'e_p_past',
           'p_act1', 'p_act2', 'p_act3', 'p_act4',
           'sc_now', 'educat_code', 'edu_3cat',
           'origin_final', 'orig_regio', 'fin_diff', 'p_h_income']

for col in cat_cols:
    print(f'{col}: {df[col].unique()}')

source: ['sk1' 'sk2']
sk1_id: ['GJOXF_11' 'GJOXF_12' 'GJOXF_13' ... 'TBNPL_31' 'TBNPL_32' 'VQMGA_81']
center: ['LS' 'BE' 'GE']
sex: ['Male' 'Female']
hypertension_combined: ['Yes' 'No' nan]
diabetes_combined: ['No' nan 'Yes']
t_now: ['Yes' 'No' nan]
d_diet: [nan 'Yes' 'No']
f_fruit: ['1-2 portions/day' '3-4 portions per day' 'Less than 1 portion/day'
 'Never' nan '>=5 portions/day']
f_veg: ['1-2 portions/day' '3-4 portions per day' 'Less than 1 portion/day'
 'Never' nan '>=5 portions/day']
f_meat_24n: ['4 days/week' '2 days/week' '3 days/week' 'Never' 'Rarely' '1 day/week'
 '5 days/week' '6 days/week' '7 days/week' nan]
f_fish: ['1 day/week' 'Never' 'Rarely' '2 days/week' '4 days/week' '5 days/week'
 '3 days/week' nan '6 days/week']
e_now: ['No' 'Yes' nan]
e_p_past: ['No' nan 'Yes']
p_act1: [nan 'Yes' 'No']
p_act2: [nan 'No' 'Yes']
p_act3: [nan 'No' 'Yes']
p_act4: [nan 'No' 'Yes']
sc_now: ['No' 'Yes' nan]
educat_code: ['Secondary education - vocational, lower level (apprenticeship - CF

In [8]:
nans = []
pvalues = []
names = []

for col in cols:
    nan = df[col].isna().sum()
    X_sel = df.loc[~(df[col].isna()), [col]]
    if col in cat_cols:
        X_sel[col] = X_sel[col].astype("category")
        X_sel = pd.get_dummies(X_sel[col], prefix=f'{col}::', drop_first=False)
    y_sel = df[target_col][~(df[col].isna())].values
    if col != 'sk1_id':
        for c in X_sel.columns:
            result = sapi.OLS(y_sel, X_sel[[c]]).fit()
            names.append(c)
            nans.append(nan)
            pvalues.append(result.pvalues[c])

col_descr_df = pd.DataFrame({'Variable name': names, '# Missing': nans, 'P-value correlation': pvalues})
with pd.option_context("display.max_rows", None):
    display(col_descr_df.sort_values('P-value correlation'))

Unnamed: 0,Variable name,# Missing,P-value correlation
71,e_p_past::_No,216,0.0
19,ucrt_un_umolh,0,0.0
20,un_na_crt,0,0.0
21,bwi,3,0.0
22,bh1,4,0.0
23,bmi,4,0.0
24,sbp1_5,6,0.0
18,ucrt_un_umol,0,0.0
25,dbp1_5,6,0.0
68,tg,11,0.0


In [9]:
with pd.option_context("display.max_rows", None):
    display(col_descr_df[col_descr_df["Variable name"].isin(list(set([col for val in models.values() for col in val])))])

Unnamed: 0,Variable name,# Missing,P-value correlation
5,una_un,0,0.0
6,una_un_mmol,0,0.0
7,una_un_mmolh,0,0.0
8,un_min,0,0.0
9,un_ml,0,0.0
12,age,0,0.0
13,uk_un,0,0.0
14,uk_un_mmol,0,0.0
15,uk_un_mmolh,0,0.0
16,un_na_k,0,0.0


**Dataset size**

In [10]:
def count_participants(df):
    return np.array([
        len(df),  # total
        (df['source'] == 'sk1').sum(),  # sk1
        (df['source'] == 'sk2').sum(),  # sk2
        len(set(df[df['source'] == 'sk1']['sk1_id']) & set(df[df['source'] == 'sk2']['sk1_id'])),  # common
    ])

N0 = count_participants(df)
print('Initial: Sample size: {} ({} + {}, common {}).'.format(*N0))

all_cols = list(set([col for val in models.values() for col in val]))
unused_cols = set(cols) - set(all_cols)
data = df[all_cols + [target_col, 'sk1_id', 'source']]  # X u y u id

# handle categorical variables
categorical_vars = ['sex', 'hypertension_combined', 'diabetes_combined', 't_now']  # 'edu_3cat'
for col in categorical_vars:
    data[col] = data[col].astype("category")

# remove nans
print(f'Initial dataset size: {len(data)}.')
data.dropna(inplace=True)
print(f'Multivariate linear regression dataset size: {len(data)}.')

N = count_participants(data)
print(N0 - N)
print('Initial: Sample size: {} ({} + {}, common {}).'.format(*N))
data = data[[col for col in data.columns]]  # if col != 'source']]

Initial: Sample size: 1757 (962 + 795, common 667).
Initial dataset size: 1757.
Multivariate linear regression dataset size: 1649.
[108  71  37  74]
Initial: Sample size: 1649 (891 + 758, common 593).


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data[col] = data[col].astype("category")
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data[col] = data[col].astype("category")
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data[col] = data[col].astype("category")
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[

In [11]:
data

Unnamed: 0,ldl,un_na_k,hypertension_combined,ucrt_un,bmi,waist_hip,age,t_ct,t_now,una_un_mmol,...,a_grad,cho,un_min,dbp1_5,un_ml,sex,bh1,una_u24corr_mmol,sk1_id,source
0,4.30,4.416666,Yes,14548.0,26.621561,0.929293,65.000000,0.0,Yes,82.838997,...,5.0,6.30,515.549866,78.400002,521.0,Male,171.5,211.244079,GJOXF_11,sk1
1,3.90,1.500000,No,7966.0,21.657286,0.767677,41.299999,0.0,Yes,8.910000,...,7.0,5.90,478.412811,66.800003,330.0,Female,167.0,59.113895,GJOXF_12,sk1
2,3.80,2.263158,No,5783.0,26.318621,0.870000,40.000000,0.0,No,25.240999,...,3.0,6.20,425.984009,80.000000,587.0,Female,167.0,101.963514,GJOXF_13,sk1
3,4.20,1.800000,No,2744.0,17.922968,0.761364,67.500000,0.0,No,23.868000,...,4.0,6.90,526.472534,71.599998,884.0,Female,164.5,52.078190,XYRQB_21,sk1
4,3.90,2.913044,No,17553.0,25.244705,0.804651,35.299999,0.0,Yes,43.282001,...,7.0,5.80,434.722137,71.599998,323.0,Male,185.0,185.786801,FERJU_32,sk1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1751,1.34,6.200000,No,14627.0,19.244329,0.746939,27.700000,0.0,No,32.550000,...,6.0,3.24,420.000000,68.800000,210.0,Female,177.6,199.327304,IDGJV_65,sk2
1752,2.55,3.206897,No,12461.0,23.900369,0.884861,29.100000,0.0,No,41.478000,...,5.0,4.01,455.000000,72.800000,446.0,Male,181.0,180.224681,IDGJV_66,sk2
1753,4.09,2.821429,Yes,4974.0,30.991736,0.984288,80.800000,0.0,No,39.500000,...,8.0,5.61,525.000000,61.200000,500.0,Female,154.0,110.087474,UYVDP_72,sk2
1754,2.00,1.588235,No,5194.0,25.356223,0.825490,53.100000,0.0,No,28.782000,...,5.0,3.63,420.000000,67.200000,533.0,Female,163.4,81.752774,UYVDP_73,sk2


**Create CV splits**

In [12]:
n_cv_splits = 5

data['cv_split'] = -1
ind = sorted(list(set(data['sk1_id'].values)))
random.Random(7).shuffle(ind)
for i in range(n_cv_splits):
    start = int((i * len(ind)) / n_cv_splits)
    stop = len(ind) if i == (n_cv_splits - 1) else int(((i+1) * len(ind)) / n_cv_splits)
    subset = ind[start : stop]
    data.loc[data['sk1_id'].isin(subset), 'cv_split'] = i
assert (data['cv_split'] == -1).sum() == 0

In [13]:
factor = np.mean(df.u24_ml) / np.mean(df.un_ml)
corr_test(df[target_col], df.una_un_mmol * factor - df[target_col])

0.17618238176182383

## 1.1) MixedLM: No variable selection

Here for reference only.
In general, we want to do some variable selection/regularization, especially for the models in the sensitivity analysis since they otherwise have a tendency to overfit.

### 1.1.1) Incremental construction of linear regression equations

In [14]:
models_df_no_selec, preds_no_selecs = get_models_df_preds(
    models, data, target_col, cat_cols, labels_df, norm_out=True, model_fn=mixedlm_backward_sel, sig=1.
)

models_df_no_selec.to_csv('../Figures/linear_regression/lin_regr_no_selec.csv')
models_df_no_selec


Model M0 = Urinary sodium:
0 categorical columns. Will encode.
3 numerical columns. Will standardize.
{'Intercept': 0.002485897897614049, 'una_un_mmolh': 0.462587595794723, 'una_un_mmol': 0.12419631575552226, 'una_un': 0.15158745879030464, 'sk1_id Var': 0.4526256588270357}

RMSE (43.88723899673437, 44.036869170705835)

Model M1 = M0 + duration + volume:
0 categorical columns. Will encode.
5 numerical columns. Will standardize.
{'Intercept': 0.0029043519555376285, 'un_min': -0.14944442225982862, 'una_un_mmol': 0.6387352808951444, 'un_ml': -0.018814632958574724, 'una_un_mmolh': -0.03412807711710043, 'una_un': 0.13761455949215895, 'sk1_id Var': 0.4424001749498199}

RMSE (43.602804725663226, 43.848230034919396)

Model M2 = M1 + sex + age:
1 categorical columns. Will encode.
6 numerical columns. Will standardize.
{'Intercept': -0.15648410444065283, 'age': -0.1705928642465623, 'sex_SUB_Male': 0.33000127335184876, 'un_min': -0.13859540861905262, 'una_un_mmol': 0.651189432331544, 'un_ml': -0.

Unnamed: 0,Model,Number of predictors,R2,AIC,BIC,Root-mean-square error,P-value difference hypertensive-normotensive,P-value trend measured-error,Correlation measured-error,Predictors
0,M0 = Urinary sodium,3/3 | 3/3 (3.0/3.0 | 3.0/3.0),0.469 (0.465),3593 (2876),3626 (2907),43.887 (44.037),0.047 (0.374),0.0 (0.0),-0.741 | 0.0 (-0.741 | 0.0),"[Urinary sodium night (mmol/L), Urinary sodium night excretion (mmol), Urinary sodium night excretion (mmol/h)]"
1,M1 = M0 + duration + volume,5/5 | 5/5 (5.0/5.0 | 5.0/5.0),0.476 (0.47),3577 (2864),3621 (2905),43.603 (43.848),0.043 (0.35),0.0 (0.0),-0.737 | 0.0 (-0.735 | 0.0),"[Duration of night urine collection (min), Night urinary volume (ml), Urinary sodium night (mmol/L), Urinary sodium night excretion (mmol), Urinary sodium night excretion (mmol/h)]"
2,M2 = M1 + sex + age,7/7 | 7/7 (7.0/7.0 | 7.0/7.0),0.528 (0.521),3427 (2744),3481 (2796),41.392 (41.656),0.188 (0.341),0.0 (0.0),-0.692 | 0.0 (-0.689 | 0.0),"[Age at clinical visit, Duration of night urine collection (min), Night urinary volume (ml), Sex of the participant (1=Male,2=Female): Male, Urinary sodium night (mmol/L), Urinary sodium night excretion (mmol), Urinary sodium night excretion (mmol/h)]"
3,M3 = M2 + smoking + alcohol,12/12 | 12/12 (12.0/12.0 | 12.0/12.0),0.53 (0.518),3431 (2748),3512 (2826),41.314 (41.799),0.208 (0.339),0.0 (0.0),-0.69 | 0.0 (-0.688 | 0.0),"[Age at clinical visit, Current smoking (Y=1,N=2): Yes, Duration of night urine collection (min), Night urinary volume (ml), Number of cigarettes without filter per day, Number of filter cigarettes per day, Physical activity (1-10), Sex of the participant (1=Male,2=Female): Male, Units of alcohol per week (1unit~10g pure alcohol), Urinary sodium night (mmol/L), Urinary sodium night excretion (mmol), Urinary sodium night excretion (mmol/h)]"
4,M4 = M3 + potassium + creatinine,20/20 | 20/20 (20.0/20.0 | 20.0/20.0),0.566 (0.547),3303 (2646),3427 (2765),39.703 (40.579),0.101 (0.408),0.0 (0.0),-0.659 | 0.0 (-0.658 | 0.0),"[Age at clinical visit, Current smoking (Y=1,N=2): Yes, Duration of night urine collection (min), Night urinary volume (ml), Number of cigarettes without filter per day, Number of filter cigarettes per day, Physical activity (1-10), Sex of the participant (1=Male,2=Female): Male, Sodium-creatinine excretion ratio, Sodium-potassium excretion ratio, Units of alcohol per week (1unit~10g pure alcohol), Urinary creatinin night (umol/L), Urinary creatinine night excretion (umol), Urinary creatinine night excretion (umolh), Urinary potassium night (mmol/L), Urinary potassium night excretion (mmol), Urinary potassium night excretion (mmol/h), Urinary sodium night (mmol/L), Urinary sodium night excretion (mmol), Urinary sodium night excretion (mmol/h)]"
5,M5 = M4 + weight + height + BMI + waist-to-hip ratio,24/24 | 24/24 (24.0/24.0 | 24.0/24.0),0.585 (0.566),3247 (2603),3393 (2743),38.819 (39.716),0.626 (0.516),0.0 (0.0),-0.642 | 0.0 (-0.643 | 0.0),"[Age at clinical visit, Body height (cm), Body mass index (kg/m2), Body weight (kg), Current smoking (Y=1,N=2): Yes, Duration of night urine collection (min), Night urinary volume (ml), Number of cigarettes without filter per day, Number of filter cigarettes per day, Physical activity (1-10), Sex of the participant (1=Male,2=Female): Male, Sodium-creatinine excretion ratio, Sodium-potassium excretion ratio, Units of alcohol per week (1unit~10g pure alcohol), Urinary creatinin night (umol/L), Urinary creatinine night excretion (umol), Urinary creatinine night excretion (umolh), Urinary potassium night (mmol/L), Urinary potassium night excretion (mmol), Urinary potassium night excretion (mmol/h), Urinary sodium night (mmol/L), Urinary sodium night excretion (mmol), Urinary sodium night excretion (mmol/h), Waist/hip ratio]"
6,M6 = M5 + blood pressure,26/26 | 26/26 (26.0/26.0 | 26.0/26.0),0.585 (0.565),3249 (2605),3406 (2755),38.803 (39.761),0.866 (0.58),0.0 (0.0),-0.642 | 0.0 (-0.643 | 0.0),"[Age at clinical visit, Body height (cm), Body mass index (kg/m2), Body weight (kg), Current smoking (Y=1,N=2): Yes, Duration of night urine collection (min), Mean of 5 dbp measurements at clinical visit (mmHg: dbp1-5), Mean of 5 sbp measurements at clinical visit (mmHg: sbp1-5), Night urinary volume (ml), Number of cigarettes without filter per day, Number of filter cigarettes per day, Physical activity (1-10), Sex of the participant (1=Male,2=Female): Male, Sodium-creatinine excretion ratio, Sodium-potassium excretion ratio, Units of alcohol per week (1unit~10g pure alcohol), Urinary creatinin night (umol/L), Urinary creatinine night excretion (umol), Urinary creatinine night excretion (umolh), Urinary potassium night (mmol/L), Urinary potassium night excretion (mmol), Urinary potassium night excretion (mmol/h), Urinary sodium night (mmol/L), Urinary sodium night excretion (mmol), Urinary sodium night excretion (mmol/h), Waist/hip ratio]"
7,M7 = M6 + hypertension + diabetes,28/28 | 28/28 (28.0/28.0 | 28.0/28.0),0.586 (0.565),3249 (2605),3417 (2766),38.746 (39.762),0.882 (0.533),0.0 (0.0),-0.641 | 0.0 (-0.643 | 0.0),"[Age at clinical visit, Body height (cm), Body mass index (kg/m2), Body weight (kg), Current smoking (Y=1,N=2): Yes, Duration of night urine collection (min), Ever had diabetes (office glu>7,self-report,anti-Diab drugs)(Y=1,N=0): Yes, Ever had hypertension (office BP>140/90,self-report,anti-HTA drugs)(Y=1,N=0) (Ne: Yes, Mean of 5 dbp measurements at clinical visit (mmHg: dbp1-5), Mean of 5 sbp measurements at clinical visit (mmHg: sbp1-5), Night urinary volume (ml), Number of cigarettes without filter per day, Number of filter cigarettes per day, Physical activity (1-10), Sex of the participant (1=Male,2=Female): Male, Sodium-creatinine excretion ratio, Sodium-potassium excretion ratio, Units of alcohol per week (1unit~10g pure alcohol), Urinary creatinin night (umol/L), Urinary creatinine night excretion (umol), Urinary creatinine night excretion (umolh), Urinary potassium night (mmol/L), Urinary potassium night excretion (mmol), Urinary potassium night excretion (mmol/h), Urinary sodium night (mmol/L), Urinary sodium night excretion (mmol), Urinary sodium night excretion (mmol/h), Waist/hip ratio]"
8,M8 = M7 + blood lipid values,32/32 | 32/32 (32.0/32.0 | 32.0/32.0),0.588 (0.565),3251 (2607),3440 (2789),38.658 (39.76),0.869 (0.492),0.0 (0.0),-0.64 | 0.0 (-0.642 | 0.0),"[Age at clinical visit, Blood LDL cholesterol (mmol/L), Blood LDL/HDL ratio, Blood total cholesterol (mmol/L), Blood triglyceride (mmol/L), Body height (cm), Body mass index (kg/m2), Body weight (kg), Current smoking (Y=1,N=2): Yes, Duration of night urine collection (min), Ever had diabetes (office glu>7,self-report,anti-Diab drugs)(Y=1,N=0): Yes, Ever had hypertension (office BP>140/90,self-report,anti-HTA drugs)(Y=1,N=0) (Ne: Yes, Mean of 5 dbp measurements at clinical visit (mmHg: dbp1-5), Mean of 5 sbp measurements at clinical visit (mmHg: sbp1-5), Night urinary volume (ml), Number of cigarettes without filter per day, Number of filter cigarettes per day, Physical activity (1-10), Sex of the participant (1=Male,2=Female): Male, Sodium-creatinine excretion ratio, Sodium-potassium excretion ratio, Units of alcohol per week (1unit~10g pure alcohol), Urinary creatinin night (umol/L), Urinary creatinine night excretion (umol), Urinary creatinine night excretion (umolh), Urinary potassium night (mmol/L), Urinary potassium night excretion (mmol), Urinary potassium night excretion (mmol/h), Urinary sodium night (mmol/L), Urinary sodium night excretion (mmol), Urinary sodium night excretion (mmol/h), Waist/hip ratio]"


In [15]:
models_df_no_selec, preds_no_selecs = get_models_df_preds(
    models, data, target_col, cat_cols, labels_df, norm_out=True, model_fn=mixedlm_backward_sel, sig=1.
)

models_df_no_selec.to_csv('../Figures/linear_regression/lin_regr_no_selec_group.csv')
models_df_no_selec


Model M0 = Urinary sodium:
0 categorical columns. Will encode.
3 numerical columns. Will standardize.
{'Intercept': 0.002485897897614049, 'una_un_mmolh': 0.462587595794723, 'una_un_mmol': 0.12419631575552226, 'una_un': 0.15158745879030464, 'sk1_id Var': 0.4526256588270357}

RMSE (43.88723899673437, 44.036869170705835)

Model M1 = M0 + duration + volume:
0 categorical columns. Will encode.
5 numerical columns. Will standardize.
{'Intercept': 0.0029043519555376285, 'un_min': -0.14944442225982862, 'una_un_mmol': 0.6387352808951444, 'un_ml': -0.018814632958574724, 'una_un_mmolh': -0.03412807711710043, 'una_un': 0.13761455949215895, 'sk1_id Var': 0.4424001749498199}

RMSE (43.602804725663226, 43.848230034919396)

Model M2 = M1 + sex + age:
1 categorical columns. Will encode.
6 numerical columns. Will standardize.
{'Intercept': -0.15648410444065283, 'age': -0.1705928642465623, 'sex_SUB_Male': 0.33000127335184876, 'un_min': -0.13859540861905262, 'una_un_mmol': 0.651189432331544, 'un_ml': -0.

Unnamed: 0,Model,Number of predictors,R2,AIC,BIC,Root-mean-square error,P-value difference hypertensive-normotensive,P-value trend measured-error,Correlation measured-error,Predictors
0,M0 = Urinary sodium,3/3 | 3/3 (3.0/3.0 | 3.0/3.0),0.469 (0.465),3593 (2876),3626 (2907),43.887 (44.037),0.047 (0.374),0.0 (0.0),-0.741 | 0.0 (-0.741 | 0.0),"[Urinary sodium night (mmol/L), Urinary sodium night excretion (mmol), Urinary sodium night excretion (mmol/h)]"
1,M1 = M0 + duration + volume,5/5 | 5/5 (5.0/5.0 | 5.0/5.0),0.476 (0.47),3577 (2864),3621 (2905),43.603 (43.848),0.043 (0.35),0.0 (0.0),-0.737 | 0.0 (-0.735 | 0.0),"[Duration of night urine collection (min), Night urinary volume (ml), Urinary sodium night (mmol/L), Urinary sodium night excretion (mmol), Urinary sodium night excretion (mmol/h)]"
2,M2 = M1 + sex + age,7/7 | 7/7 (7.0/7.0 | 7.0/7.0),0.528 (0.521),3427 (2744),3481 (2796),41.392 (41.656),0.188 (0.341),0.0 (0.0),-0.692 | 0.0 (-0.689 | 0.0),"[Age at clinical visit, Duration of night urine collection (min), Night urinary volume (ml), Sex of the participant (1=Male,2=Female): Male, Urinary sodium night (mmol/L), Urinary sodium night excretion (mmol), Urinary sodium night excretion (mmol/h)]"
3,M3 = M2 + smoking + alcohol,12/12 | 12/12 (12.0/12.0 | 12.0/12.0),0.53 (0.518),3431 (2748),3512 (2826),41.314 (41.799),0.208 (0.339),0.0 (0.0),-0.69 | 0.0 (-0.688 | 0.0),"[Age at clinical visit, Current smoking (Y=1,N=2): Yes, Duration of night urine collection (min), Night urinary volume (ml), Number of cigarettes without filter per day, Number of filter cigarettes per day, Physical activity (1-10), Sex of the participant (1=Male,2=Female): Male, Units of alcohol per week (1unit~10g pure alcohol), Urinary sodium night (mmol/L), Urinary sodium night excretion (mmol), Urinary sodium night excretion (mmol/h)]"
4,M4 = M3 + potassium + creatinine,20/20 | 20/20 (20.0/20.0 | 20.0/20.0),0.566 (0.547),3303 (2646),3427 (2765),39.703 (40.579),0.101 (0.408),0.0 (0.0),-0.659 | 0.0 (-0.658 | 0.0),"[Age at clinical visit, Current smoking (Y=1,N=2): Yes, Duration of night urine collection (min), Night urinary volume (ml), Number of cigarettes without filter per day, Number of filter cigarettes per day, Physical activity (1-10), Sex of the participant (1=Male,2=Female): Male, Sodium-creatinine excretion ratio, Sodium-potassium excretion ratio, Units of alcohol per week (1unit~10g pure alcohol), Urinary creatinin night (umol/L), Urinary creatinine night excretion (umol), Urinary creatinine night excretion (umolh), Urinary potassium night (mmol/L), Urinary potassium night excretion (mmol), Urinary potassium night excretion (mmol/h), Urinary sodium night (mmol/L), Urinary sodium night excretion (mmol), Urinary sodium night excretion (mmol/h)]"
5,M5 = M4 + weight + height + BMI + waist-to-hip ratio,24/24 | 24/24 (24.0/24.0 | 24.0/24.0),0.585 (0.566),3247 (2603),3393 (2743),38.819 (39.716),0.626 (0.516),0.0 (0.0),-0.642 | 0.0 (-0.643 | 0.0),"[Age at clinical visit, Body height (cm), Body mass index (kg/m2), Body weight (kg), Current smoking (Y=1,N=2): Yes, Duration of night urine collection (min), Night urinary volume (ml), Number of cigarettes without filter per day, Number of filter cigarettes per day, Physical activity (1-10), Sex of the participant (1=Male,2=Female): Male, Sodium-creatinine excretion ratio, Sodium-potassium excretion ratio, Units of alcohol per week (1unit~10g pure alcohol), Urinary creatinin night (umol/L), Urinary creatinine night excretion (umol), Urinary creatinine night excretion (umolh), Urinary potassium night (mmol/L), Urinary potassium night excretion (mmol), Urinary potassium night excretion (mmol/h), Urinary sodium night (mmol/L), Urinary sodium night excretion (mmol), Urinary sodium night excretion (mmol/h), Waist/hip ratio]"
6,M6 = M5 + blood pressure,26/26 | 26/26 (26.0/26.0 | 26.0/26.0),0.585 (0.565),3249 (2605),3406 (2755),38.803 (39.761),0.866 (0.58),0.0 (0.0),-0.642 | 0.0 (-0.643 | 0.0),"[Age at clinical visit, Body height (cm), Body mass index (kg/m2), Body weight (kg), Current smoking (Y=1,N=2): Yes, Duration of night urine collection (min), Mean of 5 dbp measurements at clinical visit (mmHg: dbp1-5), Mean of 5 sbp measurements at clinical visit (mmHg: sbp1-5), Night urinary volume (ml), Number of cigarettes without filter per day, Number of filter cigarettes per day, Physical activity (1-10), Sex of the participant (1=Male,2=Female): Male, Sodium-creatinine excretion ratio, Sodium-potassium excretion ratio, Units of alcohol per week (1unit~10g pure alcohol), Urinary creatinin night (umol/L), Urinary creatinine night excretion (umol), Urinary creatinine night excretion (umolh), Urinary potassium night (mmol/L), Urinary potassium night excretion (mmol), Urinary potassium night excretion (mmol/h), Urinary sodium night (mmol/L), Urinary sodium night excretion (mmol), Urinary sodium night excretion (mmol/h), Waist/hip ratio]"
7,M7 = M6 + hypertension + diabetes,28/28 | 28/28 (28.0/28.0 | 28.0/28.0),0.586 (0.565),3249 (2605),3417 (2766),38.746 (39.762),0.882 (0.533),0.0 (0.0),-0.641 | 0.0 (-0.643 | 0.0),"[Age at clinical visit, Body height (cm), Body mass index (kg/m2), Body weight (kg), Current smoking (Y=1,N=2): Yes, Duration of night urine collection (min), Ever had diabetes (office glu>7,self-report,anti-Diab drugs)(Y=1,N=0): Yes, Ever had hypertension (office BP>140/90,self-report,anti-HTA drugs)(Y=1,N=0) (Ne: Yes, Mean of 5 dbp measurements at clinical visit (mmHg: dbp1-5), Mean of 5 sbp measurements at clinical visit (mmHg: sbp1-5), Night urinary volume (ml), Number of cigarettes without filter per day, Number of filter cigarettes per day, Physical activity (1-10), Sex of the participant (1=Male,2=Female): Male, Sodium-creatinine excretion ratio, Sodium-potassium excretion ratio, Units of alcohol per week (1unit~10g pure alcohol), Urinary creatinin night (umol/L), Urinary creatinine night excretion (umol), Urinary creatinine night excretion (umolh), Urinary potassium night (mmol/L), Urinary potassium night excretion (mmol), Urinary potassium night excretion (mmol/h), Urinary sodium night (mmol/L), Urinary sodium night excretion (mmol), Urinary sodium night excretion (mmol/h), Waist/hip ratio]"
8,M8 = M7 + blood lipid values,32/32 | 32/32 (32.0/32.0 | 32.0/32.0),0.588 (0.565),3251 (2607),3440 (2789),38.658 (39.76),0.869 (0.492),0.0 (0.0),-0.64 | 0.0 (-0.642 | 0.0),"[Age at clinical visit, Blood LDL cholesterol (mmol/L), Blood LDL/HDL ratio, Blood total cholesterol (mmol/L), Blood triglyceride (mmol/L), Body height (cm), Body mass index (kg/m2), Body weight (kg), Current smoking (Y=1,N=2): Yes, Duration of night urine collection (min), Ever had diabetes (office glu>7,self-report,anti-Diab drugs)(Y=1,N=0): Yes, Ever had hypertension (office BP>140/90,self-report,anti-HTA drugs)(Y=1,N=0) (Ne: Yes, Mean of 5 dbp measurements at clinical visit (mmHg: dbp1-5), Mean of 5 sbp measurements at clinical visit (mmHg: sbp1-5), Night urinary volume (ml), Number of cigarettes without filter per day, Number of filter cigarettes per day, Physical activity (1-10), Sex of the participant (1=Male,2=Female): Male, Sodium-creatinine excretion ratio, Sodium-potassium excretion ratio, Units of alcohol per week (1unit~10g pure alcohol), Urinary creatinin night (umol/L), Urinary creatinine night excretion (umol), Urinary creatinine night excretion (umolh), Urinary potassium night (mmol/L), Urinary potassium night excretion (mmol), Urinary potassium night excretion (mmol/h), Urinary sodium night (mmol/L), Urinary sodium night excretion (mmol), Urinary sodium night excretion (mmol/h), Waist/hip ratio]"


## 1.2) MixedLM: Backward elimination of predictors

Initially, we were using this model.
In review it was however pointed out that backward selection is criticized and models like Lasso are generally preferred.
So, we switched to the following.
Left it here for reference only.

### 1.2.1) Incremental construction of linear regression equations

In [16]:
models_df_selec, preds_selecs = get_models_df_preds(
    models, data, target_col, cat_cols, labels_df, norm_out=True, model_fn=mixedlm_backward_sel, sig=.05
)

models_df_selec.to_csv('../Figures/linear_regression/lin_regr_selec.csv')
models_df_selec


Model M0 = Urinary sodium:
0 categorical columns. Will encode.
3 numerical columns. Will standardize.
{'Intercept': 0.002588877739806719, 'una_un_mmolh': 0.5816939799213466, 'una_un': 0.15460890710391811, 'sk1_id Var': 0.4456982937943033}

RMSE (43.91134370102365, 44.05104793923641)

Model M1 = M0 + duration + volume:
0 categorical columns. Will encode.
5 numerical columns. Will standardize.
{'Intercept': 0.0029094487163252363, 'un_min': -0.14228006948876182, 'una_un_mmol': 0.5885031967206127, 'una_un': 0.15223116254822713, 'sk1_id Var': 0.44277842101938963}

RMSE (43.60857320327379, 43.76301141857549)

Model M2 = M1 + sex + age:
1 categorical columns. Will encode.
6 numerical columns. Will standardize.
{'Intercept': -0.1564252055497303, 'age': -0.1707652553627504, 'sex_SUB_Male': 0.3298668788979068, 'un_min': -0.12875436146428215, 'una_un_mmol': 0.5948234480977175, 'una_un': 0.10235041352352406, 'sk1_id Var': 0.33421083661753526}

RMSE (41.39796188622778, 41.55940186692497)

Model M3

Unnamed: 0,Model,Number of predictors,R2,AIC,BIC,Root-mean-square error,P-value difference hypertensive-normotensive,P-value trend measured-error,Correlation measured-error,Predictors
0,M0 = Urinary sodium,2/2 | 2/2 (2.2/2.2 | 2.2/2.2),0.469 (0.465),3594 (2876),3621 (2903),43.911 (44.051),0.061 (0.355),0.0 (0.0),-0.741 | 0.0 (-0.742 | 0.0),"[Urinary sodium night (mmol/L), Urinary sodium night excretion (mmol/h)]"
1,M1 = M0 + duration + volume,3/3 | 3/3 (3.0/3.0 | 3.0/3.0),0.476 (0.472),3574 (2861),3606 (2892),43.609 (43.763),0.045 (0.345),0.0 (0.0),-0.737 | 0.0 (-0.736 | 0.0),"[Duration of night urine collection (min), Urinary sodium night (mmol/L), Urinary sodium night excretion (mmol)]"
2,M2 = M1 + sex + age,5/5 | 5/5 (5.0/5.0 | 5.0/5.0),0.528 (0.523),3424 (2741),3467 (2783),41.398 (41.559),0.181 (0.346),0.0 (0.0),-0.692 | 0.0 (-0.691 | 0.0),"[Age at clinical visit, Duration of night urine collection (min), Sex of the participant (1=Male,2=Female): Male, Urinary sodium night (mmol/L), Urinary sodium night excretion (mmol)]"
3,M3 = M2 + smoking + alcohol,5/5 | 5/5 (5.2/5.2 | 5.2/5.2),0.528 (0.522),3424 (2741),3467 (2783),41.398 (41.625),0.181 (0.352),0.0 (0.0),-0.692 | 0.0 (-0.691 | 0.0),"[Age at clinical visit, Duration of night urine collection (min), Sex of the participant (1=Male,2=Female): Male, Urinary sodium night (mmol/L), Urinary sodium night excretion (mmol)]"
4,M4 = M3 + potassium + creatinine,8/8 | 8/8 (8.4/8.4 | 8.4/8.4),0.563 (0.55),3291 (2633),3351 (2692),39.842 (40.451),0.1 (0.438),0.0 (0.0),-0.662 | 0.0 (-0.66 | 0.0),"[Age at clinical visit, Duration of night urine collection (min), Sex of the participant (1=Male,2=Female): Male, Sodium-creatinine excretion ratio, Urinary creatinin night (umol/L), Urinary potassium night excretion (mmol/h), Urinary sodium night (mmol/L), Urinary sodium night excretion (mmol)]"
5,M5 = M4 + weight + height + BMI + waist-to-hip ratio,12/12 | 12/12 (9.8/9.8 | 9.8/9.8),0.582 (0.569),3231 (2590),3312 (2656),38.934 (39.583),0.739 (0.578),0.0 (0.0),-0.644 | 0.0 (-0.648 | 0.0),"[Age at clinical visit, Body weight (kg), Duration of night urine collection (min), Night urinary volume (ml), Physical activity (1-10), Sex of the participant (1=Male,2=Female): Male, Sodium-creatinine excretion ratio, Sodium-potassium excretion ratio, Urinary creatinin night (umol/L), Urinary potassium night excretion (mmol/h), Urinary sodium night (mmol/L), Urinary sodium night excretion (mmol)]"
6,M6 = M5 + blood pressure,12/12 | 12/12 (9.8/9.8 | 9.8/9.8),0.582 (0.569),3231 (2590),3312 (2656),38.934 (39.583),0.739 (0.578),0.0 (0.0),-0.644 | 0.0 (-0.648 | 0.0),"[Age at clinical visit, Body weight (kg), Duration of night urine collection (min), Night urinary volume (ml), Physical activity (1-10), Sex of the participant (1=Male,2=Female): Male, Sodium-creatinine excretion ratio, Sodium-potassium excretion ratio, Urinary creatinin night (umol/L), Urinary potassium night excretion (mmol/h), Urinary sodium night (mmol/L), Urinary sodium night excretion (mmol)]"
7,M7 = M6 + hypertension + diabetes,13/13 | 13/13 (10.6/10.6 | 10.6/10.6),0.584 (0.569),3229 (2588),3315 (2658),38.872 (39.582),0.696 (0.575),0.0 (0.0),-0.643 | 0.0 (-0.647 | 0.0),"[Age at clinical visit, Body weight (kg), Duration of night urine collection (min), Ever had diabetes (office glu>7,self-report,anti-Diab drugs)(Y=1,N=0): Yes, Night urinary volume (ml), Physical activity (1-10), Sex of the participant (1=Male,2=Female): Male, Sodium-creatinine excretion ratio, Sodium-potassium excretion ratio, Urinary creatinin night (umol/L), Urinary potassium night excretion (mmol/h), Urinary sodium night (mmol/L), Urinary sodium night excretion (mmol)]"
8,M8 = M7 + blood lipid values,13/13 | 13/13 (10.4/10.4 | 10.4/10.4),0.584 (0.567),3229 (2588),3315 (2657),38.872 (39.687),0.696 (0.574),0.0 (0.0),-0.643 | 0.0 (-0.649 | 0.0),"[Age at clinical visit, Body weight (kg), Duration of night urine collection (min), Ever had diabetes (office glu>7,self-report,anti-Diab drugs)(Y=1,N=0): Yes, Night urinary volume (ml), Physical activity (1-10), Sex of the participant (1=Male,2=Female): Male, Sodium-creatinine excretion ratio, Sodium-potassium excretion ratio, Urinary creatinin night (umol/L), Urinary potassium night excretion (mmol/h), Urinary sodium night (mmol/L), Urinary sodium night excretion (mmol)]"


## 1.3) GlmmLasso

### 1.3.1) Used model

In [17]:
models_df, preds = get_models_df_preds(
    models,
    data,
    target_col,
    cat_cols,
    labels_df,
    norm_out=True,
    model_fn=glmm_lasso,
    lambda_val=[0, 0.0001, 0.001, 0.01, 0.1, 1, 10, 100],
    final_re=False,
    se=False,
)

models_df.to_csv('../Figures/linear_regression/glmm_lasso.csv')
models_df


Model M0 = Urinary sodium:
0 categorical columns. Will encode.
3 numerical columns. Will standardize.
[1] 0.5317314 0.5317312 0.5317299 0.5324042 0.5310970 0.5311346 0.5314515
[8] 0.5359285
Selected lambda: [0.1]
[1] 0.5614239 0.5614239 0.5614235 0.5618326 0.5621517 0.5599842 0.5605238
[8] 0.5633911
Selected lambda: [1.]
[1] 0.5213816 0.5213813 0.5213790 0.5212952 0.5201847 0.5197505 0.5204246
[8] 0.5264790
Selected lambda: [1.]
[1] 0.5253268 0.5253266 0.5253253 0.5253119 0.5243469 0.5234694 0.5240256
[8] 0.5306939
Selected lambda: [1.]
[1] 0.5193984 0.5193983 0.5193975 0.5194089 0.5190343 0.5183211 0.5185433
[8] 0.5219063
Selected lambda: [1.]
[1] 0.5338558 0.5338558 0.5337204 0.5337211 0.5335758 0.5309228 0.5304694
[8] 0.5357567
Selected lambda: [10.]
{'(Intercept)': 0.00015327907832240643, 'una_un_mmolh': 0.4546205780980157, 'una_un': 0.16033983504140578, 'una_un_mmol': 0.13706749226325562}

RMSE (42.922871092764865, 44.01187778385297)

Model M1 = M0 + duration + volume:
0 categori

Unnamed: 0,Model,Number of predictors,R2,AIC,BIC,Root-mean-square error,P-value difference hypertensive-normotensive,P-value trend measured-error,Correlation measured-error,Predictors
0,M0 = Urinary sodium,3/3 | 3/3 (3.0/3.0 | 3.0/3.0),0.493 (0.465),3651 (2930),3845 (3115),42.923 (44.012),0.045 (0.361),0.0 (0.0),-0.731 | 0.0 (-0.726 | 0.0),"[Urinary sodium night (mmol/L), Urinary sodium night excretion (mmol), Urinary sodium night excretion (mmol/h)]"
1,M1 = M0 + duration + volume,5/5 | 5/5 (4.8/5.0 | 4.8/5.0),0.483 (0.47),3707 (2941),4007 (3140),43.329 (43.834),0.056 (0.334),0.0 (0.0),-0.735 | 0.0 (-0.729 | 0.0),"[Duration of night urine collection (min), Night urinary volume (ml), Urinary sodium night (mmol/L), Urinary sodium night excretion (mmol), Urinary sodium night excretion (mmol/h)]"
2,M2 = M1 + sex + age,7/7 | 7/7 (7.0/7.0 | 7.0/7.0),0.535 (0.521),3521 (2805),3791 (3003),41.096 (41.665),0.191 (0.346),0.0 (0.0),-0.696 | 0.0 (-0.692 | 0.0),"[Age at clinical visit, Duration of night urine collection (min), Night urinary volume (ml), Sex of the participant (1=Male,2=Female): Male, Urinary sodium night (mmol/L), Urinary sodium night excretion (mmol), Urinary sodium night excretion (mmol/h)]"
3,M3 = M2 + smoking + alcohol,11/12 | 11/12 (11.2/12.0 | 11.2/12.0),0.525 (0.517),3553 (2840),3819 (3105),41.542 (41.849),0.652 (0.348),0.0 (0.0),-0.702 | 0.0 (-0.688 | 0.0),"[Age at clinical visit, Current smoking (Y=1,N=2): Yes, Duration of night urine collection (min), Night urinary volume (ml), Number of cigarettes without filter per day, Physical activity (1-10), Sex of the participant (1=Male,2=Female): Male, Units of alcohol per week (1unit~10g pure alcohol), Urinary sodium night (mmol/L), Urinary sodium night excretion (mmol), Urinary sodium night excretion (mmol/h)]"
4,M4 = M3 + potassium + creatinine,19/20 | 19/20 (19.0/20.0 | 19.0/20.0),0.549 (0.547),3467 (2733),3728 (2973),40.49 (40.529),0.448 (0.422),0.0 (0.0),-0.698 | 0.0 (-0.668 | 0.0),"[Age at clinical visit, Current smoking (Y=1,N=2): Yes, Duration of night urine collection (min), Night urinary volume (ml), Number of cigarettes without filter per day, Physical activity (1-10), Sex of the participant (1=Male,2=Female): Male, Sodium-creatinine excretion ratio, Sodium-potassium excretion ratio, Units of alcohol per week (1unit~10g pure alcohol), Urinary creatinin night (umol/L), Urinary creatinine night excretion (umol), Urinary creatinine night excretion (umolh), Urinary potassium night (mmol/L), Urinary potassium night excretion (mmol), Urinary potassium night excretion (mmol/h), Urinary sodium night (mmol/L), Urinary sodium night excretion (mmol), Urinary sodium night excretion (mmol/h)]"
5,M5 = M4 + weight + height + BMI + waist-to-hip ratio,23/24 | 23/24 (23.0/24.0 | 23.0/24.0),0.574 (0.561),3387 (2695),3687 (2956),39.342 (39.89),0.832 (0.587),0.0 (0.0),-0.669 | 0.0 (-0.658 | 0.0),"[Age at clinical visit, Body height (cm), Body mass index (kg/m2), Body weight (kg), Current smoking (Y=1,N=2): Yes, Duration of night urine collection (min), Night urinary volume (ml), Number of cigarettes without filter per day, Physical activity (1-10), Sex of the participant (1=Male,2=Female): Male, Sodium-creatinine excretion ratio, Sodium-potassium excretion ratio, Units of alcohol per week (1unit~10g pure alcohol), Urinary creatinin night (umol/L), Urinary creatinine night excretion (umol), Urinary creatinine night excretion (umolh), Urinary potassium night (mmol/L), Urinary potassium night excretion (mmol), Urinary potassium night excretion (mmol/h), Urinary sodium night (mmol/L), Urinary sodium night excretion (mmol), Urinary sodium night excretion (mmol/h), Waist/hip ratio]"
6,M6 = M5 + blood pressure,25/26 | 25/26 (25.0/26.0 | 25.0/26.0),0.573 (0.56),3394 (2700),3710 (2962),39.352 (39.926),0.874 (0.612),0.0 (0.0),-0.669 | 0.0 (-0.661 | 0.0),"[Age at clinical visit, Body height (cm), Body mass index (kg/m2), Body weight (kg), Current smoking (Y=1,N=2): Yes, Duration of night urine collection (min), Mean of 5 dbp measurements at clinical visit (mmHg: dbp1-5), Mean of 5 sbp measurements at clinical visit (mmHg: sbp1-5), Night urinary volume (ml), Number of cigarettes without filter per day, Physical activity (1-10), Sex of the participant (1=Male,2=Female): Male, Sodium-creatinine excretion ratio, Sodium-potassium excretion ratio, Units of alcohol per week (1unit~10g pure alcohol), Urinary creatinin night (umol/L), Urinary creatinine night excretion (umol), Urinary creatinine night excretion (umolh), Urinary potassium night (mmol/L), Urinary potassium night excretion (mmol), Urinary potassium night excretion (mmol/h), Urinary sodium night (mmol/L), Urinary sodium night excretion (mmol), Urinary sodium night excretion (mmol/h), Waist/hip ratio]"
7,M7 = M6 + hypertension + diabetes,27/28 | 27/28 (27.0/28.0 | 27.0/28.0),0.566 (0.556),3414 (2716),3700 (2966),39.734 (40.079),0.508 (0.528),0.0 (0.0),-0.695 | 0.0 (-0.687 | 0.0),"[Age at clinical visit, Body height (cm), Body mass index (kg/m2), Body weight (kg), Current smoking (Y=1,N=2): Yes, Duration of night urine collection (min), Ever had diabetes (office glu>7,self-report,anti-Diab drugs)(Y=1,N=0): Yes, Mean of 5 dbp measurements at clinical visit (mmHg: dbp1-5), Mean of 5 sbp measurements at clinical visit (mmHg: sbp1-5), Night urinary volume (ml), Number of cigarettes without filter per day, Number of filter cigarettes per day, Physical activity (1-10), Sex of the participant (1=Male,2=Female): Male, Sodium-creatinine excretion ratio, Sodium-potassium excretion ratio, Units of alcohol per week (1unit~10g pure alcohol), Urinary creatinin night (umol/L), Urinary creatinine night excretion (umol), Urinary creatinine night excretion (umolh), Urinary potassium night (mmol/L), Urinary potassium night excretion (mmol), Urinary potassium night excretion (mmol/h), Urinary sodium night (mmol/L), Urinary sodium night excretion (mmol), Urinary sodium night excretion (mmol/h), Waist/hip ratio]"
8,M8 = M7 + blood lipid values,31/32 | 31/32 (31.0/32.0 | 31.0/32.0),0.566 (0.551),3418 (2759),3720 (3018),39.714 (40.443),0.386 (0.524),0.0 (0.0),-0.696 | 0.0 (-0.705 | 0.0),"[Age at clinical visit, Blood LDL cholesterol (mmol/L), Blood LDL/HDL ratio, Blood total cholesterol (mmol/L), Blood triglyceride (mmol/L), Body height (cm), Body mass index (kg/m2), Body weight (kg), Current smoking (Y=1,N=2): Yes, Duration of night urine collection (min), Ever had diabetes (office glu>7,self-report,anti-Diab drugs)(Y=1,N=0): Yes, Mean of 5 dbp measurements at clinical visit (mmHg: dbp1-5), Mean of 5 sbp measurements at clinical visit (mmHg: sbp1-5), Night urinary volume (ml), Number of cigarettes without filter per day, Number of filter cigarettes per day, Physical activity (1-10), Sex of the participant (1=Male,2=Female): Male, Sodium-creatinine excretion ratio, Sodium-potassium excretion ratio, Units of alcohol per week (1unit~10g pure alcohol), Urinary creatinin night (umol/L), Urinary creatinine night excretion (umol), Urinary creatinine night excretion (umolh), Urinary potassium night (mmol/L), Urinary potassium night excretion (mmol), Urinary potassium night excretion (mmol/h), Urinary sodium night (mmol/L), Urinary sodium night excretion (mmol), Urinary sodium night excretion (mmol/h), Waist/hip ratio]"


In [18]:
models_df.set_index('Model', drop=True)[[
    'Number of predictors', 'R2', 'Root-mean-square error',
    # No BIC because its meaning is unclear in lasso regression ("do shrunk features still count as 1?").
    # Additionally, also not correct (like p-values, standard errors) for post-selection inference (final_re=True).
    'P-value difference hypertensive-normotensive', 'Correlation measured-error'
]]

Unnamed: 0_level_0,Number of predictors,R2,Root-mean-square error,P-value difference hypertensive-normotensive,Correlation measured-error
Model,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
M0 = Urinary sodium,3/3 | 3/3 (3.0/3.0 | 3.0/3.0),0.493 (0.465),42.923 (44.012),0.045 (0.361),-0.731 | 0.0 (-0.726 | 0.0)
M1 = M0 + duration + volume,5/5 | 5/5 (4.8/5.0 | 4.8/5.0),0.483 (0.47),43.329 (43.834),0.056 (0.334),-0.735 | 0.0 (-0.729 | 0.0)
M2 = M1 + sex + age,7/7 | 7/7 (7.0/7.0 | 7.0/7.0),0.535 (0.521),41.096 (41.665),0.191 (0.346),-0.696 | 0.0 (-0.692 | 0.0)
M3 = M2 + smoking + alcohol,11/12 | 11/12 (11.2/12.0 | 11.2/12.0),0.525 (0.517),41.542 (41.849),0.652 (0.348),-0.702 | 0.0 (-0.688 | 0.0)
M4 = M3 + potassium + creatinine,19/20 | 19/20 (19.0/20.0 | 19.0/20.0),0.549 (0.547),40.49 (40.529),0.448 (0.422),-0.698 | 0.0 (-0.668 | 0.0)
M5 = M4 + weight + height + BMI + waist-to-hip ratio,23/24 | 23/24 (23.0/24.0 | 23.0/24.0),0.574 (0.561),39.342 (39.89),0.832 (0.587),-0.669 | 0.0 (-0.658 | 0.0)
M6 = M5 + blood pressure,25/26 | 25/26 (25.0/26.0 | 25.0/26.0),0.573 (0.56),39.352 (39.926),0.874 (0.612),-0.669 | 0.0 (-0.661 | 0.0)
M7 = M6 + hypertension + diabetes,27/28 | 27/28 (27.0/28.0 | 27.0/28.0),0.566 (0.556),39.734 (40.079),0.508 (0.528),-0.695 | 0.0 (-0.687 | 0.0)
M8 = M7 + blood lipid values,31/32 | 31/32 (31.0/32.0 | 31.0/32.0),0.566 (0.551),39.714 (40.443),0.386 (0.524),-0.696 | 0.0 (-0.705 | 0.0)


**Select one model for further analysis**

In [19]:
selected_idx = 5
preds = preds[selected_idx][-1]  # !!! CV predictions !!!
df_to_save = pd.read_csv('../Data/dataset_cleaned.csv')
preds.name = 'mlr_preds'
df_to_save = df_to_save.join(preds, how='left').join(data[['cv_split']], how='left')
df_to_save.to_csv('../Data/dataset_mlr.csv')

  df_to_save = pd.read_csv('../Data/dataset_cleaned.csv')


**Selected predictors**

In [20]:
selected_predictors = models_df['Predictors'][selected_idx]
selected_predictors = [p.split(":")[0] for p in selected_predictors]
print(f"{len(selected_predictors)} selected predictors: {selected_predictors}")

23 selected predictors: ['Age at clinical visit', 'Body height (cm)', 'Body mass index (kg/m2)', 'Body weight (kg)', 'Current smoking (Y=1,N=2)', 'Duration of night urine collection (min)', 'Night urinary volume (ml)', 'Number of cigarettes without filter per day', 'Physical activity (1-10)', 'Sex of the participant (1=Male,2=Female)', 'Sodium-creatinine excretion ratio', 'Sodium-potassium excretion ratio', 'Units of alcohol per week (1unit~10g pure alcohol)', 'Urinary creatinin night (umol/L)', 'Urinary creatinine night excretion (umol)', 'Urinary creatinine night excretion (umolh)', 'Urinary potassium night (mmol/L)', 'Urinary potassium night excretion (mmol)', 'Urinary potassium night excretion (mmol/h)', 'Urinary sodium night (mmol/L)', 'Urinary sodium night excretion (mmol)', 'Urinary sodium night excretion (mmol/h)', 'Waist/hip ratio']


In [21]:
predictors = list(models.values())[selected_idx]
all_predictors = labels_df.loc[predictors]['Variable Label']
removed_predictors = set(all_predictors) - set(selected_predictors)
print(f"{len(removed_predictors)} removed predictors: {removed_predictors}")

1 removed predictors: {'Number of filter cigarettes per day'}


**Other important factors?**

In [22]:
cols = [
    'una_un', 'una_un_mmol', 'una_un_mmolh',
    'un_min', 'un_ml',
    'sex', 'age',
    'uk_un', 'uk_un_mmol', 'uk_un_mmolh', 'un_na_k', 'ucrt_un', 'ucrt_un_umol', 'ucrt_un_umolh', 'un_na_crt',
    'bmi',
    'sbp1_5', 'dbp1_5',
    'hypertension_combined', 'diabetes_combined',
    't_now', 't_ctf', 't_ct', # tobacco
    'd_diet', 'f_fruit', 'f_veg', 'f_meat_24n', 'f_fish', # diet: NOT USED
    'waist_hip', 'waistc1', # waist-to-hip
    'cho', 'ldl', 'ldlhdl', 'tg', # blood lipid values
    'e_now', 'e_p_past', 'unit_alc_week', # alcohol: ONLY UNITS USED
    'p_act1', 'p_act2', 'p_act3', 'p_act4', 'phys_activity_22fN', 'phys_activity_22fD', 'phys_activity_t', 'a_grad', # physical activity: ONLY A_GRAD USED
    'sleep_t', 'sleep_22fN', 'sleep_22fD', 'sleep_duration',# [follow-up] # sleep: NOT USED
    'sc_now', 'educat_code', 'edu_3cat', # education: ONLY EDU_3CAT USED
    'origin_final', # [follow-up] # origin: NOT USED
    'orig_regio', # [follow-up]: NOT USED
    'p_h_income', # [follow-up] # income: NOT USED
    'inc_hh', # [follow-up]: NOT USED
    'fin_diff', # [follow-up]: NOT USED
]

unused_cols = set(cols) - set(all_cols)

In [23]:
pvalues = []
names = []
nans = []

df_selec = df.loc[data.index]

for col in unused_cols:
    nan = df_selec[col].isna().sum()
    X_sel = df_selec.loc[~(df_selec[col].isna()), [col]]
    if col in cat_cols:
        X_sel[col] = X_sel[col].astype("category")
        X_sel = pd.get_dummies(X_sel[col], prefix=f'{col}::', drop_first=False)
    y_sel = df_selec[target_col][~(df_selec[col].isna())].values
    preds_sel = preds[~(df_selec[col].isna())]
    err = preds_sel - y_sel
    for c in X_sel.columns:
        result = sapi.OLS(err, X_sel[[c]]).fit()
        names.append(c)
        nans.append(nan)
        pvalues.append(result.pvalues[c])

col_descr_df = pd.DataFrame({'Variable name': names, 'P-value correlation': pvalues, '# Missing': nans}).set_index('Variable name')
col_descr_df['Variable description'] = list(
    labels_df.loc[[re.sub('::.*', '', c) for c in col_descr_df.index], 'Variable Label'])
col_descr_df.sort_values('P-value correlation', inplace=True)
with pd.option_context("display.max_rows", None):
    display(col_descr_df)
print(len(col_descr_df))
col_descr_df.to_csv('../Figures/linear_regression/missing_vars_lasso.csv')

Unnamed: 0_level_0,P-value correlation,# Missing,Variable description
Variable name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
p_h_income::_3000-4999 CHF,0.005556,989,Monthly household income
sleep_22fD,0.006502,906,Number of minutes spent sleeping during the day-Armband
p_h_income::_-9,0.014296,989,Monthly household income
origin_final::_1.0,0.037593,1084,"Origin of the participant (1=European, 2-4=Other, -9=Unknown)"
f_meat_24n::_5 days/week,0.038035,758,"In general, how many days per week do you eat meat or delicatessen?"
f_fruit::_Less than 1 portion/day,0.050576,761,How many fruit portions do you eat on a daily basis?
sleep_duration,0.055132,908,Average number of hours of sleep per night
orig_regio::_CH,0.055885,905,Origin related - Questionnaire
orig_regio::_XK,0.058592,905,Origin related - Questionnaire
orig_regio::_CD,0.064738,905,Origin related - Questionnaire


102


### 1.3.2) Sensitivity analyses

In [24]:
models_df_int, preds_int = get_models_df_preds(
    models,
    data,
    target_col,
    cat_cols,
    labels_df,
    kernels=None,
    inter_col="una_un_mmolh",
    norm_out=True,
    model_fn=glmm_lasso,
    lambda_val=[0, 0.0001, 0.001, 0.01, 0.1, 1, 10],
    final_re=False,
    se=False,
)
models_df_int


Model M0 = Urinary sodium:
0 categorical columns. Will encode.
3 numerical columns. Will standardize.
[1] 0.5323305 0.5323305 0.5323306 0.5323320 0.5323451 0.5312250 0.5336598
Selected lambda: [1.]
[1] 0.5537240 0.5537240 0.5537240 0.5537244 0.5532938 0.5533011 0.5582535
Selected lambda: [0.1]
[1] 0.5201710 0.5201710 0.5201709 0.5201695 0.5201525 0.5174279 0.5209226
Selected lambda: [1.]
[1] 0.5277092 0.5277092 0.5277088 0.5277050 0.5278061 0.5254982 0.5258752
Selected lambda: [1.]
[1] 0.5187348 0.5187348 0.5187345 0.5187320 0.5178086 0.5159866 0.5168499
Selected lambda: [1.]
[1] 0.5326496 0.5326495 0.5326494 0.5326482 0.5316607 0.5311078 0.5305848
Selected lambda: [10.]
{'(Intercept)': 8.960880674875326e-05, 'una_un_mmolh': 0.4192324410690238, 'una_un': 0.1731487196008685, 'una_un_mmol': 0.2114608866402653, 'una_un_mmol_INT_una_un_mmolh': -0.0, 'una_un_INT_una_un_mmolh': -0.014625800479384183}

RMSE (43.31573751132226, 44.40767895573292)

Model M1 = M0 + duration + volume:
0 categori

Unnamed: 0,Model,Number of predictors,R2,AIC,BIC,Root-mean-square error,P-value difference hypertensive-normotensive,P-value trend measured-error,Correlation measured-error,Predictors
0,M0 = Urinary sodium,4/5 | 3/3 (5.0/5.0 | 3.0/3.0),0.483 (0.459),3680 (2913),3901 (3040),43.316 (44.408),0.03 (0.404),0.0 (0.0),-0.7 | 0.0 (-0.743 | 0.0),"[Urinary sodium night (mmol/L), Urinary sodium night (mmol/L) x Urinary sodium night excretion (mmol/h), Urinary sodium night excretion (mmol), Urinary sodium night excretion (mmol/h)]"
1,M1 = M0 + duration + volume,8/8 | 5/5 (8.0/8.0 | 5.0/5.0),0.49 (0.465),3638 (2909),3807 (3052),43.029 (44.177),0.031 (0.362),0.0 (0.0),-0.719 | 0.0 (-0.737 | 0.0),"[Duration of night urine collection (min), Night urinary volume (ml), Night urinary volume (ml) x Urinary sodium night excretion (mmol/h), Urinary sodium night (mmol/L), Urinary sodium night (mmol/L) x Urinary sodium night excretion (mmol/h), Urinary sodium night excretion (mmol), Urinary sodium night excretion (mmol) x Urinary sodium night excretion (mmol/h), Urinary sodium night excretion (mmol/h)]"
2,M2 = M1 + sex + age,11/11 | 7/7 (11.0/11.0 | 7.0/7.0),0.541 (0.515),3467 (2772),3643 (2920),40.828 (42.041),0.207 (0.392),0.0 (0.0),-0.697 | 0.0 (-0.698 | 0.0),"[Age at clinical visit, Age at clinical visit x Urinary sodium night excretion (mmol/h), Duration of night urine collection (min), Night urinary volume (ml), Night urinary volume (ml) x Urinary sodium night excretion (mmol/h), Sex of the participant (1=Male,2=Female): Male, Urinary sodium night (mmol/L), Urinary sodium night (mmol/L) x Urinary sodium night excretion (mmol/h), Urinary sodium night excretion (mmol), Urinary sodium night excretion (mmol) x Urinary sodium night excretion (mmol/h), Urinary sodium night excretion (mmol/h)]"
3,M3 = M2 + smoking + alcohol,19/20 | 12/12 (19.4/20.0 | 12.0/12.0),0.531 (0.498),3529 (2817),3786 (3042),41.261 (42.844),0.275 (0.472),0.0 (0.0),-0.706 | 0.0 (-0.684 | 0.0),"[Age at clinical visit, Age at clinical visit x Urinary sodium night excretion (mmol/h), Current smoking (Y=1,N=2): Yes, Duration of night urine collection (min), Night urinary volume (ml), Night urinary volume (ml) x Urinary sodium night excretion (mmol/h), Number of cigarettes without filter per day, Number of cigarettes without filter per day x Urinary sodium night excretion (mmol/h), Number of filter cigarettes per day, Number of filter cigarettes per day x Urinary sodium night excretion (mmol/h), Physical activity (1-10), Physical activity (1-10) x Urinary sodium night excretion (mmol/h), Sex of the participant (1=Male,2=Female): Male, Units of alcohol per week (1unit~10g pure alcohol) x Urinary sodium night excretion (mmol/h), Urinary sodium night (mmol/L), Urinary sodium night (mmol/L) x Urinary sodium night excretion (mmol/h), Urinary sodium night excretion (mmol), Urinary sodium night excretion (mmol) x Urinary sodium night excretion (mmol/h), Urinary sodium night excretion (mmol/h)]"
4,M4 = M3 + potassium + creatinine,35/36 | 20/20 (35.0/36.0 | 20.0/20.0),0.541 (0.531),3517 (2784),3840 (3070),40.82 (41.274),0.575 (0.444),0.0 (0.0),-0.704 | 0.0 (-0.684 | 0.0),"[Age at clinical visit, Age at clinical visit x Urinary sodium night excretion (mmol/h), Current smoking (Y=1,N=2): Yes, Duration of night urine collection (min), Night urinary volume (ml), Night urinary volume (ml) x Urinary sodium night excretion (mmol/h), Number of cigarettes without filter per day, Number of cigarettes without filter per day x Urinary sodium night excretion (mmol/h), Number of filter cigarettes per day x Urinary sodium night excretion (mmol/h), Physical activity (1-10), Physical activity (1-10) x Urinary sodium night excretion (mmol/h), Sex of the participant (1=Male,2=Female): Male, Sodium-creatinine excretion ratio, Sodium-creatinine excretion ratio x Urinary sodium night excretion (mmol/h), Sodium-potassium excretion ratio, Sodium-potassium excretion ratio x Urinary sodium night excretion (mmol/h), Units of alcohol per week (1unit~10g pure alcohol), Units of alcohol per week (1unit~10g pure alcohol) x Urinary sodium night excretion (mmol/h), Urinary creatinin night (umol/L), Urinary creatinin night (umol/L) x Urinary sodium night excretion (mmol/h), Urinary creatinine night excretion (umol), Urinary creatinine night excretion (umol) x Urinary sodium night excretion (mmol/h), Urinary creatinine night excretion (umolh), Urinary creatinine night excretion (umolh) x Urinary sodium night excretion (mmol/h), Urinary potassium night (mmol/L), Urinary potassium night (mmol/L) x Urinary sodium night excretion (mmol/h), Urinary potassium night excretion (mmol), Urinary potassium night excretion (mmol) x Urinary sodium night excretion (mmol/h), Urinary potassium night excretion (mmol/h), Urinary potassium night excretion (mmol/h) x Urinary sodium night excretion (mmol/h), Urinary sodium night (mmol/L), Urinary sodium night (mmol/L) x Urinary sodium night excretion (mmol/h), Urinary sodium night excretion (mmol), Urinary sodium night excretion (mmol) x Urinary sodium night excretion (mmol/h), Urinary sodium night excretion (mmol/h)]"
5,M5 = M4 + weight + height + BMI + waist-to-hip ratio,43/44 | 24/24 (43.0/44.0 | 24.0/24.0),0.562 (0.55),3458 (2754),3822 (3093),39.915 (40.439),0.342 (0.546),0.0 (0.0),-0.69 | 0.0 (-0.675 | 0.0),"[Age at clinical visit, Age at clinical visit x Urinary sodium night excretion (mmol/h), Body height (cm), Body height (cm) x Urinary sodium night excretion (mmol/h), Body mass index (kg/m2), Body mass index (kg/m2) x Urinary sodium night excretion (mmol/h), Body weight (kg), Body weight (kg) x Urinary sodium night excretion (mmol/h), Current smoking (Y=1,N=2): Yes, Duration of night urine collection (min), Night urinary volume (ml), Night urinary volume (ml) x Urinary sodium night excretion (mmol/h), Number of cigarettes without filter per day, Number of cigarettes without filter per day x Urinary sodium night excretion (mmol/h), Number of filter cigarettes per day x Urinary sodium night excretion (mmol/h), Physical activity (1-10), Physical activity (1-10) x Urinary sodium night excretion (mmol/h), Sex of the participant (1=Male,2=Female): Male, Sodium-creatinine excretion ratio, Sodium-creatinine excretion ratio x Urinary sodium night excretion (mmol/h), Sodium-potassium excretion ratio, Sodium-potassium excretion ratio x Urinary sodium night excretion (mmol/h), Units of alcohol per week (1unit~10g pure alcohol), Units of alcohol per week (1unit~10g pure alcohol) x Urinary sodium night excretion (mmol/h), Urinary creatinin night (umol/L), Urinary creatinin night (umol/L) x Urinary sodium night excretion (mmol/h), Urinary creatinine night excretion (umol), Urinary creatinine night excretion (umol) x Urinary sodium night excretion (mmol/h), Urinary creatinine night excretion (umolh), Urinary creatinine night excretion (umolh) x Urinary sodium night excretion (mmol/h), Urinary potassium night (mmol/L), Urinary potassium night (mmol/L) x Urinary sodium night excretion (mmol/h), Urinary potassium night excretion (mmol), Urinary potassium night excretion (mmol) x Urinary sodium night excretion (mmol/h), Urinary potassium night excretion (mmol/h), Urinary potassium night excretion (mmol/h) x Urinary sodium night excretion (mmol/h), Urinary sodium night (mmol/L), Urinary sodium night (mmol/L) x Urinary sodium night excretion (mmol/h), Urinary sodium night excretion (mmol), Urinary sodium night excretion (mmol) x Urinary sodium night excretion (mmol/h), Urinary sodium night excretion (mmol/h), Waist/hip ratio, Waist/hip ratio x Urinary sodium night excretion (mmol/h)]"
6,M6 = M5 + blood pressure,47/48 | 26/26 (47.0/48.0 | 25.8/26.0),0.559 (0.547),3473 (2769),3848 (3122),40.041 (40.601),0.286 (0.537),0.0 (0.0),-0.696 | 0.0 (-0.683 | 0.0),"[Age at clinical visit, Age at clinical visit x Urinary sodium night excretion (mmol/h), Body height (cm), Body height (cm) x Urinary sodium night excretion (mmol/h), Body mass index (kg/m2), Body mass index (kg/m2) x Urinary sodium night excretion (mmol/h), Body weight (kg), Body weight (kg) x Urinary sodium night excretion (mmol/h), Current smoking (Y=1,N=2): Yes, Duration of night urine collection (min), Mean of 5 dbp measurements at clinical visit (mmHg: dbp1-5), Mean of 5 dbp measurements at clinical visit (mmHg: dbp1-5) x Urinary sodium night excretion (mmol/h), Mean of 5 sbp measurements at clinical visit (mmHg: sbp1-5), Mean of 5 sbp measurements at clinical visit (mmHg: sbp1-5) x Urinary sodium night excretion (mmol/h), Night urinary volume (ml), Night urinary volume (ml) x Urinary sodium night excretion (mmol/h), Number of cigarettes without filter per day, Number of cigarettes without filter per day x Urinary sodium night excretion (mmol/h), Number of filter cigarettes per day x Urinary sodium night excretion (mmol/h), Physical activity (1-10), Physical activity (1-10) x Urinary sodium night excretion (mmol/h), Sex of the participant (1=Male,2=Female): Male, Sodium-creatinine excretion ratio, Sodium-creatinine excretion ratio x Urinary sodium night excretion (mmol/h), Sodium-potassium excretion ratio, Sodium-potassium excretion ratio x Urinary sodium night excretion (mmol/h), Units of alcohol per week (1unit~10g pure alcohol), Units of alcohol per week (1unit~10g pure alcohol) x Urinary sodium night excretion (mmol/h), Urinary creatinin night (umol/L), Urinary creatinin night (umol/L) x Urinary sodium night excretion (mmol/h), Urinary creatinine night excretion (umol), Urinary creatinine night excretion (umol) x Urinary sodium night excretion (mmol/h), Urinary creatinine night excretion (umolh), Urinary creatinine night excretion (umolh) x Urinary sodium night excretion (mmol/h), Urinary potassium night (mmol/L), Urinary potassium night (mmol/L) x Urinary sodium night excretion (mmol/h), Urinary potassium night excretion (mmol), Urinary potassium night excretion (mmol) x Urinary sodium night excretion (mmol/h), Urinary potassium night excretion (mmol/h), Urinary potassium night excretion (mmol/h) x Urinary sodium night excretion (mmol/h), Urinary sodium night (mmol/L), Urinary sodium night (mmol/L) x Urinary sodium night excretion (mmol/h), Urinary sodium night excretion (mmol), Urinary sodium night excretion (mmol) x Urinary sodium night excretion (mmol/h), Urinary sodium night excretion (mmol/h), Waist/hip ratio, Waist/hip ratio x Urinary sodium night excretion (mmol/h)]"
7,M7 = M6 + hypertension + diabetes,49/50 | 27/28 (49.0/50.0 | 27.2/28.0),0.547 (0.524),3518 (2890),3888 (3309),40.617 (42.005),0.091 (0.592),0.0 (0.0),-0.72 | 0.0 (-0.662 | 0.0),"[Age at clinical visit, Age at clinical visit x Urinary sodium night excretion (mmol/h), Body height (cm), Body height (cm) x Urinary sodium night excretion (mmol/h), Body mass index (kg/m2), Body mass index (kg/m2) x Urinary sodium night excretion (mmol/h), Body weight (kg), Body weight (kg) x Urinary sodium night excretion (mmol/h), Current smoking (Y=1,N=2): Yes, Duration of night urine collection (min), Ever had diabetes (office glu>7,self-report,anti-Diab drugs)(Y=1,N=0): Yes, Mean of 5 dbp measurements at clinical visit (mmHg: dbp1-5), Mean of 5 dbp measurements at clinical visit (mmHg: dbp1-5) x Urinary sodium night excretion (mmol/h), Mean of 5 sbp measurements at clinical visit (mmHg: sbp1-5), Mean of 5 sbp measurements at clinical visit (mmHg: sbp1-5) x Urinary sodium night excretion (mmol/h), Night urinary volume (ml), Night urinary volume (ml) x Urinary sodium night excretion (mmol/h), Number of cigarettes without filter per day, Number of cigarettes without filter per day x Urinary sodium night excretion (mmol/h), Number of filter cigarettes per day, Number of filter cigarettes per day x Urinary sodium night excretion (mmol/h), Physical activity (1-10), Physical activity (1-10) x Urinary sodium night excretion (mmol/h), Sex of the participant (1=Male,2=Female): Male, Sodium-creatinine excretion ratio, Sodium-creatinine excretion ratio x Urinary sodium night excretion (mmol/h), Sodium-potassium excretion ratio, Sodium-potassium excretion ratio x Urinary sodium night excretion (mmol/h), Units of alcohol per week (1unit~10g pure alcohol), Units of alcohol per week (1unit~10g pure alcohol) x Urinary sodium night excretion (mmol/h), Urinary creatinin night (umol/L), Urinary creatinin night (umol/L) x Urinary sodium night excretion (mmol/h), Urinary creatinine night excretion (umol), Urinary creatinine night excretion (umol) x Urinary sodium night excretion (mmol/h), Urinary creatinine night excretion (umolh), Urinary creatinine night excretion (umolh) x Urinary sodium night excretion (mmol/h), Urinary potassium night (mmol/L), Urinary potassium night (mmol/L) x Urinary sodium night excretion (mmol/h), Urinary potassium night excretion (mmol), Urinary potassium night excretion (mmol) x Urinary sodium night excretion (mmol/h), Urinary potassium night excretion (mmol/h), Urinary potassium night excretion (mmol/h) x Urinary sodium night excretion (mmol/h), Urinary sodium night (mmol/L), Urinary sodium night (mmol/L) x Urinary sodium night excretion (mmol/h), Urinary sodium night excretion (mmol), Urinary sodium night excretion (mmol) x Urinary sodium night excretion (mmol/h), Urinary sodium night excretion (mmol/h), Waist/hip ratio, Waist/hip ratio x Urinary sodium night excretion (mmol/h)]"
8,M8 = M7 + blood lipid values,57/58 | 31/32 (57.0/58.0 | 31.2/32.0),0.543 (0.508),3549 (2977),3960 (3485),40.807 (42.663),0.048 (0.536),0.0 (0.0),-0.726 | 0.0 (-0.679 | 0.0),"[Age at clinical visit, Age at clinical visit x Urinary sodium night excretion (mmol/h), Blood LDL cholesterol (mmol/L), Blood LDL cholesterol (mmol/L) x Urinary sodium night excretion (mmol/h), Blood LDL/HDL ratio, Blood LDL/HDL ratio x Urinary sodium night excretion (mmol/h), Blood total cholesterol (mmol/L), Blood total cholesterol (mmol/L) x Urinary sodium night excretion (mmol/h), Blood triglyceride (mmol/L), Blood triglyceride (mmol/L) x Urinary sodium night excretion (mmol/h), Body height (cm), Body height (cm) x Urinary sodium night excretion (mmol/h), Body mass index (kg/m2), Body mass index (kg/m2) x Urinary sodium night excretion (mmol/h), Body weight (kg), Body weight (kg) x Urinary sodium night excretion (mmol/h), Current smoking (Y=1,N=2): Yes, Duration of night urine collection (min), Ever had diabetes (office glu>7,self-report,anti-Diab drugs)(Y=1,N=0): Yes, Mean of 5 dbp measurements at clinical visit (mmHg: dbp1-5), Mean of 5 dbp measurements at clinical visit (mmHg: dbp1-5) x Urinary sodium night excretion (mmol/h), Mean of 5 sbp measurements at clinical visit (mmHg: sbp1-5), Mean of 5 sbp measurements at clinical visit (mmHg: sbp1-5) x Urinary sodium night excretion (mmol/h), Night urinary volume (ml), Night urinary volume (ml) x Urinary sodium night excretion (mmol/h), Number of cigarettes without filter per day, Number of cigarettes without filter per day x Urinary sodium night excretion (mmol/h), Number of filter cigarettes per day, Number of filter cigarettes per day x Urinary sodium night excretion (mmol/h), Physical activity (1-10), Physical activity (1-10) x Urinary sodium night excretion (mmol/h), Sex of the participant (1=Male,2=Female): Male, Sodium-creatinine excretion ratio, Sodium-creatinine excretion ratio x Urinary sodium night excretion (mmol/h), Sodium-potassium excretion ratio, Sodium-potassium excretion ratio x Urinary sodium night excretion (mmol/h), Units of alcohol per week (1unit~10g pure alcohol), Units of alcohol per week (1unit~10g pure alcohol) x Urinary sodium night excretion (mmol/h), Urinary creatinin night (umol/L), Urinary creatinin night (umol/L) x Urinary sodium night excretion (mmol/h), Urinary creatinine night excretion (umol), Urinary creatinine night excretion (umol) x Urinary sodium night excretion (mmol/h), Urinary creatinine night excretion (umolh), Urinary creatinine night excretion (umolh) x Urinary sodium night excretion (mmol/h), Urinary potassium night (mmol/L), Urinary potassium night (mmol/L) x Urinary sodium night excretion (mmol/h), Urinary potassium night excretion (mmol), Urinary potassium night excretion (mmol) x Urinary sodium night excretion (mmol/h), Urinary potassium night excretion (mmol/h), Urinary potassium night excretion (mmol/h) x Urinary sodium night excretion (mmol/h), Urinary sodium night (mmol/L), Urinary sodium night (mmol/L) x Urinary sodium night excretion (mmol/h), Urinary sodium night excretion (mmol), Urinary sodium night excretion (mmol) x Urinary sodium night excretion (mmol/h), Urinary sodium night excretion (mmol/h), Waist/hip ratio, Waist/hip ratio x Urinary sodium night excretion (mmol/h)]"


In [27]:
models_df_square, preds_square = get_models_df_preds(
    models,
    data,
    target_col,
    cat_cols,
    labels_df,
    kernels={"2": np.square},
    inter_col=None,
    norm_out=True,
    model_fn=glmm_lasso,
    lambda_val=[0, 0.0001, 0.001, 0.01, 0.1, 1, 10, 100],
    final_re=False,
    se=False,
)
models_df_square


Model M0 = Urinary sodium:
0 categorical columns. Will encode.
3 numerical columns. Will standardize.
[1] 0.5322107 0.5322107 0.5322108 0.5322111 0.5322156 0.5324975 0.5341092
[8] 0.5791288
Selected lambda: [0.]
[1] 0.5530474 0.5530474 0.5530474 0.5530478 0.5530503 0.5539614 0.5585095
[8] 0.5957726
Selected lambda: [0.]
[1] 0.5206538 0.5206538 0.5206538 0.5206537 0.5206884 0.5205216 0.5211656
[8] 0.5667088
Selected lambda: [1.]
[1] 0.5260254 0.5260254 0.5260253 0.5260240 0.5260107 0.5248144 0.5251655
[8] 0.5697562
Selected lambda: [1.]
[1] 0.5179924 0.5179924 0.5179921 0.5179895 0.5179640 0.5177517 0.5159036
[8] 0.5651366
Selected lambda: [10.]
[1] 0.5317718 0.5317718 0.5317718 0.5317712 0.5326730 0.5321785 0.5307253
[8] 0.5540277
Selected lambda: [10.]
{'(Intercept)': 5.6235381162125055e-05, 'una_un_mmolh': 0.3963872594892293, 'una_un': 0.15727519875949092, 'una_un_mmol': 0.25501459827502226, 'una_un_mmolh_2': -0.01668839411886247, 'una_un_mmol_2': -0.04401187863292819, 'una_un_2': -

Unnamed: 0,Model,Number of predictors,R2,AIC,BIC,Root-mean-square error,P-value difference hypertensive-normotensive,P-value trend measured-error,Correlation measured-error,Predictors
0,M0 = Urinary sodium,6/6 | 3/3 (6.0/6.0 | 3.0/3.0),0.481 (0.457),3697 (2931),3954 (3085),43.412 (44.477),0.03 (0.409),0.0 (0.0),-0.726 | 0.0 (-0.743 | 0.0),"[Urinary sodium night (mmol/L), Urinary sodium night (mmol/L) 2, Urinary sodium night excretion (mmol), Urinary sodium night excretion (mmol) 2, Urinary sodium night excretion (mmol/h), Urinary sodium night excretion (mmol/h) 2]"
1,M1 = M0 + duration + volume,10/10 | 5/5 (9.6/10.0 | 5.0/5.0),0.489 (0.467),3648 (2918),3838 (3075),43.076 (44.044),0.034 (0.374),0.0 (0.0),-0.734 | 0.0 (-0.746 | 0.0),"[Duration of night urine collection (min), Duration of night urine collection (min) 2, Night urinary volume (ml), Night urinary volume (ml) 2, Urinary sodium night (mmol/L), Urinary sodium night (mmol/L) 2, Urinary sodium night excretion (mmol), Urinary sodium night excretion (mmol) 2, Urinary sodium night excretion (mmol/h), Urinary sodium night excretion (mmol/h) 2]"
2,M2 = M1 + sex + age,13/13 | 7/7 (12.4/13.0 | 7.0/7.0),0.546 (0.525),3460 (2773),3668 (2942),40.605 (41.606),0.164 (0.359),0.0 (0.0),-0.677 | 0.0 (-0.697 | 0.0),"[Age at clinical visit, Age at clinical visit 2, Duration of night urine collection (min), Duration of night urine collection (min) 2, Night urinary volume (ml), Night urinary volume (ml) 2, Sex of the participant (1=Male,2=Female): Male, Urinary sodium night (mmol/L), Urinary sodium night (mmol/L) 2, Urinary sodium night excretion (mmol), Urinary sodium night excretion (mmol) 2, Urinary sodium night excretion (mmol/h), Urinary sodium night excretion (mmol/h) 2]"
3,M3 = M2 + smoking + alcohol,21/22 | 12/12 (21.2/22.0 | 12.0/12.0),0.52 (0.523),3572 (2794),3844 (3023),41.755 (41.669),0.117 (0.355),0.0 (0.0),-0.713 | 0.0 (-0.699 | 0.0),"[Age at clinical visit, Age at clinical visit 2, Current smoking (Y=1,N=2): Yes, Duration of night urine collection (min), Duration of night urine collection (min) 2, Night urinary volume (ml), Night urinary volume (ml) 2, Number of cigarettes without filter per day, Number of cigarettes without filter per day 2, Number of filter cigarettes per day 2, Physical activity (1-10), Physical activity (1-10) 2, Sex of the participant (1=Male,2=Female): Male, Units of alcohol per week (1unit~10g pure alcohol), Units of alcohol per week (1unit~10g pure alcohol) 2, Urinary sodium night (mmol/L), Urinary sodium night (mmol/L) 2, Urinary sodium night excretion (mmol), Urinary sodium night excretion (mmol) 2, Urinary sodium night excretion (mmol/h), Urinary sodium night excretion (mmol/h) 2]"
4,M4 = M3 + potassium + creatinine,37/38 | 20/20 (37.0/38.0 | 20.0/20.0),0.549 (0.541),3497 (2769),3837 (3086),40.49 (40.821),0.118 (0.398),0.0 (0.0),-0.702 | 0.0 (-0.669 | 0.0),"[Age at clinical visit, Age at clinical visit 2, Current smoking (Y=1,N=2): Yes, Duration of night urine collection (min), Duration of night urine collection (min) 2, Night urinary volume (ml), Night urinary volume (ml) 2, Number of cigarettes without filter per day, Number of cigarettes without filter per day 2, Number of filter cigarettes per day 2, Physical activity (1-10), Physical activity (1-10) 2, Sex of the participant (1=Male,2=Female): Male, Sodium-creatinine excretion ratio, Sodium-creatinine excretion ratio 2, Sodium-potassium excretion ratio, Sodium-potassium excretion ratio 2, Units of alcohol per week (1unit~10g pure alcohol), Units of alcohol per week (1unit~10g pure alcohol) 2, Urinary creatinin night (umol/L), Urinary creatinin night (umol/L) 2, Urinary creatinine night excretion (umol), Urinary creatinine night excretion (umol) 2, Urinary creatinine night excretion (umolh), Urinary creatinine night excretion (umolh) 2, Urinary potassium night (mmol/L), Urinary potassium night (mmol/L) 2, Urinary potassium night excretion (mmol), Urinary potassium night excretion (mmol) 2, Urinary potassium night excretion (mmol/h), Urinary potassium night excretion (mmol/h) 2, Urinary sodium night (mmol/L), Urinary sodium night (mmol/L) 2, Urinary sodium night excretion (mmol), Urinary sodium night excretion (mmol) 2, Urinary sodium night excretion (mmol/h), Urinary sodium night excretion (mmol/h) 2]"
5,M5 = M4 + weight + height + BMI + waist-to-hip ratio,45/46 | 24/24 (45.0/46.0 | 24.0/24.0),0.573 (0.55),3432 (2748),3846 (3116),39.372 (40.359),0.931 (0.529),0.0 (0.0),-0.67 | 0.0 (-0.661 | 0.0),"[Age at clinical visit, Age at clinical visit 2, Body height (cm), Body height (cm) 2, Body mass index (kg/m2), Body mass index (kg/m2) 2, Body weight (kg), Body weight (kg) 2, Current smoking (Y=1,N=2): Yes, Duration of night urine collection (min), Duration of night urine collection (min) 2, Night urinary volume (ml), Night urinary volume (ml) 2, Number of cigarettes without filter per day, Number of cigarettes without filter per day 2, Number of filter cigarettes per day, Physical activity (1-10), Physical activity (1-10) 2, Sex of the participant (1=Male,2=Female): Male, Sodium-creatinine excretion ratio, Sodium-creatinine excretion ratio 2, Sodium-potassium excretion ratio, Sodium-potassium excretion ratio 2, Units of alcohol per week (1unit~10g pure alcohol), Units of alcohol per week (1unit~10g pure alcohol) 2, Urinary creatinin night (umol/L), Urinary creatinin night (umol/L) 2, Urinary creatinine night excretion (umol), Urinary creatinine night excretion (umol) 2, Urinary creatinine night excretion (umolh), Urinary creatinine night excretion (umolh) 2, Urinary potassium night (mmol/L), Urinary potassium night (mmol/L) 2, Urinary potassium night excretion (mmol), Urinary potassium night excretion (mmol) 2, Urinary potassium night excretion (mmol/h), Urinary potassium night excretion (mmol/h) 2, Urinary sodium night (mmol/L), Urinary sodium night (mmol/L) 2, Urinary sodium night excretion (mmol), Urinary sodium night excretion (mmol) 2, Urinary sodium night excretion (mmol/h), Urinary sodium night excretion (mmol/h) 2, Waist/hip ratio, Waist/hip ratio 2]"
6,M6 = M5 + blood pressure,49/50 | 26/26 (49.0/50.0 | 26.0/26.0),0.569 (0.549),3451 (2756),3866 (3138),39.595 (40.416),0.786 (0.547),0.0 (0.0),-0.685 | 0.0 (-0.663 | 0.0),"[Age at clinical visit, Age at clinical visit 2, Body height (cm), Body height (cm) 2, Body mass index (kg/m2), Body mass index (kg/m2) 2, Body weight (kg), Body weight (kg) 2, Current smoking (Y=1,N=2): Yes, Duration of night urine collection (min), Duration of night urine collection (min) 2, Mean of 5 dbp measurements at clinical visit (mmHg: dbp1-5), Mean of 5 dbp measurements at clinical visit (mmHg: dbp1-5) 2, Mean of 5 sbp measurements at clinical visit (mmHg: sbp1-5), Mean of 5 sbp measurements at clinical visit (mmHg: sbp1-5) 2, Night urinary volume (ml), Night urinary volume (ml) 2, Number of cigarettes without filter per day, Number of cigarettes without filter per day 2, Number of filter cigarettes per day, Physical activity (1-10), Physical activity (1-10) 2, Sex of the participant (1=Male,2=Female): Male, Sodium-creatinine excretion ratio, Sodium-creatinine excretion ratio 2, Sodium-potassium excretion ratio, Sodium-potassium excretion ratio 2, Units of alcohol per week (1unit~10g pure alcohol), Units of alcohol per week (1unit~10g pure alcohol) 2, Urinary creatinin night (umol/L), Urinary creatinin night (umol/L) 2, Urinary creatinine night excretion (umol), Urinary creatinine night excretion (umol) 2, Urinary creatinine night excretion (umolh), Urinary creatinine night excretion (umolh) 2, Urinary potassium night (mmol/L), Urinary potassium night (mmol/L) 2, Urinary potassium night excretion (mmol), Urinary potassium night excretion (mmol) 2, Urinary potassium night excretion (mmol/h), Urinary potassium night excretion (mmol/h) 2, Urinary sodium night (mmol/L), Urinary sodium night (mmol/L) 2, Urinary sodium night excretion (mmol), Urinary sodium night excretion (mmol) 2, Urinary sodium night excretion (mmol/h), Urinary sodium night excretion (mmol/h) 2, Waist/hip ratio, Waist/hip ratio 2]"
7,M7 = M6 + hypertension + diabetes,51/52 | 27/28 (51.0/52.0 | 27.6/28.0),0.567 (0.533),3458 (2789),3867 (3164),39.697 (41.322),0.69 (0.537),0.0 (0.0),-0.696 | 0.0 (-0.668 | 0.0),"[Age at clinical visit, Age at clinical visit 2, Body height (cm), Body height (cm) 2, Body mass index (kg/m2), Body mass index (kg/m2) 2, Body weight (kg), Body weight (kg) 2, Current smoking (Y=1,N=2): Yes, Duration of night urine collection (min), Duration of night urine collection (min) 2, Ever had diabetes (office glu>7,self-report,anti-Diab drugs)(Y=1,N=0): Yes, Mean of 5 dbp measurements at clinical visit (mmHg: dbp1-5), Mean of 5 dbp measurements at clinical visit (mmHg: dbp1-5) 2, Mean of 5 sbp measurements at clinical visit (mmHg: sbp1-5), Mean of 5 sbp measurements at clinical visit (mmHg: sbp1-5) 2, Night urinary volume (ml), Night urinary volume (ml) 2, Number of cigarettes without filter per day, Number of cigarettes without filter per day 2, Number of filter cigarettes per day, Number of filter cigarettes per day 2, Physical activity (1-10), Physical activity (1-10) 2, Sex of the participant (1=Male,2=Female): Male, Sodium-creatinine excretion ratio, Sodium-creatinine excretion ratio 2, Sodium-potassium excretion ratio, Sodium-potassium excretion ratio 2, Units of alcohol per week (1unit~10g pure alcohol), Units of alcohol per week (1unit~10g pure alcohol) 2, Urinary creatinin night (umol/L), Urinary creatinin night (umol/L) 2, Urinary creatinine night excretion (umol), Urinary creatinine night excretion (umol) 2, Urinary creatinine night excretion (umolh), Urinary creatinine night excretion (umolh) 2, Urinary potassium night (mmol/L), Urinary potassium night (mmol/L) 2, Urinary potassium night excretion (mmol), Urinary potassium night excretion (mmol) 2, Urinary potassium night excretion (mmol/h), Urinary potassium night excretion (mmol/h) 2, Urinary sodium night (mmol/L), Urinary sodium night (mmol/L) 2, Urinary sodium night excretion (mmol), Urinary sodium night excretion (mmol) 2, Urinary sodium night excretion (mmol/h), Urinary sodium night excretion (mmol/h) 2, Waist/hip ratio, Waist/hip ratio 2]"
8,M8 = M7 + blood lipid values,59/60 | 32/32 (58.8/60.0 | 31.6/32.0),0.563 (0.532),3486 (2814),3921 (3218),39.919 (41.525),0.624 (0.445),0.0 (0.0),-0.715 | 0.0 (-0.683 | 0.0),"[Age at clinical visit, Age at clinical visit 2, Blood LDL cholesterol (mmol/L), Blood LDL/HDL ratio, Blood LDL/HDL ratio 2, Blood total cholesterol (mmol/L), Blood total cholesterol (mmol/L) 2, Blood triglyceride (mmol/L), Blood triglyceride (mmol/L) 2, Body height (cm), Body height (cm) 2, Body mass index (kg/m2), Body mass index (kg/m2) 2, Body weight (kg), Body weight (kg) 2, Current smoking (Y=1,N=2): Yes, Duration of night urine collection (min), Duration of night urine collection (min) 2, Ever had diabetes (office glu>7,self-report,anti-Diab drugs)(Y=1,N=0): Yes, Ever had hypertension (office BP>140/90,self-report,anti-HTA drugs)(Y=1,N=0) (Ne: Yes, Mean of 5 dbp measurements at clinical visit (mmHg: dbp1-5), Mean of 5 dbp measurements at clinical visit (mmHg: dbp1-5) 2, Mean of 5 sbp measurements at clinical visit (mmHg: sbp1-5), Mean of 5 sbp measurements at clinical visit (mmHg: sbp1-5) 2, Night urinary volume (ml), Night urinary volume (ml) 2, Number of cigarettes without filter per day, Number of cigarettes without filter per day 2, Number of filter cigarettes per day, Number of filter cigarettes per day 2, Physical activity (1-10), Physical activity (1-10) 2, Sex of the participant (1=Male,2=Female): Male, Sodium-creatinine excretion ratio, Sodium-creatinine excretion ratio 2, Sodium-potassium excretion ratio, Sodium-potassium excretion ratio 2, Units of alcohol per week (1unit~10g pure alcohol), Units of alcohol per week (1unit~10g pure alcohol) 2, Urinary creatinin night (umol/L), Urinary creatinin night (umol/L) 2, Urinary creatinine night excretion (umol), Urinary creatinine night excretion (umol) 2, Urinary creatinine night excretion (umolh), Urinary creatinine night excretion (umolh) 2, Urinary potassium night (mmol/L), Urinary potassium night (mmol/L) 2, Urinary potassium night excretion (mmol), Urinary potassium night excretion (mmol) 2, Urinary potassium night excretion (mmol/h), Urinary potassium night excretion (mmol/h) 2, Urinary sodium night (mmol/L), Urinary sodium night (mmol/L) 2, Urinary sodium night excretion (mmol), Urinary sodium night excretion (mmol) 2, Urinary sodium night excretion (mmol/h), Urinary sodium night excretion (mmol/h) 2, Waist/hip ratio, Waist/hip ratio 2]"


In [None]:
models_df_log, preds_log = get_models_df_preds(
    models,
    data,
    target_col,
    cat_cols,
    labels_df,
    kernels={"log": np.log},
    inter_col=None,
    norm_out=True,
    model_fn=glmm_lasso,
    lambda_val=[0, 0.0001, 0.001, 0.01, 0.1, 1, 10, 100],
    final_re=False,
    se=False,
)
models_df_log


Model M0 = Urinary sodium:
0 categorical columns. Will encode.
3 numerical columns. Will standardize.
[1] 0.5254440 0.5254439 0.5254428 0.5253516 0.5242167 0.5227561 0.5252998
[8] 0.5357536
Selected lambda: [1.]
[1] 0.5521304 0.5521302 0.5521281 0.5528512 0.5511676 0.5517603 0.5522967
[8] 0.5590163
Selected lambda: [0.1]
[1] 0.5105871 0.5105868 0.5105844 0.5096573 0.5089241 0.5093291 0.5125004
[8] 0.5260848
Selected lambda: [0.1]
[1] 0.5172552 0.5172549 0.5171632 0.5174191 0.5167024 0.5173065 0.5185649
[8] 0.5288970
Selected lambda: [0.1]
[1] 0.5123277 0.5123277 0.5123273 0.5125753 0.5122022 0.5120429 0.5118017
[8] 0.5228957
Selected lambda: [10.]
[1] 0.5240046 0.5240045 0.5240036 0.5239944 0.5235056 0.5236972 0.5232236
[8] 0.5350823
Selected lambda: [10.]
{'(Intercept)': 0.0001404704758221413, 'una_un_mmolh': 0.3115432772898247, 'una_un': 0.12351801627166055, 'una_un_mmol': 0.10763691513109763, 'una_un_mmolh_log': 0.19825057234981414, 'una_un_mmol_log': -0.0012682822264363717, 'una_u