# Clot Burden Prediction

We perform a univariable regression for each target variable and explanatory variable. We then perform multivariable regressions. For these, we focus on body composition only, cardiopulmonary features only, and then a composite model. For each of these, we perform three forms of feature selection, using (1) recursive feature elimination with cross-validation, (2) forward sequential feature selection with cross-validation, and (3) backward sequential feature selection with cross-validation. For each group of selected features, we also perform sensitivity analyses controlling for gender, age, and both gender and age.

# Imports

In [31]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from pathlib import Path
import os
import pickle
from tqdm.notebook import trange, tqdm

# Custom / Local
from config import model_config
from regression import reg

# Stats
from scipy.stats import shapiro
from sklearn.feature_selection import(
	RFECV, SequentialFeatureSelector,
)
from sklearn.metrics import (
    make_scorer, r2_score
)
from sklearn.linear_model import (
	LinearRegression, LogisticRegression, LassoCV
)
from sklearn.model_selection import (
	train_test_split, cross_val_score,
    RepeatedKFold
)
from sklearn.pipeline import (
	Pipeline
)
import statsmodels.api as sm
from statsmodels.stats.outliers_influence import variance_inflation_factor

# Global vars
# SEED is passed as random_state to CUSTOM_CV below so the fold assignment is repeatable.
SEED = 123
# NOTE(review): TEST_SIZE is not referenced in the visible part of this notebook -- confirm it is still needed.
TEST_SIZE = 0.25
# Diverging seaborn palette returned as a matplotlib colormap (as_cmap=True).
HEATMAP_COLORS = sns.diverging_palette(h_neg=250, h_pos=359, as_cmap=True)
# NOTE(review): presumably the p-value threshold for significance; not referenced in the visible cells.
SIGNIFICANCE_CUTOFF = 0.05
# Number of splits per repetition in CUSTOM_CV.
CV_FOLDS = 5
# Repeated K-fold splitter: CV_FOLDS splits repeated 10 times, used for every
# cross-validated score in this notebook.
CUSTOM_CV = RepeatedKFold(n_splits=CV_FOLDS, n_repeats=10, random_state=SEED)


# Cov type
# Selects the covariance estimator used in fit_model and the name of the CSV
# written at the end of the notebook.
# Options:
#  - robust     (statsmodels cov_type='HC3')
#  - clustered  (statsmodels cov_type='cluster', grouped by pe_numbers)
COV_TYPE = 'robust'


# Import/Preprocess Data

In [32]:
# Load the pickled modelling bundle and unpack its pieces into notebook globals.
# NOTE(review): pickle.load can execute arbitrary code -- only point this at a trusted file.
data_path = Path('../data/prediction_data.pkl')
with data_path.open('rb') as f:
    data = pickle.load(f)

# Design matrix and target table.
X, y = data.get('X'), data.get('y')

# Feature-name groups used to build the regressions below.
body_features = data.get('body_features')
cardio_features = data.get('cardio_features')
control_features = data.get('controls')
all_features = body_features + cardio_features + control_features

# Quick sanity check of what was loaded.
for item in (X.shape, y.shape, body_features, cardio_features, control_features):
    print(item)

(96, 37)
(96, 7)
['volume_visceral_fat', 'density_visceral_fat', 'mass_visceral_fat', 'volume_subcutaneous_fat', 'density_subcutaneous_fat', 'mass_subcutaneous_fat', 'volume_intermuscular_fat', 'density_intermuscular_fat', 'mass_intermuscular_fat', 'volume_muscle', 'density_muscle', 'mass_muscle', 'volume_bone', 'density_bone', 'mass_bone', 'bmi', 'bsa']
['emphysema_volume_950hu', 'lung_volume', 'extrapulmonary_artery_volume', 'extrapulmonary_vein_volume', 'intrapulmonary_artery_volume', 'intrapulmonary_vein_volume', 'artery_vein_ratio', 'bv5', 'bv10', 'pb_larger_10', 'pv_diameter', 'a_diameter', 'pv_a', 'heart_volume', 'airway_volume', 'airway_ratio', 'ild_volume', 'ild_ratio']
['age', 'gender_cl_Male']


In [33]:
# Group label per observation: the row label of y with its last two characters removed.
# NOTE(review): assumes every label ends in a two-character suffix -- confirm against the data.
pe_numbers = y.index.str.slice(stop=-2)
pe_numbers

Index(['PE1', 'PE12', 'PE12', 'PE14', 'PE15', 'PE15', 'PE15', 'PE15', 'PE16',
       'PE16', 'PE17', 'PE18', 'PE18', 'PE18', 'PE18', 'PE19', 'PE2', 'PE20',
       'PE21', 'PE22', 'PE22', 'PE22', 'PE23', 'PE23', 'PE23', 'PE24', 'PE24',
       'PE25', 'PE25', 'PE25', 'PE27', 'PE27', 'PE28', 'PE3', 'PE3', 'PE3',
       'PE31', 'PE32', 'PE32', 'PE32', 'PE32', 'PE33', 'PE34', 'PE34', 'PE35',
       'PE36', 'PE36', 'PE37', 'PE37', 'PE37', 'PE4', 'PE40', 'PE40', 'PE41',
       'PE41', 'PE41', 'PE41', 'PE42', 'PE42', 'PE43', 'PE44', 'PE45', 'PE47',
       'PE48', 'PE48', 'PE49', 'PE5', 'PE5', 'PE51', 'PE51', 'PE51', 'PE52',
       'PE52', 'PE52', 'PE52', 'PE52', 'PE52', 'PE52', 'PE53', 'PE54', 'PE54',
       'PE54', 'PE56', 'PE56', 'PE6', 'PE6', 'PE6', 'PE6', 'PE6', 'PE6', 'PE6',
       'PE7', 'PE8', 'PE8', 'PE8', 'PE9'],
      dtype='object')

# OLS Regression Functions

In [34]:
def get_params(model, X, y):
    """Fit ``model`` on ``(X, y)`` and return its parameters as a value-sorted pd.Series.

    The series is labelled by ``model.feature_names_in_`` and the fitted
    intercept is stored under the label ``'const'`` (replacing any existing
    ``'const'`` entry), so it can be compared with statsmodels ``params``.
    Note that ``model`` is fitted in place.
    """
    model.fit(X, y)
    labels = list(model.feature_names_in_)
    params = pd.Series(model.coef_, index=labels)
    # The intercept takes the place of the constant column's coefficient.
    params.loc['const'] = model.intercept_
    return params.sort_values()

In [35]:
def model_residual_correlation(model):
    """Return the Pearson correlation between observation order and ``model.resid``.

    Observation order is the positional index 0..n-1 of the residuals.
    """
    residuals = model.resid
    positions = np.arange(len(residuals))
    corr_matrix = np.corrcoef(positions, residuals)
    # Off-diagonal entry of the 2x2 correlation matrix.
    return corr_matrix[1, 0]

In [36]:
def fit_model(X, y):
    """Fit a statsmodels OLS model and a matching sklearn LinearRegression.

    Params:
        - X: design matrix (callers in this notebook pass the output of
          ``sm.add_constant``).
        - y: target series; ``y.name`` is used in the mismatch message.

    Returns:
        ``(model_sm, model_sk)`` where ``model_sm`` is the fitted statsmodels
        result (coefficients, p-values, residuals) and ``model_sk`` is the
        sklearn estimator, already fitted on ``(X, y)`` by ``get_params``.

    Raises:
        ValueError: if the module-level ``COV_TYPE`` is neither ``'robust'``
        nor ``'clustered'``.
    """
    # Fit statsmodels model for pvalues and coef
    if COV_TYPE == 'robust':
        model_sm = sm.OLS(y, X).fit(cov_type='HC3')
    elif COV_TYPE == 'clustered':
        # One cluster per entry of pe_numbers; it must line up row-for-row with X.
        model_sm = sm.OLS(y, X).fit(cov_type='cluster', cov_kwds={'groups': pe_numbers})
    else:
        # Without this branch model_sm would be unbound and fail later with an
        # unrelated-looking UnboundLocalError.
        raise ValueError(
            f"Unknown COV_TYPE {COV_TYPE!r}; expected 'robust' or 'clustered'."
        )
    # Define sklearn model for CV evaluation
    model_sk = LinearRegression(fit_intercept=True, n_jobs=-1)
    # Check that model params match. Align the sklearn params on the
    # statsmodels labels so each coefficient is compared with its own
    # counterpart rather than with whatever shares its sorted position.
    sm_model_params = model_sm.params
    sk_model_params = get_params(model_sk, X, y).reindex(sm_model_params.index)
    params_match = np.isclose(sk_model_params, sm_model_params, atol=1e-5)
    if not np.all(params_match):
        print(f"Regressions on {y.name} did not match for sklearn and statsmodels. CV scores may differ.")
    return model_sm, model_sk

In [37]:
def store_model_results(model_sm, model_sk, X, y):
    """
    Build a one-row DataFrame of model-level diagnostics, indexed by
    (target name, tuple of regressor names).

    Params:
        - model_sm: statsmodel model for coefs, pvalues, and residuals.
        - model_sk: sklearn model for cross validation; it must already be
          fitted because its training score is read here.
        - X: X data.
        - y: y data.
    """
    # R^2 of every fold of the shared repeated K-fold splitter
    fold_r2 = cross_val_score(model_sk, X, y, scoring='r2', cv=CUSTOM_CV, n_jobs=-1)
    # NOTE(review): negative fold scores are floored at zero before
    # aggregation, so the reported mean is never below the raw CV mean.
    floored_r2 = np.maximum(fold_r2, 0.0)

    summary = {
        'y': y.name,
        'model_dfn': [tuple(X.columns.values)],
        'nobs': model_sm.nobs,
        'shapiro_resid_pvalue': shapiro(model_sm.resid).pvalue,
        'metric_train': model_sk.score(X, y),
        'metric_cv_mean': np.mean(floored_r2),
        'metric_cv_std': np.std(floored_r2),
        'fpvalue': model_sm.f_pvalue,
    }
    # Index the single row by target and model definition
    return pd.DataFrame(summary).set_index(['y', 'model_dfn'])

In [38]:
def store_coef_results(model_sm, y):
    """
    Build a one-row, wide DataFrame of per-regressor coefficient, p-value and
    significance marker, indexed by (target name, tuple of regressor names).
    Columns are named ``coef_<x>``, ``pval_<x>`` and ``signif_<x>``.

    Params:
        - model_sm: statsmodel model for coefs, pvalues, and residuals.
        - y: y data.
    """
    params = model_sm.params
    model_dfn = tuple(params.index)

    # Long format: one row per regressor, each tagged with the full model definition
    long_table = pd.DataFrame(
        {
            'model_dfn': [model_dfn] * len(params),
            'coef': params,
            'pval': model_sm.pvalues,
        },
    )
    long_table['signif'] = long_table['pval'].apply(reg.add_significance)
    long_table = long_table.reset_index(names='x')
    long_table['y'] = y.name

    # Wide format: one row per model, one column per (statistic, regressor) pair
    wide_table = long_table.pivot(
        index=['y', 'model_dfn'],
        columns=['x'],
        values=['coef', 'pval', 'signif'],
    )
    wide_table.columns = ['_'.join(level_pair) for level_pair in wide_table.columns]
    return wide_table

In [39]:
def combine_model_results(model_sm, model_sk, X, y):
    """Return model-level and coefficient-level results joined side by side.

    Both parts come from ``store_model_results`` and ``store_coef_results``
    and must have the same number of rows.
    """
    parts = [
        store_model_results(model_sm, model_sk, X, y),
        store_coef_results(model_sm, y),
    ]
    assert parts[0].shape[0] == parts[1].shape[0]
    return pd.concat(parts, axis=1)

### Example

In [40]:
# Worked example: regress one target on a single feature plus a constant
# and display the combined result row.
target = 'total_clot_burden'
features = 'density_visceral_fat'
y_temp = y[target]
X_temp = sm.add_constant(X[features])
model_sm, model_sk = fit_model(X_temp, y_temp)
combine_model_results(model_sm, model_sk, X_temp, y_temp)

Unnamed: 0_level_0,Unnamed: 1_level_0,nobs,shapiro_resid_pvalue,metric_train,metric_cv_mean,metric_cv_std,fpvalue,coef_const,coef_density_visceral_fat,pval_const,pval_density_visceral_fat,signif_const,signif_density_visceral_fat
y,model_dfn,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1
total_clot_burden,"(const, density_visceral_fat)",96.0,1.260388e-08,0.059386,0.029316,0.041349,0.02186,0.0,-0.243692,1.0,0.019724,,*


# Perform univariable regressions

In [41]:
# Fit one OLS model per (target, feature) pair. Result rows are collected in a
# list and concatenated once: concatenating inside the loop re-copies the
# growing frame every iteration and starts from an empty DataFrame.
univariable_frames = []

for target in tqdm(model_config.num_targets):
    y_temp = y.loc[:, target]
    for feature in all_features:
        # Fit model
        X_temp = sm.add_constant(X[feature])
        model_sm, model_sk = fit_model(X_temp, y_temp)

        # Store results
        univariable_frames.append(
            combine_model_results(model_sm, model_sk, X_temp, y_temp)
        )

univariable_results = pd.concat(univariable_frames, axis=0)

print(univariable_results.shape)

# Build the lookup key "<category>%<selection_method>%<y>%<controls>".
univariable_results = univariable_results.reset_index()
univariable_results['selection_method'] = 'All'
# model_dfn is (const, feature); keep only the feature name.
univariable_results['model_dfn'] = univariable_results['model_dfn'].apply(lambda x: x[1])
univariable_results['category'] = 'univariable_' + univariable_results['model_dfn']
univariable_results['controls'] = 'None'
univariable_results.index = univariable_results[['category', 'selection_method', 'y', 'controls']].apply('%'.join, axis=1)
univariable_results.index.name = 'Lookup'

print(univariable_results.shape)

univariable_results.tail()

  0%|          | 0/7 [00:00<?, ?it/s]

(259, 120)
(259, 125)


Unnamed: 0_level_0,y,model_dfn,nobs,shapiro_resid_pvalue,metric_train,metric_cv_mean,metric_cv_std,fpvalue,coef_const,coef_volume_visceral_fat,...,signif_ild_ratio,coef_age,pval_age,signif_age,coef_gender_cl_Male,pval_gender_cl_Male,signif_gender_cl_Male,selection_method,category,controls
Lookup,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
univariable_airway_ratio%All%centralartery%None,centralartery,airway_ratio,96.0,1.702099e-11,0.020135,0.011161,0.020749,0.298965,0.0,,...,,,,,,,,All,univariable_airway_ratio,
univariable_ild_volume%All%centralartery%None,centralartery,ild_volume,96.0,4.270844e-13,0.001456,0.000154,0.000536,0.751051,0.0,,...,,,,,,,,All,univariable_ild_volume,
univariable_ild_ratio%All%centralartery%None,centralartery,ild_ratio,96.0,4.05917e-12,0.017948,0.010779,0.017921,0.126185,0.0,,...,,,,,,,,All,univariable_ild_ratio,
univariable_age%All%centralartery%None,centralartery,age,96.0,1.521341e-12,0.012366,0.004326,0.011861,0.305216,0.0,,...,,0.111203,0.30257,,,,,All,univariable_age,
univariable_gender_cl_Male%All%centralartery%None,centralartery,gender_cl_Male,96.0,3.216951e-12,0.019572,0.037928,0.052893,0.051687,-0.14673,,...,,,,,0.426849,0.048746,*,All,univariable_gender_cl_Male,


## Control for gender

In [42]:
# Fit one OLS model per (target, feature) pair with gender as an additional
# regressor. Result rows are collected in a list and concatenated once:
# concatenating inside the loop re-copies the growing frame every iteration
# and starts from an empty DataFrame.
univariable_gender_frames = []

for target in tqdm(model_config.num_targets):
    y_temp = y[target]
    for feature in all_features:

        # The controls themselves are not regressed against a control.
        if feature in model_config.controls_encoded:
            continue

        # Fit model
        features = [feature, 'gender_cl_Male']
        X_temp = sm.add_constant(X[features])
        model_sm, model_sk = fit_model(X_temp, y_temp)

        # Store results
        univariable_gender_frames.append(
            combine_model_results(model_sm, model_sk, X_temp, y_temp)
        )

univariable_gender_results = pd.concat(univariable_gender_frames, axis=0)

print(univariable_gender_results.shape)

# Build the lookup key "<category>%<selection_method>%<y>%<controls>".
univariable_gender_results = univariable_gender_results.reset_index()
univariable_gender_results['selection_method'] = 'All'
# model_dfn is (const, feature, gender_cl_Male); keep only the feature name.
univariable_gender_results['model_dfn'] = univariable_gender_results['model_dfn'].apply(lambda x: x[1])
univariable_gender_results['category'] = 'univariable_' + univariable_gender_results['model_dfn']
univariable_gender_results['controls'] = 'gender'
univariable_gender_results.index = univariable_gender_results[['category', 'selection_method', 'y', 'controls']].apply('%'.join, axis=1)
univariable_gender_results.index.name = 'Lookup'

print(univariable_gender_results.shape)

univariable_gender_results.tail()

  0%|          | 0/7 [00:00<?, ?it/s]

(245, 117)
(245, 122)


Unnamed: 0_level_0,y,model_dfn,nobs,shapiro_resid_pvalue,metric_train,metric_cv_mean,metric_cv_std,fpvalue,coef_const,coef_gender_cl_Male,...,signif_airway_ratio,coef_ild_volume,pval_ild_volume,signif_ild_volume,coef_ild_ratio,pval_ild_ratio,signif_ild_ratio,selection_method,category,controls
Lookup,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
univariable_heart_volume%All%centralartery%gender,centralartery,heart_volume,96.0,3.470715e-12,0.022395,0.031954,0.04801,0.152671,-0.138433,0.402714,...,,,,,,,,All,univariable_heart_volume,gender
univariable_airway_volume%All%centralartery%gender,centralartery,airway_volume,96.0,4.232489e-11,0.044432,0.03195,0.053173,0.085889,-0.0848,0.24669,...,,,,,,,,All,univariable_airway_volume,gender
univariable_airway_ratio%All%centralartery%gender,centralartery,airway_ratio,96.0,5.575437e-11,0.039828,0.038299,0.061011,0.057399,-0.147182,0.428167,...,,,,,,,,All,univariable_airway_ratio,gender
univariable_ild_volume%All%centralartery%gender,centralartery,ild_volume,96.0,7.329235e-12,0.022348,0.031638,0.050245,0.078758,-0.157762,0.458944,...,,-0.079798,0.476989,,,,,All,univariable_ild_volume,gender
univariable_ild_ratio%All%centralartery%gender,centralartery,ild_ratio,96.0,2.277056e-11,0.036318,0.039415,0.059746,0.031093,-0.142485,0.4145,...,,,,,-0.12471,0.156036,,All,univariable_ild_ratio,gender


## Control for age

In [43]:
# Fit one OLS model per (target, feature) pair with age as an additional
# regressor. Result rows are collected in a list and concatenated once:
# concatenating inside the loop re-copies the growing frame every iteration
# and starts from an empty DataFrame.
univariable_age_frames = []

for target in tqdm(model_config.num_targets):
    y_temp = y[target]
    for feature in all_features:

        # The controls themselves are not regressed against a control.
        if feature in model_config.controls_encoded:
            continue

        # Fit model
        features = [feature, 'age']
        X_temp = sm.add_constant(X[features])
        model_sm, model_sk = fit_model(X_temp, y_temp)

        # Store results
        univariable_age_frames.append(
            combine_model_results(model_sm, model_sk, X_temp, y_temp)
        )

univariable_age_results = pd.concat(univariable_age_frames, axis=0)

print(univariable_age_results.shape)

# Build the lookup key "<category>%<selection_method>%<y>%<controls>".
univariable_age_results = univariable_age_results.reset_index()
univariable_age_results['selection_method'] = 'All'
# model_dfn is (const, feature, age); keep only the feature name.
univariable_age_results['model_dfn'] = univariable_age_results['model_dfn'].apply(lambda x: x[1])
univariable_age_results['category'] = 'univariable_' + univariable_age_results['model_dfn']
univariable_age_results['controls'] = 'age'
univariable_age_results.index = univariable_age_results[['category', 'selection_method', 'y', 'controls']].apply('%'.join, axis=1)
univariable_age_results.index.name = 'Lookup'

print(univariable_age_results.shape)

univariable_age_results.tail()

  0%|          | 0/7 [00:00<?, ?it/s]

(245, 117)
(245, 122)


Unnamed: 0_level_0,y,model_dfn,nobs,shapiro_resid_pvalue,metric_train,metric_cv_mean,metric_cv_std,fpvalue,coef_age,coef_const,...,signif_airway_ratio,coef_ild_volume,pval_ild_volume,signif_ild_volume,coef_ild_ratio,pval_ild_ratio,signif_ild_ratio,selection_method,category,controls
Lookup,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
univariable_heart_volume%All%centralartery%age,centralartery,heart_volume,96.0,3.893628e-12,0.022795,0.006695,0.013478,0.348842,0.098922,0.0,...,,,,,,,,All,univariable_heart_volume,age
univariable_airway_volume%All%centralartery%age,centralartery,airway_volume,96.0,9.529956e-11,0.044115,0.019802,0.037206,0.134826,0.026807,0.0,...,,,,,,,,All,univariable_airway_volume,age
univariable_airway_ratio%All%centralartery%age,centralartery,airway_ratio,96.0,5.427875e-11,0.035402,0.009954,0.021437,0.354581,0.123996,0.0,...,,,,,,,,All,univariable_airway_ratio,age
univariable_ild_volume%All%centralartery%age,centralartery,ild_volume,96.0,2.468305e-12,0.015295,0.002365,0.008211,0.504157,0.11879,0.0,...,,-0.054649,0.63491,,,,,All,univariable_ild_volume,age
univariable_ild_ratio%All%centralartery%age,centralartery,ild_ratio,96.0,1.270781e-11,0.029305,0.011128,0.021828,0.210678,0.106635,0.0,...,,,,,-0.130229,0.131698,,All,univariable_ild_ratio,age


# Perform multivariable regressions

In [45]:
# Cap the model size at one regressor per ten observations.
MAX_NUM_REGRESSORS = y.shape[0] // 10
print(f"MAX_NUM_REGRESSORS: {MAX_NUM_REGRESSORS}")

# Result rows are collected here and concatenated once after the loop, rather
# than re-concatenating a growing (initially empty) DataFrame per target.
multivariable_frames = []

for target in tqdm(model_config.num_targets):
    y_temp = y[target]

    # Search range for the Lasso penalty, as exponents of 10.
    low_alpha = -3
    high_alpha = 4

    lassoCV = LassoCV(
        alphas=np.logspace(low_alpha, high_alpha, 100),
        cv=CUSTOM_CV,
        fit_intercept=True,
        max_iter=100_000,
        tol=0.001,
        n_jobs=-1
    )

    # Refit with a progressively higher minimum penalty until the number of
    # non-zero coefficients fits within MAX_NUM_REGRESSORS.
    # NOTE(review): the first grid has 100 points, later grids have 50 --
    # confirm this difference is intentional.
    while True:
        lassoCV.fit(X, y_temp)
        coefs = pd.DataFrame(
            {'coef': lassoCV.coef_},
            index=lassoCV.feature_names_in_
        )
        remaining_features_lasso = coefs.loc[~np.isclose(coefs['coef'], 0.0), :].index.values

        if len(remaining_features_lasso) <= MAX_NUM_REGRESSORS:
            break

        low_alpha += 0.2
        lassoCV.set_params(alphas=np.logspace(low_alpha, high_alpha, 50))

    print(f"{target:<20s} alpha={lassoCV.alpha_:.3f}, feats: {remaining_features_lasso}")

    # Fit models
    # NOTE(review): features are selected on all rows, so the CV metrics
    # computed for this OLS fit do not include the selection step.
    X_temp_lasso = sm.add_constant(X[remaining_features_lasso])
    model_sm_lasso, model_sk_lasso = fit_model(X_temp_lasso, y_temp)

    # Collect model/coef information and store (same helper as the
    # univariable cells, instead of repeating its body here).
    multivariable_frames.append(
        combine_model_results(model_sm_lasso, model_sk_lasso, X_temp_lasso, y_temp)
    )

multivariable_results = pd.concat(multivariable_frames, axis=0)


MAX_NUM_REGRESSORS: 9


  0%|          | 0/7 [00:00<?, ?it/s]

total_clot_burden    alpha=0.158, feats: ['volume_visceral_fat' 'mass_bone' 'bsa' 'extrapulmonary_artery_volume'
 'extrapulmonary_vein_volume' 'artery_vein_ratio' 'pv_a']
superior_right       alpha=0.081, feats: ['volume_bone' 'bsa' 'emphysema_volume_950hu' 'artery_vein_ratio' 'bv5'
 'pv_a' 'heart_volume' 'airway_ratio']
superior_left        alpha=0.158, feats: ['volume_visceral_fat' 'density_visceral_fat' 'density_bone' 'mass_bone'
 'artery_vein_ratio' 'bv10' 'pv_diameter']
middle_right         alpha=0.095, feats: ['volume_bone' 'bsa' 'emphysema_volume_950hu' 'artery_vein_ratio' 'bv10'
 'pb_larger_10' 'pv_a' 'heart_volume']
inferior_right       alpha=0.158, feats: ['volume_visceral_fat' 'volume_bone' 'emphysema_volume_950hu'
 'extrapulmonary_artery_volume' 'artery_vein_ratio' 'a_diameter' 'pv_a'
 'heart_volume']
inferior_left        alpha=0.158, feats: ['volume_visceral_fat' 'mass_muscle' 'volume_bone'
 'extrapulmonary_artery_volume' 'artery_vein_ratio' 'bv10' 'pv_a'
 'heart_volume']


In [46]:
# Tag the LassoCV models and build the lookup key
# "<category>%<selection_method>%<y>%<controls>" used as the row index.
multivariable_results = multivariable_results.reset_index().assign(
    selection_method='LassoCV',
    category='composite',
    controls='None',
)
lookup_columns = ['category', 'selection_method', 'y', 'controls']
multivariable_results.index = multivariable_results[lookup_columns].apply('%'.join, axis=1)
multivariable_results.index.name = 'Lookup'
print(multivariable_results.shape)
multivariable_results.head()

(7, 74)


Unnamed: 0_level_0,y,model_dfn,nobs,shapiro_resid_pvalue,metric_train,metric_cv_mean,metric_cv_std,fpvalue,coef_artery_vein_ratio,coef_bsa,...,signif_a_diameter,coef_mass_muscle,pval_mass_muscle,signif_mass_muscle,coef_density_intermuscular_fat,pval_density_intermuscular_fat,signif_density_intermuscular_fat,selection_method,category,controls
Lookup,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
composite%LassoCV%total_clot_burden%None,total_clot_burden,"(const, volume_visceral_fat, mass_bone, bsa, e...",96.0,0.1007654,0.560388,0.384239,0.18673,1.622026e-10,-0.283942,0.274244,...,,,,,,,,LassoCV,composite,
composite%LassoCV%superior_right%None,superior_right,"(const, volume_bone, bsa, emphysema_volume_950...",96.0,0.02694084,0.532025,0.274873,0.188572,1.258323e-08,-0.28778,0.198681,...,,,,,,,,LassoCV,composite,
composite%LassoCV%superior_left%None,superior_left,"(const, volume_visceral_fat, density_visceral_...",96.0,1.118983e-10,0.375453,0.132287,0.14323,0.01179829,-0.342758,,...,,,,,,,,LassoCV,composite,
composite%LassoCV%middle_right%None,middle_right,"(const, volume_bone, bsa, emphysema_volume_950...",96.0,2.582721e-08,0.399171,0.185554,0.179914,5.297557e-05,-0.235085,0.068872,...,,,,,,,,LassoCV,composite,
composite%LassoCV%inferior_right%None,inferior_right,"(const, volume_visceral_fat, volume_bone, emph...",96.0,1.407108e-06,0.467483,0.149068,0.179917,0.0001622525,-0.174823,,...,,,,,,,,LassoCV,composite,


# Combine univariable and multivariable regression results

In [47]:
# Stack every result table (univariable, gender-controlled, age-controlled,
# multivariable) into one frame.
result_tables = [
    univariable_results,
    univariable_gender_results,
    univariable_age_results,
    multivariable_results,
]
ols_results = pd.concat(result_tables, axis=0)

# The output file depends on the covariance estimator; any other COV_TYPE
# writes nothing.
output_paths = {
    'robust': '../output/regressions/ols_results_robust.csv',
    'clustered': '../output/regressions/ols_results_clustered.csv',
}
if COV_TYPE in output_paths:
    ols_results.to_csv(output_paths[COV_TYPE])