In [None]:
## import modules
import pandas as pd
import numpy as np
import datetime
import scipy as sp
from scipy import stats
import statsmodels.formula.api as smf
import statsmodels.api as sm
from sklearn.model_selection import train_test_split, GridSearchCV, StratifiedKFold
import os
from sklearn.metrics import roc_auc_score
import math

In [None]:
## import data
## Read the quantile-analysis input table; the path is resolved relative to the
## notebook's working directory.
all_data = pd.read_csv('input_quantile-analysis.csv')
## Preview only the first rows instead of rendering the whole table into the
## notebook output (keeps the saved notebook small and limits exposed records).
all_data.head()

In [None]:
## Extract features for use
## Keep the sample identifier, the CKD column, the quantile label and the
## Age / Sex columns, then index the rows by Sample_ID.
selected_columns = ['Sample_ID', 'CKD', 'q_label', 'Age', 'Sex']
feature_data = all_data.loc[:, selected_columns].set_index('Sample_ID')
feature_data

In [None]:
## Generate dummy variables in quantile label
## drop_first=True drops the first q_label level, so it serves as the reference
## category for the remaining indicator columns.
## dtype=int keeps the indicators numeric (0/1): pandas >= 2.0 defaults to bool,
## which the formula interface treats as categorical and renames the model terms
## (e.g. 'q_label_2[T.True]'), breaking name-based look-ups of 'q_label_2' etc.
## NOTE: this overwrites feature_data and consumes the 'q_label' column, so the
## extraction cell must be re-run before this cell can be run a second time.
feature_data = pd.get_dummies(feature_data, columns=['q_label'], drop_first = True, dtype = int)

In [None]:
## Set analysis features sets
## First entry is the model outcome; the rest are the quantile dummy columns.
basic_feature_set = ['CKD'] + ['q_label_{}'.format(q) for q in (2, 3, 4)]
## Covariate sets appended to the basic features -- one model is fitted per entry.
additional_feature = [
    [],
    ['Age'],
    ['Sex'],
    ['Age', 'Sex'],
]

In [None]:
## additional feature loop
## For every covariate set in additional_feature, fit a binomial GLM (logistic
## regression) of CKD on the quantile dummies plus those covariates, and collect
## exp(coefficient), p-value and exp(95% CI bounds) for Q2-Q4.
## The result has one column per model and one row per reported quantity.

cumulative_result_df = None

## Row-label prefix -> model term (dummy column) it reports on
quantile_terms = {'Q2': 'q_label_2', 'Q3': 'q_label_3', 'Q4': 'q_label_4'}

## One single-column frame per fitted model; concatenated once after the loop
per_model_results = []

for a_fea, cur_additional in enumerate(additional_feature):

    ## data selection for input
    cur_analysis_feature = basic_feature_set + cur_additional
    cur_input_data = feature_data.loc[:, cur_analysis_feature]
    ## Force the dummies to 0/1 integers: bool dummies (pandas >= 2.0 default)
    ## would be treated as categorical by the formula and renamed in the output.
    cur_input_data = cur_input_data.astype({term: int for term in quantile_terms.values()})

    ## logistic modeling
    ## First feature is the outcome, the remaining ones are the predictors.
    outcome, predictors = cur_analysis_feature[0], cur_analysis_feature[1:]
    mod_glm = smf.glm(formula = outcome + ' ~ ' + ' + '.join(predictors),
                    data = cur_input_data,
                    family = sm.families.Binomial()).fit()

    ## extract the result
    if a_fea == 0:
        analysis_label = 'Unadjusted'
    else:
        analysis_label = 'Adjusted by ' + ' and '.join(cur_additional)

    ## Exponentiate once per model. The '*_coeff' rows therefore hold
    ## exp(coefficient), i.e. odds ratios; the row labels are kept unchanged.
    odds_ratios = np.exp(mod_glm.params)
    p_values = mod_glm.pvalues
    ci_bounds = np.exp(mod_glm.conf_int())

    ## Every quantity is looked up by term name. Positional rows of conf_int()
    ## are not guaranteed to line up with the quantile terms: the formula engine
    ## may place categorical covariates (e.g. a string-coded Sex -- not verifiable
    ## from this notebook) ahead of the numeric dummies.
    cur_result = {'Analysis_label': analysis_label}
    for q_name, term in quantile_terms.items():
        cur_result[q_name + '_coeff'] = odds_ratios[term]
    for q_name, term in quantile_terms.items():
        cur_result[q_name + '_pvalue'] = p_values[term]
    for q_name, term in quantile_terms.items():
        cur_result[q_name + '_95percCI-low'] = ci_bounds.loc[term].iloc[0]
        cur_result[q_name + '_95percCI-high'] = ci_bounds.loc[term].iloc[1]

    cur_result_df = pd.DataFrame(data = list(cur_result.values()),
                                index = list(cur_result.keys()))
    per_model_results.append(cur_result_df)

## Single concat instead of growing the frame inside the loop; stays None when
## no model was fitted (empty additional_feature).
if per_model_results:
    cumulative_result_df = pd.concat(per_model_results, axis = 1)

In [None]:
## Save the summary table. The output folder is created first because to_csv
## does not create missing directories and would raise on a fresh checkout.
os.makedirs('results', exist_ok=True)
cumulative_result_df.to_csv('results/results_q-analysis.csv', index=True)