In [1]:
import pandas as pd
import numpy as np

from os import listdir
from os.path import join
from pdb import set_trace as st

In [81]:
data_type = "0506"
base_dir = f"./{data_type}/"

In [82]:
insertions = {
    0: ["Intercept", ""],
    1: ["Age", ""],
    2: ["Diabete", ""],
    5: ["BMI", ""],
    6: ["Smoke", ""],
    8: ["Country", ""],
    10: ["Race", ""],
    12: ["Education", ""],
    14: ["Gender", ""],
    15: ["Activity", ""]
}

In [83]:
def collect_forms():
    forms = []
    for i, file_name in enumerate(listdir(base_dir)):
        assert file_name.endswith("csv")
        
        if not file_name.startswith(data_type): # Not include 0304_di_lm_model_coef.csv ones for merging
            continue

        df = pd.read_csv(join(base_dir, file_name))
        
        data = []
        for i in range(df.shape[0]):
            if i in insertions.keys():
                data.append(insertions[i])
            line = df.iloc[i]
            name = line["Unnamed: 0"]
            estm = round(float(line["Estimate"]), 3)
            lb = round(float(line['LB']), 3)
            ub = round(float(line['UB']), 3)
            p_value = line['Pr(>|t|)']
            if p_value <= 0.001:
                sig = "***"
            elif p_value <= 0.01:
                sig = "**"
            elif p_value <= 0.05:
                sig = "*"
            else:
                sig=""
            dataline = [name, f"{estm}{sig} ({lb}~{ub})",]
            data.append(dataline)

        columns = ["Variables", file_name.split('log')[0][:-1]]
        form = pd.DataFrame(data, index=None, columns=columns)
        forms.append(form)
        
        form.to_csv(f"./Result_{data_type}/{file_name}", index=False)
    return forms

In [84]:
forms = collect_forms()

In [85]:
forms

[                         Variables    0506_sy_lm_model_coef.cs
 0                        Intercept                            
 1                      (Intercept)  84.737*** (49.967~119.508)
 2                              Age                            
 3                              age      0.562*** (0.523~0.602)
 4                          Diabete                            
 5                  diabPre-diabete      6.455 (-27.747~40.658)
 6                      diabdiabete      4.038 (-30.473~38.549)
 7                      diabUnknown      2.461 (-31.678~36.601)
 8                              BMI                            
 9                              bmi        0.405*** (0.3~0.511)
 10                           Smoke                            
 11                      smokeSmoke        1.829 (-0.264~3.921)
 12                    smokeUnknown       2.701*** (1.08~4.322)
 13                         Country                            
 14            countryUnited States     

In [86]:
merge_form = list()

variables = forms[0]['Variables']
merge_form.append(variables)

for form in forms:
    data_form = form[form.columns[-1]]
    merge_form.append(data_form)

In [87]:
merge_form = pd.concat(merge_form, axis=1)

In [88]:
merge_form.columns

Index(['Variables', '0506_sy_lm_model_coef.cs', '0506_di_lm_model_coef.cs'], dtype='object')

In [68]:
merge_form = merge_form[["Variables", "normal", "elevated", "stage_1", "stage_2", "stage_3"]]

In [89]:
merge_form

Unnamed: 0,Variables,0506_sy_lm_model_coef.cs,0506_di_lm_model_coef.cs
0,Intercept,,
1,(Intercept),84.737*** (49.967~119.508),53.3*** (29.018~77.582)
2,Age,,
3,age,0.562*** (0.523~0.602),0.117*** (0.09~0.144)
4,Diabete,,
5,diabPre-diabete,6.455 (-27.747~40.658),-1.209 (-25.094~22.676)
6,diabdiabete,4.038 (-30.473~38.549),-0.587 (-24.688~23.514)
7,diabUnknown,2.461 (-31.678~36.601),0.092 (-23.75~23.934)
8,BMI,,
9,bmi,0.405*** (0.3~0.511),0.313*** (0.239~0.387)


In [90]:
merge_form.to_csv(f"./Result_{data_type}/merge_general_{data_type}.csv", index=False)