In [1]:
import pandas as pd
import numpy as np

from os import listdir
from os.path import join
from pdb import set_trace as st

In [2]:
data_type = "0304"
base_dir = f"./{data_type}/"

In [10]:
insertions = {
    0: ["Intercept", ""],
    1: ["Age", ""],
    2: ["Diabete", ""],
    5: ["BMI", ""],
    6: ["Smoke", ""],
    8: ["Country", ""],
    10: ["Race", ""],
    14: ["Education", ""],
    20: ["Gender", ""],
    21: ["Activity", ""]
}

In [11]:
def collect_forms():
    forms = []
    for i, file_name in enumerate(listdir(base_dir)):
        assert file_name.endswith("csv")
        
        if file_name.startswith(data_type): # Not include 0304_di_lm_model_coef.csv ones for merging
            continue

        df = pd.read_csv(join(base_dir, file_name))
        
        data = []
        for i in range(df.shape[0]):
            if i in insertions.keys():
                data.append(insertions[i])
            line = df.iloc[i]
            name = line["Unnamed: 0"]
            estm = round(float(line["Estimate"]), 3)
            lb = round(float(line['LB']), 3)
            ub = round(float(line['UB']), 3)
            p_value = line['Pr(>|t|)']
            if p_value <= 0.001:
                sig = "***"
            elif p_value <= 0.01:
                sig = "**"
            elif p_value <= 0.05:
                sig = "*"
            else:
                sig=""
            dataline = [name, f"{estm}{sig} ({lb}~{ub})",]
            data.append(dataline)

        columns = ["Variables", file_name.split('log')[0][:-1]]
        form = pd.DataFrame(data, index=None, columns=columns)
        forms.append(form)
        
        form.to_csv(f"./Result_{data_type}/{file_name}", index=False)
    return forms

In [12]:
forms = collect_forms()

In [13]:
merge_form = list()

variables = forms[0]['Variables']
merge_form.append(variables)

for form in forms:
    data_form = form[form.columns[-1]]
    merge_form.append(data_form)

In [14]:
merge_form = pd.concat(merge_form, axis=1)

In [15]:
merge_form.columns

Index(['Variables', 'stage_1', 'normal', 'elevated', 'stage_3', 'stage_2'], dtype='object')

In [16]:
merge_form = merge_form[["Variables", "normal", "elevated", "stage_1", "stage_2", "stage_3"]]

In [17]:
merge_form

Unnamed: 0,Variables,normal,elevated,stage_1,stage_2,stage_3
0,Intercept,,,,,
1,(Intercept),1.519*** (0.679~2.359),-0.095 (-0.796~0.607),-0.158 (-0.933~0.617),-0.25 (-1.047~0.547),-0.016 (-0.378~0.345)
2,Age,,,,,
3,age,-0.013*** (-0.013~-0.012),0.001 (-0.0~0.001),0.001*** (0.001~0.002),0.009*** (0.008~0.01),0.002*** (0.001~0.002)
4,Diabete,,,,,
5,diabPre-diabete,-0.175 (-1.003~0.653),0.114 (-0.577~0.806),0.069 (-0.695~0.833),-0.01 (-0.796~0.776),0.002 (-0.355~0.358)
6,diabdiabete,-0.271 (-1.108~0.567),0.11 (-0.59~0.809),0.067 (-0.706~0.84),0.119 (-0.676~0.914),-0.025 (-0.385~0.336)
7,diabUnknown,-0.159 (-0.985~0.668),0.079 (-0.612~0.769),0.074 (-0.689~0.837),0.001 (-0.784~0.785),0.005 (-0.351~0.361)
8,BMI,,,,,
9,bmi,-0.013*** (-0.015~-0.01),0.002* (0.0~0.005),0.006*** (0.003~0.008),0.006*** (0.003~0.008),-0.001. (-0.002~0.0)


In [29]:
merge_form.to_csv(f"./Result_{data_type}/merge_{data_type}.csv", index=False)