In [None]:
# This program creates Table 1 - baseline characteristics
# Author: Prasanth Ganesan, PhD
# Author email: prasanthganesan.phd@gmail.com

# Import statements
import pandas as pd
import os
import scipy.stats as scistats


In [None]:
# Function Definitions
def import_csv_as_df(rootdir_local, filename_local, columns_local):
    """Read a CSV file and keep only the requested columns.

    Parameters
    ----------
    rootdir_local : str
        Directory that contains the CSV file.
    filename_local : str
        Name of the CSV file inside ``rootdir_local``.
    columns_local : list
        Column labels to keep, in the order they should appear in the result.

    Returns
    -------
    pandas.DataFrame
        The selected columns, with ``True`` replaced by ``"Yes"`` and
        ``False`` replaced by ``"No"``.
    """
    csv_path = os.path.join(rootdir_local, filename_local)
    selected = pd.read_csv(csv_path)[columns_local]
    # Two separate passes, True first and then False, mirroring the order in
    # which the flags are rewritten into their text form.
    return selected.replace(True, "Yes").replace(False, "No")


def get_categorical(dfin):
    """Return the labels of the columns of ``dfin`` that hold categorical data.

    A column is reported as categorical when its dtype is ``object``, a pandas
    string extension dtype, or a pandas ``category`` dtype. Checking only for
    ``object`` would miss text columns stored with the string extension dtype
    and columns explicitly cast to ``category``; those would then be treated
    as numeric by the summary code and fail when a mean is requested.

    Parameters
    ----------
    dfin : pandas.DataFrame
        Frame whose columns are inspected.

    Returns
    -------
    list
        Column labels, in frame order, whose dtype is categorical/text.
    """
    categorical = []
    # Iterating over ``dtypes`` (instead of indexing each column by label)
    # also works when the frame contains duplicated column labels.
    for column, dtype in dfin.dtypes.items():
        is_text = dtype.name == "object" or isinstance(dtype, pd.StringDtype)
        if is_text or isinstance(dtype, pd.CategoricalDtype):
            categorical.append(column)
    return categorical


def calculate_stats(filtered_df_local, categorical_local):
    """Summarise every column of one patient group for Table 1.

    Parameters
    ----------
    filtered_df_local : pandas.DataFrame
        One row per patient, one column per characteristic.
    categorical_local : list
        Labels of the columns to summarise as categories; every other column
        is summarised with its mean and standard deviation.

    Returns
    -------
    df_table1 : pandas.DataFrame
        One row per characteristic with the columns ``N_Patients``,
        ``Characteristics``, ``Mean``, ``Std``, ``Categories``, ``Count`` and
        ``Percent``. Fields that do not apply to a row hold the text ``"NA"``.
        For categorical rows the last three fields are comma-separated strings
        listing the categories in label order (missing values last).
    df_datapval : pandas.DataFrame
        Built from a flat list that alternates a characteristic name with its
        payload: the list of raw values for a numeric column, or
        ``[counts, n_patients, labels]`` for a categorical one. ``counts`` and
        ``labels`` are index-aligned; ``calculate_pvalues`` reads this frame
        through column ``0``.
    """
    table_columns = ["N_Patients", "Characteristics", "Mean", "Std",
                     "Categories", "Count", "Percent"]
    totalpat = filtered_df_local.shape[0]

    row_frames = []
    data_forpval_local = []
    for name in filtered_df_local.columns.values.tolist():
        values = filtered_df_local[name]
        row = {"N_Patients": [totalpat], "Characteristics": name}
        data_forpval_local.append(name)

        if name not in categorical_local:
            row["Mean"] = [values.mean()]
            row["Std"] = [values.std()]
            row["Categories"] = "NA"
            row["Count"] = "NA"
            row["Percent"] = "NA"
            data_forpval_local.append(list(values))
        else:
            tallies = values.value_counts(dropna=False)
            # value_counts orders by frequency, which can differ from one
            # group to the next. Ordering by label instead keeps the count
            # vectors of two groups comparable position by position.
            ordered = sorted(
                zip(tallies.index.to_list(), tallies.values.tolist()),
                key=lambda pair: (bool(pd.isna(pair[0])), str(pair[0])),
            )
            labels = [label for label, _ in ordered]
            counts = [count for _, count in ordered]
            percents = [(count / totalpat) * 100 for count in counts]

            row["Mean"] = "NA"
            row["Std"] = "NA"
            row["Categories"] = ", ".join(map(str, labels))
            row["Count"] = ", ".join(map(str, counts))
            row["Percent"] = ", ".join(map(str, percents))
            # The labels travel with the counts so a consumer can align the
            # two groups by category; readers of the first two elements are
            # unaffected by the extra entry.
            data_forpval_local.append([counts, totalpat, labels])

        row_frames.append(pd.DataFrame(row))

    # Concatenate once instead of growing the frame inside the loop, and
    # return an empty, correctly-shaped table when there are no columns.
    if row_frames:
        df_table1 = pd.concat(row_frames)
    else:
        df_table1 = pd.DataFrame(columns=table_columns)
    df_datapval = pd.DataFrame(data_forpval_local)

    return df_table1, df_datapval


def _category_key(label):
    """Return a hashable key for a category label, folding missing values into None."""
    return None if pd.isna(label) else label


def _chi_square_pvalue(entry1, entry2):
    """Return the chi-square p-value for two categorical payloads, or "NA".

    Each payload is ``[counts, n_patients]`` optionally followed by the list
    of category labels. When both payloads carry labels the counts are aligned
    by label (a category absent from one group counts as 0); otherwise the
    counts are compared position by position.
    """
    counts1, counts2 = list(entry1[0]), list(entry2[0])
    if len(entry1) > 2 and len(entry2) > 2:
        keys1 = [_category_key(label) for label in entry1[2]]
        keys2 = [_category_key(label) for label in entry2[2]]
        by_key1 = dict(zip(keys1, counts1))
        by_key2 = dict(zip(keys2, counts2))
        # dict.fromkeys keeps first-seen order while removing duplicates.
        all_keys = list(dict.fromkeys(keys1 + keys2))
        counts1 = [by_key1.get(key, 0) for key in all_keys]
        counts2 = [by_key2.get(key, 0) for key in all_keys]

    # A test needs at least two categories and a rectangular table.
    if len(counts1) < 2 or len(counts1) != len(counts2):
        return "NA"
    # An all-zero row makes the expected frequencies zero.
    if sum(counts1) == 0 or sum(counts2) == 0:
        return "NA"
    return scistats.chi2_contingency([counts1, counts2])[1]


def _t_test_pvalue(values1, values2):
    """Return the independent-samples t-test p-value, ignoring missing values."""
    kept1 = [value for value in values1 if not pd.isna(value)]
    kept2 = [value for value in values2 if not pd.isna(value)]
    return scistats.ttest_ind(kept1, kept2).pvalue


def calculate_pvalues(data_forpval_group1, data_forpval_group2, categorical):
    """Compare two groups characteristic by characteristic.

    Parameters
    ----------
    data_forpval_group1, data_forpval_group2 : pandas.DataFrame
        Frames whose column ``0`` alternates a characteristic name (a string)
        with that characteristic's payload: a list of raw values for a numeric
        characteristic, or ``[counts, n_patients]`` (optionally followed by
        the category labels) for a categorical one. Both frames must list the
        characteristics in the same order.
    categorical : list
        Names of the characteristics to compare with a chi-square test; all
        others are compared with an independent-samples t-test.

    Returns
    -------
    pandas.DataFrame
        Columns ``Characteristics`` and ``P-value``. The p-value is the text
        ``"NA"`` when a chi-square test cannot be formed (fewer than two
        categories, count vectors of different length, or a group with no
        counts).
    """
    # Nothing to compare: return an empty, correctly-shaped result instead of
    # failing on the missing column 0.
    if data_forpval_group1.empty:
        return pd.DataFrame(data={"Characteristics": [], "P-value": []})

    entries1 = data_forpval_group1[0].tolist()
    entries2 = data_forpval_group2[0].tolist()
    charlist, pvallist = [], []

    for position, entry1 in enumerate(entries1):
        if isinstance(entry1, str):
            # A name entry; its payload follows at the next position.
            charlist.append(entry1)
            continue

        entry2 = entries2[position]
        if charlist[-1] in categorical:
            p = _chi_square_pvalue(entry1, entry2)
        else:
            p = _t_test_pvalue(entry1, entry2)
        pvallist.append(p)

    pvalsdf = pd.DataFrame(data={"Characteristics": charlist, "P-value": pvallist})

    return pvalsdf


In [None]:
# User inputs:
#   filepath        - directory that contains the input .csv files
#   filename_group1 - name of the .csv file with the group 1 characteristics
#   filename_group2 - name of the .csv file with the group 2 characteristics
#   rows_in_table1  - characteristics (CSV column names) to show as rows of Table 1

filepath = "/Users/Project Directory"
filename_group1 = "demographics_Group1.csv"
filename_group2 = "demographics_Group2.csv"
rows_in_table1 = [
    "Age",
    "Sex",
    "AF Type",
    "Duration of AF",
    "Prior AF Ablation?",
    "LV Ejection Fraction",
    "LA size",
    "MI",
    "HTN",
    "DM",
    "CAD",
    "Stroke",
    "CHADS2-VASc",
    "BMI",
]


In [None]:
# ----- Do not modify anything beyond this --------
# Load the requested characteristics for both groups.
df_group1 = import_csv_as_df(filepath, filename_group1, rows_in_table1)
df_group2 = import_csv_as_df(filepath, filename_group2, rows_in_table1)

# The categorical columns are detected on group 1 and the same list is
# reused for group 2, so both groups are summarised the same way.
categorical = get_categorical(df_group1)
table1_group1, data_forpval_group1 = calculate_stats(df_group1, categorical)
table1_group2, data_forpval_group2 = calculate_stats(df_group2, categorical)

pvals = calculate_pvalues(data_forpval_group1, data_forpval_group2, categorical)

# Place the two group summaries side by side, then attach the p-values.
table1_final = (
    table1_group1
    .merge(table1_group2, on="Characteristics")
    .merge(pvals, on="Characteristics")
)
table1_final.to_csv(os.path.join(filepath, "Table1_Output.csv"))

print("Success! Check your .csv file in the same input directory")