In [1]:
import numpy as np
import pandas as pd

Load relative prevalence for all individuals

In [2]:
# Header-less TSV: the first column becomes the index and the remaining
# column (labelled 1) is kept as a Series.
RP = pd.read_csv(
    "bootstrap/relative_prevalence.tsv",
    sep="\t",
    header=None,
    index_col=[0],
)[1]

Load relative prevalence without first person pronouns for all individuals

In [3]:
# Header-less TSV: the first column becomes the index and the remaining
# column (labelled 1) is kept as a Series.
RP_no_FPP = pd.read_csv(
    "bootstrap/relative_prevalence_without_FPP.tsv",
    sep="\t",
    header=None,
    index_col=[0],
)[1]

Load relative prevalence per category

In [4]:
# TSV with a header row; the first column is used as the index and the
# remaining columns are addressed by category name further down.
RP_category = pd.read_csv(
    "bootstrap/relative_prevalence_category.tsv",
    sep="\t",
    index_col=[0],
)

Load relative prevalence without first person pronouns per category

In [5]:
# TSV with a header row; the first column is used as the index and the
# remaining columns are addressed by category name further down.
RP_category_no_FPP = pd.read_csv(
    "bootstrap/relative_prevalence_category_without_FPP.tsv",
    sep="\t",
    index_col=[0],
)

Load relative prevalence for bootstrapped CDS

In [6]:
# Header-less TSV: the first column becomes the index and the remaining
# column (labelled 1) is kept as a Series.
RP_CDS = pd.read_csv(
    "bootstrap/relative_prevalence_CDS.tsv",
    sep="\t",
    header=None,
    index_col=[0],
)[1]

Load relative prevalence for bootstrapped CDS per category

In [7]:
# The CDS list supplies the category labels; one bootstrap file is read per
# distinct category, with spaces in the label replaced by "-" in the file name.
_CDS = pd.read_csv("data/list_of_CDS.tsv", sep="\t", index_col="markers")
_CDS_categories = _CDS.categories.unique()

# Collect the per-category Series first and build the frame in one step.
# Filling a pre-allocated empty (object-dtype) frame column by column leaves
# the resulting dtype dependent on the pandas version; constructing from a
# dict of Series keeps the numeric dtype of the files. Passing `index=` still
# aligns every Series to RP_CDS.index, as the original assignment did.
_CDS_columns = {}
for cat in _CDS_categories:
    fn = "bootstrap/relative_prevalence_CDS_{}.tsv".format(cat.replace(" ", "-"))
    _CDS_columns[cat] = pd.read_csv(fn, sep="\t", index_col=[0], header=None)[1]

RP_category_CDS = pd.DataFrame(_CDS_columns, index=RP_CDS.index, columns=_CDS_categories)

Define output-formatting functions

In [8]:
def output_median(x):
    """Return the median of ``x`` as a three-decimal string.

    A trailing ``*`` is appended when the 2.5% quantile of ``x`` is strictly
    greater than 1; otherwise the bare number is returned.
    """
    lower_bound = x.quantile(q=0.025)
    if lower_bound > 1:
        marker = "*"
    else:
        marker = ""
    return "{:.3f}".format(x.median()) + marker


def output_CI(x):
    """Return the 2.5%-97.5% quantile interval of ``x`` as a string.

    Both bounds are printed with three decimals, separated by ", ", and
    wrapped in the literal markers ``$[$`` and ``$]$``.
    """
    lower = x.quantile(q=0.025)
    upper = x.quantile(q=0.975)
    bounds = "{:.3f}, {:.3f}".format(lower, upper)
    return "".join(("$[$", bounds, r"$]$"))

# Initialize Table

In [9]:
# Rank the categories by their median value, largest first.
category_medians = RP_category.median()
order = category_medians.sort_values(ascending=False).index

# Row labels: a leading "Total" row followed by the ranked categories.
idxvals = np.concatenate((['Total'], order), axis=None)
row_index = pd.Index(idxvals, name="CD Category")
table = pd.DataFrame(index=row_index)

Include relative prevalence values in table

In [10]:
# Fill the median column first, then the interval column. For each, the
# "Total" row comes from the overall series and the category rows from the
# matching columns of the per-category frame.
for label, formatter in (
    (r"$PR$ median", output_median),
    (r"$PR$ 95% CI", output_CI),
):
    table.loc["Total", label] = formatter(RP)
    table.loc[order, label] = RP_category.loc[:, order].apply(formatter)

Include relative prevalence without first-person pronouns (FPP) in table

In [11]:
# "Personalizing" gets a "/" placeholder instead of a computed value
# (presumably it has no usable column in the FPP-free data; confirm).
# Exclude it by name rather than by position: slicing `order[1:]` only skips
# the right category while "Personalizing" happens to rank first.
no_fpp_order = order[order != "Personalizing"]

table.loc["Total", r"$PR_1$ median"] = output_median(RP_no_FPP)
table.loc[no_fpp_order, r"$PR_1$ median"] = RP_category_no_FPP.loc[:, no_fpp_order].apply(output_median)
table.loc["Personalizing", r"$PR_1$ median"] = "/"

table.loc["Total", r"$PR_1$ 95% CI"] = output_CI(RP_no_FPP)
table.loc[no_fpp_order, r"$PR_1$ 95% CI"] = RP_category_no_FPP.loc[:, no_fpp_order].apply(output_CI).fillna("/")
table.loc["Personalizing", r"$PR_1$ 95% CI"] = "/"

Include relative prevalence based on CDS bootstrap

In [12]:
# CDS-bootstrap estimates: the overall series fills the "Total" row and the
# per-category frame, taken in table order, fills the category rows.
cds_by_category = RP_category_CDS.loc[:, order]

table.loc["Total", r"$PR_C$ median"] = output_median(RP_CDS)
table.loc[order, r"$PR_C$ median"] = cds_by_category.apply(output_median)

table.loc["Total", r"$PR_C$ 95% CI"] = output_CI(RP_CDS)
table.loc[order, r"$PR_C$ 95% CI"] = cds_by_category.apply(output_CI)

# Table

In [13]:
# Display the assembled table (one "Total" row plus one row per category).
table

Unnamed: 0_level_0,$PR$ median,$PR$ 95% CI,$PR_1$ median,$PR_1$ 95% CI,$PR_C$ median,$PR_C$ 95% CI
CD Category,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Total,1.129*,"$[$1.102, 1.157$]$",1.110*,"$[$1.082, 1.137$]$",1.231*,"$[$1.168, 1.320$]$"
Personalizing,2.084*,"$[$1.940, 2.239$]$",/,/,2.403*,"$[$1.676, 3.043$]$"
Emotional Reasoning,1.983*,"$[$1.759, 2.228$]$",1.815*,"$[$1.467, 2.217$]$",2.316*,"$[$2.013, 3.158$]$"
Overgeneralizing,1.441*,"$[$1.367, 1.518$]$",1.344*,"$[$1.271, 1.420$]$",1.605*,"$[$1.414, 1.776$]$"
Mental Filtering,1.296*,"$[$1.129, 1.471$]$",1.191,"$[$0.931, 1.491$]$",1.466*,"$[$1.171, 1.924$]$"
Disqualifying the Positive,1.229*,"$[$1.142, 1.320$]$",1.229*,"$[$1.142, 1.320$]$",1.401*,"$[$1.203, 1.536$]$"
Labeling and mislabeling,1.207*,"$[$1.159, 1.256$]$",1.090*,"$[$1.041, 1.139$]$",1.336*,"$[$1.176, 1.554$]$"
Dichotomous Reasoning,1.131*,"$[$1.101, 1.162$]$",1.131*,"$[$1.101, 1.162$]$",1.217*,"$[$1.159, 1.305$]$"
Fortune-telling,1.110,"$[$0.955, 1.219$]$",0.908,"$[$0.735, 1.037$]$",1.177,"$[$0.855, 1.506$]$"
Magnification and Minimization,1.084*,"$[$1.039, 1.130$]$",1.084*,"$[$1.039, 1.130$]$",1.085*,"$[$1.020, 1.412$]$"


In [14]:
# Write the table as "&"-separated text so the cells can be used as LaTeX
# tabular columns.
# NOTE(review): to_csv adds no LaTeX row terminator, and the "%" in the
# column headers is not escaped. Presumably the file is post-processed or
# pasted by hand; confirm.
table3_path = "figures/Table3.tex"
table.to_csv(table3_path, sep="&")