In [1]:
import numpy as np
import pandas as pd

Load relative prevalence for all individuals

In [2]:
# Headerless TSV: the first column becomes the index and the column
# labelled 1 is pulled out, so RP is a Series rather than a DataFrame.
RP = pd.read_csv(
    "bootstrap/relative_prevalence.tsv",
    sep="\t",
    index_col=[0],
    header=None,
)[1]


Load relative prevalence without URLs for all individuals

In [3]:
# Headerless TSV with the first column as index. The result is kept as a
# DataFrame; later cells select the column labelled 1 from it.
RP_no_URLs = pd.read_csv(
    "bootstrap/relative_prevalence_no_urls_nor_http.tsv",
    sep="\t",
    index_col=0,
    header=None,
)

Load relative prevalence per category

In [4]:
# TSV with a header row (one column per category, as used by later cells);
# the first column is used as the row index.
RP_category = pd.read_csv(
    "bootstrap/relative_prevalence_category.tsv",
    sep="\t",
    index_col=[0],
)

Load relative prevalence without URLs per category

In [5]:
# TSV with a header row; the first column is used as the row index.
# Later cells select the same category columns as for RP_category.
RP_category_no_URLs = pd.read_csv(
    "bootstrap/relative_prevalence_no_urls_nor_http_category.tsv",
    sep="\t",
    index_col=0,
)

Define output-formatting functions

In [6]:
def output_median(x):
    """Format the median of ``x`` as a string with three decimals.

    A trailing ``*`` is appended when the 2.5% quantile of ``x`` is
    strictly greater than 1, i.e. when the lower end of the central 95%
    interval lies above 1.

    Parameters
    ----------
    x : object exposing ``median()`` and ``quantile(q=...)``
        In this notebook, a pandas Series of values.

    Returns
    -------
    str
        E.g. ``"1.129*"`` or ``"1.110"``.
    """
    lower_bound = x.quantile(q=0.025)
    marker = "*" if lower_bound > 1 else ""
    return f"{x.median():.3f}{marker}"


def output_CI(x):
    """Format the 2.5%-97.5% quantile interval of ``x`` for a LaTeX table.

    Both bounds are rendered with three decimals and wrapped in
    math-mode brackets, giving ``$[$low, high$]$``.

    Parameters
    ----------
    x : object exposing ``quantile(q=...)``
        In this notebook, a pandas Series of values.

    Returns
    -------
    str
        E.g. ``"$[$1.102, 1.157$]$"``.
    """
    low = x.quantile(q=0.025)
    high = x.quantile(q=0.975)
    return f"$[${low:.3f}, {high:.3f}$]$"

# Initialize Table

In [7]:
# Category columns ordered by their median, largest first.
order = (
    RP_category
    .median()
    .sort_values(ascending=False)
    .index
)

# Row labels: a "Total" row followed by one row per category in that order.
idxvals = np.concatenate((["Total"], order), axis=None)
table = pd.DataFrame(
    index=pd.Index(idxvals, name="CD Category"),
)

Include relative prevalence values in table

In [8]:
# Median column: overall value first, then one value per category column.
table.loc["Total", r"$PR$ median"] = output_median(RP)
table.loc[order, r"$PR$ median"] = RP_category.loc[:, order].apply(output_median)

# Interval column, filled in the same two steps.
table.loc["Total", r"$PR$ 95% CI"] = output_CI(RP)
table.loc[order, r"$PR$ 95% CI"] = RP_category.loc[:, order].apply(output_CI)

Include relative prevalence values without URLs in table

In [9]:
# RP_no_URLs is a DataFrame; its column labelled 1 holds the overall values.
# Median column: overall value first, then one value per category column.
table.loc["Total", r"$PR_{http}$ median"] = output_median(RP_no_URLs.loc[:, 1])
table.loc[order, r"$PR_{http}$ median"] = (
    RP_category_no_URLs.loc[:, order].apply(output_median)
)

# Interval column, filled in the same two steps.
table.loc["Total", r"$PR_{http}$ 95% CI"] = output_CI(RP_no_URLs.loc[:, 1])
table.loc[order, r"$PR_{http}$ 95% CI"] = (
    RP_category_no_URLs.loc[:, order].apply(output_CI)
)

# Table

In [10]:
# Show the assembled table as this cell's output.
table

Unnamed: 0_level_0,$PR$ median,$PR$ 95% CI,$PR_{http}$ median,$PR_{http}$ 95% CI
CD Category,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Total,1.129*,"$[$1.102, 1.157$]$",1.105*,"$[$1.079, 1.131$]$"
Personalizing,2.084*,"$[$1.940, 2.239$]$",1.942*,"$[$1.803, 2.088$]$"
Emotional Reasoning,1.983*,"$[$1.759, 2.228$]$",1.778*,"$[$1.566, 2.013$]$"
Overgeneralizing,1.441*,"$[$1.367, 1.518$]$",1.335*,"$[$1.268, 1.404$]$"
Mental Filtering,1.296*,"$[$1.129, 1.471$]$",1.119,"$[$0.961, 1.288$]$"
Disqualifying the Positive,1.229*,"$[$1.142, 1.320$]$",1.137*,"$[$1.056, 1.222$]$"
Labeling and mislabeling,1.207*,"$[$1.159, 1.256$]$",1.137*,"$[$1.094, 1.184$]$"
Dichotomous Reasoning,1.131*,"$[$1.101, 1.162$]$",1.107*,"$[$1.079, 1.136$]$"
Fortune-telling,1.110,"$[$0.955, 1.219$]$",1.135*,"$[$1.073, 1.200$]$"
Magnification and Minimization,1.084*,"$[$1.039, 1.130$]$",1.103*,"$[$1.058, 1.152$]$"


In [11]:
# Write the table as "&"-separated text for use in a LaTeX tabular.
# NOTE(review): to_csv does not append LaTeX row terminators (\\), and the
# "%" in the "95% CI" headers is a LaTeX comment character. Presumably the
# file is post-processed before inclusion; confirm.
table.to_csv(
    path_or_buf="figures/TableS4.tex",
    sep="&",
)