In [1]:
import numpy as np
import pandas as pd

Load cognitive distortion schemata (CDS)

In [2]:
# Read the schema list; the "markers" column becomes the row index.
_CDS = pd.read_csv("data/list_of_CDS.tsv", sep="\t", index_col="markers")
# Empty "variants" cells are replaced by the string "[]" so that every cell can
# be evaluated below. The result is assigned back to the column instead of
# calling `fillna(..., inplace=True)` on the selected column: that chained
# in-place form emits a warning on recent pandas versions and has no effect on
# `_CDS` once Copy-on-Write is enabled, which would leave NaN values for `eval`.
_CDS["variants"] = _CDS["variants"].fillna("[]")
# Turn each string cell into the Python object it spells out.
# NOTE(review): `eval` executes whatever expression the TSV contains. If the
# cells are always plain list literals, `ast.literal_eval` would be a safer
# drop-in replacement -- confirm against the data file before switching.
_CDS["variants"] = _CDS["variants"].apply(eval)

Determine n-gram size for each schema, label schemata that contain first person pronouns, and group CDS per category.

In [3]:
# Group the schemata by the value of their "categories" column.
# Only the per-category grouping is performed in this cell.
per_cat = _CDS.groupby(by="categories")

Load per CDS bootstrap results

In [4]:
def _read_bootstrap_table(path):
    """Read a tab-separated bootstrap result file, using its first column as the index."""
    return pd.read_csv(path, sep="\t", index_col=[0])


# Both result files share the same layout and are read with identical options.
RP_phrase = _read_bootstrap_table("bootstrap/relative_prevalence_phrase.tsv")
RP_category = _read_bootstrap_table("bootstrap/relative_prevalence_category.tsv")

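Determine which CDS are used significantly more often in the $D$ cohort, i.e. those for which the 2.5th percentile of the bootstrapped relative prevalence is greater than 1.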

In [5]:
# 2.5th percentile of every column of RP_phrase (one value per column).
lower_quantile = RP_phrase.quantile(q=0.025)
# Labels of the columns whose 2.5th percentile is strictly greater than 1.
significant_markers = RP_phrase.loc[:, lower_quantile > 1].columns
# Select the rows of the schema table that carry those labels.
significant = _CDS.loc[significant_markers, :]

# Initialize Table

In [6]:
# Order the categories by the median of their RP_category column, largest first.
category_medians = RP_category.median()
order = category_medians.sort_values(ascending=False).index
# Row labels of the table: the ordered categories followed by a 'Total' row.
idxvals = np.concatenate([order, ['Total']], axis=None)
row_labels = pd.Index(idxvals, name="CD Category")
# Start with an empty frame; the columns are added in the following cells.
table = pd.DataFrame(index=row_labels)

Count number of CDS per category

In [7]:
n_cd_col = r"$N_{CD}$"
# Per-category count of non-null "variants" entries, aligned on the table index.
table[n_cd_col] = per_cat["variants"].count()
# The 'Total' row holds the number of rows in the full schema table.
table.loc["Total", n_cd_col] = len(_CDS.index)
# Assigning into the 'Total' row may leave the column as float; store integers.
table[n_cd_col] = table[n_cd_col].astype(int)

Count the number of schemata that occur significantly more often in the $D$ cohort and determine what percentage of the CDS in each category is significant.

In [8]:
n_star_col = r"$N^*$"
# Per-category count of significant schemata.
n_significant = significant.groupby("categories")["variants"].count()
# A category without any significant schema does not appear in the groupby
# result. Reindex to the table rows with 0 as the fill value so that such
# categories get a count of 0 instead of NaN (NaN would make the integer cast
# below raise).
table[n_star_col] = n_significant.reindex(table.index, fill_value=0)
# The 'Total' row holds the number of significant schemata overall.
table.loc["Total", n_star_col] = significant.index.size
table[n_star_col] = table[n_star_col].astype(int)

# Percentage of significant schemata per row, rounded to one decimal place.
table[r"$N^*_r(\%)$"] = np.around(100 * table[n_star_col] / table[r"$N_{CD}$"], decimals=1)

# Table

In [9]:
# Display the assembled table as the cell output.
table

Unnamed: 0_level_0,$N_{CD}$,$N^*$,$N^*_r(\%)$
CD Category,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Personalizing,14,8,57.1
Emotional Reasoning,7,4,57.1
Overgeneralizing,21,12,57.1
Mental Filtering,14,3,21.4
Disqualifying the Positive,14,3,21.4
Labeling and mislabeling,44,15,34.1
Dichotomous Reasoning,23,14,60.9
Fortune-telling,8,2,25.0
Magnification and Minimization,8,3,37.5
Should statements,5,1,20.0


In [10]:
# Write the table to disk with '&' as the column separator.
# NOTE(review): `to_csv` emits plain '&'-separated lines without LaTeX row
# terminators or a tabular environment; if the file is meant to be included
# directly in a LaTeX document, `DataFrame.to_latex` may be a better fit.
table_path = "figures/Table4.tex"
table.to_csv(table_path, sep="&")