# CFT – Demographics

## Setup and Helper Functions

In [None]:
import re
from pathlib import Path

import pandas as pd
import numpy as np
import pingouin as pg

import matplotlib.pyplot as plt
import seaborn as sns

import biopsykit as bp
from biopsykit.protocols import MIST

%load_ext autoreload
%autoreload 2
%matplotlib widget

In [None]:
# Close any figures that are still open before configuring the plot style.
plt.close("all")

# Register the palette shipped with biopsykit as the seaborn default.
palette = bp.colors.fau_palette
sns.set_theme(context="notebook", style="ticks", palette=palette)

# Set after `set_theme` so these are the values left in rcParams.
plt.rcParams.update(
    {
        "figure.figsize": (10, 5),
        "pdf.fonttype": 42,
        "mathtext.default": "regular",
    }
)

# Last expression of the cell: shows the palette as the cell output.
palette

## Import Data

In [None]:
# Base directory that all other paths in this notebook are built from.
base_path = Path("../../../")
data_path = base_path / "data/for_analysis"

# NOTE(review): the codebook path steps one level above `base_path` -- confirm
# that this is the intended location.
codebook_path = base_path / "../Data/questionnaire/cft_questionnaire_codebook.csv"

# Output locations for plots and exported statistics.
results_path = base_path / "results"
plot_path = results_path / "plots"
stats_path = results_path / "statistics"

bp.utils.file_handling.mkdirs([results_path, plot_path, stats_path])

In [None]:
# Load the codebook and the questionnaire data used throughout the notebook.
codebook = bp.io.load_codebook(codebook_path)
quest_data = bp.io.load_questionnaire_data(data_path / "questionnaire_data.csv")

# Preview the first rows of both tables (codebook first, then the data).
for frame in (codebook, quest_data):
    display(frame.head())

## Gender

In [None]:
# Pass the "gender" column (kept as a one-column DataFrame) through the
# codebook before counting.
gender_raw = quest_data[["gender"]]
df_gender = bp.utils.dataframe_handling.apply_codebook(gender_raw, codebook)

# Gender counts once with default arguments and once with split_condition=True.
df_gender_count = bp.metadata.gender_counts(df_gender)
df_gender_count_cond = bp.metadata.gender_counts(df_gender, split_condition=True)

for counts in (df_gender_count, df_gender_count_cond):
    display(counts)

## Age & BMI

In [None]:
# Questionnaire columns summarized and compared in this section.
age_bmi_cols = ["age", "BMI"]

In [None]:
# Mean and standard deviation of age and BMI over all rows of the
# questionnaire data; transposed so each variable becomes a row.
age_bmi = quest_data.loc[:, age_bmi_cols].agg(["mean", "std"]).transpose()
age_bmi

In [None]:
# Same summary as above, computed separately per condition.
# NOTE(review): "condition" is not among the selected columns, so the groupby
# presumably resolves it as an index level of `quest_data` -- confirm.
age_bmi_by_condition = quest_data[age_bmi_cols].groupby("condition")
age_bmi_cond = age_bmi_by_condition.agg(["mean", "std"]).transpose()
age_bmi_cond

In [None]:
# Reshape age/BMI into long format: the column labels become a "variable"
# index level and all values end up in a single "data" column.
data_analysis = (
    quest_data[age_bmi_cols]
    .rename_axis(columns="variable")
    .stack()
    .to_frame("data")
)

# Pipeline steps as (step type, step name) pairs.
steps = [
    ("prep", "normality"),
    ("test", "pairwise_ttests"),
]

# "test__parametric" presumably forwards `parametric=False` to the "test"
# step -- confirm against the StatsPipeline documentation.
params = {
    "dv": "data",
    "between": "condition",
    "groupby": "variable",
    "test__parametric": False,
}

# Run the analysis, write the results to disk, then show them in the notebook.
pipeline = bp.stats.StatsPipeline(steps, params)
pipeline.apply(data_analysis);
pipeline.export_statistics(stats_path / "stats_bmi_age.xlsx")
pipeline.display_results()