# CFT – Demographics

## Setup and Helper Functions

In [None]:
import re
from pathlib import Path

import pandas as pd
import numpy as np
import pingouin as pg

import matplotlib.pyplot as plt
import seaborn as sns

import biopsykit as bp

from cft_analysis.datasets import CftDatasetProcessed

%load_ext autoreload
%autoreload 2
%matplotlib widget

In [None]:
# Close any figures that are still open from earlier runs of this notebook.
plt.close("all")

# Use the FAU palette from biopsykit as the default seaborn palette.
palette = bp.colors.fau_palette
sns.set_theme(context="notebook", style="ticks", palette=palette)

# Figure defaults, applied after the seaborn theme so they are not overwritten by it.
# pdf.fonttype 42 stores fonts as TrueType in exported PDFs (see matplotlib rcParams).
plt.rcParams.update(
    {
        "figure.figsize": (10, 5),
        "pdf.fonttype": 42,
        "mathtext.default": "regular",
    }
)

# Last expression of the cell: display the palette.
palette

## Import Data

In [None]:
# base directory of the study data (relative path, resolved against the notebook's working directory)
base_path = Path("../../data")

In [None]:
# Build the processed CFT dataset on top of the data folder.
# NOTE(review): exclude_subjects=True presumably drops subjects marked for exclusion —
# confirm against CftDatasetProcessed.
dataset = CftDatasetProcessed(base_path, exclude_subjects=True)
# Last expression of the cell, so the notebook displays the dataset object.
dataset

In [None]:
# Output folders: "results" sits next to the data folder, with one subfolder
# for plots and one for statistics exports.
results_path = base_path / "../results"
plot_path = results_path / "plots"
stats_path = results_path / "statistics"

# NOTE(review): mkdirs presumably creates the folders that do not exist yet — confirm in biopsykit.
bp.utils.file_handling.mkdirs([results_path, plot_path, stats_path])

## Gender

In [None]:
# Questionnaire table of the dataset; it is the input for the gender, age and BMI summaries below.
# NOTE(review): the later groupby("condition") assumes "condition" is an index level or column
# of this table — confirm against CftDatasetProcessed.questionnaire_recoded.
quest_data = dataset.questionnaire_recoded
# Preview the first rows.
quest_data.head()

In [None]:
# Gender counts over all subjects, labelled "Total" on a new outer "condition" index level
# so they line up with the per-condition counts.
gender_count_total = pd.concat(
    {"Total": bp.metadata.gender_counts(quest_data)},
    names=["condition"],
)

# Gender counts computed separately per condition.
gender_count_cond = bp.metadata.gender_counts(quest_data, split_condition=True)

# Stack both tables and move the "gender" index level into the columns.
gender_count = pd.concat([gender_count_total, gender_count_cond]).unstack("gender")

gender_count

## Age & BMI

In [None]:
# Questionnaire columns that are summarized (mean/std) and statistically compared in this section.
age_bmi_cols = ["age", "BMI"]

In [None]:
# Summary statistics reported for every variable.
summary_stats = ["mean", "std"]
age_bmi_data = quest_data[age_bmi_cols]

# Mean/std over all subjects, reshaped to one "Total" column indexed by (variable, statistic).
age_bmi_total = age_bmi_data.agg(summary_stats).T.stack()
age_bmi_total = pd.concat({"Total": age_bmi_total}, names=["condition"]).unstack(level=0)

# Mean/std per condition, transposed to the same (variable, statistic) row index.
age_bmi_cond = age_bmi_data.groupby("condition").agg(summary_stats).T

# Put "Total" next to the per-condition columns, then flip so that each row is one group.
age_bmi = age_bmi_total.join(age_bmi_cond).T
age_bmi

### Statistics

In [None]:
# Long format: one row per (original index, variable) with the value in a single "data" column.
data_analysis = (
    quest_data[age_bmi_cols]
    .rename_axis(columns="variable")
    .stack()
    .to_frame("data")
)

# Pipeline steps as (step type, test name) pairs: a normality check, then pairwise tests.
steps = [("prep", "normality"), ("test", "pairwise_ttests")]

# "dv" is the value column, "between" the grouping factor, and "groupby" runs the analysis
# per variable. NOTE(review): "test__parametric" presumably forwards parametric=False to the
# "test" step (i.e. non-parametric pairwise tests) — confirm in the StatsPipeline documentation.
params = {
    "dv": "data",
    "between": "condition",
    "groupby": "variable",
    "test__parametric": False,
}

pipeline = bp.stats.StatsPipeline(steps, params)
pipeline.apply(data_analysis)
pipeline.export_statistics(stats_path / "stats_bmi_age.xlsx")
pipeline.display_results()

## Export

In [None]:
# Write the gender count table into the results folder.
gender_count.to_csv(results_path / "gender_count.csv")

In [None]:
# Write the age/BMI summary table into the results folder.
age_bmi.to_csv(results_path / "age_bmi.csv")