# Questionnaire Data Processing

## Setup and Helper Functions

In [None]:
import json
import re
from pathlib import Path

import pandas as pd
import numpy as np
import pingouin as pg

import matplotlib.pyplot as plt
import seaborn as sns

import biopsykit as bp

from cft_analysis.datasets import CftDatasetRaw

%load_ext autoreload
%autoreload 2
%matplotlib widget

In [None]:
# Close any figures that are still open from earlier runs of the notebook.
plt.close("all")

# Use the FAU color palette provided by biopsykit as the seaborn default.
palette = bp.colors.fau_palette
sns.set_theme(context="notebook", style="ticks", palette=palette)

# Matplotlib overrides, set after sns.set_theme (which itself modifies rcParams).
plt.rcParams.update(
    {
        "figure.figsize": (10, 5),
        # font type 42 = TrueType, keeps text editable in exported PDFs
        "pdf.fonttype": 42,
        "mathtext.default": "regular",
    }
)

# Bare expression: the notebook renders the palette as cell output.
palette

## Data Import

In [None]:
# build path to data folder
# The config file is read inside a context manager so that the file handle is
# closed right after parsing instead of being left open until garbage collection.
with Path("../../config.json").open(encoding="utf-8") as config_file:
    config_dict = json.load(config_file)
# "base_path" from the config is resolved relative to the parent directory.
base_path = Path("..").joinpath(config_dict["base_path"])

In [None]:
# Instantiate the raw CFT dataset from the configured data folder.
dataset = CftDatasetRaw(base_path)
# Bare expression: the notebook renders the dataset's representation as cell output.
dataset

In [None]:
# Questionnaire data exposed by the dataset; input for all processing steps below.
data = dataset.questionnaire
# Preview the first rows.
data.head()

In [None]:
# questionnaire folder inside the data folder
quest_path = base_path / "questionnaire"
# path to export processed questionnaire data in the Data repository
quest_path_export = quest_path / "processed"
# second export location, given relative to this notebook
quest_path_export_analysis = Path("../../data/questionnaire")

# create both export folders up front so the CSV export at the end can write to them
bp.utils.file_handling.mkdirs([quest_path_export, quest_path_export_analysis])

In [None]:
# Show the export path used for the analysis copy of the processed data.
quest_path_export_analysis

## Data Processing

### Metadata

The following metadata are extracted:
* Body Mass Index (BMI)
* Age
* Gender

In [None]:
# BMI computed by biopsykit from the "weight" and "height" columns
bmi = bp.metadata.bmi(data, ["weight", "height"])

# Metadata table: BMI side by side with the "age" and "gender" columns
metadata = pd.concat([bmi, data[["age", "gender"]]], axis=1)

### Questionnaires

The following questionnaire scores are computed:
* Allgemeine Depressionsskala - Langform (ADS-L) (German version of the Center for Epidemiological Studies Depression Scale – CESD)
* Perceived Stress Scale (PSS)
* Multidimensionaler Befindlichkeitsfragebogen (MDBF) (German version of the Multidimensional Mood State Questionnaire – MDMQ): *pre* and *post* MIST

In [None]:
# Map each questionnaire key to the columns holding its items.
# The patterns are raw strings: "\d" in a normal string literal is an invalid
# escape sequence (DeprecationWarning, SyntaxWarning from Python 3.12 on).
# Element [1] of the find_cols result is used as the column list
# (it is passed as `cols=` to convert_scale further down).
quest_dict = {
    "ads_l": bp.questionnaires.utils.find_cols(data, regex_str=r"ADSL_\d+")[1],
    "pss": bp.questionnaires.utils.find_cols(data, regex_str=r"PSS_\d+")[1],
    "mdbf-pre": bp.questionnaires.utils.find_cols(data, regex_str=r"MDBF_Pre_\d+")[1],
    "mdbf-post": bp.questionnaires.utils.find_cols(data, regex_str=r"MDBF_Post_\d+")[1],
}

In [None]:
# Work on a copy so the imported questionnaire data stays untouched.
data_recode = data.copy()

# Shift the ADS-L and PSS item values by -1 (MDBF items are left as they are).
# NOTE(review): presumably the items were recorded starting at 1 while the
# score computation expects them to start at 0 — confirm.
for quest_key in ("ads_l", "pss"):
    data_recode = bp.questionnaires.utils.convert_scale(
        data=data_recode, cols=quest_dict[quest_key], offset=-1
    )

In [None]:
# Compute the scores for every questionnaire listed in quest_dict from the recoded data.
# NOTE(review): the quest_dict keys presumably select the scoring function, with
# "-pre"/"-post" acting as suffixes for repeated assessments — confirm.
quest_data = bp.questionnaires.utils.compute_scores(data_recode, quest_dict)

In [None]:
# Join metadata and questionnaire scores column-wise (rows are aligned on the index).
quest_data_total = pd.concat([metadata, quest_data], axis=1)
# Preview the combined table.
quest_data_total.head()

## Export

In [None]:
# Write the same table to both export locations: first the processed-data
# folder, then the analysis folder.
export_filename = "questionnaire_data.csv"
for export_dir in (quest_path_export, quest_path_export_analysis):
    quest_data_total.to_csv(export_dir.joinpath(export_filename))