# CARWatch – General Information

This notebook extracts general information from the CARWatch dataset, such as demographic information (gender, age, condition, ...), **BEFORE** the data were cleaned for missing data, outliers, etc.

In [None]:
from pathlib import Path
import json

import pandas as pd
import numpy as np
import pingouin as pg

import biopsykit as bp
from biopsykit.utils.time import time_to_timedelta
from biopsykit.utils.dataframe_handling import multi_xs, int_from_str_idx, camel_to_snake

from fau_colors import cmaps

import matplotlib.pyplot as plt
import seaborn as sns

from carwatch_analysis.datasets import CarWatchDatasetProcessed
from carwatch_analysis.io import load_cortisol_samples_log_times
from carwatch_analysis.data_cleaning.saliva import clean_missing_values

%matplotlib widget
%load_ext autoreload
%autoreload 2

In [None]:
# Close any figures left over from a previous run of the notebook
plt.close("all")

# Build the seaborn palette from the FAU faculty colors and make it the theme default
palette = sns.color_palette(cmaps.faculties)
sns.set_theme(context="notebook", style="ticks", palette=palette)

# Global matplotlib defaults, applied in a single update
plt.rcParams.update(
    {
        "figure.figsize": (8, 4),
        "pdf.fonttype": 42,
        "mathtext.default": "regular",
    }
)

# Rounding option for pingouin
pg.options["round"] = 4

# Display the palette as the cell output
palette

## Setup Paths

In [None]:
# Key into config.json that selects which deployment's settings (e.g. "base_path") are used below
deploy_type = "develop"

In [None]:
# Build the path to the data folder from the project configuration.
# The config file is opened in a context manager so its handle is closed
# as soon as the JSON has been parsed (the previous one-liner left it open).
config_path = Path("../../../config.json")
with config_path.open(encoding="utf-8") as config_file:
    config_dict = json.load(config_file)

# "base_path" of the selected deployment is joined onto the parent directory
base_path = Path("..").joinpath(config_dict[deploy_type]["base_path"])
base_path

In [None]:
# Folder from which exported data files (e.g. the cortisol samples) are loaded further below
export_path = Path("../../exports")
# Show the absolute location as a quick sanity check
export_path.resolve()

In [None]:
# Folder the descriptive statistics are written to at the end of the notebook
result_path = Path("../../results")
# Create the folder if it is missing; an existing folder is not an error
result_path.mkdir(exist_ok=True)

In [None]:
# Dataset object created from the configured data folder
dataset = CarWatchDatasetProcessed(base_path)
dataset

## Load Data

In [None]:
# Questionnaire data of the dataset; the descriptives below are computed from it
quest = dataset.questionnaire
quest.head()

## Descriptives

In [None]:
# Collects one table per descriptive statistic; the whole dict is written to Excel at the end
# (presumably the keys become the sheet names - confirm against biopsykit's writer)
dict_descriptives = {}

**Number of Participants**

In [None]:
# Distinct values of the "subject" index level
subject_ids = quest.index.get_level_values("subject").unique()
num_subjects = len(subject_ids)
print(f"Number of Participants: {num_subjects}")

**Age**

In [None]:
# Mean and standard deviation of the "age" column, transposed so the statistics become columns.
# NOTE(review): `quest` has a "night" index level (the gender cell selects night 0). If it holds
# one row per subject and night, these statistics are computed over subject-nights rather than
# over subjects - confirm this is intended.
age = quest[["age"]].agg(["mean", "std"]).T
dict_descriptives["Age"] = age
age

**Gender**

In [None]:
# Restrict to the rows of night 0 (presumably so that every subject is counted once)
quest_night_0 = quest.xs(0, level="night")
gender_counts_total = bp.metadata.gender_counts(quest_night_0)

# Apply the dataset's codebook to the counts before storing them
gender_stats = bp.utils.dataframe_handling.apply_codebook(gender_counts_total, dataset.codebook)
dict_descriptives["Gender_Total"] = gender_stats
gender_stats

### Subjects per Condition

In [None]:
# Number of questionnaire rows per condition
# NOTE(review): this counts rows of `quest`, not unique subjects - confirm that matches the heading.
quest.groupby("condition").size()

In [None]:
# Gender counts split by condition, computed on the full questionnaire data
gender_counts_by_condition = bp.metadata.gender_counts(quest, split_condition=True)

# Apply the dataset's codebook to the counts before storing them
gender_stats_cond = bp.utils.dataframe_handling.apply_codebook(gender_counts_by_condition, dataset.codebook)
dict_descriptives["Gender_Conditions"] = gender_stats_cond
gender_stats_cond

### Subjects per Log Type

In [None]:
# Read the exported cortisol samples for all log types
cort_path = export_path / "cortisol_samples_processed_all_log_types.csv"
cort_samples_loaded = load_cortisol_samples_log_times(cort_path)

# Remove every column whose name contains "delay"
delay_columns = cort_samples_loaded.filter(like="delay").columns
cort_samples = cort_samples_loaded.drop(columns=delay_columns)
cort_samples.head()

**Sensor + Selfreport**

In [None]:
# All log types, in the order used to arrange the rows of the per-log-type statistics table below
log_types_all = [
    "Naive",
    "Selfreport",
    "Selfreport without App",
    "Selfreport with App",
    "App",
    "Sensor + Selfreport",
    "Sensor + Selfreport without App",
    "Sensor + Selfreport with App",
    "Sensor + App",
]

In [None]:
def get_nights_subjects(df):
    """Count the distinct subjects and nights contained in a dataframe.

    Parameters
    ----------
    df : :class:`~pandas.DataFrame`
        dataframe whose index has the levels ``"subject"`` and ``"night_id"``

    Returns
    -------
    :class:`~pandas.Series`
        series with the entries ``"subjects"`` and ``"nights"``, holding the number of
        unique values of the ``"subject"`` and ``"night_id"`` index level, respectively

    """
    index = df.index
    # output label -> index level whose unique values are counted
    level_per_label = (("subjects", "subject"), ("nights", "night_id"))
    counts = {label: index.get_level_values(level).unique().size for label, level in level_per_label}
    return pd.Series(counts)

In [None]:
# Count subjects and nights separately for every log type ...
counts_per_log_type = cort_samples.groupby("log_type").apply(get_nights_subjects)
# ... and arrange the rows in the order given by `log_types_all`
stats_nights_subjects = counts_per_log_type.reindex(log_types_all)
dict_descriptives["Log_Type_Statistics"] = stats_nights_subjects

stats_nights_subjects

## Cortisol Awakening Response

In [None]:
cort_increase = cort_samples[["cortisol"]]
cort_increase = (
    (cort_increase.xs("S2", level=-1) - cort_increase.xs("S0", level=-1)) / cort_increase.xs("S0", level=-1)
) * 100
cort_increase = cort_increase.xs("Naive", level="log_type")
cort_increase = cort_increase.agg(["mean", "std", bp.utils.functions.se]).T
dict_descriptives["CAR"] = cort_increase
cort_increase

## Export

In [None]:
bp.io.write_pandas_dict_excel(dict_descriptives, result_path.joinpath("descriptive_information.xlsx"))

In [None]:
cort_increase = cort_samples[["cortisol"]]
cort_increase = (
    (cort_increase.xs("S2", level=-1) - cort_increase.xs("S0", level=-1)) / cort_increase.xs("S0", level=-1)
) * 100
cort_increase = cort_increase.xs("Naive", level="log_type")
cort_increase = cort_increase.agg(["mean", "std", bp.utils.functions.se]).T
cort_increase = cort_increase.round(2)
dict_descriptives["CAR"] = cort_increase
cort_increase

## Export

In [None]:
bp.io.write_pandas_dict_excel(dict_descriptives, result_path.joinpath("descriptive_information.xlsx"))

## Export

In [None]:
bp.io.write_pandas_dict_excel(dict_descriptives, result_path.joinpath("descriptive_information.xlsx"))