# descriptive analysis

In [1]:
import os
import pandas as pd
from pathlib import Path
from pandas_plots import tbl, pls, hlp
import duckdb as ddb

# * report interpreter / package versions, explicitly including pygwalker
hlp.show_package_version(["pygwalker"])

# * select the light theme via environment variable
# * NOTE(review): presumably read by pandas_plots when rendering - confirm
os.environ["THEME"] = "light"

# * input and output data directories, relative to the notebook location
dir_data_in, dir_data_out = Path("../data/in"), Path("../data/out")

# * DuckDB connection with default settings
con = ddb.connect()

🐍 3.12.8 | 📦 pygwalker: 0.4.9.15 | 📦 pandas: 2.2.3 | 📦 numpy: 1.26.4 | 📦 duckdb: 1.2.2 | 📦 pandas-plots: 0.12.23 | 📦 connection_helper: 0.9.7


## import data

In [3]:
# * load the condensed dataset (semicolon-separated, UTF-8 with BOM, no index column)
df_csv_condensed = pd.read_csv(
    dir_data_out / "df_csv_condensed.csv",
    sep=";",
    encoding="utf-8-sig",
    index_col=None,
)

# * CTCAE should be a string with integer values and MUST have a real <NA> as missing:
# * nullable integer -> str (missing values become the literal "<NA>") -> back to a real pd.NA
df_csv_condensed["[01.01] CTCAE"] = (
    df_csv_condensed["[01.01] CTCAE"]
    .astype("Int64")
    .astype(str)
    .replace("<NA>", pd.NA)
)

## tables

In [None]:
# * question codes selected for the descriptive tables, grouped by section
stubs = (
    # section 01
    "[01.01]", "[01.02]", "[01.03]",
    # section 02
    "[02.02]", "[02.03]", "[02.04]",
    # section 03
    "[03.02]", "[03.03]", "[03.04]", "[03.06]", "[03.07]",
    "[03.08]", "[03.08.01]", "[03.08.02]",
    "[03.09]",
    "[03.10]", "[03.10.01]",
    "[03.11]", "[03.11.02]",
    "[03.12]",
    "[03.13]", "[03.13.01]", "[03.13.02]",
    "[03.14]", "[03.14.01]", "[03.14.02]",
    "[03.15]", "[03.17]",
    # section 05
    "[05.01]", "[05.02]", "[05.03]", "[05.04]", "[05.05]",
    "[05.06]", "[05.07]", "[05.08]", "[05.09]",
)

# * resolve the codes to the full column names of the dataset
cols = hlp.find_cols(all_cols=df_csv_condensed.columns, stubs=stubs)

# * sanity check: show the first few resolved column names
print(cols[:5])

['[01.01] CTCAE', '[01.02] Date', '[01.03] Exercise-related', '[02.02] Type', '[02.03] Trigger']


In [7]:
# # ! keep only rows that have a CTCAE value
df_csv_condensed_cleansed = df_csv_condensed.dropna(subset=["[01.01] CTCAE"])

In [12]:
# # * loop over all selected columns
def print_survey(df_in: pd.DataFrame) -> None:
    """Display one pivot table per selected survey column, broken down by CTCAE.

    Iterates over the module-level ``cols`` list. The CTCAE column itself and
    every column whose dtype is not ``object`` are skipped. For each remaining
    column, rows with a missing answer or missing CTCAE are dropped,
    multi-value answers separated by ``|`` are split into one row per value,
    and the result is passed to ``tbl.pivot_df`` and shown via ``display``.

    Args:
        df_in: Survey data containing ``"[01.01] CTCAE"`` and the columns
            listed in ``cols``. It is not modified.
    """
    ctcae_col = "[01.01] CTCAE"

    for col in cols:
        # * skip CTCAE and columns that are not str / object
        if col == ctcae_col or df_in[col].dtype.kind != "O":
            continue

        # * combine the column with CTCAE, remove missings
        # * (selection + dropna already yields a new frame, no extra copy needed)
        df = df_in[[col, ctcae_col]].dropna()

        # * split to create arrays in cells, then explode to have atomic values
        df[col] = df[col].str.split("|")
        df = df.explode(col)

        # * remove blanks around every atomic value, so that "a | b" and "a|b"
        # * end up in the same categories
        df[col] = df[col].str.strip()

        # * to pivot (rows sorted by CTCAE, descending)
        table = tbl.pivot_df(
            df=df.sort_values(ctcae_col, ascending=False),
            dropna=False,
            data_bar_axis="x",
            col1_width=600,
        )
        display(table)


In [13]:
# * render the pivot tables for all rows that have a CTCAE value
print_survey(df_csv_condensed_cleansed)

[01.01] CTCAE,1,2,3,Total
[01.02] Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Already present,23 (12.9%),8 (4.5%),1 (0.6%),32 (18.0%)
First occurrence,109 (61.2%),35 (19.7%),2 (1.1%),146 (82.0%)
Total,132 (74.2%),43 (24.2%),3 (1.7%),178 (100.0%)


[01.01] CTCAE,1,2,3,Total
[01.03] Exercise-related,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
No,19 (10.7%),7 (3.9%),1 (0.6%),27 (15.2%)
Yes,113 (63.5%),36 (20.2%),2 (1.1%),151 (84.8%)
Total,132 (74.2%),43 (24.2%),3 (1.7%),178 (100.0%)


[01.01] CTCAE,1,2,3,Total
[02.02] Type,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Bone injuries,0,0,2 (0.9%),2 (0.9%)
Circulatory problems,28 (12.3%),2 (0.9%),0,30 (13.2%)
Coughing fit,4 (1.8%),0,0,4 (1.8%)
Enuresis,2 (0.9%),0,0,2 (0.9%)
Itching,3 (1.3%),1 (0.4%),0,4 (1.8%)
Muscle cramps,1 (0.4%),0,0,1 (0.4%)
Muscle soreness,4 (1.8%),2 (0.9%),0,6 (2.6%)
Nausea/Vomiting,31 (13.6%),4 (1.8%),0,35 (15.4%)
Nosebleed,1 (0.4%),1 (0.4%),0,2 (0.9%)
Pain,61 (26.8%),30 (13.2%),2 (0.9%),93 (40.8%)


[01.01] CTCAE,1,2,3,Total
[02.03] Trigger,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Colliding,7 (2.5%),2 (0.7%),0,9 (3.2%)
Coordination problems,14 (4.9%),8 (2.8%),1 (0.4%),23 (8.1%)
Environmental conditions,5 (1.8%),2 (0.7%),0,7 (2.5%)
Fall event,21 (7.4%),19 (6.7%),1 (0.4%),41 (14.4%)
Medical therapy,67 (23.6%),11 (3.9%),0,78 (27.5%)
Other,1 (0.4%),0,0,1 (0.4%)
Physical strain,93 (32.7%),17 (6.0%),2 (0.7%),112 (39.4%)
Psychological strain,11 (3.9%),2 (0.7%),0,13 (4.6%)
Total,219 (77.1%),61 (21.5%),4 (1.4%),284 (100.0%)


[01.01] CTCAE,1,2,3,Total
[02.04] Affected body parts,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Abdomen,7 (3.8%),4 (2.2%),0,11 (5.9%)
Back,6 (3.2%),3 (1.6%),0,9 (4.8%)
Buttocks,5 (2.7%),2 (1.1%),0,7 (3.8%)
Chest,4 (2.2%),1 (0.5%),0,5 (2.7%)
Coccyx,2 (1.1%),0,0,2 (1.1%)
Full body,6 (3.2%),2 (1.1%),1 (0.5%),9 (4.8%)
Head,10 (5.4%),11 (5.9%),0,21 (11.3%)
Internal medicine type,59 (31.7%),6 (3.2%),0,65 (34.9%)
Intestine,2 (1.1%),0,0,2 (1.1%)
Intimate area,0,1 (0.5%),0,1 (0.5%)


[01.01] CTCAE,1,2,3,Total
[03.02] With hospitalization,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
No,131 (73.6%),43 (24.2%),1 (0.6%),175 (98.3%)
U,0,0,1 (0.6%),1 (0.6%)
Yes,1 (0.6%),0,1 (0.6%),2 (1.1%)
Total,132 (74.2%),43 (24.2%),3 (1.7%),178 (100.0%)


[01.01] CTCAE,1,2,3,Total
[03.03] Medical follow-up treatment,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
No,120 (67.4%),8 (4.5%),0,128 (71.9%)
U,3 (1.7%),0,0,3 (1.7%)
Yes,9 (5.1%),35 (19.7%),3 (1.7%),47 (26.4%)
Total,132 (74.2%),43 (24.2%),3 (1.7%),178 (100.0%)


[01.01] CTCAE,1,2,3,Total
[03.04] With delayed therapy protocol,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
No,132 (74.2%),43 (24.2%),3 (1.7%),178 (100.0%)
Total,132 (74.2%),43 (24.2%),3 (1.7%),178 (100.0%)


[01.01] CTCAE,1,2,3,Total
[03.06] Increased care needs,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
No,131 (73.6%),43 (24.2%),0,174 (97.8%)
Yes,1 (0.6%),0,3 (1.7%),4 (2.2%)
Total,132 (74.2%),43 (24.2%),3 (1.7%),178 (100.0%)


[01.01] CTCAE,1,2,3,Total
[03.07] With medication administration,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
No,111 (64.2%),25 (14.5%),0,136 (78.6%)
Yes,0,2 (1.2%),3 (1.7%),5 (2.9%)
weiß nicht,20 (11.6%),12 (6.9%),0,32 (18.5%)
Total,131 (75.7%),39 (22.5%),3 (1.7%),173 (100.0%)


[01.01] CTCAE,1,2,3,Total
[03.08] Occurrence of fear and uncertainty,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Ja,33 (18.6%),21 (11.9%),2 (1.1%),56 (31.6%)
Nein,99 (55.9%),22 (12.4%),0,121 (68.4%)
Total,132 (74.6%),43 (24.3%),2 (1.1%),177 (100.0%)


[01.01] CTCAE,1,2,3,Total
[03.08.01] Affected person,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
For affected individuals,33 (37.5%),19 (21.6%),2 (2.3%),54 (61.4%)
For parents des Betroffenen,2 (2.3%),3 (3.4%),1 (1.1%),6 (6.8%)
For the excercise experts,3 (3.4%),4 (4.5%),1 (1.1%),8 (9.1%)
In the treatment team,9 (10.2%),1 (1.1%),2 (2.3%),12 (13.6%)
mit der Ablehnung weiter sporttherapheutischer Angebote,5 (5.7%),3 (3.4%),0,8 (9.1%)
Total,52 (59.1%),30 (34.1%),6 (6.8%),88 (100.0%)


[01.01] CTCAE,1,2,3,Total
[03.09] Structural adjustment,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Ja,5 (2.8%),4 (2.3%),1 (0.6%),10 (5.6%)
Nein,127 (71.8%),38 (21.5%),2 (1.1%),167 (94.4%)
Total,132 (74.6%),42 (23.7%),3 (1.7%),177 (100.0%)


[01.01] CTCAE,1,2,3,Total
[03.10] Assessment of the situation by expertise,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
No,102 (57.6%),19 (10.7%),0,121 (68.4%)
Yes,29 (16.4%),24 (13.6%),3 (1.7%),56 (31.6%)
Total,131 (74.0%),43 (24.3%),3 (1.7%),177 (100.0%)


[01.01] CTCAE,1,2,3,Total
[03.10.01] Approver,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Eltern,2 (3.6%),0,0,2 (3.6%)
Medizin,10 (17.9%),15 (26.8%),3 (5.4%),28 (50.0%)
Pflege,15 (26.8%),7 (12.5%),0,22 (39.3%)
Physiotherapie,1 (1.8%),2 (3.6%),0,3 (5.4%)
Psychosozialer Dienst,1 (1.8%),0,0,1 (1.8%)
Total,29 (51.8%),24 (42.9%),3 (5.4%),56 (100.0%)


[01.01] CTCAE,1,2,3,Total
"[03.11] Application RICE rule (Rest, Ice, Compression, Elevation)",Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Ja,2 (1.1%),14 (7.9%),2 (1.1%),18 (10.1%)
Nein,130 (73.0%),29 (16.3%),1 (0.6%),160 (89.9%)
Total,132 (74.2%),43 (24.2%),3 (1.7%),178 (100.0%)


[01.01] CTCAE,1,2,3,Total
[03.12] With observation,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Ja,3 (1.7%),2 (1.1%),1 (0.6%),6 (3.4%)
Nein,128 (71.9%),41 (23.0%),2 (1.1%),171 (96.1%)
U,1 (0.6%),0,0,1 (0.6%)
Total,132 (74.2%),43 (24.2%),3 (1.7%),178 (100.0%)


[01.01] CTCAE,1,2,3,Total
[03.13] Stop,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
No,11 (6.2%),5 (2.8%),0,16 (9.0%)
Yes,121 (68.0%),38 (21.3%),3 (1.7%),162 (91.0%)
Total,132 (74.2%),43 (24.2%),3 (1.7%),178 (100.0%)


[01.01] CTCAE,1,2,3,Total
[03.13.01] Stop or Break,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Break,53 (32.7%),16 (9.9%),1 (0.6%),70 (43.2%)
Cessation,68 (42.0%),22 (13.6%),2 (1.2%),92 (56.8%)
Total,121 (74.7%),38 (23.5%),3 (1.9%),162 (100.0%)


[01.01] CTCAE,1,2,3,Total
[03.14] Adaptations,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
-,132 (74.2%),43 (24.2%),3 (1.7%),178 (100.0%)
Total,132 (74.2%),43 (24.2%),3 (1.7%),178 (100.0%)


[01.01] CTCAE,1,2,Total
[03.14.01] Adaptations intensity,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Communication strategy,3 (5.2%),1 (1.7%),4 (6.9%)
Equipment,2 (3.4%),2 (3.4%),4 (6.9%)
Exercise selection,26 (44.8%),3 (5.2%),29 (50.0%)
Intensity,18 (31.0%),1 (1.7%),19 (32.8%)
Setting,2 (3.4%),0,2 (3.4%)
Total,51 (87.9%),7 (12.1%),58 (100.0%)


[01.01] CTCAE,1,2,Total
[03.14.02] Adaptations duration,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
ab jetzt für alle bewegungseinheiten mit allen Patient*innen,3 (7.3%),0,3 (7.3%)
für die gesamte Therapiephase,7 (17.1%),1 (2.4%),8 (19.5%)
nur für diese Einheit,26 (63.4%),4 (9.8%),30 (73.2%)
Total,36 (87.8%),5 (12.2%),41 (100.0%)


[01.01] CTCAE,1,2,3,Total
[05.01] Therapy phase,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Acute therapy,119 (66.9%),27 (15.2%),1 (0.6%),147 (82.6%)
Aftercare,9 (5.1%),14 (7.9%),2 (1.1%),25 (14.0%)
Long-term therapy,4 (2.2%),2 (1.1%),0,6 (3.4%)
Total,132 (74.2%),43 (24.2%),3 (1.7%),178 (100.0%)


[01.01] CTCAE,1,2,3,Total
[05.02] Group size,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Group 2-5,1 (0.6%),1 (0.6%),0,2 (1.1%)
Group 5 to 10,1 (0.6%),4 (2.3%),1 (0.6%),6 (3.4%)
Group over 10,0,6 (3.4%),0,6 (3.4%)
Individual,126 (72.4%),32 (18.4%),2 (1.1%),160 (92.0%)
Total,128 (73.6%),43 (24.7%),3 (1.7%),174 (100.0%)


[01.01] CTCAE,1,2,3,Total
[05.03] Age,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
02 to 05 years,13 (9.8%),10 (7.5%),0,23 (17.3%)
06 to 09 years,34 (25.6%),6 (4.5%),0,40 (30.1%)
10 to 14 years,29 (21.8%),9 (6.8%),0,38 (28.6%)
15 to 18 years,17 (12.8%),6 (4.5%),1 (0.8%),24 (18.0%)
18+ years,3 (2.3%),3 (2.3%),2 (1.5%),8 (6.0%)
Total,96 (72.2%),34 (25.6%),3 (2.3%),133 (100.0%)


[01.01] CTCAE,1,2,3,Total
[05.04] Online,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
No,126 (71.2%),41 (23.2%),3 (1.7%),170 (96.0%)
Yes,5 (2.8%),2 (1.1%),0,7 (4.0%)
Total,131 (74.0%),43 (24.3%),3 (1.7%),177 (100.0%)


[01.01] CTCAE,1,2,3,Total
[05.05] As part of testing,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
No,128 (72.3%),42 (23.7%),3 (1.7%),173 (97.7%)
Yes,4 (2.3%),0,0,4 (2.3%)
Total,132 (74.6%),42 (23.7%),3 (1.7%),177 (100.0%)


[01.01] CTCAE,1,2,3,Total
[05.06] Setting,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
At home (via telemedicine),5 (3.1%),2 (1.2%),0,7 (4.3%)
Gym,40 (24.7%),19 (11.7%),2 (1.2%),61 (37.7%)
Hospital corridor,34 (21.0%),7 (4.3%),0,41 (25.3%)
Outside,6 (3.7%),4 (2.5%),1 (0.6%),11 (6.8%)
Patients room,33 (20.4%),9 (5.6%),0,42 (25.9%)
Total,118 (72.8%),41 (25.3%),3 (1.9%),162 (100.0%)


[01.01] CTCAE,1,2,3,Total
[05.07] Main motor skill,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Coordination,58 (25.2%),20 (8.7%),3 (1.3%),81 (35.2%)
Endurance,37 (16.1%),11 (4.8%),0,48 (20.9%)
Flexibility,21 (9.1%),3 (1.3%),0,24 (10.4%)
Full body,19 (8.3%),5 (2.2%),0,24 (10.4%)
Relaxation,2 (0.9%),0,0,2 (0.9%)
Speed,5 (2.2%),5 (2.2%),0,10 (4.3%)
Strength,27 (11.7%),13 (5.7%),1 (0.4%),41 (17.8%)
Total,169 (73.5%),57 (24.8%),4 (1.7%),230 (100.0%)


[01.01] CTCAE,1,2,3,Total
[05.08] Time point,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
1. Time point,50 (38.2%),6 (4.6%),2 (1.5%),58 (44.3%)
2. Time point,45 (34.4%),27 (20.6%),1 (0.8%),73 (55.7%)
Total,95 (72.5%),33 (25.2%),3 (2.3%),131 (100.0%)


[01.01] CTCAE,1,2,3,Total
[05.09] Training condition,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Average,25 (20.2%),11 (8.9%),0,36 (29.0%)
Good,10 (8.1%),2 (1.6%),0,12 (9.7%)
Moderate,53 (42.7%),17 (13.7%),3 (2.4%),73 (58.9%)
Weiß nicht,3 (2.4%),0,0,3 (2.4%)
Total,91 (73.4%),30 (24.2%),3 (2.4%),124 (100.0%)


## pie charts

In [19]:
import plotly.express as px

def print_pie(ser: pd.Series, caption: str) -> None:
    """Show a pie chart of how often each distinct value occurs in ``ser``.

    Args:
        ser: Series whose distinct values become the pie slices. Counting is
            done with ``value_counts()`` using its defaults.
        caption: Title shown on the chart.
    """
    cnt = ser.value_counts()

    # * one slice per distinct value, sized by its count; the title comes from
    # * the caller instead of being hard-coded
    fig = px.pie(
        values=cnt.values,
        names=cnt.index,
        title=caption,
    )
    fig.show()

In [None]:
# * age-group column of the cleansed data
# * NOTE(review): presumably the input for print_pie - confirm
ser = df_csv_condensed_cleansed["[05.03] Age"]

