In [1]:
import pandas as pd

In [2]:
# Load the raw EEG dataset (path is relative to this notebook's directory)
# and preview the first rows to check that the columns were parsed.
raw_csv_path = './../../data/raw/EEG.machinelearing_data_BRMH.csv'
df = pd.read_csv(raw_csv_path)
df.head()

Unnamed: 0,no.,sex,age,eeg.date,education,IQ,main.disorder,specific.disorder,AB.A.delta.a.FP1,AB.A.delta.b.FP2,...,COH.F.gamma.o.Pz.p.P4,COH.F.gamma.o.Pz.q.T6,COH.F.gamma.o.Pz.r.O1,COH.F.gamma.o.Pz.s.O2,COH.F.gamma.p.P4.q.T6,COH.F.gamma.p.P4.r.O1,COH.F.gamma.p.P4.s.O2,COH.F.gamma.q.T6.r.O1,COH.F.gamma.q.T6.s.O2,COH.F.gamma.r.O1.s.O2
0,1,M,57.0,2012.8.30,,,Addictive disorder,Alcohol use disorder,35.998557,21.717375,...,55.989192,16.739679,23.452271,45.67882,30.16752,16.918761,48.850427,9.42263,34.507082,28.613029
1,2,M,37.0,2012.9.6,6.0,120.0,Addictive disorder,Alcohol use disorder,13.425118,11.002916,...,45.595619,17.510824,26.777368,28.201062,57.108861,32.375401,60.351749,13.900981,57.831848,43.463261
2,3,M,32.0,2012.9.10,16.0,113.0,Addictive disorder,Alcohol use disorder,29.94178,27.544684,...,99.475453,70.654171,39.131547,69.920996,71.063644,38.534505,69.908764,27.180532,64.803155,31.485799
3,4,M,35.0,2012.10.8,18.0,126.0,Addictive disorder,Alcohol use disorder,21.496226,21.846832,...,59.986561,63.822201,36.478254,47.117006,84.658376,24.724096,50.299349,35.319695,79.822944,41.141873
4,5,M,36.0,2012.10.18,16.0,112.0,Addictive disorder,Alcohol use disorder,37.775667,33.607679,...,61.46272,59.166097,51.465531,58.635415,80.685608,62.138436,75.888749,61.003944,87.455509,70.531662


In [3]:
def mean_std(series):
    """Format a numeric series as ``"mean ± std"`` with one decimal each.

    Both statistics come from the pandas defaults, so missing values are
    skipped and the standard deviation is the sample one. A statistic that
    is undefined (e.g. the deviation of a single value) is rendered as
    ``nan``.
    """
    average = series.mean()
    deviation = series.std()
    return f"{average:.1f} ± {deviation:.1f}"

def min_max(series):
    """Format the range of a numeric series as ``"min–max"``.

    Both bounds are rendered with zero decimals and joined by an en dash.
    Missing values are ignored by the pandas ``min``/``max`` defaults.
    """
    lowest = series.min()
    highest = series.max()
    return f"{lowest:.0f}–{highest:.0f}"

def sex_percentage(group):
    """Return the male/female split of a group as ``"%M / %F"``.

    Parameters
    ----------
    group : pandas.DataFrame
        Frame with a string-valued ``'sex'`` column. A value counts as male
        when, after trimming whitespace and lower-casing, it starts with
        ``'m'``; every other non-missing value counts as female.

    Returns
    -------
    str
        Both percentages with one decimal, e.g. ``"80.6 / 19.4"``, or
        ``"nan / nan"`` when the group has no non-missing ``sex`` value.

    Notes
    -----
    Percentages are taken over rows with a known sex only. Dividing by the
    full row count would, through ``100 - pct_m``, silently report every
    missing value as female.
    """
    known_sex = group['sex'].dropna()
    total = len(known_sex)
    if total == 0:
        # Nothing to count: render an undefined split the same way the other
        # summary helpers render undefined statistics, instead of dividing by 0.
        return "nan / nan"

    is_male = known_sex.str.strip().str.lower().str.startswith('m')
    pct_m = 100 * (is_male.sum() / total)
    pct_f = 100 - pct_m
    return f"{pct_m:.1f} / {pct_f:.1f}"

# Build one descriptive row per (main disorder, specific disorder) pair.
grouped = df.groupby(['main.disorder', 'specific.disorder'])

# Numeric columns to summarise: (source column, mean/SD header, range header).
# The order here fixes the column order of the resulting table.
summary_spec = [
    ('age', 'Age (Mean ± SD)', 'Age (min–max)'),
    ('education', 'Education (Mean ± SD)', 'Education (min–max)'),
    ('IQ', 'IQ (Mean ± SD)', 'IQ (min–max)'),
]

rows = []
for (main, spec), group in grouped:
    # Identification and size of the group come first ...
    row = {
        'Main Disorder': main,
        'Specific Disorder': spec,
        'N': len(group),
        'Sex (%M/%F)': sex_percentage(group),
    }
    # ... followed by the two formatted statistics for each numeric column.
    for column, mean_sd_header, range_header in summary_spec:
        values = group[column]
        row[mean_sd_header] = mean_std(values)
        row[range_header] = min_max(values)
    rows.append(row)

table = pd.DataFrame(rows)

# Persist the table next to the notebook, then display it as the cell output.
table.to_csv('./descriptive_table.csv', index=False)

table

Unnamed: 0,Main Disorder,Specific Disorder,N,Sex (%M/%F),Age (Mean ± SD),Age (min–max),Education (Mean ± SD),Education (min–max),IQ (Mean ± SD),IQ (min–max)
0,Addictive disorder,Alcohol use disorder,93,80.6 / 19.4,34.2 ± 11.9,19–64,13.3 ± 3.1,1–18,103.4 ± 13.6,68–130
1,Addictive disorder,Behavioral addiction disorder,93,95.7 / 4.3,25.1 ± 7.5,18–68,13.2 ± 1.9,7–18,104.4 ± 18.5,56–142
2,Anxiety disorder,Panic disorder,59,64.4 / 35.6,31.0 ± 11.3,19–61,13.4 ± 2.9,0–18,100.3 ± 14.8,72–142
3,Anxiety disorder,Social anxiety disorder,48,85.4 / 14.6,26.5 ± 9.1,19–58,12.8 ± 1.6,10–16,95.9 ± 17.9,49–128
4,Healthy control,Healthy control,95,63.2 / 36.8,25.7 ± 4.5,18–42,14.9 ± 2.1,11–20,116.2 ± 10.9,88–141
5,Mood disorder,Bipolar disorder,67,62.7 / 37.3,29.7 ± 11.0,18–62,14.1 ± 2.2,6–19,100.8 ± 17.0,60–134
6,Mood disorder,Depressive disorder,199,54.8 / 45.2,31.3 ± 13.2,18–70,13.0 ± 2.5,1–18,101.8 ± 15.3,61–145
7,Obsessive compulsive disorder,Obsessive compulsitve disorder,46,82.6 / 17.4,28.5 ± 9.8,18–70,13.9 ± 2.3,5–18,107.8 ± 15.2,76–138
8,Schizophrenia,Schizophrenia,117,55.6 / 44.4,31.7 ± 12.1,19–72,12.8 ± 2.9,0–18,89.6 ± 17.5,50–131
9,Trauma and stress related disorder,Acute stress disorder,38,7.9 / 92.1,28.9 ± 9.0,19–55,14.3 ± 2.3,8–20,104.1 ± 15.4,76–134
