In [7]:
import config as cfg
from pathlib import Path
import pandas as pd

# Load PROACT
# Read the processed PROACT table; the first CSV column becomes the index.
# Wrapping the config value in Path() makes the join work whether
# cfg.PROACT_DATA_DIR is a str or a Path.
proact_csv = Path(cfg.PROACT_DATA_DIR) / 'proact_processed.csv'
df = pd.read_csv(proact_csv, index_col=0)

# Upper bound applied to every TTE_* column below.
# NOTE(review): unit of the TTE_* columns is not visible here - confirm.
TTE_UPPER_BOUND = 500

event_names = ['Speech', 'Swallowing', 'Handwriting', 'Walking']
for event_name in event_names:
    tte = df[f'TTE_{event_name}']
    event = df[f'Event_{event_name}']
    # Keep only rows whose TTE lies in (0, TTE_UPPER_BOUND] and whose event
    # indicator is exactly 0 or 1. Both conditions are row-wise, so applying
    # them as one mask selects the same rows as filtering twice in sequence.
    # Rows with NaN in either column fail the comparisons and are dropped.
    valid_tte = (tte > 0) & (tte <= TTE_UPPER_BOUND)
    valid_event = (event == 0) | (event == 1)
    df = df.loc[valid_tte & valid_event]

In [9]:
# Print statistics for PROACT dataset

# Per-row maximum over the four TTE_* columns. Vectorised equivalent of a
# row-wise Python max(); identical as long as these columns contain no NaN
# (DataFrame.max skips NaN by default, the builtin max() does not).
tte_columns = ['TTE_Speech', 'TTE_Swallowing', 'TTE_Handwriting', 'TTE_Walking']
df['Time_in_study'] = df[tte_columns].max(axis=1)

# 0/1 indicator columns derived from Site_of_Onset. "LimbAndBulbar" sets both
# the limb and the bulbar flag, so the flags are not mutually exclusive.
df['SOO_Limb'] = df['Site_of_Onset'].apply(lambda x: 1 if x in ["Limb", "LimbAndBulbar"] else 0)
df['SOO_Bulbar'] = df['Site_of_Onset'].apply(lambda x: 1 if x in ["Bulbar", "LimbAndBulbar"] else 0)
df['SOO_Other'] = df['Site_of_Onset'].apply(lambda x: 1 if x in ["Other"] else 0)

num_features = ['Age', 'Height', 'Weight', "BMI", 'ALSFRS_R_Total', 'Time_in_study']
# 'SOO_Other' matches the indicator column created above; the list previously
# named 'SOO_Spine', which this cell never creates.
# NOTE(review): confirm the CSV has no separate 'SOO_Spine' column that was intended here.
cat_features = ['Sex', 'Race_Caucasian', 'SOO_Limb', 'SOO_Bulbar', 'SOO_Other', 'Subject_used_Riluzole']


def _percentage(column, value):
    """Return the percentage of non-missing entries of df[column] equal to value.

    Yields 0.0 instead of raising KeyError when `value` does not occur in the
    column at all.
    """
    shares = df[column].value_counts(normalize=True)
    return shares.get(value, 0.0) * 100


print(f"N: {df.shape[0]}")

# Mean (standard deviation) of each numeric feature, rounded to one decimal.
for col in num_features:
    values = df[col].astype(float)
    mean = values.mean().round(1)
    std = values.std().round(1)
    text = f"{mean} ({std})"
    print(f"{col}: {text}")

print()
# (label shown in the output, column name, value counted as a "hit")
percentage_specs = [
    ("female", 'Sex', 'Female'),
    ("Caucasian", 'Race_Caucasian', 1.0),
    ("SOO_Limb", 'SOO_Limb', 1),
    ("SOO_Bulbar", 'SOO_Bulbar', 1),
    ("SOO_Other", 'SOO_Other', 1),
    ("Subject_used_Riluzole", 'Subject_used_Riluzole', 'Yes'),
]
for label, column, value in percentage_specs:
    print(f"Percentage of {label}: {_percentage(column, value):.1f}%")

N: 3220
Age: 55.8 (11.6)
Height: 172.0 (9.5)
Weight: 76.3 (14.6)
BMI: 25.7 (4.0)
ALSFRS_R_Total: 39.4 (5.0)
Time_in_study: 271.4 (122.7)

Percentage of female: 32.2%
Percentage of Caucasian: 94.6%
Percentage of SOO_Limb: 43.3%
Percentage of SOO_Bulbar: 13.4%
Percentage of SOO_Other: 4.3%
Percentage of Subject_used_Riluzole: 75.7%

