In [3]:
import config as cfg
from pathlib import Path
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np
import torch

# Load CALSNIC
# Path(dir) / name accepts the configured directory as either a str or a Path;
# the unbound Path.joinpath(dir, name) form only works for Path instances.
date_cols = ['Visit_Date', 'Date of death', 'ALSFRS_Date', 'SymptomOnset_Date']
calsnic_df = pd.read_csv(Path(cfg.CALSNIC_DATA_DIR) / 'calsnic_processed.csv', index_col=0, parse_dates=date_cols)

# Load PROACT (no date columns are parsed for this table)
proact_df = pd.read_csv(Path(cfg.PROACT_DATA_DIR) / 'proact_processed.csv', index_col=0)

# Keep only rows whose TTE_<event> value lies in (0, 365] for *every* event.
# NOTE(review): TTE presumably means time-to-event in days (365 = one year) - confirm.
# Rows with a missing TTE value are dropped too, because NaN comparisons are False.
event_names = ['Speech', 'Swallowing', 'Handwriting', 'Walking']
calsnic_keep = np.ones(len(calsnic_df), dtype=bool)
proact_keep = np.ones(len(proact_df), dtype=bool)
for event_name in event_names:
    tte_col = f'TTE_{event_name}'
    calsnic_keep &= ((calsnic_df[tte_col] > 0) & (calsnic_df[tte_col] <= 365)).to_numpy()
    proact_keep &= ((proact_df[tte_col] > 0) & (proact_df[tte_col] <= 365)).to_numpy()

# Slice once and take an explicit copy: later cells add columns to these frames,
# and assigning into a filtered slice triggers pandas' SettingWithCopyWarning.
calsnic_df = calsnic_df.loc[calsnic_keep].copy()
proact_df = proact_df.loc[proact_keep].copy()

In [4]:
# Print statistics for CALSNIC dataset
# Derives three helper columns, then prints the cohort size, mean (std) of the
# numeric features and the share of selected categories.
tte_cols = ['TTE_Speech', 'TTE_Swallowing', 'TTE_Handwriting', 'TTE_Walking']
limb_regions = ["lower_extremity", "upper_extremity", 'upper_extremity_lower_extremity']

# assign() returns a new frame, so no column is written into a filtered slice
# (avoids SettingWithCopyWarning) and re-running the cell gives the same result.
# The row-wise max is vectorised; it matches the Python max() over the four
# columns because rows with missing TTE values were already filtered out at load.
calsnic_df = calsnic_df.assign(
    Time_in_study=calsnic_df[tte_cols].max(axis=1),
    SOO_Limb=calsnic_df['Region_of_Onset'].isin(limb_regions).astype('int64'),
    SOO_Bulbar=calsnic_df['Region_of_Onset'].isin(["bulbar"]).astype('int64'),
)
df = calsnic_df

num_features = ['Age', 'Height', 'Weight', 'BMI', 'ALSFRS_TotalScore', 'Time_in_study']
# NOTE(review): cat_features is not used in this cell, and 'SOO_Spine' is not
# created here - confirm it exists in the loaded table before relying on it.
cat_features = ['Sex', 'SOO_Limb', 'SOO_Bulbar', 'SOO_Spine', 'Subject_used_Riluzole']

print(f"N: {df.shape[0]}")

# Mean and sample standard deviation, each rounded to one decimal.
for col in num_features:
    values = df[col].astype(float)
    mean = values.mean().round(1)
    std = values.std().round(1)
    print(f"{col}: {mean} ({std})")

print()
# (printed label, column, category value) for each reported share.
share_specs = [
    ("female", 'Sex', 'Female'),
    ("SOO_Limb", 'SOO_Limb', 1),
    ("SOO_Bulbar", 'SOO_Bulbar', 1),
    ("Subject_used_Riluzole", 'Subject_used_Riluzole', 'Yes'),
]
for label, share_col, category in share_specs:
    # .get(..., 0.0) reports 0.0% instead of raising KeyError when the category
    # does not occur in the filtered cohort. Missing values are excluded from
    # the denominator by value_counts.
    share = df[share_col].value_counts(normalize=True).get(category, 0.0)
    print(f"Percentage of {label}: {share * 100:.1f}%")
print()

N: 149
Age: 59.6 (10.5)
Height: 167.1 (19.3)
Weight: 76.1 (18.5)
BMI: 26.9 (4.6)
ALSFRS_TotalScore: 37.8 (5.6)
Time_in_study: 146.6 (47.0)

Percentage of female: 40.3%
Percentage of SOO_Limb: 81.2%
Percentage of SOO_Bulbar: 16.8%
Percentage of Subject_used_Riluzole: 59.7%



In [5]:
# Print statistics for PROACT dataset
# Derives four helper columns, then prints the cohort size, mean (std) of the
# numeric features and the share of selected categories.
tte_cols = ['TTE_Speech', 'TTE_Swallowing', 'TTE_Handwriting', 'TTE_Walking']

# assign() returns a new frame, so no column is written into a filtered slice
# (avoids SettingWithCopyWarning) and re-running the cell gives the same result.
# The row-wise max is vectorised; it matches the Python max() over the four
# columns because rows with missing TTE values were already filtered out at load.
# "LimbAndBulbar" is counted in both SOO_Limb and SOO_Bulbar.
# NOTE(review): in the recorded run the Limb/Bulbar/Other shares sum to ~48%, so
# most Site_of_Onset values match none of these labels (missing or spelled
# differently?) - confirm against the processed data.
proact_df = proact_df.assign(
    Time_in_study=proact_df[tte_cols].max(axis=1),
    SOO_Limb=proact_df['Site_of_Onset'].isin(["Limb", "LimbAndBulbar"]).astype('int64'),
    SOO_Bulbar=proact_df['Site_of_Onset'].isin(["Bulbar", "LimbAndBulbar"]).astype('int64'),
    SOO_Other=proact_df['Site_of_Onset'].isin(["Other"]).astype('int64'),
)
df = proact_df

num_features = ['Age', 'Height', 'Weight', "BMI", 'ALSFRS_R_Total', 'Time_in_study']
# NOTE(review): cat_features is not used in this cell, and 'SOO_Spine' is not
# created here - confirm it exists in the loaded table before relying on it.
cat_features = ['Sex', 'Race_Caucasian', 'SOO_Limb', 'SOO_Bulbar', 'SOO_Spine', 'Subject_used_Riluzole']

print(f"N: {df.shape[0]}")

# Mean and sample standard deviation, each rounded to one decimal.
for col in num_features:
    values = df[col].astype(float)
    mean = values.mean().round(1)
    std = values.std().round(1)
    print(f"{col} (event): {mean} ({std})")

print()
# (printed label, column, category value) for each reported share.
share_specs = [
    ("female", 'Sex', 'Female'),
    ("Caucasian", 'Race_Caucasian', 1.0),
    ("SOO_Limb", 'SOO_Limb', 1),
    ("SOO_Bulbar", 'SOO_Bulbar', 1),
    ("SOO_Other", 'SOO_Other', 1),
    ("Subject_used_Riluzole", 'Subject_used_Riluzole', 'Yes'),
]
for label, share_col, category in share_specs:
    # .get(..., 0.0) reports 0.0% instead of raising KeyError when the category
    # does not occur in the filtered cohort. Missing values are excluded from
    # the denominator by value_counts.
    share = df[share_col].value_counts(normalize=True).get(category, 0.0)
    print(f"Percentage of {label}: {share * 100:.1f}%")
print()

N: 1807
Age (event): 55.9 (11.9)
Height (event): 172.7 (9.2)
Weight (event): 77.1 (14.6)
BMI (event): 26.1 (3.9)
ALSFRS_R_Total (event): 38.8 (5.2)
Time_in_study (event): 214.0 (101.0)

Percentage of female: 32.4%
Percentage of Caucasian: 94.4%
Percentage of SOO_Limb: 37.3%
Percentage of SOO_Bulbar: 9.6%
Percentage of SOO_Other: 1.2%
Percentage of Subject_used_Riluzole: 82.0%

