In [1]:
import config as cfg
from pathlib import Path
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np
import torch

# Inclusive upper bound applied to every TTE_<event> column when filtering rows.
MAX_TTE = 500
event_names = ['Speech', 'Swallowing', 'Handwriting', 'Walking']


def filter_valid_tte(df, events=event_names, upper=MAX_TTE):
    """Keep the rows whose TTE lies in (0, upper] for every event.

    Args:
        df: frame holding one ``TTE_<event>`` column per entry of ``events``.
        events: event names used to build the ``TTE_<event>`` column names.
        upper: inclusive upper bound on each TTE value.

    Returns:
        An independent copy of the matching rows, so columns added to the
        result later never write into the frame it was filtered from.
    """
    tte = df[[f'TTE_{event_name}' for event_name in events]]
    # A missing TTE fails both comparisons, so such rows are dropped as well.
    in_window = ((tte > 0) & (tte <= upper)).all(axis=1)
    return df.loc[in_window].copy()


# Load CALSNIC
# Path(...) / name works whether the configured directory is a str or a Path.
date_cols = ['Visit_Date', 'Date of death', 'ALSFRS_Date', 'SymptomOnset_Date']
calsnic_df = filter_valid_tte(
    pd.read_csv(Path(cfg.CALSNIC_DATA_DIR) / 'calsnic_processed.csv', index_col=0, parse_dates=date_cols)
)

# Load PROACT
proact_df = filter_valid_tte(
    pd.read_csv(Path(cfg.PROACT_DATA_DIR) / 'proact_processed.csv', index_col=0)
)

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# Print statistics for CALSNIC dataset
df = calsnic_df  # alias, not a copy: the derived columns below are added to calsnic_df itself

# Time in study = largest of the row's four time-to-event values (missing values are skipped).
df['Time_in_study'] = df[['TTE_Speech', 'TTE_Swallowing', 'TTE_Handwriting', 'TTE_Walking']].max(axis=1)

# 0/1 site-of-onset indicators derived from Region_of_Onset; any other value (or a missing one) maps to 0.
limb_regions = ["lower_extremity", "upper_extremity", 'upper_extremity_lower_extremity']
df['SOO_Limb'] = df['Region_of_Onset'].isin(limb_regions).astype('int64')
df['SOO_Bulbar'] = df['Region_of_Onset'].isin(["bulbar"]).astype('int64')

num_features = ['Age', 'Height', 'Weight', 'BMI', 'ALSFRS_TotalScore', 'Time_in_study']
# NOTE(review): cat_features is not used in this cell, and 'SOO_Spine' is not created here — confirm it exists upstream.
cat_features = ['Sex', 'SOO_Limb', 'SOO_Bulbar', 'SOO_Spine', 'Subject_used_Riluzole']

print(f"N: {df.shape[0]}")

# Mean (standard deviation) of each numeric feature, rounded to one decimal place.
for col in num_features:
    values = df[col].astype(float)
    mean = values.mean().round(1)
    std = values.std().round(1)
    print(f"{col}: {mean} ({std})")

print()
# (printed label, column, value of interest) for each categorical share.
# value_counts(normalize=True) ignores missing entries; .get() reports 0.0%
# instead of raising KeyError when the value never occurs in the cohort.
shares = [
    ("female", 'Sex', 'Female'),
    ("SOO_Limb", 'SOO_Limb', 1),
    ("SOO_Bulbar", 'SOO_Bulbar', 1),
    ("Subject_used_Riluzole", 'Subject_used_Riluzole', 'Yes'),
]
for label, column, value in shares:
    fraction = df[column].value_counts(normalize=True).get(value, 0.0)
    print(f"Percentage of {label}: {fraction * 100:.1f}%")
print()

N: 151
Age: 59.7 (10.4)
Height: 167.3 (19.2)
Weight: 76.5 (18.6)
BMI: 27.0 (4.6)
ALSFRS_TotalScore: 37.9 (5.4)
Time_in_study: 240.2 (114.9)

Percentage of female: 40.4%
Percentage of SOO_Limb: 81.5%
Percentage of SOO_Bulbar: 17.2%
Percentage of Subject_used_Riluzole: 58.9%



In [4]:
# Print statistics for PROACT dataset
df = proact_df  # alias, not a copy: the derived columns below are added to proact_df itself

# Time in study = largest of the row's four time-to-event values (missing values are skipped).
df['Time_in_study'] = df[['TTE_Speech', 'TTE_Swallowing', 'TTE_Handwriting', 'TTE_Walking']].max(axis=1)

# 0/1 site-of-onset indicators derived from Site_of_Onset. "LimbAndBulbar" counts
# towards both SOO_Limb and SOO_Bulbar; any unlisted (or missing) value maps to 0.
df['SOO_Limb'] = df['Site_of_Onset'].isin(["Limb", "LimbAndBulbar"]).astype('int64')
df['SOO_Bulbar'] = df['Site_of_Onset'].isin(["Bulbar", "LimbAndBulbar"]).astype('int64')
df['SOO_Other'] = df['Site_of_Onset'].isin(["Other"]).astype('int64')

num_features = ['Age', 'Height', 'Weight', "BMI", 'ALSFRS_R_Total', 'Time_in_study']
# NOTE(review): cat_features is not used in this cell, and 'SOO_Spine' is not created here — confirm it exists upstream.
cat_features = ['Sex', 'Race_Caucasian', 'SOO_Limb', 'SOO_Bulbar', 'SOO_Spine', 'Subject_used_Riluzole']

print(f"N: {df.shape[0]}")

# Mean (standard deviation) of each numeric feature, rounded to one decimal place.
for col in num_features:
    values = df[col].astype(float)
    mean = values.mean().round(1)
    std = values.std().round(1)
    print(f"{col}: {mean} ({std})")

print()
# (printed label, column, value of interest) for each categorical share.
# value_counts(normalize=True) ignores missing entries; .get() reports 0.0%
# instead of raising KeyError when the value never occurs in the cohort.
shares = [
    ("female", 'Sex', 'Female'),
    ("Caucasian", 'Race_Caucasian', 1.0),
    ("SOO_Limb", 'SOO_Limb', 1),
    ("SOO_Bulbar", 'SOO_Bulbar', 1),
    ("SOO_Other", 'SOO_Other', 1),
    ("Subject_used_Riluzole", 'Subject_used_Riluzole', 'Yes'),
]
for label, column, value in shares:
    fraction = df[column].value_counts(normalize=True).get(value, 0.0)
    print(f"Percentage of {label}: {fraction * 100:.1f}%")
print()

N: 4664
Age: 56.4 (11.4)
Height: 171.1 (9.4)
Weight: 75.8 (14.7)
BMI: 25.7 (4.1)
ALSFRS_R_Total: 38.5 (5.2)
Time_in_study: 302.0 (122.1)

Percentage of female: 35.0%
Percentage of Caucasian: 94.3%
Percentage of SOO_Limb: 42.4%
Percentage of SOO_Bulbar: 15.8%
Percentage of SOO_Other: 4.9%
Percentage of Subject_used_Riluzole: 76.5%

