In [1]:
import config as cfg
from pathlib import Path
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np
import torch

# Load CALSNIC: the first CSV column becomes the index and the listed columns
# are parsed as datetimes.
# Path(...) / name yields the same path as the former unbound Path.joinpath
# call when the config value is a Path, and additionally accepts a plain str.
date_cols = ['Visit_Date', 'Date of death', 'ALSFRS_Date', 'SymptomOnset_Date']
calsnic_df = pd.read_csv(Path(cfg.CALSNIC_DATA_DIR) / 'calsnic_processed.csv', index_col=0, parse_dates=date_cols)

# Load PROACT: the first CSV column becomes the index; no date parsing is requested.
proact_df = pd.read_csv(Path(cfg.PROACT_DATA_DIR) / 'proact_processed.csv', index_col=0)

  from .autonotebook import tqdm as notebook_tqdm


In [4]:
# Display the column labels of the loaded CALSNIC frame (bare last expression,
# so the notebook renders the Index as the cell output).
calsnic_df.columns

Index(['PSCID', 'Visit', 'Symptom_Duration', 'Visit_details', 'Visit_Date',
       'Status', 'Date of death', 'Cause of death', 'ALSFRS_Date',
       'ALSFRS_1_Speech', 'ALSFRS_2_Salivation', 'ALSFRS_3_Swallowing',
       'ALSFRS_Bulbar_Subscore', 'ALSFRS_4_Handwriting',
       'ALSFRS_GastrostomyPresent', 'ALSFRS_5_Cuttingfood&handlingutensils',
       'ALSFRS_6_Dressing&hygiene', 'ALSFRS_Fine Motor',
       'ALSFRS_7_Turninginbed', 'ALSFRS_8_Walking', 'ALSFRS_9_Climbingstairs',
       'ALSFRS_Gross Motor', 'ALSFRS_10_Dyspnea', 'ALSFRS_11_Orthopnea',
       'ALSFRS_12_RespiratoryInsufficiency', 'ALSFRS_Breathing_Subscore',
       'ALSFRS_TotalScore', 'UMN_Right', 'UMN_Left', 'LMN_Right', 'LMN_Left',
       'ECAS_ALSNonSpecific_Total', 'ECAS_ALSSpecific_Total', 'Handedness',
       'YearsEd', 'Diagnosis', 'Sex', 'Age', 'SymptomOnset_Date',
       'Region_of_Onset', 'Subject_used_Riluzole', 'FVC_Average', 'Weight',
       'Height', 'Ethnicity', 'BMI', 'Visit_Diff', 'SymptomDays',
      

In [6]:
# Print statistics for CALSNIC dataset
# NOTE: `df` is an alias of `calsnic_df` (no copy is made), so the derived
# columns assigned below are added to the loaded frame itself.
df = calsnic_df

# Time in study = row-wise maximum of the four time-to-event columns.
# The column-wise max is vectorised and skips missing values consistently;
# the built-in max() returns a result that depends on where a NaN appears
# among its arguments.
tte_cols = ['TTE_Speech', 'TTE_Swallowing', 'TTE_Handwriting', 'TTE_Walking']
df['Time_in_study'] = df[tte_cols].max(axis=1)

# Site-of-onset indicator flags (1 = region matches, 0 = otherwise, including
# missing regions).
limb_regions = ["lower_extremity", "upper_extremity", 'upper_extremity_lower_extremity']
df['SOO_Limb'] = df['Region_of_Onset'].isin(limb_regions).astype('int64')
df['SOO_Bulbar'] = df['Region_of_Onset'].isin(["bulbar"]).astype('int64')

num_features = ['Age', 'Height', 'Weight', 'BMI', 'ALSFRS_TotalScore', 'Time_in_study']
# NOTE(review): this list is not read in this cell, and 'SOO_Spine' is not
# created here — confirm the column exists upstream before using the list.
cat_features = ['Sex', 'SOO_Limb', 'SOO_Bulbar', 'SOO_Spine', 'Subject_used_Riluzole']

print(f"N: {df.shape[0]}")

# Mean (sample standard deviation) of each numeric feature, rounded to 1 decimal.
for col in num_features:
    values = df[col].astype(float)
    mean = values.mean().round(1)
    std = values.std().round(1)
    print(f"{col}: {mean} ({std})")

print()
# (printed label, column, value counted as positive). The share is taken over
# non-missing entries, matching value_counts(normalize=True), but a category
# that never occurs yields 0.0 instead of raising KeyError.
pct_rows = [
    ("female", "Sex", "Female"),
    ("SOO_Limb", "SOO_Limb", 1),
    ("SOO_Bulbar", "SOO_Bulbar", 1),
    ("Subject_used_Riluzole", "Subject_used_Riluzole", "Yes"),
]
for label, col, positive in pct_rows:
    share = df[col].dropna().eq(positive).mean() * 100
    print(f"Percentage of {label}: {share:.1f}%")
print()

N: 152
Age: 59.5 (10.6)
Height: 167.3 (19.2)
Weight: 76.5 (18.6)
BMI: 27.0 (4.6)
ALSFRS_TotalScore: 37.8 (5.6)
Time_in_study: 152.0 (60.6)

Percentage of female: 40.1%
Percentage of SOO_Limb: 80.9%
Percentage of SOO_Bulbar: 17.1%
Percentage of Subject_used_Riluzole: 59.2%



In [8]:
# Print statistics for PROACT dataset
# NOTE: `df` is an alias of `proact_df` (no copy is made), so the derived
# columns assigned below are added to the loaded frame itself.
df = proact_df

# Time in study = row-wise maximum of the four time-to-event columns.
# The column-wise max is vectorised and skips missing values consistently;
# the built-in max() returns a result that depends on where a NaN appears
# among its arguments.
tte_cols = ['TTE_Speech', 'TTE_Swallowing', 'TTE_Handwriting', 'TTE_Walking']
df['Time_in_study'] = df[tte_cols].max(axis=1)

# Site-of-onset indicator flags (1 = site matches, 0 = otherwise, including
# missing sites). "LimbAndBulbar" sets both the limb and the bulbar flag.
df['SOO_Limb'] = df['Site_of_Onset'].isin(["Limb", "LimbAndBulbar"]).astype('int64')
df['SOO_Bulbar'] = df['Site_of_Onset'].isin(["Bulbar", "LimbAndBulbar"]).astype('int64')
df['SOO_Other'] = df['Site_of_Onset'].isin(["Other"]).astype('int64')

num_features = ['Age', 'Height', 'Weight', "BMI", 'ALSFRS_R_Total', 'Time_in_study']
# NOTE(review): this list is not read in this cell; it names 'SOO_Spine', which
# is not created here, while the 'SOO_Other' flag created above is absent —
# confirm which column is intended before using the list.
cat_features = ['Sex', 'Race_Caucasian', 'SOO_Limb', 'SOO_Bulbar', 'SOO_Spine', 'Subject_used_Riluzole']

print(f"N: {df.shape[0]}")

# Mean (sample standard deviation) of each numeric feature, rounded to 1 decimal.
# NOTE(review): the "(event)" suffix is kept as-is; nothing in this cell filters
# rows by event status — confirm the label is intended.
for col in num_features:
    values = df[col].astype(float)
    mean = values.mean().round(1)
    std = values.std().round(1)
    print(f"{col} (event): {mean} ({std})")

print()
# (printed label, column, value counted as positive). The share is taken over
# non-missing entries, matching value_counts(normalize=True), but a category
# that never occurs yields 0.0 instead of raising KeyError.
pct_rows = [
    ("female", "Sex", "Female"),
    ("Caucasian", "Race_Caucasian", 1.0),
    ("SOO_Limb", "SOO_Limb", 1),
    ("SOO_Bulbar", "SOO_Bulbar", 1),
    ("SOO_Other", "SOO_Other", 1),
    ("Subject_used_Riluzole", "Subject_used_Riluzole", "Yes"),
]
for label, col, positive in pct_rows:
    share = df[col].dropna().eq(positive).mean() * 100
    print(f"Percentage of {label}: {share:.1f}%")
print()

N: 5906
Age (event): 56.0 (11.7)
Height (event): 171.6 (9.3)
Weight (event): 77.1 (14.9)
BMI (event): 26.2 (4.3)
ALSFRS_R_Total (event): 37.6 (5.5)
Time_in_study (event): 292.7 (192.5)

Percentage of female: 36.8%
Percentage of Caucasian: 93.3%
Percentage of SOO_Limb: 36.3%
Percentage of SOO_Bulbar: 10.5%
Percentage of SOO_Other: 6.0%
Percentage of Subject_used_Riluzole: 85.3%

