### Import Dependencies

In [7]:
import sys
import os
import pandas as pd
from scipy import stats
from sklearn.model_selection import train_test_split
from sklearn import linear_model
from sklearn import metrics

### Initialize Gait Table

In [8]:
def gait_df_init(csv_path="csv-input/gait-table.csv"):
    """Load the gait table and append one left/right-averaged column per sided feature.

    Parameters
    ----------
    csv_path : str or path-like, optional
        Location of the gait table CSV; its first column is used as the index.
        Defaults to the path this notebook has always read from.

    Returns
    -------
    pandas.DataFrame
        The table as read from disk plus, for every ``<Name>_L`` / ``<Name>_R``
        pair listed in ``gait_feats``, a new column ``<Name>`` holding the
        row-wise mean of the two sides.

    Raises
    ------
    KeyError
        If any listed ``_L`` / ``_R`` column is absent from the CSV.
    """
    gait_df = pd.read_csv(csv_path, index_col=0)
    # NOTE(review): this function used to call ``gait_df.fillna(0)`` and throw
    # the result away, so missing values were never replaced. The dead call is
    # removed instead of being assigned back: actually zero-filling would change
    # every downstream mean, pivot and groupby. Confirm whether zero-filling was
    # really intended before reintroducing it.

    gait_feats = [
        "Cadence_L", "Cadence_R",
        "DblSupport_L", "DblSupport_R",
        "ElevMidswing_L", "ElevMidswing_R",
        "CycleDur_L", "CycleDur_R",
        "Speed_L", "Speed_R",
        "Circumdxn_L", "Circumdxn_R",
        "HSAngle_L", "HSAngle_R",
        "TOffAngle_L", "TOffAngle_R",
        "SglSupport_L", "SglSupport_R",
        "Stance_L", "Stance_R",
        "StepDur_L", "StepDur_R",
        "StrideLength_L", "StrideLength_R",
        "Swing_L", "Swing_R",
        "TermDblSupport_L", "TermDblSupport_R",
        "ToeOutAngle_L", "ToeOutAngle_R",
        "LumbarCorROM", "LumbarSagROM", "LumbarTransvROM",
        "TrunkCorROM", "TrunkSagROM", "TrunkTransvROM",
        "ArmSwingVel_L", "ArmSwingVel_R", "ArmROM_L", "ArmROM_R",
    ]

    # Map each sided feature's base name to its [left, right] column names.
    sides_by_group = {}
    for feat in gait_feats:
        if "_" not in feat:
            continue  # unsided feature: there is no pair to average
        group, side = feat.split("_")
        pair = sides_by_group.setdefault(group, ["", ""])
        pair[0 if side == "L" else 1] = feat

    # Row-wise mean of both sides; pandas skips NaN by default, so a row with
    # only one side present gets that side's value.
    for group, (left, right) in sides_by_group.items():
        gait_df[group] = gait_df[[left, right]].mean(axis=1)

    return gait_df

### Calculate Benefit

In [9]:
def calculate_benefit(gait_df, condition, feature, alpha=0.05):
    """Flag, per subject, whether ``feature`` is significantly greater with a cue than without.

    Parameters
    ----------
    gait_df : pandas.DataFrame
        Must contain ``Subject``, ``Cycle_Index``, ``Condition`` and ``feature``
        columns. ``Condition`` values ``"walk"``, ``"walkAuditory"`` and
        ``"walkVisual"`` are the ones used.
    condition : str
        Pattern passed to ``Series.str.contains`` to select subjects (e.g. ``"PD"``).
    feature : str
        Name of the column to compare across conditions.
    alpha : float, optional
        Significance threshold; a p-value strictly below it counts as a benefit.
        Defaults to 0.05.

    Returns
    -------
    pandas.DataFrame
        One row per subject with columns ``Subject``, ``Auditory_pvalue``,
        ``Visual_pvalue``, ``{feature}_Auditory_Benefit``,
        ``{feature}_Visual_Benefit`` and ``{feature}_Benefit`` (the OR of the
        two cue-specific 0/1 flags).
    """
    initial_col = f"Initial_{feature}"
    auditory_col = f"Auditory_{feature}"
    visual_col = f"Visual_{feature}"

    cohort_df = gait_df[gait_df["Subject"].str.contains(condition)].reset_index(drop=True)

    # One row per (Subject, Cycle_Index), one column per walking condition.
    # dropna() is required: ttest_ind propagates NaN by default, so a single
    # missing cell would turn a subject's p-value into NaN. The cost is that a
    # cycle missing in any one condition is dropped for all three.
    wide_df = (
        cohort_df[["Subject", "Cycle_Index", "Condition", feature]]
        .pivot_table(index=["Subject", "Cycle_Index"], columns="Condition", values=feature)
        .dropna()
        .reset_index()
        .rename(columns={"walk": initial_col, "walkAuditory": auditory_col, "walkVisual": visual_col})
    )

    # One-sided Welch t-test per subject: is the cued sample greater than baseline?
    records = []
    for subject, subject_df in wide_df.groupby("Subject"):
        baseline = subject_df[initial_col]
        _, auditory_pvalue = stats.ttest_ind(
            subject_df[auditory_col], baseline, equal_var=False, alternative="greater"
        )
        _, visual_pvalue = stats.ttest_ind(
            subject_df[visual_col], baseline, equal_var=False, alternative="greater"
        )
        records.append((subject, auditory_pvalue, visual_pvalue))

    condition_benefit_df = pd.DataFrame(records, columns=["Subject", "Auditory_pvalue", "Visual_pvalue"])
    # A NaN p-value compares False, so it is recorded as "no benefit" (0).
    condition_benefit_df[f"{feature}_Auditory_Benefit"] = (condition_benefit_df["Auditory_pvalue"] < alpha).astype(int)
    condition_benefit_df[f"{feature}_Visual_Benefit"] = (condition_benefit_df["Visual_pvalue"] < alpha).astype(int)

    # Combined flag: benefit from either cue.
    condition_benefit_df[f"{feature}_Benefit"] = (
        condition_benefit_df[f"{feature}_Auditory_Benefit"] | condition_benefit_df[f"{feature}_Visual_Benefit"]
    )

    return condition_benefit_df


def create_benefit_df(gait_df):
    """Build the benefit tables for stride length and speed.

    Both tables come from ``calculate_benefit`` with the ``"PD"`` subject
    filter and are returned as ``(stride_benefit_df, speed_benefit_df)``.
    """
    stride_benefit_df, speed_benefit_df = (
        calculate_benefit(gait_df, "PD", feature)
        for feature in ("StrideLength", "Speed")
    )
    return stride_benefit_df, speed_benefit_df

### Prepare Gait Table

In [10]:
# Prepare gait table
def prepare_gait_df(gait_df, pd_stride_benefit_df, condition, feature, queue="", OR=False):
    """Assemble the per-subject table fed to the logistic regression.

    Parameters
    ----------
    gait_df : pandas.DataFrame
        Gait table with ``Subject``, ``age``, ``group``, ``UPDRS III TOTAL:``
        and ``feature`` columns.
    pd_stride_benefit_df : pandas.DataFrame
        Benefit table for ``feature`` (despite the name it is also used for
        speed); needs ``Subject`` plus the selected benefit column.
    condition : str
        Pattern passed to ``Series.str.contains`` to select subjects.
    feature : str
        Gait feature whose per-subject mean becomes ``Initial_{feature}_Avg``.
    queue : str, optional
        Cue name (``"Auditory"`` or ``"Visual"``) selecting the
        ``{feature}_{queue}_Benefit`` target column. Ignored when ``OR`` is true.
    OR : bool, optional
        When true, use the combined ``{feature}_Benefit`` column as target.

    Returns
    -------
    pandas.DataFrame
        Columns ``Subject``, ``age``, ``group``, ``UPDRS III TOTAL:``,
        ``Initial_{feature}_Avg`` and the chosen benefit column, one row per
        row of ``pd_stride_benefit_df``.
    """
    covariate_cols = ["age", "group", "UPDRS III TOTAL:"]
    avg_col = f"Initial_{feature}_Avg"

    pd_gait_df = gait_df[gait_df["Subject"].str.contains(condition)].reset_index(drop=True)
    # NOTE(review): the mean runs over every row of a subject, whatever its
    # "Condition", although the result is named "Initial_..."; confirm whether
    # it should be restricted to the baseline walk rows.
    pd_gait_df = (
        pd_gait_df.groupby(["Subject"] + covariate_cols)[[feature]]
        .mean()
        .reset_index()
        .rename(columns={feature: avg_col})
    )
    pd_gait_df = pd_gait_df.loc[~(pd_gait_df["Subject"] == "sub-PD33")].reset_index(drop=True)  # Exclude sub-PD33, no walk run

    # Join on the Subject key, not on row position. A positional join attaches
    # covariates to the wrong subject as soon as the two tables differ in
    # order or membership (groupby sorts subjects and drops NaN keys).
    pd_log_reg_df = pd_stride_benefit_df.join(
        pd_gait_df.set_index("Subject")[covariate_cols + [avg_col]], on="Subject"
    )

    target_col = f"{feature}_Benefit" if OR else f"{feature}_{queue}_Benefit"
    return pd_log_reg_df[["Subject"] + covariate_cols + [avg_col, target_col]]

### Logistic Regression

In [11]:
def log_reg(pd_log_reg_df, feature, queue="", OR=False, test_size=0.5, random_state=1):
    """Fit a logistic regression predicting the benefit flag and report test accuracy.

    Parameters
    ----------
    pd_log_reg_df : pandas.DataFrame
        Table with a ``Subject`` column, a categorical ``group`` column, the
        predictor columns and the target benefit column.
    feature : str
        Gait feature name used to build the target column name.
    queue : str, optional
        Cue name selecting ``{feature}_{queue}_Benefit`` as target. Ignored
        when ``OR`` is true.
    OR : bool, optional
        When true, the target is the combined ``{feature}_Benefit`` column.
    test_size : float, optional
        Fraction of rows held out for testing, forwarded to
        ``train_test_split``. Defaults to 0.5, the value this function has
        always used.
    random_state : int, optional
        Seed forwarded to ``train_test_split``. Defaults to 1.

    Returns
    -------
    float
        Accuracy on the held-out rows. The same value is also printed.
    """
    target_col = f"{feature}_Benefit" if OR else f"{feature}_{queue}_Benefit"

    # Predictors: everything except the subject id and the target, with
    # ``group`` one-hot encoded (first level dropped).
    X = pd_log_reg_df.drop(columns=["Subject", target_col])
    X = pd.get_dummies(X, columns=["group"], drop_first=True)
    y = pd_log_reg_df[target_col]

    # With the defaults this is a 50% / 50% train/test split. An earlier
    # comment said 60/40, but the code has always used test_size=0.5.
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=test_size, random_state=random_state
    )
    model = linear_model.LogisticRegression()  # not named ``log_reg``, to avoid shadowing this function
    model.fit(X_train, y_train)
    accuracy = metrics.accuracy_score(y_test, model.predict(X_test))
    print(accuracy)
    return accuracy

In [12]:
def main():
    """Run the whole analysis: load gait data, compute benefits, fit the models.

    For stride length and then speed, a logistic regression is fitted for the
    auditory benefit, the visual benefit and the combined (auditory OR visual)
    benefit, in that order. The benefit tables and the last regression table
    (speed, combined) are written under ``csv-output/``.
    """
    output_dir = "csv-output"
    # DataFrame.to_csv does not create missing directories.
    os.makedirs(output_dir, exist_ok=True)

    # Initialize gait table
    gait_df = gait_df_init()

    # Calculate benefit
    pd_stride_benefit_df, pd_speed_benefit_df = create_benefit_df(gait_df)
    # Output (for debugging purposes)
    pd_stride_benefit_df.to_csv(f"{output_dir}/stride_benefit_table.csv")
    pd_speed_benefit_df.to_csv(f"{output_dir}/speed_benefit_table.csv")

    # Apply logistic regression model, one feature at a time
    benefit_tables = (
        ("StrideLength", pd_stride_benefit_df),
        ("Speed", pd_speed_benefit_df),
    )
    for feature, benefit_df in benefit_tables:
        # Single-cue targets
        for cue in ("Auditory", "Visual"):
            pd_log_reg_df = prepare_gait_df(gait_df, benefit_df, "PD", feature, cue)
            log_reg(pd_log_reg_df, feature, cue)

        # Combined (auditory OR visual)
        pd_log_reg_df = prepare_gait_df(gait_df, benefit_df, "PD", feature, OR=True)
        print(pd_log_reg_df.head())
        log_reg(pd_log_reg_df, feature, OR=True)

    # Output: only the last table built above (speed, combined) is saved.
    # The former to_clipboard() debugging call was dropped; it fails on
    # machines without a clipboard backend.
    pd_log_reg_df.to_csv(f"{output_dir}/pd_log_reg.csv")


if __name__ == "__main__":
    main()

0.8235294117647058
0.47058823529411764
    Subject   age  group  UPDRS III TOTAL:  Initial_StrideLength_Avg  \
0  sub-PD01  64.0  nofog              27.0                  1.153625   
1  sub-PD02  82.0  nofog              18.0                  0.985880   
2  sub-PD03  82.0  nofog              32.0                  1.112697   
3  sub-PD04  72.0    fog              32.0                  0.834839   
4  sub-PD05  80.0    fog              32.0                  1.014913   

   StrideLength_Benefit  
0                     1  
1                     1  
2                     1  
3                     1  
4                     1  
0.8235294117647058
0.8235294117647058
0.35294117647058826
    Subject   age  group  UPDRS III TOTAL:  Initial_Speed_Avg  Speed_Benefit
0  sub-PD01  64.0  nofog              27.0           0.872875              1
1  sub-PD02  82.0  nofog              18.0           0.795217              1
2  sub-PD03  82.0  nofog              32.0           1.071388              1
3  sub