This dataset from physionet.org contains gait dynamics, summarized as the right stride interval, for subjects with amyotrophic lateral sclerosis (ALS), Parkinson's disease, and Huntington's disease, as well as a control group. The underlying mechanisms affecting gait dynamics in Parkinson's and Huntington's disease differ from those in ALS; however, in all three conditions gait is altered relative to that of the control group.

https://physionet.org/content/gaitndd/1.0.0/

# Import Libraries and Data

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import scipy.stats
sns.set_theme(style="darkgrid")

import warnings
warnings.filterwarnings("ignore")

In [None]:
# Load the gait recordings from the Kaggle input directory of the
# "als-gait-dynamics" dataset.
df = pd.read_csv('/kaggle/input/als-gait-dynamics/gait.csv')

# Exploratory Data Analysis

In [None]:
# Report the table dimensions as a (rows, columns) tuple.
print(df.shape)

In [None]:
# Summarise the columns: names, dtypes, and non-null counts.
df.info()

In [None]:
# Preview the first rows of the raw table.
df.head()

In [None]:
# Distinct group labels present in the data.
df['Group'].unique()

In [None]:
# Distinct subject identifiers present in the data.
df['Subject'].unique()

In [None]:
# Number of rows recorded per subject (despite the name, these are
# per-subject counts, not per-class counts), followed by summary
# statistics of those counts.
class_counts = df.groupby('Subject').size()
print(class_counts.describe())

In [None]:
# Wide table: one row per time point, one column per subject, holding the
# right stride interval (pivot_table averages duplicate entries by default).
crosstab = df.pivot_table(
    index="Time Point",
    columns="Subject",
    values='Right Stride Interval (sec)',
)
print(crosstab.shape)
crosstab.head()

In [None]:
# Stride-interval trace of a single subject column (ALS1) against the
# time-point index; the trailing semicolon suppresses the cell's text output.
crosstab['ALS1'].plot();

In [None]:
# Name of the measurement column; later cells and helper functions read this
# notebook-level variable instead of repeating the literal.
series = 'Right Stride Interval (sec)'

In [None]:
# Per-subject distribution of the stride interval, coloured by group.
# Subjects are presumably nested within a single group each (e.g. "ALS1"), so
# hue levels should not be dodged: with dodge=False every box is centred on
# its subject tick at the default width.  The previous width=4 is far above
# the default and makes boxes overlap or sit off their ticks, depending on how
# the installed seaborn version dodges hue levels.
plt.figure(figsize=(15, 5))
chart = sns.boxplot(data=df, x='Subject', y=series, hue='Group', dodge=False)
# Rotate the existing tick labels in place rather than re-setting them.
plt.setp(chart.get_xticklabels(), rotation=45, horizontalalignment='right')
plt.show()

# Removing Outliers

In [None]:
# Keep only rows whose stride interval is below 3 (rows with a missing
# interval are dropped too, because NaN never compares as "less than"),
# then rebuild the time-point x subject table from the filtered rows.
df_copy = df.loc[df[series].lt(3)]
crosstab = df_copy.pivot_table(
    index="Time Point",
    columns="Subject",
    values='Right Stride Interval (sec)',
)
print(crosstab.shape)
crosstab.head()

In [None]:
# Replace implausible stride intervals (exactly 0, or greater than 3) with the
# median of the same subject column.
#
# This is done with one vectorised DataFrame.mask call instead of the
# element-wise ``crosstab[col].iloc[i] = med`` loop: that chained assignment
# writes to a temporary object under pandas copy-on-write, leaving
# ``crosstab`` unchanged, and is slow on long recordings.
#
# NaN cells compare False on both conditions and stay NaN.  ``median()`` skips
# NaN, which matches taking np.median over the non-missing values of a column.
# No special case is needed for 'Time Point': it is the index of the pivot
# table, not one of its columns.
implausible = (crosstab == 0) | (crosstab > 3)
crosstab = crosstab.mask(implausible, crosstab.median(), axis=1)

In [None]:
# Box plot of the cleaned stride times, one box per subject column.
labels = list(crosstab.columns)
plt.figure(figsize=(18,5))
ax = plt.gca()
# NOTE: dropna() keeps only the time points at which every subject has a value.
ax.boxplot(crosstab.dropna())
# Label the ticks after drawing: boxplot places the boxes at positions 1..N
# and resets the tick labels, so labels and rotation applied to the empty
# axes beforehand do not reliably survive.
ax.set_xticks(range(1, len(labels) + 1))
ax.set_xticklabels(labels, rotation=90)
# The columns of the pivot table are subjects, not conditions.
plt.xlabel('Subject')
plt.ylabel('Stride Time (sec)');

In [None]:
# One frame per group, selected by the code stored in the 'Group' column of
# the outlier-filtered data.
als_df = df_copy.loc[df_copy['Group'].eq('ALS')]
ctrl_df = df_copy.loc[df_copy['Group'].eq('CTR')]
park_df = df_copy.loc[df_copy['Group'].eq('PAR')]
hunt_df = df_copy.loc[df_copy['Group'].eq('HUN')]

In [None]:
def plot_ts(db):
    """Plot every subject's trace of the ``series`` column in one figure.

    Each subject in ``db`` is drawn as one line.  Successive traces are
    shifted upward by 0.5 so they do not overlap, which is why the y tick
    values are hidden.  The x axis is rescaled to [0, 1] for every subject,
    so it shows relative position within the recording, not elapsed time.

    Args:
        db: DataFrame with a 'Subject' column and the measurement column
            named by the notebook-level ``series`` variable.
    """
    plt.figure(figsize=(15,7))
    for i, subj in enumerate(db['Subject'].unique()):
        data = db[db['Subject'] == subj]
        x = np.linspace(0,1,len(data))
        # Vertical offset of 0.5 per subject stacks the traces.
        plt.plot(x,data[series]+i*.5,linewidth=2)
    # Title is derived from the plotted column; the previous hard-coded
    # "Blood Pressure Over Course of 2 Minutes" described neither the
    # measurement nor the normalised x axis.
    plt.title(f'{series} Over Time, by Subject')
    plt.xlabel('Time (normalized)')
    plt.ylabel('Subject')
    plt.yticks([])
    plt.show()

In [None]:
# Stacked traces for the subjects in the ALS frame.
plot_ts(als_df)

In [None]:
# Stacked traces for the subjects in the control frame.
plot_ts(ctrl_df)

In [None]:
# Stacked traces for the subjects in the Parkinson's frame.
plot_ts(park_df)

In [None]:
# Stacked traces for the subjects in the Huntington's frame.
plot_ts(hunt_df)

In [None]:
def compile_data(db):
    """Return the mean of the ``series`` column for each subject in ``db``.

    Subjects are visited in order of first appearance in ``db['Subject']``;
    the result is a list holding one mean per subject, in that order.
    """
    subject_ids = db['Subject']
    return [
        np.mean(db.loc[subject_ids == name, series])
        for name in subject_ids.unique()
    ]

In [None]:
# Per-subject means for each group, computed in the same order as before
# (ALS, control, Parkinson's, Huntington's).
als_results, ctrl_results, park_results, hunt_results = (
    compile_data(group_df) for group_df in (als_df, ctrl_df, park_df, hunt_df)
)

In [None]:
# Collect the per-subject means side by side, one column per group.
# Building the frame from a dict of Series lets pandas size the index to the
# largest group and pad the shorter columns with NaN.  The previous fixed
# 19-row zero "padding" column silently dropped every subject beyond the 19th
# of a group.
results_df = pd.DataFrame({
    'als_results': pd.Series(als_results, dtype=float),
    'hun_results': pd.Series(hunt_results, dtype=float),
    'par_results': pd.Series(park_results, dtype=float),
    'ctr_results': pd.Series(ctrl_results, dtype=float),
})

In [None]:
# Display the table of per-subject means, one column per group.
results_df

In [None]:
# Line plot of every group column against the row index of the table.
plt.plot(results_df)

In [None]:
# Box plot of the per-subject mean stride interval, one box per group.
labels = ['ALS', 'HUNT', 'PARK', 'CTRL']
# Columns listed explicitly so that their order is guaranteed to match labels.
box_columns = ['als_results', 'hun_results', 'par_results', 'ctr_results']
plt.figure()
plt.title('Average Stride Interval by Group')
ax = plt.gca()
# Remove the NaN padding column by column.  A row-wise results_df.dropna()
# keeps only rows where every group has a value, which cuts all groups down to
# the size of the smallest one and discards real subjects from the plot.
ax.boxplot([results_df[col].dropna().to_numpy() for col in box_columns])
# Label after drawing: boxplot places boxes at 1..N and resets tick labels.
ax.set_xticks(range(1, len(labels) + 1))
ax.set_xticklabels(labels, rotation=90)
plt.xlabel('Condition')
plt.ylabel('Stride Interval (sec)');

In [None]:
# Overlaid histograms of the subject means: ALS (red, 10 bins) against
# control (blue, 6 bins), both drawn semi-transparent.
for column, n_bins, rgba, name in (
    ('als_results', 10, [1,0,0,.5], 'ALS'),
    ('ctr_results', 6, [0,0,1,.5], 'CTRL'),
):
    plt.hist(results_df[column], bins=n_bins, color=rgba, alpha=0.5, label=name)
plt.ylabel('Count')
plt.xlabel('Stride Time')
plt.legend()
plt.show()

In [None]:
# Overlaid 8-bin histograms of the subject means for all four groups, drawn
# in the same order as before so the legend and layering are unchanged.
for column, shade, name in (
    ('als_results', 'blue', 'ALS'),
    ('ctr_results', 'darkred', 'CTRL'),
    ('par_results', 'green', 'PARK'),
    ('hun_results', 'orange', 'HUNT'),
):
    plt.hist(results_df[column], bins=8, color=shade, alpha=0.35, label=name)
plt.ylabel('Count')
plt.xlabel('Stride Time')
plt.legend()
plt.show()

In [None]:
import scipy.stats as stats

# Compare the ALS and control subject means, ignoring the NaN padding.
data1 = results_df['als_results'].dropna()
data2 = results_df['ctr_results'].dropna()

n1 = len(data1)
n2 = len(data2)

# equal_var=False selects Welch's t-test (no equal-variance assumption).
t,p = stats.ttest_ind(data1,data2,equal_var=False)

# Report the Welch-Satterthwaite degrees of freedom that belong to this test.
# The previous n1+n2-2 is only valid for the pooled-variance (Student) test
# and therefore did not match the statistic printed next to it.
# (Variable name kept as ``ddof``; it holds degrees of freedom.)
var_mean1 = data1.var(ddof=1) / n1
var_mean2 = data2.var(ddof=1) / n2
ddof = (var_mean1 + var_mean2) ** 2 / (
    var_mean1 ** 2 / (n1 - 1) + var_mean2 ** 2 / (n2 - 1)
)
print('t(%g) = %g, p=%g'%(ddof,t,p))

In [None]:
# Keep the first 13 rows so every group column has the same length.
# NOTE(review): 13 is presumably the size of the smallest group; confirm
# against the data.
shortened_results = results_df.head(13)
shortened_results

In [None]:
# Reshape the equal-length group columns into long format: one row per subject
# mean in 'Results', tagged with its group label in 'Group'.  Rows are ordered
# ALS, HUNT, PARK, CTRL, as before.
#
# The frame is built in a single step.  The previous
# ``new_df[col].iloc[a:b] = ...`` chained assignments do not write through
# under pandas copy-on-write (leaving the zero-filled placeholder intact), and
# they stored string labels in a column initialised as float zeros.
column_by_group = {
    'ALS': 'als_results',
    'HUNT': 'hun_results',
    'PARK': 'par_results',
    'CTRL': 'ctr_results',
}
new_df = pd.DataFrame({
    'Results': np.concatenate(
        [shortened_results[col].to_numpy() for col in column_by_group.values()]
    ),
    # Repeat each label once per row taken from its column.
    'Group': np.repeat(list(column_by_group), len(shortened_results)),
})

In [None]:
!pip install pingouin
import pingouin as pg

In [None]:
# ANOVA of 'Results' with 'Group' as the single between-subjects factor.
pg.anova(data=new_df,dv='Results',between='Group')

In [None]:
# Pairwise Tukey comparisons of 'Results' between the groups.
pg.pairwise_tukey(data=new_df,dv='Results',between='Group')

In [None]:
# Box plot of 'Results' split by the 'Group' column.
new_df.boxplot(by='Group');