In [1]:
# Required Libraries
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import os

# Read the two cohort CSV files (MIT-CS1PD and MIT-CS2PD). Both paths are
# relative to the directory the notebook is run from.
df_cs1 = pd.read_csv(
    "neuroqwerty-mit-csxpd-dataset-1.0.0/neuroQWERTY/MIT-CS1PD/GT_DataPD_MIT-CS1PD.csv"
)
df_cs2 = pd.read_csv(
    "neuroqwerty-mit-csxpd-dataset-1.0.0/neuroQWERTY/MIT-CS2PD/GT_DataPD_MIT-CS2PD.csv"
)

# Stack both tables row-wise into a single frame; the row index is renumbered
# from 0 so labels from the two files cannot collide.
df_all = pd.concat((df_cs1, df_cs2), axis=0, ignore_index=True)

# Folder that receives the saved figures. `exist_ok=True` makes re-running
# this cell safe when the folder is already there.
output_dir = "static_visualizations"
os.makedirs(output_dir, exist_ok=True)

# Plot 1: Histogram of UPDRS scores
# 20-bin histogram of the `updrs108` column with a KDE curve overlaid, drawn
# on an explicitly created figure/axes pair and written to disk as a PNG.
fig, ax = plt.subplots(figsize=(8, 5))
sns.histplot(df_all["updrs108"], bins=20, kde=True, ax=ax)
ax.set_title("Distribution of UPDRS Scores")
ax.set_xlabel("UPDRS Score")
ax.set_ylabel("Count")
fig.tight_layout()
fig.savefig(f"{output_dir}/updrs_distribution.png")
# Release the figure so it is neither rendered inline nor kept in memory.
plt.close(fig)

# Plot 2: Typing Speed vs. UPDRS
# Scatter of `typingSpeed` against `updrs108`, coloured by `gt`, with one
# gray regression line fitted over all rows (no per-group fit).
fig, ax = plt.subplots(figsize=(8, 5))
sns.scatterplot(data=df_all, x="updrs108", y="typingSpeed", hue="gt", ax=ax)
sns.regplot(
    data=df_all,
    x="updrs108",
    y="typingSpeed",
    scatter=False,  # points are already drawn by the scatterplot above
    color="gray",
    ax=ax,
)
ax.set_title("Typing Speed vs. UPDRS Score")
ax.set_xlabel("UPDRS Score")
ax.set_ylabel("Typing Speed (keys/min)")
# Rebuild the legend so it carries a readable title instead of the raw
# column name.
ax.legend(title="PD Diagnosis")
fig.tight_layout()
fig.savefig(f"{output_dir}/typing_speed_vs_updrs.png")
plt.close(fig)

# Plot 3: Boxplot of Typing Speed by PD Diagnosis
# The display label of each box is derived from its own `gt` value and the
# category order is pinned explicitly. Relabelling tick positions 0 and 1
# after the fact would mislabel the boxes whenever seaborn's inferred order is
# not [negative, positive] -- for example when only one class is present.
diagnosis_names = {False: "No PD", True: "PD"}
# Mapping through `dict.get` uses plain Python equality, so boolean flags and
# 0/1 codes are both recognised; anything else comes back as missing.
diagnosis = df_all["gt"].map(diagnosis_names.get)
unmapped = diagnosis.isna() & df_all["gt"].notna()
if unmapped.any():
    # Fail loudly instead of silently dropping rows or mislabelling boxes.
    raise ValueError(
        "Unexpected values in 'gt' (expected True/False or 1/0): "
        f"{df_all.loc[unmapped, 'gt'].unique().tolist()}"
    )

fig, ax = plt.subplots(figsize=(8, 5))
sns.boxplot(
    x=diagnosis,
    y=df_all["typingSpeed"],
    order=list(diagnosis_names.values()),  # "No PD" first, then "PD"
    ax=ax,
)
ax.set_title("Typing Speed Distribution by Diagnosis")
ax.set_xlabel("Parkinson’s Diagnosis")
ax.set_ylabel("Typing Speed (keys/min)")
fig.tight_layout()
fig.savefig(f"{output_dir}/typing_speed_by_diagnosis.png")
# Release the figure so it is neither rendered inline nor kept in memory.
plt.close(fig)

# Plot 4: NQ Score vs. UPDRS
# Scatter of `nqScore` against `updrs108`, coloured by `gt`, with one black
# regression line fitted over all rows.
fig, ax = plt.subplots(figsize=(8, 5))
sns.scatterplot(data=df_all, x="updrs108", y="nqScore", hue="gt", ax=ax)
sns.regplot(
    data=df_all,
    x="updrs108",
    y="nqScore",
    scatter=False,  # points are already drawn by the scatterplot above
    color="black",
    ax=ax,
)
ax.set_title("NQ Score vs. UPDRS Score")
ax.set_xlabel("UPDRS Score")
ax.set_ylabel("nqScore")
# Rebuild the legend so it carries a readable title instead of the raw
# column name.
ax.legend(title="PD Diagnosis")
fig.tight_layout()
fig.savefig(f"{output_dir}/nqscore_vs_updrs.png")
plt.close(fig)

# Plot 5: afTap vs sTap
# Scatter of the `sTap` column against the `afTap` column, coloured by `gt`.
# NOTE(review): the axis labels state milliseconds -- confirm the units of
# `afTap`/`sTap` against the dataset documentation.
fig, ax = plt.subplots(figsize=(8, 5))
sns.scatterplot(data=df_all, x="afTap", y="sTap", hue="gt", ax=ax)
ax.set_title("Alternate Finger Tap vs Single Finger Tap Time")
ax.set_xlabel("afTap (ms)")
ax.set_ylabel("sTap (ms)")
# Rebuild the legend so it carries a readable title instead of the raw
# column name.
ax.legend(title="PD Diagnosis")
fig.tight_layout()
fig.savefig(f"{output_dir}/afTap_vs_sTap.png")
plt.close(fig)

# Plot 6: Boxplot of nqScore by PD Diagnosis
# The display label of each box is derived from its own `gt` value and the
# category order is pinned explicitly. Relabelling tick positions 0 and 1
# after the fact would mislabel the boxes whenever seaborn's inferred order is
# not [negative, positive] -- for example when only one class is present.
diagnosis_names = {False: "No PD", True: "PD"}
# Mapping through `dict.get` uses plain Python equality, so boolean flags and
# 0/1 codes are both recognised; anything else comes back as missing.
diagnosis = df_all["gt"].map(diagnosis_names.get)
unmapped = diagnosis.isna() & df_all["gt"].notna()
if unmapped.any():
    # Fail loudly instead of silently dropping rows or mislabelling boxes.
    raise ValueError(
        "Unexpected values in 'gt' (expected True/False or 1/0): "
        f"{df_all.loc[unmapped, 'gt'].unique().tolist()}"
    )

fig, ax = plt.subplots(figsize=(8, 5))
sns.boxplot(
    x=diagnosis,
    y=df_all["nqScore"],
    order=list(diagnosis_names.values()),  # "No PD" first, then "PD"
    ax=ax,
)
ax.set_title("nqScore Distribution by Diagnosis")
ax.set_xlabel("Parkinson’s Diagnosis")
ax.set_ylabel("nqScore")
fig.tight_layout()
fig.savefig(f"{output_dir}/nqscore_by_diagnosis.png")
# Release the figure so it is neither rendered inline nor kept in memory.
plt.close(fig)
