In [None]:
import os
import glob
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv1D, MaxPooling1D, Flatten, Dense, Dropout


# Placeholder value: replace it with the path of the folder holding the .txt
# recordings before running; load_gait_files raises ValueError if no file is found.
DATA_DIR = "#folder path" # the folder with all .txt files

# Number of consecutive rows (time steps) in each window built by segment_signal.
WINDOW_SIZE = 128
# Fraction of a window shared with the next one; with WINDOW_SIZE = 128 and
# OVERLAP = 0.5, segment_signal advances int(128 * 0.5) = 64 rows per window.
OVERLAP = 0.5

import glob
import os
import pandas as pd
import numpy as np

def load_gait_files(folder):
    """Load every labelled gait recording in ``folder`` into one feature matrix.

    Each accepted ``*.txt`` file is read as a headerless, tab-separated table.
    Column 0 (time) is dropped and columns 1-18 are kept as features. The class
    label comes from the file name: names containing "Co"/"co" get label 0
    (healthy control) and names containing "Pt"/"pt" get label 1 (PD). Any
    other file is reported and skipped *without being parsed*, so unrelated
    text files in the folder cannot crash the loader.

    Parameters
    ----------
    folder : str
        Directory that contains the ``.txt`` recordings.

    Returns
    -------
    X_all : numpy.ndarray
        Feature rows of all accepted files stacked vertically, in sorted
        file-name order.
    y_all : numpy.ndarray
        One label (0 or 1) per row of ``X_all``.

    Raises
    ------
    ValueError
        If the folder holds no ``.txt`` file, or none of them can be labelled.
    """
    # glob returns files in filesystem order; sorting makes the row order (and
    # therefore any seeded split performed later) reproducible across machines.
    files = sorted(glob.glob(os.path.join(folder, "*.txt")))
    print(f"Found {len(files)} files.")
    if len(files) == 0:
        raise ValueError("No files found. Check folder path and extensions!")

    X_all = []
    y_all = []

    for file in files:
        # Decide the label first so that unknown files are never parsed.
        filename = os.path.basename(file)
        if "Co" in filename or "co" in filename:
            label = 0
        elif "Pt" in filename or "pt" in filename:
            label = 1
        else:
            print("Skipping unknown file:", filename)
            continue

        df = pd.read_csv(file, sep="\t", header=None)
        # Features: columns 1-18 (ignore column 0 = time)
        X = df.iloc[:, 1:19].values

        X_all.append(X)
        y_all.append(np.full((X.shape[0],), label))

    if not X_all:
        # Without this guard np.vstack([]) fails with an unrelated-looking message.
        raise ValueError(
            "No labelled gait files found. File names must contain 'Co' or 'Pt'."
        )

    # Combine all files
    X_all = np.vstack(X_all)
    y_all = np.hstack(y_all)
    print("Combined shape:", X_all.shape, y_all.shape)

    return X_all, y_all


# Read every recording under DATA_DIR into one feature matrix and label vector.
X, y = load_gait_files(DATA_DIR)

# Standardise each feature column to zero mean / unit variance.
# NOTE(review): the statistics are estimated from all rows, i.e. before any
# train/test split takes place.
scaler = StandardScaler().fit(X)
X_scaled = scaler.transform(X)

# SEGMENTATION 

def segment_signal(X, y, window_size=128, overlap=0.5):
    """Cut a row-wise signal into fixed-length, possibly overlapping windows.

    Parameters
    ----------
    X : array-like
        Signal with one row per time step; sliced along its first axis.
    y : array-like
        One numeric label per row of ``X``.
    window_size : int
        Number of rows in each window.
    overlap : float
        Fraction of a window shared with the next one. The hop between window
        starts is ``int(window_size * (1 - overlap))`` rows.

    Returns
    -------
    X_segments : numpy.ndarray
        Stacked windows, one per first-axis entry. Empty when ``X`` has fewer
        than ``window_size`` rows.
    y_segments : numpy.ndarray
        One integer label per window: the rounded mean of the labels inside it.

    Raises
    ------
    ValueError
        If the hop computed from ``window_size`` and ``overlap`` is below 1 row.

    Notes
    -----
    NOTE(review): when ``X`` is a concatenation of several recordings, windows
    that straddle two recordings are still emitted; their label is the rounded
    mean of the mixed labels.
    """
    step = int(window_size * (1 - overlap))
    if step < 1:
        # A zero hop would make range() fail with an unrelated message and a
        # negative hop would silently produce no windows at all.
        raise ValueError(
            f"overlap={overlap} with window_size={window_size} gives a step of "
            f"{step}; the step must be at least 1."
        )

    X_segments, y_segments = [], []

    # "+ 1" keeps the last full window: the final valid start index is
    # len(X) - window_size, and range() excludes its stop value.
    for start in range(0, len(X) - window_size + 1, step):
        end = start + window_size
        X_segments.append(X[start:end])
        y_segments.append(int(round(np.mean(y[start:end]))))

    return np.array(X_segments), np.array(y_segments)

# Window the *raw* signal X rather than X_scaled: X_scaled was standardised
# with statistics taken from every row, so using it would leak information
# about the test windows into training. Scaling is redone after the split.
X_segments, y_segments = segment_signal(X, y, WINDOW_SIZE, OVERLAP)
print("Segmented shape:", X_segments.shape, y_segments.shape)

# TRAIN/TEST SPLIT
# NOTE(review): with OVERLAP > 0, neighbouring windows share rows, and a random
# window-level split can place them on both sides of the split. A split by
# recording would avoid that, but needs per-file ids that are not available here.
X_train, X_test, y_train, y_test = train_test_split(
    X_segments,
    y_segments,
    test_size=0.2,
    random_state=42,
    stratify=y_segments,
)

# Fit the per-channel standardisation on the training windows only, then apply
# the same transform to both splits. StandardScaler expects 2-D input, so the
# (windows, time, channels) arrays are flattened to (windows * time, channels)
# and restored afterwards.
n_channels = X_train.shape[2]
scaler = StandardScaler().fit(X_train.reshape(-1, n_channels))
X_train = scaler.transform(X_train.reshape(-1, n_channels)).reshape(X_train.shape)
X_test = scaler.transform(X_test.reshape(-1, n_channels)).reshape(X_test.shape)

print("Train:", X_train.shape, "Test:", X_test.shape)

# CNN MODEL
# Seed the Python, NumPy and TensorFlow random generators so that weight
# initialisation, dropout masks and batch shuffling repeat from run to run
# (as far as the hardware backend allows).
tf.keras.utils.set_random_seed(42)

# Two Conv1D + MaxPooling1D stages over the time axis, followed by a dense
# head ending in a single sigmoid unit.
model = Sequential([
    Conv1D(64, 3, activation='relu', input_shape=(WINDOW_SIZE, X_segments.shape[2])),
    MaxPooling1D(2),
    Dropout(0.3),
    Conv1D(128, 3, activation='relu'),
    MaxPooling1D(2),
    Flatten(),
    Dense(128, activation='relu'),
    Dropout(0.3),
    Dense(1, activation='sigmoid')  # Binary classification
])

model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])
model.summary()

# TRAINING
# NOTE(review): the test split is also passed as validation data. That is fine
# for monitoring, but if these curves are ever used to tune the model, a
# separate validation split should be carved out of the training data.
history = model.fit(
    X_train,
    y_train,
    validation_data=(X_test, y_test),
    epochs=20,
    batch_size=64,
)

# PLOTTING RESULTS
# One panel per tracked metric; each shows the training and validation curve.
fig, (ax_acc, ax_loss) = plt.subplots(1, 2, figsize=(12, 4))

for ax, metric, title in (
    (ax_acc, 'accuracy', 'Model Accuracy'),
    (ax_loss, 'loss', 'Model Loss'),
):
    ax.plot(history.history[metric], label='Train')
    ax.plot(history.history['val_' + metric], label='Validation')
    ax.set_title(title)
    ax.legend()

plt.show()

# EVALUATE
# model.evaluate returns the loss followed by the compiled metrics (accuracy).
loss, acc = model.evaluate(X_test, y_test)
print(f"\nTest Accuracy: {acc*100:.2f}% | Test Loss: {loss:.4f}")

# VISUALIZE GAIT WAVES
# Example: showing up to 5 random samples from test set
import random

# random.sample raises ValueError when asked for more items than the
# population holds, so cap the request at the number of test windows.
n_examples = min(5, len(X_test))

for i in random.sample(range(len(X_test)), n_examples):
    fig, ax = plt.subplots(figsize=(12, 4))
    for j in range(X_test.shape[2]):  # plot each sensor channel
        ax.plot(X_test[i, :, j], label=f'Sensor {j+1}')
    ax.set_title(f'Gait Wave - Sample {i} | Label: {"PD" if y_test[i]==1 else "Healthy"}')
    ax.set_xlabel('Time Steps')
    ax.set_ylabel('Normalized Force')
    # One legend entry per channel: use several columns so it does not hide the curves.
    ax.legend(ncol=3, fontsize='small')
    plt.show()


In [None]:
# 18 subplots (one per sensor column) overlaying one PD recording and one healthy recording
import os
import glob
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

# Folder path (placeholder: replace with the folder that holds the .txt recordings)
DATA_DIR = "#folder path"


def _load_sensor_frame(path):
    """Read one tab-separated recording and return columns 1-18 as FSR_1..FSR_18."""
    frame = pd.read_csv(path, sep="\t", header=None).iloc[:, 1:19]
    frame.columns = [f"FSR_{i+1}" for i in range(18)]
    return frame


# List files (sorted so the candidate lists do not depend on filesystem order)
pd_files = sorted(glob.glob(os.path.join(DATA_DIR, "*Pt*.txt")))   # Parkinson's
healthy_files = sorted(glob.glob(os.path.join(DATA_DIR, "*Co*.txt")))  # Healthy

# np.random.choice fails with an unhelpful message on an empty list, which is
# exactly what happens while DATA_DIR still holds the placeholder value.
if not pd_files or not healthy_files:
    raise ValueError(
        f"Need at least one '*Pt*.txt' and one '*Co*.txt' file in {DATA_DIR!r}; "
        f"found {len(pd_files)} PD and {len(healthy_files)} healthy."
    )

#  Pick random files
pd_file = np.random.choice(pd_files)
healthy_file = np.random.choice(healthy_files)
print("PD File:", pd_file)
print("Healthy File:", healthy_file)

#  Load files
df_pd = _load_sensor_frame(pd_file)
df_healthy = _load_sensor_frame(healthy_file)

#  Plot overlaid signals: a 9 x 2 grid with one panel per sensor column
fig, axes = plt.subplots(9, 2, figsize=(15, 30))

for i, ax in enumerate(axes.flat):
    ax.plot(df_pd.iloc[:, i], label="PD", color='r')
    ax.plot(df_healthy.iloc[:, i], label="Healthy", color='g', alpha=0.7)
    ax.set_title(f"Sensor {i+1} - PD vs Healthy")
    ax.set_xlabel("Time steps")
    ax.set_ylabel("Force (N)")
    ax.legend()

fig.tight_layout()
plt.show()
# Colour key: red = PD, green = healthy

In [None]:
# Single figure: the average across all sensor columns, PD vs healthy
import matplotlib.pyplot as plt

# Collapse the sensor columns of each recording into one mean value per time step.
# Relies on df_pd and df_healthy created by the previous cell.
pd_avg = df_pd.mean(axis=1)
healthy_avg = df_healthy.mean(axis=1)

fig, ax = plt.subplots(figsize=(10, 4))
ax.plot(pd_avg, label="PD", color='r')
ax.plot(healthy_avg, label="Healthy", color='g', alpha=0.7)
ax.set_title("Average Gait Wave - PD vs Healthy")
ax.set_xlabel("Time steps")
ax.set_ylabel("Force (N)")
ax.legend()
fig.tight_layout()

plt.show()
