# Install and import libraries

In [57]:
%pip install pydot
%pip install tensorflow
%pip install scikit-learn

Note: you may need to restart the kernel to use updated packages.



[notice] A new release of pip is available: 25.0.1 -> 25.1.1
[notice] To update, run: python.exe -m pip install --upgrade pip


Note: you may need to restart the kernel to use updated packages.



[notice] A new release of pip is available: 25.0.1 -> 25.1.1
[notice] To update, run: python.exe -m pip install --upgrade pip


Note: you may need to restart the kernel to use updated packages.



[notice] A new release of pip is available: 25.0.1 -> 25.1.1
[notice] To update, run: python.exe -m pip install --upgrade pip


In [58]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import pickle

import tensorflow as tf
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv1D, MaxPooling1D, Flatten, Dense, Dropout, BatchNormalization
from tensorflow.keras.utils import to_categorical

from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split

from pathlib import Path

from constants import (
    DATA_INPUT_PATH,
    MODEL_PATH,
    METADATA_PATH,
)

from constants import CLASSES
num_classes = len(CLASSES)

## Convert .csv(s) to dataframes and concatenate

In [59]:
from pathlib import Path
import pandas as pd

dfs = []

# Set your input path
DATA_INPUT_PATH = Path(DATA_INPUT_PATH)  

# Find CSV files
files = list(DATA_INPUT_PATH.glob("*.csv"))
print("Looking in:", DATA_INPUT_PATH)
print("Found CSVs:", files)

if not files:
    raise FileNotFoundError(f"No CSV files found in {DATA_INPUT_PATH.resolve()}")

# Read all files
for file in files:
    df = pd.read_csv(file)
    dfs.append(df)

# Concatenate into one DataFrame
df = pd.concat(dfs, axis=0)

# Reorder/select columns (if applicable)

# Remove duplicates
print(f"Shape before dropping duplicates: {df.shape}")
df = df.drop_duplicates()
print(f"Shape after dropping duplicates: {df.shape}")


Looking in: data
Found CSVs: [WindowsPath('data/physio_emg_imu_data_20250429_001144.csv'), WindowsPath('data/physio_emg_imu_data_20250429_174128.csv'), WindowsPath('data/physio_emg_imu_data_20250429_175521.csv'), WindowsPath('data/physio_emg_imu_data_20250430_002614.csv')]
Shape before dropping duplicates: (42037, 16)
Shape after dropping duplicates: (42037, 16)


In [60]:
# --- NEW CELL: Calculate Calibration Stats from 'Rest' Data ---
import numpy as np
from constants import NUM_EMG_SENSORS # Make sure NUM_EMG_SENSORS is defined in constants.py (should be 8)

print("Calculating calibration statistics from 'Rest' data...")

# Select only the 'Rest' data (gesture_id == 0)
rest_df = df[df['gesture_id'] == 0].copy()

# Select only the EMG columns (s1 to s8)
emg_columns = [f"s{i}" for i in range(1, NUM_EMG_SENSORS + 1)]

# Select only IMU columns (quat)
imu_columns = ["quat_w","quatx","quaty","quatz"]

# Get landmark data
landmark_columns = [c for c in df.columns if c.endswith(("_x","_y","_z"))]
print(f"Landmark columns are: {landmark_columns}")

rest_emg_data = rest_df[emg_columns].values # Get as numpy array

if len(rest_emg_data) > 0:
    # Calculate mean and std dev for each EMG channel (column-wise)
    calibration_mean = np.mean(rest_emg_data, axis=0)
    calibration_std = np.std(rest_emg_data, axis=0)

    # Avoid division by zero: set std dev to 1 if it's 0 or very close to 0
    calibration_std[calibration_std < 1e-6] = 1.0

    print(f"Calculated Calibration Mean (shape {calibration_mean.shape}):\n{np.round(calibration_mean, 2)}")
    print(f"Calculated Calibration StdDev (shape {calibration_std.shape}):\n{np.round(calibration_std, 2)}")
else:
    print("ERROR: No 'Rest' data found to calculate calibration statistics!")
    # Handle this error appropriately - maybe exit or use default values
    # Using default values for now, but this indicates a data problem
    calibration_mean = np.zeros(NUM_EMG_SENSORS)
    calibration_std = np.ones(NUM_EMG_SENSORS)
    print("Using default calibration stats (mean=0, std=1). CHECK YOUR DATA.")

# Ensure these variables are available for the next cell
# (They will be if run in the same kernel session)
# --- END OF NEW CELL ---

Calculating calibration statistics from 'Rest' data...
Landmark columns are: ['quat_x', 'quat_y', 'quat_z']
Calculated Calibration Mean (shape (8,)):
[78.91 52.52 40.85 40.06 60.6  77.78 71.22 61.55]
Calculated Calibration StdDev (shape (8,)):
[78.76 51.92 19.09 13.64 29.51 71.93 43.05 41.06]


In [61]:
import numpy as np
from constants import WINDOW_SIZE, WINDOW_STEP, NUM_EMG_SENSORS
from sklearn.preprocessing import StandardScaler

# assume df, calibration_mean, calibration_std, landmark_columns etc. are already defined

# define a real scaler object
scaler = StandardScaler()
scaler.mean_, scaler.scale_ = calibration_mean, calibration_std

# build raw windows
channels = [f"s{i}" for i in range(1, NUM_EMG_SENSORS+1)] + ["quat_w","quatx","quaty","quatz"]
raw       = df[channels].values
landmarks = df[landmark_columns].values

X_win, Y = [], []
for start in range(0, len(raw) - WINDOW_SIZE + 1, WINDOW_STEP):
    win = raw[start:start+WINDOW_SIZE]
    emg = win[:,:NUM_EMG_SENSORS]
    imu = win[:,NUM_EMG_SENSORS:]
    # use scaler here
    norm_emg = scaler.transform(emg)
    win_norm = np.concatenate([norm_emg, imu], axis=1)
    X_win.append(win_norm)
    center = start + WINDOW_SIZE//2
    Y.append(landmarks[center])

X_win = np.stack(X_win)
Y     = np.array(Y)
print("All windows X_win shape:", X_win.shape)
print("All targets   Y    shape:", Y.shape)


KeyError: "['quatx', 'quaty', 'quatz'] not in index"

## Scale, split, and one-shot encode RMS feature windows

In [None]:
from sklearn.model_selection import train_test_split

# 1) train+val vs test
X_trainval, X_test, Y_trainval, Y_test = train_test_split(
    X_win, Y, test_size=0.20, random_state=42
)
# 2) train vs val
X_train, X_val, Y_train, Y_val = train_test_split(
    X_trainval, Y_trainval, test_size=0.25, random_state=42
)

print("Train:", X_train.shape, Y_train.shape)
print("Val:  ", X_val.shape,   Y_val.shape)
print("Test: ", X_test.shape,  Y_test.shape)

Train: (252, 30, 12) (252, 72)
Val:   (84, 30, 12) (84, 72)
Test:  (84, 30, 12) (84, 72)


## Build, compile & train a simple dense classifier

In [None]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv1D, MaxPooling1D, Flatten, Dense, Dropout, BatchNormalization
from tensorflow.keras.optimizers import Adam
import tensorflow as tf

# --- BEGIN shape‐fix (unchanged) -------
def ensure_3d(X, window_size, n_channels):
    if X.ndim == 3:
        return X
    if X.ndim == 2 and X.shape[1] == n_channels:
        n_samples = X.shape[0]
        return X.reshape(n_samples, window_size, n_channels)
    raise ValueError(f"Cannot reshape array of shape {X.shape} into 3D with {window_size}×{n_channels}")

X_train = ensure_3d(X_train, WINDOW_SIZE, 12)
X_val   = ensure_3d(X_val,   WINDOW_SIZE, 12)
X_test  = ensure_3d(X_test,  WINDOW_SIZE, 12)

print("After reshape —")
print(" Train:", X_train.shape)
print(" Val:  ", X_val.shape)
print(" Test: ", X_test.shape)
# --- END shape‐fix -------

# dynamically pick up how many outputs your Y has
n_outputs = Y_train.shape[1]
print("Your network will produce", n_outputs, "values per window (should equal Y.shape[1])")

model = Sequential([
    Conv1D(32, kernel_size=3, activation='relu', input_shape=(WINDOW_SIZE, 12)),
    MaxPooling1D(pool_size=2),
    BatchNormalization(),

    Conv1D(64, kernel_size=3, activation='relu'),
    MaxPooling1D(pool_size=2),
    BatchNormalization(),

    Flatten(),
    Dense(128, activation='relu'),
    Dropout(0.3),
    Dense(64, activation='relu'),

    # match the number of target‐dimensions
    Dense(n_outputs, activation="linear")
])

early_stopping = tf.keras.callbacks.EarlyStopping(
    monitor='val_loss',
    patience=3,
    restore_best_weights=True
)

model.compile(
    optimizer=Adam(1e-3),
    loss='mse',
    metrics=['mae']
)

history = model.fit(
    X_train, Y_train,
    validation_data=(X_val, Y_val),
    epochs=100,
    batch_size=64,
    callbacks=[early_stopping]
)

test_mse, test_mae = model.evaluate(X_test, Y_test)
print(f"Final test MSE: {test_mse:.4f}, MAE: {test_mae:.4f}")


After reshape —
 Train: (252, 30, 12)
 Val:   (84, 30, 12)
 Test:  (84, 30, 12)
Your network will produce 72 values per window (should equal Y.shape[1])
Epoch 1/100


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 75ms/step - loss: 0.6342 - mae: 0.5897 - val_loss: 0.8875 - val_mae: 0.6279
Epoch 2/100
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 25ms/step - loss: 0.2977 - mae: 0.4197 - val_loss: 0.3477 - val_mae: 0.4213
Epoch 3/100
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 27ms/step - loss: 0.1947 - mae: 0.3415 - val_loss: 0.2212 - val_mae: 0.3508
Epoch 4/100
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 23ms/step - loss: 0.1460 - mae: 0.3001 - val_loss: 0.1785 - val_mae: 0.3220
Epoch 5/100
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 26ms/step - loss: 0.1210 - mae: 0.2720 - val_loss: 0.1472 - val_mae: 0.2966
Epoch 6/100
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 22ms/step - loss: 0.1016 - mae: 0.2503 - val_loss: 0.1245 - val_mae: 0.2740
Epoch 7/100
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 20ms/step - loss: 0.0900 - mae: 0.2348

## Save the model and the scalar + feature list

In [None]:
# --- MODIFIED CELL 8: Save Model and Metadata ---
# make sure the folder exists
(Path(MODEL_PATH).parent).mkdir(parents=True, exist_ok=True)

# Save the trained model (this line is unchanged)

# CHANGED TO TEMP PATH, REPLACE WITH MODEL_PATH LATER
model.save(MODEL_PATH)
print("Saved model to", MODEL_PATH)

# pickle the scaler, feature names, AND calibration stats

# REPLACE WITH MODEL_PATH
with open(MODEL_PATH, "wb") as f:
    # channels list we used for X is:
    feature_names = [f"s{i}" for i in range(1, NUM_EMG_SENSORS + 1)] + ["quat_w", "quat_x", "quat_y", "quat_z"]
    # <<< Save all three items in a tuple >>>
    try:
        pickle.dump((scaler, feature_names, calibration_mean, calibration_std), f)

        # REPLACE WITH METADATA_PATH
        print("Saved metadata (scaler, features, calib_mean, calib_std) to", METADATA_PATH)
    except NameError:
        print("ERROR: Could not save metadata. Was calibration_mean/std calculated earlier?")
    except Exception as e:
        print(f"ERROR saving metadata: {e}")


# --- END OF MODIFIED CELL 8 ---



Saved model to model\physio_model.h5
Saved metadata (scaler, features, calib_mean, calib_std) to model-v4\model_temp\physio_temp_meta.pkl


In [None]:
import os
from constants import DATA_INPUT_PATH, MODEL_INPUT_PATH, MODEL_PATH, METADATA_PATH

print("DATA_INPUT_PATH ->", DATA_INPUT_PATH)
print("  contains:", os.listdir(DATA_INPUT_PATH))
print()
print("MODEL_INPUT_PATH ->", MODEL_INPUT_PATH)
print("  contains:", os.listdir(MODEL_INPUT_PATH))
print()
print("MODEL_PATH       ->", MODEL_PATH, "exists?", os.path.exists(MODEL_PATH))
print("METADATA_PATH    ->", METADATA_PATH, "exists?", os.path.exists(METADATA_PATH))


DATA_INPUT_PATH -> data
  contains: ['artemis_archive', 'convert_old_data.py', 'physio_emg_imu_data_20250429_001144.csv', 'physio_emg_imu_data_20250429_174128.csv', 'physio_emg_imu_data_20250429_175521.csv', 'physio_emg_imu_data_20250430_002614.csv']

MODEL_INPUT_PATH -> model
  contains: ['artemis_archive', 'physio_metadata.pkl', 'physio_model.h5']

MODEL_PATH       -> model\physio_model.h5 exists? True
METADATA_PATH    -> model\physio_metadata.pkl exists? True


In [None]:
import constants
print(constants.__file__)


c:\Users\blacb\Documents\GitHub\NeuroSyn\model-v4\constants.py


In [None]:
import numpy as np
import pandas as pd
from pathlib import Path
from tensorflow.keras.models import load_model
import pickle

from constants import DATA_INPUT_PATH, MODEL_PATH, METADATA_PATH, CLASSES, WINDOW_SIZE

# — 1) load the freshest CSV —
data_dir = Path(DATA_INPUT_PATH)
csv_path = max(data_dir.glob("*.csv"), key=lambda p: p.stat().st_mtime)
df      = pd.read_csv(csv_path)

# — 2) load scaler + feature names —
with open(METADATA_PATH, "rb") as f:
    scaler, feature_names, calibration_mean, calibration_std = pickle.load(f)

# — 3) load your trained model —
model = load_model(MODEL_PATH)

# — 4) for each class, grab the first 100 consecutive rows of that gesture,
#         compute the 12-dim RMS feature vector, and predict —
demo_rows = []
for gid, name in CLASSES.items():
    sub = df[df["gesture_id"] == gid]
    if len(sub) < WINDOW_SIZE:
        print(f"⚠️  not enough samples for '{name}' (need {WINDOW_SIZE}, got {len(sub)})")
        continue
    
    window  = sub[feature_names].values[:WINDOW_SIZE]
    # Separate EMG/IMU parts from the 'window' variable
    emg_part = window[:, :NUM_EMG_SENSORS]
    imu_part = window[:, NUM_EMG_SENSORS:]

    # Apply Z-score normalization to EMG part using LOADED stats
    # (Make sure calibration_mean and calibration_std were loaded correctly earlier in this cell)
    try:
        normalized_emg_part = (emg_part - calibration_mean) / calibration_std
    except NameError:
        print(f"ERROR: calibration_mean/std not available for demo processing (Class {gid}). Skipping.")
        continue # Skip to the next class if stats are missing

    # Calculate RMS on normalized EMG and raw IMU
    rms_emg = np.sqrt(np.mean(normalized_emg_part**2, axis=0))
    rms_imu = np.sqrt(np.mean(imu_part**2, axis=0))

    # Concatenate to get the final feature vector for the demo window
    rms_vec = np.concatenate([rms_emg, rms_imu])

    demo_rows.append((gid, name, rms_vec))

# assemble our demo set
y_true = [gid for gid,_,_ in demo_rows]
X_demo = np.vstack([vec for *_, vec in demo_rows])

# — 5) scale & predict —
X_scaled = scaler.transform(X_demo)
preds     = model.predict(X_scaled, verbose=0)
pred_ids  = preds.argmax(axis=1)

# — 6) report —
print("Demo results:")
for (true_id, true_name, _), pred in zip(demo_rows, pred_ids):
    print(f" • True: {true_id:2d} {true_name:17s} → Predicted: {pred:2d} {CLASSES[pred]}")
    
acc = np.mean(np.array(pred_ids) == np.array(y_true))
print(f"\nDemo accuracy (RMS‐window per class): {acc:.0%}")




Demo results:
 • True:  0 Rest              → Predicted:  0 Rest
 • True:  1 Wrist Flexion     → Predicted:  1 Wrist Flexion
 • True:  2 Wrist Extension   → Predicted:  2 Wrist Extension
 • True:  3 Elbow Flexion     → Predicted:  2 Wrist Extension
 • True:  4 Elbow Extension   → Predicted:  3 Elbow Flexion
 • True:  5 Hand Close        → Predicted:  5 Hand Close
 • True:  6 Hand Open         → Predicted:  6 Hand Open
 • True:  7 Forearm Pronation → Predicted:  7 Forearm Pronation
 • True:  8 Forearm Supination → Predicted:  8 Forearm Supination

Demo accuracy (RMS‐window per class): 78%
