# Install and import libraries

In [1]:
%pip install pydot
%pip install tensorflow
%pip install scikit-learn





[notice] A new release of pip is available: 25.0.1 -> 25.1
[notice] To update, run: python.exe -m pip install --upgrade pip


Note: you may need to restart the kernel to use updated packages.



[notice] A new release of pip is available: 25.0.1 -> 25.1
[notice] To update, run: python.exe -m pip install --upgrade pip


Note: you may need to restart the kernel to use updated packages.



[notice] A new release of pip is available: 25.0.1 -> 25.1
[notice] To update, run: python.exe -m pip install --upgrade pip


In [2]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import pickle

import tensorflow as tf
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv1D, MaxPooling1D, Flatten, Dense, Dropout, BatchNormalization
from tensorflow.keras.utils import to_categorical

from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split

from pathlib import Path

from constants import (
    DATA_INPUT_PATH,
    MODEL_PATH,
    METADATA_PATH,
)

from constants import CLASSES
num_classes = len(CLASSES)

## Convert .csv(s) to dataframes and concatenate

In [3]:
# --- Existing code before the loop ---
from pathlib import Path
import pandas as pd

dfs = []

# Set your input path
DATA_INPUT_PATH = Path(DATA_INPUT_PATH)

# Find CSV files
files = list(DATA_INPUT_PATH.glob("*.csv"))
print("Looking in:", DATA_INPUT_PATH)
print("Found CSVs:", files)

if not files:
    raise FileNotFoundError(f"No CSV files found in {DATA_INPUT_PATH.resolve()}")

# --- NEW Modified Loop ---
print("\nProcessing files and removing rows with incomplete landmarks:")
for file in files:
    print(f"  Reading: {file.name}")
    # Read a single CSV
    df_single = pd.read_csv(file)
    initial_rows = len(df_single)

    # Define landmark columns based on THIS file's columns
    landmark_columns = [c for c in df_single.columns if c.endswith(("_x_shldr_norm", "_y_shldr_norm", "_z_shldr_norm"))]

    if not landmark_columns:
         print(f"    WARNING: No landmark columns found in {file.name}. Skipping filtering for this file.")
    else:
        # Drop rows with ANY NaN in the landmark columns for THIS file
        df_single.dropna(subset=landmark_columns, inplace=True)
        dropped_rows = initial_rows - len(df_single)
        if dropped_rows > 0:
            print(f"    Dropped {dropped_rows} rows due to missing landmarks.")

    # Append the cleaned DataFrame (or original if no landmarks found/dropped)
    if len(df_single) > 0:
        dfs.append(df_single)
    else:
        print(f"    WARNING: No rows remaining in {file.name} after filtering. Skipping this file.")


# --- Existing code after the loop ---
# Concatenate into one DataFrame (only if dfs list is not empty)
if not dfs:
     raise ValueError("No dataframes to concatenate after filtering. Check input files and filtering logic.")

df = pd.concat(dfs, axis=0)

# Remove duplicates (optional, but good practice)
print(f"\nShape before dropping duplicates across all files: {df.shape}")
df = df.drop_duplicates()
print(f"Shape after dropping duplicates: {df.shape}")

# --- You no longer need the df.dropna() call here specifically for landmarks ---
# --- as it was handled file-by-file above                                ---

Looking in: data
Found CSVs: [WindowsPath('data/physio_emg_imu_data_20250504_215905.csv')]

Processing files and removing rows with incomplete landmarks:
  Reading: physio_emg_imu_data_20250504_215905.csv
    Dropped 155 rows due to missing landmarks.

Shape before dropping duplicates across all files: (1985, 91)
Shape after dropping duplicates: (1985, 91)


In [4]:
# --- NEW CELL: Calculate Calibration Stats from 'Rest' Data ---
import numpy as np
from constants import NUM_EMG_SENSORS # Make sure NUM_EMG_SENSORS is defined in constants.py (should be 8)

print("Calculating calibration statistics from 'Rest' data...")

# Select only the 'Rest' data (gesture_id == 0)
rest_df = df[df['gesture_id'] == 0].copy()

# Select only the EMG columns (s1 to s8)
emg_columns = [f"s{i}_norm" for i in range(1, NUM_EMG_SENSORS + 1)]

# Select only IMU columns (quat)
imu_columns = ["quat_w","quatx","quaty","quatz"]

#select landmark columns based on your CSV header
landmark_columns = [c for c in df.columns if c.endswith(("_x_shldr_norm", "_y_shldr_norm", "_z_shldr_norm"))]
print(f"Landmark columns found ({len(landmark_columns)}): {landmark_columns}") # Add this print statement to verify

rest_emg_data = rest_df[emg_columns].values # Get as numpy array

if len(rest_emg_data) > 0:
    # Calculate mean and std dev for each EMG channel (column-wise)
    calibration_mean = np.mean(rest_emg_data, axis=0)
    calibration_std = np.std(rest_emg_data, axis=0)

    # Avoid division by zero: set std dev to 1 if it's 0 or very close to 0
    calibration_std[calibration_std < 1e-6] = 1.0

    print(f"Calculated Calibration Mean (shape {calibration_mean.shape}):\n{np.round(calibration_mean, 2)}")
    print(f"Calculated Calibration StdDev (shape {calibration_std.shape}):\n{np.round(calibration_std, 2)}")
else:
    print("ERROR: No 'Rest' data found to calculate calibration statistics!")
    # Handle this error appropriately - maybe exit or use default values
    # Using default values for now, but this indicates a data problem
    calibration_mean = np.zeros(NUM_EMG_SENSORS)
    calibration_std = np.ones(NUM_EMG_SENSORS)
    print("Using default calibration stats (mean=0, std=1). CHECK YOUR DATA.")

# Ensure these variables are available for the next cell
# (They will be if run in the same kernel session)
# --- END OF NEW CELL ---

Calculating calibration statistics from 'Rest' data...
Landmark columns found (72): ['Pose_L_Shoulder_x_shldr_norm', 'Pose_L_Shoulder_y_shldr_norm', 'Pose_L_Shoulder_z_shldr_norm', 'Pose_L_Elbow_x_shldr_norm', 'Pose_L_Elbow_y_shldr_norm', 'Pose_L_Elbow_z_shldr_norm', 'Pose_L_Wrist_x_shldr_norm', 'Pose_L_Wrist_y_shldr_norm', 'Pose_L_Wrist_z_shldr_norm', 'Hand_R_Wrist_x_shldr_norm', 'Hand_R_Wrist_y_shldr_norm', 'Hand_R_Wrist_z_shldr_norm', 'Hand_R_Thumb_CMC_x_shldr_norm', 'Hand_R_Thumb_CMC_y_shldr_norm', 'Hand_R_Thumb_CMC_z_shldr_norm', 'Hand_R_Thumb_MCP_x_shldr_norm', 'Hand_R_Thumb_MCP_y_shldr_norm', 'Hand_R_Thumb_MCP_z_shldr_norm', 'Hand_R_Thumb_IP_x_shldr_norm', 'Hand_R_Thumb_IP_y_shldr_norm', 'Hand_R_Thumb_IP_z_shldr_norm', 'Hand_R_Thumb_Tip_x_shldr_norm', 'Hand_R_Thumb_Tip_y_shldr_norm', 'Hand_R_Thumb_Tip_z_shldr_norm', 'Hand_R_Index_MCP_x_shldr_norm', 'Hand_R_Index_MCP_y_shldr_norm', 'Hand_R_Index_MCP_z_shldr_norm', 'Hand_R_Index_PIP_x_shldr_norm', 'Hand_R_Index_PIP_y_shldr_norm', 

In [5]:
import numpy as np
from constants import WINDOW_SIZE, WINDOW_STEP, NUM_EMG_SENSORS
from sklearn.preprocessing import StandardScaler

# assume df, calibration_mean, calibration_std, landmark_columns etc. are already defined

# define a real scaler object
scaler = StandardScaler()
scaler.mean_, scaler.scale_ = calibration_mean, calibration_std

# build raw windows
channels = [f"s{i}_norm" for i in range(1, NUM_EMG_SENSORS+1)] + ["quat_w","quatx","quaty","quatz"]
raw       = df[channels].values
landmarks = df[landmark_columns].values

X_win, Y = [], []
for start in range(0, len(raw) - WINDOW_SIZE + 1, WINDOW_STEP):
    win = raw[start:start+WINDOW_SIZE]
    emg = win[:,:NUM_EMG_SENSORS]
    imu = win[:,NUM_EMG_SENSORS:]
    # use scaler here
    norm_emg = scaler.transform(emg)
    win_norm = np.concatenate([norm_emg, imu], axis=1)
    X_win.append(win_norm)
    center = start + WINDOW_SIZE//2
    Y.append(landmarks[center])

X_win = np.stack(X_win)
Y     = np.array(Y)
print("All windows X_win shape:", X_win.shape)
print("All targets   Y    shape:", Y.shape)


All windows X_win shape: (79, 30, 12)
All targets   Y    shape: (79, 72)


## Scale, split, and one-shot encode RMS feature windows

In [6]:
from sklearn.model_selection import train_test_split

# 1) train+val vs test
X_trainval, X_test, Y_trainval, Y_test = train_test_split(
    X_win, Y, test_size=0.20, random_state=42
)
# 2) train vs val
X_train, X_val, Y_train, Y_val = train_test_split(
    X_trainval, Y_trainval, test_size=0.25, random_state=42
)

print("Train:", X_train.shape, Y_train.shape)
print("Val:  ", X_val.shape,   Y_val.shape)
print("Test: ", X_test.shape,  Y_test.shape)

Train: (47, 30, 12) (47, 72)
Val:   (16, 30, 12) (16, 72)
Test:  (16, 30, 12) (16, 72)


## Build, compile & train a simple dense classifier

In [17]:
from tensorflow.keras.losses import MeanSquaredError
from tensorflow.keras.metrics import MeanAbsoluteError
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv1D, MaxPooling1D, Flatten, Dense, Dropout, BatchNormalization
from tensorflow.keras.optimizers import Adam
import tensorflow as tf

# --- BEGIN shape‐fix (unchanged) -------
def ensure_3d(X, window_size, n_channels):
    if X.ndim == 3:
        return X
    if X.ndim == 2 and X.shape[1] == n_channels:
        n_samples = X.shape[0]
        return X.reshape(n_samples, window_size, n_channels)
    raise ValueError(f"Cannot reshape array of shape {X.shape} into 3D with {window_size}×{n_channels}")

X_train = ensure_3d(X_train, WINDOW_SIZE, 12)
X_val   = ensure_3d(X_val,   WINDOW_SIZE, 12)
X_test  = ensure_3d(X_test,  WINDOW_SIZE, 12)

print("After reshape —")
print(" Train:", X_train.shape)
print(" Val:  ", X_val.shape)
print(" Test: ", X_test.shape)
# --- END shape‐fix -------

# dynamically pick up how many outputs your Y has
n_outputs = Y_train.shape[1]
print("Your network will produce", n_outputs, "values per window (should equal Y.shape[1])")

model = Sequential([
    Conv1D(32, kernel_size=3, activation='relu', input_shape=(WINDOW_SIZE, 12)),
    MaxPooling1D(pool_size=2),
    BatchNormalization(),

    Conv1D(64, kernel_size=3, activation='relu'),
    MaxPooling1D(pool_size=2),
    BatchNormalization(),

    Flatten(),
    Dense(128, activation='relu'),
    Dropout(0.3),
    Dense(64, activation='relu'),

    # match the number of target‐dimensions
    Dense(n_outputs, activation="linear")
])

early_stopping = tf.keras.callbacks.EarlyStopping(
    monitor='val_loss',
    patience=3,
    restore_best_weights=True
)

model.compile(
    optimizer=Adam(1e-3),
    loss=MeanSquaredError(),
    metrics=[MeanAbsoluteError()]
)

# Check for NaN/inf in input data
print(f"NaN in X_train: {np.any(np.isnan(X_train))}")
print(f"Inf in X_train: {np.any(np.isinf(X_train))}")
print(f"NaN in X_val:   {np.any(np.isnan(X_val))}")
print(f"Inf in X_val:   {np.any(np.isinf(X_val))}")

# Check for NaN/inf in target data
print(f"NaN in Y_train: {np.any(np.isnan(Y_train))}")
print(f"Inf in Y_train: {np.any(np.isinf(Y_train))}")
print(f"NaN in Y_val:   {np.any(np.isnan(Y_val))}")
print(f"Inf in Y_val:   {np.any(np.isinf(Y_val))}")

# Also check the ranges
print(f"X_train min/max: {np.min(X_train):.2f} / {np.max(X_train):.2f}")
print(f"Y_train min/max: {np.min(Y_train):.2f} / {np.max(Y_train):.2f}")

history = model.fit(
    X_train, Y_train,
    validation_data=(X_val, Y_val),
    epochs=100,
    batch_size=8,
    callbacks=[early_stopping]
)

test_mse, test_mae = model.evaluate(X_test, Y_test)
print(f"Final test MSE: {test_mse:.4f}, MAE: {test_mae:.4f}")


After reshape —
 Train: (47, 30, 12)
 Val:   (16, 30, 12)
 Test:  (16, 30, 12)
Your network will produce 72 values per window (should equal Y.shape[1])
NaN in X_train: False
Inf in X_train: False
NaN in X_val:   False
Inf in X_val:   False
NaN in Y_train: False
Inf in Y_train: False
NaN in Y_val:   False
Inf in Y_val:   False
X_train min/max: -2.16 / 25.08
Y_train min/max: -0.80 / 0.44


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


Epoch 1/100
[1m6/6[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 76ms/step - loss: 0.3912 - mean_absolute_error: 0.4873 - val_loss: 0.0520 - val_mean_absolute_error: 0.1839
Epoch 2/100
[1m6/6[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 16ms/step - loss: 0.1785 - mean_absolute_error: 0.3301 - val_loss: 0.0360 - val_mean_absolute_error: 0.1554
Epoch 3/100
[1m6/6[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 14ms/step - loss: 0.1319 - mean_absolute_error: 0.2770 - val_loss: 0.0307 - val_mean_absolute_error: 0.1455
Epoch 4/100
[1m6/6[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 13ms/step - loss: 0.0834 - mean_absolute_error: 0.2211 - val_loss: 0.0278 - val_mean_absolute_error: 0.1389
Epoch 5/100
[1m6/6[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 13ms/step - loss: 0.0524 - mean_absolute_error: 0.1793 - val_loss: 0.0259 - val_mean_absolute_error: 0.1335
Epoch 6/100
[1m6/6[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 12ms/step - loss

## Save the model and the scalar + feature list

In [18]:
# --- MODIFIED CELL 8: Save Model and Metadata ---
# make sure the folder exists
(Path(MODEL_PATH).parent).mkdir(parents=True, exist_ok=True)


# Save the trained model
model.save(MODEL_PATH)
print("Saved model to", MODEL_PATH)

# Save the scaler, feature names, and calibration stats to METADATA_PATH
with open(METADATA_PATH, "wb") as f:
    feature_names = [f"s{i}_norm" for i in range(1, NUM_EMG_SENSORS + 1)] + ["quat_w", "quatx", "quaty", "quatz"]
    pickle.dump((scaler, feature_names, calibration_mean, calibration_std), f)
    print("Saved metadata (scaler, features, calib_mean, calib_std) to", METADATA_PATH)




Saved model to model\physio_model.h5
Saved metadata (scaler, features, calib_mean, calib_std) to model\physio_metadata.pkl


In [19]:
import os
from constants import DATA_INPUT_PATH, MODEL_INPUT_PATH, MODEL_PATH, METADATA_PATH

print("DATA_INPUT_PATH ->", DATA_INPUT_PATH)
print("  contains:", os.listdir(DATA_INPUT_PATH))
print()
print("MODEL_INPUT_PATH ->", MODEL_INPUT_PATH)
print("  contains:", os.listdir(MODEL_INPUT_PATH))
print()
print("MODEL_PATH       ->", MODEL_PATH, "exists?", os.path.exists(MODEL_PATH))
print("METADATA_PATH    ->", METADATA_PATH, "exists?", os.path.exists(METADATA_PATH))


DATA_INPUT_PATH -> data
  contains: ['artemis_archive', 'convert_old_data.py', 'physio_emg_imu_data_20250504_215905.csv']

MODEL_INPUT_PATH -> model
  contains: ['artemis_archive', 'physio_metadata.pkl', 'physio_model.h5']

MODEL_PATH       -> model\physio_model.h5 exists? True
METADATA_PATH    -> model\physio_metadata.pkl exists? True


In [20]:
import constants
print(constants.__file__)


c:\Users\simpl\Desktop\coding\NeuroSyn\model-v4\constants.py


In [25]:
import numpy as np
import pandas as pd
from pathlib import Path
from tensorflow.keras.models import load_model
import pickle

from constants import DATA_INPUT_PATH, MODEL_PATH, METADATA_PATH, CLASSES, WINDOW_SIZE, NUM_EMG_SENSORS

# 1) Load the freshest CSV
data_dir = Path(DATA_INPUT_PATH)
csv_path = max(data_dir.glob("*.csv"), key=lambda p: p.stat().st_mtime)
df = pd.read_csv(csv_path)

# 2) Load scaler + feature names
with open(METADATA_PATH, "rb") as f:
    scaler, feature_names, calibration_mean, calibration_std = pickle.load(f)

# 3) Load your trained model
model = load_model(MODEL_PATH, compile=False)

# 4) For each class, grab the first valid window and predict
demo_rows = []
for gid, name in CLASSES.items():
    sub = df[df["gesture_id"] == gid]
    if len(sub) < WINDOW_SIZE:
        print(f"⚠️  not enough samples for '{name}' (need {WINDOW_SIZE}, got {len(sub)})")
        continue

    # Get the first window for this gesture
    window = sub[feature_names].values[:WINDOW_SIZE]  # shape (30, 12)
    # Separate EMG/IMU parts
    emg_part = window[:, :NUM_EMG_SENSORS]
    imu_part = window[:, NUM_EMG_SENSORS:]

    # Normalize EMG part using loaded stats
    normalized_emg_part = (emg_part - calibration_mean) / calibration_std

    # Concatenate normalized EMG and raw IMU
    window_norm = np.concatenate([normalized_emg_part, imu_part], axis=1)  # shape (30, 12)
    demo_rows.append((gid, name, window_norm))

# Assemble demo set
y_true = [gid for gid, _, _ in demo_rows]
X_demo = np.stack([vec for *_, vec in demo_rows])  # shape (N, 30, 12)

# 5) Predict
preds = model.predict(X_demo, verbose=0)
# If this is a regression model, you may want to print the predictions directly.
# If it's a classification model, use argmax:
# pred_ids = preds.argmax(axis=1)

print("Demo results:")
for (true_id, true_name, _), pred in zip(demo_rows, preds):
    print(f" • True: {true_id:2d} {true_name:17s} → Predicted: {pred}")

# If you want to compute accuracy for classification:
# acc = np.mean(np.array(pred_ids) == np.array(y_true))
# print(f"\nDemo accuracy (window per class): {acc:.0%}")

⚠️  not enough samples for 'Wrist Flexion' (need 30, got 0)
⚠️  not enough samples for 'Elbow Extension' (need 30, got 0)
⚠️  not enough samples for 'Hand Open' (need 30, got 0)
Demo results:
 • True:  0 Rest              → Predicted: [-0.05292831  0.07073354  0.07009118  0.15387936  0.1850755  -0.18561047
  0.02915647  0.20194347 -0.47631323 -0.02728528  0.16244575  0.25640112
 -0.07797333  0.09802471  0.22471485 -0.04472312  0.2650513   0.22484468
 -0.0612967   0.1808976   0.00906982 -0.09162341  0.24221587  0.10663012
 -0.11510002  0.16116682  0.3088908  -0.1302991   0.1391977   0.10235903
 -0.16996616  0.20987767  0.15430072 -0.10079712  0.19445735  0.16658682
 -0.10289496  0.22714487  0.16622005 -0.09545696  0.24737702  0.1678516
  0.02294839  0.26204786  0.05826973 -0.06669929  0.22454369  0.17451546
 -0.06131237  0.2505746   0.08363113 -0.03796579  0.19722764  0.05711849
 -0.00330704  0.21310142  0.12974313 -0.04157147  0.22107364  0.0658411
  0.01679794  0.26168633  0.12070472 