# Squat quality scoring with TensorFlow

This notebook extracts BlazePose landmarks from your local squat videos, trains a small BiLSTM regressor on them, and predicts a quality score (0-100) per clip.


## 1) Environment setup
- Installs TensorFlow + MediaPipe + OpenCV.
- Use your own data/squats_train and data/squats_test folders; no downloads required.


In [None]:
!python -m pip install --upgrade pip
!python -m pip install "tensorflow<2.17" tensorflow-io jupyter
!python -m pip install mediapipe opencv-python


## 2) Imports and configuration
- Adjust paths or scoring scale if needed.
- Scores are expected in data/squat_scores.csv.


In [None]:
import os
import random
import csv
from pathlib import Path

import cv2
import mediapipe as mp
import numpy as np
import tensorflow as tf

# Seed every random source used in this notebook (Python, NumPy, TensorFlow).
SEED = 42
random.seed(SEED)
np.random.seed(SEED)
tf.random.set_seed(SEED)

# Pipeline hyper-parameters.
BATCH_SIZE = 4   # clips per batch
NUM_FRAMES = 16  # frames sampled from each clip
# NOTE(review): IMG_SIZE is not referenced in any visible cell — confirm before removing.
IMG_SIZE = 160
NUM_LANDMARKS = 33  # BlazePose outputs 33 landmarks
LANDMARK_DIMS = 4   # x, y, z, visibility
SCORE_SCALE = 100.0  # labels are 0-100; model trains on 0-1 internally

# Filesystem layout; these paths are relative, so they resolve against the
# current working directory.
DATA_ROOT = Path("data")
TRAIN_DIR = DATA_ROOT / "squats_train"
TEST_DIR = DATA_ROOT / "squats_test"
LABELS_PATH = DATA_ROOT / "squat_scores.csv"
MODEL_DIR = Path("checkpoints")

# Lower-case file suffixes treated as videos when scanning the data folders.
VIDEO_EXTS = (".mp4", ".mov", ".avi", ".mkv")

# Create the expected folders up front so later cells can rely on them existing.
MODEL_DIR.mkdir(parents=True, exist_ok=True)
TRAIN_DIR.mkdir(parents=True, exist_ok=True)
TEST_DIR.mkdir(parents=True, exist_ok=True)

print(tf.__version__)
print("Data root:", DATA_ROOT.resolve())
print("Model dir:", MODEL_DIR.resolve())


## 3) Prepare local data and labels
- A CSV template is generated listing every video under data/squats_train and data/squats_test.
- Fill in the score column (0-100). At least two labeled train videos are required to run.


In [None]:
def list_videos(root: Path):
    """Return every video file found anywhere beneath *root*, sorted by path.

    An entry counts as a video when its lower-cased suffix is listed in
    VIDEO_EXTS and it is a regular file (so a directory that merely carries
    a video-like suffix is ignored).
    """
    matches = []
    for candidate in root.rglob("*"):
        if candidate.suffix.lower() not in VIDEO_EXTS:
            continue
        if candidate.is_file():
            matches.append(candidate)
    matches.sort()
    return matches


def ensure_label_file():
    """Create or refresh the label CSV that maps each video to a score.

    Every video found under TRAIN_DIR and TEST_DIR gets one row keyed by its
    POSIX-style path relative to DATA_ROOT. Scores already present in the
    file are carried over. Rows that hold a score but whose video is not on
    disk right now are kept at the end of the file instead of being dropped,
    so hand-entered labels survive a temporarily missing or moved clip.

    Returns:
        The rows (dicts with "relative_path" and "score") for the videos
        currently on disk, in the order they were written.
    """
    previous_scores = {}
    if LABELS_PATH.exists():
        with LABELS_PATH.open("r", newline="", encoding="utf-8") as f:
            for row in csv.DictReader(f):
                # DictReader fills the fields of a short row with None, so
                # normalise both columns to strings before using them.
                rel = row.get("relative_path") or ""
                if rel:
                    previous_scores[rel] = row.get("score") or ""

    rows = []
    on_disk = set()
    for p in list_videos(TRAIN_DIR) + list_videos(TEST_DIR):
        rel = p.relative_to(DATA_ROOT).as_posix()
        on_disk.add(rel)
        rows.append({"relative_path": rel, "score": previous_scores.get(rel, "")})

    # Labels are manual work: keep any scored row whose video is currently
    # missing rather than silently deleting it on rewrite.
    orphaned = [
        {"relative_path": rel, "score": score}
        for rel, score in previous_scores.items()
        if rel not in on_disk and score.strip()
    ]

    LABELS_PATH.parent.mkdir(parents=True, exist_ok=True)
    with LABELS_PATH.open("w", newline="", encoding="utf-8") as f:
        writer = csv.DictWriter(f, fieldnames=["relative_path", "score"])
        writer.writeheader()
        writer.writerows(rows)
        writer.writerows(orphaned)

    if orphaned:
        print(f"Kept {len(orphaned)} scored row(s) whose video is not on disk right now.")
    print(f"Label file ready at {LABELS_PATH}. Fill in 'score' (0-{int(SCORE_SCALE)}) for each row.")
    return rows


# Generate or refresh the label CSV now; the returned rows are not needed here.
_ = ensure_label_file()


### Label summary / sanity check


In [None]:
def load_labeled_samples():
    """Read the label CSV and return the usable (video path, score) pairs.

    A row is skipped when its path is empty, when the file does not exist
    under DATA_ROOT (these are reported together in one warning), when the
    score cell is blank, or when the score is not a finite number. Scores
    outside the valid range are clamped to [0, SCORE_SCALE].

    Returns:
        A list of (absolute-or-relative path string, float score) tuples in
        file order.
    """
    samples = []
    missing = []
    with LABELS_PATH.open("r", newline="", encoding="utf-8") as f:
        for row in csv.DictReader(f):
            # DictReader stores None for fields missing from a short row
            # (e.g. a line without the trailing comma), so `.get(key, "")`
            # alone would hand None to `.strip()` and crash.
            rel = row.get("relative_path") or ""
            score_str = (row.get("score") or "").strip()
            if not rel:
                continue

            full = DATA_ROOT / rel
            if not full.exists():
                missing.append(rel)
                continue

            if not score_str:
                continue

            try:
                score = float(score_str)
            except ValueError:
                print(f"Skipping {rel}: invalid score '{score_str}'")
                continue

            # float() accepts "nan"/"inf"; NaN would slip through the clamp
            # below and come out as SCORE_SCALE, so reject non-finite values.
            if not np.isfinite(score):
                print(f"Skipping {rel}: invalid score '{score_str}'")
                continue

            score = max(0.0, min(SCORE_SCALE, score))
            samples.append((str(full), score))

    if missing:
        print("Warning: paths not found on disk:", missing)

    print(f"Loaded {len(samples)} labeled samples.")
    return samples


# Load the labeled clips and stop early when there are too few to work with.
labeled_samples = load_labeled_samples()
if len(labeled_samples) < 2:
    raise ValueError("Add scores in the CSV (at least 2 labeled videos) before training.")


## 4) Build a TensorFlow input pipeline
- Uniformly sample frames, run BlazePose to get 33 landmarks per frame, normalize, and emit flattened keypoints.
- Labels are normalized to 0-1 during training; final scores are rescaled to 0-100.


In [None]:
mp_pose = mp.solutions.pose
# Single module-level pose estimator, reused by _extract_keypoints_np for
# every sampled frame of every video.
# NOTE(review): static_image_mode=False / smooth_landmarks=True presumably make
# the detector carry state from one processed frame to the next, so results
# may depend on call order — confirm against the MediaPipe Pose documentation.
pose_detector = mp_pose.Pose(
    static_image_mode=False,
    model_complexity=1,
    enable_segmentation=False,
    smooth_landmarks=True,
)


def _sample_frame_indices(total_frames: int, num_target: int) -> np.ndarray:
    """Pick `num_target` evenly spaced frame indices from a clip.

    The indices run from 0 to `total_frames - 1` inclusive, are truncated to
    int32, and repeat when the clip has fewer frames than requested. A clip
    with no frames yields all zeros.
    """
    if total_frames > 0:
        last_index = max(total_frames - 1, 0)
        evenly_spaced = np.linspace(0, last_index, num_target)
        return evenly_spaced.astype(np.int32)
    return np.zeros((num_target,), dtype=np.int32)


def _normalize_landmarks(landmarks: np.ndarray) -> np.ndarray:
    """Center on hips, scale by torso/hip distance, rotate so hips are horizontal.

    Args:
        landmarks: array indexed as [landmark, channel]; channels 0-2 are
            treated as x, y, z and any further channels are passed through
            untouched. Rows 11/12 are used as the shoulders and rows 23/24
            as the hips.

    Returns:
        A new array of the same shape. The hip midpoint sits at the origin,
        x/y/z are divided by the larger of the 2D torso length and 2D hip
        width (floored at 1e-3), and x/y are rotated so the left-to-right
        hip vector points along +x. The input array is left unmodified.
    """
    # Work on a copy so the caller's array is never changed behind its back.
    out = np.array(landmarks, copy=True)
    if not np.issubdtype(out.dtype, np.floating):
        out = out.astype(np.float32)

    # Basic slices are views; copy the hips so they keep their original
    # values after `out` is re-centred and rescaled below.
    left_hip = out[23, :3].copy()
    right_hip = out[24, :3].copy()
    left_shoulder, right_shoulder = out[11, :3], out[12, :3]

    center_hip = (left_hip + right_hip) / 2.0
    center_shoulder = (left_shoulder + right_shoulder) / 2.0
    torso = np.linalg.norm(center_shoulder[:2] - center_hip[:2])
    hip_dist = np.linalg.norm(left_hip[:2] - right_hip[:2])
    # The floor keeps degenerate detections (coincident joints) from dividing by ~0.
    scale = max(torso, hip_dist, 1e-3)

    out[:, :3] = (out[:, :3] - center_hip) / scale

    # arctan2 is well defined for every input, including (0, 0), so no
    # epsilon fudge is required on the x component.
    hip_vec = right_hip[:2] - left_hip[:2]
    angle = np.arctan2(hip_vec[1], hip_vec[0])
    cos_a, sin_a = np.cos(-angle), np.sin(-angle)
    rot = np.array([[cos_a, -sin_a], [sin_a, cos_a]], dtype=np.float32)
    out[:, :2] = out[:, :2] @ rot.T
    return out


def _extract_keypoints_np(video_path: str) -> np.ndarray:
    """Turn one video into a fixed-size array of normalized pose landmarks.

    NUM_FRAMES frames are picked evenly across the clip, each is run through
    the module-level `pose_detector`, and the detected landmarks are
    normalized with `_normalize_landmarks`.

    The video is read in two passes so that only one decoded frame is held
    in memory at a time: the first pass counts frames, the second decodes
    just the selected ones. This costs a second walk through the file but
    keeps memory use independent of clip length.

    Args:
        video_path: path of the video file to read.

    Returns:
        float32 array of shape (NUM_FRAMES, NUM_LANDMARKS, LANDMARK_DIMS).
        Slots stay all-zero when the video yields no frames, when a selected
        frame cannot be decoded, or when no pose is detected in it.

    NOTE(review): `pose_detector` is shared by every call and was created
    with static_image_mode=False; presumably it keeps state between frames,
    so output may depend on call order or concurrent use — confirm.
    """
    keypoints = np.zeros((NUM_FRAMES, NUM_LANDMARKS, LANDMARK_DIMS), dtype=np.float32)

    # Pass 1: count frames. grab() advances the stream without handing back
    # an image, so nothing accumulates in memory.
    cap = cv2.VideoCapture(video_path)
    try:
        num_frames = 0
        while cap.grab():
            num_frames += 1
    finally:
        cap.release()

    if num_frames == 0:
        return keypoints

    idxs = _sample_frame_indices(num_frames, NUM_FRAMES)

    # Pass 2: walk the stream again and only retrieve the selected frames.
    # `idxs` is non-decreasing, so one forward sweep visits every slot.
    cap = cv2.VideoCapture(video_path)
    try:
        slot = 0
        frame_idx = 0
        while slot < NUM_FRAMES and cap.grab():
            if int(idxs[slot]) == frame_idx:
                ok, frame = cap.retrieve()
                image_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB) if ok else None
                # Short clips repeat an index; run the detector once per
                # slot, as before, rather than once per distinct frame.
                while slot < NUM_FRAMES and int(idxs[slot]) == frame_idx:
                    if image_rgb is not None:
                        results = pose_detector.process(image_rgb)
                        if results.pose_landmarks:
                            lm = results.pose_landmarks.landmark
                            coords = np.array(
                                [[p.x, p.y, p.z, p.visibility] for p in lm],
                                dtype=np.float32,
                            )
                            keypoints[slot] = _normalize_landmarks(coords)
                    slot += 1
            frame_idx += 1
    finally:
        cap.release()
    return keypoints


def load_keypoints(path: tf.Tensor) -> tf.Tensor:
    """Wrap the NumPy keypoint extractor so it can be used inside tf.data.

    The string tensor is decoded as UTF-8 and handed to
    `_extract_keypoints_np` through `tf.py_function`; the static shape is
    then pinned to (NUM_FRAMES, NUM_LANDMARKS, LANDMARK_DIMS) because
    py_function outputs carry no shape information of their own.
    """
    keypoints = tf.py_function(
        func=lambda raw_path: _extract_keypoints_np(raw_path.numpy().decode("utf-8")),
        inp=[path],
        Tout=tf.float32,
    )
    keypoints.set_shape((NUM_FRAMES, NUM_LANDMARKS, LANDMARK_DIMS))
    return keypoints


def preprocess(path: tf.Tensor, score: tf.Tensor) -> tuple[tf.Tensor, tf.Tensor]:
    """Map one (video path, raw score) pair to (features, target).

    Features are the clip's keypoints flattened per frame to
    (NUM_FRAMES, NUM_LANDMARKS * LANDMARK_DIMS). The target is the score
    cast to float32, divided by SCORE_SCALE, with a trailing axis added.
    """
    frame_features = tf.reshape(
        load_keypoints(path),
        (NUM_FRAMES, NUM_LANDMARKS * LANDMARK_DIMS),
    )
    normalized_score = tf.cast(score, tf.float32) / SCORE_SCALE
    target = tf.expand_dims(normalized_score, axis=-1)
    return frame_features, target


def build_tf_dataset(samples, training: bool):
    """Build a batched tf.data pipeline from (video path, score) pairs.

    Args:
        samples: non-empty sequence of (path string, score) tuples.
        training: when True, elements are reshuffled on every pass.

    Returns:
        A dataset yielding (features, target) batches of up to BATCH_SIZE
        clips, as produced by `preprocess`.

    Raises:
        ValueError: if `samples` is empty (nothing to unpack).
    """
    paths, scores = zip(*samples)
    ds = tf.data.Dataset.from_tensor_slices((list(paths), list(scores)))
    ds = ds.map(preprocess, num_parallel_calls=tf.data.AUTOTUNE)
    # Keypoint extraction (video decode + pose estimation) is by far the most
    # expensive step and its output is tiny, so cache it after the first pass
    # instead of redoing it for every clip on every epoch.
    ds = ds.cache()
    if training:
        # Shuffle after the cache; shuffling before it would freeze the
        # first epoch's order into the cached data.
        ds = ds.shuffle(buffer_size=len(paths), seed=SEED, reshuffle_each_iteration=True)
    ds = ds.batch(BATCH_SIZE).prefetch(tf.data.AUTOTUNE)
    return ds


def _in_dir(path_str: str, root: Path) -> bool:
    """Report whether `path_str` is `root` itself or lies somewhere beneath it.

    Both sides are resolved first, so relative paths and `..` segments are
    compared by where they actually point. The comparison is by whole path
    components, not by string prefix.
    """
    target = Path(path_str).resolve()
    base = root.resolve()
    try:
        target.relative_to(base)
    except ValueError:
        return False
    else:
        return True


# Partition the labeled clips by the folder they live in.
train_samples = [s for s in labeled_samples if _in_dir(s[0], TRAIN_DIR)]
test_samples = [s for s in labeled_samples if _in_dir(s[0], TEST_DIR)]
if len(train_samples) < 2:
    raise ValueError("Need at least 2 labeled train videos in squats_train for a train/val split.")

# Shuffle (Python's RNG was seeded with SEED earlier) and hold out roughly 20%
# for validation, always leaving at least one clip on each side of the split.
random.shuffle(train_samples)
split = max(1, int(0.8 * len(train_samples)))
if split >= len(train_samples):
    split = len(train_samples) - 1

train_ds = build_tf_dataset(train_samples[:split], training=True)
val_ds = build_tf_dataset(train_samples[split:], training=False)
# test_ds stays None when there are no labeled clips under TEST_DIR.
test_ds = build_tf_dataset(test_samples, training=False) if test_samples else None

print("Train batches:", len(train_ds))
print("Val batches:", len(val_ds))
print("Test batches:", len(test_ds) if test_ds is not None else 0)


## 5) Define a lightweight regression model
- BiLSTM + pooling over landmark sequences; single sigmoid output predicts normalized score.


In [None]:
def build_model() -> tf.keras.Model:
    """Assemble the sequence regressor that maps keypoints to a 0-1 score.

    Input is (NUM_FRAMES, NUM_LANDMARKS * LANDMARK_DIMS). Timesteps whose
    features are all 0.0 are masked, a bidirectional LSTM encodes the
    sequence, the per-step outputs are average-pooled, and a small dense
    head with dropout ends in a single sigmoid unit.
    """
    layers = tf.keras.layers
    feature_dim = NUM_LANDMARKS * LANDMARK_DIMS

    sequence_in = tf.keras.Input(shape=(NUM_FRAMES, feature_dim))
    masked = layers.Masking(mask_value=0.0)(sequence_in)
    encoded = layers.Bidirectional(layers.LSTM(128, return_sequences=True))(masked)
    pooled = layers.GlobalAveragePooling1D()(encoded)
    hidden = layers.Dense(128, activation="relu")(pooled)
    hidden = layers.Dropout(0.3)(hidden)
    hidden = layers.Dense(64, activation="relu")(hidden)
    # Sigmoid keeps the prediction in the same 0-1 range as the normalized labels.
    score_out = layers.Dense(1, activation="sigmoid")(hidden)
    return tf.keras.Model(sequence_in, score_out)


# Build and compile the regressor: Adam at 1e-4, MSE loss on the normalized
# score, and mean absolute error reported under the name "mae".
model = build_model()
model.compile(
    optimizer=tf.keras.optimizers.Adam(learning_rate=1e-4),
    loss="mse",
    metrics=[tf.keras.metrics.MeanAbsoluteError(name="mae")],
)
model.summary()


## 6) Train
- Early stopping on validation MAE.


In [None]:
# Both callbacks watch "val_mae": training stops after 3 epochs without
# improvement (restoring the best weights), and only the best checkpoint is
# kept at MODEL_DIR / "model.keras".
callbacks = [
    tf.keras.callbacks.EarlyStopping(patience=3, restore_best_weights=True, monitor="val_mae"),
    tf.keras.callbacks.ModelCheckpoint(str(MODEL_DIR / "model.keras"), save_best_only=True, monitor="val_mae"),
]

history = model.fit(
    train_ds,
    validation_data=val_ds,
    epochs=20,
    callbacks=callbacks,
)

# MAE is measured on 0-1 targets; multiply by SCORE_SCALE to read it in score units.
best_val_mae = min(history.history["val_mae"])
print("Best val MAE (normalized 0-1):", best_val_mae)
print("Best val MAE (score units):", best_val_mae * SCORE_SCALE)


## 7) Evaluate and save artifacts


In [None]:
# Evaluate on the test set when one exists, otherwise fall back to validation.
eval_target = test_ds if test_ds is not None else val_ds
eval_results = model.evaluate(eval_target, return_dict=True)
print(eval_results)
print(f"MAE in score units: {eval_results['mae'] * SCORE_SCALE:.2f}")

# Save the trained model, plus the scale needed to turn its 0-1 output back
# into a score.
export_dir = MODEL_DIR / "squat_scorer.keras"
model.save(export_dir)
with (MODEL_DIR / "score_scale.txt").open("w", encoding="utf-8") as f:
    f.write(str(SCORE_SCALE))

print("Artifacts saved to", MODEL_DIR)


## 8) Export TFLite for Android


In [None]:
from pathlib import Path

MODEL_DIR = Path("checkpoints")
keras_model_path = MODEL_DIR / "squat_scorer.keras"  # written by the evaluate/save cell
saved_model_dir = MODEL_DIR / "squat_scorer_savedmodel"  # or wherever you exported
tflite_path = MODEL_DIR / "squat_scorer.tflite"

# The converter below reads a SavedModel directory, but the training cells only
# write a .keras file. Export a SavedModel from that file first, so the
# conversion has something to read and always reflects the latest saved model.
# An existing SavedModel directory is used as-is only when no .keras file exists.
if keras_model_path.exists():
    exported_model = tf.keras.models.load_model(str(keras_model_path))
    exported_model.export(str(saved_model_dir))
elif not saved_model_dir.exists():
    raise FileNotFoundError(
        f"Neither {keras_model_path} nor {saved_model_dir} exists; run the evaluate/save cell first."
    )

converter = tf.lite.TFLiteConverter.from_saved_model(str(saved_model_dir))
converter.optimizations = [tf.lite.Optimize.DEFAULT]

# Key flags for TensorList + LSTM
converter.experimental_enable_resource_variables = True
converter._experimental_lower_tensor_list_ops = False
# Allow falling back to TensorFlow ops for anything without a TFLite builtin.
# NOTE(review): SELECT_TF_OPS presumably requires the Flex delegate in the
# Android app — confirm against the TFLite documentation.
converter.target_spec.supported_ops = [
    tf.lite.OpsSet.TFLITE_BUILTINS,
    tf.lite.OpsSet.SELECT_TF_OPS,
]

tflite_model = converter.convert()
tflite_path.write_bytes(tflite_model)
print("Wrote", tflite_path)


## 9) Single-sample inference helper


In [None]:
def predict_sample(video_path: str):
    """Score a single video with the trained model.

    Keypoints are extracted from the clip, reshaped into a batch of one, and
    passed to the module-level `model`. The normalized prediction is
    multiplied by SCORE_SCALE before being returned as a float.
    """
    feature_dim = NUM_LANDMARKS * LANDMARK_DIMS
    batch = _extract_keypoints_np(video_path).reshape(1, NUM_FRAMES, feature_dim)
    prediction = model.predict(batch, verbose=0)
    normalized = float(prediction[0][0])
    return normalized * SCORE_SCALE


# Smoke-test the helper on one known clip: the first training sample, or the
# first labeled sample if the training list is empty.
example_path = train_samples[0][0] if train_samples else labeled_samples[0][0]
pred_score = predict_sample(example_path)
print(f"Predicted score: {pred_score:.2f} (0-{int(SCORE_SCALE)}) on {example_path}")
