In [10]:
# train_gesture_model.py
# Tiny sequence classifier (length-4 gestures). Trains a logistic-regression
# model and exports its weights as a C++ header for Arduino inference.

import numpy as np
from sklearn.linear_model import LogisticRegression
from pathlib import Path

# ----- CONFIG -----
# Number of gesture tokens in every sequence.
SEQ_LEN = 4
# Gesture vocabulary. A token's list position is its integer id, so the
# order is fixed!
VOCAB = ["UP", "DOWN", "LEFT", "RIGHT"]
# (class name, canonical gesture pattern) pairs. A pair's list position is
# the integer class id used as the training label.
CLASSES = [
    ("PURPLE", ["UP", "UP", "DOWN", "DOWN"]),        # e.g. show purple (R+B)
    ("CYAN", ["LEFT", "RIGHT", "LEFT", "RIGHT"]),    # cyan (G+B)
    ("WHITE", ["UP", "LEFT", "DOWN", "RIGHT"]),      # white (R+G+B)
    ("ORANGE", ["RIGHT", "RIGHT", "LEFT", "LEFT"]),  # orange (R+G)
]
# Synthetic samples generated per class (small, fast).
N_PER_CLASS = 18
# Per-sample probability (10%) of overwriting one position of the pattern.
NOISE_P = 0.10

# Optional: if you logged real data, load it here instead of synthetic.
# Expected CSV format: label,g1 g2 g3 g4  (tokens separated by spaces)
# Example row: PURPLE,UP UP DOWN DOWN
# Set to "my_gesture_data.csv" if you collected real data.
CSV_PATH = None

# ----- HELPERS -----
# Token -> id and id -> token lookup tables derived from VOCAB.
tok2id = dict(zip(VOCAB, range(len(VOCAB))))
id2tok = dict(zip(tok2id.values(), tok2id.keys()))
# Class names and their canonical patterns, both indexed by class id.
class_names = [name for name, _ in CLASSES]
patterns = [pattern for _, pattern in CLASSES]

def onehot_seq(seq):
    """Encode a gesture token sequence as a flat position-wise one-hot vector.

    The result has ``SEQ_LEN * len(VOCAB)`` float32 entries. Position ``p``
    owns the slice ``[p * len(VOCAB), (p + 1) * len(VOCAB))`` and the entry
    for that position's token id is set to 1.0. Positions beyond the end of
    a shorter ``seq`` stay all-zero.

    Raises:
        KeyError: if a token is not a key of ``tok2id``.
        IndexError: if ``seq`` holds more than ``SEQ_LEN`` tokens.
    """
    vocab_size = len(VOCAB)
    encoded = np.zeros(SEQ_LEN * vocab_size, dtype=np.float32)
    for position, token in enumerate(seq):
        slot = position * vocab_size + tok2id[token]
        encoded[slot] = 1.0
    return encoded

def _load_csv_samples(path):
    """Read training samples from a ``label,g1 g2 g3 g4`` CSV file.

    Blank rows, rows whose first field starts with ``#``, rows without a
    sequence column and rows whose sequence is not exactly ``SEQ_LEN``
    tokens long are skipped.

    Returns:
        (features, labels): parallel lists of one-hot vectors and class ids.

    Raises:
        ValueError: if a row's label is not in ``class_names``.
        KeyError: if a row contains a token ``onehot_seq`` does not know.
    """
    import csv

    features, labels = [], []
    # "utf-8-sig" reads plain UTF-8 as well and drops a leading BOM, which
    # would otherwise become part of the first label.
    with open(path, "r", newline="", encoding="utf-8-sig") as f:
        reader = csv.reader(f)
        for row in reader:
            if not row or row[0].lstrip().startswith("#"):
                continue
            if len(row) < 2:
                # No sequence column: treat like any other malformed row.
                continue
            label = row[0].strip()
            seq = row[1].strip().split()
            if len(seq) != SEQ_LEN:
                continue
            if label not in class_names:
                raise ValueError(
                    f"{path}: line {reader.line_num}: unknown class label "
                    f"{label!r}; expected one of {class_names}"
                )
            features.append(onehot_seq(seq))
            labels.append(class_names.index(label))
    return features, labels


def _make_synthetic_samples():
    """Generate ``N_PER_CLASS`` lightly perturbed copies of each class pattern.

    Uses a fixed RNG seed so the dataset is reproducible. With probability
    ``NOISE_P`` one random position of a sample is overwritten by a token
    drawn uniformly from ``VOCAB``; the draw may pick the token already
    there, so the effective corruption rate is somewhat below ``NOISE_P``.

    Returns:
        (features, labels): parallel lists of one-hot vectors and class ids.
    """
    rng = np.random.default_rng(0)
    features, labels = [], []
    for class_id, (_, base) in enumerate(CLASSES):
        for _ in range(N_PER_CLASS):
            seq = list(base)  # copy so the canonical pattern is never mutated
            if rng.random() < NOISE_P:
                pos = int(rng.integers(0, SEQ_LEN))
                seq[pos] = VOCAB[int(rng.integers(0, len(VOCAB)))]
            features.append(onehot_seq(seq))
            labels.append(class_id)
    return features, labels


def load_or_make_data():
    """Build the training set from ``CSV_PATH`` if set, else synthetically.

    Returns:
        X: float32 array with one one-hot encoded sequence per row.
        y: int64 array of class ids (indices into ``class_names``).

    Raises:
        ValueError: if no usable sample was produced, or a CSV row carries
            an unknown class label.
        KeyError: if a CSV row contains an unknown gesture token.
    """
    if CSV_PATH:
        features, labels = _load_csv_samples(CSV_PATH)
        origin = f"CSV file {CSV_PATH!r}"
    else:
        # synthetic: perturb true patterns a bit
        features, labels = _make_synthetic_samples()
        origin = "the synthetic generator"
    if not features:
        # np.stack would fail on an empty list with a far less helpful message.
        raise ValueError(f"No usable training samples were produced by {origin}")
    X = np.stack(features, axis=0)
    y = np.array(labels, dtype=np.int64)
    return X, y

def export_header(W, b, class_names, path="model_weights.h"):
    """Write the trained model as a C++ header for on-device inference.

    The header defines ``ML_NUM_CLASSES``, ``ML_SEQ_LEN``, ``ML_VOCAB``,
    ``ML_NUM_FEATS``, the weight matrix ``ML_W``, the bias vector ``ML_B``
    and the class-name table ``ML_CLASS_NAMES``.

    Args:
        W: 2-D weight array of shape (C, F), one row per class.
        b: bias values, one per class (length C).
        class_names: class names, one per row of ``W`` and in the same order.
        path: output file path.

    Raises:
        ValueError: if ``b`` or ``class_names`` does not have one entry per
            row of ``W``; the arrays in the header are all declared with
            ``ML_NUM_CLASSES`` entries, so a mismatch would not compile or
            would mislabel classes.
    """
    # W: (C, F), b: (C,)
    C, F = W.shape
    if len(b) != C or len(class_names) != C:
        raise ValueError(
            f"Model has {C} weight rows but {len(b)} biases and "
            f"{len(class_names)} class names; all three must match"
        )

    lines = [
        "// Auto-generated by train_gesture_model.py",
        "#pragma once",
        f"static const int ML_NUM_CLASSES = {C};",
        f"static const int ML_SEQ_LEN    = {SEQ_LEN};",
        f"static const int ML_VOCAB      = {len(VOCAB)};",
        f"static const int ML_NUM_FEATS  = {F};",
    ]

    # weights
    lines.append("static const float ML_W[ML_NUM_CLASSES][ML_NUM_FEATS] = {")
    for weights in W:
        row = ", ".join(f"{w:.6f}f" for w in weights)
        lines.append(f"  {{ {row} }},")
    lines.append("};")

    # bias
    b_row = ", ".join(f"{bi:.6f}f" for bi in b)
    lines.append(f"static const float ML_B[ML_NUM_CLASSES] = {{ {b_row} }};")

    # class names
    names = ", ".join(f"\"{n}\"" for n in class_names)
    lines.append(
        f"static const char* ML_CLASS_NAMES[ML_NUM_CLASSES] = {{ {names} }};"
    )

    # Token mapping comment, derived from VOCAB so it cannot drift from the
    # ids that were actually used for training.
    token_order = "  ".join(f"{i}={tok}" for i, tok in enumerate(VOCAB))
    lines.append(f"// Token order: {token_order}")

    # End the file with a newline so compilers do not warn about it.
    Path(path).write_text("\n".join(lines) + "\n", encoding="utf-8")
    print(f"Wrote {path}")

def main():
    """Train the gesture classifier and export its weights as a C++ header.

    Loads (or synthesizes) the training set, fits a logistic-regression
    model, prints the training accuracy and writes the weights with
    ``export_header``.
    """
    X, y = load_or_make_data()
    # The L2 penalty and, for lbfgs with three or more classes, the
    # multinomial (softmax) formulation are the estimator defaults. They are
    # left implicit because the `multi_class` argument is deprecated since
    # scikit-learn 1.5.
    clf = LogisticRegression(C=10.0, max_iter=1000, solver="lbfgs")
    clf.fit(X, y)
    acc = clf.score(X, y)
    print(f"Train accuracy on small set: {acc:.3f}")

    W = clf.coef_.astype(np.float32)       # one row per entry of clf.classes_
    b = clf.intercept_.astype(np.float32)
    if W.shape[0] == 1 and len(clf.classes_) == 2:
        # A two-class fit stores only the scores of the second class. Add an
        # all-zero row for the first class so the exported model still has
        # one row per class; softmax/argmax over (0, s) matches the binary
        # decision on s.
        W = np.vstack([np.zeros_like(W), W])
        b = np.concatenate([np.zeros_like(b), b])

    # Weight rows follow clf.classes_ (the sorted class ids actually present
    # in y), which can be a subset of class_names when a class has no
    # training samples, so map the names through it.
    trained_names = [class_names[int(ci)] for ci in clf.classes_]
    export_header(W, b, trained_names)

# Run the train-and-export pipeline only when this file is executed as a
# script, not when it is imported as a module.
if __name__ == "__main__":
    main()


Train accuracy on small set: 1.000
Wrote model_weights.h


