In [1]:
import cv2 #opencv 
import os #for file path
import time
import uuid #naming images
from IPython.display import display, clear_output
import PIL.Image
import numpy as np

In [2]:
IMAGE_PATH = "Tensorflow/workspace/images/collected_images"

In [3]:
labels = ['A', 'B', 'C', 'thankyou', 'yes', 'no', 'peace', 'ok']
n_samples = 200

In [None]:
os.makedirs(IMAGE_PATH, exist_ok=True)

# open camera (index 0 since that worked for you)
cap = cv2.VideoCapture(0)
if not cap.isOpened():
    raise RuntimeError("Camera failed to open. Check permissions and index.")

try:
    for label in labels:
        folder = os.path.join(IMAGE_PATH, label)
        os.makedirs(folder, exist_ok=True)
        print(f"Collecting images for: {label}  -> saving to {folder}")
        saved = 0
        attempts = 0
        while saved < n_samples and attempts < n_samples * 10:
            attempts += 1
            ret, frame = cap.read()
            if not ret or frame is None:
                print(f"Empty frame (attempt {attempts}); retrying...")
                time.sleep(1)
                continue

            fname = os.path.join(folder, f"{label}_{saved:04d}_{str(uuid.uuid1())[:8]}.jpg")
            ok = cv2.imwrite(fname, frame)
            if ok:
                saved += 1
                img_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
                pil = PIL.Image.fromarray(img_rgb)
                clear_output(wait=True)
                display(pil.resize((320,240)))
                print(f"Saved {saved}/{n_samples}: {fname}")
            else:
                print("Failed to write image; retrying...")

            time.sleep(1)
        time.sleep(10)
        print(f"Done with {label}. Saved {saved} images.")
finally:
    cap.release()
    clear_output(wait=True)
    print("Camera released. Collection finished.")

Camera released. Collection finished.


In [None]:
import cv2, pandas as pd
from pathlib import Path
import mediapipe as mp

ROOT = Path(IMAGE_PATH)
OUTCSV = ROOT / "landmarks.csv"

mp_hands = mp.solutions.hands
hands = mp_hands.Hands(static_image_mode=True, max_num_hands=1, min_detection_confidence=0.25)

rows = []
for imgf in sorted(ROOT.rglob("*.jpg")):
    if "bad" in imgf.parts:
        continue
    img = cv2.imread(str(imgf))
    if img is None:
        continue
    img_rgb = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
    res = hands.process(img_rgb)
    if not res.multi_hand_landmarks:
        continue
    lm = res.multi_hand_landmarks[0]
    feat = []
    for p in lm.landmark:
        feat += [p.x, p.y, p.z]
    label = imgf.parent.name if imgf.parent != ROOT else imgf.name.split("_")[0]
    rows.append(feat + [label])

hands.close()

cols = [f"x{i}" for i in range(63)] + ["label"]
df = pd.DataFrame(rows, columns=cols)
df.to_csv(OUTCSV, index=False)
print("Saved", OUTCSV, "rows:", len(df))
print(df['label'].value_counts())


I0000 00:00:1765209069.576101  343774 gl_context.cc:369] GL version: 2.1 (2.1 Metal - 89.4), renderer: Apple M4
W0000 00:00:1765209069.587902  344684 inference_feedback_manager.cc:114] Feedback manager requires a model with a single signature inference. Disabling support for feedback tensors.
W0000 00:00:1765209069.594886  344686 inference_feedback_manager.cc:114] Feedback manager requires a model with a single signature inference. Disabling support for feedback tensors.
W0000 00:00:1765209069.642963  344681 landmark_projection_calculator.cc:186] Using NORM_RECT without IMAGE_DIMENSIONS is only supported for the square ROI. Provide IMAGE_DIMENSIONS or use PROJECTION_MATRIX.


Saved Tensorflow/workspace/images/collected_images/landmarks.csv rows: 1441
label
yes                   199
C                     196
A                     191
no                    182
B                     181
peace                 180
thankyou              180
ok                    131
.ipynb_checkpoints      1
Name: count, dtype: int64


In [None]:
import pandas as pd
from pathlib import Path

ROOT = Path(IMAGE_PATH)
CSV = ROOT / "landmarks.csv"
OUT = ROOT / "landmarks.cleaned.csv"

df = pd.read_csv(CSV)
bad_mask = df['label'].str.startswith('.')
print("Removing labels that start with dot:", sorted(df.loc[bad_mask, 'label'].unique()))
df_clean = df[~bad_mask].reset_index(drop=True)
df_clean.to_csv(OUT, index=False)
print("Saved cleaned CSV to", OUT)
print(df_clean['label'].value_counts())


Removing labels that start with dot: ['.ipynb_checkpoints']
Saved cleaned CSV to Tensorflow/workspace/images/collected_images/landmarks.cleaned.csv
label
yes         199
C           196
A           191
no          182
B           181
peace       180
thankyou    180
ok          131
Name: count, dtype: int64


In [None]:

import numpy as np, pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.neural_network import MLPClassifier
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
import joblib
from pathlib import Path

ROOT = Path(IMAGE_PATH)
CSV = ROOT / "landmarks.csv"
OUT = ROOT / "model"
OUT.mkdir(exist_ok=True)

df = pd.read_csv(CSV)
X = df.drop(columns=['label']).values.astype(np.float32)
y_text = df['label'].values
le = LabelEncoder(); y = le.fit_transform(y_text)


X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, stratify=y, random_state=42)


scaler = StandardScaler().fit(X_train)
X_train_s, X_test_s = scaler.transform(X_train), scaler.transform(X_test)

clf = MLPClassifier(hidden_layer_sizes=(128,64), max_iter=300, random_state=42)
clf.fit(X_train_s, y_train)

y_pred = clf.predict(X_test_s)
print("Test acc:", accuracy_score(y_test, y_pred))
print(classification_report(y_test, y_pred, target_names=le.classes_))
print("Confusion matrix:\n", confusion_matrix(y_test, y_pred))

joblib.dump({'model':clf, 'scaler':scaler, 'label_encoder': le}, OUT / "gesture_baseline.pkl")
print("Saved model to", OUT / "gesture_baseline.pkl")


Test acc: 0.9930555555555556
              precision    recall  f1-score   support

           A       1.00      1.00      1.00        38
           B       0.97      1.00      0.99        36
           C       1.00      1.00      1.00        39
          no       1.00      0.97      0.99        37
          ok       1.00      0.96      0.98        26
       peace       1.00      1.00      1.00        36
    thankyou       1.00      1.00      1.00        36
         yes       0.98      1.00      0.99        40

    accuracy                           0.99       288
   macro avg       0.99      0.99      0.99       288
weighted avg       0.99      0.99      0.99       288

Confusion matrix:
 [[38  0  0  0  0  0  0  0]
 [ 0 36  0  0  0  0  0  0]
 [ 0  0 39  0  0  0  0  0]
 [ 0  0  0 36  0  0  0  1]
 [ 0  1  0  0 25  0  0  0]
 [ 0  0  0  0  0 36  0  0]
 [ 0  0  0  0  0  0 36  0]
 [ 0  0  0  0  0  0  0 40]]
Saved model to Tensorflow/workspace/images/collected_images/model/gesture_baseline.p