# This is a sample Jupyter Notebook

Below is an example of a code cell. 
Put your cursor into the cell and press Shift+Enter to execute it and select the next one, or click 'Run Cell' button.

Press Double Shift to search everywhere for classes, files, tool windows, actions, and settings.

To learn more about Jupyter Notebooks in PyCharm, see [help](https://www.jetbrains.com/help/pycharm/ipython-notebook-support.html).
For an overview of PyCharm, go to Help -> Learn IDE features or refer to [our documentation](https://www.jetbrains.com/help/pycharm/getting-started.html).

In [5]:
import cv2
import mediapipe as mp
import csv
import os
import numpy as np
import tensorflow as tf
from keras.src.metrics.accuracy_metrics import accuracy

mp_hands = mp.solutions.hands.Hands(...)
# Create Hands processor instance with static image mode
hands = mp_hands.Hands(static_image_mode=True)

In [6]:
def process_image(img_path):
    """
    Process single image to extract hand landmarks
    Args:
        img_path: Path to input image
    Returns:
        List of normalized landmarks (x,y,z) or None if no hand detected
    """
    # Read image using OpenCV
    img = cv2.imread(img_path)
    # Convert BGR to RGB (MediaPipe requires RGB)
    img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
    # Process image with MediaPipe
    results = hands.process(img)

    if results.multi_hand_landmarks:
        return [coord for hand in results.multi_hand_landmarks
                for landmark in hand.landmark
                for coord in [landmark.x, landmark.y, landmark.z]]
    return None

In [7]:
def generate_landmarks_csv(dataset_path, output_csv):
    """
    Generate CSV file containing image paths and corresponding landmarks
    Args:
        dataset_path: Root directory of gesture folders
        output_csv: Path to output CSV file
    """
    # Open CSV file in write mode
    with open(output_csv, 'w', newline='') as csvfile:
        writer = csv.writer(csvfile)
        #header row
        writer.writerow(['image_path', 'landmarks', 'gesture_label'])

        # Loop through each gesture folder
        for gesture_idx, gesture_name in enumerate(sorted(os.listdir(dataset_path))):
            gesture_path = os.path.join(dataset_path, gesture_name)

            # Process each image in gesture folder
            for img_name in sorted(os.listdir(gesture_path)):
                img_path = os.path.join(gesture_path, img_name)
                landmarks = process_image(img_path)
                if landmarks: #if detected
                    # Write image path, landmarks, and numeric label
                    writer.writerow([img_path, ','.join(map(str, landmarks)), gesture_idx])

In [8]:
class GlovedHandDataset(tf.keras.utils.Sequence): #dataset is big for memory
    def __init__(self, csv_path ,batch_size=32 ,shuffle=True):
        """
        Initialize data generator
        Args:
            csv_path: Path to landmarks CSV
            batch_size: Number of samples per batch
            shuffle: Whether to shuffle data after each epoch
        """
        # Read all data from CSV
        with open(csv_path) as f:
            self.data = list(csv.reader(f))[:1]  # Skip header row

        self.batch_size = batch_size
        self.shuffle = shuffle
        self.on_epoch_end()

    def __len__(self):
        """Returns number of batches per epoch"""
        return int(np.ceil(len(self.data) / float(self.batch_size)))

    def on_epoch_end(self):
        """Shuffles data after each epoch if enabled"""
        self.indices = np.arange(len(self.data))
        if self.shuffle:
            np.random.shuffle(self.indices)

    def __getitem__(self, idx):
        # Get batch indices
        batch_indices = self.indices[idx * self.batch_size:(idx + 1) * self.batch_size]

        # Initialize batch arrays
        batch_images = []
        batch_landmarks = []
        batch_labels = []

        for i in batch_indices:
            img_path, lm_str, label = self.data[i]

            img = cv2.imread(img_path)
            img = cv2.resize(img, (256, 256)) #resize to expected input for rgb
            img = (img / 127.5) - 1.0  #normalize to [-1,1] (mediaPipe standard)

            #convert landmark string to numpy array
            landmarks = np.array(lm_str.split(','), dtype=np.float32)

            batch_images.append(img)
            batch_landmarks.append(landmarks)
            batch_labels.append(int(label))

        #return batch as numpy arrays
        return np.array(batch_images), {
            'output_1': np.array(batch_landmarks),
            'output_2': np.array(batch_labels),
        }

In [16]:
def build_model(num_gestures=17):
    """
    Build modified MediaPipe model with two outputs
    Args:
        num_gestures: Number of gesture classes (17 for CADDIAN)
    Returns:
        Compiled Keras model
    """
    #load base MediaPipe model (pretrained)
    base_model = mp.solutions.hands.Hands(
        static_image_mode=True,
        model_complexity=1,
        min_detection_confidence=0.5
    )

    #Freeze all layers except last 3
    for layer in base_model.layers[:-3]:
        layer.trainable = False

    #create multi-output model
    x = base_model.output
    #Landmark prediction head (original task)
    landmarks_out = tf.keras.layers.Dense(21*3, name = 'output_1')(x)

    #Gesture classification head (new task)
    gesture_out = tf.keras.layers.Dense(num_gestures, activation='softmax', name = 'output_2')(x)

    model = tf.keras.Model(
        inputs=base_model.input,
        outputs=[landmarks_out, gesture_out]
    )

    #Compile with custom loss weights
    model.compile(
        optimizer=tf.keras.optimizers.Adam(0.001),
        loss={
            'output_1': 'mse',
            'output_2': 'sparse_categorical_crossentropy',
        },
        loss_weights={'output_1': 0.3, 'output_2': 0.7},
        metrics={'output_2' : 'accuracy'}
    )

    return model

In [None]:
# Main execution
if __name__ == '__main__':
    #please change the paths as it is in your files
    dataset_path = "C:/Users/crese/Downloads/caddy-gestures-complete-v2-release-all-scenarios-fast.ai"
    output_csv_path = 'landmarks.csv'

    generate_landmarks_csv(dataset_path, output_csv_path)

    #step 2: create data generators
    train_gen = GlovedHandDataset(output_csv_path)

In [12]:
import urllib.request

# Download the model (if not already present)
if not os.path.exists('hand_landmark_model.h5'):
    url = 'https://storage.googleapis.com/mediapipe-models/hand_landmarker/hand_landmarker/float16/1/hand_landmarker.task'
    urllib.request.urlretrieve(url, 'hand_landmark_model.h5')

In [19]:
import tensorflow as tf
import tensorflowjs as tfjs

# Convert MediaPipe model to TensorFlow
tfjs.converters.convert_tf_saved_model(
    'hand_landmarker.task',
    'hand_landmarker_savedmodel'
)

ModuleNotFoundError: No module named 'tensorflowjs'

In [17]:

    #step 3: Build and train model
    model = build_model()
    model.fit(
        train_gen,
        epochs=10,
        verbose=1
    )

    #step 4: Save trained model
    model.save('gloved_hand_model.h5')

AttributeError: 'Hands' object has no attribute 'layers'