In [None]:
pip install mediapipe

Collecting mediapipe
  Downloading mediapipe-0.10.21-cp311-cp311-manylinux_2_28_x86_64.whl.metadata (9.7 kB)
Collecting sounddevice>=0.4.4 (from mediapipe)
  Downloading sounddevice-0.5.1-py3-none-any.whl.metadata (1.4 kB)
Downloading mediapipe-0.10.21-cp311-cp311-manylinux_2_28_x86_64.whl (35.6 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m35.6/35.6 MB[0m [31m64.4 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading sounddevice-0.5.1-py3-none-any.whl (32 kB)
Installing collected packages: sounddevice, mediapipe
Successfully installed mediapipe-0.10.21 sounddevice-0.5.1


In [None]:
import os

import cv2
import mediapipe as mp
import numpy as np
from tensorflow.keras.applications import MobileNetV2
from tensorflow.keras.layers import GlobalAveragePooling2D, Dense, Dropout
from tensorflow.keras.layers import Input, Rescaling
from tensorflow.keras.models import Model
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.preprocessing.image import ImageDataGenerator

In [None]:
from google.colab import drive

# Attach Google Drive to the Colab VM's filesystem at this mount point.
drive_mount_point = '/content/drive'
drive.mount(drive_mount_point)

Mounted at /content/drive


In [None]:
# Dataset locations on the mounted Google Drive
train_data_dir = '/content/drive/My Drive/asl_alphabet_train'
test_data_dir = '/content/drive/My Drive/asl_train_short'

# A single MediaPipe Hands detector shared by the cells below.
mp_hands = mp.solutions.hands
hands_options = dict(
    static_image_mode=True,        # process each image independently
    max_num_hands=1,               # report at most one hand per image
    min_detection_confidence=0.5,
)
hands = mp_hands.Hands(**hands_options)

In [None]:
def crop_hand(image, padding=20):
    """
    Detect a hand with MediaPipe and return the padded hand region of `image`.

    Args:
        image: H x W x 3 uint8 array. It is treated as BGR (OpenCV order) and
            converted to RGB before being passed to MediaPipe.
        padding: Margin, in pixels, added on every side of the landmark
            bounding box before cropping.

    Returns:
        The slice of `image` covering the padded bounding box of the first
        detected hand. The original `image` is returned unchanged when no hand
        is detected or when the clamped bounding box has zero area.
    """
    image_rgb = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
    results = hands.process(image_rgb)
    if not results.multi_hand_landmarks:
        return image

    h, w = image.shape[:2]
    landmarks = results.multi_hand_landmarks[0].landmark
    xs = [lm.x for lm in landmarks]
    ys = [lm.y for lm in landmarks]

    # Landmark coordinates are fractions of the image size. Clamp BOTH edges of
    # each axis into the frame: with only a one-sided clamp, a landmark value
    # outside the frame would produce a negative or inverted slice bound.
    xmin = min(w, max(0, int(min(xs) * w) - padding))
    xmax = max(0, min(w, int(max(xs) * w) + padding))
    ymin = min(h, max(0, int(min(ys) * h) - padding))
    ymax = max(0, min(h, int(max(ys) * h) + padding))

    # An empty crop would make a later cv2.resize fail, so fall back to the full image.
    if xmax <= xmin or ymax <= ymin:
        return image
    return image[ymin:ymax, xmin:xmax]

In [None]:
def custom_generator(directory, batch_size, target_size, augment=True):
    """
    Endlessly yield (images, labels) batches whose images are cropped to the hand.

    Args:
        directory: Root folder passed to Keras `flow_from_directory`.
        batch_size: Number of images requested per batch.
        target_size: Output image size as (height, width), the Keras convention.
        augment: When True (the default, matching the previous behaviour) random
            rotation, shifts and horizontal flips are applied. Pass False to get
            un-augmented images, e.g. for validation data.

    Yields:
        A tuple `(batch_x, batch_y)`: `batch_x` is a float32 array of RGB images
        scaled to [0, 1] and resized to `target_size`; `batch_y` is the label
        array produced by Keras with `class_mode='categorical'`.
    """
    augmentation = dict(rotation_range=20,
                        width_shift_range=0.2,
                        height_shift_range=0.2,
                        horizontal_flip=True) if augment else {}
    datagen = ImageDataGenerator(rescale=1./255, **augmentation)
    base_gen = datagen.flow_from_directory(directory, target_size=target_size, batch_size=batch_size, class_mode='categorical', shuffle=True)

    # cv2.resize expects (width, height) whereas Keras' target_size is (height, width).
    resize_wh = (target_size[1], target_size[0])

    while True:
        batch_x, batch_y = next(base_gen)
        new_batch = []
        for img in batch_x:
            # Round (not truncate) back to 8-bit so the float round trip does not
            # drop an intensity level; clip guards against values outside [0, 255].
            img_uint8 = np.clip(np.rint(img * 255), 0, 255).astype(np.uint8)
            # Keras loads images as RGB, but crop_hand treats its input as BGR.
            # Convert before the call and convert the returned crop back to RGB.
            img_bgr = cv2.cvtColor(img_uint8, cv2.COLOR_RGB2BGR)
            cropped_bgr = crop_hand(img_bgr)
            cropped = cv2.cvtColor(cropped_bgr, cv2.COLOR_BGR2RGB)
            cropped_resized = cv2.resize(cropped, resize_wh)
            cropped_normalized = cropped_resized.astype('float32') / 255.0
            new_batch.append(cropped_normalized)
        yield np.array(new_batch), batch_y

In [None]:
# Training parameters
target_size = (224, 224)  # standard input size for MobileNetV2
batch_size = 32

# Directory used for training; point this at a different dataset folder if needed.
training_dir = train_data_dir

# Build the training and validation streams with identical settings.
# NOTE(review): custom_generator is called the same way for both, so the
# validation images also go through its random augmentation and shuffling.
train_gen, val_gen = (
    custom_generator(data_dir, batch_size, target_size)
    for data_dir in (training_dir, test_data_dir)
)

# Batches drawn per epoch. The generators are endless, so Keras needs explicit
# step counts; estimate them as dataset_size / batch_size
# (e.g. ~11,000 images / 32 ≈ 344).
steps_per_epoch = 344
validation_steps = 100  # adjust to the size of the validation set

In [None]:
# Transfer learning: MobileNetV2 backbone (ImageNet weights, no top layers)
# followed by a small classification head.
num_classes = 29  # presumably the number of class folders in the dataset — TODO confirm

base_model = MobileNetV2(weights='imagenet', include_top=False, input_shape=(224, 224, 3))

# Freeze the whole backbone so that only the new classifier head is trained first.
base_model.trainable = False

inputs = Input(shape=(224, 224, 3))
# The data generators yield pixels in [0, 1], but MobileNetV2's ImageNet weights
# expect inputs scaled to [-1, 1]; map x -> 2*x - 1 inside the model so that
# callers keep feeding [0, 1] images.
x = Rescaling(scale=2.0, offset=-1.0)(inputs)
x = base_model(x)
x = GlobalAveragePooling2D()(x)
x = Dense(128, activation='relu')(x)
x = Dropout(0.5)(x)
predictions = Dense(num_classes, activation='softmax')(x)
model = Model(inputs=inputs, outputs=predictions)

Downloading data from https://storage.googleapis.com/tensorflow/keras-applications/mobilenet_v2/mobilenet_v2_weights_tf_dim_ordering_tf_kernels_1.0_224_no_top.h5
[1m9406464/9406464[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 0us/step


In [None]:
from tensorflow.keras.callbacks import ModelCheckpoint

# Save the full model (architecture + weights) at the end of every epoch.
# The path has no per-epoch placeholder, so each save goes to the same file.
checkpoint_options = {
    "filepath": "latest_model.keras",  # .keras format for compatibility
    "save_weights_only": False,
    "save_freq": 'epoch',
    "verbose": 1,
}
checkpoint_callback = ModelCheckpoint(**checkpoint_options)

# Requires `model`, `train_gen`, `val_gen`, `steps_per_epoch` and
# `validation_steps` from the earlier cells; run those first.
model.compile(
    loss='categorical_crossentropy',
    optimizer=Adam(learning_rate=1e-3),
    metrics=['accuracy'],
)

model.fit(
    train_gen,
    epochs=10,
    steps_per_epoch=steps_per_epoch,
    validation_data=val_gen,
    validation_steps=validation_steps,
    callbacks=[checkpoint_callback],
)


NameError: name 'model' is not defined

In [None]:
# Save the trained model in the Keras (.keras) format.
final_model_path = "asl_transfer_model.keras"
model.save(final_model_path)

# Release the shared MediaPipe Hands detector.
# NOTE(review): cells that call crop_hand presumably need `hands` re-created
# after this point — confirm before re-running them.
hands.close()

NameError: name 'model' is not defined