# Import and install required libraries/dependencies

In [1]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [2]:
!pip install cvzone
!pip install mediapipe
!pip install tensorflow

Collecting cvzone
  Downloading cvzone-1.6.1.tar.gz (25 kB)
  Preparing metadata (setup.py) ... [?25l[?25hdone
Building wheels for collected packages: cvzone
  Building wheel for cvzone (setup.py) ... [?25l[?25hdone
  Created wheel for cvzone: filename=cvzone-1.6.1-py3-none-any.whl size=26298 sha256=b10dbbc7afa54323273f1241068c7e74d17190669bdce6ce41d34850bbfa2b0e
  Stored in directory: /root/.cache/pip/wheels/2c/9f/b3/92e945ac4a71bf727a92463f38155cc5a4fa49c5010b38ec4c
Successfully built cvzone
Installing collected packages: cvzone
Successfully installed cvzone-1.6.1
Collecting mediapipe
  Downloading mediapipe-0.10.18-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (9.7 kB)
Collecting sounddevice>=0.4.4 (from mediapipe)
  Downloading sounddevice-0.5.1-py3-none-any.whl.metadata (1.4 kB)
Downloading mediapipe-0.10.18-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (36.1 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m36.1/36.1 MB[0m 

In [3]:
from google.colab.patches import cv2_imshow
from cvzone.HandTrackingModule import HandDetector
import pandas as pd
from tensorflow.keras.utils import to_categorical
from sklearn.model_selection import train_test_split
from tensorflow.keras.preprocessing.image import load_img, img_to_array
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Flatten, Conv2D, MaxPooling2D
import numpy as np
import os, cv2, math
import tensorflow as tf

# Model training pipeline

## Save the landmark images into processed_csv with filename and labels

In [39]:
black_landmark_img = "/content/drive/MyDrive/50.035 Computer Vision Group Assignment/Data (Combined)/cropped (Mitchel & Ad)/Landmarks (black)"

data = []

for filename in os.listdir(black_landmark_img):
    if filename.endswith(".jpg") or filename.endswith(".png"):
        label = filename.split(" (")[0]
        if label in ["bis_C", "tur_C"]:
              label = "bis_tur_C"
        img_path = os.path.join(black_landmark_img, filename)
        data.append({"img_path": img_path, "label": label})

df = pd.DataFrame(data)
print(df.head())

unique_labels = df['label'].unique()
label_map = {label: idx for idx, label in enumerate(unique_labels)}
df['numeric_label'] = df['label'].map(label_map)
df = df.drop(columns='label')

df.to_csv("/content/drive/MyDrive/CompVision/black_landmarks.csv", index=False)
print("DataFrame created and saved successfully!")

                                            img_path  label
0  /content/drive/MyDrive/50.035 Computer Vision ...  tur_N
1  /content/drive/MyDrive/50.035 Computer Vision ...  tur_N
2  /content/drive/MyDrive/50.035 Computer Vision ...  tur_N
3  /content/drive/MyDrive/50.035 Computer Vision ...  tur_N
4  /content/drive/MyDrive/50.035 Computer Vision ...  tur_N
DataFrame created and saved successfully!


In [40]:
print(label_map)

{'tur_N': 0, 'tur_O': 1, 'tur_P': 2, 'tur_R': 3, 'tur_S': 4, 'tur_T': 5, 'tur_U': 6, 'tur_V': 7, 'tur_Y': 8, 'tur_Z': 9, 'tur_D': 10, 'tur_E': 11, 'tur_F': 12, 'tur_G': 13, 'tur_H': 14, 'tur_I': 15, 'tur_J': 16, 'tur_K': 17, 'tur_L': 18, 'tur_M': 19, 'bis_Q': 20, 'bis_O': 21, 'bis_T': 22, 'bis_tur_C': 23, 'bis_D': 24, 'bis_U': 25, 'bis_M': 26, 'bis_K': 27, 'bis_B': 28, 'bis_Y': 29, 'bis_S': 30, 'bis_L': 31, 'bis_F': 32, 'bis_Z': 33, 'bis_E': 34, 'bis_G': 35, 'bis_P': 36, 'bis_A': 37, 'bis_X': 38, 'bis_V': 39, 'bis_R': 40, 'bis_W': 41, 'bis_N': 42, 'bis_I': 43, 'bis_H': 44, 'tur_A': 45, 'tur_B': 46}


## Split dataset (train, test, validate)

In [41]:
from sklearn.model_selection import train_test_split

# split into training+validation (90%) and testing (10%)
train_val_df, test_df = train_test_split(df, test_size=0.10, random_state=34, stratify=df['numeric_label'])
# split training+validation (90%) into training (80%) and validation (10%)
train_df, val_df = train_test_split(train_val_df, test_size=0.1111, random_state=34, stratify=train_val_df['numeric_label'])  # 0.1111 * 90% = ~10%

print(f"Training samples: {len(train_df)}")
print(f"Validation samples: {len(val_df)}")
print(f"Testing samples: {len(test_df)}")

Training samples: 7643
Validation samples: 956
Testing samples: 956


## Preprocessing function to load and resize image

In [42]:
def preprocess_image(file_path, label):
    image = tf.io.read_file(file_path)
    image = tf.image.decode_jpeg(image, channels=3)  # use decode_png for PNG images
    image = tf.image.resize(image, [224, 224])
    image = image / 255.0  # normalize pixel values to [0, 1]
    return image, label

## Convert dataframe to tf dataset

In [43]:
def create_dataset(df):
    file_paths = df['img_path'].values
    labels = tf.keras.utils.to_categorical(df['numeric_label'].values, num_classes=len(label_map))
    dataset = tf.data.Dataset.from_tensor_slices((file_paths, labels))
    dataset = dataset.map(preprocess_image, num_parallel_calls=tf.data.AUTOTUNE)
    return dataset

## Create datasets

In [44]:
batch_size = 32

train_dataset = create_dataset(train_df).shuffle(buffer_size=1000).repeat().batch(batch_size).prefetch(buffer_size=tf.data.AUTOTUNE)
val_dataset = create_dataset(val_df).repeat().batch(batch_size).prefetch(buffer_size=tf.data.AUTOTUNE)
test_dataset = create_dataset(test_df).batch(batch_size).prefetch(buffer_size=tf.data.AUTOTUNE)

## Training model

In [70]:
import math
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau

model = Sequential([
    Conv2D(32, (3, 3), activation='relu', input_shape=(224, 224, 3)),
    MaxPooling2D((2, 2)),
    Conv2D(64, (3, 3), activation='relu'),
    MaxPooling2D((2, 2)),
    Flatten(),
    Dense(128, activation='relu'),
    Dense(len(label_map), activation='softmax')
])

model.compile(optimizer=Adam(learning_rate=0.001), loss='categorical_crossentropy', metrics=['accuracy'])

early_stopping = EarlyStopping(
    monitor='val_loss',
    patience=5,
    restore_best_weights=True
)
lr_scheduler = ReduceLROnPlateau(
    monitor='val_loss',
    factor=0.5,
    patience=3,
    min_lr=1e-6
)

# training parameters
epochs = 20
steps_per_epoch = math.ceil(len(train_df) / batch_size)
validation_steps = math.ceil(len(val_df) / batch_size)

history = model.fit(
    train_dataset,
    steps_per_epoch=steps_per_epoch,
    epochs=epochs,
    validation_data=val_dataset,
    validation_steps=validation_steps,
    callbacks=[early_stopping, lr_scheduler]
)

Epoch 1/20


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m239/239[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m16s[0m 53ms/step - accuracy: 0.8404 - loss: 0.7110 - val_accuracy: 0.9948 - val_loss: 0.0290 - learning_rate: 0.0010
Epoch 2/20
[1m239/239[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m12s[0m 49ms/step - accuracy: 0.9988 - loss: 0.0063 - val_accuracy: 0.9948 - val_loss: 0.0281 - learning_rate: 0.0010
Epoch 3/20
[1m239/239[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m12s[0m 50ms/step - accuracy: 0.9997 - loss: 0.0018 - val_accuracy: 0.9948 - val_loss: 0.0244 - learning_rate: 0.0010
Epoch 4/20
[1m239/239[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m12s[0m 50ms/step - accuracy: 0.9996 - loss: 6.2200e-04 - val_accuracy: 0.9958 - val_loss: 0.0202 - learning_rate: 0.0010
Epoch 5/20
[1m239/239[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m12s[0m 49ms/step - accuracy: 1.0000 - loss: 3.2125e-05 - val_accuracy: 0.9958 - val_loss: 0.0208 - learning_rate: 0.0010
Epoch 6/20
[1m239/239[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[

## Model Summary

In [71]:
model.summary()

## Run model on testing

In [72]:
test_loss, test_accuracy = model.evaluate(test_dataset)
print(f"Test Set Accuracy: {test_accuracy}")

[1m30/30[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 60ms/step - accuracy: 0.9966 - loss: 0.0334
Test Set Accuracy: 0.9979079365730286


## Get F2 score

In [76]:
def get_labels(dataset):
    labels = []
    for _, label in dataset:
        labels.append(label.numpy())
    return np.concatenate(labels)

y_true = get_labels(test_dataset)

y_pred = np.argmax(model.predict(test_dataset), axis=1)

# ensure y_true is a 1D array of class labels (not one-hot encoded)
y_true = np.argmax(y_true, axis=1) if y_true.ndim > 1 else y_true

[1m30/30[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 44ms/step


In [74]:
# Compute F2 score
def fbeta_score(y_true, y_pred, beta=2):
    from sklearn.metrics import precision_recall_fscore_support
    precision, recall, _, _ = precision_recall_fscore_support(y_true, y_pred, beta=beta, average='weighted')
    fbeta = (1 + beta**2) * (precision * recall) / ((beta**2 * precision) + recall + 1e-7)
    return fbeta

f2_score = fbeta_score(y_true, y_pred, beta=2)
print(f"F2 Score: {f2_score}")

F2 Score: 0.997927852488589


## Save model

In [75]:
# Save the model
model.save("/content/drive/MyDrive/CompVision/doubleCNN_landmarks.h5")

