In [1]:
import os
import json
import cv2
import numpy as np
import tensorflow as tf
import albumentations as alb
from sklearn.model_selection import train_test_split
from tensorflow.keras.models import Model 
from tensorflow.keras.layers import Input, Dense, Conv2D, MaxPooling2D, Flatten, Dropout
from matplotlib import pyplot as plt

# Constants
IMAGE_SIZE = (120, 120)

# Data Augmentation
augmentor = alb.Compose([
    alb.RandomCrop(width=450, height=450),
    alb.HorizontalFlip(p=0.5),
    alb.RandomBrightnessContrast(p=0.2),
    alb.RandomGamma(p=0.2),
    alb.RGBShift(p=0.2),
    alb.VerticalFlip(p=0.5)
], bbox_params=alb.BboxParams(format='albumentations', label_fields=['class_labels']))

def get_user_input():
    image_dir = "C:\\Users\\chsur\\OneDrive\\Desktop\\suri-all\\data2\\images"
    labels_dir = "C:\\Users\\chsur\\OneDrive\\Desktop\\suri-all\\data2\\labels"

    # Debugging prints
    print(f"Checking image directory: {image_dir}")
    print(f"Checking labels directory: {labels_dir}")

    if not image_dir or not os.path.isdir(image_dir):
        raise FileNotFoundError(f"The specified image directory does not exist or was not provided: {image_dir}")
    if not labels_dir or not os.path.isdir(labels_dir):
        raise FileNotFoundError(f"The specified labels directory does not exist or was not provided: {labels_dir}")
    return image_dir, labels_dir

def load_image(file_path):
    byte_img = tf.io.read_file(file_path)
    img = tf.io.decode_jpeg(byte_img)
    return img

def process_image_and_labels(image_path, label_path):
    img = cv2.imread(image_path)
    coords = [0, 0, 0.00001, 0.00001]
    if os.path.exists(label_path):
        with open(label_path, 'r') as f:
            label = json.load(f)
        coords[0] = label['shapes'][0]['points'][0][0]
        coords[1] = label['shapes'][0]['points'][0][1]
        coords[2] = label['shapes'][0]['points'][1][0]
        coords[3] = label['shapes'][0]['points'][1][1]
        coords = list(np.divide(coords, [640, 480, 640, 480]))
    return img, coords

def augment_image(image, coords):
    # Ensure the bounding box is valid before augmentation
    if coords[2] <= coords[0] or coords[3] <= coords[1]:
        print(f"Invalid pre-augmentation bbox: {coords}")
        coords = [0, 0, image.shape[1], image.shape[0]]  # Set to default values covering the whole image

    augmented = augmentor(image=image, bboxes=[coords], class_labels=['face'])
    aug_image = augmented['image']
    
    if len(augmented['bboxes']) == 0:
        aug_coords = [0, 0, image.shape[1], image.shape[0]]
        aug_class = 0
        print(f"No bbox returned after augmentation, using default: {aug_coords}")
    else:
        aug_coords = augmented['bboxes'][0]
        # Validate the augmented bounding box
        if aug_coords[2] <= aug_coords[0] or aug_coords[3] <= aug_coords[1]:
            print(f"Invalid post-augmentation bbox: {aug_coords}")
            aug_coords = [0, 0, image.shape[1], image.shape[0]]  # Set to default values
            aug_class = 0
        else:
            aug_class = 1

    return aug_image, aug_coords, aug_class

# Usage
def generator():
    for img, coords in dataset:
        aug_img, aug_coords, aug_class = augment_image(img, coords)
        yield aug_img, aug_coords, aug_class

# Example augmentor
augmentor = alb.Compose([
    alb.HorizontalFlip(p=0.5),
    alb.RandomBrightnessContrast(p=0.2),
    alb.Resize(224, 224, always_apply=True),
], bbox_params=alb.BboxParams(format='pascal_voc', label_fields=['class_labels']))

# Dataset example (Replace with your actual dataset)
dataset = [
    (np.random.randint(0, 255, (256, 256, 3), dtype=np.uint8), [50, 50, 200, 200]),
    (np.random.randint(0, 255, (256, 256, 3), dtype=np.uint8), [0, 0, 0, 0])  # Example of invalid bbox
]

# Creating the generator
data_gen = generator()

# Iterate over the generator to see the results
for _ in range(len(dataset)):
    img, coords, class_id = next(data_gen)
    print(f"Augmented Image Shape: {img.shape}, Coords: {coords}, Class ID: {class_id}")

def preprocess_dataset(images, labels, augment_times=60):
    def generator():
        for img_path, lbl_path in zip(images, labels):
            img, coords = process_image_and_labels(img_path, lbl_path)
            for _ in range(augment_times):
                aug_img, aug_coords, aug_class = augment_image(img, coords)
                yield aug_img, (aug_class, aug_coords)

    dataset = tf.data.Dataset.from_generator(generator, output_signature=(
        tf.TensorSpec(shape=(None, None, 3), dtype=tf.uint8),
        (tf.TensorSpec(shape=(), dtype=tf.uint8), tf.TensorSpec(shape=(4,), dtype=tf.float32))
    ))
    dataset = dataset.map(lambda x, y: (tf.image.resize(x, IMAGE_SIZE) / 255.0, y))
    dataset = dataset.batch(8).prefetch(4)
    return dataset

def build_custom_model():
    input_layer = Input(shape=(120, 120, 3))
    
    # Custom convolutional layers
    x = Conv2D(32, (3, 3), activation='relu')(input_layer)
    x = MaxPooling2D((2, 2))(x)
    x = Conv2D(64, (3, 3), activation='relu')(x)
    x = MaxPooling2D((2, 2))(x)
    x = Conv2D(128, (3, 3), activation='relu')(x)
    x = MaxPooling2D((2, 2))(x)
    x = Conv2D(256, (3, 3), activation='relu')(x)
    x = MaxPooling2D((2, 2))(x)
    x = Flatten()(x)

    # Classification Model  
    f1 = Dense(256, activation='relu')(x)
    class2 = Dense(1, activation='sigmoid')(f1)
    
    # Bounding box model
    f2 = Dense(256, activation='relu')(x)
    regress2 = Dense(4, activation='sigmoid')(f2)
    
    facetracker = Model(inputs=input_layer, outputs=[class2, regress2])
    return facetracker

def localization_loss(y_true, yhat):
    delta_coord = tf.reduce_sum(tf.square(y_true[:,:2] - yhat[:,:2]))
    h_true = y_true[:,3] - y_true[:,1]
    w_true = y_true[:,2] - y_true[:,0]
    h_pred = yhat[:,3] - yhat[:,1]
    w_pred = yhat[:,2] - yhat[:,0]
    delta_size = tf.reduce_sum(tf.square(w_true - w_pred) + tf.square(h_true-h_pred))
    return delta_coord + delta_size

class FaceTracker(Model):
    def __init__(self, model, **kwargs):
        super().__init__(**kwargs)
        self.model = model

    def compile(self, opt, classloss, localizationloss, **kwargs):
        super().compile(**kwargs)
        self.closs = classloss
        self.lloss = localizationloss
        self.opt = opt
    
    def train_step(self, batch, **kwargs):
        X, y = batch
        with tf.GradientTape() as tape:
            classes, coords = self.model(X, training=True)
            batch_classloss = self.closs(y[0], classes)
            batch_localizationloss = self.lloss(tf.cast(y[1], tf.float32), coords)
            total_loss = batch_localizationloss + 0.5 * batch_classloss
            grad = tape.gradient(total_loss, self.model.trainable_variables)
        self.opt.apply_gradients(zip(grad, self.model.trainable_variables))
        return {"total_loss": total_loss, "class_loss": batch_classloss, "regress_loss": batch_localizationloss}
    
    def test_step(self, batch, **kwargs):
        X, y = batch
        classes, coords = self.model(X, training=False)
        batch_classloss = self.closs(y[0], classes)
        batch_localizationloss = self.lloss(tf.cast(y[1], tf.float32), coords)
        total_loss = batch_localizationloss + 0.5 * batch_classloss
        return {"total_loss": total_loss, "class_loss": batch_classloss, "regress_loss": batch_localizationloss}
        
    def call(self, X, **kwargs):
        return self.model(X, **kwargs)

# Get user inputs
image_dir, labels_dir = get_user_input()

print(f"Image directory: {image_dir}")
print(f"Labels directory: {labels_dir}")

# Load original image and label paths
image_files = [os.path.join(image_dir, f) for f in os.listdir(image_dir) if f.endswith('.jpg')]
label_files = [os.path.join(labels_dir, f) for f in os.listdir(labels_dir) if f.endswith('.json')]

# Find matching files
image_file_basenames = {os.path.splitext(os.path.basename(f))[0] for f in image_files}
label_file_basenames = {os.path.splitext(os.path.basename(f))[0] for f in label_files}

matching_basenames = image_file_basenames.intersection(label_file_basenames)

matched_image_files = [os.path.join(image_dir, f"{basename}.jpg") for basename in matching_basenames]
matched_label_files = [os.path.join(labels_dir, f"{basename}.json") for basename in matching_basenames]

print(f"Number of matching image files: {len(matched_image_files)}")
print(f"Number of matching label files: {len(matched_label_files)}")

# Split into training, validation, and testing sets (70%, 15%, 15%)
train_images, temp_images, train_labels, temp_labels = train_test_split(matched_image_files, matched_label_files, test_size=0.3, random_state=42)
val_images, test_images, val_labels, test_labels = train_test_split(temp_images, temp_labels, test_size=0.5, random_state=42)

# Preprocess datasets
train = preprocess_dataset(train_images, train_labels)
val = preprocess_dataset(val_images, val_labels)
test = preprocess_dataset(test_images, test_labels)

# Model compilation
facetracker = build_custom_model()
facetracker_model = FaceTracker(facetracker)
facetracker_model.compile(
    opt=tf.keras.optimizers.Adam(learning_rate=0.0001),
    classloss=tf.keras.losses.BinaryCrossentropy(),
    localizationloss=localization_loss
)

# Train the model
hist = facetracker_model.fit(
    train, 
    epochs=30, 
    validation_data=val
)

# Save the model
facetracker_model.save('facetracker_model.h5')

# Evaluate the model
facetracker_model.evaluate(test)


Augmented Image Shape: (224, 224, 3), Coords: (43.75, 43.75, 175.0, 175.0), Class ID: 1
Invalid pre-augmentation bbox: [0, 0, 0, 0]
Augmented Image Shape: (224, 224, 3), Coords: (0.0, 0.0, 224.0, 224.0), Class ID: 1
Checking image directory: C:\Users\chsur\OneDrive\Desktop\suri-all\data2\images
Checking labels directory: C:\Users\chsur\OneDrive\Desktop\suri-all\data2\labels
Image directory: C:\Users\chsur\OneDrive\Desktop\suri-all\data2\images
Labels directory: C:\Users\chsur\OneDrive\Desktop\suri-all\data2\labels
Number of matching image files: 244
Number of matching label files: 244
Epoch 1/30
      2/Unknown [1m10s[0m 93ms/step - class_loss: 0.6475 - regress_loss: 149608.3438 - total_loss: 149608.6719Invalid pre-augmentation bbox: [0.2690590659340659, 0.8628663003663004, 0.20552884615384612, 0.9624542124542124]
Invalid pre-augmentation bbox: [0.2690590659340659, 0.8628663003663004, 0.20552884615384612, 0.9624542124542124]
Invalid pre-augmentation bbox: [0.2690590659340659, 0.86286

  self.gen.throw(typ, value, traceback)


Invalid pre-augmentation bbox: [0.7283230633802817, 0.9298708920187794, 0.7965448943661972, 0.8110328638497653]
Invalid pre-augmentation bbox: [0.7283230633802817, 0.9298708920187794, 0.7965448943661972, 0.8110328638497653]
Invalid pre-augmentation bbox: [0.7283230633802817, 0.9298708920187794, 0.7965448943661972, 0.8110328638497653]
Invalid pre-augmentation bbox: [0.7283230633802817, 0.9298708920187794, 0.7965448943661972, 0.8110328638497653]
Invalid pre-augmentation bbox: [0.7283230633802817, 0.9298708920187794, 0.7965448943661972, 0.8110328638497653]
Invalid pre-augmentation bbox: [0.7283230633802817, 0.9298708920187794, 0.7965448943661972, 0.8110328638497653]
Invalid pre-augmentation bbox: [0.7283230633802817, 0.9298708920187794, 0.7965448943661972, 0.8110328638497653]
Invalid pre-augmentation bbox: [0.7283230633802817, 0.9298708920187794, 0.7965448943661972, 0.8110328638497653]
Invalid pre-augmentation bbox: [0.7283230633802817, 0.9298708920187794, 0.7965448943661972, 0.8110328638



    134/Unknown [1m5s[0m 36ms/step - class_loss: 5.0438e-38 - regress_loss: 209663.9844 - total_loss: 209663.9844Invalid pre-augmentation bbox: [0.3597051056338028, 0.5660211267605634, 0.25517165492957744, 0.7567488262910798]
Invalid pre-augmentation bbox: [0.3597051056338028, 0.5660211267605634, 0.25517165492957744, 0.7567488262910798]
Invalid pre-augmentation bbox: [0.3597051056338028, 0.5660211267605634, 0.25517165492957744, 0.7567488262910798]
Invalid pre-augmentation bbox: [0.3597051056338028, 0.5660211267605634, 0.25517165492957744, 0.7567488262910798]
Invalid pre-augmentation bbox: [0.3597051056338028, 0.5660211267605634, 0.25517165492957744, 0.7567488262910798]
Invalid pre-augmentation bbox: [0.3597051056338028, 0.5660211267605634, 0.25517165492957744, 0.7567488262910798]
Invalid pre-augmentation bbox: [0.3597051056338028, 0.5660211267605634, 0.25517165492957744, 0.7567488262910798]
Invalid pre-augmentation bbox: [0.3597051056338028, 0.5660211267605634, 0.25517165492957744, 0

[0.0, 399624.0, 399624.0]

In [2]:
from tensorflow.keras.models import load_model

In [15]:
import cv2
import numpy as np
from tensorflow.keras.models import load_model

# Load the pre-trained model
model = load_model('facetracker_model.h5')

# Function to preprocess each frame
def preprocess_frame(frame):
    # Resize the frame to the input size expected by the model (e.g., 224x224)
    resized_frame = cv2.resize(frame, (224, 224))
    # Normalize the frame
    normalized_frame = resized_frame / 255.0
    # Expand dimensions to match the model's input shape (1, 224, 224, 3)
    input_frame = np.expand_dims(normalized_frame, axis=0)
    return input_frame

# Function to draw bounding boxes on the frame
def draw_bounding_boxes(frame, predictions):
    for pred in predictions:
        x, y, w, h = pred
        # Convert back to original frame size
        x = int(x * frame.shape[1])
        y = int(y * frame.shape[0])
        w = int(w * frame.shape[1])
        h = int(h * frame.shape[0])
        # Draw rectangle around the face
        cv2.rectangle(frame, (x, y), (x + w, y + h), (0, 255, 0), 2)

# Initialize webcam
cap = cv2.VideoCapture(0)

while True:
    # Capture frame-by-frame
    ret, frame = cap.read()

    if not ret:
        break

    # Preprocess the frame
    input_frame = preprocess_frame(frame)

    # Make predictions
    predictions = model.predict(input_frame)

    # Draw bounding boxes on the frame
    draw_bounding_boxes(frame, predictions)

    # Display the resulting frame
    cv2.imshow('Face Tracker', frame)

    # Break the loop on 'q' key press
    if cv2.waitKey(1) & 0xFF == ord('q'):
        break

# Release the webcam and close windows
cap.release()
cv2.destroyAllWindows()


ValueError: Unknown layer: 'FaceTracker'. Please ensure you are using a `keras.utils.custom_object_scope` and that this object is included in the scope. See https://www.tensorflow.org/guide/keras/save_and_serialize#registering_the_custom_object for details.

In [14]:
%pip install opencv-python-headless tensorflow


Collecting opencv-python-headless
  Using cached opencv_python_headless-4.10.0.84-cp37-abi3-win_amd64.whl.metadata (20 kB)
Using cached opencv_python_headless-4.10.0.84-cp37-abi3-win_amd64.whl (38.8 MB)
Installing collected packages: opencv-python-headless
Note: you may need to restart the kernel to use updated packages.


ERROR: Could not install packages due to an OSError: [WinError 5] Access is denied: 'c:\\Users\\chsur\\AppData\\Local\\Programs\\Python\\Python310\\Lib\\site-packages\\cv2\\cv2.pyd'
Consider using the `--user` option or check the permissions.

