In [1]:
import cv2

In [2]:

# Deep Learning CNN model to recognize face
 
# Specifying the folder where images are present
TrainingImagePath='dataset'
 
from keras.preprocessing.image import ImageDataGenerator
 
# Defining pre-processing transformations on raw images of training data
# These hyper parameters helps to generate slightly twisted versions
# of the original image, which leads to a better model, since it learns
# on the good and bad mix of images
train_datagen = ImageDataGenerator(
        shear_range=0.1,
        zoom_range=0.1,
        horizontal_flip=True)
 
# Defining pre-processing transformations on raw images of testing data
# No transformations are done on the testing images
test_datagen = ImageDataGenerator()
 
# Generating the Training Data
training_set = train_datagen.flow_from_directory(
        TrainingImagePath,
        target_size=(64, 64),
        batch_size=32,
        class_mode='categorical')
 
 
# Generating the Testing Data
test_set = test_datagen.flow_from_directory(
        TrainingImagePath,
        target_size=(64, 64),
        batch_size=32,
        class_mode='categorical')
 
# Printing class labels for each face
test_set.class_indices

  _warn(("h5py is running against HDF5 {0} when it was built against {1}, "


Found 25 images belonging to 25 classes.
Found 25 images belonging to 25 classes.


{'0': 0,
 '1': 1,
 '10': 2,
 '11': 3,
 '12': 4,
 '13': 5,
 '14': 6,
 '15': 7,
 '17': 8,
 '19': 9,
 '2': 10,
 '20': 11,
 '21': 12,
 '22': 13,
 '23': 14,
 '24': 15,
 '25': 16,
 '27': 17,
 '3': 18,
 '4': 19,
 '5': 20,
 '6': 21,
 '7': 22,
 '8': 23,
 '9': 24}

In [3]:
'''############ Creating lookup table for all faces ############'''
# class_indices have the numeric tag for each face
TrainClasses=training_set.class_indices
 
# Storing the face and the numeric tag for future reference
ResultMap={}
for faceValue,faceName in zip(TrainClasses.values(),TrainClasses.keys()):
    ResultMap[faceValue]=faceName
 
# Saving the face map for future reference
import pickle
with open("ResultsMap.pkl", 'wb') as fileWriteStream:
    pickle.dump(ResultMap, fileWriteStream)
 
# The model will give answer as a numeric tag
# This mapping will help to get the corresponding face name for it
print("Mapping of Face and its ID",ResultMap)
 
# The number of neurons for the output layer is equal to the number of faces
OutputNeurons=len(ResultMap)
print('\n The Number of output neurons: ', OutputNeurons)

Mapping of Face and its ID {0: '0', 1: '1', 2: '10', 3: '11', 4: '12', 5: '13', 6: '14', 7: '15', 8: '17', 9: '19', 10: '2', 11: '20', 12: '21', 13: '22', 14: '23', 15: '24', 16: '25', 17: '27', 18: '3', 19: '4', 20: '5', 21: '6', 22: '7', 23: '8', 24: '9'}

 The Number of output neurons:  25


In [4]:
from keras.models import Sequential
from keras.layers import Convolution2D
from keras.layers import MaxPool2D
from keras.layers import Flatten
from keras.layers import Dense
 
'''Initializing the Convolutional Neural Network'''
classifier= Sequential()
 
''' STEP--1 Convolution
# Adding the first layer of CNN
# we are using the format (64,64,3) because we are using TensorFlow backend
# It means 3 matrix of size (64X64) pixels representing Red, Green and Blue components of pixels
'''
classifier.add(Convolution2D(32, kernel_size=(5, 5), strides=(1, 1), input_shape=(64,64,3), activation='relu'))
 
'''# STEP--2 MAX Pooling'''
classifier.add(MaxPool2D(pool_size=(2,2)))
 
'''############## ADDITIONAL LAYER of CONVOLUTION for better accuracy #################'''
classifier.add(Convolution2D(64, kernel_size=(5, 5), strides=(1, 1), activation='relu'))
 
classifier.add(MaxPool2D(pool_size=(2,2)))
 
'''# STEP--3 FLattening'''
classifier.add(Flatten())
 
'''# STEP--4 Fully Connected Neural Network'''
classifier.add(Dense(64, activation='relu'))
 
classifier.add(Dense(OutputNeurons, activation='softmax'))
 
'''# Compiling the CNN'''
#classifier.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
classifier.compile(loss='categorical_crossentropy', optimizer = 'adam', metrics=["accuracy"])
 
###########################################################
import time
# Measuring the time taken by the model to train
StartTime=time.time()
 
# Starting the model training
classifier.fit_generator(
                    training_set,
                    steps_per_epoch=1,
                    epochs=15,
                    validation_data=test_set,
                    validation_steps=10)
 
EndTime=time.time()
print("###### Total Time Taken: ", round((EndTime-StartTime)/60), 'Minutes ######')

  classifier.fit_generator(


Epoch 1/15
Epoch 2/15
Epoch 3/15
Epoch 4/15
Epoch 5/15
Epoch 6/15
Epoch 7/15
Epoch 8/15
Epoch 9/15
Epoch 10/15
Epoch 11/15
Epoch 12/15
Epoch 13/15
Epoch 14/15
Epoch 15/15
###### Total Time Taken:  0 Minutes ######


In [5]:
import cv2

# Load the video
video_path = 'dataset_video/Video_Boumediene_Rayane.mp4'
cap = cv2.VideoCapture(video_path)

# Load the Haar cascade classifier for object detection
cascade_path = 'haarcascade_frontalface_default.xml'
face_cascade = cv2.CascadeClassifier(cascade_path)

# Output file path for saving the bounding box predictions
output_file = 'bounding_box_rayane.txt'

# Open the output file in write mode
with open(output_file, 'w') as f:
    frame_count = 0

    while True:
        # Read the next frame from the video
        ret, frame = cap.read()

        if not ret:
            break

        # Perform object detection on the frame using Haar cascade classifier
        gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
        faces = face_cascade.detectMultiScale(gray, scaleFactor=1.1, minNeighbors=5, minSize=(30, 30))

        # Iterate over detected faces and write the bounding box coordinates to the output file in YOLO format
        for (x, y, w, h) in faces:
            frame_height, frame_width, _ = frame.shape
            x_center = (x + x + w) / (2 * frame_width)
            y_center = (y + y + h) / (2 * frame_height)
            width = w / frame_width
            height = h / frame_height

            # Write the bounding box coordinates to the output file in YOLO format
            line = f'0 {x_center} {y_center} {width} {height}\n'  # Assuming class index 0
            f.write(line)

        frame_count += 1

    print(f'Processed {frame_count} frames.')

# Release the video capture and close the output file
cap.release()

       


Processed 5856 frames.


In [6]:
import os

def calculate_iou(box1, box2):
    # Convert YOLO format to (x_min, y_min, x_max, y_max)
    box1 = convert_yolo_to_coordinates(box1)
    box2 = convert_yolo_to_coordinates(box2)

    if box1 is None or box2 is None:
        return 0

    # Calculate the intersection coordinates
    x_min = max(box1[0], box2[0])
    y_min = max(box1[1], box2[1])
    x_max = min(box1[2], box2[2])
    y_max = min(box1[3], box2[3])

    # Calculate the intersection area
    intersection_area = max(0, x_max - x_min) * max(0, y_max - y_min)

    # Calculate the union area
    box1_area = (box1[2] - box1[0]) * (box1[3] - box1[1])
    box2_area = (box2[2] - box2[0]) * (box2[3] - box2[1])
    union_area = box1_area + box2_area - intersection_area

    # Calculate the IoU
    iou = intersection_area / union_area
    return iou

def convert_yolo_to_coordinates(box):
    try:
        x, y, w, h = map(float, box)
        x_min = (x - w / 2)
        y_min = (y - h / 2)
        x_max = (x + w / 2)
        y_max = (y + h / 2)
        return x_min, y_min, x_max, y_max
    except ValueError:
        return None

def calculate_f1_score(ground_truth_folder, predicted_file, iou_threshold):
    # Read the predicted bounding box coordinates from the file
    with open(predicted_file, 'r') as f:
        predicted_boxes = [line.strip().split() for line in f.readlines()]

    true_positives = 0
    false_positives = 0
    false_negatives = 0

    for pred_box in predicted_boxes:
        max_iou = 0
        gt_files = os.listdir(ground_truth_folder)
        if len(gt_files) == 0:
            continue  # Skip iteration if there are no ground truth files
        for file in gt_files:
            gt_file = os.path.join(ground_truth_folder, file)
            with open(gt_file, 'r') as f:
                ground_truth_boxes = [line.strip().split() for line in f.readlines()]
            for gt_box in ground_truth_boxes:
                if len(gt_box) == 5:
                    iou = calculate_iou(pred_box[1:], gt_box[1:])
                    if iou > max_iou:
                        max_iou = iou
        if max_iou >= iou_threshold:
            true_positives += 1
        else:
            false_positives += 1

    false_negatives = len(gt_files) - true_positives

    if true_positives == 0 and false_positives == 0 and false_negatives == 0:
        # Handle the case when there are no true positives, false positives, or false negatives
        precision = 0
        recall = 0
    else:
        precision = true_positives / (true_positives + false_positives)
        recall = true_positives / (true_positives + false_negatives)

    if precision + recall == 0:
        f1_score = 0  # Set F1 score to zero if both precision and recall are zero
    else:
        f1_score = 2 * (precision * recall) / (precision + recall)

    return f1_score

ground_truth_folder = 'labels/labelled_frames_Video_Boumediene_Rayane'
predicted_file = 'bounding_box_rayane.txt'
iou_threshold = 0.5
output_file = 'f1_score_result_cnn_rayane.txt'

f1_score = calculate_f1_score(ground_truth_folder, predicted_file, iou_threshold)

with open(output_file, 'w') as f:
    f.write(f"F1 Score: {f1_score}\n")

print(f"F1 Score saved in {output_file}")
