# In [None]:
import cv2
import numpy as np
import os
import face_recognition
import time
import h5py
import pickle
import shutil

# Shared path prefixes; every location below is built from these by plain
# string concatenation so the resulting values do not depend on the platform.
_dataset_root = 'C:\\Users\\Learning\\udemy\\Hackathon\\ultimate\\dataset\\'
_train_ds_root = _dataset_root + 'train_ds\\'
_models_root = _dataset_root + 'trained_models\\'

# Input folder scanned for training images (one sub-folder per person)
training_path = _train_ds_root + 'train\\'
# Destination for original images once a face crop has been saved from them
processed_training_path = _train_ds_root + 'train_processed\\'
# Destination for the resized face crops
cropped_training_path = _train_ds_root + 'train_cropped\\'
# Output files holding the computed encodings and their names
pickle_filename = _models_root + "face_encodings_custom.pickle"
h5_filename = _models_root + "face_encodings_custom.h5"

# Target size handed to cv2.resize for every cropped face image
cropped_image_size = (100, 100)

def _unique_path(directory, filename):
    """Return a path for *filename* inside *directory* that does not exist yet.

    ``directory/filename`` is returned unchanged when it is free; otherwise a
    numeric suffix is inserted before the extension until a free path is found.
    """
    candidate = os.path.join(directory, filename)
    stem, ext = os.path.splitext(filename)
    suffix = 1
    while os.path.exists(candidate):
        candidate = os.path.join(directory, f"{stem}_{suffix}{ext}")
        suffix += 1
    return candidate


def load_encodings(path_dataset):
    """Build face encodings for every usable image under *path_dataset*.

    Each immediate sub-directory of *path_dataset* is treated as one person;
    the sub-directory name becomes the label. For every image in it, the
    first face found by the CNN detector is cropped, resized to the
    module-level ``cropped_image_size`` and written to
    ``cropped_training_path``. The original file is then moved to
    ``processed_training_path`` and the crop is encoded.

    Images that cannot be read, contain no detected face, contain a face
    smaller than 100x100 pixels, or whose crop cannot be written are reported
    and left where they are. Any exception raised while handling one image is
    printed and does not stop the run.

    Args:
        path_dataset: Directory containing one sub-directory per person.

    Returns:
        A ``(list_encodings, list_names)`` tuple of equal-length lists:
        the first encoding returned by ``face_recognition.face_encodings``
        for each accepted image, and the matching person name.
    """
    min_face_size = 100  # minimum face height/width (pixels) to accept
    list_encodings = []
    list_names = []
    # Counts crops written, independent of encoding success, so that a failed
    # encoding can never make two crops share the same file name.
    saved_crops = 0

    for name in os.listdir(path_dataset):
        subdir = os.path.join(path_dataset, name)
        # Stray files in the dataset root would make os.listdir(subdir) raise.
        if not os.path.isdir(subdir):
            continue

        for image_file in os.listdir(subdir):
            image_path = os.path.join(subdir, image_file)
            try:
                img = cv2.imread(image_path)
                if img is None:
                    print("Error loading image:", image_path)
                    continue

                img_rgb = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)

                # Detect faces in the image
                face_locations = face_recognition.face_locations(img_rgb, model="cnn")
                if not face_locations:
                    print("No face detected in:", image_path)
                    continue

                # Only the first detected face is considered
                top, right, bottom, left = face_locations[0]
                if bottom - top < min_face_size or right - left < min_face_size:
                    print("Face too small, skipping:", image_path)
                    continue

                # Crop the face and bring it to the common size
                face_image = img_rgb[top:bottom, left:right]
                face_image_resized = cv2.resize(face_image, cropped_image_size)

                # Save the crop under a name that cannot clobber an earlier crop
                # (from this run or a previous one).
                cropped_face_path = _unique_path(
                    cropped_training_path, f"{name}_{saved_crops}.jpg"
                )
                crop_bgr = cv2.cvtColor(face_image_resized, cv2.COLOR_RGB2BGR)
                if not cv2.imwrite(cropped_face_path, crop_bgr):
                    # imwrite signals failure through its return value; keep the
                    # original in place so it can be retried.
                    print("Error saving cropped face for:", image_path)
                    continue
                saved_crops += 1

                # Move the original image to train_processed. Different people
                # may own files with the same basename, so never overwrite.
                shutil.move(
                    image_path,
                    _unique_path(processed_training_path, os.path.basename(image_path)),
                )

                # Calculate encoding.
                # NOTE(review): without known_face_locations the library runs
                # its own detection on the small crop, which may find nothing.
                # Passing the known box would avoid that, but changes the
                # encoder input - confirm against the recognition side first.
                face_encodings = face_recognition.face_encodings(face_image_resized)
                if not face_encodings:
                    print("No encoding computed for:", image_path)
                    continue

                # Store file encoding and name
                list_encodings.append(face_encodings[0])
                list_names.append(name)

            except Exception as e:
                # Best-effort batch job: report the failure and carry on.
                print("Error processing image:", image_path)
                print(e)

    return list_encodings, list_names

# Driver: encode every training image, then persist the result twice
# (pickle and HDF5) and report the elapsed wall-clock time.
start_time = time.time()
print("Execution started...")

# Create directories if they don't exist. The output-file folders are included
# so that a missing folder cannot fail the run after the encoding pass.
os.makedirs(processed_training_path, exist_ok=True)
os.makedirs(cropped_training_path, exist_ok=True)
for output_file in (pickle_filename, h5_filename):
    output_dir = os.path.dirname(output_file)
    if output_dir:  # os.makedirs('') raises for bare file names
        os.makedirs(output_dir, exist_ok=True)

list_encodings, list_names = load_encodings(training_path)

# Save the encodings and names in a pickle file
encodings_data = {"encodings": list_encodings, "names": list_names}
with open(pickle_filename, "wb") as f:
    pickle.dump(encodings_data, f)

# Save the encodings and names in an H5 file
with h5py.File(h5_filename, "w") as hf:
    hf.create_dataset("encodings", data=np.array(list_encodings))
    # h5py cannot store NumPy's fixed-width unicode ('U') dtype, which is what
    # np.array(list_of_str) produces; store the names as variable-length UTF-8
    # strings instead.
    hf.create_dataset(
        "names",
        data=np.array(list_names, dtype=object),
        dtype=h5py.string_dtype(encoding="utf-8"),
    )

print("=====================================================================")

print("Execution completed.")
print("Total execution time: {:.2f} seconds".format(time.time() - start_time))
print("=====================================================================")
