In [None]:
import cv2
import numpy as np
from PIL import Image
from pathlib import Path
import os
import pandas as pd

# Code mostly taken from: https://github.com/aakashjhawar/face-recognition-using-deep-learning/blob/master/recognize_image.py
def face_detect(image_path, net, conf_threshold=0.5, output_size=(224, 224)):
    """Load an image, crop it to the first confidently detected face and resize it.

    The image is read with Pillow, converted to BGR for OpenCV, resized to
    300x300 and fed through ``net``. The first detection whose confidence
    exceeds ``conf_threshold`` and whose box overlaps the image is cropped
    out. If no detection qualifies, the whole image is used instead.

    Args:
        image_path: Path of the image file to open with Pillow.
        net: Detector exposing ``setInput(blob)`` and ``forward()``. The
            result is indexed as ``detections[0, 0, i, 2]`` (confidence) and
            ``detections[0, 0, i, 3:7]`` (box corners as fractions of the
            image width/height).
        conf_threshold: A detection is used only if its confidence is
            strictly greater than this value.
        output_size: ``(width, height)`` passed to ``cv2.resize`` for the
            returned image.

    Returns:
        The resized face crop (or resized full image) as an RGB array, or
        ``None`` if the image could not be loaded.
    """
    try:
        with Image.open(image_path) as img:
            image_rgb = np.array(img.convert("RGB"))
    except Exception as e:
        # Best effort: report the unreadable file and let the caller skip it.
        print(f"Error loading image {image_path} with Pillow: {e}")
        return None

    (h, w) = image_rgb.shape[:2]
    image_bgr = cv2.cvtColor(image_rgb, cv2.COLOR_RGB2BGR)
    blob = cv2.dnn.blobFromImage(cv2.resize(image_bgr, (300, 300)), 1.0, (300, 300), (104.0, 177.0, 123.0))
    net.setInput(blob)
    detections = net.forward()

    for i in range(detections.shape[2]):
        confidence = detections[0, 0, i, 2]
        if confidence <= conf_threshold:
            continue

        # Scale the fractional box corners to pixel coordinates.
        box = detections[0, 0, i, 3:7] * np.array([w, h, w, h])
        (startX, startY, endX, endY) = box.astype("int")

        # The detector may report corners outside the image. A negative start
        # would wrap around when slicing and an overshooting box can yield an
        # empty crop, which makes cv2.resize raise, so clamp to the image.
        startX, endX = max(0, startX), min(w, endX)
        startY, endY = max(0, startY), min(h, endY)
        if endX <= startX or endY <= startY:
            # Nothing of this box lies inside the image; try the next one.
            continue

        face_bgr = image_bgr[startY:endY, startX:endX]
        face_bgr = cv2.resize(face_bgr, output_size, interpolation=cv2.INTER_AREA)
        return cv2.cvtColor(face_bgr, cv2.COLOR_BGR2RGB)

    # No usable face: fall back to the whole image at the same output size.
    resized_bgr = cv2.resize(image_bgr, output_size, interpolation=cv2.INTER_AREA)
    return cv2.cvtColor(resized_bgr, cv2.COLOR_BGR2RGB)

In [None]:
import cv2
import numpy as np
from PIL import Image
from pathlib import Path
import os

def preprocess_test_dataset(img_dir, output_dir, dnn_proto_path, dnn_model_path):
    """Run ``face_detect`` on every ``.jpg`` file in ``img_dir`` and save the results.

    Files are processed in a deterministic order: names whose stem is an
    integer come first, sorted by that integer; any other names follow in
    alphabetical order. Each processed image is saved under the same file
    name in ``output_dir``.

    Args:
        img_dir: Directory listed for input images.
        output_dir: Directory for the processed images; created (with
            parents) if it does not exist.
        dnn_proto_path: Path passed to ``cv2.dnn.readNetFromCaffe`` as the
            network definition.
        dnn_model_path: Path passed to ``cv2.dnn.readNetFromCaffe`` as the
            trained weights.
    """
    net = cv2.dnn.readNetFromCaffe(dnn_proto_path, dnn_model_path)
    Path(output_dir).mkdir(parents=True, exist_ok=True)

    def _sort_key(img_name):
        # Numeric stems sort by value; a non-numeric stem must not abort the
        # whole run, so those names are placed after the numeric ones.
        stem = os.path.splitext(img_name)[0]
        try:
            return (0, int(stem), img_name)
        except ValueError:
            return (1, 0, img_name)

    # Match the extension case-insensitively so '.JPG' files are not dropped.
    img_names = [name for name in os.listdir(img_dir) if name.lower().endswith('.jpg')]

    for img_name in sorted(img_names, key=_sort_key):
        img_path = os.path.join(img_dir, img_name)
        processed_image = face_detect(img_path, net)
        if processed_image is None:
            # face_detect returns None when the image could not be loaded.
            print(f"Skipping {img_path} due to processing error.")
            continue
        Image.fromarray(processed_image).save(os.path.join(output_dir, img_name))

# Input and output directories for the test images. The 'drive/MyDrive'
# prefix is presumably a mounted Google Drive (Colab) -- confirm before running elsewhere.
img_dir = 'drive/MyDrive/test'
output_dir = 'drive/MyDrive/test_preprocessed'
# Network definition and weights handed to cv2.dnn.readNetFromCaffe.
dnn_proto_path = 'drive/MyDrive/deploy.prototxt'
dnn_model_path = 'drive/MyDrive/res10_300x300_ssd_iter_140000.caffemodel'

# Run the preprocessing over img_dir; results are written to output_dir.
preprocess_test_dataset(img_dir, output_dir, dnn_proto_path, dnn_model_path)

In [None]:
def preprocess_dataset(annotations_file, img_dir, output_dir, dnn_proto_path, dnn_model_path):
    """Preprocess every image listed in an annotations CSV.

    Reads ``annotations_file`` with pandas, and for each row runs
    ``face_detect`` on the image named in its ``'File Name'`` column
    (resolved against ``img_dir``). Successful results are saved under the
    same file name in ``output_dir``, which is created if missing; images
    for which ``face_detect`` returns ``None`` are reported and skipped.
    """
    annotations = pd.read_csv(annotations_file)
    detector = cv2.dnn.readNetFromCaffe(dnn_proto_path, dnn_model_path)
    Path(output_dir).mkdir(parents=True, exist_ok=True)

    for _, record in annotations.iterrows():
        file_name = record['File Name']
        source_path = os.path.join(img_dir, file_name)

        face = face_detect(source_path, detector)
        if face is None:
            print(f"Skipping {source_path} due to processing error.")
            continue

        target_path = os.path.join(output_dir, file_name)
        Image.fromarray(face).save(target_path)

# CSV read by preprocess_dataset; it looks up the 'File Name' column of each row.
annotations_file = 'drive/MyDrive/train.csv'
# NOTE: img_dir, output_dir and the two dnn_* names are also assigned in the
# test-preprocessing cell; running this cell rebinds them to the training paths.
img_dir = 'drive/MyDrive/train'
output_dir = 'drive/MyDrive/train_preprocessed'
# Network definition and weights handed to cv2.dnn.readNetFromCaffe.
dnn_proto_path = 'drive/MyDrive/deploy.prototxt'
dnn_model_path = 'drive/MyDrive/res10_300x300_ssd_iter_140000.caffemodel'
# Run the preprocessing for the images listed in annotations_file.
preprocess_dataset(annotations_file, img_dir, output_dir, dnn_proto_path, dnn_model_path)