# Part B: Entertainment

In [1]:
# 1. Read/import images from the folder ‘training_images’
import cv2
import os
import zipfile

# Define the path to the ZIP file and the extraction directory
zip_path = 'training_images-20211126T092819Z-001.zip'
extract_path = 'training_images'

# Extract only when the directory is missing, so re-running the cell does not
# unpack the archive again
if not os.path.exists(extract_path):
    with zipfile.ZipFile(zip_path, 'r') as zip_ref:
        zip_ref.extractall(extract_path)

# Images are read from a 'training_images' folder nested inside extract_path
# (presumably the archive's own top-level folder -- confirm against the ZIP).
# Deriving it from extract_path keeps the two locations in sync.
image_dir = os.path.join(extract_path, 'training_images')

# File extensions (compared lower-case) that are treated as images
image_extensions = ('.png', '.jpg', '.jpeg', '.bmp', '.tif', '.tiff')

# Parallel lists: training_images[i] was loaded from training_image_files[i]
training_images = []
training_image_files = []

# os.listdir returns entries in arbitrary order; sorting makes the list
# positions reproducible from run to run
for f in sorted(os.listdir(image_dir)):
    if not f.lower().endswith(image_extensions):
        continue
    img_path = os.path.join(image_dir, f)
    img = cv2.imread(img_path)
    # A None result means the file could not be read; warn and skip it
    if img is None:
        print(f"Warning: Unable to read image {img_path}")
        continue
    training_images.append(img)
    training_image_files.append(f)

In [2]:
# 2. Write a loop to detect faces using Haar cascade
# Load the frontal-face Haar cascade from OpenCV's haarcascades data directory
cascade_path = cv2.data.haarcascades + 'haarcascade_frontalface_default.xml'
face_cascade = cv2.CascadeClassifier(cascade_path)

# The constructor does not raise when the XML cannot be loaded; fail here with
# a clear message instead of an opaque OpenCV assertion inside detectMultiScale
if face_cascade.empty():
    raise RuntimeError(f"Failed to load Haar cascade from {cascade_path}")

# One detection result per image, in the same order as training_images
faces_data = []

# Loop through each image in the training_images list
for img in training_images:
    # Convert the image to grayscale before running the detector
    gray_img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    # Detect faces in the grayscale image
    faces = face_cascade.detectMultiScale(gray_img, scaleFactor=1.1, minNeighbors=5)
    # Keep the result even when no face is found so positions stay aligned
    # with training_images
    faces_data.append(faces)

In [3]:
# 3. Extract metadata of faces and write into DataFrame
import pandas as pd

# Column order matches the rows built below: image position, then bounding box
metadata_columns = ['Image_Index', 'X', 'Y', 'Width', 'Height']

# Flatten the per-image detections into one row per detected face; images with
# no detections contribute no rows
metadata = [
    [image_index, x, y, w, h]
    for image_index, boxes in enumerate(faces_data)
    for (x, y, w, h) in boxes
]

metadata_df = pd.DataFrame(metadata, columns=metadata_columns)
metadata_df.head()

Unnamed: 0,Image_Index,X,Y,Width,Height
0,0,146,140,428,428
1,1,80,140,436,436
2,2,33,121,362,362
3,3,134,173,356,356
4,5,61,62,472,472


In [4]:
# 4. Save the DataFrame as a CSV file
# index=False leaves the row labels out, so only the DataFrame's columns are written
metadata_df.to_csv(path_or_buf='face_metadata.csv', index=False)