In [None]:
import cv2  
import os  
import numpy as np  
from PIL import Image  
import matplotlib.pyplot as plt  

# --- configuration ---------------------------------------------------------
# Root of the image tree to scan, and root under which face crops are written.
input_folder = "/Users/julia/Documents/project_code/input_folder/archive/facesmin3/lfw-deepfunneled"
output_folder = "/Users/julia/Documents/project_code/output_folder"
# Every saved face crop is resized to this (width, height) in pixels.
image_width, image_height = 224, 224

# Haar cascade definition file used for frontal-face detection.
cascade_path = "/System/Volumes/Data/opt/anaconda3/envs/myenv/share/opencv4/haarcascades/haarcascade_frontalface_default.xml"
facecascade = cv2.CascadeClassifier(cascade_path)

# Fail fast: an empty classifier cannot detect anything, so stop here with a
# message that names the offending path instead of letting the detection
# loop fail later with a less helpful error.
if facecascade.empty():
    raise RuntimeError(
        f"error loading cascade file. please check the path: {cascade_path}"
    )
print("cascade loaded successfully.")

# Walk the input tree, detect faces in every image, and save each detected
# face as a resized crop under output_folder/<name of the image's directory>/.
image_extensions = (".png", ".jpg", ".jpeg")

for root, _, files in os.walk(input_folder):
    # Only the immediate parent directory name is mirrored in the output tree.
    save_dir = os.path.join(output_folder, os.path.basename(root))

    for file in files:
        # Case-insensitive match on the real extension (dot included), so
        # "photo.JPG" is processed and a name that merely ends in "png" is not.
        if not file.lower().endswith(image_extensions):
            continue

        path = os.path.join(root, file)
        imgtest = cv2.imread(path)  # decoded as a BGR array, or None on failure
        if imgtest is None:
            print(f"could not read image: {path}")
            continue

        # Each detection is an (x, y, w, h) bounding box in pixel coordinates.
        faces = facecascade.detectMultiScale(imgtest, scaleFactor=1.1, minNeighbors=5)
        print(f"detected {len(faces)} face(s) in {file}.")

        stem, ext = os.path.splitext(file)
        for face_index, (x, y, w, h) in enumerate(faces):
            # Crop the face region and bring it to the target size.
            roi = imgtest[y:y + h, x:x + w]
            resized_image = cv2.resize(roi, (image_width, image_height))

            # Created lazily so that directories without any detected face
            # do not leave empty folders in the output tree.
            os.makedirs(save_dir, exist_ok=True)

            # The first face keeps the original file name; further faces get
            # a numeric suffix so they no longer overwrite one another.
            face_file = file if face_index == 0 else f"{stem}_face{face_index}{ext}"
            save_path = os.path.join(save_dir, face_file)

            # OpenCV arrays are BGR while PIL interprets arrays as RGB;
            # convert before saving so red and blue are not swapped on disk.
            rgb_image = cv2.cvtColor(resized_image, cv2.COLOR_BGR2RGB)
            Image.fromarray(rgb_image).save(save_path)
            print(f"saved preprocessed image: {save_path}")



In [3]:
# data_root = Path('/Users/julia/tensorflow-env/env/train/')
# 
# total = 0
# for sub_dir in data_root.iterdir():
#     try:
#         # check if sub_dir is a directory
#         isdir = os.path.isdir(sub_dir)
#         count = len(list(sub_dir.iterdir()))
#         total += count
#         print(f'{sub_dir.name}: {count}')
#     except Exception as e:
#         print(f"error processing {sub_dir}: {e}")
# print(f'total: {total}')



In [None]:
# Report how many entries (expected to be images) each subdirectory of the
# dataset root contains, followed by the grand total.
from pathlib import Path

data_root = Path("/Users/julia/Documents/project_code/input_folder/archive/facesmin3/lfw-deepfunneled")

total = 0
# sorted() snapshots the listing and gives a stable, alphabetical report.
for sub_dir in sorted(data_root.iterdir()):
    # Only directories can be listed; skip stray top-level files instead of
    # reporting them as errors.
    if not sub_dir.is_dir():
        continue
    try:
        count = sum(1 for _ in sub_dir.iterdir())
    except OSError as e:
        # e.g. a directory that cannot be read; report it and keep going.
        print(f"error processing {sub_dir}: {e}")
        continue
    total += count
    print(f'{sub_dir.name}: {count}')
print(f'total: {total}')


In [None]:
# Move every empty subdirectory of the dataset root (one with no entries at
# all) to the trash, printing the per-directory entry counts along the way.
from send2trash import send2trash

data_root = Path("/Users/julia/Documents/project_code/input_folder/archive/facesmin3/lfw-deepfunneled")

total = 0
# Snapshot the listing up front: directories are removed inside the loop, and
# mutating a directory while lazily iterating it gives unspecified results.
for sub_dir in sorted(data_root.iterdir()):
    # Only directories are candidates; skip stray top-level files instead of
    # reporting them as errors.
    if not sub_dir.is_dir():
        continue
    try:
        count = sum(1 for _ in sub_dir.iterdir())
        total += count
        print(f'{sub_dir.name}: {count}')
        # A directory with no entries holds no images: send it to the trash
        # (recoverable) rather than deleting it permanently.
        if count < 1:
            send2trash(sub_dir)
            print(f'{sub_dir} : folder removed')
    except Exception as e:
        # Kept broad on purpose: the exception types send2trash may raise
        # are not visible from this notebook.
        print(f"error processing {sub_dir}: {e}")
print(f'total: {total}')


In [None]:
# Re-count the entries (expected to be images) in each subdirectory of the
# dataset root, e.g. to confirm the result of the clean-up step.
data_root = Path("/Users/julia/Documents/project_code/input_folder/archive/facesmin3/lfw-deepfunneled")

total = 0
# sorted() snapshots the listing and gives a stable, alphabetical report.
for sub_dir in sorted(data_root.iterdir()):
    # Only directories can be listed; skip stray top-level files instead of
    # reporting them as errors.
    if not sub_dir.is_dir():
        continue
    try:
        count = sum(1 for _ in sub_dir.iterdir())
    except OSError as e:
        # e.g. a directory that cannot be read; report it and keep going.
        print(f"error processing {sub_dir}: {e}")
        continue
    total += count
    print(f'{sub_dir.name}: {count}')
print(f'total: {total}')

