In [1]:
import os
import random
import cv2
import matplotlib.pyplot as plt
import tarfile
import numpy as np
from urllib import request


base_path = "dataset"

if not os.path.isdir(base_path):
  os.makedirs(base_path)


dataset_tar_path = os.path.join(base_path,"vgg_face_dataset.tar.gz")

if not os.path.isfile(dataset_tar_path):
  vgg_face_dataset_url = "http://www.robots.ox.ac.uk/~vgg/data/vgg_face/vgg_face_dataset.tar.gz"
  
  with request.urlopen(vgg_face_dataset_url) as r, open(os.path.join(base_path, "vgg_face_dataset.tar.gz"), 'wb') as f:
    f.write(r.read())

  with tarfile.open(os.path.join(base_path, "vgg_face_dataset.tar.gz")) as f:
    f.extractall(os.path.join(base_path))

# check if the haarcascade file exists
if not os.path.isfile(os.path.join(base_path, "haarcascade_frontalface_default.xml")):
  
  trained_haarcascade_url = "https://raw.githubusercontent.com/opencv/opencv/master/data/haarcascades/haarcascade_frontalface_default.xml"

  with request.urlopen(trained_haarcascade_url) as r, open(os.path.join(base_path, "haarcascade_frontalface_default.xml"), 'wb') as f:
      f.write(r.read())




In [2]:
def display_images(image):
    for img in image:
        # plt.figure(figsize=(1, 1))
        #   plt.subplot(1, len(images), i + 1)
        plt.imshow(cv2.cvtColor(img, cv2.COLOR_BGR2RGB))
        plt.axis("off")
        plt.show()


def get_celeb_txt_file(celeb_name):
    return [subject for subject in sorted(os.listdir(os.path.join(base_path, "vgg_face_dataset", "files"))) if subject.startswith(celeb_name) and subject.endswith(".txt")]


def get_images(subject, nb_images):
    with open(os.path.join(base_path, "vgg_face_dataset", "files", subject), 'r') as f:
        lines = f.readlines()

    images_ = []
    for line in lines:
        url = line[line.find("http://"): line.find(".jpg") + 4]
        try:
            res = request.urlopen(url)
            img = np.asarray(bytearray(res.read()), dtype="uint8")
            img = cv2.imdecode(img, cv2.IMREAD_COLOR)
            images_.append(img)
        except:
            pass

        if len(images_) == nb_images:
            break

    print("Number of images found: ", len(images_))
    return images_

def save_image_to_path(image, path):
    for img in image:
        # plt.figure(figsize=(1, 1))
        #   plt.subplot(1, len(images), i + 1)
        plt.imshow(cv2.cvtColor(img, cv2.COLOR_BGR2RGB))
        plt.axis("off")
        plt.savefig(path)

In [3]:
images_folder = os.path.join("images") 

ryan_reynolds = get_celeb_txt_file("Ryan_Reynolds")
bruno_mars = get_celeb_txt_file("Bruno_Mars")
tyler_perry = get_celeb_txt_file("Tyler_Perry")
tamara_taylor = get_celeb_txt_file("Tamara_Taylor")

ryan_reynolds_images = []
bruno_mars_images = []
tyler_perry_images = []
tamara_taylor_images = []

nb_images = 20
if not os.path.isdir(os.path.join(images_folder, "person_a")):
    training_folder = os.path.join(images_folder, "person_a/training")
    os.makedirs(os.path.join(images_folder, "person_a/training"))
    ryan_reynolds_images = get_images(ryan_reynolds[0], nb_images*2)
    save_image_to_path(ryan_reynolds_images, os.path.join(training_folder, "person_a/training"))

if not os.path.isdir(os.path.join(images_folder, "person_b")):
    training_folder = os.path.join(images_folder, "person_b/training")
    os.makedirs(os.path.join(images_folder, "person_b/training"))
    bruno_mars_images = get_images(bruno_mars[0], nb_images*2)
    save_image_to_path(bruno_mars_images, os.path.join(training_folder, "person_b/training"))
    
    
if not os.path.isdir(os.path.join(images_folder, "person_c")):
    training_folder = os.path.join(images_folder, "person_c/training")
    os.makedirs(os.path.join(images_folder, "person_c/training"))
    tyler_perry_images = get_images(tyler_perry[0], nb_images*2)
    save_image_to_path(tyler_perry_images, os.path.join(training_folder, "person_c/training"))
    

if not os.path.isdir(os.path.join(images_folder, "person_d")):
    training_folder = os.path.join(images_folder, "person_d/training")
    os.makedirs(os.path.join(images_folder, "person_d/training"))
    tamara_taylor_images = get_images(tamara_taylor[0], nb_images*2)
    save_image_to_path(tamara_taylor_images, os.path.join(training_folder, "person_d/training"))


In [None]:
import matplotlib.pyplot as plt

faceCascade = cv2.CascadeClassifier(os.path.join(base_path, "haarcascade_frontalface_default.xml"))

for images_ in images:

    for img in images_:

        img_ = img.copy()
        img_gray = cv2.cvtColor(img_, cv2.COLOR_BGR2GRAY)
        faces = faceCascade.detectMultiScale(
            img_gray,
            scaleFactor=1.2,
            minNeighbors=5,
            minSize=(30, 30),
            flags=cv2.CASCADE_SCALE_IMAGE
        )
        print("Found {} face(s)!".format(len(faces)))

        for (x, y, w, h) in faces:
            cv2.rectangle(img_, (x, y), (x+w, y+h), (0, 255, 0), 10)

            # Convert BGR image to RGB
            img_rgb = cv2.cvtColor(img_, cv2.COLOR_BGR2RGB)

            # Display the image with faces using matplotlib
            
            plt.figure(figsize=(1, 1))
            plt.imshow(img_rgb)
            plt.axis('off')  # Turn off axis labels
            plt.show()

NameError: name 'images' is not defined