In [90]:
import os
import random
import cv2
import matplotlib.pyplot as plt
import tarfile
import numpy as np
from urllib import request


# Folder that holds the downloaded archive, its extracted contents and the
# Haar cascade definition used for face detection.
base_path = "dataset"

os.makedirs(base_path, exist_ok=True)


def _download_file(url, destination, chunk_size=1024 * 1024):
    """Stream `url` into `destination` in chunks.

    The payload is written to `<destination>.part` first and only renamed once
    the transfer has finished, so an interrupted download never leaves behind a
    file that the `os.path.isfile` guards below would mistake for a complete one.
    """
    partial_path = destination + ".part"
    with request.urlopen(url) as response, open(partial_path, 'wb') as out_file:
        for chunk in iter(lambda: response.read(chunk_size), b""):
            out_file.write(chunk)
    os.replace(partial_path, destination)


vgg_face_dataset_url = "http://www.robots.ox.ac.uk/~vgg/data/vgg_face/vgg_face_dataset.tar.gz"
dataset_tar_path = os.path.join(base_path, "vgg_face_dataset.tar.gz")
# The rest of the notebook reads the per-celebrity files from this folder.
dataset_folder = os.path.join(base_path, "vgg_face_dataset")

if not os.path.isfile(dataset_tar_path):
    _download_file(vgg_face_dataset_url, dataset_tar_path)

# Extract whenever the extracted folder is missing, not only right after a
# fresh download, so a deleted dataset folder is restored on the next run.
if not os.path.isdir(dataset_folder):
    # NOTE(review): extractall trusts the member paths stored in the archive;
    # on Python versions that support it, consider passing filter="data".
    with tarfile.open(dataset_tar_path) as archive:
        archive.extractall(base_path)

# check if the haarcascade file exists
trained_haarcascade_url = "https://raw.githubusercontent.com/opencv/opencv/master/data/haarcascades/haarcascade_frontalface_default.xml"
haarcascade_path = os.path.join(base_path, "haarcascade_frontalface_default.xml")

if not os.path.isfile(haarcascade_path):
    _download_file(trained_haarcascade_url, haarcascade_path)




In [91]:
def display_images(image):
    """Show every image in `image` as its own matplotlib figure.

    Args:
        image: iterable of images in OpenCV's BGR channel order. Entries that
            are None are skipped.

    Images that cannot be converted or drawn are reported and skipped instead
    of aborting the loop.
    """
    for index, img in enumerate(image):
        if img is None:
            print(f"Skipping image {index}: no image data")
            continue

        try:
            # matplotlib expects RGB, OpenCV delivers BGR.
            plt.imshow(cv2.cvtColor(img, cv2.COLOR_BGR2RGB))
            plt.axis("off")
            plt.show()
        except Exception as e:
            # Exception (not a bare except) keeps Ctrl+C / kernel interrupt working.
            print(f"Error displaying image {index}: {e}")


def get_celeb_txt_file(celeb_name):
    """Return the names of the dataset files belonging to a celebrity.

    Looks inside `<base_path>/vgg_face_dataset/files` and returns, in sorted
    order, every entry whose name starts with `celeb_name` and ends with ".txt".
    """
    files_dir = os.path.join(base_path, "vgg_face_dataset", "files")

    matching_files = []
    for entry in sorted(os.listdir(files_dir)):
        if entry.startswith(celeb_name) and entry.endswith(".txt"):
            matching_files.append(entry)
    return matching_files


def get_images(subject, nb_images, timeout=10):
    """Download up to `nb_images` images listed in a dataset subject file.

    Args:
        subject: file name inside `<base_path>/vgg_face_dataset/files`; each
            line is scanned for a "http://" ... ".jpg" URL.
        nb_images: maximum number of images to return.
        timeout: seconds to wait on each request before giving up on that URL.

    Returns:
        List of images decoded by OpenCV (BGR). Lines without a usable URL,
        unreachable URLs and payloads that do not decode to an image are skipped.
    """
    subject_path = os.path.join(base_path, "vgg_face_dataset", "files", subject)
    with open(subject_path, 'r') as f:
        lines = f.readlines()

    images_ = []
    for line in lines:
        if len(images_) >= nb_images:
            break

        url_start = line.find("http://")
        url_end = line.find(".jpg")
        if url_start == -1 or url_end == -1:
            # No recognisable image URL on this line.
            continue
        url = line[url_start: url_end + 4]

        try:
            # The context manager closes the connection; the timeout stops a
            # dead host from hanging the whole cell.
            with request.urlopen(url, timeout=timeout) as res:
                payload = res.read()
            img = np.asarray(bytearray(payload), dtype="uint8")
            img = cv2.imdecode(img, cv2.IMREAD_COLOR)
        except Exception:
            # Broken links are expected; skip them, but let KeyboardInterrupt
            # through (a bare except would swallow it).
            continue

        # imdecode signals an undecodable payload by returning None, not by
        # raising; such entries must not count as images.
        if img is None:
            continue

        images_.append(img)

    print("Number of images found: ", len(images_))
    return images_


def save_images_to_path(images, folder_path, person_name):
    """Write each image to `folder_path` as `<person_name>_<index>.jpg`.

    Args:
        images: iterable of OpenCV (BGR) images; None entries are reported and
            skipped, keeping the index of the remaining files unchanged.
        folder_path: destination folder, created if it does not exist.
        person_name: prefix used for the file names.

    The pixel data is written directly with cv2.imwrite. Saving through a
    matplotlib figure would store a rendering of the plot (figure-sized, with
    margins) rather than the image itself.
    """
    os.makedirs(folder_path, exist_ok=True)

    for i, img in enumerate(images):
        # Create a unique filename for each image
        image_path = os.path.join(folder_path, f"{person_name}_{i}.jpg")

        if img is None:
            print(f"Error saving image {i}: no image data")
            continue

        try:
            # imwrite reports most failures through its return value.
            if not cv2.imwrite(image_path, img):
                print(f"Error saving image {i}: could not write {image_path}")
        except Exception as e:
            print(f"Error saving image {i}: {e}")
            

def download_and_save_person_images(person_name, celeb_txt, images_folder, nb_images=20):
    """Return a person's images, downloading them on first use.

    Args:
        person_name: folder name of the person below `images_folder`; also used
            as the prefix of the saved file names.
        celeb_txt: dataset subject file passed to `get_images`.
        images_folder: root folder that holds one sub-folder per person.
        nb_images: maximum number of images to download.

    Returns:
        If `<images_folder>/<person_name>` does not exist yet: the freshly
        downloaded images, which are also saved to its "training" sub-folder.
        Otherwise: the images currently stored in that "training" sub-folder,
        in sorted file-name order.
    """
    person_folder = os.path.join(images_folder, person_name)
    training_folder = os.path.join(person_folder, "training")

    if not os.path.isdir(person_folder):
        os.makedirs(training_folder, exist_ok=True)

        person_images = get_images(celeb_txt, nb_images)
        save_images_to_path(person_images, training_folder, person_name)
        return person_images

    # go through the training folder and return the images
    images = []
    for image in sorted(os.listdir(training_folder)):
        # Files live directly in the training folder; there is no extra
        # <person_name> level below it.
        img = cv2.imread(os.path.join(training_folder, image))
        if img is not None:  # imread returns None for unreadable files
            images.append(img)
    return images


def create_testing_sets():
    """Move a random subset of each person's training images into "testing".

    Reads the module-level `images_folder` and `nb_test_images`. For every
    person folder that has a "training" sub-folder but no "testing" sub-folder
    yet, up to `nb_test_images` randomly chosen files are moved from
    "training" to a newly created "testing" folder. Persons that already have
    a "testing" folder are left untouched.
    """
    for person in os.listdir(images_folder):
        person_folder = os.path.join(images_folder, person)
        training_folder = os.path.join(person_folder, "training")
        test_folder = os.path.join(person_folder, "testing")

        # Skip stray files and folders that do not follow the person layout.
        if not os.path.isdir(training_folder):
            continue

        if os.path.isdir(test_folder):
            continue

        # Sorted so that seeding `random` gives a reproducible split.
        candidates = sorted(os.listdir(training_folder))

        # random.sample raises if asked for more items than exist; cap the
        # request instead of failing after the folder has been created.
        sample_size = min(nb_test_images, len(candidates))
        if sample_size < nb_test_images:
            print(f"Only {len(candidates)} training image(s) for {person}; moving {sample_size} to testing")

        held_out = random.sample(candidates, sample_size)

        os.makedirs(test_folder, exist_ok=True)
        for image in held_out:
            image_path = os.path.join(training_folder, image)
            os.rename(image_path, os.path.join(test_folder, image))



**Ryan Reynolds:**

*Male:* His facial expressions and hairstyles vary, but his scruffy five o'clock shadow is almost always part of his appearance. His larger-than-average forehead is also a key focal point, and his pointed, upside-down-triangle head shape also made him a person of interest.

**Ryan Phillippe:**

*Male:* American actor with a unique and recognizable facial structure. Explore images that highlight different expressions and angles to capture his distinct appearance.

**Regina Hall:**

*Female:* African American actress with a dynamic and engaging presence. Emphasize diversity in hairstyles, makeup, and expressions to showcase the versatility of her appearance.

**Tamara Taylor:**

*Female:* Canadian actress known for her captivating looks. Highlight different aspects of her appearance, including expressions and roles that showcase her versatility.

**Ryan Reynolds and Ryan Phillippe (Persons A and C):**

Shared Characteristics: Both are male, have a similar facial structure, and share some genetic features.

**Regina Hall and Tamara Taylor (Persons B and D):**

Shared Characteristics: Both are light-skinned Black women with similar facial features and relatively similar hairstyles.

In [92]:
# Root folder that holds one sub-folder per person.
images_folder = "images"

# Images to collect per person, and how many files create_testing_sets()
# moves from each person's training folder into the testing folder.
nb_images = 40
nb_test_images = 10

# Dataset subject files for the four celebrities (matched by name prefix).
ryan_reynolds = get_celeb_txt_file("Ryan_Reynolds")
regina_hall = get_celeb_txt_file("Regina_Hall")
ryan_phillippe = get_celeb_txt_file("Ryan_Phillippe")
tamara_taylor = get_celeb_txt_file("Tamara_Taylor")

person_a_images, person_b_images, person_c_images, person_d_images = [], [], [], []

# Persons A-D map to Reynolds, Hall, Phillippe and Taylor respectively; the
# first matching subject file is used for each.
person_a_images = download_and_save_person_images(
    person_name="person_a",
    celeb_txt=ryan_reynolds[0],
    images_folder=images_folder,
    nb_images=nb_images,
)
person_b_images = download_and_save_person_images(
    person_name="person_b",
    celeb_txt=regina_hall[0],
    images_folder=images_folder,
    nb_images=nb_images,
)
person_c_images = download_and_save_person_images(
    person_name="person_c",
    celeb_txt=ryan_phillippe[0],
    images_folder=images_folder,
    nb_images=nb_images,
)
person_d_images = download_and_save_person_images(
    person_name="person_d",
    celeb_txt=tamara_taylor[0],
    images_folder=images_folder,
    nb_images=nb_images,
)

create_testing_sets()


In [93]:
# display_images(person_a_images)

In [94]:
def find_faces(images):
    """Run the frontal-face Haar cascade on every image.

    Args:
        images: iterable of OpenCV (BGR) images. None entries are treated as
            images without faces so the result stays aligned with the input.

    Returns:
        A list with one entry per input image, each entry being the
        detections returned by `detectMultiScale` for that image.
    """
    faceCascade = cv2.CascadeClassifier(os.path.join(base_path, "haarcascade_frontalface_default.xml"))
    faces = []
    for img in images:
        if img is None:
            faces.append(())
            continue

        # cvtColor returns a new array, so no defensive copy of img is needed.
        img_gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
        faces_ = faceCascade.detectMultiScale(
            img_gray,
            scaleFactor=1.2,
            minNeighbors=5,
            minSize=(30, 30),
            flags=cv2.CASCADE_SCALE_IMAGE
        )
        faces.append(faces_)

    # Count detections, not images: `faces` has one entry per image.
    total_faces = sum(len(detections) for detections in faces)
    print("Found {} face(s)!".format(total_faces))
    return faces

In [95]:
from keras.applications.vgg16 import VGG16
from keras.preprocessing.image import ImageDataGenerator
from keras.models import Sequential
from keras.layers import Dense, Flatten
from keras.callbacks import ModelCheckpoint, EarlyStopping


# Load VGG16 with its ImageNet weights, including the original classifier head.
# NOTE(review): with include_top=True the output layer is the stock ImageNet
# classifier; confirm its size matches the number of person classes before
# training on the generators built from the tmp_train/tmp_test folders.
base_model = VGG16(
    input_shape=(224, 224, 3),
    include_top=True,
    weights='imagenet',
)


In [96]:
def get_image_file_paths(base_path):
    """Collect the image paths of one person, grouped by split.

    Args:
        base_path: folder containing a "training" and a "testing" sub-folder.

    Returns:
        Dict with the keys "training" and "testing"; each value lists the path
        of every entry in the corresponding sub-folder, with backslashes
        replaced by forward slashes.
    """
    paths_by_split = {}
    for split in ("training", "testing"):
        split_folder = os.path.join(base_path, split)
        paths_by_split[split] = [
            os.path.join(split_folder, file_name).replace("\\", "/")
            for file_name in os.listdir(split_folder)
        ]
    return paths_by_split



    

In [97]:


# Per-person image roots; get_image_file_paths() appends "training" and
# "testing" to these. Keep the "images/<person>/" layout:
# initialize_temp_training_testing_folder() takes the person name from the
# second "/"-separated component of every resulting file path.
person_a_images_base_path = "images/person_a/"
person_b_images_base_path = "images/person_b/"
person_c_images_base_path = "images/person_c/"
person_d_images_base_path = "images/person_d/"

# def extract_features(image_paths, label):
#     X = []
#     y = []

#     for img_path in image_paths:
#         try:
#             img = image.load_img(img_path, target_size=(224, 224))
#             img_array = image.img_to_array(img)
#             img_array = preprocess_input(img_array)
#             features = base_model.predict(img_array.reshape(1, 224, 224, 3))
#             X.append(features.flatten())
#             y.append(label)
#         except Exception as e:
#             print(f"Error: {e}")
#     return X, y
# person_a_features = extract_features(get_image_file_paths(person_a_images_base_path, nb_images), "person_a")

# X_train, X_test, y_train, y_test = train_test_split(person_a_features['X'], person_a_features['y'], test_size=0.2, random_state=42)


In [98]:
from shutil import copyfile


# Training/testing file paths for each of the four persons.
(
    person_a_images_file_paths,
    person_b_images_file_paths,
    person_c_images_file_paths,
    person_d_images_file_paths,
) = map(
    get_image_file_paths,
    (
        person_a_images_base_path,
        person_b_images_base_path,
        person_c_images_base_path,
        person_d_images_base_path,
    ),
)

# Folders that receive the merged per-person training and testing images.
tmp_train_folder, tmp_test_folder = (
    os.path.join(base_path, folder_name) for folder_name in ("tmp_train", "tmp_test")
)

def initialize_temp_training_testing_folder(file_paths):
    """Copy the images into per-person folders below the temporary split folders.

    Args:
        file_paths: dict with the keys "training" and "testing", each a list of
            "/"-separated paths whose second component is the person name
            (e.g. "images/person_a/training/person_a_0.jpg").

    Training images are copied to `<tmp_train_folder>/<person>/` and testing
    images to `<tmp_test_folder>/<person>/`. An image is skipped when a file
    with the same name already exists for that person in the other split, so
    the same file never ends up in both.
    """
    os.makedirs(tmp_train_folder, exist_ok=True)
    os.makedirs(tmp_test_folder, exist_ok=True)

    # (key in file_paths, folder to copy into, folder of the opposite split)
    split_layout = (
        ("training", tmp_train_folder, tmp_test_folder),
        ("testing", tmp_test_folder, tmp_train_folder),
    )

    for split, own_root, other_root in split_layout:
        for image_path in file_paths[split]:
            person_name = image_path.split("/")[1]
            file_name = os.path.basename(image_path)

            # Resolve the destination for every image. Assigning it only when
            # the folder is first created leaves it unset (or pointing at the
            # previous person's folder) once the folders already exist.
            destination_folder = os.path.join(own_root, person_name)
            os.makedirs(destination_folder, exist_ok=True)

            # check that the file is not already part of the opposite split
            if not os.path.isfile(os.path.join(other_root, person_name, file_name)):
                copyfile(image_path, os.path.join(destination_folder, file_name))
    


# Merge the per-person path lists (persons A, B, C, D in that order) per split.
all_image_file_paths = {
    split: [
        path
        for person_paths in (
            person_a_images_file_paths,
            person_b_images_file_paths,
            person_c_images_file_paths,
            person_d_images_file_paths,
        )
        for path in person_paths[split]
    ]
    for split in ('training', 'testing')
}

initialize_temp_training_testing_folder(all_image_file_paths)

# One generator per split, reading from the temporary per-person folders and
# resizing every image to 224x224.
trData = ImageDataGenerator()
tsData = ImageDataGenerator()

train_data = trData.flow_from_directory(
    directory=tmp_train_folder,
    target_size=(224, 224),
)
test_data = tsData.flow_from_directory(
    directory=tmp_test_folder,
    target_size=(224, 224),
)


{'training': ['images/person_a/training/person_a_0.jpg', 'images/person_a/training/person_a_1.jpg', 'images/person_a/training/person_a_10.jpg', 'images/person_a/training/person_a_11.jpg', 'images/person_a/training/person_a_12.jpg', 'images/person_a/training/person_a_13.jpg', 'images/person_a/training/person_a_15.jpg', 'images/person_a/training/person_a_19.jpg', 'images/person_a/training/person_a_2.jpg', 'images/person_a/training/person_a_20.jpg', 'images/person_a/training/person_a_22.jpg', 'images/person_a/training/person_a_23.jpg', 'images/person_a/training/person_a_24.jpg', 'images/person_a/training/person_a_26.jpg', 'images/person_a/training/person_a_27.jpg', 'images/person_a/training/person_a_28.jpg', 'images/person_a/training/person_a_3.jpg', 'images/person_a/training/person_a_30.jpg', 'images/person_a/training/person_a_32.jpg', 'images/person_a/training/person_a_34.jpg', 'images/person_a/training/person_a_35.jpg', 'images/person_a/training/person_a_36.jpg', 'images/person_a/train

In [99]:
# The model is compiled with metrics=['accuracy'], so the validation metric is
# logged as 'val_accuracy'; monitoring 'val_acc' would never find a value.
# save_freq='epoch' checks for a new best model once per epoch, when validation
# metrics are available (an integer save_freq counts batches instead).
checkpoint = ModelCheckpoint("vgg16_1.h5", monitor='val_accuracy', verbose=1, save_best_only=True, save_weights_only=False, mode='auto', save_freq='epoch')

# Stop training once validation accuracy has not improved for 10 epochs.
early_stop = EarlyStopping(monitor='val_accuracy', min_delta=0, patience=10, verbose=1, mode='auto')

callbacks = [checkpoint, early_stop]
base_model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])


# base_model.fit_generator(train_data, steps_per_epoch=100, epochs=10, validation_data=test_data, validation_steps=10, callbacks=callbacks)
# base_model.save("vgg16_1.h5")



In [100]:
# # Compile and train the model
# model.compile(optimizer=Adam(lr=0.001), loss='binary_crossentropy', metrics=['accuracy'])

# model.fit(np.array(person_a_features[0]), np.array(person_a_features[1]), epochs=10, batch_size=32)


In [101]:
# Evaluate The Model

