# Data Exploration

### Install and import dependencies

In [1]:
!pip install tensorflow==2.10.0 opencv-python matplotlib

Collecting tensorflow==2.10.0
  Downloading tensorflow-2.10.0-cp38-cp38-win_amd64.whl (455.9 MB)
     -------------------------------------- 455.9/455.9 MB 2.3 MB/s eta 0:00:00
Collecting keras<2.11,>=2.10.0
  Using cached keras-2.10.0-py2.py3-none-any.whl (1.7 MB)
Collecting flatbuffers>=2.0
  Using cached flatbuffers-22.10.26-py2.py3-none-any.whl (26 kB)
Collecting tensorboard<2.11,>=2.10
  Using cached tensorboard-2.10.1-py3-none-any.whl (5.9 MB)
Collecting tensorflow-estimator<2.11,>=2.10.0
  Using cached tensorflow_estimator-2.10.0-py2.py3-none-any.whl (438 kB)
Installing collected packages: keras, flatbuffers, tensorflow-estimator, tensorboard, tensorflow
  Attempting uninstall: keras
    Found existing installation: keras 2.9.0
    Uninstalling keras-2.9.0:
      Successfully uninstalled keras-2.9.0
  Attempting uninstall: flatbuffers
    Found existing installation: flatbuffers 1.12
    Uninstalling flatbuffers-1.12:
      Successfully uninstalled flatbuffers-1.12
  Attempting 



### Import Dependencies

In [1]:
# Import standard dependencies
import cv2
import os
import random
import numpy as np
from matplotlib import pyplot as plt

In [2]:
# Import tensorflow dependencies - Funtional API
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Layer, Conv2D, Dense, MaxPooling2D, Input, Flatten # Layer allows me to create a custom layer
import tensorflow as tf

## Create folders

In [3]:
# Set up paths
positive_path = os.path.join('data','positive')
negative_path = os.path.join('data','negative')
anchor_path = os.path.join('data','anchor')

In [5]:
# Make directories
os.makedirs(positive_path)
os.makedirs(negative_path)
os.makedirs(anchor_path)

FileExistsError: [WinError 183] Cannot create a file when that file already exists: 'data\\positive'

## Download dataset

In [12]:
# Download from http://vis-www.cs.umass.edu/lfw/
# Uncompress Tar GZ labelled faces
!tar -xf lfw.tgz

In [15]:
# Move most of the images to the data/negative folder
for directory in os.listdir('lfw'):
    for file in os.listdir(os.path.join('lfw', directory)):
        existing_path = os.path.join('lfw', directory, file)
        new_path = os.path.join(negative_path, file)
        os.replace(existing_path, new_path)      

In [4]:
os.listdir('lfw')

['Aaron_Eckhart',
 'Aaron_Guiel',
 'Aaron_Patterson',
 'Aaron_Peirsol',
 'Aaron_Pena',
 'Aaron_Sorkin',
 'Aaron_Tippin',
 'Abbas_Kiarostami',
 'Abba_Eban',
 'Abdel_Aziz_Al-Hakim',
 'Abdel_Madi_Shabneh',
 'Abdel_Nasser_Assidi',
 'Abdoulaye_Wade',
 'Abdulaziz_Kamilov',
 'Abdullah',
 'Abdullah_Ahmad_Badawi',
 'Abdullah_al-Attiyah',
 'Abdullah_Gul',
 'Abdullah_Nasseef',
 'Abdullatif_Sener',
 'Abdul_Majeed_Shobokshi',
 'Abdul_Rahman',
 'Abel_Aguilar',
 'Abel_Pacheco',
 'Abid_Hamid_Mahmud_Al-Tikriti',
 'Abner_Martinez',
 'Abraham_Foxman',
 'Aby_Har-Even',
 'Adam_Ant',
 'Adam_Freier',
 'Adam_Herbert',
 'Adam_Kennedy',
 'Adam_Mair',
 'Adam_Rich',
 'Adam_Sandler',
 'Adam_Scott',
 'Adelina_Avila',
 'Adel_Al-Jubeir',
 'Adisai_Bodharamik',
 'Adolfo_Aguilar_Zinser',
 'Adolfo_Rodriguez_Saa',
 'Adoor_Gopalakarishnan',
 'Adriana_Lima',
 'Adriana_Perez_Navarro',
 'Adrianna_Zuzic',
 'Adrian_Annus',
 'Adrian_Fernandez',
 'Adrian_McPherson',
 'Adrian_Murrell',
 'Adrian_Nastase',
 'Adrien_Brody',
 'Afton_S

## Collect photos from various online sources

#### 1. Collected 1180 photos -- 10 photos per person (white male, white female, black male, black female, asian male, asian female)

#### 2. Removed extra faces and made sure each photo had only one face

#### 3. Cropped the photos in square

#### 4. Concatenated photos to have two faces in each photo

#### 5. Cropped faces to prepare input photos for siamese network

#####     --> The faces located close to the edge of the photo were not cropped --> removed those photos

In [None]:
# Use HOG
# Load image
loaded_image = "data/combined.jpg"

# Create a HOG face detector using the built-in dlib class
face_detector = dlib.get_frontal_face_detector()
win = dlib.image_window()

# Load the image into an array
img_array = io.imread(loaded_image)

# Run the HOG face detector on the image data.
# The result will be the bounding boxes of the faces in our image.
detected_faces = face_detector(img_array, 1)

print("I found {} faces in the file {}".format(len(detected_faces), loaded_image))

# Open a window on the desktop showing the image
win.set_image(img_array)


# Loop through each face found in the image
for i, face_rect in enumerate(detected_faces):
    # Detected faces are returned as an object with the coordinates
    # of the top, left, right and bottom edges
    print("- Face #{} found at Left: {} Top: {} Right: {} Bottom: {}".format(i, face_rect.left(), face_rect.top(),
                                                                             face_rect.right(), face_rect.bottom()))

    # Draw a box around each face we found
    win.add_overlay(face_rect)
    
    # Crop image and save
    img = cv2.imread(loaded_image)
    crop_image = img[face_rect.top():face_rect.bottom(), face_rect.left():face_rect.right()]
    cv2.imwrite("data/cropped{}.jpg".format(i),crop_image)
    
for i in range(2):
    image = Image.open('data/cropped{}.jpg'.format(i))
    new_img = image.resize((224,224))
    new_img.save('data/cropped{}_224.jpg'.format(i))


#### 6. Used the photos downloaded from http://vis-www.cs.umass.edu/lfw/ as negative and photos collected from various online sources as positive