In [2]:
import cv2
import os
import random
import numpy as np
import matplotlib.pyplot as plt

In [3]:
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Layer, Conv2D, Dense, MaxPooling2D, Input, Flatten
import tensorflow as tf

In [4]:
# Set data paths
# One folder per image category, all relative to the working directory
POS_PATH, NEG_PATH, ANC_PATH = (
    os.path.join('data', category)
    for category in ('positive', 'negative', 'anchor')
)

In [5]:
# # Create dirs
# os.makedirs(POS_PATH)
# os.makedirs(NEG_PATH)
# os.makedirs(ANC_PATH)

In [6]:
# # Uncompress LFW database
# # Download dataset from http://vis-www.cs.umass.edu/lfw/#download
# # Into /facial_recognition folder
# filename = 'lfw.tgz'
# !tar -xf {filename}   # Extract data (braces interpolate the Python variable into the shell command)

In [7]:
# # Put all the LFW data into the negative folder
# for directory in os.listdir('lfw'):
#     for file in os.listdir(os.path.join('lfw', directory)):
#         EX_PATH = os.path.join('lfw', directory, file)
#         NEW_PATH = os.path.join(NEG_PATH, file)
#         os.replace(EX_PATH, NEW_PATH)

In [8]:
# Import uuid to generate unique identifier
import uuid

In [9]:
# Collect data
# Shows a cropped live webcam preview. Keys (with the preview window focused):
#   'a' -> save the current crop as an anchor image
#   'p' -> save the current crop as a positive image
#   'q' -> quit

# Side length and top-left corner of the square crop taken from each frame
dim = 250
x_offset = 250
y_offset = 150

cap = cv2.VideoCapture(0)
try:
    while cap.isOpened():
        ret, frame = cap.read()

        # A failed read returns ret == False and no usable frame; stop instead
        # of crashing on the crop below
        if not ret:
            break

        # Cut frame to 250x250p
        frame = frame[y_offset:y_offset+dim, x_offset:x_offset+dim, :]

        cv2.imshow('Image Collection', frame)

        # Poll the keyboard exactly once per frame. Each waitKey call consumes
        # the pending key press, so polling separately for every key would
        # randomly drop presses depending on which call happened to receive them.
        key = cv2.waitKey(1) & 0XFF

        if key == ord('a'):
            # Collect anchors
            img_name = os.path.join(ANC_PATH, f'{uuid.uuid1()}.jpg')
            cv2.imwrite(img_name, frame)
        elif key == ord('p'):
            # Collect positives
            img_name = os.path.join(POS_PATH, f'{uuid.uuid1()}.jpg')
            cv2.imwrite(img_name, frame)
        elif key == ord('q'):
            break
finally:
    # Release the webcam and close the preview even if the loop raised
    cap.release()
    cv2.destroyAllWindows()

### Load and preprocess images

In [88]:
# Build one dataset of file paths per folder, capped at 300 .jpg files each
anchor, positive, negative = (
    tf.data.Dataset.list_files(f'{folder}{os.sep}*.jpg').take(300)
    for folder in (ANC_PATH, POS_PATH, NEG_PATH)
)

In [89]:
# Scale and resize
def preprocess_image(file_path):
    """Load a JPEG from disk and return it as a 100x100 RGB image scaled to [0, 1].

    Args:
        file_path: Path to a JPEG file (Python string or scalar string tensor).

    Returns:
        A float32 tensor of shape (100, 100, 3) with values in [0, 1].
    """
    # Read the encoded bytes of the file
    raw_img = tf.io.read_file(file_path)

    # Decode to pixels. channels=3 forces RGB output so that grayscale files
    # do not yield a 1-channel tensor and the channel dimension is statically
    # known to downstream batching and layers.
    img = tf.io.decode_jpeg(raw_img, channels=3)

    # Resize to the 100x100 resolution (tf.image.resize returns float32)
    img = tf.image.resize(img, (100, 100))

    # Map the 0-255 pixel range onto [0, 1]
    return img / 255.0

In [90]:
# Create labeled dataset
# Each element is (anchor path, other path, label): label 1.0 for
# anchor/positive pairs and 0.0 for anchor/negative pairs
n_pairs = len(anchor)
positives = tf.data.Dataset.zip((
    anchor,
    positive,
    tf.data.Dataset.from_tensor_slices(tf.ones(n_pairs)),
))
negatives = tf.data.Dataset.zip((
    anchor,
    negative,
    tf.data.Dataset.from_tensor_slices(tf.zeros(n_pairs)),
))
data = positives.concatenate(negatives)
data

<ConcatenateDataset element_spec=(TensorSpec(shape=(), dtype=tf.string, name=None), TensorSpec(shape=(), dtype=tf.string, name=None), TensorSpec(shape=(), dtype=tf.float32, name=None))>

In [91]:
# Preprocess one labeled pair of image paths
def preprocess_twin(input_img, validation_img, label) -> tuple:
    '''Load and preprocess both images of a labeled pair.

    Takes the file paths of the input image and the validation image plus
    their label, runs each path through preprocess_image, and returns
    (preprocessed input image, preprocessed validation image, label) with
    the label passed through unchanged.'''
    processed_input = preprocess_image(input_img)
    processed_validation = preprocess_image(validation_img)
    return (processed_input, processed_validation, label)

In [95]:
# Iterator over the raw (anchor path, other path, label) tuples of `data`
samples = data.as_numpy_iterator()

In [96]:
# Pull a single (path, path, label) sample to inspect it
example = next(samples)
print(example)

(b'data\\anchor\\1f55f181-856b-11ed-8068-34e6adf636cc.jpg', b'data\\positive\\412c0753-856b-11ed-b3b0-34e6adf636cc.jpg', 1.0)


In [97]:
# Try the preprocessing on the single sample before mapping it over the whole dataset
res = preprocess_twin(*example)

In [98]:
# Build dataloader pipeline
# Turn every (path, path, label) element into (image, image, label) and cache
# the decoded images so the files are not re-read on later passes
data = data.map(preprocess_twin)
data = data.cache()
# Keep the shuffled order identical on every iteration. By default shuffle()
# draws a new permutation each time the dataset is iterated, so partitions
# carved out of `data` with take()/skip() would each see a different order and
# overlap (test samples leaking into training). Re-shuffle the training
# partition itself downstream if per-epoch order variation is wanted.
data = data.shuffle(buffer_size=1024, reshuffle_each_iteration=False)

In [99]:
# Training partition: the first 70% of the samples, in batches of 16,
# with up to 8 batches prepared ahead of the consumer
train_size = round(len(data)*.7)
train_data = data.take(train_size).batch(16).prefetch(8)

In [103]:
# Testing partition: every sample after the first 70%, in batches of 16.
# No second take() here: rounding a separate 30% count independently of the
# 70% count can silently drop a sample (15 samples -> 10 train, 4 test), while
# skipping the training count always yields the exact complement.
# NOTE(review): the split is only disjoint from the training partition if
# `data` yields the same order on every iteration — confirm the shuffle
# upstream does not reshuffle per iteration.
test_data = data.skip(round(len(data)*.7))
test_data = test_data.batch(16)
test_data = test_data.prefetch(8)