## Basic Configuration

### Installing dependencies

In [1]:
# !pip install tensorflow opencv-python matplotlib

### Importing dependencies

In [2]:
import cv2
import os
import random
import numpy as np
from matplotlib import pyplot as plt

In [3]:
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Layer, Conv2D, Dense, MaxPooling2D, Input, Flatten
import tensorflow as tf

### Unzipping dataset

In [4]:
# !tar -xf lfw.tgz

### Creating directories

In [5]:
# Root folders of the two image pools used to build training pairs.
# POS_PATH is later globbed one level deep for *_Anchor.jpg files;
# NEG_PATH is later globbed flat for *.jpg files.
POS_PATH = os.path.join('data', 'positive')
NEG_PATH = os.path.join('data', 'negative')

In [6]:
# os.makedirs(POS_PATH)
# os.makedirs(NEG_PATH)

### Moving images into directories

In [7]:
# total_dirs = 0
# single_img = 0
# for directory in os.listdir('lfw'):
#     if(len(os.listdir(os.path.join('lfw', directory))))==1:
#         single_img+=1
#     total_dirs+=1
# print(f"TOTAL DIRS: {total_dirs}")
# print(f"SINGLE IMG: {single_img}")
# print(f"NON SINGLE IMG:{total_dirs - single_img}")

In [8]:
# # renaming an image in a directory with multiple images to anchor
# NOTE(review): only the first entry returned by os.listdir is renamed, and only
# the substring '0001' in its name is replaced. os.listdir does not guarantee any
# ordering, so if the first entry is not the *_0001.jpg file the rename is a no-op
# and that folder ends up without an *_Anchor.jpg -- confirm against the prepared data.
# for directory in os.listdir('lfw'):
#     if len(os.listdir(os.path.join('lfw', directory)))>1:
#         for i, file in enumerate(os.listdir(os.path.join('lfw', directory))):
#             if i==0:
#                 os.rename(os.path.join('lfw', directory, file), os.path.join('lfw', directory, file.replace('0001', 'Anchor')))

In [9]:
# # moving positive images
# for directory in os.listdir('lfw'):
#     if len(os.listdir(os.path.join('lfw', directory)))>1:
#         os.makedirs(os.path.join(POS_PATH, directory))
#         for file in os.listdir(os.path.join('lfw', directory)):
#             PREV_PATH = os.path.join('lfw', directory, file)
#             NEW_PATH = os.path.join(POS_PATH, directory, file)
#             os.replace(PREV_PATH, NEW_PATH)

In [10]:
# # moving negative images
# for directory in os.listdir('lfw'):
#     if len(os.listdir(os.path.join('lfw', directory)))>0 and len(os.listdir(os.path.join('lfw', directory)))<2:
#         for file in os.listdir(os.path.join('lfw', directory)):
#             PREV_PATH = os.path.join('lfw', directory, file)
#             NEW_PATH = os.path.join(NEG_PATH, file)
#             os.replace(PREV_PATH, NEW_PATH)

## Preprocess

### Getting images

In [178]:
# Build the glob patterns with os.path.join so the separator matches the host OS;
# a hard-coded backslash only works on Windows (and '\*' is an invalid escape
# sequence in a normal string literal).
# NOTE: list_files shuffles the matched paths by default, so each take(300) is a
# random subset of at most 300 files.
negative = tf.data.Dataset.list_files(os.path.join(NEG_PATH, '*.jpg')).take(300)
anchor = tf.data.Dataset.list_files(os.path.join(POS_PATH, '*', '*_Anchor.jpg')).take(300)

In [180]:
def get_positive_paths(anchor_dir_str):
    """Return the image-number suffixes of the non-anchor JPEGs next to an anchor.

    Despite the parameter name, ``anchor_dir_str`` is the path of the anchor
    *file*; the folder containing that file is the one that gets scanned.

    Args:
        anchor_dir_str: Path (str) to an ``*_Anchor.jpg`` file.

    Returns:
        Sorted list with, for every ``.jpg`` in the same folder whose name does
        not contain ``Anchor``, the last ``_``-separated component of the file
        name with ``.jpg`` removed (e.g. ``['0002', '0003']``).

    Raises:
        OSError: If the containing folder cannot be listed.
    """
    # os.path.dirname honours the host OS separator(s); splitting on a literal
    # backslash only works for Windows-style paths. A bare file name has an
    # empty dirname, which is treated as the current directory.
    anchor_path_dir = os.path.dirname(anchor_dir_str) or os.curdir
    # Sorted so the result does not depend on os.listdir's arbitrary ordering.
    return sorted(
        file.split('_')[-1].replace('.jpg', '')
        for file in os.listdir(anchor_path_dir)
        if file.endswith('.jpg') and 'Anchor' not in file
    )

In [181]:
def map_anchor_to_positive(anchor_path):
    """Pair an anchor image path with a randomly chosen positive from the same folder.

    The lookup relies on plain-Python calls (``os.listdir`` via
    ``get_positive_paths`` and ``random.choice``), so it is wrapped in
    ``tf.numpy_function`` to be usable from ``Dataset.map``.

    Args:
        anchor_path: Scalar string tensor holding the path of an
            ``*_Anchor.jpg`` file.

    Returns:
        ``(anchor_path, positive_path)`` as two scalar string tensors, where
        ``positive_path`` is the anchor path with ``Anchor.jpg`` replaced by
        ``<number>.jpg`` for a randomly picked sibling image.
    """
    def process_path(anchor_path_numpy):
        # tf.numpy_function hands the string tensor over as a bytes object.
        anchor_dir_str = anchor_path_numpy.decode('utf-8')
        img_names_numbers = get_positive_paths(anchor_dir_str)
        if not img_names_numbers:
            # random.choice on an empty list would fail with an unhelpful message.
            raise ValueError(f'No positive images found next to {anchor_dir_str!r}')
        random_img_number = random.choice(img_names_numbers)
        positive_path = anchor_path_numpy.replace(b'Anchor.jpg', f'{random_img_number}.jpg'.encode())
        return anchor_path_numpy, positive_path

    anchor_out, positive_out = tf.numpy_function(
        process_path, [anchor_path], [tf.string, tf.string]
    )
    # numpy_function outputs carry no static shape; both values are scalars.
    anchor_out.set_shape(())
    positive_out.set_shape(())
    return anchor_out, positive_out

In [182]:
def preprocess(file_path):
    """Read a JPEG file, resize it to 100x100 and divide the pixel values by 255.

    Args:
        file_path: Path of the JPEG file to load (string or scalar string tensor).

    Returns:
        The resized image tensor after division by 255.0.
    """
    raw_bytes = tf.io.read_file(file_path)
    decoded = tf.io.decode_jpeg(raw_bytes)
    resized = tf.image.resize(decoded, (100, 100))
    return resized / 255.0

In [183]:
# Expand every anchor path into an (anchor_path, positive_path) pair.
anchor_positive_pairs = anchor.map(map_anchor_to_positive)

In [184]:
# Attach labels: 1.0 to every (anchor, positive) pair, 0.0 to every (anchor, negative) pair.
# Both label datasets are sized by len(anchor).
positives = tf.data.Dataset.zip((anchor_positive_pairs, tf.data.Dataset.from_tensor_slices(tf.ones(len(anchor)))))
negatives = tf.data.Dataset.zip(((anchor, negative), tf.data.Dataset.from_tensor_slices(tf.zeros(len(anchor)))))

In [185]:
# One dataset: all positive examples followed by all negative examples.
data = positives.concatenate(negatives)

In [186]:
# Iterator over `data` that yields NumPy values instead of tensors
# (presumably for manual inspection in later cells).
samples = data.as_numpy_iterator()

In [203]:
# Peek at the first five ((anchor_path, negative_path), label) examples.
list((negatives.take(5)))

[((<tf.Tensor: shape=(), dtype=string, numpy=b'data\\positive\\Ron_Dittemore\\Ron_Dittemore_Anchor.jpg'>,
   <tf.Tensor: shape=(), dtype=string, numpy=b'data\\negative\\Larry_Flynt_0001.jpg'>),
  <tf.Tensor: shape=(), dtype=float32, numpy=0.0>),
 ((<tf.Tensor: shape=(), dtype=string, numpy=b'data\\positive\\George_Robertson\\George_Robertson_Anchor.jpg'>,
   <tf.Tensor: shape=(), dtype=string, numpy=b'data\\negative\\Newton_Carlton_Slawson_0001.jpg'>),
  <tf.Tensor: shape=(), dtype=float32, numpy=0.0>),
 ((<tf.Tensor: shape=(), dtype=string, numpy=b'data\\positive\\Nan_Wang\\Nan_Wang_Anchor.jpg'>,
   <tf.Tensor: shape=(), dtype=string, numpy=b'data\\negative\\Sureyya_Ayhan_0001.jpg'>),
  <tf.Tensor: shape=(), dtype=float32, numpy=0.0>),
 ((<tf.Tensor: shape=(), dtype=string, numpy=b'data\\positive\\Tom_Cruise\\Tom_Cruise_Anchor.jpg'>,
   <tf.Tensor: shape=(), dtype=string, numpy=b'data\\negative\\Ed_Wade_0001.jpg'>),
  <tf.Tensor: shape=(), dtype=float32, numpy=0.0>),
 ((<tf.Tensor: sh