# 1.Setup

### 1.1 Installing Dependencies

In [None]:
!pip install tensorflow opencv-python matplotlib

### 1.2 collect Images Using opencv

In [1]:
import cv2
import os
import random
import numpy as np
from matplotlib import pyplot as plt
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Layer, Conv2D, Dense, MaxPooling2D, Input, Flatten
import tensorflow as tf
import uuid


### 1.4 Create Folder Structures

In [2]:
# Setup paths
POS_PATH = os.path.join('data', 'positive')
NEG_PATH = os.path.join('data', 'negative')
ANC_PATH = os.path.join('data', 'anchor')

In [3]:
# Make the directories
os.makedirs(POS_PATH, exist_ok=True)
os.makedirs(NEG_PATH, exist_ok=True)
os.makedirs(ANC_PATH, exist_ok=True)

# 2. Collect Positives and Anchors

### 2.1 Untar Labelled Faces in the Wild Dataset

In [None]:
# http://vis-www.cs.umass.edu/lfw/

In [11]:
# Uncompress Tar GZ Labelled Faces in the Wild Dataset
!tar -xf lfw.tgz

In [12]:
# Move LFW Images to the following repository data/negative
for directory in os.listdir('lfw'):
    for file in os.listdir(os.path.join('lfw', directory)):
        EX_PATH = os.path.join('lfw', directory, file)
        NEW_PATH = os.path.join(NEG_PATH, file)
        os.replace(EX_PATH, NEW_PATH)

### 2.2 Collect Positive and Anchor Classes

In [4]:
# Import uuid library to generate unique image names
import uuid

In [5]:
os.path.join(ANC_PATH, '{}.jpg'.format(uuid.uuid1()))

'data\\anchor\\fff8308c-376d-11ef-a5f8-5414f35162e7.jpg'

In [6]:
import uuid

# Function to capture images with names
def capture_images(label):
    cap = cv2.VideoCapture(0)
    while cap.isOpened(): 
        ret, frame = cap.read()
        frame = frame[120:120+250, 200:200+250, :]
        
        if cv2.waitKey(1) & 0xFF == ord('a'):
            imgname = os.path.join(ANC_PATH, '{}_{}.jpg'.format(label, uuid.uuid1()))
            cv2.imwrite(imgname, frame)
        
        if cv2.waitKey(1) & 0xFF == ord('p'):
            imgname = os.path.join(POS_PATH, '{}_{}.jpg'.format(label, uuid.uuid1()))
            cv2.imwrite(imgname, frame)
        
        cv2.imshow('Image Collection', frame)
        
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break

    cap.release()
    cv2.destroyAllWindows()



In [25]:
# Example usage: capture images for a person named "John"
capture_images("Mahesh")

### 2.3 NEW - Data Augmentation

In [27]:
def data_aug(img):
    data = []
    for i in range(9):
        img = tf.image.stateless_random_brightness(img, max_delta=0.02, seed=(1,2))
        img = tf.image.stateless_random_contrast(img, lower=0.6, upper=1, seed=(1,3))
        # img = tf.image.stateless_random_crop(img, size=(20,20,3), seed=(1,2))
        img = tf.image.stateless_random_flip_left_right(img, seed=(np.random.randint(100),np.random.randint(100)))
        img = tf.image.stateless_random_jpeg_quality(img, min_jpeg_quality=90, max_jpeg_quality=100, seed=(np.random.randint(100),np.random.randint(100)))
        img = tf.image.stateless_random_saturation(img, lower=0.9,upper=1, seed=(np.random.randint(100),np.random.randint(100)))
            
        data.append(img)
    
    return data

In [28]:
# Process images in ANC_PATH
for file_name in os.listdir(ANC_PATH):
    img_path = os.path.join(ANC_PATH, file_name)
    img = cv2.imread(img_path)
    
    if img is not None:
        img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)  # Convert image to RGB format
        img = tf.convert_to_tensor(img, dtype=tf.float32) / 255.0  # Normalize image
        augmented_images = data_aug(img)

        for i, image in enumerate(augmented_images):
            # Generate a new filename with original label and UUID
            new_file_name = '{}_{}.jpg'.format(file_name.split('.')[0], uuid.uuid1())
            save_path = os.path.join(ANC_PATH, new_file_name)

            # Save the augmented image
            cv2.imwrite(save_path, (image.numpy() * 255).astype(np.uint8))
    else:
        print(f"Failed to load image at path: {img_path}")

In [29]:
# Process images in POS_PATH
for file_name in os.listdir(POS_PATH):
    img_path = os.path.join(POS_PATH, file_name)
    img = cv2.imread(img_path)
    
    if img is not None:
        img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)  # Convert image to RGB format
        img = tf.convert_to_tensor(img, dtype=tf.float32) / 255.0  # Normalize image
        augmented_images = data_aug(img)

        for i, image in enumerate(augmented_images):
            # Generate a new filename with original label and UUID
            new_file_name = '{}_{}.jpg'.format(file_name.split('.')[0], uuid.uuid1())
            save_path = os.path.join(POS_PATH, new_file_name)

            # Save the augmented image
            cv2.imwrite(save_path, (image.numpy() * 255).astype(np.uint8))
    else:
        print(f"Failed to load image at path: {img_path}")

# 3. Load and Preprocess Images

### 3.1 Get Image Directories

#### 3.2 Preprocessing - Scale and Resize

In [48]:
def preprocess(file_path):
    byte_img = tf.io.read_file(file_path)
    img = tf.io.decode_jpeg(byte_img)
    img = tf.image.resize(img, (100, 100))
    img = img / 255.0
    return img

anchor = tf.data.Dataset.list_files(os.path.join(ANC_PATH, '*.jpg')).take(3000)
positive = tf.data.Dataset.list_files(os.path.join(POS_PATH, '*.jpg')).take(3000)
negative = tf.data.Dataset.list_files(os.path.join(NEG_PATH, '*.jpg')).take(3000)

### 3.3 Create Labelled Dataset

### 3.4 Build Train and Test Partition

In [49]:
def preprocess_twin(input_img, validation_img, label):
    return(preprocess(input_img), preprocess(validation_img), label)

In [50]:
positives = tf.data.Dataset.zip((anchor, positive, tf.data.Dataset.from_tensor_slices(tf.ones(len(anchor)))))
negatives = tf.data.Dataset.zip((anchor, negative, tf.data.Dataset.from_tensor_slices(tf.zeros(len(anchor)))))
data = positives.concatenate(negatives)

data = data.map(preprocess_twin)
data = data.cache()
data = data.shuffle(buffer_size=10000)


In [51]:
data

<_ShuffleDataset element_spec=(TensorSpec(shape=(100, 100, None), dtype=tf.float32, name=None), TensorSpec(shape=(100, 100, None), dtype=tf.float32, name=None), TensorSpec(shape=(), dtype=tf.float32, name=None))>

In [56]:
# Training partition
train_data = data.take(round(len(data)*.7))
train_data = train_data.batch(16)
train_data = train_data.prefetch(8)

In [57]:
train_data

<_PrefetchDataset element_spec=(TensorSpec(shape=(None, 100, 100, None), dtype=tf.float32, name=None), TensorSpec(shape=(None, 100, 100, None), dtype=tf.float32, name=None), TensorSpec(shape=(None,), dtype=tf.float32, name=None))>

In [58]:
# Testing partition
test_data = data.skip(round(len(data)*.7))
test_data = test_data.take(round(len(data)*.3))
test_data = test_data.batch(16)
test_data = test_data.prefetch(8)

In [59]:
test_data

<_PrefetchDataset element_spec=(TensorSpec(shape=(None, 100, 100, None), dtype=tf.float32, name=None), TensorSpec(shape=(None, 100, 100, None), dtype=tf.float32, name=None), TensorSpec(shape=(None,), dtype=tf.float32, name=None))>

# 4. Model Engineering

### 4.1 Build Embedding Layer

### 4.2 Build Distance Layer

### 4.3 Make Siamese Model

In [60]:
def create_base_network(input_shape):
    input = Input(shape=input_shape)
    x = Conv2D(64, (10, 10), activation='relu')(input)
    x = MaxPooling2D()(x)
    x = Conv2D(128, (7, 7), activation='relu')(x)
    x = MaxPooling2D()(x)
    x = Conv2D(128, (4, 4), activation='relu')(x)
    x = MaxPooling2D()(x)
    x = Conv2D(256, (4, 4), activation='relu')(x)
    x = Flatten()(x)
    x = Dense(4096, activation='sigmoid')(x)
    return Model(input, x)

def euclidean_distance(vectors):
    (featsA, featsB) = vectors
    sumSquared = tf.reduce_sum(tf.square(featsA - featsB), axis=1, keepdims=True)
    return tf.sqrt(tf.maximum(sumSquared, tf.keras.backend.epsilon()))

def build_siamese_network(input_shape):
    base_network = create_base_network(input_shape)
    
    input_a = Input(shape=input_shape)
    input_b = Input(shape=input_shape)
    
    featsA = base_network(input_a)
    featsB = base_network(input_b)
    
    distance = tf.keras.layers.Lambda(euclidean_distance)([featsA, featsB])
    
    model = Model(inputs=[input_a, input_b], outputs=distance)
    return model

input_shape = (100, 100, 3)
model = build_siamese_network(input_shape)
model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])


# 5. Training

In [62]:
# Example data preparation assuming 'train_data' and 'test_data' are paired datasets
# Adjust according to your actual data structure

train_data_pairs = tf.data.Dataset.zip((train_data, train_data))  # Pair each sample with itself for training
test_data_pairs = tf.data.Dataset.zip((test_data, test_data))  # Pair each sample with itself for testing

history = model.fit(train_data_pairs, epochs=10, validation_data=test_data_pairs)


Epoch 1/10


ValueError: Layer "functional_3" expects 2 input(s), but it received 3 input tensors. Inputs received: [<tf.Tensor 'data:0' shape=(None, 100, 100, None) dtype=float32>, <tf.Tensor 'data_1:0' shape=(None, 100, 100, None) dtype=float32>, <tf.Tensor 'data_2:0' shape=(None,) dtype=float32>]

### 5.1 Setup Loss and Optimizer

### 5.2 Establish Checkpoints

### 5.3 Build Train Step Function

### 5.4 Build Training Loop

### 5.5 Train the model

In [None]:
history = model.fit(train_data, epochs=10, validation_data=test_data)

In [None]:
import numpy as np
import tensorflow as tf
import cv2

def preprocess_image(image):
    image = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    image = cv2.resize(image, (100, 100))
    image = image / 255.0
    image = np.expand_dims(image, axis=-1)
    return np.expand_dims(image, axis=0)

# Load known face encodings and names
known_encodings = []
known_names = []

# Encode the images in the POS_PATH as known faces
for file_name in os.listdir(POS_PATH):
    img_path = os.path.join(POS_PATH, file_name)
    img = cv2.imread(img_path)
    processed_img = preprocess_image(img)
    encoding = model.predict([processed_img, processed_img])
    known_encodings.append(encoding)
    known_names.append(file_name.split('.')[0])

# Real-time face recognition
cap = cv2.VideoCapture(0)
while cap.isOpened(): 
    ret, frame = cap.read()
    frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
    face_cascade = cv2.CascadeClassifier(cv2.data.haarcascades + 'haarcascade_frontalface_default.xml')
    faces = face_cascade.detectMultiScale(frame_rgb, scaleFactor=1.1, minNeighbors=5, minSize=(30, 30))

    for (x, y, w, h) in faces:
        face = frame_rgb[y:y+h, x:x+w]
        processed_face = preprocess_image(face)
        encoding = model.predict([processed_face, processed_face])
        
        min_dist = float('inf')
        name = "Unknown"
        
        for i, known_encoding in enumerate(known_encodings):
            dist = np.linalg.norm(encoding - known_encoding)
            if dist < min_dist:
                min_dist = dist
                name = known_names[i]
        
        cv2.rectangle(frame, (x, y), (x+w, y+h), (0, 255, 0), 2)
        cv2.putText(frame, name, (x, y-10), cv2.FONT_HERSHEY_SIMPLEX, 0.9, (0, 255, 0), 2)
    
    cv2.imshow('Video', frame)
    if cv2.waitKey(1) & 0xFF == ord('q'):
        break

cap.release()
cv2.destroyAllWindows()