<a href="https://colab.research.google.com/github/RohanCheera/FaceTrace/blob/main/models/SimilaritySearch.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# Mount Google Drive
from google.colab import drive
drive.mount('/content/drive')

# Install required packages in a specific order to avoid numpy incompatibility
!pip install tensorflow==2.15.0
!pip install numpy==1.26.4
!pip install deepface==0.0.79
# NOTE(review): faiss-cpu is the only package installed without a version pin
!pip install faiss-cpu

# Restart runtime to clear cached imports
# Signal 9 is sent to this kernel's own process, so nothing after this line runs
# in the current session; the remaining cells have to be started afterwards.
import os
os.kill(os.getpid(), 9)

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [None]:

# Mount Google Drive
# (repeated from the setup cell, which ends by killing the kernel process)
from google.colab import drive
drive.mount('/content/drive')


import cv2
import os
import numpy as np
import pickle
import faiss
from deepface import DeepFace
import tensorflow as tf
from tensorflow.keras.applications import ResNet50
from tensorflow.keras.layers import Dense, GlobalAveragePooling2D, BatchNormalization, Dropout, Lambda
from tensorflow.keras.models import Model
from tensorflow.keras.optimizers import Adam
from google.colab import files

# Print TensorFlow version for verification
print(f"TensorFlow version: {tf.__version__}")


Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).
TensorFlow version: 2.15.0


In [None]:

# Function wrapped by the model's Lambda layer to L2-normalize embeddings
def l2_normalize(x):
    """Return ``x`` rescaled to unit L2 norm along axis 1."""
    unit_norm = tf.nn.l2_normalize(x, axis=1)
    return unit_norm

class TripletLoss(tf.keras.losses.Loss):
    """Triplet margin loss over concatenated (anchor, positive, negative) embeddings.

    ``y_pred`` must hold the three embeddings side by side along its last axis,
    so its width has to be a multiple of three. ``y_true`` is ignored.

    Args:
        margin: Gap required between the squared anchor-negative distance and
            the squared anchor-positive distance before the loss reaches zero.
        **kwargs: Forwarded to ``tf.keras.losses.Loss`` (e.g. ``name``,
            ``reduction``).
    """

    def __init__(self, margin=0.2, **kwargs):
        super().__init__(**kwargs)
        self.margin = margin

    def call(self, y_true, y_pred):
        """Return the mean hinge loss ``max(d(a, p) - d(a, n) + margin, 0)``."""
        # Split into three equal parts instead of slicing at fixed 512/1024
        # offsets, so the loss follows whatever embedding size the model emits.
        anchor, positive, negative = tf.split(y_pred, num_or_size_splits=3, axis=-1)
        pos_dist = tf.reduce_sum(tf.square(anchor - positive), axis=-1)
        neg_dist = tf.reduce_sum(tf.square(anchor - negative), axis=-1)
        loss = tf.maximum(pos_dist - neg_dist + self.margin, 0.0)
        return tf.reduce_mean(loss)

    def get_config(self):
        """Include ``margin`` so the loss can be rebuilt from its config."""
        config = super().get_config()
        config["margin"] = self.margin
        return config

# Google Drive locations for the input folders and the generated artefacts
project_folder = '/content/drive/MyDrive/'
(
    criminal_folder,
    test_folder,
    index_path,
    embeddings_path,
    model_keras_path,
) = (
    os.path.join(project_folder, entry)
    for entry in (
        'criminal_faces',
        'test_faces',
        'faiss_index.bin',
        'embeddings.pickle',
        'face_recognition_model.keras',
    )
)

# The grayscale test set is generated by this notebook, so make sure its folder exists
os.makedirs(test_folder, exist_ok=True)

In [None]:

def convert_to_grayscale(image_path, output_path):
    """Save a 3-channel grayscale copy of ``image_path`` at ``output_path``.

    Args:
        image_path: Path of the source image.
        output_path: Path the grayscale copy is written to.

    Returns:
        True when the copy was written; False if the image could not be read,
        converted or written (the error is printed, not raised).
    """
    try:
        img = cv2.imread(image_path)
        if img is None:
            raise ValueError("Failed to load image")
        gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
        # Expand back to three channels so the file matches the model's input layout
        gray_3ch = cv2.cvtColor(gray, cv2.COLOR_GRAY2BGR)
        # imwrite signals failure through its return value rather than an
        # exception, so check it instead of assuming the file exists.
        if not cv2.imwrite(output_path, gray_3ch):
            raise ValueError(f"Failed to write image to {output_path}")
        return True
    except Exception as e:
        print(f"Error processing {image_path}: {e}")
        return False

# Mirror every .jpg/.png in the source folder into the test folder as grayscale
print("Creating test dataset with grayscale images...")
image_paths = [
    os.path.join(criminal_folder, filename)
    for filename in os.listdir(criminal_folder)
    if filename.endswith(('.jpg', '.png'))
]
for img_path in image_paths:
    img_name = os.path.basename(img_path)
    output_path = os.path.join(test_folder, img_name)
    if not convert_to_grayscale(img_path, output_path):
        # The helper has already printed the reason for the failure
        continue
    print(f"Created test image: {img_name}")


Creating test dataset with grayscale images...
Created test image: A01181.jpg
Created test image: A01148.jpg
Created test image: A01072.jpg
Created test image: A00360.jpg
Created test image: A01077.jpg
Error processing /content/drive/MyDrive/criminal_faces/A00220.jpg: Failed to load image
Created test image: A01054.jpg
Created test image: A01157.jpg
Created test image: A00147.jpg
Created test image: A01258.jpg
Created test image: A00367.jpg
Created test image: A01285.jpg
Created test image: A01237.jpg
Created test image: A01349.jpg
Created test image: A01411.jpg
Created test image: A01939.jpg
Created test image: A01531.jpg
Created test image: A01759.jpg
Created test image: A01467.jpg
Created test image: A01694.jpg
Created test image: A01457.jpg
Created test image: A01929.jpg
Created test image: A01834.jpg
Created test image: A01615.jpg
Created test image: A01356.jpg
Created test image: A01860.jpg
Created test image: A01729.jpg
Created test image: A01950.jpg
Created test image: A01806.j

In [None]:


def create_face_recognition_model(input_shape=(224, 224, 3), embedding_size=512):
    """Build the embedding network: frozen ResNet50 plus a dense projection head.

    Args:
        input_shape: Shape of one input image passed to ResNet50.
        embedding_size: Width of the final Dense layer, i.e. of the embedding.

    Returns:
        A Keras ``Model`` mapping the ResNet50 input to L2-normalized embeddings.

    Raises:
        Exception: Any construction error is printed and then re-raised.
    """
    try:
        backbone = ResNet50(weights='imagenet', include_top=False, input_shape=input_shape)
        backbone.trainable = False

        # Projection head, applied in order on top of the backbone features
        head_layers = [
            GlobalAveragePooling2D(),
            Dense(1024, activation='relu'),
            BatchNormalization(),
            Dropout(0.5),
            Dense(512, activation='relu'),
            BatchNormalization(),
            Dropout(0.3),
            Dense(embedding_size, activation=None),
            Lambda(l2_normalize, name='l2_normalize'),
        ]
        features = backbone.output
        for layer in head_layers:
            features = layer(features)

        return Model(inputs=backbone.input, outputs=features)
    except Exception as err:
        print(f"Error creating model: {err}")
        raise


In [None]:

def prepare_triplet_data(image_paths, ids, img_size=(224, 224)):
    """Build (anchor, positive, negative) image arrays for triplet training.

    Every readable image becomes one anchor. The positive is the anchor image
    itself, and the negative is a randomly drawn readable image whose id
    differs from the anchor's.

    Args:
        image_paths: Paths of the images to use.
        ids: Identity label of each path, aligned with ``image_paths``.
        img_size: Target size handed to ``cv2.resize``.

    Returns:
        Tuple ``(anchors, positives, negatives)`` of arrays with pixel values
        divided by 255. All three are empty when fewer than two distinct ids
        have a readable image, because no negative can be drawn then.
    """
    # Decode and preprocess every file exactly once. Negatives are later drawn
    # only from images known to be readable, so a corrupt file can no longer
    # cause an otherwise valid anchor to be dropped.
    loaded_images = []
    loaded_ids = []
    for path, face_id in zip(image_paths, ids):
        img = cv2.imread(path)
        if img is None:
            print(f"Skipping triplet for {path}: Invalid image")
            continue
        loaded_images.append(cv2.resize(img, img_size) / 255.0)
        loaded_ids.append(face_id)

    # Without at least two distinct identities there is nothing to contrast against
    if len(set(loaded_ids)) < 2:
        return (np.array([]), np.array([]), np.array([]))

    n_loaded = len(loaded_ids)
    negative_images = []
    for anchor_id in loaded_ids:
        # Rejection sampling: redraw until the candidate has a different id.
        # Termination is guaranteed by the distinct-id check above.
        negative_idx = np.random.randint(n_loaded)
        while loaded_ids[negative_idx] == anchor_id:
            negative_idx = np.random.randint(n_loaded)
        negative_images.append(loaded_images[negative_idx])

    anchors = np.array(loaded_images)
    # Positives are the anchors themselves; copy so the arrays stay independent
    return (anchors, anchors.copy(), np.array(negative_images))

# Wire one shared embedding network into a three-branch triplet model
try:
    model = create_face_recognition_model()

    anchor_input, positive_input, negative_input = (
        tf.keras.Input(shape=(224, 224, 3)) for _ in range(3)
    )
    # The same `model` instance embeds all three branches, so weights are shared
    anchor_embedding, positive_embedding, negative_embedding = (
        model(branch) for branch in (anchor_input, positive_input, negative_input)
    )

    # TripletLoss expects the three embeddings concatenated along axis 1
    stacked_embeddings = tf.concat(
        [anchor_embedding, positive_embedding, negative_embedding], axis=1
    )
    triplet_model = Model(
        inputs=[anchor_input, positive_input, negative_input],
        outputs=stacked_embeddings,
    )
    triplet_model.compile(
        optimizer=Adam(learning_rate=0.001),
        loss=TripletLoss(margin=0.2),
    )
except Exception as err:
    print(f"Error compiling model: {err}")
    raise

# Collect the gallery images; the file name up to its first dot is the identity label
image_paths = [
    os.path.join(criminal_folder, filename)
    for filename in os.listdir(criminal_folder)
    if filename.endswith(('.jpg', '.png'))
]
ids = [os.path.basename(path).split('.')[0] for path in image_paths]

triplet_arrays = prepare_triplet_data(image_paths, ids)
anchor_images, positive_images, negative_images = triplet_arrays

if len(anchor_images) > 0:
    print("Fine-tuning model with triplet loss...")
    triplet_inputs = [anchor_images, positive_images, negative_images]
    # Keras requires targets, but TripletLoss ignores y_true, so zeros are enough
    dummy_targets = np.zeros(len(anchor_images))

    # Stage 1: train only the projection head; the ResNet50 layers are frozen
    triplet_model.fit(
        triplet_inputs,
        dummy_targets,
        batch_size=32,
        epochs=5,
        verbose=1
    )

    # Stage 2: unfreeze the backbone and continue at a lower learning rate.
    # `model` is a flat functional graph, so model.layers[0] is its InputLayer
    # and setting trainable on that alone unfreezes nothing. Unfreeze layer by
    # layer instead, leaving BatchNormalization layers as they are so the
    # frozen backbone statistics are not disturbed while fine-tuning.
    for layer in model.layers:
        if not isinstance(layer, BatchNormalization):
            layer.trainable = True
    # Recompile so the changed trainable flags take effect
    triplet_model.compile(optimizer=Adam(learning_rate=0.0001), loss=TripletLoss(margin=0.2))
    triplet_model.fit(
        triplet_inputs,
        dummy_targets,
        batch_size=32,
        epochs=3,
        verbose=1
    )
else:
    print("No valid triplet data available for training. Proceeding with pre-trained weights.")

# Persist the embedding network (not the triplet wrapper) in .keras format
try:
    model.save(model_keras_path)
except Exception as err:
    print(f"Error saving model: {err}")
    raise
else:
    print(f"Model saved as .keras to {model_keras_path}")


Downloading data from https://storage.googleapis.com/tensorflow/keras-applications/resnet/resnet50_weights_tf_dim_ordering_tf_kernels_notop.h5
Skipping triplet for /content/drive/MyDrive/criminal_faces/A00220.jpg: Invalid image
Skipping triplet for /content/drive/MyDrive/criminal_faces/A15763.jpg: Invalid image
Skipping triplet for /content/drive/MyDrive/criminal_faces/A51233.jpg: Invalid image
Skipping triplet for /content/drive/MyDrive/criminal_faces/A56106.jpg: Invalid image
Skipping triplet for /content/drive/MyDrive/criminal_faces/A60096.jpg: Invalid image
Skipping triplet for /content/drive/MyDrive/criminal_faces/A61187.jpg: Invalid image
Skipping triplet for /content/drive/MyDrive/criminal_faces/A61136.jpg: Invalid image
Skipping triplet for /content/drive/MyDrive/criminal_faces/A61982.jpg: Invalid image
Skipping triplet for /content/drive/MyDrive/criminal_faces/A70618.jpg: Invalid image
Skipping triplet for /content/drive/MyDrive/criminal_faces/A70869.jpg: Invalid image
Skippin

In [None]:

# Embed every readable gallery image. `valid_ids[i]` labels `embeddings[i]`,
# and the FAISS index below keeps the same row order.
embeddings = []
valid_ids = []
# `face_id` rather than `id`: a notebook-level loop variable named `id` would
# shadow the builtin for every later cell.
for img_path, face_id in zip(image_paths, ids):
    try:
        img = cv2.imread(img_path)
        if img is None:
            raise ValueError("Failed to load image")
        img = cv2.resize(img, (224, 224))
        img = img / 255.0
        img = np.expand_dims(img, axis=0)
        embedding = model.predict(img)[0]
        embeddings.append(embedding)
        valid_ids.append(face_id)
        print(f"Processed: {face_id}")
    except Exception as e:
        # Best effort: report the file and carry on with the remaining images
        print(f"Error processing {img_path}: {e}")

with open(embeddings_path, 'wb') as f:
    pickle.dump({'ids': valid_ids, 'embeddings': embeddings}, f)

try:
    # Fail with a clear message instead of an IndexError on embeddings[0]
    if not embeddings:
        raise ValueError("No embeddings were generated; nothing to index")
    embeddings_np = np.array(embeddings).astype('float32')
    dimension = embeddings_np.shape[1]
    # Inner product on the model's L2-normalized outputs equals cosine similarity
    index = faiss.IndexFlatIP(dimension)
    index.add(embeddings_np)
    faiss.write_index(index, index_path)
    print(f"FAISS index created with {len(valid_ids)} embeddings.")
except Exception as e:
    print(f"Error creating FAISS index: {e}")
    raise




Processed: A01181
Processed: A01148
Processed: A01072
Processed: A00360
Processed: A01077
Error processing /content/drive/MyDrive/criminal_faces/A00220.jpg: Failed to load image
Processed: A01054
Processed: A01157
Processed: A00147
Processed: A01258
Processed: A00367
Processed: A01285
Processed: A01237
Processed: A01349
Processed: A01411
Processed: A01939
Processed: A01531
Processed: A01759
Processed: A01467
Processed: A01694
Processed: A01457
Processed: A01929
Processed: A01834
Processed: A01615
Processed: A01356
Processed: A01860
Processed: A01729
Processed: A01950
Processed: A01806
Processed: A01399
Processed: A01796
Processed: A01909
Processed: A01906
Processed: A01676
Processed: A01889
Processed: A01681
Processed: A01736
Processed: A01422
Processed: A01680
Processed: A01990
Processed: A01824
Processed: A01365
Processed: A01958
Processed: A01815
Processed: A02074
Processed: A02270
Processed: A06045
Processed: A02008
Processed: A06264
Processed: A08240
Processed: A02079
Processed: A

In [None]:
# Gather the probe images and reset the running tallies
print("\nEvaluating accuracy...")
test_images = [
    os.path.join(test_folder, filename)
    for filename in os.listdir(test_folder)
    if filename.endswith(('.jpg', '.png'))
]
top1_correct = top5_correct = total_images = 0

def get_query_embedding(image_path):
    """Embed one image with ``model`` for use as a search query.

    The image is resized to 224x224 and divided by 255 before prediction.

    Returns:
        The embedding reshaped to ``(1, d)``, or None if anything fails
        (the error is printed, not raised).
    """
    try:
        raw = cv2.imread(image_path)
        if raw is None:
            raise ValueError("Failed to load image")
        scaled = cv2.resize(raw, (224, 224)) / 255.0
        # Add the leading batch axis the model expects
        batch = scaled[np.newaxis, ...]
        return model.predict(batch)[0].reshape(1, -1)
    except Exception as err:
        print(f"Error processing {image_path}: {err}")
        return None

def recognize_face(query_embedding, k=5):
    """Return the best matches for ``query_embedding`` from the FAISS index.

    Args:
        query_embedding: Query array passed straight to ``index.search``.
        k: Maximum number of matches to return.

    Returns:
        List of ``(id, score)`` tuples in the order FAISS returns them, where
        ``score`` is the value reported by the index. The list is shorter than
        ``k`` when the index holds fewer than ``k`` vectors.
    """
    distances, indices = index.search(query_embedding, k=k)
    # FAISS pads missing results with index -1. Python would read that as
    # valid_ids[-1] and report the last id as a match, so drop those slots.
    matches = [
        (valid_ids[i], float(dist))
        for i, dist in zip(indices[0], distances[0])
        if i >= 0
    ]
    return matches

# Query the index with each test image; its file name (up to the first dot)
# is the identity that should come back.
for test_image_path in test_images:
    ground_truth_id = os.path.basename(test_image_path).split('.')[0]
    query_embedding = get_query_embedding(test_image_path)
    if query_embedding is not None:
        total_images += 1
        matches = recognize_face(query_embedding)
        matched_ids = [candidate_id for candidate_id, _ in matches]
        # Top-1: the best match is the right identity; Top-5: it appears anywhere
        top1_correct += int(matched_ids[0] == ground_truth_id)
        top5_correct += int(ground_truth_id in matched_ids)
        print(f"Test image: {ground_truth_id}, Top-5 matches: {matches}")

# Report 0 rather than dividing by zero when no test image could be embedded
if total_images > 0:
    top1_accuracy = top1_correct / total_images
    top5_accuracy = top5_correct / total_images
else:
    top1_accuracy = 0
    top5_accuracy = 0

print(f"\nProcessed {total_images} test images")
print(f"Top-1 Accuracy: {top1_accuracy:.4f} ({top1_correct}/{total_images})")
print(f"Top-5 Accuracy: {top5_accuracy:.4f} ({top5_correct}/{total_images})")



Evaluating accuracy...
Test image: A01181, Top-5 matches: [('A01181', 0.9954930543899536), ('A15380', 0.9038527607917786), ('A60700', 0.8912805914878845), ('A83437', 0.8558080196380615), ('A62835', 0.8232073783874512)]
Test image: A01054, Top-5 matches: [('A01054', 0.9989330768585205), ('A80360', 0.9261503219604492), ('A61136', 0.9019296169281006), ('A01077', 0.8899179100990295), ('A92178', 0.8863575458526611)]
Test image: A00360, Top-5 matches: [('A00360', 0.9898120164871216), ('A77585', 0.912007749080658), ('A83427', 0.9093398451805115), ('A60334', 0.9071766138076782), ('A81938', 0.8933134078979492)]
Test image: A01148, Top-5 matches: [('A01148', 0.9957931041717529), ('A10936', 0.8832827210426331), ('A72159', 0.8443901538848877), ('A50773', 0.747801661491394), ('A10413', 0.7186329364776611)]
Test image: A01077, Top-5 matches: [('A01077', 0.985062837600708), ('A01054', 0.9302114248275757), ('A57980', 0.9029151201248169), ('A63589', 0.8827730417251587), ('A15385', 0.8759369254112244)]

In [None]:
# Offer the saved model, FAISS index and embeddings as browser downloads
print("\nDownloading files...")
try:
    for artifact_path in (model_keras_path, index_path, embeddings_path):
        files.download(artifact_path)
    print("Download initiated for model, FAISS index, and embeddings")
except Exception as err:
    print(f"Error downloading files: {err}")
    print("Please check Google Drive at /content/drive/MyDrive/ for files")


Downloading files...


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

Download initiated for model, FAISS index, and embeddings


In [None]:
# Display where the trained model was saved
model_keras_path


'/content/drive/MyDrive/face_recognition_model.keras'

In [None]:
# Display where the FAISS index was written
index_path

'/content/drive/MyDrive/faiss_index.bin'

In [None]:
# Display where the pickled ids and embeddings were written
embeddings_path


'/content/drive/MyDrive/embeddings.pickle'