In [3]:
from google.colab import files
import zipfile
import os


print("⬆️ Upload train.zip")
files.upload()
with zipfile.ZipFile("train.zip", 'r') as zip_ref:
    zip_ref.extractall("Task_B/train")



⬆️ Upload train.zip


KeyboardInterrupt: 

In [None]:
from google.colab import drive
drive.mount('/content/drive')

In [None]:

# Upload val.zip (ONLY the contents of val/)
print("⬆️ Upload val.zip")
files.upload()
with zipfile.ZipFile("val.zip", 'r') as zip_ref:
    zip_ref.extractall("Task_B/val")

# Verify both folders
print("✅ Extracted folders:")
print("Train:", os.listdir("Task_B/train")[:3])
print("Val:", os.listdir("Task_B/val")[:3])

In [None]:
import shutil
import os

# Flatten the train/train → train
if os.path.exists("Task_B/train/train"):
    shutil.move("Task_B/train/train", "Task_B/train_temp")
    shutil.rmtree("Task_B/train")
    shutil.move("Task_B/train_temp", "Task_B/train")

# Flatten the val/val → val
if os.path.exists("Task_B/val/val"):
    shutil.move("Task_B/val/val", "Task_B/val_temp")
    shutil.rmtree("Task_B/val")
    shutil.move("Task_B/val_temp", "Task_B/val")

# Confirm
print("✅ Folder structure fixed!")
print("Train samples:", os.listdir("Task_B/train")[:3])
print("Val samples:", os.listdir("Task_B/val")[:3])


✅ Folder structure fixed!
Train samples: ['Charlie_Hunnam', 'Tim_Curry', '048_frontal']
Val samples: ['Lili_Marinho', 'Hans_Eichel', '112_frontal']


In [None]:
!pip install -q face_recognition
!pip install -q scikit-learn


[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m100.1/100.1 MB[0m [31m9.6 MB/s[0m eta [36m0:00:00[0m
[?25h  Preparing metadata (setup.py) ... [?25l[?25hdone
  Building wheel for face-recognition-models (setup.py) ... [?25l[?25hdone


In [None]:
!pip install deepface
!pip install scikit-learn


Collecting deepface
  Downloading deepface-0.0.93-py3-none-any.whl.metadata (30 kB)
Collecting flask-cors>=4.0.1 (from deepface)
  Downloading flask_cors-6.0.1-py3-none-any.whl.metadata (5.3 kB)
Collecting mtcnn>=0.1.0 (from deepface)
  Downloading mtcnn-1.0.0-py3-none-any.whl.metadata (5.8 kB)
Collecting retina-face>=0.0.1 (from deepface)
  Downloading retina_face-0.0.17-py3-none-any.whl.metadata (10 kB)
Collecting fire>=0.4.0 (from deepface)
  Downloading fire-0.7.0.tar.gz (87 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m87.2/87.2 kB[0m [31m4.1 MB/s[0m eta [36m0:00:00[0m
[?25h  Preparing metadata (setup.py) ... [?25l[?25hdone
Collecting gunicorn>=20.1.0 (from deepface)
  Downloading gunicorn-23.0.0-py3-none-any.whl.metadata (4.4 kB)
Collecting lz4>=4.3.3 (from mtcnn>=0.1.0->deepface)
  Downloading lz4-4.4.4-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (3.8 kB)
Downloading deepface-0.0.93-py3-none-any.whl (108 kB)
[2K   [90m━━━━━

In [None]:
import os
import numpy as np
from deepface import DeepFace
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.metrics import accuracy_score, f1_score, precision_score, recall_score


25-06-29 14:42:44 - Directory /root/.deepface has been created
25-06-29 14:42:44 - Directory /root/.deepface/weights has been created


In [None]:
def collect_dataset(folder):
    data = []
    for person_folder in os.listdir(folder):
        person_path = os.path.join(folder, person_folder)
        if not os.path.isdir(person_path):
            continue

        for f in os.listdir(person_path):
            if f.lower().endswith(('.jpg', '.jpeg', '.png')):
                data.append((os.path.join(person_path, f), person_folder))

        distortion_folder = os.path.join(person_path, "distortion")
        if os.path.exists(distortion_folder):
            for f in os.listdir(distortion_folder):
                if f.lower().endswith(('.jpg', '.jpeg', '.png')):
                    data.append((os.path.join(distortion_folder, f), person_folder))
    return data


In [None]:
def extract_embeddings(data_list, model_name="ArcFace"):
    embeddings = []
    labels = []
    for img_path, label in data_list:
        try:
            emb = DeepFace.represent(img_path=img_path, model_name=model_name, enforce_detection=False)[0]['embedding']
            embeddings.append(emb)
            labels.append(label)
        except Exception as e:
            print(f"⚠️ Skipping {img_path} due to error: {e}")
    return np.array(embeddings), labels


In [None]:
def predict(val_emb, train_embeddings, train_labels):
    sims = cosine_similarity([val_emb], train_embeddings)
    index = np.argmax(sims)
    return train_labels[index]

In [None]:
def evaluate(train_folder, val_folder):
    print("Collecting training data...")
    train_data = collect_dataset(train_folder)

    print("Collecting validation data...")
    val_data = collect_dataset(val_folder)

    print("Extracting embeddings for training data...")
    train_embeddings, train_labels = extract_embeddings(train_data)

    print("Predicting validation set...")
    y_true = []
    y_pred = []

    for img_path, true_label in val_data:
        try:
            val_emb = DeepFace.represent(img_path=img_path, model_name="ArcFace", enforce_detection=False)[0]['embedding']
            predicted_label = predict(val_emb, train_embeddings, train_labels)
            y_true.append(true_label)
            y_pred.append(predicted_label)
        except Exception as e:
            print(f"Skipping {img_path} due to error during prediction: {e}")

    if len(y_true) == 0 or len(y_pred) == 0:
        print("❗ No predictions made. Dataset may be empty or corrupt.")
    else:
        print("\n--- Evaluation Results ---")
        print("Top-1 Accuracy:", accuracy_score(y_true, y_pred))
        print("Macro F1-Score:", f1_score(y_true, y_pred, average='macro'))
        print("Precision:", precision_score(y_true, y_pred, average='macro'))
        print("Recall:", recall_score(y_true, y_pred, average='macro'))

train_folder = "Task_B/train"
val_folder = "Task_B/val"

evaluate(train_folder, val_folder)

Collecting training data...
Collecting validation data...
Extracting embeddings for training data...
Predicting validation set...
❗ No predictions made. Dataset may be empty or corrupt.


In [None]:
# ✅ STEP 4: Define face recognition and evaluation logic
from deepface import DeepFace
import os
from sklearn.metrics import classification_report, accuracy_score, f1_score, precision_score, recall_score

def get_person_images(data_dir):
    images = []
    labels = []
    for person in os.listdir(data_dir):
        person_folder = os.path.join(data_dir, person)
        if not os.path.isdir(person_folder):
            continue

        for file in os.listdir(person_folder):
            file_path = os.path.join(person_folder, file)
            if file.endswith(('.jpg', '.jpeg', '.png')):
                images.append(file_path)
                labels.append(person)

            # Include distorted images if available
            distortion_folder = os.path.join(person_folder, "distortion")
            if os.path.exists(distortion_folder):
                for dimg in os.listdir(distortion_folder):
                    if dimg.endswith(('.jpg', '.jpeg', '.png')):
                        images.append(os.path.join(distortion_folder, dimg))
                        labels.append(person)

    return images, labels

In [None]:
# ✅ STEP 5: Load training embeddings
print("\U0001F50D Loading training faces and extracting embeddings using ArcFace...")
try:
    embeddings = DeepFace.find(
        img_path=None,
        db_path=train_folder,
        model_name="ArcFace",
        enforce_detection=False,
        detector_backend="retinaface"
    )
except Exception as e:
    print("\u274C Error while embedding training data:", e)


🔍 Loading training faces and extracting embeddings using ArcFace...
25-06-29 15:03:07 - Found 15408 newly added image(s), 0 removed image(s), 0 replaced image(s).


Finding representations:   0%|          | 0/15408 [00:00<?, ?it/s]

25-06-29 15:03:11 - retinaface.h5 will be downloaded from the url https://github.com/serengil/deepface_models/releases/download/v1.0/retinaface.h5


Downloading...
From: https://github.com/serengil/deepface_models/releases/download/v1.0/retinaface.h5
To: /root/.deepface/weights/retinaface.h5

  0%|          | 0.00/119M [00:00<?, ?B/s][A
  0%|          | 524k/119M [00:00<00:47, 2.51MB/s][A
  2%|▏         | 2.62M/119M [00:00<00:11, 9.76MB/s][A
  8%|▊         | 9.44M/119M [00:00<00:03, 30.5MB/s][A
 15%|█▌        | 17.8M/119M [00:00<00:02, 47.7MB/s][A
 22%|██▏       | 26.2M/119M [00:00<00:01, 57.2MB/s][A
 29%|██▉       | 34.6M/119M [00:00<00:01, 64.2MB/s][A
 36%|███▌      | 43.0M/119M [00:00<00:01, 68.7MB/s][A
 43%|████▎     | 51.4M/119M [00:00<00:00, 71.8MB/s][A
 50%|█████     | 59.8M/119M [00:01<00:00, 74.1MB/s][A
 57%|█████▋    | 68.2M/119M [00:01<00:00, 75.7MB/s][A
 65%|██████▍   | 76.5M/119M [00:01<00:00, 76.6MB/s][A
 72%|███████▏  | 84.9M/119M [00:01<00:00, 77.1MB/s][A
 79%|███████▊  | 93.3M/119M [00:01<00:00, 77.6MB/s][A
 85%|████████▌ | 101M/119M [00:01<00:00, 76.9MB/s] [A
 92%|█████████▏| 110M/119M [00:01<00:00,

KeyboardInterrupt: 

In [None]:
from tqdm import tqdm

def predict_and_evaluate(val_folder, db_path):
    y_true = []
    y_pred = []

    for person in os.listdir(val_folder):
        person_folder = os.path.join(val_folder, person)
        if not os.path.isdir(person_folder):
            continue

        for file in os.listdir(person_folder):
            file_path = os.path.join(person_folder, file)
            if file_path.endswith(('.jpg', '.jpeg', '.png')):
                try:
                    result = DeepFace.find(
                        img_path=file_path,
                        db_path=db_path,
                        model_name="ArcFace",
                        enforce_detection=False,
                        detector_backend="retinaface",
                        silent=True
                    )

                    # Predicted folder name is in identity path
                    if len(result[0]) > 0:
                        pred_identity = result[0].iloc[0]["identity"]
                        predicted_label = pred_identity.split(os.path.sep)[-2]  # Extract folder name
                    else:
                        predicted_label = "Unknown"
                except:
                    predicted_label = "Unknown"

                y_true.append(person)
                y_pred.append(predicted_label)

In [None]:
# 4. Install DeepFace and required packages
!pip install deepface --quiet
import os
from deepface import DeepFace
from sklearn.metrics import accuracy_score, f1_score, precision_score, recall_score
import numpy as np
import glob
import cv2
from tqdm import tqdm


In [None]:
# 5. Helper function to load images and labels
def load_dataset(folder_path):
    image_paths = []
    labels = []
    subfolders = sorted(os.listdir(folder_path))
    for person in subfolders:
        person_folder = os.path.join(folder_path, person)
        if os.path.isdir(person_folder):
            # Main image (e.g., John.jpg)
            for img in glob.glob(os.path.join(person_folder, "*.jpg")):
                image_paths.append(img)
                labels.append(person)
            # Distorted images
            distortion_path = os.path.join(person_folder, "distortion")
            if os.path.exists(distortion_path):
                for img in glob.glob(os.path.join(distortion_path, "*.jpg")):
                    image_paths.append(img)
                    labels.append(person)
    return image_paths, labels


In [None]:
# 6. Embed faces using DeepFace
def extract_embeddings(image_paths):
    embeddings = []
    for img_path in tqdm(image_paths, desc="🔍 Extracting embeddings"):
        try:
            embedding = DeepFace.represent(
                img_path=img_path,
                model_name='ArcFace',
                detector_backend='opencv',
                enforce_detection=False
            )[0]['embedding']
            embeddings.append(embedding)
        except Exception as e:
            print(f"⚠️ Skipping {img_path}: {e}")
            embeddings.append(None)
    return embeddings


In [None]:
# 7. Evaluation logic
from sklearn.neighbors import KNeighborsClassifier

def evaluate(train_folder, val_folder):
    print("📂 Loading training data...")
    train_images, train_labels = load_dataset(train_folder)
    print("📂 Loading validation data...")
    val_images, val_labels = load_dataset(val_folder)

    print(f"🔢 Training samples: {len(train_images)}, Validation samples: {len(val_images)}")

    train_embeddings = extract_embeddings(train_images)
    val_embeddings = extract_embeddings(val_images)

    # Filter None values
    train_data = [(emb, label) for emb, label in zip(train_embeddings, train_labels) if emb is not None]
    val_data = [(emb, label) for emb, label in zip(val_embeddings, val_labels) if emb is not None]

    if len(train_data) == 0 or len(val_data) == 0:
        print("❌ No valid data for training/validation")
        return

    X_train, y_train = zip(*train_data)
    X_val, y_val = zip(*val_data)

    clf = KNeighborsClassifier(n_neighbors=1)
    clf.fit(X_train, y_train)

    y_pred = clf.predict(X_val)

    print("\n--- 📊 Evaluation Results ---")
    print("Top-1 Accuracy:", accuracy_score(y_val, y_pred))
    print("Macro F1-Score:", f1_score(y_val, y_pred, average='macro'))
    print("Precision:", precision_score(y_val, y_pred, average='macro'))
    print("Recall:", recall_score(y_val, y_pred, average='macro'))


In [None]:
# 8. Call everything
train_path = "Task_B/train"
val_path = "Task_B/val"
evaluate(train_path, val_path)


📂 Loading training data...
📂 Loading validation data...
🔢 Training samples: 15408, Validation samples: 3376


🔍 Extracting embeddings: 100%|██████████| 15408/15408 [3:19:36<00:00,  1.29it/s]
🔍 Extracting embeddings: 100%|██████████| 3376/3376 [51:27<00:00,  1.09it/s]



--- 📊 Evaluation Results ---
Top-1 Accuracy: 0.0
Macro F1-Score: 0.0
Precision: 0.0
Recall: 0.0
