# Face Recognition on Kaggle

Deploy and run a face recognition service on Kaggle with GPU support.

## 1. Kaggle Environment Setup

In [None]:
# Enable GPU in Kaggle:
# Settings -> Accelerator -> GPU

import os
import sys

import torch

# Directories used by the rest of the notebook: datasets are read from the
# input path, generated artifacts are written to the working path
KAGGLE_INPUT_PATH = '/kaggle/input'
KAGGLE_WORKING_PATH = '/kaggle/working'

# Report whether PyTorch can see a CUDA device, and name it when it can
gpu_available = torch.cuda.is_available()
print(f"GPU Available: {gpu_available}")
if gpu_available:
    print(f"GPU Device: {torch.cuda.get_device_name(0)}")

print(f"\nKaggle Paths:")
print(f"Input: {KAGGLE_INPUT_PATH}")
print(f"Working: {KAGGLE_WORKING_PATH}")

## 2. Install Dependencies

In [None]:
%%bash
# Install system libraries commonly needed by OpenCV.
# This step stays best-effort: the script has no `set -e`, so a failing
# apt-get does not stop the pip step below.
apt-get update && apt-get install -y \
    libgl1-mesa-glx \
    libglib2.0-0 \
    libsm6 \
    libxext6 \
    libxrender-dev \
    libgomp1

# Install pinned Python packages.
# NOTE(review): confirm every pin resolves on PyPI for the notebook's Python
# version (faiss-gpu in particular) before relying on this cell.
# Success is only reported when pip actually succeeded; otherwise the cell
# exits non-zero so the failure is visible instead of surfacing later as an
# ImportError.
if pip install -q \
    insightface==0.7.3 \
    onnxruntime-gpu==1.16.3 \
    faiss-gpu==1.7.4 \
    opencv-python-headless==4.9.0.80 \
    fastapi==0.109.0 \
    uvicorn==0.25.0 \
    python-multipart==0.0.6 \
    sqlalchemy==2.0.25 \
    asyncpg==0.29.0
then
    echo "Dependencies installed!"
else
    echo "Dependency installation failed; see pip output above" >&2
    exit 1
fi

## 3. Load Face Recognition Components

In [None]:
import numpy as np
import cv2
from insightface.app import FaceAnalysis
import faiss
import pickle
import json
from pathlib import Path

# Build the InsightFace pipeline from the buffalo_l model pack, restricted to
# the detection and recognition modules. CUDA is requested first, with the CPU
# provider listed after it.
app = FaceAnalysis(
    name='buffalo_l',
    allowed_modules=['detection', 'recognition'],
    providers=['CUDAExecutionProvider', 'CPUExecutionProvider'],
)
app.prepare(ctx_id=0, det_thresh=0.5)
print("✅ Face engine initialized with GPU support")

# Enrollment bookkeeping: FAISS row id -> person, plus the next free row id
id_to_person, next_id = {}, 0

# Flat inner-product index over 512-dimensional vectors; the inner product is
# used as the cosine-similarity score
dimension = 512
index = faiss.IndexFlatIP(dimension)
print(f"✅ FAISS index created (dimension: {dimension})")

## 4. Load Dataset from Kaggle

In [None]:
# Example: Load from Kaggle dataset
# Add dataset to Kaggle: Add Data -> Search for face dataset

# Enumerate the entries of the Kaggle input directory and print them as a
# bulleted list
import os

datasets = os.listdir(KAGGLE_INPUT_PATH)
print("Available datasets:", *[f"  - {ds}" for ds in datasets], sep="\n")

# Example dataset path (adjust to your dataset)
# DATASET_PATH = f"{KAGGLE_INPUT_PATH}/lfw-dataset/lfw-deepfunneled"

# For demo, create sample data
def load_sample_images(dataset_path=None, max_persons: int = 10, pattern: str = "*.jpg"):
    """Collect image paths from a dataset directory, grouped by person.

    The directory is scanned recursively and the name of each image's parent
    directory is used as the person ID.

    Args:
        dataset_path: Directory to scan. When omitted, the first entry of the
            module-level ``datasets`` listing under ``KAGGLE_INPUT_PATH`` is
            used.
        max_persons: Maximum number of distinct persons to collect.
        pattern: Glob pattern selecting image files.

    Returns:
        Dict mapping person ID to a list of image path strings. The dict is
        empty when no dataset is available or nothing matches ``pattern``.
    """
    images = {}

    if dataset_path is None:
        if not datasets:
            return images
        dataset_path = Path(KAGGLE_INPUT_PATH) / datasets[0]

    for img_path in Path(dataset_path).rglob(pattern):
        person_id = img_path.parent.name
        if person_id not in images:
            # Enforce the limit when a *new* person shows up, not right after
            # an append, so the last admitted person keeps all of its images
            # instead of being cut off after the first one.
            if len(images) >= max_persons:
                break
            images[person_id] = []
        images[person_id].append(str(img_path))

    return images

# Collect the demo image set once; the enrollment and test cells read face_images
face_images = load_sample_images()
print(f"\nLoaded {len(face_images)} persons with images")

## 5. Batch Face Enrollment

In [None]:
def process_and_enroll(person_id: str, image_paths: list, max_images: int = 3):
    """Extract face embeddings for one person and add them to the FAISS index.

    At most ``max_images`` paths are read. Images that cannot be read, that
    contain no detected face, or that raise during processing are skipped
    (errors are printed, not raised). Only the first detected face of each
    image is used.

    Args:
        person_id: Label stored in ``id_to_person`` for every added embedding.
        image_paths: Image file paths for this person.
        max_images: Upper bound on how many of ``image_paths`` are processed.

    Returns:
        Number of embeddings added to the index (0 when none were found).

    Side effects:
        Appends to the module-level ``index``, updates ``id_to_person`` and
        advances the module-level ``next_id``.
    """
    global next_id

    embeddings = []

    for img_path in image_paths[:max_images]:
        try:
            img = cv2.imread(img_path)
            if img is None:
                continue

            faces = app.get(img)
            if faces:
                embeddings.append(faces[0].normed_embedding)
        except Exception as e:
            # Best-effort enrollment: one bad image must not abort the batch
            print(f"Error processing {img_path}: {e}")

    if not embeddings:
        return 0

    embeddings_array = np.array(embeddings).astype('float32')

    # The flat index numbers its rows in insertion order, so the new rows start
    # at the current index size. Deriving the ids from the index itself keeps
    # the mapping aligned even if next_id and the index ever disagree.
    first_id = index.ntotal
    index.add(embeddings_array)

    for idx in range(first_id, index.ntotal):
        id_to_person[idx] = person_id

    next_id = index.ntotal

    return len(embeddings)

# Enroll every collected person, reporting per-person counts and keeping a
# running total of stored embeddings
print("Enrolling faces...")
total_enrolled = 0

for person_id in face_images:
    images = face_images[person_id]
    enrolled = process_and_enroll(person_id, images)
    print(f"  {person_id}: {enrolled} faces enrolled")
    total_enrolled = total_enrolled + enrolled

print(f"\n✅ Total faces enrolled: {total_enrolled}", f"Index size: {index.ntotal}", sep="\n")

## 6. Save Index to Kaggle Working Directory

In [None]:
# 1) The FAISS index itself
index_path = f"{KAGGLE_WORKING_PATH}/face_index.faiss"
faiss.write_index(index, index_path)
print(f"✅ FAISS index saved to {index_path}")

# 2) The row-id -> person lookup together with the id counter, pickled as one dict
mappings_path = f"{KAGGLE_WORKING_PATH}/id_mappings.pkl"
mappings = {'id_to_person': id_to_person, 'next_id': next_id}
with open(mappings_path, 'wb') as f:
    pickle.dump(mappings, f)
print(f"✅ ID mappings saved to {mappings_path}")

# 3) A JSON summary of the index and the settings it was built with
config_path = f"{KAGGLE_WORKING_PATH}/config.json"
config = dict(
    dimension=dimension,
    index_size=index.ntotal,
    num_persons=len(set(id_to_person.values())),
    similarity_threshold=0.65,
    model='buffalo_l',
    device='cuda' if torch.cuda.is_available() else 'cpu',
)
with open(config_path, 'w') as f:
    f.write(json.dumps(config, indent=2))
print(f"✅ Configuration saved to {config_path}")

## 7. Face Identification Service

In [None]:
def identify_face(image_path: str, top_k: int = 5, threshold: float = 0.65):
    """Identify the first detected face of an image against the FAISS index.

    Args:
        image_path: Path of the image file to read.
        top_k: Maximum number of nearest neighbours to retrieve (at least 1).
        threshold: Minimum inner-product score for a neighbour to be reported.

    Returns:
        On success, a dict with ``matches`` (list of ``person_id``,
        ``similarity``, ``index_id`` entries at or above ``threshold``),
        ``face_detected``, ``bbox`` and ``det_score``. On failure, a dict with
        a single ``error`` message: unreadable image, no face, empty index or
        invalid ``top_k``.
    """
    img = cv2.imread(image_path)
    if img is None:
        return {"error": "Failed to read image"}

    faces = app.get(img)
    if not faces:
        return {"error": "No face detected"}

    # Only the first detected face is queried
    face = faces[0]
    embedding = face.normed_embedding.reshape(1, -1).astype('float32')

    if index.ntotal == 0:
        return {"error": "Index is empty"}
    if top_k < 1:
        # Report the bad argument the same way as the other failures instead
        # of letting a non-positive k reach the index search
        return {"error": "top_k must be at least 1"}

    # Never ask for more neighbours than the index holds
    k = min(top_k, index.ntotal)
    distances, indices = index.search(embedding, k)

    matches = [
        {
            "person_id": id_to_person.get(int(idx), "unknown"),
            "similarity": float(dist),
            "index_id": int(idx),
        }
        for dist, idx in zip(distances[0], indices[0])
        # FAISS uses -1 for result slots it could not fill; skip those
        if idx >= 0 and dist >= threshold
    ]

    return {
        "matches": matches,
        "face_detected": True,
        "bbox": face.bbox.tolist(),
        "det_score": float(face.det_score)
    }

# Smoke test: query the index with the first image of the first person in
# face_images (skipped when no images were loaded)
if face_images:
    test_person = next(iter(face_images))
    test_image = face_images[test_person][0]

    print(f"Testing with image from {test_person}")
    result = identify_face(test_image)

    print("\nIdentification Result:", json.dumps(result, indent=2), sep="\n")

## 8. Use Kaggle Secrets for Configuration

In [None]:
# Access Kaggle Secrets
# Add secrets in Kaggle: Settings -> Secrets -> Add Secret

from kaggle_secrets import UserSecretsClient

# Client for the secrets configured in the Kaggle notebook settings
secrets = UserSecretsClient()

# Real credentials would be fetched like this:
# api_key = secrets.get_secret("API_KEY")
# db_password = secrets.get_secret("DB_PASSWORD")

# Demo placeholders: these values are hard-coded, nothing is read from the
# secrets store in this cell
api_key, similarity_threshold = "demo_key", 0.65

print(
    "Configuration loaded from secrets",
    f"Similarity threshold: {similarity_threshold}",
    sep="\n",
)

## 9. Create Simple API Server

In [None]:
# Create a simple FastAPI server
from fastapi import FastAPI, File, UploadFile
from fastapi.responses import JSONResponse
import uvicorn
from threading import Thread
import time

# FastAPI application object; the route decorators below register their handlers on it
api = FastAPI(title="Face Recognition on Kaggle")

@api.get("/")
async def root():
    """Return the service identity, CUDA availability and current index size."""
    info = {"service": "Face Recognition", "platform": "Kaggle"}
    info["gpu"] = torch.cuda.is_available()
    info["index_size"] = index.ntotal
    return info

@api.post("/identify")
async def identify(image: UploadFile = File(...)):
    """Identify the face in an uploaded image.

    The upload is written to a per-request temporary file in the Kaggle
    working directory, passed to ``identify_face`` and removed afterwards.

    Args:
        image: Uploaded image file (multipart form field ``image``).

    Returns:
        JSONResponse wrapping the dict returned by ``identify_face``.
    """
    import os
    import tempfile

    contents = await image.read()

    # A unique file per request: with one fixed filename, concurrent uploads
    # would overwrite each other before identify_face reads them
    fd, temp_path = tempfile.mkstemp(suffix=".jpg", dir=KAGGLE_WORKING_PATH)
    try:
        with os.fdopen(fd, 'wb') as f:
            f.write(contents)

        # NOTE(review): identify_face is a plain synchronous call, so it runs
        # inside this coroutine until it returns
        result = identify_face(temp_path)
    finally:
        # Do not leave uploads behind in the notebook's output directory
        os.remove(temp_path)

    return JSONResponse(content=result)

@api.get("/stats")
async def stats():
    """Return index statistics: face count, distinct persons, dimension and threshold."""
    distinct_persons = set(id_to_person.values())
    return dict(
        total_faces=index.ntotal,
        total_persons=len(distinct_persons),
        dimension=dimension,
        threshold=similarity_threshold,
    )

# Run server in background (for demo)
def run_server():
    """Serve ``api`` with uvicorn on 0.0.0.0:8000."""
    # Nothing in this cell calls this function; it is only defined here
    server_options = {"host": "0.0.0.0", "port": 8000}
    uvicorn.run(api, **server_options)

# Note: a server running inside a Kaggle notebook is not reachable from
# outside, so this endpoint summary is for demonstration purposes only
print(
    "API server ready (internal only)",
    "Endpoints:",
    "  GET  /",
    "  POST /identify",
    "  GET  /stats",
    sep="\n",
)

## 10. Export Model for Download

In [None]:
# Package everything for download
import zipfile

output_zip = f"{KAGGLE_WORKING_PATH}/face_recognition_model.zip"

# (file on disk, name inside the archive): index, id mappings, config
bundle = [
    (index_path, 'face_index.faiss'),
    (mappings_path, 'id_mappings.pkl'),
    (config_path, 'config.json'),
]

with zipfile.ZipFile(output_zip, 'w') as zipf:
    for source_file, archive_name in bundle:
        zipf.write(source_file, archive_name)

print(f"✅ Model package created: {output_zip}")
size_mb = os.path.getsize(output_zip) / 1024 / 1024
print(f"Size: {size_mb:.2f} MB")
print("\nYou can download this file from the output directory")