In [5]:
# =============================
# 📦 Install all dependencies
# =============================

!pip install numpy pandas scikit-learn matplotlib seaborn tqdm opencv-python deepface --quiet

# =============================
# 📚 Import all libraries
# =============================
import os
import cv2
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from tqdm import tqdm

In [6]:
!pip install tf-keras --quiet

In [7]:
!pip install pyaudio



In [8]:
from deepface import DeepFace
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report

In [9]:
# ===============================
# Image Augmentation Utilities
# ===============================
def create_variants(image):
    """Generate multiple augmented versions of an image for robustness.

    Args:
        image: Image array whose first two axes are (height, width), as
            returned by ``cv2.imread``.

    Returns:
        A list of 8 arrays, all with the same height and width as ``image``,
        in this order: the original, a horizontal flip, two brightness-scaled
        copies (x0.8, x1.2), two rotations (-10 and +10 degrees about the
        centre), a zoomed-out copy (x0.9) and a zoomed-in copy (x1.1).
    """
    h, w = image.shape[:2]
    variants = [image]

    # Horizontal flip
    variants.append(cv2.flip(image, 1))

    # Brightness adjustment (pure gain, no offset)
    for alpha in (0.8, 1.2):
        variants.append(cv2.convertScaleAbs(image, alpha=alpha, beta=0))

    # Slight rotations about the image centre; the output keeps the (w, h) frame
    center = (w // 2, h // 2)
    for angle in (-10, 10):
        mat = cv2.getRotationMatrix2D(center, angle, 1.0)
        variants.append(cv2.warpAffine(image, mat, (w, h)))

    # Zoom in/out while keeping the original frame size.
    # Zoom-in: centre-crop the enlarged image back to (h, w).
    # Zoom-out: centre the shrunken image on a black canvas. Stretching it back
    # to (h, w) would restore the original framing and only blur it.
    for scale in (0.9, 1.1):
        resized = cv2.resize(image, None, fx=scale, fy=scale, interpolation=cv2.INTER_LINEAR)
        rh, rw = resized.shape[:2]
        copy_h, copy_w = min(rh, h), min(rw, w)
        src_y, src_x = (rh - copy_h) // 2, (rw - copy_w) // 2
        dst_y, dst_x = (h - copy_h) // 2, (w - copy_w) // 2

        canvas = np.zeros((h, w) + resized.shape[2:], dtype=resized.dtype)
        canvas[dst_y:dst_y + copy_h, dst_x:dst_x + copy_w] = (
            resized[src_y:src_y + copy_h, src_x:src_x + copy_w]
        )
        variants.append(canvas)

    return variants

In [10]:
# ===============================
# Embedding Computation
# ===============================
# DeepFace model used to embed faces and detector backend used to locate them.
MODEL_NAME = "Facenet512"
FACE_DETECTOR = "retinaface"

def extract_embedding(image):
    """Compute an L2-normalized embedding for ``image`` using DeepFace.

    Args:
        image: Passed straight to ``DeepFace.represent`` as ``img_path``
            (callers in this notebook pass an image array).

    Returns:
        1-D numpy array: the first representation's embedding scaled to
        unit length.

    Raises:
        ValueError: If DeepFace returns no representation, or the embedding
            has a zero or non-finite norm. Dividing by such a norm would
            give NaNs that silently poison any average built on top.
    """
    rep = DeepFace.represent(
        img_path=image,
        model_name=MODEL_NAME,
        detector_backend=FACE_DETECTOR,
        enforce_detection=False
    )
    if not rep:
        raise ValueError("DeepFace returned no face representation.")

    vec = np.asarray(rep[0]["embedding"], dtype=float)
    norm = np.linalg.norm(vec)
    if not np.isfinite(norm) or norm == 0:
        raise ValueError("Embedding has zero or non-finite norm.")
    return vec / norm

def compute_embedding_from_file(image_path):
    """Load an image, embed every augmented variant and return the unit-length mean.

    Args:
        image_path: Path handed to ``cv2.imread``.

    Returns:
        The mean of the per-variant embeddings, re-normalized to unit length.

    Raises:
        ValueError: If the image cannot be read, or if no variant produced
            an embedding.
    """
    source = cv2.imread(image_path)
    if source is None:
        raise ValueError(f"Could not read: {image_path}")

    collected = []
    for candidate in create_variants(source):
        # A failing variant is reported and dropped; the others still count.
        try:
            collected.append(extract_embedding(candidate))
        except Exception as e:
            print(f"⚠️ Skipping one variant: {e}")

    if len(collected) == 0:
        raise ValueError("No embeddings generated.")

    centroid = np.mean(collected, axis=0)
    return centroid / np.linalg.norm(centroid)

In [11]:
# ===============================
# Compute All Embeddings in a Folder
# ===============================
def generate_embeddings(folder_path):
    """Embed every supported image file directly inside ``folder_path``.

    Files are selected by extension (.jpg, .jpeg, .png, .webp,
    case-insensitive). A file whose embedding fails is reported and skipped.

    Returns:
        Tuple ``(embeddings, names)``: a numpy array built from the collected
        embeddings, and the matching file names in the same order.
    """
    supported = (".jpg", ".jpeg", ".png", ".webp")
    embeddings = []
    names = []

    for fname in os.listdir(folder_path):
        if not fname.lower().endswith(supported):
            continue
        try:
            vector = compute_embedding_from_file(os.path.join(folder_path, fname))
            embeddings.append(vector)
            names.append(fname)
            print(f"✅ Processed: {fname}")
        except Exception as e:
            print(f"⚠️ Skipped {fname}: {e}")

    return np.array(embeddings), names

In [12]:
!pip install fsspec
!pip install --upgrade typing-extensions



In [13]:
# ===============================
# Main Execution
# ===============================
if __name__ == "__main__":
    # Embed the enrolled (trusted) faces and the negative (random) faces.
    trusted_faces, _ = generate_embeddings("trusted_faces")
    random_faces, _ = generate_embeddings("random_faces")

    # Refuse to write an unusable file. Averaging an empty set later yields NaN,
    # and a NaN threshold makes every similarity comparison evaluate to False.
    if len(trusted_faces) == 0 or len(random_faces) == 0:
        raise RuntimeError(
            f"No usable embeddings (trusted={len(trusted_faces)}, "
            f"random={len(random_faces)}); embeddings.npz was not written."
        )

    np.savez("embeddings.npz", trusted=trusted_faces, random=random_faces)
    print("✅ All embeddings saved to embeddings.npz")

✅ Processed: WhatsApp Image 2025-10-23 at 16.01.16_35649226.jpg
✅ Processed: WhatsApp Image 2025-10-23 at 16.01.16_6bb17b53.jpg
✅ Processed: fotor-ai-2023112455252.jpg
✅ All embeddings saved to embeddings.npz


In [14]:
# Install all required modules individually
!pip install SpeechRecognition
!pip install pygame
!pip install deepface
!pip install gtts
!pip install google-genai
!pip install mtcnn
!pip install retina-face
!pip install opencv-python-headless
!pip install tqdm
!pip install numpy
!pip install pandas

Collecting opencv-python-headless
  Using cached opencv_python_headless-4.12.0.88-cp37-abi3-win_amd64.whl.metadata (20 kB)
Using cached opencv_python_headless-4.12.0.88-cp37-abi3-win_amd64.whl (38.9 MB)
Installing collected packages: opencv-python-headless


ERROR: Could not install packages due to an OSError: [WinError 5] Access is denied: 'C:\\Users\\sachin\\anaconda3\\Lib\\site-packages\\cv2\\cv2.pyd'
Consider using the `--user` option or check the permissions.





In [15]:
import time
import datetime
import threading
import speech_recognition as sr
import pygame
from deepface import DeepFace
from gtts import gTTS
import tempfile
from google import genai
import difflib


pygame 2.6.1 (SDL 2.28.4, Python 3.13.5)
Hello from the pygame community. https://www.pygame.org/contribute.html


In [16]:
# ===============================
# Audio Setup and Helpers
# ===============================
# Initialise the pygame mixer once; the alert Sound below and the TTS playback
# in say() (via pygame.mixer.music) both go through it.
pygame.mixer.init()
# Alarm clip played by play_buzzer(); the path is relative to the working directory.
alert_sound = pygame.mixer.Sound("buzzer.mp3")
# True while the alarm is playing, so play_buzzer()/stop_buzzer() can be called
# repeatedly without restarting or re-stopping the sound.
buzzer_on = False

def play_buzzer():
    """Start the alert sound unless it is already marked as playing."""
    global buzzer_on
    if buzzer_on:
        return
    # Flag first, then play: repeated calls while the alarm runs are no-ops.
    buzzer_on = True
    alert_sound.play(-1)  # -1 loops the clip until stop() is called

def stop_buzzer():
    """Stop the alert sound if it is currently marked as playing."""
    global buzzer_on
    if not buzzer_on:
        return
    alert_sound.stop()
    buzzer_on = False

def say(text):
    """Play TTS speech asynchronously.

    Synthesises ``text`` with gTTS into a temporary MP3, plays it through
    ``pygame.mixer.music`` on a daemon thread and deletes the file afterwards.
    Returns immediately; synthesis or playback failures are printed, not raised.

    Args:
        text: The sentence to speak (English).
    """
    def _speak():
        # Reserve a temp path and close the handle straight away so that gTTS
        # and pygame can reopen the file themselves.
        with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as tmp:
            filename = tmp.name

        loaded = False
        try:
            gTTS(text=text, lang='en').save(filename)
            pygame.mixer.music.load(filename)
            loaded = True
            pygame.mixer.music.play()
            clock = pygame.time.Clock()  # one clock for the whole wait loop
            while pygame.mixer.music.get_busy():
                clock.tick(10)
        except Exception as e:
            print(f"⚠️ Speech playback failed: {e}")
        finally:
            # Release the file from the mixer before deleting it; while it is
            # still loaded, removal can fail with PermissionError on Windows.
            # unload() exists from pygame 2.0, hence the getattr guard.
            unload = getattr(pygame.mixer.music, "unload", None)
            if loaded and unload is not None:
                unload()
            # Retry briefly instead of giving up after the first failure, so
            # temp files do not pile up.
            for _ in range(5):
                try:
                    os.remove(filename)
                    break
                except FileNotFoundError:
                    break
                except PermissionError:
                    time.sleep(0.1)

    threading.Thread(target=_speak, daemon=True).start()


In [17]:
# ===============================
# Gemini Response + Similarity
# ===============================
def get_gemini_response(prompt):
    """Query Gemini model for polite warnings.

    The API key is read from the ``GEMINI_API_KEY`` environment variable.
    It must never be embedded in the notebook. Any key that was previously
    committed in this cell should be treated as leaked and revoked.

    Args:
        prompt: Text sent to the model as ``contents``.

    Returns:
        The ``text`` attribute of the model response.

    Raises:
        RuntimeError: If ``GEMINI_API_KEY`` is not set.
    """
    api_key = os.environ.get("GEMINI_API_KEY")
    if not api_key:
        raise RuntimeError("GEMINI_API_KEY environment variable is not set.")

    client = genai.Client(api_key=api_key)
    result = client.models.generate_content(model="gemini-2.5-pro", contents=prompt)
    return result.text

def cosine_similarity(a, b):
    """Return the cosine similarity of two 1-D vectors.

    Args:
        a, b: Array-likes of equal length.

    Returns:
        ``dot(a, b) / (|a| * |b|)``, or ``0.0`` when either vector has zero
        norm. A 0/0 division there would give NaN, which compares False
        against any threshold and poisons any mean it is averaged into.
    """
    a = np.asarray(a, dtype=float)
    b = np.asarray(b, dtype=float)
    denom = np.linalg.norm(a) * np.linalg.norm(b)
    if denom == 0:
        return 0.0
    return np.dot(a, b) / denom


In [18]:
# ===============================
# Load Saved Embeddings
# ===============================
# Read both arrays, then close the archive; np.load keeps the .npz file open
# until the returned object is closed.
with np.load("embeddings.npz") as data:
    trusted_faces = data["trusted"]
    random_faces = data["random"]

# Each set must be a non-empty (n_images, embedding_dim) matrix. Without this
# check the means below become NaN and every later comparison is False.
if (
    trusted_faces.ndim != 2 or random_faces.ndim != 2
    or len(trusted_faces) == 0 or len(random_faces) == 0
):
    raise ValueError(
        "embeddings.npz must contain non-empty 2-D 'trusted' and 'random' arrays; "
        "re-run the embedding cell."
    )

# Unit-length centroid of the trusted embeddings, used as the reference identity.
trusted_mean = np.mean(trusted_faces, axis=0)
trusted_mean /= np.linalg.norm(trusted_mean)

# Threshold = midpoint between the average trusted and average random similarity.
trusted_sims = [cosine_similarity(trusted_mean, e) for e in trusted_faces]
random_sims = [cosine_similarity(trusted_mean, e) for e in random_faces]
THRESHOLD = (np.mean(trusted_sims) + np.mean(random_sims)) / 2

if not np.isfinite(THRESHOLD):
    raise ValueError("Calibrated threshold is not finite; check the saved embeddings.")

print(f"🔹 Threshold calibrated: {THRESHOLD:.3f}")


🔹 Threshold calibrated: 0.433


In [19]:
# ===============================
# Voice Command Activation
# ===============================
def listen_for_command():
    """Wait for activation command 'guard my room'.

    Blocks until a phrase recognised from the microphone fuzzy-matches the
    trigger (difflib similarity cutoff 0.6), then announces activation and
    returns ``None``. Listen timeouts and unintelligible audio are ignored;
    any other speech error is printed and listening continues.
    """
    recog = sr.Recognizer()
    mic = sr.Microphone()
    trigger = "guard my room"

    print("🎙️ Say 'Guard my room' to start guarding.")
    say("Say guard my room to activate.")

    with mic as source:
        # Calibrate once, before listening. Doing it on every iteration spends
        # a calibration window per attempt during which speech is not captured,
        # and it can calibrate against the user's own voice.
        recog.adjust_for_ambient_noise(source)

        while True:
            try:
                # phrase_time_limit bounds one utterance so continuous
                # background noise cannot keep listen() recording forever.
                audio = recog.listen(source, timeout=5, phrase_time_limit=5)
                command = recog.recognize_google(audio).lower()
                print(f"🗣️ Heard: {command}")

                # NOTE(review): cutoff=0.6 is lenient; consider a stricter
                # value if unrelated phrases trigger activation.
                if difflib.get_close_matches(command, [trigger], n=1, cutoff=0.6):
                    say("Guarding mode activated.")
                    print("🛡️ Guarding mode ON")
                    return
            except (sr.WaitTimeoutError, sr.UnknownValueError):
                continue
            except Exception as e:
                print(f"⚠️ Speech error: {e}")
                # Back off so a persistent failure (e.g. no network) does not
                # flood the output.
                time.sleep(1)


In [20]:
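# # ===============================
# # Security Camera Guard Mode
# # (superseded draft, kept commented out for reference only; the active
# #  run_guard_mode is defined in a later cell)
# # ===============================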
# def run_guard_mode():
#     cap = cv2.VideoCapture(0)
#     frame_counter = 0
#     unknown_start = None
#     last_alert = 0
#     UNKNOWN_SAVE_DELAY = 10
#     ALERT_TRIGGER_TIME = 5
#     os.makedirs("unknown_faces", exist_ok=True)

#     print("🎥 Camera monitoring active. Press 'q' to stop.")

#     while True:
#         ret, frame = cap.read()
#         if not ret:
#             continue

#         frame_counter += 1
#         if frame_counter % 3 != 0:
#             continue

#         frame_small = cv2.resize(frame, (480, 360))
#         try:
#             faces = DeepFace.extract_faces(img_path=frame_small, detector_backend="opencv", enforce_detection=False)
#         except Exception as e:
#             print(f"⚠️ Face extraction failed: {e}")
#             faces = []

#         intruder_detected = False

#         for det in faces:
#             face = det.get("face")
#             box = det.get("facial_area", {})
#             if face is None or not box:
#                 continue

#             x, y, w, h = map(int, [box.get("x", 0), box.get("y", 0), box.get("w", 0), box.get("h", 0)])

#             try:
#                 rep = DeepFace.represent(img_path=face, model_name="Facenet512", detector_backend="skip", enforce_detection=False)
#                 emb = np.array(rep[0]["embedding"]) / np.linalg.norm(rep[0]["embedding"])
#             except Exception as e:
#                 print(f"⚠️ Embedding error: {e}")
#                 continue

#             sim = cosine_similarity(emb, trusted_mean)
#             now = time.time()

#             if sim > THRESHOLD:
#                 label, color = f"TRUSTED ({sim:.2f})", (0, 255, 0)
#                 unknown_start = None
#                 stop_buzzer()
#             else:
#                 label, color = f"UNKNOWN ({sim:.2f})", (0, 0, 255)
#                 intruder_detected = True
#                 if unknown_start is None:
#                     unknown_start = now
#                 elif now - unknown_start > ALERT_TRIGGER_TIME and now - last_alert > UNKNOWN_SAVE_DELAY:
#                     timestamp = datetime.datetime.now().strftime("%Y%m%d_%H%M%S")
#                     path = os.path.join("unknown_faces", f"unknown_{timestamp}.jpg")
#                     cv2.imwrite(path, cv2.cvtColor((face * 255).astype(np.uint8), cv2.COLOR_RGB2BGR))
#                     print(f"💾 Unknown face saved at {path}")

#                     message = (
#                         "Warning! You are not authorized to be here. "
#                         "Please leave immediately. The owner has been notified and recording is active."
#                     )
#                     print("🤖 Spoken alert:", message)
#                     say(message)
#                     play_buzzer()
#                     last_alert = now

#             cv2.rectangle(frame_small, (x, y), (x + w, y + h), color, 2)
#             cv2.putText(frame_small, label, (x, max(y - 10, 0)), cv2.FONT_HERSHEY_SIMPLEX, 0.8, color, 2)

#         if not intruder_detected:
#             unknown_start = None
#             stop_buzzer()

#         cv2.imshow("Face Verification", frame_small)
#         if cv2.waitKey(1) & 0xFF == ord('q'):
#             break

#     cap.release()
#     cv2.destroyAllWindows()
#     stop_buzzer()


In [21]:
!pip install google-genai



In [26]:
import os
import time
import datetime
import numpy as np
import cv2
from deepface import DeepFace
from google import genai  # Assuming genai is the Gemini API client library

# Load Gemini API key from environment variable
GEMINI_API_KEY = os.getenv("GEMINI_API_KEY")

# Spoken when Gemini is unavailable or returns nothing usable.
DEFAULT_WARNING_MESSAGE = (
    "Warning! You are not authorized to be here. "
    "Please leave immediately. The owner has been notified and recording is active."
)

def get_gemini_response(prompt):
    """Query Gemini model for polite warnings with error handling.

    Args:
        prompt: Text sent to the model as ``contents``.

    Returns:
        The model's reply with surrounding whitespace stripped. Falls back to
        ``DEFAULT_WARNING_MESSAGE`` when no API key is configured, the request
        raises, or the reply text is missing or blank. This function never
        raises, so the caller always gets a string it can speak.
    """
    if not GEMINI_API_KEY:
        print("⚠️ Gemini API key not found. Using default warning message.")
        return DEFAULT_WARNING_MESSAGE
    try:
        client = genai.Client(api_key=GEMINI_API_KEY)
        result = client.models.generate_content(model="gemini-2.5-pro", contents=prompt)
        text = (result.text or "").strip()
    except Exception as e:
        print(f"⚠️ Gemini API error: {e}. Using fallback warning message.")
        return DEFAULT_WARNING_MESSAGE

    if not text:
        # The reply text can be absent; handing None to the TTS helper would fail.
        print("⚠️ Gemini returned an empty response. Using fallback warning message.")
        return DEFAULT_WARNING_MESSAGE
    return text

def run_guard_mode():
    """Watch camera 0 and raise an alert when an unrecognised face lingers.

    Every third frame is resized to 480x360 and searched for faces. Each face
    is embedded with Facenet512 and compared against ``trusted_mean``. A face
    whose similarity is not above ``THRESHOLD`` is labelled UNKNOWN. Once an
    unknown face has been present for more than ``ALERT_TRIGGER_TIME`` seconds
    (and at most once per ``UNKNOWN_SAVE_DELAY`` seconds), the face crop is
    saved under ``unknown_faces/``, a warning is spoken and the buzzer starts.
    The loop ends when 'q' is pressed in the preview window or the camera
    stops delivering frames.

    Returns:
        None. Camera, preview window and buzzer are always released on exit.
    """
    UNKNOWN_SAVE_DELAY = 10   # minimum seconds between two snapshots/alerts
    ALERT_TRIGGER_TIME = 5    # seconds an unknown face must persist before alerting
    MAX_READ_FAILURES = 50    # consecutive failed reads tolerated before giving up

    cap = cv2.VideoCapture(0)
    if not cap.isOpened():
        # Without this check the read loop below would spin forever on a
        # missing or busy camera, with no way to quit.
        cap.release()
        print("⚠️ Could not open camera 0. Guard mode not started.")
        return

    frame_counter = 0
    read_failures = 0
    unknown_start = None
    last_alert = 0
    os.makedirs("unknown_faces", exist_ok=True)

    print("🎥 Camera monitoring active. Press 'q' to stop.")

    try:
        while True:
            ret, frame = cap.read()
            if not ret:
                read_failures += 1
                if read_failures >= MAX_READ_FAILURES:
                    print("⚠️ Camera stopped delivering frames. Leaving guard mode.")
                    break
                time.sleep(0.02)
                continue
            read_failures = 0

            # Only analyse every third frame to keep up with the camera.
            frame_counter += 1
            if frame_counter % 3 != 0:
                continue

            frame_small = cv2.resize(frame, (480, 360))
            try:
                faces = DeepFace.extract_faces(img_path=frame_small, detector_backend="opencv", enforce_detection=False)
            except Exception as e:
                print(f"⚠️ Face extraction failed: {e}")
                faces = []

            intruder_detected = False

            for det in faces:
                face = det.get("face")
                box = det.get("facial_area", {})
                if face is None or not box:
                    continue

                # With enforce_detection=False DeepFace returns the whole frame
                # as a placeholder "face" with confidence 0 when nothing was
                # detected; treating it as a person flags an empty room.
                # NOTE(review): confirm this against the installed DeepFace version.
                confidence = det.get("confidence")
                if confidence is not None and confidence <= 0:
                    continue

                x, y, w, h = map(int, [box.get("x", 0), box.get("y", 0), box.get("w", 0), box.get("h", 0)])

                try:
                    rep = DeepFace.represent(img_path=face, model_name="Facenet512", detector_backend="skip", enforce_detection=False)
                    emb = np.array(rep[0]["embedding"]) / np.linalg.norm(rep[0]["embedding"])
                except Exception as e:
                    print(f"⚠️ Embedding error: {e}")
                    continue

                sim = cosine_similarity(emb, trusted_mean)
                now = time.time()

                if sim > THRESHOLD:
                    label, color = f"TRUSTED ({sim:.2f})", (0, 255, 0)
                    unknown_start = None
                    stop_buzzer()
                else:
                    label, color = f"UNKNOWN ({sim:.2f})", (0, 0, 255)
                    intruder_detected = True
                    if unknown_start is None:
                        unknown_start = now
                    elif now - unknown_start > ALERT_TRIGGER_TIME and now - last_alert > UNKNOWN_SAVE_DELAY:
                        timestamp = datetime.datetime.now().strftime("%Y%m%d_%H%M%S")
                        path = os.path.join("unknown_faces", f"unknown_{timestamp}.jpg")
                        # The crop is scaled by 255 and converted RGB -> BGR
                        # before being written with OpenCV.
                        cv2.imwrite(path, cv2.cvtColor((face * 255).astype(np.uint8), cv2.COLOR_RGB2BGR))
                        print(f"💾 Unknown face saved at {path}")

                        prompt = (
                            "Generate a polite but firm warning message to alert an intruder that they are not authorized, "
                            "asking them to leave immediately, indicating that the owner has been notified and recording is active."
                        )
                        # This call is synchronous: the preview pauses until
                        # the reply or the fallback message is returned.
                        message = get_gemini_response(prompt)
                        print("🤖 Gemini spoken alert:", message)
                        say(message)
                        play_buzzer()
                        last_alert = now

                cv2.rectangle(frame_small, (x, y), (x + w, y + h), color, 2)
                cv2.putText(frame_small, label, (x, max(y - 10, 0)), cv2.FONT_HERSHEY_SIMPLEX, 0.8, color, 2)

            if not intruder_detected:
                unknown_start = None
                stop_buzzer()

            cv2.imshow("Face Verification", frame_small)
            if cv2.waitKey(1) & 0xFF == ord('q'):
                break
    finally:
        # Always free the camera, window and alarm, even if the loop raised.
        cap.release()
        cv2.destroyAllWindows()
        stop_buzzer()

In [27]:
# Entry point: block until the spoken activation phrase is recognised, then
# start camera monitoring (which runs until 'q' is pressed in the preview window).
if __name__ == "__main__":
    listen_for_command()
    run_guard_mode()


🎙️ Say 'Guard my room' to start guarding.
🗣️ Heard: han han
🗣️ Heard: gand mein room
🛡️ Guarding mode ON
🎥 Camera monitoring active. Press 'q' to stop.
💾 Unknown face saved at unknown_faces\unknown_20251023_184515.jpg
💾 Unknown face saved at unknown_faces\unknown_20251023_184530.jpg
