# MILESTONE-1

In [1]:
import cv2
import speech_recognition as sr
import threading
import datetime


log_file = "command_log.txt"

def log_activity(spoken_text, status):
    """Append a timestamped recognition entry to the log file and echo it.

    Args:
        spoken_text: Text reported for the captured phrase.
        status: Outcome label written next to the text.
    """
    summary = f"Heard: '{spoken_text}' --> {status}"
    with open(log_file, "a", encoding="utf-8") as log_handle:
        stamp = datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S")
        log_handle.write(f"[{stamp}] {summary}\n")
    # Mirror the entry on stdout (without the timestamp).
    print(f"[LOG] {summary}")


def activate_protect_mode():
    """Set the global ``protect_mode`` flag; report if it was already set."""
    global protect_mode
    if protect_mode:
        print("[STATE] Already in Protect Mode")
        return
    protect_mode = True
    print("[STATE] Protect Mode is now ON ")


def deactivate_protect_mode():
    """Clear the global ``protect_mode`` flag and tell the listener to stop."""
    global protect_mode, listening
    if not protect_mode:
        print("[STATE] Already OFF")
    else:
        protect_mode = False
        print("[STATE] Protect Mode is now OFF")
    # listen_for_command() loops on this flag, so clearing it ends that loop.
    listening = False


def listen_for_command():
    """Listen on the microphone for voice commands until ``listening`` is cleared.

    Each captured phrase is transcribed with ``recognize_google`` and compared
    against the commands announced at start-up:

    * a phrase containing ``"protect my room"`` calls
      ``activate_protect_mode()``;
    * a phrase containing the word ``"stop"`` calls
      ``deactivate_protect_mode()``, which also clears ``listening`` and so
      ends this loop.

    Every recognised or unrecognised phrase is recorded via ``log_activity()``.
    When the loop ends, ``correct_commands / total_commands`` is printed as a
    percentage.

    Reads the globals ``listening`` and ``protect_mode``; updates the globals
    ``total_commands`` and ``correct_commands``.
    """
    global listening, total_commands, correct_commands
    recognizer = sr.Recognizer()
    mic = sr.Microphone()

    # Calibrate the energy threshold once before the first phrase.
    with mic as source:
        recognizer.adjust_for_ambient_noise(source)

    print("[INFO] Listening for commands: 'protect my room' to activate, 'stop' to deactivate.")

    while listening:   # loop runs only while listening == True
        with mic as source:
            try:
                # Bound the wait for speech so a cleared `listening` flag is
                # noticed promptly instead of blocking until someone talks.
                audio = recognizer.listen(source, timeout=1, phrase_time_limit=5)
            except sr.WaitTimeoutError:
                continue

        # Shutdown may have been requested while the phrase was being
        # captured; do not transcribe or log anything after that point.
        if not listening:
            break

        try:
            text = recognizer.recognize_google(audio).lower()
            print(f"[HEARD] {text}")

            # Only phrases heard while protect mode is still OFF enter the
            # accuracy denominator.
            if not protect_mode:
                total_commands += 1

            if "protect my room" in text:
                activate_protect_mode()
                log_activity(text, "Activated")
                correct_commands += 1
            elif "stop" in text.split():
                # The advertised voice deactivation: turns protect mode off
                # and clears `listening`, so the while-condition ends the loop.
                deactivate_protect_mode()
                log_activity(text, "Deactivated")
            else:
                log_activity(text, "No action")

        except sr.UnknownValueError:
            log_activity("Unrecognized speech", "No action")
        except sr.RequestError as e:
            print(f"[ERROR] Could not request results from Google Speech API: {e}")

    # When loop ends -> show recognition accuracy
    if total_commands > 0:
        accuracy = (correct_commands / total_commands) * 100
        print(f"\n[RESULT] Recognition Accuracy: {accuracy:.2f}% "
              f"({correct_commands}/{total_commands})")
    else:
        print("\n[RESULT] No valid speech processed.")


def webcam_display():
    """Show the live webcam feed with the protect-mode status overlaid.

    Runs until a frame cannot be read or the user presses 'q' in the preview
    window; pressing 'q' also calls ``deactivate_protect_mode()``.  The
    capture device and the OpenCV window are released on every exit path,
    including an exception or an interrupted kernel.
    """
    cap = cv2.VideoCapture(0)
    try:
        if not cap.isOpened():
            print("[ERROR] Cannot access webcam")
            return

        while True:
            ret, frame = cap.read()
            if not ret:
                break

            # Show protect mode status on the frame: green text when ON,
            # red text when OFF (colours are BGR).
            status_text = f"Protect Mode: {'ON' if protect_mode else 'OFF'}"
            cv2.putText(frame, status_text, (10, 40),
                        cv2.FONT_HERSHEY_SIMPLEX, 1,
                        (0, 255, 0) if protect_mode else (0, 0, 255), 2)

            cv2.imshow("Protect Agent Webcam", frame)

            # waitKey also services the GUI event loop; mask to the low byte
            # before comparing with the key code.
            key = cv2.waitKey(1) & 0xFF
            if key == ord('q'):
                print("[INFO] 'q' pressed - shutting down...")
                deactivate_protect_mode()
                break
    finally:
        # Always free the camera and close the window, otherwise the device
        # stays locked for the next run.
        cap.release()
        cv2.destroyAllWindows()

In [2]:
# Global state shared by the webcam loop and the listener thread
protect_mode = False
listening = True   # controls microphone loop

# Accuracy tracking
total_commands = 0
correct_commands = 0

# Start every run with a fresh log file. Written as UTF-8 because
# log_activity() appends to the same file with encoding="utf-8".
with open(log_file, "w", encoding="utf-8") as f:
    f.write("=== Command Recognition Log ===\n")

# Run ASR in a separate daemon thread
threading.Thread(target=listen_for_command, daemon=True).start()
# Run webcam display in main thread (blocks until the preview loop exits)
webcam_display()

[INFO] Listening for commands: 'protect my room' to activate, 'stop' to deactivate.
[LOG] Heard: 'Unrecognized speech' --> No action
[LOG] Heard: 'Unrecognized speech' --> No action
[LOG] Heard: 'Unrecognized speech' --> No action
[LOG] Heard: 'Unrecognized speech' --> No action
[INFO] 'q' pressed - shutting down...
[STATE] Already OFF


[LOG] Heard: 'Unrecognized speech' --> No action

[RESULT] No valid speech processed.


In [32]:
# Global state shared by the webcam loop and the listener thread
protect_mode = False
listening = True   # controls microphone loop

# Accuracy tracking
total_commands = 0
correct_commands = 0

# Start every run with a fresh log file. Written as UTF-8 because
# log_activity() appends to the same file with encoding="utf-8".
with open(log_file, "w", encoding="utf-8") as f:
    f.write("=== Command Recognition Log ===\n")

# Run ASR in a separate daemon thread
threading.Thread(target=listen_for_command, daemon=True).start()
# Run webcam display in main thread (blocks until the preview loop exits)
webcam_display()

[INFO] Listening for commands: 'protect my room' to activate, 'stop' to deactivate.
[HEARD] protect my room
[STATE] Protect Mode is now ON 
[LOG] Heard: 'protect my room' --> Activated
[HEARD] show the product mode is
[LOG] Heard: 'show the product mode is' --> No action
[HEARD] impressing
[LOG] Heard: 'impressing' --> No action
[INFO] 'q' pressed - shutting down...
[STATE] Protect Mode is now OFF


[LOG] Heard: 'Unrecognized speech' --> No action

[RESULT] Recognition Accuracy: 100.00% (1/1)


---

In [21]:
# Global state shared by the webcam loop and the listener thread
protect_mode = False
listening = True   # controls microphone loop

# Accuracy tracking
total_commands = 0
correct_commands = 0

# Start every run with a fresh log file. Written as UTF-8 because
# log_activity() appends to the same file with encoding="utf-8".
with open(log_file, "w", encoding="utf-8") as f:
    f.write("=== Command Recognition Log ===\n")

# Run ASR in a separate daemon thread
threading.Thread(target=listen_for_command, daemon=True).start()
# Run webcam display in main thread (blocks until the preview loop exits)
webcam_display()

[INFO] Listening for commands: 'protect my room' to activate, 'stop' to deactivate.
[HEARD] protect my room
[STATE] Protect Mode is now ON 
[LOG] Heard: 'protect my room' --> Activated
[INFO] 'q' pressed - shutting down...
[STATE] Protect Mode is now OFF


[LOG] Heard: 'Unrecognized speech' --> No action

[RESULT] Recognition Accuracy: 100.00% (1/1)


In [22]:
# Global state shared by the webcam loop and the listener thread
protect_mode = False
listening = True   # controls microphone loop

# Accuracy tracking
total_commands = 0
correct_commands = 0

# Start every run with a fresh log file. Written as UTF-8 because
# log_activity() appends to the same file with encoding="utf-8".
with open(log_file, "w", encoding="utf-8") as f:
    f.write("=== Command Recognition Log ===\n")

# Run ASR in a separate daemon thread
threading.Thread(target=listen_for_command, daemon=True).start()
# Run webcam display in main thread (blocks until the preview loop exits)
webcam_display()

[INFO] Listening for commands: 'protect my room' to activate, 'stop' to deactivate.
[HEARD] protect my room
[STATE] Protect Mode is now ON 
[LOG] Heard: 'protect my room' --> Activated
[INFO] 'q' pressed - shutting down...
[STATE] Protect Mode is now OFF


[LOG] Heard: 'Unrecognized speech' --> No action

[RESULT] Recognition Accuracy: 100.00% (1/1)


In [25]:
# Global state shared by the webcam loop and the listener thread
protect_mode = False
listening = True   # controls microphone loop

# Accuracy tracking
total_commands = 0
correct_commands = 0

# Start every run with a fresh log file. Written as UTF-8 because
# log_activity() appends to the same file with encoding="utf-8".
with open(log_file, "w", encoding="utf-8") as f:
    f.write("=== Command Recognition Log ===\n")

# Run ASR in a separate daemon thread
threading.Thread(target=listen_for_command, daemon=True).start()
# Run webcam display in main thread (blocks until the preview loop exits)
webcam_display()

[INFO] Listening for commands: 'protect my room' to activate, 'stop' to deactivate.
[HEARD] protect my room
[STATE] Protect Mode is now ON 
[LOG] Heard: 'protect my room' --> Activated
[INFO] 'q' pressed - shutting down...
[STATE] Protect Mode is now OFF


[LOG] Heard: 'Unrecognized speech' --> No action

[RESULT] Recognition Accuracy: 100.00% (1/1)


In [None]:
# Global state shared by the webcam loop and the listener thread
protect_mode = False
listening = True   # controls microphone loop

# Accuracy tracking
total_commands = 0
correct_commands = 0

# Start every run with a fresh log file. Written as UTF-8 because
# log_activity() appends to the same file with encoding="utf-8".
with open(log_file, "w", encoding="utf-8") as f:
    f.write("=== Command Recognition Log ===\n")

# Run ASR in a separate daemon thread
threading.Thread(target=listen_for_command, daemon=True).start()
# Run webcam display in main thread (blocks until the preview loop exits)
webcam_display()

[INFO] Listening for commands: 'protect my room' to activate, 'stop' to deactivate.
[HEARD] protect my room
[STATE] Protect Mode is now ON 
[LOG] Heard: 'protect my room' --> Activated
[INFO] 'q' pressed - shutting down...
[STATE] Protect Mode is now OFF


[LOG] Heard: 'Unrecognized speech' --> No action

[RESULT] Recognition Accuracy: 100.00% (1/1)


In [27]:
# Global state shared by the webcam loop and the listener thread
protect_mode = False
listening = True   # controls microphone loop

# Accuracy tracking
total_commands = 0
correct_commands = 0

# Start every run with a fresh log file. Written as UTF-8 because
# log_activity() appends to the same file with encoding="utf-8".
with open(log_file, "w", encoding="utf-8") as f:
    f.write("=== Command Recognition Log ===\n")

# Run ASR in a separate daemon thread
threading.Thread(target=listen_for_command, daemon=True).start()
# Run webcam display in main thread (blocks until the preview loop exits)
webcam_display()

[INFO] Listening for commands: 'protect my room' to activate, 'stop' to deactivate.
[HEARD] protect my room
[STATE] Protect Mode is now ON 
[LOG] Heard: 'protect my room' --> Activated
[INFO] 'q' pressed - shutting down...
[STATE] Protect Mode is now OFF


[LOG] Heard: 'Unrecognized speech' --> No action

[RESULT] Recognition Accuracy: 100.00% (1/1)


* Average Accuracy = (100 × 5) / 5 = 100 %

---

## MILESTONE-2

In [6]:
! pip install opencv-python face_recognition numpy

Collecting face_recognition
  Using cached face_recognition-1.3.0-py2.py3-none-any.whl.metadata (21 kB)
Collecting face-recognition-models>=0.3.0 (from face_recognition)
  Using cached face_recognition_models-0.3.0-py2.py3-none-any.whl
Using cached face_recognition-1.3.0-py2.py3-none-any.whl (15 kB)
Installing collected packages: face-recognition-models, face_recognition

   ---------------------------------------- 0/2 [face-recognition-models]
   ---------------------------------------- 0/2 [face-recognition-models]
   ---------------------------------------- 0/2 [face-recognition-models]
   ---------------------------------------- 0/2 [face-recognition-models]
   ---------------------------------------- 2/2 [face_recognition]

Successfully installed face-recognition-models-0.3.0 face_recognition-1.3.0


In [5]:
! python -m pip install dlib-19.24.1-cp311-cp311-win_amd64.whl

Processing e:\7th sem\ee782 adv topics in ml\assignment_2\dlib-19.24.1-cp311-cp311-win_amd64.whl
Installing collected packages: dlib
Successfully installed dlib-19.24.1


In [8]:
! pip install opencv-python



In [10]:
import cv2, dlib, face_recognition

# Sanity check: report the versions of the native libraries that imported.
for library_label, library_module in (("OpenCV", cv2), ("dlib", dlib)):
    print(f"{library_label} version:", library_module.__version__)
print("face_recognition installed successfully!")

OpenCV version: 4.8.1
dlib version: 19.24.1
face_recognition installed successfully!


In [4]:
import cv2
import face_recognition
import numpy as np
import os

# Enrollment script: capture face embeddings from the webcam and store them
# in one .npy file per user under ENROLL_DIR.

# Directory to store trusted faces
ENROLL_DIR = "trusted_faces"
os.makedirs(ENROLL_DIR, exist_ok=True)

# Capture from webcam
cap = cv2.VideoCapture(0)
try:
    # Fail loudly instead of silently skipping the capture loop.
    if not cap.isOpened():
        raise RuntimeError("[ERROR] Cannot access webcam")

    print("[INFO] Press 's' to capture face, 'q' to quit.")

    # Ask for user name once; it becomes the file name, so it must not be
    # blank (a blank name would write a file called just ".npy").
    user_name = input("Enter name for this user: ").strip()
    if not user_name:
        raise ValueError("User name must not be empty")

    # Load previous embeddings if exist
    user_file = os.path.join(ENROLL_DIR, f"{user_name}.npy")
    if os.path.exists(user_file):
        # NOTE(review): allow_pickle=True is kept so previously saved files
        # keep loading; only load .npy files this script wrote itself.
        embeddings = list(np.load(user_file, allow_pickle=True))
        print(f"[INFO] Loaded {len(embeddings)} existing embeddings for {user_name}")
    else:
        embeddings = []

    while True:
        ret, frame = cap.read()
        if not ret:
            break

        # OpenCV delivers BGR frames; the frame is converted to RGB before
        # it is handed to face_recognition.
        rgb_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
        boxes = face_recognition.face_locations(rgb_frame)

        # Draw boxes
        for (top, right, bottom, left) in boxes:
            cv2.rectangle(frame, (left, top), (right, bottom), (0, 255, 0), 2)

        cv2.imshow("Enroll Trusted Face", frame)
        key = cv2.waitKey(1) & 0xFF

        if key == ord('q'):
            print(f"[INFO] Total embeddings saved for {user_name}: {len(embeddings)}")
            break
        if key != ord('s'):
            continue

        # 's' pressed: give feedback even when no face is in view.
        if not boxes:
            print("[WARN] No face detected, try again.")
            continue

        # Encode the first detected face using full frame + location
        encs = face_recognition.face_encodings(rgb_frame, [boxes[0]])
        if encs:
            embeddings.append(encs[0])
            # Persist after every capture so nothing is lost on a crash.
            np.save(user_file, np.asarray(embeddings))
            print(f"[SAVED] Captured embedding #{len(embeddings)} for {user_name}")
        else:
            print("[WARN] No face detected, try again.")
finally:
    # Release the camera and close the window on every exit path.
    cap.release()
    cv2.destroyAllWindows()

[INFO] Press 's' to capture face, 'q' to quit.
[SAVED] Captured embedding #1 for Rehna
[SAVED] Captured embedding #2 for Rehna
[SAVED] Captured embedding #3 for Rehna
[SAVED] Captured embedding #4 for Rehna
[SAVED] Captured embedding #5 for Rehna
[SAVED] Captured embedding #6 for Rehna
[SAVED] Captured embedding #7 for Rehna
[SAVED] Captured embedding #8 for Rehna
[SAVED] Captured embedding #9 for Rehna
[SAVED] Captured embedding #10 for Rehna
[INFO] Total embeddings saved for Rehna: 10


In [46]:
import cv2
import face_recognition
import numpy as np
import os

# Enrollment script: capture face embeddings from the webcam and store them
# in one .npy file per user under ENROLL_DIR.

# Directory to store trusted faces
ENROLL_DIR = "trusted_faces"
os.makedirs(ENROLL_DIR, exist_ok=True)

# Capture from webcam
cap = cv2.VideoCapture(0)
try:
    # Fail loudly instead of silently skipping the capture loop.
    if not cap.isOpened():
        raise RuntimeError("[ERROR] Cannot access webcam")

    print("[INFO] Press 's' to capture face, 'q' to quit.")

    # Ask for user name once; it becomes the file name, so it must not be
    # blank (a blank name would write a file called just ".npy").
    user_name = input("Enter name for this user: ").strip()
    if not user_name:
        raise ValueError("User name must not be empty")

    # Load previous embeddings if exist
    user_file = os.path.join(ENROLL_DIR, f"{user_name}.npy")
    if os.path.exists(user_file):
        # NOTE(review): allow_pickle=True is kept so previously saved files
        # keep loading; only load .npy files this script wrote itself.
        embeddings = list(np.load(user_file, allow_pickle=True))
        print(f"[INFO] Loaded {len(embeddings)} existing embeddings for {user_name}")
    else:
        embeddings = []

    while True:
        ret, frame = cap.read()
        if not ret:
            break

        # OpenCV delivers BGR frames; the frame is converted to RGB before
        # it is handed to face_recognition.
        rgb_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
        boxes = face_recognition.face_locations(rgb_frame)

        # Draw boxes
        for (top, right, bottom, left) in boxes:
            cv2.rectangle(frame, (left, top), (right, bottom), (0, 255, 0), 2)

        cv2.imshow("Enroll Trusted Face", frame)
        key = cv2.waitKey(1) & 0xFF

        if key == ord('q'):
            print(f"[INFO] Total embeddings saved for {user_name}: {len(embeddings)}")
            break
        if key != ord('s'):
            continue

        # 's' pressed: give feedback even when no face is in view.
        if not boxes:
            print("[WARN] No face detected, try again.")
            continue

        # Encode the first detected face using full frame + location
        encs = face_recognition.face_encodings(rgb_frame, [boxes[0]])
        if encs:
            embeddings.append(encs[0])
            # Persist after every capture so nothing is lost on a crash.
            np.save(user_file, np.asarray(embeddings))
            print(f"[SAVED] Captured embedding #{len(embeddings)} for {user_name}")
        else:
            print("[WARN] No face detected, try again.")
finally:
    # Release the camera and close the window on every exit path.
    cap.release()
    cv2.destroyAllWindows()

[INFO] Press 's' to capture face, 'q' to quit.
[SAVED] Captured embedding #1 for yashaswini
[SAVED] Captured embedding #2 for yashaswini
[SAVED] Captured embedding #3 for yashaswini
[SAVED] Captured embedding #4 for yashaswini
[SAVED] Captured embedding #5 for yashaswini
[SAVED] Captured embedding #6 for yashaswini
[SAVED] Captured embedding #7 for yashaswini
[SAVED] Captured embedding #8 for yashaswini
[SAVED] Captured embedding #9 for yashaswini
[SAVED] Captured embedding #10 for yashaswini
[SAVED] Captured embedding #11 for yashaswini
[INFO] Total embeddings saved for yashaswini: 11


In [12]:
! pip install playsound

Collecting playsound
  Downloading playsound-1.3.0.tar.gz (7.7 kB)
  Preparing metadata (setup.py): started
  Preparing metadata (setup.py): finished with status 'done'
Building wheels for collected packages: playsound
  Building wheel for playsound (setup.py): started
  Building wheel for playsound (setup.py): finished with status 'done'
  Created wheel for playsound: filename=playsound-1.3.0-py3-none-any.whl size=7044 sha256=d877c784475663776773b06e946d0aa526250899b17d89c98afb92d01ea79d21
  Stored in directory: c:\users\shaik rehna afroz\appdata\local\pip\cache\wheels\50\98\42\62753a9e1fb97579a0ce2f84f7db4c21c09d03bb2091e6cef4
Successfully built playsound
Installing collected packages: playsound
Successfully installed playsound-1.3.0


  DEPRECATION: Building 'playsound' using the legacy setup.py bdist_wheel mechanism, which will be removed in a future version. pip 25.3 will enforce this behaviour change. A possible replacement is to use the standardized build interface by setting the `--use-pep517` option, (possibly combined with `--no-build-isolation`), or adding a `pyproject.toml` file to the source tree of 'playsound'. Discussion can be found at https://github.com/pypa/pip/issues/6334


In [51]:
import cv2
import face_recognition
import numpy as np
import os
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
from collections import defaultdict

ENROLL_DIR = "trusted_faces"
TEST_DIR = "test_cases"

# Parallel lists: trusted_encodings[i] was enrolled for trusted_names[i].
trusted_encodings = []
trusted_names = []

# Load trusted embeddings: each .npy file in ENROLL_DIR is named after its user
for file in os.listdir(ENROLL_DIR):
    if not file.endswith(".npy"):
        continue
    name = os.path.splitext(file)[0]
    path = os.path.join(ENROLL_DIR, file)
    embeddings = np.load(path, allow_pickle=True)
    # One list entry per stored embedding, each tagged with the owner's name.
    trusted_encodings.extend(embeddings)
    trusted_names.extend([name] * len(embeddings))

print(f"[INFO] Trusted users: {set(trusted_names)}")
print(f"[INFO] Total embeddings: {len(trusted_encodings)}")

def get_true_label(filename):
    """Return the ground-truth identity encoded in a test image's file name.

    The first character of the name, compared case-insensitively, selects
    the label: ``r`` -> ``"Rehna"``, ``y`` -> ``"yashaswini"``.

    Args:
        filename: File name of the test image (e.g. ``"r1.jpg"``).

    Returns:
        The label string, or ``"Unknown"`` for any other prefix, including
        an empty file name.
    """
    prefix_to_label = {"r": "Rehna", "y": "yashaswini"}
    # Slice instead of indexing so an empty name yields "" rather than
    # raising IndexError.
    return prefix_to_label.get(filename[:1].lower(), "Unknown")

# Track metrics: overall label lists plus one pair of lists per condition
y_true_all = []
y_pred_all = []
results_by_condition = defaultdict(lambda: {"y_true": [], "y_pred": []})

# Loop through subfolders; each subfolder of TEST_DIR is one test condition
for condition in os.listdir(TEST_DIR):
    condition_path = os.path.join(TEST_DIR, condition)
    if not os.path.isdir(condition_path):
        continue

    print(f"\n[INFO] Testing condition: {condition}")

    for img_file in os.listdir(condition_path):
        # Only image files are evaluated.
        if not img_file.lower().endswith((".jpg", ".jpeg", ".png")):
            continue

        img_path = os.path.join(condition_path, img_file)
        true_label = get_true_label(img_file)

        image = cv2.imread(img_path)
        if image is None:
            print(f"[WARNING] Could not load {img_file}")
            continue

        rgb_image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
        boxes = face_recognition.face_locations(rgb_image)
        encodings = face_recognition.face_encodings(rgb_image, boxes)

        # Default to "Unknown"; only a close enough match overrides it.
        y_pred = "Unknown"
        if len(encodings) > 0:
            # Only the first detected face is compared.
            face_enc = encodings[0]
            distances = face_recognition.face_distance(trusted_encodings, face_enc)
            if len(distances) > 0:
                min_idx = np.argmin(distances)
                # Accept the nearest enrolled embedding only below 0.45.
                if distances[min_idx] < 0.45:
                    y_pred = trusted_names[min_idx]
        else:
            print(f"[INFO] No face detected in {img_file}")

        # Append global + per-condition results
        y_true_all.append(true_label)
        y_pred_all.append(y_pred)
        condition_results = results_by_condition[condition]
        condition_results["y_true"].append(true_label)
        condition_results["y_pred"].append(y_pred)

        print(f"[TEST] {img_file} | True: {true_label}, Pred: {y_pred}")

# === Overall Results ===
print("\n===== OVERALL RESULTS =====")
if y_true_all:
    print("Accuracy :", accuracy_score(y_true_all, y_pred_all))
    # The remaining metrics share the same weighted-average settings.
    weighted_kwargs = {"average": "weighted", "zero_division": 0}
    for metric_label, metric_fn in (
        ("Precision:", precision_score),
        ("Recall   :", recall_score),
        ("F1 Score :", f1_score),
    ):
        print(metric_label, metric_fn(y_true_all, y_pred_all, **weighted_kwargs))
else:
    print("[ERROR] No test results computed.")

# === Per Condition Results ===
print("\n===== PER-CONDITION RESULTS =====")
for condition, data in results_by_condition.items():
    # Conditions with no evaluated images are not reported.
    if data["y_true"]:
        acc = accuracy_score(data["y_true"], data["y_pred"])
        print(f"{condition} -> Accuracy: {acc:.2f}")

[INFO] Trusted users: {'yashaswini', 'Rehna'}
[INFO] Total embeddings: 21

[INFO] Testing condition: background_noise
[TEST] r1.jpg | True: Rehna, Pred: Rehna
[TEST] r2.jpg | True: Rehna, Pred: Rehna
[TEST] r3.jpeg | True: Rehna, Pred: Rehna
[TEST] r4.jpeg | True: Rehna, Pred: Rehna
[TEST] r5.jpeg | True: Rehna, Pred: Rehna
[TEST] y3.jpeg | True: yashaswini, Pred: yashaswini
[TEST] y4.jpeg | True: yashaswini, Pred: yashaswini

[INFO] Testing condition: bright_light
[TEST] r1.jpeg | True: Rehna, Pred: Rehna
[TEST] r10.jpeg | True: Rehna, Pred: Rehna
[TEST] r11.jpeg | True: Rehna, Pred: Rehna
[TEST] r12.jpeg | True: Rehna, Pred: Rehna
[TEST] r13.jpg | True: Rehna, Pred: Rehna
[TEST] r14.jpg | True: Rehna, Pred: Rehna
[TEST] r18.jpg | True: Rehna, Pred: Rehna
[TEST] r19.jpg | True: Rehna, Pred: Unknown
[TEST] r2.jpeg | True: Rehna, Pred: Rehna
[TEST] r20.jpg | True: Rehna, Pred: Rehna
[TEST] r21.jpg | True: Rehna, Pred: Rehna
[TEST] r22.jpg | True: Rehna, Pred: Rehna
[TEST] r3.jpeg | True

===== OVERALL RESULTS =====

Accuracy : 0.9

Precision: 0.95

Recall   : 0.9

F1 Score : 0.913

===== PER-CONDITION RESULTS =====

background_noise -> Accuracy: 1.00

bright_light -> Accuracy: 0.89

dim_light -> Accuracy: 0.82

unseen -> Accuracy: 1.00

#### MILESTONE 3 (the entire integrated code) is in the "Milestone_3.py" file