In [2]:
import requests

ESP32_IP = "http://192.168.204.171"  # Replace with your ESP32-CAM IP

# === Test Camera Feed ===
def check_camera():
    url = f"{ESP32_IP}/cam-hi.jpg"
    response = requests.get(url)
    if response.status_code == 200:
        with open("test_image.jpg", "wb") as file:
            file.write(response.content)
        print("✅ Camera working! Image saved as test_image.jpg")
    else:
        print("❌ Failed to get camera feed")

# === Test Servo Movement ===
def move_servo(angle):
    url = f"{ESP32_IP}/servo_angle?value={angle}"
    response = requests.get(url)
    if response.status_code == 200:
        print(f"✅ Servo moved to {angle}°")
    else:
        print("❌ Failed to move servo")

# === Test Ultrasonic Distance (if endpoint is available) ===
def check_distance():
    url = f"{ESP32_IP}/distance"  # Only if you added this endpoint in ESP32 code
    response = requests.get(url)
    if response.status_code == 200:
        print(f"📏 Distance: {response.text} cm")
    else:
        print("❌ Failed to get distance")

move_servo(90)     # Move servo to 90 degrees
 # Test ultrasonic (if available)


✅ Servo moved to 90°


In [3]:
import cv2
import torch
from torchvision import transforms, models
from PIL import Image
import numpy as np
from ultralytics import YOLO
#from modules.voice1 import speak_text

# Emotion labels
emotion_labels = {
    0: "Angry",
    1: "Disgust",
    2: "Fear",
    3: "Happy",
    4: "Sad",
    5: "Surprise",
    6: "Neutral"
}

# Load ConvNeXt model
def get_convnext(model_size='large', num_classes=7):
    if model_size == 'tiny':
        model = models.convnext_tiny(weights=models.ConvNeXt_Tiny_Weights)
    elif model_size == 'small':
        model = models.convnext_small(weights=models.ConvNeXt_Small_Weights)
    elif model_size == 'base':
        model = models.convnext_base(weights=models.ConvNeXt_Base_Weights)
    else:
        model = models.convnext_large(weights=models.ConvNeXt_Large_Weights)
    model.classifier[2] = torch.nn.Linear(model.classifier[2].in_features, num_classes)
    return model

# Load models
face_detector = YOLO(r".\modules\yolo_face_detection.pt")
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
emotion_model = get_convnext(model_size='large', num_classes=7).to(device)
emotion_model.load_state_dict(torch.load(r".\modules\model_epoch_5.pth", map_location=device))
emotion_model.eval()

# Preprocessing
preprocess = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
])

# Real-time prediction without storing predictions
def live_emotion_prediction_realtime():
    cap = cv2.VideoCapture(0)

    last_emotion = None
    speak_cooldown = 30  # Number of frames before speaking the same emotion again
    frame_counter = 0

    while True:
        ret, frame = cap.read()
        if not ret:
            break

        rgb_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
        results = face_detector.predict(source=rgb_frame, save=False, conf=0.5)
        detections = results[0].boxes.xyxy.cpu().numpy()
        confidences = results[0].boxes.conf.cpu().numpy()

        for det, conf in zip(detections, confidences):
            if conf < 0.5:
                continue

            xmin, ymin, xmax, ymax = map(int, det)
            h, w, _ = frame.shape
            x1, y1, x2, y2 = max(xmin, 0), max(ymin, 0), min(xmax, w - 1), min(ymax, h - 1)

            face_img = rgb_frame[y1:y2, x1:x2]
            if face_img.size == 0:
                continue

            face_pil = Image.fromarray(face_img)
            input_tensor = preprocess(face_pil).unsqueeze(0).to(device)

            with torch.no_grad():
                logits = emotion_model(input_tensor)
                pred = int(logits.argmax(dim=1).item())
                label = emotion_labels[pred]

                # Speak only if emotion changes or cooldown passes
                if label != last_emotion or frame_counter >= speak_cooldown:
                    #speak_text(f"{label}")
                    last_emotion = label
                    frame_counter = 0

                # Draw box and label
                cv2.rectangle(frame, (x1, y1), (x2, y2), (0, 255, 0), 2)
                cv2.putText(frame, label, (x1, y1 - 10), cv2.FONT_HERSHEY_SIMPLEX,
                            0.9, (0, 255, 0), 2)

            break  # Only one face per frame

        frame_counter += 1
        cv2.imshow("Real-Time Emotion Detection", frame)

        if cv2.waitKey(1) & 0xFF == ord('q'):
            break

    cap.release()
    cv2.destroyAllWindows()

# Run it
if __name__ == "__main__":
    live_emotion_prediction_realtime()


0: 480x640 1 face, 23.7ms
Speed: 2.5ms preprocess, 23.7ms inference, 42.0ms postprocess per image at shape (1, 3, 480, 640)

0: 480x640 1 face, 14.6ms
Speed: 2.2ms preprocess, 14.6ms inference, 2.0ms postprocess per image at shape (1, 3, 480, 640)

0: 480x640 1 face, 13.9ms
Speed: 2.0ms preprocess, 13.9ms inference, 2.0ms postprocess per image at shape (1, 3, 480, 640)

0: 480x640 1 face, 13.7ms
Speed: 2.0ms preprocess, 13.7ms inference, 2.5ms postprocess per image at shape (1, 3, 480, 640)

0: 480x640 1 face, 16.0ms
Speed: 1.0ms preprocess, 16.0ms inference, 2.0ms postprocess per image at shape (1, 3, 480, 640)

0: 480x640 1 face, 17.5ms
Speed: 2.0ms preprocess, 17.5ms inference, 3.1ms postprocess per image at shape (1, 3, 480, 640)

0: 480x640 1 face, 18.6ms
Speed: 3.0ms preprocess, 18.6ms inference, 3.8ms postprocess per image at shape (1, 3, 480, 640)

0: 480x640 1 face, 17.5ms
Speed: 2.0ms preprocess, 17.5ms inference, 3.0ms postprocess per image at shape (1, 3, 480, 640)

0: 480

In [1]:
!pip install dlib-19.22.99-cp310-cp310-win_amd64.whl

Processing .\dlib-19.22.99-cp310-cp310-win_amd64.whl
Installing collected packages: dlib
Successfully installed dlib-19.22.99


In [2]:
!pip install face_recognition

Collecting face_recognition
  Using cached face_recognition-1.3.0-py2.py3-none-any.whl.metadata (21 kB)
Collecting face-recognition-models>=0.3.0 (from face_recognition)
  Using cached face_recognition_models-0.3.0-py2.py3-none-any.whl
Using cached face_recognition-1.3.0-py2.py3-none-any.whl (15 kB)
Installing collected packages: face-recognition-models, face_recognition
Successfully installed face-recognition-models-0.3.0 face_recognition-1.3.0


In [12]:
import cv2
import os
import time
# Dynamic Output Directory
OUTPUT_DIR = os.path.join(os.getcwd(), "known image")
if not os.path.exists(OUTPUT_DIR):
    os.makedirs(OUTPUT_DIR)

# Load Face Detector
face_cascade = cv2.CascadeClassifier(cv2.data.haarcascades + 'haarcascade_frontalface_default.xml')

def detect_faces(frame):
    """ Detect faces in a given frame using OpenCV Haar Cascade. """
    gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)  # Convert to grayscale
    faces = face_cascade.detectMultiScale(gray, scaleFactor=1.2, minNeighbors=6, minSize=(40, 40))
    return faces

# def detect_faces(frame):
#     """ Detect faces in a given frame using OpenCV Haar Cascade. """
#     frame_resized = cv2.resize(frame, (640, 480))  # Resize for consistent detection
#     gray = cv2.cvtColor(frame_resized, cv2.COLOR_BGR2GRAY)  # Convert to grayscale
#     gray = cv2.equalizeHist(gray)  # Improve contrast
#     faces = face_cascade.detectMultiScale(
#         gray, scaleFactor=1.1, minNeighbors=5, minSize=(30, 30))
#     return faces

def face_add():
    """ Capture, detect faces, get a name, and save the image with the recognized name. """
    print("📷 Capturing image from webcam...")

    # Open webcam
    cap = cv2.VideoCapture(0)
    if not cap.isOpened():
        print("❌ Could not open webcam.")
        return None

    ret, frame = cap.read()
    cap.release()  # Release the webcam

    if not ret:
        print("❌ Failed to capture image.")
        return None

    # Detect faces
    faces = detect_faces(frame)
    if len(faces) == 0:
        print("⚠️ No face detected! Try adjusting the camera angle or lighting.")
        return None

    # Draw rectangles around detected faces
    for (x, y, w, h) in faces:
        cv2.rectangle(frame, (x, y), (x + w, y + h), (0, 255, 0), 2)

    # Get Name via Voice
    print("🎤 Please say your name...")
    person_name = input("Couldn't detect name. Enter manually: ").strip()
    if not person_name:
        print("❌ No name provided! Image not saved.")
        return None

    # Generate Unique File Name
    timestamp = time.strftime("%Y%m%d_%H%M%S")
    file_name = f"{person_name}_{timestamp}.jpg"
    output_path = os.path.join(OUTPUT_DIR, file_name)

    # Save the image
    cv2.imwrite(output_path, frame)
    print(f"✅ Image saved as: {output_path}")

    # Store name in a text |file (for reference)
    with open(os.path.join(OUTPUT_DIR, "face_log.txt"), "a") as f:
        f.write(f"{person_name}, {output_path}\n")

    return person_name  # Return the detected name for further use
face_add()


📷 Capturing image from webcam...
🎤 Please say your name...
✅ Image saved as: d:\niit\ml\blindkit\known image\akka_20250415_182844.jpg


'akka'

In [13]:
import os
import cv2
import time
import face_recognition
from statistics import mode, StatisticsError
from datetime import datetime
import cv2
import time
from collections import Counter


def load_known_faces():
    #change path where knownface is stores
    known_faces_dir = r".\known image"
    known_encodings = []
    known_names = []
    for filename in os.listdir(known_faces_dir):
        if filename.lower().endswith((".png", ".jpg", ".jpeg")):
            image_path = os.path.join(known_faces_dir, filename)
            image = face_recognition.load_image_file(image_path)
            encodings = face_recognition.face_encodings(image)
            if encodings:
                known_encodings.append(encodings[0])
                known_names.append(os.path.splitext(filename)[0])
    return known_encodings, known_names

def recognize_faces(image_path, known_encodings, known_names):
    image = face_recognition.load_image_file(image_path)
    encodings = face_recognition.face_encodings(image)
    recognized_faces = []
    for encoding in encodings:
        results = face_recognition.compare_faces(known_encodings, encoding)
        distances = face_recognition.face_distance(known_encodings, encoding)
        if any(results):
            best_match_index = distances.argmin()
            recognized_faces.append(known_names[best_match_index])
        else:
            recognized_faces.append("Unknown")
    return recognized_faces

def get_most_frequent_face_name(known_encodings, known_names, num_frames=3):
    recognized_list = []
    cap = cv2.VideoCapture(0)

    if not cap.isOpened():
        print("Error: Could not open webcam.")
        return "Camera error"

    print("Capturing frames from webcam and recognizing faces...")
    for i in range(num_frames):
        ret, frame = cap.read()
        if not ret:
            print(f"Frame {i+1}: Error capturing frame from webcam.")
            continue

        temp_path = f"temp_frame_{i}.jpg"
        cv2.imwrite(temp_path, frame)

        faces = recognize_faces(temp_path, known_encodings, known_names)
        print(f"Frame {i+1}: Recognized - {faces}")
        recognized_list.extend(faces)
        time.sleep(1)

    cap.release()

    if not recognized_list:
        return "No faces detected."

    try:
        most_common = Counter(recognized_list).most_common(1)[0][0]
        return f"Most frequently recognized face: {most_common}"
    except StatisticsError:
        return "No unique most frequent face (tie or none recognized)."


def facesd():
    known_encodings, known_names = load_known_faces()
    if not known_encodings:
        print("No known faces loaded. Please add images to the 'known image' directory.")
        return

    result = get_most_frequent_face_name(known_encodings, known_names)
    print(result)
    return result

facesd()

Capturing frames from webcam and recognizing faces...
Frame 1: Recognized - ['akka_20250415_182844', 'Unknown']
Frame 2: Recognized - ['akka_20250415_182844', 'sriks_20250415_182753']
Frame 3: Recognized - ['akka_20250415_182844']
Most frequently recognized face: akka_20250415_182844


'Most frequently recognized face: akka_20250415_182844'