In [1]:

import numpy as np
import pandas as pd
from ultralytics import YOLO
import cv2
import torch
import pyttsx3
from torchvision.transforms import Compose, Resize, ToTensor, Normalize
import time
import threading
from gtts import gTTS
from playsound import playsound
import os
import pygame

pygame 2.6.1 (SDL 2.28.4, Python 3.12.4)
Hello from the pygame community. https://www.pygame.org/contribute.html


  from pkg_resources import resource_stream, resource_exists


In [8]:
try:
    pygame.mixer.init()
except pygame.error as e:
    print(f"Error initializing mixer: {e}")
    

yolo = YOLO("yolov8n.pt")
yolo.conf = 0.35 # Classification threshold

model_type = "MiDaS_small"

midas = torch.hub.load("intel-isl/MiDaS", model_type) # Change the MiDas model type to experiment with different models
midas.eval()

midas_transforms = torch.hub.load("intel-isl/MiDaS", "transforms")
if model_type == "DPT_large" or model_type == "DPT_Hybrid":
    transform = midas_transforms.dpt_transform
else:
    transform = midas_transforms.small_transform

def speak_alert(msg):
    def run():
        try:
            tts = gTTS(text=msg, lang='en')
            filename = "alert_audio.mp3"
            tts.save(filename)
            
            # --- ADDED SAFETY CHECK ---
            # Make sure the file was actually created before trying to play it
            if os.path.exists(filename):
                pygame.mixer.music.load(filename)
                pygame.mixer.music.play()
                
                while pygame.mixer.music.get_busy():
                    time.sleep(0.1)
                
                pygame.mixer.music.unload()
                os.remove(filename)
            else:
                print("Alert audio file not created due to a network error.")

        except Exception as e:
            print(f"Error in speak_alert: {e}")
            
    thread = threading.Thread(target=run)
    thread.start()

OBSTACLE_CLASSES = {"person", "chair", "bench", "bicycle", "motorbike", "car"}

cap = cv2.VideoCapture(0)

last_alert_time = 0
alert_interval = 3.0
DEPTH_THRESHOLD = 450

frame_count = 0
DEPTH_UPDATE_INTERVAL = 3  # every 3rd frame

last_depth_map = None  # cache previous depth

while True:
    ret, frame = cap.read()
    if not ret:
        break
    
    frame = cv2.flip(frame, 1)
    frame_count += 1

    h_frame, w_frame = frame.shape[:2]

    # YOLO detection
    results = yolo(frame, imgsz=320, device="cpu", verbose=False)[0]

    # Run MiDaS only every N frames
    if frame_count % DEPTH_UPDATE_INTERVAL == 0:
        img_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
        input_batch = transform(img_rgb)   # no unsqueeze

        with torch.no_grad():
            prediction = midas(input_batch)
            if isinstance(prediction, (tuple, list)):
                prediction = prediction[0]

            depth_map_tensor = torch.nn.functional.interpolate(
                prediction.unsqueeze(1),
                size=img_rgb.shape[:2],
                mode="bicubic",
                align_corners=False
            ).squeeze()

        last_depth_map = depth_map_tensor.cpu().numpy()

    
    if last_depth_map is not None:
        depth_map = last_depth_map

        
        output_display = cv2.normalize(depth_map, None, 255, 0, cv2.NORM_MINMAX, cv2.CV_8U)
        colored_depth_map = cv2.applyColorMap(output_display, cv2.COLORMAP_MAGMA)
    else:
        colored_depth_map = np.zeros_like(frame)

    alert_triggered = False
    closest_obstacle_label = ""
    
    if hasattr(results, "boxes"):
        for box in results.boxes:
            x1, y1, x2, y2 = map(int, box.xyxy[0].cpu().numpy())
            cls_id = int(box.cls.cpu().numpy()[0])
            conf = float(box.conf.cpu().numpy())
            label = yolo.names[cls_id]

            if label not in OBSTACLE_CLASSES:
                continue

            
            cv2.rectangle(frame, (x1, y1), (x2, y2), (0, 200, 0), 2)
            cv2.putText(frame, f"{label} {conf:.2f}", (x1, y1 - 6),
                        cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 200, 0), 2)

        
            if last_depth_map is not None:
                box_depth = depth_map[y1:y2, x1:x2]
                if box_depth.size > 0:
                    mean_depth = np.median(box_depth)

                    if mean_depth > DEPTH_THRESHOLD:
                        alert_triggered = True
                        closest_obstacle_label = label
                        cv2.putText(frame, f"ALERT: {label} close ({mean_depth:.2f})",(30, 40),cv2.FONT_HERSHEY_SIMPLEX,
                                    0.8, (0, 0, 255), 3)
                        cv2.rectangle(frame, (x1, y1), (x2, y2), (0, 0, 255), 3)

    if alert_triggered and (time.time() - last_alert_time > alert_interval):
        speak_alert(f"{closest_obstacle_label} ahead")
        last_alert_time = time.time()

    cv2.imshow("Smart Glasses View", frame)
    cv2.imshow("Depth Map", colored_depth_map)

    if cv2.waitKey(1) & 0xFF == ord("q"):
        break

cap.release()
cv2.destroyAllWindows()


Using cache found in C:\Users\Pranav/.cache\torch\hub\intel-isl_MiDaS_master


Loading weights:  None


Using cache found in C:\Users\Pranav/.cache\torch\hub\rwightman_gen-efficientnet-pytorch_master
Using cache found in C:\Users\Pranav/.cache\torch\hub\intel-isl_MiDaS_master
  conf = float(box.conf.cpu().numpy())
  conf = float(box.conf.cpu().numpy())
  conf = float(box.conf.cpu().numpy())
  conf = float(box.conf.cpu().numpy())
  conf = float(box.conf.cpu().numpy())
  conf = float(box.conf.cpu().numpy())
  conf = float(box.conf.cpu().numpy())
  conf = float(box.conf.cpu().numpy())
  conf = float(box.conf.cpu().numpy())
  conf = float(box.conf.cpu().numpy())
  conf = float(box.conf.cpu().numpy())
  conf = float(box.conf.cpu().numpy())
  conf = float(box.conf.cpu().numpy())
  conf = float(box.conf.cpu().numpy())
  conf = float(box.conf.cpu().numpy())
  conf = float(box.conf.cpu().numpy())
  conf = float(box.conf.cpu().numpy())
  conf = float(box.conf.cpu().numpy())
  conf = float(box.conf.cpu().numpy())
  conf = float(box.conf.cpu().numpy())
  conf = float(box.conf.cpu().numpy())
  conf =

In [6]:
from gtts import gTTS
import os

# Text to be converted to speech
mytext = "Hello, this is a test of the Google Text-to-Speech library."

# The language for the speech (e.g., 'en' for English)
language = 'en'

# Create a gTTS object
tts_obj = gTTS(text=mytext, lang=language, slow=False)

# Save the audio file
tts_obj.save("test_audio.mp3")

# Optional: Play the generated audio file
os.system("start test_audio.mp3")
# For macOS: os.system("afplay test_audio.mp3")
# For Linux: os.system("mpg123 test_audio.mp3")

print("Audio file 'test_audio.mp3' has been created.")

Audio file 'test_audio.mp3' has been created.
