In [3]:
import cv2
import numpy as np
import face_recognition
import pyttsx3
import time

In [30]:
def recognize_faces():
    """Recognize known faces on the default webcam and speak the matched name.

    Reference photos are encoded once up front. Each webcam frame is then
    downscaled to a quarter of its size, searched for faces, and every face
    is compared against the reference encodings. The first recognized face
    is announced through text-to-speech; no further announcement is made
    until a frame with no faces has been seen. Every detected face is boxed
    and labeled (with "Unknown" when it matches no reference) in a preview
    window. Press 'q' in the window to stop.

    Returns:
        None. Prints a message and returns early when no reference photo
        yields a usable face encoding.
    """
    # Initialize text-to-speech engine
    engine = pyttsx3.init()

    # Load known faces and names
    known_faces = ['photos/mohamed_elhalak.jpg']
    known_names = ['mohamed tarek']

    encodings_known = []
    names_known = []
    for photo_path, person in zip(known_faces, known_names):
        photo_bgr = cv2.imread(photo_path)
        if photo_bgr is None:
            # cv2.imread signals an unreadable/missing file by returning None.
            print(f"Could not read reference image: {photo_path}")
            continue
        # cv2.imread yields BGR; convert so the reference encodings are built
        # from the same channel order (RGB) as the live frames below.
        photo_rgb = cv2.cvtColor(photo_bgr, cv2.COLOR_BGR2RGB)
        photo_encodings = face_recognition.face_encodings(photo_rgb)
        if not photo_encodings:
            print(f"No face found in reference image: {photo_path}")
            continue
        encodings_known.append(photo_encodings[0])
        names_known.append(person)

    if not encodings_known:
        # np.argmin below would fail on an empty distance array.
        print("No usable reference faces; nothing to recognize.")
        return

    # Initialize webcam
    cap = cv2.VideoCapture(0)
    face_announced = False

    try:
        while True:
            ret, img = cap.read()
            if not ret:
                print("Failed to capture frame from webcam")
                break

            # Detect on a quarter-size RGB copy; boxes are scaled back by 4 below.
            img_resized = cv2.resize(img, (0, 0), None, 0.25, 0.25)
            img_resized = cv2.cvtColor(img_resized, cv2.COLOR_BGR2RGB)

            face_locations = face_recognition.face_locations(img_resized)
            encodings_current = face_recognition.face_encodings(img_resized, face_locations)

            if len(encodings_current) == 0:
                # No face in view: allow the next recognized face to be announced.
                face_announced = False

            for face_encoding, face_location in zip(encodings_current, face_locations):
                matches = face_recognition.compare_faces(encodings_known, face_encoding)
                face_distances = face_recognition.face_distance(encodings_known, face_encoding)
                match_index = int(np.argmin(face_distances))

                # Decide the label for THIS face on every iteration so an
                # unmatched face is never drawn with an undefined or stale name.
                if matches[match_index]:
                    name = names_known[match_index]
                    if not face_announced:
                        engine.say(name)
                        engine.runAndWait()
                        face_announced = True
                else:
                    name = "Unknown"

                # Map the quarter-size coordinates back onto the full frame.
                top, right, bottom, left = (coord * 4 for coord in face_location)
                cv2.rectangle(img, (left, top), (right, bottom), (0, 0, 255), 2)
                cv2.putText(img, name, (left + 6, bottom - 6), cv2.FONT_HERSHEY_COMPLEX, 1, (255, 255, 255), 2)

            cv2.imshow('Face Recognition', img)

            if cv2.waitKey(1) & 0xFF == ord('q'):
                break
    finally:
        # Always free the camera and close the window, even after an error.
        cap.release()
        cv2.destroyAllWindows()


In [None]:
# Start the live face-recognition loop; press 'q' in the video window to stop.
recognize_faces()

In [7]:


def detect_objects():
    """Detect objects on the default webcam and announce arrivals and exits.

    Loads class labels from 'coco.names' and an SSD MobileNet V3 detection
    model, then processes webcam frames in a loop. Each detection at or above
    0.5 confidence is boxed and labeled in a preview window. A class is
    spoken once when it appears in a frame after being absent from the
    previous one, and "<class> has left the frame." is spoken when a class
    present in the previous frame is missing from the current one. Press 'q'
    in the window to stop.

    Returns:
        None.
    """
    engine = pyttsx3.init()

    # Load class names
    with open('coco.names', 'rt') as f:
        classNames = f.read().rstrip('\n').split('\n')

    # Load model
    net = cv2.dnn_DetectionModel('frozen_inference_graph.pb', 'ssd_mobilenet_v3_large_coco_2020_01_14.pbtxt')
    # NOTE(review): this graph is normally fed a square 320x320 input; the
    # previous height of 230 looked like a typo and distorts the frame.
    net.setInputSize(320, 320)
    net.setInputScale(1.0 / 127.5)
    net.setInputMean((127.5, 127.5, 127.5))
    net.setInputSwapRB(True)

    # Webcam capture
    cap = cv2.VideoCapture(0)
    detected_objects = set()  # class names seen in the previous frame

    try:
        while True:
            ret, img = cap.read()
            if not ret:
                break

            # Detect objects
            classIds, confs, bbox = net.detect(img, confThreshold=0.5)
            detected_objects_in_frame = set()

            # Process detections
            if len(classIds) != 0:
                for classId, box in zip(classIds.flatten(), bbox):
                    # Class ids are 1-based into classNames; skip ids that
                    # would wrap around (0) or fall past the label list.
                    if not 1 <= classId <= len(classNames):
                        continue
                    className = classNames[classId - 1]
                    cv2.rectangle(img, box, color=(0, 255, 0), thickness=2)
                    cv2.putText(img, className, (box[0] + 10, box[1] + 20), cv2.FONT_HERSHEY_COMPLEX, 1, (0, 255, 0), thickness=2)

                    # Announce a class once per arrival, even when several
                    # instances of it show up in the same frame.
                    if className not in detected_objects and className not in detected_objects_in_frame:
                        engine.say(f"{className}")
                        engine.runAndWait()
                    detected_objects_in_frame.add(className)

            # Announce objects that exited the frame. This runs for every
            # frame (including frames with no detections) so that the last
            # object leaving is reported too.
            for obj in sorted(detected_objects - detected_objects_in_frame):
                engine.say(f"{obj} has left the frame.")
                engine.runAndWait()

            detected_objects = detected_objects_in_frame

            # Display the resulting image
            cv2.imshow('Object Detection', img)

            # Break the loop if 'q' key is pressed
            if cv2.waitKey(1) & 0xFF == ord('q'):
                break
    finally:
        # Release resources even if detection or speech raised.
        cap.release()
        cv2.destroyAllWindows()


In [8]:
# Start the live object-detection loop; press 'q' in the video window to stop.
detect_objects()

In [8]:
import time
import cv2
import pytesseract
from gtts import gTTS
import os

In [13]:

def recognize_text():
    """Capture one webcam frame, OCR it, and play the text back as speech.

    Grabs a single frame from the default camera, extracts text with
    pytesseract, prints it, converts it to English speech with gTTS, saves
    the result as 'output_audio.mp3' in the working directory, and opens
    that file with the operating system's default handler.

    Returns:
        None. Prints a message and returns early when the frame cannot be
        captured or when no text is recognized.
    """
    # Local imports: only needed for the cross-platform "open file" step.
    import subprocess
    import sys

    cap = cv2.VideoCapture(0)
    try:
        time.sleep(2)  # Wait for the camera to be ready
        ret, frame = cap.read()
    finally:
        # Free the camera even if the read raised.
        cap.release()

    if not ret:
        print("Error capturing image from camera.")
        return

    # OpenCV frames are BGR, while pytesseract assumes RGB for array input.
    frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)

    # Recognize text in the captured frame
    extracted_text = pytesseract.image_to_string(frame_rgb)
    print("Extracted Text:", extracted_text)

    # gTTS refuses empty input, so stop here when OCR found nothing to read.
    if not extracted_text.strip():
        print("No text detected; skipping audio output.")
        return

    # Convert extracted text to audio
    audio_path = 'output_audio.mp3'
    tts = gTTS(text=extracted_text, lang='en')
    tts.save(audio_path)

    # Play the audio file with the platform's default opener.
    if sys.platform.startswith('win'):
        os.startfile(audio_path)
    elif sys.platform == 'darwin':
        subprocess.run(['open', audio_path], check=False)
    else:
        subprocess.run(['xdg-open', audio_path], check=False)

# The function is called in the next cell.

In [14]:
# Capture one frame from the webcam, OCR it, and play the recognized text as audio.
recognize_text()


Extracted Text: Client Name Project Type Date Completed Hours Spent Amount Bi
deo Creation 6/30/2024 2 $

Karma Security

5/31/2024 2$

5/20/2024 14 $

4/8/2024 1 $

7/3/2024 8s

3/18/2024 33 $ 2

6/9/2024 14 $ 63
: 7/16/2024 23 $

5/30/2024 20


