In [1]:
!pip install opencv-python
!pip install keyboard
!pip install pyttsx3
!pip install gtts pygame
!pip install pytesseract
!pip install Pillow
!pip install gTTS
!pip install pygame
!pip install opencv-python pygame gtts
!pip install ultralytics opencv-python gtts pygame
# For most Windows CPUs; if you have NVIDIA GPU, look up paddlepaddle-gpu instructions
!pip install PyQt5 




In [2]:
from object_detector import ObjectDetector
from spatial_analyzer import SpatialAnalyzer
from audio_feedback import AudioFeedback
from camera_manager import CameraManager
from config import Config

  from pkg_resources import resource_stream, resource_exists


pygame 2.6.1 (SDL 2.28.4, Python 3.13.0)
Hello from the pygame community. https://www.pygame.org/contribute.html


In [3]:
import cv2
import pytesseract
from PIL import Image
from gtts import gTTS
import pygame
from datetime import datetime
import os
import numpy as np

# Locate the Tesseract executable without forcing a Windows-only path on everyone:
#   1. an explicit TESSERACT_CMD environment variable always wins;
#   2. otherwise the default Windows install location is used when it exists;
#   3. otherwise pytesseract's own default is left untouched, so a `tesseract`
#      binary found on PATH keeps working.
_DEFAULT_WINDOWS_TESSERACT = r"C:\Program Files\Tesseract-OCR\tesseract.exe"
_tesseract_cmd = os.environ.get("TESSERACT_CMD")
if not _tesseract_cmd and os.path.exists(_DEFAULT_WINDOWS_TESSERACT):
    _tesseract_cmd = _DEFAULT_WINDOWS_TESSERACT
if _tesseract_cmd:
    pytesseract.pytesseract.tesseract_cmd = _tesseract_cmd

class TextReaderApp:
    """Webcam text reader: OCR a capture area on key press and speak the result.

    The live view is shown mirrored (selfie style) with a GUI overlay. Pressing
    SPACE runs Tesseract OCR on the capture rectangle, and the recognised text is
    spoken through gTTS + pygame. Keys 1-4 switch the OCR language, R repeats the
    last recognised text and Q quits.
    """

    # Tesseract OCR language code -> gTTS language code, so recognised text is
    # spoken with a voice for the language it was read in.
    TTS_LANG_CODES = {"eng": "en", "ben": "bn", "hin": "hi", "tam": "ta"}

    def __init__(self):
        """Open camera 0, request a 640x480 stream and initialise UI state."""
        self.cap = cv2.VideoCapture(0)
        self.frame_width = 640
        self.frame_height = 480
        self.cap.set(cv2.CAP_PROP_FRAME_WIDTH, self.frame_width)
        self.cap.set(cv2.CAP_PROP_FRAME_HEIGHT, self.frame_height)

        # GUI settings
        self.font = cv2.FONT_HERSHEY_SIMPLEX
        self.running = True
        self.last_captured_text = ""
        # OCR language the last captured text was read in (used when repeating).
        self.last_captured_lang = "eng"
        # Key character -> (Tesseract language code, display name)
        self.languages = {"1": ("eng", "English"), "2": ("ben", "Bengali"),
                          "3": ("hin", "Hindi"), "4": ("tam", "Tamil")}
        self.current_lang = "eng"
        self.current_lang_name = "English"

    def _tts_lang_for(self, ocr_lang):
        """Return the gTTS language code for a Tesseract code (English fallback)."""
        return self.TTS_LANG_CODES.get(ocr_lang, "en")

    def draw_overlay(self, frame):
        """Draw the GUI overlay onto ``frame`` IN PLACE and return it.

        Because the frame is modified, callers that still need the clean camera
        image (e.g. for OCR) must pass a copy.
        """
        overlay = frame.copy()

        # Semi-transparent background for text
        cv2.rectangle(overlay, (10, 10), (self.frame_width - 10, 180), (0, 0, 0), -1)
        cv2.addWeighted(overlay, 0.7, frame, 0.3, 0, frame)

        # Title
        cv2.putText(frame, "Text Reader & Voice Assistant", (20, 40),
                    self.font, 0.8, (0, 255, 255), 2)

        # Instructions (the first line, the current language, is highlighted in green)
        instructions = [
            f"Language: {self.current_lang_name} (Press 1-4 to change)",
            "SPACE: Capture & Read Text",
            "R: Repeat Last Text",
            "Q: Quit Application"
        ]

        for i, instruction in enumerate(instructions):
            color = (255, 255, 255) if i > 0 else (0, 255, 0)
            cv2.putText(frame, instruction, (20, 70 + i*25),
                        self.font, 0.5, color, 1)

        # Status bar at bottom
        cv2.rectangle(frame, (0, self.frame_height - 60),
                      (self.frame_width, self.frame_height), (50, 50, 50), -1)

        status = "Ready to capture (Mirror Mode)"
        cv2.putText(frame, status, (20, self.frame_height - 30),
                    self.font, 0.6, (0, 255, 0), 2)

        # Capture frame indicator (the centre half of the frame in both directions)
        cv2.rectangle(frame, (self.frame_width//4, self.frame_height//4),
                      (3*self.frame_width//4, 3*self.frame_height//4), (0, 255, 0), 2)
        cv2.putText(frame, "Text Capture Area",
                    (self.frame_width//4 + 10, self.frame_height//4 - 10),
                    self.font, 0.5, (0, 255, 0), 2)

        return frame

    def show_processing_overlay(self, frame, message):
        """Blend a banner with ``message`` onto ``frame`` IN PLACE and return it."""
        overlay = frame.copy()
        cv2.rectangle(overlay, (50, self.frame_height//2 - 50),
                      (self.frame_width - 50, self.frame_height//2 + 50), (0, 100, 200), -1)
        cv2.addWeighted(overlay, 0.8, frame, 0.2, 0, frame)

        cv2.putText(frame, message, (70, self.frame_height//2),
                    self.font, 0.7, (255, 255, 255), 2)
        return frame

    def capture_and_process(self, original_frame, display_frame=None):
        """OCR the capture area of a mirrored frame and speak the result.

        Args:
            original_frame: The MIRRORED camera frame as shown to the user. It
                should be free of GUI overlay, otherwise the overlay graphics end
                up in the OCR input.
            display_frame: Optional frame (typically the one carrying the GUI
                overlay) on which the "Processing Text..." banner is drawn. When
                omitted the banner is drawn on ``original_frame`` itself.

        Side effects: updates ``last_captured_text`` / ``last_captured_lang``,
        prints the outcome and speaks it via ``text_to_speech``.
        """
        # Capture area coordinates (same rectangle that draw_overlay shows)
        x1 = self.frame_width // 4
        y1 = self.frame_height // 4
        x2 = 3 * self.frame_width // 4
        y2 = 3 * self.frame_height // 4

        # Copy the region BEFORE any banner is drawn so the OCR input stays clean.
        # Cropping the mirrored frame first (and un-mirroring the crop later) keeps
        # the OCR region identical to the rectangle on screen even when the camera
        # did not honour the requested frame size.
        mirrored_crop = original_frame[y1:y2, x1:x2].copy()

        # Freeze the language used for this capture.
        ocr_lang = self.current_lang
        ocr_lang_name = self.current_lang_name

        # Show processing message on the mirrored display
        canvas = original_frame if display_frame is None else display_frame
        processing_frame = self.show_processing_overlay(canvas, "Processing Text...")
        cv2.imshow("Text Reader", processing_frame)
        cv2.waitKey(1)

        try:
            # Un-mirror so the text reads left-to-right, then hand Tesseract an
            # in-memory RGB image (OpenCV frames are BGR). No temp file is needed.
            crop_img = cv2.flip(mirrored_crop, 1)
            image = Image.fromarray(cv2.cvtColor(crop_img, cv2.COLOR_BGR2RGB))
            text = pytesseract.image_to_string(image, lang=ocr_lang)
        except Exception as e:
            print(f"Error processing text: {e}")
            self.text_to_speech("Error processing the image")
            return

        self.last_captured_text = text.strip()
        self.last_captured_lang = ocr_lang

        if self.last_captured_text:
            print(f"Extracted Text ({ocr_lang_name}): {self.last_captured_text}")
            self.text_to_speech(self.last_captured_text,
                                lang=self._tts_lang_for(ocr_lang))
        else:
            print("No text found in capture area")
            self.text_to_speech("No text found in the capture area")

    def text_to_speech(self, text, lang="en"):
        """Speak ``text`` with gTTS and pygame, blocking until playback ends.

        Args:
            text: Text to speak; ``None``, empty or whitespace-only text is ignored.
            lang: gTTS language code of the voice (defaults to English, which is
                what the application's own status messages use).

        Audio failures are reported on stdout and never raised. The mixer is shut
        down and the temporary mp3 removed even when synthesis or playback fails.
        """
        if not text or not text.strip():
            return

        filename = f"speech_{datetime.now().strftime('%Y%m%d_%H%M%S')}.mp3"
        mixer_started = False
        try:
            tts = gTTS(text=text, lang=lang)
            tts.save(filename)

            pygame.mixer.init()
            mixer_started = True
            pygame.mixer.music.load(filename)
            pygame.mixer.music.play()

            # Blocking wait: poll about 10 times per second until playback finishes.
            clock = pygame.time.Clock()
            while pygame.mixer.music.get_busy():
                clock.tick(10)

        except Exception as e:
            print(f"Audio error: {e}")
        finally:
            # Shut the mixer down before deleting the file it was playing.
            if mixer_started:
                pygame.mixer.quit()
            if os.path.exists(filename):
                try:
                    os.remove(filename)
                except OSError as e:
                    print(f"Audio error: {e}")

    def change_language(self, key):
        """Switch the OCR language for a key in ``self.languages``; ignore others."""
        if key in self.languages:
            self.current_lang, self.current_lang_name = self.languages[key]
            print(f"Language changed to: {self.current_lang_name}")
            self.text_to_speech(f"Language changed to {self.current_lang_name}")

    def _handle_key(self, key, mirrored_frame, display_frame=None):
        """Dispatch one key code (``cv2.waitKey(...) & 0xFF``) to its action."""
        key_char = chr(key) if key < 128 else None

        if key == 32:  # SPACE
            self.capture_and_process(mirrored_frame, display_frame=display_frame)
        elif key_char in ['1', '2', '3', '4']:
            self.change_language(key_char)
        elif key_char == 'r' or key_char == 'R':
            if self.last_captured_text:
                print(f"Repeating: {self.last_captured_text}")
                # Replay with the voice of the language the text was captured in,
                # even if the OCR language has been switched since.
                self.text_to_speech(self.last_captured_text,
                                    lang=self._tts_lang_for(self.last_captured_lang))
            else:
                self.text_to_speech("No previous text to repeat")
        elif key_char == 'q' or key_char == 'Q':
            print("Quitting...")
            self.running = False

    def run(self):
        """Main loop: show the mirrored view with overlay and react to keys.

        Runs until Q is pressed or a frame cannot be read. ``cleanup`` is always
        called, including when the loop is interrupted by an exception.
        """
        print("Enhanced Text Reader started in Mirror Mode!")
        print("Press 1-4 to change language, SPACE to capture, R to repeat, Q to quit")

        try:
            while self.running:
                ret, frame = self.cap.read()
                if not ret:
                    print("Failed to grab frame!")
                    break

                # Flip frame horizontally for mirror effect in display
                mirrored_frame = cv2.flip(frame, 1)

                # Draw the GUI on a COPY: draw_overlay paints in place, and the
                # clean mirrored frame is still needed as OCR input.
                display_frame = self.draw_overlay(mirrored_frame.copy())

                # Show mirrored frame
                cv2.imshow("Text Reader", display_frame)

                # Handle key presses
                key = cv2.waitKey(1) & 0xFF
                self._handle_key(key, mirrored_frame, display_frame)
        finally:
            self.cleanup()

    def cleanup(self):
        """Release the camera and close all OpenCV windows."""
        self.cap.release()
        cv2.destroyAllWindows()
        print("Application closed successfully!")

# Main execution: start the reader only when this cell/script is run directly.
if __name__ == "__main__":
    app = TextReaderApp()  # opens camera index 0 and initialises the UI state
    app.run()  # blocks until Q is pressed or a frame cannot be read



Enhanced Text Reader started in Mirror Mode!
Press 1-4 to change language, SPACE to capture, R to repeat, Q to quit
Quitting...
Application closed successfully!
