In [3]:
!pip install pytesseract pillow opencv-python pyautogui PyPDF2 PySimpleGUI
!pip install python-docx
!pip install fpdf
!pip install ocrmypdf
!pip install ghostscript
pip install --user easyocr
!pip install SpeechRecognition

Collecting python-docx
  Downloading python-docx-0.8.11.tar.gz (5.6 MB)
     ---------------------------------------- 0.0/5.6 MB ? eta -:--:--
     - -------------------------------------- 0.2/5.6 MB 3.9 MB/s eta 0:00:02
     ---- ----------------------------------- 0.6/5.6 MB 6.8 MB/s eta 0:00:01
     --------- ------------------------------ 1.3/5.6 MB 9.4 MB/s eta 0:00:01
     ----------------- ---------------------- 2.4/5.6 MB 12.9 MB/s eta 0:00:01
     --------------------- ------------------ 3.0/5.6 MB 13.8 MB/s eta 0:00:01
     ----------------------------- ---------- 4.1/5.6 MB 15.4 MB/s eta 0:00:01
     ---------------------------------- ----- 4.8/5.6 MB 14.6 MB/s eta 0:00:01
     ---------------------------------------  5.6/5.6 MB 15.5 MB/s eta 0:00:01
     ---------------------------------------- 5.6/5.6 MB 14.8 MB/s eta 0:00:00
  Preparing metadata (setup.py): started
  Preparing metadata (setup.py): finished with status 'done'
Building wheels for collected packages: python-

In [1]:
import pytesseract
from PIL import Image
import PySimpleGUI as sg
import os
import shutil
import threading
import speech_recognition as sr
from fpdf import FPDF
from docx import Document
import cv2
import ocrmypdf
import logging

# Custom event keys that the worker functions post with
# window.write_event_value() so the event loop can show a per-task
# "completed" pop-up message
DISPLAY_COMPLETED_MESSAGE_IMAGE = '-DISPLAY-COMPLETED-MESSAGE-IMAGE'
DISPLAY_COMPLETED_MESSAGE_PDF = '-DISPLAY-COMPLETED-MESSAGE-PDF'
DISPLAY_COMPLETED_MESSAGE_VOICE = '-DISPLAY-COMPLETED-MESSAGE-VOICE'
DISPLAY_COMPLETED_MESSAGE_VIDEO = '-DISPLAY-COMPLETED-MESSAGE-VIDEO'

# Configure the root logger so that only ERROR and CRITICAL records are emitted
logging.basicConfig(level=logging.ERROR)

# Path of the most recently converted PDF, or None when there is nothing to
# save. Written by on_pdf_conversion_complete() and read/reset by the
# "Save PDF" handler in the event loop.
pdf_conversion_result = None

# Image
def process_image(image_path, window):
    """Run Tesseract OCR on an image file and show the text in the image tab.

    The event loop starts this function on a worker thread.

    Args:
        image_path: Path of the image file to read.
        window: PySimpleGUI window that owns the '-OUTPUT-IMAGE-' and
            '-STATUS-IMAGE-' elements.

    On success the recognised text is written to the output element, the
    status becomes "Progress: Completed" and the image completion event is
    posted. On failure the error is written to the output element, the status
    becomes "Progress: Failed" and no completion event is posted.
    """
    try:
        # The context manager closes the image's file handle once OCR is done.
        with Image.open(image_path) as img:
            text = pytesseract.image_to_string(img)
    except Exception as exc:
        # Without this the worker thread would die with a traceback and the
        # status label would stay on "Progress: Processing" indefinitely.
        window['-OUTPUT-IMAGE-'].update(f"OCR failed: {exc}")
        window['-STATUS-IMAGE-'].update("Progress: Failed")
        return

    window['-OUTPUT-IMAGE-'].update(text)
    window['-STATUS-IMAGE-'].update("Progress: Completed")
    # Send a custom event to display the "Completed" message
    window.write_event_value(DISPLAY_COMPLETED_MESSAGE_IMAGE, "Completed")

# Handwriting
# Function to preprocess images before OCR
def preprocess_image(img):
    """Convert an image to the colour mode used for OCR.

    Args:
        img: Image object exposing ``mode``, ``info`` and ``convert()``.

    Returns:
        The result of ``img.convert("RGBA")`` when the image is in palette
        mode ("P") and carries a "transparency" entry in ``img.info``;
        otherwise the result of ``img.convert('L')``.
    """
    # ``info`` is only consulted for palette images, as in a short-circuit test.
    palette_with_alpha = img.mode == "P" and "transparency" in img.info
    target_mode = "RGBA" if palette_with_alpha else 'L'
    return img.convert(target_mode)

def process_handwriting(image_path, window):
    """OCR a handwriting image and show the text in the image tab.

    Same flow as plain image extraction, except that the image is first passed
    through preprocess_image() and Tesseract is called with '--psm 6'.
    The event loop starts this function on a worker thread.

    Args:
        image_path: Path of the image file to read.
        window: PySimpleGUI window that owns the '-OUTPUT-IMAGE-' and
            '-STATUS-IMAGE-' elements.

    On success the recognised text is written to the output element, the
    status becomes "Progress: Completed" and the image completion event is
    posted. On failure the error is written to the output element, the status
    becomes "Progress: Failed" and no completion event is posted.
    """
    config = '--psm 6'
    try:
        # The context manager closes the source file handle once OCR is done.
        with Image.open(image_path) as img:
            prepared = preprocess_image(img)
            text = pytesseract.image_to_string(prepared, config=config)
    except Exception as exc:
        # Without this the worker thread would die with a traceback and the
        # status label would stay on "Progress: Processing" indefinitely.
        window['-OUTPUT-IMAGE-'].update(f"OCR failed: {exc}")
        window['-STATUS-IMAGE-'].update("Progress: Failed")
        return

    window['-OUTPUT-IMAGE-'].update(text)
    window['-STATUS-IMAGE-'].update("Progress: Completed")
    # Send a custom event to display the "Completed" message
    window.write_event_value(DISPLAY_COMPLETED_MESSAGE_IMAGE, "Completed")

# PDF
def process_pdf_task(pdf_path, window, callback):
    """Worker entry point: OCR a PDF into 'output_folder' and report the outcome.

    Args:
        pdf_path: Path of the scanned PDF to convert.
        window: PySimpleGUI window, handed through to ``callback`` unchanged.
        callback: Callable invoked as ``callback(window, result)`` where
            ``result`` is whatever process_pdf() returned.
    """
    target_dir = 'output_folder'
    folder_missing = not os.path.exists(target_dir)
    if folder_missing:
        os.makedirs(target_dir)

    callback(window, process_pdf(pdf_path, target_dir))

def process_pdf(pdf_path, output_folder):
    """OCR a PDF with ocrmypdf and write the result into ``output_folder``.

    The output keeps the base name of ``pdf_path``.

    Args:
        pdf_path: Path of the input PDF.
        output_folder: Folder that receives the converted file.

    Returns:
        The path of the converted PDF, or None when any exception occurred
        (an error pop-up is shown in that case).
    """
    try:
        destination = os.path.join(output_folder, os.path.basename(pdf_path))
        ocrmypdf.ocr(pdf_path, destination, force_ocr=True, output_type="pdf", language="eng")
    except Exception as err:
        # NOTE(review): in this file process_pdf is reached from a worker
        # thread (via process_pdf_task), so this pop-up is created off the
        # main thread - confirm the GUI toolkit tolerates that.
        sg.popup(f"OCR failed: {err}", title="Error")
        return None
    else:
        return destination
        
def on_pdf_conversion_complete(window, result):
    """Record the outcome of a PDF conversion and reflect it in the PDF tab.

    Args:
        window: PySimpleGUI window that owns the '-OUTPUT-PDF-' and
            '-STATUS-PDF-' elements.
        result: Path of the converted PDF, or a falsy value on failure.

    The result is stored in the module-level ``pdf_conversion_result``.
    """
    global pdf_conversion_result
    pdf_conversion_result = result

    # Failure path first: tell the user and mark the status as failed.
    if not pdf_conversion_result:
        sg.popup("PDF conversion failed or was not initiated.", title="Error")
        window['-STATUS-PDF-'].update("Progress: Failed")
        return

    window['-OUTPUT-PDF-'].update("PDF Conversion Completed...")
    window['-STATUS-PDF-'].update("Progress: Completed")
    # Post the custom event that makes the event loop show the "Completed" message
    window.write_event_value(DISPLAY_COMPLETED_MESSAGE_PDF, "Completed")

# Voice
def process_voice(voice_file, window):
    """Transcribe an audio file and show the transcript in the voice tab.

    Args:
        voice_file: Path of the audio file handed to voice_to_text().
        window: PySimpleGUI window that owns the '-OUTPUT-VOICE-' and
            '-STATUS-VOICE-' elements.
    """
    transcript = voice_to_text(voice_file)

    # Output first, then status - the same order in which they are refreshed.
    element_updates = (
        ('-OUTPUT-VOICE-', transcript),
        ('-STATUS-VOICE-', "Progress: Completed"),
    )
    for element_key, new_value in element_updates:
        window[element_key].update(new_value)

    # Post the custom event that makes the event loop show the "Completed" message
    window.write_event_value(DISPLAY_COMPLETED_MESSAGE_VOICE, "Completed")

def voice_to_text(voice_file):
    """Transcribe an audio file using Google speech recognition.

    Args:
        voice_file: Path of the audio file to transcribe.

    Returns:
        The recognised text, or a human-readable message when the file could
        not be read, the speech was unintelligible, or the recognition
        service could not be reached. The function reports these cases via
        its return value instead of raising, so the caller can always display
        the result.
    """
    # Create a recognizer instance
    recognizer = sr.Recognizer()

    # Load the audio file. The event loop in this file lets '.mp3' files
    # through; NOTE(review): sr.AudioFile is understood to raise ValueError
    # for formats it cannot decode - confirm against the library docs.
    try:
        with sr.AudioFile(voice_file) as source:
            audio_data = recognizer.record(source)
    except (ValueError, OSError) as e:
        # Missing, unreadable or undecodable file
        return f"Could not read the audio file; {e}"

    # Attempt to recognize the speech in the audio file
    try:
        return recognizer.recognize_google(audio_data)
    except sr.UnknownValueError:
        # Speech was unintelligible
        return "Unable to understand the audio"
    except sr.RequestError as e:
        # Could not request results from Google Speech Recognition service
        return f"Could not request results; {e}"
# Video
def extract_frames(video_path, frames_per_minute=24):
    """Extract frames from a video at a given rate (frames per minute).

    Args:
        video_path: Path of the video file to read.
        frames_per_minute: How many frames to keep per minute of video.
            Must be positive.

    Returns:
        A list of the sampled frames in playback order. The list is empty
        when the video cannot be opened or does not report a positive frame
        rate.

    Raises:
        ValueError: If ``frames_per_minute`` is not positive.
    """
    if frames_per_minute <= 0:
        raise ValueError("frames_per_minute must be positive")

    video = cv2.VideoCapture(video_path)
    try:
        fps = video.get(cv2.CAP_PROP_FPS)
        # ``not (fps > 0)`` also rejects NaN, not only zero/negative rates.
        if not video.isOpened() or not (fps > 0):
            return []

        # Number of source frames between two kept frames. This is
        # total_frames / (frames_per_minute * minutes) with the frame count
        # cancelled out, so a container that reports no frame count cannot
        # cause a division by zero. Clamp to 1 so the modulo below is safe.
        frame_interval = max(1, int(fps * 60 / frames_per_minute))

        frames = []
        current_frame = 0
        success, image = video.read()
        while success:
            if current_frame % frame_interval == 0:
                frames.append(image)

            success, image = video.read()
            current_frame += 1

        return frames
    finally:
        # Always free the capture handle, even if reading raised.
        video.release()

def save_frames(frames, output_folder):
    """Write each frame to ``output_folder`` as ``frame_<index>.png``.

    The folder is created first when it does not exist yet.

    Args:
        frames: Iterable of frames accepted by ``cv2.imwrite``.
        output_folder: Destination folder for the PNG files.
    """
    folder_missing = not os.path.exists(output_folder)
    if folder_missing:
        os.makedirs(output_folder)

    for index, frame in enumerate(frames):
        target_path = os.path.join(output_folder, f"frame_{index}.png")
        cv2.imwrite(target_path, frame)

def process_video(video_path, window):
    """Extract frames from a video and report the frame count in the video tab.

    Args:
        video_path: Path of the video handed to extract_frames().
        window: PySimpleGUI window that owns the '-OUTPUT-VIDEO-' and
            '-STATUS-VIDEO-' elements.

    Only the number of extracted frames is shown; the frames themselves are
    not kept by this function.
    """
    extracted = extract_frames(video_path)
    summary = f"Extracted {len(extracted)} Frames from Video..."

    window['-OUTPUT-VIDEO-'].update(summary)
    window['-STATUS-VIDEO-'].update("Progress: Completed")
    # Post the custom event that makes the event loop show the "Completed" message
    window.write_event_value(DISPLAY_COMPLETED_MESSAGE_VIDEO, "Completed")

# Function to save text as a TXT file, Word document and PDF document
def save_text_to_txt(text, filename):
    """Save ``text`` as a UTF-8 encoded ``.txt`` file.

    Args:
        text: The text to write.
        filename: Target path; any extension it carries is replaced by
            ``.txt``.
    """
    base_filename = os.path.splitext(filename)[0]  # Extract base filename without extension
    # An explicit encoding keeps non-ASCII text from failing on platforms
    # whose default encoding cannot represent it.
    with open(f"{base_filename}.txt", "w", encoding="utf-8") as file:
        file.write(text)

def save_text_to_word(text, filename):
    """Save ``text`` as a single paragraph in a ``.docx`` document.

    Args:
        text: The text to write.
        filename: Target path; any extension it carries is replaced by
            ``.docx``.
    """
    base_filename = os.path.splitext(filename)[0]  # Extract base filename without extension

    # A .docx file is XML, which cannot hold most control characters. Drop
    # them (keeping tab, newline and carriage return) so that text containing,
    # for example, a form feed can still be saved.
    # NOTE(review): OCR output is believed to end with a form feed - confirm.
    xml_illegal = {code: None for code in range(32) if chr(code) not in "\t\n\r"}
    safe_text = text.translate(xml_illegal)

    doc = Document()
    doc.add_paragraph(safe_text)
    doc.save(f"{base_filename}.docx")

def save_text_to_pdf(text, filename):
    """Save ``text`` as a one-column PDF using 12pt Arial.

    Args:
        text: The text to write. Characters outside Latin-1 are replaced by
            '?' because the font used here cannot encode them.
        filename: Target path; any extension it carries is replaced by
            ``.pdf``.
    """
    base_filename = os.path.splitext(filename)[0]  # Extract base filename without extension

    # The built-in "Arial" font only covers Latin-1; anything else would
    # raise an encoding error while the PDF is being produced. Replace such
    # characters up front instead of losing the whole document.
    printable_text = text.encode("latin-1", "replace").decode("latin-1")

    pdf = FPDF()
    pdf.add_page()
    pdf.set_font("Arial", size=12)
    pdf.multi_cell(0, 10, printable_text)
    pdf.output(f"{base_filename}.pdf")

# Tab 1: Extract Text
tab1_layout = [
    # File picker. Patterns are space-separated (not ';'-separated) so that
    # the filter is understood by the file dialog on every platform.
    [sg.Text("Select an Image: "), sg.InputText(key="-IMAGE-"), sg.FileBrowse(file_types=(("Image Files", "*.jpg *.png *.jpeg"),))],
    # Buttons without an explicit key: the event loop matches on their text.
    [sg.Button("Extract Text from Image"), sg.Button("Extract Text from Handwriting")],
    [sg.Text("Progress: ", key='-STATUS-IMAGE-')],
    # NOTE(review): this box is only written via update(); a plain
    # sg.Multiline may be a better fit than sg.Output - confirm.
    [sg.Output(size=(60, 10), key='-OUTPUT-IMAGE-')],
    [sg.Button("Save as Text", key='-SAVE-TEXT-IMAGE-'), sg.Button("Save as Word", key='-SAVE-WORD-IMAGE-'), sg.Button("Save as PDF", key='-SAVE-PDF-IMAGE-')]
]

# Tab 2: Convert PDF
tab2_layout = [
    # Row 1: choose the scanned PDF
    [
        sg.Text("Select a Scanned PDF File: "),
        sg.InputText(key="-PDF-"),
        sg.FileBrowse(file_types=(("PDF Files", "*.pdf"),)),
    ],
    # Row 2: start the conversion (the event loop matches on the button text)
    [sg.Button("Convert Scanned PDF to Full Text PDF")],
    # Row 3: status line refreshed while the conversion runs
    [sg.Text("Progress: ", key='-STATUS-PDF-')],
    # Row 4: result message box
    [sg.Output(size=(60, 10), key='-OUTPUT-PDF-')],
    # Row 5: move the converted file to a location chosen by the user
    [sg.Button("Save PDF", key='-SAVE-PDF-')],
]

# Tab 3: Convert Voice
tab3_layout = [
    # File picker. Patterns are space-separated (not ';'-separated) so that
    # the filter is understood by the file dialog on every platform.
    [sg.Text("Select a Voice File: "), sg.InputText(key="-VOICE-"), sg.FileBrowse(file_types=(("Audio Files", "*.wav *.mp3"),))],
    # Button without an explicit key: the event loop matches on its text.
    [sg.Button("Convert Voice to Text")],
    [sg.Text("Progress: ", key='-STATUS-VOICE-')],
    [sg.Output(size=(60, 10), key='-OUTPUT-VOICE-')],
    [sg.Button("Save as Text", key='-SAVE-TEXT-VOICE-'), sg.Button("Save as Word", key='-SAVE-WORD-VOICE-'), sg.Button("Save as PDF", key='-SAVE-PDF-VOICE-')]
]

# Tab 4: Convert Video
tab4_layout = [
    # File picker. Patterns are space-separated (not ';'-separated) so that
    # the filter is understood by the file dialog on every platform.
    [sg.Text("Select a Video File: "), sg.InputText(key="-VIDEO-"), sg.FileBrowse(file_types=(("Video Files", "*.mp4 *.avi *.mov"),))],
    # Button without an explicit key: the event loop matches on its text.
    [sg.Button("Convert Video to Images")],
    [sg.Text("Progress: ", key='-STATUS-VIDEO-')],
    [sg.Output(size=(60, 10), key='-OUTPUT-VIDEO-')],
    [sg.Button("Save Images", key='-SAVE-IMAGES-VIDEO-')]
]

# Update the layout with the new Tab
layout = [
    [
        # One tab per feature, in the order they appear in the window
        sg.TabGroup([[
            sg.Tab('Extract Text', tab1_layout),
            sg.Tab('Convert PDF', tab2_layout),
            sg.Tab('Convert Voice', tab3_layout),
            sg.Tab('Convert Video', tab4_layout),
        ]])
    ]
]

# Build the application window from the tabbed layout
window = sg.Window("OCR System", layout)

# Event Loop
# Main GUI loop: read one event at a time and dispatch it.
# Long-running work is started on daemon threads so the window stays
# responsive; those workers report back through custom completion events.
while True:
    event, values = window.read()
    if event in (sg.WINDOW_CLOSED, "Exit"):
        break

    # Extract Text from Image
    if event == "Extract Text from Image":
        image_path = values["-IMAGE-"]
        if image_path and image_path.lower().endswith((".jpg", ".jpeg", ".png")):
            window['-OUTPUT-IMAGE-'].update("")
            window['-STATUS-IMAGE-'].update("Progress: Processing")
            threading.Thread(target=process_image, args=(image_path, window), daemon=True).start()
        else:
            sg.popup("Please select a valid image file!", title="Warning")

    # Extract Text from Handwriting
    elif event == "Extract Text from Handwriting":
        image_path = values["-IMAGE-"]
        if image_path and image_path.lower().endswith((".jpg", ".jpeg", ".png")):
            window['-OUTPUT-IMAGE-'].update("")
            window['-STATUS-IMAGE-'].update("Progress: Processing")
            threading.Thread(target=process_handwriting, args=(image_path, window), daemon=True).start()
        else:
            sg.popup("Please select a valid handwriting image file!", title="Warning")

    # Convert Scanned PDF to Full Text PDF
    elif event == "Convert Scanned PDF to Full Text PDF":
        pdf_path = values["-PDF-"]
        if pdf_path and pdf_path.lower().endswith(".pdf"):
            window['-OUTPUT-PDF-'].update("")
            window['-STATUS-PDF-'].update("Progress: Processing")
            threading.Thread(target=process_pdf_task, args=(pdf_path, window, on_pdf_conversion_complete), daemon=True).start()
        else:
            sg.popup("Please select a valid PDF file!", title="Warning")

    # Save PDF
    elif event == '-SAVE-PDF-':
        # No `global` statement here: this loop already runs at module scope,
        # and declaring a name global after it has been assigned in the same
        # scope is a SyntaxError.
        if pdf_conversion_result:
            filename = sg.popup_get_file('Save Converted PDF', save_as=True, no_window=True, file_types=[("PDF Files", "*.pdf")])
            if filename:
                try:
                    shutil.move(pdf_conversion_result, filename)
                except OSError as exc:
                    # Keep the converted file available so the user can retry.
                    sg.popup(f"Could not save file: {exc}", title="Error")
                else:
                    sg.popup('File saved successfully!', title="Success")
                    # Reset only after the file has actually been moved.
                    pdf_conversion_result = None
            else:
                # Cancelling the dialog must not discard the converted file.
                sg.popup('File saving cancelled or no file generated!', title="Notice")
        else:
            sg.popup("There is no PDF file converted!", title="Warning")

    # Convert Voice to Text
    elif event == "Convert Voice to Text":
        voice_file = values["-VOICE-"]
        if voice_file and voice_file.lower().endswith((".wav", ".mp3")):
            window['-OUTPUT-VOICE-'].update("")
            window['-STATUS-VOICE-'].update("Progress: Processing")
            threading.Thread(target=process_voice, args=(voice_file, window), daemon=True).start()
        else:
            sg.popup("Please select a valid voice file!", title="Warning")

    # Convert Video to Images
    elif event == "Convert Video to Images":
        video_path = values["-VIDEO-"]
        if video_path and video_path.lower().endswith((".mp4", ".avi", ".mov")):
            window['-OUTPUT-VIDEO-'].update("")
            window['-STATUS-VIDEO-'].update("Progress: Processing")
            threading.Thread(target=process_video, args=(video_path, window), daemon=True).start()
        else:
            sg.popup("Please select a valid video file!", title="Warning")

    # Save image video
    elif event == '-SAVE-IMAGES-VIDEO-':
        video_path = values["-VIDEO-"]
        if video_path and os.path.isfile(video_path):
            # Frames are extracted again here, on the GUI thread; the earlier
            # "Convert Video to Images" run does not keep them.
            frames = extract_frames(video_path)
            if frames:
                folder = sg.popup_get_folder('Select Folder to Save Images', no_window=True)
                if folder:
                    save_frames(frames, folder)
                    sg.popup('Images saved successfully!', title="Success")
                else:
                    sg.popup("Image saving cancelled!", title="Notice")
            else:
                sg.popup("No frames were extracted from the video!", title="Warning")
        else:
            sg.popup("There is no Video file converted!", title="Warning")

    # Notification of Task Completed
    if event == DISPLAY_COMPLETED_MESSAGE_IMAGE:
        sg.popup("Extract Text from Image/Handwriting Completed!", title="Notice")
    elif event == DISPLAY_COMPLETED_MESSAGE_PDF:
        sg.popup("Convert Scanned PDF to Full Text PDF Completed!", title="Notice")
    elif event == DISPLAY_COMPLETED_MESSAGE_VOICE:
        sg.popup("Convert Voice to Text Completed!", title="Notice")
    elif event == DISPLAY_COMPLETED_MESSAGE_VIDEO:
        sg.popup("Convert Video to Images Completed!", title="Notice")

    # Saving of window output
    if event in ['-SAVE-TEXT-IMAGE-', '-SAVE-WORD-IMAGE-', '-SAVE-PDF-IMAGE-', '-SAVE-TEXT-VOICE-', '-SAVE-WORD-VOICE-', '-SAVE-PDF-VOICE-']:
        text_to_save = ''
        if 'IMAGE' in event:
            text_to_save = window['-OUTPUT-IMAGE-'].get()
        elif 'VOICE' in event:
            text_to_save = window['-OUTPUT-VOICE-'].get()

        # Keys look like '-SAVE-<TYPE>-<TAB>-', so index 2 is TEXT / WORD / PDF
        save_as_type = event.split('-')[2]
        file_type = {"TEXT": ".txt", "WORD": ".docx", "PDF": "*.pdf"}
        file_ext = {"TEXT": "Text Files (.txt)", "WORD": "Word Documents (.docx)", "PDF": "PDF Files (*.pdf)"}

        if not text_to_save.strip():
            sg.popup("There is no text to save!", title="Warning")
        else:
            filename = sg.popup_get_file('Save As', save_as=True, no_window=True, file_types=[(file_ext[save_as_type], file_type[save_as_type])])
            if filename:
                try:
                    if save_as_type == "TEXT":
                        save_text_to_txt(text_to_save, filename)
                        sg.popup('Texts saved as txt format successfully!', title="Success")
                    elif save_as_type == "WORD":
                        save_text_to_word(text_to_save, filename)
                        sg.popup('Texts saved as docx file format successfully!', title="Success")
                    elif save_as_type == "PDF":
                        save_text_to_pdf(text_to_save, filename)
                        sg.popup('Texts saved as pdf file format successfully!', title="Success")
                except Exception as exc:
                    # A failed write must not take the whole GUI loop down.
                    sg.popup(f"Could not save file: {exc}", title="Error")
            else:
                sg.popup("Save Action Cancelled!", title="Notice")
window.close()