In [2]:
!pip install gradio
!pip install easyocr
!pip install pillow
!pip install numpy


Collecting gradio
  Downloading gradio-4.44.0-py3-none-any.whl.metadata (15 kB)
Collecting aiofiles<24.0,>=22.0 (from gradio)
  Downloading aiofiles-23.2.1-py3-none-any.whl.metadata (9.7 kB)
Collecting fastapi<1.0 (from gradio)
  Downloading fastapi-0.115.0-py3-none-any.whl.metadata (27 kB)
Collecting ffmpy (from gradio)
  Downloading ffmpy-0.4.0-py3-none-any.whl.metadata (2.9 kB)
Collecting gradio-client==1.3.0 (from gradio)
  Downloading gradio_client-1.3.0-py3-none-any.whl.metadata (7.1 kB)
Collecting httpx>=0.24.1 (from gradio)
  Downloading httpx-0.27.2-py3-none-any.whl.metadata (7.1 kB)
Collecting orjson~=3.0 (from gradio)
  Downloading orjson-3.10.7-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (50 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m50.4/50.4 kB[0m [31m3.0 MB/s[0m eta [36m0:00:00[0m
Collecting pydub (from gradio)
  Downloading pydub-0.25.1-py2.py3-none-any.whl.metadata (1.4 kB)
Collecting python-multipart>=0.0.9 (from g

In [5]:
import gradio as gr
import easyocr
from PIL import Image
import re

# Shared EasyOCR reader configured for English and Hindi.
# Built once at module level so every OCR call reuses the same instance.
reader = easyocr.Reader(lang_list=['en', 'hi'])

# Lookup table: Devanagari digit character -> ASCII digit character.
# U+0966 is DEVANAGARI DIGIT ZERO and the ten digits are contiguous, so the
# table can be generated from the code points instead of being typed out.
hindi_to_english_digits = {chr(0x0966 + value): str(value) for value in range(10)}

# Replace Devanagari digits in a string with their ASCII equivalents
def convert_hindi_numbers_to_english(text):
    """Return ``text`` with each Devanagari digit swapped for its ASCII digit.

    Characters that have no entry in ``hindi_to_english_digits`` are copied
    through unchanged.
    """
    converted = []
    for symbol in text:
        # Fall back to the character itself when it is not a mapped digit.
        converted.append(hindi_to_english_digits.get(symbol, symbol))
    return ''.join(converted)

# Function to perform OCR and return extracted text without translation
def ocr_from_image(image_path):
    """Run OCR on an image file and return the recognised text as one string.

    Args:
        image_path: Path of the image file to read. ``None`` or an empty
            string (for example, extraction requested before any image was
            uploaded) yields an empty result instead of an error.

    Returns:
        The recognised text fragments joined with single spaces, with
        Devanagari digits replaced by ASCII digits. The text is otherwise
        left untranslated, in whatever script it was recognised in.
    """
    if not image_path:
        return ""

    # The path is handed straight to the reader; the image is not opened
    # separately here, so no file handle is left dangling.
    # With detail=1 each result is a tuple whose index 1 holds the text
    # (index 2 is EasyOCR's confidence score, not a language tag).
    results = reader.readtext(image_path, detail=1)

    return " ".join(
        convert_hindi_numbers_to_english(detection[1]) for detection in results
    )

# Function to search for a keyword and highlight the matches
def search_and_highlight(text, keyword):
    """Return ``text`` as HTML-safe markup with ``keyword`` matches highlighted.

    The result is meant to be rendered as HTML, so every piece of ``text`` is
    HTML-escaped; only the ``<mark>`` tags added here are live markup.

    Args:
        text: Plain text to search. ``None`` or ``""`` yields ``""``.
        keyword: Literal string to look for (case-insensitive; regex
            metacharacters are matched literally). When empty or ``None``
            the escaped text is returned without any highlighting.

    Returns:
        A string in which each match is wrapped in ``<mark>...</mark>`` and
        all other characters are HTML-escaped.
    """
    import html  # local import: keeps this function self-contained

    if not text:
        return ""
    if not keyword:
        return html.escape(text)

    # Match against the raw text, then escape each piece individually.
    # Splitting on a single capturing group yields an alternating list:
    # even indexes are the text between matches, odd indexes are the matches.
    segments = re.split(f'({re.escape(keyword)})', text, flags=re.IGNORECASE)

    rendered = []
    for index, segment in enumerate(segments):
        escaped = html.escape(segment)
        rendered.append(f'<mark>{escaped}</mark>' if index % 2 else escaped)
    return ''.join(rendered)

# Gradio app with separate buttons for text extraction and search
def gradio_app():
    """Assemble the Blocks UI with an OCR section and a keyword-search section.

    Returns:
        The constructed ``gr.Blocks`` object; launching it is left to the caller.
    """
    with gr.Blocks() as demo:
        # ---- OCR section: upload, trigger, result ----
        uploaded_image = gr.Image(type="filepath", label="Upload Image")
        run_ocr_btn = gr.Button("Extract Text")
        # Kept interactive so the extracted text can be selected and copied
        ocr_text_box = gr.Textbox(label="Extracted Text", interactive=True)

        gr.Markdown("###")

        # ---- Search section: keyword, trigger, highlighted result ----
        keyword_box = gr.Textbox(label="Enter Keyword to Search")
        run_search_btn = gr.Button("Search Keyword")
        highlight_view = gr.HTML(label="Highlighted Text")

        def update_extracted_text(image):
            """Click handler: OCR the uploaded image and return its text."""
            return ocr_from_image(image)

        def search_in_extracted_text(extracted_text, keyword):
            """Click handler: highlight ``keyword`` inside the extracted text."""
            return search_and_highlight(extracted_text, keyword)

        # Image -> extracted-text box
        run_ocr_btn.click(
            fn=update_extracted_text,
            inputs=[uploaded_image],
            outputs=[ocr_text_box],
        )

        # (extracted text, keyword) -> highlighted HTML view
        run_search_btn.click(
            fn=search_in_extracted_text,
            inputs=[ocr_text_box, keyword_box],
            outputs=[highlight_view],
        )

    return demo

# Build the Blocks UI and start serving it.
# share=True asks Gradio for a public share link in addition to the local
# server (the recorded output below shows the resulting *.gradio.live URL).
demo = gradio_app()
demo.launch(share=True)




Colab notebook detected. To show errors in colab notebook, set debug=True in launch()
Running on public URL: https://0797c78fa0f3f5fe8a.gradio.live

This share link expires in 72 hours. For free permanent hosting and GPU upgrades, run `gradio deploy` from Terminal to deploy to Spaces (https://huggingface.co/spaces)


