In [1]:
!pip install google-cloud-vision google-auth





In [12]:
import os
import io
from collections import deque
from google.cloud import vision
from google.oauth2 import service_account
import pandas as pd

In [13]:
# Path to your service account JSON key file. Leave it empty to keep whatever
# GOOGLE_APPLICATION_CREDENTIALS is already configured in the environment.
service_account_json_path = ""

# Only export the variable when a real path was supplied: writing a blank
# value would overwrite a valid, already-configured setting.
if service_account_json_path.strip():
    os.environ["GOOGLE_APPLICATION_CREDENTIALS"] = service_account_json_path.strip()


In [20]:
# --- Notebook settings ---

# Set to True if you're processing a document with dense text.
document_mode = False

# Language hints as a comma-separated string, e.g. 'en,es'.
languages = ''

# Set to True if you want detailed output.
full_description = False

# Symbols whose confidence is below this value are reported as possible mistakes.
confidence_threshold = 0.6

# Path to your service account JSON credentials, if needed.
key_path = " "


In [21]:
# Service account credentials
def set_credentials():
    """Load service account credentials into the global ``credentials``.

    The key file path is taken from ``key_path`` when it is non-blank,
    otherwise from the ``GOOGLE_APPLICATION_CREDENTIALS`` environment
    variable. Whitespace-only values (such as an unfilled ``" "``
    placeholder) are treated as "not set" so that the fallback and the
    explicit error below are actually reachable.

    Raises:
        Exception: if neither ``key_path`` nor the environment variable
            provides a non-blank path.
    """
    global credentials
    candidates = (key_path, os.environ.get('GOOGLE_APPLICATION_CREDENTIALS'))
    # First candidate that still has content after stripping whitespace.
    credentials_file = next(
        (candidate.strip() for candidate in candidates if candidate and candidate.strip()),
        None,
    )
    if not credentials_file:
        raise Exception("Missing service_account.json, GOOGLE_APPLICATION_CREDENTIALS or key_path")
    credentials = service_account.Credentials.from_service_account_file(credentials_file)

In [22]:
def detect_text(vision_image, language_hints=None, full=False):
    """Run Vision ``text_detection`` on an image and format the result as text.

    Args:
        vision_image: image object passed as ``image=`` to the Vision client.
        language_hints: optional iterable of language codes passed to
            ``vision.ImageContext``; ``None`` means no hints.
        full: when True, list every text annotation with the vertices of its
            bounding polygon; otherwise return only the first annotation's
            description, stripped.

    Returns:
        ``"No text detected."`` when the response has no text annotations,
        otherwise a newline-joined report starting with ``Language: <locale>``.

    Raises:
        RuntimeError: if the API response carries an error message.
    """
    client = vision.ImageAnnotatorClient(credentials=credentials)
    image_context = vision.ImageContext(language_hints=list(language_hints or []))
    response = client.text_detection(image=vision_image, image_context=image_context)

    # A failed request also has no annotations; surface the error instead of
    # reporting it as "No text detected."
    if response.error.message:
        raise RuntimeError(f"Vision API error: {response.error.message}")

    texts = response.text_annotations
    if not texts:
        return "No text detected."

    output = [f"Language: {texts[0].locale}"]

    if full:
        for text in texts:
            output.append('\n' + text.description)
            boundaries = ','.join(
                f'({vertex.x},{vertex.y})' for vertex in text.bounding_poly.vertices
            )
            output.append(f'bounds: {boundaries}')
    else:
        output.append(texts[0].description.strip())

    return '\n'.join(output)

In [16]:
def _format_confidence(value):
    """Render a confidence score with two decimals for the text report."""
    return f'{value:.2f}'


def detect_document_text(vision_image, language_hints=None, full=False):
    """Run Vision ``document_text_detection`` and format the result as text.

    Args:
        vision_image: image object passed as ``image=`` to the Vision client.
        language_hints: optional iterable of language codes passed to
            ``vision.ImageContext``; ``None`` means no hints.
        full: when True, also emit the text as a single line, the paragraphs,
            and block / paragraph / word confidences.

    Returns:
        ``"No text detected."`` when the response has no text annotations,
        otherwise a newline-joined report. Regardless of ``full``, every
        symbol whose confidence is below the global ``confidence_threshold``
        is listed as a "Possible mistake".

    Raises:
        RuntimeError: if the API response carries an error message.
    """
    client = vision.ImageAnnotatorClient(credentials=credentials)
    image_context = vision.ImageContext(language_hints=list(language_hints or []))
    response = client.document_text_detection(image=vision_image, image_context=image_context)

    # A failed request also has no annotations; surface the error instead of
    # reporting it as "No text detected."
    if response.error.message:
        raise RuntimeError(f"Vision API error: {response.error.message}")

    if not response.text_annotations:
        return "No text detected."

    output = []

    text = response.text_annotations[0]

    output.append(f"Language: {text.locale}\n")
    output.append(text.description.strip())

    if full:
        paragraphs, lines = extract_paragraphs(response.full_text_annotation)
        output.append('\nSINGLE LINE\n')
        output.append(' '.join(map(str.strip, lines)))
        output.append('\nPARAGRAPHS\n\n--')
        output.append('\n\n'.join(paragraphs) + '\n--')

    for page in response.full_text_annotation.pages:
        # Tracks whether the blank separator line was already emitted for this page.
        has_mistakes = False

        for block in page.blocks:
            if full:
                output.append(f'\nBlock confidence: {_format_confidence(block.confidence)}')

            for paragraph in block.paragraphs:
                if full:
                    # extract_paragraphs walks pages/blocks/paragraphs in this
                    # same order, so the next queued text belongs to this paragraph.
                    output.append('\n' + paragraphs.popleft())
                    output.append(f'\nParagraph confidence: {_format_confidence(paragraph.confidence)}\n')

                for word in paragraph.words:
                    word_text = ''.join(symbol.text for symbol in word.symbols)
                    if full:
                        output.append(f'({_format_confidence(word.confidence)}) {word_text}')

                    for symbol in word.symbols:
                        if symbol.confidence < confidence_threshold:
                            if not has_mistakes:
                                has_mistakes = True
                                if not full:
                                    # Blank line between the text and the mistakes list.
                                    output.append("")
                            output.append(
                                f"Possible mistake: symbol '{symbol.text}' in word '{word_text}' "
                                f"(confidence: {_format_confidence(symbol.confidence)})"
                            )

    return '\n'.join(output)

In [17]:
def extract_paragraphs(full_text_annotation):
    """Rebuild paragraph and line strings from a full text annotation.

    Walks pages -> blocks -> paragraphs -> words -> symbols, concatenating
    symbol text and using each symbol's detected break to decide where spaces
    and line ends go.

    Args:
        full_text_annotation: object exposing ``pages[].blocks[].paragraphs[]
            .words[].symbols[]``, each symbol having ``text`` and
            ``property.detected_break.type``.

    Returns:
        A ``(paragraphs, lines)`` tuple: ``paragraphs`` is a ``deque`` with one
        string per paragraph in traversal order, ``lines`` is a flat list of
        line strings across all paragraphs.
    """
    breaks = vision.TextAnnotation.DetectedBreak.BreakType
    # Both break kinds denote a space inside a line.
    inline_spaces = (breaks.SPACE, breaks.SURE_SPACE)
    paragraphs = []
    lines = []

    for page in full_text_annotation.pages:
        for block in page.blocks:
            for paragraph in block.paragraphs:
                para = ""
                line = ""
                for word in paragraph.words:
                    for symbol in word.symbols:
                        line += symbol.text
                        break_type = symbol.property.detected_break.type
                        if break_type in inline_spaces:
                            line += ' '
                        elif break_type == breaks.EOL_SURE_SPACE:
                            # Line wraps here: keep a space so joined lines stay separated.
                            line += ' '
                            lines.append(line)
                            para += line
                            line = ''
                        elif break_type == breaks.LINE_BREAK:
                            lines.append(line)
                            para += line
                            line = ''
                # Flush text that was not terminated by a line-ending break;
                # otherwise the tail of the paragraph would be silently lost.
                if line:
                    lines.append(line)
                    para += line
                paragraphs.append(para)

    return deque(paragraphs), lines

In [18]:

def get_image_file(path):
    """Read the file at ``path`` as bytes and wrap them in a ``vision.Image``."""
    with open(path, mode='rb') as stream:
        raw_bytes = stream.read()
    return vision.Image(content=raw_bytes)

def get_image_uri(uri):
    """Return a ``vision.Image`` whose ``source.image_uri`` is set to ``uri``."""
    remote_image = vision.Image()
    remote_image.source.image_uri = uri
    return remote_image

In [23]:
# Turn the comma-separated `languages` setting into a list of hints, dropping
# blank entries: ''.split(',') yields [''], which would send an empty hint.
language_hints = [hint.strip() for hint in languages.split(',') if hint.strip()]

set_credentials()

# Image 

In [None]:
# Local Images
use_url = False  # Set to True if you want to use an image URL
path_to_image = 'C:/Users/lokes/Downloads/bipin.png'  # Update this with the path to your image
output_file_path = 'C:/Users/lokes/Downloads/OCR Final/output.txt'  # Update this with the desired path for your output file

# Create the output folder before calling the API, so the OCR result cannot be
# lost to a FileNotFoundError when the folder does not exist yet.
os.makedirs(os.path.dirname(output_file_path) or '.', exist_ok=True)

convert = detect_document_text if document_mode else detect_text
get_image = get_image_uri if use_url else get_image_file

# Run the conversion and capture the output
result_text = convert(get_image(path_to_image), language_hints, full_description)

# Ensure the result_text is a string
if result_text is None:
    result_text = "No text was detected."

# Save the result to a text file
with open(output_file_path, 'w', encoding='utf-8') as output_file:
    output_file.write(result_text)

print(f"Output saved to {output_file_path}")

Output saved to C:/Users/lokes/Downloads/OCR Final/output.txt


# URL

In [None]:
# Get Text from URL of Image
image_url = 'https://d3jmn01ri1fzgl.cloudfront.net/photoadking/webp_thumbnail/brown-gradient-indian-cuisine-menu-template-gtac7g8be7636d.webp'  # Replace this with your actual image URL
output_file_path = 'C:/Users/lokes/Downloads/OCR Final/output_URL.txt'  # Update this with the desired path for your output file

use_url = True  # Set to True if you want to use an image URL

# Create the output folder before calling the API, so the OCR result cannot be
# lost to a FileNotFoundError when the folder does not exist yet.
os.makedirs(os.path.dirname(output_file_path) or '.', exist_ok=True)

convert = detect_document_text if document_mode else detect_text
get_image = get_image_uri if use_url else get_image_file

# Run the conversion and capture the output using the image URL
result_text = convert(get_image(image_url), language_hints, full_description)

# Ensure the result_text is a string
if result_text is None:
    result_text = "No text was detected."

# Save the result to a text file
with open(output_file_path, 'w', encoding='utf-8') as output_file:
    output_file.write(result_text)

print(f"Output saved to {output_file_path}")

Output saved to C:/Users/lokes/Downloads/OCR Final/output_URL.txt


# PDF

In [29]:
from PyPDF2 import PdfReader

# Function to extract text from a PDF file
def extract_text_from_pdf(pdf_path):
    """Return the extracted text of every page of the PDF at ``pdf_path``.

    Each page's text is followed by a blank line (``"\\n\\n"``). A page for
    which ``extract_text()`` yields nothing contributes only that separator
    instead of raising on ``None + str``.
    """
    with open(pdf_path, 'rb') as file:
        reader = PdfReader(file)
        # Extraction happens inside the `with` block because the reader
        # pulls page data from the open file handle.
        page_texts = [(page.extract_text() or "") + "\n\n" for page in reader.pages]
    return "".join(page_texts)

# Set your path to the PDF file
pdf_path = 'C:/Users/lokes/Downloads/Resume202408140824.pdf'  # Replace this with your actual PDF file path

# Extract text from the PDF
result_text = extract_text_from_pdf(pdf_path)

# A PDF without extractable text still yields the "\n\n" page separators,
# so test the stripped text rather than the raw string.
if not result_text.strip():
    result_text = "No text was detected."

# Save the result to a text file
output_file_path = 'C:/Users/lokes/Downloads/OCR Final/output_PDF.txt'  # Update this with the desired path for your output file
# Make sure the output folder exists so the write cannot fail with FileNotFoundError.
os.makedirs(os.path.dirname(output_file_path) or '.', exist_ok=True)
with open(output_file_path, 'w', encoding='utf-8') as output_file:
    output_file.write(result_text)

print(f"Output saved to {output_file_path}")


Output saved to C:/Users/lokes/Downloads/OCR Final/output_PDF.txt
