In [30]:
from google.cloud import vision

In [31]:
client = vision.ImageAnnotatorClient()

In [32]:
def read_image(image_path: str):
    with open(image_path, 'rb') as image_file:
        image_binary = image_file.read()
    return image_binary

In [33]:
def detect(image_path: str):
    image_binary = read_image(image_path=image_path)
    image = vision.Image(content=image_binary)
    response = client.document_text_detection(image=image)
    return response

In [44]:
image_path = "/home/mohsen/Desktop/homelab/mahta/test-data/exame1.jpeg"
detection = detect(image_path=image_path)

In [45]:
def print_text_by_paragraph(detection):
    for page in detection.full_text_annotation.pages:
        for block in page.blocks:
            for para_num, paragraph in enumerate(block.paragraphs, 1):
                para_text = ""
                for word in paragraph.words:
                    word_text = "".join([symbol.text for symbol in word.symbols])
                    para_text += word_text + " "
                
                print(f"\n¶ Paragraph {para_num} (Confidence: {paragraph.confidence:.2%})")
                print(f"   {para_text.strip()}")

In [47]:
print_text_by_paragraph(detection)


¶ Paragraph 1 (Confidence: 66.36%)
   لأمان

¶ Paragraph 1 (Confidence: 97.36%)
   آزمون شبیه ساز نهایی درس سلامت و بهداشت تعداد : صفحه

¶ Paragraph 1 (Confidence: 98.73%)
   نام و نام خانوادگی

¶ Paragraph 1 (Confidence: 97.41%)
   ردیف

¶ Paragraph 1 (Confidence: 84.99%)
   1 .

¶ Paragraph 1 (Confidence: 72.25%)
   ۱۲

¶ Paragraph 1 (Confidence: 92.69%)
   الف )

¶ Paragraph 1 (Confidence: 61.60%)
   Y

¶ Paragraph 1 (Confidence: 98.18%)
   به نام خدا

¶ Paragraph 1 (Confidence: 97.86%)
   رشته

¶ Paragraph 1 (Confidence: 96.45%)
   کلیه رشته ها

¶ Paragraph 1 (Confidence: 97.03%)
   ساعت شروع

¶ Paragraph 1 (Confidence: 97.07%)
   دوره دوم متوسطه - دوازدهم تاریخ آزمون

¶ Paragraph 1 (Confidence: 94.60%)
   پاسخبرگ

¶ Paragraph 1 (Confidence: 68.31%)
   ۱۴۰۴/۰۲/۲۵

¶ Paragraph 1 (Confidence: 98.84%)
   پاسخهای خود را در محلهای تعیین شده به صورت دقیق خوش خط و مرتب وارد کنید

¶ Paragraph 1 (Confidence: 87.49%)
   مدت زمان ۳۰ دقیقه

¶ Paragraph 1 (Confidence: 92.15%)
   نمره

¶ Paragr

In [48]:
def analyze_vision_structure(detection):
    """
    Analyze Google Vision API text detection structure
    """
    print("=== Google Vision API Structure Analysis ===\n")
    
    # Overall structure
    print(f"Full text: {detection.full_text_annotation.text[:200]}...")
    print(f"Total pages: {len(detection.full_text_annotation.pages)}\n")
    
    total_paragraphs = 0
    
    # Analyze each page
    for page_num, page in enumerate(detection.full_text_annotation.pages):
        print(f"--- PAGE {page_num + 1} ---")
        print(f"Page dimensions: {page.width} x {page.height}")
        print(f"Total blocks: {len(page.blocks)}\n")
        
        # Analyze each block
        for block_num, block in enumerate(page.blocks):
            print(f"  Block {block_num + 1}:")
            print(f"    Type: {block.block_type}")
            print(f"    Confidence: {block.confidence:.2%}")
            print(f"    Paragraphs: {len(block.paragraphs)}")
            
            # Show bounding box
            vertices = block.bounding_box.vertices
            print(f"    Bounding box: ({vertices[0].x}, {vertices[0].y}) to ({vertices[2].x}, {vertices[2].y})")
            
            # Analyze ALL paragraphs in this block (not just first few)
            for para_num, paragraph in enumerate(block.paragraphs):
                total_paragraphs += 1
                para_text = ""
                for word in paragraph.words:
                    word_text = "".join([symbol.text for symbol in word.symbols])
                    para_text += word_text + " "
                
                print(f"      Para {para_num + 1}: '{para_text.strip()}' (conf: {paragraph.confidence:.2%})")
                
                # Show word details and coordinates for ALL words
                for word_num, word in enumerate(paragraph.words):
                    word_text = "".join([symbol.text for symbol in word.symbols])
                    word_vertices = word.bounding_box.vertices
                    print(f"        Word {word_num + 1}: '{word_text}' at ({word_vertices[0].x}, {word_vertices[0].y})")
            
            print()
    
    print(f"Total paragraphs found: {total_paragraphs}")

analyze_vision_structure(detection)

=== Google Vision API Structure Analysis ===

Full text: لأمان
آزمون شبیه ساز نهایی درس سلامت و بهداشت تعداد :صفحه
نام و نام خانوادگی
ردیف
1.
۱۲
الف)
Y
به نام خدا
رشته
کلیه رشته ها
ساعت شروع
دوره دوم متوسطه - دوازدهم تاریخ آزمون
پاسخبرگ
۱۴۰۴/۰۲/۲۵
پاسخهای خ...
Total pages: 1

--- PAGE 1 ---
Page dimensions: 960 x 1280
Total blocks: 38

  Block 1:
    Type: 1
    Confidence: 66.36%
    Paragraphs: 1
    Bounding box: (823, 87) to (925, 113)
      Para 1: 'لأمان' (conf: 66.36%)
        Word 1: 'لأمان' at (823, 87)

  Block 2:
    Type: 1
    Confidence: 97.36%
    Paragraphs: 1
    Bounding box: (594, 106) to (934, 147)
      Para 1: 'آزمون شبیه ساز نهایی درس سلامت و بهداشت تعداد : صفحه' (conf: 97.36%)
        Word 1: 'آزمون' at (905, 126)
        Word 2: 'شبیه' at (874, 124)
        Word 3: 'ساز' at (855, 123)
        Word 4: 'نهایی' at (824, 120)
        Word 5: 'درس' at (794, 118)
        Word 6: 'سلامت' at (749, 115)
        Word 7: 'و' at (738, 115)
        Word 8: 'بهداشت' at (693,