# Installing dependencies

In [2]:
!pip install google-cloud-vision

Collecting google-cloud-vision
  Downloading google_cloud_vision-3.10.2-py3-none-any.whl.metadata (9.6 kB)
Downloading google_cloud_vision-3.10.2-py3-none-any.whl (527 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m527.9/527.9 kB[0m [31m24.9 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: google-cloud-vision
Successfully installed google-cloud-vision-3.10.2


In [1]:
# Mount Google Drive into the Colab runtime: the service-account key and the
# sample image used further down are read from paths under /content/gdrive.
from google.colab import drive

drive.mount("/content/gdrive")

Mounted at /content/gdrive


# OCR Application

In [2]:
import os
import io
from google.cloud import vision
from PIL import Image
import matplotlib.pyplot as plt
import plotly.graph_objects as go
import numpy as np

# Service-account key used when the caller does not supply one.
DEFAULT_CREDENTIALS_PATH = "/content/gdrive/MyDrive/G_cloud/german-ocr-handwritten-bef6bb33ec89.json"


def detect_handwritten_text(
    image_path,
    credentials_path=DEFAULT_CREDENTIALS_PATH,
    language_hints=("de",),
):
    """Run Google Cloud Vision document OCR on an image and collect word-level results.

    Args:
        image_path: Path of the image file whose bytes are sent to the API.
        credentials_path: Service-account JSON key. It is exported as
            GOOGLE_APPLICATION_CREDENTIALS before the client is created.
            Pass None to leave the environment variable untouched and rely
            on whatever credentials are already configured.
        language_hints: Iterable of language codes passed to the API as
            hints. Defaults to German.

    Returns:
        A tuple (full_text, boxes, texts):
          * full_text - the complete text of the annotation,
          * boxes - one (x_coords, y_coords) pair of lists per word, taken
            from the vertices of the word's bounding box,
          * texts - the stripped text of each word, in the same order as boxes.
        If the response carries an error message, the message is printed and
        (None, None, None) is returned.
    """
    if credentials_path is not None:
        os.environ["GOOGLE_APPLICATION_CREDENTIALS"] = credentials_path

    client = vision.ImageAnnotatorClient()

    with open(image_path, "rb") as image_file:
        content = image_file.read()

    # DOCUMENT_TEXT_DETECTION with the language hints attached as context.
    response = client.document_text_detection(
        image=vision.Image(content=content),
        image_context=vision.ImageContext(language_hints=list(language_hints or ())),
    )

    if response.error.message:
        print(f"Error: {response.error.message}")
        return None, None, None

    # Walk pages -> blocks -> paragraphs -> words and keep one entry per word.
    boxes = []
    texts = []
    for page in response.full_text_annotation.pages:
        for block in page.blocks:
            for paragraph in block.paragraphs:
                for word in paragraph.words:
                    vertices = word.bounding_box.vertices
                    # A missing coordinate is treated as 0.
                    boxes.append((
                        [vertex.x or 0 for vertex in vertices],
                        [vertex.y or 0 for vertex in vertices],
                    ))
                    texts.append("".join(symbol.text for symbol in word.symbols).strip())

    return response.full_text_annotation.text, boxes, texts


In [3]:
def display_image_with_boxes(image_path, boxes, texts):
    """Show the image in an interactive Plotly figure with every word outlined.

    Args:
        image_path: Path of the image file to use as the figure background.
        boxes: Sequence of (x_coords, y_coords) pairs, one polygon per word,
            in image pixel coordinates (origin at the top-left corner).
        texts: Hover text for each polygon, in the same order as boxes.

    The figure is displayed with fig.show(); nothing is returned.
    """
    title_margin = 30  # pixels reserved above the plot area for the title

    fig = go.Figure()

    # The context manager releases the file handle once the image has been
    # handed to Plotly, which encodes a PIL image into the figure on assignment.
    with Image.open(image_path) as img:
        img_width, img_height = img.size
        fig.add_layout_image(
            dict(
                source=img,
                xref="x",
                yref="y",
                x=0,
                y=img_height,
                sizex=img_width,
                sizey=img_height,
                sizing="stretch",
                layer="below",
            )
        )

    # One filled outline per word.
    for (xs, ys), word in zip(boxes, texts):
        if not xs or not ys:
            # A box without vertices cannot be drawn.
            continue

        # Repeat the first vertex to close the polygon, and flip Y because
        # image rows grow downwards while the plot's Y axis grows upwards.
        outline_x = list(xs) + [xs[0]]
        outline_y = [img_height - value for value in list(ys) + [ys[0]]]

        fig.add_trace(go.Scatter(
            x=outline_x,
            y=outline_y,
            mode='lines',
            fill='toself',
            fillcolor='rgba(255,0,0,0.2)',
            line=dict(color='red'),
            hoverinfo='text',
            text=word,
            showlegend=False,
        ))

    fig.update_layout(
        title="Detected Handwritten Text (Word Level)",
        autosize=False,
        width=img_width,
        # Add the title margin so the plot area itself is exactly
        # img_width x img_height and the 1:1 axis constraint does not
        # shrink the image.
        height=img_height + title_margin,
        margin=dict(l=0, r=0, t=title_margin, b=0),
        xaxis=dict(
            showgrid=False,
            zeroline=False,
            visible=False,
            range=[0, img_width],
            scaleanchor="y",
            scaleratio=1,
        ),
        yaxis=dict(
            showgrid=False,
            zeroline=False,
            visible=False,
            range=[0, img_height],
        ),
    )

    fig.show()


In [4]:

image_path = "/content/gdrive/MyDrive/OCR_Sample_IMG/sample1.png"
full_text, boxes, texts = detect_handwritten_text(image_path)


if full_text:
    print("Full extracted text:\n")
    print(full_text)

    print("\nDisplaying image with interactive boxes...")
    display_image_with_boxes(image_path, boxes, texts)
elif full_text is not None:
    # An empty string means the request succeeded but nothing was recognised.
    # None means the API reported an error, which detect_handwritten_text
    # has already printed.
    print("No text was detected in the image.")

Full extracted text:

Name, Vorname des Versicherten Kostenträgerkennung Versicherten-NR.
durch den Versicherten
Datum
Physikalische
Therapie
Leistungserbringer
Unterschrift des Versicherten
26.02 Ergo
27.02 Physio
28.02 KGN
Safwan
Mark
Safwan

Displaying image with interactive boxes...
