### This notebook contains a proof of concept (POC) that extracts key-value pairs and other layout-related information from documents and images using OCR

In [1]:
import os
import openai
import dotenv
from azure.core.credentials import AzureKeyCredential
from azure.ai.documentintelligence import DocumentIntelligenceClient
from azure.ai.documentintelligence.models import DocumentAnalysisFeature, AnalyzeResult

from azure.core.credentials import AzureKeyCredential
from azure.ai.formrecognizer import DocumentAnalysisClient


# Build the path with os.path.join so the credentials file resolves on any OS;
# a hard-coded backslash-separated path is only valid on Windows.
dotenv.load_dotenv(os.path.join("..", "common", "credentials.env"), override=True)

True

#### Set variables

In [2]:
"""
This code sample shows Prebuilt Document operations with the Azure Form Recognizer client library. 
The async versions of the samples require Python 3.6 or later.

To learn more, please visit the documentation - Quickstart: Form Recognizer Python client library SDKs
https://learn.microsoft.com/azure/applied-ai-services/form-recognizer/quickstarts/get-started-v3-sdk-rest-api?view=doc-intel-3.1.0&pivots=programming-language-python
"""

"""
Remember to remove the key from your code when you're done, and never post it publicly. For production, use
secure methods to store and access your credentials. For more information, see 
https://docs.microsoft.com/en-us/azure/cognitive-services/cognitive-services-security?tabs=command-line%2Ccsharp#environment-variables-and-application-configuration
"""
# Sample invoice image analyzed below. The path is assembled from its components
# so it resolves on Windows, Linux and macOS alike (a literal "..\\common\\..."
# string is treated as a single file name on POSIX systems).
path_to_sample_documents = os.path.join("..", "common", "data", "sampleinvoice.jpeg")

# Service endpoint and API key are read from environment variables;
# a missing variable raises KeyError here rather than failing later.
endpoint = os.environ["DOCUMENTINTELLIGENCE_ENDPOINT"]
key = os.environ["DOCUMENTINTELLIGENCE_API_KEY"]

#### Read the input document and analyze it using Document Intelligence

In [3]:
# Create the Form Recognizer analysis client from the endpoint and key set above.
document_analysis_client = DocumentAnalysisClient(
    endpoint=endpoint,
    credential=AzureKeyCredential(key),
)

# Send the local sample document to the General Document model ("prebuilt-document").
with open(path_to_sample_documents, "rb") as document_stream:
    poller = document_analysis_client.begin_analyze_document(
        "prebuilt-document",
        document=document_stream,
        locale="en-US",
    )

# Alternative, kept for reference: analyze a document by URL instead of a local file.
# formUrl = "https://raw.githubusercontent.com/Azure-Samples/cognitive-services-REST-api-samples/master/curl/form-recognizer/sample-layout.pdf"
# poller = document_analysis_client.begin_analyze_document_from_url("prebuilt-document", formUrl)

# Retrieve the analysis result from the poller; later cells read from `result`.
result = poller.result()

##### Notes: 
The General document model combines powerful Optical Character Recognition (OCR) capabilities with deep learning models to extract key-value pairs, tables, and selection marks from documents.
Starting with Document Intelligence versions 2024-02-29-preview, 2023-10-31-preview and going forward, the general document model (prebuilt-document) is deprecated. To extract key-value pairs, selection marks, text, tables, and structure from documents, use the following models:

- Layout model (model ID `prebuilt-layout`) with the optional query string parameter `features=keyValuePairs` enabled. Versions: v4 (2024-02-29-preview) and v3.1 (2023-07-31, GA).

#### Extract KV pairs

In [4]:
# Print every key-value pair found in the analyzed document.
print("----Key-value pairs found in document----")
for kv_pair in result.key_value_pairs:
    # A pair may lack a key as well as a value. Resolve the key text up front
    # so a missing key prints as an empty string instead of raising
    # AttributeError on `None.content`.
    key_text = kv_pair.key.content if kv_pair.key else ""
    if kv_pair.value:
        print("Key '{}': Value: '{}'".format(key_text, kv_pair.value.content))
    else:
        # No value was detected for this key; print the key alone.
        print("Key '{}': Value:".format(key_text))

print("----------------------------------------")

----Key-value pairs found in document----
Key 'Invoice No.': Value: 'INV09080012'
Key 'Date': Value: '14-Aug-2009'
Key 'Tel:': Value: '(415) 989-1188'
Key 'Fax:': Value: '(415) 989-2288'
Key 'Email:': Value: 'admin@xincube.com'
Key 'Website:': Value: 'www.xincube.com'
Key 'Bill To': Value: 'John
Synex Inc
128 AA Juanita Ave, 91740 Glendora, CA, US'
Key 'Ship To': Value: 'John
Synex Inc
128 AA Juanita Ave, 91740 Glendora, CA, US'
Key 'Sales Person': Value:
Key 'Order No': Value:
Key 'Shipping Date': Value: '13-Aug-2009'
Key 'Shipping Terms': Value:
Key 'Terms': Value: 'COD'
Key 'Sub Total (USD)': Value: '9,463.00'
Key 'Discount (USD)': Value: '0.00'
Key 'Shipping (USD)': Value: '0.00'
Key 'Total (USD)': Value: '10,243.70'
Key 'Sales Tax (USD)': Value: '780.70'
Key 'Deposit (USD)': Value: '0.00'
Key 'Amount Due (USD)': Value: '10,243.70'
----------------------------------------


#### Extract Tables, Rows and Cells

In [5]:
# For each detected table: print its dimensions and location, then every cell's
# text and location. `or []` skips missing/empty collections without nesting.
for table_idx, table in enumerate(result.tables or []):
    print(f"Table # {table_idx} has {table.row_count} rows and {table.column_count} columns")

    for region in table.bounding_regions or []:
        print(f"Table # {table_idx} location on page: {region.page_number} is {region.polygon}")

    for cell in table.cells:
        print(f"...Cell[{cell.row_index}][{cell.column_index}] has text '{cell.content}'")

        for region in cell.bounding_regions or []:
            # The trailing "\n" leaves a blank line after each cell location.
            print(
                f"...content on page {region.page_number} is within bounding polygon '{region.polygon}'\n"
            )

print("----------------------------------------")

Table # 0 has 2 rows and 5 columns
Table # 0 location on page: 1 is [Point(x=49.0, y=558.0), Point(x=1161.0, y=558.0), Point(x=1161.0, y=635.0), Point(x=49.0, y=635.0)]
...Cell[0][0] has text 'Sales Person'
...content on page 1 is within bounding polygon '[Point(x=53.0, y=558.0), Point(x=222.0, y=558.0), Point(x=222.0, y=596.0), Point(x=52.0, y=596.0)]'

...Cell[0][1] has text 'Order No'
...content on page 1 is within bounding polygon '[Point(x=222.0, y=558.0), Point(x=424.0, y=558.0), Point(x=424.0, y=596.0), Point(x=222.0, y=596.0)]'

...Cell[0][2] has text 'Shipping Date'
...content on page 1 is within bounding polygon '[Point(x=424.0, y=558.0), Point(x=669.0, y=558.0), Point(x=669.0, y=596.0), Point(x=424.0, y=596.0)]'

...Cell[0][3] has text 'Shipping Terms'
...content on page 1 is within bounding polygon '[Point(x=669.0, y=558.0), Point(x=900.0, y=558.0), Point(x=900.0, y=596.0), Point(x=669.0, y=596.0)]'

...Cell[0][4] has text 'Terms'
...content on page 1 is within bounding pol

### Extract Layout

In [6]:
# Output templates, named once so the loops below stay short.
page_template = "Page has width: {} and height: {}, measured with unit: {}"
line_template = "...Line # {} has word count {} and text '{}' within bounding polygon '{}'"
word_template = "......Word '{}' has a confidence of {}"
mark_template = "...Selection mark is '{}' within bounding polygon '{}' and has a confidence of {}"

# Styles: one line of output per style entry, stating whether it is handwritten.
for style in result.styles:
    handwriting = "handwritten" if style.is_handwritten else "no handwritten"
    print("Document contains {} content".format(handwriting))

# Pages: dimensions first, then each text line with its words, then selection marks.
for page in result.pages:
    print("----Analyzing layout from page #{}----".format(page.page_number))
    print(page_template.format(page.width, page.height, page.unit))

    for line_idx, line in enumerate(page.lines):
        words = line.get_words()
        print(line_template.format(line_idx, len(words), line.content, line.polygon))
        for word in words:
            print(word_template.format(word.content, word.confidence))

    for selection_mark in page.selection_marks:
        print(
            mark_template.format(
                selection_mark.state, selection_mark.polygon, selection_mark.confidence
            )
        )

# Tables: dimensions and location, then every cell's content and location.
for table_idx, table in enumerate(result.tables):
    print("Table # {} has {} rows and {} columns".format(table_idx, table.row_count, table.column_count))

    for region in table.bounding_regions:
        print("Table # {} location on page: {} is {}".format(table_idx, region.page_number, region.polygon))

    for cell in table.cells:
        print("...Cell[{}][{}] has content '{}'".format(cell.row_index, cell.column_index, cell.content))
        for region in cell.bounding_regions:
            print("...content on page {} is within bounding polygon '{}'".format(region.page_number, region.polygon))

print("----------------------------------------")

----Analyzing layout from page #1----
Page has width: 1275.0 and height: 1650.0, measured with unit: pixel
...Line # 0 has word count 1 and text 'xin' within bounding polygon '[Point(x=78.0, y=90.0), Point(x=175.0, y=83.0), Point(x=177.0, y=139.0), Point(x=78.0, y=142.0)]'
......Word 'xin' has a confidence of 0.992
...Line # 1 has word count 1 and text 'Cube' within bounding polygon '[Point(x=188.0, y=86.0), Point(x=300.0, y=87.0), Point(x=300.0, y=137.0), Point(x=189.0, y=138.0)]'
......Word 'Cube' has a confidence of 0.97
...Line # 2 has word count 1 and text 'Invoice' within bounding polygon '[Point(x=798.0, y=55.0), Point(x=904.0, y=55.0), Point(x=904.0, y=83.0), Point(x=798.0, y=83.0)]'
......Word 'Invoice' has a confidence of 0.961
...Line # 3 has word count 2 and text 'Invoice No.' within bounding polygon '[Point(x=796.0, y=102.0), Point(x=899.0, y=102.0), Point(x=899.0, y=122.0), Point(x=796.0, y=122.0)]'
......Word 'Invoice' has a confidence of 0.973
......Word 'No.' has a con