Reference: Azure Form Recognizer client library quickstart (Python) — https://docs.microsoft.com/en-gb/azure/cognitive-services/form-recognizer/quickstarts/client-library?tabs=linux&pivots=programming-language-python

In [1]:
# Imported locally because this cell runs before the notebook's import cell.
import os
import warnings

# Connection settings come from the environment so that the subscription key
# is never stored in the notebook (cell sources and outputs are shared along
# with the file). The key that used to be written in this cell should be
# regenerated in the Azure portal, since it has already been published.
#   FORM_RECOGNIZER_KEY      - subscription key of the Form Recognizer resource
#   FORM_RECOGNIZER_ENDPOINT - resource endpoint (optional; defaults to the
#                              endpoint this notebook was written against)
key = os.environ.get("FORM_RECOGNIZER_KEY", "")
endpoint = os.environ.get(
    "FORM_RECOGNIZER_ENDPOINT",
    "https://form-recognizerx.cognitiveservices.azure.com/",
)

if not key:
    # Surface the misconfiguration here rather than as an opaque
    # authentication failure in a later cell.
    warnings.warn(
        "FORM_RECOGNIZER_KEY is not set; requests to the Form Recognizer "
        "service will fail to authenticate."
    )

In [2]:
import os
import azure.ai.formrecognizer
from azure.core.credentials import AzureKeyCredential
from azure.core.exceptions import ResourceNotFoundError

In [4]:
from azure.ai.formrecognizer import FormRecognizerClient
from azure.ai.formrecognizer import FormTrainingClient

In [5]:
# Client used by the later cells to recognize page content and receipts.
# It is built from the `endpoint` and `key` values defined earlier in the notebook.
form_recognizer_client = FormRecognizerClient(endpoint=endpoint, credential=AzureKeyCredential(key))
# Client used by the training cell; same endpoint and key, passed positionally
# and wrapped in its own AzureKeyCredential instance.
form_training_client = FormTrainingClient(endpoint, AzureKeyCredential(key))

In [8]:
# Container SAS URL holding the training documents. A SAS URL carries a
# signature that grants access to the storage container and an expiry time, so
# it is supplied through the environment instead of being saved in the
# notebook, where it would both leak access and silently go stale.
#   FORM_RECOGNIZER_TRAINING_DATA_URL - SAS URL of the blob container
# NOTE(review): the service documentation asks for Read and List permissions
# on the container SAS; confirm when generating a new one.
trainingDataUrl = os.environ.get("FORM_RECOGNIZER_TRAINING_DATA_URL", "")

# Public sample receipt image from the azure-sdk-for-python repository. The
# same image is used for both content recognition and receipt recognition.
formUrl = "https://raw.githubusercontent.com/Azure/azure-sdk-for-python/master/sdk/formrecognizer/azure-ai-formrecognizer/tests/sample_forms/receipt/contoso-receipt.png"
receiptUrl = formUrl

In [9]:
# Start content recognition for the document at `formUrl`; the call hands back
# a poller object for the operation.
poller = form_recognizer_client.begin_recognize_content_from_url(formUrl)
# Fetch the operation's result. The next cells iterate over it, treating each
# element as one page with `width`, `height`, `unit`, `tables` and `lines`.
contents = poller.result()

In [10]:
def format_bounding_box(bounding_box):
    """Render a bounding box as comma-separated "[x, y]" pairs.

    Args:
        bounding_box: Iterable of points, each exposing ``x`` and ``y``
            attributes. May be ``None`` or empty.

    Returns:
        A string such as ``"[1.0, 2.0], [3.0, 4.0]"``, or ``"N/A"`` when
        ``bounding_box`` is falsy (``None`` or empty).
    """
    if bounding_box:
        corners = ("[{0.x}, {0.y}]".format(point) for point in bounding_box)
        return ", ".join(corners)
    return "N/A"

In [11]:
# Dump the layout of every recognized page: its dimensions, then each table
# cell by cell, then each text line. Page, table and line numbers are the
# zero-based positions produced by enumerate().
for page_number, page in enumerate(contents):
    print("----Recognizing content from page #{}----".format(page_number))
    size_message = "Has width: {} and height: {}, measured with unit: {}"
    print(size_message.format(page.width, page.height, page.unit))

    # Tables: one summary line per table, followed by one line per cell.
    for table_number, table in enumerate(page.tables):
        table_message = "Table # {} has {} rows and {} columns"
        print(table_message.format(table_number, table.row_count, table.column_count))
        cell_message = "...Cell[{}][{}] has text '{}' within bounding box '{}'"
        for table_cell in table.cells:
            print(cell_message.format(
                table_cell.row_index,
                table_cell.column_index,
                table_cell.text,
                format_bounding_box(table_cell.bounding_box),
            ))

    # Text lines: word count, text and position of each line on the page.
    line_message = "Line # {} has word count '{}' and text '{}' within bounding box '{}'"
    for line_number, text_line in enumerate(page.lines):
        print(line_message.format(
            line_number,
            len(text_line.words),
            text_line.text,
            format_bounding_box(text_line.bounding_box),
        ))

    print("----------------------------------------")

----Recognizing content from page #0----
Has width: 1688.0 and height: 3000.0, measured with unit: pixel
Line # 0 has word count '1' and text 'Contoso' within bounding box '[405.0, 273.0], [1049.0, 273.0], [1052.0, 403.0], [407.0, 415.0]'
Line # 1 has word count '1' and text 'Contoso' within bounding box '[327.0, 592.0], [498.0, 602.0], [494.0, 655.0], [324.0, 642.0]'
Line # 2 has word count '3' and text '123 Main Street' within bounding box '[318.0, 689.0], [649.0, 692.0], [648.0, 754.0], [317.0, 752.0]'
Line # 3 has word count '3' and text 'Redmond, WA 98052' within bounding box '[315.0, 794.0], [749.0, 795.0], [748.0, 858.0], [315.0, 856.0]'
Line # 4 has word count '1' and text '123-456-7890' within bounding box '[305.0, 1004.0], [619.0, 1014.0], [617.0, 1072.0], [305.0, 1061.0]'
Line # 5 has word count '3' and text '6/10/20 19 13:59' within bounding box '[301.0, 1222.0], [630.0, 1227.0], [629.0, 1287.0], [301.0, 1284.0]'
Line # 6 has word count '3' and text 'Sales Associate: Paul' 

In [12]:
# Start receipt recognition for the image at `receiptUrl`; the call hands back
# a poller object for the operation (this rebinds the notebook-level `poller`).
poller = form_recognizer_client.begin_recognize_receipts_from_url(receiptUrl)
# Fetch the operation's result. Later cells iterate over it and read each
# receipt's `fields` mapping.
receipts = poller.result()

In [13]:
# Headline fields reported for each receipt: the key looked up in
# `receipt.fields`, paired with the message used to print it.
headline_messages = (
    ("ReceiptType", "Receipt Type: {} has confidence: {}"),
    ("MerchantName", "Merchant Name: {} has confidence: {}"),
    ("TransactionDate", "Transaction Date: {} has confidence: {}"),
)

# The loop variable keeps the name `receipt` because later cells in this
# notebook read it after the loop has finished.
for receipt_number, receipt in enumerate(receipts):
    print("--------Recognizing receipt #{}--------".format(receipt_number))
    for field_name, message in headline_messages:
        field = receipt.fields.get(field_name)
        # Fields missing from the receipt are skipped silently.
        if field:
            print(message.format(field.value, field.confidence))

--------Recognizing receipt #0--------
Receipt Type: Itemized has confidence: 0.659
Merchant Name: Contoso Contoso has confidence: 0.516
Transaction Date: 2019-06-10 has confidence: 0.985


In [14]:
print("Receipt items:")
# Print the line items of every recognized receipt.
#
# The cell iterates over `receipts` itself instead of reusing the `receipt`
# variable left behind by an earlier loop: that leftover only ever referred to
# the last receipt, and did not exist at all when no receipt was recognized.

# Per-item fields: the key looked up in the item's value mapping, paired with
# the message used to print it.
item_field_messages = (
    ("Name", "......Item Name: {} has confidence: {}"),
    ("Quantity", "......Item Quantity: {} has confidence: {}"),
    ("Price", "......Individual Item Price: {} has confidence: {}"),
    ("TotalPrice", "......Total Item Price: {} has confidence: {}"),
)

for receipt in receipts:
    print("Receipt items:")
    items_field = receipt.fields.get("Items")
    # .get() returns None when the receipt has no "Items" entry; treat that,
    # or an empty/None value, as "no items" instead of failing on `.value`.
    receipt_items = items_field.value if items_field and items_field.value else []
    for idx, item in enumerate(receipt_items):
        print("...Item #{}".format(idx))
        for field_name, message in item_field_messages:
            item_field = item.value.get(field_name)
            # Fields missing from this item are skipped silently.
            if item_field:
                print(message.format(item_field.value, item_field.confidence))

Receipt items:
...Item #0
......Item Name: 8GB RAM (Black) has confidence: 0.916
......Total Item Price: 999.0 has confidence: 0.559
...Item #1
......Item Name: SurfacePen has confidence: 0.858
......Item Quantity: None has confidence: 0.858
......Total Item Price: 99.99 has confidence: 0.386


In [15]:
# Print the monetary summary (subtotal, tax, tip, total) of every recognized
# receipt, each followed by a closing separator line.
#
# The cell iterates over `receipts` itself instead of reusing the `receipt`
# variable left behind by an earlier loop: that leftover only ever referred to
# the last receipt, and did not exist at all when no receipt was recognized.
for receipt in receipts:
    # Each field is optional; .get() returns None when it is missing and the
    # corresponding line is then skipped.
    subtotal = receipt.fields.get("Subtotal")
    if subtotal:
        print("Subtotal: {} has confidence: {}".format(subtotal.value, subtotal.confidence))
    tax = receipt.fields.get("Tax")
    if tax:
        print("Tax: {} has confidence: {}".format(tax.value, tax.confidence))
    tip = receipt.fields.get("Tip")
    if tip:
        print("Tip: {} has confidence: {}".format(tip.value, tip.confidence))
    total = receipt.fields.get("Total")
    if total:
        print("Total: {} has confidence: {}".format(total.value, total.confidence))
    print("--------------------------------------")

Subtotal: 1098.99 has confidence: 0.964
Tax: 104.4 has confidence: 0.713
Total: 1203.39 has confidence: 0.774
--------------------------------------


In [16]:
# Start training a custom model from the documents reachable through
# `trainingDataUrl`, with `use_training_labels=False` (no label files used).
# NOTE(review): the recorded run of this cell failed with HttpResponseError
# (2014) "No valid blobs found in the specified Azure blob container" — check
# that `trainingDataUrl` is a currently valid container SAS URL and that the
# container holds documents meeting the service's format/size requirements.
poller = form_training_client.begin_training(trainingDataUrl, use_training_labels=False)
# Fetch the trained model from the poller once the operation has finished.
model = poller.result()

HttpResponseError: Invalid model created with ID=1b16a158-6b6f-48f6-8e12-8d4bc7d1a9ea
(2014) No valid blobs found in the specified Azure blob container. Please conform to the document format/size/page/dimensions requirements.
