Reference: [Analyze Document – Azure AI Document Intelligence REST API (2023-07-31)](https://learn.microsoft.com/en-us/rest/api/aiservices/document-models/analyze-document?view=rest-aiservices-2023-07-31&preserve-view=true&tabs=HTTP)

In [1]:
import os
from dotenv import load_dotenv

# Take environment variables from .env before reading the settings below.
load_dotenv(override=True)

# Both settings are required: os.environ[...] raises KeyError if one is missing.
DOC_INTELLIGENCE_RESOURCE_NAME = os.environ["DOC_INTELLIGENCE_RESOURCE_NAME"]
DOC_INTELLIGENCE_KEY = os.environ["DOC_INTELLIGENCE_KEY"]

# Base URL of the resource; note that it ends with a trailing slash.
endpoint = "https://{}.cognitiveservices.azure.com/".format(
    DOC_INTELLIGENCE_RESOURCE_NAME
)

In [41]:
# Model used for the analysis; swap in the commented line to try another model ID.
model_id = "prebuilt-read"
#model_id = "custom-invoice-uniquecode"
DOC_INTELLIGENCE_API_VERSION = "2023-07-31"

# `endpoint` is defined with a trailing "/", so strip it before joining;
# otherwise the path starts with "//formrecognizer".
api_url = (
    f"{endpoint.rstrip('/')}/formrecognizer/documentModels/{model_id}:analyze"
    f"?api-version={DOC_INTELLIGENCE_API_VERSION}"
)

In [42]:
import requests
import json

# NOTE(review): this URL embeds a SAS token (the `sig=` query parameter) with a
# fixed expiry (`se=`). Consider loading it from the environment / .env instead
# of committing it with the notebook.
formUrl = "https://sbdnicstgdevweu.blob.core.windows.net/forms/form-A.pdf?sp=r&st=2024-05-06T12:05:19Z&se=2024-05-06T20:05:19Z&spr=https&sv=2022-11-02&sr=b&sig=O1V9w2ARzD%2Fd8x1e585KIPyiZ18dAiEPyWn3doRskG0%3D"

headers = {
    "Content-Type": "application/json",
    "Ocp-Apim-Subscription-Key": DOC_INTELLIGENCE_KEY,
}

# The service downloads the document itself from this URL.
request_dict = {
    "urlSource": formUrl
}

# json= lets requests serialize the body; timeout (seconds) keeps the cell from
# hanging forever if the service does not answer.
response = requests.post(api_url, json=request_dict, headers=headers, timeout=30)
print(f"Response status: {response.status_code}")

Response status: 202


In [43]:
# The analyze request is accepted asynchronously; the URL to fetch the result
# from is returned in the "Operation-Location" response header.
_operationLocation = response.headers["Operation-Location"]
_operationLocation

'https://man-doc-intelligence.cognitiveservices.azure.com/formrecognizer/documentModels/prebuilt-read/analyzeResults/7cd0385c-c6c5-4f97-be2a-c18ee7ad74bc?api-version=2023-07-31'

### Get Analyze result

In [44]:
import time

headers = {"Ocp-Apim-Subscription-Key": DOC_INTELLIGENCE_KEY}

# The analysis runs asynchronously, so a single GET right after the POST may
# still report "notStarted" or "running". Poll (bounded) until the operation
# reaches a terminal state. No request body is sent: this GET takes none.
MAX_POLLS = 30
POLL_INTERVAL_SECONDS = 2

for _ in range(MAX_POLLS):
    response = requests.get(_operationLocation, headers=headers, timeout=30)
    # Stop on an HTTP error (reported by the print below) or once the
    # operation is no longer pending.
    if not response.ok or response.json().get("status") not in ("notStarted", "running"):
        break
    time.sleep(POLL_INTERVAL_SECONDS)
else:
    raise TimeoutError(
        f"Analysis still pending after {MAX_POLLS * POLL_INTERVAL_SECONDS} seconds"
    )

print(f"Response status: {response.status_code}")

Response status: 200


In [45]:
# Decode the JSON response body in two ways: via the requests helper, and by
# parsing the raw text with the json module. Later cells use `response_dict`.
response_dict = response.json()
response_dict_via_text = json.loads(response.text)

#### See definitions of JSON content
https://learn.microsoft.com/en-us/rest/api/aiservices/document-models/get-analyze-result?view=rest-aiservices-2023-07-31&tabs=HTTP#definitions

In [46]:
# Pretty-print the whole response with two-space indentation.
formatted_response = json.dumps(response_dict, indent=2)
print(formatted_response)

{
  "status": "succeeded",
  "createdDateTime": "2024-05-06T13:23:18Z",
  "lastUpdatedDateTime": "2024-05-06T13:23:20Z",
  "analyzeResult": {
    "apiVersion": "2023-07-31",
    "modelId": "prebuilt-read",
    "stringIndexType": "textElements",
    "content": "Name des Kunden: Downstream\nKunden-Nr: 3212 Unique Code: X832A\nLeistung\nSetup\nDevelopment\n400\nDesign\n220\nGesamt\n900 EUR\nMfg,\nMag. Max Mustermann\nKonto-Inhaber\nSusi Soprano\nRechnung\n22. April 2024\nPreis (EUR, inkl. Mwst) 280\nKontonr/IBAN\n9999-4444-3333-3333\nBankname\nBawag",
    "pages": [
      {
        "pageNumber": 1,
        "angle": 0.1273237019777298,
        "width": 8.5,
        "height": 11,
        "unit": "inch",
        "words": [
          {
            "content": "Name",
            "polygon": [
              0.9885,
              1.332,
              1.3944,
              1.332,
              1.3944,
              1.4848,
              0.9885,
              1.48
            ],
            "confid

## Print text content

In [47]:
# Show the text content of the analyze result.
analyze_result = response_dict["analyzeResult"]
analyze_result["content"]

'Name des Kunden: Downstream\nKunden-Nr: 3212 Unique Code: X832A\nLeistung\nSetup\nDevelopment\n400\nDesign\n220\nGesamt\n900 EUR\nMfg,\nMag. Max Mustermann\nKonto-Inhaber\nSusi Soprano\nRechnung\n22. April 2024\nPreis (EUR, inkl. Mwst) 280\nKontonr/IBAN\n9999-4444-3333-3333\nBankname\nBawag'

### List custom fields if any

In [49]:
# "documents" may be absent from the result (hence the default), in which case
# nothing is printed.
for document in response_dict['analyzeResult'].get('documents', []):
    for name, field in document.get('fields', {}).items():
        if field['type'] != 'string':
            print(f'{name}: unhandled type: {field["type"]}')
            continue
        # A field can be reported without an extracted value or confidence;
        # .get() prints None for those instead of raising KeyError.
        print(f'{name}: {field.get("valueString")} (Confidence: {field.get("confidence")})')