### Custom Analyzer with Azure AI Content Understanding

![image.png](./Assets/image.png)

### Setting up the Environment

In [None]:
import os
from dotenv import load_dotenv
import requests

# Load settings from a local .env file (if one exists) into the process
# environment so they can be read with os.getenv below.
load_dotenv()

# Every setting below is needed to build the request URL and headers, so fail
# early with one readable message instead of an AttributeError on None (or a
# URL containing the literal text "None") further down the notebook.
_REQUIRED_SETTINGS = (
    "CONTENT_UNDERSTANDING_ENDPOINT",
    "CONTENT_UNDERSTANDING_API_KEY",
    "CUSTOM_ANALYZER_NAME",
)
_missing_settings = [
    name for name in _REQUIRED_SETTINGS if not (os.getenv(name) or "").strip()
]
if _missing_settings:
    raise RuntimeError(
        "Missing required environment variable(s): "
        + ", ".join(_missing_settings)
        + ". Define them in your environment or in a .env file."
    )

# Trailing slashes are removed because later cells append "/contentunderstanding/..."
# to the endpoint when building URLs.
CONTENT_UNDERSTANDING_ENDPOINT = os.getenv("CONTENT_UNDERSTANDING_ENDPOINT").strip().rstrip('/')
CONTENT_UNDERSTANDING_API_KEY = os.getenv("CONTENT_UNDERSTANDING_API_KEY").strip()
CUSTOM_ANALYZER_NAME = os.getenv("CUSTOM_ANALYZER_NAME").strip()

# Never echo the full key: notebook output is saved with the file and is easy
# to share by accident. Show only enough to confirm which key was loaded.
if len(CONTENT_UNDERSTANDING_API_KEY) > 8:
    _masked_api_key = "****" + CONTENT_UNDERSTANDING_API_KEY[-4:]
else:
    _masked_api_key = "****"

print("Endpoint:", CONTENT_UNDERSTANDING_ENDPOINT)
print("API Key:", _masked_api_key)
print("Custom Analyzer Name:", CUSTOM_ANALYZER_NAME)

### Running Custom Analysis

In [None]:
import time

# Submit a document URL to the custom analyzer, then poll the result endpoint
# until the operation reaches a terminal status. The final payload is stored
# in `document_analysis_result` (left as {} if anything fails).

# NOTE: despite the "prebuilt" in its name, this URL targets the custom
# analyzer named by CUSTOM_ANALYZER_NAME. The name is kept unchanged in case
# other cells reference it.
prebuilt_document_analyzer_url = f"{CONTENT_UNDERSTANDING_ENDPOINT}/contentunderstanding/analyzers/{CUSTOM_ANALYZER_NAME}:analyze?api-version=2025-05-01-preview"

document_url = "https://github.com/kuljotSB/AI-102-Certification/raw/refs/heads/main/Information-Extraction/Azure-AI-Content-Understanding/Custom_Analyzer/invoices/invoice.pdf"

body = {
    "url": document_url
}

document_analysis_result = {}

# Per-request timeout so a stalled connection cannot hang the cell forever.
REQUEST_TIMEOUT_SECONDS = 30
# Delay between polls and the overall budget for waiting on the analysis.
POLL_INTERVAL_SECONDS = 1
MAX_POLL_SECONDS = 300
# Statuses that mean "not finished yet". A freshly submitted operation can
# report "NotStarted" before it becomes "Running", so both must keep the loop
# alive. Compared in lower case to be tolerant of casing differences.
PENDING_STATUSES = {"notstarted", "running"}

try:
    headers = {
        "Content-Type": "application/json",
        "Ocp-Apim-Subscription-Key": CONTENT_UNDERSTANDING_API_KEY
    }

    response = requests.post(
        prebuilt_document_analyzer_url,
        headers=headers,
        json=body,
        timeout=REQUEST_TIMEOUT_SECONDS,
    )
    response.raise_for_status()
    result = response.json()
    analysis_id = result.get("id")
    print("Analysis ID:", analysis_id)

    if not analysis_id:
        # Without an ID there is nothing to poll; polling ".../None" would
        # only produce a misleading HTTP error.
        print("Error occurred: the analyze response did not contain an analysis ID")
    else:
        # Using the analysis ID to get results; polling until the analysis is complete
        get_result_url = f"{CONTENT_UNDERSTANDING_ENDPOINT}/contentunderstanding/analyzerResults/{analysis_id}?api-version=2025-05-01-preview"

        headers = {
            "Ocp-Apim-Subscription-Key": CONTENT_UNDERSTANDING_API_KEY
        }

        deadline = time.monotonic() + MAX_POLL_SECONDS
        while True:
            status_response = requests.get(
                get_result_url,
                headers=headers,
                timeout=REQUEST_TIMEOUT_SECONDS,
            )
            status_response.raise_for_status()
            status_result = status_response.json()
            analysis_status = status_result.get("status")
            print("Current Analysis Status:", analysis_status)

            if str(analysis_status).lower() not in PENDING_STATUSES:
                break  # Terminal status reached.
            if time.monotonic() >= deadline:
                print(f"Analysis did not finish within {MAX_POLL_SECONDS} seconds; giving up.")
                break
            time.sleep(POLL_INTERVAL_SECONDS)  # Wait before polling again

        if str(analysis_status).lower() != "succeeded":
            print(f"Warning: analysis ended with status {analysis_status!r}; the result may be incomplete.")

        # The last poll response already carries the full payload, so no
        # additional GET is needed.
        document_analysis_result = status_result
        print("Document Analysis Result:", document_analysis_result)

except requests.RequestException as e:
    print(f"Error occurred: {e}")