
![Sample Image](https://github.com/kuljotSB/EssencifAIconsultancy/blob/main/doc_intelli_flow/assets/prompt_flow_image.png?raw=true)


### Extract_Info component

In [None]:
import os
from promptflow import tool
from azure.core.credentials import AzureKeyCredential
from azure.ai.documentintelligence import DocumentIntelligenceClient
from azure.ai.documentintelligence.models import AnalyzeDocumentRequest

# Read the service endpoint and API key from environment variables so that
# credentials never have to be written into this file. Both fall back to an
# empty string when the variable is not set.
endpoint = os.environ.get("DOCUMENTINTELLIGENCE_ENDPOINT", "")
key = os.environ.get("DOCUMENTINTELLIGENCE_API_KEY", "")

# (output key / service field name, attribute path holding the typed value)
_INVOICE_FIELD_ATTRS = (
    ("VendorName", "value_string"),
    ("VendorAddress", "value_address"),
    ("CustomerName", "value_string"),
    ("InvoiceId", "value_string"),
    ("InvoiceDate", "value_date"),
    ("InvoiceTotal", "value_currency.amount"),
    ("DueDate", "value_date"),
    ("AmountDue", "value_currency.amount"),
)

_ITEM_FIELD_ATTRS = (
    ("Description", "value_string"),
    ("Quantity", "value_number"),
    ("UnitPrice", "value_currency.amount"),
    ("ProductCode", "value_string"),
    ("Amount", "value_currency.amount"),
)


def _read_field_value(field, value_attr):
    """
    Resolves a (possibly dotted) attribute path on an extracted field.

    Parameters:
        field: The field object to read from.
        value_attr (str): Attribute name such as "value_string", or a dotted
            path such as "value_currency.amount" that is followed one
            attribute at a time.

    Returns:
        The resolved value, or None if any attribute along the path is missing.
    """
    value = field
    for attr_name in value_attr.split("."):
        value = getattr(value, attr_name, None)
    return value


def _collect_fields(fields, field_attrs):
    """
    Copies the listed fields out of a field mapping into a plain dict.

    Parameters:
        fields: Mapping of field name to field object (must support `.get`).
        field_attrs: Iterable of (field name, attribute path) pairs.

    Returns:
        dict: One entry per field that is present and truthy; missing fields
        are left out rather than stored as None.
    """
    collected = {}
    for field_name, value_attr in field_attrs:
        field = fields.get(field_name)
        if field:
            collected[field_name] = _read_field_value(field, value_attr)
    return collected


@tool
def document_intelligence(url: str) -> list:
    """
    Extracts invoice details from a document using Azure Document Intelligence.

    Parameters:
        url (str): The URL of the document to analyze.

    Returns:
        list: One dictionary per invoice found in the document. Each holds the
        header fields that were detected plus an "Items" list with one
        dictionary per line item. The list is empty when no invoice is found.
    """

    document_intelligence_client = DocumentIntelligenceClient(
        endpoint=endpoint, credential=AzureKeyCredential(key)
    )

    poller = document_intelligence_client.begin_analyze_document(
        "prebuilt-invoice", AnalyzeDocumentRequest(url_source=url)
    )
    invoices = poller.result()

    invoice_data = []

    # `documents`, `fields` and `value_object` are optional on the SDK models,
    # so each is replaced by an empty container when it comes back as None.
    for invoice in invoices.documents or []:
        fields = invoice.fields or {}

        # Extract Invoice Fields
        invoice_dict = _collect_fields(fields, _INVOICE_FIELD_ATTRS)

        # Extract Items Safely
        invoice_dict["Items"] = []
        items_field = fields.get("Items")

        if items_field and items_field.value_array:
            for item in items_field.value_array:
                item_dict = _collect_fields(item.value_object or {}, _ITEM_FIELD_ATTRS)
                invoice_dict["Items"].append(item_dict)

        invoice_data.append(invoice_dict)

    return invoice_data


### Summarisation Component

In [None]:
#system:
You are a helpful AI assistant that acts as a document extraction bot.
Before you were called, the analyze API of Azure Document Intelligence was used
to extract information from a document. You will be provided with the JSON representation
of that information, containing key-value pairs such as merchant address, items,
subtotal value, etc., for a single invoice.

Your task is to generate a Markdown document of the information contained in the invoice
passed to you.

#user:
document extracted information: {{doc__information}}

### Output_Parser component

In [None]:
from promptflow import tool
import json
from datetime import date

def serialize(obj):
    """
    Fallback encoder for values that `json.dumps` cannot handle natively.

    Dates become ISO-8601 strings, objects that carry a `__dict__` are
    replaced by that attribute dictionary, and anything else is converted
    with `str()`.
    """
    # Dates are checked first so they are rendered as ISO strings
    if isinstance(obj, date):
        return obj.isoformat()

    try:
        # Expose the instance attributes so the encoder can walk into them
        return obj.__dict__
    except AttributeError:
        # No attribute dictionary available: use the string form
        return str(obj)

@tool
def my_python_tool(markdown_text: str, json_struct: dict):
    """
    Bundles the generated markdown and the extracted structure into one
    JSON-safe response.

    The payload is encoded to JSON (using `serialize` for values the encoder
    does not support) and decoded again, so the returned object consists only
    of JSON-compatible types.
    """
    payload = {"markdown_text": markdown_text, "json_struct": json_struct}

    # Round-trip through JSON to ensure the result is JSON serializable
    encoded = json.dumps(payload, default=serialize)
    return json.loads(encoded)
