In [None]:
%pip install llama-index==0.12.26 llama-index-llms-azure-openai==0.3.2

In [None]:
import os
import base64
from openai import AzureOpenAI
# The credential class is spelled `DefaultAzureCredential` (capital D); the
# lowercase spelling fails with ImportError as soon as this cell runs.
from azure.identity import DefaultAzureCredential, get_bearer_token_provider

# Azure OpenAI connection settings, read from the process environment.
# Each value falls back to an empty string when its variable is not set.
endpoint = os.environ.get("ENDPOINT_URL", "")
deployment = os.environ.get("DEPLOYMENT_NAME", "")
subscription_key = os.environ.get("AZURE_OPENAI_API_KEY", "")

In [None]:
from llama_index.llms.azure_openai import AzureOpenAI

# Shared chat model used by the helper functions in this notebook.
llm = AzureOpenAI(
    engine=deployment,  # Azure deployment name (from DEPLOYMENT_NAME)
    model="gpt-4o",  # Author's note: for caching, use model version 2024-12-17 or above
    api_key=subscription_key,
    azure_endpoint=endpoint,
    api_version="2025-01-01-preview",
    max_tokens=800,  # was misspelled `max_token`, so the limit was never applied
    temperature=0.7,
    # Sampling options that are not constructor fields of the LlamaIndex LLM are
    # forwarded to the chat-completions request through `additional_kwargs`.
    additional_kwargs={
        "top_p": 0.95,
        "frequency_penalty": 0,
        "presence_penalty": 0,
    },
    # `stop=None` and `stream=False` were dropped: they only restated the defaults,
    # and streaming is selected by calling the `stream_*` methods instead.
)

## Using Unstructured Loader to load directly into LlamaIndex

## Anthropic's Contextual Retrieval

In [None]:
from functools import wraps
from typing import Callable, Optional, List, Coroutine
import asyncio
import logging

from llama_index.core.llms import (
    ChatMessage,
    ImageBlock,
    TextBlock,
    ChatResponse
)

from loguru import logger


def retry_on_rate_limit(retries: int = 3, backoff_factor: int = 2):
    """
    Decorator factory that retries an async function when it is rate limited.

    An exception is treated as a rate limit error when it exposes
    ``status_code == 429`` or when its message contains "rate limit",
    "too many requests" or "429" (case-insensitive). Any other exception is
    logged and swallowed: the wrapped call returns ``None`` without retrying.

    Args:
        retries (int): Maximum number of attempts, the first call included.
            With a value below 1 the wrapped function is never called and
            ``None`` is returned.
        backoff_factor (int): Base of the exponential backoff; the wait after
            failed attempt ``n`` (0-based) is ``backoff_factor ** n`` seconds.

    Returns:
        A decorator whose wrapper returns the wrapped coroutine's result, or
        ``None`` when every attempt was rate limited or an unexpected error
        occurred.
    """
    rate_limit_markers = ("rate limit", "too many requests", "429")

    def _is_rate_limit_error(error: Exception) -> bool:
        """Return True when the exception looks like an HTTP 429 / rate limit."""
        if getattr(error, "status_code", None) == 429:
            return True
        error_message = str(error).lower()
        return any(marker in error_message for marker in rate_limit_markers)

    def decorator(func: Callable[..., Coroutine]) -> Callable[..., Coroutine]:
        @wraps(func)
        async def wrapper(*args, **kwargs) -> Optional[ChatResponse]:
            for attempt in range(retries):
                try:
                    return await func(*args, **kwargs)
                except Exception as e:
                    if not _is_rate_limit_error(e):
                        logger.error(f"Unexpected error: {e}")
                        return None
                    if attempt + 1 >= retries:
                        # No attempt follows, so sleeping here would only delay
                        # the caller before it receives None.
                        logger.error(f"Rate limit reached. Giving up after {retries} attempts.")
                        return None
                    logger.warning(f"Rate limit reached. Retrying {attempt + 1}/{retries}...")
                    await asyncio.sleep(backoff_factor ** attempt)  # Exponential backoff
            return None  # Only reached when retries < 1
        return wrapper
    return decorator

@retry_on_rate_limit()
async def _generate_response(messages: List[ChatMessage]) -> Optional[ChatResponse]:
    """
    Send a chat prompt to the notebook-level ``llm`` and hand back its reply.

    Args:
        messages (List[ChatMessage]): The chat messages that make up the prompt.

    Returns:
        Optional[ChatResponse]: The value produced by ``llm.achat``. Because the
        function is wrapped by ``retry_on_rate_limit``, callers receive ``None``
        when that wrapper gives up.
    """
    chat_response = await llm.achat(messages)
    return chat_response

async def generate_chunk_context(document_text: str, chunk_text: str) -> Optional[ChatResponse]:
    """
    Ask the model for a short context that situates one chunk inside its document.

    The prompt is two user messages: the full document, then the chunk followed
    by the instruction to answer with the context only.

    Args:
        document_text (str): The full document text.
        chunk_text (str): The chunk of the document to situate.

    Returns:
        Optional[ChatResponse]: Whatever ``_generate_response`` returns for the prompt.
    """
    instruction = (
        "Please provide a short, succinct context situating this chunk within the overall document "
        "to improve search retrieval. Answer only with the succinct context and nothing else."
    )

    # Document-only message comes first. Original author's note: check if there's
    # a cache hit — presumably so the shared document prefix can be cached across
    # chunks; TODO confirm.
    document_message = ChatMessage(
        role="user",
        blocks=[TextBlock(text=document_text)],
    )
    chunk_message = ChatMessage(
        role="user",
        content=[
            TextBlock(text="Here is the chunk we want to situate within the whole document."),
            TextBlock(text=chunk_text),
            TextBlock(text=instruction),
        ],
    )

    return await _generate_response([document_message, chunk_message])

async def situate_image_within_document(document_text: str, image_base64: str) -> Optional[ChatResponse]:
    """
    Ask the model for a short description that situates an image inside a document.

    The prompt is two user messages: the document text, then the image followed
    by the instruction to answer with the description only.

    Args:
        document_text (str): The text of the document or a relevant excerpt.
        image_base64 (str): A single image in base64 format.

    Returns:
        Optional[ChatResponse]: Whatever ``_generate_response`` returns for the prompt.
    """
    instruction = (
        "Based on the surrounding text in the document, please provide a short, succinct description "
        "of this image. The description should situate the image within the document, helping improve "
        "search retrieval and relevance. Answer only with the succinct description and nothing else."
    )

    # Document-only message comes first. Original author's note: check if there's
    # a cache hit — presumably so the shared document prefix can be cached across
    # images; TODO confirm.
    document_message = ChatMessage(
        role="user",
        blocks=[TextBlock(text=document_text)],
    )
    image_message = ChatMessage(
        role="user",
        content=[
            TextBlock(text="Here is the image we want to describe within the context of the document."),
            ImageBlock(image=image_base64),
            TextBlock(text=instruction),
        ],
    )

    return await _generate_response([document_message, image_message])
