In [19]:
import os

# Define the target directory
target_directory = (
    r"C:\Users\pablosal\Desktop\gbbai-azure-aoai"  # change your directory here
)

# Check if the directory exists
if os.path.exists(target_directory):
    # Change the current working directory
    os.chdir(target_directory)
    print(f"Directory changed to {os.getcwd()}")
else:
    print(f"Directory {target_directory} does not exist.")

Directory C:\Users\pablosal\Desktop\gbbai-azure-aoai does not exist.


In [20]:
# Create an instance of the client. You can find it in src/aoai/azure_openai.py.
# It is essentially a wrapper using dependency injection to automate the initialization
# and most used API calls.
from src.aoai.azure_openai import AzureOpenAIManager
from utils.ml_logging import get_logger

# Set up logger
logger = get_logger()

In [21]:
import fitz # PyMuPDF
import os
from typing import List

def convert_pdf_to_images(pdf_path: str, output_folder: str, dpi: int = 72) -> List[str]:
    """
    Convert a PDF file into images, one image per page, with controlled DPI.

    Parameters:
    - pdf_path (str): The path to the PDF file.
    - output_folder (str): The folder where the images will be saved.
    - dpi (int): The DPI for the output images (lower for reduced resolution).

    Returns:
    - List[str]: A list of paths to the saved images.
    """
    if not os.path.exists(output_folder):
        os.makedirs(output_folder)

    pdf_document = fitz.open(pdf_path)
    image_paths = []

    for page_num in range(len(pdf_document)):
        page = pdf_document.load_page(page_num)
        # Set a lower resolution
        pix = page.get_pixmap(matrix=fitz.Matrix(dpi / 72, dpi / 72))
        image_path = os.path.join(output_folder, f'page_{page_num + 1}.png')
        pix.save(image_path)
        image_paths.append(image_path)

    return image_paths

# Example usage
pdf_path = r'utils\artifacts\policy_images\pa-82110pa0030004-pb-dcu-pa-i-fam-prf-23.pdf'
output_folder = r'utils\artifacts\policy_images'
policy_image_paths = convert_pdf_to_images(pdf_path, output_folder, dpi=45)

print("List of image paths:")
for path in policy_image_paths:
    print(path)

List of image paths:
utils\artifacts\policy_images\page_1.png
utils\artifacts\policy_images\page_2.png
utils\artifacts\policy_images\page_3.png
utils\artifacts\policy_images\page_4.png
utils\artifacts\policy_images\page_5.png


In [None]:
pdf -> images -> number on the image (superimposed) -> dpi(108) -> object array bytes   

In [22]:
from utils.chat_prompts.classification_test import PROMPT

print(PROMPT)


You are a Classification Bot.

Task Overview:
You will be provided with images of medical and dental plans containing coverage details for individuals and families. Your task is to analyze these images, identify which image(s) contain the most relevant information for each question listed below, and extract the specific information requested.

Instructions:

1. Analyze the Images:
   - For each question, examine all provided images to determine which ones contain the most relevant information.
   - Only consider images where the text directly relates to the question.
   - Pay attention to synonyms or alternative phrases that convey the same meaning.

2. Answer Format:
   - Output your answers in the JSON format provided below.
   - For each question:
     - answer_image: Indicate the image number(s) where the relevant information is found, or "N/A" if not available.
     - answer: Extract and provide the specific information requested in the question, or "N/A" if not available.
   - U

## Leveraging (gpt4o 05_13) for Structured Outputs

In [23]:
azure_openai_client_05_13 = AzureOpenAIManager(completion_model_name='AZURE_AOAI_CHAT_MODEL_NAME_DEPLOYMENT_ID_TEST_05_13')

In [24]:
policy_image_paths[2:4]

['utils\\artifacts\\policy_images\\page_3.png',
 'utils\\artifacts\\policy_images\\page_4.png']

In [25]:
api_response = await azure_openai_client_05_13.generate_chat_response(query=PROMPT, system_message_content='''You are a multimodal classification bot. You will be provided with images of medical 
                                                           and dental plans containing coverage details for individuals and families. 
                                                           Your task is to analyze these images and answer specific questions based on the relevance of
                                                           the text in the images to each question.''',
                                                           image_paths=policy_image_paths[1:3], 
                                                           conversation_history=[],
                                                           stream=False,
                                                           response_format='json_object',
                                                           max_tokens=2000)

2024-09-16 13:43:12,489 - micro - MainProcess - INFO     Function generate_chat_response started at 2024-09-16 13:43:12 (azure_openai.py:generate_chat_response:324)
2024-09-16 13:43:12,518 - micro - MainProcess - INFO     Sending request to Azure OpenAI at 2024-09-16 13:43:12 (azure_openai.py:generate_chat_response:361)
2024-09-16 13:44:06,421 - micro - MainProcess - INFO     Function generate_chat_response finished at 2024-09-16 13:44:06 (Duration: 53.93 seconds) (azure_openai.py:generate_chat_response:407)


In [26]:
import json
from pprint import pprint

response_content = api_response['response']
print("Type of response:", type(response_content))

if isinstance(response_content, str):
    try:
        json_object = json.loads(response_content)
        pprint(json_object)
    except json.JSONDecodeError as e:
        print("Failed to decode JSON:", e)
else:
    pprint(response_content)

Type of response: <class 'dict'>
{'questions': [{'answer': 'N/A',
                'answer_image': 'N/A',
                'id': 'plan_information',
                'question': 'Does it contain details like plan name, name of '
                            'the payer or administrator of the plan, plan '
                            'type, plan effective date?'},
               {'answer': 'N/A',
                'answer_image': 'N/A',
                'id': 'plan_coinsurance',
                'question': 'Does it contain coverage details about the '
                            'plan-level coinsurance? (The amounts are '
                            'expressed as in-network and out-of-network '
                            'providers, usually mentioned together.)'},
               {'answer': 'N/A',
                'answer_image': 'N/A',
                'id': 'medical_deductible',
                'question': 'Does it contain coverage details about the annual '
                            'medical

## Leveraging (gpt4o 08_06) for Structured Outputs

In [11]:
from openai import AzureOpenAI
from dotenv import load_dotenv
import os

# Load environment variables from .env file
load_dotenv()

# Retrieve values from environment variables
AZURE_OPENAI_KEY_TEST = os.getenv("AZURE_OPENAI_KEY_TEST")
AZURE_OPENAI_API_ENDPOINT_TEST = os.getenv("AZURE_OPENAI_API_ENDPOINT_TEST")
AZURE_OPENAI_API_VERSION_TEST = os.getenv("AZURE_OPENAI_API_VERSION_TEST")
AZURE_AOAI_CHAT_MODEL_NAME_DEPLOYMENT_ID_TEST_08_06 = os.getenv("AZURE_AOAI_CHAT_MODEL_NAME_DEPLOYMENT_ID_TEST_08_06")

# Initialize AzureOpenAI client with environment variables
azure_openai_client = AzureOpenAI(
    api_key=AZURE_OPENAI_KEY_TEST,
    api_version=AZURE_OPENAI_API_VERSION_TEST,
    azure_endpoint=AZURE_OPENAI_API_ENDPOINT_TEST,
)

### Pydantic Integration (SDK)

In [16]:
from pydantic import BaseModel, Field, validator
from typing import List, Optional

# Define Pydantic models
class Question(BaseModel):
    id: str
    answer_image: Optional[str] = Field(None, nullable=True)
    answer: Optional[str] = Field(None, nullable=True)
    question: str

    @validator('answer_image', 'answer', 'question', pre=True, always=True)
    def default_none(cls, v):
        return v or None

class ClassificationBotResponse(BaseModel):
    questions: List[Question]

C:\Users\pablosal\AppData\Local\Temp\ipykernel_18496\824765058.py:11: PydanticDeprecatedSince20: Pydantic V1 style `@validator` validators are deprecated. You should migrate to Pydantic V2 style `@field_validator` validators, see the migration guide for more details. Deprecated in Pydantic V2.0 to be removed in V3.0. See Pydantic V2 Migration Guide at https://errors.pydantic.dev/2.8/migration/
  @validator('answer_image', 'answer', 'question', pre=True, always=True)


In [17]:
import base64
import mimetypes
from typing import List, Tuple, Any

def encode_images(image_paths: List[str]) -> List[Tuple[str, str]]:
    encoded_images = []
    for path in image_paths:
        try:
            with open(path, 'rb') as image_file:
                encoded_image = base64.b64encode(image_file.read()).decode('utf-8')
                mime_type, _ = mimetypes.guess_type(path)
                if mime_type is None:
                    mime_type = "application/octet-stream"
                encoded_images.append((encoded_image, mime_type))
        except FileNotFoundError:
            print(f"Image not found: {path}")
        except Exception as e:
            print(f"Error reading image {path}: {e}")
    return encoded_images

def generate_chat_response(
    model: str,
    prompt: str,
    image_paths: List[str],
    response_format: Any,
    max_tokens: int = 2000,
):
    system_message = {
        "role": "system",
        "content": "You are a multimodal classification bot. You will be provided with images of medical and dental plans containing coverage details for individuals and families. Your task is to analyze these images and answer specific questions based on the relevance of the text in the images to each question."
    }

    # Encode images and guess MIME types
    encoded_images = encode_images(image_paths)

    user_content = prompt
    for idx, (encoded_image, mime_type) in enumerate(encoded_images, start=1):
        user_content += f"\n\nImage {idx}:\n![Image](data:{mime_type};base64,{encoded_image})"

    user_message = {
        "role": "user",
        "content": user_content
    }

    messages = [system_message, user_message]

    try:
        response = azure_openai_client.beta.chat.completions.parse(
            model=model,
            messages=messages,
            response_format=response_format,
            max_tokens=max_tokens,
        )
    except Exception as e:
        print(f"API call failed: {e}")
        return None

    message = response.choices[0].message

    if message.parsed:
        return message.parsed
    else:
        print("Assistant refused the request or parsing failed.")
        print("Refusal:", message.refusal)
        return None

In [18]:
api_response = generate_chat_response(
    model=AZURE_AOAI_CHAT_MODEL_NAME_DEPLOYMENT_ID_TEST_08_06,
    prompt=PROMPT,
    image_paths=policy_image_paths[1:3],
    response_format=ClassificationBotResponse,
    max_tokens=2000
)

if api_response:
    # Process the response
    for question in api_response.questions:
        print(f"ID: {question.id}")
        print(f"Answer Image: {question.answer_image}")
        print(f"Answer: {question.answer}")
        print(f"Question: {question.question}\n")
else:
    print("No valid response received.")

ID: plan_information
Answer Image: N/A
Answer: N/A
Question: Does it contain details like plan name, name of the payer or administrator of the plan, plan type, plan effective date?

ID: plan_coinsurance
Answer Image: Image 2
Answer: 75% coinsurance for in-network providers, 50% coinsurance for out-of-network providers.
Question: Does it contain coverage details about the plan-level coinsurance? (The amounts are expressed as in-network and out-of-network providers, usually mentioned together.)

ID: medical_deductible
Answer Image: Image 1
Answer: $1,500 individual, $3,000 family for in-network; $2,500 individual, $5,000 family for out-of-network.
Question: Does it contain coverage details about the annual medical deductible amount that you pay for the plan? (The amounts are expressed as in-network and out-of-network providers for individual and family, usually mentioned together.)

ID: annual_out_of_pocket_maximum
Answer Image: Image 2
Answer: $6,000 individual, $12,000 family for in-ne

In [20]:
api_response.questions

[Question(id='plan_information', answer_image='N/A', answer='N/A', question='Does it contain details like plan name, name of the payer or administrator of the plan, plan type, plan effective date?'),
 Question(id='plan_coinsurance', answer_image='Image 2', answer='75% coinsurance for in-network providers, 50% coinsurance for out-of-network providers.', question='Does it contain coverage details about the plan-level coinsurance? (The amounts are expressed as in-network and out-of-network providers, usually mentioned together.)'),
 Question(id='medical_deductible', answer_image='Image 1', answer='$1,500 individual, $3,000 family for in-network; $2,500 individual, $5,000 family for out-of-network.', question='Does it contain coverage details about the annual medical deductible amount that you pay for the plan? (The amounts are expressed as in-network and out-of-network providers for individual and family, usually mentioned together.)'),
 Question(id='annual_out_of_pocket_maximum', answer_

### API Request 

In [12]:
def build_request_body(
    model: str,
    prompt: str,
    image_paths: List[str],
    response_format: dict,
    max_tokens: int = 2000,
) -> dict:
    # Prepare the system message
    system_message = {
        "role": "system",
        "content": (
            "You are a multimodal classification bot. You will be provided with images of medical "
            "and dental plans containing coverage details for individuals and families. "
            "Your task is to analyze these images and answer specific questions based on the relevance of "
            "the text in the images to each question."
        ),
    }

    # Encode images and guess MIME types
    encoded_images = encode_images(image_paths)

    # Build the user message content
    user_content = prompt
    for idx, (encoded_image, mime_type) in enumerate(encoded_images, start=1):
        user_content += f"\n\nImage {idx}:\n![Image](data:{mime_type};base64,{encoded_image})"

    user_message = {
        "role": "user",
        "content": user_content
    }

    messages = [system_message, user_message]

    # Build the payload
    payload = {
        "model": model,
        "messages": messages,
        "response_format": response_format,
        "max_tokens": max_tokens,
    }

    return payload


In [13]:
custom_response_format = {
    "type": "json_schema",
    "json_schema": {
        "name": "classification_bot_response",
        "strict": True,
        "schema": {
            "type": "object",
            "properties": {
                "questions": {
                    "type": "array",
                    "items": {
                        "type": "object",
                        "properties": {
                            "id": {"type": "string"},
                            "answer_image": {"type": ["string", "null"]},
                            "answer": {"type": ["string", "null"]},
                            "question": {"type": "string"},
                        },
                        "required": ["id", "answer_image", "answer", "question"],
                        "additionalProperties": False,
                    },
                }
            },
            "required": ["questions"],
            "additionalProperties": False,
        },
    },
}


In [14]:
import requests
def call_azure_openai_chat_completions_api(
    azure_endpoint: str,
    chat_model_name: str,
    api_key: str,
    body: dict,
    api_version: str = "2024-02-01",
):
    """
    Calls the Azure OpenAI API with the given parameters.
    """
    url = f"{azure_endpoint}openai/deployments/{chat_model_name}/chat/completions?api-version={api_version}"
    headers = {
        "Content-Type": "application/json",
        "api-key": api_key,
    }

    try:
        response = requests.post(url, headers=headers, json=body)
        response.raise_for_status()  # Raises HTTPError for bad responses

        # Extract rate limit headers if needed
        # rate_limit_headers = extract_rate_limit_and_usage_info(response)
        return response.status_code, response.json(), {}
    except requests.ConnectionError as e:
        print("The server could not be reached")
        print(e)
        return None, None, {}
    except requests.HTTPError as e:
        print(f"HTTP error occurred: {e}")
        return response.status_code, e.response.json(), {}
    except Exception as err:
        print(f"An error occurred: {err}")
        return None, None, {}


In [18]:

# Build the request body
request_body = build_request_body(
    model=AZURE_AOAI_CHAT_MODEL_NAME_DEPLOYMENT_ID_TEST_08_06,
    prompt=PROMPT,
    image_paths=policy_image_paths[1:3],
    response_format=custom_response_format,
    max_tokens=2000
)

# Send the request
status_code, response_json, _ = call_azure_openai_chat_completions_api(
    azure_endpoint=AZURE_OPENAI_API_ENDPOINT_TEST,
    chat_model_name=AZURE_AOAI_CHAT_MODEL_NAME_DEPLOYMENT_ID_TEST_08_06,
    api_key=AZURE_OPENAI_KEY_TEST,
    body=request_body
)

if response_json:
    # Extract the assistant's message content
    try:
        message_content = response_json['choices'][0]['message'].get('content', '')
        # Parse the assistant's response as JSON
        try:
            parsed_response = json.loads(message_content)
            print("Assistant's response:")
            print(json.dumps(parsed_response, indent=2))
        except json.JSONDecodeError:
            print("Failed to parse assistant's response as JSON.")
            print("Assistant's raw response:")
            print(message_content)
    except KeyError:
        print("Unexpected response structure.")
        print(response_json)
else:
    print("No response received.")


Assistant's response:
{
  "questions": [
    {
      "id": "plan_information",
      "answer_image": "Image 2",
      "answer": "Plan Name: PPO Plan, Payer: Unity Health, Plan Type: PPO",
      "question": "Does it contain details like plan name, name of the payer or administrator of the plan, plan type, plan effective date?"
    },
    {
      "id": "plan_coinsurance",
      "answer_image": "Image 2",
      "answer": "In-network: 20%, Out-of-network: 40%",
      "question": "Does it contain coverage details about the plan-level coinsurance? (The amounts are expressed as in-network and out-of-network providers, usually mentioned together.)"
    },
    {
      "id": "medical_deductible",
      "answer_image": "Image 2",
      "answer": "Individual: $500 In-network / $1,000 Out-of-network, Family: $1,000 In-network / $2,000 Out-of-network",
      "question": "Does it contain coverage details about the annual medical deductible amount that you pay for the plan? (The amounts are expressed 