# Multi-Modal LLM using Azure OpenAI GPT-4o-mini model for image reasoning

In this notebook, we show how to use the **Azure** OpenAI GPT-4o-mini multi-modal LLM class/abstraction for image understanding and reasoning.

In [None]:
%pip install llama-index-multi-modal-llms-azure-openai
%pip install openai

In [None]:
import os
import sys

# The credentials helper module is looked up one directory above this
# notebook, so that directory is added to the module search path first.
sys.path.insert(1, '../')
import init_creds as creds

# Fetch both values up front, then validate them before touching os.environ.
AZURE_OPENAI_API_KEY = creds.get_api_key()
AZURE_OPENAI_ENDPOINT = creds.get_endpoint()

_required_settings = (
    (AZURE_OPENAI_API_KEY, "No AZURE_OPENAI_API_KEY set for Azure OpenAI API"),
    (AZURE_OPENAI_ENDPOINT, "No AZURE_OPENAI_ENDPOINT set for Azure OpenAI API"),
)
for _setting_value, _missing_message in _required_settings:
    if not _setting_value:
        raise ValueError(_missing_message)

# Export the settings as environment variables; the same API version string
# is published under both the OPENAI_* and AZURE_OPENAI_* names.
_api_version = "2024-07-01-preview"
os.environ.update(
    {
        "AZURE_OPENAI_API_KEY": AZURE_OPENAI_API_KEY,
        "AZURE_OPENAI_ENDPOINT": AZURE_OPENAI_ENDPOINT,
        "OPENAI_API_VERSION": _api_version,
        "AZURE_OPENAI_API_VERSION": _api_version,
    }
)

## Initialize `AzureOpenAIMultiModal` and Load Images from URLs

Unlike the regular `OpenAI` class, you need to pass an `engine` argument in addition to `model`. The `engine` is the name of your model deployment.

In [None]:
from llama_index.multi_modal_llms.azure_openai import AzureOpenAIMultiModal

# `engine` is the name of the Azure model deployment and `model` the model
# itself; in this notebook both are "gpt-4o-mini".
_mm_llm_settings = {
    "model": "gpt-4o-mini",
    "engine": "gpt-4o-mini",
    "max_new_tokens": 300,
}
azure_openai_mm_llm = AzureOpenAIMultiModal(**_mm_llm_settings)

In [None]:
import base64
import requests
from llama_index.core.schema import ImageDocument

image_url = "https://www.visualcapitalist.com/wp-content/uploads/2023/10/US_Mortgage_Rate_Surge-Sept-11-1.jpg"

# requests applies no timeout unless one is given, so an unresponsive host
# would block this cell indefinitely; bound the wait explicitly.
response = requests.get(image_url, timeout=30)
if response.status_code != 200:
    # Report the status code and URL so a failed download can be diagnosed.
    raise ValueError(
        "Error: Could not retrieve image from URL. "
        f"(HTTP {response.status_code}: {image_url})"
    )

# Base64-encode the downloaded bytes as text; the same string is handed to
# ImageDocument below.
base64str = base64.b64encode(response.content).decode("utf-8")

image_document = ImageDocument(image=base64str, image_mimetype="image/jpeg")

In [None]:
from IPython.display import HTML

# Embed the base64 image as a data URI and leave the HTML object as the
# cell's final expression so the notebook renders it.
_image_tag = f'<img width=400 src="data:image/jpeg;base64,{base64str}"/>'
HTML(_image_tag)

### Complete a prompt with an image

In [None]:
# Ask the model for an alternative-text description of the image.
_alt_text_prompt = "Describe the image as an alternative text"
complete_response = azure_openai_mm_llm.complete(
    image_documents=[image_document],
    prompt=_alt_text_prompt,
)
print(complete_response)

In [None]:
# Same image, with the prompt addressed to a real estate agent.
_real_estate_prompt = "Identify the key observations and share the analysis to a real estate agent"
complete_response = azure_openai_mm_llm.complete(
    image_documents=[image_document],
    prompt=_real_estate_prompt,
)
print(complete_response)

In [None]:
# Same image, with the prompt addressed to government officials.
_government_prompt = "Identify the key observations and share the analysis to government officials"
complete_response = azure_openai_mm_llm.complete(
    image_documents=[image_document],
    prompt=_government_prompt,
)
print(complete_response)