# Setup

In [1]:
import base64
import mimetypes
import os

from dotenv import load_dotenv
from azure.identity import DefaultAzureCredential
from azure.ai.projects import AIProjectClient
from azure.ai.inference.models import SystemMessage, UserMessage, TextContentItem

# Pull settings from a local .env file (if any) into the process environment
# before reading them below.
load_dotenv()

# Name of the model deployment passed as `model=` to the chat client, and the
# project endpoint passed to AIProjectClient.
MODEL_DEPLOYMENT = os.getenv("MODEL_DEPLOYMENT")
PROJECT_ENDPOINT = os.getenv("PROJECT_ENDPOINT")

# os.getenv returns None for an unset variable; fail here with a clear message
# instead of letting None reach the SDK calls in later cells.
_missing_settings = [
    name
    for name, value in (
        ("MODEL_DEPLOYMENT", MODEL_DEPLOYMENT),
        ("PROJECT_ENDPOINT", PROJECT_ENDPOINT),
    )
    if not value
]
if _missing_settings:
    raise RuntimeError(
        "Missing required environment variable(s): "
        + ", ".join(_missing_settings)
        + ". Set them in the environment or in a .env file."
    )

In [2]:
# Build the credential first: the environment-variable and managed-identity
# sources are switched off, leaving the remaining DefaultAzureCredential
# sources to authenticate.
azure_credential = DefaultAzureCredential(
    exclude_environment_credential=True,
    exclude_managed_identity_credential=True,
)

# Client for the project identified by PROJECT_ENDPOINT.
project_client = AIProjectClient(
    credential=azure_credential,
    endpoint=PROJECT_ENDPOINT,
)

Make sure to check the deployment's connected resource. 

It may be connected to another project if there is no quota left.

In [3]:
# Show every deployment visible to the project, one plain dict per line.
available_deployments = project_client.deployments.list()
for entry in available_deployments:
    print(entry.as_dict())

{'name': 'Phi-4-multimodal-instruct', 'type': 'ModelDeployment', 'modelName': 'Phi-4-multimodal-instruct', 'modelVersion': '1', 'modelPublisher': 'Microsoft', 'capabilities': {'chat_completion': 'true'}, 'sku': {'name': 'GlobalStandard', 'capacity': 1}}
{'name': 'text-embedding-ada-002', 'type': 'ModelDeployment', 'modelName': 'text-embedding-ada-002', 'modelVersion': '2', 'modelPublisher': 'OpenAI', 'capabilities': {'embeddings': 'true'}, 'sku': {'name': 'GlobalStandard', 'capacity': 120}}


In [4]:
# Chat-completions client obtained from the project; used by every prompt below.
inference_ops = project_client.inference
chat_client = inference_ops.get_chat_completions_client()

# Usage

## Prompt with audio file

In [12]:
# Ask the model a question about an audio clip referenced by URL.
file_path = "https://github.com/MicrosoftLearning/mslearn-ai-language/raw/refs/heads/main/Labfiles/09-audio-chat/data/avocados.mp3"

# The user turn carries two parts: the text question and the audio reference.
question_part = TextContentItem(text="What is the person talking about?")
audio_part = {"type": "audio_url", "audio_url": {"url": file_path}}
user_turn = UserMessage([question_part, audio_part])

response = chat_client.complete(model=MODEL_DEPLOYMENT, messages=[user_turn])

# Only the first choice's text is shown.
print(response.choices[0].message.content)

The person is updating their order for next month's delivery.


In [15]:
# Request a summary of a customer's voice message referenced by URL.
file_path = "https://github.com/MicrosoftLearning/mslearn-ai-language/raw/refs/heads/main/Labfiles/09-audio-chat/data/fresas.mp3"

# Content parts of the single user message: instruction text, then the audio.
message_parts = [
    TextContentItem(text="Can you summarize this customer's voice message?"),
    {"type": "audio_url", "audio_url": {"url": file_path}},
]

response = chat_client.complete(
    model=MODEL_DEPLOYMENT,
    messages=[UserMessage(message_parts)],
)

# Print the text of the first returned choice.
first_choice = response.choices[0]
print(first_choice.message.content)

The customer, Sarah from Kentosso Cake Shop in Midtown Manhattan, is preparing for the spring festival and wants to order two boxes of strawberries to be delivered this Friday. She is expecting a call to confirm the order.


## Prompt with image file (using image URL)

In [17]:
# Ask the model to describe an image that is referenced by URL.
file_path = "https://github.com/MicrosoftLearning/mslearn-ai-language/raw/refs/heads/main/Instructions/media/voice-live-tile.png"

# Image reference in the dict form used for "image_url" content parts.
image_part = {
    "type": "image_url",
    "image_url": {"url": file_path},
}
prompt_part = TextContentItem(text="What is the content of the image?")

conversation = [UserMessage([prompt_part, image_part])]
response = chat_client.complete(model=MODEL_DEPLOYMENT, messages=conversation)

# Display the text of the first choice.
print(response.choices[0].message.content)

The image contains a header with the text "Voice Live" in bold, followed by a "Preview" button. Below the header, there's a description that reads "Engage in natural conversations with a voice agent with a realistic AI voice and interactive avatar." To the left of the text, there is an illustration of a speech bubble with a graph icon on the upper left corner and an audio speaker icon on the upper right corner. The background is white with a light gradient at the bottom.


## Prompt with image file (using Base64 image data)

The `url` value should be in this format: `data:{mime_type};base64,{base64_encoded_data}`

In [None]:
# Send a local image to the model as a Base64 data URL and ask for a short
# description. `base64` and `mimetypes` come from the notebook's import cell.
image_file = "data/street.jpg"

# Derive the MIME type from the file name so the data URL stays correct if
# image_file is pointed at a non-JPEG image; fall back to JPEG when the type
# cannot be guessed.
mime_type = mimetypes.guess_type(image_file)[0] or "image/jpeg"

# Read the raw bytes and encode them as Base64 text for embedding in the URL.
with open(image_file, "rb") as f:
    b64_image = base64.b64encode(f.read()).decode("utf-8")

response = chat_client.complete(
    model=MODEL_DEPLOYMENT,
    messages=[
        UserMessage(
            [
                TextContentItem(text="Write a short description for this image?"),
                {
                    "type": "image_url",
                    # Data URL format: data:{mime_type};base64,{base64_encoded_data}
                    "image_url": {"url": f"data:{mime_type};base64,{b64_image}"}
                }
            ]
        )
    ]
)

# Print the text of the first returned choice.
print(response.choices[0].message.content)

a man walking a dog in a city
