## LlamaStack Vision API


In [5]:
import asyncio
import base64
import mimetypes
from llama_stack_client import LlamaStackClient
from llama_stack_client.lib.inference.event_logger import EventLogger
from llama_stack_client.types import UserMessage
from termcolor import cprint
import os

## Configuration
Choose the vision model to use (the last `MODEL_NAME` assignment takes effect):

In [46]:
# Candidate vision models. Keep the one you want as the LAST assignment.
MODEL_NAME = 'sambanova/Llama-3.2-11B-Vision-Instruct'
# This second assignment overrides the line above, so the 90B model is the one in effect.
MODEL_NAME = 'sambanova/Llama-3.2-90B-Vision-Instruct'

## Helper Functions
Let's create some utility functions to handle image processing and API interaction:

In [None]:
import base64
import mimetypes
from termcolor import cprint
from llama_stack_client.lib.inference.event_logger import EventLogger
from llama_stack_client import LlamaStackClient


def encode_image_to_data_url(file_path: str) -> str:
    """
    Build a base64 ``data:`` URL from the contents of a file.

    The MIME type is guessed from the file name; the file is then read in
    binary mode and its bytes are base64-encoded into the URL payload.

    Args:
        file_path: Location of the file to embed.

    Returns:
        A string of the form ``data:<mime>;base64,<payload>``.

    Raises:
        ValueError: If no MIME type can be guessed from ``file_path``.
    """
    # guess_type returns (type, encoding); only the type is needed here.
    guessed_type = mimetypes.guess_type(file_path)[0]
    if guessed_type is None:
        raise ValueError("Could not determine MIME type of the file")

    with open(file_path, "rb") as handle:
        raw_bytes = handle.read()

    payload = base64.b64encode(raw_bytes).decode("utf-8")
    return "data:" + guessed_type + ";base64," + payload


async def process_image(
    client: LlamaStackClient,
    image_path: str,
    stream: bool = True,
    prompt: str = 'How many different colors are in this image?',
):
    """
    Send an image plus a text prompt to the LlamaStack chat-completion API.

    The image is embedded as a base64 data URL in a first user message; the
    text prompt follows as a second user message. The model's reply is
    printed to stdout and also returned.

    Args:
        client: Initialized client.
        image_path: Path to the image file.
        stream: Whether to stream the response.
        prompt: Question to ask about the image.

    Returns:
        In streaming mode, the concatenation of all streamed text deltas.
        Otherwise, the content of the completion message.

    Raises:
        ValueError: Propagated from ``encode_image_to_data_url`` when the
            MIME type of ``image_path`` cannot be determined.
    """
    data_url = encode_image_to_data_url(image_path)

    messages = [
        {
            'role': 'user',
            'content': {'type': 'image', 'image': {'url': {'uri': data_url}}},
        },
        {
            'role': 'user',
            'content': prompt,
        },
    ]

    cprint("User> Sending image for analysis...", "green")
    response = client.inference.chat_completion(
        messages=messages,
        model_id=MODEL_NAME,
        stream=stream,
    )

    if not stream:
        content = response.completion_message.content
        print(f'Type: {type(content)}, Value:{content}')
        return content

    pieces = []
    for chunk in response:
        if chunk.event is None:
            continue
        # Assumption (confirm against the client's delta types): a delta may
        # lack a `text` attribute; skip such events instead of raising.
        piece = getattr(chunk.event.delta, 'text', None)
        if not piece:
            continue
        # Emit each delta exactly once, inline, so the reply reads as
        # continuous text while it streams.
        print(piece, end='', flush=True)
        pieces.append(piece)
    # Terminate the streamed line.
    print()
    return ''.join(pieces)


## Agent Configuration

Agents are configured using the `AgentConfig` class, which includes:

- `Model`: The underlying LLM to power the agent.
- `Instructions`: System prompt that defines the agent’s behavior.
- `Tools`: Capabilities the agent can use to interact with external systems.
- `Safety Shields`: Guardrails to ensure responsible AI behavior.

## Chat with Image

In [49]:
# Initialize client and process image
async def main(stream: bool):
    """
    Create a client for the local LlamaStack server and run the image demo.

    Args:
        stream: Forwarded to ``process_image`` to select streaming or
            one-shot output.
    """
    # The server port is read from the environment; a missing
    # LLAMA_STACK_PORT variable raises KeyError here.
    port = os.environ['LLAMA_STACK_PORT']
    stack_client = LlamaStackClient(base_url=f"http://localhost:{port}")

    # Show the working directory, since the image path below is relative to it.
    print(os.getcwd())
    sample_image = "../../../images/SambaNova-dark-logo-1.png"
    await process_image(stack_client, sample_image, stream)


# Execute the main function.
# NOTE: a bare top-level `await` is only valid where the interpreter supports it
# (e.g. a Jupyter/IPython cell); in a plain script use `asyncio.run(main(stream=True))`.
await main(stream=True)

/Users/francescar/Documents/ai-starter-kit/integrations/llama_stack/notebooks
User> Sending image for analysis...
