# Gemma 3 12B Demo (eGPU / vLLM)

This notebook demonstrates how to interact with the **Gemma 3 12B** model served via **vLLM** on a local eGPU setup.

## Prerequisites
- Ensure the `kanoa-vllm-gemma` service is running:
  ```bash
  docker compose -f docker/vllm/docker-compose.gemma.yml up -d
  ```
- The API should be accessible at `http://localhost:8000/v1`.

In [None]:
import base64
import io
import os

import matplotlib.pyplot as plt
import requests
from PIL import Image

# Configuration
# Each setting can be overridden with an environment variable so the notebook
# can target another host, key, or model without editing code. The defaults
# point at the local server described in the prerequisites.
# The trailing slash is stripped so f"{API_BASE}/models" never contains "//".
API_BASE = os.environ.get("VLLM_API_BASE", "http://localhost:8000/v1").rstrip("/")
# Sent as the bearer token by get_headers(); "EMPTY" is the default placeholder.
API_KEY = os.environ.get("VLLM_API_KEY", "EMPTY")
# Model name sent with requests and checked against the server's model list.
MODEL_ID = os.environ.get("VLLM_MODEL_ID", "google/gemma-3-12b-it")

def get_headers():
    """Build the HTTP headers used for requests to the vLLM API.

    Returns:
        dict: An ``Authorization`` header carrying the module-level
        ``API_KEY`` as a bearer token, and a JSON ``Content-Type`` header.
    """
    bearer_token = f"Bearer {API_KEY}"
    headers = {}
    headers["Authorization"] = bearer_token
    headers["Content-Type"] = "application/json"
    return headers

In [None]:
# Verify Model Availability
# Ask the server which models it serves and warn if MODEL_ID is not among them.
# Problems are printed rather than raised, so this cell never aborts a run.
try:
    # The timeout keeps the cell from hanging forever when the server is unreachable.
    resp = requests.get(f"{API_BASE}/models", headers=get_headers(), timeout=10)
    resp.raise_for_status()
    models = resp.json()
    model_ids = [m['id'] for m in models['data']]
except requests.RequestException as e:
    # Connection failures, timeouts, and HTTP error statuses.
    print(f"Error connecting to vLLM: {e}")
except (KeyError, TypeError, ValueError) as e:
    # The server answered, but not with the {"data": [{"id": ...}]} shape read above.
    print(f"Unexpected response from vLLM model listing: {e!r}")
else:
    print("Available Models:")
    for model_id in model_ids:
        print(f"- {model_id}")

    if MODEL_ID not in model_ids:
        print(f"\n⚠️ Warning: Expected model '{MODEL_ID}' not found in list. Please check your docker-compose configuration.")

In [None]:
# Prepare Image
# Sample photo that is sent to the model; the URL is split across adjacent
# string literals purely for readability (they concatenate into one string).
IMAGE_URL = (
    "https://upload.wikimedia.org/wikipedia/commons/thumb/d/dd/"
    "Gfp-wisconsin-madison-the-nature-boardwalk.jpg/"
    "2560px-Gfp-wisconsin-madison-the-nature-boardwalk.jpg"
)

def encode_image_from_url(url, timeout=30):
    """Download an image and return it both base64-encoded and as a PIL image.

    Args:
        url: HTTP(S) address of the image to fetch.
        timeout: Seconds to wait for the server before giving up. Passed
            straight to ``requests.get`` so a stalled download cannot hang
            the notebook indefinitely.

    Returns:
        tuple: ``(base64_text, image)`` where ``base64_text`` is the raw
        downloaded bytes encoded as a UTF-8 base64 string and ``image`` is a
        ``PIL.Image.Image`` opened from those same bytes.

    Raises:
        requests.RequestException: If the download fails, times out, or the
            server responds with an HTTP error status.
    """
    # A User-Agent header is sent explicitly rather than relying on the
    # library default.
    response = requests.get(
        url,
        headers={"User-Agent": "Mozilla/5.0"},
        timeout=timeout,
    )
    response.raise_for_status()
    image_data = response.content
    # The original bytes are encoded as-is (no re-compression), so the model
    # receives exactly what was downloaded.
    encoded = base64.b64encode(image_data).decode('utf-8')
    # NOTE(review): Image.open is expected to raise if the bytes are not a
    # recognizable image -- confirm against the installed Pillow version.
    image = Image.open(io.BytesIO(image_data))
    return encoded, image

# Fetch the sample image once: the base64 text goes into the request payload,
# the PIL object is only used for the preview below.
base64_image, img_obj = encode_image_from_url(IMAGE_URL)

# Display Image
fig, ax = plt.subplots(figsize=(10, 6))
ax.imshow(img_obj)
ax.set_axis_off()  # hide ticks and frame; they carry no meaning for a photo
ax.set_title("Input Image")
plt.show()

In [None]:
# Send Request to vLLM
# Build a chat-completion request containing one user message with two parts:
# a text question and the image embedded as a base64 data URL.
# NOTE: the data URL declares image/jpeg; adjust the MIME type if IMAGE_URL is
# changed to point at a different image format.
payload = {
    "model": MODEL_ID,
    "messages": [
        {
            "role": "user",
            "content": [
                {"type": "text", "text": "What is depicted in this image? Describe the lighting and colors."},
                {
                    "type": "image_url",
                    "image_url": {
                        "url": f"data:image/jpeg;base64,{base64_image}"
                    }
                }
            ]
        }
    ],
    "max_tokens": 512,
    "temperature": 0.7
}

# Generation can be slow, so the limit is generous, but it is still finite so
# a stalled server cannot hang the notebook forever.
GENERATION_TIMEOUT_S = 300

print("Sending request to vLLM...")
response = requests.post(
    f"{API_BASE}/chat/completions",
    headers=get_headers(),
    json=payload,
    timeout=GENERATION_TIMEOUT_S,
)
if not response.ok:
    # raise_for_status() reports only the status line; show the (truncated)
    # body too, since that is where the server can explain the failure.
    print(f"vLLM returned HTTP {response.status_code}: {response.text[:2000]}")
response.raise_for_status()

result = response.json()
# Only the first choice is read; the request does not ask for more than one.
content = result['choices'][0]['message']['content']

print("\n--- Model Response ---")
print(content)