### Prerequisites
1.  Ollama Container.

    In a separate terminal, run the docker compose file. 

    `docker compose -f vision-compose.yml up -d`

2. Moondream vision model.

   Execute the `load_model` function (defined below) with the `moondream` model name and the Ollama URL, e.g. `load_model("http://localhost:11434", "moondream")`.

<!--
    Once the docker container is running, download the moondream model via curl


   `curl http://localhost:11434/api/pull -d '{ "name": "moondream:latest" }'`

    Verify that the model has been downloaded with the command

   `curl http://localhost:11434/api/tags`
-->
 

In [1]:
import requests
import base64
import requests
import os


def load_model(ollama_url, model_name):
    """Ask an Ollama server to pull a model and print the server's reply.

    Args:
        ollama_url: Base URL of the Ollama server, e.g.
            "http://localhost:11434". A trailing slash is tolerated.
        model_name: Model to pull. A bare name such as "moondream" is
            requested as "moondream:latest"; a name that already carries a
            tag, such as "llava:13b", is sent unchanged.

    Returns:
        None. The HTTP status and the response body are printed.
    """
    url = ollama_url.rstrip("/") + "/api/pull"

    # Default to ":latest" only when the caller did not choose a tag. The tag
    # lives in the last path segment, so a registry port ("host:5000/model")
    # is not mistaken for one.
    last_segment = model_name.rsplit("/", 1)[-1]
    model = model_name if ":" in last_segment else model_name + ":latest"
    payload = {"name": model}

    headers = {
        "Content-Type": "application/json"
    }

    response = requests.post(url, json=payload, headers=headers)

    if response.status_code == 200:
        print("Request successful!")
        print(response.text)
    else:
        print(f"Request failed with status code: {response.status_code}")
        print(response.text)

    

In [2]:
def explain_image(image_path, model, prompt, ollama_url):
    """Send an image and a prompt to an Ollama chat model and print the reply.

    Args:
        image_path: Path of the image file to send; it is read in binary mode.
        model: Name of the model to query, e.g. "moondream".
        prompt: Text of the user message that accompanies the image.
        ollama_url: Base URL of the Ollama server, e.g.
            "http://localhost:11434". A trailing slash is tolerated.

    Returns:
        None. On HTTP 200 every non-empty line of the response is printed as
        it arrives; otherwise the status code and body are printed.
    """
    # The image travels inside the JSON payload as base64 text.
    with open(image_path, "rb") as image_file:
        encoded_image = base64.b64encode(image_file.read()).decode("utf-8")

    url = ollama_url.rstrip("/") + "/api/chat"
    payload = {
        "model": model,
        "messages": [
            {
                "role": "user",
                "content": prompt,
                "images": [encoded_image]
            }
        ]
    }

    headers = {
        "Content-Type": "application/json"
    }

    # stream=True makes iter_lines() yield lines as the server sends them
    # instead of after the whole reply has been buffered; the context manager
    # releases the connection when done.
    with requests.post(url, json=payload, headers=headers, stream=True) as response:
        if response.status_code == 200:
            for chunk in response.iter_lines():
                if chunk:  # skip empty keep-alive lines
                    print(chunk.decode("utf-8"))
        else:
            print(f"Error: {response.status_code} - {response.text}")


In [3]:
# Pull the Moondream model from the local Ollama server.
model_name = "moondream"
ollama_url = "http://localhost:11434"
load_model(ollama_url=ollama_url, model_name=model_name)

Request successful!
{"status":"pulling manifest"}
{"status":"pulling e554c6b9de01","digest":"sha256:e554c6b9de016673fd2c732e0342967727e9659ca5f853a4947cc96263fa602b","total":828661152,"completed":828661152}
{"status":"pulling 4cc1cb3660d8","digest":"sha256:4cc1cb3660d87ff56432ebeb7884ad35d67c48c7b9f6b2856f305e39c38eed8f","total":909777984,"completed":909777984}
{"status":"pulling c71d239df917","digest":"sha256:c71d239df91726fc519c6eb72d318ec65820627232b2f796219e87dcf35d0ab4","total":11357,"completed":11357}
{"status":"pulling 4b021a3b4b4a","digest":"sha256:4b021a3b4b4a58b06921bdf163d1b61e78b74d7df434a7f735edfdc807e68377","total":77,"completed":77}
{"status":"pulling 9468773bdc1f","digest":"sha256:9468773bdc1f9908492a3cdaab70bf5ac0a45d2d52717e8208c0deece2765262","total":65,"completed":65}
{"status":"pulling ba5fbb481ada","digest":"sha256:ba5fbb481ada654c85475473fa862b81eb5539cecc56da57f90ef5af56798be2","total":562,"completed":562}
{"status":"verifying sha256 digest"}
{"status":"writing 

In [4]:
# Ask the Moondream model about the image at ../../docs/speech-inference.png.
url = "http://localhost:11434"
model = "moondream"
prompt = "What is in this image?"

explain_image(
    image_path="../../docs/speech-inference.png",
    model=model,
    prompt=prompt,
    ollama_url=url,
)

{"model":"moondream","created_at":"2024-06-15T15:21:23.784975821Z","message":{"role":"assistant","content":"\n"},"done":false}
{"model":"moondream","created_at":"2024-06-15T15:21:23.821849559Z","message":{"role":"assistant","content":"The"},"done":false}
{"model":"moondream","created_at":"2024-06-15T15:21:23.860103741Z","message":{"role":"assistant","content":" image"},"done":false}
{"model":"moondream","created_at":"2024-06-15T15:21:23.905786298Z","message":{"role":"assistant","content":" presents"},"done":false}
{"model":"moondream","created_at":"2024-06-15T15:21:23.948186849Z","message":{"role":"assistant","content":" a"},"done":false}
{"model":"moondream","created_at":"2024-06-15T15:21:23.989728331Z","message":{"role":"assistant","content":" flow"},"done":false}
{"model":"moondream","created_at":"2024-06-15T15:21:24.027391181Z","message":{"role":"assistant","content":"chart"},"done":false}
{"model":"moondream","created_at":"2024-06-15T15:21:24.064499551Z","message":{"role":"assista

In [None]:
# Pull the LLaVA model from the local Ollama server.
model_name = "llava"
ollama_url = "http://localhost:11434"
load_model(ollama_url=ollama_url, model_name=model_name)

In [None]:
# Ask the LLaVA model about the image at ../../docs/speech-inference.png.
url = "http://localhost:11434"
model = "llava"
prompt = "What is in this image?"

explain_image(
    image_path="../../docs/speech-inference.png",
    model=model,
    prompt=prompt,
    ollama_url=url,
)