In [19]:
from ollama_helper.llm.llm_helper import async_llm_generate
from ollama_helper.vlm.vlm_helper import async_stream_vlm_generate
from pathlib import Path
# test_image = str(Path().resolve() / "test.png")

In [17]:
async def test_real_time_vlm_streaming(
    use_system_prompt: bool = True,
    *,
    prompt=None,
    image_path=None,
    model: str = "granite3.2-vision:latest",
) -> None:
    """Smoke-test real-time VLM streaming for a screenshot question.

    Prints the test configuration, then awaits ``async_stream_vlm_generate``
    with the prompt, image path, model and (optional) system prompt. The
    helper is expected to display the chunks as they arrive; this function
    itself only prints the configuration and any error.

    Args:
        use_system_prompt: When true, a "webpage screenshot reader" system
            prompt is sent with the request; otherwise ``system=None``.
        prompt: Question to ask about the image. ``None`` (default) uses the
            built-in "open requests" prompt.
        image_path: Path (``str`` or ``Path``) of the image to analyse.
            ``None`` (default) uses ``test.png`` in the current working
            directory, resolved to an absolute path.
        model: Model name passed to the helper.

    Returns:
        None. Failures are reported on stdout with an ``Error:`` prefix
        instead of being raised, so a notebook run keeps going.
    """
    # Defaults use explicit "\n" escapes so the exact runtime text (including
    # the trailing space and the continuation-line indentation) does not
    # depend on how this source file is formatted.
    default_prompt = (
        "Find all the open requests in the image and output all request details as a json object, \n"
        "    only include the title of each request in the list"
    )
    test_prompt = default_prompt if prompt is None else prompt

    # Use an absolute path so the printed location is unambiguous.
    if image_path is None:
        test_image = str(Path().resolve() / "test.png")
    else:
        test_image = str(image_path)
    test_model = model

    print(f"\nTesting real-time VLM streaming with model: {test_model}")
    print(f"Prompt: {test_prompt}")
    print(f"Image: {test_image}")

    # Define system prompt if enabled
    system_prompt = None
    if use_system_prompt:
        system_prompt = (
            "You are a good webpage screenshot reader. \n"
            "        You will be given a screenshot of a webpage and you properly analyse the image"
            " before answering the user's question."
        )
        print(f"Using system prompt: {system_prompt}")

    # Fail fast with a clear message: the default path depends on the current
    # working directory, which is easy to get wrong in a notebook.
    if not Path(test_image).is_file():
        print(f"Error: image file not found: {test_image}")
        return

    try:
        await async_stream_vlm_generate(
            prompt=test_prompt,
            image_path=test_image,
            model=test_model,
            system=system_prompt,
        )
    except Exception as e:
        # Deliberately best-effort: report the failure instead of raising.
        print(f"Error: {e}")

In [18]:
# Run the streaming test with its default arguments (system prompt enabled).
# Top-level `await` is valid here because the cell runs under IPython/Jupyter;
# in a plain Python script this call would need asyncio.run(...).
await test_real_time_vlm_streaming()

INFO:vlm_client:Using streaming chat API with VLM model: granite3.2-vision:latest



Testing real-time VLM streaming with model: granite3.2-vision:latest
Prompt: Find all the open requests in the image and output all request details as a json object, 
    only include the title of each request in the list
Image: C:\Users\Prince\Documents\GitHub\Proejct-Karna\offline-ai-assistant\karna-python-backend\inference\ollama_module\test.png
Using system prompt: You are a good webpage screenshot reader. 
        You will be given a screenshot of a webpage and you properly analyse the image before answering the user's question.

Streaming VLM response in real-time:
--------------------------------------------------
Using system prompt: You are a good webpage screenshot reader. 
       ...


INFO:httpx:HTTP Request: POST http://127.0.0.1:11434/api/chat "HTTP/1.1 200 OK"



{
  "title": "Help fixing an excel budget file",
  "description": "3 interested 29 minutes ago $60/hr",
  "url": "https://www.codementor.com/request/help-fixing-an-excel-budget-file"
},
{
  "title": "Need help with building a budget",
  "description": "1 interest 40 minutes ago $60/hr",
  "url": "https://www.codementor.com/request/need-help-with-building-a-budget"
},
{
  "title": "Linux and C support for school work",
  "description": "16 interested an hour ago $80/hr",
  "url": "https://www.codementor.com/request/linux-and-c-support-for-school-work"
},
{
  "title": "My posts",
  "description": "2 started  Find freelance jobs with pay ranging from $50-$80/hr.",
  "url": "https://www.codementor.com/request/my-posts"
},
{
  "title": "Need help with an Excel task",
  "description": "1 interested 2 hours ago $60/hr",
  "url": "https://www.codementor.com/request/need-help-with-an-excel-task"
},
{
  "title": "Looking for a tutor to teach me Azure Networking (AZ-700)",
  "description": "1 in