In [21]:
import subprocess

def ask_llava_via_cli(image_path, question, *, model="llava:7b", timeout=20):
    """
    Run `ollama run <model>`, send it an image reference plus a question on
    stdin, and return whatever the process prints on stdout.

    Parameters:
        image_path (str): Path to the image file; it is embedded in the prompt
            as ``![image](<image_path>)``.
        question (str): A natural language question about the image.
        model (str): Model tag passed to ``ollama run``. Defaults to
            ``"llava:7b"``.
        timeout (float): Seconds to wait for the process to finish before it
            is killed. Defaults to 20.

    Returns:
        str: The process's stdout decoded as UTF-8 (undecodable bytes are
        replaced). ``"Error: Timeout"`` if the process did not finish in
        time, or ``"Error: <details>"`` if it exited with a non-zero status.

    Raises:
        OSError: If the ``ollama`` executable cannot be started (for example
            ``FileNotFoundError`` when it is not on PATH).
    """
    # Build the prompt in CLI format (image + question), one item per line.
    full_prompt = f"![image]({image_path})\n{question}\n"

    # The context manager guarantees the three pipes are closed and the child
    # is waited on, whichever way this function exits.
    with subprocess.Popen(
        ["ollama", "run", model],
        stdin=subprocess.PIPE,
        stdout=subprocess.PIPE,
        stderr=subprocess.PIPE,
    ) as process:
        try:
            # Send the prompt encoded as UTF-8 to avoid Windows charset issues.
            stdout, stderr = process.communicate(
                input=full_prompt.encode("utf-8"), timeout=timeout
            )
        except subprocess.TimeoutExpired:
            # communicate() does not kill the child on timeout; kill it and
            # finish the communication so the process is reaped and its pipes
            # are drained instead of being left behind.
            process.kill()
            process.communicate()
            return "Error: Timeout"

        if process.returncode != 0:
            # Surface failures in the same "Error: ..." style as the timeout
            # path instead of returning an empty or partial answer.
            details = stderr.decode("utf-8", errors="replace").strip()
            if not details:
                details = f"ollama exited with code {process.returncode}"
            return f"Error: {details}"

        # Decode and return the model's response.
        return stdout.decode("utf-8", errors="replace")

# ========== Example usage ==========

if __name__ == "__main__":
    # Path to the image file. In a raw string backslashes are already literal,
    # so the Windows separator is written once; doubling it would put two
    # backslashes into the actual path.
    image_path = r"C:\Drawing_Robot Chef in Cozy Kitchen.png"

    # The question the model should answer about the image.
    question = "What is the robot doing in the kitchen?"

    # Run the query and print the result.
    result = ask_llava_via_cli(image_path, question)
    print("LLaVA Response:\n", result)


LLaVA Response:
  The image shows a robotic arm in a cozy kitchen environment, preparing food. However, without more context or information about what the robot is specifically doing or the specific actions it is taking, I cannot provide a detailed description of its activity. 


