# Vision-Language LangGraph Chat Agent
# Using Ollama + LLaVA on Google Colab

In [1]:
# STEP 1: Install Ollama & pull LLaVA model
# Run these shell commands first (in Colab, prefix with !)
# NOTE: apt-get is a Debian/Ubuntu tool (available on Colab); on macOS this
# line fails with "command not found" and can be skipped.
!apt-get install -y zstd
# Download the Ollama install script and pipe it straight into sh.
!curl -fsSL https://ollama.ai/install.sh | sh

# Fetch the LLaVA model. NOTE(review): this presumably needs a running Ollama
# server; the MAIN cell pulls the model again after starting the server.
!ollama pull llava

zsh:1: command not found: apt-get
>>> Removing existing Ollama installation...
>>> Downloading Ollama for macOS...
######################################################################## 100.0%8%
>>> Installing Ollama to /Applications...
>>> Starting Ollama...
>>> Install complete. You can now run 'ollama'.
2026/02/26 10:11:42 ERROR Failed to load MLX dynamic library path=/Applications/Ollama.app/Contents/Resources/libmlxc.dylib
2026/02/26 10:11:42 WARN MLX dynamic library not available error="failed to load MLX dynamic library (searched: [/Applications/Ollama.app/Contents/Resources /Users/nadiamehjabin/Desktop/AiAgentfromServer/Topic6VLM/build/lib/ollama])"
]11;?\[6n[?2026h[?25l[1Gpulling manifest ⠋ [K[?25h[?2026l[?2026h[?25l[1Gpulling manifest ⠙ [K[?25h[?2026l[?2026h[?25l[1Gpulling manifest ⠹ [K[?25h[?2026l[?2026h[?25l[1Gpulling manifest ⠸ [K[?25h[?2026l[?2026h[?25l[1Gpulling manifest ⠼ [K[?25h[?2026l[?2026h[?25l[1Gpulling manifest ⠴ [K[?25h[?2

# STEP 2: Install Python libraries

In [2]:
!pip install -q ollama langgraph langchain-core ipywidgets Pillow

In [3]:
import subprocess
import time
import base64
import io

import ollama
import ipywidgets as widgets
from IPython.display import display, Image as IPImage, HTML
from PIL import Image as PILImage

from typing import Annotated
from typing_extensions import TypedDict
from langgraph.graph import StateGraph, START, END
from langgraph.graph.message import add_messages
from langchain_core.messages import HumanMessage, AIMessage

In [4]:
# START OLLAMA SERVER (background process)

def start_ollama(host: str = "127.0.0.1", port: int = 11434, timeout: float = 30.0):
    """Launch ``ollama serve`` in the background and wait until it is reachable.

    Instead of sleeping for a fixed interval, the server's TCP port is polled
    until it accepts a connection, so a fast start is not delayed and a slow
    start is not reported as ready too early.

    Args:
        host: Address the server is expected to listen on.
        port: TCP port the server is expected to listen on (11434 is Ollama's
            documented default; adjust if OLLAMA_HOST is customised).
        timeout: Maximum number of seconds to wait for the port to open.

    Returns:
        The ``subprocess.Popen`` handle of the spawned ``ollama serve`` process.
        If another Ollama instance already owns the port, the spawned process
        may exit immediately while the port is still reported as reachable.

    Raises:
        FileNotFoundError: If the ``ollama`` executable is not on PATH.
    """
    import socket  # local import keeps this cell self-contained

    def _port_is_open() -> bool:
        # A successful TCP connect is enough to know something is listening.
        try:
            with socket.create_connection((host, port), timeout=1.0):
                return True
        except OSError:
            return False

    proc = subprocess.Popen(
        ["ollama", "serve"],
        stdout=subprocess.DEVNULL,
        stderr=subprocess.DEVNULL,
    )

    deadline = time.monotonic() + timeout
    while time.monotonic() < deadline:
        if _port_is_open():
            print(" Ollama server started!")
            return proc
        time.sleep(0.25)

    # Do not claim success when nothing answered; still hand back the handle
    # so the caller can inspect or terminate the process.
    print(f" Ollama server did not respond on {host}:{port} within {timeout:.0f}s")
    return proc


In [5]:
# LANGGRAPH: State Definition
# AgentState is the agent's "memory" passed between nodes.
# - messages: full conversation history; add_messages APPENDS
#             new messages instead of overwriting the list.
# - image_b64: the uploaded image stays constant throughout chat.


class AgentState(TypedDict):
    """State carried through the graph: the chat history plus the active image."""

    # Conversation history; the add_messages annotation is the reducer used to
    # merge a node's returned messages into this list instead of replacing it.
    messages: Annotated[list, add_messages]
    # Base64 text of the uploaded image (the UI uses "" while none is loaded).
    image_b64: str

In [6]:
# LANGGRAPH: Chat Node (the LLM call)

# A node is a function: State -> dict of state updates.
# This node formats the conversation for Ollama and calls LLaVA.

def chat_node(state: AgentState) -> dict:
    """Send the conversation to LLaVA through Ollama and return its reply.

    Human and AI messages in ``state["messages"]`` are translated into Ollama's
    ``{"role", "content"}`` dictionaries; other message types are skipped. The
    image in ``state["image_b64"]`` is attached once, to the first user turn,
    even if the history does not start with a HumanMessage.

    Args:
        state: Current agent state holding ``messages`` and ``image_b64``.

    Returns:
        A state update of the form ``{"messages": [AIMessage]}`` containing the
        model's answer.
    """
    image_b64 = state["image_b64"]

    ollama_messages = []
    image_attached = False
    for msg in state["messages"]:
        if isinstance(msg, HumanMessage):
            entry = {"role": "user", "content": msg.content}
            # Attach the image to the first user turn rather than to list
            # index 0, so it is not lost when the history starts differently.
            if image_b64 and not image_attached:
                entry["images"] = [image_b64]
                image_attached = True
            ollama_messages.append(entry)
        elif isinstance(msg, AIMessage):
            ollama_messages.append({"role": "assistant", "content": msg.content})

    response = ollama.chat(model="llava", messages=ollama_messages)
    ai_text = response["message"]["content"]

    return {"messages": [AIMessage(content=ai_text)]}

In [7]:
# LANGGRAPH: Build the Graph

# Flow: START → chat_node → END
# We re-invoke the graph each turn with the full state.

def build_graph():
    """Assemble and compile the one-node graph: START -> chat_node -> END.

    Returns:
        The compiled graph produced by ``StateGraph.compile()``.
    """
    node_name = "chat_node"

    workflow = StateGraph(AgentState)
    workflow.add_node(node_name, chat_node)
    # Single linear path: enter at the chat node, then finish.
    workflow.add_edge(START, node_name)
    workflow.add_edge(node_name, END)

    compiled = workflow.compile()
    return compiled

In [8]:
# HELPER: Resize & Base64-encode uploaded image

# Smaller image = fewer tokens = faster LLaVA responses!

def resize_and_encode(image_bytes: bytes, max_size: int = 512) -> str:
    """Downscale an image and return it as a base64-encoded JPEG string.

    Args:
        image_bytes: Raw bytes of an image in any format Pillow can open.
        max_size: Upper bound, in pixels, for both width and height; the
            aspect ratio is preserved and smaller images are not enlarged.

    Returns:
        The JPEG-encoded image as base64 text (UTF-8 ``str``).

    Raises:
        PIL.UnidentifiedImageError: If the bytes are not a readable image.
    """
    img = PILImage.open(io.BytesIO(image_bytes))

    # JPEG has no alpha channel or palette, so saving RGBA/LA/P images (typical
    # for PNG and GIF uploads) raises. Normalise to RGB first; converting before
    # the resize also lets palette images use the LANCZOS filter.
    if img.mode not in ("RGB", "L"):
        img = img.convert("RGB")

    img.thumbnail((max_size, max_size), PILImage.LANCZOS)
    buffer = io.BytesIO()
    img.save(buffer, format="JPEG", quality=85)
    return base64.b64encode(buffer.getvalue()).decode("utf-8")

# IPYWIDGETS UI

In [13]:
def build_ui(graph):
    """Build and display the ipywidgets chat interface around ``graph``.

    The UI offers an image upload button, an image preview, a scrolling chat
    transcript, a text box with Send / New Chat buttons and a status line.
    Conversation state lives in a closure-local dict so it survives across
    widget callbacks.

    Args:
        graph: Compiled graph exposing ``invoke(state)``; it is called with
            ``{"messages": [...], "image_b64": str}`` and must return a mapping
            whose ``"messages"`` entry is the updated history.

    Returns:
        None. The assembled widget tree is shown with ``display``.
    """
    import html  # local import: escape chat text before rendering it as HTML

    # --- Agent memory (persists across widget interactions) ---
    agent_memory = {"messages": [], "image_b64": ""}

    # --- Widgets ---
    upload_btn = widgets.FileUpload(
        accept="image/*", multiple=False,
        description=" Upload Image", button_style="info",
        layout=widgets.Layout(width="200px"),
    )
    image_preview = widgets.Output(layout=widgets.Layout(width="300px", min_height="50px"))
    chat_output = widgets.Output(
        layout=widgets.Layout(
            width="100%", min_height="300px",
            border="1px solid #ddd", padding="10px",
        )
    )
    user_input = widgets.Text(
        placeholder="Ask something about the image...",
        layout=widgets.Layout(width="75%", height="40px"),
    )
    send_btn = widgets.Button(
        description="Send ", button_style="primary",
        layout=widgets.Layout(width="20%", height="40px"),
    )
    clear_btn = widgets.Button(
        description=" New Chat", button_style="warning",
        layout=widgets.Layout(width="15%", height="40px"),
    )
    status_label = widgets.Label(value=" Upload an image to begin")

    def show_in_chat(markup):
        """Append one HTML fragment to the chat transcript."""
        with chat_output:
            display(HTML(markup))

    # --- Event: Image upload ---
    def on_image_upload(change):
        value = upload_btn.value
        if not value:
            return
        # ipywidgets >= 8.x exposes .value as a tuple of dicts; 7.x (presumably
        # still the default on some hosted runtimes) uses a dict keyed by filename.
        if isinstance(value, dict):
            uploaded_file = next(iter(value.values()))
        else:
            uploaded_file = value[0]
        # Content may arrive as a memoryview; normalise to bytes so image
        # format sniffing in the preview behaves the same on every version.
        image_bytes = bytes(uploaded_file["content"])

        try:
            encoded = resize_and_encode(image_bytes, max_size=512)
        except Exception as exc:
            # Errors raised inside widget callbacks are easy to miss, so
            # report the failure in the transcript and keep the previous state.
            show_in_chat(
                f"<p style='color:red;'> Could not read that image: {html.escape(str(exc))}</p>"
            )
            status_label.value = " Upload failed — try another image"
            return

        agent_memory["image_b64"] = encoded
        agent_memory["messages"] = []

        image_preview.clear_output()
        with image_preview:
            display(IPImage(data=image_bytes, width=280))

        chat_output.clear_output()
        show_in_chat("<p style='color:green;'> Image loaded! Ask me anything about it.</p>")

        status_label.value = " Image ready — start chatting!"

    # --- Event: Send message ---
    def on_send(_):
        question = user_input.value.strip()
        if not question:
            return
        if not agent_memory["image_b64"]:
            show_in_chat("<p style='color:red;'> Please upload an image first!</p>")
            return

        # Escape user text so characters such as < or & are shown literally
        # instead of being interpreted as markup.
        show_in_chat(f"""
                <div style='margin:8px 0; padding:8px 12px; background:#e3f2fd;
                            border-radius:12px; max-width:80%; margin-left:auto;'>
                    <b>You:</b> {html.escape(question)}
                </div>
            """)

        user_input.value = ""
        send_btn.disabled = True
        status_label.value = " LLaVA is thinking..."

        # Work on a copy so a failed request does not leave an unanswered
        # question in the stored history.
        pending_messages = list(agent_memory["messages"]) + [HumanMessage(content=question)]

        try:
            result = graph.invoke({
                "messages": pending_messages,
                "image_b64": agent_memory["image_b64"],
            })
        except Exception as exc:
            show_in_chat(
                f"<p style='color:red;'> Request failed: {html.escape(str(exc))}</p>"
            )
            status_label.value = " Request failed — please try again"
            return
        finally:
            # Always re-enable the button, even when the model call raised.
            send_btn.disabled = False

        agent_memory["messages"] = result["messages"]
        ai_response = result["messages"][-1].content
        # Escape model output as well, and keep its line breaks visible.
        reply_html = html.escape(str(ai_response)).replace("\n", "<br>")

        show_in_chat(f"""
                <div style='margin:8px 0; padding:8px 12px; background:#f3e5f5;
                            border-radius:12px; max-width:80%;'>
                    <b>LLaVA:</b> {reply_html}
                </div>
            """)

        num_turns = len([m for m in agent_memory["messages"] if isinstance(m, HumanMessage)])
        status_label.value = f" {num_turns} message(s) in conversation"

    # --- Event: Clear chat ---
    def on_clear(_):
        agent_memory["messages"] = []
        chat_output.clear_output()
        show_in_chat("<p style='color:green;'> Conversation reset! Same image loaded.</p>")
        status_label.value = " Chat cleared — same image, new conversation"

    # Connect events
    upload_btn.observe(on_image_upload, names="value")
    send_btn.on_click(on_send)
    clear_btn.on_click(on_clear)
    # NOTE(review): on_submit is deprecated in ipywidgets 8 but still works.
    # The suggested replacement (observe "value" with continuous_update=False)
    # presumably also fires when the box loses focus — confirm before migrating.
    user_input.on_submit(on_send)  # Press Enter to send

    # --- Layout ---
    header = widgets.HTML("""
        <h2 style='color:#4a4a4a;'>Vision-Language Chat Agent</h2>
        <p style='color:#666;'>Powered by LLaVA via Ollama + LangGraph</p>
        <hr/>
    """)
    top_row = widgets.HBox([
        widgets.VBox([upload_btn, image_preview], layout=widgets.Layout(width="320px")),
        widgets.VBox(
            [widgets.HTML("<b>Conversation History:</b>"), chat_output],
            layout=widgets.Layout(width="100%", flex="1"),
        ),
    ])
    input_row = widgets.HBox(
        [user_input, send_btn, clear_btn],
        layout=widgets.Layout(margin="8px 0"),
    )
    app = widgets.VBox(
        [header, top_row, input_row, status_label],
        layout=widgets.Layout(padding="20px", width="100%"),
    )
    display(app)



In [12]:
# Diagnostic: report which ipywidgets release the kernel has installed.
from ipywidgets import __version__ as ipywidgets_version

print(ipywidgets_version)

8.1.7


In [14]:
# MAIN
if __name__ == "__main__":
    # Bring the server up before anything tries to talk to it.
    start_ollama()

    # Now that the server is running, pull the llava model
    print(" Pulling LLaVA model...")
    # check_call raises CalledProcessError on a non-zero exit status.
    subprocess.check_call(["ollama", "pull", "llava"])
    print(" LLaVA model pulled successfully!")

    # Compile the graph, then hand it to the widget UI.
    graph = build_graph()
    print(" LangGraph agent built!")

    build_ui(graph)
    print(" UI ready! Upload an image and start chatting.")

 Ollama server started!
 Pulling LLaVA model...


Feb 20 2026 00:08:12 - ERROR - generated.c:2199 - CHECK failed: mlx_array_item_float16_
2026/02/26 10:21:07 ERROR Failed to load MLX dynamic library symbols path=/Applications/Ollama.app/Contents/Resources/libmlxc.dylib
2026/02/26 10:21:07 WARN MLX dynamic library not available error="failed to load MLX dynamic library (searched: [/Applications/Ollama.app/Contents/Resources /Users/nadiamehjabin/Desktop/AiAgentfromServer/Topic6VLM/build/lib/ollama])"
[?2026h[?25l[1Gpulling manifest ⠋ [K[?25h[?2026l[?2026h[?25l[1Gpulling manifest ⠙ [K[?25h[?2026l[?2026h[?25l[1Gpulling manifest ⠹ [K[?25h[?2026l[?2026h[?25l[1Gpulling manifest ⠸ [K[?25h[?2026l

 LLaVA model pulled successfully!
 LangGraph agent built!


[?2026h[?25l[1Gpulling manifest [K
pulling 170370233dd5: 100% ▕██████████████████▏ 4.1 GB                         [K
pulling 72d6f08a42f6: 100% ▕██████████████████▏ 624 MB                         [K
pulling 43070e2d4e53: 100% ▕██████████████████▏  11 KB                         [K
pulling c43332387573: 100% ▕██████████████████▏   67 B                         [K
pulling ed11eda7790d: 100% ▕██████████████████▏   30 B                         [K
pulling 7c658f9561e5: 100% ▕██████████████████▏  564 B                         [K
verifying sha256 digest [K
writing manifest [K
success [K[?25h[?2026l
  user_input.on_submit(on_send)  # Press Enter to send


VBox(children=(HTML(value="\n        <h2 style='color:#4a4a4a;'>Vision-Language Chat Agent</h2>\n        <p st…

 UI ready! Upload an image and start chatting.
