In [35]:
def extract_json_from_reply(raw_reply: str) -> dict:
    """Pull the six expected answer fields out of a model reply.

    The reply is free-form text that is expected to contain a JSON object
    with the keys ``identified_location``, ``approximate_grid_position``,
    ``facing_direction``, ``reasoning``, ``fastest_route`` and
    ``full_answer``.

    Two strategies are tried in order:

    1. Decode the text between the first ``{`` and the last ``}`` as JSON.
    2. If that fails (extra prose, truncated output, ...), search for each
       ``"key": "value"`` pair individually, honouring backslash escapes
       inside the value.

    Args:
        raw_reply: Complete text returned by the model.

    Returns:
        A dict with all six keys mapped to strings (``""`` for a key that
        was not found) when at least one key was found; an empty dict when
        none of the keys could be located, so callers can test ``if not
        parsed`` to detect an unparseable reply.
    """
    # Imported locally so the function does not depend on the imports of
    # another cell/module section having been executed first.
    import json
    import re

    field_names = (
        "identified_location",
        "approximate_grid_position",
        "facing_direction",
        "reasoning",
        "fastest_route",
        "full_answer",
    )

    def as_text(value) -> str:
        # Every field ends up in a CSV cell, so normalise to a string.
        if value is None:
            return ""
        if isinstance(value, str):
            return value
        return json.dumps(value, ensure_ascii=False)

    # ── Strategy 1: the reply embeds a well-formed JSON object ──────────────
    start = raw_reply.find("{")
    end = raw_reply.rfind("}")
    if start != -1 and end > start:
        try:
            # strict=False tolerates raw newlines inside string values.
            candidate = json.loads(raw_reply[start:end + 1], strict=False)
        except ValueError:
            candidate = None
        if isinstance(candidate, dict) and any(
            name in candidate for name in field_names
        ):
            return {name: as_text(candidate.get(name)) for name in field_names}

    # ── Strategy 2: locate each "key": "value" pair on its own ──────────────
    found = {}
    for name in field_names:
        pattern = '"' + re.escape(name) + r'"\s*:\s*"((?:[^"\\]|\\.)*)"'
        match = re.search(pattern, raw_reply, re.DOTALL)
        if match is None:
            continue
        literal = match.group(1)
        try:
            # Let the JSON decoder resolve escapes such as \" and \n.
            found[name] = json.loads('"' + literal + '"', strict=False)
        except ValueError:
            # Invalid escape sequence: keep the text exactly as written.
            found[name] = literal

    if not found:
        return {}
    return {name: found.get(name, "") for name in field_names}


In [None]:
import os
import glob
import json
import csv
from ollama import generate

# ── 1. Configuration ───────────────────────────────────────────────────────────
# Name of the Ollama model to query; set it to a model that is available locally.
OLLAMA_MODEL = "llava"

# ── 2. Paths (adjust if needed) ────────────────────────────────────────────────
# Both the floor plan and the query images live under this directory.
_FLOOR_DIR = "CEPSR - Floor 7"

PROMPT_FILE = "prompt_with_floormap.txt"
NODE_METADATA_FILE = "node_image_results.json"
FLOORPLAN_IMAGE_PATH = os.path.join(_FLOOR_DIR, "Floorplan", "labeled_grid.png")

# "Query images" sits directly under the floor directory.
QUERY_IMAGES_FOLDER = os.path.join(_FLOOR_DIR, "Query images")

# Results are written to a CSV named after the model.
OUTPUT_CSV = OLLAMA_MODEL + "_results.csv"

# Destinations for which step-by-step directions are requested for every image.
DESTINATIONS = [
    "703 off cs",
    "bathroom (700 fl pubr-f bsvc)",
    "750E3 RLAB EE",
    "720 off cs",
]

# ── 3. Helper to read image bytes ───────────────────────────────────────────────
def load_image_bytes(path: str) -> bytes:
    """Return the complete binary contents of the file at *path*."""
    with open(path, mode="rb") as image_file:
        payload = image_file.read()
    return payload

# ── 4. Read the prompt template and the node metadata ──────────────────────────
# The template is kept verbatim; its placeholders are substituted later.
with open(PROMPT_FILE, encoding="utf-8") as prompt_file:
    template = prompt_file.read()

# The metadata stays a JSON *string* (not decoded) because it is pasted into
# the prompt text as-is; only surrounding whitespace is removed.
with open(NODE_METADATA_FILE, encoding="utf-8") as metadata_file:
    node_metadata_text = metadata_file.read()
node_metadata_json_str = node_metadata_text.strip()

# ── 5. Read the floor plan image once; it is reused for every request ──────────
floorplan_bytes = load_image_bytes(FLOORPLAN_IMAGE_PATH)

# ── 6. Split the prompt template once ──────────────────────────────────────────
# The template is identical for every request, so the split is loop-invariant.
# Doing it before the CSV is opened also means a template that lacks one of the
# placeholders fails immediately (ValueError while unpacking) instead of after
# an existing results file has already been truncated.
part_before_floor, remainder        = template.split("[FLOOR_PLAN_IMAGE]", 1)
part_before_metadata, remainder     = remainder.split("[NODE_METADATA_JSON]", 1)
part_before_query, part_after_query = remainder.split("[QUERY_IMAGE]", 1)

# Everything up to (but excluding) the destination-specific tail of the prompt.
# The image placeholders are replaced by blank lines: the images themselves are
# passed separately through the `images` argument (we do NOT inline base64).
prompt_prefix = (
    part_before_floor
    + "\n"  # position of the floor plan image placeholder
    + "\n" + part_before_metadata
    + "\n" + node_metadata_json_str
    + "\n" + part_before_query
    + "\n"  # position of the query image placeholder
    + "\n"
)

# CSV columns that are filled from the parsed model answer.
PARSED_FIELDS = (
    "identified_location",
    "approximate_grid_position",
    "facing_direction",
    "reasoning",
    "fastest_route",
)
IMAGE_EXTENSIONS = (".jpg", ".jpeg", ".png")

# ── 7. Find all image files in the “Query images” folder ───────────────────────
# glob returns entries in arbitrary order; sorting makes the row order of the
# CSV reproducible. Non-image files are dropped here so the count is accurate.
image_paths = sorted(
    candidate
    for candidate in glob.glob(os.path.join(QUERY_IMAGES_FOLDER, "*"))
    if os.path.splitext(candidate)[1].lower() in IMAGE_EXTENSIONS
)
print(f"Found {len(image_paths)} images in {QUERY_IMAGES_FOLDER}.")

# ── 8. Prepare CSV, write header, then process every (image, destination) ──────
with open(OUTPUT_CSV, "w", newline="", encoding="utf-8") as csvfile:
    writer = csv.DictWriter(
        csvfile,
        fieldnames=[
            "query_image_name",
            "destination",
            *PARSED_FIELDS,
            "full_model_answer",
        ],
    )
    writer.writeheader()

    for img_path in image_paths:
        query_image_name = os.path.basename(img_path)
        query_bytes      = load_image_bytes(img_path)

        for destination in DESTINATIONS:
            # 8a. Only the tail of the prompt depends on the destination.
            full_prompt = prompt_prefix + part_after_query.replace(
                "[DESTINATION]", destination
            )

            # 8b. Call Ollama's generate() with both images in the `images` list.
            # The first image is the floor plan, the second is the query image.
            # stream=True lets us echo the answer while collecting it.
            print(f"\n→ Query: {query_image_name} → Destination: {destination}")
            chunks = []
            try:
                for resp in generate(
                    OLLAMA_MODEL,
                    full_prompt,
                    images=[floorplan_bytes, query_bytes],
                    stream=True,
                ):
                    chunk = resp.get("response", "")
                    print(chunk, end="", flush=True)
                    chunks.append(chunk)
            except Exception as e:
                # Best effort: report the failure and move on to the next pair;
                # no row is written for a failed request.
                print(f"\nERROR during Ollama generate for {query_image_name} → {destination}: {e}")
                continue

            print("\n")  # newline after streaming finishes

            raw_reply = "".join(chunks)

            # ── 9. Parse out the six JSON fields ─────────────────────────────────
            parsed = extract_json_from_reply(raw_reply)

            # A reply only counts as parsed when at least one expected key
            # literally occurs in it AND the extractor produced a value. The
            # key check is needed because the extractor may hand back filler
            # text for keys that are not present in the reply at all.
            parse_succeeded = (
                any(f'"{field}"' in raw_reply for field in (*PARSED_FIELDS, "full_answer"))
                and any(parsed.values())
            )

            if parse_succeeded:
                row = {field: parsed.get(field, "") for field in PARSED_FIELDS}
                row["full_model_answer"] = parsed.get("full_answer", "")
            else:
                # Keep the raw reply so nothing is lost, and leave every
                # parsed column blank rather than filling it with junk.
                row = dict.fromkeys(PARSED_FIELDS, "")
                row["full_model_answer"] = raw_reply
                print(f"⚠️ Warning: could not parse JSON from response. "
                      "Storing raw reply as full_model_answer.")

            row["query_image_name"] = query_image_name
            row["destination"]      = destination

            # ── 10. Write one row into CSV ────────────────────────────────────────
            writer.writerow(row)
            # Each row costs a full model call, so push it to disk right away.
            csvfile.flush()

            print(f"✔ Finished processing {query_image_name} → {destination}\n")

print(f"All done. Results written to {OUTPUT_CSV}.")


Found 11 images in CEPSR - Floor 7/Query images.

→ Query: location1_east.jpeg → Destination: 703 off cs
 **Answer:**
Identified Location: Approximate grid position: Column 6.5, Row 3; nearest reference points: long corridor with doors, signs, water fountain, benches.
Facing Direction: East
**Reasoning:**
1. **Floor Pattern Check:** The visible floor pattern appears to be the same as Node 8's description: black and white checker tiles.
2. **Ceiling & Lighting:** The purple accent beam visible in the query image matches Node 8's description, which also contains fluorescent ceiling panels.
3. **Wall/Signage/Doors:** No visible door colors or room numbers/signs can be confidently identified from the query image.
4. **Special Fixtures (Bins, Water Fountain, Benches):** The presence of a water fountain and benches in the query image corresponds to Node 8's description.
5. **Spatial Layout (Corridor vs. Alcove vs. Room Entrance):** The long corridor with multiple doors on both sides aligns w