In [None]:
import json
import os

import torch
from PIL import Image

# NOTE(review): star import kept for compatibility; it is what provides
# initialize_model, filter_conversation and setup_roles to the cells below.
from utils import *


In [2]:
def load_structure(structure_id, gold_processed_path="../data/structures/gold-processed"):
    """Load a structure's JSON description and its JPEG images from disk.

    The structure is expected to live in ``<gold_processed_path>/<structure_id>/``
    and to contain a ``<structure_id>.json`` file plus any number of
    ``.jpg`` / ``.jpeg`` images (extension matched case-insensitively).

    Args:
        structure_id: Name of the structure; used both as the sub-directory
            name and as the stem of the JSON file inside it.
        gold_processed_path: Directory that holds one sub-directory per
            structure. Defaults to the notebook's relative data location.

    Returns:
        A ``(s_json, s_images_list)`` tuple: the parsed JSON content and a
        list of PIL images, ordered by filename.

    Raises:
        FileNotFoundError: if the structure's JSON file does not exist.
        IOError: if an image file cannot be opened or decoded.
    """
    structure_path = os.path.join(gold_processed_path, structure_id)
    json_path = os.path.join(structure_path, f"{structure_id}.json")

    try:
        # Context manager so the file handle is closed even if parsing fails.
        with open(json_path, "r", encoding="utf-8") as json_file:
            s_json = json.load(json_file)
    except FileNotFoundError as exc:
        raise FileNotFoundError(f"Structure {structure_id} not found.") from exc

    s_images_list = []

    # os.listdir() returns entries in arbitrary order; sort so the image order
    # is the same on every run and every machine.
    for filename in sorted(os.listdir(structure_path)):
        if not filename.lower().endswith((".jpg", ".jpeg")):
            continue

        img_path = os.path.join(structure_path, filename)
        try:
            img = Image.open(img_path)
            # Image.open() is lazy: force the decode now so a corrupt file
            # fails here and the underlying file descriptor is released.
            img.load()
        except IOError as exc:
            raise IOError(f"Could not open image {img_path}") from exc
        s_images_list.append(img)

    return s_json, s_images_list

In [3]:
def generate_response(model, processor, conversation, role_name, images=None, max_new_tokens=128):
    """Generate one reply from ``model`` based on the conversation history.

    The history is first passed through ``filter_conversation`` with
    ``target_model=role_name`` (per that helper's contract, this drops system
    messages that are not intended for the current model), rendered with the
    processor's chat template, and fed to ``model.generate``.

    Args:
        model: Generative model exposing ``.device`` and ``.generate``.
        processor: Matching processor exposing ``apply_chat_template``,
            ``__call__`` and ``batch_decode``.
        conversation: Full conversation history shared by both agents.
        role_name: Identifier of the current model (e.g. "Architect" or
            "Builder"), forwarded to ``filter_conversation``.
        images: Optional images forwarded to the processor; ``None`` for a
            text-only turn.
        max_new_tokens: Upper bound on the number of generated tokens.

    Returns:
        The newly generated text only (the prompt is excluded), with
        surrounding whitespace stripped.
    """
    # Keep only the messages relevant to the current model.
    filtered_conversation = filter_conversation(conversation, target_model=role_name)

    # Build the prompt using the processor's chat template.
    prompt = processor.apply_chat_template(filtered_conversation, add_generation_prompt=True)

    inputs = processor(
        images=images,
        text=prompt,
        padding=True,
        return_tensors="pt"
    ).to(model.device)

    generate_ids = model.generate(**inputs, max_new_tokens=max_new_tokens)

    # generate() returns prompt tokens followed by the new tokens. Slice the
    # prompt off by length instead of splitting the decoded text on "[/INST]":
    # that marker is specific to one chat template and would truncate the reply
    # if the model ever emitted it itself.
    prompt_length = inputs["input_ids"].shape[1]
    new_token_ids = generate_ids[:, prompt_length:]

    output = processor.batch_decode(
        new_token_ids,
        skip_special_tokens=True,
        clean_up_tokenization_spaces=False
    )
    return output[0].strip()

In [4]:
# --- Experiment configuration ---

# Checkpoint identifier handed to initialize_model for both agents.
model_id = "llava-hf/llava-v1.6-mistral-7b-hf"

# Structure to load; passed to load_structure.
structure_id = "C1_bell"

# Intended caps for the dialogue: number of Architect/Builder rounds and
# number of tokens generated per reply.
max_rounds = 10
max_new_tokens = 256

# Run on the GPU when one is available, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"

In [None]:
# One model/processor pair per agent: model_A is used for the "Architect" turn
# and model_B for the "Builder" turn in the dialogue loop.
# NOTE(review): initialize_model is called twice with identical arguments —
# confirm that two separate copies of the same checkpoint are really needed.
# NOTE(review): the third argument (4) is presumably a quantization bit-width;
# verify against initialize_model in utils.
model_A, processor_A = initialize_model(model_id, device, 4)
model_B, processor_B = initialize_model(model_id, device, 4)

In [8]:
# Read the target structure's JSON description and its JPEG images from disk.
s_json, s_images_list = load_structure(structure_id)

In [9]:
# Dialogue state consumed by the loop cell: re-run this cell before re-running
# the loop to start a fresh conversation from round 0.
current_round = 0
# setup_roles comes from utils; its result is appended to on every turn, so it
# must be list-like. Presumably it holds the initial role/system messages —
# TODO confirm.
conversation_history = setup_roles()

In [None]:
from pprint import pprint

# Alternate Architect and Builder turns until the Architect emits "[FINISH]"
# or the round cap from the configuration cell is reached.
while current_round < max_rounds:
    print(f"===== Round {current_round + 1} =====")

    # ----- Architect's Turn -----
    # Images are only passed on the first round.
    # NOTE(review): if the conversation template still contains image
    # placeholders on later rounds, calling the processor without images may
    # fail — confirm against setup_roles / filter_conversation.
    modelA_response = generate_response(
        model=model_A,
        processor=processor_A,
        conversation=conversation_history,
        role_name="Architect",
        images=s_images_list if current_round == 0 else None,
        max_new_tokens=max_new_tokens,
    )

    # print() does not interpolate "%s"; use an f-string.
    print(f"Architect: {modelA_response}")

    # Append Architect's response to the conversation history.
    conversation_history.append({
        "role": "assistant",
        "content": [
            {"type": "text", "text": modelA_response}
        ]
    })

    # Check if Architect signaled to finish.
    if "[FINISH]" in modelA_response:
        print("Finishing conversation as indicated by Architect.")
        break

    # ----- Builder's Turn -----
    modelB_response = generate_response(
        model=model_B,
        processor=processor_B,
        conversation=conversation_history,
        role_name="Builder",
        images=None,
        max_new_tokens=max_new_tokens,
    )

    print(f"Builder: {modelB_response}")

    # Append Builder's response to the conversation history.
    # NOTE(review): both agents are stored under the "assistant" role; verify
    # that filter_conversation can tell their messages apart.
    conversation_history.append({
        "role": "assistant",
        "content": [
            {"type": "text", "text": modelB_response}
        ]
    })

    current_round += 1