In [None]:
import torch
from transformers import Qwen2_5_VLForConditionalGeneration, AutoProcessor, BitsAndBytesConfig
from qwen_vl_utils import process_vision_info
# 4-bit NF4 quantization settings (double quantization, bfloat16 compute).
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_use_double_quant=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.bfloat16,
)

# Single source of truth for the checkpoint so model and processor cannot drift apart.
MODEL_ID = "Qwen/Qwen2.5-VL-7B-Instruct"

model = Qwen2_5_VLForConditionalGeneration.from_pretrained(
    MODEL_ID,
    # The config above was previously built but never handed to the loader,
    # so the checkpoint was loaded unquantized.
    quantization_config=bnb_config,
    # Keep the dtype of the non-quantized modules equal to the 4-bit compute
    # dtype configured above.
    torch_dtype=torch.bfloat16,
    device_map="auto",
)
processor = AutoProcessor.from_pretrained(MODEL_ID)
# Instruction text sent to the model as the "text" part of the user message.
# It embeds two example HDDL problem files, followed by the rules for naming
# locations and for choosing between the move_object / move_to_container tasks.
# NOTE(review): the removal example near the end is written with commas,
# "(move_object, remote_control, out_location)", unlike the space-separated
# tasks in the two example files -- confirm this is intended.
prompt= """
I want you to create HDDL problem file (similar to pddl file) of the image that I give as input.
An example of an HDDL is this:
(define
        (problem pfile01)
        (:domain  domain_htn)
        (:objects
                plate1 - container
                pear1 - food
                home1 wp1s wp2s - location
                wp1f - location
                robot1 - robot
        )       (:htn
                :parameters ()
                :subtasks (and
                 (task0 (move_object plate1 wp1f))
                 (task1 (move_to_container pear1 plate1))
                )
                :ordering (and
                )
        )

        (:init
                (at plate1 wp1s)
                (at pear1 wp2s)
                (at robot1 home1)
        )
)
Another example:
(define
    (problem pfile01)
    (:domain  domain_htn)
    (:objects
        tennis_ball1 - item
        white_cup1 red_cup1 - container
        banana1 pear1 - food
        home1 wp1s wp2s wp3s wp4s wp5s out_location wp1f wp2f - location
        robot1 - robot
    )
    (:htn
        :parameters ()
        :subtasks (and
            (task0 (move_object tennis_ball1 out_location))
            (task1 (move_object white_cup1 wp1f))
            (task2 (move_object red_cup1 wp2f))
            (task3 (move_to_container banana1 white_cup1))
            (task4 (move_to_container pear1 red_cup1))
        )
        :ordering (and
        )
    )

    (:init
        (at tennis_ball1 wp1s)
        (at white_cup1 wp2s)
        (at red_cup1 wp3s)
        (at banana1 wp4s)
        (at pear1 wp5s)
        (at robot1 home1)
    )
)
First, identify objects in the image and their types, including food (for example, apple, banana, etc.), containers (for example, plate, bowl, cup, basket), and other objects (listed as items).
For the location of the objects, use simply wp1s, wp2s etc, (for the start) and wp1f, wp2f etc, (for the goal).
For the goal, only food and containers are allowed on the table.
Put food in containers and remove the other object from the tables.
The task you can use are: move_object (to move the objects) and move_to_container (to move objects to the container).
To move the objects, use (move_object plate wp1f).
To remove the object, use the task (move_object, remote_control, out_location).
Only output the generated hddl languages.
"""

""" 1. the standard pipeline """
# Single-turn conversation: one image plus the instruction text. The same
# `messages` list is reused by the second pipeline further down.
messages = [
    {
        "role": "user",
        "content": [
            {
                "type": "image",
                "image": "/media/shuo/T7/hddl_data/images/problem_126.png",
            },
            {
                "type": "text",
                "text": prompt,
            },
        ],
    }
]

# Let the processor's chat template build the tokenized model inputs in one call.
# The tensors follow the model's own device instead of a hard-coded "cuda",
# because the model is placed with device_map="auto".
inputs = processor.apply_chat_template(
    messages,
    add_generation_prompt=True,
    tokenize=True,
    return_dict=True,
    return_tensors="pt",
).to(model.device)

# do_sample=False makes decoding greedy.
generated_ids = model.generate(**inputs, max_new_tokens=1024, do_sample=False)

# Drop the first len(in_ids) tokens of every output sequence so that only the
# tokens following the prompt are decoded.
generated_ids_trimmed = [
    out_ids[len(in_ids):] for in_ids, out_ids in zip(inputs.input_ids, generated_ids)
]
output_text = processor.batch_decode(
    generated_ids_trimmed, skip_special_tokens=True, clean_up_tokenization_spaces=False
)


"""2. the custom pipeline"""
# Render the chat template to plain text only; tokenization happens in the
# explicit processor call below.
text = processor.apply_chat_template(
    messages, tokenize=False, add_generation_prompt=True
)

# Extract the image / video inputs referenced by `messages`.
image_inputs, video_inputs = process_vision_info(messages)
inputs = processor(
    text=[text],
    images=image_inputs,
    videos=video_inputs,
    padding=True,
    return_tensors="pt",
)
# Follow the model's own device instead of a hard-coded "cuda", because the
# model is placed with device_map="auto".
inputs = inputs.to(model.device)

# do_sample=False makes decoding greedy.
generated_ids = model.generate(**inputs, max_new_tokens=1024, do_sample=False)

# Drop the first len(in_ids) tokens of every output sequence so that only the
# tokens following the prompt are decoded.
generated_ids_trimmed = [
    out_ids[len(in_ids):] for in_ids, out_ids in zip(inputs.input_ids, generated_ids)
]
output_text2 = processor.batch_decode(
    generated_ids_trimmed, skip_special_tokens=True, clean_up_tokenization_spaces=False
)

In [None]:
import pandas as pd
from IPython.display import display, HTML

def show_output(output):
    # Ensure strings are displayed with line breaks
    output = output.encode().decode('unicode_escape')

    # Create a DataFrame
    df = pd.DataFrame({
        'Output': [output]
    })

    # Style the DataFrame for multiline rendering and fixed-width formatting
    styled_df = df.style.set_table_styles([
        {
            'selector': 'td',
            'props': [
                ('text-align', 'left'),
                ('white-space', 'pre-wrap'),
                ('font-family', '"Courier New", monospace'),
                ('border', '1px solid black'),
                ('padding', '10px'),
                ('vertical-align', 'top'),
                ('width', '500px'),  # adjust width as needed
                ('overflow-wrap', 'break-word')
            ]
        },
        {
            'selector': 'th',
            'props': [
                ('text-align', 'left'),
                ('font-family', '"Courier New", monospace'),
                ('border', '1px solid black'),
                ('padding', '10px')
            ]
        }
    ])

    # Display in notebook or IPython
    display(HTML(styled_df.to_html()))

def compare_generations(base_gen, ft_gen):
    # Ensure strings are displayed with line breaks
    base_gen = base_gen.encode().decode('unicode_escape')
    ft_gen = ft_gen.encode().decode('unicode_escape')

    # Create a DataFrame
    df = pd.DataFrame({
        'Base Generation': [base_gen],
        'Fine-tuned Generation': [ft_gen]
    })

    # Style the DataFrame for multiline rendering and fixed-width formatting
    styled_df = df.style.set_table_styles([
        {
            'selector': 'td',
            'props': [
                ('text-align', 'left'),
                ('white-space', 'pre-wrap'),
                ('font-family', '"Courier New", monospace'),
                ('border', '1px solid black'),
                ('padding', '10px'),
                ('vertical-align', 'top'),
                ('width', '500px'),  # adjust width as needed
                ('overflow-wrap', 'break-word')
            ]
        },
        {
            'selector': 'th',
            'props': [
                ('text-align', 'left'),
                ('font-family', '"Courier New", monospace'),
                ('border', '1px solid black'),
                ('padding', '10px')
            ]
        }
    ])

    # Display in notebook or IPython
    display(HTML(styled_df.to_html()))

# Show the first decoded string of each pipeline side by side.
# NOTE(review): the table columns are labelled "Base Generation" and
# "Fine-tuned Generation", but both arguments here were generated by the same
# `model` -- confirm the labels are what you want for this comparison.
compare_generations(output_text[0], output_text2[0])