In [1]:
import json
import os
import sys
from pathlib import Path

module_path = os.path.abspath(os.path.join("../../"))
if module_path not in sys.path:
    sys.path.append(module_path)

from architecture.blocksworld import adapter
from architecture.blocksworld.validator import BlocksworldValidator

path = Path("./blocksworld/task_1_plan_generation.json")
instances_path = Path("./instances_basic")
val_path = Path("./VAL/validate")
domain_path = Path("./domain.pddl")

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
test = adapter.BlocksworldAdapter(path)

In [3]:
# example_basic_usage(path)
# example_with_gold_paths(path)
# example_multi_phase(path)
# example_save_files(path)
# example_phase_breakdown(path)

In [4]:
dataset = test.build_diligent_dataset(t_values=[1, 2], check_unique=True)
encode_object = {
    "a": "red",
    "b": "blue",
    "c": "orange",
    "d": "yellow",
    "e": "white",
    "f": "magenta",
    "g": "black",
    "h": "cyan",
    "i": "green",
    "j": "violet",
    "k": "silver",
    "l": "gold",
}

decode_object = {v: k for k, v in encode_object.items()}

Phase 1: 500 pairs
Phase 2: 500 pairs


In [5]:
print(dataset[0][0])

<node>0 Blocks state: the red block is clear, the yellow block is clear, the hand is empty, the red block is on top of the blue block, the yellow block is on top of the orange block, the blue block is on the table and the orange block is on the table.
Goal: the orange block is on top of the red block.</node> | <node>1 (unstack yellow orange)</node> | <node>2 (put-down yellow)</node> | <node>3 (pick-up orange)</node> | <node>4 (stack orange red)</node>


In [35]:
target_instances = {}
target_plans = {}
with open(path, "r") as f:
    data = json.load(f)["instances"]

for ex in data:
    query = adapter.BlocksworldAdapter.format_query(ex["query"])
    target = ex["ground_truth_plan"].strip()
    instance_num = ex["instance_id"]
    target_instances[query] = instance_num
    target_plans[query] = target

In [36]:
validator = BlocksworldValidator(
    target_plans,
    target_instances,
    instances_path=instances_path,
    val_path=val_path,
    domain_path=domain_path,
    plan_buf_path=Path("./buffer.pddl"),
)

In [37]:
# dataset[0][1]._text = '(stack orange orange)'

In [39]:
with open(Path("./buffer.pddl"), "r") as f:
    test = f.read()
    print(test)

(unstack d c)
(put-down d)
(pick-up c)
(stack c a)
(unstack a b)


In [42]:
cmd

'VAL/validate -v domain.pddl instances_basic/instance-2.pddl buffer.pddl'

In [41]:
import subprocess

cmd = f"{val_path} -v {domain_path} {instances_path / 'instance-2.pddl'} {Path('./buffer.pddl')}"
result = subprocess.Popen(cmd, shell=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
output, error = result.communicate()
output = output.decode("CP866")
error = error.decode("CP866")
output, error

('Checking plan: buffer.pddl\nPlan to validate:\n\nPlan size: 5\n1:\n(unstack d c)\n \n2:\n(put-down d)\n \n3:\n(pick-up c)\n \n4:\n(stack c a)\n \n5:\n(unstack a b)\n \n\nPlan Validation details\n-----------------------\n\nChecking next happening (time 1)\nDeleting (on d c)\nDeleting (clear d)\nDeleting (handempty)\nAdding (holding d)\nAdding (clear c)\n\nChecking next happening (time 2)\nDeleting (holding d)\nAdding (clear d)\nAdding (handempty)\nAdding (ontable d)\n\nChecking next happening (time 3)\nDeleting (clear c)\nDeleting (ontable c)\nDeleting (handempty)\nAdding (holding c)\n\nChecking next happening (time 4)\nDeleting (clear a)\nDeleting (holding c)\nAdding (handempty)\nAdding (clear c)\nAdding (on c a)\n\nChecking next happening (time 5)\nPlan failed because of unsatisfied precondition in:\n(unstack a b)\n\nPlan failed to execute\n\nPlan Repair Advice:\n\n(unstack a b) has an unsatisfied precondition at time 5\n(Set (clear a) to true)\n\n\nFailed plans:\n buffer.pddl \n',


In [18]:
result = []
for context, action in dataset:
    result.append(validator.validate(action, context))

ValidationExecutionError: /bin/sh: 1: VAL/validate/validate: not found


In [19]:
if all(result):
    print("All valid")

All valid


# Checkpoint check

In [42]:
from vllm import LLM, SamplingParams

prompts = [
    """You must output exactly ONE next action using one of these
forms:

<node>ID STEP_TEXT</node>

<done>FINAL_ANSWER</done>

<backtrack>NODE_ID</backtrack>

Emit ONLY the tag. No commentary or reasoning. <node>0 I am playing with a set of blocks where I need to arrange the blocks into stacks. Here are the actions I can do

Pick up a block
Unstack a block from on top of another block
Put down a block
Stack a block on top of another block

I have the following restrictions on my actions:
I can only pick up or unstack one block at a time.
I can only pick up or unstack a block if my hand is empty.
I can only pick up a block if the block is on the table and the block is clear. A block is clear if the block has no other blocks on top of it and if the block is not picked up.
I can only unstack a block from on top of another block if the block I am unstacking was really on top of the other block.
I can only unstack a block from on top of another block if the block I am unstacking is clear.
Once I pick up or unstack a block, I am holding the block.
I can only put down a block that I am holding.
I can only stack a block on top of another block if I am holding the block being stacked.
I can only stack a block on top of another block if the block onto which I am stacking the block is clear.
Once I put down or stack a block, my hand becomes empty.
Once you stack a block on top of a second block, the second block is no longer clear.

As initial conditions I have that, the red block is clear, the yellow block is clear, the hand is empty, the red block is on top of the blue block, the yellow block is on top of the orange block, the blue block is on the table and the orange block is on the table.
My goal is to have that the orange block is on top of the red block.</node> | <node>1 (unstack yellow orange)</node> | <node>2 (put-down yellow)</node> | <node>3 (pick-up orange)</node> | <node>4 (stack orange red)</node>"""
]

sampling_params = SamplingParams(
    temperature=0.7, top_p=0.8, max_tokens=300, stop=["</done>"]
)

In [16]:
os.environ["CUDA_VISIBLE_DEVICES"] = "1,2"
os.environ["PYTORCH_CUDA_ALLOC_CONF"] = "expandable_segments:True"
os.environ["PYTORCH_ALLOC_CONF"] = "expandable_segments:True"
llm = LLM(
    model="/home/jovyan/khalikov/diligent-learner-detached/output/blocksworld_test/run_2025-12-01_23-09-22/final_model"
)

INFO 12-02 17:23:19 [utils.py:253] non-default args: {'disable_log_stats': True, 'model': '/home/jovyan/khalikov/diligent-learner-detached/output/blocksworld_test/run_2025-12-01_23-09-22/final_model'}
INFO 12-02 17:23:19 [model.py:631] Resolved architecture: Qwen3ForCausalLM
INFO 12-02 17:23:19 [model.py:1745] Using max model len 40960


2025-12-02 17:23:21,202	INFO util.py:154 -- Missing packages: ['ipywidgets']. Run `pip install -U ipywidgets`, then restart the notebook server for rich notebook output.


INFO 12-02 17:23:21 [scheduler.py:216] Chunked prefill is enabled with max_num_batched_tokens=8192.


The tokenizer you are loading from '/home/jovyan/khalikov/diligent-learner-detached/output/blocksworld_test/run_2025-12-01_23-09-22/final_model' with an incorrect regex pattern: https://huggingface.co/mistralai/Mistral-Small-3.1-24B-Instruct-2503/discussions/84#69121093e8b480e709447d5e. This will lead to incorrect tokenization. You should set the `fix_mistral_regex=True` flag when loading this tokenizer to fix this issue.


[1;36m(EngineCore_DP0 pid=814036)[0;0m INFO 12-02 17:23:21 [core.py:93] Initializing a V1 LLM engine (v0.11.2) with config: model='/home/jovyan/khalikov/diligent-learner-detached/output/blocksworld_test/run_2025-12-01_23-09-22/final_model', speculative_config=None, tokenizer='/home/jovyan/khalikov/diligent-learner-detached/output/blocksworld_test/run_2025-12-01_23-09-22/final_model', skip_tokenizer_init=False, tokenizer_mode=auto, revision=None, tokenizer_revision=None, trust_remote_code=False, dtype=torch.bfloat16, max_seq_len=40960, download_dir=None, load_format=auto, tensor_parallel_size=1, pipeline_parallel_size=1, data_parallel_size=1, disable_custom_all_reduce=False, quantization=None, enforce_eager=False, kv_cache_dtype=auto, device_config=cuda, structured_outputs_config=StructuredOutputsConfig(backend='auto', disable_fallback=False, disable_any_whitespace=False, disable_additional_properties=False, reasoning_parser='', reasoning_parser_plugin='', enable_in_reasoning=False), 

[1;36m(EngineCore_DP0 pid=814036)[0;0m [2025-12-02 17:23:28] INFO _optional_torch_c_dlpack.py:119: JIT-compiling torch-c-dlpack-ext to cache...
[1;36m(EngineCore_DP0 pid=814036)[0;0m We recommend installing via `pip install torch-c-dlpack-ext`


[1;36m(EngineCore_DP0 pid=814036)[0;0m INFO 12-02 17:23:42 [cuda.py:418] Valid backends: ['FLASH_ATTN', 'FLASHINFER', 'TRITON_ATTN', 'FLEX_ATTENTION']
[1;36m(EngineCore_DP0 pid=814036)[0;0m INFO 12-02 17:23:42 [cuda.py:427] Using FLASH_ATTN backend.


Loading safetensors checkpoint shards:   0% Completed | 0/2 [00:00<?, ?it/s]
Loading safetensors checkpoint shards:  50% Completed | 1/2 [00:05<00:05,  5.06s/it]
Loading safetensors checkpoint shards: 100% Completed | 2/2 [00:08<00:00,  4.05s/it]
Loading safetensors checkpoint shards: 100% Completed | 2/2 [00:08<00:00,  4.20s/it]
[1;36m(EngineCore_DP0 pid=814036)[0;0m 


[1;36m(EngineCore_DP0 pid=814036)[0;0m INFO 12-02 17:23:51 [default_loader.py:314] Loading weights took 8.50 seconds
[1;36m(EngineCore_DP0 pid=814036)[0;0m INFO 12-02 17:23:52 [gpu_model_runner.py:3338] Model loading took 7.5023 GiB memory and 23.315299 seconds
[1;36m(EngineCore_DP0 pid=814036)[0;0m INFO 12-02 17:24:04 [backends.py:631] Using cache directory: /home/jovyan/.cache/vllm/torch_compile_cache/89507c50bd/rank_0_0/backbone for vLLM's torch.compile
[1;36m(EngineCore_DP0 pid=814036)[0;0m INFO 12-02 17:24:04 [backends.py:647] Dynamo bytecode transform time: 12.19 s
[1;36m(EngineCore_DP0 pid=814036)[0;0m INFO 12-02 17:24:13 [backends.py:251] Cache the graph for dynamic shape for later use
[1;36m(EngineCore_DP0 pid=814036)[0;0m INFO 12-02 17:24:23 [backends.py:282] Compiling a graph for dynamic shape takes 18.72 s
[1;36m(EngineCore_DP0 pid=814036)[0;0m INFO 12-02 17:24:30 [monitor.py:34] torch.compile takes 30.91 s in total
[1;36m(EngineCore_DP0 pid=814036)[0;0m INF

Capturing CUDA graphs (mixed prefill-decode, PIECEWISE): 100%|██████████| 51/51 [00:02<00:00, 18.45it/s]
Capturing CUDA graphs (decode, FULL): 100%|██████████| 35/35 [00:01<00:00, 22.73it/s]


[1;36m(EngineCore_DP0 pid=814036)[0;0m INFO 12-02 17:24:37 [gpu_model_runner.py:4244] Graph capturing finished in 5 secs, took 2.62 GiB
[1;36m(EngineCore_DP0 pid=814036)[0;0m INFO 12-02 17:24:37 [core.py:250] init engine (profile, create kv cache, warmup model) took 45.64 seconds


[1;36m(EngineCore_DP0 pid=814036)[0;0m The tokenizer you are loading from '/home/jovyan/khalikov/diligent-learner-detached/output/blocksworld_test/run_2025-12-01_23-09-22/final_model' with an incorrect regex pattern: https://huggingface.co/mistralai/Mistral-Small-3.1-24B-Instruct-2503/discussions/84#69121093e8b480e709447d5e. This will lead to incorrect tokenization. You should set the `fix_mistral_regex=True` flag when loading this tokenizer to fix this issue.


INFO 12-02 17:24:38 [llm.py:352] Supported tasks: ['generate']


In [43]:
outputs = llm.generate(prompts, sampling_params)
# Print the outputs.
print("\nGenerated Outputs:\n" + "-" * 60)
for output in outputs:
    prompt = output.prompt
    generated_text = output.outputs[0].text
    print(f"Prompt:    {prompt!r}")
    print(f"Output:    {generated_text!r}")
    print("-" * 60)

Adding requests: 100%|██████████| 1/1 [00:00<00:00, 521.87it/s]
Processed prompts: 100%|██████████| 1/1 [00:03<00:00,  3.11s/it, est. speed input: 155.77 toks/s, output: 96.35 toks/s]


Generated Outputs:
------------------------------------------------------------
Prompt:    'You must output exactly ONE next action using one of these\nforms:\n\n<node>ID STEP_TEXT</node>\n\n<done>FINAL_ANSWER</done>\n\n<backtrack>NODE_ID</backtrack>\n\nEmit ONLY the tag. No commentary or reasoning. <node>0 I am playing with a set of blocks where I need to arrange the blocks into stacks. Here are the actions I can do\n\nPick up a block\nUnstack a block from on top of another block\nPut down a block\nStack a block on top of another block\n\nI have the following restrictions on my actions:\nI can only pick up or unstack one block at a time.\nI can only pick up or unstack a block if my hand is empty.\nI can only pick up a block if the block is on the table and the block is clear. A block is clear if the block has no other blocks on top of it and if the block is not picked up.\nI can only unstack a block from on top of another block if the block I am unstacking was really on top of the ot




In [5]:
test = "I am playing with a set of blocks where I need to arrange the blocks into stacks. Here are the actions I can do\n\nPick up a block\nUnstack a block from on top of another block\nPut down a block\nStack a block on top of another block\n\nI have the following restrictions on my actions:\nI can only pick up or unstack one block at a time.\nI can only pick up or unstack a block if my hand is empty.\nI can only pick up a block if the block is on the table and the block is clear. A block is clear if the block has no other blocks on top of it and if the block is not picked up.\nI can only unstack a block from on top of another block if the block I am unstacking was really on top of the other block.\nI can only unstack a block from on top of another block if the block I am unstacking is clear.\nOnce I pick up or unstack a block, I am holding the block.\nI can only put down a block that I am holding.\nI can only stack a block on top of another block if I am holding the block being stacked.\nI can only stack a block on top of another block if the block onto which I am stacking the block is clear.\nOnce I put down or stack a block, my hand becomes empty.\nOnce you stack a block on top of a second block, the second block is no longer clear.\n\n[STATEMENT]\nAs initial conditions I have that, the red block is clear, the blue block is clear, the hand is empty, the blue block is on top of the yellow block, the yellow block is on top of the white block, the white block is on top of the orange block, the red block is on the table and the orange block is on the table.\nMy goal is to have that the red block is on top of the blue block and the blue block is on top of the yellow block.\n\nMy plan is as follows:\n\n[PLAN]\npick up the red block\nstack the red block on top of the blue block\n[PLAN END]\n\n[STATEMENT]\nAs initial conditions I have that, the red block is clear, the orange block is clear, the white block is clear, the hand is empty, the yellow block is on top of the blue block, the white block is on top of the yellow block, the red block is on the table, the blue block is on the table and the orange block is on the table.\nMy goal is to have that the blue block is on top of the orange block, the orange block is on top of the white block and the yellow block is on top of the blue block.\n\nMy plan is as follows:\n\n[PLAN]"
print(test)

I am playing with a set of blocks where I need to arrange the blocks into stacks. Here are the actions I can do

Pick up a block
Unstack a block from on top of another block
Put down a block
Stack a block on top of another block

I have the following restrictions on my actions:
I can only pick up or unstack one block at a time.
I can only pick up or unstack a block if my hand is empty.
I can only pick up a block if the block is on the table and the block is clear. A block is clear if the block has no other blocks on top of it and if the block is not picked up.
I can only unstack a block from on top of another block if the block I am unstacking was really on top of the other block.
I can only unstack a block from on top of another block if the block I am unstacking is clear.
Once I pick up or unstack a block, I am holding the block.
I can only put down a block that I am holding.
I can only stack a block on top of another block if I am holding the block being stacked.
I can only stack a 

In [6]:
test = "привет"
test.rfind("вет")

3