In [1]:
%reload_ext autoreload
%autoreload 2
import random
from VLA2Systems.task_data_generator import TaskDataGenerator
# Using a simple list: the generator is given a flat list of environment ids.
# Alternative MiniGrid environments, kept for reference:
# env_list = ["MiniGrid-DoorKey-16x16-v0", "MiniGrid-MultiRoom-N6-v0"]
env_list = ["BabyAI-OneRoomS8-v0", "BabyAI-ActionObjDoor-v0"]

generator = TaskDataGenerator(env_list)
# A fresh seed in [1, 1000] (both ends inclusive) is drawn on every run, so the
# printed task differs between runs. The seed is not printed here; inspect
# `seed` afterwards if a specific run needs to be replayed.
seed = random.randint(1, 1000)
generator.reset(seed=seed)
# `plan` is tested for truthiness below; a falsy result is reported as a
# planning failure.
plan = generator.generate_plan()
if plan:
    # include_all=True presumably adds the knowledge base, robot location and
    # mission in front of the plan steps (matches the saved output) -- confirm
    # against TaskDataGenerator.plan2text.
    text = generator.plan2text(include_all=True)
    print(text)
else:
    print("Planning Failed")


Knowledge Base:
Room 0:
  red ball is at (6, 2)
There is no Connections.
Robot location: (5, 5)
Mission: pick up the ball
Plan is: 
Step 1: Pick up a ball


In [2]:
# Using a difficulty-based dictionary: difficulty label -> candidate environment ids
env_dict = {
    "easy": ["BabyAI-ActionObjDoor-v0"],
    "intermediate": ["BabyAI-FindObjS5-v0"],
    "hard": ["BabyAI-UnlockToUnlock-v0", "BabyAI-Synth-v0"]
}
# Running counter used only to make the saved image filename unique.
index = 0
generator = TaskDataGenerator(env_dict)
generator.reset(difficulty="hard")  # Select from "hard" list
# Keep the result of THIS planning call. Without the assignment the check below
# would read a stale `plan` left over from an earlier cell (or raise NameError
# on a fresh kernel) and could report success for a plan that actually failed.
plan = generator.generate_plan()
# The image is saved regardless of whether planning succeeded.
generator.save_env_image(filename=f"Image-{generator.env_name}-{generator.seed}-{index}.png")
if plan:
    # Input/output texts form one training pair: the environment description
    # (with grid map and the robot's current room) and the step-by-step plan.
    input_text = generator.get_input_text(include_robot_current_room=True, include_grid=True)
    output_text = generator.get_output_text()
    print(f"Input text:\n{input_text}")
    print(f"Output text:\n{output_text}")
else:
    print("Planning Failed")


Input text:
Grid Map of the environment:
WWWWWWWWWWWWWWWW
W....W..K.DK...W
W....W...RW....W
W....D....W....W
W...BW....W....W
WWWWWWWWWWWWWWWW
Knowledge Base:
Room 0:
  green ball is at (4, 4)
  yellow door is at (5, 3) and is currently locked
Room 1:
  blue key is at (8, 1)
  blue door is at (10, 1) and is currently locked
  yellow door is at (5, 3) and is currently locked
Room 2:
  yellow key is at (11, 1)
  blue door is at (10, 1) and is currently locked

Connections:
Room 1 connect to Room 2 by blue door at (10, 1) which is currently locked
Room 0 connect to Room 1 by yellow door at (5, 3) which is currently locked
Robot location: (9, 2), which is in Room 1
Mission: pick up the ball
In order to achieve the mission the robot should preform these steps:

Output text:
Step 1: Pick up blue key
Step 2: Open blue door
Step 3: Pick up yellow key
Step 4: Open yellow door
Step 5: Pick up the ball


In [None]:
from VLA2Systems.task_data_collector import DataCollector
# Build the collector from the YAML configuration file and run the collection.
# NOTE(review): presumably this writes the per-difficulty datasets that the
# following cells load from ./datasets/ -- confirm against the config file.
# This cell has no recorded execution count, so it may be long-running.
collector = DataCollector("configs/data_collection_config.yaml")
collector.collect_data()


In [None]:
import os
from datasets import load_from_disk

def print_dataset_samples(dataset_path, num_samples=5):
    """Print the first few input/output pairs of a dataset saved on disk.

    Args:
        dataset_path: Directory passed to ``load_from_disk``.
        num_samples: Upper bound on how many samples to print; fewer are
            printed when the dataset is smaller.

    Returns:
        None. If ``dataset_path`` does not exist an error message is printed
        and nothing is loaded.
    """
    path_is_missing = not os.path.exists(dataset_path)
    if path_is_missing:
        print(f"Error: Dataset path '{dataset_path}' does not exist.")
        return

    print(f"Loading dataset from {dataset_path}...")
    records = load_from_disk(dataset_path)
    total = len(records)
    print(f"Dataset loaded! Total samples: {total}")

    # Never ask for more rows than the dataset holds.
    shown = min(num_samples, total)
    print(f"Showing {shown} samples:")

    preview = records.select(range(shown))
    for position, record in enumerate(preview, start=1):
        print(f"\nSample {position}:")
        print(f"Input: \n{record['input']}")
        print(f"Output: \n{record['output']}")
        print("-" * 50)

# Preview the "hard" split of the grid dataset.
# NOTE(review): the saved output of this cell reports loading from
# './datasets/robot_LLM_dataset_10k/hard/' (no "grid" in the name), so it was
# produced before this path was changed -- re-run the cell to refresh it.
dataset_name = "./datasets/robot_LLM_grid_dataset_10k/hard/"
print_dataset_samples(dataset_name)


Loading dataset from ./datasets/robot_LLM_dataset_10k/hard/...
Dataset loaded! Total samples: 3000
Showing 5 samples:

Sample 1:
Input: 
Knowledge Base:
Room 0 is empty
Room 1:
  grey door is at (10, 6) and is currently closed
Room 2 is empty
Room 3:
  grey door is at (6, 11) and is currently closed
Room 4:
  green box is at (7, 7)
  red key is at (7, 8)
  blue ball is at (11, 9)
  grey key is at (9, 11)
  purple box is at (11, 11)
  grey door is at (10, 6) and is currently closed
  grey door is at (12, 8) and is currently closed
  grey door is at (6, 11) and is currently closed
  blue door is at (7, 12) and is currently closed
Room 5:
  grey door is at (12, 8) and is currently closed
Room 6 is empty
Room 7:
  blue door is at (7, 12) and is currently closed
Room 8 is empty

Connections:
Room 1 connect to Room 4 by grey door at (10, 6) which is currently closed
Room 4 connect to Room 5 by grey door at (12, 8) which is currently closed
Room 3 connect to Room 4 by grey door at (6, 11) whi

# Merge the per-difficulty datasets into one mixed dataset according to a configurable split.

In [7]:
import random
from datasets import load_from_disk, Dataset

# Define dataset paths (one saved Hugging Face dataset per difficulty level)
dataset_paths = {
    "easy": "./datasets/robot_LLM_grid_dataset_10k/easy/",
    "intermediate": "./datasets/robot_LLM_grid_dataset_10k/intermediate/",
    "hard": "./datasets/robot_LLM_grid_dataset_10k/hard/"
}

# Define contribution percentages (must sum to 1.0)
contribution_percentages = {
    "easy": 0.85,         # 85% from easy
    "intermediate": 0.12, # 12% from intermediate
    "hard": 0.03          # 3% from hard
}

# Fail early on a misconfigured split instead of silently producing a dataset
# with the wrong size or composition.
if set(contribution_percentages) != set(dataset_paths):
    raise ValueError(
        "contribution_percentages and dataset_paths must have the same keys: "
        f"{sorted(contribution_percentages)} vs {sorted(dataset_paths)}"
    )
percentage_total = sum(contribution_percentages.values())
if abs(percentage_total - 1.0) > 1e-9:
    raise ValueError(f"Contribution percentages must sum to 1.0, got {percentage_total}")

# Load datasets
datasets = {key: load_from_disk(path) for key, path in dataset_paths.items()}

# Total number of samples available across all difficulty levels (informational)
total_size = sum(len(ds) for ds in datasets.values())

# Number of samples in the merged dataset
target_size = 3000

# Calculate samples per dataset. round() is used instead of int() so that a
# product such as 28.999999999999996 (floating-point error) is not truncated
# to one sample fewer than intended.
sample_sizes = {key: round(target_size * percentage) for key, percentage in contribution_percentages.items()}

# Make sure every source dataset can supply its share; otherwise select()
# below would fail with a much less helpful out-of-range error.
for key, requested in sample_sizes.items():
    available = len(datasets[key])
    if requested > available:
        raise ValueError(
            f"Dataset '{key}' has only {available} samples but {requested} were requested "
            f"({contribution_percentages[key]:.0%} of target_size={target_size})"
        )

# Sample and merge datasets
mixed_data = []
for key, dataset in datasets.items():
    # Shuffle with a fixed seed, then take the first N rows: a reproducible random sample
    sampled_data = dataset.shuffle(seed=42).select(range(sample_sizes[key]))
    mixed_data.extend(sampled_data)

# Convert to Hugging Face Dataset
mixed_dataset = Dataset.from_list(mixed_data)

# Shuffle final dataset so the difficulty levels are interleaved
mixed_dataset = mixed_dataset.shuffle(seed=42)

# Save the mixed dataset (optional)
mixed_dataset.save_to_disk("./datasets/robot_LLM_grid_dataset_10k/mixed_dataset")

print("Final dataset size:", len(mixed_dataset))

Saving the dataset (0/1 shards):   0%|          | 0/3000 [00:00<?, ? examples/s]

Saving the dataset (1/1 shards): 100%|██████████| 3000/3000 [00:00<00:00, 452492.52 examples/s]

Final dataset size: 3000



