# Evaluation of OpenVLA Model on LIBERO Dataset

This notebook walks through evaluating the OpenVLA model on one task from the LIBERO dataset. The following steps are performed:

1. **Setup and Imports**: Import necessary libraries and set up the environment.
2. **Load Processor and Model**: Load the OpenVLA processor and model from HuggingFace.
3. **Load LIBERO Dataset Configuration**: Load the configuration for the LIBERO dataset.
4. **Prepare Datasets**: Prepare datasets from the LIBERO benchmark.
5. **Extract Sample Data and Process Inputs**: Extract a sample image and instruction from the LIBERO dataset, process the inputs using the OpenVLA processor, visualize the raw RGB image, print the raw instruction and formatted prompt, print the size of the processed input tensors, and print the OpenVLA model outputs for each step.

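By running these sections sequentially, we can evaluate the whole process for one task from the LIBERO dataset: visualize the raw RGB image, print the instruction and prompt, show the input tensor sizes, and print the OpenVLA model outputs for each step.

**Note:** this notebook is still a work in progress, so the section headings below do not yet match the outline above: Section 4 (training) is a TODO, and the rollout loop in Section 5 is commented out.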


## Section 1: Setup and Imports

In [1]:
import os
import sys

# Make the project root (VLA_DIR) and the vendored LIBERO checkout importable.
#
# A notebook kernel has no `__file__`, so both locations are resolved relative
# to the kernel's current working directory: the project root is its parent
# directory and LIBERO lives in `<root>/external/LIBERO`. Start the kernel
# from the notebook's own folder for these to point at the right place.
#
# Each path is appended only if it is missing, so re-running this cell does
# not pile up duplicate sys.path entries.
for _extra_path in (
    os.path.abspath(".."),                  # VLA_DIR (project root)
    os.path.abspath("../external/LIBERO"),  # LIBERO package
):
    if _extra_path not in sys.path:
        sys.path.append(_extra_path)

import yaml
import torch
import matplotlib.pyplot as plt
from PIL import Image
from easydict import EasyDict
from transformers import AutoModelForVision2Seq, AutoProcessor
from libero.libero import benchmark, get_libero_path
from utils.LIBERO_utils import get_task_names, extract_task_info

  from .autonotebook import tqdm as notebook_tqdm


## Section 2: Load Processor and Model

In [None]:
# Load Processor & VLA

# Hugging Face Hub id shared by the processor and the model.
OPENVLA_MODEL_ID = "openvla/openvla-7b"

# Device for the policy. The second GPU ("cuda:1") is preferred; on hosts
# with fewer than two GPUs fall back to the first GPU, then to the CPU,
# instead of failing on a non-existent device index.
if torch.cuda.device_count() > 1:
    VLA_DEVICE = "cuda:1"
elif torch.cuda.is_available():
    VLA_DEVICE = "cuda:0"
else:
    VLA_DEVICE = "cpu"
print(f"Loading {OPENVLA_MODEL_ID} on {VLA_DEVICE}")

# NOTE: trust_remote_code=True runs Python code shipped with the model
# repository; only use it with repositories you trust.
processor = AutoProcessor.from_pretrained(OPENVLA_MODEL_ID, trust_remote_code=True)
vla = AutoModelForVision2Seq.from_pretrained(
    OPENVLA_MODEL_ID,
    torch_dtype=torch.bfloat16,  # load weights in bfloat16
    low_cpu_mem_usage=True,
    trust_remote_code=True,
).to(VLA_DEVICE)

## Section 3: Load LIBERO Demonstration Data

In [2]:
# --- Dataset selection -------------------------------------------------------
DATASET_NAME = "libero_spatial"  # sub-folder of the dataset root; also reused as the benchmark key later
FILTER_KEY = None  # optional demo filter, e.g. "valid" for validation
VERBOSE = True

# --- Report the default LIBERO locations -------------------------------------
BENCHMARK_PATH = get_libero_path("benchmark_root")
DATASET_BASE_PATH = get_libero_path("datasets")
print("Default benchmark root path: ", BENCHMARK_PATH)
print("Default dataset root path: ", DATASET_BASE_PATH)

# --- Locate the selected dataset ---------------------------------------------
dataset_path_demo = os.path.join(DATASET_BASE_PATH, DATASET_NAME)
print(f"Dataset path: {dataset_path_demo}")

# Per-task demonstration store, keyed by task name. It stays empty until the
# loading loop below is enabled.
demonstration_data = {}
# Names of all tasks found under the selected dataset folder.
task_names_demo = get_task_names(dataset_path_demo)

# Disabled: load [language_instruction, actions_batch, images_batch] per task.
# for task_name_demo in task_names_demo:
#     print(f"Loading demonstration data for task: {task_name_demo}")
#     [language_instruction, actions_batch, images_batch] = extract_task_info(dataset_path_demo, task_name_demo, filter_key=FILTER_KEY, verbose=VERBOSE)
#     demonstration_data[task_name_demo] = [language_instruction, actions_batch, images_batch]

Default benchmark root path:  /home/zhaoyu/Workspace/VLA-Continual-Learning/external/LIBERO/libero/libero
Default dataset root path:  /data2/zhaoyu/LIBERO_dataset/datasets
Dataset path: /data2/zhaoyu/LIBERO_dataset/datasets/libero_spatial


## Section 4: Train OpenVLA on LIBERO

In [None]:
# TODO

## Section 5: Evaluate OpenVLA on Trained LIBERO Dataset

In [4]:
%env MUJOCO_GL=osmesa
import time
import numpy as np
import random
from libero.libero.utils.time_utils import Timer
from libero.libero.utils.video_utils import VideoWriter
from utils.LIBERO_utils import get_task_names, extract_task_info
from libero.lifelong.metric import (
    evaluate_loss,
    evaluate_success,
    raw_obs_to_tensor_obs,
)
from libero.lifelong.utils import (
    control_seed,
    safe_device,
    torch_load_model,
    NpEncoder,
    compute_flops,
)
from libero.libero.envs import OffScreenRenderEnv, SubprocVectorEnv


# --- Evaluation settings -----------------------------------------------------
EVAL_MAX_STEP = 500          # step cap intended for the (currently disabled) rollout loop
SAVE_VIDEO = False           # forwarded to VideoWriter
VIDEO_FOLDER = "../videos"   # forwarded to VideoWriter

# LIBERO folders holding the task definition (BDDL) files and the initial states.
BDDL_FILES_BASE_PATH, INIT_STATES_BASE_PATH = (
    get_libero_path(path_key) for path_key in ("bddl_files", "init_states")
)

# --- Benchmark suite ---------------------------------------------------------
# Look up the suite registered under DATASET_NAME and instantiate it.
# task_names_demo = list(demonstration_data.keys())
benchmark_dict = benchmark.get_benchmark_dict()
benchmark_instance = benchmark_dict[DATASET_NAME]()
# num_tasks_eval = benchmark_instance.get_num_tasks()
task_names_eval = benchmark_instance.get_task_names()
print(f"Task names for evaluation: {task_names_eval}")
# print(f"Task name for training: {task_names_demo}")

# --- Task under evaluation ---------------------------------------------------
task_id = 0  # index of the task within the suite
task = benchmark_instance.get_task(task_id)

# Build a vectorised off-screen LIBERO environment for the selected task, put
# it into a stored initial state and print the first observation.
#
# The environment is now closed in a `finally` block, so it is released both
# on success and when any step below raises. Previously the only
# `env.close()` lived inside the commented-out scaffold, so every run of this
# cell left the environment open.
with Timer() as t, VideoWriter(VIDEO_FOLDER, SAVE_VIDEO) as video_writer:
    # Keyword arguments forwarded to every OffScreenRenderEnv instance.
    env_args = {
        "bddl_file_name": os.path.join(
            BDDL_FILES_BASE_PATH, task.problem_folder, task.bddl_file
        ),
        # NOTE(review): MUJOCO_GL is set to "osmesa" at the top of this cell;
        # confirm that a GPU device id has any effect with that backend.
        "render_gpu_device_id": 2,
    }

    env_num = 1  # number of parallel environments
    env = SubprocVectorEnv(
        [lambda: OffScreenRenderEnv(**env_args) for _ in range(env_num)]
    )

    try:
        env.reset()
        env.seed(0)

        init_states_path = os.path.join(
            INIT_STATES_BASE_PATH, task.problem_folder, task.init_states_file
        )
        # NOTE: torch.load unpickles the file, so only load init-state files
        # from a trusted source.
        # NOTE(review): recent PyTorch releases changed the `weights_only`
        # default; confirm this file still loads on the installed version.
        init_states = torch.load(init_states_path)
        # One stored initial state per environment, wrapping around when there
        # are fewer stored states than environments.
        indices = np.arange(env_num) % init_states.shape[0]
        init_states_ = init_states[indices]

        dones = [False] * env_num  # per-environment success flags (used by the scaffold below)
        steps = 0                  # rollout step counter (used by the scaffold below)
        obs = env.set_init_state(init_states_)
        print(obs)

        # ------------------------------------------------------------------
        # Rollout scaffold (disabled). It is not runnable as-is: it refers to
        # task_emb, cfg, algo, test_loss, args, save_folder and run_folder,
        # none of which are defined in the visible part of this notebook.
        # ------------------------------------------------------------------
        # num_success = 0
        # for _ in range(5):  # simulate the physics without any actions
        #     env.step(np.zeros((env_num, 7)))
        #
        # with torch.no_grad():
        #     while steps < EVAL_MAX_STEP:
        #         steps += 1
        #
        #         # get current observation
        #
        #         data = raw_obs_to_tensor_obs(obs, task_emb, cfg)
        #         actions = algo.policy.get_action(data)
        #         obs, reward, done, info = env.step(actions)
        #         video_writer.append_vector_obs(
        #             obs, dones, camera_name="agentview_image"
        #         )
        #
        #         # check whether succeed
        #         for k in range(env_num):
        #             dones[k] = dones[k] or done[k]
        #         if all(dones):
        #             break
        #
        #     for k in range(env_num):
        #         num_success += int(dones[k])
        #
        # success_rate = num_success / env_num
        # (env.close() is handled by the `finally` clause below)
        #
        # eval_stats = {
        #     "loss": test_loss,
        #     "success_rate": success_rate,
        # }
        #
        # os.system(f"mkdir -p {args.save_dir}")
        # torch.save(eval_stats, save_folder)
    finally:
        # Always release the vectorised environment, even if a step above failed.
        env.close()

# print(
#     f"[info] finish for ckpt at {run_folder} in {t.get_elapsed_time()} sec for rollouts"
# )
# print(f"Results are saved at {save_folder}")
# print(test_loss, success_rate)


env: MUJOCO_GL=osmesa
[info] using task orders [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]
Task names for evaluation: ['pick_up_the_black_bowl_between_the_plate_and_the_ramekin_and_place_it_on_the_plate', 'pick_up_the_black_bowl_next_to_the_ramekin_and_place_it_on_the_plate', 'pick_up_the_black_bowl_from_table_center_and_place_it_on_the_plate', 'pick_up_the_black_bowl_on_the_cookie_box_and_place_it_on_the_plate', 'pick_up_the_black_bowl_in_the_top_drawer_of_the_wooden_cabinet_and_place_it_on_the_plate', 'pick_up_the_black_bowl_on_the_ramekin_and_place_it_on_the_plate', 'pick_up_the_black_bowl_next_to_the_cookie_box_and_place_it_on_the_plate', 'pick_up_the_black_bowl_on_the_stove_and_place_it_on_the_plate', 'pick_up_the_black_bowl_next_to_the_plate_and_place_it_on_the_plate', 'pick_up_the_black_bowl_on_the_wooden_cabinet_and_place_it_on_the_plate']


-------init offscreen--------
-------init 1--------
-------init 2--------
-------init 3--------
-------init 4--------
-------init--------
-------init mujoco--------
CREATED CON


  @overload(np.MachAr)


-------init offscreen--------
-------init 1--------
-------init 2--------
-------init 3--------
-------init 4--------
-------init--------
-------init mujoco--------
CREATED CON


AttributeError: 'numpy.ndarray' object has no attribute 'keys'