# Evaluation of OpenVLA Model on LIBERO Dataset

This notebook walks through evaluating the OpenVLA model on a single task from the LIBERO benchmark. The following steps are performed:

1. **Setup and Imports**: Import necessary libraries and set up the environment.
2. **Load Processor and Model**: Load the OpenVLA processor and model from HuggingFace.
3. **Load LIBERO Dataset Configuration**: Load the configuration for the LIBERO dataset.
4. **Prepare Datasets**: Prepare datasets from the LIBERO benchmark.
5. **Extract Sample Data and Process Inputs**: Extract a sample image and instruction from the LIBERO dataset, process the inputs using the OpenVLA processor, visualize the raw RGB image, print the raw instruction and formatted prompt, print the size of the processed input tensors, and print the OpenVLA model outputs for each step.

By running these sections sequentially, we can evaluate the whole process for one task from the LIBERO dataset, visualize the raw RGB image, print the instructions and prompts, show the input tensor sizes, and print the OpenVLA model outputs for each step.


Section 1: Load the OpenVLA Hugging Face Processor and Model

In [None]:
%env TRANSFORMERS_CACHE=/data2/zhaoyu/huggingface_cache
# os.environ['TRANSFORMERS_CACHE'] = '/data2/zhaoyu/huggingface_cache'
from transformers import AutoModelForVision2Seq, AutoProcessor
import torch

# Load Processor & VLA
processor = AutoProcessor.from_pretrained("openvla/openvla-7b", trust_remote_code=True)
vla = AutoModelForVision2Seq.from_pretrained(
    "openvla/openvla-7b",
    torch_dtype=torch.bfloat16,
    low_cpu_mem_usage=True,
    trust_remote_code=True
).to("cuda:2")
print(vla)

Section 2: Load LIBERO Demonstration Data

In [None]:
'''
Dataset structure:
language_instruction: a string of language instruction for the task
actions_batch: numpy array with size: (50, N, 8)
    - 50: number of demonstrations
    - N: number of actions in each demonstration
    - 8: action dimension
images_batch: numpy array with size: (50, N, 128, 128, 3)
    - 50: number of demonstrations
    - N: number of images in each demonstration
    - 128x128: image size
    - 3: RGB
'''
# NOTE(review): the evaluation cell steps the simulator with 7-dim actions
# (np.zeros((env_num, 7))); confirm the action dimension of 8 listed above.

import os
import sys
import numpy as np

# Add VLA_DIR and LIBERO to PYTHONPATH.
# `__file__` is not defined inside a notebook, so both entries are resolved
# relative to the kernel's current working directory.
for _extra_path in ('../', '../external/LIBERO'):  # VLA_DIR, then LIBERO
    _extra_path = os.path.abspath(_extra_path)
    # Skip entries that are already present so re-running the cell does not
    # keep growing sys.path.
    if _extra_path not in sys.path:
        sys.path.append(_extra_path)

from libero.libero import benchmark, get_libero_path
from utils.LIBERO_utils import get_task_names, extract_task_info

## User specific configurations
# TODO: change this into argparse for user input in python file
DATASET_NAME = "libero_10" # "libero_object", "libero_spatial", "libero_goal", "libero_10", "libero_90"
# currently no need to change FILTER_KEY and VERBOSE
FILTER_KEY = None  # Set filter key if needed, e.g., "valid" for validation
VERBOSE = True

## Check libero dataset path
BENCHMARK_PATH = get_libero_path("benchmark_root")
DATASET_BASE_PATH = get_libero_path("datasets")
# Demonstrations for one suite live in a sub-folder named after the suite.
DATASET_PATH_DEMO = os.path.join(DATASET_BASE_PATH, DATASET_NAME)
print("=====================================")
print("LIBERO benchmark root path: ", BENCHMARK_PATH)
print("LIBERO dataset root path: ", DATASET_BASE_PATH)
print(f"LIBERO demonstration dataset for {DATASET_NAME} path: {DATASET_PATH_DEMO}")
print("=====================================")

## Load demonstration dataset
# Names of all tasks found under the selected demonstration folder.
task_names_demo = get_task_names(DATASET_PATH_DEMO)
# print(f"Tasks in the demonstration dataset: {task_names_demo}")

# Maps task name -> [language_instruction, actions_batch, images_batch].
dataset_demo = {}
print("Start loading demonstration data for each task...")
print("-------------------------------------")
for task_name_demo in task_names_demo:
    print(f"Loading demonstration data for task:\n {task_name_demo}")
    language_instruction, actions_batch, images_batch = extract_task_info(
        DATASET_PATH_DEMO,
        task_name_demo,
        filter_key=FILTER_KEY,
        verbose=VERBOSE,
    )
    dataset_demo[task_name_demo] = [language_instruction, actions_batch, images_batch]

    # Sanity check: both batches must hold the same number of demonstrations.
    assert actions_batch.shape[0] == images_batch.shape[0], "Dataset problem: the number of actions and images should be the same!"

    # Summarise what was loaded for this task.
    print("Loaded successfully!")
    print(f"Total demonstrations: {actions_batch.shape[0]}")
    # Mean number of steps per demonstration.
    ave_len = np.mean(list(map(len, actions_batch)))
    print(f"Average demonstration length: {ave_len}")
    # Shape of a single action / image, taken from the first step of the first demo.
    action_shape = actions_batch[0][0].shape
    print(f"Action shape: {action_shape}")
    img_shape = images_batch[0][0].shape
    print(f"Image shape: {img_shape}")
    print("-------------------------------------")

Section 4: Train OpenVLA on LIBERO

In [None]:
# TODO

Section 5: Evaluate OpenVLA on Trained LIBERO Dataset

In [3]:
%env MUJOCO_GL=osmesa
import os
import sys
import numpy as np
import random
import matplotlib.pyplot as plt

# Add VLA_DIR to PYTHONPATH
sys.path.append(os.path.abspath(os.path.join(os.path.dirname('__file__'), '../')))

# Add LIBERO to PYTHONPATH
sys.path.append(os.path.abspath(os.path.join(os.path.dirname('__file__'), '../external/LIBERO')))

from libero.libero.utils.time_utils import Timer
from libero.libero.utils.video_utils import VideoWriter
# from libero.lifelong.metric import (
#     evaluate_loss,
#     evaluate_success,
#     raw_obs_to_tensor_obs,
# )
# from libero.lifelong.utils import (
#     control_seed,
#     safe_device,
#     torch_load_model,
#     NpEncoder,
#     compute_flops,
# )
from libero.libero import benchmark, get_libero_path
from libero.libero.envs import OffScreenRenderEnv, SubprocVectorEnv

from utils.LIBERO_utils import get_task_names, extract_task_info, extract_env_obs

## User specific configurations
# TODO: change this into argparse for user input in python file
# Benchmark suite to evaluate; one of:
# "libero_object", "libero_spatial", "libero_goal", "libero_10", "libero_90"
DATASET_NAME = "libero_10"
# Whether to save a video of the evaluation process.
SAVE_VIDEO = False
# Maximum number of steps for evaluation.
EVAL_MAX_STEP = 500
# GPU device id for rendering.
DEVICE_ID = 2

## Check evaluation configuration paths
BDDL_FILES_BASE_PATH = get_libero_path("bddl_files")
INIT_STATES_BASE_PATH = get_libero_path("init_states")
VIDEO_FOLDER = "../videos"

print("=====================================")
print("LIBERO evaluation BDDL files path: ", BDDL_FILES_BASE_PATH)
print("LIBERO evaluation initial states path: ", INIT_STATES_BASE_PATH)
# The video folder is only reported when recording is switched on.
if SAVE_VIDEO:
    print("Video saving to: ", VIDEO_FOLDER)
print("=====================================")


## Load evaluation environment
# Look up the benchmark class registered under DATASET_NAME and instantiate it.
benchmark_dict = benchmark.get_benchmark_dict()
benchmark_instance = benchmark_dict[DATASET_NAME]()
# num_tasks_eval = benchmark_instance.get_num_tasks()
task_names_eval = benchmark_instance.get_task_names()
print(f"Task names for evaluation: {task_names_eval}")
# print(f"Task name for training: {task_names_demo}")

# Evaluate the model
task_id = 2
task = benchmark_instance.get_task(task_id)


def _show_agentview_images(obs, env_num):
    """Show the `agentview_image` of the first `env_num` observations side by side.

    Each image has its first axis reversed before stacking (presumably because
    the off-screen renderer returns frames upside down -- TODO confirm).
    """
    stacked_image = np.hstack([obs[k]["agentview_image"][::-1] for k in range(env_num)])
    plt.imshow(stacked_image)
    plt.axis('off')
    plt.show()


with Timer() as t, VideoWriter(VIDEO_FOLDER, SAVE_VIDEO) as video_writer:
    env_args = {
        "bddl_file_name": os.path.join(
            BDDL_FILES_BASE_PATH, task.problem_folder, task.bddl_file
        ),
        "render_gpu_device_id": DEVICE_ID
    }

    # Number of environments stepped in parallel, each in its own worker.
    env_num = 3
    env = SubprocVectorEnv(
        [lambda: OffScreenRenderEnv(**env_args) for _ in range(env_num)]
    )

    # Everything that uses `env` lives inside this try block so the workers are
    # shut down even if a step fails or the cell is interrupted. Any rollout
    # logic that steps `env` belongs in here as well, before the close.
    try:
        env.reset()
        env.seed(random.randint(0, 1000))

        init_states_path = os.path.join(
            INIT_STATES_BASE_PATH, task.problem_folder, task.init_states_file
        )
        # NOTE(review): recent PyTorch releases default torch.load to
        # weights_only=True, which may reject this file -- confirm against the
        # installed PyTorch version.
        init_states = torch.load(init_states_path)
        # Wrap around the stored states if there are fewer of them than environments.
        indices = np.arange(env_num) % init_states.shape[0]
        init_states_ = init_states[indices]

        dones = [False] * env_num
        steps = 0
        obs = env.set_init_state(init_states_)
        print("Initial RGB observations:")
        _show_agentview_images(obs, env_num)

        num_success = 0
        for _ in range(5):  # simulate the physics without any actions
            obs, reward, done, info = env.step(np.zeros((env_num, 7)))

        print("RGB observations after 5 steps:")
        _show_agentview_images(obs, env_num)
    finally:
        env.close()

# NOTE: Disabled rollout loop (work in progress), kept for reference.
# Before re-enabling it, note that it uses names that are not defined in the
# cells shown in this notebook: `actions` (the policy call that would produce it
# is itself commented out), `algo`, `test_loss`, `args`, `save_folder`, and
# `run_folder`. It also contains a blocking `input("PAUSE")` debugging prompt.
#     with torch.no_grad():
#         while steps < EVAL_MAX_STEP:
#             steps += 1

#             # get current observation
            
#             data = extract_env_obs(obs, DEVICE_ID)
#             print(data)
#             input("PAUSE")
#             # actions = algo.policy.get_action(data)
#             obs, reward, done, info = env.step(actions)
#             video_writer.append_vector_obs(
#                 obs, dones, camera_name="agentview_image"
#             )

#             # check whether succeed
#             for k in range(env_num):
#                 dones[k] = dones[k] or done[k]
#             if all(dones):
#                 break

#         for k in range(env_num):
#             num_success += int(dones[k])

#     success_rate = num_success / env_num
#     env.close()

#     eval_stats = {
#         "loss": test_loss,
#         "success_rate": success_rate,
#     }

#     os.system(f"mkdir -p {args.save_dir}")
#     torch.save(eval_stats, save_folder)
# print(
#     f"[info] finish for ckpt at {run_folder} in {t.get_elapsed_time()} sec for rollouts"
# )
# print(f"Results are saved at {save_folder}")
# print(test_loss, success_rate)


env: MUJOCO_GL=osmesa


NameError: name 'DATASET_NAME' is not defined