# Fine-tune OpenVLA on the LIBERO dataset with Hugging Face parameter-efficient fine-tuning (PEFT)

Load LIBERO demonstration dataset

In [1]:
'''
Dataset structure:
language_instruction: a string of language instruction for the task
actions_batch: numpy array with size: (50, N, 8)
    - 50: number of demonstrations
    - N: number of actions in each demonstration
    - 8: action dimension
images_batch: numpy array with size: (50, N, 128, 128, 3)
    - 50: number of demonstrations
    - N: number of images in each demonstration
    - 128x128: image size
    - 3: RGB
'''

import os
import sys
import numpy as np
# Add VLA_DIR to PYTHONPATH
sys.path.append(os.path.abspath(os.path.join(os.path.dirname('__file__'), '../')))
# Add LIBERO to PYTHONPATH
sys.path.append(os.path.abspath(os.path.join(os.path.dirname('__file__'), '../external/LIBERO')))
from libero.libero import benchmark, get_libero_path
from utils.LIBERO_utils import get_task_names, extract_task_info

## User specific configurations
# TODO: change this into argparse for user input in python file
# Suite to load. Names listed by the author:
# "libero_object", "libero_spatial", "libero_goal", "libero_10", "libero_90"
DATASET_NAME = "libero_10"
# FILTER_KEY and VERBOSE currently do not need to be changed
FILTER_KEY = None  # optional filter key, e.g. "valid" for validation
VERBOSE = True

## Resolve the LIBERO paths and report them
BENCHMARK_PATH = get_libero_path("benchmark_root")
DATASET_BASE_PATH = get_libero_path("datasets")
# demonstrations of one suite live in a sub-directory named after the suite
DATASET_PATH_DEMO = os.path.join(DATASET_BASE_PATH, DATASET_NAME)
banner = "====================================="
print(banner)
print("LIBERO benchmark root path: ", BENCHMARK_PATH)
print("LIBERO dataset root path: ", DATASET_BASE_PATH)
print(f"LIBERO demonstration dataset for {DATASET_NAME} path: {DATASET_PATH_DEMO}")
print(banner)

## Load the demonstration data, one task at a time
# names of all tasks found under the dataset directory
task_names_demo = get_task_names(DATASET_PATH_DEMO)
# task name -> [language_instruction, actions_batch, images_batch]
dataset_demo = {}
print("Start loading demonstration data for each task...")
print("-------------------------------------")
for task_name_demo in task_names_demo:
    print(f"Loading demonstration data for task:\n {task_name_demo}")
    language_instruction, actions_batch, images_batch = extract_task_info(
        DATASET_PATH_DEMO,
        task_name_demo,
        filter_key=FILTER_KEY,
        verbose=VERBOSE,
    )
    dataset_demo[task_name_demo] = [language_instruction, actions_batch, images_batch]
    # sanity check: both arrays must hold the same number of demonstrations
    num_demos = actions_batch.shape[0]
    assert num_demos == images_batch.shape[0], "Dataset problem: the number of actions and images should be the same!"
    # per-task summary
    print("Loaded successfully!")
    print(f"Total demonstrations: {num_demos}")
    demo_lengths = [len(demo_actions) for demo_actions in actions_batch]
    ave_len = np.mean(demo_lengths)  # mean number of steps per demonstration
    print(f"Average demonstration length: {ave_len}")
    first_demo_actions = actions_batch[0]
    action_shape = first_demo_actions[0].shape  # shape of a single action
    print(f"Action shape: {action_shape}")
    first_demo_images = images_batch[0]
    img_shape = first_demo_images[0].shape  # shape of a single image
    print(f"Image shape: {img_shape}")
    print("-------------------------------------")

LIBERO benchmark root path:  /home/zhaoyu/Workspace/VLA-Continual-Learning/external/LIBERO/libero/libero
LIBERO dataset root path:  /data2/zhaoyu/LIBERO_dataset/datasets
LIBERO demonstration dataset for libero_10 path: /data2/zhaoyu/LIBERO_dataset/datasets/libero_10
Start loading demonstration data for each task...
-------------------------------------
Loading demonstration data for task:
 LIVING_ROOM_SCENE6_put_the_white_mug_on_the_plate_and_put_the_chocolate_pudding_to_the_right_of_the_plate_demo
Loaded successfully!
Total demonstrations: 50
Average demonstration length: 255.12
Action shape: (8,)
Image shape: (128, 128, 3)
-------------------------------------
Loading demonstration data for task:
 STUDY_SCENE1_pick_up_the_book_and_place_it_in_the_back_compartment_of_the_caddy_demo
Loaded successfully!
Total demonstrations: 50
Average demonstration length: 189.4
Action shape: (8,)
Image shape: (128, 128, 3)
-------------------------------------
Loading demonstration data for task:
 LI

Convert the dataset to an RLDS-style episode format (required for OpenVLA fine-tuning)

In [2]:
import pickle

## User specific configurations
# TODO: change this into argparse for user input in python file
DATASET_SAVE_PATH = "/data2/zhaoyu/LIBERO_rlds"

## Re-pack every demonstration as an RLDS-style episode dict
episodes = []

for task_name, (language_instruction, actions_batch, images_batch) in dataset_demo.items():
    num_demos = actions_batch.shape[0]

    for i in range(num_demos):
        demo_actions = actions_batch[i]
        demo_images = images_batch[i]
        num_steps = demo_actions.shape[0]
        final_index = num_steps - 1

        # One dict per time step. The action is stored as loaded; the dataset
        # description and the loader output above report 8-dimensional actions.
        steps = [
            {
                'observation': {
                    'image': demo_images[j]
                },
                'action': demo_actions[j],
                'reward': 0.0,  # placeholder; update with actual reward if available
                'is_last': (j == final_index)
            }
            for j in range(num_steps)
        ]

        episodes.append({
            'language_instruction': language_instruction,
            'steps': steps
        })

## Write all episodes into a single pickle file
os.makedirs(DATASET_SAVE_PATH, exist_ok=True)
output_path = os.path.join(DATASET_SAVE_PATH, f'{DATASET_NAME}.pkl')
with open(output_path, 'wb') as pickle_file:
    pickle.dump(episodes, pickle_file)

print(f"Dataset converted and saved to {output_path}")

Dataset converted and saved to /data2/zhaoyu/LIBERO_rlds/libero_10.pkl


## Load dataset and finetune

Imports and path

In [1]:
# Set process environment variables through IPython's %env magic: the Hugging Face
# cache directory and the tokenizers parallelism flag.
# NOTE(review): presumably these must be set before `transformers` is imported below — confirm.
%env TRANSFORMERS_CACHE=/data2/zhaoyu/huggingface_cache
%env TOKENIZERS_PARALLELISM = false
#os.environ["TOKENIZERS_PARALLELISM"] = "false"
# os.environ['TRANSFORMERS_CACHE'] = '/data2/zhaoyu/huggingface_cache'

## Imports
import os
import sys
import numpy as np
import torch
import torch.distributed as dist
from torch.nn.parallel import DistributedDataParallel as DDP
from torch.optim import AdamW
from torch.utils.data import DataLoader
from transformers import AutoModelForVision2Seq, AutoProcessor
from transformers import BitsAndBytesConfig
from accelerate import PartialState
from peft import LoraConfig, PeftModel, get_peft_model, prepare_model_for_kbit_training
import wandb
sys.path.append(os.path.abspath(os.path.join(os.path.dirname('__file__'), '../')))
sys.path.append(os.path.abspath(os.path.join(os.path.dirname('__file__'), '../external/LIBERO')))
from libero.libero import benchmark, get_libero_path
from utils.LIBERO_utils import get_task_names, extract_task_info
from prismatic.models.backbones.llm.prompting import PurePromptBuilder, VicunaV15ChatPromptBuilder
from prismatic.util.data_utils import PaddedCollatorForActionPrediction
from prismatic.vla.action_tokenizer import ActionTokenizer
from prismatic.vla.datasets import RLDSBatchTransform, RLDSDataset
from prismatic.vla.datasets.rlds.utils.data_utils import save_dataset_statistics


## User specific configurations
# TODO: change this into argparse for user input in python file
# DATASET_NAME is normally defined by the data-loading cell. This part of the
# notebook is also run on a fresh kernel (fine-tuning only), so fall back to the
# same default instead of failing with a NameError.
if "DATASET_NAME" not in globals():
    DATASET_NAME = "libero_10"
BASE_STORAGE_PATH = '/data2/zhaoyu/LIBERO_finetune'
DATASET_SAVE_PATH = os.path.join(BASE_STORAGE_PATH, f'datasets/{DATASET_NAME}')
CHECKPOINT_PATH = os.path.join(BASE_STORAGE_PATH, f'checkpoints/{DATASET_NAME}')
LOGS_PATH = os.path.join(BASE_STORAGE_PATH, f'logs/{DATASET_NAME}')
# The conversion cell writes the pickle to /data2/zhaoyu/LIBERO_rlds/<DATASET_NAME>.pkl,
# so read it from there; DATASET_SAVE_PATH is only created below and holds no pickle.
RLDS_EXPORT_DIR = "/data2/zhaoyu/LIBERO_rlds"
RLDS_DATASET_PATH = os.path.join(RLDS_EXPORT_DIR, f'{DATASET_NAME}.pkl')
# create directories if they do not exist
for storage_dir in (DATASET_SAVE_PATH, CHECKPOINT_PATH, LOGS_PATH):
    os.makedirs(storage_dir, exist_ok=True)
# print confirmation of environment setup
print("Environment setup complete.")
# print(f"TRANSFORMERS_CACHE set to: {os.environ['TRANSFORMERS_CACHE']}")
print(f"RLDS Dataset path: {DATASET_SAVE_PATH}")
print(f"RLDS pickle file: {RLDS_DATASET_PATH}")
print(f"Checkpoint path: {CHECKPOINT_PATH}")
print(f"Logs path: {LOGS_PATH}")

env: TRANSFORMERS_CACHE=/data2/zhaoyu/huggingface_cache


  from .autonotebook import tqdm as notebook_tqdm


ModuleNotFoundError: No module named 'prismatic.models'

Training configs

In [None]:
## Finetune configuration
# `dataclass` is not imported by the notebook's import cell; without this line
# the decorator below raises NameError.
from dataclasses import dataclass


@dataclass
class FinetuneConfig:
    """Hyperparameters and runtime settings used for LoRA fine-tuning.

    All fields have defaults, so ``FinetuneConfig()`` yields a ready-to-use
    configuration. The distributed-training and logging defaults are computed
    once, when this class is defined: they read the visible CUDA device count,
    the ``RANK`` / ``LOCAL_RANK`` environment variables and the module-level
    ``DATASET_NAME`` at that moment.
    """

    # Training hyperparameters
    batch_size: int = 12  # Batch size for training, adjust based on GPU memory
    epochs: int = 10  # Number of training epochs
    learning_rate: float = 5e-5  # Learning rate for the optimizer
    lora_rank: int = 32  # LoRA rank for low-rank adaptation
    target_modules: str = "all-linear"  # Target modules for LoRA

    # Distributed training settings
    world_size: int = torch.cuda.device_count()  # Number of GPUs available for training
    rank: int = int(os.getenv('RANK', 0))  # Rank of the current process (0 if RANK is unset)
    local_rank: int = int(os.getenv('LOCAL_RANK', 0))  # Local rank of the current process (0 if unset)

    # Logging configurations
    wandb_project: str = "OpenVLA_Finetuning"  # Project name for Weights & Biases logging
    wandb_run_name: str = f"finetune_{DATASET_NAME}"  # Run name for Weights & Biases logging

# Build the configuration from its defaults
finetune_config = FinetuneConfig()

# Report every setting, one per line
summary_lines = (
    "Configuration parameters set:",
    f"Batch size: {finetune_config.batch_size}",
    f"Epochs: {finetune_config.epochs}",
    f"Learning rate: {finetune_config.learning_rate}",
    f"LoRA rank: {finetune_config.lora_rank}",
    f"Target modules: {finetune_config.target_modules}",
    f"World size (number of GPUs): {finetune_config.world_size}",
    f"Rank: {finetune_config.rank}",
    f"Local rank: {finetune_config.local_rank}",
    f"WANDB project: {finetune_config.wandb_project}",
    f"WANDB run name: {finetune_config.wandb_run_name}",
)
print("\n".join(summary_lines))

Load and preprocess dataset

In [None]:
import pickle
from torch.utils.data import Dataset

## Custom dataset to handle RLDS formatted LIBERO data
class RLDataset(Dataset):
    """Map-style dataset over a pickled list of episode dicts.

    Each episode is expected to provide the keys ``'language_instruction'``
    and ``'steps'``; indexing returns them as a
    ``(language_instruction, steps)`` tuple.
    """

    def __init__(self, data_path):
        """Read the whole pickle at ``data_path`` into memory."""
        # NOTE: pickle.load executes arbitrary code from the file; only open
        # pickles produced by this project.
        with open(data_path, 'rb') as pickle_file:
            loaded_episodes = pickle.load(pickle_file)
        self.data = loaded_episodes

    def __len__(self):
        """Number of episodes."""
        return len(self.data)

    def __getitem__(self, idx):
        """Return ``(language_instruction, steps)`` of episode ``idx``."""
        record = self.data[idx]
        return record['language_instruction'], record['steps']
    
## Read the converted LIBERO episodes from disk
dataset_path = RLDS_DATASET_PATH
print(f"Loading dataset from {dataset_path}")
# the whole pickle is loaded into memory by the constructor
rlds_dataset = RLDataset(data_path=dataset_path)
print(f"Loaded {len(rlds_dataset)} episodes")

## Collator function to prepare batches
def collate_fn(batch):
    """Flatten a batch of episodes into step-level tensors.

    Args:
        batch: sequence of ``(language_instruction, steps)`` items, where
            ``steps`` is a list of step dicts with the keys ``'observation'``
            (a dict holding ``'image'``), ``'action'``, ``'reward'`` and
            ``'is_last'``.

    Returns:
        dict with the keys
        - ``'language_instructions'``: list with one entry per episode;
        - ``'images'``, ``'actions'``, ``'rewards'``: float32 tensors;
        - ``'is_last'``: bool tensor.
        The first dimension of every tensor runs over all steps of all
        episodes in the batch, concatenated in batch order, so the tensors are
        per step while the instructions are per episode.
    """
    language_instructions = [item[0] for item in batch]
    # Walk the episodes once instead of once per field.
    flat_steps = [step for item in batch for step in item[1]]

    # Stack the per-step arrays with NumPy first: torch.tensor() on a Python
    # list of ndarrays converts element by element and is very slow.
    images = torch.as_tensor(
        np.asarray([step['observation']['image'] for step in flat_steps]),
        dtype=torch.float32,
    )
    actions = torch.as_tensor(
        np.asarray([step['action'] for step in flat_steps]),
        dtype=torch.float32,
    )
    # Rewards and is_last are plain scalars, so a direct conversion is fine.
    rewards = torch.tensor([step['reward'] for step in flat_steps], dtype=torch.float32)
    is_last = torch.tensor([step['is_last'] for step in flat_steps], dtype=torch.bool)

    return {
        'language_instructions': language_instructions,
        'images': images,
        'actions': actions,
        'rewards': rewards,
        'is_last': is_last
    }

## Wrap the episode dataset in a shuffling DataLoader
loader_options = {
    'batch_size': finetune_config.batch_size,  # episodes per batch
    'shuffle': True,
    'collate_fn': collate_fn,
}
data_loader = DataLoader(rlds_dataset, **loader_options)

## Smoke test: print the first batch only
for batch in data_loader:
    print(batch)
    break