In [9]:
import os
import json
import joblib
import numpy as np
from tqdm import tqdm
from glob import glob
from PIL import Image

In [10]:
task = "pick_red"

load_dir = "../data/teleop"
# Sort the matches: glob returns files in arbitrary, filesystem-dependent order,
# and later cells index into this list (`[0]`, `[:3]`), so an unsorted list would
# make those cells pick different recordings from one run/machine to the next.
# (`recursive=True` was dropped: it only matters for patterns containing `**`.)
data_path_list = sorted(glob(os.path.join(load_dir, task, "*.gz")))
print("Found {} data files".format(len(data_path_list)))

save_dir = "../data/llava"

Found 79 data files


In [11]:
# Load the first recording so its layout can be inspected.
# NOTE(review): joblib.load unpickles the file, so only use it on trusted recordings.
data = joblib.load(data_path_list[0])
# Display the keys of the recording's first element (a dict).
data[0].keys()

dict_keys(['lowdim_ee', 'lowdim_qpos', '207322251049_rgb', '207322251049_depth'])

In [12]:
# Shapes of the RGB frame stack and of the recording's second element
# (the second element is what later cells read per-step actions from).
np.shape(data[0]["207322251049_rgb"]), np.shape(data[1])

((55, 480, 640, 3), (55, 7))

In [13]:
# Gather every action row from the first three recordings.
action_values_list = []
for data_path in tqdm(data_path_list[:3]):
    data = joblib.load(data_path)
    action_values_list.extend(data[1])

# Stack the rows into one array, then take the [min, max] pair of each column
# (one column per action dimension).
action_values_list = np.stack(action_values_list, axis=0)
min_max_list = [
    [action_values_list[:, dim].min(), action_values_list[:, dim].max()]
    for dim in range(action_values_list.shape[1])
]
min_max_list

100%|██████████| 3/3 [00:00<00:00,  3.76it/s]


[[-0.0266508311209918, 0.016585686950799317],
 [-0.010962512964752837, 0.020432407985895924],
 [-0.03641561409442955, 0.04720275719556884],
 [-0.024069496178814466, 0.028907196681839362],
 [-0.05346590574651613, 0.010457678666933945],
 [-0.03035266875860437, 0.048358356813292515],
 [-0.004282234822177378, 1.0]]

In [14]:
# Use a fixed [-1, 1] range for each of the 7 action dimensions instead of the
# ranges measured from the data. Each pair is built separately on purpose:
# `[[-1.0, 1.0]] * 7` repeats ONE shared inner list, so adjusting a single
# dimension's range in place would silently change all seven.
min_max_list = [[-1.0, 1.0] for _ in range(7)]
min_max_list

[[-1.0, 1.0],
 [-1.0, 1.0],
 [-1.0, 1.0],
 [-1.0, 1.0],
 [-1.0, 1.0],
 [-1.0, 1.0],
 [-1.0, 1.0]]

In [15]:
def discretize_to_bins(data, min_value, max_value, num_bins=256):
    """Map values onto integer bin indices over ``[min_value, max_value]``.

    The range is split into ``num_bins`` equal-width bins. Values below
    ``min_value`` are clipped into bin 0 and values at or above ``max_value``
    are clipped into the last bin (``num_bins - 1``).

    Args:
        data: A scalar or NumPy array of values to discretize.
        min_value: Lower edge of the range.
        max_value: Upper edge of the range; must be greater than ``min_value``.
        num_bins: Number of bins; must be at least 1.

    Returns:
        Integer bin indices in ``[0, num_bins - 1]`` with the same shape as
        ``data``.

    Raises:
        ValueError: If ``num_bins`` is less than 1, or if ``max_value`` is not
            strictly greater than ``min_value``. Without this check a
            zero-width range divides by zero and yields meaningless bins.
    """
    if num_bins < 1:
        raise ValueError(f"num_bins must be >= 1, got {num_bins}")
    # `not a > b` (rather than `a <= b`) also rejects NaN bounds.
    if not max_value > min_value:
        raise ValueError(
            f"max_value ({max_value}) must be greater than min_value ({min_value})"
        )

    # Width of a single bin.
    bin_size = (max_value - min_value) / num_bins
    # Index of the bin each value falls into.
    binned_data = np.floor((data - min_value) / bin_size).astype(int)
    # Clip so out-of-range values (and data == max_value) land in a valid bin.
    binned_data = np.clip(binned_data, 0, num_bins - 1)
    return binned_data

def discretize(action_values, min_max_lst):
    """Discretize every action dimension using its own ``[min, max]`` range.

    Args:
        action_values: Sequence of per-dimension action values.
        min_max_lst: Sequence of ``[min, max]`` pairs; entry ``k`` gives the
            range used for ``action_values[k]``.

    Returns:
        A list with one ``discretize_to_bins`` result per action dimension.
    """
    return [
        discretize_to_bins(
            value,
            min_value=min_max_lst[dim][0],
            max_value=min_max_lst[dim][1],
        )
        for dim, value in enumerate(action_values)
    ]

In [16]:
instruction = "pick up the red cube"

# Image modalities exported for every step; add "depth" to export depth frames too.
data_types = ["rgb"]  # ["depth"]

# Entries of this task, accumulated over ALL episodes. This list used to be
# re-created inside the episode loop, so the task-named JSON written at the end
# contained only the last episode (and the cell raised NameError when no data
# files were found).
agentview_entries = []
tot_agentview_entries = []

# load data
for data_path in tqdm(data_path_list):
    # NOTE(review): joblib.load unpickles the file -- only run on trusted recordings.
    data = joblib.load(data_path)

    # Episode name = file name up to its first dot. It is the same for every
    # step, so compute it once per file; basename also handles OS-specific
    # path separators.
    episode = os.path.basename(data_path).split(".")[0]
    max_step = len(data[0]["lowdim_ee"])
    for step in range(max_step):
        # Save this step's camera frame(s) to disk as jpg.
        for data_type in data_types:
            id_ = "{}/episode_{}/{}/step_{}.jpg".format(task, episode, data_type, step)
            agentview_img = data[0][f"207322251049_{data_type}"][step]

            # Depth frames would first need converting to a 3-channel uint8 image:
            # if data_type == "depth":
            #     agentview_img = np.repeat(agentview_img[...,None], 3, axis=-1) / 1000.0
            #     agentview_img = agentview_img.astype(np.uint8)

            # NOTE(review): `id_` already starts with `task`, so images land in
            # <save_dir>/<task>/<task>/episode_.../ -- left unchanged because this
            # path is recorded in the JSON; confirm the doubled folder is intended.
            agentview_image_path = os.path.join(save_dir, task, id_)
            # exist_ok avoids the check-then-create race of a separate exists() test.
            os.makedirs(os.path.dirname(agentview_image_path), exist_ok=True)
            Image.fromarray(agentview_img).save(agentview_image_path)

        # 1 only on the final step of the episode, 0 otherwise.
        terminate = 0 if step < max_step - 1 else 1

        # action_values = np.concatenate((delta_pos, delta_rot))
        action_values = data[1][step]
        av = discretize(action_values, min_max_list)
        # Target text: terminate flag followed by the 7 discretized action bins.
        action_text = f"{terminate} {av[0]} {av[1]} {av[2]} {av[3]} {av[4]} {av[5]} {av[6]}"

        agentview_entry = {
            "id": id_,
            "image": agentview_image_path,
            "conversations": [
            {
                "from": "human",
                "value": f"What action should the robot take to `{instruction}`"
            },
            {
                "from": "gpt",
                "value": action_text,
                "raw_actions": action_values.tolist()
            },
            ]
        }

        agentview_entries.append(agentview_entry)
        tot_agentview_entries.append(agentview_entry)

# Make sure the output folder exists even when no image was written above.
output_dir = os.path.join(save_dir, task)
os.makedirs(output_dir, exist_ok=True)

# The file name uses the last configured modality (previously the value leaked
# from the inner loop variable, which is undefined when there are no episodes).
with open(os.path.join(output_dir, 'llava_agentview_{}_{}.json'.format(data_types[-1], task)), 'w') as file:
    json.dump(agentview_entries, file)
with open(os.path.join(output_dir, 'llava_agentview_tot.json'), 'w') as file:
    json.dump(tot_agentview_entries, file)
print(f'finished making the dataset ready for llava training! It contains {len(tot_agentview_entries)} entries')

100%|██████████| 79/79 [00:27<00:00,  2.87it/s]

finished making the dataset ready for llava training! It contains 4355 entries



