In [1]:
import os

folderIn = 'calandradaraset/'
extension_in = 'h5'

# Discover the entries of the dataset folder whose name ends with the
# expected extension, ordered by name, and report what was found.
namefiles = sorted(entry for entry in os.listdir(folderIn) if entry.endswith(extension_in))
print('Name files: %s' % namefiles)
print('Number files: %d' % len(namefiles))

# Indexes of the files to load.
idxs = list(range(38))
# Replace the discovered list with explicit file names built by appending the
# zero-padded indexes above to 'calandra_corl2017_'.
namefiles = ['calandra_corl2017_%03d.h5' % file_idx for file_idx in idxs]
print('Name files: %s' % namefiles)

Name files: ['calandra_corl2017_000.h5', 'calandra_corl2017_001.h5', 'calandra_corl2017_002.h5', 'calandra_corl2017_003.h5', 'calandra_corl2017_004.h5', 'calandra_corl2017_005.h5', 'calandra_corl2017_006.h5', 'calandra_corl2017_007.h5', 'calandra_corl2017_008.h5', 'calandra_corl2017_009.h5', 'calandra_corl2017_010.h5', 'calandra_corl2017_011.h5', 'calandra_corl2017_012.h5', 'calandra_corl2017_013.h5', 'calandra_corl2017_014.h5', 'calandra_corl2017_015.h5', 'calandra_corl2017_016.h5', 'calandra_corl2017_017.h5', 'calandra_corl2017_018.h5', 'calandra_corl2017_019.h5', 'calandra_corl2017_020.h5', 'calandra_corl2017_021.h5', 'calandra_corl2017_022.h5', 'calandra_corl2017_023.h5', 'calandra_corl2017_024.h5', 'calandra_corl2017_025.h5', 'calandra_corl2017_026.h5', 'calandra_corl2017_027.h5', 'calandra_corl2017_028.h5', 'calandra_corl2017_029.h5', 'calandra_corl2017_030.h5', 'calandra_corl2017_031.h5', 'calandra_corl2017_032.h5', 'calandra_corl2017_033.h5', 'calandra_corl2017_034.h5', 'caland

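The script below reads the samples from the h5 files and saves their images in per-object folders. Each image is saved as `<object_name>/<sensor>/<object_name>_<success|failure>_<sensor>_<phase>_<idx>.png`, where `<success|failure>` reflects the sample's `is_gripping` flag and `<phase>` is one of `before`, `during` or `after`. The script takes several minutes per h5 file, i.e. a few hours for all 38 files.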

In [10]:
"""
This script takes several minutes to execute. 
Approximately 5 minutes per h5 file on a 5.8 GHz i9-13900K with 128gb ram, 
i.e. apprx 3hours for all 38 files.
"""
import os
import deepdish as dd
import matplotlib.pyplot as plt
import time

folderOut = 'calandra_objects/'
# Every sample stores one image per (sensor, phase) pair under the key '<sensor>_<phase>'.
SENSORS = ('kinectA_rgb', 'gelsightA', 'gelsightB')
PHASES = ('before', 'during', 'after')


def sample_path(object_name, label, sensor, phase, idx):
    """Return the output path of one image.

    The layout is <folderOut>/<object_name>/<sensor>/<object_name>_<label>_<sensor>_<phase>_<idx>.png,
    where label is 'success' or 'failure'.
    """
    filename = '%s_%s_%s_%s_%d.png' % (object_name, label, sensor, phase, idx)
    return os.path.join(folderOut, object_name, sensor, filename)


os.makedirs(folderOut, exist_ok=True)

start_time = time.time()

# Next index to try for every (object_name, label) pair. Remembering it avoids
# probing the filesystem from 0 again for every sample, while still skipping
# over files left by a previous run.
next_free_idx = {}

for namefile in namefiles:
    # Load data from file
    print('Loading file: %s' % namefile)
    t = dd.io.load(folderIn+namefile)
    n_data = len(t)
    print("N data: %d" % n_data)

    # iterate over the data, get the object name and the is_gripping flag per sample
    for i in range(n_data):
        sample = t[i]
        object_name = sample['object_name']
        # the object name is expected as bytes; convert it to string
        if isinstance(object_name, bytes):
            object_name = object_name.decode('utf-8')
        label = 'success' if sample['is_gripping'] else 'failure'

        # create the per-sensor directories of the object if they do not exist
        for sensor in SENSORS:
            os.makedirs(os.path.join(folderOut, object_name, sensor), exist_ok=True)

        # find the first index not yet used by this object/label, using the
        # 'kinectA_rgb' 'before' image as the marker for a used index
        key = (object_name, label)
        idx = next_free_idx.get(key, 0)
        while os.path.exists(sample_path(object_name, label, 'kinectA_rgb', 'before', idx)):
            idx += 1
        next_free_idx[key] = idx + 1

        # save the nine images of the sample in the corresponding directories
        for sensor in SENSORS:
            for phase in PHASES:
                plt.imsave(sample_path(object_name, label, sensor, phase, idx),
                           sample[sensor + '_' + phase])

        # print progress alongside the time it took to execute
        if i % 50 == 0:
            print("Progress: %d/%d" % (i, n_data))
            print("Time elapsed: %d seconds" % (time.time() - start_time))

print("Done!")
print("Time elapsed: %d seconds" % (time.time() - start_time))

Loading file: calandra_corl2017_000.h5
N data: 250
Progress: 0/250
Time elapsed: 20 seconds
Progress: 50/250
Time elapsed: 90 seconds
Progress: 100/250
Time elapsed: 160 seconds
Progress: 150/250
Time elapsed: 230 seconds
Progress: 200/250
Time elapsed: 299 seconds
Loading file: calandra_corl2017_001.h5
N data: 250
Progress: 0/250
Time elapsed: 388 seconds
Progress: 50/250
Time elapsed: 459 seconds
Progress: 100/250
Time elapsed: 529 seconds
Progress: 150/250
Time elapsed: 600 seconds
Progress: 200/250
Time elapsed: 671 seconds
Loading file: calandra_corl2017_002.h5
N data: 250
Progress: 0/250
Time elapsed: 761 seconds
Progress: 50/250
Time elapsed: 831 seconds
Progress: 100/250
Time elapsed: 902 seconds
Progress: 150/250
Time elapsed: 971 seconds
Progress: 200/250
Time elapsed: 1042 seconds
Loading file: calandra_corl2017_003.h5
N data: 250
Progress: 0/250
Time elapsed: 1133 seconds
Progress: 50/250
Time elapsed: 1202 seconds
Progress: 100/250
Time elapsed: 1271 seconds
Progress: 150/

Remove objects that have only success or failure samples

In [None]:
# Delete every object folder that does not contain both success and failure samples.
import os
import shutil

folderOut = 'calandra_objects/'
OUTCOMES = ('success', 'failure')


def outcomes_present(object_dir, object_name):
    """Return the set of outcome labels found among the samples of one object.

    Sample files are named <object_name>_<outcome>_<sensor>_<phase>_<idx>.png
    and live in sub-folders of object_dir, so the outcome has to be read from
    the file names inside those sub-folders; the entries directly under
    object_dir are folders and carry no outcome information.
    """
    found = set()
    for _dirpath, _dirnames, filenames in os.walk(object_dir):
        for filename in filenames:
            for outcome in OUTCOMES:
                if filename.startswith(object_name + '_' + outcome + '_'):
                    found.add(outcome)
    return found


def remove_single_outcome_objects(root):
    """Remove the object folders under root that lack success or failure samples.

    Returns the sorted list of removed object names.
    """
    removed = []
    for obj in sorted(os.listdir(root)):
        object_dir = os.path.join(root, obj)
        if not os.path.isdir(object_dir):
            continue
        if len(outcomes_present(object_dir, obj)) < len(OUTCOMES):
            shutil.rmtree(object_dir)
            print("Removed object: %s" % obj)
            removed.append(obj)
    return removed


if os.path.isdir(folderOut):
    remove_single_outcome_objects(folderOut)
else:
    print("Folder %s not found, nothing to remove." % folderOut)

Use the train and test object lists to split the data into train and test sets object-wise, i.e. all samples of an object are copied to the set its object list assigns it to.

In [None]:
import os
import shutil

# Folder holding one sub-folder per object (source of the copy).
# NOTE(review): assumed to be the folder written by the image extraction step - confirm.
folderObjects = 'calandra_objects/'
# Folder receiving the train/test split (destination of the copy).
folderOut = 'calandra_objects_split_object_wise/'
train_objects = ['cinnamon', 'potato', 'aspirin', 'red_turtle', 'happy_fall_stone', 'monster_truck', 'soft_red_cube', 'lime', 'ogx_shampoo', 'peanut_butter', 'chocolate_shake', 'webcam_box', 'rubics_cube', 'muffin', 'red_bull', 'purple_small_plastic_fruit', 'onion', 'soft_blue_hexagon', 'plastic_cow', 'fox_head', 'small_coffe_cup', 'lemon', 'red_apple', '3d_printed_white_ball', 'set_small_plastic_men_police_man', 'dog_toy_ice_cream_cone', 'playdoh_container', 'bandaid_box', 'emergency_stop_button_for_sawyer', 'plastic_duck', 'brown_paper_cup_2_upside', 'mentos_gum_can']
test_objects = ['international_travel_adapter', '3d_printed_blue_connector', 'soft_zebra', 'fake_flower_in_pot', 'metal_can', 'french_dip', 'metal_cylinder_with_holes', 'soft_beer_bottle_holder']
SPLIT_SENSORS = ('kinectA_rgb', 'gelsightA', 'gelsightB')


def copy_objects(src_root, dst_root, split, objects):
    """Copy all samples of the given objects into dst_root/<split>/<sensor>/.

    Samples are read from src_root/<object>/<sensor>/. A missing object or
    sensor folder raises FileNotFoundError (from os.listdir).
    """
    for obj in objects:
        for sensor in SPLIT_SENSORS:
            src_dir = os.path.join(src_root, obj, sensor)
            dst_dir = os.path.join(dst_root, split, sensor)
            # copy all samples of the object
            for each in os.listdir(src_dir):
                shutil.copy(os.path.join(src_dir, each), os.path.join(dst_dir, each))


def split_object_wise(src_root, dst_root, train, test):
    """Create the train/test folder tree under dst_root and fill it object-wise."""
    # os.makedirs also creates the intermediate dst_root and split folders
    for split in ('train', 'test'):
        for sensor in SPLIT_SENSORS:
            os.makedirs(os.path.join(dst_root, split, sensor), exist_ok=True)
    copy_objects(src_root, dst_root, 'train', train)
    copy_objects(src_root, dst_root, 'test', test)


if os.path.isdir(folderObjects):
    split_object_wise(folderObjects, folderOut, train_objects, test_objects)
else:
    print("Folder %s not found, nothing to split." % folderObjects)
