In [9]:
import os
import shutil

import pandas as pd

def bbox_to_yolo(xmin, ymin, xmax, ymax, image_width, image_height):
    """Convert a corner-style box into normalized center/size values.

    Args:
        xmin, ymin: Coordinates of the box corner with the smallest x and y.
        xmax, ymax: Coordinates of the box corner with the largest x and y.
        image_width: Width used to normalize the x center and the box width.
        image_height: Height used to normalize the y center and the box height.

    Returns:
        A 4-tuple ``(x_center, y_center, width, height)`` where the x-related
        values are divided by ``image_width`` and the y-related values by
        ``image_height``.
    """
    w = xmax - xmin
    h = ymax - ymin

    # Center = min corner plus half of the extent along each axis.
    cx = xmin + w / 2
    cy = ymin + h / 2

    return (
        cx / image_width,
        cy / image_height,
        w / image_width,
        h / image_height,
    )


def read_and_join(path_prefix):
    """Load the annotation tables found under ``path_prefix`` and join them.

    ``path_prefix`` is concatenated directly with each file name, so it must
    already end with a path separator.

    The object annotations are left-joined to ``sample_data`` (annotation
    ``sample_data_token`` == sample ``token``) and then to ``category``
    (annotation ``category_token`` == category ``token``). The category
    ``name`` column is exposed as ``category_name`` and each category gets a
    ``category_id`` equal to its row position in ``category.json``.

    The row counts of the annotation, sample_data and category tables are
    printed (in that order) once both joins have succeeded.

    Returns:
        The joined DataFrame, one row per object annotation.
    """
    def load_table(file_name):
        return pd.read_json(path_prefix + file_name)

    annotations = load_table('object_ann.json')
    categories = (
        load_table('category.json')
        .rename(columns={"name": "category_name"})
        .assign(category_id=lambda frame: range(len(frame)))
    )
    samples = load_table('sample_data.json')
    # Read for parity with the other tables; its contents are not used below.
    _attributes = load_table('attribute.json')

    joined = (
        annotations
        # Both sides carry a 'token' column, hence token_left / token_right.
        .merge(
            samples,
            how='left',
            left_on='sample_data_token',
            right_on='token',
            suffixes=('_left', '_right'),
        )
        .merge(
            categories,
            how='left',
            left_on='category_token',
            right_on='token',
            suffixes=('_from_merged', '_from_category'),
        )
    )

    for table in (annotations, samples, categories):
        print (len(table))

    return joined

def create_image_filename_list(path_prefix, data_split, merged):
    """Write the distinct image filenames of ``merged`` to ``<data_split>.txt``.

    The file is created in the current working directory with one filename
    per line, in order of first appearance in ``merged``. It is rewritten on
    every call, so re-running the cell does not accumulate duplicate lines.
    Rows whose ``filename`` is missing (NaN) are skipped.

    Args:
        path_prefix: Accepted for signature compatibility; not used here.
        data_split: One of ``'train'``, ``'val'`` or ``'test'``.
        merged: DataFrame with a ``filename`` column.

    Returns:
        ``-1`` if ``data_split`` is not a recognised split, otherwise ``None``.
    """
    if data_split not in ('train', 'val', 'test'):
        return -1

    # unique() keeps first-appearance order, which makes the output
    # deterministic (iterating a set of strings is not stable across runs).
    filenames = merged['filename'].dropna().unique()

    # Open once in write mode: a single handle for all lines, and repeated
    # calls replace the list instead of appending to it.
    with open(data_split + ".txt", 'w') as f:
        f.writelines(fn + "\n" for fn in filenames)
        
# create yolo label txt
def convert_to_yolo_format(path_prefix, data_split, merged):
    """Write one YOLO label file per image under ``<path_prefix><data_split>/label/``.

    Each row of ``merged`` is one annotation. Its ``bbox`` (xmin, ymin, xmax,
    ymax) is converted with ``bbox_to_yolo`` using the row's ``width`` and
    ``height`` and emitted as the line
    ``<category_id> <x_center> <y_center> <width> <height>``.

    The label file name is the image ``filename`` with ``/`` replaced by ``-``
    and ``.txt`` appended (example: /content/train/label/asdf.jpg.txt).
    All annotations of an image are collected first and each label file is
    then written once in write mode, so re-running the cell replaces the
    labels instead of appending duplicate boxes. The label directory is
    created if it does not exist yet.

    Args:
        path_prefix: Dataset root; concatenated directly, so it must end with
            a path separator.
        data_split: One of ``'train'``, ``'val'`` or ``'test'``.
        merged: DataFrame with ``bbox``, ``category_id``, ``filename``,
            ``width`` and ``height`` columns.

    Returns:
        ``-1`` if ``data_split`` is not a recognised split, otherwise ``None``.
    """
    if data_split not in ('train', 'val', 'test'):
        return -1

    label_dir = path_prefix + data_split + "/label/"
    os.makedirs(label_dir, exist_ok=True)

    df = merged[['bbox', 'category_id', 'filename', 'width', 'height']]

    # Group the output lines by label file so each file is opened only once.
    # Dicts keep insertion order, so files and lines follow the row order.
    lines_by_label = {}
    for row in df.itertuples(index=False):
        label_name = row.filename.replace('/', '-') + '.txt'
        xmin, ymin, xmax, ymax = row.bbox[:4]
        yolo_box = bbox_to_yolo(xmin, ymin, xmax, ymax, row.width, row.height)
        fields = (row.category_id,) + tuple(yolo_box)
        lines_by_label.setdefault(label_name, []).append(
            " ".join(str(value) for value in fields) + "\n"
        )

    for label_name, lines in lines_by_label.items():
        with open(label_dir + label_name, 'w') as f:
            f.writelines(lines)
            
# Root folder of the downloaded dataset; the trailing slash is required because
# the helpers build paths by plain string concatenation.
path_prefix = '/home/akbar/Downloads/nuimages-v1.0-mini(1)/'
# Derive the metadata folder from path_prefix so the dataset location is
# configured in exactly one place.
z = read_and_join(path_prefix + 'v1.0-train/')
z.head()



506
650
25


Unnamed: 0,token_left,category_token,bbox,mask,attribute_tokens,sample_data_token,token_right,sample_token,ego_pose_token,calibrated_sensor_token,...,width,height,timestamp,is_key_frame,prev,next,token,category_name,description,category_id
0,00053e5bedec4ee68f0f24193913ed3a,6021b5187b924d64be64a702e5570edf,"[725, 293, 909, 522]","{'size': [900, 1600], 'counts': 'bWxcYzA3a2swO...",[abc0f113547848a9baaa62096fca37f5],daa5b1091bfa45da92c67e78e1dd89aa,daa5b1091bfa45da92c67e78e1dd89aa,f8ac851edaba49fdb0cf3fb713c7cb9a,cb74e347dfbf4d249f34b817c961e4b6,1f9d712450425bfda2703c73978e6c3d,...,1600,900,2018-01-08 03:39:05.757583,True,62fa424b680749fa86e67415a4ad1587,5eb88842218a49e3860a86b32fc3d5d3,6021b5187b924d64be64a702e5570edf,vehicle.truck,Vehicles primarily designed to haul cargo incl...,24
1,00332fb1d93f42348785ed2e9ad51ade,653f7efbb9514ce7b81d44070d6208c1,"[318, 459, 381, 498]","{'size': [900, 1600], 'counts': 'U1NnODJvazAzT...",[],8980a5ef00b04d6a9c4544b210467730,8980a5ef00b04d6a9c4544b210467730,0deae1f926c148378bbd871055df8b08,6d340954efbb4fee80c1116b61613d7c,f7e930e6288e57eb95fae8012a1e6437,...,1600,900,2018-01-04 03:09:46.214952,True,809771256c1e4855ae025755101b94dc,2f3ce54ddc6d4079842e7e207da44ce9,653f7efbb9514ce7b81d44070d6208c1,movable_object.barrier,Temporary road barrier placed in the scene in ...,9
2,00e29457d0eb402bbcb96efb4c5256fc,dfd26f200ade4d24b540184e16050022,"[1034, 454, 1146, 571]","{'size': [900, 1600], 'counts': 'ZlRqbTA6UGswS...",[271f6773e4d2496cbb9942c204c8a4c1],a5b934fd32bd49a2a52da91d0e95ad4d,a5b934fd32bd49a2a52da91d0e95ad4d,7487046d5b8543e0994f37242b158294,95d02c1450394fb8a4f1d1a15c5b9ea6,4efddf0b018b5c74937aa9a90d4a3e93,...,1600,900,2018-09-04 05:32:48.104825,True,ffe79bffb2904422b2e177b892381149,996991ec5cae4673942b4781e8a6f372,dfd26f200ade4d24b540184e16050022,vehicle.motorcycle,Gasoline or electric powered 2-wheeled vehicle...,22
3,013ca27c90d3465a9b5db763cb962ec9,fd69059b62a3469fbaef25340c0eab7f,"[366, 454, 497, 511]","{'size': [900, 1600], 'counts': 'XV1XOmAwX2swN...",[9f65c1eaa74e4d5db46e87a34811e994],4afb21eec056459f87f22525eb9b59b2,4afb21eec056459f87f22525eb9b59b2,0b8de86ffe9740ddb3d50b5a474bc7c3,654f53780de24f5a8776b384b474f7e4,12d3935ef4ac559881c7ce18732f5575,...,1600,900,2018-05-30 20:37:59.297295,True,55cb011c05ee44b1871c6127377baf49,b1984330b392487abc23504752db2cd7,fd69059b62a3469fbaef25340c0eab7f,vehicle.car,"Vehicle designed primarily for personal use, e...",17
4,014c6d598b6341659049734b16bc8034,1fa93b757fc74fb197cdd60001ad8abf,"[987, 415, 1037, 501]","{'size': [900, 1600], 'counts': 'X1JhazA9ZWswN...",[8c92f43bdb7c4df399aac34068f08f0f],1e8072c38b1f4dd2a1aa07ea1624c564,1e8072c38b1f4dd2a1aa07ea1624c564,730789124738489090b225c29cbad7f6,93d2bd6644cc446abe4c490c6cc944a2,a07eafb87a05527d8071950123bd3085,...,1600,900,2018-09-19 03:24:16.197423,True,1b05d848f13a45889ff99b807b05c34d,928618dbd59843249b3beafcf087bbb3,1fa93b757fc74fb197cdd60001ad8abf,human.pedestrian.adult,Adult subcategory.,2


In [10]:
# Write the distinct image filenames referenced by the joined table `z` to
# train.txt in the current working directory.
create_image_filename_list(path_prefix, 'train', z) 