In [2]:
import boto3
import json

# S3 bucket that holds the BDD100K images, labels and generated manifests.
bucket = 'sagemaker-project-p-o3c1kiruwcnf'

# S3 keys of the object-detection label files (one JSON document per split).
train_label = 'bdd100k_images/labels/object_detection_labels/det_train.json'
val_label = 'bdd100k_images/labels/object_detection_labels/det_val.json'

# Image prefixes. They start and end with '/' because make_payload appends
# them directly after the bucket name when building each "source-ref" URI.
train_image_path = '/bdd100k_images/train/'
val_image_path = '/bdd100k_images/val/'

# S3 key prefix under which the generated manifest files are uploaded.
manifest_write_path = 'bdd100k_images/labels/manifests/'


s3 = boto3.resource('s3')


def load_json_from_s3(key, bucket_name=bucket):
    """Download one S3 object and parse it as UTF-8 encoded JSON.

    Args:
        key: S3 object key to read.
        bucket_name: Bucket holding the object; defaults to the notebook's
            ``bucket``.

    Returns:
        The decoded JSON document.
    """
    body = s3.Object(bucket_name, key).get()['Body']
    return json.loads(body.read().decode('utf-8'))


train_json_content = load_json_from_s3(train_label)
val_json_content = load_json_from_s3(val_label)

In [3]:
# Show the first training record to inspect the structure of a label entry.
train_json_content[0]

{'name': '0000f77c-6257be58.jpg',
 'attributes': {'weather': 'clear',
  'timeofday': 'daytime',
  'scene': 'city street'},
 'timestamp': 10000,
 'labels': [{'id': '0',
   'attributes': {'occluded': False,
    'truncated': False,
    'trafficLightColor': 'G'},
   'category': 'traffic light',
   'box2d': {'x1': 1125.902264,
    'y1': 133.184488,
    'x2': 1156.978645,
    'y2': 210.875445}},
  {'id': '1',
   'attributes': {'occluded': False,
    'truncated': False,
    'trafficLightColor': 'G'},
   'category': 'traffic light',
   'box2d': {'x1': 1156.978645,
    'y1': 136.637417,
    'x2': 1191.50796,
    'y2': 210.875443}},
  {'id': '2',
   'attributes': {'occluded': False,
    'truncated': False,
    'trafficLightColor': 'NA'},
   'category': 'traffic sign',
   'box2d': {'x1': 1105.66915985699,
    'y1': 211.122087,
    'x2': 1170.79037,
    'y2': 233.566141}},
  {'id': '3',
   'attributes': {'occluded': False,
    'truncated': True,
    'trafficLightColor': 'NA'},
   'category': 'traf

In [25]:
# Class vocabularies. A value's position in its tuple is the integer class id
# written to the manifest; any other value becomes "undefined" with
# id == len(options).
_WEATHER_OPTIONS = ('rainy', 'snowy', 'clear', 'overcast', 'partly cloudy')
_TIMEOFDAY_OPTIONS = ('daytime', 'night', 'dawn/dusk')
_SCENE_OPTIONS = ('residential', 'city street', 'highway')


def _classify(value, options):
    """Return ``(class_name, class_id)`` for ``value`` looked up in ``options``."""
    if value in options:
        return value, options.index(value)
    return "undefined", len(options)


def make_payload(payload, path, bucket_name=None):
    """Build one manifest line (a JSON string) for a single label record.

    Args:
        payload: Label record with a ``'name'`` (image file name) and an
            ``'attributes'`` dict carrying ``'weather'``, ``'timeofday'`` and
            ``'scene'``. A missing ``'attributes'`` dict or missing attribute
            is treated like an unknown value and labelled ``"undefined"``.
        path: Key prefix of the image inside the bucket, e.g.
            ``'/bdd100k_images/train/'``. Leading and trailing slashes are
            added when absent so the resulting URI is always well formed.
        bucket_name: Bucket used in ``source-ref``; defaults to the
            notebook-level ``bucket``.

    Returns:
        A JSON string with ``source-ref`` plus, for each of the three
        attributes, the integer class id and a ``<attribute>-metadata`` entry
        holding the class name and the label type.

    Raises:
        KeyError: If ``payload`` has no ``'name'``.
    """
    if bucket_name is None:
        bucket_name = bucket  # notebook-level default bucket

    file_name = payload['name']
    attributes = payload.get('attributes') or {}

    weather, weather_id = _classify(attributes.get('weather'), _WEATHER_OPTIONS)
    timeofday, timeofday_id = _classify(attributes.get('timeofday'), _TIMEOFDAY_OPTIONS)
    scene, scene_id = _classify(attributes.get('scene'), _SCENE_OPTIONS)

    # Guarantee exactly the separators the URI needs; a prefix that already
    # starts and ends with '/' is used unchanged.
    prefix = path if path.startswith('/') else '/' + path
    if not prefix.endswith('/'):
        prefix += '/'

    manifest_line = {
        "source-ref": f"s3://{bucket_name}{prefix}{file_name}",
        "weather": weather_id,
        "weather-metadata": {
            "class-name": weather,
            "type": "groundtruth/image-classification",
        },
        "timeofday": timeofday_id,
        "timeofday-metadata": {
            "class-name": timeofday,
            "type": "groundtruth/image-classification",
        },
        "scene": scene_id,
        "scene-metadata": {
            "class-name": scene,
            "type": "groundtruth/image-classification",
        },
    }

    return json.dumps(manifest_line)

In [26]:
def manifest_build(mode):
    """Write the simplified manifest for one dataset split and upload it to S3.

    One manifest line is produced per label record via ``make_payload``. The
    lines are written to the local file ``manifest-simplified-<mode>.json``
    (one JSON document per line), which is then uploaded under
    ``manifest_write_path`` in ``bucket``.

    Args:
        mode: ``'train'`` or ``'val'``.

    Raises:
        ValueError: If ``mode`` is neither ``'train'`` nor ``'val'``.
    """
    # Validate explicitly: previously any non-'train' value silently used the
    # validation split while still naming the output file after ``mode``.
    if mode == 'train':
        json_body = train_json_content
        image_path = train_image_path
    elif mode == 'val':
        json_body = val_json_content
        image_path = val_image_path
    else:
        raise ValueError(f"mode must be 'train' or 'val', got {mode!r}")

    # Build every line before opening the file so a failing record does not
    # leave a partially written manifest behind.
    lines = [make_payload(load, image_path) for load in json_body]

    file = f'manifest-simplified-{mode}.json'
    with open(file, 'w') as f:
        for item in lines:
            f.write("%s\n" % item)

    s3.Bucket(bucket).upload_file(file, f"{manifest_write_path}{file}")

In [27]:
# Generate and upload the manifest for each dataset split, train first.
for split in ('train', 'val'):
    manifest_build(split)