In [3]:
import json
import random
from collections import Counter
import boto3

# SQS client. No region or credentials are passed here, so boto3 falls back to
# its default configuration.
sqs = boto3.client('sqs')
# Look up the 'Resize_Images' queue. The response is indexed with 'QueueUrl'
# at the point where resize messages are sent.
queue = sqs.get_queue_url(QueueName='Resize_Images')

# Keep-probability for a manifest entry, indexed first by attribute and then
# by class name. The sampling cell keeps an entry when random.random() is
# below the rate, so a rate of 1 keeps every entry of that class and smaller
# rates thin the class out.
sample_rate = {
    "scene": {
        "residential": 1,
        "city street": 0.25,
        "highway": 0.5,
        "undefined": 1,
    },
    "timeofday": {
        "daytime": 0.15,
        "night": 0.2,
        "dawn/dusk": 1,
        "undefined": 1,
    },
    "weather": {
        "rainy": 1,
        "snowy": 1,
        "clear": 0.15,
        "overcast": 0.75,
        "partly cloudy": 1,
        "undefined": 0.75,
    },
}

In [4]:
# Tally how many entries of the training manifest fall into each class, with
# one counter per labelled attribute.
weather_counter, scene_counter, timeofday_counter = Counter(), Counter(), Counter()

with open('manifest-simplified-train.json') as manifest:
    # The manifest holds one JSON document per line.
    for raw_entry in manifest:
        entry = json.loads(raw_entry)

        weather_counter[entry['weather-metadata']['class-name']] += 1
        scene_counter[entry['scene-metadata']['class-name']] += 1
        timeofday_counter[entry['timeofday-metadata']['class-name']] += 1

# One counter per output line: weather, scene, time of day.
print(weather_counter, scene_counter, timeofday_counter, sep='\n')

Counter({'clear': 37344, 'overcast': 8770, 'undefined': 8249, 'snowy': 5549, 'rainy': 5070, 'partly cloudy': 4881})
Counter({'city street': 43516, 'highway': 17379, 'residential': 8074, 'undefined': 894})
Counter({'daytime': 36728, 'night': 27971, 'dawn/dusk': 5027, 'undefined': 137})


In [10]:
# Build the class-rebalanced manifests for one dataset split.
#
# For every entry of manifest-simplified-<split>.json the 'source-ref' path is
# rewritten to point at the resized copy, and the entry is then written
# independently to each of the three per-attribute output manifests with the
# keep-probability given by sample_rate[attribute][class-name].
#
# Name of the split to process. It selects the input/output file names and the
# path component that gets redirected to 'resized/<split>'.
split = 'val'

# Set to True to also enqueue one SQS resize request per manifest entry.
# Off by default so that re-running the cell only rewrites the manifests.
send_resize_requests = False

# Fixed seed so that re-running the cell selects the same entries.
random.seed(1)

data = []  # not populated anywhere in this cell

weather_counter = Counter()
scene_counter = Counter()
timeofday_counter = Counter()

# The input is opened first so that a missing source manifest fails before any
# output file is truncated. The with-statement closes all four files even when
# an entry raises part-way through (bad JSON, a class name missing from
# sample_rate, ...).
with open('manifest-simplified-{}.json'.format(split)) as f, \
        open('manifest-oversampled-weather-{}.json'.format(split), 'w') as weather_manifest, \
        open('manifest-oversampled-scene-{}.json'.format(split), 'w') as scene_manifest, \
        open('manifest-oversampled-timeofday-{}.json'.format(split), 'w') as timeofday_manifest:
    for line in f:
        # Tolerate blank lines (e.g. a trailing newline at the end of the file).
        if not line.strip():
            continue

        body = json.loads(line)

        path = body['source-ref']
        # NOTE(review): str.replace rewrites every occurrence of the split name
        # in the path, not only a directory component - confirm that no object
        # key contains it elsewhere.
        new_path = path.replace(split, 'resized/' + split)
        new_key = new_path.replace('s3://sagemaker-project-p-o3c1kiruwcnf/', '')
        # Substitute inside the raw line so the rest of the entry is written
        # back unchanged; this relies on the path appearing verbatim in the
        # line.
        new_line = line.replace(path, new_path)

        # Resize request: original object in, 224x224 copy saved under new_key.
        payload = {
          "s3_url": path,
          "height": "224",
          "width": "224",
          "save_path": new_key
        }

        if send_resize_requests:
            response = sqs.send_message(QueueUrl=queue['QueueUrl'], MessageBody=json.dumps(payload))

        # One independent draw per attribute, always in the order weather,
        # scene, timeofday, so the seeded sequence stays reproducible.
        label = body['weather-metadata']['class-name']
        if random.random() < sample_rate['weather'][label]:
            weather_manifest.write(new_line)
            weather_counter.update([label])

        label = body['scene-metadata']['class-name']
        if random.random() < sample_rate['scene'][label]:
            scene_manifest.write(new_line)
            scene_counter.update([label])

        label = body['timeofday-metadata']['class-name']
        if random.random() < sample_rate['timeofday'][label]:
            timeofday_manifest.write(new_line)
            timeofday_counter.update([label])

In [9]:
# Train
# Per-class counts of the entries kept by the sampling cell, one counter per
# line: weather, scene, time of day.
# NOTE(review): these counters hold whatever split the sampling cell processed
# last, so on a fresh top-to-bottom run they are not necessarily the train
# counts - confirm before relying on the heading.
print(weather_counter, scene_counter, timeofday_counter, sep='\n')

Counter({'overcast': 6554, 'undefined': 6202, 'snowy': 5549, 'clear': 5518, 'rainy': 5070, 'partly cloudy': 4881})
Counter({'city street': 10820, 'highway': 8620, 'residential': 8074, 'undefined': 894})
Counter({'night': 5672, 'daytime': 5574, 'dawn/dusk': 5027, 'undefined': 137})


In [11]:
# Val
# Per-class counts of the entries kept by the sampling cell, one counter per
# line: weather, scene, time of day.
print(weather_counter, scene_counter, timeofday_counter, sep='\n')

Counter({'overcast': 939, 'undefined': 891, 'snowy': 769, 'clear': 763, 'rainy': 738, 'partly cloudy': 738})
Counter({'city street': 1511, 'residential': 1253, 'highway': 1228, 'undefined': 136})
Counter({'daytime': 813, 'night': 792, 'dawn/dusk': 778, 'undefined': 35})
