In [1]:
from pathlib import Path
import json
import os
import requests
from PIL import Image
import io
import numpy as np

In [7]:
# Root folder of the dataset on disk -- change this to match your environment.
path = Path('/data/home/svanbodegraven/notebooks/HealthyHabitatAI/notebooks/data/HealthyHabitat')
# Combined colour masks are written to 'trainannot', source images to 'train'.
masks_path = os.path.join(path, 'trainannot')
images_path = os.path.join(path, 'train')

# makedirs(exist_ok=True) creates missing parent folders and is safe to re-run,
# whereas the exists()/mkdir() pair fails when a parent folder is missing.
for directory in (path, masks_path, images_path):
    os.makedirs(directory, exist_ok=True)

In [8]:
# File name of the annotation export to process -- update it whenever a newer
# export file is placed in the dataset folder.
export_file_name = 'export-2019-11-18T22_07_39.636Z.json'
export_file_path = os.path.join(path, export_file_name)

In [9]:
# Fixed lookup table: class name -> RGB colour used to paint that class in a mask.
healthy_habitat_colours = {
    "bare_ground": [255, 0, 0],
    "burnt_para_grass": [255, 165, 0],
    "dead_para_grass": [255, 255, 0],
    "dense_para_grass": [154, 205, 50],
    "other_grass": [0, 128, 128],
    "lily": [0, 128, 0],
    "para_grass": [0, 255, 255],
    "tree": [173, 216, 230],
    "water": [0, 0, 255],
    "wet_para_grass": [128, 0, 128],
    "unlabelled": [0, 0, 0],
}


def mask_merge(new_img, mask):
    """Recolour the non-black pixels of a mask image with a class colour.

    Parameters
    ----------
    new_img : object exposing ``convert('RGB')`` (e.g. a PIL image)
        The mask image; it is converted to RGB and copied, never modified.
    mask : str
        Class name, used as a key into ``healthy_habitat_colours``.

    Returns
    -------
    numpy.ndarray
        Copy of the RGB pixel array in which every pixel that had at least
        one channel greater than zero is set to the class colour; all other
        pixels are left as they were.

    Raises
    ------
    KeyError
        If ``mask`` is not a key of ``healthy_habitat_colours``.
    """
    pixels = np.array(new_img.convert('RGB'))
    class_colour = healthy_habitat_colours[mask]
    # A pixel counts as foreground when any of its R, G or B values is > 0.
    foreground = (pixels > 0).any(axis=2)
    pixels[foreground] = class_colour
    return pixels

def _fetch_image(url):
    """Download ``url`` and return its content opened as a PIL image.

    Raises ``requests.HTTPError`` on an unsuccessful HTTP status so that an
    error response is never passed on to PIL as if it were image data.
    """
    response = requests.get(url, timeout=60)
    response.raise_for_status()
    return Image.open(io.BytesIO(response.content))


# Download every labelled source image and build one combined colour mask for it.
# The export is read up front so the file is not held open during the downloads.
with open(export_file_path, 'r') as export_file:
    data = json.load(export_file)

for row in data:
    label_objects = row['Label']['objects']
    if len(label_objects) == 0:
        # export.json also contains images that haven't been labelled; skip them
        continue

    # File name: last '-'-separated piece of the URL (query string removed),
    # cut at its first '.', and always given a 'png' extension.
    image_url = row['Labeled Data']
    image_stem = image_url.split('?')[0].split('-')[-1].split('.')[0]
    image_name = '{0}.{1}'.format(image_stem, 'png')
    image_path = os.path.join(images_path, image_name)
    # the mask is stored under the same file name as its source image
    mask_path = os.path.join(masks_path, image_name)

    if not os.path.exists(image_path):
        print('Downloading {0}...'.format(image_path))
        image = _fetch_image(image_url).convert('RGB')
        image.save(image_path)
        width, height = image.size
    else:
        print('{0} exists.'.format(image_path))
        # Read the size from the file on disk; otherwise the size of the
        # previously processed image (or nothing at all) would be used.
        with Image.open(image_path) as existing_image:
            width, height = existing_image.size

    # numpy image arrays are indexed (row, column) = (height, width)
    combined_mask = np.zeros([height, width, 3], dtype=np.uint8)

    # download the mask of each labelled object and paint it onto the canvas
    for label_object in label_objects:
        mask_img = mask_merge(_fetch_image(label_object['instanceURI']),
                              label_object['value'])
        # Paint rather than add: adding uint8 colours wraps around where two
        # masks overlap. With painting, the later object wins on overlaps.
        painted = mask_img.any(axis=2)
        combined_mask[painted] = mask_img[painted]

    Image.fromarray(combined_mask).save(mask_path)

Downloading /data/home/svanbodegraven/notebooks/HealthyHabitatAI/notebooks/data/HealthyHabitat/train/DJI_0474.png...
Downloading /data/home/svanbodegraven/notebooks/HealthyHabitatAI/notebooks/data/HealthyHabitat/train/DJI_0118.png...
Downloading /data/home/svanbodegraven/notebooks/HealthyHabitatAI/notebooks/data/HealthyHabitat/train/DJI_0150.png...
Downloading /data/home/svanbodegraven/notebooks/HealthyHabitatAI/notebooks/data/HealthyHabitat/train/DJI_0182.png...
Downloading /data/home/svanbodegraven/notebooks/HealthyHabitatAI/notebooks/data/HealthyHabitat/train/DJI_0214.png...
Downloading /data/home/svanbodegraven/notebooks/HealthyHabitatAI/notebooks/data/HealthyHabitat/train/DJI_0280.png...
Downloading /data/home/svanbodegraven/notebooks/HealthyHabitatAI/notebooks/data/HealthyHabitat/train/DJI_0312.png...
Downloading /data/home/svanbodegraven/notebooks/HealthyHabitatAI/notebooks/data/HealthyHabitat/train/DJI_0377.png...
Downloading /data/home/svanbodegraven/notebooks/HealthyHabitatAI

In [11]:
# os.listdir returns entries in arbitrary order; sort them so that anything
# derived from this list (such as a random split) starts from a stable order.
file_list = sorted(os.listdir(images_path))

In [32]:
import random

# Dataset layout: images live in train/test/val and their masks in the
# matching trainannot/testannot/valannot folders.
path = Path('/data/home/svanbodegraven/notebooks/HealthyHabitatAI/notebooks/data/HealthyHabitat')
masks_path = os.path.join(path, 'trainannot')
train_path = os.path.join(path, 'train')
testmasks_path = os.path.join(path, 'testannot')
test_path = os.path.join(path, 'test')
valmasks_path = os.path.join(path, 'valannot')
val_path = os.path.join(path, 'val')

# Create every folder defined above. This includes train_path itself; the
# previous check tested images_path, a name defined in a different cell.
for directory in (path, masks_path, train_path,
                  testmasks_path, test_path,
                  valmasks_path, val_path):
    os.makedirs(directory, exist_ok=True)

HOLDOUT_COUNT = 15  # files taken out of the training set in total
VAL_COUNT = 5       # how many of the held-out files become validation files
SPLIT_SEED = 42     # fixed seed so the same files are picked on every run

if len(file_list) < HOLDOUT_COUNT:
    raise ValueError('Need at least {0} files to split, found {1}.'.format(
        HOLDOUT_COUNT, len(file_list)))

# A private, seeded generator keeps the split reproducible without touching
# the global random state; sorting makes the population order stable too.
split_rng = random.Random(SPLIT_SEED)
test_items = split_rng.sample(sorted(file_list), HOLDOUT_COUNT)
# val_items is drawn from test_items, so it is a subset of the held-out files
val_items = split_rng.sample(test_items, VAL_COUNT)

In [33]:
# Move every held-out image, followed by its mask, from the training folders
# into the test folders.
for file_name in test_items:
    for source_dir, target_dir in ((train_path, test_path),
                                   (masks_path, testmasks_path)):
        os.rename(os.path.join(source_dir, file_name),
                  os.path.join(target_dir, file_name))

In [34]:
# Move the validation subset (image first, then its mask) from the test
# folders into the validation folders.
for file_name in val_items:
    image_source = os.path.join(test_path, file_name)
    image_target = os.path.join(val_path, file_name)
    os.rename(image_source, image_target)

    mask_source = os.path.join(testmasks_path, file_name)
    mask_target = os.path.join(valmasks_path, file_name)
    os.rename(mask_source, mask_target)