In [1]:
# Packages
from IPython.display import Image
import rasterio
from rasterio import windows
import skimage
import skimage.io as skio
import json
import skimage.draw
import os
import sys
import pathlib
import math
import itertools
from shutil import copy2
import functools
from skimage import exposure
import matplotlib.pyplot as plt
import matplotlib.image as mpimg
import numpy as np
import pandas as pd
from rasterio.plot import show
from osgeo import gdal

In [2]:
# Get absolute file paths. Returns generator object
def absoluteFilePaths(directory):
    """Lazily yield the absolute path of every file found beneath ``directory``.

    The tree is traversed with ``os.walk``; sub-directories themselves are not
    yielded, only the files they contain.
    """
    for root, _subdirs, names in os.walk(directory):
        yield from (os.path.abspath(os.path.join(root, name)) for name in names)

# Normalize array
def normalize(arr, arr_max = None):
    '''Scale an array by a maximum value so that it falls in the 0-1 range.

    Parameters
    ----------
    arr : numpy array to scale.
    arr_max : optional divisor. When omitted (``None``) or a scalar zero, the
        array's own maximum is used. May also be an array that broadcasts
        against ``arr`` (e.g. one maximum per band).

    Returns
    -------
    ``arr / arr_max`` as a new array; ``arr`` is not modified.
    '''
    # Test "not supplied" explicitly: the truth value of a multi-element array
    # is ambiguous and would raise ValueError. A scalar 0 keeps its historical
    # meaning of "fall back to the array's own maximum".
    if arr_max is None or (np.ndim(arr_max) == 0 and not arr_max):
        arr_max = arr.max()
    return arr / arr_max

# Reorder Planet scenes to RGB
def reorder_to_rgb(image):
    '''Return an (H, W, 3) red/green/blue stack built from a band-last image.

    Bands 0, 1 and 2 of the input are taken as blue, green and red; each one is
    passed through ``normalize`` on its own before stacking, so the result is
    suitable for imshow.
    '''
    # Input order is blue, green, red -> pick indices 2, 1, 0 to obtain RGB
    rgb_bands = [normalize(image[:, :, idx]) for idx in (2, 1, 0)]
    return np.stack(rgb_bands, axis=-1)

# Reorder Planet scenes to RGB for RASTERIO read images (C,H,W) 
def rasterio_to_rgb(image):
    '''Return a (3, H, W) red/green/blue stack from a band-first (C, H, W) image.

    Bands 0, 1 and 2 of the input are taken as blue, green and red; values are
    copied unchanged (no normalization).
    '''
    # Input order is blue, green, red -> pick indices 2, 1, 0 to obtain RGB
    rgb_bands = [image[idx, :, :] for idx in (2, 1, 0)]
    return np.stack(rgb_bands, axis=0)

# Contrast stretching algorithm for multiband images
def contrast_stretch_mb(img):
    """Apply a 2-98 percentile contrast stretch to the first three bands.

    Parameters
    ----------
    img : band-last array, indexed as ``img[:, :, b]``.

    Returns
    -------
    The same array object that was passed in: each of the first three bands is
    overwritten in place with its stretched values.
    """
    # Loop over RGB bands
    for b in range(3):
        band = img[:, :, b]
        p2, p98 = np.percentile(band, (2, 98))
        # Rescale only the band being replaced. The rescale is element-wise, so
        # this gives the same values as rescaling the whole image and slicing
        # out one band, without doing the work three times over.
        img[:, :, b] = exposure.rescale_intensity(band, in_range=(p2, p98))
    return img

# Contrast stretching for a chip with percentiles passed to it from larger image
# Contrast stretching algorithm for multiband images
def contrast_stretch_chip(img, percs):
    """Contrast-stretch the first three bands of a chip using caller-supplied statistics.

    Parameters
    ----------
    img : array indexed as ``img[:, :, b]`` (bands on the last axis).
    percs : indexable by band number 0-2; entry ``percs[b]`` is read as
        ``[0]`` -> lower bound (p2), ``[1]`` -> upper bound (p98),
        ``[2]`` -> band maximum.

    Returns
    -------
    ``img_out``, which is bound to the same object as ``img`` (no copy is
    made), so the band assignments below also write into the array passed in.
    """
    img_out = img  # plain assignment: an alias of img, not a copy
    # Loop over RGB bands
    for b in range(0,3):
        band_percs = percs[b]
        p2 = band_percs[0]
        p98 = band_percs[1]
        band_max = band_percs[2]
        # NOTE(review): img_norm is computed but never read afterwards, so
        # band_max currently has no effect on the result - confirm whether the
        # rescale below was meant to operate on img_norm instead of img.
        img_norm = normalize(img, band_max)
        # Both rescale calls operate on the full multi-band array; only band b
        # of the result is kept.
        img_scaled = exposure.rescale_intensity(img, in_range=(p2, p98))
        # No in_range is given here, so skimage's default input range applies
        # to the whole array (all bands), then values are mapped to uint8 range.
        # NOTE(review): because img is being modified in place, bands written
        # in earlier iterations are part of this array too - verify intended.
        img_scaled = exposure.rescale_intensity(img_scaled, out_range=('uint8'))
        img_out[:,:,b] = img_scaled[:,:,b]
    return img_out

def setup_labeling(vgg_dir, chip_dir):
    """Seed a chip directory with the VGG labeling templates.

    Every entry of ``vgg_dir`` (project template JSONs, via.html, ...) is
    copied into ``chip_dir`` unless ``chip_dir`` already holds a file whose
    name contains ".json", in which case nothing is copied and a short notice
    is printed instead.
    """
    # A JSON file already present means the directory was set up earlier
    already_prepared = any(".json" in name for name in os.listdir(chip_dir))
    if already_prepared:
        print("has labeling files")
        return

    # Fresh directory: bring over every template file
    for template in os.listdir(vgg_dir):
        copy2(os.path.join(vgg_dir, template), chip_dir)

In [3]:
def planet2chips(tiff_directory, chip_directory, chip_size = 512):

    """Cut a Planet surface-reflectance GeoTiff into square image chips.

    The first file in ``tiff_directory`` whose name contains 'SR.tif' is
    opened and tiled into non-overlapping ``chip_size`` x ``chip_size``
    windows. For every window that contains no zero-valued pixel, two files
    are written below ``chip_directory/<image name>/``:

    * ``chips/<image name>_<row>_<col>.tif`` - the raw 4-band uint16 chip,
      georeferenced with the transform of its own window;
    * ``pngs/<image name>_<row>_<col>.png`` - a contrast-stretched RGB preview.

    ``<image name>`` is the first three '_'-separated tokens of the file name;
    ``<row>``/``<col>`` are the pixel offsets of the chip's upper-left corner.
    Partial windows at the right/bottom edge are not written.

    Parameters
    ----------
    tiff_directory : directory holding the scene's '*SR.tif' file.
    chip_directory : root directory under which the chip folders are created.
    chip_size : edge length of a chip in pixels.

    Raises
    ------
    FileNotFoundError
        If ``tiff_directory`` contains no file with 'SR.tif' in its name.
    """

    # Get the analytic SR GeoTiff filename in the specified directory
    sr_files = [f for f in os.listdir(tiff_directory) if 'SR.tif' in f]
    if not sr_files:
        raise FileNotFoundError("No 'SR.tif' GeoTiff found in " + str(tiff_directory))
    file = sr_files[0]

    # Get image name to use for creating directory
    image_name = "_".join(file.split("_")[0:3])

    # Image chip destination directory and subdirectories
    image_dir = os.path.join(chip_directory, image_name)
    chip_dir = os.path.join(image_dir, 'chips')
    png_dir = os.path.join(image_dir, 'pngs')

    # Print filenames
    print('filename: ' + file + '\n' + 'image name: ' + image_name)

    # Make directories to store raw and rgb image chips
    pathlib.Path(chip_dir).mkdir(parents=True, exist_ok=True)
    pathlib.Path(png_dir).mkdir(parents=True, exist_ok=True)

    with rasterio.open(os.path.join(tiff_directory, file)) as src:

        # Report (bands, rows, cols) from the metadata; the full raster does
        # not need to be loaded just to learn its shape
        print((src.count, src.height, src.width))

        # Upper-left offsets of every full chip. The "+ 1" keeps the last chip
        # when the image size is an exact multiple of chip_size.
        row_starts = range(0, src.height - chip_size + 1, chip_size)
        col_starts = range(0, src.width - chip_size + 1, chip_size)

        for rmin in row_starts:
            for cmin in col_starts:

                window = rasterio.windows.Window(cmin, rmin, chip_size, chip_size)
                r = src.read((1,2,3,4), window=window)

                # Skip chips touching zero-valued pixels
                if 0 in r:
                    continue

                chip_name = image_name + '_' + str(rmin) + '_' + str(cmin)

                # Planet bands go B, G, R, NIR: pick R, G, B from the chip
                # already in memory (fancy indexing copies, so the in-place
                # contrast stretch below leaves r untouched)
                b = r[[2, 1, 0]]
                # Swap axis from rasterio order (C,H,W) to order expected by skio (H,W,C)
                b = np.moveaxis(b, 0, 2)
                b = contrast_stretch_mb(b)
                skio.imsave(os.path.join(png_dir, chip_name + '.png'), b)

                # Save the raw chip; window_transform gives the chip its own
                # georeferencing instead of the full scene's origin
                with rasterio.open(os.path.join(chip_dir, chip_name + '.tif'), 'w', driver='GTiff',
                           height=r.shape[1], width=r.shape[2], count=4,
                           dtype=rasterio.uint16, crs=src.crs,
                           transform=src.window_transform(window)) as new_img:

                    # Write the raw image to the new GeoTiff
                    new_img.write(r)

Apply to a test image to check performance

In [None]:
# sdir = '/Users/Tyler-SFG/Desktop/Box Sync/SFG Centralized Resources/Projects/Aquaculture/Waitt Aquaculture/aqua-mapping/aqua-mapping-data/aqua-images/planet/planet_order_242451/20180830_154418_0f3c'
# planet2chips(tiff_directory = sdir, chip_directory = sdir, chip_size = 512)    

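Now we need a function that finds every Planet scene across the downloaded order folders and chips the scenes that have not been processed yet. (The helper that copies the VGG project templates and via.html files into a chip directory so that the chips can be labeled, `setup_labeling`, is defined above.)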

In [4]:
def process_planet_orders(source_dir, target_dir):

    """Find unique PlanetScope scenes in a directory of Planet order folders
    and process newly added scenes into image chips.

    Parameters
    ----------
    source_dir : directory whose immediate sub-directories are Planet orders;
        each order's immediate sub-directories are scenes.
    target_dir : directory holding one chip folder per processed scene. It is
        treated as empty when it does not exist yet.

    The total number of scene folders found is printed first, then the path of
    each scene as it is handed to ``planet2chips``.
    """

    # Full path of every planet order in the source directory
    order_dirs = [os.path.join(source_dir, o) for o in next(os.walk(source_dir))[1]]

    scenes = []
    scene_paths = []

    for order_dir in order_dirs:
        for scene_id in next(os.walk(order_dir))[1]:
            scenes.append(scene_id)
            # order_dir already includes source_dir; joining source_dir again
            # would duplicate the prefix for relative paths
            scene_paths.append(os.path.join(order_dir, scene_id))

    print(len(scenes))

    # Chip folders that already exist (none if target_dir is missing)
    handled = set(next(os.walk(target_dir), (None, [], []))[1])

    scene_paths_to_process = []

    for scene_id, scene_path in zip(scenes, scene_paths):
        # planet2chips names a chip folder after the first three '_' tokens,
        # so compare on that name as well as on the raw scene folder name
        chip_folder = "_".join(scene_id.split("_")[0:3])
        if scene_id in handled or chip_folder in handled:
            continue
        # Remember it so a scene present in several orders is chipped once
        handled.add(chip_folder)
        scene_paths_to_process.append(scene_path)

    # Apply GeoTiff chipping function to each unprocessed scene
    for scene_path in scene_paths_to_process:
        print(scene_path)
        planet2chips(tiff_directory = scene_path, chip_directory = target_dir, chip_size = 512)

Apply the function to process all Planet orders presently in Box

In [5]:
# Run the chipping pipeline: sdir holds the Planet order folders, tdir receives
# one chip folder per scene.
# NOTE(review): both paths are absolute and specific to one machine; adjust
# them before re-running elsewhere.
sdir = '/Users/Tyler-SFG/Desktop/Box Sync/SFG Centralized Resources/Projects/Aquaculture/Waitt Aquaculture/aqua-mapping/aqua-mapping-data/aqua-images/planet'
tdir = '/Users/Tyler-SFG/Desktop/Box Sync/SFG Centralized Resources/Projects/Aquaculture/Waitt Aquaculture/aqua-mapping/aqua-mapping-data/aqua-images/planet_chips'

# os.path.isdir(sdir)
process_planet_orders(sdir, tdir)    

141
/Users/Tyler-SFG/Desktop/Box Sync/SFG Centralized Resources/Projects/Aquaculture/Waitt Aquaculture/aqua-mapping/aqua-mapping-data/aqua-images/planet/planet_order_236949/20180825_231532_1_0f3c
filename: 20180825_231532_1_0f3c_3B_AnalyticMS_SR.tif
image name: 20180825_231532_1
(4, 4510, 8605)


  .format(dtypeobj_in, dtypeobj_out))
  transform = guard_transform(transform)


/Users/Tyler-SFG/Desktop/Box Sync/SFG Centralized Resources/Projects/Aquaculture/Waitt Aquaculture/aqua-mapping/aqua-mapping-data/aqua-images/planet/planet_order_252429/20180224_141303_1035
filename: 20180224_141303_1035_3B_AnalyticMS_SR.tif
image name: 20180224_141303_1035
(4, 4906, 9354)
/Users/Tyler-SFG/Desktop/Box Sync/SFG Centralized Resources/Projects/Aquaculture/Waitt Aquaculture/aqua-mapping/aqua-mapping-data/aqua-images/planet/planet_order_252429/20180224_161540_0f49
filename: 20180224_161540_0f49_3B_AnalyticMS_SR.tif
image name: 20180224_161540_0f49
(4, 4300, 8705)
/Users/Tyler-SFG/Desktop/Box Sync/SFG Centralized Resources/Projects/Aquaculture/Waitt Aquaculture/aqua-mapping/aqua-mapping-data/aqua-images/planet/planet_order_252451/20180124_141121_0f31
filename: 20180124_141121_0f31_3B_AnalyticMS_SR.tif
image name: 20180124_141121_0f31
(4, 4910, 9253)
/Users/Tyler-SFG/Desktop/Box Sync/SFG Centralized Resources/Projects/Aquaculture/Waitt Aquaculture/aqua-mapping/aqua-mapping-da

### Move tiff files for labeled chips

After a Planet scene is processed into tiff and png chips, the pngs containing objects are added to a VGG project and labeled. Labels are then saved in a `[batchname]_labels.json` file. The last step prior to uploading the chips to Tana is to create, for each labeled chip, a new directory containing the raw tiff file and a directory of class-specific masks.

In [10]:
# Function to copy the tiffs of PNGs selected for labeling and make directories for each chip
def copy_chip_tiffs(label_dir, chips_dir, prepped_dir):

    """Create a prepped directory for every PNG of a VGG labeling project
    and copy the matching raw GeoTiff chip into it.

    For a PNG named ``<chip>.png`` in ``label_dir`` the file
    ``chips_dir/<scene>/chips/<chip>.tif`` is copied into
    ``prepped_dir/<chip>/image/``, where ``<scene>`` is the first three
    '_'-separated tokens of ``<chip>``.
    """
    for png in os.listdir(label_dir):

        # macOS bookkeeping file, not a chip
        if png == '.DS_Store':
            continue

        # Chip name = file name without the .png extension
        chip = png.split('.png')[0]

        # prepped_dir/<chip>/image (parents=True creates the chip dir as well)
        image_dir = os.path.join(prepped_dir, chip, 'image')
        pathlib.Path(image_dir).mkdir(parents=True, exist_ok=True)

        # Scene the chip was cut from
        tokens = chip.split('_')[0:3]
        scene = "{}_{}_{}".format(tokens[0], tokens[1], tokens[2])

        # Locate and copy tiff file
        source_tiff = os.path.join(chips_dir, scene, 'chips', chip + '.tif')
        copy2(source_tiff, image_dir)

Run the function to copy the tiffs for the selected PNGs

In [13]:
# Inputs for copy_chip_tiffs. The label path below points at the `label_china`
# project's pngs folder.
# NOTE(review): the previous comment said "chile cages" - confirm which batch is meant.
# labels      -> directory of PNGs selected for labeling
# prepped_dir -> destination root for the per-chip directories
# chips_dir   -> root of the chip folders holding the raw tiffs
labels = '/Users/Tyler-SFG/Desktop/Box Sync/SFG Centralized Resources/Projects/Aquaculture/Waitt Aquaculture/aqua-mapping/aqua-mapping-data/aqua-images/vgg/labeled/label_china/pngs'
prepped_dir = '/Users/Tyler-SFG/Desktop/Box Sync/SFG Centralized Resources/Projects/Aquaculture/Waitt Aquaculture/aqua-mapping/aqua-mapping-data/aqua-images/prepped_planet'
chips_dir = '/Users/Tyler-SFG/Desktop/Box Sync/SFG Centralized Resources/Projects/Aquaculture/Waitt Aquaculture/aqua-mapping/aqua-mapping-data/aqua-images/planet_chips'

In [14]:
# Create one directory per labeled PNG under prepped_dir and copy its raw tiff in
copy_chip_tiffs(label_dir = labels, chips_dir = chips_dir, prepped_dir = prepped_dir)

Now we need a function to create the class masks for each image

In [46]:
def masks_from_labels(labels, prepped_dir):

    """Write one binary mask GeoTiff per class for every annotated chip.

    Parameters
    ----------
    labels : path to a VGG Image Annotator JSON export. Each entry provides a
        'filename' (``<chip>.png``) and 'regions'; every region carries
        'shape_attributes' (polygon vertices in 'all_points_x' /
        'all_points_y') and 'region_attributes' with a 'class' entry.
    prepped_dir : directory containing ``<chip>/image/<chip>.tif`` for every
        annotated chip. Masks are written to
        ``<chip>/class_masks/<chip>_<class>_mask.tif`` with the size, CRS and
        transform of the chip's GeoTiff; pixels inside a polygon of the class
        are 1, all others 0.

    Chips whose annotations raise ``KeyError`` (e.g. a region without a
    'class' attribute) are reported on stdout and skipped.
    """

    # Read annotations (the context manager closes the file handle)
    with open(labels) as f:
        annotations = json.load(f)
    annotations = list(annotations.values())  # don't need the dict keys

    # The VIA tool saves images in the JSON even if they don't have any
    # annotations. Skip unannotated images.
    annotations = [a for a in annotations if a['regions']]

    # Loop over chips
    for a in annotations:

        # Get chip and directory
        chip = a['filename'].split('.png')[0]
        chip_dir = os.path.join(prepped_dir, chip)

        # Create a directory to store masks
        masks_dir = os.path.join(chip_dir, 'class_masks')
        pathlib.Path(masks_dir).mkdir(parents=True, exist_ok=True)

        # VGG Image Annotator saves each image in the form:
        # { 'filename': '28503151_5b5b7ec140_b.jpg',
        #   'regions': {
        #       '0': {
        #           'region_attributes': {},
        #           'shape_attributes': {
        #               'all_points_x': [...],
        #               'all_points_y': [...],
        #               'name': 'polygon'}},
        #       ... more regions ...
        #   },
        #   'size': 100202
        # }
        # 'regions' is either a list of regions or, as sketched above, a dict
        # keyed by region index; normalise to a list
        regions = a['regions']
        if isinstance(regions, dict):
            regions = list(regions.values())

        # Read geotiff for chip; closed again when the block exits
        gtiff = os.path.join(chip_dir, 'image', chip + '.tif')
        with rasterio.open(gtiff) as src:

            # Use try to only extract masks for chips with complete annotations and class labels
            try:

                # Get the aquaculture class of each polygon
                polygon_types = [r['region_attributes'] for r in regions]

                # Get unique aquaculture classes in annotations
                types = set(val for dic in polygon_types for val in dic.values())

                for t in types:

                    # Pull out polygons of that type. Their outline points are
                    # stored in shape_attributes (see json format above)
                    polygons = [r['shape_attributes'] for r in regions if r['region_attributes']['class'] == t]

                    # Draw mask using height and width of Geotiff
                    mask = np.zeros([src.height, src.width], dtype=np.uint8)

                    for p in polygons:

                        # Get indexes of pixels inside the polygon and set them to 1.
                        # shape= clips vertices drawn beyond the image edge so the
                        # indices always fit the mask
                        rr, cc = skimage.draw.polygon(p['all_points_y'], p['all_points_x'], shape=mask.shape)
                        mask[rr, cc] = 1

                    # Open a new GeoTiff data file in which to save the mask
                    mask_file = os.path.join(masks_dir, chip + '_' + str(t) + '_mask.tif')
                    with rasterio.open(mask_file, 'w', driver='GTiff',
                               height=src.height, width=src.width, count=1,
                               dtype=rasterio.ubyte, crs=src.crs,
                               transform=src.transform) as new_img:

                        # Write the mask to band 1 of the new GeoTiff
                        new_img.write(mask, 1)

            except KeyError:
                print(chip + ' missing aquaculture class assignment')
                continue
        

Run function to create masks

In [44]:
# Inputs for masks_from_labels. Note that both names rebind variables set in an
# earlier cell: labels now points at the VGG annotation JSON, and prepped_dir at
# the `china_20180918` sub-folder of prepped_planet.
labels = "/Users/Tyler-SFG/Desktop/Box Sync/SFG Centralized Resources/Projects/Aquaculture/Waitt Aquaculture/aqua-mapping/aqua-mapping-data/aqua-images/vgg/labeled/label_china/20180410_020421_0f31_labels.json"
prepped_dir = '/Users/Tyler-SFG/Desktop/Box Sync/SFG Centralized Resources/Projects/Aquaculture/Waitt Aquaculture/aqua-mapping/aqua-mapping-data/aqua-images/prepped_planet/china_20180918'

In [47]:
# Write the per-class mask GeoTiffs for every annotated chip in the JSON
masks_from_labels(labels = labels, prepped_dir = prepped_dir)

  if __name__ == '__main__':
  transform = guard_transform(transform)


20180410_020421_0f31_3_4 missing aquaculture class assignment
20180410_020421_0f31_6_12 missing aquaculture class assignment
