***Description***

This script uses pairs of PNG satellite images (pre- and post-disaster) and the corresponding JSON label files, pre-disaster (building polygons) and post-disaster (damage level),
to create 4 tensors:

1. pre-image tensor (height,width,3)
2. segmentation mask tensor (height,width,1)
3. post-image tensor (height,width,3)
4. classification mask tensor (height,width,1)

1 and 2 are saved in file --> tensors_pre_[database].npz (useful for segmentation)
3 and 4 are saved in file --> tensors_post_[database].npz (useful for classification)

where [database] is the identifier string given to the database being used (see the variable "database" at the end of this file).

Labels for damage classification (used to create tensor n°4)

Class 0 - no building or un-classified building

Class 1 - no-damage

Class 2 - minor damage

Class 3 - major damage

Class 4 - destroyed

The data must be organised in the following folder structure. To create this structure, use **split_into_disasters.py** from __[xView2 baseline](https://github.com/DIUx-xView/xView2_baseline/tree/master)__ :


 Folder with the database:

├── disaster_name_1

 │------├── images 

 │------│------------<image_id>.png

 │------│------------...

 │------├── labels

 │------│------------<image_id>.json

 │------│------------...

├── disaster_name_2

 │------├── images 

 │------│------------<image_id>.png

 │------│------------...

 │------├── labels

 │------│------------<image_id>.json

 │------│------------...

└── disaster_name_n
 

**The script includes and/or makes use of functions created during the xView2 challenge set by DIU (Defense Innovation Unit) through their baseline repository: xView2_baseline.**

__[xView2 license](https://github.com/DIUx-xView/xView2_baseline/blob/master/LICENSE.md)__   


#### 1. import the necessary libraries

In [1]:
import json 
from os import path, walk, makedirs
from sys import exit, stderr

from cv2 import fillPoly, imwrite
import numpy as np
from shapely import wkt
from shapely.geometry import mapping, Polygon
from skimage.io import imread
from tqdm.auto import tqdm

import numpy as np
import tensorflow as tf

# This removes the massive amount of scikit warnings of "low contrast images"
import warnings
warnings.filterwarnings("ignore", category=UserWarning)

  from .autonotebook import tqdm as notebook_tqdm


#### 2. Definition of functions

In [2]:
# from XView2 baseline
def get_dimensions(file_path):
    """
    Load an image from disk and report the shape of its pixel array.

    :param file_path: The path of the image file to inspect
    :return: a 3-tuple holding the three entries of the loaded array's shape,
             in the order the array stores them. A ValueError is raised when
             the array does not have exactly three dimensions.
    """
    # NOTE(review): image readers usually return row-major arrays, which would
    # make this (height, width, channels) rather than (width, height, channels)
    # -- confirm against skimage before relying on the order.
    pixels = np.array(imread(file_path))
    # The three-way unpacking doubles as a check that the image is 3-D
    first_dim, second_dim, third_dim = pixels.shape
    return (first_dim, second_dim, third_dim)

In [3]:
# from xView2 baseline
def save_one_mask(masks, output_path, mask_file_name):
    """
    Write a mask array to disk as a PNG file.

    :param masks: the mask data handed to imwrite
    :param output_path: directory in which the file is written
    :param mask_file_name: file name without extension ('.png' is appended)
    """
    # Build <output_path>/<mask_file_name>.png and let OpenCV do the encoding
    png_name = mask_file_name + '.png'
    destination = path.join(output_path, png_name)
    imwrite(destination, masks)

In [4]:
# modified from xView2 baseline 
def mask_polygons_together_with_border(size, shapes_pre, shapes_post, border):
    """
    Rasterise all building polygons of one image into two pixel-wise masks.

    :param size: shape tuple used to allocate the mask arrays
    :param shapes_pre: dictionary uid -> polygon points (np.ndarray) from get_feature_info
    :param shapes_post: dictionary uid -> damage label (string) from get_feature_damage
    :param border: number of pixels used to shrink each polygon towards its centroid,
                   so that neighbouring buildings are separated by a gap

    :returns: a tuple (building mask, damage mask) of uint8 arrays of shape ``size``.
              The building mask holds 1 on building pixels and 0 elsewhere; the
              damage mask holds the damage class (1-4) on building pixels and 0 elsewhere.
              Pixels where shrunken polygons still overlap are set to 0 in both masks.
    """
    # Damage label -> class value; any other label (or a missing one) is class 0
    damage_classes = {
        "no-damage": 1,
        "minor-damage": 2,
        "major-damage": 3,
        "destroyed": 4,
    }

    mask_img_buildings = np.zeros(size, np.uint8)
    mask_img_damage = np.zeros(size, np.uint8)

    for uid, poly in shapes_pre.items():
        # A building may have no entry in shapes_post (get_feature_damage skips
        # features without a damage label); treat it as un-classified (class 0)
        # instead of failing with a KeyError.
        color = damage_classes.get(shapes_post.get(uid), 0)

        # Creating a shapely polygon object out of the numpy array
        polygon = Polygon(poly)

        # Getting the center point of the polygon
        (poly_center_x, poly_center_y) = polygon.centroid.coords[0]

        # Moving every vertex `border` pixels towards the center point
        shrunk_polygon = []
        for (x, y) in polygon.exterior.coords:
            if x < poly_center_x:
                x += border
            elif x > poly_center_x:
                x -= border

            if y < poly_center_y:
                y += border
            elif y > poly_center_y:
                y -= border

            shrunk_polygon.append([x, y])

        # Transforming the polygon back to a np.ndarray
        ns_poly = np.array(shrunk_polygon, np.int32)

        # Each building is drawn on its own blank canvas and then added to the
        # running totals, so overlapping buildings produce building counts > 1
        blank_building = np.zeros(size, np.uint8)
        blank_damage = np.zeros(size, np.uint8)
        fillPoly(blank_building, [ns_poly], 1)
        fillPoly(blank_damage, [ns_poly], color)
        mask_img_buildings += blank_building
        mask_img_damage += blank_damage

    # Pixels covered by more than one building are ambiguous: drop them
    mask_img_buildings[mask_img_buildings > 1] = 0
    # Multiplying by the 0/1 building mask keeps damage values only on
    # unambiguous building pixels
    final_mask = mask_img_buildings * mask_img_damage
    return mask_img_buildings, final_mask


In [5]:
# from XView2 baseline
def read_json(json_path):
    """
    Load a json file from disk.

    :param json_path: path to load json from
    :returns: the parsed json content (a python dictionary for the label files)
    """
    # The context manager closes the file handle even if parsing fails
    with open(json_path, encoding="utf-8") as json_file:
        annotations = json.load(json_file)
    return annotations

In [6]:
# from XView2 baseline
def get_feature_info(feature):
    """
    Collect the pixel-coordinate polygon of every feature in a label file.

    :param feature: a python dictionary of json labels
    :returns: a dictionary mapping each feature uid to an int32 np.ndarray built
              from the first coordinate sequence of the feature's WKT geometry
    """
    polygons_by_uid = {}

    for entry in feature['features']['xy']:
        # Parse the WKT string and convert it to a GeoJSON-like mapping
        geometry = mapping(wkt.loads(entry['wkt']))
        # Only the first coordinate sequence of the geometry is kept
        first_ring = list(geometry['coordinates'][0])
        points = np.array(first_ring, np.int32)
        polygons_by_uid[entry['properties']['uid']] = points

    return polygons_by_uid

In [7]:
def get_feature_damage(feature):
    """
    Creates a dictionary with the damage labels by building. Uses the same keys as
    the dictionary returned by get_feature_info.

    :param feature: a python dictionary of json labels (post-disaster with damage labels)
    :returns: a dictionary mapping each feature uid to its damage label ('subtype').
              Features without a 'subtype' entry are reported on stdout and left out.
    """
    damage = {}

    for feat in feature['features']['xy']:
        try:
            damage_type = feat['properties']['subtype']
        except KeyError:
            # No damage label for this feature: report it and leave it out of the result
            print("no damage info in "+feat['properties']['uid'])
            continue
        damage[feat['properties']['uid']] = damage_type

    return damage

In [8]:
def get_disaster_info(feature):
    """
    Reads the disaster name and disaster type from the metadata of a label file.

    :param feature: a python dictionary of json labels ( pre- or post-disaster)
    :returns: a tuple (disaster name, disaster type); both are 'unclassified'
              when either entry is missing from the metadata
    """
    # A label file without any metadata is treated like one with empty metadata
    metadata = feature.get('metadata', {})

    try:
        disaster = metadata['disaster']
        disaster_type = metadata['disaster_type']
    except KeyError:
        disaster = 'unclassified'
        disaster_type = 'unclassified'
        # The image name may be missing too; the warning itself must not fail
        print("no disaster information in "+str(metadata.get('img_name', 'unknown image')))

    return disaster, disaster_type

In [10]:
# Modified from XView2 baseline. Now includes information of the post-damage files and create tensors for images and masks

def create_masks(json_path, images_directory):
    '''
    Builds the image tensors, masks and identifiers for every pre/post pair of one disaster.

    Only pairs whose pre-disaster label file contains at least one polygon are kept.

    :param json_path: path to json labels ( pre- and post-disaster)
    :param images_directory: path to images ( pre- and post-disaster)

    :returns: 8 lists with one entry per kept image pair, in this order:
              pre-disaster images, post-disaster images (3-channel uint8 tensors),
              segmentation masks, classification masks,
              pre-disaster ids, post-disaster ids (file names without extension),
              disaster names, disaster types
    '''
    arr_img_pre_list = []
    arr_img_post_list = []
    lm1 = []
    lm2 = []
    id_pre_list = []
    id_post_list = []
    dis_list = []
    dis_type_list = []

    # Pre-disaster json files found directly inside json_path (non-json items are ignored)
    jsons_pre = [j for j in next(walk(json_path))[2]
                 if '_pre' in j and j.endswith('json')]

    for j in tqdm(jsons_pre, unit='poly', leave=False):
        # Ids are the file names without extension; the post id mirrors the pre id
        id_pre = path.splitext(j)[0]
        id_post = id_pre.replace('_pre', '_post')

        # Loading the per chip json pre- and post-disaster
        chip_json_pre = read_json(path.join(json_path, j))
        chip_json_post = read_json(path.join(json_path, j.replace('_pre', '_post')))

        # Our chips start off in life as PNGs named after the label files
        chip_file_pre = path.join(images_directory, id_pre + '.png')
        chip_file_post = path.join(images_directory, id_post + '.png')

        # Single-channel mask shape; both leading dimensions of the image are used
        # so that non-square chips get a mask matching the image
        chip_img_size = get_dimensions(chip_file_pre)
        chip_size = (chip_img_size[0], chip_img_size[1], 1)

        # Reading the polygons (pre) and the damage labels (post) from the json files
        polys_pre = get_feature_info(chip_json_pre)
        polys_post = get_feature_damage(chip_json_post)

        # Chips without any building polygon are left out entirely
        if not polys_pre:
            continue

        m1, m2 = mask_polygons_together_with_border(chip_size, polys_pre, polys_post, 2)
        lm1.append(m1)
        lm2.append(m2)

        # creating tensors from images
        img_pre = tf.io.read_file(chip_file_pre)
        img_post = tf.io.read_file(chip_file_post)
        arr_img_pre_list.append(tf.image.decode_png(img_pre, channels=3, dtype=tf.uint8))
        arr_img_post_list.append(tf.image.decode_png(img_post, channels=3, dtype=tf.uint8))

        # creating id and disaster lists to identify the arrays
        dis, dis_type = get_disaster_info(chip_json_pre)
        id_pre_list.append(id_pre)
        id_post_list.append(id_post)
        dis_list.append(dis)
        dis_type_list.append(dis_type)

    return arr_img_pre_list, arr_img_post_list, lm1, lm2, id_pre_list, id_post_list, dis_list, dis_type_list

In [12]:
# DATABASE_DIR IS A FOLDER THAT CONTAINS THE DATA DIVIDED BY DISASTERS, IN EACH DISASTER FOLDER THERE ARE "IMAGES" (PNG FILES) AND "LABELS" FOLDERS (JSON FILES).

# Modified from XView2 baseline
# Root folder that holds one sub-folder per disaster
database_dir = '../data/data_by_disaster'
# Identifier string that becomes part of the npz file names
database = 'test'

# Sub-folders found directly under the database directory
disasters = next(walk(database_dir))[1]

# Accumulators filled across all disasters
final_list_imgpre = []
final_list_imgpost = []
final_list_maskseg = []
final_list_maskclas = []
id_all_pre = []
id_all_post = []
dis_all = []
dis_type_all = []

for disaster in disasters:
    # Only folders whose name contains a hyphen are processed
    if "-" not in disaster:
        continue

    # Full paths to the images and labels of this disaster
    image_dir = path.join(database_dir, disaster, 'images')
    json_dir = path.join(database_dir, disaster, 'labels')

    # Abort with a distinct exit code as soon as a required folder is missing
    for required_dir, message, exit_code in (
            (image_dir, "Error, could not find image files in {}.\n\n", 2),
            (json_dir, "Error, could not find labels in {}.\n\n", 3)):
        if not path.isdir(required_dir):
            print(message.format(required_dir), file=stderr)
            exit(exit_code)

    # Results for this disaster, appended to the matching accumulator
    l1,l2,l3,l4,id1,id2,dis,dis_type = create_masks(json_dir, image_dir)
    for accumulator, chunk in zip(
            (final_list_imgpre, final_list_imgpost, final_list_maskseg, final_list_maskclas,
             id_all_pre, id_all_post, dis_all, dis_type_all),
            (l1, l2, l3, l4, id1, id2, dis, dis_type)):
        accumulator.extend(chunk)

# Convert lists to numpy arrays
images_pre = np.stack(final_list_imgpre)
images_post = np.stack(final_list_imgpost)

masks_pre = np.stack(final_list_maskseg)
masks_post = np.stack(final_list_maskclas)





                                                 

In [13]:
# saving the arrays in npz numpy format
 
# Disaster name/type entries are identical in both archives
shared_metadata = dict(disaster_name=dis_all, disaster_type=dis_type_all)

# Pre-disaster archive: images with their segmentation masks
np.savez('tensors_pre_'+database+'.npz',
         images=images_pre,
         masks=masks_pre,
         id=id_all_pre,
         **shared_metadata)

# Post-disaster archive: images with their classification masks
np.savez('tensors_post_'+database+'.npz',
         images=images_post,
         masks=masks_post,
         id=id_all_post,
         **shared_metadata)