## Mask_Polygons.py

In [None]:
import json
from os import path, walk, makedirs
from sys import exit, stderr

from cv2 import fillPoly, imwrite
import numpy as np
from shapely import wkt
from shapely.geometry import mapping, Polygon
from skimage.io import imread
from tqdm import tqdm
import imantics 

# This removes the massive amount of scikit warnings of "low contrast images"
import warnings
warnings.filterwarnings("ignore", category=UserWarning)


def get_dimensions(file_path):
    """
    :param file_path: The path of the file 
    :return: returns (width,height,channels)
    """
    # Open the image we are going to mask
    pil_img = imread(file_path)
    img = np.array(pil_img)
    w, h, c = img.shape
    return (w, h, c)


def mask_polygons_separately(size, shapes):
    """
    :param size: A tuple of the (width,height,channels)
    :param shapes: A list of points in the polygon from get_feature_info
    :returns: a dict of masked polygons with the shapes filled in from cv2.fillPoly
    """
    # For each WKT polygon, read the WKT format and fill the polygon as an image
    masked_polys = {}

    for u in shapes:
        sh = shapes[u]
        mask_img = np.zeros(size, np.uint8)
        i = fillPoly(mask_img, [sh], (255, 255, 255))
        masked_polys[u] = i

    return masked_polys

def mask_polygons_together(size, shapes):
    """
    :param size: A tuple of the (width,height,channels)
    :param shapes: A list of points in the polygon from get_feature_info
    :returns: A numpy array with the polygons filled 255s where there's a building and 0 where not 
    """
    # For each WKT polygon, read the WKT format and fill the polygon as an image
    mask_img = np.zeros(size, np.uint8)

    for u in shapes:
        blank =  np.zeros(size, np.uint8)
        poly = shapes[u] 
        fillPoly(blank, [poly], (1, 1, 1))
        mask_img += blank
    
    # Here we are taking the overlap (+=) and squashing it back to 0
    mask_img[mask_img > 1] = 0

    # Finally we are taking all 1s and making it pure white (255)
    mask_img[mask_img == 1] = 255

    return mask_img

def mask_polygons_together_with_border(size, shapes, border):
    """
    :param size: A tuple of the (width,height,channels)
    :param shapes: A list of points in the polygon from get_feature_info
    :returns: a dict of masked polygons with the shapes filled in from cv2.fillPoly
    """

    # For each WKT polygon, read the WKT format and fill the polygon as an image
    mask_img = np.zeros(size, np.uint8)

    for u in shapes:
        blank =  np.zeros(size, np.uint8)
        # Each polygon stored in shapes is a np.ndarray
        poly = shapes[u]
        
        # Creating a shapely polygon object out of the numpy array 
        polygon = Polygon(poly)

        # Getting the center points from the polygon and the polygon points
        (poly_center_x, poly_center_y) = polygon.centroid.coords[0]
        polygon_points = polygon.exterior.coords

        # Setting a new polygon with each X,Y manipulated based off the center point
        shrunk_polygon = []
        for (x,y) in polygon_points:
            if x < poly_center_x:
                x += border
            elif x > poly_center_x:
                x -= border

            if y < poly_center_y:
                y += border
            elif y > poly_center_y:
                y -= border

            shrunk_polygon.append([x,y])
        
        # Transforming the polygon back to a np.ndarray
        ns_poly = np.array(shrunk_polygon, np.int32)
  
        # Filling the shrunken polygon to add a border between close polygons
        fillPoly(blank, [ns_poly], (1, 1, 1))
        mask_img += blank
    
    mask_img[mask_img > 1] = 0
    mask_img[mask_img == 1] = 255
    return mask_img

def save_masks(masks, output_path, mask_file_name):
    """
    :param masks: dictionary of UID:masked polygons from mask_polygons_separately()
    :param output_path: path to save the masks
    :param mask_file_name: the file name the masks should have 
    """
    # For each filled polygon, write out a separate file, increasing the name
    for m in masks:
        final_out = path.join(output_path,
                              mask_file_name + '_{}.png'.format(m))
        imwrite(final_out, masks[m])

def save_one_mask(masks, output_path, mask_file_name):
    """
    :param masks: list of masked polygons from the mask_polygons_separately function 
    :param output_path: path to save the masks
    :param mask_file_name: the file name the masks should have 
    """
    # For each filled polygon, write the mask shape out to the file per image
    mask_file_name = path.join(output_path, mask_file_name + '.png')
    imwrite(mask_file_name, masks)
    

def read_json(json_path):
    """
    :param json_path: path to load json from
    :returns: a python dictionary of json features
    """
    annotations = json.load(open(json_path))
    return annotations


def get_feature_info(feature):
    """
    :param feature: a python dictionary of json labels
    :returns: a list mapping of polygons contained in the image 
    """
    # Getting each polygon points from the json file and adding it to a dictionary of uid:polygons
    props = {}

    for feat in feature['features']['xy']:
        feat_shape = wkt.loads(feat['wkt'])
        coords = list(mapping(feat_shape)['coordinates'][0])
        props[feat['properties']['uid']] = (np.array(coords, np.int32))

    return props


def mask_chips(json_path, images_directory, output_directory, single_file, border):
    """
    :param json_path: path to find multiple json files for the chips
    :param images_directory: path to the directory containing the images to be masked
    :param output_directory: path to the directory where masks are to be saved
    :param single_file: a boolean value to see if masks should be saved a single file or multiple
    """
    # For each feature in the json we will create a separate mask
    # Getting all files in the directory provided for jsons
    jsons = [j for j in next(walk(json_path))[2] if '_pre' in j]

    # After removing non-json items in dir (if any)
    for j in tqdm([j for j in jsons if j.endswith('json')],
                  unit='poly',
                  leave=False):
        # Our chips start off in life as PNGs
        chip_image_id = path.splitext(j)[0] + '.png'
        mask_file = path.splitext(j)[0]

        # Loading the per chip json
        j_full_path = path.join(json_path, j)
        chip_json = read_json(j_full_path)

        # Getting the full chip path, and loading the size dimensions
        chip_file = path.join(images_directory, chip_image_id)
        chip_size = get_dimensions(chip_file)

        # Reading in the polygons from the json file
        polys = get_feature_info(chip_json)

        # Getting a list of the polygons and saving masks as separate or single image files
        if len(polys) > 0:
            if single_file:
                if border > 0:
                    masked_polys = mask_polygons_together_with_border(chip_size, polys, border)
                else:
                    masked_polys = mask_polygons_together(chip_size, polys)
                save_one_mask(masked_polys, output_directory, mask_file)
            else:
                masked_polys = mask_polygons_separately(chip_size, polys)
                save_masks(masked_polys, output_directory, mask_file)


if __name__ == "__main__":
    import argparse

    # Parse command line arguments
    parser = argparse.ArgumentParser(
        description=
        """mask_polygons.py: Takes in xBD dataset and masks polygons in the image\n\n
        WARNING: This could lead to hundreds of output images per input\n""")

    parser.add_argument('--input',
                        required=True,
                        metavar="/path/to/xBD/",
                        help='Path to parent dataset directory "xBD"')
    parser.add_argument('--single-file', 
                        action='store_true',
                        help='use to save all masked polygon instances to a single file rather than one polygon per mask file')
    parser.add_argument('--border',
                        default=0,
                        type=int,
                        metavar="positive integer for pixel border (e.g. 1)",
                        help='Positive integer used to shrink the polygon by')

    args = parser.parse_args()

    # Getting the list of the disaster types under the xBD directory
    disasters = next(walk(args.input))[1]

    for disaster in tqdm(disasters, desc='Masking', unit='disaster'):
        # Create the full path to the images, labels, and mask output directories
        image_dir = path.join(args.input, disaster, 'images')
        json_dir = path.join(args.input, disaster, 'labels')
        output_dir = path.join(args.input, disaster, 'masks')

        if not path.isdir(image_dir):
            print(
                "Error, could not find image files in {}.\n\n"
                .format(image_dir),
                file=stderr)
            exit(2)

        if not path.isdir(json_dir):
            print(
                "Error, could not find labels in {}.\n\n"
                .format(json_dir),
                file=stderr)
            exit(3)

        if not path.isdir(output_dir):
            makedirs(output_dir)

        mask_chips(json_dir, image_dir, output_dir, args.single_file, args.border)

## Process_data.py

In [None]:
from PIL import Image
import time
import numpy as np
import pandas as pd
from tqdm import tqdm
import os
import math
import random
import argparse
import logging
import json
import cv2
import datetime

import shapely.wkt
import shapely
from shapely.geometry import Polygon
from collections import defaultdict
from sklearn.model_selection import train_test_split
logging.basicConfig(level=logging.INFO)

# Configurations
NUM_WORKERS = 4
NUM_CLASSES = 4
BATCH_SIZE = 64
NUM_EPOCHS = 120
LEARNING_RATE = 0.0001
RANDOM_SEED = 123
LOG_STEP = 150

damage_intensity_encoding = defaultdict(lambda: 0)
damage_intensity_encoding['destroyed'] = 3
damage_intensity_encoding['major-damage'] = 2
damage_intensity_encoding['minor-damage'] = 1
damage_intensity_encoding['no-damage'] = 0


def process_img(img_array, polygon_pts, scale_pct):
    """Process Raw Data. The function extracts a portion of the image based on 
    the given polygon's corners and the specified scale percentage. This function 
    defiens the cropping creteria of the building regions from the larger images for further 
    processing. This function is later called in process_data.py

            Args:
                img_array (numpy array): numpy representation of image.
                polygon_pts (array): corners of the building polygon.

            Returns:
                numpy array: of coordinates to be cropped

    """

    height, width, _ = img_array.shape

    xcoords = polygon_pts[:, 0]
    ycoords = polygon_pts[:, 1]
    xmin, xmax = np.min(xcoords), np.max(xcoords)
    ymin, ymax = np.min(ycoords), np.max(ycoords)

    xdiff = xmax - xmin
    ydiff = ymax - ymin

    #Extend image by scale percentage
    xmin = max(int(xmin - (xdiff * scale_pct)), 0)
    xmax = min(int(xmax + (xdiff * scale_pct)), width)
    ymin = max(int(ymin - (ydiff * scale_pct)), 0)
    ymax = min(int(ymax + (ydiff * scale_pct)), height)

    # This line returns a cropped portion of the input image 
    return img_array[ymin:ymax, xmin:xmax, :]


def process_data(input_path, output_path, output_csv_path, val_split_pct):
    """Process Raw Data into

        Args:
            dir_path (path): Path to the xBD dataset.
            data_type (string): String to indicate whether to process
                                train, test, or holdout data.

        Returns:
            x_data: A list of numpy arrays representing the images for training
            y_data: A list of labels for damage represented in matrix form

    """

    x_data = [] # stores images file names
    y_data = [] # stores images labels

    # iterates over all the items (folders) in the input_
    ## filters out any folders that start with a dot ('.'),
    ### which are typically hidden folders on Unix-based systems (e.g., macOS or Linux).path directory.
    disasters = [folder for folder in os.listdir(input_path) if not folder.startswith('.')]

    # For each d (each element of disasters), it concatenates input_path, "/", d, and "/images" to form a path.
    # list contains the paths to the "images" subdirectory of each disaster folder.
    disaster_paths = ([input_path + "/" +  d + "/images" for d in disasters])

    # list contains all the paths to the image files from different disaster folders.
    image_paths = []
    image_paths.extend([(disaster_path + "/" + pic) for pic in os.listdir(disaster_path)] for disaster_path in disaster_paths)

    # creates a single array that contains all the paths to the image files from different disaster folders,
    ## which can be easily iterated through later for further processing.
    img_paths = np.concatenate(image_paths)

    for img_path in tqdm(img_paths):

        #opens the images and creates image objects
        img_obj = Image.open(img_path)

        # converts image object into a numpy array. 
        ## he img_array variable now holds the pixel data of the image, 
        # represented as a 3-dimensional numpy array (height, width, channels).
        img_array = np.array(img_obj)

        # Get corresponding label for the current image
        ## This line constructs the path to the corresponding JSON label file based on the current img_path.
        ### replaces the extension 'png' with 'json' and replaces the 'images' subdirectory with 
        #### the 'labels' subdirectory in the path.
        label_path = img_path.replace('png', 'json').replace('images', 'labels')

        # opens the JSON label file specified by the label_path.
        label_file = open(label_path)
        
        # reads and parses the contents of the JSON label file
        ## The label_data variable now holds the JSON data containing 
        ### information about the building polygons and their properties.
        label_data = json.load(label_file)

        # feat = building polygon
        for feat in label_data['features']['xy']:

            # only images post-disaster will have damage type
            # extraction of the 'subtype' property from each feat (building polygon) in the JSON data.
            try:
                damage_type = feat['properties']['subtype']
            except: # pre-disaster damage is default no-damage
                damage_type = "no-damage"
                continue
            
            # constructs a unique identifier (poly_uuid) for the current building polygon based on the
            ## 'uid' property present in the feat dictionary.
            # It appends '.png' to the 'uid' to create a unique identifier for the cropped image later.
            poly_uuid = feat['properties']['uid'] + ".png" # a unique id for each polygon (building)

            # appends the numerical damage intensity label to the y_data list based on the damage_type
            # The damage_intensity_encoding dictionary maps damage types to numerical labels, as defined earlier in the code.
            y_data.append(damage_intensity_encoding[damage_type])

            # holds the Shapely geometry object representing the building polygon.
            polygon_geom = shapely.wkt.loads(feat['wkt'])

            # converts the corner points into a numpy array
            polygon_pts = np.array(list(polygon_geom.exterior.coords))

            # calls the process_img() function to crop the region of interest (building) from
            ## the original image img_array based on the extracted polygon_pts.
            ### Takes 80% more than the building area 
            poly_img = process_img(img_array, polygon_pts, 0.8)

            # This line saves the cropped image (poly_img) to the 'output_path' directory with a
            ## filename based on the unique identifier (poly_uuid).
            cv2.imwrite(output_path + "/" + poly_uuid, poly_img)

            # list will store the filenames of the cropped images, which can later be used to create the CSV file.
            x_data.append(poly_uuid)
    
    # creates the output path for the 'train.csv' file, which will store the image filenames
    ## and their corresponding labels for training the model.
    output_train_csv_path = os.path.join(output_csv_path, "train.csv")

    if(val_split_pct > 0):
       x_train, x_test, y_train, y_test = train_test_split(x_data, y_data, test_size=val_split_pct)
       data_array_train = {'uuid': x_train, 'labels': y_train}
       data_array_test = {'uuid': x_test, 'labels': y_test}
       output_test_csv_path = os.path.join(output_csv_path, "test.csv")
       df_train = pd.DataFrame(data_array_train)
       df_test = pd.DataFrame(data_array_test)
       df_train.to_csv(output_train_csv_path)
       df_test.to_csv(output_test_csv_path)
    else: 
       data_array = {'uuid': x_data, 'labels': y_data}
       df = pd.DataFrame(data = data_array)
       df.to_csv(output_train_csv_path)
    

def main():

    parser = argparse.ArgumentParser(description='Run Building Damage Classification Training & Evaluation')
    parser.add_argument('--input_dir',
                        required=True,
                        metavar="/path/to/xBD_input",
                        help="Full path to the parent dataset directory")
    parser.add_argument('--output_dir',
                        required=True,
                        metavar='/path/to/xBD_output',
                        help="Path to new directory to save images")
    parser.add_argument('--output_dir_csv',
                        required=True,
                        metavar='/path/to/xBD_output_csv',
                        help="Path to new directory to save csv")
    parser.add_argument('--val_split_pct', 
                        required=False,
                        default=0.0,
                        metavar='Percentage to split validation',
                        help="Percentage to split ")
    args = parser.parse_args()

    logging.info("Started Processing for Data")
    process_data(args.input_dir, args.output_dir, args.output_dir_csv, float(args.val_split_pct))
    logging.info("Finished Processing Data")


if __name__ == '__main__':
    main()


## Mask_Polygons and Process_data combined

In [None]:
import json
from os import path, walk, makedirs
from sys import exit, stderr

from cv2 import fillPoly, imwrite
import numpy as np
from shapely import wkt
from shapely.geometry import mapping, Polygon
from skimage.io import imread
from tqdm import tqdm
import imantics 

# This removes the massive amount of scikit warnings of "low contrast images"
import warnings
warnings.filterwarnings("ignore", category=UserWarning)

# --------------------------------------------------------------------------
#libs for process_data part
from PIL import Image
import time
import numpy as np
import pandas as pd
from tqdm import tqdm
import os
import math
import random
import argparse
import logging
import json
import cv2
import datetime

import shapely.wkt
import shapely
from shapely.geometry import Polygon
from collections import defaultdict
from sklearn.model_selection import train_test_split
logging.basicConfig(level=logging.INFO)

# ---------------------------------------------------------------------------------
def get_dimensions(file_path):
    """
    :param file_path: The path of the file 
    :return: returns (width,height,channels)
    """
    # Open the image we are going to mask
    pil_img = imread(file_path)
    img = np.array(pil_img)
    w, h, c = img.shape
    return (w, h, c)


def mask_polygons_separately(size, shapes):
    """
    :param size: A tuple of the (width,height,channels)
    :param shapes: A list of points in the polygon from get_feature_info
    :returns: a dict of masked polygons with the shapes filled in from cv2.fillPoly
    """
    # For each WKT polygon, read the WKT format and fill the polygon as an image
    masked_polys = {}

    for u in shapes:
        sh = shapes[u]
        mask_img = np.zeros(size, np.uint8)
        i = fillPoly(mask_img, [sh], (255, 255, 255))
        masked_polys[u] = i

    return masked_polys

def mask_polygons_together(size, shapes):
    """
    :param size: A tuple of the (width,height,channels)
    :param shapes: A list of points in the polygon from get_feature_info
    :returns: A numpy array with the polygons filled 255s where there's a building and 0 where not 
    """
    # For each WKT polygon, read the WKT format and fill the polygon as an image
    mask_img = np.zeros(size, np.uint8)

    for u in shapes:
        blank =  np.zeros(size, np.uint8)
        poly = shapes[u] 
        fillPoly(blank, [poly], (1, 1, 1))
        mask_img += blank
    
    # Here we are taking the overlap (+=) and squashing it back to 0
    mask_img[mask_img > 1] = 0

    # Finally we are taking all 1s and making it pure white (255)
    mask_img[mask_img == 1] = 255

    return mask_img

def mask_polygons_together_with_border(size, shapes, border):
    """
    :param size: A tuple of the (width,height,channels)
    :param shapes: A list of points in the polygon from get_feature_info
    :returns: a dict of masked polygons with the shapes filled in from cv2.fillPoly
    """

    # For each WKT polygon, read the WKT format and fill the polygon as an image
    mask_img = np.zeros(size, np.uint8)

    for u in shapes:
        blank =  np.zeros(size, np.uint8)
        # Each polygon stored in shapes is a np.ndarray
        poly = shapes[u]
        
        # Creating a shapely polygon object out of the numpy array 
        polygon = Polygon(poly)

        # Getting the center points from the polygon and the polygon points
        (poly_center_x, poly_center_y) = polygon.centroid.coords[0]
        polygon_points = polygon.exterior.coords

        # Setting a new polygon with each X,Y manipulated based off the center point
        shrunk_polygon = []
        for (x,y) in polygon_points:
            if x < poly_center_x:
                x += border
            elif x > poly_center_x:
                x -= border

            if y < poly_center_y:
                y += border
            elif y > poly_center_y:
                y -= border

            shrunk_polygon.append([x,y])
        
        # Transforming the polygon back to a np.ndarray
        ns_poly = np.array(shrunk_polygon, np.int32)
  
        # Filling the shrunken polygon to add a border between close polygons
        fillPoly(blank, [ns_poly], (1, 1, 1))
        mask_img += blank
    
    mask_img[mask_img > 1] = 0
    mask_img[mask_img == 1] = 255
    return mask_img

def save_masks(masks, output_path, mask_file_name):
    """
    :param masks: dictionary of UID:masked polygons from mask_polygons_separately()
    :param output_path: path to save the masks
    :param mask_file_name: the file name the masks should have 
    """
    # For each filled polygon, write out a separate file, increasing the name
    for m in masks:
        final_out = path.join(output_path,
                              mask_file_name + '_{}.png'.format(m))
        imwrite(final_out, masks[m])

def save_one_mask(masks, output_path, mask_file_name):
    """
    :param masks: list of masked polygons from the mask_polygons_separately function 
    :param output_path: path to save the masks
    :param mask_file_name: the file name the masks should have 
    """
    # For each filled polygon, write the mask shape out to the file per image
    mask_file_name = path.join(output_path, mask_file_name + '.png')
    imwrite(mask_file_name, masks)
    

def read_json(json_path):
    """
    :param json_path: path to load json from
    :returns: a python dictionary of json features
    """
    annotations = json.load(open(json_path))
    return annotations


def get_feature_info(feature):
    """
    :param feature: a python dictionary of json labels
    :returns: a list mapping of polygons contained in the image 
    """
    # Getting each polygon points from the json file and adding it to a dictionary of uid:polygons
    props = {}

    for feat in feature['features']['xy']:
        feat_shape = wkt.loads(feat['wkt'])
        coords = list(mapping(feat_shape)['coordinates'][0])
        props[feat['properties']['uid']] = (np.array(coords, np.int32))

    return props


def mask_chips(json_path, images_directory, output_directory, single_file, border):
    """
    :param json_path: path to find multiple json files for the chips
    :param images_directory: path to the directory containing the images to be masked
    :param output_directory: path to the directory where masks are to be saved
    :param single_file: a boolean value to see if masks should be saved a single file or multiple
    """
    # For each feature in the json we will create a separate mask
    # Getting all files in the directory provided for jsons
    jsons = [j for j in next(walk(json_path))[2] if '_pre' in j]

    # After removing non-json items in dir (if any)
    for j in tqdm([j for j in jsons if j.endswith('json')],
                  unit='poly',
                  leave=False):
        # Our chips start off in life as PNGs
        chip_image_id = path.splitext(j)[0] + '.png'
        mask_file = path.splitext(j)[0]

        # Loading the per chip json
        j_full_path = path.join(json_path, j)
        chip_json = read_json(j_full_path)

        # Getting the full chip path, and loading the size dimensions
        chip_file = path.join(images_directory, chip_image_id)
        chip_size = get_dimensions(chip_file)

        # Reading in the polygons from the json file
        polys = get_feature_info(chip_json)

        # Getting a list of the polygons and saving masks as separate or single image files
        if len(polys) > 0:
            if single_file:
                if border > 0:
                    masked_polys = mask_polygons_together_with_border(chip_size, polys, border)
                else:
                    masked_polys = mask_polygons_together(chip_size, polys)
                save_one_mask(masked_polys, output_directory, mask_file)
            else:
                masked_polys = mask_polygons_separately(chip_size, polys)
                save_masks(masked_polys, output_directory, mask_file)


if __name__ == "__main__":
    import argparse

    # Parse command line arguments
    parser = argparse.ArgumentParser(
        description=
        """mask_polygons.py: Takes in xBD dataset and masks polygons in the image\n\n
        WARNING: This could lead to hundreds of output images per input\n""")

    parser.add_argument('--input',
                        required=True,
                        metavar="/path/to/xBD/",
                        help='Path to parent dataset directory "xBD"')
    parser.add_argument('--single-file', 
                        action='store_true',
                        help='use to save all masked polygon instances to a single file rather than one polygon per mask file')
    parser.add_argument('--border',
                        default=0,
                        type=int,
                        metavar="positive integer for pixel border (e.g. 1)",
                        help='Positive integer used to shrink the polygon by')

    args = parser.parse_args()

    # Getting the list of the disaster types under the xBD directory
    disasters = next(walk(args.input))[1]

    for disaster in tqdm(disasters, desc='Masking', unit='disaster'):
        # Create the full path to the images, labels, and mask output directories
        image_dir = path.join(args.input, disaster, 'images')
        json_dir = path.join(args.input, disaster, 'labels')
        output_dir = path.join(args.input, disaster, 'masks')

        if not path.isdir(image_dir):
            print(
                "Error, could not find image files in {}.\n\n"
                .format(image_dir),
                file=stderr)
            exit(2)

        if not path.isdir(json_dir):
            print(
                "Error, could not find labels in {}.\n\n"
                .format(json_dir),
                file=stderr)
            exit(3)

        if not path.isdir(output_dir):
            makedirs(output_dir)

        mask_chips(json_dir, image_dir, output_dir, args.single_file, args.border)

## Damage_Classification.py

### Model.py called in Damage_Classification.py

In [None]:
from PIL import Image
import time
import numpy as np
import pandas as pd
from tqdm import tqdm
import os
import math
import random
import argparse
import logging
import json
import cv2
import datetime

from sklearn.metrics import f1_score
from sklearn.utils.class_weight import compute_class_weight
import shapely.wkt
import shapely
from shapely.geometry import Polygon
from collections import defaultdict

import tensorflow as tf
import keras
import ast
from keras import Sequential
from keras.layers import Conv2D, MaxPooling2D, Dense, Flatten, Add, Input, Concatenate
from keras.models import Model
from keras.applications.resnet50 import ResNet50
from keras import backend as K


###
# Loss function for ordinal loss from https://github.com/JHart96/keras_ordinal_categorical_crossentropy
###
def ordinal_loss(y_true, y_pred):
    weights = K.cast(K.abs(K.argmax(y_true, axis=1) - K.argmax(y_pred, axis=1))/(K.int_shape(y_pred)[1] - 1), dtype='float32')
    return (1.0 + weights) * keras.losses.categorical_crossentropy(y_true, y_pred )


###
# Generate a simple CNN
###
def generate_xBD_baseline_model():
  weights = 'imagenet'
  inputs = Input(shape=(128, 128, 3))

  base_model = ResNet50(include_top=False, weights=weights, input_shape=(128, 128, 3))

  for layer in base_model.layers:
    layer.trainable = False

  x = Conv2D(32, (5, 5), strides=(1, 1), padding='same', activation='relu', input_shape=(128, 128, 3))(inputs)
  x = MaxPooling2D(pool_size=(2, 2), strides=None, padding='valid', data_format=None)(x)

  x = Conv2D(64, (3, 3), strides=(1, 1), padding='same', activation='relu')(x)
  x = MaxPooling2D(pool_size=(2, 2), strides=None, padding='valid', data_format=None)(x)

  x = Conv2D(64, (3, 3), strides=(1, 1), padding='same', activation='relu')(x)
  x = MaxPooling2D(pool_size=(2, 2), strides=None, padding='valid', data_format=None)(x)

  x = Flatten()(x)

  base_resnet = base_model(inputs)
  base_resnet = Flatten()(base_resnet)

  concated_layers = Concatenate()([x, base_resnet])

  concated_layers = Dense(2024, activation='relu')(concated_layers)
  concated_layers = Dense(524, activation='relu')(concated_layers)
  concated_layers = Dense(124, activation='relu')(concated_layers)
  output = Dense(4, activation='relu')(concated_layers)

  model = Model(inputs=inputs, outputs=output)
  return model


### Original Damage_Classification.py

In [None]:
from PIL import Image
import time
import numpy as np
import pandas as pd
from tqdm import tqdm
import os
import math
import random
import argparse
import logging
import json
import cv2
import datetime

from sklearn.metrics import f1_score
from sklearn.utils.class_weight import compute_class_weight
import shapely.wkt
import shapely
from shapely.geometry import Polygon
from collections import defaultdict

import tensorflow as tf
import keras
import ast
from keras import Sequential
from keras.layers import Conv2D, MaxPooling2D, Dense, Flatten, Add, Input, Concatenate
from keras.models import Model
from keras.applications.resnet50 import ResNet50
from keras import backend as K

from model import *

logging.basicConfig(level=logging.INFO)

# Configurations
NUM_WORKERS = 4 
NUM_CLASSES = 4
BATCH_SIZE = 64
NUM_EPOCHS = 100 
LEARNING_RATE = 0.0001
RANDOM_SEED = 123
LOG_STEP = 150
LOG_DIR = '/path/to/logs' + datetime.datetime.now().strftime("%Y%m%d-%H%M%S")

damage_intensity_encoding = dict()
damage_intensity_encoding[3] = '3'
damage_intensity_encoding[2] = '2' 
damage_intensity_encoding[1] = '1' 
damage_intensity_encoding[0] = '0' 


###
# Function to compute unweighted f1 scores, just for reference
###
def f1(y_true, y_pred):
    def recall(y_true, y_pred):
        """Recall metric.

        Only computes a batch-wise average of recall.

        Computes the recall, a metric for multi-label classification of
        how many relevant items are selected.
        """
        true_positives = K.sum(K.round(K.clip(y_true * y_pred, 0, 1)))
        possible_positives = K.sum(K.round(K.clip(y_true, 0, 1)))
        recall = true_positives / (possible_positives + K.epsilon())
        return recall

    def precision(y_true, y_pred):
        """Precision metric.

        Only computes a batch-wise average of precision.

        Computes the precision, a metric for multi-label classification of
        how many selected items are relevant.
        """
        true_positives = K.sum(K.round(K.clip(y_true * y_pred, 0, 1)))
        predicted_positives = K.sum(K.round(K.clip(y_pred, 0, 1)))
        precision = true_positives / (predicted_positives + K.epsilon())
        return precision


    precision = precision(y_true, y_pred)
    recall = recall(y_true, y_pred)
    return 2*((precision*recall)/(precision+recall+K.epsilon()))


###
# Creates data generator for validation set
###
def validation_generator(test_csv, test_dir):
    df = pd.read_csv(test_csv)
    df = df.replace({"labels" : damage_intensity_encoding })

    gen = keras.preprocessing.image.ImageDataGenerator(
                             rescale=1/255.)


    return gen.flow_from_dataframe(dataframe=df,
                                   directory=test_dir,
                                   x_col='uuid',
                                   y_col='labels',
                                   batch_size=BATCH_SIZE,
                                   shuffle=False,
                                   seed=RANDOM_SEED,
                                   class_mode="categorical",
                                   target_size=(128, 128))


###
# Applies random transformations to training data
###
def augment_data(df, in_dir):

    df = df.replace({"labels" : damage_intensity_encoding })
    gen = keras.preprocessing.image.ImageDataGenerator(horizontal_flip=True,
                             vertical_flip=True,
                             width_shift_range=0.1,
                             height_shift_range=0.1,
                             rescale=1/255.)
    return gen.flow_from_dataframe(dataframe=df,
                                   directory=in_dir,
                                   x_col='uuid',
                                   y_col='labels',
                                   batch_size=BATCH_SIZE,
                                   seed=RANDOM_SEED,
                                   class_mode="categorical",
                                   target_size=(128, 128))


# Run training and evaluation based on existing or new model
def train_model(train_data, train_csv, test_data, test_csv, model_in, model_out):

    model = generate_xBD_baseline_model()

    # Add model weights if provided by user
    if model_in is not None:
        model.load_weights(model_in)

    df = pd.read_csv(train_csv)
    class_weights = compute_class_weight('balanced', np.unique(df['labels'].to_list()), df['labels'].to_list());
    d_class_weights = dict(enumerate(class_weights))

    samples = df['uuid'].count()
    steps = np.ceil(samples/BATCH_SIZE)

    # Augments the training data
    train_gen_flow = augment_data(df, train_data)

    #Set up tensorboard logging
    tensorboard_callbacks = keras.callbacks.TensorBoard(log_dir=LOG_DIR,
                                                        batch_size=BATCH_SIZE)

    
    #Filepath to save model weights
    filepath = model_out + "-saved-model-{epoch:02d}-{accuracy:.2f}.hdf5"
    checkpoints = keras.callbacks.ModelCheckpoint(filepath,
                                                    monitor=['loss', 'accuracy'],
                                                    verbose=1,
                                                    save_best_only=False,
                                                    mode='max')

    #Adds adam optimizer
    adam = keras.optimizers.Adam(lr=LEARNING_RATE,
                                    beta_1=0.9,
                                    beta_2=0.999,
                                    decay=0.0,
                                    amsgrad=False)


    model.compile(loss=ordinal_loss, optimizer=adam, metrics=['accuracy', f1])

    #Training begins
    model.fit_generator(generator=train_gen_flow,
                        steps_per_epoch=steps,
                        epochs=NUM_EPOCHS,
                        workers=NUM_WORKERS,
                        use_multiprocessing=True,
                        class_weight=d_class_weights,
                        callbacks=[tensorboard_callbacks, checkpoints],
                        verbose=1)


    #Evalulate f1 weighted scores on validation set
    validation_gen = validation_generator(test_csv, test_data)
    predictions = model.predict(validation_gen)

    val_trues = validation_gen.classes
    val_pred = np.argmax(predictions, axis=-1)

    f1_weighted = f1_score(val_trues, val_pred, average='weighted')
    print(f1_weighted)


def main():

    parser = argparse.ArgumentParser(description='Run Building Damage Classification Training & Evaluation')
    parser.add_argument('--train_data',
                        required=True,
                        metavar="/path/to/xBD_train",
                        help="Full path to the train data directory")
    parser.add_argument('--train_csv',
                        required=True,
                        metavar="/path/to/xBD_split",
                        help="Full path to the train csv")
    parser.add_argument('--test_data',
                        required=True,
                        metavar="/path/to/xBD_test",
                        help="Full path to the test data directory")
    parser.add_argument('--test_csv',
                        required=True,
                        metavar="/path/to/xBD_split",
                        help="Full path to the test csv")
    parser.add_argument('--model_in',
                        default=None,
                        metavar='/path/to/input_model',
                        help="Path to save model")
    parser.add_argument('--model_out',
                        required=True,
                        metavar='/path/to/save_model',
                        help="Path to save model")

    args = parser.parse_args()

    train_model(args.train_data, args.train_csv, args.test_data, args.test_csv, args.model_in, args.model_out)


if __name__ == '__main__':
    main()


## Submission_to_overlay_polys.py

In [None]:
from PIL import Image, ImageDraw
import rasterio.features
import shapely.geometry
import numpy as np

def save_img(path_to_image, path_to_localization, path_to_damage, path_to_output): 
    no_damage_polygons = []
    minor_damage_polygons = []
    major_damage_polygons = []
    destroyed_polygons = []

    # Load the challenge output localization image
    localization = Image.open(path_to_localization)
    loc_arr = np.array(localization)

    # If the localization has damage values convert all non-zero to 1
    # This helps us find where buildings are, and then use the damage file
    # to get the value of the classified damage
    loc_arr = (loc_arr >= 1).astype(np.uint8)

    # Load the challenge output damage image
    damage = Image.open(path_to_damage)
    dmg_arr = np.array(damage)

    # Use the localization to get damage only were they have detected buildings
    mask_arr = dmg_arr*loc_arr
    
    # Get the value of each index put into a dictionary like structure
    shapes = rasterio.features.shapes(mask_arr)
    
    # Iterate through the unique values of the shape files 
    # This is a destructive iterator or else we'd use the pythonic for x in shapes if x blah 
    for shape in shapes:
        if shape[1] == 1:
            no_damage_polygons.append(shapely.geometry.Polygon(shape[0]["coordinates"][0]))
        elif shape[1] == 2:
            minor_damage_polygons.append(shapely.geometry.Polygon(shape[0]["coordinates"][0]))
        elif shape[1] == 3:
            major_damage_polygons.append(shapely.geometry.Polygon(shape[0]["coordinates"][0]))
        elif shape[1] == 4:
            destroyed_polygons.append(shapely.geometry.Polygon(shape[0]["coordinates"][0]))
        elif shape[1] == 0:
            continue
        else:
            print("Found non-conforming damage type: {}".format(shape[1]))
    
    # Loading post image
    img = Image.open(path_to_image) 
    
    draw = ImageDraw.Draw(img, 'RGBA')
    
    damage_dict = {
        "no-damage": (0, 255, 0, 100),
        "minor-damage": (0, 0, 255, 125),
        "major-damage": (255, 69, 0, 125),
        "destroyed": (255, 0, 0, 125),
        "un-classified": (255, 255, 255, 125)
    }
    
    # Go through each list and write it to the post image we just loaded
    for polygon in no_damage_polygons:
        x,y = polygon.exterior.coords.xy
        coords = list(zip(x,y))
        draw.polygon(coords, damage_dict["no-damage"])

    for polygon in minor_damage_polygons:
        x,y = polygon.exterior.coords.xy
        coords = list(zip(x,y))
        draw.polygon(coords, damage_dict["minor-damage"])

    for polygon in major_damage_polygons:
        x,y = polygon.exterior.coords.xy
        coords = list(zip(x,y))
        draw.polygon(coords, damage_dict["major-damage"])

    for polygon in destroyed_polygons:
        x,y = polygon.exterior.coords.xy
        coords = list(zip(x,y))
        draw.polygon(coords, damage_dict["destroyed"])

    img.save(path_to_output)

if __name__ == "__main__":
    import argparse

    # Parse command line arguments
    parser = argparse.ArgumentParser(
        description=
        """submission_to_overlay_polys.py: takes the submission file and overlays the predicted values over the image and saves the image with the polygons overtop to a new file"""
    )
    parser.add_argument('--image',
                        required=True,
                        metavar='/path/to/post_disaster.png',
                        help="Full path to the image to use to overlay the predictions onto")
    parser.add_argument('--damage',
                        required=True,
                        metavar='/path/to/challenge/prediction_damage.png',
                        help="Full path to the prediction json output from model"
    )
    parser.add_argument('--localization',
                        required=True,
                        metavar='/path/to/challenge/prediction_damage.png',
                        help="Full path to the prediction json output from model"
    )
    parser.add_argument('--output',
                        required=True,
                        metavar='/path/to/save/img_with_overlays.png',
                        help="Full path to save the final single output file to (include filename.png)"
    )

    args = parser.parse_args()

    # run main function 
    save_img(args.image, args.localization, args.damage, args.output)



## chatgpt

In [1]:
import os
import json
from PIL import Image, ImageDraw
import numpy as np

In [17]:
%pip install shapely

Note: you may need to restart the kernel to use updated packages.


In [2]:
from shapely import wkt
from shapely.geometry import mapping, Polygon

In [30]:

# This part needs to be debugged
def get_coords(feature):
    """
    :param feature: a python dictionary of json labels
    :returns: a list mapping of polygons contained in the image 
    """
    # Getting each polygon points from the json file and adding it to a dictionary of uid:polygons
    props = {}

    for feat in feature['features']['xy']:
        feat_shape = wkt.loads(feat['wkt'])
        coords = list(mapping(feat_shape)['coordinates'])
        print(coords)
    return coords

def read_post_disaster_json(json_file):
    with open(json_file, 'r') as f:
        data = json.load(f)
    polygons = data['features']['xy']
    #print(polygons)
    #coordinates = [polygon['wkt'] for polygon in polygons]

    coordinates = get_coords(data)
    #coordinates=[get_coords(d['features']['xy']) for d in data]
    #print(type(coordinates))
    #print(coordinates)
    labels = [polygon['properties']['subtype'] for polygon in polygons]
    print(len(coordinates), len(labels))
    #print(labels)
    return coordinates, labels

In [17]:
def create_polygon_mask(image_size, coordinates):
    mask = Image.new('1', image_size, 0)
    draw = ImageDraw.Draw(mask)
    draw.polygon(coordinates, outline=1, fill=1)
    return np.array(mask)

In [18]:
def create_class_label_mask(image_size, coordinates, labels):
    mask = Image.new('L', image_size, 0)
    draw = ImageDraw.Draw(mask)
    for coord, label in zip(coordinates, labels):
        class_label = 0  # Default to no damage
        if label == 'minor-damage':
            class_label = 1
        elif label == 'major-damage':
            class_label = 2
        elif label == 'destroyed':
            class_label = 3
        draw.polygon(coord, outline=class_label, fill=class_label)
    return np.array(mask)

In [6]:
post_disaster_folder = '/Users/sevincjakab/neuefische_bootcamp/20230717-NewRepo-Capstone-Building_Damage/Capstone_Building_Damage/data/subset/pre_post_separate_subset_sev/post_dis_files/post_dis_labels'
images_folder = '/Users/sevincjakab/neuefische_bootcamp/20230717-NewRepo-Capstone-Building_Damage/Capstone_Building_Damage/data/subset/pre_post_separate_subset_sev/post_dis_files/post_dis_images'
output_folder = '/Users/sevincjakab/neuefische_bootcamp/20230717-NewRepo-Capstone-Building_Damage/Capstone_Building_Damage/data/subset/pre_post_separate_subset_sev-output'  # Folder to save the masked images and masks

In [None]:
#%pip install pillow

In [7]:
from PIL import Image
import numpy as np

In [31]:
# List all JSON files in the 'post_disaster' folder
post_disaster_json_files = [file for file in sorted(os.listdir(post_disaster_folder)) if file.endswith('.json')]
print(post_disaster_json_files)

for post_disaster_json_file in post_disaster_json_files:
    # Extract image file name from JSON file name (assuming a naming convention is followed)
    image_file_name = os.path.splitext(post_disaster_json_file)[0] + '.png'  
    print(image_file_name)

    #constructs the path to the corresponding image file for the current JSON file.
    image_file_path = os.path.join(images_folder, image_file_name)
    print(image_file_path)

    #constructs the path to the current JSON file being processed in the loop.
    json_file_path = os.path.join(post_disaster_folder, post_disaster_json_file)
    print(json_file_path)
    
    # Read JSON and process masks and class label masks
    ## reads the JSON file and returns the coordinates and labels associated with the housing polygons.
    coordinates, labels = read_post_disaster_json(json_file_path)

    #loads the image and stores it in the variable image.
    image = Image.open(image_file_path)
    print(image)
    #The function creates a binary mask with pixel values set to 1 inside the housing polygons and 0 outside the polygons.
    mask = create_polygon_mask(image.size, coordinates)
    print(mask)
    #The function creates a mask where each pixel is assigned a class label based on the housing type 
    ## (no damage, mine damage, major damage, destroyed).
    print(coordinates)
    class_mask = create_class_label_mask(image.size, coordinates, labels)

    # Save the masked images and masks to the output folder
    ## This line saves the original image with the housing polygons masked into the output_folder.
    image.save(os.path.join(output_folder, image_file_name.replace('.png', '_masked.png')))

    #This line saves the mask (binary mask with housing polygons) as a numpy array in the output_folder
    np.save(os.path.join(output_folder, image_file_name.replace('.png', '_mask.npy')), mask)

    #This line saves the class_mask (mask with class labels) as a numpy array in the output_folder. 
    np.save(os.path.join(output_folder, image_file_name.replace('.png', '_class_mask.npy')), class_mask)


['mexico-earthquake_00000005_post_disaster.json', 'mexico-earthquake_00000006_post_disaster.json']
mexico-earthquake_00000005_post_disaster.png
/Users/sevincjakab/neuefische_bootcamp/20230717-NewRepo-Capstone-Building_Damage/Capstone_Building_Damage/data/subset/pre_post_separate_subset_sev/post_dis_files/post_dis_images/mexico-earthquake_00000005_post_disaster.png
/Users/sevincjakab/neuefische_bootcamp/20230717-NewRepo-Capstone-Building_Damage/Capstone_Building_Damage/data/subset/pre_post_separate_subset_sev/post_dis_files/post_dis_labels/mexico-earthquake_00000005_post_disaster.json
[((551.7649554532602, 849.708612914271), (570.5961304464547, 840.3695749093819), (572.4333182532723, 841.1350698282885), (603.5543322001602, 903.424945247882), (603.333402509672, 904.6032369115727), (582.566011974031, 914.7660024941968), (581.1667906251415, 914.5450728061128), (549.5605211417059, 850.8455171109238), (551.7649554532602, 849.708612914271))]
[((658.8812678039307, 986.0025926553803), (645.3261

ValueError: incorrect coordinate type