In [12]:
from PIL import Image
import time
import numpy as np
!pip install pandas
import pandas as pd
!pip install tqdm
from tqdm import tqdm
import sys,os
import math
import random
import argparse
import logging
import json
!pip install opencv-python
import cv2
import datetime

!pip install shapely
import shapely.wkt
import shapely
from shapely.geometry import Polygon
from collections import defaultdict
from sklearn.model_selection import train_test_split

from sklearn.metrics import f1_score
from sklearn.utils.class_weight import compute_class_weight
import shapely.wkt
import shapely
from shapely.geometry import Polygon
from collections import defaultdict

import tensorflow as tf
import keras
import ast
from keras import Sequential
from keras.layers import Conv2D, MaxPooling2D, Dense, Flatten, Add, Input, Concatenate
from keras.models import Model
from tensorflow.keras.applications.resnet50 import ResNet50
from keras import backend as K



#### Process Data

In [3]:
# Configurations -- every tunable constant used by the cells below.
NUM_WORKERS = 4   # worker count handed to Keras during training
NUM_CLASSES = 4   # number of damage levels in damage_intensity_encoding
BATCH_SIZE = 16 #64
NUM_EPOCHS = 10 #120
LEARNING_RATE = 0.0001
RANDOM_SEED = 123
LOG_STEP = 150    # not referenced by any cell visible in this notebook

# TensorBoard log directory used by train_model(). It was referenced there but
# never defined, which made training fail with a NameError. A timestamp keeps
# the logs of separate runs apart.
LOG_DIR = os.path.join("logs", "classification_" + datetime.datetime.now().strftime("%Y%m%d-%H%M%S"))

# Damage label -> integer class. Any label that is not listed (the defaultdict
# fallback) is encoded as 0, i.e. the same class as 'no-damage'.
damage_intensity_encoding = defaultdict(lambda: 0)
damage_intensity_encoding['destroyed'] = 3
damage_intensity_encoding['major-damage'] = 2
damage_intensity_encoding['minor-damage'] = 1
damage_intensity_encoding['no-damage'] = 0

In [4]:
def process_img(img_array, polygon_pts, scale_pct):
    """Crop the padded bounding box of a polygon out of an image.

    Args:
        img_array: 3-D image array indexed as [row, column, channel].
        polygon_pts: 2-D array whose column 0 holds x (column) coordinates
            and column 1 holds y (row) coordinates.
        scale_pct: fraction of the box width/height added as padding on
            each side before cropping.

    Returns:
        The slice of ``img_array`` covering the padded box, clamped to the
        image bounds.
    """
    img_h, img_w, _channels = img_array.shape

    xs = polygon_pts[:, 0]
    ys = polygon_pts[:, 1]
    left, right = np.min(xs), np.max(xs)
    top, bottom = np.min(ys), np.max(ys)

    # Padding is proportional to the size of the un-padded box.
    pad_x = (right - left) * scale_pct
    pad_y = (bottom - top) * scale_pct

    # Truncate to whole pixels and keep the window inside the image.
    col_start = max(int(left - pad_x), 0)
    col_stop = min(int(right + pad_x), img_w)
    row_start = max(int(top - pad_y), 0)
    row_stop = min(int(bottom + pad_y), img_h)

    return img_array[row_start:row_stop, col_start:col_stop, :]

In [5]:
def process_data(input_path, output_path, output_csv_path, val_split_pct):
    """Crop every damage-labelled building out of the images and index the crops.

    ``input_path`` must contain one folder per disaster, each holding an
    ``images`` folder of PNG files and a sibling ``labels`` folder with one
    JSON file per image (same base name). Every feature that carries a
    damage ``subtype`` is cropped with ``process_img`` and written to
    ``output_path`` as ``<uid>.png``; features without a subtype are skipped.

    Args:
        input_path: directory containing the per-disaster folders.
        output_path: existing directory that receives the cropped PNGs.
        output_csv_path: existing *directory* that receives ``train.csv``
            (and ``test.csv`` when ``val_split_pct`` > 0).
        val_split_pct: fraction of the samples written to ``test.csv``;
            a value <= 0 puts every sample into ``train.csv``.

    Raises:
        OSError: if a crop cannot be written to ``output_path``.
    """
    x_data = []
    y_data = []

    # os.path.join keeps this portable; hard-coded "\\" separators only work on Windows.
    img_paths = []
    for disaster in sorted(os.listdir(input_path)):
        if disaster.startswith('.'):
            continue
        images_dir = os.path.join(input_path, disaster, "images")
        if not os.path.isdir(images_dir):
            continue  # stray file or folder without images
        img_paths.extend(os.path.join(images_dir, pic)
                         for pic in sorted(os.listdir(images_dir))
                         if pic.lower().endswith('.png'))

    for img_path in tqdm(img_paths):
        # Address the label file explicitly instead of os.chdir()-ing into the
        # labels folder, which silently changed the notebook's working directory.
        labels_dir = os.path.join(os.path.dirname(os.path.dirname(img_path)), "labels")
        # splitext swaps only the extension; str.replace('png', 'json') would
        # also rewrite a 'png' that happens to occur inside the file name.
        stem = os.path.splitext(os.path.basename(img_path))[0]
        label_path = os.path.join(labels_dir, stem + ".json")
        with open(label_path) as label_file:
            label_data = json.load(label_file)

        # Only features with a damage subtype are used (the original comment
        # states that only post-disaster images carry one).
        damaged = [feat for feat in label_data['features']['xy']
                   if (feat.get('properties') or {}).get('subtype') is not None]
        if not damaged:
            continue  # nothing to crop, so do not decode the image at all

        with Image.open(img_path) as img_obj:
            img_array = np.array(img_obj)

        for feat in damaged:
            properties = feat['properties']
            poly_uuid = properties['uid'] + ".png"

            polygon_geom = shapely.wkt.loads(feat['wkt'])
            polygon_pts = np.array(list(polygon_geom.exterior.coords))
            poly_img = process_img(img_array, polygon_pts, 0.8)
            if poly_img.size == 0:
                # Degenerate polygon or one lying outside the image:
                # cv2.imwrite cannot encode an empty array.
                continue

            # NOTE(review): PIL decodes to RGB while cv2.imwrite assumes BGR, so
            # the red and blue channels of the saved crops are swapped. Left
            # unchanged here so the crops stay comparable with earlier runs.
            if not cv2.imwrite(os.path.join(output_path, poly_uuid), poly_img):
                raise OSError("Could not write crop " + os.path.join(output_path, poly_uuid))

            # Index the sample only once its file is on disk, keeping x/y aligned.
            x_data.append(poly_uuid)
            y_data.append(damage_intensity_encoding[properties['subtype']])

    output_train_csv_path = os.path.join(output_csv_path, "train.csv")

    if val_split_pct > 0:
        # Seeded so the train/test split is reproducible between runs.
        x_train, x_test, y_train, y_test = train_test_split(
            x_data, y_data, test_size=val_split_pct, random_state=RANDOM_SEED)
        output_test_csv_path = os.path.join(output_csv_path, "test.csv")
        df_train = pd.DataFrame({'uuid': x_train, 'labels': y_train})
        df_test = pd.DataFrame({'uuid': x_test, 'labels': y_test})
        df_train.to_csv(output_train_csv_path)
        df_test.to_csv(output_test_csv_path)
    else:
        df = pd.DataFrame({'uuid': x_data, 'labels': y_data})
        df.to_csv(output_train_csv_path)

In [14]:
# Locations of the raw xBD data and of the generated crops.
# NOTE(review): these are machine-specific absolute paths; adjust before running elsewhere.
input_dir = r"C:\Users\namacdon\Desktop\delete\xView2\train_images_labels_targets\disasters"
output_dir = r"C:\Users\namacdon\Desktop\delete\xView2\train_images_labels_targets\output"
# process_data() joins "train.csv"/"test.csv" onto this argument, so it has to
# be a directory. Pre-creating an empty "output.csv" *file* here made the final
# to_csv() call fail after all crops had already been written.
output_dir_csv = output_dir
val_split_pct = 0.0

# Refuse to overwrite the results of an earlier run.
if os.path.isdir(output_dir):
    print("Output Path Already Exists")
    sys.exit(1)
os.makedirs(output_dir)

os.makedirs(output_dir_csv, exist_ok=True)
if os.path.exists(os.path.join(output_dir_csv, "train.csv")):
    print("Output CSV Path Already Exists")
    sys.exit(1)

process_data(input_dir, output_dir, output_dir_csv, float(val_split_pct))

#### Define Model

In [None]:
def ordinal_loss(y_true, y_pred):
    """Categorical cross-entropy scaled by the distance between classes.

    The per-sample cross-entropy is multiplied by ``1 + d``, where ``d`` is
    the absolute difference between the arg-max class of ``y_true`` and of
    ``y_pred`` divided by (number of columns of ``y_pred`` - 1).
    """
    true_class = K.argmax(y_true, axis=1)
    pred_class = K.argmax(y_pred, axis=1)
    largest_gap = K.int_shape(y_pred)[1] - 1

    penalty = K.cast(K.abs(true_class - pred_class)/largest_gap, dtype='float32')
    cross_entropy = keras.losses.categorical_crossentropy(y_true, y_pred)
    return (1.0 + penalty) * cross_entropy

In [None]:
def generate_xBD_baseline_model(weights=None, num_classes=None):
    """Build the two-branch damage classifier for 128x128 RGB crops.

    One branch is a small three-stage Conv/MaxPool stack, the other a frozen
    ResNet50 without its classification head. Both are flattened,
    concatenated and passed through three dense layers to a softmax output.

    Args:
        weights: value forwarded to ``ResNet50(weights=...)``. The default
            ``None`` keeps the original behaviour of a randomly initialised
            backbone.
        num_classes: width of the output layer; defaults to ``NUM_CLASSES``.

    Returns:
        The uncompiled Keras ``Model``.
    """
    if num_classes is None:
        num_classes = NUM_CLASSES

    input_shape = (128, 128, 3)
    inputs = Input(shape=input_shape)

    base_model = ResNet50(include_top=False, weights=weights, input_shape=input_shape)

    # NOTE(review): the backbone is frozen even when `weights` is None, i.e. it
    # stays at its random initialisation unless load_weights() fills it in later.
    for layer in base_model.layers:
        layer.trainable = False

    # Small convolutional branch. The input shape comes from `inputs`, so the
    # layers need no `input_shape` argument of their own.
    x = Conv2D(32, (5, 5), strides=(1, 1), padding='same', activation='relu')(inputs)
    x = MaxPooling2D(pool_size=(2, 2))(x)

    x = Conv2D(64, (3, 3), strides=(1, 1), padding='same', activation='relu')(x)
    x = MaxPooling2D(pool_size=(2, 2))(x)

    x = Conv2D(64, (3, 3), strides=(1, 1), padding='same', activation='relu')(x)
    x = MaxPooling2D(pool_size=(2, 2))(x)

    x = Flatten()(x)

    # ResNet50 branch on the same input.
    base_resnet = base_model(inputs)
    base_resnet = Flatten()(base_resnet)

    concated_layers = Concatenate()([x, base_resnet])

    concated_layers = Dense(2024, activation='relu')(concated_layers)
    concated_layers = Dense(524, activation='relu')(concated_layers)
    concated_layers = Dense(124, activation='relu')(concated_layers)
    # Softmax instead of relu: categorical cross-entropy expects a probability
    # distribution, and an all-zero relu output turns its normalisation into 0/0.
    output = Dense(num_classes, activation='softmax')(concated_layers)

    model = Model(inputs=inputs, outputs=output)
    return model

In [None]:
# Build the baseline network once and print its layer-by-layer summary as a quick sanity check.
model = generate_xBD_baseline_model()
model.summary()

In [None]:
def f1(y_true, y_pred):
    """Batch-wise F1 score built from rounded, clipped tensors.

    Positives are counted by clipping a tensor to [0, 1], rounding and
    summing. Precision and recall are the true-positive count divided by the
    predicted / possible positive count; ``K.epsilon()`` guards each division.
    Only a batch-wise value is computed.
    """
    def _positive_count(tensor):
        # Number of entries that round to 1 after clipping to [0, 1].
        return K.sum(K.round(K.clip(tensor, 0, 1)))

    hits = _positive_count(y_true * y_pred)

    # Recall: share of the relevant items that were selected.
    batch_recall = hits / (_positive_count(y_true) + K.epsilon())
    # Precision: share of the selected items that are relevant.
    batch_precision = hits / (_positive_count(y_pred) + K.epsilon())

    return 2*((batch_precision*batch_recall)/(batch_precision+batch_recall+K.epsilon()))

In [None]:
def validation_generator(test_csv, test_dir):
    """Create the un-augmented, un-shuffled image iterator for the validation set.

    Args:
        test_csv: CSV file with a ``uuid`` (file name) and a ``labels`` column.
        test_dir: directory containing the files named in ``uuid``.

    Returns:
        The iterator produced by ``ImageDataGenerator.flow_from_dataframe``,
        yielding 128x128 images rescaled by 1/255 with one-hot labels.
    """
    df = pd.read_csv(test_csv)
    df = df.replace({"labels" : damage_intensity_encoding })
    # class_mode="categorical" requires string labels, but the CSV stores
    # integers (and the replace above maps damage names to integers), so the
    # column is converted explicitly.
    df = df.assign(labels=df["labels"].astype(int).astype(str))

    gen = keras.preprocessing.image.ImageDataGenerator(
                             rescale=1/255.)

    return gen.flow_from_dataframe(dataframe=df,
                                   directory=test_dir,
                                   x_col='uuid',
                                   y_col='labels',
                                   # Fixed class list: index i always means damage level i,
                                   # even if a level is missing from this CSV.
                                   classes=[str(level) for level in range(NUM_CLASSES)],
                                   batch_size=BATCH_SIZE,
                                   shuffle=False,
                                   seed=RANDOM_SEED,
                                   class_mode="categorical",
                                   target_size=(128, 128))

In [None]:
def augment_data(df, in_dir):
    """Create the augmenting image iterator used for training.

    Args:
        df: DataFrame with a ``uuid`` (file name) and a ``labels`` column.
        in_dir: directory containing the files named in ``uuid``.

    Returns:
        The iterator produced by ``ImageDataGenerator.flow_from_dataframe``,
        yielding 128x128 images rescaled by 1/255 with random flips and
        shifts of up to 10%, together with one-hot labels.
    """
    df = df.replace({"labels" : damage_intensity_encoding })
    # class_mode="categorical" requires string labels, but the CSV stores
    # integers (and the replace above maps damage names to integers), so the
    # column is converted explicitly.
    df = df.assign(labels=df["labels"].astype(int).astype(str))

    gen = keras.preprocessing.image.ImageDataGenerator(horizontal_flip=True,
                             vertical_flip=True,
                             width_shift_range=0.1,
                             height_shift_range=0.1,
                             rescale=1/255.)
    return gen.flow_from_dataframe(dataframe=df,
                                   directory=in_dir,
                                   x_col='uuid',
                                   y_col='labels',
                                   # Fixed class list: index i always means damage level i,
                                   # even if a level is missing from this frame.
                                   classes=[str(level) for level in range(NUM_CLASSES)],
                                   batch_size=BATCH_SIZE,
                                   seed=RANDOM_SEED,
                                   class_mode="categorical",
                                   target_size=(128, 128))

In [None]:
def train_model(train_data, train_csv, test_data, test_csv, model_in, model_out, log_dir=None):
    """Train the baseline classifier and report the weighted F1 on the validation set.

    Args:
        train_data: directory with the training crops.
        train_csv: CSV with ``uuid`` and ``labels`` columns for the training crops.
        test_data: directory with the validation crops.
        test_csv: CSV with ``uuid`` and ``labels`` columns for the validation crops.
        model_in: optional weights file loaded before training (``None`` to skip).
        model_out: path prefix for the per-epoch checkpoint files.
        log_dir: TensorBoard log directory. Defaults to the module-level
            ``LOG_DIR`` when one is defined, otherwise to a timestamped
            folder under ``logs``.

    Returns:
        The weighted F1 score on the validation set (it is also printed).
    """
    model = generate_xBD_baseline_model()

    # Add model weights if provided by user
    if model_in is not None:
        model.load_weights(model_in)

    df = pd.read_csv(train_csv)
    labels = df['labels'].to_numpy()
    classes = np.unique(labels)
    # `classes` and `y` are keyword-only in current scikit-learn releases.
    class_weights = compute_class_weight(class_weight='balanced', classes=classes, y=labels)
    # Key by the label itself, not by enumeration index, so the weights stay
    # correct when a damage level is missing from the training CSV.
    d_class_weights = {int(label): float(weight) for label, weight in zip(classes, class_weights)}

    samples = df['uuid'].count()
    steps = int(np.ceil(samples/BATCH_SIZE))  # Keras expects an integer step count

    # Augments the training data
    train_gen_flow = augment_data(df, train_data)

    # Set up tensorboard logging. LOG_DIR is not defined in the cells visible
    # above, so fall back to a timestamped folder instead of raising a NameError.
    if log_dir is None:
        log_dir = globals().get("LOG_DIR") or os.path.join(
            "logs", "classification_" + datetime.datetime.now().strftime("%Y%m%d-%H%M%S"))
    # The old `batch_size` argument is no longer supported by this callback.
    tensorboard_callbacks = keras.callbacks.TensorBoard(log_dir=log_dir)

    # Filepath to save model weights
    filepath = model_out + "-saved-model-{epoch:02d}-{accuracy:.2f}.hdf5"
    # `monitor` must be a single metric name; a checkpoint is still written
    # every epoch because save_best_only is False.
    checkpoints = keras.callbacks.ModelCheckpoint(filepath,
                                                    monitor='accuracy',
                                                    verbose=1,
                                                    save_best_only=False,
                                                    mode='max')

    # Adds adam optimizer (`lr` and `decay` are deprecated; decay=0.0 was the default anyway)
    adam = keras.optimizers.Adam(learning_rate=LEARNING_RATE,
                                    beta_1=0.9,
                                    beta_2=0.999,
                                    amsgrad=False)

    model.compile(loss=ordinal_loss, optimizer=adam, metrics=['accuracy', f1])

    # Training begins. fit() accepts the iterator directly, just as predict()
    # does below; fit_generator() is deprecated.
    model.fit(train_gen_flow,
              steps_per_epoch=steps,
              epochs=NUM_EPOCHS,
              workers=NUM_WORKERS,
              use_multiprocessing=True,
              class_weight=d_class_weights,
              callbacks=[tensorboard_callbacks, checkpoints],
              verbose=1)

    # Evaluate f1 weighted scores on validation set
    validation_gen = validation_generator(test_csv, test_data)
    predictions = model.predict(validation_gen)

    val_trues = validation_gen.classes
    val_pred = np.argmax(predictions, axis=-1)

    f1_weighted = f1_score(val_trues, val_pred, average='weighted')
    print(f1_weighted)
    return f1_weighted