# Setup
Before running, make sure the following files are uploaded to the root of the working directory:
* all_labels.json (Sentinel 2 labels)
* nwpu_lakes_30samples.json
* nwpu_lakes_20samplesA.json
* nwpu_lakes_20samplesB.json

In [None]:
# Toggle for the Colab-only setup below: 1 runs it, the commented-out 0 skips it.
#colab = 0
colab = 1

if colab == 1:
    # Notebook magics: select TensorFlow 2.x, then install rasterio with pip.
    %tensorflow_version 2.x
    !pip install rasterio
    #!pip install --default-timeout=1000 tensorflow-gpu==2.0

In [None]:
import tensorflow as tf
# Display the installed TensorFlow version as the cell output.
tf.__version__

In [None]:
# Display the GPU devices TensorFlow can see (an empty list means none).
tf.config.list_physical_devices('GPU')

# Datasets

## Download
Includes Sentinel-2 images and masks, and NWPU images.

In [None]:
import requests

def download_file_from_google_drive(id, destination):
    """Download a file from Google Drive and write it to a local path.

    Args:
        id: Google Drive file identifier, sent as the 'id' query parameter.
            (The name shadows the builtin but is kept for existing callers.)
        destination: Local path the response body is written to.

    Raises:
        requests.HTTPError: If the final response carries an error status.
            Nothing is written to ``destination`` in that case.
    """
    def get_confirm_token(response):
        # The value of a cookie whose name starts with 'download_warning'
        # is what gets sent back as the 'confirm' parameter.
        for key, value in response.cookies.items():
            if key.startswith('download_warning'):
                return value

        return None

    def save_response_content(response, destination):
        CHUNK_SIZE = 32768

        with open(destination, 'wb') as f:
            for chunk in response.iter_content(CHUNK_SIZE):
                if chunk: # Filter out keep-alive new chunks
                    f.write(chunk)

    URL ='https://docs.google.com/uc?export=download'

    # The session is closed on exit so pooled connections are released.
    with requests.Session() as session:
        response = session.get(URL, params = { 'id' : id }, stream = True)
        token = get_confirm_token(response)

        if token:
            # Drop the unread first response before repeating the request
            # with the confirmation token.
            response.close()
            params = { 'id' : id, 'confirm' : token }
            response = session.get(URL, params = params, stream = True)

        try:
            # Fail loudly instead of saving an HTTP error body as the file.
            response.raise_for_status()
            save_response_content(response, destination)
        finally:
            response.close()

In [None]:
import zipfile

def unzip(f):
    """Extract every member of the zip archive *f* into the current directory."""
    archive = zipfile.ZipFile(f, 'r')
    try:
        archive.extractall()
    finally:
        archive.close()

In [None]:
# Google Drive file id and local archive name for the NWPU imagery.
nwpu_imagery_id = '14kkcuU6wd9UMvjaDrg3PNI-e_voCi8HL'
nwpu_imagery_zip = 'NWPU_imagery.zip'

# Google Drive file id and local archive name for the Sentinel-2 imagery.
s2_imagery_id = '1iMfIjr_ul49Ghs2ewazjCt8HMPfhY47h'
s2_imagery_zip = 's2cloudless_imagery.zip'

# Google Drive file id and local archive name for the Sentinel-2 label imagery.
s2_label_imagery_id = '1c7MpwKVejoUuW9F2UaF_vps8Vq2RZRfR'
s2_label_imagery_zip = 's2cloudless_label_imagery.zip'

In [None]:
# Fetch all three archives first, then unpack each into the working directory.
downloads = (
    (nwpu_imagery_id, nwpu_imagery_zip),
    (s2_imagery_id, s2_imagery_zip),
    (s2_label_imagery_id, s2_label_imagery_zip),
)

for drive_id, zip_name in downloads:
    download_file_from_google_drive(drive_id, zip_name)

for drive_id, zip_name in downloads:
    unzip(zip_name)

## Clean up non-lake NWPU imagery

In [None]:
import os, shutil

# Give the 'images' directory (presumably the unzipped NWPU archive) its
# working name. Best-effort: a filesystem error, e.g. because the rename
# already happened on an earlier run, is ignored; anything else propagates.
try:
    os.rename('images', 'nwpu_imagery')
except OSError:
    pass

# Every directory below 'nwpu_imagery' ([1:] skips the root itself) whose
# path does not contain 'lake' is removed.
subdirs = [x[0] for x in os.walk('nwpu_imagery')][1:]
to_delete = [s for s in subdirs if 'lake' not in s]
for k in to_delete:
    shutil.rmtree(k, ignore_errors = True)

# The remaining 'lake' folder becomes 'data', the sub-folder name the later
# cells read from.
os.rename('nwpu_imagery' + os.sep + 'lake', 'nwpu_imagery' + os.sep + 'data')

## Clean up .zips

In [None]:
# The archives are no longer needed once extracted.
for archive in (nwpu_imagery_zip, s2_imagery_zip, s2_label_imagery_zip):
    os.remove(archive)

## Generate NWPU masks

In [None]:
import matplotlib
from PIL import Image, ImageDraw
import numpy as np

def write_mask(data, image_paths, images, i):
    """Rasterize the labelled polygons of one image into a mask file.

    Args:
        data: Mapping from image file name to an annotation record whose
            'regions' mapping holds, per region, 'shape_attributes' with
            'all_points_x' and 'all_points_y' vertex lists.
        image_paths: Sequence of image file names; ``image_paths[i]`` selects
            the record in ``data`` and names the output file.
        images: Sequence of 3-D image arrays parallel to ``image_paths``;
            only the first two dimensions of ``images[i]`` are used.
        i: Index of the image to process.

    Every polygon is filled with 1 and the per-polygon rasters are summed, so
    overlapping regions produce values above 1. The result is written to
    'nwpu_label_imagery/data/<image name>_mask.jpg'.
    """
    # Imported here so the ``matplotlib.image`` submodule is guaranteed to be
    # loaded; a bare ``import matplotlib`` does not promise that.
    import matplotlib.image

    regions = data[image_paths[i]]['regions']
    nx, ny, _ = np.shape(images[i])
    mask = np.zeros((ny, nx))

    for key in regions:
        shape = regions[key]['shape_attributes']

        # Vertices are interleaved as (y, x) pairs and drawn on a canvas of
        # PIL size (ny, nx), i.e. transposed relative to ``mask``; the
        # transpose below brings the raster back to shape (ny, nx).
        polygon = np.vstack(
            (shape['all_points_y'], shape['all_points_x'])
        ).reshape((-1,), order = 'F').tolist()

        img = Image.new('L', (ny, nx), 0)
        ImageDraw.Draw(img).polygon(polygon, outline = 1, fill = 1)

        mask = mask + np.array(img).T

    matplotlib.image.imsave('nwpu_label_imagery' + os.sep + 'data' + os.sep + image_paths[i] + '_mask.jpg', mask.astype('uint8'))


In [None]:
# Create the mask output folder (and its parent). exist_ok keeps the cell
# re-runnable when the folders are already there.
os.makedirs(os.path.join('nwpu_label_imagery', 'data'), exist_ok = True)

In [None]:
import json

# Load the three NWPU label files; ``with`` closes each file once parsed.
data = []
for json_file in (
        'nwpu_lakes_30samples.json',
        'nwpu_lakes_20samplesA.json',
        'nwpu_lakes_20samplesB.json'):
    with open(json_file) as label_file:
        data.append(json.load(label_file))

# Merge into one mapping; on duplicate keys the later file wins.
data_merged = {}
for d in data:
    data_merged.update(d)

# The keys are used below as the file names of the labelled images.
nwpu_image_paths = sorted(data_merged.keys())

In [None]:
# Keep only the NWPU images that have labels: every regular file in the data
# folder whose name is not among the labelled names is deleted.
nwpu_dir_path = os.sep.join(('nwpu_imagery', 'data'))
nwpu_all_image_paths = list(filter(
    lambda name: os.path.isfile(os.path.join(nwpu_dir_path, name)),
    os.listdir(nwpu_dir_path)))

for path in nwpu_all_image_paths:
    if path in nwpu_image_paths:
        continue
    os.remove(os.path.join(nwpu_dir_path, path))

In [None]:
import rasterio

# Read every labelled NWPU image; each array is stored transposed (.T), which
# reverses the axis order returned by dataset.read().
nwpu_images = []
for path in nwpu_image_paths:
    image_file = os.sep.join(('nwpu_imagery', 'data', path))
    with rasterio.open(image_file) as dataset:
        bands = dataset.read()
    nwpu_images.append(bands.T)

In [None]:
# Write one mask file per labelled NWPU image.
for i, _name in enumerate(nwpu_image_paths):
    write_mask(data_merged, nwpu_image_paths, nwpu_images, i)

## Image batch generator

In [None]:
def image_batch_generator(img_dir, mask_dir, files, batch_size = 32, sz = (512, 512)):
    """Yield random (images, masks) batches forever.

    Args:
        img_dir: Directory whose 'data' sub-folder holds the images.
        mask_dir: Directory whose 'data' sub-folder holds '<file>_mask.jpg'
            for every image file name.
        files: Image file names to sample from.
        batch_size: Number of samples per batch; names are drawn with
            ``np.random.choice``, so a name can repeat within a batch.
        sz: Size passed to ``Image.resize`` for both image and mask.

    Yields:
        Tuple ``(batch_x, batch_y)``: ``batch_x`` holds the images divided by
        255 with at most three channels; ``batch_y`` holds the 0/1 masks with
        a trailing singleton axis.
    """
    while True: # loop infinitely, as long as called by training function
        batch = np.random.choice(files, size = batch_size)

        batch_x = []
        batch_y = []

        for f in batch:
            # ``with`` releases the file handle as soon as the pixels are read.
            with Image.open(f'{img_dir}/data/{f}') as raw_image:
                raw = np.array(raw_image.resize(sz))

            # Make RGB from...
            if raw.ndim == 2: # grayscale
                raw = np.stack((raw,) * 3, axis = -1)
            else: # RGBA
                raw = raw[:, :, 0:3]

            # Crop image square
            n = min(raw.shape[0], raw.shape[1])
            raw = raw[:n, :n, :]

            batch_x.append(raw)

            # Get masks
            with Image.open(f'{mask_dir}/data/{f}_mask.jpg') as mask_image:
                mask = np.array(mask_image.resize(sz))

            # Flatten multi-channel masks to 2D; a single-channel mask is
            # already 2D and is used as-is.
            if mask.ndim == 3:
                mask = np.max(mask, axis = 2)
            mask = (mask > 100).astype('int') # Water pixels > 100 in mask

            mask = mask[:n, :n]

            batch_y.append(mask)

        # Preprocess batch
        batch_x = np.array(batch_x) / 255 # Normalize to [0, 1]
        batch_y = np.expand_dims(np.array(batch_y), 3) # Add singleton dimension

        yield (batch_x, batch_y) # Yield images and masks together

## Augmentations

### Make generators

In [None]:
SEED = 111
batch_size = 1

# Augmentation settings: zoom, rotation and horizontal flips; no shear and no
# feature-wise normalisation.
augmentation_options = dict(
    featurewise_center = False,
    featurewise_std_normalization = False,
    shear_range = 0,
    zoom_range = 0.2,
    rotation_range = 45,
    horizontal_flip = True)
datagen = tf.keras.preprocessing.image.ImageDataGenerator(**augmentation_options)

# Every flow shares the same size, seed and ordering settings; the image and
# mask flows are zipped pairwise further down.
flow_options = dict(
    target_size = (512, 512),
    batch_size = batch_size,
    class_mode = None,
    seed = SEED,
    shuffle = False)

s2_image_generator = datagen.flow_from_directory('s2cloudless_imagery', **flow_options)
s2_mask_generator = datagen.flow_from_directory('s2cloudless_label_imagery', **flow_options)
nwpu_image_generator = datagen.flow_from_directory('nwpu_imagery', **flow_options)
nwpu_mask_generator = datagen.flow_from_directory('nwpu_label_imagery', **flow_options)

### Generate, save augmented images

In [None]:
def make_augmented_images(n_aug_files, generator, image_dir, label_dir):
    """Save ``n_aug_files`` image/mask pairs drawn from ``generator``.

    Each item of ``generator`` is unpacked as ``(x, y)``; the first element of
    each is squeezed and written to '<image_dir>/data/augimage<k>.jpg' and
    '<label_dir>/data/augimage<k>.jpg_mask.jpg' respectively.
    """
    for counter in range(n_aug_files):
        x, y = next(generator)
        image_file = os.sep.join((image_dir, 'data', 'augimage' + str(counter) + '.jpg'))
        label_file = os.sep.join((label_dir, 'data', 'augimage' + str(counter) + '.jpg_mask.jpg'))
        matplotlib.image.imsave(image_file, np.squeeze(x[0]))
        matplotlib.image.imsave(label_file, np.squeeze(y[0]))

In [None]:
s2_n_aug_files = 100

# Pair each image batch with its mask batch and divide both by 255.
s2_aug_generator = (
    tuple(np.divide(np.array(batch_pair, dtype = 'float64'), 255))
    for batch_pair in zip(s2_image_generator, s2_mask_generator)
)

make_augmented_images(s2_n_aug_files, s2_aug_generator, 's2cloudless_imagery', 's2cloudless_label_imagery')

In [None]:
nwpu_n_aug_files = 140

# Pair each image batch with its mask batch and divide both by 255.
nwpu_aug_generator = (
    tuple(np.divide(np.array(batch_pair, dtype = 'float64'), 255))
    for batch_pair in zip(nwpu_image_generator, nwpu_mask_generator)
)

make_augmented_images(nwpu_n_aug_files, nwpu_aug_generator, 'nwpu_imagery', 'nwpu_label_imagery')

## Make datasets

In [None]:
from random import shuffle

# Fraction of each dataset's files used for training; the rest is the test split.
prop_train = 0.6

# Per-dataset batch sizes. These are what the generators and step counts
# below use, rather than whatever value a global ``batch_size`` happens to
# hold from an earlier cell.
s2_batch_size = 16
nwpu_batch_size = 16

s2_image_paths = os.listdir('s2cloudless_imagery/data')
nwpu_image_paths = os.listdir('nwpu_imagery/data')

# Shuffle in place so the split below is random.
shuffle(s2_image_paths)
shuffle(nwpu_image_paths)

s2_split = int(prop_train * len(s2_image_paths))
nwpu_split = int(prop_train * len(nwpu_image_paths))

s2_train_image_paths = s2_image_paths[0:s2_split]
s2_test_image_paths = s2_image_paths[s2_split:]
nwpu_train_image_paths = nwpu_image_paths[0:nwpu_split]
nwpu_test_image_paths = nwpu_image_paths[nwpu_split:]

s2_train_generator = image_batch_generator('s2cloudless_imagery', 's2cloudless_label_imagery', s2_train_image_paths, batch_size = s2_batch_size)
s2_test_generator = image_batch_generator('s2cloudless_imagery', 's2cloudless_label_imagery', s2_test_image_paths, batch_size = s2_batch_size)
nwpu_train_generator = image_batch_generator('nwpu_imagery', 'nwpu_label_imagery', nwpu_train_image_paths, batch_size = nwpu_batch_size)
nwpu_test_generator = image_batch_generator('nwpu_imagery', 'nwpu_label_imagery', nwpu_test_image_paths, batch_size = nwpu_batch_size)

# Batches per pass over each split; at least one, so a split smaller than a
# batch still gets a step.
s2_train_steps = max(1, len(s2_train_image_paths) // s2_batch_size)
s2_test_steps = max(1, len(s2_test_image_paths) // s2_batch_size)
nwpu_train_steps = max(1, len(nwpu_train_image_paths) // nwpu_batch_size)
nwpu_test_steps = max(1, len(nwpu_test_image_paths) // nwpu_batch_size)

# U-Net Model

In [None]:
def mean_iou(y_true, y_pred):
    """Intersection-over-union of the thresholded prediction, over the batch.

    Channel 0 of ``y_pred`` is binarised at 0.5 and compared with channel 0
    of ``y_true``. The result is 1 when the union is empty.
    """
    truth = y_true[:, :, :, 0]
    predicted = tf.keras.backend.cast(y_pred[:, :, :, 0] > 0.5, 'float32')

    both = tf.logical_and(tf.equal(truth, 1), tf.equal(predicted, 1))
    either = tf.add(truth, predicted)

    n_inter = tf.math.count_nonzero(both)
    n_union = tf.math.count_nonzero(either)

    ratio = tf.cast(n_inter / n_union, 'float32')
    return tf.where(tf.equal(n_union, 0), 1., ratio)

In [None]:
from tensorflow.keras.layers import Input, Conv2D, MaxPooling2D, UpSampling2D
from tensorflow.keras.layers import Concatenate, Conv2DTranspose
from tensorflow.keras.models import Model

def unet(sz = (512, 512, 3)):
    """Build and compile the U-Net segmentation model.

    Six encoder levels (two 3x3 convolutions plus max-pooling each, filters
    doubling from 8) feed a decoder of six stages, each made of two 3x3
    convolutions, a stride-2 transposed convolution and a concatenation with
    the matching encoder output. A 1x1 sigmoid convolution produces the
    single-channel output. Compiled with rmsprop, binary cross-entropy and
    the ``mean_iou`` metric.
    """
    inputs = Input(sz)
    x = inputs

    # Encoder: remember each level's output for the skip connections.
    n_filters = 8
    skips = []
    for _level in range(6):
        for _rep in range(2):
            x = Conv2D(n_filters, 3, activation = 'relu', padding = 'same')(x)
        skips.append(x)
        x = MaxPooling2D()(x)
        n_filters = n_filters * 2

    # Decoder: the first stage is the bottleneck; every later stage halves
    # both filter counts before its convolutions.
    up_filters = 64
    for depth, skip in enumerate(reversed(skips)):
        if depth > 0:
            up_filters = up_filters // 2
            n_filters = n_filters // 2
        for _rep in range(2):
            x = Conv2D(n_filters, 3, activation = 'relu', padding = 'same')(x)
        x = Conv2DTranspose(up_filters, 2, strides = (2, 2), padding = 'same')(x)
        x = Concatenate(axis = 3)([x, skip])

    # Classification head.
    for _rep in range(2):
        x = Conv2D(n_filters, 3, activation = 'relu', padding = 'same')(x)
    outputs = Conv2D(1, 1, activation = 'sigmoid')(x)

    model = Model(inputs = [inputs], outputs = [outputs])
    model.compile(optimizer = 'rmsprop', loss = 'binary_crossentropy', metrics = [mean_iou])

    return model

In [None]:
# One independent U-Net per dataset.
s2_model, nwpu_model = unet(), unet()

## Callbacks

In [None]:
class PlotLearning(tf.keras.callbacks.Callback):
    """Callback that records and prints loss and IoU after every epoch."""

    def on_train_begin(self, logs=None):
        """Reset the recorded history at the start of training."""
        self.i = 0  # number of epochs seen so far
        self.x = []  # epoch indices
        self.losses = []
        self.val_losses = []
        self.acc = []  # 'mean_iou' per epoch
        self.val_acc = []  # 'val_mean_iou' per epoch
        self.logs = []  # raw logs dict of every epoch

    def on_epoch_end(self, epoch, logs=None):
        """Store this epoch's metrics and print them.

        A missing ``logs`` is treated as an empty dict, so every metric is
        then recorded as None.
        """
        if logs is None:
            logs = {}

        self.logs.append(logs)
        self.x.append(self.i)
        self.losses.append(logs.get('loss'))
        self.val_losses.append(logs.get('val_loss'))
        self.acc.append(logs.get('mean_iou'))
        self.val_acc.append(logs.get('val_mean_iou'))
        self.i += 1
        print('i=',self.i,'loss=',logs.get('loss'),'val_loss=',logs.get('val_loss'),'mean_iou=',logs.get('mean_iou'),'val_mean_iou=',logs.get('val_mean_iou'))

In [None]:
import gc

class GarbageCollect(tf.keras.callbacks.Callback):
    """Callback that runs a garbage-collection pass after every epoch."""

    def on_train_begin(self, logs=None):
        """Do nothing at the start of training."""
        pass

    def on_epoch_end(self, epoch, logs=None):
        """Run ``gc.collect()``; ``epoch`` and ``logs`` are unused."""
        gc.collect()

In [None]:
def build_callbacks(filepath = 'unet.h5'):
    """Return a fresh list of callbacks for one training or evaluation run.

    Args:
        filepath: Where the checkpoint callback saves weights. Defaults to
            'unet.h5'; pass a distinct path per model to keep one model's
            checkpoint from overwriting another's.

    Returns:
        List holding a ``ModelCheckpoint`` (best only, weights only), a
        ``PlotLearning`` and a ``GarbageCollect`` instance.
    """
    checkpointer = tf.keras.callbacks.ModelCheckpoint(
        filepath = filepath,
        verbose = 0,
        save_best_only = True,
        save_weights_only = True)
    callbacks = [checkpointer, PlotLearning(), GarbageCollect()]
    return callbacks

# Train/Test

## Train/test on own datasets

In [None]:
# Multiprocessing is disabled and the queue holds a single batch, to keep RAM
# usage from ballooning.
s2_fit_options = dict(
    epochs = 200,
    steps_per_epoch = s2_train_steps,
    validation_data = s2_test_generator,
    validation_steps = s2_test_steps,
    callbacks = build_callbacks(),
    verbose = 1,
    use_multiprocessing = False,
    max_queue_size = 1)

s2_history = s2_model.fit(s2_train_generator, **s2_fit_options)

In [None]:
# Train the NWPU model on its own train split, validating on its test split.
nwpu_fit_options = dict(
    epochs = 200,
    steps_per_epoch = nwpu_train_steps,
    validation_data = nwpu_test_generator,
    validation_steps = nwpu_test_steps,
    callbacks = build_callbacks(),
    verbose = 1,
    use_multiprocessing = False,
    max_queue_size = 1)

nwpu_history = nwpu_model.fit(nwpu_train_generator, **nwpu_fit_options)

## Test on other datasets

In [None]:
# Cross-dataset test: score the S2-trained model on the NWPU test split, so
# that the batches and the step count (nwpu_test_steps) refer to the same split.
s2_cross_eval = s2_model.evaluate(
    x = nwpu_test_generator,
    verbose = 1,
    steps = nwpu_test_steps,
    callbacks = build_callbacks(),
    use_multiprocessing = False,
    max_queue_size = 1)

In [None]:
# Cross-dataset test: score the NWPU-trained model on the S2 test split, so
# that the batches and the step count (s2_test_steps) refer to the same split.
nwpu_cross_eval = nwpu_model.evaluate(
    x = s2_test_generator,
    verbose = 1,
    steps = s2_test_steps,
    callbacks = build_callbacks(),
    use_multiprocessing = False,
    max_queue_size = 1)

# Evaluate on own datasets

In [None]:
# Show which metrics each training run recorded.
for history in (s2_history, nwpu_history):
    print(history.history.keys())

In [None]:
import matplotlib.pyplot as plt

def plot_history(s2_history, nwpu_history):
    """Plot train and test IoU per epoch for both models, side by side.

    Args:
        s2_history: Object whose ``history`` mapping has 'mean_iou' and
            'val_mean_iou' series; drawn in the left panel.
        nwpu_history: Same, for the right panel.
    """
    plt.figure(figsize = (10, 10))

    panels = (
        (121, s2_history, 'S2 model IoU'),
        (122, nwpu_history, 'NWPU model IoU'),
    )

    for position, history, title in panels:
        plt.subplot(position)
        curves = history.history
        plt.plot(curves['mean_iou'], 'k', lw = 1) # Training IoU curve
        plt.plot(curves['val_mean_iou'], 'r', lw = 1) # Test IoU curve
        plt.ylim(0, 1) # Y axis limits
        plt.title(title)
        plt.ylabel('IoU')
        plt.xlabel('Epoch number')
        plt.legend(['train', 'test'], loc = 'upper left')

    plt.show()

In [None]:
# Draw the IoU curves of both training runs.
plot_history(s2_history, nwpu_history)