In [None]:
#@title Author: Michael Evans { display-mode: "form" }
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# Introduction

This notebook demonstrates a workflow for training a [fully convolutional neural network (FCNN)](https://www.cv-foundation.org/openaccess/content_cvpr_2015/papers/Long_Fully_Convolutional_Networks_2015_CVPR_paper.pdf), specifically [U-net](https://arxiv.org/abs/1505.04597), on previously extracted remote sensing data using TensorFlow. In this example, we read 256x256 pixel image chips saved as zipped tfrecords in Google Cloud Storage (Note: the data can be read in from anywhere) containing the visible, infrared, and near infrared bands of Sentinel-2 imagery and a binary label band. This relatively simple model is a mostly unmodified version of [this example](https://github.com/tensorflow/models/blob/master/samples/outreach/blogs/segmentation_blogpost/image_segmentation.ipynb) from the TensorFlow docs.

In [1]:
import json
import os
from datetime import datetime
from os.path import join
from sys import path

import numpy as np
import tensorflow as tf

In [None]:
## Clone repo containing preprocessing and prediction functions
!git clone https://github.com/mjevans26/Satellite_ComputerVision.git

In [None]:
# Load the necessary modules from repo
# Make the cloned repository importable.
# NOTE(review): assumes the repo was cloned into /content (the usual Colab
# working directory) — adjust if running elsewhere.
path.append('/content/Satellite_ComputerVision')

# Dataset builders and model helpers used throughout the rest of the notebook.
from utils.processing import get_training_dataset, get_eval_dataset
from utils.model_tools import get_model, weighted_bce, make_confusion_matrix

In [2]:
# Names and locations of inputs/outputs in Cloud Storage.
BUCKET = '{YOUR_GCS BUCKET HERE}'
BUCKET_PATH = join('gs://', BUCKET)

# Sub-folders (joined onto BUCKET_PATH in later cells) for each data split.
FOLDER = 'NC_solar'
PRED_BASE = 'data/predict'
TRAIN_BASE = 'data/training'
EVAL_BASE = 'data/eval'

# Model inputs (Sentinel bands) and the response variable.
# NOTE(review): `thermalBands` holds B8/B11/B12, which the introduction describes
# as infrared / near-infrared bands; the variable name is kept for compatibility.
opticalBands = ['B2', 'B3', 'B4']
thermalBands = ['B8', 'B11', 'B12']

BANDS = [*opticalBands, *thermalBands]
RESPONSE = 'landcover'
FEATURES = [*BANDS, RESPONSE]

# Size and shape of the square patches expected by the model.
KERNEL_SIZE = 256
KERNEL_SHAPE = [KERNEL_SIZE] * 2

# One fixed-length float32 parsing spec per feature (every band plus the label),
# then a name -> spec mapping.
COLUMNS = [
  tf.io.FixedLenFeature(shape=KERNEL_SHAPE, dtype=tf.float32)
  for _ in FEATURES
]
FEATURES_DICT = {name: column for name, column in zip(FEATURES, COLUMNS)}

## Training Data
First, we will read previously exported training data from GCS into TFRecordDatasets.

In [None]:
# make sure we have training records
trainPattern = join(BUCKET_PATH, FOLDER, TRAIN_BASE, '*.tfrecord.gz')
print(trainPattern)
trainFiles = !gsutil ls {trainPattern}

In [None]:
# Build the training dataset. Batch (16), repeat (True) and normalization
# axis (0) are left at the defaults of get_training_dataset.
# NOTE(review): the trailing 2000 is passed positionally — presumably a shuffle
# buffer size; confirm against utils.processing.
training = get_training_dataset(
    trainFiles,
    FEATURES_DICT,
    BANDS,
    RESPONSE,
    2000,
)

In [None]:
# confirm the training dataset produces expected results
# Pull a single element with the built-in next(); the Python 2 style `.next()`
# method is not part of the Python 3 iterator protocol and is not guaranteed to
# exist on dataset iterators.
iterator = iter(training)
print(next(iterator))

In [None]:
evalPattern = join(BUCKET_PATH, FOLDER, EVAL_BASE, '*.tfrecord.gz')
print(evalPattern)
evalFiles = !gsutil ls {evalPattern}

In [None]:
# Build the evaluation dataset from the listed eval records, using the same
# feature spec, input bands and response variable as the training dataset.
evaluation = get_eval_dataset(
    evalFiles,
    FEATURES_DICT,
    BANDS,
    RESPONSE,
)

## Model

In [5]:
# Global hyperparameters for model training.
EPOCHS = 100
LR = 0.0001
# Used to derive steps_per_epoch; the training-dataset cell notes a default
# batch size of 16, so keep the two in sync.
BATCH = 16

OPTIMIZER = tf.keras.optimizers.Adam(
    learning_rate=LR,
    beta_1=0.9,
    beta_2=0.999,
)

# Metrics grouped by key — presumably the model's output names ('logits' and
# 'classes'); the checkpoint callback monitors 'val_classes_mean_iou'.
METRICS = dict(
    logits=[
        tf.keras.metrics.MeanSquaredError(name='mse'),
        tf.keras.metrics.Precision(name='precision'),
        tf.keras.metrics.Recall(name='recall'),
    ],
    classes=[
        tf.keras.metrics.MeanIoU(num_classes=2, name = 'mean_iou'),
    ],
)

# Directory where checkpoints and the final model are written.
OUT_DIR  = '{YOUR DIRECTORY FOR SAVING MODEL FILES HERE}'

When our training data is unbalanced it can be helpful to provide weights for the positive examples so that the model doesn't 'learn' to just predict zeros everywhere. To calculate the weight we read through the dataset and count up the number of 1s and 0s in our labels.

In [None]:
# Instantiate a nonsense model: it is never trained, it only exists so that
# make_confusion_matrix has a model to run over the training data.
m = get_model(depth = len(BANDS), optim = OPTIMIZER, loss = 'mse', mets = [tf.keras.metrics.categorical_accuracy], bias = None)
train_con_mat = make_confusion_matrix(training, m)
# Row sums of the confusion matrix — presumably the pixel count of each true
# class (index 0 = negative, index 1 = positive); verify against utils.model_tools.
classums = train_con_mat.sum(axis = 1)

# Both ratios below divide by a class count, so an empty class would silently
# produce inf/nan weights and biases.
assert classums[0] > 0 and classums[1] > 0, f'Training labels must contain both classes, got counts {classums}'

# Calculate and save Bias, Weight, and Train size based on data
BIAS = np.log(classums[1]/classums[0])
WEIGHT = classums[0]/classums[1]
# Number of training patches: total counted pixels divided by pixels per patch.
# Uses KERNEL_SIZE rather than a literal 256 so it follows the patch size
# configured above.
TRAIN_SIZE = train_con_mat.sum()//(KERNEL_SIZE*KERNEL_SIZE)

During model training we will save the best performing set of weights as calculated on evaluation data at the end of each epoch. The metric we track is the mean intersection over union.

In [None]:
## DEFINE CALLBACKS

def get_weighted_bce(y_true, y_pred):
    """Two-argument loss wrapper around `weighted_bce`.

    Binds the module-level WEIGHT (computed from the training labels) so the
    loss can be passed to the model with a plain (y_true, y_pred) signature.
    """
    return weighted_bce(y_true, y_pred, WEIGHT)

# Date stamp (e.g. '05Mar21') used to label the files written during this run.
# `datetime` and `os` come from the notebook's top import cell.
now = datetime.now()
date = now.strftime("%d%b%y")

# define a checkpoint callback to save best models during training:
# a file is written only when the monitored validation mean IoU improves.
checkpoint = tf.keras.callbacks.ModelCheckpoint(
    os.path.join(OUT_DIR, 'best_weights_' + date + '.hdf5'),
    monitor='val_classes_mean_iou',
    verbose=1,
    save_best_only=True,
    mode='max'
    )

Create and train the model

In [None]:
# Build the real model: one input channel per band, the weighted BCE loss and
# the output bias computed from the training labels.
m = get_model(
    depth = len(BANDS),
    optim = OPTIMIZER,
    loss = get_weighted_bce,
    mets = METRICS,
    bias = BIAS,
)

In [None]:
# Train the model. One epoch covers TRAIN_SIZE patches in batches of BATCH;
# the checkpoint callback keeps the best model seen on the evaluation data.
m.fit(
    x = training,
    epochs = EPOCHS,
    steps_per_epoch = int(TRAIN_SIZE//BATCH),
    validation_data = evaluation,
    callbacks = [checkpoint],
)

# Save the final model under a date-stamped name in OUT_DIR.
m.save(os.path.join(OUT_DIR, f'{date}_unet256.h5'))

## Re-Training
 The code below will continue training an existing model. You may need to re-create your training and evaluation datasets if you intend to use new or different data from that on which the model was originally trained.

In [None]:
# Import `models` from the public Keras API. `tensorflow.python.keras` is a
# private module, and the rest of this notebook builds its optimizer, metrics
# and callbacks from `tf.keras`, so loading the model through the same public
# package keeps everything on one Keras implementation.
from tensorflow.keras import models

In [None]:
# Define where pre-trained model files and weights will come from
MODEL_FILE = '{PATH TO .h5 MODEL FILE}'
# Placeholder now has its closing brace, matching the other placeholders.
WEIGHT_FILE = '{PATH TO .hdf5 WEIGHT FILE}'
# Name of the validation metric tracked by the checkpoint callback.
EVAL_METRIC = 'val_classes_mean_iou'
# optionally change the learning rate
LR = 0.0001
# optionally change the number of epochs to re-train
EPOCHS = 100

In [None]:
# this non-keras native function was used during training so we need to supply it when re-instantiating the trained model
def get_weighted_bce(y_true, y_pred):
    """Two-argument loss wrapper around `weighted_bce`.

    Binds the module-level WEIGHT so the loss has a plain (y_true, y_pred)
    signature. WEIGHT must already be defined (it is computed in the
    class-weight cell of the Model section) before the loss is called.
    """
    return weighted_bce(y_true, y_pred, WEIGHT)

# Date stamp (e.g. '05Mar21') used to label the files written during this run.
# `datetime` and `os` come from the notebook's top import cell.
now = datetime.now()
date = now.strftime("%d%b%y")

# define a checkpoint callback to save best models during training.
# It monitors EVAL_METRIC so the callback and the baseline lookup in the
# following cells always refer to the same metric.
checkpoint = tf.keras.callbacks.ModelCheckpoint(
    os.path.join(OUT_DIR, 'best_weights_' + date + '.hdf5'),
    monitor=EVAL_METRIC,
    verbose=1,
    save_best_only=True,
    mode='max'
    )

In [None]:
# Restore the trained model, then overwrite its weights with the checkpointed
# ones. The custom loss has to be supplied by name so the saved model can be
# re-instantiated.
custom_objects = dict(get_weighted_bce=get_weighted_bce)
m = models.load_model(
    MODEL_FILE,
    custom_objects = custom_objects,
)
m.load_weights(WEIGHT_FILE)


In [None]:
# set the initial evaluation metric for saving checkpoints to the previous best value
# Evaluate on the `evaluation` dataset built earlier in the notebook.
evalMetrics = m.evaluate(x = evaluation, verbose = 1)
metrics = m.metrics_names
# EVAL_METRIC is the name used during fit(), where validation metrics carry a
# 'val_' prefix; evaluate() reports the same metric without that prefix, so
# strip it before looking the value up.
metric_name = EVAL_METRIC[len('val_'):] if EVAL_METRIC.startswith('val_') else EVAL_METRIC
index = metrics.index(metric_name)
# Seed the callback so only models better than the loaded one are saved.
checkpoint.best = evalMetrics[index]


In [None]:
# OPTIONALLY set the learning rate for re-training
# Goes through `tf.keras.backend` because no bare `backend` name is ever
# imported in this notebook.
lr = tf.keras.backend.get_value(m.optimizer.learning_rate)
print('current learning rate', lr)
tf.keras.backend.set_value(m.optimizer.learning_rate, LR)
print('new learning rate', LR)

In [None]:
# train the model
# steps_per_epoch is derived the same way as in the initial training run;
# TRAIN_SIZE and BATCH come from the Model section, so re-run those cells if the
# training data has changed.
m.fit(
        x = training,
        epochs = EPOCHS,
        steps_per_epoch = int(TRAIN_SIZE//BATCH),
        validation_data = evaluation,
        callbacks = [checkpoint]
        )

# Save the re-trained model under a date-stamped name in OUT_DIR.
m.save(os.path.join(OUT_DIR, f'{date}_unet256.h5'))