# Train Macro-Localization Model on TIR Landsat 8 Chips for Steel/Cement/Land Cover Classification
This notebook uses the [fastai](https://github.com/fastai/fastai) library to adapt pre-trained CNNs to classify Landsat 8 TIR Band 10 image chips stored on AWS/S3.

In [None]:
# Install dependencies, including fastai
import sys
!{sys.executable} -m pip install -r ../tir-macroloc-model/requirements.txt

In [None]:
%reload_ext autoreload
%autoreload 2
%matplotlib inline

import os
from pathlib import Path
import random

import boto3

from fastai import *
from fastai.vision import *
# Widget for class confusion
from fastai.widgets import ClassConfusion

import pandas as pd

import rasterio
import sklearn.model_selection
import torch
from torch import nn

## Download .tar Files From S3 Bucket

In [None]:
# Directory the notebook was started from; the exported final models are written here.
CURRENT_DIRECTORY = os.getcwd()
# Key prefix inside the S3 bucket under which the .tar archives are stored.
AWS_SOURCE_PATH = 'tir-macroloc-model/'

# Local working directory for the downloaded archives, converted chips and train/validate folders.
TARGET_PATH = '/scratch/l8_macrolocalization_model'

# Pairs of (archive / GeoTIFF directory name, class-named output directory for the PNG chips).
IMG_DIRS = (
    ('ALD_L8_TIR_landcover_chips_v4_B10_201801_201701_201704', 'landcover'),
    ('ALD_L8_TIR_cement_chips_v4_B10_201801_201701_201704', 'cement'),
    ('ALD_L8_TIR_steel_chips_v4_B10_201801_201701_201704', 'steel'),
)

# Make sure the working directory exists before anything is downloaded into it.
!mkdir -p {TARGET_PATH}

In [None]:
# Download one .tar archive per entry in IMG_DIRS from the shared S3 bucket
# into TARGET_PATH, keeping the archive's file name.
s3 = boto3.resource('s3')
bucket = s3.Bucket('sfi-shared-assets')

for archive_stem, _ in IMG_DIRS:
    tar_name = archive_stem + '.tar'
    remote_key = str(Path(AWS_SOURCE_PATH, tar_name))
    local_file = str(Path(TARGET_PATH, tar_name))
    bucket.download_file(remote_key, local_file)

## Extract Contents of .tar Files

In [None]:
# Unpack every downloaded archive inside TARGET_PATH. `--strip-components=1`
# drops the leading path component of each archive member.
# NOTE(review): the conversion step reads from TARGET_PATH/<archive name>, so the
# archives presumably contain one extra top-level directory -- confirm.
for source_file, _ in IMG_DIRS:
    !cd {TARGET_PATH} && tar xf {str(Path(TARGET_PATH, source_file + '.tar'))} --strip-components=1

## Convert GeoTiff to PNG (includes normalisation step)
Fastai appears to require converting TIFF files to an alternative image format. Thus, convert from GeoTIFF to PNG. The step of normalizing the resulting images is necessary for model training. 

In [None]:
def normalize(x, lower=0, upper=65535):
    """Linearly stretch each band of ``x`` to ``[lower, upper]`` and cast to uint16.

    The minimum and maximum are computed separately for every index along
    axis 0 (reducing over axes 1 and 2) and ignore NaN values. The minimum is
    mapped to ``lower`` and the maximum to ``upper``.

    Args:
        x: 3-D array-like; statistics are taken over axes 1 and 2.
        lower: Output value assigned to the per-band minimum.
        upper: Output value assigned to the per-band maximum.

    Returns:
        ``uint16`` array with the same shape as ``x``. Bands with no dynamic
        range (constant or all-NaN) and individual NaN pixels are set to
        ``lower``.
    """
    # Work in float64 so that `max - min` cannot overflow for narrow integer
    # inputs (e.g. int16 spanning the full range).
    x = np.asarray(x, dtype="float64")
    x_max = np.nanmax(x, axis=(1, 2), keepdims=True)
    x_min = np.nanmin(x, axis=(1, 2), keepdims=True)

    # Only divide where the band actually has a range; a constant band would
    # otherwise produce inf/NaN and an undefined uint16 cast.
    span = x_max - x_min
    m = np.zeros_like(span)
    np.divide(upper - lower, span, out=m, where=span > 0)
    x_norm = (m * (x - x_min)) + lower

    # Casting NaN to an unsigned integer is undefined, so pin it to `lower`.
    x_norm = np.where(np.isnan(x_norm), lower, x_norm)

    return x_norm.astype("uint16")

Convert each image only if its corresponding target file does not already exist.

In [None]:
def convert_in_dir(input_tif_dir, output_png_dir, normalize=normalize):
    """Convert the GeoTIFFs in ``input_tif_dir`` to normalized PNGs.

    Each ``.tif``/``.tiff`` file (case-insensitive) is read with rasterio,
    passed through ``normalize`` and written to ``output_png_dir`` under the
    same name with a ``.png`` suffix. Files whose PNG already exists are
    skipped, so the function can be re-run safely.

    Args:
        input_tif_dir: Directory containing the source GeoTIFF files.
        output_png_dir: Directory for the PNG files; created if missing.
        normalize: Callable applied to the raster array read from each file;
            its result is what gets written.
    """
    # Accept plain strings as well as Path objects.
    input_tif_dir = Path(input_tif_dir)
    output_png_dir = Path(output_png_dir)

    def convert_image(tif_filename):
        png_path = output_png_dir / Path(tif_filename).with_suffix('.png')

        with rasterio.open(input_tif_dir / tif_filename) as infile:
            raster = normalize(infile.read())

            profile = infile.profile
            profile['driver'] = 'PNG'
            # The profile still carries the source dtype; the data written is
            # whatever `normalize` returned, so declare that dtype instead.
            profile['dtype'] = raster.dtype.name

        with rasterio.open(png_path, 'w', **profile) as dst:
            dst.write(raster)

    output_png_dir.mkdir(parents=True, exist_ok=True)
    for f in sorted(os.listdir(input_tif_dir)):
        is_tiff = Path(f).suffix.lower() in ('.tif', '.tiff')
        if is_tiff and not Path(output_png_dir, f).with_suffix('.png').is_file():
            convert_image(f)

# Convert every GeoTIFF directory into its class-named PNG directory.
for tif_subdir, png_subdir in IMG_DIRS:
    source_dir = Path(TARGET_PATH) / tif_subdir
    destination_dir = Path(TARGET_PATH) / png_subdir
    convert_in_dir(source_dir, destination_dir)

# Partition the Data Using Stratified Random Sampling
To help address the issue of limited sample sizes (in particular for steel plant imagery), we partition the data using stratified random sampling.

In [None]:
image_list = ! find {TARGET_PATH} | grep png$
class_assignments = [f.split('/')[-2] for f in image_list]

train_idx, val_idx = next(sklearn.model_selection.StratifiedShuffleSplit(n_splits=2, random_state=42, test_size=0.2).split(class_assignments, class_assignments))
subset_assignments = ['train' if i in train_idx else 'validate' for i in range(len(image_list))]

In [None]:
for image_class in np.unique(class_assignments):
    for subset in np.unique(subset_assignments):
        !mkdir -p {TARGET_PATH}/{subset}/{image_class}

In [None]:
# Populate <TARGET_PATH>/<subset>/<class>/ with symlinks to the converted chips.
for image_file, class_assignment, subset_assignment in zip(image_list, class_assignments, subset_assignments):
    link_path = Path(TARGET_PATH, subset_assignment, class_assignment, image_file.split('/')[-1])

    # A dangling link reports exists() == False, which previously let `ln -s`
    # run and fail with "File exists"; remove it so it is re-created below.
    if link_path.is_symlink() and not link_path.exists():
        link_path.unlink()

    if not link_path.exists():
        # Create the link in-process: no shell spawned per file and no quoting
        # problems for file names containing spaces or shell metacharacters.
        link_path.symlink_to(image_file)

## Set Random Seeds
Set random seeds to ensure reproducibility.

In [None]:
def set_random_seed(seed=42):
    """Seed Python's ``random``, NumPy and PyTorch with ``seed``.

    When CUDA is available the CUDA generators are seeded as well and cuDNN is
    switched to deterministic mode with benchmarking turned off.
    """
    for seed_fn in (random.seed, np.random.seed, torch.manual_seed):
        seed_fn(seed)

    if not torch.cuda.is_available():
        return

    torch.cuda.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)

    cudnn = torch.backends.cudnn
    cudnn.deterministic = True
    cudnn.benchmark = False


set_random_seed()

## Read in, Augment and Partition Image Data
Read in image files and augment them using flipping, rotation, zoom, warping, and affine transformations (lighting augmentation is switched off via `max_lighting=None`). Partition using a fixed random seed for reproducibility.

In [None]:
# Augmentation settings passed to fastai's get_transforms.
tfms = get_transforms(
    do_flip=True,
    flip_vert=True,
    max_lighting=None,
    max_zoom=1.5,
    max_warp=0.2,
)

# Build the data bunch from the folder tree under TARGET_PATH, using the
# 'validate' folder as validation set, then normalise with the ImageNet statistics.
data = ImageDataBunch.from_folder(
    TARGET_PATH,
    valid='validate',
    ds_tfms=tfms,
    bs=16,
    num_workers=0,
    seed=42,
)
data = data.normalize(imagenet_stats)

Display class-wise counts for training and validation sets.

In [None]:
def get_classwise_counts(items, classes):
    """Count how many entries of ``items`` fall into each class.

    Args:
        items: Sequence of class indices; assumed to be integers that index
            into ``classes`` (as passed by the callers in this notebook).
        classes: Sequence of class names, ordered by class index.

    Returns:
        ``pandas.Series`` indexed by class name, in the order of ``classes``.
        Classes that do not occur in ``items`` are reported with a count of 0
        instead of causing an index length mismatch.
    """
    counts = pd.Series(items).value_counts()
    # Align on every class index so absent classes still get a row.
    counts = counts.reindex(range(len(classes)), fill_value=0)
    counts.index = list(classes)

    return counts

# Print the per-class sample counts of the training and validation sets.
labelled_subsets = (('Training set', data.train_ds), ('Validation set', data.valid_ds))
for label, subset in labelled_subsets:
    print(f'--- {label} ---')
    print(get_classwise_counts(subset.y.items, subset.classes))

For exploratory purposes, display a sample of images from a single training batch.

In [None]:
data.show_batch(rows=4, figsize=(10,10))

# Run 1 - Resnet50

Adapt Resnet50 using a weighted cross entropy as a custom loss function and using mixup to train the model. In addition, we will optimise models for recall, by selecting among training epochs.

In [None]:
results = {}
interpretations = {}

In [None]:
# Class weights for the cross-entropy loss: every class gets weight 1 except
# the most abundant one in the training set, which is down-weighted to 0.2.
class_counts = get_classwise_counts(data.train_ds.y.items, data.train_ds.classes).values
# One weight per class, so the cell keeps working if the number of classes changes.
weights = [1.0] * len(class_counts)
weights[int(np.argmax(class_counts))] = 0.2

# Use the GPU when there is one, but do not crash on a CPU-only machine.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
class_weights = torch.FloatTensor(weights).to(device)
loss_w = nn.CrossEntropyLoss(weight=class_weights)

In [None]:
# Instantiate metrics.
# The task has three classes, so macro averaging is requested explicitly
# instead of relying on fastai's default `average='binary'`, which targets
# two-class problems.
recall = Recall(average='macro')
precision = Precision(average='macro')
fbeta = FBeta(average='macro')
metrics_all = [accuracy, recall, precision, fbeta]
# Display labels, in the same order as `metrics_all`.
metrics_labels = ['Accuracy', 'Recall', 'Precision', 'Fbeta']

In [None]:
# Resnet50 with pre-trained weights (transfer learning), the class-weighted
# cross-entropy loss and Adam. `bn_final` puts a batch-norm layer at the end of
# the network; ShowGraph plots training and validation loss during fitting.
learner = cnn_learner(
    data,
    models.resnet50,
    pretrained=True,
    loss_func=loss_w,
    metrics=metrics_all,
    opt_func=optim.Adam,
    bn_final=True,
    callback_fns=ShowGraph,
)
# Train with mixup.
learner = learner.mixup()

Tune the learning rate based on Smith's (2015) range test.

In [None]:
def find_learning_rate(learner, show_plot=True):
    """Run the learner's learning-rate range test and optionally plot it.

    Args:
        learner: Object exposing ``lr_find()`` and ``recorder.plot()``.
        show_plot: When true, plot the recorded range test afterwards.
    """
    learner.lr_find()
    if not show_plot:
        return
    learner.recorder.plot()

In [None]:
def fit_recall_optimised(learner, n_epochs, max_learning_rate, model_filename):
    """Fit with the one-cycle policy while checkpointing on recall.

    A ``SaveModelCallback`` configured with ``every='improvement'`` and
    ``monitor='recall'`` stores the model under ``model_filename``. After
    fitting, the loss curves and the confusion matrix are plotted.

    Args:
        learner: The fastai learner to train.
        n_epochs: Number of epochs passed to ``fit_one_cycle``.
        max_learning_rate: Maximum learning rate passed to ``fit_one_cycle``.
        model_filename: Name under which the checkpoint is saved.

    Returns:
        The ``ClassificationInterpretation`` built from the fitted learner.
    """
    checkpoint_on_recall = callbacks.SaveModelCallback(
        learner,
        every='improvement',
        monitor='recall',
        name=model_filename,
    )
    learner.fit_one_cycle(n_epochs, max_learning_rate, callbacks=[checkpoint_on_recall])

    learner.recorder.plot_losses()

    interpretation = ClassificationInterpretation.from_learner(learner)
    interpretation.plot_confusion_matrix(title='Confusion matrix', dpi=100)
    return interpretation

In [None]:
def get_statistics(learner):
    """Validate ``learner`` and return its metric values keyed by label.

    ``learner.validate`` is called with the notebook-level ``metrics_all``; the
    first returned value is dropped and the remaining ones are paired, in
    order, with ``metrics_labels``.
    """
    validation_values = np.array(learner.validate(metrics=metrics_all))
    metric_values = validation_values[1:]
    return {label: value for label, value in zip(metrics_labels, metric_values)}

In [None]:
find_learning_rate(learner)

Based on the range test, a learning rate of 1E-02 appears to be reasonable, owing to the magnitude and slope of the associated loss.

In [None]:
fit_recall_optimised(learner, n_epochs=25, max_learning_rate=1e-02, model_filename='resnet_temp')

Fine tune the entire model. We perform this by unfreezing the model, then repeating the learning rate range test.

In [None]:
# load the model with the best recall
learner.load('resnet_temp')
learner.unfreeze()
find_learning_rate(learner)

Based on the range test, further train the model using a learning rate of 1E-04.

In [None]:
fit_recall_optimised(learner, n_epochs=10, max_learning_rate=1e-04, model_filename='resnet_temp')

Load the best recall-optimised model, freeze and re-train.

In [None]:
learner.load('resnet_temp')
learner.freeze()
find_learning_rate(learner)

In [None]:
fit_recall_optimised(learner, n_epochs=10, max_learning_rate=7e-04, model_filename='resnet_temp')

As a final step, load the best recall-optimised model, unfreeze and re-train using a low learning rate.

In [None]:
learner.load('resnet_temp')
learner.unfreeze()
interpretations['resnet'] = fit_recall_optimised(learner, n_epochs=15, max_learning_rate=1e-06, model_filename='resnet_temp')

In [None]:
learner.export(str(Path(CURRENT_DIRECTORY, 'resnet_final.pkl')))
results['resnet'] = get_statistics(learner)
results['resnet']

In [None]:
# Predictions and targets for the training set, with the predictions converted
# to a NumPy array. Neither value is used by the later cells visible here.
# NOTE(review): DatasetType.Train presumably goes through the shuffled,
# augmented training loader -- confirm whether DatasetType.Fix was intended.
pred, actual = learner.get_preds(ds_type=DatasetType.Train)
pred = np.array(pred)

# Run 2 - VGG13

Adapt VGG13 using a weighted cross entropy as a custom loss function and using mixup to train the model. In addition, we will optimise models for recall, by selecting among training epochs.

In [None]:
# VGG13 (batch-norm variant) with pre-trained weights (transfer learning), the
# class-weighted cross-entropy loss and Adam. `bn_final` puts a batch-norm layer
# at the end of the network; ShowGraph plots training and validation loss
# during fitting.
learner = cnn_learner(
    data,
    models.vgg13_bn,
    pretrained=True,
    loss_func=loss_w,
    metrics=metrics_all,
    opt_func=optim.Adam,
    bn_final=True,
    callback_fns=ShowGraph,
)
# Train with mixup.
learner = learner.mixup()

Tune the learning rate based on Smith's (2015) range test.

In [None]:
find_learning_rate(learner)

Based on the range test, a learning rate of 1E-02 appears to be reasonable, owing to the magnitude and slope of the associated loss.

In [None]:
fit_recall_optimised(learner, n_epochs=25, max_learning_rate=1e-02, model_filename='vgg_temp')

Fine tune the entire model. We perform this by unfreezing the model, then repeating the learning rate range test.

In [None]:
# load the model with the best recall
learner.load('vgg_temp')
learner.unfreeze()
find_learning_rate(learner)

Based on the range test, further train the model using a learning rate of 1E-04.

In [None]:
fit_recall_optimised(learner, n_epochs=10, max_learning_rate=1e-04, model_filename='vgg_temp')

Load the best recall-optimised model, freeze and re-train.

In [None]:
learner.load('vgg_temp')
learner.freeze()
find_learning_rate(learner)

In [None]:
fit_recall_optimised(learner, n_epochs=10, max_learning_rate=7e-04, model_filename='vgg_temp')

As a final step, load the best recall-optimised model, unfreeze and re-train using a low learning rate.

In [None]:
learner.load('vgg_temp')
learner.unfreeze()
interpretations['vgg'] = fit_recall_optimised(learner, n_epochs=15, max_learning_rate=1e-06, model_filename='vgg_temp')

In [None]:
learner.export(str(Path(CURRENT_DIRECTORY, 'vgg_final.pkl')))
results['vgg'] = get_statistics(learner)
results['vgg']

# Run 3 - Densenet161

Adapt Densenet161 using a weighted cross entropy as a custom loss function and using mixup to train the model. In addition, we will optimise models for recall, by selecting among training epochs.

In [None]:
# Densenet161 with pre-trained weights (transfer learning), the class-weighted
# cross-entropy loss and Adam. `bn_final` puts a batch-norm layer at the end of
# the network; ShowGraph plots training and validation loss during fitting.
learner = cnn_learner(
    data,
    models.densenet161,
    pretrained=True,
    loss_func=loss_w,
    metrics=metrics_all,
    opt_func=optim.Adam,
    bn_final=True,
    callback_fns=ShowGraph,
)
# Train with mixup.
learner = learner.mixup()

Tune the learning rate based on Smith's (2015) range test.

In [None]:
find_learning_rate(learner)

Based on the range test, a learning rate of 1E-02 appears to be reasonable, owing to the magnitude and slope of the associated loss.

In [None]:
fit_recall_optimised(learner, n_epochs=25, max_learning_rate=1e-02, model_filename='densenet_temp')

Fine tune the entire model. We perform this by unfreezing the model, then repeating the learning rate range test.

In [None]:
# load the model with the best recall
learner.load('densenet_temp')
learner.unfreeze()
find_learning_rate(learner)

Based on the range test, further train the model using a learning rate of 1E-04.

In [None]:
fit_recall_optimised(learner, n_epochs=10, max_learning_rate=1e-04, model_filename='densenet_temp')

Load the best recall-optimised model, freeze and re-train.

In [None]:
learner.load('densenet_temp')
learner.freeze()
find_learning_rate(learner)

In [None]:
fit_recall_optimised(learner, n_epochs=10, max_learning_rate=7e-04, model_filename='densenet_temp')

As a final step, load the best recall-optimised model, unfreeze and re-train using a low learning rate.

In [None]:
learner.load('densenet_temp')
learner.unfreeze()
interpretations['densenet'] = fit_recall_optimised(learner, n_epochs=15, max_learning_rate=1e-06, model_filename='densenet_temp')

In [None]:
learner.export(str(Path(CURRENT_DIRECTORY, 'densenet_final.pkl')))
results['densenet'] = get_statistics(learner)
results['densenet']

# Obtain Summary of Results Across Models

In [None]:
pd.DataFrame(results)

Based on obtained results, we select Resnet as the best-performing model

# Analyze Results Obtained Using VGG

In [None]:
ClassConfusion(interpretations['vgg'], classlist=['cement','landcover','steel'], is_ordered=False, figsize=(8,8))

List of the largest non-diagonal entries in the confusion matrix (actual | predicted | number of occurrences).

In [None]:
interpretations['vgg'].most_confused()