# Build Landsat-8 TIR Macro-localization Deep Learning Model

This notebook trains models to classify Landsat 8 TIR Band 10 image chips into cement, steel, and landcover.

TBD more...

## Install Earlier Version of fastai

In [None]:
!pip install fastai==1.0.61

## Import Libraries

In [None]:
import glob
import os
import random
import subprocess

import boto3

import numpy as np
import pandas as pd

import sklearn.model_selection
import torch
from torch import nn

from fastai import *
from fastai.vision import *
from fastai.widgets import ClassConfusion

## Download .tar File From S3 Bucket and Extract Contents

This tar file contains normalized PNGs for cement, steel, and landcover, divided into train and validate sets.

In [None]:
# S3 key prefix (inside the bucket) that holds the input .tar archive; it is
# also the parent prefix under which the trained models are uploaded.
AWS_SOURCE_PATH = 'L8-TIR-macro-localization-model-build3'
# Local image root passed to ImageDataBunch.from_folder. Its last path
# component plus '.tar' is the archive name downloaded from S3, and the
# archive is stored locally as IMG_DIR + '.tar'.
IMG_DIR = '/scratch/ALD_L8_TIR_chips_v4p1_train3'

# Output
# Sub-prefix (below AWS_SOURCE_PATH) that receives the exported .pkl models.
AWS_MODEL_PATH = 'L8-TIR-model-results3'

In [None]:
# Fetch the image archive from the shared S3 bucket to local storage.
s3 = boto3.resource('s3')
bucket = s3.Bucket('sfi-shared-assets')

# S3 object keys always use '/' separators regardless of the local OS, so the
# key is assembled with plain string formatting (as the upload cell at the end
# of the notebook does) instead of a filesystem Path.
archive_name = IMG_DIR.split('/')[-1] + '.tar'
bucket.download_file('{}/{}'.format(AWS_SOURCE_PATH, archive_name),
                     IMG_DIR + '.tar')

In [None]:
# Unpack the archive into the parent directory of IMG_DIR (the directory the
# archive itself was downloaded to), so the two paths cannot drift apart.
extract_dir = os.path.dirname(IMG_DIR) or '.'
# check=True raises CalledProcessError when tar fails; os.system only returned
# an exit status that was never inspected, so a failed extraction went unnoticed.
# Passing an argument list also avoids routing the paths through a shell.
subprocess.run(['tar', '-C', extract_dir, '-xf', IMG_DIR + '.tar'], check=True);

## Set Random Seeds
Set random seeds to ensure reproducibility.

In [None]:
def set_random_seed(seed=42):
    """Seed every random number generator this notebook relies on.

    Seeds Python's ``random`` module, NumPy's global generator and PyTorch.
    When CUDA is available, the CUDA generators are seeded too and cuDNN is
    set to deterministic mode with benchmarking turned off.

    Args:
        seed: Seed value shared by all generators (default 42).
    """
    # The CPU-side generators are independent of each other, so order is irrelevant.
    for seed_fn in (random.seed, np.random.seed, torch.manual_seed):
        seed_fn(seed)

    if not torch.cuda.is_available():
        return

    torch.cuda.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)
    cudnn = torch.backends.cudnn
    cudnn.deterministic = True
    cudnn.benchmark = False


set_random_seed()

## Read in, Augment and Partition Image Data
Read in image files and augment them using flipping, rotation, zoom, warping, and affine transformations (lighting changes are disabled in the code below via `max_lighting=None`). Partition using a fixed random seed for reproducibility.

In [None]:
# Augmentation settings for the image chips: flipping (including vertical
# flips), zoom up to 1.5 and warping up to 0.2. No lighting magnitude is
# supplied (max_lighting=None).
tfms = get_transforms(
    do_flip=True,
    flip_vert=True,
    max_zoom=1.5,
    max_warp=0.2,
    max_lighting=None,
)

# Read the pre-split 'train' / 'validate' folders below IMG_DIR in batches of
# 16, then normalise with the ImageNet statistics.
data = ImageDataBunch.from_folder(
    IMG_DIR,
    train='train',
    valid='validate',
    ds_tfms=tfms,
    bs=16,
    num_workers=0,
    seed=42,
)
data = data.normalize(imagenet_stats)

Display class-wise counts for training and validation sets.

In [None]:
def get_classwise_counts(items, classes):
    series = pd.value_counts(items).sort_index()
    series.index = classes
    
    return series

# Report the per-class sample counts of the training and validation sets.
labelled_subsets = (
    ('Training set', data.train_ds),
    ('Validation set', data.valid_ds),
)
for label, subset in labelled_subsets:
    header = '--- {} ---'.format(label)
    print(header)
    print(get_classwise_counts(subset.y.items, subset.classes))

For exploratory purposes, display a sample of images from a single training batch.

In [None]:
# Visual sanity check: draw sample images from one training batch (rows=4)
# on a 10x10 figure.
data.show_batch(rows=4, figsize=(10,10))

## Common Setup for Pre-trained Models

In [None]:
# Validation statistics per model key (e.g. 'vgg', 'densenet'), filled from
# get_statistics() after each model's final training round.
results = {}
# Interpretation object returned by the final fit_recall_optimised() call of
# each model, keyed by the same model key.
interpretations = {}

In [None]:
# Class-weighted cross entropy: every class has weight 1 except the most
# abundant training class, which is down-weighted to 0.2.
train_counts = get_classwise_counts(data.train_ds.y.items, data.train_ds.classes)
# One weight per class, sized from the data instead of a hard-coded three entries.
weights = [1.0] * len(train_counts)
# Replace the weight for the most abundant class with a smaller value
weights[int(np.argmax(train_counts.values))] = 0.2
# Use the GPU when present, but fall back to the CPU so this cell does not
# crash on CPU-only machines (an unconditional .cuda() call would).
weights_device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
class_weights = torch.tensor(weights, dtype=torch.float32, device=weights_device)
loss_w = nn.CrossEntropyLoss(weight=class_weights)

In [None]:
# Instantiate metrics
recall = Recall()
precision = Precision()
# fbeta = MultiLabelFbeta(beta =1)
fbeta = FBeta()
metrics_all = [accuracy, recall, precision, fbeta]
metrics_labels = ['Accuracy', 'Recall', 'Precision', 'Fbeta']

## Functions to Tune Learning Rate

Tunes the learning rate based on Smith's (2015) range test.

In [None]:
def find_learning_rate(learner, show_plot=True):
    """Run the learning-rate range test on ``learner``.

    Args:
        learner: Object exposing ``lr_find()`` and ``recorder.plot()``.
        show_plot: When true (the default), plot the recorded range-test curve.
    """
    learner.lr_find()
    if not show_plot:
        return
    learner.recorder.plot()

In [None]:
def fit_recall_optimised(learner, n_epochs, max_learning_rate, model_filename):
    """Train with the one-cycle policy while checkpointing on recall.

    A SaveModelCallback monitoring 'recall' saves the model under
    ``model_filename`` whenever the monitored value improves. After training,
    the loss curves and a confusion matrix are plotted.

    Args:
        learner: fastai learner to train.
        n_epochs: Number of epochs passed to ``fit_one_cycle``.
        max_learning_rate: Maximum learning rate passed to ``fit_one_cycle``.
        model_filename: Name under which the checkpoint is saved.

    Returns:
        The ClassificationInterpretation built from ``learner`` after training.
    """
    checkpoint = callbacks.SaveModelCallback(
        learner, every='improvement', monitor='recall', name=model_filename
    )
    learner.fit_one_cycle(n_epochs, max_learning_rate, callbacks=[checkpoint])

    learner.recorder.plot_losses()

    interpretation = ClassificationInterpretation.from_learner(learner)
    interpretation.plot_confusion_matrix(title='Confusion matrix', dpi=100)
    return interpretation

In [None]:
def get_statistics(learner):
    """Validate ``learner`` and return its metric values keyed by label.

    The first value returned by ``learner.validate`` is skipped; the remaining
    values are paired positionally with ``metrics_labels``.

    Args:
        learner: Object exposing ``validate(metrics=...)``.

    Returns:
        dict mapping each entry of ``metrics_labels`` to the matching value.
    """
    validation_output = learner.validate(metrics=metrics_all)
    metric_values = np.array(validation_output)[1:]
    return dict(zip(metrics_labels, metric_values))

## Run 1 - Resnet50

Adapt Resnet50 using a weighted cross entropy as a custom loss function and using mixup to train the model. In addition, we will optimise models for recall, by selecting among training epochs.

### Define Learner (Resnet50)

In [None]:
# Resnet50 learner trained with the class-weighted loss and mixup.
learner = cnn_learner(
    data,
    models.resnet50,
    pretrained=True,         # transfer learning on
    loss_func=loss_w,        # class weighted cross entropy loss
    metrics=metrics_all,
    opt_func=optim.Adam,
    bn_final=True,           # batch norm at the end of the CNN
    callback_fns=ShowGraph,  # plots training and validation loss during fitting
)
# Enable mixup for training.
learner = learner.mixup()

### Tune Learning Rate (Resnet50)

In [None]:
# Range test on the newly created Resnet50 learner; the plot informs the
# learning rate used for the first training round.
find_learning_rate(learner)

### Train Resnet50 with initial (high) learning rate

Based on the range test, a learning rate of 1E-02 appears to be reasonable, owing to the magnitude and slope of the associated loss.

In [None]:
# Round 1: 25 one-cycle epochs at max LR 1e-02; the checkpoint is saved as
# 'resnet_temp' whenever the monitored recall improves.
fit_recall_optimised(learner, n_epochs=25, max_learning_rate=1e-02, model_filename='resnet_temp')

### Determine new learning rate (Resnet50)

Fine tune the entire model. We perform this by unfreezing the model, then repeating the learning rate range test.

In [None]:
# load the model with the best recall
learner.load('resnet_temp')
learner.unfreeze()
find_learning_rate(learner)

### Retrain Resnet50 with low learning rate

Based on the range test, further train the model using a learning rate of 1E-04.

In [None]:
# Round 2 (unfrozen): 10 epochs at max LR 1e-04, checkpointing to the same
# 'resnet_temp' name.
fit_recall_optimised(learner, n_epochs=10, max_learning_rate=1e-04, model_filename='resnet_temp')

### Retrain Resnet50 from best recall-optimized model

Load the best recall-optimised model, freeze and re-train.

In [None]:
# Reload the 'resnet_temp' checkpoint, freeze again and rerun the range test
# before the next training round.
learner.load('resnet_temp')
learner.freeze()
find_learning_rate(learner)

In [None]:
# Round 3 (frozen): 10 epochs at max LR 7e-04.
fit_recall_optimised(learner, n_epochs=10, max_learning_rate=7e-04, model_filename='resnet_temp')

### Final Resnet50 training

As a final step, load the best recall-optimised model, unfreeze and re-train using a low learning rate.

In [None]:
# Final round: reload the checkpoint, unfreeze and train 15 epochs at a very
# low max LR (1e-06). The returned interpretation is kept under 'resnet'.
learner.load('resnet_temp')
learner.unfreeze()
interpretations['resnet'] = fit_recall_optimised(learner, n_epochs=15, max_learning_rate=1e-06, model_filename='resnet_temp')

In [None]:
# Export the final Resnet50 learner and record its validation statistics.
# The statistics were previously commented out, which left Resnet50 out of the
# cross-model summary table even though VGG13 and Densenet161 are recorded
# the same way further down.
learner.export('resnet50_multiclass_final.pkl')
results['resnet'] = get_statistics(learner)
results['resnet']

In [None]:
# Predictions and targets for the training set, with the predictions
# converted to a NumPy array.
# NOTE(review): neither `pred` nor `actual` is used later in this notebook;
# confirm whether DatasetType.Train (vs. an unshuffled variant) is intended.
pred, actual = learner.get_preds(ds_type=DatasetType.Train)
pred = np.array(pred)

## Run 2 - VGG13

Adapt VGG13 using a weighted cross entropy as a custom loss function and using mixup to train the model. In addition, we will optimise models for recall, by selecting among training epochs.

### Define Learner (VGG13)

In [None]:
# VGG13 (batch-norm variant) learner trained with the class-weighted loss and mixup.
learner = cnn_learner(
    data,
    models.vgg13_bn,
    pretrained=True,         # transfer learning on
    loss_func=loss_w,        # class weighted cross entropy loss
    metrics=metrics_all,
    opt_func=optim.Adam,
    bn_final=True,           # batch norm at the end of the CNN
    callback_fns=ShowGraph,  # plots training and validation loss during fitting
)
# Enable mixup for training.
learner = learner.mixup()

### Tune Learning Rate (VGG13)

Tune the learning rate based on Smith's (2015) range test.

In [None]:
# Range test on the newly created VGG13 learner; the plot informs the
# learning rate used for the first training round.
find_learning_rate(learner)

### First Round of VGG13 Training with High Learning Rate

Based on the range test, a learning rate of 1E-02 appears to be reasonable, owing to the magnitude and slope of the associated loss.

In [None]:
# Round 1: 25 one-cycle epochs at max LR 1e-02; the checkpoint is saved as
# 'vgg_temp' whenever the monitored recall improves.
fit_recall_optimised(learner, n_epochs=25, max_learning_rate=1e-02, model_filename='vgg_temp')

### Repeat Learning Rate Test and Retrain (VGG13)

Fine tune the entire model. We perform this by unfreezing the model, then repeating the learning rate range test.

In [None]:
# load the model with the best recall
learner.load('vgg_temp')
learner.unfreeze()
find_learning_rate(learner)

Based on the range test, further train the model using a learning rate of 1E-04.

In [None]:
# Round 2 (unfrozen): 10 epochs at max LR 1e-04, checkpointing to the same
# 'vgg_temp' name.
fit_recall_optimised(learner, n_epochs=10, max_learning_rate=1e-04, model_filename='vgg_temp')

### Retrain from recall-optimized model (VGG13)

Load the best recall-optimised model, freeze and re-train.

In [None]:
# Reload the 'vgg_temp' checkpoint, freeze again and rerun the range test
# before the next training round.
learner.load('vgg_temp')
learner.freeze()
find_learning_rate(learner)

In [None]:
# Round 3 (frozen): 10 epochs at max LR 7e-04.
fit_recall_optimised(learner, n_epochs=10, max_learning_rate=7e-04, model_filename='vgg_temp')

### Final training of VGG13 with low learning rate

As a final step, load the best recall-optimised model, unfreeze and re-train using a low learning rate.

In [None]:
# Final round: reload the checkpoint, unfreeze and train 15 epochs at a very
# low max LR (1e-06). The returned interpretation is kept under 'vgg'.
learner.load('vgg_temp')
learner.unfreeze()
interpretations['vgg'] = fit_recall_optimised(learner, n_epochs=15, max_learning_rate=1e-06, model_filename='vgg_temp')

In [None]:
# Export the final VGG13 learner and record its validation statistics for the
# cross-model summary; the last line displays them.
learner.export('vgg13_multiclass_final.pkl')
results['vgg'] = get_statistics(learner)
results['vgg']

## Run 3 - Densenet161

### Define Learner (Densenet161)

Adapt Densenet161 using a weighted cross entropy as a custom loss function and using mixup to train the model. In addition, we will optimise models for recall, by selecting among training epochs.

In [None]:
# Densenet161 learner trained with the class-weighted loss and mixup.
learner = cnn_learner(
    data,
    models.densenet161,
    pretrained=True,         # transfer learning on
    loss_func=loss_w,        # class weighted cross entropy loss
    metrics=metrics_all,
    opt_func=optim.Adam,
    bn_final=True,           # batch norm at the end of the CNN
    callback_fns=ShowGraph,  # plots training and validation loss during fitting
)
# Enable mixup for training.
learner = learner.mixup()

### Tune Learning Rate (Densenet161)

Tune the learning rate based on Smith's (2015) range test.

In [None]:
# Range test on the newly created Densenet161 learner; the plot informs the
# learning rate used for the first training round.
find_learning_rate(learner)

Based on the range test, a learning rate of 1E-02 appears to be reasonable, owing to the magnitude and slope of the associated loss.

In [None]:
# Round 1: 25 one-cycle epochs at max LR 1e-02; the checkpoint is saved as
# 'densenet_temp' whenever the monitored recall improves.
fit_recall_optimised(learner, n_epochs=25, max_learning_rate=1e-02, model_filename='densenet_temp')

### Retrain Densenet161 with lower learning rate

Fine tune the entire model. We perform this by unfreezing the model, then repeating the learning rate range test.

In [None]:
# load the model with the best recall
learner.load('densenet_temp')
learner.unfreeze()
find_learning_rate(learner)

Based on the range test, further train the model using a learning rate of 1E-04.

In [None]:
# Round 2 (unfrozen): 10 epochs at max LR 1e-04, checkpointing to the same
# 'densenet_temp' name.
fit_recall_optimised(learner, n_epochs=10, max_learning_rate=1e-04, model_filename='densenet_temp')

### Retrain recall-optimized model (Densenet161)

Load the best recall-optimised model, freeze and re-train.

In [None]:
# Reload the 'densenet_temp' checkpoint, freeze again and rerun the range
# test before the next training round.
learner.load('densenet_temp')
learner.freeze()
find_learning_rate(learner)

In [None]:
# Round 3 (frozen): 10 epochs at max LR 7e-04.
fit_recall_optimised(learner, n_epochs=10, max_learning_rate=7e-04, model_filename='densenet_temp')

### Final training of Densenet161 with low learning rate

As a final step, load the best recall-optimised model, unfreeze and re-train using a low learning rate.

In [None]:
# Final round: reload the checkpoint, unfreeze and train 15 epochs at a very
# low max LR (1e-06). The returned interpretation is kept under 'densenet'.
learner.load('densenet_temp')
learner.unfreeze()
interpretations['densenet'] = fit_recall_optimised(learner, n_epochs=15, max_learning_rate=1e-06, model_filename='densenet_temp')

In [None]:
# Export the final Densenet161 learner and record its validation statistics
# for the cross-model summary; the last line displays them.
learner.export('densenet161_multiclass_final.pkl')
results['densenet'] = get_statistics(learner)
results['densenet']

## Obtain Summary of Results Across Models

In [None]:
# One column per model key in `results`, one row per metric label.
pd.DataFrame(results)

Based on obtained results, we select Resnet as the best-performing model

## Analyze Results Obtained Using VGG

In [None]:
# Class-confusion widget for the VGG interpretation.
# NOTE(review): the class list is hard-coded; presumably it must match the
# class names in `data` — confirm.
ClassConfusion(interpretations['vgg'], classlist=['cement','landcover','steel'], is_ordered=False, figsize=(8,8))

List of largest non-diagonal entries in the confusion matrix (actual | predicted | number of occurrences).

In [None]:
# Display the most frequent misclassifications of the VGG model.
interpretations['vgg'].most_confused()

## Upload models to S3

In [None]:
# Collect the exported learners (*.pkl) located directly inside IMG_DIR.
pkl_pattern = IMG_DIR + '/*.pkl'
model_results = glob.glob(pkl_pattern)

In [None]:
# Upload each exported model to <AWS_SOURCE_PATH>/<AWS_MODEL_PATH>/<file name>.
upload_prefix = AWS_SOURCE_PATH + '/' + AWS_MODEL_PATH
for model_path in model_results:
    file_name = model_path.split('/')[-1]
    bucket.upload_file(model_path, upload_prefix + '/' + file_name)