# Experiments on Macro-Localization Model Fusion (TIR Landsat 8 and Sentinel 2 RGB Chips)
This notebook explores model fusion for cement/steel/landcover classification based on [fastai](https://github.com/fastai/fastai)-trained CNNs.

## Prerequisites:
* Execute tir-macroloc-model/09-L8-TIR-model-mixup-BatchNorm.ipynb to download L8 chips and train model
* Execute S2-macroloc-model/09-S2-RGB-model-mixup-BatchNorm.ipynb to download S2 chips and train model

In [None]:
# Install dependencies, including fastai
import sys
!{sys.executable} -m pip install -r ../tir-macroloc-model/requirements.txt

In [None]:
%reload_ext autoreload
%autoreload 2
%matplotlib inline

import os
from pathlib import Path
import random

import boto3
from fastai import *
from fastai.vision import *
# Widget for class confusion
from fastai.widgets import ClassConfusion

import pandas as pd

import rasterio
import sklearn.model_selection
import sklearn.pipeline
import sklearn.linear_model
import sklearn.neural_network
import torch
from torch import nn

In [None]:
# Directories that are searched below for per-class (cement/steel/landcover) PNG chips,
# one per sensor. NOTE(review): presumably populated by the prerequisite notebooks — confirm.
SOURCE_PATH_L8 = '/scratch/l8_macrolocalization_model/'
SOURCE_PATH_S2 = '/scratch/s2_macrolocalization_model/'
# Destination for the combined dataset: chips are copied to <TARGET_PATH>/<source>/train/<class>.
TARGET_PATH = '/scratch/combined_macrolocalization_model/'

## Copy imagery for which we have both S2 and L8 chips

In [None]:
# List every L8 PNG chip in the three class folders (the next cell repeats this listing).
images_l8 = !find {SOURCE_PATH_L8}/cement/ {SOURCE_PATH_L8}/steel {SOURCE_PATH_L8}/landcover | grep png$

In [None]:
images_l8 = !find {SOURCE_PATH_L8}/cement/ {SOURCE_PATH_L8}/steel {SOURCE_PATH_L8}/landcover | grep png$
images_s2 = !find {SOURCE_PATH_S2}/cement/ {SOURCE_PATH_S2}/steel {SOURCE_PATH_L8}/landcover | grep png$

plant_ids_l8 = ['_'.join(image.split('/')[-1].split('_')[0:2]) for image in images_l8]
plant_ids_s2 = ['_'.join(image.split('/')[-1].split('_')[0:2]) for image in images_s2]

images_l8 = dict(zip(plant_ids_l8, images_l8))
images_s2 = dict(zip(plant_ids_s2, images_s2))

common_ids = sorted(set(images_l8.keys()) & set(images_s2.keys()))
images_l8 = [images_l8[k] for k in common_ids]
images_s2 = [images_s2[k] for k in common_ids]

In [None]:
# Create the folder layout <TARGET_PATH>/<source>/train/<class> that is later read
# with ImageDataBunch.from_folder (one tree per sensor, one folder per class).
!mkdir -p {TARGET_PATH}/l8 {TARGET_PATH}/s2
for d in 'steel', 'cement', 'landcover':
    for source in 's2', 'l8':
        !mkdir -p {TARGET_PATH}/{source}/train/{d}

In [None]:
# Copy every paired chip into the class folder of its sensor's training tree.
for image_list, source in zip((images_l8, images_s2), ('l8', 's2')):
    for image in image_list:
        for c in 'steel', 'cement', 'landcover':
            # The class is inferred by a substring test on the full path.
            # NOTE(review): a path containing more than one class name would be
            # copied into several class folders — confirm file naming rules this out.
            if c in image:
                !cp {image} {TARGET_PATH}/{source}/train/{c}        

## Set Random Seeds
Set random seeds to ensure reproducibility.

In [None]:
def set_random_seed(seed=42):
    """Seed the Python, PyTorch and NumPy random number generators.

    When CUDA is available, the CUDA generators are seeded as well and cuDNN
    is switched to deterministic mode with benchmarking disabled.

    Args:
        seed: Integer seed applied to every generator (default 42).
    """
    # CPU-side generators: Python stdlib, PyTorch and NumPy.
    for seed_fn in (random.seed, torch.manual_seed, np.random.seed):
        seed_fn(seed)

    if not torch.cuda.is_available():
        return

    # GPU-side generators plus cuDNN settings.
    torch.cuda.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)
    cudnn = torch.backends.cudnn
    cudnn.deterministic = True
    cudnn.benchmark = False


set_random_seed()

## Read in Data Using Fastai
Read in image files. Partition using fixed random seed for reproducibility.

In [None]:
# One normalised ImageDataBunch per imagery source, read from <TARGET_PATH>/<source>
# with a fixed seed and ImageNet statistics.
data = {
    source: ImageDataBunch.from_folder(
        TARGET_PATH + '/' + source, bs=16, num_workers=0, seed=42
    ).normalize(imagenet_stats)
    for source in ('l8', 's2')
}

## Read in Models

In [None]:
# Load the exported fastai learners for each sensor from the sibling model folders.
# NOTE(review): assumes the prerequisite notebooks have written these .pkl files — confirm.
l8_model = load_learner(path='.', file='../tir-macroloc-model/vgg_final.pkl')
s2_model = load_learner(path='.', file='../S2-macroloc-model/densenet_final.pkl')

## Obtain Prediction Scores Using Models

In [None]:
def get_preds(data):
    """Score every image of both sources with its CNN and collect the labels.

    Args:
        data: Mapping with the keys 'l8' and 's2'; each value must expose the
            images as ``.x`` and the labels as ``.y``.

    Returns:
        A ``(preds, actual)`` pair of dicts keyed by 'l8' and 's2'.
        ``preds[k]`` stacks, per image, the third element returned by the
        matching learner's ``predict``; ``actual[k]`` holds the labels
        converted to ``str``.
    """
    # Uses the module-level learners loaded earlier in the notebook.
    learners = (('l8', l8_model), ('s2', s2_model))

    preds = {}
    actual = {}

    for source, learner in learners:
        scores = []
        for im in data[source].x:
            scores.append(np.array(learner.predict(im)[2]))
        preds[source] = np.array(scores)

    for source, _ in learners:
        actual[source] = np.array([str(label) for label in data[source].y])

    return preds, actual


preds, actual = get_preds(data)

In [None]:
# Show the distinct L8 labels and how many samples carry each of them.
np.unique(actual['l8'], return_counts=True)

## Train a Logistic Regression Model on Prediction Scores and Evaluate for ((S2), (L8), (S2,L8)) Model Combinations

In [None]:
def train_model(X, Y):
    """Fit an L2-regularised logistic regression on ``X`` and plot diagnostics.

    The samples are split 70/30 (stratified on ``Y``, fixed ``random_state``).
    The model is fitted once per candidate regularisation strength, the
    strength with the lowest log loss on the 30% split is selected, and the
    model is refitted on the 70% split with that strength. Two plots are
    shown: log loss versus regularisation strength, and the ROC curve of the
    final model.

    NOTE: the regularisation strength is selected on the same 30% split that
    the reported metrics are computed on, so those metrics are optimistically
    biased. A separate validation split would be needed for unbiased figures.

    Args:
        X: Feature matrix with one row per sample.
        Y: Binary target per sample (the callers pass boolean masks).

    Returns:
        A tuple ``(model, penalty, regularisation_strengths,
        regularisation_strength_optim, train_results, test_results,
        final_results)``:

        * ``model``: the estimator fitted on the training split with the
          selected strength.
        * ``penalty``: the penalty name used (``'l2'``).
        * ``regularisation_strengths``: the candidate strengths tried.
        * ``regularisation_strength_optim``: the selected strength.
        * ``train_results`` / ``test_results``: log loss per candidate on the
          training / testing split.
        * ``final_results``: dict with the keys ``accuracy``, ``fpr``,
          ``tpr``, ``thresh``, ``auc``, ``precision``, ``recall``, ``fbeta``.
    """
    X_train, X_test, Y_train, Y_test = sklearn.model_selection.train_test_split(
        X, Y, test_size=0.3, stratify=Y, random_state=0)

    regularisation_strengths = np.logspace(-20, 20, 100)
    penalty = 'l2'
    # Use the `penalty` variable so the model and the plot titles cannot drift apart.
    model = sklearn.linear_model.LogisticRegression(
        penalty=penalty, random_state=0, solver='liblinear', fit_intercept=False)

    # The pipeline wraps `model` itself; the code below relies on set_params on
    # `model` taking effect for subsequent pipeline fits.
    pipeline = sklearn.pipeline.make_pipeline(model)

    train_results = []
    test_results = []
    for reg_strength in regularisation_strengths:
        # scikit-learn's C is the inverse of the regularisation strength.
        model.set_params(C=1 / reg_strength)
        pipeline.fit(X_train, Y_train)
        train_results.append(sklearn.metrics.log_loss(Y_train, pipeline.predict_proba(X_train)))
        test_results.append(sklearn.metrics.log_loss(Y_test, pipeline.predict_proba(X_test)))

    i_optim = np.argmin(test_results)
    regularisation_strength_optim = regularisation_strengths[i_optim]

    # Estimate final model using optimal regularisation parameter value
    model.set_params(C=1 / regularisation_strength_optim)
    pipeline.fit(X_train, Y_train)

    # Score of the second class column on the testing split, computed once and reused below.
    test_scores = pipeline.predict_proba(X_test)[:, 1]

    final_results = {}
    final_results['accuracy'] = pipeline.score(X_test, Y_test)
    final_results['fpr'], final_results['tpr'], final_results['thresh'] = sklearn.metrics.roc_curve(Y_test, test_scores)
    final_results['auc'] = sklearn.metrics.auc(final_results['fpr'], final_results['tpr'])
    final_results['precision'], final_results['recall'], final_results['fbeta'], _ = (
        sklearn.metrics.precision_recall_fscore_support(Y_test, test_scores > 0.5))

    plt.figure(figsize=(15, 7))

    # Left panel: log loss as a function of the regularisation strength.
    plt.subplot(1, 2, 1)
    plt.ylim([0.0, 0.1])
    plt.semilogx(regularisation_strengths, np.array(train_results), label='Training data')
    plt.semilogx(regularisation_strengths, np.array(test_results), label='Testing data')
    # Raw strings keep the LaTeX backslashes without invalid-escape warnings.
    plt.vlines(regularisation_strength_optim, plt.ylim()[0], plt.ylim()[1], color='k',
               linewidth=3, label=r'$\lambda_{opt}$ (testing data)')
    plt.legend(loc='lower right')
    plt.xlabel(r'Regularisation strength $\lambda$')
    # log_loss uses the natural logarithm, so the unit is nats rather than bits.
    plt.ylabel('Cross entropy (nats)')
    plt.title('Effect of {} regularisation strength on \nin-sample and out-of-sample performance'.format(penalty))
    plt.grid(True)

    # Right panel: ROC curve of the final model on the testing split.
    plt.subplot(1, 2, 2)
    plt.plot(final_results['fpr'], final_results['tpr'], color='darkorange',
             linewidth=3, label='ROC curve (area under curve= %0.2f)' % final_results['auc'])
    plt.plot([0, 1], [0, 1], color='navy', linestyle='--', linewidth=3)
    plt.xlim([0.0, 1.0])
    plt.ylim([0.0, 1.05])
    plt.xlabel('False positive rate')
    plt.ylabel('True positive rate')
    plt.title('Out-of-sample receiver operating characteristic\n based on optimal {} regularisation strength'.format(penalty))
    plt.legend(loc="lower right")
    plt.grid(True)
    plt.show()

    return model, penalty, regularisation_strengths, regularisation_strength_optim, train_results, test_results, final_results

### S2 Model -- Steel

In [None]:
# Steel-vs-rest classifier using the S2 model scores only.
model, penalty, regularisation_strengths, regularisation_strength_optim, train_results, test_results, final_results = train_model(preds['s2'], actual['s2'] == 'steel')

### L8 Model -- Steel

In [None]:
# Steel-vs-rest classifier using the L8 model scores only.
model, penalty, regularisation_strengths, regularisation_strength_optim, train_results, test_results, final_results = train_model(preds['l8'], actual['l8'] == 'steel')

In [None]:
# Steel-vs-rest classifier on the concatenated L8 and S2 scores (model fusion).
# NOTE(review): assumes row i of preds['l8'] and preds['s2'] refers to the same plant — confirm.
model, penalty, regularisation_strengths, regularisation_strength_optim, train_results, test_results, final_results = train_model(np.hstack((preds['l8'], preds['s2'])), actual['l8'] == 'steel')

In [None]:
# Persist `model` as left by the most recently executed train_model cell
# (the steel fusion model when the cells are run in order).
with open('steel_macrolocisation_model_fusion.pkl', 'wb') as f:
    pickle.dump(model, f)

In [None]:
# Cement-vs-rest classifier using the S2 model scores only.
model, penalty, regularisation_strengths, regularisation_strength_optim, train_results, test_results, final_results = train_model(preds['s2'], actual['s2'] == 'cement')

In [None]:
# Cement-vs-rest classifier using the L8 model scores only.
model, penalty, regularisation_strengths, regularisation_strength_optim, train_results, test_results, final_results = train_model(preds['l8'], actual['l8'] == 'cement')

In [None]:
# Cement-vs-rest classifier on the concatenated L8 and S2 scores (model fusion).
# NOTE(review): assumes row i of preds['l8'] and preds['s2'] refers to the same plant — confirm.
model, penalty, regularisation_strengths, regularisation_strength_optim, train_results, test_results, final_results = train_model(np.hstack((preds['l8'], preds['s2'])), actual['l8'] == 'cement')

In [None]:
# Persist `model` as left by the most recently executed train_model cell
# (the cement fusion model when the cells are run in order).
with open('cement_macrolocisation_model_fusion.pkl', 'wb') as f:
    pickle.dump(model, f)

### Observations and Conclusions
For the tasks of detecting steel plants and detecting cement plants, using S2 and L8 model scores in combination yields a lower cross entropy against testing data, compared to using S2 or L8 model scores alone. On the other hand, we observe no improvement in terms of ROC AUC; for both tasks, observed model performance is already maximal for the case where we use S2 imagery alone, due to small sample sizes. The model fusion results should therefore be treated with some caution.