# Building Footprint Segmentation from Satellite images (bfss)

### This notebook is an attempt to walk through the entire code step-by-step, explaining the different blocks, to give an overview of the project.

In [1]:
# Reload imported modules automatically before each cell is executed, so edits
# to the project's .py files are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [2]:
import os
# Display the directory the kernel starts in (the notebook folder); the next
# cell moves from here to the project root.
os.getcwd()

'/Users/pradip.gupta/personal-projects/bfss/notebook'

In [3]:
import sys, glob, shutil

# Later cells use paths relative to the project root ('./src/networks',
# './data/...'), while the kernel starts inside the notebook folder.
# Step up one level only when the current directory does not already contain
# 'src', so re-running this cell does not keep climbing the directory tree.
if not os.path.isdir(os.path.join(os.getcwd(), 'src')):
    os.chdir(os.path.dirname(os.getcwd()))
os.getcwd()

'/Users/pradip.gupta/personal-projects/bfss'

#### Adding "src/networks" folder in path, to enable in-line imports for the network files using importlib

In [4]:
import os, sys

# Make the network definition files importable by bare module name; they are
# loaded later through importlib.import_module(config.model).
networks_dir = os.path.abspath('./src/networks')
if networks_dir not in sys.path:  # keep the cell idempotent when it is re-run
    sys.path.append(networks_dir)

In [5]:
#To handle OOM errors
import tensorflow as tf
import keras.backend.tensorflow_backend as ktf
def get_session():
    """Create the TensorFlow session used by the Keras backend.

    The GPU options cap the process at 90% of GPU memory
    (per_process_gpu_memory_fraction=0.9) and enable allow_growth, so memory
    is requested as needed instead of being reserved up front.

    Returns:
        A tf.Session configured with those GPU options.
    """
    session_config = tf.ConfigProto(
        gpu_options=tf.GPUOptions(allow_growth=True,
                                  per_process_gpu_memory_fraction=0.9))
    return tf.Session(config=session_config)


# Register the configured session with the Keras TensorFlow backend.
ktf.set_session(get_session())

Using TensorFlow backend.


In [20]:
#Standard imports
import pandas as pd
import importlib
import numpy as np
from keras.models import load_model
from keras.utils import multi_gpu_model
from keras.optimizers import Adam, RMSprop, Nadam, SGD

### Loading all the custom functions that we have written. These make the training script neat and help in debugging in case of errors. 

In [24]:
#Custom imports
import config
from src.training import data_loader
from src.training.metrics import bce_dice_loss, dice_coeff
from src.training.seg_data_generator import SegDataGenerator
from src.training.keras_callbacks import get_callbacks
from src.training.modeller import finetune_model
from src.training.keras_history import generate_stats
from src.training.plots import save_plots

### Load the data from the dataset path. Here we are loading only the meta data for all the AOIs. The X and Y variables hold a buzzard datasource object. 

Get to know more on buzzard here: https://github.com/airware/buzzard

In [25]:
dataset_path, exp_name = config.dataset_path, config.exp_name

# Split the available AOIs into train / validation / test sample lists.
train, val, test = data_loader.get_samples(dataset_path)


def _build_split(banner, aois):
    """Print the banner for a split, then build its image and mask sources."""
    print(banner)
    return data_loader.build_source(aois, dataset_path)


X_train, y_train = _build_split("\nPreparing dataset for Training", train)
X_val, y_val = _build_split("\nPreparing dataset for Validation", val)

No of aois:
for train: 14
for validation: 3
for test: 1

Preparing dataset for Training

Adding austin4 to AOI list

Adding tyrol-w25 to AOI list

Adding chicago5 to AOI list

Adding vienna10 to AOI list

Adding chicago28 to AOI list

Adding vienna16 to AOI list

Adding chicago33 to AOI list

Adding kitsap18 to AOI list

Adding kitsap12 to AOI list

Adding chicago11 to AOI list

Adding kitsap17 to AOI list

Adding austin14 to AOI list

Adding tyrol-w21 to AOI list

Adding kitsap1 to AOI list

Preparing dataset for Validation

Adding austin29 to AOI list

Adding kitsap31 to AOI list

Adding tyrol-w5 to AOI list


### Reading the config.py file

In [26]:
#Params
# Tiling / sampling settings
tile_size, no_of_samples, downs = (config.tile_size,
                                   config.no_of_samples,
                                   config.down_sampling)

# Training schedule
batch_size, epochs, initial_epoch = (config.batch_size,
                                     config.epochs,
                                     config.initial_epoch)

# Training mode flags
training_frm_scratch = config.training_frm_scratch
training_frm_chkpt = config.training_frm_chkpt
transfer_lr = config.transfer_lr

# The flags must sum to exactly 1, i.e. (assuming they are booleans) exactly
# one training mode is selected; anything else is rejected here.
training_modes = (training_frm_scratch, training_frm_chkpt, transfer_lr)
if sum(training_modes) != 1:
    raise Exception("Conflicting training modes")


### Defining a superset of loss, optimiser and metric functions. The user can select any of these options using the config file

In [27]:
# Loss lookup, indexed with config.loss.
loss_class = dict(bin_cross='binary_crossentropy',
                  bce_dice=bce_dice_loss)

# Metric lookup, indexed with config.metric.
metric_class = dict(dice=dice_coeff)

# Optimiser lookup, indexed with config.optimiser. Each entry is a pair of
# (optimiser class, extra keyword arguments); the learning rate is passed
# separately when the optimiser is instantiated.
sgd_kwargs = dict(decay=1e-6, momentum=0.99, nesterov=True)
optimiser_class = dict(adam=(Adam, {}),
                       nadam=(Nadam, {}),
                       rmsprop=(RMSprop, {}),
                       sgd=(SGD, sgd_kwargs))

### Calculating the number of iterations we will use per epoch. The steps per epoch are multiplied by 2 because we are using augmentation; the code applies the same factor to both the training and the validation steps.

In [28]:
def _steps_per_epoch(source):
    """Number of batches per epoch for one data source.

    Computed as floor(len(source) * no_of_samples * 2 / batch_size); the
    factor 2 is applied to every source passed in.
    """
    doubled_samples = len(source) * no_of_samples * 2
    return int(np.floor(doubled_samples / batch_size))


# spe = steps per epoch. The same doubling is used for training and validation.
train_spe = _steps_per_epoch(X_train)
val_spe = _steps_per_epoch(X_val)

### Initialising the datagenerators for training and validation. 

Get to know more about keras generator from here: https://stanford.edu/~shervine/blog/keras-how-to-generate-data-on-the-fly

In [29]:
# Initialise generators    
# Settings shared by the training and validation generators.
generator_settings = dict(batch_size=batch_size,
                          no_of_samples=no_of_samples,
                          tile_size=tile_size,
                          downsampling_factor=downs)

train_generator = SegDataGenerator(dataset_path,
                                   img_source=X_train,
                                   mask_source=y_train,
                                   **generator_settings)

val_generator = SegDataGenerator(dataset_path,
                                 img_source=X_val,
                                 mask_source=y_val,
                                 **generator_settings)

In [31]:
if training_frm_scratch:
    print("Training from scratch")

    # Pick optimiser, loss and metric from the lookup tables via the config.
    optimiser_cls, optimiser_kwargs = optimiser_class[config.optimiser]
    optimizer = optimiser_cls(lr=config.learning_rate, **optimiser_kwargs)
    loss = loss_class[config.loss]
    metric = metric_class[config.metric]

    if config.no_of_gpu > 1:
        print("Running in multi-gpu mode")
        # Build the template model on the CPU, then replicate it across GPUs.
        with tf.device('/cpu:0'):
            build = getattr(importlib.import_module(config.model), "build")
            model = build(size=config.tile_size, chs=3)

        gpu_model = multi_gpu_model(model, gpus=config.no_of_gpu)
        gpu_model.compile(loss=loss, optimizer=optimizer, metrics=[metric, 'accuracy'])
        # The template model is the one saved later, so it is compiled as well.
        model.compile(loss=loss, optimizer=optimizer, metrics=[metric, 'accuracy'])

    else:
        build = getattr(importlib.import_module(config.model), "build")
        model = build(size=config.tile_size, chs=3)
        model.compile(loss=loss, optimizer=optimizer, metrics=[metric, 'accuracy'])
        # The training cell calls gpu_model.fit_generator(...), so on a single
        # device gpu_model must be the model itself; leaving it as None makes
        # that call fail with AttributeError.
        gpu_model = model

Training from scratch


### Resume training

When we load a keras model using `load_model`, we do not need to compile it as it _returns_ a **compiled model**. <br>
**"load_model"** in keras does 4 things: 
 - loads architecture, 
 - loads weights, 
 - loads optimisers and loss, 
 - loads state of optimiser and loss
 

To know more about keras models api: https://keras.io/models/about-keras-models/#about-keras-models

In [32]:
if training_frm_chkpt:
    print("Training from prv checkpoint")
    model_path = config.model_path
    # load_model returns an already-compiled model; the custom loss and metric
    # must be supplied so Keras can deserialize them.
    model = load_model(model_path,
                       custom_objects={'bce_dice_loss': bce_dice_loss, 'dice_coeff': dice_coeff})
    # The training cell calls gpu_model.fit_generator(...); without this
    # assignment gpu_model is undefined in this mode.
    # NOTE(review): this resumes on the loaded model as-is; it is not wrapped
    # with multi_gpu_model even when config.no_of_gpu > 1 — confirm intended.
    gpu_model = model

## Transfer Learning: 
for Transfer Learning we follow the sequence: <br> 
<font size="5">build --> load_weights --> finetune --> compile </font>

Note: Compiling a model only defines the loss function, the optimizer and the metrics. That's all. 
Weights after compilation are the same as before compilation.

#### To check the weights:
>for layer in model.layers: <br>
>>    weights = layer.get_weights() <br>
>>    print(weights) <br>

In [33]:
if transfer_lr:
    print("Transfer Learning mode")

    # Load the previously trained model that provides the starting weights.
    model_path = config.model_path
    pretrained_model = load_model(model_path,
                                  custom_objects={'bce_dice_loss': bce_dice_loss,
                                                  'dice_coeff': dice_coeff})

    # Build a fresh network and copy the trained weights into it.
    # NOTE(review): layers[-2] presumably is the template model nested inside a
    # saved multi-gpu model — confirm against how the checkpoint was written.
    build = getattr(importlib.import_module(config.model), "build")
    model = build(size=config.tile_size, chs=3)
    model.set_weights(pretrained_model.layers[-2].get_weights())

    # Freeze layers for transfer learning.
    model = finetune_model(model)

    if config.no_of_gpu > 1:
        gpu_model = multi_gpu_model(model, gpus=config.no_of_gpu, cpu_relocation=True)
        print("Running in multi-gpu mode")
    else:
        gpu_model = model

    # Compile the model. This is done inline, the same way as in the
    # training-from-scratch cell, because no compile_model helper is imported
    # or defined in this notebook.
    optimiser_cls, optimiser_kwargs = optimiser_class[config.optimiser]
    optimizer = optimiser_cls(lr=config.learning_rate, **optimiser_kwargs)
    gpu_model.compile(loss=loss_class[config.loss],
                      optimizer=optimizer,
                      metrics=[metric_class[config.metric], 'accuracy'])

### Set the callbacks to be used for training

In [34]:
#Set callbacks        
# The callbacks are created from `model` (the model that is saved after
# training), not from `gpu_model`, which is the one passed to fit_generator.
callbacks_list = get_callbacks(model)

## Start/Resume training

In [None]:
# Collect the training arguments first so the call itself stays short.
fit_args = dict(
    generator=train_generator,
    steps_per_epoch=train_spe,
    epochs=epochs,
    initial_epoch=initial_epoch,
    validation_data=val_generator,
    validation_steps=val_spe,
    callbacks=callbacks_list,
)
history = gpu_model.fit_generator(**fit_args)

In [None]:
#Save final complete model        
# `filename` is reused by the history-saving cell below, so it stays a bare
# file stem without directory or extension.
filename = "model_ep_"+str(int(epochs))+"_batch_"+str(int(batch_size))

# Make sure the experiment folder exists before writing into it.
output_dir = "./data/"+exp_name
if not os.path.isdir(output_dir):
    os.makedirs(output_dir)

model_file = output_dir+"/"+filename+".h5"
model.save(model_file)
# Report the path that was actually written (the old message printed a
# different file name, with an extra "_model" suffix and no directory).
print("Saved complete model file at: ", model_file)

In [None]:
#Save history
# Turn the Keras history (plus the config) into a table and write it next to
# the saved model; `filename` comes from the model-saving cell.
history_to_save = generate_stats(history, config)
results_csv = "".join(["./data/", exp_name, "/", filename, "_train_results.csv"])
history_frame = pd.DataFrame(history_to_save)
history_frame.to_csv(results_csv)

# Save the training plots for this experiment.
save_plots(history, exp_name)