# Tropical Cyclones Eye Detection
Script to train Deep Learning models to identify the presence/absence of the tropical cyclone (TC) eye in an image.

## Imports and configurations

In [None]:
# Mount Google Drive at /content/drive so that the paths used in the
# following cells (which all live under /content/drive/MyDrive) exist.
from google.colab import drive
drive.mount('/content/drive')

In [None]:
# Insert your desired path to work on.
# The process working directory is changed here, so every relative path used
# later in the notebook is resolved against this folder.
import os
os.chdir('/content/drive/MyDrive/ESRIN_PhiLab/Tropical_Cyclones/data')

Run the following cell once per session. This cell adds the code folder to the Python module search path (`sys.path`) so that the project modules can be imported.

In [None]:
# Path where the modules are stored
import sys
sys.path.append('/content/drive/MyDrive/ESRIN_PhiLab/Tropical_Cyclones/tropical_cyclones/src/code')

# Import modules
import utils
from models import DetectionCNN
from data_process import DataProcessor
from visualization import plot_history

The following cell allows Google Colab/Jupyter Notebooks to detect changes in external code and to automatically update it without restarting the runtime.

In [None]:
%load_ext autoreload
%autoreload 2

In [None]:
# General imports
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import time
import pickle
from sklearn.model_selection import StratifiedKFold
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint, ReduceLROnPlateau, TensorBoard
from datetime import datetime

import tensorflow as tf
from tensorflow.data import Dataset
from tensorflow.keras import Input
from tensorflow.keras.applications import resnet50, mobilenet_v2
from tensorflow.keras.applications.resnet50 import ResNet50
from tensorflow.keras.applications.mobilenet_v2 import MobileNetV2
from tensorflow.keras.models import Model
from tensorflow.keras import layers
from tensorflow.keras.layers import concatenate, Dense, GlobalAveragePooling2D
from tensorflow.keras.optimizers import SGD, Adam
from tensorflow.keras.metrics import BinaryAccuracy, Precision, Recall, TruePositives, FalsePositives, TrueNegatives, FalseNegatives
from sklearn.metrics import confusion_matrix, ConfusionMatrixDisplay

np.set_printoptions(precision=4)

## 1. Train on data according to csv split into train, validation and test sets

### 1.1. Define settings (arguments)

In [None]:
# Root folder of the dataset, relative to the current working directory.
# It is defined before the dictionary because 'save_dir' is derived from it;
# referencing it only inside the dict literal would raise a NameError on a
# fresh session.
main_dir = "SAR_swath_images_VV+VH+WS"

# Settings for the fixed train/val/test experiment (section 1).
args = {
    'main_dir':       main_dir,
    'save_dir':       os.path.join(main_dir, "id_results", "R_700x400_nM_bs8_bf100_e10_lr0001"),
    'cnn':            "ResNet",     # choices: ["ResNet", "Mobile"]
    'loss':           "binary_crossentropy",
    'height':         700,
    'width':          400,
    'numerical_vars': False,
    'normalise':      True,
    'norm_mode':      "model",     # choices=['z-norm', 'model', 'simple', 'none']
    'rotate':         False,
    'crop':           True,
    'crop_mode':      "uniform",   # choices=['uniform', 'weighted']
    'nb_crops':       1,
    'batch_size':     8,
    'buffer_size':    100,
    'epochs':         10,
    'learning_rate':  0.0001
}

### 1.2. Prepare the tf.data.Dataset instances to be fed to the model

In [None]:
# Read the three splits listed in the CSV files under <main_dir>/csv
main_dir = args['main_dir']
train_images, train_labels, train_bbox = utils.load_data(f"{main_dir}/csv/training.csv", args)
val_images, val_labels, val_bbox = utils.load_data(f"{main_dir}/csv/val.csv", args)
test_images, test_labels, test_bbox = utils.load_data(f"{main_dir}/csv/test.csv", args)

# One DataProcessor is shared by all splits, with every plot/print option off:
#   plot_light     -> plot only select_crop() images
#   plot_extensive -> plot extensively all images
p = DataProcessor(args, plot_light=False, plot_extensive=False, show_prints=False)

# Build one dataset per split from (images, labels, bounding boxes)
train_ds = utils.prepare_dataset(p, train_images, train_labels, train_bbox)
val_ds = utils.prepare_dataset(p, val_images, val_labels, val_bbox)
test_ds = utils.prepare_dataset(p, test_images, test_labels, test_bbox)

# Normalise: the training dataset is passed as first argument in both calls,
# once paired with the validation set and once with the test set. The
# training output of the second call is discarded.
train_ds_norm, val_ds_norm = utils.normalisation(train_ds, val_ds, args)
_, test_ds_norm = utils.normalisation(train_ds, test_ds, args)

# Final pipeline configuration; shuffling is requested for training only
train_dataset = utils.config_performance(train_ds_norm, args, shuffle=True)
val_dataset = utils.config_performance(val_ds_norm, args)
test_dataset = utils.config_performance(test_ds_norm, args)

### 1.3. Perform end-to-end training of the model

In [None]:
# Output folder for the checkpoint and the TensorBoard logs
save_dir = args['save_dir']
os.makedirs(save_dir, exist_ok=True)

# Build the network from the settings dictionary
model = DetectionCNN(args)

# Locations written by the callbacks; each run logs to its own timestamped folder
checkpoint_path = os.path.join(save_dir, "best_model.h5")
run_stamp = datetime.now().strftime("%d-%m-%Y %H:%M:%S")
tensorboard_dir = os.path.join(save_dir, "logs", run_stamp)

# Callbacks: stop on stalled val_loss, lower the learning rate on plateau,
# keep only the best checkpoint, and log to TensorBoard
early_stopping = EarlyStopping(monitor='val_loss', patience=10, verbose=1)
lr_on_plateau = ReduceLROnPlateau(factor=0.1, patience=5, min_lr=0.00001, verbose=1)
best_checkpoint = ModelCheckpoint(checkpoint_path, verbose=1, save_best_only=True)
tensorboard_log = TensorBoard(log_dir=tensorboard_dir)
callbacks = [early_stopping, lr_on_plateau, best_checkpoint, tensorboard_log]

# Fit on the training set and validate on the validation set after each epoch.
# Class 0 is weighted 1.6x relative to class 1 in the loss.
fit_options = dict(
    x=train_dataset,
    steps_per_epoch=len(train_dataset),
    validation_data=val_dataset,
    validation_steps=len(val_dataset),
    epochs=args['epochs'],
    callbacks=callbacks,
    verbose=1,
    class_weight={0: 1.6, 1: 1},
    shuffle=True,
)
history = model.fit(**fit_options)

In [None]:
# Load the TensorBoard notebook extension
#%load_ext tensorboard
%reload_ext tensorboard
# NOTE(review): the log directory below is hard-coded; it has to be kept in
# sync by hand with args['save_dir'] + "/logs" used by the TensorBoard callback.
%tensorboard --logdir SAR_swath_images_VV+VH+WS/id_results/R_700x400_nM_bs8_bf100_e10_lr0001/logs

In [None]:
# Evaluate model
# Restore the weights stored in <save_dir>/best_model.h5 before evaluating.
# The confirmation is printed only after the load call has returned, so the
# message is not shown when loading fails.
model.load_weights(os.path.join(save_dir, "best_model.h5"))
print("Loaded best weights of the training")

# Run the evaluation over every batch of the test dataset
results = model.evaluate(
    test_dataset,
    steps = len(test_dataset),
    verbose = 1
)

In [None]:
# Make predictions
# Predict on the whole test dataset and binarise the outputs:
# values below 0.5 become 0, all others become 1.
predictions = model.predict(test_dataset)
predictions = tf.where(predictions < 0.5, 0, 1)

print('Predictions:\n', predictions.numpy())
print('Labels:\n')
# The ground-truth labels are the ones returned by utils.load_data for the
# test CSV (the previously referenced `test_labels_dataset` was never defined).
# NOTE(review): assumes test_labels holds one label per prediction, in the
# same order as test_dataset -- confirm against utils.prepare_dataset.
for label in test_labels:
    print(label)
# TODO: visual check of a few predictions; needs an `image_batch` taken from
# the test dataset before it can be enabled.
#class_names = {0: "No eye", 1: "Eye"}
#plt.figure(figsize=(10, 10))
#for i in range(9):
#  ax = plt.subplot(3, 3, i + 1)
#  plt.imshow(image_batch[i].astype("uint8"))
#  plt.title(class_names[predictions[i]])
#  plt.axis("off")

## 2. Train on data using the Stratified K-Fold

### 2.1. Define settings (arguments)

In [None]:
# Root folder of the dataset, relative to the current working directory.
# Defined here so that this section can be run on its own: 'save_dir' below is
# derived from it and would otherwise depend on a variable set by another cell.
main_dir = "SAR_swath_images_VV+VH+WS"

# Settings for the stratified K-fold experiment (section 2).
# NOTE(review): the 'save_dir' folder name still encodes "e10" while 'epochs'
# is 20 here, and it is the same folder used in section 1 -- confirm whether a
# dedicated results folder is wanted.
args = {
    'main_dir':       main_dir,
    'save_dir':       os.path.join(main_dir, "id_results", "R_700x400_nM_bs8_bf100_e10_lr0001"),
    'cnn':            "ResNet",     # choices: ["ResNet", "Mobile"]
    'loss':           "binary_crossentropy",
    'height':         700,
    'width':          400,
    'numerical_vars': False,
    'normalise':      True,
    'norm_mode':      "model",     # choices=['z-norm', 'model', 'simple', 'none']
    'rotate':         False,
    'crop':           True,
    'crop_mode':      "uniform",   # choices=['uniform', 'weighted']
    'nb_crops':       1,
    'batch_size':     8,
    'buffer_size':    100,
    'epochs':         20,
    'learning_rate':  0.0001,
    'nb_splits':      5
}

### 2.2. Perform Stratified 5-fold

In [None]:
def stratified_cv(args):
    """Train and evaluate the detection model with stratified K-fold CV.

    The full dataset CSV (``<main_dir>/csv/full_dataset.csv``) is split into
    ``args['nb_splits']`` folds stratified on its ``label`` column. For every
    fold the train/validation datasets are built, the model is trained, its
    history is plotted, and it is evaluated on the validation fold. After the
    last fold the per-fold metrics are saved through ``model.save_metrics()``.

    Args:
        args: Settings dictionary. This function reads ``main_dir``,
            ``save_dir`` and ``nb_splits`` directly; the whole dictionary is
            also forwarded to ``DataProcessor``, ``DetectionCNN`` and the
            ``utils`` helpers.

    Returns:
        None.
    """
    dataset_path = "{}/csv/full_dataset.csv".format(args['main_dir'])
    # NOTE(review): `eval` executes whatever expression is stored in the
    # 'bbox_shape' column. Only use this with trusted CSV files; consider
    # ast.literal_eval if the column only holds Python literals.
    df = pd.read_csv(dataset_path, converters={'bbox_shape': eval}).dropna()
    #print("Dataset dimension: {}".format(len(df)))
    labels = df["label"]

    # Results folder is taken from the settings instead of notebook-global state
    save_dir = args['save_dir']
    os.makedirs(save_dir, exist_ok=True)

    # Create an instance of the DataProcessor
    p = DataProcessor(args,
                      plot_light = False,              # plot only select_crop() images
                      plot_extensive = False,          # plot extensively all images
                      show_prints = False
                     )

    # Create an instance of the model (reused, and reset, across folds)
    model = DetectionCNN(args)

    print("Entering in K-fold Cross Validation...")
    # scikit-learn rejects a `random_state` when `shuffle=False`; shuffling is
    # enabled so that the fixed seed takes effect and the split is reproducible.
    stratified_k_fold = StratifiedKFold(n_splits=args['nb_splits'], random_state=42, shuffle=True)

    # Only the labels matter for stratification, hence the dummy feature array.
    splits = stratified_k_fold.split(np.zeros(len(df)), labels)
    for fold_var, (train_index, val_index) in enumerate(splits, start=1):
        training_data = df.iloc[train_index]
        validation_data = df.iloc[val_index]

        # Load data
        # Both arguments are passed by keyword: a positional argument after a
        # keyword argument is a SyntaxError.
        # NOTE(review): assumes the settings parameter of utils.load_data is
        # named `args` and that the CSV-path parameter is optional -- confirm.
        train_images, train_labels, train_bbox = utils.load_data(df=training_data, args=args)
        val_images, val_labels, val_bbox = utils.load_data(df=validation_data, args=args)

        # Generate datasets
        # NOTE(review): section 1 uses utils.prepare_dataset(p, ...) without
        # `args`; confirm that utils.create_dataset exists with this signature.
        train_ds = utils.create_dataset(p, train_images, train_labels, train_bbox, args)
        val_ds = utils.create_dataset(p, val_images, val_labels, val_bbox, args)

        # Perform normalisation
        train_ds_norm, val_ds_norm = utils.normalisation(train_ds, val_ds, args)

        # Configure for performance
        train_dataset = utils.config_performance(train_ds_norm, args, shuffle=True)
        val_dataset = utils.config_performance(val_ds_norm, args)

        # Train the model
        # Multi-GPU variant, kept for reference:
        #   strategy = tf.distribute.MirroredStrategy()
        #   print('Number of devices: {}'.format(strategy.num_replicas_in_sync))
        #   with strategy.scope():
        #       model = DetectionCNN(args)
        history = model.train(train_dataset, val_dataset, fold_var)

        # Plot history
        plot_history(history, fold_var, save_dir)
        #print(history)

        # Guarantee time for weights to be saved and loaded again
        time.sleep(10)

        # Load best model
        print("Loading best weights from training...")
        model.get_eval(val_dataset, fold_var)
        model.get_preds(val_dataset, val_labels, fold_var)

        tf.keras.backend.clear_session()
        # NOTE(review): if `__reset` is defined inside the DetectionCNN class
        # body, Python name-mangles it to `_DetectionCNN__reset` and this call
        # raises AttributeError -- confirm the method name in models.py.
        model.__reset()

    # Save the values of each fold
    model.save_metrics()

In [None]:
# Run the stratified K-fold training/evaluation with the settings dictionary
# currently bound to `args`.
stratified_cv(args)