# Scene Recognition with Deep Learning
Scene classification for 15 scene types with a state-of-the-art approach: deep learning. The task is also known as image classification. 

In [None]:
# Set to True when running on Colab; later cells read this flag to choose
# Colab-specific paths.
use_colab = False

## Setup for Colab
You can skip this part if you are not running your notebook on Colab.

### GPU Configuration

We'll set up GPU computation on Colab.

Click on Runtime $\rightarrow$ Change Runtime Type, and select "GPU" under hardware accelerator.

In [None]:
# Uncomment the commands below when running on Colab: they download the project
# data archive, unpack it into the working directory, and delete the zip file.
# NOTE(review): --no-check-certificate disables TLS certificate verification.
# !wget "https://faculty.cc.gatech.edu/~hays/compvision2021fall/projects/proj4_data.zip" --no-check-certificate -O data.zip && unzip -qq data.zip
# !rm ./data.zip

### Upload code and unit tests

In [None]:
# Uncomment the commands below when running on Colab. They expect cv_proj4.zip
# in the working directory: unpack it, move ./src/vision to the top level, and
# install the project in editable mode.
# !unzip -qq cv_proj4.zip -d ./
# !mv -v ./src/vision ./
# !pip install -e .

### Preparation

Import the required functions:

In [None]:
import os
import random

import numpy as np
import torch

from vision.runner import Trainer, MultiLabelTrainer
from vision.optimizer import get_optimizer
from vision.simple_net import SimpleNet
from vision.simple_net_final import SimpleNetFinal
from vision.my_resnet import MyResNet18
from vision.multilabel_resnet import MultilabelResNet18
from vision.data_transforms import (
    get_fundamental_transforms,
    get_fundamental_normalization_transforms,
    get_fundamental_augmentation_transforms,
    get_all_transforms,
)
from vision.stats_helper import compute_mean_and_std
from vision.confusion_matrix import (
    generate_confusion_data,
    generate_confusion_matrix,
    plot_confusion_matrix,
    get_pred_images_for_target,
    generate_and_plot_confusion_matrix,
    generate_and_plot_accuracy_table,
)
from vision.dl_utils import save_trained_model_weights

%load_ext autoreload
%autoreload 2

In [None]:
from tests.utils import verify
from tests.test_stats_helper import test_mean_and_variance
from tests.test_image_loader import (
    test_dataset_length,
    test_unique_vals,
    test_class_values,
    test_load_img_from_path,
)
from tests.test_data_transforms import (
    test_fundamental_transforms,
    test_data_augmentation_transforms,
    test_data_augmentation_with_normalization_transforms,
)
from tests.test_dl_utils import test_compute_accuracy, test_compute_loss
from tests.test_simple_net import test_simple_net
from tests.test_simple_net_final import test_simple_net_final
from tests.test_my_resnet import test_my_resnet
from tests.test_multilabel_resnet import test_multilabel_resnet
from tests.test_confusion_matrix import (
    test_generate_confusion_matrix,
    test_generate_confusion_matrix_normalized,
)

In [None]:
# Ask for GPU execution, then fall back to the CPU when no CUDA device is
# available on this machine.
is_cuda = True
if not torch.cuda.is_available():
    is_cuda = False

In [None]:
# Both environments read the data set from ./data/; only the checkpoint
# directory depends on whether the notebook runs on Colab.
data_path = "./data/"
if use_colab:
    model_path = "./model_checkpoints/"
else:
    model_path = "../model_checkpoints/"

## SimpleNet
Training the network involves the following four components:

1. **Dataset** - an object which can load the data and labels given an index.
2. **Model** - an object that contains the network architecture definition.
3. **Loss function** - a function that measures how far the network output is from the ground truth label.
4. **Optimizer** - an object that optimizes the network parameters to reduce the loss value.

### Datasets
The image loader maps the scene names (text) to integer indices 0 to 14.

In [None]:
inp_size = (64, 64)

# Run each image-loader unit test through verify and print its outcome next to
# a label describing what was checked.
for label, check in (
    ("Testing your image loader (length):", test_dataset_length),
    ("Testing your image loader (values):", test_unique_vals),
    ("Testing your image loader (classes):", test_class_values),
    ("Testing your image loader (paths):", test_load_img_from_path),
):
    print(label, verify(check))

### Data transforms

In [None]:
# Run the unit test for the fundamental transforms through verify and print the outcome.
print("Testing your fundamental data transforms: ", verify(test_fundamental_transforms))

### Model

In [None]:
# Run the unit test for the SimpleNet architecture through verify and print the outcome.
print("Testing your SimpleNet architecture: ", verify(test_simple_net))

In [None]:
# Build a SimpleNet with its default constructor arguments.
simple_model = SimpleNet()

### Loss function

In [None]:
# Print the model's string representation.
print(simple_model)

### Optimizer

In [None]:
# Settings handed to get_optimizer: optimizer type, learning rate, and weight decay.
optimizer_config = {"optimizer_type": "adam", "lr": 3.5e-4, "weight_decay": 1e-6}

In [None]:
# Build the optimizer for simple_model from optimizer_config.
optimizer = get_optimizer(simple_model, optimizer_config)
# Uncomment to inspect the optimizer that was created:
# print(type(optimizer))
# print(optimizer.defaults)

### Trainer

In [None]:
# Run the trainer unit tests through verify and print each outcome with its label.
for label, check in (
    ("Testing your trainer (loss values): ", test_compute_loss),
    ("Testing your trainer (accuracy computation): ", test_compute_accuracy),
):
    print(label, verify(check))

In [None]:
# re-init the model so that the weights are all random
simple_model_base = SimpleNet()
optimizer = get_optimizer(simple_model_base, optimizer_config)

trainer = Trainer(
    data_dir=data_path,
    model=simple_model_base,
    optimizer=optimizer,
    model_dir=os.path.join(model_path, "simple_net"),
    train_data_transforms=get_fundamental_transforms(inp_size),
    val_data_transforms=get_fundamental_transforms(inp_size),
    batch_size=32,
    load_from_disk=False,
    cuda=is_cuda,
)

In [None]:
%%time
# Run the training loop for 30 epochs with the trainer configured for SimpleNet.
trainer.run_training_loop(num_epochs=30)

In [None]:
# Plot the trainer's loss history and accuracy.
trainer.plot_loss_history()
trainer.plot_accuracy()

In [None]:
# Report the most recent entry of each accuracy history kept by the trainer.
train_accuracy, validation_accuracy = (
    trainer.train_accuracy_history[-1],
    trainer.validation_accuracy_history[-1],
)
print("Train Accuracy = {}; Validation Accuracy = {}".format(train_accuracy, validation_accuracy))

In [None]:
# Pass the trained SimpleNet to save_trained_model_weights, with the current
# directory as the output directory.
save_trained_model_weights(simple_model_base, out_dir="./")

## SimpleNet with additional modifications

### Augment training data

Increase our amount of training data by left-right mirroring and color jittering the training images during the learning process.

In [None]:
# Run the unit test for the augmentation transforms through verify and print the outcome.
print(
    "Testing your data transforms with data augmentation: ",
    verify(test_data_augmentation_transforms),
)

### Normalize Training Data

We are going to "zero-center" and "normalize" the dataset so that, across the whole dataset, the pixel values have zero mean and a standard deviation of 1.

In [None]:
# Check the statistics helper with its unit test, then compute the mean and
# standard deviation for the data under data_path.
print("Testing your mean and std computation: ", verify(test_mean_and_variance))
dataset_mean, dataset_std = compute_mean_and_std(data_path)

In [None]:
# Show the dataset statistics returned by compute_mean_and_std.
print("Dataset mean = {}, standard deviation = {}".format(dataset_mean, dataset_std))

In [None]:
# Run the unit test for augmentation combined with normalization through verify
# and print the outcome.
print(
    "Testing your normalized data transforms: ",
    verify(test_data_augmentation_with_normalization_transforms),
)

In [None]:
# Image size passed to the transforms for SimpleNetFinal; unchanged from the
# value used in the SimpleNet section.
inp_size = (64, 64)

### Modify the network.

1. Add a dropout layer.
2. Add one or two more blocks of “conv/pool/relu”.
3. Add a batch normalization layer after each convolutional layer (except for the last).

In [None]:
# Run the unit test for the SimpleNetFinal architecture through verify and print the outcome.
print("Testing your SimpleNetFinal architecture: ", verify(test_simple_net_final))

In [None]:
# Build SimpleNetFinal with its default constructor arguments and print its
# string representation.
simple_model_final = SimpleNetFinal()
print(simple_model_final)

In [None]:
# Optimizer settings for SimpleNetFinal; the values match those used for
# SimpleNet earlier in the notebook.
optimizer_config = {"optimizer_type": "adam", "lr": 3.5e-4, "weight_decay": 1e-6}

In [None]:
# Re-create the network so training starts from a new instance, and give it
# its own optimizer.
simple_model_final = SimpleNetFinal()
optimizer = get_optimizer(simple_model_final, optimizer_config)

# Checkpoints for this model go into their own sub-directory of model_path.
simple_final_dir = os.path.join(model_path, "simple_model_final")

# Training data uses the full transform pipeline; validation data uses the
# fundamental transforms plus normalization. Both receive the dataset
# statistics wrapped in single-element lists.
trainer = Trainer(
    model=simple_model_final,
    optimizer=optimizer,
    data_dir=data_path,
    model_dir=simple_final_dir,
    train_data_transforms=get_all_transforms(inp_size, [dataset_mean], [dataset_std]),
    val_data_transforms=get_fundamental_normalization_transforms(
        inp_size, [dataset_mean], [dataset_std]
    ),
    batch_size=32,
    cuda=is_cuda,
    load_from_disk=False,
)

In [None]:
%%time
# Run the training loop for 30 epochs with the trainer configured for SimpleNetFinal.
trainer.run_training_loop(num_epochs=30)

In [None]:
# Plot the trainer's loss history and accuracy for SimpleNetFinal.
trainer.plot_loss_history()
trainer.plot_accuracy()

In [None]:
# Report the most recent entry of each accuracy history kept by the trainer.
train_accuracy, validation_accuracy = (
    trainer.train_accuracy_history[-1],
    trainer.validation_accuracy_history[-1],
)
print("Train Accuracy = {}; Validation Accuracy = {}".format(train_accuracy, validation_accuracy))

### Save the model for your SimpleNetFinal

In [None]:
# Pass the trained SimpleNetFinal to save_trained_model_weights, with the
# current directory as the output directory.
save_trained_model_weights(simple_model_final, out_dir="./")

### Analysis using confusion matrix

In [None]:
# Run both confusion-matrix unit tests through verify and print each outcome.
for check in (
    test_generate_confusion_matrix,
    test_generate_confusion_matrix_normalized,
):
    print(verify(check))

In [None]:
%%time
# Collect the targets, predictions, and class labels that generate_confusion_data
# returns for the trainer's model on the validation dataset.
targets, predictions, class_labels = generate_confusion_data(
    trainer.model, trainer.val_dataset, use_cuda=is_cuda
)

In [None]:
# Build the confusion matrix from the targets and predictions; the third
# argument is the number of class labels.
confusion_matrix = generate_confusion_matrix(targets, predictions, len(class_labels))

In [None]:
# Plot the confusion matrix together with the class labels.
plot_confusion_matrix(confusion_matrix, class_labels)

## Part 3: ResNet

In [None]:
# Image size passed to the data transforms in the ResNet section.
inp_size = (224, 224)

### Fine-tuning the ResNet

In [None]:
# Run the unit test for the ResNet architecture through verify and print the outcome.
print("Testing your ResNet architecture: ", verify(test_my_resnet))

In [None]:
# Build MyResNet18 with its default constructor arguments and print its string
# representation.
my_resnet = MyResNet18()
print(my_resnet)

In [None]:
# Optimizer settings used for MyResNet18: optimizer type, learning rate, and weight decay.
optimizer_config = {"optimizer_type": "adam", "lr": 5.2e-4, "weight_decay": 5e-7}

In [None]:
# Re-create the network so training starts from a new instance, and give it
# its own optimizer.
my_resnet = MyResNet18()
optimizer = get_optimizer(my_resnet, optimizer_config)

# Checkpoints for this model go into their own sub-directory of model_path.
resnet_dir = os.path.join(model_path, "resnet18")

# Training data uses the full transform pipeline; validation data uses the
# fundamental transforms plus normalization. Both receive the dataset
# statistics wrapped in single-element lists.
trainer = Trainer(
    model=my_resnet,
    optimizer=optimizer,
    data_dir=data_path,
    model_dir=resnet_dir,
    train_data_transforms=get_all_transforms(inp_size, [dataset_mean], [dataset_std]),
    val_data_transforms=get_fundamental_normalization_transforms(
        inp_size, [dataset_mean], [dataset_std]
    ),
    batch_size=32,
    cuda=is_cuda,
    load_from_disk=False,
)

In [None]:
%%time
# Run the training loop for 5 epochs with the trainer configured for MyResNet18.
trainer.run_training_loop(num_epochs=5)

In [None]:
# Plot the trainer's loss history and accuracy for MyResNet18.
trainer.plot_loss_history()
trainer.plot_accuracy()

In [None]:
# Report the most recent entry of each accuracy history kept by the trainer.
train_accuracy, validation_accuracy = (
    trainer.train_accuracy_history[-1],
    trainer.validation_accuracy_history[-1],
)
print("Train Accuracy = {}; Validation Accuracy = {}".format(train_accuracy, validation_accuracy))

### Save Trained MyResnet18 model

In [None]:
# Pass the trained MyResNet18 to save_trained_model_weights, with the current
# directory as the output directory.
save_trained_model_weights(my_resnet, out_dir="./")

### Visualize and Analyze Confusion Matrix

In [None]:
# Generate and plot the confusion matrix for my_resnet on the validation dataset.
generate_and_plot_confusion_matrix(my_resnet, trainer.val_dataset, use_cuda=is_cuda)

In [None]:
#########################
# Use this cell to look at example images behind the largest off-diagonal
# entries of the ResNet confusion matrix: for the most frequent confusions it
# prints the path of one randomly chosen misclassified validation image.
#########################
# Number of (ground truth, prediction) confusions to sample an image from.
num_confusions = 3

targets, preds, classes = generate_confusion_data(
    my_resnet, trainer.val_dataset, use_cuda=is_cuda
)
conf = generate_confusion_matrix(targets, preds, len(classes))

# Work on a copy with the diagonal zeroed, so the per-row maximum is always a
# misclassification, whatever the number of classes and even when a class is
# predicted correctly less often than it is confused with another class.
# Rows are treated as ground-truth classes and columns as predicted classes,
# matching the (pred, gt) arguments passed to get_pred_images_for_target below.
# NOTE(review): confirm that generate_confusion_matrix uses this orientation.
off_diagonal = np.array(conf)
np.fill_diagonal(off_diagonal, 0)

# For every ground-truth class: the wrong class it is most often predicted as
# (ind) and the size of that confusion (arr).
ind = off_diagonal.argmax(axis=1)
arr = off_diagonal.max(axis=1)

# Ground-truth classes with the largest confusions, in ascending order.
top3 = np.argsort(arr)[-num_confusions:]

incImages = []
for gt in top3:
    if arr[gt] == 0:
        # This class is never confused with another one; nothing to show.
        continue
    pred = ind[gt]
    incImages.append(
        get_pred_images_for_target(my_resnet, trainer.val_dataset, pred, gt, is_cuda)
    )

for paths in incImages:
    # random.choice raises on an empty sequence, so skip confusions for which
    # no image paths were returned.
    if len(paths) == 0:
        continue
    print(random.choice(paths))


# Multilabel Classification - Scene Attribute Prediction

A subset of the above dataset containing images only from the 'coast', 'highway', 'mountain', 'opencountry', 'street' classes will be used. Target labels are represented as binary arrays corresponding to the following attributes:
<ul>
    <li> Clouds </li>
    <li> Water body </li>
    <li> People </li>
    <li> Animals </li>
    <li> Natural </li>
    <li> Man-made </li>
    <li> Vehicles </li>
</ul>

For example, if an input image of a natural scene has clouds, people and vehicles only, we expect an output of [1, 0, 1, 0, 1, 0, 1].

## Model Architecture & Training

In [None]:
# Run the unit test for the MultilabelResNet architecture through verify and print the outcome.
print("Testing your MultilabelResNet architecture: ", verify(test_multilabel_resnet))

In [None]:
# Build MultilabelResNet18 with its default constructor arguments and print its
# string representation.
multi_resnet = MultilabelResNet18()
print(multi_resnet)

In [None]:
# Image size passed to the transforms for the multilabel ResNet; same value as
# in the ResNet section.
inp_size = (224, 224)

In [None]:
# Optimizer settings used for MultilabelResNet18: optimizer type, learning rate, and weight decay.
optimizer_config = {"optimizer_type": "adam", "lr": 3.8e-4, "weight_decay": 1e-6}

In [None]:
# Re-create the network so training starts from a new instance, and give it
# its own optimizer.
multi_resnet = MultilabelResNet18()
optimizer = get_optimizer(multi_resnet, optimizer_config)

# Checkpoints for this model go into their own sub-directory of model_path.
multilabel_dir = os.path.join(model_path, "multilabel_resnet18")

# Training data uses the full transform pipeline; validation data uses the
# fundamental transforms plus normalization. Both receive the dataset
# statistics wrapped in single-element lists.
trainer = MultiLabelTrainer(
    model=multi_resnet,
    optimizer=optimizer,
    data_dir=data_path,
    model_dir=multilabel_dir,
    train_data_transforms=get_all_transforms(inp_size, [dataset_mean], [dataset_std]),
    val_data_transforms=get_fundamental_normalization_transforms(
        inp_size, [dataset_mean], [dataset_std]
    ),
    batch_size=32,
    cuda=is_cuda,
    load_from_disk=False,
)

In [None]:
%%time
# Run the training loop for 5 epochs with the trainer configured for MultilabelResNet18.
trainer.run_training_loop(num_epochs=5)

In [None]:
# Plot the trainer's loss history and accuracy for MultilabelResNet18.
trainer.plot_loss_history()
trainer.plot_accuracy()

In [None]:
# Report the most recent entry of each accuracy history kept by the trainer.
train_accuracy, validation_accuracy = (
    trainer.train_accuracy_history[-1],
    trainer.validation_accuracy_history[-1],
)
print("Train Accuracy = {}; Validation Accuracy = {}".format(train_accuracy, validation_accuracy))

### Save Trained MultilabelResnet18 model

In [None]:
# Pass the trained MultilabelResNet18 to save_trained_model_weights, with the
# current directory as the output directory.
save_trained_model_weights(multi_resnet, out_dir="./")

## Visualize Results

In [None]:
# Attribute names, listed in the same order as the attributes in the section
# introduction.
att_list = [
    "clouds",
    "water",
    "people",
    "animals",
    "natural",
    "man-made",
    "vehicles",
]

# Generate and plot the accuracy table for the multilabel model on the
# validation dataset; the attribute count follows from the label list (7).
generate_and_plot_accuracy_table(
    multi_resnet,
    trainer.val_dataset,
    num_attributes=len(att_list),
    attribute_labels=att_list,
    use_cuda=is_cuda,
)