# Assignment 12 - Viraj Noorithaya


## Script Parameters


In [None]:
# Alert: Change these when running in production
# Where are you running this? Can be either 'local' or 'colab'
# get_ipython() is only injected by IPython; fall back to "local" when the
# name is missing (plain Python interpreter) instead of raising NameError.
try:
    _shell_description = str(get_ipython())
except NameError:
    _shell_description = ""
model_run_location = "colab" if "google.colab" in _shell_description else "local"

# Do you want to install the required packages?
# By default they are installed everywhere except on a local machine;
# assign True/False explicitly to override.
install_required_packages = model_run_location != "local"

# git repository url
repo_name = "era-v1"
git_repo_url = "https://github.com/nviraj/era-v1.git"

# Is the model being developed or is it in production?
# Can be development or production
code_mode = "development"

# Which branch are you working on?
# Development work happens on the weekly branch, production uses main.
branch_name = "week-12" if code_mode == "development" else "main"
# Folder inside the repository that holds this session's code
folder_name = "Session 12/Submission"

## Code Procurement


In [None]:
import sys
import os

# Current working directory
print(f"CWD: {os.getcwd()}")

# Mount google drive if running on colab
if model_run_location == 'colab':
    # from google.colab import drive
    # drive.mount('/content/drive')
    # %cd /content/drive/MyDrive/WorkSpace/era-v1/Session 10/Submission

    # Delete the folder if it exists
    # Avoids fatal: destination path already exists and is not an empty directory.
    # Get code from github
    !rm -rf {repo_name} && git clone {git_repo_url}

    # Switch to repo folder, Needed to switch branch
    # Switch branch and change to the correct directory
    !cd "{repo_name}" && git checkout {branch_name} && cd "{folder_name}"

    # Make custom modules available
    print(f"Appending folder to path in order to detect modules: {folder_name}")
    sys.path.append(f"era-v1/{folder_name}")

## Library Installation (Optional)


In [None]:
# # Install any required libraries not present in your working environment

if install_required_packages:
    # # Needed locally and in colab
    # !pip install torchsummary
    # !conda install -c frgfm torchscan

    # Run this cell if you are using colab or local machine
    import sys
    # https://lightning.ai/docs/pytorch/stable/
    !{sys.executable} -m pip install -q lightning
    # https://github.com/davidtvs/pytorch-lr-finder
    !{sys.executable} -m pip install -q torch-lr-finder
    # https://github.com/tyleryep/torchinfo
    !{sys.executable} -m pip install -q torchinfo
    # https://github.com/jacobgil/pytorch-grad-cam
    !{sys.executable} -m pip install -q grad-cam
    # https://www.gradio.app/
    !{sys.executable} -m pip install -q gradio
    # https://github.com/tensorflow/tensorboard
    !{sys.executable} -m pip install -q tensorboard


    # Needed in local machine only
    if model_run_location == 'local':
        %conda install --yes --prefix {sys.prefix} -c conda-forge tqdm
        %conda install --yes --prefix {sys.prefix} -c anaconda ipywidgets
        %conda install --yes --prefix {sys.prefix} -c conda-forge imgaug
        %conda install --yes --prefix {sys.prefix} -c conda-forge albumentations  
        %conda install --yes --prefix {sys.prefix} -c conda-forge tensorboard       


## Import Libraries


In [None]:
# Import necessary modules (external libs)
import matplotlib.pyplot as plt
import torch
import torch.nn.functional as F
from torch.optim.lr_scheduler import OneCycleLR
from pytorch_lightning import LightningModule, Trainer
from torchmetrics import Accuracy
from lightning_fabric.utilities.seed import seed_everything
from pytorch_lightning.loggers import TensorBoardLogger, CSVLogger
import tensorboard

In [None]:
# In development mode, enable IPython's autoreload extension so that edits to
# the user defined modules imported below are picked up without restarting
# the kernel.
if code_mode == "development":
    %load_ext autoreload
    # Mode 2: reload modules automatically before executing code
    %autoreload 2

# Import user defined modules
import modules.config as config
from models.custom_resnet import CustomResNet as Net
from models.custom_resnet import detailed_model_summary
from modules.dataset import CIFAR_CLASSES
from modules.lightning_dataset import CIFARDataModule
from modules.trainer import train_and_test_model
from modules.utils import get_num_workers, save_model
from modules.visualize import (
    plot_gradcam_images,
    plot_misclassified_images,
    plot_sample_training_images,
    plot_train_test_metrics,
)

## Script Parameters


In [None]:
# Alert: for production, change these values in modules/config.py, not here.
#
# The config module names its constants in all caps separated by underscores
# (https://realpython.com/python-constants/); they are copied into lowercase
# notebook variables below.

# Where the data, checkpoints and logs are stored
data_path, checkpoint_path, logging_path = (
    config.DATA_PATH,
    config.CHECKPOINT_PATH,
    config.LOGGING_PATH,
)

# Number of epochs, batch size and the seed value used for reproducibility
num_epochs, batch_size, seed = (
    config.NUM_EPOCHS,
    config.BATCH_SIZE,
    config.SEED,
)

## Workers and Options


In [None]:
# Number of workers to use, chosen from where the notebook is running
num_workers = get_num_workers(model_run_location)

# https://pytorch.org/docs/stable/generated/torch.set_float32_matmul_precision.html#torch.set_float32_matmul_precision
torch.set_float32_matmul_precision("medium")

# Collect the run settings first, then print them in one call
run_summary = [
    f"Run location: {model_run_location}, \n",
    f"Number of workers: {num_workers}, \n",
    f"Batch size: {batch_size} \n",
    f"Seed: {seed} \n",
    f"Data path: {data_path} \n",
    f"Logging path: {logging_path} \n",
    f"Checkpoint path: {checkpoint_path}",
]
print(*run_summary)

### Reproducibility settings


In [None]:
# Seed the random number generators for reproducibility.
# seed_everything() is called in place of the manual seeding below, which is
# kept commented out for reference only.
# # https://pytorch.org/docs/stable/notes/randomness.html
# # https://github.com/pytorch/pytorch/issues/7068
# import random
# import numpy as np

# torch.manual_seed(seed)
# random.seed(seed)
# np.random.seed(seed)

# if device_support == "cuda":
#     torch.cuda.manual_seed(seed)
#     torch.cuda.manual_seed_all(seed)

# seed comes from config.SEED
seed_everything(seed)

## Dataloaders/ Datamodule


In [None]:
# Build the CIFAR data module from the notebook-level settings
cifar_data_module = CIFARDataModule(
    data_path=data_path,
    batch_size=batch_size,
    seed=seed,
    num_workers=num_workers,
)
# Alternative kept for reference: the same call with num_workers=0
# cifar_data_module = CIFARDataModule(data_path=data_path, batch_size=batch_size, seed=seed, num_workers=0)

# Run the data module's preparation and setup steps explicitly,
# because the dataloaders are requested directly below
cifar_data_module.prepare_data()
cifar_data_module.setup()

# Dataloaders for training, validation and testing.
# The underlying datasets are also exposed on the data module:
# cifar_data_module.training_dataset / validation_dataset / testing_dataset
train_loader, val_loader, test_loader = (
    cifar_data_module.train_dataloader(),
    cifar_data_module.val_dataloader(),
    cifar_data_module.test_dataloader(),
)

# Class labels of the dataset
classes = CIFAR_CLASSES
print(f"Class Labels: {classes}")

## Sample Training Images


In [None]:
# Get a batch of training data from train_loader
batch_data, batch_label = next(iter(train_loader))

# Plot 30 sample images from the training data along with their labels
# plot_sample_training_images() is imported from modules.visualize
fig, axs = plot_sample_training_images(batch_data, batch_label, class_label=classes, num_images=30)
plt.show()

## Model Summary


In [None]:
# Define the model (Net is CustomResNet, imported from models.custom_resnet)
# No need to send the model to device as we are using the LightningModule
model = Net()

# Enable the model's print_shape flag while the summary is produced
# (presumably makes the model print tensor shapes -- confirm in custom_resnet)
model.print_shape = True

# Print the model summary for an input of size (3, 32, 32)
# Earlier alternative, kept for reference:
# summary(model, input_size=(3, 32, 32))
detailed_model_summary(model, input_size=(3, 32, 32))

# Disable the flag again for cleaner train/test output
model.print_shape = False

## Train and Test


### Define Logger


In [None]:
# Define the logger that is later passed to train_and_test_model.
# Logs are written under logging_path with the run name "lightning_logs";
# logging of the model graph is turned off.
# https://lightning.ai/docs/pytorch/stable/extensions/generated/lightning.pytorch.loggers.TensorBoardLogger.html
logger = TensorBoardLogger(save_dir=logging_path, name="lightning_logs", log_graph=False)
# Alternative kept for reference: log to CSV files instead of TensorBoard
# logger = CSVLogger(save_dir=logging_path, name="lightning_logs")

### Train and test model


In [None]:
# Run the model for num_epochs.
# train_and_test_model returns the trainer, the collected results and the
# misclassified image data. The latter is expected to be a dictionary of lists
# keyed by "images", "ground_truths" and "predicted_vals" (confirm against
# modules.trainer).
# No placeholder dictionary is created beforehand: it was overwritten by this
# call anyway, and if the call failed it would leave empty data behind for the
# plotting cells instead of a clear error.
trainer, results, misclassified_image_data = train_and_test_model(
    batch_size=batch_size,
    num_epochs=num_epochs,
    model=model,
    datamodule=cifar_data_module,
    logger=logger,
    # Alert: Change this when running in production
    debug=False,
)

## Metrics


In [None]:
# Using trainer and tensorboard logger, show the model metrics in tensorboard
%load_ext tensorboard

!tensorboard --logdir {logging_path}

In [None]:
# print(type(results))
# print(results)
# print(results.keys())
# print(results["train_loss"])

In [None]:
# Plot the accuracy and loss graphs from the training results using
# plot_train_test_metrics(), which is imported from modules.visualize
print("Plotting accuracy and loss graphs.")
fig, axs = plot_train_test_metrics(results)
plt.show()
# Disabled: save the figure to disk
# plt.savefig("Loss and Accuracy Metrics.png")

## Show Misclassified images


In [None]:
# print(type(misclassified_image_data))
# print(len(misclassified_image_data))
# print(misclassified_image_data.keys())
# print(misclassified_image_data["ground_truths"])

In [None]:
# Plot 10 of the misclassified images returned by train_and_test_model,
# using plot_misclassified_images() imported from modules.visualize
fig, axs = plot_misclassified_images(data=misclassified_image_data, class_label=classes, num_images=10)
plt.show()
# Disabled: save the figure to disk
# plt.savefig("Misclassified Images.png")

## GradCAM (Misclassified)


In [None]:
# Plot GradCAM visualisations for 10 of the misclassified images,
# using plot_gradcam_images() imported from modules.visualize
fig, axs = plot_gradcam_images(
    model=model,
    data=misclassified_image_data,
    class_label=classes,
    # Use penultimate block of resnet18 layer 3 as the target layer for gradcam
    # Decided using model summary so that dimensions > 7x7
    # NOTE(review): the model here is CustomResNet and the layer used is
    # the last element of model.layer3_r2 -- confirm the wording above
    target_layers=[model.layer3_r2[-1]],
    targets=None,
    num_images=10,
    image_weight=0.1,
)
plt.show()
# Disabled: save the figure to disk
# plt.savefig("GradCAM Images.png")