# ResNet50v2 trained on ImageNet-1K

## Install Dependencies

In [None]:
# Install third-party packages into the notebook environment.
# Each line is an IPython shell escape; `--quiet` reduces pip's output.
!pip install albumentations --quiet
!pip install torchsummary --quiet
!pip install tqdm --quiet
!pip install matplotlib --quiet
!pip install torch-lr-finder --quiet

## Import Modules

In [None]:
# Standard Library Imports
from collections import OrderedDict

# Third Party Imports
import matplotlib.pyplot as plt
import torch
import torchsummary
import torch.nn as nn
import torch.optim as optim
from torch.cuda.amp import autocast, GradScaler
from torch_lr_finder import LRFinder

# Local Imports
from datamodule.dataloader import ImageNetDataLoader
from datamodule.augmentations import ImageNetAugmentations
from model.resnets import ResNet50
from train.training_utils import get_lr, train, test, save_checkpoint
from utils.visualize import display_loss_and_accuracies
from configs.config import TrainingConfig, ModelConfig, DataConfig
from configs.aws_setup import setup_training_environment

## Training Configuration

In [None]:
# Prepare the training environment. The helper hands back an environment-level
# config object and a mapping of tuned training parameters.
config, train_params = setup_training_environment(debug=True)

# Default configuration objects for training, model and data
training_config, model_config, data_config = TrainingConfig(), ModelConfig(), DataConfig()

# Override the training defaults with the values chosen by the environment setup
for _param in ('batch_size', 'num_workers'):
    setattr(training_config, _param, train_params[_param])
training_config.gradient_accumulation_steps = config.gradient_accumulation_steps

# Run on the GPU when CUDA is available, otherwise fall back to the CPU
device = 'cpu'
if torch.cuda.is_available():
    device = 'cuda'

## Augmentations

Augmentations applied to the ImageNet-1K training set during training.

In [None]:
# Build the augmentation provider. Its transforms are requested per mode via
# `get_transforms(...)` when the training data loader is created.
augmentations = ImageNetAugmentations()

## DataLoaders

In [None]:
# Settings shared by the training and validation loaders
_shared_loader_args = {
    'batch_size': training_config.batch_size,
    'num_workers': training_config.num_workers,
    'device': device,
}

# Training loader: reads from the training path and applies the 'train' transforms
train_loader = ImageNetDataLoader(
    beton_path=data_config.train_path,
    mode='train',
    transforms=augmentations.get_transforms('train'),
    **_shared_loader_args,
)

# Validation loader: reads from the validation path; no transforms are passed
test_loader = ImageNetDataLoader(
    beton_path=data_config.val_path,
    mode='val',
    **_shared_loader_args,
)

## Load Model

In [None]:
# Instantiate the network for 1000 output classes and move it to the target device
model = ResNet50(num_classes=1000)
model.to(device)

# Print a layer-by-layer summary for a single 3-channel square input
_input_side = data_config.input_size
_input_shape = (3, _input_side, _input_side)
torchsummary.summary(model, _input_shape, device=device)

## Learning Rate Finder

In [None]:
# Cross-entropy loss for the classification task
criterion = nn.CrossEntropyLoss()

# Adam optimizer; its hyper-parameters come from the training configuration
_adam_settings = {
    'lr': training_config.learning_rate,
    'weight_decay': training_config.weight_decay,
}
optimizer = optim.Adam(model.parameters(), **_adam_settings)

# A gradient scaler is only created when mixed precision is requested and CUDA
# is available; otherwise `scaler` stays None.
_use_amp = training_config.mixed_precision and torch.cuda.is_available()
scaler = GradScaler() if _use_amp else None

# Wrap the model with torch.compile (done after the optimizer has been built)
model = torch.compile(model)

# Sweep the learning rate exponentially up to 10 over 200 iterations, plot the
# resulting curve, then call reset() on the finder before real training starts.
_sweep_settings = {'end_lr': 10, 'num_iter': 200, 'step_mode': "exp"}
lr_finder = LRFinder(model, optimizer, criterion, device=device)
lr_finder.range_test(train_loader, **_sweep_settings)
lr_finder.plot()
lr_finder.reset()

---

## Parameters

In [None]:
# Hand-entered learning rate; presumably read off the LR-finder plot produced
# by the previous cell -- TODO confirm.
MAX_LR = 7.19E-02    # Suggested LR
# Length of the training loader, passed to the scheduler as its steps per epoch
STEPS_PER_EPOCH = len(train_loader)

## Training Loop

In [None]:
# Per-epoch histories used later to plot the loss / accuracy / LR curves
train_losses = []
test_losses = []
train_acc = []
test_acc = []
learning_rates = []
test_incorrect_pred = {'images': [], 'ground_truths': [], 'predicted_vals': []}

# Resolve the epoch count once so the scheduler and the loop always agree.
# TrainingConfig is defined outside this notebook, so accept either attribute
# name (`epochs` or `num_epochs`) instead of mixing the two.
num_epochs = getattr(training_config, 'epochs', None)
if num_epochs is None:
    num_epochs = training_config.num_epochs

# One-cycle schedule: linear, three-phase, with the warm-up sized to 5 epochs.
# NOTE(review): the peak LR is taken from the training config; the MAX_LR
# constant defined in the "Parameters" cell is not used here -- confirm intent.
scheduler = torch.optim.lr_scheduler.OneCycleLR(
    optimizer,
    max_lr=training_config.learning_rate,
    steps_per_epoch=STEPS_PER_EPOCH,
    epochs=num_epochs,
    pct_start=5 / num_epochs,
    div_factor=100,
    three_phase=True,
    final_div_factor=100,
    anneal_strategy="linear",
)

# For each epoch
for epoch in range(1, num_epochs + 1):
    print(f'Epoch {epoch}')

    # Train for one epoch and record the training accuracy, mean loss and the
    # learning rate the optimizer holds at the end of the epoch
    correct, processed, train_loss = train(
        model,
        device,
        train_loader,
        optimizer,
        criterion,
        scheduler,
        scaler,
        training_config.gradient_accumulation_steps,
    )
    train_acc.append(100 * correct / processed)
    train_losses.append(train_loss / len(train_loader))
    learning_rates.append(get_lr(optimizer))

    # Evaluate on the validation loader and record the test accuracy and loss
    correct, test_loss = test(model, device, test_loader, criterion)
    test_acc.append(100. * correct / len(test_loader.dataset))
    test_losses.append(test_loss)

    # Save a checkpoint for this epoch.
    # NOTE(review): `model` is the torch.compile wrapper at this point, so the
    # saved keys may carry the wrapper's prefix -- verify against the loader.
    save_checkpoint({
        'epoch': epoch,
        'model_state_dict': model.state_dict(),
        'optimizer_state_dict': optimizer.state_dict(),
        'train_loss': train_loss,
        'val_loss': test_loss,
    }, f'checkpoint_epoch_{epoch}.pt')


## Visualize Train and Test - Loss and Accuracies

In [None]:
# Hand the per-epoch train/test loss and accuracy histories collected in the
# training loop to the project's plotting helper.
display_loss_and_accuracies(train_losses, train_acc, test_losses, test_acc)

## Visualize One Cycle Policy Implementation

In [None]:
# Draw the learning rate recorded at the end of each epoch on the current axes
lr_axes = plt.gca()
lr_axes.plot(learning_rates)
lr_axes.set_title("Training Learning Rate")
lr_axes.set_xlabel("Training Epochs")
lr_axes.set_ylabel("Learning Rate")
plt.show()