# Base CNN model from NVIDIA paper


In [None]:
# Mount Google Drive at /content/drive; the project paths used in the cells
# below all live under this mount point.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
import os

PROJECT_ROOT = '/content/drive/MyDrive/cs-7643 project v2'
CODE_DIRECTORY = 'NVIDIA Dave Starter Code'

# Checking that files are accessible for the current user.
# os.listdir raises OSError (e.g. FileNotFoundError) when PROJECT_ROOT itself is
# missing -- Drive not mounted, or no shortcut to the shared folder -- so that
# case is treated as "nothing found" and the hint below is still printed
# instead of a traceback.
try:
    project_entries = os.listdir(PROJECT_ROOT)
except OSError:
    project_entries = []

if CODE_DIRECTORY not in project_entries:
    # If this message displays, please create a shortcut to the group shared folder.
    print("Unable to access project files.")
else:
    print("Project files are accessible.")

Project files are accessible.


In [None]:
# Uncomment the command below if there is some unforeseen need to unpack the compressed dataset again.
# !unzip "/content/drive/MyDrive/cs-7643 project v2/NVIDIA Dave Starter Code/data/dataset.zip" -d "/content/drive/MyDrive/cs-7643 project v2/NVIDIA Dave Starter Code/data"

In [None]:
# Switching to the code directory to allow imports and access to dataset.
# The project-local imports (`utils.dataset`, `models`) and the relative
# './checkpoints/...' path used later in this notebook resolve against this
# working directory. `!pwd` echoes the resulting directory as a sanity check.
%cd "/content/drive/MyDrive/cs-7643 project v2/NVIDIA Dave Starter Code"
!pwd

/content/drive/.shortcut-targets-by-id/1SJkvN9wrOlKm97_4zyTZ0G3oipVi4Me0/cs-7643 project v2/NVIDIA Dave Starter Code
/content/drive/.shortcut-targets-by-id/1SJkvN9wrOlKm97_4zyTZ0G3oipVi4Me0/cs-7643 project v2/NVIDIA Dave Starter Code


In [2]:
# Install ipywidgets and enable the classic-notebook widget extension.
# NOTE(review): presumably required so the tqdm notebook progress bars used by
# the training cells can render -- confirm.
! pip install ipywidgets
! jupyter nbextension enable --py widgetsnbextension

404 Client Error: Not Found for url: http://metadata/computeMetadata/v1/instance/attributes/use-collaborative
404 Client Error: Not Found for url: http://metadata/computeMetadata/v1/instance/attributes/notebook-disable-downloads
404 Client Error: Not Found for url: http://metadata/computeMetadata/v1/instance/attributes/notebook-disable-terminal
404 Client Error: Not Found for url: http://metadata/computeMetadata/v1/instance/attributes/notebook-enable-delete-to-trash
Enabling notebook extension jupyter-js-widgets/extension...
      - Validating: [32mOK[0m


In [5]:
import copy
import time
import matplotlib.pyplot as plt

import torch
import torch.nn as nn
import torch.optim
import torchvision.transforms as transforms

from utils.dataset import NvidiaDaveDataset, load_nvidia_dataset
from models import nvidia


# Tqdm progress bar
from tqdm import tqdm_notebook, tqdm

# Training hyperparameters. They configure the SGD optimizer and data loading
# in main() and are the default labels used in the plot title.
LEARNING_RATE = 1e-4  # learning rate passed to torch.optim.SGD
MOMENTUM = 0.7  # momentum passed to torch.optim.SGD
WEIGHT_DECAY_REGULARIZATION_TERM = 1e-6  # aiming for overfitting
BATCH_SIZE = 256  # batch_size forwarded to load_nvidia_dataset
NUM_EPOCHS = 15  # number of train + validate passes run by main()


# Citation:
# - AverageMeter adapted (restyled, behaviour unchanged) from the Assignment 2 training code.
# - Remainder of code in this file based on Assignment 2 training code.

class AverageMeter:
    """Track the most recent value of a metric and its weighted running mean.

    Attributes:
        val: The value passed to the latest ``update`` call.
        sum: Accumulated ``val * n`` over all updates since the last reset.
        count: Accumulated weights ``n`` since the last reset.
        avg: ``sum / count`` as of the latest update.
    """

    def __init__(self):
        self.reset()

    def reset(self):
        """Set every tracked statistic back to zero."""
        self.val = self.avg = self.sum = self.count = 0

    def update(self, val, n=1):
        """Record ``val`` with weight ``n`` and refresh the running mean."""
        self.val = val
        self.count += n
        self.sum += val * n
        self.avg = self.sum / self.count

def train(epoch, data_loader, model, optimizer, criterion, scaler=None):
    """Run one training epoch and return the batch-size-weighted average RMSE.

    Args:
        epoch: Epoch index; only used to label the progress bar.
        data_loader: Iterable yielding ``(data, target)`` batches.
        model: Network to train. Its output is reshaped to ``target.shape``
            before the loss is computed.
        optimizer: Optimizer holding the parameters to update.
        criterion: Loss callable; the square root of its result is what gets
            optimised (with ``nn.MSELoss`` this is the RMSE).
        scaler: Optional gradient scaler exposing ``scale``/``step``/``update``
            (e.g. ``torch.cuda.amp.GradScaler``). When ``None`` a plain
            ``backward()`` / ``optimizer.step()`` is performed instead.

    Returns:
        The running average loss (``AverageMeter.avg``): a 0-dim tensor once at
        least one batch was processed, or ``0`` for an empty loader.
    """
    iter_time = AverageMeter()
    losses = AverageMeter()

    # Loop-invariant, so query it once instead of once per batch.
    use_cuda = torch.cuda.is_available()

    # Make sure train-time behaviour of layers is active for this pass.
    model.train()

    # Get the progress bar for later modification
    progress_bar = tqdm_notebook(data_loader, ascii=True)

    for idx, (data, target) in enumerate(progress_bar):
        start = time.time()

        if use_cuda:
            data = data.cuda()
            target = target.cuda()

        # Forward pass and computation of loss under automatic mixed precision.
        # Autocast is only enabled when CUDA is actually present, mirroring the
        # `.cuda()` gating above.
        with torch.autocast("cuda", enabled=use_cuda):
            out = model(data).reshape(target.shape)
            # RMSE loss
            loss = torch.sqrt(criterion(out, target))

        # Backwards pass to determine gradients and update model parameters.
        # Gradients are dropped to None rather than zero-filled.
        optimizer.zero_grad(set_to_none=True)
        if scaler is None:
            loss.backward()
            optimizer.step()
        else:
            scaler.scale(loss).backward()
            scaler.step(optimizer)
            scaler.update()

        # Detach before accumulating so the running sum does not chain the
        # autograd graph of every batch processed so far.
        losses.update(loss.detach(), out.shape[0])

        iter_time.update(time.time() - start)

        progress_bar.set_description_str(f"Epoch {epoch}, Batch: {idx+1}, Loss: {loss.item():.4f}")

    return losses.avg

def validate(epoch, validation_loader, model, criterion):
    """Evaluate ``model`` on a loader and return the weighted average RMSE.

    Args:
        epoch: Epoch index; accepted for symmetry with ``train`` but not used.
        validation_loader: Iterable yielding ``(data, target)`` batches.
        model: Network to evaluate. Its output is reshaped to ``target.shape``
            (as in ``train``) so the loss cannot silently broadcast, e.g. a
            ``(B, 1)`` output against a ``(B,)`` target.
        criterion: Loss callable; the square root of its result is reported.

    Returns:
        The running average loss (``AverageMeter.avg``): a 0-dim tensor once at
        least one batch was processed, or ``0`` for an empty loader.
    """
    iter_time = AverageMeter()
    losses = AverageMeter()

    use_cuda = torch.cuda.is_available()

    # Evaluate in eval mode, then put the model back into whatever mode the
    # caller had it in so a following training epoch is unaffected.
    was_training = model.training
    model.eval()

    # Get the progress bar for later modification
    progress_bar = tqdm_notebook(validation_loader, ascii=True)

    try:
        for idx, (data, target) in enumerate(progress_bar):
            start = time.time()

            if use_cuda:
                data = data.cuda()
                target = target.cuda()

            with torch.no_grad():
                out = model(data).reshape(target.shape)
                # RMSE loss
                loss = torch.sqrt(criterion(out, target))

            losses.update(loss, out.shape[0])

            iter_time.update(time.time() - start)

            progress_bar.set_description_str(f"Batch: {idx+1}, Loss: {loss.item():.4f}")
    finally:
        model.train(was_training)

    print("* Average Loss @1: {loss.avg:.4f}".format(loss=losses))
    return losses.avg

# RMSE loss function (the name keeps its original "RSME" spelling).
# eps prevents [nan] in the backward pass
def RSMELoss(yhat, y, eps=1e-6):
    """Return ``sqrt(mean((yhat - y) ** 2) + eps)``."""
    squared_error = (yhat - y) ** 2
    return torch.sqrt(squared_error.mean() + eps)

def plots(losses, lr=LEARNING_RATE, reg=WEIGHT_DECAY_REGULARIZATION_TERM, batch=BATCH_SIZE, momentum=MOMENTUM):
    """Draw the training and validation learning curves on a single figure.

    Args:
        losses: Indexable where ``losses[0]`` holds the training losses and
            ``losses[1]`` the validation losses, one entry per epoch.
        lr, reg, batch, momentum: Values shown in the plot title only.
    """
    _, axis = plt.subplots(figsize=(8, 10))

    # Index 0 is the training curve, index 1 the validation curve.
    for position, legend_label in enumerate(('Training Losses', 'Validation Losses')):
        axis.plot(losses[position], label=legend_label)

    axis.set_xlabel('Nr Epochs')
    axis.set_ylabel('Loss')
    axis.set_title(f'L-Curves -> Train & Valid LR={lr} Momentum={momentum}, Reg_Term={reg}, Batch={batch}')
    axis.legend(loc="best")
    plt.show()



In [None]:
def main():
    """Train the NVIDIA DAVE-2 CNN, checkpoint the best epoch and plot losses.

    The model with the lowest average validation loss is saved to
    ``./checkpoints/nvidia_dave2.pth`` (as a state dict), and the per-epoch
    training/validation losses are plotted at the end.
    """
    import os  # local import: only needed to create the checkpoint directory

    # Normalizing images per the paper and resizing each image to 66 x 200.
    transform = transforms.Compose([
        # Citation:
        # https://pytorch.org/vision/stable/transforms.html#scriptable-transforms
        transforms.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225)),
        transforms.Resize((66, 200)),
    ])

    # Loading in images with normalization and resizing applied.
    # The test split is deliberately left untouched here: model selection must
    # not look at it.
    training_set, validation_set, _test_set = load_nvidia_dataset(transform=transform, batch_size=BATCH_SIZE)
    torch.cuda.empty_cache()

    # Loading in the NVIDIA DAVE-2 model.
    model = nvidia.NvidiaDaveCNN()

    if torch.cuda.is_available():
        model = model.to(torch.device("cuda"))

    # Mean Squared Error (MSE) as the criterion since this is a regression
    # task; train/validate take its square root, so the reported loss is RMSE.
    criterion = nn.MSELoss()

    # Using Stochastic Gradient Descent (SGD) as the optimizer.
    optimizer = torch.optim.SGD(model.parameters(),
                                LEARNING_RATE,
                                momentum=MOMENTUM,
                                weight_decay=WEIGHT_DECAY_REGULARIZATION_TERM)

    scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, patience=1, verbose=True)

    best = float('inf')
    best_state = None
    best_epoch = None
    train_losses = []
    valid_losses = []
    scaler = torch.cuda.amp.GradScaler()
    for epoch in range(NUM_EPOCHS):

        # Training. float() turns the 0-dim loss tensor (CPU or CUDA) into a
        # plain Python number, so no tensors are kept in the history lists.
        train_loss = float(train(epoch, training_set, model, optimizer, criterion, scaler=scaler))
        train_losses.append(train_loss)  # average losses
        print(f"Average training loss this epoch: {train_loss:.4f}")

        # Validation on the validation split (not the test split).
        valid_loss = float(validate(epoch, validation_set, model, criterion))
        valid_losses.append(valid_loss)

        # The scheduler keeps monitoring the training loss, as before.
        scheduler.step(train_loss)

        if valid_loss < best:
            best = valid_loss
            # Snapshot only the weights; that is all that gets saved below.
            best_state = copy.deepcopy(model.state_dict())
            best_epoch = epoch
        print(f"Best performing model so far average validation loss: {best:.4f} on epoch {best_epoch}\n")

    # `best` tracks the validation loss, so label it as such.
    print('Best Validation Loss @1: {:.4f}'.format(best))

    if best_state is None:
        # Happens when no epoch ran or every validation loss was NaN.
        print("No finite validation loss was recorded; checkpoint not saved.")
    else:
        os.makedirs('./checkpoints', exist_ok=True)
        torch.save(best_state, './checkpoints/nvidia_dave2.pth')

    losses_to_plot = train_losses, valid_losses
    plots(losses_to_plot, lr=LEARNING_RATE, reg=WEIGHT_DECAY_REGULARIZATION_TERM,
          batch=BATCH_SIZE, momentum=MOMENTUM)


if __name__ == '__main__':
    main()

Please use `tqdm.notebook.tqdm` instead of `tqdm.tqdm_notebook`


  0%|          | 0/286 [00:00<?, ?it/s]

True