<a href="https://colab.research.google.com/github/zachlim98/w251-submissions/blob/main/hw05/lab05.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
# Installs this package locally so we can import it like any other module (see below).
!pip install -e ..

Looking in indexes: https://pypi.org/simple, https://pypi.ngc.nvidia.com
Obtaining file:///project
  Installing build dependencies ... [?25ldone
[?25h  Getting requirements to build wheel ... [?25ldone
[?25h  Installing backend dependencies ... [?25ldone
[?25h    Preparing wheel metadata ... [?25ldone
[?25hInstalling collected packages: mids-plane-classification
  Attempting uninstall: mids-plane-classification
    Found existing installation: mids-plane-classification 0.0.1
    Uninstalling mids-plane-classification-0.0.1:
      Successfully uninstalled mids-plane-classification-0.0.1
  Running setup.py develop for mids-plane-classification
Successfully installed mids-plane-classification-0.0.1


In [2]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
from mids_plane_classification.loaders.dataloader import PlaneDataModule

  from .autonotebook import tqdm as notebook_tqdm


In [3]:
import os
import random
import shutil
import time
import warnings

import torch
import torch.nn as nn
import torch.backends.cudnn as cudnn
import torch.optim

import torch.utils.data
import torch.utils.data.distributed
import torchvision
import torchvision.transforms as transforms
import torchvision.datasets as datasets
import torchvision.models as models

from torch.cuda.amp import GradScaler
from torch.cuda.amp import autocast

In [4]:
# Import the data and set it up. If this is the first time running this, please use
#dm.prepare_data()
dm = PlaneDataModule(train_batch_size=64, val_batch_size=64, data_dir='../data', seed=2)
dm.setup()

In [5]:
import wandb

wandb.login()
wandb.init(project="W251-Draft-Models", entity="251-summer-22")

[34m[1mwandb[0m: Currently logged in as: [33mlywu0607[0m. Use [1m`wandb login --relogin`[0m to force relogin
[34m[1mwandb[0m: Currently logged in as: [33mlywu0607[0m ([33m251-summer-22[0m). Use [1m`wandb login --relogin`[0m to force relogin


In [6]:
GPU=0

In [7]:
SEED=1

In [8]:
random.seed(SEED)
torch.manual_seed(SEED)
cudnn.deterministic = True

In [9]:
torch.cuda.device_count()

1

### Set the architecture to resnet 18 below

In [10]:
##########################
ARCH = 'densenet161' # set the architecture to densenet
# please look up how to do that
########################
EPOCHS = 60
LR = 0.1
MOMENTUM = 0.9
WEIGHT_DECAY = 1e-4
PRINT_FREQ = 50
TRAIN_BATCH=64
VAL_BATCH=64
WORKERS=2
START_EPOCH=0

#config for wandb
wandb.config = {
  "learning_rate": LR,
  "epochs": EPOCHS,
  "batch_size": TRAIN_BATCH,
    "arch": ARCH
}

### Check if cuda is available here

In [11]:
# set your active device to your GPU in this cell
torch.cuda.set_device(GPU)

In [12]:
# enable algorithm optimization
cudnn.benchmark = True

### Fill in the heart of the train section below

In [13]:
def train(train_loader, model, criterion, optimizer, epoch):
    batch_time = AverageMeter('Time', ':6.3f')
    data_time = AverageMeter('Data', ':6.3f')
    losses = AverageMeter('Loss', ':.4e')
    top1 = AverageMeter('Acc@1', ':6.2f')
    top5 = AverageMeter('Acc@5', ':6.2f')
    progress = ProgressMeter(
        len(train_loader),
        [batch_time, data_time, losses, top1, top5],
        prefix="Epoch: [{}]".format(epoch))

    # Grad Scaler
    scaler = GradScaler()
    # switch to train mode
    model.train()

    end = time.time()
    for i, (images, target) in enumerate(train_loader):
        # measure data loading time
        data_time.update(time.time() - end)

        if GPU is not None:
            images = images.cuda(GPU, non_blocking=True)
        if torch.cuda.is_available():
            target = target.cuda(GPU, non_blocking=True)

        # compute output
        with autocast():
          output = model(images)
          loss = criterion(output, target)

        # measure accuracy and record loss
        acc1, acc5 = accuracy(output, target, topk=(1, 5))
        losses.update(loss.item(), images.size(0))
        top1.update(acc1[0], images.size(0))
        top5.update(acc5[0], images.size(0))

        # compute gradient and do SGD step
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        
        # use the scaler
        #scaler.scale(loss).backward()
        #scaler.step(optimizer)
        #scaler.update()

        # measure elapsed time
        batch_time.update(time.time() - end)
        end = time.time()
        wandb.log({"Loss/train": loss, 'acc1/train': top1.avg, 'acc5/train': top5.avg})

        if i % PRINT_FREQ == 0:
            progress.display(i)

#### Fill in the validate section below

In [14]:
def validate(val_loader, model, criterion):
    batch_time = AverageMeter('Time', ':6.3f')
    losses = AverageMeter('Loss', ':.4e')
    top1 = AverageMeter('Acc@1', ':6.2f')
    top5 = AverageMeter('Acc@5', ':6.2f')
    progress = ProgressMeter(
        len(val_loader),
        [batch_time, losses, top1, top5],
        prefix='Test: ')

    # switch to evaluate mode
    model.eval()

    with torch.no_grad():
        end = time.time()
        for i, (images, target) in enumerate(val_loader):
            if GPU is not None:
                images = images.cuda(GPU, non_blocking=True)
            if torch.cuda.is_available():
                target = target.cuda(GPU, non_blocking=True)

            # compute output
            output = model(images)
            loss = criterion(output, target)

            # measure accuracy and record loss
            acc1, acc5 = accuracy(output, target, topk=(1, 5))
            losses.update(loss.item(), images.size(0))
            top1.update(acc1[0], images.size(0))
            top5.update(acc5[0], images.size(0))

            # measure elapsed time
            batch_time.update(time.time() - end)
            end = time.time()

            if i % PRINT_FREQ == 0:
                progress.display(i)

        # TODO: this should also be done with the ProgressMeter
        print(' * Acc@1 {top1.avg:.3f} Acc@5 {top5.avg:.3f}'
              .format(top1=top1, top5=top5))

    wandb.log({"Loss/val": losses.avg, 'acc1/val': top1.avg, 'acc5/val': top5.avg})
    return top1.avg

### Save the checkpoint

In [15]:
def save_checkpoint(state, is_best, filename='checkpoint.pth.tar'):
    torch.save(state, filename)
    if is_best:
        shutil.copyfile(filename, 'model_best.pth.tar')

In [16]:
class AverageMeter(object):
    """Computes and stores the average and current value"""
    def __init__(self, name, fmt=':f'):
        self.name = name
        self.fmt = fmt
        self.reset()

    def reset(self):
        self.val = 0
        self.avg = 0
        self.sum = 0
        self.count = 0

    def update(self, val, n=1):
        self.val = val
        self.sum += val * n
        self.count += n
        self.avg = self.sum / self.count

    def __str__(self):
        fmtstr = '{name} {val' + self.fmt + '} ({avg' + self.fmt + '})'
        return fmtstr.format(**self.__dict__)

In [17]:
class ProgressMeter(object):
    def __init__(self, num_batches, meters, prefix=""):
        self.batch_fmtstr = self._get_batch_fmtstr(num_batches)
        self.meters = meters
        self.prefix = prefix

    def display(self, batch):
        entries = [self.prefix + self.batch_fmtstr.format(batch)]
        entries += [str(meter) for meter in self.meters]
        print('\t'.join(entries))

    def _get_batch_fmtstr(self, num_batches):
        num_digits = len(str(num_batches // 1))
        fmt = '{:' + str(num_digits) + 'd}'
        return '[' + fmt + '/' + fmt.format(num_batches) + ']'

In [18]:
# if we are adjusting the LR manually use this
def adjust_learning_rate(optimizer, epoch):
    """Sets the learning rate to the initial LR decayed by 10 every 30 epochs"""
    lr = LR * (0.1 ** (epoch // 30))
    for param_group in optimizer.param_groups:
        param_group['lr'] = lr

In [19]:
def accuracy(output, target, topk=(1,)):
    """Computes the accuracy over the k top predictions for the specified values of k"""
    with torch.no_grad():
        maxk = max(topk)
        batch_size = target.size(0)

        _, pred = output.topk(maxk, 1, True, True)
        pred = pred.t()
        correct = pred.eq(target.view(1, -1).expand_as(pred))

        res = []
        for k in topk:
            correct_k = correct[:k].reshape(-1).float().sum(0, keepdim=True)
            res.append(correct_k.mul_(100.0 / batch_size))
        return res

In [20]:
# IMG_SIZE = 224
num_classes = 11

### Initialize the model using the architecture you selected above

In [21]:
# select the model
model = models.__dict__[ARCH]()
#inf = model.fc.in_features

model.fc = nn.Linear(1024, num_classes)
model.cuda(GPU)

DenseNet(
  (features): Sequential(
    (conv0): Conv2d(3, 96, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
    (norm0): BatchNorm2d(96, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (relu0): ReLU(inplace=True)
    (pool0): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
    (denseblock1): _DenseBlock(
      (denselayer1): _DenseLayer(
        (norm1): BatchNorm2d(96, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (relu1): ReLU(inplace=True)
        (conv1): Conv2d(96, 192, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (norm2): BatchNorm2d(192, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (relu2): ReLU(inplace=True)
        (conv2): Conv2d(192, 48, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      )
      (denselayer2): _DenseLayer(
        (norm1): BatchNorm2d(144, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (rel

### Instantiate the loss to cross entropy

In [22]:
# use the cross-entropy loss
criterion = nn.CrossEntropyLoss().cuda(GPU)

### Instantiate the optimizer to SGD

In [23]:
# use SGD .. use the momentum and weight decay vars
optimizer = torch.optim.SGD(model.parameters(), LR,
                                momentum=MOMENTUM,
                                weight_decay=WEIGHT_DECAY)

#### Create the learning rate scheduler

In [24]:
# use CosineAnnealingLR
scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=EPOCHS)

### Create the train and val dataloaders

In [25]:
# Access the data loaders directly from the modules.
train_loader = dm.train_dataloader()
val_loader = dm.val_dataloader()

In [26]:
best_acc1 = 0

In [27]:
%%time
for epoch in range(START_EPOCH, EPOCHS):
#    adjust_learning_rate(optimizer, epoch)

    # train for one epoch
    train(train_loader, model, criterion, optimizer, epoch)

    # evaluate on validation set
    acc1 = validate(val_loader, model, criterion)

    # remember best acc@1 and save checkpoint
    is_best = acc1 > best_acc1
    best_acc1 = max(acc1, best_acc1)


    save_checkpoint({
        'epoch': epoch + 1,
        'arch': ARCH,
        'state_dict': model.state_dict(),
        'best_acc1': best_acc1,
        'optimizer' : optimizer.state_dict(),
    }, is_best)
    
    scheduler.step()
    print('lr: ' + str(scheduler.get_last_lr()[0]))

Epoch: [0][ 0/81]	Time 23.529 (23.529)	Data  0.159 ( 0.159)	Loss 6.8359e+00 (6.8359e+00)	Acc@1   1.56 (  1.56)	Acc@5   3.12 (  3.12)
Epoch: [0][50/81]	Time  0.865 ( 1.310)	Data  0.144 ( 0.145)	Loss 2.7528e+00 (3.5118e+00)	Acc@1  10.94 ( 13.79)	Acc@5  53.12 ( 51.99)
Test: [ 0/21]	Time  3.861 ( 3.861)	Loss 3.0278e+00 (3.0278e+00)	Acc@1  10.94 ( 10.94)	Acc@5  53.12 ( 53.12)
 * Acc@1 18.033 Acc@5 65.262




lr: 0.09993147673772869
Epoch: [1][ 0/81]	Time  0.920 ( 0.920)	Data  0.143 ( 0.143)	Loss 2.3772e+00 (2.3772e+00)	Acc@1  17.19 ( 17.19)	Acc@5  62.50 ( 62.50)
Epoch: [1][50/81]	Time  0.887 ( 0.878)	Data  0.153 ( 0.144)	Loss 2.3945e+00 (2.3649e+00)	Acc@1  12.50 ( 18.44)	Acc@5  59.38 ( 62.81)
Test: [ 0/21]	Time  0.664 ( 0.664)	Loss 4.1586e+00 (4.1586e+00)	Acc@1  14.06 ( 14.06)	Acc@5  65.62 ( 65.62)
 * Acc@1 16.081 Acc@5 59.407
lr: 0.09972609476841367
Epoch: [2][ 0/81]	Time  0.878 ( 0.878)	Data  0.142 ( 0.142)	Loss 2.3384e+00 (2.3384e+00)	Acc@1  14.06 ( 14.06)	Acc@5  68.75 ( 68.75)
Epoch: [2][50/81]	Time  0.891 ( 0.884)	Data  0.151 ( 0.145)	Loss 2.1926e+00 (2.3593e+00)	Acc@1  20.31 ( 18.78)	Acc@5  73.44 ( 63.79)
Test: [ 0/21]	Time  0.700 ( 0.700)	Loss 3.4494e+00 (3.4494e+00)	Acc@1  14.06 ( 14.06)	Acc@5  54.69 ( 54.69)
 * Acc@1 16.003 Acc@5 58.938
lr: 0.0993844170297569
Epoch: [3][ 0/81]	Time  0.887 ( 0.887)	Data  0.146 ( 0.146)	Loss 2.0326e+00 (2.0326e+00)	Acc@1  29.69 ( 29.69)	Acc@5  68.75

In [28]:
#torch.cuda.empty_cache()

In [29]:
wandb.finish()

0,1
Loss/train,█▇▆▅▅▅▄▆▄▂▂▃▄▂▂▁▁▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
Loss/val,▂█▂▂▃▂▄▂▂▂▁▂▂▁▁▁▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
acc1/train,▁▁▂▂▃▄▄▄▄▆▇▆▅▇▆▇▇▇███▇██████████████████
acc1/val,▁▁▁▂▁▃▂▄▄▄▆▅▅▆▆▆▃▇██████████████████████
acc5/train,▁▂▄▄▆▆▇▇▇███▇███████████████████████████
acc5/val,▂▁▂▄▁▆▅▆▇▆█▇▇▇▇█▅███████████████████████

0,1
Loss/train,0.00714
Loss/val,0.28889
acc1/train,99.96097
acc1/val,92.42779
acc5/train,100.0
acc5/val,99.53162
