In [None]:
import torch
import torchvision
import torchvision.transforms as transforms
import time
import torch.nn as nn
import torchvision
import torchvision.transforms as transforms
import torchvision.models as models
from torch.utils.data import Dataset, DataLoader
from google.colab import drive

# Google Drive folder that main() writes the model checkpoint into.
SAVEPATH = '/content/drive/My Drive/CSE331_ML_Project/'
# Optimizer hyperparameters: weight decay, momentum and initial learning rate.
WEIGHTDECAY = 5e-4
MOMENTUM = 0.9
# Mini-batch size handed to the training DataLoader.
BATCHSIZE = 64
LR = 0.1
# Number of passes over the training set performed by main().
EPOCHS = 300
# train() prints a progress line every PRINTFREQ batches.
PRINTFREQ = 10

# Mount Google Drive under /content/drive so that SAVEPATH is reachable
# from the Colab runtime.
drive.mount('/content/drive')

!unzip '/content/drive/My Drive/CSE331_ML_Project/dataset.zip'

class BottleNeckConv(nn.Module):
    """Channel-squeezing block with two parallel output branches.

    The input is first reduced to ``decrease_channel`` channels by a 1x1
    convolution.  The reduced tensor is then fed to a 1x1 branch and a
    3x3 (padding 1) branch, each producing ``int(out_channel / 2)``
    channels, and the two results are concatenated along the channel
    dimension.  Every convolution is followed by BatchNorm and ReLU.

    Args:
        in_channel: number of channels of the incoming tensor.
        out_channel: nominal number of output channels; each branch
            contributes ``int(out_channel / 2)`` of them.
        decrease_channel: width of the intermediate (squeezed) tensor.
    """

    def __init__(self, in_channel, out_channel, decrease_channel):
        super().__init__()
        branch_channel = int(out_channel / 2)

        # Attribute names and creation order are kept stable so that saved
        # state_dict keys and parameter initialisation order do not change.
        self.layer_1x1 = self._conv_bn_relu(
            in_channel, decrease_channel, kernel_size=1)
        self.layer_1x1_2 = self._conv_bn_relu(
            decrease_channel, branch_channel, kernel_size=1)
        self.layer_3x3 = self._conv_bn_relu(
            decrease_channel, branch_channel, kernel_size=3, padding=1)

    @staticmethod
    def _conv_bn_relu(in_ch, out_ch, **conv_kwargs):
        """Return a Conv2d -> BatchNorm2d -> ReLU(inplace) stack."""
        return nn.Sequential(
            nn.Conv2d(in_ch, out_ch, **conv_kwargs),
            nn.BatchNorm2d(out_ch),
            nn.ReLU(inplace=True),
        )

    def forward(self, x):
        """Squeeze ``x`` and return the channel-wise concat of both branches."""
        squeezed = self.layer_1x1(x)
        branches = (self.layer_1x1_2(squeezed), self.layer_3x3(squeezed))
        return torch.cat(branches, 1)

class Net(nn.Module):
    """Classifier built from a stem convolution and eleven bottleneck blocks.

    Layout of the forward pass:
      * ``conv1``: 5x5 conv (3 -> 96 channels, padding 3) + BatchNorm + ReLU
        followed by a 2x2 max-pool.
      * ``bottle1`` .. ``bottle11``: ``BottleNeckConv`` blocks.  Blocks whose
        input and output widths match (2, 4, 6, 8, 10) are wrapped in a
        residual addition; 2x2 max-pooling is applied after blocks 3 and 7.
      * The last block emits 10 channels, which are globally average-pooled,
        passed through ``LogSoftmax`` over the channel dimension and
        flattened to ``(batch, 10)``.
    """

    def __init__(self):
        super().__init__()

        self.conv1 = nn.Sequential(
            nn.Conv2d(3, 96, kernel_size=5, padding=3),
            nn.BatchNorm2d(96),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(2, 2),
        )

        # (in_channel, out_channel, decrease_channel) for bottle1..bottle11,
        # registered in order so module names and init order stay the same.
        bottle_specs = (
            (96, 128, 16),
            (128, 128, 16),
            (128, 256, 32),
            (256, 256, 32),
            (256, 384, 48),
            (384, 384, 48),
            (384, 512, 64),
            (512, 512, 64),
            (512, 768, 80),
            (768, 768, 80),
            (768, 10, 96),
        )
        for index, spec in enumerate(bottle_specs, start=1):
            setattr(self, 'bottle{}'.format(index), BottleNeckConv(*spec))

        self.avg = nn.AdaptiveAvgPool2d(1)
        self.maxpool = nn.MaxPool2d(2, 2)
        self.softmax = nn.LogSoftmax(dim=1)

    def forward(self, x):
        """Return per-class log-probabilities of shape ``(batch, 10)``."""
        feat = self.conv1(x)

        # Stage 1: 128 -> 256 channels, then downsample.
        feat = self.bottle1(feat)
        feat = self.bottle2(feat) + feat
        feat = self.maxpool(self.bottle3(feat))

        # Stage 2: 256 -> 512 channels, then downsample.
        feat = self.bottle4(feat) + feat
        feat = self.bottle5(feat)
        feat = self.bottle6(feat) + feat
        feat = self.maxpool(self.bottle7(feat))

        # Stage 3: 512 -> 768 channels, then project to the 10 classes.
        feat = self.bottle8(feat) + feat
        feat = self.bottle9(feat)
        feat = self.bottle10(feat) + feat
        feat = self.bottle11(feat)

        log_probs = self.softmax(self.avg(feat))
        return torch.flatten(log_probs, 1)

def Model():
    r"""Factory for the custom network: builds and returns a new ``Net``.
    """
    network = Net()
    return network

class AverageMeter(object):
    r"""Tracks the latest value and the running (weighted) average of a metric.

    Attributes:
        name: label printed in front of the values.
        fmt: format spec (e.g. ``':6.3f'``) applied to both ``val`` and ``avg``.
        val: most recent value passed to :meth:`update`.
        sum: weighted sum of all values seen since the last reset.
        count: total weight (number of samples) seen since the last reset.
        avg: ``sum / count``, or 0 while ``count`` is 0.
    """
    def __init__(self, name, fmt=':f'):
        self.name = name
        self.fmt = fmt
        self.reset()

    def reset(self):
        """Clear all accumulated statistics."""
        self.val = 0
        self.avg = 0
        self.sum = 0
        self.count = 0

    def update(self, val, n=1):
        """Record ``val`` observed over ``n`` samples and refresh the average.

        Args:
            val: the new measurement (typically a per-batch mean).
            n: weight of the measurement, e.g. the batch size.
        """
        self.val = val
        self.sum += val * n
        self.count += n
        # Guard against a zero total weight (e.g. an empty first batch),
        # which would otherwise raise ZeroDivisionError.
        self.avg = self.sum / self.count if self.count else 0

    def __str__(self):
        # Produces e.g. "Loss 1.2345e+00 (1.3000e+00)": current value, then
        # running average in parentheses, both rendered with ``self.fmt``.
        fmtstr = '{name} {val' + self.fmt + '} ({avg' + self.fmt + '})'
        return fmtstr.format(**self.__dict__)


class ProgressMeter(object):
    """Prints one tab-separated progress line made of a batch counter and meters.

    Args:
        num_batches: total number of batches; fixes the counter's width and
            the denominator shown in ``[batch/num_batches]``.
        *meters: objects whose ``str()`` is appended to the line.
        prefix: text placed directly in front of the batch counter.
    """
    def __init__(self, num_batches, *meters, prefix=""):
        self.batch_fmtstr = self._get_batch_fmtstr(num_batches)
        self.meters = meters
        self.prefix = prefix

    def print(self, batch):
        """Write the progress line for batch index ``batch`` to stdout."""
        header = self.prefix + self.batch_fmtstr.format(batch)
        columns = [header]
        columns.extend(map(str, self.meters))
        print('\t'.join(columns))

    def _get_batch_fmtstr(self, num_batches):
        """Return a template such as ``'[{:4d}/1407]'`` for the batch counter."""
        # Pad the running index to the same width as the total.
        width = len(str(num_batches // 1))
        field = '{:' + '{}'.format(width) + 'd}'
        total = field.format(num_batches)
        return ''.join(['[', field, '/', total, ']'])


def accuracy(output, target, topk=(1,)):
    r"""Return the top-k accuracy (in percent) for every k in ``topk``.

    ``output`` is sorted in descending order along its last dimension and a
    sample counts as correct for a given k when its target index appears
    among the first k sorted indices.  The result is a list with one
    single-element tensor per requested k, in the same order as ``topk``.
    """
    with torch.no_grad():
        largest_k = max(topk)
        num_samples = target.size(0)

        # A full sort is used instead of topk
        # (ref: https://github.com/pytorch/pytorch/issues/22812).
        ranked = output.sort(descending=True)[1]
        top_preds = ranked[:, :largest_k].t()
        hits = top_preds.eq(target.view(1, -1).expand_as(top_preds))

        scale = 100.0 / num_samples
        return [
            hits[:k].reshape(-1).float().sum(0, keepdim=True).mul_(scale)
            for k in topk
        ]

def main():
    """Train the custom model on ``./train`` and checkpoint it every epoch.

    Steps:
      1. Build the model and loss and move both to the GPU.
      2. Create an SGD optimizer (momentum + weight decay taken from the
         module-level hyperparameters) and a MultiStepLR schedule that
         divides the learning rate by 10 at epochs 100 and 150.
      3. Abort early if the model exceeds the 2M-parameter budget.
      4. Run ``EPOCHS`` training epochs, saving ``model_weight.pth`` under
         ``SAVEPATH`` after each one.

    Requires a CUDA device and the ``./train`` and ``./valid`` image folders.
    """
    # Move the model to the GPU *before* constructing the optimizer, as the
    # torch.optim documentation recommends, so the optimizer is built over
    # the parameters that are actually trained.
    model = Model().cuda()
    criterion = torch.nn.CrossEntropyLoss().cuda()

    ##### optimizer / learning rate scheduler / criterion #####
    # SGD with momentum and weight decay: LR (0.1) is an SGD-scale step size
    # and MOMENTUM / WEIGHTDECAY are the matching hyperparameters.
    optimizer = torch.optim.SGD(model.parameters(), lr=LR,
                                momentum=MOMENTUM,
                                weight_decay=WEIGHTDECAY)
    scheduler = torch.optim.lr_scheduler.MultiStepLR(optimizer, [100, 150],
                                                     gamma=0.1)
    ###########################################################

    # Check number of parameters your model
    pytorch_total_params = sum(p.numel() for p in model.parameters())
    print(f"Number of parameters: {pytorch_total_params}")
    if int(pytorch_total_params) > 2000000:
        print('Your model has the number of parameters more than 2 millions..')
        return

    normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                     std=[0.229, 0.224, 0.225])
    # Training-time augmentation; RandomErasing operates on tensors, so it
    # has to come after ToTensor.
    train_transform = transforms.Compose([
        transforms.RandomRotation(15, ),
        transforms.RandomCrop(32, padding=4),
        transforms.RandomHorizontalFlip(),
        transforms.RandomAffine(15),
        transforms.ToTensor(),
        transforms.RandomErasing(),
        normalize
    ])

    valid_transform = transforms.Compose([
        transforms.ToTensor(),
        normalize
    ])

    train_dataset = torchvision.datasets.ImageFolder(
        './train', transform=train_transform)
    train_loader = DataLoader(train_dataset,
                              batch_size=BATCHSIZE, shuffle=True,
                              num_workers=4, pin_memory=True)

    # NOTE(review): the validation set is constructed (which also checks that
    # ./valid exists) but no evaluation loop consumes it in this function.
    val_dataset = torchvision.datasets.ImageFolder('./valid', transform=valid_transform)

    # Training-set top-1 accuracy of the most recent epoch.
    last_top1_acc = 0
    for epoch in range(EPOCHS):
        print("\n----- epoch: {}, lr: {} -----".format(
            epoch, optimizer.param_groups[0]["lr"]))

        # train for one epoch
        start_time = time.time()
        last_top1_acc = train(train_loader, epoch, model, optimizer, criterion)
        elapsed_time = time.time() - start_time
        print('==> {:.2f} seconds to train this epoch\n'.format(
            elapsed_time))

        # learning rate scheduling
        scheduler.step()

        # Save model each epoch (overwrites the previous checkpoint)
        torch.save(model.state_dict(), SAVEPATH+'model_weight.pth')

    print(f"Last Top-1 Accuracy: {last_top1_acc}")
    print(f"Number of parameters: {pytorch_total_params}")



def train(train_loader, epoch, model, optimizer, criterion):
    """Run one training epoch and return the epoch's average top-1 accuracy.

    For every batch the inputs and targets are moved to the GPU, the loss is
    computed and back-propagated, and the optimizer takes one step.  Timing,
    loss and top-1/top-5 accuracy are accumulated in meters, and a progress
    line is printed every ``PRINTFREQ`` batches.

    Args:
        train_loader: iterable yielding ``(input, target)`` batches.
        epoch: index of the current epoch, used only in the progress prefix.
        model: network to train; switched to train mode here.
        optimizer: optimizer updating ``model``'s parameters.
        criterion: loss called as ``criterion(output, target)``.

    Returns:
        The sample-weighted average top-1 accuracy (percent) over the epoch.
    """
    step_timer = AverageMeter('Time', ':6.3f')
    load_timer = AverageMeter('Data', ':6.3f')
    loss_meter = AverageMeter('Loss', ':.4e')
    acc1_meter = AverageMeter('Acc@1', ':6.2f')
    acc5_meter = AverageMeter('Acc@5', ':6.2f')
    progress = ProgressMeter(
        len(train_loader),
        step_timer, load_timer, loss_meter, acc1_meter, acc5_meter,
        prefix="Epoch: [{}]".format(epoch),
    )

    model.train()

    tick = time.time()
    for step, (images, labels) in enumerate(train_loader):
        # Time spent waiting for the loader to deliver this batch.
        load_timer.update(time.time() - tick)

        images = images.cuda()
        labels = labels.cuda()

        # Forward pass.
        logits = model(images)
        loss = criterion(logits, labels)

        # Book-keeping, weighted by the number of samples in the batch.
        num_samples = images.size(0)
        acc1, acc5 = accuracy(logits, labels, topk=(1, 5))
        loss_meter.update(loss.item(), num_samples)
        acc1_meter.update(acc1[0].item(), num_samples)
        acc5_meter.update(acc5[0].item(), num_samples)

        # Backward pass and parameter update.
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # Wall-clock time of the whole iteration, data loading included.
        step_timer.update(time.time() - tick)
        tick = time.time()

        if step % PRINTFREQ == 0:
            progress.print(step)

    print('=> Acc@1 {top1.avg:.3f} Acc@5 {top5.avg:.3f}'
          .format(top1=acc1_meter, top5=acc5_meter))
    return acc1_meter.avg


# Start training only when this file is executed as a script / notebook cell,
# not when it is imported as a module.
if __name__ == "__main__":
    main()

[1;30;43mStreaming output truncated to the last 5000 lines.[0m
Epoch: [180][ 440/1407]	Time  0.110 ( 0.118)	Data  0.000 ( 0.001)	Loss 1.8142e+00 (1.6526e+00)	Acc@1  26.56 ( 34.70)	Acc@5  64.06 ( 65.88)
Epoch: [180][ 450/1407]	Time  0.104 ( 0.117)	Data  0.000 ( 0.001)	Loss 1.5860e+00 (1.6539e+00)	Acc@1  39.06 ( 34.67)	Acc@5  62.50 ( 65.81)
Epoch: [180][ 460/1407]	Time  0.102 ( 0.117)	Data  0.000 ( 0.001)	Loss 1.9838e+00 (1.6540e+00)	Acc@1  20.31 ( 34.64)	Acc@5  70.31 ( 65.85)
Epoch: [180][ 470/1407]	Time  0.112 ( 0.117)	Data  0.000 ( 0.001)	Loss 1.3625e+00 (1.6525e+00)	Acc@1  46.88 ( 34.72)	Acc@5  70.31 ( 65.95)
Epoch: [180][ 480/1407]	Time  0.112 ( 0.117)	Data  0.000 ( 0.001)	Loss 1.6853e+00 (1.6540e+00)	Acc@1  37.50 ( 34.66)	Acc@5  62.50 ( 65.89)
Epoch: [180][ 490/1407]	Time  0.134 ( 0.117)	Data  0.000 ( 0.001)	Loss 1.7756e+00 (1.6534e+00)	Acc@1  28.12 ( 34.71)	Acc@5  59.38 ( 65.93)
Epoch: [180][ 500/1407]	Time  0.125 ( 0.117)	Data  0.000 ( 0.001)	Loss 1.5631e+00 (1.6530e+00)	Acc@1 

KeyboardInterrupt: ignored