# Sample code of Image Classification VGG16 Model with PyTorch

This notebook is sample code for training an image classification model on the COCO2014 dataset.  
COCO2014 has no classification labels, so a classification dataset is created by cropping the annotated bounding boxes.

|Item|Description|
|---|---|
|DeepLearning Framework|PyTorch|
|Dataset|COCO2014 Classification|
|Model Architecture|VGG16|

In [1]:
# Enable IPython's autoreload extension in mode 2: imported modules are
# re-imported before each cell executes, so edits to the local project
# modules are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [2]:
import os
#import cv2
#import json
#import numpy as np
#import pandas as pd
#from pathlib import Path
#from tqdm import tqdm
#from PIL import Image
#from data_loader.data_loader import DataLoader
#from models.pytorch import vgg16
#
#import torch
#from torch.utils.data import Dataset

import random
import numpy as np
import torch
import pprint

from data_loader.data_loader import DataLoader
from models.pytorch import vgg16

## Set Random Seed

In [3]:
# Single seed shared by every random number generator used in this notebook.
seed = 42

# Python's and NumPy's global generators.
random.seed(seed)
np.random.seed(seed)

# Seeding alone does not make GPU runs repeatable: cuDNN may benchmark and
# pick different convolution algorithms from run to run. Turn auto-tuning
# off and request deterministic algorithms (this can cost some speed).
# Both flags are plain settings and are harmless on a CPU-only machine.
torch.backends.cudnn.benchmark = False
torch.backends.cudnn.deterministic = True

# Seed PyTorch last so the cell still displays the returned Generator.
torch.manual_seed(seed)

<torch._C.Generator at 0x7fdd20bafbf0>

## Device Settings

In [4]:
# Use the first CUDA device when PyTorch can see one; otherwise run on the CPU.
if torch.cuda.is_available():
    device = torch.device('cuda:0')
else:
    device = torch.device('cpu')

# Echo the selected device as the cell output.
device

device(type='cuda', index=0)

## Hyperparameters

In [5]:
# Training length and mini-batch size.
epochs = 50
batch_size = 32

# Learning rate and weight decay handed to the training call (same value).
learning_rate = weight_decay = 1e-3

# Shape of one input image, channels first: (C, H, W).
input_tensor_shape = (3, 224, 224)

## Preparing Dataset

### Download and Extract

In [6]:
# Directory that receives the dataset files; created on first use.
dataset_dir = '/tmp/dataset'
os.makedirs(dataset_dir, exist_ok=True)

# Build the project's data loader for the COCO2014 classification dataset.
# `resize` gets the (H, W) part of the CHW input shape.
# NOTE(review): judging by the progress bars recorded below, this call also
# performs the download / crop preprocessing and takes a long time - confirm.
dataloader = DataLoader(
    dataset_name='coco2014_classification_pytorch',
    resize=input_tensor_shape[1:],
    dataset_dir=dataset_dir,
    batch_size=batch_size,
)

100% 300000/300000 [46:42<00:00, 107.04it/s] 
100% 84509/84509 [01:03<00:00, 1330.78it/s]
100% 84509/84509 [00:00<00:00, 643673.94it/s]
100% 291875/291875 [31:59<00:00, 152.05it/s] 
100% 56834/56834 [00:47<00:00, 1204.84it/s]
100% 56834/56834 [00:00<00:00, 602314.14it/s]


In [7]:
# Display the class names held by the dataset; the list index of a name is
# presumably its integer label (TODO confirm against the data loader).
dataloader.dataset.class_name

['hot dog',
 'dog',
 'potted plant',
 'tv',
 'bird',
 'cat',
 'horse',
 'sheep',
 'cow',
 'bottle',
 'couch',
 'chair',
 'dining table',
 'bicycle',
 'car',
 'motorcycle',
 'airplane',
 'bus',
 'train',
 'boat',
 'person',
 'stop sign',
 'umbrella',
 'tie',
 'sports ball',
 'sandwich',
 'bed',
 'cell phone',
 'refrigerator',
 'clock',
 'toothbrush',
 'truck',
 'traffic light',
 'fire hydrant',
 'parking meter',
 'bench',
 'elephant',
 'bear',
 'zebra',
 'giraffe',
 'frisbee',
 'skis',
 'snowboard',
 'kite',
 'baseball bat',
 'baseball glove',
 'skateboard',
 'surfboard',
 'tennis racket',
 'wine glass',
 'cup',
 'fork',
 'knife',
 'spoon',
 'bowl',
 'banana',
 'apple',
 'orange',
 'broccoli',
 'carrot',
 'pizza',
 'donut',
 'cake',
 'toilet',
 'laptop',
 'mouse',
 'remote',
 'keyboard',
 'microwave',
 'oven',
 'toaster',
 'sink',
 'book',
 'vase',
 'scissors',
 'teddy bear',
 'hair drier',
 'backpack',
 'handbag',
 'suitcase']

## Training VGG16

In [8]:
# Derive the NCHW input size from the hyperparameters instead of repeating
# the literal 3x224x224, so the model cannot drift out of sync with the
# shape the data loader resizes images to.
input_size = (batch_size, *input_tensor_shape)

# One output unit per class name exposed by the dataset.
num_classes = len(dataloader.dataset.class_name)

# Instantiate the project's VGG16 wrapper on the selected device.
model = vgg16.VGG16(device, input_size=input_size, num_classes=num_classes)

Layer (type:depth-idx)                   Output Shape              Param #
Net                                      [32, 80]                  --
├─Sequential: 1-1                        [32, 64, 224, 224]        --
│    └─Conv2d: 2-1                       [32, 64, 224, 224]        1,792
│    └─BatchNorm2d: 2-2                  [32, 64, 224, 224]        128
│    └─ReLU: 2-3                         [32, 64, 224, 224]        --
├─Sequential: 1-2                        [32, 64, 112, 112]        --
│    └─Conv2d: 2-4                       [32, 64, 224, 224]        36,928
│    └─BatchNorm2d: 2-5                  [32, 64, 224, 224]        128
│    └─ReLU: 2-6                         [32, 64, 224, 224]        --
│    └─MaxPool2d: 2-7                    [32, 64, 112, 112]        --
├─Sequential: 1-3                        [32, 128, 112, 112]       --
│    └─Conv2d: 2-8                       [32, 128, 112, 112]       73,856
│    └─BatchNorm2d: 2-9                  [32, 128, 112, 112]       256
│

In [9]:
# Output directory handed to the training call (relative to the working directory).
model_dir = 'coco2014_clf'

# Train on the training loader with the hyperparameters defined above.
model.train(
    dataloader.dataset.trainloader,
    epochs=epochs,
    lr=learning_rate,
    wd=weight_decay,
    output_dir=model_dir,
)

[EPOCH #0] loss: 4.617096948425051
[EPOCH #1, elapsed time: 601.735[sec]] loss: 3.3977826505572177
[EPOCH #2, elapsed time: 1206.384[sec]] loss: 3.250460667034208
[EPOCH #3, elapsed time: 1810.384[sec]] loss: 3.24886881939528
[EPOCH #4, elapsed time: 2415.781[sec]] loss: 3.2474392568103654
[EPOCH #5, elapsed time: 3021.028[sec]] loss: 3.246565130252542
[EPOCH #6, elapsed time: 3616.101[sec]] loss: 3.2459778574460025
[EPOCH #7, elapsed time: 4185.793[sec]] loss: 3.245430490998597
[EPOCH #8, elapsed time: 4754.761[sec]] loss: 3.245501617101592
[EPOCH #9, elapsed time: 5349.848[sec]] loss: 3.245337612743587
[EPOCH #10, elapsed time: 5944.315[sec]] loss: 3.244406406647416
[EPOCH #11, elapsed time: 6522.772[sec]] loss: 3.244713014384555
[EPOCH #12, elapsed time: 7093.864[sec]] loss: 3.2441859611368957
[EPOCH #13, elapsed time: 7681.956[sec]] loss: 3.2441579041630875
[EPOCH #14, elapsed time: 8270.295[sec]] loss: 3.2432594435632973
[EPOCH #15, elapsed time: 8855.146[sec]] loss: 3.24325483236

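## Test Model

> **Note:** In the run recorded here the training loss stays near 3.24 from epoch 2 onward (it starts at 4.62), and the visible per-class precision/recall entries are all 0.0. Training accuracy is 35.3% while test accuracy is 0.2%. Treat the metrics below as a check that the pipeline runs, not as a trained baseline. **TODO:** investigate the gap, e.g. verify that class indices are consistent between the train and test splits.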

In [10]:
# Run the trained model over the training loader.
train_result = model.predict(
    dataloader.dataset.trainloader
)

# The result unpacks into exactly two parts: predictions first, then labels.
(train_predictions, train_labels) = train_result

In [11]:
# Score the training-split predictions against their labels.
train_eval_result = model.evaluate(
    train_labels,
    train_predictions,
)

# Pretty-print the returned metrics (accuracy and the classification report).
pprint.pprint(train_eval_result)

{'accuracy': 0.3529683229005195,
 'classification_report': {'0': {'f1-score': 0.0,
                                 'precision': 0.0,
                                 'recall': 0.0,
                                 'support': 408},
                           '1': {'f1-score': 0.0,
                                 'precision': 0.0,
                                 'recall': 0.0,
                                 'support': 1521},
                           '10': {'f1-score': 0.0,
                                  'precision': 0.0,
                                  'recall': 0.0,
                                  'support': 1754},
                           '11': {'f1-score': 0.0,
                                  'precision': 0.0,
                                  'recall': 0.0,
                                  'support': 1153},
                           '12': {'f1-score': 0.0,
                                  'precision': 0.0,
                                  'recall': 0.0,
        

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


In [12]:
# Run the trained model over the test loader.
test_result = model.predict(
    dataloader.dataset.testloader
)

# The result unpacks into exactly two parts: predictions first, then labels.
(test_predictions, test_labels) = test_result

In [13]:
# Score the test-split predictions against their labels.
test_eval_result = model.evaluate(
    test_labels,
    test_predictions,
)

# Pretty-print the returned metrics (accuracy and the classification report).
pprint.pprint(test_eval_result)

{'accuracy': 0.002058626878277088,
 'classification_report': {'0': {'f1-score': 0.0,
                                 'precision': 0.0,
                                 'recall': 0.0,
                                 'support': 872},
                           '1': {'f1-score': 0.0,
                                 'precision': 0.0,
                                 'recall': 0.0,
                                 'support': 432},
                           '10': {'f1-score': 0.0,
                                  'precision': 0.0,
                                  'recall': 0.0,
                                  'support': 1248},
                           '11': {'f1-score': 0.0,
                                  'precision': 0.0,
                                  'recall': 0.0,
                                  'support': 3267},
                           '12': {'f1-score': 0.0,
                                  'precision': 0.0,
                                  'recall': 0.0,
       

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
