In [1]:
import torch
import torchvision
import torch.utils.data as data
import torchvision.transforms as transforms

from torchinfo import summary

import numpy as np
import time

In [2]:
IMAGENET_DIR = '/home/data/ImageNet'
BATCH_SIZE = 128
LOADER_WORKERS = 4

In [3]:
device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
print(device)

cuda:0


In [4]:
w = torchvision.models.densenet.DenseNet201_Weights.IMAGENET1K_V1
model = torchvision.models.densenet201(weights=w)
model = model.to(device)
model = model.eval()

In [5]:
summary(model, (BATCH_SIZE, 3, 224, 224))

Layer (type:depth-idx)                   Output Shape              Param #
DenseNet                                 [128, 1000]               --
├─Sequential: 1-1                        [128, 1920, 7, 7]         --
│    └─Conv2d: 2-1                       [128, 64, 112, 112]       9,408
│    └─BatchNorm2d: 2-2                  [128, 64, 112, 112]       128
│    └─ReLU: 2-3                         [128, 64, 112, 112]       --
│    └─MaxPool2d: 2-4                    [128, 64, 56, 56]         --
│    └─_DenseBlock: 2-5                  [128, 256, 56, 56]        --
│    │    └─_DenseLayer: 3-1             [128, 32, 56, 56]         45,440
│    │    └─_DenseLayer: 3-2             [128, 32, 56, 56]         49,600
│    │    └─_DenseLayer: 3-3             [128, 32, 56, 56]         53,760
│    │    └─_DenseLayer: 3-4             [128, 32, 56, 56]         57,920
│    │    └─_DenseLayer: 3-5             [128, 32, 56, 56]         62,080
│    │    └─_DenseLayer: 3-6             [128, 32, 56, 56]   

In [6]:
# transform = transforms.Compose([
#     transforms.Resize(256),
#     transforms.CenterCrop(224),
#     transforms.ToTensor(),
#     transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
# ])
transform = torchvision.models.densenet.DenseNet201_Weights.IMAGENET1K_V1.transforms()
transform

ImageClassification(
    crop_size=[224]
    resize_size=[256]
    mean=[0.485, 0.456, 0.406]
    std=[0.229, 0.224, 0.225]
    interpolation=InterpolationMode.BILINEAR
)

In [7]:
dataset = torchvision.datasets.ImageNet(root=IMAGENET_DIR, transform=transform, split='val')
loader = data.DataLoader(dataset, batch_size=BATCH_SIZE, shuffle=False, num_workers=LOADER_WORKERS)

In [8]:
n_top1 = 0
n_top5 = 0
cnt = 0

s = time.time()
pred_tms = []
with torch.no_grad():
    for images, labels in loader:

        x = images.to(device)
        y = labels.to(device)
        
        ss = time.time()
        output = model(x)
        pred_tms.append(time.time()-ss)
        
        cnt += output.size(0)
        
        _, pred_top1 = output.max(1)
        _, pred_top5 = output.topk(5, 1, True, True)
        
        n_top1 += torch.eq(pred_top1, y).sum().item()
        n_top5 += torch.isin(pred_top5, y).max(1)[0].sum().item()

        print(f"\rstep: {cnt}/{len(dataset)}", end='')
total_tm = time.time() - s
print()
print(f"top-1:  {n_top1/cnt:0.4f}")
print(f"top-5:  {n_top5/cnt:0.4f}")
print(f"Batch Size: {BATCH_SIZE}")
print(f"Total Time: {total_tm:0.4f} ({total_tm/len(dataset):0.4f})")
print(f"Average Prediction Time: {np.mean(pred_tms):0.4f}")

step: 50000/50000
top-1:  0.7689
top-5:  0.9398
Batch Size: 128
Total Time: 65.6227 (0.0013)
Average Prediction Time: 0.0414
