In [1]:
import torch
import torchvision
import torch.utils.data as data
import torchvision.transforms as transforms

from torchinfo import summary

import numpy as np
import time

In [2]:
# IMAGENET_DIR = '/home/data/ImageNet'
IMAGENET_DIR = 'D:/ImageNet'
BATCH_SIZE = 32
LOADER_WORKERS = 4

In [3]:
device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
print(device)

cuda:0


In [4]:
model = torchvision.models.efficientnet_b7(pretrained=True)
model = model.to(device)
model = model.eval()

In [5]:
summary(model, (BATCH_SIZE, 3, 224, 224))

Layer (type:depth-idx)                                  Output Shape              Param #
EfficientNet                                            [32, 1000]                --
├─Sequential: 1-1                                       [32, 2560, 7, 7]          --
│    └─ConvNormActivation: 2-1                          [32, 64, 112, 112]        --
│    │    └─Conv2d: 3-1                                 [32, 64, 112, 112]        1,728
│    │    └─BatchNorm2d: 3-2                            [32, 64, 112, 112]        128
│    │    └─SiLU: 3-3                                   [32, 64, 112, 112]        --
│    └─Sequential: 2-2                                  [32, 32, 112, 112]        --
│    │    └─MBConv: 3-4                                 [32, 32, 112, 112]        4,944
│    │    └─MBConv: 3-5                                 [32, 32, 112, 112]        1,992
│    │    └─MBConv: 3-6                                 [32, 32, 112, 112]        1,992
│    │    └─MBConv: 3-7                        

In [6]:
transform = transforms.Compose([
    transforms.Resize(633),
    transforms.CenterCrop(600),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
])

In [7]:
dataset = torchvision.datasets.ImageNet(root=IMAGENET_DIR, transform=transform, split='val')
loader = data.DataLoader(dataset, batch_size=BATCH_SIZE, shuffle=False, num_workers=LOADER_WORKERS)

In [8]:
n_top1 = 0
n_top5 = 0
cnt = 0

s = time.time()
pred_tms = []
with torch.no_grad():
    for images, labels in loader:

        x = images.to(device)
        y = labels.to(device)
        
        ss = time.time()
        output = model(x)
        pred_tms.append(time.time()-ss)
        
        cnt += output.size(0)
        
        _, pred_top1 = output.max(1)
        _, pred_top5 = output.topk(5, 1, True, True)
        
        n_top1 += torch.eq(pred_top1, y).sum().item()
        n_top5 += torch.isin(pred_top5, y).max(1)[0].sum().item()

        print(f"\rstep: {cnt}/{len(dataset)}", end='')
total_tm = time.time() - s
print()
print(f"top-1:  {n_top1/cnt:0.4f}")
print(f"top-5:  {n_top5/cnt:0.4f}")
print(f"Batch Size: {BATCH_SIZE}")
print(f"Total Time: {total_tm:0.4f} ({total_tm/len(dataset):0.4f})")
print(f"Average Prediction Time: {np.mean(pred_tms):0.4f}")

step: 50000/50000
top-1:  0.8394
top-5:  0.9690
Batch Size: 32
Total Time: 1453.8046 (0.0291)
Average Prediction Time: 0.0309
