In [1]:
import timm
import numpy as np
import pandas as pd
import time
import torch
from tqdm.notebook import tqdm 

import torch.utils.benchmark as benchmark
from torch.utils.data import Dataset, DataLoader

In [2]:
# Number of leading batches in each run that are executed but left out of the timings.
WARM_UP = 10
# Number of images per batch produced by the data loaders.
BATCH_SIZE = 4
# Number of timed batches per run; each loader holds WARM_UP + NUM_TEST batches.
NUM_TEST = 100

In [3]:
# Record the GPU, driver and CUDA version visible at the time the notebook is run.
!nvidia-smi

Sat Jul  3 11:50:04 2021       
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 455.45.01    Driver Version: 455.45.01    CUDA Version: 11.1     |
|-------------------------------+----------------------+----------------------+
| GPU  Name        Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|                               |                      |               MIG M. |
|   0  RTX A6000           On   | 00000000:09:00.0 Off |                  Off |
| 30%   50C    P8    26W / 250W |     41MiB / 48682MiB |      0%      Default |
|                               |                      |                  N/A |
+-------------------------------+----------------------+----------------------+
                                                                               
+---------------------------------------------------------------------------

In [4]:
# Per-model results table (accuracy, parameter count, image size, ...).
df_models = pd.read_csv("results-imagenet-real.csv")

# Names of the models whose listed image size is 224.
modellist = df_models.loc[df_models["img_size"].eq(224), "model"]

df_models.head()

Unnamed: 0,model,top1,top1_err,top5,top5_err,param_count,img_size,cropt_pct,interpolation,top1_diff,top5_diff,rank_diff
0,tf_efficientnet_l2_ns,90.563,9.437,98.779,1.221,480.31,800,0.96,bicubic,2.211,0.129,0
1,tf_efficientnet_l2_ns_475,90.537,9.463,98.71,1.29,480.31,475,0.936,bicubic,2.303,0.164,0
2,cait_m48_448,90.196,9.804,98.484,1.516,356.46,448,1.0,bicubic,3.712,0.73,3
3,vit_large_patch16_384,90.196,9.804,98.661,1.339,304.72,384,1.0,bicubic,3.116,0.361,0
4,tf_efficientnet_b7_ns,90.1,9.9,98.614,1.386,66.35,600,0.949,bicubic,3.26,0.52,0


In [5]:
class RandomDataset(Dataset):
    """In-memory dataset of random, image-shaped tensors.

    All samples are drawn once with ``torch.randn`` at construction time and
    stored in a single tensor of shape ``(3, imsize, imsize, length)``, i.e.
    the sample index is the trailing dimension.
    """

    def __init__(self,  length, imsize):
        per_sample_shape = (3, imsize, imsize)
        self.len = length
        self.data = torch.randn(*per_sample_shape, length)

    def __len__(self):
        return self.len

    def __getitem__(self, index):
        # Select along the trailing (sample) axis.
        return self.data[..., index]

# Loader used by the fixed-size benchmark: WARM_UP + NUM_TEST batches of
# random 224x224 inputs, served in a fixed order.
rand_loader = DataLoader(
    dataset=RandomDataset(length=BATCH_SIZE * (WARM_UP + NUM_TEST), imsize=224),
    batch_size=BATCH_SIZE,
    shuffle=False,
    num_workers=8,
)

In [None]:
# https://github.com/ryujaehun/pytorch-gpu-benchmark/blob/master/benchmark_models.py
def _timed_forward_passes(model, loader, precision):
    """Run ``model`` on every batch of ``loader`` and return per-batch times in ms.

    Args:
        model: module already placed on the CUDA device.
        loader: iterable of input batches.
        precision: name of the tensor cast method applied to each batch
            before it is sent to the GPU ("float" or "half").

    Returns:
        List of wall-clock durations in milliseconds, one per batch, with the
        first ``WARM_UP`` batches executed but not recorded.

    Raises:
        RuntimeError: if no batch was left to time after the warm-up.
    """
    timings_ms = []
    for step, img in enumerate(loader):
        img = getattr(img, precision)()
        # Synchronise on both sides of the call so that the clock brackets
        # the GPU work instead of just the launch of the kernels.
        torch.cuda.synchronize()
        start = time.perf_counter()
        model(img.to('cuda'))  # the host-to-device copy is inside the timed region
        torch.cuda.synchronize()
        end = time.perf_counter()
        if step >= WARM_UP:
            timings_ms.append((end - start) * 1000)
    if not timings_ms:
        raise RuntimeError(
            f"no timed batches: the loader must yield more than WARM_UP={WARM_UP} batches"
        )
    return timings_ms


def inference(modelname, benchmark, half=False):
    """Time inference of one timm model on the shared ``rand_loader``.

    The model is created with ``timm.create_model``, moved to the GPU and put
    in eval mode; all forward passes run under ``torch.no_grad()``.

    Args:
        modelname: timm model name; must appear exactly once in the
            ``model`` column of ``df_models``.
        benchmark: dict of results keyed by model name. It is updated in
            place and also returned.
        half: when true, the model is additionally converted with
            ``model.half()`` and timed a second time on half-precision inputs.

    Returns:
        ``benchmark`` with ``benchmark[modelname]`` set to a dict holding
        ``"fp32"`` (mean ms per batch), ``"fp16"`` (only when ``half``) and
        ``"top1"`` (float taken from ``df_models``).
    """
    with torch.no_grad():
        model = timm.create_model(modelname,)
        model = model.to('cuda')
        model.eval()

        print(f'Benchmarking Inference {modelname} ')
        fp32_ms = float(np.mean(_timed_forward_passes(model, rand_loader, "float")))
        print(f'{modelname} model average inference time : {fp32_ms}ms')
        result = {"fp32": fp32_ms}

        if half:
            print(f'Benchmarking Inference half precision type {modelname} ')
            model.half()
            fp16_ms = float(np.mean(_timed_forward_passes(model, rand_loader, "half")))
            print(f'{modelname} half model average inference time : {fp16_ms}ms')
            result["fp16"] = fp16_ms

        # Always store a plain float. `.item()` insists on exactly one matching
        # row and avoids calling float() on a Series, which pandas deprecates.
        top1 = df_models.loc[df_models["model"] == modelname, "top1"].item()
        result["top1"] = float(top1)

        benchmark[modelname] = result
    return benchmark

In [None]:
# NOTE: this rebinds `benchmark`, which the import cell bound to the
# torch.utils.benchmark module.
benchmark = {}

# inference float precision
# tqdm wraps the model list directly so it can read its length and show a total.
for modelname in tqdm(modellist):
    try:
        benchmark = inference(modelname, benchmark)
    except Exception as exc:
        # Best effort: skip models that fail, but report why. A bare `except:`
        # would also swallow KeyboardInterrupt and make the run unstoppable.
        print("pass {} ({}: {})".format(modelname, type(exc).__name__, exc))
benchmark

In [None]:
# Transpose so that each model becomes a row and each recorded metric a column.
df_results = pd.DataFrame(benchmark).transpose()
df_results

In [None]:
# Persist the results table; the index (model names) is written as the first column.
df_results.to_csv("results_fp32_224.csv")

## plot

In [None]:
import seaborn as sns

# `logx=True` only affects how lmplot fits its regression line, and the fit is
# disabled here (fit_reg=False), so it never produced a log axis. Put the
# x-axis itself on a log scale instead, and label both axes.
(
    sns.lmplot(y='top1', x='fp32',
               data=df_results,
               fit_reg=False, scatter_kws={'alpha':0.8})
    .set(xscale="log")
    .set_axis_labels("fp32 time per batch (ms)", "top1 accuracy")
);

# Benchmark each model at its own image size

In [6]:
# https://github.com/ryujaehun/pytorch-gpu-benchmark/blob/master/benchmark_models.py
def inference_imsize(modelname, benchmark, imsize):
    """Time fp32 inference of one timm model on random ``imsize`` x ``imsize`` inputs.

    The model is created with ``timm.create_model``, moved to the GPU and put
    in eval mode; all forward passes run under ``torch.no_grad()``. A new
    ``RandomDataset`` / ``DataLoader`` pair is built on every call because the
    input resolution is a parameter.

    Args:
        modelname: timm model name; must appear exactly once in the
            ``model`` column of ``df_models``.
        benchmark: dict of results keyed by model name. It is updated in
            place and also returned.
        imsize: height and width of the random input images.

    Returns:
        ``benchmark`` with ``benchmark[modelname]`` set to a dict holding
        ``"fp32"`` (mean ms per batch), ``"top1"`` (float taken from
        ``df_models``) and ``"imsize"``.

    Raises:
        RuntimeError: if no batch was left to time after the warm-up.
    """
    with torch.no_grad():
        model = timm.create_model(modelname,)
        model = model.to('cuda')
        model.eval()

        # Local name chosen so that it does not shadow the global `rand_loader`.
        loader = DataLoader(dataset=RandomDataset(BATCH_SIZE*(WARM_UP + NUM_TEST), imsize),
                            batch_size=BATCH_SIZE, shuffle=False, num_workers=8)

        durations = []
        print(f'Benchmarking Inference {modelname} ')
        for step, img in enumerate(loader):
            img = img.float()
            # Synchronise on both sides of the call so that the clock brackets
            # the GPU work instead of just the launch of the kernels.
            torch.cuda.synchronize()
            start = time.perf_counter()
            model(img.to('cuda'))  # the host-to-device copy is inside the timed region
            torch.cuda.synchronize()
            end = time.perf_counter()
            if step >= WARM_UP:
                durations.append((end - start) * 1000)

        if not durations:
            raise RuntimeError(
                f"no timed batches: the loader must yield more than WARM_UP={WARM_UP} batches"
            )
        fp32_ms = float(np.mean(durations))
        print(f'{modelname} model average inference time : {fp32_ms}ms')

        # `.item()` insists on exactly one matching row and avoids calling
        # float() on a Series, which pandas deprecates.
        top1 = df_models.loc[df_models["model"] == modelname, "top1"].item()
        benchmark[modelname] = {"fp32": fp32_ms, "top1": float(top1), "imsize": imsize}
    return benchmark

In [7]:
modellist = df_models["model"]
# NOTE: this rebinds `benchmark`, which the import cell bound to the
# torch.utils.benchmark module.
benchmark = {}

# inference float precision
# Walk the model names together with their image sizes instead of re-filtering
# the table on every iteration; `total` lets tqdm show real progress.
for modelname, imsize in tqdm(zip(modellist, df_models["img_size"]), total=len(modellist)):
    imsize = int(imsize)
    try:
        benchmark = inference_imsize(modelname, benchmark, imsize)
    except Exception as exc:
        # Best effort: skip models that fail, but report why. A bare `except:`
        # would also swallow KeyboardInterrupt and make the run unstoppable.
        print("pass {} ({}: {})".format(modelname, type(exc).__name__, exc))
benchmark

HBox(children=(HTML(value=''), FloatProgress(value=1.0, bar_style='info', layout=Layout(width='20px'), max=1.0…

Benchmarking Inference tf_efficientnet_l2_ns 
tf_efficientnet_l2_ns model average inference time : 681.3122296333313ms
Benchmarking Inference tf_efficientnet_l2_ns_475 
tf_efficientnet_l2_ns_475 model average inference time : 280.7985281944275ms
Benchmarking Inference cait_m48_448 
cait_m48_448 model average inference time : 556.2820100784302ms
Benchmarking Inference vit_large_patch16_384 
vit_large_patch16_384 model average inference time : 79.5830225944519ms
Benchmarking Inference tf_efficientnet_b7_ns 
tf_efficientnet_b7_ns model average inference time : 103.03890943527222ms
Benchmarking Inference cait_m36_384 
cait_m36_384 model average inference time : 241.93207263946533ms
Benchmarking Inference dm_nfnet_f6 
dm_nfnet_f6 model average inference time : 509.15677785873413ms
Benchmarking Inference swin_large_patch4_window12_384 
swin_large_patch4_window12_384 model average inference time : 60.2254581451416ms
Benchmarking Inference tf_efficientnetv2_l_in21ft1k 
tf_efficientnetv2_l_in21

Benchmarking Inference efficientnet_b4 
efficientnet_b4 model average inference time : 30.89365243911743ms
Benchmarking Inference resnet152d 
resnet152d model average inference time : 37.48584747314453ms
Benchmarking Inference tf_efficientnet_b4_ap 
tf_efficientnet_b4_ap model average inference time : 30.958642959594727ms
Benchmarking Inference tf_efficientnet_b5 
tf_efficientnet_b5 model average inference time : 38.48245143890381ms
Benchmarking Inference resnetrs152 
resnetrs152 model average inference time : 58.53393793106079ms
Benchmarking Inference deit_base_distilled_patch16_224 
deit_base_distilled_patch16_224 model average inference time : 8.232343196868896ms
Benchmarking Inference resnetv2_152x2_bit_teacher_384 
resnetv2_152x2_bit_teacher_384 model average inference time : 120.65580368041992ms
Benchmarking Inference ig_resnext101_32x8d 
ig_resnext101_32x8d model average inference time : 63.340277671813965ms
Benchmarking Inference cait_xxs36_384 
cait_xxs36_384 model average inf

Benchmarking Inference nf_resnet50 
nf_resnet50 model average inference time : 16.528451442718506ms
Benchmarking Inference resnest50d_4s2x40d 
resnest50d_4s2x40d model average inference time : 24.056780338287354ms
Benchmarking Inference efficientnet_b3_pruned 
efficientnet_b3_pruned model average inference time : 24.966726303100586ms
Benchmarking Inference repvgg_b3 
repvgg_b3 model average inference time : 28.252103328704834ms
Benchmarking Inference ssl_resnext101_32x4d 
ssl_resnext101_32x4d model average inference time : 45.07107496261597ms
Benchmarking Inference ecaresnet50d 
ecaresnet50d model average inference time : 16.41528844833374ms
Benchmarking Inference gluon_resnet152_v1s 
gluon_resnet152_v1s model average inference time : 36.24826669692993ms
Benchmarking Inference resnest50d_1s4x24d 
resnest50d_1s4x24d model average inference time : 22.795162200927734ms
Benchmarking Inference resnetv2_50x1_bitm 
resnetv2_50x1_bitm model average inference time : 23.91517400741577ms
Benchmar

Benchmarking Inference xception65 
xception65 model average inference time : 19.800798892974854ms
Benchmarking Inference skresnext50_32x4d 
skresnext50_32x4d model average inference time : 35.61493158340454ms
Benchmarking Inference dpn98 
dpn98 model average inference time : 52.345054149627686ms
Benchmarking Inference gluon_resnet101_v1c 
gluon_resnet101_v1c model average inference time : 24.925501346588135ms
Benchmarking Inference dpn68b 
dpn68b model average inference time : 35.77160835266113ms
Benchmarking Inference regnety_064 
regnety_064 model average inference time : 34.27822828292847ms
Benchmarking Inference resnetblur50 
resnetblur50 model average inference time : 13.456873893737793ms
Benchmarking Inference resmlp_24_224 
resmlp_24_224 model average inference time : 7.339167594909668ms
Benchmarking Inference coat_lite_mini 
coat_lite_mini model average inference time : 11.493215560913086ms
Benchmarking Inference regnety_080 
regnety_080 model average inference time : 30.399339

Benchmarking Inference tf_efficientnet_lite2 
tf_efficientnet_lite2 model average inference time : 11.678578853607178ms
Benchmarking Inference efficientnet_b0 
efficientnet_b0 model average inference time : 15.488321781158447ms
Benchmarking Inference gmixer_24_224 
gmixer_24_224 model average inference time : 10.4020357131958ms
Benchmarking Inference hardcorenas_e 
hardcorenas_e model average inference time : 16.024208068847656ms
Benchmarking Inference tf_efficientnet_cc_b0_8e 
tf_efficientnet_cc_b0_8e model average inference time : 17.45192289352417ms
Benchmarking Inference tv_resnext50_32x4d 
tv_resnext50_32x4d model average inference time : 23.615386486053467ms
Benchmarking Inference regnety_016 
regnety_016 model average inference time : 37.89597749710083ms
Benchmarking Inference gluon_resnet50_v1b 
gluon_resnet50_v1b model average inference time : 13.205344676971436ms
Benchmarking Inference densenet161 
densenet161 model average inference time : 39.54745054244995ms
Benchmarking In

Benchmarking Inference mnasnet_100 
mnasnet_100 model average inference time : 9.475789070129395ms
Benchmarking Inference vgg19_bn 
vgg19_bn model average inference time : 21.40399694442749ms
Benchmarking Inference convit_tiny 
convit_tiny model average inference time : 8.962047100067139ms
Benchmarking Inference spnasnet_100 
spnasnet_100 model average inference time : 11.406304836273193ms
Benchmarking Inference ghostnet_100 
ghostnet_100 model average inference time : 15.808005332946777ms
Benchmarking Inference regnety_004 
regnety_004 model average inference time : 29.50500249862671ms
Benchmarking Inference skresnet18 
skresnet18 model average inference time : 12.039923667907715ms
Benchmarking Inference regnetx_006 
regnetx_006 model average inference time : 16.601219177246094ms
Benchmarking Inference pit_ti_224 
pit_ti_224 model average inference time : 11.367049217224121ms
Benchmarking Inference swsl_resnet18 
swsl_resnet18 model average inference time : 6.388416290283203ms
Benchma

{'tf_efficientnet_l2_ns': {'fp32': 681.3122296333313,
  'top1': 90.56299999999999,
  'imsize': 800},
 'tf_efficientnet_l2_ns_475': {'fp32': 280.7985281944275,
  'top1': 90.537,
  'imsize': 475},
 'cait_m48_448': {'fp32': 556.2820100784302, 'top1': 90.196, 'imsize': 448},
 'vit_large_patch16_384': {'fp32': 79.5830225944519,
  'top1': 90.196,
  'imsize': 384},
 'tf_efficientnet_b7_ns': {'fp32': 103.03890943527222,
  'top1': 90.1,
  'imsize': 600},
 'cait_m36_384': {'fp32': 241.93207263946533, 'top1': 90.046, 'imsize': 384},
 'dm_nfnet_f6': {'fp32': 509.15677785873413, 'top1': 90.046, 'imsize': 576},
 'swin_large_patch4_window12_384': {'fp32': 60.2254581451416,
  'top1': 90.027,
  'imsize': 384},
 'tf_efficientnetv2_l_in21ft1k': {'fp32': 93.13963651657104,
  'top1': 90.008,
  'imsize': 480},
 'swin_base_patch4_window12_384': {'fp32': 36.01898908615112,
  'top1': 89.995,
  'imsize': 384},
 'vit_base_patch16_384': {'fp32': 27.246394157409668,
  'top1': 89.98899999999999,
  'imsize': 384},
 

In [8]:
# Transpose so that each model becomes a row and each recorded metric a column.
df_results = pd.DataFrame(benchmark).T
df_results.to_csv("results_fp32_imsizeall.csv")
# Kept as the last expression of the cell: only the last expression is rendered.
df_results