In [1]:
import timm
import numpy as np
import pandas as pd
import time
import torch
from tqdm.notebook import tqdm 

#import torch.utils.benchmark as benchmark
from torch.utils.data import Dataset, DataLoader

In [2]:
# Benchmark configuration shared by every cell below.
WARM_UP = 10  # leading batches that are executed but excluded from the timing lists
BATCH_SIZE = 4  # batch size handed to the DataLoader
NUM_TEST = 100  # batches generated in addition to the WARM_UP ones

In [3]:
# Record the GPU model, driver and CUDA version the timings below were taken on.
!nvidia-smi

Wed Dec 22 14:44:46 2021       
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 470.82.00    Driver Version: 470.82.00    CUDA Version: 11.4     |
|-------------------------------+----------------------+----------------------+
| GPU  Name        Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|                               |                      |               MIG M. |
|   0  NVIDIA RTX A6000    Off  | 00000000:09:00.0 Off |                  Off |
| 30%   47C    P8    17W / 300W |     33MiB / 48682MiB |      0%      Default |
|                               |                      |                  N/A |
+-------------------------------+----------------------+----------------------+
                                                                               
+-----------------------------------------------------------------------------+
| Proces

In [4]:
df_models = pd.read_csv("results-imagenet-real.csv")
# Keep only the names of the models whose `img_size` column equals 224.
modellist = df_models.loc[df_models["img_size"] == 224, "model"]
df_models.head()

Unnamed: 0,model,top1,top1_err,top5,top5_err,param_count,img_size,cropt_pct,interpolation,top1_diff,top5_diff,rank_diff
0,beit_large_patch16_512,90.695,9.305,98.77,1.23,305.67,512,1.0,bicubic,2.111,0.11,0
1,beit_large_patch16_384,90.601,9.399,98.777,1.223,305.0,384,1.0,bicubic,2.219,0.169,0
2,tf_efficientnet_l2_ns,90.572,9.428,98.779,1.221,480.31,800,0.96,bicubic,2.226,0.125,0
3,tf_efficientnet_l2_ns_475,90.527,9.473,98.706,1.294,480.31,475,0.936,bicubic,2.289,0.156,0
4,beit_base_patch16_384,90.388,9.612,98.73,1.27,86.74,384,1.0,bicubic,3.58,0.59,4


In [5]:
class RandomDataset(Dataset):
    """Synthetic dataset of standard-normal tensors of shape (3, imsize, imsize).

    All samples are drawn once in the constructor and kept in a single tensor
    of shape (3, imsize, imsize, length); the sample axis is the last one.
    """

    def __init__(self, length, imsize):
        self.len = length
        self.data = torch.randn(3, imsize, imsize, length)

    def __len__(self):
        return self.len

    def __getitem__(self, index):
        # Select along the trailing sample axis.
        return self.data[..., index]

# Loader used by the fixed-size benchmark: WARM_UP + NUM_TEST batches of
# BATCH_SIZE random 224x224 inputs, served in order.
rand_loader = DataLoader(
    RandomDataset(length=BATCH_SIZE * (WARM_UP + NUM_TEST), imsize=224),
    batch_size=BATCH_SIZE,
    shuffle=False,
    num_workers=8,
)

In [6]:
# https://github.com/ryujaehun/pytorch-gpu-benchmark/blob/master/benchmark_models.py
def _timed_forward_ms(model, loader, precision, warm_up):
    """Run `model` on every batch of `loader` and return per-batch durations in ms.

    Args:
        model: module already placed on the CUDA device.
        loader: iterable yielding CPU tensors.
        precision: name of the tensor cast method applied to each batch
            ("float" or "half").
        warm_up: number of leading batches that are run but not recorded.

    Returns:
        List of wall-clock durations in milliseconds, one per recorded batch.

    Raises:
        ValueError: if no batch was recorded (loader yields <= `warm_up` batches).
    """
    durations = []
    for step, img in enumerate(loader):
        img = getattr(img, precision)()
        torch.cuda.synchronize()
        # perf_counter is monotonic and high-resolution, unlike time.time().
        start = time.perf_counter()
        # The host-to-device copy sits inside the timed region, so the reported
        # latency includes the transfer as well as the forward pass.
        model(img.to('cuda'))
        torch.cuda.synchronize()
        end = time.perf_counter()
        if step >= warm_up:
            durations.append((end - start) * 1000)
    if not durations:
        raise ValueError(
            f"no timed batches: the loader yielded at most {warm_up} warm-up batches"
        )
    return durations


def inference(modelname, benchmark, half=False):
    """Time `modelname` on `rand_loader` and store the result in `benchmark`.

    Args:
        modelname: name passed to `timm.create_model` and looked up in the
            "model" column of `df_models`.
        benchmark: dict that is updated in place with
            `benchmark[modelname] = {"fp32": ..., ["fp16": ...,] "top1": ...}`.
        half: when True, the model is additionally converted with `.half()`
            and timed again on half-precision inputs ("fp16" entry).

    Returns:
        The same `benchmark` dict.

    Raises:
        ValueError: if `modelname` does not match exactly one row of
            `df_models`, or if no batch was timed.
    """
    # Resolve the accuracy first so a bad model name fails before the expensive
    # run. `.item()` requires exactly one matching row and avoids the deprecated
    # float(Series) conversion.
    top1 = float(df_models.loc[df_models["model"] == modelname, "top1"].item())

    with torch.no_grad():
        model = timm.create_model(modelname)
        model = model.to('cuda')
        model.eval()

        print(f'Benchmarking Inference {modelname} ')
        durations = _timed_forward_ms(model, rand_loader, "float", WARM_UP)
        print(f'{modelname} model average inference time : {sum(durations)/len(durations)}ms')
        result = {"fp32": np.mean(durations)}

        if half:
            print(f'Benchmarking Inference half precision type {modelname} ')
            model.half()
            durations_half = _timed_forward_ms(model, rand_loader, "half", WARM_UP)
            print(f'{modelname} half model average inference time : {sum(durations_half)/len(durations_half)}ms')
            result["fp16"] = np.mean(durations_half)

        # Always a plain float, whichever precision branch ran.
        result["top1"] = top1
        benchmark[modelname] = result
    return benchmark

In [7]:
benchmark = {}

# inference float precision
# Iterate the Series directly so tqdm knows the total and can show real progress.
for modelname in tqdm(modellist, desc="fp32 inference"):
    try:
        benchmark = inference(modelname, benchmark)
    except Exception as exc:
        # Skip models that cannot be built or run, but report why. Catching
        # Exception (not a bare except) keeps Ctrl-C / kernel interrupt working.
        print("pass {} ({}: {})".format(modelname, type(exc).__name__, exc))
benchmark

HBox(children=(FloatProgress(value=1.0, bar_style='info', max=1.0), HTML(value='')))

Benchmarking Inference beit_large_patch16_224 
beit_large_patch16_224 model average inference time : 21.222758293151855ms
Benchmarking Inference swin_large_patch4_window7_224 
swin_large_patch4_window7_224 model average inference time : 17.0994234085083ms
Benchmarking Inference xcit_large_24_p8_224_dist 
xcit_large_24_p8_224_dist model average inference time : 66.53245449066162ms
Benchmarking Inference beit_base_patch16_224 
beit_base_patch16_224 model average inference time : 7.464489936828613ms
Benchmarking Inference vit_large_patch16_224 
vit_large_patch16_224 model average inference time : 20.04459857940674ms
Benchmarking Inference xcit_medium_24_p8_224_dist 
xcit_medium_24_p8_224_dist model average inference time : 40.78274726867676ms
Benchmarking Inference xcit_small_24_p8_224_dist 
xcit_small_24_p8_224_dist model average inference time : 29.373767375946045ms
Benchmarking Inference swin_base_patch4_window7_224 
swin_base_patch4_window7_224 model average inference time : 12.806141

{'beit_large_patch16_224': {'fp32': 21.222758293151855, 'top1': 90.157},
 'swin_large_patch4_window7_224': {'fp32': 17.0994234085083, 'top1': 89.796},
 'xcit_large_24_p8_224_dist': {'fp32': 66.53245449066162, 'top1': 89.519},
 'beit_base_patch16_224': {'fp32': 7.464489936828613, 'top1': 89.438},
 'vit_large_patch16_224': {'fp32': 20.04459857940674, 'top1': 89.308},
 'xcit_medium_24_p8_224_dist': {'fp32': 40.78274726867676, 'top1': 89.286},
 'xcit_small_24_p8_224_dist': {'fp32': 29.373767375946045, 'top1': 89.207},
 'swin_base_patch4_window7_224': {'fp32': 12.806141376495361, 'top1': 89.19},
 'ig_resnext101_32x32d': {'fp32': 132.56885290145874, 'top1': 89.109},
 'ig_resnext101_32x48d': {'fp32': 210.10546445846558, 'top1': 89.107},
 'xcit_large_24_p16_224_dist': {'fp32': 19.736363887786865, 'top1': 89.045},
 'resmlp_big_24_224_in22ft1k': {'fp32': 33.9638614654541, 'top1': 89.019},
 'xcit_small_12_p8_224_dist': {'fp32': 16.07722043991089, 'top1': 89.007},
 'vit_base_patch16_224': {'fp32':

In [8]:
# Transpose so that each model is a row and each recorded metric a column.
df_results = pd.DataFrame(benchmark).transpose()
df_results

Unnamed: 0,fp32,top1
beit_large_patch16_224,21.222758,90.157
swin_large_patch4_window7_224,17.099423,89.796
xcit_large_24_p8_224_dist,66.532454,89.519
beit_base_patch16_224,7.464490,89.438
vit_large_patch16_224,20.044599,89.308
...,...,...
tf_mobilenetv3_small_100,5.031617,74.736
dla46x_c,4.911094,73.645
tf_mobilenetv3_small_075,5.292332,72.816
dla46_c,5.090697,72.607


In [9]:
# Save the fp32 results table (index = model name) as CSV.
df_results.to_csv("results_fp32_224.csv")

## Plot: top-1 accuracy vs. fp32 inference time

In [10]:
import seaborn as sns

# `logx=True` in lmplot only changes the regression fit, which is disabled
# here (fit_reg=False), so it never put the x axis on a log scale. Set the
# axis scale explicitly instead.
grid = sns.lmplot(data=df_results, x='fp32', y='top1',
                  fit_reg=False, scatter_kws={'alpha':0.8})
grid.set(xscale="log")
grid.set_axis_labels("fp32 time per batch (ms)", "top-1 accuracy (%)");

<seaborn.axisgrid.FacetGrid at 0x7efec36ae978>

# For various image sizes

In [11]:
# https://github.com/ryujaehun/pytorch-gpu-benchmark/blob/master/benchmark_models.py
def inference_imsize(modelname, benchmark, imsize):
    """Time `modelname` in fp32 on random `imsize` x `imsize` inputs.

    Args:
        modelname: name passed to `timm.create_model` and looked up in the
            "model" column of `df_models`.
        benchmark: dict that is updated in place with
            `benchmark[modelname] = {"fp32": ..., "top1": ..., "imsize": imsize}`.
        imsize: side length of the square random inputs.

    Returns:
        The same `benchmark` dict.

    Raises:
        ValueError: if `modelname` does not match exactly one row of
            `df_models`, or if no batch was timed.
    """
    # Resolve the accuracy first so a bad model name fails before the expensive
    # run. `.item()` requires exactly one matching row and avoids the deprecated
    # float(Series) conversion.
    top1 = float(df_models.loc[df_models["model"] == modelname, "top1"].item())

    with torch.no_grad():
        model = timm.create_model(modelname)
        model = model.to('cuda')
        model.eval()

        # A fresh loader per call: the input resolution differs between models.
        rand_loader = DataLoader(dataset=RandomDataset(BATCH_SIZE*(WARM_UP + NUM_TEST), imsize),
                                 batch_size=BATCH_SIZE, shuffle=False, num_workers=8)

        print(f'Benchmarking Inference {modelname} ')
        durations = []
        for step, img in enumerate(rand_loader):
            img = img.float()
            torch.cuda.synchronize()
            # perf_counter is monotonic and high-resolution, unlike time.time().
            start = time.perf_counter()
            # The host-to-device copy sits inside the timed region, so the
            # reported latency includes the transfer as well as the forward pass.
            model(img.to('cuda'))
            torch.cuda.synchronize()
            end = time.perf_counter()
            if step >= WARM_UP:
                durations.append((end - start) * 1000)
        if not durations:
            raise ValueError(
                f"no timed batches: the loader yielded at most {WARM_UP} warm-up batches"
            )
        print(f'{modelname} model average inference time : {sum(durations)/len(durations)}ms')

        benchmark[modelname] = {"fp32": np.mean(durations), "top1": top1, "imsize": imsize}
    return benchmark

In [12]:
modellist = df_models["model"]
benchmark = {}

# inference float precision
# Walk the name and image-size columns together instead of re-filtering the
# frame (and calling the deprecated int(Series)) for every model.
for modelname, imsize in tqdm(zip(modellist, df_models["img_size"]), total=len(modellist)):
    try:
        benchmark = inference_imsize(modelname, benchmark, int(imsize))
    except Exception as exc:
        # Skip models that cannot be built or run, but report why. Catching
        # Exception (not a bare except) keeps Ctrl-C / kernel interrupt working.
        print("pass {} ({}: {})".format(modelname, type(exc).__name__, exc))
benchmark

HBox(children=(FloatProgress(value=1.0, bar_style='info', max=1.0), HTML(value='')))

Benchmarking Inference beit_large_patch16_512 
beit_large_patch16_512 model average inference time : 187.82024145126343ms
Benchmarking Inference beit_large_patch16_384 
beit_large_patch16_384 model average inference time : 78.99953365325928ms
Benchmarking Inference tf_efficientnet_l2_ns 


In [None]:
# Transpose so that each model is a row and each recorded metric a column.
df_results = pd.DataFrame(benchmark).T
df_results.to_csv("results_fp32_imsizeall.csv")
# Only the last expression of a cell is rendered, so the frame goes last.
df_results