# Compare pretrained image classification models

A simple notebook to download and analyse the ImageNet validation results for the models in the `timm` collection. We can use these results to pick pretrained models that offer the best accuracy/size trade-off for fine-tuning.

In [25]:
import os
import pandas as pd

In [26]:
# Download the ImageNet results CSV from the timm repository.
# `-O` pins the output filename: without it, wget refuses to overwrite an existing
# file and saves a re-download as `results-imagenet.csv.1`, so the notebook would
# keep reading the stale `results-imagenet.csv` on every re-run.
# The cell displays wget's exit status (0 means the download succeeded).
os.system("wget -O results-imagenet.csv https://raw.githubusercontent.com/huggingface/pytorch-image-models/refs/heads/main/results/results-imagenet.csv")

--2025-10-19 08:20:11--  https://raw.githubusercontent.com/huggingface/pytorch-image-models/refs/heads/main/results/results-imagenet.csv
Resolving raw.githubusercontent.com (raw.githubusercontent.com)... 185.199.110.133, 185.199.109.133, 185.199.111.133, ...
Connecting to raw.githubusercontent.com (raw.githubusercontent.com)|185.199.110.133|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 113785 (111K) [text/plain]
Saving to: ‘results-imagenet.csv.1’

     0K .......... .......... .......... .......... .......... 44% 6.91M 0s
    50K .......... .......... .......... .......... .......... 89% 5.24M 0s
   100K .......... .                                          100% 50.6M=0.02s

2025-10-19 08:20:11 (6.53 MB/s) - ‘results-imagenet.csv.1’ saved [113785/113785]



0

In [27]:
# Load the downloaded results table.
df = pd.read_csv("results-imagenet.csv")

# `param_count` uses "," as a thousands separator (e.g. "1,014.45"), so pandas reads
# the column as text. Cast to str first so the cleanup also works when the column was
# parsed as numeric (no separators present); `.str` would raise AttributeError there.
# `regex=False` makes the comma a literal match on every pandas version.
df["param_count"] = (
    df["param_count"]
    .astype(str)
    .str.replace(",", "", regex=False)
    .astype(float)
)

# Show the table dimensions, then preview the first rows.
print(df.shape)
df.head()

(1445, 9)


Unnamed: 0,model,img_size,top1,top1_err,top5,top5_err,param_count,crop_pct,interpolation
0,eva02_large_patch14_448.mim_m38m_ft_in22k_in1k,448,90.054,9.946,99.056,0.944,305.08,1.0,bicubic
1,eva02_large_patch14_448.mim_in22k_ft_in22k_in1k,448,89.966,10.034,99.016,0.984,305.08,1.0,bicubic
2,eva_giant_patch14_560.m30m_ft_in22k_in1k,560,89.796,10.204,98.99,1.01,1014.45,1.0,bicubic
3,eva02_large_patch14_448.mim_in22k_ft_in1k,448,89.632,10.368,98.954,1.046,305.08,1.0,bicubic
4,eva_giant_patch14_336.m30m_ft_in22k_in1k,336,89.57,10.43,98.954,1.046,1013.01,1.0,bicubic


In [28]:
# Baseline: list every row whose lower-cased model name contains "convnext_t"
# (the convnext_tiny variants).
df.loc[lambda frame: frame["model"].str.lower().str.contains("convnext_t")]

Unnamed: 0,model,img_size,top1,top1_err,top5,top5_err,param_count,crop_pct,interpolation
236,convnext_tiny.in12k_ft_in1k_384,384,85.154,14.846,97.634,2.366,28.59,1.0,bicubic
322,convnext_tiny.in12k_ft_in1k,288,84.448,15.552,97.328,2.672,28.59,1.0,bicubic
363,convnext_tiny.in12k_ft_in1k,224,84.192,15.808,97.132,2.868,28.59,0.95,bicubic
371,convnext_tiny.fb_in22k_ft_in1k_384,384,84.098,15.902,97.138,2.862,28.59,1.0,bicubic
561,convnext_tiny.fb_in22k_ft_in1k,224,82.888,17.112,96.628,3.372,28.59,0.875,bicubic
588,convnext_tiny.fb_in1k,288,82.694,17.306,96.144,3.856,28.59,1.0,bicubic
607,convnext_tiny_hnf.a2h_in1k,288,82.608,17.392,96.024,3.976,28.59,1.0,bicubic
669,convnext_tiny_hnf.a2h_in1k,224,82.23,17.77,95.86,4.14,28.59,0.95,bicubic
693,convnext_tiny.fb_in1k,224,82.066,17.934,95.854,4.146,28.59,0.875,bicubic
1097,convnext_tiny.fb_in22k_ft_in1k,288,78.95,21.05,94.708,5.292,28.59,1.0,bicubic


In [29]:
# Compare against convnext_small (size and accuracy): list every row whose
# lower-cased model name contains "convnext_s".
df.loc[lambda frame: frame["model"].str.lower().str.contains("convnext_s")]

Unnamed: 0,model,img_size,top1,top1_err,top5,top5_err,param_count,crop_pct,interpolation
133,convnext_small.in12k_ft_in1k_384,384,86.186,13.814,97.92,2.08,50.22,1.0,bicubic
179,convnext_small.fb_in22k_ft_in1k_384,384,85.754,14.246,97.882,2.118,50.22,1.0,bicubic
217,convnext_small.in12k_ft_in1k,288,85.324,14.676,97.554,2.446,50.22,1.0,bicubic
220,convnext_small.fb_in22k_ft_in1k,288,85.276,14.724,97.678,2.322,50.22,1.0,bicubic
235,convnext_small.in12k_ft_in1k,224,85.164,14.836,97.502,2.498,50.22,0.95,bicubic
301,convnext_small.fb_in22k_ft_in1k,224,84.582,15.418,97.414,2.586,50.22,0.875,bicubic
438,convnext_small.fb_in1k,288,83.704,16.296,96.816,3.184,50.22,1.0,bicubic
526,convnext_small.fb_in1k,224,83.144,16.856,96.432,3.568,50.22,0.875,bicubic


In [30]:
# Shortlist: models with param_count < 80 and top1 > 85, best top1 first (top 10 rows).
# NOTE(review): param_count values look like millions of parameters - confirm against the timm docs.
# Based on the results below, it might be useful to try one of the convformer or caformer models.
(
    df.loc[lambda frame: frame["param_count"].lt(80) & frame["top1"].gt(85)]
    .sort_values("top1", ascending=False)
    .head(10)
)

Unnamed: 0,model,img_size,top1,top1_err,top5,top5_err,param_count,crop_pct,interpolation
52,caformer_m36.sail_in22k_ft_in1k_384,384,87.472,12.528,98.308,1.692,56.2,1.0,bicubic
55,vit_mediumd_patch16_reg4_gap_384.sbb2_e200_in1...,384,87.436,12.564,98.258,1.742,64.27,1.0,bicubic
57,coatnet_rmlp_2_rw_384.sw_in12k_ft_in1k,384,87.402,12.598,98.308,1.692,73.88,1.0,bicubic
79,convformer_m36.sail_in22k_ft_in1k_384,384,86.868,13.132,98.126,1.874,57.05,1.0,bicubic
80,caformer_s36.sail_in22k_ft_in1k_384,384,86.864,13.136,98.222,1.778,39.3,1.0,bicubic
82,tf_efficientnet_b7.ns_jft_in1k,600,86.852,13.148,98.08,1.92,66.35,0.949,bicubic
96,caformer_m36.sail_in22k_ft_in1k,224,86.608,13.392,98.038,1.962,56.2,1.0,bicubic
97,vit_mediumd_patch16_reg4_gap_256.sbb2_e200_in1...,256,86.604,13.396,97.936,2.064,64.11,0.95,bicubic
98,vit_betwixt_patch16_reg4_gap_384.sbb2_e200_in1...,384,86.602,13.398,98.022,1.978,60.6,1.0,bicubic
100,coatnet_2_rw_224.sw_in12k_ft_in1k,224,86.58,13.42,97.896,2.104,73.87,0.95,bicubic
