<a href="https://colab.research.google.com/github/juanmed/moa-challenge-micro2025/blob/main/timm_experiments.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [2]:
!pip install onnx mmcv mmpretrain

Collecting onnx
  Downloading onnx-1.19.0-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.whl.metadata (7.0 kB)
Collecting mmcv
  Downloading mmcv-2.2.0.tar.gz (479 kB)
[?25l     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/479.1 kB[0m [31m?[0m eta [36m-:--:--[0m[2K     [91m━━━━━━━━━━━━━━━━━━━━━━━━━[0m[91m╸[0m[90m━━━━━━━━━━━━[0m [32m327.7/479.1 kB[0m [31m10.0 MB/s[0m eta [36m0:00:01[0m[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m479.1/479.1 kB[0m [31m8.5 MB/s[0m eta [36m0:00:00[0m
[?25h  Preparing metadata (setup.py) ... [?25l[?25hdone
Collecting mmpretrain
  Downloading mmpretrain-1.2.0-py2.py3-none-any.whl.metadata (20 kB)
Collecting addict (from mmcv)
  Downloading addict-2.4.0-py3-none-any.whl.metadata (1.0 kB)
Collecting mmengine>=0.3.0 (from mmcv)
  Downloading mmengine-0.10.7-py3-none-any.whl.metadata (20 kB)
Collecting yapf (from mmcv)
  Downloading yapf-0.43.0-py3-none-any.whl.metadata (46 kB)
[2K     [90m━━

In [15]:
import sys

import matplotlib.pyplot as plt
import PIL
from PIL import Image
import json

import torch
from torch import nn
import torchvision.transforms as T

from timm import create_model
from timm.data.constants import IMAGENET_DEFAULT_MEAN, IMAGENET_DEFAULT_STD


In [26]:
NORMALIZE_MEAN = IMAGENET_DEFAULT_MEAN
NORMALIZE_STD = IMAGENET_DEFAULT_STD
SIZE = 224
TEST_IMAGE = "6.jpeg"
MODEL_NAME = "convnextv2_large.fcmae_ft_in22k_in1k_384"
#MODEL_NAME = "convnextv2_base.fcmae_ft_in22k_in1k_384"
#MODEL_NAME = "convnextv2_large.fcmae_ft_in22k_in1k_384"
EXPORT_PATH = f"{MODEL_NAME}.onnx"

In [27]:
device = 'cuda' if torch.cuda.is_available() else 'cpu'
print("device = ", device)
# create a ConvNeXt model : https://github.com/rwightman/pytorch-image-models/blob/master/timm/models/convnext.py
model = create_model(MODEL_NAME, pretrained=True).to(device)

model.to(device)
model.eval()

# Here we resize smaller edge to 256, no center cropping
transforms = [
              T.Resize(SIZE, interpolation=T.InterpolationMode.BICUBIC),
              T.ToTensor(),
              T.Normalize(NORMALIZE_MEAN, NORMALIZE_STD),
              ]
transforms = T.Compose(transforms)

img = PIL.Image.open(TEST_IMAGE)
img_tensor = transforms(img).unsqueeze(0).to(device)
print(img_tensor.size())

output = torch.softmax(model(img_tensor), dim=1)
top5 = torch.topk(output, k=5)
top5_prob = top5.values[0]
top5_indices = top5.indices[0]

imagenet_labels = json.load(open('label_to_words.json'))
for i in range(5):
    labels = imagenet_labels[str(int(top5_indices[i]))]
    prob = "{:.2f}%".format(float(top5_prob[i])*100)
    print(labels, prob)


device =  cuda
torch.Size([1, 3, 224, 224])
mosquitofish, Gambusia affinis 81.77%
fly-fishing 10.16%
platy, Platypoecilus maculatus 0.28%
plasmodium, Plasmodium vivax, malaria parasite 0.12%
sporozoite 0.09%


In [25]:
c = 3
h = SIZE
w = SIZE
dummy_input = torch.rand(1, c, h, w, device=device)

print("4. Exporting to onnx: ...")
torch.onnx.export(
    model,
    dummy_input,
    EXPORT_PATH,
    export_params=True,
    opset_version=12,
    training=torch.onnx.TrainingMode.EVAL,
    do_constant_folding=True,
    dynamic_axes=None
)
print(f"ONNX export finished!: {EXPORT_PATH}")


4. Exporting to onnx: ...


  torch.onnx.export(


ONNX export finished!: convnextv2_large.fcmae_ft_in22k_in1k_384.onnx


In [10]:
import torch
import mmpretrain as mmp
from mmpretrain import get_model
from mmpretrain import inference_model

model = get_model('convnext-v2-large_fcmae-in21k-pre_3rdparty_in1k')
inputs = torch.rand(1, 3, 224, 224)
out = model(inputs)
print(type(out))
# To extract features.
feats = model.extract_feat(inputs)
print(type(feats))

TypeError: NoneType takes no arguments

In [14]:
predict = inference_model('wide-resnet101_3rdparty_8xb32_in1k', 'demo/bird.JPEG')
print(predict['pred_class'])
print(predict['pred_score'])

TypeError: NoneType takes no arguments

In [13]:
mmp.list_models()

['barlowtwins_resnet50_8xb256-coslr-300e_in1k',
 'beit-base-p16_beit-in21k-pre_3rdparty_in1k',
 'beit-base-p16_beit-pre_8xb128-coslr-100e_in1k',
 'beit-base-p16_beitv2-in21k-pre_3rdparty_in1k',
 'beit-base-p16_beitv2-pre_8xb128-coslr-100e_in1k',
 'beit-base-p16_cae-pre_8xb128-coslr-100e_in1k',
 'beit-g-p14_3rdparty-eva_30m',
 'beit-g-p14_eva-30m-in21k-pre_3rdparty_in1k-336px',
 'beit-g-p14_eva-30m-in21k-pre_3rdparty_in1k-560px',
 'beit-g-p14_eva-30m-pre_3rdparty_in21k',
 'beit-g-p16_3rdparty-eva_30m',
 'beit-l-p14_3rdparty-eva_in21k',
 'beit-l-p14_eva-in21k-pre_3rdparty_in1k-196px',
 'beit-l-p14_eva-in21k-pre_3rdparty_in1k-336px',
 'beit-l-p14_eva-pre_3rdparty_in1k-196px',
 'beit-l-p14_eva-pre_3rdparty_in1k-336px',
 'beit-l-p14_eva-pre_3rdparty_in21k',
 'beit_beit-base-p16_8xb256-amp-coslr-300e_in1k',
 'beitv2_beit-base-p16_8xb256-amp-coslr-300e_in1k',
 'blip-base_3rdparty_caption',
 'blip-base_3rdparty_nlvr',
 'blip-base_3rdparty_retrieval',
 'blip-base_3rdparty_vqa',
 'blip-base_8xb1