## Setting Up the Dataset

In [1]:
from otx.v2.adapters.torch.mmengine.mmpretrain import Dataset
# Roots of the cifar10-small demo dataset. The validation folder is passed
# for both the validation and the test split.
train_root = "../../../../../demo/dataset/cifar10-small/train_data"
val_root = "../../../../../demo/dataset/cifar10-small/val_data"

dataset = Dataset(train_data_roots=train_root, val_data_roots=val_root, test_data_roots=val_root)

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# Build the training DataLoader from the dataset wrapper, two samples per batch.
train_dataloader = dataset.train_dataloader(batch_size=2)

# Summarise the loader. The first line reports the type of the loader object
# itself (not of the dataset it wraps), so it is labelled "DataLoader type".
print(f"DataLoader type: {type(train_dataloader)}")
# len() of the loader vs. len() of the dataset it wraps.
print(f"Length of DataLoader: {len(train_dataloader)}")
print(f"Dataset size: {len(train_dataloader.dataset)}")
print(f"Number of classes: {dataset.num_classes}")

[*] Detected dataset format: imagenet
[*] Detected task type: CLASSIFICATION
2023-08-18 18:05:58,914 | INFO : Try to create a 0 size memory pool.
Dataset type: <class 'torch.utils.data.dataloader.DataLoader'>
Length of DataLoader: 5
Dataset size: 10
Number of classes: 10


## List Models

In [3]:
from otx.v2.adapters.torch.mmengine.mmpretrain import list_models
# List every available model whose name matches the ConvNeXt wildcard pattern.
convnext_pattern = "convnext*"
models = list_models(convnext_pattern)
print(models)

['convnext-base_32xb128-noema_in1k', 'convnext-base_32xb128_in1k', 'convnext-base_3rdparty-noema_in1k', 'convnext-base_3rdparty_in1k', 'convnext-base_3rdparty_in1k-384px', 'convnext-base_3rdparty_in21k', 'convnext-base_in21k-pre-3rdparty_in1k-384px', 'convnext-base_in21k-pre_3rdparty_in1k', 'convnext-large_3rdparty_in1k', 'convnext-large_3rdparty_in1k-384px', 'convnext-large_3rdparty_in21k', 'convnext-large_in21k-pre-3rdparty_in1k-384px', 'convnext-large_in21k-pre_3rdparty_in1k', 'convnext-small_32xb128-noema_in1k', 'convnext-small_32xb128_in1k', 'convnext-small_in21k-pre_3rdparty_in1k', 'convnext-small_in21k-pre_3rdparty_in1k-384px', 'convnext-tiny_32xb128-noema_in1k', 'convnext-tiny_32xb128_in1k', 'convnext-tiny_in21k-pre_3rdparty_in1k', 'convnext-tiny_in21k-pre_3rdparty_in1k-384px', 'convnext-v2-atto_3rdparty-fcmae_in1k', 'convnext-v2-atto_fcmae-pre_3rdparty_in1k', 'convnext-v2-base_3rdparty-fcmae_in1k', 'convnext-v2-base_fcmae-in21k-pre_3rdparty_in1k', 'convnext-v2-base_fcmae-in21k

### Training with convnext-base_32xb128_in1k from mmpretrain

In [4]:
from otx.v2.adapters.torch.mmengine.mmpretrain import get_model
# Build the ConvNeXt-base model by name, passing the dataset's class count
# as num_classes.
convnext_name = "convnext-base_32xb128_in1k"
model = get_model(model=convnext_name, num_classes=dataset.num_classes)
print(model)

08/18 18:05:59 - mmengine - [4m[97mINFO[0m - Because batch augmentations are enabled, the data preprocessor automatically enables the `to_onehot` option to generate one-hot format labels.
ImageClassifier(
  (data_preprocessor): ClsDataPreprocessor()
  (backbone): ConvNeXt(
    (downsample_layers): ModuleList(
      (0): Sequential(
        (0): Conv2d(3, 128, kernel_size=(4, 4), stride=(4, 4))
        (1): LayerNorm2d((128,), eps=1e-06, elementwise_affine=True)
      )
      (1): Sequential(
        (0): LayerNorm2d((128,), eps=1e-06, elementwise_affine=True)
        (1): Conv2d(128, 256, kernel_size=(2, 2), stride=(2, 2))
      )
      (2): Sequential(
        (0): LayerNorm2d((256,), eps=1e-06, elementwise_affine=True)
        (1): Conv2d(256, 512, kernel_size=(2, 2), stride=(2, 2))
      )
      (3): Sequential(
        (0): LayerNorm2d((512,), eps=1e-06, elementwise_affine=True)
        (1): Conv2d(512, 1024, kernel_size=(2, 2), stride=(2, 2))
      )
    )
    (stages): ModuleL

In [5]:
from otx.v2.adapters.torch.mmengine.mmpretrain.engine import MMPTEngine

# Create the OTX training engine with /tmp/otx-test as its work_dir.
engine = MMPTEngine(work_dir="/tmp/otx-test")

# Train the ConvNeXt model for two epochs. Only the training dataloader is
# passed, so this run has no validation dataloader.
results = engine.train(model=model, train_dataloader=train_dataloader, max_epochs=2)

# Show the "checkpoint" entry of the returned results.
print(results["checkpoint"])

08/18 18:06:00 - mmengine - [4m[97mINFO[0m - 
------------------------------------------------------------
System environment:
    sys.platform: linux
    Python: 3.9.13 (main, Aug 25 2022, 23:26:10) [GCC 11.2.0]
    CUDA available: True
    numpy_random_seed: 136858950
    GPU 0,1: NVIDIA GeForce RTX 3090
    CUDA_HOME: /usr/local/cuda
    NVCC: Cuda compilation tools, release 11.7, V11.7.64
    GCC: gcc (Ubuntu 9.5.0-1ubuntu1~22.04) 9.5.0
    PyTorch: 1.13.1+cu117
    PyTorch compiling details: PyTorch built with:
  - GCC 9.3
  - C++ Version: 201402
  - Intel(R) Math Kernel Library Version 2020.0.0 Product Build 20191122 for Intel(R) 64 architecture applications
  - Intel(R) MKL-DNN v2.6.0 (Git Hash 52b5f107dd9cf10910aaa19cb47f3abf9b349815)
  - OpenMP 201511 (a.k.a. OpenMP 4.5)
  - LAPACK is enabled (usually provided by MKL)
  - NNPACK is enabled
  - CPU capability usage: AVX2
  - CUDA Runtime 11.7
  - NVCC architecture flags: -gencode;arch=compute_37,code=sm_37;-gencode;arch=comp

### Training with a Hugging Face Model from mmpretrain (microsoft/resnet-50)

In [6]:
# The next cell builds a HuggingFaceClassifier. The commented-out command below
# installs the Hugging Face `transformers` package; uncomment and run it if the
# package is not already installed.
# !pip install transformers

In [7]:
from otx.v2.adapters.torch.mmengine.mmpretrain import get_model
# Config dict selecting the HuggingFaceClassifier type with the
# microsoft/resnet-50 model and pretrained=True.
cfg = {
    "type": "HuggingFaceClassifier",
    "model_name": "microsoft/resnet-50",
    "pretrained": True,
}

# get_model is given the config dict here instead of a model name.
hf_model = get_model(cfg)
print(hf_model)

HuggingFaceClassifier(
  (data_preprocessor): ClsDataPreprocessor()
  (model): ResNetForImageClassification(
    (resnet): ResNetModel(
      (embedder): ResNetEmbeddings(
        (embedder): ResNetConvLayer(
          (convolution): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
          (normalization): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
          (activation): ReLU()
        )
        (pooler): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
      )
      (encoder): ResNetEncoder(
        (stages): ModuleList(
          (0): ResNetStage(
            (layers): Sequential(
              (0): ResNetBottleNeckLayer(
                (shortcut): ResNetShortCut(
                  (convolution): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
                  (normalization): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
                )


In [8]:
from otx.v2.adapters.torch.mmengine.mmpretrain.engine import MMPTEngine

# Create the OTX training engine with /tmp/otx-test as its work_dir.
engine = MMPTEngine(work_dir="/tmp/otx-test")

# Train the Hugging Face model for two epochs. Only the training dataloader
# is passed, so this run has no validation dataloader.
results = engine.train(model=hf_model, train_dataloader=train_dataloader, max_epochs=2)

# Show the "checkpoint" entry of the returned results.
print(results["checkpoint"])

08/18 18:06:05 - mmengine - [4m[97mINFO[0m - 
------------------------------------------------------------
System environment:
    sys.platform: linux
    Python: 3.9.13 (main, Aug 25 2022, 23:26:10) [GCC 11.2.0]
    CUDA available: True
    numpy_random_seed: 1216755606
    GPU 0,1: NVIDIA GeForce RTX 3090
    CUDA_HOME: /usr/local/cuda
    NVCC: Cuda compilation tools, release 11.7, V11.7.64
    GCC: gcc (Ubuntu 9.5.0-1ubuntu1~22.04) 9.5.0
    PyTorch: 1.13.1+cu117
    PyTorch compiling details: PyTorch built with:
  - GCC 9.3
  - C++ Version: 201402
  - Intel(R) Math Kernel Library Version 2020.0.0 Product Build 20191122 for Intel(R) 64 architecture applications
  - Intel(R) MKL-DNN v2.6.0 (Git Hash 52b5f107dd9cf10910aaa19cb47f3abf9b349815)
  - OpenMP 201511 (a.k.a. OpenMP 4.5)
  - LAPACK is enabled (usually provided by MKL)
  - NNPACK is enabled
  - CPU capability usage: AVX2
  - CUDA Runtime 11.7
  - NVCC architecture flags: -gencode;arch=compute_37,code=sm_37;-gencode;arch=com

### Training with a timm Model from mmpretrain (resnet50)

In [9]:
from otx.v2.adapters.torch.mmengine.mmpretrain import get_model
# Config dict selecting the TimmClassifier type with the resnet50 model
# and pretrained=True.
cfg = {
    "type": "TimmClassifier",
    "model_name": "resnet50",
    "pretrained": True,
}

# get_model is given the config dict here instead of a model name.
timm_model = get_model(cfg)
print(timm_model)

TimmClassifier(
  (data_preprocessor): ClsDataPreprocessor()
  (model): ResNet(
    (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
    (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (act1): ReLU(inplace=True)
    (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
    (layer1): Sequential(
      (0): Bottleneck(
        (conv1): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (act1): ReLU(inplace=True)
        (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
        (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (drop_block): Identity()
        (act2): ReLU(inplace=True)
        (aa): Identity()
        (conv3): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=Fals

In [10]:
from otx.v2.adapters.torch.mmengine.mmpretrain.engine import MMPTEngine

# Create the OTX training engine with /tmp/otx-test as its work_dir.
engine = MMPTEngine(work_dir="/tmp/otx-test")

# Train the timm model for two epochs. Only the training dataloader is
# passed, so this run has no validation dataloader.
results = engine.train(model=timm_model, train_dataloader=train_dataloader, max_epochs=2)

# Show the "checkpoint" entry of the returned results.
print(results["checkpoint"])

08/18 18:06:07 - mmengine - [4m[97mINFO[0m - 
------------------------------------------------------------
System environment:
    sys.platform: linux
    Python: 3.9.13 (main, Aug 25 2022, 23:26:10) [GCC 11.2.0]
    CUDA available: True
    numpy_random_seed: 286981800
    GPU 0,1: NVIDIA GeForce RTX 3090
    CUDA_HOME: /usr/local/cuda
    NVCC: Cuda compilation tools, release 11.7, V11.7.64
    GCC: gcc (Ubuntu 9.5.0-1ubuntu1~22.04) 9.5.0
    PyTorch: 1.13.1+cu117
    PyTorch compiling details: PyTorch built with:
  - GCC 9.3
  - C++ Version: 201402
  - Intel(R) Math Kernel Library Version 2020.0.0 Product Build 20191122 for Intel(R) 64 architecture applications
  - Intel(R) MKL-DNN v2.6.0 (Git Hash 52b5f107dd9cf10910aaa19cb47f3abf9b349815)
  - OpenMP 201511 (a.k.a. OpenMP 4.5)
  - LAPACK is enabled (usually provided by MKL)
  - NNPACK is enabled
  - CPU capability usage: AVX2
  - CUDA Runtime 11.7
  - NVCC architecture flags: -gencode;arch=compute_37,code=sm_37;-gencode;arch=comp

### Training with a Vision Transformer (MobileViT) from mmpretrain

In [11]:
from otx.v2.adapters.torch.mmengine.mmpretrain import list_models
# List every available model whose name matches the "*vit*" wildcard pattern.
vit_pattern = "*vit*"
models = list_models(vit_pattern)
print(models)

['cn-clip_vit-base-p16_zeroshot-cls_cifar100', 'cn-clip_vit-huge-p14_zeroshot-cls_cifar100', 'cn-clip_vit-large-p14_zeroshot-cls_cifar100', 'davit-base_3rdparty_in1k', 'davit-small_3rdparty_in1k', 'davit-tiny_3rdparty_in1k', 'eva-mae-style_vit-base-p16_16xb256-coslr-400e_in1k', 'hivit-base-p16_16xb64_in1k', 'hivit-small-p16_16xb64_in1k', 'hivit-tiny-p16_16xb64_in1k', 'itpn-clip-b_hivit-base-p16_8xb256-amp-coslr-800e_in1k', 'itpn-pixel_hivit-base-p16_8xb512-amp-coslr-800e_in1k', 'itpn-pixel_hivit-large-p16_8xb512-amp-coslr-800e_in1k', 'levit-128_3rdparty_in1k', 'levit-128s_3rdparty_in1k', 'levit-192_3rdparty_in1k', 'levit-256_3rdparty_in1k', 'levit-384_3rdparty_in1k', 'mae_vit-base-p16_8xb512-amp-coslr-1600e_in1k', 'mae_vit-base-p16_8xb512-amp-coslr-300e_in1k', 'mae_vit-base-p16_8xb512-amp-coslr-400e_in1k', 'mae_vit-base-p16_8xb512-amp-coslr-800e_in1k', 'mae_vit-huge-p16_8xb512-amp-coslr-1600e_in1k', 'mae_vit-large-p16_8xb512-amp-coslr-1600e_in1k', 'mae_vit-large-p16_8xb512-amp-coslr-40

In [12]:
from otx.v2.adapters.torch.mmengine.mmpretrain import get_model
# Build MobileViT-small by name. As in the ConvNeXt cell earlier in this
# notebook, pass the dataset's class count as num_classes so the model is
# built for this dataset rather than with the defaults of the "_in1k" model.
vit_model = get_model(
    model="mobilevit-small_3rdparty_in1k",
    num_classes=dataset.num_classes,
)
print(vit_model)

ImageClassifier(
  (data_preprocessor): ClsDataPreprocessor()
  (backbone): MobileViT(
    (stem): ConvModule(
      (conv): Conv2d(3, 16, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
      (bn): BatchNorm2d(16, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (activate): Swish()
    )
    (layers): Sequential(
      (0): Sequential(
        (0): InvertedResidual(
          (conv): Sequential(
            (0): ConvModule(
              (conv): Conv2d(16, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
              (bn): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
              (activate): Swish()
            )
            (1): ConvModule(
              (conv): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=64, bias=False)
              (bn): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
              (activate): Swish()
            )
            (2)

In [13]:
from otx.v2.adapters.torch.mmengine.mmpretrain.engine import MMPTEngine

# Create the OTX training engine with /tmp/otx-test as its work_dir.
engine = MMPTEngine(work_dir="/tmp/otx-test")

# Train the MobileViT model for two epochs. Only the training dataloader is
# passed, so this run has no validation dataloader.
results = engine.train(model=vit_model, train_dataloader=train_dataloader, max_epochs=2)

# Show the "checkpoint" entry of the returned results.
print(results["checkpoint"])

08/18 18:06:08 - mmengine - [4m[97mINFO[0m - 
------------------------------------------------------------
System environment:
    sys.platform: linux
    Python: 3.9.13 (main, Aug 25 2022, 23:26:10) [GCC 11.2.0]
    CUDA available: True
    numpy_random_seed: 2073026823
    GPU 0,1: NVIDIA GeForce RTX 3090
    CUDA_HOME: /usr/local/cuda
    NVCC: Cuda compilation tools, release 11.7, V11.7.64
    GCC: gcc (Ubuntu 9.5.0-1ubuntu1~22.04) 9.5.0
    PyTorch: 1.13.1+cu117
    PyTorch compiling details: PyTorch built with:
  - GCC 9.3
  - C++ Version: 201402
  - Intel(R) Math Kernel Library Version 2020.0.0 Product Build 20191122 for Intel(R) 64 architecture applications
  - Intel(R) MKL-DNN v2.6.0 (Git Hash 52b5f107dd9cf10910aaa19cb47f3abf9b349815)
  - OpenMP 201511 (a.k.a. OpenMP 4.5)
  - LAPACK is enabled (usually provided by MKL)
  - NNPACK is enabled
  - CPU capability usage: AVX2
  - CUDA Runtime 11.7
  - NVCC architecture flags: -gencode;arch=compute_37,code=sm_37;-gencode;arch=com

## With AutoRunner

In [14]:
from otx.v2.api.core import AutoRunner

# Work directory for this run and the training split of the demo dataset.
output_dir = "/tmp/OTX-API-test"
data_roots = "../../../../../demo/dataset/cifar10-small/train_data"

# AutoRunner is given only a work_dir and the training data root; no
# dataset object or task type is passed explicitly.
auto_engine = AutoRunner(work_dir=output_dir, train_data_roots=data_roots)

# Ask the runner which models match "mobilevit".
model_list = auto_engine.list_models("mobilevit")
print("MobileViT: ", model_list)

# Train one of the listed models by name with a custom batch size and
# epoch count, then show the "checkpoint" entry of the results.
results = auto_engine.train(model="mobilevit-small_3rdparty_in1k", batch_size=2, max_epochs=2)
print(results["checkpoint"])

[*] Detected dataset format: imagenet
[*] Detected task type: CLASSIFICATION
MobileViT:  ['mobilevit-small_3rdparty_in1k', 'mobilevit-xsmall_3rdparty_in1k', 'mobilevit-xxsmall_3rdparty_in1k']
08/18 18:06:09 - mmengine - [4m[97mINFO[0m - 
------------------------------------------------------------
System environment:
    sys.platform: linux
    Python: 3.9.13 (main, Aug 25 2022, 23:26:10) [GCC 11.2.0]
    CUDA available: True
    numpy_random_seed: 1768351892
    GPU 0,1: NVIDIA GeForce RTX 3090
    CUDA_HOME: /usr/local/cuda
    NVCC: Cuda compilation tools, release 11.7, V11.7.64
    GCC: gcc (Ubuntu 9.5.0-1ubuntu1~22.04) 9.5.0
    PyTorch: 1.13.1+cu117
    PyTorch compiling details: PyTorch built with:
  - GCC 9.3
  - C++ Version: 201402
  - Intel(R) Math Kernel Library Version 2020.0.0 Product Build 20191122 for Intel(R) 64 architecture applications
  - Intel(R) MKL-DNN v2.6.0 (Git Hash 52b5f107dd9cf10910aaa19cb47f3abf9b349815)
  - OpenMP 201511 (a.k.a. OpenMP 4.5)
  - LAPACK is