diff --git a/configs/benchmarks/classification/imagenet/swin-base_ft-8xb256-coslr-100e_in1k-224.py b/configs/benchmarks/classification/imagenet/swin-base_ft-8xb256-coslr-100e_in1k-224.py index e67b4b33b..3e0740109 100644 --- a/configs/benchmarks/classification/imagenet/swin-base_ft-8xb256-coslr-100e_in1k-224.py +++ b/configs/benchmarks/classification/imagenet/swin-base_ft-8xb256-coslr-100e_in1k-224.py @@ -6,8 +6,9 @@ img_size=224, stage_cfgs=dict(block_cfgs=dict(window_size=7)))) # train pipeline +file_client_args = dict(backend='disk') train_pipeline = [ - dict(type='LoadImageFromFile'), + dict(type='LoadImageFromFile', file_client_args=file_client_args), dict( type='RandomResizedCrop', scale=224, @@ -35,7 +36,7 @@ # test pipeline test_pipeline = [ - dict(type='LoadImageFromFile'), + dict(type='LoadImageFromFile', file_client_args=file_client_args), dict( type='mmcls.ResizeEdge', scale=256, @@ -46,5 +47,11 @@ dict(type='PackClsInputs') ] -train_dataloader = dict(dataset=dict(pipeline=train_pipeline)) -val_dataloader = dict(dataset=dict(pipeline=test_pipeline)) +train_dataloader = dict( + dataset=dict(pipeline=train_pipeline), + collate_fn=dict(type='default_collate'), + pin_memory=True) +val_dataloader = dict( + dataset=dict(pipeline=test_pipeline), + collate_fn=dict(type='default_collate'), + pin_memory=True) diff --git a/configs/benchmarks/classification/imagenet/swin-large_ft-8xb256-coslr-ws14-100e_in1k-224.py b/configs/benchmarks/classification/imagenet/swin-large_ft-8xb256-coslr-ws14-100e_in1k-224.py new file mode 100644 index 000000000..45de9c606 --- /dev/null +++ b/configs/benchmarks/classification/imagenet/swin-large_ft-8xb256-coslr-ws14-100e_in1k-224.py @@ -0,0 +1,33 @@ +_base_ = 'swin-base_ft-8xb256-coslr-100e_in1k-224.py' + +# model settings +model = dict( + backbone=dict( + arch='L', + img_size=224, + drop_path_rate=0.2, + stage_cfgs=dict(block_cfgs=dict(window_size=14)), + pad_small_map=True), + head=dict(in_channels=1536)) + +# schedule settings 
+optim_wrapper = dict(optimizer=dict(layer_decay_rate=0.7)) + +# learning rate scheduler +param_scheduler = [ + dict( + type='LinearLR', + start_factor=2.5e-7 / 1.25e-3, + by_epoch=True, + begin=0, + end=20, + convert_to_iter_based=True), + dict( + type='CosineAnnealingLR', + T_max=100, + eta_min=1e-6, + by_epoch=True, + begin=20, + end=100, + convert_to_iter_based=True) +] diff --git a/configs/selfsup/_base_/datasets/imagenet_simmim.py b/configs/selfsup/_base_/datasets/imagenet_simmim.py index e3baac40e..ba44acd1b 100644 --- a/configs/selfsup/_base_/datasets/imagenet_simmim.py +++ b/configs/selfsup/_base_/datasets/imagenet_simmim.py @@ -3,7 +3,6 @@ dataset_type = 'mmcls.ImageNet' data_root = 'data/imagenet/' file_client_args = dict(backend='disk') - train_pipeline = [ dict(type='LoadImageFromFile', file_client_args=file_client_args), dict( diff --git a/configs/selfsup/simmim/README.md b/configs/selfsup/simmim/README.md index f76017052..b13169163 100644 --- a/configs/selfsup/simmim/README.md +++ b/configs/selfsup/simmim/README.md @@ -16,10 +16,76 @@ This paper presents SimMIM, a simple framework for masked image modeling. We sim Here, we report the results of the model, and more results will be coming soon. 
-| Backbone | Pre-train epoch | Pre-train resolution | Fine-tuning resolution | Fine-tuning Top-1 | Pre-train Config | Fine-tuning Config | Download | -| :-------: | :-------------: | :------------------: | :--------------------: | :---------------: | :----------------------------------------------------------------------------------------------------------------------------------: | :-------------------------------------------------------------------------------------------------------------------------------------------------: | :-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------: | -| Swin-Base | 100 | 192 | 192 | 82.9 | [config](https://github.com/open-mmlab/mmselfsup/blob/master/configs/selfsup/simmim/simmim_swin-base_16xb128-coslr-100e_in1k-192.py) | [config](https://github.com/open-mmlab/mmselfsup/blob/master/configs/benchmarks/classification/imagenet/swin-base_ft-8xb256-coslr-100e_in1k.py) | [model](https://download.openmmlab.com/mmselfsup/simmim/simmim_swin-base_16xb128-coslr-100e_in1k-192_20220316-1d090125.pth) \| [log](https://download.openmmlab.com/mmselfsup/simmim/simmim_swin-base_16xb128-coslr-100e_in1k-192_20220316-1d090125.log.json) | -| Swin-Base | 100 | 192 | 224 | 83.5 | [config](https://github.com/open-mmlab/mmselfsup/blob/master/configs/selfsup/simmim/simmim_swin-base_16xb128-coslr-100e_in1k-192.py) | [config](https://github.com/open-mmlab/mmselfsup/blob/master/configs/benchmarks/classification/imagenet/swin-base_ft-8xb256-coslr-100e_in1k-224.py) | [model](https://download.openmmlab.com/mmselfsup/simmim/simmim_swin-base_16xb128-coslr-100e_in1k-192_20220316-1d090125.pth) \| [log](https://download.openmmlab.com/mmselfsup/simmim/simmim_swin-base_16xb128-coslr-100e_in1k-192_20220316-1d090125.log.json) | +
+| Algorithm | Backbone | Epoch | Fine-tuning Size | Batch Size | Results (Top-1 %): Linear Eval | Results (Top-1 %): Fine-tuning | Links: Pretrain | Links: Linear Eval | Links: Fine-tuning |
+| --- | --- | --- | --- | --- | --- | --- | --- | --- | --- |
+| SimMIM | Swin-base | 100 | 192 | 2048 | / | 82.7 | config \| model \| log | / | config \| model \| log |
+| SimMIM | Swin-base | 100 | 224 | 2048 | / | 83.5 | config \| model \| log | / | config \| model \| log |
+| SimMIM | Swin-base | 800 | 224 | 2048 | / | 83.8 | config \| model \| log | / | config \| model \| log |
+| SimMIM | Swin-large | 800 | 224 | 2048 | / | 84.8 | config \| model \| log | / | config \| model \| log |
+