[Feature]: Full support of SimMIM (#487)
* [Fix]: Refine simmim-224-ft config

* [Feature]: Add simmim-b 800e pt config

* [Feature]: Add simmim large

* [Feature]: Add ceph

* [Fix]: Fix min lr and add pin_memory

* [Feature]: Add more simmim res in readme and model_zoo

* [Fix]: Refine lr

* [Fix]: Delete file client
YuanLiuuuuuu authored and fangyixiao18 committed Oct 12, 2022
1 parent 1905569 commit 14afb8e
Showing 8 changed files with 193 additions and 11 deletions.
15 changes: 11 additions & 4 deletions configs/benchmarks/classification/imagenet/swin-base_ft-8xb256-coslr-100e_in1k-224.py
@@ -6,8 +6,9 @@
img_size=224, stage_cfgs=dict(block_cfgs=dict(window_size=7))))

# train pipeline
+file_client_args = dict(backend='disk')
train_pipeline = [
-    dict(type='LoadImageFromFile'),
+    dict(type='LoadImageFromFile', file_client_args=file_client_args),
dict(
type='RandomResizedCrop',
scale=224,
@@ -35,7 +36,7 @@

# test pipeline
test_pipeline = [
-    dict(type='LoadImageFromFile'),
+    dict(type='LoadImageFromFile', file_client_args=file_client_args),
dict(
type='mmcls.ResizeEdge',
scale=256,
@@ -46,5 +47,11 @@
dict(type='PackClsInputs')
]

-train_dataloader = dict(dataset=dict(pipeline=train_pipeline))
-val_dataloader = dict(dataset=dict(pipeline=test_pipeline))
+train_dataloader = dict(
+    dataset=dict(pipeline=train_pipeline),
+    collate_fn=dict(type='default_collate'),
+    pin_memory=True)
+val_dataloader = dict(
+    dataset=dict(pipeline=test_pipeline),
+    collate_fn=dict(type='default_collate'),
+    pin_memory=True)
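
For context, the `collate_fn` and `pin_memory` keys map onto the standard PyTorch dataloader arguments. A rough, self-contained sketch of what the dicts above amount to (the real construction is done by MMEngine's Runner; the dataset below is a stand-in):

```python
import torch
from torch.utils.data import DataLoader, TensorDataset
from torch.utils.data.dataloader import default_collate

dataset = TensorDataset(torch.randn(512, 3, 8, 8))  # tiny stand-in dataset

loader = DataLoader(
    dataset,
    batch_size=256,              # the "8xb256" in the config name
    collate_fn=default_collate,  # collate_fn=dict(type='default_collate')
    pin_memory=True,             # page-locked host memory speeds up
)                                # host-to-device copies during training
```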
33 changes: 33 additions & 0 deletions configs/benchmarks/classification/imagenet/swin-large_ft-8xb256-coslr-ws14-100e_in1k-224.py
@@ -0,0 +1,33 @@
_base_ = 'swin-base_ft-8xb256-coslr-100e_in1k-224.py'

# model settings
model = dict(
backbone=dict(
arch='L',
img_size=224,
drop_path_rate=0.2,
stage_cfgs=dict(block_cfgs=dict(window_size=14)),
pad_small_map=True),
head=dict(in_channels=1536))

# schedule settings
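# layer-wise LR decay: parameters in the i-th of N layers train at roughly
# lr * 0.7 ** (N - i), so blocks nearest the input update most conservatively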
optim_wrapper = dict(optimizer=dict(layer_decay_rate=0.7))

# learning rate scheduler
param_scheduler = [
dict(
type='LinearLR',
start_factor=2.5e-7 / 1.25e-3,
by_epoch=True,
begin=0,
end=20,
convert_to_iter_based=True),
dict(
type='CosineAnnealingLR',
T_max=100,
eta_min=1e-6,
by_epoch=True,
begin=20,
end=100,
convert_to_iter_based=True)
]
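
For intuition, the two schedulers compose into a warmup-then-cosine curve. A rough sketch of the per-epoch LR, assuming the base LR of 1.25e-3 implied by `start_factor` above and that the cosine segment steps relative to its `begin` epoch:

```python
import math

BASE_LR, ETA_MIN, WARMUP_END, T_MAX = 1.25e-3, 1e-6, 20, 100

def lr_at(epoch: float) -> float:
    if epoch < WARMUP_END:
        # LinearLR: factor ramps from 2.5e-7 / 1.25e-3 up to 1.0
        f0 = 2.5e-7 / BASE_LR
        return BASE_LR * (f0 + (1.0 - f0) * epoch / WARMUP_END)
    # CosineAnnealingLR: half-cosine decay toward ETA_MIN. If T_max counts
    # from the scheduler's begin epoch, the 80 active epochs cover only
    # part of the period, so training ends slightly above ETA_MIN.
    t = (epoch - WARMUP_END) / T_MAX
    return ETA_MIN + (BASE_LR - ETA_MIN) * 0.5 * (1 + math.cos(math.pi * t))
```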
1 change: 0 additions & 1 deletion configs/selfsup/_base_/datasets/imagenet_simmim.py
@@ -3,7 +3,6 @@
dataset_type = 'mmcls.ImageNet'
data_root = 'data/imagenet/'
file_client_args = dict(backend='disk')

train_pipeline = [
dict(type='LoadImageFromFile', file_client_args=file_client_args),
dict(
74 changes: 70 additions & 4 deletions configs/selfsup/simmim/README.md
@@ -16,10 +16,76 @@ This paper presents SimMIM, a simple framework for masked image modeling. We sim

Here we report the results of these models; more results will be added soon.

-| Backbone | Pre-train epoch | Pre-train resolution | Fine-tuning resolution | Fine-tuning Top-1 | Pre-train Config | Fine-tuning Config | Download |
-| :-------: | :-------------: | :------------------: | :--------------------: | :---------------: | :--------------: | :----------------: | :------: |
-| Swin-Base | 100 | 192 | 192 | 82.9 | [config](https://github.com/open-mmlab/mmselfsup/blob/master/configs/selfsup/simmim/simmim_swin-base_16xb128-coslr-100e_in1k-192.py) | [config](https://github.com/open-mmlab/mmselfsup/blob/master/configs/benchmarks/classification/imagenet/swin-base_ft-8xb256-coslr-100e_in1k.py) | [model](https://download.openmmlab.com/mmselfsup/simmim/simmim_swin-base_16xb128-coslr-100e_in1k-192_20220316-1d090125.pth) \| [log](https://download.openmmlab.com/mmselfsup/simmim/simmim_swin-base_16xb128-coslr-100e_in1k-192_20220316-1d090125.log.json) |
-| Swin-Base | 100 | 192 | 224 | 83.5 | [config](https://github.com/open-mmlab/mmselfsup/blob/master/configs/selfsup/simmim/simmim_swin-base_16xb128-coslr-100e_in1k-192.py) | [config](https://github.com/open-mmlab/mmselfsup/blob/master/configs/benchmarks/classification/imagenet/swin-base_ft-8xb256-coslr-100e_in1k-224.py) | [model](https://download.openmmlab.com/mmselfsup/simmim/simmim_swin-base_16xb128-coslr-100e_in1k-192_20220316-1d090125.pth) \| [log](https://download.openmmlab.com/mmselfsup/simmim/simmim_swin-base_16xb128-coslr-100e_in1k-192_20220316-1d090125.log.json) |
<table class="docutils">
<thead>
<tr>
<th rowspan="2">Algorithm</th>
<th rowspan="2">Backbone</th>
<th rowspan="2">Epoch</th>
<th rowspan="2">Fine-tuning Size</th>
<th rowspan="2">Batch Size</th>
<th colspan="2" align="center">Results (Top-1 %)</th>
<th colspan="3" align="center">Links</th>
</tr>
<tr>
<th>Linear Eval</th>
<th>Fine-tuning</th>
<th>Pretrain</th>
<th>Linear Eval</th>
<th>Fine-tuning</th>
</tr>
</thead>
<tbody>
<tr>
<td>SimMIM</td>
<td>Swin-base</td>
<td>100</td>
<td>192</td>
<td>2048</td>
<td>/</td>
<td>82.7</td>
<td><a href='https://github.com/open-mmlab/mmselfsup/blob/dev-1.x/configs/selfsup/simmim/simmim_swin-base_16xb128-amp-coslr-100e_in1k-192.py'>config</a> | <a href='https://download.openmmlab.com/mmselfsup/1.x/simmim/simmim_swin-base_8xb256-amp-coslr-100e_in1k-192/simmim_swin-base_8xb256-amp-coslr-100e_in1k-192_20220829-0e15782d.pth'>model</a> | <a href='https://download.openmmlab.com/mmselfsup/1.x/simmim/simmim_swin-base_8xb256-amp-coslr-100e_in1k-192/simmim_swin-base_8xb256-amp-coslr-100e_in1k-192_20220827_034052.json'>log</a></td>
<td>/</td>
<td><a href='https://github.com/open-mmlab/mmselfsup/blob/dev-1.x/configs/benchmarks/classification/imagenet/swin-base_ft-8xb256-coslr-100e_in1k.py'>config</a> | <a href='https://download.openmmlab.com/mmselfsup/1.x/simmim/simmim_swin-base_8xb256-amp-coslr-100e_in1k-192/swin-base_ft-8xb256-coslr-100e_in1k/swin-base_ft-8xb256-coslr-100e_in1k_20220829-9cf23aa1.pth'>model</a> | <a href='https://download.openmmlab.com/mmselfsup/1.x/simmim/simmim_swin-base_8xb256-amp-coslr-100e_in1k-192/swin-base_ft-8xb256-coslr-100e_in1k/swin-base_ft-8xb256-coslr-100e_in1k_20220829_001452.json'>log</a></td>
</tr>
<tr>
<td>SimMIM</td>
<td>Swin-base</td>
<td>100</td>
<td>224</td>
<td>2048</td>
<td>/</td>
<td>83.5</td>
<td><a href='https://github.com/open-mmlab/mmselfsup/blob/dev-1.x/configs/selfsup/simmim/simmim_swin-base_16xb128-amp-coslr-100e_in1k-192.py'>config</a> | <a href='https://download.openmmlab.com/mmselfsup/1.x/simmim/simmim_swin-base_8xb256-amp-coslr-100e_in1k-192/simmim_swin-base_8xb256-amp-coslr-100e_in1k-192_20220829-0e15782d.pth'>model</a> | <a href='https://download.openmmlab.com/mmselfsup/1.x/simmim/simmim_swin-base_8xb256-amp-coslr-100e_in1k-192/simmim_swin-base_8xb256-amp-coslr-100e_in1k-192_20220827_034052.json'>log</a></td>
<td>/</td>
<td><a href='https://github.com/open-mmlab/mmselfsup/blob/dev-1.x/configs/benchmarks/classification/imagenet/swin-base_ft-8xb256-coslr-100e_in1k-224.py'>config</a> | model | log</td>
</tr>
<tr>
<td>SimMIM</td>
<td>Swin-base</td>
<td>800</td>
<td>224</td>
<td>2048</td>
<td>/</td>
<td>83.8</td>
<td><a href='https://github.com/open-mmlab/mmselfsup/blob/dev-1.x/configs/selfsup/simmim/simmim_swin-base_16xb128-amp-coslr-800e_in1k-192.py'>config</a> | <a href='https://download.openmmlab.com/mmselfsup/1.x/simmim/simmim_swin-base_16xb128-amp-coslr-800e_in1k-192/simmim_swin-base_16xb128-amp-coslr-800e_in1k-192_20220916-a0e931ac.pth'>model</a> | <a href='https://download.openmmlab.com/mmselfsup/1.x/simmim/simmim_swin-base_16xb128-amp-coslr-800e_in1k-192/simmim_swin-base_16xb128-amp-coslr-800e_in1k-192_20220906_141645.json'>log</a></td>
<td>/</td>
<td><a href='https://github.com/open-mmlab/mmselfsup/blob/dev-1.x/configs/benchmarks/classification/imagenet/swin-base_ft-8xb256-coslr-100e_in1k-224.py'>config</a> | <a href='https://download.openmmlab.com/mmselfsup/1.x/simmim/simmim_swin-base_16xb128-amp-coslr-800e_in1k-192/swin-base_ft-8xb256-coslr-100e_in1k-224/swin-base_ft-8xb256-coslr-100e_in1k-224_20220916-b202cd1c.pth'>model</a> | <a href='https://download.openmmlab.com/mmselfsup/1.x/simmim/simmim_swin-base_16xb128-amp-coslr-800e_in1k-192/swin-base_ft-8xb256-coslr-100e_in1k-224/swin-base_ft-8xb256-coslr-100e_in1k-224_20220909_104645.json'>log</a></td>
</tr>
<tr>
<td>SimMIM</td>
<td>Swin-large</td>
<td>800</td>
<td>224</td>
<td>2048</td>
<td>/</td>
<td>84.8</td>
<td><a href='https://github.com/open-mmlab/mmselfsup/blob/dev-1.x/configs/selfsup/simmim/simmim_swin-large_16xb128-amp-coslr-800e_in1k-192.py'>config</a> | <a href='https://download.openmmlab.com/mmselfsup/1.x/simmim/simmim_swin-large_16xb128-amp-coslr-800e_in1k-192/simmim_swin-large_16xb128-amp-coslr-800e_in1k-192_20220916-4ad216d3.pth'>model</a> | <a href='https://download.openmmlab.com/mmselfsup/1.x/simmim/simmim_swin-large_16xb128-amp-coslr-800e_in1k-192/simmim_swin-large_16xb128-amp-coslr-800e_in1k-192_20220907_203738.json'>log</a></td>
<td>/</td>
<td><a href='https://github.com/open-mmlab/mmselfsup/blob/dev-1.x/configs/benchmarks/classification/imagenet/swin-large_ft-8xb256-coslr-ws14-100e_in1k-224.py'>config</a> | <a href='https://download.openmmlab.com/mmselfsup/1.x/simmim/simmim_swin-large_16xb128-amp-coslr-800e_in1k-192/swin-large_ft-8xb256-coslr-ws14-100e_in1k-224/swin-large_ft-8xb256-coslr-ws14-100e_in1k-224_20220916-d4865790.pth'>model</a> | <a href='https://download.openmmlab.com/mmselfsup/1.x/simmim/simmim_swin-large_16xb128-amp-coslr-800e_in1k-192/swin-large_ft-8xb256-coslr-ws14-100e_in1k-224/swin-large_ft-8xb256-coslr-ws14-100e_in1k-224_20220914_133331.json'>log</a></td>
</tr>
</tbody>
</table>
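
Any entry above can be reproduced from its config. A minimal launch sketch with MMEngine, assuming an MMSelfSup dev-1.x checkout with its dependencies installed (the `work_dir` value is an arbitrary choice; `tools/dist_train.sh` is the usual multi-GPU entry point):

```python
from mmengine.config import Config
from mmengine.runner import Runner

# single-process run of the 100-epoch Swin-Base pre-training config
cfg = Config.fromfile(
    'configs/selfsup/simmim/simmim_swin-base_16xb128-amp-coslr-100e_in1k-192.py')
cfg.work_dir = 'work_dirs/simmim_swin-base'  # arbitrary output directory

Runner.from_cfg(cfg).train()
```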

## Citation

27 changes: 27 additions & 0 deletions configs/selfsup/simmim/simmim_swin-base_16xb128-amp-coslr-800e_in1k-192.py
@@ -0,0 +1,27 @@
_base_ = 'simmim_swin-base_16xb128-amp-coslr-100e_in1k-192.py'

# optimizer wrapper
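# linear scaling rule: the base LR of 1e-4 is defined for a total batch size
# of 512 and scaled to the effective batch size of 2048 here, i.e. 4e-4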
optimizer = dict(
type='AdamW', lr=1e-4 * 2048 / 512, betas=(0.9, 0.999), eps=1e-8)
optim_wrapper = dict(optimizer=optimizer)

# learning rate scheduler
param_scheduler = [
dict(
type='LinearLR',
start_factor=5e-7 / 1e-4,
by_epoch=True,
begin=0,
end=10,
convert_to_iter_based=True),
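    # after warmup the LR holds flat; MultiStepLR then multiplies it by the
    # default gamma of 0.1 at epoch 700 of the 800-epoch schedule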
dict(
type='MultiStepLR',
milestones=[700],
by_epoch=True,
begin=10,
end=800,
convert_to_iter_based=True)
]

# schedule
train_cfg = dict(max_epochs=800)
13 changes: 13 additions & 0 deletions configs/selfsup/simmim/simmim_swin-large_16xb128-amp-coslr-800e_in1k-192.py
@@ -0,0 +1,13 @@
_base_ = 'simmim_swin-base_16xb128-amp-coslr-800e_in1k-192.py'

# model settings
model = dict(
backbone=dict(
arch='L',
stage_cfgs=dict(block_cfgs=dict(window_size=12)),
pad_small_map=True),
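    # Swin-L embeds patches at 192 channels and doubles them at each of the
    # three merging stages: 192 * 2**3 = 1536 channels at an overall stride
    # of 32, which the neck upsamples back to pixels (encoder_stride=32)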
neck=dict(type='SimMIMNeck', in_channels=192 * 2**3, encoder_stride=32),
head=dict(
type='SimMIMHead',
patch_size=4,
loss=dict(type='SimMIMReconstructionLoss', encoder_in_channels=3)))
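
The head above pairs the reconstruction with an L1 objective restricted to masked pixels. A hedged sketch of that computation (names are illustrative; see `SimMIMReconstructionLoss` for the authoritative version):

```python
import torch
import torch.nn.functional as F

def masked_l1_loss(pred: torch.Tensor, target: torch.Tensor,
                   mask: torch.Tensor, in_channels: int = 3) -> torch.Tensor:
    """pred/target: (B, C, H, W); mask: (B, H, W), 1 marks masked pixels."""
    l1 = F.l1_loss(pred, target, reduction='none')  # per-pixel L1
    m = mask.unsqueeze(1).type_as(l1)               # (B, 1, H, W)
    # average over masked pixels only, normalized per channel
    return (l1 * m).sum() / (m.sum() + 1e-5) / in_channels
```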
34 changes: 32 additions & 2 deletions docs/en/model_zoo.md
@@ -332,8 +332,8 @@ ImageNet has multiple versions, but the most commonly used one is ILSVRC 2012. T
<td><a href='https://github.com/open-mmlab/mmselfsup/blob/dev-1.x/configs/benchmarks/classification/imagenet/vit-base-p16_ft-8xb128-coslr-100e-rpe_in1k.py'>config</a> | <a href='https://download.openmmlab.com/mmselfsup/1.x/cae/cae_vit-base-p16_16xb128-fp16-coslr-300e_in1k/vit-base-p16_ft-8xb128-coslr-100e-rpe_in1k/vit-base-p16_ft-8xb128-coslr-100e-rpe_in1k_20220825-f3d234cd.pth'>model</a> | <a href='https://download.openmmlab.com/mmselfsup/1.x/cae/cae_vit-base-p16_16xb128-fp16-coslr-300e_in1k/vit-base-p16_ft-8xb128-coslr-100e-rpe_in1k/vit-base-p16_ft-8xb128-coslr-100e-rpe_in1k_20220711_165500.json'>log</a></td>
</tr>
<tr>
-<td>SimMIM</td>
-<td>Swin-base</td>
+<td rowspan="4">SimMIM</td>
+<td>Swin-base-FT192</td>
<td>100</td>
<td>2048</td>
<td>/</td>
@@ -342,5 +342,35 @@ ImageNet has multiple versions, but the most commonly used one is ILSVRC 2012. T
<td>/</td>
<td><a href='https://github.com/open-mmlab/mmselfsup/blob/dev-1.x/configs/benchmarks/classification/imagenet/swin-base_ft-8xb256-coslr-100e_in1k.py'>config</a> | <a href='https://download.openmmlab.com/mmselfsup/1.x/simmim/simmim_swin-base_8xb256-amp-coslr-100e_in1k-192/swin-base_ft-8xb256-coslr-100e_in1k/swin-base_ft-8xb256-coslr-100e_in1k_20220829-9cf23aa1.pth'>model</a> | <a href='https://download.openmmlab.com/mmselfsup/1.x/simmim/simmim_swin-base_8xb256-amp-coslr-100e_in1k-192/swin-base_ft-8xb256-coslr-100e_in1k/swin-base_ft-8xb256-coslr-100e_in1k_20220829_001452.json'>log</a></td>
</tr>
<tr>
<td>Swin-base-FT224</td>
<td>100</td>
<td>2048</td>
<td>/</td>
<td>83.5</td>
<td><a href='https://github.com/open-mmlab/mmselfsup/blob/dev-1.x/configs/selfsup/simmim/simmim_swin-base_16xb128-amp-coslr-100e_in1k-192.py'>config</a> | <a href='https://download.openmmlab.com/mmselfsup/1.x/simmim/simmim_swin-base_8xb256-amp-coslr-100e_in1k-192/simmim_swin-base_8xb256-amp-coslr-100e_in1k-192_20220829-0e15782d.pth'>model</a> | <a href='https://download.openmmlab.com/mmselfsup/1.x/simmim/simmim_swin-base_8xb256-amp-coslr-100e_in1k-192/simmim_swin-base_8xb256-amp-coslr-100e_in1k-192_20220827_034052.json'>log</a></td>
<td>/</td>
<td><a href='https://github.com/open-mmlab/mmselfsup/blob/dev-1.x/configs/benchmarks/classification/imagenet/swin-base_ft-8xb256-coslr-100e_in1k-224.py'>config</a> | model | log</td>
</tr>
<tr>
<td>Swin-base-FT224</td>
<td>800</td>
<td>2048</td>
<td>/</td>
<td>83.8</td>
<td><a href='https://github.com/open-mmlab/mmselfsup/blob/dev-1.x/configs/selfsup/simmim/simmim_swin-base_16xb128-amp-coslr-800e_in1k-192.py'>config</a> | <a href='https://download.openmmlab.com/mmselfsup/1.x/simmim/simmim_swin-base_16xb128-amp-coslr-800e_in1k-192/simmim_swin-base_16xb128-amp-coslr-800e_in1k-192_20220916-a0e931ac.pth'>model</a> | <a href='https://download.openmmlab.com/mmselfsup/1.x/simmim/simmim_swin-base_16xb128-amp-coslr-800e_in1k-192/simmim_swin-base_16xb128-amp-coslr-800e_in1k-192_20220906_141645.json'>log</a></td>
<td>/</td>
<td><a href='https://github.com/open-mmlab/mmselfsup/blob/dev-1.x/configs/benchmarks/classification/imagenet/swin-base_ft-8xb256-coslr-100e_in1k-224.py'>config</a> | <a href='https://download.openmmlab.com/mmselfsup/1.x/simmim/simmim_swin-base_16xb128-amp-coslr-800e_in1k-192/swin-base_ft-8xb256-coslr-100e_in1k-224/swin-base_ft-8xb256-coslr-100e_in1k-224_20220916-b202cd1c.pth'>model</a> | <a href='https://download.openmmlab.com/mmselfsup/1.x/simmim/simmim_swin-base_16xb128-amp-coslr-800e_in1k-192/swin-base_ft-8xb256-coslr-100e_in1k-224/swin-base_ft-8xb256-coslr-100e_in1k-224_20220909_104645.json'>log</a></td>
</tr>
<tr>
<td>Swin-large-FT224</td>
<td>800</td>
<td>2048</td>
<td>/</td>
<td>84.8</td>
<td><a href='https://github.com/open-mmlab/mmselfsup/blob/dev-1.x/configs/selfsup/simmim/simmim_swin-large_16xb128-amp-coslr-800e_in1k-192.py'>config</a> | <a href='https://download.openmmlab.com/mmselfsup/1.x/simmim/simmim_swin-large_16xb128-amp-coslr-800e_in1k-192/simmim_swin-large_16xb128-amp-coslr-800e_in1k-192_20220916-4ad216d3.pth'>model</a> | <a href='https://download.openmmlab.com/mmselfsup/1.x/simmim/simmim_swin-large_16xb128-amp-coslr-800e_in1k-192/simmim_swin-large_16xb128-amp-coslr-800e_in1k-192_20220907_203738.json'>log</a></td>
<td>/</td>
<td><a href='https://github.com/open-mmlab/mmselfsup/blob/dev-1.x/configs/benchmarks/classification/imagenet/swin-large_ft-8xb256-coslr-ws14-100e_in1k-224.py'>config</a> | <a href='https://download.openmmlab.com/mmselfsup/1.x/simmim/simmim_swin-large_16xb128-amp-coslr-800e_in1k-192/swin-large_ft-8xb256-coslr-ws14-100e_in1k-224/swin-large_ft-8xb256-coslr-ws14-100e_in1k-224_20220916-d4865790.pth'>model</a> | <a href='https://download.openmmlab.com/mmselfsup/1.x/simmim/simmim_swin-large_16xb128-amp-coslr-800e_in1k-192/swin-large_ft-8xb256-coslr-ws14-100e_in1k-224/swin-large_ft-8xb256-coslr-ws14-100e_in1k-224_20220914_133331.json'>log</a></td>
</tr>
</tbody>
</table>
7 changes: 7 additions & 0 deletions mmselfsup/models/backbones/simmim_swin.py
@@ -42,6 +42,11 @@ class SimMIMSwinTransformer(SwinTransformer):
stage. Defaults to empty dict.
patch_cfg (dict): Extra config dict for patch embedding.
Defaults to empty dict.
+        pad_small_map (bool): If True, pad the small feature map to the
+            window size, which is commonly used in detection and
+            segmentation. If False, avoid shifting windows and shrink the
+            window size to the size of the feature map, which is commonly
+            used in classification. Defaults to False.
init_cfg (dict, optional): The Config for initialization.
Defaults to None.
"""
@@ -60,6 +65,7 @@ def __init__(self,
norm_cfg: dict = dict(type='LN'),
stage_cfgs: Union[Sequence, dict] = dict(),
patch_cfg: dict = dict(),
+                 pad_small_map: bool = False,
init_cfg: Optional[dict] = None) -> None:
super().__init__(
arch=arch,
@@ -75,6 +81,7 @@
norm_cfg=norm_cfg,
stage_cfgs=stage_cfgs,
patch_cfg=patch_cfg,
+            pad_small_map=pad_small_map,
init_cfg=init_cfg)

self.mask_token = nn.Parameter(torch.zeros(1, 1, self.embed_dims))
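
The forward pass (not shown in this hunk) uses this token to overwrite the embeddings of masked patches before the Swin stages. A condensed sketch of that step, with illustrative names:

```python
import torch

def apply_mask_token(x: torch.Tensor, mask: torch.Tensor,
                     mask_token: torch.Tensor) -> torch.Tensor:
    """x: (B, L, C) patch embeddings; mask: (B, L), 1 marks masked patches."""
    B, L, _ = x.shape
    tokens = mask_token.expand(B, L, -1)    # broadcast (1, 1, C) -> (B, L, C)
    w = mask.unsqueeze(-1).type_as(tokens)  # (B, L, 1) blending weight
    return x * (1.0 - w) + tokens * w       # masked positions become tokens
```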
