Skip to content

Commit

Permalink
fix format
Browse files Browse the repository at this point in the history
  • Loading branch information
zhby99 committed Jan 12, 2023
1 parent afbf086 commit 4ad1856
Show file tree
Hide file tree
Showing 9 changed files with 154 additions and 155 deletions.
74 changes: 37 additions & 37 deletions configs/_base_/models/dest_simpatt-b0.py
Original file line number Diff line number Diff line change
@@ -1,37 +1,37 @@
# model settings
# Channel width of each of the four backbone stages. The decode head below
# references this same list so the two settings cannot drift apart.
embed_dims = [32, 64, 160, 256]
norm_cfg = dict(type='SyncBN', requires_grad=True)
model = dict(
    type='EncoderDecoder',
    pretrained=None,
    backbone=dict(
        type='SimplifiedMixTransformer',
        in_channels=3,
        embed_dims=embed_dims,
        num_stages=4,
        # The per-stage lists below carry one entry per stage.
        num_layers=[2, 2, 2, 2],
        num_heads=[1, 2, 5, 8],
        patch_sizes=[7, 3, 3, 3],
        strides=[4, 2, 2, 2],
        sr_ratios=[8, 4, 2, 1],
        out_indices=(0, 1, 2, 3),
        mlp_ratios=[8, 8, 4, 4],
        qkv_bias=True,
        drop_rate=0.0,
        attn_drop_rate=0.0,
        drop_path_rate=0.1,
        norm_cfg=norm_cfg),
    decode_head=dict(
        type='DESTHead',
        # One input per backbone stage, with the backbone's channel widths.
        in_channels=embed_dims,
        in_index=[0, 1, 2, 3],
        # NOTE(review): equals embed_dims[0] here and in the derived
        # configs - presumably required by the head; confirm.
        channels=32,
        dropout_ratio=0.1,
        num_classes=19,
        norm_cfg=norm_cfg,
        align_corners=False,
        loss_decode=dict(
            type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)),
    # model training and testing settings
    train_cfg=dict(),
    test_cfg=dict(mode='slide', crop_size=(1024, 1024), stride=(768, 768)))
# model settings
# Channel width of each of the four backbone stages. The decode head below
# references this same list so the two settings cannot drift apart.
embed_dims = [32, 64, 160, 256]
norm_cfg = dict(type='SyncBN', requires_grad=True)
model = dict(
    type='EncoderDecoder',
    pretrained=None,
    backbone=dict(
        type='SimplifiedMixTransformer',
        in_channels=3,
        embed_dims=embed_dims,
        num_stages=4,
        # The per-stage lists below carry one entry per stage.
        num_layers=[2, 2, 2, 2],
        num_heads=[1, 2, 5, 8],
        patch_sizes=[7, 3, 3, 3],
        strides=[4, 2, 2, 2],
        sr_ratios=[8, 4, 2, 1],
        out_indices=(0, 1, 2, 3),
        mlp_ratios=[8, 8, 4, 4],
        qkv_bias=True,
        drop_rate=0.0,
        attn_drop_rate=0.0,
        drop_path_rate=0.1,
        norm_cfg=norm_cfg),
    decode_head=dict(
        type='DESTHead',
        # One input per backbone stage, with the backbone's channel widths.
        in_channels=embed_dims,
        in_index=[0, 1, 2, 3],
        # NOTE(review): equals embed_dims[0] here and in the derived
        # configs - presumably required by the head; confirm.
        channels=32,
        dropout_ratio=0.1,
        num_classes=19,
        norm_cfg=norm_cfg,
        align_corners=False,
        loss_decode=dict(
            type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)),
    # model training and testing settings
    train_cfg=dict(),
    test_cfg=dict(mode='slide', crop_size=(1024, 1024), stride=(768, 768)))
19 changes: 9 additions & 10 deletions configs/dest/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -35,17 +35,16 @@ Transformer and its variants have shown state-of-the-art results in many vision

### Cityscapes


| Method | Backbone | Crop Size | Lr schd | Mem (GB) | Inf time (fps) | mIoU | mIoU(ms+flip) | config | download |
| --------- | -------- | --------- | ------: | -------: | -------------- | ----: | ------------- | -------------------------------------------------------------------------------------------------------------------------------------- | ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
| DEST | SMIT-B0 | 1024x1024 | 160000 | - | - | 64.34 | - | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/dest/dest_simpatt-b0_1024x1024_160k_cityscapes.py) | - | - |
| DEST | SMIT-B1 | 1024x1024 | 160000 | - | - | 68.21 | - | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/dest/dest_simpatt-b1_1024x1024_160k_cityscapes.py) | - | - |
| DEST | SMIT-B2 | 1024x1024 | 160000 | - | - | 71.89 | - | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/dest/dest_simpatt-b2_1024x1024_160k_cityscapes.py) | - | - |
| DEST | SMIT-B3 | 1024x1024 | 160000 | - | - | 73.51 | - | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/dest/dest_simpatt-b3_1024x1024_160k_cityscapes.py) | - | - |
| DEST | SMIT-B4 | 1024x1024 | 160000 | - | - | 73.99 | - | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/dest/dest_simpatt-b4_1024x1024_160k_cityscapes.py) | - | - |
| DEST | SMIT-B5 | 1024x1024 | 160000 | - | - | 75.28 | - | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/dest/dest_simpatt-b5_1024x1024_160k_cityscapes.py) | - | - |

| Method | Backbone | Crop Size | Lr schd | Mem (GB) | Inf time (fps) | mIoU | mIoU(ms+flip) | config | download |
| ------ | -------- | --------- | ------: | -------: | -------------- | ----: | ------------- | ---------------------------------------------------------------------------------------------------------------------------- | -------- |
| DEST | SMIT-B0 | 1024x1024 | 160000 | - | - | 64.34 | - | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/dest/dest_simpatt-b0_1024x1024_160k_cityscapes.py) | - |
| DEST | SMIT-B1 | 1024x1024 | 160000 | - | - | 68.21 | - | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/dest/dest_simpatt-b1_1024x1024_160k_cityscapes.py) | - |
| DEST | SMIT-B2 | 1024x1024 | 160000 | - | - | 71.89 | - | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/dest/dest_simpatt-b2_1024x1024_160k_cityscapes.py) | - |
| DEST | SMIT-B3 | 1024x1024 | 160000 | - | - | 73.51 | - | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/dest/dest_simpatt-b3_1024x1024_160k_cityscapes.py) | - |
| DEST | SMIT-B4 | 1024x1024 | 160000 | - | - | 73.99 | - | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/dest/dest_simpatt-b4_1024x1024_160k_cityscapes.py) | - |
| DEST | SMIT-B5 | 1024x1024 | 160000 | - | - | 75.28 | - | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/dest/dest_simpatt-b5_1024x1024_160k_cityscapes.py) | - |

Note:

- The above models were all trained from scratch without pretrained backbones. Accuracy can be further improved by appropriate pretraining.
- Training of DEST is not very stable and is sensitive to random seeds.
60 changes: 30 additions & 30 deletions configs/dest/dest_simpatt-b0_1024x1024_160k_cityscapes.py
Original file line number Diff line number Diff line change
@@ -1,30 +1,30 @@
# Base configs this experiment inherits from: model, dataset, runtime and
# schedule.
_base_ = [
    '../_base_/models/dest_simpatt-b0.py',
    '../_base_/datasets/cityscapes_1024x1024.py',
    '../_base_/default_runtime.py',
    '../_base_/schedules/schedule_160k.py',
]

# `_delete_=True` is the MMCV config marker for replacing the inherited
# entry instead of merging into it.
optimizer = dict(
    _delete_=True,
    type='AdamW',
    lr=6e-5,
    betas=(0.9, 0.999),
    weight_decay=0.01,
    paramwise_cfg=dict(
        # Per-parameter-group overrides, keyed by a parameter-name pattern.
        custom_keys=dict(
            pos_block=dict(decay_mult=0.0),
            norm=dict(decay_mult=0.0),
            head=dict(lr_mult=10.0),
        ),
    ),
)

# Polynomial decay (power 1.0) with a 1500-iteration linear warmup.
lr_config = dict(
    _delete_=True,
    policy='poly',
    warmup='linear',
    warmup_iters=1500,
    warmup_ratio=1e-6,
    power=1.0,
    min_lr=0.0,
    by_epoch=False,
)

data = dict(samples_per_gpu=1, workers_per_gpu=1)
# Base configs this experiment inherits from: model, dataset, runtime and
# schedule.
_base_ = [
    '../_base_/models/dest_simpatt-b0.py',
    '../_base_/datasets/cityscapes_1024x1024.py',
    '../_base_/default_runtime.py',
    '../_base_/schedules/schedule_160k.py',
]

# `_delete_=True` is the MMCV config marker for replacing the inherited
# entry instead of merging into it.
optimizer = dict(
    _delete_=True,
    type='AdamW',
    lr=6e-5,
    betas=(0.9, 0.999),
    weight_decay=0.01,
    paramwise_cfg=dict(
        # Per-parameter-group overrides, keyed by a parameter-name pattern.
        custom_keys=dict(
            pos_block=dict(decay_mult=0.0),
            norm=dict(decay_mult=0.0),
            head=dict(lr_mult=10.0),
        ),
    ),
)

# Polynomial decay (power 1.0) with a 1500-iteration linear warmup.
lr_config = dict(
    _delete_=True,
    policy='poly',
    warmup='linear',
    warmup_iters=1500,
    warmup_ratio=1e-6,
    power=1.0,
    min_lr=0.0,
    by_epoch=False,
)

data = dict(samples_per_gpu=1, workers_per_gpu=1)
18 changes: 9 additions & 9 deletions configs/dest/dest_simpatt-b1_1024x1024_160k_cityscapes.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,9 @@
# Start from the B0 experiment and override only the channel widths.
_base_ = ['./dest_simpatt-b0_1024x1024_160k_cityscapes.py']

# Per-stage channel widths, used by both the backbone and the decode head.
embed_dims = [64, 128, 250, 320]

model = dict(
    type='EncoderDecoder',
    pretrained=None,
    backbone=dict(
        embed_dims=embed_dims,
    ),
    decode_head=dict(
        in_channels=embed_dims,
        channels=64,
    ),
)
# Start from the B0 experiment and override only the channel widths.
_base_ = ['./dest_simpatt-b0_1024x1024_160k_cityscapes.py']

# Per-stage channel widths, used by both the backbone and the decode head.
embed_dims = [64, 128, 250, 320]

model = dict(
    type='EncoderDecoder',
    pretrained=None,
    backbone=dict(
        embed_dims=embed_dims,
    ),
    decode_head=dict(
        in_channels=embed_dims,
        channels=64,
    ),
)
18 changes: 9 additions & 9 deletions configs/dest/dest_simpatt-b2_1024x1024_160k_cityscapes.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,9 @@
# Start from the B0 experiment; override the channel widths and the number
# of layers per backbone stage.
_base_ = ['./dest_simpatt-b0_1024x1024_160k_cityscapes.py']

# Per-stage channel widths, used by both the backbone and the decode head.
embed_dims = [64, 128, 250, 320]

model = dict(
    type='EncoderDecoder',
    pretrained=None,
    backbone=dict(
        embed_dims=embed_dims,
        num_layers=[3, 3, 6, 3],
    ),
    decode_head=dict(
        in_channels=embed_dims,
        channels=64,
    ),
)
# Start from the B0 experiment; override the channel widths and the number
# of layers per backbone stage.
_base_ = ['./dest_simpatt-b0_1024x1024_160k_cityscapes.py']

# Per-stage channel widths, used by both the backbone and the decode head.
embed_dims = [64, 128, 250, 320]

model = dict(
    type='EncoderDecoder',
    pretrained=None,
    backbone=dict(
        embed_dims=embed_dims,
        num_layers=[3, 3, 6, 3],
    ),
    decode_head=dict(
        in_channels=embed_dims,
        channels=64,
    ),
)
3 changes: 2 additions & 1 deletion mmseg/models/backbones/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,5 +27,6 @@
'ResNeSt', 'MobileNetV2', 'UNet', 'CGNet', 'MobileNetV3',
'VisionTransformer', 'SwinTransformer', 'MixVisionTransformer',
'BiSeNetV1', 'BiSeNetV2', 'ICNet', 'TIMMBackbone', 'ERFNet', 'PCPVT',
'SVT', 'STDCNet', 'STDCContextPathNet', 'BEiT', 'MAE', 'SimplifiedMixTransformer'
'SVT', 'STDCNet', 'STDCContextPathNet', 'BEiT', 'MAE',
'SimplifiedMixTransformer'
]
8 changes: 3 additions & 5 deletions mmseg/models/backbones/smit.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,10 +4,9 @@

import torch
import torch.nn as nn
from mmcv.cnn import Conv2d, build_activation_layer, build_norm_layer
from mmcv.cnn import build_activation_layer, build_norm_layer
from mmcv.cnn.bricks.drop import build_dropout
from mmcv.cnn.utils.weight_init import (constant_init, normal_init,
trunc_normal_init)
from mmcv.cnn.utils.weight_init import trunc_normal_init
from mmcv.runner import BaseModule, ModuleList, Sequential

from ..builder import BACKBONES
Expand Down Expand Up @@ -127,7 +126,6 @@ def __init__(self,
dropout_layer=None,
norm_cfg=dict(type='SyncBN', requires_grad=True)):
super().__init__()
assert embed_dims % num_heads == 0, f"dim {embed_dims} should be divided by num_heads {num_heads}."

self.embed_dims = embed_dims
self.num_heads = num_heads
Expand Down Expand Up @@ -418,4 +416,4 @@ def forward(self, x):
N, C, L = x.shape
x = x.reshape(N, C, H, W)
outs.append(x)
return outs
return outs
106 changes: 54 additions & 52 deletions mmseg/models/decode_heads/dest_head.py
Original file line number Diff line number Diff line change
@@ -1,52 +1,54 @@
import torch
import torch.nn as nn

from mmcv.cnn import ConvModule
from mmseg.models.builder import HEADS
from mmseg.models.decode_heads.decode_head import BaseDecodeHead


@HEADS.register_module()
class DESTHead(BaseDecodeHead):
    """Decode head that fuses multi-level features from deepest to shallowest.

    Every selected input level is first projected by a 1x1 ``ConvModule``.
    Starting at the last level, the running feature is concatenated with the
    projected input of the current level (nothing is concatenated for the
    last level), upsampled by a factor of 2 and passed through a 3x3
    ``ConvModule``. The feature produced for level 0 is handed to
    ``cls_seg``.

    Args:
        interpolate_mode (str): ``mode`` passed to ``nn.Upsample``.
            Default: 'bilinear'.
        **kwargs: Forwarded to ``BaseDecodeHead``. ``in_channels`` and
            ``in_index`` must be sequences of equal length.
    """

    def __init__(self, interpolate_mode='bilinear', **kwargs):
        super().__init__(input_transform='multiple_select', **kwargs)
        self.interpolate_mode = interpolate_mode
        # list(...) rather than .copy() so a tuple of channels also works.
        level_channels = list(self.in_channels)
        num_inputs = len(level_channels)
        assert num_inputs == len(self.in_index)

        # Every level except the last is concatenated with the feature coming
        # from the next (deeper) level, so its fuse conv sees both widths.
        self.fuse_in_channels = [
            cur + deeper
            for cur, deeper in zip(level_channels, level_channels[1:] + [0])
        ]

        # NOTE(review): norm_cfg is not forwarded to these ConvModules even
        # though the configs pass one to the head - confirm this is intended.
        self.convs = nn.ModuleList([
            ConvModule(
                in_channels=width,
                out_channels=width,
                kernel_size=1,
                stride=1,
                act_cfg=self.act_cfg) for width in level_channels
        ])

        self.fuse_convs = nn.ModuleList([
            ConvModule(
                in_channels=fused_width,
                out_channels=width,
                kernel_size=3,
                stride=1,
                padding=1,
                act_cfg=self.act_cfg)
            for fused_width, width in zip(self.fuse_in_channels,
                                          level_channels)
        ])

        # Build one module per level; multiplying a one-element list would
        # register the very same instance at every index.
        self.upsample = nn.ModuleList([
            nn.Sequential(nn.Upsample(scale_factor=2, mode=interpolate_mode))
            for _ in range(num_inputs)
        ])

    def forward(self, inputs):
        """Fuse the selected feature maps and return the segmentation logits.

        Args:
            inputs (Sequence[Tensor]): Multi-level backbone features.

        Returns:
            The output of ``cls_seg`` applied to the fused level-0 feature.
        """
        # Honour ``in_index`` (validated in __init__) via the selection
        # helper inherited from BaseDecodeHead.
        inputs = self._transform_inputs(inputs)
        feat = None
        for idx in reversed(range(len(inputs))):
            x = self.convs[idx](inputs[idx])
            if feat is not None:
                # torch.cat is available on every PyTorch release;
                # torch.concat is only a later alias of it.
                x = torch.cat([feat, x], dim=1)
            x = self.upsample[idx](x)
            feat = self.fuse_convs[idx](x)
        return self.cls_seg(feat)
# Copyright (c) OpenMMLab. All rights reserved.
import torch
import torch.nn as nn
from mmcv.cnn import ConvModule

from mmseg.models.builder import HEADS
from mmseg.models.decode_heads.decode_head import BaseDecodeHead


@HEADS.register_module()
class DESTHead(BaseDecodeHead):
    """Decode head that fuses multi-level features from deepest to shallowest.

    Every selected input level is first projected by a 1x1 ``ConvModule``.
    Starting at the last level, the running feature is concatenated with the
    projected input of the current level (nothing is concatenated for the
    last level), upsampled by a factor of 2 and passed through a 3x3
    ``ConvModule``. The feature produced for level 0 is handed to
    ``cls_seg``.

    Args:
        interpolate_mode (str): ``mode`` passed to ``nn.Upsample``.
            Default: 'bilinear'.
        **kwargs: Forwarded to ``BaseDecodeHead``. ``in_channels`` and
            ``in_index`` must be sequences of equal length.
    """

    def __init__(self, interpolate_mode='bilinear', **kwargs):
        super().__init__(input_transform='multiple_select', **kwargs)
        self.interpolate_mode = interpolate_mode
        # list(...) rather than .copy() so a tuple of channels also works.
        level_channels = list(self.in_channels)
        num_inputs = len(level_channels)
        assert num_inputs == len(self.in_index)

        # Every level except the last is concatenated with the feature coming
        # from the next (deeper) level, so its fuse conv sees both widths.
        self.fuse_in_channels = [
            cur + deeper
            for cur, deeper in zip(level_channels, level_channels[1:] + [0])
        ]

        # NOTE(review): norm_cfg is not forwarded to these ConvModules even
        # though the configs pass one to the head - confirm this is intended.
        self.convs = nn.ModuleList([
            ConvModule(
                in_channels=width,
                out_channels=width,
                kernel_size=1,
                stride=1,
                act_cfg=self.act_cfg) for width in level_channels
        ])

        self.fuse_convs = nn.ModuleList([
            ConvModule(
                in_channels=fused_width,
                out_channels=width,
                kernel_size=3,
                stride=1,
                padding=1,
                act_cfg=self.act_cfg)
            for fused_width, width in zip(self.fuse_in_channels,
                                          level_channels)
        ])

        # Build one module per level; multiplying a one-element list would
        # register the very same instance at every index.
        self.upsample = nn.ModuleList([
            nn.Sequential(nn.Upsample(scale_factor=2, mode=interpolate_mode))
            for _ in range(num_inputs)
        ])

    def forward(self, inputs):
        """Fuse the selected feature maps and return the segmentation logits.

        Args:
            inputs (Sequence[Tensor]): Multi-level backbone features.

        Returns:
            The output of ``cls_seg`` applied to the fused level-0 feature.
        """
        # Honour ``in_index`` (validated in __init__) via the selection
        # helper inherited from BaseDecodeHead.
        inputs = self._transform_inputs(inputs)
        feat = None
        for idx in reversed(range(len(inputs))):
            x = self.convs[idx](inputs[idx])
            if feat is not None:
                # torch.cat is available on every PyTorch release;
                # torch.concat is only a later alias of it.
                x = torch.cat([feat, x], dim=1)
            x = self.upsample[idx](x)
            feat = self.fuse_convs[idx](x)
        return self.cls_seg(feat)
3 changes: 1 addition & 2 deletions mmseg/models/utils/embed.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,10 +5,9 @@
import torch.nn as nn
import torch.nn.functional as F
from mmcv.cnn import build_conv_layer, build_norm_layer
from mmcv.cnn.utils.weight_init import trunc_normal_init
from mmcv.runner.base_module import BaseModule
from mmcv.utils import to_2tuple
from mmcv.cnn.utils.weight_init import (constant_init, normal_init,
trunc_normal_init)


class AdaptivePadding(nn.Module):
Expand Down

0 comments on commit 4ad1856

Please sign in to comment.