fix format

open-mmlab · Jan 12, 2023 · 4ad1856 · 4ad1856
1 parent afbf086
commit 4ad1856
Show file tree

Hide file tree

Showing 9 changed files with 154 additions and 155 deletions.
diff --git a/configs/_base_/models/dest_simpatt-b0.py b/configs/_base_/models/dest_simpatt-b0.py
@@ -1,37 +1,37 @@
-# model settings
-embed_dims = [32, 64, 160, 256]
-norm_cfg = dict(type='SyncBN', requires_grad=True)
-model = dict(
-    type='EncoderDecoder',
-    pretrained=None,
-    backbone=dict(
-        type='SimplifiedMixTransformer',
-        in_channels=3,
-        embed_dims=embed_dims,
-        num_stages=4,
-        num_layers=[2, 2, 2, 2],
-        num_heads=[1, 2, 5, 8],
-        patch_sizes=[7, 3, 3, 3],
-        strides=[4, 2, 2, 2],
-        sr_ratios=[8, 4, 2, 1],
-        out_indices=(0, 1, 2, 3),
-        mlp_ratios=[8, 8, 4, 4],
-        qkv_bias=True,
-        drop_rate=0.0,
-        attn_drop_rate=0.0,
-        drop_path_rate=0.1,
-        norm_cfg=norm_cfg),
-    decode_head=dict(
-        type='DESTHead',
-        in_channels=[32, 64, 160, 256],
-        in_index=[0, 1, 2, 3],
-        channels=32,
-        dropout_ratio=0.1,
-        num_classes=19,
-        norm_cfg=norm_cfg,
-        align_corners=False,
-        loss_decode=dict(
-            type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)),
-    # model training and testing settings
-    train_cfg=dict(),
-    test_cfg=dict(mode='slide', crop_size=(1024, 1024), stride=(768, 768)))
+# model settings
+embed_dims = [32, 64, 160, 256]
+norm_cfg = dict(type='SyncBN', requires_grad=True)
+model = dict(
+    type='EncoderDecoder',
+    pretrained=None,
+    backbone=dict(
+        type='SimplifiedMixTransformer',
+        in_channels=3,
+        embed_dims=embed_dims,
+        num_stages=4,
+        num_layers=[2, 2, 2, 2],
+        num_heads=[1, 2, 5, 8],
+        patch_sizes=[7, 3, 3, 3],
+        strides=[4, 2, 2, 2],
+        sr_ratios=[8, 4, 2, 1],
+        out_indices=(0, 1, 2, 3),
+        mlp_ratios=[8, 8, 4, 4],
+        qkv_bias=True,
+        drop_rate=0.0,
+        attn_drop_rate=0.0,
+        drop_path_rate=0.1,
+        norm_cfg=norm_cfg),
+    decode_head=dict(
+        type='DESTHead',
+        in_channels=[32, 64, 160, 256],
+        in_index=[0, 1, 2, 3],
+        channels=32,
+        dropout_ratio=0.1,
+        num_classes=19,
+        norm_cfg=norm_cfg,
+        align_corners=False,
+        loss_decode=dict(
+            type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)),
+    # model training and testing settings
+    train_cfg=dict(),
+    test_cfg=dict(mode='slide', crop_size=(1024, 1024), stride=(768, 768)))
diff --git a/configs/dest/README.md b/configs/dest/README.md
@@ -35,17 +35,16 @@ Transformer and its variants have shown state-of-the-art results in many vision
 
 ### Cityscapes
 
-
-| Method    | Backbone | Crop Size | Lr schd | Mem (GB) | Inf time (fps) |  mIoU | mIoU(ms+flip) | config                                                                                                                                 | download                                                                                                                                                                                                                                                                                                                                                                                       |
-| --------- | -------- | --------- | ------: | -------: | -------------- | ----: | ------------- | -------------------------------------------------------------------------------------------------------------------------------------- | ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
-| DEST | SMIT-B0   | 1024x1024 |  160000 |     - | -           | 64.34 | -         | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/dest/dest_simpatt-b0_1024x1024_160k_cityscapes.py) | - | - |
-| DEST | SMIT-B1   | 1024x1024 |  160000 |     - | -            | 68.21 | -        | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/dest/dest_simpatt-b1_1024x1024_160k_cityscapes.py) | - | - |
-| DEST | SMIT-B2   | 1024x1024 |  160000 |     - | -           | 71.89 | -         | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/dest/dest_simpatt-b2_1024x1024_160k_cityscapes.py) | - | - |
-| DEST | SMIT-B3   | 1024x1024 |  160000 |    - | -           | 73.51 | -         | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/dest/dest_simpatt-b3_1024x1024_160k_cityscapes.py) | - | - |
-| DEST | SMIT-B4   | 1024x1024 |  160000 |    - | -           | 73.99 | -         | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/dest/dest_simpatt-b4_1024x1024_160k_cityscapes.py) | - | - |
-| DEST | SMIT-B5   | 1024x1024 |  160000 |    - | -          | 75.28 | -        | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/dest/dest_simpatt-b5_1024x1024_160k_cityscapes.py) | - | - |
-
+| Method | Backbone | Crop Size | Lr schd | Mem (GB) | Inf time (fps) |  mIoU | mIoU(ms+flip) | config                                                                                                                       | download |
+| ------ | -------- | --------- | ------: | -------: | -------------- | ----: | ------------- | ---------------------------------------------------------------------------------------------------------------------------- | -------- |
+| DEST   | SMIT-B0  | 1024x1024 |  160000 |        - | -              | 64.34 | -             | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/dest/dest_simpatt-b0_1024x1024_160k_cityscapes.py) | -        |
+| DEST   | SMIT-B1  | 1024x1024 |  160000 |        - | -              | 68.21 | -             | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/dest/dest_simpatt-b1_1024x1024_160k_cityscapes.py) | -        |
+| DEST   | SMIT-B2  | 1024x1024 |  160000 |        - | -              | 71.89 | -             | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/dest/dest_simpatt-b2_1024x1024_160k_cityscapes.py) | -        |
+| DEST   | SMIT-B3  | 1024x1024 |  160000 |        - | -              | 73.51 | -             | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/dest/dest_simpatt-b3_1024x1024_160k_cityscapes.py) | -        |
+| DEST   | SMIT-B4  | 1024x1024 |  160000 |        - | -              | 73.99 | -             | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/dest/dest_simpatt-b4_1024x1024_160k_cityscapes.py) | -        |
+| DEST   | SMIT-B5  | 1024x1024 |  160000 |        - | -              | 75.28 | -             | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/dest/dest_simpatt-b5_1024x1024_160k_cityscapes.py) | -        |
 
 Note:
+
 - The above models were all trained from scratch, without pretrained backbones. Accuracy can be further improved by appropriate pretraining.
 - Training of DEST is not very stable and is sensitive to the random seed.
diff --git a/configs/dest/dest_simpatt-b0_1024x1024_160k_cityscapes.py b/configs/dest/dest_simpatt-b0_1024x1024_160k_cityscapes.py
@@ -1,30 +1,30 @@
-_base_ = [
-    '../_base_/models/dest_simpatt-b0.py',
-    '../_base_/datasets/cityscapes_1024x1024.py',
-    '../_base_/default_runtime.py', '../_base_/schedules/schedule_160k.py'
-]
-
-optimizer = dict(
-    _delete_=True,
-    type='AdamW',
-    lr=0.00006,
-    betas=(0.9, 0.999),
-    weight_decay=0.01,
-    paramwise_cfg=dict(
-        custom_keys={
-            'pos_block': dict(decay_mult=0.),
-            'norm': dict(decay_mult=0.),
-            'head': dict(lr_mult=10.)
-        }))
-
-lr_config = dict(
-    _delete_=True,
-    policy='poly',
-    warmup='linear',
-    warmup_iters=1500,
-    warmup_ratio=1e-6,
-    power=1.0,
-    min_lr=0.0,
-    by_epoch=False)
-
-data = dict(samples_per_gpu=1, workers_per_gpu=1)
+_base_ = [
+    '../_base_/models/dest_simpatt-b0.py',
+    '../_base_/datasets/cityscapes_1024x1024.py',
+    '../_base_/default_runtime.py', '../_base_/schedules/schedule_160k.py'
+]
+
+optimizer = dict(
+    _delete_=True,
+    type='AdamW',
+    lr=0.00006,
+    betas=(0.9, 0.999),
+    weight_decay=0.01,
+    paramwise_cfg=dict(
+        custom_keys={
+            'pos_block': dict(decay_mult=0.),
+            'norm': dict(decay_mult=0.),
+            'head': dict(lr_mult=10.)
+        }))
+
+lr_config = dict(
+    _delete_=True,
+    policy='poly',
+    warmup='linear',
+    warmup_iters=1500,
+    warmup_ratio=1e-6,
+    power=1.0,
+    min_lr=0.0,
+    by_epoch=False)
+
+data = dict(samples_per_gpu=1, workers_per_gpu=1)
diff --git a/configs/dest/dest_simpatt-b1_1024x1024_160k_cityscapes.py b/configs/dest/dest_simpatt-b1_1024x1024_160k_cityscapes.py
@@ -1,9 +1,9 @@
-_base_ = ['./dest_simpatt-b0_1024x1024_160k_cityscapes.py']
-
-embed_dims = [64, 128, 250, 320]
-
-model = dict(
-    type='EncoderDecoder',
-    pretrained=None,
-    backbone=dict(embed_dims=embed_dims),
-    decode_head=dict(in_channels=embed_dims, channels=64))
+_base_ = ['./dest_simpatt-b0_1024x1024_160k_cityscapes.py']
+
+embed_dims = [64, 128, 250, 320]
+
+model = dict(
+    type='EncoderDecoder',
+    pretrained=None,
+    backbone=dict(embed_dims=embed_dims),
+    decode_head=dict(in_channels=embed_dims, channels=64))
diff --git a/configs/dest/dest_simpatt-b2_1024x1024_160k_cityscapes.py b/configs/dest/dest_simpatt-b2_1024x1024_160k_cityscapes.py
@@ -1,9 +1,9 @@
-_base_ = ['./dest_simpatt-b0_1024x1024_160k_cityscapes.py']
-
-embed_dims = [64, 128, 250, 320]
-
-model = dict(
-    type='EncoderDecoder',
-    pretrained=None,
-    backbone=dict(embed_dims=embed_dims, num_layers=[3, 3, 6, 3]),
-    decode_head=dict(in_channels=embed_dims, channels=64))
+_base_ = ['./dest_simpatt-b0_1024x1024_160k_cityscapes.py']
+
+embed_dims = [64, 128, 250, 320]
+
+model = dict(
+    type='EncoderDecoder',
+    pretrained=None,
+    backbone=dict(embed_dims=embed_dims, num_layers=[3, 3, 6, 3]),
+    decode_head=dict(in_channels=embed_dims, channels=64))
diff --git a/mmseg/models/backbones/__init__.py b/mmseg/models/backbones/__init__.py
@@ -27,5 +27,6 @@
     'ResNeSt', 'MobileNetV2', 'UNet', 'CGNet', 'MobileNetV3',
     'VisionTransformer', 'SwinTransformer', 'MixVisionTransformer',
     'BiSeNetV1', 'BiSeNetV2', 'ICNet', 'TIMMBackbone', 'ERFNet', 'PCPVT',
-    'SVT', 'STDCNet', 'STDCContextPathNet', 'BEiT', 'MAE', 'SimplifiedMixTransformer'
+    'SVT', 'STDCNet', 'STDCContextPathNet', 'BEiT', 'MAE',
+    'SimplifiedMixTransformer'
 ]
diff --git a/mmseg/models/backbones/smit.py b/mmseg/models/backbones/smit.py
@@ -4,10 +4,9 @@
 
 import torch
 import torch.nn as nn
-from mmcv.cnn import Conv2d, build_activation_layer, build_norm_layer
+from mmcv.cnn import build_activation_layer, build_norm_layer
 from mmcv.cnn.bricks.drop import build_dropout
-from mmcv.cnn.utils.weight_init import (constant_init, normal_init,
-                                        trunc_normal_init)
+from mmcv.cnn.utils.weight_init import trunc_normal_init
 from mmcv.runner import BaseModule, ModuleList, Sequential
 
 from ..builder import BACKBONES
@@ -127,7 +126,6 @@ def __init__(self,
                  dropout_layer=None,
                  norm_cfg=dict(type='SyncBN', requires_grad=True)):
         super().__init__()
-        assert embed_dims % num_heads == 0, f"dim {embed_dims} should be divided by num_heads {num_heads}."
 
         self.embed_dims = embed_dims
         self.num_heads = num_heads
@@ -418,4 +416,4 @@ def forward(self, x):
             N, C, L = x.shape
             x = x.reshape(N, C, H, W)
             outs.append(x)
-        return outs
+        return outs
diff --git a/mmseg/models/decode_heads/dest_head.py b/mmseg/models/decode_heads/dest_head.py
@@ -1,52 +1,54 @@
-import torch
-import torch.nn as nn
-
-from mmcv.cnn import ConvModule
-from mmseg.models.builder import HEADS
-from mmseg.models.decode_heads.decode_head import BaseDecodeHead
-
-
-@HEADS.register_module()
-class DESTHead(BaseDecodeHead):
-
-    def __init__(self, interpolate_mode='bilinear', **kwargs):
-        super().__init__(input_transform='multiple_select', **kwargs)
-        self.interpolate_mode = interpolate_mode
-        num_inputs = len(self.in_channels)
-        assert num_inputs == len(self.in_index)
-        self.fuse_in_channels = self.in_channels.copy()
-        for i in range(num_inputs - 1):
-            self.fuse_in_channels[i] += self.fuse_in_channels[i + 1]
-        self.convs = nn.ModuleList()
-        for i in range(num_inputs):
-            self.convs.append(
-                ConvModule(
-                    in_channels=self.in_channels[i],
-                    out_channels=self.in_channels[i],
-                    kernel_size=1,
-                    stride=1,
-                    act_cfg=self.act_cfg))
-
-        self.fuse_convs = nn.ModuleList()
-        for i in range(num_inputs):
-            self.fuse_convs.append(
-                ConvModule(
-                    in_channels=self.fuse_in_channels[i],
-                    out_channels=self.in_channels[i],
-                    kernel_size=3,
-                    stride=1,
-                    padding=1,
-                    act_cfg=self.act_cfg))
-
-        self.upsample = nn.ModuleList([
-            nn.Sequential(nn.Upsample(scale_factor=2, mode=interpolate_mode))
-        ] * len(self.in_channels))
-
-    def forward(self, inputs):
-        for idx in reversed(range(len(inputs))):
-            x = self.convs[idx](inputs[idx])
-            if idx != len(inputs) - 1:
-                x = torch.concat([feat, x], dim=1)
-            x = self.upsample[idx](x)
-            feat = self.fuse_convs[idx](x)
-        return self.cls_seg(feat)
+# Copyright (c) OpenMMLab. All rights reserved.
+import torch
+import torch.nn as nn
+from mmcv.cnn import ConvModule
+
+from mmseg.models.builder import HEADS
+from mmseg.models.decode_heads.decode_head import BaseDecodeHead
+
+
+@HEADS.register_module()
+class DESTHead(BaseDecodeHead):
+
+    def __init__(self, interpolate_mode='bilinear', **kwargs):
+        super().__init__(input_transform='multiple_select', **kwargs)
+        self.interpolate_mode = interpolate_mode
+        num_inputs = len(self.in_channels)
+        assert num_inputs == len(self.in_index)
+        self.fuse_in_channels = self.in_channels.copy()
+        for i in range(num_inputs - 1):
+            self.fuse_in_channels[i] += self.fuse_in_channels[i + 1]
+        self.convs = nn.ModuleList()
+        for i in range(num_inputs):
+            self.convs.append(
+                ConvModule(
+                    in_channels=self.in_channels[i],
+                    out_channels=self.in_channels[i],
+                    kernel_size=1,
+                    stride=1,
+                    act_cfg=self.act_cfg))
+
+        self.fuse_convs = nn.ModuleList()
+        for i in range(num_inputs):
+            self.fuse_convs.append(
+                ConvModule(
+                    in_channels=self.fuse_in_channels[i],
+                    out_channels=self.in_channels[i],
+                    kernel_size=3,
+                    stride=1,
+                    padding=1,
+                    act_cfg=self.act_cfg))
+
+        self.upsample = nn.ModuleList([
+            nn.Sequential(nn.Upsample(scale_factor=2, mode=interpolate_mode))
+        ] * len(self.in_channels))
+
+    def forward(self, inputs):
+        feat = None
+        for idx in reversed(range(len(inputs))):
+            x = self.convs[idx](inputs[idx])
+            if idx != len(inputs) - 1:
+                x = torch.concat([feat, x], dim=1)
+            x = self.upsample[idx](x)
+            feat = self.fuse_convs[idx](x)
+        return self.cls_seg(feat)
diff --git a/mmseg/models/utils/embed.py b/mmseg/models/utils/embed.py
@@ -5,10 +5,9 @@
 import torch.nn as nn
 import torch.nn.functional as F
 from mmcv.cnn import build_conv_layer, build_norm_layer
+from mmcv.cnn.utils.weight_init import trunc_normal_init
 from mmcv.runner.base_module import BaseModule
 from mmcv.utils import to_2tuple
-from mmcv.cnn.utils.weight_init import (constant_init, normal_init,
-                                        trunc_normal_init)
 
 
 class AdaptivePadding(nn.Module):