diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
index aa5942748a..fd16ba2dc3 100644
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -1,66 +1,10 @@
+# See https://pre-commit.com for more information
+# See https://pre-commit.com/hooks.html for more hooks
 repos:
-  - repo: https://github.com/PyCQA/flake8
-    rev: 5.0.4
+-   repo: https://github.com/pre-commit/pre-commit-hooks
+    rev: v3.2.0
     hooks:
-      - id: flake8
-  - repo: https://github.com/zhouzaida/isort
-    rev: 5.12.1
-    hooks:
-      - id: isort
-  - repo: https://github.com/pre-commit/mirrors-yapf
-    rev: v0.32.0
-    hooks:
-      - id: yapf
-  - repo: https://github.com/pre-commit/pre-commit-hooks
-    rev: v4.3.0
-    hooks:
-      - id: trailing-whitespace
-      - id: check-yaml
-      - id: end-of-file-fixer
-      - id: requirements-txt-fixer
-      - id: double-quote-string-fixer
-      - id: check-merge-conflict
-      - id: fix-encoding-pragma
-        args: ["--remove"]
-      - id: mixed-line-ending
-        args: ["--fix=lf"]
-  - repo: https://github.com/executablebooks/mdformat
-    rev: 0.7.9
-    hooks:
-      - id: mdformat
-        args: ["--number"]
-        additional_dependencies:
-          - mdformat-openmmlab
-          - mdformat_frontmatter
-          - linkify-it-py
-  - repo: https://github.com/codespell-project/codespell
-    rev: v2.2.1
-    hooks:
-      - id: codespell
-  - repo: https://github.com/myint/docformatter
-    rev: v1.3.1
-    hooks:
-      - id: docformatter
-        args: ["--in-place", "--wrap-descriptions", "79"]
-  # temporarily remove update-model-index to avoid conflict raised
-  # by depth estimator models
-  # - repo: local
-  #   hooks:
-  #     - id: update-model-index
-  #       name: update-model-index
-  #       description: Collect model information and update model-index.yml
-  #       entry: .dev_scripts/update_model_index.py
-  #       additional_dependencies: [pyyaml]
-  #       language: python
-  #       require_serial: true
-  - repo: https://github.com/asottile/pyupgrade
-    rev: v3.0.0
-    hooks:
-      - id: pyupgrade
-        args: ["--py36-plus"]
-  - repo: https://github.com/open-mmlab/pre-commit-hooks
-    rev: v0.2.0  # Use the rev to fix revision
-    hooks:
-      - id: check-algo-readme
-      - id: check-copyright
-        args: ["mmseg", "tools", "tests", "demo"]  # the dir_to_check with expected directory to check
+    -   id: trailing-whitespace
+    -   id: end-of-file-fixer
+    -   id: check-yaml
+    -   id: check-added-large-files
diff --git a/configs/_base_/datasets/feces.py b/configs/_base_/datasets/feces.py
new file mode 100644
index 0000000000..b93c9f56f2
--- /dev/null
+++ b/configs/_base_/datasets/feces.py
@@ -0,0 +1,76 @@
+# dataset settings; data_root is a raw string so '\' is never an escape
+dataset_type = 'Feces'
+data_root = r'D:\Allcode\mmsegmentation-dev-1.x\data\data'
+img_scale = (448, 224)
+crop_size = (224, 224)
+train_pipeline = [
+    dict(type='LoadImageFromFile'),
+    dict(type='LoadAnnotations'),
+    dict(
+        type='RandomResize',
+        scale=img_scale,
+        ratio_range=(0.5, 2.0),
+        keep_ratio=True),
+    dict(type='RandomFlip', prob=0.7),
+    dict(type='PhotoMetricDistortion'),
+    dict(type='RandomCrop', crop_size=crop_size, cat_max_ratio=0.75),
+    dict(type='PackSegInputs')
+]
+test_pipeline = [
+    dict(type='LoadImageFromFile'),
+    dict(type='Resize', scale=img_scale, keep_ratio=True),
+    # load annotations after ``Resize`` so that the ground truth is
+    # not resized along with the image
+    dict(type='LoadAnnotations'),
+    dict(type='PackSegInputs')
+]
+
+img_ratios = [0.5, 0.75, 1.0, 1.25, 1.5, 1.75]
+tta_pipeline = [
+    dict(type='LoadImageFromFile', backend_args=None),
+    dict(
+        type='TestTimeAug',
+        transforms=[
+            [
+                dict(type='Resize', scale_factor=r, keep_ratio=True)
+                for r in img_ratios
+            ],
+            [
+                dict(type='RandomFlip', prob=0., direction='horizontal'),
+                dict(type='RandomFlip', prob=1., direction='horizontal')
+            ], [dict(type='LoadAnnotations')], [dict(type='PackSegInputs')]
+        ])
+]
+
+train_dataloader = dict(
+    batch_size=64,
+    num_workers=4,
+    persistent_workers=True,
+    sampler=dict(type='InfiniteSampler', shuffle=True),
+    dataset=dict(
+        type='RepeatDataset',
+        times=40000,
+        dataset=dict(
+            type=dataset_type,
+            data_root=data_root,
+            data_prefix=dict(
+                img_path='images/training',
+                seg_map_path='annotations/training'),
+            pipeline=train_pipeline)))
+val_dataloader = dict(
+    batch_size=1,
+    num_workers=2,
+    persistent_workers=True,
+    sampler=dict(type='DefaultSampler', shuffle=False),
+    dataset=dict(
+        type=dataset_type,
+        data_root=data_root,
+        data_prefix=dict(
+            img_path='images/validation',
+            seg_map_path='annotations/validation'),
+        pipeline=test_pipeline))
+test_dataloader = val_dataloader
+
+val_evaluator = dict(
+    type='IoUMetric', iou_metrics=['mIoU', 'mDice', 'mFscore'])
+test_evaluator = val_evaluator
diff --git a/configs/ddrnet/ohem-DAPPM-cbam-lovasz-auxhead.py b/configs/ddrnet/ohem-DAPPM-cbam-lovasz-auxhead.py
new file mode 100644
index 0000000000..2a8b18f3a9
--- /dev/null
+++ b/configs/ddrnet/ohem-DAPPM-cbam-lovasz-auxhead.py
@@ -0,0 +1,109 @@
+_base_ = [
+    '../_base_/datasets/feces.py',
+    '../_base_/default_runtime.py',
+]
+
+class_weight = [0.7, 0.8, 1.1, 1.2]
+
+checkpoint = '/home/panjm/hepengguang/mmlab_he/mmsegmentation-dev-1.x/checkpoints/ddrnet23-in1kpre_3rdparty-9ca29f62.pth'  # noqa
+crop_size = (224, 224)
+data_preprocessor = dict(
+    type='SegDataPreProcessor',
+    size=crop_size,
+    # NOTE(review): active mean/std look 0-1 scaled - confirm input range.
+    # 0-255 scale: mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375]
+    mean=[0.609, 0.604, 0.578],
+    std=[0.195, 0.192, 0.202],
+    bgr_to_rgb=True,
+    pad_val=0,
+    seg_pad_val=255)
+norm_cfg = dict(type='SyncBN', requires_grad=True)
+model = dict(
+    type='EncoderDecoder',
+    data_preprocessor=data_preprocessor,
+    backbone=dict(
+        type='DDRNet_cbam',
+        in_channels=3,
+        channels=64,
+        ppm_channels=128,
+        norm_cfg=norm_cfg,
+        align_corners=False,
+        init_cfg=None),
+    # init_cfg=dict(type='Pretrained', checkpoint=checkpoint)),
+    decode_head=dict(
+        type='DDRHead',
+        in_channels=64 * 4,
+        channels=128,
+        dropout_ratio=0.,
+        num_classes=4,
+        align_corners=False,
+        norm_cfg=norm_cfg,
+        loss_decode=[
+            dict(
+                type='LovaszLoss', class_weight=class_weight, loss_weight=1.0),
+            dict(
+                type='OhemCrossEntropy',
+                thres=0.9,
+                min_kept=131072,
+                class_weight=class_weight,
+                loss_weight=0.4),
+        ]),
+    auxiliary_head=dict(
+        type='DDRHead',
+        in_channels=64 * 4,
+        channels=128,
+        dropout_ratio=0.,
+        num_classes=4,
+        align_corners=False,
+        norm_cfg=norm_cfg,
+        loss_decode=[
+            dict(
+                type='LovaszLoss', class_weight=class_weight, loss_weight=1.0),
+            dict(
+                type='OhemCrossEntropy',
+                thres=0.9,
+                min_kept=131072,
+                class_weight=class_weight,
+                loss_weight=0.4),
+        ]),
+
+    # model training and testing settings
+    train_cfg=dict(),
+    test_cfg=dict(mode='whole'))
+
+# train_dataloader = dict(batch_size=6, num_workers=4)
+
+iters = 6000
+# optimizer
+optimizer = dict(type='SGD', lr=0.01, momentum=0.9, weight_decay=0.0005)
+optim_wrapper = dict(type='OptimWrapper', optimizer=optimizer, clip_grad=None)
+# learning policy
+param_scheduler = [
+    dict(
+        type='PolyLR',
+        eta_min=0.0001,
+        power=0.9,
+        begin=0,
+        end=iters,
+        by_epoch=False)
+]
+
+# training schedule: 6k iterations, validating every 100 iterations
+train_cfg = dict(type='IterBasedTrainLoop', max_iters=iters, val_interval=100)
+val_cfg = dict(type='ValLoop')
+test_cfg = dict(type='TestLoop')
+default_hooks = dict(
+    timer=dict(type='IterTimerHook'),
+    logger=dict(type='LoggerHook', interval=5, log_metric_by_epoch=False),
+    param_scheduler=dict(type='ParamSchedulerHook'),
+    checkpoint=dict(
+        # type='CheckpointHook', by_epoch=False, interval=100, max_keep_ckpts=2, save_best='mIoU'),
+        type='CheckpointHook',
+        by_epoch=False,
+        interval=100,
+        max_keep_ckpts=2,
+        save_best='mDice'),
+    sampler_seed=dict(type='DistSamplerSeedHook'),
+    visualization=dict(type='SegVisualizationHook'))
+
+randomness = dict(seed=304)
diff --git a/configs/ddrnet/ohem-DAPPM-cbam-lovasz.py b/configs/ddrnet/ohem-DAPPM-cbam-lovasz.py
new file mode 100644
index 0000000000..c6d9962b74
--- /dev/null
+++ b/configs/ddrnet/ohem-DAPPM-cbam-lovasz.py
@@ -0,0 +1,93 @@
+_base_ = [
+    '../_base_/datasets/feces.py',
+    '../_base_/default_runtime.py',
+]
+
+class_weight = [0.7, 0.8, 1.1, 1.2]
+
+checkpoint = '/home/panjm/hepengguang/mmlab_he/mmsegmentation-dev-1.x/checkpoints/ddrnet23-in1kpre_3rdparty-9ca29f62.pth'  # noqa
+crop_size = (224, 224)
+data_preprocessor = dict(
+    type='SegDataPreProcessor',
+    size=crop_size,
+    mean=[0.609, 0.604, 0.578],
+    std=[0.195, 0.192, 0.202],
+    bgr_to_rgb=True,
+    pad_val=0,
+    seg_pad_val=255)
+norm_cfg = dict(type='SyncBN', requires_grad=True)
+model = dict(
+    type='EncoderDecoder',
+    data_preprocessor=data_preprocessor,
+    backbone=dict(
+        type='DDRNet_cbam',
+        in_channels=3,
+        channels=64,
+        ppm_channels=128,
+        norm_cfg=norm_cfg,
+        align_corners=False,
+        init_cfg=None),
+    # init_cfg=dict(type='Pretrained', checkpoint=checkpoint)),
+    decode_head=dict(
+        type='DDRHead',
+        in_channels=64 * 4,
+        channels=128,
+        dropout_ratio=0.,
+        num_classes=4,
+        align_corners=False,
+        norm_cfg=norm_cfg,
+        loss_decode=[
+            dict(
+                type='LovaszLoss',
+                # thres=0.9,
+                # min_kept=131072,
+                class_weight=class_weight,
+                loss_weight=1.0),
+            dict(
+                type='OhemCrossEntropy',
+                thres=0.9,
+                min_kept=131072,
+                class_weight=class_weight,
+                loss_weight=0.4),
+        ]),
+
+    # model training and testing settings
+    train_cfg=dict(),
+    test_cfg=dict(mode='whole'))
+
+# train_dataloader = dict(batch_size=6, num_workers=4)
+
+iters = 6000
+# optimizer
+optimizer = dict(type='SGD', lr=0.01, momentum=0.9, weight_decay=0.0005)
+optim_wrapper = dict(type='OptimWrapper', optimizer=optimizer, clip_grad=None)
+# learning policy
+param_scheduler = [
+    dict(
+        type='PolyLR',
+        eta_min=0.0001,
+        power=0.9,
+        begin=0,
+        end=iters,
+        by_epoch=False)
+]
+
+# training schedule: 6k iterations, validating every 100 iterations
+train_cfg = dict(type='IterBasedTrainLoop', max_iters=iters, val_interval=100)
+val_cfg = dict(type='ValLoop')
+test_cfg = dict(type='TestLoop')
+default_hooks = dict(
+    timer=dict(type='IterTimerHook'),
+    logger=dict(type='LoggerHook', interval=5, log_metric_by_epoch=False),
+    param_scheduler=dict(type='ParamSchedulerHook'),
+    checkpoint=dict(
+        # type='CheckpointHook', by_epoch=False, interval=100, max_keep_ckpts=2, save_best='mIoU'),
+        type='CheckpointHook',
+        by_epoch=False,
+        interval=100,
+        max_keep_ckpts=2,
+        save_best='mDice'),
+    sampler_seed=dict(type='DistSamplerSeedHook'),
+    visualization=dict(type='SegVisualizationHook'))
+
+randomness = dict(seed=304)
diff --git a/configs/ddrnet/ohem-DAPPM-cbam.py b/configs/ddrnet/ohem-DAPPM-cbam.py
new file mode 100644
index 0000000000..de677186e8
--- /dev/null
+++ b/configs/ddrnet/ohem-DAPPM-cbam.py
@@ -0,0 +1,89 @@
+_base_ = [
+    '../_base_/datasets/feces.py',
+    '../_base_/default_runtime.py',
+]
+
+class_weight = [0.7, 0.8, 1.1, 1.2]
+
+checkpoint = '/home/panjm/hepengguang/mmlab_he/mmsegmentation-dev-1.x/checkpoints/ddrnet23-in1kpre_3rdparty-9ca29f62.pth'  # noqa
+crop_size = (224, 224)
+data_preprocessor = dict(
+    type='SegDataPreProcessor',
+    size=crop_size,
+    mean=[0.609, 0.604, 0.578],
+    std=[0.195, 0.192, 0.202],
+    bgr_to_rgb=True,
+    pad_val=0,
+    seg_pad_val=255)
+norm_cfg = dict(type='SyncBN', requires_grad=True)
+model = dict(
+    type='EncoderDecoder',
+    data_preprocessor=data_preprocessor,
+    backbone=dict(
+        type='DDRNet_cbam',
+        in_channels=3,
+        channels=64,
+        ppm_channels=128,
+        norm_cfg=norm_cfg,
+        align_corners=False,
+        init_cfg=None),
+    decode_head=dict(
+        type='DDRHead',
+        in_channels=64 * 4,
+        channels=128,
+        dropout_ratio=0.,
+        num_classes=4,
+        align_corners=False,
+        norm_cfg=norm_cfg,
+        loss_decode=[
+            dict(
+                type='OhemCrossEntropy',
+                thres=0.9,
+                min_kept=131072,
+                class_weight=class_weight,
+                loss_weight=1.0),
+            dict(
+                type='OhemCrossEntropy',
+                thres=0.9,
+                min_kept=131072,
+                class_weight=class_weight,
+                loss_weight=0.4),
+        ]),
+
+    # model training and testing settings
+    train_cfg=dict(),
+    test_cfg=dict(mode='whole'))
+
+iters = 6000
+# optimizer
+optimizer = dict(type='SGD', lr=0.01, momentum=0.9, weight_decay=0.0005)
+optim_wrapper = dict(type='OptimWrapper', optimizer=optimizer, clip_grad=None)
+# learning policy
+param_scheduler = [
+    dict(
+        type='PolyLR',
+        eta_min=0.0001,
+        power=0.9,
+        begin=0,
+        end=iters,
+        by_epoch=False)
+]
+
+# training schedule: 6k iterations, validating every 100 iterations
+train_cfg = dict(type='IterBasedTrainLoop', max_iters=iters, val_interval=100)
+val_cfg = dict(type='ValLoop')
+test_cfg = dict(type='TestLoop')
+default_hooks = dict(
+    timer=dict(type='IterTimerHook'),
+    logger=dict(type='LoggerHook', interval=5, log_metric_by_epoch=False),
+    param_scheduler=dict(type='ParamSchedulerHook'),
+    checkpoint=dict(
+        type='CheckpointHook',
+        by_epoch=False,
+        interval=100,
+        max_keep_ckpts=2,
+        save_best='mDice'),
+    sampler_seed=dict(type='DistSamplerSeedHook'),
+    visualization=dict(type='SegVisualizationHook'))
+
+randomness = dict(seed=304)
diff --git a/configs/ddrnet/ohem-DAPPM-slim.py b/configs/ddrnet/ohem-DAPPM-slim.py
new file mode 100644
index 0000000000..059aa1cbc8
--- /dev/null
+++ b/configs/ddrnet/ohem-DAPPM-slim.py
@@ -0,0 +1,86 @@
+_base_ = [
+    '../_base_/datasets/feces.py',
+    '../_base_/default_runtime.py',
+]
+
+class_weight = [0.7, 0.8, 1.1, 1.2]
+
+checkpoint = '/home/panjm/hepengguang/mmlab_he/mmsegmentation-dev-1.x/checkpoints/ddrnet23-in1kpre_3rdparty-9ca29f62.pth'  # noqa
+crop_size = (224, 224)
+data_preprocessor = dict(
+    type='SegDataPreProcessor',
+    size=crop_size,
+    mean=[0.609, 0.604, 0.578],
+    std=[0.195, 0.192, 0.202],
+    bgr_to_rgb=True,
+    pad_val=0,
+    seg_pad_val=255)
+norm_cfg = dict(type='SyncBN', requires_grad=True)
+model = dict(
+    type='EncoderDecoder',
+    data_preprocessor=data_preprocessor,
+    backbone=dict(
+        type='DDRNet',
+        in_channels=3,
+        channels=32,
+        ppm_channels=64,
+        norm_cfg=norm_cfg,
+        align_corners=False,
+        init_cfg=None),
+    # init_cfg=dict(type='Pretrained', checkpoint=checkpoint)),
+    decode_head=dict(
+        type='DDRHead',
+        in_channels=64 * 2,
+        channels=64,
+        dropout_ratio=0.,
+        num_classes=4,
+        align_corners=False,
+        norm_cfg=norm_cfg,
+        loss_decode=[
+            dict(
+                type='LovaszLoss',
+                class_weight=class_weight,
+                loss_weight=1.0),
+            dict(
+                type='OhemCrossEntropy',
+                thres=0.9,
+                min_kept=131072,
+                class_weight=class_weight,
+                loss_weight=0.4),
+        ]),
+
+    train_cfg=dict(),
+    test_cfg=dict(mode='whole'))
+
+iters = 6000
+# optimizer
+optimizer = dict(type='SGD', lr=0.01, momentum=0.9, weight_decay=0.0005)
+optim_wrapper = dict(type='OptimWrapper', optimizer=optimizer, clip_grad=None)
+# learning policy
+param_scheduler = [
+    dict(
+        type='PolyLR',
+        eta_min=0.0001,
+        power=0.9,
+        begin=0,
+        end=iters,
+        by_epoch=False)
+]
+
+train_cfg = dict(type='IterBasedTrainLoop', max_iters=iters, val_interval=100)
+val_cfg = dict(type='ValLoop')
+test_cfg = dict(type='TestLoop')
+default_hooks = dict(
+    timer=dict(type='IterTimerHook'),
+    logger=dict(type='LoggerHook', interval=5, log_metric_by_epoch=False),
+    param_scheduler=dict(type='ParamSchedulerHook'),
+    checkpoint=dict(
+        type='CheckpointHook',
+        by_epoch=False,
+        interval=100,
+        max_keep_ckpts=2,
+        save_best='mIoU'),
+    sampler_seed=dict(type='DistSamplerSeedHook'),
+    visualization=dict(type='SegVisualizationHook'))
+
+randomness = dict(seed=304)
diff --git a/configs/ddrnet/ohem-FAPPM_avgp-cbam-lovasz-auxhead-slim.py b/configs/ddrnet/ohem-FAPPM_avgp-cbam-lovasz-auxhead-slim.py
new file mode 100644
index 0000000000..254ed2e277
--- /dev/null
+++ b/configs/ddrnet/ohem-FAPPM_avgp-cbam-lovasz-auxhead-slim.py
@@ -0,0 +1,103 @@
+_base_ = [
+    '../_base_/datasets/feces.py',
+    '../_base_/default_runtime.py',
+]
+
+class_weight = [0.7, 0.8, 1.1, 1.2]
+
+checkpoint = '/home/panjm/hepengguang/mmlab_he/mmsegmentation-dev-1.x/checkpoints/ddrnet23-in1kpre_3rdparty-9ca29f62.pth'  # noqa
+crop_size = (224, 224)
+data_preprocessor = dict(
+    type='SegDataPreProcessor',
+    size=crop_size,
+    mean=[0.609, 0.604, 0.578],
+    std=[0.195, 0.192, 0.202],
+    bgr_to_rgb=True,
+    pad_val=0,
+    seg_pad_val=255)
+norm_cfg = dict(type='SyncBN', requires_grad=True)
+model = dict(
+    type='EncoderDecoder',
+    data_preprocessor=data_preprocessor,
+    backbone=dict(
+        type='FAPPM_AVGP',
+        in_channels=3,
+        channels=32,
+        ppm_channels=64,
+        norm_cfg=norm_cfg,
+        align_corners=False,
+        init_cfg=None),
+    decode_head=dict(
+        type='DDRHead',
+        in_channels=32 * 4,
+        channels=64,
+        dropout_ratio=0.,
+        num_classes=4,
+        align_corners=False,
+        norm_cfg=norm_cfg,
+        loss_decode=[
+            dict(
+                type='LovaszLoss', class_weight=class_weight, loss_weight=1.0),
+            dict(
+                type='OhemCrossEntropy',
+                thres=0.9,
+                min_kept=131072,
+                class_weight=class_weight,
+                loss_weight=0.4),
+        ]),
+    auxiliary_head=dict(
+        type='DDRHead',
+        in_channels=32 * 4,
+        channels=64,
+        dropout_ratio=0.,
+        num_classes=4,
+        align_corners=False,
+        norm_cfg=norm_cfg,
+        loss_decode=[
+            dict(
+                type='LovaszLoss', class_weight=class_weight, loss_weight=1.0),
+            dict(
+                type='OhemCrossEntropy',
+                thres=0.9,
+                min_kept=131072,
+                class_weight=class_weight,
+                loss_weight=0.4),
+        ]),
+
+    # model training and testing settings
+    train_cfg=dict(),
+    test_cfg=dict(mode='whole'))
+
+iters = 6000
+# optimizer
+optimizer = dict(type='SGD', lr=0.01, momentum=0.9, weight_decay=0.0005)
+optim_wrapper = dict(type='OptimWrapper', optimizer=optimizer, clip_grad=None)
+# learning policy
+param_scheduler = [
+    dict(
+        type='PolyLR',
+        eta_min=0.0001,
+        power=0.9,
+        begin=0,
+        end=iters,
+        by_epoch=False)
+]
+
+# training schedule: 6k iterations, validating every 100 iterations
+train_cfg = dict(type='IterBasedTrainLoop', max_iters=iters, val_interval=100)
+val_cfg = dict(type='ValLoop')
+test_cfg = dict(type='TestLoop')
+default_hooks = dict(
+    timer=dict(type='IterTimerHook'),
+    logger=dict(type='LoggerHook', interval=5, log_metric_by_epoch=False),
+    param_scheduler=dict(type='ParamSchedulerHook'),
+    checkpoint=dict(
+        type='CheckpointHook',
+        by_epoch=False,
+        interval=100,
+        max_keep_ckpts=2,
+        save_best='mDice'),
+    sampler_seed=dict(type='DistSamplerSeedHook'),
+    visualization=dict(type='SegVisualizationHook'))
+
+randomness = dict(seed=304)
diff --git a/configs/ddrnet/ohem-FAPPM_avgp-cbam-lovasz-auxhead.py b/configs/ddrnet/ohem-FAPPM_avgp-cbam-lovasz-auxhead.py
new file mode 100644
index 0000000000..c639b306ad
--- /dev/null
+++ b/configs/ddrnet/ohem-FAPPM_avgp-cbam-lovasz-auxhead.py
@@ -0,0 +1,103 @@
+_base_ = [
+    '../_base_/datasets/feces.py',
+    '../_base_/default_runtime.py',
+]
+
+class_weight = [0.7, 0.8, 1.1, 1.2]
+
+checkpoint = '/home/panjm/hepengguang/mmlab_he/mmsegmentation-dev-1.x/checkpoints/ddrnet23-in1kpre_3rdparty-9ca29f62.pth'  # noqa
+crop_size = (224, 224)
+data_preprocessor = dict(
+    type='SegDataPreProcessor',
+    size=crop_size,
+    mean=[0.609, 0.604, 0.578],
+    std=[0.195, 0.192, 0.202],
+    bgr_to_rgb=True,
+    pad_val=0,
+    seg_pad_val=255)
+norm_cfg = dict(type='SyncBN', requires_grad=True)
+model = dict(
+    type='EncoderDecoder',
+    data_preprocessor=data_preprocessor,
+    backbone=dict(
+        type='FAPPM_AVGP',
+        in_channels=3,
+        channels=64,
+        ppm_channels=96,
+        norm_cfg=norm_cfg,
+        align_corners=False,
+        init_cfg=None),
+    decode_head=dict(
+        type='DDRHead',
+        in_channels=64 * 4,
+        channels=128,
+        dropout_ratio=0.,
+        num_classes=4,
+        align_corners=False,
+        norm_cfg=norm_cfg,
+        loss_decode=[
+            dict(
+                type='LovaszLoss', class_weight=class_weight, loss_weight=1.0),
+            dict(
+                type='OhemCrossEntropy',
+                thres=0.9,
+                min_kept=131072,
+                class_weight=class_weight,
+                loss_weight=0.4),
+        ]),
+    auxiliary_head=dict(
+        type='DDRHead',
+        in_channels=64 * 4,
+        channels=128,
+        dropout_ratio=0.,
+        num_classes=4,
+        align_corners=False,
+        norm_cfg=norm_cfg,
+        loss_decode=[
+            dict(
+                type='LovaszLoss', class_weight=class_weight, loss_weight=1.0),
+            dict(
+                type='OhemCrossEntropy',
+                thres=0.9,
+                min_kept=131072,
+                class_weight=class_weight,
+                loss_weight=0.4),
+        ]),
+
+    # model training and testing settings
+    train_cfg=dict(),
+    test_cfg=dict(mode='whole'))
+
+iters = 6000
+# optimizer
+optimizer = dict(type='SGD', lr=0.01, momentum=0.9, weight_decay=0.0005)
+optim_wrapper = dict(type='OptimWrapper', optimizer=optimizer, clip_grad=None)
+# learning policy
+param_scheduler = [
+    dict(
+        type='PolyLR',
+        eta_min=0.0001,
+        power=0.9,
+        begin=0,
+        end=iters,
+        by_epoch=False)
+]
+
+# training schedule: 6k iterations, validating every 100 iterations
+train_cfg = dict(type='IterBasedTrainLoop', max_iters=iters, val_interval=100)
+val_cfg = dict(type='ValLoop')
+test_cfg = dict(type='TestLoop')
+default_hooks = dict(
+    timer=dict(type='IterTimerHook'),
+    logger=dict(type='LoggerHook', interval=5, log_metric_by_epoch=False),
+    param_scheduler=dict(type='ParamSchedulerHook'),
+    checkpoint=dict(
+        type='CheckpointHook',
+        by_epoch=False,
+        interval=100,
+        max_keep_ckpts=2,
+        save_best='mDice'),
+    sampler_seed=dict(type='DistSamplerSeedHook'),
+    visualization=dict(type='SegVisualizationHook'))
+
+randomness = dict(seed=304)
diff --git a/configs/ddrnet/ohem-FAPPM_conv-cbam-lovasz-auxhead-group-slim.py b/configs/ddrnet/ohem-FAPPM_conv-cbam-lovasz-auxhead-group-slim.py
new file mode 100644
index 0000000000..5af551088f
--- /dev/null
+++ b/configs/ddrnet/ohem-FAPPM_conv-cbam-lovasz-auxhead-group-slim.py
@@ -0,0 +1,102 @@
+_base_ = [
+    '../_base_/datasets/feces.py',
+    '../_base_/default_runtime.py',
+]
+
+class_weight = [0.7, 0.8, 1.1, 1.2]
+
+checkpoint = '/home/panjm/hepengguang/mmlab_he/mmsegmentation-dev-1.x/checkpoints/ddrnet23-in1kpre_3rdparty-9ca29f62.pth'  # noqa
+crop_size = (224, 224)
+data_preprocessor = dict(
+    type='SegDataPreProcessor',
+    size=crop_size,
+    mean=[0.609, 0.604, 0.578],
+    std=[0.195, 0.192, 0.202],
+    bgr_to_rgb=True,
+    pad_val=0,
+    seg_pad_val=255)
+norm_cfg = dict(type='SyncBN', requires_grad=True)
+model = dict(
+    type='EncoderDecoder',
+    data_preprocessor=data_preprocessor,
+    backbone=dict(
+        type='FAPPM_CONV_group_slim',
+        in_channels=3,
+        channels=32,  # 64-32
+        ppm_channels=64,  # 96-64
+        norm_cfg=norm_cfg,
+        align_corners=False,
+        init_cfg=None),
+    decode_head=dict(
+        type='DDRHead',
+        in_channels=64 * 2,  # 256-128
+        channels=64,  # 128-64
+        dropout_ratio=0.,
+        num_classes=4,
+        align_corners=False,
+        norm_cfg=norm_cfg,
+        loss_decode=[
+            dict(
+                type='LovaszLoss', class_weight=class_weight, loss_weight=1.0),
+            dict(
+                type='OhemCrossEntropy',
+                thres=0.9,
+                min_kept=131072,
+                class_weight=class_weight,
+                loss_weight=0.4),
+        ]),
+    auxiliary_head=dict(
+        type='DDRHead',
+        in_channels=64 * 2,  # 256-128
+        channels=64,  # 128-64
+        dropout_ratio=0.,
+        num_classes=4,
+        align_corners=False,
+        norm_cfg=norm_cfg,
+        loss_decode=[
+            dict(
+                type='LovaszLoss', class_weight=class_weight, loss_weight=1.0),
+            dict(
+                type='OhemCrossEntropy',
+                thres=0.9,
+                min_kept=131072,
+                class_weight=class_weight,
+                loss_weight=0.4),
+        ]),
+
+    # model training and testing settings
+    train_cfg=dict(),
+    test_cfg=dict(mode='whole'))
+
+iters = 6000
+# optimizer
+optimizer = dict(type='SGD', lr=0.01, momentum=0.9, weight_decay=0.0005)
+optim_wrapper = dict(type='OptimWrapper', optimizer=optimizer, clip_grad=None)
+# learning policy
+param_scheduler = [
+    dict(
+        type='PolyLR',
+        eta_min=0.0001,
+        power=0.9,
+        begin=0,
+        end=iters,
+        by_epoch=False)
+]
+
+train_cfg = dict(type='IterBasedTrainLoop', max_iters=iters, val_interval=100)
+val_cfg = dict(type='ValLoop')
+test_cfg = dict(type='TestLoop')
+default_hooks = dict(
+    timer=dict(type='IterTimerHook'),
+    logger=dict(type='LoggerHook', interval=5, log_metric_by_epoch=False),
+    param_scheduler=dict(type='ParamSchedulerHook'),
+    checkpoint=dict(
+        type='CheckpointHook',
+        by_epoch=False,
+        interval=100,
+        max_keep_ckpts=2,
+        save_best='mDice'),
+    sampler_seed=dict(type='DistSamplerSeedHook'),
+    visualization=dict(type='SegVisualizationHook'))
+
+randomness = dict(seed=304)
diff --git a/configs/ddrnet/ohem-FAPPM_conv-cbam-lovasz-auxhead-group.py b/configs/ddrnet/ohem-FAPPM_conv-cbam-lovasz-auxhead-group.py
new file mode 100644
index 0000000000..8272c97310
--- /dev/null
+++ b/configs/ddrnet/ohem-FAPPM_conv-cbam-lovasz-auxhead-group.py
@@ -0,0 +1,103 @@
+_base_ = [
+    '../_base_/datasets/feces.py',
+    '../_base_/default_runtime.py',
+]
+
+class_weight = [0.7, 0.8, 1.1, 1.2]
+
+checkpoint = '/home/panjm/hepengguang/mmlab_he/mmsegmentation-dev-1.x/checkpoints/ddrnet23-in1kpre_3rdparty-9ca29f62.pth'  # noqa
+crop_size = (224, 224)
+data_preprocessor = dict(
+    type='SegDataPreProcessor',
+    size=crop_size,
+    mean=[0.609, 0.604, 0.578],
+    std=[0.195, 0.192, 0.202],
+    bgr_to_rgb=True,
+    pad_val=0,
+    seg_pad_val=255)
+norm_cfg = dict(type='SyncBN', requires_grad=True)
+model = dict(
+    type='EncoderDecoder',
+    data_preprocessor=data_preprocessor,
+    backbone=dict(
+        type='FAPPM_CONV_group',
+        in_channels=3,
+        channels=64,
+        ppm_channels=96,
+        norm_cfg=norm_cfg,
+        align_corners=False,
+        init_cfg=None),
+    decode_head=dict(
+        type='DDRHead',
+        in_channels=64 * 4,
+        channels=128,
+        dropout_ratio=0.,
+        num_classes=4,
+        align_corners=False,
+        norm_cfg=norm_cfg,
+        loss_decode=[
+            dict(
+                type='LovaszLoss', class_weight=class_weight, loss_weight=1.0),
+            dict(
+                type='OhemCrossEntropy',
+                thres=0.9,
+                min_kept=131072,
+                class_weight=class_weight,
+                loss_weight=0.4),
+        ]),
+    auxiliary_head=dict(
+        type='DDRHead',
+        in_channels=64 * 4,
+        channels=128,
+        dropout_ratio=0.,
+        num_classes=4,
+        align_corners=False,
+        norm_cfg=norm_cfg,
+        loss_decode=[
+            dict(
+                type='LovaszLoss', class_weight=class_weight, loss_weight=1.0),
+            dict(
+                type='OhemCrossEntropy',
+                thres=0.9,
+                min_kept=131072,
+                class_weight=class_weight,
+                loss_weight=0.4),
+        ]),
+
+    # model training and testing settings
+    train_cfg=dict(),
+    test_cfg=dict(mode='whole'))
+
+iters = 6000
+# optimizer
+optimizer = dict(type='SGD', lr=0.01, momentum=0.9, weight_decay=0.0005)
+optim_wrapper = dict(type='OptimWrapper', optimizer=optimizer, clip_grad=None)
+# learning policy
+param_scheduler = [
+    dict(
+        type='PolyLR',
+        eta_min=0.0001,
+        power=0.9,
+        begin=0,
+        end=iters,
+        by_epoch=False)
+]
+
+# training schedule: 6k iterations, validating every 100 iterations
+train_cfg = dict(type='IterBasedTrainLoop', max_iters=iters, val_interval=100)
+val_cfg = dict(type='ValLoop')
+test_cfg = dict(type='TestLoop')
+default_hooks = dict(
+    timer=dict(type='IterTimerHook'),
+    logger=dict(type='LoggerHook', interval=5, log_metric_by_epoch=False),
+    param_scheduler=dict(type='ParamSchedulerHook'),
+    checkpoint=dict(
+        type='CheckpointHook',
+        by_epoch=False,
+        interval=100,
+        max_keep_ckpts=2,
+        save_best='mDice'),
+    sampler_seed=dict(type='DistSamplerSeedHook'),
+    visualization=dict(type='SegVisualizationHook'))
+
+randomness = dict(seed=304)
diff --git a/configs/ddrnet/ohem-FAPPM_conv-cbam-lovasz-auxhead-slim.py b/configs/ddrnet/ohem-FAPPM_conv-cbam-lovasz-auxhead-slim.py
new file mode 100644
index 0000000000..ed826334c6
--- /dev/null
+++ b/configs/ddrnet/ohem-FAPPM_conv-cbam-lovasz-auxhead-slim.py
@@ -0,0 +1,101 @@
+_base_ = [
+    '../_base_/datasets/feces.py',
+    '../_base_/default_runtime.py',
+]
+
+class_weight = [0.7, 0.8, 1.1, 1.2]
+
+checkpoint = '/home/panjm/hepengguang/mmlab_he/mmsegmentation-dev-1.x/checkpoints/ddrnet23-in1kpre_3rdparty-9ca29f62.pth'  # noqa
+crop_size = (224, 224)
+data_preprocessor = dict(
+    type='SegDataPreProcessor',
+    size=crop_size,
+    mean=[0.609, 0.604, 0.578],
+    std=[0.195, 0.192, 0.202],
+    bgr_to_rgb=True,
+    pad_val=0,
+    seg_pad_val=255)
+norm_cfg = dict(type='SyncBN', requires_grad=True)
+model = dict(
+    type='EncoderDecoder',
+    data_preprocessor=data_preprocessor,
+    backbone=dict(
+        type='FAPPM_CONV_slim',
+        in_channels=3,
+        channels=32,  # 64-32
+        ppm_channels=64,  # 96-64
+        norm_cfg=norm_cfg,
+        align_corners=False,
+        init_cfg=None),
+    decode_head=dict(
+        type='DDRHead',
+        in_channels=64 * 2,  # 256-128
+        channels=64,  # 128-64
+        dropout_ratio=0.,
+        num_classes=4,
+        align_corners=False,
+        norm_cfg=norm_cfg,
+        loss_decode=[
+            dict(
+                type='LovaszLoss', class_weight=class_weight, loss_weight=1.0),
+            dict(
+                type='OhemCrossEntropy',
+                thres=0.9,
+                min_kept=131072,
+                class_weight=class_weight,
+                loss_weight=0.4),
+        ]),
+    auxiliary_head=dict(
+        type='DDRHead',
+        in_channels=64 * 2,  # 256-128
+        channels=64,  # 128-64
+        dropout_ratio=0.,
+        num_classes=4,
+        align_corners=False,
+        norm_cfg=norm_cfg,
+        loss_decode=[
+            dict(
+                type='LovaszLoss', class_weight=class_weight, loss_weight=1.0),
+            dict(
+                type='OhemCrossEntropy',
+                thres=0.9,
+                min_kept=131072,
+                class_weight=class_weight,
+                loss_weight=0.4),
+        ]),
+
+    train_cfg=dict(),
+    test_cfg=dict(mode='whole'))
+
+iters = 6000
+# optimizer
+optimizer = dict(type='SGD', lr=0.01, momentum=0.9, weight_decay=0.0005)
+optim_wrapper = dict(type='OptimWrapper', optimizer=optimizer, clip_grad=None)
+# learning policy
+param_scheduler = [
+    dict(
+        type='PolyLR',
+        eta_min=0.0001,
+        power=0.9,
+        begin=0,
+        end=iters,
+        by_epoch=False)
+]
+
+train_cfg = dict(type='IterBasedTrainLoop', max_iters=iters, val_interval=100)
+val_cfg = dict(type='ValLoop')
+test_cfg = dict(type='TestLoop')
+default_hooks = dict(
+    timer=dict(type='IterTimerHook'),
+    logger=dict(type='LoggerHook', interval=5, log_metric_by_epoch=False),
+    param_scheduler=dict(type='ParamSchedulerHook'),
+    checkpoint=dict(
+        type='CheckpointHook',
+        by_epoch=False,
+        interval=100,
+        max_keep_ckpts=2,
+        save_best='mDice'),
+    sampler_seed=dict(type='DistSamplerSeedHook'),
+    visualization=dict(type='SegVisualizationHook'))
+
+randomness = dict(seed=304)
diff --git a/configs/ddrnet/ohem-FAPPM_conv-cbam-lovasz-auxhead-slim2.py b/configs/ddrnet/ohem-FAPPM_conv-cbam-lovasz-auxhead-slim2.py
new file mode 100644
index 0000000000..595d17b4d5
--- /dev/null
+++ b/configs/ddrnet/ohem-FAPPM_conv-cbam-lovasz-auxhead-slim2.py
@@ -0,0 +1,86 @@
+_base_ = [
+    '../_base_/datasets/feces.py',
+    '../_base_/default_runtime.py',
+]
+
+class_weight = [0.7, 0.8, 1.1, 1.2]  # one weight per class (num_classes=4)
+
+checkpoint = '/home/panjm/hepengguang/mmlab_he/mmsegmentation-dev-1.x/checkpoints/ddrnet23-in1kpre_3rdparty-9ca29f62.pth'  # noqa
+crop_size = (224, 224)
+data_preprocessor = dict(
+    type='SegDataPreProcessor',
+    size=crop_size,
+    mean=[0.609, 0.604, 0.578],  # NOTE(review): 0-1 scale? confirm vs 0-255
+    std=[0.195, 0.192, 0.202],
+    bgr_to_rgb=True,
+    pad_val=0,
+    seg_pad_val=255)
+norm_cfg = dict(type='SyncBN', requires_grad=True)
+model = dict(
+    type='EncoderDecoder',
+    data_preprocessor=data_preprocessor,
+    backbone=dict(
+        type='FAPPM_CONV_slim2',
+        in_channels=3,
+        channels=32,  # 64 in the non-slim config
+        ppm_channels=96,  # same as the non-slim config
+        norm_cfg=norm_cfg,
+        align_corners=False,
+        init_cfg=None),  # the `checkpoint` path above is not used
+    decode_head=dict(
+        type='DDRHead',
+        in_channels=64 * 2,  # 64 * 4 in the non-slim config
+        channels=96,  # 128 in the non-slim config
+        dropout_ratio=0.,
+        num_classes=4,
+        align_corners=False,
+        norm_cfg=norm_cfg,
+        loss_decode=[
+            dict(
+                type='LovaszLoss', class_weight=class_weight, loss_weight=1.0),
+            dict(
+                type='OhemCrossEntropy',
+                thres=0.9,
+                min_kept=131072,
+                class_weight=class_weight,
+                loss_weight=0.4),
+        ]),
+
+    # model training and testing settings
+    train_cfg=dict(),
+    test_cfg=dict(mode='whole'))
+
+# train_dataloader = dict(batch_size=6, num_workers=4)
+
+iters = 6000  # shared by the LR schedule and the training loop below
+# optimizer
+optimizer = dict(type='SGD', lr=0.01, momentum=0.9, weight_decay=0.0005)
+optim_wrapper = dict(type='OptimWrapper', optimizer=optimizer, clip_grad=None)
+# learning policy
+param_scheduler = [
+    dict(
+        type='PolyLR',
+        eta_min=0.0001,
+        power=0.9,
+        begin=0,
+        end=iters,
+        by_epoch=False)
+]
+
+train_cfg = dict(type='IterBasedTrainLoop', max_iters=iters, val_interval=100)
+val_cfg = dict(type='ValLoop')
+test_cfg = dict(type='TestLoop')
+default_hooks = dict(
+    timer=dict(type='IterTimerHook'),
+    logger=dict(type='LoggerHook', interval=5, log_metric_by_epoch=False),
+    param_scheduler=dict(type='ParamSchedulerHook'),
+    checkpoint=dict(
+        type='CheckpointHook',
+        by_epoch=False,
+        interval=100,  # matches val_interval so each evaluation can be saved
+        max_keep_ckpts=2,
+        save_best='mDice'),
+    sampler_seed=dict(type='DistSamplerSeedHook'),
+    visualization=dict(type='SegVisualizationHook'))
+
+randomness = dict(seed=304)
diff --git a/configs/ddrnet/ohem-FAPPM_conv-cbam-lovasz-auxhead.py b/configs/ddrnet/ohem-FAPPM_conv-cbam-lovasz-auxhead.py
new file mode 100644
index 0000000000..f46dedf9a4
--- /dev/null
+++ b/configs/ddrnet/ohem-FAPPM_conv-cbam-lovasz-auxhead.py
@@ -0,0 +1,103 @@
+_base_ = [
+    '../_base_/datasets/feces.py',
+    '../_base_/default_runtime.py',
+]
+
+class_weight = [0.7, 0.8, 1.1, 1.2]
+
+checkpoint = '/home/panjm/hepengguang/mmlab_he/mmsegmentation-dev-1.x/checkpoints/ddrnet23-in1kpre_3rdparty-9ca29f62.pth'  # noqa
+crop_size = (224, 224)
+data_preprocessor = dict(
+    type='SegDataPreProcessor',
+    size=crop_size,
+    mean=[0.609, 0.604, 0.578],
+    std=[0.195, 0.192, 0.202],
+    bgr_to_rgb=True,
+    pad_val=0,
+    seg_pad_val=255)
+norm_cfg = dict(type='SyncBN', requires_grad=True)
+model = dict(
+    type='EncoderDecoder',
+    data_preprocessor=data_preprocessor,
+    backbone=dict(
+        type='FAPPM_CONV',
+        in_channels=3,
+        channels=64,
+        ppm_channels=96,
+        norm_cfg=norm_cfg,
+        align_corners=False,
+        init_cfg=None),
+    decode_head=dict(
+        type='DDRHead',
+        in_channels=64 * 4,
+        channels=128,
+        dropout_ratio=0.,
+        num_classes=4,
+        align_corners=False,
+        norm_cfg=norm_cfg,
+        loss_decode=[
+            dict(
+                type='LovaszLoss', class_weight=class_weight, loss_weight=1.0),
+            dict(
+                type='OhemCrossEntropy',
+                thres=0.9,
+                min_kept=131072,
+                class_weight=class_weight,
+                loss_weight=0.4),
+        ]),
+    auxiliary_head=dict(
+        type='DDRHead',
+        in_channels=64 * 4,
+        channels=128,
+        dropout_ratio=0.,
+        num_classes=4,
+        align_corners=False,
+        norm_cfg=norm_cfg,
+        loss_decode=[
+            dict(
+                type='LovaszLoss', class_weight=class_weight, loss_weight=1.0),
+            dict(
+                type='OhemCrossEntropy',
+                thres=0.9,
+                min_kept=131072,
+                class_weight=class_weight,
+                loss_weight=0.4),
+        ]),
+
+    # model training and testing settings
+    train_cfg=dict(),
+    test_cfg=dict(mode='whole'))
+
+iters = 6000
+# optimizer
+optimizer = dict(type='SGD', lr=0.01, momentum=0.9, weight_decay=0.0005)
+optim_wrapper = dict(type='OptimWrapper', optimizer=optimizer, clip_grad=None)
+# learning policy
+param_scheduler = [
+    dict(
+        type='PolyLR',
+        eta_min=0.0001,
+        power=0.9,
+        begin=0,
+        end=iters,
+        by_epoch=False)
+]
+
+# training schedule: 6k iterations, validating every 100 iterations
+train_cfg = dict(type='IterBasedTrainLoop', max_iters=iters, val_interval=100)
+val_cfg = dict(type='ValLoop')
+test_cfg = dict(type='TestLoop')
+default_hooks = dict(
+    timer=dict(type='IterTimerHook'),
+    logger=dict(type='LoggerHook', interval=5, log_metric_by_epoch=False),
+    param_scheduler=dict(type='ParamSchedulerHook'),
+    checkpoint=dict(
+        type='CheckpointHook',
+        by_epoch=False,
+        interval=100,
+        max_keep_ckpts=2,
+        save_best='mDice'),
+    sampler_seed=dict(type='DistSamplerSeedHook'),
+    visualization=dict(type='SegVisualizationHook'))
+
+randomness = dict(seed=304)
diff --git a/configs/ddrnet/ohem-FAPPM_conv-lovasz-auxhead.py b/configs/ddrnet/ohem-FAPPM_conv-lovasz-auxhead.py
new file mode 100644
index 0000000000..b66cbb3f62
--- /dev/null
+++ b/configs/ddrnet/ohem-FAPPM_conv-lovasz-auxhead.py
@@ -0,0 +1,103 @@
+_base_ = [
+    '../_base_/datasets/feces.py',
+    '../_base_/default_runtime.py',
+]
+
+class_weight = [0.7, 0.8, 1.1, 1.2]
+
+checkpoint = '/home/panjm/hepengguang/mmlab_he/mmsegmentation-dev-1.x/checkpoints/ddrnet23-in1kpre_3rdparty-9ca29f62.pth'  # noqa
+crop_size = (224, 224)
+data_preprocessor = dict(
+    type='SegDataPreProcessor',
+    size=crop_size,
+    mean=[0.609, 0.604, 0.578],
+    std=[0.195, 0.192, 0.202],
+    bgr_to_rgb=True,
+    pad_val=0,
+    seg_pad_val=255)
+norm_cfg = dict(type='SyncBN', requires_grad=True)
+model = dict(
+    type='EncoderDecoder',
+    data_preprocessor=data_preprocessor,
+    backbone=dict(
+        type='FAPPM_CONV_nocbam',
+        in_channels=3,
+        channels=64,
+        ppm_channels=96,
+        norm_cfg=norm_cfg,
+        align_corners=False,
+        init_cfg=None),
+    decode_head=dict(
+        type='DDRHead',
+        in_channels=64 * 4,
+        channels=128,
+        dropout_ratio=0.,
+        num_classes=4,
+        align_corners=False,
+        norm_cfg=norm_cfg,
+        loss_decode=[
+            dict(
+                type='LovaszLoss', class_weight=class_weight, loss_weight=1.0),
+            dict(
+                type='OhemCrossEntropy',
+                thres=0.9,
+                min_kept=131072,
+                class_weight=class_weight,
+                loss_weight=0.4),
+        ]),
+    auxiliary_head=dict(
+        type='DDRHead',
+        in_channels=64 * 4,
+        channels=128,
+        dropout_ratio=0.,
+        num_classes=4,
+        align_corners=False,
+        norm_cfg=norm_cfg,
+        loss_decode=[
+            dict(
+                type='LovaszLoss', class_weight=class_weight, loss_weight=1.0),
+            dict(
+                type='OhemCrossEntropy',
+                thres=0.9,
+                min_kept=131072,
+                class_weight=class_weight,
+                loss_weight=0.4),
+        ]),
+
+    # model training and testing settings
+    train_cfg=dict(),
+    test_cfg=dict(mode='whole'))
+
+iters = 6000
+# optimizer
+optimizer = dict(type='SGD', lr=0.01, momentum=0.9, weight_decay=0.0005)
+optim_wrapper = dict(type='OptimWrapper', optimizer=optimizer, clip_grad=None)
+# learning policy
+param_scheduler = [
+    dict(
+        type='PolyLR',
+        eta_min=0.0001,
+        power=0.9,
+        begin=0,
+        end=iters,
+        by_epoch=False)
+]
+
+# training schedule: 6k iterations, validating every 100 iterations
+train_cfg = dict(type='IterBasedTrainLoop', max_iters=iters, val_interval=100)
+val_cfg = dict(type='ValLoop')
+test_cfg = dict(type='TestLoop')
+default_hooks = dict(
+    timer=dict(type='IterTimerHook'),
+    logger=dict(type='LoggerHook', interval=5, log_metric_by_epoch=False),
+    param_scheduler=dict(type='ParamSchedulerHook'),
+    checkpoint=dict(
+        type='CheckpointHook',
+        by_epoch=False,
+        interval=100,
+        max_keep_ckpts=2,
+        save_best='mDice'),
+    sampler_seed=dict(type='DistSamplerSeedHook'),
+    visualization=dict(type='SegVisualizationHook'))
+
+randomness = dict(seed=304)
diff --git a/mmseg/models/backbones/ddrnet.py b/mmseg/models/backbones/ddrnet.py
index 4508aade82..a49a12fa6b 100644
--- a/mmseg/models/backbones/ddrnet.py
+++ b/mmseg/models/backbones/ddrnet.py
@@ -1,13 +1,110 @@
 # Copyright (c) OpenMMLab. All rights reserved.
+import torch
 import torch.nn as nn
 from mmcv.cnn import ConvModule, build_norm_layer
 from mmengine.model import BaseModule
 
-from mmseg.models.utils import DAPPM, BasicBlock, Bottleneck, resize
+from mmseg.models.utils import DAPPM, DAPPM_cbam, FAPPM_conv, FAPPM_avgp, BasicBlock, BasicBlock_cbam, \
+    Bottleneck, Bottleneck_cbam, resize, BasicBlock_cbam_group, Bottleneck_cbam_group, FAPPM_conv_group, \
+    BasicBlock_cbam_group_r8, Bottleneck_cbam_group_r8, FAPPM_conv_slim, FAPPM_conv_nocbam
 from mmseg.registry import MODELS
 from mmseg.utils import OptConfigType
 
 
+class ChannelAttention(nn.Module):
+    """Per-channel sigmoid gate from avg- and max-pooled features."""
+    def __init__(self, in_planes, ratio=16):
+        super(ChannelAttention, self).__init__()
+        self.avg_pool = nn.AdaptiveAvgPool2d(1)  # pool to 1x1
+        self.max_pool = nn.AdaptiveMaxPool2d(1)  # pool to 1x1
+        self.fc1 = nn.Conv2d(in_planes, in_planes // ratio, 1, bias=False)
+        self.relu1 = nn.ReLU()
+        self.fc2 = nn.Conv2d(in_planes // ratio, in_planes, 1, bias=False)
+        self.sigmoid = nn.Sigmoid()
+
+    def forward(self, x):
+        avg_out = self.fc2(self.relu1(self.fc1(self.avg_pool(x))))
+        max_out = self.fc2(self.relu1(self.fc1(self.max_pool(x))))
+        out = avg_out + max_out  # same fc1/fc2 weights serve both paths
+        return self.sigmoid(out)
+
+
+class ChannelAttention_group(nn.Module):
+    """ChannelAttention variant whose two 1x1 convs use ``groups=4``."""
+    def __init__(self, in_planes, ratio=16):
+        super(ChannelAttention_group, self).__init__()
+        self.avg_pool = nn.AdaptiveAvgPool2d(1)  # pool to 1x1
+        self.max_pool = nn.AdaptiveMaxPool2d(1)  # pool to 1x1
+        self.fc1 = nn.Conv2d(  # channel counts must be divisible by 4
+            in_planes, in_planes // ratio, 1, bias=False, groups=4)
+        self.relu1 = nn.ReLU()
+        self.fc2 = nn.Conv2d(  # channel counts must be divisible by 4
+            in_planes // ratio, in_planes, 1, bias=False, groups=4)
+        self.sigmoid = nn.Sigmoid()
+
+    def forward(self, x):
+        avg_out = self.fc2(self.relu1(self.fc1(self.avg_pool(x))))
+        max_out = self.fc2(self.relu1(self.fc1(self.max_pool(x))))
+        out = avg_out + max_out  # same fc1/fc2 weights serve both paths
+        return self.sigmoid(out)
+
+
+class SpatialAttention(nn.Module):
+    """Per-pixel sigmoid gate from channel-wise mean and max maps."""
+    def __init__(self, kernel_size=7):
+        super(SpatialAttention, self).__init__()
+        assert kernel_size in (3, 7), 'kernel size must be 3 or 7'
+        padding = 3 if kernel_size == 7 else 1  # keeps H x W unchanged
+        self.conv1 = nn.Conv2d(2, 1, kernel_size, padding=padding, bias=False)
+        self.sigmoid = nn.Sigmoid()
+
+    def forward(self, x):
+        avg_out = torch.mean(x, dim=1, keepdim=True)
+        max_out, _ = torch.max(x, dim=1, keepdim=True)
+        x = torch.cat([avg_out, max_out], dim=1)  # 2-channel input of conv1
+        x = self.conv1(x)
+        return self.sigmoid(x)
+
+
+class CBAM(nn.Module):
+    """Gate the input by ChannelAttention, then by SpatialAttention."""
+    def __init__(self, in_planes, ratio=16, kernel_size=7):
+        super(CBAM, self).__init__()
+        self.ca = ChannelAttention(in_planes, ratio)
+        self.sa = SpatialAttention(kernel_size)
+
+    def forward(self, x):
+        out = x * self.ca(x)  # channel-wise reweighting
+        result = out * self.sa(out)  # spatial reweighting of that result
+        return result
+
+
+class CBAM_group(nn.Module):
+    """CBAM whose channel branch is ChannelAttention_group."""
+    def __init__(self, in_planes, ratio=16, kernel_size=7):
+        super(CBAM_group, self).__init__()
+        self.ca = ChannelAttention_group(in_planes, ratio)
+        self.sa = SpatialAttention(kernel_size)
+
+    def forward(self, x):
+        out = x * self.ca(x)  # channel-wise reweighting
+        result = out * self.sa(out)  # spatial reweighting of that result
+        return result
+
+
+class CBAM_group_r8(nn.Module):
+    """CBAM_group with the default reduction ``ratio`` set to 8."""
+    def __init__(self, in_planes, ratio=8, kernel_size=7):
+        super(CBAM_group_r8, self).__init__()
+        self.ca = ChannelAttention_group(in_planes, ratio)
+        self.sa = SpatialAttention(kernel_size)
+
+    def forward(self, x):
+        out = x * self.ca(x)  # channel-wise reweighting
+        result = out * self.sa(out)  # spatial reweighting of that result
+        return result
+
+
 @MODELS.register_module()
 class DDRNet(BaseModule):
     """DDRNet backbone.
@@ -58,7 +155,7 @@ def __init__(self,
             self.context_branch_layers.append(
                 self._make_layer(
                     block=BasicBlock if i < 2 else Bottleneck,
-                    inplanes=channels * 2**(i + 1),
+                    inplanes=channels * 2 ** (i + 1),
                     planes=channels * 8 if i > 0 else channels * 4,
                     num_blocks=2 if i < 2 else 1,
                     stride=2))
@@ -220,3 +317,2044 @@ def forward(self, x):
             align_corners=self.align_corners)
 
         return (temp_context, x_s + x_c) if self.training else x_s + x_c
+
+
+@MODELS.register_module()
+class DDRNet_cbam(BaseModule):
+    """DDRNet backbone with CBAM attention modules.
+
+    This backbone builds on `Deep Dual-resolution Networks for
+    Real-time and Accurate Semantic Segmentation of Road Scenes
+    <http://arxiv.org/abs/2101.06085>`_.
+    Modified from https://github.com/ydhongHIT/DDRNet.
+
+    Args:
+        in_channels (int): Number of input image channels. Default: 3.
+        channels (int): The base channels of DDRNet. Default: 32.
+        ppm_channels (int): The channels of PPM module. Default: 128.
+        align_corners (bool): align_corners argument of F.interpolate.
+            Default: False.
+        norm_cfg (dict): Config dict to build norm layer.
+            Default: dict(type='BN', requires_grad=True).
+        act_cfg (dict): Config dict for activation layer.
+            Default: dict(type='ReLU', inplace=True).
+        init_cfg (dict, optional): Initialization config dict.
+            Default: None.
+    """
+
+    def __init__(self,
+                 in_channels: int = 3,
+                 channels: int = 32,
+                 ppm_channels: int = 128,
+                 align_corners: bool = False,
+                 norm_cfg: OptConfigType = dict(type='BN', requires_grad=True),
+                 act_cfg: OptConfigType = dict(type='ReLU', inplace=True),
+                 init_cfg: OptConfigType = None):
+        super().__init__(init_cfg)
+
+        self.in_channels = in_channels
+        self.ppm_channels = ppm_channels
+
+        self.norm_cfg = norm_cfg
+        self.act_cfg = act_cfg
+        self.align_corners = align_corners
+
+        # stage 0-2
+        self.stem = self._make_stem_layer(in_channels, channels, num_blocks=2)
+        self.relu = nn.ReLU()
+
+        # low resolution(context) branch
+        self.context_branch_layers = nn.ModuleList()
+        for i in range(3):
+            self.context_branch_layers.append(
+                self._make_layer(
+                    block=BasicBlock_cbam if i < 2 else Bottleneck_cbam,
+                    inplanes=channels * 2 ** (i + 1),
+                    planes=channels * 8 if i > 0 else channels * 4,
+                    num_blocks=2 if i < 2 else 1,
+                    stride=2))
+
+        # bilateral fusion
+        self.compression_1 = ConvModule(
+            channels * 4,
+            channels * 2,
+            kernel_size=1,
+            norm_cfg=self.norm_cfg,
+            act_cfg=None)
+        self.cbam6 = CBAM(channels * 2)  # gates the spatial branch in stage 3
+
+        self.down_1 = ConvModule(
+            channels * 2,
+            channels * 4,
+            kernel_size=3,
+            stride=2,
+            padding=1,
+            norm_cfg=self.norm_cfg,
+            act_cfg=None)
+        self.cbam7 = CBAM(channels * 4)  # gates the context branch in stage 3
+
+        self.compression_2 = ConvModule(
+            channels * 8,
+            channels * 2,
+            kernel_size=1,
+            norm_cfg=self.norm_cfg,
+            act_cfg=None)
+        self.cbam8 = CBAM(channels * 2)  # gates the spatial branch in stage 4
+
+        self.down_2 = nn.Sequential(
+            ConvModule(
+                channels * 2,
+                channels * 4,
+                kernel_size=3,
+                stride=2,
+                padding=1,
+                norm_cfg=self.norm_cfg,
+                act_cfg=self.act_cfg),
+            ConvModule(
+                channels * 4,
+                channels * 8,
+                kernel_size=3,
+                stride=2,
+                padding=1,
+                norm_cfg=self.norm_cfg,
+                act_cfg=None))
+        self.cbam9 = CBAM(channels * 8)  # gates the context branch in stage 4
+
+        # high resolution(spatial) branch
+        self.spatial_branch_layers = nn.ModuleList()
+        for i in range(3):
+            self.spatial_branch_layers.append(
+                self._make_layer(
+                    block=BasicBlock_cbam if i < 2 else Bottleneck_cbam,
+                    inplanes=channels * 2,
+                    planes=channels * 2,
+                    num_blocks=2 if i < 2 else 1,
+                ))
+
+        self.spp = DAPPM_cbam(
+            channels * 16, ppm_channels, channels * 4, num_scales=5)
+        self.cbam13 = CBAM(channels * 4)  # gates the fused stage-5 output
+
+    def _make_stem_layer(self, in_channels, channels, num_blocks):
+        layers = [
+            ConvModule(
+                in_channels,
+                channels,  # base channel count
+                kernel_size=3,
+                stride=2,
+                padding=1,
+                norm_cfg=self.norm_cfg,
+                act_cfg=self.act_cfg),
+            ConvModule(
+                channels,
+                channels,
+                kernel_size=3,
+                stride=2,
+                padding=1,
+                norm_cfg=self.norm_cfg,
+                act_cfg=self.act_cfg),
+            CBAM(channels)
+        ]
+
+        layers.extend([
+            self._make_layer(BasicBlock_cbam, channels, channels, num_blocks),
+            nn.ReLU(),
+            self._make_layer(
+                BasicBlock_cbam, channels, channels * 2, num_blocks, stride=2),
+            nn.ReLU(),
+        ])
+
+        return nn.Sequential(*layers)
+
+    def _make_layer(self, block, inplanes, planes, num_blocks, stride=1):
+        downsample = None
+        if stride != 1 or inplanes != planes * block.expansion:
+            downsample = nn.Sequential(
+                nn.Conv2d(
+                    inplanes,
+                    planes * block.expansion,
+                    kernel_size=1,
+                    stride=stride,
+                    bias=False),
+                build_norm_layer(self.norm_cfg, planes * block.expansion)[1])
+
+        layers = [
+            block(
+                in_channels=inplanes,
+                channels=planes,
+                stride=stride,
+                downsample=downsample)  # NOTE: norm_cfg not passed here
+        ]
+        inplanes = planes * block.expansion
+        for i in range(1, num_blocks):
+            layers.append(
+                block(
+                    in_channels=inplanes,
+                    channels=planes,
+                    stride=1,
+                    norm_cfg=self.norm_cfg,
+                    act_cfg_out=None if i == num_blocks - 1 else self.act_cfg))
+
+        return nn.Sequential(*layers)
+
+    def forward(self, x):
+        """Forward function."""
+        out_size = (x.shape[-2] // 8, x.shape[-1] // 8)  # 1/8 of input H, W
+
+        # stage 0-2
+        x = self.stem(x)
+
+        # stage3
+        x_c = self.context_branch_layers[0](x)
+        x_s = self.spatial_branch_layers[0](x)
+        comp_c = self.compression_1(self.relu(x_c))
+        x_c += self.down_1(self.relu(x_s))
+        x_c = self.cbam7(x_c)
+        x_s += resize(
+            comp_c,
+            size=out_size,
+            mode='bilinear',
+            align_corners=self.align_corners)
+        x_s = self.cbam6(x_s)
+        if self.training:
+            temp_context = x_s.clone()  # extra output, returned in train mode
+
+        # stage4
+        x_c = self.context_branch_layers[1](self.relu(x_c))
+        x_s = self.spatial_branch_layers[1](self.relu(x_s))
+        comp_c = self.compression_2(self.relu(x_c))
+        x_c += self.down_2(self.relu(x_s))
+        x_c = self.cbam9(x_c)
+        x_s += resize(
+            comp_c,
+            size=out_size,
+            mode='bilinear',
+            align_corners=self.align_corners)
+        x_s = self.cbam8(x_s)
+
+        # stage5
+        x_s = self.spatial_branch_layers[2](self.relu(x_s))
+        x_c = self.context_branch_layers[2](self.relu(x_c))
+        x_c = self.spp(x_c)
+        x_c = resize(
+            x_c,
+            size=out_size,
+            mode='bilinear',
+            align_corners=self.align_corners)
+
+        add = self.cbam13(x_s + x_c)  # fuse both branches, then gate
+        return (temp_context, add) if self.training else add
+
+
+@MODELS.register_module()
+class FAPPM_CONV(BaseModule):
+    """DDRNet_cbam variant that uses FAPPM_conv as its ``spp`` module.
+
+    This backbone builds on `Deep Dual-resolution Networks for
+    Real-time and Accurate Semantic Segmentation of Road Scenes
+    <http://arxiv.org/abs/2101.06085>`_.
+    Modified from https://github.com/ydhongHIT/DDRNet.
+
+    Args:
+        in_channels (int): Number of input image channels. Default: 3.
+        channels (int): The base channels of DDRNet. Default: 32.
+        ppm_channels (int): The channels of PPM module. Default: 96.
+        align_corners (bool): align_corners argument of F.interpolate.
+            Default: False.
+        norm_cfg (dict): Config dict to build norm layer.
+            Default: dict(type='BN', requires_grad=True).
+        act_cfg (dict): Config dict for activation layer.
+            Default: dict(type='ReLU', inplace=True).
+        init_cfg (dict, optional): Initialization config dict.
+            Default: None.
+    """
+
+    def __init__(self,
+                 in_channels: int = 3,
+                 channels: int = 32,
+                 ppm_channels: int = 96,
+                 align_corners: bool = False,
+                 norm_cfg: OptConfigType = dict(type='BN', requires_grad=True),
+                 act_cfg: OptConfigType = dict(type='ReLU', inplace=True),
+                 init_cfg: OptConfigType = None):
+        super().__init__(init_cfg)
+
+        self.in_channels = in_channels
+        self.ppm_channels = ppm_channels
+
+        self.norm_cfg = norm_cfg
+        self.act_cfg = act_cfg
+        self.align_corners = align_corners
+
+        # stage 0-2
+        self.stem = self._make_stem_layer(in_channels, channels, num_blocks=2)
+        self.relu = nn.ReLU()
+
+        # low resolution(context) branch
+        self.context_branch_layers = nn.ModuleList()
+        for i in range(3):
+            self.context_branch_layers.append(
+                self._make_layer(
+                    block=BasicBlock_cbam if i < 2 else Bottleneck_cbam,
+                    inplanes=channels * 2 ** (i + 1),
+                    planes=channels * 8 if i > 0 else channels * 4,
+                    num_blocks=2 if i < 2 else 1,
+                    stride=2))
+
+        # bilateral fusion
+        self.compression_1 = ConvModule(
+            channels * 4,
+            channels * 2,
+            kernel_size=1,
+            norm_cfg=self.norm_cfg,
+            act_cfg=None)
+        self.cbam6 = CBAM(channels * 2)  # gates the spatial branch in stage 3
+
+        self.down_1 = ConvModule(
+            channels * 2,
+            channels * 4,
+            kernel_size=3,
+            stride=2,
+            padding=1,
+            norm_cfg=self.norm_cfg,
+            act_cfg=None)
+        self.cbam7 = CBAM(channels * 4)  # gates the context branch in stage 3
+
+        self.compression_2 = ConvModule(
+            channels * 8,
+            channels * 2,
+            kernel_size=1,
+            norm_cfg=self.norm_cfg,
+            act_cfg=None)
+        self.cbam8 = CBAM(channels * 2)  # gates the spatial branch in stage 4
+
+        self.down_2 = nn.Sequential(
+            ConvModule(
+                channels * 2,
+                channels * 4,
+                kernel_size=3,
+                stride=2,
+                padding=1,
+                norm_cfg=self.norm_cfg,
+                act_cfg=self.act_cfg),
+            ConvModule(
+                channels * 4,
+                channels * 8,
+                kernel_size=3,
+                stride=2,
+                padding=1,
+                norm_cfg=self.norm_cfg,
+                act_cfg=None))
+        self.cbam9 = CBAM(channels * 8)  # gates the context branch in stage 4
+
+        # high resolution(spatial) branch
+        self.spatial_branch_layers = nn.ModuleList()
+        for i in range(3):
+            self.spatial_branch_layers.append(
+                self._make_layer(
+                    block=BasicBlock_cbam if i < 2 else Bottleneck_cbam,
+                    inplanes=channels * 2,
+                    planes=channels * 2,
+                    num_blocks=2 if i < 2 else 1,
+                ))
+
+        self.spp = FAPPM_conv(
+            channels * 16, ppm_channels, channels * 4, num_scales=5)
+        self.cbam13 = CBAM(channels * 4)  # gates the fused stage-5 output
+
+    def _make_stem_layer(self, in_channels, channels, num_blocks):
+        layers = [
+            ConvModule(
+                in_channels,
+                channels,  # base channel count
+                kernel_size=3,
+                stride=2,
+                padding=1,
+                norm_cfg=self.norm_cfg,
+                act_cfg=self.act_cfg),
+            ConvModule(
+                channels,
+                channels,
+                kernel_size=3,
+                stride=2,
+                padding=1,
+                norm_cfg=self.norm_cfg,
+                act_cfg=self.act_cfg),
+            CBAM(channels)
+        ]
+
+        layers.extend([
+            self._make_layer(BasicBlock_cbam, channels, channels, num_blocks),
+            nn.ReLU(),
+            self._make_layer(
+                BasicBlock_cbam, channels, channels * 2, num_blocks, stride=2),
+            nn.ReLU(),
+        ])
+
+        return nn.Sequential(*layers)
+
+    def _make_layer(self, block, inplanes, planes, num_blocks, stride=1):
+        downsample = None
+        if stride != 1 or inplanes != planes * block.expansion:
+            downsample = nn.Sequential(
+                nn.Conv2d(
+                    inplanes,
+                    planes * block.expansion,
+                    kernel_size=1,
+                    stride=stride,
+                    bias=False),
+                build_norm_layer(self.norm_cfg, planes * block.expansion)[1])
+
+        layers = [
+            block(
+                in_channels=inplanes,
+                channels=planes,
+                stride=stride,
+                downsample=downsample)  # NOTE: norm_cfg not passed here
+        ]
+        inplanes = planes * block.expansion
+        for i in range(1, num_blocks):
+            layers.append(
+                block(
+                    in_channels=inplanes,
+                    channels=planes,
+                    stride=1,
+                    norm_cfg=self.norm_cfg,
+                    act_cfg_out=None if i == num_blocks - 1 else self.act_cfg))
+
+        return nn.Sequential(*layers)
+
+    def forward(self, x):
+        """Forward function."""
+        out_size = (x.shape[-2] // 8, x.shape[-1] // 8)  # 1/8 of input H, W
+
+        # stage 0-2
+        x = self.stem(x)
+
+        # stage3
+        x_c = self.context_branch_layers[0](x)
+        x_s = self.spatial_branch_layers[0](x)
+        comp_c = self.compression_1(self.relu(x_c))
+        x_c += self.down_1(self.relu(x_s))
+        x_c = self.cbam7(x_c)
+        x_s += resize(
+            comp_c,
+            size=out_size,
+            mode='bilinear',
+            align_corners=self.align_corners)
+        x_s = self.cbam6(x_s)
+        if self.training:
+            temp_context = x_s.clone()  # extra output, returned in train mode
+
+        # stage4
+        x_c = self.context_branch_layers[1](self.relu(x_c))
+        x_s = self.spatial_branch_layers[1](self.relu(x_s))
+        comp_c = self.compression_2(self.relu(x_c))
+        x_c += self.down_2(self.relu(x_s))
+        x_c = self.cbam9(x_c)
+        x_s += resize(
+            comp_c,
+            size=out_size,
+            mode='bilinear',
+            align_corners=self.align_corners)
+        x_s = self.cbam8(x_s)
+
+        # stage5
+        x_s = self.spatial_branch_layers[2](self.relu(x_s))
+        x_c = self.context_branch_layers[2](self.relu(x_c))
+        x_c = self.spp(x_c)
+        x_c = resize(
+            x_c,
+            size=out_size,
+            mode='bilinear',
+            align_corners=self.align_corners)
+
+        add = self.cbam13(x_s + x_c)  # fuse both branches, then gate
+        return (temp_context, add) if self.training else add
+
+
+# ohem-FAPPM_conv-cbam-Lovasz-auxhead
+
+
+@MODELS.register_module()
+class FAPPM_CONV_nocbam(BaseModule):
+    """DDRNet-style dual-branch backbone using ``FAPPM_conv_nocbam``.
+
+    The layout follows the DDRNet backbone from `Deep Dual-resolution
+    Networks for Real-time and Accurate Semantic Segmentation of Road Scenes
+    <http://arxiv.org/abs/2101.06085>`_.
+    Modified from https://github.com/ydhongHIT/DDRNet.
+
+    This variant is assembled from ``BasicBlock``/``Bottleneck`` blocks, has
+    no CBAM modules on the fusion paths and uses ``FAPPM_conv_nocbam`` as
+    its pooling module.
+
+    Args:
+        in_channels (int): Number of input image channels. Default: 3.
+        channels (int): The base channels of the network. Default: 32.
+        ppm_channels (int): The channels of the pooling module. Default: 96.
+        align_corners (bool): align_corners argument of F.interpolate.
+            Default: False.
+        norm_cfg (dict): Config dict to build norm layer.
+            Default: dict(type='BN', requires_grad=True).
+        act_cfg (dict): Config dict for activation layer.
+            Default: dict(type='ReLU', inplace=True).
+        init_cfg (dict, optional): Initialization config dict.
+            Default: None.
+    """
+
+    def __init__(self,
+                 in_channels: int = 3,
+                 channels: int = 32,
+                 ppm_channels: int = 96,
+                 align_corners: bool = False,
+                 norm_cfg: OptConfigType = dict(type='BN', requires_grad=True),
+                 act_cfg: OptConfigType = dict(type='ReLU', inplace=True),
+                 init_cfg: OptConfigType = None):
+        super().__init__(init_cfg)
+
+        self.in_channels = in_channels
+        self.ppm_channels = ppm_channels
+
+        self.norm_cfg = norm_cfg
+        self.act_cfg = act_cfg
+        self.align_corners = align_corners
+
+        # stage 0-2
+        self.stem = self._make_stem_layer(in_channels, channels, num_blocks=2)
+        self.relu = nn.ReLU()
+
+        # low resolution(context) branch
+        self.context_branch_layers = nn.ModuleList()
+        for i in range(3):
+            self.context_branch_layers.append(
+                self._make_layer(
+                    block=BasicBlock if i < 2 else Bottleneck,
+                    inplanes=channels * 2 ** (i + 1),
+                    planes=channels * 8 if i > 0 else channels * 4,
+                    num_blocks=2 if i < 2 else 1,
+                    stride=2))
+
+        # bilateral fusion
+        # compression_*: context -> spatial (channel reduction, 1x1 conv)
+        # down_*: spatial -> context (strided 3x3 convs)
+        self.compression_1 = ConvModule(
+            channels * 4,
+            channels * 2,
+            kernel_size=1,
+            norm_cfg=self.norm_cfg,
+            act_cfg=None)
+
+        self.down_1 = ConvModule(
+            channels * 2,
+            channels * 4,
+            kernel_size=3,
+            stride=2,
+            padding=1,
+            norm_cfg=self.norm_cfg,
+            act_cfg=None)
+
+        self.compression_2 = ConvModule(
+            channels * 8,
+            channels * 2,
+            kernel_size=1,
+            norm_cfg=self.norm_cfg,
+            act_cfg=None)
+
+        self.down_2 = nn.Sequential(
+            ConvModule(
+                channels * 2,
+                channels * 4,
+                kernel_size=3,
+                stride=2,
+                padding=1,
+                norm_cfg=self.norm_cfg,
+                act_cfg=self.act_cfg),
+            ConvModule(
+                channels * 4,
+                channels * 8,
+                kernel_size=3,
+                stride=2,
+                padding=1,
+                norm_cfg=self.norm_cfg,
+                act_cfg=None))
+
+        # high resolution(spatial) branch
+        self.spatial_branch_layers = nn.ModuleList()
+        for i in range(3):
+            self.spatial_branch_layers.append(
+                self._make_layer(
+                    block=BasicBlock if i < 2 else Bottleneck,
+                    inplanes=channels * 2,
+                    planes=channels * 2,
+                    num_blocks=2 if i < 2 else 1,
+                ))
+
+        self.spp = FAPPM_conv_nocbam(
+            channels * 16, ppm_channels, channels * 4, num_scales=5)
+
+    def _make_stem_layer(self, in_channels, channels, num_blocks):
+        """Build the stem: two stride-2 convs followed by two block stages.
+
+        Args:
+            in_channels (int): Channels of the input image.
+            channels (int): Base channel count of the network.
+            num_blocks (int): Blocks per residual stage in the stem.
+
+        Returns:
+            nn.Sequential: The stem module.
+        """
+        layers = [
+            ConvModule(
+                in_channels,
+                channels,  # 32 with the default ``channels``
+                kernel_size=3,
+                stride=2,
+                padding=1,
+                norm_cfg=self.norm_cfg,
+                act_cfg=self.act_cfg),
+            ConvModule(
+                channels,
+                channels,
+                kernel_size=3,
+                stride=2,
+                padding=1,
+                norm_cfg=self.norm_cfg,
+                act_cfg=self.act_cfg),
+        ]
+
+        layers.extend([
+            self._make_layer(BasicBlock, channels, channels, num_blocks),
+            nn.ReLU(),
+            self._make_layer(
+                BasicBlock, channels, channels * 2, num_blocks, stride=2),
+            nn.ReLU(),
+        ])
+
+        return nn.Sequential(*layers)
+
+    def _make_layer(self, block, inplanes, planes, num_blocks, stride=1):
+        """Stack ``num_blocks`` residual blocks into one sequential stage.
+
+        Args:
+            block: Residual block class exposing an ``expansion`` attribute.
+            inplanes (int): Channels fed to the first block.
+            planes (int): Base channel count handed to every block.
+            num_blocks (int): Number of blocks in the stage.
+            stride (int): Stride of the first block only. Default: 1.
+
+        Returns:
+            nn.Sequential: The assembled stage.
+        """
+        downsample = None
+        # A projection shortcut is needed when the first block changes the
+        # stride or the channel count.
+        if stride != 1 or inplanes != planes * block.expansion:
+            downsample = nn.Sequential(
+                nn.Conv2d(
+                    inplanes,
+                    planes * block.expansion,
+                    kernel_size=1,
+                    stride=stride,
+                    bias=False),
+                build_norm_layer(self.norm_cfg, planes * block.expansion)[1])
+
+        # NOTE(review): the first block is built without ``norm_cfg`` and so
+        # uses the block class default; confirm this is intended.
+        layers = [
+            block(
+                in_channels=inplanes,
+                channels=planes,
+                stride=stride,
+                downsample=downsample)
+        ]
+        inplanes = planes * block.expansion
+        for i in range(1, num_blocks):
+            layers.append(
+                block(
+                    in_channels=inplanes,
+                    channels=planes,
+                    stride=1,
+                    norm_cfg=self.norm_cfg,
+                    act_cfg_out=None if i == num_blocks - 1 else self.act_cfg))
+
+        return nn.Sequential(*layers)
+
+    def forward(self, x):
+        """Forward function.
+
+        Args:
+            x (Tensor): Input batch; the last two dimensions are used as
+                height and width.
+
+        Returns:
+            Tensor | tuple[Tensor, Tensor]: ``add`` in eval mode. In
+            training mode ``(temp_context, add)``, where ``temp_context``
+            is a clone of the spatial-branch feature right after the
+            stage-3 fusion.
+        """
+        # stage 0-2
+        x = self.stem(x)
+
+        # stage3
+        x_c = self.context_branch_layers[0](x)
+        x_s = self.spatial_branch_layers[0](x)
+        # Resize to the actual spatial-branch resolution. The stride-2 3x3
+        # convs (padding 1) in the stem round odd sizes up, so
+        # ``input_size // 8`` would only match for inputs divisible by 8.
+        out_size = (x_s.shape[-2], x_s.shape[-1])
+        comp_c = self.compression_1(self.relu(x_c))
+        x_c += self.down_1(self.relu(x_s))
+        x_s += resize(
+            comp_c,
+            size=out_size,
+            mode='bilinear',
+            align_corners=self.align_corners)
+        if self.training:
+            # Cloned so the later in-place updates of ``x_s`` cannot alter
+            # the auxiliary output.
+            temp_context = x_s.clone()
+
+        # stage4
+        x_c = self.context_branch_layers[1](self.relu(x_c))
+        x_s = self.spatial_branch_layers[1](self.relu(x_s))
+        comp_c = self.compression_2(self.relu(x_c))
+        x_c += self.down_2(self.relu(x_s))
+        x_s += resize(
+            comp_c,
+            size=out_size,
+            mode='bilinear',
+            align_corners=self.align_corners)
+
+        # stage5
+        x_s = self.spatial_branch_layers[2](self.relu(x_s))
+        x_c = self.context_branch_layers[2](self.relu(x_c))
+        x_c = self.spp(x_c)
+        x_c = resize(
+            x_c,
+            size=out_size,
+            mode='bilinear',
+            align_corners=self.align_corners)
+
+        add = x_s + x_c
+        return (temp_context, add) if self.training else add
+
+
+@MODELS.register_module()
+class FAPPM_CONV_group(BaseModule):
+    """DDRNet-style dual-branch backbone with grouped convs and CBAM_group.
+
+    The layout follows the DDRNet backbone from `Deep Dual-resolution
+    Networks for Real-time and Accurate Semantic Segmentation of Road Scenes
+    <http://arxiv.org/abs/2101.06085>`_.
+    Modified from https://github.com/ydhongHIT/DDRNet.
+
+    This variant uses ``groups=4`` in the second stem conv, the fusion convs
+    and the shortcut projections, applies ``CBAM_group`` after every fusion
+    step and uses ``FAPPM_conv_group`` as its pooling module.
+
+    Args:
+        in_channels (int): Number of input image channels. Default: 3.
+        channels (int): The base channels of the network. It has to be
+            divisible by 4 because the grouped convolutions use
+            ``groups=4``. Default: 32.
+        ppm_channels (int): The channels of the pooling module. Default: 96.
+        align_corners (bool): align_corners argument of F.interpolate.
+            Default: False.
+        norm_cfg (dict): Config dict to build norm layer.
+            Default: dict(type='BN', requires_grad=True).
+        act_cfg (dict): Config dict for activation layer.
+            Default: dict(type='ReLU', inplace=True).
+        init_cfg (dict, optional): Initialization config dict.
+            Default: None.
+    """
+
+    def __init__(self,
+                 in_channels: int = 3,
+                 channels: int = 32,
+                 ppm_channels: int = 96,
+                 align_corners: bool = False,
+                 norm_cfg: OptConfigType = dict(type='BN', requires_grad=True),
+                 act_cfg: OptConfigType = dict(type='ReLU', inplace=True),
+                 init_cfg: OptConfigType = None):
+        super().__init__(init_cfg)
+
+        self.in_channels = in_channels
+        self.ppm_channels = ppm_channels
+
+        self.norm_cfg = norm_cfg
+        self.act_cfg = act_cfg
+        self.align_corners = align_corners
+
+        # stage 0-2
+        self.stem = self._make_stem_layer(in_channels, channels, num_blocks=2)
+        self.relu = nn.ReLU()
+
+        # low resolution(context) branch
+        self.context_branch_layers = nn.ModuleList()
+        for i in range(3):
+            self.context_branch_layers.append(
+                self._make_layer(
+                    block=BasicBlock_cbam if i < 2 else Bottleneck_cbam,
+                    inplanes=channels * 2 ** (i + 1),
+                    planes=channels * 8 if i > 0 else channels * 4,
+                    num_blocks=2 if i < 2 else 1,
+                    stride=2))
+
+        # bilateral fusion
+        # compression_*: context -> spatial (channel reduction, 1x1 conv)
+        # down_*: spatial -> context (strided 3x3 convs)
+        # The trailing numbers are channel counts for the default
+        # ``channels=32``.
+        self.compression_1 = ConvModule(
+            channels * 4,
+            channels * 2,
+            kernel_size=1,
+            groups=4,
+            norm_cfg=self.norm_cfg,
+            act_cfg=None)
+        self.cbam6 = CBAM_group(channels * 2)  # 64
+
+        self.down_1 = ConvModule(
+            channels * 2,
+            channels * 4,
+            kernel_size=3,
+            stride=2,
+            padding=1,
+            groups=4,
+            norm_cfg=self.norm_cfg,
+            act_cfg=None)
+        self.cbam7 = CBAM_group(channels * 4)  # 128
+
+        self.compression_2 = ConvModule(
+            channels * 8,
+            channels * 2,
+            kernel_size=1,
+            groups=4,
+            norm_cfg=self.norm_cfg,
+            act_cfg=None)
+        self.cbam8 = CBAM_group(channels * 2)  # 64
+
+        self.down_2 = nn.Sequential(
+            ConvModule(
+                channels * 2,
+                channels * 4,
+                kernel_size=3,
+                stride=2,
+                padding=1,
+                groups=4,
+                norm_cfg=self.norm_cfg,
+                act_cfg=self.act_cfg),
+            ConvModule(
+                channels * 4,
+                channels * 8,
+                kernel_size=3,
+                stride=2,
+                padding=1,
+                groups=4,
+                norm_cfg=self.norm_cfg,
+                act_cfg=None))
+        self.cbam9 = CBAM_group(channels * 8)  # 256
+
+        # high resolution(spatial) branch
+        self.spatial_branch_layers = nn.ModuleList()
+        for i in range(3):
+            self.spatial_branch_layers.append(
+                self._make_layer(
+                    block=BasicBlock_cbam_group
+                    if i < 2 else Bottleneck_cbam_group,
+                    inplanes=channels * 2,
+                    planes=channels * 2,
+                    num_blocks=2 if i < 2 else 1,
+                ))
+
+        self.spp = FAPPM_conv_group(
+            channels * 16, ppm_channels, channels * 4, num_scales=5)
+        self.cbam13 = CBAM_group(channels * 4)  # 128
+
+    def _make_stem_layer(self, in_channels, channels, num_blocks):
+        """Build the stem: two stride-2 convs, CBAM_group, two block stages.
+
+        Args:
+            in_channels (int): Channels of the input image.
+            channels (int): Base channel count of the network.
+            num_blocks (int): Blocks per residual stage in the stem.
+
+        Returns:
+            nn.Sequential: The stem module.
+        """
+        layers = [
+            # The first conv is not grouped (no ``groups`` argument).
+            ConvModule(
+                in_channels,
+                channels,  # 32 with the default ``channels``
+                kernel_size=3,
+                stride=2,
+                padding=1,
+                norm_cfg=self.norm_cfg,
+                act_cfg=self.act_cfg),
+            ConvModule(
+                channels,
+                channels,
+                kernel_size=3,
+                stride=2,
+                padding=1,
+                groups=4,
+                norm_cfg=self.norm_cfg,
+                act_cfg=self.act_cfg),
+            CBAM_group(channels)
+        ]
+
+        layers.extend([
+            self._make_layer(BasicBlock_cbam_group, channels, channels,
+                             num_blocks),
+            nn.ReLU(),
+            self._make_layer(
+                BasicBlock_cbam_group,
+                channels,
+                channels * 2,
+                num_blocks,
+                stride=2),
+            nn.ReLU(),
+        ])
+
+        return nn.Sequential(*layers)
+
+    def _make_layer(self, block, inplanes, planes, num_blocks, stride=1):
+        """Stack ``num_blocks`` residual blocks into one sequential stage.
+
+        Args:
+            block: Residual block class exposing an ``expansion`` attribute.
+            inplanes (int): Channels fed to the first block.
+            planes (int): Base channel count handed to every block.
+            num_blocks (int): Number of blocks in the stage.
+            stride (int): Stride of the first block only. Default: 1.
+
+        Returns:
+            nn.Sequential: The assembled stage.
+        """
+        downsample = None
+        # A projection shortcut is needed when the first block changes the
+        # stride or the channel count; here it is a grouped 1x1 conv.
+        if stride != 1 or inplanes != planes * block.expansion:
+            downsample = nn.Sequential(
+                nn.Conv2d(
+                    inplanes,
+                    planes * block.expansion,
+                    kernel_size=1,
+                    stride=stride,
+                    groups=4,
+                    bias=False),
+                build_norm_layer(self.norm_cfg, planes * block.expansion)[1])
+
+        # NOTE(review): the first block is built without ``norm_cfg`` and so
+        # uses the block class default; confirm this is intended.
+        layers = [
+            block(
+                in_channels=inplanes,
+                channels=planes,
+                stride=stride,
+                downsample=downsample)
+        ]
+        inplanes = planes * block.expansion
+        for i in range(1, num_blocks):
+            layers.append(
+                block(
+                    in_channels=inplanes,
+                    channels=planes,
+                    stride=1,
+                    norm_cfg=self.norm_cfg,
+                    act_cfg_out=None if i == num_blocks - 1 else self.act_cfg))
+
+        return nn.Sequential(*layers)
+
+    def forward(self, x):
+        """Forward function.
+
+        Args:
+            x (Tensor): Input batch; the last two dimensions are used as
+                height and width.
+
+        Returns:
+            Tensor | tuple[Tensor, Tensor]: ``add`` in eval mode. In
+            training mode ``(temp_context, add)``, where ``temp_context``
+            is a clone of the spatial-branch feature right after the
+            stage-3 fusion.
+        """
+        # stage 0-2
+        x = self.stem(x)
+
+        # stage3
+        x_c = self.context_branch_layers[0](x)
+        x_s = self.spatial_branch_layers[0](x)
+        # Resize to the actual spatial-branch resolution. The stride-2 3x3
+        # convs (padding 1) in the stem round odd sizes up, so
+        # ``input_size // 8`` would only match for inputs divisible by 8.
+        out_size = (x_s.shape[-2], x_s.shape[-1])
+        comp_c = self.compression_1(self.relu(x_c))
+        x_c += self.down_1(self.relu(x_s))
+        x_c = self.cbam7(x_c)
+        x_s += resize(
+            comp_c,
+            size=out_size,
+            mode='bilinear',
+            align_corners=self.align_corners)
+        x_s = self.cbam6(x_s)
+        if self.training:
+            # Cloned so the later in-place updates of ``x_s`` cannot alter
+            # the auxiliary output.
+            temp_context = x_s.clone()
+
+        # stage4
+        x_c = self.context_branch_layers[1](self.relu(x_c))
+        x_s = self.spatial_branch_layers[1](self.relu(x_s))
+        comp_c = self.compression_2(self.relu(x_c))
+        x_c += self.down_2(self.relu(x_s))
+        x_c = self.cbam9(x_c)
+        x_s += resize(
+            comp_c,
+            size=out_size,
+            mode='bilinear',
+            align_corners=self.align_corners)
+        x_s = self.cbam8(x_s)
+
+        # stage5
+        x_s = self.spatial_branch_layers[2](self.relu(x_s))
+        x_c = self.context_branch_layers[2](self.relu(x_c))
+        x_c = self.spp(x_c)
+        x_c = resize(
+            x_c,
+            size=out_size,
+            mode='bilinear',
+            align_corners=self.align_corners)
+
+        add = self.cbam13(x_s + x_c)
+        return (temp_context, add) if self.training else add
+
+
+@MODELS.register_module()
+class FAPPM_CONV_slim(BaseModule):
+    """DDRNet-style dual-branch backbone with CBAM and ``FAPPM_conv``.
+
+    The layout follows the DDRNet backbone from `Deep Dual-resolution
+    Networks for Real-time and Accurate Semantic Segmentation of Road Scenes
+    <http://arxiv.org/abs/2101.06085>`_.
+    Modified from https://github.com/ydhongHIT/DDRNet.
+
+    This variant is assembled from ``BasicBlock_cbam``/``Bottleneck_cbam``
+    blocks, applies ``CBAM`` after every fusion step and uses ``FAPPM_conv``
+    as its pooling module.
+
+    Args:
+        in_channels (int): Number of input image channels. Default: 3.
+        channels (int): The base channels of the network. Default: 32.
+        ppm_channels (int): The channels of the pooling module. Default: 64.
+        align_corners (bool): align_corners argument of F.interpolate.
+            Default: False.
+        norm_cfg (dict): Config dict to build norm layer.
+            Default: dict(type='BN', requires_grad=True).
+        act_cfg (dict): Config dict for activation layer.
+            Default: dict(type='ReLU', inplace=True).
+        init_cfg (dict, optional): Initialization config dict.
+            Default: None.
+    """
+
+    def __init__(self,
+                 in_channels: int = 3,
+                 channels: int = 32,
+                 ppm_channels: int = 64,
+                 align_corners: bool = False,
+                 norm_cfg: OptConfigType = dict(type='BN', requires_grad=True),
+                 act_cfg: OptConfigType = dict(type='ReLU', inplace=True),
+                 init_cfg: OptConfigType = None):
+        super().__init__(init_cfg)
+
+        self.in_channels = in_channels
+        self.ppm_channels = ppm_channels
+
+        self.norm_cfg = norm_cfg
+        self.act_cfg = act_cfg
+        self.align_corners = align_corners
+
+        # stage 0-2
+        self.stem = self._make_stem_layer(in_channels, channels, num_blocks=2)
+        self.relu = nn.ReLU()
+
+        # low resolution(context) branch
+        self.context_branch_layers = nn.ModuleList()
+        for i in range(3):
+            self.context_branch_layers.append(
+                self._make_layer(
+                    block=BasicBlock_cbam if i < 2 else Bottleneck_cbam,
+                    inplanes=channels * 2 ** (i + 1),
+                    planes=channels * 8 if i > 0 else channels * 4,
+                    num_blocks=2 if i < 2 else 1,
+                    stride=2))
+
+        # bilateral fusion
+        # compression_*: context -> spatial (channel reduction, 1x1 conv)
+        # down_*: spatial -> context (strided 3x3 convs)
+        # The trailing numbers are channel counts for the default
+        # ``channels=32``.
+        self.compression_1 = ConvModule(
+            channels * 4,
+            channels * 2,
+            kernel_size=1,
+            norm_cfg=self.norm_cfg,
+            act_cfg=None)
+        self.cbam6 = CBAM(channels * 2)  # 64
+
+        self.down_1 = ConvModule(
+            channels * 2,
+            channels * 4,
+            kernel_size=3,
+            stride=2,
+            padding=1,
+            norm_cfg=self.norm_cfg,
+            act_cfg=None)
+        self.cbam7 = CBAM(channels * 4)  # 128
+
+        self.compression_2 = ConvModule(
+            channels * 8,
+            channels * 2,
+            kernel_size=1,
+            norm_cfg=self.norm_cfg,
+            act_cfg=None)
+        self.cbam8 = CBAM(channels * 2)  # 64
+
+        self.down_2 = nn.Sequential(
+            ConvModule(
+                channels * 2,
+                channels * 4,
+                kernel_size=3,
+                stride=2,
+                padding=1,
+                norm_cfg=self.norm_cfg,
+                act_cfg=self.act_cfg),
+            ConvModule(
+                channels * 4,
+                channels * 8,
+                kernel_size=3,
+                stride=2,
+                padding=1,
+                norm_cfg=self.norm_cfg,
+                act_cfg=None))
+        self.cbam9 = CBAM(channels * 8)  # 256
+
+        # high resolution(spatial) branch
+        self.spatial_branch_layers = nn.ModuleList()
+        for i in range(3):
+            self.spatial_branch_layers.append(
+                self._make_layer(
+                    block=BasicBlock_cbam if i < 2 else Bottleneck_cbam,
+                    inplanes=channels * 2,
+                    planes=channels * 2,
+                    num_blocks=2 if i < 2 else 1,
+                ))
+
+        self.spp = FAPPM_conv(
+            channels * 16, ppm_channels, channels * 4, num_scales=5)
+        self.cbam13 = CBAM(channels * 4)  # 128
+
+    def _make_stem_layer(self, in_channels, channels, num_blocks):
+        """Build the stem: two stride-2 convs, CBAM, then two block stages.
+
+        Args:
+            in_channels (int): Channels of the input image.
+            channels (int): Base channel count of the network.
+            num_blocks (int): Blocks per residual stage in the stem.
+
+        Returns:
+            nn.Sequential: The stem module.
+        """
+        layers = [
+            ConvModule(
+                in_channels,
+                channels,
+                kernel_size=3,
+                stride=2,
+                padding=1,
+                norm_cfg=self.norm_cfg,
+                act_cfg=self.act_cfg),
+            ConvModule(
+                channels,
+                channels,
+                kernel_size=3,
+                stride=2,
+                padding=1,
+                norm_cfg=self.norm_cfg,
+                act_cfg=self.act_cfg),
+            CBAM(channels)
+        ]
+
+        layers.extend([
+            self._make_layer(BasicBlock_cbam, channels, channels, num_blocks),
+            nn.ReLU(),
+            self._make_layer(
+                BasicBlock_cbam, channels, channels * 2, num_blocks, stride=2),
+            nn.ReLU(),
+        ])
+
+        return nn.Sequential(*layers)
+
+    def _make_layer(self, block, inplanes, planes, num_blocks, stride=1):
+        """Stack ``num_blocks`` residual blocks into one sequential stage.
+
+        Args:
+            block: Residual block class exposing an ``expansion`` attribute.
+            inplanes (int): Channels fed to the first block.
+            planes (int): Base channel count handed to every block.
+            num_blocks (int): Number of blocks in the stage.
+            stride (int): Stride of the first block only. Default: 1.
+
+        Returns:
+            nn.Sequential: The assembled stage.
+        """
+        downsample = None
+        # A projection shortcut is needed when the first block changes the
+        # stride or the channel count.
+        if stride != 1 or inplanes != planes * block.expansion:
+            downsample = nn.Sequential(
+                nn.Conv2d(
+                    inplanes,
+                    planes * block.expansion,
+                    kernel_size=1,
+                    stride=stride,
+                    bias=False),
+                build_norm_layer(self.norm_cfg, planes * block.expansion)[1])
+
+        # NOTE(review): the first block is built without ``norm_cfg`` and so
+        # uses the block class default; confirm this is intended.
+        layers = [
+            block(
+                in_channels=inplanes,
+                channels=planes,
+                stride=stride,
+                downsample=downsample)
+        ]
+        inplanes = planes * block.expansion
+        for i in range(1, num_blocks):
+            layers.append(
+                block(
+                    in_channels=inplanes,
+                    channels=planes,
+                    stride=1,
+                    norm_cfg=self.norm_cfg,
+                    act_cfg_out=None if i == num_blocks - 1 else self.act_cfg))
+
+        return nn.Sequential(*layers)
+
+    def forward(self, x):
+        """Forward function.
+
+        Args:
+            x (Tensor): Input batch; the last two dimensions are used as
+                height and width.
+
+        Returns:
+            Tensor | tuple[Tensor, Tensor]: ``add`` in eval mode. In
+            training mode ``(temp_context, add)``, where ``temp_context``
+            is a clone of the spatial-branch feature right after the
+            stage-3 fusion.
+        """
+        # stage 0-2
+        x = self.stem(x)
+
+        # stage3
+        x_c = self.context_branch_layers[0](x)
+        x_s = self.spatial_branch_layers[0](x)
+        # Resize to the actual spatial-branch resolution. The stride-2 3x3
+        # convs (padding 1) in the stem round odd sizes up, so
+        # ``input_size // 8`` would only match for inputs divisible by 8.
+        out_size = (x_s.shape[-2], x_s.shape[-1])
+        comp_c = self.compression_1(self.relu(x_c))
+        x_c += self.down_1(self.relu(x_s))
+        x_c = self.cbam7(x_c)
+        x_s += resize(
+            comp_c,
+            size=out_size,
+            mode='bilinear',
+            align_corners=self.align_corners)
+        x_s = self.cbam6(x_s)
+        if self.training:
+            # Cloned so the later in-place updates of ``x_s`` cannot alter
+            # the auxiliary output.
+            temp_context = x_s.clone()
+
+        # stage4
+        x_c = self.context_branch_layers[1](self.relu(x_c))
+        x_s = self.spatial_branch_layers[1](self.relu(x_s))
+        comp_c = self.compression_2(self.relu(x_c))
+        x_c += self.down_2(self.relu(x_s))
+        x_c = self.cbam9(x_c)
+        x_s += resize(
+            comp_c,
+            size=out_size,
+            mode='bilinear',
+            align_corners=self.align_corners)
+        x_s = self.cbam8(x_s)
+
+        # stage5
+        x_s = self.spatial_branch_layers[2](self.relu(x_s))
+        x_c = self.context_branch_layers[2](self.relu(x_c))
+        x_c = self.spp(x_c)
+        x_c = resize(
+            x_c,
+            size=out_size,
+            mode='bilinear',
+            align_corners=self.align_corners)
+
+        add = self.cbam13(x_s + x_c)
+        return (temp_context, add) if self.training else add
+
+
+@MODELS.register_module()
+class FAPPM_CONV_slim2(BaseModule):
+    """DDRNet-style dual-branch backbone using ``FAPPM_conv_slim``.
+
+    The layout follows the DDRNet backbone from `Deep Dual-resolution
+    Networks for Real-time and Accurate Semantic Segmentation of Road Scenes
+    <http://arxiv.org/abs/2101.06085>`_.
+    Modified from https://github.com/ydhongHIT/DDRNet.
+
+    This variant is assembled from ``BasicBlock_cbam``/``Bottleneck_cbam``
+    blocks, has no standalone CBAM modules on the fusion paths and uses
+    ``FAPPM_conv_slim`` as its pooling module.
+
+    Args:
+        in_channels (int): Number of input image channels. Default: 3.
+        channels (int): The base channels of the network. Default: 32.
+        ppm_channels (int): The channels of the pooling module. Default: 96.
+        align_corners (bool): align_corners argument of F.interpolate.
+            Default: False.
+        norm_cfg (dict): Config dict to build norm layer.
+            Default: dict(type='BN', requires_grad=True).
+        act_cfg (dict): Config dict for activation layer.
+            Default: dict(type='ReLU', inplace=True).
+        init_cfg (dict, optional): Initialization config dict.
+            Default: None.
+    """
+
+    def __init__(self,
+                 in_channels: int = 3,
+                 channels: int = 32,
+                 ppm_channels: int = 96,
+                 align_corners: bool = False,
+                 norm_cfg: OptConfigType = dict(type='BN', requires_grad=True),
+                 act_cfg: OptConfigType = dict(type='ReLU', inplace=True),
+                 init_cfg: OptConfigType = None):
+        super().__init__(init_cfg)
+
+        self.in_channels = in_channels
+        self.ppm_channels = ppm_channels
+
+        self.norm_cfg = norm_cfg
+        self.act_cfg = act_cfg
+        self.align_corners = align_corners
+
+        # stage 0-2
+        self.stem = self._make_stem_layer(in_channels, channels, num_blocks=2)
+        self.relu = nn.ReLU()
+
+        # low resolution(context) branch
+        self.context_branch_layers = nn.ModuleList()
+        for i in range(3):
+            self.context_branch_layers.append(
+                self._make_layer(
+                    block=BasicBlock_cbam if i < 2 else Bottleneck_cbam,
+                    inplanes=channels * 2 ** (i + 1),
+                    planes=channels * 8 if i > 0 else channels * 4,
+                    num_blocks=2 if i < 2 else 1,
+                    stride=2))
+
+        # bilateral fusion
+        # compression_*: context -> spatial (channel reduction, 1x1 conv)
+        # down_*: spatial -> context (strided 3x3 convs)
+        self.compression_1 = ConvModule(
+            channels * 4,
+            channels * 2,
+            kernel_size=1,
+            norm_cfg=self.norm_cfg,
+            act_cfg=None)
+
+        self.down_1 = ConvModule(
+            channels * 2,
+            channels * 4,
+            kernel_size=3,
+            stride=2,
+            padding=1,
+            norm_cfg=self.norm_cfg,
+            act_cfg=None)
+
+        self.compression_2 = ConvModule(
+            channels * 8,
+            channels * 2,
+            kernel_size=1,
+            norm_cfg=self.norm_cfg,
+            act_cfg=None)
+
+        self.down_2 = nn.Sequential(
+            ConvModule(
+                channels * 2,
+                channels * 4,
+                kernel_size=3,
+                stride=2,
+                padding=1,
+                norm_cfg=self.norm_cfg,
+                act_cfg=self.act_cfg),
+            ConvModule(
+                channels * 4,
+                channels * 8,
+                kernel_size=3,
+                stride=2,
+                padding=1,
+                norm_cfg=self.norm_cfg,
+                act_cfg=None))
+
+        # high resolution(spatial) branch
+        self.spatial_branch_layers = nn.ModuleList()
+        for i in range(3):
+            self.spatial_branch_layers.append(
+                self._make_layer(
+                    block=BasicBlock_cbam if i < 2 else Bottleneck_cbam,
+                    inplanes=channels * 2,
+                    planes=channels * 2,
+                    num_blocks=2 if i < 2 else 1,
+                ))
+
+        self.spp = FAPPM_conv_slim(
+            channels * 16, ppm_channels, channels * 4, num_scales=5)
+
+    def _make_stem_layer(self, in_channels, channels, num_blocks):
+        """Build the stem: two stride-2 convs followed by two block stages.
+
+        Args:
+            in_channels (int): Channels of the input image.
+            channels (int): Base channel count of the network.
+            num_blocks (int): Blocks per residual stage in the stem.
+
+        Returns:
+            nn.Sequential: The stem module.
+        """
+        layers = [
+            ConvModule(
+                in_channels,
+                channels,
+                kernel_size=3,
+                stride=2,
+                padding=1,
+                norm_cfg=self.norm_cfg,
+                act_cfg=self.act_cfg),
+            ConvModule(
+                channels,
+                channels,
+                kernel_size=3,
+                stride=2,
+                padding=1,
+                norm_cfg=self.norm_cfg,
+                act_cfg=self.act_cfg),
+        ]
+
+        layers.extend([
+            self._make_layer(BasicBlock_cbam, channels, channels, num_blocks),
+            nn.ReLU(),
+            self._make_layer(
+                BasicBlock_cbam, channels, channels * 2, num_blocks, stride=2),
+            nn.ReLU(),
+        ])
+
+        return nn.Sequential(*layers)
+
+    def _make_layer(self, block, inplanes, planes, num_blocks, stride=1):
+        """Stack ``num_blocks`` residual blocks into one sequential stage.
+
+        Args:
+            block: Residual block class exposing an ``expansion`` attribute.
+            inplanes (int): Channels fed to the first block.
+            planes (int): Base channel count handed to every block.
+            num_blocks (int): Number of blocks in the stage.
+            stride (int): Stride of the first block only. Default: 1.
+
+        Returns:
+            nn.Sequential: The assembled stage.
+        """
+        downsample = None
+        # A projection shortcut is needed when the first block changes the
+        # stride or the channel count.
+        if stride != 1 or inplanes != planes * block.expansion:
+            downsample = nn.Sequential(
+                nn.Conv2d(
+                    inplanes,
+                    planes * block.expansion,
+                    kernel_size=1,
+                    stride=stride,
+                    bias=False),
+                build_norm_layer(self.norm_cfg, planes * block.expansion)[1])
+
+        # NOTE(review): the first block is built without ``norm_cfg`` and so
+        # uses the block class default; confirm this is intended.
+        layers = [
+            block(
+                in_channels=inplanes,
+                channels=planes,
+                stride=stride,
+                downsample=downsample)
+        ]
+        inplanes = planes * block.expansion
+        for i in range(1, num_blocks):
+            layers.append(
+                block(
+                    in_channels=inplanes,
+                    channels=planes,
+                    stride=1,
+                    norm_cfg=self.norm_cfg,
+                    act_cfg_out=None if i == num_blocks - 1 else self.act_cfg))
+
+        return nn.Sequential(*layers)
+
+    def forward(self, x):
+        """Forward function.
+
+        Args:
+            x (Tensor): Input batch; the last two dimensions are used as
+                height and width.
+
+        Returns:
+            Tensor | tuple[Tensor, Tensor]: ``add`` in eval mode. In
+            training mode ``(temp_context, add)``, where ``temp_context``
+            is a clone of the spatial-branch feature right after the
+            stage-3 fusion.
+        """
+        # stage 0-2
+        x = self.stem(x)
+
+        # stage3
+        x_c = self.context_branch_layers[0](x)
+        x_s = self.spatial_branch_layers[0](x)
+        # Resize to the actual spatial-branch resolution. The stride-2 3x3
+        # convs (padding 1) in the stem round odd sizes up, so
+        # ``input_size // 8`` would only match for inputs divisible by 8.
+        out_size = (x_s.shape[-2], x_s.shape[-1])
+        comp_c = self.compression_1(self.relu(x_c))
+        x_c += self.down_1(self.relu(x_s))
+        x_s += resize(
+            comp_c,
+            size=out_size,
+            mode='bilinear',
+            align_corners=self.align_corners)
+        if self.training:
+            # Cloned so the later in-place updates of ``x_s`` cannot alter
+            # the auxiliary output.
+            temp_context = x_s.clone()
+
+        # stage4
+        x_c = self.context_branch_layers[1](self.relu(x_c))
+        x_s = self.spatial_branch_layers[1](self.relu(x_s))
+        comp_c = self.compression_2(self.relu(x_c))
+        x_c += self.down_2(self.relu(x_s))
+        x_s += resize(
+            comp_c,
+            size=out_size,
+            mode='bilinear',
+            align_corners=self.align_corners)
+
+        # stage5
+        x_s = self.spatial_branch_layers[2](self.relu(x_s))
+        x_c = self.context_branch_layers[2](self.relu(x_c))
+        x_c = self.spp(x_c)
+        x_c = resize(
+            x_c,
+            size=out_size,
+            mode='bilinear',
+            align_corners=self.align_corners)
+
+        add = x_s + x_c
+        return (temp_context, add) if self.training else add
+
+
+@MODELS.register_module()
+class FAPPM_CONV_group_slim(BaseModule):
+    """DDRNet-style backbone with grouped convs, CBAM_group and FAPPM.
+
+    This backbone is adapted from `Deep Dual-resolution Networks for
+    Real-time and Accurate Semantic Segmentation of Road Scenes
+    <http://arxiv.org/abs/2101.06085>`_.
+    Modified from https://github.com/ydhongHIT/DDRNet.
+
+    Args:
+        in_channels (int): Number of input image channels. Default: 3.
+        channels (int): The base channels of DDRNet. Default: 32.
+        ppm_channels (int): The channels of PPM module. Default: 64.
+        align_corners (bool): align_corners argument passed to ``resize``.
+            Default: False.
+        norm_cfg (dict): Config dict to build norm layer.
+            Default: dict(type='BN', requires_grad=True).
+        act_cfg (dict): Config dict for activation layer.
+            Default: dict(type='ReLU', inplace=True).
+        init_cfg (dict, optional): Initialization config dict.
+            Default: None.
+    """
+
+    def __init__(self,
+                 in_channels: int = 3,
+                 channels: int = 32,
+                 ppm_channels: int = 64,
+                 align_corners: bool = False,
+                 norm_cfg: OptConfigType = dict(type='BN', requires_grad=True),
+                 act_cfg: OptConfigType = dict(type='ReLU', inplace=True),
+                 init_cfg: OptConfigType = None):
+        super().__init__(init_cfg)
+
+        self.in_channels = in_channels
+        self.ppm_channels = ppm_channels
+
+        self.norm_cfg = norm_cfg
+        self.act_cfg = act_cfg
+        self.align_corners = align_corners
+
+        # stage 0-2
+        self.stem = self._make_stem_layer(in_channels, channels, num_blocks=2)
+        self.relu = nn.ReLU()  # non-inplace ReLU used between stages
+
+        # low resolution(context) branch
+        self.context_branch_layers = nn.ModuleList()
+        for i in range(3):
+            self.context_branch_layers.append(
+                self._make_layer(
+                    block=BasicBlock_cbam if i < 2 else Bottleneck_cbam,
+                    inplanes=channels * 2 ** (i + 1),
+                    planes=channels * 8 if i > 0 else channels * 4,
+                    num_blocks=2 if i < 2 else 1,
+                    stride=2))
+
+        # bilateral fusion (all fusion convs use groups=4)
+        self.compression_1 = ConvModule(
+            channels * 4,
+            channels * 2,
+            kernel_size=1,
+            groups=4,
+            norm_cfg=self.norm_cfg,
+            act_cfg=None)
+        self.cbam6 = CBAM_group(channels * 2)  # 64 when channels=32
+
+        self.down_1 = ConvModule(
+            channels * 2,
+            channels * 4,
+            kernel_size=3,
+            stride=2,
+            padding=1,
+            groups=4,
+            norm_cfg=self.norm_cfg,
+            act_cfg=None)
+        self.cbam7 = CBAM_group(channels * 4)  # 128 when channels=32
+
+        self.compression_2 = ConvModule(
+            channels * 8,
+            channels * 2,
+            kernel_size=1,
+            groups=4,
+            norm_cfg=self.norm_cfg,
+            act_cfg=None)
+        self.cbam8 = CBAM_group(channels * 2)  # 64 when channels=32
+
+        self.down_2 = nn.Sequential(
+            ConvModule(
+                channels * 2,
+                channels * 4,
+                kernel_size=3,
+                stride=2,
+                padding=1,
+                groups=4,
+                norm_cfg=self.norm_cfg,
+                act_cfg=self.act_cfg),
+            ConvModule(
+                channels * 4,
+                channels * 8,
+                kernel_size=3,
+                stride=2,
+                padding=1,
+                groups=4,
+                norm_cfg=self.norm_cfg,
+                act_cfg=None))
+        self.cbam9 = CBAM_group(channels * 8)  # 256 when channels=32
+
+        # high resolution(spatial) branch
+        self.spatial_branch_layers = nn.ModuleList()
+        for i in range(3):
+            self.spatial_branch_layers.append(
+                self._make_layer(
+                    block=BasicBlock_cbam_group
+                    if i < 2 else Bottleneck_cbam_group,
+                    inplanes=channels * 2,
+                    planes=channels * 2,
+                    num_blocks=2 if i < 2 else 1,
+                ))
+
+        self.spp = FAPPM_conv_group(
+            channels * 16, ppm_channels, channels * 4, num_scales=5)
+        self.cbam13 = CBAM_group(channels * 4)  # 128 when channels=32
+
+    def _make_stem_layer(self, in_channels, channels, num_blocks):
+        layers = [  # two stride-2 convs, then a CBAM gate
+            ConvModule(
+                in_channels,
+                channels,
+                kernel_size=3,
+                stride=2,
+                padding=1,
+                norm_cfg=self.norm_cfg,
+                act_cfg=self.act_cfg),
+            ConvModule(
+                channels,
+                channels,
+                kernel_size=3,
+                stride=2,
+                padding=1,
+                groups=4,
+                norm_cfg=self.norm_cfg,
+                act_cfg=self.act_cfg),
+            CBAM_group_r8(channels)  # ratio=8: 32/16=2 not divisible by 4
+        ]
+
+        layers.extend([
+            self._make_layer(BasicBlock_cbam_group_r8, channels, channels,
+                             num_blocks),
+            nn.ReLU(),
+            self._make_layer(
+                BasicBlock_cbam_group_r8,
+                channels,
+                channels * 2,
+                num_blocks,
+                stride=2),
+            nn.ReLU(),
+        ])
+
+        return nn.Sequential(*layers)
+
+    def _make_layer(self, block, inplanes, planes, num_blocks, stride=1):
+        downsample = None
+        if stride != 1 or inplanes != planes * block.expansion:
+            downsample = nn.Sequential(  # grouped 1x1 conv + norm shortcut
+                nn.Conv2d(
+                    inplanes,
+                    planes * block.expansion,
+                    kernel_size=1,
+                    stride=stride,
+                    groups=4,
+                    bias=False),
+                build_norm_layer(self.norm_cfg, planes * block.expansion)[1])
+
+        layers = [  # only the first block gets the stride and downsample
+            block(
+                in_channels=inplanes,
+                channels=planes,
+                stride=stride,
+                downsample=downsample)
+        ]
+        inplanes = planes * block.expansion
+        for i in range(1, num_blocks):
+            layers.append(
+                block(
+                    in_channels=inplanes,
+                    channels=planes,
+                    stride=1,
+                    norm_cfg=self.norm_cfg,
+                    act_cfg_out=None if i == num_blocks - 1 else self.act_cfg))
+
+        return nn.Sequential(*layers)
+
+    def forward(self, x):
+        """Return the fused map, plus the stage-3 spatial map in training."""
+        out_size = (x.shape[-2] // 8, x.shape[-1] // 8)  # 1/8 of input H, W
+
+        # stage 0-2
+        x = self.stem(x)
+
+        # stage3: run both branches, exchange features, gate with CBAM
+        x_c = self.context_branch_layers[0](x)
+        x_s = self.spatial_branch_layers[0](x)
+        comp_c = self.compression_1(self.relu(x_c))  # before x_c is updated
+        x_c += self.down_1(self.relu(x_s))
+        x_c = self.cbam7(x_c)
+        x_s += resize(
+            comp_c,
+            size=out_size,
+            mode='bilinear',
+            align_corners=self.align_corners)
+        x_s = self.cbam6(x_s)
+        if self.training:
+            temp_context = x_s.clone()  # extra output returned in training
+
+        # stage4: same exchange pattern with the second fusion modules
+        x_c = self.context_branch_layers[1](self.relu(x_c))
+        x_s = self.spatial_branch_layers[1](self.relu(x_s))
+        comp_c = self.compression_2(self.relu(x_c))  # before x_c is updated
+        x_c += self.down_2(self.relu(x_s))
+        x_c = self.cbam9(x_c)
+        x_s += resize(
+            comp_c,
+            size=out_size,
+            mode='bilinear',
+            align_corners=self.align_corners)
+        x_s = self.cbam8(x_s)
+
+        # stage5: final blocks, pyramid pooling on the context branch
+        x_s = self.spatial_branch_layers[2](self.relu(x_s))
+        x_c = self.context_branch_layers[2](self.relu(x_c))
+        x_c = self.spp(x_c)
+        x_c = resize(
+            x_c,
+            size=out_size,
+            mode='bilinear',
+            align_corners=self.align_corners)
+
+        add = self.cbam13(x_s + x_c)
+        return (temp_context, add) if self.training else add
+
+
+@MODELS.register_module()
+class FAPPM_AVGP(BaseModule):
+    """DDRNet-style backbone with CBAM gates and an FAPPM_avgp head.
+
+    This backbone is adapted from `Deep Dual-resolution Networks for
+    Real-time and Accurate Semantic Segmentation of Road Scenes
+    <http://arxiv.org/abs/2101.06085>`_.
+    Modified from https://github.com/ydhongHIT/DDRNet.
+
+    Args:
+        in_channels (int): Number of input image channels. Default: 3.
+        channels (int): The base channels of DDRNet. Default: 32.
+        ppm_channels (int): The channels of PPM module. Default: 96.
+        align_corners (bool): align_corners argument passed to ``resize``.
+            Default: False.
+        norm_cfg (dict): Config dict to build norm layer.
+            Default: dict(type='BN', requires_grad=True).
+        act_cfg (dict): Config dict for activation layer.
+            Default: dict(type='ReLU', inplace=True).
+        init_cfg (dict, optional): Initialization config dict.
+            Default: None.
+    """
+
+    def __init__(self,
+                 in_channels: int = 3,
+                 channels: int = 32,
+                 ppm_channels: int = 96,
+                 align_corners: bool = False,
+                 norm_cfg: OptConfigType = dict(type='BN', requires_grad=True),
+                 act_cfg: OptConfigType = dict(type='ReLU', inplace=True),
+                 init_cfg: OptConfigType = None):
+        super().__init__(init_cfg)
+
+        self.in_channels = in_channels
+        self.ppm_channels = ppm_channels
+
+        self.norm_cfg = norm_cfg
+        self.act_cfg = act_cfg
+        self.align_corners = align_corners
+
+        # stage 0-2
+        self.stem = self._make_stem_layer(in_channels, channels, num_blocks=2)
+        self.relu = nn.ReLU()  # non-inplace ReLU used between stages
+
+        # low resolution(context) branch
+        self.context_branch_layers = nn.ModuleList()
+        for i in range(3):
+            self.context_branch_layers.append(
+                self._make_layer(
+                    block=BasicBlock_cbam if i < 2 else Bottleneck_cbam,
+                    inplanes=channels * 2 ** (i + 1),
+                    planes=channels * 8 if i > 0 else channels * 4,
+                    num_blocks=2 if i < 2 else 1,
+                    stride=2))
+
+        # bilateral fusion
+        self.compression_1 = ConvModule(
+            channels * 4,
+            channels * 2,
+            kernel_size=1,
+            norm_cfg=self.norm_cfg,
+            act_cfg=None)
+        self.cbam6 = CBAM(channels * 2)  # 64 when channels=32
+
+        self.down_1 = ConvModule(
+            channels * 2,
+            channels * 4,
+            kernel_size=3,
+            stride=2,
+            padding=1,
+            norm_cfg=self.norm_cfg,
+            act_cfg=None)
+        self.cbam7 = CBAM(channels * 4)  # 128 when channels=32
+
+        self.compression_2 = ConvModule(
+            channels * 8,
+            channels * 2,
+            kernel_size=1,
+            norm_cfg=self.norm_cfg,
+            act_cfg=None)
+        self.cbam8 = CBAM(channels * 2)  # 64 when channels=32
+
+        self.down_2 = nn.Sequential(
+            ConvModule(
+                channels * 2,
+                channels * 4,
+                kernel_size=3,
+                stride=2,
+                padding=1,
+                norm_cfg=self.norm_cfg,
+                act_cfg=self.act_cfg),
+            ConvModule(
+                channels * 4,
+                channels * 8,
+                kernel_size=3,
+                stride=2,
+                padding=1,
+                norm_cfg=self.norm_cfg,
+                act_cfg=None))
+        self.cbam9 = CBAM(channels * 8)  # 256 when channels=32
+
+        # high resolution(spatial) branch
+        self.spatial_branch_layers = nn.ModuleList()
+        for i in range(3):
+            self.spatial_branch_layers.append(
+                self._make_layer(
+                    block=BasicBlock_cbam if i < 2 else Bottleneck_cbam,
+                    inplanes=channels * 2,
+                    planes=channels * 2,
+                    num_blocks=2 if i < 2 else 1,
+                ))
+
+        self.spp = FAPPM_avgp(channels * 16, ppm_channels, channels * 4)
+        self.cbam13 = CBAM(channels * 4)  # 128 when channels=32
+
+    def _make_stem_layer(self, in_channels, channels, num_blocks):
+        layers = [  # two stride-2 convs, then a CBAM gate
+            ConvModule(
+                in_channels,
+                channels,  # 32 by default
+                kernel_size=3,
+                stride=2,
+                padding=1,
+                norm_cfg=self.norm_cfg,
+                act_cfg=self.act_cfg),
+            ConvModule(
+                channels,
+                channels,
+                kernel_size=3,
+                stride=2,
+                padding=1,
+                norm_cfg=self.norm_cfg,
+                act_cfg=self.act_cfg),
+            CBAM(channels)
+        ]
+
+        layers.extend([
+            self._make_layer(BasicBlock_cbam, channels, channels, num_blocks),
+            nn.ReLU(),
+            self._make_layer(
+                BasicBlock_cbam, channels, channels * 2, num_blocks, stride=2),
+            nn.ReLU(),
+        ])
+
+        return nn.Sequential(*layers)
+
+    def _make_layer(self, block, inplanes, planes, num_blocks, stride=1):
+        downsample = None
+        if stride != 1 or inplanes != planes * block.expansion:
+            downsample = nn.Sequential(  # 1x1 conv + norm shortcut
+                nn.Conv2d(
+                    inplanes,
+                    planes * block.expansion,
+                    kernel_size=1,
+                    stride=stride,
+                    bias=False),
+                build_norm_layer(self.norm_cfg, planes * block.expansion)[1])
+
+        layers = [  # only the first block gets the stride and downsample
+            block(
+                in_channels=inplanes,
+                channels=planes,
+                stride=stride,
+                downsample=downsample)
+        ]
+        inplanes = planes * block.expansion
+        for i in range(1, num_blocks):
+            layers.append(
+                block(
+                    in_channels=inplanes,
+                    channels=planes,
+                    stride=1,
+                    norm_cfg=self.norm_cfg,
+                    act_cfg_out=None if i == num_blocks - 1 else self.act_cfg))
+
+        return nn.Sequential(*layers)
+
+    def forward(self, x):
+        """Return the fused map, plus the stage-3 spatial map in training."""
+        out_size = (x.shape[-2] // 8, x.shape[-1] // 8)  # 1/8 of input H, W
+
+        # stage 0-2
+        x = self.stem(x)
+
+        # stage3: run both branches, exchange features, gate with CBAM
+        x_c = self.context_branch_layers[0](x)
+        x_s = self.spatial_branch_layers[0](x)
+        comp_c = self.compression_1(self.relu(x_c))  # before x_c is updated
+        x_c += self.down_1(self.relu(x_s))
+        x_c = self.cbam7(x_c)
+        x_s += resize(
+            comp_c,
+            size=out_size,
+            mode='bilinear',
+            align_corners=self.align_corners)
+        x_s = self.cbam6(x_s)
+        if self.training:
+            temp_context = x_s.clone()  # extra output returned in training
+
+        # stage4: same exchange pattern with the second fusion modules
+        x_c = self.context_branch_layers[1](self.relu(x_c))
+        x_s = self.spatial_branch_layers[1](self.relu(x_s))
+        comp_c = self.compression_2(self.relu(x_c))  # before x_c is updated
+        x_c += self.down_2(self.relu(x_s))
+        x_c = self.cbam9(x_c)
+        x_s += resize(
+            comp_c,
+            size=out_size,
+            mode='bilinear',
+            align_corners=self.align_corners)
+        x_s = self.cbam8(x_s)
+
+        # stage5: final blocks, pyramid pooling on the context branch
+        x_s = self.spatial_branch_layers[2](self.relu(x_s))
+        x_c = self.context_branch_layers[2](self.relu(x_c))
+        x_c = self.spp(x_c)
+        x_c = resize(
+            x_c,
+            size=out_size,
+            mode='bilinear',
+            align_corners=self.align_corners)
+
+        add = self.cbam13(x_s + x_c)
+        return (temp_context, add) if self.training else add
+
+
+@MODELS.register_module()
+class FAPPM_AVGP_slim(BaseModule):
+    """DDRNet-style backbone with FAPPM_avgp and no standalone CBAM gates.
+
+    This backbone is adapted from `Deep Dual-resolution Networks for
+    Real-time and Accurate Semantic Segmentation of Road Scenes
+    <http://arxiv.org/abs/2101.06085>`_.
+    Modified from https://github.com/ydhongHIT/DDRNet.
+
+    Args:
+        in_channels (int): Number of input image channels. Default: 3.
+        channels (int): The base channels of DDRNet. Default: 32.
+        ppm_channels (int): The channels of PPM module. Default: 64.
+        align_corners (bool): align_corners argument passed to ``resize``.
+            Default: False.
+        norm_cfg (dict): Config dict to build norm layer.
+            Default: dict(type='BN', requires_grad=True).
+        act_cfg (dict): Config dict for activation layer.
+            Default: dict(type='ReLU', inplace=True).
+        init_cfg (dict, optional): Initialization config dict.
+            Default: None.
+    """
+
+    def __init__(self,
+                 in_channels: int = 3,
+                 channels: int = 32,
+                 ppm_channels: int = 64,
+                 align_corners: bool = False,
+                 norm_cfg: OptConfigType = dict(type='BN', requires_grad=True),
+                 act_cfg: OptConfigType = dict(type='ReLU', inplace=True),
+                 init_cfg: OptConfigType = None):
+        super().__init__(init_cfg)
+
+        self.in_channels = in_channels
+        self.ppm_channels = ppm_channels
+
+        self.norm_cfg = norm_cfg
+        self.act_cfg = act_cfg
+        self.align_corners = align_corners
+
+        # stage 0-2
+        self.stem = self._make_stem_layer(in_channels, channels, num_blocks=2)
+        self.relu = nn.ReLU()  # non-inplace ReLU used between stages
+
+        # low resolution(context) branch
+        self.context_branch_layers = nn.ModuleList()
+        for i in range(3):
+            self.context_branch_layers.append(
+                self._make_layer(
+                    block=BasicBlock_cbam if i < 2 else Bottleneck_cbam,
+                    inplanes=channels * 2 ** (i + 1),
+                    planes=channels * 8 if i > 0 else channels * 4,
+                    num_blocks=2 if i < 2 else 1,
+                    stride=2))
+
+        # bilateral fusion (no CBAM modules are created in this variant)
+        self.compression_1 = ConvModule(
+            channels * 4,
+            channels * 2,
+            kernel_size=1,
+            norm_cfg=self.norm_cfg,
+            act_cfg=None)
+
+        self.down_1 = ConvModule(
+            channels * 2,
+            channels * 4,
+            kernel_size=3,
+            stride=2,
+            padding=1,
+            norm_cfg=self.norm_cfg,
+            act_cfg=None)
+
+        self.compression_2 = ConvModule(
+            channels * 8,
+            channels * 2,
+            kernel_size=1,
+            norm_cfg=self.norm_cfg,
+            act_cfg=None)
+
+        self.down_2 = nn.Sequential(
+            ConvModule(
+                channels * 2,
+                channels * 4,
+                kernel_size=3,
+                stride=2,
+                padding=1,
+                norm_cfg=self.norm_cfg,
+                act_cfg=self.act_cfg),
+            ConvModule(
+                channels * 4,
+                channels * 8,
+                kernel_size=3,
+                stride=2,
+                padding=1,
+                norm_cfg=self.norm_cfg,
+                act_cfg=None))
+
+        # high resolution(spatial) branch
+        self.spatial_branch_layers = nn.ModuleList()
+        for i in range(3):
+            self.spatial_branch_layers.append(
+                self._make_layer(
+                    block=BasicBlock_cbam if i < 2 else Bottleneck_cbam,
+                    inplanes=channels * 2,
+                    planes=channels * 2,
+                    num_blocks=2 if i < 2 else 1,
+                ))
+
+        self.spp = FAPPM_avgp(channels * 16, ppm_channels, channels * 4)
+
+    def _make_stem_layer(self, in_channels, channels, num_blocks):
+        layers = [  # two stride-2 convs
+            ConvModule(
+                in_channels,
+                channels,  # 32 by default
+                kernel_size=3,
+                stride=2,
+                padding=1,
+                norm_cfg=self.norm_cfg,
+                act_cfg=self.act_cfg),
+            ConvModule(
+                channels,
+                channels,
+                kernel_size=3,
+                stride=2,
+                padding=1,
+                norm_cfg=self.norm_cfg,
+                act_cfg=self.act_cfg),
+        ]
+
+        layers.extend([
+            self._make_layer(BasicBlock_cbam, channels, channels, num_blocks),
+            nn.ReLU(),
+            self._make_layer(
+                BasicBlock_cbam, channels, channels * 2, num_blocks, stride=2),
+            nn.ReLU(),
+        ])
+
+        return nn.Sequential(*layers)
+
+    def _make_layer(self, block, inplanes, planes, num_blocks, stride=1):
+        downsample = None
+        if stride != 1 or inplanes != planes * block.expansion:
+            downsample = nn.Sequential(  # 1x1 conv + norm shortcut
+                nn.Conv2d(
+                    inplanes,
+                    planes * block.expansion,
+                    kernel_size=1,
+                    stride=stride,
+                    bias=False),
+                build_norm_layer(self.norm_cfg, planes * block.expansion)[1])
+
+        layers = [  # only the first block gets the stride and downsample
+            block(
+                in_channels=inplanes,
+                channels=planes,
+                stride=stride,
+                downsample=downsample)
+        ]
+        inplanes = planes * block.expansion
+        for i in range(1, num_blocks):
+            layers.append(
+                block(
+                    in_channels=inplanes,
+                    channels=planes,
+                    stride=1,
+                    norm_cfg=self.norm_cfg,
+                    act_cfg_out=None if i == num_blocks - 1 else self.act_cfg))
+
+        return nn.Sequential(*layers)
+
+    def forward(self, x):
+        """Return the fused map, plus the stage-3 spatial map in training."""
+        out_size = (x.shape[-2] // 8, x.shape[-1] // 8)  # 1/8 of input H, W
+
+        # stage 0-2
+        x = self.stem(x)
+
+        # stage3: run both branches, then exchange features between them
+        x_c = self.context_branch_layers[0](x)
+        x_s = self.spatial_branch_layers[0](x)
+        comp_c = self.compression_1(self.relu(x_c))  # before x_c is updated
+        x_c += self.down_1(self.relu(x_s))
+        x_s += resize(
+            comp_c,
+            size=out_size,
+            mode='bilinear',
+            align_corners=self.align_corners)
+        # No CBAM gate here: __init__ builds no ``cbam6`` in this variant.
+        if self.training:
+            temp_context = x_s.clone()  # extra output returned in training
+
+        # stage4: same exchange pattern with the second fusion modules
+        x_c = self.context_branch_layers[1](self.relu(x_c))
+        x_s = self.spatial_branch_layers[1](self.relu(x_s))
+        comp_c = self.compression_2(self.relu(x_c))  # before x_c is updated
+        x_c += self.down_2(self.relu(x_s))
+        x_s += resize(
+            comp_c,
+            size=out_size,
+            mode='bilinear',
+            align_corners=self.align_corners)
+
+        # stage5: final blocks, pyramid pooling on the context branch
+        x_s = self.spatial_branch_layers[2](self.relu(x_s))
+        x_c = self.context_branch_layers[2](self.relu(x_c))
+        x_c = self.spp(x_c)
+        x_c = resize(
+            x_c,
+            size=out_size,
+            mode='bilinear',
+            align_corners=self.align_corners)
+
+        add = x_s + x_c
+        return (temp_context, add) if self.training else add
diff --git a/mmseg/models/utils/__init__.py b/mmseg/models/utils/__init__.py
index c0751b17c0..c617421840 100644
--- a/mmseg/models/utils/__init__.py
+++ b/mmseg/models/utils/__init__.py
@@ -1,11 +1,13 @@
 # Copyright (c) OpenMMLab. All rights reserved.
-from .basic_block import BasicBlock, Bottleneck
+from .basic_block import BasicBlock, BasicBlock_cbam, Bottleneck, Bottleneck_cbam, BasicBlock_cbam_group,\
+    Bottleneck_cbam_group, BasicBlock_cbam_group_r8, Bottleneck_cbam_group_r8
+
 from .embed import PatchEmbed
 from .encoding import Encoding
 from .inverted_residual import InvertedResidual, InvertedResidualV3
 from .make_divisible import make_divisible
 from .point_sample import get_uncertain_point_coords_with_randomness
-from .ppm import DAPPM, PAPPM
+from .ppm import DAPPM, PAPPM, DAPPM_cbam, FAPPM_conv, FAPPM_conv_nocbam, FAPPM_avgp, FAPPM_conv_group, FAPPM_conv_slim
 from .res_layer import ResLayer
 from .se_layer import SELayer
 from .self_attention_block import SelfAttentionBlock
@@ -21,7 +23,10 @@
     'ResLayer', 'SelfAttentionBlock', 'make_divisible', 'InvertedResidual',
     'UpConvBlock', 'InvertedResidualV3', 'SELayer', 'PatchEmbed',
     'nchw_to_nlc', 'nlc_to_nchw', 'nchw2nlc2nchw', 'nlc2nchw2nlc', 'Encoding',
-    'Upsample', 'resize', 'DAPPM', 'PAPPM', 'BasicBlock', 'Bottleneck',
-    'cross_attn_layer', 'LayerNorm2d', 'MLP',
-    'get_uncertain_point_coords_with_randomness'
+    'Upsample', 'resize', 'DAPPM', 'PAPPM', 'cross_attn_layer', 'LayerNorm2d',
+    'MLP', 'get_uncertain_point_coords_with_randomness', 'DAPPM_cbam',
+    'FAPPM_conv', 'FAPPM_avgp', 'BasicBlock', 'BasicBlock_cbam', 'Bottleneck',
+    'Bottleneck_cbam', 'FAPPM_conv_group', 'BasicBlock_cbam_group',
+    'Bottleneck_cbam_group', 'BasicBlock_cbam_group_r8',
+    'Bottleneck_cbam_group_r8', 'FAPPM_conv_slim', 'FAPPM_conv_nocbam'
 ]
diff --git a/mmseg/models/utils/basic_block.py b/mmseg/models/utils/basic_block.py
index 4e1ad8146d..b54c1361dd 100644
--- a/mmseg/models/utils/basic_block.py
+++ b/mmseg/models/utils/basic_block.py
@@ -1,15 +1,110 @@
 # Copyright (c) OpenMMLab. All rights reserved.
 from typing import Optional
-
+import torch
 import torch.nn as nn
 from mmcv.cnn import ConvModule
 from mmengine.model import BaseModule
 from torch import Tensor
+import torch.nn.functional as F
 
 from mmseg.registry import MODELS
 from mmseg.utils import OptConfigType
 
 
+class ChannelAttention(nn.Module):
+    """Channel gate: sigmoid of a shared 1x1-conv MLP on avg and max pools."""
+    def __init__(self, in_planes, ratio=16):
+        super(ChannelAttention, self).__init__()
+        self.avg_pool = nn.AdaptiveAvgPool2d(1)
+        self.max_pool = nn.AdaptiveMaxPool2d(1)
+        self.fc1 = nn.Conv2d(in_planes, in_planes // ratio, 1, bias=False)
+        self.relu1 = nn.ReLU()
+        self.fc2 = nn.Conv2d(in_planes // ratio, in_planes, 1, bias=False)
+        self.sigmoid = nn.Sigmoid()
+
+    def forward(self, x):
+        avg_out = self.fc2(self.relu1(self.fc1(self.avg_pool(x))))
+        max_out = self.fc2(self.relu1(self.fc1(self.max_pool(x))))
+        out = avg_out + max_out  # both pooled paths share fc1/fc2
+        return self.sigmoid(out)
+
+
+class ChannelAttention_group(nn.Module):
+    """Channel gate like ChannelAttention, with groups=4 in both 1x1 convs."""
+    def __init__(self, in_planes, ratio=16):
+        super(ChannelAttention_group, self).__init__()
+        self.avg_pool = nn.AdaptiveAvgPool2d(1)
+        self.max_pool = nn.AdaptiveMaxPool2d(1)
+        self.fc1 = nn.Conv2d(
+            in_planes, in_planes // ratio, 1, bias=False, groups=4)
+        self.relu1 = nn.ReLU()
+        self.fc2 = nn.Conv2d(
+            in_planes // ratio, in_planes, 1, bias=False, groups=4)
+        self.sigmoid = nn.Sigmoid()
+
+    def forward(self, x):
+        avg_out = self.fc2(self.relu1(self.fc1(self.avg_pool(x))))
+        max_out = self.fc2(self.relu1(self.fc1(self.max_pool(x))))
+        out = avg_out + max_out  # both pooled paths share fc1/fc2
+        return self.sigmoid(out)
+
+
+class SpatialAttention(nn.Module):
+    """Spatial gate: sigmoid of a conv over channel-wise mean and max maps."""
+    def __init__(self, kernel_size=7):
+        super(SpatialAttention, self).__init__()
+        assert kernel_size in (3, 7), 'kernel size must be 3 or 7'
+        padding = 3 if kernel_size == 7 else 1  # kernel_size // 2
+        self.conv1 = nn.Conv2d(2, 1, kernel_size, padding=padding, bias=False)
+        self.sigmoid = nn.Sigmoid()
+
+    def forward(self, x):
+        avg_out = torch.mean(x, dim=1, keepdim=True)
+        max_out, _ = torch.max(x, dim=1, keepdim=True)
+        x = torch.cat([avg_out, max_out], dim=1)  # 2-channel descriptor
+        x = self.conv1(x)
+        return self.sigmoid(x)
+
+
+class CBAM(nn.Module):
+    """Scale the input by channel attention, then by spatial attention."""
+    def __init__(self, in_planes, ratio=16, kernel_size=7):
+        super(CBAM, self).__init__()
+        self.ca = ChannelAttention(in_planes, ratio)
+        self.sa = SpatialAttention(kernel_size)
+
+    def forward(self, x):
+        out = x * self.ca(x)  # channel gate first
+        result = out * self.sa(out)  # spatial gate on the gated features
+        return result
+
+
+class CBAM_group(nn.Module):
+    """CBAM whose channel gate is ChannelAttention_group."""
+    def __init__(self, in_planes, ratio=16, kernel_size=7):
+        super(CBAM_group, self).__init__()
+        self.ca = ChannelAttention_group(in_planes, ratio)
+        self.sa = SpatialAttention(kernel_size)
+
+    def forward(self, x):
+        out = x * self.ca(x)  # channel gate first
+        result = out * self.sa(out)  # spatial gate on the gated features
+        return result
+
+
+class CBAM_group_r8(nn.Module):
+    """Same as CBAM_group but with a default reduction ratio of 8."""
+    def __init__(self, in_planes, ratio=8, kernel_size=7):
+        super(CBAM_group_r8, self).__init__()
+        self.ca = ChannelAttention_group(in_planes, ratio)
+        self.sa = SpatialAttention(kernel_size)
+
+    def forward(self, x):
+        out = x * self.ca(x)  # channel gate first
+        result = out * self.sa(out)  # spatial gate on the gated features
+        return result
+
+
 class BasicBlock(BaseModule):
     """Basic block from `ResNet <https://arxiv.org/abs/1512.03385>`_.
 
@@ -141,3 +236,563 @@ def forward(self, x: Tensor) -> Tensor:
             out = self.act(out)
 
         return out
+
+
+class GroupBatchnorm2d(nn.Module):
+    """Standardise each of ``group_num`` channel groups of a 4-D input, then
+    apply a learnable per-channel scale (``weight``) and shift (``bias``)."""
+
+    def __init__(self, c_num: int, group_num: int = 16, eps: float = 1e-10):
+        super().__init__()
+        assert c_num >= group_num
+        self.group_num, self.eps = group_num, eps
+        self.weight = nn.Parameter(torch.randn(c_num, 1, 1))
+        self.bias = nn.Parameter(torch.zeros(c_num, 1, 1))
+
+    def forward(self, x):
+        N, C, H, W = x.size()
+        # reshape (not view): also accepts non-contiguous, e.g. permuted, input
+        grouped = x.reshape(N, self.group_num, -1)
+        mean, std = grouped.mean(2, keepdim=True), grouped.std(2, keepdim=True)
+        normed = ((grouped - mean) / (std + self.eps)).reshape(N, C, H, W)
+        return normed * self.weight + self.bias
+
+
+class SRU(nn.Module):
+    """Spatial reconstruction unit: gate features by normalised GN scale.
+
+    Args:
+        oup_channels (int): Channels of the input (and output) tensor.
+        group_num (int): Number of normalisation groups. Default: 16.
+        gate_treshold (float): Threshold on the gate weights. Default: 0.5.
+        torch_gn (bool): Use ``nn.GroupNorm`` if True, otherwise
+            ``GroupBatchnorm2d``. Default: True.
+    """
+
+    def __init__(self,
+                 oup_channels: int,
+                 group_num: int = 16,
+                 gate_treshold: float = 0.5,
+                 torch_gn: bool = True):
+        super().__init__()
+        self.gn = (nn.GroupNorm(group_num, oup_channels) if torch_gn else
+                   GroupBatchnorm2d(oup_channels, group_num))
+        self.gate_treshold = gate_treshold
+        self.sigomid = nn.Sigmoid()
+
+    def forward(self, x):
+        # Tensor.sum() replaces builtin sum(), which looped over channels
+        w_gamma = (self.gn.weight / self.gn.weight.sum()).view(1, -1, 1, 1)
+        reweights = self.sigomid(self.gn(x) * w_gamma)
+        above = reweights > self.gate_treshold
+        # above the threshold: weight 1 in w1 and 0 in w2; else unchanged
+        w1 = torch.where(above, torch.ones_like(reweights), reweights)
+        w2 = torch.where(above, torch.zeros_like(reweights), reweights)
+        return self.reconstruct(w1 * x, w2 * x)
+
+    def reconstruct(self, x_1, x_2):
+        """Split both inputs into channel halves and cross-add them."""
+        x_11, x_12 = torch.split(x_1, x_1.size(1) // 2, dim=1)
+        x_21, x_22 = torch.split(x_2, x_2.size(1) // 2, dim=1)
+        return torch.cat([x_11 + x_22, x_12 + x_21], dim=1)
+
+
+class CRU(nn.Module):
+    """Channel reconstruction unit (split, transform, fuse).
+
+    Args:
+        op_channel (int): Channels of the input and of the output.
+        alpha (float): Fraction of channels sent to the upper branch,
+            0 < alpha < 1. Default: 1 / 2.
+        squeeze_radio (int): Channel reduction factor of the 1x1 squeeze
+            convolutions. Default: 2.
+        group_size (int): ``groups`` of the group-wise conv. Default: 2.
+        group_kernel_size (int): Kernel size of the group-wise conv.
+            Default: 3.
+    """
+
+    def __init__(self,
+                 op_channel: int,
+                 alpha: float = 1 / 2,
+                 squeeze_radio: int = 2,
+                 group_size: int = 2,
+                 group_kernel_size: int = 3):
+        super().__init__()
+        up_channel = int(alpha * op_channel)
+        low_channel = op_channel - up_channel
+        up_squeezed = up_channel // squeeze_radio
+        low_squeezed = low_channel // squeeze_radio
+        self.up_channel, self.low_channel = up_channel, low_channel
+        self.squeeze1 = nn.Conv2d(up_channel, up_squeezed, 1, bias=False)
+        self.squeeze2 = nn.Conv2d(low_channel, low_squeezed, 1, bias=False)
+        # upper branch: group-wise conv (with bias) plus point-wise conv
+        self.GWC = nn.Conv2d(
+            up_squeezed,
+            op_channel,
+            kernel_size=group_kernel_size,
+            padding=group_kernel_size // 2,
+            groups=group_size)
+        self.PWC1 = nn.Conv2d(up_squeezed, op_channel, 1, bias=False)
+        # lower branch: PWC2 output is concatenated with its own input, so
+        # it supplies the channels missing to reach ``op_channel``
+        self.PWC2 = nn.Conv2d(
+            low_squeezed, op_channel - low_squeezed, 1, bias=False)
+        self.advavg = nn.AdaptiveAvgPool2d(1)
+
+    def forward(self, x):
+        # split along channels and squeeze each part
+        up, low = torch.split(x, [self.up_channel, self.low_channel], dim=1)
+        up, low = self.squeeze1(up), self.squeeze2(low)
+        # transform; both branch outputs have ``op_channel`` channels
+        fused = torch.cat(
+            [self.GWC(up) + self.PWC1(up), self.PWC2(low), low], dim=1)
+        # fuse: softmax over pooled channel descriptors, then add the halves
+        fused = F.softmax(self.advavg(fused), dim=1) * fused
+        out1, out2 = torch.split(fused, fused.size(1) // 2, dim=1)
+        return out1 + out2
+
+
+class ScConv(nn.Module):
+    """Apply :class:`SRU` and then :class:`CRU` to the input.
+
+    ``op_channel`` is handed to both units; ``group_num`` and
+    ``gate_treshold`` go to the SRU, every other argument to the CRU.
+    """
+
+    def __init__(self,
+                 op_channel: int,
+                 group_num: int = 4,
+                 gate_treshold: float = 0.5,
+                 alpha: float = 1 / 2,
+                 squeeze_radio: int = 2,
+                 group_size: int = 2,
+                 group_kernel_size: int = 3):
+        super().__init__()
+        # keyword arguments of the two sub-units
+        sru_kwargs = dict(group_num=group_num, gate_treshold=gate_treshold)
+        cru_kwargs = dict(
+            alpha=alpha, squeeze_radio=squeeze_radio,
+            group_size=group_size, group_kernel_size=group_kernel_size)
+        self.SRU = SRU(op_channel, **sru_kwargs)
+        self.CRU = CRU(op_channel, **cru_kwargs)
+
+    def forward(self, x):
+        return self.CRU(self.SRU(x))
+
+
+class BasicBlock_cbam(BaseModule):
+    """ResNet-style basic block with CBAM applied before the shortcut sum.
+
+    Two 3x3 ``ConvModule`` layers are followed by :class:`CBAM`; the result
+    is added to the shortcut (the input, optionally passed through
+    ``downsample``) and then activated when ``act_cfg_out`` is set.
+
+    Args:
+        in_channels (int): Input channels.
+        channels (int): Output channels.
+        stride (int): Stride of the first convolution. Default: 1.
+        downsample (nn.Module, optional): Module applied to the input to
+            form the shortcut branch. Default: None.
+        norm_cfg (dict, optional): Config dict for normalization layer.
+            Default: dict(type='BN').
+        act_cfg (dict, optional): Config dict for the activation layer of
+            the first ConvModule. Default: dict(type='ReLU', inplace=True).
+        act_cfg_out (dict, optional): Config dict for the activation applied
+            after the shortcut sum; a falsy value disables it.
+            Default: dict(type='ReLU', inplace=True).
+        init_cfg (dict, optional): Initialization config dict. Default: None.
+    """
+
+    expansion = 1
+
+    def __init__(self,
+                 in_channels: int,
+                 channels: int,
+                 stride: int = 1,
+                 downsample: nn.Module = None,
+                 norm_cfg: OptConfigType = dict(type='BN'),
+                 act_cfg: OptConfigType = dict(type='ReLU', inplace=True),
+                 act_cfg_out: OptConfigType = dict(type='ReLU', inplace=True),
+                 init_cfg: OptConfigType = None):
+        super().__init__(init_cfg)
+        # settings shared by both 3x3 convolutions
+        conv3x3 = dict(kernel_size=3, padding=1, norm_cfg=norm_cfg)
+        self.conv1 = ConvModule(
+            in_channels, channels, stride=stride, act_cfg=act_cfg, **conv3x3)
+        # act_cfg=None here; the block-level activation, if configured, is
+        # applied in forward() after the shortcut has been added
+        self.conv2 = ConvModule(channels, channels, act_cfg=None, **conv3x3)
+        self.downsample = downsample
+        self.cbam = CBAM(channels)
+        # ``self.act`` exists only when an output activation is configured
+        if act_cfg_out:
+            self.act = MODELS.build(act_cfg_out)
+
+    def forward(self, x: Tensor) -> Tensor:
+        """Compute ``cbam(conv2(conv1(x))) + shortcut`` and activate it.
+
+        Args:
+            x (Tensor): Input feature map.
+
+        Returns:
+            Tensor: Block output; the final activation is applied only if
+            ``self.act`` was built.
+        """
+        out = self.cbam(self.conv2(self.conv1(x)))
+
+        # ``self.downsample`` is tested for truthiness (not ``is None``),
+        # so any falsy value leaves the input itself as the shortcut
+        shortcut = self.downsample(x) if self.downsample else x
+        out = out + shortcut
+
+        return self.act(out) if hasattr(self, 'act') else out
+
+
+class Bottleneck_cbam(BaseModule):
+    """ResNet-style bottleneck block with CBAM on the expanded features.
+
+    A 1x1, a 3x3 and a 1x1 ``ConvModule`` are followed by :class:`CBAM`
+    (``cbam3``); the result is added to the shortcut (the input, optionally
+    passed through ``downsample``) and activated when ``act_cfg_out`` is
+    set. ``cbam2`` is built as well, but ``forward`` never calls it.
+
+    Args:
+        in_channels (int): Input channels.
+        channels (int): Channels of the two inner convolutions; the block
+            outputs ``channels * expansion`` channels.
+        stride (int): Stride of the 3x3 convolution. Default: 1.
+        downsample (nn.Module, optional): Module applied to the input to
+            form the shortcut branch. Default: None.
+        norm_cfg (dict, optional): Config dict for normalization layer.
+            Default: dict(type='BN').
+        act_cfg (dict, optional): Config dict for the activation layer of
+            the first two ConvModules.
+            Default: dict(type='ReLU', inplace=True).
+        act_cfg_out (dict, optional): Config dict for the activation applied
+            after the shortcut sum. Default: None.
+        init_cfg (dict, optional): Initialization config dict. Default: None.
+    """
+
+    expansion = 2
+
+    def __init__(self,
+                 in_channels: int,
+                 channels: int,
+                 stride: int = 1,
+                 downsample: Optional[nn.Module] = None,
+                 norm_cfg: OptConfigType = dict(type='BN'),
+                 act_cfg: OptConfigType = dict(type='ReLU', inplace=True),
+                 act_cfg_out: OptConfigType = None,
+                 init_cfg: OptConfigType = None):
+        super().__init__(init_cfg)
+        out_channels = channels * self.expansion
+        # normalization/activation settings shared by conv1 and conv2
+        norm_act = dict(norm_cfg=norm_cfg, act_cfg=act_cfg)
+        self.conv1 = ConvModule(in_channels, channels, 1, **norm_act)
+        self.conv2 = ConvModule(
+            channels, channels, 3, stride=stride, padding=1, **norm_act)
+        # act_cfg=None here; the block-level activation runs in forward()
+        self.conv3 = ConvModule(
+            channels, out_channels, 1, norm_cfg=norm_cfg, act_cfg=None)
+        # NOTE(review): cbam2 is registered (and owns parameters) but
+        # forward() never calls it.
+        self.cbam2 = CBAM(channels)
+        self.cbam3 = CBAM(out_channels)
+        if act_cfg_out:
+            self.act = MODELS.build(act_cfg_out)
+        self.downsample = downsample
+
+    def forward(self, x: Tensor) -> Tensor:
+        """Run the three convolutions, ``cbam3`` and the shortcut sum.
+
+        Args:
+            x (Tensor): Input feature map.
+
+        Returns:
+            Tensor: Block output, activated only if ``self.act`` was built.
+        """
+        out = self.cbam3(self.conv3(self.conv2(self.conv1(x))))
+        shortcut = self.downsample(x) if self.downsample else x
+        out = out + shortcut
+
+        return self.act(out) if hasattr(self, 'act') else out
+
+
+class BasicBlock_cbam_group(BaseModule):
+    """ResNet-style basic block with grouped convolutions and ``CBAM_group``.
+
+    Two grouped (``groups=4``) 3x3 ``ConvModule`` layers feed
+    :class:`CBAM_group`; its output is added to the shortcut (the input or
+    ``downsample(input)``) and activated when ``act_cfg_out`` is set.
+
+    Args:
+        in_channels (int): Input channels.
+        channels (int): Output channels.
+        stride (int): Stride of the first convolution. Default: 1.
+        downsample (nn.Module, optional): Module applied to the input to
+            form the shortcut branch. Default: None.
+        norm_cfg (dict, optional): Config dict for normalization layer.
+            Default: dict(type='BN').
+        act_cfg (dict, optional): Config dict for the activation layer of
+            the first ConvModule. Default: dict(type='ReLU', inplace=True).
+        act_cfg_out (dict, optional): Config dict for the activation applied
+            after the shortcut sum; a falsy value disables it.
+            Default: dict(type='ReLU', inplace=True).
+        init_cfg (dict, optional): Initialization config dict. Default: None.
+    """
+
+    expansion = 1
+
+    def __init__(self,
+                 in_channels: int,
+                 channels: int,
+                 stride: int = 1,
+                 downsample: nn.Module = None,
+                 norm_cfg: OptConfigType = dict(type='BN'),
+                 act_cfg: OptConfigType = dict(type='ReLU', inplace=True),
+                 act_cfg_out: OptConfigType = dict(type='ReLU', inplace=True),
+                 init_cfg: OptConfigType = None):
+        super().__init__(init_cfg)
+        # settings shared by both grouped 3x3 convolutions
+        conv3x3 = dict(kernel_size=3, padding=1, groups=4, norm_cfg=norm_cfg)
+        self.conv1 = ConvModule(
+            in_channels, channels, stride=stride, act_cfg=act_cfg, **conv3x3)
+        # act_cfg=None here; the block-level activation, if configured, is
+        # applied in forward() after the shortcut has been added
+        self.conv2 = ConvModule(channels, channels, act_cfg=None, **conv3x3)
+        self.downsample = downsample
+        self.cbam = CBAM_group(channels)
+        # ``self.act`` exists only when an output activation is configured
+        if act_cfg_out:
+            self.act = MODELS.build(act_cfg_out)
+
+    def forward(self, x: Tensor) -> Tensor:
+        """Compute ``cbam(conv2(conv1(x))) + shortcut`` and activate it.
+
+        Args:
+            x (Tensor): Input feature map.
+
+        Returns:
+            Tensor: Block output; the final activation is applied only if
+            ``self.act`` was built.
+        """
+        out = self.cbam(self.conv2(self.conv1(x)))
+
+        # ``self.downsample`` is tested for truthiness (not ``is None``),
+        # so any falsy value leaves the input itself as the shortcut
+        shortcut = self.downsample(x) if self.downsample else x
+        out = out + shortcut
+
+        return self.act(out) if hasattr(self, 'act') else out
+
+
+class Bottleneck_cbam_group(BaseModule):
+    """ResNet-style bottleneck block with grouped convs and ``CBAM_group``.
+
+    A 1x1 ``ConvModule`` and a grouped 3x3 and 1x1 ``ConvModule``
+    (``groups=4``) are followed by :class:`CBAM_group` (``cbam3``); the
+    result is added to the shortcut and activated when ``act_cfg_out`` is
+    set. ``cbam2`` is built as well, but ``forward`` never calls it.
+
+    Args:
+        in_channels (int): Input channels.
+        channels (int): Channels of the two inner convolutions; the block
+            outputs ``channels * expansion`` channels.
+        stride (int): Stride of the 3x3 convolution. Default: 1.
+        downsample (nn.Module, optional): Module applied to the input to
+            form the shortcut branch. Default: None.
+        norm_cfg (dict, optional): Config dict for normalization layer.
+            Default: dict(type='BN').
+        act_cfg (dict, optional): Config dict for the activation layer of
+            the first two ConvModules.
+            Default: dict(type='ReLU', inplace=True).
+        act_cfg_out (dict, optional): Config dict for the activation applied
+            after the shortcut sum. Default: None.
+        init_cfg (dict, optional): Initialization config dict. Default: None.
+    """
+
+    expansion = 2
+
+    def __init__(self,
+                 in_channels: int,
+                 channels: int,
+                 stride: int = 1,
+                 downsample: Optional[nn.Module] = None,
+                 norm_cfg: OptConfigType = dict(type='BN'),
+                 act_cfg: OptConfigType = dict(type='ReLU', inplace=True),
+                 act_cfg_out: OptConfigType = None,
+                 init_cfg: OptConfigType = None):
+        super().__init__(init_cfg)
+        out_channels = channels * self.expansion
+        # normalization/activation settings shared by conv1 and conv2
+        norm_act = dict(norm_cfg=norm_cfg, act_cfg=act_cfg)
+        self.conv1 = ConvModule(in_channels, channels, 1, **norm_act)
+        self.conv2 = ConvModule(
+            channels, channels, 3, stride=stride, padding=1, groups=4,
+            **norm_act)
+        # act_cfg=None here; the block-level activation runs in forward()
+        self.conv3 = ConvModule(
+            channels, out_channels, 1, groups=4, norm_cfg=norm_cfg,
+            act_cfg=None)
+        # NOTE(review): cbam2 is registered (and owns parameters) but
+        # forward() never calls it.
+        self.cbam2 = CBAM_group(channels)
+        self.cbam3 = CBAM_group(out_channels)
+        if act_cfg_out:
+            self.act = MODELS.build(act_cfg_out)
+        self.downsample = downsample
+
+    def forward(self, x: Tensor) -> Tensor:
+        """Run the three convolutions, ``cbam3`` and the shortcut sum.
+
+        Args:
+            x (Tensor): Input feature map.
+
+        Returns:
+            Tensor: Block output, activated only if ``self.act`` was built.
+        """
+        out = self.cbam3(self.conv3(self.conv2(self.conv1(x))))
+        shortcut = self.downsample(x) if self.downsample else x
+        out = out + shortcut
+
+        return self.act(out) if hasattr(self, 'act') else out
+
+
+class BasicBlock_cbam_group_r8(BaseModule):
+    """Basic block with grouped convolutions and ``CBAM_group_r8``.
+
+    Two grouped (``groups=4``) 3x3 ``ConvModule`` layers feed
+    :class:`CBAM_group_r8`; its output is added to the shortcut (the input
+    or ``downsample(input)``) and activated when ``act_cfg_out`` is set.
+
+    Args:
+        in_channels (int): Input channels.
+        channels (int): Output channels.
+        stride (int): Stride of the first convolution. Default: 1.
+        downsample (nn.Module, optional): Module applied to the input to
+            form the shortcut branch. Default: None.
+        norm_cfg (dict, optional): Config dict for normalization layer.
+            Default: dict(type='BN').
+        act_cfg (dict, optional): Config dict for the activation layer of
+            the first ConvModule. Default: dict(type='ReLU', inplace=True).
+        act_cfg_out (dict, optional): Config dict for the activation applied
+            after the shortcut sum; a falsy value disables it.
+            Default: dict(type='ReLU', inplace=True).
+        init_cfg (dict, optional): Initialization config dict. Default: None.
+    """
+
+    expansion = 1
+
+    def __init__(self,
+                 in_channels: int,
+                 channels: int,
+                 stride: int = 1,
+                 downsample: nn.Module = None,
+                 norm_cfg: OptConfigType = dict(type='BN'),
+                 act_cfg: OptConfigType = dict(type='ReLU', inplace=True),
+                 act_cfg_out: OptConfigType = dict(type='ReLU', inplace=True),
+                 init_cfg: OptConfigType = None):
+        super().__init__(init_cfg)
+        # settings shared by both grouped 3x3 convolutions
+        conv3x3 = dict(kernel_size=3, padding=1, groups=4, norm_cfg=norm_cfg)
+        self.conv1 = ConvModule(
+            in_channels, channels, stride=stride, act_cfg=act_cfg, **conv3x3)
+        # act_cfg=None here; the block-level activation, if configured, is
+        # applied in forward() after the shortcut has been added
+        self.conv2 = ConvModule(channels, channels, act_cfg=None, **conv3x3)
+        self.downsample = downsample
+        self.cbam = CBAM_group_r8(channels)
+        # ``self.act`` exists only when an output activation is configured
+        if act_cfg_out:
+            self.act = MODELS.build(act_cfg_out)
+
+    def forward(self, x: Tensor) -> Tensor:
+        """Compute ``cbam(conv2(conv1(x))) + shortcut`` and activate it.
+
+        Args:
+            x (Tensor): Input feature map.
+
+        Returns:
+            Tensor: Block output; the final activation is applied only if
+            ``self.act`` was built.
+        """
+        out = self.cbam(self.conv2(self.conv1(x)))
+
+        # ``self.downsample`` is tested for truthiness (not ``is None``),
+        # so any falsy value leaves the input itself as the shortcut
+        shortcut = self.downsample(x) if self.downsample else x
+        out = out + shortcut
+
+        return self.act(out) if hasattr(self, 'act') else out
+
+
+class Bottleneck_cbam_group_r8(BaseModule):
+    """Bottleneck block with grouped convolutions and ``CBAM_group_r8``.
+
+    A 1x1 ``ConvModule`` and a grouped 3x3 and 1x1 ``ConvModule``
+    (``groups=4``) are followed by :class:`CBAM_group_r8` (``cbam3``); the
+    result is added to the shortcut and activated when ``act_cfg_out`` is
+    set. ``cbam2`` is built as well, but ``forward`` never calls it.
+
+    Args:
+        in_channels (int): Input channels.
+        channels (int): Channels of the two inner convolutions; the block
+            outputs ``channels * expansion`` channels.
+        stride (int): Stride of the 3x3 convolution. Default: 1.
+        downsample (nn.Module, optional): Module applied to the input to
+            form the shortcut branch. Default: None.
+        norm_cfg (dict, optional): Config dict for normalization layer.
+            Default: dict(type='BN').
+        act_cfg (dict, optional): Config dict for the activation layer of
+            the first two ConvModules.
+            Default: dict(type='ReLU', inplace=True).
+        act_cfg_out (dict, optional): Config dict for the activation applied
+            after the shortcut sum. Default: None.
+        init_cfg (dict, optional): Initialization config dict. Default: None.
+    """
+
+    expansion = 2
+
+    def __init__(self,
+                 in_channels: int,
+                 channels: int,
+                 stride: int = 1,
+                 downsample: Optional[nn.Module] = None,
+                 norm_cfg: OptConfigType = dict(type='BN'),
+                 act_cfg: OptConfigType = dict(type='ReLU', inplace=True),
+                 act_cfg_out: OptConfigType = None,
+                 init_cfg: OptConfigType = None):
+        super().__init__(init_cfg)
+        out_channels = channels * self.expansion
+        # normalization/activation settings shared by conv1 and conv2
+        norm_act = dict(norm_cfg=norm_cfg, act_cfg=act_cfg)
+        self.conv1 = ConvModule(in_channels, channels, 1, **norm_act)
+        self.conv2 = ConvModule(
+            channels, channels, 3, stride=stride, padding=1, groups=4,
+            **norm_act)
+        # act_cfg=None here; the block-level activation runs in forward()
+        self.conv3 = ConvModule(
+            channels, out_channels, 1, groups=4, norm_cfg=norm_cfg,
+            act_cfg=None)
+        # NOTE(review): cbam2 is registered (and owns parameters) but
+        # forward() never calls it.
+        self.cbam2 = CBAM_group_r8(channels)
+        self.cbam3 = CBAM_group_r8(out_channels)
+        if act_cfg_out:
+            self.act = MODELS.build(act_cfg_out)
+        self.downsample = downsample
+
+    def forward(self, x: Tensor) -> Tensor:
+        """Run the three convolutions, ``cbam3`` and the shortcut sum.
+
+        Args:
+            x (Tensor): Input feature map.
+
+        Returns:
+            Tensor: Block output, activated only if ``self.act`` was built.
+        """
+        out = self.cbam3(self.conv3(self.conv2(self.conv1(x))))
+        shortcut = self.downsample(x) if self.downsample else x
+        out = out + shortcut
+
+        return self.act(out) if hasattr(self, 'act') else out
diff --git a/mmseg/models/utils/ppm.py b/mmseg/models/utils/ppm.py
index 5fe6ff26fa..00296dd9ed 100644
--- a/mmseg/models/utils/ppm.py
+++ b/mmseg/models/utils/ppm.py
@@ -9,6 +9,87 @@
 from torch import Tensor
 
 
+class ChannelAttention(nn.Module):
+    """Channel gate: shared bottleneck MLP over avg- and max-pooled input."""
+
+    def __init__(self, in_planes, ratio=16):
+        super().__init__()
+        self.avg_pool = nn.AdaptiveAvgPool2d(1)
+        self.max_pool = nn.AdaptiveMaxPool2d(1)
+        self.fc1 = nn.Conv2d(in_planes, in_planes // ratio, 1, bias=False)
+        self.relu1 = nn.ReLU()
+        self.fc2 = nn.Conv2d(in_planes // ratio, in_planes, 1, bias=False)
+        self.sigmoid = nn.Sigmoid()
+
+    def forward(self, x):
+        avg_out, max_out = (self.fc2(self.relu1(self.fc1(pool(x))))
+                            for pool in (self.avg_pool, self.max_pool))
+        return self.sigmoid(avg_out + max_out)
+
+
+class ChannelAttention_group(nn.Module):
+    """Channel gate whose two 1x1 convolutions use ``groups=4``."""
+
+    def __init__(self, in_planes, ratio=16):
+        super().__init__()
+        hidden = in_planes // ratio
+        self.avg_pool = nn.AdaptiveAvgPool2d(1)
+        self.max_pool = nn.AdaptiveMaxPool2d(1)
+        self.fc1 = nn.Conv2d(in_planes, hidden, 1, bias=False, groups=4)
+        self.relu1 = nn.ReLU()
+        self.fc2 = nn.Conv2d(hidden, in_planes, 1, bias=False, groups=4)
+        self.sigmoid = nn.Sigmoid()
+
+    def forward(self, x):
+        # the same two-layer bottleneck is applied to both pooled descriptors
+        avg_out, max_out = (self.fc2(self.relu1(self.fc1(pool(x))))
+                            for pool in (self.avg_pool, self.max_pool))
+        return self.sigmoid(avg_out + max_out)
+
+
+class SpatialAttention(nn.Module):
+    """Spatial attention gate built from channel-wise mean and max maps."""
+
+    def __init__(self, kernel_size=7):
+        super().__init__()
+        assert kernel_size in (3, 7), 'kernel size must be 3 or 7'
+        # padding that keeps H x W unchanged: 3 for the 7x7 kernel, else 1
+        pad = 1 if kernel_size != 7 else 3
+        self.conv1 = nn.Conv2d(2, 1, kernel_size, padding=pad, bias=False)
+        self.sigmoid = nn.Sigmoid()
+
+    def forward(self, x):
+        avg = x.mean(dim=1, keepdim=True)
+        peak = x.max(dim=1, keepdim=True)[0]
+        return self.sigmoid(self.conv1(torch.cat([avg, peak], dim=1)))
+
+
+class CBAM(nn.Module):
+    """Gate the input with channel attention, then with spatial attention."""
+
+    def __init__(self, in_planes, ratio=16, kernel_size=7):
+        super().__init__()
+        self.ca = ChannelAttention(in_planes, ratio)
+        self.sa = SpatialAttention(kernel_size)
+
+    def forward(self, x):
+        channel_gated = x * self.ca(x)
+        return channel_gated * self.sa(channel_gated)
+
+
+class CBAM_group(nn.Module):
+    """CBAM whose channel gate is a ``ChannelAttention_group`` module."""
+
+    def __init__(self, in_planes, ratio=16, kernel_size=7):
+        super().__init__()
+        self.ca = ChannelAttention_group(in_planes, ratio)
+        self.sa = SpatialAttention(kernel_size)
+
+    def forward(self, x):
+        channel_gated = x * self.ca(x)
+        return channel_gated * self.sa(channel_gated)
+
+
 class DAPPM(BaseModule):
     """DAPPM module in `DDRNet <https://arxiv.org/abs/2101.06085>`_.
 
@@ -191,3 +272,540 @@ def forward(self, inputs: Tensor):
         scale_out = self.processes(torch.cat(feats, dim=1))
         return self.compression(torch.cat([x_, scale_out],
                                           dim=1)) + self.shortcut(inputs)
+
+
+class DAPPM_cbam(BaseModule):
+    """DAPPM of `DDRNet <https://arxiv.org/abs/2101.06085>`_ with a CBAM
+    appended to every 3x3 fusion convolution.
+
+    Every branch after the first is upsampled to the input size, added to
+    the previous branch output and passed through a 3x3 ConvModule and a
+    :class:`CBAM`.
+
+    Args:
+        in_channels (int): Input channels.
+        branch_channels (int): Channels of every scale branch.
+        out_channels (int): Output channels.
+        num_scales (int): Number of scale branches, counting the un-pooled
+            first branch and the globally pooled last branch.
+        kernel_sizes (list[int]): Average-pooling kernel size of each
+            intermediate branch. Default: [5, 9, 17].
+        strides (list[int]): Pooling stride of each intermediate branch.
+            Default: [2, 4, 8].
+        paddings (list[int]): Pooling padding of each intermediate branch.
+            Default: [2, 4, 8].
+        norm_cfg (dict): Config dict for normalization layer.
+            Default: dict(type='BN', momentum=0.1).
+        act_cfg (dict): Config dict for activation layer in ConvModule.
+            Default: dict(type='ReLU', inplace=True).
+        conv_cfg (dict): Extra keyword arguments of the 1x1 ConvModules.
+            Default: dict(order=('norm', 'act', 'conv'), bias=False).
+        upsample_mode (str): Upsample mode. Default: 'bilinear'.
+    """
+
+    def __init__(self,
+                 in_channels: int,
+                 branch_channels: int,
+                 out_channels: int,
+                 num_scales: int,
+                 kernel_sizes: List[int] = [5, 9, 17],
+                 strides: List[int] = [2, 4, 8],
+                 paddings: List[int] = [2, 4, 8],
+                 norm_cfg: Dict = dict(type='BN', momentum=0.1),
+                 act_cfg: Dict = dict(type='ReLU', inplace=True),
+                 conv_cfg: Dict = dict(
+                     order=('norm', 'act', 'conv'), bias=False),
+                 upsample_mode: str = 'bilinear'):
+        super().__init__()
+
+        self.num_scales = num_scales
+        # NOTE: the attribute is spelled ``unsample_mode``; forward() reads it
+        self.unsample_mode = upsample_mode
+        self.in_channels = in_channels
+        self.branch_channels = branch_channels
+        self.out_channels = out_channels
+        self.norm_cfg = norm_cfg
+        self.act_cfg = act_cfg
+        self.conv_cfg = conv_cfg
+
+        def conv1x1(cin: int, cout: int) -> ConvModule:
+            """Build a 1x1 ConvModule with the shared norm/act/conv cfg."""
+            return ConvModule(
+                cin,
+                cout,
+                kernel_size=1,
+                norm_cfg=norm_cfg,
+                act_cfg=act_cfg,
+                **conv_cfg)
+
+        # branch 0: 1x1 conv on the un-pooled input
+        branches = [conv1x1(in_channels, branch_channels)]
+        # intermediate branches: average pooling followed by a 1x1 conv
+        for idx in range(num_scales - 2):
+            pool = nn.AvgPool2d(
+                kernel_size=kernel_sizes[idx],
+                stride=strides[idx],
+                padding=paddings[idx])
+            branches.append(
+                Sequential(pool, conv1x1(in_channels, branch_channels)))
+        # last branch: global average pooling followed by a 1x1 conv
+        branches.append(
+            Sequential(
+                nn.AdaptiveAvgPool2d((1, 1)),
+                conv1x1(in_channels, branch_channels)))
+        self.scales = ModuleList(branches)
+
+        # One 3x3 conv followed by CBAM for every branch after the first.
+        # NOTE(review): ``conv_cfg`` is not forwarded to these convolutions.
+        self.processes = ModuleList([
+            Sequential(
+                ConvModule(
+                    branch_channels,
+                    branch_channels,
+                    kernel_size=3,
+                    padding=1,
+                    norm_cfg=norm_cfg,
+                    act_cfg=act_cfg),
+                CBAM(branch_channels)) for _ in range(num_scales - 1)
+        ])
+
+        # applied to the channel-wise concatenation of all branch outputs
+        self.compression = conv1x1(branch_channels * num_scales, out_channels)
+
+        # 1x1 projection of the input that is added to the compressed output
+        self.shortcut = conv1x1(in_channels, out_channels)
+
+    def forward(self, inputs: Tensor):
+        """Fuse the scale branches and add the projected input.
+
+        Args:
+            inputs (Tensor): Input feature map.
+
+        Returns:
+            Tensor: ``compression(cat(branch outputs)) + shortcut(inputs)``.
+        """
+        feats = [self.scales[0](inputs)]
+
+        for i, process in enumerate(self.processes, start=1):
+            # bring the pooled branch back to the input resolution
+            upsampled = F.interpolate(
+                self.scales[i](inputs),
+                size=inputs.shape[2:],
+                mode=self.unsample_mode)
+            # each branch is fused with the output of the previous one
+            feats.append(process(upsampled + feats[-1]))
+
+        fused = self.compression(torch.cat(feats, dim=1))
+        return fused + self.shortcut(inputs)
+
+
+class FAPPM_conv(DAPPM):
+    """PAPPM-style module (`PIDNet <https://arxiv.org/abs/2206.02066>`_)
+    with a :class:`CBAM` after the grouped 3x3 fusion convolution.
+
+    Args:
+        in_channels (int): Input channels.
+        branch_channels (int): Branch channels.
+        out_channels (int): Output channels.
+        num_scales (int): Number of scales.
+        kernel_sizes (list[int]): Kernel sizes of each scale.
+        strides (list[int]): Strides of each scale.
+        paddings (list[int]): Paddings of each scale.
+        norm_cfg (dict): Config dict for normalization layer.
+            Default: dict(type='BN', momentum=0.1).
+        act_cfg (dict): Config dict for activation layer in ConvModule.
+            Default: dict(type='ReLU', inplace=True).
+        conv_cfg (dict): Config dict for convolution layer in ConvModule.
+            Default: dict(order=('norm', 'act', 'conv'), bias=False).
+        upsample_mode (str): Upsample mode. Default: 'bilinear'.
+    """
+
+    def __init__(self,
+                 in_channels: int,
+                 branch_channels: int,
+                 out_channels: int,
+                 num_scales: int,
+                 kernel_sizes: List[int] = [3, 5, 9],
+                 strides: List[int] = [1, 2, 4],
+                 paddings: List[int] = [1, 2, 4],
+                 norm_cfg: Dict = dict(type='BN', momentum=0.1),
+                 act_cfg: Dict = dict(type='ReLU', inplace=True),
+                 conv_cfg: Dict = dict(
+                     order=('norm', 'act', 'conv'), bias=False),
+                 upsample_mode: str = 'bilinear'):
+        super().__init__(in_channels, branch_channels, out_channels,
+                         num_scales, kernel_sizes, strides, paddings, norm_cfg,
+                         act_cfg, conv_cfg, upsample_mode)
+        fused_channels = self.branch_channels * (self.num_scales - 1)
+        # one grouped 3x3 conv with one group per upsampled scale
+        self.processes = ConvModule(
+            fused_channels,
+            fused_channels,
+            kernel_size=3,
+            padding=1,
+            groups=self.num_scales - 1,
+            norm_cfg=self.norm_cfg,
+            act_cfg=self.act_cfg,
+            **self.conv_cfg)
+        self.cbam = CBAM(fused_channels)
+
+    def forward(self, inputs: Tensor):
+        x_ = self.scales[0](inputs)
+        feats = [
+            F.interpolate(
+                self.scales[i](inputs),
+                size=inputs.shape[2:],
+                mode=self.unsample_mode,
+                align_corners=False) + x_ for i in range(1, self.num_scales)
+        ]
+        scale_out = self.cbam(self.processes(torch.cat(feats, dim=1)))
+        fused = self.compression(torch.cat([x_, scale_out], dim=1))
+        return fused + self.shortcut(inputs)
+
+
+class FAPPM_conv_nocbam(DAPPM):
+    """PAPPM of `PIDNet <https://arxiv.org/abs/2206.02066>`_ without CBAM.
+
+    Args:
+        in_channels (int): Input channels.
+        branch_channels (int): Branch channels.
+        out_channels (int): Output channels.
+        num_scales (int): Number of scales.
+        kernel_sizes (list[int]): Kernel sizes of each scale.
+        strides (list[int]): Strides of each scale.
+        paddings (list[int]): Paddings of each scale.
+        norm_cfg (dict): Config dict for normalization layer.
+            Default: dict(type='BN', momentum=0.1).
+        act_cfg (dict): Config dict for activation layer in ConvModule.
+            Default: dict(type='ReLU', inplace=True).
+        conv_cfg (dict): Config dict for convolution layer in ConvModule.
+            Default: dict(order=('norm', 'act', 'conv'), bias=False).
+        upsample_mode (str): Upsample mode. Default: 'bilinear'.
+    """
+
+    def __init__(self,
+                 in_channels: int,
+                 branch_channels: int,
+                 out_channels: int,
+                 num_scales: int,
+                 kernel_sizes: List[int] = [3, 5, 9],
+                 strides: List[int] = [1, 2, 4],
+                 paddings: List[int] = [1, 2, 4],
+                 norm_cfg: Dict = dict(type='BN', momentum=0.1),
+                 act_cfg: Dict = dict(type='ReLU', inplace=True),
+                 conv_cfg: Dict = dict(
+                     order=('norm', 'act', 'conv'), bias=False),
+                 upsample_mode: str = 'bilinear'):
+        super().__init__(in_channels, branch_channels, out_channels,
+                         num_scales, kernel_sizes, strides, paddings, norm_cfg,
+                         act_cfg, conv_cfg, upsample_mode)
+        fused_channels = self.branch_channels * (self.num_scales - 1)
+        # one grouped 3x3 conv with one group per upsampled scale
+        self.processes = ConvModule(
+            fused_channels,
+            fused_channels,
+            kernel_size=3,
+            padding=1,
+            groups=self.num_scales - 1,
+            norm_cfg=self.norm_cfg,
+            act_cfg=self.act_cfg,
+            **self.conv_cfg)
+
+    def forward(self, inputs: Tensor):
+        x_ = self.scales[0](inputs)
+        feats = [
+            F.interpolate(
+                self.scales[i](inputs),
+                size=inputs.shape[2:],
+                mode=self.unsample_mode,
+                align_corners=False) + x_ for i in range(1, self.num_scales)
+        ]
+        scale_out = self.processes(torch.cat(feats, dim=1))
+        fused = self.compression(torch.cat([x_, scale_out], dim=1))
+        return fused + self.shortcut(inputs)
+
+
+class FAPPM_conv_slim(DAPPM):
+    """PAPPM-style module, see `PIDNet <https://arxiv.org/abs/2206.02066>`_.
+
+    Args:
+        in_channels (int): Input channels.
+        branch_channels (int): Branch channels.
+        out_channels (int): Output channels.
+        num_scales (int): Number of scale branches, including scales[0].
+        kernel_sizes (list[int]): Kernel sizes of each scale.
+        strides (list[int]): Strides of each scale.
+        paddings (list[int]): Paddings of each scale.
+        norm_cfg (dict): Config dict for normalization layer.
+            Default: dict(type='BN', momentum=0.1).
+        act_cfg (dict): Config dict for activation layer in ConvModule.
+            Default: dict(type='ReLU', inplace=True).
+        conv_cfg (dict): Config dict for convolution layer in ConvModule.
+            Default: dict(order=('norm', 'act', 'conv'), bias=False).
+        upsample_mode (str): Upsample mode. Default: 'bilinear'.
+    """
+
+    def __init__(self,
+                 in_channels: int,
+                 branch_channels: int,
+                 out_channels: int,
+                 num_scales: int,
+                 kernel_sizes: List[int] = [3, 5, 9],
+                 strides: List[int] = [1, 2, 4],
+                 paddings: List[int] = [1, 2, 4],
+                 norm_cfg: Dict = dict(type='BN', momentum=0.1),
+                 act_cfg: Dict = dict(type='ReLU', inplace=True),
+                 conv_cfg: Dict = dict(
+                     order=('norm', 'act', 'conv'), bias=False),
+                 upsample_mode: str = 'bilinear'):
+        super().__init__(in_channels, branch_channels, out_channels,
+                         num_scales, kernel_sizes, strides, paddings, norm_cfg,
+                         act_cfg, conv_cfg, upsample_mode)
+        # One 3x3 conv fuses all upsampled branches; groups = branch count.
+        self.processes = ConvModule(
+            self.branch_channels * (self.num_scales - 1),
+            self.branch_channels * (self.num_scales - 1),
+            kernel_size=3,
+            padding=1,
+            groups=self.num_scales - 1,
+            norm_cfg=self.norm_cfg,
+            act_cfg=self.act_cfg,
+            **self.conv_cfg)
+
+    def forward(self, inputs: Tensor):
+        x_ = self.scales[0](inputs)  # added to every upsampled branch below
+        feats = []
+        for i in range(1, self.num_scales):
+            feat_up = F.interpolate(
+                self.scales[i](inputs),
+                size=inputs.shape[2:],  # target size: input dims from index 2
+                mode=self.unsample_mode,  # sic; presumably set in DAPPM
+                align_corners=False)
+            feats.append(feat_up + x_)
+        scale_out = self.processes(torch.cat(feats, dim=1))
+        return self.compression(torch.cat([x_, scale_out],
+                                          dim=1)) + self.shortcut(inputs)
+
+
+class FAPPM_conv_group(DAPPM):
+    """PAPPM-style module using groups=4 convs and a ``CBAM_group`` stage.
+
+    Args:
+        in_channels (int): Input channels.
+        branch_channels (int): Branch channels.
+        out_channels (int): Output channels.
+        num_scales (int): Number of scale branches, including scales[0].
+        kernel_sizes (list[int]): Kernel sizes of each scale.
+        strides (list[int]): Strides of each scale.
+        paddings (list[int]): Paddings of each scale.
+        norm_cfg (dict): Config dict for normalization layer.
+            Default: dict(type='BN', momentum=0.1).
+        act_cfg (dict): Config dict for activation layer in ConvModule.
+            Default: dict(type='ReLU', inplace=True).
+        conv_cfg (dict): Config dict for convolution layer in ConvModule.
+            Default: dict(order=('norm', 'act', 'conv'), bias=False).
+        upsample_mode (str): Upsample mode. Default: 'bilinear'.
+    """
+
+    def __init__(self,
+                 in_channels: int,
+                 branch_channels: int,
+                 out_channels: int,
+                 num_scales: int,
+                 kernel_sizes: List[int] = [3, 5, 9],
+                 strides: List[int] = [1, 2, 4],
+                 paddings: List[int] = [1, 2, 4],
+                 norm_cfg: Dict = dict(type='BN', momentum=0.1),
+                 act_cfg: Dict = dict(type='ReLU', inplace=True),
+                 conv_cfg: Dict = dict(
+                     order=('norm', 'act', 'conv'), bias=False),
+                 upsample_mode: str = 'bilinear'):
+        super().__init__(in_channels, branch_channels, out_channels,
+                         num_scales, kernel_sizes, strides, paddings, norm_cfg,
+                         act_cfg, conv_cfg, upsample_mode)
+        # One 3x3 conv fuses all upsampled branches; groups = branch count.
+        self.processes = ConvModule(
+            self.branch_channels * (self.num_scales - 1),
+            self.branch_channels * (self.num_scales - 1),
+            kernel_size=3,
+            padding=1,
+            groups=self.num_scales - 1,
+            norm_cfg=self.norm_cfg,
+            act_cfg=self.act_cfg,
+            **self.conv_cfg)
+        self.cbam = CBAM_group(self.branch_channels * (self.num_scales - 1))
+        # Re-create scales, compression and shortcut with groups=4 convs.
+        self.scales = ModuleList([  # scales[0]: 1x1 conv, no pooling
+            ConvModule(
+                in_channels,
+                branch_channels,
+                kernel_size=1,
+                groups=4,  # presumably needs both channel counts % 4 == 0
+                norm_cfg=norm_cfg,
+                act_cfg=act_cfg,
+                **conv_cfg)
+        ])
+        for i in range(1, num_scales - 1):  # AvgPool2d + 1x1 conv branches
+            self.scales.append(
+                Sequential(*[
+                    nn.AvgPool2d(
+                        kernel_size=kernel_sizes[i - 1],
+                        stride=strides[i - 1],
+                        padding=paddings[i - 1]),
+                    ConvModule(
+                        in_channels,
+                        branch_channels,
+                        kernel_size=1,
+                        groups=4,
+                        norm_cfg=norm_cfg,
+                        act_cfg=act_cfg,
+                        **conv_cfg)
+                ]))
+        self.scales.append(  # last branch: pooled to 1x1
+            Sequential(*[
+                nn.AdaptiveAvgPool2d((1, 1)),
+                ConvModule(
+                    in_channels,
+                    branch_channels,
+                    kernel_size=1,
+                    groups=4,
+                    norm_cfg=norm_cfg,
+                    act_cfg=act_cfg,
+                    **conv_cfg)
+            ]))
+        # Fuses cat([x_, scale_out]) from forward() down to out_channels.
+        self.compression = ConvModule(
+            branch_channels * num_scales,
+            out_channels,
+            kernel_size=1,
+            groups=4,
+            norm_cfg=norm_cfg,
+            act_cfg=act_cfg,
+            **conv_cfg)
+        # Projects the raw input to out_channels for the final sum.
+        self.shortcut = ConvModule(
+            in_channels,
+            out_channels,
+            kernel_size=1,
+            groups=4,
+            norm_cfg=norm_cfg,
+            act_cfg=act_cfg,
+            **conv_cfg)
+
+    def forward(self, inputs: Tensor):
+        x_ = self.scales[0](inputs)  # added to every upsampled branch below
+        feats = []
+        for i in range(1, self.num_scales):
+            feat_up = F.interpolate(
+                self.scales[i](inputs),
+                size=inputs.shape[2:],  # target size: input dims from index 2
+                mode=self.unsample_mode,  # sic; presumably set in DAPPM
+                align_corners=False)
+            feats.append(feat_up + x_)
+        scale_out = self.processes(torch.cat(feats, dim=1))
+        scale_out = self.cbam(scale_out)  # CBAM_group on the fused branches
+        return self.compression(torch.cat([x_, scale_out],
+                                          dim=1)) + self.shortcut(inputs)
+
+
+algc = False  # align_corners value for F.interpolate in FAPPM_avgp.forward
+class FAPPM_avgp(nn.Module):
+    """Multi-scale avg-pool branches fused by a 3x3 conv and CBAM."""
+    def __init__(self,
+                 inplanes,
+                 branch_planes,
+                 outplanes,
+                 BatchNorm=nn.BatchNorm2d):
+        super(FAPPM_avgp, self).__init__()
+        bn_mom = 0.1  # momentum for every BatchNorm created below
+        self.scale1 = nn.Sequential(
+            nn.AvgPool2d(kernel_size=3, stride=1, padding=1),
+            BatchNorm(inplanes, momentum=bn_mom),
+            nn.ReLU(inplace=True),
+            nn.Conv2d(inplanes, branch_planes, kernel_size=1, bias=False),
+        )
+        self.scale2 = nn.Sequential(
+            nn.AvgPool2d(kernel_size=5, stride=2, padding=2),
+            BatchNorm(inplanes, momentum=bn_mom),
+            nn.ReLU(inplace=True),
+            nn.Conv2d(inplanes, branch_planes, kernel_size=1, bias=False),
+        )
+        self.scale3 = nn.Sequential(
+            nn.AvgPool2d(kernel_size=9, stride=4, padding=4),
+            BatchNorm(inplanes, momentum=bn_mom),
+            nn.ReLU(inplace=True),
+            nn.Conv2d(inplanes, branch_planes, kernel_size=1, bias=False),
+        )
+        self.scale4 = nn.Sequential(
+            nn.AdaptiveAvgPool2d((1, 1)),
+            BatchNorm(inplanes, momentum=bn_mom),
+            nn.ReLU(inplace=True),
+            nn.Conv2d(inplanes, branch_planes, kernel_size=1, bias=False),
+        )
+        # Strange: why is every block ordered BN, ReLU, Conv2d?
+        self.scale0 = nn.Sequential(
+            BatchNorm(inplanes, momentum=bn_mom),
+            nn.ReLU(inplace=True),
+            nn.Conv2d(inplanes, branch_planes, kernel_size=1, bias=False),
+        )
+        # Fuses the four upsampled branches (hence branch_planes * 4).
+        self.scale_process = nn.Sequential(
+            BatchNorm(branch_planes * 4, momentum=bn_mom),
+            nn.ReLU(inplace=True),
+            nn.Conv2d(
+                branch_planes * 4,
+                branch_planes * 4,
+                kernel_size=3,
+                padding=1,
+                bias=False),  # dense conv; groups=4 is left disabled
+        )
+        self.cbam = CBAM(branch_planes * 4)
+        # Input is x_ plus the fused branches: branch_planes * (1 + 4).
+        self.compression = nn.Sequential(
+            BatchNorm(branch_planes * 5, momentum=bn_mom),
+            nn.ReLU(inplace=True),
+            nn.Conv2d(branch_planes * 5, outplanes, kernel_size=1, bias=False),
+        )
+        # Projects the raw input to outplanes for the final sum.
+        self.shortcut = nn.Sequential(
+            BatchNorm(inplanes, momentum=bn_mom),
+            nn.ReLU(inplace=True),
+            nn.Conv2d(inplanes, outplanes, kernel_size=1, bias=False),
+        )
+
+    def forward(self, x):
+        width = x.shape[-1]
+        height = x.shape[-2]
+        scale_list = []
+        # Each branch is resized to (height, width) and then offset by x_.
+        x_ = self.scale0(x)
+        scale_list.append(
+            F.interpolate(
+                self.scale1(x),
+                size=[height, width],
+                mode='bilinear',
+                align_corners=algc) + x_)
+        scale_list.append(
+            F.interpolate(
+                self.scale2(x),
+                size=[height, width],
+                mode='bilinear',
+                align_corners=algc) + x_)
+        scale_list.append(
+            F.interpolate(
+                self.scale3(x),
+                size=[height, width],
+                mode='bilinear',
+                align_corners=algc) + x_)
+        scale_list.append(
+            F.interpolate(
+                self.scale4(x),
+                size=[height, width],
+                mode='bilinear',
+                align_corners=algc) + x_)
+        # Fuse branches, apply CBAM, then compress with x_ and add shortcut.
+        scale_out = self.scale_process(torch.cat(scale_list, 1))
+        scale_out = self.cbam(scale_out)
+        out = self.compression(torch.cat([x_, scale_out],
+                                         1)) + self.shortcut(x)
+        return out
diff --git a/mmseg/version.py b/mmseg/version.py
index 9c8753b6d8..ddcdd5e63c 100644
--- a/mmseg/version.py
+++ b/mmseg/version.py
@@ -2,7 +2,7 @@
 
 __version__ = '1.2.1'
 
-
+# parse_version_info and __version__ must be defined in the same file
 def parse_version_info(version_str):
     version_info = []
     for x in version_str.split('.'):