diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index aa5942748a..fd16ba2dc3 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -1,66 +1,10 @@ +# See https://pre-commit.com for more information +# See https://pre-commit.com/hooks.html for more hooks repos: - - repo: https://github.com/PyCQA/flake8 - rev: 5.0.4 +- repo: https://github.com/pre-commit/pre-commit-hooks + rev: v3.2.0 hooks: - - id: flake8 - - repo: https://github.com/zhouzaida/isort - rev: 5.12.1 - hooks: - - id: isort - - repo: https://github.com/pre-commit/mirrors-yapf - rev: v0.32.0 - hooks: - - id: yapf - - repo: https://github.com/pre-commit/pre-commit-hooks - rev: v4.3.0 - hooks: - - id: trailing-whitespace - - id: check-yaml - - id: end-of-file-fixer - - id: requirements-txt-fixer - - id: double-quote-string-fixer - - id: check-merge-conflict - - id: fix-encoding-pragma - args: ["--remove"] - - id: mixed-line-ending - args: ["--fix=lf"] - - repo: https://github.com/executablebooks/mdformat - rev: 0.7.9 - hooks: - - id: mdformat - args: ["--number"] - additional_dependencies: - - mdformat-openmmlab - - mdformat_frontmatter - - linkify-it-py - - repo: https://github.com/codespell-project/codespell - rev: v2.2.1 - hooks: - - id: codespell - - repo: https://github.com/myint/docformatter - rev: v1.3.1 - hooks: - - id: docformatter - args: ["--in-place", "--wrap-descriptions", "79"] - # temporarily remove update-model-index to avoid conflict raised - # by depth estimator models - # - repo: local - # hooks: - # - id: update-model-index - # name: update-model-index - # description: Collect model information and update model-index.yml - # entry: .dev_scripts/update_model_index.py - # additional_dependencies: [pyyaml] - # language: python - # require_serial: true - - repo: https://github.com/asottile/pyupgrade - rev: v3.0.0 - hooks: - - id: pyupgrade - args: ["--py36-plus"] - - repo: https://github.com/open-mmlab/pre-commit-hooks - rev: v0.2.0 # Use the rev to fix revision 
- hooks: - - id: check-algo-readme - - id: check-copyright - args: ["mmseg", "tools", "tests", "demo"] # the dir_to_check with expected directory to check + - id: trailing-whitespace + - id: end-of-file-fixer + - id: check-yaml + - id: check-added-large-files diff --git a/configs/_base_/datasets/feces.py b/configs/_base_/datasets/feces.py new file mode 100644 index 0000000000..b93c9f56f2 --- /dev/null +++ b/configs/_base_/datasets/feces.py @@ -0,0 +1,76 @@ +# dataset settings +dataset_type = 'Feces' +data_root = r'D:\Allcode\mmsegmentation-dev-1.x\data\data' # raw string keeps the backslashes literal; NOTE(review): machine-specific absolute path +img_scale = (448, 224) +crop_size = (224, 224) +train_pipeline = [ + dict(type='LoadImageFromFile'), + dict(type='LoadAnnotations'), + dict( + type='RandomResize', + scale=img_scale, + ratio_range=(0.5, 2.0), + keep_ratio=True), + dict(type='RandomFlip', prob=0.7), + dict(type='PhotoMetricDistortion'), + dict(type='RandomCrop', crop_size=crop_size, cat_max_ratio=0.75), + dict(type='PackSegInputs') +] +test_pipeline = [ + dict(type='LoadImageFromFile'), + dict(type='Resize', scale=img_scale, keep_ratio=True), + # add loading annotation after ``Resize`` because ground truth + # does not need to do resize data transform + dict(type='LoadAnnotations'), + dict(type='PackSegInputs') +] + +img_ratios = [0.5, 0.75, 1.0, 1.25, 1.5, 1.75] +tta_pipeline = [ + dict(type='LoadImageFromFile', backend_args=None), + dict( + type='TestTimeAug', + transforms=[ + [ + dict(type='Resize', scale_factor=r, keep_ratio=True) + for r in img_ratios + ], + [ + dict(type='RandomFlip', prob=0., direction='horizontal'), + dict(type='RandomFlip', prob=1., direction='horizontal') + ], [dict(type='LoadAnnotations')], [dict(type='PackSegInputs')] + ]) +] + +train_dataloader = dict( + batch_size=64, + num_workers=4, + persistent_workers=True, + sampler=dict(type='InfiniteSampler', shuffle=True), + dataset=dict( + type='RepeatDataset', + times=40000, + dataset=dict( + type=dataset_type, + data_root=data_root, + data_prefix=dict( + 
img_path='images/training', + seg_map_path='annotations/training'), + pipeline=train_pipeline))) +val_dataloader = dict( + batch_size=1, + num_workers=2, + persistent_workers=True, + sampler=dict(type='DefaultSampler', shuffle=False), + dataset=dict( + type=dataset_type, + data_root=data_root, + data_prefix=dict( + img_path='images/validation', + seg_map_path='annotations/validation'), + pipeline=test_pipeline)) +test_dataloader = val_dataloader + +val_evaluator = dict( + type='IoUMetric', iou_metrics=['mIoU', 'mDice', 'mFscore']) +test_evaluator = val_evaluator diff --git a/configs/ddrnet/ohem-DAPPM-cbam-lovasz-auxhead.py b/configs/ddrnet/ohem-DAPPM-cbam-lovasz-auxhead.py new file mode 100644 index 0000000000..2a8b18f3a9 --- /dev/null +++ b/configs/ddrnet/ohem-DAPPM-cbam-lovasz-auxhead.py @@ -0,0 +1,109 @@ +_base_ = [ + '../_base_/datasets/feces.py', + '../_base_/default_runtime.py', +] + +class_weight = [0.7, 0.8, 1.1, 1.2] + +checkpoint = '/home/panjm/hepengguang/mmlab_he/mmsegmentation-dev-1.x/checkpoints/ddrnet23-in1kpre_3rdparty-9ca29f62.pth' # noqa +crop_size = (224, 224) +data_preprocessor = dict( + type='SegDataPreProcessor', + size=crop_size, + # mean=[123.675, 116.28, 103.53], + # std=[58.395, 57.12, 57.375], + mean=[0.609, 0.604, 0.578], + std=[0.195, 0.192, 0.202], # NOTE(review): looks 0-1-scaled, unlike the 0-255 values commented out above; confirm the input pixel range + bgr_to_rgb=True, + pad_val=0, + seg_pad_val=255) +norm_cfg = dict(type='SyncBN', requires_grad=True) +model = dict( + type='EncoderDecoder', + data_preprocessor=data_preprocessor, + backbone=dict( + type='DDRNet_cbam', + in_channels=3, + channels=64, + ppm_channels=128, + norm_cfg=norm_cfg, + align_corners=False, + init_cfg=None), + # init_cfg=dict(type='Pretrained', checkpoint=checkpoint)), + decode_head=dict( + type='DDRHead', + in_channels=64 * 4, + channels=128, + dropout_ratio=0., + num_classes=4, + align_corners=False, + norm_cfg=norm_cfg, + loss_decode=[ + dict( + type='LovaszLoss', class_weight=class_weight, loss_weight=1.0), + dict( + type='OhemCrossEntropy', + thres=0.9, + 
min_kept=131072, + class_weight=class_weight, + loss_weight=0.4), + ]), + auxiliary_head=dict( + type='DDRHead', + in_channels=64 * 4, + channels=128, + dropout_ratio=0., + num_classes=4, + align_corners=False, + norm_cfg=norm_cfg, + loss_decode=[ + dict( + type='LovaszLoss', class_weight=class_weight, loss_weight=1.0), + dict( + type='OhemCrossEntropy', + thres=0.9, + min_kept=131072, + class_weight=class_weight, + loss_weight=0.4), + ]), + + # model training and testing settings + train_cfg=dict(), + test_cfg=dict(mode='whole')) + +# train_dataloader = dict(batch_size=6, num_workers=4) + +iters = 6000 +# optimizer +optimizer = dict(type='SGD', lr=0.01, momentum=0.9, weight_decay=0.0005) +optim_wrapper = dict(type='OptimWrapper', optimizer=optimizer, clip_grad=None) +# learning policy +param_scheduler = [ + dict( + type='PolyLR', + eta_min=0.0001, + power=0.9, + begin=0, + end=iters, + by_epoch=False) +] + +# training schedule for `iters` (6000) iterations +train_cfg = dict(type='IterBasedTrainLoop', max_iters=iters, val_interval=100) +val_cfg = dict(type='ValLoop') +test_cfg = dict(type='TestLoop') +default_hooks = dict( + timer=dict(type='IterTimerHook'), + logger=dict(type='LoggerHook', interval=5, log_metric_by_epoch=False), + param_scheduler=dict(type='ParamSchedulerHook'), + checkpoint=dict( + # type='CheckpointHook', by_epoch=False, interval=100, max_keep_ckpts=2, save_best='mIoU'), + type='CheckpointHook', + by_epoch=False, + interval=100, + max_keep_ckpts=2, + save_best='mDice'), + sampler_seed=dict(type='DistSamplerSeedHook'), + visualization=dict(type='SegVisualizationHook')) + +randomness = dict(seed=304) diff --git a/configs/ddrnet/ohem-DAPPM-cbam-lovasz.py b/configs/ddrnet/ohem-DAPPM-cbam-lovasz.py new file mode 100644 index 0000000000..c6d9962b74 --- /dev/null +++ b/configs/ddrnet/ohem-DAPPM-cbam-lovasz.py @@ -0,0 +1,93 @@ +_base_ = [ + '../_base_/datasets/feces.py', + '../_base_/default_runtime.py', +] + +class_weight = [0.7, 0.8, 1.1, 1.2] + +checkpoint = 
'/home/panjm/hepengguang/mmlab_he/mmsegmentation-dev-1.x/checkpoints/ddrnet23-in1kpre_3rdparty-9ca29f62.pth' # noqa +crop_size = (224, 224) +data_preprocessor = dict( + type='SegDataPreProcessor', + size=crop_size, + mean=[0.609, 0.604, 0.578], + std=[0.195, 0.192, 0.202], + bgr_to_rgb=True, + pad_val=0, + seg_pad_val=255) +norm_cfg = dict(type='SyncBN', requires_grad=True) +model = dict( + type='EncoderDecoder', + data_preprocessor=data_preprocessor, + backbone=dict( + type='DDRNet_cbam', + in_channels=3, + channels=64, + ppm_channels=128, + norm_cfg=norm_cfg, + align_corners=False, + init_cfg=None), + # init_cfg=dict(type='Pretrained', checkpoint=checkpoint)), + decode_head=dict( + type='DDRHead', + in_channels=64 * 4, + channels=128, + dropout_ratio=0., + num_classes=4, + align_corners=False, + norm_cfg=norm_cfg, + loss_decode=[ + dict( + type='LovaszLoss', + # thres=0.9, + # min_kept=131072, + class_weight=class_weight, + loss_weight=1.0), + dict( + type='OhemCrossEntropy', + thres=0.9, + min_kept=131072, + class_weight=class_weight, + loss_weight=0.4), + ]), + + # model training and testing settings + train_cfg=dict(), + test_cfg=dict(mode='whole')) + +# train_dataloader = dict(batch_size=6, num_workers=4) + +iters = 6000 +# optimizer +optimizer = dict(type='SGD', lr=0.01, momentum=0.9, weight_decay=0.0005) +optim_wrapper = dict(type='OptimWrapper', optimizer=optimizer, clip_grad=None) +# learning policy +param_scheduler = [ + dict( + type='PolyLR', + eta_min=0.0001, + power=0.9, + begin=0, + end=iters, + by_epoch=False) +] + +# training schedule for `iters` (6000) iterations +train_cfg = dict(type='IterBasedTrainLoop', max_iters=iters, val_interval=100) +val_cfg = dict(type='ValLoop') +test_cfg = dict(type='TestLoop') +default_hooks = dict( + timer=dict(type='IterTimerHook'), + logger=dict(type='LoggerHook', interval=5, log_metric_by_epoch=False), + param_scheduler=dict(type='ParamSchedulerHook'), + checkpoint=dict( + # type='CheckpointHook', by_epoch=False, interval=100, 
max_keep_ckpts=2, save_best='mIoU'), + type='CheckpointHook', + by_epoch=False, + interval=100, + max_keep_ckpts=2, + save_best='mDice'), + sampler_seed=dict(type='DistSamplerSeedHook'), + visualization=dict(type='SegVisualizationHook')) + +randomness = dict(seed=304) diff --git a/configs/ddrnet/ohem-DAPPM-cbam.py b/configs/ddrnet/ohem-DAPPM-cbam.py new file mode 100644 index 0000000000..de677186e8 --- /dev/null +++ b/configs/ddrnet/ohem-DAPPM-cbam.py @@ -0,0 +1,89 @@ +_base_ = [ + '../_base_/datasets/feces.py', + '../_base_/default_runtime.py', +] + +class_weight = [0.7, 0.8, 1.1, 1.2] + +checkpoint = '/home/panjm/hepengguang/mmlab_he/mmsegmentation-dev-1.x/checkpoints/ddrnet23-in1kpre_3rdparty-9ca29f62.pth' # noqa +crop_size = (224, 224) +data_preprocessor = dict( + type='SegDataPreProcessor', + size=crop_size, + mean=[0.609, 0.604, 0.578], + std=[0.195, 0.192, 0.202], + bgr_to_rgb=True, + pad_val=0, + seg_pad_val=255) +norm_cfg = dict(type='SyncBN', requires_grad=True) +model = dict( + type='EncoderDecoder', + data_preprocessor=data_preprocessor, + backbone=dict( + type='DDRNet_cbam', + in_channels=3, + channels=64, + ppm_channels=128, + norm_cfg=norm_cfg, + align_corners=False, + init_cfg=None), + decode_head=dict( + type='DDRHead', + in_channels=64 * 4, + channels=128, + dropout_ratio=0., + num_classes=4, + align_corners=False, + norm_cfg=norm_cfg, + loss_decode=[ + dict( + type='OhemCrossEntropy', + thres=0.9, + min_kept=131072, + class_weight=class_weight, + loss_weight=1.0), + dict( + type='OhemCrossEntropy', + thres=0.9, + min_kept=131072, + class_weight=class_weight, + loss_weight=0.4), + ]), + + # model training and testing settings + train_cfg=dict(), + test_cfg=dict(mode='whole')) + +iters = 6000 +# optimizer +optimizer = dict(type='SGD', lr=0.01, momentum=0.9, weight_decay=0.0005) +optim_wrapper = dict(type='OptimWrapper', optimizer=optimizer, clip_grad=None) +# learning policy +param_scheduler = [ + dict( + type='PolyLR', + eta_min=0.0001, + 
power=0.9, + begin=0, + end=iters, + by_epoch=False) +] + +# training schedule for `iters` (6000) iterations +train_cfg = dict(type='IterBasedTrainLoop', max_iters=iters, val_interval=100) +val_cfg = dict(type='ValLoop') +test_cfg = dict(type='TestLoop') +default_hooks = dict( + timer=dict(type='IterTimerHook'), + logger=dict(type='LoggerHook', interval=5, log_metric_by_epoch=False), + param_scheduler=dict(type='ParamSchedulerHook'), + checkpoint=dict( + type='CheckpointHook', + by_epoch=False, + interval=100, + max_keep_ckpts=2, + save_best='mDice'), + sampler_seed=dict(type='DistSamplerSeedHook'), + visualization=dict(type='SegVisualizationHook')) + +randomness = dict(seed=304) diff --git a/configs/ddrnet/ohem-DAPPM-slim.py b/configs/ddrnet/ohem-DAPPM-slim.py new file mode 100644 index 0000000000..059aa1cbc8 --- /dev/null +++ b/configs/ddrnet/ohem-DAPPM-slim.py @@ -0,0 +1,86 @@ +_base_ = [ + '../_base_/datasets/feces.py', + '../_base_/default_runtime.py', +] + +class_weight = [0.7, 0.8, 1.1, 1.2] + +checkpoint = '/home/panjm/hepengguang/mmlab_he/mmsegmentation-dev-1.x/checkpoints/ddrnet23-in1kpre_3rdparty-9ca29f62.pth' # noqa +crop_size = (224, 224) +data_preprocessor = dict( + type='SegDataPreProcessor', + size=crop_size, + mean=[0.609, 0.604, 0.578], + std=[0.195, 0.192, 0.202], + bgr_to_rgb=True, + pad_val=0, + seg_pad_val=255) +norm_cfg = dict(type='SyncBN', requires_grad=True) +model = dict( + type='EncoderDecoder', + data_preprocessor=data_preprocessor, + backbone=dict( + type='DDRNet', + in_channels=3, + channels=32, + ppm_channels=64, + norm_cfg=norm_cfg, + align_corners=False, + init_cfg=None), + # init_cfg=dict(type='Pretrained', checkpoint=checkpoint)), + decode_head=dict( + type='DDRHead', + in_channels=64 * 2, + channels=64, + dropout_ratio=0., + num_classes=4, + align_corners=False, + norm_cfg=norm_cfg, + loss_decode=[ + dict( + type='LovaszLoss', + class_weight=class_weight, + loss_weight=1.0), + dict( + type='OhemCrossEntropy', + thres=0.9, + min_kept=131072, + 
class_weight=class_weight, + loss_weight=0.4), + ]), + + train_cfg=dict(), + test_cfg=dict(mode='whole')) + +iters = 6000 +# optimizer +optimizer = dict(type='SGD', lr=0.01, momentum=0.9, weight_decay=0.0005) +optim_wrapper = dict(type='OptimWrapper', optimizer=optimizer, clip_grad=None) +# learning policy +param_scheduler = [ + dict( + type='PolyLR', + eta_min=0.0001, + power=0.9, + begin=0, + end=iters, + by_epoch=False) +] + +train_cfg = dict(type='IterBasedTrainLoop', max_iters=iters, val_interval=100) +val_cfg = dict(type='ValLoop') +test_cfg = dict(type='TestLoop') +default_hooks = dict( + timer=dict(type='IterTimerHook'), + logger=dict(type='LoggerHook', interval=5, log_metric_by_epoch=False), + param_scheduler=dict(type='ParamSchedulerHook'), + checkpoint=dict( + type='CheckpointHook', + by_epoch=False, + interval=100, + max_keep_ckpts=2, + save_best='mIoU'), + sampler_seed=dict(type='DistSamplerSeedHook'), + visualization=dict(type='SegVisualizationHook')) + +randomness = dict(seed=304) diff --git a/configs/ddrnet/ohem-FAPPM_avgp-cbam-lovasz-auxhead-slim.py b/configs/ddrnet/ohem-FAPPM_avgp-cbam-lovasz-auxhead-slim.py new file mode 100644 index 0000000000..254ed2e277 --- /dev/null +++ b/configs/ddrnet/ohem-FAPPM_avgp-cbam-lovasz-auxhead-slim.py @@ -0,0 +1,103 @@ +_base_ = [ + '../_base_/datasets/feces.py', + '../_base_/default_runtime.py', +] + +class_weight = [0.7, 0.8, 1.1, 1.2] + +checkpoint = '/home/panjm/hepengguang/mmlab_he/mmsegmentation-dev-1.x/checkpoints/ddrnet23-in1kpre_3rdparty-9ca29f62.pth' # noqa +crop_size = (224, 224) +data_preprocessor = dict( + type='SegDataPreProcessor', + size=crop_size, + mean=[0.609, 0.604, 0.578], + std=[0.195, 0.192, 0.202], + bgr_to_rgb=True, + pad_val=0, + seg_pad_val=255) +norm_cfg = dict(type='SyncBN', requires_grad=True) +model = dict( + type='EncoderDecoder', + data_preprocessor=data_preprocessor, + backbone=dict( + type='FAPPM_AVGP', + in_channels=3, + channels=32, + ppm_channels=64, + norm_cfg=norm_cfg, + 
align_corners=False, + init_cfg=None), + decode_head=dict( + type='DDRHead', + in_channels=32 * 4, + channels=64, + dropout_ratio=0., + num_classes=4, + align_corners=False, + norm_cfg=norm_cfg, + loss_decode=[ + dict( + type='LovaszLoss', class_weight=class_weight, loss_weight=1.0), + dict( + type='OhemCrossEntropy', + thres=0.9, + min_kept=131072, + class_weight=class_weight, + loss_weight=0.4), + ]), + auxiliary_head=dict( + type='DDRHead', + in_channels=32 * 4, + channels=64, + dropout_ratio=0., + num_classes=4, + align_corners=False, + norm_cfg=norm_cfg, + loss_decode=[ + dict( + type='LovaszLoss', class_weight=class_weight, loss_weight=1.0), + dict( + type='OhemCrossEntropy', + thres=0.9, + min_kept=131072, + class_weight=class_weight, + loss_weight=0.4), + ]), + + # model training and testing settings + train_cfg=dict(), + test_cfg=dict(mode='whole')) + +iters = 6000 +# optimizer +optimizer = dict(type='SGD', lr=0.01, momentum=0.9, weight_decay=0.0005) +optim_wrapper = dict(type='OptimWrapper', optimizer=optimizer, clip_grad=None) +# learning policy +param_scheduler = [ + dict( + type='PolyLR', + eta_min=0.0001, + power=0.9, + begin=0, + end=iters, + by_epoch=False) +] + +# training schedule for `iters` (6000) iterations +train_cfg = dict(type='IterBasedTrainLoop', max_iters=iters, val_interval=100) +val_cfg = dict(type='ValLoop') +test_cfg = dict(type='TestLoop') +default_hooks = dict( + timer=dict(type='IterTimerHook'), + logger=dict(type='LoggerHook', interval=5, log_metric_by_epoch=False), + param_scheduler=dict(type='ParamSchedulerHook'), + checkpoint=dict( + type='CheckpointHook', + by_epoch=False, + interval=100, + max_keep_ckpts=2, + save_best='mDice'), + sampler_seed=dict(type='DistSamplerSeedHook'), + visualization=dict(type='SegVisualizationHook')) + +randomness = dict(seed=304) diff --git a/configs/ddrnet/ohem-FAPPM_avgp-cbam-lovasz-auxhead.py b/configs/ddrnet/ohem-FAPPM_avgp-cbam-lovasz-auxhead.py new file mode 100644 index 0000000000..c639b306ad --- /dev/null +++ 
b/configs/ddrnet/ohem-FAPPM_avgp-cbam-lovasz-auxhead.py @@ -0,0 +1,103 @@ +_base_ = [ + '../_base_/datasets/feces.py', + '../_base_/default_runtime.py', +] + +class_weight = [0.7, 0.8, 1.1, 1.2] + +checkpoint = '/home/panjm/hepengguang/mmlab_he/mmsegmentation-dev-1.x/checkpoints/ddrnet23-in1kpre_3rdparty-9ca29f62.pth' # noqa +crop_size = (224, 224) +data_preprocessor = dict( + type='SegDataPreProcessor', + size=crop_size, + mean=[0.609, 0.604, 0.578], + std=[0.195, 0.192, 0.202], + bgr_to_rgb=True, + pad_val=0, + seg_pad_val=255) +norm_cfg = dict(type='SyncBN', requires_grad=True) +model = dict( + type='EncoderDecoder', + data_preprocessor=data_preprocessor, + backbone=dict( + type='FAPPM_AVGP', + in_channels=3, + channels=64, + ppm_channels=96, + norm_cfg=norm_cfg, + align_corners=False, + init_cfg=None), + decode_head=dict( + type='DDRHead', + in_channels=64 * 4, + channels=128, + dropout_ratio=0., + num_classes=4, + align_corners=False, + norm_cfg=norm_cfg, + loss_decode=[ + dict( + type='LovaszLoss', class_weight=class_weight, loss_weight=1.0), + dict( + type='OhemCrossEntropy', + thres=0.9, + min_kept=131072, + class_weight=class_weight, + loss_weight=0.4), + ]), + auxiliary_head=dict( + type='DDRHead', + in_channels=64 * 4, + channels=128, + dropout_ratio=0., + num_classes=4, + align_corners=False, + norm_cfg=norm_cfg, + loss_decode=[ + dict( + type='LovaszLoss', class_weight=class_weight, loss_weight=1.0), + dict( + type='OhemCrossEntropy', + thres=0.9, + min_kept=131072, + class_weight=class_weight, + loss_weight=0.4), + ]), + + # model training and testing settings + train_cfg=dict(), + test_cfg=dict(mode='whole')) + +iters = 6000 +# optimizer +optimizer = dict(type='SGD', lr=0.01, momentum=0.9, weight_decay=0.0005) +optim_wrapper = dict(type='OptimWrapper', optimizer=optimizer, clip_grad=None) +# learning policy +param_scheduler = [ + dict( + type='PolyLR', + eta_min=0.0001, + power=0.9, + begin=0, + end=iters, + by_epoch=False) +] + +# training schedule 
for `iters` (6000) iterations +train_cfg = dict(type='IterBasedTrainLoop', max_iters=iters, val_interval=100) +val_cfg = dict(type='ValLoop') +test_cfg = dict(type='TestLoop') +default_hooks = dict( + timer=dict(type='IterTimerHook'), + logger=dict(type='LoggerHook', interval=5, log_metric_by_epoch=False), + param_scheduler=dict(type='ParamSchedulerHook'), + checkpoint=dict( + type='CheckpointHook', + by_epoch=False, + interval=100, + max_keep_ckpts=2, + save_best='mDice'), + sampler_seed=dict(type='DistSamplerSeedHook'), + visualization=dict(type='SegVisualizationHook')) + +randomness = dict(seed=304) diff --git a/configs/ddrnet/ohem-FAPPM_conv-cbam-lovasz-auxhead-group-slim.py b/configs/ddrnet/ohem-FAPPM_conv-cbam-lovasz-auxhead-group-slim.py new file mode 100644 index 0000000000..5af551088f --- /dev/null +++ b/configs/ddrnet/ohem-FAPPM_conv-cbam-lovasz-auxhead-group-slim.py @@ -0,0 +1,102 @@ +_base_ = [ + '../_base_/datasets/feces.py', + '../_base_/default_runtime.py', +] + +class_weight = [0.7, 0.8, 1.1, 1.2] + +checkpoint = '/home/panjm/hepengguang/mmlab_he/mmsegmentation-dev-1.x/checkpoints/ddrnet23-in1kpre_3rdparty-9ca29f62.pth' # noqa +crop_size = (224, 224) +data_preprocessor = dict( + type='SegDataPreProcessor', + size=crop_size, + mean=[0.609, 0.604, 0.578], + std=[0.195, 0.192, 0.202], + bgr_to_rgb=True, + pad_val=0, + seg_pad_val=255) +norm_cfg = dict(type='SyncBN', requires_grad=True) +model = dict( + type='EncoderDecoder', + data_preprocessor=data_preprocessor, + backbone=dict( + type='FAPPM_CONV_group_slim', + in_channels=3, + channels=32, # 64-32 + ppm_channels=64, # 96-64 + norm_cfg=norm_cfg, + align_corners=False, + init_cfg=None), + decode_head=dict( + type='DDRHead', + in_channels=64 * 2, # 256-128 + channels=64, # 128-64 + dropout_ratio=0., + num_classes=4, + align_corners=False, + norm_cfg=norm_cfg, + loss_decode=[ + dict( + type='LovaszLoss', class_weight=class_weight, loss_weight=1.0), + dict( + type='OhemCrossEntropy', + thres=0.9, + min_kept=131072, + 
class_weight=class_weight, + loss_weight=0.4), + ]), + auxiliary_head=dict( + type='DDRHead', + in_channels=64 * 2, # 256-128 + channels=64, # 128-64 + dropout_ratio=0., + num_classes=4, + align_corners=False, + norm_cfg=norm_cfg, + loss_decode=[ + dict( + type='LovaszLoss', class_weight=class_weight, loss_weight=1.0), + dict( + type='OhemCrossEntropy', + thres=0.9, + min_kept=131072, + class_weight=class_weight, + loss_weight=0.4), + ]), + + # model training and testing settings + train_cfg=dict(), + test_cfg=dict(mode='whole')) + +iters = 6000 +# optimizer +optimizer = dict(type='SGD', lr=0.01, momentum=0.9, weight_decay=0.0005) +optim_wrapper = dict(type='OptimWrapper', optimizer=optimizer, clip_grad=None) +# learning policy +param_scheduler = [ + dict( + type='PolyLR', + eta_min=0.0001, + power=0.9, + begin=0, + end=iters, + by_epoch=False) +] + +train_cfg = dict(type='IterBasedTrainLoop', max_iters=iters, val_interval=100) +val_cfg = dict(type='ValLoop') +test_cfg = dict(type='TestLoop') +default_hooks = dict( + timer=dict(type='IterTimerHook'), + logger=dict(type='LoggerHook', interval=5, log_metric_by_epoch=False), + param_scheduler=dict(type='ParamSchedulerHook'), + checkpoint=dict( + type='CheckpointHook', + by_epoch=False, + interval=100, + max_keep_ckpts=2, + save_best='mDice'), + sampler_seed=dict(type='DistSamplerSeedHook'), + visualization=dict(type='SegVisualizationHook')) + +randomness = dict(seed=304) diff --git a/configs/ddrnet/ohem-FAPPM_conv-cbam-lovasz-auxhead-group.py b/configs/ddrnet/ohem-FAPPM_conv-cbam-lovasz-auxhead-group.py new file mode 100644 index 0000000000..8272c97310 --- /dev/null +++ b/configs/ddrnet/ohem-FAPPM_conv-cbam-lovasz-auxhead-group.py @@ -0,0 +1,103 @@ +_base_ = [ + '../_base_/datasets/feces.py', + '../_base_/default_runtime.py', +] + +class_weight = [0.7, 0.8, 1.1, 1.2] + +checkpoint = '/home/panjm/hepengguang/mmlab_he/mmsegmentation-dev-1.x/checkpoints/ddrnet23-in1kpre_3rdparty-9ca29f62.pth' # noqa +crop_size = (224, 
224) +data_preprocessor = dict( + type='SegDataPreProcessor', + size=crop_size, + mean=[0.609, 0.604, 0.578], + std=[0.195, 0.192, 0.202], + bgr_to_rgb=True, + pad_val=0, + seg_pad_val=255) +norm_cfg = dict(type='SyncBN', requires_grad=True) +model = dict( + type='EncoderDecoder', + data_preprocessor=data_preprocessor, + backbone=dict( + type='FAPPM_CONV_group', + in_channels=3, + channels=64, + ppm_channels=96, + norm_cfg=norm_cfg, + align_corners=False, + init_cfg=None), + decode_head=dict( + type='DDRHead', + in_channels=64 * 4, + channels=128, + dropout_ratio=0., + num_classes=4, + align_corners=False, + norm_cfg=norm_cfg, + loss_decode=[ + dict( + type='LovaszLoss', class_weight=class_weight, loss_weight=1.0), + dict( + type='OhemCrossEntropy', + thres=0.9, + min_kept=131072, + class_weight=class_weight, + loss_weight=0.4), + ]), + auxiliary_head=dict( + type='DDRHead', + in_channels=64 * 4, + channels=128, + dropout_ratio=0., + num_classes=4, + align_corners=False, + norm_cfg=norm_cfg, + loss_decode=[ + dict( + type='LovaszLoss', class_weight=class_weight, loss_weight=1.0), + dict( + type='OhemCrossEntropy', + thres=0.9, + min_kept=131072, + class_weight=class_weight, + loss_weight=0.4), + ]), + + # model training and testing settings + train_cfg=dict(), + test_cfg=dict(mode='whole')) + +iters = 6000 +# optimizer +optimizer = dict(type='SGD', lr=0.01, momentum=0.9, weight_decay=0.0005) +optim_wrapper = dict(type='OptimWrapper', optimizer=optimizer, clip_grad=None) +# learning policy +param_scheduler = [ + dict( + type='PolyLR', + eta_min=0.0001, + power=0.9, + begin=0, + end=iters, + by_epoch=False) +] + +# training schedule for `iters` (6000) iterations +train_cfg = dict(type='IterBasedTrainLoop', max_iters=iters, val_interval=100) +val_cfg = dict(type='ValLoop') +test_cfg = dict(type='TestLoop') +default_hooks = dict( + timer=dict(type='IterTimerHook'), + logger=dict(type='LoggerHook', interval=5, log_metric_by_epoch=False), + param_scheduler=dict(type='ParamSchedulerHook'), + 
checkpoint=dict( + type='CheckpointHook', + by_epoch=False, + interval=100, + max_keep_ckpts=2, + save_best='mDice'), + sampler_seed=dict(type='DistSamplerSeedHook'), + visualization=dict(type='SegVisualizationHook')) + +randomness = dict(seed=304) diff --git a/configs/ddrnet/ohem-FAPPM_conv-cbam-lovasz-auxhead-slim.py b/configs/ddrnet/ohem-FAPPM_conv-cbam-lovasz-auxhead-slim.py new file mode 100644 index 0000000000..ed826334c6 --- /dev/null +++ b/configs/ddrnet/ohem-FAPPM_conv-cbam-lovasz-auxhead-slim.py @@ -0,0 +1,101 @@ +_base_ = [ + '../_base_/datasets/feces.py', + '../_base_/default_runtime.py', +] + +class_weight = [0.7, 0.8, 1.1, 1.2] + +checkpoint = '/home/panjm/hepengguang/mmlab_he/mmsegmentation-dev-1.x/checkpoints/ddrnet23-in1kpre_3rdparty-9ca29f62.pth' # noqa +crop_size = (224, 224) +data_preprocessor = dict( + type='SegDataPreProcessor', + size=crop_size, + mean=[0.609, 0.604, 0.578], + std=[0.195, 0.192, 0.202], + bgr_to_rgb=True, + pad_val=0, + seg_pad_val=255) +norm_cfg = dict(type='SyncBN', requires_grad=True) +model = dict( + type='EncoderDecoder', + data_preprocessor=data_preprocessor, + backbone=dict( + type='FAPPM_CONV_slim', + in_channels=3, + channels=32, # 64-32 + ppm_channels=64, # 96-64 + norm_cfg=norm_cfg, + align_corners=False, + init_cfg=None), + decode_head=dict( + type='DDRHead', + in_channels=64 * 2, # 256-128 + channels=64, # 128-64 + dropout_ratio=0., + num_classes=4, + align_corners=False, + norm_cfg=norm_cfg, + loss_decode=[ + dict( + type='LovaszLoss', class_weight=class_weight, loss_weight=1.0), + dict( + type='OhemCrossEntropy', + thres=0.9, + min_kept=131072, + class_weight=class_weight, + loss_weight=0.4), + ]), + auxiliary_head=dict( + type='DDRHead', + in_channels=64 * 2, # 256-128 + channels=64, # 128-64 + dropout_ratio=0., + num_classes=4, + align_corners=False, + norm_cfg=norm_cfg, + loss_decode=[ + dict( + type='LovaszLoss', class_weight=class_weight, loss_weight=1.0), + dict( + type='OhemCrossEntropy', + thres=0.9, + 
min_kept=131072, + class_weight=class_weight, + loss_weight=0.4), + ]), + + train_cfg=dict(), + test_cfg=dict(mode='whole')) + +iters = 6000 +# optimizer +optimizer = dict(type='SGD', lr=0.01, momentum=0.9, weight_decay=0.0005) +optim_wrapper = dict(type='OptimWrapper', optimizer=optimizer, clip_grad=None) +# learning policy +param_scheduler = [ + dict( + type='PolyLR', + eta_min=0.0001, + power=0.9, + begin=0, + end=iters, + by_epoch=False) +] + +train_cfg = dict(type='IterBasedTrainLoop', max_iters=iters, val_interval=100) +val_cfg = dict(type='ValLoop') +test_cfg = dict(type='TestLoop') +default_hooks = dict( + timer=dict(type='IterTimerHook'), + logger=dict(type='LoggerHook', interval=5, log_metric_by_epoch=False), + param_scheduler=dict(type='ParamSchedulerHook'), + checkpoint=dict( + type='CheckpointHook', + by_epoch=False, + interval=100, + max_keep_ckpts=2, + save_best='mDice'), + sampler_seed=dict(type='DistSamplerSeedHook'), + visualization=dict(type='SegVisualizationHook')) + +randomness = dict(seed=304) diff --git a/configs/ddrnet/ohem-FAPPM_conv-cbam-lovasz-auxhead-slim2.py b/configs/ddrnet/ohem-FAPPM_conv-cbam-lovasz-auxhead-slim2.py new file mode 100644 index 0000000000..595d17b4d5 --- /dev/null +++ b/configs/ddrnet/ohem-FAPPM_conv-cbam-lovasz-auxhead-slim2.py @@ -0,0 +1,86 @@ +_base_ = [ + '../_base_/datasets/feces.py', + '../_base_/default_runtime.py', +] + +class_weight = [0.7, 0.8, 1.1, 1.2] + +checkpoint = '/home/panjm/hepengguang/mmlab_he/mmsegmentation-dev-1.x/checkpoints/ddrnet23-in1kpre_3rdparty-9ca29f62.pth' # noqa +crop_size = (224, 224) +data_preprocessor = dict( + type='SegDataPreProcessor', + size=crop_size, + mean=[0.609, 0.604, 0.578], + std=[0.195, 0.192, 0.202], + bgr_to_rgb=True, + pad_val=0, + seg_pad_val=255) +norm_cfg = dict(type='SyncBN', requires_grad=True) +model = dict( + type='EncoderDecoder', + data_preprocessor=data_preprocessor, + backbone=dict( + type='FAPPM_CONV_slim2', + in_channels=3, + channels=32, # 64-32 + 
ppm_channels=96, # NOTE(review): was annotated '96-64' but the value is 96 - confirm + norm_cfg=norm_cfg, + align_corners=False, + init_cfg=None), + decode_head=dict( + type='DDRHead', + in_channels=64 * 2, # 256-128 + channels=96, # NOTE(review): was annotated '128-64' but the value is 96 - confirm + dropout_ratio=0., + num_classes=4, + align_corners=False, + norm_cfg=norm_cfg, + loss_decode=[ + dict( + type='LovaszLoss', class_weight=class_weight, loss_weight=1.0), + dict( + type='OhemCrossEntropy', + thres=0.9, + min_kept=131072, + class_weight=class_weight, + loss_weight=0.4), + ]), + + # model training and testing settings + train_cfg=dict(), + test_cfg=dict(mode='whole')) + +# train_dataloader = dict(batch_size=6, num_workers=4) + +iters = 6000 +# optimizer +optimizer = dict(type='SGD', lr=0.01, momentum=0.9, weight_decay=0.0005) +optim_wrapper = dict(type='OptimWrapper', optimizer=optimizer, clip_grad=None) +# learning policy +param_scheduler = [ + dict( + type='PolyLR', + eta_min=0.0001, + power=0.9, + begin=0, + end=iters, + by_epoch=False) +] + +train_cfg = dict(type='IterBasedTrainLoop', max_iters=iters, val_interval=100) +val_cfg = dict(type='ValLoop') +test_cfg = dict(type='TestLoop') +default_hooks = dict( + timer=dict(type='IterTimerHook'), + logger=dict(type='LoggerHook', interval=5, log_metric_by_epoch=False), + param_scheduler=dict(type='ParamSchedulerHook'), + checkpoint=dict( + type='CheckpointHook', + by_epoch=False, + interval=100, + max_keep_ckpts=2, + save_best='mDice'), + sampler_seed=dict(type='DistSamplerSeedHook'), + visualization=dict(type='SegVisualizationHook')) + +randomness = dict(seed=304) diff --git a/configs/ddrnet/ohem-FAPPM_conv-cbam-lovasz-auxhead.py b/configs/ddrnet/ohem-FAPPM_conv-cbam-lovasz-auxhead.py new file mode 100644 index 0000000000..f46dedf9a4 --- /dev/null +++ b/configs/ddrnet/ohem-FAPPM_conv-cbam-lovasz-auxhead.py @@ -0,0 +1,103 @@ +_base_ = [ + '../_base_/datasets/feces.py', + '../_base_/default_runtime.py', +] + +class_weight = [0.7, 0.8, 1.1, 1.2] + +checkpoint = 
'/home/panjm/hepengguang/mmlab_he/mmsegmentation-dev-1.x/checkpoints/ddrnet23-in1kpre_3rdparty-9ca29f62.pth' # noqa +crop_size = (224, 224) +data_preprocessor = dict( + type='SegDataPreProcessor', + size=crop_size, + mean=[0.609, 0.604, 0.578], + std=[0.195, 0.192, 0.202], + bgr_to_rgb=True, + pad_val=0, + seg_pad_val=255) +norm_cfg = dict(type='SyncBN', requires_grad=True) +model = dict( + type='EncoderDecoder', + data_preprocessor=data_preprocessor, + backbone=dict( + type='FAPPM_CONV', + in_channels=3, + channels=64, + ppm_channels=96, + norm_cfg=norm_cfg, + align_corners=False, + init_cfg=None), + decode_head=dict( + type='DDRHead', + in_channels=64 * 4, + channels=128, + dropout_ratio=0., + num_classes=4, + align_corners=False, + norm_cfg=norm_cfg, + loss_decode=[ + dict( + type='LovaszLoss', class_weight=class_weight, loss_weight=1.0), + dict( + type='OhemCrossEntropy', + thres=0.9, + min_kept=131072, + class_weight=class_weight, + loss_weight=0.4), + ]), + auxiliary_head=dict( + type='DDRHead', + in_channels=64 * 4, + channels=128, + dropout_ratio=0., + num_classes=4, + align_corners=False, + norm_cfg=norm_cfg, + loss_decode=[ + dict( + type='LovaszLoss', class_weight=class_weight, loss_weight=1.0), + dict( + type='OhemCrossEntropy', + thres=0.9, + min_kept=131072, + class_weight=class_weight, + loss_weight=0.4), + ]), + + # model training and testing settings + train_cfg=dict(), + test_cfg=dict(mode='whole')) + +iters = 6000 +# optimizer +optimizer = dict(type='SGD', lr=0.01, momentum=0.9, weight_decay=0.0005) +optim_wrapper = dict(type='OptimWrapper', optimizer=optimizer, clip_grad=None) +# learning policy +param_scheduler = [ + dict( + type='PolyLR', + eta_min=0.0001, + power=0.9, + begin=0, + end=iters, + by_epoch=False) +] + +# training schedule for `iters` (6000) iterations +train_cfg = dict(type='IterBasedTrainLoop', max_iters=iters, val_interval=100) +val_cfg = dict(type='ValLoop') +test_cfg = dict(type='TestLoop') +default_hooks = dict( + 
timer=dict(type='IterTimerHook'), + logger=dict(type='LoggerHook', interval=5, log_metric_by_epoch=False), + param_scheduler=dict(type='ParamSchedulerHook'), + checkpoint=dict( + type='CheckpointHook', + by_epoch=False, + interval=100, + max_keep_ckpts=2, + save_best='mDice'), + sampler_seed=dict(type='DistSamplerSeedHook'), + visualization=dict(type='SegVisualizationHook')) + +randomness = dict(seed=304) diff --git a/configs/ddrnet/ohem-FAPPM_conv-lovasz-auxhead.py b/configs/ddrnet/ohem-FAPPM_conv-lovasz-auxhead.py new file mode 100644 index 0000000000..b66cbb3f62 --- /dev/null +++ b/configs/ddrnet/ohem-FAPPM_conv-lovasz-auxhead.py @@ -0,0 +1,103 @@ +_base_ = [ + '../_base_/datasets/feces.py', + '../_base_/default_runtime.py', +] + +class_weight = [0.7, 0.8, 1.1, 1.2] + +checkpoint = '/home/panjm/hepengguang/mmlab_he/mmsegmentation-dev-1.x/checkpoints/ddrnet23-in1kpre_3rdparty-9ca29f62.pth' # noqa +crop_size = (224, 224) +data_preprocessor = dict( + type='SegDataPreProcessor', + size=crop_size, + mean=[0.609, 0.604, 0.578], + std=[0.195, 0.192, 0.202], + bgr_to_rgb=True, + pad_val=0, + seg_pad_val=255) +norm_cfg = dict(type='SyncBN', requires_grad=True) +model = dict( + type='EncoderDecoder', + data_preprocessor=data_preprocessor, + backbone=dict( + type='FAPPM_CONV_nocbam', + in_channels=3, + channels=64, + ppm_channels=96, + norm_cfg=norm_cfg, + align_corners=False, + init_cfg=None), + decode_head=dict( + type='DDRHead', + in_channels=64 * 4, + channels=128, + dropout_ratio=0., + num_classes=4, + align_corners=False, + norm_cfg=norm_cfg, + loss_decode=[ + dict( + type='LovaszLoss', class_weight=class_weight, loss_weight=1.0), + dict( + type='OhemCrossEntropy', + thres=0.9, + min_kept=131072, + class_weight=class_weight, + loss_weight=0.4), + ]), + auxiliary_head=dict( + type='DDRHead', + in_channels=64 * 4, + channels=128, + dropout_ratio=0., + num_classes=4, + align_corners=False, + norm_cfg=norm_cfg, + loss_decode=[ + dict( + type='LovaszLoss', 
class_weight=class_weight, loss_weight=1.0), + dict( + type='OhemCrossEntropy', + thres=0.9, + min_kept=131072, + class_weight=class_weight, + loss_weight=0.4), + ]), + + # model training and testing settings + train_cfg=dict(), + test_cfg=dict(mode='whole')) + +iters = 6000 +# optimizer +optimizer = dict(type='SGD', lr=0.01, momentum=0.9, weight_decay=0.0005) +optim_wrapper = dict(type='OptimWrapper', optimizer=optimizer, clip_grad=None) +# learning policy +param_scheduler = [ + dict( + type='PolyLR', + eta_min=0.0001, + power=0.9, + begin=0, + end=iters, + by_epoch=False) +] + +# training schedule: `iters` (6000) iterations, validation every 100 iterations +train_cfg = dict(type='IterBasedTrainLoop', max_iters=iters, val_interval=100) +val_cfg = dict(type='ValLoop') +test_cfg = dict(type='TestLoop') +default_hooks = dict( + timer=dict(type='IterTimerHook'), + logger=dict(type='LoggerHook', interval=5, log_metric_by_epoch=False), + param_scheduler=dict(type='ParamSchedulerHook'), + checkpoint=dict( + type='CheckpointHook', + by_epoch=False, + interval=100, + max_keep_ckpts=2, + save_best='mDice'), + sampler_seed=dict(type='DistSamplerSeedHook'), + visualization=dict(type='SegVisualizationHook')) + +randomness = dict(seed=304) diff --git a/mmseg/models/backbones/ddrnet.py b/mmseg/models/backbones/ddrnet.py index 4508aade82..a49a12fa6b 100644 --- a/mmseg/models/backbones/ddrnet.py +++ b/mmseg/models/backbones/ddrnet.py @@ -1,13 +1,110 @@ # Copyright (c) OpenMMLab. All rights reserved. 
+import torch import torch.nn as nn from mmcv.cnn import ConvModule, build_norm_layer from mmengine.model import BaseModule -from mmseg.models.utils import DAPPM, BasicBlock, Bottleneck, resize +from mmseg.models.utils import DAPPM, DAPPM_cbam, FAPPM_conv, FAPPM_avgp, BasicBlock, BasicBlock_cbam, \ + Bottleneck, Bottleneck_cbam, resize, BasicBlock_cbam_group, Bottleneck_cbam_group, FAPPM_conv_group, \ + BasicBlock_cbam_group_r8, Bottleneck_cbam_group_r8, FAPPM_conv_slim, FAPPM_conv_nocbam from mmseg.registry import MODELS from mmseg.utils import OptConfigType +class ChannelAttention(nn.Module): + + def __init__(self, in_planes, ratio=16): + super(ChannelAttention, self).__init__() + self.avg_pool = nn.AdaptiveAvgPool2d(1) + self.max_pool = nn.AdaptiveMaxPool2d(1) + self.fc1 = nn.Conv2d(in_planes, in_planes // ratio, 1, bias=False) + self.relu1 = nn.ReLU() + self.fc2 = nn.Conv2d(in_planes // ratio, in_planes, 1, bias=False) + self.sigmoid = nn.Sigmoid() + + def forward(self, x): + avg_out = self.fc2(self.relu1(self.fc1(self.avg_pool(x)))) + max_out = self.fc2(self.relu1(self.fc1(self.max_pool(x)))) + out = avg_out + max_out + return self.sigmoid(out) + + +class ChannelAttention_group(nn.Module): + + def __init__(self, in_planes, ratio=16): + super(ChannelAttention_group, self).__init__() + self.avg_pool = nn.AdaptiveAvgPool2d(1) + self.max_pool = nn.AdaptiveMaxPool2d(1) + self.fc1 = nn.Conv2d( + in_planes, in_planes // ratio, 1, bias=False, groups=4) + self.relu1 = nn.ReLU() + self.fc2 = nn.Conv2d( + in_planes // ratio, in_planes, 1, bias=False, groups=4) + self.sigmoid = nn.Sigmoid() + + def forward(self, x): + avg_out = self.fc2(self.relu1(self.fc1(self.avg_pool(x)))) + max_out = self.fc2(self.relu1(self.fc1(self.max_pool(x)))) + out = avg_out + max_out + return self.sigmoid(out) + + +class SpatialAttention(nn.Module): + + def __init__(self, kernel_size=7): + super(SpatialAttention, self).__init__() + assert kernel_size in (3, 7), 'kernel size must be 3 or 7' + 
padding = 3 if kernel_size == 7 else 1 + self.conv1 = nn.Conv2d(2, 1, kernel_size, padding=padding, bias=False) + self.sigmoid = nn.Sigmoid() + + def forward(self, x): + avg_out = torch.mean(x, dim=1, keepdim=True) + max_out, _ = torch.max(x, dim=1, keepdim=True) + x = torch.cat([avg_out, max_out], dim=1) + x = self.conv1(x) + return self.sigmoid(x) + + +class CBAM(nn.Module): + + def __init__(self, in_planes, ratio=16, kernel_size=7): + super(CBAM, self).__init__() + self.ca = ChannelAttention(in_planes, ratio) + self.sa = SpatialAttention(kernel_size) + + def forward(self, x): + out = x * self.ca(x) + result = out * self.sa(out) + return result + + +class CBAM_group(nn.Module): + + def __init__(self, in_planes, ratio=16, kernel_size=7): + super(CBAM_group, self).__init__() + self.ca = ChannelAttention_group(in_planes, ratio) + self.sa = SpatialAttention(kernel_size) + + def forward(self, x): + out = x * self.ca(x) + result = out * self.sa(out) + return result + + +class CBAM_group_r8(nn.Module): + + def __init__(self, in_planes, ratio=8, kernel_size=7): + super(CBAM_group_r8, self).__init__() + self.ca = ChannelAttention_group(in_planes, ratio) + self.sa = SpatialAttention(kernel_size) + + def forward(self, x): + out = x * self.ca(x) + result = out * self.sa(out) + return result + + @MODELS.register_module() class DDRNet(BaseModule): """DDRNet backbone. @@ -58,7 +155,7 @@ def __init__(self, self.context_branch_layers.append( self._make_layer( block=BasicBlock if i < 2 else Bottleneck, - inplanes=channels * 2**(i + 1), + inplanes=channels * 2 ** (i + 1), planes=channels * 8 if i > 0 else channels * 4, num_blocks=2 if i < 2 else 1, stride=2)) @@ -220,3 +317,2044 @@ def forward(self, x): align_corners=self.align_corners) return (temp_context, x_s + x_c) if self.training else x_s + x_c + + +@MODELS.register_module() +class DDRNet_cbam(BaseModule): + """DDRNet backbone. 
+ + This backbone is the implementation of `Deep Dual-resolution Networks for + Real-time and Accurate Semantic Segmentation of Road Scenes + `_. + Modified from https://github.com/ydhongHIT/DDRNet. + + Args: + in_channels (int): Number of input image channels. Default: 3. + channels: (int): The base channels of DDRNet. Default: 32. + ppm_channels (int): The channels of PPM module. Default: 128. + align_corners (bool): align_corners argument of F.interpolate. + Default: False. + norm_cfg (dict): Config dict to build norm layer. + Default: dict(type='BN', requires_grad=True). + act_cfg (dict): Config dict for activation layer. + Default: dict(type='ReLU', inplace=True). + init_cfg (dict, optional): Initialization config dict. + Default: None. + """ + + def __init__(self, + in_channels: int = 3, + channels: int = 32, + ppm_channels: int = 128, + align_corners: bool = False, + norm_cfg: OptConfigType = dict(type='BN', requires_grad=True), + act_cfg: OptConfigType = dict(type='ReLU', inplace=True), + init_cfg: OptConfigType = None): + super().__init__(init_cfg) + + self.in_channels = in_channels + self.ppm_channels = ppm_channels + + self.norm_cfg = norm_cfg + self.act_cfg = act_cfg + self.align_corners = align_corners + + # stage 0-2 + self.stem = self._make_stem_layer(in_channels, channels, num_blocks=2) + self.relu = nn.ReLU() + + # low resolution(context) branch + self.context_branch_layers = nn.ModuleList() + for i in range(3): + self.context_branch_layers.append( + self._make_layer( + block=BasicBlock_cbam if i < 2 else Bottleneck_cbam, + inplanes=channels * 2 ** (i + 1), + planes=channels * 8 if i > 0 else channels * 4, + num_blocks=2 if i < 2 else 1, + stride=2)) + + # bilateral fusion + self.compression_1 = ConvModule( + channels * 4, + channels * 2, + kernel_size=1, + norm_cfg=self.norm_cfg, + act_cfg=None) + self.cbam6 = CBAM(channels * 2) # 64 + + self.down_1 = ConvModule( + channels * 2, + channels * 4, + kernel_size=3, + stride=2, + padding=1, + 
norm_cfg=self.norm_cfg, + act_cfg=None) + self.cbam7 = CBAM(channels * 4) # 128 + + self.compression_2 = ConvModule( + channels * 8, + channels * 2, + kernel_size=1, + norm_cfg=self.norm_cfg, + act_cfg=None) + self.cbam8 = CBAM(channels * 2) # 64 + + self.down_2 = nn.Sequential( + ConvModule( + channels * 2, + channels * 4, + kernel_size=3, + stride=2, + padding=1, + norm_cfg=self.norm_cfg, + act_cfg=self.act_cfg), + ConvModule( + channels * 4, + channels * 8, + kernel_size=3, + stride=2, + padding=1, + norm_cfg=self.norm_cfg, + act_cfg=None)) + self.cbam9 = CBAM(channels * 8) # 128 + + # high resolution(spatial) branch + self.spatial_branch_layers = nn.ModuleList() + for i in range(3): + self.spatial_branch_layers.append( + self._make_layer( + block=BasicBlock_cbam if i < 2 else Bottleneck_cbam, + inplanes=channels * 2, + planes=channels * 2, + num_blocks=2 if i < 2 else 1, + )) + + self.spp = DAPPM_cbam( + channels * 16, ppm_channels, channels * 4, num_scales=5) + self.cbam13 = CBAM(channels * 4) # 128 √ + + def _make_stem_layer(self, in_channels, channels, num_blocks): + layers = [ + ConvModule( + in_channels, + channels, # 32 + kernel_size=3, + stride=2, + padding=1, + norm_cfg=self.norm_cfg, + act_cfg=self.act_cfg), + ConvModule( + channels, + channels, + kernel_size=3, + stride=2, + padding=1, + norm_cfg=self.norm_cfg, + act_cfg=self.act_cfg), + CBAM(channels) + ] + + layers.extend([ + self._make_layer(BasicBlock_cbam, channels, channels, num_blocks), + nn.ReLU(), + self._make_layer( + BasicBlock_cbam, channels, channels * 2, num_blocks, stride=2), + nn.ReLU(), + ]) + + return nn.Sequential(*layers) + + def _make_layer(self, block, inplanes, planes, num_blocks, stride=1): + downsample = None + if stride != 1 or inplanes != planes * block.expansion: + downsample = nn.Sequential( + nn.Conv2d( + inplanes, + planes * block.expansion, + kernel_size=1, + stride=stride, + bias=False), + build_norm_layer(self.norm_cfg, planes * block.expansion)[1]) + + layers = [ + 
block( + in_channels=inplanes, + channels=planes, + stride=stride, + downsample=downsample) + ] + inplanes = planes * block.expansion + for i in range(1, num_blocks): + layers.append( + block( + in_channels=inplanes, + channels=planes, + stride=1, + norm_cfg=self.norm_cfg, + act_cfg_out=None if i == num_blocks - 1 else self.act_cfg)) + + return nn.Sequential(*layers) + + def forward(self, x): + """Forward function.""" + out_size = (x.shape[-2] // 8, x.shape[-1] // 8) + + # stage 0-2 + x = self.stem(x) + + # stage3 + x_c = self.context_branch_layers[0](x) + x_s = self.spatial_branch_layers[0](x) + comp_c = self.compression_1(self.relu(x_c)) + x_c += self.down_1(self.relu(x_s)) + x_c = self.cbam7(x_c) + x_s += resize( + comp_c, + size=out_size, + mode='bilinear', + align_corners=self.align_corners) + x_s = self.cbam6(x_s) + if self.training: + temp_context = x_s.clone() + + # stage4 + x_c = self.context_branch_layers[1](self.relu(x_c)) + x_s = self.spatial_branch_layers[1](self.relu(x_s)) + comp_c = self.compression_2(self.relu(x_c)) + x_c += self.down_2(self.relu(x_s)) + x_c = self.cbam9(x_c) + x_s += resize( + comp_c, + size=out_size, + mode='bilinear', + align_corners=self.align_corners) + x_s = self.cbam8(x_s) + + # stage5 + x_s = self.spatial_branch_layers[2](self.relu(x_s)) + x_c = self.context_branch_layers[2](self.relu(x_c)) + x_c = self.spp(x_c) + x_c = resize( + x_c, + size=out_size, + mode='bilinear', + align_corners=self.align_corners) + + add = self.cbam13(x_s + x_c) + return (temp_context, add) if self.training else add + + +@MODELS.register_module() +class FAPPM_CONV(BaseModule): + """DDRNet backbone. + + This backbone is the implementation of `Deep Dual-resolution Networks for + Real-time and Accurate Semantic Segmentation of Road Scenes + `_. + Modified from https://github.com/ydhongHIT/DDRNet. + + Args: + in_channels (int): Number of input image channels. Default: 3. + channels: (int): The base channels of DDRNet. Default: 32. 
+ ppm_channels (int): The channels of PPM module. Default: 128. + align_corners (bool): align_corners argument of F.interpolate. + Default: False. + norm_cfg (dict): Config dict to build norm layer. + Default: dict(type='BN', requires_grad=True). + act_cfg (dict): Config dict for activation layer. + Default: dict(type='ReLU', inplace=True). + init_cfg (dict, optional): Initialization config dict. + Default: None. + """ + + def __init__(self, + in_channels: int = 3, + channels: int = 32, + ppm_channels: int = 96, + align_corners: bool = False, + norm_cfg: OptConfigType = dict(type='BN', requires_grad=True), + act_cfg: OptConfigType = dict(type='ReLU', inplace=True), + init_cfg: OptConfigType = None): + super().__init__(init_cfg) + + self.in_channels = in_channels + self.ppm_channels = ppm_channels + + self.norm_cfg = norm_cfg + self.act_cfg = act_cfg + self.align_corners = align_corners + + # stage 0-2 + self.stem = self._make_stem_layer(in_channels, channels, num_blocks=2) + self.relu = nn.ReLU() + + # low resolution(context) branch + self.context_branch_layers = nn.ModuleList() + for i in range(3): + self.context_branch_layers.append( + self._make_layer( + block=BasicBlock_cbam if i < 2 else Bottleneck_cbam, + inplanes=channels * 2 ** (i + 1), + planes=channels * 8 if i > 0 else channels * 4, + num_blocks=2 if i < 2 else 1, + stride=2)) + + # bilateral fusion + self.compression_1 = ConvModule( + channels * 4, + channels * 2, + kernel_size=1, + norm_cfg=self.norm_cfg, + act_cfg=None) + self.cbam6 = CBAM(channels * 2) # 64 + + self.down_1 = ConvModule( + channels * 2, + channels * 4, + kernel_size=3, + stride=2, + padding=1, + norm_cfg=self.norm_cfg, + act_cfg=None) + self.cbam7 = CBAM(channels * 4) # 128 + + self.compression_2 = ConvModule( + channels * 8, + channels * 2, + kernel_size=1, + norm_cfg=self.norm_cfg, + act_cfg=None) + self.cbam8 = CBAM(channels * 2) # 64 + + self.down_2 = nn.Sequential( + ConvModule( + channels * 2, + channels * 4, + kernel_size=3, + 
stride=2, + padding=1, + norm_cfg=self.norm_cfg, + act_cfg=self.act_cfg), + ConvModule( + channels * 4, + channels * 8, + kernel_size=3, + stride=2, + padding=1, + norm_cfg=self.norm_cfg, + act_cfg=None)) + self.cbam9 = CBAM(channels * 8) # 128 + + # high resolution(spatial) branch + self.spatial_branch_layers = nn.ModuleList() + for i in range(3): + self.spatial_branch_layers.append( + self._make_layer( + block=BasicBlock_cbam if i < 2 else Bottleneck_cbam, + inplanes=channels * 2, + planes=channels * 2, + num_blocks=2 if i < 2 else 1, + )) + + self.spp = FAPPM_conv( + channels * 16, ppm_channels, channels * 4, num_scales=5) + self.cbam13 = CBAM(channels * 4) # 128 √ + + def _make_stem_layer(self, in_channels, channels, num_blocks): + layers = [ + ConvModule( + in_channels, + channels, # 32 + kernel_size=3, + stride=2, + padding=1, + norm_cfg=self.norm_cfg, + act_cfg=self.act_cfg), + ConvModule( + channels, + channels, + kernel_size=3, + stride=2, + padding=1, + norm_cfg=self.norm_cfg, + act_cfg=self.act_cfg), + CBAM(channels) + ] + + layers.extend([ + self._make_layer(BasicBlock_cbam, channels, channels, num_blocks), + nn.ReLU(), + self._make_layer( + BasicBlock_cbam, channels, channels * 2, num_blocks, stride=2), + nn.ReLU(), + ]) + + return nn.Sequential(*layers) + + def _make_layer(self, block, inplanes, planes, num_blocks, stride=1): + downsample = None + if stride != 1 or inplanes != planes * block.expansion: + downsample = nn.Sequential( + nn.Conv2d( + inplanes, + planes * block.expansion, + kernel_size=1, + stride=stride, + bias=False), + build_norm_layer(self.norm_cfg, planes * block.expansion)[1]) + + layers = [ + block( + in_channels=inplanes, + channels=planes, + stride=stride, + downsample=downsample) + ] + inplanes = planes * block.expansion + for i in range(1, num_blocks): + layers.append( + block( + in_channels=inplanes, + channels=planes, + stride=1, + norm_cfg=self.norm_cfg, + act_cfg_out=None if i == num_blocks - 1 else self.act_cfg)) + + return 
nn.Sequential(*layers) + + def forward(self, x): + """Forward function.""" + out_size = (x.shape[-2] // 8, x.shape[-1] // 8) + + # stage 0-2 + x = self.stem(x) + + # stage3 + x_c = self.context_branch_layers[0](x) + x_s = self.spatial_branch_layers[0](x) + comp_c = self.compression_1(self.relu(x_c)) + x_c += self.down_1(self.relu(x_s)) + x_c = self.cbam7(x_c) + x_s += resize( + comp_c, + size=out_size, + mode='bilinear', + align_corners=self.align_corners) + x_s = self.cbam6(x_s) + if self.training: + temp_context = x_s.clone() + + # stage4 + x_c = self.context_branch_layers[1](self.relu(x_c)) + x_s = self.spatial_branch_layers[1](self.relu(x_s)) + comp_c = self.compression_2(self.relu(x_c)) + x_c += self.down_2(self.relu(x_s)) + x_c = self.cbam9(x_c) + x_s += resize( + comp_c, + size=out_size, + mode='bilinear', + align_corners=self.align_corners) + x_s = self.cbam8(x_s) + + # stage5 + x_s = self.spatial_branch_layers[2](self.relu(x_s)) + x_c = self.context_branch_layers[2](self.relu(x_c)) + x_c = self.spp(x_c) + x_c = resize( + x_c, + size=out_size, + mode='bilinear', + align_corners=self.align_corners) + + add = self.cbam13(x_s + x_c) + return (temp_context, add) if self.training else add + + +# ohem-FAPPM_conv-cbam-Lovasz-auxhead + + +@MODELS.register_module() +class FAPPM_CONV_nocbam(BaseModule): + """DDRNet backbone. + + This backbone is the implementation of `Deep Dual-resolution Networks for + Real-time and Accurate Semantic Segmentation of Road Scenes + `_. + Modified from https://github.com/ydhongHIT/DDRNet. + + Args: + in_channels (int): Number of input image channels. Default: 3. + channels: (int): The base channels of DDRNet. Default: 32. + ppm_channels (int): The channels of PPM module. Default: 128. + align_corners (bool): align_corners argument of F.interpolate. + Default: False. + norm_cfg (dict): Config dict to build norm layer. + Default: dict(type='BN', requires_grad=True). + act_cfg (dict): Config dict for activation layer. 
+ Default: dict(type='ReLU', inplace=True). + init_cfg (dict, optional): Initialization config dict. + Default: None. + """ + + def __init__(self, + in_channels: int = 3, + channels: int = 32, + ppm_channels: int = 96, + align_corners: bool = False, + norm_cfg: OptConfigType = dict(type='BN', requires_grad=True), + act_cfg: OptConfigType = dict(type='ReLU', inplace=True), + init_cfg: OptConfigType = None): + super().__init__(init_cfg) + + self.in_channels = in_channels + self.ppm_channels = ppm_channels + + self.norm_cfg = norm_cfg + self.act_cfg = act_cfg + self.align_corners = align_corners + + # stage 0-2 + self.stem = self._make_stem_layer(in_channels, channels, num_blocks=2) + self.relu = nn.ReLU() + + # low resolution(context) branch + self.context_branch_layers = nn.ModuleList() + for i in range(3): + self.context_branch_layers.append( + self._make_layer( + block=BasicBlock if i < 2 else Bottleneck, + inplanes=channels * 2 ** (i + 1), + planes=channels * 8 if i > 0 else channels * 4, + num_blocks=2 if i < 2 else 1, + stride=2)) + + # bilateral fusion + self.compression_1 = ConvModule( + channels * 4, + channels * 2, + kernel_size=1, + norm_cfg=self.norm_cfg, + act_cfg=None) + + self.down_1 = ConvModule( + channels * 2, + channels * 4, + kernel_size=3, + stride=2, + padding=1, + norm_cfg=self.norm_cfg, + act_cfg=None) + + self.compression_2 = ConvModule( + channels * 8, + channels * 2, + kernel_size=1, + norm_cfg=self.norm_cfg, + act_cfg=None) + + self.down_2 = nn.Sequential( + ConvModule( + channels * 2, + channels * 4, + kernel_size=3, + stride=2, + padding=1, + norm_cfg=self.norm_cfg, + act_cfg=self.act_cfg), + ConvModule( + channels * 4, + channels * 8, + kernel_size=3, + stride=2, + padding=1, + norm_cfg=self.norm_cfg, + act_cfg=None)) + + # high resolution(spatial) branch + self.spatial_branch_layers = nn.ModuleList() + for i in range(3): + self.spatial_branch_layers.append( + self._make_layer( + block=BasicBlock if i < 2 else Bottleneck, + 
inplanes=channels * 2, + planes=channels * 2, + num_blocks=2 if i < 2 else 1, + )) + + self.spp = FAPPM_conv_nocbam( + channels * 16, ppm_channels, channels * 4, num_scales=5) + + def _make_stem_layer(self, in_channels, channels, num_blocks): + layers = [ + ConvModule( + in_channels, + channels, # 32 + kernel_size=3, + stride=2, + padding=1, + norm_cfg=self.norm_cfg, + act_cfg=self.act_cfg), + ConvModule( + channels, + channels, + kernel_size=3, + stride=2, + padding=1, + norm_cfg=self.norm_cfg, + act_cfg=self.act_cfg), + ] + + layers.extend([ + self._make_layer(BasicBlock, channels, channels, num_blocks), + nn.ReLU(), + self._make_layer( + BasicBlock, channels, channels * 2, num_blocks, stride=2), + nn.ReLU(), + ]) + + return nn.Sequential(*layers) + + def _make_layer(self, block, inplanes, planes, num_blocks, stride=1): + downsample = None + if stride != 1 or inplanes != planes * block.expansion: + downsample = nn.Sequential( + nn.Conv2d( + inplanes, + planes * block.expansion, + kernel_size=1, + stride=stride, + bias=False), + build_norm_layer(self.norm_cfg, planes * block.expansion)[1]) + + layers = [ + block( + in_channels=inplanes, + channels=planes, + stride=stride, + downsample=downsample) + ] + inplanes = planes * block.expansion + for i in range(1, num_blocks): + layers.append( + block( + in_channels=inplanes, + channels=planes, + stride=1, + norm_cfg=self.norm_cfg, + act_cfg_out=None if i == num_blocks - 1 else self.act_cfg)) + + return nn.Sequential(*layers) + + def forward(self, x): + """Forward function.""" + out_size = (x.shape[-2] // 8, x.shape[-1] // 8) + + # stage 0-2 + x = self.stem(x) + + # stage3 + x_c = self.context_branch_layers[0](x) + x_s = self.spatial_branch_layers[0](x) + comp_c = self.compression_1(self.relu(x_c)) + x_c += self.down_1(self.relu(x_s)) + x_s += resize( + comp_c, + size=out_size, + mode='bilinear', + align_corners=self.align_corners) + if self.training: + temp_context = x_s.clone() + + # stage4 + x_c = 
self.context_branch_layers[1](self.relu(x_c)) + x_s = self.spatial_branch_layers[1](self.relu(x_s)) + comp_c = self.compression_2(self.relu(x_c)) + x_c += self.down_2(self.relu(x_s)) + x_s += resize( + comp_c, + size=out_size, + mode='bilinear', + align_corners=self.align_corners) + + # stage5 + x_s = self.spatial_branch_layers[2](self.relu(x_s)) + x_c = self.context_branch_layers[2](self.relu(x_c)) + x_c = self.spp(x_c) + x_c = resize( + x_c, + size=out_size, + mode='bilinear', + align_corners=self.align_corners) + + add = x_s + x_c + return (temp_context, add) if self.training else add + + +@MODELS.register_module() +class FAPPM_CONV_group(BaseModule): + """DDRNet backbone. + + This backbone is the implementation of `Deep Dual-resolution Networks for + Real-time and Accurate Semantic Segmentation of Road Scenes + `_. + Modified from https://github.com/ydhongHIT/DDRNet. + + Args: + in_channels (int): Number of input image channels. Default: 3. + channels: (int): The base channels of DDRNet. Default: 32. + ppm_channels (int): The channels of PPM module. Default: 128. + align_corners (bool): align_corners argument of F.interpolate. + Default: False. + norm_cfg (dict): Config dict to build norm layer. + Default: dict(type='BN', requires_grad=True). + act_cfg (dict): Config dict for activation layer. + Default: dict(type='ReLU', inplace=True). + init_cfg (dict, optional): Initialization config dict. + Default: None. 
+ """ + + def __init__(self, + in_channels: int = 3, + channels: int = 32, + ppm_channels: int = 96, + align_corners: bool = False, + norm_cfg: OptConfigType = dict(type='BN', requires_grad=True), + act_cfg: OptConfigType = dict(type='ReLU', inplace=True), + init_cfg: OptConfigType = None): + super().__init__(init_cfg) + + self.in_channels = in_channels + self.ppm_channels = ppm_channels + + self.norm_cfg = norm_cfg + self.act_cfg = act_cfg + self.align_corners = align_corners + + # stage 0-2 + self.stem = self._make_stem_layer(in_channels, channels, num_blocks=2) + self.relu = nn.ReLU() + + # low resolution(context) branch + self.context_branch_layers = nn.ModuleList() + for i in range(3): + self.context_branch_layers.append( + self._make_layer( + block=BasicBlock_cbam if i < 2 else Bottleneck_cbam, + inplanes=channels * 2 ** (i + 1), + planes=channels * 8 if i > 0 else channels * 4, + num_blocks=2 if i < 2 else 1, + stride=2)) + + # bilateral fusion + self.compression_1 = ConvModule( + channels * 4, + channels * 2, + kernel_size=1, + groups=4, + norm_cfg=self.norm_cfg, + act_cfg=None) + self.cbam6 = CBAM_group(channels * 2) # 64 + + self.down_1 = ConvModule( + channels * 2, + channels * 4, + kernel_size=3, + stride=2, + padding=1, + groups=4, + norm_cfg=self.norm_cfg, + act_cfg=None) + self.cbam7 = CBAM_group(channels * 4) # 128 + + self.compression_2 = ConvModule( + channels * 8, + channels * 2, + kernel_size=1, + groups=4, + norm_cfg=self.norm_cfg, + act_cfg=None) + self.cbam8 = CBAM_group(channels * 2) # 64 + + self.down_2 = nn.Sequential( + ConvModule( + channels * 2, + channels * 4, + kernel_size=3, + stride=2, + padding=1, + groups=4, + norm_cfg=self.norm_cfg, + act_cfg=self.act_cfg), + ConvModule( + channels * 4, + channels * 8, + kernel_size=3, + stride=2, + padding=1, + groups=4, + norm_cfg=self.norm_cfg, + act_cfg=None)) + self.cbam9 = CBAM_group(channels * 8) # 128 + + # high resolution(spatial) branch + self.spatial_branch_layers = nn.ModuleList() + 
for i in range(3): + self.spatial_branch_layers.append( + self._make_layer( + block=BasicBlock_cbam_group + if i < 2 else Bottleneck_cbam_group, + inplanes=channels * 2, + planes=channels * 2, + num_blocks=2 if i < 2 else 1, + )) + + self.spp = FAPPM_conv_group( + channels * 16, ppm_channels, channels * 4, num_scales=5) + self.cbam13 = CBAM_group(channels * 4) # 128 √ + + def _make_stem_layer(self, in_channels, channels, num_blocks): + layers = [ + ConvModule( + in_channels, + channels, # 32 + kernel_size=3, + stride=2, + padding=1, + norm_cfg=self.norm_cfg, + act_cfg=self.act_cfg), + ConvModule( + channels, + channels, + kernel_size=3, + stride=2, + padding=1, + groups=4, + norm_cfg=self.norm_cfg, + act_cfg=self.act_cfg), + CBAM_group(channels) + ] + + layers.extend([ + self._make_layer(BasicBlock_cbam_group, channels, channels, + num_blocks), + nn.ReLU(), + self._make_layer( + BasicBlock_cbam_group, + channels, + channels * 2, + num_blocks, + stride=2), + nn.ReLU(), + ]) + + return nn.Sequential(*layers) + + def _make_layer(self, block, inplanes, planes, num_blocks, stride=1): + downsample = None + if stride != 1 or inplanes != planes * block.expansion: + downsample = nn.Sequential( + nn.Conv2d( + inplanes, + planes * block.expansion, + kernel_size=1, + stride=stride, + groups=4, + bias=False), + build_norm_layer(self.norm_cfg, planes * block.expansion)[1]) + + layers = [ + block( + in_channels=inplanes, + channels=planes, + stride=stride, + downsample=downsample) + ] + inplanes = planes * block.expansion + for i in range(1, num_blocks): + layers.append( + block( + in_channels=inplanes, + channels=planes, + stride=1, + norm_cfg=self.norm_cfg, + act_cfg_out=None if i == num_blocks - 1 else self.act_cfg)) + + return nn.Sequential(*layers) + + def forward(self, x): + """Forward function.""" + out_size = (x.shape[-2] // 8, x.shape[-1] // 8) + + # stage 0-2 + x = self.stem(x) + + # stage3 + x_c = self.context_branch_layers[0](x) + x_s = 
@MODELS.register_module()
class FAPPM_CONV_slim(BaseModule):
    """Dual-resolution (DDRNet-style) backbone with CBAM and ``FAPPM_conv``.

    The layout follows `Deep Dual-resolution Networks for Real-time and
    Accurate Semantic Segmentation of Road Scenes` (modified from
    https://github.com/ydhongHIT/DDRNet): a shared stem feeds a
    low-resolution context branch and a high-resolution spatial branch that
    exchange features twice. Every fused feature passes through a ``CBAM``
    module, the context branch ends in ``FAPPM_conv``, and the sum of both
    branches is refined by a final ``CBAM``.

    Args:
        in_channels (int): Number of input image channels. Default: 3.
        channels (int): Base channel width of the network. Default: 32.
        ppm_channels (int): Second positional argument handed to
            ``FAPPM_conv``. Default: 64.
        align_corners (bool): ``align_corners`` argument of every ``resize``
            call. Default: False.
        norm_cfg (dict): Config dict to build norm layers.
            Default: dict(type='BN', requires_grad=True).
        act_cfg (dict): Config dict for activation layers.
            Default: dict(type='ReLU', inplace=True).
        init_cfg (dict, optional): Initialization config dict.
            Default: None.
    """

    def __init__(self,
                 in_channels: int = 3,
                 channels: int = 32,
                 ppm_channels: int = 64,
                 align_corners: bool = False,
                 norm_cfg: OptConfigType = dict(type='BN', requires_grad=True),
                 act_cfg: OptConfigType = dict(type='ReLU', inplace=True),
                 init_cfg: OptConfigType = None):
        super().__init__(init_cfg)

        self.in_channels = in_channels
        self.ppm_channels = ppm_channels
        self.norm_cfg = norm_cfg
        self.act_cfg = act_cfg
        self.align_corners = align_corners

        width_2x = channels * 2
        width_4x = channels * 4
        width_8x = channels * 8

        # Stages 0-2: shared stem.
        self.stem = self._make_stem_layer(in_channels, channels, num_blocks=2)
        self.relu = nn.ReLU()

        # Low-resolution (context) branch, one entry per stage:
        # (block type, input width, block width, number of blocks).
        context_specs = (
            (BasicBlock_cbam, width_2x, width_4x, 2),
            (BasicBlock_cbam, width_4x, width_8x, 2),
            (Bottleneck_cbam, width_8x, width_8x, 1),
        )
        self.context_branch_layers = nn.ModuleList([
            self._make_layer(
                block=block_type,
                inplanes=width_in,
                planes=width_out,
                num_blocks=depth,
                stride=2)
            for block_type, width_in, width_out, depth in context_specs
        ])

        # Bilateral fusion. Modules are created in the same order as the
        # attributes are declared so parameter initialisation is unchanged.
        pointwise = dict(kernel_size=1, norm_cfg=self.norm_cfg, act_cfg=None)
        strided = dict(
            kernel_size=3, stride=2, padding=1, norm_cfg=self.norm_cfg)

        self.compression_1 = ConvModule(width_4x, width_2x, **pointwise)
        self.cbam6 = CBAM(width_2x)

        self.down_1 = ConvModule(width_2x, width_4x, act_cfg=None, **strided)
        self.cbam7 = CBAM(width_4x)

        self.compression_2 = ConvModule(width_8x, width_2x, **pointwise)
        self.cbam8 = CBAM(width_2x)

        self.down_2 = nn.Sequential(
            ConvModule(width_2x, width_4x, act_cfg=self.act_cfg, **strided),
            ConvModule(width_4x, width_8x, act_cfg=None, **strided))
        self.cbam9 = CBAM(width_8x)

        # High-resolution (spatial) branch: constant width, stride 1.
        spatial_specs = (
            (BasicBlock_cbam, 2),
            (BasicBlock_cbam, 2),
            (Bottleneck_cbam, 1),
        )
        self.spatial_branch_layers = nn.ModuleList([
            self._make_layer(
                block=block_type,
                inplanes=width_2x,
                planes=width_2x,
                num_blocks=depth)
            for block_type, depth in spatial_specs
        ])

        self.spp = FAPPM_conv(
            channels * 16, ppm_channels, width_4x, num_scales=5)
        self.cbam13 = CBAM(width_4x)

    def _make_stem_layer(self, in_channels, channels, num_blocks):
        """Build the stem: two stride-2 convs, a CBAM and two block stacks."""
        conv_kwargs = dict(
            kernel_size=3,
            stride=2,
            padding=1,
            norm_cfg=self.norm_cfg,
            act_cfg=self.act_cfg)

        return nn.Sequential(
            ConvModule(in_channels, channels, **conv_kwargs),
            ConvModule(channels, channels, **conv_kwargs),
            CBAM(channels),
            self._make_layer(BasicBlock_cbam, channels, channels, num_blocks),
            nn.ReLU(),
            self._make_layer(
                BasicBlock_cbam, channels, channels * 2, num_blocks,
                stride=2),
            nn.ReLU(),
        )

    def _make_layer(self, block, inplanes, planes, num_blocks, stride=1):
        """Stack ``num_blocks`` instances of ``block`` into a Sequential.

        A 1x1 conv + norm shortcut is added to the first block whenever the
        stride or the channel count changes.
        """
        out_planes = planes * block.expansion

        shortcut = None
        if stride != 1 or inplanes != out_planes:
            shortcut = nn.Sequential(
                nn.Conv2d(
                    inplanes,
                    out_planes,
                    kernel_size=1,
                    stride=stride,
                    bias=False),
                build_norm_layer(self.norm_cfg, out_planes)[1])

        # NOTE(review): the first block receives neither norm_cfg nor
        # act_cfg_out, so it falls back to the block class's own defaults.
        stack = [
            block(
                in_channels=inplanes,
                channels=planes,
                stride=stride,
                downsample=shortcut)
        ]
        # Only the last block of the stack drops its output activation.
        stack.extend(
            block(
                in_channels=out_planes,
                channels=planes,
                stride=1,
                norm_cfg=self.norm_cfg,
                act_cfg_out=None if idx == num_blocks - 1 else self.act_cfg)
            for idx in range(1, num_blocks))

        return nn.Sequential(*stack)

    def forward(self, x):
        """Run the backbone.

        Returns the fused feature map, resized to 1/8 of the input height
        and width. In training mode a tuple ``(stage-3 spatial feature,
        fused feature)`` is returned instead.
        """
        out_size = (x.shape[-2] // 8, x.shape[-1] // 8)
        upsample = dict(
            size=out_size, mode='bilinear', align_corners=self.align_corners)
        context_stages = self.context_branch_layers
        spatial_stages = self.spatial_branch_layers

        # stage 0-2
        stem_out = self.stem(x)

        # stage 3
        ctx = context_stages[0](stem_out)
        spa = spatial_stages[0](stem_out)
        squeezed = self.compression_1(self.relu(ctx))
        ctx += self.down_1(self.relu(spa))
        ctx = self.cbam7(ctx)
        spa += resize(squeezed, **upsample)
        spa = self.cbam6(spa)
        # Snapshot for the training-only auxiliary output.
        aux = spa.clone() if self.training else None

        # stage 4
        ctx = context_stages[1](self.relu(ctx))
        spa = spatial_stages[1](self.relu(spa))
        squeezed = self.compression_2(self.relu(ctx))
        ctx += self.down_2(self.relu(spa))
        ctx = self.cbam9(ctx)
        spa += resize(squeezed, **upsample)
        spa = self.cbam8(spa)

        # stage 5
        spa = spatial_stages[2](self.relu(spa))
        ctx = context_stages[2](self.relu(ctx))
        ctx = resize(self.spp(ctx), **upsample)

        fused = self.cbam13(spa + ctx)
        if self.training:
            return aux, fused
        return fused
+ """ + + def __init__(self, + in_channels: int = 3, + channels: int = 32, + ppm_channels: int = 96, + align_corners: bool = False, + norm_cfg: OptConfigType = dict(type='BN', requires_grad=True), + act_cfg: OptConfigType = dict(type='ReLU', inplace=True), + init_cfg: OptConfigType = None): + super().__init__(init_cfg) + + self.in_channels = in_channels + self.ppm_channels = ppm_channels + + self.norm_cfg = norm_cfg + self.act_cfg = act_cfg + self.align_corners = align_corners + + # stage 0-2 + self.stem = self._make_stem_layer(in_channels, channels, num_blocks=2) + self.relu = nn.ReLU() + + # low resolution(context) branch + self.context_branch_layers = nn.ModuleList() + for i in range(3): + self.context_branch_layers.append( + self._make_layer( + block=BasicBlock_cbam if i < 2 else Bottleneck_cbam, + inplanes=channels * 2 ** (i + 1), + planes=channels * 8 if i > 0 else channels * 4, + num_blocks=2 if i < 2 else 1, + stride=2)) + + # bilateral fusion + self.compression_1 = ConvModule( + channels * 4, + channels * 2, + kernel_size=1, + norm_cfg=self.norm_cfg, + act_cfg=None) + + self.down_1 = ConvModule( + channels * 2, + channels * 4, + kernel_size=3, + stride=2, + padding=1, + norm_cfg=self.norm_cfg, + act_cfg=None) + + self.compression_2 = ConvModule( + channels * 8, + channels * 2, + kernel_size=1, + norm_cfg=self.norm_cfg, + act_cfg=None) + + self.down_2 = nn.Sequential( + ConvModule( + channels * 2, + channels * 4, + kernel_size=3, + stride=2, + padding=1, + norm_cfg=self.norm_cfg, + act_cfg=self.act_cfg), + ConvModule( + channels * 4, + channels * 8, + kernel_size=3, + stride=2, + padding=1, + norm_cfg=self.norm_cfg, + act_cfg=None)) + + # high resolution(spatial) branch + self.spatial_branch_layers = nn.ModuleList() + for i in range(3): + self.spatial_branch_layers.append( + self._make_layer( + block=BasicBlock_cbam if i < 2 else Bottleneck_cbam, + inplanes=channels * 2, + planes=channels * 2, + num_blocks=2 if i < 2 else 1, + )) + + self.spp = 
FAPPM_conv_slim( + channels * 16, ppm_channels, channels * 4, num_scales=5) + + def _make_stem_layer(self, in_channels, channels, num_blocks): + layers = [ + ConvModule( + in_channels, + channels, + kernel_size=3, + stride=2, + padding=1, + norm_cfg=self.norm_cfg, + act_cfg=self.act_cfg), + ConvModule( + channels, + channels, + kernel_size=3, + stride=2, + padding=1, + norm_cfg=self.norm_cfg, + act_cfg=self.act_cfg), + ] + + layers.extend([ + self._make_layer(BasicBlock_cbam, channels, channels, num_blocks), + nn.ReLU(), + self._make_layer( + BasicBlock_cbam, channels, channels * 2, num_blocks, stride=2), + nn.ReLU(), + ]) + + return nn.Sequential(*layers) + + def _make_layer(self, block, inplanes, planes, num_blocks, stride=1): + downsample = None + if stride != 1 or inplanes != planes * block.expansion: + downsample = nn.Sequential( + nn.Conv2d( + inplanes, + planes * block.expansion, + kernel_size=1, + stride=stride, + bias=False), + build_norm_layer(self.norm_cfg, planes * block.expansion)[1]) + + layers = [ + block( + in_channels=inplanes, + channels=planes, + stride=stride, + downsample=downsample) + ] + inplanes = planes * block.expansion + for i in range(1, num_blocks): + layers.append( + block( + in_channels=inplanes, + channels=planes, + stride=1, + norm_cfg=self.norm_cfg, + act_cfg_out=None if i == num_blocks - 1 else self.act_cfg)) + + return nn.Sequential(*layers) + + def forward(self, x): + """Forward function.""" + out_size = (x.shape[-2] // 8, x.shape[-1] // 8) + + # stage 0-2 + x = self.stem(x) + + # stage3 + x_c = self.context_branch_layers[0](x) + x_s = self.spatial_branch_layers[0](x) + comp_c = self.compression_1(self.relu(x_c)) + x_c += self.down_1(self.relu(x_s)) + x_s += resize( + comp_c, + size=out_size, + mode='bilinear', + align_corners=self.align_corners) + if self.training: + temp_context = x_s.clone() + + # stage4 + x_c = self.context_branch_layers[1](self.relu(x_c)) + x_s = self.spatial_branch_layers[1](self.relu(x_s)) + comp_c = 
@MODELS.register_module()
class FAPPM_CONV_group_slim(BaseModule):
    """Grouped-convolution variant of the dual-resolution CBAM backbone.

    The layout follows `Deep Dual-resolution Networks for Real-time and
    Accurate Semantic Segmentation of Road Scenes` (modified from
    https://github.com/ydhongHIT/DDRNet). Compared with the plain variant,
    the fusion convolutions, the second stem convolution and the shortcut
    convolutions use ``groups=4``, attention is done by ``CBAM_group`` /
    ``CBAM_group_r8``, and the context branch ends in ``FAPPM_conv_group``.

    Args:
        in_channels (int): Number of input image channels. Default: 3.
        channels (int): Base channel width of the network. Default: 32.
        ppm_channels (int): Second positional argument handed to
            ``FAPPM_conv_group``. Default: 64.
        align_corners (bool): ``align_corners`` argument of every ``resize``
            call. Default: False.
        norm_cfg (dict): Config dict to build norm layers.
            Default: dict(type='BN', requires_grad=True).
        act_cfg (dict): Config dict for activation layers.
            Default: dict(type='ReLU', inplace=True).
        init_cfg (dict, optional): Initialization config dict.
            Default: None.
    """

    def __init__(self,
                 in_channels: int = 3,
                 channels: int = 32,
                 ppm_channels: int = 64,
                 align_corners: bool = False,
                 norm_cfg: OptConfigType = dict(type='BN', requires_grad=True),
                 act_cfg: OptConfigType = dict(type='ReLU', inplace=True),
                 init_cfg: OptConfigType = None):
        super().__init__(init_cfg)

        self.in_channels = in_channels
        self.ppm_channels = ppm_channels
        self.norm_cfg = norm_cfg
        self.act_cfg = act_cfg
        self.align_corners = align_corners

        width_2x = channels * 2
        width_4x = channels * 4
        width_8x = channels * 8

        # Stages 0-2: shared stem.
        self.stem = self._make_stem_layer(in_channels, channels, num_blocks=2)
        self.relu = nn.ReLU()

        # Low-resolution (context) branch, one entry per stage:
        # (block type, input width, block width, number of blocks).
        # NOTE(review): this branch uses the non-grouped CBAM blocks while
        # the spatial branch uses the *_group blocks -- confirm intended.
        context_specs = (
            (BasicBlock_cbam, width_2x, width_4x, 2),
            (BasicBlock_cbam, width_4x, width_8x, 2),
            (Bottleneck_cbam, width_8x, width_8x, 1),
        )
        self.context_branch_layers = nn.ModuleList([
            self._make_layer(
                block=block_type,
                inplanes=width_in,
                planes=width_out,
                num_blocks=depth,
                stride=2)
            for block_type, width_in, width_out, depth in context_specs
        ])

        # Bilateral fusion with 4-group convolutions. Modules are created in
        # attribute order so parameter initialisation is unchanged.
        pointwise = dict(
            kernel_size=1, groups=4, norm_cfg=self.norm_cfg, act_cfg=None)
        strided = dict(
            kernel_size=3,
            stride=2,
            padding=1,
            groups=4,
            norm_cfg=self.norm_cfg)

        self.compression_1 = ConvModule(width_4x, width_2x, **pointwise)
        self.cbam6 = CBAM_group(width_2x)

        self.down_1 = ConvModule(width_2x, width_4x, act_cfg=None, **strided)
        self.cbam7 = CBAM_group(width_4x)

        self.compression_2 = ConvModule(width_8x, width_2x, **pointwise)
        self.cbam8 = CBAM_group(width_2x)

        self.down_2 = nn.Sequential(
            ConvModule(width_2x, width_4x, act_cfg=self.act_cfg, **strided),
            ConvModule(width_4x, width_8x, act_cfg=None, **strided))
        self.cbam9 = CBAM_group(width_8x)

        # High-resolution (spatial) branch: constant width, stride 1.
        spatial_specs = (
            (BasicBlock_cbam_group, 2),
            (BasicBlock_cbam_group, 2),
            (Bottleneck_cbam_group, 1),
        )
        self.spatial_branch_layers = nn.ModuleList([
            self._make_layer(
                block=block_type,
                inplanes=width_2x,
                planes=width_2x,
                num_blocks=depth)
            for block_type, depth in spatial_specs
        ])

        self.spp = FAPPM_conv_group(
            channels * 16, ppm_channels, width_4x, num_scales=5)
        self.cbam13 = CBAM_group(width_4x)

    def _make_stem_layer(self, in_channels, channels, num_blocks):
        """Build the stem: two stride-2 convs, attention, two block stacks."""
        conv_kwargs = dict(
            kernel_size=3,
            stride=2,
            padding=1,
            norm_cfg=self.norm_cfg,
            act_cfg=self.act_cfg)

        return nn.Sequential(
            # The first conv stays ungrouped; only the second uses groups=4.
            ConvModule(in_channels, channels, **conv_kwargs),
            ConvModule(channels, channels, groups=4, **conv_kwargs),
            # With ratio 16 the hidden width would be 32 / 16 = 2, which
            # cannot be split into 4 groups, so the ratio-8 variant is used.
            CBAM_group_r8(channels),
            self._make_layer(BasicBlock_cbam_group_r8, channels, channels,
                             num_blocks),
            nn.ReLU(),
            self._make_layer(
                BasicBlock_cbam_group_r8,
                channels,
                channels * 2,
                num_blocks,
                stride=2),
            nn.ReLU(),
        )

    def _make_layer(self, block, inplanes, planes, num_blocks, stride=1):
        """Stack ``num_blocks`` instances of ``block`` into a Sequential.

        A grouped (``groups=4``) 1x1 conv + norm shortcut is added to the
        first block whenever the stride or the channel count changes.
        """
        out_planes = planes * block.expansion

        shortcut = None
        if stride != 1 or inplanes != out_planes:
            shortcut = nn.Sequential(
                nn.Conv2d(
                    inplanes,
                    out_planes,
                    kernel_size=1,
                    stride=stride,
                    groups=4,
                    bias=False),
                build_norm_layer(self.norm_cfg, out_planes)[1])

        # NOTE(review): the first block receives neither norm_cfg nor
        # act_cfg_out, so it falls back to the block class's own defaults.
        stack = [
            block(
                in_channels=inplanes,
                channels=planes,
                stride=stride,
                downsample=shortcut)
        ]
        # Only the last block of the stack drops its output activation.
        stack.extend(
            block(
                in_channels=out_planes,
                channels=planes,
                stride=1,
                norm_cfg=self.norm_cfg,
                act_cfg_out=None if idx == num_blocks - 1 else self.act_cfg)
            for idx in range(1, num_blocks))

        return nn.Sequential(*stack)

    def forward(self, x):
        """Run the backbone.

        Returns the fused feature map, resized to 1/8 of the input height
        and width. In training mode a tuple ``(stage-3 spatial feature,
        fused feature)`` is returned instead.
        """
        out_size = (x.shape[-2] // 8, x.shape[-1] // 8)
        upsample = dict(
            size=out_size, mode='bilinear', align_corners=self.align_corners)
        context_stages = self.context_branch_layers
        spatial_stages = self.spatial_branch_layers

        # stage 0-2
        stem_out = self.stem(x)

        # stage 3
        ctx = context_stages[0](stem_out)
        spa = spatial_stages[0](stem_out)
        squeezed = self.compression_1(self.relu(ctx))
        ctx += self.down_1(self.relu(spa))
        ctx = self.cbam7(ctx)
        spa += resize(squeezed, **upsample)
        spa = self.cbam6(spa)
        # Snapshot for the training-only auxiliary output.
        aux = spa.clone() if self.training else None

        # stage 4
        ctx = context_stages[1](self.relu(ctx))
        spa = spatial_stages[1](self.relu(spa))
        squeezed = self.compression_2(self.relu(ctx))
        ctx += self.down_2(self.relu(spa))
        ctx = self.cbam9(ctx)
        spa += resize(squeezed, **upsample)
        spa = self.cbam8(spa)

        # stage 5
        spa = spatial_stages[2](self.relu(spa))
        ctx = context_stages[2](self.relu(ctx))
        ctx = resize(self.spp(ctx), **upsample)

        fused = self.cbam13(spa + ctx)
        if self.training:
            return aux, fused
        return fused
+ """ + + def __init__(self, + in_channels: int = 3, + channels: int = 32, + ppm_channels: int = 96, + align_corners: bool = False, + norm_cfg: OptConfigType = dict(type='BN', requires_grad=True), + act_cfg: OptConfigType = dict(type='ReLU', inplace=True), + init_cfg: OptConfigType = None): + super().__init__(init_cfg) + + self.in_channels = in_channels + self.ppm_channels = ppm_channels + + self.norm_cfg = norm_cfg + self.act_cfg = act_cfg + self.align_corners = align_corners + + # stage 0-2 + self.stem = self._make_stem_layer(in_channels, channels, num_blocks=2) + self.relu = nn.ReLU() + + # low resolution(context) branch + self.context_branch_layers = nn.ModuleList() + for i in range(3): + self.context_branch_layers.append( + self._make_layer( + block=BasicBlock_cbam if i < 2 else Bottleneck_cbam, + inplanes=channels * 2 ** (i + 1), + planes=channels * 8 if i > 0 else channels * 4, + num_blocks=2 if i < 2 else 1, + stride=2)) + + # bilateral fusion + self.compression_1 = ConvModule( + channels * 4, + channels * 2, + kernel_size=1, + norm_cfg=self.norm_cfg, + act_cfg=None) + self.cbam6 = CBAM(channels * 2) # 64 + + self.down_1 = ConvModule( + channels * 2, + channels * 4, + kernel_size=3, + stride=2, + padding=1, + norm_cfg=self.norm_cfg, + act_cfg=None) + self.cbam7 = CBAM(channels * 4) # 128 + + self.compression_2 = ConvModule( + channels * 8, + channels * 2, + kernel_size=1, + norm_cfg=self.norm_cfg, + act_cfg=None) + self.cbam8 = CBAM(channels * 2) # 64 + + self.down_2 = nn.Sequential( + ConvModule( + channels * 2, + channels * 4, + kernel_size=3, + stride=2, + padding=1, + norm_cfg=self.norm_cfg, + act_cfg=self.act_cfg), + ConvModule( + channels * 4, + channels * 8, + kernel_size=3, + stride=2, + padding=1, + norm_cfg=self.norm_cfg, + act_cfg=None)) + self.cbam9 = CBAM(channels * 8) # 128 + + # high resolution(spatial) branch + self.spatial_branch_layers = nn.ModuleList() + for i in range(3): + self.spatial_branch_layers.append( + self._make_layer( + 
block=BasicBlock_cbam if i < 2 else Bottleneck_cbam, + inplanes=channels * 2, + planes=channels * 2, + num_blocks=2 if i < 2 else 1, + )) + + self.spp = FAPPM_avgp(channels * 16, ppm_channels, channels * 4) + self.cbam13 = CBAM(channels * 4) # 128 √ + + def _make_stem_layer(self, in_channels, channels, num_blocks): + layers = [ + ConvModule( + in_channels, + channels, # 32 + kernel_size=3, + stride=2, + padding=1, + norm_cfg=self.norm_cfg, + act_cfg=self.act_cfg), + ConvModule( + channels, + channels, + kernel_size=3, + stride=2, + padding=1, + norm_cfg=self.norm_cfg, + act_cfg=self.act_cfg), + CBAM(channels) + ] + + layers.extend([ + self._make_layer(BasicBlock_cbam, channels, channels, num_blocks), + nn.ReLU(), + self._make_layer( + BasicBlock_cbam, channels, channels * 2, num_blocks, stride=2), + nn.ReLU(), + ]) + + return nn.Sequential(*layers) + + def _make_layer(self, block, inplanes, planes, num_blocks, stride=1): + downsample = None + if stride != 1 or inplanes != planes * block.expansion: + downsample = nn.Sequential( + nn.Conv2d( + inplanes, + planes * block.expansion, + kernel_size=1, + stride=stride, + bias=False), + build_norm_layer(self.norm_cfg, planes * block.expansion)[1]) + + layers = [ + block( + in_channels=inplanes, + channels=planes, + stride=stride, + downsample=downsample) + ] + inplanes = planes * block.expansion + for i in range(1, num_blocks): + layers.append( + block( + in_channels=inplanes, + channels=planes, + stride=1, + norm_cfg=self.norm_cfg, + act_cfg_out=None if i == num_blocks - 1 else self.act_cfg)) + + return nn.Sequential(*layers) + + def forward(self, x): + """Forward function.""" + out_size = (x.shape[-2] // 8, x.shape[-1] // 8) + + # stage 0-2 + x = self.stem(x) + + # stage3 + x_c = self.context_branch_layers[0](x) + x_s = self.spatial_branch_layers[0](x) + comp_c = self.compression_1(self.relu(x_c)) + x_c += self.down_1(self.relu(x_s)) + x_c = self.cbam7(x_c) + x_s += resize( + comp_c, + size=out_size, + mode='bilinear', + 
@MODELS.register_module()
class FAPPM_AVGP_slim(BaseModule):
    """Slim dual-resolution (DDRNet-style) backbone with ``FAPPM_avgp``.

    The layout follows `Deep Dual-resolution Networks for Real-time and
    Accurate Semantic Segmentation of Road Scenes` (modified from
    https://github.com/ydhongHIT/DDRNet): a shared stem feeds a
    low-resolution context branch and a high-resolution spatial branch that
    exchange features twice; the context branch ends in ``FAPPM_avgp``.
    This slim variant registers no stand-alone CBAM modules in the stem or
    in the bilateral fusion path (the residual blocks are still the
    ``*_cbam`` block classes).

    Args:
        in_channels (int): Number of input image channels. Default: 3.
        channels (int): Base channel width of the network. Default: 32.
        ppm_channels (int): Second positional argument handed to
            ``FAPPM_avgp``. Default: 64.
        align_corners (bool): ``align_corners`` argument of every ``resize``
            call. Default: False.
        norm_cfg (dict): Config dict to build norm layers.
            Default: dict(type='BN', requires_grad=True).
        act_cfg (dict): Config dict for activation layers.
            Default: dict(type='ReLU', inplace=True).
        init_cfg (dict, optional): Initialization config dict.
            Default: None.
    """

    def __init__(self,
                 in_channels: int = 3,
                 channels: int = 32,
                 ppm_channels: int = 64,
                 align_corners: bool = False,
                 norm_cfg: OptConfigType = dict(type='BN', requires_grad=True),
                 act_cfg: OptConfigType = dict(type='ReLU', inplace=True),
                 init_cfg: OptConfigType = None):
        super().__init__(init_cfg)

        self.in_channels = in_channels
        self.ppm_channels = ppm_channels

        self.norm_cfg = norm_cfg
        self.act_cfg = act_cfg
        self.align_corners = align_corners

        # stage 0-2
        self.stem = self._make_stem_layer(in_channels, channels, num_blocks=2)
        self.relu = nn.ReLU()

        # low resolution (context) branch: every stage halves the resolution
        self.context_branch_layers = nn.ModuleList()
        for i in range(3):
            self.context_branch_layers.append(
                self._make_layer(
                    block=BasicBlock_cbam if i < 2 else Bottleneck_cbam,
                    inplanes=channels * 2**(i + 1),
                    planes=channels * 8 if i > 0 else channels * 4,
                    num_blocks=2 if i < 2 else 1,
                    stride=2))

        # bilateral fusion: compression_* feeds context -> spatial,
        # down_* feeds spatial -> context
        self.compression_1 = ConvModule(
            channels * 4,
            channels * 2,
            kernel_size=1,
            norm_cfg=self.norm_cfg,
            act_cfg=None)

        self.down_1 = ConvModule(
            channels * 2,
            channels * 4,
            kernel_size=3,
            stride=2,
            padding=1,
            norm_cfg=self.norm_cfg,
            act_cfg=None)

        self.compression_2 = ConvModule(
            channels * 8,
            channels * 2,
            kernel_size=1,
            norm_cfg=self.norm_cfg,
            act_cfg=None)

        self.down_2 = nn.Sequential(
            ConvModule(
                channels * 2,
                channels * 4,
                kernel_size=3,
                stride=2,
                padding=1,
                norm_cfg=self.norm_cfg,
                act_cfg=self.act_cfg),
            ConvModule(
                channels * 4,
                channels * 8,
                kernel_size=3,
                stride=2,
                padding=1,
                norm_cfg=self.norm_cfg,
                act_cfg=None))

        # high resolution (spatial) branch: constant width, stride 1
        self.spatial_branch_layers = nn.ModuleList()
        for i in range(3):
            self.spatial_branch_layers.append(
                self._make_layer(
                    block=BasicBlock_cbam if i < 2 else Bottleneck_cbam,
                    inplanes=channels * 2,
                    planes=channels * 2,
                    num_blocks=2 if i < 2 else 1,
                ))

        self.spp = FAPPM_avgp(channels * 16, ppm_channels, channels * 4)

    def _make_stem_layer(self, in_channels, channels, num_blocks):
        """Build the stem: two stride-2 convs followed by two block stacks."""
        layers = [
            ConvModule(
                in_channels,
                channels,
                kernel_size=3,
                stride=2,
                padding=1,
                norm_cfg=self.norm_cfg,
                act_cfg=self.act_cfg),
            ConvModule(
                channels,
                channels,
                kernel_size=3,
                stride=2,
                padding=1,
                norm_cfg=self.norm_cfg,
                act_cfg=self.act_cfg),
        ]

        layers.extend([
            self._make_layer(BasicBlock_cbam, channels, channels, num_blocks),
            nn.ReLU(),
            self._make_layer(
                BasicBlock_cbam, channels, channels * 2, num_blocks, stride=2),
            nn.ReLU(),
        ])

        return nn.Sequential(*layers)

    def _make_layer(self, block, inplanes, planes, num_blocks, stride=1):
        """Stack ``num_blocks`` instances of ``block`` into a Sequential.

        A 1x1 conv + norm shortcut is added to the first block whenever the
        stride or the channel count changes.
        """
        downsample = None
        if stride != 1 or inplanes != planes * block.expansion:
            downsample = nn.Sequential(
                nn.Conv2d(
                    inplanes,
                    planes * block.expansion,
                    kernel_size=1,
                    stride=stride,
                    bias=False),
                build_norm_layer(self.norm_cfg, planes * block.expansion)[1])

        # NOTE(review): the first block receives neither norm_cfg nor
        # act_cfg_out, so it falls back to the block class's own defaults.
        layers = [
            block(
                in_channels=inplanes,
                channels=planes,
                stride=stride,
                downsample=downsample)
        ]
        inplanes = planes * block.expansion
        for i in range(1, num_blocks):
            layers.append(
                block(
                    in_channels=inplanes,
                    channels=planes,
                    stride=1,
                    norm_cfg=self.norm_cfg,
                    # only the last block drops its output activation
                    act_cfg_out=None if i == num_blocks - 1 else self.act_cfg))

        return nn.Sequential(*layers)

    def forward(self, x):
        """Run the backbone.

        Returns the fused feature map, resized to 1/8 of the input height
        and width. In training mode a tuple ``(stage-3 spatial feature,
        fused feature)`` is returned instead.
        """
        out_size = (x.shape[-2] // 8, x.shape[-1] // 8)

        # stage 0-2
        x = self.stem(x)

        # stage3
        x_c = self.context_branch_layers[0](x)
        x_s = self.spatial_branch_layers[0](x)
        comp_c = self.compression_1(self.relu(x_c))
        x_c += self.down_1(self.relu(x_s))
        x_s += resize(
            comp_c,
            size=out_size,
            mode='bilinear',
            align_corners=self.align_corners)
        # No CBAM is applied here: __init__ registers no ``cbam6`` for this
        # slim variant, so calling ``self.cbam6`` raised AttributeError on
        # every forward pass.
        if self.training:
            temp_context = x_s.clone()

        # stage4
        x_c = self.context_branch_layers[1](self.relu(x_c))
        x_s = self.spatial_branch_layers[1](self.relu(x_s))
        comp_c = self.compression_2(self.relu(x_c))
        x_c += self.down_2(self.relu(x_s))
        x_s += resize(
            comp_c,
            size=out_size,
            mode='bilinear',
            align_corners=self.align_corners)

        # stage5
        x_s = self.spatial_branch_layers[2](self.relu(x_s))
        x_c = self.context_branch_layers[2](self.relu(x_c))
        x_c = self.spp(x_c)
        x_c = resize(
            x_c,
            size=out_size,
            mode='bilinear',
            align_corners=self.align_corners)

        add = x_s + x_c
        return (temp_context, add) if self.training else add
class ChannelAttention(nn.Module):
    """Channel attention gate of CBAM.

    Global average- and max-pooled descriptors go through the same two
    1x1 convolutions (``in_planes -> in_planes // ratio -> in_planes``);
    the two results are summed and squashed with a sigmoid.

    Args:
        in_planes (int): Number of input channels.
        ratio (int): Channel reduction ratio of the hidden layer.
            Default: 16.
    """

    def __init__(self, in_planes, ratio=16):
        super().__init__()
        hidden_planes = in_planes // ratio
        self.avg_pool = nn.AdaptiveAvgPool2d(1)
        self.max_pool = nn.AdaptiveMaxPool2d(1)
        self.fc1 = nn.Conv2d(in_planes, hidden_planes, 1, bias=False)
        self.relu1 = nn.ReLU()
        self.fc2 = nn.Conv2d(hidden_planes, in_planes, 1, bias=False)
        self.sigmoid = nn.Sigmoid()

    def _excite(self, pooled):
        """Apply the shared bottleneck to one pooled descriptor."""
        hidden = self.relu1(self.fc1(pooled))
        return self.fc2(hidden)

    def forward(self, x):
        """Return per-channel weights of shape ``(N, C, 1, 1)``."""
        from_avg = self._excite(self.avg_pool(x))
        from_max = self._excite(self.max_pool(x))
        return self.sigmoid(from_avg + from_max)


class ChannelAttention_group(nn.Module):
    """Channel attention gate whose two 1x1 convolutions use ``groups=4``.

    Args:
        in_planes (int): Number of input channels.
        ratio (int): Channel reduction ratio of the hidden layer.
            Default: 16.
    """

    def __init__(self, in_planes, ratio=16):
        super().__init__()
        hidden_planes = in_planes // ratio
        self.avg_pool = nn.AdaptiveAvgPool2d(1)
        self.max_pool = nn.AdaptiveMaxPool2d(1)
        self.fc1 = nn.Conv2d(
            in_planes, hidden_planes, 1, bias=False, groups=4)
        self.relu1 = nn.ReLU()
        self.fc2 = nn.Conv2d(
            hidden_planes, in_planes, 1, bias=False, groups=4)
        self.sigmoid = nn.Sigmoid()

    def _excite(self, pooled):
        """Apply the shared grouped bottleneck to one pooled descriptor."""
        hidden = self.relu1(self.fc1(pooled))
        return self.fc2(hidden)

    def forward(self, x):
        """Return per-channel weights of shape ``(N, C, 1, 1)``."""
        from_avg = self._excite(self.avg_pool(x))
        from_max = self._excite(self.max_pool(x))
        return self.sigmoid(from_avg + from_max)


class SpatialAttention(nn.Module):
    """Spatial attention gate of CBAM.

    The channel-wise mean and max maps are concatenated, convolved down to
    a single channel and squashed with a sigmoid.

    Args:
        kernel_size (int): Convolution kernel size, 3 or 7. Default: 7.
    """

    def __init__(self, kernel_size=7):
        super().__init__()
        assert kernel_size in (3, 7), 'kernel size must be 3 or 7'
        # "same" padding: 3 for a 7x7 kernel, 1 for a 3x3 kernel.
        same_padding = kernel_size // 2
        self.conv1 = nn.Conv2d(
            2, 1, kernel_size, padding=same_padding, bias=False)
        self.sigmoid = nn.Sigmoid()

    def forward(self, x):
        """Return per-position weights of shape ``(N, 1, H, W)``."""
        channel_mean = x.mean(dim=1, keepdim=True)
        channel_max = torch.max(x, dim=1, keepdim=True)[0]
        stacked = torch.cat([channel_mean, channel_max], dim=1)
        return self.sigmoid(self.conv1(stacked))


class CBAM(nn.Module):
    """Channel attention followed by spatial attention.

    Args:
        in_planes (int): Number of input channels.
        ratio (int): Reduction ratio of the channel gate. Default: 16.
        kernel_size (int): Kernel size of the spatial gate. Default: 7.
    """

    def __init__(self, in_planes, ratio=16, kernel_size=7):
        super().__init__()
        self.ca = ChannelAttention(in_planes, ratio)
        self.sa = SpatialAttention(kernel_size)

    def forward(self, x):
        """Re-weight ``x`` by channel, then by position; shape is kept."""
        channel_refined = x * self.ca(x)
        return channel_refined * self.sa(channel_refined)


class CBAM_group(nn.Module):
    """CBAM whose channel gate is ``ChannelAttention_group``.

    Args:
        in_planes (int): Number of input channels.
        ratio (int): Reduction ratio of the channel gate. Default: 16.
        kernel_size (int): Kernel size of the spatial gate. Default: 7.
    """

    def __init__(self, in_planes, ratio=16, kernel_size=7):
        super().__init__()
        self.ca = ChannelAttention_group(in_planes, ratio)
        self.sa = SpatialAttention(kernel_size)

    def forward(self, x):
        """Re-weight ``x`` by channel, then by position; shape is kept."""
        channel_refined = x * self.ca(x)
        return channel_refined * self.sa(channel_refined)


class CBAM_group_r8(nn.Module):
    """``CBAM_group`` with a default reduction ratio of 8 instead of 16.

    Args:
        in_planes (int): Number of input channels.
        ratio (int): Reduction ratio of the channel gate. Default: 8.
        kernel_size (int): Kernel size of the spatial gate. Default: 7.
    """

    def __init__(self, in_planes, ratio=8, kernel_size=7):
        super().__init__()
        self.ca = ChannelAttention_group(in_planes, ratio)
        self.sa = SpatialAttention(kernel_size)

    def forward(self, x):
        """Re-weight ``x`` by channel, then by position; shape is kept."""
        channel_refined = x * self.ca(x)
        return channel_refined * self.sa(channel_refined)
@@ -141,3 +236,563 @@ def forward(self, x: Tensor) -> Tensor: out = self.act(out) return out + + +class GroupBatchnorm2d(nn.Module): + + def __init__(self, c_num: int, group_num: int = 16, eps: float = 1e-10): + super(GroupBatchnorm2d, self).__init__() + assert c_num >= group_num + self.group_num = group_num + self.weight = nn.Parameter(torch.randn(c_num, 1, 1)) + self.bias = nn.Parameter(torch.zeros(c_num, 1, 1)) + self.eps = eps + + def forward(self, x): + N, C, H, W = x.size() + x = x.view(N, self.group_num, -1) + mean = x.mean(dim=2, keepdim=True) + std = x.std(dim=2, keepdim=True) + x = (x - mean) / (std + self.eps) + x = x.view(N, C, H, W) + return x * self.weight + self.bias + + +class SRU(nn.Module): + + def __init__(self, + oup_channels: int, + group_num: int = 16, + gate_treshold: float = 0.5, + torch_gn: bool = True): + super().__init__() + + self.gn = nn.GroupNorm( + num_channels=oup_channels, + num_groups=group_num) if torch_gn else GroupBatchnorm2d( + c_num=oup_channels, group_num=group_num) + self.gate_treshold = gate_treshold + self.sigomid = nn.Sigmoid() + + def forward(self, x): + gn_x = self.gn(x) + w_gamma = self.gn.weight / sum(self.gn.weight) + w_gamma = w_gamma.view(1, -1, 1, 1) + reweigts = self.sigomid(gn_x * w_gamma) + # Gate + w1 = torch.where(reweigts > self.gate_treshold, + torch.ones_like(reweigts), + reweigts) # 大于门限值的设为1,否则保留原值 + w2 = torch.where(reweigts > self.gate_treshold, + torch.zeros_like(reweigts), + reweigts) # 大于门限值的设为0,否则保留原值 + x_1 = w1 * x + x_2 = w2 * x + y = self.reconstruct(x_1, x_2) + return y + + def reconstruct(self, x_1, x_2): + x_11, x_12 = torch.split(x_1, x_1.size(1) // 2, dim=1) + x_21, x_22 = torch.split(x_2, x_2.size(1) // 2, dim=1) + return torch.cat([x_11 + x_22, x_12 + x_21], dim=1) + + +class CRU(nn.Module): + ''' + alpha: 0`_. + + Args: + in_channels (int): Input channels. + channels (int): Output channels. + stride (int): Stride of the first block. Default: 1. 
+ downsample (nn.Module, optional): Downsample operation on identity. + Default: None. + norm_cfg (dict, optional): Config dict for normalization layer. + Default: dict(type='BN'). + act_cfg (dict, optional): Config dict for activation layer in + ConvModule. Default: dict(type='ReLU', inplace=True). + act_cfg_out (dict, optional): Config dict for activation layer at the + last of the block. Default: None. + init_cfg (dict, optional): Initialization config dict. Default: None. + """ + + expansion = 1 + + def __init__(self, + in_channels: int, + channels: int, + stride: int = 1, + downsample: nn.Module = None, + norm_cfg: OptConfigType = dict(type='BN'), + act_cfg: OptConfigType = dict(type='ReLU', inplace=True), + act_cfg_out: OptConfigType = dict(type='ReLU', inplace=True), + init_cfg: OptConfigType = None): + super().__init__(init_cfg) + # self.scconv1 = ScConv(channels) + # self.scconv2 = ScConv(channels) + self.conv1 = ConvModule( + in_channels, + channels, + kernel_size=3, + stride=stride, + padding=1, + norm_cfg=norm_cfg, + act_cfg=act_cfg) + self.conv2 = ConvModule( + channels, + channels, + kernel_size=3, + padding=1, + norm_cfg=norm_cfg, + act_cfg=None) + self.downsample = downsample + self.cbam = CBAM(channels) + if act_cfg_out: + self.act = MODELS.build(act_cfg_out) + + def forward(self, x: Tensor) -> Tensor: + residual = x + out = self.conv1(x) + out = self.conv2(out) + out = self.cbam(out) # √ + + if self.downsample: + residual = self.downsample(x) + out = out + residual + + if hasattr(self, 'act'): + out = self.act(out) + + return out + + +class Bottleneck_cbam(BaseModule): + """Bottleneck block from `ResNet `_. + + Args: + in_channels (int): Input channels. + channels (int): Output channels. + stride (int): Stride of the first block. Default: 1. + downsample (nn.Module, optional): Downsample operation on identity. + Default: None. + norm_cfg (dict, optional): Config dict for normalization layer. + Default: dict(type='BN'). 
+ act_cfg (dict, optional): Config dict for activation layer in + ConvModule. Default: dict(type='ReLU', inplace=True). + act_cfg_out (dict, optional): Config dict for activation layer at + the last of the block. Default: None. + init_cfg (dict, optional): Initialization config dict. Default: None. + """ + + expansion = 2 + + def __init__(self, + in_channels: int, + channels: int, + stride: int = 1, + downsample: Optional[nn.Module] = None, + norm_cfg: OptConfigType = dict(type='BN'), + act_cfg: OptConfigType = dict(type='ReLU', inplace=True), + act_cfg_out: OptConfigType = None, + init_cfg: OptConfigType = None): + super().__init__(init_cfg) + self.conv1 = ConvModule( + in_channels, channels, 1, norm_cfg=norm_cfg, act_cfg=act_cfg) + self.conv2 = ConvModule( + channels, + channels, + 3, + stride, + 1, + norm_cfg=norm_cfg, + act_cfg=act_cfg) + self.conv3 = ConvModule( + channels, + channels * self.expansion, + 1, + norm_cfg=norm_cfg, + act_cfg=None) + self.cbam2 = CBAM(channels) + self.cbam3 = CBAM(channels * self.expansion) + if act_cfg_out: + self.act = MODELS.build(act_cfg_out) + self.downsample = downsample + + def forward(self, x: Tensor) -> Tensor: + residual = x + + out = self.conv1(x) + out = self.conv2(out) + out = self.conv3(out) + out = self.cbam3(out) # √ + + if self.downsample: + residual = self.downsample(x) + out = out + residual + + if hasattr(self, 'act'): + out = self.act(out) + + return out + + +class BasicBlock_cbam_group(BaseModule): + """Basic block from `ResNet `_. + + Args: + in_channels (int): Input channels. + channels (int): Output channels. + stride (int): Stride of the first block. Default: 1. + downsample (nn.Module, optional): Downsample operation on identity. + Default: None. + norm_cfg (dict, optional): Config dict for normalization layer. + Default: dict(type='BN'). + act_cfg (dict, optional): Config dict for activation layer in + ConvModule. Default: dict(type='ReLU', inplace=True). 
+ act_cfg_out (dict, optional): Config dict for activation layer at the + last of the block. Default: None. + init_cfg (dict, optional): Initialization config dict. Default: None. + """ + + expansion = 1 + + def __init__(self, + in_channels: int, + channels: int, + stride: int = 1, + downsample: nn.Module = None, + norm_cfg: OptConfigType = dict(type='BN'), + act_cfg: OptConfigType = dict(type='ReLU', inplace=True), + act_cfg_out: OptConfigType = dict(type='ReLU', inplace=True), + init_cfg: OptConfigType = None): + super().__init__(init_cfg) + self.conv1 = ConvModule( + in_channels, + channels, + kernel_size=3, + stride=stride, + padding=1, + groups=4, + norm_cfg=norm_cfg, + act_cfg=act_cfg) + self.conv2 = ConvModule( + channels, + channels, + kernel_size=3, + padding=1, + groups=4, + norm_cfg=norm_cfg, + act_cfg=None) + self.downsample = downsample + self.cbam = CBAM_group(channels) + if act_cfg_out: + self.act = MODELS.build(act_cfg_out) + + def forward(self, x: Tensor) -> Tensor: + residual = x + out = self.conv1(x) + out = self.conv2(out) + out = self.cbam(out) # √ + + if self.downsample: + residual = self.downsample(x) + out = out + residual + + if hasattr(self, 'act'): + out = self.act(out) + + return out + + +class Bottleneck_cbam_group(BaseModule): + """Bottleneck block from `ResNet `_. + + Args: + in_channels (int): Input channels. + channels (int): Output channels. + stride (int): Stride of the first block. Default: 1. + downsample (nn.Module, optional): Downsample operation on identity. + Default: None. + norm_cfg (dict, optional): Config dict for normalization layer. + Default: dict(type='BN'). + act_cfg (dict, optional): Config dict for activation layer in + ConvModule. Default: dict(type='ReLU', inplace=True). + act_cfg_out (dict, optional): Config dict for activation layer at + the last of the block. Default: None. + init_cfg (dict, optional): Initialization config dict. Default: None. 
+ """ + + expansion = 2 + + def __init__(self, + in_channels: int, + channels: int, + stride: int = 1, + downsample: Optional[nn.Module] = None, + norm_cfg: OptConfigType = dict(type='BN'), + act_cfg: OptConfigType = dict(type='ReLU', inplace=True), + act_cfg_out: OptConfigType = None, + init_cfg: OptConfigType = None): + super().__init__(init_cfg) + self.conv1 = ConvModule( + in_channels, channels, 1, norm_cfg=norm_cfg, act_cfg=act_cfg) + self.conv2 = ConvModule( + channels, + channels, + 3, + stride, + 1, + groups=4, + norm_cfg=norm_cfg, + act_cfg=act_cfg) + self.conv3 = ConvModule( + channels, + channels * self.expansion, + 1, + groups=4, + norm_cfg=norm_cfg, + act_cfg=None) + self.cbam2 = CBAM_group(channels) + self.cbam3 = CBAM_group(channels * self.expansion) + if act_cfg_out: + self.act = MODELS.build(act_cfg_out) + self.downsample = downsample + + def forward(self, x: Tensor) -> Tensor: + residual = x + + out = self.conv1(x) + out = self.conv2(out) + out = self.conv3(out) + out = self.cbam3(out) # √ + + if self.downsample: + residual = self.downsample(x) + out = out + residual + + if hasattr(self, 'act'): + out = self.act(out) + + return out + + +class BasicBlock_cbam_group_r8(BaseModule): + """Basic block from `ResNet `_. + + Args: + in_channels (int): Input channels. + channels (int): Output channels. + stride (int): Stride of the first block. Default: 1. + downsample (nn.Module, optional): Downsample operation on identity. + Default: None. + norm_cfg (dict, optional): Config dict for normalization layer. + Default: dict(type='BN'). + act_cfg (dict, optional): Config dict for activation layer in + ConvModule. Default: dict(type='ReLU', inplace=True). + act_cfg_out (dict, optional): Config dict for activation layer at the + last of the block. Default: None. + init_cfg (dict, optional): Initialization config dict. Default: None. 
+ """ + + expansion = 1 + + def __init__(self, + in_channels: int, + channels: int, + stride: int = 1, + downsample: nn.Module = None, + norm_cfg: OptConfigType = dict(type='BN'), + act_cfg: OptConfigType = dict(type='ReLU', inplace=True), + act_cfg_out: OptConfigType = dict(type='ReLU', inplace=True), + init_cfg: OptConfigType = None): + super().__init__(init_cfg) + self.conv1 = ConvModule( + in_channels, + channels, + kernel_size=3, + stride=stride, + padding=1, + groups=4, + norm_cfg=norm_cfg, + act_cfg=act_cfg) + self.conv2 = ConvModule( + channels, + channels, + kernel_size=3, + padding=1, + groups=4, + norm_cfg=norm_cfg, + act_cfg=None) + self.downsample = downsample + self.cbam = CBAM_group_r8(channels) + if act_cfg_out: + self.act = MODELS.build(act_cfg_out) + + def forward(self, x: Tensor) -> Tensor: + residual = x + out = self.conv1(x) + out = self.conv2(out) + out = self.cbam(out) # √ + + if self.downsample: + residual = self.downsample(x) + out = out + residual + + if hasattr(self, 'act'): + out = self.act(out) + + return out + + +class Bottleneck_cbam_group_r8(BaseModule): + """Bottleneck block from `ResNet `_. + + Args: + in_channels (int): Input channels. + channels (int): Output channels. + stride (int): Stride of the first block. Default: 1. + downsample (nn.Module, optional): Downsample operation on identity. + Default: None. + norm_cfg (dict, optional): Config dict for normalization layer. + Default: dict(type='BN'). + act_cfg (dict, optional): Config dict for activation layer in + ConvModule. Default: dict(type='ReLU', inplace=True). + act_cfg_out (dict, optional): Config dict for activation layer at + the last of the block. Default: None. + init_cfg (dict, optional): Initialization config dict. Default: None. 
+ """ + + expansion = 2 + + def __init__(self, + in_channels: int, + channels: int, + stride: int = 1, + downsample: Optional[nn.Module] = None, + norm_cfg: OptConfigType = dict(type='BN'), + act_cfg: OptConfigType = dict(type='ReLU', inplace=True), + act_cfg_out: OptConfigType = None, + init_cfg: OptConfigType = None): + super().__init__(init_cfg) + self.conv1 = ConvModule( + in_channels, channels, 1, norm_cfg=norm_cfg, act_cfg=act_cfg) + self.conv2 = ConvModule( + channels, + channels, + 3, + stride, + 1, + groups=4, + norm_cfg=norm_cfg, + act_cfg=act_cfg) + self.conv3 = ConvModule( + channels, + channels * self.expansion, + 1, + groups=4, + norm_cfg=norm_cfg, + act_cfg=None) + self.cbam2 = CBAM_group_r8(channels) + self.cbam3 = CBAM_group_r8(channels * self.expansion) + if act_cfg_out: + self.act = MODELS.build(act_cfg_out) + self.downsample = downsample + + def forward(self, x: Tensor) -> Tensor: + residual = x + + out = self.conv1(x) + out = self.conv2(out) + out = self.conv3(out) + out = self.cbam3(out) # √ + + if self.downsample: + residual = self.downsample(x) + out = out + residual + + if hasattr(self, 'act'): + out = self.act(out) + + return out diff --git a/mmseg/models/utils/ppm.py b/mmseg/models/utils/ppm.py index 5fe6ff26fa..00296dd9ed 100644 --- a/mmseg/models/utils/ppm.py +++ b/mmseg/models/utils/ppm.py @@ -9,6 +9,87 @@ from torch import Tensor +class ChannelAttention(nn.Module): + + def __init__(self, in_planes, ratio=16): + super(ChannelAttention, self).__init__() + self.avg_pool = nn.AdaptiveAvgPool2d(1) + self.max_pool = nn.AdaptiveMaxPool2d(1) + self.fc1 = nn.Conv2d(in_planes, in_planes // ratio, 1, bias=False) + self.relu1 = nn.ReLU() + self.fc2 = nn.Conv2d(in_planes // ratio, in_planes, 1, bias=False) + self.sigmoid = nn.Sigmoid() + + def forward(self, x): + avg_out = self.fc2(self.relu1(self.fc1(self.avg_pool(x)))) + max_out = self.fc2(self.relu1(self.fc1(self.max_pool(x)))) + out = avg_out + max_out + return self.sigmoid(out) + + +class 
ChannelAttention_group(nn.Module): + + def __init__(self, in_planes, ratio=16): + super(ChannelAttention_group, self).__init__() + self.avg_pool = nn.AdaptiveAvgPool2d(1) + self.max_pool = nn.AdaptiveMaxPool2d(1) + self.fc1 = nn.Conv2d( + in_planes, in_planes // ratio, 1, bias=False, groups=4) + self.relu1 = nn.ReLU() + self.fc2 = nn.Conv2d( + in_planes // ratio, in_planes, 1, bias=False, groups=4) + self.sigmoid = nn.Sigmoid() + + def forward(self, x): + avg_out = self.fc2(self.relu1(self.fc1(self.avg_pool(x)))) + max_out = self.fc2(self.relu1(self.fc1(self.max_pool(x)))) + out = avg_out + max_out + return self.sigmoid(out) + + +class SpatialAttention(nn.Module): + + def __init__(self, kernel_size=7): + super(SpatialAttention, self).__init__() + assert kernel_size in (3, 7), 'kernel size must be 3 or 7' + padding = 3 if kernel_size == 7 else 1 + self.conv1 = nn.Conv2d(2, 1, kernel_size, padding=padding, bias=False) + self.sigmoid = nn.Sigmoid() + + def forward(self, x): + avg_out = torch.mean(x, dim=1, keepdim=True) + max_out, _ = torch.max(x, dim=1, keepdim=True) + x = torch.cat([avg_out, max_out], dim=1) + x = self.conv1(x) + return self.sigmoid(x) + + +class CBAM(nn.Module): + + def __init__(self, in_planes, ratio=16, kernel_size=7): + super(CBAM, self).__init__() + self.ca = ChannelAttention(in_planes, ratio) + self.sa = SpatialAttention(kernel_size) + + def forward(self, x): + out = x * self.ca(x) + result = out * self.sa(out) + return result + + +class CBAM_group(nn.Module): + + def __init__(self, in_planes, ratio=16, kernel_size=7): + super(CBAM_group, self).__init__() + self.ca = ChannelAttention_group(in_planes, ratio) + self.sa = SpatialAttention(kernel_size) + + def forward(self, x): + out = x * self.ca(x) + result = out * self.sa(out) + return result + + class DAPPM(BaseModule): """DAPPM module in `DDRNet `_. 
@@ -191,3 +272,540 @@ def forward(self, inputs: Tensor): scale_out = self.processes(torch.cat(feats, dim=1)) return self.compression(torch.cat([x_, scale_out], dim=1)) + self.shortcut(inputs) + + +class DAPPM_cbam(BaseModule): + """DAPPM module in `DDRNet `_. + + Args: + in_channels (int): Input channels. + branch_channels (int): Branch channels. + out_channels (int): Output channels. + num_scales (int): Number of scales. + kernel_sizes (list[int]): Kernel sizes of each scale. + strides (list[int]): Strides of each scale. + paddings (list[int]): Paddings of each scale. + norm_cfg (dict): Config dict for normalization layer. + Default: dict(type='BN'). + act_cfg (dict): Config dict for activation layer in ConvModule. + Default: dict(type='ReLU', inplace=True). + conv_cfg (dict): Config dict for convolution layer in ConvModule. + Default: dict(order=('norm', 'act', 'conv'), bias=False). + upsample_mode (str): Upsample mode. Default: 'bilinear'. + """ + + def __init__(self, + in_channels: int, + branch_channels: int, + out_channels: int, + num_scales: int, + kernel_sizes: List[int] = [5, 9, 17], + strides: List[int] = [2, 4, 8], + paddings: List[int] = [2, 4, 8], + norm_cfg: Dict = dict(type='BN', momentum=0.1), + act_cfg: Dict = dict(type='ReLU', inplace=True), + conv_cfg: Dict = dict( + order=('norm', 'act', 'conv'), bias=False), + upsample_mode: str = 'bilinear'): + super().__init__() + + self.num_scales = num_scales + self.unsample_mode = upsample_mode + self.in_channels = in_channels + self.branch_channels = branch_channels + self.out_channels = out_channels + self.norm_cfg = norm_cfg + self.act_cfg = act_cfg + self.conv_cfg = conv_cfg + + self.scales = ModuleList([ + ConvModule( + in_channels, + branch_channels, + kernel_size=1, + norm_cfg=norm_cfg, + act_cfg=act_cfg, + **conv_cfg) + ]) + for i in range(1, num_scales - 1): + self.scales.append( + Sequential(*[ + nn.AvgPool2d( + kernel_size=kernel_sizes[i - 1], + stride=strides[i - 1], + padding=paddings[i - 1]), 
+ ConvModule( + in_channels, + branch_channels, + kernel_size=1, + norm_cfg=norm_cfg, + act_cfg=act_cfg, + **conv_cfg) + ])) + self.scales.append( + Sequential(*[ + nn.AdaptiveAvgPool2d((1, 1)), + ConvModule( + in_channels, + branch_channels, + kernel_size=1, + norm_cfg=norm_cfg, + act_cfg=act_cfg, + **conv_cfg) + ])) + self.processes = ModuleList() + for i in range(num_scales - 1): + self.processes.append( + Sequential(*[ + ConvModule( + branch_channels, + branch_channels, + kernel_size=3, + padding=1, + norm_cfg=norm_cfg, + act_cfg=act_cfg), + # 3*3卷积之后添加cbam + CBAM(branch_channels) + ])) + + self.compression = ConvModule( + branch_channels * num_scales, + out_channels, + kernel_size=1, + norm_cfg=norm_cfg, + act_cfg=act_cfg, + **conv_cfg) + + self.shortcut = ConvModule( + in_channels, + out_channels, + kernel_size=1, + norm_cfg=norm_cfg, + act_cfg=act_cfg, + **conv_cfg) + + def forward(self, inputs: Tensor): + feats = [] + feats.append(self.scales[0](inputs)) + + for i in range(1, self.num_scales): + feat_up = F.interpolate( + self.scales[i](inputs), + size=inputs.shape[2:], + mode=self.unsample_mode) + feats.append(self.processes[i - 1](feat_up + feats[i - 1])) + + return self.compression(torch.cat(feats, + dim=1)) + self.shortcut(inputs) + + +class FAPPM_conv(DAPPM): + """PAPPM module in `PIDNet `_. + + Args: + in_channels (int): Input channels. + branch_channels (int): Branch channels. + out_channels (int): Output channels. + num_scales (int): Number of scales. + kernel_sizes (list[int]): Kernel sizes of each scale. + strides (list[int]): Strides of each scale. + paddings (list[int]): Paddings of each scale. + norm_cfg (dict): Config dict for normalization layer. + Default: dict(type='BN', momentum=0.1). + act_cfg (dict): Config dict for activation layer in ConvModule. + Default: dict(type='ReLU', inplace=True). + conv_cfg (dict): Config dict for convolution layer in ConvModule. + Default: dict(order=('norm', 'act', 'conv'), bias=False). 
+ upsample_mode (str): Upsample mode. Default: 'bilinear'. + """ + + def __init__(self, + in_channels: int, + branch_channels: int, + out_channels: int, + num_scales: int, + kernel_sizes: List[int] = [3, 5, 9], + strides: List[int] = [1, 2, 4], + paddings: List[int] = [1, 2, 4], + norm_cfg: Dict = dict(type='BN', momentum=0.1), + act_cfg: Dict = dict(type='ReLU', inplace=True), + conv_cfg: Dict = dict( + order=('norm', 'act', 'conv'), bias=False), + upsample_mode: str = 'bilinear'): + super().__init__(in_channels, branch_channels, out_channels, + num_scales, kernel_sizes, strides, paddings, norm_cfg, + act_cfg, conv_cfg, upsample_mode) + + self.processes = ConvModule( + self.branch_channels * (self.num_scales - 1), + self.branch_channels * (self.num_scales - 1), + kernel_size=3, + padding=1, + groups=self.num_scales - 1, + norm_cfg=self.norm_cfg, + act_cfg=self.act_cfg, + **self.conv_cfg) + self.cbam = CBAM(self.branch_channels * (self.num_scales - 1)) + + def forward(self, inputs: Tensor): + x_ = self.scales[0](inputs) + feats = [] + for i in range(1, self.num_scales): + feat_up = F.interpolate( + self.scales[i](inputs), + size=inputs.shape[2:], + mode=self.unsample_mode, + align_corners=False) + feats.append(feat_up + x_) + scale_out = self.processes(torch.cat(feats, dim=1)) + scale_out = self.cbam(scale_out) + return self.compression(torch.cat([x_, scale_out], + dim=1)) + self.shortcut(inputs) + + +class FAPPM_conv_nocbam(DAPPM): + """PAPPM module in `PIDNet `_. + + Args: + in_channels (int): Input channels. + branch_channels (int): Branch channels. + out_channels (int): Output channels. + num_scales (int): Number of scales. + kernel_sizes (list[int]): Kernel sizes of each scale. + strides (list[int]): Strides of each scale. + paddings (list[int]): Paddings of each scale. + norm_cfg (dict): Config dict for normalization layer. + Default: dict(type='BN', momentum=0.1). + act_cfg (dict): Config dict for activation layer in ConvModule. 
+ Default: dict(type='ReLU', inplace=True). + conv_cfg (dict): Config dict for convolution layer in ConvModule. + Default: dict(order=('norm', 'act', 'conv'), bias=False). + upsample_mode (str): Upsample mode. Default: 'bilinear'. + """ + + def __init__(self, + in_channels: int, + branch_channels: int, + out_channels: int, + num_scales: int, + kernel_sizes: List[int] = [3, 5, 9], + strides: List[int] = [1, 2, 4], + paddings: List[int] = [1, 2, 4], + norm_cfg: Dict = dict(type='BN', momentum=0.1), + act_cfg: Dict = dict(type='ReLU', inplace=True), + conv_cfg: Dict = dict( + order=('norm', 'act', 'conv'), bias=False), + upsample_mode: str = 'bilinear'): + super().__init__(in_channels, branch_channels, out_channels, + num_scales, kernel_sizes, strides, paddings, norm_cfg, + act_cfg, conv_cfg, upsample_mode) + + self.processes = ConvModule( + self.branch_channels * (self.num_scales - 1), + self.branch_channels * (self.num_scales - 1), + kernel_size=3, + padding=1, + groups=self.num_scales - 1, + norm_cfg=self.norm_cfg, + act_cfg=self.act_cfg, + **self.conv_cfg) + + def forward(self, inputs: Tensor): + x_ = self.scales[0](inputs) + feats = [] + for i in range(1, self.num_scales): + feat_up = F.interpolate( + self.scales[i](inputs), + size=inputs.shape[2:], + mode=self.unsample_mode, + align_corners=False) + feats.append(feat_up + x_) + scale_out = self.processes(torch.cat(feats, dim=1)) + return self.compression(torch.cat([x_, scale_out], + dim=1)) + self.shortcut(inputs) + + +class FAPPM_conv_slim(DAPPM): + """PAPPM module in `PIDNet `_. + + Args: + in_channels (int): Input channels. + branch_channels (int): Branch channels. + out_channels (int): Output channels. + num_scales (int): Number of scales. + kernel_sizes (list[int]): Kernel sizes of each scale. + strides (list[int]): Strides of each scale. + paddings (list[int]): Paddings of each scale. + norm_cfg (dict): Config dict for normalization layer. + Default: dict(type='BN', momentum=0.1). 
+ act_cfg (dict): Config dict for activation layer in ConvModule. + Default: dict(type='ReLU', inplace=True). + conv_cfg (dict): Config dict for convolution layer in ConvModule. + Default: dict(order=('norm', 'act', 'conv'), bias=False). + upsample_mode (str): Upsample mode. Default: 'bilinear'. + """ + + def __init__(self, + in_channels: int, + branch_channels: int, + out_channels: int, + num_scales: int, + kernel_sizes: List[int] = [3, 5, 9], + strides: List[int] = [1, 2, 4], + paddings: List[int] = [1, 2, 4], + norm_cfg: Dict = dict(type='BN', momentum=0.1), + act_cfg: Dict = dict(type='ReLU', inplace=True), + conv_cfg: Dict = dict( + order=('norm', 'act', 'conv'), bias=False), + upsample_mode: str = 'bilinear'): + super().__init__(in_channels, branch_channels, out_channels, + num_scales, kernel_sizes, strides, paddings, norm_cfg, + act_cfg, conv_cfg, upsample_mode) + + self.processes = ConvModule( + self.branch_channels * (self.num_scales - 1), + self.branch_channels * (self.num_scales - 1), + kernel_size=3, + padding=1, + groups=self.num_scales - 1, + norm_cfg=self.norm_cfg, + act_cfg=self.act_cfg, + **self.conv_cfg) + + def forward(self, inputs: Tensor): + x_ = self.scales[0](inputs) + feats = [] + for i in range(1, self.num_scales): + feat_up = F.interpolate( + self.scales[i](inputs), + size=inputs.shape[2:], + mode=self.unsample_mode, + align_corners=False) + feats.append(feat_up + x_) + scale_out = self.processes(torch.cat(feats, dim=1)) + return self.compression(torch.cat([x_, scale_out], + dim=1)) + self.shortcut(inputs) + + +class FAPPM_conv_group(DAPPM): + """PAPPM module in `PIDNet `_. + + Args: + in_channels (int): Input channels. + branch_channels (int): Branch channels. + out_channels (int): Output channels. + num_scales (int): Number of scales. + kernel_sizes (list[int]): Kernel sizes of each scale. + strides (list[int]): Strides of each scale. + paddings (list[int]): Paddings of each scale. + norm_cfg (dict): Config dict for normalization layer. 
+ Default: dict(type='BN', momentum=0.1). + act_cfg (dict): Config dict for activation layer in ConvModule. + Default: dict(type='ReLU', inplace=True). + conv_cfg (dict): Config dict for convolution layer in ConvModule. + Default: dict(order=('norm', 'act', 'conv'), bias=False). + upsample_mode (str): Upsample mode. Default: 'bilinear'. + """ + + def __init__(self, + in_channels: int, + branch_channels: int, + out_channels: int, + num_scales: int, + kernel_sizes: List[int] = [3, 5, 9], + strides: List[int] = [1, 2, 4], + paddings: List[int] = [1, 2, 4], + norm_cfg: Dict = dict(type='BN', momentum=0.1), + act_cfg: Dict = dict(type='ReLU', inplace=True), + conv_cfg: Dict = dict( + order=('norm', 'act', 'conv'), bias=False), + upsample_mode: str = 'bilinear'): + super().__init__(in_channels, branch_channels, out_channels, + num_scales, kernel_sizes, strides, paddings, norm_cfg, + act_cfg, conv_cfg, upsample_mode) + + self.processes = ConvModule( + self.branch_channels * (self.num_scales - 1), + self.branch_channels * (self.num_scales - 1), + kernel_size=3, + padding=1, + groups=self.num_scales - 1, + norm_cfg=self.norm_cfg, + act_cfg=self.act_cfg, + **self.conv_cfg) + self.cbam = CBAM_group(self.branch_channels * (self.num_scales - 1)) + + self.scales = ModuleList([ + ConvModule( + in_channels, + branch_channels, + kernel_size=1, + groups=4, + norm_cfg=norm_cfg, + act_cfg=act_cfg, + **conv_cfg) + ]) + for i in range(1, num_scales - 1): + self.scales.append( + Sequential(*[ + nn.AvgPool2d( + kernel_size=kernel_sizes[i - 1], + stride=strides[i - 1], + padding=paddings[i - 1]), + ConvModule( + in_channels, + branch_channels, + kernel_size=1, + groups=4, + norm_cfg=norm_cfg, + act_cfg=act_cfg, + **conv_cfg) + ])) + self.scales.append( + Sequential(*[ + nn.AdaptiveAvgPool2d((1, 1)), + ConvModule( + in_channels, + branch_channels, + kernel_size=1, + groups=4, + norm_cfg=norm_cfg, + act_cfg=act_cfg, + **conv_cfg) + ])) + + self.compression = ConvModule( + branch_channels * 
num_scales, + out_channels, + kernel_size=1, + groups=4, + norm_cfg=norm_cfg, + act_cfg=act_cfg, + **conv_cfg) + + self.shortcut = ConvModule( + in_channels, + out_channels, + kernel_size=1, + groups=4, + norm_cfg=norm_cfg, + act_cfg=act_cfg, + **conv_cfg) + + def forward(self, inputs: Tensor): + x_ = self.scales[0](inputs) + feats = [] + for i in range(1, self.num_scales): + feat_up = F.interpolate( + self.scales[i](inputs), + size=inputs.shape[2:], + mode=self.unsample_mode, + align_corners=False) + feats.append(feat_up + x_) + scale_out = self.processes(torch.cat(feats, dim=1)) + scale_out = self.cbam(scale_out) + return self.compression(torch.cat([x_, scale_out], + dim=1)) + self.shortcut(inputs) + + +algc = False +class FAPPM_avgp(nn.Module): + + def __init__(self, + inplanes, + branch_planes, + outplanes, + BatchNorm=nn.BatchNorm2d): + super(FAPPM_avgp, self).__init__() + bn_mom = 0.1 + self.scale1 = nn.Sequential( + nn.AvgPool2d(kernel_size=3, stride=1, padding=1), + BatchNorm(inplanes, momentum=bn_mom), + nn.ReLU(inplace=True), + nn.Conv2d(inplanes, branch_planes, kernel_size=1, bias=False), + ) + self.scale2 = nn.Sequential( + nn.AvgPool2d(kernel_size=5, stride=2, padding=2), + BatchNorm(inplanes, momentum=bn_mom), + nn.ReLU(inplace=True), + nn.Conv2d(inplanes, branch_planes, kernel_size=1, bias=False), + ) + self.scale3 = nn.Sequential( + nn.AvgPool2d(kernel_size=9, stride=4, padding=4), + BatchNorm(inplanes, momentum=bn_mom), + nn.ReLU(inplace=True), + nn.Conv2d(inplanes, branch_planes, kernel_size=1, bias=False), + ) + self.scale4 = nn.Sequential( + nn.AdaptiveAvgPool2d((1, 1)), + BatchNorm(inplanes, momentum=bn_mom), + nn.ReLU(inplace=True), + nn.Conv2d(inplanes, branch_planes, kernel_size=1, bias=False), + ) + # 奇怪,为什么都是BN,Relu,conv2d + self.scale0 = nn.Sequential( + BatchNorm(inplanes, momentum=bn_mom), + nn.ReLU(inplace=True), + nn.Conv2d(inplanes, branch_planes, kernel_size=1, bias=False), + ) + + self.scale_process = nn.Sequential( + 
BatchNorm(branch_planes * 4, momentum=bn_mom), + nn.ReLU(inplace=True), + nn.Conv2d( + branch_planes * 4, + branch_planes * 4, + kernel_size=3, + padding=1, + bias=False), # groups=4 + ) + self.cbam = CBAM(branch_planes * 4) + + self.compression = nn.Sequential( + BatchNorm(branch_planes * 5, momentum=bn_mom), + nn.ReLU(inplace=True), + nn.Conv2d(branch_planes * 5, outplanes, kernel_size=1, bias=False), + ) + + self.shortcut = nn.Sequential( + BatchNorm(inplanes, momentum=bn_mom), + nn.ReLU(inplace=True), + nn.Conv2d(inplanes, outplanes, kernel_size=1, bias=False), + ) + + def forward(self, x): + width = x.shape[-1] + height = x.shape[-2] + scale_list = [] + + x_ = self.scale0(x) + scale_list.append( + F.interpolate( + self.scale1(x), + size=[height, width], + mode='bilinear', + align_corners=algc) + x_) + scale_list.append( + F.interpolate( + self.scale2(x), + size=[height, width], + mode='bilinear', + align_corners=algc) + x_) + scale_list.append( + F.interpolate( + self.scale3(x), + size=[height, width], + mode='bilinear', + align_corners=algc) + x_) + scale_list.append( + F.interpolate( + self.scale4(x), + size=[height, width], + mode='bilinear', + align_corners=algc) + x_) + + scale_out = self.scale_process(torch.cat(scale_list, 1)) + scale_out = self.cbam(scale_out) + out = self.compression(torch.cat([x_, scale_out], + 1)) + self.shortcut(x) + return out diff --git a/mmseg/version.py b/mmseg/version.py index 9c8753b6d8..ddcdd5e63c 100644 --- a/mmseg/version.py +++ b/mmseg/version.py @@ -2,7 +2,7 @@ __version__ = '1.2.1' - +# parse_version_info and __version__ must be defined in the same file def parse_version_info(version_str): version_info = [] for x in version_str.split('.'):