-
Notifications
You must be signed in to change notification settings - Fork 39
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Questions about the details of configuration of RN50-CLIP #13
Comments
Hi, thanks for your interest in our work. You can use |
Thanks for your reply. Specifically, I'm in trouble with running RN50-CLIP instead of RN50-DenseCLIP in segmentation. Could you please share more details? |
We use the same batch size to fairly compare these two models. I think you can just replace the config name in the above command to train RN50-CLIP. |
Thank you! Problem solved. |
I cannot reach the mIoU of RN50-CLIP that was showed in the paper, though I used the configuration mentioned in the README. Could you please tell me what batch size was used and how many GPUs were used. More details of implement are very helpful. I've tried batch size of 16, but only got 38.85 of mIoU. Here is my configuration and log file is putted in the attachment.
'''
norm_cfg = dict(type='SyncBN', requires_grad=True)
model = dict(
type='EncoderDecoder',
pretrained='pretrained/RN50.pt',
backbone=dict(
type='CLIPResNet',
depth=50,
num_stages=4,
out_indices=(0, 1, 2, 3),
dilations=(1, 1, 1, 1),
strides=(1, 2, 2, 2),
norm_cfg=dict(type='SyncBN', requires_grad=True),
norm_eval=False,
style='pytorch',
contract_dilation=True,
layers=[3, 4, 6, 3]),
neck=dict(
type='FPN',
in_channels=[256, 512, 1024, 2048],
out_channels=256,
num_outs=4),
decode_head=dict(
type='FPNHead',
in_channels=[256, 256, 256, 256],
in_index=[0, 1, 2, 3],
feature_strides=[4, 8, 16, 32],
channels=256,
dropout_ratio=0.1,
num_classes=150,
norm_cfg=dict(type='SyncBN', requires_grad=True),
align_corners=False,
loss_decode=dict(
type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)),
train_cfg=dict(),
test_cfg=dict(mode='slide', crop_size=(512, 512), stride=(341, 341)))
dataset_type = 'ADE20KDataset'
data_root = 'data/ade/ADEChallengeData2016'
IMG_MEAN = [122.7709383, 116.7460125, 104.09373615000001]
IMG_VAR = [68.5005327, 66.6321579, 70.32316304999999]
img_norm_cfg = dict(
mean=[122.7709383, 116.7460125, 104.09373615000001],
std=[68.5005327, 66.6321579, 70.32316304999999],
to_rgb=True)
crop_size = (512, 512)
train_pipeline = [
dict(type='LoadImageFromFile'),
dict(type='LoadAnnotations', reduce_zero_label=True),
dict(type='Resize', img_scale=(2048, 512), ratio_range=(0.5, 2.0)),
dict(type='RandomCrop', crop_size=(512, 512), cat_max_ratio=0.75),
dict(type='RandomFlip', prob=0.5),
dict(type='PhotoMetricDistortion'),
dict(
type='Normalize',
mean=[122.7709383, 116.7460125, 104.09373615000001],
std=[68.5005327, 66.6321579, 70.32316304999999],
to_rgb=True),
dict(type='Pad', size=(512, 512), pad_val=0, seg_pad_val=255),
dict(type='DefaultFormatBundle'),
dict(type='Collect', keys=['img', 'gt_semantic_seg'])
]
test_pipeline = [
dict(type='LoadImageFromFile'),
dict(
type='MultiScaleFlipAug',
img_scale=(2048, 512),
flip=False,
transforms=[
dict(type='Resize', keep_ratio=True),
dict(type='RandomFlip'),
dict(
type='Normalize',
mean=[122.7709383, 116.7460125, 104.09373615000001],
std=[68.5005327, 66.6321579, 70.32316304999999],
to_rgb=True),
dict(type='ImageToTensor', keys=['img']),
dict(type='Collect', keys=['img'])
])
]
data = dict(
samples_per_gpu=4,
workers_per_gpu=4,
train=dict(
type='ADE20KDataset',
data_root='data/ade/ADEChallengeData2016',
img_dir='images/training',
ann_dir='annotations/training',
pipeline=[
dict(type='LoadImageFromFile'),
dict(type='LoadAnnotations', reduce_zero_label=True),
dict(type='Resize', img_scale=(2048, 512), ratio_range=(0.5, 2.0)),
dict(type='RandomCrop', crop_size=(512, 512), cat_max_ratio=0.75),
dict(type='RandomFlip', prob=0.5),
dict(type='PhotoMetricDistortion'),
dict(
type='Normalize',
mean=[122.7709383, 116.7460125, 104.09373615000001],
std=[68.5005327, 66.6321579, 70.32316304999999],
to_rgb=True),
dict(type='Pad', size=(512, 512), pad_val=0, seg_pad_val=255),
dict(type='DefaultFormatBundle'),
dict(type='Collect', keys=['img', 'gt_semantic_seg'])
]),
val=dict(
type='ADE20KDataset',
data_root='data/ade/ADEChallengeData2016',
img_dir='images/validation',
ann_dir='annotations/validation',
pipeline=[
dict(type='LoadImageFromFile'),
dict(
type='MultiScaleFlipAug',
img_scale=(2048, 512),
flip=False,
transforms=[
dict(type='Resize', keep_ratio=True),
dict(type='RandomFlip'),
dict(
type='Normalize',
mean=[122.7709383, 116.7460125, 104.09373615000001],
std=[68.5005327, 66.6321579, 70.32316304999999],
to_rgb=True),
dict(type='ImageToTensor', keys=['img']),
dict(type='Collect', keys=['img'])
])
]),
test=dict(
type='ADE20KDataset',
data_root='data/ade/ADEChallengeData2016',
img_dir='images/validation',
ann_dir='annotations/validation',
pipeline=[
dict(type='LoadImageFromFile'),
dict(
type='MultiScaleFlipAug',
img_scale=(2048, 512),
flip=False,
transforms=[
dict(type='Resize', keep_ratio=True),
dict(type='RandomFlip'),
dict(
type='Normalize',
mean=[122.7709383, 116.7460125, 104.09373615000001],
std=[68.5005327, 66.6321579, 70.32316304999999],
to_rgb=True),
dict(type='ImageToTensor', keys=['img']),
dict(type='Collect', keys=['img'])
])
]))
log_config = dict(
interval=50, hooks=[dict(type='TextLoggerHook', by_epoch=False)])
dist_params = dict(backend='nccl')
log_level = 'INFO'
load_from = None
resume_from = None
workflow = [('train', 1)]
cudnn_benchmark = True
find_unused_parameters = True
optimizer = dict(
type='AdamW',
lr=0.0001,
weight_decay=0.0001,
paramwise_cfg=dict(
custom_keys=dict(
backbone=dict(lr_mult=0.1), norm=dict(decay_mult=0.0))))
optimizer_config = dict()
lr_config = dict(
policy='poly',
power=0.9,
min_lr=1e-06,
by_epoch=False,
warmup='linear',
warmup_iters=1500,
warmup_ratio=1e-06)
runner = dict(type='IterBasedRunner', max_iters=80000)
checkpoint_config = dict(by_epoch=False, interval=8000)
evaluation = dict(interval=8000, metric='mIoU')
work_dir = './work_dirs/fpn_clipres50_test4k'
gpu_ids = range(0, 1)
'''
20220320_015954.log
The text was updated successfully, but these errors were encountered: