diff --git a/references/detection/train.py b/references/detection/train.py index 7152f293b0f..c4a83a27e02 100644 --- a/references/detection/train.py +++ b/references/detection/train.py @@ -5,6 +5,9 @@ python -m torch.distributed.launch --nproc_per_node=$NGPU --use_env \ train.py ... --world-size $NGPU +The default hyperparameters are tuned for training on 8 gpus and 2 images per gpu. + --lr 0.02 --batch-size 2 --world-size 8 +If you use a different number of gpus, the learning rate should be changed to 0.02/8*$NGPU. """ import datetime import os @@ -140,12 +143,15 @@ def main(args): parser.add_argument('--dataset', default='coco', help='dataset') parser.add_argument('--model', default='maskrcnn_resnet50_fpn', help='model') parser.add_argument('--device', default='cuda', help='device') - parser.add_argument('-b', '--batch-size', default=2, type=int) + parser.add_argument('-b', '--batch-size', default=2, type=int, + help='images per gpu, the total batch size is $NGPU x batch_size') parser.add_argument('--epochs', default=13, type=int, metavar='N', help='number of total epochs to run') parser.add_argument('-j', '--workers', default=4, type=int, metavar='N', help='number of data loading workers (default: 16)') - parser.add_argument('--lr', default=0.02, type=float, help='initial learning rate') + parser.add_argument('--lr', default=0.02, type=float, + help='initial learning rate, 0.02 is the default value for training ' + 'on 8 gpus and 2 images_per_gpu') parser.add_argument('--momentum', default=0.9, type=float, metavar='M', help='momentum') parser.add_argument('--wd', '--weight-decay', default=1e-4, type=float,