In [1]:
#Libs
import os
import sys
import torch
import logging
import argparse
import itertools

from src.config import config
from src.base import MatchPrior
from src.network import create_network
from src.multibox_loss import MultiboxLoss
from src.open_images import OpenImagesDataset
from src.data_preprocessing import TrainAugmentation, TestTransform
from utils.misc import str2bool, Timer, freeze_net_layers, store_labels

from torch.utils.data import DataLoader, ConcatDataset
from torch.optim.lr_scheduler import CosineAnnealingLR, MultiStepLR

In [2]:

# Training configuration for the SSD detector, expressed as an argparse parser.
# In the notebook the parser is fed an empty argv (see the bottom of this cell),
# so every option resolves to the default declared here.
parser = argparse.ArgumentParser(description='Single Shot MultiBox Detector Training With PyTorch')

# Params for datasets
parser.add_argument('--datasets', '--data',    nargs='+', default=["data"], help='Dataset directory path')
parser.add_argument('--balance-data',          action='store_true', help="Balance training data by down-sampling more frequent labels.")

# Params for network
parser.add_argument('--freeze-base-net',       action='store_true',help="Freeze base net layers.")
parser.add_argument('--freeze-net',            action='store_true',help="Freeze all the layers except the prediction head.")
parser.add_argument('--width-mult',            default=1.0, type=float, help='Width Multiplifier for network')

# Params for loading pretrained basenet or checkpoints.
parser.add_argument('--base-net',              help='Pretrained base model')
parser.add_argument('--pretrained',            default='models/pretrained.pth', type=str, help='Pre-trained base model')
parser.add_argument('--resume',                default=None, type=str,help='Checkpoint state_dict file to resume training from')

# Params for SGD
parser.add_argument('--lr', '--learning-rate', default=0.01, type=float,help='initial learning rate')
parser.add_argument('--momentum',              default=0.9, type=float,help='Momentum value for optim')
parser.add_argument('--weight-decay',          default=5e-4, type=float,help='Weight decay for SGD')
parser.add_argument('--gamma',                 default=0.1, type=float,help='Gamma update for SGD')
parser.add_argument('--base-net-lr',           default=0.001, type=float,help='initial learning rate for base net, or None to use --lr')
parser.add_argument('--extra-layers-lr',       default=None, type=float,help='initial learning rate for the layers not in base net and prediction heads.')

# Scheduler
# The help text names exactly two supported schedulers; `choices` makes argparse
# reject any other value at parse time instead of letting a typo travel further.
parser.add_argument('--scheduler',             default="cosine", type=str, choices=("multi-step", "cosine"),
                    help="Scheduler for SGD. It can one of multi-step and cosine")

# Params for Multi-step Scheduler
# Kept as a comma-separated string here; it is not split into integers in this cell.
parser.add_argument('--milestones',            default="80,100", type=str,help="milestones for MultiStepLR")

# Params for Cosine Annealing
# `type=float` only converts values given on the command line; the default stays the int 100.
parser.add_argument('--t-max',                 default=100,  type=float,help='T_max value for Cosine Annealing Scheduler.')

# Train params
parser.add_argument('--batch-size',            default=64,    type=int,help='Batch size for training')
parser.add_argument('--num-epochs',            default=100,   type=int,help='the number epochs')
parser.add_argument('--num-workers',           default=4,    type=int, help='Number of workers used in dataloading')
parser.add_argument('--validation-epochs',     default=5,    type=int,help='the number epochs between running validation')
parser.add_argument('--debug-steps',           default=10,   type=int,help='Set the debug log output frequency.')
parser.add_argument('--use-cuda',              default=True, type=str2bool,help='Use CUDA to train model')
parser.add_argument('--checkpoint-folder',     default='models/', help='Directory for saving checkpoint models')

# Send INFO-level log records to stdout with a timestamp prefix.
logging.basicConfig(stream=sys.stdout, level=logging.INFO,
                    format='%(asctime)s - %(message)s', datefmt="%Y-%m-%d %H:%M:%S")

# Parse an explicit empty argv so the kernel's own command line is never read;
# the result is therefore the set of defaults declared above.
args   = parser.parse_args([])

# Use the first CUDA device only when one is available AND --use-cuda is true; otherwise CPU.
DEVICE = torch.device("cuda:0" if torch.cuda.is_available() and args.use_cuda else "cpu")


In [7]:
# Echo the parsed argument namespace as this cell's output.
# NOTE(review): the saved output for this cell reports batch_size=4, num_epochs=30,
# num_workers=2 and validation_epochs=1, which do not match the parser defaults
# (64, 100, 4 and 5). Presumably `args` was modified in cells that are not part of
# the notebook as saved (execution counts jump from 2 to 7) — confirm, and make
# those overrides explicit so that Restart & Run All reproduces this output.
args

Namespace(balance_data=False, base_net=None, base_net_lr=0.001, batch_size=4, checkpoint_folder='models/', datasets=['data'], debug_steps=10, extra_layers_lr=None, freeze_base_net=False, freeze_net=False, gamma=0.1, lr=0.01, milestones='80,100', momentum=0.9, num_epochs=30, num_workers=2, pretrained='models/pretrained.pth', resume=None, scheduler='cosine', t_max=100, use_cuda=True, validation_epochs=1, weight_decay=0.0005, width_mult=1.0)

In [8]:
# Shell escape: print the GPU, driver and CUDA version visible on the kernel's host.
!nvidia-smi

Sun Jul 10 10:39:36 2022       
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 450.142.00   Driver Version: 450.142.00   CUDA Version: 11.0     |
|-------------------------------+----------------------+----------------------+
| GPU  Name        Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|                               |                      |               MIG M. |
|   0  Tesla V100-SXM2...  On   | 00000000:00:1E.0 Off |                    0 |
| N/A   32C    P0    26W / 300W |      2MiB / 16160MiB |      0%      Default |
|                               |                      |                  N/A |
+-------------------------------+----------------------+----------------------+
                                                                               
+---------------------------------------------------------------------------

In [11]:
# Shell escape: print the host CPU model and topology.
!lscpu

Architecture:        x86_64
CPU op-mode(s):      32-bit, 64-bit
Byte Order:          Little Endian
CPU(s):              8
On-line CPU(s) list: 0-7
Thread(s) per core:  2
Core(s) per socket:  4
Socket(s):           1
NUMA node(s):        1
Vendor ID:           GenuineIntel
CPU family:          6
Model:               79
Model name:          Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz
Stepping:            1
CPU MHz:             2701.581
CPU max MHz:         3000.0000
CPU min MHz:         1200.0000
BogoMIPS:            4600.04
Hypervisor vendor:   Xen
Virtualization type: full
L1d cache:           32K
L1i cache:           32K
L2 cache:            256K
L3 cache:            46080K
NUMA node0 CPU(s):   0-7
Flags:               fpu vme de pse tsc msr pae mce cx8 apic sep mtrr pge mca cmov pat pse36 clflush mmx fxsr sse sse2 ht syscall nx pdpe1gb rdtscp lm constant_tsc rep_good nopl xtopology nonstop_tsc cpuid aperfmperf pni pclmulqdq ssse3 fma cx16 pcid sse4_1 sse4_2 x2a