In [1]:
import argparse

import torch.distributed as dist
import torch.optim as optim
import torch.optim.lr_scheduler as lr_scheduler
from torch.utils.tensorboard import SummaryWriter

import test  # import test.py to get mAP after each epoch
from models import *
from utils.datasets import *
from utils.utils import *

#from azureml.core import Run

import os

In [2]:
# Mixed-precision training switch (passed to select_device as `apex` further down).
mixed_precision = False

# Checkpoints are written under ./outputs; the directory is created up front.
os.makedirs('outputs', exist_ok=True)
wdir = os.path.join('outputs', '')  # directory prefix, ends with the path separator
last = os.path.join(wdir, 'last.pt')
best = os.path.join(wdir, 'best.pt')
results_file = 'results.txt'

# Hyperparameters (insertion order matters: hyp*.txt values are matched to keys positionally)
hyp = dict(
    giou=3.54,  # giou loss gain
    cls=37.4,  # cls loss gain
    cls_pw=1.0,  # cls BCELoss positive_weight
    obj=64.3,  # obj loss gain (*=img_size/320 if img_size != 320)
    obj_pw=1.0,  # obj BCELoss positive_weight
    iou_t=0.20,  # iou training threshold
    lr0=0.01,  # initial learning rate (SGD=5E-3, Adam=5E-4)
    lrf=0.0005,  # final learning rate (with cos scheduler)
    momentum=0.937,  # SGD momentum
    weight_decay=0.000484,  # optimizer weight decay
    fl_gamma=0.0,  # focal loss gamma (efficientDet default is gamma=1.5)
    hsv_h=0.0138,  # image HSV-Hue augmentation (fraction)
    hsv_s=0.678,  # image HSV-Saturation augmentation (fraction)
    hsv_v=0.36,  # image HSV-Value augmentation (fraction)
    degrees=1.98 * 0,  # image rotation (+/- deg), multiplied by 0 -> disabled
    translate=0.05 * 0,  # image translation (+/- fraction), disabled
    scale=0.05 * 0,  # image scale (+/- gain), disabled
    shear=0.641 * 0,  # image shear (+/- deg), disabled
)

# Optional override: the first file matching hyp*.txt replaces values in key order.
f = glob.glob('hyp*.txt')
if len(f) > 0:
    print('Using %s' % f[0])
    for key, value in zip(hyp.keys(), np.loadtxt(f[0])):
        hyp[key] = value

# Announce focal loss only when its gamma is non-zero.
if hyp['fl_gamma']:
    print('Using FocalLoss(gamma=%g)' % hyp['fl_gamma'])

In [3]:
# Run configuration as plain notebook variables (argparse-style options; the matching
# command-line flag is noted next to each value).

# --- schedule / optimizer ---
epochs = 5  # --epochs
batch_size = 6  # --batch-size
adam = False  # --adam: use adam optimizer

# --- model, data, initial weights ---
cfg = 'cfg/yolov3-tiny-3cls.cfg'  # --cfg: *.cfg path
data = 'data/anji_detect-test.data'  # --data: *.data path
weights = 'weights/yolov3-tiny.conv.15'  # --weights: initial weights path
single_cls = False  # --single-cls: train as single-class dataset
cache_images = False  # --cache-images: cache images for faster training

# --- image sizes ---
img_size = [320, 640]  # --img-size: [min_train, max-train, test]
multi_scale = False  # --multi-scale: adjust img_size during training
rect = False  # --rect: rectangular training

# --- run control ---
resume = False  # --resume: resume training from last.pt
nosave = False  # --nosave: only save final checkpoint
notest = False  # --notest: only test final epoch
evolve = False  # --evolve: evolve hyperparameters

# --- bookkeeping ---
bucket = ''  # --bucket: gsutil bucket
name = ''  # --name: renames results.txt to results_name.txt if supplied
device = ''  # --device: device id (i.e. 0 or 0,1 or cpu)

In [4]:
# NOTE: resume handling (weights = last if resume else weights) and check_git_status()
# are disabled in this notebook.
# Pad img_size to three entries by repeating its last value; lists that already
# hold three or more entries are left untouched.
while len(img_size) < 3:
    img_size.append(img_size[-1])

In [5]:
# Replace the device string with the device object chosen by the project helper.
device = torch_utils.select_device(device, apex=mixed_precision, batch_size=batch_size)
print(device.type)
# Mixed precision is only kept when not running on CPU.
mixed_precision = mixed_precision and device.type != 'cpu'
print(mixed_precision)

Using CUDA device0 _CudaDeviceProperties(name='GeForce GTX 1050', total_memory=2048MB)

cuda
False


In [7]:
# Number of batches accumulated before each optimizer update (targets a batch size of 64),
# never less than one.
accumulate = round(64 / batch_size)
if accumulate < 1:
    accumulate = 1
print(accumulate)

11


In [8]:
gs = 64  # (pixels) grid size
# img_size must hold exactly three entries at this point: train min, train max, test.
(imgsz_min, imgsz_max, imgsz_test) = img_size
print(imgsz_min, imgsz_max, imgsz_test)
# Only the minimum training size is validated against the grid size here.
assert not math.fmod(imgsz_min, gs), '--img-size %g must be a %g-multiple' % (imgsz_min, gs)

320 640 640


In [9]:
# Debug: show the multi_scale flag before it is combined with the min/max size check.
print(multi_scale)

False


In [10]:
# Multi-scale training is enabled either by the flag or implicitly when the
# minimum and maximum training sizes differ.
multi_scale = multi_scale or (imgsz_min != imgsz_max)
print(multi_scale)

True


In [11]:
# Derive the multi-scale grid range (in units of gs pixels) and snap the training
# size bounds to multiples of gs.
if multi_scale:
    if imgsz_min == imgsz_max:
        # A single size was given: widen it to roughly 67% - 150% of that size.
        imgsz_min //= 1.5
        imgsz_max //= 0.667
    # Floor division by a float (branch above) yields floats. Cast to int because the
    # batch loop passes grid_min/grid_max to random.randrange(), which rejects
    # non-integer arguments on Python 3.12+.
    grid_min, grid_max = int(imgsz_min // gs), int(imgsz_max // gs)
    imgsz_min, imgsz_max = int(grid_min * gs), int(grid_max * gs)
img_size = imgsz_max  # initialize with max size

In [12]:
# Seed via the project helper, then read the train/validation list paths from the *.data file.
init_seeds()
data_dict = parse_data_cfg(data)
train_path, test_path = data_dict['train'], data_dict['valid']
for split_path in (train_path, test_path):
    print(split_path)

data/train_local.txt
data/val_local.txt


In [13]:
# Debug: show the class count read from the parsed *.data file and the single-class flag.
print(int(data_dict['classes']))
print(single_cls)

3
False


In [14]:
# Number of classes: forced to 1 for single-class training, otherwise taken from the data cfg.
if single_cls:
    nc = 1
else:
    nc = int(data_dict['classes'])
# Scale the classification loss gain by nc / 80.
# NOTE: this updates hyp in place, so re-running this cell applies the scaling again.
hyp['cls'] = hyp['cls'] * (nc / 80)
print(nc)

3


In [15]:
# Delete leftovers from an earlier run: batch preview images and the results file.
stale_files = glob.glob('*_batch*.jpg')
stale_files += glob.glob(results_file)
for f in stale_files:
    os.remove(f)

In [16]:
# Debug: show which model cfg path is about to be passed to Darknet.
print(cfg)

cfg/yolov3-tiny-3cls.cfg


In [17]:
# Initialize model
# Construct the Darknet model from the cfg path and move it to the selected device.
model = Darknet(cfg).to(device)

Model Summary: 37 layers, 8.6745e+06 parameters, 8.6745e+06 gradients


In [18]:
# Split the model parameters into three optimizer groups by parameter name:
#   pg2 - names containing '.bias'
#   pg1 - names containing 'Conv2d.weight' (these later receive weight_decay)
#   pg0 - everything else
pg0, pg1, pg2 = [], [], []
for param_name, param in model.named_parameters():
    if '.bias' in param_name:
        pg2.append(param)
    elif 'Conv2d.weight' in param_name:
        pg1.append(param)
    else:
        pg0.append(param)

In [19]:
# Debug: dump the bias parameter group (names containing '.bias').
# NOTE: prints full tensors, and only works before the optimizer cell deletes pg2.
print(pg2)

[Parameter containing:
tensor([0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.], device='cuda:0', requires_grad=True), Parameter containing:
tensor([0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.], device='cuda:0', requires_grad=True), Parameter containing:
tensor([0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.], device='cuda:0', requires_grad=True), Parameter containing:
tensor([0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.

In [20]:
# Debug: show the adam flag. The optimizer cell below constructs SGD regardless of its value.
print(adam)

False


In [21]:
# SGD with Nesterov momentum. Group order is significant: index 0 = pg0 (other),
# index 1 = pg1 (Conv2d weights, with weight_decay), index 2 = pg2 (biases).
optimizer = optim.SGD(pg0, lr=hyp['lr0'], momentum=hyp['momentum'], nesterov=True)
for extra_group in (dict(params=pg1, weight_decay=hyp['weight_decay']),
                    dict(params=pg2)):
    optimizer.add_param_group(extra_group)

group_sizes = (len(pg2), len(pg1), len(pg0))
print('Optimizer groups: %g .bias, %g Conv2d.weight, %g other' % group_sizes)

# The temporary lists are no longer needed once the optimizer holds the groups.
del pg0
del pg1
del pg2

Optimizer groups: 13 .bias, 13 Conv2d.weight, 11 other


In [22]:
# Fresh-run starting state, then fetch the initial weights via the project helper.
start_epoch, best_fitness = 0, 0.0
attempt_download(weights)

In [23]:
# Debug: show whether `weights` ends with '.pt' and whether a non-empty path was given.
print(weights.endswith('.pt'),len(weights)>0)
# The darknet loader is called unconditionally; the two checks printed above are not acted on.
# NOTE(review): a '.pt' path would presumably need a different loader - confirm before changing `weights`.
load_darknet_weights(model, weights)

False True


In [24]:
# Debug: show the mixed-precision flag (forced to False earlier when the device is CPU).
print(mixed_precision)

False


In [25]:
# Scheduler https://arxiv.org/pdf/1812.01187.pdf
def lf(x):
    """Cosine learning-rate multiplier: 1.0 at x == 0, falling to 0.05 at x == epochs."""
    cosine = (1 + math.cos(x * math.pi / epochs)) / 2
    return (cosine ** 1.0) * 0.95 + 0.05


scheduler = lr_scheduler.LambdaLR(optimizer, lr_lambda=lf)
# Align the scheduler with the starting epoch, see
# https://discuss.pytorch.org/t/a-problem-occured-when-resuming-an-optimizer/28822
scheduler.last_epoch = start_epoch - 1

In [27]:
# Debug: show the scheduler's last_epoch (set to start_epoch - 1 in the scheduler cell).
print(scheduler.last_epoch)

-1


In [26]:
# Debug: show the device type, the CUDA device count, whether more than one GPU is
# present, and whether torch.distributed is available.
print(device.type,torch.cuda.device_count(),torch.cuda.device_count() > 1,torch.distributed.is_available())

cuda 1 False True


In [28]:
# Debug: show the hyperparameters and flags that the next cell passes to LoadImagesAndLabels.
print(hyp,rect,cache_images,single_cls)

{'giou': 3.54, 'cls': 1.4024999999999999, 'cls_pw': 1.0, 'obj': 64.3, 'obj_pw': 1.0, 'iou_t': 0.2, 'lr0': 0.01, 'lrf': 0.0005, 'momentum': 0.937, 'weight_decay': 0.000484, 'fl_gamma': 0.0, 'hsv_h': 0.0138, 'hsv_s': 0.678, 'hsv_v': 0.36, 'degrees': 0.0, 'translate': 0.0, 'scale': 0.0, 'shear': 0.0} False False False


In [29]:
# Training dataset: augmentation on, driven by the hyperparameters in `hyp`.
train_set_options = dict(
    augment=True,
    hyp=hyp,  # augmentation hyperparameters
    rect=rect,  # rectangular training
    cache_images=cache_images,
    single_cls=single_cls,
)
dataset = LoadImagesAndLabels(train_path, img_size, batch_size, **train_set_options)

Caching labels (6265 found, 0 missing, 0 empty, 1 duplicate, for 6265 images): 100%|██████████| 6265/6265 [00:02<00:00, 2955.15it/s]


In [126]:
# Debug: print the training dataset object.
print(dataset)

<utils.datasets.LoadImagesAndLabels object at 0x00000237AF335940>


In [30]:
# Training dataloader
batch_size = min(batch_size, len(dataset))  # never ask for more than the dataset holds
# Worker count: limited by CPU count, by batch size (0 workers when batch_size is 1) and by 8.
worker_cap = batch_size if batch_size > 1 else 0
nw = min(os.cpu_count(), worker_cap, 8)
dataloader = torch.utils.data.DataLoader(
    dataset,
    batch_size=batch_size,
    num_workers=nw,
    shuffle=not rect,  # shuffled unless rectangular training is used
    pin_memory=True,
    collate_fn=dataset.collate_fn,
)

In [31]:
# Validation dataset: always rectangular, at the test image size.
test_dataset = LoadImagesAndLabels(
    test_path, imgsz_test, batch_size,
    hyp=hyp,
    rect=True,
    cache_images=cache_images,
    single_cls=single_cls,
)
# The collate function is taken from the training dataset object.
testloader = torch.utils.data.DataLoader(
    test_dataset,
    batch_size=batch_size,
    num_workers=nw,
    pin_memory=True,
    collate_fn=dataset.collate_fn,
)

Caching labels (2117 found, 0 missing, 0 empty, 0 duplicate, for 2117 images): 100%|██████████| 2117/2117 [00:00<00:00, 2666.24it/s]


In [32]:
# Attach run metadata to the model object: class count, hyperparameters, giou loss ratio.
model.nc, model.hyp, model.gr = nc, hyp, 1.0
model.class_weights = labels_to_class_weights(dataset.labels, nc).to(device)

# Model EMA helper, created after the attributes above have been attached.
ema = torch_utils.ModelEMA(model)

# Bookkeeping for the training run
nb = len(dataloader)  # batches per epoch
n_burn = 3 * nb if 3 * nb > 500 else 500  # burn-in iterations: at least 500, otherwise 3 epochs
maps = np.zeros(nc)  # mAP per class
# torch.autograd.set_detect_anomaly(True)
# Placeholder for 'P', 'R', 'mAP', 'F1', 'val GIoU', 'val Objectness', 'val Classification'
results = (0,) * 7
t0 = time.time()

print('Image sizes %g - %g train, %g test' % (imgsz_min, imgsz_max, imgsz_test))
print('Using %g dataloader workers' % nw)
print('Starting training for %g epochs...' % epochs)

Image sizes 320 - 640 train, 640 test
Using 6 dataloader workers
Starting training for 5 epochs...


In [71]:
# Debug: show the dataset's image_weights attribute.
print(dataset.image_weights)

False


In [48]:
# Epoch index used by the batch loop, evaluation and checkpoint cells below.
# NOTE(review): hard-coded; no enclosing epoch loop is visible in this notebook, so it is
# presumably edited by hand before each manual pass.
epoch = 2

In [49]:
# Switch the model to training mode before the batch loop.
model.train()

Darknet(
  (module_list): ModuleList(
    (0): Sequential(
      (Conv2d): Conv2d(3, 16, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (BatchNorm2d): BatchNorm2d(16, eps=0.0001, momentum=0.03, affine=True, track_running_stats=True)
      (activation): LeakyReLU(negative_slope=0.1, inplace=True)
    )
    (1): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (2): Sequential(
      (Conv2d): Conv2d(16, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (BatchNorm2d): BatchNorm2d(32, eps=0.0001, momentum=0.03, affine=True, track_running_stats=True)
      (activation): LeakyReLU(negative_slope=0.1, inplace=True)
    )
    (3): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (4): Sequential(
      (Conv2d): Conv2d(32, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (BatchNorm2d): BatchNorm2d(64, eps=0.0001, momentum=0.03, affine=True, track_running_stats=True)
 

In [50]:
# Reset the running mean of the four loss components and print the progress-table header.
mloss = torch.zeros(4).to(device)
column_titles = ('Epoch', 'gpu_mem', 'GIoU', 'obj', 'cls', 'total', 'targets', 'img_size')
print(('\n' + '%10s' * 8) % column_titles)
pbar = tqdm(enumerate(dataloader), total=nb)  # progress bar over one pass of the dataloader


     Epoch   gpu_mem      GIoU       obj       cls     total   targets  img_size
  0%|          | 0/1045 [00:00<?, ?it/s]

In [79]:
# Debug: show the mixed-precision flag, the starting epoch and the total epoch count.
print(mixed_precision,start_epoch,epochs)

False 0 5


In [51]:
# One pass over the training dataloader for the current `epoch`.
#
# `tb_writer` is not created by any cell visible in this notebook, so referencing it
# directly in the plotting branch below raised NameError on the first batch of epoch 0.
# Resolve it once here: it stays None unless a SummaryWriter named `tb_writer` already
# exists in the notebook namespace.
tb_writer = globals().get('tb_writer')

# batch -------------------------------------------------------------
for i, (imgs, targets, paths, _) in pbar:
    ni = i + nb * epoch  # number integrated batches (since train start)
    imgs = imgs.to(device).float() / 255.0  # uint8 to float32, 0 - 255 to 0.0 - 1.0
    targets = targets.to(device)

    # Burn-in
    if ni <= n_burn * 2:
        model.gr = np.interp(ni, [0, n_burn * 2], [0.0, 1.0])  # giou loss ratio (obj_loss = 1.0 or giou)
        if ni == n_burn:  # burnin complete
            print_model_biases(model)

        for j, x in enumerate(optimizer.param_groups):
            # bias lr (group index 2) falls from 0.1 to lr0, all other lrs rise from 0.0 to lr0
            x['lr'] = np.interp(ni, [0, n_burn], [0.1 if j == 2 else 0.0, x['initial_lr'] * lf(epoch)])
            if 'momentum' in x:
                x['momentum'] = np.interp(ni, [0, n_burn], [0.9, hyp['momentum']])

    # Multi-Scale
    if multi_scale:
        if ni / accumulate % 1 == 0:  # pick a new img_size whenever ni is a multiple of accumulate
            img_size = random.randrange(grid_min, grid_max + 1) * gs
        sf = img_size / max(imgs.shape[2:])  # scale factor
        if sf != 1:
            ns = [math.ceil(x * sf / gs) * gs for x in imgs.shape[2:]]  # new shape (stretched to gs-multiple)
            imgs = F.interpolate(imgs, size=ns, mode='bilinear', align_corners=False)

    # Forward
    pred = model(imgs)

    # Loss
    loss, loss_items = compute_loss(pred, targets, model)
    # Non-finite loss guard left disabled (`return` is not valid at cell level):
    #if not torch.isfinite(loss):
    #    print('WARNING: non-finite loss, ending training ', loss_items)
    #    return results

    # Backward
    loss *= batch_size / 64  # scale loss
    loss.backward()

    # Optimize: step only every `accumulate` batches so gradients accumulate in between
    if ni % accumulate == 0:
        optimizer.step()
        optimizer.zero_grad()
        ema.update(model)

    # Print
    mloss = (mloss * i + loss_items) / (i + 1)  # update mean losses
    #mem = '%.3gG' % (torch.cuda.memory_cached() / 1E9 if torch.cuda.is_available() else 0)  # (GB)
    mem = '%.3gG' % (torch.cuda.memory_reserved() / 1E9 if torch.cuda.is_available() else 0)  # (GB)
    s = ('%10s' * 2 + '%10.3g' * 6) % ('%g/%g' % (epoch, epochs - 1), mem, *mloss, len(targets), img_size)
    pbar.set_description(s)

    # Plot (only on the very first batch of the run)
    if ni < 1:
        f = 'train_batch%g.jpg' % i  # filename
        res = plot_images(images=imgs, targets=targets, paths=paths, fname=f)
        if tb_writer:
            tb_writer.add_image(f, res, dataformats='HWC', global_step=epoch)
            # tb_writer.add_graph(model, imgs)  # add model to tensorboard
# end batch ------------------------------------------------------------------------------------------------

       2/4      1.2G       3.1     0.779      0.53      4.41         3       640: 100%|██████████| 1045/1045 [06:38<00:00,  2.62it/s]


In [52]:
# Update scheduler
# Advance the LR scheduler by one step; this cell follows the batch loop for the current epoch.
scheduler.step()

In [93]:
# Debug: show the notest flag, which the evaluation cell checks together with final_epoch.
print(notest)

False


In [53]:
# Process epoch results
# Sync the EMA helper's attributes from the model (project helper).
ema.update_attr(model)
# True only when `epoch` is the last index of the configured run (epoch + 1 == epochs).
final_epoch = epoch + 1 == epochs
print(final_epoch)

False


In [54]:
# Calculate mAP: every epoch unless notest is set, and always on the final epoch.
if final_epoch or not notest:
    coco_names = ('coco.data', 'coco2014.data', 'coco2017.data')
    is_coco = any(coco_name in data for coco_name in coco_names) and model.nc == 80
    # Evaluate the EMA weights on the validation loader; JSON is saved only for a final COCO run.
    results, maps = test.test(
        cfg,
        data,
        batch_size=batch_size,
        img_size=imgsz_test,
        model=ema.ema,
        save_json=final_epoch and is_coco,
        single_cls=single_cls,
        dataloader=testloader,
    )

               Class    Images   Targets         P         R   mAP@0.5        F1: 100%|██████████| 353/353 [01:04<00:00,  5.51it/s]
                 all  2.12e+03  8.28e+03     0.346     0.543     0.401     0.417


In [57]:
# Append this epoch's line to the results file: the progress string `s` followed by the
# seven evaluation values (P, R, mAP, F1, test_losses=(GIoU, obj, cls)).
with open(results_file, 'a') as f:
    metrics_text = ('%10.3g' * 7) % results
    f.write(s + metrics_text + '\n')

# Optional upload, only when both a run name and a bucket are configured.
if name and bucket:
    os.system('gsutil cp results.txt gs://%s/results/results%s.txt' % (bucket, name))

In [55]:
# fitness_i = weighted combination of [P, R, mAP, F1] (computed by the project helper)
fi = fitness(np.array(results).reshape(1, -1))
# Keep the best fitness seen so far.
best_fitness = fi if fi > best_fitness else best_fitness

In [56]:
# Debug: show the best fitness so far, first raw and then formatted as a float.
print(best_fitness)
print('the best_fitness is %f .\n' % (best_fitness))

[    0.40265]
the best_fitness is 0.402650 .



In [None]:
# Save model
# Checkpoint every epoch unless nosave is set; with nosave set, only the final epoch of a
# non-evolve run is saved. These are the notebook-level flags: the original `opt.nosave` /
# `opt.evolve` referenced an argparse namespace that is commented out here and so raised
# NameError.
save = (not nosave) or (final_epoch and not evolve)
if save:
    with open(results_file, 'r') as f:  # create checkpoint
        chkpt = {'epoch': epoch,
                 'best_fitness': best_fitness,
                 'training_results': f.read(),
                 'model': ema.ema.module.state_dict() if hasattr(model, 'module') else ema.ema.state_dict(),
                 # optimizer state is dropped from the final-epoch checkpoint
                 'optimizer': None if final_epoch else optimizer.state_dict()}

    # Save last, best and delete
    torch.save(chkpt, last)
    # `best` is only written when this epoch holds the best fitness and is not the final epoch
    if (best_fitness == fi) and not final_epoch:
        torch.save(chkpt, best)
    del chkpt

In [59]:
# Debug: show how many CUDA devices are visible.
print(torch.cuda.device_count() )

1


In [58]:
# Ask PyTorch to release its unused cached GPU memory.
torch.cuda.empty_cache()
