In [None]:
## Commands
# python train.py --data data/person_dog 

In [None]:
#Libs
import os
import sys
import torch
import logging
import argparse
import itertools

from src.config import config
from src.base import MatchPrior
from src.network import create_network
from src.multibox_loss import MultiboxLoss
from src.open_images import OpenImagesDataset
from src.data_preprocessing import TrainAugmentation, TestTransform
from utils.misc import str2bool, Timer, freeze_net_layers, store_labels

from torch.utils.data import DataLoader, ConcatDataset
from torch.optim.lr_scheduler import CosineAnnealingLR, MultiStepLR

In [None]:

# Option parser for the training run. Inside the notebook parse_args() is given
# an empty argv (see the end of this cell), so every option takes its default.
parser = argparse.ArgumentParser(
    description='Single Shot MultiBox Detector Training With PyTorch')

# --- Datasets ---
parser.add_argument('--datasets', '--data', nargs='+', default=["data"],
                    help='Dataset directory path')
parser.add_argument('--balance-data', action='store_true',
                    help="Balance training data by down-sampling more frequent labels.")

# --- Network ---
parser.add_argument('--freeze-base-net', action='store_true',
                    help="Freeze base net layers.")
parser.add_argument('--freeze-net', action='store_true',
                    help="Freeze all the layers except the prediction head.")
parser.add_argument('--width-mult', type=float, default=1.0,
                    help='Width Multiplifier for network')

# --- Pretrained base net / checkpoints ---
parser.add_argument('--base-net',
                    help='Pretrained base model')
parser.add_argument('--pretrained', type=str, default='models/pretrained.pth',
                    help='Pre-trained base model')
parser.add_argument('--resume', type=str, default=None,
                    help='Checkpoint state_dict file to resume training from')

# --- SGD ---
parser.add_argument('--lr', '--learning-rate', type=float, default=0.01,
                    help='initial learning rate')
parser.add_argument('--momentum', type=float, default=0.9,
                    help='Momentum value for optim')
parser.add_argument('--weight-decay', type=float, default=5e-4,
                    help='Weight decay for SGD')
parser.add_argument('--gamma', type=float, default=0.1,
                    help='Gamma update for SGD')
parser.add_argument('--base-net-lr', type=float, default=0.001,
                    help='initial learning rate for base net, or None to use --lr')
parser.add_argument('--extra-layers-lr', type=float, default=None,
                    help='initial learning rate for the layers not in base net and prediction heads.')

# --- Scheduler selection ---
parser.add_argument('--scheduler', type=str, default="cosine",
                    help="Scheduler for SGD. It can one of multi-step and cosine")

# --- Multi-step scheduler ---
parser.add_argument('--milestones', type=str, default="80,100",
                    help="milestones for MultiStepLR")

# --- Cosine annealing scheduler ---
parser.add_argument('--t-max', type=float, default=100,
                    help='T_max value for Cosine Annealing Scheduler.')

# --- Training loop ---
parser.add_argument('--batch-size', type=int, default=4,
                    help='Batch size for training')
parser.add_argument('--num-epochs', type=int, default=100,
                    help='the number epochs')
parser.add_argument('--num-workers', type=int, default=4,
                    help='Number of workers used in dataloading')
parser.add_argument('--validation-epochs', type=int, default=5,
                    help='the number epochs between running validation')
parser.add_argument('--debug-steps', type=int, default=10,
                    help='Set the debug log output frequency.')
parser.add_argument('--use-cuda', type=str2bool, default=True,
                    help='Use CUDA to train model')
parser.add_argument('--checkpoint-folder', default='models/',
                    help='Directory for saving checkpoint models')

# Send INFO-level log records to stdout with a timestamp prefix.
logging.basicConfig(
    stream=sys.stdout,
    level=logging.INFO,
    format='%(asctime)s - %(message)s',
    datefmt="%Y-%m-%d %H:%M:%S",
)

# Empty argv: use defaults only (there is no real command line in a notebook).
args = parser.parse_args([])

# Use the first CUDA device only when it is both available and requested.
_cuda_ok = torch.cuda.is_available() and args.use_cuda
DEVICE = torch.device("cuda:0" if _cuda_ok else "cpu")


In [None]:
def create_net(num):
    """Build the network for `num` classes, using the width multiplier from `args`."""
    return create_network(num, width_mult=args.width_mult)

In [None]:
# Override the configured input resolution, then print the shape of config.priors.
# NOTE(review): whether config.priors reflects the new image_size depends on how
# src.config builds it — confirm the priors are regenerated after this assignment.
config.image_size = 640
print(config.priors.shape)

In [None]:
# Shape of column 0 of config.priors (all rows, first column).
config.priors[:,0].shape

In [None]:
from utils.box_utils import  generate_priors, Spec, BoxSizes

In [None]:
# Hand-written prior-box layout used to experiment with generate_priors,
# independently of the priors held by `config`.
# NOTE(review): image_size is 600 here, while the active specs below are labelled
# "# 640" and config.image_size is set to 640 in an earlier cell — confirm which
# resolution is intended.
image_size = 600
# Alternative layout kept for reference (labelled 736).
# specs = [ 736
#     Spec(46, 16, BoxSizes(60, 105), [2, 3]),
#     Spec(23, 32, BoxSizes(105, 150), [2, 3]),
#     Spec(11, 64, BoxSizes(150, 195), [2, 3]),
#     Spec(7, 100, BoxSizes(195, 240), [2, 3]),
#     Spec(4, 150, BoxSizes(240, 285), [2, 3]),
#     Spec(2, 300, BoxSizes(285, 330), [2, 3]),
#     Spec(2, 400, BoxSizes(500, 600), [2])
# ]
specs = [ # 640
    Spec(40, 16, BoxSizes(60, 105), [2, 3]),
    Spec(20, 32, BoxSizes(105, 150), [2, 3]),
    Spec(9, 64, BoxSizes(150, 195), [2, 3]),
    Spec(6, 100, BoxSizes(195, 240), [2, 3]),
    Spec(4, 150, BoxSizes(240, 285), [2, 3]),
    Spec(2, 300, BoxSizes(285, 330), [2, 3]),
    # NOTE(review): same BoxSizes as the previous entry — possibly a copy-paste; confirm.
    Spec(1, 600, BoxSizes(285, 330), [2, 3]),
]


In [None]:
# Generate priors from the hand-written `specs` and `image_size` defined in the previous cell.
priors = generate_priors(specs, image_size)

In [None]:
# Inspect the shape of the generated priors.
priors.shape

In [None]:
#17028

In [None]:
# Image and target transforms, all parameterised from the shared `config` object.
# The train and test image transforms receive the same (size, mean, std) triple.
_norm_args = (config.image_size, config.image_mean, config.image_std)

train_transform = TrainAugmentation(*_norm_args)
# Target transform built from the config priors and variances; the trailing
# positional argument is the literal 0.5.
target_transform = MatchPrior(
    config.priors,
    config.center_variance,
    config.size_variance,
    0.5,
)
test_transform = TestTransform(*_norm_args)

In [None]:
# Point the run at the person/dog dataset directory.
# Note: this replaces the parser's list-valued default (nargs='+') with a single path string.
args.datasets = "data/person_dog/"

In [None]:
# Training dataset, wired to the train-time image transform and the target transform.
dataset = OpenImagesDataset(
    args.datasets,
    transform=train_transform,
    target_transform=target_transform,
    dataset_type="train",
    balance_data=args.balance_data,
)

In [None]:
# Make sure the checkpoint directory exists before a label file is written into it;
# on a fresh checkout the folder may be missing.
os.makedirs(args.checkpoint_folder, exist_ok=True)

# Persist the class names next to the checkpoints and record the class count.
label_file  = os.path.join(args.checkpoint_folder, "labels.txt")
store_labels(label_file, dataset.class_names)
logging.info(dataset)
num_classes = len(dataset.class_names)

In [None]:
# Shuffled training loader; batch size and worker count come from the parsed args.
train_loader = DataLoader(
    dataset,
    batch_size=args.batch_size,
    shuffle=True,
    num_workers=args.num_workers,
)

In [None]:
# Instantiate the network and the bookkeeping values for the training run.
net = create_net(num_classes)
min_loss = -10000.0
last_epoch = -1

# Per-group learning rates: fall back to the global --lr when no override is set.
base_net_lr = args.lr if args.base_net_lr is None else args.base_net_lr
extra_layers_lr = args.lr if args.extra_layers_lr is None else args.extra_layers_lr


In [None]:
# Optimizer parameter groups: the base net and the extra layers get their own
# learning rates; the prediction headers carry no 'lr' key of their own.
_base_params = net.base_net.parameters()
_extra_params = itertools.chain(
    net.source_layer_add_ons.parameters(),
    net.extras.parameters(),
)
_header_params = itertools.chain(
    net.regression_headers.parameters(),
    net.classification_headers.parameters(),
)

params = [
    {'params': _base_params, 'lr': base_net_lr},
    {'params': _extra_params, 'lr': extra_layers_lr},
    {'params': _header_params},
]

In [None]:
# Load weights from the --pretrained path, switch to training mode, then move
# the network to DEVICE. The final expression also displays the module in the cell output.
net.init_from_pretrained(args.pretrained)
net.train(True)
net.to(DEVICE)

In [None]:
# Pull a single batch from the training loader for interactive inspection.
data = next(iter(train_loader))

In [None]:
# Unpack the sampled batch and move each of its three tensors to DEVICE.
images, boxes, labels = (tensor.to(DEVICE) for tensor in data)

In [None]:
# Show the shapes of the three batch tensors side by side.
images.shape, boxes.shape, labels.shape

In [None]:
# Forward pass on the sampled batch; the network returns a (confidence, locations) pair.
confidence, locations = net(images)

In [None]:
# Show the shapes of the two network outputs.
confidence.shape, locations.shape

In [None]:
# train(train_loader, net, criterion, optimizer,
#               device=DEVICE, debug_steps=args.debug_steps, epoch=0)

In [None]:
# Display the network object in the cell output.
net

### Train Pipe

In [None]:
def train(loader, net, criterion, optimizer, device, debug_steps=100, epoch=-1):
    """Run one pass over `loader`, updating `net` and logging running-average losses.

    Args:
        loader: iterable yielding ``(images, boxes, labels)`` batches; ``len(loader)``
            is used in the log message.
        net: model called as ``net(images)`` and returning ``(confidence, locations)``.
        criterion: callable ``criterion(confidence, locations, labels, boxes)`` returning
            ``(regression_loss, classification_loss)``.
        optimizer: optimizer providing ``zero_grad()`` and ``step()``.
        device: target passed to ``.to()`` for every batch tensor.
        debug_steps: a log line is emitted whenever the step index is a non-zero
            multiple of this value.
        epoch: epoch number, used only in the log message.

    Returns:
        None. Progress is reported through ``logging.info``.
    """
    net.train(True)
    running_loss = 0.0
    running_regression_loss = 0.0
    running_classification_loss = 0.0
    # Number of batches accumulated since the last log line. The first window
    # covers steps 0..debug_steps (debug_steps + 1 batches), so dividing by
    # debug_steps would overstate the first reported average.
    steps_since_log = 0
    for i, data in enumerate(loader):
        images, boxes, labels = data
        images = images.to(device)
        boxes = boxes.to(device)
        labels = labels.to(device)

        optimizer.zero_grad()
        confidence, locations = net(images)
        regression_loss, classification_loss = criterion(confidence, locations, labels, boxes)  # TODO CHANGE BOXES
        loss = regression_loss + classification_loss
        loss.backward()
        optimizer.step()

        running_loss += loss.item()
        running_regression_loss += regression_loss.item()
        running_classification_loss += classification_loss.item()
        steps_since_log += 1
        if i and i % debug_steps == 0:
            avg_loss = running_loss / steps_since_log
            avg_reg_loss = running_regression_loss / steps_since_log
            avg_clf_loss = running_classification_loss / steps_since_log
            logging.info(
                f"Epoch: {epoch}, Step: {i}/{len(loader)}, " +
                f"Avg Loss: {avg_loss:.4f}, " +
                f"Avg Regression Loss {avg_reg_loss:.4f}, " +
                f"Avg Classification Loss: {avg_clf_loss:.4f}"
            )
            # Start a fresh averaging window.
            running_loss = 0.0
            running_regression_loss = 0.0
            running_classification_loss = 0.0
            steps_since_log = 0

In [None]:
# Loss function and optimizer for the run.
# NOTE(review): the variances are literal 0.1 / 0.2 here, while the target
# transform reads them from config — confirm the two stay in agreement.
criterion = MultiboxLoss(
    config.priors,
    iou_threshold=0.5,
    neg_pos_ratio=3,
    center_variance=0.1,
    size_variance=0.2,
    device=DEVICE,
)
optimizer = torch.optim.SGD(
    params,
    lr=args.lr,
    momentum=args.momentum,
    weight_decay=args.weight_decay,
)

# Report the effective learning rate of each parameter group.
lr_summary = (
    f"Learning rate: {args.lr}, Base net learning rate: {base_net_lr}, "
    f"Extra Layers learning rate: {extra_layers_lr}."
)
logging.info(lr_summary)

In [None]:
# Build the LR scheduler selected by --scheduler instead of always using cosine
# annealing; with the default ("cosine") the result is the same as before.
if args.scheduler == "multi-step":
    # --milestones is a comma-separated list of epoch indices, e.g. "80,100".
    milestones = [int(value.strip()) for value in args.milestones.split(",")]
    scheduler = MultiStepLR(optimizer, milestones=milestones,
                            gamma=args.gamma, last_epoch=last_epoch)
elif args.scheduler == "cosine":
    scheduler = CosineAnnealingLR(optimizer, args.t_max, last_epoch=last_epoch)
else:
    raise ValueError(
        f"Unsupported scheduler {args.scheduler!r}; expected 'multi-step' or 'cosine'."
    )

In [None]:
for epoch in range(last_epoch + 1, args.num_epochs):
    train(train_loader, net, criterion, optimizer,
          device=DEVICE, debug_steps=args.debug_steps, epoch=epoch)
    # Advance the LR schedule only after the epoch's optimizer updates. Stepping
    # the scheduler first skips the initial learning-rate value and triggers a
    # PyTorch (>= 1.1) warning about the call order.
    scheduler.step()
    break  # stop after a single epoch

#     if epoch % args.validation_epochs == 0 or epoch == args.num_epochs - 1:
#         val_loss, val_regression_loss, val_classification_loss = test(val_loader, net, criterion, DEVICE)
#         logging.info(
#             f"Epoch: {epoch}, " +
#             f"Validation Loss: {val_loss:.4f}, " +
#             f"Validation Regression Loss {val_regression_loss:.4f}, " +
#             f"Validation Classification Loss: {val_classification_loss:.4f}"
#         )
#         model_path = os.path.join(args.checkpoint_folder, f"{args.net}-Epoch-{epoch}-Loss-{val_loss}.pth")
#         net.save(model_path)
#         logging.info(f"Saved model {model_path}")

# logging.info("Task done, exiting program.")


In [None]:
import cv2
import pandas as pd
from tqdm import tqdm

In [None]:
# Scan the training images and print the name of every file that cannot be
# decoded or does not decode to a 3-dimensional array.
train_dir = "data/person_dog/train"
for fname in tqdm(os.listdir(train_dir)):
    img = cv2.imread(os.path.join(train_dir, fname))
    # cv2.imread returns None (it does not raise) when a file cannot be read, so
    # check for that first; otherwise `img.shape` fails on the first bad file.
    if img is None or len(img.shape) != 3:
        print(fname)

In [None]:
# Load one specific training image for inspection.
# cv2.imread yields None when the file is missing or cannot be decoded.
img = cv2.imread("data/person_dog/train/000000235832.jpg")

In [None]:
# Delete that image file from the training directory (shell command; destructive).
!rm data/person_dog/train/000000235832.jpg

In [None]:
# Display the value previously loaded into `img`.
img

In [None]:
# List the contents of the dataset directory.
ls data/person_dog/

In [None]:
# Read the training annotations, keeping ImageID as text. All-digit IDs such as
# "000000235832" would otherwise be inferred as integers, which drops the leading
# zeros and makes later comparisons against the string ID never match.
df = pd.read_csv('data/person_dog/sub-train-annotations-bbox.csv', dtype={"ImageID": str})

In [None]:
# Drop every annotation row belonging to the removed image and overwrite the CSV in place.
_keep_rows = df["ImageID"] != "000000235832"
df[_keep_rows].to_csv('data/person_dog/sub-train-annotations-bbox.csv', index=False)

In [1]:
# Query the GPU status via the nvidia-smi command-line tool.
!nvidia-smi

Sun Jul 17 06:13:23 2022       
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 470.129.06   Driver Version: 470.129.06   CUDA Version: 11.4     |
|-------------------------------+----------------------+----------------------+
| GPU  Name        Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|                               |                      |               MIG M. |
|   0  NVIDIA GeForce ...  Off  | 00000000:01:00.0 Off |                  N/A |
| 35%   67C    P2   310W / 350W |  17065MiB / 24268MiB |     78%      Default |
|                               |                      |                  N/A |
+-------------------------------+----------------------+----------------------+
                                                                               
+---------------------------------------------------------------------------