In [None]:
%matplotlib inline
%load_ext autoreload
%autoreload 2

from random import randrange

import IPython
import matplotlib.pyplot as plt
import numpy as np
import torch
from PIL import Image, ImageDraw
from torchvision.ops import nms


In [None]:
def draw_cell_boundaries(image, cells=7):
    """Return a copy of ``image`` with a ``cells`` x ``cells`` grid drawn over it.

    The interior grid lines are drawn on a transparent RGBA overlay that is
    alpha-composited onto the image, so the input image itself is not drawn on.

    Args:
        image: PIL image to annotate; only ``image.size`` and
            ``image.convert`` are used.
        cells: Number of grid cells along each axis (default 7).

    Returns:
        A new RGB image carrying ``cells - 1`` vertical and ``cells - 1``
        horizontal lines in a semi-transparent light yellow.
    """
    image_width, image_height = image.size

    # All drawing happens on the overlay; no draw handle on `image` is needed.
    overlay = Image.new('RGBA', image.size, (0, 0, 0, 0))
    overlay_draw = ImageDraw.Draw(overlay)

    fill_color = (255, 255, 128, 200)

    # Cell size in pixels along each axis.
    horizontal_unit = image_width / cells
    vertical_unit = image_height / cells

    # Vertical separators (skip the image borders at idx 0 and idx == cells).
    for idx in range(1, cells):
        vertical_line_x = horizontal_unit * idx
        overlay_draw.line([(vertical_line_x, 0), (vertical_line_x, image_height)], fill=fill_color)

    # Horizontal separators.
    for idx in range(1, cells):
        horizontal_line_y = vertical_unit * idx
        overlay_draw.line([(0, horizontal_line_y), (image_width, horizontal_line_y)], fill=fill_color)

    return Image.alpha_composite(image.convert("RGBA"), overlay).convert("RGB")
        
def draw_center_cell_object(image, annotator, annotation, cells=7):
    """Visualise ground-truth objects: responsible cell, centre point and box.

    For every annotation item the grid cell that owns the object is outlined
    in red, the object centre is marked with a dot and a class label, and the
    object box is filled with a random translucent colour.

    Args:
        image: PIL image to draw on. The cell outline, dot and label are drawn
            directly onto the image object passed in before it is composited,
            so the caller's image is modified for the first item.
        annotator: Object exposing ``labels``, indexed by class id.
        annotation: Iterable of 7-tuples
            ``(class_id, cell_idx_x, cell_idx_y, cell_pos_x, cell_pos_y, width, height)``.
        cells: Number of grid cells along each axis (default 7).

    Returns:
        A new RGB image with all objects drawn, or ``image`` itself when
        ``annotation`` is empty.
    """
    image_width, image_height = image.size

    fill_color = (255, 0, 0, 255)

    # Cell size in pixels; identical for every item, so computed once.
    horizontal_unit = image_width / cells
    vertical_unit = image_height / cells

    for (class_id, cell_idx_x, cell_idx_y, cell_pos_x, cell_pos_y, width, height) in annotation:
        class_name = annotator.labels[class_id]

        overlay = Image.new('RGBA', image.size, (0, 0, 0, 0))
        overlay_draw = ImageDraw.Draw(overlay)

        # Pixel bounds of the cell responsible for this object.
        cxmin = horizontal_unit * cell_idx_x
        cxmax = horizontal_unit * (cell_idx_x + 1)
        cymin = vertical_unit * cell_idx_y
        cymax = vertical_unit * (cell_idx_y + 1)

        # `image` is rebound at the end of every iteration, so a fresh draw
        # handle is required each time.
        draw = ImageDraw.Draw(image)

        # Outline the centre cell in red, one edge at a time.
        corners = [(cxmin, cymin), (cxmax, cymin), (cxmax, cymax), (cxmin, cymax)]
        for start, end in zip(corners, corners[1:] + corners[:1]):
            draw.line([start, end], fill=fill_color)

        # NOTE(review): the in-cell offset is scaled by the full image size
        # here, whereas draw_center_cell_object_output treats it as a fraction
        # of one cell -- confirm against the annotator's output format.
        cell_obj_center_x = int(cxmin + (cell_pos_x * image_width))
        cell_obj_center_y = int(cymin + (cell_pos_y * image_height))

        # Box extents with min <= max. The original assigned them swapped,
        # which breaks Pillow's documented x1 >= x0 / y1 >= y0 requirement.
        half_width = width * image_width / 2
        half_height = height * image_height / 2
        oxmin, oxmax = int(cell_obj_center_x - half_width), int(cell_obj_center_x + half_width)
        oymin, oymax = int(cell_obj_center_y - half_height), int(cell_obj_center_y + half_height)

        draw.ellipse([(cell_obj_center_x - 3, cell_obj_center_y - 3), (cell_obj_center_x + 3, cell_obj_center_y + 3)], fill=(255, 0, 0), width=6)
        draw.text((cell_obj_center_x + 8, cell_obj_center_y - 6), "CLSID: %s" % class_name, fill=fill_color)

        # Fill the object box with a random translucent colour.
        overlay_color = (randrange(255), randrange(255), randrange(255), 90)
        overlay_draw.rectangle([oxmin, oymin, oxmax, oymax], fill=overlay_color)

        image = Image.alpha_composite(image.convert("RGBA"), overlay).convert("RGB")

    return image

# TODO: redesign based on 2 predictor output
# ignore predictor with lower bbox confidence

# model output version of draw_center_cell_object
def draw_center_cell_object_output(image, annotator, output, confidence_threshold=0.3):
    """Decode a raw model output grid, apply NMS and draw the surviving boxes.

    Args:
        image: PIL image to draw on.
        annotator: Object exposing ``labels``, indexed by class id.
        output: NumPy array indexed as ``[channel, cell_y, cell_x]``. The first
            ``5 * B`` channels hold ``B`` predictors of
            ``(x, y, w, h, confidence)`` and the remaining 20 channels are
            class scores (e.g. ``[30, 7, 7]`` for ``B = 2``). A sigmoid is
            applied to all five predictor values before use.
        confidence_threshold: Predictors whose (sigmoid) confidence is below
            this value are dropped before NMS.

    Returns:
        ``image`` itself when no predictor passes the threshold, otherwise a
        new RGB image with one dot, label and translucent box per kept box.
    """
    image_width, image_height = image.size

    fill_color = (255, 0, 0, 255)

    # output -> [30, 7, 7]
    cells = output.shape[1]  # axes 1 and 2 are the grid dimensions
    assert (output.shape[0] - 20) % 5 == 0
    bboxes = (output.shape[0] - 20) // 5

    # Collect candidate boxes (normalised to [0, 1] image coordinates) for NMS.
    bbox_coordinates = []
    bbox_scores = []
    bbox_classes = []
    for cell_idx_y in range(cells):
        for cell_idx_x in range(cells):
            # The class scores are shared by every predictor of the cell.
            class_id = np.argmax(output[5 * bboxes:, cell_idx_y, cell_idx_x])

            for bbox_idx in range(bboxes):
                current_predictor = output[5 * bbox_idx:5 * (bbox_idx + 1), cell_idx_y, cell_idx_x]
                (cell_pos_x, cell_pos_y, width, height, confidence) = torch.sigmoid(torch.from_numpy(current_predictor)).numpy()

                if confidence < confidence_threshold:
                    continue

                # Centre = cell index plus in-cell offset, as a fraction of the image.
                obj_center_x = (cell_idx_x + cell_pos_x) / cells
                obj_center_y = (cell_idx_y + cell_pos_y) / cells
                oxmin, oxmax = obj_center_x - (width / 2), obj_center_x + (width / 2)
                oymin, oymax = obj_center_y - (height / 2), obj_center_y + (height / 2)

                bbox_coordinates.append([oxmin, oymin, oxmax, oymax])
                bbox_scores.append(confidence)
                bbox_classes.append(class_id)

    if not bbox_coordinates:
        return image

    print("[Before NMS] BBoxes:", len(bbox_coordinates))

    bbox_coordinates = torch.from_numpy(np.array(bbox_coordinates)).float()
    bbox_scores = torch.from_numpy(np.array(bbox_scores)).float()
    bbox_classes = torch.from_numpy(np.array(bbox_classes)).long()

    # NOTE: NMS is class-agnostic here -- boxes of different classes suppress each other.
    coordinates_indicies = nms(boxes=bbox_coordinates, scores=bbox_scores, iou_threshold=0.2)

    bbox_filtered_coordinates = bbox_coordinates.index_select(0, coordinates_indicies)
    bbox_filtered_scores = bbox_scores.index_select(0, coordinates_indicies)
    bbox_filtered_classes = bbox_classes.index_select(0, coordinates_indicies)

    print("[After NMS] BBoxes:", len(bbox_filtered_coordinates))

    x_limit, y_limit = image_width - 1, image_height - 1
    for idx in range(bbox_filtered_coordinates.shape[0]):
        (xmin, ymin, xmax, ymax) = bbox_filtered_coordinates[idx].numpy()
        confidence = bbox_filtered_scores[idx].numpy()
        class_name = annotator.labels[int(bbox_filtered_classes[idx])]

        # Clamp both edges into the image. Clamping each edge on one side only
        # could leave min > max for a box hugging the right/bottom border,
        # which Pillow's rectangle() does not accept.
        oxmin = min(max(xmin * image_width, 0), x_limit)
        oxmax = min(max(xmax * image_width, 0), x_limit)
        oymin = min(max(ymin * image_height, 0), y_limit)
        oymax = min(max(ymax * image_height, 0), y_limit)

        cell_obj_center_x, cell_obj_center_y = int(oxmin + (oxmax - oxmin) / 2), int(oymin + (oymax - oymin) / 2)

        overlay = Image.new('RGBA', image.size, (0, 0, 0, 0))
        overlay_draw = ImageDraw.Draw(overlay)
        # `image` is rebound below, so the draw handle is recreated per box.
        draw = ImageDraw.Draw(image)

        draw.ellipse([(cell_obj_center_x - 3, cell_obj_center_y - 3), (cell_obj_center_x + 3, cell_obj_center_y + 3)], fill=(255, 0, 0), width=6)
        draw.text((cell_obj_center_x + 8, cell_obj_center_y - 6), "CLSID: %s" % class_name, fill=fill_color)

        # Random colour; opacity scales with the box confidence.
        overlay_color = (randrange(255), randrange(255), randrange(255), int(confidence * 80))
        overlay_draw.rectangle([oxmin, oymin, oxmax, oymax], fill=overlay_color)

        image = Image.alpha_composite(image.convert("RGBA"), overlay).convert("RGB")

    return image


In [None]:
# Test code

# samples = 4

# for _ in range(samples):
#     random_sample = randrange(len(annotations))
#     filepath, annotation = annotations[random_sample]

#     image = Image.open(filepath)
#     image = draw_cell_boundaries(image, cells=7)
#     image = draw_center_cell_object(image, annotator, annotation, cells=7)
    
#     plt.figure(figsize=(15, 15))
#     plt.imshow(np.asarray(image))

# Prepare dataset and training
- Take a small fixed set of samples (the first 4 images of the dataset, one image per batch) and pass it through the model.
- We will evaluate the loss via the resulting images and the intermediate console output.

In [None]:
%matplotlib inline

from maintrainer.dataset import VOCYOLOAnnotator, VOCYolo

# Build the VOC annotation parser.
# NOTE(review): both roots are absolute paths on the author's machine; point
# them at the local VOC2008 checkout before running this cell.
annotator = VOCYOLOAnnotator(
    annotation_root=r'C:\Dataset\VOCdevkit\VOC2008\Annotations',
    image_root=r'C:\Dataset\VOCdevkit\VOC2008\JPEGImages'
)

# One entry per sample; presumably (filepath, annotation) pairs, as the
# commented-out test cell unpacks them -- verify against VOCYOLOAnnotator.
annotations = annotator.parse_annotation()
# Both prints show fields of the FIRST sample: the bracketed number in the
# label is the position inside that sample's entry, not the sample index.
print("Annotation[0]:", annotations[0][0])
print("Annotation[1]:", annotations[0][1])

In [None]:
import torchvision.transforms
from torchvision.transforms import *

# Number of samples cached on the GPU for the overfitting experiments below.
NUM_PREVIEW_SAMPLES = 4

# Dataset over the parsed annotations; images are resized to 448x448 and
# converted to tensors (normalisation is currently disabled).
train_dataset = VOCYolo(
    annotator.labels,
    annotations,
    transform=transforms.Compose([
        transforms.Resize((448, 448)),
        transforms.ToTensor(),
#         transforms.Normalize(
#             mean=[0.4547857, 0.4349471, 0.40525291],
#             std=[0.12003352, 0.12323549, 0.1392444]
#         )
    ])
)

# shuffle=False keeps the loader in dataset order, so sample i lines up with
# annotations[i] in the ground-truth visualisation.
train_dataloader = torch.utils.data.DataLoader(train_dataset, shuffle=False, batch_size=1, num_workers=0, pin_memory=True)

# Take exactly the first NUM_PREVIEW_SAMPLES batches and move them to the GPU.
# zip() stops as soon as the range is exhausted, so no extra batch is loaded
# and thrown away (the previous enumerate/break loop fetched one more).
bunch_of_batch = [
    (image.cuda(non_blocking=True), label.cuda(non_blocking=True))
    for _, (image, label) in zip(range(NUM_PREVIEW_SAMPLES), train_dataloader)
]

In [None]:
# Ground-truth overlay for every cached sample, one subplot per sample.
# NOTE: annotations[i] is matched to bunch_of_batch[i] purely by position, so
# this cell is only correct while bunch_of_batch is still in dataset order
# (i.e. before any training cell has shuffled it).
fig = plt.figure(figsize=(32, 8))
fig.suptitle("GT Annotation based visualization", fontsize=24)
for i, (image, _label) in enumerate(bunch_of_batch):
    # One column per cached sample instead of a hard-coded count.
    ax = fig.add_subplot(1, len(bunch_of_batch), i + 1)

    # [1, C, H, W] float tensor in [0, 1] -> [H, W, C] uint8 array for PIL.
    image = torch.squeeze(image.cpu())
    image = (image.numpy().transpose((1, 2, 0)) * 255).astype(np.uint8)

    image = Image.fromarray(image)
    image = draw_cell_boundaries(image)
    image = draw_center_cell_object(image, annotator, annotations[i][1])
    ax.imshow(np.array(image))

In [None]:
def do_visualization(title = None, confidence_threshold = 0.8):
    """Run the model on the cached samples and plot the decoded predictions.

    Relies on the notebook globals ``model``, ``bunch_of_batch`` and
    ``annotator``, and on ``draw_cell_boundaries`` /
    ``draw_center_cell_object_output`` defined earlier in the notebook.

    Args:
        title: Figure title; defaults to "Output based visualization".
        confidence_threshold: Forwarded to ``draw_center_cell_object_output``
            to drop low-confidence predictors.

    The model is switched to eval mode for the forward pass and returned to
    whatever mode it was in before the call, even if the forward pass raises.
    """
    fig = plt.figure(figsize=(40, 10))
    fig.suptitle("Output based visualization" if title is None else title, fontsize=24)

    batched_image = torch.cat([image.cuda(non_blocking=True) for image, _label in bunch_of_batch], 0)

    was_training = model.training
    model.eval()
    try:
        # Pure inference: no autograd graph is needed for the visualisation.
        with torch.no_grad():
            predictions = model(batched_image).cpu().numpy()
    finally:
        model.train(was_training)

    sample_count = predictions.shape[0]
    for i, prediction in enumerate(predictions):
        ax = fig.add_subplot(1, sample_count, i + 1)

        # [C, H, W] float tensor in [0, 1] -> [H, W, C] uint8 array for PIL.
        image = (batched_image[i].cpu().numpy().transpose((1, 2, 0)) * 255).astype(np.uint8)

        image = Image.fromarray(image)
        image = draw_cell_boundaries(image)
        image = draw_center_cell_object_output(image, annotator, prediction, confidence_threshold=confidence_threshold)
        ax.imshow(np.array(image))

    plt.show()

In [None]:
from maintrainer.loss import YoloLoss, YoloLossOld
from model import YOLOv1, YOLOv1Pretrainer
import random
import os

# Checkpoint of the classification pretrainer used to initialise the detector.
# NOTE(review): absolute path on the author's machine; adjust before running.
PRETRAINED_WEIGHT = r'C:\Workspace\study-projects\yolov1-torch\prepare-torch\.pretrained\2021-06-16-10-44-40_FullDataset-LRDecay-Continued-From-22epoch-LowerLR_LR0.000250_BS064_WORKERS16_EPOCHS200_GPU-epoch0043-train_loss1.512860-val_loss1.674705-val_acc0.616029-val_acct50.832321.zip'

if not os.path.isfile(PRETRAINED_WEIGHT):
    # The message previously formatted the undefined name `args.pretrained`,
    # turning a missing file into a NameError. Raising also stops the cell
    # without calling exit() inside the notebook kernel.
    raise FileNotFoundError("Pretrained weight file %s not found!" % PRETRAINED_WEIGHT)

# NOTE: torch.load unpickles the file -- only load checkpoints from a trusted source.
checkpoint = torch.load(PRETRAINED_WEIGHT)
# c_epoch = checkpoint['epoch'] + 1
c_model_state_dict = checkpoint['model_state_dict']
# c_optimizer_state_dict = checkpoint['optimizer_state_dict']
# c_loss = checkpoint['loss']

# Restore the pretrainer weights, wrap it in the detection model, then drop
# the standalone reference to the pretrainer.
pretrainer = YOLOv1Pretrainer(classes=1000)
pretrainer.load_state_dict(c_model_state_dict)
model = YOLOv1(pretrainer).cuda().float()
del pretrainer

In [None]:
# Testing loss with given pseudo output!
# Repeatedly fit the few cached samples with the debug-enabled loss and render
# the predictions at regular intervals.
criterion = YoloLoss(lambda_coord=5, lambda_noobj=0.5, debug=True)
optimizer = torch.optim.Adam(model.parameters(), lr=0.00005)

# batched_image = torch.cat([image for image, label in bunch_of_batch], 0)
# batched_label = torch.cat([label for image, label in bunch_of_batch], 0)

losses = []
for epoch in range(1000):
    random.shuffle(bunch_of_batch)

    # Every 5th epoch (epoch 0 included): decay the LR by 5%, then visualise.
    if not epoch % 5:
        for g in optimizer.param_groups:
            g['lr'] = g['lr'] * 0.95
        do_visualization(title="Epoch %d, LR=%.8f" % (epoch, optimizer.param_groups[0]['lr']))

#     if epoch % 50 == 0:
#         do_visualization(title="Epoch %d, LR=%.8f" % (epoch, optimizer.param_groups[0]['lr'])

    # One optimiser step per cached sample.
    for image, label in bunch_of_batch:
        output = model(image)
        loss = criterion(output, label)
        loss.backward()
        optimizer.step()
        optimizer.zero_grad(set_to_none=True)

        # loss.item() is already a plain Python float.
        losses.append(loss.item())

#     output = model(batched_image)
#     loss = criterion(output, batched_label)
#     loss.backward()
#     optimizer.step()
#     optimizer.zero_grad(set_to_none=True)

#     with torch.no_grad():
#         losses.append(loss.item())

    # NOTE: `losses` is never reset, so this is the mean over every step so
    # far, not the mean of the current epoch alone.
    running_mean = np.average(losses)
    print("Epoch %d Loss: %.6f" % (epoch, running_mean))

In [None]:
# Scratch check: multiplying by a boolean mask keeps the masked-in entries and
# zeroes out everything else.
mask = torch.tensor([[[[False, False, True, False, False]]]])
values = torch.tensor([[[[1.0, 0.8, 0.2, 0.6, 0.4]]]], dtype=torch.float64)

keep = torch.mul(values, mask)
print(keep)

In [None]:
# Training within real-dataset!
# Same per-sample optimisation as the debug run, with loss debugging switched
# off, a constant learning rate and a visualisation every 15 epochs.
criterion = YoloLoss(lambda_coord=5, lambda_noobj=0.5, debug=False)
optimizer = torch.optim.Adam(model.parameters(), lr=0.00005)

losses = []
for epoch in range(1000):
    # random.shuffle(bunch_of_batch)

#     if epoch == 15:
#         do_visualization(title="Epoch %d" % epoch)
#         for g in optimizer.param_groups:
#             g['lr'] = 0.00005
#     elif epoch == 30:
#         do_visualization(title="Epoch %d" % epoch)
#         for g in optimizer.param_groups:
#             g['lr'] = 0.00002
#     elif epoch > 30 and epoch % 15 == 0:
#         do_visualization(title="Epoch %d" % epoch)
#         for g in optimizer.param_groups:
#             g['lr'] *= 0.9

    if not epoch % 15:
        do_visualization(title="Epoch %d" % epoch)

    # One optimiser step per cached sample.
    for image, label in bunch_of_batch:
        output = model(image)
        loss = criterion(output, label)
        loss.backward()
        optimizer.step()
        optimizer.zero_grad(set_to_none=True)

        # loss.item() is already a plain Python float.
        losses.append(loss.item())

    # NOTE: `losses` is never reset, so this is the mean over every step so
    # far, not the mean of the current epoch alone.
    running_mean = np.average(losses)
    print("Epoch %d Loss: %.6f" % (epoch, running_mean))