In [1]:
import os
import numpy as np
import cv2
import torch
import torch.nn as nn
import importlib
import pandas as pd
import random
import pickle
import time
import matplotlib.pyplot as plt
import sys

import torch.backends.cudnn as cudnn
import torch.optim
import torch.utils.data

sys.path.append('./objectDetectionModule/')
from objectDetectionModule.model import SSD300, MultiBoxLoss
from datasets import PascalVOCDataset
from utils import *

import functions as fc

# Define hyperparameters and train the model

In [None]:
# Class name -> integer id. 'background' is id 0 and is listed last.
label_map = dict(
    aquarium=1,
    bottle=2,
    bowl=3,
    box=4,
    bucket=5,
    plastic_bag=6,
    plate=7,
    styrofoam=8,
    tire=9,
    toilet=10,
    tub=11,
    washing_machine=12,
    water_tower=13,
    background=0,
)

# ---- Data ----
# Folder holding the training data files.
data_folder = './dataSet/train_cdc/train_images/'
# Whether objects flagged as difficult to detect are used.
keep_difficult = True

# ---- Model ----
# SSD300 has a fixed architecture, so the class count is the only knob here.
n_classes = len(label_map)
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")

# ---- Learning ----
# Path to a model checkpoint to resume from; None starts from scratch.
checkpoint = None
# Images per batch.
batch_size = 8
# Number of iterations to train.
iterations = 120000
# Worker processes used by the DataLoader.
workers = 4
# Training status is printed every `print_freq` batches.
print_freq = 200
# Base learning rate.
lr = 1e-3
# Learning rate is decayed after these many iterations.
decay_lr_at = [80000, 100000]
# Fraction of the current learning rate kept at each decay step.
decay_lr_to = 0.1
# SGD momentum.
momentum = 0.9
# L2 weight decay.
weight_decay = 5e-4
# Gradient clipping threshold; None disables clipping. Exploding gradients may
# appear at larger batch sizes (sometimes at 32) and show up as a sorting error
# in the MultiBox loss calculation.
grad_clip = None

cudnn.benchmark = True

In [None]:
def train(train_loader, model, criterion, optimizer, epoch):
    """
    One epoch's training.

    :param train_loader: DataLoader for training data; each batch is unpacked as (images, boxes, labels, _)
    :param model: model
    :param criterion: MultiBox loss
    :param optimizer: optimizer
    :param epoch: epoch number (only used in the progress printout)
    :return: unweighted mean of the per-batch loss values seen in this epoch, a float
    :raises ValueError: if train_loader yields no batches
    """
    model.train()  # training mode enables dropout

    batch_time = AverageMeter()  # forward prop. + back prop. time
    data_time = AverageMeter()  # data loading time
    losses = AverageMeter()  # loss

    start = time.time()
    lossList = []
    # Batches
    for i, (images, boxes, labels, _) in enumerate(train_loader):
        data_time.update(time.time() - start)

        # Move to default device
        images = images.to(device)  # (batch_size (N), 3, 300, 300)
        boxes = [b.to(device) for b in boxes]
        labels = [l.to(device) for l in labels]

        # Forward prop.
        predicted_locs, predicted_scores = model(images)  # (N, 8732, 4), (N, 8732, n_classes)

        # Loss
        loss = criterion(predicted_locs, predicted_scores, boxes, labels)  # scalar
        # Read the scalar once; it is reused for both the history list and the running meter
        loss_value = loss.item()
        lossList.append(loss_value)

        # Backward prop.
        optimizer.zero_grad()
        loss.backward()

        # Clip gradients, if necessary
        if grad_clip is not None:
            clip_gradient(optimizer, grad_clip)

        # Update model
        optimizer.step()

        losses.update(loss_value, images.size(0))
        batch_time.update(time.time() - start)

        start = time.time()

        # Print status
        if i % print_freq == 0:
            print('Epoch: [{0}][{1}/{2}]\t'
                  'Batch Time {batch_time.val:.3f} ({batch_time.avg:.3f})\t'
                  'Data Time {data_time.val:.3f} ({data_time.avg:.3f})\t'
                  'Loss {loss.val:.4f} ({loss.avg:.4f})\t'.format(epoch, i, len(train_loader),
                                                                  batch_time=batch_time,
                                                                  data_time=data_time, loss=losses))

    # An empty loader would otherwise end in an opaque failure (unbound locals / division by zero)
    if not lossList:
        raise ValueError('train_loader produced no batches; cannot compute an epoch loss')

    # No explicit `del` of the batch tensors is needed: they are locals and are released on return
    return sum(lossList) / len(lossList)

In [None]:
def main():
    """
    Training.

    Builds a fresh SSD300 and SGD optimizer, or restores both from `checkpoint`, then trains for a
    fixed number of epochs. After every epoch it saves a checkpoint via save_checkpoint(), rewrites the
    per-epoch loss history to './models/accu.pickle' and the model weights to './models/model.pt'.

    Reads the module-level configuration (checkpoint, decay_lr_at, lr, batch_size, ...) without
    modifying it, so the cell can be re-run safely. Only `start_epoch` and `epoch` are published as
    globals, as before.
    """
    global start_epoch, epoch

    # Initialize model or load checkpoint
    if checkpoint is None:
        start_epoch = 0
        model = SSD300(n_classes=n_classes)
        # Initialize the optimizer, with twice the default learning rate for biases, as in the original Caffe repo
        biases = list()
        not_biases = list()
        for param_name, param in model.named_parameters():
            if param.requires_grad:
                if param_name.endswith('.bias'):
                    biases.append(param)
                else:
                    not_biases.append(param)
        optimizer = torch.optim.SGD(params=[{'params': biases, 'lr': 2 * lr}, {'params': not_biases}],
                                    lr=lr, momentum=momentum, weight_decay=weight_decay)

    else:
        # `checkpoint` may be a path, or an already-loaded dict left behind by another cell.
        # It is loaded into a local so the global setting is not clobbered.
        # NOTE(review): the checkpoint stores whole pickled objects; only load files you trust.
        state = checkpoint if isinstance(checkpoint, dict) else torch.load(checkpoint, map_location=device)
        start_epoch = state['epoch'] + 1
        print('\nLoaded checkpoint from epoch %d.\n' % start_epoch)
        model = state['model']
        optimizer = state['optimizer']

    # Move to default device
    model = model.to(device)
    criterion = MultiBoxLoss(priors_cxcy=model.priors_cxcy).to(device)

    # Custom dataloaders
    train_dataset = PascalVOCDataset(data_folder,
                                     split='train',
                                     keep_difficult=keep_difficult)
    train_loader = torch.utils.data.DataLoader(train_dataset, batch_size=batch_size, shuffle=True,
                                               collate_fn=train_dataset.collate_fn, num_workers=workers,
                                               pin_memory=True)  # note that we're passing the collate function here

    # Convert the iteration-based decay schedule to epochs.
    # The paper trains for 120,000 iterations with a batch size of 32 and decays after 80,000 and 100,000
    # iterations, so the paper's batch size (32) is used for the conversion, not `batch_size`.
    # Floored at 1 so datasets with fewer than 32 images do not divide by zero.
    iters_per_epoch = max(1, len(train_dataset) // 32)
    # The total epoch count is fixed rather than derived from `iterations`.
    epochs = 500
    # Kept local: overwriting the global would re-convert already-converted values on a second run.
    decay_epochs = [it // iters_per_epoch for it in decay_lr_at]

    # Make sure the output folder for the loss history and weights exists
    os.makedirs('./models', exist_ok=True)

    lossList = []
    # Epochs
    for epoch in range(start_epoch, epochs):

        # Decay learning rate at particular epochs
        if epoch in decay_epochs:
            adjust_learning_rate(optimizer, decay_lr_to)

        # One epoch's training
        lossx = train(train_loader=train_loader,
                      model=model,
                      criterion=criterion,
                      optimizer=optimizer,
                      epoch=epoch)

        # Save checkpoint
        save_checkpoint(epoch, model, optimizer)
        lossList.append(lossx)
        # Despite the file name, this stores the mean training loss per epoch
        with open('./models/accu.pickle', 'wb') as f:
            pickle.dump(lossList, f)
        torch.save(model.state_dict(), './models/model.pt')


In [None]:
# Run the training loop defined by main(); it writes the loss history and model weights under ./models/.
main()

# Prediction on the test images

In [None]:
# Class name -> integer id (same mapping as in the training configuration cell).
label_map = dict(
    aquarium=1,
    bottle=2,
    bowl=3,
    box=4,
    bucket=5,
    plastic_bag=6,
    plate=7,
    styrofoam=8,
    tire=9,
    toilet=10,
    tub=11,
    washing_machine=12,
    water_tower=13,
    background=0,
)

# Integer id -> class name, obtained by inverting label_map.
rev_label_map = {class_id: class_name for class_name, class_id in label_map.items()}

# Class name -> hex colour string associated with that class.
label_color_map = dict(
    aquarium='#e6194b',
    bottle='#3cb44b',
    bowl='#ffe119',
    box='#0082c8',
    bucket='#f58231',
    plastic_bag='#911eb4',
    plate='#46f0f0',
    styrofoam='#f032e6',
    tire='#d2f53c',
    toilet='#fabebe',
    tub='#008080',
    washing_machine='#000080',
    water_tower='#aa6e28',
    background='#FFFFFF',
)

In [None]:
# Load the pickled test index and build the list of test image paths.
path = './dataSet/test_cdc/test_images/'
with open('./dataSet/test_cdc/datas.pickle', 'rb') as f:
    # NOTE(review): pickle.load executes arbitrary code from the file; only use a trusted datas.pickle.
    datas = pickle.load(f)

# The first field of every entry is appended to the image folder prefix
# (presumably the image file name - confirm against how datas.pickle was produced).
testDatas = [path + datas[idx][0] for idx, _ in enumerate(datas)]

In [None]:
# Select the compute device and load the trained model for inference.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Load model checkpoint
checkpoint_path = './models/checkpoint_ssd300.pth.tar'
# map_location remaps the stored tensors onto `device`, so a checkpoint saved on a GPU
# can still be loaded on a CPU-only machine.
# NOTE(review): the checkpoint holds a whole pickled model object; only load trusted files.
# Recent PyTorch releases may need weights_only=False for this - confirm for the installed version.
checkpoint = torch.load(checkpoint_path, map_location=device)
start_epoch = checkpoint['epoch'] + 1
print('\nLoaded checkpoint from epoch %d.\n' % start_epoch)
model = checkpoint['model']
model = model.to(device)
model.eval()  # inference mode

# Transforms applied to every image before it is fed to the model
# NOTE(review): `transforms` is not imported explicitly in this notebook; presumably it is
# provided by `from utils import *` - confirm.
resize = transforms.Resize((300, 300))
to_tensor = transforms.ToTensor()
normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                 std=[0.229, 0.224, 0.225])

In [None]:
def detect(original_image, min_score, max_overlap, top_k, suppress=None):
    """
    Detect objects in an image with a trained SSD300.

    Drawing of boxes/labels onto the image is currently disabled, so no annotated image is produced.

    :param original_image: image, a PIL Image
    :param min_score: minimum threshold for a detected box to be considered a match for a certain class
    :param max_overlap: maximum overlap two boxes can have so that the one with the lower score is not suppressed via Non-Maximum Suppression (NMS)
    :param top_k: if there are a lot of resulting detection across all classes, keep only the top 'k'
    :param suppress: classes to leave out of the annotation, a list; currently unused because annotation is disabled
    :return: the integer 0 if nothing was detected; otherwise a tuple (-1, det_boxes, det_labels), where
             -1 is a placeholder for the annotated image, det_boxes is a CPU tensor of boxes scaled to the
             original image's pixel dimensions, and det_labels is a list of class names
    """

    # Transform
    image = normalize(to_tensor(resize(original_image)))

    # Move to default device
    image = image.to(device)

    # Pure inference: skip autograd bookkeeping to save memory and time
    with torch.no_grad():
        # Forward prop.
        predicted_locs, predicted_scores = model(image.unsqueeze(0))

        # Detect objects in SSD output (det_scores is not used further here)
        det_boxes, det_labels, det_scores = model.detect_objects(predicted_locs, predicted_scores,
                                                                 min_score=min_score,
                                                                 max_overlap=max_overlap, top_k=top_k)

    # Move detections to the CPU
    det_boxes = det_boxes[0].to('cpu')

    # Transform to original image dimensions
    original_dims = torch.FloatTensor(
        [original_image.width, original_image.height, original_image.width, original_image.height]).unsqueeze(0)
    det_boxes = det_boxes * original_dims

    # Decode class integer labels
    det_labels = [rev_label_map[l] for l in det_labels[0].to('cpu').tolist()]

    # If no objects found, the detected labels will be set to ['0.'], i.e. ['background'] in SSD300.detect_objects() in model.py
    if det_labels == ['background']:
        # Sentinel understood by the caller as "no detections"
        return 0

    return -1, det_boxes, det_labels

In [None]:
# Run detection on every test image and collect [file name, label ids, boxes as (x, y, w, h)].
finalList = []
for img_path in testDatas:
    # The context manager closes the image file; convert() returns a copy that stays usable afterwards
    with Image.open(img_path, mode='r') as opened_image:
        original_image = opened_image.convert('RGB')
    outputs = detect(original_image, min_score=0.2, max_overlap=0.5, top_k=200)
    if outputs == 0:  # detect() returns 0 when nothing was found
        continue
    name = img_path.split('/')[-1]

    labelx = [label_map[label_name] for label_name in outputs[2]]

    loc = outputs[1].cpu().detach().numpy()
    loc = loc.astype(int)
    # Convert corner format (x_min, y_min, x_max, y_max) to (x, y, width, height)
    loc[:, 2] = loc[:, 2] - loc[:, 0]
    loc[:, 3] = loc[:, 3] - loc[:, 1]

    finalList.append([name, labelx, loc])

# One row per detected box: [file name, label id, x, y, w, h]
finalList2 = []
for name, labelx, loc in finalList:
    label_col = np.array(labelx).reshape(-1, 1)
    name_col = np.array([name] * len(labelx)).reshape(-1, 1)
    finalList2.append(np.concatenate([name_col, label_col, loc], axis=1))

if finalList2:
    final3 = np.concatenate(finalList2, axis=0)
else:
    # No detections at all: keep an empty table with the right number of columns
    # instead of failing in np.concatenate
    final3 = np.empty((0, 6), dtype=str)

n = final3.shape[0]
# Placeholder confidence: detect() does not return the per-box scores, so every row gets 0.99
ff = (np.zeros(n) + 0.99).reshape(-1, 1)
final4 = np.concatenate([final3, ff], axis=1)

In [None]:
# Write the detections to CSV, one row per box.
# The height column is named 'h'; the earlier ' h' carried a stray leading space into the header.
os.makedirs('./outputs', exist_ok=True)  # to_csv does not create missing folders
dataFrame = pd.DataFrame(final4, columns=['image_filename', 'label_id', 'x', 'y', 'w', 'h', 'confidence'])
dataFrame.to_csv('./outputs/output1.csv', index=False)