<a href="https://colab.research.google.com/github/shivam-39/Position-and-Rotation-Invariant-Sign-Language-Recognition/blob/master/DOTA.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [2]:
# Mount Google Drive into the Colab runtime at /content/drive.
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [3]:
## dota process

import shapely.geometry as geometry
import numpy as np
import math

# The 15 object category names. read_truths_args looks up an object's 'name'
# in this list and uses its position as the integer class id.
classnames = ['plane', 'baseball-diamond', 'bridge', 'ground-track-field', 'small-vehicle',
              'large-vehicle', 'ship', 'tennis-court', 'basketball-court', 'storage-tank',
              'soccer-ball-field', 'roundabout', 'harbor', 'swimming-pool', 'helicopter']


########################################################
#    parse the dota ground truth in the format:
#    [x1, y1, x2, y2, x3, y3, x4, y4]
########################################################
def read_dota_gt(filename):
    """Load DOTA annotations with every box flattened to eight integers.

    Each object returned by ``parse_dota_box`` has its ``'box'`` entry
    replaced: the four ``(x, y)`` tuples become the flat list
    ``[x1, y1, x2, y2, x3, y3, x4, y4]`` with each value passed through
    ``int`` (truncation toward zero).

    :param filename: path of the annotation text file.
    :return: the list of object dicts produced by ``parse_dota_box``.
    """
    annotations = parse_dota_box(filename)
    for entry in annotations:
        flat = vertex_rect(entry['box'])
        entry['box'] = [int(value) for value in flat]
    return annotations


########################################################
# parse the dota ground truth in the format:
# [(x1, y1), (x2, y2), (x3, y3), (x4, y4)]
########################################################
def parse_dota_box(filename):
    """Parse a DOTA annotation file into a list of object dicts.

    Every line with at least nine whitespace-separated fields is read as
    ``x1 y1 x2 y2 x3 y3 x4 y4 name [difficult]``; shorter lines (blank lines,
    header lines) are skipped.

    :param filename: path of the annotation text file.
    :return: list of dicts with keys ``'name'`` (str), ``'difficult'``
        (str, ``'0'`` when the field is absent), ``'box'`` (four ``(x, y)``
        float tuples) and ``'area'`` (area of the polygon built from the box).
    :raises ValueError: if one of the first eight fields is not a number.
    """
    objects = []
    # The context manager closes the file even if reading fails.
    with open(filename, 'r') as fd:
        txt_lines = fd.readlines()

    for line in txt_lines:
        # split() without an argument tolerates tabs and repeated spaces,
        # which split(' ') would turn into empty or merged fields.
        fields = line.split()
        if len(fields) < 9:
            continue
        corners = [(float(fields[k]), float(fields[k + 1])) for k in range(0, 8, 2)]
        objects.append({
            'name': fields[8],
            'difficult': fields[9] if len(fields) >= 10 else '0',
            'box': corners,
            'area': geometry.Polygon(corners).area,
        })

    return objects


def vertex_rect(box):
    """Flatten the first four ``(x, y)`` points of ``box`` into
    ``[x1, y1, x2, y2, x3, y3, x4, y4]``."""
    flat = []
    for corner in range(4):
        flat.append(box[corner][0])
        flat.append(box[corner][1])
    return flat


def calc_half_iou(box1, box2):
    """Return the overlap of two geometries and its share of ``box1``'s area.

    Both arguments must provide ``intersection`` and ``area``
    (presumably shapely polygons; verify against callers).

    :return: ``(intersection_geometry, intersection_area / box1_area)``.
    """
    overlap = box1.intersection(box2)
    return overlap, overlap.area / box1.area


def boxorig2sub(left, up, box):
    """Shift a flat ``[x, y, x, y, ...]`` box by ``(left, up)``.

    Each shifted coordinate is truncated with ``int`` and stored in a float
    numpy array of the same length as ``box``. A trailing unpaired element
    (odd length) is left at zero.
    """
    shifted = np.zeros(len(box))
    n_points = len(box) // 2
    for x_idx in range(0, 2 * n_points, 2):
        y_idx = x_idx + 1
        shifted[x_idx] = int(box[x_idx] - left)
        shifted[y_idx] = int(box[y_idx] - up)
    return shifted


def box5_box4(box):
    """Reduce a five-vertex polygon to four vertices.

    ``box`` is a flat sequence of ten coordinates ``[x0, y0, ..., x4, y4]``.
    The shortest edge (including the closing edge from vertex 4 back to
    vertex 0) is collapsed: its start vertex is replaced by the edge midpoint
    and its end vertex is dropped. The remaining vertices keep their order.

    :return: flat list of eight coordinates.
    """
    # Lengths of the consecutive edges, then the closing edge 0 -> 4.
    edge_lengths = []
    for k in range(len(box) // 2 - 1):
        start_pt = (box[k * 2], box[k * 2 + 1])
        end_pt = (box[(k + 1) * 2], box[(k + 1) * 2 + 1])
        edge_lengths.append(calc_line_length(start_pt, end_pt))
    edge_lengths.append(calc_line_length((box[0], box[1]), (box[8], box[9])))

    shortest = np.array(edge_lengths).argsort()[0]
    dropped = (shortest + 1) % 5

    reduced = []
    for vertex in range(5):
        if vertex == shortest:
            # Midpoint of this vertex and the next one (wrapping around).
            nxt = (vertex + 1) % 5
            reduced.append((box[vertex * 2] + box[nxt * 2]) / 2)
            reduced.append((box[vertex * 2 + 1] + box[nxt * 2 + 1]) / 2)
        elif vertex != dropped:
            reduced.append(box[vertex * 2])
            reduced.append(box[vertex * 2 + 1])
    return reduced


def calc_line_length(point1, point2):
    """Return the Euclidean distance between two ``(x, y)`` points."""
    delta_x = point1[0] - point2[0]
    delta_y = point1[1] - point2[1]
    return math.sqrt(math.pow(delta_x, 2) + math.pow(delta_y, 2))


def choose_best_point_order_fit_another(box1, box2):
    """Pick the cyclic vertex order of ``box1`` that best matches ``box2``.

    ``box1`` is read as eight flat coordinates (four points). Its four cyclic
    rotations are compared with ``box2`` by the sum of squared coordinate
    differences, and the closest rotation is returned as a numpy array.
    """
    coords = [box1[k] for k in range(8)]
    # Rotation s starts the polygon at point s (two coordinates per point).
    rotations = [np.array(coords[2 * s:] + coords[:2 * s]) for s in range(4)]
    target = np.array(box2)
    costs = np.array([np.sum((candidate - target) ** 2) for candidate in rotations])
    best = costs.argsort()[0]
    return rotations[best]

In [4]:
###   utils

import os
import time
import math
import torch
import numpy as np
from PIL import Image, ImageDraw, ImageFont
####################################################
##  //  import dota_process
import shapely.geometry as geometry
####################################################


def bbox_iou(box1, box2, x1y1x2y2=True):
    """Intersection-over-union of two axis-aligned boxes.

    :param box1: first box; only indices 0..3 are read.
    :param box2: second box; only indices 0..3 are read.
    :param x1y1x2y2: when true the boxes are ``(x1, y1, x2, y2)`` corners,
        otherwise ``(cx, cy, w, h)`` centre/size.
    :return: ``0.0`` when the boxes do not overlap, else the IoU.
    """
    if x1y1x2y2:
        left1, top1, right1, bottom1 = box1[0], box1[1], box1[2], box1[3]
        left2, top2, right2, bottom2 = box2[0], box2[1], box2[2], box2[3]
        w1 = right1 - left1
        h1 = bottom1 - top1
        w2 = right2 - left2
        h2 = bottom2 - top2
    else:
        w1, h1 = box1[2], box1[3]
        w2, h2 = box2[2], box2[3]
        left1 = box1[0] - w1 / 2.0
        right1 = box1[0] + w1 / 2.0
        top1 = box1[1] - h1 / 2.0
        bottom1 = box1[1] + h1 / 2.0
        left2 = box2[0] - w2 / 2.0
        right2 = box2[0] + w2 / 2.0
        top2 = box2[1] - h2 / 2.0
        bottom2 = box2[1] + h2 / 2.0

    # Extent of the smallest box enclosing both inputs.
    span_w = max(right1, right2) - min(left1, left2)
    span_h = max(bottom1, bottom2) - min(top1, top2)
    # Overlap along each axis = sum of sizes minus enclosing extent.
    overlap_w = w1 + w2 - span_w
    overlap_h = h1 + h2 - span_h
    if overlap_w <= 0 or overlap_h <= 0:
        return 0.0

    overlap = overlap_w * overlap_h
    return overlap / (w1 * h1 + w2 * h2 - overlap)


def bbox_ious(boxes1, boxes2, x1y1x2y2=True):
    """Element-wise IoU for two batches of axis-aligned boxes.

    ``boxes1[k]`` / ``boxes2[k]`` (k = 0..3) are tensors holding the k-th box
    component for every box, so the inputs are indexed component-first.

    :param x1y1x2y2: when true the components are ``x1, y1, x2, y2``,
        otherwise ``cx, cy, w, h``.
    :return: tensor of IoU values; non-overlapping pairs get an
        intersection of 0.
    """
    if x1y1x2y2:
        left1, top1, right1, bottom1 = boxes1[0], boxes1[1], boxes1[2], boxes1[3]
        left2, top2, right2, bottom2 = boxes2[0], boxes2[1], boxes2[2], boxes2[3]
        w1 = right1 - left1
        h1 = bottom1 - top1
        w2 = right2 - left2
        h2 = bottom2 - top2
    else:
        w1, h1 = boxes1[2], boxes1[3]
        w2, h2 = boxes2[2], boxes2[3]
        left1 = boxes1[0] - w1 / 2.0
        right1 = boxes1[0] + w1 / 2.0
        top1 = boxes1[1] - h1 / 2.0
        bottom1 = boxes1[1] + h1 / 2.0
        left2 = boxes2[0] - w2 / 2.0
        right2 = boxes2[0] + w2 / 2.0
        top2 = boxes2[1] - h2 / 2.0
        bottom2 = boxes2[1] + h2 / 2.0

    span_w = torch.max(right1, right2) - torch.min(left1, left2)
    span_h = torch.max(bottom1, bottom2) - torch.min(top1, top2)
    overlap_w = w1 + w2 - span_w
    overlap_h = h1 + h2 - span_h
    # Pairs with no positive overlap on either axis do not intersect.
    disjoint = (overlap_w <= 0) | (overlap_h <= 0)

    overlap = overlap_w * overlap_h
    overlap[disjoint] = 0
    return overlap / (w1 * h1 + w2 * h2 - overlap)


# def nms_cls(boxes, nms_thresh):
#     if boxes.__len__() == 0:
#         return boxes
#     det_confs = torch.zeros(boxes.__len__())
#     for i in range(boxes.__len__()):
#         det_confs[i] = boxes[i][4]
#     _,sortIds = torch.sort(det_confs, descending=True)
#     out_boxes = []
#     for i in range(len(boxes)):
#         box_i = boxes[sortIds[i]]
#         if box_i[4] > 0:
#             out_boxes.append(box_i)
#             for j in range(i+1, len(boxes)):
#                 box_j = boxes[sortIds[j]]
#                 if (box_j[4] != 0) & (box_i[6] == box_j[6]):
#                     if bbox_iou(box_i, box_j, x1y1x2y2=False) > nms_thresh:
#                         box_j[4] = 0
#     return out_boxes

def nms(boxes, nms_thresh):
    """Greedy non-maximum suppression over ``(cx, cy, w, h, conf, ...)`` boxes.

    Boxes are visited in order of decreasing confidence (index 4). Each box
    with a positive confidence is kept, and every lower-ranked box whose IoU
    with it exceeds ``nms_thresh`` has its confidence set to 0 in place.

    :param boxes: sequence of mutable boxes; suppressed entries are modified.
    :param nms_thresh: IoU above which a lower-ranked box is suppressed.
    :return: ``boxes`` itself when empty, otherwise the list of kept boxes.
    """
    if len(boxes) == 0:
        return boxes

    total = len(boxes)
    scores = torch.zeros(total)
    for k in range(total):
        scores[k] = boxes[k][4]

    _, order = torch.sort(scores, descending=True)
    kept = []
    for rank in range(total):
        current = boxes[order[rank]]
        if current[4] > 0:
            kept.append(current)
            for later in range(rank + 1, total):
                other = boxes[order[later]]
                if other[4] != 0 and bbox_iou(current, other, x1y1x2y2=False) > nms_thresh:
                    other[4] = 0
    return kept


def convert2cpu(gpu_matrix):
    """Copy ``gpu_matrix`` into a newly allocated CPU ``FloatTensor`` of the same size."""
    cpu_copy = torch.FloatTensor(gpu_matrix.size())
    cpu_copy.copy_(gpu_matrix)
    return cpu_copy


def plot_boxes(img, boxes, savename=None, class_names=None):
    """Draw detection boxes (and optional class labels) onto ``img``.

    Box entries 0..3 are read as ``cx, cy, w, h`` and scaled by the image
    width/height, so they are presumably normalised to [0, 1] (verify against
    callers). When a box has at least 7 entries and ``class_names`` is given,
    entry 5 is printed as the class confidence and entry 6 is used as the
    class index for the label and colour.

    :param img: image exposing ``width``, ``height`` and ``save``.
    :param boxes: iterable of boxes.
    :param savename: if truthy, the annotated image is saved to this path.
    :param class_names: optional list of class names.
    :return: the same ``img`` object, drawn on in place.
    """
    img_w = img.width
    img_h = img.height
    canvas = ImageDraw.Draw(img)
    label_font = ImageFont.truetype('times.ttf', 20)
    for box in boxes:
        left = (box[0] - box[2] / 2.0) * img_w
        top = (box[1] - box[3] / 2.0) * img_h
        right = (box[0] + box[2] / 2.0) * img_w
        bottom = (box[1] + box[3] / 2.0) * img_h

        colour = (255, 0, 0)
        if len(box) >= 7 and class_names:
            cls_conf = box[5]
            cls_id = box[6]
            label = class_names[cls_id]
            print('%s: %f' % (label, cls_conf))
            n_classes = len(class_names)
            offset = cls_id * 123457 % n_classes
            # get_color channel order is 2 = red, 1 = green, 0 = blue.
            colour = tuple(get_color(channel, offset, n_classes) for channel in (2, 1, 0))
            canvas.text((left, top), label, fill=colour, font=label_font)
        draw_rect(canvas, [left, top, right, bottom], colour, 4)
    if savename:
        print("save plot results to %s" % savename)
        img.save(savename)
    return img


def draw_rect(drawcontext, xy, color=None, width=1):
    """Draw ``width`` nested rectangle outlines, one per iteration.

    After each outline the list ``xy`` is modified in place: ``xy[0]`` and
    ``xy[3]`` decrease by 1 while ``xy[1]`` and ``xy[2]`` increase by 1.

    :param drawcontext: object with a ``rectangle(xy, outline=...)`` method.
    :param xy: mutable ``[x1, y1, x2, y2]`` list (mutated by this call).
    :param color: outline colour passed through to ``rectangle``.
    :param width: number of outlines to draw.
    """
    step = 1
    for _ in range(width):
        drawcontext.rectangle(xy, outline=color)
        # Plain assignment (not -=/+=) so the elements themselves are never
        # mutated in place.
        for idx, grows in ((0, False), (1, True), (2, True), (3, False)):
            xy[idx] = xy[idx] + step if grows else xy[idx] - step


def get_color(c, x, max_val):
    """Return one 0-255 colour channel interpolated from a six-entry palette.

    ``x / max_val`` is mapped onto the palette range 0..5 and the channel
    ``c`` is linearly blended between the two neighbouring palette rows.

    :param c: column of the palette to read (0, 1 or 2).
    :param x: position value; ``x / max_val`` is expected within [0, 1].
    :param max_val: value of ``x`` that maps to the last palette row.
    """
    palette = torch.FloatTensor([[1, 0, 1], [0, 0, 1], [0, 1, 1], [0, 1, 0], [1, 1, 0], [1, 0, 0]])
    position = float(x) / max_val * 5
    lower = int(math.floor(position))
    upper = int(math.ceil(position))
    frac = position - lower
    blended = (1 - frac) * palette[lower][c] + frac * palette[upper][c]
    return int(blended * 255)


def truths_length(truths):
    """Return the index of the first of the first 800 rows whose entry 1 is 0.

    Returns 800 when none of those rows has a zero at index 1.
    """
    return next((row for row in range(800) if truths[row][1] == 0), 800)


####################################################
def read_truths_args(lab_path, min_box_scale, shape):
    """Read a DOTA label file and return normalised bounding-box truths.

    Each annotated quadrilateral is replaced by its axis-aligned bounding
    box, expressed as centre/size divided by the image dimensions.

    :param lab_path: path of the DOTA annotation file.
    :param min_box_scale: boxes with ``max(w, h)`` below this value, or with
        ``w * h`` below its square, are dropped.
    :param shape: ``shape[0]`` divides x/width and ``shape[1]`` divides
        y/height (presumably ``(image_width, image_height)``; verify against callers).
    :return: numpy array with one ``[class_index, x, y, w, h]`` row per kept object.
    :raises ValueError: if an object name is not in ``classnames``.
    """
    new_truths = []
    # read_dota_gt and classnames are defined in this file; the former
    # `dota_process` module is not imported here, so going through it
    # raised NameError.
    objects = read_dota_gt(lab_path)
    for obj in objects:
        gtbox = geometry.Polygon([(obj['box'][0], obj['box'][1]),
                                   (obj['box'][2], obj['box'][3]),
                                   (obj['box'][4], obj['box'][5]),
                                   (obj['box'][6], obj['box'][7])])
        # bounds is (minx, miny, maxx, maxy) of the polygon outline.
        out_box = gtbox.exterior.bounds
        x = abs(out_box[2] + out_box[0]) / (2. * shape[0])
        y = abs(out_box[1] + out_box[3]) / (2. * shape[1])
        w = abs(out_box[2] - out_box[0]) / shape[0]
        h = abs(out_box[3] - out_box[1]) / shape[1]
        if (max(w, h) < min_box_scale) or ((w*h) < (min_box_scale*min_box_scale)):
            continue
        c = classnames.index(obj['name'])
        new_truths.append([c, x, y, w, h])
    return np.array(new_truths)
####################################################


def load_class_names(names_file):
    """Read class names from a text file, one name per line.

    Only the newline character is stripped from each line, so blank lines
    yield empty names and surrounding spaces are preserved.

    :param names_file: path of the names file.
    :return: list of names in file order.
    """
    # The context manager closes the file even if reading raises.
    with open(names_file, 'r') as fp:
        return [line.strip('\n') for line in fp]


def read_data_cfg(datacfg):
    """Parse a ``key = value`` data configuration file into a dict.

    The result always contains ``'gpus'`` (default ``'0'``), which the file
    may override. Blank and whitespace-only lines are ignored. Only the
    first ``=`` separates key from value, so values may contain ``=``.

    :param datacfg: path of the configuration file.
    :return: dict mapping stripped keys to stripped string values.
    :raises ValueError: if a non-blank line contains no ``=``.
    """
    options = dict()
    options['gpus'] = '0'
    # The context manager closes the file even if a line fails to parse.
    with open(datacfg, 'r') as fp:
        for raw in fp:
            line = raw.strip()
            if line == '':
                continue
            key, value = line.split('=', 1)
            options[key.strip()] = value.strip()
    return options


def file_lines(file_path):
    """Count the lines of a file, ignoring empty lines at the end.

    A line counts as empty only if it contains nothing but the newline
    character; empty lines in the middle of the file are still counted.

    :param file_path: path of the text file.
    :return: number of lines up to and including the last non-empty one.
    """
    # The context manager closes the file even if reading raises.
    with open(file_path, 'r') as fd:
        contents = fd.readlines()
    count = len(contents)
    while count > 0 and contents[count - 1].strip('\n') == '':
        count -= 1
    return count


def logging(message):
    """Print ``message`` prefixed with the local time as ``YYYY-MM-DD HH:MM:SS``."""
    stamp = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime())
    print('%s %s' % (stamp, message))

In [5]:
########################  config

import torch
##  //  from utils import convert2cpu


def parse_cfg(cfgfile):
    """Parse a Darknet-style ``.cfg`` file into a list of block dicts.

    A ``[section]`` line starts a new block whose ``'type'`` is the section
    name; ``convolutional`` blocks get ``'batch_normalize'`` preset to 0.
    ``key=value`` lines add string entries to the current block. A key named
    ``type`` is stored as ``'_type'`` so it cannot overwrite the section type.
    Blank lines and lines starting with ``#`` are skipped. Only the first
    ``=`` on a line separates key from value.

    :param cfgfile: path of the configuration file.
    :return: list of block dicts in file order.
    :raises ValueError: if a non-section, non-comment line has no ``=``.
    """
    blocks = []
    block = None
    # The context manager closes the file even if a line fails to parse.
    with open(cfgfile, 'r') as fp:
        for raw in fp:
            line = raw.rstrip()
            if line == '' or line[0] == '#':
                continue
            if line[0] == '[':
                if block:
                    blocks.append(block)
                block = dict()
                block['type'] = line.lstrip('[').rstrip(']')
                if block['type'] == 'convolutional':
                    block['batch_normalize'] = 0
            else:
                # maxsplit=1 keeps values that themselves contain '='.
                key, value = line.split('=', 1)
                key = key.strip()
                if key == 'type':
                    key = '_type'
                block[key] = value.strip()

    if block:
        blocks.append(block)
    return blocks


def load_conv(buf, start, conv_model):
    """Fill a convolution's bias (if any) and weight from ``buf``.

    Values are read from ``buf[start:]`` in the order bias, weight; ``buf``
    must be a numpy array accepted by ``torch.from_numpy``.

    :return: offset in ``buf`` just past the consumed values.
    """
    offset = start
    bias = conv_model.bias
    if bias is not None:
        n_bias = bias.numel()
        bias.data.copy_(torch.from_numpy(buf[offset:offset + n_bias]))
        offset = offset + n_bias
    weight = conv_model.weight
    n_weight = weight.numel()
    flat = torch.from_numpy(buf[offset:offset + n_weight])
    weight.data.copy_(flat.view(weight.shape))
    return offset + n_weight


def load_deform_conv(buf, start, conv_model):
    """Fill ``conv_model.weight`` then ``conv_model.layer_1.weight`` from ``buf``.

    ``buf`` must be a numpy array accepted by ``torch.from_numpy``.

    :return: offset in ``buf`` just past the consumed values.
    """
    outer = conv_model.weight
    mid = start + outer.numel()
    outer.data.copy_(torch.from_numpy(buf[start:mid]).view(outer.shape))

    inner = conv_model.layer_1.weight
    end = mid + inner.numel()
    inner.data.copy_(torch.from_numpy(buf[mid:end]).view(inner.shape))
    return end


def save_conv(fp, conv_model):
    """Write a convolution's bias (if any) and then its weight to ``fp``.

    Tensors are dumped with ``numpy.ndarray.tofile``; when the weight lives
    on a CUDA device each tensor is first copied through ``convert2cpu``.
    """
    on_gpu = conv_model.weight.is_cuda
    if conv_model.bias is not None:
        bias = conv_model.bias.data
        (convert2cpu(bias) if on_gpu else bias).numpy().tofile(fp)
    weight = conv_model.weight.data
    (convert2cpu(weight) if on_gpu else weight).numpy().tofile(fp)


def save_deform_conv(fp, conv_model):
    """Write ``conv_model.weight`` then ``conv_model.layer_1.weight`` to ``fp``.

    Tensors are dumped with ``numpy.ndarray.tofile``; when the outer weight
    lives on a CUDA device both are first copied through ``convert2cpu``.
    """
    on_gpu = conv_model.weight.is_cuda
    outer = conv_model.weight.data
    (convert2cpu(outer) if on_gpu else outer).numpy().tofile(fp)
    inner = conv_model.layer_1.weight.data
    (convert2cpu(inner) if on_gpu else inner).numpy().tofile(fp)


def load_conv_bn(buf, start, conv_model, bn_model):
    """Fill a batch-norm layer and its convolution from ``buf``.

    Values are consumed from ``buf[start:]`` in this order: bn bias,
    bn weight, bn running mean, bn running variance, conv weight and,
    if the convolution has one, conv bias. ``buf`` must be a numpy array
    accepted by ``torch.from_numpy``.

    :return: offset in ``buf`` just past the consumed values.
    """
    n_conv = conv_model.weight.numel()
    n_bn = bn_model.bias.numel()

    cursor = start
    bn_targets = (bn_model.bias.data, bn_model.weight.data,
                  bn_model.running_mean, bn_model.running_var)
    for dest in bn_targets:
        dest.copy_(torch.from_numpy(buf[cursor:cursor + n_bn]))
        cursor = cursor + n_bn

    conv_flat = torch.from_numpy(buf[cursor:cursor + n_conv])
    conv_model.weight.data.copy_(conv_flat.view(conv_model.weight.shape))
    cursor = cursor + n_conv

    if conv_model.bias is not None:
        n_conv_bias = conv_model.bias.numel()
        bias_flat = torch.from_numpy(buf[cursor:cursor + n_conv_bias])
        conv_model.bias.data.copy_(bias_flat.view(conv_model.bias.shape))
        cursor = cursor + n_conv_bias
    return cursor


def load_bn(buf, start, bn_model):
    """Fill a batch-norm layer from ``buf``.

    Four equally sized chunks are consumed from ``buf[start:]`` in the order
    bias, weight, running mean, running variance. ``buf`` must be a numpy
    array accepted by ``torch.from_numpy``.

    :return: offset in ``buf`` just past the consumed values.
    """
    chunk = bn_model.bias.numel()
    cursor = start
    targets = (bn_model.bias.data, bn_model.weight.data,
               bn_model.running_mean, bn_model.running_var)
    for dest in targets:
        dest.copy_(torch.from_numpy(buf[cursor:cursor + chunk]))
        cursor = cursor + chunk
    return cursor


def save_bn(fp, bn_model):
    """Write a batch-norm layer to ``fp``.

    The tensors are dumped with ``numpy.ndarray.tofile`` in the order bias,
    weight, running mean, running variance; when the bias lives on a CUDA
    device each tensor is first copied through ``convert2cpu``.
    """
    on_gpu = bn_model.bias.is_cuda
    tensors = (bn_model.bias.data, bn_model.weight.data,
               bn_model.running_mean, bn_model.running_var)
    for tensor in tensors:
        host = convert2cpu(tensor) if on_gpu else tensor
        host.numpy().tofile(fp)


def save_conv_bn(fp, conv_model, bn_model):
    """Write a batch-norm layer followed by its convolution to ``fp``.

    Order written: bn bias, bn weight, bn running mean, bn running variance,
    conv weight and, if present, conv bias. Tensors are dumped with
    ``numpy.ndarray.tofile``; when the bn bias lives on a CUDA device each
    tensor is first copied through ``convert2cpu``.
    """
    on_gpu = bn_model.bias.is_cuda

    def dump(tensor):
        # Bring the tensor to host memory first when the model is on the GPU.
        host = convert2cpu(tensor) if on_gpu else tensor
        host.numpy().tofile(fp)

    dump(bn_model.bias.data)
    dump(bn_model.weight.data)
    dump(bn_model.running_mean)
    dump(bn_model.running_var)
    dump(conv_model.weight.data)
    if conv_model.bias is not None:
        dump(conv_model.bias.data)

In [6]:
##  iorn

import torch
from torch.nn.modules.module import Module
from torch.nn.parameter import Parameter
from torch.nn import functional as F
from torch.autograd.variable import Variable

class ORBatchNorm2d(Module):
    """Batch normalisation that shares statistics across orientation channels.

    The input's channel axis is regrouped so that each run of
    ``nOrientation`` consecutive channels is normalised as one feature
    (see ``forward``); there are ``num_features`` such features.
    """

    def __init__(self, num_features, nOrientation, eps=1e-5, momentum=0.1, affine=True):
        """Store the hyper-parameters and create parameters/buffers.

        :param num_features: number of normalised features (length of the
            weight, bias and running statistics).
        :param nOrientation: number of consecutive channels folded into one feature.
        :param eps: value forwarded to ``F.batch_norm``.
        :param momentum: running-statistics momentum forwarded to ``F.batch_norm``.
        :param affine: when true, learnable ``weight`` and ``bias`` are created;
            otherwise both are registered as ``None``.
        """
        super().__init__()
        self.num_features = num_features
        self.nOrientation = nOrientation
        self.affine = affine
        self.eps = eps
        self.momentum = momentum
        if not self.affine:
            self.register_parameter('weight', None)
            self.register_parameter('bias', None)
        else:
            self.weight = Parameter(torch.Tensor(num_features))
            self.bias = Parameter(torch.Tensor(num_features))
        self.register_buffer('running_mean', torch.zeros(num_features))
        self.register_buffer('running_var', torch.ones(num_features))
        self.reset_parameters()

    def reset_parameters(self):
        """Reset running stats to mean 0 / var 1; weight ~ U[0, 1), bias = 0."""
        self.running_mean.zero_()
        self.running_var.fill_(1)
        if self.affine:
            self.weight.data.uniform_()
            self.bias.data.zero_()

    def forward(self, input):
        """Normalise a ``(batch, channels, h, w)`` tensor; the shape is preserved."""
        n, c, height, width = input.size()

        # Fold the orientation channels into the height axis so that
        # batch_norm sees channels // nOrientation features.
        folded = input.view(n, c // self.nOrientation, height * self.nOrientation, width)
        normed = F.batch_norm(
            folded, self.running_mean, self.running_var, self.weight, self.bias,
            self.training, self.momentum, self.eps)

        return normed.view(n, c, height, width)

    def __repr__(self):
        return '{}({}, eps={}, momentum={}, affine={})'.format(
            self.__class__.__name__, self.num_features, self.eps,
            self.momentum, self.affine)

In [7]:
##  region_loss

##  //  from utils import *
from torch.autograd import Variable
import torch.nn as nn
import torch.nn.functional as F


def build_targets(pred_boxes, target, anchors, num_anchors, nH, nW, noobject_scale, object_scale, sil_thresh, seen):
    """Build the regression/classification targets and masks for the region loss.

    :param pred_boxes: predicted boxes, one ``(x, y, w, h)`` row per
        batch/anchor/cell, indexed as ``b*nA*nH*nW + n*nH*nW + j*nW + i``.
    :param target: per-image ground truth, flattened as up to 800 consecutive
        ``[class, x, y, w, h]`` groups; a group whose ``x`` is 0 ends the list.
        Coordinates are multiplied by ``nW``/``nH`` here, so they are
        presumably normalised to [0, 1] (verify against the data loader).
    :param anchors: flat list of anchor values, ``len(anchors) // num_anchors``
        per anchor (``w, h`` or ``w, h, x, y``).
    :param num_anchors: number of anchors per cell.
    :param nH: grid height.
    :param nW: grid width.
    :param noobject_scale: initial value of the confidence mask.
    :param object_scale: confidence-mask value at cells assigned an object.
    :param sil_thresh: predictions whose best IoU with any ground truth
        exceeds this get a confidence-mask value of 0.
    :param seen: number of samples seen so far; below 12800 all cells are
        additionally pulled towards a default box.
    :return: ``(nGT, nCorrect, coord_mask, conf_mask, cls_mask, tx, ty, tw,
        th, tconf, tcls)`` where the tensors have shape ``(nB, nA, nH, nW)``.
    """
    nB = target.size(0)
    nA = num_anchors
    anchor_step = len(anchors)//num_anchors
    conf_mask = torch.ones(nB, nA, nH, nW) * noobject_scale
    coord_mask = torch.zeros(nB, nA, nH, nW)
    cls_mask = torch.zeros(nB, nA, nH, nW)
    tx = torch.zeros(nB, nA, nH, nW)
    ty = torch.zeros(nB, nA, nH, nW)
    tw = torch.zeros(nB, nA, nH, nW)
    th = torch.zeros(nB, nA, nH, nW)
    tconf = torch.zeros(nB, nA, nH, nW)
    tcls = torch.zeros(nB, nA, nH, nW)

    nAnchors = nA*nH*nW
    nPixels  = nH*nW
    # Pass 1: silence the no-object penalty wherever a prediction already
    # overlaps some ground-truth box well enough.
    for b in range(nB):
        cur_pred_boxes = pred_boxes[b*nAnchors:(b+1)*nAnchors].t()
        cur_ious = torch.zeros(nAnchors)
        for t in range(800):
            if target[b][t*5+1] == 0:
                break
            gx = target[b][t*5+1]*nW
            gy = target[b][t*5+2]*nH
            gw = target[b][t*5+3]*nW
            gh = target[b][t*5+4]*nH
            cur_gt_boxes = torch.FloatTensor([gx, gy, gw, gh]).repeat(nAnchors, 1).t()
            cur_ious = torch.max(cur_ious, bbox_ious(cur_pred_boxes, cur_gt_boxes, x1y1x2y2=False))
        conf_mask[b][cur_ious > sil_thresh] = 0
    # Early in training every cell is regressed towards a default box.
    if seen < 12800:
        if anchor_step == 4:
            # Anchor layout is (w, h, x, y): column 2 is the x offset and
            # column 3 the y offset. ty previously also read column 2.
            tx = torch.FloatTensor(anchors).view(nA, anchor_step).index_select(1, torch.LongTensor([2])).view(1, nA, 1, 1).repeat(nB, 1, nH,nW)
            ty = torch.FloatTensor(anchors).view(num_anchors, anchor_step).index_select(1, torch.LongTensor([3])).view(1, nA, 1, 1).repeat(nB, 1, nH, nW)
        else:
            tx.fill_(0.5)
            ty.fill_(0.5)
        tw.zero_()
        th.zero_()
        coord_mask.fill_(1)

    # Pass 2: assign each ground-truth box to its cell and best-shaped anchor.
    nGT = 0
    nCorrect = 0
    for b in range(nB):
        for t in range(800):
            if target[b][t*5+1] == 0:
                break
            nGT = nGT + 1
            best_iou = 0.0
            best_n = -1
            min_dist = 10000
            gx = target[b][t*5+1] * nW
            gy = target[b][t*5+2] * nH
            gi = int(gx)
            gj = int(gy)
            gw = target[b][t*5+3]*nW
            gh = target[b][t*5+4]*nH
            # Shape-only comparison: both boxes are centred at the origin.
            gt_box = [0, 0, gw, gh]
            for n in range(nA):
                aw = anchors[anchor_step*n]
                ah = anchors[anchor_step*n+1]
                anchor_box = [0, 0, aw, ah]
                iou = bbox_iou(anchor_box, gt_box, x1y1x2y2=False)
                if anchor_step == 4:
                    ax = anchors[anchor_step*n+2]
                    ay = anchors[anchor_step*n+3]
                    dist = pow(((gi+ax) - gx), 2) + pow(((gj+ay) - gy), 2)
                if iou > best_iou:
                    best_iou = iou
                    best_n = n
                elif anchor_step == 4 and iou == best_iou and dist < min_dist:
                    # IoU tie: prefer the anchor whose offset is closer.
                    best_iou = iou
                    best_n = n
                    min_dist = dist

            gt_box = [gx, gy, gw, gh]
            pred_box = pred_boxes[b*nAnchors+best_n*nPixels+gj*nW+gi]

            coord_mask[b][best_n][gj][gi] = 1
            cls_mask[b][best_n][gj][gi] = 1
            conf_mask[b][best_n][gj][gi] = object_scale
            tx[b][best_n][gj][gi] = target[b][t*5+1] * nW - gi
            ty[b][best_n][gj][gi] = target[b][t*5+2] * nH - gj
            tw[b][best_n][gj][gi] = math.log(gw/anchors[anchor_step*best_n])
            th[b][best_n][gj][gi] = math.log(gh/anchors[anchor_step*best_n+1])
            iou = bbox_iou(gt_box, pred_box, x1y1x2y2=False)
            tconf[b][best_n][gj][gi] = iou
            tcls[b][best_n][gj][gi] = target[b][t*5]
            if iou > 0.5:
                nCorrect = nCorrect + 1

    return nGT, nCorrect, coord_mask, conf_mask, cls_mask, tx, ty, tw, th, tconf, tcls


class RegionLoss(nn.Module):
    """YOLOv2-style region loss over a ``(nB, nA*(5+nC), nH, nW)`` output map.

    ``forward`` allocates CUDA tensors directly, so the module requires a GPU.
    """

    def __init__(self, num_classes=0, anchors=[], num_anchors=1):
        """Store the loss configuration.

        :param num_classes: number of object classes (``nC``).
        :param anchors: flat list of anchor values; stored, never modified.
        :param num_anchors: number of anchors per grid cell (``nA``).
        """
        super(RegionLoss, self).__init__()
        self.num_classes = num_classes
        self.anchors = anchors
        self.num_anchors = num_anchors
        # Integer division: anchor_step is used as a tensor dimension in
        # .view(nA, anchor_step), which rejects the float that `/` yields.
        self.anchor_step = len(anchors) // num_anchors
        self.coord_scale = 1
        self.noobject_scale = 1
        self.object_scale = 5
        self.class_scale = 1
        self.thresh = 0.6
        self.seen = 0
        self.iter_cnt = 0

    def forward(self, output, target, batch_idx):
        """Compute the total loss for one batch and print a per-term summary.

        :param output: network output of shape ``(nB, nA*(5+nC), nH, nW)``.
        :param target: ground-truth tensor consumed by ``build_targets``.
        :param batch_idx: unused.
        :return: the summed loss (x, y, w, h, confidence, class).
        """
        nB = output.data.size(0)
        nA = self.num_anchors
        nC = self.num_classes
        nH = output.data.size(2)
        nW = output.data.size(3)

        # Split the channel axis into per-anchor (x, y, w, h, conf, classes...).
        output = output.view(nB, nA, (5+nC), nH, nW)
        x = torch.sigmoid(output.index_select(2, Variable(torch.cuda.LongTensor([0]))).view(nB, nA, nH, nW))
        y = torch.sigmoid(output.index_select(2, Variable(torch.cuda.LongTensor([1]))).view(nB, nA, nH, nW))
        w = output.index_select(2, Variable(torch.cuda.LongTensor([2]))).view(nB, nA, nH, nW)
        h = output.index_select(2, Variable(torch.cuda.LongTensor([3]))).view(nB, nA, nH, nW)
        conf = torch.sigmoid(output.index_select(2, Variable(torch.cuda.LongTensor([4]))).view(nB, nA, nH, nW))
        cls = output.index_select(2, Variable(torch.linspace(5,5+nC-1,nC).long().cuda()))
        cls = cls.view(nB*nA, nC, nH*nW).transpose(1,2).contiguous().view(nB*nA*nH*nW, nC)

        # Decode predictions into grid-unit boxes for target assignment.
        pred_boxes = torch.cuda.FloatTensor(4, nB, nA, nH, nW)
        grid_x = torch.linspace(0, nW-1, nW).repeat(nH,1).repeat(nB*nA, 1, 1).view(nB, nA, nH, nW).cuda()
        grid_y = torch.linspace(0, nH-1, nH).repeat(nW,1).t().repeat(nB*nA, 1, 1).view(nB, nA, nH, nW).cuda()
        anchor_w = torch.Tensor(self.anchors).view(nA, self.anchor_step).index_select(1, torch.LongTensor([0])).cuda()
        anchor_h = torch.Tensor(self.anchors).view(nA, self.anchor_step).index_select(1, torch.LongTensor([1])).cuda()
        anchor_w = anchor_w.repeat(nB, 1).repeat(1, 1, nH*nW).view(nB, nA, nH, nW) #note
        anchor_h = anchor_h.repeat(nB, 1).repeat(1, 1, nH*nW).view(nB, nA, nH, nW) #note
        pred_boxes[0] = x.data + grid_x
        pred_boxes[1] = y.data + grid_y
        pred_boxes[2] = torch.exp(w.data) * anchor_w
        pred_boxes[3] = torch.exp(h.data) * anchor_h
        pred_boxes = convert2cpu(pred_boxes.view(4, -1).transpose(0, 1).contiguous().view(-1, 4))

        nGT, nCorrect, coord_mask, conf_mask, cls_mask, tx, ty, tw, th, tconf,tcls = build_targets(pred_boxes,
                                                        target.data, self.anchors, nA, nH, nW, self.noobject_scale,
                                                        self.object_scale, self.thresh, self.seen)
        cls_mask = (cls_mask == 1)
        nProposals = int((conf.data > 0.25).sum())
        tx    = Variable(tx.cuda())
        ty    = Variable(ty.cuda())
        tw    = Variable(tw.cuda())
        th    = Variable(th.cuda())
        tconf = Variable(tconf.cuda())
        tcls  = Variable(tcls[cls_mask].view(-1).long().cuda())

        coord_mask = Variable(coord_mask.cuda())
        # sqrt because the mask multiplies both operands of the squared error.
        conf_mask  = Variable(conf_mask.cuda().sqrt())
        cls_mask   = Variable(cls_mask.view(-1, 1).repeat(1, nC).cuda())
        cls        = cls[cls_mask].view(-1, nC)

        loss_x = self.coord_scale * nn.MSELoss(size_average=False)(x*coord_mask, tx*coord_mask)/2.0
        loss_y = self.coord_scale * nn.MSELoss(size_average=False)(y*coord_mask, ty*coord_mask)/2.0
        loss_w = self.coord_scale * nn.MSELoss(size_average=False)(w*coord_mask, tw*coord_mask)/2.0
        loss_h = self.coord_scale * nn.MSELoss(size_average=False)(h*coord_mask, th*coord_mask)/2.0
        loss_conf = nn.MSELoss(size_average=False)(conf*conf_mask, tconf*conf_mask)/2.0
        if tcls.shape[0] != 0:
            loss_cls = self.class_scale * nn.CrossEntropyLoss(size_average=False)(cls, tcls)
        else:
            # No ground truth in this batch: use a zero class loss on the
            # same device as the other terms so they can be summed.
            loss_cls = Variable(torch.zeros(1).cuda())
        loss = loss_x + loss_y + loss_w + loss_h + loss_conf + loss_cls

        # .item() works for both 0-dim and one-element tensors, whereas
        # .data[0] fails on 0-dim tensors.
        print('%d: nGT %d, recall %d, proposals %d, loss: x %f, y %f, w %f, h %f, conf %f, cls %f, total %f' %
              (self.seen, nGT, nCorrect, nProposals, loss_x.item(), loss_y.item(), loss_w.item(), loss_h.item(),
               loss_conf.item(), loss_cls.item(), loss.item()))

        self.iter_cnt = self.iter_cnt + 1

        return loss

In [8]:
##   model_network
import torch.nn as nn
import torch.nn.functional as F
import numpy as np
##  //  from region_loss import RegionLoss
##  //  from config import *
##  //  from iorn.modules import ORConv2d
##  //  from iorn_bn import ORBatchNorm2d
from torch.autograd import Variable


def get_region_boxes(output, conf_thresh, num_classes, anchors, num_anchors,
                     x_start=0, y_start=0, imgwidth=0, imgheight=0, validation=False):
    """Decode a region-layer output map into per-image detection lists.

    Intermediate tensors are moved to CUDA, so a GPU is required.

    :param output: tensor of shape ``(batch, (5+num_classes)*num_anchors, h, w)``;
        a 3-D tensor is treated as a batch of one.
    :param conf_thresh: detections need an objectness score above this value.
    :param num_classes: number of object classes.
    :param anchors: flat list of anchor values, ``len(anchors) // num_anchors``
        per anchor; entries 0 and 1 of each anchor are its width and height.
    :param num_anchors: number of anchors per grid cell.
    :param x_start: x offset added in validation mode (presumably the crop's
        origin inside the full image; verify against callers).
    :param y_start: y offset added in validation mode.
    :param imgwidth: divisor for x/width in validation mode.
    :param imgheight: divisor for y/height in validation mode.
    :param validation: when false, boxes are normalised by the grid size;
        when true, they are rescaled by the constant 1024.0, shifted by
        ``(x_start, y_start)`` and divided by ``imgwidth``/``imgheight``.
    :return: list with one entry per image, each a list of
        ``[cx, cy, w, h, det_conf, cls_max_conf, cls_max_id]`` boxes.
    """
    anchor_step = len(anchors) // num_anchors
    if output.dim() == 3:
        output = output.unsqueeze(0)
    batch = output.size(0)
    assert (output.size(1) == (5 + num_classes) * num_anchors)
    h = output.size(2)
    w = output.size(3)

    cells = h * w
    per_image = cells * num_anchors
    total = batch * num_anchors * h * w

    # Rearrange to (5 + num_classes, total): one column per anchor/cell.
    output = output.view(batch * num_anchors, 5 + num_classes, cells)
    output = output.transpose(0, 1).contiguous().view(5 + num_classes, total)

    col_offsets = torch.linspace(0, w - 1, w).repeat(h, 1)
    row_offsets = torch.linspace(0, h - 1, h).repeat(w, 1).t()
    grid_x = col_offsets.repeat(batch * num_anchors, 1, 1).view(total).cuda()
    grid_y = row_offsets.repeat(batch * num_anchors, 1, 1).view(total).cuda()
    xs = torch.sigmoid(output[0]) + grid_x
    ys = torch.sigmoid(output[1]) + grid_y

    anchor_table = torch.Tensor(anchors).view(num_anchors, anchor_step)
    anchor_w = anchor_table.index_select(1, torch.LongTensor([0]))
    anchor_h = anchor_table.index_select(1, torch.LongTensor([1]))
    anchor_w = anchor_w.repeat(batch, 1).repeat(1, 1, cells).view(total).cuda()
    anchor_h = anchor_h.repeat(batch, 1).repeat(1, 1, cells).view(total).cuda()
    ws = torch.exp(output[2]) * anchor_w
    hs = torch.exp(output[3]) * anchor_h

    det_confs = torch.sigmoid(output[4])
    class_scores = Variable(output[5:5 + num_classes].transpose(0, 1))
    cls_confs = torch.nn.Softmax(dim=1)(class_scores).data
    cls_max_confs, cls_max_ids = torch.max(cls_confs, 1)
    cls_max_confs = cls_max_confs.view(-1)
    cls_max_ids = cls_max_ids.view(-1)

    box_idx = torch.nonzero(det_confs > conf_thresh)

    all_boxes = []
    for b_idx in range(batch):
        first = b_idx * per_image
        stop = per_image * (b_idx + 1)
        boxes = []
        for ind in box_idx:
            # Keep only the candidates that belong to this image.
            if (ind >= first) and (ind < stop):
                det_conf = det_confs[ind][0]
                bcx = xs[ind][0]
                bcy = ys[ind][0]
                bw = ws[ind][0]
                bh = hs[ind][0]
                cls_max_conf = cls_max_confs[ind][0]
                cls_max_id = cls_max_ids[ind][0]
                if validation:
                    box = [(x_start + (bcx / w) * 1024.0) / imgwidth, (y_start + (bcy / h) * 1024.0) / imgheight,
                           ((bw / w) * 1024.0) / imgwidth, ((bh / h) * 1024.0) / imgheight, det_conf, cls_max_conf,
                           cls_max_id]
                else:
                    box = [bcx / w, bcy / h, bw / w, bh / h, det_conf, cls_max_conf, cls_max_id]
                boxes.append(box)
        all_boxes.append(boxes)
    return all_boxes


class MaxPoolStride(nn.Module):
    """2x2 max pooling with stride 1 that keeps the spatial size.

    The input is padded by one replicated column on the right and one
    replicated row at the bottom before pooling.
    """

    def __init__(self):
        super().__init__()

    def forward(self, x):
        padded = F.pad(x, (0, 1, 0, 1), mode='replicate')
        return F.max_pool2d(padded, 2, stride=1)


class EmpotyModule(nn.Module):
    """Identity placeholder module: ``forward`` hands its input back untouched.

    ``deformDarknet.create_network`` appends one of these for every 'route'
    block so that module indices stay aligned with cfg block indices.
    """

    def __init__(self):
        super(EmpotyModule, self).__init__()

    def forward(self, x):
        # No computation: the very same object is returned.
        result = x
        return result


class deformDarknet(nn.Module):
    """Darknet-style network assembled from a parsed cfg file.

    ``parse_cfg(cfgfile)`` yields a list of block dictionaries.  Every block
    after the leading 'net' block becomes one entry of ``self.models``, so the
    module index of a layer is its cfg block index minus one.

    Supported block types: 'net', 'iorn_convolutional', 'convolutional',
    'trans_conv', 'maxpool', 'route' and 'region'.
    """

    def __init__(self, cfgfile):
        super(deformDarknet, self).__init__()
        self.blocks = parse_cfg(cfgfile)
        self.models = self.create_network(self.blocks)
        # The last module built is exposed as the loss (a RegionLoss when the
        # cfg ends with a 'region' block).
        self.loss = self.models[len(self.models) - 1]

        self.width = int(self.blocks[0]['width'])
        self.height = int(self.blocks[0]['height'])

        if self.blocks[(len(self.blocks) - 1)]['type'] == 'region':
            self.anchors = self.loss.anchors
            self.num_anchors = self.loss.num_anchors
            self.anchor_step = self.loss.anchor_step
            self.num_classes = self.loss.num_classes

        # Four int32 values written at the start of a weight file; slot 3
        # stores ``seen`` (see load_weights / save_weights).
        self.header = torch.IntTensor([0, 0, 0, 0])
        self.seen = 0

    @staticmethod
    def _route_layers(block, ind):
        """Return the absolute layer indices named by a 'route' block.

        Negative entries are offsets relative to *ind* (the route's own
        index); non-negative entries are absolute layer indices.
        """
        resolved = []
        for token in block['layers'].split(','):
            layer = int(token)
            resolved.append(layer if layer >= 0 else layer + ind)
        return resolved

    @staticmethod
    def _add_activation(model, activation, conv_id):
        """Append the activation selected in the cfg; unknown names add nothing."""
        if activation == 'leaky':
            model.add_module('leaky{0}'.format(conv_id), nn.LeakyReLU(0.1, inplace=True))
        elif activation == 'relu':
            model.add_module('relu{0}'.format(conv_id), nn.ReLU(inplace=True))

    def forward(self, x):
        """Run *x* through every layer and return the last layer's output.

        'region' blocks are skipped, so the returned tensor is the raw network
        output.  ``self.loss`` is reset to ``None`` on every call.
        """
        # ind is -1 after the 'net' block, so the first real layer gets index 0.
        ind = -2
        self.loss = None
        outputs = dict()
        for block in self.blocks:
            ind = ind + 1
            block_type = block['type']
            if block_type == 'net':
                continue
            elif block_type in ('iorn_convolutional', 'convolutional', 'maxpool', 'trans_conv'):
                x = self.models[ind](x)
                outputs[ind] = x
            elif block_type == 'route':
                layers = self._route_layers(block, ind)
                feats = [outputs[layer] for layer in layers]
                # A single source is passed through as-is; several sources are
                # concatenated along the channel dimension (dim 1).
                x = feats[0] if len(feats) == 1 else torch.cat(feats, 1)
                outputs[ind] = x
            elif block_type == 'region':
                continue
            else:
                print('unknown type %s' % (block['type']))
        return x

    def create_network(self, blocks):
        """Build an ``nn.ModuleList`` with one module per non-'net' block.

        ``out_filters[i]`` records the channel count produced by module *i* so
        that 'route' blocks can compute the channel count of their output.
        """
        models = nn.ModuleList()
        prev_filters = 3
        out_filters = []
        conv_id = 0
        for block in blocks:
            if block['type'] == 'net':
                prev_filters = int(block['channels'])
                continue
            elif block['type'] == 'iorn_convolutional':
                conv_id = conv_id + 1
                # NOTE(review): iorn_id is always 1 here, so the `else`
                # branches below are currently never taken.
                iorn_id = 1
                batch_normalize = int(block['batch_normalize'])
                filters = int(block['filters'])
                activation = block['activation']
                pad = int(block['pad'])
                dilate = int(block['dilate'])
                stride = int(block['stride'])
                nOrientation = int(block['nOrientation'])
                model = nn.Sequential()
                if batch_normalize:
                    if iorn_id == 1:
                        conv = ORConv2d(prev_filters, filters // nOrientation,
                                        arf_config=(1, nOrientation), kernel_size=3,
                                        padding=pad, stride=stride, dilation=dilate)
                    else:
                        conv = ORConv2d(prev_filters // nOrientation, filters // nOrientation,
                                        arf_config=nOrientation, kernel_size=3,
                                        padding=pad, stride=stride, dilation=dilate)
                    model.add_module('conv{0}'.format(conv_id), conv)
                    model.add_module('bn{0}'.format(conv_id), ORBatchNorm2d(filters // nOrientation, nOrientation))
                else:
                    # NOTE(review): unlike the batch-norm path, this path ignores
                    # the cfg's pad/stride/dilate and uses a different
                    # arf_config for the first layer -- confirm that is intended.
                    if iorn_id == 1:
                        # Integer division: a channel count must be an int
                        # (the original used `/`, which is a float in Python 3).
                        conv = ORConv2d(prev_filters, filters // nOrientation, arf_config=nOrientation,
                                        kernel_size=3, padding=1)
                    else:
                        conv = ORConv2d(prev_filters // nOrientation, filters // nOrientation,
                                        arf_config=nOrientation, kernel_size=3, padding=1)
                    model.add_module('conv{0}'.format(conv_id), conv)
                self._add_activation(model, activation, conv_id)
                prev_filters = filters
                out_filters.append(prev_filters)
                models.append(model)
            elif block['type'] == 'convolutional':
                conv_id = conv_id + 1
                batch_normalize = int(block['batch_normalize'])
                filters = int(block['filters'])
                kernel_size = int(block['size'])
                stride = int(block['stride'])
                is_pad = int(block['pad'])
                pad = (kernel_size - 1) // 2 if is_pad else 0
                activation = block['activation']
                model = nn.Sequential()
                if batch_normalize:
                    # The bias is dropped when batch norm follows the convolution.
                    model.add_module('conv{0}'.format(conv_id),
                                     nn.Conv2d(prev_filters, filters, kernel_size, stride, pad, bias=False))
                    model.add_module('bn{0}'.format(conv_id), nn.BatchNorm2d(filters))
                else:
                    model.add_module('conv{0}'.format(conv_id),
                                     nn.Conv2d(prev_filters, filters, kernel_size, stride, pad))
                self._add_activation(model, activation, conv_id)
                prev_filters = filters
                out_filters.append(prev_filters)
                models.append(model)
            elif block['type'] == 'trans_conv':
                conv_id = conv_id + 1
                batch_normalize = int(block['batch_normalize'])
                filters = int(block['filters'])
                kernel_size = int(block['size'])
                stride = int(block['stride'])
                is_pad = int(block['pad'])
                pad = (kernel_size - 1) // 2 if is_pad else 0
                activation = block['activation']
                model = nn.Sequential()
                if batch_normalize:
                    model.add_module('conv{0}'.format(conv_id),
                                     nn.ConvTranspose2d(prev_filters, filters, kernel_size, stride,
                                                        pad, output_padding=1, bias=False))
                    model.add_module('bn{0}'.format(conv_id), nn.BatchNorm2d(filters))
                else:
                    # NOTE(review): no output_padding here, unlike the
                    # batch-norm path above -- confirm that is intended.
                    model.add_module('conv{0}'.format(conv_id),
                                     nn.ConvTranspose2d(prev_filters, filters, kernel_size, stride, pad))
                self._add_activation(model, activation, conv_id)
                prev_filters = filters
                out_filters.append(prev_filters)
                models.append(model)
            elif block['type'] == 'maxpool':
                pool_size = int(block['size'])
                stride = int(block['stride'])
                if stride > 1:
                    model = nn.MaxPool2d(pool_size, stride)
                else:
                    # Stride-1 pooling uses the size-preserving variant.
                    model = MaxPoolStride()
                out_filters.append(prev_filters)
                models.append(model)
            elif block['type'] == 'route':
                ind = len(models)
                layers = self._route_layers(block, ind)
                if len(layers) > 1:
                    # Multi-input routes must list the previous layer first.
                    assert (layers[0] == ind - 1)
                # Concatenation along channels: the output width is the sum of
                # the routed layers' channel counts.
                prev_filters = sum(out_filters[layer] for layer in layers)
                out_filters.append(prev_filters)
                models.append(EmpotyModule())
            elif block['type'] == 'region':
                loss = RegionLoss()
                anchors = block['anchors'].split(',')
                loss.anchors = [float(i) for i in anchors]
                loss.num_classes = int(block['classes'])
                loss.num_anchors = int(block['num'])
                loss.anchor_step = len(loss.anchors) // loss.num_anchors
                loss.object_scale = float(block['object_scale'])
                loss.noobject_scale = float(block['noobject_scale'])
                loss.class_scale = float(block['class_scale'])
                loss.coord_scale = float(block['coord_scale'])
                out_filters.append(prev_filters)
                models.append(loss)
            else:
                print('unknown type %s' % (block['type']))

        return models

    def load_weights(self, weightfile):
        """Load layer weights from the binary file *weightfile*.

        The file starts with four int32 header values (slot 3 is ``seen``)
        followed by float32 data that is consumed layer by layer.  Loading
        stops early once the buffer is exhausted.
        """
        with open(weightfile, 'rb') as fp:
            header = np.fromfile(fp, count=4, dtype=np.int32)
            buf = np.fromfile(fp, dtype=np.float32)
        self.header = torch.from_numpy(header)
        self.seen = self.header[3]

        start = 0
        ind = -2
        for block in self.blocks:
            if start >= buf.size:
                break
            ind = ind + 1
            if block['type'] == 'net':
                continue
            elif block['type'] in ('iorn_convolutional', 'convolutional', 'trans_conv'):
                model = self.models[ind]
                batch_normalize = int(block['batch_normalize'])
                if batch_normalize:
                    # model[0] is the convolution, model[1] its batch norm.
                    start = load_conv_bn(buf, start, model[0], model[1])
                else:
                    start = load_conv(buf, start, model[0])
            elif block['type'] in ('maxpool', 'route', 'region'):
                # These blocks carry no weights.
                pass
            else:
                print('unknown type %s' % (block['type']))

    def save_weights(self, outfile, cutoff=0):
        """Write the header and the weights of blocks ``1..cutoff`` to *outfile*.

        A non-positive *cutoff* means "all blocks".
        """
        if cutoff <= 0:
            cutoff = len(self.blocks) - 1

        with open(outfile, 'wb') as fp:
            self.header[3] = self.seen
            header = self.header
            header.numpy().tofile(fp)
            for blockId in range(1, cutoff + 1):
                # Module index is the block index minus the leading 'net' block.
                ind = blockId - 1
                block = self.blocks[blockId]
                if block['type'] in ('iorn_convolutional', 'convolutional', 'trans_conv'):
                    model = self.models[ind]
                    batch_normalize = int(block['batch_normalize'])
                    if batch_normalize:
                        save_conv_bn(fp, model[0], model[1])
                    else:
                        save_conv(fp, model[0])
                elif block['type'] in ('maxpool', 'route', 'region'):
                    pass
                else:
                    print('unknown type %s' % (block['type']))

In [9]:
## image

import random
from PIL import Image
import numpy as np
####################################################
import shapely.geometry as geometry
##  //  import dota_process
####################################################


def distort_image(im, hue, sat, val):
    """Return an RGB copy of *im* with its HSV channels perturbed.

    The image is converted to HSV, the hue channel is shifted by
    ``hue * 255`` (wrapped back by 255 when it leaves the 0..255 range), the
    saturation channel is multiplied by *sat*, the value channel by *val*,
    and the result is converted back to RGB.
    """
    def shift_hue(level):
        shifted = level + hue * 255
        if shifted > 255:
            shifted -= 255
        if shifted < 0:
            shifted += 255
        return shifted

    hsv = im.convert('HSV')
    hue_band, sat_band, val_band = hsv.split()
    bands = (
        hue_band.point(shift_hue),
        sat_band.point(lambda i: i * sat),
        val_band.point(lambda i: i * val),
    )
    return Image.merge(hsv.mode, bands).convert('RGB')


def rand_scale(s):
    """Draw a random factor from ``uniform(1, s)`` and return it or its reciprocal.

    A second random draw picks between the two: an odd draw returns the
    factor itself, an even draw returns ``1 / factor``.
    """
    factor = random.uniform(1, s)
    invert = random.randint(1, 10000) % 2 == 0
    return 1. / factor if invert else factor


def random_distort_image(im, hue, saturation, exposure):
    """Apply ``distort_image`` with randomly drawn hue/saturation/exposure factors.

    The hue shift is uniform in ``[-hue, hue]``; the saturation and exposure
    factors come from ``rand_scale``.
    """
    # Arguments are evaluated left to right, which keeps the order of the
    # random draws: hue first, then saturation, then exposure.
    return distort_image(
        im,
        random.uniform(-hue, hue),
        rand_scale(saturation),
        rand_scale(exposure),
    )


####################################################
def data_augmentation(img, shape, jitter, hue, saturation, exposure):
    """Randomly crop, resize, optionally mirror and colour-distort *img*.

    Each border is moved in or out by a random amount of at most
    ``jitter`` times the image size, the resulting window is resized to
    *shape*, mirrored left-right with a random coin flip, and finally
    passed through ``random_distort_image``.

    Returns ``(image, flip, swidth, sheight, pleft, ptop)`` where
    ``swidth``/``sheight`` are the crop window size and ``pleft``/``ptop``
    its top-left offset in the source image.
    """
    src_w, src_h = img.width, img.height
    max_dx = int(src_w * jitter)
    max_dy = int(src_h * jitter)

    # The order of the random draws is part of the behaviour; keep it.
    pleft = random.randint(-max_dx, max_dx)
    pright = random.randint(-max_dx, max_dx)
    ptop = random.randint(-max_dy, max_dy)
    pbot = random.randint(-max_dy, max_dy)

    swidth = src_w - pleft - pright
    sheight = src_h - ptop - pbot

    flip = random.randint(1, 10000) % 2

    crop_window = (pleft, ptop, pleft + swidth - 1, ptop + sheight - 1)
    sized = img.crop(crop_window).resize(shape)
    if flip:
        sized = sized.transpose(Image.FLIP_LEFT_RIGHT)

    distorted = random_distort_image(sized, hue, saturation, exposure)
    return distorted, flip, swidth, sheight, pleft, ptop
####################################################


def fill_truth_detection(labpath, w, h, flip, swidth, sheight, pleft, ptop):
    """Build the flat training label vector for one augmented crop.

    Ground-truth boxes are read from *labpath*, intersected with the part of
    the crop window that lies inside the ``w`` x ``h`` image, and converted
    to ``(class, cx, cy, width, height)`` rows normalised by the crop size
    (``swidth``, ``sheight``).  When *flip* is set the x centre is mirrored.

    Returns a 1-D array of length ``max_boxes * 5``; unused rows stay zero.
    The result is always flat, including when no objects could be read.

    NOTE(review): this relies on a ``dota_process`` name being in scope; the
    corresponding import is commented out in this file -- confirm.
    """
    max_boxes = 800
    cc = 0
    label = np.zeros((max_boxes, 5))

    # Crop window clipped to the image extents.
    left = max(pleft, 0)
    right = min((pleft + swidth), w)
    up = max(ptop, 0)
    down = min((ptop + sheight), h)

    objects = dota_process.read_dota_gt(labpath)
    if objects is None:
        # Same flat shape as the normal return path.
        return np.reshape(label, (-1))
    imgbox = geometry.Polygon([(left, up), (right, up), (right, down),
                               (left, down)])

    for obj in objects:
        gtbox = geometry.Polygon([(obj['box'][0], obj['box'][1]),
                                  (obj['box'][2], obj['box'][3]),
                                  (obj['box'][4], obj['box'][5]),
                                  (obj['box'][6], obj['box'][7])])
        if gtbox.area <= 0:
            continue
        inter_box, half_iou = dota_process.calc_half_iou(gtbox, imgbox)
        if half_iou == 1:
            # Box lies completely inside the window: only shift it.
            boxInsub = dota_process.boxorig2sub(pleft, ptop, obj['box'])
        elif half_iou > 0.3:
            # Partially visible box: use the clipped polygon instead.
            inter_box = geometry.polygon.orient(inter_box, sign=1)
            out_box = list(inter_box.exterior.coords)[0: -1]
            if len(out_box) < 4:
                continue
            out_box2 = []
            for point in out_box:
                out_box2.append(point[0])
                out_box2.append(point[1])

            if len(out_box) == 5:
                out_box2 = dota_process.box5_box4(out_box2)
            elif len(out_box) > 5:
                continue
            out_box2 = dota_process.choose_best_point_order_fit_another(out_box2, obj['box'])
            boxInsub = dota_process.boxorig2sub(left, up, out_box2)

            # Clamp x coordinates to [1, swidth] and y coordinates to [1, sheight].
            for index, item in enumerate(boxInsub):
                limit = swidth if index % 2 == 0 else sheight
                if item <= 1:
                    boxInsub[index] = 1
                elif item >= limit:
                    boxInsub[index] = limit
        else:
            continue
        length = max(np.abs(np.subtract(boxInsub[0], boxInsub[4])), np.abs(np.subtract(boxInsub[1], boxInsub[5])))
        boxInsub = [(boxInsub[0], boxInsub[1]), (boxInsub[2], boxInsub[3]), (boxInsub[4], boxInsub[5]),
                    (boxInsub[6], boxInsub[7])]
        # Build the polygon once and reuse it for the area and the bounds.
        sub_poly = geometry.Polygon(boxInsub)

        # Drop boxes that are both very short and very small.
        if (length / min(swidth, sheight)) < (5.0/704.0) and sub_poly.area < 15:
            continue
        rect_minx, rect_miny, rect_maxx, rect_maxy = sub_poly.bounds
        rel_w = abs(rect_maxx - rect_minx) / swidth
        rel_h = abs(rect_maxy - rect_miny) / sheight
        if max(rel_w, rel_h) < (5.0/704.0) or min(rel_w, rel_h) <= 0:
            continue
        label[cc][0] = dota_process.classnames.index(obj['name'])
        if flip:
            label[cc][1] = 0.999 - abs(rect_maxx + rect_minx) / (2.0 * swidth)
        else:
            label[cc][1] = abs(rect_maxx + rect_minx) / (2.0 * swidth)
        label[cc][2] = abs(rect_maxy + rect_miny) / (2.0 * sheight)
        label[cc][3] = abs(rect_maxx - rect_minx) / swidth
        label[cc][4] = abs(rect_maxy - rect_miny) / sheight
        cc += 1
        if cc >= max_boxes:
            break
    label = np.reshape(label, (-1))
    return label
####################################################


def load_data_detection(imgpath, shape, jitter, hue, saturation, exposure):
    """Load one training sample: the augmented image and its label vector.

    The label path is derived from *imgpath* by replacing 'images' or
    'JPEGImages' with 'labels' and the '.jpg'/'.png' extension with '.txt'.

    Returns ``(img, label)``: the augmented image resized to *shape* and the
    flat label array produced by ``fill_truth_detection``.
    """
    labpath = imgpath.replace('images', 'labels').replace('JPEGImages', 'labels').replace('.jpg', '.txt').replace('.png', '.txt')

    img = Image.open(imgpath).convert('RGB')
    # Remember the source size before `img` is replaced by the resized crop:
    # fill_truth_detection clips the crop window against the ORIGINAL image
    # extents, in the same coordinate system as the ground-truth boxes.
    orig_width, orig_height = img.size

    # data augmentation
    img, flip, swidth, sheight, pleft, ptop = data_augmentation(img, shape, jitter, hue, saturation, exposure)
    label = fill_truth_detection(labpath, orig_width, orig_height, flip, swidth, sheight, pleft, ptop)
    return img, label

In [10]:
# dataset

import torch
from torch.utils.data import Dataset
##  //  from utils import read_truths_args
##  //  from image import *


class listDataset(Dataset):
    """Dataset backed by a text file that lists one image path per line.

    In training mode each sample is augmented via ``load_data_detection``;
    otherwise the image is only resized to ``shape`` (when given) and its
    labels are read with ``read_truths_args``.  Labels are returned as a flat
    tensor of ``800 * 5`` values.
    """

    # Flat label length: up to 800 boxes with 5 values each.
    _MAX_LABEL_LEN = 800 * 5

    def __init__(self, root, shape=None, shuffle=True, transform=None, target_transform=None, train=False, seen=0, batch_size=64, num_workers=24):
        with open(root, 'r') as file:
            self.lines = file.readlines()

        if shuffle:
            random.shuffle(self.lines)

        self.nSamples = len(self.lines)
        self.transform = transform
        self.target_transform = target_transform
        self.train = train
        self.shape = shape
        self.seen = seen
        self.batch_size = batch_size
        self.num_workers = num_workers

    def __len__(self):
        return self.nSamples

    def __getitem__(self, index):
        """Return ``(img, label)`` for the sample at *index*."""
        # Valid indices stop at len(self) - 1 (the original `<=` let
        # index == len(self) slip through to a bare IndexError).
        assert index < len(self), 'index range error'
        imgpath = self.lines[index].rstrip()

        if self.train:
            jitter = 0.2
            hue = 0.1
            saturation = 1.5
            exposure = 1.5

            img, label = load_data_detection(imgpath, self.shape, jitter, hue, saturation, exposure)
            label = torch.from_numpy(label)
        else:
            img = Image.open(imgpath).convert('RGB')
            if self.shape:
                img = img.resize(self.shape)

            labpath = imgpath.replace('images', 'labels').replace('JPEGImages', 'labels').replace('.jpg', '.txt').replace('.png','.txt')
            label = torch.zeros(self._MAX_LABEL_LEN)
            try:
                tmp = torch.from_numpy(read_truths_args(labpath, 5.0/img.width, self.shape).astype('float32'))
            except Exception:
                # Best effort: a label file that cannot be read yields an
                # all-zero label instead of aborting the whole epoch.
                tmp = torch.zeros(1, 5)
            tmp = tmp.view(-1)
            tsz = tmp.numel()
            if tsz > self._MAX_LABEL_LEN:
                # Too many boxes: keep only what fits.
                label = tmp[0:self._MAX_LABEL_LEN]
            elif tsz > 0:
                label[0:tsz] = tmp

        if self.transform is not None:
            img = self.transform(img)

        if self.target_transform is not None:
            label = self.target_transform(label)

        # Not in-place on purpose: `seen` may be a tensor shared with the model.
        self.seen = self.seen + self.num_workers
        return (img, label)

In [13]:
## test for map

##  //  from utils import *
##  //  from deform_darknet import deformDarknet, get_region_boxes
from torchvision import transforms
from torch.autograd import Variable

# Seed torch's RNG from the wall clock; `seed` is also read by detect()
# below to seed the CUDA RNG.
seed = int(time.time())
torch.manual_seed(seed)


def write_boxes(img, file_name, boxes, class_names=None, Result_dir=None):
    """Append detections to one ``Task2_<class>.txt`` file per class.

    Each box is ``[cx, cy, w, h, det_conf, cls_conf, cls_id]`` with
    coordinates relative to the image size.  One line is written per box to
    the file of its class:
    ``<image stem> <det_conf * cls_conf> <x1> <y1> <x2> <y2>``
    where the corner coordinates are in pixels of *img*.

    Files are opened in append mode inside *Result_dir* and are closed even
    if writing fails part-way.
    """
    width = img.width
    height = img.height
    # File name without directory and without anything after the first dot.
    image_stem = file_name.split('/')[-1].split('.')[0]

    file_class = []
    try:
        for class_name in class_names:
            file_class.append(open(Result_dir + '/Task2_'+class_name+'.txt', 'a'))

        for box in boxes:
            # Centre/size -> corner coordinates in pixels.
            x1 = (box[0] - box[2]/2.0) * width
            y1 = (box[1] - box[3]/2.0) * height
            x2 = (box[0] + box[2]/2.0) * width
            y2 = (box[1] + box[3]/2.0) * height

            cls_id = box[6]
            conf = box[4]*box[5]
            fields = [image_stem, str(conf), str(x1), str(y1), str(x2), str(y2)]
            file_class[cls_id].write(' '.join(fields)+'\n')
    finally:
        for handle in file_class:
            handle.close()


def detect(model, weightfile, imgfile, Result_dir):
    """Run tiled detection over every image listed in *imgfile*.

    Weights are loaded from *weightfile*.  Each image is scanned with
    1024x1024 tiles at a stride of 512 pixels; tiles that would run past the
    right or bottom edge are shifted back so they end at the edge.  The boxes
    of all tiles are merged with NMS and appended to the per-class result
    files in *Result_dir* via ``write_boxes``.
    """
    conf_thresh = 0.01
    nms_thresh = 0.4
    # NOTE(review): the tile size must stay in sync with the 1024.0 constant
    # used by get_region_boxes(validation=True).
    tile_size = 1024
    tile_stride = 1024 - 512

    model.load_weights(weightfile)
    print('Loading weights from %s... Done!' % (weightfile))
    namesfile = 'data/dota.names'
    model.eval()
    use_cuda = 1
    if use_cuda:
        os.environ['CUDA_VISIBLE_DEVICES'] = '0'
        torch.cuda.manual_seed(seed)
        model.cuda()

    with open(imgfile) as img_list_file:
        img_list = img_list_file.readlines()

    # The class names do not depend on the image: load them once.
    class_names = load_class_names(namesfile)

    for imgpath in img_list:
        imgpath = imgpath.strip('\n')
        if not imgpath:
            # Skip blank lines (e.g. a trailing newline at the end of the list).
            continue
        img = Image.open(imgpath).convert('RGB')
        all_boxes = []
        # NOTE(review): tiles near the right/bottom edge can clamp to the same
        # origin and are then evaluated more than once; NMS below is relied
        # on to merge the duplicates.
        for x_start in range(0, img.width, tile_stride):
            # Shift the tile back so that it ends at the image border.
            x_start = min(x_start, img.width - tile_size)
            for y_start in range(0, img.height, tile_stride):
                y_start = min(y_start, img.height - tile_size)
                croped_img = img.crop((x_start, y_start, x_start + tile_size, y_start + tile_size))
                croped_img = transforms.ToTensor()(croped_img)
                croped_img = torch.unsqueeze(croped_img, 0)
                croped_img = Variable(croped_img, requires_grad=False)
                output = model(croped_img.cuda()).data
                # Batch size is 1, so take the box list of the first sample.
                boxes = get_region_boxes(output, conf_thresh, model.num_classes, model.anchors, model.num_anchors,
                                         x_start, y_start, img.width, img.height, validation=True)[0]
                all_boxes = all_boxes + boxes
        boxes = nms(all_boxes, nms_thresh)
        write_boxes(img, imgpath, boxes, class_names, Result_dir)
        print("save results of %s" % imgpath)


if __name__ == '__main__':
    # Evaluate every checkpoint named in backup/weight_list.txt and write its
    # detections into a result directory named after the checkpoint.
    workdir = '/content/drive/My Drive/DOTA-dataset/'
    cfgfile = workdir + 'cfg/orn_4_dota.cfg'
    model = deformDarknet(cfgfile)
    imgfile = '/home/lwc/my_prj/DOTA/val/val/images/val_list.txt'

    with open(workdir + 'backup/weight_list.txt') as weight_list_file:
        weightfile_list = weight_list_file.readlines()

    num_weight = len(weightfile_list)
    for idx_weight, weight_name in enumerate(weightfile_list):
        weight_name = weight_name.strip('\n')
        if not weight_name:
            # Ignore blank lines in the list.
            continue
        weightfile = workdir + 'backup/' + weight_name
        # Result directory: checkpoint name up to its first dot.
        Result_dir = workdir + 'backup/' + weight_name.split('.')[0]
        os.makedirs(Result_dir, exist_ok=True)
        detect(model, weightfile, imgfile, Result_dir)

FileNotFoundError: ignored

In [11]:
## train

from __future__ import print_function
import sys
# This cell mirrors a command-line training script: exactly three arguments
# (datacfg, cfgfile, weightfile) are expected after the program name.
# Otherwise the usage text is printed and execution stops.
if len(sys.argv) != 4:
    print('Usage:')
    print('python train.py datacfg cfgfile weightfile')
    exit()

import torch.optim as optim
from torchvision import transforms
##  //  import dataset
##  //  from utils import *
##  //  from config import parse_cfg
##  //  from deform_darknet import deformDarknet, get_region_boxes
from torch.autograd import Variable

# Training settings
datacfg       = sys.argv[1]
cfgfile       = sys.argv[2]
weightfile    = sys.argv[3]

# Option dictionaries: the data cfg, and the first block of the network cfg.
data_options  = read_data_cfg(datacfg)
net_options   = parse_cfg(cfgfile)[0]

trainlist     = data_options['train']
testlist      = data_options['valid']
backupdir     = data_options['backup']
nsamples      = file_lines(trainlist)
gpus          = data_options['gpus']
# Number of GPUs = number of comma-separated entries in the 'gpus' option.
ngpus         = len(gpus.split(','))
num_workers   = int(data_options['num_workers'])

batch_size    = int(net_options['batch'])
subdiv        = int(net_options['subdivisions'])
max_batches   = int(net_options['max_batches'])
learning_rate = float(net_options['learning_rate'])
momentum      = float(net_options['momentum'])
decay         = float(net_options['decay'])
steps         = [float(step) for step in net_options['steps'].split(',')]
scales        = [float(scale) for scale in net_options['scales'].split(',')]

#Train parameters
# Epoch count derived from the cfg's batch budget: max_batches batches of
# batch_size samples, divided by the number of training samples, plus one.
max_epochs    = max_batches*batch_size//nsamples+1
use_cuda      = True
seed          = int(time.time())
eps           = 1e-5
save_interval = 1   # epoches
dot_interval  = 70  # batches

# Test parameters
conf_thresh   = 0.3
nms_thresh    = 0.4
iou_thresh    = 0.5
############################################################
# Make sure the checkpoint directory exists.
if not os.path.exists(backupdir):
    os.mkdir(backupdir)
############################################################
torch.manual_seed(seed)
if use_cuda:
    os.environ['CUDA_VISIBLE_DEVICES'] = gpus
    torch.cuda.manual_seed(seed)
############################################################
model = deformDarknet(cfgfile)
############################################################
# The starting epoch is parsed from the weight file name, which therefore
# has to start with an integer before its first dot.
init_epoch = int(weightfile.split('/')[-1].split('.')[0])
############################################################
model.load_weights(weightfile)
#################################s###########################
# Grab the loss module now: deformDarknet.forward() resets model.loss to None.
region_loss = model.loss
region_loss.seen = model.seen
processed_batches = model.seen//batch_size

init_width = model.width
init_height = model.height
############################################################
kwargs = {'num_workers': num_workers, 'pin_memory': True} if use_cuda else {}
# NOTE(review): `dataset` is referenced as a module here, but its import is
# commented out at the top of this cell -- confirm it is in scope.
test_loader = torch.utils.data.DataLoader(
    dataset.listDataset(testlist, shape=(init_width, init_height),
                   shuffle=False,
                   transform=transforms.Compose([
                       transforms.ToTensor(),
                   ]), train=False),
    batch_size=batch_size//subdiv, shuffle=False, **kwargs)

# The training list is shuffled once inside listDataset (shuffle=True there);
# the DataLoader itself keeps that order (shuffle=False).
train_loader = torch.utils.data.DataLoader(
    dataset.listDataset(trainlist, shape=(init_width, init_height),
                        shuffle=True,
                        transform=transforms.Compose([
                            transforms.ToTensor(),
                        ]),
                        train=True,
                        seen=model.seen,
                        batch_size=batch_size // subdiv,
                        num_workers=num_workers // subdiv),
    batch_size=batch_size // subdiv, shuffle=False, **kwargs)
############################################################
if use_cuda:
    if ngpus > 1:
        model = torch.nn.DataParallel(model).cuda()
    else:
        model = model.cuda()
############################################################
# The learning rate is divided by, and the weight decay multiplied by, the
# cfg batch size.
optimizer = optim.Adam(model.parameters(), lr=learning_rate/batch_size, weight_decay=decay*batch_size)
# optimizer = optim.SGD(model.parameters(), momentum=0.9, lr=learning_rate/batch_size, weight_decay=decay*batch_size)


def train(epoch):
    """Run one training epoch and save a checkpoint every ``save_interval`` epochs.

    Uses the module-level ``model``, ``train_loader``, ``optimizer`` and
    ``region_loss``; ``processed_batches`` is advanced once per batch.
    """
    global processed_batches
    # Under DataParallel the real network is wrapped in `.module`.
    cur_model = model.module if ngpus > 1 else model
    model.train()

    for batch_idx, (data, target) in enumerate(train_loader):
        if use_cuda:
            data = data.cuda()
        data = Variable(data)
        target = Variable(target)
        processed_batches = processed_batches + 1

        optimizer.zero_grad()
        output = model(data)
        # Count the samples of this batch before computing the loss.
        region_loss.seen = region_loss.seen + data.data.size(0)
        loss = region_loss(output, target, batch_idx)
        loss.backward()
        optimizer.step()

    if (epoch+1) % save_interval != 0:
        return

    logging('save weights to %s/%06d.weights' % (backupdir, epoch+1))
    cur_model.seen = (epoch + 1) * len(train_loader.dataset)
    cur_model.save_weights('%s/%06d.weights' % (backupdir, epoch + 1))


def test():
    """Evaluate the model on ``test_loader`` and log precision/recall/F-score.

    A ground-truth box counts as correct when its best-overlapping detection
    has IoU above ``iou_thresh``, confidence above ``conf_thresh`` and the
    same class id.  The metrics are logged and appended to ``./log.dat``.
    """
    model.eval()
    # Under DataParallel the real network is wrapped in `.module`.
    cur_model = model.module if ngpus > 1 else model
    num_classes = cur_model.num_classes
    anchors = cur_model.anchors
    num_anchors = cur_model.num_anchors
    total = 0.0
    proposals = 0.0
    correct = 0.0

    for batch_idx, (data, target) in enumerate(test_loader):
        if use_cuda:
            data = data.cuda()
        data = Variable(data, requires_grad=False)
        output = model(data).data
        all_boxes = get_region_boxes(output, conf_thresh, num_classes, anchors, num_anchors)
        for i in range(output.size(0)):
            boxes = nms(all_boxes[i], nms_thresh)
            # Labels are stored flat; view them as rows of 5 values.
            truths = target[i].view(-1, 5)
            num_gts = truths_length(truths)
            total = total + num_gts
            proposals = proposals + sum(1 for box in boxes if box[4] > conf_thresh)
            for k in range(num_gts):
                # Same layout as a detection:
                # [cx, cy, w, h, det_conf, cls_conf, cls_id].
                box_gt = [truths[k][1], truths[k][2], truths[k][3], truths[k][4], 1.0, 1.0, truths[k][0]]
                best_iou = 0
                best_j = 0
                for j, box in enumerate(boxes):
                    iou = bbox_iou(box_gt, box, x1y1x2y2=False)
                    if iou > best_iou:
                        best_iou = iou
                        best_j = j
                if best_iou > iou_thresh and boxes[best_j][4] > conf_thresh and boxes[best_j][6] == box_gt[6]:
                    correct = correct + 1

    # eps keeps the divisions defined when nothing was proposed or labelled.
    precision = 1.0 * correct / (proposals + eps)
    recall = 1.0 * correct / (total + eps)
    fscore = 2.0 * precision * recall / (precision + recall + eps)
    logging("precision: %f, recall: %f, fscore: %f" % (precision, recall, fscore))
    with open('./log.dat', 'a') as fp:
        fp.write("precision: %f, recall: %f, fscore: %f\n" % (precision, recall, fscore))


if __name__ == '__main__':
    # Resume at init_epoch (parsed from the weight file name above) and
    # train until max_epochs.
    for epoch in range(init_epoch, max_epochs):
        train(epoch)
        # Evaluate on every fourth epoch (epochs 3, 7, 11, ...).
        if epoch % 4 == 3:
            test()

Usage:
python train.py datacfg cfgfile weightfile


IndexError: ignored

In [None]:
## detect

##  //  from utils import *
##  //  from deform_darknet import deformDarknet, get_region_boxes
from torchvision import transforms
from torch.autograd import Variable


# Seed torch's RNG from the wall clock; `seed` is also read by detect()
# below to seed the CUDA RNG.
seed = int(time.time())
torch.manual_seed(seed)


def _tile_starts(length, tile_size, stride):
    """Return the distinct tile origins along one image axis, in scan order.

    Origins advance by ``stride`` from 0. A tile that would run past
    ``length`` is shifted back so that it ends exactly on the edge (which
    yields a negative origin when ``length < tile_size``). Origins that
    coincide after shifting are kept only once, so the same tile is never
    pushed through the network twice.
    """
    starts = []
    for start in range(0, length, stride):
        if start + tile_size > length:
            start = length - tile_size
        if start not in starts:
            starts.append(start)
    return starts


def detect(model, weightfile, imgfile, Result_dir, conf_thresh=0.5, nms_thresh=0.4,
           namesfile='data/dota.names', tile_size=1024, stride=512, use_cuda=True):
    """Run tiled detection on every image listed in ``imgfile`` and plot the results.

    The weights in ``weightfile`` are loaded into ``model`` and the model is
    put in eval mode. Each image is then cut into overlapping square tiles,
    every tile is passed through the model, the boxes from all tiles are
    pooled and filtered with ``nms``, and the survivors are handed to
    ``plot_boxes`` together with an output path inside ``Result_dir`` that
    reuses the image's file name.

    Args:
        model: Network object; must provide ``load_weights``, ``eval``,
            ``cuda``, ``num_classes``, ``anchors`` and ``num_anchors``.
        weightfile: Path passed to ``model.load_weights``.
        imgfile: Text file with one image path per line. Surrounding
            whitespace is stripped and blank lines are ignored.
        Result_dir: Directory used to build the output path for each image.
        conf_thresh: Confidence threshold forwarded to ``get_region_boxes``.
        nms_thresh: Threshold forwarded to ``nms``.
        namesfile: Class-name file forwarded to ``load_class_names``.
        tile_size: Side length, in pixels, of each square tile.
        stride: Step, in pixels, between consecutive tile origins.
        use_cuda: When true, move the model and every tile to the GPU.

    Note:
        Reads the module-level ``seed`` when ``use_cuda`` is true.
    """
    model.load_weights(weightfile)
    print('Loading weights from %s... Done!' % (weightfile))
    model.eval()
    if use_cuda:
        os.environ['CUDA_VISIBLE_DEVICES'] = '0'
        torch.cuda.manual_seed(seed)
        model.cuda()

    # Close the list file deterministically and drop blank lines, which would
    # otherwise reach Image.open as an empty path.
    with open(imgfile) as img_list_file:
        img_paths = [line.strip() for line in img_list_file]
    img_paths = [path for path in img_paths if path]

    # Neither of these depends on the image, so build them once.
    class_names = load_class_names(namesfile)
    to_tensor = transforms.ToTensor()

    for imgpath in img_paths:
        # convert() returns a new image, so the underlying file can be
        # released as soon as the conversion is done.
        with Image.open(imgpath) as raw_img:
            img = raw_img.convert('RGB')

        x_starts = _tile_starts(img.width, tile_size, stride)
        y_starts = _tile_starts(img.height, tile_size, stride)

        all_boxes = []
        # Inference only: skip autograd bookkeeping for the forward passes.
        with torch.no_grad():
            for x_start in x_starts:
                for y_start in y_starts:
                    tile = img.crop((x_start, y_start,
                                     x_start + tile_size, y_start + tile_size))
                    tile = torch.unsqueeze(to_tensor(tile), 0)
                    if use_cuda:
                        tile = tile.cuda()
                    output = model(tile).data
                    # NOTE(review): the tile origin and full image size are
                    # passed along, presumably so boxes come back in
                    # full-image coordinates; confirm in get_region_boxes.
                    boxes = get_region_boxes(output, conf_thresh, model.num_classes, model.anchors,
                                             model.num_anchors, x_start, y_start, img.width,
                                             img.height, validation=True)[0]
                    all_boxes.extend(boxes)
        boxes = nms(all_boxes, nms_thresh)

        plot_boxes(img, boxes, os.path.join(Result_dir, os.path.basename(imgpath)), class_names)


if __name__ == '__main__':
    # Build the network once from the cfg file, then call detect() on the
    # test image list once for every weight file named in
    # backup/test_weight_list.txt.
    workdir = '/content/drive/My Drive/DOTA-dataset/'
    cfgfile = workdir + 'cfg/orn_4_dota.cfg'
    model = deformDarknet(cfgfile)
    imgfile = workdir + 'test_img/test_img_list.txt'

    # Close the list file deterministically and drop blank lines, which would
    # otherwise turn into a weight path that points at the backup/ directory.
    with open(workdir + 'backup/test_weight_list.txt') as weight_list_file:
        weightfile_list = [line.strip() for line in weight_list_file]
    weightfile_list = [name for name in weightfile_list if name]

    # The output directory is the same for every weight file, so create it
    # once; makedirs also creates a missing final_result/ parent.
    Result_dir = workdir + 'final_result/test_img/'
    if not os.path.exists(Result_dir):
        os.makedirs(Result_dir)

    for weight_name in weightfile_list:
        weightfile = workdir + 'backup/' + weight_name
        detect(model, weightfile, imgfile, Result_dir)