In [0]:
# # ! rm -rf CS4180-DL
# ! git clone --branch rukai-yolo2 https://github.com/prerakmody/CS4180-DL.git

# ! wget https://pjreddie.com/media/files/yolov2-voc.weights
# ! git clone https://github.com/marvis/pytorch-yolo2.git

# # Get The Pascal VOC Data
# ! wget https://pjreddie.com/media/files/VOCtrainval_11-May-2012.tar
# ! wget https://pjreddie.com/media/files/VOCtrainval_06-Nov-2007.tar
# ! wget https://pjreddie.com/media/files/VOCtest_06-Nov-2007.tar
# ! tar xf VOCtrainval_11-May-2012.tar
# ! tar xf VOCtrainval_06-Nov-2007.tar
# ! tar xf VOCtest_06-Nov-2007.tar

# # Generate Labels for VOC

# ! wget http://pjreddie.com/media/files/voc_label.py
# ! python voc_label.py
# ! cat 2007_train.txt 2007_val.txt 2012_*.txt > voc_train.txt

In [14]:
! nvidia-smi

Sat May 25 15:13:50 2019       
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 418.67       Driver Version: 410.79       CUDA Version: 10.0     |
|-------------------------------+----------------------+----------------------+
| GPU  Name        Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|   0  Tesla T4            Off  | 00000000:00:04.0 Off |                    0 |
| N/A   48C    P8    15W /  70W |     10MiB / 15079MiB |      0%      Default |
+-------------------------------+----------------------+----------------------+
                                                                               
+-----------------------------------------------------------------------------+
| Processes:                                                       GPU Memory |
|  GPU       PID   Type   Process name                             Usage      |
|  No ru

In [0]:
# dataloader

import os
import cv2
import random
import numpy as np
import matplotlib.pyplot as plt

from PIL import Image

import torch
import torch.utils.data as data

## --------------------------------------- PASCAL VOC - v2 --------------------------------------- ##

class VOCDatasetv2(data.Dataset):
    """Detection dataset driven by a list file with one image path per line.

    The label file for an image is located by rewriting the image path
    (``images``/``JPEGImages`` -> ``labels``, ``.jpg``/``.png`` -> ``.txt``).
    Every sample is returned as ``(img, label)`` where ``label`` is a flat
    tensor of ``50*5`` values (up to 50 boxes, 5 numbers each, zero padded).

    Args:
        root: path of the text file listing the image paths.
        shape: optional ``(width, height)`` the image is resized to.
        shuffle: shuffle the list of image paths once at construction time.
        transform: optional callable applied to the image.
        target_transform: optional callable applied to the label tensor.
        train: enable multi-scale shapes and random augmentation.
        seen: number of samples already processed (drives the scale schedule).
        batch_size: stored on the instance; not used by this class itself.
        num_workers: amount ``seen`` is advanced by on every ``__getitem__``.
    """

    def __init__(self, root, shape=None, shuffle=True, transform=None, target_transform=None, train=False, seen=0, batch_size=64, num_workers=4):
        with open(root, 'r') as file:
            self.lines = file.readlines()

        if shuffle:
            random.shuffle(self.lines)

        self.nSamples = len(self.lines)
        self.transform = transform
        self.target_transform = target_transform
        self.train = train
        self.shape = shape
        self.seen = seen
        self.batch_size = batch_size
        self.num_workers = num_workers

    def __len__(self):
        """Return the number of image paths read from the list file."""
        return self.nSamples

    def __getitem__(self, index):
        """Load sample ``index`` and return it as ``(img, label)``.

        Raises:
            IndexError: if ``index`` is past the end of the dataset.
        """
        # The previous `assert index <= len(self)` let `index == len(self)`
        # through and raised AssertionError for larger values. IndexError is
        # what the sequence protocol (and plain `for x in dataset`) expects.
        if index >= len(self):
            raise IndexError('index range error')
        imgpath = self.lines[index].rstrip()

        # Multi-scale training: every 64th index pick a new square input size
        # (a multiple of 32); the range of sizes widens as `seen` grows.
        # NOTE(review): 64 is hard-coded here and in the thresholds rather than
        # taken from self.batch_size - confirm before changing.
        if self.train and index % 64 == 0:
            if self.seen < 4000*64:
                width = 13*32
            elif self.seen < 8000*64:
                width = (random.randint(0, 3) + 13)*32
            elif self.seen < 12000*64:
                width = (random.randint(0, 5) + 12)*32
            elif self.seen < 16000*64:
                width = (random.randint(0, 7) + 11)*32
            else:  # self.seen < 20000*64:
                width = (random.randint(0, 9) + 10)*32
            self.shape = (width, width)

        if self.train:
            jitter = 0.2
            hue = 0.1
            saturation = 1.5
            exposure = 1.5

            img, label = load_data_detection(imgpath, self.shape, jitter, hue, saturation, exposure)
            label = torch.from_numpy(label)
        else:
            img = Image.open(imgpath).convert('RGB')
            if self.shape:
                img = img.resize(self.shape)

            labpath = imgpath.replace('images', 'labels').replace('JPEGImages', 'labels').replace('.jpg', '.txt').replace('.png', '.txt')
            label = torch.zeros(50*5)
            # Best effort: any failure while reading the label file results in
            # an all-zero label instead of an exception.
            try:
                tmp = torch.from_numpy(read_truths_args(labpath, 8.0/img.width).astype('float32'))
            except Exception:
                tmp = torch.zeros(1, 5)
            tmp = tmp.view(-1)
            tsz = tmp.numel()
            if tsz > 50*5:
                # More than 50 boxes: keep only the first 50.
                label = tmp[0:50*5]
            elif tsz > 0:
                label[0:tsz] = tmp

        if self.transform is not None:
            img = self.transform(img)

        if self.target_transform is not None:
            label = self.target_transform(label)

        self.seen = self.seen + self.num_workers
        return (img, label)

def scale_image_channel(im, c, v):
    """Return a copy of ``im`` whose band number ``c`` is multiplied by ``v``."""
    bands = list(im.split())
    bands[c] = bands[c].point(lambda px: px * v)
    return Image.merge(im.mode, tuple(bands))

def distort_image(im, hue, sat, val):
    """Distort ``im`` in HSV space and return the result as an RGB image.

    Saturation and value are multiplied by ``sat`` and ``val``; the hue band
    is shifted by ``hue*255`` and folded back by 255 when it leaves [0, 255].
    """
    def shift_hue(level):
        level += hue*255
        if level > 255:
            level -= 255
        if level < 0:
            level += 255
        return level

    hsv = im.convert('HSV')
    bands = list(hsv.split())
    bands[0] = bands[0].point(shift_hue)
    bands[1] = bands[1].point(lambda level: level * sat)
    bands[2] = bands[2].point(lambda level: level * val)

    distorted = Image.merge(hsv.mode, tuple(bands))
    return distorted.convert('RGB')

def rand_scale(s):
    """Draw a factor uniformly from [1, s] and return it or its reciprocal.

    Which of the two is returned depends on the parity of a second random
    integer drawn from [1, 10000].
    """
    factor = random.uniform(1, s)
    odd_draw = random.randint(1, 10000) % 2
    return factor if odd_draw else 1./factor

def random_distort_image(im, hue, saturation, exposure):
    """Apply ``distort_image`` with randomly drawn hue/saturation/exposure.

    The hue shift is uniform in [-hue, hue]; the saturation and exposure
    factors come from ``rand_scale``.
    """
    hue_shift = random.uniform(-hue, hue)
    sat_factor = rand_scale(saturation)
    exp_factor = rand_scale(exposure)
    return distort_image(im, hue_shift, sat_factor, exp_factor)

def data_augmentation(img, shape, jitter, hue, saturation, exposure):
    """Randomly crop, resize, flip and colour-distort ``img``.

    Returns ``(img, flip, dx, dy, sx, sy)``: the augmented image, the flip
    flag (0 or 1), the relative crop offsets ``dx``/``dy`` and the relative
    crop sizes ``sx``/``sy`` (crop size divided by the original size).
    """
    orig_w = img.width
    orig_h = img.height

    max_dx = int(orig_w*jitter)
    max_dy = int(orig_h*jitter)

    # The order of the random draws is kept: left, right, top, bottom, flip.
    pleft = random.randint(-max_dx, max_dx)
    pright = random.randint(-max_dx, max_dx)
    ptop = random.randint(-max_dy, max_dy)
    pbot = random.randint(-max_dy, max_dy)

    crop_w = orig_w - pleft - pright
    crop_h = orig_h - ptop - pbot

    sx = float(crop_w) / orig_w
    sy = float(crop_h) / orig_h

    flip = random.randint(1, 10000) % 2

    crop_box = (pleft, ptop, pleft + crop_w - 1, ptop + crop_h - 1)
    resized = img.crop(crop_box).resize(shape)

    dx = (float(pleft)/orig_w)/sx
    dy = (float(ptop)/orig_h)/sy

    if flip:
        resized = resized.transpose(Image.FLIP_LEFT_RIGHT)
    augmented = random_distort_image(resized, hue, saturation, exposure)

    return augmented, flip, dx, dy, sx, sy

def fill_truth_detection(labpath, w, h, flip, dx, dy, sx, sy):
    """Read a label file and map its boxes into the augmented image frame.

    Each row of the label file is ``class cx cy w h``. Box corners are
    transformed as ``coord * s - d``, clamped to [0, 0.999], optionally
    mirrored horizontally, and boxes narrower or shorter than 0.001 are
    dropped.

    Args:
        labpath: path of the label text file.
        w, h: accepted for interface compatibility; not used.
        flip: truthy when the image was flipped left-right.
        dx, dy: offsets subtracted from the scaled x / y coordinates.
        sx, sy: scale factors applied to the x / y coordinates.

    Returns:
        Flat float array of length ``50*5`` (zero padded).
    """
    max_boxes = 50
    label = np.zeros((max_boxes, 5))
    if os.path.getsize(labpath):
        # reshape guards against the single-row case, where loadtxt is 1-D.
        bs = np.reshape(np.loadtxt(labpath), (-1, 5))
        cc = 0
        for i in range(bs.shape[0]):
            x1 = bs[i][1] - bs[i][3]/2
            y1 = bs[i][2] - bs[i][4]/2
            x2 = bs[i][1] + bs[i][3]/2
            y2 = bs[i][2] + bs[i][4]/2

            x1 = min(0.999, max(0, x1 * sx - dx))
            y1 = min(0.999, max(0, y1 * sy - dy))
            x2 = min(0.999, max(0, x2 * sx - dx))
            y2 = min(0.999, max(0, y2 * sy - dy))

            bs[i][1] = (x1 + x2)/2
            bs[i][2] = (y1 + y2)/2
            bs[i][3] = (x2 - x1)
            bs[i][4] = (y2 - y1)

            if flip:
                bs[i][1] = 0.999 - bs[i][1]

            # Skip boxes that collapsed after clamping.
            if bs[i][3] < 0.001 or bs[i][4] < 0.001:
                continue
            label[cc] = bs[i]
            cc += 1
            if cc >= max_boxes:
                break

    return np.reshape(label, (-1))

def load_data_detection(imgpath, shape, jitter, hue, saturation, exposure):
    """Load an image, augment it and return it with its transformed label.

    The label path is derived from ``imgpath`` by string replacement. Returns
    ``(img, label)`` where ``label`` is the flat array produced by
    ``fill_truth_detection``.
    """
    labpath = imgpath
    for old, new in (('images', 'labels'), ('JPEGImages', 'labels'), ('.jpg', '.txt'), ('.png', '.txt')):
        labpath = labpath.replace(old, new)

    ## data augmentation
    source = Image.open(imgpath).convert('RGB')
    augmented, flip, dx, dy, sx, sy = data_augmentation(source, shape, jitter, hue, saturation, exposure)
    # The label transform takes the inverse of the relative crop size.
    label = fill_truth_detection(labpath, augmented.width, augmented.height, flip, dx, dy, 1./sx, 1./sy)
    return augmented, label


## --------------------------------------- PASCAL VOC - v1 --------------------------------------- ##

class YoloDataset(data.Dataset):
    """Detection dataset that encodes boxes into a ``grid_num x grid_num x 30`` target.

    Every line of ``file_annotations`` holds an image file name followed by
    zero or more groups of ``x1 y1 x2 y2 class``. Images are read from
    ``<dir_data>/JPEGImages``.

    Args:
        dir_data: dataset root directory.
        file_annotations: path of the annotation list file.
        train: enables augmentation (together with ``flag_augm == 1``).
        image_size: side length the image is resized to.
        grid_num: number of grid cells per side of the target.
        flag_augm: ``1`` switches the random augmentations on.
        transform: iterable of callables applied to the image in order.
    """

    def __init__(self, dir_data, file_annotations
                 , train
                 , image_size, grid_num
                 , flag_augm
                 , transform):

        self.dir_data   = dir_data
        self.dir_img    = os.path.join(dir_data, 'JPEGImages')
        self.train      = train
        self.transform  = transform

        self.fnames     = []
        self.boxes      = []
        self.labels     = []
        self.mean       = (123,117,104) # RGB ([How?])

        self.grid_num   = grid_num
        self.image_size = image_size
        self.flag_augm  = flag_augm

        self.verbose_aug = False

        with open(file_annotations) as f:
            for line in f:
                splited = line.strip().split()
                if not splited:
                    # Blank line: nothing to register.
                    continue
                self.fnames.append(splited[0])
                num_boxes = (len(splited) - 1) // 5
                box       = []
                label     = []
                for i in range(num_boxes):
                    x  = float(splited[1+5*i])
                    y  = float(splited[2+5*i])
                    x2 = float(splited[3+5*i])
                    y2 = float(splited[4+5*i])
                    c  = splited[5+5*i]
                    box.append([x,y,x2,y2])
                    label.append(int(c)+1)
                # reshape keeps the (N, 4) layout even when N == 0, so the
                # column slicing used everywhere else keeps working.
                self.boxes.append(torch.Tensor(box).reshape(num_boxes, 4))
                self.labels.append(torch.LongTensor(label))

        self.num_samples = len(self.boxes)

    def __getitem__(self,idx, verbose=0):
        """Return ``(img, target)`` for sample ``idx``.

        Raises:
            FileNotFoundError: if the image cannot be read.
        """
        fname  = self.fnames[idx]
        path   = os.path.join(self.dir_img, fname)
        img    = cv2.imread(path, cv2.IMREAD_UNCHANGED)
        if img is None:
            # cv2.imread signals failure by returning None instead of raising.
            raise FileNotFoundError('Could not read image: %s' % path)
        boxes  = self.boxes[idx].clone()
        labels = self.labels[idx].clone()

        if verbose:
            print (' - fname :', fname)
            print (' - path : ', path)
            print (' - labels : ', labels)

        if self.train:
            if (self.flag_augm == 1):
                img              = self.random_bright(img)
                img, boxes       = self.random_flip(img, boxes)
                img,boxes        = self.randomScale(img,boxes)
                img              = self.randomBlur(img)
                img              = self.RandomBrightness(img)
                img              = self.RandomHue(img)
                img              = self.RandomSaturation(img)
                img,boxes,labels = self.randomShift(img,boxes,labels)
                img,boxes,labels = self.randomCrop(img,boxes,labels)

        h,w,_  = img.shape

        # Normalise pixel coordinates to [0, 1].
        boxes  /= torch.Tensor([w,h,w,h]).expand_as(boxes)
        img    = self.BGR2RGB(img) #because pytorch pretrained model use RGB
        img    = cv2.resize(img,(self.image_size,self.image_size))
        target = self.encoder(boxes,labels) # 7x7x30
        for t in self.transform:
            img = t(img)
        return img,target

    def __len__(self):
        """Return the number of annotated images."""
        return self.num_samples

    def encoder(self,boxes,labels):
        '''
        Encode normalised boxes into the grid target.

        boxes (tensor) [[x1,y1,x2,y2],[]] with coordinates in [0, 1]
        labels (tensor) [...]
        return grid_num x grid_num x 30 tensor: channels 0-4 and 5-9 both hold
        (dx, dy, w, h, 1) of the box, channel ``label + 9`` is set to 1.
        '''

        target    = torch.zeros((self.grid_num, self.grid_num,30))
        cell_size = 1./self.grid_num
        wh        = boxes[:,2:] - boxes[:,:2]
        cxcy      = (boxes[:,2:] + boxes[:,:2])/2
        for i in range(cxcy.size()[0]):
            cxcy_sample = cxcy[i]
            # ceil()-1 yields -1 for a centre lying exactly on 0, which would
            # silently wrap around to the last row/column; clamp to the grid.
            ij          = ((cxcy_sample/cell_size).ceil()-1).clamp(min=0, max=self.grid_num-1)
            row, col    = int(ij[1]), int(ij[0])
            xy          = ij*cell_size # The relative coordinates of the upper left corner of the matched mesh
            delta_xy    = (cxcy_sample -xy)/cell_size

            target[row,col,4]   = 1
            target[row,col,9]   = 1
            target[row,col,int(labels[i])+9] = 1
            target[row,col,2:4] = wh[i]
            target[row,col,:2]  = delta_xy
            target[row,col,7:9] = wh[i]
            target[row,col,5:7] = delta_xy

        return target

    def BGR2RGB(self,img):
        return cv2.cvtColor(img,cv2.COLOR_BGR2RGB)
    def BGR2HSV(self,img):
        return cv2.cvtColor(img,cv2.COLOR_BGR2HSV)
    def HSV2BGR(self,img):
        return cv2.cvtColor(img,cv2.COLOR_HSV2BGR)

    def subMean(self,bgr,mean):
        """Subtract the per-channel ``mean`` from the image."""
        mean = np.array(mean, dtype=np.float32)
        bgr  = bgr - mean
        return bgr

    def _scale_hsv_channel(self, bgr, channel, tag):
        """With probability 0.5 scale one HSV channel by 0.5 or 1.5, clipped to [0, 255]."""
        if random.random() < 0.5:
            if self.verbose_aug:
                print (' - [AUG] : ' + tag)
            hsv = self.BGR2HSV(bgr)
            planes = list(cv2.split(hsv))
            adjust = random.choice([0.5,1.5])
            planes[channel] = np.clip(planes[channel]*adjust, 0, 255).astype(hsv.dtype)
            bgr = self.HSV2BGR(cv2.merge(planes))
        return bgr

    def RandomBrightness(self,bgr):
        """Randomly scale the V channel (see ``_scale_hsv_channel``)."""
        return self._scale_hsv_channel(bgr, 2, 'randomBrightness')

    def RandomSaturation(self,bgr):
        """Randomly scale the S channel (see ``_scale_hsv_channel``)."""
        return self._scale_hsv_channel(bgr, 1, 'randomSaturation')

    def RandomHue(self,bgr):
        """Randomly scale the H channel (see ``_scale_hsv_channel``)."""
        # NOTE(review): the hue channel is clipped to 255 like the others;
        # confirm whether the 8-bit OpenCV hue range should be used instead.
        return self._scale_hsv_channel(bgr, 0, 'randomHue')

    def randomBlur(self,bgr):
        """With probability 0.5 apply a 5x5 box blur."""
        if random.random()<0.5:
            if self.verbose_aug:
                print (' - [AUG] : randomBlur')
            bgr = cv2.blur(bgr,(5,5))
        return bgr

    def randomShift(self,bgr,boxes,labels):
        """With probability 0.5 translate the image by up to 20% per axis.

        Boxes whose centre leaves the image are dropped; if none would
        remain, the inputs are returned unchanged.
        """
        # translation transform
        if random.random() <0.5:
            if self.verbose_aug:
                print (' - [AUG] : randomShift')
            center = (boxes[:,2:]+boxes[:,:2])/2
            height,width,c = bgr.shape
            after_shfit_image = np.zeros((height,width,c),dtype=bgr.dtype)
            after_shfit_image[:,:,:] = (104,117,123) #bgr
            shift_x = random.uniform(-width*0.2,width*0.2)
            shift_y = random.uniform(-height*0.2,height*0.2)
            # shift the original image
            if shift_x>=0 and shift_y>=0:
                after_shfit_image[int(shift_y):,int(shift_x):,:] = bgr[:height-int(shift_y),:width-int(shift_x),:]
            elif shift_x>=0 and shift_y<0:
                after_shfit_image[:height+int(shift_y),int(shift_x):,:] = bgr[-int(shift_y):,:width-int(shift_x),:]
            elif shift_x <0 and shift_y >=0:
                after_shfit_image[int(shift_y):,:width+int(shift_x),:] = bgr[:height-int(shift_y),-int(shift_x):,:]
            elif shift_x<0 and shift_y<0:
                after_shfit_image[:height+int(shift_y),:width+int(shift_x),:] = bgr[-int(shift_y):,-int(shift_x):,:]

            shift_xy = torch.FloatTensor([[int(shift_x),int(shift_y)]]).expand_as(center)
            center = center + shift_xy
            mask1 = (center[:,0] >0) & (center[:,0] < width)
            mask2 = (center[:,1] >0) & (center[:,1] < height)
            keep = mask1 & mask2
            # Row-mask indexing keeps the (K, 4) shape even for K == 0; the
            # former element mask + view(-1, 4) raised on an empty selection.
            boxes_in = boxes[keep]
            if len(boxes_in) == 0:
                return bgr,boxes,labels
            box_shift = torch.FloatTensor([[int(shift_x),int(shift_y),int(shift_x),int(shift_y)]]).expand_as(boxes_in)
            boxes_in = boxes_in+box_shift
            labels_in = labels[keep]
            return after_shfit_image,boxes_in,labels_in
        return bgr,boxes,labels

    def randomScale(self,bgr,boxes):
        """With probability 0.5 stretch the image width by a factor in [0.8, 1.2]."""
        # keep the height fixed and scale the width by 0.8-1.2 to deform the image
        if random.random() < 0.5:
            if self.verbose_aug:
                print (' - [AUG] : randomScale')
            scale = random.uniform(0.8,1.2)
            height,width,c = bgr.shape
            bgr = cv2.resize(bgr,(int(width*scale),height))
            scale_tensor = torch.FloatTensor([[scale,1,scale,1]]).expand_as(boxes)
            boxes = boxes * scale_tensor
            return bgr,boxes
        return bgr,boxes

    def randomCrop(self,bgr,boxes,labels):
        """With probability 0.5 crop a random window of 60-100% of each side.

        Boxes whose centre falls outside the window are dropped and the rest
        are clamped to it; if none would remain, the inputs are returned
        unchanged.
        """
        if random.random() < 0.5:
            if self.verbose_aug:
                print (' - [AUG] : randomCrop')
            center = (boxes[:,2:]+boxes[:,:2])/2
            height,width,c = bgr.shape
            h = random.uniform(0.6*height,height)
            w = random.uniform(0.6*width,width)
            x = random.uniform(0,width-w)
            y = random.uniform(0,height-h)
            x,y,h,w = int(x),int(y),int(h),int(w)

            center = center - torch.FloatTensor([[x,y]]).expand_as(center)
            mask1 = (center[:,0]>0) & (center[:,0]<w)
            mask2 = (center[:,1]>0) & (center[:,1]<h)
            keep = mask1 & mask2

            # Row-mask indexing stays (K, 4) for K == 0 (see randomShift).
            boxes_in = boxes[keep]
            if(len(boxes_in)==0):
                return bgr,boxes,labels
            box_shift = torch.FloatTensor([[x,y,x,y]]).expand_as(boxes_in)

            boxes_in = boxes_in - box_shift
            # x coordinates live in columns 0 and 2, y coordinates in 1 and 3.
            boxes_in[:,0::2].clamp_(min=0,max=w)
            boxes_in[:,1::2].clamp_(min=0,max=h)

            labels_in = labels[keep]
            img_croped = bgr[y:y+h,x:x+w,:]
            return img_croped,boxes_in,labels_in
        return bgr,boxes,labels

    def random_flip(self, im, boxes):
        """With probability 0.5 mirror the image left-right.

        ``boxes`` is updated in place when the flip is applied.
        """
        if random.random() < 0.5:
            if self.verbose_aug:
                print (' - [AUG] : random_flip')
            im_lr = np.fliplr(im).copy()
            h,w,_ = im.shape
            xmin = w - boxes[:,2]
            xmax = w - boxes[:,0]
            boxes[:,0] = xmin
            boxes[:,2] = xmax
            return im_lr, boxes
        return im, boxes

    def random_bright(self, im, delta=16):
        """Scale the image by a random ``alpha`` and add a random integer offset.

        Applied only when the drawn ``alpha`` exceeds 0.3; the result is
        clipped to [0, 255] and cast to uint8.
        """
        alpha = random.random()
        if alpha > 0.3:
            im = im * alpha + random.randrange(-delta,delta)
            im = im.clip(min=0,max=255).astype(np.uint8)
        return im

    def display(self, X):
        """Show a channel-first image tensor with matplotlib."""
        plt.imshow(X.data.numpy().transpose(1,2,0))

    def display_anno(self, X,y):
        pass
    


In [0]:
# nets2_util

import sys
import os
import time
import math
import torch
import numpy as np
from PIL import Image, ImageDraw, ImageFont
from torch.autograd import Variable

import struct # get_image_size
import imghdr # get_image_size

def sigmoid(x):
    """Return the logistic function ``1 / (1 + exp(-x))`` of a scalar.

    The negative branch uses the equivalent form ``exp(x) / (1 + exp(x))`` so
    that very negative inputs return 0.0 instead of overflowing ``math.exp``.
    """
    if x >= 0:
        return 1.0/(math.exp(-x)+1.)
    e = math.exp(x)
    return e/(1.0+e)

def softmax(x):
    """Softmax over all elements of tensor ``x`` (max-shifted before exp)."""
    exps = torch.exp(x - torch.max(x))
    return exps/exps.sum()

def bbox_iou(box1, box2, x1y1x2y2=True):
    """Intersection over union of two boxes.

    Boxes are ``(x1, y1, x2, y2)`` when ``x1y1x2y2`` is true, otherwise
    ``(cx, cy, w, h)``. Returns 0.0 when the boxes do not overlap.
    """
    if x1y1x2y2:
        left   = min(box1[0], box2[0])
        right  = max(box1[2], box2[2])
        top    = min(box1[1], box2[1])
        bottom = max(box1[3], box2[3])
        w1, h1 = box1[2] - box1[0], box1[3] - box1[1]
        w2, h2 = box2[2] - box2[0], box2[3] - box2[1]
    else:
        left   = min(box1[0]-box1[2]/2.0, box2[0]-box2[2]/2.0)
        right  = max(box1[0]+box1[2]/2.0, box2[0]+box2[2]/2.0)
        top    = min(box1[1]-box1[3]/2.0, box2[1]-box2[3]/2.0)
        bottom = max(box1[1]+box1[3]/2.0, box2[1]+box2[3]/2.0)
        w1, h1 = box1[2], box1[3]
        w2, h2 = box2[2], box2[3]

    # Overlap extent = sum of the two extents minus the extent of their hull.
    inter_w = w1 + w2 - (right - left)
    inter_h = h1 + h2 - (bottom - top)
    if inter_w <= 0 or inter_h <= 0:
        return 0.0

    inter_area = inter_w * inter_h
    union_area = w1 * h1 + w2 * h2 - inter_area
    return inter_area/union_area

def bbox_ious(boxes1, boxes2, x1y1x2y2=True):
    """Element-wise IoU of two batches of boxes.

    ``boxes1[k]`` / ``boxes2[k]`` are tensors holding coordinate ``k`` of every
    box: ``(x1, y1, x2, y2)`` when ``x1y1x2y2`` is true, otherwise
    ``(cx, cy, w, h)``. Pairs without overlap get an IoU of 0.
    """
    if x1y1x2y2:
        left   = torch.min(boxes1[0], boxes2[0])
        right  = torch.max(boxes1[2], boxes2[2])
        top    = torch.min(boxes1[1], boxes2[1])
        bottom = torch.max(boxes1[3], boxes2[3])
        w1, h1 = boxes1[2] - boxes1[0], boxes1[3] - boxes1[1]
        w2, h2 = boxes2[2] - boxes2[0], boxes2[3] - boxes2[1]
    else:
        left   = torch.min(boxes1[0]-boxes1[2]/2.0, boxes2[0]-boxes2[2]/2.0)
        right  = torch.max(boxes1[0]+boxes1[2]/2.0, boxes2[0]+boxes2[2]/2.0)
        top    = torch.min(boxes1[1]-boxes1[3]/2.0, boxes2[1]-boxes2[3]/2.0)
        bottom = torch.max(boxes1[1]+boxes1[3]/2.0, boxes2[1]+boxes2[3]/2.0)
        w1, h1 = boxes1[2], boxes1[3]
        w2, h2 = boxes2[2], boxes2[3]

    inter_w = w1 + w2 - (right - left)
    inter_h = h1 + h2 - (bottom - top)
    no_overlap = (inter_w <= 0) | (inter_h <= 0)

    inter_area = inter_w * inter_h
    # Two negative extents multiply to a positive area, so zero those out.
    inter_area[no_overlap] = 0
    union_area = w1 * h1 + w2 * h2 - inter_area
    return inter_area/union_area

def nms(boxes, nms_thresh):
    """Greedy non-maximum suppression on ``(cx, cy, w, h, conf, ...)`` boxes.

    Boxes are visited in order of decreasing confidence (``box[4]``). A
    suppressed box has its confidence set to 0 in place, so the input boxes
    are modified. Returns the list of kept boxes.
    """
    count = len(boxes)
    if count == 0:
        return boxes

    # Sort ascending on (1 - confidence), i.e. descending on confidence.
    det_confs = torch.zeros(count)
    for idx, box in enumerate(boxes):
        det_confs[idx] = 1-box[4]
    _, order = torch.sort(det_confs)

    kept = []
    for rank in range(count):
        current = boxes[order[rank]]
        if not current[4] > 0:
            continue
        kept.append(current)
        for later in range(rank+1, count):
            candidate = boxes[order[later]]
            if bbox_iou(current, candidate, x1y1x2y2=False) > nms_thresh:
                candidate[4] = 0
    return kept

def convert2cpu(gpu_matrix):
    """Copy ``gpu_matrix`` into a new CPU ``FloatTensor`` of the same size."""
    host = torch.FloatTensor(gpu_matrix.size())
    host.copy_(gpu_matrix)
    return host

def convert2cpu_long(gpu_matrix):
    """Copy ``gpu_matrix`` into a new CPU ``LongTensor`` of the same size."""
    host = torch.LongTensor(gpu_matrix.size())
    host.copy_(gpu_matrix)
    return host

def get_region_boxes(output, conf_thresh, num_classes, anchors, num_anchors, only_objectness=1, validation=False):
    """Decode a region-layer output map into per-image lists of boxes.

    Args:
        output: tensor of shape ``(batch, (5+num_classes)*num_anchors, h, w)``
            or the same without the batch dimension.
        conf_thresh: minimum confidence for a box to be kept.
        num_classes: number of classes predicted per anchor.
        anchors: flat list of anchor values, ``len(anchors)/num_anchors`` per
            anchor; the first two of each anchor are used as width and height.
        num_anchors: number of anchors per grid cell.
        only_objectness: when truthy the objectness alone is thresholded,
            otherwise objectness times the best class probability.
        validation: together with ``only_objectness`` being falsy, appends
            ``(class_conf, class_id)`` pairs of every other class that passes
            the threshold.

    Returns:
        One list per image of boxes
        ``[cx/w, cy/h, bw/w, bh/h, det_conf, cls_max_conf, cls_max_id, ...]``.
    """
    anchor_step = len(anchors) // num_anchors
    if output.dim() == 3:
        output = output.unsqueeze(0)
    batch = output.size(0)
    assert(output.size(1) == (5+num_classes)*num_anchors)
    h = output.size(2)
    w = output.size(3)

    # Build helper tensors on the device the output lives on instead of
    # forcing CUDA, so decoding also works for CPU inference.
    device = output.device
    n_pred = batch*num_anchors*h*w

    all_boxes = []
    output = output.view(batch*num_anchors, 5+num_classes, h*w).transpose(0,1).contiguous().view(5+num_classes, n_pred)

    grid_x = torch.linspace(0, w-1, w).repeat(h,1).repeat(batch*num_anchors, 1, 1).view(n_pred).to(device)
    grid_y = torch.linspace(0, h-1, h).repeat(w,1).t().repeat(batch*num_anchors, 1, 1).view(n_pred).to(device)
    xs = torch.sigmoid(output[0]) + grid_x
    ys = torch.sigmoid(output[1]) + grid_y

    anchor_table = torch.Tensor(anchors).view(num_anchors, anchor_step)
    anchor_w = anchor_table.index_select(1, torch.LongTensor([0]))
    anchor_h = anchor_table.index_select(1, torch.LongTensor([1]))
    anchor_w = anchor_w.repeat(batch, 1).repeat(1, 1, h*w).view(n_pred).to(device)
    anchor_h = anchor_h.repeat(batch, 1).repeat(1, 1, h*w).view(n_pred).to(device)
    ws = torch.exp(output[2]) * anchor_w
    hs = torch.exp(output[3]) * anchor_h

    det_confs = torch.sigmoid(output[4])

    # Explicit dim=1 (softmax over classes) replaces the implicit-dim call.
    cls_confs = torch.softmax(output[5:5+num_classes].transpose(0,1), dim=1).detach()
    cls_max_confs, cls_max_ids = torch.max(cls_confs, 1)
    cls_max_confs = cls_max_confs.view(-1)
    cls_max_ids = cls_max_ids.view(-1)

    # Move everything to the CPU once; the filtering loop below indexes
    # element by element.
    sz_hw = h*w
    sz_hwa = sz_hw*num_anchors
    det_confs = det_confs.detach().float().cpu()
    cls_max_confs = cls_max_confs.float().cpu()
    cls_max_ids = cls_max_ids.long().cpu()
    xs = xs.detach().float().cpu()
    ys = ys.detach().float().cpu()
    ws = ws.detach().float().cpu()
    hs = hs.detach().float().cpu()
    if validation:
        cls_confs = cls_confs.view(-1, num_classes).float().cpu()

    for b in range(batch):
        boxes = []
        for cy in range(h):
            for cx in range(w):
                for i in range(num_anchors):
                    ind = b*sz_hwa + i*sz_hw + cy*w + cx
                    det_conf = det_confs[ind]
                    if only_objectness:
                        conf = det_confs[ind]
                    else:
                        conf = det_confs[ind] * cls_max_confs[ind]

                    if conf > conf_thresh:
                        bcx = xs[ind]
                        bcy = ys[ind]
                        bw = ws[ind]
                        bh = hs[ind]
                        cls_max_conf = cls_max_confs[ind]
                        cls_max_id = cls_max_ids[ind]
                        box = [bcx/w, bcy/h, bw/w, bh/h, det_conf, cls_max_conf, cls_max_id]
                        if (not only_objectness) and validation:
                            for c in range(num_classes):
                                tmp_conf = cls_confs[ind][c]
                                if c != cls_max_id and det_confs[ind]*tmp_conf > conf_thresh:
                                    box.append(tmp_conf)
                                    box.append(c)
                        boxes.append(box)
        all_boxes.append(boxes)
    return all_boxes

def plot_boxes_cv2(img, boxes, savename=None, class_names=None, color=None):
    """Draw boxes (and class names, if given) on a cv2 image.

    ``boxes`` hold relative ``(cx, cy, w, h, ...)`` coordinates; entries 5 and
    6 are read as class confidence and class id when ``class_names`` is
    given. The image is written to ``savename`` when set and is returned.
    """
    import cv2
    palette = torch.FloatTensor([[1,0,1],[0,0,1],[0,1,1],[0,1,0],[1,1,0],[1,0,0]]);

    def get_color(c, x, max_val):
        # Linear interpolation between two neighbouring palette entries.
        position = float(x)/max_val * 5
        lower = int(math.floor(position))
        upper = int(math.ceil(position))
        frac = position - lower
        blended = (1-frac) * palette[lower][c] + frac*palette[upper][c]
        return int(blended*255)

    height, width = img.shape[0], img.shape[1]
    for box in boxes:
        half_w = box[2]/2.0
        half_h = box[3]/2.0
        x1 = int(round((box[0] - half_w) * width))
        y1 = int(round((box[1] - half_h) * height))
        x2 = int(round((box[0] + half_w) * width))
        y2 = int(round((box[1] + half_h) * height))

        rgb = color if color else (255, 0, 0)
        if len(box) >= 7 and class_names:
            cls_conf = box[5]
            cls_id = box[6]
            print('%s: %f' % (class_names[cls_id], cls_conf))
            classes = len(class_names)
            offset = cls_id * 123457 % classes
            red   = get_color(2, offset, classes)
            green = get_color(1, offset, classes)
            blue  = get_color(0, offset, classes)
            if color is None:
                rgb = (red, green, blue)
            img = cv2.putText(img, class_names[cls_id], (x1,y1), cv2.FONT_HERSHEY_SIMPLEX, 1.2, rgb, 1)
        img = cv2.rectangle(img, (x1,y1), (x2,y2), rgb, 1)

    if savename:
        print("save plot results to %s" % savename)
        cv2.imwrite(savename, img)
    return img

def plot_boxes(img, boxes, savename=None, class_names=None):
    """Draw boxes (and class names, if given) on a PIL image.

    ``boxes`` hold relative ``(cx, cy, w, h, ...)`` coordinates; entries 5 and
    6 are read as class confidence and class id when ``class_names`` is
    given. The image is saved to ``savename`` when set and is returned.
    """
    palette = torch.FloatTensor([[1,0,1],[0,0,1],[0,1,1],[0,1,0],[1,1,0],[1,0,0]]);

    def get_color(c, x, max_val):
        # Linear interpolation between two neighbouring palette entries.
        position = float(x)/max_val * 5
        lower = int(math.floor(position))
        upper = int(math.ceil(position))
        frac = position - lower
        blended = (1-frac) * palette[lower][c] + frac*palette[upper][c]
        return int(blended*255)

    width, height = img.width, img.height
    draw = ImageDraw.Draw(img)
    for box in boxes:
        half_w = box[2]/2.0
        half_h = box[3]/2.0
        x1 = (box[0] - half_w) * width
        y1 = (box[1] - half_h) * height
        x2 = (box[0] + half_w) * width
        y2 = (box[1] + half_h) * height

        rgb = (255, 0, 0)
        if len(box) >= 7 and class_names:
            cls_conf = box[5]
            cls_id = box[6]
            print (' - cls_id : ', cls_id)
            print('%s: %f' % (class_names[cls_id], cls_conf))
            classes = len(class_names)
            offset = cls_id * 123457 % classes
            red   = get_color(2, offset, classes)
            green = get_color(1, offset, classes)
            blue  = get_color(0, offset, classes)
            rgb = (red, green, blue)
            draw.text((x1, y1), class_names[cls_id], fill=rgb)
        draw.rectangle([x1, y1, x2, y2], outline = rgb)

    if savename:
        print("save plot results to %s" % savename)
        img.save(savename)
    return img

def read_truths(lab_path):
    """Load a label file as an ``(N, 5)`` array.

    Returns an empty array when the file does not exist or is empty.
    """
    if not os.path.exists(lab_path) or not os.path.getsize(lab_path):
        return np.array([])
    truths = np.loadtxt(lab_path)
    # -1 lets numpy infer the row count; the former `truths.size/5` is a
    # float under Python 3 and made reshape raise TypeError. Reshaping also
    # covers the single-row case, where loadtxt returns a 1-D array.
    return truths.reshape(-1, 5)

def read_truths_args(lab_path, min_box_scale):
    """Read the truths of ``lab_path`` and drop rows whose column 3 is below ``min_box_scale``."""
    truths = read_truths(lab_path)
    kept = [
        [row[0], row[1], row[2], row[3], row[4]]
        for row in truths
        if not row[3] < min_box_scale
    ]
    return np.array(kept)

def load_class_names(namesfile):
    """Return the lines of ``namesfile`` with trailing whitespace removed."""
    with open(namesfile, 'r') as fp:
        raw_lines = fp.readlines()
    return [raw.rstrip() for raw in raw_lines]

def image2torch(img):
    """Convert an image exposing ``width``/``height``/``tobytes()`` to a ``(1, 3, H, W)`` float tensor in [0, 1].

    The raw bytes are interpreted as ``height x width x 3`` values.
    """
    w, h = img.width, img.height
    raw = torch.ByteTensor(torch.ByteStorage.from_buffer(img.tobytes()))
    # (H, W, 3) -> (3, H, W)
    chw = raw.view(h, w, 3).permute(2, 0, 1).contiguous()
    return chw.view(1, 3, h, w).float().div(255.0)

def do_detect(model, img, conf_thresh, nms_thresh, use_cuda=1):
    """Run ``model`` on a single image and return the boxes left after NMS.

    Args:
        model: network providing ``num_classes``, ``anchors`` and
            ``num_anchors`` attributes.
        img: PIL image, or numpy array in ``(H, W, C)`` layout (cv2 image).
        conf_thresh: confidence threshold passed to ``get_region_boxes``.
        nms_thresh: IoU threshold passed to ``nms``.
        use_cuda: move the input tensor to the GPU when truthy.

    Raises:
        TypeError: if ``img`` is neither a PIL image nor a numpy array.
    """
    model.eval()

    if isinstance(img, Image.Image):
        img = image2torch(img)
    elif isinstance(img, np.ndarray): # cv2 image
        img = torch.from_numpy(img.transpose(2,0,1)).float().div(255.0).unsqueeze(0)
    else:
        # Raise instead of exit(-1): terminating the interpreter from a
        # library function would take the whole notebook kernel down.
        raise TypeError('unknown image type: %s' % type(img).__name__)

    if use_cuda:
        img = img.cuda()

    # Inference only: no autograd graph is needed.
    with torch.no_grad():
        output = model(img)
    output = output.data

    boxes = get_region_boxes(output, conf_thresh, model.num_classes, model.anchors, model.num_anchors)[0]
    return nms(boxes, nms_thresh)

def read_data_cfg(datacfg):
    """Parse a ``key = value`` data configuration file into a dict of strings.

    Blank lines and lines starting with ``#`` are ignored. Only the first
    ``=`` on a line separates key from value, so values may contain ``=``.

    Args:
        datacfg: path of the configuration file.

    Returns:
        dict mapping option names to their (string) values. ``gpus`` and
        ``num_workers`` default to ``'0,1,2,3'`` and ``'10'`` when the file
        does not set them.

    Raises:
        ValueError: if a non-comment line contains no ``=``.
    """
    options = dict()
    options['gpus'] = '0,1,2,3'
    options['num_workers'] = '10'
    with open(datacfg, 'r') as fp:
        for line in fp:
            line = line.strip()
            if line == '' or line.startswith('#'):
                continue
            key, value = line.split('=', 1)
            options[key.strip()] = value.strip()
    return options

def scale_bboxes(bboxes, width, height):
    """Return a deep copy of ``bboxes`` with the first four entries rescaled.

    Entries 0 and 2 of every box are multiplied by ``width``; entries 1 and 3
    by ``height``. Any further entries are copied unchanged, and the input is
    left untouched.
    """
    import copy
    scaled = copy.deepcopy(bboxes)
    for det in scaled:
        det[0], det[2] = det[0] * width, det[2] * width
        det[1], det[3] = det[1] * height, det[3] * height
    return scaled
      
def file_lines(thefilepath):
    """Count the newline characters in a file.

    The file is read in binary mode in 8 MiB chunks so large files do not
    have to fit in memory. A final line without a trailing newline is not
    counted.

    Args:
        thefilepath: path of the file to scan.

    Returns:
        Number of ``\\n`` bytes found.
    """
    chunk_size = 8192*1024
    count = 0
    # `with` guarantees the handle is released even if a read fails.
    with open(thefilepath, 'rb') as thefile:
        for buffer in iter(lambda: thefile.read(chunk_size), b''):
            count += buffer.count(b'\n')
    return count

def get_image_size(fname):
    '''Determine the image type of a file and return its size.
    from draco

    The format is recognised from the first 24 bytes of the file (PNG, GIF
    and JPEG signatures), so the file is opened only once and the ``imghdr``
    module -- removed from the standard library in Python 3.13 -- is not
    needed.

    Args:
        fname: path of the image file.

    Returns:
        ``(width, height)`` on success, or ``None`` when the file is shorter
        than 24 bytes, the format is not recognised, or the JPEG segments
        cannot be parsed.
    '''
    # Marker codes 0xC0-0xCF are start-of-frame segments except these three
    # (huffman table, JPEG extension, arithmetic conditioning), which carry no
    # frame dimensions and must be skipped like any other segment.
    non_frame_markers = (0xc4, 0xc8, 0xcc)
    with open(fname, 'rb') as fhandle:
        head = fhandle.read(24)
        if len(head) != 24:
            return None
        if head.startswith(b'\x89PNG\r\n\x1a\n'):
            # IHDR width/height sit right after the 8-byte signature and the
            # 8-byte chunk length/type.
            width, height = struct.unpack('>ii', head[16:24])
        elif head[:6] in (b'GIF87a', b'GIF89a'):
            width, height = struct.unpack('<HH', head[6:10])
        elif head[:2] == b'\xff\xd8':
            try:
                fhandle.seek(2)  # first segment follows the SOI marker
                while True:
                    byte = fhandle.read(1)
                    while ord(byte) == 0xff:
                        byte = fhandle.read(1)
                    ftype = ord(byte)
                    if 0xc0 <= ftype <= 0xcf and ftype not in non_frame_markers:
                        break
                    # Segment length includes its own two bytes.
                    size = struct.unpack('>H', fhandle.read(2))[0] - 2
                    fhandle.seek(size, 1)
                # We are at a SOFn block: skip length (2) and precision (1).
                fhandle.seek(3, 1)
                height, width = struct.unpack('>HH', fhandle.read(4))
            except Exception: #IGNORE:W0703
                # Truncated or malformed JPEG: report "unknown size".
                return None
        else:
            return None
        return width, height

def logging(message):
    """Print ``message`` prefixed with the local time (``YYYY-MM-DD HH:MM:SS``)."""
    timestamp = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime())
    print('%s %s' % (timestamp, message))


# if __name__ == "__main__":
#     file = '/home/strider/Work/Netherlands/TUDelft/1_Courses/Sem2/DeepLearning/Project/repo1/data/dataset/voc.names'
#     class_names = load_class_names(file)
#     print (class_names)

In [27]:
# nets

#encoding:utf-8
import os
import sys
import math
import traceback
import numpy as np

import torch
import torch.nn as nn
import torch.utils.model_zoo as model_zoo
from torchvision import models
import torch.nn.functional as F
from collections import OrderedDict

# print (os.getcwd())
# from src.nets2_utils import *
# from .nets2_utils import *

# from pruning.weightPruning.layers import MaskedLinear

# Ask PyTorch's CUDA caching allocator to release the memory it is holding
# but not currently using.
torch.cuda.empty_cache()
# Module-level flag: True when PyTorch reports a usable CUDA device.
USE_GPU = torch.cuda.is_available()

## --------------------------------------- YOLOV2 --------------------------------------- ##

## ----------------- YOLOV2:cfg
def parse_cfg(cfgfile, verbose=0):
    """Parse a Darknet ``.cfg`` file into a list of block dictionaries.

    Each ``[section]`` starts a new dict whose ``'type'`` is the section
    name; the following ``key=value`` lines become string entries of that
    dict. A key literally named ``type`` is stored as ``'_type'`` so it does
    not clobber the section name. ``convolutional`` blocks get a default
    ``batch_normalize`` of ``0``. Blank lines and lines starting with ``#``
    are skipped, and only the first ``=`` splits key from value.

    Args:
        cfgfile: path of the configuration file.
        verbose: when truthy, print every block as it is completed by the
            start of the next section.

    Returns:
        List of dicts, one per section, in file order.

    Raises:
        ValueError: if an option line has no ``=`` or appears before the
            first section header.
    """
    blocks = []
    block  = None
    # `with` closes the file even when a malformed line raises.
    with open(cfgfile, 'r') as fp:
        for line in fp:
            line = line.strip()
            if line == '' or line[0] == '#':
                continue

            if line[0] == '[':
                if block:
                    if verbose:
                        print ('')
                        print (' - block : ', block)
                    blocks.append(block)
                block = dict()
                block['type'] = line.lstrip('[').rstrip(']')
                # set default value
                if block['type'] == 'convolutional':
                    block['batch_normalize'] = 0
            else:
                if block is None:
                    raise ValueError('option %r appears before any [section] in %s' % (line, cfgfile))
                key, value = line.split('=', 1)
                key = key.strip()
                if key == 'type':
                    key = '_type'
                block[key] = value.strip()

    if block:
        blocks.append(block)
    return blocks

def print_cfg(blocks):
    """Print a per-layer summary (filters, kernel, input/output size) of a cfg.

    Sizes are tracked with integer arithmetic so that strided layers round
    down the way the real layers do, and so the equality checks on two-input
    ``route`` layers compare exact integers. Pooling layers keep the channel
    count of their input.

    Args:
        blocks: list of block dicts as produced by ``parse_cfg``; a leading
            ``net`` block supplies the input width and height (416 x 416 x 3
            is assumed otherwise).
    """
    print('layer     filters    size              input                output')
    prev_width = 416
    prev_height = 416
    prev_filters = 3
    out_filters = []
    out_widths = []
    out_heights = []

    def _record(width, height, filters):
        # Remember this layer's output shape for later route/shortcut lookups.
        out_widths.append(width)
        out_heights.append(height)
        out_filters.append(filters)

    ind = -2
    for block in blocks:
        ind = ind + 1
        if block['type'] == 'net':
            prev_width = int(block['width'])
            prev_height = int(block['height'])
            continue
        elif block['type'] == 'convolutional':
            filters = int(block['filters'])
            kernel_size = int(block['size'])
            stride = int(block['stride'])
            is_pad = int(block['pad'])
            pad = (kernel_size-1)//2 if is_pad else 0
            width = (prev_width + 2*pad - kernel_size)//stride + 1
            height = (prev_height + 2*pad - kernel_size)//stride + 1
            print('%5d %-6s %4d  %d x %d / %d   %3d x %3d x%4d   ->   %3d x %3d x%4d' % (ind, 'conv', filters, kernel_size, kernel_size, stride, prev_width, prev_height, prev_filters, width, height, filters))
            prev_width = width
            prev_height = height
            prev_filters = filters
            _record(prev_width, prev_height, prev_filters)
        elif block['type'] == 'maxpool':
            pool_size = int(block['size'])
            stride = int(block['stride'])
            width = prev_width//stride
            height = prev_height//stride
            # Pooling does not change the number of channels.
            print('%5d %-6s       %d x %d / %d   %3d x %3d x%4d   ->   %3d x %3d x%4d' % (ind, 'max', pool_size, pool_size, stride, prev_width, prev_height, prev_filters, width, height, prev_filters))
            prev_width = width
            prev_height = height
            _record(prev_width, prev_height, prev_filters)
        elif block['type'] == 'avgpool':
            width = 1
            height = 1
            print('%5d %-6s                   %3d x %3d x%4d   ->  %3d' % (ind, 'avg', prev_width, prev_height, prev_filters,  prev_filters))
            prev_width = width
            prev_height = height
            _record(prev_width, prev_height, prev_filters)
        elif block['type'] == 'softmax':
            print('%5d %-6s                                    ->  %3d' % (ind, 'softmax', prev_filters))
            _record(prev_width, prev_height, prev_filters)
        elif block['type'] == 'cost':
            print('%5d %-6s                                     ->  %3d' % (ind, 'cost', prev_filters))
            _record(prev_width, prev_height, prev_filters)
        elif block['type'] == 'reorg':
            stride = int(block['stride'])
            filters = stride * stride * prev_filters
            width = prev_width//stride
            height = prev_height//stride
            print('%5d %-6s             / %d   %3d x %3d x%4d   ->   %3d x %3d x%4d' % (ind, 'reorg', stride, prev_width, prev_height, prev_filters, width, height, filters))
            prev_width = width
            prev_height = height
            prev_filters = filters
            _record(prev_width, prev_height, prev_filters)
        elif block['type'] == 'route':
            layers = block['layers'].split(',')
            # Non-positive entries are offsets relative to the current layer.
            layers = [int(i) if int(i) > 0 else int(i)+ind for i in layers]
            if len(layers) == 1:
                print('%5d %-6s %d' % (ind, 'route', layers[0]))
                prev_width = out_widths[layers[0]]
                prev_height = out_heights[layers[0]]
                prev_filters = out_filters[layers[0]]
            elif len(layers) == 2:
                print('%5d %-6s %d %d' % (ind, 'route', layers[0], layers[1]))
                prev_width = out_widths[layers[0]]
                prev_height = out_heights[layers[0]]
                assert(prev_width == out_widths[layers[1]])
                assert(prev_height == out_heights[layers[1]])
                prev_filters = out_filters[layers[0]] + out_filters[layers[1]]
            _record(prev_width, prev_height, prev_filters)
        elif block['type'] == 'region':
            print('%5d %-6s' % (ind, 'detection'))
            _record(prev_width, prev_height, prev_filters)
        elif block['type'] == 'shortcut':
            from_id = int(block['from'])
            from_id = from_id if from_id > 0 else from_id+ind
            print('%5d %-6s %d' % (ind, 'shortcut', from_id))
            prev_width = out_widths[from_id]
            prev_height = out_heights[from_id]
            prev_filters = out_filters[from_id]
            _record(prev_width, prev_height, prev_filters)
        elif block['type'] == 'connected':
            filters = int(block['output'])
            print('%5d %-6s                            %d  ->  %3d' % (ind, 'connected', prev_filters,  filters))
            prev_filters = filters
            _record(1, 1, prev_filters)
        else:
            print('unknown type %s' % (block['type']))

def load_conv_old(buf, start, conv_model):
    """Load bias then weight of a convolution from a flat float buffer.

    Args:
        buf: 1-D numpy array holding the raw weights.
        start: index in ``buf`` where this layer's data begins.
        conv_model: convolution module with ``bias`` and ``weight``.

    Returns:
        Index of the first element after this layer's data.
    """
    num_w = conv_model.weight.numel()
    num_b = conv_model.bias.numel()
    conv_model.bias.data.copy_(torch.from_numpy(buf[start:start+num_b]));   start = start + num_b
    # The buffer is flat: reshape to the kernel's shape before copying, since
    # copy_() broadcasts and rejects a 1-D source for a 4-D destination.
    flat_w = torch.from_numpy(buf[start:start+num_w])
    conv_model.weight.data.copy_(flat_w.view_as(conv_model.weight.data)); start = start + num_w
    return start

def save_conv(fp, conv_model):
    """Append a convolution's bias and then its weight to the open file ``fp``.

    When the bias lives on the GPU both tensors are first passed through
    ``convert2cpu``; otherwise they are written directly.
    """
    on_gpu = conv_model.bias.is_cuda
    for param in (conv_model.bias, conv_model.weight):
        tensor = convert2cpu(param.data) if on_gpu else param.data
        tensor.numpy().tofile(fp)

def load_conv_bn_old(buf, start, conv_model, bn_model, verbose=0):
    """Load a batch-norm + convolution pair from a flat float buffer.

    The buffer order is: BN bias, BN weight, BN running mean, BN running
    variance, then the convolution weight.

    Args:
        buf: 1-D numpy array holding the raw weights.
        start: index in ``buf`` where this layer's data begins.
        conv_model: convolution module (only ``weight`` is loaded).
        bn_model: batch-norm module.
        verbose: when truthy, print the element counts and offsets. The
            diagnostics used to be printed unconditionally, leaving this
            parameter without effect.

    Returns:
        Index of the first element after this layer's data.
    """
    num_w = conv_model.weight.numel()
    num_b = bn_model.bias.numel()
    if verbose:
        print ('      - conv weights : ', num_w)
        print ('      - bias weights : ', num_b)
        print ('      - bn_model.bias : ', bn_model.bias.shape)
        print ('      - bn_model.weight : ', bn_model.weight.shape)

    bn_model.bias.data.copy_(torch.from_numpy(buf[start:start+num_b]));     start = start + num_b
    bn_model.weight.data.copy_(torch.from_numpy(buf[start:start+num_b]));   start = start + num_b
    bn_model.running_mean.copy_(torch.from_numpy(buf[start:start+num_b]));  start = start + num_b
    bn_model.running_var.copy_(torch.from_numpy(buf[start:start+num_b]));   start = start + num_b

    if verbose:
        print ('      - start : ', start)
        print ('      - size :  ', buf[start:start+num_w].shape)
        print ('      - shape :  ', conv_model.weight.data.shape)
    # The buffer is flat: reshape to the kernel's shape before copying, since
    # copy_() broadcasts and rejects a 1-D source for a 4-D destination.
    flat_w = torch.from_numpy(buf[start:start+num_w])
    conv_model.weight.data.copy_(flat_w.view_as(conv_model.weight.data)); start = start + num_w

    return start

def save_conv_bn(fp, conv_model, bn_model):
    """Append a batch-norm + convolution pair to the open file ``fp``.

    Write order: BN bias, BN weight, BN running mean, BN running variance,
    convolution weight. When the BN bias lives on the GPU every tensor is
    first passed through ``convert2cpu``.
    """
    on_gpu = bn_model.bias.is_cuda
    ordered = (
        bn_model.bias.data,
        bn_model.weight.data,
        bn_model.running_mean,
        bn_model.running_var,
        conv_model.weight.data,
    )
    for tensor in ordered:
        if on_gpu:
            tensor = convert2cpu(tensor)
        tensor.numpy().tofile(fp)

def load_fc(buf, start, fc_model):
    """Load bias then weight of a fully connected layer from a flat buffer.

    Args:
        buf: 1-D numpy array holding the raw weights.
        start: index in ``buf`` where this layer's data begins.
        fc_model: linear module with ``bias`` and ``weight``.

    Returns:
        Index of the first element after this layer's data.
    """
    num_w = fc_model.weight.numel()
    num_b = fc_model.bias.numel()
    fc_model.bias.data.copy_(torch.from_numpy(buf[start:start+num_b]));     start = start + num_b
    # The buffer is flat: reshape to the weight matrix before copying, since
    # copy_() broadcasts and rejects a mismatching 1-D source.
    flat_w = torch.from_numpy(buf[start:start+num_w])
    fc_model.weight.data.copy_(flat_w.view_as(fc_model.weight.data));   start = start + num_w
    return start

def save_fc(fp, fc_model):
    """Append a fully connected layer's bias and then its weight to ``fp``.

    Tensors are moved to the CPU first, so models living on the GPU can be
    saved as well (``.numpy()`` is not available on CUDA tensors).
    """
    fc_model.bias.data.cpu().numpy().tofile(fp)
    fc_model.weight.data.cpu().numpy().tofile(fp)

def load_param(file, param):
    """Fill ``param`` with the next ``param.numel()`` float32 values of ``file``.

    The values are read with ``np.fromfile`` from the file's current position
    and reshaped to ``param.shape`` before being copied in place.
    """
    flat = np.fromfile(file, dtype=np.float32, count=param.numel())
    shaped = flat.reshape(param.shape)
    param.data.copy_(torch.from_numpy(shaped))

def load_conv(file, conv_model):
    """Read a convolution's bias and then its weight from the open ``file``."""
    for param in (conv_model.bias, conv_model.weight):
        load_param(file, param)

def load_conv_bn(file, conv_model, bn_model):
    """Read a batch-norm + convolution pair from the open ``file``.

    Read order: BN bias, BN weight, BN running mean, BN running variance,
    convolution weight.
    """
    ordered = (
        bn_model.bias,
        bn_model.weight,
        bn_model.running_mean,
        bn_model.running_var,
        conv_model.weight,
    )
    for tensor in ordered:
        load_param(file, tensor)


## ----------------- YOLOV2:modelling
def build_targets(pred_boxes, target, anchors, num_anchors, num_classes, nH, nW, noobject_scale, object_scale, sil_thresh, seen):
    """Build the regression/classification targets and masks for the region loss.

    Args:
        pred_boxes: ``(nB*nA*nH*nW, 4)`` tensor of predicted ``x, y, w, h`` in
            grid units, ordered batch -> anchor -> row -> column.
        target: ``(nB, K)`` tensor read in groups of five values
            ``class, x, y, w, h``; at most 50 groups are read and a group
            whose ``x`` is 0 ends the list for that image.
        anchors: flat list of anchor values, ``len(anchors)/num_anchors`` per
            anchor (width, height and, when four values are given, x/y
            offsets).
        num_anchors: number of anchors ``nA``.
        num_classes: accepted for signature compatibility; not used here.
        nH, nW: grid height and width.
        noobject_scale: initial value of the confidence mask.
        object_scale: confidence-mask value for cells responsible for a box.
        sil_thresh: predictions whose best IoU with any ground truth exceeds
            this get a confidence-mask value of 0.
        seen: number of samples seen so far; below 12800 every cell is also
            pulled towards a default position with zero log-size.

    Returns:
        ``(nGT, nCorrect, coord_mask, conf_mask, cls_mask, tx, ty, tw, th,
        tconf, tcls)`` where the tensors all have shape ``(nB, nA, nH, nW)``.
    """
    nB = target.size(0)
    nA = num_anchors
    # Integer division: the step is used as a list index and a view() size.
    anchor_step = len(anchors) // num_anchors
    conf_mask  = torch.ones(nB, nA, nH, nW) * noobject_scale
    coord_mask = torch.zeros(nB, nA, nH, nW)
    cls_mask   = torch.zeros(nB, nA, nH, nW)
    tx         = torch.zeros(nB, nA, nH, nW)
    ty         = torch.zeros(nB, nA, nH, nW)
    tw         = torch.zeros(nB, nA, nH, nW)
    th         = torch.zeros(nB, nA, nH, nW)
    tconf      = torch.zeros(nB, nA, nH, nW)
    tcls       = torch.zeros(nB, nA, nH, nW)

    nAnchors = nA*nH*nW
    nPixels  = nH*nW
    for b in range(nB):
        cur_pred_boxes = pred_boxes[b*nAnchors:(b+1)*nAnchors].t()
        cur_ious = torch.zeros(nAnchors)
        for t in range(50):
            if target[b][t*5+1] == 0:
                break
            gx = target[b][t*5+1]*nW
            gy = target[b][t*5+2]*nH
            gw = target[b][t*5+3]*nW
            gh = target[b][t*5+4]*nH
            cur_gt_boxes = torch.FloatTensor([gx,gy,gw,gh]).repeat(nAnchors,1).t()
            cur_ious = torch.max(cur_ious, bbox_ious(cur_pred_boxes, cur_gt_boxes, x1y1x2y2=False))
        # The IoUs are flat; give them the (nA, nH, nW) layout of conf_mask[b]
        # so the boolean mask lines up with the tensor it indexes.
        conf_mask[b][cur_ious.view(nA, nH, nW) > sil_thresh] = 0
    if seen < 12800:
       if anchor_step == 4:
           # Columns 2 and 3 of a 4-value anchor are its x and y offsets.
           tx = torch.FloatTensor(anchors).view(nA, anchor_step).index_select(1, torch.LongTensor([2])).view(1,nA,1,1).repeat(nB,1,nH,nW)
           ty = torch.FloatTensor(anchors).view(nA, anchor_step).index_select(1, torch.LongTensor([3])).view(1,nA,1,1).repeat(nB,1,nH,nW)
       else:
           tx.fill_(0.5)
           ty.fill_(0.5)
       tw.zero_()
       th.zero_()
       coord_mask.fill_(1)

    nGT = 0
    nCorrect = 0
    for b in range(nB):
        for t in range(50):
            if target[b][t*5+1] == 0:
                break
            nGT = nGT + 1
            best_iou = 0.0
            best_n = -1
            min_dist = 10000
            gx = target[b][t*5+1] * nW
            gy = target[b][t*5+2] * nH
            gi = int(gx)
            gj = int(gy)
            gw = target[b][t*5+3]*nW
            gh = target[b][t*5+4]*nH
            # Compare shapes only: both boxes are centred on the origin.
            gt_box = [0, 0, gw, gh]
            for n in range(nA):
                aw = anchors[anchor_step*n]
                ah = anchors[anchor_step*n+1]
                anchor_box = [0, 0, aw, ah]
                iou  = bbox_iou(anchor_box, gt_box, x1y1x2y2=False)
                if anchor_step == 4:
                    ax = anchors[anchor_step*n+2]
                    ay = anchors[anchor_step*n+3]
                    dist = pow(((gi+ax) - gx), 2) + pow(((gj+ay) - gy), 2)
                if iou > best_iou:
                    best_iou = iou
                    best_n = n
                elif anchor_step==4 and iou == best_iou and dist < min_dist:
                    best_iou = iou
                    best_n = n
                    min_dist = dist

            gt_box = [gx, gy, gw, gh]
            pred_box = pred_boxes[b*nAnchors+best_n*nPixels+gj*nW+gi]

            coord_mask[b][best_n][gj][gi] = 1
            cls_mask[b][best_n][gj][gi] = 1
            conf_mask[b][best_n][gj][gi] = object_scale
            tx[b][best_n][gj][gi] = target[b][t*5+1] * nW - gi
            ty[b][best_n][gj][gi] = target[b][t*5+2] * nH - gj
            tw[b][best_n][gj][gi] = math.log(gw/anchors[anchor_step*best_n])
            th[b][best_n][gj][gi] = math.log(gh/anchors[anchor_step*best_n+1])
            iou = bbox_iou(gt_box, pred_box, x1y1x2y2=False) # best_iou
            tconf[b][best_n][gj][gi] = iou
            tcls[b][best_n][gj][gi] = target[b][t*5]
            if iou > 0.5:
                nCorrect = nCorrect + 1

    return nGT, nCorrect, coord_mask, conf_mask, cls_mask, tx, ty, tw, th, tconf, tcls

class RegionLoss(nn.Module):
    """YOLOv2 region loss: coordinate, confidence and class terms.

    Attributes set in ``__init__`` (and commonly overwritten from the cfg):
    ``num_classes``, ``anchors`` (flat list), ``num_anchors``,
    ``anchor_step`` (values per anchor), the four ``*_scale`` weights,
    ``thresh`` (IoU above which a non-responsible prediction is not
    penalised) and ``seen`` (samples processed so far).
    """

    def __init__(self, num_classes=0, anchors=None, num_anchors=1):
        """Store the loss configuration.

        Args:
            num_classes: number of object classes.
            anchors: flat list of anchor values; ``None`` (the default) means
                a fresh empty list, so instances never share one list object.
            num_anchors: number of anchors the list describes.
        """
        super(RegionLoss, self).__init__()
        self.num_classes = num_classes
        self.anchors = [] if anchors is None else anchors
        self.num_anchors = num_anchors
        self.anchor_step = len(self.anchors) // num_anchors
        self.coord_scale = 1
        self.noobject_scale = 1
        self.object_scale = 5
        self.class_scale = 1
        self.thresh = 0.6
        self.seen = 0

    def forward(self, output, target):
        """Compute the summed region loss for a batch.

        Args:
            output: raw network output of shape
                ``(B, A*(5+num_classes), H, W)``.
            target: ground-truth tensor forwarded to ``build_targets``.

        Returns:
            Scalar tensor: sum of the x, y, w, h, confidence and class losses.
            A one-line summary of the individual terms is printed as well.
        """
        #output : BxAs*(4+1+num_classes)*H*W
        nB = output.size(0)
        nA = self.num_anchors
        nC = self.num_classes
        nH = output.size(2)
        nW = output.size(3)
        nCells = nB*nA*nH*nW
        # Work on whatever device the prediction lives on instead of assuming CUDA.
        device = output.device
        # anchor_step may have been assigned as a float from outside; sizes must be ints.
        anchor_step = int(self.anchor_step)

        output = output.view(nB, nA, (5+nC), nH, nW)
        x    = torch.sigmoid(output[:, :, 0])
        y    = torch.sigmoid(output[:, :, 1])
        w    = output[:, :, 2]
        h    = output[:, :, 3]
        conf = torch.sigmoid(output[:, :, 4])
        cls  = output[:, :, 5:5+nC]
        # One row of class scores per (batch, anchor, cell).
        cls  = cls.reshape(nB*nA, nC, nH*nW).transpose(1,2).contiguous().view(nCells, nC)

        # Decode predictions into grid-unit boxes (detached: used for matching only).
        pred_boxes = torch.empty(4, nCells, device=device)
        grid_x = torch.linspace(0, nW-1, nW).repeat(nH,1).repeat(nB*nA, 1, 1).view(nCells).to(device)
        grid_y = torch.linspace(0, nH-1, nH).repeat(nW,1).t().repeat(nB*nA, 1, 1).view(nCells).to(device)
        anchor_w = torch.Tensor(self.anchors).view(nA, anchor_step).index_select(1, torch.LongTensor([0])).to(device)
        anchor_h = torch.Tensor(self.anchors).view(nA, anchor_step).index_select(1, torch.LongTensor([1])).to(device)
        anchor_w = anchor_w.repeat(nB, 1).repeat(1, 1, nH*nW).view(nCells)
        anchor_h = anchor_h.repeat(nB, 1).repeat(1, 1, nH*nW).view(nCells)
        # Flatten explicitly: the grids and anchors are 1-D.
        pred_boxes[0] = x.data.reshape(nCells) + grid_x
        pred_boxes[1] = y.data.reshape(nCells) + grid_y
        pred_boxes[2] = torch.exp(w.data.reshape(nCells)) * anchor_w
        pred_boxes[3] = torch.exp(h.data.reshape(nCells)) * anchor_h
        pred_boxes = convert2cpu(pred_boxes.transpose(0,1).contiguous().view(-1,4))

        nGT, nCorrect, coord_mask, conf_mask, cls_mask, tx, ty, tw, th, tconf,tcls = build_targets(pred_boxes, target.data, self.anchors, nA, nC, \
                                                               nH, nW, self.noobject_scale, self.object_scale, self.thresh, self.seen)
        cls_mask = (cls_mask == 1)
        nProposals = int((conf > 0.25).sum().item())

        tx    = tx.to(device)
        ty    = ty.to(device)
        tw    = tw.to(device)
        th    = th.to(device)
        tconf = tconf.to(device)
        # Mask and targets share the (nB, nA, nH, nW) layout, so index directly.
        tcls  = tcls[cls_mask].long().to(device)

        coord_mask = coord_mask.to(device)
        # sqrt because the mask multiplies both operands of a squared error.
        conf_mask  = conf_mask.to(device).sqrt()
        cls_mask   = cls_mask.view(-1, 1).repeat(1,nC).to(device)
        cls        = cls[cls_mask].view(-1, nC)

        sum_squares = nn.MSELoss(reduction='sum')
        loss_x = self.coord_scale * sum_squares(x*coord_mask, tx*coord_mask)/2.0
        loss_y = self.coord_scale * sum_squares(y*coord_mask, ty*coord_mask)/2.0
        loss_w = self.coord_scale * sum_squares(w*coord_mask, tw*coord_mask)/2.0
        loss_h = self.coord_scale * sum_squares(h*coord_mask, th*coord_mask)/2.0
        loss_conf = sum_squares(conf*conf_mask, tconf*conf_mask)/2.0
        loss_cls = self.class_scale * nn.CrossEntropyLoss(reduction='sum')(cls, tcls)
        loss = loss_x + loss_y + loss_w + loss_h + loss_conf + loss_cls
        print('%d: nGT %d, recall %d, proposals %d, loss: x %f, y %f, w %f, h %f, conf %f, cls %f, total %f' % (self.seen, nGT, nCorrect, nProposals, loss_x.item(), loss_y.item(), loss_w.item(), loss_h.item(), loss_conf.item(), loss_cls.item(), loss.item()))
        return loss

class MaxPoolStride1(nn.Module):
    """2x2 max pooling with stride 1 that keeps the spatial size.

    The input is padded by one replicated column on the right and one
    replicated row at the bottom before pooling.
    """

    def __init__(self):
        super(MaxPoolStride1, self).__init__()

    def forward(self, x):
        padded = F.pad(x, (0,1,0,1), mode='replicate')
        return F.max_pool2d(padded, 2, stride=1)

class Reorg(nn.Module):
    """Space-to-depth layer: fold each ``stride x stride`` patch into channels.

    Maps ``(B, C, H, W)`` to ``(B, stride*stride*C, H/stride, W/stride)``;
    ``H`` and ``W`` must be divisible by ``stride``.
    """

    def __init__(self, stride=2):
        super(Reorg, self).__init__()
        self.stride = stride

    def forward(self, x):
        s = self.stride
        assert(x.data.dim() == 4)
        B, C = x.data.size(0), x.data.size(1)
        H, W = x.data.size(2), x.data.size(3)
        assert(H % s == 0)
        assert(W % s == 0)
        out_h = H // s
        out_w = W // s
        # Split each spatial axis into (block index, offset inside block) ...
        x = x.view((B, C, out_h, s, out_w, s)).transpose(3,4).contiguous()
        # ... gather the in-block offsets into one axis ...
        x = x.view((B, C, out_h*out_w, s*s)).transpose(2,3).contiguous()
        # ... and move that axis in front of the channels.
        x = x.view((B, C, s*s, out_h, out_w)).transpose(1,2).contiguous()
        return x.view((B, s*s*C, out_h, out_w))

class GlobalAvgPool2d(nn.Module):
    """Average every channel over its full spatial extent.

    Maps ``(N, C, H, W)`` to ``(N, C)``.
    """

    def __init__(self):
        super(GlobalAvgPool2d, self).__init__()

    def forward(self, x):
        batch, channels = x.data.size(0), x.data.size(1)
        spatial = (x.data.size(2), x.data.size(3))
        pooled = F.avg_pool2d(x, spatial)
        return pooled.view(batch, channels)

# for route and shortcut
class EmptyModule(nn.Module):
    """Identity placeholder so route/shortcut blocks occupy a module slot."""

    def __init__(self):
        super(EmptyModule, self).__init__()

    def forward(self, x):
        # Pass the input through untouched.
        return x

# support route shortcut and reorg
class Darknet(nn.Module):

    def __init__(self, cfgfile):
        """Build the network described by the Darknet cfg at ``cfgfile``.

        Sets ``blocks`` (parsed cfg), ``models`` (one module per layer block),
        ``loss`` (the last module), ``width``/``height`` (from the first
        block) and, when the last block is a ``region`` block, copies
        ``anchors``, ``num_anchors``, ``anchor_step`` and ``num_classes`` from
        that loss module. ``header`` and ``seen`` start zeroed.
        """
        super(Darknet, self).__init__()
        self.blocks = parse_cfg(cfgfile)
        self.models = self.create_network(self.blocks) # merge conv, bn,leaky
        self.loss = self.models[-1]

        net_block = self.blocks[0]
        self.width = int(net_block['width'])
        self.height = int(net_block['height'])

        if self.blocks[-1]['type'] == 'region':
            for attr in ('anchors', 'num_anchors', 'anchor_step', 'num_classes'):
                setattr(self, attr, getattr(self.loss, attr))

        self.header = torch.IntTensor([0,0,0,0])
        self.seen = 0

    def forward(self, x):
        ind = -2
        self.loss = None
        outputs = dict()
        for block in self.blocks:
            ind = ind + 1
            #if ind > 0:
            #    return x

            if block['type'] == 'net':
                continue
            elif block['type'] == 'convolutional' or block['type'] == 'maxpool' or block['type'] == 'reorg' or block['type'] == 'avgpool' or block['type'] == 'softmax' or block['type'] == 'connected':
                x = self.models[ind](x)
                outputs[ind] = x

            elif block['type'] == 'route':
                layers = block['layers'].split(',')
                layers = [int(i) if int(i) > 0 else int(i)+ind for i in layers]
                if len(layers) == 1:
                    x = outputs[layers[0]]
                    outputs[ind] = x
                elif len(layers) == 2:
                    x1 = outputs[layers[0]]
                    x2 = outputs[layers[1]]
                    x = torch.cat((x1,x2),1)
                    outputs[ind] = x

            elif block['type'] == 'shortcut':
                from_layer = int(block['from'])
                activation = block['activation']
                from_layer = from_layer if from_layer > 0 else from_layer + ind
                x1 = outputs[from_layer]
                x2 = outputs[ind-1]
                x  = x1 + x2
                if activation == 'leaky':
                    x = F.leaky_relu(x, 0.1, inplace=True)
                elif activation == 'relu':
                    x = F.relu(x, inplace=True)
                outputs[ind] = x

            elif block['type'] == 'region':
                continue
                if self.loss:
                    self.loss = self.loss + self.models[ind](x)
                else:
                    self.loss = self.models[ind](x)
                outputs[ind] = None

            elif block['type'] == 'cost':
                continue
            else:
                print('unknown type %s' % (block['type']))

        return x

    def print_network(self):
        """Print the layer-by-layer summary of this network's cfg blocks."""
        blocks = self.blocks
        print_cfg(blocks)

    def create_network(self, blocks):
        models = nn.ModuleList()
    
        prev_filters = 3
        out_filters =[]
        conv_id = 0
        for block in blocks:
            if block['type'] == 'net':
                prev_filters = int(block['channels'])
                continue
            elif block['type'] == 'convolutional':
                conv_id = conv_id + 1
                batch_normalize = int(block['batch_normalize'])
                filters = int(block['filters'])
                kernel_size = int(block['size'])
                stride = int(block['stride'])
                is_pad = int(block['pad'])
                pad    = int((kernel_size-1)/2) if is_pad else 0
                activation = block['activation']
                model = nn.Sequential()
                if batch_normalize:
                    model.add_module('conv{0}'.format(conv_id), nn.Conv2d(prev_filters, filters, kernel_size, stride, pad, bias=False))
                    model.add_module('bn{0}'.format(conv_id), nn.BatchNorm2d(filters))
                    #model.add_module('bn{0}'.format(conv_id), BN2d(filters))
                else:
                    model.add_module('conv{0}'.format(conv_id), nn.Conv2d(prev_filters, filters, kernel_size, stride, pad))
                if activation == 'leaky':
                    model.add_module('leaky{0}'.format(conv_id), nn.LeakyReLU(0.1, inplace=True))
                elif activation == 'relu':
                    model.add_module('relu{0}'.format(conv_id), nn.ReLU(inplace=True))
                prev_filters = filters
                out_filters.append(prev_filters)
                models.append(model)
            elif block['type'] == 'maxpool':
                pool_size = int(block['size'])
                stride = int(block['stride'])
                if stride > 1:
                    model = nn.MaxPool2d(pool_size, stride)
                else:
                    model = MaxPoolStride1()
                out_filters.append(prev_filters)
                models.append(model)
            elif block['type'] == 'avgpool':
                model = GlobalAvgPool2d()
                out_filters.append(prev_filters)
                models.append(model)
            elif block['type'] == 'softmax':
                model = nn.Softmax()
                out_filters.append(prev_filters)
                models.append(model)
            elif block['type'] == 'cost':
                if block['_type'] == 'sse':
                    model = nn.MSELoss(size_average=True)
                elif block['_type'] == 'L1':
                    model = nn.L1Loss(size_average=True)
                elif block['_type'] == 'smooth':
                    model = nn.SmoothL1Loss(size_average=True)
                out_filters.append(1)
                models.append(model)
            elif block['type'] == 'reorg':
                stride = int(block['stride'])
                prev_filters = stride * stride * prev_filters
                out_filters.append(prev_filters)
                models.append(Reorg(stride))
            elif block['type'] == 'route':
                layers = block['layers'].split(',')
                ind = len(models)
                layers = [int(i) if int(i) > 0 else int(i)+ind for i in layers]
                if len(layers) == 1:
                    prev_filters = out_filters[layers[0]]
                elif len(layers) == 2:
                    assert(layers[0] == ind - 1)
                    prev_filters = out_filters[layers[0]] + out_filters[layers[1]]
                out_filters.append(prev_filters)
                models.append(EmptyModule())
            elif block['type'] == 'shortcut':
                ind = len(models)
                prev_filters = out_filters[ind-1]
                out_filters.append(prev_filters)
                models.append(EmptyModule())
            elif block['type'] == 'connected':
                filters = int(block['output'])
                if block['activation'] == 'linear':
                    model = nn.Linear(prev_filters, filters)
                elif block['activation'] == 'leaky':
                    model = nn.Sequential(
                               nn.Linear(prev_filters, filters),
                               nn.LeakyReLU(0.1, inplace=True))
                elif block['activation'] == 'relu':
                    model = nn.Sequential(
                               nn.Linear(prev_filters, filters),
                               nn.ReLU(inplace=True))
                prev_filters = filters
                out_filters.append(prev_filters)
                models.append(model)
            elif block['type'] == 'region':
                loss = RegionLoss()
                anchors = block['anchors'].split(',')
                loss.anchors = [float(i) for i in anchors]
                loss.num_classes = int(block['classes'])
                loss.num_anchors = int(block['num'])
                loss.anchor_step = len(loss.anchors)/loss.num_anchors
                loss.object_scale = float(block['object_scale'])
                loss.noobject_scale = float(block['noobject_scale'])
                loss.class_scale = float(block['class_scale'])
                loss.coord_scale = float(block['coord_scale'])
                out_filters.append(prev_filters)
                models.append(loss)
            else:
                print('unknown type %s' % (block['type']))
    
        return models

    def load_weights(self, weightfile):
        """Stream a Darknet ``.weights`` file into the layers held in ``self.models``.

        The file begins with three int32 values (major, minor, revision) and a
        "seen" counter. The counter is read as int64 when
        ``major * 10 + minor >= 2`` (with both below 1000) and as int32
        otherwise. All four header values are consumed and discarded.

        Blocks are then walked in order; 'convolutional' and 'connected' blocks
        delegate to ``load_conv_bn`` / ``load_conv`` / ``load_fc``, which read
        from the open file handle.

        Args:
            weightfile: path of the weights file to read.
        """
        # Block types for which nothing is read from the file.
        weightless = ('maxpool', 'reorg', 'route', 'shortcut',
                      'region', 'avgpool', 'softmax', 'cost')

        with open(weightfile, mode='rb') as fp:
            major = np.fromfile(fp, dtype=np.int32, count=1)
            minor = np.fromfile(fp, dtype=np.int32, count=1)
            np.fromfile(fp, dtype=np.int32, count=1)  # revision
            wide_seen = major * 10 + minor >= 2 and major < 1000 and minor < 1000
            np.fromfile(fp, dtype=np.int64 if wide_seen else np.int32, count=1)  # seen

            # The 'net' block has no entry in self.models, hence the -2 start:
            # the first real layer ends up at index 0.
            layer_idx = -2
            for block in self.blocks:
                if layer_idx >= len(self.models):
                    break
                layer_idx += 1

                kind = block['type']
                if kind == 'net':
                    continue

                if kind == 'convolutional':
                    layer = self.models[layer_idx]
                    if int(block['batch_normalize']):
                        load_conv_bn(fp, layer[0], layer[1])
                    else:
                        load_conv(fp, layer[0])
                elif kind == 'connected':
                    layer = self.models[layer_idx]
                    # Non-linear activations wrap the Linear layer in a container,
                    # so the Linear itself sits at position 0.
                    load_fc(fp, layer if block['activation'] == 'linear' else layer[0])
                elif kind not in weightless:
                    print('unknown type %s' % (block['type']))

    def load_weights_old(self, weightfile):
        """Buffer-based weight loader with verbose per-block debug output.

        Reads four int32 header values (stored as ``self.header``; the fourth
        becomes ``self.seen``) and the rest of the file as one float32 buffer.
        The buffer is then consumed block by block through the offset-based
        helpers ``load_conv_bn`` / ``load_conv`` / ``load_fc``, each of which
        returns the next read offset.

        A failure while loading a convolutional block prints the traceback and
        terminates the process with exit status 1.

        Args:
            weightfile: path of the weights file to read.
        """
        # The context manager releases the handle even when a read raises.
        with open(weightfile, 'rb') as fp:
            header      = np.fromfile(fp, count=4, dtype=np.int32)
            self.header = torch.from_numpy(header)
            self.seen   = self.header[3]
            buf         = np.fromfile(fp, dtype = np.float32)

        # Block types that consume nothing from the buffer.
        weightless = ('maxpool', 'reorg', 'route', 'shortcut',
                      'region', 'avgpool', 'softmax', 'cost')

        start = 0
        ind = -2  # the 'net' block has no entry in self.models
        for block_i, block in enumerate(self.blocks):
            print ('')
            print (' ----------------- block : ', block_i, '(start:',start,')')
            print (' || --------------> block : ', block)
            if start >= buf.size:
                break
            ind = ind + 1
            print (' || ---------------> self.models[ind] : ', self.models[ind])
            if block['type'] == 'net':
                continue
            elif block['type'] == 'convolutional':
                try:
                    model           = self.models[ind]
                    batch_normalize = int(block['batch_normalize'])
                    if batch_normalize:
                        start = load_conv_bn(buf, start, model[0], model[1])
                    else:
                        start = load_conv(buf, start, model[0])
                except Exception:
                    # Only real errors are reported here; KeyboardInterrupt and
                    # SystemExit propagate untouched.
                    print (' - [Err] Block :', block_i)
                    traceback.print_exc()
                    import sys; sys.exit(1)
            elif block['type'] == 'connected':
                model = self.models[ind]
                # Non-linear activations wrap the Linear layer in a container.
                if block['activation'] != 'linear':
                    start = load_fc(buf, start, model[0])
                else:
                    start = load_fc(buf, start, model)
            elif block['type'] in weightless:
                pass
            else:
                print('unknown type %s' % (block['type']))

    def save_weights(self, outfile, cutoff=0):
        """Write the network weights to ``outfile`` in Darknet binary layout.

        The file starts with ``self.header`` (whose fourth entry is refreshed
        from ``self.seen``), followed by the parameters of every
        'convolutional' and 'connected' block, written by ``save_conv_bn`` /
        ``save_conv`` / ``save_fc``.

        Args:
            outfile: destination path; an existing file is overwritten.
            cutoff: index of the last block to save (block 0, the 'net' block,
                is never written). Values <= 0 save every block.
        """
        if cutoff <= 0:
            cutoff = len(self.blocks)-1

        # Block types that own no parameters and are skipped silently.
        weightless = ('maxpool', 'reorg', 'route', 'shortcut',
                      'region', 'avgpool', 'softmax', 'cost')

        self.header[3] = self.seen
        # The context manager closes the file even if a save helper raises.
        with open(outfile, 'wb') as fp:
            self.header.numpy().tofile(fp)

            ind = -1
            for blockId in range(1, cutoff+1):
                ind = ind + 1
                block = self.blocks[blockId]
                if block['type'] == 'convolutional':
                    model = self.models[ind]
                    batch_normalize = int(block['batch_normalize'])
                    if batch_normalize:
                        save_conv_bn(fp, model[0], model[1])
                    else:
                        save_conv(fp, model[0])
                elif block['type'] == 'connected':
                    model = self.models[ind]
                    # Mirrors load_weights: a non-linear activation wraps the
                    # Linear layer in a container (Linear at index 0), while a
                    # 'linear' block is the Linear layer itself.
                    if block['activation'] != 'linear':
                        save_fc(fp, model[0])
                    else:
                        save_fc(fp, model)
                elif block['type'] in weightless:
                    pass
                else:
                    print('unknown type %s' % (block['type']))

def getYOLOv2(cfgfile, weightfile):
    """Build a Darknet network from ``cfgfile`` and load ``weightfile`` into it.

    The network is moved to the GPU when the module-level ``USE_GPU`` flag is
    truthy. Returns the constructed network.
    """
    net = Darknet(cfgfile)
    net.load_weights(weightfile)
    if not USE_GPU:
        return net
    net.cuda()
    return net

def testYOLOv2():

    cfgfile    = 'data/cfg/github_pjreddie/yolov2-voc.cfg'
    weightfile = 'data/weights/github_pjreddie/yolov2-voc.weights'
    model      = getYOLOv2(cfgfile, weightfile) 
    print (' - 1. Model is loaded!')

    imgdir      = 'data/dataset/yolo_samples'
    namesfile   = 'data/dataset/voc.names'
    class_names = load_class_names(namesfile)
    for each in ['dog.jpg', 'eagle.jpg',  'giraffe.jpg',  'horses.jpg',  'person.jpg',  'scream.jpg']:
        try:
            print ('')
            imgfile = os.path.join(imgdir, each)        
            img     = Image.open(imgfile).convert('RGB')
            sized   = img.resize((model.width, model.height))

            start  = time.time()
            boxes  = do_detect(model, sized, 0.5, 0.4, USE_GPU)
            finish = time.time()
            print('%s: Predicted in %f seconds.' % (imgfile, (finish-start)))
            plot_boxes(img, boxes, os.path.join(imgdir, '_' + each), class_names)
        except:
            traceback.print_exc()
            pass

class TinyYoloNet(nn.Module):
    """Hand-written tiny YOLO network for 20 classes and 5 anchor boxes.

    The backbone is eight 3x3 conv + BatchNorm + LeakyReLU(0.1) stages, with
    2x2 max-pooling after the first five and ``MaxPoolStride1`` after the
    sixth. A final 1x1 convolution produces
    ``(5 + num_classes) * num_anchors`` output channels.
    """

    def __init__(self):
        super(TinyYoloNet, self).__init__()
        self.seen = 0
        self.num_classes = 20
        # Flat list of anchor (w, h) pairs.
        self.anchors = [1.08,1.19,  3.42,4.41,  6.63,11.38,  9.42,5.11,  16.62,10.52]
        # Integer division: under Python 3 `/` yields a float, which would make
        # num_output a float and break the nn.Conv2d channel count below.
        self.num_anchors = len(self.anchors)//2
        num_output = (5+self.num_classes)*self.num_anchors
        self.width = 160
        self.height = 160

        self.loss = RegionLoss(self.num_classes, self.anchors, self.num_anchors)
        self.cnn = nn.Sequential(OrderedDict([
            # conv1
            ('conv1', nn.Conv2d( 3, 16, 3, 1, 1, bias=False)),
            ('bn1', nn.BatchNorm2d(16)),
            ('leaky1', nn.LeakyReLU(0.1, inplace=True)),
            ('pool1', nn.MaxPool2d(2, 2)),

            # conv2
            ('conv2', nn.Conv2d(16, 32, 3, 1, 1, bias=False)),
            ('bn2', nn.BatchNorm2d(32)),
            ('leaky2', nn.LeakyReLU(0.1, inplace=True)),
            ('pool2', nn.MaxPool2d(2, 2)),

            # conv3
            ('conv3', nn.Conv2d(32, 64, 3, 1, 1, bias=False)),
            ('bn3', nn.BatchNorm2d(64)),
            ('leaky3', nn.LeakyReLU(0.1, inplace=True)),
            ('pool3', nn.MaxPool2d(2, 2)),

            # conv4
            ('conv4', nn.Conv2d(64, 128, 3, 1, 1, bias=False)),
            ('bn4', nn.BatchNorm2d(128)),
            ('leaky4', nn.LeakyReLU(0.1, inplace=True)),
            ('pool4', nn.MaxPool2d(2, 2)),

            # conv5
            ('conv5', nn.Conv2d(128, 256, 3, 1, 1, bias=False)),
            ('bn5', nn.BatchNorm2d(256)),
            ('leaky5', nn.LeakyReLU(0.1, inplace=True)),
            ('pool5', nn.MaxPool2d(2, 2)),

            # conv6
            ('conv6', nn.Conv2d(256, 512, 3, 1, 1, bias=False)),
            ('bn6', nn.BatchNorm2d(512)),
            ('leaky6', nn.LeakyReLU(0.1, inplace=True)),
            ('pool6', MaxPoolStride1()),

            # conv7
            ('conv7', nn.Conv2d(512, 1024, 3, 1, 1, bias=False)),
            ('bn7', nn.BatchNorm2d(1024)),
            ('leaky7', nn.LeakyReLU(0.1, inplace=True)),

            # conv8
            ('conv8', nn.Conv2d(1024, 1024, 3, 1, 1, bias=False)),
            ('bn8', nn.BatchNorm2d(1024)),
            ('leaky8', nn.LeakyReLU(0.1, inplace=True)),

            # output
            ('output', nn.Conv2d(1024, num_output, 1, 1, 0)),
        ]))

    def forward(self, x):
        """Run ``x`` through the convolutional stack and return the raw output map."""
        x = self.cnn(x)
        return x

    def print_network(self):
        """Print the module structure."""
        print(self)

    def load_weights(self, path):
        """Load weights from ``path`` using the offset-based conv loaders.

        The whole file is read as float32; the first four values are skipped
        (presumably the header words -- confirm against the file format). Each
        loader call returns the offset at which the next layer starts.
        """
        #buf = np.fromfile('tiny-yolo-voc.weights', dtype = np.float32)
        buf = np.fromfile(path, dtype = np.float32)
        start = 4
        
        # Indices address self.cnn positionally: stages 1-6 occupy four slots
        # each (conv, bn, leaky, pool); stages 7-8 occupy three (no pool).
        start = load_conv_bn(buf, start, self.cnn[0], self.cnn[1])
        start = load_conv_bn(buf, start, self.cnn[4], self.cnn[5])
        start = load_conv_bn(buf, start, self.cnn[8], self.cnn[9])
        start = load_conv_bn(buf, start, self.cnn[12], self.cnn[13])
        start = load_conv_bn(buf, start, self.cnn[16], self.cnn[17])
        start = load_conv_bn(buf, start, self.cnn[20], self.cnn[21])
        
        start = load_conv_bn(buf, start, self.cnn[24], self.cnn[25])
        start = load_conv_bn(buf, start, self.cnn[27], self.cnn[28])
        # The output conv has a bias and no batch norm.
        start = load_conv(buf, start, self.cnn[30])

## --------------------------------------- YOLOV1 --------------------------------------- ##


def getYOLOv1Self(name=''):
    """Create a ``YOLOv1Self`` model whose feature layers start from VGG16-BN.

    The model is built with the VGG 'D' layout and batch norm. Every
    ``features*`` entry of the pretrained torchvision ``vgg16_bn`` state dict
    that also exists in the new model is copied across; all other parameters
    keep their fresh initialisation.

    Args:
        name: model name forwarded to ``YOLOv1Self``. An empty string prints a
            message and exits the process with status 1.
    """
    if name == '':
        print (' - Pass a name for your model')
        sys.exit(1)

    # VGG layer layouts: numbers are conv output channels, 'M' is max-pooling.
    cfg = {
        'A': [64, 'M', 128, 'M', 256, 256, 'M', 512, 512, 'M', 512, 512, 'M'],
        'B': [64, 64, 'M', 128, 128, 'M', 256, 256, 'M', 512, 512, 'M', 512, 512, 'M'],
        'D': [64, 64, 'M', 128, 128, 'M', 256, 256, 256, 'M', 512, 512, 512, 'M', 512, 512, 512, 'M'],
        'E': [64, 64, 'M', 128, 128, 'M', 256, 256, 256, 256, 'M', 512, 512, 512, 512, 'M', 512, 512, 512, 512, 'M'],
    }

    yolo       = YOLOv1Self(name, cfg['D'], batch_norm=True)
    pretrained = models.vgg16_bn(pretrained=True).state_dict()
    merged     = yolo.state_dict()

    for key, tensor in pretrained.items():
        if key.startswith('features') and key in merged.keys():
            merged[key] = tensor

    yolo.load_state_dict(merged)
    return yolo

class YOLOv1Self(nn.Module):
    """YOLOv1-style detector: VGG-like conv features plus two masked FC layers.

    The classifier maps the flattened 512x7x7 feature map to 1470 values,
    which ``forward`` squashes with a sigmoid and reshapes to (-1, 7, 7, 30).
    """

    def __init__(self, name, cfg, batch_norm, image_size=448):
        """
        Args:
            name: identifier stored on the instance.
            cfg: layer layout consumed by ``getFeatureLayers``.
            batch_norm: whether each conv is followed by BatchNorm2d.
            image_size: stored on the instance; not used when building layers.
        """
        super(YOLOv1Self, self).__init__()
        self.name     = name
        self.features = self.getFeatureLayers(cfg, batch_norm)
        self.linear1  = MaskedLinear(512 * 7 * 7, 4096)
        self.linear2  = MaskedLinear(4096, 1470)

        # Regression head stacked on top of the features; MaskedLinear stands
        # in for the plain nn.Linear layers of the same sizes.
        head = [self.linear1, nn.ReLU(True), nn.Dropout(), self.linear2]
        self.classifier = nn.Sequential(*head)

        self._initialize_weights()
        self.image_size = image_size

    def forward(self, x):
        """Return the sigmoid-activated prediction grid of shape (-1, 7, 7, 30)."""
        feats  = self.features(x)
        flat   = feats.view(feats.size(0), -1)
        scores = torch.sigmoid(self.classifier(flat))
        return scores.view(-1,7,7,30)

    def _initialize_weights(self):
        """Initialise conv, batch-norm and linear parameters in place."""
        for module in self.modules():
            if isinstance(module, nn.Conv2d):
                fan_out = module.kernel_size[0] * module.kernel_size[1] * module.out_channels
                module.weight.data.normal_(0, math.sqrt(2. / fan_out))
                if module.bias is not None:
                    module.bias.data.zero_()
            elif isinstance(module, nn.BatchNorm2d):
                module.weight.data.fill_(1)
                module.bias.data.zero_()
            elif isinstance(module, nn.Linear):
                module.weight.data.normal_(0, 0.01)
                module.bias.data.zero_()


    def getFeatureLayers(self, cfg, batch_norm=False):
        """Translate a VGG-style layout list into an ``nn.Sequential``.

        Integers in ``cfg`` become 3x3 convolutions (padding 1) with that many
        output channels, each followed by ReLU and, when ``batch_norm`` is
        set, preceded by BatchNorm2d. ``'M'`` becomes a 2x2 max-pool with
        stride 2. The first 64-channel convolution uses stride 2; every other
        convolution uses stride 1.
        """
        in_channels      = 3
        conv_kernel      = 3
        pool_kernel      = 2
        pool_stride      = 2
        first_64_pending = True

        layers = []
        for item in cfg:
            if item == 'M': # max-pooling
                layers.append(nn.MaxPool2d(kernel_size=pool_kernel, stride=pool_stride))
                continue

            stride = 1
            if item == 64 and first_64_pending:
                stride = 2
                first_64_pending = False

            layers.append(nn.Conv2d(in_channels, item, kernel_size=conv_kernel, stride=stride, padding=1))
            if batch_norm:
                layers.append(nn.BatchNorm2d(item))
            layers.append(nn.ReLU(inplace=True))
            in_channels = item

        return nn.Sequential(*layers)

    def set_masks(self, masks):
        """Apply ``masks[0]`` to the first FC layer and ``masks[1]`` to the second."""
        first_mask, second_mask = masks[0], masks[1]
        self.linear1.set_mask(first_mask)
        self.linear2.set_mask(second_mask)

def testYOLOv1():
    """Placeholder smoke test for the YOLOv1 model; currently disabled.

    When enabled, it would build the model, run one random 448x448 image
    through it and print the output size.
    """
    smoke_test_enabled = False
    if not smoke_test_enabled:
        return

    net    = getYOLOv1Self()
    img    = Variable(torch.rand(1,3,448,448))
    output = net(img)
    print(output.size())

# if __name__ == '__main__':
#     # testYOLOv1()
#     # testYOLOv2()
#     # blocks = parse_cfg('/home/strider/Work/Netherlands/TUDelft/1_Courses/Sem2/DeepLearning/Project/repo1/data/cfg/github_pjreddie/yolov2-voc.cfg',1)
#     blocks = parse_cfg('/home/strider/Work/Netherlands/TUDelft/1_Courses/Sem2/DeepLearning/Project/repo1/data/cfg/github_pjreddie/yolov1.cfg',1)
#     model  = create_network(self.blocks) # merge conv, bn,leaky
    



"""
URLs
    - YOLOv1 
        - https://pjreddie.com/darknet/yolov1/
            - wget http://pjreddie.com/media/files/yolov1/yolov1.weights (~800MB) [trained on 2007 train/val+ 2012 train/val]
            - https://raw.githubusercontent.com/pjreddie/darknet/master/cfg/yolov1.cfg (output = 1715 = 7x7x(3x5 + 20) ) 
                - Locally Connected FC (https://discuss.pytorch.org/t/how-does-pytorch-implement-local-convolutional-layer-local-connected-layer/4316)
                    - https://github.com/pjreddie/darknet/issues/876
                    - https://github.com/pytorch/pytorch/issues/499
                    - https://github.com/pytorch/pytorch/compare/master...1zb:conv-local
                    - Theory : https://www.cs.toronto.edu/~jlucas/teaching/csc411/lectures/lec11_handout.pdf
                - Locally Connected FC (https://www.tensorflow.org/api_docs/python/tf/keras/layers/LocallyConnected2D)
    - YOLOv2    
        - https://pjreddie.com/darknet/yolov2/
            - https://github.com/pjreddie/darknet/blob/master/cfg/yolov2-voc.cfg [416 x 416]
            - wget https://pjreddie.com/media/files/yolov2-voc.weights (~MB) [trained on 2007 train/val+ 2012 train/val]
        - On other GitHub repos
            - wget http://pjreddie.com/media/files/yolo.weights
            - wget https://pjreddie.com/media/files/yolo-voc.weights
    - Dataset
        - <> 

Results
    - YOLOv1 (inside YOLO paper)
        - (2007 + 2012) = 63.4 mAP
        - (2007 + 2012) = 66.4 mAP (VGG-16)
    - repo1 (https://github.com/yxlijun/tensorflow-yolov1)
        - (2007 + 2012) = 65.3 mAP (VGG-16)
        - (2007 + 2012) = 66.12 mAP (VGG-19)
        - (2007 + 2012) = 65.23 mAP (Resnet)

Pre-trained Weights
    - YOLOv1
        - repo1 : https://github.com/dshahrokhian/YOLO_tensorflow
            - YOLO_small.ckpt
        - repo1 : https://docs.openvinotoolkit.org/latest/_docs_MO_DG_prepare_model_convert_model_tf_specific_Convert_YOLO_From_Tensorflow.html
            - convert from pjreddie --> tensorflow --> convert to pytorch

Converter
    - https://github.com/microsoft/MMdnn
    - https://github.com/marvis/pytorch-caffe-darknet-convert
    - https://github.com/AceCoooool/YOLO-pytorch/blob/master/tools/yad2t.py
    - https://github.com/thtrieu/darkflow.git
        - git clone https://github.com/thtrieu/darkflow.git
        - cd darkflow
        - pip install -e .
        - wget http://pjreddie.com/media/files/yolov1/yolov1.weights
        - flow --model cfg/v1.1/yolov1.cfg --load ../../repo1/data/weights/github_pjreddie/yolov1.weights --savepb
        - Colab
            - # ! git clone https://github.com/thtrieu/darkflow.git
                # ! cd darkflow && pip install -e .
                # ! wget http://pjreddie.com/media/files/yolov1/yolov1.weights
                # ! flow -h
                # ! ls -l
                # ! flow --model darkflow/cfg/v1.1/yolov1.cfg --load yolov1.weights --savepb

Random
    - https://github.com/happyjin/pytorch-YOLO/blob/master/network.py
    - https://github.com/kevin970401/pytorch-YOLO-v1/blob/master/models/yolo.py
"""

"""
We train the network for about 
 - 135 epochs on the training and validation data sets from PASCAL VOC 2007 and 2012. 
 - When testing on 2012 we also include the VOC 2007 test data for training. 
 - Throughout training we use a 
    - batch size of 64, a momentum of 0.9 and a decay of 0.0005. 
 - Our learning rate schedule is as follows: 
    - For the first epochs we slowly raise the learning rate from 10^-3 to 10^-2. 
    - If we start at a high learning rate our model often diverges due to unstable gradients. 
    - We continue training with 
        - 10^-2 for 75 epochs
        - then 10^-3 for 30 epochs
        - and finally 10^-4 for 30 epochs
"""

'\nWe train the network for about \n - 135 epochs on the training and validation data sets from PASCAL VOC 2007 and 2012. \n - When testing on 2012 we also include the VOC 2007 test data for training. \n - Throughout training we use a \n    - batch size of 64, a momentum of 0.9 and a decay of 0.0005. \n - Our learning rate schedule is as follows: \n    - For the first epochs we slowly raise the learning rate from 10^-3 to 10^-2. \n    - If we start at a high learning rate our model often diverges due to unstable gradients. \n    - We continue training with \n        - 10^-2 for 75 epochs\n        - then 10^-3 for 30 epochs\n        - and finally 10^-4 for 30 epochs\n'

In [0]:
# train
import tqdm
import time
import random
import math
import os

import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import torch.backends.cudnn as cudnn
from torchvision import datasets, transforms
from torch.autograd import Variable

# # import dataloader
# from nets2_utils import *
# from nets import parse_cfg
# from nets import Darknet
# from nets import RegionLoss
# from nets import TinyYoloNet

## --------------------------------------- YOLOV2 --------------------------------------- ##
class YOLOv2Train():
    """Training and evaluation driver for a Darknet-based YOLOv2 model.

    ``train`` builds the network, optimizer and data loaders from config
    files, runs the epoch loop, saves weights every 10 epochs and calls
    ``test`` after each epoch. ``test`` reads the model, the test loader and
    the thresholds from the instance.
    """

    def __init__(self):
        self.model       = ''
        self.optimizer   = ''

        # State shared between train() and test(). train() fills in the model,
        # the test loader and the device settings; the thresholds keep these
        # defaults unless the caller overrides them on the instance.
        self.test_loader = None
        self.ngpus       = 1
        self.use_cuda    = True
        self.conf_thresh = 0.25
        self.nms_thresh  = 0.4
        self.iou_thresh  = 0.5
        self.eps         = 1e-5   # guards the precision/recall divisions

    def train(self, datacfg, cfgfile, weightfile):
        """Train the network described by ``cfgfile`` starting from ``weightfile``.

        Args:
            datacfg: path of a data config read with ``read_data_cfg``; it must
                provide the ``train``, ``valid`` and ``backup`` entries.
            cfgfile: Darknet network config. Its first block supplies ``batch``,
                ``max_batches``, ``learning_rate``, ``momentum``, ``decay``,
                ``steps`` and ``scales``.
            weightfile: weights file handed to ``Darknet.load_weights``.
        """
        data_options  = read_data_cfg(datacfg)
        net_options   = parse_cfg(cfgfile)[0]

        trainlist     = data_options['train']
        testlist      = data_options['valid']
        backupdir     = data_options['backup']
        nsamples      = file_lines(trainlist)
        # NOTE(review): "1" selects the second physical GPU; on a single-GPU
        # machine this may hide the only device -- confirm the intended index.
        gpus          = "1"
        ngpus         = 1
        num_workers   = 4

        batch_size    = int(net_options['batch'])
        max_batches   = int(net_options['max_batches'])
        learning_rate = float(net_options['learning_rate'])
        momentum      = float(net_options['momentum'])
        decay         = float(net_options['decay'])
        steps         = [float(step) for step in net_options['steps'].split(',')]
        scales        = [float(scale) for scale in net_options['scales'].split(',')]

        # Train parameters
        max_epochs    = int(max_batches*batch_size/nsamples+1)
        use_cuda      = True
        seed          = int(time.time())
        save_interval = 10  # epochs

        # Publish the device settings so test() can see them.
        self.ngpus    = ngpus
        self.use_cuda = use_cuda

        if not os.path.exists(backupdir):
            os.makedirs(backupdir)

        ###############
        torch.manual_seed(seed)
        if use_cuda:
            os.environ['CUDA_VISIBLE_DEVICES'] = gpus
            torch.cuda.manual_seed(seed)

        model       = Darknet(cfgfile)
        region_loss = model.loss

        model.load_weights(weightfile)
        model.print_network()

        region_loss.seen  = model.seen
        processed_batches = model.seen/batch_size

        init_width        = model.width
        init_height       = model.height
        init_epoch        = model.seen/nsamples 

        kwargs = {'num_workers': num_workers, 'pin_memory': True} if use_cuda else {}
        self.test_loader = torch.utils.data.DataLoader(
            VOCDatasetv2(testlist, shape=(init_width, init_height),
                         shuffle=False,
                         transform=transforms.Compose([
                             transforms.ToTensor(),
                         ]), train=False),
            batch_size=batch_size, shuffle=False, **kwargs)

        if use_cuda:
            if ngpus > 1:
                model = torch.nn.DataParallel(model).cuda()
            else:
                model = model.cuda()

        # NOTE(review): earlier code assembled per-parameter groups (zero weight
        # decay for '.bn' / '.bias' parameters) but never handed them to the
        # optimizer. The uniform weight decay that was actually in effect is
        # kept here; switch to parameter groups only after confirming intent.
        optimizer = optim.SGD(model.parameters(), 
                                lr=learning_rate/batch_size, momentum=momentum,
                                dampening=0, weight_decay=decay*batch_size)

        self.model     = model
        self.optimizer = optimizer

        # With DataParallel the Darknet instance lives under .module.
        cur_model = model.module if ngpus > 1 else model

        for epoch in range(int(init_epoch), max_epochs): 
            ## ----------------------- TRAIN ------------------------
            t0 = time.time()

            # The dataset is rebuilt every epoch so it sees the current `seen`
            # counter; it shuffles internally, hence shuffle=False on the loader.
            train_loader = torch.utils.data.DataLoader(
                VOCDatasetv2(trainlist, shape=(init_width, init_height),
                            shuffle=True,
                            transform=transforms.Compose([
                                transforms.ToTensor(),
                            ]),
                            train=True,
                            seen=cur_model.seen,
                            batch_size=batch_size),
                batch_size=batch_size, shuffle=False, **kwargs)               

            lr = self.adjust_learning_rate(optimizer, processed_batches, learning_rate, steps, scales, batch_size)
            logging('epoch %d, processed %d samples, lr %f' % (epoch, epoch * len(train_loader.dataset), lr))
            model.train()
            for batch_idx, (data, target) in enumerate(train_loader):
                self.adjust_learning_rate(optimizer, processed_batches, learning_rate, steps, scales, batch_size)
                processed_batches = processed_batches + 1

                if use_cuda:
                    data = data.cuda()
                    # target stays where the loader put it
                data, target = Variable(data), Variable(target)
                optimizer.zero_grad()
                output = model(data)
                region_loss.seen = region_loss.seen + data.data.size(0)
                loss = region_loss(output, target)
                loss.backward()
                optimizer.step()
            print('')
            t1 = time.time()
            logging('training with %f samples/s' % (len(train_loader.dataset)/(t1-t0)))
            if (epoch+1) % save_interval == 0:
                logging('save weights to %s/%06d.weights' % (backupdir, epoch+1))
                cur_model.seen = (epoch + 1) * len(train_loader.dataset)
                cur_model.save_weights('%s/%06d.weights' % (backupdir, epoch+1))

            ## ----------------------- TEST ------------------------
            self.test(epoch)
        # end for epoch

    def adjust_learning_rate(self, optimizer, batch, learning_rate, steps, scales, batch_size):
        """Apply the step schedule and write the result into the optimizer.

        Starting from ``learning_rate``, the rate is multiplied by
        ``scales[i]`` (1 when ``scales`` is shorter than ``steps``) for every
        ``steps[i]`` that ``batch`` has reached, stopping at the first step not
        yet reached or exactly hit. Each parameter group then receives
        ``lr / batch_size``.

        Returns:
            The scheduled learning rate before division by ``batch_size``.
        """
        lr = learning_rate
        for i in range(len(steps)):
            scale = scales[i] if i < len(scales) else 1
            if batch >= steps[i]:
                lr = lr * scale
                if batch == steps[i]:
                    break
            else:
                break
        for param_group in optimizer.param_groups:
            param_group['lr'] = lr/batch_size
        return lr

    def test(self, epoch):
        """Evaluate ``self.model`` on ``self.test_loader`` and log precision, recall and F-score.

        A ground-truth box counts as detected when its best-overlapping
        prediction exceeds ``self.iou_thresh`` and carries the same class.

        Args:
            epoch: index of the epoch just finished (not used in the computation).

        Raises:
            RuntimeError: if no model / test loader has been set up yet.
        """
        def truths_length(truths):
            # Label rows are zero-padded; the first row whose second column is
            # zero marks the end. Without any padding every row is a real box.
            n_rows = truths.size(0)
            for row in range(n_rows):
                if truths[row][1] == 0:
                    return row
            return n_rows

        if self.test_loader is None or isinstance(self.model, str):
            raise RuntimeError('test() needs a model and test loader; call train() first')

        model = self.model
        model.eval()
        cur_model   = model.module if self.ngpus > 1 else model
        num_classes = cur_model.num_classes
        anchors     = cur_model.anchors
        num_anchors = cur_model.num_anchors
        total       = 0.0
        proposals   = 0.0
        correct     = 0.0

        # no_grad keeps evaluation from building autograd graphs.
        with torch.no_grad():
            for batch_idx, (data, target) in enumerate(self.test_loader):
                if self.use_cuda:
                    data = data.cuda()
                output = model(data).data
                all_boxes = get_region_boxes(output, self.conf_thresh, num_classes, anchors, num_anchors)
                for img_idx in range(output.size(0)):
                    boxes = nms(all_boxes[img_idx], self.nms_thresh)
                    truths = target[img_idx].view(-1, 5)
                    num_gts = truths_length(truths)

                    total = total + num_gts

                    for box in boxes:
                        if box[4] > self.conf_thresh:
                            proposals = proposals+1

                    for gt_idx in range(num_gts):
                        gt = truths[gt_idx]
                        # Same layout as predicted boxes: x, y, w, h, conf, cls_conf, cls_id
                        box_gt = [gt[1], gt[2], gt[3], gt[4], 1.0, 1.0, gt[0]]
                        best_iou = 0
                        best_j = -1
                        for j in range(len(boxes)):
                            iou = bbox_iou(box_gt, boxes[j], x1y1x2y2=False)
                            if iou > best_iou:
                                best_j = j
                                best_iou = iou
                        if best_iou > self.iou_thresh and boxes[best_j][6] == box_gt[6]:
                            correct = correct+1

        precision = 1.0*correct/(proposals+self.eps)
        recall = 1.0*correct/(total+self.eps)
        fscore = 2.0*precision*recall/(precision+recall+self.eps)
        logging("precision: %f, recall: %f, fscore: %f" % (precision, recall, fscore))




## --------------------------------------- YOLOV1 --------------------------------------- ##

class YOLOv1Train():
    """Training driver for the YOLOv1 model.

    After `train` returns, the trained model and the optimizer that was used
    are available as `self.model` and `self.optimizer`.
    """

    def __init__(self):
        # Placeholders; both are overwritten at the end of train().
        self.model       = ''
        self.optimizer   = ''

    def _run_pass(self, model, criterion, loader, BATCH_SIZE, USE_GPU, DEBUG, optimizer=None):
        """Run one pass over `loader` and return the mean (total, loc, class) losses.

        When `optimizer` is given, a backward pass and an optimizer step are
        performed for every batch; otherwise the pass is forward-only. The means
        are taken over the batches that were actually processed, so a DEBUG run
        (which stops after the first batch) is not diluted by the loader length.
        """
        loss_total_sum = 0.0
        loss_loc_sum   = 0.0
        loss_class_sum = 0.0
        batches_seen   = 0

        with tqdm.tqdm_notebook(total = len(loader)*BATCH_SIZE) as pbar:
            for images, target in loader:
                pbar.update(BATCH_SIZE)
                if USE_GPU:
                    images, target = images.cuda(), target.cuda()

                pred                           = model(images)
                loss_tot, loss_loc, loss_class = criterion(pred, target)
                # .data detaches the values so the running sums hold no autograd graph.
                loss_total_sum += loss_tot.data
                loss_loc_sum   += loss_loc.data
                loss_class_sum += loss_class.data
                batches_seen   += 1

                if optimizer is not None:
                    optimizer.zero_grad()
                    loss_tot.backward()
                    optimizer.step()

                if (DEBUG):
                    break

        denom = max(batches_seen, 1)  # guard against an empty loader
        return loss_total_sum / denom, loss_loc_sum / denom, loss_class_sum / denom

    def train(self, model, criterion, optimizer
                , DataLoaderTrain, DataLoaderTest
                , LEARNING_RATE, EPOCHS, BATCH_SIZE
                , USE_GPU, LOGGER
                , CHKP_LOAD, CHKP_DIR, CHKP_NAME, CHKP_EPOCHS
                , DEBUG):
        """Train `model`, validating after every epoch and checkpointing periodically.

        Args:
            model           : network to train; moved to the GPU when USE_GPU is set.
            criterion       : callable `(pred, target) -> (total, loc, class)` losses.
            optimizer       : the string 'SGD' (an SGD optimizer is built here) or an
                              already constructed optimizer.
            DataLoaderTrain : iterable of `(images, target)` training batches.
            DataLoaderTest  : iterable of `(images, target)` validation batches.
            LEARNING_RATE   : initial learning rate; forced to 1e-4 from epoch 30
                              and to 1e-5 from epoch 40 (0-based epoch index).
            EPOCHS          : index of the last epoch + 1.
            BATCH_SIZE      : only used to scale the progress bars.
            USE_GPU         : run on CUDA when truthy.
            LOGGER          : object with `save_value(...)` and `close()`, or '' to disable.
            CHKP_LOAD       : resume from `CHKP_DIR/CHKP_NAME` when that file exists.
            CHKP_DIR        : checkpoint directory (created on demand).
            CHKP_NAME       : checkpoint file name; the part before the first '_' is
                              reused as prefix for newly written checkpoints.
            CHKP_EPOCHS     : write a checkpoint every this many epochs.
            DEBUG           : process a single batch per pass.
        """
        if USE_GPU:
            model.cuda ()

        # One parameter group per tensor so that different learning rates can be
        # assigned (backbone 'features' vs. the rest; both factors are currently 1).
        params      = []
        params_dict = dict(model.named_parameters())
        for key,value in params_dict.items():
            if key.startswith('features'):
                params += [{'params':[value],'lr':LEARNING_RATE*1}]
            else:
                params += [{'params':[value],'lr':LEARNING_RATE}]

        if (optimizer == 'SGD'):
            optimizer = torch.optim.SGD(params, lr=LEARNING_RATE, momentum=0.9, weight_decay=5e-4)

        print ('')
        epoch_start = 0
        if (CHKP_LOAD):
            path_model = os.path.join(CHKP_DIR, CHKP_NAME)
            if os.path.exists(path_model):
                print ('  -- [TRAIN] Loading Chkpoint : ', path_model)
                # Without a GPU, remap CUDA tensors stored in the checkpoint to the CPU.
                checkpoint  = torch.load(path_model, map_location=None if USE_GPU else 'cpu')
                epoch_start = checkpoint['epoch']
                print ('  -- [TRAIN] Start Epoch : ', epoch_start)
                print ('  -- [TRAIN][Loss] Train : ', checkpoint['loss_train'])
                print ('  -- [TRAIN][Loss] Val   : ', checkpoint['loss_val'])
                model.load_state_dict(checkpoint['model_state_dict'])
                optimizer.load_state_dict(checkpoint['optimizer_state_dict'])
                print ('')

        for epoch in range(epoch_start,EPOCHS):
            print ('')
            print (' --------------------------------------------------------- ')
            if epoch >= 30:
                LEARNING_RATE = 0.0001
            if epoch >= 40:
                LEARNING_RATE = 0.00001
            for param_group in optimizer.param_groups:
                param_group['lr'] = LEARNING_RATE

            print ('  -- [TRAIN] Epoch % d / % d '  % (epoch +  1 , EPOCHS))
            print ('  -- [TRAIN] LR : {}'.format(LEARNING_RATE))

            ## ----------------------- TRAIN ------------------------
            # Must be re-enabled every epoch: the validation pass below switches
            # the model to eval mode.
            model.train()
            train_loss_total, train_loss_loc_total, train_loss_class_total = self._run_pass(
                model, criterion, DataLoaderTrain, BATCH_SIZE, USE_GPU, DEBUG, optimizer=optimizer)
            if LOGGER != '':
                LOGGER.save_value('Total Loss', 'Train Loss', epoch, train_loss_total)
                LOGGER.save_value('Location Loss', 'Train Loc Loss', epoch, train_loss_loc_total)
                LOGGER.save_value('Class Loss', 'Train Class Loss', epoch, train_loss_class_total)
            print ('  -- [TRAIN] Train Loss : ', train_loss_total)

            ## ----------------------- VALIDATION ------------------------
            model.eval() # to set dropout and batch normalization layers to evaluation mode
            with torch.no_grad():
                val_loss_total, val_loss_loc_total, val_loss_class_total = self._run_pass(
                    model, criterion, DataLoaderTest, BATCH_SIZE, USE_GPU, DEBUG)
            if LOGGER != '':
                LOGGER.save_value('Total Loss', 'Val Loss', epoch, val_loss_total)
                LOGGER.save_value('Location Loss', 'Val Loc Loss', epoch, val_loss_loc_total)
                LOGGER.save_value('Class Loss', 'Val Class Loss', epoch, val_loss_class_total)
            print ('  -- [TRAIN] Validation Loss : ', val_loss_total)

            if USE_GPU:
                print ('  -- [TRAIN] GPU Memory : ', torch.cuda.max_memory_allocated(device=None)/1024/1024/1024, ' GB')
                torch.cuda.reset_max_memory_allocated(device=None)

            ## ----------------------- SAVING ------------------------
            if (epoch+1) % CHKP_EPOCHS == 0:
                os.makedirs(CHKP_DIR, exist_ok=True)

                CHKP_NAME_ = str(CHKP_NAME)
                CHKP_NAME_ = CHKP_NAME_.split('_')[0] + '_epoch%.3d.pkl' % (epoch+1)
                torch.save({
                    'epoch'                : epoch + 1,
                    'model_state_dict'     : model.state_dict(),
                    'optimizer_state_dict' : optimizer.state_dict(),
                    'loss_train'           : train_loss_total,
                    'loss_val'             : val_loss_total
                        }, os.path.join(CHKP_DIR, CHKP_NAME_)
                    )

        if LOGGER != '' : LOGGER.close()
        self.model = model
        self.optimizer = optimizer

import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.autograd import Variable

class YOLOv1Loss(nn.Module):
    """YOLOv1 detection loss (sum-of-squares) for a 2-box, 30-channel grid output.

    The per-cell layout is fixed by the slicing in `forward`:
    `[x1,y1,w1,h1,c1, x2,y2,w2,h2,c2, class scores...]`.
    The loss runs on whatever device the input tensors live on.
    """

    def __init__(self,S,B,l_coord,l_noobj):
        super(YOLOv1Loss,self).__init__()
        # S and B are stored but not read by the computations below.
        self.S = S
        self.B = B
        self.l_coord = l_coord # weight of the BBox coordinate loss
        self.l_noobj = l_noobj # weight of the confidence loss for cells without object

    def compute_iou(self, box1, box2):
        '''Compute the intersection over union of two set of boxes, each box is [x1,y1,x2,y2].
        Args:
          box1: (tensor) bounding boxes, sized [N,4].
          box2: (tensor) bounding boxes, sized [M,4].
        Return:
          (tensor) iou, sized [N,M].
        '''
        N = box1.size(0)
        M = box2.size(0)

        # top-left corner of the intersection
        lt = torch.max(
            box1[:,:2].unsqueeze(1).expand(N,M,2),  # [N,2] -> [N,1,2] -> [N,M,2]
            box2[:,:2].unsqueeze(0).expand(N,M,2),  # [M,2] -> [1,M,2] -> [N,M,2]
        )
        # bottom-right corner of the intersection
        rb = torch.min(
            box1[:,2:].unsqueeze(1).expand(N,M,2),  # [N,2] -> [N,1,2] -> [N,M,2]
            box2[:,2:].unsqueeze(0).expand(N,M,2),  # [M,2] -> [1,M,2] -> [N,M,2]
        )

        wh    = (rb - lt).clamp(min=0)  # [N,M,2], non-overlapping pairs clip to 0
        inter = wh[:,:,0] * wh[:,:,1]   # [N,M]

        area1 = (box1[:,2]-box1[:,0]) * (box1[:,3]-box1[:,1])  # [N,]
        area2 = (box2[:,2]-box2[:,0]) * (box2[:,3]-box2[:,1])  # [M,]
        area1 = area1.unsqueeze(1).expand_as(inter)  # [N,] -> [N,1] -> [N,M]
        area2 = area2.unsqueeze(0).expand_as(inter)  # [M,] -> [1,M] -> [N,M]

        return inter / (area1 + area2 - inter)

    @staticmethod
    def _to_xyxy(boxes):
        """Convert rows of [x,y,w,h,...] to corner form [x1,y1,x2,y2]."""
        # NOTE(review): the divisor 14 is hard-coded (it is not self.S) - confirm
        # that it matches the grid size of the network output.
        centre = boxes[:,:2]/14.
        half   = 0.5*boxes[:,2:4]
        return torch.cat((centre - half, centre + half), dim=1)

    def forward(self,pred_tensor,target_tensor):
        '''
        pred_tensor   : (tensor) size(batchsize, S, S, Bx5+20=30) [x,y,w,h,c]
        target_tensor : (tensor) size(batchsize, S, S, 30)

        Returns (total_loss, l_coord*loc_loss, class_loss). Only total_loss is
        divided by the batch size; the other two are plain sums.
        '''
        N = pred_tensor.size(0) # batch_size

        # Cell-level masks: a cell contains an object when its first target confidence is > 0.
        obj_cells   = target_tensor[:,:,:,4] > 0
        noobj_cells = target_tensor[:,:,:,4] == 0

        # Indexing with the cell mask yields [K, 30] directly and stays valid for K == 0
        # (a view(-1, 30) on zero elements would raise).
        coo_target   = target_tensor[obj_cells]
        coo_pred     = pred_tensor[obj_cells]
        n_obj        = coo_target.size(0)
        box_target   = coo_target[:,:10].contiguous().view(2*n_obj,5)
        class_target = coo_target[:,10:]
        box_pred     = coo_pred[:,:10].contiguous().view(2*n_obj,5) # box[x1,y1,w1,h1,c1]
        class_pred   = coo_pred[:,10:]                              #    [x2,y2,w2,h2,c2]

        # --- cells without object: only the two confidence values (c1, c2) are penalised
        noo_pred    = pred_tensor[noobj_cells]
        noo_target  = target_tensor[noobj_cells]
        nooobj_loss = F.mse_loss(noo_pred[:,[4,9]], noo_target[:,[4,9]], reduction='sum')

        # --- cells with object: per cell, the predicted box with the highest IoU
        #     against the target box is "responsible" for the object.
        responsible     = []
        not_responsible = []
        best_ious       = []
        for i in range(0, box_target.size(0), 2):
            # IoU is only used as a selector / regression target, so no gradient is needed.
            box1_xyxy = self._to_xyxy(box_pred[i:i+2].detach())
            box2_xyxy = self._to_xyxy(box_target[i].view(-1,5).detach())

            iou               = self.compute_iou(box1_xyxy, box2_xyxy) # [2,1]
            max_iou,max_index = iou.max(0)
            best              = int(max_index.item())

            responsible.append(i + best)
            not_responsible.append(i + 1 - best)
            best_ious.append(max_iou.reshape(1))

        device       = box_pred.device
        resp_idx     = torch.tensor(responsible, dtype=torch.long, device=device)
        not_resp_idx = torch.tensor(not_responsible, dtype=torch.long, device=device)
        #####
        # we want the confidence score to equal the
        # intersection over union (IOU) between the predicted box
        # and the ground truth
        #####
        target_conf  = torch.cat(best_ious) if best_ious else box_pred.new_zeros(0)

        # 1.response loss (xpred,ypred vs xtrue,ytrue) + (sqrt(wpred,hpred) vs sqrt(wtrue,htrue))
        box_pred_response   = box_pred[resp_idx]
        box_target_response = box_target[resp_idx]
        loc_loss = (
            F.mse_loss(box_pred_response[:,:2], box_target_response[:,:2], reduction='sum')
            + F.mse_loss(torch.sqrt(box_pred_response[:,2:4]), torch.sqrt(box_target_response[:,2:4]), reduction='sum')
        )
        contain_loss = F.mse_loss(box_pred_response[:,4], target_conf, reduction='sum')

        # 2.not response loss: the confidence of the other box is pushed towards 0
        not_response_conf = box_pred[not_resp_idx][:,4]
        not_contain_loss  = F.mse_loss(not_response_conf, torch.zeros_like(not_response_conf), reduction='sum')

        # 3.class loss
        class_loss = F.mse_loss(class_pred, class_target, reduction='sum')

        total_loss = (self.l_coord*loc_loss + 2*contain_loss + not_contain_loss + self.l_noobj*nooobj_loss + class_loss)/N
        return total_loss, self.l_coord*loc_loss, class_loss

In [28]:
# Run YOLOv2 training on Pascal VOC.
# NOTE(review): the saved output of this cell ends with a RuntimeError, so this
# run did not complete - re-run and check before relying on it.

# Data description file taken from the cloned pytorch-yolo2 repository.
datacfg = "pytorch-yolo2/cfg/voc.data"
# Network definition (Darknet .cfg) taken from the cloned CS4180-DL repository.
cfgfile = "CS4180-DL/data/cfg/github_pjreddie/yolov2-voc.cfg"
# Weights file in the working directory (presumably the yolov2-voc.weights
# download from pjreddie.com in the setup cell - confirm).
weightfile = "yolov2-voc.weights"

# YOLOv2Train is defined in an earlier cell of this notebook.
trainObj = YOLOv2Train()
trainObj.train(datacfg, cfgfile, weightfile)

layer     filters    size              input                output
    0 conv     32  3 x 3 / 1   416 x 416 x   3   ->   416 x 416 x  32
    1 max          2 x 2 / 2   416 x 416 x  32   ->   208 x 208 x  32
    2 conv     64  3 x 3 / 1   208 x 208 x  32   ->   208 x 208 x  64
    3 max          2 x 2 / 2   208 x 208 x  64   ->   104 x 104 x  64
    4 conv    128  3 x 3 / 1   104 x 104 x  64   ->   104 x 104 x 128
    5 conv     64  1 x 1 / 1   104 x 104 x 128   ->   104 x 104 x  64
    6 conv    128  3 x 3 / 1   104 x 104 x  64   ->   104 x 104 x 128
    7 max          2 x 2 / 2   104 x 104 x 128   ->    52 x  52 x 128
    8 conv    256  3 x 3 / 1    52 x  52 x 128   ->    52 x  52 x 256
    9 conv    128  1 x 1 / 1    52 x  52 x 256   ->    52 x  52 x 128
   10 conv    256  3 x 3 / 1    52 x  52 x 128   ->    52 x  52 x 256
   11 max          2 x 2 / 2    52 x  52 x 256   ->    26 x  26 x 256
   12 conv    512  3 x 3 / 1    26 x  26 x 256   ->    26 x  26 x 512
   13 conv    256  1 x 



RuntimeError: ignored