In [14]:
%load_ext autoreload
%autoreload 2

import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.autograd import Variable
import numpy as np

import matplotlib.pyplot as plt
import matplotlib.patches as patches
from torch.utils.data import DataLoader, Dataset
import cv2
import os
import glob
import random
import itertools
import re

from parse_config import parse_model_config
from models import Darknet

# Prefer the GPU when CUDA is available; otherwise fall back to the CPU.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")


The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [15]:
# Directory prefix of the labelled training images. The trailing slash matters
# wherever this value is joined to a file name by plain string concatenation.
LABELLED_DATA_PATH = 'data/train_images/'

def img_transform(img_path, boxes, img_size=416):
    """Load an image, zero-pad it to a square, resize it and normalise its boxes.

    Args:
        img_path: Path of the image file, read with ``cv2.imread``.
        boxes: Sequence of ``[x, y, w, h]`` integer pixel boxes, where
            ``(x, y)`` is the top-left corner. May be empty.
        img_size: Side length of the square output image (default 416).

    Returns:
        Tuple ``(img, boxes)``:
            * ``img`` -- float tensor of shape ``(C, img_size, img_size)``.
            * ``boxes`` -- float tensor of shape ``(N, 4)`` holding
              ``[cx, cy, w, h]`` (box centre and size) measured in the padded
              square frame and divided by the square's side length.

    Raises:
        FileNotFoundError: If ``cv2.imread`` cannot read ``img_path``.
    """
    img = cv2.imread(img_path)
    if img is None:
        # cv2.imread signals failure by returning None instead of raising.
        raise FileNotFoundError(f"Could not read image: {img_path}")

    h, w = img.shape[:2]
    square_dim = max(w, h)

    # Split the padding so that an odd size difference still yields an exact
    # square (the extra pixel goes to the bottom/right edge).
    diff = abs(w - h)
    pad_before = diff // 2
    pad_after = diff - pad_before

    # The array is H x W x C and F.pad lists pads from the LAST dimension
    # backwards: (C_before, C_after, W_before, W_after, H_before, H_after).
    if w >= h:
        pad_dim = (0, 0, 0, 0, pad_before, pad_after)
        shift_x, shift_y = 0, pad_before
    else:
        pad_dim = (0, 0, pad_before, pad_after, 0, 0)
        shift_x, shift_y = pad_before, 0
    img = F.pad(torch.from_numpy(img), pad_dim)

    # Reshape in NumPy so an empty box list becomes a (0, 4) tensor; torch.tensor
    # copies, so the caller's data is never modified by the in-place ops below.
    boxes = torch.tensor(np.asarray(boxes, dtype=np.int64).reshape(-1, 4), dtype=torch.int32)
    # Move the top-left corners into the padded frame ...
    boxes[:, :2] += torch.tensor([shift_x, shift_y], dtype=torch.int32)
    # ... then turn them into box centres.
    boxes[:, :2] += boxes[:, 2:] // 2
    boxes = boxes.float() / float(square_dim)

    # H x W x C -> 1 x C x H x W for interpolate; nearest-neighbour resizing only
    # copies pixel values, so converting to float first does not change them.
    img = F.interpolate(
        img.permute(2, 0, 1).unsqueeze(0).float(), size=img_size, mode='nearest'
    ).squeeze(0)
    return img, boxes

def draw_boxes(img, box_norm):
    """Draw normalised centre-format boxes onto an image and display it.

    Undoes the box normalisation applied for the padded square frame: boxes
    are scaled by the longer image side, shifted back by the padding offset
    and converted from centre to top-left coordinates before being drawn.

    Args:
        img: Image array of shape ``(H, W, ...)`` in its original (unpadded)
            size. The rectangles are drawn onto this array by
            ``cv2.rectangle``.
        box_norm: Array-like of shape ``(N, 4)`` with ``[cx, cy, w, h]``
            expressed as fractions of the padded square's side length.
            NumPy arrays and CPU torch tensors are both accepted.

    Returns:
        None. The annotated image is shown with ``plt.imshow``.
    """
    img_h, img_w = img.shape[:2]
    # Boxes were normalised by the side of the padded square, i.e. the longer
    # image side -- not necessarily the width.
    square_dim = max(img_w, img_h)
    pad_len = abs(img_w - img_h) // 2
    shift_x, shift_y = (0, pad_len) if img_w >= img_h else (pad_len, 0)

    boxes = np.asarray(box_norm, dtype=float).reshape(-1, 4)
    # Round instead of truncating so float error (e.g. 4.9999) cannot move a
    # box by one pixel.
    boxes = np.rint(boxes * square_dim).astype(int)
    boxes[:, :2] -= [shift_x, shift_y]   # back into the unpadded frame
    boxes[:, :2] -= boxes[:, 2:] // 2    # centre -> top-left corner

    # tolist() yields plain Python ints for the OpenCV call.
    for x1, y1, box_w, box_h in boxes.tolist():
        cv2.rectangle(img, (x1, y1), (x1 + box_w, y1 + box_h), (255, 0, 0), 2)

    plt.imshow(img)

class FaceTrainSet(Dataset):
    """Dataset of face images whose boxes come from a flat text label file.

    The label file is read line by line. Every line ending in ``.jpg`` starts
    a new record; the line after it is kept as the face count, and each
    following line (up to the next ``.jpg`` line) supplies one box through its
    first four whitespace-separated integers.
    """

    def __init__(self, data_path, labels_path, transforms=None):
        """Read the label file and index where each image record starts.

        Args:
            data_path: Directory that the file names in the label file are
                joined onto when an image is loaded.
            labels_path: Path of the text label file.
            transforms: Stored on the instance; not applied by this class.
        """
        super(FaceTrainSet, self).__init__()
        self.data_path = data_path
        self.transforms = transforms
        self.labels_path = labels_path

        # Plain file I/O instead of np.loadtxt(delimiter='\n'): newer NumPy
        # versions reject a newline delimiter. Blank lines are dropped.
        with open(self.labels_path, 'r') as labels_file:
            stripped = [line.strip() for line in labels_file]
        self.labels = np.array([line for line in stripped if line], dtype=str)

        # Positions in self.labels where an image record begins.
        self.file_name_index = [
            pos for pos, line in enumerate(self.labels) if line.endswith('.jpg')
        ]
        self.file_names = [self.labels[pos] for pos in self.file_name_index]

        self.batch_count = 0

    def __len__(self):
        """Return the number of image records found in the label file."""
        return len(self.file_names)

    def __getitem__(self, index):
        """Load one image together with its boxes.

        Args:
            index: Position of the record; negative values count from the end.

        Returns:
            Tuple ``(img_tensor, boxes, num_faces)`` where the first two come
            from ``img_transform`` and ``num_faces`` is the raw (string) line
            that follows the file name in the label file.

        Raises:
            IndexError: If ``index`` is out of range.
        """
        n_records = len(self.file_name_index)
        if index < 0:
            index += n_records
        if not 0 <= index < n_records:
            raise IndexError(f"index {index} out of range for {n_records} records")

        # A record spans from its file-name line up to the next file-name line
        # (or to the end of the label file for the last record).
        start = self.file_name_index[index]
        end = self.file_name_index[index + 1] if index + 1 < n_records else len(self.labels)
        label_info = self.labels[start:end]

        file_name = label_info[0]
        num_faces = label_info[1]
        # Only the first four values of a box line are used; the builtin int
        # replaces np.int, which was removed from NumPy.
        box_dims = [np.array(values.split()[:4]).astype(int) for values in label_info[2:]]
        # Resolve against the directory given to the constructor rather than a
        # module-level constant.
        img_path = os.path.join(self.data_path, str(file_name))
        img_tensor, boxes = img_transform(img_path, box_dims)
        return (img_tensor, boxes, num_faces)

    def collate_fn(self, batch):
        """Merge samples into one batch for a ``DataLoader``.

        Args:
            batch: Sequence of ``(img_tensor, boxes, num_faces)`` samples.

        Returns:
            Tuple ``(img_tensors, targets, num_faces)``: the stacked images, a
            single 2-D tensor in which every box row is prefixed with the
            position of its sample inside the batch, and the per-sample
            ``num_faces`` values as a tuple.
        """
        img_tensors, targets, num_faces = list(zip(*batch))

        # Enumerate BEFORE skipping empty placeholders so the sample index
        # written into each row still matches the image's batch position.
        indexed_targets = []
        for sample_idx, boxes in enumerate(targets):
            if boxes is None:
                continue
            sample_col = torch.full((len(boxes), 1), float(sample_idx))
            indexed_targets.append(torch.cat((sample_col, boxes), 1))

        if indexed_targets:
            targets = torch.cat(indexed_targets, 0)
        else:
            # torch.cat rejects an empty list; 5 = sample index + 4 box values.
            targets = torch.zeros((0, 5))

        # NOTE: multi-scale training (re-sizing the batch every tenth batch) is
        # not implemented; images are stacked at the size they arrive in.
        img_tensors = torch.stack(img_tensors)
        self.batch_count += 1
        return img_tensors, targets, num_faces

In [16]:
# Build the training set from the image directory and label file, then batch it
# two samples at a time using the dataset's own collate function.
dataset = FaceTrainSet(data_path='data/train_images/', labels_path='data/labels.txt')
dataloader = DataLoader(
    dataset,
    batch_size=2,
    collate_fn=dataset.collate_fn,
)

In [17]:
# Pull the first batch from the dataloader to inspect it / feed it to the model.
# NOTE(review): `boxes` here is the collated target tensor returned by
# dataset.collate_fn (sample index prepended to each box row).
img_tensors, boxes, num_faces = next(iter(dataloader))

In [18]:
# Instantiate the project's Darknet model from the 'yolov3.cfg' file.
# NOTE(review): presumably this only builds the architecture and loads no
# pretrained weights -- confirm against models.Darknet.
model = Darknet('yolov3.cfg')

In [None]:
# Run the model on the sample batch.
# NOTE(review): neither the model nor the batch is moved to `device`, and the
# call is not wrapped in torch.no_grad() -- confirm this is intended.
out= model(img_tensors)

In [None]:
# Display the shape of the model output for the sample batch.
out.shape

In [7]:
def build_targets(pred_boxes, object_pred, target, anchors, obj_thres):
    ByteTensor = torch.cuda.ByteTensor if predictions.is_cuda else torch.ByteTensor
    FloatTensor = torch.cuda.FloatTensor if predictions.is_cuda else torch.floatTensor

    nS = pred_boxes.size(0)
    nA = pred_boxes.size(1)
    nG = pred_boxes.size(2)

    # Output tensors
    obj_mask = ByteTensor(nB, nA, nG, nG).fill_(0)
    noobj_mask = ByteTensor(nB, nA, nG, nG).fill_(1)
    iou_scores = FloatTensor(nB, nA, nG, nG).fill_(0)
    tx = FloatTensor(nB, nA, nG, nG).fill_(0)
    ty = FloatTensor(nB, nA, nG, nG).fill_(0)
    tw = FloatTensor(nB, nA, nG, nG).fill_(0)
    th = FloatTensor(nB, nA, nG, nG).fill_(0)

    target_boxes = target[..., 1:5]