In [1]:
import torch
import torch.nn as nn
from torch.utils.data import DataLoader
import sys
from importlib import reload
import cv2
import json

print('Python | Pytorch | OpenCV versions: %s | %s | %s' %(sys.version, torch.__version__, cv2.__version__))

sys.path.append('/home/blanca/mnt/projects/')
import tools
from tools.utils import *
from tools.visuals import *

Python | Pytorch | OpenCV versions: 3.7.0 (default, Jun 28 2018, 13:15:42) 
[GCC 7.2.0] | 0.4.1 | 3.4.3


In [8]:
# JSON annotation file; the cell reads its 'images' and 'annotations' entries.
anns_file = '/home/blanca/mnt/data/__toy_plane/annotations/annotations_classes_attbs_body_drill.json'

with open(anns_file, 'r') as f:
    anns_dataset = json.load(f)

# Cell output: (number of images, number of annotations).
tuple(len(anns_dataset[key]) for key in ('images', 'annotations'))

(576, 7711)

In [None]:
import torch
from torch.utils.data import DataLoader, Dataset
from torch.autograd import Variable

from skimage import io
import cv2
import numpy as np
from pathlib import Path
import random
import json
import matplotlib
import matplotlib.pyplot as plt
matplotlib.use('Agg')
import time
import pickle

class Resize(object):
    """Callable transform that resizes an image to a fixed size with ``cv2.resize``.

    Args:
        new_dims: either a single int (expanded to ``(new_dims, new_dims)``)
            or a size object that is stored unchanged and later handed to
            ``cv2.resize`` as its ``dsize`` argument.
    """

    def __init__(self, new_dims):
        # A bare int is shorthand for a square target size.
        self.dims = (new_dims, new_dims) if isinstance(new_dims, int) else new_dims

    def resize_image(self, im):
        """Return ``im`` resized to ``self.dims``."""
        target = self.dims
        return cv2.resize(im, target)

    def __call__(self, im):
        """Make the instance usable as a function: delegates to ``resize_image``."""
        return self.resize_image(im)

def bbcc_from_xxww(boxes):
    """Compute the integer centre of every box.

    Args:
        boxes: array of shape (N, 4); columns 0/1 are treated as the box
            origin and columns 2/3 as its extent along the same axes.

    Returns:
        int32 array of shape (N, 2) holding ``origin + extent / 2`` per axis,
        truncated toward zero by the integer cast.
    """
    assert boxes.shape[1] == 4
    origin, extent = boxes[:, :2], boxes[:, 2:]
    # Accumulate in a float64 buffer first so the cast to int32 happens once.
    centres = np.zeros((boxes.shape[0], 2))
    centres[:] = origin + extent / 2
    return centres.astype(np.int32)

def resize_angle(ang, input_size, target_size):
    """Map an angle through an anisotropic image resize.

    The direction vector (cos, sin) of ``ang`` is stretched by the horizontal
    and vertical scale factors and converted back to an angle with arctan2.

    Args:
        ang: angle in radians (it is fed to ``np.sin`` / ``np.cos``).
        input_size: size of the source image; index 0 is used as the height
            and index 1 as the width.
        target_size: size of the resized image, indexed the same way.

    Returns:
        The angle of the rescaled direction vector, in radians.
    """
    in_h, in_w = input_size[0], input_size[1]
    out_h, out_w = target_size[0], target_size[1]
    scale_x = out_w / in_w
    scale_y = out_h / in_h
    return np.arctan2(np.sin(ang) * scale_y, np.cos(ang) * scale_x)

def resize_xxyy_bboxs(bboxs, input_size, target_size):
    """Scale box coordinates from one image size to another.

    Columns 0 and 2 are multiplied by the width ratio, columns 1 and 3 by the
    height ratio. Any further columns are left at zero in the result.

    Args:
        bboxs: 2-D array with at least four columns.
        input_size: source image size, index 0 = height, index 1 = width.
        target_size: destination image size, indexed the same way.

    Returns:
        Array with the shape and dtype of ``bboxs``. Because the dtype is
        inherited, integer inputs yield scaled values truncated to integers.
    """
    scale_x = target_size[1] / input_size[1]
    scale_y = target_size[0] / input_size[0]
    scaled = np.zeros_like(bboxs)
    # Horizontal quantities live in the even columns, vertical ones in the odd columns.
    scaled[:, [0, 2]] = bboxs[:, [0, 2]] * scale_x
    scaled[:, [1, 3]] = bboxs[:, [1, 3]] * scale_y
    return scaled

def ulbr_to_ulwh(boxes):
    """
    Convert corner-pair boxes to origin-plus-size boxes.

    Columns 0 and 1 are copied through; column 2 becomes ``col2 - col0`` and
    column 3 becomes ``col3 - col1``. The result is int32 with the same shape
    as ``boxes`` (columns past the fourth stay zero).
    """
    converted = np.zeros(boxes.shape, dtype=np.int32)
    converted[:, [0, 1]] = boxes[:, [0, 1]]
    # Extent along each axis = far corner minus near corner.
    converted[:, [2, 3]] = boxes[:, [2, 3]] - boxes[:, [0, 1]]
    return converted

def resize_bbccs_for_vis(bbccs, input_size, target_size):
    """Scale box-centre coordinates from one image size to another.

    Accepts either a single ``(x, y)`` pair or an array of shape (N, 2) such
    as the one returned by ``bbcc_from_xxww``. The x component is multiplied
    by the width ratio and the y component by the height ratio.

    The original implementation indexed the first axis (``bbccs[0]`` and
    ``bbccs[1]``), which is only correct for a single pair; given an (N, 2)
    array it scaled row 0 and row 1 instead of the x and y columns and left
    every other row at zero. Indexing the last axis handles both layouts and
    is identical to the old behaviour for a single pair.

    Args:
        bbccs: one ``(x, y)`` centre or an (N, 2) array of centres.
        input_size: source image size, index 0 = height, index 1 = width.
        target_size: destination image size, indexed the same way.

    Returns:
        Array with the shape and dtype of ``bbccs``; integer inputs therefore
        yield values truncated to integers.
    """
    bbccs = np.asarray(bbccs)
    ws = target_size[1] / input_size[1]
    hs = target_size[0] / input_size[0]
    bbcsr = np.zeros_like(bbccs)
    bbcsr[..., 0] = bbccs[..., 0] * ws
    bbcsr[..., 1] = bbccs[..., 1] * hs
    return bbcsr
    
class VectorDataset(Dataset):
    """Dataset of images paired with object-centre / orientation targets.

    Every stored sample is a dict with keys 'im' (image array as returned by
    ``cv2.imread``), 'ann' (list of annotation dicts exposing 'bbox' and
    'angle') and 'url'. Samples are read once from disk and cached as a
    pickle in ``CF.data_dir``.

    Config attributes read from ``CF``: input_dir, data_dir, overfit,
    batch_size, n_obs_per_im, input_size, input_nc, provide_bbox, decoder,
    predict_centre, predict_score and, when ``decoder`` is set, output_size
    and output_nc.
    """

    def __init__(self, CF, mode):
        """Load (or build and cache) the dataset and keep the ``mode`` split.

        Args:
            CF: configuration object (see the class docstring).
            mode: 'train' or 'eval'. Only 'train' exists when overfitting.

        Raises:
            ValueError: if ``CF.data_dir`` or ``CF.input_dir`` is not set.
            KeyError: if ``mode`` is not an available split.
        """
        super(VectorDataset, self).__init__()
        self.CF = CF

        input_dir = Path(CF.input_dir) if CF.input_dir else None
        data_dir = Path(CF.data_dir) if CF.data_dir else None

        # Fail loudly: returning from __init__ here would leave an object
        # without self.dataset that breaks later in __len__/__getitem__.
        if not data_dir:
            raise ValueError('Provide a "data_dir" in the .yaml file to store the .pickle files')
        # input_dir.name is part of the cache file name, so it is always required.
        if input_dir is None:
            raise ValueError('Provide an "input_dir" in the .yaml file: it names the .pickle cache and locates the images')

        # if data.pickle file available, load:
        pickle_file = list(data_dir.glob(input_dir.name + '_data.pickle'))
        print('************ ', pickle_file)
        if len(pickle_file) == 1:
            pickle_file = pickle_file[0]
            print('Loading %s file as dataset ' %pickle_file)
            # NOTE(security): pickle.load can execute arbitrary code; only
            # point data_dir at cache files this class wrote itself.
            with open(str(pickle_file), 'rb') as data: dataset = pickle.load(data)
        else:
            print('No .pickle file available @%s  |  Loading dataset from input_dir %s' %(data_dir, input_dir))
            dataset = self._read_dataset(input_dir)

            data_dir.mkdir(parents=True, exist_ok=True)
            with open(str(data_dir / (input_dir.name + '_data.pickle')), 'wb') as output: pickle.dump(dataset, output)
            print('Saved full dataset @%s, pickle it next time' %data_dir)

        # shuffle and split data (fixed seed so train/eval are stable across runs)
        np.random.seed(13)
        keys = list(dataset.keys())
        # CF.overfit is a dataset size. bool is a subclass of int, so exclude it:
        # otherwise `overfit: False` would select a zero-image dataset.
        overfit = isinstance(CF.overfit, int) and not isinstance(CF.overfit, bool)
        if overfit:
            n_overfit = min(CF.overfit, len(keys))  # never index past the available images
            ss_ixs = {'train': [keys[i] for i in np.random.permutation(n_overfit)]}
        else:
            ss_ix = [keys[i] for i in np.random.permutation(len(keys))]
            # eval split: roughly a fifth of the data, rounded down to whole batches
            split_ix = int((len(ss_ix) / CF.batch_size) // 5) * CF.batch_size
            ss_ixs = {'train': ss_ix[split_ix:], 'eval': ss_ix[:split_ix]}

        if mode not in ss_ixs:
            raise KeyError('Unknown mode "%s"; available splits: %s' % (mode, sorted(ss_ixs)))
        selected = set(ss_ixs[mode])
        self.dataset = {i: j for i, j in dataset.items() if i in selected}

        self.n_obs_per_im = CF.n_obs_per_im
        print('Initialized "%s" dataset with %d images-label pairs' %(mode, len(self.dataset)))

    def _read_dataset(self, input_dir):
        """Read every annotated image under ``input_dir`` into a dict keyed by image id."""
        ims_dir = input_dir / 'JPEGImagesDV_9009'
        anns_file = input_dir / 'annotations/annotations_9009_with_directions.json'

        with open(str(anns_file), 'r') as f: anns_dataset = json.load(f)

        # Group annotations by image once instead of rescanning the whole
        # annotation list for every image.
        anns_by_image = {}
        for ao in anns_dataset['annotations']:
            anns_by_image.setdefault(ao['image_id'], []).append(ao)

        dataset = {}
        count_nf_ims = 0
        for imo in anns_dataset['images']:
            iid, url = imo['id'], imo['coco_url']
            anns = anns_by_image.get(iid, [])
            if len(anns) == 0:
                # Skip only this image; stopping here would silently drop
                # every remaining image from the cached dataset.
                print('anns empty - double check', iid, url)
                continue
            # annotations reference .png names, images on disk are .jpg
            im = cv2.imread(str(ims_dir / (imo['file_name'].split('.png')[0] + '.jpg')))
            if im is None:
                count_nf_ims += 1
                continue
            dataset[iid] = {'url': url, 'im': im, 'ann': anns}
        print('Not able to found %d images' %count_nf_ims)
        return dataset

    def __getitem__(self, index):
        """Build the model input and target for the ``index``-th sample.

        Returns:
            Tuple ``(input_im, input_lb, bboxs, bbccs, url, angs, mask)``:
            the image tensor (channels first, divided by 255), the label
            tensor, the padded (and, if needed, resized) boxes, their integer
            centres, the sample url, the padded angles and a mask tensor
            flagging the label entries that belong to real objects.
        """
        d_ix = list(self.dataset.keys())[index] # ix. for the dictionary
        sample = self.dataset[d_ix]
        im_orig, anns, url = sample['im'], sample['ann'], sample['url']

        # Config sizes may arrive as lists; ndarray.shape is a tuple and
        # tuple != list is always True, which would force a needless resize.
        model_input_size = tuple(self.CF.input_size)

        # bboxes & labels
        bboxs = np.array([ann['bbox'] for ann in anns])
        angs = np.array([ann['angle'] for ann in anns])

        ## order from left to right
        order_ix = bboxs[:, 0].argsort()
        bboxs = bboxs[order_ix]
        angs = angs[order_ix]

        ## pad with zeros up to n_obs_per_im objects
        bboxs = np.pad(bboxs, [(0, self.n_obs_per_im - bboxs.shape[0]), (0, 0)], mode='constant', constant_values=0)
        angs = np.pad(angs, [(0, self.n_obs_per_im - len(angs))], mode='constant', constant_values=0)

        ## keep originals before resizing - used if decoding
        bboxs_ = bboxs.copy()
        angs_ = angs.copy()

        ## resize
        if im_orig.shape[:2] != model_input_size:
            angs = np.array([resize_angle(ang, im_orig.shape, model_input_size) for ang in angs])
            bboxs = resize_xxyy_bboxs(bboxs[:, :4], im_orig.shape, model_input_size)
            im = cv2.resize(im_orig, (model_input_size[1], model_input_size[0]))
        else:
            # Work on a copy so the drawing below never touches the cached image.
            im = im_orig.copy()
        # bboxes centres
        bbccs = bbcc_from_xxww(bboxs[:, :4])

        ## mark the bbox centre points: drawn on the image itself (3 channels)
        ## or on an additional fourth channel (4 channels)
        input_nc = self.CF.input_nc
        if input_nc == 3:
            for bbcc in bbccs:
                if bbcc[0] == 0 and bbcc[1] == 0: continue
                cv2.circle(im, (int(bbcc[0]), int(bbcc[1])), 5, (255, 255, 255), -1)
        elif input_nc == 4:
            imcc = np.expand_dims(np.zeros(im.shape[:2]), axis=2)
            for bbcc in bbccs:
                if bbcc[0] == bbcc[1] == 0: continue
                cv2.circle(imcc, (int(bbcc[0]), int(bbcc[1])), 2, (255, 255, 255), -1)
            if self.CF.provide_bbox:
                for bbox in bboxs:
                    if bbox[0] == bbox[1] == bbox[2] == bbox[3] == 0: continue
                    bbox = [int(i) for i in bbox]
                    x, y, w, h = bbox[0], bbox[1], bbox[2], bbox[3]
                    cv2.rectangle(imcc, (x, y), (x + w, y + h), (255, 255, 255), 1)
            im = np.concatenate((im, imcc), axis=2)

        if not self.CF.decoder:
            # LABEL: 1D ARRAY (MODEL: ENCODER)
            assert self.CF.predict_centre
            lab, mask = self._vector_target(bbccs, angs)
        else:
            # LABEL: 3D ARRAY (MODEL: AUTOENCODER)
            assert not self.CF.predict_centre
            lab, mask = self._map_target(im_orig.shape, bboxs_, angs_, bboxs, angs, model_input_size)

        input_im = Variable(torch.Tensor(np.transpose(im, (2,0,1)) / 255.))
        input_lb = Variable(torch.Tensor(lab)) if not self.CF.decoder else Variable(torch.Tensor(np.transpose(lab, (2,0,1))))
        mask = Variable(torch.Tensor(mask))

        return input_im, input_lb, bboxs, bbccs, url, angs, mask

    def _vector_target(self, bbccs, angs):
        """Build the flat encoder label and its mask.

        Each real object occupies one slot ``[cx, cy, sin, cos]``, preceded by
        a score of 1 when ``CF.predict_score`` is set. Slots are filled in
        order; padded (all-zero) objects are skipped.
        """
        # NOTE(review): with predict_score the original wrote 5 values into
        # 4-wide slots (overlapping the next object and overflowing on the
        # last one). Slots are now 5 wide in that mode - confirm the model
        # head emits 5 * n_obs_per_im values.
        predict_score = self.CF.predict_score
        stride = 5 if predict_score else 4
        lab = np.zeros(stride * self.n_obs_per_im)
        mask = np.zeros(stride * self.n_obs_per_im)

        n_ob = 0
        for bbcc, ang in zip(bbccs, angs):
            if bbcc[0] == 0.0 and bbcc[1] == 0.0 and ang == 0.0: continue
            values = [bbcc[0], bbcc[1], np.sin(ang), np.cos(ang)]
            if predict_score: values = [1.0] + values
            slot = slice(n_ob * stride, (n_ob + 1) * stride)
            lab[slot] = values
            # Label and mask are filled in the same loop so they stay aligned
            # (the counter used to be incremented outside the mask loop, so
            # only the first slot was ever flagged).
            mask[slot] = 1
            n_ob += 1
        return lab, mask

    def _map_target(self, im_shape, bboxs_, angs_, bboxs, angs, model_input_size):
        """Build the (H, W, output_nc) label map and (H, W) mask for the autoencoder.

        sin / cos (and a score of 1 when ``CF.predict_score`` is set) are
        written at the centre pixel of every real object.
        """
        model_output_size = tuple(self.CF.output_size)
        if im_shape[:2] != model_output_size:
            angso = np.array([resize_angle(ang_, im_shape, model_output_size) for ang_ in angs_])
            bboxso = resize_xxyy_bboxs(bboxs_[:, :4], im_shape, model_output_size)
        else:
            # No resize needed: previously these names were left undefined.
            angso, bboxso = angs_, bboxs_

        # sanity checks: label geometry must match the input geometry
        assert model_output_size == model_input_size
        assert (angso == angs).all()
        assert (bboxso == bboxs).all()

        bbccso = bbcc_from_xxww(bboxso[:, :4])

        lab = np.zeros((model_output_size[0], model_output_size[1], self.CF.output_nc))
        mask = np.zeros((model_output_size[0], model_output_size[1]))
        for bbcco, ango in zip(bbccso, angso):
            if bbcco[0] == 0.0 and bbcco[1] == 0.0 and ango == 0.0: continue
            row, col = bbcco[1], bbcco[0]  # arrays are indexed (y, x)
            lab[row, col, 0] = np.sin(ango)
            lab[row, col, 1] = np.cos(ango)
            if self.CF.predict_score: lab[row, col, 2] = 1.0
            mask[row, col] = 1.0
        return lab, mask

    def __len__(self):
        """Number of samples in the selected split."""
        return len(self.dataset)


def save_input_image(im, bboxs, bbccs, angs, vis_size):
    """Draw boxes and box indices on an image and write it to 'input_sample.jpg'.

    Fixes over the previous version: the annotated image is the one that gets
    written (rectangles used to go to a discarded copy), ``cv2.imwrite`` is
    called as (filename, image), the canvas is uint8 so it can be encoded,
    boxes are drawn on the image they were rescaled for, and text is rendered
    with ``cv2.putText`` (``cv2.addText`` needs a Qt build and a font name).

    Args:
        im: image array, height x width x channels.
            NOTE(review): values are multiplied by 255, so this presumably
            expects floats in [0, 1] - confirm against the caller.
        bboxs: array of boxes; the first four columns are read as x, y, w, h.
        bbccs: array of box centres (x, y), one per box.
        angs: per-object angles; only used to bound the number of labels drawn.
        vis_size: (height, width) of the saved visualisation.
    """
    bboxs = np.asarray(bboxs)
    bbccs = np.asarray(bbccs)
    vis_size = tuple(vis_size)

    if im.shape[:2] != vis_size:
        bboxs = resize_xxyy_bboxs(bboxs[:, :4], im.shape, vis_size)
        bbccs = bbcc_from_xxww(bboxs[:, :4])
        im = cv2.resize(im, (vis_size[1], vis_size[0]))

    # Fresh contiguous uint8 canvas: drawable by OpenCV, writable as JPEG,
    # and independent of the caller's array.
    canvas = np.ascontiguousarray(np.clip(im * 255, 0, 255).astype(np.uint8))

    for bbox in bboxs:
        x, y, w, h = (int(v) for v in bbox[:4])
        if x == y == w == h == 0: continue  # zero padding, not a real box
        cv2.rectangle(canvas, (x, y), (x + w, y + h), (255, 0, 0), 10)

    for ix, (bbcc, ang) in enumerate(zip(bbccs, angs)):
        cx, cy = int(bbcc[0]), int(bbcc[1])
        if cx == 0 and cy == 0: continue  # zero padding, not a real centre
        cv2.putText(canvas, 'bb_%d' %ix, (cx, cy), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (255, 255, 255), 1)

    cv2.imwrite('input_sample.jpg', canvas)