In [15]:
import cv2, os
import numpy as np
from pycocotools.coco import COCO


LINE_class_labels = ['ape', 'benchvise', 'bowl', 'can', 'cat', 'cup',
                    'driller', 'duck', 'glue', 'holepuncher', 'iron',
                    'lamp', 'phone', 'cam', 'eggbox']
LINE_class_nums = len(LINE_class_labels)

LINE_ROOT = os.path.join(DATA_ROOT, 'lm', 'train_pbr')
class COCOSingleDatasetBase(ObjectDetectionDatasetBase):
    def __init__(self, coco_dir, focus, ignore=None, transform=None, target_transform=None, augmentation=None, class_labels=None):
        """
        :param coco_dir: str, coco directory path above 'annotations' and 'images'
                e.g.) coco_dir = '~~~~/coco2007/trainval'
        :param focus: str or str, directory name under images
                e.g.) focus = 'train2014'
        :param ignore: target_transforms.Ignore
        :param transform: instance of transforms
        :param target_transform: instance of target_transforms
        :param augmentation:  instance of augmentations
        :param class_labels: None or list or tuple, if it's None use VOC_class_labels
        """
        super().__init__(ignore=ignore, transform=transform, target_transform=target_transform, augmentation=augmentation)

        self._coco_dir = coco_dir
        self._focus = focus

        self._class_labels = _check_ins('class_labels', class_labels, (list, tuple), allow_none=True)
        if self._class_labels is None:
            self._class_labels = LINE_class_labels

        self._annopath = os.path.join(self._coco_dir, self._focus, 'scene_gt_coco_modal.json')
        if os.path.exists(self._annopath):
            self._coco = COCO(self._annopath)
        else:
            raise FileNotFoundError('json: {} was not found'.format('instances_' + self._focus + '.json'))


        # remove no annotation image
        self._imageids = list(self._coco.imgToAnns.keys())

    @property
    def class_nums(self):
        return len(self._class_labels)
    @property
    def class_labels(self):
        return self._class_labels

    def _jpgpath(self, filename,forcus):
        """
        :param filename: path containing .jpg
        :return: path of jpg
        """
        return os.path.join(LINE_ROOT, forcus,filename) #self._coco_dir

    def __len__(self):
        return len(self._imageids)

    """
    Detail of contents in voc > https://towardsdatascience.com/coco-data-format-for-object-detection-a4c5eaf518c5

    VOC bounding box (xmin, ymin, xmax, ymax)
    """
    def _get_image(self, index):
        """
        :param index: int
        :return:
            rgb image(ndarray)
        """

        """
        self._coco.loadImgs(self._imageids[index]): list of dict, contains;
            license: int
            file_name: str
            coco_url: str
            height: int
            width: int
            date_captured: str
            flickr_url: str
            id: int
        """
        filename = self._coco.loadImgs(self._imageids[index])[0]['file_name']
        img = cv2.imread(self._jpgpath(filename))
        # pytorch's image order is rgb
        img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
        return img.astype(np.float32)

    def _get_target(self, index):
        """
        :param index: int
        :return:
            list of bboxes, list of bboxes' label index, list of flags([difficult, truncated,...])
        """
        linds = []
        bboxes = []
        flags = []

        # anno_ids is list
        anno_ids = self._coco.getAnnIds(self._imageids[index])

        # annos is list of dict
        annos = self._coco.loadAnns(anno_ids)
        for anno in annos:
            """
            anno's  keys are;
                segmentation: list of float
                area: float
                iscrowd: int, 0 or 1
                image_id: int
                bbox: list of float, whose length is 4
                category_id: int
                id: int
            """
            """
            self._coco.loadCats(anno['category_id']) is list of dict, contains;
                supercategory: str
                id: int
                name: str
            """
            cat = self._coco.loadCats(anno['category_id'])[0]

            linds.append(self.class_labels.index(cat['name']))

            # bbox = [xmin, ymin, w, h]
            xmin, ymin, w, h = anno['bbox']
            # convert to corners
            xmax, ymax = xmin + w, ymin + h
            bboxes.append([xmin, ymin, xmax, ymax])

            """
            flag = {}
            keys = ['iscrowd']
            for key in keys:
                if key in anno.keys():
                    flag[key] = anno[key] == 1
                else:
                    flag[key] = False
            flags.append(flag)
            """
            flags.append({'difficult': anno['iscrowd'] == 1})

        return np.array(bboxes, dtype=np.float32), np.array(linds, dtype=np.float32), flags


class COCOMultiDatasetBase(Compose):
    def __init__(self, **kwargs):
        """
        :param datasets: tuple of Dataset
        :param kwargs:
            :param ignore:
            :param transform:
            :param target_transform:
            :param augmentation:
        """
        super().__init__(datasets=(), **kwargs)

        coco_dir = _check_ins('coco_dir', kwargs.pop('coco_dir'), (tuple, list, str))
        focus = _check_ins('focus', kwargs.pop('focus'), (tuple, list, str))

        if isinstance(coco_dir, str) and isinstance(focus, str):
            datasets = [COCOSingleDatasetBase(coco_dir, focus, **kwargs)]
            lens = [len(datasets[0])]

        elif isinstance(coco_dir, (list, tuple)) and isinstance(focus, (list, tuple)):
            if len(coco_dir) != len(focus):
                raise ValueError('coco_dir and focus must be same length, but got {}, {}'.format(len(coco_dir), len(focus)))

            datasets = [COCOSingleDatasetBase(cdir, f, **kwargs) for cdir, f in zip(coco_dir, focus)]
            lens = [len(d) for d in datasets]
        else:
            raise ValueError('Invalid coco_dir and focus combination')

        self.datasets = datasets
        self.lens = lens
        self._class_labels = datasets[0].class_labels


class COCO2014_TrainDataset(COCOSingleDatasetBase):
    def __init__(self, **kwargs):
        super().__init__(DATA_ROOT + '/coco/coco2014/trainval', 'train2014', **kwargs)

class COCO2014_ValDataset(COCOSingleDatasetBase):
    def __init__(self, **kwargs):
        super().__init__(DATA_ROOT + '/coco/coco2014/trainval', 'val2014', **kwargs)

class COCO2014_TrainValDataset(COCOMultiDatasetBase):
    def __init__(self, **kwargs):
        super().__init__(coco_dir=(DATA_ROOT + '/coco/coco2014/trainval',
                                   DATA_ROOT + '/coco/coco2014/trainval'),
                         focus=('train2014', 'val2014'), **kwargs)

class COCO2017_TrainDataset(COCOSingleDatasetBase):
    def __init__(self, **kwargs):
        super().__init__(DATA_ROOT + '/coco/coco2017/trainval', 'train2014', **kwargs)

class COCO2017_ValDataset(COCOSingleDatasetBase):
    def __init__(self, **kwargs):
        super().__init__(DATA_ROOT + '/coco/coco2017/trainval', 'val2014', **kwargs)

class COCO2017_TrainValDataset(COCOMultiDatasetBase):
    def __init__(self, **kwargs):
        super().__init__(coco_dir=(DATA_ROOT + '/coco/coco2017/trainval',
                                   DATA_ROOT + '/coco/coco2017/trainval'),
                         focus=('train2017', 'val2017'), **kwargs)
    

In [13]:
annopath = os.path.join(LINE_ROOT, '000000', 'scene_gt_coco_modal.json')
A = COCO(annopath)
B=list(A.imgToAnns.keys())
imageids=list(B)
def jpgpath(filename):        
    return os.path.join(LINE_ROOT, '000000',filename)

C =jpgpath(A.loadImgs(imageids[1])[0]['file_name'])
C
#self._coco = COCO(self._annopath)
#filename = loadImgs(A[index])[0]['file_name']
class_labels = ['1', '2', '3', '4', '5',
                    '6', '7', '8', '9', '10',
                    '11', '12', '13', '14','15']
class_labels = _check_ins('class_labels', class_labels, (list, tuple), allow_none=True)
#class_labels
#A.loadImgs(imageids[1])[0]['file_name']
def get_target(index):
        """
        :param index: int
        :return:
            list of bboxes, list of bboxes' label index, list of flags([difficult, truncated,...])
        """
        linds = []
        bboxes = []
        flags = []

        # anno_ids is list
        anno_ids = A.getAnnIds(imageids[index])

        # annos is list of dict
        annos = A.loadAnns(anno_ids)
        for anno in annos:
            """
            anno's  keys are;
                segmentation: list of float
                area: float
                iscrowd: int, 0 or 1
                image_id: int
                bbox: list of float, whose length is 4
                category_id: int
                id: int
            """
            """
            self._coco.loadCats(anno['category_id']) is list of dict, contains;
                supercategory: str
                id: int
                name: str
            """
            cat = A.loadCats(anno['category_id'])[0]

            linds.append(class_labels.index(cat['name']))

            # bbox = [xmin, ymin, w, h]
            xmin, ymin, w, h = anno['bbox']
            # convert to corners
            xmax, ymax = xmin + w, ymin + h
            bboxes.append([xmin, ymin, xmax, ymax])

            """
            flag = {}
            keys = ['iscrowd']
            for key in keys:
                if key in anno.keys():
                    flag[key] = anno[key] == 1
                else:
                    flag[key] = False
            flags.append(flag)
            """
            flags.append({'difficult': anno['iscrowd'] == 1})

        return np.array(bboxes, dtype=np.float32), np.array(linds, dtype=np.float32), flags

get_target(0)

loading annotations into memory...
Done (t=0.30s)
creating index...
index created!


(array([[301., 191., 355., 296.],
        [335., 213., 433., 348.],
        [331., 249., 335., 279.],
        [186., 218., 259., 261.],
        [244., 217., 277., 259.],
        [444., 313., 471., 362.],
        [  0., 158.,  98., 243.],
        [390., 220., 421., 295.],
        [316., 238., 335., 253.],
        [471., 257., 562., 394.],
        [411., 292., 523., 442.],
        [402., 318., 449., 376.],
        [334., 266., 396., 297.]], dtype=float32),
 array([ 1., 13., 10.,  2.,  6.,  0., 12.,  4.,  9.,  7., 14.,  8.,  3.],
       dtype=float32),
 [{'difficult': False},
  {'difficult': False},
  {'difficult': False},
  {'difficult': False},
  {'difficult': False},
  {'difficult': False},
  {'difficult': False},
  {'difficult': False},
  {'difficult': False},
  {'difficult': False},
  {'difficult': False},
  {'difficult': False},
  {'difficult': False}])

In [25]:
from ssd_data import transforms, target_transforms, augmentations, utils
transform = transforms.Compose(
    [transforms.Resize((300, 300)),
     transforms.ToTensor(),
     transforms.Normalize(rgb_means=(0.485, 0.456, 0.406), rgb_stds=(0.229, 0.224, 0.225))]
)
target_transform = target_transforms.Compose(
    [target_transforms.Corners2Centroids(),
     target_transforms.OneHot(class_nums=len(LINE_class_labels), add_background=True),
     target_transforms.ToTensor()]
)
train_dataset = COCOMultiDatasetBase(coco_dir=[LINE_ROOT], focus=['000000'], ignore=None,
                                                  transform=transform, target_transform=target_transform, augmentation=None,
                                                  class_labels=LINE_class_labels)

ImportError: attempted relative import with no known parent package

In [23]:
coco_dir = _check_ins('coco_dir', ['coco_dir'], (tuple, list, str))
focus = _check_ins('focus', ['focus'], (tuple, list, str))
len(coco_dir)


1

In [7]:
import torch
from torch.utils.data import Dataset
import numpy as np
import abc


"""
ref > https://pytorch.org/tutorials/beginner/data_loading_tutorial.html

torch.utils.data.Dataset is an abstract class representing a dataset. Your custom dataset should inherit Dataset and override the following methods:

__len__ so that len(dataset) returns the size of the dataset.
__getitem__ to support the indexing such that dataset[i] can be used to get ith sample

"""

class _DatasetBase(Dataset):
    @property
    @abc.abstractmethod
    def class_nums(self):
        pass
    @property
    @abc.abstractmethod
    def class_labels(self):
        pass

class ObjectDetectionDatasetBase(_DatasetBase):
    def __init__(self, ignore=None, transform=None, target_transform=None, augmentation=None):
        """
        :param ignore: target_transforms.Ignore
        :param transform: instance of transforms
        :param target_transform: instance of target_transforms
        :param augmentation:  instance of augmentations
        """
        #ignore, target_transform = _separate_ignore(target_transform)
        self.ignore = _check_ins('ignore', ignore, Ignore, allow_none=True)
        self.transform = transform
        self.target_transform = _contain_ignore(target_transform)
        self.augmentation = augmentation

    @property
    @abc.abstractmethod
    def class_nums(self):
        pass
    @property
    @abc.abstractmethod
    def class_labels(self):
        pass

    @abc.abstractmethod
    def _get_image(self, index):
        """
        :param index: int
        :return:
            rgb image(Tensor)
        """
        raise NotImplementedError('\'_get_image\' must be overridden')

    @abc.abstractmethod
    def _get_target(self, index):
        """
        :param index: int
        :return:
            list of bboxes, list of bboxes' label index, list of flags([difficult, truncated])
        """
        raise NotImplementedError('\'_get_target\' must be overridden')

    def __getitem__(self, index):
        """
        :param index: int
        :return:
            img : rgb image(Tensor or ndarray)
            targets : Tensor or ndarray of bboxes and labels [box, label]
            = [xmin, ymin, xmamx, ymax, label index(or relu_one-hotted label)]
            or
            = [cx, cy, w, h, label index(or relu_one-hotted label)]
        """
        img = self._get_image(index)
        targets = self._get_target(index)
        if len(targets) >= 3:
            bboxes, linds, flags = targets[:3]
            args = targets[3:]
        else:
            raise ValueError('ValueError: not enough values to unpack (expected more than 3, got {})'.format(len(targets)))
        img, bboxes, linds, flags, args = self.apply_transform(img, bboxes, linds, flags, *args)

        # concatenate bboxes and linds
        if isinstance(bboxes, torch.Tensor) and isinstance(linds, torch.Tensor):
            if linds.ndim == 1:
                linds = linds.unsqueeze(1)
            targets = torch.cat((bboxes, linds), dim=1)
        else:
            if linds.ndim == 1:
                linds = linds[:, np.newaxis]
            targets = np.concatenate((bboxes, linds), axis=1)

        return img, targets

    def apply_transform(self, img, bboxes, linds, flags, *args):
        """
        IMPORTATANT: apply transform function in order with ignore, augmentation, transform and target_transform
        :param img:
        :param bboxes:
        :param linds:
        :param flags:
        :return:
            Transformed img, bboxes, linds, flags
        """
        # To Percent mode
        height, width, channel = img.shape
        # bbox = [xmin, ymin, xmax, ymax]
        # [bbox[0] / width, bbox[1] / height, bbox[2] / width, bbox[3] / height]
        bboxes[:, 0::2] /= float(width)
        bboxes[:, 1::2] /= float(height)

        if self.ignore:
            bboxes, linds, flags, args = self.ignore(bboxes, linds, flags, *args)

        if self.augmentation:
            img, bboxes, linds, flags, args = self.augmentation(img, bboxes, linds, flags, *args)

        if self.transform:
            img, bboxes, linds, flag, args = self.transform(img, bboxes, linds, flags, *args)

        if self.target_transform:
            bboxes, linds, flags, args = self.target_transform(bboxes, linds, flags, *args)

        return img, bboxes, linds, flags, args

    @abc.abstractmethod
    def __len__(self):
        pass



class Compose(_DatasetBase):
    def __init__(self, datasets, **kwargs):
        """
        :param datasets: tuple of Dataset
        :param kwargs:
            :param ignore:
            :param transform:
            :param target_transform:
            :param augmentation:
        """
        self.transform = kwargs.get('transform', None)
        self.target_transform = kwargs.get('target_transform', None)
        self.augmentation = kwargs.get('augmentation', None)

        datasets = _check_ins('datasets', datasets, (tuple, list))

        _datasets, _lens = [], []
        _class_labels = None
        for dataset in datasets:
            try:
                dataset = dataset(**kwargs)
            except Exception as e:
                raise ValueError('Invalid arguments were passed. {} could not be initialized because\n{}'.format(dataset.__name__, e))
            dataset = _check_ins('element of datasets', dataset, _DatasetBase)
            if _class_labels is None:
                _class_labels = dataset.class_labels
            else:
                #if set(_class_labels) != set(dataset.class_labels):
                if _class_labels != dataset.class_labels:
                    raise ValueError('all of datasets must be same class labels')

            # initialization
            _datasets += [dataset]

            _lens += [len(_datasets[-1])]

        self.datasets = _datasets
        self.lens = _lens
        self._class_labels = _class_labels

    @property
    def class_labels(self):
        return self._class_labels
    @property
    def class_nums(self):
        return len(self._class_labels)

    def __getitem__(self, index):
        for i in range(len(self.lens)):
            if index < sum(self.lens[:i+1]):
                return self.datasets[i][index - sum(self.lens[:i])]

        raise ValueError('Index out of range')

    def __len__(self):
        return sum(self.lens)

In [6]:
import torch

def batch_ind_fn(batch):
    """
    :param batch:
    :return:
        imgs: Tensor, shape = (b, c, h, w)
        targets: list of Tensor, whose shape = (object box num, 4 + class num) including background
    """
    imgs, gts = list(zip(*batch))

    return torch.stack(imgs), gts

In [5]:
import os, fnmatch
import numpy as np

def _get_recurrsive_paths(basedir, ext):
    """
    :param basedir:
    :param ext:
    :return: list of path of files including basedir and ext(extension)
    """
    matches = []
    for root, dirnames, filenames in os.walk(basedir):
        for filename in fnmatch.filter(filenames, '*.{}'.format(ext)):
            matches.append(os.path.join(root, filename))
    return sorted(matches)


def _get_xml_et_value(xml_et, key, rettype=str):
    """
    :param xml_et: Elementtree's element
    :param key:
    :param rettype: class, force to convert it from str
    :return: rettype's value
    Note that if there is no keys in xml object, return None
    """
    elm = xml_et.find(key)
    if elm is None:
        return elm

    if isinstance(rettype, str):
        return elm.text
    else:
        return rettype(elm.text)

def _one_hot_encode(indices, class_num):
    """
    :param indices: list of index
    :param class_num:
    :return: ndarray, relu_one-hot vectors
    """
    size = len(indices)
    one_hot = np.zeros((size, class_num))
    one_hot[np.arange(size), indices] = 1
    return one_hot

def _separate_ignore(target_transform):
    """
    Separate Ignore by target_transform
    :param target_transform:
    :return: ignore, target_transform
    """
    if target_transform:
        from .target_transforms import Ignore, Compose
        if isinstance(target_transform, Ignore):
            return target_transform, None

        if not isinstance(target_transform, Compose):
            return None, target_transform

        # search existing target_transforms.Ignore in target_transform
        new_target_transform = []
        ignore = None
        for t in target_transform.target_transforms:
            if isinstance(t, Ignore):
                ignore = t
            else:
                new_target_transform += [t]
        return ignore, Compose(new_target_transform)

    else:
        return None, target_transform

def _contain_ignore(target_transform):
    if target_transform:
        from .target_transforms import Ignore, Compose
        if isinstance(target_transform, Ignore):
            raise ValueError('target_transforms.Ignore must be passed to \'ignore\' argument')

        if isinstance(target_transform, Compose):
            for t in target_transform.target_transforms:
                if isinstance(t, Ignore):
                    raise ValueError('target_transforms.Ignore must be passed to \'ignore\' argument')

    return target_transform

def _check_ins(name, val, cls, allow_none=False):
    if allow_none and val is None:
        return val

    if not isinstance(val, cls):
        raise ValueError('Argument \'{}\' must be {}, but got {}'.format(name, cls.__name__, type(val).__name__))
    return val

DATA_ROOT = os.path.join(os.path.expanduser('~'),'Desktop')

In [4]:
import numpy as np
import torch
import logging


class Compose(object):
    def __init__(self, target_transforms):
        self.target_transforms = target_transforms

    def __call__(self, bboxes, labels, flags, *args):
        for t in self.target_transforms:
            bboxes, labels, flags, args = t(bboxes, labels, flags, *args)
        return bboxes, labels, flags, args

    def __repr__(self):
        format_string = self.__class__.__name__ + '('
        for t in self.target_transforms:
            format_string += '\n'
            format_string += '    {0}'.format(t)
        format_string += '\n)'
        return format_string

class ToTensor(object):
    def __call__(self, bboxes, labels, flags, *args):
        return torch.from_numpy(bboxes), torch.from_numpy(labels), flags, args

class Corners2Centroids(object):
    def __call__(self, bboxes, labels, flags, *args):
        # bbox = [xmin, ymin, xmax, ymax] to [cx, cy, w, h]
        bboxes = np.concatenate(((bboxes[:, 2:] + bboxes[:, :2]) / 2,
                                 (bboxes[:, 2:] - bboxes[:, :2])), axis=1)

        return bboxes, labels, flags, args

class Corners2MinMax(object):
    def __call__(self, bboxes, labels, flags, *args):
        # bbox = [xmin, ymin, xmax, ymax] to [xmin, xmax, ymin, ymax]
        bboxes = bboxes[:, np.array((0, 2, 1, 3))]

        return bboxes, labels, flags, args

class Centroids2Corners(object):
    def __call__(self, bboxes, labels, flags, *args):
        # bbox = [cx, cy, w, h] to [xmin, ymin, xmax, ymax]
        bboxes = np.concatenate((bboxes[:, :2] - bboxes[:, 2:]/2,
                                 bboxes[:, :2] + bboxes[:, 2:]/2), axis=1)

        return bboxes, labels, flags, args

class Centroids2MinMax(object):
    def __call__(self, bboxes, labels, flags, *args):
        # bbox = [cx, cy, w, h] to [xmin, xmax, ymin, ymax]
        bboxes = np.concatenate((bboxes[:, 0] - bboxes[:, 2]/2,
                                 bboxes[:, 0] + bboxes[:, 2]/2,
                                 bboxes[:, 1] - bboxes[:, 3]/2,
                                 bboxes[:, 1] + bboxes[:, 3]/2), axis=1)

        return bboxes, labels, flags, args

class MinMax2Centroids(object):
    def __call__(self, bboxes, labels, flags, *args):
        # bbox = [xmin, xmax, ymin, ymax] to [cx, cy, w, h]
        bboxes = np.concatenate((bboxes[:, 0] + (bboxes[:, 1] - bboxes[:, 0])/2,
                                 bboxes[:, 2] + (bboxes[:, 3] - bboxes[:, 2])/2,
                                 bboxes[:, 1] - bboxes[:, 0],
                                 bboxes[:, 3] - bboxes[:, 2]), axis=1)

        return bboxes, labels, flags, args

class MinMax2Corners(object):
    def __call__(self, bboxes, labels, flags, *args):
        # bbox = [xmin, xmax, ymin, ymax] to [xmin, ymin, xmax, ymax]
        bboxes = bboxes[:, np.array((0, 2, 1, 3))]

        return bboxes, labels, flags, args

class Ignore(object):
    supported_key = ['difficult', 'truncated', 'occluded', 'iscrowd']
    def __init__(self, **kwargs):
        """
        :param kwargs: if true, specific keyword will be ignored
        """
        self.ignore_key = []
        for key, val in kwargs.items():
            if key in Ignore.supported_key:
                val = _check_ins(key, val, bool)
                if not val:
                    logging.warning('No meaning: {}=False'.format(key))
                else:
                    self.ignore_key += [key]
            else:
                logging.warning('Unsupported arguments: {}'.format(key))

    def __call__(self, bboxes, labels, flags, *args):
        ret_bboxes = []
        ret_labels = []
        ret_flags = []

        for bbox, label, flag in zip(bboxes, labels, flags):
            flag_keys = list(flag.keys())
            ig_flag = [flag[ig_key] if ig_key in flag_keys else False for ig_key in self.ignore_key]
            if any(ig_flag):
                continue
            """
            isIgnore = False
            for key, value in self.kwargs.items():
                if value and key in flag and flag[key]:
                    isIgnore = True
                    break
            if isIgnore:
                continue
            #if self._ignore_partial and flag['partial']:
            #    continue
            """
            # normalize
            # bbox = [xmin, ymin, xmax, ymax]
            ret_bboxes += [bbox]
            ret_labels += [label]
            ret_flags += [flag]

        ret_bboxes = np.array(ret_bboxes, dtype=np.float32)
        ret_labels = np.array(ret_labels, dtype=np.float32)

        return ret_bboxes, ret_labels, ret_flags, args

class OneHot(object):
    def __init__(self, class_nums, add_background=True):
        self._class_nums = class_nums
        self._add_background = add_background
        if add_background:
            self._class_nums += 1

    def __call__(self, bboxes, labels, flags, *args):
        if labels.ndim != 1:
            raise ValueError('labels might have been already relu_one-hotted or be invalid shape')

        labels = _one_hot_encode(labels.astype(np.int), self._class_nums)
        labels = np.array(labels, dtype=np.float32)

        return bboxes, labels, flags, args

In [3]:
import numpy as np
import torch
import logging

class Compose(object):
    def __init__(self, target_transforms):
        self.target_transforms = target_transforms

    def __call__(self, bboxes, labels, flags, *args):
        for t in self.target_transforms:
            bboxes, labels, flags, args = t(bboxes, labels, flags, *args)
        return bboxes, labels, flags, args

    def __repr__(self):
        format_string = self.__class__.__name__ + '('
        for t in self.target_transforms:
            format_string += '\n'
            format_string += '    {0}'.format(t)
        format_string += '\n)'
        return format_string

class ToTensor(object):
    def __call__(self, bboxes, labels, flags, *args):
        return torch.from_numpy(bboxes), torch.from_numpy(labels), flags, args

class Corners2Centroids(object):
    def __call__(self, bboxes, labels, flags, *args):
        # bbox = [xmin, ymin, xmax, ymax] to [cx, cy, w, h]
        bboxes = np.concatenate(((bboxes[:, 2:] + bboxes[:, :2]) / 2,
                                 (bboxes[:, 2:] - bboxes[:, :2])), axis=1)

        return bboxes, labels, flags, args

class Corners2MinMax(object):
    def __call__(self, bboxes, labels, flags, *args):
        # bbox = [xmin, ymin, xmax, ymax] to [xmin, xmax, ymin, ymax]
        bboxes = bboxes[:, np.array((0, 2, 1, 3))]

        return bboxes, labels, flags, args

class Centroids2Corners(object):
    def __call__(self, bboxes, labels, flags, *args):
        # bbox = [cx, cy, w, h] to [xmin, ymin, xmax, ymax]
        bboxes = np.concatenate((bboxes[:, :2] - bboxes[:, 2:]/2,
                                 bboxes[:, :2] + bboxes[:, 2:]/2), axis=1)

        return bboxes, labels, flags, args

class Centroids2MinMax(object):
    def __call__(self, bboxes, labels, flags, *args):
        # bbox = [cx, cy, w, h] to [xmin, xmax, ymin, ymax]
        bboxes = np.concatenate((bboxes[:, 0] - bboxes[:, 2]/2,
                                 bboxes[:, 0] + bboxes[:, 2]/2,
                                 bboxes[:, 1] - bboxes[:, 3]/2,
                                 bboxes[:, 1] + bboxes[:, 3]/2), axis=1)

        return bboxes, labels, flags, args

class MinMax2Centroids(object):
    def __call__(self, bboxes, labels, flags, *args):
        # bbox = [xmin, xmax, ymin, ymax] to [cx, cy, w, h]
        bboxes = np.concatenate((bboxes[:, 0] + (bboxes[:, 1] - bboxes[:, 0])/2,
                                 bboxes[:, 2] + (bboxes[:, 3] - bboxes[:, 2])/2,
                                 bboxes[:, 1] - bboxes[:, 0],
                                 bboxes[:, 3] - bboxes[:, 2]), axis=1)

        return bboxes, labels, flags, args

class MinMax2Corners(object):
    def __call__(self, bboxes, labels, flags, *args):
        # bbox = [xmin, xmax, ymin, ymax] to [xmin, ymin, xmax, ymax]
        bboxes = bboxes[:, np.array((0, 2, 1, 3))]

        return bboxes, labels, flags, args

class Ignore(object):
    supported_key = ['difficult', 'truncated', 'occluded', 'iscrowd']
    def __init__(self, **kwargs):
        """
        :param kwargs: if true, specific keyword will be ignored
        """
        self.ignore_key = []
        for key, val in kwargs.items():
            if key in Ignore.supported_key:
                val = _check_ins(key, val, bool)
                if not val:
                    logging.warning('No meaning: {}=False'.format(key))
                else:
                    self.ignore_key += [key]
            else:
                logging.warning('Unsupported arguments: {}'.format(key))

    def __call__(self, bboxes, labels, flags, *args):
        ret_bboxes = []
        ret_labels = []
        ret_flags = []

        for bbox, label, flag in zip(bboxes, labels, flags):
            flag_keys = list(flag.keys())
            ig_flag = [flag[ig_key] if ig_key in flag_keys else False for ig_key in self.ignore_key]
            if any(ig_flag):
                continue
            """
            isIgnore = False
            for key, value in self.kwargs.items():
                if value and key in flag and flag[key]:
                    isIgnore = True
                    break
            if isIgnore:
                continue
            #if self._ignore_partial and flag['partial']:
            #    continue
            """
            # normalize
            # bbox = [xmin, ymin, xmax, ymax]
            ret_bboxes += [bbox]
            ret_labels += [label]
            ret_flags += [flag]

        ret_bboxes = np.array(ret_bboxes, dtype=np.float32)
        ret_labels = np.array(ret_labels, dtype=np.float32)

        return ret_bboxes, ret_labels, ret_flags, args

class OneHot(object):
    def __init__(self, class_nums, add_background=True):
        self._class_nums = class_nums
        self._add_background = add_background
        if add_background:
            self._class_nums += 1

    def __call__(self, bboxes, labels, flags, *args):
        if labels.ndim != 1:
            raise ValueError('labels might have been already relu_one-hotted or be invalid shape')

        labels = _one_hot_encode(labels.astype(np.int), self._class_nums)
        labels = np.array(labels, dtype=np.float32)

        return bboxes, labels, flags, args

In [2]:
import numpy as np
import torch
import cv2
import logging

class Compose(object):
    def __init__(self, transforms):
        self.transforms = transforms

    def __call__(self, img, bboxes, labels, flags, *args):
        for t in self.transforms:
            transformed = t(img, bboxes, labels, flags, *args)
            img, bboxes, labels, flags = transformed[:4]
            args = transformed[4:]
        return img, bboxes, labels, flags, args

    def __repr__(self):
        format_string = self.__class__.__name__ + '('
        for t in self.transforms:
            format_string += '\n'
            format_string += '    {0}'.format(t)
        format_string += '\n)'
        return format_string

"""
bellow classes are consisted of
    :param img: Tensor
    :param bboxes: ndarray of bboxes
    :param labels: ndarray of bboxes' indices
    :param flags: list of flag's dict
    :return: Tensor of img, ndarray of bboxes, ndarray of labels, dict of flags
"""

class ToTensor(object):
    """
    Note that convert ndarray to tensor and [0-255] to [0-1]
    """
    def __call__(self, img, *args):
        # convert ndarray into Tensor
        # transpose img's tensor (h, w, c) to pytorch's format (c, h, w). (num, c, h, w)
        img = np.transpose(img, (2, 0, 1))
        return (torch.from_numpy(img).float() / 255., *args)

class Resize(object):
    def __init__(self, size):
        """
        :param size: 2d-array-like, (height, width)
        """
        self._size = size

    def __call__(self, img, *args):
        return (cv2.resize(img, self._size), *args)


class Normalize(object):
    #def __init__(self, rgb_means=(103.939, 116.779, 123.68), rgb_stds=(1.0, 1.0, 1.0)):
    def __init__(self, rgb_means=(0.485, 0.456, 0.406), rgb_stds=(0.229, 0.224, 0.225)):
        self.means = np.array(rgb_means, dtype=np.float32).reshape((-1, 1, 1))
        if np.any(np.abs(self.means) > 1):
            logging.warning("In general, mean value should be less than 1 because img's range is [0-1]")

        self.stds = np.array(rgb_stds, dtype=np.float32).reshape((-1, 1, 1))

    def __call__(self, img, *args):
        if isinstance(img, torch.Tensor):
            return ((img.float() - torch.from_numpy(self.means)) / torch.from_numpy(self.stds), *args)
        else:
            return ((img.astype(np.float32) - self.means) / self.stds, *args)

In [10]:
import torch

def batch_ind_fn(batch):
    """
    :param batch:
    :return:
        imgs: Tensor, shape = (b, c, h, w)
        targets: list of Tensor, whose shape = (object box num, 4 + class num) including background
    """
    imgs, gts = list(zip(*batch))

    return torch.stack(imgs), gts

In [9]:
import sys
print(sys.path)

['C:\\Users\\takayanagi\\jjjkkk\\pytorch_SSD', 'C:\\Users\\takayanagi\\jjjkkk\\pytorch_SSD', 'c:\\opencv\\build\\python', 'C:\\Users\\takayanagi\\anaconda3\\envs\\jjjkkk\\python39.zip', 'C:\\Users\\takayanagi\\anaconda3\\envs\\jjjkkk\\DLLs', 'C:\\Users\\takayanagi\\anaconda3\\envs\\jjjkkk\\lib', 'C:\\Users\\takayanagi\\anaconda3\\envs\\jjjkkk', '', 'C:\\Users\\takayanagi\\AppData\\Roaming\\Python\\Python39\\site-packages', 'C:\\Users\\takayanagi\\anaconda3\\envs\\jjjkkk\\lib\\site-packages', 'C:\\Users\\takayanagi\\anaconda3\\envs\\jjjkkk\\lib\\site-packages\\win32', 'C:\\Users\\takayanagi\\anaconda3\\envs\\jjjkkk\\lib\\site-packages\\win32\\lib', 'C:\\Users\\takayanagi\\anaconda3\\envs\\jjjkkk\\lib\\site-packages\\Pythonwin']
