In [None]:
# NOTE(review): this shell escape pushes to the remote every time the cell runs,
# including on "Restart & Run All" — consider running git from a terminal instead.
!git push origin master

In [None]:
import os
import numpy as np
from chainer import Chain
import chainer.links as L
import glob
import chainer
from chainer import function
from PIL import Image

In [None]:
# VGG16 network used below as a feature extractor (its 'fc6' output is read).
vgg = L.VGG16Layers()
# Short alias for the image preprocessing function that ships with chainer's VGG module.
# NOTE(review): presumably resizes/normalises an image array into the layout vgg expects — confirm.
convert_image = chainer.links.model.vision.vgg.prepare

In [None]:
def cos_sim_matrix(matrix):
  """Return the pairwise cosine similarity between the rows of ``matrix``.

  Entry (i, j) of the result is dot(row_i, row_j) / (|row_i| * |row_j|).
  A row whose norm is zero leads to a division by zero in its row and column.
  """
  # Euclidean length of every row, kept as a column vector for broadcasting.
  row_norms = (matrix * matrix).sum(axis=1, keepdims=True) ** .5
  # Raw dot products between every pair of rows.
  gram = matrix @ matrix.T
  # Normalise by the row's own length first, then by the other row's length.
  scaled_by_row = gram / row_norms
  return scaled_by_row / row_norms.T

In [None]:
def read_image_as_array(path, dtype=np.float32):
  """Load the image file at ``path`` and preprocess it with ``convert_image``.

  Args:
    path: Path of the image file to open with PIL.
    dtype: dtype of the intermediate array handed to ``convert_image``.

  Returns:
    Whatever ``convert_image`` returns for the decoded image array.
  """
  # Use a context manager so the underlying file handle is closed as soon as
  # the pixels have been copied into the array (the original left it open).
  with Image.open(path) as f:
    image = np.asarray(f, dtype=dtype)
  return convert_image(image)

def read_image_as_array_croped(path, dtype=np.float32):
  """Detect an object in the image at ``path``, crop to it and preprocess the crop.

  Relies on the notebook globals ``utils`` (chainercv.utils) and ``model``
  (a detector with ``predict``); the cell defining them must have been run
  before this function is called.

  Args:
    path: Path of the image file.
    dtype: dtype of the intermediate array handed to ``convert_image``.

  Returns:
    ``convert_image`` applied to the crop of the first box returned by the
    detector, or to the whole image when the detector returns no box.
  """
  img = utils.read_image(path, color=True)
  bboxes, _labels, _scores = model.predict([img])
  # predict() received a single image, so only the first result list is relevant.
  boxes = bboxes[0]
  with Image.open(path) as im:
    if len(boxes) == 0:
      # Nothing detected: fall back to the full image instead of failing with
      # an IndexError on boxes[0].
      region = im
    else:
      # Reorder the box for PIL's crop(), which takes (left, upper, right, lower);
      # the index swap implies the detector's order is (y_min, x_min, y_max, x_max).
      region = im.crop(boxes[0][[1,0,3,2]])
    # Convert while the file is still open so the pixel data can be read.
    return convert_image(np.asarray(region, dtype=dtype))
  
# Every entry directly under ./data (glob does not guarantee any particular order).
data_path = glob.glob("./data/*")
# One preprocessed crop per path, combined into a single array.
# NOTE(review): read_image_as_array_croped reads the globals `utils` and `model`,
# which are only defined in a later cell — this line fails on a fresh kernel run top to bottom.
data = np.asarray([read_image_as_array_croped(path) for path in data_path])

In [None]:
# Extract the 'fc6' activations for every image in `data`.
# Calling the VGG16Layers link directly runs with the current chainer config;
# with the default train=True the dropout that follows fc6 randomly zeroes
# features, so the extracted vectors would differ between runs. Force
# inference mode in addition to disabling backprop bookkeeping.
with chainer.using_config('train', False), function.no_backprop_mode():
  h = vgg(data, layers=['fc6'])

In [None]:
# Raw array behind the 'fc6' output, one row per image in `data`.
# NOTE(review): presumably shape (len(data_path), 4096) — confirm.
matrix = h['fc6'].data

In [None]:
def cos_sim(v1, v2):
    """Return the cosine similarity of the vectors ``v1`` and ``v2``.

    The dot product is divided by the product of the two Euclidean norms;
    a zero-length vector therefore results in a division by zero.
    """
    magnitude = np.linalg.norm(v1) * np.linalg.norm(v2)
    return np.dot(v1, v2) / magnitude

In [None]:
# Print every image path followed by the cosine similarity between its feature
# row and the feature row at index 6.
# NOTE(review): index 6 is hard-coded; which image it refers to depends on the
# order glob returned in data_path — confirm it is the intended reference image.
for i, j in zip(data_path, matrix):
  print(i)
  print(cos_sim(matrix[6], j))

In [None]:
%matplotlib inline
import chainercv
from chainercv import utils
from chainercv.datasets import voc_bbox_label_names
from chainercv.visualizations import vis_bbox
import matplotlib.pyplot as plt

# Object detector used to find crop regions: SSD512 with the 'voc0712' weights
# and one foreground class per entry of voc_bbox_label_names.
# NOTE(review): read_image_as_array_croped (earlier cell) reads this global `model`
# and `utils`, so this cell has to run before that function is called.
# A later cell rebinds `model` to an SSD300 loaded from 'via_model'.
model = chainercv.links.SSD512(n_fg_class=len(voc_bbox_label_names), pretrained_model='voc0712')

In [None]:
# Show, for every image in data_path, the crop of the first box the detector returns.
for path in data_path:
  img = utils.read_image(path, color=True)
  bboxes, labels, scores = model.predict([img])
  # predict() received a single image, so only the first result list is relevant.
  boxes = bboxes[0]
  if len(boxes) == 0:
    # The original indexed boxes[0] unconditionally and crashed with an
    # IndexError on images without any detection.
    print('no detection:', path)
    continue
  # Close the file handle once the crop has been copied into an array.
  with Image.open(path) as im:
    # Reorder the box for PIL's crop(), which takes (left, upper, right, lower).
    crop_arr = np.array(im.crop(boxes[0][[1,0,3,2]]))
  plt.imshow(crop_arr)
  plt.show()

In [None]:
import os
import argparse
import copy
import warnings
import json
import numpy as np

%matplotlib inline
import matplotlib.pyplot as plt
import chainer
from chainer.datasets import TransformDataset
from chainer.optimizer import WeightDecay
from chainer import serializers
from chainer import training
from chainer.training import extensions
from chainer.training import triggers
from chainer.links.model.vision import resnet

import chainercv
from chainercv.extensions import DetectionVOCEvaluator
from chainercv.links.model.ssd import GradientScaling
from chainercv.links.model.ssd import multibox_loss
from chainercv import transforms

from chainercv.links.model.ssd import random_crop_with_bbox_constraints
from chainercv.links.model.ssd import random_distort
from chainercv.links.model.ssd import resize_with_random_interpolation


from chainercv.links import SSD300
from chainercv.links import SSD512
from chainercv.utils import read_image

In [None]:
class MultiboxTrainChain(chainer.Chain):
    """Training wrapper that turns a detector's raw outputs into a scalar loss.

    ``model`` is called on the image batch and must return localisation and
    confidence predictions; these are passed to ``multibox_loss`` together
    with the ground truth. ``alpha`` weights the localisation term and ``k``
    is forwarded to ``multibox_loss`` unchanged.
    """

    def __init__(self, model, alpha=1, k=3):
        super().__init__()
        self.alpha = alpha
        self.k = k
        # Register the wrapped detector as a child link.
        with self.init_scope():
            self.model = model

    def __call__(self, imgs, gt_mb_locs, gt_mb_labels):
        """Compute, report and return the combined loss for one batch."""
        pred_locs, pred_confs = self.model(imgs)
        loc_loss, conf_loss = multibox_loss(
            pred_locs, pred_confs, gt_mb_locs, gt_mb_labels, self.k)
        loss = loc_loss * self.alpha + conf_loss

        # Expose the total and both components to the trainer's reporter.
        observation = {
            'loss': loss,
            'loss/loc': loc_loss,
            'loss/conf': conf_loss,
        }
        chainer.reporter.report(observation, self)

        return loss

In [None]:
class Transform(object):
    """Training-time augmentation applied to one ``(img, bbox, label)`` example.

    Calling the instance runs, in this order: colour distortion, an optional
    random expansion, a bbox-constrained random crop, a resize to
    ``size`` x ``size`` with random interpolation and a random horizontal
    flip. Finally ``mean`` is subtracted from the image and the boxes/labels
    are encoded with ``coder``.
    """

    def __init__(self, coder, size, mean):
        # Copy the coder first, then send the copy to the CPU.
        cpu_coder = copy.copy(coder)
        cpu_coder.to_cpu()
        self.coder = cpu_coder

        self.size = size
        self.mean = mean

    def __call__(self, in_data):
        """Augment one example and return ``(img, mb_loc, mb_label)``."""
        img, bbox, label = in_data
        out_size = (self.size, self.size)

        # Step 1: colour augmentation.
        img = random_distort(img)

        # Step 2: random expansion, applied when the random draw is 1; the
        # boxes are shifted by the same offset as the image content.
        if np.random.randint(2):
            img, expand_param = transforms.random_expand(
                img, fill=self.mean, return_param=True)
            bbox = transforms.translate_bbox(
                bbox,
                y_offset=expand_param['y_offset'],
                x_offset=expand_param['x_offset'])

        # Step 3: random crop; labels are filtered down to the boxes that
        # crop_bbox reports as kept.
        img, crop_param = random_crop_with_bbox_constraints(
            img, bbox, return_param=True)
        bbox, kept = transforms.crop_bbox(
            bbox,
            y_slice=crop_param['y_slice'],
            x_slice=crop_param['x_slice'],
            allow_outside_center=False,
            return_param=True)
        label = label[kept['index']]

        # Step 4: resize with random interpolation; boxes are rescaled from
        # the pre-resize height/width.
        _, in_h, in_w = img.shape
        img = resize_with_random_interpolation(img, out_size)
        bbox = transforms.resize_bbox(bbox, (in_h, in_w), out_size)

        # Step 5: random horizontal flip, mirrored onto the boxes.
        img, flip_param = transforms.random_flip(
            img, x_random=True, return_param=True)
        bbox = transforms.flip_bbox(
            bbox, out_size, x_flip=flip_param['x_flip'])

        # Final preparation for the SSD network.
        img -= self.mean
        mb_loc, mb_label = self.coder.encode(bbox, label)

        return img, mb_loc, mb_label

In [None]:
import chainer 
from pathlib import Path

# Foreground class names. A label id is the position of its name in this tuple
# (BBoxDataset maps annotation types with label_names.index(name)).
label_names = ('tops', 'bottoms')

class BBoxDataset(chainer.dataset.DatasetMixin):
  """Bounding-box dataset read from an id list, a JSON annotation file and JPEGs.

  Files read, all relative to ``data_dir``:
    ImageSets/<split>.txt        one image id per line
    Annotations/annotation.json  entries keyed by '<id>.jpg<file size in bytes>'
    images/<id>.jpg              the image itself

  Each annotation entry has a 'regions' sequence whose items carry
  'shape_attributes' (x, y, width, height) and 'region_attributes' ('type').

  Args:
    data_dir: Root directory of the dataset.
    split: Name of the id list to use (e.g. 'train' or 'test').
  """

  def __init__(self, data_dir='data', split='train'):
    id_list_file = os.path.join(
      data_dir, 'ImageSets/{0}.txt'.format(split))
    # Close the list file deterministically and ignore blank lines, which
    # would otherwise turn into empty ids.
    with open(id_list_file) as id_file:
      self.ids = [line.strip() for line in id_file if line.strip()]
    self.data_dir = data_dir
    # Parsed annotation file; filled on first use and reused afterwards.
    self._annotations = None

  def __len__(self):
    return len(self.ids)

  def _load_annotations(self):
    """Return the parsed annotation JSON, reading the file only once."""
    if self._annotations is None:
      json_path = Path(self.data_dir, 'Annotations', 'annotation.json')
      with json_path.open('r') as f:
        self._annotations = json.load(f)
    return self._annotations

  def get_example(self, i):
    """Return ``(img, bbox, label)`` for the i-th id.

    ``bbox`` is a float32 array with one ``[y, x, y + height, x + width]``
    row per region (shape ``(0, 4)`` when the image has no region) and
    ``label`` is an int32 array of indices into ``label_names``.

    Raises:
      KeyError: if the annotation file has no entry for this image.
      ValueError: if a region type is not listed in ``label_names``.
    """
    id_ = self.ids[i]

    # Honour data_dir instead of the previously hard-coded 'data' directory.
    jpg_path = Path(self.data_dir, 'images', id_ + '.jpg')
    # Annotation keys are the file name immediately followed by its byte size.
    jpg_size = jpg_path.stat().st_size
    anno = self._load_annotations()[id_ + '.jpg' + str(jpg_size)]

    bbox = []
    label = []
    for obj in anno['regions']:
      bndbox_anno = obj['shape_attributes']
      bbox.append([
        bndbox_anno['y'],
        bndbox_anno['x'],
        bndbox_anno['y'] + bndbox_anno['height'],
        bndbox_anno['x'] + bndbox_anno['width']])
      name = obj['region_attributes']['type'].lower().strip()
      label.append(label_names.index(name))
    # reshape keeps a well-formed (0, 4) array for images without regions,
    # where np.stack([]) used to raise.
    bbox = np.asarray(bbox, dtype=np.float32).reshape(-1, 4)
    label = np.asarray(label, dtype=np.int32)
    img = read_image(jpg_path.as_posix(), color=True)
    return img, bbox, label

In [None]:
# Device id read by main(): the model is moved to this GPU only when the value
# is >= 0, and the same value is passed to the updater as `device`.
gpu = 0

def main():
  """Fine-tune an SSD300 detector on BBoxDataset and save it to 'via_model'.

  Reads the notebook globals ``gpu`` and ``label_names``. Training runs for
  1000 iterations; every 10 iterations the detector is evaluated on the
  'test' split and a log line is printed.
  """
  detector = SSD300(
    n_fg_class=len(label_names),
    pretrained_model='via_model')
  detector.use_preset('evaluate')
  train_chain = MultiboxTrainChain(detector)
  if gpu >= 0:
    chainer.cuda.get_device_from_id(gpu).use()
    detector.to_gpu()

  # Training data: raw examples wrapped with the augmentation transform.
  train_source = BBoxDataset(split='train')
  augment = Transform(detector.coder, detector.insize, detector.mean)
  train_dataset = TransformDataset(train_source, augment)
  train_iter = chainer.iterators.SerialIterator(train_dataset, 10)

  # Evaluation data: a single, unshuffled pass over the 'test' split.
  test_dataset = BBoxDataset(split='test')
  test_iter = chainer.iterators.SerialIterator(
    test_dataset, 2, repeat=False, shuffle=False)

  optimizer = chainer.optimizers.MomentumSGD(lr=0.0001)
  optimizer.setup(train_chain)
  # Parameters named 'b' get their gradient scaled by 2; every other
  # parameter gets weight decay instead.
  for param in train_chain.params():
    if param.name == 'b':
      hook = GradientScaling(2)
    else:
      hook = WeightDecay(0.0005)
    param.update_rule.add_hook(hook)

  updater = training.updaters.StandardUpdater(
    train_iter, optimizer, device=gpu)
  trainer = training.Trainer(updater, (1000, 'iteration'), 'result')

  evaluator = DetectionVOCEvaluator(
    test_iter, detector, use_07_metric=True, label_names=label_names)
  trainer.extend(evaluator, trigger=(10, 'iteration'))

  log_interval = (10, 'iteration')
  report_columns = [
    'epoch', 'iteration', 'lr',
    'main/loss', 'main/loss/loc', 'main/loss/conf',
    'validation/main/map']
  trainer.extend(extensions.LogReport(trigger=log_interval))
  trainer.extend(extensions.observe_lr(), trigger=log_interval)
  trainer.extend(
    extensions.PrintReport(report_columns), trigger=log_interval)
  trainer.extend(extensions.ProgressBar(update_interval=10))

  trainer.run()
  serializers.save_npz('via_model', detector)

In [None]:
# Run the training; main() writes the trained weights to 'via_model' when it finishes.
main()

In [None]:
# 'test' split of the annotated dataset, kept around for manual inspection below.
hoge = BBoxDataset(split='test')

In [None]:
# Print the shape of each element of the first example (img, bbox, label).
for i in hoge[0]:
  print(i.shape)

In [None]:
# Display the image of the second example. The transpose moves the first axis
# to the end and the values are divided by 255 before plotting.
# NOTE(review): presumably converts channel-first 0-255 data to the HWC 0-1 form imshow expects — confirm.
plt.imshow(np.asarray(hoge[1][0]).transpose(1,2,0) / 255)
plt.show()

In [None]:
# Shape of the image array of the first example.
print(hoge[0][0].shape)

In [None]:
# Rebind the global `model` (previously the SSD512 VOC detector) to an SSD300
# loaded from the 'via_model' weights that main() saves; later cells use this one.
model = SSD300(n_fg_class=len(label_names),
    pretrained_model='via_model')
# Run detection on the image of the second example of the test split.
bboxes, labels, scores = model.predict([hoge[1][0]])

In [None]:
# Boxes returned by model.predict for the single input image.
print(bboxes)

In [None]:
# Number of labels (one per detected box) for the first and only input image.
print(len(labels[0]))

In [None]:
# Scores returned by model.predict for the single input image.
print(scores)

In [None]:
from chainercv.visualizations import vis_bbox
# Draw the detections for the second test example, annotated with the names in label_names.
vis_bbox(hoge[1][0], bboxes[0], labels[0], scores[0], label_names=label_names)

In [None]:
import glob
import chainercv
from chainercv.visualizations import vis_bbox
from chainercv import utils
from PIL import Image
# Rebind data_path to the annotated image folder (glob does not guarantee any order).
# The Annoy index below uses positions in this list as item ids, so the list
# must not change between building the index and querying it.
data_path = glob.glob("./data/images/*")
print(data_path)

In [None]:
# For the last ten images, print the label of every detected box and show its crop.
for path in data_path[-10:]:
  img = utils.read_image(path, color=True)
  bboxes, labels, scores = model.predict([img])
  # Keep the file open only while its crops are taken; the original never
  # closed the handles it opened in this loop.
  with Image.open(path) as im:
    # predict() received a single image, so index 0 holds all of its boxes.
    for b, l in zip(bboxes[0], labels[0]):
      print(l)
      # Reorder the box for PIL's crop(), which takes (left, upper, right, lower).
      crop_arr = np.array(im.crop(b[[1,0,3,2]]))
      plt.imshow(crop_arr)
      plt.show()

In [None]:
# Print, for every annotation entry that has at least one region, the part of
# its key that precedes the first "." (the image id).
with open("data/Annotations/annotation.json", mode="r") as f:
  json_list = json.load(f)
for key, entry in json_list.items():
  if not entry['regions']:
    continue
  print(key.split(".")[0])

In [None]:
from annoy import AnnoyIndex

# Build a nearest-neighbour index over the fc6 features of the detected crops.
# Item ids are positions in data_path, so a neighbour id maps straight back to
# an image path. An image with several matching boxes keeps only the feature
# of the last one, because every add_item call for it reuses the same id.
# The metric is passed explicitly; newer Annoy releases warn when it is omitted.
annoy_model = AnnoyIndex(4096, 'angular')
# train=False disables the dropout that follows fc6 in VGG16Layers; without it
# the indexed features are randomly zeroed and differ from run to run.
with chainer.using_config('train', False), function.no_backprop_mode():
  for i, path in enumerate(data_path):
    img = utils.read_image(path, color=True)
    bboxes, labels, scores = model.predict([img])
    # Close each image once its crops have been converted.
    with Image.open(path) as im:
      # predict() received a single image, so index 0 holds all of its boxes.
      for b, l in zip(bboxes[0], labels[0]):
        # Index label 1, the label the query cell crops, searches with and
        # displays; the original indexed label 0 here, so queries were
        # matched against features of a different class.
        if l != 1:
          continue
        # Reorder the box for PIL's crop(), which takes (left, upper, right, lower).
        crop_img = im.crop(b[[1,0,3,2]])
        batch = convert_image(np.asarray(crop_img, dtype=np.float32))[np.newaxis,:,:,:]
        h = vgg(batch, layers=['fc6'])
        annoy_model.add_item(i, h['fc6'][0].data)
annoy_model.build(1000)

In [None]:
# Query the index with the label-1 crop of one test image and show the query
# crop (first panel) next to the label-1 crops of its five nearest neighbours.
fig = plt.figure(figsize=(15, 15))
query_path = "data/test/yamatoshi.jpg"
query_img = utils.read_image(query_path, color=True)
bboxes, labels, scores = model.predict([query_img])

# Stays empty when the query image has no label-1 box, so the display loop
# below is simply skipped instead of failing with a NameError.
predict_indexes = []
# train=False disables the dropout that follows fc6 in VGG16Layers, so the
# query feature is deterministic.
with chainer.using_config('train', False), function.no_backprop_mode():
  with Image.open(query_path) as im:
    # predict() received a single image, so index 0 holds all of its boxes.
    for b, l in zip(bboxes[0], labels[0]):
      if l != 1:
        continue
      # Reorder the box for PIL's crop(), which takes (left, upper, right, lower).
      crop_img = im.crop(b[[1,0,3,2]])
      ax = fig.add_subplot(1, 6, 1)
      ax.imshow(np.array(crop_img))
      h = vgg(convert_image(np.asarray(crop_img, dtype=np.float32))[np.newaxis,:,:,:], layers=['fc6'])
      predict_indexes = annoy_model.get_nns_by_vector(h['fc6'][0].data, 5, search_k=-1)
      print(predict_indexes)
      # Use the first matching box only; further boxes would be drawn into the
      # same subplot slot and overwrite the search result.
      break

for idx, predict_index in enumerate(predict_indexes):
  # Annoy item ids are positions in data_path.
  neibor_image = data_path[predict_index]
  print(neibor_image)
  img = utils.read_image(neibor_image, color=True)
  bboxes, labels, scores = model.predict([img])
  with Image.open(neibor_image) as im:
    for b, l in zip(bboxes[0], labels[0]):
      if l != 1:
        continue
      crop_img = im.crop(b[[1,0,3,2]])
      # Panel 1 holds the query, so neighbours start at panel 2.
      ax = fig.add_subplot(1, 6, idx+2)
      ax.imshow(np.array(crop_img))
      ax.axis("off")
      # One panel per neighbour: stop after its first matching box.
      break