In [1]:
import math

import matplotlib.pyplot as plt
%matplotlib inline
import numpy as np
from PIL import Image
import torch
import torch.nn as nn
import torchvision.models as tmdl

In [5]:
# Anchor settings: sizes in pixels, width/height ratios, and the step between
# anchor positions on a feature map.
scales = [32, 64, 128]
ratios = [0.5, 1, 2]
anchor_stride = 1

# Pixel stride of each feature level, and the matching square [height, width]
# feature-map shape for a 640-pixel image side (rounded up).
feature_strides = [4, 8, 16, 32, 64]
feature_shapes = np.array(
    [[int(math.ceil(640 / stride))] * 2 for stride in feature_strides]
)

In [6]:
def generate_anchors(scales, ratios, shape, feature_stride, anchor_stride):
    """Build every anchor box for a single feature map.

    scales: anchor size(s) in pixels, a scalar or a 1D array.
        Example: [32, 64, 128]
    ratios: 1D array of anchor ratios of width/height. Example: [0.5, 1, 2]
    shape: [height, width] spatial shape of the feature map over which
        to generate anchors.
    feature_stride: Stride of the feature map relative to the image in pixels.
    anchor_stride: Stride of anchors on the feature map. For example, if the
        value is 2 then generate anchors for every other feature map pixel.

    Returns:
    boxes: [N, (y1, x1, y2, x2)] array in image pixels. Rows are ordered by
        feature-map position (row-major); within one position the ratio
        varies slowest and the scale fastest.
    """
    ## Pair every ratio with every scale (ratio-major order)
    scale_values = np.array(scales).reshape(-1)
    ratio_values = np.array(ratios).reshape(-1)
    per_anchor_scale = np.tile(scale_values, ratio_values.size)
    per_anchor_root = np.sqrt(np.repeat(ratio_values, scale_values.size))

    ## (h, w) of each anchor shape, chosen so that w / h equals the ratio
    anchor_hw = np.stack([per_anchor_scale / per_anchor_root,
                          per_anchor_scale * per_anchor_root], axis=1)

    ## (y, x) anchor centers in image pixels, one per visited feature cell
    rows = np.arange(0, shape[0], anchor_stride) * feature_stride
    cols = np.arange(0, shape[1], anchor_stride) * feature_stride
    center_yx = np.stack([np.repeat(rows, cols.size),
                          np.tile(cols, rows.size)], axis=1)

    ## Broadcast [P, 1, 2] centers against [A, 2] half sizes and lay the
    ## corners out as (y1, x1, y2, x2)
    half_hw = 0.5 * anchor_hw
    centers = center_yx[:, np.newaxis, :]
    boxes = np.concatenate([centers - half_hw, centers + half_hw], axis=2)
    return boxes.reshape([-1, 4])


In [7]:
def generate_pyramid_anchors(scales, ratios, feature_shapes, feature_strides,
                             anchor_stride):
    """Build the anchors of every pyramid level and join them into one array.

    The i-th scale is paired with the i-th feature shape and the i-th feature
    stride; the full set of ratios and the same anchor_stride are used at
    every level. The number of levels processed is the number of scales.

    Returns:
    anchors: [N, (y1, x1, y2, x2)]. Per-level anchor arrays concatenated along
        axis 0 in the order of the given scales: anchors of scales[0] come
        first, then anchors of scales[1], and so on.
    """
    ## One [anchor_count, (y1, x1, y2, x2)] array per level
    per_level = [
        generate_anchors(level_scale, ratios, feature_shapes[level],
                         feature_strides[level], anchor_stride)
        for level, level_scale in enumerate(scales)
    ]
    return np.concatenate(per_level, axis=0)


In [3]:
# Instantiate torchvision's ResNet-101; pretrained=False requests no pretrained weights.
model = tmdl.resnet101(pretrained=False)

In [4]:
# Print the module tree to inspect layer names and their hyperparameters.
print(model)

ResNet(
  (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
  (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu): ReLU(inplace=True)
  (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  (layer1): Sequential(
    (0): Bottleneck(
      (conv1): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv3): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn3): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
      (downsample): Sequential(
        (0): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 

In [1]:
import numpy as np

In [5]:
# Load the array stored in scene_ids_val.npy (path is relative to the working directory).
x = np.load("resources/scene_ids_val.npy")

In [6]:
# Check the dimensions of the loaded array.
x.shape

(130,)

In [2]:
# Small integer array with repeated values, used to try out np.unique.
a = np.array([1,1,1,3,2,3,6,5,6,5])

In [4]:
# Sorted distinct values of `a`, together with how many times each one occurs.
np.unique(a, return_counts=True)

(array([1, 2, 3, 5, 6]), array([3, 1, 2, 2, 2]))

In [5]:
# Nested literal: 2 outer items, each holding 3 pairs, to see how nesting maps to array axes.
f = np.array([[[1,2],[3,4],[5,6]],[[7,8],[9,10],[11,12]]])

In [6]:
# Axes follow the nesting order: (outer items, pairs per item, values per pair).
f.shape

(2, 3, 2)

In [12]:
# Scratch arithmetic: 256 squared.
256*256

65536

In [3]:
# Load the array stored in planes.npy. NOTE: this reuses the name `x`, which an
# earlier cell bound to the scene-id array.
x = np.load("resources/annotation/planes.npy")

In [4]:
# Check the dimensions of the loaded array.
x.shape

(400, 3)

In [6]:
# Euclidean (L2) norm taken along the last axis of x; keepdims=True keeps the
# reduced axis with length 1 instead of dropping it.
y = np.linalg.norm(x, axis=-1, keepdims=True)

In [7]:
# Confirm the reduced axis was kept as a trailing dimension of length 1.
y.shape

(400, 1)

In [8]:
# Peek at the first five norm values.
y[:5,:]

array([[6.19848593],
       [1.61935664],
       [1.37142423],
       [6.39381482],
       [1.12139934]])

In [9]:
# Append a column of ones to y along the last axis (one extra value per row of y).
centers = np.concatenate([y, np.ones((y.shape[0],1))], axis=-1)
# Display the resulting dimensions.
centers.shape

(400, 2)

In [10]:
# Peek at the first five rows: the norm followed by the appended 1.
centers[:5]

array([[6.19848593, 1.        ],
       [1.61935664, 1.        ],
       [1.37142423, 1.        ],
       [6.39381482, 1.        ],
       [1.12139934, 1.        ]])