In [1]:
import numpy as np
from sklearn import cluster
from collections import defaultdict

In [2]:
# Smoke test of scikit-learn's KMeans: fit 3 clusters on 1000 random 3-D points.
# No seed is set, so the fitted model differs on every run.
data = np.random.randn(1000, 3)
km = cluster.KMeans(n_clusters=3, )
# Last expression of the cell, so the fitted estimator's repr is displayed.
km.fit(data)

KMeans(algorithm='auto', copy_x=True, init='k-means++', max_iter=300,
    n_clusters=3, n_init=10, n_jobs=1, precompute_distances='auto',
    random_state=None, tol=0.0001, verbose=0)

In [3]:
import numpy as np


def iou(box, clusters):
    """
    Calculates the Intersection over Union (IoU) between a box and k clusters.

    The box and every cluster are (width, height) pairs anchored at the origin,
    so the intersection of two of them is min(width) * min(height).

    :param box: tuple or array, shifted to the origin (i. e. width and height)
    :param clusters: array-like of shape (k, 2) where k is the number of clusters
    :return: numpy array of shape (k,) holding the IoU of the box with each cluster
    :raises ValueError: if the box or any cluster has a zero width or a zero height
    """
    clusters = np.asarray(clusters)

    x = np.minimum(clusters[:, 0], box[0])
    y = np.minimum(clusters[:, 1], box[1])
    if np.any(x == 0) or np.any(y == 0):
        # Report the offending values through the exception itself rather than
        # printing them, so callers that catch the error do not get stray output.
        raise ValueError(
            "Box has no area (box={}, intersection widths={}, intersection heights={})".format(box, x, y)
        )

    intersection = x * y
    box_area = box[0] * box[1]
    cluster_area = clusters[:, 0] * clusters[:, 1]

    # union = area(box) + area(cluster) - intersection
    return intersection / (box_area + cluster_area - intersection)


def avg_iou(boxes, clusters):
    """
    Mean of the best IoU every box achieves against any of the k clusters.
    :param boxes: numpy array of shape (r, 2), where r is the number of rows
    :param clusters: numpy array of shape (k, 2) where k is the number of clusters
    :return: average IoU as a single float
    """
    best_per_box = []
    for idx in range(boxes.shape[0]):
        # IoU of this box against every cluster; only the closest cluster counts
        overlaps = iou(boxes[idx], clusters)
        best_per_box.append(np.max(overlaps))
    return np.mean(best_per_box)


def translate_boxes(boxes):
    """
    Translates all the boxes to the origin.

    Columns 2 and 3 are replaced by their absolute difference to columns 0 and 1
    respectively, and columns 0 and 1 are dropped. The input is not modified.

    :param boxes: array-like of shape (r, 4)
    :return: numpy array of shape (r, 2)
    """
    boxes = np.asarray(boxes)

    # Work on a copy of everything after the first two columns so the caller's
    # array stays untouched; the whole array is handled in one vectorized step
    # instead of a Python loop over rows.
    translated = boxes[:, 2:].copy()
    translated[:, :2] = np.abs(boxes[:, 2:4] - boxes[:, :2])
    return translated


def kmeans(boxes, k, dist=np.median):
    """
    Calculates k-means clustering with the Intersection over Union (IoU) metric.

    The distance between a box and a cluster is 1 - IoU. Initial clusters are k
    distinct rows drawn with NumPy's global random state, so seed it beforehand
    (np.random.seed) if reproducible clusters are needed.

    :param boxes: numpy array of shape (r, 2), where r is the number of rows
    :param k: number of clusters
    :param dist: function used to reduce the members of a cluster to its new
        centre; called as dist(members, axis=0)
    :return: numpy array of shape (k, 2)
    :raises ValueError: if k is larger than the number of rows (raised by
        np.random.choice), or if iou() meets a box without area
    """
    rows = boxes.shape[0]

    distances = np.empty((rows, k))
    # -1 is never a valid cluster index, so the very first assignment can not be
    # mistaken for convergence (with zeros, k=1 returned the random seed box).
    last_clusters = np.full((rows,), -1)

    # the Forgy method will fail if the whole array contains the same rows
    # (fancy indexing returns a copy, so updating clusters never touches boxes)
    clusters = boxes[np.random.choice(rows, k, replace=False)]
    while True:
        for row in range(rows):
            distances[row] = 1 - iou(boxes[row], clusters)

        nearest_clusters = np.argmin(distances, axis=1)

        # converged once no box changes its cluster
        if (last_clusters == nearest_clusters).all():
            break

        for idx in range(k):
            members = boxes[nearest_clusters == idx]
            # An empty cluster keeps its previous centre; reducing an empty
            # selection would yield NaN and poison every later iteration.
            if len(members) > 0:
                clusters[idx] = dist(members, axis=0)

        last_clusters = nearest_clusters

    return clusters

In [4]:
# Per-label occurrence counts, filled while parsing the label file.
info = defaultdict(int)

# Read the file and close it before parsing; blank lines are skipped so a
# trailing newline does not raise an IndexError below.
with open('./label_file.txt') as f:
    raw_lines = [lin.strip() for lin in f if lin.strip()]

# Each line is split on ':'; field 1 is the label and the last field is a
# bracketed, comma-separated list of numbers (brackets stripped with [1:-1]).
lines = []
for lin in raw_lines:
    fields = lin.split(':')
    info[fields[1]] += 1
    lines.append([float(value) for value in fields[-1][1:-1].split(',')])

# Keep values 2 and 3 of every record (presumably box width and height — TODO confirm).
data = np.array([[lin[2], lin[3]] for lin in lines], dtype=float).reshape(-1, 2)
print(data.shape)
# iou() rejects boxes with a zero width OR a zero height, so filter on both.
data = data[(data[:, 0] > 0) & (data[:, 1] > 0)]
print(data.shape)

(120667, 2)
(120663, 2)


In [38]:
# Cluster the two-column `data` array into 3 centroids with the IoU-based k-means.
# The initial centroids are random, so results vary between runs.
out = kmeans(data, k=3)

# "Accuracy" is the mean, over all boxes, of the best IoU with any centroid.
print("Accuracy: {:.2f}%".format(avg_iou(data, out) * 100))
print("Boxes:\n {}".format(out))

# Column 0 divided by column 1 for every centroid, rounded to 2 decimals.
ratios = np.around(out[:, 0] / out[:, 1], decimals=2).tolist()
print("Ratios:\n {}".format(sorted(ratios)))

Accuracy: 54.63%
Boxes:
 [[0.14983444 0.10099338]
 [0.01986755 0.02400662]
 [0.04304636 0.04895833]]
Ratios:
 [0.83, 0.88, 1.48]


In [7]:
# Order the centroids by area (column 0 * column 1), smallest first.
# NOTE(review): the saved output lists 12 boxes, so `out` presumably came from a
# kmeans run with k=12 rather than the k=3 call shown in the notebook — TODO confirm.
area = out[:, 0] * out[:, 1]
bbox = out[np.argsort(area)]
print(bbox)

[[0.01076159 0.01821192]
 [0.02152318 0.01986755]
 [0.01821192 0.03642384]
 [0.03145695 0.02649007]
 [0.0397351  0.0397351 ]
 [0.0281457  0.06291391]
 [0.07119205 0.034375  ]
 [0.06291391 0.05794702]
 [0.04801325 0.09602649]
 [0.17466887 0.05629139]
 [0.10099338 0.10761589]
 [0.25165563 0.14321192]]


In [8]:
# Scale the sorted centroids by 416 (presumably converting normalised sizes to
# pixels for a 416-pixel input — TODO confirm).
(bbox) * (416)

array([[  4.47682119,   7.57615894],
       [  8.95364238,   8.26490066],
       [  7.57615894,  15.15231788],
       [ 13.08609272,  11.01986755],
       [ 16.52980132,  16.52980132],
       [ 11.70860927,  26.17218543],
       [ 29.61589404,  14.3       ],
       [ 26.17218543,  24.10596026],
       [ 19.97350993,  39.94701987],
       [ 72.66225166,  23.41721854],
       [ 42.01324503,  44.76821192],
       [104.68874172,  59.57615894]])

In [190]:
# Display the per-label counts collected while parsing the label file.
info

defaultdict(int,
            {'car': 19598,
             'cycle': 689,
             'Left_p': 6075,
             'Junc_p': 11939,
             'car_head': 6041,
             'car_wheel': 11208,
             'car_rear': 2319,
             'Right_p': 6044,
             'parking_spot': 5998,
             'cone': 16907,
             'pillar': 17608,
             'person': 7874,
             'car_sanlun': 12,
             'June_p': 85,
             'cycle_sanlun': 77,
             'left_p': 135,
             'car_whee1': 18,
             'forbid_brand': 5610,
             'annimal': 1,
             'Lift_p': 2,
             'parking_pot': 1573,
             'parkinng_lot': 130,
             'parking_lot': 234,
             'forbid_brand_star': 93,
             'cone_star': 81,
             'car_star': 124,
             'person_star': 99,
             'parking_spot_star': 1,
             'parking_sopt': 88,
             'car_ground_point': 2,
             'car_ground_line': 2})

In [206]:
# Labels to compute weights for; every other key in `info` is ignored here.
classes = ['Junc_p','Left_p','Right_p','car','cone','cycle','person','pillar','forbid_brand','parking_spot']
 
# Number of occurrences of each selected label, in `classes` order.
nums = np.array([info[k] for k in classes])

# One minus each label's share of the selected total: rarer labels get larger weights.
weights = 1 - nums / nums.sum()

print(weights)

[0.87859714 0.93822578 0.93854101 0.80071587 0.82807956 0.99299384
 0.91993248 0.82095137 0.94295418 0.93900877]


In [207]:
# Display the raw per-label counts behind the weights.
nums

array([11939,  6075,  6044, 19598, 16907,   689,  7874, 17608,  5610,
        5998])

In [208]:
# Same counts as a label -> count mapping, restricted to the selected classes.
{k:info[k] for k in classes}

{'Junc_p': 11939,
 'Left_p': 6075,
 'Right_p': 6044,
 'car': 19598,
 'cone': 16907,
 'cycle': 689,
 'person': 7874,
 'pillar': 17608,
 'forbid_brand': 5610,
 'parking_spot': 5998}

In [209]:
# Three groups of three anchor pairs, one group per object scale.
# NOTE(review): values look like (width, height) in grid-cell units — TODO confirm.
ANCHORS = [[(1.25, 1.625), (2.0, 3.75), (4.125, 2.875)],            # Anchors for small obj
           [(1.875, 3.8125), (3.875, 2.8125), (3.6875, 7.4375)],    # Anchors for medium obj
           [(3.625, 2.8125), (4.875, 6.1875), (11.65625, 10.1875)]] # Anchors for big obj

In [211]:
# Multiply every anchor by 32 (presumably a stride, to express them in pixels — TODO confirm).
np.array(ANCHORS) * 32

array([[[ 40.,  52.],
        [ 64., 120.],
        [132.,  92.]],

       [[ 60., 122.],
        [124.,  90.],
        [118., 238.]],

       [[116.,  90.],
        [156., 198.],
        [373., 326.]]])

In [226]:
# Nine hard-coded two-value rows, regrouped into 3 groups of 3 and scaled by 320.
# NOTE(review): the rows look like k-means centroids pasted from an earlier run,
# and 320 like an input size in pixels — TODO confirm both.
np.array([[0.00993377, 0.01490066],
 [0.01490066, 0.02980132],
 [0.02649007, 0.02291667],
 [0.02483444, 0.05298013],
 [0.04139073, 0.03476821],
 [0.04966887, 0.06622517],
 [0.14072848, 0.05215232],
 [0.08774834, 0.10596026],
 [0.23509934, 0.13576159]]).reshape(3, 3, 2) * 320

array([[[ 3.1788064,  4.7682112],
        [ 4.7682112,  9.5364224],
        [ 8.4768224,  7.3333344]],

       [[ 7.9470208, 16.9536416],
        [13.2450336, 11.1258272],
        [15.8940384, 21.1920544]],

       [[45.0331136, 16.6887424],
        [28.0794688, 33.9072832],
        [75.2317888, 43.4437088]]])

In [227]:
# 3 x 3 table of two-value anchors as float32, every value halved.
# Rows are ordered from the largest anchors to the smallest.
# NOTE(review): the numbers match the commonly used YOLOv3 default anchors; the
# reason for halving them is not visible here — TODO confirm.
anchors = np.array([[[116,90], [156,198], [373,326]],
                    [[30,61], [62,45], [59,119]],  
                    [[10,13], [16,30], [33,23]]], dtype=np.float32) / 2

In [235]:
# Scratch arithmetic: 1208 is not a multiple of 32 (presumably checking an image
# size against a stride of 32 — TODO confirm).
1208 / 32

37.75

In [487]:
import importlib

import torch
import torch.nn as nn

import ops_dcn

# Pick up edits made to ops_dcn since it was first imported in this kernel.
# importlib.reload replaces imp.reload: the imp module is deprecated and no
# longer exists in Python 3.12+.
importlib.reload(ops_dcn)

data = torch.rand(1, 3, 16, 16)
dcn = ops_dcn.DeformConv2d(3, 10, kernel_size=3, padding=1, stride=2)

print(dcn(data).shape)

# Reference: a plain convolution with the same hyper-parameters, to compare the
# output shape. `nn` is imported above so this cell runs on a fresh kernel.
m = nn.Conv2d(3, 10, 3, padding=1, stride=2)
m(data).shape

offset.shape： torch.Size([1, 18, 8, 8])
16
p.shape:  torch.Size([1, 18, 8, 8])
torch.Size([1, 8, 8, 9])
torch.Size([1, 8, 8, 9])
torch.Size([1, 8, 8, 9])
torch.Size([1, 8, 8, 9])
torch.Size([1, 3, 8, 8, 9]) --
tensor([0.0000, 0.0000, 0.0000, 0.0837, 0.6630, 0.7257, 0.0000, 0.5391],
       grad_fn=<SliceBackward>)
tensor([0., 0., 0., 0., 0., 0., 0., 0.], grad_fn=<SliceBackward>)
torch.Size([1, 10, 8, 8])


torch.Size([1, 10, 8, 8])

In [551]:
import torch
import torch.nn as nn
import torch.nn.functional as F

class Deformable_Conv2d(nn.Module):
    """
    Deformable 2-D convolution (optionally modulated) built from plain PyTorch ops.

    For every output location the layer predicts 2 * N offsets (N = kernel_size ** 2),
    samples the zero-padded input at the shifted kernel positions with bilinear
    interpolation, lays the N samples out as a kernel_size x kernel_size patch and
    applies an ordinary convolution with stride kernel_size over those patches.

    Coordinate layout used throughout: the last (or channel) dimension holds 2 * N
    values; the first N are row coordinates, the last N are column coordinates.

    :param inc: number of input channels
    :param outc: number of output channels
    :param kernel_size: side of the square kernel (int)
    :param padding: zero padding added on every side of the input (int)
    :param stride: stride of the sampling grid (int)
    :param bias: forwarded to the final nn.Conv2d; a falsy value (default) means no bias
    :param modulation: if True, every sample is scaled by a learned sigmoid mask
    """

    def __init__(self, inc, outc, kernel_size=3, padding=1, stride=1, bias=None, modulation=False):
        super(Deformable_Conv2d, self).__init__()

        self.kernel_size = kernel_size
        self.padding = padding
        self.stride = stride
        self.N = kernel_size * kernel_size
        # Side of the cached base grid; forward() grows it when a larger map shows up.
        self.max_fea_dim = 100

        self.zero_padding = nn.ZeroPad2d(padding)
        # Offset predictor. Zero weights mean the initial offsets depend only on its bias.
        self.p_conv = nn.Conv2d(inc, 2 * self.N, kernel_size=kernel_size, padding=padding, stride=stride)
        nn.init.zeros_(self.p_conv.weight)

        self.modulation = modulation
        if modulation:
            # Modulation predictor: one scalar per kernel tap, squashed by a sigmoid in forward().
            self.m_conv = nn.Conv2d(inc, self.N, kernel_size=kernel_size, padding=padding, stride=stride)
            nn.init.constant_(self.m_conv.weight, 0.5)

        # Applied to the re-arranged samples, where each output location owns a
        # kernel_size x kernel_size patch, hence stride == kernel_size.
        self.conv = nn.Conv2d(inc, outc, kernel_size=kernel_size, stride=kernel_size, bias=bias)

        self.p = self.get_p()  # 1, 2N, max_fea_dim, max_fea_dim

    def forward(self, x):
        """
        Apply the deformable convolution.

        :param x: input tensor of shape (n, inc, h, w)
        :return: tensor of shape (n, outc, h_out, w_out), with the same spatial size
            a regular convolution with these hyper-parameters would produce
        """
        offset = self.p_conv(x)
        out_h, out_w = offset.size(2), offset.size(3)

        if self.modulation:
            m = torch.sigmoid(self.m_conv(x))

        # Grow the cached base grid when the feature map is larger than it.
        if max(out_h, out_w) > self.max_fea_dim:
            self.max_fea_dim = max(out_h, out_w)
            self.p = self.get_p()

        x = self.zero_padding(x)
        # Rows index dimension 2 (height) and columns dimension 3 (width) of the padded input.
        max_row, max_col = x.size(2) - 1, x.size(3) - 1

        p = offset + self.p[:, :, :out_h, :out_w].to(device=x.device, dtype=x.dtype)
        p = self._clamp_coords(p.permute(0, 2, 3, 1), max_row, max_col)  # n, h, w, 2N

        # The four integer neighbours of every (clamped) sampling point. They are
        # derived from the clamped point and left unclamped for the weights, so a
        # neighbour falling outside the image gets weight 0 instead of duplicating
        # the border pixel.
        p_lt = p.detach().floor()
        p_rb = p_lt + 1.
        p_lb = torch.cat((p_lt[..., :self.N], p_rb[..., self.N:]), dim=-1)
        p_rt = torch.cat((p_rb[..., :self.N], p_lt[..., self.N:]), dim=-1)

        # n 1 h w N
        g_lt = self.compute_bilinear_g(p_lt, p).unsqueeze(1)
        g_rb = self.compute_bilinear_g(p_rb, p).unsqueeze(1)
        g_lb = self.compute_bilinear_g(p_lb, p).unsqueeze(1)
        g_rt = self.compute_bilinear_g(p_rt, p).unsqueeze(1)

        # n c h w N  (indices are clamped only for the gather; such samples carry weight 0)
        x_q_lt = self.get_x_q(x, self._clamp_coords(p_lt, max_row, max_col))
        x_q_rb = self.get_x_q(x, self._clamp_coords(p_rb, max_row, max_col))
        x_q_lb = self.get_x_q(x, self._clamp_coords(p_lb, max_row, max_col))
        x_q_rt = self.get_x_q(x, self._clamp_coords(p_rt, max_row, max_col))

        # n c h w N
        x_offset = g_lt * x_q_lt + g_rb * x_q_rb + g_lb * x_q_lb + g_rt * x_q_rt

        if self.modulation:
            m = m.permute(0, 2, 3, 1).unsqueeze(1)  # n 1 h w N
            x_offset = x_offset * m

        # Lay the N samples of every location out as a ks x ks patch:
        # (n, c, h, w, ks, ks) -> (n, c, h, ks, w, ks) -> (n, c, h * ks, w * ks)
        n, c, h, w, N = x_offset.size()
        x_offset = x_offset.view(n, c, h, w, self.kernel_size, self.kernel_size).permute(0, 1, 2, 4, 3, 5).contiguous()
        out = self.conv(x_offset.view(n, c, h * self.kernel_size, w * self.kernel_size))

        return out

    def _clamp_coords(self, coords, max_row, max_col):
        """
        Clamp row coordinates (first N entries of the last dim) to [0, max_row] and
        column coordinates (last N entries) to [0, max_col]. Returns a new tensor.
        """
        rows = coords[..., :self.N].clamp(0, max_row)
        cols = coords[..., self.N:].clamp(0, max_col)
        return torch.cat((rows, cols), dim=-1)

    def compute_bilinear_g(self, q, p):
        """
        Bilinear interpolation weight of integer location q for sampling point p.

        :param q: tensor (n, h, w, 2N) of integer-valued coordinates
        :param p: tensor (n, h, w, 2N) of sampling coordinates
        :return: tensor (n, h, w, N), max(0, 1 - |row diff|) * max(0, 1 - |col diff|)
        """
        gx = torch.clamp(1 - torch.abs(p[..., :self.N] - q[..., :self.N]), min=0)
        gy = torch.clamp(1 - torch.abs(p[..., self.N:] - q[..., self.N:]), min=0)
        return gx * gy

    def get_x_q(self, x, q):
        """
        Gather the values of x at the integer locations q.

        :param x: padded input of shape (n, c, x_h, x_w)
        :param q: tensor (n, h, w, 2N) of in-range integer-valued coordinates
        :return: tensor (n, c, h, w, N)
        """
        n, h, w, _ = q.size()
        _, c, _, x_w = x.size()
        flat_x = x.reshape(n, c, -1)

        # Build the flat index (row * width + col) in integer arithmetic so it
        # stays exact for large inputs.
        q = q.long()
        index = q[..., :self.N] * x_w + q[..., self.N:]
        index = index.unsqueeze(1).expand(-1, c, -1, -1, -1).reshape(n, c, -1)
        x_offset = flat_x.gather(dim=-1, index=index).view(n, c, h, w, self.N)

        return x_offset

    def get_p(self, ):
        """
        Build the base sampling grid (no learned offsets) for a square map of side
        self.max_fea_dim, in coordinates of the zero-padded input.

        :return: integer tensor (1, 2N, max_fea_dim, max_fea_dim); channels [0, N)
            are row coordinates, channels [N, 2N) are column coordinates
        """
        ks, dim = self.kernel_size, self.max_fea_dim

        # kernel scope R: relative tap positions, row-major over the ks x ks kernel.
        # Built by broadcasting, equivalent to torch.meshgrid with 'ij' indexing.
        taps = torch.arange(-(ks - 1) // 2, (ks - 1) // 2 + 1)
        p_n_x = taps.view(-1, 1).expand(ks, ks).reshape(-1)
        p_n_y = taps.view(1, -1).expand(ks, ks).reshape(-1)
        p_n = torch.cat((p_n_x, p_n_y), dim=0).view(1, 2 * self.N, 1, 1)

        # center location: the first window starts at padded index 0, so its
        # centre tap (relative position 0) sits at ks // 2 (1 for a 3 x 3 kernel).
        first = ks // 2
        centres = torch.arange(first, dim * self.stride + first, self.stride)
        p_o_x = centres.view(1, 1, dim, 1).expand(1, self.N, dim, dim)
        p_o_y = centres.view(1, 1, 1, dim).expand(1, self.N, dim, dim)
        p_o = torch.cat((p_o_x, p_o_y), dim=1)

        return p_n + p_o

In [552]:
# Shape check on CPU: the deformable layer and a plain convolution with the same
# hyper-parameters should produce outputs of the same shape.
# The trailing commented-out .to(...) calls are the CUDA variant of each line.
data = torch.rand(1, 3, 20, 20)#.to(torch.device('cuda:0'))
deform_conv = Deformable_Conv2d(3, 3, kernel_size=3, stride=1, padding=1, modulation=True)#.to(torch.device('cuda:0'))
conv = nn.Conv2d(3, 3, kernel_size=3, stride=1, padding=1)#.to(torch.device('cuda:0'))

print(deform_conv(data).shape)
print(conv(data).shape)

torch.Size([1, 3, 20, 20])
torch.Size([1, 3, 20, 20])


In [555]:
# Time one forward pass of the deformable layer on the tensor in `data`.
%timeit deform_conv(data)

39.9 ms ± 8.75 ms per loop (mean ± std. dev. of 7 runs, 10 loops each)


In [546]:
# Baseline timing: one forward pass of the plain convolution on the same input.
%timeit conv(data)

14.5 ms ± 3.1 ms per loop (mean ± std. dev. of 7 runs, 100 loops each)


In [338]:
# Print the module's repr, then the name of every registered sub-module.
print(deform_conv)
# named_modules() also yields the root module itself, whose name is the empty
# string; the module object `m` is not used here.
for n, m in deform_conv.named_modules():
    print(n, )

Deformable_Conv2d(
  (zero_padding): ZeroPad2d(padding=(1, 1, 1, 1), value=0.0)
  (p_conv): Conv2d(1, 18, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (m_conv): Conv2d(1, 9, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
)

zero_padding
p_conv
m_conv


In [10]:
# Scratch data: a 3 x 4 matrix of standard-normal samples (unseeded).
a = np.random.randn(3, 4)

In [16]:
# Row-wise product of columns 2 and 3, i.e. a[:, 2] * a[:, 3].
a[:, 2:].prod(axis=1)

array([ 0.11984671,  0.01168084, -0.75272534])

In [37]:
# Scratch arithmetic: log2(0.1) + 5 (the purpose of the +5 is not visible here — TODO confirm).
np.log2(0.1) + 5

1.6780719051126378

In [32]:
# Scratch check of np.round: 4.8 rounds to 5.0 (the result stays a float).
np.round(4.8) 

5.0