In [1]:
using BenchmarkTools
using Knet

In [2]:
@doc randn
dummy = KnetArray(Float32.(randn((512,512,3,1))))

dummy

In [3]:

struct Conv; w; b; f; p; end
(c::Conv)(x) = c.f.(pool(conv4(c.w, dropout(x,c.p)) .+ c.b))
Conv(w1::Int,w2::Int,cx::Int,cy::Int,f=relu;pdrop=0) = Conv(param(w1,w2,cx,cy), param0(1,1,cy,1), f, pdrop)

Conv

In [4]:
# Define a convolutional layer:
struct BasicStem; w; b; f; p; end
(bs::BasicStem)(x) = bs.f.(pool(conv4(bs.w, dropout(x,bs.p),stride=2,padding=3) .+ bs.b,window=2))
BasicStem(w1::Int,w2::Int,cx::Int,cy::Int,f=relu;pdrop=0) = BasicStem(param(w1,w2,cx,cy), param0(1,1,cy,1), f, pdrop)


BasicStem

In [5]:
@doc conv4

```
conv4(w, x; kwargs...)
```

Execute convolutions or cross-correlations using filters specified with `w` over tensor `x`.

If `w` has dimensions `(W1,W2,...,Cx,Cy)` and `x` has dimensions `(X1,X2,...,Cx,N)`, the result `y` will have dimensions `(Y1,Y2,...,Cy,N)` where `Cx` is the number of input channels, `Cy` is the number of output channels, `N` is the number of instances, and `Wi,Xi,Yi` are spatial dimensions with `Yi` determined by:

```
Yi = 1 + floor((Xi + 2*padding[i] - ((Wi-1)*dilation[i] + 1)) / stride[i])
```

`padding`, `stride` and `dilation` are keyword arguments that can be specified as a single number (in which case they apply to all dimensions), or an array/tuple with entries for each spatial dimension.

# Keywords

  * `padding=0`: the number of extra zeros implicitly concatenated at the start and end of each dimension.
  * `stride=1`: the number of elements to slide to reach the next filtering window.
  * `dilation=1`: dilation factor for each dimension.
  * `mode=0`: 0 for convolution and 1 for cross-correlation (which flips the filter).
  * `alpha=1`: can be used to scale the result.
  * `group=1`: can be used to perform grouped convolutions.


In [6]:
# Define a convolutional layer:
struct BottleneckBlock_b; w0; w1; w2; w3; p; end
(this::BottleneckBlock_b)(x) = relu.(
    conv4(this.w3, dropout(
            relu.(conv4(this.w2, dropout(
                    relu.(conv4(this.w1, dropout(x,this.p)))
                    ,this.p),
                    padding=1)
                )
            ,this.p)) .+
    conv4(this.w0, dropout(x,this.p))
)
BottleneckBlock_b(cx::Int,cy::Int;pdrop=0) =
    BottleneckBlock_b(
        param(1,1,cx,cy), #w0
        param(1,1,cx,cx), #w1
        param(3,3,cx,cx), #w2
        param(1,1,cx,cy), #w3
        pdrop
    )


BottleneckBlock_b

In [7]:
@doc pool

```
pool(x; kwargs...)
```

Compute pooling of input values (i.e., the maximum or average of several adjacent values) to produce an output with smaller height and/or width.

If `x` has dimensions `(X1,X2,...,Cx,N)`, the result `y` will have dimensions `(Y1,Y2,...,Cx,N)` where

```
Yi=1+floor((Xi+2*padding[i]-window[i])/stride[i])
```

Here `Cx` is the number of input channels, `N` is the number of instances, and `Xi,Yi` are spatial dimensions.  `window`, `padding` and `stride` are keyword arguments that can be specified as a single number (in which case they apply to all dimensions), or an array/tuple with entries for each spatial dimension.

# Keywords:

  * `window=2`: the pooling window size for each dimension.
  * `padding=0`: the number of extra zeros implicitly concatenated at the start and at the end of each dimension.
  * `stride=window`: the number of elements to slide to reach the next pooling window.
  * `mode=0`: 0 for max, 1 for average including padded values, 2 for average excluding padded values, 3 for deterministic max.
  * `maxpoolingNanOpt=1`: Nan numbers are not propagated if 0, they are propagated if 1.
  * `alpha=1`: can be used to scale the result.


In [8]:
dummybb_b = BottleneckBlock_b(3,3)

BottleneckBlock_b(P(KnetArray{Float32,4}(1,1,3,3)), P(KnetArray{Float32,4}(1,1,3,3)), P(KnetArray{Float32,4}(3,3,3,3)), P(KnetArray{Float32,4}(1,1,3,3)), 0)

In [9]:
@time dummybb_b(dummy)

  9.445986 seconds (18.23 M allocations: 923.061 MiB, 6.87% gc time)


512×512×3×1 KnetArray{Float32,4}:
[:, :, 1, 1] =
 1.86114   0.0       0.542849  1.62628     …  0.955678   0.965414   0.972152
 0.0       0.0       2.5603    0.0            0.0        2.89676    0.954212
 0.0       0.0       0.0       0.0            0.786457   0.969218   0.301087
 0.0       0.0       0.760335  0.0            0.41926    0.0        0.267461
 0.0       0.0       0.0       1.58539        0.0        2.25346    0.504671
 0.0       0.0       0.0       0.0         …  0.0        0.0        0.0
 1.1129    0.0       0.0       0.369513       0.0        1.41014    0.0
 0.0       0.0       1.82711   0.0            0.0        0.982605   0.148999
 0.37624   0.604688  0.745195  0.0348488      0.506591   0.0        0.564787
 1.43722   1.60575   0.0       0.432641       0.0        2.05851    0.127971
 1.62786   0.0       0.448428  2.50721     …  0.0        0.0299819  0.641027
 0.0       0.0       0.0       0.0            0.0        0.388475   0.0
 0.0       1.09188   0.0       0.0        

In [10]:
# Define a convolutional layer:
struct BottleneckBlock_a; w1; w2; w3; p; end
(this::BottleneckBlock_a)(x) = relu.(
    conv4(this.w3, dropout(
            relu.(conv4(this.w2, dropout(
                    relu.(conv4(this.w1, dropout(x,this.p)))
                    ,this.p),
                    padding=1))
            ,this.p)) .+
    x
)
BottleneckBlock_a(cx::Int,cy::Int;pdrop=0) =
    BottleneckBlock_a(
        param(1,1,cx,cy), #w1
        param(3,3,cy,cy), #w2
        param(1,1,cy,cx), #w3
        pdrop
    )


BottleneckBlock_a

In [11]:
dummybb_a = BottleneckBlock_a(3,1)

BottleneckBlock_a(P(KnetArray{Float32,4}(1,1,3,1)), P(KnetArray{Float32,4}(3,3,1,1)), P(KnetArray{Float32,4}(1,1,1,3)), 0)

In [12]:
@time dummybb_a(dummy)

  0.027866 seconds (7.93 k allocations: 435.831 KiB)


512×512×3×1 KnetArray{Float32,4}:
[:, :, 1, 1] =
 0.0       0.508761  0.0        0.638269  …  0.0        0.0       0.0
 0.0       0.0       0.464915   0.0          0.0        1.08543   0.445862
 1.40298   0.0       0.63193    0.208365     0.0        0.359107  1.12401
 0.0       1.09607   0.185778   0.212255     0.0        1.38321   0.9353
 0.255622  0.0       0.155535   0.463058     0.0        0.0       1.40997
 0.0       0.678276  0.0551335  1.05908   …  2.2331     0.846802  0.0
 0.0       0.0       0.0        0.182355     0.0        0.699376  0.0276002
 0.0       0.0       0.23589    0.0          0.0        0.0       1.25623
 1.20527   0.0       0.447696   0.431691     0.0        1.46283   0.0
 0.0       0.204454  0.0        1.62528      0.0        1.36814   0.318914
 0.189558  0.0       0.0        1.58516   …  0.0        0.0       0.54103
 0.0       1.69819   0.0        1.80811      1.04505    1.22719   0.446802
 1.43787   1.59612   0.0150481  0.0          0.0        0.0       0.083

In [13]:
# Define a convolutional layer:
struct BottleneckBlock_c; w0; w1; w2; w3; p; end
(this::BottleneckBlock_c)(x) = relu.(
    conv4(this.w3, dropout(
            relu.(conv4(this.w2, dropout(
                    relu.(conv4(this.w1, dropout(x,this.p),stride=2))
                    ,this.p),
                    padding=1)
                )
            ,this.p)) .+
    conv4(this.w0, dropout(x,this.p),stride=2)
)
BottleneckBlock_c(cx::Int,cy::Int,ci::Int;pdrop=0) =
    BottleneckBlock_c(
        param(1,1,cx,cy), #w0
        param(1,1,cx,ci), #w1
        param(3,3,ci,ci), #w2
        param(1,1,ci,cy), #w3
        pdrop
    )


BottleneckBlock_c

In [14]:
dummybb_c = BottleneckBlock_c(3,1,2)

BottleneckBlock_c(P(KnetArray{Float32,4}(1,1,3,1)), P(KnetArray{Float32,4}(1,1,3,2)), P(KnetArray{Float32,4}(3,3,2,2)), P(KnetArray{Float32,4}(1,1,2,1)), 0)

In [15]:
@time dummybb_c(dummy)

  0.226677 seconds (582.74 k allocations: 31.023 MiB, 7.60% gc time)


256×256×1×1 KnetArray{Float32,4}:
[:, :, 1, 1] =
 0.0       0.259841   0.0        1.19015    …  0.337331  0.0        0.996897
 1.23436   1.15098    0.149838   0.0           0.888061  0.0        0.248614
 1.236     0.158171   0.0        0.395779      0.0       3.04839    0.0
 0.722186  1.64074    0.90629    0.0           0.0       1.52061    0.0
 1.41872   0.391236   0.0        0.898133      1.37035   0.0        0.793682
 0.569994  0.0        1.8065     0.0462089  …  0.0       0.0        0.0
 1.15272   0.0        1.22494    1.71573       0.455934  0.0        1.13586
 0.0       1.45388    0.719307   1.16118       0.956943  0.797954   0.880557
 0.0       0.0        2.05572    0.0           1.44114   0.535114   0.0
 1.30601   0.0        0.626656   0.37887       0.9283    2.76353    0.0
 0.870601  0.0        0.0        0.741804   …  0.448237  0.0699344  0.0
 0.0       0.889106   0.0164227  0.938426      1.15358   0.0196386  1.50882
 1.08644   0.0        0.967996   0.2096        1.2358    0.

In [16]:
struct Chain
    layers
    Chain(layers...) = new(layers)
end
(c::Chain)(x) = (for l in c.layers; x = l(x); end; x)
(c::Chain)(x,y) = sse(c(x),y)

In [17]:
backbone = Chain(
    BasicStem(7,7,3,64),
    BottleneckBlock_b(64,256),
    BottleneckBlock_a(256,64),
    BottleneckBlock_c(256,512,128),
    BottleneckBlock_a(512,128),
    BottleneckBlock_a(512,128),
    BottleneckBlock_a(512,128),
    BottleneckBlock_c(512,1024,256),
    BottleneckBlock_a(1024,256),
    BottleneckBlock_a(1024,256),
    BottleneckBlock_a(1024,256),
    BottleneckBlock_a(1024,256),
    BottleneckBlock_c(1024,2048,512),
    BottleneckBlock_a(2048,4096),
    BottleneckBlock_a(2048,4096),
)

Chain((BasicStem(P(KnetArray{Float32,4}(7,7,3,64)), P(KnetArray{Float32,4}(1,1,64,1)), Knet.Ops20.relu, 0), BottleneckBlock_b(P(KnetArray{Float32,4}(1,1,64,256)), P(KnetArray{Float32,4}(1,1,64,64)), P(KnetArray{Float32,4}(3,3,64,64)), P(KnetArray{Float32,4}(1,1,64,256)), 0), BottleneckBlock_a(P(KnetArray{Float32,4}(1,1,256,64)), P(KnetArray{Float32,4}(3,3,64,64)), P(KnetArray{Float32,4}(1,1,64,256)), 0), BottleneckBlock_c(P(KnetArray{Float32,4}(1,1,256,512)), P(KnetArray{Float32,4}(1,1,256,128)), P(KnetArray{Float32,4}(3,3,128,128)), P(KnetArray{Float32,4}(1,1,128,512)), 0), BottleneckBlock_a(P(KnetArray{Float32,4}(1,1,512,128)), P(KnetArray{Float32,4}(3,3,128,128)), P(KnetArray{Float32,4}(1,1,128,512)), 0), BottleneckBlock_a(P(KnetArray{Float32,4}(1,1,512,128)), P(KnetArray{Float32,4}(3,3,128,128)), P(KnetArray{Float32,4}(1,1,128,512)), 0), BottleneckBlock_a(P(KnetArray{Float32,4}(1,1,512,128)), P(KnetArray{Float32,4}(3,3,128,128)), P(KnetArray{Float32,4}(1,1,128,512)), 0), Bottleneck

In [18]:
@time backbone(dummy)

  1.289268 seconds (1.55 M allocations: 81.046 MiB)


16×16×2048×1 KnetArray{Float32,4}:
[:, :, 1, 1] =
 0.0        0.0019023  0.00983799  …  0.0        0.0         0.0142959
 0.0258378  0.0254935  0.0192449      0.0193629  0.0196242   0.0149422
 0.0315548  0.0436926  0.0198608      0.0373575  0.036246    0.0229732
 0.0224715  0.0255725  0.0354985      0.0386999  0.0264843   0.0256632
 0.0306949  0.0460532  0.0356504      0.0363406  0.0341441   0.010619
 0.0244115  0.0268852  0.0536154   …  0.0322514  0.0244651   0.0088153
 0.0301859  0.0213732  0.0421636      0.0601192  0.0400615   0.023701
 0.0241582  0.0398711  0.0388851      0.0369853  0.033705    0.0172734
 0.0390706  0.02827    0.0362115      0.0415402  0.0313748   0.0115358
 0.0271671  0.0215233  0.0356906      0.0541072  0.0299694   0.0181748
 0.0233642  0.031591   0.0232809   …  0.046913   0.0423421   0.01961
 0.0237943  0.0260807  0.0330564      0.0431888  0.00946233  0.0223585
 0.0282296  0.0279735  0.0357545      0.0435145  0.0393461   0.0199641
 0.0227538  0.0341361  0.036827

In [19]:
BasicStem(7,7,3,64)(dummy)

128×128×64×1 KnetArray{Float32,4}:
[:, :, 1, 1] =
 0.0352165  0.540592    0.0963047  …  0.144441   0.236264   0.240665
 0.2447     0.0         0.637007      0.337211   0.0        0.516928
 0.130041   0.567055    0.153623      0.48537    0.437868   0.460014
 0.025715   0.242873    0.363507      0.291884   0.197354   0.269797
 0.236543   0.279261    0.148285      0.226635   0.432296   0.37948
 0.174989   0.119007    0.26594    …  0.382472   0.644718   0.211079
 0.323527   0.0         0.24894       0.363139   0.0544726  0.553465
 0.161222   0.25437     0.180551      0.514618   0.256482   0.0827562
 0.015004   0.384495    0.117426      0.400887   0.104268   0.731624
 0.171283   0.355369    0.0           0.0740212  0.167556   0.216154
 0.525138   0.525988    0.0132711  …  0.573255   0.393855   0.223849
 0.394108   0.214123    0.125566      0.704633   0.228259   0.158252
 0.202818   0.00159973  0.570106      0.273616   0.442646   0.329679
 ⋮                                 ⋱  ⋮              

In [20]:
struct Pyramid
    stem
    layers
    Pyramid(stem, layers...) = new(stem,layers)
end
function (p::Pyramid)(x)
    # Climbing up the pyramid
    r = p.stem(x);
    featureMaps = ();
    for l in p.layers;
        r = l(r);
        featureMaps = (featureMaps...,r);
    end
    featureMaps;
end

In [21]:
backbone = Pyramid(
    BasicStem(7,7,3,64),
    Chain(
        BottleneckBlock_b(64,256),
        BottleneckBlock_a(256,64)
        ),
    Chain(
        BottleneckBlock_c(256,512,128),
        BottleneckBlock_a(512,128),
        BottleneckBlock_a(512,128),
        BottleneckBlock_a(512,128),
        ),
    Chain(
        BottleneckBlock_c(512,1024,256),
        BottleneckBlock_a(1024,256),
        BottleneckBlock_a(1024,256),
        BottleneckBlock_a(1024,256),
        BottleneckBlock_a(1024,256),
        ),
    Chain(
        BottleneckBlock_c(1024,2048,512),
        BottleneckBlock_a(2048,4096),
        BottleneckBlock_a(2048,4096),
        ),
    Chain(
        x -> pool(x)
        )
)

Pyramid(BasicStem(P(KnetArray{Float32,4}(7,7,3,64)), P(KnetArray{Float32,4}(1,1,64,1)), Knet.Ops20.relu, 0), (Chain((BottleneckBlock_b(P(KnetArray{Float32,4}(1,1,64,256)), P(KnetArray{Float32,4}(1,1,64,64)), P(KnetArray{Float32,4}(3,3,64,64)), P(KnetArray{Float32,4}(1,1,64,256)), 0), BottleneckBlock_a(P(KnetArray{Float32,4}(1,1,256,64)), P(KnetArray{Float32,4}(3,3,64,64)), P(KnetArray{Float32,4}(1,1,64,256)), 0))), Chain((BottleneckBlock_c(P(KnetArray{Float32,4}(1,1,256,512)), P(KnetArray{Float32,4}(1,1,256,128)), P(KnetArray{Float32,4}(3,3,128,128)), P(KnetArray{Float32,4}(1,1,128,512)), 0), BottleneckBlock_a(P(KnetArray{Float32,4}(1,1,512,128)), P(KnetArray{Float32,4}(3,3,128,128)), P(KnetArray{Float32,4}(1,1,128,512)), 0), BottleneckBlock_a(P(KnetArray{Float32,4}(1,1,512,128)), P(KnetArray{Float32,4}(3,3,128,128)), P(KnetArray{Float32,4}(1,1,128,512)), 0), BottleneckBlock_a(P(KnetArray{Float32,4}(1,1,512,128)), P(KnetArray{Float32,4}(3,3,128,128)), P(KnetArray{Float32,4}(1,1,128,512

In [22]:
@time pyramid_o = backbone(dummy)

  0.028324 seconds (54.44 k allocations: 2.939 MiB)


(K32(128,128,256,1)[0.3458485⋯], K32(64,64,512,1)[0.012571201⋯], K32(32,32,1024,1)[0.09636523⋯], K32(16,16,2048,1)[0.065328695⋯], K32(8,8,2048,1)[0.12434062⋯])

In [23]:
struct FPN
    pyramid
    lateral_layers
    output_layers
    FPN(pyramid,lateral_layers,output_layers) =
        new(pyramid,lateral_layers,output_layers)
end
function (fpn::FPN)(x)
    # Climbing up the pyramid
    y = fpn.pyramid(x);
    L = ();
    # Reduce #channels
    for i in 1:length(fpn.lateral_layers)
        L = (L...,fpn.lateral_layers[i](y[i]))
    end
    # upsample and add
    
    # output
    O = ();
    for i in 1:length(fpn.output_layers)
        O = (O...,fpn.output_layers[i](L[i]))
    end
    (O...,L[end])
end



In [24]:

struct onlyConv; w; f; p; pad; end
(c::onlyConv)(x) = c.f.(conv4(c.w, dropout(x,c.p),padding=c.pad))
onlyConv(w1::Int,w2::Int,cx::Int,cy::Int,f=relu;pdrop=0,padding=0) =
onlyConv(param(w1,w2,cx,cy), f, pdrop,padding)


onlyConv

In [25]:
myFPN = FPN(
    backbone,
    [
        onlyConv(1,1,256,256),
        onlyConv(1,1,512,256),
        onlyConv(1,1,1024,256),
        onlyConv(1,1,2048,256),
        onlyConv(1,1,2048,256)
    ],
    [
        onlyConv(3,3,256,256,padding=1),
        onlyConv(3,3,256,256,padding=1),
        onlyConv(3,3,256,256,padding=1),
        onlyConv(3,3,256,256,padding=1)
    ]
);

In [26]:
fpn_dummy = myFPN(dummy)

(K32(128,128,256,1)[0.0⋯], K32(64,64,256,1)[0.0⋯], K32(32,32,256,1)[0.036092877⋯], K32(16,16,256,1)[0.03522419⋯], K32(8,8,256,1)[0.11121067⋯])

In [27]:
#@benchmark myFPN(dummy)

In [28]:
struct RPN_head
    conv_plain
    objectness_logit_conv
    anchor_deltas_conv
    RPN_head(conv_plain,objectness_logit_conv,anchor_deltas_conv) =
        new(conv_plain,objectness_logit_conv,anchor_deltas_conv)
end
function (rpnh::RPN_head)(x)
    objectness_logit = [];
    anchor_deltas = [];
    # for each head in bundle (P2 to P6)
    for h in x
        temp = rpnh.conv_plain(h)
        push!(objectness_logit, rpnh.objectness_logit_conv(temp))
        push!(anchor_deltas, rpnh.anchor_deltas_conv(temp))
    end
    (objectness_logit,anchor_deltas)
end

In [29]:
myRPN_head = RPN_head(
    onlyConv(3,3,256,256,padding=1),
    onlyConv(1,1,256,3,sigm),
    onlyConv(1,1,256,3*4)
)

RPN_head(onlyConv(P(KnetArray{Float32,4}(3,3,256,256)), Knet.Ops20.relu, 0, 1), onlyConv(P(KnetArray{Float32,4}(1,1,256,3)), Knet.Ops20.sigm, 0, 0), onlyConv(P(KnetArray{Float32,4}(1,1,256,12)), Knet.Ops20.relu, 0, 0))

In [30]:
dummy_objectness_logits, dummy_anchor_deltas = myRPN_head(fpn_dummy)

(Any[K32(128,128,3,1)[0.50727785⋯], K32(64,64,3,1)[0.49473512⋯], K32(32,32,3,1)[0.5069591⋯], K32(16,16,3,1)[0.4962831⋯], K32(8,8,3,1)[0.50000024⋯]], Any[K32(128,128,12,1)[0.09653285⋯], K32(64,64,12,1)[0.053986758⋯], K32(32,32,12,1)[0.075714104⋯], K32(16,16,12,1)[0.03471388⋯], K32(8,8,12,1)[0.07259219⋯]])

In [31]:
@benchmark myRPN_head(myFPN(dummy))

BenchmarkTools.Trial: 
  memory estimate:  215.83 KiB
  allocs estimate:  5354
  --------------
  minimum time:     3.971 ms (0.00% GC)
  median time:      106.992 ms (0.00% GC)
  mean time:        94.625 ms (0.29% GC)
  maximum time:     113.830 ms (0.00% GC)
  --------------
  samples:          53
  evals/sample:     1

In [32]:
dummy_objectness_logits

5-element Array{Any,1}:
 K32(128,128,3,1)[0.50727785⋯]
 K32(64,64,3,1)[0.49473512⋯]
 K32(32,32,3,1)[0.5069591⋯]
 K32(16,16,3,1)[0.4962831⋯]
 K32(8,8,3,1)[0.50000024⋯]

In [33]:
dummy_anchor_deltas

5-element Array{Any,1}:
 K32(128,128,12,1)[0.09653285⋯]
 K32(64,64,12,1)[0.053986758⋯]
 K32(32,32,12,1)[0.075714104⋯]
 K32(16,16,12,1)[0.03471388⋯]
 K32(8,8,12,1)[0.07259219⋯]

In [85]:
#https://github.com/facebookresearch/detectron2/blob/5e2a1ecccd228227c5a605c0a98d58e1b2db3640/detectron2/modeling/anchor_generator.py#L140-L177
function generate_cell_anchors(
        sizes=(32,64,128,256,512),
        aspect_ratios=(0.5,1,2)
    )
    anchors = []
    for size in sizes
        anchor = []
        area = size ^ 2.0
        for aspect_ratio in aspect_ratios
            # s * s = w * h
            # a = h / w
            # ... some algebra ...
            # w = sqrt(s * s / a)
            # h = a * w
            w = sqrt(area / aspect_ratio)
            h = aspect_ratio * w
            x0, y0, x1, y1 = -w / 2.0, -h / 2.0, w / 2.0, h / 2.0
            append!(anchor,[[x0, y0, x1, y1]])
        end
        append!(anchors,[anchor])
    end
    anchors
end


generate_cell_anchors (generic function with 3 methods)

In [86]:
cell_anchors = generate_cell_anchors()

5-element Array{Any,1}:
 Any[[-22.627416997969522, -11.313708498984761, 22.627416997969522, 11.313708498984761], [-16.0, -16.0, 16.0, 16.0], [-11.313708498984761, -22.627416997969522, 11.313708498984761, 22.627416997969522]]
 Any[[-45.254833995939045, -22.627416997969522, 45.254833995939045, 22.627416997969522], [-32.0, -32.0, 32.0, 32.0], [-22.627416997969522, -45.254833995939045, 22.627416997969522, 45.254833995939045]]
 Any[[-90.50966799187809, -45.254833995939045, 90.50966799187809, 45.254833995939045], [-64.0, -64.0, 64.0, 64.0], [-45.254833995939045, -90.50966799187809, 45.254833995939045, 90.50966799187809]]
 Any[[-181.01933598375618, -90.50966799187809, 181.01933598375618, 90.50966799187809], [-128.0, -128.0, 128.0, 128.0], [-90.50966799187809, -181.01933598375618, 90.50966799187809, 181.01933598375618]]
 Any[[-362.03867196751236, -181.01933598375618, 362.03867196751236, 181.01933598375618], [-256.0, -256.0, 256.0, 256.0], [-181.01933598375618, -362.03867196751236, 181.01933598

In [52]:
function bb_intersection_over_union(boxA, boxB)
    # determine the (x, y)-coordinates of the intersection rectangle
    xA = max(boxA[1], boxB[1])
    yA = max(boxA[2], boxB[2])
    xB = min(boxA[3], boxB[3])
    yB = min(boxA[4], boxB[4])
    # compute the area of intersection rectangle
    interArea = max(0, xB - xA) * max(0, yB - yA)
    print(interArea);print("\n");
    # compute the area of both the prediction and ground-truth
    # rectangles
    boxAArea = (boxA[3] - boxA[1]) * (boxA[4] - boxA[2])
    boxBArea = (boxB[3] - boxB[1]) * (boxB[4] - boxB[2])
    # compute the intersection over union by taking the intersection
    # area and dividing it by the sum of prediction + ground-truth
    # areas - the interesection area
    iou = interArea / (boxAArea + boxBArea - interArea)
    # return the intersection over union value
end

bb_intersection_over_union (generic function with 1 method)

In [56]:
bb_intersection_over_union(
    [0,0,2,1],
    [0,0,1,2]
)

1


0.3333333333333333

In [59]:
strides = KnetArray([4,8,16,32,64])

5-element KnetArray{Int64,1}:
  4
  8
 16
 32
 64

In [88]:
anchors = []
for anchor in 1:length(dummy_anchor_deltas)
    a = []
    for i = 1:size(dummy_anchor_deltas[anchor],1)
        for j = 1:size(dummy_anchor_deltas[anchor],1)
            b = []
            for c in cell_anchors[anchor]
                append!(b,[[strides[anchor]*i,strides[anchor]*j,0,0]+c])
            end
            append!(a,b) 
        end
    end
    append!(anchors,a) 
end
summary(anchors)


"65472-element Array{Any,1}"

In [89]:
anchors[1]

4-element Array{Float64,1}:
 -18.627416997969522
  -7.313708498984761
  22.627416997969522
  11.313708498984761