-
Notifications
You must be signed in to change notification settings - Fork 95
/
conv.nim
140 lines (126 loc) · 6.68 KB
/
conv.nim
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
# Copyright 2017 the Arraymancer contributors
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import ../../tensor,
../private/p_nnp_types
proc im2col*[T]( input: Tensor[T], kernel_size: Size2D,
                padding: Size2D = (0,0), stride: Size2D = (1,1),
                result: var Tensor[T]) =
  ## Unfold every kernel-sized window of one image into the rows/columns of
  ## a matrix, the usual preprocessing step before a GEMM-based convolution.
  ##
  ## ``result`` has to be preallocated with shape
  ## ``[channels * kernel_height * kernel_width, out_height * out_width]``
  ## and, like ``input``, be C-contiguous; both conditions are asserted.
  ## Window positions that fall outside the image (i.e. into the padding)
  ## are written as zero.
  let
    img_c = input.nchw_channels
    img_h = input.nchw_height
    img_w = input.nchw_width
    img_plane = img_h * img_w
    out_h = (img_h + (2 * padding.height) - kernel_size.height) div stride.height + 1
    out_w = (img_w + (2 * padding.width) - kernel_size.width) div stride.width + 1
    channels_col = img_c * kernel_size.height * kernel_size.width
    flatten_size_col = out_h * out_w

  assert result.is_C_contiguous and input.is_C_contiguous
  assert result.shape == [channels_col, flatten_size_col]

  # Flat accessors from `unsafe_raw_offset`; the asserts above are the only
  # shape validation performed before indexing them directly.
  let
    dst = result.unsafe_raw_offset()
    src = input.unsafe_raw_offset()

  # One output row per (channel, kernel row, kernel column) triple.
  for r in `||`(0, channels_col-1, "simd"):
    let
      # Image coordinates sampled by the first window; negative => padding.
      x0 = (r mod kernel_size.width) - padding.width
      y0 = ((r div kernel_size.width) mod kernel_size.height) - padding.height
      src_base = ((r div kernel_size.width) div kernel_size.height) * img_plane
      dst_base = r * flatten_size_col
    for oy in 0..<out_h:
      let
        iy = y0 + (oy * stride.height)
        row_inside = iy >= 0 and iy < img_h
      for ox in 0..<out_w:
        let
          ix = x0 + (ox * stride.width)
          px =
            if row_inside and ix >= 0 and ix < img_w:
              src[src_base + iy * img_w + ix]
            else:
              0.T
        dst[dst_base + (oy * out_w) + ox] = px
proc col2im*[T](input: Tensor[T], channels, height, width: int,
                kernel_size: Size2D,
                padding: Size2D = (0,0), stride: Size2D = (1,1)): Tensor[T] =
  ## Fold a column matrix back into a ``[channels, height, width]`` image.
  ##
  ## Image pixels that are covered by several windows receive the sum of
  ## all their column entries; entries that map outside the image (into
  ## the padding) are skipped. The returned tensor starts out zero-filled.
  let
    out_h = (height + (2 * padding.height) - kernel_size.height) div stride.height + 1
    out_w = (width + (2 * padding.width) - kernel_size.width) div stride.width + 1
    n_rows = channels * kernel_size.height * kernel_size.width

  result = zeros[T](channels, height, width)

  # Each row of `input` belongs to one (channel, kernel row, kernel column).
  for r in 0..<n_rows:
    let
      chan = (r div kernel_size.width) div kernel_size.height
      y0 = ((r div kernel_size.width) mod kernel_size.height) - padding.height
      x0 = (r mod kernel_size.width) - padding.width
    for oy in 0..<out_h:
      let iy = y0 + (oy * stride.height)
      for ox in 0..<out_w:
        let ix = x0 + (ox * stride.width)
        # Only accumulate positions that land inside the image.
        if iy >= 0 and ix >= 0 and iy < height and ix < width:
          result[chan, iy, ix] += input[r, (oy * out_w) + ox]
proc im2colgemm_conv2d*[T](input, kernel, bias: Tensor[T],
                padding: Size2D = (0,0),
                stride: Size2D = (1,1)): Tensor[T] =
  ## Cross-correlate a batch of images with the given kernel weights.
  ##
  ## Every image of the batch is unfolded with ``im2col`` and multiplied by
  ## the kernel reshaped to ``[output_channels, in_channels * kh * kw]``.
  ## When ``bias`` has a non-zero rank it is added to the whole result with
  ## ``+.=`` after being given a leading unit axis.
  # Implementation with ideas from http://cs231n.github.io/convolutional-networks/#conv
  let
    n_images = input.shape[^4]
    n_filters = kernel.shape[^4]
    window = (height: kernel.nchw_height, width: kernel.nchw_width)
    out_h = (input.nchw_height + (2*padding.height) - window.height) div stride.height + 1
    out_w = (input.nchw_width + (2*padding.width) - window.width) div stride.width + 1
    out_plane = out_h * out_w
    patch_len = input.nchw_channels * window.height * window.width
    flat_kernel = kernel.reshape(n_filters, patch_len)

  result = newTensorUninit[T](n_images, n_filters, out_h, out_w)
  var
    patches = newTensorUninit[T](patch_len, out_plane) # reused for every image
    out_view: Tensor[T]

  for b in 0..<n_images: #TODO: batch matmul
    im2col(input.atAxisIndex(0, b).squeeze(0), window, padding, stride, patches)
    # No copy may happen here: GEMM has to write straight into `result`.
    out_view = result.atAxisIndex(0, b).reshape(n_filters, out_plane)
    gemm(1.T, flat_kernel, patches, 0.T, out_view)

  if bias.rank > 0:
    result +.= bias.unsqueeze(0)
proc im2colgemm_conv2d_gradient*[T](input, kernel: Tensor[T],
                         padding: Size2D = (0,0),
                         stride: Size2D = (1,1),
                         grad_output: Tensor[T],
                         grad_input, grad_weight: var Tensor[T]) =
  ## Backward pass of the im2col + GEMM 2D convolution.
  ##
  ## Given ``grad_output`` (gradient w.r.t. the convolution output), fills
  ## ``grad_input`` and ``grad_weight`` with the gradients w.r.t. the input
  ## and the kernel weights. Both are replaced by freshly zero-initialised
  ## tensors shaped like ``input`` and ``kernel``; ``grad_weight`` is the
  ## sum of the per-image contributions over the batch.
  let
    n_images = input.shape[^4]
    output_channels = kernel.shape[^4]
    window = (height: kernel.nchw_height, width: kernel.nchw_width)
    output_height = (input.nchw_height + (2*padding.height) - window.height) div stride.height + 1
    output_width = (input.nchw_width + (2*padding.width) - window.width) div stride.width + 1
    out_plane = output_height * output_width
    patch_len = input.nchw_channels * window.height * window.width
    flat_kernel = kernel.reshape(output_channels, patch_len)

  # Check if grad output shape looks correct
  assert grad_output.nchw_width == output_width and grad_output.nchw_height == output_height
  assert grad_output.nchw_channels == output_channels
  assert grad_output.shape[0] == input.shape[0]

  grad_input = zeros[T](n_images, input.nchw_channels, input.nchw_height, input.nchw_width)
  grad_weight = zeros[T](output_channels, kernel.nchw_channels, window.height, window.width)
  var patches = newTensorUninit[T](patch_len, out_plane) # reused for every image

  for b in 0..<n_images:
    let
      grad_out_mat = grad_output.atAxisIndex(0, b).reshape(output_channels, out_plane)
      # Gradient w.r.t. the unfolded input of this image.
      grad_patches = flat_kernel.transpose() * grad_out_mat

    im2col(input.atAxisIndex(0, b).squeeze(0), window, padding, stride, patches)
    # Fold the column gradient back to image layout and store it in slot `b`.
    grad_input[b, _, _, _] = col2im(grad_patches, input.nchw_channels, input.nchw_height, input.nchw_width, window, padding, stride).unsqueeze(0)
    grad_weight += (grad_out_mat * patches.transpose()).reshape(grad_weight.shape)