PadNd.cpp
#define TORCH_ASSERT_ONLY_METHOD_OPERATORS
#include <ATen/PadNd.h>
#include <ATen/core/Tensor.h>
#include <c10/util/irange.h>
#ifndef AT_PER_OPERATOR_HEADERS
#include <ATen/Functions.h>
#include <ATen/NativeFunctions.h>
#else
#include <ATen/ops/_empty_affine_quantized.h>
#include <ATen/ops/_pad_circular.h>
#include <ATen/ops/_pad_circular_native.h>
#include <ATen/ops/_pad_enum_native.h>
#include <ATen/ops/constant_pad_nd.h>
#include <ATen/ops/constant_pad_nd_native.h>
#include <ATen/ops/empty.h>
#include <ATen/ops/pad_native.h>
#include <ATen/ops/reflection_pad1d.h>
#include <ATen/ops/reflection_pad2d.h>
#include <ATen/ops/reflection_pad3d.h>
#include <ATen/ops/replication_pad1d.h>
#include <ATen/ops/replication_pad2d.h>
#include <ATen/ops/replication_pad3d.h>
#endif
namespace at { namespace native {
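// Pads `self` with a constant `value`. `pad` holds (before, after) amounts in
// pairs ordered from the last dimension backwards, so for a 2-D input it reads
// {last_before, last_after, first_before, first_after}. Negative entries crop
// the corresponding side instead of padding it.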
Tensor constant_pad_nd(const Tensor& self, IntArrayRef pad, const Scalar& value) {
  TORCH_CHECK(pad.size() % 2 == 0, "Length of pad must be even but instead it equals ",
              pad.size());

  auto input_sizes = self.sizes();
  auto l_inp = self.dim();

  auto l_pad = pad.size() / 2;
  auto l_diff = l_inp - l_pad;
  TORCH_CHECK(l_inp >= (int64_t)l_pad, "Length of pad should be no more than twice the number of "
              "dimensions of the input. Pad length is ", pad.size(), " while the input has ",
              l_inp, " dimensions.");
  std::vector<int64_t> new_shape;

  bool all_pads_non_positive = true;

  auto c_input = self;
  for (const auto i : c10::irange(l_diff, l_inp)) {
    auto pad_idx = 2 * (l_inp - i - 1);
    if (pad[pad_idx] < 0) {
      c_input = c_input.narrow(i, -pad[pad_idx], c_input.size(i) + pad[pad_idx]);
    } else if (pad[pad_idx] != 0) {
      all_pads_non_positive = false;
    }
    if (pad[pad_idx + 1] < 0) {
      c_input = c_input.narrow(i, 0, c_input.size(i) + pad[pad_idx + 1]);
    } else if (pad[pad_idx + 1] != 0) {
      all_pads_non_positive = false;
    }
  }

  // if none of the pads are positive we can optimize and just return the result
  // of calling .narrow() on the input
  if (all_pads_non_positive) {
    return c_input.clone();
  }
  for (size_t i = 0; i < (size_t)l_diff; i++) {
    new_shape.emplace_back(input_sizes[i]);
  }

  for (const auto i : c10::irange((size_t)l_pad)) {
    auto pad_idx = pad.size() - ((i + 1) * 2);
    auto new_dim = input_sizes[l_diff + i] + pad[pad_idx] + pad[pad_idx + 1];
    TORCH_CHECK(new_dim > 0, "The input size ", input_sizes[l_diff + i], ", plus negative padding ",
                pad[pad_idx], " and ", pad[pad_idx + 1], " resulted in a negative output size, "
                "which is invalid. Check dimension ", l_diff + i, " of your input.");
    new_shape.emplace_back(new_dim);
  }
  at::Tensor output;
  const auto memory_format = self.suggest_memory_format();
  if (self.is_quantized()) {
    const auto qscheme = self.qscheme();
    TORCH_CHECK(qscheme == kPerTensorAffine || qscheme == kPerTensorSymmetric,
                "Only per-tensor padding is supported.");
    output = at::_empty_affine_quantized(
        new_shape, self.options().memory_format(memory_format),
        self.q_scale(), self.q_zero_point(), c10::nullopt);
  } else {
    output = at::empty(new_shape, self.options().memory_format(memory_format));
  }
  output.fill_(value);
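  // Narrow the output view down to the region where the (possibly cropped)
  // input lands, then copy the input into it.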
  auto c_output = output;
  for (const auto i : c10::irange(l_diff, l_inp)) {
    auto pad_idx = 2 * (l_inp - i - 1);
    if (pad[pad_idx] > 0) {
      c_output = c_output.narrow(i, pad[pad_idx], c_output.size(i) - pad[pad_idx]);
    }
    if (pad[pad_idx + 1] > 0) {
      c_output = c_output.narrow(i, 0, c_output.size(i) - pad[pad_idx + 1]);
    }
  }
  c_output.copy_(c_input);
  return output;
}
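// Circular ("wrap") padding: positions outside the original extent are filled
// by wrapping around the padded dimension. Only the trailing dimensions are
// padded; the leading 1 or 2 (batch/channel) dimensions are left untouched.
// For example, a 1-D row {a, b, c} padded with {2, 1} becomes {b, c, a, b, c, a}.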
Tensor _pad_circular_symint(const Tensor &self, c10::SymIntArrayRef padding) {
  const auto in_shape = self.sym_sizes();
  const auto self_ndim = static_cast<int64_t>(in_shape.size());

  // number of dimensions that are padded
  const auto ndim_padded = padding.size() / 2;
  // number of preceding non_padded dimensions (1 for no_batch_dim case or 2)
  const auto ndim_nonpadded = self_ndim - ndim_padded;

  TORCH_CHECK(ndim_nonpadded == 1 || ndim_nonpadded == 2,
              "Invalid padding size, expected 1 or 2 non-padded dimensions, ",
              "which would be equivalent to padding of length ",
              (self_ndim - 1) * 2,
              " or ",
              (self_ndim - 2) * 2,
              " respectively but got ",
              padding.size());

  c10::SymDimVector out_shape(in_shape.size());
  for (const auto i : c10::irange(ndim_nonpadded)) {
    out_shape[i] = in_shape[i];
  }

  // Get shape of padded tensor
  for (const auto i : c10::irange(ndim_padded)) {
    const auto& pad_l = padding[2 * (ndim_padded - i - 1) + 0];
    const auto& pad_r = padding[2 * (ndim_padded - i - 1) + 1];
    const auto& size = in_shape[ndim_nonpadded + i];
    out_shape[ndim_nonpadded + i] = size + pad_l + pad_r;

    TORCH_CHECK(
        pad_l <= size && pad_r <= size,
        "Padding value causes wrapping around more than once.");
    TORCH_CHECK(
        out_shape[ndim_nonpadded + i] >= 0,
        "Negative padding value is resulting in an empty dimension");
  }
  auto out = self.new_empty_symint(out_shape, self.options());

  // Put original array into the padded array
  Tensor out_slice = out;
  Tensor in_slice = self;
  const SymInt zero = 0;
  for (const auto i : c10::irange(ndim_padded)) {
    const auto dim = ndim_padded - i + ndim_nonpadded - 1;
    const auto& pad_l = padding[2*i + 0];
    const auto& pad_r = padding[2*i + 1];
    out_slice = out_slice.slice_symint(dim, std::max(pad_l, zero), out_shape[dim] - std::max(pad_r, zero));
    in_slice = in_slice.slice_symint(dim, std::max(-pad_l, zero), in_shape[dim] - std::max(-pad_r, zero));
  }
  out_slice.copy_(in_slice);
  // The following steps first pad the beginning of the tensor (left side),
  // and then pad the end of the tensor (right side).
  // Note: Corners will be written more than once when ndim_padded > 1.
  //
  // Additional copying is only required where a padding value is > 0.
  for (const auto i : c10::irange(ndim_padded)) {
    const auto dim = ndim_padded - i + ndim_nonpadded - 1;
    const auto& pad_l = padding[2*i + 0];
    const auto& pad_r = padding[2*i + 1];

    if (pad_l > 0) {
      out_slice = out.slice_symint(dim, 0, pad_l);
      in_slice = out.slice_symint(dim,
                                  out_shape[dim] - pad_l - std::max(pad_r, zero),
                                  out_shape[dim] - std::max(pad_r, zero));
      out_slice.copy_(in_slice);
    }
    if (pad_r > 0) {
      out_slice = out.slice_symint(dim, out_shape[dim] - pad_r, out_shape[dim]);
      in_slice = out.slice_symint(dim, std::max(pad_l, zero), std::max(pad_l, zero) + pad_r);
      out_slice.copy_(in_slice);
    }
  }

  return out;
}
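// Dispatches to the concrete padding kernel based on the integer padding mode
// (constant / reflect / replicate / circular) and the number of padded
// dimensions implied by pad.size(). Non-constant modes reject a non-zero fill
// value and are implemented for 1-D, 2-D and 3-D padding only.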
Tensor _pad_enum_symint(const Tensor &self, c10::SymIntArrayRef pad, int64_t mode_int, c10::optional<double> value) {
  const auto input_dim = self.dim();
  TORCH_CHECK(pad.size() % 2 == 0, "Padding length must be divisible by 2");
  TORCH_CHECK(static_cast<int64_t>(pad.size()) <= input_dim * 2,
              "Padding length should be less than or equal to two times the input dimension but got padding length ",
              pad.size(), " and input of dimension ", input_dim);
  auto mode = static_cast<at::padding_mode>(mode_int);

  if (mode == at::padding_mode::constant) {
    return at::constant_pad_nd_symint(self, pad, value.value_or(0.0));
  }
  TORCH_CHECK(!value.has_value() || *value == 0,
              "Padding mode \"", padding_mode_string(mode),
              "\" doesn't take in value argument");

  if (pad.size() == 2 && (input_dim == 2 || input_dim == 3)) {
    switch (mode) {
      case at::padding_mode::reflect: return at::reflection_pad1d_symint(self, pad);
      case at::padding_mode::replicate: return at::replication_pad1d_symint(self, pad);
      case at::padding_mode::circular: return at::_pad_circular_symint(self, pad);
      default: {}
    }
  } else if (pad.size() == 4 && (input_dim == 3 || input_dim == 4)) {
    switch (mode) {
      case at::padding_mode::reflect: return at::reflection_pad2d_symint(self, pad);
      case at::padding_mode::replicate: return at::replication_pad2d_symint(self, pad);
      case at::padding_mode::circular: return at::_pad_circular_symint(self, pad);
      default: {}
    }
  } else if (pad.size() == 6 && (input_dim == 4 || input_dim == 5)) {
    switch (mode) {
      case at::padding_mode::reflect: return at::reflection_pad3d_symint(self, pad);
      case at::padding_mode::replicate: return at::replication_pad3d_symint(self, pad);
      case at::padding_mode::circular: return at::_pad_circular_symint(self, pad);
      default: {}
    }
  }
  C10_THROW_ERROR(NotImplementedError,
                  "Only 2D, 3D, 4D, 5D padding with non-constant padding is supported for now");
}
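// String-mode entry point: maps the textual padding mode ("constant",
// "reflect", "replicate", "circular") onto at::padding_mode and forwards to
// _pad_enum_symint.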
Tensor pad_symint(const Tensor &self, c10::SymIntArrayRef pad, c10::string_view mode, c10::optional<double> value) {
  const auto mode_enum = [&] {
    if (mode == "reflect") {
      return at::padding_mode::reflect;
    } else if (mode == "constant") {
      return at::padding_mode::constant;
    } else if (mode == "replicate") {
      return at::padding_mode::replicate;
    } else if (mode == "circular") {
      return at::padding_mode::circular;
    }
    C10_THROW_ERROR(NotImplementedError,
                    c10::str("Unrecognised padding mode ", mode));
  }();
  return at::native::_pad_enum_symint(self, pad, static_cast<int64_t>(mode_enum), value);
}
}} // namespace at::native