-
Notifications
You must be signed in to change notification settings - Fork 329
Closed
Description
I'm testing the transposed convolution op with the code at https://github.com/jb2020-super/test-DirectML.git .
The result when using float16 seems wrong, but I cannot find the root cause.
Here is some of the core code:
void DMLInferer::CreateTransposedConvolutionOp(DML_TENSOR_DATA_TYPE data_type)
{
m_op_type = DML_OPERATOR_CONVOLUTION;
m_data_type = data_type;
m_is_backward = true;
UINT input_dim[4]{ 1, 1, 5, 5 };
m_input_tensor.Create(data_type, input_dim, 4);
UINT filter_dim[4]{ 1, 1, 3, 3 };
m_filter_tensor.Create(data_type, filter_dim, 4);
UINT strides[2] = { 1, 1 };
UINT dilations[2] = { 1, 1 };
UINT start_pad[2] = { 0, 0 };
UINT end_pad[2] = { 0, 0 };
UINT output_pad[2] = { 0, 0 };
UINT output_dim[4]{ 1,
1,
_cal_transposed_conv_2d_out_size(input_dim[2], strides[0], start_pad[0] + end_pad[0], dilations[0], filter_dim[2], output_pad[0]),
_cal_transposed_conv_2d_out_size(input_dim[3], strides[1], start_pad[1] + end_pad[1], dilations[1], filter_dim[3], output_pad[1])
};
m_output_tensor.Create(data_type, output_dim, 4);
DML_CONVOLUTION_OPERATOR_DESC conv_op_desc{};
conv_op_desc.InputTensor = &m_input_tensor.GetDesc();
conv_op_desc.FilterTensor = &m_filter_tensor.GetDesc();
conv_op_desc.BiasTensor = nullptr;
conv_op_desc.OutputTensor = &m_output_tensor.GetDesc();
conv_op_desc.Mode = DML_CONVOLUTION_MODE_CROSS_CORRELATION;
conv_op_desc.Direction = DML_CONVOLUTION_DIRECTION_BACKWARD;
conv_op_desc.DimensionCount = m_input_tensor.DimCount() - 2;
conv_op_desc.Strides = strides;
conv_op_desc.Dilations = dilations;
conv_op_desc.StartPadding = start_pad;
conv_op_desc.EndPadding = end_pad;
conv_op_desc.OutputPadding = output_pad;
conv_op_desc.GroupCount = 1;
conv_op_desc.FusedActivation = nullptr;
DML_OPERATOR_DESC op_desc{};
op_desc.Type = DML_OPERATOR_CONVOLUTION;
op_desc.Desc = &conv_op_desc;
check_hresult(m_dml_device->CreateOperator(&op_desc, __uuidof(m_dml_op), m_dml_op.put_void()));
DML_EXECUTION_FLAGS exe_flg{ DML_EXECUTION_FLAG_NONE };
if (data_type == DML_TENSOR_DATA_TYPE_FLOAT16) {
exe_flg |= DML_EXECUTION_FLAG_ALLOW_HALF_PRECISION_COMPUTATION;
}
//exe_flg |= DML_EXECUTION_FLAG_DISABLE_META_COMMANDS;
check_hresult(m_dml_device->CompileOperator(m_dml_op.get(), exe_flg, __uuidof(m_pso), m_pso.put_void()));
}
void DMLInferer::_upload_convolution_data()
{
auto input_size = m_input_tensor.GetTensorSizeInBytes();
if (m_data_type == DML_TENSOR_DATA_TYPE_FLOAT16) {
std::unique_ptr<uint16_t> input_data = std::unique_ptr<uint16_t>(new uint16_t[m_input_tensor.GetElementCount()]);
std::unique_ptr<FLOAT> data = std::unique_ptr<FLOAT>(new FLOAT[m_input_tensor.GetElementCount()]);
for (int i = 0; i < m_input_tensor.GetElementCount(); ++i) {
input_data.get()[i] = Float16Compressor::compress(i);
data.get()[i] = Float16Compressor::decompress(input_data.get()[i]);
}
_print_tensor("input", m_input_tensor, data.get());
_create_resource(input_data.get(), input_size, m_input_rc, m_upload_rc);
}
else if (m_data_type == DML_TENSOR_DATA_TYPE_INT16) {
std::unique_ptr<int16_t> input_data = std::unique_ptr<int16_t>(new int16_t[m_input_tensor.GetElementCount()]);
for (int i = 0; i < m_input_tensor.GetElementCount(); ++i) {
input_data.get()[i] = i;
}
_print_tensor("input", m_input_tensor, input_data.get());
_create_resource(input_data.get(), input_size, m_input_rc, m_upload_rc);
}
else if (m_data_type == DML_TENSOR_DATA_TYPE_FLOAT32) {
std::unique_ptr<FLOAT> input_data = std::unique_ptr<FLOAT>(new FLOAT[m_input_tensor.GetElementCount()]);
for (int i = 0; i < m_input_tensor.GetElementCount(); ++i) {
input_data.get()[i] = i;
}
_print_tensor("input", m_input_tensor, input_data.get());
_create_resource(input_data.get(), input_size, m_input_rc, m_upload_rc);
}
auto filter_size = m_filter_tensor.GetTensorSizeInBytes();
std::unique_ptr<FLOAT> filter_data = std::unique_ptr<FLOAT>(new FLOAT[m_filter_tensor.GetElementCount()]);
for (int i = 0; i < m_filter_tensor.GetElementCount(); ++i) {
filter_data.get()[i] = 1.0f;
}
_print_tensor("filter", m_filter_tensor, filter_data.get());
_create_resource(filter_data.get(), filter_size, m_filter_rc, m_upload_rc2);
DML_BUFFER_BINDING input_buffer_bd = { m_input_rc.get(), 0, m_input_rc->GetDesc().Width };
DML_BINDING_DESC input_binding = { DML_BINDING_TYPE_BUFFER, &input_buffer_bd };
DML_BUFFER_BINDING filter_buffer_bd = { m_filter_rc.get(), 0, m_filter_rc->GetDesc().Width };
DML_BINDING_DESC filter_binding = { DML_BINDING_TYPE_BUFFER, &filter_buffer_bd };
DML_BUFFER_BINDING bias_buffer_bd = { nullptr, 0, 0 };
DML_BINDING_DESC bias_binding = { DML_BINDING_TYPE_NONE, nullptr };
DML_BINDING_DESC binding_descs[3] = { input_binding, filter_binding, bias_binding };
m_binding_tlb->BindInputs(3, binding_descs);
}The output with data type set to DML_TENSOR_DATA_TYPE_FLOAT32 is:
input tensor dimension: 5x5
0 1 2 3 4
5 6 7 8 9
10 11 12 13 14
15 16 17 18 19
20 21 22 23 24
filter tensor dimension: 3x3
1 1 1
1 1 1
1 1 1
output tensor dimension: 7x7
0 1 3 6 9 7 4
5 12 21 27 33 24 13
15 33 54 63 72 51 27
30 63 99 108 117 81 42
45 93 144 153 162 111 57
35 72 111 117 123 84 43
20 41 63 66 69 47 24
The output with data type set to DML_TENSOR_DATA_TYPE_FLOAT16 is:
input tensor dimension: 5x5
0 1 2 3 4
5 6 7 8 9
10 11 12 13 14
15 16 17 18 19
20 21 22 23 24
filter tensor dimension: 3x3
1 1 1
1 1 1
1 1 1
output tensor dimension: 7x7
0 0 1.875 3.75 5.625 7.5 0
0 11.25 15 20.625 26.25 22.5 7.5
9.375 30 45 52.5 60 48.75 16.875
18.75 58.125 82.5 90 97.5 76.875 26.25
28.125 86.25 120 127.5 135 105 35.625
37.5 67.5 108.75 114.375 120 78.75 45
0 37.5 39.375 41.25 43.125 45 0
Metadata
Metadata
Assignees
Labels
No labels