
Different transposed convolution result when using float32 vs. float16 #101

@jb2020-super

Description

I'm testing the transposed convolution op with the code at https://github.com/jb2020-super/test-DirectML.git. The result when using float16 seems wrong, but I cannot find the root cause.
Here is the core code:

void DMLInferer::CreateTransposedConvolutionOp(DML_TENSOR_DATA_TYPE data_type)
{
	m_op_type = DML_OPERATOR_CONVOLUTION;
	m_data_type = data_type;
	m_is_backward = true;
	UINT input_dim[4]{ 1, 1, 5, 5 };
	m_input_tensor.Create(data_type, input_dim, 4);
	UINT filter_dim[4]{ 1, 1, 3, 3 };
	m_filter_tensor.Create(data_type, filter_dim, 4);
	UINT strides[2] = { 1, 1 };
	UINT dilations[2] = { 1, 1 };
	UINT start_pad[2] = { 0, 0 };
	UINT end_pad[2] = { 0, 0 };
	UINT output_pad[2] = { 0, 0 };
	UINT output_dim[4]{ 1, 
		1, 
		_cal_transposed_conv_2d_out_size(input_dim[2], strides[0], start_pad[0] + end_pad[0], dilations[0], filter_dim[2], output_pad[0]), 
		_cal_transposed_conv_2d_out_size(input_dim[3], strides[1], start_pad[1] + end_pad[1], dilations[1], filter_dim[3], output_pad[1])
	};
	m_output_tensor.Create(data_type, output_dim, 4);
	DML_CONVOLUTION_OPERATOR_DESC conv_op_desc{};
	conv_op_desc.InputTensor = &m_input_tensor.GetDesc();
	conv_op_desc.FilterTensor = &m_filter_tensor.GetDesc();
	conv_op_desc.BiasTensor = nullptr;
	conv_op_desc.OutputTensor = &m_output_tensor.GetDesc();
	conv_op_desc.Mode = DML_CONVOLUTION_MODE_CROSS_CORRELATION;
	conv_op_desc.Direction = DML_CONVOLUTION_DIRECTION_BACKWARD;
	conv_op_desc.DimensionCount = m_input_tensor.DimCount() - 2;
	conv_op_desc.Strides = strides;
	conv_op_desc.Dilations = dilations;
	conv_op_desc.StartPadding = start_pad;
	conv_op_desc.EndPadding = end_pad;
	conv_op_desc.OutputPadding = output_pad;
	conv_op_desc.GroupCount = 1;
	conv_op_desc.FusedActivation = nullptr;
	DML_OPERATOR_DESC op_desc{};
	op_desc.Type = DML_OPERATOR_CONVOLUTION;
	op_desc.Desc = &conv_op_desc;
	check_hresult(m_dml_device->CreateOperator(&op_desc, __uuidof(m_dml_op), m_dml_op.put_void()));
	DML_EXECUTION_FLAGS exe_flg{ DML_EXECUTION_FLAG_NONE };
	if (data_type == DML_TENSOR_DATA_TYPE_FLOAT16) {
		exe_flg |= DML_EXECUTION_FLAG_ALLOW_HALF_PRECISION_COMPUTATION;
	}

	//exe_flg |= DML_EXECUTION_FLAG_DISABLE_META_COMMANDS;
	check_hresult(m_dml_device->CompileOperator(m_dml_op.get(), exe_flg, __uuidof(m_pso), m_pso.put_void()));
}
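
For reference, here is a minimal sketch of what _cal_transposed_conv_2d_out_size is assumed to compute (the actual helper is in the linked repo). With a 5x5 input, a 3x3 filter, stride 1 and no padding it gives (5 - 1) * 1 + (3 - 1) * 1 + 1 = 7, which matches the 7x7 outputs below:

// Assumed output-size formula for a 2D transposed convolution; the parameter order
// matches the calls above (input, stride, total padding, dilation, filter, output padding).
UINT _cal_transposed_conv_2d_out_size(UINT input_size, UINT stride, UINT total_padding,
	UINT dilation, UINT filter_size, UINT output_padding)
{
	// out = (in - 1) * stride + (filter - 1) * dilation + 1 - padding + output_padding
	return (input_size - 1) * stride + (filter_size - 1) * dilation + 1 - total_padding + output_padding;
}
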
void DMLInferer::_upload_convolution_data()
{
	auto input_size = m_input_tensor.GetTensorSizeInBytes();
	if (m_data_type == DML_TENSOR_DATA_TYPE_FLOAT16) {
		std::unique_ptr<uint16_t> input_data = std::unique_ptr<uint16_t>(new uint16_t[m_input_tensor.GetElementCount()]);
		std::unique_ptr<FLOAT> data = std::unique_ptr<FLOAT>(new FLOAT[m_input_tensor.GetElementCount()]);
		for (int i = 0; i < m_input_tensor.GetElementCount(); ++i) {
			input_data.get()[i] = Float16Compressor::compress(i);
			data.get()[i] = Float16Compressor::decompress(input_data.get()[i]);
		}
		_print_tensor("input", m_input_tensor, data.get());
		_create_resource(input_data.get(), input_size, m_input_rc, m_upload_rc);		
	}
	else if (m_data_type == DML_TENSOR_DATA_TYPE_INT16) {
		std::unique_ptr<int16_t> input_data = std::unique_ptr<int16_t>(new int16_t[m_input_tensor.GetElementCount()]);
		for (int i = 0; i < m_input_tensor.GetElementCount(); ++i) {
			input_data.get()[i] = i;
		}
		_print_tensor("input", m_input_tensor, input_data.get());
		_create_resource(input_data.get(), input_size, m_input_rc, m_upload_rc);
	}
	else if (m_data_type == DML_TENSOR_DATA_TYPE_FLOAT32) {
		std::unique_ptr<FLOAT> input_data = std::unique_ptr<FLOAT>(new FLOAT[m_input_tensor.GetElementCount()]);
		for (int i = 0; i < m_input_tensor.GetElementCount(); ++i) {
			input_data.get()[i] = i;
		}
		_print_tensor("input", m_input_tensor, input_data.get());
		_create_resource(input_data.get(), input_size, m_input_rc, m_upload_rc);
	}

	auto filter_size = m_filter_tensor.GetTensorSizeInBytes();
	std::unique_ptr<FLOAT> filter_data = std::unique_ptr<FLOAT>(new FLOAT[m_filter_tensor.GetElementCount()]);
	for (int i = 0; i < m_filter_tensor.GetElementCount(); ++i) {
		filter_data.get()[i] = 1.0f;
	}
	_print_tensor("filter", m_filter_tensor, filter_data.get());
	_create_resource(filter_data.get(), filter_size, m_filter_rc, m_upload_rc2);

	DML_BUFFER_BINDING input_buffer_bd = { m_input_rc.get(), 0, m_input_rc->GetDesc().Width };
	DML_BINDING_DESC input_binding = { DML_BINDING_TYPE_BUFFER, &input_buffer_bd };
	DML_BUFFER_BINDING filter_buffer_bd = { m_filter_rc.get(), 0, m_filter_rc->GetDesc().Width };
	DML_BINDING_DESC filter_binding = { DML_BINDING_TYPE_BUFFER, &filter_buffer_bd };
	DML_BUFFER_BINDING bias_buffer_bd = { nullptr, 0, 0 };
	DML_BINDING_DESC bias_binding = { DML_BINDING_TYPE_NONE, nullptr };

	DML_BINDING_DESC binding_descs[3] = { input_binding, filter_binding, bias_binding };

	m_binding_tlb->BindInputs(3, binding_descs);
}
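
Note that the filter buffer above is always filled with 32-bit floats, even when the filter tensor itself is created as DML_TENSOR_DATA_TYPE_FLOAT16. For comparison, here is a minimal sketch of the filter-upload portion that packs the data to match the declared tensor type, assuming the bound buffer is expected to hold values in that format (Float16Compressor is reused from the input path above):

	auto filter_size = m_filter_tensor.GetTensorSizeInBytes();
	if (m_data_type == DML_TENSOR_DATA_TYPE_FLOAT16) {
		// Pack each 1.0f weight as an IEEE half so the buffer layout matches the FLOAT16 tensor.
		std::unique_ptr<uint16_t[]> filter_data(new uint16_t[m_filter_tensor.GetElementCount()]);
		for (int i = 0; i < m_filter_tensor.GetElementCount(); ++i) {
			filter_data[i] = Float16Compressor::compress(1.0f);
		}
		_create_resource(filter_data.get(), filter_size, m_filter_rc, m_upload_rc2);
	}
	else {
		std::unique_ptr<FLOAT[]> filter_data(new FLOAT[m_filter_tensor.GetElementCount()]);
		for (int i = 0; i < m_filter_tensor.GetElementCount(); ++i) {
			filter_data[i] = 1.0f;
		}
		_create_resource(filter_data.get(), filter_size, m_filter_rc, m_upload_rc2);
	}
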

The output with data type set to DML_TENSOR_DATA_TYPE_FLOAT32 is:

input tensor dimension: 5x5
0       1       2       3       4
5       6       7       8       9
10      11      12      13      14
15      16      17      18      19
20      21      22      23      24
filter tensor dimension: 3x3
1       1       1
1       1       1
1       1       1
output tensor dimension: 7x7
0       1       3       6       9       7       4
5       12      21      27      33      24      13
15      33      54      63      72      51      27
30      63      99      108     117     81      42
45      93      144     153     162     111     57
35      72      111     117     123     84      43
20      41      63      66      69      47      24

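For reference, the float32 output can be reproduced independently of DirectML: with stride 1, an all-ones 3x3 filter and no padding, each output element of the transposed convolution is simply the sum of the input values its 3x3 window overlaps. A minimal standalone sketch that prints the same 7x7 table:

#include <cstdio>

int main()
{
	const int IN = 5, K = 3, OUT = IN + K - 1; // 7x7 output
	float in[IN][IN];
	for (int i = 0; i < IN; ++i)
		for (int j = 0; j < IN; ++j)
			in[i][j] = float(i * IN + j); // 0..24, same as the test input

	for (int oy = 0; oy < OUT; ++oy) {
		for (int ox = 0; ox < OUT; ++ox) {
			float acc = 0.0f;
			// Accumulate every input element that the 3x3 all-ones kernel scatters into (oy, ox).
			for (int ky = 0; ky < K; ++ky)
				for (int kx = 0; kx < K; ++kx) {
					int iy = oy - ky, ix = ox - kx;
					if (iy >= 0 && iy < IN && ix >= 0 && ix < IN)
						acc += in[iy][ix];
				}
			printf("%g\t", acc);
		}
		printf("\n");
	}
	return 0;
}
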
The output with data type set to DML_TENSOR_DATA_TYPE_FLOAT16 is:

input tensor dimension: 5x5
0       1       2       3       4
5       6       7       8       9
10      11      12      13      14
15      16      17      18      19
20      21      22      23      24
filter tensor dimension: 3x3
1       1       1
1       1       1
1       1       1
output tensor dimension: 7x7
0       0       1.875   3.75    5.625   7.5     0
0       11.25   15      20.625  26.25   22.5    7.5
9.375   30      45      52.5    60      48.75   16.875
18.75   58.125  82.5    90      97.5    76.875  26.25
28.125  86.25   120     127.5   135     105     35.625
37.5    67.5    108.75  114.375 120     78.75   45
0       37.5    39.375  41.25   43.125  45      0
