diff --git a/aten/src/ATen/native/cpu/SoftMaxKernel.cpp b/aten/src/ATen/native/cpu/SoftMaxKernel.cpp
index d838b868f69ed..af3d47e3fe6df 100644
--- a/aten/src/ATen/native/cpu/SoftMaxKernel.cpp
+++ b/aten/src/ATen/native/cpu/SoftMaxKernel.cpp
@@ -65,19 +65,25 @@ inline void _vec_log_softmax_lastdim(
     }
    // See [Note AVX-SSE transitions] for why this should call the
    // vectorized version (aside from perf improvements).
-    vec256::map2(
-        [](Vec x, Vec y) { return x.log() + y; },
+    vec256::map(
+        [](Vec x) { return x.log(); },
        tmp_sum_scalar,
        tmp_sum_scalar,
-        max_input_arr,
        loop_end);
    for (int64_t j = 0; j < loop_end; j++) {
      int64_t i = ii + j;
      scalar_t* input_data = input_data_base + i * dim_size;
      scalar_t* output_data = output_data_base + i * dim_size;
      scalar_t tmp_sum = tmp_sum_scalar[j];
+      scalar_t max_input = max_input_arr[j];
+
+      // It is necessary to keep the order of the operations below.
+      // When the inputs are large and their differences are small,
+      // computing `max_input + tmp_sum` first loses the small term
+      // to rounding and produces wrong results. See an example in
+      // https://github.com/pytorch/pytorch/issues/11752#issuecomment-422883379
      vec256::map(
-          [tmp_sum](Vec x) { return x - Vec(tmp_sum); },
+          [tmp_sum, max_input](Vec x) { return x - Vec(max_input) - Vec(tmp_sum); },
          output_data,
          input_data,
          dim_size);
diff --git a/test/test_nn.py b/test/test_nn.py
index f53fba483cc9d..8d726b4a67589 100644
--- a/test/test_nn.py
+++ b/test/test_nn.py
@@ -7847,6 +7847,11 @@ def test_softmin(self):
        self.assertEqual(F.softmin(x, 1), F.softmax(-x, 1))
        self.assertEqual(F.softmin(x, 0), F.softmax(-x, 0))

+    def test_log_softmax(self):
+        x_small = torch.ones(1, 2, dtype=torch.float32)
+        x_big = x_small + 1e16
+        self.assertEqual(F.log_softmax(x_small, -1), F.log_softmax(x_big, -1))
+
    def test_adaptive_log_softmax(self):
        # args validation
        with self.assertRaises(ValueError):
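
As context for the reordering above, here is a minimal standalone sketch of the rounding failure (plain NumPy rather than the actual vectorized kernel; `max_x` and `tmp_sum` are stand-ins for the kernel's `max_input` and `tmp_sum_scalar[j]`):

```python
import numpy as np

# Two equal, very large inputs: the correct log_softmax is log(1/2) for each.
x = np.full(2, 1e16, dtype=np.float32)
max_x = x.max()
tmp_sum = np.log(np.exp(x - max_x).sum())  # log-sum-exp of shifted inputs, ~0.6931

# Fused order: max_x + tmp_sum rounds back to max_x, since float32 cannot
# represent 1e16 + 0.6931; the small term is lost and the result is wrong.
fused = x - (max_x + tmp_sum)        # -> [0., 0.]

# Order used by the patch: subtract max_x first. x - max_x is exactly 0,
# so subtracting tmp_sum afterwards preserves the small term.
sequential = x - max_x - tmp_sum     # -> [-0.6931, -0.6931]

print(fused, sequential)
```

The new `test_log_softmax` case in `test_nn.py` covers exactly this regime: shifting the input by 1e16 must leave the log_softmax output unchanged.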