diff --git a/aten/src/ATen/native/cpu/SoftMaxKernel.cpp b/aten/src/ATen/native/cpu/SoftMaxKernel.cpp
index d838b868f69ed..af3d47e3fe6df 100644
--- a/aten/src/ATen/native/cpu/SoftMaxKernel.cpp
+++ b/aten/src/ATen/native/cpu/SoftMaxKernel.cpp
@@ -65,19 +65,25 @@ inline void _vec_log_softmax_lastdim(
     }
    // See [Note AVX-SSE transitions] for why this should call the
    // vectorized version (aside from perf improvements).
-    vec256::map2(
-        [](Vec x, Vec y) { return x.log() + y; },
+    vec256::map(
+        [](Vec x) { return x.log(); },
        tmp_sum_scalar,
        tmp_sum_scalar,
-        max_input_arr,
        loop_end);
    for (int64_t j = 0; j < loop_end; j++) {
      int64_t i = ii + j;
      scalar_t* input_data = input_data_base + i * dim_size;
      scalar_t* output_data = output_data_base + i * dim_size;
      scalar_t tmp_sum = tmp_sum_scalar[j];
+      scalar_t max_input = max_input_arr[j];
+
+      // It is necessary to keep the order of the operations below.
+      // When the inputs are large and their differences are small,
+      // computing `max_input + tmp_sum` first loses the small term
+      // to rounding and produces wrong results. See an example in
+      // https://github.com/pytorch/pytorch/issues/11752#issuecomment-422883379
      vec256::map(
-          [tmp_sum](Vec x) { return x - Vec(tmp_sum); },
+          [tmp_sum, max_input](Vec x) { return x - Vec(max_input) - Vec(tmp_sum); },
          output_data,
          input_data,
          dim_size);
diff --git a/test/test_nn.py b/test/test_nn.py
index f53fba483cc9d..8d726b4a67589 100644
--- a/test/test_nn.py
+++ b/test/test_nn.py
@@ -7847,6 +7847,11 @@ def test_softmin(self):
        self.assertEqual(F.softmin(x, 1), F.softmax(-x, 1))
        self.assertEqual(F.softmin(x, 0), F.softmax(-x, 0))

+    def test_log_softmax(self):
+        x_small = torch.ones(1, 2, dtype=torch.float32)
+        x_big = x_small + 1e16
+        self.assertEqual(F.log_softmax(x_small, -1), F.log_softmax(x_big, -1))
+
    def test_adaptive_log_softmax(self):
        # args validation
        with self.assertRaises(ValueError):
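
As context for the reordering above, here is a minimal standalone sketch of the rounding failure (plain NumPy rather than the actual vectorized kernel; `max_x` and `tmp_sum` are stand-ins for the kernel's `max_input` and `tmp_sum_scalar[j]`):

```python
import numpy as np

# Two equal, very large inputs: the correct log_softmax is log(1/2) for each.
x = np.full(2, 1e16, dtype=np.float32)
max_x = x.max()
tmp_sum = np.log(np.exp(x - max_x).sum())  # log-sum-exp of shifted inputs, ~0.6931

# Fused order: max_x + tmp_sum rounds back to max_x, since float32 cannot
# represent 1e16 + 0.6931; the small term is lost and the result is wrong.
fused = x - (max_x + tmp_sum)        # -> [0., 0.]

# Order used by the patch: subtract max_x first. x - max_x is exactly 0,
# so subtracting tmp_sum afterwards preserves the small term.
sequential = x - max_x - tmp_sum     # -> [-0.6931, -0.6931]

print(fused, sequential)
```

The new `test_log_softmax` case in `test_nn.py` covers exactly this regime: shifting the input by 1e16 must leave the log_softmax output unchanged.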