diff --git a/llvm/lib/Target/AMDGPU/AMDGPUAtomicOptimizer.cpp b/llvm/lib/Target/AMDGPU/AMDGPUAtomicOptimizer.cpp
index 1b98eb04e0d818..34a7cb5a72a829 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUAtomicOptimizer.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUAtomicOptimizer.cpp
@@ -478,6 +478,8 @@ void AMDGPUAtomicOptimizer::optimizeAtomic(Instruction &I,
   Value *ExclScan = nullptr;
   Value *NewV = nullptr;
 
+  const bool NeedResult = !I.use_empty();
+
   // If we have a divergent value in each lane, we need to combine the value
   // using DPP.
   if (ValDivergent) {
@@ -488,7 +490,8 @@ void AMDGPUAtomicOptimizer::optimizeAtomic(Instruction &I,
     const AtomicRMWInst::BinOp ScanOp =
         Op == AtomicRMWInst::Sub ? AtomicRMWInst::Add : Op;
     NewV = buildScan(B, ScanOp, NewV, Identity);
-    ExclScan = buildShiftRight(B, NewV, Identity);
+    if (NeedResult)
+      ExclScan = buildShiftRight(B, NewV, Identity);
 
     // Read the value from the last lane, which has accumlated the values of
     // each active lane in the wavefront. This will be our new value which we
@@ -581,7 +584,6 @@ void AMDGPUAtomicOptimizer::optimizeAtomic(Instruction &I,
   // original instruction.
   B.SetInsertPoint(&I);
 
-  const bool NeedResult = !I.use_empty();
   if (NeedResult) {
     // Create a PHI node to get our new atomic result into the exit block.
     PHINode *const PHI = B.CreatePHI(Ty, 2);