[LICM] Disallow sinking of unordered atomic loads into loops
Sinking an unordered atomic load into a loop must be disallowed because it turns
a single load into multiple loads (the sunk load now executes on every loop
iteration instead of once). The relevant section of the documentation is
http://llvm.org/docs/Atomics.html#unordered, specifically the "Notes for
optimizers" section, quoted in full below:

> Notes for optimizers
> In terms of the optimizer, this **prohibits any transformation that
> transforms a single load into multiple loads**, transforms a store into
> multiple stores, narrows a store, or stores a value which would not be
> stored otherwise. Some examples of unsafe optimizations are narrowing
> an assignment into a bitfield, rematerializing a load, and turning loads
> and stores into a memcpy call. Reordering unordered operations is safe,
> though, and optimizers should take advantage of that because unordered
> operations are common in languages that need them.
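
For illustration, here is a minimal sketch of the kind of transformation this
forbids (illustrative only, not part of the patch; the global @g_example and the
function @sink_example are made up). The single preheader load, if sunk into the
loop body, would execute on every iteration, i.e. one load becomes many:

@g_example = global i32 0, align 4

define i32 @sink_example() {
.preheader:
  %v = load atomic i32, i32* @g_example unordered, align 4 ; executes exactly once
  br label %.loop

.loop:
  %iv = phi i32 [ 0, %.preheader ], [ %iv.next, %.loop ]
  %acc = phi i32 [ 0, %.preheader ], [ %sum, %.loop ]
  %sum = add i32 %acc, %v
  %iv.next = add nuw nsw i32 %iv, 1
  %cond = icmp slt i32 %iv.next, 100
  br i1 %cond, label %.loop, label %.exit

.exit:
  ret i32 %sum
}

; If %v were sunk into %.loop, the unordered atomic load would run once per
; iteration, which is exactly the "single load into multiple loads" rewrite
; that the rule quoted above prohibits:
;
;   .loop:
;     ...
;     %v = load atomic i32, i32* @g_example unordered, align 4
;     %sum = add i32 %acc, %v
;     ...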

Patch by Daniil Suchkov!

Reviewed By: reames
Differential Revision: https://reviews.llvm.org/D38392

llvm-svn: 315438
Max Kazantsev committed Oct 11, 2017
1 parent 25d8655 commit 0c8dd05
Showing 2 changed files with 104 additions and 5 deletions.
14 changes: 10 additions & 4 deletions llvm/lib/Transforms/Scalar/LICM.cpp
@@ -577,10 +577,13 @@ bool llvm::canSinkOrHoistInst(Instruction &I, AAResults *AA, DominatorTree *DT,
                               Loop *CurLoop, AliasSetTracker *CurAST,
                               LoopSafetyInfo *SafetyInfo,
                               OptimizationRemarkEmitter *ORE) {
+  // SafetyInfo is nullptr if we are checking for sinking from preheader to
+  // loop body.
+  const bool SinkingToLoopBody = !SafetyInfo;
   // Loads have extra constraints we have to verify before we can hoist them.
   if (LoadInst *LI = dyn_cast<LoadInst>(&I)) {
     if (!LI->isUnordered())
-      return false; // Don't hoist volatile/atomic loads!
+      return false; // Don't sink/hoist volatile or ordered atomic loads!

     // Loads from constant memory are always safe to move, even if they end up
     // in the same alias set as something that ends up being modified.
@@ -589,6 +592,9 @@ bool llvm::canSinkOrHoistInst(Instruction &I, AAResults *AA, DominatorTree *DT,
     if (LI->getMetadata(LLVMContext::MD_invariant_load))
       return true;

+    if (LI->isAtomic() && SinkingToLoopBody)
+      return false; // Don't sink unordered atomic loads to loop body.
+
     // This checks for an invariant.start dominating the load.
     if (isLoadInvariantInLoop(LI, DT, CurLoop))
       return true;
@@ -664,9 +670,9 @@ bool llvm::canSinkOrHoistInst(Instruction &I, AAResults *AA, DominatorTree *DT,
       !isa<InsertValueInst>(I))
     return false;

-  // SafetyInfo is nullptr if we are checking for sinking from preheader to
-  // loop body. It will be always safe as there is no speculative execution.
-  if (!SafetyInfo)
+  // If we are checking for sinking from preheader to loop body it will be
+  // always safe as there is no speculative execution.
+  if (SinkingToLoopBody)
     return true;

   // TODO: Plumb the context instruction through to make hoisting and sinking
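
For contrast, the opposite direction remains legal: hoisting an unordered atomic
load out of a loop turns many executions of the load into one, which the Atomics
rules permit, and the bail-out added above only triggers when SinkingToLoopBody
is true. A minimal sketch (illustrative only, not part of the patch; @g_example2
and @hoist_example are made up), subject to LICM's usual aliasing and safety
checks:

@g_example2 = global i32 0, align 4

define i32 @hoist_example(i32 %n) {
entry:
  br label %loop

loop:
  %iv = phi i32 [ 0, %entry ], [ %iv.next, %loop ]
  %acc = phi i32 [ 0, %entry ], [ %sum, %loop ]
  %v = load atomic i32, i32* @g_example2 unordered, align 4 ; still a hoisting candidate
  %sum = add i32 %acc, %v
  %iv.next = add nuw nsw i32 %iv, 1
  %cond = icmp slt i32 %iv.next, %n
  br i1 %cond, label %loop, label %exit

exit:
  ret i32 %sum
}
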
95 changes: 94 additions & 1 deletion llvm/test/Transforms/LICM/loopsink.ll
@@ -1,5 +1,5 @@
 ; RUN: opt -S -loop-sink < %s | FileCheck %s
-; RUN: opt -S -passes=loop-sink < %s | FileCheck %s
+; RUN: opt -S -aa-pipeline=basic-aa -passes=loop-sink < %s | FileCheck %s

 @g = global i32 0, align 4

@@ -280,6 +280,99 @@ define i32 @t5(i32, i32*) #0 !prof !0 {
ret i32 10
}

; b1
; / \
; b2 b3
; \ /
; b4
; preheader: 1000
; b2: 15
; b3: 7
; Do not sink the unordered atomic load into b2
; CHECK: t6
; CHECK: .preheader:
; CHECK: load atomic i32, i32* @g unordered, align 4
; CHECK: .b2:
; CHECK-NOT: load atomic i32, i32* @g unordered, align 4
define i32 @t6(i32, i32) #0 !prof !0 {
%3 = icmp eq i32 %1, 0
br i1 %3, label %.exit, label %.preheader

.preheader:
%invariant = load atomic i32, i32* @g unordered, align 4
br label %.b1

.b1:
%iv = phi i32 [ %t3, %.b4 ], [ 0, %.preheader ]
%c1 = icmp sgt i32 %iv, %0
br i1 %c1, label %.b2, label %.b3, !prof !1

.b2:
%t1 = add nsw i32 %invariant, %iv
br label %.b4

.b3:
%t2 = add nsw i32 %iv, 100
br label %.b4

.b4:
%p1 = phi i32 [ %t2, %.b3 ], [ %t1, %.b2 ]
%t3 = add nuw nsw i32 %iv, 1
%c2 = icmp eq i32 %t3, %p1
br i1 %c2, label %.b1, label %.exit, !prof !3

.exit:
ret i32 10
}

@g_const = constant i32 0, align 4

; b1
; / \
; b2 b3
; \ /
; b4
; preheader: 1000
; b2: 0.5
; b3: 999.5
; Sink the unordered atomic load to b2: it is legal to sink an unordered load
; from constant memory into the loop.
; CHECK: t7
; CHECK: .preheader:
; CHECK-NOT: load atomic i32, i32* @g_const unordered, align 4
; CHECK: .b2:
; CHECK: load atomic i32, i32* @g_const unordered, align 4
define i32 @t7(i32, i32) #0 !prof !0 {
%3 = icmp eq i32 %1, 0
br i1 %3, label %.exit, label %.preheader

.preheader:
%invariant = load atomic i32, i32* @g_const unordered, align 4
br label %.b1

.b1:
%iv = phi i32 [ %t3, %.b4 ], [ 0, %.preheader ]
%c1 = icmp sgt i32 %iv, %0
br i1 %c1, label %.b2, label %.b3, !prof !1

.b2:
%t1 = add nsw i32 %invariant, %iv
br label %.b4

.b3:
%t2 = add nsw i32 %iv, 100
br label %.b4

.b4:
%p1 = phi i32 [ %t2, %.b3 ], [ %t1, %.b2 ]
%t3 = add nuw nsw i32 %iv, 1
%c2 = icmp eq i32 %t3, %p1
br i1 %c2, label %.b1, label %.exit, !prof !3

.exit:
ret i32 10
}

declare i32 @foo()

!0 = !{!"function_entry_count", i64 1}
