Skip to content

Conversation

nikic
Copy link
Contributor

@nikic nikic commented Apr 14, 2025

Backport of 716b02d, with conflicts in the test resolved.

Optimization of alloca instructions may lead to invalid alias tags.
Incorrect alias tags can result in incorrect optimization outcomes for
Fortran source code compiled by Flang with flags: `-O3 -mmlir
-local-alloc-tbaa -flto`.

This commit removes alias tags when memcpy optimization replaces two
arrays with one array, thus ensuring correct compilation of Fortran
source code using flags: `-O3 -mmlir -local-alloc-tbaa -flto`.

This commit is also a proposal to fix the reported issue:
llvm#133984

---------

Co-authored-by: Shilei Tian <i@tianshilei.me>
(cherry picked from commit 716b02d)
@llvmbot
Copy link
Member

llvmbot commented Apr 14, 2025

@llvm/pr-subscribers-llvm-transforms

Author: Nikita Popov (nikic)

Changes

Backport of 716b02d, with conflicts in the test resolved.


Full diff: https://github.com/llvm/llvm-project/pull/135615.diff

3 Files Affected:

  • (modified) llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp (+12-7)
  • (added) llvm/test/Transforms/MemCpyOpt/memcpy-tbaa.ll (+77)
  • (modified) llvm/test/Transforms/MemCpyOpt/stack-move.ll (+5-5)
diff --git a/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp b/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp
index 971d6012f6129..9202c341da92e 100644
--- a/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp
+++ b/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp
@@ -1518,7 +1518,7 @@ bool MemCpyOptPass::performStackMoveOptzn(Instruction *Load, Instruction *Store,
   // to remove them.
 
   SmallVector<Instruction *, 4> LifetimeMarkers;
-  SmallSet<Instruction *, 4> NoAliasInstrs;
+  SmallSet<Instruction *, 4> AAMetadataInstrs;
   bool SrcNotDom = false;
 
   // Recursively track the user and check whether modified alias exist.
@@ -1573,8 +1573,8 @@ bool MemCpyOptPass::performStackMoveOptzn(Instruction *Load, Instruction *Store,
               continue;
             }
           }
-          if (UI->hasMetadata(LLVMContext::MD_noalias))
-            NoAliasInstrs.insert(UI);
+          AAMetadataInstrs.insert(UI);
+
           if (!ModRefCallback(UI))
             return false;
         }
@@ -1679,11 +1679,16 @@ bool MemCpyOptPass::performStackMoveOptzn(Instruction *Load, Instruction *Store,
   }
 
   // As this transformation can cause memory accesses that didn't previously
-  // alias to begin to alias one another, we remove !noalias metadata from any
-  // uses of either alloca. This is conservative, but more precision doesn't
-  // seem worthwhile right now.
-  for (Instruction *I : NoAliasInstrs)
+  // alias to begin to alias one another, we remove !alias.scope, !noalias,
+  // !tbaa and !tbaa_struct metadata from any uses of either alloca.
+  // This is conservative, but more precision doesn't seem worthwhile
+  // right now.
+  for (Instruction *I : AAMetadataInstrs) {
+    I->setMetadata(LLVMContext::MD_alias_scope, nullptr);
     I->setMetadata(LLVMContext::MD_noalias, nullptr);
+    I->setMetadata(LLVMContext::MD_tbaa, nullptr);
+    I->setMetadata(LLVMContext::MD_tbaa_struct, nullptr);
+  }
 
   LLVM_DEBUG(dbgs() << "Stack Move: Performed staack-move optimization\n");
   NumStackMove++;
diff --git a/llvm/test/Transforms/MemCpyOpt/memcpy-tbaa.ll b/llvm/test/Transforms/MemCpyOpt/memcpy-tbaa.ll
new file mode 100644
index 0000000000000..6e446e5ff267c
--- /dev/null
+++ b/llvm/test/Transforms/MemCpyOpt/memcpy-tbaa.ll
@@ -0,0 +1,77 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
+; RUN: opt < %s -passes=memcpyopt,dse -S -verify-memoryssa | FileCheck %s
+
+define void @test() local_unnamed_addr {
+; CHECK-LABEL: define void @test() local_unnamed_addr {
+; CHECK-NEXT:    [[TEST_ARRAY_B:%.*]] = alloca [31 x float], align 4
+; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr float, ptr [[TEST_ARRAY_B]], i64 1
+; CHECK-NEXT:    store float 0x3E6AA51880000000, ptr [[TMP1]], align 4
+; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr float, ptr [[TEST_ARRAY_B]], i64 1
+; CHECK-NEXT:    [[TMP3:%.*]] = load float, ptr [[TMP2]], align 4
+; CHECK-NEXT:    ret void
+;
+  %test_array_a = alloca [31 x float], align 4
+  %test_array_b = alloca [31 x float], align 4
+  %1 = getelementptr float, ptr %test_array_b, i64 1
+  store float 0x3E6AA51880000000, ptr %1, align 4, !tbaa !4
+  call void @llvm.memcpy.p0.p0.i64(ptr noundef nonnull align 4 dereferenceable(124) %test_array_a, ptr noundef nonnull align 4 dereferenceable(124) %test_array_b, i64 124, i1 false)
+  %2 = getelementptr float, ptr %test_array_a, i64 1
+  %3 = load float, ptr %2, align 4, !tbaa !7
+  ret void
+}
+
+%struct.Outer = type { float, double, %struct.Inner }
+%struct.Inner = type { i32, float }
+
+; Function Attrs: nounwind uwtable
+define dso_local float @f() {
+; CHECK-LABEL: define dso_local float @f() {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    [[TEST1:%.*]] = alloca [[STRUCT_OUTER:%.*]], align 8
+; CHECK-NEXT:    [[F:%.*]] = getelementptr inbounds nuw [[STRUCT_OUTER]], ptr [[TEST1]], i32 0, i32 0
+; CHECK-NEXT:    store float 0.000000e+00, ptr [[F]], align 8
+; CHECK-NEXT:    [[F1:%.*]] = getelementptr inbounds nuw [[STRUCT_OUTER]], ptr [[TEST1]], i32 0, i32 0
+; CHECK-NEXT:    [[TMP0:%.*]] = load float, ptr [[F1]], align 8
+; CHECK-NEXT:    [[ADD:%.*]] = fadd float [[TMP0]], 2.000000e+00
+; CHECK-NEXT:    store float [[ADD]], ptr [[F1]], align 8
+; CHECK-NEXT:    [[F2:%.*]] = getelementptr inbounds nuw [[STRUCT_OUTER]], ptr [[TEST1]], i32 0, i32 0
+; CHECK-NEXT:    [[TMP1:%.*]] = load float, ptr [[F2]], align 8
+; CHECK-NEXT:    ret float [[TMP1]]
+;
+entry:
+  %test = alloca %struct.Outer, align 8
+  %test1 = alloca %struct.Outer, align 8
+  %f = getelementptr inbounds nuw %struct.Outer, ptr %test1, i32 0, i32 0
+  store float 0.000000e+00, ptr %f, align 8, !tbaa !9
+  %inner_a = getelementptr inbounds nuw %struct.Outer, ptr %test1, i32 0, i32 2
+  %i = getelementptr inbounds nuw %struct.Inner, ptr %inner_a, i32 0, i32 0
+  store i32 0, ptr %i, align 8, !tbaa !17
+  call void @llvm.memcpy.p0.p0.i64(ptr align 8 %test, ptr align 8 %test1, i64 24, i1 false)
+  %f1 = getelementptr inbounds nuw %struct.Outer, ptr %test, i32 0, i32 0
+  %0 = load float, ptr %f1, align 8, !tbaa !9
+  %add = fadd float %0, 2.000000e+00
+  store float %add, ptr %f1, align 8, !tbaa !9
+  %f2 = getelementptr inbounds nuw %struct.Outer, ptr %test, i32 0, i32 0
+  %1 = load float, ptr %f2, align 8, !tbaa !9
+  ret float %1
+}
+
+!1 = !{!"any data access", !2, i64 0}
+!2 = !{!"any access", !3, i64 0}
+!3 = !{!"Flang function root test"}
+!4 = !{!5, !5, i64 0}
+!5 = !{!"allocated data/test_array_a", !6, i64 0}
+!6 = !{!"allocated data", !1, i64 0}
+!7 = !{!8, !8, i64 0}
+!8 = !{!"allocated data/test_array_b", !6, i64 0}
+!9 = !{!10, !11, i64 0}
+!10 = !{!"Outer", !11, i64 0, !14, i64 8, !15, i64 16}
+!11 = !{!"float", !12, i64 0}
+!12 = !{!"omnipotent char", !13, i64 0}
+!13 = !{!"Simple C/C++ TBAA"}
+!14 = !{!"double", !12, i64 0}
+!15 = !{!"Inner", !16, i64 0, !11, i64 4}
+!16 = !{!"int", !12, i64 0}
+!17 = !{!10, !16, i64 16}
+
+
diff --git a/llvm/test/Transforms/MemCpyOpt/stack-move.ll b/llvm/test/Transforms/MemCpyOpt/stack-move.ll
index 6089c0a4d7cf5..5ff6f01021208 100644
--- a/llvm/test/Transforms/MemCpyOpt/stack-move.ll
+++ b/llvm/test/Transforms/MemCpyOpt/stack-move.ll
@@ -259,7 +259,7 @@ define void @remove_scoped_noalias() {
 ; CHECK-LABEL: define void @remove_scoped_noalias() {
 ; CHECK-NEXT:    [[SRC:%.*]] = alloca [[STRUCT_FOO:%.*]], align 4
 ; CHECK-NEXT:    store [[STRUCT_FOO]] { i32 10, i32 20, i32 30 }, ptr [[SRC]], align 4
-; CHECK-NEXT:    [[TMP1:%.*]] = call i32 @use_nocapture(ptr nocapture [[SRC]]), !alias.scope !0
+; CHECK-NEXT:    [[TMP1:%.*]] = call i32 @use_nocapture(ptr nocapture [[SRC]])
 ; CHECK-NEXT:    [[TMP2:%.*]] = call i32 @use_nocapture(ptr nocapture [[SRC]])
 ; CHECK-NEXT:    ret void
 ;
@@ -283,7 +283,7 @@ define void @remove_alloca_metadata() {
 ; CHECK-LABEL: define void @remove_alloca_metadata() {
 ; CHECK-NEXT:    [[SRC:%.*]] = alloca [[STRUCT_FOO:%.*]], align 4
 ; CHECK-NEXT:    store [[STRUCT_FOO]] { i32 10, i32 20, i32 30 }, ptr [[SRC]], align 4
-; CHECK-NEXT:    [[TMP1:%.*]] = call i32 @use_nocapture(ptr nocapture [[SRC]]), !alias.scope !0
+; CHECK-NEXT:    [[TMP1:%.*]] = call i32 @use_nocapture(ptr nocapture [[SRC]])
 ; CHECK-NEXT:    [[TMP2:%.*]] = call i32 @use_nocapture(ptr nocapture [[SRC]])
 ; CHECK-NEXT:    ret void
 ;
@@ -308,7 +308,7 @@ define void @noalias_on_lifetime() {
 ; CHECK-LABEL: define void @noalias_on_lifetime() {
 ; CHECK-NEXT:    [[SRC:%.*]] = alloca [[STRUCT_FOO:%.*]], align 4
 ; CHECK-NEXT:    store [[STRUCT_FOO]] { i32 10, i32 20, i32 30 }, ptr [[SRC]], align 4
-; CHECK-NEXT:    [[TMP1:%.*]] = call i32 @use_nocapture(ptr nocapture [[SRC]]), !alias.scope !0
+; CHECK-NEXT:    [[TMP1:%.*]] = call i32 @use_nocapture(ptr nocapture [[SRC]])
 ; CHECK-NEXT:    [[TMP2:%.*]] = call i32 @use_nocapture(ptr nocapture [[SRC]])
 ; CHECK-NEXT:    ret void
 ;
@@ -399,10 +399,10 @@ define void @terminator_lastuse() personality i32 0 {
 ; CHECK-NEXT:    store [[STRUCT_FOO]] { i32 10, i32 20, i32 30 }, ptr [[SRC]], align 4
 ; CHECK-NEXT:    [[TMP1:%.*]] = call i32 @use_nocapture(ptr nocapture [[SRC]])
 ; CHECK-NEXT:    [[RV:%.*]] = invoke i32 @use_nocapture(ptr [[SRC]])
-; CHECK-NEXT:    to label [[SUC:%.*]] unwind label [[UNW:%.*]]
+; CHECK-NEXT:            to label [[SUC:%.*]] unwind label [[UNW:%.*]]
 ; CHECK:       unw:
 ; CHECK-NEXT:    [[LP:%.*]] = landingpad i32
-; CHECK-NEXT:    cleanup
+; CHECK-NEXT:            cleanup
 ; CHECK-NEXT:    resume i32 0
 ; CHECK:       suc:
 ; CHECK-NEXT:    ret void

@nikic nikic moved this from Needs Triage to Needs Review in LLVM Release Status Apr 14, 2025
@nikic nikic requested a review from DominikAdamski April 14, 2025 12:14
@tstellar tstellar moved this from Needs Review to Needs Merge in LLVM Release Status Apr 14, 2025
@tstellar
Copy link
Collaborator

@nikic Was an approver for the PR in main.

@tstellar
Copy link
Collaborator

Merged: 2131242

@tstellar tstellar closed this Apr 15, 2025
@tstellar tstellar moved this from Needs Merge to Done in LLVM Release Status Apr 15, 2025
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Projects
Development

Successfully merging this pull request may close these issues.

4 participants