Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[InstCombine] Canonicalize the stored value type with inttoptr/ptrtoint #76339

Closed
wants to merge 2 commits into from

Conversation

dtcxzyw
Copy link
Member

@dtcxzyw dtcxzyw commented Dec 24, 2023

This patch folds away the inttoptr/ptrtoint of the stored value by storing the original type. It will enable more optimizations.
An example in z3/src/smt/smt_enode.h:

diff --git a/bench/z3/optimized/smt_enode.cpp.ll b/bench/z3/optimized/smt_enode.cpp.ll
index 6f42a9f4..91bc9b0e 100644
--- a/bench/z3/optimized/smt_enode.cpp.ll
+++ b/bench/z3/optimized/smt_enode.cpp.ll
@@ -4,12 +4,12 @@ target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:
 target triple = "x86_64-unknown-linux-gnu"
 
 %"class.std::ios_base::Init" = type { i8 }
-%"class.smt::eq_justification" = type { ptr }
 %"class.smt::enode" = type { ptr, ptr, ptr, ptr, i32, i32, i32, i16, i32, i8, i8, %class.ptr_vector, %class.id_var_list, %"struct.smt::trans_justification", %class.approx_set, %class.approx_set, [0 x ptr] }
 %class.ptr_vector = type { %class.vector }
 %class.vector = type { ptr }
 %class.id_var_list = type { i32, ptr }
 %"struct.smt::trans_justification" = type { ptr, %"class.smt::eq_justification" }
+%"class.smt::eq_justification" = type { ptr }
 %class.approx_set = type { %class.approx_set_tpl }
 %class.approx_set_tpl = type { i64 }
 %class.ast = type { i32, i24, i32, i32 }
@@ -454,7 +454,7 @@ $_ZTI11value_trailI10approx_setE = comdat any
 
 @_ZStL8__ioinit = internal global %"class.std::ios_base::Init" zeroinitializer, align 1
 @__dso_handle = external hidden global i8
-@_ZN3smtL21null_eq_justificationE = internal unnamed_addr global %"class.smt::eq_justification" zeroinitializer, align 8
+@_ZN3smtL21null_eq_justificationE.0 = internal unnamed_addr global i1 false, align 8
 @.str = private unnamed_addr constant [2 x i8] c"#\00", align 1
 @.str.7 = private unnamed_addr constant [8 x i8] c"  ->  #\00", align 1
 @.str.8 = private unnamed_addr constant [9 x i8] c", lbls: \00", align 1
@@ -504,7 +504,8 @@ entry:
   %m_next.i.i = getelementptr inbounds %"class.smt::enode", ptr %mem, i64 0, i32 12, i32 1
   %m_justification.i.i = getelementptr inbounds %"class.smt::enode", ptr %mem, i64 0, i32 13, i32 1
   tail call void @llvm.memset.p0.i64(ptr noundef nonnull align 8 dereferenceable(16) %m_next.i.i, i8 0, i64 16, i1 false)
-  %0 = load i64, ptr @_ZN3smtL21null_eq_justificationE, align 8
+  %.b = load i1, ptr @_ZN3smtL21null_eq_justificationE.0, align 8
+  %0 = select i1 %.b, i64 3, i64 0
   store i64 %0, ptr %m_justification.i.i, align 8
   %m_lbls.i = getelementptr inbounds %"class.smt::enode", ptr %mem, i64 0, i32 14
   tail call void @llvm.memset.p0.i64(ptr noundef nonnull align 8 dereferenceable(16) %m_lbls.i, i8 0, i64 16, i1 false)
@@ -2125,7 +2126,7 @@ define internal void @_GLOBAL__sub_I_smt_enode.cpp() #17 section ".text.startup"
 entry:
   tail call void @_ZNSt8ios_base4InitC1Ev(ptr noundef nonnull align 1 dereferenceable(1) @_ZStL8__ioinit)
   %0 = tail call i32 @__cxa_atexit(ptr nonnull @_ZNSt8ios_base4InitD1Ev, ptr nonnull @_ZStL8__ioinit, ptr nonnull @__dso_handle) #20
-  store ptr inttoptr (i64 3 to ptr), ptr @_ZN3smtL21null_eq_justificationE, align 8
+  store i1 true, ptr @_ZN3smtL21null_eq_justificationE.0, align 8
   ret void
 }

@llvmbot
Copy link
Collaborator

llvmbot commented Dec 24, 2023

@llvm/pr-subscribers-llvm-transforms

Author: Yingwei Zheng (dtcxzyw)

Changes

This patch folds away the inttoptr/ptrtoint of the stored value by storing the original type. It will enable more optimizations.
An example in z3/src/smt/smt_enode.h:

diff --git a/bench/z3/optimized/smt_enode.cpp.ll b/bench/z3/optimized/smt_enode.cpp.ll
index 6f42a9f4..91bc9b0e 100644
--- a/bench/z3/optimized/smt_enode.cpp.ll
+++ b/bench/z3/optimized/smt_enode.cpp.ll
@@ -4,12 +4,12 @@ target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:
 target triple = "x86_64-unknown-linux-gnu"
 
 %"class.std::ios_base::Init" = type { i8 }
-%"class.smt::eq_justification" = type { ptr }
 %"class.smt::enode" = type { ptr, ptr, ptr, ptr, i32, i32, i32, i16, i32, i8, i8, %class.ptr_vector, %class.id_var_list, %"struct.smt::trans_justification", %class.approx_set, %class.approx_set, [0 x ptr] }
 %class.ptr_vector = type { %class.vector }
 %class.vector = type { ptr }
 %class.id_var_list = type { i32, ptr }
 %"struct.smt::trans_justification" = type { ptr, %"class.smt::eq_justification" }
+%"class.smt::eq_justification" = type { ptr }
 %class.approx_set = type { %class.approx_set_tpl }
 %class.approx_set_tpl = type { i64 }
 %class.ast = type { i32, i24, i32, i32 }
@@ -454,7 +454,7 @@ $_ZTI11value_trailI10approx_setE = comdat any
 
 @_ZStL8__ioinit = internal global %"class.std::ios_base::Init" zeroinitializer, align 1
 @__dso_handle = external hidden global i8
-@_ZN3smtL21null_eq_justificationE = internal unnamed_addr global %"class.smt::eq_justification" zeroinitializer, align 8
+@_ZN3smtL21null_eq_justificationE.0 = internal unnamed_addr global i1 false, align 8
 @.str = private unnamed_addr constant [2 x i8] c"#\00", align 1
 @.str.7 = private unnamed_addr constant [8 x i8] c"  ->  #\00", align 1
 @.str.8 = private unnamed_addr constant [9 x i8] c", lbls: \00", align 1
@@ -504,7 +504,8 @@ entry:
   %m_next.i.i = getelementptr inbounds %"class.smt::enode", ptr %mem, i64 0, i32 12, i32 1
   %m_justification.i.i = getelementptr inbounds %"class.smt::enode", ptr %mem, i64 0, i32 13, i32 1
  tail call void @llvm.memset.p0.i64(ptr noundef nonnull align 8 dereferenceable(16) %m_next.i.i, i8 0, i64 16, i1 false)
-  %0 = load i64, ptr @_ZN3smtL21null_eq_justificationE, align 8
+  %.b = load i1, ptr @_ZN3smtL21null_eq_justificationE.0, align 8
+  %0 = select i1 %.b, i64 3, i64 0
   store i64 %0, ptr %m_justification.i.i, align 8
   %m_lbls.i = getelementptr inbounds %"class.smt::enode", ptr %mem, i64 0, i32 14
   tail call void @llvm.memset.p0.i64(ptr noundef nonnull align 8 dereferenceable(16) %m_lbls.i, i8 0, i64 16, i1 false)
@@ -2125,7 +2126,7 @@ define internal void @_GLOBAL__sub_I_smt_enode.cpp() #17 section ".text.startup"
 entry:
   tail call void @_ZNSt8ios_base4InitC1Ev(ptr noundef nonnull align 1 dereferenceable(1) @_ZStL8__ioinit)
   %0 = tail call i32 @__cxa_atexit(ptr nonnull @_ZNSt8ios_base4InitD1Ev, ptr nonnull @_ZStL8__ioinit, ptr nonnull @__dso_handle) #20
-  store ptr inttoptr (i64 3 to ptr), ptr @_ZN3smtL21null_eq_justificationE, align 8
+  store i1 true, ptr @_ZN3smtL21null_eq_justificationE.0, align 8
   ret void
 }

Full diff: https://github.com/llvm/llvm-project/pull/76339.diff

2 Files Affected:

  • (modified) llvm/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp (+14)
  • (added) llvm/test/Transforms/InstCombine/store-inttoptr-ptrtoint.ll (+86)
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp b/llvm/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp
index bb2a77daa60a76..7a5a811c5e0b2e 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp
@@ -1209,6 +1209,20 @@ static bool combineStoreToValueType(InstCombinerImpl &IC, StoreInst &SI) {
       return true;
     }
 
+  // Fold away inttoptr/ptrtoint of the stored value by storing the original
+  // type.
+  Value *X;
+  if (match(V, m_IntToPtr(m_Value(X))) || match(V, m_PtrToInt(m_Value(X)))) {
+    const DataLayout &DL = IC.getDataLayout();
+    if (DL.getTypeStoreSize(V->getType()->getScalarType()) !=
+        DL.getTypeStoreSize(X->getType()->getScalarType()))
+      return false;
+    if (!SI.isAtomic() || isSupportedAtomicType(X->getType())) {
+      combineStoreToNewValue(IC, SI, X);
+      return true;
+    }
+  }
+
   // FIXME: We should also canonicalize stores of vectors when their elements
   // are cast to other types.
   return false;
diff --git a/llvm/test/Transforms/InstCombine/store-inttoptr-ptrtoint.ll b/llvm/test/Transforms/InstCombine/store-inttoptr-ptrtoint.ll
new file mode 100644
index 00000000000000..8336f5eccd3841
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/store-inttoptr-ptrtoint.ll
@@ -0,0 +1,86 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 4
+; RUN: opt < %s -passes=instcombine -S | FileCheck %s
+
+target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128"
+
+declare void @use64(i64)
+declare void @useptr(ptr)
+define void @test_inttoptr(ptr %p, i64 %x) {
+; CHECK-LABEL: define void @test_inttoptr(
+; CHECK-SAME: ptr [[P:%.*]], i64 [[X:%.*]]) {
+; CHECK-NEXT:    store i64 [[X]], ptr [[P]], align 8
+; CHECK-NEXT:    ret void
+;
+  %y = inttoptr i64 %x to ptr
+  store ptr %y, ptr %p, align 8
+  ret void
+}
+define void @test_inttoptr_multiuse(ptr %p, i64 %x) {
+; CHECK-LABEL: define void @test_inttoptr_multiuse(
+; CHECK-SAME: ptr [[P:%.*]], i64 [[X:%.*]]) {
+; CHECK-NEXT:    [[Y:%.*]] = inttoptr i64 [[X]] to ptr
+; CHECK-NEXT:    call void @useptr(ptr [[Y]])
+; CHECK-NEXT:    store i64 [[X]], ptr [[P]], align 8
+; CHECK-NEXT:    ret void
+;
+  %y = inttoptr i64 %x to ptr
+  call void @useptr(ptr %y)
+  store ptr %y, ptr %p, align 8
+  ret void
+}
+define void @test_inttoptr_constant_expr(ptr %p) {
+; CHECK-LABEL: define void @test_inttoptr_constant_expr(
+; CHECK-SAME: ptr [[P:%.*]]) {
+; CHECK-NEXT:    store i64 65, ptr [[P]], align 8
+; CHECK-NEXT:    ret void
+;
+  store ptr inttoptr (i64 65 to ptr), ptr %p, align 8
+  ret void
+}
+define void @test_ptrtoint(ptr %p, ptr %x) {
+; CHECK-LABEL: define void @test_ptrtoint(
+; CHECK-SAME: ptr [[P:%.*]], ptr [[X:%.*]]) {
+; CHECK-NEXT:    store ptr [[X]], ptr [[P]], align 8
+; CHECK-NEXT:    ret void
+;
+  %y = ptrtoint ptr %x to i64
+  store i64 %y, ptr %p, align 8
+  ret void
+}
+define void @test_ptrtoint_multiuse(ptr %p, ptr %x) {
+; CHECK-LABEL: define void @test_ptrtoint_multiuse(
+; CHECK-SAME: ptr [[P:%.*]], ptr [[X:%.*]]) {
+; CHECK-NEXT:    [[Y:%.*]] = ptrtoint ptr [[X]] to i64
+; CHECK-NEXT:    call void @use64(i64 [[Y]])
+; CHECK-NEXT:    store ptr [[X]], ptr [[P]], align 8
+; CHECK-NEXT:    ret void
+;
+  %y = ptrtoint ptr %x to i64
+  call void @use64(i64 %y)
+  store i64 %y, ptr %p, align 8
+  ret void
+}
+; Negative tests
+define void @test_inttoptr_mismatched_size(ptr %p, i32 %x) {
+; CHECK-LABEL: define void @test_inttoptr_mismatched_size(
+; CHECK-SAME: ptr [[P:%.*]], i32 [[X:%.*]]) {
+; CHECK-NEXT:    [[TMP1:%.*]] = zext i32 [[X]] to i64
+; CHECK-NEXT:    store i64 [[TMP1]], ptr [[P]], align 8
+; CHECK-NEXT:    ret void
+;
+  %y = inttoptr i32 %x to ptr
+  store ptr %y, ptr %p, align 8
+  ret void
+}
+define void @test_ptrtoint_mismatched_size(ptr %p, ptr %x) {
+; CHECK-LABEL: define void @test_ptrtoint_mismatched_size(
+; CHECK-SAME: ptr [[P:%.*]], ptr [[X:%.*]]) {
+; CHECK-NEXT:    [[TMP1:%.*]] = ptrtoint ptr [[X]] to i64
+; CHECK-NEXT:    [[Y:%.*]] = trunc i64 [[TMP1]] to i32
+; CHECK-NEXT:    store i32 [[Y]], ptr [[P]], align 4
+; CHECK-NEXT:    ret void
+;
+  %y = ptrtoint ptr %x to i32
+  store i32 %y, ptr %p, align 4
+  ret void
+}

dtcxzyw added a commit to dtcxzyw/llvm-opt-benchmark that referenced this pull request Dec 24, 2023
@nikic
Copy link
Contributor

nikic commented Dec 24, 2023

This looks like the store variant of the load fold we explicitly removed in 544a6aa. Generally we don't want to introduce type-punning between pointers and integers, as the semantics are very unclear (but it's very likely that at least one of these transforms is unsound).

@dtcxzyw
Copy link
Member Author

dtcxzyw commented Dec 24, 2023

Thank you for the explanation!

@dtcxzyw dtcxzyw closed this Dec 24, 2023
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Projects
None yet
Development

Successfully merging this pull request may close these issues.

None yet

3 participants