Skip to content

Commit

Permalink
[InstCombine] Preserve inbounds when folding select of GEP
Browse files Browse the repository at this point in the history
The fold of `select base, (gep base, offset)` to `gep base, select (0, offset)`
used to drop inbounds, because the `gep base, 0` that this fold introduces
might not be inbounds. After the semantics change in D154051, such
a GEP is always considered inbounds, which allows us to preserve
the flag here.

As the PhaseOrdering test demonstrates, this can result in major
optimization improvements in some cases.

Differential Revision: https://reviews.llvm.org/D154055
  • Loading branch information
nikic committed Jul 7, 2023
1 parent 99074aa commit 336d728
Show file tree
Hide file tree
Showing 4 changed files with 13 additions and 24 deletions.
2 changes: 2 additions & 0 deletions llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -3400,6 +3400,8 @@ Instruction *InstCombinerImpl::visitSelectInst(SelectInst &SI) {
std::swap(NewT, NewF);
Value *NewSI =
Builder.CreateSelect(CondVal, NewT, NewF, SI.getName() + ".idx", &SI);
if (Gep->isInBounds())
return GetElementPtrInst::CreateInBounds(ElementType, Ptr, {NewSI});
return GetElementPtrInst::Create(ElementType, Ptr, {NewSI});
};
if (auto *TrueGep = dyn_cast<GetElementPtrInst>(TrueVal))
Expand Down
10 changes: 5 additions & 5 deletions llvm/test/Transforms/InstCombine/select-gep.ll
Original file line number Diff line number Diff line change
Expand Up @@ -75,7 +75,7 @@ define ptr @test2a(ptr %p, i64 %x, i64 %y) {
; CHECK-LABEL: @test2a(
; CHECK-NEXT: [[CMP:%.*]] = icmp ugt i64 [[X:%.*]], [[Y:%.*]]
; CHECK-NEXT: [[SELECT_IDX:%.*]] = select i1 [[CMP]], i64 [[X]], i64 0
; CHECK-NEXT: [[SELECT:%.*]] = getelementptr i32, ptr [[P:%.*]], i64 [[SELECT_IDX]]
; CHECK-NEXT: [[SELECT:%.*]] = getelementptr inbounds i32, ptr [[P:%.*]], i64 [[SELECT_IDX]]
; CHECK-NEXT: ret ptr [[SELECT]]
;
%gep = getelementptr inbounds i32, ptr %p, i64 %x
Expand All @@ -89,7 +89,7 @@ define ptr @test2b(ptr %p, i64 %x, i64 %y) {
; CHECK-LABEL: @test2b(
; CHECK-NEXT: [[CMP:%.*]] = icmp ugt i64 [[X:%.*]], [[Y:%.*]]
; CHECK-NEXT: [[SELECT_IDX:%.*]] = select i1 [[CMP]], i64 0, i64 [[X]]
; CHECK-NEXT: [[SELECT:%.*]] = getelementptr i32, ptr [[P:%.*]], i64 [[SELECT_IDX]]
; CHECK-NEXT: [[SELECT:%.*]] = getelementptr inbounds i32, ptr [[P:%.*]], i64 [[SELECT_IDX]]
; CHECK-NEXT: ret ptr [[SELECT]]
;
%gep = getelementptr inbounds i32, ptr %p, i64 %x
Expand All @@ -104,7 +104,7 @@ define ptr @test2c(ptr %p, i64 %x, i64 %y) {
; CHECK-NEXT: [[GEP1:%.*]] = getelementptr inbounds i32, ptr [[P:%.*]], i64 [[X:%.*]]
; CHECK-NEXT: [[ICMP:%.*]] = icmp ugt i64 [[X]], [[Y:%.*]]
; CHECK-NEXT: [[SEL_IDX:%.*]] = select i1 [[ICMP]], i64 0, i64 6
; CHECK-NEXT: [[SEL:%.*]] = getelementptr i32, ptr [[GEP1]], i64 [[SEL_IDX]]
; CHECK-NEXT: [[SEL:%.*]] = getelementptr inbounds i32, ptr [[GEP1]], i64 [[SEL_IDX]]
; CHECK-NEXT: ret ptr [[SEL]]
;
%gep1 = getelementptr inbounds i32, ptr %p, i64 %x
Expand All @@ -120,7 +120,7 @@ define ptr @test2d(ptr %p, i64 %x, i64 %y) {
; CHECK-NEXT: [[GEP1:%.*]] = getelementptr inbounds i32, ptr [[P:%.*]], i64 [[X:%.*]]
; CHECK-NEXT: [[ICMP:%.*]] = icmp ugt i64 [[X]], [[Y:%.*]]
; CHECK-NEXT: [[SEL_IDX:%.*]] = select i1 [[ICMP]], i64 6, i64 0
; CHECK-NEXT: [[SEL:%.*]] = getelementptr i32, ptr [[GEP1]], i64 [[SEL_IDX]]
; CHECK-NEXT: [[SEL:%.*]] = getelementptr inbounds i32, ptr [[GEP1]], i64 [[SEL_IDX]]
; CHECK-NEXT: ret ptr [[SEL]]
;
%gep1 = getelementptr inbounds i32, ptr %p, i64 %x
Expand Down Expand Up @@ -231,7 +231,7 @@ define ptr @test6(ptr %p, i64 %x, i64 %y) {
; CHECK-NEXT: [[GEP1:%.*]] = getelementptr inbounds i32, ptr [[P:%.*]], i64 [[X:%.*]]
; CHECK-NEXT: [[ICMP:%.*]] = icmp ugt i64 [[X]], [[Y:%.*]]
; CHECK-NEXT: [[SEL_IDX:%.*]] = select i1 [[ICMP]], i64 [[Y]], i64 0
; CHECK-NEXT: [[SEL:%.*]] = getelementptr i32, ptr [[GEP1]], i64 [[SEL_IDX]]
; CHECK-NEXT: [[SEL:%.*]] = getelementptr inbounds i32, ptr [[GEP1]], i64 [[SEL_IDX]]
; CHECK-NEXT: call void @use_i32p(ptr [[GEP1]])
; CHECK-NEXT: ret ptr [[SEL]]
;
Expand Down
8 changes: 4 additions & 4 deletions llvm/test/Transforms/InstCombine/stpncpy-1.ll
Original file line number Diff line number Diff line change
Expand Up @@ -47,8 +47,8 @@ define void @fold_stpncpy_overlap(ptr %dst, i64 %n) {
; ANY-NEXT: [[STXNCPY_CHAR0:%.*]] = load i8, ptr [[DST]], align 1
; ANY-NEXT: [[STPNCPY_CHAR0CMP:%.*]] = icmp ne i8 [[STXNCPY_CHAR0]], 0
; ANY-NEXT: [[STPNCPY_SEL_IDX:%.*]] = zext i1 [[STPNCPY_CHAR0CMP]] to i64
; ANY-NEXT: [[STPNCPY_SEL:%.*]] = getelementptr i8, ptr [[DST]], i64 [[STPNCPY_SEL_IDX]]
; ANY-NEXT: call void @sink(ptr nonnull [[DST]], ptr [[STPNCPY_SEL]])
; ANY-NEXT: [[STPNCPY_SEL:%.*]] = getelementptr inbounds i8, ptr [[DST]], i64 [[STPNCPY_SEL_IDX]]
; ANY-NEXT: call void @sink(ptr nonnull [[DST]], ptr nonnull [[STPNCPY_SEL]])
; ANY-NEXT: ret void
;
; Fold stpncpy(D, D, 0) to just D.
Expand Down Expand Up @@ -398,8 +398,8 @@ define void @fold_stpncpy_s(ptr %dst, ptr %src) {
; ANY-NEXT: store i8 [[STXNCPY_CHAR0]], ptr [[DST]], align 1
; ANY-NEXT: [[STPNCPY_CHAR0CMP:%.*]] = icmp ne i8 [[STXNCPY_CHAR0]], 0
; ANY-NEXT: [[STPNCPY_SEL_IDX:%.*]] = zext i1 [[STPNCPY_CHAR0CMP]] to i64
; ANY-NEXT: [[STPNCPY_SEL:%.*]] = getelementptr i8, ptr [[DST]], i64 [[STPNCPY_SEL_IDX]]
; ANY-NEXT: call void @sink(ptr nonnull [[DST]], ptr [[STPNCPY_SEL]])
; ANY-NEXT: [[STPNCPY_SEL:%.*]] = getelementptr inbounds i8, ptr [[DST]], i64 [[STPNCPY_SEL_IDX]]
; ANY-NEXT: call void @sink(ptr nonnull [[DST]], ptr nonnull [[STPNCPY_SEL]])
; ANY-NEXT: ret void
;
; Fold stpncpy(D, S, 0) to just D.
Expand Down
17 changes: 2 additions & 15 deletions llvm/test/Transforms/PhaseOrdering/gep-null-compare-in-loop.ll
Original file line number Diff line number Diff line change
Expand Up @@ -52,22 +52,9 @@ bb:

define i32 @using_malloc() {
; CHECK-LABEL: define i32 @using_malloc
; CHECK-SAME: () local_unnamed_addr #[[ATTR1:[0-9]+]] {
; CHECK-SAME: () local_unnamed_addr #[[ATTR0]] {
; CHECK-NEXT: bb:
; CHECK-NEXT: [[ALLOC:%.*]] = tail call dereferenceable_or_null(64) ptr @malloc(i64 64)
; CHECK-NEXT: store i32 1, ptr [[ALLOC]], align 4
; CHECK-NEXT: [[GETELEMENTPTR:%.*]] = getelementptr i32, ptr [[ALLOC]], i64 1
; CHECK-NEXT: store i32 2, ptr [[GETELEMENTPTR]], align 4
; CHECK-NEXT: [[GETELEMENTPTR1:%.*]] = getelementptr i32, ptr [[ALLOC]], i64 2
; CHECK-NEXT: store i32 3, ptr [[GETELEMENTPTR1]], align 4
; CHECK-NEXT: [[SPEC_SELECT_I:%.*]] = getelementptr i32, ptr [[ALLOC]], i64 1
; CHECK-NEXT: [[LOAD_I:%.*]] = load i32, ptr [[ALLOC]], align 4
; CHECK-NEXT: [[SPEC_SELECT_I_1:%.*]] = getelementptr i32, ptr [[ALLOC]], i64 2
; CHECK-NEXT: [[LOAD_I_1:%.*]] = load i32, ptr [[SPEC_SELECT_I]], align 4
; CHECK-NEXT: [[ADD_I_1:%.*]] = add i32 [[LOAD_I_1]], [[LOAD_I]]
; CHECK-NEXT: [[LOAD_I_2:%.*]] = load i32, ptr [[SPEC_SELECT_I_1]], align 4
; CHECK-NEXT: [[ADD_I_2:%.*]] = add i32 [[LOAD_I_2]], [[ADD_I_1]]
; CHECK-NEXT: ret i32 [[ADD_I_2]]
; CHECK-NEXT: ret i32 6
;
bb:
%alloc = call dereferenceable_or_null(64) ptr @malloc(i64 64)
Expand Down

0 comments on commit 336d728

Please sign in to comment.