Skip to content

Commit 349523e

Browse files
authored
[InstCombine] Merge constant offset geps across variable geps (#156326)
Fold: %gep1 = ptradd %p, C1; %gep2 = ptradd %gep1, %x; %res = ptradd %gep2, C2. To: %gep = ptradd %p, %x; %res = ptradd %gep, C1+C2. An alternative to this would be to generally canonicalize constant offset GEPs to the right. I found the results of doing that somewhat mixed, so I'm going for this more obviously beneficial change for now. Proof for flag preservation on reassociation: https://alive2.llvm.org/ce/z/gmpAMg
1 parent d0d79fd commit 349523e

File tree

4 files changed

+213
-14
lines changed

4 files changed

+213
-14
lines changed

llvm/include/llvm/IR/GEPNoWrapFlags.h

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -84,6 +84,16 @@ class GEPNoWrapFlags {
8484
return Res;
8585
}
8686

87+
/// Given (gep (gep p, x), y), determine the nowrap flags for
88+
/// the reassociated form (gep (gep p, y), x).
///
/// \p Other holds the flags of the second GEP involved in the swap; the
/// result is the intersection of both flag sets, or no flags at all when
/// that intersection lacks nuw.
89+
GEPNoWrapFlags intersectForReassociate(GEPNoWrapFlags Other) const {
90+
// Start from the flags common to both GEPs.
GEPNoWrapFlags Res = *this & Other;
91+
// We can only preserve inbounds and nusw if nuw is also set.
92+
if (!Res.hasNoUnsignedWrap())
93+
// Without nuw nothing is kept, not even inbounds/nusw shared by both.
return none();
94+
return Res;
95+
}
96+
8797
bool operator==(GEPNoWrapFlags Other) const { return Flags == Other.Flags; }
8898
bool operator!=(GEPNoWrapFlags Other) const { return !(*this == Other); }
8999

llvm/lib/Transforms/InstCombine/InstructionCombining.cpp

Lines changed: 59 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2674,6 +2674,62 @@ static Instruction *canonicalizeGEPOfConstGEPI8(GetElementPtrInst &GEP,
26742674
return nullptr;
26752675
}
26762676

2677+
/// Combine constant offsets separated by variable offsets.
2678+
/// ptradd (ptradd (ptradd p, C1), x), C2 -> ptradd (ptradd p, x), C1+C2
///
/// \p GEP is the outermost GEP (the one adding C2). The function walks down
/// its pointer-operand chain across any number of single-use GEPs with
/// non-constant indices until it reaches another all-constant GEP (the one
/// adding C1). On success the inner constant GEP is bypassed, the skipped
/// GEPs get updated nowrap flags, and the uses of \p GEP are replaced by a
/// new ptradd of the combined offset. Returns nullptr when the pattern does
/// not match.
2679+
static Instruction *combineConstantOffsets(GetElementPtrInst &GEP,
2680+
InstCombinerImpl &IC) {
2681+
// The outer GEP must itself be a pure constant offset.
if (!GEP.hasAllConstantIndices())
2682+
return nullptr;
2683+
2684+
// Flags that remain valid for the reassociated GEPs; narrowed by every GEP
// visited on the way down the chain.
GEPNoWrapFlags NW = GEPNoWrapFlags::all();
2685+
// Variable-index GEPs between the two constant GEPs, outermost first.
SmallVector<GetElementPtrInst *> Skipped;
2686+
auto *InnerGEP = dyn_cast<GetElementPtrInst>(GEP.getPointerOperand());
2687+
while (true) {
2688+
// Chain ended without finding a second constant-offset GEP.
if (!InnerGEP)
2689+
return nullptr;
2690+
2691+
// The flags of the terminating constant GEP are folded in as well, since
// this runs before the break below.
NW = NW.intersectForReassociate(InnerGEP->getNoWrapFlags());
2692+
if (InnerGEP->hasAllConstantIndices())
2693+
break;
2694+
2695+
// A skipped GEP is modified in place below, so it must have no other
// users.
if (!InnerGEP->hasOneUse())
2696+
return nullptr;
2697+
2698+
Skipped.push_back(InnerGEP);
2699+
InnerGEP = dyn_cast<GetElementPtrInst>(InnerGEP->getPointerOperand());
2700+
}
2701+
2702+
// The two constant offset GEPs are directly adjacent: Let normal offset
2703+
// merging handle it.
2704+
if (Skipped.empty())
2705+
return nullptr;
2706+
2707+
// FIXME: This one-use check is not strictly necessary. Consider relaxing it
2708+
// if profitable.
2709+
if (!InnerGEP->hasOneUse())
2710+
return nullptr;
2711+
2712+
// Don't bother with vector splats.
2713+
Type *Ty = GEP.getType();
2714+
// Bail out when the inner constant GEP and the outer GEP differ in type.
if (InnerGEP->getType() != Ty)
2715+
return nullptr;
2716+
2717+
const DataLayout &DL = IC.getDataLayout();
2718+
// Both constant GEPs accumulate into the same Offset, yielding C1+C2.
APInt Offset(DL.getIndexTypeSizeInBits(Ty), 0);
2719+
if (!GEP.accumulateConstantOffset(DL, Offset) ||
2720+
!InnerGEP->accumulateConstantOffset(DL, Offset))
2721+
return nullptr;
2722+
2723+
// Skipped.back() is the innermost skipped GEP, whose pointer operand is
// InnerGEP; rebase it onto InnerGEP's own base pointer to bypass InnerGEP.
IC.replaceOperand(*Skipped.back(), 0, InnerGEP->getPointerOperand());
2724+
// The skipped GEPs now compute different intermediate pointers, so they may
// only keep the flags that survived reassociation.
for (GetElementPtrInst *SkippedGEP : Skipped)
2725+
SkippedGEP->setNoWrapFlags(NW);
2726+
2727+
// Re-apply the combined constant offset on top of the outermost skipped GEP
// (Skipped.front()), with flags derived from NW and the outer GEP's flags.
return IC.replaceInstUsesWith(
2728+
GEP,
2729+
IC.Builder.CreatePtrAdd(Skipped.front(), IC.Builder.getInt(Offset), "",
2730+
NW.intersectForOffsetAdd(GEP.getNoWrapFlags())));
2731+
}
2732+
26772733
Instruction *InstCombinerImpl::visitGEPOfGEP(GetElementPtrInst &GEP,
26782734
GEPOperator *Src) {
26792735
// Combine Indices - If the source pointer to this getelementptr instruction
@@ -2685,6 +2741,9 @@ Instruction *InstCombinerImpl::visitGEPOfGEP(GetElementPtrInst &GEP,
26852741
if (auto *I = canonicalizeGEPOfConstGEPI8(GEP, Src, *this))
26862742
return I;
26872743

2744+
if (auto *I = combineConstantOffsets(GEP, *this))
2745+
return I;
2746+
26882747
// For constant GEPs, use a more general offset-based folding approach.
26892748
Type *PtrTy = Src->getType()->getScalarType();
26902749
if (GEP.hasAllConstantIndices() &&

llvm/test/Transforms/InstCombine/gep-canonicalize-constant-indices.ll

Lines changed: 143 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -50,10 +50,9 @@ define ptr @partialConstant2(ptr %p, i64 %a, i64 %b) {
5050
; result = ((ptr) p + a) + 3
5151
define ptr @merge(ptr %p, i64 %a) {
5252
; CHECK-LABEL: @merge(
53-
; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds nuw i8, ptr [[P:%.*]], i64 4
54-
; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i64 [[A:%.*]]
55-
; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP2]], i64 8
56-
; CHECK-NEXT: ret ptr [[TMP3]]
53+
; CHECK-NEXT: [[TMP1:%.*]] = getelementptr i32, ptr [[P:%.*]], i64 [[A:%.*]]
54+
; CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[TMP1]], i64 12
55+
; CHECK-NEXT: ret ptr [[TMP2]]
5756
;
5857
%1 = getelementptr inbounds i32, ptr %p, i64 1
5958
%2 = getelementptr inbounds i32, ptr %1, i64 %a
@@ -67,13 +66,11 @@ define ptr @merge(ptr %p, i64 %a) {
6766
; result = (ptr) ((ptr) ((ptr) ptr + a) + (a * b)) + 9
6867
define ptr @nested(ptr %p, i64 %a, i64 %b) {
6968
; CHECK-LABEL: @nested(
70-
; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds nuw i8, ptr [[P:%.*]], i64 16
71-
; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i64 [[A:%.*]]
72-
; CHECK-NEXT: [[TMP3:%.*]] = mul i64 [[A]], [[B:%.*]]
73-
; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP2]], i64 128
74-
; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i16, ptr [[TMP4]], i64 [[TMP3]]
75-
; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP5]], i64 16
76-
; CHECK-NEXT: ret ptr [[TMP6]]
69+
; CHECK-NEXT: [[TMP1:%.*]] = getelementptr i8, ptr [[P:%.*]], i64 [[A:%.*]]
70+
; CHECK-NEXT: [[TMP2:%.*]] = mul i64 [[A]], [[B:%.*]]
71+
; CHECK-NEXT: [[TMP3:%.*]] = getelementptr i16, ptr [[TMP1]], i64 [[TMP2]]
72+
; CHECK-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[TMP3]], i64 160
73+
; CHECK-NEXT: ret ptr [[TMP4]]
7774
;
7875
%1 = getelementptr inbounds <3 x i32>, ptr %p, i64 1
7976
%2 = getelementptr inbounds i8, ptr %1, i64 %a
@@ -125,3 +122,138 @@ define ptr @multipleUses3(ptr %p) {
125122
%3 = getelementptr inbounds i32, ptr %1, i64 %2
126123
ret ptr %3
127124
}
125+
126+
; All three GEPs are nuw: the constant offsets 1 and 4 (one i32) merge into 5,
; and nuw is kept on both the variable GEP and the merged constant GEP.
define ptr @merge_nuw(ptr %p, i64 %a) {
127+
; CHECK-LABEL: @merge_nuw(
128+
; CHECK-NEXT: [[GEP2:%.*]] = getelementptr nuw i32, ptr [[P:%.*]], i64 [[A:%.*]]
129+
; CHECK-NEXT: [[GEP3:%.*]] = getelementptr nuw i8, ptr [[GEP2]], i64 5
130+
; CHECK-NEXT: ret ptr [[GEP3]]
131+
;
132+
%gep1 = getelementptr nuw i8, ptr %p, i64 1
133+
%gep2 = getelementptr nuw i32, ptr %gep1, i64 %a
134+
%gep3 = getelementptr nuw i32, ptr %gep2, i64 1
135+
ret ptr %gep3
136+
}
137+
138+
; All three GEPs are inbounds nuw: both flags are kept on the variable GEP and
; on the merged constant GEP.
define ptr @merge_nuw_inbounds(ptr %p, i64 %a) {
139+
; CHECK-LABEL: @merge_nuw_inbounds(
140+
; CHECK-NEXT: [[GEP2:%.*]] = getelementptr inbounds nuw i32, ptr [[P:%.*]], i64 [[A:%.*]]
141+
; CHECK-NEXT: [[GEP3:%.*]] = getelementptr inbounds nuw i8, ptr [[GEP2]], i64 5
142+
; CHECK-NEXT: ret ptr [[GEP3]]
143+
;
144+
%gep1 = getelementptr inbounds nuw i8, ptr %p, i64 1
145+
%gep2 = getelementptr inbounds nuw i32, ptr %gep1, i64 %a
146+
%gep3 = getelementptr inbounds nuw i32, ptr %gep2, i64 1
147+
ret ptr %gep3
148+
}
149+
150+
; It would be okay to preserve nusw here, as the constant addition does not
150+
; overflow.
; As the CHECK lines show, the variable GEP keeps nusw nuw while the merged
; constant GEP currently ends up with nuw only.
151+
define ptr @merge_nuw_nusw(ptr %p, i64 %a) {
152+
; CHECK-LABEL: @merge_nuw_nusw(
153+
; CHECK-NEXT: [[GEP2:%.*]] = getelementptr nusw nuw i32, ptr [[P:%.*]], i64 [[A:%.*]]
154+
; CHECK-NEXT: [[GEP3:%.*]] = getelementptr nuw i8, ptr [[GEP2]], i64 5
155+
; CHECK-NEXT: ret ptr [[GEP3]]
156+
;
157+
%gep1 = getelementptr nusw nuw i8, ptr %p, i64 1
158+
%gep2 = getelementptr nusw nuw i32, ptr %gep1, i64 %a
159+
%gep3 = getelementptr nusw nuw i32, ptr %gep2, i64 1
160+
ret ptr %gep3
161+
}
163+
164+
; Can't preserve nusw on the final GEP
; The two constant offsets (0x7000000000000000 each) sum to a value that is
; negative as a signed i64, as the merged offset in the CHECK line shows.
165+
define ptr @merge_nuw_nusw_overflow(ptr %p, i64 %a) {
166+
; CHECK-LABEL: @merge_nuw_nusw_overflow(
167+
; CHECK-NEXT: [[GEP2:%.*]] = getelementptr nusw nuw i32, ptr [[P:%.*]], i64 [[A:%.*]]
168+
; CHECK-NEXT: [[GEP3:%.*]] = getelementptr nuw i8, ptr [[GEP2]], i64 -2305843009213693952
169+
; CHECK-NEXT: ret ptr [[GEP3]]
170+
;
171+
%gep1 = getelementptr nusw nuw i8, ptr %p, i64 u0x7000000000000000
172+
%gep2 = getelementptr nusw nuw i32, ptr %gep1, i64 %a
173+
%gep3 = getelementptr nusw nuw i8, ptr %gep2, i64 u0x7000000000000000
174+
ret ptr %gep3
175+
}
176+
177+
; The inner constant GEP (%gep1) lacks nuw: the offsets are still merged, but
; no flags survive on either resulting GEP.
define ptr @merge_missing_nuw1(ptr %p, i64 %a) {
178+
; CHECK-LABEL: @merge_missing_nuw1(
179+
; CHECK-NEXT: [[GEP2:%.*]] = getelementptr i32, ptr [[P:%.*]], i64 [[A:%.*]]
180+
; CHECK-NEXT: [[GEP3:%.*]] = getelementptr i8, ptr [[GEP2]], i64 5
181+
; CHECK-NEXT: ret ptr [[GEP3]]
182+
;
183+
%gep1 = getelementptr i8, ptr %p, i64 1
184+
%gep2 = getelementptr nuw i32, ptr %gep1, i64 %a
185+
%gep3 = getelementptr nuw i32, ptr %gep2, i64 1
186+
ret ptr %gep3
187+
}
188+
189+
; The variable GEP (%gep2) lacks nuw: the offsets are still merged, but no
; flags survive on either resulting GEP.
define ptr @merge_missing_nuw2(ptr %p, i64 %a) {
190+
; CHECK-LABEL: @merge_missing_nuw2(
191+
; CHECK-NEXT: [[GEP2:%.*]] = getelementptr i32, ptr [[P:%.*]], i64 [[A:%.*]]
192+
; CHECK-NEXT: [[GEP3:%.*]] = getelementptr i8, ptr [[GEP2]], i64 5
193+
; CHECK-NEXT: ret ptr [[GEP3]]
194+
;
195+
%gep1 = getelementptr nuw i8, ptr %p, i64 1
196+
%gep2 = getelementptr i32, ptr %gep1, i64 %a
197+
%gep3 = getelementptr nuw i32, ptr %gep2, i64 1
198+
ret ptr %gep3
199+
}
200+
201+
; The outer constant GEP (%gep3) lacks nuw: the variable GEP keeps nuw, while
; the merged constant GEP carries no flags.
define ptr @merge_missing_nuw3(ptr %p, i64 %a) {
202+
; CHECK-LABEL: @merge_missing_nuw3(
203+
; CHECK-NEXT: [[GEP2:%.*]] = getelementptr nuw i32, ptr [[P:%.*]], i64 [[A:%.*]]
204+
; CHECK-NEXT: [[GEP3:%.*]] = getelementptr i8, ptr [[GEP2]], i64 5
205+
; CHECK-NEXT: ret ptr [[GEP3]]
206+
;
207+
%gep1 = getelementptr nuw i8, ptr %p, i64 1
208+
%gep2 = getelementptr nuw i32, ptr %gep1, i64 %a
209+
%gep3 = getelementptr i32, ptr %gep2, i64 1
210+
ret ptr %gep3
211+
}
212+
213+
; All GEPs are nuw but the inner constant GEP (%gep1) lacks inbounds: nuw is
; kept on both resulting GEPs and inbounds is dropped.
define ptr @merge_nuw_missing_inbounds(ptr %p, i64 %a) {
214+
; CHECK-LABEL: @merge_nuw_missing_inbounds(
215+
; CHECK-NEXT: [[GEP2:%.*]] = getelementptr nuw i32, ptr [[P:%.*]], i64 [[A:%.*]]
216+
; CHECK-NEXT: [[GEP3:%.*]] = getelementptr nuw i8, ptr [[GEP2]], i64 5
217+
; CHECK-NEXT: ret ptr [[GEP3]]
218+
;
219+
%gep1 = getelementptr nuw i8, ptr %p, i64 1
220+
%gep2 = getelementptr inbounds nuw i32, ptr %gep1, i64 %a
221+
%gep3 = getelementptr inbounds nuw i32, ptr %gep2, i64 1
222+
ret ptr %gep3
223+
}
224+
225+
; All GEPs are nuw but the variable GEP (%gep2) lacks nusw: nuw is kept on
; both resulting GEPs and nusw is dropped.
define ptr @merge_nuw_missing_nusw(ptr %p, i64 %a) {
226+
; CHECK-LABEL: @merge_nuw_missing_nusw(
227+
; CHECK-NEXT: [[GEP2:%.*]] = getelementptr nuw i32, ptr [[P:%.*]], i64 [[A:%.*]]
228+
; CHECK-NEXT: [[GEP3:%.*]] = getelementptr nuw i8, ptr [[GEP2]], i64 5
229+
; CHECK-NEXT: ret ptr [[GEP3]]
230+
;
231+
%gep1 = getelementptr nusw nuw i8, ptr %p, i64 1
232+
%gep2 = getelementptr nuw i32, ptr %gep1, i64 %a
233+
%gep3 = getelementptr nusw nuw i32, ptr %gep2, i64 1
234+
ret ptr %gep3
235+
}
236+
237+
; All GEPs are inbounds but the variable GEP (%gep2) lacks nuw: inbounds is not
; preserved without nuw, so neither resulting GEP carries any flags.
define ptr @merge_inbounds_missing_nuw(ptr %p, i64 %a) {
238+
; CHECK-LABEL: @merge_inbounds_missing_nuw(
239+
; CHECK-NEXT: [[GEP2:%.*]] = getelementptr i32, ptr [[P:%.*]], i64 [[A:%.*]]
240+
; CHECK-NEXT: [[GEP3:%.*]] = getelementptr i8, ptr [[GEP2]], i64 5
241+
; CHECK-NEXT: ret ptr [[GEP3]]
242+
;
243+
%gep1 = getelementptr inbounds nuw i8, ptr %p, i64 1
244+
%gep2 = getelementptr inbounds i32, ptr %gep1, i64 %a
245+
%gep3 = getelementptr inbounds nuw i32, ptr %gep2, i64 1
246+
ret ptr %gep3
247+
}
248+
249+
; All GEPs are nusw but the variable GEP (%gep2) lacks nuw: nusw is not
; preserved without nuw, so neither resulting GEP carries any flags.
define ptr @merge_nusw_missing_nuw(ptr %p, i64 %a) {
250+
; CHECK-LABEL: @merge_nusw_missing_nuw(
251+
; CHECK-NEXT: [[GEP2:%.*]] = getelementptr i32, ptr [[P:%.*]], i64 [[A:%.*]]
252+
; CHECK-NEXT: [[GEP3:%.*]] = getelementptr i8, ptr [[GEP2]], i64 5
253+
; CHECK-NEXT: ret ptr [[GEP3]]
254+
;
255+
%gep1 = getelementptr nusw nuw i8, ptr %p, i64 1
256+
%gep2 = getelementptr nusw i32, ptr %gep1, i64 %a
257+
%gep3 = getelementptr nusw nuw i32, ptr %gep2, i64 1
258+
ret ptr %gep3
259+
}

llvm/test/Transforms/InstCombine/gepofconstgepi8.ll

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -85,10 +85,8 @@ define ptr @test_zero_sext_add_nsw(ptr %base, i32 %a) {
8585
; CHECK-LABEL: define ptr @test_zero_sext_add_nsw(
8686
; CHECK-SAME: ptr [[BASE:%.*]], i32 [[A:%.*]]) {
8787
; CHECK-NEXT: entry:
88-
; CHECK-NEXT: [[P1:%.*]] = getelementptr i8, ptr [[BASE]], i64 -4
8988
; CHECK-NEXT: [[TMP0:%.*]] = sext i32 [[A]] to i64
90-
; CHECK-NEXT: [[TMP1:%.*]] = getelementptr i32, ptr [[P1]], i64 [[TMP0]]
91-
; CHECK-NEXT: [[P2:%.*]] = getelementptr i8, ptr [[TMP1]], i64 4
89+
; CHECK-NEXT: [[P2:%.*]] = getelementptr i32, ptr [[BASE]], i64 [[TMP0]]
9290
; CHECK-NEXT: ret ptr [[P2]]
9391
;
9492
entry:

0 commit comments

Comments
 (0)