[AggressiveInstCombine] folding load for constant global patterned arrays and structs by GEP-indices

Differential Revision: https://reviews.llvm.org/D146622
Fixes #61615
khei4 committed May 9, 2023
1 parent c096e91 commit 0574a4b
Showing 2 changed files with 74 additions and 42 deletions.
59 changes: 48 additions & 11 deletions llvm/lib/Transforms/AggressiveInstCombine/AggressiveInstCombine.cpp
@@ -821,6 +821,48 @@ static bool foldConsecutiveLoads(Instruction &I, const DataLayout &DL,
return true;
}

// Calculate the GEP stride and the accumulated constant offset (ModOffset)
// of a chain of GEPs, and return the pair {Stride, ModOffset}.
static std::pair<APInt, APInt>
getStrideAndModOffsetOfGEP(Value *PtrOp, const DataLayout &DL) {
unsigned BW = DL.getIndexTypeSizeInBits(PtrOp->getType());
std::optional<APInt> Stride;
APInt ModOffset(BW, 0);
// Return the minimum GEP stride: the greatest common divisor of consecutive
// GEP index scales (cf. Bézout's identity).
while (auto *GEP = dyn_cast<GEPOperator>(PtrOp)) {
MapVector<Value *, APInt> VarOffsets;
if (!GEP->collectOffset(DL, BW, VarOffsets, ModOffset))
break;

for (auto [V, Scale] : VarOffsets) {
// Only keep a power-of-two factor for non-inbounds GEPs.
if (!GEP->isInBounds())
Scale = APInt::getOneBitSet(Scale.getBitWidth(), Scale.countr_zero());

if (!Stride)
Stride = Scale;
else
Stride = APIntOps::GreatestCommonDivisor(*Stride, Scale);
}

PtrOp = GEP->getPointerOperand();
}

// Check whether the pointer chains back to a GlobalVariable. Even if it
// doesn't, the caller can still fold based on the load alignment. Also bail
// to a unit stride when no variable index was seen, so the srem below never
// dereferences an empty Stride.
if (!isa<GlobalVariable>(PtrOp) || !Stride)
return {APInt(BW, 1), APInt(BW, 0)};

// GEP indices may be signed, so normalize the constant offset to the
// non-negative remainder of division by the minimum GEP stride.
ModOffset = ModOffset.srem(*Stride);
if (ModOffset.isNegative())
ModOffset += *Stride;

return {*Stride, ModOffset};
}
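
For intuition, the GCD reasoning above says that a pointer built as
base + c + i*s1 + j*s2 can only reach offsets congruent to c modulo
gcd(s1, s2). A standalone scalar sketch of the same computation, using an
illustrative helper name that is not part of this commit:

#include <cstdint>
#include <numeric>
#include <optional>
#include <utility>
#include <vector>

// Scalar analogue of getStrideAndModOffsetOfGEP: given the variable index
// scales of a GEP chain and its accumulated constant offset, compute the
// minimum stride (the GCD of the scales) and the constant offset normalized
// to a non-negative remainder modulo that stride.
static std::pair<uint64_t, int64_t>
strideAndModOffset(const std::vector<uint64_t> &Scales, int64_t ConstOffset) {
  std::optional<uint64_t> Stride;
  for (uint64_t Scale : Scales)
    Stride = Stride ? std::gcd(*Stride, Scale) : Scale;
  if (!Stride)
    return {1, 0}; // no variable indices: fall back to unit stride
  // Indices may be signed, so reduce to a non-negative remainder.
  int64_t Mod = ConstOffset % static_cast<int64_t>(*Stride);
  if (Mod < 0)
    Mod += static_cast<int64_t>(*Stride);
  return {*Stride, Mod};
}

For example, strideAndModOffset({4, 6}, -3) returns {2, 1}: offsets of the
form -3 + 4*i + 6*j land exactly on positions congruent to 1 (mod 2).
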

/// If C is a constant patterned array and all valid loaded results for the
/// given alignment are equal to the same constant, return that constant.
static bool foldPatternedLoads(Instruction &I, const DataLayout &DL) {
@@ -835,29 +877,24 @@ static bool foldPatternedLoads(Instruction &I, const DataLayout &DL) {
if (!GV || !GV->isConstant() || !GV->hasDefinitiveInitializer())
return false;

Type *LoadTy = LI->getType();
Constant *C = GV->getInitializer();

// Bail for large initializers in excess of 4K to avoid too many scans.
Constant *C = GV->getInitializer();
uint64_t GVSize = DL.getTypeAllocSize(C->getType());
if (!GVSize || 4096 < GVSize)
return false;

// Check whether pointer arrives back at Global Variable.
// If PtrOp is neither GlobalVariable nor GEP, it might not arrive back at
// GlobalVariable.
// TODO: implement GEP handling
Type *LoadTy = LI->getType();
unsigned BW = DL.getIndexTypeSizeInBits(PtrOp->getType());
// TODO: Determine stride based on GEPs.
APInt Stride(BW, 1);
APInt ConstOffset(BW, 0);
auto [Stride, ConstOffset] = getStrideAndModOffsetOfGEP(PtrOp, DL);

// Any possible offset is a multiple of the GEP stride, and any valid
// offset is a multiple of the load alignment, so checking only multiples of
// the larger of the two suffices to establish that the results are equal.
if (auto LA = LI->getAlign();
LA <= GV->getAlign().valueOrOne() && Stride.getZExtValue() < LA.value())
LA <= GV->getAlign().valueOrOne() && Stride.getZExtValue() < LA.value()) {
ConstOffset = APInt(BW, 0);
Stride = APInt(BW, LA.value());
}

Constant *Ca = ConstantFoldLoadFromConst(C, LoadTy, ConstOffset, DL);
if (!Ca)
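
The alignment branch above relies on a simple fact: when the load alignment
is at least the GEP stride (and no larger than the global's alignment), every
valid offset is a multiple of the alignment, so it suffices to compare the
values loadable at offsets 0, LA, 2*LA, and so on. A brute-force sketch of
that check for a small repeating pattern (illustrative only, not LLVM API):

#include <cstddef>
#include <cstdint>
#include <cstring>
#include <iostream>

// Brute-force version of the equality check the fold depends on: for a
// constant global repeating every 2 bytes, all in-bounds 4-byte-aligned
// i32 loads read the same value, so such a load folds to a constant.
int main() {
  const uint8_t Global[8] = {1, 0, 1, 0, 1, 0, 1, 0}; // 01 00 repeating
  const size_t Align = 4;                             // load alignment
  uint32_t First, Cur;
  std::memcpy(&First, Global, sizeof(First));
  for (size_t Off = 0; Off + sizeof(Cur) <= sizeof(Global); Off += Align) {
    std::memcpy(&Cur, Global + Off, sizeof(Cur));
    if (Cur != First)
      return 1; // a mismatch would block the fold
  }
  std::cout << "foldable to " << First << "\n"; // 65537 on little-endian
  return 0;
}
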
57 changes: 26 additions & 31 deletions llvm/test/Transforms/AggressiveInstCombine/patterned-load.ll
@@ -48,47 +48,50 @@ define i8 @inbounds_gep_load_i8_align2_volatile(i64 %idx){
declare ptr @llvm.ptrmask.p0.i64(ptr , i64)

; can't be folded because ptrmask can change the pointer while preserving provenance
define i8 @inbounds_gep_load_i8_align2_ptrmasked(i64 %idx, i64 %mask){
; CHECK-LABEL: @inbounds_gep_load_i8_align2_ptrmasked(
; CHECK-NEXT: ret i8 1
; This also invalidates the GEP-index analysis
define i8 @inbounds_gep_load_i16_align1_ptrmasked(i64 %idx, i64 %mask){
; CHECK-LABEL: @inbounds_gep_load_i16_align1_ptrmasked(
; CHECK-NEXT: [[TMP1:%.*]] = call ptr @llvm.ptrmask.p0.i64(ptr @constarray1, i64 [[MASK:%.*]])
; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i16, ptr [[TMP1]], i64 [[IDX:%.*]]
; CHECK-NEXT: [[TMP3:%.*]] = load i8, ptr [[TMP2]], align 1
; CHECK-NEXT: ret i8 [[TMP3]]
;
%1 = call ptr @llvm.ptrmask.p0.i64(ptr @constarray1, i64 %mask)
%2 = getelementptr inbounds i8, ptr %1, i64 %idx
%3 = load i8, ptr %2, align 2
%2 = getelementptr inbounds i16, ptr %1, i64 %idx
%3 = load i8, ptr %2, align 1
ret i8 %3
}

; TODO: this will be ret i32 65537(LE), 16777472(BE)
define i32 @inbounds_gep_i16_load_i32_align1(i64 %idx){
; CHECK-LABEL: @inbounds_gep_i16_load_i32_align1(
; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i16, ptr @constarray1, i64 [[IDX:%.*]]
; CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 1
; CHECK-NEXT: ret i32 [[TMP2]]
; LE-LABEL: @inbounds_gep_i16_load_i32_align1(
; LE-NEXT: ret i32 65537
;
; BE-LABEL: @inbounds_gep_i16_load_i32_align1(
; BE-NEXT: ret i32 16777472
;
%1 = getelementptr inbounds i16, ptr @constarray1, i64 %idx
%2 = load i32, ptr %1, align 1
ret i32 %2
}
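
The expected constants follow from reassembling the global's bytes as a
4-byte integer under each byte order; assuming @constarray1 holds the
repeating 01 00 byte pattern (consistent with the little-endian result
above), that gives 0x00010001 = 65537 (LE) and 0x01000100 = 16777472 (BE).
A quick arithmetic sketch:

#include <cstdint>
#include <iostream>

int main() {
  const uint32_t B[4] = {1, 0, 1, 0}; // first four bytes: 01 00 01 00
  uint32_t LE = B[0] | (B[1] << 8) | (B[2] << 16) | (B[3] << 24);
  uint32_t BE = (B[0] << 24) | (B[1] << 16) | (B[2] << 8) | B[3];
  std::cout << LE << " " << BE << "\n"; // prints: 65537 16777472
  return 0;
}
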

; TODO: this will be ret i32 65537(LE), 16777472(BE)
define i32 @inbounds_gep_i32_load_i32_align8(i64 %idx){
; CHECK-LABEL: @inbounds_gep_i32_load_i32_align8(
; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, ptr @constarray1, i64 [[IDX:%.*]]
; CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 8
; CHECK-NEXT: ret i32 [[TMP2]]
; LE-LABEL: @inbounds_gep_i32_load_i32_align8(
; LE-NEXT: ret i32 65537
;
; BE-LABEL: @inbounds_gep_i32_load_i32_align8(
; BE-NEXT: ret i32 16777472
;
%1 = getelementptr inbounds i32, ptr @constarray1, i64 %idx
%2 = load i32, ptr %1, align 8
ret i32 %2
}

; TODO: this will be ret i32 65537(LE), 16777472(BE)
define i32 @inbounds_gep_i32_load_i32_const_offset(i64 %idx){
; CHECK-LABEL: @inbounds_gep_i32_load_i32_const_offset(
; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i16, ptr @constarray2, i64 1
; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i64 [[IDX:%.*]]
; CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4
; CHECK-NEXT: ret i32 [[TMP3]]
; LE-LABEL: @inbounds_gep_i32_load_i32_const_offset(
; LE-NEXT: ret i32 65537
;
; BE-LABEL: @inbounds_gep_i32_load_i32_const_offset(
; BE-NEXT: ret i32 16777472
;
%1 = getelementptr inbounds i16, ptr @constarray2, i64 1
%2 = getelementptr inbounds i32, ptr %1, i64 %idx
@@ -125,13 +128,9 @@ define i32 @gep_load_i32_align2_const_offset_wrap(i64 %idx){
ret i32 %3
}

; TODO: this will be ret i32 42
define i32 @inbounds_gep_i32_load_i32_const_ptr_array(i64 %idx){
; CHECK-LABEL: @inbounds_gep_i32_load_i32_const_ptr_array(
; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds ptr, ptr @constptrarray, i64 [[IDX:%.*]]
; CHECK-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4
; CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4
; CHECK-NEXT: ret i32 [[TMP3]]
; CHECK-NEXT: ret i32 42
;
%1 = getelementptr inbounds ptr, ptr @constptrarray, i64 %idx
%2 = load ptr, ptr %1, align 4
@@ -163,16 +162,12 @@ define i32 @inbounds_gep_i8_load_i32_align1_packedstruct(i64 %idx){
ret i32 %2
}

; TODO: this could be folded into 65537(LE), 16777472(BE)
define i32 @inbounds_gep_i32_load_i32_align4_struct_with_const_offset(i64 %idx){
; LE-LABEL: @inbounds_gep_i32_load_i32_align4_struct_with_const_offset(
; LE-NEXT: ret i32 65537
;
; BE-LABEL: @inbounds_gep_i32_load_i32_align4_struct_with_const_offset(
; BE-NEXT: [[TMP1:%.*]] = getelementptr inbounds i16, ptr @conststruct, i64 1
; BE-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i64 [[IDX:%.*]]
; BE-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4
; BE-NEXT: ret i32 [[TMP3]]
; BE-NEXT: ret i32 16777472
;
%1 = getelementptr inbounds i16, ptr @conststruct, i64 1
%2 = getelementptr inbounds i32, ptr %1, i64 %idx
