[InstCombine] Canonicalize GEP of GEP by swapping constant-indexed GEP to the back

Canonicalize GEP of GEP by swapping a GEP whose suffix indices are constant to the back (and a GEP with all-constant indices to the back of that). This allows more constant-index GEP merging to happen. Exceptions: the swap is not performed if it would violate use-def relations or anti-optimize LICM.

For a constant-indexed GEP of GEP that cannot be merged directly, the two GEPs are cast to i8* and merged.
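A minimal before/after sketch of the swap-then-merge behavior, mirroring the @merge test updated below (the IR is hand-written for illustration, not quoted from the diff):

; Before: the leading constant-indexed GEP blocks merging.
%1 = getelementptr inbounds i32, ptr %p, i64 1
%2 = getelementptr inbounds i32, ptr %1, i64 %a
%3 = getelementptr inbounds i32, ptr %2, i64 2

; After swapping, the two constant-indexed GEPs become adjacent and merge.
; inbounds is dropped because the variable index %a may have arbitrary sign.
%1 = getelementptr i32, ptr %p, i64 %a
%2 = getelementptr i32, ptr %1, i64 3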

Reviewed By: nikic

Differential Revision: https://reviews.llvm.org/D125845
huangjd committed Oct 20, 2022
1 parent 93f1b48 commit 6c767ce
Showing 7 changed files with 141 additions and 79 deletions.
45 changes: 39 additions & 6 deletions llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
@@ -1943,6 +1943,14 @@ Instruction *InstCombinerImpl::visitGEPOfGEP(GetElementPtrInst &GEP,
if (!shouldMergeGEPs(*cast<GEPOperator>(&GEP), *Src))
return nullptr;

// LICM moves a GEP with constant indices to the front, while canonicalization
// swaps it to the back of a non-constant GEP. If both transformations can be
// applied, LICM takes priority because it generally reduces instruction count
// in the loop body, whereas performing the canonicalization swap first negates
// the LICM opportunity without necessarily reducing instruction count.
bool ShouldCanonicalizeSwap = true;

if (Src->getResultElementType() == GEP.getSourceElementType() &&
Src->getNumOperands() == 2 && GEP.getNumOperands() == 2 &&
Src->hasOneUse()) {
@@ -1952,6 +1960,12 @@ Instruction *InstCombinerImpl::visitGEPOfGEP(GetElementPtrInst &GEP,
if (LI) {
// Try to reassociate loop invariant GEP chains to enable LICM.
if (Loop *L = LI->getLoopFor(GEP.getParent())) {
// If SO1 is invariant and GO1 is variant, they should not be swapped by
// canonicalization even if the swap is otherwise applicable; doing so would
// trigger the LICM swap on the next iteration, causing an infinite loop.
if (!L->isLoopInvariant(GO1) && L->isLoopInvariant(SO1))
ShouldCanonicalizeSwap = false;

// Reassociate the two GEPs if SO1 is variant in the loop and GO1 is
// invariant: this breaks the dependence between GEPs and allows LICM
// to hoist the invariant part out of the loop.
@@ -1976,12 +1990,31 @@ Instruction *InstCombinerImpl::visitGEPOfGEP(GetElementPtrInst &GEP,
}
}

// Note that if our source is a gep chain itself then we wait for that
// chain to be resolved before we perform this transformation. This
// avoids us creating a TON of code in some cases.
if (auto *SrcGEP = dyn_cast<GEPOperator>(Src->getOperand(0)))
if (SrcGEP->getNumOperands() == 2 && shouldMergeGEPs(*Src, *SrcGEP))
return nullptr; // Wait until our source is folded to completion.
// Canonicalization swap: swap a GEP with a constant index suffix to the back
// if doing so neither violates def-use relations nor contradicts the
// loop-invariant swap above. This allows more potential applications of the
// constant-indexed GEP optimizations below.
if (ShouldCanonicalizeSwap && Src->hasOneUse() &&
Src->getPointerOperandType() == GEP.getPointerOperandType() &&
Src->getType()->isVectorTy() == GEP.getType()->isVectorTy() &&
!isa<GlobalValue>(Src->getPointerOperand())) {
// When swapping, a GEP with all constant indices takes priority over one
// where only the last few indices (but not all) are constant, because it
// may be merged with another GEP with all constant indices.
if ((isa<ConstantInt>(*(Src->indices().end() - 1)) &&
!isa<ConstantInt>(*(GEP.indices().end() - 1))) ||
(Src->hasAllConstantIndices() && !GEP.hasAllConstantIndices())) {
// Cannot guarantee inbounds after swapping because the non-constant index
// can have arbitrary sign.
Value *NewSrc = Builder.CreateGEP(
GEP.getSourceElementType(), Src->getOperand(0),
SmallVector<Value *>(GEP.indices()), Src->getName());
GetElementPtrInst *NewGEP = GetElementPtrInst::Create(
Src->getSourceElementType(), NewSrc,
SmallVector<Value *>(Src->indices()), GEP.getName());
return NewGEP;
}
}

// For constant GEPs, use a more general offset-based folding approach.
// Only do this for opaque pointers, as the result element type may change.
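The offset-based folding mentioned in the comment above is exercised by the notDivisible test in gep-merge-constant-indices.ll further down. A minimal sketch, assuming that file's i24:8:8 datalayout (an i24 then occupies 3 bytes); the input indices here are illustrative, while the folded result matches the test's CHECK line:

; 4 bytes (one i32) followed by 3 bytes (one i24); 7 is divisible by
; neither element size, so the pair is rewritten through i8.
%1 = getelementptr inbounds i32, ptr %p, i64 1
%2 = getelementptr inbounds i24, ptr %1, i64 1
; folds to a single GEP with the summed byte offset:
%2 = getelementptr inbounds i8, ptr %p, i64 7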
112 changes: 66 additions & 46 deletions llvm/test/Transforms/InstCombine/gep-canonicalize-constant-indices.ll
@@ -1,21 +1,19 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt < %s -passes=instcombine -opaque-pointers -S | FileCheck %s
; RUN: opt < %s -passes='require<loops>,instcombine' -opaque-pointers -S | FileCheck %s

; Constant-indexed GEP instructions in a chain of GEP instructions should be
; swapped to the end whenever such a transformation is valid. This allows them
; to be merged.

declare void @use(i1)


; The constant-indexed GEP instruction should be swapped to the end, even
; without merging.
; result = (((ptr) p + a) + b) + 1
; result = (((i32*) p + a) + b) + 1
define ptr @basic(ptr %p, i64 %a, i64 %b) {
; CHECK-LABEL: @basic(
; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[P:%.*]], i64 1
; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i64 [[A:%.*]]
; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[TMP2]], i64 [[B:%.*]]
; CHECK-NEXT: [[TMP1:%.*]] = getelementptr i32, ptr [[P:%.*]], i64 [[A:%.*]]
; CHECK-NEXT: [[TMP2:%.*]] = getelementptr i32, ptr [[TMP1]], i64 [[B:%.*]]
; CHECK-NEXT: [[TMP3:%.*]] = getelementptr i32, ptr [[TMP2]], i64 1
; CHECK-NEXT: ret ptr [[TMP3]]
;
%1 = getelementptr inbounds i32, ptr %p, i64 1
@@ -27,33 +25,34 @@ define ptr @basic(ptr %p, i64 %a, i64 %b) {
; GEP with the last index being a constant should also be swapped.
define ptr @partialConstant1(ptr %p, i64 %a, i64 %b) {
; CHECK-LABEL: @partialConstant1(
; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[P:%.*]], i64 [[B:%.*]]
; CHECK-NEXT: ret ptr [[TMP1]]
; CHECK-NEXT: [[TMP1:%.*]] = getelementptr i32, ptr [[P:%.*]], i64 [[B:%.*]]
; CHECK-NEXT: [[TMP2:%.*]] = getelementptr [4 x i32], ptr [[TMP1]], i64 [[A:%.*]], i64 1
; CHECK-NEXT: ret ptr [[TMP2]]
;
%1 = getelementptr inbounds [4 x i32], ptr %p, i64 %a, i64 1
%2 = getelementptr inbounds i32, ptr %p, i64 %b
%2 = getelementptr inbounds i32, ptr %1, i64 %b
ret ptr %2
}

; Negative test. GEP should not be swapped if the last index is not a constant.
define ptr @partialConstant2(ptr %p, i64 %a, i64 %b) {
; CHECK-LABEL: @partialConstant2(
; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[P:%.*]], i64 [[B:%.*]]
; CHECK-NEXT: ret ptr [[TMP1]]
; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds [4 x i32], ptr [[P:%.*]], i64 1, i64 [[A:%.*]]
; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i64 [[B:%.*]]
; CHECK-NEXT: ret ptr [[TMP2]]
;
%1 = getelementptr inbounds [4 x i32], ptr %p, i64 1, i64 %a
%2 = getelementptr inbounds i32, ptr %p, i64 %b
%2 = getelementptr inbounds i32, ptr %1, i64 %b
ret ptr %2
}

; Constant-indexed GEP are merged after swawpping.
; result = ((ptr) p + a) + 3
; Constant-indexed GEP are merged after swapping.
; result = ((i32*) p + a) + 3
define ptr @merge(ptr %p, i64 %a) {
; CHECK-LABEL: @merge(
; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[P:%.*]], i64 1
; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i64 [[A:%.*]]
; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[TMP2]], i64 2
; CHECK-NEXT: ret ptr [[TMP3]]
; CHECK-NEXT: [[TMP1:%.*]] = getelementptr i32, ptr [[P:%.*]], i64 [[A:%.*]]
; CHECK-NEXT: [[TMP2:%.*]] = getelementptr i32, ptr [[TMP1]], i64 3
; CHECK-NEXT: ret ptr [[TMP2]]
;
%1 = getelementptr inbounds i32, ptr %p, i64 1
%2 = getelementptr inbounds i32, ptr %1, i64 %a
@@ -64,16 +63,14 @@ define ptr @merge(ptr %p, i64 %a) {
; Multiple constant-indexed GEP. Note that the first two cannot be merged at
; first, but after the second and third are merged, the result can be merged
; with the first one on the next pass.
; result = (ptr) ((ptr) ((ptr) ptr + a) + (a * b)) + 9
; result = (<3 x i32>*) ((i16*) ((i8*) ptr + a) + (a * b)) + 9
define ptr @nested(ptr %p, i64 %a, i64 %b) {
; CHECK-LABEL: @nested(
; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds <3 x i32>, ptr [[P:%.*]], i64 1
; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i64 [[A:%.*]]
; CHECK-NEXT: [[TMP3:%.*]] = mul i64 [[A]], [[B:%.*]]
; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds <5 x i32>, ptr [[TMP2]], i64 4
; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i16, ptr [[TMP4]], i64 [[TMP3]]
; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds <4 x i32>, ptr [[TMP5]], i64 1
; CHECK-NEXT: ret ptr [[TMP6]]
; CHECK-NEXT: [[TMP1:%.*]] = getelementptr i8, ptr [[P:%.*]], i64 [[A:%.*]]
; CHECK-NEXT: [[TMP2:%.*]] = mul i64 [[A]], [[B:%.*]]
; CHECK-NEXT: [[TMP3:%.*]] = getelementptr i16, ptr [[TMP1]], i64 [[TMP2]]
; CHECK-NEXT: [[TMP4:%.*]] = getelementptr <3 x i32>, ptr [[TMP3]], i64 10
; CHECK-NEXT: ret ptr [[TMP4]]
;
%1 = getelementptr inbounds <3 x i32>, ptr %p, i64 1
%2 = getelementptr inbounds i8, ptr %1, i64 %a
@@ -87,9 +84,9 @@ define ptr @nested(ptr %p, i64 %a, i64 %b) {
; It is valid to swap if the source operand of the first GEP has multiple uses.
define ptr @multipleUses1(ptr %p) {
; CHECK-LABEL: @multipleUses1(
; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[P:%.*]], i64 1
; CHECK-NEXT: [[TMP2:%.*]] = ptrtoint ptr [[P]] to i64
; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i64 [[TMP2]]
; CHECK-NEXT: [[TMP1:%.*]] = ptrtoint ptr [[P:%.*]] to i64
; CHECK-NEXT: [[TMP2:%.*]] = getelementptr i32, ptr [[P]], i64 [[TMP1]]
; CHECK-NEXT: [[TMP3:%.*]] = getelementptr i32, ptr [[TMP2]], i64 1
; CHECK-NEXT: ret ptr [[TMP3]]
;
%1 = getelementptr inbounds i32, ptr %p, i64 1
@@ -98,23 +95,9 @@ define ptr @multipleUses1(ptr %p) {
ret ptr %3
}

; It is valid to swap if the second GEP has multiple uses.
define ptr @multipleUses2(ptr %p, i64 %a) {
; CHECK-LABEL: @multipleUses2(
; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[P:%.*]], i64 1
; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i64 [[A:%.*]]
; CHECK-NEXT: call void @use(ptr nonnull [[TMP2]])
; CHECK-NEXT: ret ptr [[TMP2]]
;
%1 = getelementptr inbounds i32, ptr %p, i64 1
%2 = getelementptr inbounds i32, ptr %1, i64 %a
call void @use(ptr %2)
ret ptr %2
}

; Negative test. It is not valid to swap if the first GEP has multiple uses.
define ptr @multipleUses3(ptr %p) {
; CHECK-LABEL: @multipleUses3(
define ptr @multipleUses2(ptr %p) {
; CHECK-LABEL: @multipleUses2(
; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[P:%.*]], i64 1
; CHECK-NEXT: [[TMP2:%.*]] = ptrtoint ptr [[TMP1]] to i64
; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i64 [[TMP2]]
@@ -125,3 +108,40 @@ define ptr @multipleUses3(ptr %p) {
%3 = getelementptr inbounds i32, ptr %1, i64 %2
ret ptr %3
}

; Negative test. LICM should take priority over canonicalization, so the first
; GEP should not be swapped, even if it contains a constant index.
define i64 @licm(ptr %p) {
; CHECK-LABEL: @licm(
; CHECK-NEXT: entry:
; CHECK-NEXT: br label [[FOR_BODY:%.*]]
; CHECK: for.body:
; CHECK-NEXT: [[I:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INEXT:%.*]], [[FOR_BODY]] ]
; CHECK-NEXT: [[SUM:%.*]] = phi i64 [ 0, [[ENTRY]] ], [ [[ADD:%.*]], [[FOR_BODY]] ]
; CHECK-NEXT: [[P1:%.*]] = getelementptr i64, ptr [[P:%.*]], i64 4
; CHECK-NEXT: [[P2:%.*]] = getelementptr i64, ptr [[P1]], i64 [[I]]
; CHECK-NEXT: [[LOAD:%.*]] = load i64, ptr [[P2]], align 4
; CHECK-NEXT: [[ADD]] = add nsw i64 [[SUM]], [[LOAD]]
; CHECK-NEXT: [[INEXT]] = add nuw nsw i64 [[I]], 1
; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[I]], 1000000
; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_END:%.*]], label [[FOR_BODY]]
; CHECK: for.end:
; CHECK-NEXT: ret i64 [[ADD]]
;
entry:
br label %for.body

for.body:
%i = phi i64 [ 0, %entry ], [ %inext, %for.body ]
%sum = phi i64 [ 0, %entry ], [ %add, %for.body ]
%p1 = getelementptr i64, ptr %p, i64 4
%p2 = getelementptr i64, ptr %p1, i64 %i
%load = load i64, ptr %p2
%add = add nsw i64 %sum, %load
%inext = add nuw nsw i64 %i, 1
%exitcond = icmp eq i64 %i, 1000000
br i1 %exitcond, label %for.end, label %for.body

for.end:
ret i64 %add
}
45 changes: 29 additions & 16 deletions llvm/test/Transforms/InstCombine/gep-merge-constant-indices.ll
@@ -9,7 +9,7 @@ target datalayout = "i24:8:8"
%struct.B = type { i8, [3 x i16], %struct.A, float }
%struct.C = type { i8, i32, i32 }

; result = (ptr) p + 3
; result = (i32*) p + 3
define ptr @mergeBasic(ptr %p) {
; CHECK-LABEL: @mergeBasic(
; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[P:%.*]], i64 3
@@ -20,8 +20,8 @@ define ptr @mergeBasic(ptr %p) {
ret ptr %2
}

; Converted to ptr and merged.
; result = (ptr) p + 10
; Converted to i8* and merged.
; result = (i8*) p + 10
define ptr @mergeDifferentTypes(ptr %p) {
; CHECK-LABEL: @mergeDifferentTypes(
; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i8, ptr [[P:%.*]], i64 10
@@ -32,8 +32,8 @@ define ptr @mergeDifferentTypes(ptr %p) {
ret ptr %2
}

; Converted to ptr and merged.
; result = (ptr) p + 10
; Converted to i8* and merged.
; result = (i8*) p + 10
define ptr @mergeReverse(ptr %p) {
; CHECK-LABEL: @mergeReverse(
; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i8, ptr [[P:%.*]], i64 10
@@ -55,7 +55,7 @@ define ptr @zeroSum(ptr %p) {
ret ptr %2
}

; result = (ptr) ((ptr) p + 1) + 17
; result = (i8*) (([20 x i8]*) p + 1) + 17
define ptr @array1(ptr %p) {
; CHECK-LABEL: @array1(
; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds [20 x i8], ptr [[P:%.*]], i64 1, i64 17
@@ -66,8 +66,8 @@ define ptr @array1(ptr %p) {
ret ptr %2
}

; Converted to ptr and merged.
; result = (ptr) p + 20
; Converted to i8* and merged.
; result = (i8*) p + 20
define ptr @array2(ptr %p) {
; CHECK-LABEL: @array2(
; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i8, ptr [[P:%.*]], i64 20
@@ -78,8 +78,8 @@ define ptr @array2(ptr %p) {
ret ptr %2
}

; Converted to ptr and merged.
; result = (ptr) p + 36
; Converted to i8* and merged.
; result = (i8*) p + 36
define ptr @struct1(ptr %p) {
; CHECK-LABEL: @struct1(
; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i8, ptr [[P:%.*]], i64 36
@@ -101,7 +101,7 @@ define ptr @struct2(ptr %p) {
ret ptr %2
}

; result = (ptr) &((struct.B) p)[0].member2.member0 + 7
; result = (i8*) &((struct.B) p)[0].member2.member0 + 7
define ptr @structStruct(ptr %p) {
; CHECK-LABEL: @structStruct(
; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_B:%.*]], ptr [[P:%.*]], i64 0, i32 2, i32 0, i64 7
@@ -115,7 +115,7 @@ define ptr @structStruct(ptr %p) {
; First GEP offset is not divisible by last GEP's source element size, but first
; GEP points to an array such that the last GEP offset is divisible by the
; array's element size, so the first GEP can be rewritten with an extra index.
; result = (ptr) &((struct.B*) p)[i].member1 + 2
; result = (i16*) &((struct.B*) p)[i].member1 + 2
define ptr @appendIndex(ptr %p, i64 %i) {
; CHECK-LABEL: @appendIndex(
; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_B:%.*]], ptr [[P:%.*]], i64 [[I:%.*]], i32 1, i64 2
@@ -126,10 +126,23 @@ define ptr @appendIndex(ptr %p, i64 %i) {
ret ptr %2
}

; Offset of either GEP is not divisible by the other's size, converted to ptr
; After canonicalizing, the second GEP is moved to the front, and then merged
; with the first one with rewritten indices.
; result = (i8*) &((struct.A*) &((struct.B*) p)[i].member2).member0 + 2
define ptr @appendIndexReverse(ptr %p, i64 %i) {
; CHECK-LABEL: @appendIndexReverse(
; CHECK-NEXT: [[TMP1:%.*]] = getelementptr [[STRUCT_B:%.*]], ptr [[P:%.*]], i64 [[I:%.*]], i32 2, i32 0, i64 2
; CHECK-NEXT: ret ptr [[TMP1]]
;
%1 = getelementptr inbounds i64, ptr %p, i64 1
%2 = getelementptr inbounds %struct.B, ptr %1, i64 %i, i32 1
ret ptr %2
}

; Offset of either GEP is not divisible by the other's size, converted to i8*
; and merged.
; Here i24 is 8-bit aligned.
; result = (ptr) p + 7
; result = (i8*) p + 7
define ptr @notDivisible(ptr %p) {
; CHECK-LABEL: @notDivisible(
; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i8, ptr [[P:%.*]], i64 7
@@ -144,8 +157,8 @@ define ptr @notDivisible(ptr %p) {
; or divisible by the other's size.
define ptr @partialConstant2(ptr %p, i64 %a) {
; CHECK-LABEL: @partialConstant2(
; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[P:%.*]], i64 1
; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds [4 x i64], ptr [[TMP1]], i64 [[A:%.*]], i64 2
; CHECK-NEXT: [[TMP1:%.*]] = getelementptr [4 x i64], ptr [[P:%.*]], i64 [[A:%.*]], i64 2
; CHECK-NEXT: [[TMP2:%.*]] = getelementptr i32, ptr [[TMP1]], i64 1
; CHECK-NEXT: ret ptr [[TMP2]]
;
%1 = getelementptr inbounds i32, ptr %p, i64 1
4 changes: 2 additions & 2 deletions llvm/test/Transforms/InstCombine/shift.ll
@@ -1723,10 +1723,10 @@ define void @ashr_out_of_range(ptr %A) {
define void @ashr_out_of_range_1(ptr %A) {
; CHECK-LABEL: @ashr_out_of_range_1(
; CHECK-NEXT: [[L:%.*]] = load i177, ptr [[A:%.*]], align 4
; CHECK-NEXT: [[G11:%.*]] = getelementptr i177, ptr [[A]], i64 -1
; CHECK-NEXT: [[B24_LOBIT:%.*]] = ashr i177 [[L]], 175
; CHECK-NEXT: [[TMP1:%.*]] = trunc i177 [[B24_LOBIT]] to i64
; CHECK-NEXT: [[G62:%.*]] = getelementptr i177, ptr [[G11]], i64 [[TMP1]]
; CHECK-NEXT: [[G111:%.*]] = getelementptr i177, ptr [[A]], i64 [[TMP1]]
; CHECK-NEXT: [[G62:%.*]] = getelementptr i177, ptr [[G111]], i64 -1
; CHECK-NEXT: store i177 0, ptr [[G62]], align 4
; CHECK-NEXT: ret void
;
@@ -38,8 +38,7 @@ define void @vector_reverse_mask_v4i1(double* noalias %a, double* noalias %cond,
; CHECK-NEXT: [[TMP4:%.*]] = bitcast double* [[TMP3]] to <4 x double>*
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x double>, <4 x double>* [[TMP4]], align 8
; CHECK-NEXT: [[REVERSE:%.*]] = shufflevector <4 x double> [[WIDE_LOAD]], <4 x double> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds double, double* [[TMP2]], i64 -4
; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds double, double* [[TMP5]], i64 -3
; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds double, double* [[TMP2]], i64 -7
; CHECK-NEXT: [[TMP7:%.*]] = bitcast double* [[TMP6]] to <4 x double>*
; CHECK-NEXT: [[WIDE_LOAD1:%.*]] = load <4 x double>, <4 x double>* [[TMP7]], align 8
; CHECK-NEXT: [[REVERSE2:%.*]] = shufflevector <4 x double> [[WIDE_LOAD1]], <4 x double> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
@@ -50,8 +49,7 @@
; CHECK-NEXT: [[REVERSE3:%.*]] = shufflevector <4 x i1> [[TMP8]], <4 x i1> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
; CHECK-NEXT: [[TMP12:%.*]] = bitcast double* [[TMP11]] to <4 x double>*
; CHECK-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <4 x double> @llvm.masked.load.v4f64.p0v4f64(<4 x double>* [[TMP12]], i32 8, <4 x i1> [[REVERSE3]], <4 x double> poison)
; CHECK-NEXT: [[TMP13:%.*]] = getelementptr double, double* [[TMP10]], i64 -4
; CHECK-NEXT: [[TMP14:%.*]] = getelementptr double, double* [[TMP13]], i64 -3
; CHECK-NEXT: [[TMP14:%.*]] = getelementptr double, double* [[TMP10]], i64 -7
; CHECK-NEXT: [[REVERSE5:%.*]] = shufflevector <4 x i1> [[TMP9]], <4 x i1> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
; CHECK-NEXT: [[TMP15:%.*]] = bitcast double* [[TMP14]] to <4 x double>*
; CHECK-NEXT: [[WIDE_MASKED_LOAD6:%.*]] = call <4 x double> @llvm.masked.load.v4f64.p0v4f64(<4 x double>* [[TMP15]], i32 8, <4 x i1> [[REVERSE5]], <4 x double> poison)
