diff --git a/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp b/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp index c6de57cb34c69..81439e62bfbbf 100644 --- a/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp +++ b/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp @@ -2803,6 +2803,47 @@ Instruction *InstCombinerImpl::visitGEPOfGEP(GetElementPtrInst &GEP, if (Src->getResultElementType() != GEP.getSourceElementType()) return nullptr; + // Fold chained GEP with constant base into single GEP: + // gep i8, (gep i8, %base, C1), (select Cond, C2, C3) + // -> gep i8, %base, (select Cond, C1+C2, C1+C3) + if (Src->hasOneUse() && GEP.getNumIndices() == 1 && + Src->getNumIndices() == 1) { + Value *SrcIdx = *Src->idx_begin(); + Value *GEPIdx = *GEP.idx_begin(); + const APInt *ConstOffset, *TrueVal, *FalseVal; + Value *Cond; + + if ((match(SrcIdx, m_APInt(ConstOffset)) && + match(GEPIdx, + m_Select(m_Value(Cond), m_APInt(TrueVal), m_APInt(FalseVal)))) || + (match(GEPIdx, m_APInt(ConstOffset)) && + match(SrcIdx, + m_Select(m_Value(Cond), m_APInt(TrueVal), m_APInt(FalseVal))))) { + auto *Select = isa<SelectInst>(GEPIdx) ? cast<SelectInst>(GEPIdx) + : cast<SelectInst>(SrcIdx); + + // Make sure the select has only one use. 
+ if (!Select->hasOneUse()) + return nullptr; + + if (TrueVal->getBitWidth() != ConstOffset->getBitWidth() || + FalseVal->getBitWidth() != ConstOffset->getBitWidth()) + return nullptr; + + APInt NewTrueVal = *ConstOffset + *TrueVal; + APInt NewFalseVal = *ConstOffset + *FalseVal; + Constant *NewTrue = ConstantInt::get(Select->getType(), NewTrueVal); + Constant *NewFalse = ConstantInt::get(Select->getType(), NewFalseVal); + Value *NewSelect = Builder.CreateSelect(Cond, NewTrue, NewFalse); + GEPNoWrapFlags Flags = + getMergedGEPNoWrapFlags(*Src, *cast<GEPOperator>(&GEP)); + return replaceInstUsesWith(GEP, + Builder.CreateGEP(GEP.getResultElementType(), + Src->getPointerOperand(), + NewSelect, "", Flags)); + } + } + // Find out whether the last index in the source GEP is a sequential idx. bool EndsWithSequential = false; for (gep_type_iterator I = gep_type_begin(*Src), E = gep_type_end(*Src); diff --git a/llvm/test/Transforms/InstCombine/gep-fold-chained-const-select.ll b/llvm/test/Transforms/InstCombine/gep-fold-chained-const-select.ll new file mode 100644 index 0000000000000..2bbbaef287e99 --- /dev/null +++ b/llvm/test/Transforms/InstCombine/gep-fold-chained-const-select.ll @@ -0,0 +1,208 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt < %s -S -passes=instcombine | FileCheck %s + +define ptr @src_origin(i32 %arg0, ptr %arg1) { +; CHECK-LABEL: @src_origin( +; CHECK-NEXT: [[V1:%.*]] = icmp sgt i32 [[ARG0:%.*]], 3 +; CHECK-NEXT: [[TMP1:%.*]] = select i1 [[V1]], i64 63252, i64 29452 +; CHECK-NEXT: [[V3:%.*]] = getelementptr i8, ptr [[ARG1:%.*]], i64 [[TMP1]] +; CHECK-NEXT: ret ptr [[V3]] +; + %v0 = getelementptr i8, ptr %arg1, i64 8148 + %v1 = icmp sgt i32 %arg0, 3 + %v2 = select i1 %v1, i64 55104, i64 21304 + %v3 = getelementptr i8, ptr %v0, i64 %v2 + ret ptr %v3 +} + +define ptr @src_nuw(i32 %arg0, ptr %arg1) { +; CHECK-LABEL: @src_nuw( +; CHECK-NEXT: [[V1:%.*]] = icmp sgt i32 [[ARG0:%.*]], 3 +; CHECK-NEXT: [[TMP1:%.*]] = select i1 [[V1]], 
i64 63252, i64 29452 +; CHECK-NEXT: [[V3:%.*]] = getelementptr nuw i8, ptr [[ARG1:%.*]], i64 [[TMP1]] +; CHECK-NEXT: ret ptr [[V3]] +; + %v0 = getelementptr nuw i8, ptr %arg1, i64 8148 + %v1 = icmp sgt i32 %arg0, 3 + %v2 = select i1 %v1, i64 55104, i64 21304 + %v3 = getelementptr nuw i8, ptr %v0, i64 %v2 + ret ptr %v3 +} + +define ptr @src_inbounds_nuw(i32 %arg0, ptr %arg1) { +; CHECK-LABEL: @src_inbounds_nuw( +; CHECK-NEXT: [[V1:%.*]] = icmp sgt i32 [[ARG0:%.*]], 3 +; CHECK-NEXT: [[TMP1:%.*]] = select i1 [[V1]], i64 63252, i64 29452 +; CHECK-NEXT: [[V3:%.*]] = getelementptr nuw i8, ptr [[ARG1:%.*]], i64 [[TMP1]] +; CHECK-NEXT: ret ptr [[V3]] +; + %v0 = getelementptr inbounds nuw i8, ptr %arg1, i64 8148 + %v1 = icmp sgt i32 %arg0, 3 + %v2 = select i1 %v1, i64 55104, i64 21304 + %v3 = getelementptr nuw i8, ptr %v0, i64 %v2 + ret ptr %v3 +} + +define ptr @src_swap(i32 %arg0, ptr %arg1) { +; CHECK-LABEL: @src_swap( +; CHECK-NEXT: [[V1:%.*]] = icmp sgt i32 [[ARG0:%.*]], 3 +; CHECK-NEXT: [[TMP1:%.*]] = select i1 [[V1]], i64 63252, i64 29452 +; CHECK-NEXT: [[V0:%.*]] = getelementptr i8, ptr [[ARG1:%.*]], i64 [[TMP1]] +; CHECK-NEXT: ret ptr [[V0]] +; + %v1 = icmp sgt i32 %arg0, 3 + %v2 = select i1 %v1, i64 55104, i64 21304 + %v0 = getelementptr i8, ptr %arg1, i64 %v2 + %v3 = getelementptr i8, ptr %v0, i64 8148 + ret ptr %v3 +} + +define <2 x ptr> @src_splat(i32 %arg0, <2 x ptr> %arg1) { +; CHECK-LABEL: @src_splat( +; CHECK-NEXT: [[V1:%.*]] = icmp sgt i32 [[ARG0:%.*]], 3 +; CHECK-NEXT: [[TMP1:%.*]] = select i1 [[V1]], <2 x i64> splat (i64 63252), <2 x i64> splat (i64 29452) +; CHECK-NEXT: [[V3:%.*]] = getelementptr i8, <2 x ptr> [[ARG1:%.*]], <2 x i64> [[TMP1]] +; CHECK-NEXT: ret <2 x ptr> [[V3]] +; + %v0 = getelementptr i8, <2 x ptr> %arg1, <2 x i64> splat (i64 8148) + %v1 = icmp sgt i32 %arg0, 3 + %v2 = select i1 %v1, <2 x i64> splat (i64 55104), <2 x i64> splat (i64 21304) + %v3 = getelementptr i8, <2 x ptr> %v0, <2 x i64> %v2 + ret <2 x ptr> %v3 +} + +define <2 x ptr> 
@src_splat_scalar_ptr(i32 %arg0, ptr %arg1) { +; CHECK-LABEL: @src_splat_scalar_ptr( +; CHECK-NEXT: [[V1:%.*]] = icmp sgt i32 [[ARG0:%.*]], 3 +; CHECK-NEXT: [[TMP1:%.*]] = select i1 [[V1]], <2 x i64> splat (i64 63252), <2 x i64> splat (i64 29452) +; CHECK-NEXT: [[V3:%.*]] = getelementptr i8, ptr [[ARG1:%.*]], <2 x i64> [[TMP1]] +; CHECK-NEXT: ret <2 x ptr> [[V3]] +; + %v0 = getelementptr i8, ptr %arg1, <2 x i64> splat (i64 8148) + %v1 = icmp sgt i32 %arg0, 3 + %v2 = select i1 %v1, <2 x i64> splat (i64 55104), <2 x i64> splat (i64 21304) + %v3 = getelementptr i8, <2 x ptr> %v0, <2 x i64> %v2 + ret <2 x ptr> %v3 +} + +; Fail 1: Different GEP type +define ptr @src_fail_different_type(i32 %arg0, ptr %arg1) { +; CHECK-LABEL: @src_fail_different_type( +; CHECK-NEXT: [[V0:%.*]] = getelementptr inbounds nuw i8, ptr [[ARG1:%.*]], i64 8148 +; CHECK-NEXT: [[V1:%.*]] = icmp sgt i32 [[ARG0:%.*]], 3 +; CHECK-NEXT: [[V2:%.*]] = select i1 [[V1]], i64 55104, i64 21304 +; CHECK-NEXT: [[V3:%.*]] = getelementptr i16, ptr [[V0]], i64 [[V2]] +; CHECK-NEXT: ret ptr [[V3]] +; + %v0 = getelementptr inbounds nuw i8, ptr %arg1, i64 8148 + %v1 = icmp sgt i32 %arg0, 3 + %v2 = select i1 %v1, i64 55104, i64 21304 + %v3 = getelementptr i16, ptr %v0, i64 %v2 + ret ptr %v3 +} + +; Fail 2: No constant idx +define ptr @src_fail_no_constant_idx(i32 %arg0, ptr %arg1, i64 %arg2) { +; CHECK-LABEL: @src_fail_no_constant_idx( +; CHECK-NEXT: [[V0:%.*]] = getelementptr inbounds nuw i8, ptr [[ARG1:%.*]], i64 [[ARG2:%.*]] +; CHECK-NEXT: [[V1:%.*]] = icmp sgt i32 [[ARG0:%.*]], 3 +; CHECK-NEXT: [[V2:%.*]] = select i1 [[V1]], i64 55104, i64 21304 +; CHECK-NEXT: [[V3:%.*]] = getelementptr i8, ptr [[V0]], i64 [[V2]] +; CHECK-NEXT: ret ptr [[V3]] +; + %v0 = getelementptr inbounds nuw i8, ptr %arg1, i64 %arg2 + %v1 = icmp sgt i32 %arg0, 3 + %v2 = select i1 %v1, i64 55104, i64 21304 + %v3 = getelementptr i8, ptr %v0, i64 %v2 + ret ptr %v3 +} + +; Fail 3: Multiple use of select +define ptr 
@src_fail_select_multiple_use(i32 %arg0, ptr %arg1, ptr %arg2) { +; CHECK-LABEL: @src_fail_select_multiple_use( +; CHECK-NEXT: [[V0:%.*]] = getelementptr inbounds nuw i8, ptr [[ARG1:%.*]], i64 8148 +; CHECK-NEXT: [[V1:%.*]] = icmp sgt i32 [[ARG0:%.*]], 3 +; CHECK-NEXT: [[V2:%.*]] = select i1 [[V1]], i64 55104, i64 21304 +; CHECK-NEXT: [[V3:%.*]] = getelementptr i8, ptr [[V0]], i64 [[V2]] +; CHECK-NEXT: [[V4:%.*]] = getelementptr i8, ptr [[ARG2:%.*]], i64 [[V2]] +; CHECK-NEXT: store ptr [[V3]], ptr [[V4]], align 8 +; CHECK-NEXT: ret ptr [[V3]] +; + %v0 = getelementptr inbounds nuw i8, ptr %arg1, i64 8148 + %v1 = icmp sgt i32 %arg0, 3 + %v2 = select i1 %v1, i64 55104, i64 21304 + %v3 = getelementptr i8, ptr %v0, i64 %v2 + %v4 = getelementptr i8, ptr %arg2, i64 %v2 + store ptr %v3, ptr %v4, align 8 + ret ptr %v3 +} + +declare void @use(ptr) + +; Fail 4: Multiple use of source GEP +define ptr @src_fail_source_gep_multiple_use(i32 %arg0, ptr %arg1, ptr %arg2) { +; CHECK-LABEL: @src_fail_source_gep_multiple_use( +; CHECK-NEXT: [[V0:%.*]] = getelementptr inbounds nuw i8, ptr [[ARG1:%.*]], i64 8148 +; CHECK-NEXT: [[V1:%.*]] = icmp sgt i32 [[ARG0:%.*]], 3 +; CHECK-NEXT: [[V2:%.*]] = select i1 [[V1]], i64 55104, i64 21304 +; CHECK-NEXT: call void @use(ptr nonnull [[V0]]) +; CHECK-NEXT: [[V3:%.*]] = getelementptr i8, ptr [[V0]], i64 [[V2]] +; CHECK-NEXT: ret ptr [[V3]] +; + %v0 = getelementptr inbounds nuw i8, ptr %arg1, i64 8148 + %v1 = icmp sgt i32 %arg0, 3 + %v2 = select i1 %v1, i64 55104, i64 21304 + call void @use(ptr %v0) + %v3 = getelementptr i8, ptr %v0, i64 %v2 + ret ptr %v3 +} + +define ptr @src_fail_source_gep_multiple_swap(i32 %arg0, ptr %arg1) { +; CHECK-LABEL: @src_fail_source_gep_multiple_swap( +; CHECK-NEXT: [[V1:%.*]] = icmp sgt i32 [[ARG0:%.*]], 3 +; CHECK-NEXT: [[V2:%.*]] = select i1 [[V1]], i64 55104, i64 21304 +; CHECK-NEXT: [[GEP1:%.*]] = getelementptr inbounds nuw i8, ptr [[ARG1:%.*]], i64 [[V2]] +; CHECK-NEXT: call void @use(ptr nonnull [[GEP1]]) +; 
CHECK-NEXT: [[GEP2:%.*]] = getelementptr i8, ptr [[GEP1]], i64 8148 +; CHECK-NEXT: ret ptr [[GEP2]] +; + %v1 = icmp sgt i32 %arg0, 3 + %v2 = select i1 %v1, i64 55104, i64 21304 + %gep1 = getelementptr inbounds nuw i8, ptr %arg1, i64 %v2 + call void @use(ptr %gep1) + %gep2 = getelementptr i8, ptr %gep1, i64 8148 + ret ptr %gep2 +} + +; TODO: constant vector index + +define <2 x ptr> @fail_vector_const_vector_select(i32 %arg0, <2 x ptr> %arg1) { +; CHECK-LABEL: @fail_vector_const_vector_select( +; CHECK-NEXT: [[V0:%.*]] = getelementptr i8, <2 x ptr> [[ARG1:%.*]], <2 x i64> +; CHECK-NEXT: [[V1:%.*]] = icmp sgt i32 [[ARG0:%.*]], 3 +; CHECK-NEXT: [[V2:%.*]] = select i1 [[V1]], <2 x i64> , <2 x i64> +; CHECK-NEXT: [[V3:%.*]] = getelementptr i8, <2 x ptr> [[V0]], <2 x i64> [[V2]] +; CHECK-NEXT: ret <2 x ptr> [[V3]] +; + %v0 = getelementptr i8, <2 x ptr> %arg1, <2 x i64> + %v1 = icmp sgt i32 %arg0, 3 + %v2 = select i1 %v1, <2 x i64> , <2 x i64> + %v3 = getelementptr i8, <2 x ptr> %v0, <2 x i64> %v2 + ret <2 x ptr> %v3 +} + + +define <2 x ptr> @fail_scalar_const_vector_select(i32 %arg0, <2 x ptr> %arg1) { +; CHECK-LABEL: @fail_scalar_const_vector_select( +; CHECK-NEXT: [[V0:%.*]] = getelementptr i8, <2 x ptr> [[ARG1:%.*]], i64 8148 +; CHECK-NEXT: [[V1:%.*]] = icmp sgt i32 [[ARG0:%.*]], 3 +; CHECK-NEXT: [[V2:%.*]] = select i1 [[V1]], <2 x i64> , <2 x i64> +; CHECK-NEXT: [[V3:%.*]] = getelementptr i8, <2 x ptr> [[V0]], <2 x i64> [[V2]] +; CHECK-NEXT: ret <2 x ptr> [[V3]] +; + %v0 = getelementptr i8, <2 x ptr> %arg1, i64 8148 + %v1 = icmp sgt i32 %arg0, 3 + %v2 = select i1 %v1, <2 x i64> , <2 x i64> + %v3 = getelementptr i8, <2 x ptr> %v0, <2 x i64> %v2 + ret <2 x ptr> %v3 +}