diff --git a/llvm/include/llvm/Analysis/TargetTransformInfo.h b/llvm/include/llvm/Analysis/TargetTransformInfo.h index fb6f3287e3d26..f5114fa40c70a 100644 --- a/llvm/include/llvm/Analysis/TargetTransformInfo.h +++ b/llvm/include/llvm/Analysis/TargetTransformInfo.h @@ -1002,6 +1002,16 @@ class TargetTransformInfo { /// more beneficial constant hoisting is). InstructionCost getIntImmCodeSizeCost(unsigned Opc, unsigned Idx, const APInt &Imm, Type *Ty) const; + + /// It can be advantageous to detach complex constants from their uses to make + /// their generation cheaper. This hook allows targets to report when such + /// transformations might negatively affect the code generation of the + /// underlying operation. The motivating example is divides whereby hoisting + /// constants prevents the code generator's ability to transform them into + /// combinations of simpler operations. + bool preferToKeepConstantsAttached(const Instruction &Inst, + const Function &Fn) const; + /// @} /// \name Vector Target Information @@ -1873,6 +1883,8 @@ class TargetTransformInfo::Concept { virtual InstructionCost getIntImmCostIntrin(Intrinsic::ID IID, unsigned Idx, const APInt &Imm, Type *Ty, TargetCostKind CostKind) = 0; + virtual bool preferToKeepConstantsAttached(const Instruction &Inst, + const Function &Fn) const = 0; virtual unsigned getNumberOfRegisters(unsigned ClassID) const = 0; virtual unsigned getRegisterClassForType(bool Vector, Type *Ty = nullptr) const = 0; @@ -2430,6 +2442,10 @@ class TargetTransformInfo::Model final : public TargetTransformInfo::Concept { TargetCostKind CostKind) override { return Impl.getIntImmCostIntrin(IID, Idx, Imm, Ty, CostKind); } + bool preferToKeepConstantsAttached(const Instruction &Inst, + const Function &Fn) const override { + return Impl.preferToKeepConstantsAttached(Inst, Fn); + } unsigned getNumberOfRegisters(unsigned ClassID) const override { return Impl.getNumberOfRegisters(ClassID); } diff --git 
a/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h b/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h index 171858e69d5d1..1d8f523e9792b 100644 --- a/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h +++ b/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h @@ -427,6 +427,11 @@ class TargetTransformInfoImplBase { return TTI::TCC_Free; } + bool preferToKeepConstantsAttached(const Instruction &Inst, + const Function &Fn) const { + return false; + } + unsigned getNumberOfRegisters(unsigned ClassID) const { return 8; } unsigned getRegisterClassForType(bool Vector, Type *Ty = nullptr) const { diff --git a/llvm/include/llvm/CodeGen/BasicTTIImpl.h b/llvm/include/llvm/CodeGen/BasicTTIImpl.h index e05ce2890a08c..5e7bdcdf72a49 100644 --- a/llvm/include/llvm/CodeGen/BasicTTIImpl.h +++ b/llvm/include/llvm/CodeGen/BasicTTIImpl.h @@ -545,6 +545,25 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase<T> { return TargetTransformInfo::TCC_Expensive; } + bool preferToKeepConstantsAttached(const Instruction &Inst, + const Function &Fn) const { + switch (Inst.getOpcode()) { + default: + break; + case Instruction::SDiv: + case Instruction::SRem: + case Instruction::UDiv: + case Instruction::URem: { + if (!isa<ConstantInt>(Inst.getOperand(1))) + return false; + EVT VT = getTLI()->getValueType(DL, Inst.getType()); + return !getTLI()->isIntDivCheap(VT, Fn.getAttributes()); + } + }; + + return false; + } + unsigned getInliningThresholdMultiplier() const { return 1; } unsigned adjustInliningThreshold(const CallBase *CB) { return 0; } unsigned getCallerAllocaCost(const CallBase *CB, const AllocaInst *AI) const { diff --git a/llvm/lib/Analysis/TargetTransformInfo.cpp b/llvm/lib/Analysis/TargetTransformInfo.cpp index 63b1b7567c8e9..3f76dfdaac317 100644 --- a/llvm/lib/Analysis/TargetTransformInfo.cpp +++ b/llvm/lib/Analysis/TargetTransformInfo.cpp @@ -682,6 +682,11 @@ TargetTransformInfo::getIntImmCostIntrin(Intrinsic::ID IID, unsigned Idx, return Cost; } +bool 
TargetTransformInfo::preferToKeepConstantsAttached( + const Instruction &Inst, const Function &Fn) const { + return TTIImpl->preferToKeepConstantsAttached(Inst, Fn); +} + unsigned TargetTransformInfo::getNumberOfRegisters(unsigned ClassID) const { return TTIImpl->getNumberOfRegisters(ClassID); } diff --git a/llvm/lib/Transforms/Scalar/ConstantHoisting.cpp b/llvm/lib/Transforms/Scalar/ConstantHoisting.cpp index 3e5d979f11cc5..1fb9d7fff32f6 100644 --- a/llvm/lib/Transforms/Scalar/ConstantHoisting.cpp +++ b/llvm/lib/Transforms/Scalar/ConstantHoisting.cpp @@ -523,7 +523,8 @@ void ConstantHoistingPass::collectConstantCandidates(Function &Fn) { if (!DT->isReachableFromEntry(&BB)) continue; for (Instruction &Inst : BB) - collectConstantCandidates(ConstCandMap, &Inst); + if (!TTI->preferToKeepConstantsAttached(Inst, Fn)) + collectConstantCandidates(ConstCandMap, &Inst); } } diff --git a/llvm/test/Transforms/ConstantHoisting/AArch64/large-immediate.ll b/llvm/test/Transforms/ConstantHoisting/AArch64/large-immediate.ll index 015f52157b9e7..196a104adc023 100644 --- a/llvm/test/Transforms/ConstantHoisting/AArch64/large-immediate.ll +++ b/llvm/test/Transforms/ConstantHoisting/AArch64/large-immediate.ll @@ -1,27 +1,134 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 3 ; RUN: opt -mtriple=arm64-darwin-unknown -S -passes=consthoist < %s | FileCheck %s -define i128 @test1(i128 %a) nounwind { -; CHECK-LABEL: test1 -; CHECK: %const = bitcast i128 12297829382473034410122878 to i128 +define i128 @test1(i128 %a) { +; CHECK-LABEL: define i128 @test1( +; CHECK-SAME: i128 [[A:%.*]]) { +; CHECK-NEXT: [[CONST:%.*]] = bitcast i128 12297829382473034410122878 to i128 +; CHECK-NEXT: [[TMP1:%.*]] = add i128 [[A]], [[CONST]] +; CHECK-NEXT: [[TMP2:%.*]] = add i128 [[TMP1]], [[CONST]] +; CHECK-NEXT: ret i128 [[TMP2]] +; %1 = add i128 %a, 12297829382473034410122878 %2 = add i128 %1, 12297829382473034410122878 ret i128 %2 } ; Check that we don't hoist 
large, but cheap constants -define i512 @test2(i512 %a) nounwind { -; CHECK-LABEL: test2 -; CHECK-NOT: %const = bitcast i512 7 to i512 +define i512 @test2(i512 %a) { +; CHECK-LABEL: define i512 @test2( +; CHECK-SAME: i512 [[A:%.*]]) { +; CHECK-NEXT: [[TMP1:%.*]] = and i512 [[A]], 7 +; CHECK-NEXT: [[TMP2:%.*]] = or i512 [[TMP1]], 7 +; CHECK-NEXT: ret i512 [[TMP2]] +; %1 = and i512 %a, 7 %2 = or i512 %1, 7 ret i512 %2 } ; Check that we don't hoist the shift value of a shift instruction. -define i512 @test3(i512 %a) nounwind { -; CHECK-LABEL: test3 -; CHECK-NOT: %const = bitcast i512 504 to i512 +define i512 @test3(i512 %a) { +; CHECK-LABEL: define i512 @test3( +; CHECK-SAME: i512 [[A:%.*]]) { +; CHECK-NEXT: [[TMP1:%.*]] = shl i512 [[A]], 504 +; CHECK-NEXT: [[TMP2:%.*]] = ashr i512 [[TMP1]], 504 +; CHECK-NEXT: ret i512 [[TMP2]] +; %1 = shl i512 %a, 504 %2 = ashr i512 %1, 504 ret i512 %2 } + +; Ensure the code generator has the information necessary to simplify sdiv. +define i64 @sdiv(i64 %a) { +; CHECK-LABEL: define i64 @sdiv( +; CHECK-SAME: i64 [[A:%.*]]) { +; CHECK-NEXT: [[TMP1:%.*]] = sdiv i64 [[A]], 4294967087 +; CHECK-NEXT: [[TMP2:%.*]] = add i64 [[TMP1]], 4294967087 +; CHECK-NEXT: ret i64 [[TMP2]] +; + %1 = sdiv i64 %a, 4294967087 + %2 = add i64 %1, 4294967087 + ret i64 %2 +} + +; Ensure the code generator has the information necessary to simplify srem. +define i64 @srem(i64 %a) { +; CHECK-LABEL: define i64 @srem( +; CHECK-SAME: i64 [[A:%.*]]) { +; CHECK-NEXT: [[TMP1:%.*]] = srem i64 [[A]], 4294967087 +; CHECK-NEXT: [[TMP2:%.*]] = add i64 [[TMP1]], 4294967087 +; CHECK-NEXT: ret i64 [[TMP2]] +; + %1 = srem i64 %a, 4294967087 + %2 = add i64 %1, 4294967087 + ret i64 %2 +} + +; Ensure the code generator has the information necessary to simplify udiv. 
+define i64 @udiv(i64 %a) { +; CHECK-LABEL: define i64 @udiv( +; CHECK-SAME: i64 [[A:%.*]]) { +; CHECK-NEXT: [[TMP1:%.*]] = udiv i64 [[A]], 4294967087 +; CHECK-NEXT: [[TMP2:%.*]] = add i64 [[TMP1]], 4294967087 +; CHECK-NEXT: ret i64 [[TMP2]] +; + %1 = udiv i64 %a, 4294967087 + %2 = add i64 %1, 4294967087 + ret i64 %2 +} + +; Ensure the code generator has the information necessary to simplify urem. +define i64 @urem(i64 %a) { +; CHECK-LABEL: define i64 @urem( +; CHECK-SAME: i64 [[A:%.*]]) { +; CHECK-NEXT: [[TMP1:%.*]] = urem i64 [[A]], 4294967087 +; CHECK-NEXT: [[TMP2:%.*]] = add i64 [[TMP1]], 4294967087 +; CHECK-NEXT: ret i64 [[TMP2]] +; + %1 = urem i64 %a, 4294967087 + %2 = add i64 %1, 4294967087 + ret i64 %2 +} + +; Code generator will not decompose divide like operations when the divisor is +; not a constant. +define i64 @sdiv_non_const_divisor(i64 %a) { +; CHECK-LABEL: define i64 @sdiv_non_const_divisor( +; CHECK-SAME: i64 [[A:%.*]]) { +; CHECK-NEXT: [[CONST:%.*]] = bitcast i64 4294967087 to i64 +; CHECK-NEXT: [[TMP1:%.*]] = sdiv i64 [[CONST]], [[A]] +; CHECK-NEXT: [[TMP2:%.*]] = add i64 [[TMP1]], [[CONST]] +; CHECK-NEXT: ret i64 [[TMP2]] +; + %1 = sdiv i64 4294967087, %a + %2 = add i64 %1, 4294967087 + ret i64 %2 +} + +; Code generator emits divide instructions when optimising for size. 
+define i64 @sdiv_minsize(i64 %a) minsize { +; CHECK-LABEL: define i64 @sdiv_minsize( +; CHECK-SAME: i64 [[A:%.*]]) #[[ATTR0:[0-9]+]] { +; CHECK-NEXT: [[CONST:%.*]] = bitcast i64 4294967087 to i64 +; CHECK-NEXT: [[TMP1:%.*]] = sdiv i64 [[A]], [[CONST]] +; CHECK-NEXT: [[TMP2:%.*]] = add i64 [[TMP1]], [[CONST]] +; CHECK-NEXT: ret i64 [[TMP2]] +; + %1 = sdiv i64 %a, 4294967087 + %2 = add i64 %1, 4294967087 + ret i64 %2 +} + +define <2 x i64> @sdiv_v2i64(<2 x i64> %a) { +; CHECK-LABEL: define <2 x i64> @sdiv_v2i64( +; CHECK-SAME: <2 x i64> [[A:%.*]]) { +; CHECK-NEXT: [[TMP1:%.*]] = sdiv <2 x i64> [[A]], <i64 4294967087, i64 4294967087> +; CHECK-NEXT: [[TMP2:%.*]] = add <2 x i64> [[TMP1]], <i64 4294967087, i64 4294967087> +; CHECK-NEXT: ret <2 x i64> [[TMP2]] +; + %1 = sdiv <2 x i64> %a, <i64 4294967087, i64 4294967087> + %2 = add <2 x i64> %1, <i64 4294967087, i64 4294967087> + ret <2 x i64> %2 +}