diff --git a/llvm/lib/Target/RISCV/CMakeLists.txt b/llvm/lib/Target/RISCV/CMakeLists.txt
index 0ff178e1f1959..e9088a4d9275c 100644
--- a/llvm/lib/Target/RISCV/CMakeLists.txt
+++ b/llvm/lib/Target/RISCV/CMakeLists.txt
@@ -58,6 +58,7 @@ add_llvm_target(RISCVCodeGen
   RISCVMoveMerger.cpp
   RISCVOptWInstrs.cpp
   RISCVPostRAExpandPseudoInsts.cpp
+  RISCVPromoteConstant.cpp
   RISCVPushPopOptimizer.cpp
   RISCVRedundantCopyElimination.cpp
   RISCVRegisterInfo.cpp
diff --git a/llvm/lib/Target/RISCV/RISCV.h b/llvm/lib/Target/RISCV/RISCV.h
index ae9410193efe1..51e8e8574ed15 100644
--- a/llvm/lib/Target/RISCV/RISCV.h
+++ b/llvm/lib/Target/RISCV/RISCV.h
@@ -20,6 +20,7 @@ namespace llvm {
 
 class FunctionPass;
 class InstructionSelector;
+class ModulePass;
 class PassRegistry;
 class RISCVRegisterBankInfo;
 class RISCVSubtarget;
@@ -111,6 +112,9 @@ void initializeRISCVO0PreLegalizerCombinerPass(PassRegistry &);
 FunctionPass *createRISCVPreLegalizerCombiner();
 void initializeRISCVPreLegalizerCombinerPass(PassRegistry &);
 
+ModulePass *createRISCVPromoteConstantPass();
+void initializeRISCVPromoteConstantPass(PassRegistry &);
+
 FunctionPass *createRISCVVLOptimizerPass();
 void initializeRISCVVLOptimizerPass(PassRegistry &);
 
diff --git a/llvm/lib/Target/RISCV/RISCVPromoteConstant.cpp b/llvm/lib/Target/RISCV/RISCVPromoteConstant.cpp
new file mode 100644
index 0000000000000..348927132d004
--- /dev/null
+++ b/llvm/lib/Target/RISCV/RISCVPromoteConstant.cpp
@@ -0,0 +1,192 @@
+//==- RISCVPromoteConstant.cpp - Promote constant fp to global for RISC-V --==//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "RISCV.h"
+#include "RISCVSubtarget.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/CodeGen/TargetLowering.h"
+#include "llvm/CodeGen/TargetPassConfig.h"
+#include "llvm/IR/BasicBlock.h"
+#include "llvm/IR/Constant.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/GlobalValue.h"
+#include "llvm/IR/GlobalVariable.h"
+#include "llvm/IR/IRBuilder.h"
+#include "llvm/IR/InstIterator.h"
+#include "llvm/IR/Instruction.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/Module.h"
+#include "llvm/IR/Type.h"
+#include "llvm/InitializePasses.h"
+#include "llvm/Pass.h"
+#include "llvm/Support/Casting.h"
+#include "llvm/Support/Debug.h"
+
+using namespace llvm;
+
+#define DEBUG_TYPE "riscv-promote-const"
+#define RISCV_PROMOTE_CONSTANT_NAME "RISC-V Promote Constants"
+
+STATISTIC(NumPromoted, "Number of constant literals promoted to globals");
+STATISTIC(NumPromotedUses, "Number of uses of promoted literal constants");
+
+namespace {
+
+class RISCVPromoteConstant : public ModulePass {
+public:
+  static char ID;
+  RISCVPromoteConstant() : ModulePass(ID) {}
+
+  StringRef getPassName() const override { return RISCV_PROMOTE_CONSTANT_NAME; }
+
+  void getAnalysisUsage(AnalysisUsage &AU) const override {
+    AU.addRequired<TargetPassConfig>();
+    AU.setPreservesCFG();
+  }
+
+  /// Iterate over the functions and promote the double fp constants that
+  /// would otherwise go into the constant pool to a constant array.
+  bool runOnModule(Module &M) override {
+    if (skipModule(M))
+      return false;
+    // TargetMachine and Subtarget are needed to query isFPImmLegal.
+    const TargetPassConfig &TPC = getAnalysis<TargetPassConfig>();
+    const TargetMachine &TM = TPC.getTM<TargetMachine>();
+    bool Changed = false;
+    for (Function &F : M) {
+      const RISCVSubtarget &ST = TM.getSubtarget<RISCVSubtarget>(F);
+      const RISCVTargetLowering *TLI = ST.getTargetLowering();
+      Changed |= runOnFunction(F, TLI);
+    }
+    return Changed;
+  }
+
+private:
+  bool runOnFunction(Function &F, const RISCVTargetLowering *TLI);
+};
+} // end anonymous namespace
+
+char RISCVPromoteConstant::ID = 0;
+
+INITIALIZE_PASS(RISCVPromoteConstant, DEBUG_TYPE, RISCV_PROMOTE_CONSTANT_NAME,
+                false, false)
+
+ModulePass *llvm::createRISCVPromoteConstantPass() {
+  return new RISCVPromoteConstant();
+}
+
+bool RISCVPromoteConstant::runOnFunction(Function &F,
+                                         const RISCVTargetLowering *TLI) {
+  if (F.hasOptNone() || F.hasOptSize())
+    return false;
+
+  // Bail out and make no transformation if the target doesn't support
+  // doubles, or if we're not targeting RV64 as we currently see some
+  // regressions for those targets.
+  if (!TLI->isTypeLegal(MVT::f64) || !TLI->isTypeLegal(MVT::i64))
+    return false;
+
+  // Collect all unique double constants and their uses in the function. Use
+  // MapVector to preserve insertion order.
+  MapVector<ConstantFP *, SmallVector<Use *>> ConstUsesMap;
+
+  for (Instruction &I : instructions(F)) {
+    for (Use &U : I.operands()) {
+      auto *C = dyn_cast<ConstantFP>(U.get());
+      if (!C || !C->getType()->isDoubleTy())
+        continue;
+      if (TLI->isFPImmLegal(C->getValueAPF(), MVT::f64,
+                            /*ForCodeSize=*/false))
+        continue;
+      ConstUsesMap[C].push_back(&U);
+    }
+  }
+
+  int PromotableConstants = ConstUsesMap.size();
+  LLVM_DEBUG(dbgs() << "Found " << PromotableConstants
+                    << " promotable constants in " << F.getName() << "\n");
+  // Bail out if no promotable constants found, or if only one is found.
+  if (PromotableConstants < 2) {
+    LLVM_DEBUG(dbgs() << "Performing no promotions as insufficient promotable "
+                         "constants found\n");
+    return false;
+  }
+
+  NumPromoted += PromotableConstants;
+
+  // Create a global array containing the promoted constants.
+  Module *M = F.getParent();
+  Type *DoubleTy = Type::getDoubleTy(M->getContext());
+
+  SmallVector<Constant *> ConstantVector;
+  for (auto const &Pair : ConstUsesMap)
+    ConstantVector.push_back(Pair.first);
+
+  ArrayType *ArrayTy = ArrayType::get(DoubleTy, ConstantVector.size());
+  Constant *GlobalArrayInitializer =
+      ConstantArray::get(ArrayTy, ConstantVector);
+
+  auto *GlobalArray = new GlobalVariable(
+      *M, ArrayTy,
+      /*isConstant=*/true, GlobalValue::InternalLinkage, GlobalArrayInitializer,
+      ".promoted_doubles." + F.getName());
+
+  // A cache to hold the loaded value for a given constant within a basic
+  // block.
+  DenseMap<std::pair<ConstantFP *, BasicBlock *>, Value *> LocalLoads;
+
+  // Replace all uses with the loaded value.
+  unsigned Idx = 0;
+  for (auto const &Pair : ConstUsesMap) {
+    ConstantFP *Const = Pair.first;
+    const SmallVector<Use *> &Uses = Pair.second;
+
+    for (Use *U : Uses) {
+      Instruction *UserInst = cast<Instruction>(U->getUser());
+      BasicBlock *InsertionBB;
+      BasicBlock::iterator InsertionPt;
+
+      // If the user is a PHI node, we must insert the load in the
+      // corresponding predecessor basic block. Otherwise, it's inserted into
+      // the same block as the use.
+      if (auto *PN = dyn_cast<PHINode>(UserInst))
+        InsertionBB = PN->getIncomingBlock(*U);
+      else
+        InsertionBB = UserInst->getParent();
+
+      // It is always safe to insert in the first insertion point in the BB,
+      // so do that and let other passes reorder.
+      InsertionPt = InsertionBB->getFirstInsertionPt();
+
+      auto CacheKey = std::make_pair(Const, InsertionBB);
+      Value *LoadedVal = nullptr;
+
+      // Re-use a load if it exists in the insertion block.
+      if (LocalLoads.count(CacheKey)) {
+        LoadedVal = LocalLoads.at(CacheKey);
+      } else {
+        // Otherwise, create a new GEP and Load at the correct insertion point.
+        IRBuilder<> Builder(InsertionBB, InsertionPt);
+        Value *ElementPtr = Builder.CreateConstInBoundsGEP2_64(
+            GlobalArray->getValueType(), GlobalArray, 0, Idx, "double.addr");
+        LoadedVal = Builder.CreateLoad(DoubleTy, ElementPtr, "double.val");
+
+        // Cache the newly created load for this block.
+        LocalLoads[CacheKey] = LoadedVal;
+      }
+
+      U->set(LoadedVal);
+      ++NumPromotedUses;
+    }
+    ++Idx;
+  }
+
+  return true;
+}
diff --git a/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp b/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp
index f81b1e1260ee3..0551887430869 100644
--- a/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp
+++ b/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp
@@ -139,6 +139,7 @@ extern "C" LLVM_ABI LLVM_EXTERNAL_VISIBILITY void LLVMInitializeRISCVTarget() {
   initializeRISCVExpandAtomicPseudoPass(*PR);
   initializeRISCVRedundantCopyEliminationPass(*PR);
   initializeRISCVAsmPrinterPass(*PR);
+  initializeRISCVPromoteConstantPass(*PR);
 }
 
 static Reloc::Model getEffectiveRelocModel(const Triple &TT,
@@ -463,6 +464,8 @@ void RISCVPassConfig::addIRPasses() {
 }
 
 bool RISCVPassConfig::addPreISel() {
+  if (TM->getOptLevel() != CodeGenOptLevel::None)
+    addPass(createRISCVPromoteConstantPass());
   if (TM->getOptLevel() != CodeGenOptLevel::None) {
     // Add a barrier before instruction selection so that we will not get
     // deleted block address after enabling default outlining. See D99707 for
diff --git a/llvm/test/CodeGen/RISCV/GlobalISel/irtranslator/vararg.ll b/llvm/test/CodeGen/RISCV/GlobalISel/irtranslator/vararg.ll
index 74961d12c1c85..7233ce6d593d0 100644
--- a/llvm/test/CodeGen/RISCV/GlobalISel/irtranslator/vararg.ll
+++ b/llvm/test/CodeGen/RISCV/GlobalISel/irtranslator/vararg.ll
@@ -67,8 +67,8 @@ define i32 @va1(ptr %fmt, ...) {
   ; RV32-NEXT:   G_VASTART [[FRAME_INDEX1]](p0) :: (store (s32) into %ir.va)
   ; RV32-NEXT:   [[LOAD:%[0-9]+]]:_(p0) = G_LOAD [[FRAME_INDEX1]](p0) :: (dereferenceable load (p0) from %ir.va)
   ; RV32-NEXT:   [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
-  ; RV32-NEXT:   %20:_(p0) = nuw nusw inbounds G_PTR_ADD [[LOAD]], [[C1]](s32)
-  ; RV32-NEXT:   G_STORE %20(p0), [[FRAME_INDEX1]](p0) :: (store (p0) into %ir.va)
+  ; RV32-NEXT:   [[PTR_ADD7:%[0-9]+]]:_(p0) = nuw nusw inbounds G_PTR_ADD [[LOAD]], [[C1]](s32)
+  ; RV32-NEXT:   G_STORE [[PTR_ADD7]](p0), [[FRAME_INDEX1]](p0) :: (store (p0) into %ir.va)
   ; RV32-NEXT:   [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[LOAD]](p0) :: (load (s32) from %ir.argp.cur)
   ; RV32-NEXT:   $x10 = COPY [[LOAD1]](s32)
   ; RV32-NEXT:   PseudoRET implicit $x10
@@ -105,8 +105,8 @@ define i32 @va1(ptr %fmt, ...) {
   ; RV64-NEXT:   G_VASTART [[FRAME_INDEX1]](p0) :: (store (s64) into %ir.va)
   ; RV64-NEXT:   [[LOAD:%[0-9]+]]:_(p0) = G_LOAD [[FRAME_INDEX1]](p0) :: (dereferenceable load (p0) from %ir.va, align 4)
   ; RV64-NEXT:   [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
-  ; RV64-NEXT:   %20:_(p0) = nuw nusw inbounds G_PTR_ADD [[LOAD]], [[C1]](s64)
-  ; RV64-NEXT:   G_STORE %20(p0), [[FRAME_INDEX1]](p0) :: (store (p0) into %ir.va, align 4)
+  ; RV64-NEXT:   [[PTR_ADD7:%[0-9]+]]:_(p0) = nuw nusw inbounds G_PTR_ADD [[LOAD]], [[C1]](s64)
+  ; RV64-NEXT:   G_STORE [[PTR_ADD7]](p0), [[FRAME_INDEX1]](p0) :: (store (p0) into %ir.va, align 4)
   ; RV64-NEXT:   [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[LOAD]](p0) :: (load (s32) from %ir.argp.cur)
   ; RV64-NEXT:   [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD1]](s32)
   ; RV64-NEXT:   $x10 = COPY [[ANYEXT]](s64)
@@ -687,8 +687,8 @@ define i64 @va2(ptr %fmt, ...) nounwind {
   ; RV32-NEXT:   [[AND:%[0-9]+]]:_(s32) = G_AND [[ADD]], [[C2]]
   ; RV32-NEXT:   [[INTTOPTR:%[0-9]+]]:_(p0) = G_INTTOPTR [[ADD]](s32)
   ; RV32-NEXT:   [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-  ; RV32-NEXT:   %25:_(p0) = nuw nusw inbounds G_PTR_ADD [[INTTOPTR]], [[C3]](s32)
-  ; RV32-NEXT:   G_STORE %25(p0), [[FRAME_INDEX1]](p0) :: (store (p0) into %ir.va)
+  ; RV32-NEXT:   [[PTR_ADD7:%[0-9]+]]:_(p0) = nuw nusw inbounds G_PTR_ADD [[INTTOPTR]], [[C3]](s32)
+  ; RV32-NEXT:   G_STORE [[PTR_ADD7]](p0), [[FRAME_INDEX1]](p0) :: (store (p0) into %ir.va)
   ; RV32-NEXT:   [[INTTOPTR1:%[0-9]+]]:_(p0) = G_INTTOPTR [[AND]](s32)
   ; RV32-NEXT:   [[LOAD1:%[0-9]+]]:_(s64) = G_LOAD [[INTTOPTR1]](p0) :: (load (s64) from %ir.3)
   ; RV32-NEXT:   [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD1]](s64)
@@ -733,8 +733,8 @@ define i64 @va2(ptr %fmt, ...) nounwind {
   ; RV64-NEXT:   [[AND:%[0-9]+]]:_(s32) = G_AND [[ADD]], [[C2]]
   ; RV64-NEXT:   [[INTTOPTR:%[0-9]+]]:_(p0) = G_INTTOPTR [[ADD]](s32)
   ; RV64-NEXT:   [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
-  ; RV64-NEXT:   %25:_(p0) = nuw nusw inbounds G_PTR_ADD [[INTTOPTR]], [[C3]](s64)
-  ; RV64-NEXT:   G_STORE %25(p0), [[FRAME_INDEX1]](p0) :: (store (p0) into %ir.va, align 4)
+  ; RV64-NEXT:   [[PTR_ADD7:%[0-9]+]]:_(p0) = nuw nusw inbounds G_PTR_ADD [[INTTOPTR]], [[C3]](s64)
+  ; RV64-NEXT:   G_STORE [[PTR_ADD7]](p0), [[FRAME_INDEX1]](p0) :: (store (p0) into %ir.va, align 4)
   ; RV64-NEXT:   [[INTTOPTR1:%[0-9]+]]:_(p0) = G_INTTOPTR [[AND]](s32)
   ; RV64-NEXT:   [[LOAD1:%[0-9]+]]:_(s64) = G_LOAD [[INTTOPTR1]](p0) :: (load (s64) from %ir.3)
   ; RV64-NEXT:   $x10 = COPY [[LOAD1]](s64)
@@ -974,8 +974,8 @@ define i64 @va3(i32 %a, i64 %b, ...) nounwind {
   ; RV32-NEXT:   [[AND:%[0-9]+]]:_(s32) = G_AND [[ADD]], [[C2]]
   ; RV32-NEXT:   [[INTTOPTR:%[0-9]+]]:_(p0) = G_INTTOPTR [[ADD]](s32)
   ; RV32-NEXT:   [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-  ; RV32-NEXT:   %24:_(p0) = nuw nusw inbounds G_PTR_ADD [[INTTOPTR]], [[C3]](s32)
-  ; RV32-NEXT:   G_STORE %24(p0), [[FRAME_INDEX1]](p0) :: (store (p0) into %ir.va)
+  ; RV32-NEXT:   [[PTR_ADD5:%[0-9]+]]:_(p0) = nuw nusw inbounds G_PTR_ADD [[INTTOPTR]], [[C3]](s32)
+  ; RV32-NEXT:   G_STORE [[PTR_ADD5]](p0), [[FRAME_INDEX1]](p0) :: (store (p0) into %ir.va)
   ; RV32-NEXT:   [[INTTOPTR1:%[0-9]+]]:_(p0) = G_INTTOPTR [[AND]](s32)
   ; RV32-NEXT:   [[LOAD1:%[0-9]+]]:_(s64) = G_LOAD [[INTTOPTR1]](p0) :: (load (s64) from %ir.3)
   ; RV32-NEXT:   [[ADD1:%[0-9]+]]:_(s64) = G_ADD [[MV]], [[LOAD1]]
@@ -1020,8 +1020,8 @@ define i64 @va3(i32 %a, i64 %b, ...) nounwind {
   ; RV64-NEXT:   [[AND:%[0-9]+]]:_(s32) = G_AND [[ADD]], [[C2]]
   ; RV64-NEXT:   [[INTTOPTR:%[0-9]+]]:_(p0) = G_INTTOPTR [[ADD]](s32)
   ; RV64-NEXT:   [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
-  ; RV64-NEXT:   %25:_(p0) = nuw nusw inbounds G_PTR_ADD [[INTTOPTR]], [[C3]](s64)
-  ; RV64-NEXT:   G_STORE %25(p0), [[FRAME_INDEX1]](p0) :: (store (p0) into %ir.va, align 4)
+  ; RV64-NEXT:   [[PTR_ADD6:%[0-9]+]]:_(p0) = nuw nusw inbounds G_PTR_ADD [[INTTOPTR]], [[C3]](s64)
+  ; RV64-NEXT:   G_STORE [[PTR_ADD6]](p0), [[FRAME_INDEX1]](p0) :: (store (p0) into %ir.va, align 4)
   ; RV64-NEXT:   [[INTTOPTR1:%[0-9]+]]:_(p0) = G_INTTOPTR [[AND]](s32)
   ; RV64-NEXT:   [[LOAD1:%[0-9]+]]:_(s64) = G_LOAD [[INTTOPTR1]](p0) :: (load (s64) from %ir.3)
   ; RV64-NEXT:   [[ADD1:%[0-9]+]]:_(s64) = G_ADD [[COPY1]], [[LOAD1]]
@@ -1724,8 +1724,8 @@ define i32 @va_large_stack(ptr %fmt, ...) {
   ; RV32-NEXT:   G_VASTART [[FRAME_INDEX2]](p0) :: (store (s32) into %ir.va)
   ; RV32-NEXT:   [[LOAD:%[0-9]+]]:_(p0) = G_LOAD [[FRAME_INDEX2]](p0) :: (dereferenceable load (p0) from %ir.va)
   ; RV32-NEXT:   [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
-  ; RV32-NEXT:   %21:_(p0) = nuw nusw inbounds G_PTR_ADD [[LOAD]], [[C1]](s32)
-  ; RV32-NEXT:   G_STORE %21(p0), [[FRAME_INDEX2]](p0) :: (store (p0) into %ir.va)
+  ; RV32-NEXT:   [[PTR_ADD7:%[0-9]+]]:_(p0) = nuw nusw inbounds G_PTR_ADD [[LOAD]], [[C1]](s32)
+  ; RV32-NEXT:   G_STORE [[PTR_ADD7]](p0), [[FRAME_INDEX2]](p0) :: (store (p0) into %ir.va)
   ; RV32-NEXT:   [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[LOAD]](p0) :: (load (s32) from %ir.argp.cur)
   ; RV32-NEXT:   $x10 = COPY [[LOAD1]](s32)
   ; RV32-NEXT:   PseudoRET implicit $x10
@@ -1763,8 +1763,8 @@ define i32 @va_large_stack(ptr %fmt, ...) {
   ; RV64-NEXT:   G_VASTART [[FRAME_INDEX2]](p0) :: (store (s64) into %ir.va)
   ; RV64-NEXT:   [[LOAD:%[0-9]+]]:_(p0) = G_LOAD [[FRAME_INDEX2]](p0) :: (dereferenceable load (p0) from %ir.va, align 4)
   ; RV64-NEXT:   [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
-  ; RV64-NEXT:   %21:_(p0) = nuw nusw inbounds G_PTR_ADD [[LOAD]], [[C1]](s64)
-  ; RV64-NEXT:   G_STORE %21(p0), [[FRAME_INDEX2]](p0) :: (store (p0) into %ir.va, align 4)
+  ; RV64-NEXT:   [[PTR_ADD7:%[0-9]+]]:_(p0) = nuw nusw inbounds G_PTR_ADD [[LOAD]], [[C1]](s64)
+  ; RV64-NEXT:   G_STORE [[PTR_ADD7]](p0), [[FRAME_INDEX2]](p0) :: (store (p0) into %ir.va, align 4)
   ; RV64-NEXT:   [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[LOAD]](p0) :: (load (s32) from %ir.argp.cur)
   ; RV64-NEXT:   [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD1]](s32)
   ; RV64-NEXT:   $x10 = COPY [[ANYEXT]](s64)
diff --git a/llvm/test/CodeGen/RISCV/O3-pipeline.ll b/llvm/test/CodeGen/RISCV/O3-pipeline.ll
index ea08061221fd4..769823d1c4216 100644
--- a/llvm/test/CodeGen/RISCV/O3-pipeline.ll
+++ b/llvm/test/CodeGen/RISCV/O3-pipeline.ll
@@ -75,6 +75,7 @@
 ; CHECK-NEXT:       CodeGen Prepare
 ; CHECK-NEXT:       Dominator Tree Construction
 ; CHECK-NEXT:       Exception handling preparation
+; CHECK-NEXT:     RISC-V Promote Constants
 ; CHECK-NEXT:     A No-Op Barrier Pass
 ; CHECK-NEXT:   FunctionPass Manager
 ; CHECK-NEXT:     Merge internal globals
diff --git a/llvm/test/CodeGen/RISCV/riscv-promote-constant.ll b/llvm/test/CodeGen/RISCV/riscv-promote-constant.ll
new file mode 100644
index 0000000000000..2bde6013b3640
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/riscv-promote-constant.ll
@@ -0,0 +1,148 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 6
+; RUN: opt %s -S -riscv-promote-const -mtriple=riscv64 -mattr=+d | FileCheck %s
+
+; No promotion should take place, as the pass skips floats.
+define float @multiple_floats(float %a, float %b) {
+; CHECK-LABEL: define float @multiple_floats(
+; CHECK-SAME: float [[A:%.*]], float [[B:%.*]]) #[[ATTR0:[0-9]+]] {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    [[ADD1:%.*]] = fadd float [[A]], 1.000000e+00
+; CHECK-NEXT:    [[ADD2:%.*]] = fadd float [[B]], 2.000000e+00
+; CHECK-NEXT:    [[SUM_F:%.*]] = fadd float [[ADD1]], [[ADD2]]
+; CHECK-NEXT:    ret float [[SUM_F]]
+;
+entry:
+  %add1 = fadd float %a, 1.0
+  %add2 = fadd float %b, 2.0
+  %sum_f = fadd float %add1, %add2
+  ret float %sum_f
+}
+
+; No promotion should take place as cases with a single constant are skipped.
+define double @single_double(double %a) {
+; CHECK-LABEL: define double @single_double(
+; CHECK-SAME: double [[A:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    [[ADD:%.*]] = fadd double [[A]], 4.210000e+01
+; CHECK-NEXT:    ret double [[ADD]]
+;
+entry:
+  %add = fadd double %a, 42.1
+  ret double %add
+}
+
+; Promotion should happen as we have at least two unique constants that would
+; otherwise go in the constant pool.
+define double @multiple_doubles(double %a, double %b) {
+; CHECK-LABEL: define double @multiple_doubles(
+; CHECK-SAME: double [[A:%.*]], double [[B:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    [[DOUBLE_VAL1:%.*]] = load double, ptr getelementptr inbounds ([2 x double], ptr @.promoted_doubles.multiple_doubles, i64 0, i64 1), align 8
+; CHECK-NEXT:    [[ADD3:%.*]] = load double, ptr @.promoted_doubles.multiple_doubles, align 8
+; CHECK-NEXT:    [[ADD2:%.*]] = fadd double [[A]], [[ADD3]]
+; CHECK-NEXT:    [[ADD4:%.*]] = fadd double [[B]], [[DOUBLE_VAL1]]
+; CHECK-NEXT:    [[SUM:%.*]] = fadd double [[ADD2]], [[ADD3]]
+; CHECK-NEXT:    [[SUM1:%.*]] = fadd double [[ADD4]], [[SUM]]
+; CHECK-NEXT:    ret double [[SUM1]]
+;
+entry:
+  %add1 = fadd double %a, 2.718
+  %add2 = fadd double %b, 42.1
+  %add3 = fadd double %add1, 2.718
+  %sum = fadd double %add2, %add3
+  ret double %sum
+}
+
+; Promotion should not happen as the constants will be materialised rather
+; than using the constant pool.
+define double @multiple_doubles_no_promote(double %a, double %b) {
+; CHECK-LABEL: define double @multiple_doubles_no_promote(
+; CHECK-SAME: double [[A:%.*]], double [[B:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    [[ADD1:%.*]] = fadd double [[A]], 1.000000e+00
+; CHECK-NEXT:    [[ADD2:%.*]] = fadd double [[B]], 2.000000e+00
+; CHECK-NEXT:    [[ADD3:%.*]] = fadd double [[ADD1]], 1.000000e+00
+; CHECK-NEXT:    [[SUM:%.*]] = fadd double [[ADD2]], [[ADD3]]
+; CHECK-NEXT:    ret double [[SUM]]
+;
+entry:
+  %add1 = fadd double %a, 1.0
+  %add2 = fadd double %b, 2.0
+  %add3 = fadd double %add1, 1.0
+  %sum = fadd double %add2, %add3
+  ret double %sum
+}
+
+; The same constant shouldn't be loaded more than once per BB.
+define double @multiple_doubles_multi_bb(double %a, i1 %cond) {
+; CHECK-LABEL: define double @multiple_doubles_multi_bb(
+; CHECK-SAME: double [[A:%.*]], i1 [[COND:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    br i1 [[COND]], label %[[IF_TRUE:.*]], label %[[IF_FALSE:.*]]
+; CHECK:       [[IF_TRUE]]:
+; CHECK-NEXT:    [[DOUBLE_VAL2:%.*]] = load double, ptr getelementptr inbounds ([2 x double], ptr @.promoted_doubles.multiple_doubles_multi_bb, i64 0, i64 1), align 8
+; CHECK-NEXT:    [[DOUBLE_VAL:%.*]] = load double, ptr @.promoted_doubles.multiple_doubles_multi_bb, align 8
+; CHECK-NEXT:    [[ADD_T:%.*]] = fadd double [[A]], [[DOUBLE_VAL]]
+; CHECK-NEXT:    [[MUL_T:%.*]] = fmul double [[ADD_T]], [[DOUBLE_VAL2]]
+; CHECK-NEXT:    [[SUB_T:%.*]] = fsub double [[MUL_T]], [[DOUBLE_VAL]]
+; CHECK-NEXT:    br label %[[IF_END:.*]]
+; CHECK:       [[IF_FALSE]]:
+; CHECK-NEXT:    [[DOUBLE_VAL3:%.*]] = load double, ptr getelementptr inbounds ([2 x double], ptr @.promoted_doubles.multiple_doubles_multi_bb, i64 0, i64 1), align 8
+; CHECK-NEXT:    [[DOUBLE_VAL1:%.*]] = load double, ptr @.promoted_doubles.multiple_doubles_multi_bb, align 8
+; CHECK-NEXT:    [[ADD_F:%.*]] = fadd double [[A]], [[DOUBLE_VAL1]]
+; CHECK-NEXT:    [[MUL_F:%.*]] = fmul double [[ADD_F]], [[DOUBLE_VAL3]]
+; CHECK-NEXT:    [[SUB_F:%.*]] = fsub double [[MUL_F]], [[DOUBLE_VAL1]]
+; CHECK-NEXT:    br label %[[IF_END]]
+; CHECK:       [[IF_END]]:
+; CHECK-NEXT:    [[PHI_RES:%.*]] = phi double [ [[SUB_T]], %[[IF_TRUE]] ], [ [[SUB_F]], %[[IF_FALSE]] ]
+; CHECK-NEXT:    ret double [[PHI_RES]]
+;
+entry:
+  br i1 %cond, label %if.true, label %if.false
+
+if.true:
+  %add.t = fadd double %a, 1.23
+  %mul.t = fmul double %add.t, 4.56
+  %sub.t = fsub double %mul.t, 1.23
+  br label %if.end
+
+if.false:
+  %add.f = fadd double %a, 1.23
+  %mul.f = fmul double %add.f, 4.56
+  %sub.f = fsub double %mul.f, 1.23
+  br label %if.end
+
+if.end:
+  %phi.res = phi double [ %sub.t, %if.true ], [ %sub.f, %if.false ]
+  ret double %phi.res
+}
+
+; Check the insertion point in the case we have a phi taking a constant C and
+; the source block also uses that same constant.
+define double @multiple_doubles_phi(double %a, i1 %cond) {
+; CHECK-LABEL: define double @multiple_doubles_phi(
+; CHECK-SAME: double [[A:%.*]], i1 [[COND:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT:  [[ENTRY:.*]]:
+; CHECK-NEXT:    br i1 [[COND]], label %[[IF_THEN:.*]], label %[[IF_END:.*]]
+; CHECK:       [[IF_THEN]]:
+; CHECK-NEXT:    [[DOUBLE_VAL:%.*]] = load double, ptr @.promoted_doubles.multiple_doubles_phi, align 8
+; CHECK-NEXT:    [[MUL:%.*]] = fmul double [[A]], [[DOUBLE_VAL]]
+; CHECK-NEXT:    br label %[[IF_END]]
+; CHECK:       [[IF_END]]:
+; CHECK-NEXT:    [[PHI_VAL:%.*]] = phi double [ [[DOUBLE_VAL]], %[[IF_THEN]] ], [ [[A]], %[[ENTRY]] ]
+; CHECK-NEXT:    [[DOUBLE_VAL1:%.*]] = load double, ptr getelementptr inbounds ([2 x double], ptr @.promoted_doubles.multiple_doubles_phi, i64 0, i64 1), align 8
+; CHECK-NEXT:    [[RES:%.*]] = fadd double [[PHI_VAL]], [[DOUBLE_VAL1]]
+; CHECK-NEXT:    ret double [[RES]]
+;
+entry:
+  br i1 %cond, label %if.then, label %if.end
+
+if.then:
+  %mul = fmul double %a, 1.23
+  br label %if.end
+
+if.end:
+  %phi.val = phi double [ 1.23, %if.then ], [ %a, %entry ]
+  %res = fadd double %phi.val, 4.56
+  ret double %res
+}
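
For reviewers skimming the patch, a minimal before/after sketch of the transformation in LLVM IR follows. The function @example and the value names are hypothetical, and the "after" IR is hand-written to mirror the CHECK lines in riscv-promote-constant.ll rather than captured from the pass (the pass's index-0 GEP constant-folds to the bare global, as the tests show):

; Before: both literals fail isFPImmLegal for f64, so each would otherwise
; become a separate constant-pool access during lowering.
define double @example(double %a) {
  %x = fadd double %a, 2.718
  %y = fmul double %x, 42.1
  ret double %y
}

; After riscv-promote-const (conceptually): one internal per-function array,
; with each use rewritten to load the matching element.
@.promoted_doubles.example = internal constant [2 x double] [double 2.718, double 4.210000e+01]

define double @example(double %a) {
  %c0.addr = getelementptr inbounds [2 x double], ptr @.promoted_doubles.example, i64 0, i64 0
  %c0 = load double, ptr %c0.addr, align 8
  %x = fadd double %a, %c0
  %c1.addr = getelementptr inbounds [2 x double], ptr @.promoted_doubles.example, i64 0, i64 1
  %c1 = load double, ptr %c1.addr, align 8
  %y = fmul double %x, %c1
  ret double %y
}

The intended win, judging by the pass's two-constant threshold, is that the array's base address can be materialised once per function and each promoted constant then costs a single fld at a small immediate offset, rather than a full constant-pool address computation per literal.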