[RISCV] Introduce pass to promote double constants to a global array #160536
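The patch adds an IR-level pass that collects double literals the target cannot materialize cheaply and moves them into a per-function constant array. As a rough before/after sketch (the function name and constant values are invented for illustration; the ".promoted_doubles." prefix and the "double.addr"/"double.val" value names come from the implementation below):

define double @foo(double %a) {
entry:
  %x = fadd double %a, 0x400921FB54442D18   ; pi
  %y = fmul double %x, 0x4005BF0A8B145769   ; e
  ret double %y
}

becomes, schematically:

@.promoted_doubles.foo = internal constant [2 x double] [double 0x400921FB54442D18, double 0x4005BF0A8B145769]

define double @foo(double %a) {
entry:
  %double.addr = getelementptr inbounds [2 x double], ptr @.promoted_doubles.foo, i64 0, i64 0
  %double.val = load double, ptr %double.addr, align 8
  %double.addr1 = getelementptr inbounds [2 x double], ptr @.promoted_doubles.foo, i64 0, i64 1
  %double.val1 = load double, ptr %double.addr1, align 8
  %x = fadd double %a, %double.val
  %y = fmul double %x, %double.val1
  ret double %y
}

Presumably the win is that a single base-address materialization can be shared by all promoted constants, rather than each literal needing its own constant-pool access.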
@@ -0,0 +1,192 @@
//==- RISCVPromoteConstant.cpp - Promote constant fp to global for RISC-V --==//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#include "RISCV.h"
#include "RISCVSubtarget.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/MapVector.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/CodeGen/TargetLowering.h"
#include "llvm/CodeGen/TargetPassConfig.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/Constant.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/GlobalValue.h"
#include "llvm/IR/GlobalVariable.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/InstIterator.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/Type.h"
#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/Debug.h"

using namespace llvm;

#define DEBUG_TYPE "riscv-promote-const"
#define RISCV_PROMOTE_CONSTANT_NAME "RISC-V Promote Constants"

STATISTIC(NumPromoted, "Number of constant literals promoted to globals");
STATISTIC(NumPromotedUses, "Number of uses of promoted literal constants");

namespace {

class RISCVPromoteConstant : public ModulePass {
Review comment: Also add a new pass manager version.
Reply: On this particular suggestion, I would actively prefer this be done in a follow-on change. We do not have new pass manager versions of most RISCV codegen passes, and we should do these together.
public:
  static char ID;
  RISCVPromoteConstant() : ModulePass(ID) {}

  StringRef getPassName() const override { return RISCV_PROMOTE_CONSTANT_NAME; }

  void getAnalysisUsage(AnalysisUsage &AU) const override {
    AU.addRequired<TargetPassConfig>();
    AU.setPreservesCFG();
  }

  /// Iterate over the functions and promote the double fp constants that
  /// would otherwise go into the constant pool to a constant array.
  bool runOnModule(Module &M) override {
    if (skipModule(M))
      return false;
    // TargetMachine and Subtarget are needed to query isFPImmLegal.
    const TargetPassConfig &TPC = getAnalysis<TargetPassConfig>();
    const TargetMachine &TM = TPC.getTM<TargetMachine>();
    bool Changed = false;
    for (Function &F : M) {
      const RISCVSubtarget &ST = TM.getSubtarget<RISCVSubtarget>(F);
      const RISCVTargetLowering *TLI = ST.getTargetLowering();
      Changed |= runOnFunction(F, TLI);
    }
    return Changed;
  }

private:
  bool runOnFunction(Function &F, const RISCVTargetLowering *TLI);
};
} // end anonymous namespace

char RISCVPromoteConstant::ID = 0;

INITIALIZE_PASS(RISCVPromoteConstant, DEBUG_TYPE, RISCV_PROMOTE_CONSTANT_NAME,
                false, false)

ModulePass *llvm::createRISCVPromoteConstantPass() {
  return new RISCVPromoteConstant();
}

bool RISCVPromoteConstant::runOnFunction(Function &F,
                                         const RISCVTargetLowering *TLI) {
  if (F.hasOptNone() || F.hasOptSize())
    return false;

Review comment: Need to skip optnone functions / skipFunction.
Reply: skipFunction isn't available in a ModulePass, but I've added a skip if OptNone (and you can see we check skipModule in runOnModule).

  // Bail out and make no transformation if the target doesn't support
  // doubles, or if we're not targeting RV64, as we currently see some
  // regressions for those targets.
  if (!TLI->isTypeLegal(MVT::f64) || !TLI->isTypeLegal(MVT::i64))
    return false;

  // Collect all unique double constants and their uses in the function. Use
  // MapVector to preserve insertion order.
  MapVector<ConstantFP *, SmallVector<Use *, 8>> ConstUsesMap;

  for (Instruction &I : instructions(F)) {
    for (Use &U : I.operands()) {
      auto *C = dyn_cast<ConstantFP>(U.get());
      if (!C || !C->getType()->isDoubleTy())
        continue;
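      // Skip constants the target can already materialize cheaply; only
      // constants that would otherwise be lowered via the constant pool are
      // worth promoting. (Illustrative: +0.0 is typically legal to
      // materialize directly, while an arbitrary literal such as pi
      // typically is not; exact legality depends on the subtarget, e.g. on
      // whether Zfa's fli instruction is available.)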
      if (TLI->isFPImmLegal(C->getValueAPF(), MVT::f64,
                            /*ForCodeSize=*/false))
        continue;
      ConstUsesMap[C].push_back(&U);
    }
  }

  int PromotableConstants = ConstUsesMap.size();
  LLVM_DEBUG(dbgs() << "Found " << PromotableConstants
                    << " promotable constants in " << F.getName() << "\n");
  // Bail out if no promotable constants found, or if only one is found.
  if (PromotableConstants < 2) {
    LLVM_DEBUG(dbgs() << "Performing no promotions as insufficient promotable "
                         "constants found\n");
    return false;
  }

  NumPromoted += PromotableConstants;

  // Create a global array containing the promoted constants.
  Module *M = F.getParent();
  Type *DoubleTy = Type::getDoubleTy(M->getContext());

  SmallVector<Constant *, 16> ConstantVector;
  for (auto const &Pair : ConstUsesMap)
    ConstantVector.push_back(Pair.first);

  ArrayType *ArrayTy = ArrayType::get(DoubleTy, ConstantVector.size());
  Constant *GlobalArrayInitializer =
      ConstantArray::get(ArrayTy, ConstantVector);

  auto *GlobalArray = new GlobalVariable(
      *M, ArrayTy,
      /*isConstant=*/true, GlobalValue::InternalLinkage, GlobalArrayInitializer,
      ".promoted_doubles." + F.getName());

  // A cache to hold the loaded value for a given constant within a basic
  // block.
  DenseMap<std::pair<ConstantFP *, BasicBlock *>, Value *> LocalLoads;

  // Replace all uses with the loaded value.
  unsigned Idx = 0;
  for (auto const &Pair : ConstUsesMap) {
    ConstantFP *Const = Pair.first;
    const SmallVector<Use *, 8> &Uses = Pair.second;

    for (Use *U : Uses) {
      Instruction *UserInst = cast<Instruction>(U->getUser());
      BasicBlock *InsertionBB;
      BasicBlock::iterator InsertionPt;

      // If the user is a PHI node, we must insert the load in the
      // corresponding predecessor basic block. Otherwise, it's inserted into
      // the same block as the use.
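      // For example, given "%p = phi double [ %v, %pred ], ..." where %v is
      // the promoted constant's loaded value, the load must be emitted in
      // %pred: nothing can be inserted ahead of the PHI nodes at the start
      // of the PHI's own block.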
      if (auto *PN = dyn_cast<PHINode>(UserInst))
        InsertionBB = PN->getIncomingBlock(*U);
      else
        InsertionBB = UserInst->getParent();

      // It is always safe to insert at the first insertion point in the BB,
      // so do that and let other passes reorder.
      InsertionPt = InsertionBB->getFirstInsertionPt();

      auto CacheKey = std::make_pair(Const, InsertionBB);

      Value *LoadedVal = nullptr;

      // Re-use a load if it exists in the insertion block.
      if (LocalLoads.count(CacheKey)) {
        LoadedVal = LocalLoads.at(CacheKey);
      } else {
        // Otherwise, create a new GEP and Load at the correct insertion point.
        IRBuilder<> Builder(InsertionBB, InsertionPt);
        Value *ElementPtr = Builder.CreateConstInBoundsGEP2_64(
            GlobalArray->getValueType(), GlobalArray, 0, Idx, "double.addr");
        LoadedVal = Builder.CreateLoad(DoubleTy, ElementPtr, "double.val");

Review comment: Alignment?
Reply: The loads will be created with appropriate alignment - but of course the lack of an IR->IR test means that wasn't obvious! I've added tests now, which confirm this.

        // Cache the newly created load for this block.
        LocalLoads[CacheKey] = LoadedVal;
      }

      U->set(LoadedVal);
      ++NumPromotedUses;
    }
    ++Idx;
  }

  return true;
}
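To make the PHI handling and the per-block load cache concrete, here is a hand-written sketch (invented function and constant values, not taken from the patch's tests) of the expected shape of the output. The load feeding a PHI's incoming value is emitted in the incoming block; a non-PHI use gets its load at the first insertion point of its own block (i.e. after any PHIs); and, per the alignment discussion above, the loads carry the natural 8-byte alignment for double:

define double @g(i1 %c, double %a) {
entry:
  br i1 %c, label %then, label %merge
then:
  br label %merge
merge:
  %p = phi double [ 0x400921FB54442D18, %entry ], [ %a, %then ]
  %r = fadd double %p, 0x4005BF0A8B145769
  ret double %r
}

would become, schematically:

@.promoted_doubles.g = internal constant [2 x double] [double 0x400921FB54442D18, double 0x4005BF0A8B145769]

define double @g(i1 %c, double %a) {
entry:
  %double.addr = getelementptr inbounds [2 x double], ptr @.promoted_doubles.g, i64 0, i64 0
  %double.val = load double, ptr %double.addr, align 8
  br i1 %c, label %then, label %merge
then:
  br label %merge
merge:
  %p = phi double [ %double.val, %entry ], [ %a, %then ]
  %double.addr1 = getelementptr inbounds [2 x double], ptr @.promoted_doubles.g, i64 0, i64 1
  %double.val1 = load double, ptr %double.addr1, align 8
  %r = fadd double %p, %double.val1
  ret double %r
}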
Updated test checks (GlobalISel variadic-argument tests):
@@ -67,8 +67,8 @@ define i32 @va1(ptr %fmt, ...) {
 ; RV32-NEXT: G_VASTART [[FRAME_INDEX1]](p0) :: (store (s32) into %ir.va)
 ; RV32-NEXT: [[LOAD:%[0-9]+]]:_(p0) = G_LOAD [[FRAME_INDEX1]](p0) :: (dereferenceable load (p0) from %ir.va)
 ; RV32-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
-; RV32-NEXT: %20:_(p0) = nuw nusw inbounds G_PTR_ADD [[LOAD]], [[C1]](s32)
-; RV32-NEXT: G_STORE %20(p0), [[FRAME_INDEX1]](p0) :: (store (p0) into %ir.va)
+; RV32-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p0) = nuw nusw inbounds G_PTR_ADD [[LOAD]], [[C1]](s32)
+; RV32-NEXT: G_STORE [[PTR_ADD7]](p0), [[FRAME_INDEX1]](p0) :: (store (p0) into %ir.va)
 ; RV32-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[LOAD]](p0) :: (load (s32) from %ir.argp.cur)
 ; RV32-NEXT: $x10 = COPY [[LOAD1]](s32)
 ; RV32-NEXT: PseudoRET implicit $x10

Review comment: Is this change actually from the review, or just a stale auto-update? It sorta looks like the latter.
@@ -105,8 +105,8 @@ define i32 @va1(ptr %fmt, ...) {
 ; RV64-NEXT: G_VASTART [[FRAME_INDEX1]](p0) :: (store (s64) into %ir.va)
 ; RV64-NEXT: [[LOAD:%[0-9]+]]:_(p0) = G_LOAD [[FRAME_INDEX1]](p0) :: (dereferenceable load (p0) from %ir.va, align 4)
 ; RV64-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
-; RV64-NEXT: %20:_(p0) = nuw nusw inbounds G_PTR_ADD [[LOAD]], [[C1]](s64)
-; RV64-NEXT: G_STORE %20(p0), [[FRAME_INDEX1]](p0) :: (store (p0) into %ir.va, align 4)
+; RV64-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p0) = nuw nusw inbounds G_PTR_ADD [[LOAD]], [[C1]](s64)
+; RV64-NEXT: G_STORE [[PTR_ADD7]](p0), [[FRAME_INDEX1]](p0) :: (store (p0) into %ir.va, align 4)
 ; RV64-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[LOAD]](p0) :: (load (s32) from %ir.argp.cur)
 ; RV64-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD1]](s32)
 ; RV64-NEXT: $x10 = COPY [[ANYEXT]](s64)
@@ -687,8 +687,8 @@ define i64 @va2(ptr %fmt, ...) nounwind {
 ; RV32-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[ADD]], [[C2]]
 ; RV32-NEXT: [[INTTOPTR:%[0-9]+]]:_(p0) = G_INTTOPTR [[ADD]](s32)
 ; RV32-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-; RV32-NEXT: %25:_(p0) = nuw nusw inbounds G_PTR_ADD [[INTTOPTR]], [[C3]](s32)
-; RV32-NEXT: G_STORE %25(p0), [[FRAME_INDEX1]](p0) :: (store (p0) into %ir.va)
+; RV32-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p0) = nuw nusw inbounds G_PTR_ADD [[INTTOPTR]], [[C3]](s32)
+; RV32-NEXT: G_STORE [[PTR_ADD7]](p0), [[FRAME_INDEX1]](p0) :: (store (p0) into %ir.va)
 ; RV32-NEXT: [[INTTOPTR1:%[0-9]+]]:_(p0) = G_INTTOPTR [[AND]](s32)
 ; RV32-NEXT: [[LOAD1:%[0-9]+]]:_(s64) = G_LOAD [[INTTOPTR1]](p0) :: (load (s64) from %ir.3)
 ; RV32-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD1]](s64)
@@ -733,8 +733,8 @@ define i64 @va2(ptr %fmt, ...) nounwind {
 ; RV64-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[ADD]], [[C2]]
 ; RV64-NEXT: [[INTTOPTR:%[0-9]+]]:_(p0) = G_INTTOPTR [[ADD]](s32)
 ; RV64-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
-; RV64-NEXT: %25:_(p0) = nuw nusw inbounds G_PTR_ADD [[INTTOPTR]], [[C3]](s64)
-; RV64-NEXT: G_STORE %25(p0), [[FRAME_INDEX1]](p0) :: (store (p0) into %ir.va, align 4)
+; RV64-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p0) = nuw nusw inbounds G_PTR_ADD [[INTTOPTR]], [[C3]](s64)
+; RV64-NEXT: G_STORE [[PTR_ADD7]](p0), [[FRAME_INDEX1]](p0) :: (store (p0) into %ir.va, align 4)
 ; RV64-NEXT: [[INTTOPTR1:%[0-9]+]]:_(p0) = G_INTTOPTR [[AND]](s32)
 ; RV64-NEXT: [[LOAD1:%[0-9]+]]:_(s64) = G_LOAD [[INTTOPTR1]](p0) :: (load (s64) from %ir.3)
 ; RV64-NEXT: $x10 = COPY [[LOAD1]](s64)
@@ -974,8 +974,8 @@ define i64 @va3(i32 %a, i64 %b, ...) nounwind {
 ; RV32-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[ADD]], [[C2]]
 ; RV32-NEXT: [[INTTOPTR:%[0-9]+]]:_(p0) = G_INTTOPTR [[ADD]](s32)
 ; RV32-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-; RV32-NEXT: %24:_(p0) = nuw nusw inbounds G_PTR_ADD [[INTTOPTR]], [[C3]](s32)
-; RV32-NEXT: G_STORE %24(p0), [[FRAME_INDEX1]](p0) :: (store (p0) into %ir.va)
+; RV32-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p0) = nuw nusw inbounds G_PTR_ADD [[INTTOPTR]], [[C3]](s32)
+; RV32-NEXT: G_STORE [[PTR_ADD5]](p0), [[FRAME_INDEX1]](p0) :: (store (p0) into %ir.va)
 ; RV32-NEXT: [[INTTOPTR1:%[0-9]+]]:_(p0) = G_INTTOPTR [[AND]](s32)
 ; RV32-NEXT: [[LOAD1:%[0-9]+]]:_(s64) = G_LOAD [[INTTOPTR1]](p0) :: (load (s64) from %ir.3)
 ; RV32-NEXT: [[ADD1:%[0-9]+]]:_(s64) = G_ADD [[MV]], [[LOAD1]]
@@ -1020,8 +1020,8 @@ define i64 @va3(i32 %a, i64 %b, ...) nounwind {
 ; RV64-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[ADD]], [[C2]]
 ; RV64-NEXT: [[INTTOPTR:%[0-9]+]]:_(p0) = G_INTTOPTR [[ADD]](s32)
 ; RV64-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
-; RV64-NEXT: %25:_(p0) = nuw nusw inbounds G_PTR_ADD [[INTTOPTR]], [[C3]](s64)
-; RV64-NEXT: G_STORE %25(p0), [[FRAME_INDEX1]](p0) :: (store (p0) into %ir.va, align 4)
+; RV64-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p0) = nuw nusw inbounds G_PTR_ADD [[INTTOPTR]], [[C3]](s64)
+; RV64-NEXT: G_STORE [[PTR_ADD6]](p0), [[FRAME_INDEX1]](p0) :: (store (p0) into %ir.va, align 4)
 ; RV64-NEXT: [[INTTOPTR1:%[0-9]+]]:_(p0) = G_INTTOPTR [[AND]](s32)
 ; RV64-NEXT: [[LOAD1:%[0-9]+]]:_(s64) = G_LOAD [[INTTOPTR1]](p0) :: (load (s64) from %ir.3)
 ; RV64-NEXT: [[ADD1:%[0-9]+]]:_(s64) = G_ADD [[COPY1]], [[LOAD1]]
@@ -1724,8 +1724,8 @@ define i32 @va_large_stack(ptr %fmt, ...) {
 ; RV32-NEXT: G_VASTART [[FRAME_INDEX2]](p0) :: (store (s32) into %ir.va)
 ; RV32-NEXT: [[LOAD:%[0-9]+]]:_(p0) = G_LOAD [[FRAME_INDEX2]](p0) :: (dereferenceable load (p0) from %ir.va)
 ; RV32-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
-; RV32-NEXT: %21:_(p0) = nuw nusw inbounds G_PTR_ADD [[LOAD]], [[C1]](s32)
-; RV32-NEXT: G_STORE %21(p0), [[FRAME_INDEX2]](p0) :: (store (p0) into %ir.va)
+; RV32-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p0) = nuw nusw inbounds G_PTR_ADD [[LOAD]], [[C1]](s32)
+; RV32-NEXT: G_STORE [[PTR_ADD7]](p0), [[FRAME_INDEX2]](p0) :: (store (p0) into %ir.va)
 ; RV32-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[LOAD]](p0) :: (load (s32) from %ir.argp.cur)
 ; RV32-NEXT: $x10 = COPY [[LOAD1]](s32)
 ; RV32-NEXT: PseudoRET implicit $x10
@@ -1763,8 +1763,8 @@ define i32 @va_large_stack(ptr %fmt, ...) {
 ; RV64-NEXT: G_VASTART [[FRAME_INDEX2]](p0) :: (store (s64) into %ir.va)
 ; RV64-NEXT: [[LOAD:%[0-9]+]]:_(p0) = G_LOAD [[FRAME_INDEX2]](p0) :: (dereferenceable load (p0) from %ir.va, align 4)
 ; RV64-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
-; RV64-NEXT: %21:_(p0) = nuw nusw inbounds G_PTR_ADD [[LOAD]], [[C1]](s64)
-; RV64-NEXT: G_STORE %21(p0), [[FRAME_INDEX2]](p0) :: (store (p0) into %ir.va, align 4)
+; RV64-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p0) = nuw nusw inbounds G_PTR_ADD [[LOAD]], [[C1]](s64)
+; RV64-NEXT: G_STORE [[PTR_ADD7]](p0), [[FRAME_INDEX2]](p0) :: (store (p0) into %ir.va, align 4)
 ; RV64-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[LOAD]](p0) :: (load (s32) from %ir.argp.cur)
 ; RV64-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD1]](s32)
 ; RV64-NEXT: $x10 = COPY [[ANYEXT]](s64)