19 changes: 15 additions & 4 deletions lld/test/ELF/hexagon-shared.s
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,13 @@ r0 = add(r1,##bar@GOT)
{ r0 = add(r0,##bar@GOT)
memw(r0) = r2 }

# R_HEX_GOT_16_X, pred add
if (p0) r0 = add(r0,##bar@GOT)
if (!p0) r0 = add(r0,##bar@GOT)
{ p0 = cmp.gtu(r0, r1)
if (p0.new) r0 = add(r0,##bar@GOT) }
{ p0 = cmp.gtu(r0, r1)
if (!p0.new) r0 = add(r0,##bar@GOT) }

# foo is local so no plt will be generated
foo:
Expand Down Expand Up @@ -78,12 +85,16 @@ pvar:
# PLT-NEXT: r28 = memw(r14+#0) }
# PLT-NEXT: jumpr r28 }

# TEXT: 8c 00 01 00 0001008c
# TEXT: { call 0x102d0 }
# TEXT: if (p0) jump:nt 0x102d0
# TEXT: r0 = #0 ; jump 0x102d0
# TEXT: bc 00 01 00 000100bc
# TEXT: { call 0x10300 }
# TEXT: if (p0) jump:nt 0x10300
# TEXT: r0 = #0 ; jump 0x10300
# TEXT: r0 = add(r1,##-65548)
# TEXT: r0 = add(r0,##-65548); memw(r0+#0) = r2 }
# TEXT: if (p0) r0 = add(r0,##-65548)
# TEXT: if (!p0) r0 = add(r0,##-65548)
# TEXT: if (p0.new) r0 = add(r0,##-65548)
# TEXT: if (!p0.new) r0 = add(r0,##-65548)

# GOT: .got:
# GOT: 00 00 00 00 00000000 <unknown>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@
#include "llvm/ExecutionEngine/Orc/ExecutionUtils.h"
#include "llvm/ExecutionEngine/Orc/ExecutorProcessControl.h"
#include "llvm/ExecutionEngine/Orc/IRCompileLayer.h"
#include "llvm/ExecutionEngine/Orc/IRPartitionLayer.h"
#include "llvm/ExecutionEngine/Orc/IRTransformLayer.h"
#include "llvm/ExecutionEngine/Orc/JITTargetMachineBuilder.h"
#include "llvm/ExecutionEngine/Orc/RTDyldObjectLinkingLayer.h"
Expand Down Expand Up @@ -48,6 +49,7 @@ class KaleidoscopeJIT {
RTDyldObjectLinkingLayer ObjectLayer;
IRCompileLayer CompileLayer;
IRTransformLayer OptimizeLayer;
IRPartitionLayer IPLayer;
CompileOnDemandLayer CODLayer;

JITDylib &MainJD;
Expand All @@ -68,8 +70,8 @@ class KaleidoscopeJIT {
CompileLayer(*this->ES, ObjectLayer,
std::make_unique<ConcurrentIRCompiler>(std::move(JTMB))),
OptimizeLayer(*this->ES, CompileLayer, optimizeModule),
CODLayer(*this->ES, OptimizeLayer,
this->EPCIU->getLazyCallThroughManager(),
IPLayer(*this->ES, OptimizeLayer),
CODLayer(*this->ES, IPLayer, this->EPCIU->getLazyCallThroughManager(),
[this] { return this->EPCIU->createIndirectStubsManager(); }),
MainJD(this->ES->createBareJITDylib("<main>")) {
MainJD.addGenerator(
Expand Down
9 changes: 6 additions & 3 deletions llvm/examples/SpeculativeJIT/SpeculativeJIT.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
#include "llvm/ExecutionEngine/Orc/Core.h"
#include "llvm/ExecutionEngine/Orc/ExecutionUtils.h"
#include "llvm/ExecutionEngine/Orc/IRCompileLayer.h"
#include "llvm/ExecutionEngine/Orc/IRPartitionLayer.h"
#include "llvm/ExecutionEngine/Orc/IndirectionUtils.h"
#include "llvm/ExecutionEngine/Orc/JITTargetMachineBuilder.h"
#include "llvm/ExecutionEngine/Orc/RTDyldObjectLinkingLayer.h"
Expand Down Expand Up @@ -109,13 +110,14 @@ class SpeculativeJIT {
IndirectStubsManagerBuilderFunction ISMBuilder,
std::unique_ptr<DynamicLibrarySearchGenerator> ProcessSymbolsGenerator)
: ES(std::move(ES)), DL(std::move(DL)),
MainJD(this->ES->createBareJITDylib("<main>")), LCTMgr(std::move(LCTMgr)),
MainJD(this->ES->createBareJITDylib("<main>")),
LCTMgr(std::move(LCTMgr)),
CompileLayer(*this->ES, ObjLayer,
std::make_unique<ConcurrentIRCompiler>(std::move(JTMB))),
S(Imps, *this->ES),
SpeculateLayer(*this->ES, CompileLayer, S, Mangle, BlockFreqQuery()),
CODLayer(*this->ES, SpeculateLayer, *this->LCTMgr,
std::move(ISMBuilder)) {
IPLayer(*this->ES, SpeculateLayer),
CODLayer(*this->ES, IPLayer, *this->LCTMgr, std::move(ISMBuilder)) {
MainJD.addGenerator(std::move(ProcessSymbolsGenerator));
this->CODLayer.setImplMap(&Imps);
ExitOnErr(S.addSpeculationRuntime(MainJD, Mangle));
Expand All @@ -141,6 +143,7 @@ class SpeculativeJIT {
Speculator S;
RTDyldObjectLinkingLayer ObjLayer{*ES, createMemMgr};
IRSpeculationLayer SpeculateLayer;
IRPartitionLayer IPLayer;
CompileOnDemandLayer CODLayer;
};

Expand Down
36 changes: 2 additions & 34 deletions llvm/include/llvm/ExecutionEngine/Orc/CompileOnDemandLayer.h
Original file line number Diff line number Diff line change
Expand Up @@ -53,37 +53,15 @@ namespace llvm {
namespace orc {

class CompileOnDemandLayer : public IRLayer {
friend class PartitioningIRMaterializationUnit;

public:
/// Builder for IndirectStubsManagers.
using IndirectStubsManagerBuilder =
std::function<std::unique_ptr<IndirectStubsManager>()>;

using GlobalValueSet = std::set<const GlobalValue *>;

/// Partitioning function.
using PartitionFunction =
std::function<std::optional<GlobalValueSet>(GlobalValueSet Requested)>;

/// Off-the-shelf partitioning which compiles all requested symbols (usually
/// a single function at a time).
static std::optional<GlobalValueSet>
compileRequested(GlobalValueSet Requested);

/// Off-the-shelf partitioning which compiles whole modules whenever any
/// symbol in them is requested.
static std::optional<GlobalValueSet>
compileWholeModule(GlobalValueSet Requested);

/// Construct a CompileOnDemandLayer.
CompileOnDemandLayer(ExecutionSession &ES, IRLayer &BaseLayer,
LazyCallThroughManager &LCTMgr,
IndirectStubsManagerBuilder BuildIndirectStubsManager);

/// Sets the partition function.
void setPartitionFunction(PartitionFunction Partition);

LazyCallThroughManager &LCTMgr,
IndirectStubsManagerBuilder BuildIndirectStubsManager);
/// Sets the ImplSymbolMap
void setImplMap(ImplSymbolMap *Imp);

Expand All @@ -110,22 +88,12 @@ class CompileOnDemandLayer : public IRLayer {

PerDylibResources &getPerDylibResources(JITDylib &TargetD);

void cleanUpModule(Module &M);

void expandPartition(GlobalValueSet &Partition);

void emitPartition(std::unique_ptr<MaterializationResponsibility> R,
ThreadSafeModule TSM,
IRMaterializationUnit::SymbolNameToDefinitionMap Defs);

mutable std::mutex CODLayerMutex;

IRLayer &BaseLayer;
LazyCallThroughManager &LCTMgr;
IndirectStubsManagerBuilder BuildIndirectStubsManager;
PerDylibResourcesMap DylibResources;
PartitionFunction Partition = compileRequested;
SymbolLinkagePromoter PromoteSymbols;
ImplSymbolMap *AliaseeImpls = nullptr;
};

Expand Down
85 changes: 85 additions & 0 deletions llvm/include/llvm/ExecutionEngine/Orc/IRPartitionLayer.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,85 @@
//===- IRPartitionLayer.h - Partition IR module on lookup -------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// JIT layer for breaking up modules into smaller submodules that contain only
// the looked-up symbols.
//
//===----------------------------------------------------------------------===//

#ifndef LLVM_EXECUTIONENGINE_ORC_IRPARTITIONLAYER_H
#define LLVM_EXECUTIONENGINE_ORC_IRPARTITIONLAYER_H

#include "llvm/ExecutionEngine/Orc/IndirectionUtils.h"
#include "llvm/ExecutionEngine/Orc/Layer.h"
#include "llvm/IR/Attributes.h"
#include "llvm/IR/Constant.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/GlobalAlias.h"
#include "llvm/IR/GlobalValue.h"
#include "llvm/IR/GlobalVariable.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Mangler.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/Type.h"

namespace llvm {
namespace orc {

/// A layer that breaks up IR modules into smaller submodules that contain only
/// the looked-up symbols.
class IRPartitionLayer : public IRLayer {
// Friend declaration: gives PartitioningIRMaterializationUnit access to the
// private members of this layer.
friend class PartitioningIRMaterializationUnit;

public:
/// Set of global values, as passed to and returned from a PartitionFunction.
using GlobalValueSet = std::set<const GlobalValue *>;

/// Partitioning function: receives the requested global values and returns
/// either a set of global values or std::nullopt.
/// NOTE(review): how an empty optional is handled is decided by the
/// implementation, which is not visible in this header -- confirm there.
using PartitionFunction =
std::function<std::optional<GlobalValueSet>(GlobalValueSet Requested)>;

/// Construct an IRPartitionLayer.
/// \param ES the execution session this layer is created for.
/// \param BaseLayer the IR layer stored in the BaseLayer member.
IRPartitionLayer(ExecutionSession &ES, IRLayer &BaseLayer);

/// Off-the-shelf partitioning which compiles all requested symbols (usually
/// a single function at a time). This is the default partition function.
static std::optional<GlobalValueSet>
compileRequested(GlobalValueSet Requested);

/// Off-the-shelf partitioning which compiles whole modules whenever any
/// symbol in them is requested.
static std::optional<GlobalValueSet>
compileWholeModule(GlobalValueSet Requested);

/// Sets the partition function.
void setPartitionFunction(PartitionFunction Partition);

/// Emits the given module. This should not be called by clients: it will be
/// called by the JIT when a definition added via the add method is requested.
void emit(std::unique_ptr<MaterializationResponsibility> R,
ThreadSafeModule TSM) override;

private:
// NOTE(review): the three helpers below are only declared here; their exact
// behavior lives in the implementation file -- confirm before relying on it.
void cleanUpModule(Module &M);

void expandPartition(GlobalValueSet &Partition);

void emitPartition(std::unique_ptr<MaterializationResponsibility> R,
ThreadSafeModule TSM,
IRMaterializationUnit::SymbolNameToDefinitionMap Defs);

// Layer supplied at construction time; presumably the layer the partitioned
// modules are handed to -- confirm against the implementation.
IRLayer &BaseLayer;
// Active partition function; defaults to compileRequested and can be replaced
// through setPartitionFunction().
PartitionFunction Partition = compileRequested;
SymbolLinkagePromoter PromoteSymbols;
};

} // namespace orc
} // namespace llvm

#endif
Original file line number Diff line number Diff line change
@@ -0,0 +1,107 @@
//===- JITLinkRedirectableSymbolManager.h - JITLink redirection -*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// Redirectable Symbol Manager implementation using JITLink
//
//===----------------------------------------------------------------------===//

#ifndef LLVM_EXECUTIONENGINE_ORC_JITLINKREDIRECABLEMANAGER_H
#define LLVM_EXECUTIONENGINE_ORC_JITLINKREDIRECABLEMANAGER_H

#include "llvm/ExecutionEngine/Orc/ObjectLinkingLayer.h"
#include "llvm/ExecutionEngine/Orc/RedirectionManager.h"
#include "llvm/Support/StringSaver.h"

namespace llvm {
namespace orc {

/// RedirectableSymbolManager implementation built on top of JITLink.
///
/// The manager is also a ResourceManager: once successfully constructed it is
/// registered with the ExecutionSession of the ObjectLinkingLayer and is
/// deregistered again on destruction.
class JITLinkRedirectableSymbolManager : public RedirectableSymbolManager,
                                         public ResourceManager {
public:
  /// Create redirection manager that uses JITLink based implementation.
  ///
  /// \returns the new manager, or an error if no anonymous-pointer or
  /// pointer-jump-stub creator is available for the session's target triple.
  static Expected<std::unique_ptr<RedirectableSymbolManager>>
  Create(ObjectLinkingLayer &ObjLinkingLayer, JITDylib &JD) {
    Error Err = Error::success();
    auto RM = std::unique_ptr<RedirectableSymbolManager>(
        new JITLinkRedirectableSymbolManager(ObjLinkingLayer, JD, Err));
    if (Err)
      return std::move(Err);
    return std::move(RM);
  }

  void emitRedirectableSymbols(std::unique_ptr<MaterializationResponsibility> R,
                               const SymbolAddrMap &InitialDests) override;

  Error redirect(JITDylib &TargetJD, const SymbolAddrMap &NewDests) override;

  Error handleRemoveResources(JITDylib &TargetJD, ResourceKey K) override;

  void handleTransferResources(JITDylib &TargetJD, ResourceKey DstK,
                               ResourceKey SrcK) override;

private:
  using StubHandle = unsigned;
  constexpr static unsigned StubBlockSize = 256;
  // NOTE(review): the "Prefix"/"TableName" values for jump stubs look swapped
  // relative to the stub-pointer pair ("$__IND_JUMP_STUBS" is used as a
  // per-symbol prefix while "$IND_JUMP_" is the table name). Left untouched
  // because the strings end up in emitted symbol names -- confirm intent.
  constexpr static StringRef JumpStubPrefix = "$__IND_JUMP_STUBS";
  constexpr static StringRef StubPtrPrefix = "$IND_JUMP_PTR_";
  constexpr static StringRef JumpStubTableName = "$IND_JUMP_";
  constexpr static StringRef StubPtrTableName = "$__IND_JUMP_PTRS";

  /// Fallible constructor; use Create() instead.
  ///
  /// On failure \p Err is set and the object is NOT registered as a resource
  /// manager. On success the object registers itself with the session.
  JITLinkRedirectableSymbolManager(ObjectLinkingLayer &ObjLinkingLayer,
                                   JITDylib &JD, Error &Err)
      : ObjLinkingLayer(ObjLinkingLayer), JD(JD),
        AnonymousPtrCreator(jitlink::getAnonymousPointerCreator(
            ObjLinkingLayer.getExecutionSession().getTargetTriple())),
        PtrJumpStubCreator(jitlink::getPointerJumpStubCreator(
            ObjLinkingLayer.getExecutionSession().getTargetTriple())) {
    // Standard fallible-constructor idiom: marks the incoming Error as checked
    // so that it may be assigned to, and resets it to "unchecked" on exit so
    // the caller is forced to inspect it.
    ErrorAsOutParameter EAO(&Err);
    if (!AnonymousPtrCreator || !PtrJumpStubCreator)
      Err = make_error<StringError>("Architecture not supported",
                                    inconvertibleErrorCode());
    if (Err)
      return;
    ObjLinkingLayer.getExecutionSession().registerResourceManager(*this);
    Registered = true;
  }

  ~JITLinkRedirectableSymbolManager() {
    // Only undo the registration if the constructor actually performed it;
    // a failed construction never registered this object.
    if (Registered)
      ObjLinkingLayer.getExecutionSession().deregisterResourceManager(*this);
  }

  /// Name of the I-th jump stub symbol (JumpStubPrefix followed by I).
  // NOTE(review): the returned StringRef points into the session's string
  // pool while the interned SymbolStringPtr temporary is released here --
  // confirm callers do not keep the reference across pool clean-ups.
  StringRef JumpStubSymbolName(unsigned I) {
    return *ObjLinkingLayer.getExecutionSession().intern(
        (JumpStubPrefix + Twine(I)).str());
  }

  /// Name of the I-th stub pointer symbol (StubPtrPrefix followed by I).
  StringRef StubPtrSymbolName(unsigned I) {
    return *ObjLinkingLayer.getExecutionSession().intern(
        (StubPtrPrefix + Twine(I)).str());
  }

  /// Number of entries currently held in AvailableStubs.
  unsigned GetNumAvailableStubs() const { return AvailableStubs.size(); }

  Error redirectInner(JITDylib &TargetJD, const SymbolAddrMap &NewDests);
  Error grow(unsigned Need);

  ObjectLinkingLayer &ObjLinkingLayer;
  JITDylib &JD;
  jitlink::AnonymousPointerCreator AnonymousPtrCreator;
  jitlink::PointerJumpStubCreator PtrJumpStubCreator;

  std::vector<StubHandle> AvailableStubs;
  using SymbolToStubMap = DenseMap<SymbolStringPtr, StubHandle>;
  DenseMap<JITDylib *, SymbolToStubMap> SymbolToStubs;
  std::vector<ExecutorSymbolDef> JumpStubs;
  std::vector<ExecutorSymbolDef> StubPointers;
  DenseMap<ResourceKey, std::vector<SymbolStringPtr>> TrackedResources;

  std::mutex Mutex;

  // True once registerResourceManager() has been called for this object.
  bool Registered = false;
};

} // namespace orc
} // namespace llvm

#endif
7 changes: 4 additions & 3 deletions llvm/include/llvm/ExecutionEngine/Orc/LLJIT.h
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@
#include "llvm/ExecutionEngine/Orc/CompileUtils.h"
#include "llvm/ExecutionEngine/Orc/ExecutionUtils.h"
#include "llvm/ExecutionEngine/Orc/IRCompileLayer.h"
#include "llvm/ExecutionEngine/Orc/IRPartitionLayer.h"
#include "llvm/ExecutionEngine/Orc/IRTransformLayer.h"
#include "llvm/ExecutionEngine/Orc/JITTargetMachineBuilder.h"
#include "llvm/ExecutionEngine/Orc/ThreadSafeModule.h"
Expand Down Expand Up @@ -271,9 +272,8 @@ class LLLazyJIT : public LLJIT {
public:

/// Sets the partition function.
void
setPartitionFunction(CompileOnDemandLayer::PartitionFunction Partition) {
CODLayer->setPartitionFunction(std::move(Partition));
void setPartitionFunction(IRPartitionLayer::PartitionFunction Partition) {
IPLayer->setPartitionFunction(std::move(Partition));
}

/// Returns a reference to the on-demand layer.
Expand All @@ -293,6 +293,7 @@ class LLLazyJIT : public LLJIT {
LLLazyJIT(LLLazyJITBuilderState &S, Error &Err);

std::unique_ptr<LazyCallThroughManager> LCTMgr;
std::unique_ptr<IRPartitionLayer> IPLayer;
std::unique_ptr<CompileOnDemandLayer> CODLayer;
};

Expand Down
181 changes: 181 additions & 0 deletions llvm/include/llvm/ExecutionEngine/Orc/ReOptimizeLayer.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,181 @@
//===- ReOptimizeLayer.h - Re-optimization layer interface ------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// Re-optimization layer interface.
//
//===----------------------------------------------------------------------===//
#ifndef LLVM_EXECUTIONENGINE_ORC_REOPTIMIZELAYER_H
#define LLVM_EXECUTIONENGINE_ORC_REOPTIMIZELAYER_H

#include "llvm/ExecutionEngine/Orc/Core.h"
#include "llvm/ExecutionEngine/Orc/Layer.h"
#include "llvm/ExecutionEngine/Orc/Mangling.h"
#include "llvm/ExecutionEngine/Orc/RedirectionManager.h"
#include "llvm/ExecutionEngine/Orc/ThreadSafeModule.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/Cloning.h"

namespace llvm {
namespace orc {

class ReOptimizeLayer : public IRLayer, public ResourceManager {
public:
using ReOptMaterializationUnitID = uint64_t;

/// AddProfilerFunc will be called when ReOptimizeLayer emits the first
/// version of a materialization unit in order to inject profiling code and
/// reoptimization request code.
using AddProfilerFunc = unique_function<Error(
ReOptimizeLayer &Parent, ReOptMaterializationUnitID MUID,
unsigned CurVersion, ThreadSafeModule &TSM)>;

/// ReOptimizeFunc will be called when ReOptimizeLayer reoptimization of a
/// materialization unit was requested in order to reoptimize the IR module
/// based on profile data. OldRT is the ResourceTracker that tracks the old
/// function definitions. The OldRT must be kept alive until it can be
/// guaranteed that every invocation of the old function definitions has been
/// terminated.
using ReOptimizeFunc = unique_function<Error(
ReOptimizeLayer &Parent, ReOptMaterializationUnitID MUID,
unsigned CurVersion, ResourceTrackerSP OldRT, ThreadSafeModule &TSM)>;

ReOptimizeLayer(ExecutionSession &ES, DataLayout &DL, IRLayer &BaseLayer,
RedirectableSymbolManager &RM)
: IRLayer(ES, BaseLayer.getManglingOptions()), ES(ES), Mangle(ES, DL),
BaseLayer(BaseLayer), RSManager(RM), ReOptFunc(identity),
ProfilerFunc(reoptimizeIfCallFrequent) {}

void setReoptimizeFunc(ReOptimizeFunc ReOptFunc) {
this->ReOptFunc = std::move(ReOptFunc);
}

void setAddProfilerFunc(AddProfilerFunc ProfilerFunc) {
this->ProfilerFunc = std::move(ProfilerFunc);
}

/// Registers reoptimize runtime dispatch handlers to given PlatformJD. The
/// reoptimization request will not be handled if dispatch handler is not
/// registered by using this function.
Error reigsterRuntimeFunctions(JITDylib &PlatformJD);

/// Emits the given module. This should not be called by clients: it will be
/// called by the JIT when a definition added via the add method is requested.
void emit(std::unique_ptr<MaterializationResponsibility> R,
ThreadSafeModule TSM) override;

static const uint64_t CallCountThreshold = 10;

/// Basic AddProfilerFunc that reoptimizes the function when the call count
/// exceeds CallCountThreshold.
static Error reoptimizeIfCallFrequent(ReOptimizeLayer &Parent,
ReOptMaterializationUnitID MUID,
unsigned CurVersion,
ThreadSafeModule &TSM);

static Error identity(ReOptimizeLayer &Parent,
ReOptMaterializationUnitID MUID, unsigned CurVersion,
ResourceTrackerSP OldRT, ThreadSafeModule &TSM) {
return Error::success();
}

// Create IR reoptimize request fucntion call.
static void createReoptimizeCall(Module &M, Instruction &IP,
GlobalVariable *ArgBuffer);

Error handleRemoveResources(JITDylib &JD, ResourceKey K) override;
void handleTransferResources(JITDylib &JD, ResourceKey DstK,
ResourceKey SrcK) override;

private:
class ReOptMaterializationUnitState {
public:
ReOptMaterializationUnitState() = default;
ReOptMaterializationUnitState(ReOptMaterializationUnitID ID,
ThreadSafeModule TSM)
: ID(ID), TSM(std::move(TSM)) {}
ReOptMaterializationUnitState(ReOptMaterializationUnitState &&Other)
: ID(Other.ID), TSM(std::move(Other.TSM)), RT(std::move(Other.RT)),
Reoptimizing(std::move(Other.Reoptimizing)),
CurVersion(Other.CurVersion) {}

ReOptMaterializationUnitID getID() { return ID; }

const ThreadSafeModule &getThreadSafeModule() { return TSM; }

ResourceTrackerSP getResourceTracker() {
std::unique_lock<std::mutex> Lock(Mutex);
return RT;
}

void setResourceTracker(ResourceTrackerSP RT) {
std::unique_lock<std::mutex> Lock(Mutex);
this->RT = RT;
}

uint32_t getCurVersion() {
std::unique_lock<std::mutex> Lock(Mutex);
return CurVersion;
}

bool tryStartReoptimize();
void reoptimizeSucceeded();
void reoptimizeFailed();

private:
std::mutex Mutex;
ReOptMaterializationUnitID ID;
ThreadSafeModule TSM;
ResourceTrackerSP RT;
bool Reoptimizing = false;
uint32_t CurVersion = 0;
};

using SPSReoptimizeArgList =
shared::SPSArgList<ReOptMaterializationUnitID, uint32_t>;
using SendErrorFn = unique_function<void(Error)>;

Expected<SymbolMap> emitMUImplSymbols(ReOptMaterializationUnitState &MUState,
uint32_t Version, JITDylib &JD,
ThreadSafeModule TSM);

void rt_reoptimize(SendErrorFn SendResult, ReOptMaterializationUnitID MUID,
uint32_t CurVersion);

static Expected<Constant *>
createReoptimizeArgBuffer(Module &M, ReOptMaterializationUnitID MUID,
uint32_t CurVersion);

ReOptMaterializationUnitState &
createMaterializationUnitState(const ThreadSafeModule &TSM);

void
registerMaterializationUnitResource(ResourceKey Key,
ReOptMaterializationUnitState &State);

ReOptMaterializationUnitState &
getMaterializationUnitState(ReOptMaterializationUnitID MUID);

ExecutionSession &ES;
MangleAndInterner Mangle;
IRLayer &BaseLayer;
RedirectableSymbolManager &RSManager;

ReOptimizeFunc ReOptFunc;
AddProfilerFunc ProfilerFunc;

std::mutex Mutex;
std::map<ReOptMaterializationUnitID, ReOptMaterializationUnitState> MUStates;
DenseMap<ResourceKey, DenseSet<ReOptMaterializationUnitID>> MUResources;
ReOptMaterializationUnitID NextID = 1;
};

} // namespace orc
} // namespace llvm

#endif
103 changes: 103 additions & 0 deletions llvm/include/llvm/ExecutionEngine/Orc/RedirectionManager.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,103 @@
//===- RedirectionManager.h - Redirection manager interface -----*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// Redirection manager interface that redirects a call to symbol to another.
//
//===----------------------------------------------------------------------===//

#ifndef LLVM_EXECUTIONENGINE_ORC_REDIRECTIONMANAGER_H
#define LLVM_EXECUTIONENGINE_ORC_REDIRECTIONMANAGER_H

#include "llvm/ExecutionEngine/Orc/Core.h"

namespace llvm {
namespace orc {

/// Interface for redirecting calls to one symbol towards another symbol at
/// runtime.
class RedirectionManager {
public:
  /// Map from a symbol name to the symbol definition it should resolve to.
  using SymbolAddrMap = DenseMap<SymbolStringPtr, ExecutorSymbolDef>;

  virtual ~RedirectionManager() = default;

  /// Point every symbol listed in \p NewDests at its new destination.
  virtual Error redirect(JITDylib &JD, const SymbolAddrMap &NewDests) = 0;

  /// Single-symbol convenience wrapper: builds a one-entry map and forwards
  /// to the map-based redirect().
  virtual Error redirect(JITDylib &JD, SymbolStringPtr Symbol,
                         ExecutorSymbolDef NewDest) {
    const SymbolAddrMap SingleDest = {{Symbol, NewDest}};
    return redirect(JD, SingleDest);
  }

private:
  virtual void anchor();
};

/// Interface for managing redirectable symbols, i.e. symbols whose calls are
/// forwarded to another symbol at runtime.
class RedirectableSymbolManager : public RedirectionManager {
public:
  /// Create redirectable symbols with the given names and initial
  /// destination symbol addresses.
  Error createRedirectableSymbols(ResourceTrackerSP RT,
                                  const SymbolMap &InitialDests);

  /// Single-symbol convenience wrapper: builds a one-entry map and forwards
  /// to createRedirectableSymbols().
  Error createRedirectableSymbol(ResourceTrackerSP RT, SymbolStringPtr Symbol,
                                 ExecutorSymbolDef InitialDest) {
    const SymbolMap SingleDest = {{Symbol, InitialDest}};
    return createRedirectableSymbols(RT, SingleDest);
  }

  /// Emit the redirectable symbols described by \p InitialDests on behalf of
  /// the materialization responsibility \p MR.
  virtual void
  emitRedirectableSymbols(std::unique_ptr<MaterializationResponsibility> MR,
                          const SymbolMap &InitialDests) = 0;
};

/// RedirectableMaterializationUnit materializes redirectable symbols
/// by invoking RedirectableSymbolManager::emitRedirectableSymbols.
class RedirectableMaterializationUnit : public MaterializationUnit {
public:
  /// \param RM manager that will emit the symbols on materialization.
  /// \param InitialDests symbol name -> initial destination; copied into the
  ///        unit, and its flags form the unit's interface.
  RedirectableMaterializationUnit(RedirectableSymbolManager &RM,
                                  const SymbolMap &InitialDests)
      : MaterializationUnit(convertToFlags(InitialDests)), RM(RM),
        InitialDests(InitialDests) {}

  StringRef getName() const override {
    return "RedirectableSymbolMaterializationUnit";
  }

  /// Hands the responsibility and the (remaining) destinations to the manager.
  void materialize(std::unique_ptr<MaterializationResponsibility> R) override {
    // emitRedirectableSymbols takes the map by const reference, so wrapping
    // InitialDests in std::move would be a no-op that only suggests a transfer.
    RM.emitRedirectableSymbols(std::move(R), InitialDests);
  }

  /// Drops \p Name from the set of symbols this unit will emit.
  void discard(const JITDylib &JD, const SymbolStringPtr &Name) override {
    InitialDests.erase(Name);
  }

private:
  /// Builds the unit's interface: one flags entry per destination symbol and
  /// no initializer symbol.
  static MaterializationUnit::Interface
  convertToFlags(const SymbolMap &InitialDests) {
    SymbolFlagsMap Flags;
    Flags.reserve(InitialDests.size());
    // Bind by const reference: copying each entry would copy a ref-counted
    // SymbolStringPtr per iteration for no benefit.
    for (const auto &[Name, Def] : InitialDests)
      Flags[Name] = Def.getFlags();
    return MaterializationUnit::Interface(std::move(Flags), {});
  }

  RedirectableSymbolManager &RM;
  SymbolMap InitialDests;
};

} // namespace orc
} // namespace llvm

#endif
1 change: 1 addition & 0 deletions llvm/include/llvm/SandboxIR/Context.h
Original file line number Diff line number Diff line change
Expand Up @@ -68,6 +68,7 @@ class Context {
}
/// Get or create a sandboxir::Constant from an existing LLVM IR \p LLVMC.
Constant *getOrCreateConstant(llvm::Constant *LLVMC);
friend class Utils; // For getMemoryBase

// Friends for getOrCreateConstant().
#define DEF_CONST(ID, CLASS) friend class CLASS;
Expand Down
10 changes: 10 additions & 0 deletions llvm/include/llvm/SandboxIR/Utils.h
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,16 @@ class Utils {
return const_cast<Instruction *>(I);
}

/// \Returns the sandboxir::Value that wraps the underlying object of the
/// pointer operand of the load or store instruction \p LSI.
template <typename LoadOrStoreT>
static Value *getMemInstructionBase(const LoadOrStoreT *LSI) {
  constexpr bool IsLoadOrStore = std::is_same_v<LoadOrStoreT, LoadInst> ||
                                 std::is_same_v<LoadOrStoreT, StoreInst>;
  static_assert(IsLoadOrStore, "Expected sandboxir::Load or sandboxir::Store!");
  // Walk from the wrapped LLVM pointer operand to its underlying object, then
  // map that LLVM value back into the sandbox IR context.
  auto *LLVMPtr = LSI->getPointerOperand()->Val;
  auto *LLVMBase = getUnderlyingObject(LLVMPtr);
  return LSI->Ctx.getOrCreateValue(LLVMBase);
}

/// \Returns the number of bits required to represent the operands or return
/// value of \p V in \p DL.
static unsigned getNumBits(Value *V, const DataLayout &DL) {
Expand Down
78 changes: 42 additions & 36 deletions llvm/include/llvm/Transforms/Vectorize/SandboxVectorizer/Interval.h
Original file line number Diff line number Diff line change
Expand Up @@ -74,66 +74,72 @@ template <typename T, typename IntervalType> class IntervalIterator {
};

template <typename T> class Interval {
T *From;
T *To;
T *Top;
T *Bottom;

public:
Interval() : From(nullptr), To(nullptr) {}
Interval(T *From, T *To) : From(From), To(To) {
assert((From == To || From->comesBefore(To)) &&
"From should come before From!");
Interval() : Top(nullptr), Bottom(nullptr) {}
Interval(T *Top, T *Bottom) : Top(Top), Bottom(Bottom) {
assert((Top == Bottom || Top->comesBefore(Bottom)) &&
"Top should come before Bottom!");
}
Interval(ArrayRef<T *> Elems) {
assert(!Elems.empty() && "Expected non-empty Elems!");
From = Elems[0];
To = Elems[0];
Top = Elems[0];
Bottom = Elems[0];
for (auto *I : drop_begin(Elems)) {
if (I->comesBefore(From))
From = I;
else if (To->comesBefore(I))
To = I;
if (I->comesBefore(Top))
Top = I;
else if (Bottom->comesBefore(I))
Bottom = I;
}
}
bool empty() const {
assert(((From == nullptr && To == nullptr) ||
(From != nullptr && To != nullptr)) &&
assert(((Top == nullptr && Bottom == nullptr) ||
(Top != nullptr && Bottom != nullptr)) &&
"Either none or both should be null");
return From == nullptr;
return Top == nullptr;
}
bool contains(T *I) const {
if (empty())
return false;
return (From == I || From->comesBefore(I)) &&
(I == To || I->comesBefore(To));
return (Top == I || Top->comesBefore(I)) &&
(I == Bottom || I->comesBefore(Bottom));
}
T *top() const { return From; }
T *bottom() const { return To; }
T *top() const { return Top; }
T *bottom() const { return Bottom; }

using iterator = IntervalIterator<T, Interval>;
iterator begin() { return iterator(From, *this); }
iterator begin() { return iterator(Top, *this); }
iterator end() {
return iterator(To != nullptr ? To->getNextNode() : nullptr, *this);
return iterator(Bottom != nullptr ? Bottom->getNextNode() : nullptr, *this);
}
iterator begin() const {
return iterator(From, const_cast<Interval &>(*this));
return iterator(Top, const_cast<Interval &>(*this));
}
iterator end() const {
return iterator(To != nullptr ? To->getNextNode() : nullptr,
return iterator(Bottom != nullptr ? Bottom->getNextNode() : nullptr,
const_cast<Interval &>(*this));
}
/// Equality.
bool operator==(const Interval &Other) const {
return From == Other.From && To == Other.To;
return Top == Other.Top && Bottom == Other.Bottom;
}
/// Inequality.
bool operator!=(const Interval &Other) const { return !(*this == Other); }
/// \Returns true if this interval comes before \p Other in program order.
/// This expects disjoint intervals.
bool comesBefore(const Interval &Other) const {
assert(disjoint(Other) && "Expect disjoint intervals!");
return bottom()->comesBefore(Other.top());
}
/// \Returns true if this and \p Other have nothing in common.
bool disjoint(const Interval &Other) const {
if (Other.empty())
return true;
if (empty())
return true;
return Other.To->comesBefore(From) || To->comesBefore(Other.From);
return Other.Bottom->comesBefore(Top) || Bottom->comesBefore(Other.Top);
}
/// \Returns the intersection between this and \p Other.
// Example:
Expand All @@ -148,14 +154,14 @@ template <typename T> class Interval {
// 1. No overlap
// A---B this
// C--D Other
if (To->comesBefore(Other.From) || Other.To->comesBefore(From))
if (Bottom->comesBefore(Other.Top) || Other.Bottom->comesBefore(Top))
return Interval();
// 2. Overlap.
// A---B this
// C--D Other
auto NewFromI = From->comesBefore(Other.From) ? Other.From : From;
auto NewToI = To->comesBefore(Other.To) ? To : Other.To;
return Interval(NewFromI, NewToI);
auto NewTopI = Top->comesBefore(Other.Top) ? Other.Top : Top;
auto NewBottomI = Bottom->comesBefore(Other.Bottom) ? Bottom : Other.Bottom;
return Interval(NewTopI, NewBottomI);
}
/// Difference operation. This returns up to two intervals.
// Example:
Expand All @@ -172,11 +178,11 @@ template <typename T> class Interval {
Interval Intersection = intersection(Other);
SmallVector<Interval, 2> Result;
// Part 1, skip if empty.
if (From != Intersection.From)
Result.emplace_back(From, Intersection.From->getPrevNode());
if (Top != Intersection.Top)
Result.emplace_back(Top, Intersection.Top->getPrevNode());
// Part 2, skip if empty.
if (Intersection.To != To)
Result.emplace_back(Intersection.To->getNextNode(), To);
if (Intersection.Bottom != Bottom)
Result.emplace_back(Intersection.Bottom->getNextNode(), Bottom);
return Result;
}
/// \Returns the interval difference `this - Other`. This will crash in Debug
Expand All @@ -196,9 +202,9 @@ template <typename T> class Interval {
return Other;
if (Other.empty())
return *this;
auto *NewFrom = From->comesBefore(Other.From) ? From : Other.From;
auto *NewTo = To->comesBefore(Other.To) ? Other.To : To;
return {NewFrom, NewTo};
auto *NewTop = Top->comesBefore(Other.Top) ? Top : Other.Top;
auto *NewBottom = Bottom->comesBefore(Other.Bottom) ? Other.Bottom : Bottom;
return {NewTop, NewBottom};
}

#ifndef NDEBUG
Expand Down
112 changes: 70 additions & 42 deletions llvm/lib/CodeGen/MachineVerifier.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -77,8 +77,10 @@
#include "llvm/Pass.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/ManagedStatic.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/ModRef.h"
#include "llvm/Support/Mutex.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetMachine.h"
#include <algorithm>
Expand All @@ -93,21 +95,31 @@ using namespace llvm;

namespace {

/// Used by the ReportedErrors class to guarantee that only one error is
/// reported at a time.
static ManagedStatic<sys::SmartMutex<true>> ReportedErrorsLock;

struct MachineVerifier {
MachineVerifier(MachineFunctionAnalysisManager &MFAM, const char *b,
raw_ostream *OS)
: MFAM(&MFAM), OS(OS ? *OS : nulls()), Banner(b) {}
raw_ostream *OS, bool AbortOnError = true)
: MFAM(&MFAM), OS(OS ? *OS : nulls()), Banner(b),
ReportedErrs(AbortOnError) {}

MachineVerifier(Pass *pass, const char *b, raw_ostream *OS)
: PASS(pass), OS(OS ? *OS : nulls()), Banner(b) {}
MachineVerifier(Pass *pass, const char *b, raw_ostream *OS,
bool AbortOnError = true)
: PASS(pass), OS(OS ? *OS : nulls()), Banner(b),
ReportedErrs(AbortOnError) {}

MachineVerifier(const char *b, LiveVariables *LiveVars,
LiveIntervals *LiveInts, LiveStacks *LiveStks,
SlotIndexes *Indexes, raw_ostream *OS)
SlotIndexes *Indexes, raw_ostream *OS,
bool AbortOnError = true)
: OS(OS ? *OS : nulls()), Banner(b), LiveVars(LiveVars),
LiveInts(LiveInts), LiveStks(LiveStks), Indexes(Indexes) {}
LiveInts(LiveInts), LiveStks(LiveStks), Indexes(Indexes),
ReportedErrs(AbortOnError) {}

unsigned verify(const MachineFunction &MF);
/// \returns true if no problems were found.
bool verify(const MachineFunction &MF);

MachineFunctionAnalysisManager *MFAM = nullptr;
Pass *const PASS = nullptr;
Expand All @@ -120,8 +132,6 @@ struct MachineVerifier {
const MachineRegisterInfo *MRI = nullptr;
const RegisterBankInfo *RBI = nullptr;

unsigned foundErrors = 0;

// Avoid querying the MachineFunctionProperties for each operand.
bool isFunctionRegBankSelected = false;
bool isFunctionSelected = false;
Expand Down Expand Up @@ -231,6 +241,44 @@ struct MachineVerifier {
LiveStacks *LiveStks = nullptr;
SlotIndexes *Indexes = nullptr;

/// Tracks the number of reported errors and guarantees that only one
/// verifier instance reports errors at a time.
///
/// The first call to increment() acquires ReportedErrorsLock. The lock is
/// released by the destructor, unless the destructor aborts the process
/// because AbortOnError is set.
class ReportedErrors {
  unsigned NumReported = 0;
  bool AbortOnError;

public:
  /// \param AbortOnError -- If set, the destructor calls report_fatal_error
  /// (with the total error count) when at least one error was reported.
  explicit ReportedErrors(bool AbortOnError) : AbortOnError(AbortOnError) {}

  ~ReportedErrors() {
    // Nothing was reported, so the lock was never taken.
    if (!hasError())
      return;
    if (AbortOnError)
      report_fatal_error("Found " + Twine(NumReported) +
                         " machine code errors.");
    // Since we haven't aborted, release the lock to allow other threads to
    // report errors.
    ReportedErrorsLock->unlock();
  }

  /// Increment the number of reported errors.
  /// \returns true if this is the first reported error.
  bool increment() {
    // If this is the first error this thread has encountered, grab the lock
    // to prevent other threads from reporting errors at the same time.
    // Otherwise we assume we already have the lock.
    if (!hasError())
      ReportedErrorsLock->lock();
    ++NumReported;
    return NumReported == 1;
  }

  /// \returns true if an error was reported.
  bool hasError() const { return NumReported != 0; }
};
ReportedErrors ReportedErrs;

// This is calculated only when trying to verify convergence control tokens.
// Similar to the LLVM IR verifier, we calculate this locally instead of
// relying on the pass manager.
Expand Down Expand Up @@ -337,11 +385,7 @@ struct MachineVerifierLegacyPass : public MachineFunctionPass {
MachineFunctionProperties::Property::FailsVerification))
return false;

unsigned FoundErrors =
MachineVerifier(this, Banner.c_str(), &errs()).verify(MF);
if (FoundErrors)
report_fatal_error("Found " + Twine(FoundErrors) +
" machine code errors.");
MachineVerifier(this, Banner.c_str(), &errs()).verify(MF);
return false;
}
};
Expand All @@ -357,10 +401,7 @@ MachineVerifierPass::run(MachineFunction &MF,
if (MF.getProperties().hasProperty(
MachineFunctionProperties::Property::FailsVerification))
return PreservedAnalyses::all();
unsigned FoundErrors =
MachineVerifier(MFAM, Banner.c_str(), &errs()).verify(MF);
if (FoundErrors)
report_fatal_error("Found " + Twine(FoundErrors) + " machine code errors.");
MachineVerifier(MFAM, Banner.c_str(), &errs()).verify(MF);
return PreservedAnalyses::all();
}

Expand All @@ -380,31 +421,20 @@ void llvm::verifyMachineFunction(const std::string &Banner,
// LiveIntervals *LiveInts;
// LiveStacks *LiveStks;
// SlotIndexes *Indexes;
unsigned FoundErrors =
MachineVerifier(nullptr, Banner.c_str(), &errs()).verify(MF);
if (FoundErrors)
report_fatal_error("Found " + Twine(FoundErrors) + " machine code errors.");
MachineVerifier(nullptr, Banner.c_str(), &errs()).verify(MF);
}

bool MachineFunction::verify(Pass *p, const char *Banner, raw_ostream *OS,
bool AbortOnErrors) const {
MachineFunction &MF = const_cast<MachineFunction&>(*this);
unsigned FoundErrors = MachineVerifier(p, Banner, OS).verify(MF);
if (AbortOnErrors && FoundErrors)
report_fatal_error("Found "+Twine(FoundErrors)+" machine code errors.");
return FoundErrors == 0;
bool AbortOnError) const {
return MachineVerifier(p, Banner, OS, AbortOnError).verify(*this);
}

bool MachineFunction::verify(LiveIntervals *LiveInts, SlotIndexes *Indexes,
const char *Banner, raw_ostream *OS,
bool AbortOnErrors) const {
MachineFunction &MF = const_cast<MachineFunction &>(*this);
unsigned FoundErrors =
MachineVerifier(Banner, nullptr, LiveInts, nullptr, Indexes, OS)
.verify(MF);
if (AbortOnErrors && FoundErrors)
report_fatal_error("Found " + Twine(FoundErrors) + " machine code errors.");
return FoundErrors == 0;
bool AbortOnError) const {
return MachineVerifier(Banner, /*LiveVars=*/nullptr, LiveInts,
/*LiveStks=*/nullptr, Indexes, OS, AbortOnError)
.verify(*this);
}

void MachineVerifier::verifySlotIndexes() const {
Expand All @@ -430,9 +460,7 @@ void MachineVerifier::verifyProperties(const MachineFunction &MF) {
report("Function has NoVRegs property but there are VReg operands", &MF);
}

unsigned MachineVerifier::verify(const MachineFunction &MF) {
foundErrors = 0;

bool MachineVerifier::verify(const MachineFunction &MF) {
this->MF = &MF;
TM = &MF.getTarget();
TII = MF.getSubtarget().getInstrInfo();
Expand All @@ -447,7 +475,7 @@ unsigned MachineVerifier::verify(const MachineFunction &MF) {
// it's expected that the MIR is somewhat broken but that's ok since we'll
// reset it and clear the FailedISel attribute in ResetMachineFunctions.
if (isFunctionFailedISel)
return foundErrors;
return true;

isFunctionRegBankSelected = MF.getProperties().hasProperty(
MachineFunctionProperties::Property::RegBankSelected);
Expand Down Expand Up @@ -544,13 +572,13 @@ unsigned MachineVerifier::verify(const MachineFunction &MF) {
regMasks.clear();
MBBInfoMap.clear();

return foundErrors;
return !ReportedErrs.hasError();
}

void MachineVerifier::report(const char *msg, const MachineFunction *MF) {
assert(MF);
OS << '\n';
if (!foundErrors++) {
if (ReportedErrs.increment()) {
if (Banner)
OS << "# " << Banner << '\n';

Expand Down
4 changes: 4 additions & 0 deletions llvm/lib/ExecutionEngine/Orc/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@ add_llvm_component_library(LLVMOrcJIT
IndirectionUtils.cpp
IRCompileLayer.cpp
IRTransformLayer.cpp
IRPartitionLayer.cpp
JITTargetMachineBuilder.cpp
LazyReexports.cpp
Layer.cpp
Expand All @@ -50,6 +51,9 @@ add_llvm_component_library(LLVMOrcJIT
ExecutorProcessControl.cpp
TaskDispatch.cpp
ThreadSafeModule.cpp
RedirectionManager.cpp
JITLinkRedirectableSymbolManager.cpp
ReOptimizeLayer.cpp
ADDITIONAL_HEADER_DIRS
${LLVM_MAIN_INCLUDE_DIR}/llvm/ExecutionEngine/Orc

Expand Down
282 changes: 5 additions & 277 deletions llvm/lib/ExecutionEngine/Orc/CompileOnDemandLayer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
#include "llvm/ExecutionEngine/Orc/CompileOnDemandLayer.h"
#include "llvm/ADT/Hashing.h"
#include "llvm/ExecutionEngine/Orc/ExecutionUtils.h"
#include "llvm/ExecutionEngine/Orc/Layer.h"
#include "llvm/IR/Mangler.h"
#include "llvm/IR/Module.h"
#include "llvm/Support/FormatVariadic.h"
Expand All @@ -17,115 +18,17 @@
using namespace llvm;
using namespace llvm::orc;

/// Clones the globals accepted by \p ShouldExtract out of \p TSM into a new
/// ThreadSafeModule (via cloneToNewContext) and appends \p Suffix to the new
/// module's identifier. The callback handed to cloneToNewContext turns the
/// corresponding definitions in the source module into external declarations.
static ThreadSafeModule extractSubModule(ThreadSafeModule &TSM,
                                         StringRef Suffix,
                                         GVPredicate ShouldExtract) {

  // Callback passed to cloneToNewContext; presumably applied to each global
  // that was extracted (see cloneToNewContext for the exact contract).
  auto StripDefinition = [](GlobalValue &GV) {
    // Bump the linkage: this global will be provided by the external module.
    GV.setLinkage(GlobalValue::ExternalLinkage);

    // Functions and variables simply lose their definition.
    if (auto *Fn = dyn_cast<Function>(&GV)) {
      Fn->deleteBody();
      Fn->setPersonalityFn(nullptr);
      return;
    }
    if (auto *Var = dyn_cast<GlobalVariable>(&GV)) {
      Var->setInitializer(nullptr);
      return;
    }

    auto *Alias = dyn_cast<GlobalAlias>(&GV);
    if (!Alias)
      llvm_unreachable("Unsupported global type");

    // A deleted alias becomes a function or variable declaration, depending
    // on the type of its aliasee, and keeps the alias's name.
    Constant *Target = Alias->getAliasee();
    assert(Alias->hasName() && "Anonymous alias?");
    assert(Target->hasName() && "Anonymous aliasee");
    std::string SavedName = std::string(Alias->getName());

    if (auto *TargetFn = dyn_cast<Function>(Target)) {
      auto *Decl = cloneFunctionDecl(*Alias->getParent(), *TargetFn);
      Alias->replaceAllUsesWith(Decl);
      Alias->eraseFromParent();
      Decl->setName(SavedName);
    } else if (auto *TargetVar = dyn_cast<GlobalVariable>(Target)) {
      auto *Decl = cloneGlobalVariableDecl(*Alias->getParent(), *TargetVar);
      Alias->replaceAllUsesWith(Decl);
      Alias->eraseFromParent();
      Decl->setName(SavedName);
    } else
      llvm_unreachable("Alias to unsupported type");
  };

  auto ExtractedTSM = cloneToNewContext(TSM, ShouldExtract, StripDefinition);
  ExtractedTSM.withModuleDo([&](Module &M) {
    M.setModuleIdentifier((M.getModuleIdentifier() + Suffix).str());
  });

  return ExtractedTSM;
}

namespace llvm {
namespace orc {

/// Materialization unit that owns a module and defers partitioning until its
/// symbols are requested: materialize() hands the module and the
/// symbol-to-definition map to CompileOnDemandLayer::emitPartition.
class PartitioningIRMaterializationUnit : public IRMaterializationUnit {
public:
  /// Builds the unit from \p TSM; the interface is derived by the
  /// IRMaterializationUnit base from \p ES and \p MO.
  PartitioningIRMaterializationUnit(ExecutionSession &ES,
                                    const IRSymbolMapper::ManglingOptions &MO,
                                    ThreadSafeModule TSM,
                                    CompileOnDemandLayer &Parent)
      : IRMaterializationUnit(ES, MO, std::move(TSM)), Parent(Parent) {}

  /// Builds the unit from an already-computed interface \p I and
  /// symbol-to-definition map.
  PartitioningIRMaterializationUnit(
      ThreadSafeModule TSM, Interface I,
      SymbolNameToDefinitionMap SymbolToDefinition,
      CompileOnDemandLayer &Parent)
      : IRMaterializationUnit(std::move(TSM), std::move(I),
                              std::move(SymbolToDefinition)),
        Parent(Parent) {}

private:
  /// Forwards the responsibility, the module and the definition map to the
  /// parent layer, which partitions and emits them.
  void materialize(std::unique_ptr<MaterializationResponsibility> R) override {
    Parent.emitPartition(std::move(R), std::move(TSM),
                         std::move(SymbolToDefinition));
  }

  void discard(const JITDylib &V, const SymbolStringPtr &Name) override {
    // All original symbols were materialized by the CODLayer and should be
    // final. The function bodies provided by M should never be overridden.
    llvm_unreachable("Discard should never be called on a "
                     "PartitioningIRMaterializationUnit");
  }

  // NOTE(review): not referenced anywhere in this class's visible body.
  mutable std::mutex SourceModuleMutex;
  CompileOnDemandLayer &Parent;
};

/// Partition function that selects exactly the requested globals: the
/// returned optional holds \p Requested unchanged.
std::optional<CompileOnDemandLayer::GlobalValueSet>
CompileOnDemandLayer::compileRequested(GlobalValueSet Requested) {
  std::optional<GlobalValueSet> Selected(std::move(Requested));
  return Selected;
}

/// Partition function that ignores \p Requested and returns an empty
/// optional, which emitPartition treats as "emit the whole module".
std::optional<CompileOnDemandLayer::GlobalValueSet>
CompileOnDemandLayer::compileWholeModule(GlobalValueSet Requested) {
  return std::optional<GlobalValueSet>();
}

/// Constructs the layer on top of \p BaseLayer, reusing the base layer's
/// mangling options. References to \p BaseLayer and \p LCTMgr are stored, and
/// \p BuildIndirectStubsManager is moved into the layer.
CompileOnDemandLayer::CompileOnDemandLayer(
ExecutionSession &ES, IRLayer &BaseLayer, LazyCallThroughManager &LCTMgr,
IndirectStubsManagerBuilder BuildIndirectStubsManager)
: IRLayer(ES, BaseLayer.getManglingOptions()), BaseLayer(BaseLayer),
LCTMgr(LCTMgr),
BuildIndirectStubsManager(std::move(BuildIndirectStubsManager)) {}

/// Replaces the partition function consulted by emitPartition. The argument
/// is taken by value and moved into the member, so no extra copy is made.
void CompileOnDemandLayer::setPartitionFunction(PartitionFunction Partition) {
// `this->` is required: the parameter shadows the member of the same name.
this->Partition = std::move(Partition);
}

/// Stores \p Imp as the layer's aliasee-implementation map. Only the pointer
/// is stored; no copy of the map is made.
void CompileOnDemandLayer::setImplMap(ImplSymbolMap *Imp) {
  AliaseeImpls = Imp;
}

void CompileOnDemandLayer::emit(
std::unique_ptr<MaterializationResponsibility> R, ThreadSafeModule TSM) {
assert(TSM && "Null module");
Expand All @@ -138,10 +41,6 @@ void CompileOnDemandLayer::emit(

SymbolAliasMap NonCallables;
SymbolAliasMap Callables;
TSM.withModuleDo([&](Module &M) {
// First, do some cleanup on the module:
cleanUpModule(M);
});

for (auto &KV : R->getSymbols()) {
auto &Name = KV.first;
Expand All @@ -152,11 +51,10 @@ void CompileOnDemandLayer::emit(
NonCallables[Name] = SymbolAliasMapEntry(Name, Flags);
}

// Create a partitioning materialization unit and lodge it with the
// implementation dylib.
// Lodge symbols with the implementation dylib.
if (auto Err = PDR.getImplDylib().define(
std::make_unique<PartitioningIRMaterializationUnit>(
ES, *getManglingOptions(), std::move(TSM), *this))) {
std::make_unique<BasicIRLayerMaterializationUnit>(
BaseLayer, *getManglingOptions(), std::move(TSM)))) {
ES.reportError(std::move(Err));
R->failMaterialization();
return;
Expand Down Expand Up @@ -210,173 +108,3 @@ CompileOnDemandLayer::getPerDylibResources(JITDylib &TargetD) {

return I->second;
}

/// Strips the body and personality function from every function definition
/// in \p M that has available_externally linkage. Declarations and all other
/// definitions are left untouched.
void CompileOnDemandLayer::cleanUpModule(Module &M) {
  for (Function &Fn : M.functions()) {
    if (!Fn.isDeclaration() && Fn.hasAvailableExternallyLinkage()) {
      Fn.deleteBody();
      Fn.setPersonalityFn(nullptr);
    }
  }
}

/// Grows \p Partition (which must be non-empty) with the globals that cannot
/// be separated from it:
///   (1) the aliasee of every alias in the partition,
///   (2) every alias whose aliasee is in the partition,
///   (3) all global variables of the module, if the partition contains any
///       global variable.
/// All three rules are evaluated against the partition as passed in; the
/// additions are inserted in one go at the end.
void CompileOnDemandLayer::expandPartition(GlobalValueSet &Partition) {
  assert(!Partition.empty() && "Unexpected empty partition");

  const Module &M = *(*Partition.begin())->getParent();
  std::vector<const GlobalValue *> Extras;
  bool SawGlobalVariable = false;

  // Rule (1), and detection for rule (3).
  for (const GlobalValue *GV : Partition) {
    if (const auto *Alias = dyn_cast<GlobalAlias>(GV))
      Extras.push_back(cast<GlobalValue>(Alias->getAliasee()));
    else if (isa<GlobalVariable>(GV))
      SawGlobalVariable = true;
  }

  // Rule (2).
  for (const GlobalAlias &Alias : M.aliases()) {
    if (Partition.count(cast<GlobalValue>(Alias.getAliasee())))
      Extras.push_back(&Alias);
  }

  // Rule (3).
  if (SawGlobalVariable) {
    for (const GlobalVariable &Var : M.globals())
      Extras.push_back(&Var);
  }

  // Insert only now, so the partition is not modified while being iterated.
  for (const GlobalValue *GV : Extras)
    Partition.insert(GV);
}

/// Partitions the module \p TSM according to the symbols requested through
/// \p R and emits the selected part to the base layer.
///
/// \p Defs maps symbol names to their definitions in \p TSM. The partition
/// function decides the outcome:
///   - no value: the whole module is emitted to the base layer unmodified;
///   - empty set: the whole module is handed back via R->replace;
///   - otherwise: the selected globals are extracted into a submodule that is
///     emitted to the base layer, and the remainder is handed back via
///     R->replace.
/// Any error is reported to the execution session and fails \p R.
void CompileOnDemandLayer::emitPartition(
std::unique_ptr<MaterializationResponsibility> R, ThreadSafeModule TSM,
IRMaterializationUnit::SymbolNameToDefinitionMap Defs) {

// FIXME: Need a 'notify lazy-extracting/emitting' callback to tie the
// extracted module key, extracted module, and source module key
// together. This could be used, for example, to provide a specific
// memory manager instance to the linking layer.

auto &ES = getExecutionSession();
// Collect the globals behind the requested symbols. The initializer symbol
// stands for the globals returned by getStaticInitGVs.
GlobalValueSet RequestedGVs;
for (auto &Name : R->getRequestedSymbols()) {
if (Name == R->getInitializerSymbol())
TSM.withModuleDo([&](Module &M) {
for (auto &GV : getStaticInitGVs(M))
RequestedGVs.insert(&GV);
});
else {
assert(Defs.count(Name) && "No definition for symbol");
RequestedGVs.insert(Defs[Name]);
}
}

/// Perform partitioning with the context lock held, since the partition
/// function is allowed to access the globals to compute the partition.
auto GVsToExtract =
TSM.withModuleDo([&](Module &M) { return Partition(RequestedGVs); });

// Take a 'None' partition to mean the whole module (as opposed to an empty
// partition, which means "materialize nothing"). Emit the whole module
// unmodified to the base layer.
if (GVsToExtract == std::nullopt) {
Defs.clear();
BaseLayer.emit(std::move(R), std::move(TSM));
return;
}

// If the partition is empty, return the whole module to the symbol table.
if (GVsToExtract->empty()) {
if (auto Err =
R->replace(std::make_unique<PartitioningIRMaterializationUnit>(
std::move(TSM),
MaterializationUnit::Interface(R->getSymbols(),
R->getInitializerSymbol()),
std::move(Defs), *this))) {
getExecutionSession().reportError(std::move(Err));
R->failMaterialization();
return;
}
return;
}

// Ok -- we actually need to partition the symbols. Promote the symbol
// linkages/names, expand the partition to include any required symbols
// (i.e. symbols that can't be separated from our partition), and
// then extract the partition.
//
// FIXME: We apply this promotion once per partitioning. It's safe, but
// overkill.
auto ExtractedTSM =
TSM.withModuleDo([&](Module &M) -> Expected<ThreadSafeModule> {
auto PromotedGlobals = PromoteSymbols(M);
if (!PromotedGlobals.empty()) {

// NOTE(review): Mangle is not used below in this block.
MangleAndInterner Mangle(ES, M.getDataLayout());
SymbolFlagsMap SymbolFlags;
IRSymbolMapper::add(ES, *getManglingOptions(),
PromotedGlobals, SymbolFlags);

// Register the promoted globals as additional symbols being materialized.
if (auto Err = R->defineMaterializing(SymbolFlags))
return std::move(Err);
}

expandPartition(*GVsToExtract);

// Submodule name is given by hashing the names of the globals.
std::string SubModuleName;
{
// Sort by name first so the hash does not depend on set iteration order.
std::vector<const GlobalValue*> HashGVs;
HashGVs.reserve(GVsToExtract->size());
for (const auto *GV : *GVsToExtract)
HashGVs.push_back(GV);
llvm::sort(HashGVs, [](const GlobalValue *LHS, const GlobalValue *RHS) {
return LHS->getName() < RHS->getName();
});
hash_code HC(0);
for (const auto *GV : HashGVs) {
assert(GV->hasName() && "All GVs to extract should be named by now");
auto GVName = GV->getName();
HC = hash_combine(HC, hash_combine_range(GVName.begin(), GVName.end()));
}
raw_string_ostream(SubModuleName)
<< ".submodule."
<< formatv(sizeof(size_t) == 8 ? "{0:x16}" : "{0:x8}",
static_cast<size_t>(HC))
<< ".ll";
}

// Extract the requested partition (plus any necessary aliases) and
// put the rest back into the impl dylib.
auto ShouldExtract = [&](const GlobalValue &GV) -> bool {
return GVsToExtract->count(&GV);
};

return extractSubModule(TSM, SubModuleName , ShouldExtract);
});

if (!ExtractedTSM) {
ES.reportError(ExtractedTSM.takeError());
R->failMaterialization();
return;
}

// Hand the remainder of the module back before emitting the extracted part.
if (auto Err = R->replace(std::make_unique<PartitioningIRMaterializationUnit>(
ES, *getManglingOptions(), std::move(TSM), *this))) {
ES.reportError(std::move(Err));
R->failMaterialization();
return;
}
BaseLayer.emit(std::move(R), std::move(*ExtractedTSM));
}

} // end namespace orc
} // end namespace llvm
303 changes: 303 additions & 0 deletions llvm/lib/ExecutionEngine/Orc/IRPartitionLayer.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,303 @@
//===----- IRPartitionLayer.cpp - Partition IR module into submodules -----===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#include "llvm/ExecutionEngine/Orc/IRPartitionLayer.h"
#include "llvm/ExecutionEngine/Orc/ExecutionUtils.h"
#include "llvm/ExecutionEngine/Orc/IndirectionUtils.h"

using namespace llvm;
using namespace llvm::orc;

/// Clones the globals accepted by \p ShouldExtract out of \p TSM into a new
/// ThreadSafeModule (via cloneToNewContext) and appends \p Suffix to the new
/// module's identifier. The callback handed to cloneToNewContext turns the
/// corresponding definitions in the source module into external declarations.
static ThreadSafeModule extractSubModule(ThreadSafeModule &TSM,
                                         StringRef Suffix,
                                         GVPredicate ShouldExtract) {

  // Callback passed to cloneToNewContext; presumably applied to each global
  // that was extracted (see cloneToNewContext for the exact contract).
  auto StripDefinition = [](GlobalValue &GV) {
    // Bump the linkage: this global will be provided by the external module.
    GV.setLinkage(GlobalValue::ExternalLinkage);

    // Functions and variables simply lose their definition.
    if (auto *Fn = dyn_cast<Function>(&GV)) {
      Fn->deleteBody();
      Fn->setPersonalityFn(nullptr);
      return;
    }
    if (auto *Var = dyn_cast<GlobalVariable>(&GV)) {
      Var->setInitializer(nullptr);
      return;
    }

    auto *Alias = dyn_cast<GlobalAlias>(&GV);
    if (!Alias)
      llvm_unreachable("Unsupported global type");

    // A deleted alias becomes a function or variable declaration, depending
    // on the type of its aliasee, and keeps the alias's name.
    Constant *Target = Alias->getAliasee();
    assert(Alias->hasName() && "Anonymous alias?");
    assert(Target->hasName() && "Anonymous aliasee");
    std::string SavedName = std::string(Alias->getName());

    if (auto *TargetFn = dyn_cast<Function>(Target)) {
      auto *Decl = cloneFunctionDecl(*Alias->getParent(), *TargetFn);
      Alias->replaceAllUsesWith(Decl);
      Alias->eraseFromParent();
      Decl->setName(SavedName);
    } else if (auto *TargetVar = dyn_cast<GlobalVariable>(Target)) {
      auto *Decl = cloneGlobalVariableDecl(*Alias->getParent(), *TargetVar);
      Alias->replaceAllUsesWith(Decl);
      Alias->eraseFromParent();
      Decl->setName(SavedName);
    } else
      llvm_unreachable("Alias to unsupported type");
  };

  auto ExtractedTSM = cloneToNewContext(TSM, ShouldExtract, StripDefinition);
  ExtractedTSM.withModuleDo([&](Module &M) {
    M.setModuleIdentifier((M.getModuleIdentifier() + Suffix).str());
  });

  return ExtractedTSM;
}

namespace llvm {
namespace orc {

/// Materialization unit that owns a module and defers partitioning until its
/// symbols are requested: materialize() hands the module and the
/// symbol-to-definition map to IRPartitionLayer::emitPartition.
class PartitioningIRMaterializationUnit : public IRMaterializationUnit {
public:
  /// Builds the unit from \p TSM; the interface is derived by the
  /// IRMaterializationUnit base from \p ES and \p MO.
  PartitioningIRMaterializationUnit(ExecutionSession &ES,
                                    const IRSymbolMapper::ManglingOptions &MO,
                                    ThreadSafeModule TSM,
                                    IRPartitionLayer &Parent)
      : IRMaterializationUnit(ES, MO, std::move(TSM)), Parent(Parent) {}

  /// Builds the unit from an already-computed interface \p I and
  /// symbol-to-definition map.
  PartitioningIRMaterializationUnit(
      ThreadSafeModule TSM, Interface I,
      SymbolNameToDefinitionMap SymbolToDefinition, IRPartitionLayer &Parent)
      : IRMaterializationUnit(std::move(TSM), std::move(I),
                              std::move(SymbolToDefinition)),
        Parent(Parent) {}

private:
  /// Forwards the responsibility, the module and the definition map to the
  /// parent layer, which partitions and emits them.
  void materialize(std::unique_ptr<MaterializationResponsibility> R) override {
    Parent.emitPartition(std::move(R), std::move(TSM),
                         std::move(SymbolToDefinition));
  }

  void discard(const JITDylib &V, const SymbolStringPtr &Name) override {
    // The symbols handed to this unit are expected to be final: the function
    // bodies provided by the module should never be overridden, so a discard
    // request indicates a bug.
    llvm_unreachable("Discard should never be called on a "
                     "PartitioningIRMaterializationUnit");
  }

  IRPartitionLayer &Parent;
};

} // namespace orc
} // namespace llvm

/// Constructs a partition layer on top of \p BaseLayer, reusing the base
/// layer's mangling options. Only a reference to \p BaseLayer is stored.
IRPartitionLayer::IRPartitionLayer(ExecutionSession &ES, IRLayer &BaseLayer)
: IRLayer(ES, BaseLayer.getManglingOptions()), BaseLayer(BaseLayer) {}

/// Replaces the partition function consulted by emitPartition.
///
/// \p Partition is taken by value, so it is moved (not copied) into the
/// member to avoid duplicating the callable's captured state.
void IRPartitionLayer::setPartitionFunction(PartitionFunction Partition) {
  // `this->` is required: the parameter shadows the member of the same name.
  this->Partition = std::move(Partition);
}

/// Partition function that selects exactly the requested globals: the
/// returned optional holds \p Requested unchanged.
std::optional<IRPartitionLayer::GlobalValueSet>
IRPartitionLayer::compileRequested(GlobalValueSet Requested) {
  std::optional<GlobalValueSet> Selected(std::move(Requested));
  return Selected;
}

/// Partition function that ignores \p Requested and returns an empty
/// optional, which emitPartition treats as "emit the whole module".
std::optional<IRPartitionLayer::GlobalValueSet>
IRPartitionLayer::compileWholeModule(GlobalValueSet Requested) {
  return std::optional<GlobalValueSet>();
}

void IRPartitionLayer::emit(std::unique_ptr<MaterializationResponsibility> R,
ThreadSafeModule TSM) {
assert(TSM && "Null module");

auto &ES = getExecutionSession();
TSM.withModuleDo([&](Module &M) {
// First, do some cleanup on the module:
cleanUpModule(M);
});

// Create a partitioning materialization unit and pass the responsibility.
if (auto Err = R->replace(std::make_unique<PartitioningIRMaterializationUnit>(
ES, *getManglingOptions(), std::move(TSM), *this))) {
ES.reportError(std::move(Err));
R->failMaterialization();
return;
}
}

/// Strips the body and personality function from every function definition
/// in \p M that has available_externally linkage. Declarations and all other
/// definitions are left untouched.
void IRPartitionLayer::cleanUpModule(Module &M) {
  for (Function &Fn : M.functions()) {
    if (!Fn.isDeclaration() && Fn.hasAvailableExternallyLinkage()) {
      Fn.deleteBody();
      Fn.setPersonalityFn(nullptr);
    }
  }
}

/// Grows \p Partition (which must be non-empty) with the globals that cannot
/// be separated from it:
///   (1) the aliasee of every alias in the partition,
///   (2) every alias whose aliasee is in the partition,
///   (3) all global variables of the module, if the partition contains any
///       global variable.
/// All three rules are evaluated against the partition as passed in; the
/// additions are inserted in one go at the end.
void IRPartitionLayer::expandPartition(GlobalValueSet &Partition) {
  assert(!Partition.empty() && "Unexpected empty partition");

  const Module &M = *(*Partition.begin())->getParent();
  std::vector<const GlobalValue *> Extras;
  bool SawGlobalVariable = false;

  // Rule (1), and detection for rule (3).
  for (const GlobalValue *GV : Partition) {
    if (const auto *Alias = dyn_cast<GlobalAlias>(GV))
      Extras.push_back(cast<GlobalValue>(Alias->getAliasee()));
    else if (isa<GlobalVariable>(GV))
      SawGlobalVariable = true;
  }

  // Rule (2).
  for (const GlobalAlias &Alias : M.aliases()) {
    if (Partition.count(cast<GlobalValue>(Alias.getAliasee())))
      Extras.push_back(&Alias);
  }

  // Rule (3).
  if (SawGlobalVariable) {
    for (const GlobalVariable &Var : M.globals())
      Extras.push_back(&Var);
  }

  // Insert only now, so the partition is not modified while being iterated.
  for (const GlobalValue *GV : Extras)
    Partition.insert(GV);
}

void IRPartitionLayer::emitPartition(
std::unique_ptr<MaterializationResponsibility> R, ThreadSafeModule TSM,
IRMaterializationUnit::SymbolNameToDefinitionMap Defs) {

// FIXME: Need a 'notify lazy-extracting/emitting' callback to tie the
// extracted module key, extracted module, and source module key
// together. This could be used, for example, to provide a specific
// memory manager instance to the linking layer.

auto &ES = getExecutionSession();
GlobalValueSet RequestedGVs;
for (auto &Name : R->getRequestedSymbols()) {
if (Name == R->getInitializerSymbol())
TSM.withModuleDo([&](Module &M) {
for (auto &GV : getStaticInitGVs(M))
RequestedGVs.insert(&GV);
});
else {
assert(Defs.count(Name) && "No definition for symbol");
RequestedGVs.insert(Defs[Name]);
}
}

/// Perform partitioning with the context lock held, since the partition
/// function is allowed to access the globals to compute the partition.
auto GVsToExtract =
TSM.withModuleDo([&](Module &M) { return Partition(RequestedGVs); });

// Take a 'None' partition to mean the whole module (as opposed to an empty
// partition, which means "materialize nothing"). Emit the whole module
// unmodified to the base layer.
if (GVsToExtract == std::nullopt) {
Defs.clear();
BaseLayer.emit(std::move(R), std::move(TSM));
return;
}

// If the partition is empty, return the whole module to the symbol table.
if (GVsToExtract->empty()) {
if (auto Err =
R->replace(std::make_unique<PartitioningIRMaterializationUnit>(
std::move(TSM),
MaterializationUnit::Interface(R->getSymbols(),
R->getInitializerSymbol()),
std::move(Defs), *this))) {
getExecutionSession().reportError(std::move(Err));
R->failMaterialization();
return;
}
return;
}

// Ok -- we actually need to partition the symbols. Promote the symbol
// linkages/names, expand the partition to include any required symbols
// (i.e. symbols that can't be separated from our partition), and
// then extract the partition.
//
// FIXME: We apply this promotion once per partitioning. It's safe, but
// overkill.
auto ExtractedTSM = TSM.withModuleDo([&](Module &M)
-> Expected<ThreadSafeModule> {
auto PromotedGlobals = PromoteSymbols(M);
if (!PromotedGlobals.empty()) {

MangleAndInterner Mangle(ES, M.getDataLayout());
SymbolFlagsMap SymbolFlags;
IRSymbolMapper::add(ES, *getManglingOptions(), PromotedGlobals,
SymbolFlags);

if (auto Err = R->defineMaterializing(SymbolFlags))
return std::move(Err);
}

expandPartition(*GVsToExtract);

// Submodule name is given by hashing the names of the globals.
std::string SubModuleName;
{
std::vector<const GlobalValue *> HashGVs;
HashGVs.reserve(GVsToExtract->size());
for (const auto *GV : *GVsToExtract)
HashGVs.push_back(GV);
llvm::sort(HashGVs, [](const GlobalValue *LHS, const GlobalValue *RHS) {
return LHS->getName() < RHS->getName();
});
hash_code HC(0);
for (const auto *GV : HashGVs) {
assert(GV->hasName() && "All GVs to extract should be named by now");
auto GVName = GV->getName();
HC = hash_combine(HC, hash_combine_range(GVName.begin(), GVName.end()));
}
raw_string_ostream(SubModuleName)
<< ".submodule."
<< formatv(sizeof(size_t) == 8 ? "{0:x16}" : "{0:x8}",
static_cast<size_t>(HC))
<< ".ll";
}

// Extract the requested partiton (plus any necessary aliases) and
// put the rest back into the impl dylib.
auto ShouldExtract = [&](const GlobalValue &GV) -> bool {
return GVsToExtract->count(&GV);
};

return extractSubModule(TSM, SubModuleName, ShouldExtract);
});

if (!ExtractedTSM) {
ES.reportError(ExtractedTSM.takeError());
R->failMaterialization();
return;
}

if (auto Err = R->replace(std::make_unique<PartitioningIRMaterializationUnit>(
ES, *getManglingOptions(), std::move(TSM), *this))) {
ES.reportError(std::move(Err));
R->failMaterialization();
return;
}
BaseLayer.emit(std::move(R), std::move(*ExtractedTSM));
}
182 changes: 182 additions & 0 deletions llvm/lib/ExecutionEngine/Orc/JITLinkRedirectableSymbolManager.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,182 @@
//===-- JITLinkRedirectableSymbolManager.cpp - JITLink redirection in Orc -===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#include "llvm/ExecutionEngine/Orc/JITLinkRedirectableSymbolManager.h"
#include "llvm/ExecutionEngine/Orc/Core.h"

#define DEBUG_TYPE "orc"

using namespace llvm;
using namespace llvm::orc;

void JITLinkRedirectableSymbolManager::emitRedirectableSymbols(
std::unique_ptr<MaterializationResponsibility> R,
const SymbolAddrMap &InitialDests) {
auto &ES = ObjLinkingLayer.getExecutionSession();
std::unique_lock<std::mutex> Lock(Mutex);
if (GetNumAvailableStubs() < InitialDests.size())
if (auto Err = grow(InitialDests.size() - GetNumAvailableStubs())) {
ES.reportError(std::move(Err));
R->failMaterialization();
return;
}

JITDylib &TargetJD = R->getTargetJITDylib();
SymbolMap NewSymbolDefs;
std::vector<SymbolStringPtr> Symbols;
for (auto &[K, V] : InitialDests) {
StubHandle StubID = AvailableStubs.back();
if (SymbolToStubs[&TargetJD].count(K)) {
ES.reportError(make_error<StringError>(
"Tried to create duplicate redirectable symbols",
inconvertibleErrorCode()));
R->failMaterialization();
return;
}
SymbolToStubs[&TargetJD][K] = StubID;
NewSymbolDefs[K] = JumpStubs[StubID];
NewSymbolDefs[K].setFlags(V.getFlags());
Symbols.push_back(K);
AvailableStubs.pop_back();
}

// FIXME: when this fails we can return stubs to the pool
if (auto Err = redirectInner(TargetJD, InitialDests)) {
ES.reportError(std::move(Err));
R->failMaterialization();
return;
}

if (auto Err = R->replace(absoluteSymbols(NewSymbolDefs))) {
ES.reportError(std::move(Err));
R->failMaterialization();
return;
}

auto Err = R->withResourceKeyDo([&](ResourceKey Key) {
TrackedResources[Key].insert(TrackedResources[Key].end(), Symbols.begin(),
Symbols.end());
});
if (Err) {
ES.reportError(std::move(Err));
R->failMaterialization();
return;
}
}

/// Redirects the symbols in \p NewDests (defined in \p TargetJD) to their new
/// destination addresses. Takes the manager's mutex and delegates to
/// redirectInner.
Error JITLinkRedirectableSymbolManager::redirect(
    JITDylib &TargetJD, const SymbolAddrMap &NewDests) {
  std::lock_guard<std::mutex> Guard(Mutex);
  return redirectInner(TargetJD, NewDests);
}

/// Writes the destination address of every symbol in \p NewDests into the
/// pointer slot of the stub assigned to that symbol in \p TargetJD.
///
/// Does not lock; both call sites in this file (redirect and
/// emitRedirectableSymbols) hold Mutex when calling.
///
/// \returns an error if any symbol has no stub registered for \p TargetJD
/// (in which case nothing is written), or the error produced by the memory
/// access's writePointers call.
Error JITLinkRedirectableSymbolManager::redirectInner(
    JITDylib &TargetJD, const SymbolAddrMap &NewDests) {
  // Look the dylib up once with find(): operator[] would insert an empty
  // per-dylib map when the dylib is unknown.
  const auto JDI = SymbolToStubs.find(&TargetJD);

  std::vector<tpctypes::PointerWrite> PtrWrites;
  PtrWrites.reserve(NewDests.size());
  for (auto &[K, V] : NewDests) {
    if (JDI != SymbolToStubs.end()) {
      auto StubI = JDI->second.find(K);
      if (StubI != JDI->second.end()) {
        PtrWrites.push_back(
            {StubPointers[StubI->second].getAddress(), V.getAddress()});
        continue;
      }
    }
    return make_error<StringError>(
        "Tried to redirect non-existent redirectable symbol",
        inconvertibleErrorCode());
  }

  return ObjLinkingLayer.getExecutionSession()
      .getExecutorProcessControl()
      .getMemoryAccess()
      .writePointers(PtrWrites);
}

/// Adds at least \p Need stubs (rounded up to a multiple of StubBlockSize) to
/// the pool.
///
/// For each new stub a pointer and a jump stub through that pointer are
/// created in a fresh LinkGraph. The graph is added to JD through the object
/// linking layer, the new symbols are looked up, and the resulting
/// definitions are stored in StubPointers / JumpStubs. The new stub indices
/// are appended to AvailableStubs only after everything succeeded.
Error JITLinkRedirectableSymbolManager::grow(unsigned Need) {
  unsigned FirstNew = JumpStubs.size();
  unsigned NumAdded = alignTo(Need, StubBlockSize);
  unsigned EndNew = FirstNew + NumAdded;

  JumpStubs.resize(EndNew);
  StubPointers.resize(EndNew);
  AvailableStubs.reserve(EndNew);

  // Symbols to look up after linking, and where to store each result.
  SymbolLookupSet ToLookUp;
  DenseMap<SymbolStringPtr, ExecutorSymbolDef *> ResultSlots;

  auto &ES = ObjLinkingLayer.getExecutionSession();
  Triple TT = ES.getTargetTriple();
  auto G = std::make_unique<jitlink::LinkGraph>(
      "<INDIRECT STUBS>", TT, TT.isArch64Bit() ? 8 : 4,
      TT.isLittleEndian() ? endianness::little : endianness::big,
      jitlink::getGenericEdgeKindName);
  auto &PointerSection =
      G->createSection(StubPtrTableName, MemProt::Write | MemProt::Read);
  auto &StubsSection =
      G->createSection(JumpStubTableName, MemProt::Exec | MemProt::Read);

  // FIXME: We can batch the stubs into one block and use address to access them
  for (size_t I = FirstNew; I < EndNew; I++) {
    // The pointer slot for stub I.
    auto Pointer = AnonymousPtrCreator(*G, PointerSection, nullptr, 0);
    if (auto Err = Pointer.takeError())
      return Err;

    StringRef PtrSymName = StubPtrSymbolName(I);
    Pointer->setName(PtrSymName);
    Pointer->setScope(jitlink::Scope::Default);
    auto InternedPtrName = ES.intern(PtrSymName);
    ToLookUp.add(InternedPtrName);
    ResultSlots[InternedPtrName] = &StubPointers[I];

    // The jump stub that jumps through that pointer.
    auto Stub = PtrJumpStubCreator(*G, StubsSection, *Pointer);
    if (auto Err = Stub.takeError())
      return Err;

    StringRef JumpStubSymName = JumpStubSymbolName(I);
    Stub->setName(JumpStubSymName);
    Stub->setScope(jitlink::Scope::Default);
    auto InternedStubName = ES.intern(JumpStubSymName);
    ToLookUp.add(InternedStubName);
    ResultSlots[InternedStubName] = &JumpStubs[I];
  }

  if (auto Err = ObjLinkingLayer.add(JD, std::move(G)))
    return Err;

  auto Resolved = ES.lookup(makeJITDylibSearchOrder(&JD), ToLookUp);
  if (auto Err = Resolved.takeError())
    return Err;

  for (auto &[Name, Def] : *Resolved)
    *ResultSlots.at(Name) = Def;

  for (size_t I = FirstNew; I < EndNew; I++)
    AvailableStubs.push_back(I);

  return Error::success();
}

/// Releases the stubs of all symbols tracked under resource key \p K for
/// \p TargetJD: each stub is returned to AvailableStubs and its
/// symbol-to-stub entry is removed. The tracking entry for \p K is erased on
/// success.
///
/// \returns an error if a tracked symbol has no stub registered for
/// \p TargetJD. Symbols processed before the failing one stay released and
/// \p K stays tracked, as in the previous implementation.
Error JITLinkRedirectableSymbolManager::handleRemoveResources(
    JITDylib &TargetJD, ResourceKey K) {
  std::unique_lock<std::mutex> Lock(Mutex);

  // Nothing tracked under this key: nothing to release. find() avoids
  // inserting an entry just to erase it again.
  auto TrackedI = TrackedResources.find(K);
  if (TrackedI == TrackedResources.end())
    return Error::success();

  auto MissingSymbol = [] {
    return make_error<StringError>(
        "Tried to remove non-existent redirectable symbol",
        inconvertibleErrorCode());
  };

  // Look the per-dylib map up once instead of on every access.
  auto JDI = SymbolToStubs.find(&TargetJD);
  for (auto &Symbol : TrackedI->second) {
    if (JDI == SymbolToStubs.end())
      return MissingSymbol();
    auto StubI = JDI->second.find(Symbol);
    if (StubI == JDI->second.end())
      return MissingSymbol();
    AvailableStubs.push_back(StubI->second);
    JDI->second.erase(StubI);
  }

  // Drop the per-dylib map once it no longer holds any symbol.
  if (JDI != SymbolToStubs.end() && JDI->second.empty())
    SymbolToStubs.erase(JDI);
  TrackedResources.erase(TrackedI);

  return Error::success();
}

/// Moves the symbols tracked under \p SrcK to \p DstK and stops tracking
/// \p SrcK. Does nothing if no symbols are tracked under \p SrcK.
void JITLinkRedirectableSymbolManager::handleTransferResources(
    JITDylib &TargetJD, ResourceKey DstK, ResourceKey SrcK) {
  std::unique_lock<std::mutex> Lock(Mutex);

  auto SrcI = TrackedResources.find(SrcK);
  if (SrcI == TrackedResources.end())
    return;

  // Detach the source list before touching the destination entry. Looking up
  // DstK with operator[] may insert; if TrackedResources is a DenseMap
  // (declaration not visible here -- confirm), an insertion can rehash and
  // invalidate references into the map. Detaching first also makes
  // DstK == SrcK safe.
  auto Moved = std::move(SrcI->second);
  TrackedResources.erase(SrcI);

  auto &Dst = TrackedResources[DstK];
  Dst.reserve(Dst.size() + Moved.size());
  for (auto &Symbol : Moved)
    Dst.push_back(std::move(Symbol));
}
7 changes: 5 additions & 2 deletions llvm/lib/ExecutionEngine/Orc/LLJIT.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1293,9 +1293,12 @@ LLLazyJIT::LLLazyJIT(LLLazyJITBuilderState &S, Error &Err) : LLJIT(S, Err) {
return;
}

// Create the IP Layer.
IPLayer = std::make_unique<IRPartitionLayer>(*ES, *InitHelperTransformLayer);

// Create the COD layer.
CODLayer = std::make_unique<CompileOnDemandLayer>(
*ES, *InitHelperTransformLayer, *LCTMgr, std::move(ISMBuilder));
CODLayer = std::make_unique<CompileOnDemandLayer>(*ES, *IPLayer, *LCTMgr,
std::move(ISMBuilder));

if (*S.SupportConcurrentCompilation)
CODLayer->setCloneToNewContextOnEmit(true);
Expand Down
279 changes: 279 additions & 0 deletions llvm/lib/ExecutionEngine/Orc/ReOptimizeLayer.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,279 @@
#include "llvm/ExecutionEngine/Orc/ReOptimizeLayer.h"
#include "llvm/ExecutionEngine/Orc/Mangling.h"

using namespace llvm;
using namespace orc;

/// Tries to claim the "reoptimizing" flag of this materialization unit.
///
/// \returns true if the flag was clear and is now set by this call; false if
/// it was already set.
bool ReOptimizeLayer::ReOptMaterializationUnitState::tryStartReoptimize() {
  std::unique_lock<std::mutex> Guard(Mutex);
  const bool AlreadyRunning = Reoptimizing;
  // Setting the flag is a no-op when it was already set.
  Reoptimizing = true;
  return !AlreadyRunning;
}

/// Records a finished reoptimization: bumps the current version and clears
/// the "reoptimizing" flag. Must only be called while the flag is set.
void ReOptimizeLayer::ReOptMaterializationUnitState::reoptimizeSucceeded() {
  std::unique_lock<std::mutex> Guard(Mutex);
  assert(Reoptimizing && "Tried to mark unstarted reoptimization as done");
  ++CurVersion;
  Reoptimizing = false;
}

/// Records an abandoned reoptimization: clears the "reoptimizing" flag and
/// leaves the current version untouched. Must only be called while the flag
/// is set.
void ReOptimizeLayer::ReOptMaterializationUnitState::reoptimizeFailed() {
  std::unique_lock<std::mutex> Guard(Mutex);
  assert(Reoptimizing && "Tried to mark unstarted reoptimization as done");
  Reoptimizing = false;
}

/// Registers rt_reoptimize as the JIT-dispatch handler associated with the
/// "__orc_rt_reoptimize_tag" symbol in \p PlatformJD.
///
/// \returns whatever ExecutionSession::registerJITDispatchHandlers reports.
Error ReOptimizeLayer::reigsterRuntimeFunctions(JITDylib &PlatformJD) {
  // SPS signature of the handler: (MU id, version) -> error.
  using ReoptimizeSPSSig = shared::SPSError(uint64_t, uint32_t);

  ExecutionSession::JITDispatchHandlerAssociationMap Handlers;
  Handlers[Mangle("__orc_rt_reoptimize_tag")] =
      ES.wrapAsyncWithSPS<ReoptimizeSPSSig>(this,
                                            &ReOptimizeLayer::rt_reoptimize);
  return ES.registerJITDispatchHandlers(PlatformJD, std::move(Handlers));
}

void ReOptimizeLayer::emit(std::unique_ptr<MaterializationResponsibility> R,
ThreadSafeModule TSM) {
auto &JD = R->getTargetJITDylib();

bool HasNonCallable = false;
for (auto &KV : R->getSymbols()) {
auto &Flags = KV.second;
if (!Flags.isCallable())
HasNonCallable = true;
}

if (HasNonCallable) {
BaseLayer.emit(std::move(R), std::move(TSM));
return;
}

auto &MUState = createMaterializationUnitState(TSM);

if (auto Err = R->withResourceKeyDo([&](ResourceKey Key) {
registerMaterializationUnitResource(Key, MUState);
})) {
ES.reportError(std::move(Err));
R->failMaterialization();
return;
}

if (auto Err =
ProfilerFunc(*this, MUState.getID(), MUState.getCurVersion(), TSM)) {
ES.reportError(std::move(Err));
R->failMaterialization();
return;
}

auto InitialDests =
emitMUImplSymbols(MUState, MUState.getCurVersion(), JD, std::move(TSM));
if (!InitialDests) {
ES.reportError(InitialDests.takeError());
R->failMaterialization();
return;
}

RSManager.emitRedirectableSymbols(std::move(R), std::move(*InitialDests));
}

/// Instruments every function definition in \p TSM so that a reoptimization
/// request for (\p MUID, \p CurVersion) is dispatched when a call counter
/// reaches CallCountThreshold.
///
/// A single internal i64 global named "__orc_reopt_counter" is added to the
/// module and shared by all instrumented functions. \p Parent is not used in
/// this body.
///
/// \returns the error from createReoptimizeArgBuffer if the argument buffer
/// cannot be built, otherwise success.
Error ReOptimizeLayer::reoptimizeIfCallFrequent(ReOptimizeLayer &Parent,
ReOptMaterializationUnitID MUID,
unsigned CurVersion,
ThreadSafeModule &TSM) {
return TSM.withModuleDo([&](Module &M) -> Error {
Type *I64Ty = Type::getInt64Ty(M.getContext());
// Zero-initialized, module-internal call counter.
GlobalVariable *Counter = new GlobalVariable(
M, I64Ty, false, GlobalValue::InternalLinkage,
Constant::getNullValue(I64Ty), "__orc_reopt_counter");
// Serialized (MUID, CurVersion) arguments, stored as a constant internal
// global so the emitted dispatch call can pass them.
auto ArgBufferConst = createReoptimizeArgBuffer(M, MUID, CurVersion);
if (auto Err = ArgBufferConst.takeError())
return Err;
GlobalVariable *ArgBuffer =
new GlobalVariable(M, (*ArgBufferConst)->getType(), true,
GlobalValue::InternalLinkage, (*ArgBufferConst));
for (auto &F : M) {
// Only definitions have a body to instrument.
if (F.isDeclaration())
continue;
// Insert the counter check at the first insertion point of the entry block.
auto &BB = F.getEntryBlock();
auto *IP = &*BB.getFirstInsertionPt();
IRBuilder<> IRB(IP);
Value *Threshold = ConstantInt::get(I64Ty, CallCountThreshold, true);
Value *Cnt = IRB.CreateLoad(I64Ty, Counter);
// Compare the pre-increment count with EQ (not >=) so the reoptimize call
// fires only on the call where the counter equals the threshold and is not
// repeated on later calls.
Value *Cmp = IRB.CreateICmpEQ(Cnt, Threshold);
Value *Added = IRB.CreateAdd(Cnt, ConstantInt::get(I64Ty, 1));
(void)IRB.CreateStore(Added, Counter);
// Split the block at IP; the reoptimize call is emitted in the new "then"
// block, before its terminator.
Instruction *SplitTerminator = SplitBlockAndInsertIfThen(Cmp, IP, false);
createReoptimizeCall(M, *SplitTerminator, ArgBuffer);
}
return Error::success();
});
}

/// Renames every function definition in \p TSM to
/// "<name>.__def__.<Version>", defines the module in \p JD under a fresh
/// resource tracker (which is stored in \p MUState), and looks the renamed
/// symbols up until they are resolved.
///
/// \returns a map from each function's original mangled name to the looked-up
/// definition of its versioned implementation, or the first error from
/// defining / looking up the module.
Expected<SymbolMap>
ReOptimizeLayer::emitMUImplSymbols(ReOptMaterializationUnitState &MUState,
                                   uint32_t Version, JITDylib &JD,
                                   ThreadSafeModule TSM) {
  // Original mangled name -> versioned mangled name.
  DenseMap<SymbolStringPtr, SymbolStringPtr> RenamedMap;
  cantFail(TSM.withModuleDo([&](Module &M) -> Error {
    MangleAndInterner Intern(ES, M.getDataLayout());
    for (auto &F : M) {
      if (F.isDeclaration())
        continue;
      std::string ImplName =
          (F.getName() + ".__def__." + Twine(Version)).str();
      RenamedMap[Intern(F.getName())] = Intern(ImplName);
      F.setName(ImplName);
    }
    return Error::success();
  }));

  auto Tracker = JD.createResourceTracker();
  auto DefineErr =
      JD.define(std::make_unique<BasicIRLayerMaterializationUnit>(
                    BaseLayer, *getManglingOptions(), std::move(TSM)),
                Tracker);
  if (DefineErr)
    return DefineErr;
  MUState.setResourceTracker(Tracker);

  SymbolLookupSet ImplNames;
  for (const auto &Renamed : RenamedMap)
    ImplNames.add(Renamed.second);

  auto ImplSymbols =
      ES.lookup({{&JD, JITDylibLookupFlags::MatchAllSymbols}}, ImplNames,
                LookupKind::Static, SymbolState::Resolved);
  if (auto Err = ImplSymbols.takeError())
    return Err;

  // Re-key the lookup result by the original (public) names.
  SymbolMap Result;
  for (const auto &Renamed : RenamedMap)
    Result[Renamed.first] = (*ImplSymbols)[Renamed.second];

  return Result;
}

/// JIT-dispatch handler for a reoptimization request on unit \p MUID issued
/// by code at version \p CurVersion.
///
/// The request is ignored when \p CurVersion is older than the unit's current
/// version or when another reoptimization is already in flight. Otherwise the
/// stored module is cloned, run through ReOptFunc, emitted as version
/// CurVersion + 1, and the unit's symbols are redirected to the new
/// definitions.
///
/// \p SendResult always receives Error::success(); failures are reported via
/// the execution session and recorded with reoptimizeFailed().
void ReOptimizeLayer::rt_reoptimize(SendErrorFn SendResult,
                                    ReOptMaterializationUnitID MUID,
                                    uint32_t CurVersion) {
  auto &State = getMaterializationUnitState(MUID);

  // Request came from an already-superseded version.
  if (CurVersion < State.getCurVersion()) {
    SendResult(Error::success());
    return;
  }
  // Someone else is already reoptimizing this unit.
  if (!State.tryStartReoptimize()) {
    SendResult(Error::success());
    return;
  }

  // Shared failure path for everything after the slot has been claimed.
  auto Abort = [&](Error Err) {
    ES.reportError(std::move(Err));
    State.reoptimizeFailed();
    SendResult(Error::success());
  };

  ThreadSafeModule TSM = cloneToNewContext(State.getThreadSafeModule());
  auto OldRT = State.getResourceTracker();
  auto &JD = OldRT->getJITDylib();

  if (auto Err = ReOptFunc(*this, MUID, CurVersion + 1, OldRT, TSM)) {
    Abort(std::move(Err));
    return;
  }

  auto NewDests = emitMUImplSymbols(State, CurVersion + 1, JD, std::move(TSM));
  if (!NewDests) {
    Abort(NewDests.takeError());
    return;
  }

  if (auto Err = RSManager.redirect(JD, std::move(*NewDests))) {
    Abort(std::move(Err));
    return;
  }

  State.reoptimizeSucceeded();
  SendResult(Error::success());
}

/// Serializes (\p MUID, \p CurVersion) with SPSReoptimizeArgList and wraps the
/// resulting bytes in a constant data array created in \p M's context.
///
/// \returns the constant, or a StringError if serialization fails.
Expected<Constant *> ReOptimizeLayer::createReoptimizeArgBuffer(
    Module &M, ReOptMaterializationUnitID MUID, uint32_t CurVersion) {
  // Size the buffer exactly for this argument list, then serialize into it.
  size_t ArgBufferSize = SPSReoptimizeArgList::size(MUID, CurVersion);
  std::vector<char> ArgBuffer(ArgBufferSize);
  shared::SPSOutputBuffer OB(ArgBuffer.data(), ArgBuffer.size());
  if (!SPSReoptimizeArgList::serialize(OB, MUID, CurVersion))
    // Message previously read "serealize"; spelling corrected.
    return make_error<StringError>("Could not serialize args list",
                                   inconvertibleErrorCode());
  return ConstantDataArray::get(M.getContext(), ArrayRef(ArgBuffer));
}

/// Emits, immediately before \p IP, a call
///   __orc_rt_jit_dispatch(&__orc_rt_jit_dispatch_ctx,
///                         &__orc_rt_reoptimize_tag, ArgBuffer, <size>)
/// where <size> is the SPS-serialized size of a reoptimize argument list.
///
/// The two globals and the dispatch function are looked up in \p M by name
/// and declared as external symbols if they are not present yet.
void ReOptimizeLayer::createReoptimizeCall(Module &M, Instruction &IP,
                                           GlobalVariable *ArgBuffer) {
  auto &Ctx = M.getContext();
  auto *PtrTy = PointerType::get(Ctx, 0);
  auto *I64Ty = IntegerType::get(Ctx, 64);

  // Returns the named global, declaring it as an external pointer-typed
  // variable without initializer when the module does not have it.
  auto GetOrDeclareGlobal = [&](const char *Name) -> GlobalVariable * {
    if (GlobalVariable *Existing = M.getGlobalVariable(Name))
      return Existing;
    return new GlobalVariable(M, PtrTy, false, GlobalValue::ExternalLinkage,
                              nullptr, Name);
  };

  GlobalVariable *DispatchCtx = GetOrDeclareGlobal("__orc_rt_jit_dispatch_ctx");
  GlobalVariable *ReoptimizeTag = GetOrDeclareGlobal("__orc_rt_reoptimize_tag");

  Function *DispatchFunc = M.getFunction("__orc_rt_jit_dispatch");
  if (!DispatchFunc) {
    // void __orc_rt_jit_dispatch(ptr, ptr, ptr, i64)
    std::vector<Type *> ParamTys = {PtrTy, PtrTy, PtrTy, I64Ty};
    FunctionType *FuncTy =
        FunctionType::get(Type::getVoidTy(Ctx), ParamTys, false);
    DispatchFunc = Function::Create(FuncTy, GlobalValue::ExternalLinkage,
                                    "__orc_rt_jit_dispatch", &M);
  }

  // The serialized size is computed from value-initialized arguments.
  size_t SerializedSize =
      SPSReoptimizeArgList::size(ReOptMaterializationUnitID{}, uint32_t{});
  Constant *ArgBufferSize = ConstantInt::get(I64Ty, SerializedSize, false);

  IRBuilder<> IRB(&IP);
  (void)IRB.CreateCall(DispatchFunc,
                       {DispatchCtx, ReoptimizeTag, ArgBuffer, ArgBufferSize});
}

/// Allocates the next materialization-unit ID and stores a new state for it
/// that holds a clone of \p TSM in a new context.
///
/// \returns a reference to the state stored in MUStates under the new ID.
ReOptimizeLayer::ReOptMaterializationUnitState &
ReOptimizeLayer::createMaterializationUnitState(const ThreadSafeModule &TSM) {
  std::unique_lock<std::mutex> Guard(Mutex);
  const ReOptMaterializationUnitID NewID = NextID++;
  auto Inserted = MUStates.emplace(
      NewID, ReOptMaterializationUnitState(NewID, cloneToNewContext(TSM)));
  // emplace yields the entry for NewID whether or not it was newly inserted.
  return Inserted.first->second;
}

/// Looks up the state registered for \p MUID while holding the layer mutex.
/// The lookup uses at(), so \p MUID is expected to be present.
ReOptimizeLayer::ReOptMaterializationUnitState &
ReOptimizeLayer::getMaterializationUnitState(ReOptMaterializationUnitID MUID) {
  std::unique_lock<std::mutex> Guard(Mutex);
  auto &State = MUStates.at(MUID);
  return State;
}

/// Associates the ID of \p State with resource key \p Key so the state can
/// be found again when that key is removed or transferred.
void ReOptimizeLayer::registerMaterializationUnitResource(
    ResourceKey Key, ReOptMaterializationUnitState &State) {
  std::unique_lock<std::mutex> Guard(Mutex);
  auto &IDsForKey = MUResources[Key];
  IDsForKey.insert(State.getID());
}

/// Drops the state of every materialization unit registered under resource
/// key \p K, then forgets \p K itself. \p JD is not consulted. Always
/// succeeds.
Error ReOptimizeLayer::handleRemoveResources(JITDylib &JD, ResourceKey K) {
  std::unique_lock<std::mutex> Guard(Mutex);

  auto &OwnedIDs = MUResources[K];
  for (auto ID : OwnedIDs)
    MUStates.erase(ID);
  MUResources.erase(K);

  return Error::success();
}

/// Merges the materialization-unit IDs registered under \p SrcK into the set
/// registered under \p DstK and stops tracking \p SrcK. \p JD is not
/// consulted.
void ReOptimizeLayer::handleTransferResources(JITDylib &JD, ResourceKey DstK,
                                              ResourceKey SrcK) {
  std::unique_lock<std::mutex> Lock(Mutex);

  auto SrcIt = MUResources.find(SrcK);
  if (SrcIt == MUResources.end())
    return; // Nothing registered under SrcK, so there is nothing to transfer.

  // Detach the source set before touching the destination entry. The previous
  // single-expression form held a reference to the DstK entry while
  // operator[] could still insert SrcK (unsafe if the map invalidates
  // references on insertion, as llvm::DenseMap does), and it inserted a
  // container's own range into itself when DstK == SrcK.
  auto Moved = std::move(SrcIt->second);
  MUResources.erase(SrcIt);

  MUResources[DstK].insert(Moved.begin(), Moved.end());
}
24 changes: 24 additions & 0 deletions llvm/lib/ExecutionEngine/Orc/RedirectionManager.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
//===---- RedirectionManager.cpp - Redirection manager interface in Orc ---===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#include "llvm/ExecutionEngine/Orc/RedirectionManager.h"

#define DEBUG_TYPE "orc"

using namespace llvm;
using namespace llvm::orc;

// Intentionally empty out-of-line definition.
// NOTE(review): presumably the usual LLVM "anchor" method that pins the
// class's vtable to this translation unit — confirm against the header.
void RedirectionManager::anchor() {}

/// Defines a RedirectableMaterializationUnit for \p InitialDests in the
/// JITDylib that \p RT belongs to, tracked by \p RT.
///
/// \returns the result of JITDylib::define.
Error RedirectableSymbolManager::createRedirectableSymbols(
    ResourceTrackerSP RT, const SymbolMap &InitialDests) {
  auto &TargetJD = RT->getJITDylib();
  auto MU =
      std::make_unique<RedirectableMaterializationUnit>(*this, InitialDests);
  return TargetJD.define(std::move(MU), RT);
}
10 changes: 6 additions & 4 deletions llvm/lib/Target/RISCV/RISCVTargetMachine.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -94,9 +94,9 @@ static cl::opt<bool>
cl::desc("Enable the loop data prefetch pass"),
cl::init(true));

static cl::opt<bool> EnableMISchedLoadClustering(
"riscv-misched-load-clustering", cl::Hidden,
cl::desc("Enable load clustering in the machine scheduler"),
static cl::opt<bool> EnableMISchedLoadStoreClustering(
"riscv-misched-load-store-clustering", cl::Hidden,
cl::desc("Enable load and store clustering in the machine scheduler"),
cl::init(true));

static cl::opt<bool> EnableVSETVLIAfterRVVRegAlloc(
Expand Down Expand Up @@ -352,10 +352,12 @@ class RISCVPassConfig : public TargetPassConfig {
ScheduleDAGInstrs *
createMachineScheduler(MachineSchedContext *C) const override {
ScheduleDAGMILive *DAG = nullptr;
if (EnableMISchedLoadClustering) {
if (EnableMISchedLoadStoreClustering) {
DAG = createGenericSchedLive(C);
DAG->addMutation(createLoadClusterDAGMutation(
DAG->TII, DAG->TRI, /*ReorderWhileClustering=*/true));
DAG->addMutation(createStoreClusterDAGMutation(
DAG->TII, DAG->TRI, /*ReorderWhileClustering=*/true));
}
return DAG;
}
Expand Down
18 changes: 10 additions & 8 deletions llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -7025,8 +7025,7 @@ void BoUpSLP::tryToVectorizeGatheredLoads(
// Cannot represent the loads as consecutive vectorizable nodes -
// just exit.
unsigned ConsecutiveNodesSize = 0;
if (!LoadEntriesToVectorize.empty() &&
InterleaveFactor == 0 &&
if (!LoadEntriesToVectorize.empty() && InterleaveFactor == 0 &&
any_of(zip(LoadEntriesToVectorize, LoadSetsToVectorize),
[&, Slice = Slice](const auto &P) {
const auto *It = find_if(Slice, [&](Value *V) {
Expand Down Expand Up @@ -11843,12 +11842,15 @@ InstructionCost BoUpSLP::getTreeCost(ArrayRef<Value *> VectorizedVals) {
for (ExternalUser &EU : ExternalUses) {
// Uses by ephemeral values are free (because the ephemeral value will be
// removed prior to code generation, and so the extraction will be
// removed as well) as well as uses in unreachable blocks or in landing pads
// (rarely executed).
if (EphValues.count(EU.User) ||
(EU.User &&
(!DT->isReachableFromEntry(cast<Instruction>(EU.User)->getParent()) ||
cast<Instruction>(EU.User)->getParent()->isLandingPad())))
// removed as well).
if (EphValues.count(EU.User))
continue;

// Used in unreachable blocks or in landing pads (rarely executed).
if (BasicBlock *UserParent =
EU.User ? cast<Instruction>(EU.User)->getParent() : nullptr;
UserParent &&
(!DT->isReachableFromEntry(UserParent) || UserParent->isLandingPad()))
continue;

// We only add extract cost once for the same scalar.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -231,11 +231,7 @@ void DependencyGraph::createNewNodes(const Interval<Instruction> &NewInterval) {
}
// Link new MemDGNode chain with the old one, if any.
if (!DAGInterval.empty()) {
// TODO: Implement Interval::comesBefore() to replace this check.
bool NewIsAbove = NewInterval.bottom()->comesBefore(DAGInterval.top());
assert(
(NewIsAbove || DAGInterval.bottom()->comesBefore(NewInterval.top())) &&
"Expected NewInterval below DAGInterval.");
bool NewIsAbove = NewInterval.comesBefore(DAGInterval);
const auto &TopInterval = NewIsAbove ? NewInterval : DAGInterval;
const auto &BotInterval = NewIsAbove ? DAGInterval : NewInterval;
MemDGNode *LinkTopN =
Expand Down
42 changes: 21 additions & 21 deletions llvm/test/CodeGen/RISCV/GlobalISel/vararg.ll
Original file line number Diff line number Diff line change
Expand Up @@ -69,12 +69,12 @@ define i32 @va1(ptr %fmt, ...) {
; RV64-NEXT: sd a2, 32(sp)
; RV64-NEXT: sd a3, 40(sp)
; RV64-NEXT: sd a4, 48(sp)
; RV64-NEXT: sd a5, 56(sp)
; RV64-NEXT: addi a0, sp, 8
; RV64-NEXT: addi a1, sp, 24
; RV64-NEXT: sd a1, 8(sp)
; RV64-NEXT: lw a0, 4(a0)
; RV64-NEXT: lwu a1, 8(sp)
; RV64-NEXT: sd a5, 56(sp)
; RV64-NEXT: sd a6, 64(sp)
; RV64-NEXT: sd a7, 72(sp)
; RV64-NEXT: slli a0, a0, 32
Expand Down Expand Up @@ -129,12 +129,12 @@ define i32 @va1(ptr %fmt, ...) {
; RV64-WITHFP-NEXT: sd a2, 16(s0)
; RV64-WITHFP-NEXT: sd a3, 24(s0)
; RV64-WITHFP-NEXT: sd a4, 32(s0)
; RV64-WITHFP-NEXT: sd a5, 40(s0)
; RV64-WITHFP-NEXT: addi a0, s0, -24
; RV64-WITHFP-NEXT: addi a1, s0, 8
; RV64-WITHFP-NEXT: sd a1, -24(s0)
; RV64-WITHFP-NEXT: lw a0, 4(a0)
; RV64-WITHFP-NEXT: lwu a1, -24(s0)
; RV64-WITHFP-NEXT: sd a5, 40(s0)
; RV64-WITHFP-NEXT: sd a6, 48(s0)
; RV64-WITHFP-NEXT: sd a7, 56(s0)
; RV64-WITHFP-NEXT: slli a0, a0, 32
Expand Down Expand Up @@ -844,11 +844,11 @@ define i64 @va3(i32 %a, i64 %b, ...) nounwind {
; ILP32-LABEL: va3:
; ILP32: # %bb.0:
; ILP32-NEXT: addi sp, sp, -32
; ILP32-NEXT: sw a3, 12(sp)
; ILP32-NEXT: sw a4, 16(sp)
; ILP32-NEXT: addi a0, sp, 12
; ILP32-NEXT: sw a0, 4(sp)
; ILP32-NEXT: lw a0, 4(sp)
; ILP32-NEXT: sw a3, 12(sp)
; ILP32-NEXT: sw a4, 16(sp)
; ILP32-NEXT: sw a5, 20(sp)
; ILP32-NEXT: sw a6, 24(sp)
; ILP32-NEXT: sw a7, 28(sp)
Expand All @@ -868,11 +868,11 @@ define i64 @va3(i32 %a, i64 %b, ...) nounwind {
; RV32D-ILP32-LABEL: va3:
; RV32D-ILP32: # %bb.0:
; RV32D-ILP32-NEXT: addi sp, sp, -48
; RV32D-ILP32-NEXT: sw a3, 28(sp)
; RV32D-ILP32-NEXT: sw a4, 32(sp)
; RV32D-ILP32-NEXT: addi a0, sp, 28
; RV32D-ILP32-NEXT: sw a0, 20(sp)
; RV32D-ILP32-NEXT: lw a0, 20(sp)
; RV32D-ILP32-NEXT: sw a3, 28(sp)
; RV32D-ILP32-NEXT: sw a4, 32(sp)
; RV32D-ILP32-NEXT: sw a5, 36(sp)
; RV32D-ILP32-NEXT: sw a6, 40(sp)
; RV32D-ILP32-NEXT: sw a7, 44(sp)
Expand All @@ -894,11 +894,11 @@ define i64 @va3(i32 %a, i64 %b, ...) nounwind {
; RV32D-ILP32F-LABEL: va3:
; RV32D-ILP32F: # %bb.0:
; RV32D-ILP32F-NEXT: addi sp, sp, -48
; RV32D-ILP32F-NEXT: sw a3, 28(sp)
; RV32D-ILP32F-NEXT: sw a4, 32(sp)
; RV32D-ILP32F-NEXT: addi a0, sp, 28
; RV32D-ILP32F-NEXT: sw a0, 20(sp)
; RV32D-ILP32F-NEXT: lw a0, 20(sp)
; RV32D-ILP32F-NEXT: sw a3, 28(sp)
; RV32D-ILP32F-NEXT: sw a4, 32(sp)
; RV32D-ILP32F-NEXT: sw a5, 36(sp)
; RV32D-ILP32F-NEXT: sw a6, 40(sp)
; RV32D-ILP32F-NEXT: sw a7, 44(sp)
Expand All @@ -920,11 +920,11 @@ define i64 @va3(i32 %a, i64 %b, ...) nounwind {
; RV32D-ILP32D-LABEL: va3:
; RV32D-ILP32D: # %bb.0:
; RV32D-ILP32D-NEXT: addi sp, sp, -48
; RV32D-ILP32D-NEXT: sw a3, 28(sp)
; RV32D-ILP32D-NEXT: sw a4, 32(sp)
; RV32D-ILP32D-NEXT: addi a0, sp, 28
; RV32D-ILP32D-NEXT: sw a0, 20(sp)
; RV32D-ILP32D-NEXT: lw a0, 20(sp)
; RV32D-ILP32D-NEXT: sw a3, 28(sp)
; RV32D-ILP32D-NEXT: sw a4, 32(sp)
; RV32D-ILP32D-NEXT: sw a5, 36(sp)
; RV32D-ILP32D-NEXT: sw a6, 40(sp)
; RV32D-ILP32D-NEXT: sw a7, 44(sp)
Expand All @@ -946,12 +946,12 @@ define i64 @va3(i32 %a, i64 %b, ...) nounwind {
; RV64-LABEL: va3:
; RV64: # %bb.0:
; RV64-NEXT: addi sp, sp, -64
; RV64-NEXT: sd a2, 16(sp)
; RV64-NEXT: sd a3, 24(sp)
; RV64-NEXT: sd a4, 32(sp)
; RV64-NEXT: addi a0, sp, 16
; RV64-NEXT: sd a0, 8(sp)
; RV64-NEXT: ld a0, 8(sp)
; RV64-NEXT: sd a2, 16(sp)
; RV64-NEXT: sd a3, 24(sp)
; RV64-NEXT: sd a4, 32(sp)
; RV64-NEXT: sd a5, 40(sp)
; RV64-NEXT: sd a6, 48(sp)
; RV64-NEXT: sd a7, 56(sp)
Expand All @@ -970,11 +970,11 @@ define i64 @va3(i32 %a, i64 %b, ...) nounwind {
; RV32-WITHFP-NEXT: sw ra, 20(sp) # 4-byte Folded Spill
; RV32-WITHFP-NEXT: sw s0, 16(sp) # 4-byte Folded Spill
; RV32-WITHFP-NEXT: addi s0, sp, 24
; RV32-WITHFP-NEXT: sw a3, 4(s0)
; RV32-WITHFP-NEXT: sw a4, 8(s0)
; RV32-WITHFP-NEXT: addi a0, s0, 4
; RV32-WITHFP-NEXT: sw a0, -12(s0)
; RV32-WITHFP-NEXT: lw a0, -12(s0)
; RV32-WITHFP-NEXT: sw a3, 4(s0)
; RV32-WITHFP-NEXT: sw a4, 8(s0)
; RV32-WITHFP-NEXT: sw a5, 12(s0)
; RV32-WITHFP-NEXT: sw a6, 16(s0)
; RV32-WITHFP-NEXT: sw a7, 20(s0)
Expand All @@ -999,12 +999,12 @@ define i64 @va3(i32 %a, i64 %b, ...) nounwind {
; RV64-WITHFP-NEXT: sd ra, 24(sp) # 8-byte Folded Spill
; RV64-WITHFP-NEXT: sd s0, 16(sp) # 8-byte Folded Spill
; RV64-WITHFP-NEXT: addi s0, sp, 32
; RV64-WITHFP-NEXT: sd a2, 0(s0)
; RV64-WITHFP-NEXT: sd a3, 8(s0)
; RV64-WITHFP-NEXT: sd a4, 16(s0)
; RV64-WITHFP-NEXT: mv a0, s0
; RV64-WITHFP-NEXT: sd a0, -24(s0)
; RV64-WITHFP-NEXT: ld a0, -24(s0)
; RV64-WITHFP-NEXT: sd a2, 0(s0)
; RV64-WITHFP-NEXT: sd a3, 8(s0)
; RV64-WITHFP-NEXT: sd a4, 16(s0)
; RV64-WITHFP-NEXT: sd a5, 24(s0)
; RV64-WITHFP-NEXT: sd a6, 32(s0)
; RV64-WITHFP-NEXT: sd a7, 40(s0)
Expand Down Expand Up @@ -1622,9 +1622,6 @@ define i32 @va_large_stack(ptr %fmt, ...) {
; RV64-NEXT: lui a0, 24414
; RV64-NEXT: add a0, sp, a0
; RV64-NEXT: sd a4, 304(a0)
; RV64-NEXT: lui a0, 24414
; RV64-NEXT: add a0, sp, a0
; RV64-NEXT: sd a5, 312(a0)
; RV64-NEXT: addi a0, sp, 8
; RV64-NEXT: lui a1, 24414
; RV64-NEXT: addiw a1, a1, 280
Expand All @@ -1634,6 +1631,9 @@ define i32 @va_large_stack(ptr %fmt, ...) {
; RV64-NEXT: lwu a1, 8(sp)
; RV64-NEXT: lui a2, 24414
; RV64-NEXT: add a2, sp, a2
; RV64-NEXT: sd a5, 312(a2)
; RV64-NEXT: lui a2, 24414
; RV64-NEXT: add a2, sp, a2
; RV64-NEXT: sd a6, 320(a2)
; RV64-NEXT: lui a2, 24414
; RV64-NEXT: add a2, sp, a2
Expand Down
16 changes: 8 additions & 8 deletions llvm/test/CodeGen/RISCV/abds-neg.ll
Original file line number Diff line number Diff line change
Expand Up @@ -705,8 +705,8 @@ define i128 @abd_ext_i128(i128 %a, i128 %b) nounwind {
; RV32I-NEXT: sub a4, a4, a3
; RV32I-NEXT: neg a1, a1
; RV32I-NEXT: sw a1, 0(a0)
; RV32I-NEXT: sw a4, 8(a0)
; RV32I-NEXT: sw a2, 4(a0)
; RV32I-NEXT: sw a4, 8(a0)
; RV32I-NEXT: sw a5, 12(a0)
; RV32I-NEXT: lw s0, 12(sp) # 4-byte Folded Reload
; RV32I-NEXT: addi sp, sp, 16
Expand Down Expand Up @@ -824,8 +824,8 @@ define i128 @abd_ext_i128(i128 %a, i128 %b) nounwind {
; RV32ZBB-NEXT: sub a4, a4, a3
; RV32ZBB-NEXT: neg a1, a1
; RV32ZBB-NEXT: sw a1, 0(a0)
; RV32ZBB-NEXT: sw a4, 8(a0)
; RV32ZBB-NEXT: sw a2, 4(a0)
; RV32ZBB-NEXT: sw a4, 8(a0)
; RV32ZBB-NEXT: sw a5, 12(a0)
; RV32ZBB-NEXT: lw s0, 12(sp) # 4-byte Folded Reload
; RV32ZBB-NEXT: addi sp, sp, 16
Expand Down Expand Up @@ -952,8 +952,8 @@ define i128 @abd_ext_i128_undef(i128 %a, i128 %b) nounwind {
; RV32I-NEXT: sub a4, a4, a3
; RV32I-NEXT: neg a1, a1
; RV32I-NEXT: sw a1, 0(a0)
; RV32I-NEXT: sw a4, 8(a0)
; RV32I-NEXT: sw a2, 4(a0)
; RV32I-NEXT: sw a4, 8(a0)
; RV32I-NEXT: sw a5, 12(a0)
; RV32I-NEXT: lw s0, 12(sp) # 4-byte Folded Reload
; RV32I-NEXT: addi sp, sp, 16
Expand Down Expand Up @@ -1071,8 +1071,8 @@ define i128 @abd_ext_i128_undef(i128 %a, i128 %b) nounwind {
; RV32ZBB-NEXT: sub a4, a4, a3
; RV32ZBB-NEXT: neg a1, a1
; RV32ZBB-NEXT: sw a1, 0(a0)
; RV32ZBB-NEXT: sw a4, 8(a0)
; RV32ZBB-NEXT: sw a2, 4(a0)
; RV32ZBB-NEXT: sw a4, 8(a0)
; RV32ZBB-NEXT: sw a5, 12(a0)
; RV32ZBB-NEXT: lw s0, 12(sp) # 4-byte Folded Reload
; RV32ZBB-NEXT: addi sp, sp, 16
Expand Down Expand Up @@ -1918,9 +1918,9 @@ define i128 @abd_cmp_i128(i128 %a, i128 %b) nounwind {
; RV32I-NEXT: sub a1, a1, t2
; RV32I-NEXT: sub a2, a2, a3
; RV32I-NEXT: .LBB22_11:
; RV32I-NEXT: sw a6, 8(a0)
; RV32I-NEXT: sw a1, 4(a0)
; RV32I-NEXT: sw a2, 0(a0)
; RV32I-NEXT: sw a1, 4(a0)
; RV32I-NEXT: sw a6, 8(a0)
; RV32I-NEXT: sw a5, 12(a0)
; RV32I-NEXT: ret
;
Expand Down Expand Up @@ -2005,9 +2005,9 @@ define i128 @abd_cmp_i128(i128 %a, i128 %b) nounwind {
; RV32ZBB-NEXT: sub a1, a1, t2
; RV32ZBB-NEXT: sub a2, a2, a3
; RV32ZBB-NEXT: .LBB22_11:
; RV32ZBB-NEXT: sw a6, 8(a0)
; RV32ZBB-NEXT: sw a1, 4(a0)
; RV32ZBB-NEXT: sw a2, 0(a0)
; RV32ZBB-NEXT: sw a1, 4(a0)
; RV32ZBB-NEXT: sw a6, 8(a0)
; RV32ZBB-NEXT: sw a5, 12(a0)
; RV32ZBB-NEXT: ret
;
Expand Down
Loading