184 changes: 111 additions & 73 deletions llvm/include/llvm/Support/ThreadPool.h
Original file line number Diff line number Diff line change
Expand Up @@ -32,11 +32,8 @@ namespace llvm {

class ThreadPoolTaskGroup;

/// A ThreadPool for asynchronous parallel execution on a defined number of
/// threads.
///
/// The pool keeps a vector of threads alive, waiting on a condition variable
/// for some work to become available.
/// This defines the abstract base interface for a ThreadPool allowing
/// asynchronous parallel execution on a defined number of threads.
///
/// It is possible to reuse one thread pool for different groups of tasks
/// by grouping tasks using ThreadPoolTaskGroup. All tasks are processed using
Expand All @@ -49,16 +46,31 @@ class ThreadPoolTaskGroup;
/// available threads are used up by tasks waiting for a task that has no thread
/// left to run on (this includes waiting on the returned future). It should be
/// generally safe to wait() for a group as long as groups do not form a cycle.
class ThreadPool {
class ThreadPoolInterface {
/// The actual method to enqueue a task to be defined by the concrete
/// implementation.
virtual void asyncEnqueue(std::function<void()> Task,
ThreadPoolTaskGroup *Group) = 0;

public:
/// Construct a pool using the hardware strategy \p S for mapping hardware
/// execution resources (threads, cores, CPUs)
/// Defaults to using the maximum execution resources in the system, but
/// accounting for the affinity mask.
ThreadPool(ThreadPoolStrategy S = hardware_concurrency());
/// Destroying the pool will drain the pending tasks and wait. The current
/// thread may participate in the execution of the pending tasks.
virtual ~ThreadPoolInterface();

/// Blocking destructor: the pool will wait for all the threads to complete.
~ThreadPool();
/// Blocking wait for all the threads to complete and the queue to be empty.
/// It is an error to try to add new tasks while blocking on this call.
/// Calling wait() from a task would deadlock waiting for itself.
virtual void wait() = 0;

/// Blocking wait for only all the threads in the given group to complete.
/// It is possible to wait even inside a task, but waiting (directly or
/// indirectly) on itself will deadlock. If called from a task running on a
/// worker thread, the call may process pending tasks while waiting in order
/// not to waste the thread.
virtual void wait(ThreadPoolTaskGroup &Group) = 0;

/// Returns the maximum number of worker this pool can eventually grow to.
virtual unsigned getMaxConcurrency() const = 0;

/// Asynchronous submission of a task to the pool. The returned future can be
/// used to wait for the task to finish and is *non-blocking* on destruction.
Expand Down Expand Up @@ -92,102 +104,85 @@ class ThreadPool {
&Group);
}

private:
/// Asynchronous submission of a task to the pool. The returned future can be
/// used to wait for the task to finish and is *non-blocking* on destruction.
template <typename ResTy>
std::shared_future<ResTy> asyncImpl(std::function<ResTy()> Task,
ThreadPoolTaskGroup *Group) {
auto Future = std::async(std::launch::deferred, std::move(Task)).share();
asyncEnqueue([Future]() { Future.wait(); }, Group);
return Future;
}
};

#if LLVM_ENABLE_THREADS
/// A ThreadPool implementation using std::threads.
///
/// The pool keeps a vector of threads alive, waiting on a condition variable
/// for some work to become available.
class StdThreadPool : public ThreadPoolInterface {
public:
/// Construct a pool using the hardware strategy \p S for mapping hardware
/// execution resources (threads, cores, CPUs)
/// Defaults to using the maximum execution resources in the system, but
/// accounting for the affinity mask.
StdThreadPool(ThreadPoolStrategy S = hardware_concurrency());

/// Blocking destructor: the pool will wait for all the threads to complete.
~StdThreadPool() override;

/// Blocking wait for all the threads to complete and the queue to be empty.
/// It is an error to try to add new tasks while blocking on this call.
/// Calling wait() from a task would deadlock waiting for itself.
void wait();
void wait() override;

/// Blocking wait for only all the threads in the given group to complete.
/// It is possible to wait even inside a task, but waiting (directly or
/// indirectly) on itself will deadlock. If called from a task running on a
/// worker thread, the call may process pending tasks while waiting in order
/// not to waste the thread.
void wait(ThreadPoolTaskGroup &Group);
void wait(ThreadPoolTaskGroup &Group) override;

// Returns the maximum number of worker threads in the pool, not the current
// number of threads!
unsigned getMaxConcurrency() const { return MaxThreadCount; }
/// Returns the maximum number of worker threads in the pool, not the current
/// number of threads!
unsigned getMaxConcurrency() const override { return MaxThreadCount; }

// TODO: misleading legacy name warning!
// TODO: Remove, misleading legacy name warning!
LLVM_DEPRECATED("Use getMaxConcurrency instead", "getMaxConcurrency")
unsigned getThreadCount() const { return MaxThreadCount; }

/// Returns true if the current thread is a worker thread of this thread pool.
bool isWorkerThread() const;

private:
/// Helpers to create a promise and a callable wrapper of \p Task that sets
/// the result of the promise. Returns the callable and a future to access the
/// result.
template <typename ResTy>
static std::pair<std::function<void()>, std::future<ResTy>>
createTaskAndFuture(std::function<ResTy()> Task) {
std::shared_ptr<std::promise<ResTy>> Promise =
std::make_shared<std::promise<ResTy>>();
auto F = Promise->get_future();
return {
[Promise = std::move(Promise), Task]() { Promise->set_value(Task()); },
std::move(F)};
}
static std::pair<std::function<void()>, std::future<void>>
createTaskAndFuture(std::function<void()> Task) {
std::shared_ptr<std::promise<void>> Promise =
std::make_shared<std::promise<void>>();
auto F = Promise->get_future();
return {[Promise = std::move(Promise), Task]() {
Task();
Promise->set_value();
},
std::move(F)};
}

/// Returns true if all tasks in the given group have finished (nullptr means
/// all tasks regardless of their group). QueueLock must be locked.
bool workCompletedUnlocked(ThreadPoolTaskGroup *Group) const;

/// Asynchronous submission of a task to the pool. The returned future can be
/// used to wait for the task to finish and is *non-blocking* on destruction.
template <typename ResTy>
std::shared_future<ResTy> asyncImpl(std::function<ResTy()> Task,
ThreadPoolTaskGroup *Group) {

#if LLVM_ENABLE_THREADS
/// Wrap the Task in a std::function<void()> that sets the result of the
/// corresponding future.
auto R = createTaskAndFuture(Task);

void asyncEnqueue(std::function<void()> Task,
ThreadPoolTaskGroup *Group) override {
int requestedThreads;
{
// Lock the queue and push the new task
std::unique_lock<std::mutex> LockGuard(QueueLock);

// Don't allow enqueueing after disabling the pool
assert(EnableFlag && "Queuing a thread during ThreadPool destruction");
Tasks.emplace_back(std::make_pair(std::move(R.first), Group));
Tasks.emplace_back(std::make_pair(std::move(Task), Group));
requestedThreads = ActiveThreads + Tasks.size();
}
QueueCondition.notify_one();
grow(requestedThreads);
return R.second.share();

#else // LLVM_ENABLE_THREADS Disabled

// Get a Future with launch::deferred execution using std::async
auto Future = std::async(std::launch::deferred, std::move(Task)).share();
// Wrap the future so that both ThreadPool::wait() can operate and the
// returned future can be sync'ed on.
Tasks.emplace_back(std::make_pair([Future]() { Future.get(); }, Group));
return Future;
#endif
}

#if LLVM_ENABLE_THREADS
// Grow to ensure that we have at least `requested` Threads, but do not go
// over MaxThreadCount.
/// Grow to ensure that we have at least `requested` Threads, but do not go
/// over MaxThreadCount.
void grow(int requested);

void processTasks(ThreadPoolTaskGroup *WaitingForGroup);
#endif

/// Threads in flight
std::vector<llvm::thread> Threads;
Expand All @@ -209,25 +204,68 @@ class ThreadPool {
/// Number of threads active for tasks in the given group (only non-zero).
DenseMap<ThreadPoolTaskGroup *, unsigned> ActiveGroups;

#if LLVM_ENABLE_THREADS // avoids warning for unused variable
/// Signal for the destruction of the pool, asking thread to exit.
bool EnableFlag = true;
#endif

const ThreadPoolStrategy Strategy;

/// Maximum number of threads to potentially grow this pool to.
const unsigned MaxThreadCount;
};

#endif // LLVM_ENABLE_THREADS Disabled

/// A non-threaded implementation.
class SingleThreadExecutor : public ThreadPoolInterface {
public:
/// Construct a non-threaded pool, ignoring using the hardware strategy.
SingleThreadExecutor(ThreadPoolStrategy ignored = {});

/// Blocking destructor: the pool will first execute the pending tasks.
~SingleThreadExecutor() override;

/// Blocking wait for all the tasks to execute first
void wait() override;

/// Blocking wait for only all the tasks in the given group to complete.
void wait(ThreadPoolTaskGroup &Group) override;

/// Returns always 1: there is no concurrency.
unsigned getMaxConcurrency() const override { return 1; }

// TODO: Remove, misleading legacy name warning!
LLVM_DEPRECATED("Use getMaxConcurrency instead", "getMaxConcurrency")
unsigned getThreadCount() const { return 1; }

/// Returns true if the current thread is a worker thread of this thread pool.
bool isWorkerThread() const;

private:
/// Asynchronous submission of a task to the pool. The returned future can be
/// used to wait for the task to finish and is *non-blocking* on destruction.
void asyncEnqueue(std::function<void()> Task,
ThreadPoolTaskGroup *Group) override {
Tasks.emplace_back(std::make_pair(std::move(Task), Group));
}

/// Tasks waiting for execution in the pool.
std::deque<std::pair<std::function<void()>, ThreadPoolTaskGroup *>> Tasks;
};

#if LLVM_ENABLE_THREADS
using ThreadPool = StdThreadPool;
#else
using ThreadPool = SingleThreadExecutor;
#endif

/// A group of tasks to be run on a thread pool. Thread pool tasks in different
/// groups can run on the same threadpool but can be waited for separately.
/// It is even possible for tasks of one group to submit and wait for tasks
/// of another group, as long as this does not form a loop.
class ThreadPoolTaskGroup {
public:
/// The ThreadPool argument is the thread pool to forward calls to.
ThreadPoolTaskGroup(ThreadPool &Pool) : Pool(Pool) {}
ThreadPoolTaskGroup(ThreadPoolInterface &Pool) : Pool(Pool) {}

/// Blocking destructor: will wait for all the tasks in the group to complete
/// by calling ThreadPool::wait().
Expand All @@ -244,7 +282,7 @@ class ThreadPoolTaskGroup {
void wait() { Pool.wait(*this); }

private:
ThreadPool &Pool;
ThreadPoolInterface &Pool;
};

} // namespace llvm
Expand Down
346 changes: 169 additions & 177 deletions llvm/include/llvm/TargetParser/AArch64TargetParser.h

Large diffs are not rendered by default.

321 changes: 175 additions & 146 deletions llvm/include/llvm/TargetParser/ARMTargetParser.def

Large diffs are not rendered by default.

74 changes: 37 additions & 37 deletions llvm/include/llvm/TargetParser/ARMTargetParser.h
Original file line number Diff line number Diff line change
Expand Up @@ -29,44 +29,44 @@ namespace ARM {
// Arch extension modifiers for CPUs.
// Note that this is not the same as the AArch64 list
enum ArchExtKind : uint64_t {
AEK_INVALID = 0,
AEK_NONE = 1,
AEK_CRC = 1 << 1,
AEK_CRYPTO = 1 << 2,
AEK_FP = 1 << 3,
AEK_HWDIVTHUMB = 1 << 4,
AEK_HWDIVARM = 1 << 5,
AEK_MP = 1 << 6,
AEK_SIMD = 1 << 7,
AEK_SEC = 1 << 8,
AEK_VIRT = 1 << 9,
AEK_DSP = 1 << 10,
AEK_FP16 = 1 << 11,
AEK_RAS = 1 << 12,
AEK_DOTPROD = 1 << 13,
AEK_SHA2 = 1 << 14,
AEK_AES = 1 << 15,
AEK_FP16FML = 1 << 16,
AEK_SB = 1 << 17,
AEK_FP_DP = 1 << 18,
AEK_LOB = 1 << 19,
AEK_BF16 = 1 << 20,
AEK_I8MM = 1 << 21,
AEK_CDECP0 = 1 << 22,
AEK_CDECP1 = 1 << 23,
AEK_CDECP2 = 1 << 24,
AEK_CDECP3 = 1 << 25,
AEK_CDECP4 = 1 << 26,
AEK_CDECP5 = 1 << 27,
AEK_CDECP6 = 1 << 28,
AEK_CDECP7 = 1 << 29,
AEK_PACBTI = 1 << 30,
AEK_INVALID = 0,
AEK_NONE = 1,
AEK_CRC = 1 << 1,
AEK_CRYPTO = 1 << 2,
AEK_FP = 1 << 3,
AEK_HWDIVTHUMB = 1 << 4,
AEK_HWDIVARM = 1 << 5,
AEK_MP = 1 << 6,
AEK_SIMD = 1 << 7,
AEK_SEC = 1 << 8,
AEK_VIRT = 1 << 9,
AEK_DSP = 1 << 10,
AEK_FP16 = 1 << 11,
AEK_RAS = 1 << 12,
AEK_DOTPROD = 1 << 13,
AEK_SHA2 = 1 << 14,
AEK_AES = 1 << 15,
AEK_FP16FML = 1 << 16,
AEK_SB = 1 << 17,
AEK_FP_DP = 1 << 18,
AEK_LOB = 1 << 19,
AEK_BF16 = 1 << 20,
AEK_I8MM = 1 << 21,
AEK_CDECP0 = 1 << 22,
AEK_CDECP1 = 1 << 23,
AEK_CDECP2 = 1 << 24,
AEK_CDECP3 = 1 << 25,
AEK_CDECP4 = 1 << 26,
AEK_CDECP5 = 1 << 27,
AEK_CDECP6 = 1 << 28,
AEK_CDECP7 = 1 << 29,
AEK_PACBTI = 1 << 30,
// Unsupported extensions.
AEK_OS = 1ULL << 59,
AEK_IWMMXT = 1ULL << 60,
AEK_IWMMXT2 = 1ULL << 61,
AEK_MAVERICK = 1ULL << 62,
AEK_XSCALE = 1ULL << 63,
AEK_OS = 1ULL << 59,
AEK_IWMMXT = 1ULL << 60,
AEK_IWMMXT2 = 1ULL << 61,
AEK_MAVERICK = 1ULL << 62,
AEK_XSCALE = 1ULL << 63,
};

// List of Arch Extension names.
Expand Down
23 changes: 4 additions & 19 deletions llvm/lib/AsmParser/LLParser.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -5130,11 +5130,7 @@ bool LLParser::parseDIStringType(MDNode *&Result, bool IsDistinct) {
/// ::= !DIDerivedType(tag: DW_TAG_pointer_type, name: "int", file: !0,
/// line: 7, scope: !1, baseType: !2, size: 32,
/// align: 32, offset: 0, flags: 0, extraData: !3,
/// dwarfAddressSpace: 3, ptrAuthKey: 1,
/// ptrAuthIsAddressDiscriminated: true,
/// ptrAuthExtraDiscriminator: 0x1234,
/// ptrAuthIsaPointer: 1, ptrAuthAuthenticatesNullValues:1
/// )
/// dwarfAddressSpace: 3)
bool LLParser::parseDIDerivedType(MDNode *&Result, bool IsDistinct) {
#define VISIT_MD_FIELDS(OPTIONAL, REQUIRED) \
REQUIRED(tag, DwarfTagField, ); \
Expand All @@ -5149,30 +5145,19 @@ bool LLParser::parseDIDerivedType(MDNode *&Result, bool IsDistinct) {
OPTIONAL(flags, DIFlagField, ); \
OPTIONAL(extraData, MDField, ); \
OPTIONAL(dwarfAddressSpace, MDUnsignedField, (UINT32_MAX, UINT32_MAX)); \
OPTIONAL(annotations, MDField, ); \
OPTIONAL(ptrAuthKey, MDUnsignedField, (0, 7)); \
OPTIONAL(ptrAuthIsAddressDiscriminated, MDBoolField, ); \
OPTIONAL(ptrAuthExtraDiscriminator, MDUnsignedField, (0, 0xffff)); \
OPTIONAL(ptrAuthIsaPointer, MDBoolField, ); \
OPTIONAL(ptrAuthAuthenticatesNullValues, MDBoolField, );
OPTIONAL(annotations, MDField, );
PARSE_MD_FIELDS();
#undef VISIT_MD_FIELDS

std::optional<unsigned> DWARFAddressSpace;
if (dwarfAddressSpace.Val != UINT32_MAX)
DWARFAddressSpace = dwarfAddressSpace.Val;
std::optional<DIDerivedType::PtrAuthData> PtrAuthData;
if (ptrAuthKey.Val)
PtrAuthData = DIDerivedType::PtrAuthData(
(unsigned)ptrAuthKey.Val, ptrAuthIsAddressDiscriminated.Val,
(unsigned)ptrAuthExtraDiscriminator.Val, ptrAuthIsaPointer.Val,
ptrAuthAuthenticatesNullValues.Val);

Result = GET_OR_DISTINCT(DIDerivedType,
(Context, tag.Val, name.Val, file.Val, line.Val,
scope.Val, baseType.Val, size.Val, align.Val,
offset.Val, DWARFAddressSpace, PtrAuthData,
flags.Val, extraData.Val, annotations.Val));
offset.Val, DWARFAddressSpace, flags.Val,
extraData.Val, annotations.Val));
return false;
}

Expand Down
18 changes: 4 additions & 14 deletions llvm/lib/Bitcode/Reader/MetadataLoader.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1556,7 +1556,7 @@ Error MetadataLoader::MetadataLoaderImpl::parseOneMetadata(
break;
}
case bitc::METADATA_DERIVED_TYPE: {
if (Record.size() < 12 || Record.size() > 15)
if (Record.size() < 12 || Record.size() > 14)
return error("Invalid record");

// DWARF address space is encoded as N->getDWARFAddressSpace() + 1. 0 means
Expand All @@ -1566,18 +1566,8 @@ Error MetadataLoader::MetadataLoaderImpl::parseOneMetadata(
DWARFAddressSpace = Record[12] - 1;

Metadata *Annotations = nullptr;
std::optional<DIDerivedType::PtrAuthData> PtrAuthData;

// Only look for annotations/ptrauth if both are allocated.
// If not, we can't tell which was intended to be embedded, as both ptrauth
// and annotations have been expected at Record[13] at various times.
if (Record.size() > 14) {
if (Record[13])
Annotations = getMDOrNull(Record[13]);

if (Record[14])
PtrAuthData = DIDerivedType::PtrAuthData(Record[14]);
}
if (Record.size() > 13 && Record[13])
Annotations = getMDOrNull(Record[13]);

IsDistinct = Record[0];
DINode::DIFlags Flags = static_cast<DINode::DIFlags>(Record[10]);
Expand All @@ -1587,7 +1577,7 @@ Error MetadataLoader::MetadataLoaderImpl::parseOneMetadata(
getMDOrNull(Record[3]), Record[4],
getDITypeRefOrNull(Record[5]),
getDITypeRefOrNull(Record[6]), Record[7], Record[8],
Record[9], DWARFAddressSpace, PtrAuthData, Flags,
Record[9], DWARFAddressSpace, Flags,
getDITypeRefOrNull(Record[11]), Annotations)),
NextMetadataNo);
NextMetadataNo++;
Expand Down
5 changes: 0 additions & 5 deletions llvm/lib/Bitcode/Writer/BitcodeWriter.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1804,11 +1804,6 @@ void ModuleBitcodeWriter::writeDIDerivedType(const DIDerivedType *N,

Record.push_back(VE.getMetadataOrNullID(N->getAnnotations().get()));

if (auto PtrAuthData = N->getPtrAuthData())
Record.push_back(PtrAuthData->Payload.RawData);
else
Record.push_back(0);

Stream.EmitRecord(bitc::METADATA_DERIVED_TYPE, Record, Abbrev);
Record.clear();
}
Expand Down
14 changes: 0 additions & 14 deletions llvm/lib/CodeGen/AsmPrinter/DwarfUnit.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -803,20 +803,6 @@ void DwarfUnit::constructTypeDIE(DIE &Buffer, const DIDerivedType *DTy) {
if (DTy->getDWARFAddressSpace())
addUInt(Buffer, dwarf::DW_AT_address_class, dwarf::DW_FORM_data4,
*DTy->getDWARFAddressSpace());
if (auto Key = DTy->getPtrAuthKey())
addUInt(Buffer, dwarf::DW_AT_LLVM_ptrauth_key, dwarf::DW_FORM_data1, *Key);
if (auto AddrDisc = DTy->isPtrAuthAddressDiscriminated())
if (AddrDisc.value())
addFlag(Buffer, dwarf::DW_AT_LLVM_ptrauth_address_discriminated);
if (auto Disc = DTy->getPtrAuthExtraDiscriminator())
addUInt(Buffer, dwarf::DW_AT_LLVM_ptrauth_extra_discriminator,
dwarf::DW_FORM_data2, *Disc);
if (auto IsaPointer = DTy->isPtrAuthIsaPointer())
if (*IsaPointer)
addFlag(Buffer, dwarf::DW_AT_LLVM_ptrauth_isa_pointer);
if (auto AuthenticatesNullValues = DTy->getPtrAuthAuthenticatesNullValues())
if (*AuthenticatesNullValues)
addFlag(Buffer, dwarf::DW_AT_LLVM_ptrauth_authenticates_null_values);
}

void DwarfUnit::constructSubprogramArguments(DIE &Buffer, DITypeRefArray Args) {
Expand Down
152 changes: 2 additions & 150 deletions llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,6 @@
#include "llvm/Analysis/Loads.h"
#include "llvm/Analysis/MemoryLocation.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/Analysis/VectorUtils.h"
#include "llvm/CodeGen/Analysis.h"
Expand Down Expand Up @@ -94,7 +93,6 @@
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/InstructionCost.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetIntrinsicInfo.h"
Expand Down Expand Up @@ -2448,147 +2446,6 @@ SelectionDAGBuilder::EmitBranchForMergedCondition(const Value *Cond,
SL->SwitchCases.push_back(CB);
}

// Collect dependencies on V recursively. This is used for the cost analysis in
// `shouldKeepJumpConditionsTogether`.
static bool
collectInstructionDeps(SmallPtrSet<const Instruction *, 8> *Deps,
const Value *V,
SmallPtrSet<const Instruction *, 8> *Necessary = nullptr,
unsigned Depth = 0) {
// Return false if we have an incomplete count.
if (Depth >= SelectionDAG::MaxRecursionDepth)
return false;

auto *I = dyn_cast<Instruction>(V);
if (I == nullptr)
return true;

if (Necessary != nullptr) {
// This instruction is necessary for the other side of the condition so
// don't count it.
if (Necessary->contains(I))
return true;
}

// Already added this dep.
if (!Deps->insert(I).second)
return true;

for (unsigned OpIdx = 0, E = I->getNumOperands(); OpIdx < E; ++OpIdx)
if (!collectInstructionDeps(Deps, I->getOperand(OpIdx), Necessary,
Depth + 1))
return false;
return true;
}

bool SelectionDAGBuilder::shouldKeepJumpConditionsTogether(
const FunctionLoweringInfo &FuncInfo, const BranchInst &I,
Instruction::BinaryOps Opc, const Value *Lhs, const Value *Rhs,
TargetLoweringBase::CondMergingParams Params) const {
if (I.getNumSuccessors() != 2)
return false;

if (Params.BaseCost < 0)
return false;

// Baseline cost.
InstructionCost CostThresh = Params.BaseCost;

BranchProbabilityInfo *BPI = nullptr;
if (Params.LikelyBias || Params.UnlikelyBias)
BPI = FuncInfo.BPI;
if (BPI != nullptr) {
// See if we are either likely to get an early out or compute both lhs/rhs
// of the condition.
BasicBlock *IfFalse = I.getSuccessor(0);
BasicBlock *IfTrue = I.getSuccessor(1);

std::optional<bool> Likely;
if (BPI->isEdgeHot(I.getParent(), IfTrue))
Likely = true;
else if (BPI->isEdgeHot(I.getParent(), IfFalse))
Likely = false;

if (Likely) {
if (Opc == (*Likely ? Instruction::And : Instruction::Or))
// Its likely we will have to compute both lhs and rhs of condition
CostThresh += Params.LikelyBias;
else {
if (Params.UnlikelyBias < 0)
return false;
// Its likely we will get an early out.
CostThresh -= Params.UnlikelyBias;
}
}
}

if (CostThresh <= 0)
return false;

// Collect "all" instructions that lhs condition is dependent on.
SmallPtrSet<const Instruction *, 8> LhsDeps, RhsDeps;
collectInstructionDeps(&LhsDeps, Lhs);
// Collect "all" instructions that rhs condition is dependent on AND are
// dependencies of lhs. This gives us an estimate on which instructions we
// stand to save by splitting the condition.
if (!collectInstructionDeps(&RhsDeps, Rhs, &LhsDeps))
return false;
// Add the compare instruction itself unless its a dependency on the LHS.
if (const auto *RhsI = dyn_cast<Instruction>(Rhs))
if (!LhsDeps.contains(RhsI))
RhsDeps.insert(RhsI);

const auto &TLI = DAG.getTargetLoweringInfo();
const auto &TTI =
TLI.getTargetMachine().getTargetTransformInfo(*I.getFunction());

InstructionCost CostOfIncluding = 0;
// See if this instruction will need to computed independently of whether RHS
// is.
auto ShouldCountInsn = [&RhsDeps](const Instruction *Ins) {
for (const auto *U : Ins->users()) {
// If user is independent of RHS calculation we don't need to count it.
if (auto *UIns = dyn_cast<Instruction>(U))
if (!RhsDeps.contains(UIns))
return false;
}
return true;
};

// Prune instructions from RHS Deps that are dependencies of unrelated
// instructions. The value (SelectionDAG::MaxRecursionDepth) is fairly
// arbitrary and just meant to cap the how much time we spend in the pruning
// loop. Its highly unlikely to come into affect.
const unsigned MaxPruneIters = SelectionDAG::MaxRecursionDepth;
// Stop after a certain point. No incorrectness from including too many
// instructions.
for (unsigned PruneIters = 0; PruneIters < MaxPruneIters; ++PruneIters) {
const Instruction *ToDrop = nullptr;
for (const auto *Ins : RhsDeps) {
if (!ShouldCountInsn(Ins)) {
ToDrop = Ins;
break;
}
}
if (ToDrop == nullptr)
break;
RhsDeps.erase(ToDrop);
}

for (const auto *Ins : RhsDeps) {
// Finally accumulate latency that we can only attribute to computing the
// RHS condition. Use latency because we are essentially trying to calculate
// the cost of the dependency chain.
// Possible TODO: We could try to estimate ILP and make this more precise.
CostOfIncluding +=
TTI.getInstructionCost(Ins, TargetTransformInfo::TCK_Latency);

if (CostOfIncluding > CostThresh)
return false;
}
return true;
}

void SelectionDAGBuilder::FindMergedConditions(const Value *Cond,
MachineBasicBlock *TBB,
MachineBasicBlock *FBB,
Expand Down Expand Up @@ -2803,13 +2660,8 @@ void SelectionDAGBuilder::visitBr(const BranchInst &I) {
else if (match(BOp, m_LogicalOr(m_Value(BOp0), m_Value(BOp1))))
Opcode = Instruction::Or;

if (Opcode &&
!(match(BOp0, m_ExtractElt(m_Value(Vec), m_Value())) &&
match(BOp1, m_ExtractElt(m_Specific(Vec), m_Value()))) &&
!shouldKeepJumpConditionsTogether(
FuncInfo, I, Opcode, BOp0, BOp1,
DAG.getTargetLoweringInfo().getJumpConditionMergingParams(
Opcode, BOp0, BOp1))) {
if (Opcode && !(match(BOp0, m_ExtractElt(m_Value(Vec), m_Value())) &&
match(BOp1, m_ExtractElt(m_Specific(Vec), m_Value())))) {
FindMergedConditions(BOp, Succ0MBB, Succ1MBB, BrMBB, BrMBB, Opcode,
getEdgeProbability(BrMBB, Succ0MBB),
getEdgeProbability(BrMBB, Succ1MBB),
Expand Down
5 changes: 0 additions & 5 deletions llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h
Original file line number Diff line number Diff line change
Expand Up @@ -385,11 +385,6 @@ class SelectionDAGBuilder {
N = NewN;
}

bool shouldKeepJumpConditionsTogether(
const FunctionLoweringInfo &FuncInfo, const BranchInst &I,
Instruction::BinaryOps Opc, const Value *Lhs, const Value *Rhs,
TargetLoweringBase::CondMergingParams Params) const;

void FindMergedConditions(const Value *Cond, MachineBasicBlock *TBB,
MachineBasicBlock *FBB, MachineBasicBlock *CurBB,
MachineBasicBlock *SwitchBB,
Expand Down
3 changes: 3 additions & 0 deletions llvm/lib/DWARFLinker/Classic/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,9 @@ add_llvm_component_library(LLVMDWARFLinkerClassic
DEPENDS
intrinsics_gen

LINK_LIBS
${LLVM_PTHREAD_LIB}

LINK_COMPONENTS
AsmPrinter
BinaryFormat
Expand Down
3 changes: 3 additions & 0 deletions llvm/lib/DWARFLinker/Parallel/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,9 @@ add_llvm_component_library(LLVMDWARFLinkerParallel
DEPENDS
intrinsics_gen

LINK_LIBS
${LLVM_PTHREAD_LIB}

LINK_COMPONENTS
AsmPrinter
BinaryFormat
Expand Down
3 changes: 3 additions & 0 deletions llvm/lib/DebugInfo/GSYM/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,9 @@ add_llvm_component_library(LLVMDebugInfoGSYM
DEPENDS
LLVMMC

LINK_LIBS
${LLVM_PTHREAD_LIB}

LINK_COMPONENTS
MC
Object
Expand Down
3 changes: 2 additions & 1 deletion llvm/lib/Debuginfod/Debuginfod.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -348,7 +348,8 @@ DebuginfodLogEntry DebuginfodLog::pop() {
}

DebuginfodCollection::DebuginfodCollection(ArrayRef<StringRef> PathsRef,
DebuginfodLog &Log, ThreadPool &Pool,
DebuginfodLog &Log,
ThreadPoolInterface &Pool,
double MinInterval)
: Log(Log), Pool(Pool), MinInterval(MinInterval) {
for (StringRef Path : PathsRef)
Expand Down
11 changes: 0 additions & 11 deletions llvm/lib/IR/AsmWriter.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2135,17 +2135,6 @@ static void writeDIDerivedType(raw_ostream &Out, const DIDerivedType *N,
Printer.printInt("dwarfAddressSpace", *DWARFAddressSpace,
/* ShouldSkipZero */ false);
Printer.printMetadata("annotations", N->getRawAnnotations());
if (auto Key = N->getPtrAuthKey())
Printer.printInt("ptrAuthKey", *Key);
if (auto AddrDisc = N->isPtrAuthAddressDiscriminated())
Printer.printBool("ptrAuthIsAddressDiscriminated", *AddrDisc);
if (auto Disc = N->getPtrAuthExtraDiscriminator())
Printer.printInt("ptrAuthExtraDiscriminator", *Disc);
if (auto IsaPointer = N->isPtrAuthIsaPointer())
Printer.printBool("ptrAuthIsaPointer", *IsaPointer);
if (auto AuthenticatesNullValues = N->getPtrAuthAuthenticatesNullValues())
Printer.printBool("ptrAuthAuthenticatesNullValues",
*AuthenticatesNullValues);
Out << ")";
}

Expand Down
58 changes: 21 additions & 37 deletions llvm/lib/IR/DIBuilder.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -296,20 +296,7 @@ DIStringType *DIBuilder::createStringType(StringRef Name,

DIDerivedType *DIBuilder::createQualifiedType(unsigned Tag, DIType *FromTy) {
return DIDerivedType::get(VMContext, Tag, "", nullptr, 0, nullptr, FromTy, 0,
0, 0, std::nullopt, std::nullopt, DINode::FlagZero);
}

DIDerivedType *DIBuilder::createPtrAuthQualifiedType(
DIType *FromTy, unsigned Key, bool IsAddressDiscriminated,
unsigned ExtraDiscriminator, bool IsaPointer,
bool AuthenticatesNullValues) {
return DIDerivedType::get(
VMContext, dwarf::DW_TAG_LLVM_ptrauth_type, "", nullptr, 0, nullptr,
FromTy, 0, 0, 0, std::nullopt,
std::optional<DIDerivedType::PtrAuthData>({Key, IsAddressDiscriminated,
ExtraDiscriminator, IsaPointer,
AuthenticatesNullValues}),
DINode::FlagZero);
0, 0, std::nullopt, DINode::FlagZero);
}

DIDerivedType *
Expand All @@ -320,8 +307,8 @@ DIBuilder::createPointerType(DIType *PointeeTy, uint64_t SizeInBits,
// FIXME: Why is there a name here?
return DIDerivedType::get(VMContext, dwarf::DW_TAG_pointer_type, Name,
nullptr, 0, nullptr, PointeeTy, SizeInBits,
AlignInBits, 0, DWARFAddressSpace, std::nullopt,
DINode::FlagZero, nullptr, Annotations);
AlignInBits, 0, DWARFAddressSpace, DINode::FlagZero,
nullptr, Annotations);
}

DIDerivedType *DIBuilder::createMemberPointerType(DIType *PointeeTy,
Expand All @@ -331,8 +318,7 @@ DIDerivedType *DIBuilder::createMemberPointerType(DIType *PointeeTy,
DINode::DIFlags Flags) {
return DIDerivedType::get(VMContext, dwarf::DW_TAG_ptr_to_member_type, "",
nullptr, 0, nullptr, PointeeTy, SizeInBits,
AlignInBits, 0, std::nullopt, std::nullopt, Flags,
Base);
AlignInBits, 0, std::nullopt, Flags, Base);
}

DIDerivedType *
Expand All @@ -341,7 +327,7 @@ DIBuilder::createReferenceType(unsigned Tag, DIType *RTy, uint64_t SizeInBits,
std::optional<unsigned> DWARFAddressSpace) {
assert(RTy && "Unable to create reference type");
return DIDerivedType::get(VMContext, Tag, "", nullptr, 0, nullptr, RTy,
SizeInBits, AlignInBits, 0, DWARFAddressSpace, {},
SizeInBits, AlignInBits, 0, DWARFAddressSpace,
DINode::FlagZero);
}

Expand All @@ -352,16 +338,15 @@ DIDerivedType *DIBuilder::createTypedef(DIType *Ty, StringRef Name,
DINodeArray Annotations) {
return DIDerivedType::get(VMContext, dwarf::DW_TAG_typedef, Name, File,
LineNo, getNonCompileUnitScope(Context), Ty, 0,
AlignInBits, 0, std::nullopt, std::nullopt, Flags,
nullptr, Annotations);
AlignInBits, 0, std::nullopt, Flags, nullptr,
Annotations);
}

DIDerivedType *DIBuilder::createFriend(DIType *Ty, DIType *FriendTy) {
assert(Ty && "Invalid type!");
assert(FriendTy && "Invalid friend type!");
return DIDerivedType::get(VMContext, dwarf::DW_TAG_friend, "", nullptr, 0, Ty,
FriendTy, 0, 0, 0, std::nullopt, std::nullopt,
DINode::FlagZero);
FriendTy, 0, 0, 0, std::nullopt, DINode::FlagZero);
}

DIDerivedType *DIBuilder::createInheritance(DIType *Ty, DIType *BaseTy,
Expand All @@ -373,7 +358,7 @@ DIDerivedType *DIBuilder::createInheritance(DIType *Ty, DIType *BaseTy,
ConstantInt::get(IntegerType::get(VMContext, 32), VBPtrOffset));
return DIDerivedType::get(VMContext, dwarf::DW_TAG_inheritance, "", nullptr,
0, Ty, BaseTy, 0, 0, BaseOffset, std::nullopt,
std::nullopt, Flags, ExtraData);
Flags, ExtraData);
}

DIDerivedType *DIBuilder::createMemberType(
Expand All @@ -383,7 +368,7 @@ DIDerivedType *DIBuilder::createMemberType(
return DIDerivedType::get(VMContext, dwarf::DW_TAG_member, Name, File,
LineNumber, getNonCompileUnitScope(Scope), Ty,
SizeInBits, AlignInBits, OffsetInBits, std::nullopt,
std::nullopt, Flags, nullptr, Annotations);
Flags, nullptr, Annotations);
}

static ConstantAsMetadata *getConstantOrNull(Constant *C) {
Expand All @@ -396,10 +381,10 @@ DIDerivedType *DIBuilder::createVariantMemberType(
DIScope *Scope, StringRef Name, DIFile *File, unsigned LineNumber,
uint64_t SizeInBits, uint32_t AlignInBits, uint64_t OffsetInBits,
Constant *Discriminant, DINode::DIFlags Flags, DIType *Ty) {
return DIDerivedType::get(
VMContext, dwarf::DW_TAG_member, Name, File, LineNumber,
getNonCompileUnitScope(Scope), Ty, SizeInBits, AlignInBits, OffsetInBits,
std::nullopt, std::nullopt, Flags, getConstantOrNull(Discriminant));
return DIDerivedType::get(VMContext, dwarf::DW_TAG_member, Name, File,
LineNumber, getNonCompileUnitScope(Scope), Ty,
SizeInBits, AlignInBits, OffsetInBits, std::nullopt,
Flags, getConstantOrNull(Discriminant));
}

DIDerivedType *DIBuilder::createBitFieldMemberType(
Expand All @@ -410,7 +395,7 @@ DIDerivedType *DIBuilder::createBitFieldMemberType(
return DIDerivedType::get(
VMContext, dwarf::DW_TAG_member, Name, File, LineNumber,
getNonCompileUnitScope(Scope), Ty, SizeInBits, /*AlignInBits=*/0,
OffsetInBits, std::nullopt, std::nullopt, Flags,
OffsetInBits, std::nullopt, Flags,
ConstantAsMetadata::get(ConstantInt::get(IntegerType::get(VMContext, 64),
StorageOffsetInBits)),
Annotations);
Expand All @@ -424,8 +409,7 @@ DIBuilder::createStaticMemberType(DIScope *Scope, StringRef Name, DIFile *File,
Flags |= DINode::FlagStaticMember;
return DIDerivedType::get(VMContext, Tag, Name, File, LineNumber,
getNonCompileUnitScope(Scope), Ty, 0, AlignInBits,
0, std::nullopt, std::nullopt, Flags,
getConstantOrNull(Val));
0, std::nullopt, Flags, getConstantOrNull(Val));
}

DIDerivedType *
Expand All @@ -436,7 +420,7 @@ DIBuilder::createObjCIVar(StringRef Name, DIFile *File, unsigned LineNumber,
return DIDerivedType::get(VMContext, dwarf::DW_TAG_member, Name, File,
LineNumber, getNonCompileUnitScope(File), Ty,
SizeInBits, AlignInBits, OffsetInBits, std::nullopt,
std::nullopt, Flags, PropertyNode);
Flags, PropertyNode);
}

DIObjCProperty *
Expand Down Expand Up @@ -571,10 +555,10 @@ DIDerivedType *DIBuilder::createSetType(DIScope *Scope, StringRef Name,
DIFile *File, unsigned LineNo,
uint64_t SizeInBits,
uint32_t AlignInBits, DIType *Ty) {
auto *R = DIDerivedType::get(VMContext, dwarf::DW_TAG_set_type, Name, File,
LineNo, getNonCompileUnitScope(Scope), Ty,
SizeInBits, AlignInBits, 0, std::nullopt,
std::nullopt, DINode::FlagZero);
auto *R =
DIDerivedType::get(VMContext, dwarf::DW_TAG_set_type, Name, File, LineNo,
getNonCompileUnitScope(Scope), Ty, SizeInBits,
AlignInBits, 0, std::nullopt, DINode::FlagZero);
trackIfUnresolved(R);
return R;
}
Expand Down
6 changes: 3 additions & 3 deletions llvm/lib/IR/DebugInfo.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1336,9 +1336,9 @@ LLVMMetadataRef LLVMDIBuilderCreatePointerType(
LLVMDIBuilderRef Builder, LLVMMetadataRef PointeeTy,
uint64_t SizeInBits, uint32_t AlignInBits, unsigned AddressSpace,
const char *Name, size_t NameLen) {
return wrap(unwrap(Builder)->createPointerType(
unwrapDI<DIType>(PointeeTy), SizeInBits, AlignInBits, AddressSpace,
{Name, NameLen}));
return wrap(unwrap(Builder)->createPointerType(unwrapDI<DIType>(PointeeTy),
SizeInBits, AlignInBits,
AddressSpace, {Name, NameLen}));
}

LLVMMetadataRef LLVMDIBuilderCreateStructType(
Expand Down
32 changes: 11 additions & 21 deletions llvm/lib/IR/DebugInfoMetadata.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -34,10 +34,6 @@ cl::opt<bool> EnableFSDiscriminator(
cl::desc("Enable adding flow sensitive discriminators"));
} // namespace llvm

uint32_t DIType::getAlignInBits() const {
return (getTag() == dwarf::DW_TAG_LLVM_ptrauth_type ? 0 : SubclassData32);
}

const DIExpression::FragmentInfo DebugVariable::DefaultFragment = {
std::numeric_limits<uint64_t>::max(), std::numeric_limits<uint64_t>::min()};

Expand Down Expand Up @@ -735,32 +731,26 @@ Constant *DIDerivedType::getDiscriminantValue() const {
return nullptr;
}

DIDerivedType *DIDerivedType::getImpl(
LLVMContext &Context, unsigned Tag, MDString *Name, Metadata *File,
unsigned Line, Metadata *Scope, Metadata *BaseType, uint64_t SizeInBits,
uint32_t AlignInBits, uint64_t OffsetInBits,
std::optional<unsigned> DWARFAddressSpace,
std::optional<PtrAuthData> PtrAuthData, DIFlags Flags, Metadata *ExtraData,
Metadata *Annotations, StorageType Storage, bool ShouldCreate) {
DIDerivedType *
DIDerivedType::getImpl(LLVMContext &Context, unsigned Tag, MDString *Name,
Metadata *File, unsigned Line, Metadata *Scope,
Metadata *BaseType, uint64_t SizeInBits,
uint32_t AlignInBits, uint64_t OffsetInBits,
std::optional<unsigned> DWARFAddressSpace, DIFlags Flags,
Metadata *ExtraData, Metadata *Annotations,
StorageType Storage, bool ShouldCreate) {
assert(isCanonical(Name) && "Expected canonical MDString");
DEFINE_GETIMPL_LOOKUP(DIDerivedType,
(Tag, Name, File, Line, Scope, BaseType, SizeInBits,
AlignInBits, OffsetInBits, DWARFAddressSpace,
PtrAuthData, Flags, ExtraData, Annotations));
AlignInBits, OffsetInBits, DWARFAddressSpace, Flags,
ExtraData, Annotations));
Metadata *Ops[] = {File, Scope, Name, BaseType, ExtraData, Annotations};
DEFINE_GETIMPL_STORE(DIDerivedType,
(Tag, Line, SizeInBits, AlignInBits, OffsetInBits,
DWARFAddressSpace, PtrAuthData, Flags),
DWARFAddressSpace, Flags),
Ops);
}

std::optional<DIDerivedType::PtrAuthData>
DIDerivedType::getPtrAuthData() const {
return getTag() == dwarf::DW_TAG_LLVM_ptrauth_type
? std::optional<PtrAuthData>(PtrAuthData(SubclassData32))
: std::nullopt;
}

DICompositeType *DICompositeType::getImpl(
LLVMContext &Context, unsigned Tag, MDString *Name, Metadata *File,
unsigned Line, Metadata *Scope, Metadata *BaseType, uint64_t SizeInBits,
Expand Down
15 changes: 5 additions & 10 deletions llvm/lib/IR/LLVMContextImpl.h
Original file line number Diff line number Diff line change
Expand Up @@ -539,29 +539,25 @@ template <> struct MDNodeKeyImpl<DIDerivedType> {
uint64_t OffsetInBits;
uint32_t AlignInBits;
std::optional<unsigned> DWARFAddressSpace;
std::optional<DIDerivedType::PtrAuthData> PtrAuthData;
unsigned Flags;
Metadata *ExtraData;
Metadata *Annotations;

MDNodeKeyImpl(unsigned Tag, MDString *Name, Metadata *File, unsigned Line,
Metadata *Scope, Metadata *BaseType, uint64_t SizeInBits,
uint32_t AlignInBits, uint64_t OffsetInBits,
std::optional<unsigned> DWARFAddressSpace,
std::optional<DIDerivedType::PtrAuthData> PtrAuthData,
unsigned Flags, Metadata *ExtraData, Metadata *Annotations)
std::optional<unsigned> DWARFAddressSpace, unsigned Flags,
Metadata *ExtraData, Metadata *Annotations)
: Tag(Tag), Name(Name), File(File), Line(Line), Scope(Scope),
BaseType(BaseType), SizeInBits(SizeInBits), OffsetInBits(OffsetInBits),
AlignInBits(AlignInBits), DWARFAddressSpace(DWARFAddressSpace),
PtrAuthData(PtrAuthData), Flags(Flags), ExtraData(ExtraData),
Annotations(Annotations) {}
Flags(Flags), ExtraData(ExtraData), Annotations(Annotations) {}
MDNodeKeyImpl(const DIDerivedType *N)
: Tag(N->getTag()), Name(N->getRawName()), File(N->getRawFile()),
Line(N->getLine()), Scope(N->getRawScope()),
BaseType(N->getRawBaseType()), SizeInBits(N->getSizeInBits()),
OffsetInBits(N->getOffsetInBits()), AlignInBits(N->getAlignInBits()),
DWARFAddressSpace(N->getDWARFAddressSpace()),
PtrAuthData(N->getPtrAuthData()), Flags(N->getFlags()),
DWARFAddressSpace(N->getDWARFAddressSpace()), Flags(N->getFlags()),
ExtraData(N->getRawExtraData()), Annotations(N->getRawAnnotations()) {}

bool isKeyOf(const DIDerivedType *RHS) const {
Expand All @@ -572,8 +568,7 @@ template <> struct MDNodeKeyImpl<DIDerivedType> {
AlignInBits == RHS->getAlignInBits() &&
OffsetInBits == RHS->getOffsetInBits() &&
DWARFAddressSpace == RHS->getDWARFAddressSpace() &&
PtrAuthData == RHS->getPtrAuthData() && Flags == RHS->getFlags() &&
ExtraData == RHS->getRawExtraData() &&
Flags == RHS->getFlags() && ExtraData == RHS->getRawExtraData() &&
Annotations == RHS->getRawAnnotations();
}

Expand Down
1 change: 0 additions & 1 deletion llvm/lib/IR/Verifier.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1223,7 +1223,6 @@ void Verifier::visitDIDerivedType(const DIDerivedType &N) {
N.getTag() == dwarf::DW_TAG_volatile_type ||
N.getTag() == dwarf::DW_TAG_restrict_type ||
N.getTag() == dwarf::DW_TAG_atomic_type ||
N.getTag() == dwarf::DW_TAG_LLVM_ptrauth_type ||
N.getTag() == dwarf::DW_TAG_member ||
(N.getTag() == dwarf::DW_TAG_variable && N.isStaticMember()) ||
N.getTag() == dwarf::DW_TAG_inheritance ||
Expand Down
3 changes: 3 additions & 0 deletions llvm/lib/LTO/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,9 @@ add_llvm_component_library(LLVMLTO
intrinsics_gen
llvm_vcsrevision_h

LINK_LIBS
${LLVM_PTHREAD_LIB}

LINK_COMPONENTS
AggressiveInstCombine
Analysis
Expand Down
2 changes: 2 additions & 0 deletions llvm/lib/Support/LockFileManager.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -205,6 +205,8 @@ LockFileManager::LockFileManager(StringRef FileName)
S.append(std::string(UniqueLockFileName));
setError(Out.error(), S);
sys::fs::remove(UniqueLockFileName);
// Don't call report_fatal_error.
Out.clear_error();
return;
}
}
Expand Down
41 changes: 19 additions & 22 deletions llvm/lib/Support/ThreadPool.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -14,16 +14,13 @@

#include "llvm/Config/llvm-config.h"

#if LLVM_ENABLE_THREADS
#include "llvm/Support/FormatVariadic.h"
#include "llvm/Support/Threading.h"
#else
#include "llvm/Support/raw_ostream.h"
#endif

using namespace llvm;

#if LLVM_ENABLE_THREADS
// Out-of-line definition of the interface's virtual destructor (declared in
// ThreadPool.h); concrete pools do their draining in their own destructors.
ThreadPoolInterface::~ThreadPoolInterface() = default;

// A note on thread groups: Tasks are by default in no group (represented
// by nullptr ThreadPoolTaskGroup pointer in the Tasks queue) and functionality
Expand All @@ -33,10 +30,12 @@ using namespace llvm;
// queue, and functions called to work only on tasks from one group take that
// pointer.

ThreadPool::ThreadPool(ThreadPoolStrategy S)
#if LLVM_ENABLE_THREADS

// Record the strategy and the maximum thread count up front; no threads are
// spawned here — workers are created lazily (see grow()).
StdThreadPool::StdThreadPool(ThreadPoolStrategy S)
    : Strategy(S), MaxThreadCount(S.compute_thread_count()) {}

void ThreadPool::grow(int requested) {
void StdThreadPool::grow(int requested) {
llvm::sys::ScopedWriter LockGuard(ThreadsLock);
if (Threads.size() >= MaxThreadCount)
return; // Already hit the max thread pool size.
Expand All @@ -58,7 +57,7 @@ static LLVM_THREAD_LOCAL std::vector<ThreadPoolTaskGroup *>
#endif

// WaitingForGroup == nullptr means all tasks regardless of their group.
void ThreadPool::processTasks(ThreadPoolTaskGroup *WaitingForGroup) {
void StdThreadPool::processTasks(ThreadPoolTaskGroup *WaitingForGroup) {
while (true) {
std::function<void()> Task;
ThreadPoolTaskGroup *GroupOfTask;
Expand Down Expand Up @@ -111,7 +110,7 @@ void ThreadPool::processTasks(ThreadPoolTaskGroup *WaitingForGroup) {
bool Notify;
bool NotifyGroup;
{
// Adjust `ActiveThreads`, in case someone waits on ThreadPool::wait()
// Adjust `ActiveThreads`, in case someone waits on StdThreadPool::wait()
std::lock_guard<std::mutex> LockGuard(QueueLock);
--ActiveThreads;
if (GroupOfTask != nullptr) {
Expand All @@ -123,7 +122,7 @@ void ThreadPool::processTasks(ThreadPoolTaskGroup *WaitingForGroup) {
NotifyGroup = GroupOfTask != nullptr && Notify;
}
// Notify task completion if this is the last active thread, in case
// someone waits on ThreadPool::wait().
// someone waits on StdThreadPool::wait().
if (Notify)
CompletionCondition.notify_all();
// If this was a task in a group, notify also threads waiting for tasks
Expand All @@ -134,23 +133,23 @@ void ThreadPool::processTasks(ThreadPoolTaskGroup *WaitingForGroup) {
}
}

bool ThreadPool::workCompletedUnlocked(ThreadPoolTaskGroup *Group) const {
/// Returns true when no work remains: for a null \p Group, the whole pool must
/// be idle; otherwise only tasks of that group are considered. The caller is
/// expected to hold QueueLock (see the wait() callers).
bool StdThreadPool::workCompletedUnlocked(ThreadPoolTaskGroup *Group) const {
  // Whole-pool completion: nothing is running and nothing is queued.
  if (!Group)
    return !ActiveThreads && Tasks.empty();
  // Group completion: no thread may currently be running a task of this
  // group...
  if (ActiveGroups.count(Group) != 0)
    return false;
  // ...and no queued task may belong to it either.
  return !llvm::any_of(
      Tasks, [Group](const auto &Entry) { return Entry.second == Group; });
}

void ThreadPool::wait() {
/// Block the calling thread until the pool has no queued or running tasks of
/// any group. Must not be called from a worker thread.
void StdThreadPool::wait() {
  assert(!isWorkerThread()); // Would deadlock waiting for itself.
  // Wait for all threads to complete and the queue to be empty
  std::unique_lock<std::mutex> LockGuard(QueueLock);
  CompletionCondition.wait(LockGuard,
                           [&] { return workCompletedUnlocked(nullptr); });
}

void ThreadPool::wait(ThreadPoolTaskGroup &Group) {
void StdThreadPool::wait(ThreadPoolTaskGroup &Group) {
// Wait for all threads in the group to complete.
if (!isWorkerThread()) {
std::unique_lock<std::mutex> LockGuard(QueueLock);
Expand All @@ -167,7 +166,7 @@ void ThreadPool::wait(ThreadPoolTaskGroup &Group) {
processTasks(&Group);
}

bool ThreadPool::isWorkerThread() const {
bool StdThreadPool::isWorkerThread() const {
llvm::sys::ScopedReader LockGuard(ThreadsLock);
llvm::thread::id CurrentThreadId = llvm::this_thread::get_id();
for (const llvm::thread &Thread : Threads)
Expand All @@ -177,7 +176,7 @@ bool ThreadPool::isWorkerThread() const {
}

// The destructor joins all threads, waiting for completion.
ThreadPool::~ThreadPool() {
StdThreadPool::~StdThreadPool() {
{
std::unique_lock<std::mutex> LockGuard(QueueLock);
EnableFlag = false;
Expand All @@ -188,18 +187,18 @@ ThreadPool::~ThreadPool() {
Worker.join();
}

#else // LLVM_ENABLE_THREADS Disabled
#endif // LLVM_ENABLE_THREADS Disabled

// No threads are launched, issue a warning if ThreadCount is not 0
ThreadPool::ThreadPool(ThreadPoolStrategy S) : MaxThreadCount(1) {
/// Threads-disabled executor: no threads are ever launched. Warn if the
/// strategy asked for more than one thread, since the request cannot be
/// honored when LLVM_ENABLE_THREADS is off.
SingleThreadExecutor::SingleThreadExecutor(ThreadPoolStrategy S) {
  const int RequestedThreads = S.compute_thread_count();
  if (RequestedThreads == 1)
    return;
  errs() << "Warning: request a ThreadPool with " << RequestedThreads
         << " threads, but LLVM_ENABLE_THREADS has been turned off\n";
}

void ThreadPool::wait() {
void SingleThreadExecutor::wait() {
// Sequential implementation running the tasks
while (!Tasks.empty()) {
auto Task = std::move(Tasks.front().first);
Expand All @@ -208,16 +207,14 @@ void ThreadPool::wait() {
}
}

void ThreadPool::wait(ThreadPoolTaskGroup &) {
void SingleThreadExecutor::wait(ThreadPoolTaskGroup &) {
// Simply wait for all, this works even if recursive (the running task
// is already removed from the queue).
wait();
}

bool ThreadPool::isWorkerThread() const {
// The single-threaded executor has no worker threads at all, so this query is
// meaningless; reaching it indicates a misuse under LLVM_ENABLE_THREADS=0.
bool SingleThreadExecutor::isWorkerThread() const {
  report_fatal_error("LLVM compiled without multithreading");
}

ThreadPool::~ThreadPool() { wait(); }

#endif
// Drain the queue before destruction; wait() runs any remaining tasks inline
// on the current thread.
SingleThreadExecutor::~SingleThreadExecutor() { wait(); }
9 changes: 2 additions & 7 deletions llvm/lib/Target/AArch64/AArch64SLSHardening.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -412,20 +412,15 @@ class AArch64IndirectThunks : public MachineFunctionPass {
private:
std::tuple<SLSBLRThunkInserter> TIs;

// FIXME: When LLVM moves to C++17, these can become folds
template <typename... ThunkInserterT>
static void initTIs(Module &M,
std::tuple<ThunkInserterT...> &ThunkInserters) {
(void)std::initializer_list<int>{
(std::get<ThunkInserterT>(ThunkInserters).init(M), 0)...};
(..., std::get<ThunkInserterT>(ThunkInserters).init(M));
}
template <typename... ThunkInserterT>
static bool runTIs(MachineModuleInfo &MMI, MachineFunction &MF,
std::tuple<ThunkInserterT...> &ThunkInserters) {
bool Modified = false;
(void)std::initializer_list<int>{
Modified |= std::get<ThunkInserterT>(ThunkInserters).run(MMI, MF)...};
return Modified;
return (0 | ... | std::get<ThunkInserterT>(ThunkInserters).run(MMI, MF));
}
};

Expand Down
2 changes: 2 additions & 0 deletions llvm/lib/Target/ARM/ARMInstrNEON.td
Original file line number Diff line number Diff line change
Expand Up @@ -7362,8 +7362,10 @@ let PostEncoderMethod = "NEONThumb2DataIPostEncoder",
}

let Predicates = [HasV8, HasAES] in {
// AESD/AESE are marked commutable — presumably because these instructions XOR
// their two inputs before applying the round transform, so swapping the
// operands yields the same result. TODO(review): confirm against the ARMv8
// AESE/AESD instruction semantics.
let isCommutable = 1 in {
def AESD : AES2Op<"d", 0, 1, int_arm_neon_aesd>;
def AESE : AES2Op<"e", 0, 0, int_arm_neon_aese>;
}
def AESIMC : AES<"imc", 1, 1, int_arm_neon_aesimc>;
def AESMC : AES<"mc", 1, 0, int_arm_neon_aesmc>;
}
Expand Down
9 changes: 2 additions & 7 deletions llvm/lib/Target/ARM/ARMSLSHardening.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -404,20 +404,15 @@ class ARMIndirectThunks : public MachineFunctionPass {
private:
std::tuple<SLSBLRThunkInserter> TIs;

// FIXME: When LLVM moves to C++17, these can become folds
template <typename... ThunkInserterT>
static void initTIs(Module &M,
std::tuple<ThunkInserterT...> &ThunkInserters) {
(void)std::initializer_list<int>{
(std::get<ThunkInserterT>(ThunkInserters).init(M), 0)...};
(..., std::get<ThunkInserterT>(ThunkInserters).init(M));
}
template <typename... ThunkInserterT>
static bool runTIs(MachineModuleInfo &MMI, MachineFunction &MF,
std::tuple<ThunkInserterT...> &ThunkInserters) {
bool Modified = false;
(void)std::initializer_list<int>{
Modified |= std::get<ThunkInserterT>(ThunkInserters).run(MMI, MF)...};
return Modified;
return (0 | ... | std::get<ThunkInserterT>(ThunkInserters).run(MMI, MF));
}
};

Expand Down
6 changes: 0 additions & 6 deletions llvm/lib/Target/PowerPC/PPCFrameLowering.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1191,12 +1191,6 @@ void PPCFrameLowering::emitPrologue(MachineFunction &MF,
if ((Reg == PPC::X2 || Reg == PPC::R2) && MustSaveTOC)
continue;

// For SVR4, don't emit a move for the CR spill slot if we haven't
// spilled CRs.
if (isSVR4ABI && (PPC::CR2 <= Reg && Reg <= PPC::CR4)
&& !MustSaveCR)
continue;

// For 64-bit SVR4 when we have spilled CRs, the spill location
// is SP+8, not a frame-relative slot.
if (isSVR4ABI && isPPC64 && (PPC::CR2 <= Reg && Reg <= PPC::CR4)) {
Expand Down
3 changes: 1 addition & 2 deletions llvm/lib/Target/RISCV/RISCVISelLowering.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -4057,8 +4057,7 @@ static SDValue splatPartsI64WithVL(const SDLoc &DL, MVT VT, SDValue Passthru,
MVT InterVT =
MVT::getVectorVT(MVT::i32, VT.getVectorElementCount() * 2);
auto InterVec = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, InterVT,
DAG.getUNDEF(InterVT), Lo,
DAG.getRegister(RISCV::X0, MVT::i32));
DAG.getUNDEF(InterVT), Lo, NewVL);
return DAG.getNode(ISD::BITCAST, DL, VT, InterVec);
}
}
Expand Down
4 changes: 1 addition & 3 deletions llvm/lib/Target/RISCV/RISCVRegisterInfo.td
Original file line number Diff line number Diff line change
Expand Up @@ -585,9 +585,7 @@ def GPRPair : RegisterClass<"RISCV", [XLenPairFVT], 64, (add
)>;

// The register class is added for inline assembly for vector mask types.
def VM : VReg<VMaskVTs,
(add (sequence "V%u", 8, 31),
(sequence "V%u", 0, 7)), 1>;
def VM : VReg<VMaskVTs, (add VR), 1>;

foreach m = LMULList in {
foreach nf = NFList<m>.L in {
Expand Down
3 changes: 3 additions & 0 deletions llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,9 @@ static cl::opt<unsigned> SLPMaxVF(
InstructionCost
RISCVTTIImpl::getRISCVInstructionCost(ArrayRef<unsigned> OpCodes, MVT VT,
TTI::TargetCostKind CostKind) {
// Check if the type is valid for all CostKind
if (!VT.isVector())
return InstructionCost::getInvalid();
size_t NumInstr = OpCodes.size();
if (CostKind == TTI::TCK_CodeSize)
return NumInstr;
Expand Down
27 changes: 27 additions & 0 deletions llvm/lib/Target/X86/MCTargetDesc/X86BaseInfo.h
Original file line number Diff line number Diff line change
Expand Up @@ -1315,6 +1315,33 @@ inline bool isKMasked(uint64_t TSFlags) {
inline bool isKMergeMasked(uint64_t TSFlags) {
return isKMasked(TSFlags) && (TSFlags & X86II::EVEX_Z) == 0;
}

/// \returns true if the instruction needs a SIB byte to encode its memory
/// operand, given the base register, index register, and mode.
inline bool needSIB(unsigned BaseReg, unsigned IndexReg, bool In64BitMode) {
  // An index register can only be expressed through a SIB byte.
  if (IndexReg)
    return true;

  switch (BaseReg) {
  case X86::ESP:
  case X86::RSP:
  case X86::R12:
  case X86::R12D:
  case X86::R20:
  case X86::R20D:
  case X86::R28:
  case X86::R28D:
    // These bases all encode to an R/M value of 4, which is the escape that
    // signals a SIB byte is present, so the SIB form is mandatory.
    return true;
  default:
    // With no base register in 64-bit mode, a SIB byte is needed to emit the
    // plain 'disp32' (non-RIP-relative) addressing form.
    return In64BitMode && !BaseReg;
  }
}

} // namespace X86II
} // namespace llvm
#endif
13 changes: 2 additions & 11 deletions llvm/lib/Target/X86/MCTargetDesc/X86MCCodeEmitter.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -753,17 +753,8 @@ void X86MCCodeEmitter::emitMemModRMByte(
bool AllowDisp8 = !UseDisp32;

// Determine whether a SIB byte is needed.
if ( // The SIB byte must be used if there is an index register or the
// encoding requires a SIB byte.
!ForceSIB && IndexReg.getReg() == 0 &&
// The SIB byte must be used if the base is ESP/RSP/R12/R20/R28, all of
// which encode to an R/M value of 4, which indicates that a SIB byte is
// present.
BaseRegNo != N86::ESP &&
// If there is no base register and we're in 64-bit mode, we need a SIB
// byte to emit an addr that is just 'disp32' (the non-RIP relative form).
(!STI.hasFeature(X86::Is64Bit) || BaseReg != 0)) {

if (!ForceSIB && !X86II::needSIB(BaseReg, IndexReg.getReg(),
STI.hasFeature(X86::Is64Bit))) {
if (BaseReg == 0) { // [disp32] in X86-32 mode
emitByte(modRMByte(0, RegOpcodeField, 5), CB);
emitImmediate(Disp, MI.getLoc(), 4, FK_Data_4, StartByte, CB, Fixups);
Expand Down
85 changes: 85 additions & 0 deletions llvm/lib/Target/X86/X86ExpandPseudo.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -613,6 +613,91 @@ bool X86ExpandPseudo::expandMI(MachineBasicBlock &MBB,
case X86::CALL64m_RVMARKER:
expandCALL_RVMARKER(MBB, MBBI);
return true;
case X86::ADD32mi_ND:
case X86::ADD64mi32_ND:
case X86::SUB32mi_ND:
case X86::SUB64mi32_ND:
case X86::AND32mi_ND:
case X86::AND64mi32_ND:
case X86::OR32mi_ND:
case X86::OR64mi32_ND:
case X86::XOR32mi_ND:
case X86::XOR64mi32_ND:
case X86::ADC32mi_ND:
case X86::ADC64mi32_ND:
case X86::SBB32mi_ND:
case X86::SBB64mi32_ND: {
// It's possible for an EVEX-encoded legacy instruction to reach the 15-byte
// instruction length limit: 4 bytes of EVEX prefix + 1 byte of opcode + 1
// byte of ModRM + 1 byte of SIB + 4 bytes of displacement + 4 bytes of
// immediate = 15 bytes in total, e.g.
//
// subq $184, %fs:257(%rbx, %rcx), %rax
//
// In such a case, no additional (ADSIZE or segment override) prefix can be
// used. To resolve the issue, we split the “long” instruction into 2
// instructions:
//
// movq %fs:257(%rbx, %rcx),%rax
// subq $184, %rax
//
// Therefore we consider the OPmi_ND to be a pseudo instruction to some
// extent.
const MachineOperand &ImmOp =
MI.getOperand(MI.getNumExplicitOperands() - 1);
// If the immediate is a expr, conservatively estimate 4 bytes.
if (ImmOp.isImm() && isInt<8>(ImmOp.getImm()))
return false;
int MemOpNo = X86::getFirstAddrOperandIdx(MI);
const MachineOperand &DispOp = MI.getOperand(MemOpNo + X86::AddrDisp);
Register Base = MI.getOperand(MemOpNo + X86::AddrBaseReg).getReg();
// If the displacement is a expr, conservatively estimate 4 bytes.
if (Base && DispOp.isImm() && isInt<8>(DispOp.getImm()))
return false;
// There can only be one of three: SIB, segment override register, ADSIZE
Register Index = MI.getOperand(MemOpNo + X86::AddrIndexReg).getReg();
unsigned Count = !!MI.getOperand(MemOpNo + X86::AddrSegmentReg).getReg();
if (X86II::needSIB(Base, Index, /*In64BitMode=*/true))
++Count;
if (X86MCRegisterClasses[X86::GR32RegClassID].contains(Base) ||
X86MCRegisterClasses[X86::GR32RegClassID].contains(Index))
++Count;
if (Count < 2)
return false;
unsigned Opc, LoadOpc;
switch (Opcode) {
#define MI_TO_RI(OP) \
case X86::OP##32mi_ND: \
Opc = X86::OP##32ri; \
LoadOpc = X86::MOV32rm; \
break; \
case X86::OP##64mi32_ND: \
Opc = X86::OP##64ri32; \
LoadOpc = X86::MOV64rm; \
break;

default:
llvm_unreachable("Unexpected Opcode");
MI_TO_RI(ADD);
MI_TO_RI(SUB);
MI_TO_RI(AND);
MI_TO_RI(OR);
MI_TO_RI(XOR);
MI_TO_RI(ADC);
MI_TO_RI(SBB);
#undef MI_TO_RI
}
// Insert OPri.
Register DestReg = MI.getOperand(0).getReg();
BuildMI(MBB, std::next(MBBI), DL, TII->get(Opc), DestReg)
.addReg(DestReg)
.add(ImmOp);
// Change OPmi_ND to MOVrm.
for (unsigned I = MI.getNumImplicitOperands() + 1; I != 0; --I)
MI.removeOperand(MI.getNumOperands() - 1);
MI.setDesc(TII->get(LoadOpc));
return true;
}
}
llvm_unreachable("Previous switch has a fallthrough?");
}
Expand Down
61 changes: 11 additions & 50 deletions llvm/lib/Target/X86/X86ISelLowering.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -77,37 +77,6 @@ static cl::opt<int> ExperimentalPrefInnermostLoopAlignment(
"alignment set by x86-experimental-pref-loop-alignment."),
cl::Hidden);

static cl::opt<int> BrMergingBaseCostThresh(
"x86-br-merging-base-cost", cl::init(1),
cl::desc(
"Sets the cost threshold for when multiple conditionals will be merged "
"into one branch versus be split in multiple branches. Merging "
"conditionals saves branches at the cost of additional instructions. "
"This value sets the instruction cost limit, below which conditionals "
"will be merged, and above which conditionals will be split."),
cl::Hidden);

static cl::opt<int> BrMergingLikelyBias(
"x86-br-merging-likely-bias", cl::init(0),
cl::desc("Increases 'x86-br-merging-base-cost' in cases that it is likely "
"that all conditionals will be executed. For example for merging "
"the conditionals (a == b && c > d), if its known that a == b is "
"likely, then it is likely that if the conditionals are split "
"both sides will be executed, so it may be desirable to increase "
"the instruction cost threshold."),
cl::Hidden);

static cl::opt<int> BrMergingUnlikelyBias(
"x86-br-merging-unlikely-bias", cl::init(1),
cl::desc(
"Decreases 'x86-br-merging-base-cost' in cases that it is unlikely "
"that all conditionals will be executed. For example for merging "
"the conditionals (a == b && c > d), if its known that a == b is "
"unlikely, then it is unlikely that if the conditionals are split "
"both sides will be executed, so it may be desirable to decrease "
"the instruction cost threshold."),
cl::Hidden);

static cl::opt<bool> MulConstantOptimization(
"mul-constant-optimization", cl::init(true),
cl::desc("Replace 'mul x, Const' with more effective instructions like "
Expand Down Expand Up @@ -3364,24 +3333,6 @@ unsigned X86TargetLowering::preferedOpcodeForCmpEqPiecesOfOperand(
return ISD::SRL;
}

TargetLoweringBase::CondMergingParams
X86TargetLowering::getJumpConditionMergingParams(Instruction::BinaryOps Opc,
const Value *Lhs,
const Value *Rhs) const {
using namespace llvm::PatternMatch;
int BaseCost = BrMergingBaseCostThresh.getValue();
// a == b && a == c is a fast pattern on x86.
ICmpInst::Predicate Pred;
if (BaseCost >= 0 && Opc == Instruction::And &&
match(Lhs, m_ICmp(Pred, m_Value(), m_Value())) &&
Pred == ICmpInst::ICMP_EQ &&
match(Rhs, m_ICmp(Pred, m_Value(), m_Value())) &&
Pred == ICmpInst::ICMP_EQ)
BaseCost += 1;
return {BaseCost, BrMergingLikelyBias.getValue(),
BrMergingUnlikelyBias.getValue()};
}

bool X86TargetLowering::preferScalarizeSplat(SDNode *N) const {
return N->getOpcode() != ISD::FP_EXTEND;
}
Expand Down Expand Up @@ -28981,6 +28932,7 @@ static SDValue LowerShiftByScalarImmediate(SDValue Op, SelectionDAG &DAG,
SDValue R = Op.getOperand(0);
SDValue Amt = Op.getOperand(1);
unsigned X86Opc = getTargetVShiftUniformOpcode(Op.getOpcode(), false);
unsigned EltSizeInBits = VT.getScalarSizeInBits();

auto ArithmeticShiftRight64 = [&](uint64_t ShiftAmt) {
assert((VT == MVT::v2i64 || VT == MVT::v4i64) && "Unexpected SRA type");
Expand Down Expand Up @@ -29027,7 +28979,7 @@ static SDValue LowerShiftByScalarImmediate(SDValue Op, SelectionDAG &DAG,
return SDValue();

// If the shift amount is out of range, return undef.
if (APIntShiftAmt.uge(VT.getScalarSizeInBits()))
if (APIntShiftAmt.uge(EltSizeInBits))
return DAG.getUNDEF(VT);

uint64_t ShiftAmt = APIntShiftAmt.getZExtValue();
Expand Down Expand Up @@ -29055,6 +29007,15 @@ static SDValue LowerShiftByScalarImmediate(SDValue Op, SelectionDAG &DAG,
Op.getOpcode() == ISD::SRA)
return ArithmeticShiftRight64(ShiftAmt);

// If we're logical shifting an all-signbits value then we can just perform as
// a mask.
if ((Op.getOpcode() == ISD::SHL || Op.getOpcode() == ISD::SRL) &&
DAG.ComputeNumSignBits(R) == EltSizeInBits) {
SDValue Mask = DAG.getAllOnesConstant(dl, VT);
Mask = DAG.getNode(Op.getOpcode(), dl, VT, Mask, Amt);
return DAG.getNode(ISD::AND, dl, VT, R, Mask);
}

if (VT == MVT::v16i8 || (Subtarget.hasInt256() && VT == MVT::v32i8) ||
(Subtarget.hasBWI() && VT == MVT::v64i8)) {
unsigned NumElts = VT.getVectorNumElements();
Expand Down
4 changes: 0 additions & 4 deletions llvm/lib/Target/X86/X86ISelLowering.h
Original file line number Diff line number Diff line change
Expand Up @@ -1150,10 +1150,6 @@ namespace llvm {

bool preferScalarizeSplat(SDNode *N) const override;

CondMergingParams
getJumpConditionMergingParams(Instruction::BinaryOps Opc, const Value *Lhs,
const Value *Rhs) const override;

bool shouldFoldConstantShiftPairToMask(const SDNode *N,
CombineLevel Level) const override;

Expand Down
9 changes: 2 additions & 7 deletions llvm/lib/Target/X86/X86IndirectThunks.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -118,20 +118,15 @@ class X86IndirectThunks : public MachineFunctionPass {
private:
std::tuple<RetpolineThunkInserter, LVIThunkInserter> TIs;

// FIXME: When LLVM moves to C++17, these can become folds
template <typename... ThunkInserterT>
static void initTIs(Module &M,
std::tuple<ThunkInserterT...> &ThunkInserters) {
(void)std::initializer_list<int>{
(std::get<ThunkInserterT>(ThunkInserters).init(M), 0)...};
(..., std::get<ThunkInserterT>(ThunkInserters).init(M));
}
template <typename... ThunkInserterT>
static bool runTIs(MachineModuleInfo &MMI, MachineFunction &MF,
std::tuple<ThunkInserterT...> &ThunkInserters) {
bool Modified = false;
(void)std::initializer_list<int>{
Modified |= std::get<ThunkInserterT>(ThunkInserters).run(MMI, MF)...};
return Modified;
return (0 | ... | std::get<ThunkInserterT>(ThunkInserters).run(MMI, MF));
}
};

Expand Down
31 changes: 26 additions & 5 deletions llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -857,6 +857,9 @@ struct DSEState {
// no longer be captured.
bool ShouldIterateEndOfFunctionDSE;

/// Dead instructions to be removed at the end of DSE.
SmallVector<Instruction *> ToRemove;

// Class contains self-reference, make sure it's not copied/moved.
DSEState(const DSEState &) = delete;
DSEState &operator=(const DSEState &) = delete;
Expand Down Expand Up @@ -1692,7 +1695,8 @@ struct DSEState {
return {MaybeDeadAccess};
}

// Delete dead memory defs
/// Delete dead memory defs and recursively add their operands to ToRemove if
/// they became dead.
void deleteDeadInstruction(Instruction *SI) {
MemorySSAUpdater Updater(&MSSA);
SmallVector<Instruction *, 32> NowDeadInsts;
Expand All @@ -1708,8 +1712,11 @@ struct DSEState {
salvageKnowledge(DeadInst);

// Remove the Instruction from MSSA.
if (MemoryAccess *MA = MSSA.getMemoryAccess(DeadInst)) {
if (MemoryDef *MD = dyn_cast<MemoryDef>(MA)) {
MemoryAccess *MA = MSSA.getMemoryAccess(DeadInst);
bool IsMemDef = MA && isa<MemoryDef>(MA);
if (MA) {
if (IsMemDef) {
auto *MD = cast<MemoryDef>(MA);
SkipStores.insert(MD);
if (auto *SI = dyn_cast<StoreInst>(MD->getMemoryInst())) {
if (SI->getValueOperand()->getType()->isPointerTy()) {
Expand All @@ -1730,13 +1737,21 @@ struct DSEState {
// Remove its operands
for (Use &O : DeadInst->operands())
if (Instruction *OpI = dyn_cast<Instruction>(O)) {
O = nullptr;
O.set(PoisonValue::get(O->getType()));
if (isInstructionTriviallyDead(OpI, &TLI))
NowDeadInsts.push_back(OpI);
}

EI.removeInstruction(DeadInst);
DeadInst->eraseFromParent();
// Remove memory defs directly if they don't produce results, but only
// queue other dead instructions for later removal. They may have been
// used as memory locations that have been cached by BatchAA. Removing
// them here may lead to newly created instructions to be allocated at the
// same address, yielding stale cache entries.
if (IsMemDef && DeadInst->getType()->isVoidTy())
DeadInst->eraseFromParent();
else
ToRemove.push_back(DeadInst);
}
}

Expand Down Expand Up @@ -2287,6 +2302,12 @@ static bool eliminateDeadStores(Function &F, AliasAnalysis &AA, MemorySSA &MSSA,

MadeChange |= State.eliminateRedundantStoresOfExistingValues();
MadeChange |= State.eliminateDeadWritesAtEndOfFunction();

while (!State.ToRemove.empty()) {
Instruction *DeadInst = State.ToRemove.pop_back_val();
DeadInst->eraseFromParent();
}

return MadeChange;
}
} // end anonymous namespace
Expand Down
53 changes: 53 additions & 0 deletions llvm/test/Analysis/CostModel/RISCV/vector-cost-without-v.ll
Original file line number Diff line number Diff line change
@@ -0,0 +1,53 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 4
; RUN: opt < %s -mtriple=riscv64 -mattr=+f,+d --passes=loop-unroll-full -S | FileCheck %s

; Check it doesn't crash when the vector extension is not enabled.
define void @foo() {
; CHECK-LABEL: define void @foo(
; CHECK-SAME: ) #[[ATTR0:[0-9]+]] {
; CHECK-NEXT: entry:
; CHECK-NEXT: br label [[FOR_BODY:%.*]]
; CHECK: for.body:
; CHECK-NEXT: [[INDVARS_IV1:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ], [ 0, [[ENTRY:%.*]] ]
; CHECK-NEXT: [[TMP0:%.*]] = load float, ptr null, align 4
; CHECK-NEXT: [[SPLAT_SPLAT_I_I_I:%.*]] = shufflevector <2 x float> zeroinitializer, <2 x float> zeroinitializer, <2 x i32> zeroinitializer
; CHECK-NEXT: [[CMP1_I_I_I:%.*]] = fcmp ogt <2 x float> zeroinitializer, zeroinitializer
; CHECK-NEXT: [[SPLAT_SPLAT3_I_I_I:%.*]] = shufflevector <2 x i32> zeroinitializer, <2 x i32> zeroinitializer, <2 x i32> zeroinitializer
; CHECK-NEXT: [[XOR3_I_I_I_I_I:%.*]] = select <2 x i1> zeroinitializer, <2 x i32> zeroinitializer, <2 x i32> zeroinitializer
; CHECK-NEXT: [[TMP1:%.*]] = load float, ptr null, align 4
; CHECK-NEXT: [[SPLAT_SPLAT8_I_I_I:%.*]] = shufflevector <2 x float> zeroinitializer, <2 x float> zeroinitializer, <2 x i32> zeroinitializer
; CHECK-NEXT: [[SUB_I_I_I:%.*]] = fsub <2 x float> zeroinitializer, zeroinitializer
; CHECK-NEXT: [[MUL_I_I_I:%.*]] = shl i64 0, 0
; CHECK-NEXT: [[TMP2:%.*]] = load float, ptr null, align 4
; CHECK-NEXT: [[SPLAT_SPLAT_I_I_I_I:%.*]] = shufflevector <2 x float> zeroinitializer, <2 x float> zeroinitializer, <2 x i32> zeroinitializer
; CHECK-NEXT: [[XOR3_I_I_I_V_I_I:%.*]] = select <2 x i1> zeroinitializer, <2 x float> zeroinitializer, <2 x float> zeroinitializer
; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add i64 [[INDVARS_IV1]], 1
; CHECK-NEXT: [[EXITCOND:%.*]] = icmp ne i64 [[INDVARS_IV1]], 8
; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_BODY]], label [[EXIT:%.*]]
; CHECK: exit:
; CHECK-NEXT: ret void
;
entry:
br label %for.body

; The loop below has a constant trip count of 9 (iv runs 0..8; the backedge is
; taken while "icmp ne i64 %indvars.iv1, 8" holds), so loop-unroll-full asks
; the RISC-V cost model to price every instruction in the body -- including
; the fixed-length <2 x float>/<2 x i32> shuffles, compares and selects --
; while only +f,+d are enabled (no vector extension; see the RUN line).
; The CHECK lines expect the IR to survive unchanged, i.e. no crash and no
; unrolling decision that depends on vector costs being available.
for.body: ; preds = %for.body, %entry
%indvars.iv1 = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
%0 = load float, ptr null, align 4
%splat.splat.i.i.i = shufflevector <2 x float> zeroinitializer, <2 x float> zeroinitializer, <2 x i32> zeroinitializer
%cmp1.i.i.i = fcmp ogt <2 x float> zeroinitializer, zeroinitializer
%splat.splat3.i.i.i = shufflevector <2 x i32> zeroinitializer, <2 x i32> zeroinitializer, <2 x i32> zeroinitializer
%xor3.i.i.i.i.i = select <2 x i1> zeroinitializer, <2 x i32> zeroinitializer, <2 x i32> zeroinitializer
%1 = load float, ptr null, align 4
%splat.splat8.i.i.i = shufflevector <2 x float> zeroinitializer, <2 x float> zeroinitializer, <2 x i32> zeroinitializer
%sub.i.i.i = fsub <2 x float> zeroinitializer, zeroinitializer
%mul.i.i.i = shl i64 0, 0
%2 = load float, ptr null, align 4
%splat.splat.i.i.i.i = shufflevector <2 x float> zeroinitializer, <2 x float> zeroinitializer, <2 x i32> zeroinitializer
%xor3.i.i.i.v.i.i = select <2 x i1> zeroinitializer, <2 x float> zeroinitializer, <2 x float> zeroinitializer
%indvars.iv.next = add i64 %indvars.iv1, 1
%exitcond = icmp ne i64 %indvars.iv1, 8
br i1 %exitcond, label %for.body, label %exit

exit: ; preds = %for.body
ret void
}
16 changes: 2 additions & 14 deletions llvm/test/Assembler/debug-info.ll
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
; RUN: llvm-as < %s | llvm-dis | llvm-as | llvm-dis | FileCheck %s
; RUN: verify-uselistorder %s

; CHECK: !named = !{!0, !0, !1, !2, !3, !4, !5, !6, !7, !8, !8, !9, !10, !11, !12, !13, !14, !15, !16, !17, !18, !19, !20, !21, !22, !23, !24, !25, !26, !27, !27, !28, !29, !30, !31, !32, !33, !34, !35, !36, !37, !38, !39, !40, !41, !42, !43}
!named = !{!0, !1, !2, !3, !4, !5, !6, !7, !8, !9, !10, !11, !12, !13, !14, !15, !16, !17, !18, !19, !20, !21, !22, !23, !24, !25, !26, !27, !28, !29, !30, !31, !32, !33, !34, !35, !36, !37, !38, !39, !40, !41, !42, !43, !44, !45, !46}
; CHECK: !named = !{!0, !0, !1, !2, !3, !4, !5, !6, !7, !8, !8, !9, !10, !11, !12, !13, !14, !15, !16, !17, !18, !19, !20, !21, !22, !23, !24, !25, !26, !27, !27, !28, !29, !30, !31, !32, !33, !34, !35, !36, !37, !38, !39}
!named = !{!0, !1, !2, !3, !4, !5, !6, !7, !8, !9, !10, !11, !12, !13, !14, !15, !16, !17, !18, !19, !20, !21, !22, !23, !24, !25, !26, !27, !28, !29, !30, !31, !32, !33, !34, !35, !36, !37, !38, !39, !40, !41, !42}

; CHECK: !0 = !DISubrange(count: 3, lowerBound: 0)
; CHECK-NEXT: !1 = !DISubrange(count: 3, lowerBound: 4)
Expand Down Expand Up @@ -99,15 +99,3 @@
; CHECK-NEXT: !39 = !DIBasicType(name: "u64.le", size: 64, align: 1, encoding: DW_ATE_unsigned, flags: DIFlagLittleEndian)
!41 = !DIBasicType(name: "u64.be", size: 64, align: 1, encoding: DW_ATE_unsigned, flags: DIFlagBigEndian)
!42 = !DIBasicType(name: "u64.le", size: 64, align: 1, encoding: DW_ATE_unsigned, flags: DIFlagLittleEndian)

; CHECK: !DIDerivedType(tag: DW_TAG_LLVM_ptrauth_type, baseType: !13, ptrAuthKey: 2, ptrAuthIsAddressDiscriminated: true, ptrAuthExtraDiscriminator: 1234, ptrAuthIsaPointer: false, ptrAuthAuthenticatesNullValues: false)
!43 = !DIDerivedType(tag: DW_TAG_LLVM_ptrauth_type, baseType: !15, ptrAuthKey: 2, ptrAuthIsAddressDiscriminated: true, ptrAuthExtraDiscriminator: 1234)

; CHECK: !DIDerivedType(tag: DW_TAG_LLVM_ptrauth_type, baseType: !13, ptrAuthKey: 2, ptrAuthIsAddressDiscriminated: true, ptrAuthExtraDiscriminator: 1234, ptrAuthIsaPointer: true, ptrAuthAuthenticatesNullValues: false)
!44 = !DIDerivedType(tag: DW_TAG_LLVM_ptrauth_type, baseType: !15, ptrAuthKey: 2, ptrAuthIsAddressDiscriminated: true, ptrAuthExtraDiscriminator: 1234, ptrAuthIsaPointer: true)

; CHECK: !DIDerivedType(tag: DW_TAG_LLVM_ptrauth_type, baseType: !13, ptrAuthKey: 2, ptrAuthIsAddressDiscriminated: true, ptrAuthExtraDiscriminator: 1234, ptrAuthIsaPointer: false, ptrAuthAuthenticatesNullValues: true)
!45 = !DIDerivedType(tag: DW_TAG_LLVM_ptrauth_type, baseType: !15, ptrAuthKey: 2, ptrAuthIsAddressDiscriminated: true, ptrAuthExtraDiscriminator: 1234, ptrAuthAuthenticatesNullValues: true)

; CHECK: !DIDerivedType(tag: DW_TAG_LLVM_ptrauth_type, baseType: !13, ptrAuthKey: 2, ptrAuthIsAddressDiscriminated: true, ptrAuthExtraDiscriminator: 1234, ptrAuthIsaPointer: true, ptrAuthAuthenticatesNullValues: true)
!46 = !DIDerivedType(tag: DW_TAG_LLVM_ptrauth_type, baseType: !15, ptrAuthKey: 2, ptrAuthIsAddressDiscriminated: true, ptrAuthExtraDiscriminator: 1234, ptrAuthIsaPointer: true, ptrAuthAuthenticatesNullValues: true)
72 changes: 40 additions & 32 deletions llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fabs.mir
Original file line number Diff line number Diff line change
Expand Up @@ -426,44 +426,48 @@ body: |
; SI: liveins: $sgpr0_sgpr1
; SI-NEXT: {{ $}}
; SI-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1
; SI-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub0
; SI-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub1
; SI-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 2147483647
; SI-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 [[COPY2]], [[S_MOV_B32_]], implicit-def dead $scc
; SI-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[S_AND_B32_]], %subreg.sub1
; SI-NEXT: [[COPY1:%[0-9]+]]:sreg_32_xm0 = COPY [[COPY]].sub1
; SI-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 [[COPY1]], [[S_MOV_B32_]], implicit-def dead $scc
; SI-NEXT: [[COPY2:%[0-9]+]]:sreg_32_xm0 = COPY [[S_AND_B32_]]
; SI-NEXT: [[COPY3:%[0-9]+]]:sreg_32_xexec_hi_and_sreg_32_xm0 = COPY [[COPY]].sub0
; SI-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY3]], %subreg.sub0, [[COPY2]], %subreg.sub1
; SI-NEXT: S_ENDPGM 0, implicit [[REG_SEQUENCE]]
;
; VI-LABEL: name: fabs_s64_ss
; VI: liveins: $sgpr0_sgpr1
; VI-NEXT: {{ $}}
; VI-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1
; VI-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub0
; VI-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub1
; VI-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 2147483647
; VI-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 [[COPY2]], [[S_MOV_B32_]], implicit-def dead $scc
; VI-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[S_AND_B32_]], %subreg.sub1
; VI-NEXT: [[COPY1:%[0-9]+]]:sreg_32_xm0 = COPY [[COPY]].sub1
; VI-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 [[COPY1]], [[S_MOV_B32_]], implicit-def dead $scc
; VI-NEXT: [[COPY2:%[0-9]+]]:sreg_32_xm0 = COPY [[S_AND_B32_]]
; VI-NEXT: [[COPY3:%[0-9]+]]:sreg_32_xexec_hi_and_sreg_32_xm0 = COPY [[COPY]].sub0
; VI-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY3]], %subreg.sub0, [[COPY2]], %subreg.sub1
; VI-NEXT: S_ENDPGM 0, implicit [[REG_SEQUENCE]]
;
; GFX9-LABEL: name: fabs_s64_ss
; GFX9: liveins: $sgpr0_sgpr1
; GFX9-NEXT: {{ $}}
; GFX9-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1
; GFX9-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub0
; GFX9-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub1
; GFX9-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 2147483647
; GFX9-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 [[COPY2]], [[S_MOV_B32_]], implicit-def dead $scc
; GFX9-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[S_AND_B32_]], %subreg.sub1
; GFX9-NEXT: [[COPY1:%[0-9]+]]:sreg_32_xm0 = COPY [[COPY]].sub1
; GFX9-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 [[COPY1]], [[S_MOV_B32_]], implicit-def dead $scc
; GFX9-NEXT: [[COPY2:%[0-9]+]]:sreg_32_xm0 = COPY [[S_AND_B32_]]
; GFX9-NEXT: [[COPY3:%[0-9]+]]:sreg_32_xexec_hi_and_sreg_32_xm0 = COPY [[COPY]].sub0
; GFX9-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY3]], %subreg.sub0, [[COPY2]], %subreg.sub1
; GFX9-NEXT: S_ENDPGM 0, implicit [[REG_SEQUENCE]]
;
; GFX10-LABEL: name: fabs_s64_ss
; GFX10: liveins: $sgpr0_sgpr1
; GFX10-NEXT: {{ $}}
; GFX10-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1
; GFX10-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub0
; GFX10-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub1
; GFX10-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 2147483647
; GFX10-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 [[COPY2]], [[S_MOV_B32_]], implicit-def dead $scc
; GFX10-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[S_AND_B32_]], %subreg.sub1
; GFX10-NEXT: [[COPY1:%[0-9]+]]:sreg_32_xm0 = COPY [[COPY]].sub1
; GFX10-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 [[COPY1]], [[S_MOV_B32_]], implicit-def dead $scc
; GFX10-NEXT: [[COPY2:%[0-9]+]]:sreg_32_xm0 = COPY [[S_AND_B32_]]
; GFX10-NEXT: [[COPY3:%[0-9]+]]:sreg_32_xexec_hi_and_sreg_32_xm0 = COPY [[COPY]].sub0
; GFX10-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY3]], %subreg.sub0, [[COPY2]], %subreg.sub1
; GFX10-NEXT: S_ENDPGM 0, implicit [[REG_SEQUENCE]]
%0:sgpr(s64) = COPY $sgpr0_sgpr1
%1:sgpr(s64) = G_FABS %0
Expand Down Expand Up @@ -639,44 +643,48 @@ body: |
; SI: liveins: $sgpr0_sgpr1
; SI-NEXT: {{ $}}
; SI-NEXT: [[DEF:%[0-9]+]]:sreg_64 = IMPLICIT_DEF
; SI-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY [[DEF]].sub0
; SI-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY [[DEF]].sub1
; SI-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 2147483647
; SI-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 [[COPY1]], [[S_MOV_B32_]], implicit-def dead $scc
; SI-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[S_AND_B32_]], %subreg.sub1
; SI-NEXT: [[COPY:%[0-9]+]]:sreg_32_xm0 = COPY [[DEF]].sub1
; SI-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 [[COPY]], [[S_MOV_B32_]], implicit-def dead $scc
; SI-NEXT: [[COPY1:%[0-9]+]]:sreg_32_xm0 = COPY [[S_AND_B32_]]
; SI-NEXT: [[COPY2:%[0-9]+]]:sreg_32_xexec_hi_and_sreg_32_xm0 = COPY [[DEF]].sub0
; SI-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY1]], %subreg.sub1
; SI-NEXT: S_ENDPGM 0, implicit [[REG_SEQUENCE]]
;
; VI-LABEL: name: fabs_s64_ss_no_src_constraint
; VI: liveins: $sgpr0_sgpr1
; VI-NEXT: {{ $}}
; VI-NEXT: [[DEF:%[0-9]+]]:sreg_64 = IMPLICIT_DEF
; VI-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY [[DEF]].sub0
; VI-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY [[DEF]].sub1
; VI-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 2147483647
; VI-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 [[COPY1]], [[S_MOV_B32_]], implicit-def dead $scc
; VI-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[S_AND_B32_]], %subreg.sub1
; VI-NEXT: [[COPY:%[0-9]+]]:sreg_32_xm0 = COPY [[DEF]].sub1
; VI-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 [[COPY]], [[S_MOV_B32_]], implicit-def dead $scc
; VI-NEXT: [[COPY1:%[0-9]+]]:sreg_32_xm0 = COPY [[S_AND_B32_]]
; VI-NEXT: [[COPY2:%[0-9]+]]:sreg_32_xexec_hi_and_sreg_32_xm0 = COPY [[DEF]].sub0
; VI-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY1]], %subreg.sub1
; VI-NEXT: S_ENDPGM 0, implicit [[REG_SEQUENCE]]
;
; GFX9-LABEL: name: fabs_s64_ss_no_src_constraint
; GFX9: liveins: $sgpr0_sgpr1
; GFX9-NEXT: {{ $}}
; GFX9-NEXT: [[DEF:%[0-9]+]]:sreg_64 = IMPLICIT_DEF
; GFX9-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY [[DEF]].sub0
; GFX9-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY [[DEF]].sub1
; GFX9-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 2147483647
; GFX9-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 [[COPY1]], [[S_MOV_B32_]], implicit-def dead $scc
; GFX9-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[S_AND_B32_]], %subreg.sub1
; GFX9-NEXT: [[COPY:%[0-9]+]]:sreg_32_xm0 = COPY [[DEF]].sub1
; GFX9-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 [[COPY]], [[S_MOV_B32_]], implicit-def dead $scc
; GFX9-NEXT: [[COPY1:%[0-9]+]]:sreg_32_xm0 = COPY [[S_AND_B32_]]
; GFX9-NEXT: [[COPY2:%[0-9]+]]:sreg_32_xexec_hi_and_sreg_32_xm0 = COPY [[DEF]].sub0
; GFX9-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY1]], %subreg.sub1
; GFX9-NEXT: S_ENDPGM 0, implicit [[REG_SEQUENCE]]
;
; GFX10-LABEL: name: fabs_s64_ss_no_src_constraint
; GFX10: liveins: $sgpr0_sgpr1
; GFX10-NEXT: {{ $}}
; GFX10-NEXT: [[DEF:%[0-9]+]]:sreg_64 = IMPLICIT_DEF
; GFX10-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY [[DEF]].sub0
; GFX10-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY [[DEF]].sub1
; GFX10-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 2147483647
; GFX10-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 [[COPY1]], [[S_MOV_B32_]], implicit-def dead $scc
; GFX10-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[S_AND_B32_]], %subreg.sub1
; GFX10-NEXT: [[COPY:%[0-9]+]]:sreg_32_xm0 = COPY [[DEF]].sub1
; GFX10-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 [[COPY]], [[S_MOV_B32_]], implicit-def dead $scc
; GFX10-NEXT: [[COPY1:%[0-9]+]]:sreg_32_xm0 = COPY [[S_AND_B32_]]
; GFX10-NEXT: [[COPY2:%[0-9]+]]:sreg_32_xexec_hi_and_sreg_32_xm0 = COPY [[DEF]].sub0
; GFX10-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY1]], %subreg.sub1
; GFX10-NEXT: S_ENDPGM 0, implicit [[REG_SEQUENCE]]
%0:sgpr(s64) = IMPLICIT_DEF
%1:sgpr(s64) = G_FABS %0:sgpr(s64)
Expand Down
72 changes: 40 additions & 32 deletions llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fneg.mir
Original file line number Diff line number Diff line change
Expand Up @@ -426,44 +426,48 @@ body: |
; SI: liveins: $sgpr0_sgpr1
; SI-NEXT: {{ $}}
; SI-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1
; SI-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub0
; SI-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub1
; SI-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 2147483648
; SI-NEXT: [[S_XOR_B32_:%[0-9]+]]:sreg_32 = S_XOR_B32 [[COPY2]], [[S_MOV_B32_]], implicit-def dead $scc
; SI-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[S_XOR_B32_]], %subreg.sub1
; SI-NEXT: [[COPY1:%[0-9]+]]:sreg_32_xm0 = COPY [[COPY]].sub1
; SI-NEXT: [[S_XOR_B32_:%[0-9]+]]:sreg_32 = S_XOR_B32 [[COPY1]], [[S_MOV_B32_]], implicit-def dead $scc
; SI-NEXT: [[COPY2:%[0-9]+]]:sreg_32_xm0 = COPY [[S_XOR_B32_]]
; SI-NEXT: [[COPY3:%[0-9]+]]:sreg_32_xexec_hi_and_sreg_32_xm0 = COPY [[COPY]].sub0
; SI-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY3]], %subreg.sub0, [[COPY2]], %subreg.sub1
; SI-NEXT: S_ENDPGM 0, implicit [[REG_SEQUENCE]]
;
; VI-LABEL: name: fneg_s64_ss
; VI: liveins: $sgpr0_sgpr1
; VI-NEXT: {{ $}}
; VI-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1
; VI-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub0
; VI-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub1
; VI-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 2147483648
; VI-NEXT: [[S_XOR_B32_:%[0-9]+]]:sreg_32 = S_XOR_B32 [[COPY2]], [[S_MOV_B32_]], implicit-def dead $scc
; VI-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[S_XOR_B32_]], %subreg.sub1
; VI-NEXT: [[COPY1:%[0-9]+]]:sreg_32_xm0 = COPY [[COPY]].sub1
; VI-NEXT: [[S_XOR_B32_:%[0-9]+]]:sreg_32 = S_XOR_B32 [[COPY1]], [[S_MOV_B32_]], implicit-def dead $scc
; VI-NEXT: [[COPY2:%[0-9]+]]:sreg_32_xm0 = COPY [[S_XOR_B32_]]
; VI-NEXT: [[COPY3:%[0-9]+]]:sreg_32_xexec_hi_and_sreg_32_xm0 = COPY [[COPY]].sub0
; VI-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY3]], %subreg.sub0, [[COPY2]], %subreg.sub1
; VI-NEXT: S_ENDPGM 0, implicit [[REG_SEQUENCE]]
;
; GFX9-LABEL: name: fneg_s64_ss
; GFX9: liveins: $sgpr0_sgpr1
; GFX9-NEXT: {{ $}}
; GFX9-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1
; GFX9-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub0
; GFX9-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub1
; GFX9-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 2147483648
; GFX9-NEXT: [[S_XOR_B32_:%[0-9]+]]:sreg_32 = S_XOR_B32 [[COPY2]], [[S_MOV_B32_]], implicit-def dead $scc
; GFX9-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[S_XOR_B32_]], %subreg.sub1
; GFX9-NEXT: [[COPY1:%[0-9]+]]:sreg_32_xm0 = COPY [[COPY]].sub1
; GFX9-NEXT: [[S_XOR_B32_:%[0-9]+]]:sreg_32 = S_XOR_B32 [[COPY1]], [[S_MOV_B32_]], implicit-def dead $scc
; GFX9-NEXT: [[COPY2:%[0-9]+]]:sreg_32_xm0 = COPY [[S_XOR_B32_]]
; GFX9-NEXT: [[COPY3:%[0-9]+]]:sreg_32_xexec_hi_and_sreg_32_xm0 = COPY [[COPY]].sub0
; GFX9-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY3]], %subreg.sub0, [[COPY2]], %subreg.sub1
; GFX9-NEXT: S_ENDPGM 0, implicit [[REG_SEQUENCE]]
;
; GFX10-LABEL: name: fneg_s64_ss
; GFX10: liveins: $sgpr0_sgpr1
; GFX10-NEXT: {{ $}}
; GFX10-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1
; GFX10-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub0
; GFX10-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub1
; GFX10-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 2147483648
; GFX10-NEXT: [[S_XOR_B32_:%[0-9]+]]:sreg_32 = S_XOR_B32 [[COPY2]], [[S_MOV_B32_]], implicit-def dead $scc
; GFX10-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[S_XOR_B32_]], %subreg.sub1
; GFX10-NEXT: [[COPY1:%[0-9]+]]:sreg_32_xm0 = COPY [[COPY]].sub1
; GFX10-NEXT: [[S_XOR_B32_:%[0-9]+]]:sreg_32 = S_XOR_B32 [[COPY1]], [[S_MOV_B32_]], implicit-def dead $scc
; GFX10-NEXT: [[COPY2:%[0-9]+]]:sreg_32_xm0 = COPY [[S_XOR_B32_]]
; GFX10-NEXT: [[COPY3:%[0-9]+]]:sreg_32_xexec_hi_and_sreg_32_xm0 = COPY [[COPY]].sub0
; GFX10-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY3]], %subreg.sub0, [[COPY2]], %subreg.sub1
; GFX10-NEXT: S_ENDPGM 0, implicit [[REG_SEQUENCE]]
%0:sgpr(s64) = COPY $sgpr0_sgpr1
%1:sgpr(s64) = G_FNEG %0
Expand Down Expand Up @@ -1023,44 +1027,48 @@ body: |
; SI: liveins: $sgpr0_sgpr1
; SI-NEXT: {{ $}}
; SI-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1
; SI-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub0
; SI-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub1
; SI-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 2147483648
; SI-NEXT: [[S_OR_B32_:%[0-9]+]]:sreg_32 = S_OR_B32 [[COPY2]], [[S_MOV_B32_]], implicit-def dead $scc
; SI-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[S_OR_B32_]], %subreg.sub1
; SI-NEXT: [[COPY1:%[0-9]+]]:sreg_32_xm0 = COPY [[COPY]].sub1
; SI-NEXT: [[S_OR_B32_:%[0-9]+]]:sreg_32 = S_OR_B32 [[COPY1]], [[S_MOV_B32_]], implicit-def dead $scc
; SI-NEXT: [[COPY2:%[0-9]+]]:sreg_32_xm0 = COPY [[S_OR_B32_]]
; SI-NEXT: [[COPY3:%[0-9]+]]:sreg_32_xexec_hi_and_sreg_32_xm0 = COPY [[COPY]].sub0
; SI-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY3]], %subreg.sub0, [[COPY2]], %subreg.sub1
; SI-NEXT: S_ENDPGM 0, implicit [[REG_SEQUENCE]]
;
; VI-LABEL: name: fneg_fabs_s64_ss
; VI: liveins: $sgpr0_sgpr1
; VI-NEXT: {{ $}}
; VI-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1
; VI-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub0
; VI-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub1
; VI-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 2147483648
; VI-NEXT: [[S_OR_B32_:%[0-9]+]]:sreg_32 = S_OR_B32 [[COPY2]], [[S_MOV_B32_]], implicit-def dead $scc
; VI-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[S_OR_B32_]], %subreg.sub1
; VI-NEXT: [[COPY1:%[0-9]+]]:sreg_32_xm0 = COPY [[COPY]].sub1
; VI-NEXT: [[S_OR_B32_:%[0-9]+]]:sreg_32 = S_OR_B32 [[COPY1]], [[S_MOV_B32_]], implicit-def dead $scc
; VI-NEXT: [[COPY2:%[0-9]+]]:sreg_32_xm0 = COPY [[S_OR_B32_]]
; VI-NEXT: [[COPY3:%[0-9]+]]:sreg_32_xexec_hi_and_sreg_32_xm0 = COPY [[COPY]].sub0
; VI-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY3]], %subreg.sub0, [[COPY2]], %subreg.sub1
; VI-NEXT: S_ENDPGM 0, implicit [[REG_SEQUENCE]]
;
; GFX9-LABEL: name: fneg_fabs_s64_ss
; GFX9: liveins: $sgpr0_sgpr1
; GFX9-NEXT: {{ $}}
; GFX9-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1
; GFX9-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub0
; GFX9-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub1
; GFX9-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 2147483648
; GFX9-NEXT: [[S_OR_B32_:%[0-9]+]]:sreg_32 = S_OR_B32 [[COPY2]], [[S_MOV_B32_]], implicit-def dead $scc
; GFX9-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[S_OR_B32_]], %subreg.sub1
; GFX9-NEXT: [[COPY1:%[0-9]+]]:sreg_32_xm0 = COPY [[COPY]].sub1
; GFX9-NEXT: [[S_OR_B32_:%[0-9]+]]:sreg_32 = S_OR_B32 [[COPY1]], [[S_MOV_B32_]], implicit-def dead $scc
; GFX9-NEXT: [[COPY2:%[0-9]+]]:sreg_32_xm0 = COPY [[S_OR_B32_]]
; GFX9-NEXT: [[COPY3:%[0-9]+]]:sreg_32_xexec_hi_and_sreg_32_xm0 = COPY [[COPY]].sub0
; GFX9-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY3]], %subreg.sub0, [[COPY2]], %subreg.sub1
; GFX9-NEXT: S_ENDPGM 0, implicit [[REG_SEQUENCE]]
;
; GFX10-LABEL: name: fneg_fabs_s64_ss
; GFX10: liveins: $sgpr0_sgpr1
; GFX10-NEXT: {{ $}}
; GFX10-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1
; GFX10-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub0
; GFX10-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub1
; GFX10-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 2147483648
; GFX10-NEXT: [[S_OR_B32_:%[0-9]+]]:sreg_32 = S_OR_B32 [[COPY2]], [[S_MOV_B32_]], implicit-def dead $scc
; GFX10-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[S_OR_B32_]], %subreg.sub1
; GFX10-NEXT: [[COPY1:%[0-9]+]]:sreg_32_xm0 = COPY [[COPY]].sub1
; GFX10-NEXT: [[S_OR_B32_:%[0-9]+]]:sreg_32 = S_OR_B32 [[COPY1]], [[S_MOV_B32_]], implicit-def dead $scc
; GFX10-NEXT: [[COPY2:%[0-9]+]]:sreg_32_xm0 = COPY [[S_OR_B32_]]
; GFX10-NEXT: [[COPY3:%[0-9]+]]:sreg_32_xexec_hi_and_sreg_32_xm0 = COPY [[COPY]].sub0
; GFX10-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY3]], %subreg.sub0, [[COPY2]], %subreg.sub1
; GFX10-NEXT: S_ENDPGM 0, implicit [[REG_SEQUENCE]]
%0:sgpr(s64) = COPY $sgpr0_sgpr1
%1:sgpr(s64) = G_FABS %0
Expand Down
16 changes: 10 additions & 6 deletions llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-sext.mir
Original file line number Diff line number Diff line change
Expand Up @@ -104,10 +104,13 @@ body: |
; GCN: liveins: $sgpr0
; GCN-NEXT: {{ $}}
; GCN-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
; GCN-NEXT: [[DEF:%[0-9]+]]:sreg_32 = IMPLICIT_DEF
; GCN-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[DEF]], %subreg.sub1
; GCN-NEXT: [[S_BFE_I64_:%[0-9]+]]:sreg_64 = S_BFE_I64 [[REG_SEQUENCE]], 1048576, implicit-def $scc
; GCN-NEXT: $sgpr0_sgpr1 = COPY [[S_BFE_I64_]]
; GCN-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 31
; GCN-NEXT: [[S_SEXT_I32_I16_:%[0-9]+]]:sreg_32 = S_SEXT_I32_I16 [[COPY]]
; GCN-NEXT: [[S_ASHR_I32_:%[0-9]+]]:sreg_32 = S_ASHR_I32 [[S_SEXT_I32_I16_]], [[S_MOV_B32_]], implicit-def dead $scc
; GCN-NEXT: [[COPY1:%[0-9]+]]:sreg_32_xm0 = COPY [[S_ASHR_I32_]]
; GCN-NEXT: [[S_SEXT_I32_I16_1:%[0-9]+]]:sreg_32_xexec_hi_and_sreg_32_xm0 = S_SEXT_I32_I16 [[COPY]]
; GCN-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_SEXT_I32_I16_1]], %subreg.sub0, [[COPY1]], %subreg.sub1
; GCN-NEXT: $sgpr0_sgpr1 = COPY [[REG_SEQUENCE]]
%0:sgpr(s32) = COPY $sgpr0
%1:sgpr(s16) = G_TRUNC %0
%2:sgpr(s64) = G_SEXT %1
Expand All @@ -127,9 +130,10 @@ body: |
; GCN-LABEL: name: sext_sgpr_s32_to_sgpr_s64
; GCN: liveins: $sgpr0
; GCN-NEXT: {{ $}}
; GCN-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
; GCN-NEXT: [[COPY:%[0-9]+]]:sreg_32_xexec_hi_and_sreg_32_xm0 = COPY $sgpr0
; GCN-NEXT: [[S_ASHR_I32_:%[0-9]+]]:sreg_32 = S_ASHR_I32 [[COPY]], 31, implicit-def dead $scc
; GCN-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[S_ASHR_I32_]], %subreg.sub1
; GCN-NEXT: [[COPY1:%[0-9]+]]:sreg_32_xm0 = COPY [[S_ASHR_I32_]]
; GCN-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1
; GCN-NEXT: $sgpr0_sgpr1 = COPY [[REG_SEQUENCE]]
%0:sgpr(s32) = COPY $sgpr0
%1:sgpr(s64) = G_SEXT %0
Expand Down
10 changes: 6 additions & 4 deletions llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-zext.mir
Original file line number Diff line number Diff line change
Expand Up @@ -104,10 +104,12 @@ body: |
; GCN: liveins: $sgpr0
; GCN-NEXT: {{ $}}
; GCN-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
; GCN-NEXT: [[DEF:%[0-9]+]]:sreg_32 = IMPLICIT_DEF
; GCN-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[DEF]], %subreg.sub1
; GCN-NEXT: [[S_BFE_U64_:%[0-9]+]]:sreg_64 = S_BFE_U64 [[REG_SEQUENCE]], 1048576, implicit-def $scc
; GCN-NEXT: $sgpr0_sgpr1 = COPY [[S_BFE_U64_]]
; GCN-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32_xm0 = S_MOV_B32 0
; GCN-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 65535
; GCN-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 [[COPY]], [[S_MOV_B32_1]], implicit-def dead $scc
; GCN-NEXT: [[COPY1:%[0-9]+]]:sreg_32_xexec_hi_and_sreg_32_xm0 = COPY [[S_AND_B32_]]
; GCN-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[S_MOV_B32_]], %subreg.sub1
; GCN-NEXT: $sgpr0_sgpr1 = COPY [[REG_SEQUENCE]]
%0:sgpr(s32) = COPY $sgpr0
%1:sgpr(s16) = G_TRUNC %0
%2:sgpr(s64) = G_ZEXT %1
Expand Down
44 changes: 22 additions & 22 deletions llvm/test/CodeGen/ARM/aes-erratum-fix.ll
Original file line number Diff line number Diff line change
Expand Up @@ -49,8 +49,8 @@ define arm_aapcs_vfpcc void @aese_via_call1(ptr %0) nounwind {
; CHECK-FIX-NEXT: bl get_input
; CHECK-FIX-NEXT: vorr q0, q0, q0
; CHECK-FIX-NEXT: vld1.64 {d16, d17}, [r4]
; CHECK-FIX-NEXT: aese.8 q0, q8
; CHECK-FIX-NEXT: aesmc.8 q8, q0
; CHECK-FIX-NEXT: aese.8 q8, q0
; CHECK-FIX-NEXT: aesmc.8 q8, q8
; CHECK-FIX-NEXT: vst1.64 {d16, d17}, [r4]
; CHECK-FIX-NEXT: pop {r4, pc}
%2 = call arm_aapcs_vfpcc <16 x i8> @get_input()
Expand All @@ -70,8 +70,8 @@ define arm_aapcs_vfpcc void @aese_via_call2(half %0, ptr %1) nounwind {
; CHECK-FIX-NEXT: bl get_inputf16
; CHECK-FIX-NEXT: vorr q0, q0, q0
; CHECK-FIX-NEXT: vld1.64 {d16, d17}, [r4]
; CHECK-FIX-NEXT: aese.8 q0, q8
; CHECK-FIX-NEXT: aesmc.8 q8, q0
; CHECK-FIX-NEXT: aese.8 q8, q0
; CHECK-FIX-NEXT: aesmc.8 q8, q8
; CHECK-FIX-NEXT: vst1.64 {d16, d17}, [r4]
; CHECK-FIX-NEXT: pop {r4, pc}
%3 = call arm_aapcs_vfpcc <16 x i8> @get_inputf16(half %0)
Expand All @@ -91,8 +91,8 @@ define arm_aapcs_vfpcc void @aese_via_call3(float %0, ptr %1) nounwind {
; CHECK-FIX-NEXT: bl get_inputf32
; CHECK-FIX-NEXT: vorr q0, q0, q0
; CHECK-FIX-NEXT: vld1.64 {d16, d17}, [r4]
; CHECK-FIX-NEXT: aese.8 q0, q8
; CHECK-FIX-NEXT: aesmc.8 q8, q0
; CHECK-FIX-NEXT: aese.8 q8, q0
; CHECK-FIX-NEXT: aesmc.8 q8, q8
; CHECK-FIX-NEXT: vst1.64 {d16, d17}, [r4]
; CHECK-FIX-NEXT: pop {r4, pc}
%3 = call arm_aapcs_vfpcc <16 x i8> @get_inputf32(float %0)
Expand Down Expand Up @@ -123,10 +123,10 @@ define arm_aapcs_vfpcc void @aese_once_via_ptr(ptr %0, ptr %1) nounwind {
define arm_aapcs_vfpcc <16 x i8> @aese_once_via_val(<16 x i8> %0, <16 x i8> %1) nounwind {
; CHECK-FIX-LABEL: aese_once_via_val:
; CHECK-FIX: @ %bb.0:
; CHECK-FIX-NEXT: vorr q1, q1, q1
; CHECK-FIX-NEXT: vorr q0, q0, q0
; CHECK-FIX-NEXT: aese.8 q1, q0
; CHECK-FIX-NEXT: aesmc.8 q0, q1
; CHECK-FIX-NEXT: vorr q1, q1, q1
; CHECK-FIX-NEXT: aese.8 q0, q1
; CHECK-FIX-NEXT: aesmc.8 q0, q0
; CHECK-FIX-NEXT: bx lr
%3 = call <16 x i8> @llvm.arm.neon.aese(<16 x i8> %1, <16 x i8> %0)
%4 = call <16 x i8> @llvm.arm.neon.aesmc(<16 x i8> %3)
Expand All @@ -142,8 +142,8 @@ define arm_aapcs_vfpcc void @aese_twice_via_ptr(ptr %0, ptr %1) nounwind {
; CHECK-FIX-NEXT: aesmc.8 q8, q9
; CHECK-FIX-NEXT: vst1.64 {d16, d17}, [r1]
; CHECK-FIX-NEXT: vld1.64 {d18, d19}, [r0]
; CHECK-FIX-NEXT: aese.8 q8, q9
; CHECK-FIX-NEXT: aesmc.8 q8, q8
; CHECK-FIX-NEXT: aese.8 q9, q8
; CHECK-FIX-NEXT: aesmc.8 q8, q9
; CHECK-FIX-NEXT: vst1.64 {d16, d17}, [r1]
; CHECK-FIX-NEXT: bx lr
%3 = load <16 x i8>, ptr %1, align 8
Expand Down Expand Up @@ -2202,8 +2202,8 @@ define arm_aapcs_vfpcc void @aesd_via_call1(ptr %0) nounwind {
; CHECK-FIX-NEXT: bl get_input
; CHECK-FIX-NEXT: vorr q0, q0, q0
; CHECK-FIX-NEXT: vld1.64 {d16, d17}, [r4]
; CHECK-FIX-NEXT: aesd.8 q0, q8
; CHECK-FIX-NEXT: aesimc.8 q8, q0
; CHECK-FIX-NEXT: aesd.8 q8, q0
; CHECK-FIX-NEXT: aesimc.8 q8, q8
; CHECK-FIX-NEXT: vst1.64 {d16, d17}, [r4]
; CHECK-FIX-NEXT: pop {r4, pc}
%2 = call arm_aapcs_vfpcc <16 x i8> @get_input()
Expand All @@ -2223,8 +2223,8 @@ define arm_aapcs_vfpcc void @aesd_via_call2(half %0, ptr %1) nounwind {
; CHECK-FIX-NEXT: bl get_inputf16
; CHECK-FIX-NEXT: vorr q0, q0, q0
; CHECK-FIX-NEXT: vld1.64 {d16, d17}, [r4]
; CHECK-FIX-NEXT: aesd.8 q0, q8
; CHECK-FIX-NEXT: aesimc.8 q8, q0
; CHECK-FIX-NEXT: aesd.8 q8, q0
; CHECK-FIX-NEXT: aesimc.8 q8, q8
; CHECK-FIX-NEXT: vst1.64 {d16, d17}, [r4]
; CHECK-FIX-NEXT: pop {r4, pc}
%3 = call arm_aapcs_vfpcc <16 x i8> @get_inputf16(half %0)
Expand All @@ -2244,8 +2244,8 @@ define arm_aapcs_vfpcc void @aesd_via_call3(float %0, ptr %1) nounwind {
; CHECK-FIX-NEXT: bl get_inputf32
; CHECK-FIX-NEXT: vorr q0, q0, q0
; CHECK-FIX-NEXT: vld1.64 {d16, d17}, [r4]
; CHECK-FIX-NEXT: aesd.8 q0, q8
; CHECK-FIX-NEXT: aesimc.8 q8, q0
; CHECK-FIX-NEXT: aesd.8 q8, q0
; CHECK-FIX-NEXT: aesimc.8 q8, q8
; CHECK-FIX-NEXT: vst1.64 {d16, d17}, [r4]
; CHECK-FIX-NEXT: pop {r4, pc}
%3 = call arm_aapcs_vfpcc <16 x i8> @get_inputf32(float %0)
Expand Down Expand Up @@ -2276,10 +2276,10 @@ define arm_aapcs_vfpcc void @aesd_once_via_ptr(ptr %0, ptr %1) nounwind {
define arm_aapcs_vfpcc <16 x i8> @aesd_once_via_val(<16 x i8> %0, <16 x i8> %1) nounwind {
; CHECK-FIX-LABEL: aesd_once_via_val:
; CHECK-FIX: @ %bb.0:
; CHECK-FIX-NEXT: vorr q1, q1, q1
; CHECK-FIX-NEXT: vorr q0, q0, q0
; CHECK-FIX-NEXT: aesd.8 q1, q0
; CHECK-FIX-NEXT: aesimc.8 q0, q1
; CHECK-FIX-NEXT: vorr q1, q1, q1
; CHECK-FIX-NEXT: aesd.8 q0, q1
; CHECK-FIX-NEXT: aesimc.8 q0, q0
; CHECK-FIX-NEXT: bx lr
%3 = call <16 x i8> @llvm.arm.neon.aesd(<16 x i8> %1, <16 x i8> %0)
%4 = call <16 x i8> @llvm.arm.neon.aesimc(<16 x i8> %3)
Expand All @@ -2295,8 +2295,8 @@ define arm_aapcs_vfpcc void @aesd_twice_via_ptr(ptr %0, ptr %1) nounwind {
; CHECK-FIX-NEXT: aesimc.8 q8, q9
; CHECK-FIX-NEXT: vst1.64 {d16, d17}, [r1]
; CHECK-FIX-NEXT: vld1.64 {d18, d19}, [r0]
; CHECK-FIX-NEXT: aesd.8 q8, q9
; CHECK-FIX-NEXT: aesimc.8 q8, q8
; CHECK-FIX-NEXT: aesd.8 q9, q8
; CHECK-FIX-NEXT: aesimc.8 q8, q9
; CHECK-FIX-NEXT: vst1.64 {d16, d17}, [r1]
; CHECK-FIX-NEXT: bx lr
%3 = load <16 x i8>, ptr %1, align 8
Expand Down
41 changes: 41 additions & 0 deletions llvm/test/CodeGen/ARM/aes.ll
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4
; RUN: llc %s -o - -mtriple=armv8-none-eabi -mattr=+aes | FileCheck %s

declare <16 x i8> @llvm.arm.neon.aese(<16 x i8> %d, <16 x i8> %k)
declare <16 x i8> @llvm.arm.neon.aesd(<16 x i8> %d, <16 x i8> %k)

; Operands in "natural" order: the data register already holds the first
; argument, so a single aese.8 q0, q1 is expected with nothing between the
; block label and the return (enforced by the CHECK-NEXT chain).
define arm_aapcs_vfpcc <16 x i8> @aese(<16 x i8> %a, <16 x i8> %b) {
; CHECK-LABEL: aese:
; CHECK: @ %bb.0:
; CHECK-NEXT: aese.8 q0, q1
; CHECK-NEXT: bx lr
%r = call <16 x i8> @llvm.arm.neon.aese(<16 x i8> %a, <16 x i8> %b)
ret <16 x i8> %r
}

; Same as @aese but with the intrinsic's operands swapped (%b, %a). The CHECK
; lines still expect exactly one aese.8 q0, q1 and no extra register moves,
; i.e. the swapped form selects to the identical single instruction.
define arm_aapcs_vfpcc <16 x i8> @aese_c(<16 x i8> %a, <16 x i8> %b) {
; CHECK-LABEL: aese_c:
; CHECK: @ %bb.0:
; CHECK-NEXT: aese.8 q0, q1
; CHECK-NEXT: bx lr
%r = call <16 x i8> @llvm.arm.neon.aese(<16 x i8> %b, <16 x i8> %a)
ret <16 x i8> %r
}

; Decrypt counterpart of @aese: natural operand order, expected to select a
; lone aesd.8 q0, q1 followed directly by the return.
define arm_aapcs_vfpcc <16 x i8> @aesd(<16 x i8> %a, <16 x i8> %b) {
; CHECK-LABEL: aesd:
; CHECK: @ %bb.0:
; CHECK-NEXT: aesd.8 q0, q1
; CHECK-NEXT: bx lr
%r = call <16 x i8> @llvm.arm.neon.aesd(<16 x i8> %a, <16 x i8> %b)
ret <16 x i8> %r
}

; Same as @aesd but with the intrinsic's operands swapped (%b, %a); the CHECK
; lines require the identical single aesd.8 q0, q1 with no extra moves.
define arm_aapcs_vfpcc <16 x i8> @aesd_c(<16 x i8> %a, <16 x i8> %b) {
; CHECK-LABEL: aesd_c:
; CHECK: @ %bb.0:
; CHECK-NEXT: aesd.8 q0, q1
; CHECK-NEXT: bx lr
%r = call <16 x i8> @llvm.arm.neon.aesd(<16 x i8> %b, <16 x i8> %a)
ret <16 x i8> %r
}
Loading