5 changes: 0 additions & 5 deletions llvm/cmake/modules/LLVMConfig.cmake.in
Original file line number Diff line number Diff line change
Expand Up @@ -60,11 +60,6 @@ if(LLVM_ENABLE_LIBEDIT)
find_package(LibEdit)
endif()

set(LLVM_ENABLE_TERMINFO @LLVM_ENABLE_TERMINFO@)
if(LLVM_ENABLE_TERMINFO)
find_package(Terminfo)
endif()

set(LLVM_ENABLE_THREADS @LLVM_ENABLE_THREADS@)

set(LLVM_ENABLE_UNWIND_TABLES @LLVM_ENABLE_UNWIND_TABLES@)
Expand Down
1 change: 1 addition & 0 deletions llvm/docs/DirectXUsage.rst
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@ User Guide for the DirectX Target

DirectX/DXILArchitecture
DirectX/DXContainer
DirectX/DXILOpTableGenDesign

Introduction
============
Expand Down
4 changes: 4 additions & 0 deletions llvm/docs/ReleaseNotes.rst
Original file line number Diff line number Diff line change
Expand Up @@ -63,6 +63,10 @@ Changes to LLVM infrastructure
Changes to building LLVM
------------------------

- The ``LLVM_ENABLE_TERMINFO`` flag has been removed. LLVM no longer depends on
terminfo and now always uses the ``TERM`` environment variable for color
support autodetection.

Changes to TableGen
-------------------

Expand Down
3 changes: 0 additions & 3 deletions llvm/include/llvm/Config/config.h.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -209,9 +209,6 @@
/* Define to 1 if you have the <sys/types.h> header file. */
#cmakedefine HAVE_SYS_TYPES_H ${HAVE_SYS_TYPES_H}

/* Define if the setupterm() function is supported on this platform. */
#cmakedefine LLVM_ENABLE_TERMINFO ${LLVM_ENABLE_TERMINFO}

/* Define to 1 if you have the <termios.h> header file. */
#cmakedefine HAVE_TERMIOS_H ${HAVE_TERMIOS_H}

Expand Down
4 changes: 4 additions & 0 deletions llvm/include/llvm/ProfileData/InstrProfReader.h
Original file line number Diff line number Diff line change
Expand Up @@ -659,6 +659,10 @@ class IndexedMemProfReader {
std::unique_ptr<MemProfFrameHashTable> MemProfFrameTable;
/// MemProf call stack data on-disk indexed via call stack id.
std::unique_ptr<MemProfCallStackHashTable> MemProfCallStackTable;
/// The starting address of the frame array.
const unsigned char *FrameBase = nullptr;
/// The starting address of the call stack array.
const unsigned char *CallStackBase = nullptr;

Error deserializeV012(const unsigned char *Start, const unsigned char *Ptr,
uint64_t FirstWord, memprof::IndexedVersion Version);
Expand Down
60 changes: 56 additions & 4 deletions llvm/include/llvm/ProfileData/MemProf.h
Original file line number Diff line number Diff line change
Expand Up @@ -418,7 +418,9 @@ struct IndexedMemProfRecord {
// Serializes the memprof records in \p Records to the ostream \p OS based
// on the schema provided in \p Schema.
void serialize(const MemProfSchema &Schema, raw_ostream &OS,
IndexedVersion Version);
IndexedVersion Version,
llvm::DenseMap<memprof::CallStackId, uint32_t>
*MemProfCallStackIndexes = nullptr);

// Deserializes memprof records from the Buffer.
static IndexedMemProfRecord deserialize(const MemProfSchema &Schema,
Expand Down Expand Up @@ -557,11 +559,17 @@ class RecordWriterTrait {
// The MemProf version to use for the serialization.
IndexedVersion Version;

// Mappings from CallStackId to the indexes into the call stack array.
llvm::DenseMap<memprof::CallStackId, uint32_t> *MemProfCallStackIndexes;

public:
// We do not support the default constructor, which does not set Version.
RecordWriterTrait() = delete;
RecordWriterTrait(const MemProfSchema *Schema, IndexedVersion V)
: Schema(Schema), Version(V) {}
RecordWriterTrait(
const MemProfSchema *Schema, IndexedVersion V,
llvm::DenseMap<memprof::CallStackId, uint32_t> *MemProfCallStackIndexes)
: Schema(Schema), Version(V),
MemProfCallStackIndexes(MemProfCallStackIndexes) {}

static hash_value_type ComputeHash(key_type_ref K) { return K; }

Expand All @@ -586,7 +594,7 @@ class RecordWriterTrait {
void EmitData(raw_ostream &Out, key_type_ref /*Unused*/, data_type_ref V,
offset_type /*Unused*/) {
assert(Schema != nullptr && "MemProf schema is not initialized!");
V.serialize(*Schema, Out, Version);
V.serialize(*Schema, Out, Version, MemProfCallStackIndexes);
// Clear the IndexedMemProfRecord which results in clearing/freeing its
// vectors of allocs and callsites. This is owned by the associated on-disk
// hash table, but unused after this point. See also the comment added to
Expand Down Expand Up @@ -835,6 +843,50 @@ template <typename MapTy> struct CallStackIdConverter {
}
};

// Function object that maps a linear frame index to the Frame serialized at
// that position of the frame array in the profile.
struct LinearFrameIdConverter {
  // Start of the serialized frame array. Consecutive entries are
  // Frame::serializedSize() bytes apart.
  const unsigned char *FrameBase;

  LinearFrameIdConverter() = delete;
  LinearFrameIdConverter(const unsigned char *FrameBase)
      : FrameBase(FrameBase) {}

  // Deserializes and returns the Frame stored at index LinearId.
  Frame operator()(uint32_t LinearId) {
    // Widen the index to 64 bits before scaling it to a byte offset.
    const uint64_t ByteOffset =
        Frame::serializedSize() * static_cast<uint64_t>(LinearId);
    const unsigned char *Entry = FrameBase + ByteOffset;
    return Frame::deserialize(Entry);
  }
};

// Function object that materializes the call stack stored at a given index
// into the call stack array in the profile.
struct LinearCallStackIdConverter {
  // Start of the serialized call stack array. Indexes into it are expressed in
  // units of uint32_t.
  const unsigned char *CallStackBase;
  // Turns a frame index read from the call stack array into a Frame.
  std::function<Frame(uint32_t)> FrameIdToFrame;

  LinearCallStackIdConverter() = delete;
  LinearCallStackIdConverter(const unsigned char *CallStackBase,
                             std::function<Frame(uint32_t)> FrameIdToFrame)
      : CallStackBase(CallStackBase), FrameIdToFrame(FrameIdToFrame) {}

  // Reads the call stack that starts LinearCSId uint32_t slots past
  // CallStackBase and returns its frames in stored order.
  llvm::SmallVector<Frame> operator()(uint32_t LinearCSId) {
    // Reads one little-endian uint32_t and advances the cursor past it.
    auto ReadU32 = [](const unsigned char *&Cursor) -> uint32_t {
      return support::endian::readNext<uint32_t, llvm::endianness::little>(
          Cursor);
    };

    // Widen the index to 64 bits before scaling it to a byte offset.
    const uint64_t ByteOffset =
        sizeof(uint32_t) * static_cast<uint64_t>(LinearCSId);
    const unsigned char *Cursor = CallStackBase + ByteOffset;

    // An entry is a frame count followed by that many frame indexes.
    const uint32_t FrameCount = ReadU32(Cursor);

    llvm::SmallVector<Frame> Result;
    Result.reserve(FrameCount);
    for (uint32_t I = 0; I != FrameCount; ++I) {
      const uint32_t FrameIndex = ReadU32(Cursor);
      Result.push_back(FrameIdToFrame(FrameIndex));
    }
    return Result;
  }
};

struct IndexedMemProfData {
// A map to hold memprof data per function. The lower 64 bits obtained from
// the md5 hash of the function name is used to index into the map.
Expand Down
36 changes: 19 additions & 17 deletions llvm/lib/ProfileData/InstrProfReader.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1261,16 +1261,10 @@ Error IndexedMemProfReader::deserializeV012(const unsigned char *Start,
Error IndexedMemProfReader::deserializeV3(const unsigned char *Start,
const unsigned char *Ptr,
memprof::IndexedVersion Version) {
// The value returned from FrameTableGenerator.Emit.
const uint64_t FrameTableOffset =
support::endian::readNext<uint64_t, llvm::endianness::little>(Ptr);
// The offset in the stream right before invoking
// CallStackTableGenerator.Emit.
const uint64_t CallStackPayloadOffset =
support::endian::readNext<uint64_t, llvm::endianness::little>(Ptr);
// The value returned from CallStackTableGenerator.Emit.
const uint64_t CallStackTableOffset =
support::endian::readNext<uint64_t, llvm::endianness::little>(Ptr);
// The offset in the stream right before invoking RecordTableGenerator.Emit.
const uint64_t RecordPayloadOffset =
support::endian::readNext<uint64_t, llvm::endianness::little>(Ptr);
Expand All @@ -1284,16 +1278,8 @@ Error IndexedMemProfReader::deserializeV3(const unsigned char *Start,
return SchemaOr.takeError();
Schema = SchemaOr.get();

// Initialize the frame table reader with the payload and bucket offsets.
MemProfFrameTable.reset(MemProfFrameHashTable::Create(
/*Buckets=*/Start + FrameTableOffset,
/*Payload=*/Ptr,
/*Base=*/Start));

MemProfCallStackTable.reset(MemProfCallStackHashTable::Create(
/*Buckets=*/Start + CallStackTableOffset,
/*Payload=*/Start + CallStackPayloadOffset,
/*Base=*/Start));
FrameBase = Ptr;
CallStackBase = Start + CallStackPayloadOffset;

// Now initialize the table reader with a pointer into data buffer.
MemProfRecordTable.reset(MemProfRecordHashTable::Create(
Expand Down Expand Up @@ -1605,6 +1591,16 @@ getMemProfRecordV2(const memprof::IndexedMemProfRecord &IndexedRecord,
return Record;
}

// Expands IndexedRecord into a MemProfRecord, resolving its call stack ids
// through the linear converters built over FrameBase and CallStackBase.
static Expected<memprof::MemProfRecord>
getMemProfRecordV3(const memprof::IndexedMemProfRecord &IndexedRecord,
                   const unsigned char *FrameBase,
                   const unsigned char *CallStackBase) {
  memprof::LinearFrameIdConverter FrameLookup(FrameBase);
  memprof::LinearCallStackIdConverter CallStackLookup(CallStackBase,
                                                      FrameLookup);
  return IndexedRecord.toMemProfRecord(CallStackLookup);
}

Expected<memprof::MemProfRecord>
IndexedMemProfReader::getMemProfRecord(const uint64_t FuncNameHash) const {
// TODO: Add memprof specific errors.
Expand All @@ -1626,11 +1622,17 @@ IndexedMemProfReader::getMemProfRecord(const uint64_t FuncNameHash) const {
"MemProfCallStackTable must not be available");
return getMemProfRecordV0(IndexedRecord, *MemProfFrameTable);
case memprof::Version2:
case memprof::Version3:
assert(MemProfFrameTable && "MemProfFrameTable must be available");
assert(MemProfCallStackTable && "MemProfCallStackTable must be available");
return getMemProfRecordV2(IndexedRecord, *MemProfFrameTable,
*MemProfCallStackTable);
case memprof::Version3:
assert(!MemProfFrameTable && "MemProfFrameTable must not be available");
assert(!MemProfCallStackTable &&
"MemProfCallStackTable must not be available");
assert(FrameBase && "FrameBase must be available");
assert(CallStackBase && "CallStackBase must be available");
return getMemProfRecordV3(IndexedRecord, FrameBase, CallStackBase);
}

return make_error<InstrProfError>(
Expand Down
90 changes: 77 additions & 13 deletions llvm/lib/ProfileData/InstrProfWriter.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,7 @@ class ProfOStream {

uint64_t tell() { return OS.tell(); }
void write(uint64_t V) { LE.write<uint64_t>(V); }
void write32(uint32_t V) { LE.write<uint32_t>(V); }
void writeByte(uint8_t V) { LE.write<uint8_t>(V); }

// \c patch can only be called when all data is written and flushed.
Expand Down Expand Up @@ -452,8 +453,11 @@ static uint64_t writeMemProfRecords(
ProfOStream &OS,
llvm::MapVector<GlobalValue::GUID, memprof::IndexedMemProfRecord>
&MemProfRecordData,
memprof::MemProfSchema *Schema, memprof::IndexedVersion Version) {
memprof::RecordWriterTrait RecordWriter(Schema, Version);
memprof::MemProfSchema *Schema, memprof::IndexedVersion Version,
llvm::DenseMap<memprof::CallStackId, uint32_t> *MemProfCallStackIndexes =
nullptr) {
memprof::RecordWriterTrait RecordWriter(Schema, Version,
MemProfCallStackIndexes);
OnDiskChainedHashTableGenerator<memprof::RecordWriterTrait>
RecordTableGenerator;
for (auto &[GUID, Record] : MemProfRecordData) {
Expand Down Expand Up @@ -485,6 +489,39 @@ static uint64_t writeMemProfFrames(
return FrameTableGenerator.Emit(OS.OS);
}

// Serialize MemProfFrameData as a flat array of frames, emitted in ascending
// FrameId order. Returns the mapping from each FrameId to its index within
// that array. MemProfFrameData is cleared before returning.
static llvm::DenseMap<memprof::FrameId, uint32_t> writeMemProfFrameArray(
    ProfOStream &OS,
    llvm::MapVector<memprof::FrameId, memprof::Frame> &MemProfFrameData) {
  // Gather (FrameId, Frame*) pairs and order them by FrameId so the emitted
  // layout is stable.
  std::vector<std::pair<memprof::FrameId, const memprof::Frame *>> Sorted;
  Sorted.reserve(MemProfFrameData.size());
  for (const auto &Entry : MemProfFrameData)
    Sorted.emplace_back(Entry.first, &Entry.second);
  assert(MemProfFrameData.size() == Sorted.size());
  llvm::sort(Sorted);

  // Emit every frame and record the array index assigned to its FrameId.
  llvm::DenseMap<memprof::FrameId, uint32_t> MemProfFrameIndexes;
  MemProfFrameIndexes.reserve(Sorted.size());
  uint64_t NextIndex = 0;
  for (const auto &IdAndFrame : Sorted) {
    IdAndFrame.second->serialize(OS.OS);
    MemProfFrameIndexes.insert({IdAndFrame.first, NextIndex});
    ++NextIndex;
  }
  assert(MemProfFrameData.size() == NextIndex);
  assert(MemProfFrameData.size() == MemProfFrameIndexes.size());

  // The frames have been written out; drop the MapVector's contents since
  // they are no longer needed.
  MemProfFrameData.clear();

  return MemProfFrameIndexes;
}

static uint64_t writeMemProfCallStacks(
ProfOStream &OS,
llvm::MapVector<memprof::CallStackId, llvm::SmallVector<memprof::FrameId>>
Expand All @@ -499,6 +536,33 @@ static uint64_t writeMemProfCallStacks(
return CallStackTableGenerator.Emit(OS.OS);
}

// Serialize MemProfCallStackData as a flat array of uint32_t values. Each call
// stack is written as its frame count followed by the frame-array index of
// every frame, looked up in MemProfFrameIndexes. Returns the mapping from each
// CallStackId to the position of its entry, measured in uint32_t units from
// the start of the array. MemProfCallStackData is cleared before returning.
static llvm::DenseMap<memprof::CallStackId, uint32_t>
writeMemProfCallStackArray(
    ProfOStream &OS,
    llvm::MapVector<memprof::CallStackId, llvm::SmallVector<memprof::FrameId>>
        &MemProfCallStackData,
    llvm::DenseMap<memprof::FrameId, uint32_t> &MemProfFrameIndexes) {
  llvm::DenseMap<memprof::CallStackId, uint32_t> MemProfCallStackIndexes;

  MemProfCallStackIndexes.reserve(MemProfCallStackData.size());
  uint64_t CallStackBase = OS.tell();
  for (const auto &[CSId, CallStack] : MemProfCallStackData) {
    // Everything written so far is uint32_t-sized, so the byte distance from
    // the base divided by sizeof(uint32_t) is this entry's linear index.
    uint64_t CallStackIndex = (OS.tell() - CallStackBase) / sizeof(uint32_t);
    MemProfCallStackIndexes.insert({CSId, CallStackIndex});
    // Iterate the stored call stack in place; copying it first would only add
    // a per-call-stack copy.
    OS.write32(CallStack.size());
    for (const memprof::FrameId F : CallStack) {
      assert(MemProfFrameIndexes.contains(F));
      OS.write32(MemProfFrameIndexes[F]);
    }
  }

  // Release the memory of this vector as it is no longer needed.
  MemProfCallStackData.clear();

  return MemProfCallStackIndexes;
}

// Write out MemProf Version0 as follows:
// uint64_t RecordTableOffset = RecordTableGenerator.Emit
// uint64_t FramePayloadOffset = Offset for the frame payload
Expand Down Expand Up @@ -619,9 +683,7 @@ static Error writeMemProfV2(ProfOStream &OS,

// Write out MemProf Version3 as follows:
// uint64_t Version
// uint64_t FrameTableOffset = FrameTableGenerator.Emit
// uint64_t CallStackPayloadOffset = Offset for the call stack payload
// uint64_t CallStackTableOffset = CallStackTableGenerator.Emit
// uint64_t RecordPayloadOffset = Offset for the record payload
// uint64_t RecordTableOffset = RecordTableGenerator.Emit
// uint64_t Num schema entries
Expand All @@ -637,9 +699,7 @@ static Error writeMemProfV3(ProfOStream &OS,
bool MemProfFullSchema) {
OS.write(memprof::Version3);
uint64_t HeaderUpdatePos = OS.tell();
OS.write(0ULL); // Reserve space for the memprof frame table offset.
OS.write(0ULL); // Reserve space for the memprof call stack payload offset.
OS.write(0ULL); // Reserve space for the memprof call stack table offset.
OS.write(0ULL); // Reserve space for the memprof record payload offset.
OS.write(0ULL); // Reserve space for the memprof record table offset.

Expand All @@ -648,19 +708,23 @@ static Error writeMemProfV3(ProfOStream &OS,
Schema = memprof::getFullSchema();
writeMemProfSchema(OS, Schema);

uint64_t FrameTableOffset = writeMemProfFrames(OS, MemProfData.FrameData);
llvm::DenseMap<memprof::FrameId, uint32_t> MemProfFrameIndexes =
writeMemProfFrameArray(OS, MemProfData.FrameData);

uint64_t CallStackPayloadOffset = OS.tell();
uint64_t CallStackTableOffset =
writeMemProfCallStacks(OS, MemProfData.CallStackData);
llvm::DenseMap<memprof::CallStackId, uint32_t> MemProfCallStackIndexes =
writeMemProfCallStackArray(OS, MemProfData.CallStackData,
MemProfFrameIndexes);

uint64_t RecordPayloadOffset = OS.tell();
uint64_t RecordTableOffset = writeMemProfRecords(OS, MemProfData.RecordData,
&Schema, memprof::Version3);
uint64_t RecordTableOffset =
writeMemProfRecords(OS, MemProfData.RecordData, &Schema,
memprof::Version3, &MemProfCallStackIndexes);

uint64_t Header[] = {
FrameTableOffset, CallStackPayloadOffset, CallStackTableOffset,
RecordPayloadOffset, RecordTableOffset,
CallStackPayloadOffset,
RecordPayloadOffset,
RecordTableOffset,
};
OS.patch({{HeaderUpdatePos, Header, std::size(Header)}});

Expand Down
32 changes: 29 additions & 3 deletions llvm/lib/ProfileData/MemProf.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -143,17 +143,43 @@ static void serializeV2(const IndexedMemProfRecord &Record,
LE.write<CallStackId>(CSId);
}

void IndexedMemProfRecord::serialize(const MemProfSchema &Schema,
raw_ostream &OS, IndexedVersion Version) {
static void
serializeV3(const IndexedMemProfRecord &Record, const MemProfSchema &Schema,
raw_ostream &OS,
llvm::DenseMap<CallStackId, uint32_t> &MemProfCallStackIndexes) {
using namespace support;

endian::Writer LE(OS, llvm::endianness::little);

LE.write<uint64_t>(Record.AllocSites.size());
for (const IndexedAllocationInfo &N : Record.AllocSites) {
assert(MemProfCallStackIndexes.contains(N.CSId));
LE.write<uint64_t>(MemProfCallStackIndexes[N.CSId]);
N.Info.serialize(Schema, OS);
}

// Related contexts.
LE.write<uint64_t>(Record.CallSiteIds.size());
for (const auto &CSId : Record.CallSiteIds) {
assert(MemProfCallStackIndexes.contains(CSId));
LE.write<uint64_t>(MemProfCallStackIndexes[CSId]);
}
}

void IndexedMemProfRecord::serialize(
const MemProfSchema &Schema, raw_ostream &OS, IndexedVersion Version,
llvm::DenseMap<CallStackId, uint32_t> *MemProfCallStackIndexes) {
switch (Version) {
case Version0:
case Version1:
serializeV0(*this, Schema, OS);
return;
case Version2:
case Version3:
serializeV2(*this, Schema, OS);
return;
case Version3:
serializeV3(*this, Schema, OS, *MemProfCallStackIndexes);
return;
}
llvm_unreachable("unsupported MemProf version");
}
Expand Down
11 changes: 0 additions & 11 deletions llvm/lib/Support/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -56,9 +56,6 @@ elseif( CMAKE_HOST_UNIX )
STRING(REGEX REPLACE "^lib" "" Backtrace_LIBFILE ${Backtrace_LIBFILE})
set(system_libs ${system_libs} ${Backtrace_LIBFILE})
endif()
if( LLVM_ENABLE_TERMINFO )
set(imported_libs ${imported_libs} Terminfo::terminfo)
endif()
set(system_libs ${system_libs} ${LLVM_ATOMIC_LIB})
set(system_libs ${system_libs} ${LLVM_PTHREAD_LIB})
if( UNIX AND NOT (BEOS OR HAIKU) )
Expand Down Expand Up @@ -325,14 +322,6 @@ if(LLVM_ENABLE_ZSTD)
set(llvm_system_libs ${llvm_system_libs} "${zstd_library}")
endif()

if(LLVM_ENABLE_TERMINFO)
if(NOT terminfo_library)
get_property(terminfo_library TARGET Terminfo::terminfo PROPERTY LOCATION)
endif()
get_library_name(${terminfo_library} terminfo_library)
set(llvm_system_libs ${llvm_system_libs} "${terminfo_library}")
endif()

set_property(TARGET LLVMSupport PROPERTY LLVM_SYSTEM_LIBS "${llvm_system_libs}")


Expand Down
60 changes: 4 additions & 56 deletions llvm/lib/Support/Unix/Process.inc
Original file line number Diff line number Diff line change
Expand Up @@ -341,17 +341,9 @@ unsigned Process::StandardErrColumns() {
return getColumns();
}

#ifdef LLVM_ENABLE_TERMINFO
// We manually declare these extern functions because finding the correct
// headers from various terminfo, curses, or other sources is harder than
// writing their specs down.
extern "C" int setupterm(char *term, int filedes, int *errret);
extern "C" struct term *set_curterm(struct term *termp);
extern "C" int del_curterm(struct term *termp);
extern "C" int tigetnum(char *capname);
#endif

bool checkTerminalEnvironmentForColors() {
static bool terminalHasColors() {
// Check if the current terminal is one of terminals that are known to support
// ANSI color escape codes.
if (const char *TermStr = std::getenv("TERM")) {
return StringSwitch<bool>(TermStr)
.Case("ansi", true)
Expand All @@ -368,54 +360,10 @@ bool checkTerminalEnvironmentForColors() {
return false;
}

static bool terminalHasColors(int fd) {
#ifdef LLVM_ENABLE_TERMINFO
// First, acquire a global lock because these C routines are thread hostile.
static std::mutex TermColorMutex;
std::lock_guard<std::mutex> G(TermColorMutex);

struct term *previous_term = set_curterm(nullptr);
int errret = 0;
if (setupterm(nullptr, fd, &errret) != 0)
// Regardless of why, if we can't get terminfo, we shouldn't try to print
// colors.
return false;

// Test whether the terminal as set up supports color output. How to do this
// isn't entirely obvious. We can use the curses routine 'has_colors' but it
// would be nice to avoid a dependency on curses proper when we can make do
// with a minimal terminfo parsing library. Also, we don't really care whether
// the terminal supports the curses-specific color changing routines, merely
// if it will interpret ANSI color escape codes in a reasonable way. Thus, the
// strategy here is just to query the baseline colors capability and if it
// supports colors at all to assume it will translate the escape codes into
// whatever range of colors it does support. We can add more detailed tests
// here if users report them as necessary.
//
// The 'tigetnum' routine returns -2 or -1 on errors, and might return 0 if
// the terminfo says that no colors are supported.
int colors_ti = tigetnum(const_cast<char *>("colors"));
bool HasColors =
colors_ti >= 0 ? colors_ti : checkTerminalEnvironmentForColors();

// Now extract the structure allocated by setupterm and free its memory
// through a really silly dance.
struct term *termp = set_curterm(previous_term);
(void)del_curterm(termp); // Drop any errors here.

// Return true if we found a color capabilities for the current terminal.
return HasColors;
#else
// When the terminfo database is not available, check if the current terminal
// is one of terminals that are known to support ANSI color escape codes.
return checkTerminalEnvironmentForColors();
#endif
}

bool Process::FileDescriptorHasColors(int fd) {
// A file descriptor has colors if it is displayed and the terminal has
// colors.
return FileDescriptorIsDisplayed(fd) && terminalHasColors(fd);
return FileDescriptorIsDisplayed(fd) && terminalHasColors();
}

bool Process::StandardOutHasColors() {
Expand Down
4 changes: 3 additions & 1 deletion llvm/lib/Transforms/Vectorize/VPlan.h
Original file line number Diff line number Diff line change
Expand Up @@ -2972,7 +2972,9 @@ class VPIRBasicBlock : public VPBasicBlock {

public:
VPIRBasicBlock(BasicBlock *IRBB)
: VPBasicBlock(VPIRBasicBlockSC, "ph"), IRBB(IRBB) {}
: VPBasicBlock(VPIRBasicBlockSC,
(Twine("ir-bb<") + IRBB->getName() + Twine(">")).str()),
IRBB(IRBB) {}

~VPIRBasicBlock() override {}

Expand Down
64 changes: 33 additions & 31 deletions llvm/test/CodeGen/AMDGPU/preload-kernargs.ll
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=gfx90a -amdgpu-kernarg-preload-count=2 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX90a-PRELOAD-2 %s
; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=gfx90a -amdgpu-kernarg-preload-count=8 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX90a-PRELOAD-8 %s

define amdgpu_kernel void @ptr1_i8_kernel_preload_arg(ptr addrspace(1) %out, i8 %arg0) {
define amdgpu_kernel void @ptr1_i8_kernel_preload_arg(ptr addrspace(1) %out, i8 %arg0) #0 {
; GFX940-NO-PRELOAD-LABEL: ptr1_i8_kernel_preload_arg:
; GFX940-NO-PRELOAD: ; %bb.0:
; GFX940-NO-PRELOAD-NEXT: s_load_dword s4, s[0:1], 0x8
Expand Down Expand Up @@ -74,7 +74,7 @@ define amdgpu_kernel void @ptr1_i8_kernel_preload_arg(ptr addrspace(1) %out, i8
ret void
}

define amdgpu_kernel void @ptr1_i8_zext_kernel_preload_arg(ptr addrspace(1) %out, i8 zeroext %arg0) {
define amdgpu_kernel void @ptr1_i8_zext_kernel_preload_arg(ptr addrspace(1) %out, i8 zeroext %arg0) #0 {
; GFX940-NO-PRELOAD-LABEL: ptr1_i8_zext_kernel_preload_arg:
; GFX940-NO-PRELOAD: ; %bb.0:
; GFX940-NO-PRELOAD-NEXT: s_load_dword s4, s[0:1], 0x8
Expand Down Expand Up @@ -145,7 +145,7 @@ define amdgpu_kernel void @ptr1_i8_zext_kernel_preload_arg(ptr addrspace(1) %out
ret void
}

define amdgpu_kernel void @ptr1_i16_kernel_preload_arg(ptr addrspace(1) %out, i16 %arg0) {
define amdgpu_kernel void @ptr1_i16_kernel_preload_arg(ptr addrspace(1) %out, i16 %arg0) #0 {
; GFX940-NO-PRELOAD-LABEL: ptr1_i16_kernel_preload_arg:
; GFX940-NO-PRELOAD: ; %bb.0:
; GFX940-NO-PRELOAD-NEXT: s_load_dword s4, s[0:1], 0x8
Expand Down Expand Up @@ -212,7 +212,7 @@ define amdgpu_kernel void @ptr1_i16_kernel_preload_arg(ptr addrspace(1) %out, i1
ret void
}

define amdgpu_kernel void @ptr1_i32_kernel_preload_arg(ptr addrspace(1) %out, i32 %arg0) {
define amdgpu_kernel void @ptr1_i32_kernel_preload_arg(ptr addrspace(1) %out, i32 %arg0) #0 {
; GFX940-NO-PRELOAD-LABEL: ptr1_i32_kernel_preload_arg:
; GFX940-NO-PRELOAD: ; %bb.0:
; GFX940-NO-PRELOAD-NEXT: s_load_dword s4, s[0:1], 0x8
Expand Down Expand Up @@ -273,7 +273,7 @@ define amdgpu_kernel void @ptr1_i32_kernel_preload_arg(ptr addrspace(1) %out, i3
}


define amdgpu_kernel void @i32_ptr1_i32_kernel_preload_arg(i32 %arg0, ptr addrspace(1) %out, i32 %arg1) {
define amdgpu_kernel void @i32_ptr1_i32_kernel_preload_arg(i32 %arg0, ptr addrspace(1) %out, i32 %arg1) #0 {
; GFX940-NO-PRELOAD-LABEL: i32_ptr1_i32_kernel_preload_arg:
; GFX940-NO-PRELOAD: ; %bb.0:
; GFX940-NO-PRELOAD-NEXT: s_load_dword s4, s[0:1], 0x10
Expand Down Expand Up @@ -346,7 +346,7 @@ define amdgpu_kernel void @i32_ptr1_i32_kernel_preload_arg(i32 %arg0, ptr addrsp
ret void
}

define amdgpu_kernel void @ptr1_i16_i16_kernel_preload_arg(ptr addrspace(1) %out, i16 %arg0, i16 %arg1) {
define amdgpu_kernel void @ptr1_i16_i16_kernel_preload_arg(ptr addrspace(1) %out, i16 %arg0, i16 %arg1) #0 {
; GFX940-NO-PRELOAD-LABEL: ptr1_i16_i16_kernel_preload_arg:
; GFX940-NO-PRELOAD: ; %bb.0:
; GFX940-NO-PRELOAD-NEXT: s_load_dword s4, s[0:1], 0x8
Expand Down Expand Up @@ -431,7 +431,7 @@ define amdgpu_kernel void @ptr1_i16_i16_kernel_preload_arg(ptr addrspace(1) %out
ret void
}

define amdgpu_kernel void @ptr1_v2i8_kernel_preload_arg(ptr addrspace(1) %out, <2 x i8> %in) {
define amdgpu_kernel void @ptr1_v2i8_kernel_preload_arg(ptr addrspace(1) %out, <2 x i8> %in) #0 {
; GFX940-NO-PRELOAD-LABEL: ptr1_v2i8_kernel_preload_arg:
; GFX940-NO-PRELOAD: ; %bb.0:
; GFX940-NO-PRELOAD-NEXT: s_load_dword s4, s[0:1], 0x8
Expand Down Expand Up @@ -500,7 +500,7 @@ define amdgpu_kernel void @ptr1_v2i8_kernel_preload_arg(ptr addrspace(1) %out, <
}


define amdgpu_kernel void @byref_kernel_preload_arg(ptr addrspace(1) %out, ptr addrspace(4) byref(i32) align(256) %in.byref, i32 %after.offset) {
define amdgpu_kernel void @byref_kernel_preload_arg(ptr addrspace(1) %out, ptr addrspace(4) byref(i32) align(256) %in.byref, i32 %after.offset) #0 {
; GFX940-NO-PRELOAD-LABEL: byref_kernel_preload_arg:
; GFX940-NO-PRELOAD: ; %bb.0:
; GFX940-NO-PRELOAD-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x100
Expand Down Expand Up @@ -595,7 +595,7 @@ define amdgpu_kernel void @byref_kernel_preload_arg(ptr addrspace(1) %out, ptr a
}


define amdgpu_kernel void @v8i32_kernel_preload_arg(ptr addrspace(1) nocapture %out, <8 x i32> %in) nounwind {
define amdgpu_kernel void @v8i32_kernel_preload_arg(ptr addrspace(1) nocapture %out, <8 x i32> %in) #0 {
; GFX940-NO-PRELOAD-LABEL: v8i32_kernel_preload_arg:
; GFX940-NO-PRELOAD: ; %bb.0:
; GFX940-NO-PRELOAD-NEXT: s_load_dwordx8 s[4:11], s[0:1], 0x20
Expand Down Expand Up @@ -717,7 +717,7 @@ define amdgpu_kernel void @v8i32_kernel_preload_arg(ptr addrspace(1) nocapture %
ret void
}

define amdgpu_kernel void @v3i16_kernel_preload_arg(ptr addrspace(1) nocapture %out, <3 x i16> %in) nounwind {
define amdgpu_kernel void @v3i16_kernel_preload_arg(ptr addrspace(1) nocapture %out, <3 x i16> %in) #0 {
; GFX940-NO-PRELOAD-LABEL: v3i16_kernel_preload_arg:
; GFX940-NO-PRELOAD: ; %bb.0:
; GFX940-NO-PRELOAD-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0
Expand Down Expand Up @@ -787,7 +787,7 @@ define amdgpu_kernel void @v3i16_kernel_preload_arg(ptr addrspace(1) nocapture %
ret void
}

define amdgpu_kernel void @v3i32_kernel_preload_arg(ptr addrspace(1) nocapture %out, <3 x i32> %in) nounwind {
define amdgpu_kernel void @v3i32_kernel_preload_arg(ptr addrspace(1) nocapture %out, <3 x i32> %in) #0 {
; GFX940-NO-PRELOAD-LABEL: v3i32_kernel_preload_arg:
; GFX940-NO-PRELOAD: ; %bb.0:
; GFX940-NO-PRELOAD-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x10
Expand Down Expand Up @@ -859,7 +859,7 @@ define amdgpu_kernel void @v3i32_kernel_preload_arg(ptr addrspace(1) nocapture %
ret void
}

define amdgpu_kernel void @v3f32_kernel_preload_arg(ptr addrspace(1) nocapture %out, <3 x float> %in) nounwind {
define amdgpu_kernel void @v3f32_kernel_preload_arg(ptr addrspace(1) nocapture %out, <3 x float> %in) #0 {
; GFX940-NO-PRELOAD-LABEL: v3f32_kernel_preload_arg:
; GFX940-NO-PRELOAD: ; %bb.0:
; GFX940-NO-PRELOAD-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x10
Expand Down Expand Up @@ -931,7 +931,7 @@ define amdgpu_kernel void @v3f32_kernel_preload_arg(ptr addrspace(1) nocapture %
ret void
}

define amdgpu_kernel void @v5i8_kernel_preload_arg(ptr addrspace(1) nocapture %out, <5 x i8> %in) nounwind {
define amdgpu_kernel void @v5i8_kernel_preload_arg(ptr addrspace(1) nocapture %out, <5 x i8> %in) #0 {
; GFX940-NO-PRELOAD-LABEL: v5i8_kernel_preload_arg:
; GFX940-NO-PRELOAD: ; %bb.0:
; GFX940-NO-PRELOAD-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0
Expand Down Expand Up @@ -1029,7 +1029,7 @@ define amdgpu_kernel void @v5i8_kernel_preload_arg(ptr addrspace(1) nocapture %o
ret void
}

define amdgpu_kernel void @v5f64_kernel_preload_arg(ptr addrspace(1) nocapture %out, <5 x double> %in) nounwind {
define amdgpu_kernel void @v5f64_kernel_preload_arg(ptr addrspace(1) nocapture %out, <5 x double> %in) #0 {
; GFX940-NO-PRELOAD-LABEL: v5f64_kernel_preload_arg:
; GFX940-NO-PRELOAD: ; %bb.0:
; GFX940-NO-PRELOAD-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x60
Expand Down Expand Up @@ -1169,7 +1169,7 @@ define amdgpu_kernel void @v5f64_kernel_preload_arg(ptr addrspace(1) nocapture %
ret void
}

define amdgpu_kernel void @v8i8_kernel_preload_arg(ptr addrspace(1) %out, <8 x i8> %in) {
define amdgpu_kernel void @v8i8_kernel_preload_arg(ptr addrspace(1) %out, <8 x i8> %in) #0 {
; GFX940-NO-PRELOAD-LABEL: v8i8_kernel_preload_arg:
; GFX940-NO-PRELOAD: ; %bb.0:
; GFX940-NO-PRELOAD-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0
Expand Down Expand Up @@ -1289,7 +1289,7 @@ define amdgpu_kernel void @v8i8_kernel_preload_arg(ptr addrspace(1) %out, <8 x i
ret void
}

define amdgpu_kernel void @i64_kernel_preload_arg(ptr addrspace(1) %out, i64 %a) {
define amdgpu_kernel void @i64_kernel_preload_arg(ptr addrspace(1) %out, i64 %a) #0 {
; GFX940-NO-PRELOAD-LABEL: i64_kernel_preload_arg:
; GFX940-NO-PRELOAD: ; %bb.0:
; GFX940-NO-PRELOAD-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0
Expand Down Expand Up @@ -1349,7 +1349,7 @@ define amdgpu_kernel void @i64_kernel_preload_arg(ptr addrspace(1) %out, i64 %a)
ret void
}

define amdgpu_kernel void @f64_kernel_preload_arg(ptr addrspace(1) %out, double %in) {
define amdgpu_kernel void @f64_kernel_preload_arg(ptr addrspace(1) %out, double %in) #0 {
; GFX940-NO-PRELOAD-LABEL: f64_kernel_preload_arg:
; GFX940-NO-PRELOAD: ; %bb.0:
; GFX940-NO-PRELOAD-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0
Expand Down Expand Up @@ -1409,7 +1409,7 @@ define amdgpu_kernel void @f64_kernel_preload_arg(ptr addrspace(1) %out, double
ret void
}

define amdgpu_kernel void @half_kernel_preload_arg(ptr addrspace(1) %out, half %in) {
define amdgpu_kernel void @half_kernel_preload_arg(ptr addrspace(1) %out, half %in) #0 {
; GFX940-NO-PRELOAD-LABEL: half_kernel_preload_arg:
; GFX940-NO-PRELOAD: ; %bb.0:
; GFX940-NO-PRELOAD-NEXT: s_load_dword s4, s[0:1], 0x8
Expand Down Expand Up @@ -1469,7 +1469,7 @@ define amdgpu_kernel void @half_kernel_preload_arg(ptr addrspace(1) %out, half %
ret void
}

define amdgpu_kernel void @bfloat_kernel_preload_arg(ptr addrspace(1) %out, bfloat %in) {
define amdgpu_kernel void @bfloat_kernel_preload_arg(ptr addrspace(1) %out, bfloat %in) #0 {
; GFX940-NO-PRELOAD-LABEL: bfloat_kernel_preload_arg:
; GFX940-NO-PRELOAD: ; %bb.0:
; GFX940-NO-PRELOAD-NEXT: s_load_dword s4, s[0:1], 0x8
Expand Down Expand Up @@ -1529,7 +1529,7 @@ define amdgpu_kernel void @bfloat_kernel_preload_arg(ptr addrspace(1) %out, bflo
ret void
}

define amdgpu_kernel void @v2bfloat_kernel_preload_arg(ptr addrspace(1) %out, <2 x bfloat> %in) {
define amdgpu_kernel void @v2bfloat_kernel_preload_arg(ptr addrspace(1) %out, <2 x bfloat> %in) #0 {
; GFX940-NO-PRELOAD-LABEL: v2bfloat_kernel_preload_arg:
; GFX940-NO-PRELOAD: ; %bb.0:
; GFX940-NO-PRELOAD-NEXT: s_load_dword s4, s[0:1], 0x8
Expand Down Expand Up @@ -1589,7 +1589,7 @@ define amdgpu_kernel void @v2bfloat_kernel_preload_arg(ptr addrspace(1) %out, <2
ret void
}

define amdgpu_kernel void @v3bfloat_kernel_preload_arg(ptr addrspace(1) %out, <3 x bfloat> %in) {
define amdgpu_kernel void @v3bfloat_kernel_preload_arg(ptr addrspace(1) %out, <3 x bfloat> %in) #0 {
; GFX940-NO-PRELOAD-LABEL: v3bfloat_kernel_preload_arg:
; GFX940-NO-PRELOAD: ; %bb.0:
; GFX940-NO-PRELOAD-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0
Expand Down Expand Up @@ -1659,7 +1659,7 @@ define amdgpu_kernel void @v3bfloat_kernel_preload_arg(ptr addrspace(1) %out, <3
ret void
}

define amdgpu_kernel void @v6bfloat_kernel_preload_arg(ptr addrspace(1) %out, <6 x bfloat> %in) {
define amdgpu_kernel void @v6bfloat_kernel_preload_arg(ptr addrspace(1) %out, <6 x bfloat> %in) #0 {
; GFX940-NO-PRELOAD-LABEL: v6bfloat_kernel_preload_arg:
; GFX940-NO-PRELOAD: ; %bb.0:
; GFX940-NO-PRELOAD-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x10
Expand Down Expand Up @@ -1731,7 +1731,7 @@ define amdgpu_kernel void @v6bfloat_kernel_preload_arg(ptr addrspace(1) %out, <6
ret void
}

define amdgpu_kernel void @half_v7bfloat_kernel_preload_arg(ptr addrspace(1) %out, half %in, <7 x bfloat> %in2, ptr addrspace(1) %out2) {
define amdgpu_kernel void @half_v7bfloat_kernel_preload_arg(ptr addrspace(1) %out, half %in, <7 x bfloat> %in2, ptr addrspace(1) %out2) #0 {
; GFX940-NO-PRELOAD-LABEL: half_v7bfloat_kernel_preload_arg:
; GFX940-NO-PRELOAD: ; %bb.0:
; GFX940-NO-PRELOAD-NEXT: s_load_dword s10, s[0:1], 0x8
Expand Down Expand Up @@ -1840,7 +1840,7 @@ define amdgpu_kernel void @half_v7bfloat_kernel_preload_arg(ptr addrspace(1) %ou
ret void
}

define amdgpu_kernel void @i1_kernel_preload_arg(ptr addrspace(1) %out, i1 %in) {
define amdgpu_kernel void @i1_kernel_preload_arg(ptr addrspace(1) %out, i1 %in) #0 {
; GFX940-NO-PRELOAD-LABEL: i1_kernel_preload_arg:
; GFX940-NO-PRELOAD: ; %bb.0:
; GFX940-NO-PRELOAD-NEXT: s_load_dword s4, s[0:1], 0x8
Expand Down Expand Up @@ -1906,7 +1906,7 @@ define amdgpu_kernel void @i1_kernel_preload_arg(ptr addrspace(1) %out, i1 %in)
ret void
}

define amdgpu_kernel void @fp128_kernel_preload_arg(ptr addrspace(1) %out, fp128 %in) {
define amdgpu_kernel void @fp128_kernel_preload_arg(ptr addrspace(1) %out, fp128 %in) #0 {
; GFX940-NO-PRELOAD-LABEL: fp128_kernel_preload_arg:
; GFX940-NO-PRELOAD: ; %bb.0:
; GFX940-NO-PRELOAD-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x10
Expand Down Expand Up @@ -1980,7 +1980,7 @@ define amdgpu_kernel void @fp128_kernel_preload_arg(ptr addrspace(1) %out, fp128
ret void
}

define amdgpu_kernel void @v7i8_kernel_preload_arg(ptr addrspace(1) %out, <7 x i8> %in) {
define amdgpu_kernel void @v7i8_kernel_preload_arg(ptr addrspace(1) %out, <7 x i8> %in) #0 {
; GFX940-NO-PRELOAD-LABEL: v7i8_kernel_preload_arg:
; GFX940-NO-PRELOAD: ; %bb.0:
; GFX940-NO-PRELOAD-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0
Expand Down Expand Up @@ -2096,7 +2096,7 @@ define amdgpu_kernel void @v7i8_kernel_preload_arg(ptr addrspace(1) %out, <7 x i
ret void
}

define amdgpu_kernel void @v7half_kernel_preload_arg(ptr addrspace(1) %out, <7 x half> %in) {
define amdgpu_kernel void @v7half_kernel_preload_arg(ptr addrspace(1) %out, <7 x half> %in) #0 {
; GFX940-NO-PRELOAD-LABEL: v7half_kernel_preload_arg:
; GFX940-NO-PRELOAD: ; %bb.0:
; GFX940-NO-PRELOAD-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x10
Expand Down Expand Up @@ -2181,7 +2181,7 @@ define amdgpu_kernel void @v7half_kernel_preload_arg(ptr addrspace(1) %out, <7 x
}

; Test when previous argument was not dword aligned.
define amdgpu_kernel void @i16_i32_kernel_preload_arg(ptr addrspace(1) %out, i16 %in, i32 %in2, ptr addrspace(1) %out2) {
define amdgpu_kernel void @i16_i32_kernel_preload_arg(ptr addrspace(1) %out, i16 %in, i32 %in2, ptr addrspace(1) %out2) #0 {
; GFX940-NO-PRELOAD-LABEL: i16_i32_kernel_preload_arg:
; GFX940-NO-PRELOAD: ; %bb.0:
; GFX940-NO-PRELOAD-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x0
Expand Down Expand Up @@ -2260,7 +2260,7 @@ define amdgpu_kernel void @i16_i32_kernel_preload_arg(ptr addrspace(1) %out, i16
ret void
}

define amdgpu_kernel void @i16_v3i32_kernel_preload_arg(ptr addrspace(1) %out, i16 %in, <3 x i32> %in2, ptr addrspace(1) %out2) {
define amdgpu_kernel void @i16_v3i32_kernel_preload_arg(ptr addrspace(1) %out, i16 %in, <3 x i32> %in2, ptr addrspace(1) %out2) #0 {
; GFX940-NO-PRELOAD-LABEL: i16_v3i32_kernel_preload_arg:
; GFX940-NO-PRELOAD: ; %bb.0:
; GFX940-NO-PRELOAD-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x10
Expand Down Expand Up @@ -2359,7 +2359,7 @@ define amdgpu_kernel void @i16_v3i32_kernel_preload_arg(ptr addrspace(1) %out, i
ret void
}

define amdgpu_kernel void @i16_i16_kernel_preload_arg(ptr addrspace(1) %out, i16 %in, i16 %in2, ptr addrspace(1) %out2) {
define amdgpu_kernel void @i16_i16_kernel_preload_arg(ptr addrspace(1) %out, i16 %in, i16 %in2, ptr addrspace(1) %out2) #0 {
; GFX940-NO-PRELOAD-LABEL: i16_i16_kernel_preload_arg:
; GFX940-NO-PRELOAD: ; %bb.0:
; GFX940-NO-PRELOAD-NEXT: s_load_dword s6, s[0:1], 0x8
Expand Down Expand Up @@ -2436,7 +2436,7 @@ define amdgpu_kernel void @i16_i16_kernel_preload_arg(ptr addrspace(1) %out, i16
ret void
}

define amdgpu_kernel void @i16_v2i8_kernel_preload_arg(ptr addrspace(1) %out, i16 %in, <2 x i8> %in2, ptr addrspace(1) %out2) {
define amdgpu_kernel void @i16_v2i8_kernel_preload_arg(ptr addrspace(1) %out, i16 %in, <2 x i8> %in2, ptr addrspace(1) %out2) #0 {
; GFX940-NO-PRELOAD-LABEL: i16_v2i8_kernel_preload_arg:
; GFX940-NO-PRELOAD: ; %bb.0:
; GFX940-NO-PRELOAD-NEXT: s_load_dword s6, s[0:1], 0x8
Expand Down Expand Up @@ -2520,3 +2520,5 @@ define amdgpu_kernel void @i16_v2i8_kernel_preload_arg(ptr addrspace(1) %out, i1
store <2 x i8> %in2, ptr addrspace(1) %out2
ret void
}

attributes #0 = { "amdgpu-no-agpr" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" }
4 changes: 2 additions & 2 deletions llvm/test/CodeGen/X86/apx/asm-constraint-2-jR.ll
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: not llc -mtriple=x86_64 %s 2>&1 | FileCheck %s --check-prefix=ERR
; RUN: not llc -mtriple=x86_64 %s -o %t 2>&1 | FileCheck %s --check-prefix=ERR
; RUN: llc -mtriple=x86_64 -mattr=+egpr < %s | FileCheck %s
; RUN: llc -mtriple=x86_64 -mattr=+egpr,+inline-asm-use-gpr32 < %s | FileCheck %s
; RUN: not llc -mtriple=x86_64 -mattr=+inline-asm-use-gpr32 %s 2>&1 | FileCheck %s --check-prefix=ERR
; RUN: not llc -mtriple=x86_64 -mattr=+inline-asm-use-gpr32 %s -o %t 2>&1 | FileCheck %s --check-prefix=ERR

; ERR: error: inline assembly requires more registers than available

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ target triple = "aarch64-unknown-linux-gnu"
; VPLANS-NEXT: Live-in vp<[[VFxUF:%.+]]> = VF * UF
; VPLANS-NEXT: vp<[[TC:%[0-9]+]]> = original trip-count
; VPLANS-EMPTY:
; VPLANS-NEXT: ph:
; VPLANS-NEXT: ir-bb<entry>:
; VPLANS-NEXT: EMIT vp<[[TC]]> = EXPAND SCEV (1 umax %n)
; VPLANS-NEXT: No successors
; VPLANS-EMPTY:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -54,7 +54,7 @@ define void @vector_reverse_i64(ptr nocapture noundef writeonly %A, ptr nocaptur
; CHECK-NEXT: Live-in vp<[[VFxUF:%.+]]> = VF * UF
; CHECK-NEXT: Live-in vp<[[VEC_TC:%.+]]> = vector-trip-count
; CHECK-NEXT: vp<[[TC:%.+]]> = original trip-count
; CHECK: ph:
; CHECK: ir-bb<for.body.preheader>:
; CHECK-NEXT: EMIT vp<[[TC]]> = EXPAND SCEV (zext i32 %n to i64)
; CHECK-NEXT: No successors
; CHECK: vector.ph:
Expand Down Expand Up @@ -195,7 +195,7 @@ define void @vector_reverse_f32(ptr nocapture noundef writeonly %A, ptr nocaptur
; CHECK-NEXT: Live-in vp<[[VFxUF:%.+]]> = VF * UF
; CHECK-NEXT: Live-in vp<[[VEC_TC:%.+]]> = vector-trip-count
; CHECK-NEXT: vp<[[TC:%.+]]> = original trip-count
; CHECK: ph:
; CHECK: ir-bb<for.body.preheader>:
; CHECK-NEXT: EMIT vp<[[TC]]> = EXPAND SCEV (zext i32 %n to i64)
; CHECK-NEXT: No successors
; CHECK: vector.ph:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -350,7 +350,7 @@ define void @sink_replicate_region_after_replicate_region(ptr %ptr, ptr noalias
; CHECK-NEXT: Live-in vp<[[BTC:%.+]]> = backedge-taken count
; CHECK-NEXT: vp<[[TC:%.+]]> = original trip-count
; CHECK-EMPTY:
; CHECK-NEXT: ph:
; CHECK-NEXT: ir-bb<entry>:
; CHECK-NEXT: EMIT vp<[[TC]]> = EXPAND SCEV (1 smax (1 + (sext i8 %y to i32))<nsw>)
; CHECK-NEXT: No successors
; CHECK-EMPTY:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@
; DBG-NEXT: Live-in vp<[[VEC_TC:%.+]]> = vector-trip-count
; DBG-NEXT: vp<[[TC:%.+]]> = original trip-count
; DBG-EMPTY:
; DBG-NEXT: ph:
; DBG-NEXT: ir-bb<entry>:
; DBG-NEXT: EMIT vp<[[TC]]> = EXPAND SCEV (1000 + (-1 * %start))
; DBG-NEXT: No successors
; DBG-EMPTY:
Expand Down Expand Up @@ -179,7 +179,7 @@ exit:
; DBG-NEXT: Live-in vp<[[VTC:%.+]]> = vector-trip-count
; DBG-NEXT: vp<[[TC:%.+]]> = original trip-count
; DBG-EMPTY:
; DBG-NEXT: ph:
; DBG-NEXT: ir-bb<entry>:
; DBG-NEXT: EMIT vp<[[TC]]> = EXPAND SCEV (zext i32 (1 smax %n) to i64)
; DBG-NEXT: No successors
; DBG-EMPTY:
Expand Down
2 changes: 1 addition & 1 deletion llvm/test/Transforms/LoopVectorize/vplan-dot-printing.ll
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ define void @print_call_and_memory(i64 %n, ptr noalias %y, ptr noalias %x) nounw
; CHECK-NEXT: edge [fontname=Courier, fontsize=30]
; CHECK-NEXT: compound=true
; CHECK-NEXT: N0 [label =
; CHECK-NEXT: "ph:\l" +
; CHECK-NEXT: "ir-bb\<for.body.preheader\>:\l" +
; CHECK-NEXT: "No successors\l"
; CHECK-NEXT: ]
; CHECK-NEXT: N1 [label =
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ define void @test_tc_less_than_16(ptr %A, i64 %N) {
; CHECK-NEXT: Live-in vp<[[VTC:%.+]]> = vector-trip-count
; CHECK-NEXT: vp<[[TC:%.+]]> = original trip-count
; CHECK-EMPTY:
; CHECK-NEXT: ph:
; CHECK-NEXT: ir-bb<entry>:
; CHECK-NEXT: EMIT vp<[[TC]]> = EXPAND SCEV (zext i4 (trunc i64 %N to i4) to i64)
; CHECK-NEXT: No successors
; CHECK-EMPTY:
Expand Down Expand Up @@ -45,7 +45,7 @@ define void @test_tc_less_than_16(ptr %A, i64 %N) {
; CHECK-NEXT: Live-in vp<[[VFxUF:%.+]]> = VF * UF
; CHECK-NEXT: vp<[[TC:%.+]]> = original trip-count
; CHECK-EMPTY:
; CHECK-NEXT: ph:
; CHECK-NEXT: ir-bb<entry>:
; CHECK-NEXT: EMIT vp<[[TC]]> = EXPAND SCEV (zext i4 (trunc i64 %N to i4) to i64)
; CHECK-NEXT: No successors
; CHECK-EMPTY:
Expand Down
4 changes: 2 additions & 2 deletions llvm/test/Transforms/LoopVectorize/vplan-printing.ll
Original file line number Diff line number Diff line change
Expand Up @@ -215,7 +215,7 @@ define void @print_replicate_predicated_phi(i64 %n, ptr %x) {
; CHECK-NEXT: Live-in vp<[[VEC_TC:%.+]]> = vector-trip-count
; CHECK-NEXT: vp<[[TC:%.+]]> = original trip-count
; CHECK-EMPTY:
; CHECK-NEXT: ph:
; CHECK-NEXT: ir-bb<entry>:
; CHECK-NEXT: EMIT vp<[[TC]]> = EXPAND SCEV (1 smax %n)
; CHECK-NEXT: No successors
; CHECK-EMPTY:
Expand Down Expand Up @@ -507,7 +507,7 @@ define void @print_expand_scev(i64 %y, ptr %ptr) {
; CHECK-NEXT: Live-in vp<[[VTC:%.+]]> = vector-trip-count
; CHECK-NEXT: vp<[[TC:%.+]]> = original trip-count
; CHECK-EMPTY:
; CHECK-NEXT: ph:
; CHECK-NEXT: ir-bb<entry>:
; CHECK-NEXT: EMIT vp<[[TC]]> = EXPAND SCEV (1 + ((15 + (%y /u 492802768830814060))<nuw><nsw> /u (1 + (%y /u 492802768830814060))<nuw><nsw>))<nuw><nsw>
; CHECK-NEXT: EMIT vp<[[EXP_SCEV:%.+]]> = EXPAND SCEV (1 + (%y /u 492802768830814060))<nuw><nsw>
; CHECK-NEXT: No successors
Expand Down
20 changes: 10 additions & 10 deletions llvm/test/Transforms/LoopVectorize/vplan-sink-scalars-and-merge.ll
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f3
; CHECK-NEXT: Live-in vp<[[BTC:%.+]]> = backedge-taken count
; CHECK-NEXT: vp<[[TC:%.+]]> = original trip-count
; CHECK-EMPTY:
; CHECK-NEXT: ph:
; CHECK-NEXT: ir-bb<entry>:
; CHECK-NEXT: EMIT vp<[[TC]]> = EXPAND SCEV (1 + (8 umin %k))<nuw><nsw>
; CHECK-NEXT: No successors
; CHECK-EMPTY:
Expand Down Expand Up @@ -85,7 +85,7 @@ exit:
; CHECK-NEXT: Live-in vp<[[BTC:%.+]]> = backedge-taken count
; CHECK-NEXT: vp<[[TC:%.+]]> = original trip-count
; CHECK-EMPTY:
; CHECK-NEXT: ph:
; CHECK-NEXT: ir-bb<entry>:
; CHECK-NEXT: EMIT vp<[[TC]]> = EXPAND SCEV (1 + (8 umin %k))<nuw><nsw>
; CHECK-NEXT: No successors
; CHECK-EMPTY:
Expand Down Expand Up @@ -169,7 +169,7 @@ exit:
; CHECK-NEXT: Live-in vp<[[BTC:%.+]]> = backedge-taken count
; CHECK-NEXT: vp<[[TC:%.+]]> = original trip-count
; CHECK-EMPTY:
; CHECK-NEXT: ph:
; CHECK-NEXT: ir-bb<entry>:
; CHECK-NEXT: EMIT vp<[[TC]]> = EXPAND SCEV (1 + (8 umin %k))<nuw><nsw>
; CHECK-NEXT: No successors
; CHECK-EMPTY:
Expand Down Expand Up @@ -326,7 +326,7 @@ define void @pred_cfg1(i32 %k, i32 %j) {
; CHECK-NEXT: Live-in vp<[[BTC:%.+]]> = backedge-taken count
; CHECK-NEXT: vp<[[TC:%.+]]> = original trip-count
; CHECK-EMPTY:
; CHECK-NEXT: ph:
; CHECK-NEXT: ir-bb<entry>:
; CHECK-NEXT: EMIT vp<[[TC]]> = EXPAND SCEV (1 + (8 umin %k))<nuw><nsw>
; CHECK-NEXT: No successors
; CHECK-EMPTY:
Expand Down Expand Up @@ -423,7 +423,7 @@ define void @pred_cfg2(i32 %k, i32 %j) {
; CHECK-NEXT: Live-in vp<[[BTC:%.+]]> = backedge-taken count
; CHECK-NEXT: vp<[[TC:%.+]]> = original trip-count
; CHECK-EMPTY:
; CHECK-NEXT: ph:
; CHECK-NEXT: ir-bb<entry>:
; CHECK-NEXT: EMIT vp<[[TC]]> = EXPAND SCEV (1 + (8 umin %k))<nuw><nsw>
; CHECK-NEXT: No successors
; CHECK-EMPTY:
Expand Down Expand Up @@ -529,7 +529,7 @@ define void @pred_cfg3(i32 %k, i32 %j) {
; CHECK-NEXT: Live-in vp<[[BTC:%.+]]> = backedge-taken count
; CHECK-NEXT: vp<[[TC:%.+]]> = original trip-count
; CHECK-EMPTY:
; CHECK-NEXT: ph:
; CHECK-NEXT: ir-bb<entry>:
; CHECK-NEXT: EMIT vp<[[TC]]> = EXPAND SCEV (1 + (8 umin %k))<nuw><nsw>
; CHECK-NEXT: No successors
; CHECK-EMPTY:
Expand Down Expand Up @@ -635,7 +635,7 @@ define void @merge_3_replicate_region(i32 %k, i32 %j) {
; CHECK-NEXT: Live-in vp<[[BTC:%.+]]> = backedge-taken count
; CHECK-NEXT: vp<[[TC:%.+]]> = original trip-count
; CHECK-EMPTY:
; CHECK-NEXT: ph:
; CHECK-NEXT: ir-bb<entry>:
; CHECK-NEXT: EMIT vp<[[TC]]> = EXPAND SCEV (1 + (8 umin %k))<nuw><nsw>
; CHECK-NEXT: No successors
; CHECK-EMPTY:
Expand Down Expand Up @@ -740,7 +740,7 @@ define void @update_2_uses_in_same_recipe_in_merged_block(i32 %k) {
; CHECK-NEXT: Live-in vp<[[BTC:%.+]]> = backedge-taken count
; CHECK-NEXT: vp<[[TC:%.+]]> = original trip-count
; CHECK-EMPTY:
; CHECK-NEXT: ph:
; CHECK-NEXT: ir-bb<entry>:
; CHECK-NEXT: EMIT vp<[[TC]]> = EXPAND SCEV (1 + (8 umin %k))<nuw><nsw>
; CHECK-NEXT: No successors
; CHECK-EMPTY:
Expand Down Expand Up @@ -807,7 +807,7 @@ define void @recipe_in_merge_candidate_used_by_first_order_recurrence(i32 %k) {
; CHECK-NEXT: Live-in vp<[[BTC:%.+]]> = backedge-taken count
; CHECK-NEXT: vp<[[TC:%.+]]> = original trip-count
; CHECK-EMPTY:
; CHECK-NEXT: ph:
; CHECK-NEXT: ir-bb<entry>:
; CHECK-NEXT: EMIT vp<[[TC]]> = EXPAND SCEV (1 + (8 umin %k))<nuw><nsw>
; CHECK-NEXT: No successors
; CHECK-EMPTY:
Expand Down Expand Up @@ -1094,7 +1094,7 @@ define void @ptr_induction_remove_dead_recipe(ptr %start, ptr %end) {
; CHECK-NEXT: Live-in vp<[[VEC_TC:%.+]]> = vector-trip-count
; CHECK-NEXT: vp<[[TC:%.+]]> = original trip-count
; CHECK-EMPTY:
; CHECK-NEXT: ph:
; CHECK-NEXT: ir-bb<entry>:
; CHECK-NEXT: EMIT vp<[[TC]]> = EXPAND SCEV ((-1 * (ptrtoint ptr %end to i64)) + (ptrtoint ptr %start to i64))
; CHECK-NEXT: No successors
; CHECK-EMPTY:
Expand Down
2 changes: 1 addition & 1 deletion llvm/unittests/Transforms/Vectorize/VPlanHCFGTest.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -106,7 +106,7 @@ node [shape=rect, fontname=Courier, fontsize=30]
edge [fontname=Courier, fontsize=30]
compound=true
N0 [label =
"ph:\l" +
"ir-bb\<entry\>:\l" +
" EMIT vp\<%1\> = EXPAND SCEV (-1 + %N)\l" +
"No successors\l"
]
Expand Down
2 changes: 1 addition & 1 deletion llvm/utils/gn/README.rst
Original file line number Diff line number Diff line change
Expand Up @@ -131,7 +131,7 @@ configure is used for three classes of feature checks:

For the last two points, it would be nice if LLVM didn't have a single
``config.h`` header, but one header per toggle. That way, when e.g.
``llvm_enable_terminfo`` is toggled, only the 3 files caring about that setting
``llvm_enable_zlib`` is toggled, only the 3 files caring about that setting
would need to be rebuilt, instead of everything including ``config.h``.

GN doesn't believe in users setting arbitrary cflags from an environment
Expand Down
12 changes: 0 additions & 12 deletions llvm/utils/gn/build/libs/terminfo/BUILD.gn

This file was deleted.

4 changes: 0 additions & 4 deletions llvm/utils/gn/build/libs/terminfo/enable.gni

This file was deleted.

7 changes: 0 additions & 7 deletions llvm/utils/gn/secondary/llvm/include/llvm/Config/BUILD.gn
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,6 @@ import("//llvm/utils/gn/build/buildflags.gni")
import("//llvm/utils/gn/build/libs/curl/enable.gni")
import("//llvm/utils/gn/build/libs/edit/enable.gni")
import("//llvm/utils/gn/build/libs/pthread/enable.gni")
import("//llvm/utils/gn/build/libs/terminfo/enable.gni")
import("//llvm/utils/gn/build/libs/xar/enable.gni")
import("//llvm/utils/gn/build/libs/xml/enable.gni")
import("//llvm/utils/gn/build/libs/zlib/enable.gni")
Expand Down Expand Up @@ -294,12 +293,6 @@ write_cmake_config("config") {
values += [ "HAVE_LIBEDIT=" ]
}

if (llvm_enable_terminfo) {
values += [ "LLVM_ENABLE_TERMINFO=1" ]
} else {
values += [ "LLVM_ENABLE_TERMINFO=" ]
}

if (llvm_enable_libxml2) {
values += [ "LLVM_ENABLE_LIBXML2=1" ]
} else {
Expand Down
1 change: 0 additions & 1 deletion llvm/utils/gn/secondary/llvm/lib/Support/BUILD.gn
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,6 @@ static_library("Support") {
"//llvm/include/llvm/Support:write_vcsrevision",
"//llvm/lib/Demangle",
"//llvm/utils/gn/build/libs/pthread",
"//llvm/utils/gn/build/libs/terminfo",
"//llvm/utils/gn/build/libs/zlib",
]

Expand Down
6 changes: 1 addition & 5 deletions llvm/utils/gn/secondary/llvm/tools/llvm-config/BUILD.gn
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
import("//llvm/lib/Target/targets_string.gni")
import("//llvm/utils/gn/build/buildflags.gni")
import("//llvm/utils/gn/build/libs/pthread/enable.gni")
import("//llvm/utils/gn/build/libs/terminfo/enable.gni")
import("//llvm/utils/gn/build/libs/xml/enable.gni")
import("//llvm/utils/gn/build/libs/zlib/enable.gni")
import("//llvm/utils/gn/build/write_cmake_config.gni")
Expand Down Expand Up @@ -36,7 +35,7 @@ write_cmake_config("BuildVariables.inc") {
lib = ""
}

# Windows doesn't use any of libxml2, terminfo, zlib by default.
# Windows doesn't use any of libxml2, zlib by default.
# Make GN not warn about these variables being unused.
not_needed([
"l",
Expand All @@ -63,9 +62,6 @@ write_cmake_config("BuildVariables.inc") {
if (llvm_enable_libxml2) {
system_libs += " ${l}xml2${lib}"
}
if (llvm_enable_terminfo) {
system_libs += " ${l}ncurses${lib}"
}
if (llvm_enable_zlib) {
system_libs += " ${l}z${lib}"
}
Expand Down
3 changes: 0 additions & 3 deletions utils/bazel/.bazelrc
Original file line number Diff line number Diff line change
Expand Up @@ -51,9 +51,6 @@ build --experimental_cc_shared_library
build:zlib_external --repo_env=BAZEL_LLVM_ZLIB_STRATEGY=external
build:zlib_system --repo_env=BAZEL_LLVM_ZLIB_STRATEGY=system

build:terminfo_external --repo_env=BAZEL_LLVM_TERMINFO_STRATEGY=external
build:terminfo_system --repo_env=BAZEL_LLVM_TERMINFO_STRATEGY=system

###############################################################################
# Options for "generic_clang" builds: these options should generally apply to
# builds using a Clang-based compiler, and default to the `clang` executable on
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -222,9 +222,6 @@
/* Define to 1 if you have the <sys/types.h> header file. */
#define HAVE_SYS_TYPES_H 1

/* Define if the setupterm() function is supported this platform. */
/* LLVM_ENABLE_TERMINFO defined in Bazel */

/* Define to 1 if you have the <termios.h> header file. */
#define HAVE_TERMIOS_H 1

Expand Down
3 changes: 0 additions & 3 deletions utils/bazel/llvm_configs/config.h.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -209,9 +209,6 @@
/* Define to 1 if you have the <sys/types.h> header file. */
#cmakedefine HAVE_SYS_TYPES_H ${HAVE_SYS_TYPES_H}

/* Define if the setupterm() function is supported this platform. */
#cmakedefine LLVM_ENABLE_TERMINFO ${LLVM_ENABLE_TERMINFO}

/* Define to 1 if you have the <termios.h> header file. */
#cmakedefine HAVE_TERMIOS_H ${HAVE_TERMIOS_H}

Expand Down