223 changes: 109 additions & 114 deletions clang/lib/CodeGen/CGBuiltin.cpp

Large diffs are not rendered by default.

6 changes: 6 additions & 0 deletions clang/lib/Driver/ToolChains/AMDGPU.cpp
@@ -14,6 +14,7 @@
#include "clang/Driver/DriverDiagnostic.h"
#include "clang/Driver/InputInfo.h"
#include "clang/Driver/Options.h"
#include "clang/Driver/SanitizerArgs.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/Option/ArgList.h"
#include "llvm/Support/Error.h"
@@ -946,6 +947,11 @@ void ROCMToolChain::addClangTargetOptions(
DriverArgs, LibDeviceFile, Wave64, DAZ, FiniteOnly, UnsafeMathOpt,
FastRelaxedMath, CorrectSqrt, ABIVer, false));

if (getSanitizerArgs(DriverArgs).needsAsanRt()) {
CC1Args.push_back("-mlink-bitcode-file");
CC1Args.push_back(
DriverArgs.MakeArgString(RocmInstallation->getAsanRTLPath()));
}
for (StringRef BCFile : BCLibs) {
CC1Args.push_back("-mlink-builtin-bitcode");
CC1Args.push_back(DriverArgs.MakeArgString(BCFile));
3 changes: 3 additions & 0 deletions clang/lib/Driver/ToolChains/AMDGPU.h
@@ -140,6 +140,9 @@ class LLVM_LIBRARY_VISIBILITY ROCMToolChain : public AMDGPUToolChain {
getCommonDeviceLibNames(const llvm::opt::ArgList &DriverArgs,
const std::string &GPUArch,
bool isOpenMP = false) const;
SanitizerMask getSupportedSanitizers() const override {
return SanitizerKind::Address;
}
};

} // end namespace toolchains
16 changes: 16 additions & 0 deletions clang/test/Driver/rocm-device-libs.cl
@@ -132,9 +132,20 @@
// RUN: %S/opencl.cl \
// RUN: 2>&1 | FileCheck --check-prefixes=COMMON,COMMON-DEFAULT,GFX900-DEFAULT,GFX900,WAVE64 %s

// RUN: %clang -### -target amdgcn-amd-amdhsa \
// RUN: -x cl -mcpu=gfx908:xnack+ -fsanitize=address \
// RUN: --rocm-path=%S/Inputs/rocm \
// RUN: %s \
// RUN: 2>&1 | FileCheck --check-prefixes=ASAN,COMMON %s

// RUN: %clang -### -target amdgcn-amd-amdhsa \
// RUN: -x cl -mcpu=gfx908:xnack+ \
// RUN: --rocm-path=%S/Inputs/rocm \
// RUN: %s \
// RUN: 2>&1 | FileCheck --check-prefixes=NOASAN %s

// COMMON: "-triple" "amdgcn-amd-amdhsa"
// ASAN-SAME: "-mlink-bitcode-file" "{{.*}}/amdgcn/bitcode/asanrtl.bc"
// COMMON-SAME: "-mlink-builtin-bitcode" "{{.*}}/amdgcn/bitcode/opencl.bc"
// COMMON-SAME: "-mlink-builtin-bitcode" "{{.*}}/amdgcn/bitcode/ocml.bc"
// COMMON-SAME: "-mlink-builtin-bitcode" "{{.*}}/amdgcn/bitcode/ockl.bc"
@@ -169,6 +180,11 @@
// COMMON-UNSAFE-MATH-SAME: "-mlink-builtin-bitcode" "{{.*}}/amdgcn/bitcode/oclc_finite_only_off.bc"
// COMMON-UNSAFE-MATH-SAME: "-mlink-builtin-bitcode" "{{.*}}/amdgcn/bitcode/oclc_correctly_rounded_sqrt_off.bc"

// ASAN-SAME: "-fsanitize=address"

// NOASAN-NOT: "-fsanitize=address"
// NOASAN-NOT: amdgcn/bitcode/asanrtl.bc

// WAVE64: "-mlink-builtin-bitcode" "{{.*}}/amdgcn/bitcode/oclc_wavefrontsize64_on.bc"
// WAVE32: "-mlink-builtin-bitcode" "{{.*}}/amdgcn/bitcode/oclc_wavefrontsize64_off.bc"

19 changes: 15 additions & 4 deletions clang/utils/TableGen/ClangAttrEmitter.cpp
@@ -903,13 +903,15 @@ namespace {
StringRef shortType;
std::vector<StringRef> values, enums, uniques;
bool isExternal;
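// True when the Values list covers every enumerator of the underlying enum;
// when false, the emitted switch statements below need a default case.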
bool isCovered;

public:
EnumArgument(const Record &Arg, StringRef Attr)
: Argument(Arg, Attr), values(Arg.getValueAsListOfStrings("Values")),
enums(Arg.getValueAsListOfStrings("Enums")),
uniques(uniqueEnumsInOrder(enums)),
isExternal(Arg.getValueAsBit("IsExternalType")) {
isExternal(Arg.getValueAsBit("IsExternalType")),
isCovered(Arg.getValueAsBit("IsCovered")) {
StringRef Type = Arg.getValueAsString("Type");
shortType = isExternal ? Type.rsplit("::").second : Type;
// If shortType didn't contain :: at all rsplit will give us an empty
@@ -993,7 +995,7 @@ namespace {
OS << " OS << \" " << I << "\";\n";
OS << " break;\n";
}
if (isExternal) {
if (!isCovered) {
OS << " default:\n";
OS << " llvm_unreachable(\"Invalid attribute value\");\n";
}
@@ -1036,7 +1038,7 @@ namespace {
OS << " case " << fullType << "::" << enums[I] << ": return \""
<< values[I] << "\";\n";
}
if (isExternal) {
if (!isCovered) {
OS << " default: llvm_unreachable(\"Invalid attribute value\");\n";
}
OS << " }\n"
@@ -1050,6 +1052,7 @@ namespace {
StringRef shortType;
std::vector<StringRef> values, enums, uniques;
bool isExternal;
bool isCovered;

protected:
void writeValueImpl(raw_ostream &OS) const override {
@@ -1068,7 +1071,8 @@ namespace {
values(Arg.getValueAsListOfStrings("Values")),
enums(Arg.getValueAsListOfStrings("Enums")),
uniques(uniqueEnumsInOrder(enums)),
isExternal(Arg.getValueAsBit("IsExternalType")) {
isExternal(Arg.getValueAsBit("IsExternalType")),
isCovered(Arg.getValueAsBit("IsCovered")) {
StringRef Type = Arg.getValueAsString("Type");
shortType = isExternal ? Type.rsplit("::").second : Type;
// If shortType didn't contain :: at all rsplit will give us an empty
@@ -1111,6 +1115,10 @@ namespace {
OS << " OS << \" " << UI << "\";\n";
OS << " break;\n";
}
if (!isCovered) {
OS << " default:\n";
OS << " llvm_unreachable(\"Invalid attribute value\");\n";
}
OS << " }\n";
OS << " }\n";
}
@@ -1168,6 +1176,9 @@ namespace {
OS << " case " << fullType << "::" << enums[I] << ": return \""
<< values[I] << "\";\n";
}
if (!isCovered) {
OS << " default: llvm_unreachable(\"Invalid attribute value\");\n";
}
OS << " }\n"
<< " llvm_unreachable(\"No enumerator with that value\");\n"
<< "}\n";
6 changes: 3 additions & 3 deletions compiler-rt/lib/scudo/standalone/tests/memtag_test.cpp
@@ -19,10 +19,10 @@ namespace scudo {

TEST(MemtagBasicDeathTest, Unsupported) {
if (archSupportsMemoryTagging())
GTEST_SKIP();
TEST_SKIP("Memory tagging is not supported");
// Skip when running with HWASan.
if (&__hwasan_init != 0)
GTEST_SKIP();
TEST_SKIP("Incompatible with HWASan");

EXPECT_DEATH(archMemoryTagGranuleSize(), "not supported");
EXPECT_DEATH(untagPointer((uptr)0), "not supported");
@@ -48,7 +48,7 @@ class MemtagTest : public Test {
protected:
void SetUp() override {
if (!archSupportsMemoryTagging() || !systemDetectsMemoryTagFaultsTestOnly())
GTEST_SKIP() << "Memory tagging is not supported";
TEST_SKIP("Memory tagging is not supported");

BufferSize = getPageSizeCached();
ASSERT_FALSE(MemMap.isAllocated());
5 changes: 5 additions & 0 deletions compiler-rt/lib/scudo/standalone/tests/scudo_unit_test.h
@@ -11,9 +11,14 @@
#if SCUDO_FUCHSIA
#include <zxtest/zxtest.h>
using Test = ::zxtest::Test;
#define TEST_SKIP(message) ZXTEST_SKIP(message)
#else
#include "gtest/gtest.h"
using Test = ::testing::Test;
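// Wrap GTEST_SKIP in do { } while (0) so that TEST_SKIP(...) expands to a
// single statement and composes safely with unbraced if/else.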
#define TEST_SKIP(message) \
do { \
GTEST_SKIP() << message; \
} while (0)
#endif

// If EXPECT_DEATH isn't defined, make it a no-op.
2 changes: 1 addition & 1 deletion compiler-rt/lib/scudo/standalone/tests/strings_test.cpp
@@ -147,7 +147,7 @@ TEST(ScudoStringsTest, CapacityIncreaseFails) {
MAP_ALLOWNOMEM)) {
MemMap.unmap(MemMap.getBase(), MemMap.getCapacity());
setrlimit(RLIMIT_AS, &Limit);
GTEST_SKIP() << "Limiting address space does not prevent mmap.";
TEST_SKIP("Limiting address space does not prevent mmap.");
}

// Test requires that the default length is at least 6 characters.
2 changes: 1 addition & 1 deletion compiler-rt/lib/scudo/standalone/tests/vector_test.cpp
@@ -64,7 +64,7 @@ TEST(ScudoVectorTest, ReallocateFails) {
MAP_ALLOWNOMEM)) {
MemMap.unmap(MemMap.getBase(), MemMap.getCapacity());
setrlimit(RLIMIT_AS, &Limit);
GTEST_SKIP() << "Limiting address space does not prevent mmap.";
TEST_SKIP("Limiting address space does not prevent mmap.");
}

V.resize(capacity);
2 changes: 1 addition & 1 deletion flang/test/Preprocessing/directive-contin-with-pp.F90
@@ -1,4 +1,4 @@
! RUN: %flang -fc1 -fdebug-unparse -fopenmp %s 2>&1 | FileCheck %s
! RUN: %flang_fc1 -fdebug-unparse -fopenmp %s 2>&1 | FileCheck %s

#define DIR_START !dir$
#define DIR_CONT !dir$&
24 changes: 15 additions & 9 deletions libc/src/stdlib/CMakeLists.txt
@@ -380,18 +380,24 @@ elseif(LIBC_TARGET_OS_IS_GPU)
aligned_alloc
)
else()
# Only add malloc in full build mode. Use the system malloc in overlay mode.
if(LLVM_LIBC_FULL_BUILD)
# Only use freelist malloc for baremetal targets.
add_entrypoint_object(
freelist_malloc
SRCS
freelist_malloc.cpp
HDRS
malloc.h
DEPENDS
libc.src.__support.freelist_heap
COMPILE_OPTIONS
-DLIBC_FREELIST_MALLOC_SIZE=${LIBC_CONF_FREELIST_MALLOC_BUFFER_SIZE}
)
if(LIBC_TARGET_OS_IS_BAREMETAL)
add_entrypoint_object(
malloc
SRCS
freelist_malloc.cpp
HDRS
malloc.h
ALIAS
DEPENDS
libc.src.__support.freelist_heap
COMPILE_OPTIONS
-DLIBC_FREELIST_MALLOC_SIZE=${LIBC_CONF_FREELIST_MALLOC_BUFFER_SIZE}
.freelist_malloc
)
else()
add_entrypoint_external(
6 changes: 5 additions & 1 deletion libc/startup/linux/CMakeLists.txt
@@ -26,7 +26,11 @@ function(merge_relocatable_object name)
)
# Passing -r to the driver is much cleaner than passing -Wl,-r: the compiler knows it is
# a relocatable link and will not pass other irrelevant flags to the linker.
target_link_options(${relocatable_target} PRIVATE -r -nostdlib)
set(link_opts -r -nostdlib)
if (explicit_target_triple AND LLVM_ENABLE_LLD)
list(APPEND link_opts --target=${explicit_target_triple})
endif()
target_link_options(${relocatable_target} PRIVATE ${link_opts})
set_target_properties(
${relocatable_target}
PROPERTIES
2 changes: 1 addition & 1 deletion libc/test/src/__support/CMakeLists.txt
@@ -36,7 +36,7 @@ if(LLVM_LIBC_FULL_BUILD)
DEPENDS
libc.src.__support.CPP.span
libc.src.__support.freelist_heap
libc.src.stdlib.malloc
libc.src.stdlib.freelist_malloc
libc.src.string.memcmp
libc.src.string.memcpy
)
7 changes: 4 additions & 3 deletions lld/ELF/LinkerScript.cpp
@@ -1410,7 +1410,7 @@
state = &st;
errorOnMissingSection = true;
st.outSec = aether;
pendingErrors.clear();
recordedErrors.clear();

SymbolAssignmentMap oldValues = getSymbolAssignmentValues(sectionCommands);
for (SectionCommand *cmd : sectionCommands) {
@@ -1661,7 +1661,8 @@ void LinkerScript::printMemoryUsage(raw_ostream& os) {
}

void LinkerScript::recordError(const Twine &msg) {
pendingErrors.push_back(msg.str());
auto &str = recordedErrors.emplace_back();
msg.toVector(str);
}

static void checkMemoryRegion(const MemoryRegion *region,
@@ -1676,7 +1677,7 @@
}

void LinkerScript::checkFinalScriptConditions() const {
for (StringRef err : pendingErrors)
for (StringRef err : recordedErrors)
errorOrWarn(err);
for (const OutputSection *sec : outputSections) {
if (const MemoryRegion *memoryRegion = sec->memRegion)
2 changes: 1 addition & 1 deletion lld/ELF/LinkerScript.h
@@ -380,7 +380,7 @@ class LinkerScript final {
bool seenDataAlign = false;
bool seenRelroEnd = false;
bool errorOnMissingSection = false;
SmallVector<std::string, 0> pendingErrors;
SmallVector<SmallString<0>, 0> recordedErrors;

// List of section patterns specified with KEEP commands. They will
// be kept even if they are unused and --gc-sections is specified.
@@ -637,7 +637,7 @@ void ClangASTSource::FindExternalVisibleDecls(
FindDeclInModules(context, name);
}

if (!context.m_found_type) {
if (!context.m_found_type && m_ast_context->getLangOpts().ObjC) {
FindDeclInObjCRuntime(context, name);
}
}
@@ -15,4 +15,11 @@ def test(self):
(_, process, _, _) = lldbutil.run_to_name_breakpoint(self, "main")

self.assertState(process.GetState(), lldb.eStateStopped)

# Tests that we can use builtin Objective-C identifiers.
self.expect("expr id", error=False)

# Tests that we can lookup Objective-C decls in the ObjC runtime plugin.
self.expect_expr(
"NSString *c; c == nullptr", result_value="true", result_type="bool"
)
@@ -38,14 +38,14 @@ def test_early_process_launch(self):
logfile_early = os.path.join(self.getBuildDir(), "types-log-early.txt")
self.addTearDownHook(lambda: self.runCmd("log disable lldb types"))
self.runCmd("log enable -f %s lldb types" % logfile_early)
self.runCmd("expression global = 15")
self.runCmd("expression --language objc -- global = 15")

err = process.Continue()
self.assertTrue(err.Success())

logfile_later = os.path.join(self.getBuildDir(), "types-log-later.txt")
self.runCmd("log enable -f %s lldb types" % logfile_later)
self.runCmd("expression global = 25")
self.runCmd("expression --language objc -- global = 25")

self.assertTrue(os.path.exists(logfile_early))
self.assertTrue(os.path.exists(logfile_later))
21 changes: 21 additions & 0 deletions lldb/test/Shell/Expr/TestObjCInCXXContext.test
@@ -0,0 +1,21 @@
// UNSUPPORTED: system-linux, system-windows

// Tests that we don't consult the Objective-C runtime
// plugin when in a purely C++ context.
//
// RUN: %clangxx_host %p/Inputs/objc-cast.cpp -g -o %t
// RUN: %lldb %t \
// RUN: -o "b main" -o run \
// RUN: -o "expression --language objective-c -- NSString * a; a" \
// RUN: -o "expression --language objective-c++ -- NSString * b; b" \
// RUN: -o "expression NSString" \
// RUN: 2>&1 | FileCheck %s

// CHECK: (lldb) expression --language objective-c -- NSString * a; a
// CHECK-NEXT: (NSString *){{.*}}= nil

// CHECK: (lldb) expression --language objective-c++ -- NSString * b; b
// CHECK-NEXT: (NSString *){{.*}}= nil

// CHECK: (lldb) expression NSString
// CHECK-NEXT: error:{{.*}} use of undeclared identifier 'NSString'
21 changes: 21 additions & 0 deletions llvm/docs/LangRef.rst
@@ -1631,6 +1631,27 @@ Currently, only the following parameter attributes are defined:
``readonly`` or a ``memory`` attribute that does not contain
``argmem: write``.

``initializes((Lo1, Hi1), ...)``
This attribute indicates that the function initializes the ranges of the
pointer parameter's memory, ``[%p+LoN, %p+HiN)``. Initialization of memory
means the first memory access is a non-volatile, non-atomic write. The
write must happen before the function returns. If the function unwinds,
the write may not happen.

This attribute only holds for the memory accessed via this pointer
parameter. Other arbitrary accesses to the same memory via other pointers
are allowed.

Neither the ``writable`` nor the ``dereferenceable`` attribute implies the
``initializes`` attribute. The ``initializes`` attribute does not imply
``writeonly`` since ``initializes`` allows reading from the pointer
after writing.

This attribute is a list of constant ranges in ascending order with no
overlapping or consecutive list elements. ``LoN/HiN`` are 64-bit integers,
and negative values are allowed in case the argument points partway into
an allocation. An empty list is not allowed.
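
For example, a function that always stores an ``i32`` through its pointer
argument before returning could be declared as follows (an illustrative
signature, not one used elsewhere in this document):

.. code-block:: llvm

    define void @f(ptr initializes((0, 4)) %p) {
      store i32 0, ptr %p
      ret void
    }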

``dead_on_unwind``
At a high level, this attribute indicates that the pointer argument is dead
if the call unwinds, in the sense that the caller will not depend on the
1 change: 1 addition & 0 deletions llvm/include/llvm/AsmParser/LLParser.h
@@ -372,6 +372,7 @@ namespace llvm {
std::vector<unsigned> &FwdRefAttrGrps,
bool inAttrGrp, LocTy &BuiltinLoc);
bool parseRangeAttr(AttrBuilder &B);
bool parseInitializesAttr(AttrBuilder &B);
bool parseRequiredTypeAttr(AttrBuilder &B, lltok::Kind AttrToken,
Attribute::AttrKind AttrKind);

1 change: 1 addition & 0 deletions llvm/include/llvm/Bitcode/LLVMBitCodes.h
@@ -755,6 +755,7 @@ enum AttributeKindCodes {
ATTR_KIND_DEAD_ON_UNWIND = 91,
ATTR_KIND_RANGE = 92,
ATTR_KIND_SANITIZE_NUMERICAL_STABILITY = 93,
ATTR_KIND_INITIALIZES = 94,
};

enum ComdatSelectionKindCodes {
10 changes: 1 addition & 9 deletions llvm/include/llvm/Frontend/HLSL/HLSLResource.h
@@ -21,16 +21,8 @@ class MDNode;

namespace hlsl {

enum class ResourceClass : uint8_t {
SRV = 0,
UAV,
CBuffer,
Sampler,
Invalid,
NumClasses = Invalid,
};

// For now we use DXIL ABI enum values directly. This may change in the future.
using dxil::ResourceClass;
using dxil::ElementType;
using dxil::ResourceKind;

24 changes: 24 additions & 0 deletions llvm/include/llvm/IR/Attributes.h
@@ -38,6 +38,7 @@ class AttributeImpl;
class AttributeListImpl;
class AttributeSetNode;
class ConstantRange;
class ConstantRangeList;
class FoldingSetNodeID;
class Function;
class LLVMContext;
@@ -107,6 +108,10 @@ class Attribute {
static bool isConstantRangeAttrKind(AttrKind Kind) {
return Kind >= FirstConstantRangeAttr && Kind <= LastConstantRangeAttr;
}
static bool isConstantRangeListAttrKind(AttrKind Kind) {
return Kind >= FirstConstantRangeListAttr &&
Kind <= LastConstantRangeListAttr;
}

static bool canUseAsFnAttr(AttrKind Kind);
static bool canUseAsParamAttr(AttrKind Kind);
@@ -131,6 +136,8 @@ class Attribute {
static Attribute get(LLVMContext &Context, AttrKind Kind, Type *Ty);
static Attribute get(LLVMContext &Context, AttrKind Kind,
const ConstantRange &CR);
static Attribute get(LLVMContext &Context, AttrKind Kind,
ArrayRef<ConstantRange> Val);

/// Return a uniquified Attribute object that has the specific
/// alignment set.
@@ -189,6 +196,9 @@ class Attribute {
/// Return true if the attribute is a ConstantRange attribute.
bool isConstantRangeAttribute() const;

/// Return true if the attribute is a ConstantRangeList attribute.
bool isConstantRangeListAttribute() const;

/// Return true if the attribute is any kind of attribute.
bool isValid() const { return pImpl; }

@@ -226,6 +236,10 @@ class Attribute {
/// attribute to be a ConstantRange attribute.
const ConstantRange &getValueAsConstantRange() const;

/// Return the attribute's value as a ConstantRange array. This requires the
/// attribute to be a ConstantRangeList attribute.
ArrayRef<ConstantRange> getValueAsConstantRangeList() const;

/// Returns the alignment field of an attribute as a byte alignment
/// value.
MaybeAlign getAlignment() const;
@@ -267,6 +281,9 @@ class Attribute {
/// Returns the value of the range attribute.
const ConstantRange &getRange() const;

/// Returns the value of the initializes attribute.
ArrayRef<ConstantRange> getInitializes() const;

/// The Attribute is converted to a string of equivalent mnemonic. This
/// is, presumably, for writing out the mnemonics for the assembly writer.
std::string getAsString(bool InAttrGrp = false) const;
@@ -1222,6 +1239,13 @@ class AttrBuilder {
/// Add range attribute.
AttrBuilder &addRangeAttr(const ConstantRange &CR);

/// Add a ConstantRangeList attribute with the given ranges.
AttrBuilder &addConstantRangeListAttr(Attribute::AttrKind Kind,
ArrayRef<ConstantRange> Val);

/// Add initializes attribute.
AttrBuilder &addInitializesAttr(const ConstantRangeList &CRL);

ArrayRef<Attribute> attrs() const { return Attrs; }

bool operator==(const AttrBuilder &B) const;
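A minimal sketch of how a producer could attach the new attribute to a
function parameter (hypothetical pass code; the function F and its pointer
argument are assumed, not part of this patch):

#include "llvm/IR/Attributes.h"
#include "llvm/IR/ConstantRangeList.h"
#include "llvm/IR/Function.h"
using namespace llvm;

void markFirstParamInitialized(Function &F) {
  // Claim that the callee initializes bytes [0, 8) of pointer argument 0.
  ConstantRangeList CRL;
  CRL.insert(0, 8);
  AttrBuilder B(F.getContext());
  B.addInitializesAttr(CRL);
  F.addParamAttrs(0, B);
}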
6 changes: 6 additions & 0 deletions llvm/include/llvm/IR/Attributes.td
@@ -47,6 +47,9 @@ class ComplexStrAttr<string S, list<AttrProperty> P> : Attr<S, P>;
/// ConstantRange attribute.
class ConstantRangeAttr<string S, list<AttrProperty> P> : Attr<S, P>;

/// ConstantRangeList attribute.
class ConstantRangeListAttr<string S, list<AttrProperty> P> : Attr<S, P>;

/// Target-independent enum attributes.

/// Alignment of parameter (5 bits) stored as log2 of alignment with +1 bias.
@@ -112,6 +115,9 @@ def FnRetThunkExtern : EnumAttr<"fn_ret_thunk_extern", [FnAttr]>;
/// Pass structure in an alloca.
def InAlloca : TypeAttr<"inalloca", [ParamAttr]>;

/// Pointer argument memory is initialized.
def Initializes : ConstantRangeListAttr<"initializes", [ParamAttr]>;

/// Source said inlining was desirable.
def InlineHint : EnumAttr<"inlinehint", [FnAttr]>;

93 changes: 93 additions & 0 deletions llvm/include/llvm/IR/ConstantRangeList.h
@@ -0,0 +1,93 @@
//===- ConstantRangeList.h - A list of constant ranges ----------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// Represents a list of signed ConstantRanges; wrapping around the end of the
// numeric range is not supported. Ranges in the list are ordered and
// non-overlapping, all ranges have the same bit width, and each range's lower
// bound is strictly less than its upper bound.
//
//===----------------------------------------------------------------------===//

#ifndef LLVM_IR_CONSTANTRANGELIST_H
#define LLVM_IR_CONSTANTRANGELIST_H

#include "llvm/ADT/APInt.h"
#include "llvm/IR/ConstantRange.h"
#include "llvm/Support/Debug.h"
#include <cstddef>
#include <cstdint>

namespace llvm {

class raw_ostream;

/// This class represents a list of constant ranges.
class [[nodiscard]] ConstantRangeList {
SmallVector<ConstantRange, 2> Ranges;

public:
ConstantRangeList() = default;
ConstantRangeList(ArrayRef<ConstantRange> RangesRef) {
assert(isOrderedRanges(RangesRef));
for (const ConstantRange &R : RangesRef) {
assert(R.getBitWidth() == getBitWidth());
Ranges.push_back(R);
}
}

// Return true if the ranges are non-overlapping and increasing.
static bool isOrderedRanges(ArrayRef<ConstantRange> RangesRef);
static std::optional<ConstantRangeList>
getConstantRangeList(ArrayRef<ConstantRange> RangesRef);

ArrayRef<ConstantRange> rangesRef() const { return Ranges; }
SmallVectorImpl<ConstantRange>::iterator begin() { return Ranges.begin(); }
SmallVectorImpl<ConstantRange>::iterator end() { return Ranges.end(); }
SmallVectorImpl<ConstantRange>::const_iterator begin() const {
return Ranges.begin();
}
SmallVectorImpl<ConstantRange>::const_iterator end() const {
return Ranges.end();
}
ConstantRange getRange(unsigned i) const { return Ranges[i]; }

/// Return true if this list contains no members.
bool empty() const { return Ranges.empty(); }

/// Get the bit width of this ConstantRangeList.
uint32_t getBitWidth() const { return 64; }

/// Return the number of ranges in this ConstantRangeList.
size_t size() const { return Ranges.size(); }

/// Insert a new range into Ranges, keeping the list ordered.
void insert(const ConstantRange &NewRange);
void insert(int64_t Lower, int64_t Upper) {
insert(ConstantRange(APInt(64, Lower, /*isSigned=*/true),
APInt(64, Upper, /*isSigned=*/true)));
}

/// Return true if this range list is equal to another range list.
bool operator==(const ConstantRangeList &CRL) const {
return Ranges == CRL.Ranges;
}
bool operator!=(const ConstantRangeList &CRL) const {
return !operator==(CRL);
}

/// Print out the ranges to a stream.
void print(raw_ostream &OS) const;

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
void dump() const;
#endif
};

} // end namespace llvm

#endif // LLVM_IR_CONSTANTRANGELIST_H
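A brief usage sketch (hypothetical helper function; assumes the accompanying
implementation in llvm/lib/IR/ConstantRangeList.cpp is linked in):

#include "llvm/IR/ConstantRangeList.h"
#include <cassert>
using namespace llvm;

void constantRangeListExample() {
  ConstantRangeList CRL;
  CRL.insert(8, 12); // [8, 12)
  CRL.insert(0, 4);  // [0, 4); insert() keeps the list ordered.
  assert(CRL.size() == 2);
  assert(CRL.getBitWidth() == 64); // All ranges share a fixed 64-bit width.
  assert(!CRL.empty());
}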
7 changes: 7 additions & 0 deletions llvm/include/llvm/Support/DXILABI.h
@@ -39,6 +39,13 @@ enum class ParameterKind : uint8_t {
DXILHandle,
};

enum class ResourceClass : uint8_t {
SRV = 0,
UAV,
CBuffer,
Sampler,
};

/// The kind of resource for an SRV or UAV resource. Sometimes referred to as
/// "Shape" in the DXIL docs.
enum class ResourceKind : uint32_t {
2 changes: 1 addition & 1 deletion llvm/lib/Analysis/CallGraphSCCPass.cpp
@@ -46,7 +46,7 @@ using namespace llvm;
namespace llvm {
cl::opt<unsigned> MaxDevirtIterations("max-devirt-iterations", cl::ReallyHidden,
cl::init(4));
}
} // namespace llvm

STATISTIC(MaxSCCIterations, "Maximum CGSCCPassMgr iterations on one SCC");

4 changes: 2 additions & 2 deletions llvm/lib/Analysis/CallPrinter.cpp
@@ -29,7 +29,7 @@ using namespace llvm;

namespace llvm {
template <class GraphType> struct GraphTraits;
}
} // namespace llvm

// This option shows static (relative) call counts.
// FIXME:
@@ -215,7 +215,7 @@ struct DOTGraphTraits<CallGraphDOTInfo *> : public DefaultDOTGraphTraits {
}
};

} // end llvm namespace
} // namespace llvm

namespace {
void doCallGraphDOTPrinting(
184 changes: 92 additions & 92 deletions llvm/lib/Analysis/CaptureTracking.cpp
@@ -72,127 +72,127 @@ bool CaptureTracker::isDereferenceableOrNull(Value *O, const DataLayout &DL) {
}

namespace {
struct SimpleCaptureTracker : public CaptureTracker {
explicit SimpleCaptureTracker(bool ReturnCaptures)
: ReturnCaptures(ReturnCaptures) {}
struct SimpleCaptureTracker : public CaptureTracker {
explicit SimpleCaptureTracker(bool ReturnCaptures)
: ReturnCaptures(ReturnCaptures) {}

void tooManyUses() override {
LLVM_DEBUG(dbgs() << "Captured due to too many uses\n");
Captured = true;
}
void tooManyUses() override {
LLVM_DEBUG(dbgs() << "Captured due to too many uses\n");
Captured = true;
}

bool captured(const Use *U) override {
if (isa<ReturnInst>(U->getUser()) && !ReturnCaptures)
return false;
bool captured(const Use *U) override {
if (isa<ReturnInst>(U->getUser()) && !ReturnCaptures)
return false;

LLVM_DEBUG(dbgs() << "Captured by: " << *U->getUser() << "\n");
LLVM_DEBUG(dbgs() << "Captured by: " << *U->getUser() << "\n");

Captured = true;
return true;
}
Captured = true;
return true;
}

bool ReturnCaptures;
bool ReturnCaptures;

bool Captured = false;
};
bool Captured = false;
};

/// Only find pointer captures which happen before the given instruction. Uses
/// the dominator tree to determine whether one instruction is before another.
/// Only support the case where the Value is defined in the same basic block
/// as the given instruction and the use.
struct CapturesBefore : public CaptureTracker {
/// Only find pointer captures which happen before the given instruction. Uses
/// the dominator tree to determine whether one instruction is before another.
/// Only support the case where the Value is defined in the same basic block
/// as the given instruction and the use.
struct CapturesBefore : public CaptureTracker {

CapturesBefore(bool ReturnCaptures, const Instruction *I,
const DominatorTree *DT, bool IncludeI, const LoopInfo *LI)
: BeforeHere(I), DT(DT), ReturnCaptures(ReturnCaptures),
IncludeI(IncludeI), LI(LI) {}
CapturesBefore(bool ReturnCaptures, const Instruction *I,
const DominatorTree *DT, bool IncludeI, const LoopInfo *LI)
: BeforeHere(I), DT(DT), ReturnCaptures(ReturnCaptures),
IncludeI(IncludeI), LI(LI) {}

void tooManyUses() override { Captured = true; }
void tooManyUses() override { Captured = true; }

bool isSafeToPrune(Instruction *I) {
if (BeforeHere == I)
return !IncludeI;
bool isSafeToPrune(Instruction *I) {
if (BeforeHere == I)
return !IncludeI;

// We explore this usage only if the usage can reach "BeforeHere".
// If use is not reachable from entry, there is no need to explore.
if (!DT->isReachableFromEntry(I->getParent()))
return true;
// We explore this usage only if the usage can reach "BeforeHere".
// If use is not reachable from entry, there is no need to explore.
if (!DT->isReachableFromEntry(I->getParent()))
return true;

// Check whether there is a path from I to BeforeHere.
return !isPotentiallyReachable(I, BeforeHere, nullptr, DT, LI);
}
// Check whether there is a path from I to BeforeHere.
return !isPotentiallyReachable(I, BeforeHere, nullptr, DT, LI);
}

bool captured(const Use *U) override {
Instruction *I = cast<Instruction>(U->getUser());
if (isa<ReturnInst>(I) && !ReturnCaptures)
return false;
bool captured(const Use *U) override {
Instruction *I = cast<Instruction>(U->getUser());
if (isa<ReturnInst>(I) && !ReturnCaptures)
return false;

// Check isSafeToPrune() here rather than in shouldExplore() to avoid
// an expensive reachability query for every instruction we look at.
// Instead we only do one for actual capturing candidates.
if (isSafeToPrune(I))
return false;
// Check isSafeToPrune() here rather than in shouldExplore() to avoid
// an expensive reachability query for every instruction we look at.
// Instead we only do one for actual capturing candidates.
if (isSafeToPrune(I))
return false;

Captured = true;
return true;
}
Captured = true;
return true;
}

const Instruction *BeforeHere;
const DominatorTree *DT;
const Instruction *BeforeHere;
const DominatorTree *DT;

bool ReturnCaptures;
bool IncludeI;
bool ReturnCaptures;
bool IncludeI;

bool Captured = false;
bool Captured = false;

const LoopInfo *LI;
};
const LoopInfo *LI;
};

/// Find the 'earliest' instruction before which the pointer is known not to
/// be captured. Here an instruction A is considered earlier than instruction
/// B, if A dominates B. If 2 escapes do not dominate each other, the
/// terminator of the common dominator is chosen. If not all uses can be
/// analyzed, the earliest escape is set to the first instruction in the
/// function entry block.
// NOTE: Users have to make sure instructions compared against the earliest
// escape are not in a cycle.
struct EarliestCaptures : public CaptureTracker {

EarliestCaptures(bool ReturnCaptures, Function &F, const DominatorTree &DT)
: DT(DT), ReturnCaptures(ReturnCaptures), F(F) {}

void tooManyUses() override {
Captured = true;
EarliestCapture = &*F.getEntryBlock().begin();
}
/// Find the 'earliest' instruction before which the pointer is known not to
/// be captured. Here an instruction A is considered earlier than instruction
/// B, if A dominates B. If 2 escapes do not dominate each other, the
/// terminator of the common dominator is chosen. If not all uses can be
/// analyzed, the earliest escape is set to the first instruction in the
/// function entry block.
// NOTE: Users have to make sure instructions compared against the earliest
// escape are not in a cycle.
struct EarliestCaptures : public CaptureTracker {

bool captured(const Use *U) override {
Instruction *I = cast<Instruction>(U->getUser());
if (isa<ReturnInst>(I) && !ReturnCaptures)
return false;
EarliestCaptures(bool ReturnCaptures, Function &F, const DominatorTree &DT)
: DT(DT), ReturnCaptures(ReturnCaptures), F(F) {}

if (!EarliestCapture)
EarliestCapture = I;
else
EarliestCapture = DT.findNearestCommonDominator(EarliestCapture, I);
Captured = true;
void tooManyUses() override {
Captured = true;
EarliestCapture = &*F.getEntryBlock().begin();
}

// Return false to continue analysis; we need to see all potential
// captures.
bool captured(const Use *U) override {
Instruction *I = cast<Instruction>(U->getUser());
if (isa<ReturnInst>(I) && !ReturnCaptures)
return false;
}

Instruction *EarliestCapture = nullptr;
if (!EarliestCapture)
EarliestCapture = I;
else
EarliestCapture = DT.findNearestCommonDominator(EarliestCapture, I);
Captured = true;

const DominatorTree &DT;
// Return false to continue analysis; we need to see all potential
// captures.
return false;
}

bool ReturnCaptures;
Instruction *EarliestCapture = nullptr;

bool Captured = false;
const DominatorTree &DT;

Function &F;
};
}
bool ReturnCaptures;

bool Captured = false;

Function &F;
};
} // namespace

/// PointerMayBeCaptured - Return true if this pointer value may be captured
/// by the enclosing function (which is required to exist). This routine can
2 changes: 1 addition & 1 deletion llvm/lib/Analysis/CycleAnalysis.cpp
@@ -15,7 +15,7 @@ using namespace llvm;

namespace llvm {
class Module;
}
} // namespace llvm

CycleInfo CycleAnalysis::run(Function &F, FunctionAnalysisManager &) {
CycleInfo CI;
2 changes: 1 addition & 1 deletion llvm/lib/Analysis/FunctionPropertiesAnalysis.cpp
@@ -39,7 +39,7 @@ cl::opt<unsigned> MediumBasicBlockInstructionThreshold(
"medium-basic-block-instruction-threshold", cl::Hidden, cl::init(15),
cl::desc("The minimum number of instructions a basic block should contain "
"before being considered medium-sized."));
}
} // namespace llvm

static cl::opt<unsigned> CallWithManyArgumentsThreshold(
"call-with-many-arguments-threshold", cl::Hidden, cl::init(4),
2 changes: 1 addition & 1 deletion llvm/lib/Analysis/ImportedFunctionsInliningStatistics.cpp
@@ -32,7 +32,7 @@ cl::opt<InlinerFunctionImportStatsOpts> InlinerFunctionImportStats(
clEnumValN(InlinerFunctionImportStatsOpts::Verbose, "verbose",
"printing of statistics for each inlined function")),
cl::Hidden, cl::desc("Enable inliner stats for imported functions"));
}
} // namespace llvm

ImportedFunctionsInliningStatistics::InlineGraphNode &
ImportedFunctionsInliningStatistics::createInlineGraphNode(const Function &F) {
2 changes: 1 addition & 1 deletion llvm/lib/Analysis/InlineAdvisor.cpp
@@ -64,7 +64,7 @@ static cl::opt<bool>

namespace llvm {
extern cl::opt<InlinerFunctionImportStatsOpts> InlinerFunctionImportStats;
}
} // namespace llvm

namespace {
using namespace llvm::ore;
187 changes: 94 additions & 93 deletions llvm/lib/Analysis/LazyValueInfo.cpp
@@ -60,8 +60,10 @@ INITIALIZE_PASS_END(LazyValueInfoWrapperPass, "lazy-value-info",
"Lazy Value Information Analysis", false, true)

namespace llvm {
FunctionPass *createLazyValueInfoPass() { return new LazyValueInfoWrapperPass(); }
FunctionPass *createLazyValueInfoPass() {
return new LazyValueInfoWrapperPass();
}
} // namespace llvm

AnalysisKey LazyValueAnalysis::Key;

@@ -151,114 +153,113 @@ namespace {
} // end anonymous namespace

namespace {
using NonNullPointerSet = SmallDenseSet<AssertingVH<Value>, 2>;

/// This is the cache kept by LazyValueInfo which
/// maintains information about queries across the clients' queries.
class LazyValueInfoCache {
/// This is all of the cached information for one basic block. It contains
/// the per-value lattice elements, as well as a separate set for
/// overdefined values to reduce memory usage. Additionally pointers
/// dereferenced in the block are cached for nullability queries.
struct BlockCacheEntry {
SmallDenseMap<AssertingVH<Value>, ValueLatticeElement, 4> LatticeElements;
SmallDenseSet<AssertingVH<Value>, 4> OverDefined;
// std::nullopt indicates that the nonnull pointers for this basic block
// block have not been computed yet.
std::optional<NonNullPointerSet> NonNullPointers;
};

/// Cached information per basic block.
DenseMap<PoisoningVH<BasicBlock>, std::unique_ptr<BlockCacheEntry>>
BlockCache;
/// Set of value handles used to erase values from the cache on deletion.
DenseSet<LVIValueHandle, DenseMapInfo<Value *>> ValueHandles;

const BlockCacheEntry *getBlockEntry(BasicBlock *BB) const {
auto It = BlockCache.find_as(BB);
if (It == BlockCache.end())
return nullptr;
return It->second.get();
}
using NonNullPointerSet = SmallDenseSet<AssertingVH<Value>, 2>;

/// This is the cache kept by LazyValueInfo which
/// maintains information about queries across the clients' queries.
class LazyValueInfoCache {
/// This is all of the cached information for one basic block. It contains
/// the per-value lattice elements, as well as a separate set for
/// overdefined values to reduce memory usage. Additionally pointers
/// dereferenced in the block are cached for nullability queries.
struct BlockCacheEntry {
SmallDenseMap<AssertingVH<Value>, ValueLatticeElement, 4> LatticeElements;
SmallDenseSet<AssertingVH<Value>, 4> OverDefined;
// std::nullopt indicates that the nonnull pointers for this basic
// block have not been computed yet.
std::optional<NonNullPointerSet> NonNullPointers;
};

BlockCacheEntry *getOrCreateBlockEntry(BasicBlock *BB) {
auto It = BlockCache.find_as(BB);
if (It == BlockCache.end())
It = BlockCache.insert({ BB, std::make_unique<BlockCacheEntry>() })
.first;
/// Cached information per basic block.
DenseMap<PoisoningVH<BasicBlock>, std::unique_ptr<BlockCacheEntry>>
BlockCache;
/// Set of value handles used to erase values from the cache on deletion.
DenseSet<LVIValueHandle, DenseMapInfo<Value *>> ValueHandles;

const BlockCacheEntry *getBlockEntry(BasicBlock *BB) const {
auto It = BlockCache.find_as(BB);
if (It == BlockCache.end())
return nullptr;
return It->second.get();
}

return It->second.get();
}
BlockCacheEntry *getOrCreateBlockEntry(BasicBlock *BB) {
auto It = BlockCache.find_as(BB);
if (It == BlockCache.end())
It = BlockCache.insert({BB, std::make_unique<BlockCacheEntry>()}).first;

void addValueHandle(Value *Val) {
auto HandleIt = ValueHandles.find_as(Val);
if (HandleIt == ValueHandles.end())
ValueHandles.insert({ Val, this });
}
return It->second.get();
}

public:
void insertResult(Value *Val, BasicBlock *BB,
const ValueLatticeElement &Result) {
BlockCacheEntry *Entry = getOrCreateBlockEntry(BB);
void addValueHandle(Value *Val) {
auto HandleIt = ValueHandles.find_as(Val);
if (HandleIt == ValueHandles.end())
ValueHandles.insert({Val, this});
}

// Insert over-defined values into their own cache to reduce memory
// overhead.
if (Result.isOverdefined())
Entry->OverDefined.insert(Val);
else
Entry->LatticeElements.insert({ Val, Result });
public:
void insertResult(Value *Val, BasicBlock *BB,
const ValueLatticeElement &Result) {
BlockCacheEntry *Entry = getOrCreateBlockEntry(BB);

// Insert over-defined values into their own cache to reduce memory
// overhead.
if (Result.isOverdefined())
Entry->OverDefined.insert(Val);
else
Entry->LatticeElements.insert({Val, Result});

addValueHandle(Val);
}

addValueHandle(Val);
}
std::optional<ValueLatticeElement> getCachedValueInfo(Value *V,
BasicBlock *BB) const {
const BlockCacheEntry *Entry = getBlockEntry(BB);
if (!Entry)
return std::nullopt;

std::optional<ValueLatticeElement>
getCachedValueInfo(Value *V, BasicBlock *BB) const {
const BlockCacheEntry *Entry = getBlockEntry(BB);
if (!Entry)
return std::nullopt;
if (Entry->OverDefined.count(V))
return ValueLatticeElement::getOverdefined();

if (Entry->OverDefined.count(V))
return ValueLatticeElement::getOverdefined();
auto LatticeIt = Entry->LatticeElements.find_as(V);
if (LatticeIt == Entry->LatticeElements.end())
return std::nullopt;

auto LatticeIt = Entry->LatticeElements.find_as(V);
if (LatticeIt == Entry->LatticeElements.end())
return std::nullopt;
return LatticeIt->second;
}

return LatticeIt->second;
bool
isNonNullAtEndOfBlock(Value *V, BasicBlock *BB,
function_ref<NonNullPointerSet(BasicBlock *)> InitFn) {
BlockCacheEntry *Entry = getOrCreateBlockEntry(BB);
if (!Entry->NonNullPointers) {
Entry->NonNullPointers = InitFn(BB);
for (Value *V : *Entry->NonNullPointers)
addValueHandle(V);
}

bool isNonNullAtEndOfBlock(
Value *V, BasicBlock *BB,
function_ref<NonNullPointerSet(BasicBlock *)> InitFn) {
BlockCacheEntry *Entry = getOrCreateBlockEntry(BB);
if (!Entry->NonNullPointers) {
Entry->NonNullPointers = InitFn(BB);
for (Value *V : *Entry->NonNullPointers)
addValueHandle(V);
}

return Entry->NonNullPointers->count(V);
}
return Entry->NonNullPointers->count(V);
}

/// clear - Empty the cache.
void clear() {
BlockCache.clear();
ValueHandles.clear();
}
/// clear - Empty the cache.
void clear() {
BlockCache.clear();
ValueHandles.clear();
}

/// Inform the cache that a given value has been deleted.
void eraseValue(Value *V);
/// Inform the cache that a given value has been deleted.
void eraseValue(Value *V);

/// This is part of the update interface to inform the cache
/// that a block has been deleted.
void eraseBlock(BasicBlock *BB);
/// This is part of the update interface to inform the cache
/// that a block has been deleted.
void eraseBlock(BasicBlock *BB);

/// Updates the cache to remove any influence an overdefined value in
/// OldSucc might have (unless also overdefined in NewSucc). This just
/// flushes elements from the cache and does not add any.
void threadEdgeImpl(BasicBlock *OldSucc,BasicBlock *NewSucc);
};
}
/// Updates the cache to remove any influence an overdefined value in
/// OldSucc might have (unless also overdefined in NewSucc). This just
/// flushes elements from the cache and does not add any.
void threadEdgeImpl(BasicBlock *OldSucc, BasicBlock *NewSucc);
};
} // namespace

void LazyValueInfoCache::eraseValue(Value *V) {
for (auto &Pair : BlockCache) {
2 changes: 1 addition & 1 deletion llvm/lib/Analysis/LoopAnalysisManager.cpp
@@ -133,7 +133,7 @@ LoopAnalysisManagerFunctionProxy::run(Function &F,
FunctionAnalysisManager &AM) {
return Result(*InnerAM, AM.getResult<LoopAnalysis>(F));
}
}
} // namespace llvm

PreservedAnalyses llvm::getLoopPassPreservedAnalyses() {
PreservedAnalyses PA;
2 changes: 1 addition & 1 deletion llvm/lib/Analysis/LoopPass.cpp
@@ -59,7 +59,7 @@ class PrintLoopPassWrapper : public LoopPass {
};

char PrintLoopPassWrapper::ID = 0;
}
} // namespace

//===----------------------------------------------------------------------===//
// LPPassManager
2 changes: 1 addition & 1 deletion llvm/lib/Analysis/ScalarEvolution.cpp
@@ -13689,7 +13689,7 @@ raw_ostream &operator<<(raw_ostream &OS, ScalarEvolution::BlockDisposition BD) {
}
return OS;
}
}
} // namespace llvm

void ScalarEvolution::print(raw_ostream &OS) const {
// ScalarEvolution's implementation of the print method is to print
2 changes: 1 addition & 1 deletion llvm/lib/Analysis/ScalarEvolutionDivision.cpp
@@ -21,7 +21,7 @@

namespace llvm {
class Type;
}
} // namespace llvm

using namespace llvm;

49 changes: 49 additions & 0 deletions llvm/lib/AsmParser/LLParser.cpp
@@ -25,6 +25,7 @@
#include "llvm/IR/CallingConv.h"
#include "llvm/IR/Comdat.h"
#include "llvm/IR/ConstantRange.h"
#include "llvm/IR/ConstantRangeList.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DebugInfoMetadata.h"
#include "llvm/IR/DerivedTypes.h"
@@ -1626,6 +1627,8 @@ bool LLParser::parseEnumAttribute(Attribute::AttrKind Attr, AttrBuilder &B,
}
case Attribute::Range:
return parseRangeAttr(B);
case Attribute::Initializes:
return parseInitializesAttr(B);
default:
B.addAttribute(Attr);
Lex.Lex();
@@ -3101,6 +3104,52 @@ bool LLParser::parseRangeAttr(AttrBuilder &B) {
return false;
}

/// parseInitializesAttr
/// ::= initializes((Lo1,Hi1),(Lo2,Hi2),...)
bool LLParser::parseInitializesAttr(AttrBuilder &B) {
Lex.Lex();

auto ParseAPSInt = [&](APInt &Val) {
if (Lex.getKind() != lltok::APSInt)
return tokError("expected integer");
Val = Lex.getAPSIntVal().extend(64);
Lex.Lex();
return false;
};

if (parseToken(lltok::lparen, "expected '('"))
return true;

SmallVector<ConstantRange, 2> RangeList;
// Parse each constant range.
do {
APInt Lower, Upper;
if (parseToken(lltok::lparen, "expected '('"))
return true;

if (ParseAPSInt(Lower) || parseToken(lltok::comma, "expected ','") ||
ParseAPSInt(Upper))
return true;

if (Lower == Upper)
return tokError("the range should not represent the full or empty set!");

if (parseToken(lltok::rparen, "expected ')'"))
return true;

RangeList.push_back(ConstantRange(Lower, Upper));
} while (EatIfPresent(lltok::comma));

if (parseToken(lltok::rparen, "expected ')'"))
return true;

auto CRLOrNull = ConstantRangeList::getConstantRangeList(RangeList);
if (!CRLOrNull.has_value())
return tokError("Invalid (unordered or overlapping) range list");
B.addInitializesAttr(*CRLOrNull);
return false;
}

/// parseOptionalOperandBundles
/// ::= /*empty*/
/// ::= '[' OperandBundle [, OperandBundle ]* ']'
49 changes: 44 additions & 5 deletions llvm/lib/Bitcode/Reader/BitcodeReader.cpp
@@ -30,6 +30,7 @@
#include "llvm/IR/CallingConv.h"
#include "llvm/IR/Comdat.h"
#include "llvm/IR/Constant.h"
#include "llvm/IR/ConstantRangeList.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DebugInfo.h"
@@ -838,10 +839,10 @@ class BitcodeReader : public BitcodeReaderBase, public GVMaterializer {
}

Expected<ConstantRange> readConstantRange(ArrayRef<uint64_t> Record,
unsigned &OpNum) {
if (Record.size() - OpNum < 3)
unsigned &OpNum,
unsigned BitWidth) {
if (Record.size() - OpNum < 2)
return error("Too few records for range");
unsigned BitWidth = Record[OpNum++];
if (BitWidth > 64) {
unsigned LowerActiveWords = Record[OpNum];
unsigned UpperActiveWords = Record[OpNum++] >> 32;
@@ -861,6 +862,14 @@ class BitcodeReader : public BitcodeReaderBase, public GVMaterializer {
}
}

Expected<ConstantRange>
readBitWidthAndConstantRange(ArrayRef<uint64_t> Record, unsigned &OpNum) {
if (Record.size() - OpNum < 1)
return error("Too few records for range");
unsigned BitWidth = Record[OpNum++];
return readConstantRange(Record, OpNum, BitWidth);
}

/// Upgrades old-style typeless byval/sret/inalloca attributes by adding the
/// corresponding argument's pointee type. Also upgrades intrinsics that now
/// require an elementtype attribute.
@@ -2174,6 +2183,8 @@ static Attribute::AttrKind getAttrFromCode(uint64_t Code) {
return Attribute::DeadOnUnwind;
case bitc::ATTR_KIND_RANGE:
return Attribute::Range;
case bitc::ATTR_KIND_INITIALIZES:
return Attribute::Initializes;
}
}

@@ -2352,12 +2363,39 @@ Error BitcodeReader::parseAttributeGroupBlock() {
if (!Attribute::isConstantRangeAttrKind(Kind))
return error("Not a ConstantRange attribute");

Expected<ConstantRange> MaybeCR = readConstantRange(Record, i);
Expected<ConstantRange> MaybeCR =
readBitWidthAndConstantRange(Record, i);
if (!MaybeCR)
return MaybeCR.takeError();
i--;

B.addConstantRangeAttr(Kind, MaybeCR.get());
} else if (Record[i] == 8) {
Attribute::AttrKind Kind;

i++;
if (Error Err = parseAttrKind(Record[i++], &Kind))
return Err;
if (!Attribute::isConstantRangeListAttrKind(Kind))
return error("Not a constant range list attribute");

SmallVector<ConstantRange, 2> Val;
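// Record layout: [8, kind, numRanges, bitWidth, range payloads...]; each
// range is decoded by readConstantRange using the shared bit width.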
if (i + 2 > e)
return error("Too few records for constant range list");
unsigned RangeSize = Record[i++];
unsigned BitWidth = Record[i++];
for (unsigned Idx = 0; Idx < RangeSize; ++Idx) {
Expected<ConstantRange> MaybeCR =
readConstantRange(Record, i, BitWidth);
if (!MaybeCR)
return MaybeCR.takeError();
Val.push_back(MaybeCR.get());
}
i--;

if (!ConstantRangeList::isOrderedRanges(Val))
return error("Invalid (unordered or overlapping) range list");
B.addConstantRangeListAttr(Kind, Val);
} else {
return error("Invalid attribute group entry");
}
@@ -3372,7 +3410,8 @@ Error BitcodeReader::parseConstants() {
(void)InRangeIndex;
} else if (BitCode == bitc::CST_CODE_CE_GEP_WITH_INRANGE) {
Flags = Record[OpNum++];
Expected<ConstantRange> MaybeInRange = readConstantRange(Record, OpNum);
Expected<ConstantRange> MaybeInRange =
readBitWidthAndConstantRange(Record, OpNum);
if (!MaybeInRange)
return MaybeInRange.takeError();
InRange = MaybeInRange.get();
25 changes: 19 additions & 6 deletions llvm/lib/Bitcode/Writer/BitcodeWriter.cpp
@@ -33,6 +33,7 @@
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/Comdat.h"
#include "llvm/IR/Constant.h"
#include "llvm/IR/ConstantRangeList.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DebugInfoMetadata.h"
#include "llvm/IR/DebugLoc.h"
@@ -870,6 +871,8 @@ static uint64_t getAttrKindEncoding(Attribute::AttrKind Kind) {
return bitc::ATTR_KIND_DEAD_ON_UNWIND;
case Attribute::Range:
return bitc::ATTR_KIND_RANGE;
case Attribute::Initializes:
return bitc::ATTR_KIND_INITIALIZES;
case Attribute::EndAttrKinds:
llvm_unreachable("Can not encode end-attribute kinds marker.");
case Attribute::None:
@@ -901,9 +904,10 @@ static void emitWideAPInt(SmallVectorImpl<uint64_t> &Vals, const APInt &A) {
}

static void emitConstantRange(SmallVectorImpl<uint64_t> &Record,
const ConstantRange &CR) {
const ConstantRange &CR, bool EmitBitWidth) {
unsigned BitWidth = CR.getBitWidth();
Record.push_back(BitWidth);
if (EmitBitWidth)
Record.push_back(BitWidth);
if (BitWidth > 64) {
Record.push_back(CR.getLower().getActiveWords() |
(uint64_t(CR.getUpper().getActiveWords()) << 32));
@@ -954,11 +958,20 @@ void ModuleBitcodeWriter::writeAttributeGroupTable() {
Record.push_back(getAttrKindEncoding(Attr.getKindAsEnum()));
if (Ty)
Record.push_back(VE.getTypeID(Attr.getValueAsType()));
} else {
assert(Attr.isConstantRangeAttribute());
} else if (Attr.isConstantRangeAttribute()) {
Record.push_back(7);
Record.push_back(getAttrKindEncoding(Attr.getKindAsEnum()));
emitConstantRange(Record, Attr.getValueAsConstantRange());
emitConstantRange(Record, Attr.getValueAsConstantRange(),
/*EmitBitWidth=*/true);
} else {
assert(Attr.isConstantRangeListAttribute());
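// Emitted layout: [8, kind, numRanges, bitWidth, range payloads...]; the
// bit width is shared by all ranges, so emitConstantRange omits it per range.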
Record.push_back(8);
Record.push_back(getAttrKindEncoding(Attr.getKindAsEnum()));
ArrayRef<ConstantRange> Val = Attr.getValueAsConstantRangeList();
Record.push_back(Val.size());
Record.push_back(Val[0].getBitWidth());
for (auto &CR : Val)
emitConstantRange(Record, CR, /*EmitBitWidth=*/false);
}
}

@@ -2788,7 +2801,7 @@ void ModuleBitcodeWriter::writeConstants(unsigned FirstVal, unsigned LastVal,
Record.push_back(getOptimizationFlags(GO));
if (std::optional<ConstantRange> Range = GO->getInRange()) {
Code = bitc::CST_CODE_CE_GEP_WITH_INRANGE;
emitConstantRange(Record, *Range);
emitConstantRange(Record, *Range, /*EmitBitWidth=*/true);
}
for (unsigned i = 0, e = CE->getNumOperands(); i != e; ++i) {
Record.push_back(VE.getTypeID(C->getOperand(i)->getType()));
53 changes: 52 additions & 1 deletion llvm/lib/IR/AttributeImpl.h
@@ -21,6 +21,7 @@
#include "llvm/ADT/StringRef.h"
#include "llvm/IR/Attributes.h"
#include "llvm/IR/ConstantRange.h"
#include "llvm/IR/ConstantRangeList.h"
#include "llvm/Support/TrailingObjects.h"
#include <cassert>
#include <cstddef>
@@ -48,6 +49,7 @@ class AttributeImpl : public FoldingSetNode {
StringAttrEntry,
TypeAttrEntry,
ConstantRangeAttrEntry,
ConstantRangeListAttrEntry,
};

AttributeImpl(AttrEntryKind KindID) : KindID(KindID) {}
@@ -64,6 +66,9 @@ class AttributeImpl : public FoldingSetNode {
bool isConstantRangeAttribute() const {
return KindID == ConstantRangeAttrEntry;
}
bool isConstantRangeListAttribute() const {
return KindID == ConstantRangeListAttrEntry;
}

bool hasAttribute(Attribute::AttrKind A) const;
bool hasAttribute(StringRef Kind) const;
@@ -79,6 +84,8 @@ class AttributeImpl : public FoldingSetNode {

const ConstantRange &getValueAsConstantRange() const;

ArrayRef<ConstantRange> getValueAsConstantRangeList() const;

/// Used when sorting the attributes.
bool operator<(const AttributeImpl &AI) const;

@@ -91,8 +98,10 @@ class AttributeImpl : public FoldingSetNode {
Profile(ID, getKindAsString(), getValueAsString());
else if (isTypeAttribute())
Profile(ID, getKindAsEnum(), getValueAsType());
else
else if (isConstantRangeAttribute())
Profile(ID, getKindAsEnum(), getValueAsConstantRange());
else
Profile(ID, getKindAsEnum(), getValueAsConstantRangeList());
}

static void Profile(FoldingSetNodeID &ID, Attribute::AttrKind Kind) {
@@ -124,6 +133,16 @@ class AttributeImpl : public FoldingSetNode {
CR.getLower().Profile(ID);
CR.getUpper().Profile(ID);
}

static void Profile(FoldingSetNodeID &ID, Attribute::AttrKind Kind,
ArrayRef<ConstantRange> Val) {
ID.AddInteger(Kind);
ID.AddInteger(Val.size());
for (auto &CR : Val) {
CR.getLower().Profile(ID);
CR.getUpper().Profile(ID);
}
}
};

static_assert(std::is_trivially_destructible<AttributeImpl>::value,
@@ -222,6 +241,38 @@ class ConstantRangeAttributeImpl : public EnumAttributeImpl {
const ConstantRange &getConstantRangeValue() const { return CR; }
};

class ConstantRangeListAttributeImpl final
: public EnumAttributeImpl,
private TrailingObjects<ConstantRangeListAttributeImpl, ConstantRange> {
friend TrailingObjects;

unsigned Size;
size_t numTrailingObjects(OverloadToken<ConstantRange>) const { return Size; }

public:
ConstantRangeListAttributeImpl(Attribute::AttrKind Kind,
ArrayRef<ConstantRange> Val)
: EnumAttributeImpl(ConstantRangeListAttrEntry, Kind), Size(Val.size()) {
assert(Size > 0);
ConstantRange *TrailingCR = getTrailingObjects<ConstantRange>();
std::uninitialized_copy(Val.begin(), Val.end(), TrailingCR);
}

~ConstantRangeListAttributeImpl() {
ConstantRange *TrailingCR = getTrailingObjects<ConstantRange>();
for (unsigned I = 0; I != Size; ++I)
TrailingCR[I].~ConstantRange();
}

ArrayRef<ConstantRange> getConstantRangeListValue() const {
return ArrayRef(getTrailingObjects<ConstantRange>(), Size);
}

static size_t totalSizeToAlloc(ArrayRef<ConstantRange> Val) {
return TrailingObjects::totalSizeToAlloc<ConstantRange>(Val.size());
}
};

class AttributeBitSet {
/// Bitset with a bit for each available attribute Attribute::AttrKind.
uint8_t AvailableAttrs[12] = {};
90 changes: 87 additions & 3 deletions llvm/lib/IR/Attributes.cpp
@@ -25,6 +25,7 @@
#include "llvm/Config/llvm-config.h"
#include "llvm/IR/AttributeMask.h"
#include "llvm/IR/ConstantRange.h"
#include "llvm/IR/ConstantRangeList.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Type.h"
@@ -191,6 +192,43 @@ Attribute Attribute::get(LLVMContext &Context, Attribute::AttrKind Kind,
return Attribute(PA);
}

Attribute Attribute::get(LLVMContext &Context, Attribute::AttrKind Kind,
ArrayRef<ConstantRange> Val) {
assert(Attribute::isConstantRangeListAttrKind(Kind) &&
"Not a ConstantRangeList attribute");
LLVMContextImpl *pImpl = Context.pImpl;
FoldingSetNodeID ID;
ID.AddInteger(Kind);
ID.AddInteger(Val.size());
for (auto &CR : Val) {
CR.getLower().Profile(ID);
CR.getUpper().Profile(ID);
}

void *InsertPoint;
AttributeImpl *PA = pImpl->AttrsSet.FindNodeOrInsertPos(ID, InsertPoint);

if (!PA) {
// If we didn't find any existing attributes of the same shape then create a
// new one and insert it.
// ConstantRangeListAttributeImpl is a dynamically sized class and cannot
// use SpecificBumpPtrAllocator. Instead, we use normal Alloc for
// allocation and record the allocated pointer in
// `ConstantRangeListAttributes`. LLVMContext destructor will call the
// destructor of the allocated pointer explicitly.
void *Mem = pImpl->Alloc.Allocate(
ConstantRangeListAttributeImpl::totalSizeToAlloc(Val),
alignof(ConstantRangeListAttributeImpl));
PA = new (Mem) ConstantRangeListAttributeImpl(Kind, Val);
pImpl->AttrsSet.InsertNode(PA, InsertPoint);
pImpl->ConstantRangeListAttributes.push_back(
reinterpret_cast<ConstantRangeListAttributeImpl *>(PA));
}

// Return the Attribute that we found or created.
return Attribute(PA);
}

Attribute Attribute::getWithAlignment(LLVMContext &Context, Align A) {
assert(A <= llvm::Value::MaximumAlignment && "Alignment too large.");
return get(Context, Alignment, A.value());
@@ -317,10 +355,14 @@ bool Attribute::isConstantRangeAttribute() const {
return pImpl && pImpl->isConstantRangeAttribute();
}

bool Attribute::isConstantRangeListAttribute() const {
return pImpl && pImpl->isConstantRangeListAttribute();
}

Attribute::AttrKind Attribute::getKindAsEnum() const {
if (!pImpl) return None;
assert((isEnumAttribute() || isIntAttribute() || isTypeAttribute() ||
isConstantRangeAttribute()) &&
isConstantRangeAttribute() || isConstantRangeListAttribute()) &&
"Invalid attribute type to get the kind as an enum!");
return pImpl->getKindAsEnum();
}
@@ -366,6 +408,12 @@ const ConstantRange &Attribute::getValueAsConstantRange() const {
return pImpl->getValueAsConstantRange();
}

ArrayRef<ConstantRange> Attribute::getValueAsConstantRangeList() const {
assert(isConstantRangeListAttribute() &&
"Invalid attribute type to get the value as a ConstantRangeList!");
return pImpl->getValueAsConstantRangeList();
}

bool Attribute::hasAttribute(AttrKind Kind) const {
return (pImpl && pImpl->hasAttribute(Kind)) || (!pImpl && Kind == None);
}
@@ -450,6 +498,12 @@ const ConstantRange &Attribute::getRange() const {
return pImpl->getValueAsConstantRange();
}

ArrayRef<ConstantRange> Attribute::getInitializes() const {
assert(hasAttribute(Attribute::Initializes) &&
"Trying to get initializes attr from non-ConstantRangeList attribute");
return pImpl->getValueAsConstantRangeList();
}

static const char *getModRefStr(ModRefInfo MR) {
switch (MR) {
case ModRefInfo::NoModRef:
@@ -611,6 +665,17 @@ std::string Attribute::getAsString(bool InAttrGrp) const {
return Result;
}

if (hasAttribute(Attribute::Initializes)) {
std::string Result;
raw_string_ostream OS(Result);
ConstantRangeList CRL = getInitializes();
OS << "initializes(";
CRL.print(OS);
OS << ")";
OS.flush();
return Result;
}
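
For reference, an attribute covering two ranges would render as follows (an illustrative string consistent with the printer above, not output captured from the patch):

    initializes((0, 8), (16, 24))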

// Convert target-dependent attributes to strings of the form:
//
// "kind"
@@ -701,7 +766,7 @@ bool AttributeImpl::hasAttribute(StringRef Kind) const {

Attribute::AttrKind AttributeImpl::getKindAsEnum() const {
assert(isEnumAttribute() || isIntAttribute() || isTypeAttribute() ||
isConstantRangeAttribute());
isConstantRangeAttribute() || isConstantRangeListAttribute());
return static_cast<const EnumAttributeImpl *>(this)->getEnumKind();
}

Expand Down Expand Up @@ -736,6 +801,12 @@ const ConstantRange &AttributeImpl::getValueAsConstantRange() const {
->getConstantRangeValue();
}

ArrayRef<ConstantRange> AttributeImpl::getValueAsConstantRangeList() const {
assert(isConstantRangeListAttribute());
return static_cast<const ConstantRangeListAttributeImpl *>(this)
->getConstantRangeListValue();
}

bool AttributeImpl::operator<(const AttributeImpl &AI) const {
if (this == &AI)
return false;
@@ -750,6 +821,8 @@ bool AttributeImpl::operator<(const AttributeImpl &AI) const {
assert(!AI.isEnumAttribute() && "Non-unique attribute");
assert(!AI.isTypeAttribute() && "Comparison of types would be unstable");
assert(!AI.isConstantRangeAttribute() && "Unclear how to compare ranges");
assert(!AI.isConstantRangeListAttribute() &&
"Unclear how to compare range list");
// TODO: Is this actually needed?
assert(AI.isIntAttribute() && "Only possibility left");
return getValueAsInt() < AI.getValueAsInt();
@@ -1954,6 +2027,16 @@ AttrBuilder &AttrBuilder::addRangeAttr(const ConstantRange &CR) {
return addConstantRangeAttr(Attribute::Range, CR);
}

AttrBuilder &
AttrBuilder::addConstantRangeListAttr(Attribute::AttrKind Kind,
ArrayRef<ConstantRange> Val) {
return addAttribute(Attribute::get(Ctx, Kind, Val));
}

AttrBuilder &AttrBuilder::addInitializesAttr(const ConstantRangeList &CRL) {
return addConstantRangeListAttr(Attribute::Initializes, CRL.rangesRef());
}

AttrBuilder &AttrBuilder::merge(const AttrBuilder &B) {
// TODO: Could make this O(n) as we're merging two sorted lists.
for (const auto &I : B.attrs())
Expand Down Expand Up @@ -2042,7 +2125,8 @@ AttributeMask AttributeFuncs::typeIncompatible(Type *Ty,
.addAttribute(Attribute::Dereferenceable)
.addAttribute(Attribute::DereferenceableOrNull)
.addAttribute(Attribute::Writable)
.addAttribute(Attribute::DeadOnUnwind);
.addAttribute(Attribute::DeadOnUnwind)
.addAttribute(Attribute::Initializes);
if (ASK & ASK_UNSAFE_TO_DROP)
Incompatible.addAttribute(Attribute::Nest)
.addAttribute(Attribute::SwiftError)
1 change: 1 addition & 0 deletions llvm/lib/IR/CMakeLists.txt
@@ -9,6 +9,7 @@ add_llvm_component_library(LLVMCore
Comdat.cpp
ConstantFold.cpp
ConstantRange.cpp
ConstantRangeList.cpp
Constants.cpp
ConvergenceVerifier.cpp
Core.cpp
95 changes: 95 additions & 0 deletions llvm/lib/IR/ConstantRangeList.cpp
@@ -0,0 +1,95 @@
//===- ConstantRangeList.cpp - ConstantRangeList implementation -----------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#include "llvm/IR/ConstantRangeList.h"
#include <cstddef>

using namespace llvm;

bool ConstantRangeList::isOrderedRanges(ArrayRef<ConstantRange> RangesRef) {
if (RangesRef.empty())
return true;
auto Range = RangesRef[0];
if (Range.getLower().sge(Range.getUpper()))
return false;
for (unsigned i = 1; i < RangesRef.size(); i++) {
auto CurRange = RangesRef[i];
auto PreRange = RangesRef[i - 1];
if (CurRange.getLower().sge(CurRange.getUpper()) ||
CurRange.getLower().sle(PreRange.getUpper()))
return false;
}
return true;
}
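
A quick illustration of the ordering rule (editor's sketch; the 64-bit ranges and names are invented). Each range must be non-empty, and each lower bound must be strictly greater than the previous upper bound, so adjacent ranges are rejected here rather than merged:

    ConstantRange A(APInt(64, 0), APInt(64, 4));   // [0, 4)
    ConstantRange B(APInt(64, 8), APInt(64, 12));  // [8, 12)
    ConstantRange C(APInt(64, 4), APInt(64, 8));   // [4, 8)
    SmallVector<ConstantRange, 2> Good = {A, B};
    SmallVector<ConstantRange, 2> Bad = {A, C};
    assert(ConstantRangeList::isOrderedRanges(Good));  // gap between 4 and 8
    assert(!ConstantRangeList::isOrderedRanges(Bad));  // 4 is not > 4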

std::optional<ConstantRangeList>
ConstantRangeList::getConstantRangeList(ArrayRef<ConstantRange> RangesRef) {
if (!isOrderedRanges(RangesRef))
return std::nullopt;
return ConstantRangeList(RangesRef);
}

void ConstantRangeList::insert(const ConstantRange &NewRange) {
if (NewRange.isEmptySet())
return;
assert(!NewRange.isFullSet() && "Do not support full set");
assert(NewRange.getLower().slt(NewRange.getUpper()));
assert(getBitWidth() == NewRange.getBitWidth());
// Handle common cases.
if (empty() || Ranges.back().getUpper().slt(NewRange.getLower())) {
Ranges.push_back(NewRange);
return;
}
if (NewRange.getUpper().slt(Ranges.front().getLower())) {
Ranges.insert(Ranges.begin(), NewRange);
return;
}

auto LowerBound = lower_bound(
Ranges, NewRange, [](const ConstantRange &a, const ConstantRange &b) {
return a.getLower().slt(b.getLower());
});
if (LowerBound != Ranges.end() && LowerBound->contains(NewRange))
return;

// Slow insert.
SmallVector<ConstantRange, 2> ExistingTail(LowerBound, Ranges.end());
Ranges.erase(LowerBound, Ranges.end());
// Merge consecutive ranges.
if (!Ranges.empty() && NewRange.getLower().sle(Ranges.back().getUpper())) {
APInt NewLower = Ranges.back().getLower();
APInt NewUpper =
APIntOps::smax(NewRange.getUpper(), Ranges.back().getUpper());
Ranges.back() = ConstantRange(NewLower, NewUpper);
} else {
Ranges.push_back(NewRange);
}
for (auto Iter = ExistingTail.begin(); Iter != ExistingTail.end(); Iter++) {
if (Ranges.back().getUpper().slt(Iter->getLower())) {
Ranges.push_back(*Iter);
} else {
APInt NewLower = Ranges.back().getLower();
APInt NewUpper =
APIntOps::smax(Iter->getUpper(), Ranges.back().getUpper());
Ranges.back() = ConstantRange(NewLower, NewUpper);
}
}
}
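
To make the merge behavior concrete, a sketch of successive inserts (editor's illustration; assumes a default-constructed 64-bit list):

    ConstantRangeList CRL;
    CRL.insert(ConstantRange(APInt(64, 0), APInt(64, 4)));   // {[0, 4)}
    CRL.insert(ConstantRange(APInt(64, 8), APInt(64, 12)));  // {[0, 4), [8, 12)}
    CRL.insert(ConstantRange(APInt(64, 2), APInt(64, 10)));  // overlaps both: {[0, 12)}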

void ConstantRangeList::print(raw_ostream &OS) const {
interleaveComma(Ranges, OS, [&](ConstantRange CR) {
OS << "(" << CR.getLower() << ", " << CR.getUpper() << ")";
});
}

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
LLVM_DUMP_METHOD void ConstantRangeList::dump() const {
print(dbgs());
dbgs() << '\n';
}
#endif
3 changes: 3 additions & 0 deletions llvm/lib/IR/LLVMContextImpl.cpp
@@ -91,6 +91,9 @@ LLVMContextImpl::~LLVMContextImpl() {
// Destroy MDNodes.
for (MDNode *I : DistinctMDNodes)
I->deleteAsSubclass();

for (auto *ConstantRangeListAttribute : ConstantRangeListAttributes)
ConstantRangeListAttribute->~ConstantRangeListAttributeImpl();
#define HANDLE_MDNODE_LEAF_UNIQUABLE(CLASS) \
for (CLASS * I : CLASS##s) \
delete I;
8 changes: 8 additions & 0 deletions llvm/lib/IR/LLVMContextImpl.h
@@ -57,6 +57,7 @@ class AttributeListImpl;
class AttributeSetNode;
class BasicBlock;
class ConstantRangeAttributeImpl;
class ConstantRangeListAttributeImpl;
struct DiagnosticHandler;
class DbgMarker;
class ElementCount;
@@ -1534,6 +1535,13 @@ class LLVMContextImpl {
// them on context teardown.
std::vector<MDNode *> DistinctMDNodes;

// ConstantRangeListAttributeImpl is a TrailingObjects/ArrayRef of
// ConstantRange. Since this is a dynamically sized class, it's not
// possible to use SpecificBumpPtrAllocator. Instead, we use normal Alloc
// for allocation and record all allocated pointers in this vector. In the
// LLVMContext destructor, call the destructors of everything in the vector.
std::vector<ConstantRangeListAttributeImpl *> ConstantRangeListAttributes;

DenseMap<Type *, std::unique_ptr<ConstantAggregateZero>> CAZConstants;

using ArrayConstantsTy = ConstantUniqueMap<ConstantArray>;
9 changes: 9 additions & 0 deletions llvm/lib/IR/Verifier.cpp
@@ -72,6 +72,7 @@
#include "llvm/IR/Comdat.h"
#include "llvm/IR/Constant.h"
#include "llvm/IR/ConstantRange.h"
#include "llvm/IR/ConstantRangeList.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/ConvergenceVerifier.h"
#include "llvm/IR/DataLayout.h"
@@ -2059,6 +2060,14 @@ void Verifier::verifyParameterAttrs(AttributeSet Attrs, Type *Ty,
}
}

if (Attrs.hasAttribute(Attribute::Initializes)) {
auto Inits = Attrs.getAttribute(Attribute::Initializes).getInitializes();
Check(!Inits.empty(), "Attribute 'initializes' does not support empty list",
V);
Check(ConstantRangeList::isOrderedRanges(Inits),
"Attribute 'initializes' does not support unordered ranges", V);
}
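
At the IR level, these checks admit forms like the one below and reject an empty or unordered range list (editor's sketch of the surface syntax implied by the printer in Attributes.cpp; the parser change is outside this diff):

    define void @writer(ptr initializes((0, 8)) %p) {
      store i64 0, ptr %p   ; writes exactly bytes [0, 8)
      ret void
    }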

if (Attrs.hasAttribute(Attribute::NoFPClass)) {
uint64_t Val = Attrs.getAttribute(Attribute::NoFPClass).getValueAsInt();
Check(Val != 0, "Attribute 'nofpclass' must have at least one test bit set",
72 changes: 32 additions & 40 deletions llvm/lib/Target/AMDGPU/BUFInstructions.td
@@ -532,7 +532,7 @@ multiclass MUBUF_Pseudo_Load_Pats_Common<string BaseInst, ValueType load_vt = i3
}

multiclass MUBUF_Pseudo_Load_Pats<string BaseInst, ValueType load_vt = i32, SDPatternOperator ld = null_frag>{
let SubtargetPredicate = HasUnrestrictedSOffset in {
let OtherPredicates = [HasUnrestrictedSOffset] in {
defm : MUBUF_Pseudo_Load_Pats_Common<BaseInst, load_vt, ld>;
}
defm : MUBUF_Pseudo_Load_Pats_Common<BaseInst # "_VBUFFER", load_vt, ld>;
@@ -629,7 +629,7 @@ multiclass MUBUF_Pseudo_Store_Pats_Common<string BaseInst, ValueType store_vt =
}

multiclass MUBUF_Pseudo_Store_Pats<string BaseInst, ValueType store_vt = i32, SDPatternOperator st = null_frag> {
let SubtargetPredicate = HasUnrestrictedSOffset in {
let OtherPredicates = [HasUnrestrictedSOffset] in {
defm : MUBUF_Pseudo_Store_Pats_Common<BaseInst, store_vt, st>;
}
defm : MUBUF_Pseudo_Store_Pats_Common<BaseInst # "_VBUFFER", store_vt, st>;
@@ -1227,12 +1227,12 @@ defm BUFFER_ATOMIC_PK_ADD_F16 : MUBUF_Pseudo_Atomics_NO_RTN <
"buffer_atomic_pk_add_f16", VGPR_32, v2f16
>;

let OtherPredicates = [HasAtomicFaddRtnInsts] in
let SubtargetPredicate = HasAtomicFaddRtnInsts in
defm BUFFER_ATOMIC_ADD_F32 : MUBUF_Pseudo_Atomics_RTN<
"buffer_atomic_add_f32", VGPR_32, f32, null_frag
>;

let OtherPredicates = [HasAtomicBufferGlobalPkAddF16Insts] in
let SubtargetPredicate = HasAtomicBufferGlobalPkAddF16Insts in
defm BUFFER_ATOMIC_PK_ADD_F16 : MUBUF_Pseudo_Atomics_RTN <
"buffer_atomic_pk_add_f16", VGPR_32, v2f16, null_frag
>;
@@ -1699,9 +1699,11 @@ multiclass SIBufferAtomicPat_Common<string OpPrefix, ValueType vt, string Inst,

multiclass SIBufferAtomicPat<string OpPrefix, ValueType vt, string Inst,
list<string> RtnModes = ["ret", "noret"]> {
let SubtargetPredicate = HasUnrestrictedSOffset in {
let OtherPredicates = [HasUnrestrictedSOffset] in {
defm : SIBufferAtomicPat_Common<OpPrefix, vt, Inst, RtnModes>;
}

// FIXME: This needs a !HasUnrestrictedSOffset predicate
defm : SIBufferAtomicPat_Common<OpPrefix, vt, Inst # "_VBUFFER", RtnModes>;
}

@@ -1732,18 +1734,19 @@ defm : SIBufferAtomicPat<"SIbuffer_atomic_xor", i64, "BUFFER_ATOMIC_XOR_X2">;
defm : SIBufferAtomicPat<"SIbuffer_atomic_inc", i64, "BUFFER_ATOMIC_INC_X2">;
defm : SIBufferAtomicPat<"SIbuffer_atomic_dec", i64, "BUFFER_ATOMIC_DEC_X2">;

let OtherPredicates = [HasAtomicCSubNoRtnInsts] in
let SubtargetPredicate = HasAtomicCSubNoRtnInsts in
defm : SIBufferAtomicPat<"SIbuffer_atomic_csub", i32, "BUFFER_ATOMIC_CSUB", ["noret"]>;

let SubtargetPredicate = isGFX12Plus in {
defm : SIBufferAtomicPat_Common<"SIbuffer_atomic_fadd", v2bf16, "BUFFER_ATOMIC_PK_ADD_BF16_VBUFFER">;
defm : SIBufferAtomicPat_Common<"SIbuffer_atomic_cond_sub_u32", i32, "BUFFER_ATOMIC_COND_SUB_U32_VBUFFER", ["ret"]>;
}

let OtherPredicates = [HasAtomicCSubNoRtnInsts] in
defm : SIBufferAtomicPat_Common<"SIbuffer_atomic_cond_sub_u32", i32, "BUFFER_ATOMIC_COND_SUB_U32_VBUFFER", ["noret"]>;
let SubtargetPredicate = HasAtomicCSubNoRtnInsts in {
defm : SIBufferAtomicPat_Common<"SIbuffer_atomic_cond_sub_u32", i32, "BUFFER_ATOMIC_COND_SUB_U32_VBUFFER", ["noret"]>;
}

let OtherPredicates = [isGFX6GFX7GFX10Plus] in {
let SubtargetPredicate = isGFX6GFX7GFX10Plus in {
defm : SIBufferAtomicPat<"SIbuffer_atomic_fmin", f32, "BUFFER_ATOMIC_FMIN">;
defm : SIBufferAtomicPat<"SIbuffer_atomic_fmax", f32, "BUFFER_ATOMIC_FMAX">;
}
@@ -1803,29 +1806,21 @@ multiclass BufferAtomicPatterns_NO_RTN<SDPatternOperator name, ValueType vt,
defm : BufferAtomicPatterns_NO_RTN_Common<name, vt, opcode # "_VBUFFER">;
}

let OtherPredicates = [HasAtomicFaddNoRtnInsts] in
let SubtargetPredicate = HasAtomicFaddNoRtnInsts in
defm : SIBufferAtomicPat<"SIbuffer_atomic_fadd", f32, "BUFFER_ATOMIC_ADD_F32", ["noret"]>;

let OtherPredicates = [HasAtomicBufferGlobalPkAddF16NoRtnInsts] in {
let SubtargetPredicate = isGFX9Only in
defm : SIBufferAtomicPat_Common<"SIbuffer_atomic_fadd", v2f16, "BUFFER_ATOMIC_PK_ADD_F16", ["noret"]>;

let SubtargetPredicate = isGFX12Plus in
defm : SIBufferAtomicPat_Common<"SIbuffer_atomic_fadd", v2f16, "BUFFER_ATOMIC_PK_ADD_F16_VBUFFER", ["noret"]>;
} // End OtherPredicates = [HasAtomicBufferGlobalPkAddF16NoRtnInsts]
let SubtargetPredicate = HasAtomicBufferGlobalPkAddF16NoRtnInsts in {
defm : SIBufferAtomicPat<"SIbuffer_atomic_fadd", v2f16, "BUFFER_ATOMIC_PK_ADD_F16", ["noret"]>;
} // End SubtargetPredicate = HasAtomicBufferGlobalPkAddF16NoRtnInsts

let OtherPredicates = [HasAtomicFaddRtnInsts] in
let SubtargetPredicate = HasAtomicFaddRtnInsts in
defm : SIBufferAtomicPat<"SIbuffer_atomic_fadd", f32, "BUFFER_ATOMIC_ADD_F32", ["ret"]>;

let OtherPredicates = [HasAtomicBufferGlobalPkAddF16Insts] in {
let SubtargetPredicate = isGFX9Only in
defm : SIBufferAtomicPat_Common<"SIbuffer_atomic_fadd", v2f16, "BUFFER_ATOMIC_PK_ADD_F16", ["ret"]>;

let SubtargetPredicate = isGFX12Plus in
defm : SIBufferAtomicPat_Common<"SIbuffer_atomic_fadd", v2f16, "BUFFER_ATOMIC_PK_ADD_F16_VBUFFER", ["ret"]>;
} // End OtherPredicates = [HasAtomicBufferGlobalPkAddF16Insts]
let SubtargetPredicate = HasAtomicBufferGlobalPkAddF16Insts in {
defm : SIBufferAtomicPat<"SIbuffer_atomic_fadd", v2f16, "BUFFER_ATOMIC_PK_ADD_F16", ["ret"]>;
} // End SubtargetPredicate = HasAtomicBufferGlobalPkAddF16Insts

let OtherPredicates = [HasBufferFlatGlobalAtomicsF64] in {
let SubtargetPredicate = HasBufferFlatGlobalAtomicsF64 in {
defm : SIBufferAtomicPat<"SIbuffer_atomic_fadd", f64, "BUFFER_ATOMIC_ADD_F64">;
defm : SIBufferAtomicPat<"SIbuffer_atomic_fmin", f64, "BUFFER_ATOMIC_MIN_F64">;
defm : SIBufferAtomicPat<"SIbuffer_atomic_fmax", f64, "BUFFER_ATOMIC_MAX_F64">;
@@ -1901,7 +1896,7 @@ multiclass SIBufferAtomicCmpSwapPat_Common<ValueType vt, ValueType data_vt, stri
}

multiclass SIBufferAtomicCmpSwapPat<ValueType vt, ValueType data_vt, string Inst> {
let SubtargetPredicate = HasUnrestrictedSOffset in {
let OtherPredicates = [HasUnrestrictedSOffset] in {
defm : SIBufferAtomicCmpSwapPat_Common<vt, data_vt, Inst>;
}
defm : SIBufferAtomicCmpSwapPat_Common<vt, data_vt, Inst # "_VBUFFER">;
@@ -1952,7 +1947,7 @@ multiclass MUBUFLoad_PatternOffset_Common <string Instr, ValueType vt,

multiclass MUBUFLoad_PatternOffset <string Instr, ValueType vt,
PatFrag ld> {
let SubtargetPredicate = HasUnrestrictedSOffset in {
let OtherPredicates = [HasUnrestrictedSOffset] in {
defm : MUBUFLoad_PatternOffset_Common<Instr, vt, ld>;
}
defm : MUBUFLoad_PatternOffset_Common<Instr # "_VBUFFER", vt, ld>;
@@ -2193,7 +2188,7 @@ multiclass MTBUF_LoadIntrinsicPat_Common<SDPatternOperator name, ValueType vt,

multiclass MTBUF_LoadIntrinsicPat<SDPatternOperator name, ValueType vt,
string opcode, ValueType memoryVt = vt> {
let SubtargetPredicate = HasUnrestrictedSOffset in {
let OtherPredicates = [HasUnrestrictedSOffset] in {
defm : MTBUF_LoadIntrinsicPat_Common<name, vt, opcode, memoryVt>;
}
defm : MTBUF_LoadIntrinsicPat_Common<name, vt, opcode # "_VBUFFER", memoryVt>;
@@ -2208,15 +2203,15 @@ defm : MTBUF_LoadIntrinsicPat<SItbuffer_load, v2f32, "TBUFFER_LOAD_FORMAT_XY">;
defm : MTBUF_LoadIntrinsicPat<SItbuffer_load, v3f32, "TBUFFER_LOAD_FORMAT_XYZ">;
defm : MTBUF_LoadIntrinsicPat<SItbuffer_load, v4f32, "TBUFFER_LOAD_FORMAT_XYZW">;

let OtherPredicates = [HasUnpackedD16VMem] in {
let SubtargetPredicate = HasUnpackedD16VMem in {
defm : MTBUF_LoadIntrinsicPat_Common<SItbuffer_load_d16, f16, "TBUFFER_LOAD_FORMAT_D16_X_gfx80">;
defm : MTBUF_LoadIntrinsicPat_Common<SItbuffer_load_d16, i32, "TBUFFER_LOAD_FORMAT_D16_X_gfx80">;
defm : MTBUF_LoadIntrinsicPat_Common<SItbuffer_load_d16, v2i32, "TBUFFER_LOAD_FORMAT_D16_XY_gfx80">;
defm : MTBUF_LoadIntrinsicPat_Common<SItbuffer_load_d16, v3i32, "TBUFFER_LOAD_FORMAT_D16_XYZ_gfx80">;
defm : MTBUF_LoadIntrinsicPat_Common<SItbuffer_load_d16, v4i32, "TBUFFER_LOAD_FORMAT_D16_XYZW_gfx80">;
} // End HasUnpackedD16VMem.

let OtherPredicates = [HasPackedD16VMem] in {
let SubtargetPredicate = HasPackedD16VMem in {
defm : MTBUF_LoadIntrinsicPat<SItbuffer_load_d16, f16, "TBUFFER_LOAD_FORMAT_D16_X">;
defm : MTBUF_LoadIntrinsicPat<SItbuffer_load_d16, i32, "TBUFFER_LOAD_FORMAT_D16_X">;
defm : MTBUF_LoadIntrinsicPat<SItbuffer_load_d16, v2f16, "TBUFFER_LOAD_FORMAT_D16_XY">;
@@ -2265,7 +2260,7 @@ multiclass MTBUF_StoreIntrinsicPat_Common<SDPatternOperator name, ValueType vt,

multiclass MTBUF_StoreIntrinsicPat<SDPatternOperator name, ValueType vt,
string opcode, ValueType memoryVt = vt> {
let SubtargetPredicate = HasUnrestrictedSOffset in {
let OtherPredicates = [HasUnrestrictedSOffset] in {
defm : MTBUF_StoreIntrinsicPat_Common<name, vt, opcode, memoryVt>;
}
defm : MTBUF_StoreIntrinsicPat_Common<name, vt, opcode # "_VBUFFER", memoryVt>;
@@ -2280,15 +2275,15 @@ defm : MTBUF_StoreIntrinsicPat<SItbuffer_store, v2f32, "TBUFFER_STORE_FORMAT_XY"
defm : MTBUF_StoreIntrinsicPat<SItbuffer_store, v3f32, "TBUFFER_STORE_FORMAT_XYZ">;
defm : MTBUF_StoreIntrinsicPat<SItbuffer_store, v4f32, "TBUFFER_STORE_FORMAT_XYZW">;

let OtherPredicates = [HasUnpackedD16VMem] in {
let SubtargetPredicate = HasUnpackedD16VMem in {
defm : MTBUF_StoreIntrinsicPat_Common<SItbuffer_store_d16, f16, "TBUFFER_STORE_FORMAT_D16_X_gfx80">;
defm : MTBUF_StoreIntrinsicPat_Common<SItbuffer_store_d16, i32, "TBUFFER_STORE_FORMAT_D16_X_gfx80">;
defm : MTBUF_StoreIntrinsicPat_Common<SItbuffer_store_d16, v2i32, "TBUFFER_STORE_FORMAT_D16_XY_gfx80">;
defm : MTBUF_StoreIntrinsicPat_Common<SItbuffer_store_d16, v3i32, "TBUFFER_STORE_FORMAT_D16_XYZ_gfx80">;
defm : MTBUF_StoreIntrinsicPat_Common<SItbuffer_store_d16, v4i32, "TBUFFER_STORE_FORMAT_D16_XYZW_gfx80">;
} // End HasUnpackedD16VMem.

let OtherPredicates = [HasPackedD16VMem] in {
let SubtargetPredicate = HasPackedD16VMem in {
defm : MTBUF_StoreIntrinsicPat<SItbuffer_store_d16, f16, "TBUFFER_STORE_FORMAT_D16_X">;
defm : MTBUF_StoreIntrinsicPat<SItbuffer_store_d16, i32, "TBUFFER_STORE_FORMAT_D16_X">;
defm : MTBUF_StoreIntrinsicPat<SItbuffer_store_d16, v2f16, "TBUFFER_STORE_FORMAT_D16_XY">;
@@ -3082,9 +3077,9 @@ multiclass MUBUF_Real_vi_gfx90a<bits<7> op, bit isTFE = 0> : MUBUF_Real_vi<op> {
}

if ps.FPAtomic then {
let SubtargetPredicate = isGFX90AOnly,
AssemblerPredicate = isGFX90AOnly in
defm NAME : MUBUF_Real_gfx90a<op, 0>;
let AssemblerPredicate = isGFX90AOnly in
defm NAME : MUBUF_Real_gfx90a<op, 0>;

def _gfx940 : MUBUF_Real_gfx940<op, ps>;
}
}
@@ -3267,10 +3262,7 @@ defm BUFFER_WBINVL1_VOL : MUBUF_Real_vi <0x3f>;


defm BUFFER_ATOMIC_PK_ADD_F16 : MUBUF_Real_Atomic_vi <0x4e>;

let SubtargetPredicate = HasAtomicFaddNoRtnInsts in {
defm BUFFER_ATOMIC_ADD_F32 : MUBUF_Real_Atomic_vi <0x4d>;
} // End SubtargetPredicate = HasAtomicFaddNoRtnInsts

let SubtargetPredicate = isGFX90APlus in {
defm BUFFER_ATOMIC_ADD_F64 : MUBUF_Real_Atomic_vi<0x4f>;
1 change: 1 addition & 0 deletions llvm/lib/Transforms/Utils/CodeExtractor.cpp
@@ -1001,6 +1001,7 @@ Function *CodeExtractor::constructFunction(const ValueSet &inputs,
case Attribute::Writable:
case Attribute::DeadOnUnwind:
case Attribute::Range:
case Attribute::Initializes:
// These are not really attributes.
case Attribute::None:
case Attribute::EndAttrKinds:
17 changes: 1 addition & 16 deletions llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h
@@ -344,16 +344,6 @@ class LoopVectorizationPlanner {
/// A builder used to construct the current plan.
VPBuilder Builder;

/// Computes the cost of \p Plan for vectorization factor \p VF.
///
/// The current implementation requires access to the
/// LoopVectorizationLegality to handle inductions and reductions, which is
/// why it is kept separate from the VPlan-only cost infrastructure.
///
/// TODO: Move to VPlan::cost once the use of LoopVectorizationLegality has
/// been retired.
InstructionCost cost(VPlan &Plan, ElementCount VF) const;

public:
LoopVectorizationPlanner(
Loop *L, LoopInfo *LI, DominatorTree *DT, const TargetLibraryInfo *TLI,
@@ -375,9 +365,6 @@ class LoopVectorizationPlanner {
/// Return the best VPlan for \p VF.
VPlan &getBestPlanFor(ElementCount VF) const;

/// Return the most profitable plan and fix its VF to the most profitable one.
VPlan &getBestPlan() const;

/// Generate the IR code for the vectorized loop captured in VPlan \p BestPlan
/// according to the best selected \p VF and \p UF.
///
@@ -456,9 +443,7 @@ class LoopVectorizationPlanner {
ElementCount MinVF);

/// \return The most profitable vectorization factor and the cost of that VF.
/// This method checks every VF in \p CandidateVFs. This is now only used to
/// verify the decisions by the new VPlan-based cost-model and will be retired
/// once the VPlan-based cost-model is stabilized.
/// This method checks every VF in \p CandidateVFs.
VectorizationFactor
selectVectorizationFactor(const ElementCountSet &CandidateVFs);

229 changes: 22 additions & 207 deletions llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -290,7 +290,7 @@ static cl::opt<unsigned> ForceTargetMaxVectorInterleaveFactor(
cl::desc("A flag that overrides the target's max interleave factor for "
"vectorized loops."));

cl::opt<unsigned> ForceTargetInstructionCost(
static cl::opt<unsigned> ForceTargetInstructionCost(
"force-target-instruction-cost", cl::init(0), cl::Hidden,
cl::desc("A flag that overrides the target's expected cost for "
"an instruction to a single constant value. Mostly "
@@ -412,6 +412,14 @@ static bool hasIrregularType(Type *Ty, const DataLayout &DL) {
return DL.getTypeAllocSizeInBits(Ty) != DL.getTypeSizeInBits(Ty);
}

/// A helper function that returns the reciprocal of the block probability of
/// predicated blocks. If we return X, we are assuming the predicated block
/// will execute once for every X iterations of the loop header.
///
/// TODO: We should use actual block probability here, if available. Currently,
/// we always assume predicated blocks have a 50% chance of executing.
static unsigned getReciprocalPredBlockProb() { return 2; }
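
Concretely (editor's illustration of how the cost model consumes this value; `BlockCost` is invented): a predicated block's cost is divided by this factor, so the current constant of 2 models a 50% execution probability.

    InstructionCost BlockCost = 10;             // hypothetical cost of the predicated block
    BlockCost /= getReciprocalPredBlockProb();  // halved: block assumed to run every 2nd iteration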

/// Returns "best known" trip count for the specified loop \p L as defined by
/// the following procedure:
/// 1) Returns exact trip count if it is known.
@@ -1613,16 +1621,6 @@ class LoopVectorizationCostModel {
/// \p VF is the vectorization factor chosen for the original loop.
bool isEpilogueVectorizationProfitable(const ElementCount VF) const;

/// Return the cost of instructions in an inloop reduction pattern, if I is
/// part of that pattern.
std::optional<InstructionCost>
getReductionPatternCost(Instruction *I, ElementCount VF, Type *VectorTy,
TTI::TargetCostKind CostKind) const;

/// Returns the execution time cost of an instruction for a given vector
/// width. Vector width of one means scalar.
VectorizationCostTy getInstructionCost(Instruction *I, ElementCount VF);

private:
unsigned NumPredStores = 0;

@@ -1648,11 +1646,21 @@ class LoopVectorizationCostModel {
/// of elements.
ElementCount getMaxLegalScalableVF(unsigned MaxSafeElements);

/// Returns the execution time cost of an instruction for a given vector
/// width. Vector width of one means scalar.
VectorizationCostTy getInstructionCost(Instruction *I, ElementCount VF);

/// The cost-computation logic from getInstructionCost which provides
/// the vector type as an output parameter.
InstructionCost getInstructionCost(Instruction *I, ElementCount VF,
Type *&VectorTy);

/// Return the cost of instructions in an inloop reduction pattern, if I is
/// part of that pattern.
std::optional<InstructionCost>
getReductionPatternCost(Instruction *I, ElementCount VF, Type *VectorTy,
TTI::TargetCostKind CostKind) const;

/// Calculate vectorization cost of memory instruction \p I.
InstructionCost getMemoryInstructionCost(Instruction *I, ElementCount VF);

@@ -7289,10 +7297,7 @@ LoopVectorizationPlanner::plan(ElementCount UserVF, unsigned UserIC) {
if (!MaxFactors.hasVector())
return VectorizationFactor::Disabled();

// Select the optimal vectorization factor according to the legacy cost-model.
// This is now only used to verify the decisions by the new VPlan-based
// cost-model and will be retired once the VPlan-based cost-model is
// stabilized.
// Select the optimal vectorization factor.
VectorizationFactor VF = selectVectorizationFactor(VFCandidates);
assert((VF.Width.isScalar() || VF.ScalarCost > 0) && "when vectorizing, the scalar cost must be non-zero.");
if (!hasPlanWithVF(VF.Width)) {
@@ -7303,189 +7308,6 @@ LoopVectorizationPlanner::plan(ElementCount UserVF, unsigned UserIC) {
return VF;
}

InstructionCost VPCostContext::getLegacyCost(Instruction *UI,
ElementCount VF) const {
return CM.getInstructionCost(UI, VF).first;
}

bool VPCostContext::skipCostComputation(Instruction *UI, bool IsVector) const {
return (IsVector && CM.VecValuesToIgnore.contains(UI)) ||
SkipCostComputation.contains(UI);
}

InstructionCost LoopVectorizationPlanner::cost(VPlan &Plan,
ElementCount VF) const {
InstructionCost Cost = 0;
LLVMContext &LLVMCtx = OrigLoop->getHeader()->getContext();
VPCostContext CostCtx(CM.TTI, Legal->getWidestInductionType(), LLVMCtx, CM);

// Cost modeling for inductions is inaccurate in the legacy cost model
// compared to the recipes that are generated. To match here initially during
// VPlan cost model bring up directly use the induction costs from the legacy
// cost model. Note that we do this as pre-processing; the VPlan may not have
// any recipes associated with the original induction increment instruction
// and may replace truncates with VPWidenIntOrFpInductionRecipe. We precompute
// the cost of induction phis and increments (both that are represented by
// recipes and those that are not), to avoid distinguishing between them here,
// and skip all recipes that represent induction phis and increments (the
// former case) later on, if they exist, to avoid counting them twice.
// Similarly we pre-compute the cost of any optimized truncates.
// TODO: Switch to more accurate costing based on VPlan.
for (const auto &[IV, IndDesc] : Legal->getInductionVars()) {
Instruction *IVInc = cast<Instruction>(
IV->getIncomingValueForBlock(OrigLoop->getLoopLatch()));
SmallVector<Instruction *> IVInsts = {IV, IVInc};
for (User *U : IV->users()) {
auto *CI = cast<Instruction>(U);
if (!CostCtx.CM.isOptimizableIVTruncate(CI, VF))
continue;
IVInsts.push_back(CI);
}
for (Instruction *IVInst : IVInsts) {
if (!CostCtx.SkipCostComputation.insert(IVInst).second)
continue;
InstructionCost InductionCost = CostCtx.getLegacyCost(IVInst, VF);
LLVM_DEBUG({
dbgs() << "Cost of " << InductionCost << " for VF " << VF
<< ": induction instruction " << *IVInst << "\n";
});
Cost += InductionCost;
}
}

/// Compute the cost of all exiting conditions of the loop using the legacy
/// cost model. This is to match the legacy behavior, which adds the cost of
/// all exit conditions. Note that this over-estimates the cost, as there will
/// be a single condition to control the vector loop.
SmallVector<BasicBlock *> Exiting;
CM.TheLoop->getExitingBlocks(Exiting);
SetVector<Instruction *> ExitInstrs;
// Collect all exit conditions.
for (BasicBlock *EB : Exiting) {
auto *Term = dyn_cast<BranchInst>(EB->getTerminator());
if (!Term)
continue;
if (auto *CondI = dyn_cast<Instruction>(Term->getOperand(0))) {
ExitInstrs.insert(CondI);
}
}
// Compute the cost of all instructions only feeding the exit conditions.
for (unsigned I = 0; I != ExitInstrs.size(); ++I) {
Instruction *CondI = ExitInstrs[I];
if (!OrigLoop->contains(CondI) ||
!CostCtx.SkipCostComputation.insert(CondI).second)
continue;
Cost += CostCtx.getLegacyCost(CondI, VF);
for (Value *Op : CondI->operands()) {
auto *OpI = dyn_cast<Instruction>(Op);
if (!OpI || any_of(OpI->users(), [&ExitInstrs](User *U) {
return !ExitInstrs.contains(cast<Instruction>(U));
}))
continue;
ExitInstrs.insert(OpI);
}
}

// The legacy cost model has special logic to compute the cost of in-loop
// reductions, which may be smaller than the sum of all instructions involved
// in the reduction. For AnyOf reductions, VPlan codegen may remove the select
// which the legacy cost model uses to assign cost. Pre-compute their costs
// for now.
// TODO: Switch to costing based on VPlan once the logic has been ported.
for (const auto &[RedPhi, RdxDesc] : Legal->getReductionVars()) {
if (!CM.isInLoopReduction(RedPhi) &&
!RecurrenceDescriptor::isAnyOfRecurrenceKind(
RdxDesc.getRecurrenceKind()))
continue;

// AnyOf reduction codegen may remove the select. To match the legacy cost
// model, pre-compute the cost for AnyOf reductions here.
if (RecurrenceDescriptor::isAnyOfRecurrenceKind(
RdxDesc.getRecurrenceKind())) {
auto *Select = cast<SelectInst>(*find_if(
RedPhi->users(), [](User *U) { return isa<SelectInst>(U); }));
assert(!CostCtx.SkipCostComputation.contains(Select) &&
"reduction op visited multiple times");
CostCtx.SkipCostComputation.insert(Select);
auto ReductionCost = CostCtx.getLegacyCost(Select, VF);
LLVM_DEBUG(dbgs() << "Cost of " << ReductionCost << " for VF " << VF
<< ":\n any-of reduction " << *Select << "\n");
Cost += ReductionCost;
continue;
}

const auto &ChainOps = RdxDesc.getReductionOpChain(RedPhi, OrigLoop);
SetVector<Instruction *> ChainOpsAndOperands(ChainOps.begin(),
ChainOps.end());
// Also include the operands of instructions in the chain, as the cost-model
// may mark extends as free.
for (auto *ChainOp : ChainOps) {
for (Value *Op : ChainOp->operands()) {
if (auto *I = dyn_cast<Instruction>(Op))
ChainOpsAndOperands.insert(I);
}
}

// Pre-compute the cost for I, if it has a reduction pattern cost.
for (Instruction *I : ChainOpsAndOperands) {
auto ReductionCost = CM.getReductionPatternCost(
I, VF, ToVectorTy(I->getType(), VF), TTI::TCK_RecipThroughput);
if (!ReductionCost)
continue;

assert(!CostCtx.SkipCostComputation.contains(I) &&
"reduction op visited multiple times");
CostCtx.SkipCostComputation.insert(I);
LLVM_DEBUG(dbgs() << "Cost of " << ReductionCost << " for VF " << VF
<< ":\n in-loop reduction " << *I << "\n");
Cost += *ReductionCost;
}
}

// Now compute and add the VPlan-based cost.
Cost += Plan.cost(VF, CostCtx);
LLVM_DEBUG(dbgs() << "Cost for VF " << VF << ": " << Cost << "\n");
return Cost;
}

VPlan &LoopVectorizationPlanner::getBestPlan() const {
// If there is a single VPlan with a single VF, return it directly.
VPlan &FirstPlan = *VPlans[0];
if (VPlans.size() == 1 && size(FirstPlan.vectorFactors()) == 1)
return FirstPlan;

VPlan *BestPlan = &FirstPlan;
ElementCount ScalarVF = ElementCount::getFixed(1);
assert(hasPlanWithVF(ScalarVF) &&
"More than a single plan/VF w/o any plan having scalar VF");

InstructionCost ScalarCost = cost(getBestPlanFor(ScalarVF), ScalarVF);
VectorizationFactor BestFactor(ScalarVF, ScalarCost, ScalarCost);

bool ForceVectorization = Hints.getForce() == LoopVectorizeHints::FK_Enabled;
if (ForceVectorization) {
// Ignore scalar width, because the user explicitly wants vectorization.
// Initialize cost to max so that VF = 2 is, at least, chosen during cost
// evaluation.
BestFactor.Cost = InstructionCost::getMax();
}

for (auto &P : VPlans) {
for (ElementCount VF : P->vectorFactors()) {
if (VF.isScalar())
continue;
InstructionCost Cost = cost(*P, VF);
VectorizationFactor CurrentFactor(VF, Cost, ScalarCost);
if (isMoreProfitable(CurrentFactor, BestFactor)) {
BestFactor = CurrentFactor;
BestPlan = &*P;
}
}
}
BestPlan->setVF(BestFactor.Width);
return *BestPlan;
}

VPlan &LoopVectorizationPlanner::getBestPlanFor(ElementCount VF) const {
assert(count_if(VPlans,
[VF](const VPlanPtr &Plan) { return Plan->hasVF(VF); }) ==
@@ -10344,15 +10166,8 @@ bool LoopVectorizePass::processLoop(Loop *L) {
VF.MinProfitableTripCount, IC, &LVL, &CM, BFI,
PSI, Checks);

VPlan &BestPlan = LVP.getBestPlan();
assert(size(BestPlan.vectorFactors()) == 1 &&
"Plan should have a single VF");
ElementCount Width = *BestPlan.vectorFactors().begin();
LLVM_DEBUG(dbgs() << "VF picked by VPlan cost model: " << Width
<< "\n");
assert(VF.Width == Width &&
"VPlan cost model and legacy cost model disagreed");
LVP.executePlan(Width, IC, BestPlan, LB, DT, false);
VPlan &BestPlan = LVP.getBestPlanFor(VF.Width);
LVP.executePlan(VF.Width, IC, BestPlan, LB, DT, false);
++LoopsVectorized;

// Add metadata to disable runtime unrolling a scalar loop when there
89 changes: 1 addition & 88 deletions llvm/lib/Transforms/Vectorize/VPlan.cpp
@@ -752,72 +752,6 @@ void VPRegionBlock::execute(VPTransformState *State) {
State->Instance.reset();
}

InstructionCost VPBasicBlock::cost(ElementCount VF, VPCostContext &Ctx) {
InstructionCost Cost = 0;
for (VPRecipeBase &R : Recipes)
Cost += R.cost(VF, Ctx);
return Cost;
}

InstructionCost VPRegionBlock::cost(ElementCount VF, VPCostContext &Ctx) {
if (!isReplicator()) {
InstructionCost Cost = 0;
for (VPBlockBase *Block : vp_depth_first_shallow(getEntry()))
Cost += Block->cost(VF, Ctx);
InstructionCost BackedgeCost =
Ctx.TTI.getCFInstrCost(Instruction::Br, TTI::TCK_RecipThroughput);
LLVM_DEBUG(dbgs() << "Cost of " << BackedgeCost << " for VF " << VF
<< ": vector loop backedge\n");
Cost += BackedgeCost;
return Cost;
}

// Compute the cost of a replicate region. Replicating isn't supported for
// scalable vectors, return an invalid cost for them.
// TODO: Discard scalable VPlans with replicate recipes earlier after
// construction.
if (VF.isScalable())
return InstructionCost::getInvalid();

// First compute the cost of the conditionally executed recipes, followed by
// account for the branching cost, except if the mask is a header mask or
// uniform condition.
using namespace llvm::VPlanPatternMatch;
VPBasicBlock *Then = cast<VPBasicBlock>(getEntry()->getSuccessors()[0]);
InstructionCost ThenCost = Then->cost(VF, Ctx);

// Note the cost estimates below closely match the current legacy cost model.
auto *BOM = cast<VPBranchOnMaskRecipe>(&getEntryBasicBlock()->front());
VPValue *Cond = BOM->getOperand(0);

// Check if Cond is a header mask and don't account for branching costs as the
// header mask will always be true except in the last iteration.
if (vputils::isHeaderMask(Cond, *getPlan()))
return ThenCost;

// For the scalar case, we may not always execute the original predicated
// block, Thus, scale the block's cost by the probability of executing it.
if (VF.isScalar())
return ThenCost / getReciprocalPredBlockProb();

// Check if Cond is a uniform compare and don't account for branching costs as
// a uniform condition corresponds to a single branch per VF.
if (vputils::isUniformBoolean(Cond))
return ThenCost;

// Add the cost for branches around scalarized and predicated blocks.
TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput;

auto *Vec_i1Ty = VectorType::get(IntegerType::getInt1Ty(Ctx.LLVMCtx), VF);
auto FixedVF = VF.getFixedValue(); // Known to be non scalable.
InstructionCost Cost = ThenCost;
Cost += Ctx.TTI.getScalarizationOverhead(Vec_i1Ty, APInt::getAllOnes(FixedVF),
/*Insert*/ false, /*Extract*/ true,
CostKind);
Cost += Ctx.TTI.getCFInstrCost(Instruction::Br, CostKind) * FixedVF;
return Cost;
}

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
void VPRegionBlock::print(raw_ostream &O, const Twine &Indent,
VPSlotTracker &SlotTracker) const {
@@ -1007,12 +941,6 @@ void VPlan::execute(VPTransformState *State) {
"DT not preserved correctly");
}

InstructionCost VPlan::cost(ElementCount VF, VPCostContext &Ctx) {
// For now only return the cost of the vector loop region, ignoring any other
// blocks, like the preheader or middle blocks.
return getVectorLoopRegion()->cost(VF, Ctx);
}

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
void VPlan::printLiveIns(raw_ostream &O) const {
VPSlotTracker SlotTracker(this);
@@ -1555,8 +1483,7 @@ bool vputils::isHeaderMask(VPValue *V, VPlan &Plan) {
auto IsWideCanonicalIV = [](VPValue *A) {
return isa<VPWidenCanonicalIVRecipe>(A) ||
(isa<VPWidenIntOrFpInductionRecipe>(A) &&
cast<VPWidenIntOrFpInductionRecipe>(A)->isCanonical()) ||
match(A, m_ScalarIVSteps(m_CanonicalIV(), m_SpecificInt(1)));
cast<VPWidenIntOrFpInductionRecipe>(A)->isCanonical());
};

VPValue *A, *B;
@@ -1568,17 +1495,3 @@ bool vputils::isHeaderMask(VPValue *V, VPlan &Plan) {
return match(V, m_Binary<Instruction::ICmp>(m_VPValue(A), m_VPValue(B))) &&
IsWideCanonicalIV(A) && B == Plan.getOrCreateBackedgeTakenCount();
}

bool vputils::isUniformBoolean(VPValue *Cond) {
if (match(Cond, m_Not(m_VPValue())))
Cond = Cond->getDefiningRecipe()->getOperand(0);
auto *R = Cond->getDefiningRecipe();
if (!R)
return true;
// TODO: match additional patterns preserving uniformity of booleans, e.g.,
// AND/OR/etc.
return match(R, m_Binary<Instruction::ICmp>(m_VPValue(), m_VPValue())) &&
all_of(R->operands(), [](VPValue *Op) {
return vputils::isUniformAfterVectorization(Op);
});
}