16 changes: 8 additions & 8 deletions compiler-rt/test/hwasan/TestCases/allocator_returns_null.cpp
@@ -87,21 +87,21 @@ int main(int argc, char **argv) {
}

// CHECK-mCRASH: malloc:
// CHECK-mCRASH: SUMMARY: HWAddressSanitizer: allocation-size-too-big
// CHECK-mCRASH: SUMMARY: HWAddressSanitizer: allocation-size-too-big {{.*}} in malloc
// CHECK-cCRASH: calloc:
// CHECK-cCRASH: SUMMARY: HWAddressSanitizer: allocation-size-too-big
// CHECK-cCRASH: SUMMARY: HWAddressSanitizer: allocation-size-too-big {{.*}} in calloc
// CHECK-coCRASH: calloc-overflow:
// CHECK-coCRASH: SUMMARY: HWAddressSanitizer: calloc-overflow
// CHECK-coCRASH: SUMMARY: HWAddressSanitizer: calloc-overflow {{.*}} in calloc
// CHECK-rCRASH: realloc:
// CHECK-rCRASH: SUMMARY: HWAddressSanitizer: allocation-size-too-big
// CHECK-rCRASH: SUMMARY: HWAddressSanitizer: allocation-size-too-big {{.*}} in realloc
// CHECK-mrCRASH: realloc-after-malloc:
// CHECK-mrCRASH: SUMMARY: HWAddressSanitizer: allocation-size-too-big
// CHECK-mrCRASH: SUMMARY: HWAddressSanitizer: allocation-size-too-big {{.*}} in realloc
// CHECK-nCRASH: new:
// CHECK-nCRASH: SUMMARY: HWAddressSanitizer: allocation-size-too-big
// CHECK-nCRASH: SUMMARY: HWAddressSanitizer: allocation-size-too-big {{.*}} in operator new
// CHECK-nCRASH-OOM: new:
// CHECK-nCRASH-OOM: SUMMARY: HWAddressSanitizer: out-of-memory
// CHECK-nCRASH-OOM: SUMMARY: HWAddressSanitizer: out-of-memory {{.*}} in operator new
// CHECK-nnCRASH: new-nothrow:
// CHECK-nnCRASH: SUMMARY: HWAddressSanitizer: allocation-size-too-big
// CHECK-nnCRASH: SUMMARY: HWAddressSanitizer: allocation-size-too-big {{.*}} in operator new

// CHECK-mNULL: malloc:
// CHECK-mNULL: errno: 12
4 changes: 4 additions & 0 deletions libc/include/llvm-libc-types/off_t.h
@@ -9,6 +9,10 @@
#ifndef __LLVM_LIBC_TYPES_OFF_T_H__
#define __LLVM_LIBC_TYPES_OFF_T_H__

#if defined(__LP64__) || defined(__riscv)
typedef __INT64_TYPE__ off_t;
#else
typedef __INT32_TYPE__ off_t;
#endif // __LP64__ || __riscv

#endif // __LLVM_LIBC_TYPES_OFF_T_H__
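For context, a minimal standalone sketch (not part of the patch) that checks the width this header selects; the asserts simply restate the #if logic above, and the sketch assumes a compiler that defines __LP64__/__riscv and the __INT*_TYPE__ builtin macros:

    // off_t_width_check.cpp -- illustrative only.
    #if defined(__LP64__) || defined(__riscv)
    typedef __INT64_TYPE__ off_t_sketch;
    static_assert(sizeof(off_t_sketch) == 8, "64-bit file offsets expected");
    #else
    typedef __INT32_TYPE__ off_t_sketch;
    static_assert(sizeof(off_t_sketch) == 4, "32-bit file offsets expected");
    #endif
    int main() { return 0; }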
4 changes: 2 additions & 2 deletions libc/src/__support/HashTable/sse2/bitmask_impl.inc
@@ -33,7 +33,7 @@ struct Group {
LIBC_INLINE IteratableBitMask match_byte(uint8_t byte) const {
auto cmp = _mm_cmpeq_epi8(data, _mm_set1_epi8(byte));
auto bitmask = static_cast<uint16_t>(_mm_movemask_epi8(cmp));
return {bitmask};
return {{bitmask}};
}

LIBC_INLINE BitMask mask_available() const {
@@ -42,7 +42,7 @@
}

LIBC_INLINE IteratableBitMask occupied() const {
return {static_cast<uint16_t>(~mask_available().word)};
return {{static_cast<uint16_t>(~mask_available().word)}};
}
};
} // namespace internal
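The extra braces matter because IteratableBitMask appears to wrap BitMask as a nested aggregate: the inner braces initialize the base subobject, the outer braces the returned type. A minimal sketch of the same shape, with illustrative stand-in types rather than the actual libc definitions (compiles as C++17):

    #include <cstdint>

    struct BitMaskSketch {
      uint16_t word; // one bit per slot in the group
    };
    struct IteratableBitMaskSketch : BitMaskSketch {}; // adds iteration only

    IteratableBitMaskSketch make(uint16_t bits) {
      // {bits} would try to initialize the BitMaskSketch base from a bare
      // uint16_t and fail; {{bits}} initializes the base, then its member.
      return {{bits}};
    }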
3 changes: 2 additions & 1 deletion libc/src/string/memory_utils/op_x86.h
@@ -233,7 +233,8 @@ template <> LIBC_INLINE uint32_t neq<__m512i>(CPtr p1, CPtr p2, size_t offset) {
const auto a = load<__m512i>(p1, offset);
const auto b = load<__m512i>(p2, offset);
const uint64_t xored = _mm512_cmpneq_epi8_mask(a, b);
return (xored >> 32) | (xored & 0xFFFFFFFF);
return static_cast<uint32_t>(xored >> 32) |
static_cast<uint32_t>(xored & 0xFFFFFFFF);
}
template <>
LIBC_INLINE MemcmpReturnType cmp_neq<__m512i>(CPtr p1, CPtr p2, size_t offset) {
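The cast-based rewrite above silences clang's -Wshorten-64-to-32 (see the CMake change that follows) without changing behavior: assuming, as this sketch does, that callers only test the result of neq() against zero, folding the 64-lane mask into 32 bits this way is lossless. A hedged standalone rendering of the same fold:

    #include <cstdint>

    // Zero iff the full 64-bit mask is zero; each half is narrowed
    // explicitly so no implicit 64->32 truncation warning fires.
    inline uint32_t fold_mask(uint64_t xored) {
      return static_cast<uint32_t>(xored >> 32) |
             static_cast<uint32_t>(xored & 0xFFFFFFFF);
    }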
3 changes: 3 additions & 0 deletions libc/src/sys/mman/linux/CMakeLists.txt
@@ -22,6 +22,9 @@ add_entrypoint_object(
libc.include.sys_syscall
libc.src.__support.OSUtil.osutil
libc.src.errno.errno
COMPILE_OPTIONS
# TODO: https://github.com/llvm/llvm-project/issues/77395
-Wno-shorten-64-to-32
)

add_entrypoint_object(
2 changes: 1 addition & 1 deletion libcxx/include/string
@@ -922,7 +922,7 @@ public:
// Turning off ASan instrumentation for variable initialization with _LIBCPP_STRING_INTERNAL_MEMORY_ACCESS
// does not work consistently during initialization of __r_, so we instead unpoison __str's memory manually first.
// __str's memory needs to be unpoisoned only in the case where it's a short string.
: __r_([](basic_string &__s){ if(!__s.__is_long()) __s.__annotate_delete(); return std::move(__s.__r_); }(__str)) {
: __r_(((__str.__is_long() ? 0 : (__str.__annotate_delete(), 0)), std::move(__str.__r_))) {
__str.__r_.first() = __rep();
__str.__annotate_new(0);
if (!__is_long())
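For readers unfamiliar with the comma-expression initializer introduced above: a comma operator inside a mem-initializer runs a side effect before producing the value that initializes the member. A minimal sketch of the same shape, with illustrative names rather than the real libc++ internals:

    #include <utility>

    struct Rep { int v = 0; };

    struct Str {
      Rep r_;
      bool is_long() const { return false; }
      void annotate_delete() { /* unpoison side effect */ }

      Str() = default;
      // Conditionally run the side effect, then move-initialize r_;
      // mirrors the comma-expression initializer used above.
      Str(Str&& other)
          : r_(((other.is_long() ? 0 : (other.annotate_delete(), 0)),
                std::move(other.r_))) {}
    };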
174 changes: 100 additions & 74 deletions libcxxabi/src/private_typeinfo.cpp
@@ -42,6 +42,7 @@
// is_equal() with use_strcmp=false so the string names are not compared.

#include <cstdint>
#include <cassert>
#include <string.h>

#ifdef _LIBCXXABI_FORGIVING_DYNAMIC_CAST
@@ -160,15 +161,9 @@ const void* dyn_cast_to_derived(const void* static_ptr,
// Fallback to the slow path to check that static_type is a public
// base type of dynamic_type.
// Using giant short cut. Add that information to info.
__dynamic_cast_info info = {
dst_type,
static_ptr,
static_type,
src2dst_offset,
0, 0, 0, 0, 0, 0, 0, 0,
1, // number_of_dst_type
false, false, false
};
__dynamic_cast_info info = {dst_type, static_ptr, static_type, src2dst_offset, 0, 0, 0, 0, 0, 0, 0, 0,
1, // number_of_dst_type
false, false, false, true, nullptr};
// Do the search
dst_type->search_above_dst(&info, dynamic_ptr, dynamic_ptr, public_path, false);
#ifdef _LIBCXXABI_FORGIVING_DYNAMIC_CAST
@@ -187,13 +182,8 @@ const void* dyn_cast_to_derived(const void* static_ptr,
"should have public visibility. At least one of them is hidden. %s"
", %s.\n", static_type->name(), dst_type->name());
// Redo the search comparing type_info's using strcmp
info = {
dst_type,
static_ptr,
static_type,
src2dst_offset,
0, 0, 0, 0, 0, 0, 0, 0, 0, false, false, false
};
info = {dst_type, static_ptr, static_type, src2dst_offset, 0, 0, 0, 0, 0, 0,
0, 0, 0, false, false, false, true, nullptr};
info.number_of_dst_type = 1;
dst_type->search_above_dst(&info, dynamic_ptr, dynamic_ptr, public_path, true);
}
@@ -232,15 +222,24 @@ const void* dyn_cast_try_downcast(const void* static_ptr,
}

// Try to search a path from dynamic_type to dst_type.
__dynamic_cast_info dynamic_to_dst_info = {
dynamic_type,
dst_ptr_to_static,
dst_type,
src2dst_offset,
0, 0, 0, 0, 0, 0, 0, 0,
1, // number_of_dst_type
false, false, false
};
__dynamic_cast_info dynamic_to_dst_info = {dynamic_type,
dst_ptr_to_static,
dst_type,
src2dst_offset,
0,
0,
0,
0,
0,
0,
0,
0,
1, // number_of_dst_type
false,
false,
false,
true,
nullptr};
dynamic_type->search_above_dst(&dynamic_to_dst_info, dynamic_ptr, dynamic_ptr, public_path, false);
if (dynamic_to_dst_info.path_dst_ptr_to_static_ptr != unknown) {
// We have found at least one path from dynamic_ptr to dst_ptr. The
@@ -261,13 +260,8 @@ const void* dyn_cast_slow(const void* static_ptr,
// Not using giant short cut. Do the search

// Initialize info struct for this search.
__dynamic_cast_info info = {
dst_type,
static_ptr,
static_type,
src2dst_offset,
0, 0, 0, 0, 0, 0, 0, 0, 0, false, false, false
};
__dynamic_cast_info info = {dst_type, static_ptr, static_type, src2dst_offset, 0, 0, 0, 0, 0, 0,
0, 0, 0, false, false, false, true, nullptr};

dynamic_type->search_below_dst(&info, dynamic_ptr, public_path, false);
#ifdef _LIBCXXABI_FORGIVING_DYNAMIC_CAST
@@ -287,13 +281,8 @@ const void* dyn_cast_slow(const void* static_ptr,
"%s, %s, %s.\n", static_type->name(), dynamic_type->name(),
dst_type->name());
// Redo the search comparing type_info's using strcmp
info = {
dst_type,
static_ptr,
static_type,
src2dst_offset,
0, 0, 0, 0, 0, 0, 0, 0, 0, false, false, false
};
info = {dst_type, static_ptr, static_type, src2dst_offset, 0, 0, 0, 0, 0, 0,
0, 0, 0, false, false, false, true, nullptr};
dynamic_type->search_below_dst(&info, dynamic_ptr, public_path, true);
}
#endif // _LIBCXXABI_FORGIVING_DYNAMIC_CAST
@@ -481,7 +470,8 @@ __class_type_info::can_catch(const __shim_type_info* thrown_type,
if (thrown_class_type == 0)
return false;
// bullet 2
__dynamic_cast_info info = {thrown_class_type, 0, this, -1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,};
assert(adjustedPtr && "catching a class without an object?");
__dynamic_cast_info info = {thrown_class_type, 0, this, -1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, true, nullptr};
info.number_of_dst_type = 1;
thrown_class_type->has_unambiguous_public_base(&info, adjustedPtr, public_path);
if (info.path_dst_ptr_to_static_ptr == public_path)
@@ -496,32 +486,46 @@
#pragma clang diagnostic pop
#endif

// When we have an object to inspect, we just pass the pointer to the sub-
// object that matched the static_type we just checked. If that is different
// from any previously recorded pointer to that object type, then we have
// an ambiguous case.

// When we have no object to inspect, we need to account for virtual bases
// explicitly.
// info->vbase_cookie is a pointer to the name of the innermost virtual base
// type, or nullptr if there is no virtual base on the path so far.
// adjustedPtr points to the subobject we just found.
// If vbase_cookie != any previously recorded (including the case of nullptr
// representing an already-found static sub-object) then we have an ambiguous
// case. If the vbase_cookie values agree but the offset (adjustedPtr)
// differs from any previously recorded one, this indicates an ambiguous
// case within the virtual base.

void
__class_type_info::process_found_base_class(__dynamic_cast_info* info,
void* adjustedPtr,
int path_below) const
{
if (info->dst_ptr_leading_to_static_ptr == 0)
{
// First time here
info->dst_ptr_leading_to_static_ptr = adjustedPtr;
info->path_dst_ptr_to_static_ptr = path_below;
info->number_to_static_ptr = 1;
}
else if (info->dst_ptr_leading_to_static_ptr == adjustedPtr)
{
// We've been here before. Update path to "most public"
if (info->path_dst_ptr_to_static_ptr == not_public_path)
info->path_dst_ptr_to_static_ptr = path_below;
}
else
{
// We've detected an ambiguous cast from (thrown_class_type, adjustedPtr)
// to a static_type
info->number_to_static_ptr += 1;
info->path_dst_ptr_to_static_ptr = not_public_path;
info->search_done = true;
}
if (info->number_to_static_ptr == 0) {
// First time we found this base
info->dst_ptr_leading_to_static_ptr = adjustedPtr;
info->path_dst_ptr_to_static_ptr = path_below;
// stash the virtual base cookie.
info->dst_ptr_not_leading_to_static_ptr = info->vbase_cookie;
info->number_to_static_ptr = 1;
} else if (info->dst_ptr_not_leading_to_static_ptr == info->vbase_cookie &&
info->dst_ptr_leading_to_static_ptr == adjustedPtr) {
// We've been here before. Update path to "most public"
if (info->path_dst_ptr_to_static_ptr == not_public_path)
info->path_dst_ptr_to_static_ptr = path_below;
} else {
// We've detected an ambiguous cast from (thrown_class_type, adjustedPtr)
// to a static_type.
info->number_to_static_ptr += 1;
info->path_dst_ptr_to_static_ptr = not_public_path;
info->search_done = true;
}
}

void
@@ -549,16 +553,30 @@ __base_class_type_info::has_unambiguous_public_base(__dynamic_cast_info* info,
void* adjustedPtr,
int path_below) const
{
ptrdiff_t offset_to_base = 0;
if (adjustedPtr != nullptr)
{
offset_to_base = __offset_flags >> __offset_shift;
if (__offset_flags & __virtual_mask)
{
const char* vtable = *static_cast<const char*const*>(adjustedPtr);
offset_to_base = update_offset_to_base(vtable, offset_to_base);
}
bool is_virtual = __offset_flags & __virtual_mask;
ptrdiff_t offset_to_base = 0;
if (info->have_object) {
/* We have an object to inspect, so we can look through its vtable to
find the layout. */
offset_to_base = __offset_flags >> __offset_shift;
if (is_virtual) {
const char* vtable = *static_cast<const char* const*>(adjustedPtr);
offset_to_base = update_offset_to_base(vtable, offset_to_base);
}
} else if (!is_virtual) {
/* We have no object; however, for non-virtual bases (since we do not
need to inspect any content) we can pretend to have an object based
at '0'. */
offset_to_base = __offset_flags >> __offset_shift;
} else {
/* No object to inspect, and the next base is virtual.
We cannot indirect through the vtable to find the actual object offset.
So, update vbase_cookie to the new innermost virtual base using the
pointer to the typeinfo name as a key. */
info->vbase_cookie = static_cast<const void*>(__base_type->name());
// .. and reset the pointer.
adjustedPtr = nullptr;
}
__base_type->has_unambiguous_public_base(
info,
static_cast<char*>(adjustedPtr) + offset_to_base,
@@ -679,14 +697,22 @@ __pointer_type_info::can_catch(const __shim_type_info* thrown_type,
dynamic_cast<const __class_type_info*>(thrown_pointer_type->__pointee);
if (thrown_class_type == 0)
return false;
__dynamic_cast_info info = {thrown_class_type, 0, catch_class_type, -1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,};
bool have_object = adjustedPtr != nullptr;
__dynamic_cast_info info = {thrown_class_type, 0, catch_class_type, -1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
have_object, nullptr};
info.number_of_dst_type = 1;
thrown_class_type->has_unambiguous_public_base(&info, adjustedPtr, public_path);
if (info.path_dst_ptr_to_static_ptr == public_path)
{
if (adjustedPtr != NULL)
adjustedPtr = const_cast<void*>(info.dst_ptr_leading_to_static_ptr);
return true;
// In the case of a thrown null pointer, we have no object but we might
// well have computed the offset to where a public sub-object would be.
// However, we do not want to return that offset to the user; we still
// want them to catch a null ptr.
if (have_object)
adjustedPtr = const_cast<void*>(info.dst_ptr_leading_to_static_ptr);
else
adjustedPtr = nullptr;
return true;
}
return false;
}
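A hedged illustration of the hierarchies the new have_object/vbase_cookie logic distinguishes; these mirror the test added below, and the comments state the intended behaviour rather than a verified trace:

    struct Base { int b; };

    // Virtual inheritance: both paths reach one shared Base subobject, so
    // with no object to inspect (a thrown null Catchable*) the search keys
    // both paths on the same vbase cookie and the catch as Base* succeeds.
    struct V1 : virtual Base { int b; };
    struct V2 : virtual Base { int b; };
    struct Catchable : V1, V2 { int b; };

    // Non-virtual inheritance: two distinct Base subobjects at different
    // offsets, so even a null Ambiguous* cannot be caught as Base*.
    struct D1 : Base { int b; };
    struct D2 : Base { int b; };
    struct Ambiguous : D1, D2 { int b; };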
7 changes: 7 additions & 0 deletions libcxxabi/src/private_typeinfo.h
@@ -110,6 +110,13 @@ struct _LIBCXXABI_HIDDEN __dynamic_cast_info
bool found_any_static_type;
// Set whenever a search can be stopped
bool search_done;

// Data that modifies the search mechanism.

// There is no object (seen when we throw a null pointer to object).
bool have_object;
// Virtual base
const void* vbase_cookie;
};

// Has no base class
194 changes: 194 additions & 0 deletions libcxxabi/test/catch_null_pointer_to_object_pr64953.pass.cpp
@@ -0,0 +1,194 @@
//===----------------------------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This test case checks specifically the cases under bullet 3.3:
//
// C++ ABI 15.3:
// A handler is a match for an exception object of type E if
// * The handler is of type cv T or cv T& and E and T are the same type
// (ignoring the top-level cv-qualifiers), or
// * the handler is of type cv T or cv T& and T is an unambiguous base
// class of E, or
// > * the handler is of type cv1 T* cv2 and E is a pointer type that can <
// > be converted to the type of the handler by either or both of <
// > o a standard pointer conversion (4.10 [conv.ptr]) not involving <
// > conversions to private or protected or ambiguous classes <
// > o a qualification conversion <
// * the handler is a pointer or pointer to member type and E is
// std::nullptr_t
//
//===----------------------------------------------------------------------===//

// UNSUPPORTED: no-exceptions
// This test requires the fix to
// https://github.com/llvm/llvm-project/issues/64953, which is in libc++abi.dylib.
// The fix is not contained in older macOS system dylibs, so the test will fail
// there.
// FIXME: When testing `natively` with the CI scripts, we currently pass the
// newly-built libraries to the execution; this leads to an XPASS here, so we
// have to mark these UNSUPPORTED for now (they should be XFAILs when tested
// against current [macOS14] and previously installed libc++abi, as described
// above).
// UNSUPPORTED: stdlib=apple-libc++ && target={{.+}}-apple-macosx10.{{9|10|11|12|13|14|15}}{{.*}}
// UNSUPPORTED: stdlib=apple-libc++ && target={{.+}}-apple-macosx{{11|12|13|14}}{{.*}}

#include <exception>
#include <stdlib.h>
#include <assert.h>
#include <stdio.h>

struct Base {
int b;
};
struct Base2 {
int b;
};
struct Derived1 : Base {
int b;
};
struct Derived2 : Base {
int b;
};
struct Derived3 : Base2 {
int b;
};
struct Private : private Base {
int b;
};
struct Protected : protected Base {
int b;
};
struct Virtual1 : virtual Base {
int b;
};
struct Virtual2 : virtual Base {
int b;
};

struct Ambiguous1 : Derived1, Derived2 {
int b;
};
struct Ambiguous2 : Derived1, Private {
int b;
};
struct Ambiguous3 : Derived1, Protected {
int b;
};

struct NoPublic1 : Private, Base2 {
int b;
};
struct NoPublic2 : Protected, Base2 {
int b;
};

struct Catchable1 : Derived3, Derived1 {
int b;
};
struct Catchable2 : Virtual1, Virtual2 {
int b;
};
struct Catchable3 : virtual Base, Virtual2 {
int b;
};

// Check that, when we have a null pointer-to-object, we catch a nullptr.
template <typename T // Handler type
,
typename E // Thrown exception type
>
void assert_catches() {
try {
throw static_cast<E>(0);
printf("%s\n", __PRETTY_FUNCTION__);
assert(false && "Statements after throw must be unreachable");
} catch (T t) {
assert(t == nullptr);
return;
} catch (...) {
printf("%s\n", __PRETTY_FUNCTION__);
assert(false && "Should not have entered catch-all");
}

printf("%s\n", __PRETTY_FUNCTION__);
assert(false && "The catch should have returned");
}

template <typename T // Handler type
,
typename E // Thrown exception type
>
void assert_cannot_catch() {
try {
throw static_cast<E>(0);
printf("%s\n", __PRETTY_FUNCTION__);
assert(false && "Statements after throw must be unreachable");
} catch (T t) {
printf("%s\n", __PRETTY_FUNCTION__);
assert(false && "Should not have entered the catch");
} catch (...) {
assert(true);
return;
}

printf("%s\n", __PRETTY_FUNCTION__);
assert(false && "The catch-all should have returned");
}

// Check that when we have a pointer-to-actual-object we, in fact, get the
// adjusted pointer to the base class.
template <typename T // Handler type
,
typename O // Object type
>
void assert_catches_bp() {
O* o = new (O);
try {
throw o;
printf("%s\n", __PRETTY_FUNCTION__);
assert(false && "Statements after throw must be unreachable");
} catch (T t) {
assert(t == static_cast<T>(o));
//__builtin_printf("o = %p t = %p\n", o, t);
delete o;
return;
} catch (...) {
printf("%s\n", __PRETTY_FUNCTION__);
assert(false && "Should not have entered catch-all");
}

printf("%s\n", __PRETTY_FUNCTION__);
assert(false && "The catch should have returned");
}

void f1() {
assert_catches<Base*, Catchable1*>();
assert_catches<Base*, Catchable2*>();
assert_catches<Base*, Catchable3*>();
}

void f2() {
assert_cannot_catch<Base*, Ambiguous1*>();
assert_cannot_catch<Base*, Ambiguous2*>();
assert_cannot_catch<Base*, Ambiguous3*>();
assert_cannot_catch<Base*, NoPublic1*>();
assert_cannot_catch<Base*, NoPublic2*>();
}

void f3() {
assert_catches_bp<Base*, Catchable1>();
assert_catches_bp<Base*, Catchable2>();
assert_catches_bp<Base*, Catchable3>();
}

int main(int, char**) {
f1();
f2();
f3();
return 0;
}
27 changes: 18 additions & 9 deletions lld/ELF/ScriptParser.cpp
@@ -531,13 +531,17 @@ void ScriptParser::readSearchDir() {
// linker's sections sanity check failures.
// https://sourceware.org/binutils/docs/ld/Overlay-Description.html#Overlay-Description
SmallVector<SectionCommand *, 0> ScriptParser::readOverlay() {
// VA and LMA expressions are optional, though for simplicity of
// implementation we assume they are not. That is what OVERLAY was designed
// for first of all: to allow sections with overlapping VAs at different LMAs.
Expr addrExpr = readExpr();
expect(":");
expect("AT");
Expr lmaExpr = readParenExpr();
Expr addrExpr;
if (consume(":")) {
addrExpr = [] { return script->getDot(); };
} else {
addrExpr = readExpr();
expect(":");
}
// When AT is omitted, LMA should equal VMA. script->getDot() when evaluating
// lmaExpr will ensure this, even if the start address is specified.
Expr lmaExpr =
consume("AT") ? readParenExpr() : [] { return script->getDot(); };
expect("{");

SmallVector<SectionCommand *, 0> v;
@@ -547,10 +551,15 @@ SmallVector<SectionCommand *, 0> ScriptParser::readOverlay() {
// starting from the base load address specified.
OutputDesc *osd = readOverlaySectionDescription();
osd->osec.addrExpr = addrExpr;
if (prev)
if (prev) {
osd->osec.lmaExpr = [=] { return prev->getLMA() + prev->size; };
else
} else {
osd->osec.lmaExpr = lmaExpr;
// Use first section address for subsequent sections as initial addrExpr
// can be DOT. Ensure the first section, even if empty, is not discarded.
osd->osec.usedInExpression = true;
addrExpr = [=]() -> ExprValue { return {&osd->osec, false, 0, ""}; };
}
v.push_back(osd);
prev = &osd->osec;
}
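With both the start address and AT() now optional, every spelling below should parse; a hedged linker-script sketch (section names are illustrative) matching the grammar the code above implements:

    SECTIONS {
      /* Explicit VA and LMA: the previously required form. */
      OVERLAY 0x1000 : AT (0x4000) { .o1 { *(.o1) } .o2 { *(.o2) } }
      /* VA omitted: the overlay starts at the current dot. */
      OVERLAY : AT (0x5000) { .o3 { *(.o3) } }
      /* AT omitted: the LMA defaults to the VMA. */
      OVERLAY 0x2000 : { .o4 { *(.o4) } }
    }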
4 changes: 2 additions & 2 deletions lld/MinGW/Options.td
@@ -186,8 +186,8 @@ def appcontainer: F<"appcontainer">, HelpText<"Set the appcontainer flag in the
defm delayload: Eq<"delayload", "DLL to load only on demand">;
defm mllvm: EqNoHelp<"mllvm">;
defm pdb: Eq<"pdb", "Output PDB debug info file, chosen implicitly if the argument is empty">;
defm thinlto_cache_dir: EqLong<"thinlto-cache-dir",
"Path to ThinLTO cached object file directory">;
def thinlto_cache_dir: JJ<"thinlto-cache-dir=">,
HelpText<"Path to ThinLTO cached object file directory">;
defm Xlink : Eq<"Xlink", "Pass <arg> to the COFF linker">, MetaVarName<"<arg>">;
defm guard_cf : B<"guard-cf", "Enable Control Flow Guard" ,
"Do not enable Control Flow Guard (default)">;
19 changes: 11 additions & 8 deletions lld/test/ELF/linkerscript/overlay.test
@@ -16,20 +16,20 @@
# CHECK-NEXT: .small1 PROGBITS 0000000000001000 002000 000004
# CHECK-NEXT: .small2 PROGBITS 0000000000001008 002008 000004
# CHECK-NEXT: .big2 PROGBITS 0000000000001008 003008 000008
# CHECK-NEXT: .empty3 PROGBITS 0000000000001010 003010 000000
# CHECK-NEXT: .small3 PROGBITS 0000000000001010 003010 000004
# CHECK-NEXT: .big3 PROGBITS 0000000000001014 003014 000008
# CHECK-NEXT: .text PROGBITS 0000000000001024 003024 000001
# CHECK-NEXT: .big3 PROGBITS 0000000000001010 004010 000008
# CHECK-NEXT: .text PROGBITS 0000000000001018 004018 000001

# CHECK: Program Headers:
# CHECK: Type Offset VirtAddr PhysAddr FileSiz MemSiz Flg Align
# CHECK-NEXT: LOAD 0x001000 0x0000000000001000 0x0000000000001000 0x000008 0x000008 R 0x1000
# CHECK-NEXT: LOAD 0x002000 0x0000000000001000 0x0000000000001008 0x000004 0x000004 R 0x1000
# CHECK-NEXT: LOAD 0x002008 0x0000000000001008 0x0000000000002008 0x000004 0x000004 R 0x1000
# CHECK-NEXT: LOAD 0x003008 0x0000000000001008 0x000000000000200c 0x000008 0x000008 R 0x1000
## FIXME Fix p_paddr when the first section in an overlay is empty and discarded.
# CHECK-NEXT: LOAD 0x003010 0x0000000000001010 0x0000000000000000 0x000004 0x000004 R 0x1000
# CHECK-NEXT: LOAD 0x003014 0x0000000000001014 0x0000000000000004 0x000008 0x000008 R 0x1000
# CHECK-NEXT: LOAD 0x003024 0x0000000000001024 0x0000000000000014 0x000001 0x000001 R E 0x1000
# CHECK-NEXT: LOAD 0x003010 0x0000000000001010 0x0000000000002014 0x000004 0x000004 R 0x1000
# CHECK-NEXT: LOAD 0x004010 0x0000000000001010 0x0000000000002018 0x000008 0x000008 R 0x1000
# CHECK-NEXT: LOAD 0x004018 0x0000000000001018 0x0000000000002020 0x000001 0x000001 R E 0x1000

# RUN: not ld.lld a.o -T err1.t 2>&1 | FileCheck %s --check-prefix=ERR1 --match-full-lines --strict-whitespace
# ERR1:{{.*}}error: err1.t:3: { expected, but got 0x3000
@@ -57,14 +57,17 @@ _start:

#--- a.t
SECTIONS {
OVERLAY 0x1000 : AT( 0x1000 ) {
## LMA defaults to VMA
OVERLAY 0x1000 : {
.big1 { *(.big1) }
.small1 { *(.small1) }
}
OVERLAY 0x1008 : AT (0x2008) {
## .big2 starts at ADDR(.small2)
OVERLAY : AT (0x2008) {
.small2 { *(.small2) }
.big2 { *(.big2) }
}
## .empty3 is not discarded. .small3 and .big3 share its address.
OVERLAY . : AT (0x2014) {
.empty3 { *(.empty3) }
.small3 { *(.small3) }
2 changes: 2 additions & 0 deletions lldb/include/lldb/API/SBBreakpoint.h
@@ -112,6 +112,8 @@ class LLDB_API SBBreakpoint {

SBError SetScriptCallbackBody(const char *script_body_text);

LLDB_DEPRECATED_FIXME("Doesn't provide error handling",
"AddNameWithErrorHandling")
bool AddName(const char *new_name);

SBError AddNameWithErrorHandling(const char *new_name);
28 changes: 7 additions & 21 deletions lldb/include/lldb/Utility/StructuredData.h
@@ -221,31 +221,17 @@ class StructuredData {
}

template <class IntType>
bool GetItemAtIndexAsInteger(size_t idx, IntType &result) const {
ObjectSP value_sp = GetItemAtIndex(idx);
if (value_sp.get()) {
std::optional<IntType> GetItemAtIndexAsInteger(size_t idx) const {
if (auto item_sp = GetItemAtIndex(idx)) {
if constexpr (std::numeric_limits<IntType>::is_signed) {
if (auto signed_value = value_sp->GetAsSignedInteger()) {
result = static_cast<IntType>(signed_value->GetValue());
return true;
}
if (auto *signed_value = item_sp->GetAsSignedInteger())
return static_cast<IntType>(signed_value->GetValue());
} else {
if (auto unsigned_value = value_sp->GetAsUnsignedInteger()) {
result = static_cast<IntType>(unsigned_value->GetValue());
return true;
}
if (auto *unsigned_value = item_sp->GetAsUnsignedInteger())
return static_cast<IntType>(unsigned_value->GetValue());
}
}
return false;
}

template <class IntType>
bool GetItemAtIndexAsInteger(size_t idx, IntType &result,
IntType default_val) const {
bool success = GetItemAtIndexAsInteger(idx, result);
if (!success)
result = default_val;
return success;
return {};
}

std::optional<llvm::StringRef> GetItemAtIndexAsString(size_t idx) const {
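For context on the API change above, a self-contained analogue (illustrative, not the actual LLDB class) of the new accessor shape, showing how the optional return folds the old bool-plus-out-parameter pair into one value:

    #include <cstddef>
    #include <cstdint>
    #include <optional>
    #include <vector>

    struct ArraySketch {
      std::vector<int64_t> items;

      template <class IntType>
      std::optional<IntType> GetItemAtIndexAsInteger(size_t idx) const {
        if (idx < items.size())
          return static_cast<IntType>(items[idx]);
        return std::nullopt; // replaces the old "return false" path
      }
    };

    int main() {
      ArraySketch a{{1, 2, 3}};
      if (auto v = a.GetItemAtIndexAsInteger<uint64_t>(1))
        return static_cast<int>(*v); // 2
      return -1;
    }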
8 changes: 4 additions & 4 deletions lldb/source/Breakpoint/BreakpointResolverName.cpp
@@ -161,14 +161,14 @@ BreakpointResolverSP BreakpointResolverName::CreateFromStructuredData(
error.SetErrorString("BRN::CFSD: name entry is not a string.");
return nullptr;
}
std::underlying_type<FunctionNameType>::type fnt;
success = names_mask_array->GetItemAtIndexAsInteger(i, fnt);
if (!success) {
auto maybe_fnt = names_mask_array->GetItemAtIndexAsInteger<
std::underlying_type<FunctionNameType>::type>(i);
if (!maybe_fnt) {
error.SetErrorString("BRN::CFSD: name mask entry is not an integer.");
return nullptr;
}
names.push_back(std::string(*maybe_name));
name_masks.push_back(static_cast<FunctionNameType>(fnt));
name_masks.push_back(static_cast<FunctionNameType>(*maybe_fnt));
}

std::shared_ptr<BreakpointResolverName> resolver_sp =
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -592,9 +592,10 @@ addr_t InstrumentationRuntimeTSan::GetFirstNonInternalFramePc(
if (skip_one_frame && i == 0)
continue;

addr_t addr;
if (!trace_array->GetItemAtIndexAsInteger(i, addr))
auto maybe_addr = trace_array->GetItemAtIndexAsInteger<addr_t>(i);
if (!maybe_addr)
continue;
addr_t addr = *maybe_addr;

lldb_private::Address so_addr;
if (!process_sp->GetTarget().GetSectionLoadList().ResolveLoadAddress(
15 changes: 7 additions & 8 deletions lldb/source/Target/DynamicRegisterInfo.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -349,10 +349,8 @@ DynamicRegisterInfo::SetRegisterInfo(const StructuredData::Dictionary &dict,
const size_t num_regs = invalidate_reg_list->GetSize();
if (num_regs > 0) {
for (uint32_t idx = 0; idx < num_regs; ++idx) {
uint64_t invalidate_reg_num;
std::optional<llvm::StringRef> maybe_invalidate_reg_name =
invalidate_reg_list->GetItemAtIndexAsString(idx);
if (maybe_invalidate_reg_name) {
if (auto maybe_invalidate_reg_name =
invalidate_reg_list->GetItemAtIndexAsString(idx)) {
const RegisterInfo *invalidate_reg_info =
GetRegisterInfo(*maybe_invalidate_reg_name);
if (invalidate_reg_info) {
@@ -365,10 +363,11 @@ DynamicRegisterInfo::SetRegisterInfo(const StructuredData::Dictionary &dict,
"\"%s\" while parsing register \"%s\"\n",
maybe_invalidate_reg_name->str().c_str(), reg_info.name);
}
} else if (invalidate_reg_list->GetItemAtIndexAsInteger(
idx, invalidate_reg_num)) {
if (invalidate_reg_num != UINT64_MAX)
m_invalidate_regs_map[i].push_back(invalidate_reg_num);
} else if (auto maybe_invalidate_reg_num =
invalidate_reg_list->GetItemAtIndexAsInteger<uint64_t>(
idx)) {
if (*maybe_invalidate_reg_num != UINT64_MAX)
m_invalidate_regs_map[i].push_back(*maybe_invalidate_reg_num);
else
printf("error: 'invalidate-regs' list value wasn't a valid "
"integer\n");
3 changes: 1 addition & 2 deletions llvm/docs/CommandLine.rst
@@ -1521,8 +1521,7 @@ passed to the constructor as ``const char*``.
Note that declaring an option category and associating it with an option before
parsing options (e.g. statically) will change the output of ``-help`` from
uncategorized to categorized. If an option category is declared but not
associated with an option then it will be hidden from the output of ``-help``
but will be shown in the output of ``-help-hidden``.
associated with an option then it will be hidden from the output of ``-help``.

.. _different parser:
.. _discussed previously:
15 changes: 11 additions & 4 deletions llvm/include/llvm/CodeGen/AccelTable.h
@@ -143,6 +143,15 @@ class AccelTableBase {
std::vector<AccelTableData *> Values;
MCSymbol *Sym;

/// Get all AccelTableData cast as a `T`.
template <typename T = AccelTableData *> auto getValues() const {
static_assert(std::is_pointer<T>());
static_assert(
std::is_base_of<AccelTableData, std::remove_pointer_t<T>>());
return map_range(
Values, [](AccelTableData *Data) { return static_cast<T>(Data); });
}

#ifndef NDEBUG
void print(raw_ostream &OS) const;
void dump() const { print(dbgs()); }
@@ -319,8 +328,7 @@ class DWARF5AccelTable : public AccelTable<DWARF5AccelTableData> {
/// Needs to be called after DIE offsets are computed.
void convertDieToOffset() {
for (auto &Entry : Entries) {
for (AccelTableData *Value : Entry.second.Values) {
DWARF5AccelTableData *Data = static_cast<DWARF5AccelTableData *>(Value);
for (auto *Data : Entry.second.getValues<DWARF5AccelTableData *>()) {
// For TU we normalize as each Unit is emitted.
// So when this is invoked after CU construction we will be in mixed
// state.
@@ -332,8 +340,7 @@

void addTypeEntries(DWARF5AccelTable &Table) {
for (auto &Entry : Table.getEntries()) {
for (AccelTableData *Value : Entry.second.Values) {
DWARF5AccelTableData *Data = static_cast<DWARF5AccelTableData *>(Value);
for (auto *Data : Entry.second.getValues<DWARF5AccelTableData *>()) {
addName(Entry.second.Name, Data->getDieOffset(), Data->getDieTag(),
Data->getUnitID(), true);
}
11 changes: 5 additions & 6 deletions llvm/lib/CodeGen/AsmPrinter/AccelTable.cpp
@@ -342,8 +342,8 @@ void AppleAccelTableWriter::emitData() const {
Asm->emitDwarfStringOffset(Hash->Name);
Asm->OutStreamer->AddComment("Num DIEs");
Asm->emitInt32(Hash->Values.size());
for (const auto *V : Hash->Values)
static_cast<const AppleAccelTableData *>(V)->emit(Asm);
for (const auto *V : Hash->getValues<const AppleAccelTableData *>())
V->emit(Asm);
PrevHash = Hash->HashValue;
}
// Emit the final end marker for the bucket.
@@ -415,11 +415,10 @@ static uint32_t constructAbbreviationTag(
void Dwarf5AccelTableWriter::populateAbbrevsMap() {
for (auto &Bucket : Contents.getBuckets()) {
for (auto *Hash : Bucket) {
for (auto *Value : Hash->Values) {
for (auto *Value : Hash->getValues<DWARF5AccelTableData *>()) {
std::optional<DWARF5AccelTable::UnitIndexAndEncoding> EntryRet =
getIndexForEntry(*static_cast<const DWARF5AccelTableData *>(Value));
unsigned Tag =
static_cast<const DWARF5AccelTableData *>(Value)->getDieTag();
getIndexForEntry(*Value);
unsigned Tag = Value->getDieTag();
uint32_t AbbrvTag = constructAbbreviationTag(Tag, EntryRet);
if (Abbreviations.count(AbbrvTag) == 0) {
SmallVector<DWARF5AccelTableData::AttributeEncoding, 2> UA;
9 changes: 1 addition & 8 deletions llvm/lib/Support/CommandLine.cpp
@@ -2474,8 +2474,7 @@ class CategorizedHelpPrinter : public HelpPrinter {
for (OptionCategory *Category : SortedCategories) {
// Hide empty categories for --help, but show for --help-hidden.
const auto &CategoryOptions = CategorizedOptions[Category];
bool IsEmptyCategory = CategoryOptions.empty();
if (!ShowHidden && IsEmptyCategory)
if (CategoryOptions.empty())
continue;

// Print category information.
@@ -2488,12 +2487,6 @@
else
outs() << "\n";

// When using --help-hidden explicitly state if the category has no
// options associated with it.
if (IsEmptyCategory) {
outs() << " This option category has no options.\n";
continue;
}
// Loop over the options in the category and print.
for (const Option *Opt : CategoryOptions)
Opt->printOptionInfo(MaxArgLen);
5 changes: 4 additions & 1 deletion llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
@@ -453,6 +453,9 @@ def AArch64msb_m1 : PatFrags<(ops node:$pred, node:$op1, node:$op2, node:$op3),
def AArch64eor3 : PatFrags<(ops node:$op1, node:$op2, node:$op3),
[(int_aarch64_sve_eor3 node:$op1, node:$op2, node:$op3),
(xor node:$op1, (xor node:$op2, node:$op3))]>;
def AArch64bcax : PatFrags<(ops node:$op1, node:$op2, node:$op3),
[(int_aarch64_sve_bcax node:$op1, node:$op2, node:$op3),
(xor node:$op1, (and node:$op2, (vnot node:$op3)))]>;

def AArch64fmla_m1 : PatFrags<(ops node:$pg, node:$za, node:$zn, node:$zm),
[(int_aarch64_sve_fmla node:$pg, node:$za, node:$zn, node:$zm),
@@ -3714,7 +3717,7 @@ let Predicates = [HasSVE2orSME] in {

// SVE2 bitwise ternary operations
defm EOR3_ZZZZ : sve2_int_bitwise_ternary_op<0b000, "eor3", AArch64eor3>;
defm BCAX_ZZZZ : sve2_int_bitwise_ternary_op<0b010, "bcax", int_aarch64_sve_bcax>;
defm BCAX_ZZZZ : sve2_int_bitwise_ternary_op<0b010, "bcax", AArch64bcax>;
defm BSL_ZZZZ : sve2_int_bitwise_ternary_op<0b001, "bsl", int_aarch64_sve_bsl, AArch64bsp>;
defm BSL1N_ZZZZ : sve2_int_bitwise_ternary_op<0b011, "bsl1n", int_aarch64_sve_bsl1n>;
defm BSL2N_ZZZZ : sve2_int_bitwise_ternary_op<0b101, "bsl2n", int_aarch64_sve_bsl2n>;
34 changes: 0 additions & 34 deletions llvm/lib/Target/NVPTX/NVPTXTargetTransformInfo.cpp
@@ -180,10 +180,6 @@ static Instruction *simplifyNvvmIntrinsic(IntrinsicInst *II, InstCombiner &IC) {
return {Intrinsic::ceil, FTZ_MustBeOn};
case Intrinsic::nvvm_fabs_d:
return {Intrinsic::fabs, FTZ_Any};
case Intrinsic::nvvm_fabs_f:
return {Intrinsic::fabs, FTZ_MustBeOff};
case Intrinsic::nvvm_fabs_ftz_f:
return {Intrinsic::fabs, FTZ_MustBeOn};
case Intrinsic::nvvm_floor_d:
return {Intrinsic::floor, FTZ_Any};
case Intrinsic::nvvm_floor_f:
@@ -264,12 +260,6 @@ static Instruction *simplifyNvvmIntrinsic(IntrinsicInst *II, InstCombiner &IC) {
return {Intrinsic::minimum, FTZ_MustBeOff, true};
case Intrinsic::nvvm_fmin_ftz_nan_f16x2:
return {Intrinsic::minimum, FTZ_MustBeOn, true};
case Intrinsic::nvvm_round_d:
return {Intrinsic::round, FTZ_Any};
case Intrinsic::nvvm_round_f:
return {Intrinsic::round, FTZ_MustBeOff};
case Intrinsic::nvvm_round_ftz_f:
return {Intrinsic::round, FTZ_MustBeOn};
case Intrinsic::nvvm_sqrt_rn_d:
return {Intrinsic::sqrt, FTZ_Any};
case Intrinsic::nvvm_sqrt_f:
@@ -278,10 +268,6 @@
// the ftz-ness of the surrounding code. sqrt_rn_f and sqrt_rn_ftz_f are
// the versions with explicit ftz-ness.
return {Intrinsic::sqrt, FTZ_Any};
case Intrinsic::nvvm_sqrt_rn_f:
return {Intrinsic::sqrt, FTZ_MustBeOff};
case Intrinsic::nvvm_sqrt_rn_ftz_f:
return {Intrinsic::sqrt, FTZ_MustBeOn};
case Intrinsic::nvvm_trunc_d:
return {Intrinsic::trunc, FTZ_Any};
case Intrinsic::nvvm_trunc_f:
@@ -316,24 +302,8 @@ static Instruction *simplifyNvvmIntrinsic(IntrinsicInst *II, InstCombiner &IC) {
return {Instruction::UIToFP};

// NVVM intrinsics that map to LLVM binary ops.
case Intrinsic::nvvm_add_rn_d:
return {Instruction::FAdd, FTZ_Any};
case Intrinsic::nvvm_add_rn_f:
return {Instruction::FAdd, FTZ_MustBeOff};
case Intrinsic::nvvm_add_rn_ftz_f:
return {Instruction::FAdd, FTZ_MustBeOn};
case Intrinsic::nvvm_mul_rn_d:
return {Instruction::FMul, FTZ_Any};
case Intrinsic::nvvm_mul_rn_f:
return {Instruction::FMul, FTZ_MustBeOff};
case Intrinsic::nvvm_mul_rn_ftz_f:
return {Instruction::FMul, FTZ_MustBeOn};
case Intrinsic::nvvm_div_rn_d:
return {Instruction::FDiv, FTZ_Any};
case Intrinsic::nvvm_div_rn_f:
return {Instruction::FDiv, FTZ_MustBeOff};
case Intrinsic::nvvm_div_rn_ftz_f:
return {Instruction::FDiv, FTZ_MustBeOn};

// The remainder of cases are NVVM intrinsics that map to LLVM idioms, but
// need special handling.
@@ -342,10 +312,6 @@
// as well.
case Intrinsic::nvvm_rcp_rn_d:
return {SPC_Reciprocal, FTZ_Any};
case Intrinsic::nvvm_rcp_rn_f:
return {SPC_Reciprocal, FTZ_MustBeOff};
case Intrinsic::nvvm_rcp_rn_ftz_f:
return {SPC_Reciprocal, FTZ_MustBeOn};

// We do not currently simplify intrinsics that give an approximate
// answer. These include:
4 changes: 3 additions & 1 deletion llvm/lib/Target/RISCV/RISCVExpandPseudoInsts.cpp
@@ -109,6 +109,7 @@ bool RISCVExpandPseudo::expandMI(MachineBasicBlock &MBB,
return expandRV32ZdinxStore(MBB, MBBI);
case RISCV::PseudoRV32ZdinxLD:
return expandRV32ZdinxLoad(MBB, MBBI);
case RISCV::PseudoCCMOVGPRNoX0:
case RISCV::PseudoCCMOVGPR:
case RISCV::PseudoCCADD:
case RISCV::PseudoCCSUB:
@@ -191,7 +192,8 @@ bool RISCVExpandPseudo::expandCCOp(MachineBasicBlock &MBB,
Register DestReg = MI.getOperand(0).getReg();
assert(MI.getOperand(4).getReg() == DestReg);

if (MI.getOpcode() == RISCV::PseudoCCMOVGPR) {
if (MI.getOpcode() == RISCV::PseudoCCMOVGPR ||
MI.getOpcode() == RISCV::PseudoCCMOVGPRNoX0) {
// Add MV.
BuildMI(TrueBB, DL, TII->get(RISCV::ADDI), DestReg)
.add(MI.getOperand(5))
6 changes: 6 additions & 0 deletions llvm/lib/Target/RISCV/RISCVFeatures.td
@@ -1021,6 +1021,12 @@ def TuneShortForwardBranchOpt
def HasShortForwardBranchOpt : Predicate<"Subtarget->hasShortForwardBranchOpt()">;
def NoShortForwardBranchOpt : Predicate<"!Subtarget->hasShortForwardBranchOpt()">;

def TuneConditionalCompressedMoveFusion
: SubtargetFeature<"conditional-cmv-fusion", "HasConditionalCompressedMoveFusion",
"true", "Enable branch+c.mv fusion">;
def HasConditionalMoveFusion : Predicate<"Subtarget->hasConditionalMoveFusion()">;
def NoConditionalMoveFusion : Predicate<"!Subtarget->hasConditionalMoveFusion()">;

def TuneSiFive7 : SubtargetFeature<"sifive7", "RISCVProcFamily", "SiFive7",
"SiFive 7-Series processors",
[TuneNoDefaultUnroll,
10 changes: 5 additions & 5 deletions llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -6920,7 +6920,7 @@ static SDValue combineSelectToBinOp(SDNode *N, SelectionDAG &DAG,
MVT VT = N->getSimpleValueType(0);
SDLoc DL(N);

if (!Subtarget.hasShortForwardBranchOpt()) {
if (!Subtarget.hasConditionalMoveFusion()) {
// (select c, -1, y) -> -c | y
if (isAllOnesConstant(TrueV)) {
SDValue Neg = DAG.getNegative(CondV, DL, VT);
@@ -7084,7 +7084,7 @@ SDValue RISCVTargetLowering::lowerSELECT(SDValue Op, SelectionDAG &DAG) const {

// (select c, t, f) -> (or (czero_eqz t, c), (czero_nez f, c))
// Unless we have the short forward branch optimization.
if (!Subtarget.hasShortForwardBranchOpt())
if (!Subtarget.hasConditionalMoveFusion())
return DAG.getNode(
ISD::OR, DL, VT,
DAG.getNode(RISCVISD::CZERO_EQZ, DL, VT, TrueV, CondV),
@@ -12209,7 +12209,7 @@ static SDValue combineSelectAndUse(SDNode *N, SDValue Slct, SDValue OtherOp,
if (VT.isVector())
return SDValue();

if (!Subtarget.hasShortForwardBranchOpt()) {
if (!Subtarget.hasConditionalMoveFusion()) {
// (select cond, x, (and x, c)) has custom lowering with Zicond.
if ((!Subtarget.hasStdExtZicond() &&
!Subtarget.hasVendorXVentanaCondOps()) ||
@@ -14440,7 +14440,7 @@ static SDValue performSELECTCombine(SDNode *N, SelectionDAG &DAG,
if (SDValue V = useInversedSetcc(N, DAG, Subtarget))
return V;

if (Subtarget.hasShortForwardBranchOpt())
if (Subtarget.hasConditionalMoveFusion())
return SDValue();

SDValue TrueVal = N->getOperand(1);
@@ -15178,7 +15178,7 @@ SDValue RISCVTargetLowering::PerformDAGCombine(SDNode *N,
return DAG.getNode(RISCVISD::SELECT_CC, DL, N->getValueType(0),
{LHS, RHS, CC, TrueV, FalseV});

if (!Subtarget.hasShortForwardBranchOpt()) {
if (!Subtarget.hasConditionalMoveFusion()) {
// (select c, -1, y) -> -c | y
if (isAllOnesConstant(TrueV)) {
SDValue C = DAG.getSetCC(DL, VT, LHS, RHS, CCVal);
2 changes: 2 additions & 0 deletions llvm/lib/Target/RISCV/RISCVInstrInfo.cpp
@@ -2650,6 +2650,7 @@ bool RISCVInstrInfo::findCommutedOpIndices(const MachineInstr &MI,
case RISCV::TH_MULSH:
// Operands 2 and 3 are commutable.
return fixCommutedOpIndices(SrcOpIdx1, SrcOpIdx2, 2, 3);
case RISCV::PseudoCCMOVGPRNoX0:
case RISCV::PseudoCCMOVGPR:
// Operands 4 and 5 are commutable.
return fixCommutedOpIndices(SrcOpIdx1, SrcOpIdx2, 4, 5);
@@ -2806,6 +2807,7 @@ MachineInstr *RISCVInstrInfo::commuteInstructionImpl(MachineInstr &MI,
return TargetInstrInfo::commuteInstructionImpl(WorkingMI, false, OpIdx1,
OpIdx2);
}
case RISCV::PseudoCCMOVGPRNoX0:
case RISCV::PseudoCCMOVGPR: {
// CCMOV can be commuted by inverting the condition.
auto CC = static_cast<RISCVCC::CondCode>(MI.getOperand(3).getImm());
20 changes: 19 additions & 1 deletion llvm/lib/Target/RISCV/RISCVInstrInfo.td
@@ -1371,6 +1371,24 @@ def PseudoCCMOVGPR : Pseudo<(outs GPR:$dst),
ReadSFBALU, ReadSFBALU]>;
}

// This should always expand to a branch+c.mv, so the size is 6, or 4 if
// the branch is compressible.
let Predicates = [HasConditionalMoveFusion, NoShortForwardBranchOpt],
Constraints = "$dst = $falsev", isCommutable = 1, Size = 6 in {
// This instruction moves $truev to $dst when the condition is true. It will
// be expanded to control flow in RISCVExpandPseudoInsts.
// We use GPRNoX0 because c.mv cannot encode X0.
def PseudoCCMOVGPRNoX0 : Pseudo<(outs GPRNoX0:$dst),
(ins GPR:$lhs, GPR:$rhs, ixlenimm:$cc,
GPRNoX0:$falsev, GPRNoX0:$truev),
[(set GPRNoX0:$dst,
(riscv_selectcc_frag:$cc (XLenVT GPR:$lhs),
(XLenVT GPR:$rhs),
cond, (XLenVT GPRNoX0:$truev),
(XLenVT GPRNoX0:$falsev)))]>,
Sched<[]>;
}

// Conditional binops, that update $dst to (op rs1, rs2) when condition
// is true. Returns $falsev otherwise. Selected by optimizeSelect.
// TODO: Can we use DefaultOperands on the regular binop to accomplish this more
@@ -1519,7 +1537,7 @@ multiclass SelectCC_GPR_rrirr<DAGOperand valty, ValueType vt> {
(IntCCtoRISCVCC $cc), valty:$truev, valty:$falsev)>;
}

let Predicates = [NoShortForwardBranchOpt] in
let Predicates = [NoConditionalMoveFusion] in
defm Select_GPR : SelectCC_GPR_rrirr<GPR, XLenVT>;

class SelectCompressOpt<CondCode Cond>
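A hedged sketch of the control flow PseudoCCMOVGPRNoX0 is expected to expand to after register allocation (registers and condition are illustrative; the real emission lives in RISCVExpandPseudoInsts.cpp):

    # a0 = (a1 == a2) ? a3 : a0
    bne   a1, a2, 1f     # branch over the move when the condition is false
    c.mv  a0, a3         # 2-byte move; c.mv cannot encode x0, hence GPRNoX0
    1: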
3 changes: 2 additions & 1 deletion llvm/lib/Target/RISCV/RISCVProcessors.td
@@ -232,7 +232,8 @@ def SIFIVE_P450 : RISCVProcessorModel<"sifive-p450", NoSchedModel,
FeatureStdExtZba,
FeatureStdExtZbb,
FeatureStdExtZbs,
FeatureStdExtZfhmin]>;
FeatureStdExtZfhmin],
[TuneConditionalCompressedMoveFusion]>;

def SYNTACORE_SCR1_BASE : RISCVProcessorModel<"syntacore-scr1-base",
SyntacoreSCR1Model,
2 changes: 1 addition & 1 deletion llvm/lib/Target/RISCV/RISCVRegisterInfo.td
@@ -487,7 +487,7 @@ defvar VMaskVTs = [vbool1_t, vbool2_t, vbool4_t, vbool8_t, vbool16_t,
defvar VM1VTs = [vint8m1_t, vint16m1_t, vint32m1_t, vint64m1_t,
vbfloat16m1_t, vfloat16m1_t, vfloat32m1_t,
vfloat64m1_t, vint8mf2_t, vint8mf4_t, vint8mf8_t,
vint16mf2_t, vint16mf4_t, vint32mf2_t,
vint16mf2_t, vint16mf4_t, vint32mf2_t,
vfloat16mf4_t, vfloat16mf2_t, vbfloat16mf4_t,
vbfloat16mf2_t, vfloat32mf2_t];

7 changes: 7 additions & 0 deletions llvm/lib/Target/RISCV/RISCVSubtarget.h
@@ -150,6 +150,13 @@ class RISCVSubtarget : public RISCVGenSubtargetInfo {
bool hasHalfFPLoadStoreMove() const {
return HasStdExtZfhmin || HasStdExtZfbfmin;
}

bool hasConditionalMoveFusion() const {
// Whether we support fusing a branch+mv or branch+c.mv as a conditional move.
return (hasConditionalCompressedMoveFusion() && hasStdExtCOrZca()) ||
hasShortForwardBranchOpt();
}

bool is64Bit() const { return IsRV64; }
MVT getXLenVT() const {
return is64Bit() ? MVT::i64 : MVT::i32;
1 change: 1 addition & 0 deletions llvm/lib/Target/X86/X86TargetTransformInfo.cpp
@@ -2232,6 +2232,7 @@ InstructionCost X86TTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst,
static const TypeConversionCostTblEntry AVX512FConversionTbl[] = {
{ ISD::FP_EXTEND, MVT::v8f64, MVT::v8f32, 1 },
{ ISD::FP_EXTEND, MVT::v8f64, MVT::v16f32, 3 },
{ ISD::FP_EXTEND, MVT::v16f64, MVT::v16f32, 4 }, // 2*vcvtps2pd+vextractf64x4
{ ISD::FP_ROUND, MVT::v8f32, MVT::v8f64, 1 },

{ ISD::TRUNCATE, MVT::v2i1, MVT::v2i8, 3 }, // sext+vpslld+vptestmd
2 changes: 1 addition & 1 deletion llvm/test/Analysis/CostModel/X86/cast.ll
@@ -632,7 +632,7 @@ define void @fp_conv(<8 x float> %a, <16 x float>%b, <4 x float> %c) {
; AVX512-LABEL: 'fp_conv'
; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %A1 = fpext <4 x float> %c to <4 x double>
; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %A2 = fpext <8 x float> %a to <8 x double>
; AVX512-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %A3 = fpext <16 x float> %b to <16 x double>
; AVX512-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %A3 = fpext <16 x float> %b to <16 x double>
; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %A4 = fptrunc <4 x double> undef to <4 x float>
; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %A5 = fptrunc <8 x double> undef to <8 x float>
; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
143 changes: 143 additions & 0 deletions llvm/test/CodeGen/AArch64/sve2-bcax.ll
@@ -0,0 +1,143 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4
; RUN: llc -mtriple=aarch64 -mattr=+sve < %s -o - | FileCheck --check-prefix=SVE %s
; RUN: llc -mtriple=aarch64 -mattr=+sve2 < %s -o - | FileCheck --check-prefix=SVE2 %s

define <vscale x 2 x i64> @bcax_nxv2i64_1(<vscale x 2 x i64> %0, <vscale x 2 x i64> %1, <vscale x 2 x i64> %2) {
; SVE-LABEL: bcax_nxv2i64_1:
; SVE: // %bb.0:
; SVE-NEXT: bic z1.d, z2.d, z1.d
; SVE-NEXT: eor z0.d, z1.d, z0.d
; SVE-NEXT: ret
;
; SVE2-LABEL: bcax_nxv2i64_1:
; SVE2: // %bb.0:
; SVE2-NEXT: bcax z0.d, z0.d, z2.d, z1.d
; SVE2-NEXT: ret
%4 = xor <vscale x 2 x i64> %1, splat (i64 -1)
%5 = and <vscale x 2 x i64> %4, %2
%6 = xor <vscale x 2 x i64> %5, %0
ret <vscale x 2 x i64> %6
}

define <vscale x 2 x i64> @bcax_nxv2i64_2(<vscale x 2 x i64> %0, <vscale x 2 x i64> %1, <vscale x 2 x i64> %2) {
; SVE-LABEL: bcax_nxv2i64_2:
; SVE: // %bb.0:
; SVE-NEXT: bic z0.d, z0.d, z1.d
; SVE-NEXT: eor z0.d, z0.d, z2.d
; SVE-NEXT: ret
;
; SVE2-LABEL: bcax_nxv2i64_2:
; SVE2: // %bb.0:
; SVE2-NEXT: bcax z2.d, z2.d, z0.d, z1.d
; SVE2-NEXT: mov z0.d, z2.d
; SVE2-NEXT: ret
%4 = xor <vscale x 2 x i64> %1, splat (i64 -1)
%5 = and <vscale x 2 x i64> %4, %0
%6 = xor <vscale x 2 x i64> %5, %2
ret <vscale x 2 x i64> %6
}

define <vscale x 4 x i32> @bcax_nxv4i32_1(<vscale x 4 x i32> %0, <vscale x 4 x i32> %1, <vscale x 4 x i32> %2) {
; SVE-LABEL: bcax_nxv4i32_1:
; SVE: // %bb.0:
; SVE-NEXT: bic z1.d, z2.d, z1.d
; SVE-NEXT: eor z0.d, z1.d, z0.d
; SVE-NEXT: ret
;
; SVE2-LABEL: bcax_nxv4i32_1:
; SVE2: // %bb.0:
; SVE2-NEXT: bcax z0.d, z0.d, z2.d, z1.d
; SVE2-NEXT: ret
%4 = xor <vscale x 4 x i32> %1, splat (i32 -1)
%5 = and <vscale x 4 x i32> %4, %2
%6 = xor <vscale x 4 x i32> %5, %0
ret <vscale x 4 x i32> %6
}

define <vscale x 4 x i32> @bcax_nxv4i32_2(<vscale x 4 x i32> %0, <vscale x 4 x i32> %1, <vscale x 4 x i32> %2) {
; SVE-LABEL: bcax_nxv4i32_2:
; SVE: // %bb.0:
; SVE-NEXT: bic z0.d, z0.d, z1.d
; SVE-NEXT: eor z0.d, z0.d, z2.d
; SVE-NEXT: ret
;
; SVE2-LABEL: bcax_nxv4i32_2:
; SVE2: // %bb.0:
; SVE2-NEXT: bcax z2.d, z2.d, z0.d, z1.d
; SVE2-NEXT: mov z0.d, z2.d
; SVE2-NEXT: ret
%4 = xor <vscale x 4 x i32> %1, splat (i32 -1)
%5 = and <vscale x 4 x i32> %4, %0
%6 = xor <vscale x 4 x i32> %5, %2
ret <vscale x 4 x i32> %6
}

define <vscale x 8 x i16> @bcax_nxv8i16_1(<vscale x 8 x i16> %0, <vscale x 8 x i16> %1, <vscale x 8 x i16> %2) {
; SVE-LABEL: bcax_nxv8i16_1:
; SVE: // %bb.0:
; SVE-NEXT: bic z1.d, z2.d, z1.d
; SVE-NEXT: eor z0.d, z1.d, z0.d
; SVE-NEXT: ret
;
; SVE2-LABEL: bcax_nxv8i16_1:
; SVE2: // %bb.0:
; SVE2-NEXT: bcax z0.d, z0.d, z2.d, z1.d
; SVE2-NEXT: ret
%4 = xor <vscale x 8 x i16> %1, splat (i16 -1)
%5 = and <vscale x 8 x i16> %4, %2
%6 = xor <vscale x 8 x i16> %5, %0
ret <vscale x 8 x i16> %6
}

define <vscale x 8 x i16> @bcax_nxv8i16_2(<vscale x 8 x i16> %0, <vscale x 8 x i16> %1, <vscale x 8 x i16> %2) {
; SVE-LABEL: bcax_nxv8i16_2:
; SVE: // %bb.0:
; SVE-NEXT: bic z0.d, z0.d, z1.d
; SVE-NEXT: eor z0.d, z0.d, z2.d
; SVE-NEXT: ret
;
; SVE2-LABEL: bcax_nxv8i16_2:
; SVE2: // %bb.0:
; SVE2-NEXT: bcax z2.d, z2.d, z0.d, z1.d
; SVE2-NEXT: mov z0.d, z2.d
; SVE2-NEXT: ret
%4 = xor <vscale x 8 x i16> %1, splat (i16 -1)
%5 = and <vscale x 8 x i16> %4, %0
%6 = xor <vscale x 8 x i16> %5, %2
ret <vscale x 8 x i16> %6
}

define <vscale x 16 x i8> @bcax_nxv16i8_1(<vscale x 16 x i8> %0, <vscale x 16 x i8> %1, <vscale x 16 x i8> %2) {
; SVE-LABEL: bcax_nxv16i8_1:
; SVE: // %bb.0:
; SVE-NEXT: bic z1.d, z2.d, z1.d
; SVE-NEXT: eor z0.d, z1.d, z0.d
; SVE-NEXT: ret
;
; SVE2-LABEL: bcax_nxv16i8_1:
; SVE2: // %bb.0:
; SVE2-NEXT: bcax z0.d, z0.d, z2.d, z1.d
; SVE2-NEXT: ret
%4 = xor <vscale x 16 x i8> %1, splat (i8 -1)
%5 = and <vscale x 16 x i8> %4, %2
%6 = xor <vscale x 16 x i8> %5, %0
ret <vscale x 16 x i8> %6
}

define <vscale x 16 x i8> @bcax_nxv16i8_2(<vscale x 16 x i8> %0, <vscale x 16 x i8> %1, <vscale x 16 x i8> %2) {
; SVE-LABEL: bcax_nxv16i8_2:
; SVE: // %bb.0:
; SVE-NEXT: bic z0.d, z0.d, z1.d
; SVE-NEXT: eor z0.d, z0.d, z2.d
; SVE-NEXT: ret
;
; SVE2-LABEL: bcax_nxv16i8_2:
; SVE2: // %bb.0:
; SVE2-NEXT: bcax z2.d, z2.d, z0.d, z1.d
; SVE2-NEXT: mov z0.d, z2.d
; SVE2-NEXT: ret
%4 = xor <vscale x 16 x i8> %1, splat (i8 -1)
%5 = and <vscale x 16 x i8> %4, %0
%6 = xor <vscale x 16 x i8> %5, %2
ret <vscale x 16 x i8> %6
}
461 changes: 461 additions & 0 deletions llvm/test/CodeGen/RISCV/cmov-branch-opt.ll

Large diffs are not rendered by default.

48 changes: 17 additions & 31 deletions llvm/test/Transforms/InstCombine/NVPTX/nvvm-intrins.ll
@@ -49,15 +49,13 @@ define double @fabs_double(double %a) #0 {
}
; CHECK-LABEL: @fabs_float
define float @fabs_float(float %a) #0 {
; NOFTZ: call float @llvm.fabs.f32
; FTZ: call float @llvm.nvvm.fabs.f
; CHECK: call float @llvm.nvvm.fabs.f
%ret = call float @llvm.nvvm.fabs.f(float %a)
ret float %ret
}
; CHECK-LABEL: @fabs_float_ftz
define float @fabs_float_ftz(float %a) #0 {
; NOFTZ: call float @llvm.nvvm.fabs.ftz.f
; FTZ: call float @llvm.fabs.f32
; CHECK: call float @llvm.nvvm.fabs.ftz.f
%ret = call float @llvm.nvvm.fabs.ftz.f(float %a)
ret float %ret
}
@@ -148,21 +146,19 @@ define float @fmin_float_ftz(float %a, float %b) #0 {

; CHECK-LABEL: @round_double
define double @round_double(double %a) #0 {
; CHECK: call double @llvm.round.f64
; CHECK: call double @llvm.nvvm.round.d
%ret = call double @llvm.nvvm.round.d(double %a)
ret double %ret
}
; CHECK-LABEL: @round_float
define float @round_float(float %a) #0 {
; NOFTZ: call float @llvm.round.f32
; FTZ: call float @llvm.nvvm.round.f
; CHECK: call float @llvm.nvvm.round.f
%ret = call float @llvm.nvvm.round.f(float %a)
ret float %ret
}
; CHECK-LABEL: @round_float_ftz
define float @round_float_ftz(float %a) #0 {
; NOFTZ: call float @llvm.nvvm.round.ftz.f
; FTZ: call float @llvm.round.f32
; CHECK: call float @llvm.nvvm.round.ftz.f
%ret = call float @llvm.nvvm.round.ftz.f(float %a)
ret float %ret
}
@@ -292,42 +288,38 @@ define float @test_ull2f(i64 %a) #0 {

; CHECK-LABEL: @test_add_rn_d
define double @test_add_rn_d(double %a, double %b) #0 {
; CHECK: fadd
; CHECK: call double @llvm.nvvm.add.rn.d
%ret = call double @llvm.nvvm.add.rn.d(double %a, double %b)
ret double %ret
}
; CHECK-LABEL: @test_add_rn_f
define float @test_add_rn_f(float %a, float %b) #0 {
; NOFTZ: fadd
; FTZ: call float @llvm.nvvm.add.rn.f
; CHECK: call float @llvm.nvvm.add.rn.f
%ret = call float @llvm.nvvm.add.rn.f(float %a, float %b)
ret float %ret
}
; CHECK-LABEL: @test_add_rn_f_ftz
define float @test_add_rn_f_ftz(float %a, float %b) #0 {
; NOFTZ: call float @llvm.nvvm.add.rn.f
; FTZ: fadd
; CHECK: call float @llvm.nvvm.add.rn.ftz.f(float %a, float %b)
%ret = call float @llvm.nvvm.add.rn.ftz.f(float %a, float %b)
ret float %ret
}

; CHECK-LABEL: @test_mul_rn_d
define double @test_mul_rn_d(double %a, double %b) #0 {
; CHECK: fmul
; CHECK: call double @llvm.nvvm.mul.rn.d
%ret = call double @llvm.nvvm.mul.rn.d(double %a, double %b)
ret double %ret
}
; CHECK-LABEL: @test_mul_rn_f
define float @test_mul_rn_f(float %a, float %b) #0 {
; NOFTZ: fmul
; FTZ: call float @llvm.nvvm.mul.rn.f
; CHECK: call float @llvm.nvvm.mul.rn.f
%ret = call float @llvm.nvvm.mul.rn.f(float %a, float %b)
ret float %ret
}
; CHECK-LABEL: @test_mul_rn_f_ftz
define float @test_mul_rn_f_ftz(float %a, float %b) #0 {
; NOFTZ: call float @llvm.nvvm.mul.rn.f
; FTZ: fmul
; CHECK: call float @llvm.nvvm.mul.rn.ftz.f(float %a, float %b)
%ret = call float @llvm.nvvm.mul.rn.ftz.f(float %a, float %b)
ret float %ret
}
@@ -340,15 +332,13 @@ define double @test_div_rn_d(double %a, double %b) #0 {
}
; CHECK-LABEL: @test_div_rn_f
define float @test_div_rn_f(float %a, float %b) #0 {
; NOFTZ: fdiv
; FTZ: call float @llvm.nvvm.div.rn.f
; CHECK: call float @llvm.nvvm.div.rn.f
%ret = call float @llvm.nvvm.div.rn.f(float %a, float %b)
ret float %ret
}
; CHECK-LABEL: @test_div_rn_f_ftz
define float @test_div_rn_f_ftz(float %a, float %b) #0 {
; NOFTZ: call float @llvm.nvvm.div.rn.f
; FTZ: fdiv
; CHECK: call float @llvm.nvvm.div.rn.ftz.f(float %a, float %b)
%ret = call float @llvm.nvvm.div.rn.ftz.f(float %a, float %b)
ret float %ret
}
@@ -357,15 +347,13 @@ define float @test_div_rn_f_ftz(float %a, float %b) #0 {

; CHECK-LABEL: @test_rcp_rn_f
define float @test_rcp_rn_f(float %a) #0 {
; NOFTZ: fdiv float 1.0{{.*}} %a
; FTZ: call float @llvm.nvvm.rcp.rn.f
; CHECK: call float @llvm.nvvm.rcp.rn.f
%ret = call float @llvm.nvvm.rcp.rn.f(float %a)
ret float %ret
}
; CHECK-LABEL: @test_rcp_rn_f_ftz
define float @test_rcp_rn_f_ftz(float %a) #0 {
; NOFTZ: call float @llvm.nvvm.rcp.rn.f
; FTZ: fdiv float 1.0{{.*}} %a
; CHECK: call float @llvm.nvvm.rcp.rn.ftz.f(float %a)
%ret = call float @llvm.nvvm.rcp.rn.ftz.f(float %a)
ret float %ret
}
Expand All @@ -385,15 +373,13 @@ define float @test_sqrt_f(float %a) #0 {
}
; CHECK-LABEL: @test_sqrt_rn_f
define float @test_sqrt_rn_f(float %a) #0 {
; NOFTZ: call float @llvm.sqrt.f32(float %a)
; FTZ: call float @llvm.nvvm.sqrt.rn.f
; CHECK: call float @llvm.nvvm.sqrt.rn.f
%ret = call float @llvm.nvvm.sqrt.rn.f(float %a)
ret float %ret
}
; CHECK-LABEL: @test_sqrt_rn_f_ftz
define float @test_sqrt_rn_f_ftz(float %a) #0 {
; NOFTZ: call float @llvm.nvvm.sqrt.rn.f
; FTZ: call float @llvm.sqrt.f32(float %a)
; CHECK: call float @llvm.nvvm.sqrt.rn.ftz.f(float %a)
%ret = call float @llvm.nvvm.sqrt.rn.ftz.f(float %a)
ret float %ret
}
Expand Down
4 changes: 0 additions & 4 deletions llvm/test/tools/llvm-debuginfo-analyzer/cmdline.test
Original file line number Diff line number Diff line change
Expand Up @@ -70,8 +70,6 @@ HELP-ALL: =system - Display PDB's MS system elements.
HELP-ALL: =typename - Include Parameters in templates.
HELP-ALL: =underlying - Underlying type for type definitions.
HELP-ALL: =zero - Zero line numbers.
HELP-ALL: Color Options:
HELP-ALL: This option category has no options.
HELP-ALL: Compare Options:
HELP-ALL: These control the view comparison.
HELP-ALL: --compare=<value> - Elements to compare.
Expand All @@ -81,8 +79,6 @@ HELP-ALL: =scopes - Scopes.
HELP-ALL: =symbols - Symbols.
HELP-ALL: =types - Types.
HELP-ALL: --compare-context - Add the view as compare context.
HELP-ALL: General options:
HELP-ALL: This option category has no options.
HELP-ALL: Generic Options:
HELP-ALL: -h - Alias for --help
HELP-ALL: --help - Display available options (--help-hidden for more)
Expand Down
25 changes: 25 additions & 0 deletions llvm/unittests/Support/CommandLineTest.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2301,4 +2301,29 @@ TEST(CommandLineTest, SubCommandGroups) {
EXPECT_FALSE(SC3.OptionsMap.contains("opt12"));
}

TEST(CommandLineTest, HelpWithEmptyCategory) {
cl::ResetCommandLineParser();

cl::OptionCategory Category1("First Category");
cl::OptionCategory Category2("Second Category");
StackOption<int> Opt1("opt1", cl::cat(Category1));
StackOption<int> Opt2("opt2", cl::cat(Category2));
cl::HideUnrelatedOptions(Category2);

const char *args[] = {"prog"};
EXPECT_TRUE(cl::ParseCommandLineOptions(std::size(args), args, StringRef(),
&llvm::nulls()));
auto Output = interceptStdout(
[]() { cl::PrintHelpMessage(/*Hidden=*/false, /*Categorized=*/true); });
EXPECT_EQ(std::string::npos, Output.find("First Category"))
<< "An empty category should not be printed";

Output = interceptStdout(
[]() { cl::PrintHelpMessage(/*Hidden=*/true, /*Categorized=*/true); });
EXPECT_EQ(std::string::npos, Output.find("First Category"))
<< "An empty category should not be printed";

cl::ResetCommandLineParser();
}

} // anonymous namespace
12 changes: 3 additions & 9 deletions mlir/include/mlir/Conversion/GPUToSPIRV/GPUToSPIRV.h
Original file line number Diff line number Diff line change
Expand Up @@ -31,16 +31,10 @@ void populateGPUToSPIRVPatterns(SPIRVTypeConverter &typeConverter,
void populateGpuWMMAToSPIRVCoopMatrixKHRConversionPatterns(
SPIRVTypeConverter &typeConverter, RewritePatternSet &patterns);

/// Collect a set of patterns to convert WMMA ops from GPU dialect to SPIRV,
/// using the NV Cooperative Matrix extension.
void populateGpuWMMAToSPIRVCoopMatrixNVConversionPatterns(
SPIRVTypeConverter &typeConverter, RewritePatternSet &patterns);

/// Adds `MMAMatrixType` conversions to SPIR-V cooperative matrix type
/// conversion to the type converter. Defaults to KHR cooperative matrix types.
/// When `useNVTypes` is `true`, uses the NV cooperative matrix types.
/// Adds the `MMAMatrixType` to SPIR-V KHR cooperative matrix type conversion
/// to the type converter.
void populateMMAToSPIRVCoopMatrixTypeConversion(
SPIRVTypeConverter &typeConverter, bool useNVTypes = false);
SPIRVTypeConverter &typeConverter);
} // namespace mlir

#endif // MLIR_CONVERSION_GPUTOSPIRV_GPUTOSPIRV_H
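The two KHR entry points above are meant to be used together. A minimal wiring sketch, mirroring the pass changes later in this patch (assumes `targetAttr`, `options`, and `context` come from the surrounding pass):

```cpp
// Sketch only: register the KHR cooperative matrix type conversion first,
// then the WMMA lowering patterns that depend on it.
SPIRVTypeConverter typeConverter(targetAttr, options);
populateMMAToSPIRVCoopMatrixTypeConversion(typeConverter);

RewritePatternSet patterns(context);
populateGPUToSPIRVPatterns(typeConverter, patterns);
populateGpuWMMAToSPIRVCoopMatrixKHRConversionPatterns(typeConverter, patterns);
```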
4 changes: 0 additions & 4 deletions mlir/include/mlir/Conversion/Passes.td
Original file line number Diff line number Diff line change
Expand Up @@ -564,10 +564,6 @@ def ConvertGPUToSPIRV : Pass<"convert-gpu-to-spirv", "ModuleOp"> {
Option<"use64bitIndex", "use-64bit-index",
"bool", /*default=*/"false",
"Use 64-bit integers to convert index types">,
Option<"useCoopMatrixNV", "use-coop-matrix-nv",
"bool", /*default=*/"false",
"Use the NV cooperative matrix extension insted of the KHR extension"
" to lower GPU WMMA ops">,
];
}

Expand Down
24 changes: 24 additions & 0 deletions mlir/include/mlir/Dialect/SCF/Transforms/TileUsingInterface.h
Original file line number Diff line number Diff line change
Expand Up @@ -97,6 +97,30 @@ struct SCFTileAndFuseOptions {
tilingOptions = options;
return *this;
}

/// Control function to check if a slice needs to be fused or not.
/// The control function receives
/// 1) the slice along which fusion is to be done,
/// 2) the producer value that is to be fused
/// 3) a boolean value set to `true` if the fusion is from
/// a destination operand.
/// It returns two booleans:
/// - `true` if the fusion should be done through the candidate slice
/// - `true` if a replacement for the fused producer needs to be yielded
///   from within the tiled loop. Note that it is valid to return `true`
///   only if the fused slice is disjoint across all iterations of the
///   tiled loop. It is up to the caller to ensure that this is true for
///   the fused producers.
using ControlFnTy = std::function<std::tuple<bool, bool>(
tensor::ExtractSliceOp candidateSliceOp, OpResult originalProducer,
bool isDestinationOperand)>;
ControlFnTy fusionControlFn = [](tensor::ExtractSliceOp, OpResult, bool) {
return std::make_tuple(true, false);
};
SCFTileAndFuseOptions &setFusionControlFn(ControlFnTy controlFn) {
fusionControlFn = controlFn;
return *this;
}
};

/// Fuse the producer of the source of `candidateSliceOp` by computing the
Expand Down
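A minimal sketch of how a caller might use the new hook (hypothetical fusion policy; the lambda signature is the `ControlFnTy` declared above):

```cpp
// Sketch only: fuse through every candidate slice except those reached via a
// destination operand, and do not yield replacements for fused producers.
scf::SCFTileAndFuseOptions options;
options.setFusionControlFn(
    [](tensor::ExtractSliceOp candidateSliceOp, OpResult originalProducer,
       bool isDestinationOperand) {
      bool fuseSlice = !isDestinationOperand;
      bool yieldReplacement = false;
      return std::make_tuple(fuseSlice, yieldReplacement);
    });
```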
38 changes: 6 additions & 32 deletions mlir/include/mlir/Dialect/SPIRV/IR/SPIRVBase.td
Original file line number Diff line number Diff line change
Expand Up @@ -1253,12 +1253,6 @@ def SPIRV_C_RayTracingProvisionalKHR : I32EnumAttrCase<"RayTr
Extension<[SPV_KHR_ray_tracing]>
];
}
def SPIRV_C_CooperativeMatrixNV : I32EnumAttrCase<"CooperativeMatrixNV", 5357> {
list<I32EnumAttrCase> implies = [SPIRV_C_Shader];
list<Availability> availability = [
Extension<[SPV_NV_cooperative_matrix]>
];
}
def SPIRV_C_FragmentShaderSampleInterlockEXT : I32EnumAttrCase<"FragmentShaderSampleInterlockEXT", 5363> {
list<I32EnumAttrCase> implies = [SPIRV_C_Shader];
list<Availability> availability = [
Expand Down Expand Up @@ -1501,7 +1495,7 @@ def SPIRV_CapabilityAttr :
SPIRV_C_ShaderNonUniform, SPIRV_C_RuntimeDescriptorArray,
SPIRV_C_StorageTexelBufferArrayDynamicIndexing, SPIRV_C_RayTracingNV,
SPIRV_C_RayTracingMotionBlurNV, SPIRV_C_PhysicalStorageBufferAddresses,
SPIRV_C_RayTracingProvisionalKHR, SPIRV_C_CooperativeMatrixNV,
SPIRV_C_RayTracingProvisionalKHR,
SPIRV_C_FragmentShaderSampleInterlockEXT,
SPIRV_C_FragmentShaderShadingRateInterlockEXT, SPIRV_C_ShaderSMBuiltinsNV,
SPIRV_C_FragmentShaderPixelInterlockEXT, SPIRV_C_DemoteToHelperInvocation,
Expand Down Expand Up @@ -4123,8 +4117,6 @@ class SignlessOrUnsignedIntOfWidths<list<int> widths> :
def SPIRV_IsArrayType : CPred<"::llvm::isa<::mlir::spirv::ArrayType>($_self)">;
def SPIRV_IsCooperativeMatrixType :
CPred<"::llvm::isa<::mlir::spirv::CooperativeMatrixType>($_self)">;
def SPIRV_IsCooperativeMatrixNVType :
CPred<"::llvm::isa<::mlir::spirv::CooperativeMatrixNVType>($_self)">;
def SPIRV_IsImageType : CPred<"::llvm::isa<::mlir::spirv::ImageType>($_self)">;
def SPIRV_IsJointMatrixType :
CPred<"::llvm::isa<::mlir::spirv::JointMatrixINTELType>($_self)">;
Expand Down Expand Up @@ -4157,9 +4149,6 @@ def SPIRV_AnyArray : DialectType<SPIRV_Dialect, SPIRV_IsArrayType,
def SPIRV_AnyCooperativeMatrix : DialectType<SPIRV_Dialect,
SPIRV_IsCooperativeMatrixType,
"any SPIR-V cooperative matrix type">;
def SPIRV_AnyCooperativeMatrixNV : DialectType<SPIRV_Dialect,
SPIRV_IsCooperativeMatrixNVType,
"any SPIR-V NV cooperative matrix type">;
def SPIRV_AnyImage : DialectType<SPIRV_Dialect, SPIRV_IsImageType,
"any SPIR-V image type">;
def SPIRV_AnyJointMatrix : DialectType<SPIRV_Dialect, SPIRV_IsJointMatrixType,
Expand All @@ -4178,13 +4167,12 @@ def SPIRV_Scalar : AnyTypeOf<[SPIRV_Numerical, SPIRV_Bool]>;
def SPIRV_Aggregate : AnyTypeOf<[SPIRV_AnyArray, SPIRV_AnyRTArray, SPIRV_AnyStruct]>;
def SPIRV_Composite :
AnyTypeOf<[SPIRV_Vector, SPIRV_AnyArray, SPIRV_AnyRTArray, SPIRV_AnyStruct,
SPIRV_AnyCooperativeMatrix, SPIRV_AnyCooperativeMatrixNV,
SPIRV_AnyJointMatrix, SPIRV_AnyMatrix]>;
SPIRV_AnyCooperativeMatrix, SPIRV_AnyJointMatrix, SPIRV_AnyMatrix]>;
def SPIRV_Type : AnyTypeOf<[
SPIRV_Void, SPIRV_Bool, SPIRV_Integer, SPIRV_Float, SPIRV_Vector,
SPIRV_AnyPtr, SPIRV_AnyArray, SPIRV_AnyRTArray, SPIRV_AnyStruct,
SPIRV_AnyCooperativeMatrix, SPIRV_AnyCooperativeMatrixNV,
SPIRV_AnyJointMatrix, SPIRV_AnyMatrix, SPIRV_AnySampledImage
SPIRV_AnyCooperativeMatrix, SPIRV_AnyJointMatrix, SPIRV_AnyMatrix,
SPIRV_AnySampledImage
]>;

def SPIRV_SignedInt : SignedIntOfWidths<[8, 16, 32, 64]>;
Expand All @@ -4195,11 +4183,6 @@ class SPIRV_CoopMatrixOfType<list<Type> allowedTypes> :
"::llvm::cast<::mlir::spirv::CooperativeMatrixType>($_self).getElementType()",
"Cooperative Matrix">;

class SPIRV_CoopMatrixNVOfType<list<Type> allowedTypes> :
ContainerType<AnyTypeOf<allowedTypes>, SPIRV_IsCooperativeMatrixNVType,
"::llvm::cast<::mlir::spirv::CooperativeMatrixNVType>($_self).getElementType()",
"Cooperative Matrix NV">;

class SPIRV_JointMatrixOfType<list<Type> allowedTypes> :
ContainerType<AnyTypeOf<allowedTypes>, SPIRV_IsJointMatrixType,
"::llvm::cast<::mlir::spirv::JointMatrixINTELType>($_self).getElementType()",
Expand All @@ -4213,12 +4196,11 @@ class SPIRV_ScalarOrVectorOf<Type type> :

class SPIRV_ScalarOrVectorOrCoopMatrixOf<Type type> :
AnyTypeOf<[type, SPIRV_VectorOf<type>,
SPIRV_CoopMatrixOfType<[type]>, SPIRV_CoopMatrixNVOfType<[type]>]>;
SPIRV_CoopMatrixOfType<[type]>]>;

class SPIRV_MatrixOrCoopMatrixOf<Type type> :
AnyTypeOf<[SPIRV_AnyMatrix,
SPIRV_CoopMatrixOfType<[type]>,
SPIRV_CoopMatrixNVOfType<[type]>]>;
SPIRV_CoopMatrixOfType<[type]>]>;

def SPIRV_ScalarOrVector : AnyTypeOf<[SPIRV_Scalar, SPIRV_Vector]>;
def SPIRV_ScalarOrVectorOrPtr : AnyTypeOf<[SPIRV_ScalarOrVector, SPIRV_AnyPtr]>;
Expand Down Expand Up @@ -4480,11 +4462,6 @@ def SPIRV_OC_OpCooperativeMatrixLoadKHR : I32EnumAttrCase<"OpCooperativeMatrix
def SPIRV_OC_OpCooperativeMatrixStoreKHR : I32EnumAttrCase<"OpCooperativeMatrixStoreKHR", 4458>;
def SPIRV_OC_OpCooperativeMatrixMulAddKHR : I32EnumAttrCase<"OpCooperativeMatrixMulAddKHR", 4459>;
def SPIRV_OC_OpCooperativeMatrixLengthKHR : I32EnumAttrCase<"OpCooperativeMatrixLengthKHR", 4460>;
def SPIRV_OC_OpTypeCooperativeMatrixNV : I32EnumAttrCase<"OpTypeCooperativeMatrixNV", 5358>;
def SPIRV_OC_OpCooperativeMatrixLoadNV : I32EnumAttrCase<"OpCooperativeMatrixLoadNV", 5359>;
def SPIRV_OC_OpCooperativeMatrixStoreNV : I32EnumAttrCase<"OpCooperativeMatrixStoreNV", 5360>;
def SPIRV_OC_OpCooperativeMatrixMulAddNV : I32EnumAttrCase<"OpCooperativeMatrixMulAddNV", 5361>;
def SPIRV_OC_OpCooperativeMatrixLengthNV : I32EnumAttrCase<"OpCooperativeMatrixLengthNV", 5362>;
def SPIRV_OC_OpSubgroupBlockReadINTEL : I32EnumAttrCase<"OpSubgroupBlockReadINTEL", 5575>;
def SPIRV_OC_OpSubgroupBlockWriteINTEL : I32EnumAttrCase<"OpSubgroupBlockWriteINTEL", 5576>;
def SPIRV_OC_OpAssumeTrueKHR : I32EnumAttrCase<"OpAssumeTrueKHR", 5630>;
Expand Down Expand Up @@ -4585,9 +4562,6 @@ def SPIRV_OpcodeAttr :
SPIRV_OC_OpTypeCooperativeMatrixKHR, SPIRV_OC_OpCooperativeMatrixLoadKHR,
SPIRV_OC_OpCooperativeMatrixStoreKHR, SPIRV_OC_OpCooperativeMatrixMulAddKHR,
SPIRV_OC_OpCooperativeMatrixLengthKHR,
SPIRV_OC_OpTypeCooperativeMatrixNV, SPIRV_OC_OpCooperativeMatrixLoadNV,
SPIRV_OC_OpCooperativeMatrixStoreNV, SPIRV_OC_OpCooperativeMatrixMulAddNV,
SPIRV_OC_OpCooperativeMatrixLengthNV,
SPIRV_OC_OpSubgroupBlockReadINTEL, SPIRV_OC_OpSubgroupBlockWriteINTEL,
SPIRV_OC_OpAssumeTrueKHR, SPIRV_OC_OpAtomicFAddEXT, SPIRV_OC_OpGroupIMulKHR,
SPIRV_OC_OpGroupFMulKHR,
Expand Down
247 changes: 0 additions & 247 deletions mlir/include/mlir/Dialect/SPIRV/IR/SPIRVCooperativeMatrixOps.td
Original file line number Diff line number Diff line change
Expand Up @@ -338,253 +338,6 @@ def SPIRV_KHRCooperativeMatrixMulAddOp : SPIRV_KhrVendorOp<"CooperativeMatrixMul
];
}

//===----------------------------------------------------------------------===//
// SPV_NV_cooperative_matrix extension ops.
//===----------------------------------------------------------------------===//

// -----

def SPIRV_NVCooperativeMatrixLengthOp : SPIRV_NvVendorOp<"CooperativeMatrixLength",
[Pure]> {
let summary = "See extension SPV_NV_cooperative_matrix";

let description = [{
Number of components of a cooperative matrix type accessible to each
invocation when treated as a composite.

Result Type must be an OpTypeInt with 32-bit Width and 0 Signedness.

Type is a cooperative matrix type.

#### Example:

```
%0 = spirv.NV.CooperativeMatrixLength : !spirv.NV.coopmatrix<8x16xi32, Subgroup>
```
}];

let assemblyFormat = "attr-dict `:` $cooperative_matrix_type";

let availability = [
MinVersion<SPIRV_V_1_0>,
MaxVersion<SPIRV_V_1_6>,
Extension<[SPV_NV_cooperative_matrix]>,
Capability<[SPIRV_C_CooperativeMatrixNV]>
];

let arguments = (ins
TypeAttr:$cooperative_matrix_type
);

let results = (outs
SPIRV_Int32:$result
);
}

// -----

def SPIRV_NVCooperativeMatrixLoadOp : SPIRV_NvVendorOp<"CooperativeMatrixLoad", []> {
let summary = "See extension SPV_NV_cooperative_matrix";

let description = [{
Load a cooperative matrix through a pointer.

Result Type is the type of the loaded object. It must be a cooperative
matrix type.

Pointer is a pointer into an array. Its type must be an OpTypePointer whose
Type operand is a scalar or vector type. The storage class of Pointer must
be Workgroup, StorageBuffer, or (if SPV_EXT_physical_storage_buffer is
supported) PhysicalStorageBufferEXT.

Stride is the number of elements in the array in memory between the first
component of consecutive rows (or columns) in the result. It must be a
scalar integer type.

ColumnMajor indicates whether the values loaded from memory are arranged in
column-major or row-major order. It must be a boolean constant instruction,
with false indicating row major and true indicating column major.

Memory Access must be a Memory Access literal. If not present, it is the
same as specifying None.

If ColumnMajor is false, then elements (row,*) of the result are taken in
order from contiguous locations starting at Pointer[row*Stride]. If
ColumnMajor is true, then elements (*,col) of the result are taken in order
from contiguous locations starting from Pointer[col*Stride]. Any ArrayStride
decoration on Pointer is ignored.

For a given dynamic instance of this instruction, all operands of this
instruction must be the same for all invocations in a given scope instance
(where the scope is the scope the cooperative matrix type was created with).
All invocations in a given scope instance must be active or all must be
inactive.

### Custom assembly form

``` {.ebnf}
cooperative-matrixload-op ::= ssa-id `=` `spirv.NV.CooperativeMatrixLoad`
ssa-use `,` ssa-use `,` ssa-use
(`[` memory-access `]`)? ` : `
pointer-type `as`
cooperative-matrix-type
```

#### Example:

```
%0 = spirv.NV.CooperativeMatrixLoad %ptr, %stride, %colMajor
: !spirv.ptr<i32, StorageBuffer> as !spirv.NV.coopmatrix<16x8xi32, Workgroup>
```
}];

let availability = [
MinVersion<SPIRV_V_1_0>,
MaxVersion<SPIRV_V_1_6>,
Extension<[SPV_NV_cooperative_matrix]>,
Capability<[SPIRV_C_CooperativeMatrixNV]>
];

let arguments = (ins
SPIRV_AnyPtr:$pointer,
SPIRV_Integer:$stride,
SPIRV_Bool:$columnmajor,
OptionalAttr<SPIRV_MemoryAccessAttr>:$memory_access
);

let results = (outs
SPIRV_AnyCooperativeMatrixNV:$result
);
}

// -----

def SPIRV_NVCooperativeMatrixMulAddOp : SPIRV_NvVendorOp<"CooperativeMatrixMulAdd",
[Pure, AllTypesMatch<["c", "result"]>]> {
let summary = "See extension SPV_NV_cooperative_matrix";

let description = [{
Linear-algebraic matrix multiply of A by B and then component-wise add C.
The order of the operations is implementation-dependent. The internal
precision of floating-point operations is defined by the client API.
Integer operations are performed at the precision of the Result Type and are
exact unless there is overflow or underflow, in which case the result is
undefined.

Result Type must be a cooperative matrix type with M rows and N columns.

A is a cooperative matrix with M rows and K columns.

B is a cooperative matrix with K rows and N columns.

C is a cooperative matrix with M rows and N columns.

The values of M, N, and K must be consistent across the result and operands.
This is referred to as an MxNxK matrix multiply.

A, B, C, and Result Type must have the same scope, and this defines the
scope of the operation. A, B, C, and Result Type need not necessarily have
the same component type, this is defined by the client API.

If the Component Type of any matrix operand is an integer type, then its
components are treated as signed if its Component Type has Signedness of 1
and are treated as unsigned otherwise.

For a given dynamic instance of this instruction, all invocations in a given
scope instance must be active or all must be inactive (where the scope is
the scope of the operation).

#### Example:

```
%0 = spirv.NV.CooperativeMatrixMulAdd %arg0, %arg1, %arg2, :
!spirv.NV.coopmatrix<8x16xi32, Subgroup>
```
}];

let assemblyFormat = [{
operands attr-dict `:` type($a) `,` type($b) `->` type($c)
}];

let availability = [
MinVersion<SPIRV_V_1_0>,
MaxVersion<SPIRV_V_1_6>,
Extension<[SPV_NV_cooperative_matrix]>,
Capability<[SPIRV_C_CooperativeMatrixNV]>
];

let arguments = (ins
SPIRV_AnyCooperativeMatrixNV:$a,
SPIRV_AnyCooperativeMatrixNV:$b,
SPIRV_AnyCooperativeMatrixNV:$c
);

let results = (outs
SPIRV_AnyCooperativeMatrixNV:$result
);
}

// -----

def SPIRV_NVCooperativeMatrixStoreOp : SPIRV_NvVendorOp<"CooperativeMatrixStore", []> {
let summary = "See extension SPV_NV_cooperative_matrix";

let description = [{
Store a cooperative matrix through a pointer.

Pointer is a pointer into an array. Its type must be an OpTypePointer whose
Type operand is a scalar or vector type. The storage class of Pointer must
be Workgroup, StorageBuffer, or (if SPV_EXT_physical_storage_buffer is
supported) PhysicalStorageBufferEXT.

Object is the object to store. Its type must be an
OpTypeCooperativeMatrixNV.

Stride is the number of elements in the array in memory between the first
component of consecutive rows (or columns) in the result. It must be a
scalar integer type.

ColumnMajor indicates whether the values stored to memory are arranged in
column-major or row-major order. It must be a boolean constant instruction,
with false indicating row major and true indicating column major.

Memory Access must be a Memory Access literal. If not present, it is the
same as specifying None.

``` {.ebnf}
coop-matrix-store-op ::= `spirv.NV.CooperativeMatrixStore `
ssa-use `, ` ssa-use `, `
ssa-use `, ` ssa-use `, `
(`[` memory-access `]`)? `:`
pointer-type `,` coop-matrix-type
```

#### Example:

```
spirv.NV.CooperativeMatrixStore %arg0, %arg2, %arg1, %arg3 :
!spirv.ptr<i32, StorageBuffer>, !spirv.NV.coopmatrix<16x8xi32, Workgroup>
```
}];

let availability = [
MinVersion<SPIRV_V_1_0>,
MaxVersion<SPIRV_V_1_6>,
Extension<[SPV_NV_cooperative_matrix]>,
Capability<[SPIRV_C_CooperativeMatrixNV]>
];

let arguments = (ins
SPIRV_AnyPtr:$pointer,
SPIRV_AnyCooperativeMatrixNV:$object,
SPIRV_Integer:$stride,
SPIRV_Bool:$columnmajor,
OptionalAttr<SPIRV_MemoryAccessAttr>:$memory_access
);

let results = (outs);
}

// -----

#endif // MLIR_DIALECT_SPIRV_IR_COOPERATIVE_MATRIX_OPS
27 changes: 0 additions & 27 deletions mlir/include/mlir/Dialect/SPIRV/IR/SPIRVTypes.h
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,6 @@ namespace spirv {
namespace detail {
struct ArrayTypeStorage;
struct CooperativeMatrixTypeStorage;
struct CooperativeMatrixNVTypeStorage;
struct ImageTypeStorage;
struct JointMatrixTypeStorage;
struct MatrixTypeStorage;
Expand Down Expand Up @@ -421,32 +420,6 @@ class CooperativeMatrixType
std::optional<StorageClass> storage = std::nullopt);
};

// SPIR-V NV cooperative matrix type
class CooperativeMatrixNVType
: public Type::TypeBase<CooperativeMatrixNVType, CompositeType,
detail::CooperativeMatrixNVTypeStorage> {
public:
using Base::Base;

static constexpr StringLiteral name = "spirv.NV.coopmatrix";

static CooperativeMatrixNVType get(Type elementType, Scope scope,
unsigned rows, unsigned columns);
Type getElementType() const;

/// Returns the scope of the matrix.
Scope getScope() const;
/// Returns the number of rows of the matrix.
unsigned getRows() const;
/// Returns the number of columns of the matrix.
unsigned getColumns() const;

void getExtensions(SPIRVType::ExtensionArrayRefVector &extensions,
std::optional<StorageClass> storage = std::nullopt);
void getCapabilities(SPIRVType::CapabilityArrayRefVector &capabilities,
std::optional<StorageClass> storage = std::nullopt);
};

// SPIR-V joint matrix type
class JointMatrixINTELType
: public Type::TypeBase<JointMatrixINTELType, CompositeType,
Expand Down
66 changes: 38 additions & 28 deletions mlir/lib/Conversion/ComplexToStandard/ComplexToStandard.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -569,29 +569,39 @@ struct MulOpConversion : public OpConversionPattern<complex::MulOp> {
mlir::ImplicitLocOpBuilder b(op.getLoc(), rewriter);
auto type = cast<ComplexType>(adaptor.getLhs().getType());
auto elementType = cast<FloatType>(type.getElementType());
arith::FastMathFlagsAttr fmf = op.getFastMathFlagsAttr();
auto fmfValue = fmf.getValue();

Value lhsReal = b.create<complex::ReOp>(elementType, adaptor.getLhs());
Value lhsRealAbs = b.create<math::AbsFOp>(lhsReal);
Value lhsRealAbs = b.create<math::AbsFOp>(lhsReal, fmfValue);
Value lhsImag = b.create<complex::ImOp>(elementType, adaptor.getLhs());
Value lhsImagAbs = b.create<math::AbsFOp>(lhsImag);
Value lhsImagAbs = b.create<math::AbsFOp>(lhsImag, fmfValue);
Value rhsReal = b.create<complex::ReOp>(elementType, adaptor.getRhs());
Value rhsRealAbs = b.create<math::AbsFOp>(rhsReal);
Value rhsRealAbs = b.create<math::AbsFOp>(rhsReal, fmfValue);
Value rhsImag = b.create<complex::ImOp>(elementType, adaptor.getRhs());
Value rhsImagAbs = b.create<math::AbsFOp>(rhsImag);

Value lhsRealTimesRhsReal = b.create<arith::MulFOp>(lhsReal, rhsReal);
Value lhsRealTimesRhsRealAbs = b.create<math::AbsFOp>(lhsRealTimesRhsReal);
Value lhsImagTimesRhsImag = b.create<arith::MulFOp>(lhsImag, rhsImag);
Value lhsImagTimesRhsImagAbs = b.create<math::AbsFOp>(lhsImagTimesRhsImag);
Value real =
b.create<arith::SubFOp>(lhsRealTimesRhsReal, lhsImagTimesRhsImag);

Value lhsImagTimesRhsReal = b.create<arith::MulFOp>(lhsImag, rhsReal);
Value lhsImagTimesRhsRealAbs = b.create<math::AbsFOp>(lhsImagTimesRhsReal);
Value lhsRealTimesRhsImag = b.create<arith::MulFOp>(lhsReal, rhsImag);
Value lhsRealTimesRhsImagAbs = b.create<math::AbsFOp>(lhsRealTimesRhsImag);
Value imag =
b.create<arith::AddFOp>(lhsImagTimesRhsReal, lhsRealTimesRhsImag);
Value rhsImagAbs = b.create<math::AbsFOp>(rhsImag, fmfValue);

Value lhsRealTimesRhsReal =
b.create<arith::MulFOp>(lhsReal, rhsReal, fmfValue);
Value lhsRealTimesRhsRealAbs =
b.create<math::AbsFOp>(lhsRealTimesRhsReal, fmfValue);
Value lhsImagTimesRhsImag =
b.create<arith::MulFOp>(lhsImag, rhsImag, fmfValue);
Value lhsImagTimesRhsImagAbs =
b.create<math::AbsFOp>(lhsImagTimesRhsImag, fmfValue);
Value real = b.create<arith::SubFOp>(lhsRealTimesRhsReal,
lhsImagTimesRhsImag, fmfValue);

Value lhsImagTimesRhsReal =
b.create<arith::MulFOp>(lhsImag, rhsReal, fmfValue);
Value lhsImagTimesRhsRealAbs =
b.create<math::AbsFOp>(lhsImagTimesRhsReal, fmfValue);
Value lhsRealTimesRhsImag =
b.create<arith::MulFOp>(lhsReal, rhsImag, fmfValue);
Value lhsRealTimesRhsImagAbs =
b.create<math::AbsFOp>(lhsRealTimesRhsImag, fmfValue);
Value imag = b.create<arith::AddFOp>(lhsImagTimesRhsReal,
lhsRealTimesRhsImag, fmfValue);

// Handle cases where the "naive" calculation results in NaN values.
Value realIsNan =
Expand Down Expand Up @@ -717,20 +727,20 @@ struct MulOpConversion : public OpConversionPattern<complex::MulOp> {
recalc = b.create<arith::AndIOp>(isNan, recalc);

// Recalculate real part.
lhsRealTimesRhsReal = b.create<arith::MulFOp>(lhsReal, rhsReal);
lhsImagTimesRhsImag = b.create<arith::MulFOp>(lhsImag, rhsImag);
Value newReal =
b.create<arith::SubFOp>(lhsRealTimesRhsReal, lhsImagTimesRhsImag);
lhsRealTimesRhsReal = b.create<arith::MulFOp>(lhsReal, rhsReal, fmfValue);
lhsImagTimesRhsImag = b.create<arith::MulFOp>(lhsImag, rhsImag, fmfValue);
Value newReal = b.create<arith::SubFOp>(lhsRealTimesRhsReal,
lhsImagTimesRhsImag, fmfValue);
real = b.create<arith::SelectOp>(
recalc, b.create<arith::MulFOp>(inf, newReal), real);
recalc, b.create<arith::MulFOp>(inf, newReal, fmfValue), real);

// Recalculate imag part.
lhsImagTimesRhsReal = b.create<arith::MulFOp>(lhsImag, rhsReal);
lhsRealTimesRhsImag = b.create<arith::MulFOp>(lhsReal, rhsImag);
Value newImag =
b.create<arith::AddFOp>(lhsImagTimesRhsReal, lhsRealTimesRhsImag);
lhsImagTimesRhsReal = b.create<arith::MulFOp>(lhsImag, rhsReal, fmfValue);
lhsRealTimesRhsImag = b.create<arith::MulFOp>(lhsReal, rhsImag, fmfValue);
Value newImag = b.create<arith::AddFOp>(lhsImagTimesRhsReal,
lhsRealTimesRhsImag, fmfValue);
imag = b.create<arith::SelectOp>(
recalc, b.create<arith::MulFOp>(inf, newImag), imag);
recalc, b.create<arith::MulFOp>(inf, newImag, fmfValue), imag);

rewriter.replaceOpWithNewOp<complex::CreateOp>(op, type, real, imag);
return success();
Expand Down
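The pattern above now threads the source op's fast-math flags through every op it creates. A condensed sketch of the convention (builder calls as in the code above; `op`, `b`, and the operand values are assumed):

```cpp
// Sketch only: read the flags once off the complex op, then pass them to each
// generated arith/math op so the flags survive the lowering.
arith::FastMathFlagsAttr fmf = op.getFastMathFlagsAttr();
arith::FastMathFlags fmfValue = fmf.getValue();
Value prod = b.create<arith::MulFOp>(lhsReal, rhsReal, fmfValue);
Value prodAbs = b.create<math::AbsFOp>(prod, fmfValue);
```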
12 changes: 3 additions & 9 deletions mlir/lib/Conversion/GPUToSPIRV/GPUToSPIRVPass.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -112,18 +112,12 @@ void GPUToSPIRVPass::runOnOperation() {
SPIRVConversionOptions options;
options.use64bitIndex = this->use64bitIndex;
SPIRVTypeConverter typeConverter(targetAttr, options);
populateMMAToSPIRVCoopMatrixTypeConversion(typeConverter,
this->useCoopMatrixNV);
populateMMAToSPIRVCoopMatrixTypeConversion(typeConverter);

RewritePatternSet patterns(context);
populateGPUToSPIRVPatterns(typeConverter, patterns);
if (this->useCoopMatrixNV) {
populateGpuWMMAToSPIRVCoopMatrixNVConversionPatterns(typeConverter,
patterns);
} else {
populateGpuWMMAToSPIRVCoopMatrixKHRConversionPatterns(typeConverter,
patterns);
}
populateGpuWMMAToSPIRVCoopMatrixKHRConversionPatterns(typeConverter,
patterns);

// TODO: Change SPIR-V conversion to be progressive and remove the following
// patterns.
Expand Down
133 changes: 5 additions & 128 deletions mlir/lib/Conversion/GPUToSPIRV/WmmaOpsToSPIRV.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -32,19 +32,18 @@

namespace mlir {
//===----------------------------------------------------------------------===//
// Patterns and helpers used by both the KHR and the NV lowering paths.
// Patterns and helpers.
//===----------------------------------------------------------------------===//

/// Creates a SPIR-V op to replace the given GPU subgroup mma elementwise op
/// when the elementwise op directly supports the cooperative matrix type.
/// Returns false if it cannot.
///
/// See SPV_NV_cooperative_matrix for supported elementwise ops.
/// See SPV_KHR_cooperative_matrix for supported elementwise ops.
static bool createElementwiseOp(ConversionPatternRewriter &builder,
gpu::SubgroupMmaElementwiseOp op, Type coopType,
ValueRange operands) {
assert((isa<spirv::CooperativeMatrixType, spirv::CooperativeMatrixNVType>(
coopType)));
assert(isa<spirv::CooperativeMatrixType>(coopType));

switch (op.getOpType()) {
case gpu::MMAElementwiseOp::ADDF:
Expand Down Expand Up @@ -89,8 +88,7 @@ bool allOperandsHaveSameCoopMatrixType(ValueRange operands) {
llvm::map_range(operands, [](Value v) { return v.getType(); })))
return false;

return isa<spirv::CooperativeMatrixType, spirv::CooperativeMatrixNVType>(
operands.front().getType());
return isa<spirv::CooperativeMatrixType>(operands.front().getType());
}

namespace {
Expand Down Expand Up @@ -292,104 +290,6 @@ struct WmmaMmaOpToSPIRVLowering final

} // namespace
} // namespace khr

//===----------------------------------------------------------------------===//
// SPV_NV_cooperative_matrix
//===----------------------------------------------------------------------===//

namespace nv {
namespace {

/// Converts the GPU MMA loadOp to NVCooperativeMatrixLoad op in the SPIRV
/// dialect.
struct WmmaLoadOpToSPIRVLowering final
: OpConversionPattern<gpu::SubgroupMmaLoadMatrixOp> {
using OpConversionPattern::OpConversionPattern;

LogicalResult
matchAndRewrite(gpu::SubgroupMmaLoadMatrixOp subgroupMmaLoadMatrixOp,
OpAdaptor adaptor,
ConversionPatternRewriter &rewriter) const override {
Location loc = subgroupMmaLoadMatrixOp->getLoc();
auto &typeConverter = *getTypeConverter<SPIRVTypeConverter>();

gpu::MMAMatrixType retType =
cast<gpu::MMAMatrixType>(subgroupMmaLoadMatrixOp.getRes().getType());
auto memrefType =
cast<MemRefType>(subgroupMmaLoadMatrixOp.getSrcMemref().getType());
Value bufferPtr =
spirv::getElementPtr(typeConverter, memrefType, adaptor.getSrcMemref(),
adaptor.getIndices(), loc, rewriter);
auto coopType =
typeConverter.convertType<spirv::CooperativeMatrixNVType>(retType);
if (!coopType)
return rewriter.notifyMatchFailure(subgroupMmaLoadMatrixOp,
"type conversion failed");

int64_t stride = subgroupMmaLoadMatrixOp.getLeadDimension().getSExtValue();
auto i32Type = rewriter.getI32Type();
auto strideValue = rewriter.create<spirv::ConstantOp>(
loc, i32Type, IntegerAttr::get(i32Type, stride));
bool isColMajor = static_cast<bool>(subgroupMmaLoadMatrixOp.getTranspose());
auto columnMajor = rewriter.create<spirv::ConstantOp>(
loc, rewriter.getI1Type(), rewriter.getBoolAttr(isColMajor));
rewriter.replaceOpWithNewOp<spirv::NVCooperativeMatrixLoadOp>(
subgroupMmaLoadMatrixOp, coopType, bufferPtr, strideValue, columnMajor,
spirv::MemoryAccessAttr());
return success();
}
};

/// Converts the GPU MMA StoreOp to NVCooperativeMatrixStore op in the SPIRV
/// dialect.
struct WmmaStoreOpToSPIRVLowering final
: OpConversionPattern<gpu::SubgroupMmaStoreMatrixOp> {
using OpConversionPattern::OpConversionPattern;

LogicalResult
matchAndRewrite(gpu::SubgroupMmaStoreMatrixOp subgroupMmaStoreMatrixOp,
OpAdaptor adaptor,
ConversionPatternRewriter &rewriter) const override {
Location loc = subgroupMmaStoreMatrixOp->getLoc();
auto memrefType =
cast<MemRefType>(subgroupMmaStoreMatrixOp.getDstMemref().getType());
Value bufferPtr = spirv::getElementPtr(
*getTypeConverter<const SPIRVTypeConverter>(), memrefType,
adaptor.getDstMemref(), adaptor.getIndices(), loc, rewriter);
int64_t stride = subgroupMmaStoreMatrixOp.getLeadDimension().getSExtValue();
auto i32Type = rewriter.getI32Type();
auto strideValue = rewriter.create<spirv::ConstantOp>(
loc, i32Type, IntegerAttr::get(i32Type, stride));
bool useColMajor =
static_cast<bool>(subgroupMmaStoreMatrixOp.getTranspose());
auto columnMajor = rewriter.create<spirv::ConstantOp>(
loc, rewriter.getI1Type(), rewriter.getBoolAttr(useColMajor));
rewriter.replaceOpWithNewOp<spirv::NVCooperativeMatrixStoreOp>(
subgroupMmaStoreMatrixOp, bufferPtr, adaptor.getSrc(), strideValue,
columnMajor, spirv::MemoryAccessAttr());
return success();
}
};

/// Converts GPU MMA Compute to
/// NVCooperativeMatrixMulAdd op in the SPIRV dialect.
struct WmmaMmaOpToSPIRVLowering final
: OpConversionPattern<gpu::SubgroupMmaComputeOp> {
using OpConversionPattern::OpConversionPattern;

LogicalResult
matchAndRewrite(gpu::SubgroupMmaComputeOp subgroupMmaComputeOp,
OpAdaptor adaptor,
ConversionPatternRewriter &rewriter) const override {
rewriter.replaceOpWithNewOp<spirv::NVCooperativeMatrixMulAddOp>(
subgroupMmaComputeOp, adaptor.getOpC().getType(), adaptor.getOpA(),
adaptor.getOpB(), adaptor.getOpC());
return success();
}
};

} // namespace
} // namespace nv
} // namespace mlir

void mlir::populateGpuWMMAToSPIRVCoopMatrixKHRConversionPatterns(
Expand All @@ -404,31 +304,8 @@ void mlir::populateGpuWMMAToSPIRVCoopMatrixKHRConversionPatterns(
/*benefit=*/2);
}

void mlir::populateGpuWMMAToSPIRVCoopMatrixNVConversionPatterns(
SPIRVTypeConverter &converter, RewritePatternSet &patterns) {
using namespace mlir;
MLIRContext *context = patterns.getContext();
patterns.add<nv::WmmaLoadOpToSPIRVLowering, nv::WmmaMmaOpToSPIRVLowering,
nv::WmmaStoreOpToSPIRVLowering, WmmaConstantOpToSPIRVLowering,
WmmaElementwiseOpToSPIRVDefaultLowering>(converter, context);
// Give the following patterns higher benefit to prevail over the default one.
patterns.add<WmmaElementwiseOpToSPIRVScalarMulLowering>(converter, context,
/*benefit=*/2);
}

void mlir::populateMMAToSPIRVCoopMatrixTypeConversion(
mlir::SPIRVTypeConverter &typeConverter, bool useNVTypes) {
if (useNVTypes) {
typeConverter.addConversion([](gpu::MMAMatrixType type) {
ArrayRef<int64_t> retTypeShape = type.getShape();
Type elementType = type.getElementType();
return spirv::CooperativeMatrixNVType::get(
elementType, spirv::Scope::Subgroup, retTypeShape[0],
retTypeShape[1]);
});
return;
}

mlir::SPIRVTypeConverter &typeConverter) {
typeConverter.addConversion([](gpu::MMAMatrixType type) {
ArrayRef<int64_t> retTypeShape = type.getShape();
Type elementType = type.getElementType();
Expand Down
65 changes: 47 additions & 18 deletions mlir/lib/Dialect/SCF/Transforms/TileUsingInterface.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -728,32 +728,36 @@ mlir::scf::tileConsumerAndFuseProducerGreedilyUsingSCFForOp(
}

// 1. First tile the consumer.
SmallVector<scf::ForOp> forLoops;
SetVector<Operation *> fusedProducers, tiledAndFusedOps;
DenseMap<Value, Value> replacements;
llvm::SmallDenseMap<Value, int64_t> yieldedValueToResultNumber;
{
FailureOr<scf::SCFTilingResult> tilingResult =
tileUsingSCFForOp(rewriter, consumer, options.tilingOptions);
if (failed(tilingResult))
return rewriter.notifyMatchFailure(consumer, "failed to tile consumer");
for (auto *tiledOp : tilingResult->tiledOps)
tiledAndFusedOps.insert(tiledOp);
forLoops = castToTypedOperations<scf::ForOp>(tilingResult->loops);
for (auto [index, origValue, replacement] :
llvm::enumerate(consumer->getResults(), tilingResult->replacements)) {
replacements[origValue] = replacement;
yieldedValueToResultNumber[tilingResult->tiledOps.back()->getResult(
index)] = index;
}
}
llvm::SmallDenseMap<Value, size_t> origProducerToLoopResultNum;
FailureOr<scf::SCFTilingResult> tilingResult =
tileUsingSCFForOp(rewriter, consumer, options.tilingOptions);
if (failed(tilingResult))
return rewriter.notifyMatchFailure(consumer, "failed to tile consumer");
for (auto *tiledOp : tilingResult->tiledOps)
tiledAndFusedOps.insert(tiledOp);
SmallVector<scf::ForOp> forLoops =
castToTypedOperations<scf::ForOp>(tilingResult->loops);

// If there are no loops generated, fusion is immaterial.
if (forLoops.empty()) {
DenseMap<Value, Value> replacements;
for (auto [origVal, replacement] :
llvm::zip_equal(consumer->getResults(), tilingResult->replacements)) {
replacements[origVal] = replacement;
}
return scf::SCFTileAndFuseResult{fusedProducers, tiledAndFusedOps,
getAsOperations(forLoops), replacements};
}

// To keep track of replacements, for now just record the map from the
// original untiled value to the result number of the for loop. Since the
// loop potentially gets replaced during fusion, keeping the value directly
// won't work.
DenseMap<Value, size_t> origValToResultNumber;
for (auto [index, result] : llvm::enumerate(consumer->getResults())) {
origValToResultNumber[result] = index;
}

// 2. Typically, the operands of the tiled operation are slices of the
// operands of the untiled operation. These are expressed in IR using
// `tensor.extract_slice` operations with source being the operands of the
Expand All @@ -776,6 +780,18 @@ mlir::scf::tileConsumerAndFuseProducerGreedilyUsingSCFForOp(
tensor::ExtractSliceOp candidateSliceOp = candidates.front();
candidates.pop_front();

// Find the original producer of the slice.
auto [fusableProducer, destinationInitArg] =
getUntiledProducerFromSliceSource(&candidateSliceOp.getSourceMutable(),
forLoops);
if (!fusableProducer)
continue;

auto [fuseSlice, yieldReplacement] = options.fusionControlFn(
candidateSliceOp, fusableProducer, destinationInitArg.has_value());
if (!fuseSlice)
continue;

// The operands of the fused producer might themselves be slices of
// values produced by operations that implement the `TilingInterface`.
// Add these operations to the worklist.
Expand All @@ -784,13 +800,26 @@ mlir::scf::tileConsumerAndFuseProducerGreedilyUsingSCFForOp(
if (!fusedResult)
continue;

if (yieldReplacement) {
yieldReplacementForFusedProducer(rewriter, candidateSliceOp,
fusedResult.value(), forLoops);
origValToResultNumber[fusableProducer] =
forLoops.front().getNumResults() - 1;
}

if (Operation *tiledAndFusedOp =
fusedResult->tiledAndFusedProducer.getDefiningOp()) {
fusedProducers.insert(fusedResult->origProducer.getDefiningOp());
tiledAndFusedOps.insert(tiledAndFusedOp);
addCandidateSlices(tiledAndFusedOp, candidates);
}
}

DenseMap<Value, Value> replacements;
for (auto [origVal, resultNumber] : origValToResultNumber) {
replacements[origVal] = forLoops.front()->getResult(resultNumber);
}

return scf::SCFTileAndFuseResult{fusedProducers, tiledAndFusedOps,
getAsOperations(forLoops), replacements};
}
Expand Down
2 changes: 1 addition & 1 deletion mlir/lib/Dialect/SPIRV/IR/CastOps.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@ static LogicalResult verifyCastOp(Operation *op,
auto [operandElemTy, resultElemTy] =
TypeSwitch<Type, TypePair>(operandType)
.Case<VectorType, spirv::CooperativeMatrixType,
spirv::CooperativeMatrixNVType, spirv::JointMatrixINTELType>(
spirv::JointMatrixINTELType>(
[resultType](auto concreteOperandTy) -> TypePair {
if (auto concreteResultTy =
dyn_cast<decltype(concreteOperandTy)>(resultType)) {
Expand Down