@@ -15,17 +15,50 @@
// template <class charT, class traits = char_traits<charT> >
// class basic_fstream

// explicit basic_fstream(const filesystem::path& s,
// ios_base::openmode mode = ios_base::in|ios_base::out);
// template<class T>
// explicit basic_fstream(const T& s, ios_base::openmode mode = ios_base::in | ios_base::out); // Since C++17
// Constraints: is_same_v<T, filesystem::path> is true

#include <fstream>
#include <filesystem>
#include <cassert>
#include <string_view>
#include <type_traits>

#include "test_macros.h"
#include "test_iterators.h"
#include "platform_support.h"

namespace fs = std::filesystem;

template <class CharT>
constexpr bool test_non_convert_to_path() {
// String types
static_assert(!std::is_constructible_v<std::fstream, std::basic_string_view<CharT>>);
static_assert(!std::is_constructible_v<std::fstream, const std::basic_string_view<CharT>>);

// Char* pointers
if constexpr (!std::is_same_v<CharT, char>)
static_assert(!std::is_constructible_v<std::fstream, const CharT*>);

// Iterators
static_assert(!std::is_constructible_v<std::fstream, cpp17_input_iterator<const CharT*>>);

return true;
}

static_assert(test_non_convert_to_path<char>());

#if !defined(TEST_HAS_NO_WIDE_CHARACTERS) && !defined(TEST_HAS_OPEN_WITH_WCHAR)
static_assert(test_non_convert_to_path<wchar_t>());
#endif // !TEST_HAS_NO_WIDE_CHARACTERS && !TEST_HAS_OPEN_WITH_WCHAR

#ifndef TEST_HAS_NO_CHAR8_T
static_assert(test_non_convert_to_path<char8_t>());
#endif // TEST_HAS_NO_CHAR8_T

static_assert(test_non_convert_to_path<char16_t>());
static_assert(test_non_convert_to_path<char32_t>());

int main(int, char**) {
fs::path p = get_temp_file_name();
{
@@ -17,18 +17,49 @@
// template <class charT, class traits = char_traits<charT> >
// class basic_ifstream

// explicit basic_ifstream(const filesystem::path& s,
// ios_base::openmode mode = ios_base::in);
// template<class T>
// explicit basic_ifstream(const T& s, ios_base::openmode mode = ios_base::in); // Since C++17
// Constraints: is_same_v<T, filesystem::path> is true

#include <cassert>
#include <filesystem>
#include <fstream>
#include <string_view>
#include <type_traits>

#include "test_macros.h"
#include "test_iterators.h"

namespace fs = std::filesystem;

template <class CharT>
constexpr bool test_non_convert_to_path() {
// String types
static_assert(!std::is_constructible_v<std::ifstream, std::basic_string_view<CharT>>);
static_assert(!std::is_constructible_v<std::ifstream, const std::basic_string_view<CharT>>);

// Char* pointers
if constexpr (!std::is_same_v<CharT, char>)
static_assert(!std::is_constructible_v<std::ifstream, const CharT*>);

// Iterators
static_assert(!std::is_constructible_v<std::ifstream, cpp17_input_iterator<const CharT*>>);

return true;
}

static_assert(test_non_convert_to_path<char>());

#if !defined(TEST_HAS_NO_WIDE_CHARACTERS) && !defined(TEST_HAS_OPEN_WITH_WCHAR)
static_assert(test_non_convert_to_path<wchar_t>());
#endif // !TEST_HAS_NO_WIDE_CHARACTERS && !TEST_HAS_OPEN_WITH_WCHAR

#ifndef TEST_HAS_NO_CHAR8_T
static_assert(test_non_convert_to_path<char8_t>());
#endif // TEST_HAS_NO_CHAR8_T

static_assert(test_non_convert_to_path<char16_t>());
static_assert(test_non_convert_to_path<char32_t>());

int main(int, char**) {
{
fs::path p;
@@ -15,7 +15,9 @@
// template <class charT, class traits = char_traits<charT> >
// class basic_ofstream

// explicit basic_ofstream(const filesystem::path& s, ios_base::openmode mode = ios_base::out);
// template<class T>
// explicit basic_ofstream(const T& s, ios_base::openmode mode = ios_base::out); // Since C++17
// Constraints: is_same_v<T, filesystem::path> is true

#include <cassert>
#include <filesystem>
@@ -24,9 +26,39 @@

#include "platform_support.h"
#include "test_macros.h"
#include "test_iterators.h"

namespace fs = std::filesystem;

template <class CharT>
constexpr bool test_non_convert_to_path() {
// String types
static_assert(!std::is_constructible_v<std::ofstream, std::basic_string_view<CharT>>);
static_assert(!std::is_constructible_v<std::ofstream, const std::basic_string_view<CharT>>);

// Char* pointers
if constexpr (!std::is_same_v<CharT, char>)
static_assert(!std::is_constructible_v<std::ofstream, const CharT*>);

// Iterators
static_assert(!std::is_constructible_v<std::ofstream, cpp17_input_iterator<const CharT*>>);

return true;
}

static_assert(test_non_convert_to_path<char>());

#if !defined(TEST_HAS_NO_WIDE_CHARACTERS) && !defined(TEST_HAS_OPEN_WITH_WCHAR)
static_assert(test_non_convert_to_path<wchar_t>());
#endif // !TEST_HAS_NO_WIDE_CHARACTERS && !TEST_HAS_OPEN_WITH_WCHAR

#ifndef TEST_HAS_NO_CHAR8_T
static_assert(test_non_convert_to_path<char8_t>());
#endif // TEST_HAS_NO_CHAR8_T

static_assert(test_non_convert_to_path<char16_t>());
static_assert(test_non_convert_to_path<char32_t>());

int main(int, char**) {
fs::path p = get_temp_file_name();
{
@@ -26,6 +26,7 @@
// void print(ostream& os, format_string<Args...> fmt, Args&&... args);
// template<class... Args>
// void println(ostream& os, format_string<Args...> fmt, Args&&... args);
// void println(ostream& os); // since C++26
//
// void vprint_unicode(ostream& os, string_view fmt, format_args args);
// void vprint_nonunicode(ostream& os, string_view fmt, format_args args);
@@ -67,7 +68,7 @@ test(std::stringstream& stream, std::string expected, test_format_string<char, A
// *** vprint_unicode ***
{
stream.str("");
;

std::vprint_unicode(stream, fmt.get(), std::make_format_args(args...));
std::string out = stream.str();
TEST_REQUIRE(out == expected,
@@ -77,7 +78,7 @@ test(std::stringstream& stream, std::string expected, test_format_string<char, A
// *** vprint_nonunicode ***
{
stream.str("");
;

std::vprint_nonunicode(stream, fmt.get(), std::make_format_args(args...));
std::string out = stream.str();
TEST_REQUIRE(out == expected,
@@ -88,7 +89,7 @@ test(std::stringstream& stream, std::string expected, test_format_string<char, A
{
expected += '\n'; // Tested last since it changes the expected value.
stream.str("");
;

std::println(stream, fmt, std::forward<Args>(args)...);
std::string out = stream.str();
TEST_REQUIRE(out == expected,
@@ -111,6 +112,7 @@ static void test(std::string expected, std::locale loc, test_format_string<char,
}

#ifndef TEST_HAS_NO_UNICODE

struct numpunct_unicode : std::numpunct<char> {
string_type do_truename() const override { return "gültig"; }
string_type do_falsename() const override { return "ungültig"; }
@@ -2188,12 +2190,47 @@ static void test_floating_point() {
test_floating_point_default_precision<F>();
}

static void test_println_blank_line(std::stringstream& stream) {
std::string expected{'\n'};
stream.str("");

std::println(stream);
std::string out = stream.str();
TEST_REQUIRE(out == expected,
TEST_WRITE_CONCATENATED("\nExpected output (blank line) ", expected, "\nActual output ", out, '\n'));
}

static void test_println_blank_line(std::locale loc) {
std::stringstream stream;
stream.imbue(loc);
test_println_blank_line(stream);
}

static void test_println_blank_line() {
std::locale::global(std::locale(LOCALE_en_US_UTF_8));
assert(std::locale().name() == LOCALE_en_US_UTF_8);
std::stringstream stream;
test_println_blank_line(stream);

std::locale loc = std::locale(std::locale(), new numpunct<char>());
std::locale::global(loc);
test_println_blank_line(std::locale(LOCALE_en_US_UTF_8));

#ifndef TEST_HAS_NO_UNICODE

std::locale loc_unicode = std::locale(std::locale(), new numpunct_unicode());
test_println_blank_line(loc_unicode);

#endif // TEST_HAS_NO_UNICODE
}

int main(int, char**) {
test_bool();
test_integer();
test_floating_point<float>();
test_floating_point<double>();
test_floating_point<long double>();
test_println_blank_line();

return 0;
}
@@ -17,6 +17,7 @@

// template<class... Args>
// void println(ostream& os, format_string<Args...> fmt, Args&&... args);
// void println(ostream& os); // since C++26

// [ostream.formatted.print]/3
// If the function is vprint_unicode and os is a stream that refers to
@@ -55,8 +56,20 @@ auto test_exception = []<class... Args>(std::string_view, std::string_view, Args
// The exceptions are tested by other functions that don't use the basic-format-string as fmt argument.
};

void test_println_blank_line() {
std::string expected{'\n'};

std::stringstream sstr;
std::println(sstr);

std::string out = sstr.str();
TEST_REQUIRE(out == expected,
TEST_WRITE_CONCATENATED("\nExpected output (blank line) ", expected, "\nActual output ", out, '\n'));
}

int main(int, char**) {
print_tests(test_file, test_exception);
test_println_blank_line();

return 0;
}
@@ -25,8 +25,10 @@

// template<class... Args>
// void print(FILE* stream, format_string<Args...> fmt, Args&&... args);
// void println(); // Since C++26
// template<class... Args>
// void println(FILE* stream, format_string<Args...> fmt, Args&&... args);
// void println(FILE* stream); // Since C++26
// void vprint_unicode(FILE* stream, string_view fmt, format_args args);
// void vprint_nonunicode(FILE* stream, string_view fmt, format_args args);

@@ -63,6 +65,20 @@ static void test_println() {
assert(std::string_view(buffer.data(), pos) == "hello world!\n");
}

static void test_println_blank_line() {
std::array<char, 100> buffer{0};

FILE* file = fmemopen(buffer.data(), buffer.size(), "wb");
assert(file);

std::println(file);
long pos = std::ftell(file);
std::fclose(file);

assert(pos > 0);
assert(std::string_view(buffer.data(), pos) == "\n");
}

static void test_vprint_unicode() {
std::array<char, 100> buffer{0};

@@ -96,6 +112,7 @@ static void test_vprint_nonunicode() {
int main(int, char**) {
test_print();
test_println();
test_println_blank_line();
test_vprint_unicode();
test_vprint_nonunicode();

@@ -0,0 +1,48 @@
//===----------------------------------------------------------------------===//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

// UNSUPPORTED: c++03, c++11, c++14, c++17, c++20
// UNSUPPORTED: no-filesystem
// UNSUPPORTED: executor-has-no-bash
// UNSUPPORTED: GCC-ALWAYS_INLINE-FIXME

// FIXME PRINT How to test println on Windows?
// XFAIL: msvc, target={{.+}}-windows-gnu

// XFAIL: availability-fp_to_chars-missing

// <print>

// void println();

// Testing this properly is quite hard; the function unconditionally
// writes to stdout. When stdout is redirected to a file it is no longer
// considered a terminal. The function is a small wrapper around
//
// template<class... Args>
// void println(FILE* stream, format_string<Args...> fmt, Args&&... args);
//
// So do minimal tests for this function and rely on the FILE* overload
// to do more testing.
//
// The testing is based on the testing for std::cout.

// TODO PRINT Use lit builtin echo

// FILE_DEPENDENCIES: echo.sh
// RUN: %{build}
// RUN: %{exec} bash echo.sh -ne "\n" > %t.expected
// RUN: %{exec} "%t.exe" > %t.actual
// RUN: diff -u %t.actual %t.expected

#include <print>

int main(int, char**) {
std::println();

return 0;
}
@@ -129,6 +129,29 @@ static void test_new_line() {
}
}

static void test_println_blank_line() {
// Text does newline translation.
{
FILE* file = fopen(filename.c_str(), "w");
assert(file);

std::println(file);
#ifndef _WIN32
assert(std::ftell(file) == 1);
#else
assert(std::ftell(file) == 2);
#endif
}
// Binary no newline translation.
{
FILE* file = fopen(filename.c_str(), "wb");
assert(file);

std::println(file);
assert(std::ftell(file) == 1);
}
}

int main(int, char**) {
print_tests(test_file, test_exception);

Expand All @@ -137,6 +160,7 @@ int main(int, char**) {
#endif
test_read_only();
test_new_line();
test_println_blank_line();

return 0;
}
4 changes: 4 additions & 0 deletions libcxx/test/support/test_macros.h
@@ -385,6 +385,10 @@ inline Tp const& DoNotOptimize(Tp const& value) {
# define TEST_HAS_NO_UNICODE
#endif

#if defined(_LIBCPP_HAS_OPEN_WITH_WCHAR)
# define TEST_HAS_OPEN_WITH_WCHAR
#endif

#if defined(_LIBCPP_HAS_NO_INT128) || defined(_MSVC_STL_VERSION)
# define TEST_HAS_NO_INT128
#endif
1 change: 1 addition & 0 deletions lldb/cmake/caches/Apple-lldb-Linux.cmake
@@ -1,4 +1,5 @@
include(${CMAKE_CURRENT_LIST_DIR}/Apple-lldb-base.cmake)
set(LLVM_ENABLE_EXPORTED_SYMBOLS_IN_EXECUTABLES ON CACHE BOOL "")

set(LLVM_DISTRIBUTION_COMPONENTS
lldb
8 changes: 7 additions & 1 deletion llvm/cmake/modules/AddLLVM.cmake
@@ -1038,9 +1038,15 @@ macro(add_llvm_executable name)
add_llvm_symbol_exports( ${name} ${LLVM_EXPORTED_SYMBOL_FILE} )
endif(LLVM_EXPORTED_SYMBOL_FILE)

if (NOT LLVM_ENABLE_EXPORTED_SYMBOLS_IN_EXECUTABLES AND LLVM_LINKER_SUPPORTS_NO_EXPORTED_SYMBOLS)
if (DEFINED LLVM_ENABLE_EXPORTED_SYMBOLS_IN_EXECUTABLES AND
NOT LLVM_ENABLE_EXPORTED_SYMBOLS_IN_EXECUTABLES)
if(LLVM_LINKER_SUPPORTS_NO_EXPORTED_SYMBOLS)
set_property(TARGET ${name} APPEND_STRING PROPERTY
LINK_FLAGS " -Wl,-no_exported_symbols")
else()
message(FATAL_ERROR
"LLVM_ENABLE_EXPORTED_SYMBOLS_IN_EXECUTABLES cannot be disabled when linker does not support \"-no_exported_symbols\"")
endif()
endif()

if (LLVM_LINK_LLVM_DYLIB AND NOT ARG_DISABLE_LLVM_LINK_LLVM_DYLIB)
13 changes: 7 additions & 6 deletions llvm/docs/LangRef.rst
@@ -11112,11 +11112,12 @@ For most of these operations, the type of '<value>' must be an integer
type whose bit width is a power of two greater than or equal to eight
and less than or equal to a target-specific size limit. For xchg, this
may also be a floating point or a pointer type with the same size constraints
as integers. For fadd/fsub/fmax/fmin, this must be a floating point type. The
type of the '``<pointer>``' operand must be a pointer to that type. If
the ``atomicrmw`` is marked as ``volatile``, then the optimizer is not
allowed to modify the number or order of execution of this
``atomicrmw`` with other :ref:`volatile operations <volatile>`.
as integers. For fadd/fsub/fmax/fmin, this must be a floating-point
or fixed vector of floating-point type. The type of the '``<pointer>``'
operand must be a pointer to that type. If the ``atomicrmw`` is marked
as ``volatile``, then the optimizer is not allowed to modify the
number or order of execution of this ``atomicrmw`` with other
:ref:`volatile operations <volatile>`.
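
For ``fadd``/``fsub``/``fmax``/``fmin`` this now admits fixed vectors of
floating-point type. As a hedged illustration (not part of this patch), such
an instruction can be built through the C++ IRBuilder API; the module,
function, and alignment below are illustrative assumptions::

   #include "llvm/IR/IRBuilder.h"
   #include "llvm/IR/Module.h"
   #include "llvm/IR/Verifier.h"
   #include "llvm/Support/raw_ostream.h"
   using namespace llvm;

   int main() {
     LLVMContext Ctx;
     Module M("atomicrmw-vec-demo", Ctx);
     auto *VecTy = FixedVectorType::get(Type::getFloatTy(Ctx), 4);
     auto *FnTy = FunctionType::get(Type::getVoidTy(Ctx),
                                    {PointerType::get(Ctx, 0), VecTy},
                                    /*isVarArg=*/false);
     Function *F = Function::Create(FnTy, Function::ExternalLinkage, "demo", M);
     IRBuilder<> B(BasicBlock::Create(Ctx, "entry", F));
     // Emits: atomicrmw fadd ptr %p, <4 x float> %v seq_cst
     B.CreateAtomicRMW(AtomicRMWInst::FAdd, F->getArg(0), F->getArg(1),
                       MaybeAlign(16), AtomicOrdering::SequentiallyConsistent);
     B.CreateRetVoid();
     return verifyModule(M, &errs()); // expected to pass with this patch
   }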

Note: if the alignment is not greater or equal to the size of the `<value>`
type, the atomic operation is likely to require a lock and have poor
@@ -11447,7 +11448,7 @@ and converts the remaining bits to ``ty2``. Since the source size must
be larger than the destination size, ``trunc`` cannot be a *no-op cast*.
It will always truncate bits.

If the ``nuw`` keyword is present, and any of the truncated bits are zero,
If the ``nuw`` keyword is present, and any of the truncated bits are non-zero,
the result is a :ref:`poison value <poisonvalues>`. If the ``nsw`` keyword
is present, and any of the truncated bits are not the same as the top bit
of the truncation result, the result is a :ref:`poison value <poisonvalues>`.
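
As a worked model of the ``nuw`` rule above (a plain C++ sketch with an
illustrative helper name; poison is modeled as ``std::nullopt``)::

   #include <cstdint>
   #include <optional>

   constexpr std::optional<uint8_t> truncNUW32to8(uint32_t V) {
     if (V >> 8)                     // any truncated bit non-zero -> poison
       return std::nullopt;
     return static_cast<uint8_t>(V); // all truncated bits zero: plain trunc
   }
   static_assert(truncNUW32to8(0x00FFu).has_value());
   static_assert(!truncNUW32to8(0x0100u).has_value());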
32 changes: 32 additions & 0 deletions llvm/include/llvm/ADT/ADL.h
@@ -37,6 +37,22 @@ constexpr auto end_impl(RangeT &&range)
return end(std::forward<RangeT>(range));
}

using std::rbegin;

template <typename RangeT>
constexpr auto rbegin_impl(RangeT &&range)
-> decltype(rbegin(std::forward<RangeT>(range))) {
return rbegin(std::forward<RangeT>(range));
}

using std::rend;

template <typename RangeT>
constexpr auto rend_impl(RangeT &&range)
-> decltype(rend(std::forward<RangeT>(range))) {
return rend(std::forward<RangeT>(range));
}

using std::swap;

template <typename T>
@@ -72,6 +88,22 @@ constexpr auto adl_end(RangeT &&range)
return adl_detail::end_impl(std::forward<RangeT>(range));
}

/// Returns the reverse-begin iterator to \p range using `std::rbegin` and
/// function found through Argument-Dependent Lookup (ADL).
template <typename RangeT>
constexpr auto adl_rbegin(RangeT &&range)
-> decltype(adl_detail::rbegin_impl(std::forward<RangeT>(range))) {
return adl_detail::rbegin_impl(std::forward<RangeT>(range));
}

/// Returns the reverse-end iterator to \p range using `std::rend` and
/// functions found through Argument-Dependent Lookup (ADL).
template <typename RangeT>
constexpr auto adl_rend(RangeT &&range)
-> decltype(adl_detail::rend_impl(std::forward<RangeT>(range))) {
return adl_detail::rend_impl(std::forward<RangeT>(range));
}
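
// A hedged usage sketch for adl_rbegin/adl_rend (the demo type and function
// names below are illustrative, not part of this header): a range whose
// rbegin/rend are free functions found only through ADL.
#include "llvm/ADT/ADL.h"
#include <vector>

namespace demo {
struct Window {
  std::vector<int> Data;
};
// Free-function reverse iterators, discoverable only via ADL.
inline auto rbegin(Window &W) { return W.Data.rbegin(); }
inline auto rend(Window &W) { return W.Data.rend(); }
} // namespace demo

int sumReversed(demo::Window &W) {
  int Sum = 0;
  for (auto It = llvm::adl_rbegin(W), E = llvm::adl_rend(W); It != E; ++It)
    Sum += *It;
  return Sum;
}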

/// Swaps \p lhs with \p rhs using `std::swap` and functions found through
/// Argument-Dependent Lookup (ADL).
template <typename T>
33 changes: 12 additions & 21 deletions llvm/include/llvm/ADT/STLExtras.h
@@ -405,32 +405,23 @@ class mapped_iterator_base
}
};

/// Helper to determine if type T has a member called rbegin().
template <typename Ty> class has_rbegin_impl {
using yes = char[1];
using no = char[2];

template <typename Inner>
static yes& test(Inner *I, decltype(I->rbegin()) * = nullptr);

template <typename>
static no& test(...);

public:
static const bool value = sizeof(test<Ty>(nullptr)) == sizeof(yes);
};
namespace detail {
template <typename Range>
using check_has_free_function_rbegin =
decltype(adl_rbegin(std::declval<Range &>()));

/// Metafunction to determine if T& or T has a member called rbegin().
template <typename Ty>
struct has_rbegin : has_rbegin_impl<std::remove_reference_t<Ty>> {};
template <typename Range>
static constexpr bool HasFreeFunctionRBegin =
is_detected<check_has_free_function_rbegin, Range>::value;
} // namespace detail

// Returns an iterator_range over the given container which iterates in reverse.
template <typename ContainerTy> auto reverse(ContainerTy &&C) {
if constexpr (has_rbegin<ContainerTy>::value)
return make_range(C.rbegin(), C.rend());
if constexpr (detail::HasFreeFunctionRBegin<ContainerTy>)
return make_range(adl_rbegin(C), adl_rend(C));
else
return make_range(std::make_reverse_iterator(std::end(C)),
std::make_reverse_iterator(std::begin(C)));
return make_range(std::make_reverse_iterator(adl_end(C)),
std::make_reverse_iterator(adl_begin(C)));
}
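
// A minimal sketch of what the ADL-based rewrite above enables (illustrative
// type, not part of this header): llvm::reverse now also accepts ranges whose
// rbegin/rend are free functions rather than members.
#include "llvm/ADT/STLExtras.h"
#include <vector>

namespace demo {
struct Buffer {
  std::vector<int> Data;
};
inline auto rbegin(Buffer &B) { return B.Data.rbegin(); }
inline auto rend(Buffer &B) { return B.Data.rend(); }
} // namespace demo

int lastToFirstSum(demo::Buffer &B) {
  int Sum = 0;
  for (int V : llvm::reverse(B)) // dispatches to demo::rbegin/rend via ADL
    Sum += V;
  return Sum;
}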

/// An iterator adaptor that filters the elements of given inner iterators.
7 changes: 7 additions & 0 deletions llvm/include/llvm/ExecutionEngine/Orc/Core.h
@@ -1237,6 +1237,8 @@ class JITDylib : public ThreadSafeRefCountedBase<JITDylib>,
// * Pending queries holds any not-yet-completed queries that include this
// symbol.
struct MaterializingInfo {
friend class ExecutionSession;

std::shared_ptr<EmissionDepUnit> DefiningEDU;
DenseSet<EmissionDepUnit *> DependantEDUs;

@@ -1746,6 +1748,11 @@ class ExecutionSession {
/// Dump the state of all the JITDylibs in this session.
void dump(raw_ostream &OS);

/// Check the internal consistency of ExecutionSession data structures.
#ifdef EXPENSIVE_CHECKS
bool verifySessionState(Twine Phase);
#endif

private:
static void logErrorsToStdErr(Error Err) {
logAllUnhandledErrors(std::move(Err), errs(), "JIT session error: ");
19 changes: 10 additions & 9 deletions llvm/lib/Analysis/ValueTracking.cpp
@@ -5355,14 +5355,17 @@ void computeKnownFPClass(const Value *V, const APInt &DemandedElts,
const Value *Vec = Op->getOperand(0);
const Value *Elt = Op->getOperand(1);
auto *CIdx = dyn_cast<ConstantInt>(Op->getOperand(2));
// Early out if the index is non-constant or out-of-range.
unsigned NumElts = DemandedElts.getBitWidth();
if (!CIdx || CIdx->getValue().uge(NumElts))
return;
APInt DemandedVecElts = DemandedElts;
bool NeedsElt = true;
// If we know the index we are inserting to, clear it from Vec check.
if (CIdx && CIdx->getValue().ult(NumElts)) {
DemandedVecElts.clearBit(CIdx->getZExtValue());
NeedsElt = DemandedElts[CIdx->getZExtValue()];
}

unsigned EltIdx = CIdx->getZExtValue();
// Do we demand the inserted element?
if (DemandedElts[EltIdx]) {
if (NeedsElt) {
computeKnownFPClass(Elt, Known, InterestedClasses, Depth + 1, Q);
// If we don't know any bits, early out.
if (Known.isUnknown())
@@ -5371,10 +5374,8 @@ void computeKnownFPClass(const Value *V, const APInt &DemandedElts,
Known.KnownFPClasses = fcNone;
}

// We don't need the base vector element that has been inserted.
APInt DemandedVecElts = DemandedElts;
DemandedVecElts.clearBit(EltIdx);
if (!!DemandedVecElts) {
// Do we need any more elements from Vec?
if (!DemandedVecElts.isZero()) {
KnownFPClass Known2;
computeKnownFPClass(Vec, DemandedVecElts, InterestedClasses, Known2,
Depth + 1, Q);
4 changes: 3 additions & 1 deletion llvm/lib/AsmParser/LLParser.cpp
@@ -8240,6 +8240,8 @@ int LLParser::parseAtomicRMW(Instruction *&Inst, PerFunctionState &PFS) {
return tokError("atomicrmw cannot be unordered");
if (!Ptr->getType()->isPointerTy())
return error(PtrLoc, "atomicrmw operand must be a pointer");
if (Val->getType()->isScalableTy())
return error(ValLoc, "atomicrmw operand may not be scalable");

if (Operation == AtomicRMWInst::Xchg) {
if (!Val->getType()->isIntegerTy() &&
@@ -8251,7 +8253,7 @@ int LLParser::parseAtomicRMW(Instruction *&Inst, PerFunctionState &PFS) {
" operand must be an integer, floating point, or pointer type");
}
} else if (IsFP) {
if (!Val->getType()->isFloatingPointTy()) {
if (!Val->getType()->isFPOrFPVectorTy()) {
return error(ValLoc, "atomicrmw " +
AtomicRMWInst::getOperationName(Operation) +
" operand must be a floating point type");
6 changes: 3 additions & 3 deletions llvm/lib/CodeGen/AtomicExpandPass.cpp
@@ -562,9 +562,9 @@ static void createCmpXchgInstFun(IRBuilderBase &Builder, Value *Addr,
Value *&Success, Value *&NewLoaded) {
Type *OrigTy = NewVal->getType();

// This code can go away when cmpxchg supports FP types.
// This code can go away when cmpxchg supports FP and vector types.
assert(!OrigTy->isPointerTy());
bool NeedBitcast = OrigTy->isFloatingPointTy();
bool NeedBitcast = OrigTy->isFloatingPointTy() || OrigTy->isVectorTy();
if (NeedBitcast) {
IntegerType *IntTy = Builder.getIntNTy(OrigTy->getPrimitiveSizeInBits());
NewVal = Builder.CreateBitCast(NewVal, IntTy);
@@ -731,7 +731,7 @@ static PartwordMaskValues createMaskInstrs(IRBuilderBase &Builder,
unsigned ValueSize = DL.getTypeStoreSize(ValueType);

PMV.ValueType = PMV.IntValueType = ValueType;
if (PMV.ValueType->isFloatingPointTy())
if (PMV.ValueType->isFloatingPointTy() || PMV.ValueType->isVectorTy())
PMV.IntValueType =
Type::getIntNTy(Ctx, ValueType->getPrimitiveSizeInBits());

6 changes: 4 additions & 2 deletions llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
@@ -2925,8 +2925,10 @@ bool CombinerHelper::matchCombineInsertVecElts(
}
return true;
}
// If we didn't end in a G_IMPLICIT_DEF, bail out.
return TmpInst->getOpcode() == TargetOpcode::G_IMPLICIT_DEF;
// If we didn't end in a G_IMPLICIT_DEF and the source is not fully
// overwritten, bail out.
return TmpInst->getOpcode() == TargetOpcode::G_IMPLICIT_DEF ||
all_of(MatchInfo, [](Register Reg) { return !!Reg; });
}

void CombinerHelper::applyCombineInsertVecElts(
10 changes: 9 additions & 1 deletion llvm/lib/CodeGen/MachineBasicBlock.cpp
@@ -1137,7 +1137,6 @@ MachineBasicBlock *MachineBasicBlock::SplitCriticalEdge(

MachineFunction *MF = getParent();
MachineBasicBlock *PrevFallthrough = getNextNode();
DebugLoc DL; // FIXME: this is nowhere

MachineBasicBlock *NMBB = MF->CreateMachineBasicBlock();
NMBB->setCallFrameSize(Succ->getCallFrameSize());
@@ -1218,6 +1217,15 @@
SlotIndexUpdateDelegate SlotUpdater(*MF, Indexes);
SmallVector<MachineOperand, 4> Cond;
const TargetInstrInfo *TII = getParent()->getSubtarget().getInstrInfo();

// In the original 'this' block there must be a branch instruction targeting
// Succ. We cannot locate it, since getBranchDestBlock is currently not
// implemented for all targets. However, if the merged DebugLoc has a
// non-zero line or column number, its scope and line/column match those of
// that branch instruction, so we can safely use it.
DebugLoc DL, MergedDL = findBranchDebugLoc();
if (MergedDL && (MergedDL.getLine() || MergedDL.getCol()))
DL = MergedDL;
TII->insertBranch(*NMBB, Succ, nullptr, Cond, DL);
}

213 changes: 211 additions & 2 deletions llvm/lib/ExecutionEngine/Orc/Core.cpp
@@ -1392,7 +1392,6 @@ void JITDylib::transferTracker(ResourceTracker &DstRT, ResourceTracker &SrcRT) {
}

Error JITDylib::defineImpl(MaterializationUnit &MU) {

LLVM_DEBUG({ dbgs() << " " << MU.getSymbols() << "\n"; });

SymbolNameSet Duplicates;
@@ -1605,6 +1604,11 @@ Error ExecutionSession::endSession() {
LLVM_DEBUG(dbgs() << "Ending ExecutionSession " << this << "\n");

auto JDsToRemove = runSessionLocked([&] {

#ifdef EXPENSIVE_CHECKS
verifySessionState("Entering ExecutionSession::endSession");
#endif

SessionOpen = false;
return JDs;
});
@@ -1662,7 +1666,6 @@ Expected<JITDylib &> ExecutionSession::createJITDylib(std::string Name) {
}

Error ExecutionSession::removeJITDylibs(std::vector<JITDylibSP> JDsToRemove) {

// Set JD to 'Closing' state and remove JD from the ExecutionSession.
runSessionLocked([&] {
for (auto &JD : JDsToRemove) {
@@ -1951,6 +1954,196 @@ void ExecutionSession::dump(raw_ostream &OS) {
});
}

#ifdef EXPENSIVE_CHECKS
bool ExecutionSession::verifySessionState(Twine Phase) {
return runSessionLocked([&]() {
bool AllOk = true;

// We'll collect these and verify them later to avoid redundant checks.
DenseSet<JITDylib::EmissionDepUnit *> EDUsToCheck;

for (auto &JD : JDs) {

auto LogFailure = [&]() -> raw_fd_ostream & {
auto &Stream = errs();
if (AllOk)
Stream << "ERROR: Bad ExecutionSession state detected " << Phase
<< "\n";
Stream << " In JITDylib " << JD->getName() << ", ";
AllOk = false;
return Stream;
};

if (JD->State != JITDylib::Open) {
LogFailure()
<< "state is not Open, but JD is in ExecutionSession list.";
}

// Check symbol table.
// 1. If the entry state isn't resolved then check that no address has
// been set.
// 2. Check that if the hasMaterializerAttached flag is set then there is
// an UnmaterializedInfo entry, and vice-versa.
for (auto &[Sym, Entry] : JD->Symbols) {
// Check that unresolved symbols have null addresses.
if (Entry.getState() < SymbolState::Resolved) {
if (Entry.getAddress()) {
LogFailure() << "symbol " << Sym << " has state "
<< Entry.getState()
<< " (not-yet-resolved) but non-null address "
<< Entry.getAddress() << ".\n";
}
}

// Check that the hasMaterializerAttached flag is correct.
auto UMIItr = JD->UnmaterializedInfos.find(Sym);
if (Entry.hasMaterializerAttached()) {
if (UMIItr == JD->UnmaterializedInfos.end()) {
LogFailure() << "symbol " << Sym
<< " entry claims materializer attached, but "
"UnmaterializedInfos has no corresponding entry.\n";
}
} else if (UMIItr != JD->UnmaterializedInfos.end()) {
LogFailure()
<< "symbol " << Sym
<< " entry claims no materializer attached, but "
"UnmaterializedInfos has an unexpected entry for it.\n";
}
}

// Check that every UnmaterializedInfo entry has a corresponding entry
// in the Symbols table.
for (auto &[Sym, UMI] : JD->UnmaterializedInfos) {
auto SymItr = JD->Symbols.find(Sym);
if (SymItr == JD->Symbols.end()) {
LogFailure()
<< "symbol " << Sym
<< " has UnmaterializedInfos entry, but no Symbols entry.\n";
}
}

// Check consistency of the MaterializingInfos table.
for (auto &[Sym, MII] : JD->MaterializingInfos) {

auto SymItr = JD->Symbols.find(Sym);
if (SymItr == JD->Symbols.end()) {
// If there's no Symbols entry for this MaterializingInfos entry then
// report that.
LogFailure()
<< "symbol " << Sym
<< " has MaterializingInfos entry, but no Symbols entry.\n";
} else {
// Otherwise check consistency between Symbols and MaterializingInfos.

// Ready symbols should not have MaterializingInfos.
if (SymItr->second.getState() == SymbolState::Ready) {
LogFailure()
<< "symbol " << Sym
<< " is in Ready state, should not have MaterializingInfo.\n";
}

// Pending queries should be for subsequent states.
auto CurState = static_cast<SymbolState>(
static_cast<std::underlying_type_t<SymbolState>>(
SymItr->second.getState()) + 1);
for (auto &Q : MII.PendingQueries) {
if (Q->getRequiredState() != CurState) {
if (Q->getRequiredState() > CurState)
CurState = Q->getRequiredState();
else
LogFailure() << "symbol " << Sym
<< " has stale or misordered queries.\n";
}
}

// If there's a DefiningEDU then check that...
// 1. The JD matches.
// 2. The symbol is in the EDU's Symbols map.
// 3. The symbol table entry is in the Emitted state.
if (MII.DefiningEDU) {

EDUsToCheck.insert(MII.DefiningEDU.get());

if (MII.DefiningEDU->JD != JD.get()) {
LogFailure() << "symbol " << Sym
<< " has DefiningEDU with incorrect JD"
<< (llvm::is_contained(JDs, MII.DefiningEDU->JD)
? " (JD not currently in ExecutionSession"
: "")
<< "\n";
}

if (SymItr->second.getState() != SymbolState::Emitted) {
LogFailure()
<< "symbol " << Sym
<< " has DefiningEDU, but is not in Emitted state.\n";
}
}

// Check that JDs for any DependantEDUs are also in the session --
// that guarantees that we'll also visit them during this loop.
for (auto &DepEDU : MII.DependantEDUs) {
if (!llvm::is_contained(JDs, DepEDU->JD)) {
LogFailure() << "symbol " << Sym << " has DependantEDU "
<< (void *)DepEDU << " with JD (" << DepEDU->JD
<< ") that isn't in ExecutionSession.\n";
}
}
}
}
}

// Check EDUs.
for (auto *EDU : EDUsToCheck) {
assert(EDU->JD->State == JITDylib::Open && "EDU->JD is not Open");

auto LogFailure = [&]() -> raw_fd_ostream & {
AllOk = false;
auto &Stream = errs();
Stream << "In EDU defining " << EDU->JD->getName() << ": { ";
for (auto &[Sym, Flags] : EDU->Symbols)
Stream << Sym << " ";
Stream << "}, ";
return Stream;
};

if (EDU->Symbols.empty())
LogFailure() << "no symbols defined.\n";
else {
for (auto &[Sym, Flags] : EDU->Symbols) {
if (!Sym)
LogFailure() << "null symbol defined.\n";
else {
if (!EDU->JD->Symbols.count(SymbolStringPtr(Sym))) {
LogFailure() << "symbol " << Sym
<< " isn't present in JD's symbol table.\n";
}
}
}
}

for (auto &[DepJD, Symbols] : EDU->Dependencies) {
if (!llvm::is_contained(JDs, DepJD)) {
LogFailure() << "dependant symbols listed for JD that isn't in "
"ExecutionSession.\n";
} else {
for (auto &DepSym : Symbols) {
if (!DepJD->Symbols.count(SymbolStringPtr(DepSym))) {
LogFailure()
<< "dependant symbol " << DepSym
<< " does not appear in symbol table for dependant JD "
<< DepJD->getName() << ".\n";
}
}
}
}
}

return AllOk;
});
}
#endif // EXPENSIVE_CHECKS

void ExecutionSession::dispatchOutstandingMUs() {
LLVM_DEBUG(dbgs() << "Dispatching MaterializationUnits...\n");
while (true) {
@@ -3060,6 +3253,9 @@
return make_error<StringError>("JITDylib " + TargetJD.getName() +
" is defunct",
inconvertibleErrorCode());
#ifdef EXPENSIVE_CHECKS
verifySessionState("entering ExecutionSession::IL_emit");
#endif

// Walk all EDUs:
// 1. Verifying that dependencies are available (not removed or in the error
@@ -3217,6 +3413,10 @@
IL_makeEDUEmitted(std::move(EDUInfo.EDU), CompletedQueries);
}

#ifdef EXPENSIVE_CHECKS
verifySessionState("exiting ExecutionSession::IL_emit");
#endif

return std::move(CompletedQueries);
}

@@ -3305,6 +3505,11 @@ std::pair<JITDylib::AsynchronousSymbolQuerySet,
std::shared_ptr<SymbolDependenceMap>>
ExecutionSession::IL_failSymbols(JITDylib &JD,
const SymbolNameVector &SymbolsToFail) {

#ifdef EXPENSIVE_CHECKS
verifySessionState("entering ExecutionSession::IL_failSymbols");
#endif

JITDylib::AsynchronousSymbolQuerySet FailedQueries;
auto FailedSymbolsMap = std::make_shared<SymbolDependenceMap>();
auto ExtractFailedQueries = [&](JITDylib::MaterializingInfo &MI) {
Expand Down Expand Up @@ -3440,6 +3645,10 @@ ExecutionSession::IL_failSymbols(JITDylib &JD,
JD.MaterializingInfos.erase(Name);
}

#ifdef EXPENSIVE_CHECKS
verifySessionState("exiting ExecutionSession::IL_failSymbols");
#endif

return std::make_pair(std::move(FailedQueries), std::move(FailedSymbolsMap));
}

5 changes: 3 additions & 2 deletions llvm/lib/IR/Verifier.cpp
@@ -4268,9 +4268,10 @@ void Verifier::visitAtomicRMWInst(AtomicRMWInst &RMWI) {
" operand must have integer or floating point type!",
&RMWI, ElTy);
} else if (AtomicRMWInst::isFPOperation(Op)) {
Check(ElTy->isFloatingPointTy(),
Check(ElTy->isFPOrFPVectorTy() && !isa<ScalableVectorType>(ElTy),
"atomicrmw " + AtomicRMWInst::getOperationName(Op) +
" operand must have floating point type!",
" operand must have floating-point or fixed vector of floating-point "
"type!",
&RMWI, ElTy);
} else {
Check(ElTy->isIntegerTy(),
2 changes: 1 addition & 1 deletion llvm/lib/ProfileData/InstrProfWriter.cpp
@@ -493,7 +493,7 @@ static Error writeMemProfV1(
llvm::MapVector<GlobalValue::GUID, memprof::IndexedMemProfRecord>
&MemProfRecordData,
llvm::MapVector<memprof::FrameId, memprof::Frame> &MemProfFrameData) {
OS.write(memprof::Version0);
OS.write(memprof::Version1);
uint64_t HeaderUpdatePos = OS.tell();
OS.write(0ULL); // Reserve space for the memprof record table offset.
OS.write(0ULL); // Reserve space for the memprof frame payload offset.
32 changes: 12 additions & 20 deletions llvm/lib/ProfileData/MemProf.cpp
@@ -10,8 +10,7 @@

namespace llvm {
namespace memprof {
namespace {
size_t serializedSizeV0(const IndexedAllocationInfo &IAI) {
static size_t serializedSizeV0(const IndexedAllocationInfo &IAI) {
size_t Size = 0;
// The number of frames to serialize.
Size += sizeof(uint64_t);
@@ -22,15 +21,14 @@ size_t serializedSizeV0(const IndexedAllocationInfo &IAI) {
return Size;
}

size_t serializedSizeV2(const IndexedAllocationInfo &IAI) {
static size_t serializedSizeV2(const IndexedAllocationInfo &IAI) {
size_t Size = 0;
// The CallStackId
Size += sizeof(CallStackId);
// The size of the payload.
Size += PortableMemInfoBlock::serializedSize();
return Size;
}
} // namespace

size_t IndexedAllocationInfo::serializedSize(IndexedVersion Version) const {
switch (Version) {
@@ -43,8 +41,7 @@ size_t IndexedAllocationInfo::serializedSize(IndexedVersion Version) const {
llvm_unreachable("unsupported MemProf version");
}

namespace {
size_t serializedSizeV0(const IndexedMemProfRecord &Record) {
static size_t serializedSizeV0(const IndexedMemProfRecord &Record) {
size_t Result = sizeof(GlobalValue::GUID);
for (const IndexedAllocationInfo &N : Record.AllocSites)
Result += N.serializedSize(Version0);
@@ -59,7 +56,7 @@ size_t serializedSizeV0(const IndexedMemProfRecord &Record) {
return Result;
}

size_t serializedSizeV2(const IndexedMemProfRecord &Record) {
static size_t serializedSizeV2(const IndexedMemProfRecord &Record) {
size_t Result = sizeof(GlobalValue::GUID);
for (const IndexedAllocationInfo &N : Record.AllocSites)
Result += N.serializedSize(Version2);
@@ -70,7 +67,6 @@ size_t serializedSizeV2(const IndexedMemProfRecord &Record) {
Result += Record.CallSiteIds.size() * sizeof(CallStackId);
return Result;
}
} // namespace

size_t IndexedMemProfRecord::serializedSize(IndexedVersion Version) const {
switch (Version) {
Expand All @@ -83,9 +79,8 @@ size_t IndexedMemProfRecord::serializedSize(IndexedVersion Version) const {
llvm_unreachable("unsupported MemProf version");
}

namespace {
void serializeV0(const IndexedMemProfRecord &Record,
const MemProfSchema &Schema, raw_ostream &OS) {
static void serializeV0(const IndexedMemProfRecord &Record,
const MemProfSchema &Schema, raw_ostream &OS) {
using namespace support;

endian::Writer LE(OS, llvm::endianness::little);
@@ -107,8 +102,8 @@ void serializeV0(const IndexedMemProfRecord &Record,
}
}

void serializeV2(const IndexedMemProfRecord &Record,
const MemProfSchema &Schema, raw_ostream &OS) {
static void serializeV2(const IndexedMemProfRecord &Record,
const MemProfSchema &Schema, raw_ostream &OS) {
using namespace support;

endian::Writer LE(OS, llvm::endianness::little);
@@ -124,7 +119,6 @@ void serializeV2(const IndexedMemProfRecord &Record,
for (const auto &CSId : Record.CallSiteIds)
LE.write<CallStackId>(CSId);
}
} // namespace

void IndexedMemProfRecord::serialize(const MemProfSchema &Schema,
raw_ostream &OS, IndexedVersion Version) {
Expand All @@ -140,9 +134,8 @@ void IndexedMemProfRecord::serialize(const MemProfSchema &Schema,
llvm_unreachable("unsupported MemProf version");
}

namespace {
IndexedMemProfRecord deserializeV0(const MemProfSchema &Schema,
const unsigned char *Ptr) {
static IndexedMemProfRecord deserializeV0(const MemProfSchema &Schema,
const unsigned char *Ptr) {
using namespace support;

IndexedMemProfRecord Record;
@@ -185,8 +178,8 @@ IndexedMemProfRecord deserializeV0(const MemProfSchema &Schema,
return Record;
}

IndexedMemProfRecord deserializeV2(const MemProfSchema &Schema,
const unsigned char *Ptr) {
static IndexedMemProfRecord deserializeV2(const MemProfSchema &Schema,
const unsigned char *Ptr) {
using namespace support;

IndexedMemProfRecord Record;
@@ -214,7 +207,6 @@ IndexedMemProfRecord deserializeV2(const MemProfSchema &Schema,

return Record;
}
} // namespace

IndexedMemProfRecord
IndexedMemProfRecord::deserialize(const MemProfSchema &Schema,
30 changes: 30 additions & 0 deletions llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -24555,6 +24555,18 @@ SDValue AArch64TargetLowering::PerformDAGCombine(SDNode *N,
if (auto R = foldOverflowCheck(N, DAG, /* IsAdd */ false))
return R;
return performFlagSettingCombine(N, DCI, AArch64ISD::SBC);
case AArch64ISD::BICi: {
APInt DemandedBits =
APInt::getAllOnes(N->getValueType(0).getScalarSizeInBits());
APInt DemandedElts =
APInt::getAllOnes(N->getValueType(0).getVectorNumElements());

if (DAG.getTargetLoweringInfo().SimplifyDemandedBits(
SDValue(N, 0), DemandedBits, DemandedElts, DCI))
return SDValue();

break;
}
case ISD::XOR:
return performXorCombine(N, DAG, DCI, Subtarget);
case ISD::MUL:
@@ -27595,6 +27607,24 @@ bool AArch64TargetLowering::SimplifyDemandedBitsForTargetNode(
// used - simplify to just Val.
return TLO.CombineTo(Op, ShiftR->getOperand(0));
}
case AArch64ISD::BICi: {
// Fold BICi if all destination bits are already known to be zeroed
SDValue Op0 = Op.getOperand(0);
KnownBits KnownOp0 =
TLO.DAG.computeKnownBits(Op0, OriginalDemandedElts, Depth + 1);
// Op0 &= ~(ConstantOperandVal(1) << ConstantOperandVal(2))
uint64_t BitsToClear = Op->getConstantOperandVal(1)
<< Op->getConstantOperandVal(2);
APInt AlreadyZeroedBitsToClear = BitsToClear & KnownOp0.Zero;
if (APInt(Known.getBitWidth(), BitsToClear)
.isSubsetOf(AlreadyZeroedBitsToClear))
return TLO.CombineTo(Op, Op0);

Known = KnownOp0 &
KnownBits::makeConstant(APInt(Known.getBitWidth(), ~BitsToClear));

return false;
}
case ISD::INTRINSIC_WO_CHAIN: {
if (auto ElementSize = IsSVECntIntrinsic(Op)) {
unsigned MaxSVEVectorSizeInBits = Subtarget->getMaxSVEVectorSizeInBits();
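
// A plain-integer model of the BICi fold added above (a sketch with
// illustrative values; not AArch64 API). BICi computes Op0 & ~(Imm << Shift),
// so it is a no-op whenever every bit it would clear is already known zero.
#include <cstdint>

constexpr bool biciIsNoOp(uint64_t KnownZeroOp0, uint64_t Imm, uint64_t Shift) {
  uint64_t BitsToClear = Imm << Shift;
  return (BitsToClear & ~KnownZeroOp0) == 0;
}
static_assert(biciIsNoOp(/*KnownZeroOp0=*/0xFF00u, /*Imm=*/0xF0u, /*Shift=*/8));
static_assert(!biciIsNoOp(/*KnownZeroOp0=*/0x0F00u, /*Imm=*/0xF0u, /*Shift=*/8));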
2 changes: 1 addition & 1 deletion llvm/lib/Target/RISCV/RISCVInstrFormats.td
@@ -155,7 +155,7 @@ def OPC_BRANCH : RISCVOpcode<"BRANCH", 0b1100011>;
def OPC_JALR : RISCVOpcode<"JALR", 0b1100111>;
def OPC_JAL : RISCVOpcode<"JAL", 0b1101111>;
def OPC_SYSTEM : RISCVOpcode<"SYSTEM", 0b1110011>;
def OPC_OP_P : RISCVOpcode<"OP_P", 0b1110111>;
def OPC_OP_VE : RISCVOpcode<"OP_VE", 0b1110111>;
def OPC_CUSTOM_3 : RISCVOpcode<"CUSTOM_3", 0b1111011>;

class RVInstCommon<dag outs, dag ins, string opcodestr, string argstr,
5 changes: 5 additions & 0 deletions llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td
@@ -761,6 +761,11 @@ class GetVTypePredicates<VTypeInfo vti> {
true : [HasVInstructions]);
}

class GetVTypeScalarPredicates<VTypeInfo vti> {
list<Predicate> Predicates = !cond(!eq(vti.Scalar, bf16) : [HasStdExtZfbfmin],
true : []);
}

class VPseudoUSLoadNoMask<VReg RetClass,
int EEW> :
Pseudo<(outs RetClass:$rd),
5 changes: 3 additions & 2 deletions llvm/lib/Target/RISCV/RISCVInstrInfoVSDPatterns.td
@@ -1454,8 +1454,9 @@ foreach fvtiToFWti = AllWidenableFloatVectors in {
// Vector Splats
//===----------------------------------------------------------------------===//

foreach fvti = AllFloatVectors in {
let Predicates = GetVTypePredicates<fvti>.Predicates in
foreach fvti = !listconcat(AllFloatVectors, AllBFloatVectors) in {
let Predicates = !listconcat(GetVTypePredicates<fvti>.Predicates,
GetVTypeScalarPredicates<fvti>.Predicates) in
def : Pat<(fvti.Vector (riscv_vfmv_v_f_vl undef, fvti.ScalarRegClass:$rs1, srcvalue)),
(!cast<Instruction>("PseudoVFMV_V_"#fvti.ScalarSuffix#"_"#fvti.LMul.MX)
(fvti.Vector (IMPLICIT_DEF)),
5 changes: 5 additions & 0 deletions llvm/lib/Target/RISCV/RISCVInstrInfoVVLPatterns.td
@@ -2599,7 +2599,12 @@ foreach fvti = AllFloatVectors in {
fvti.RegClass:$merge, fvti.RegClass:$rs2,
(fvti.Scalar fvti.ScalarRegClass:$rs1),
(fvti.Mask V0), GPR:$vl, fvti.Log2SEW)>;
}
}

foreach fvti = !listconcat(AllFloatVectors, AllBFloatVectors) in {
let Predicates = !listconcat(GetVTypePredicates<fvti>.Predicates,
GetVTypeScalarPredicates<fvti>.Predicates) in {
// 13.16. Vector Floating-Point Move Instruction
// If we're splatting fpimm0, use vmv.v.x vd, x0.
def : Pat<(fvti.Vector (riscv_vfmv_v_f_vl
10 changes: 5 additions & 5 deletions llvm/lib/Target/RISCV/RISCVInstrInfoZvk.td
@@ -64,7 +64,7 @@ multiclass VROR_IV_V_X_I<string opcodestr, bits<6> funct6>
// op vd, vs2, vs1
class PALUVVNoVm<bits<6> funct6, RISCVVFormat opv, string opcodestr>
: VALUVVNoVm<funct6, opv, opcodestr> {
let Inst{6-0} = OPC_OP_P.Value;
let Inst{6-0} = OPC_OP_VE.Value;
}

// op vd, vs2, vs1
@@ -74,13 +74,13 @@ class PALUVVNoVmTernary<bits<6> funct6, RISCVVFormat opv, string opcodestr>
opcodestr, "$vd, $vs2, $vs1"> {
let Constraints = "$vd = $vd_wb";
let vm = 1;
let Inst{6-0} = OPC_OP_P.Value;
let Inst{6-0} = OPC_OP_VE.Value;
}

// op vd, vs2, imm
class PALUVINoVm<bits<6> funct6, string opcodestr, Operand optype>
: VALUVINoVm<funct6, opcodestr, optype> {
let Inst{6-0} = OPC_OP_P.Value;
let Inst{6-0} = OPC_OP_VE.Value;
let Inst{14-12} = OPMVV.Value;
}

@@ -91,7 +91,7 @@ class PALUVINoVmBinary<bits<6> funct6, string opcodestr, Operand optype>
opcodestr, "$vd, $vs2, $imm"> {
let Constraints = "$vd = $vd_wb";
let vm = 1;
let Inst{6-0} = OPC_OP_P.Value;
let Inst{6-0} = OPC_OP_VE.Value;
let Inst{14-12} = OPMVV.Value;
}

@@ -103,7 +103,7 @@ class PALUVs2NoVmBinary<bits<6> funct6, bits<5> vs1, RISCVVFormat opv,
opcodestr, "$vd, $vs2"> {
let Constraints = "$vd = $vd_wb";
let vm = 1;
let Inst{6-0} = OPC_OP_P.Value;
let Inst{6-0} = OPC_OP_VE.Value;
}

multiclass VAES_MV_V_S<bits<6> funct6_vv, bits<6> funct6_vs, bits<5> vs1,
4 changes: 2 additions & 2 deletions llvm/lib/Target/X86/X86SelectionDAGInfo.cpp
@@ -80,13 +80,13 @@ SDValue X86SelectionDAGInfo::EmitTargetCodeForMemset(
uint64_t Val = ValC->getZExtValue() & 255;

// If the value is a constant, then we can potentially use larger sets.
if (Alignment > Align(2)) {
if (Alignment >= Align(4)) {
// DWORD aligned
AVT = MVT::i32;
ValReg = X86::EAX;
Val = (Val << 8) | Val;
Val = (Val << 16) | Val;
if (Subtarget.is64Bit() && Alignment > Align(8)) { // QWORD aligned
if (Subtarget.is64Bit() && Alignment >= Align(8)) { // QWORD aligned
AVT = MVT::i64;
ValReg = X86::RAX;
Val = (Val << 32) | Val;
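
// Standalone sketch of the byte-broadcast arithmetic used above when widening
// the store unit (illustrative helper name, not part of the patch):
#include <cstdint>

constexpr uint64_t splatByte(uint8_t B) {
  uint64_t V = B;
  V = (V << 8) | V;  // 0xAB       -> 0xABAB
  V = (V << 16) | V; // 0xABAB     -> 0xABABABAB
  V = (V << 32) | V; // 0xABABABAB -> 0xABABABABABABABAB
  return V;
}
static_assert(splatByte(0xAB) == 0xABABABABABABABABULL);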
2 changes: 1 addition & 1 deletion llvm/lib/Transforms/Utils/BasicBlockUtils.cpp
@@ -784,7 +784,7 @@ BasicBlock *llvm::SplitEdge(BasicBlock *BB, BasicBlock *Succ, DominatorTree *DT,
// If the successor only has a single pred, split the top of the successor
// block.
assert(SP == BB && "CFG broken");
SP = nullptr;
(void)SP;
return SplitBlock(Succ, &Succ->front(), DT, LI, MSSAU, BBName,
/*Before=*/true);
}
65 changes: 3 additions & 62 deletions llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -573,10 +573,6 @@ class InnerLoopVectorizer {
/// Fix the non-induction PHIs in \p Plan.
void fixNonInductionPHIs(VPlan &Plan, VPTransformState &State);

/// Returns true if the reordering of FP operations is not allowed, but we are
/// able to vectorize with strict in-order reductions for the given RdxDesc.
bool useOrderedReductions(const RecurrenceDescriptor &RdxDesc);

/// Create a new phi node for the induction variable \p OrigPhi to resume
/// iteration count in the scalar epilogue, from where the vectorized loop
/// left off. \p Step is the SCEV-expanded induction step to use. In cases
@@ -3714,11 +3710,6 @@ void InnerLoopVectorizer::fixNonInductionPHIs(VPlan &Plan,
}
}

bool InnerLoopVectorizer::useOrderedReductions(
const RecurrenceDescriptor &RdxDesc) {
return Cost->useOrderedReductions(RdxDesc);
}

void LoopVectorizationCostModel::collectLoopScalars(ElementCount VF) {
// We should not collect Scalars more than once per VF. Right now, this
// function is called from collectUniformsAndScalars(), which already does
@@ -9056,8 +9047,9 @@ void LoopVectorizationPlanner::adjustRecipesForReductions(
if (CM.blockNeedsPredicationForAnyReason(BB))
CondOp = RecipeBuilder.getBlockInMask(BB);

VPReductionRecipe *RedRecipe = new VPReductionRecipe(
RdxDesc, CurrentLinkI, PreviousLink, VecOp, CondOp);
VPReductionRecipe *RedRecipe =
new VPReductionRecipe(RdxDesc, CurrentLinkI, PreviousLink, VecOp,
CondOp, CM.useOrderedReductions(RdxDesc));
// Append the recipe to the end of the VPBasicBlock because we need to
// ensure that it comes after all of it's inputs, including CondOp.
// Note that this transformation may leave over dead recipes (including
@@ -9307,57 +9299,6 @@ void VPInterleaveRecipe::execute(VPTransformState &State) {
NeedsMaskForGaps);
}

void VPReductionRecipe::execute(VPTransformState &State) {
assert(!State.Instance && "Reduction being replicated.");
Value *PrevInChain = State.get(getChainOp(), 0, /*IsScalar*/ true);
RecurKind Kind = RdxDesc.getRecurrenceKind();
bool IsOrdered = State.ILV->useOrderedReductions(RdxDesc);
// Propagate the fast-math flags carried by the underlying instruction.
IRBuilderBase::FastMathFlagGuard FMFGuard(State.Builder);
State.Builder.setFastMathFlags(RdxDesc.getFastMathFlags());
for (unsigned Part = 0; Part < State.UF; ++Part) {
Value *NewVecOp = State.get(getVecOp(), Part);
if (VPValue *Cond = getCondOp()) {
Value *NewCond = State.get(Cond, Part, State.VF.isScalar());
VectorType *VecTy = dyn_cast<VectorType>(NewVecOp->getType());
Type *ElementTy = VecTy ? VecTy->getElementType() : NewVecOp->getType();
Value *Iden = RdxDesc.getRecurrenceIdentity(Kind, ElementTy,
RdxDesc.getFastMathFlags());
if (State.VF.isVector()) {
Iden =
State.Builder.CreateVectorSplat(VecTy->getElementCount(), Iden);
}

Value *Select = State.Builder.CreateSelect(NewCond, NewVecOp, Iden);
NewVecOp = Select;
}
Value *NewRed;
Value *NextInChain;
if (IsOrdered) {
if (State.VF.isVector())
NewRed = createOrderedReduction(State.Builder, RdxDesc, NewVecOp,
PrevInChain);
else
NewRed = State.Builder.CreateBinOp(
(Instruction::BinaryOps)RdxDesc.getOpcode(Kind), PrevInChain,
NewVecOp);
PrevInChain = NewRed;
} else {
PrevInChain = State.get(getChainOp(), Part, /*IsScalar*/ true);
NewRed = createTargetReduction(State.Builder, RdxDesc, NewVecOp);
}
if (RecurrenceDescriptor::isMinMaxRecurrenceKind(Kind)) {
NextInChain = createMinMaxOp(State.Builder, RdxDesc.getRecurrenceKind(),
NewRed, PrevInChain);
} else if (IsOrdered)
NextInChain = NewRed;
else
NextInChain = State.Builder.CreateBinOp(
(Instruction::BinaryOps)RdxDesc.getOpcode(Kind), NewRed, PrevInChain);
State.set(this, NextInChain, Part, /*IsScalar*/ true);
}
}

void VPReplicateRecipe::execute(VPTransformState &State) {
Instruction *UI = getUnderlyingInstr();
if (State.Instance) { // Generate a single instance.
110 changes: 19 additions & 91 deletions llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -7056,16 +7056,19 @@ bool BoUpSLP::areAllUsersVectorized(

static std::pair<InstructionCost, InstructionCost>
getVectorCallCosts(CallInst *CI, FixedVectorType *VecTy,
TargetTransformInfo *TTI, TargetLibraryInfo *TLI,
ArrayRef<Type *> ArgTys) {
TargetTransformInfo *TTI, TargetLibraryInfo *TLI) {
Intrinsic::ID ID = getVectorIntrinsicIDForCall(CI, TLI);

// Calculate the cost of the scalar and vector calls.
SmallVector<Type *, 4> VecTys;
for (Use &Arg : CI->args())
VecTys.push_back(
FixedVectorType::get(Arg->getType(), VecTy->getNumElements()));
FastMathFlags FMF;
if (auto *FPCI = dyn_cast<FPMathOperator>(CI))
FMF = FPCI->getFastMathFlags();
SmallVector<const Value *> Arguments(CI->args());
IntrinsicCostAttributes CostAttrs(ID, VecTy, Arguments, ArgTys, FMF,
IntrinsicCostAttributes CostAttrs(ID, VecTy, Arguments, VecTys, FMF,
dyn_cast<IntrinsicInst>(CI));
auto IntrinsicCost =
TTI->getIntrinsicInstrCost(CostAttrs, TTI::TCK_RecipThroughput);
Expand All @@ -7078,8 +7081,8 @@ getVectorCallCosts(CallInst *CI, FixedVectorType *VecTy,
if (!CI->isNoBuiltin() && VecFunc) {
// Calculate the cost of the vector library call.
// If the corresponding vector call is cheaper, return its cost.
LibCost =
TTI->getCallInstrCost(nullptr, VecTy, ArgTys, TTI::TCK_RecipThroughput);
LibCost = TTI->getCallInstrCost(nullptr, VecTy, VecTys,
TTI::TCK_RecipThroughput);
}
return {IntrinsicCost, LibCost};
}
@@ -8505,30 +8508,6 @@ TTI::CastContextHint BoUpSLP::getCastContextHint(const TreeEntry &TE) const {
return TTI::CastContextHint::None;
}

/// Builds the arguments types vector for the given call instruction with the
/// given \p ID for the specified vector factor.
static SmallVector<Type *> buildIntrinsicArgTypes(const CallInst *CI,
const Intrinsic::ID ID,
const unsigned VF,
unsigned MinBW) {
SmallVector<Type *> ArgTys;
for (auto [Idx, Arg] : enumerate(CI->args())) {
if (ID != Intrinsic::not_intrinsic) {
if (isVectorIntrinsicWithScalarOpAtArg(ID, Idx)) {
ArgTys.push_back(Arg->getType());
continue;
}
if (MinBW > 0) {
ArgTys.push_back(FixedVectorType::get(
IntegerType::get(CI->getContext(), MinBW), VF));
continue;
}
}
ArgTys.push_back(FixedVectorType::get(Arg->getType(), VF));
}
return ArgTys;
}

InstructionCost
BoUpSLP::getEntryCost(const TreeEntry *E, ArrayRef<Value *> VectorizedVals,
SmallPtrSetImpl<Value *> &CheckedExtracts) {
@@ -9095,11 +9074,7 @@ BoUpSLP::getEntryCost(const TreeEntry *E, ArrayRef<Value *> VectorizedVals,
};
auto GetVectorCost = [=](InstructionCost CommonCost) {
auto *CI = cast<CallInst>(VL0);
Intrinsic::ID ID = getVectorIntrinsicIDForCall(CI, TLI);
SmallVector<Type *> ArgTys =
buildIntrinsicArgTypes(CI, ID, VecTy->getNumElements(),
It != MinBWs.end() ? It->second.first : 0);
auto VecCallCosts = getVectorCallCosts(CI, VecTy, TTI, TLI, ArgTys);
auto VecCallCosts = getVectorCallCosts(CI, VecTy, TTI, TLI);
return std::min(VecCallCosts.first, VecCallCosts.second) + CommonCost;
};
return GetCostDiff(GetScalarCost, GetVectorCost);
@@ -9849,11 +9824,13 @@ InstructionCost BoUpSLP::getTreeCost(ArrayRef<Value *> VectorizedVals) {
if (BWIt != MinBWs.end()) {
Type *DstTy = Root.Scalars.front()->getType();
unsigned OriginalSz = DL->getTypeSizeInBits(DstTy);
if (OriginalSz != BWIt->second.first) {
unsigned SrcSz =
ReductionBitWidth == 0 ? BWIt->second.first : ReductionBitWidth;
if (OriginalSz != SrcSz) {
unsigned Opcode = Instruction::Trunc;
if (OriginalSz < BWIt->second.first)
if (OriginalSz > SrcSz)
Opcode = BWIt->second.second ? Instruction::SExt : Instruction::ZExt;
Type *SrcTy = IntegerType::get(DstTy->getContext(), BWIt->second.first);
Type *SrcTy = IntegerType::get(DstTy->getContext(), SrcSz);
Cost += TTI->getCastInstrCost(Opcode, DstTy, SrcTy,
TTI::CastContextHint::None,
TTI::TCK_RecipThroughput);
@@ -12571,10 +12548,7 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E, bool PostponedPHIs) {

Intrinsic::ID ID = getVectorIntrinsicIDForCall(CI, TLI);

SmallVector<Type *> ArgTys =
buildIntrinsicArgTypes(CI, ID, VecTy->getNumElements(),
It != MinBWs.end() ? It->second.first : 0);
auto VecCallCosts = getVectorCallCosts(CI, VecTy, TTI, TLI, ArgTys);
auto VecCallCosts = getVectorCallCosts(CI, VecTy, TTI, TLI);
bool UseIntrinsic = ID != Intrinsic::not_intrinsic &&
VecCallCosts.first <= VecCallCosts.second;

@@ -12583,20 +12557,16 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E, bool PostponedPHIs) {
SmallVector<Type *, 2> TysForDecl;
// Add return type if intrinsic is overloaded on it.
if (UseIntrinsic && isVectorIntrinsicWithOverloadTypeAtArg(ID, -1))
TysForDecl.push_back(VecTy);
TysForDecl.push_back(
FixedVectorType::get(CI->getType(), E->Scalars.size()));
auto *CEI = cast<CallInst>(VL0);
for (unsigned I : seq<unsigned>(0, CI->arg_size())) {
ValueList OpVL;
// Some intrinsics have scalar arguments. This argument should not be
// vectorized.
if (UseIntrinsic && isVectorIntrinsicWithScalarOpAtArg(ID, I)) {
ScalarArg = CEI->getArgOperand(I);
// If we decided to reduce the bitwidth of the abs intrinsic, its second
// argument must be set to false (do not return poison if the value is
// signed min).
if (ID == Intrinsic::abs && It != MinBWs.end() &&
It->second.first < DL->getTypeSizeInBits(CEI->getType()))
ScalarArg = Builder.getFalse();
OpVecs.push_back(ScalarArg);
OpVecs.push_back(CEI->getArgOperand(I));
if (isVectorIntrinsicWithOverloadTypeAtArg(ID, I))
TysForDecl.push_back(ScalarArg->getType());
continue;
@@ -12609,13 +12579,10 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E, bool PostponedPHIs) {
}
ScalarArg = CEI->getArgOperand(I);
if (cast<VectorType>(OpVec->getType())->getElementType() !=
ScalarArg->getType() &&
It == MinBWs.end()) {
ScalarArg->getType()) {
auto *CastTy = FixedVectorType::get(ScalarArg->getType(),
VecTy->getNumElements());
OpVec = Builder.CreateIntCast(OpVec, CastTy, GetOperandSignedness(I));
} else if (It != MinBWs.end()) {
OpVec = Builder.CreateIntCast(OpVec, VecTy, GetOperandSignedness(I));
}
LLVM_DEBUG(dbgs() << "SLP: OpVec[" << I << "]: " << *OpVec << "\n");
OpVecs.push_back(OpVec);
@@ -14359,45 +14326,6 @@ bool BoUpSLP::collectValuesToDemote(
return TryProcessInstruction(I, *ITE, BitWidth, Ops);
}

case Instruction::Call: {
auto *IC = dyn_cast<IntrinsicInst>(I);
if (!IC)
break;
Intrinsic::ID ID = getVectorIntrinsicIDForCall(IC, TLI);
if (ID != Intrinsic::abs && ID != Intrinsic::smin &&
ID != Intrinsic::smax && ID != Intrinsic::umin && ID != Intrinsic::umax)
break;
SmallVector<Value *> Operands(1, I->getOperand(0));
End = 1;
if (ID != Intrinsic::abs) {
Operands.push_back(I->getOperand(1));
End = 2;
}
InstructionCost BestCost =
std::numeric_limits<InstructionCost::CostType>::max();
unsigned BestBitWidth = BitWidth;
unsigned VF = ITE->Scalars.size();
// Choose the best bitwidth based on cost estimations.
auto Checker = [&](unsigned BitWidth, unsigned) {
unsigned MinBW = PowerOf2Ceil(BitWidth);
SmallVector<Type *> ArgTys = buildIntrinsicArgTypes(IC, ID, VF, MinBW);
auto VecCallCosts = getVectorCallCosts(
IC,
FixedVectorType::get(IntegerType::get(IC->getContext(), MinBW), VF),
TTI, TLI, ArgTys);
InstructionCost Cost = std::min(VecCallCosts.first, VecCallCosts.second);
if (Cost < BestCost) {
BestCost = Cost;
BestBitWidth = BitWidth;
}
return false;
};
[[maybe_unused]] bool NeedToExit;
(void)AttemptCheckBitwidth(Checker, NeedToExit);
BitWidth = BestBitWidth;
return TryProcessInstruction(I, *ITE, BitWidth, Operands);
}

// Otherwise, conservatively give up.
default:
break;
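
A detail worth keeping from the code removed above: llvm.abs carries an int_min_is_poison flag, and narrowing the intrinsic can turn a value that was harmless at the wide type into the narrow type's signed minimum, so the flag must be cleared. A small standalone illustration with plain integers standing in for the intrinsic:

#include <cstdint>
#include <cstdio>

int main() {
  int32_t Wide = -128; // far from INT32_MIN, so abs(Wide) is fine as i32
  int8_t Narrow = static_cast<int8_t>(Wide); // but -128 == INT8_MIN in i8
  // abs(INT8_MIN) is not representable in i8, so a narrowed llvm.abs must be
  // emitted with int_min_is_poison == false to preserve the old semantics.
  std::printf("wide=%d narrow=%d\n", Wide, static_cast<int>(Narrow));
  return 0;
}
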
8 changes: 5 additions & 3 deletions llvm/lib/Transforms/Vectorize/VPlan.h
@@ -2075,13 +2075,15 @@ class VPInterleaveRecipe : public VPRecipeBase {
class VPReductionRecipe : public VPSingleDefRecipe {
/// The recurrence descriptor for the reduction in question.
const RecurrenceDescriptor &RdxDesc;
bool IsOrdered;

public:
VPReductionRecipe(const RecurrenceDescriptor &R, Instruction *I,
VPValue *ChainOp, VPValue *VecOp, VPValue *CondOp)
VPValue *ChainOp, VPValue *VecOp, VPValue *CondOp,
bool IsOrdered)
: VPSingleDefRecipe(VPDef::VPReductionSC,
ArrayRef<VPValue *>({ChainOp, VecOp}), I),
RdxDesc(R) {
RdxDesc(R), IsOrdered(IsOrdered) {
if (CondOp)
addOperand(CondOp);
}
@@ -2090,7 +2092,7 @@ class VPReductionRecipe : public VPSingleDefRecipe {

VPRecipeBase *clone() override {
return new VPReductionRecipe(RdxDesc, getUnderlyingInstr(), getChainOp(),
getVecOp(), getCondOp());
getVecOp(), getCondOp(), IsOrdered);
}

VP_CLASSOF_IMPL(VPDef::VPReductionSC)
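
The constructor and clone() changes above travel together: every field a recipe carries has to be forwarded through clone(), otherwise copies silently fall back to a default. The pattern in miniature (names illustrative only):

struct MiniRecipe {
  bool IsOrdered;
  explicit MiniRecipe(bool Ordered) : IsOrdered(Ordered) {}
  // Forward all state; dropping IsOrdered here would make the clone of an
  // ordered reduction silently unordered.
  MiniRecipe *clone() const { return new MiniRecipe(IsOrdered); }
};
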
51 changes: 51 additions & 0 deletions llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
@@ -1518,7 +1518,58 @@ void VPBlendRecipe::print(raw_ostream &O, const Twine &Indent,
}
}
}
#endif

void VPReductionRecipe::execute(VPTransformState &State) {
assert(!State.Instance && "Reduction being replicated.");
Value *PrevInChain = State.get(getChainOp(), 0, /*IsScalar*/ true);
RecurKind Kind = RdxDesc.getRecurrenceKind();
// Propagate the fast-math flags carried by the underlying instruction.
IRBuilderBase::FastMathFlagGuard FMFGuard(State.Builder);
State.Builder.setFastMathFlags(RdxDesc.getFastMathFlags());
for (unsigned Part = 0; Part < State.UF; ++Part) {
Value *NewVecOp = State.get(getVecOp(), Part);
if (VPValue *Cond = getCondOp()) {
Value *NewCond = State.get(Cond, Part, State.VF.isScalar());
VectorType *VecTy = dyn_cast<VectorType>(NewVecOp->getType());
Type *ElementTy = VecTy ? VecTy->getElementType() : NewVecOp->getType();
Value *Iden = RdxDesc.getRecurrenceIdentity(Kind, ElementTy,
RdxDesc.getFastMathFlags());
if (State.VF.isVector()) {
Iden = State.Builder.CreateVectorSplat(VecTy->getElementCount(), Iden);
}

Value *Select = State.Builder.CreateSelect(NewCond, NewVecOp, Iden);
NewVecOp = Select;
}
Value *NewRed;
Value *NextInChain;
if (IsOrdered) {
if (State.VF.isVector())
NewRed = createOrderedReduction(State.Builder, RdxDesc, NewVecOp,
PrevInChain);
else
NewRed = State.Builder.CreateBinOp(
(Instruction::BinaryOps)RdxDesc.getOpcode(Kind), PrevInChain,
NewVecOp);
PrevInChain = NewRed;
} else {
PrevInChain = State.get(getChainOp(), Part, /*IsScalar*/ true);
NewRed = createTargetReduction(State.Builder, RdxDesc, NewVecOp);
}
if (RecurrenceDescriptor::isMinMaxRecurrenceKind(Kind)) {
NextInChain = createMinMaxOp(State.Builder, RdxDesc.getRecurrenceKind(),
NewRed, PrevInChain);
} else if (IsOrdered)
NextInChain = NewRed;
else
NextInChain = State.Builder.CreateBinOp(
(Instruction::BinaryOps)RdxDesc.getOpcode(Kind), NewRed, PrevInChain);
State.set(this, NextInChain, Part, /*IsScalar*/ true);
}
}

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
void VPReductionRecipe::print(raw_ostream &O, const Twine &Indent,
VPSlotTracker &SlotTracker) const {
O << Indent << "REDUCE ";
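
The IsOrdered split in execute() mirrors floating-point semantics: an ordered reduction folds each value into the running chain strictly in sequence, while the unordered form reduces each part independently and combines later, which is only legal under reassociation. A rough scalar model of the two strategies for fadd (a sketch, not LLVM's createOrderedReduction/createTargetReduction):

#include <cstdio>
#include <vector>

// Strict in-order chain: the result depends on evaluation order.
static float orderedReduce(float Chain, const std::vector<float> &Lanes) {
  for (float L : Lanes)
    Chain += L; // one fadd per lane, in lane order
  return Chain;
}

// Pairwise tree reduction: what a fast-math vector reduction may do.
static float treeReduce(std::vector<float> Lanes) {
  while (Lanes.size() > 1) {
    std::vector<float> Next;
    for (size_t I = 0; I + 1 < Lanes.size(); I += 2)
      Next.push_back(Lanes[I] + Lanes[I + 1]);
    if (Lanes.size() % 2)
      Next.push_back(Lanes.back());
    Lanes = Next;
  }
  return Lanes.empty() ? 0.0f : Lanes.front();
}

int main() {
  std::vector<float> Lanes = {1e8f, 1.0f, -1e8f, 1.0f};
  // The two strategies round differently here (1 vs 0), which is why an
  // ordered reduction must not be rewritten as a tree without fast-math.
  std::printf("ordered=%f tree=%f\n", orderedReduce(0.0f, Lanes),
              treeReduce(Lanes));
  return 0;
}
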
4 changes: 4 additions & 0 deletions llvm/lib/Transforms/Vectorize/VectorCombine.cpp
@@ -1485,6 +1485,10 @@ bool VectorCombine::foldShuffleOfCastops(Instruction &I) {
TargetTransformInfo::SK_PermuteTwoSrc, CastSrcTy, Mask, CostKind);
NewCost += TTI.getCastInstrCost(Opcode, ShuffleDstTy, NewShuffleDstTy,
TTI::CastContextHint::None, CostKind);

LLVM_DEBUG(dbgs() << "Found a shuffle feeding two casts: " << I
<< "\n OldCost: " << OldCost << " vs NewCost: " << NewCost
<< "\n");
if (NewCost > OldCost)
return false;

231 changes: 231 additions & 0 deletions llvm/test/Analysis/CostModel/AArch64/shuffle-store.ll

Large diffs are not rendered by default.

16 changes: 16 additions & 0 deletions llvm/test/Assembler/atomic.ll
@@ -72,3 +72,19 @@ define void @fp_atomics(ptr %x) {

ret void
}

define void @fp_vector_atomicrmw(ptr %x, <2 x half> %val) {
; CHECK: %atomic.fadd = atomicrmw fadd ptr %x, <2 x half> %val seq_cst
%atomic.fadd = atomicrmw fadd ptr %x, <2 x half> %val seq_cst

; CHECK: %atomic.fsub = atomicrmw fsub ptr %x, <2 x half> %val seq_cst
%atomic.fsub = atomicrmw fsub ptr %x, <2 x half> %val seq_cst

; CHECK: %atomic.fmax = atomicrmw fmax ptr %x, <2 x half> %val seq_cst
%atomic.fmax = atomicrmw fmax ptr %x, <2 x half> %val seq_cst

; CHECK: %atomic.fmin = atomicrmw fmin ptr %x, <2 x half> %val seq_cst
%atomic.fmin = atomicrmw fmin ptr %x, <2 x half> %val seq_cst

ret void
}
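
The vector-FP atomicrmw forms accepted above have no native instruction on most targets; as the AArch64 and X86 tests later in this diff show, they are expanded into a load plus compare-exchange retry loop. A user-level sketch of the same shape, emulating atomicrmw fadd on <2 x float> through an 8-byte payload (an illustration, not the backend's expansion code):

#include <atomic>
#include <cstdint>
#include <cstdio>
#include <cstring>

// Emulate 'atomicrmw fadd ptr, <2 x float>' with a cmpxchg loop over the
// 8-byte bit pattern, the same strategy the generated asm uses.
static void atomicFAdd2xF32(std::atomic<uint64_t> &Mem, const float Val[2]) {
  uint64_t Old = Mem.load(std::memory_order_seq_cst);
  for (;;) {
    float Cur[2];
    std::memcpy(Cur, &Old, sizeof Cur);
    Cur[0] += Val[0];
    Cur[1] += Val[1];
    uint64_t New;
    std::memcpy(&New, Cur, sizeof New);
    // On failure, Old is refreshed with the current value and we retry.
    if (Mem.compare_exchange_weak(Old, New, std::memory_order_seq_cst))
      break;
  }
}

int main() {
  std::atomic<uint64_t> Mem{0};
  const float V[2] = {1.5f, 2.5f};
  atomicFAdd2xF32(Mem, V);
  float Out[2];
  uint64_t Bits = Mem.load();
  std::memcpy(Out, &Bits, sizeof Out);
  std::printf("%g %g\n", Out[0], Out[1]); // 1.5 2.5
  return 0;
}
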
41 changes: 41 additions & 0 deletions llvm/test/Assembler/invalid-atomicrmw-scalable.ll
@@ -0,0 +1,41 @@
; RUN: split-file %s %t --leading-lines
; RUN: not llvm-as < %t/scalable_fp_vector_atomicrmw_xchg.ll 2>&1 | FileCheck -check-prefix=ERR0 %s
; RUN: not llvm-as < %t/scalable_int_vector_atomicrmw_xchg.ll 2>&1 | FileCheck -check-prefix=ERR1 %s
; RUN: not llvm-as < %t/scalable_ptr_vector_atomicrmw_xchg.ll 2>&1 | FileCheck -check-prefix=ERR2 %s
; RUN: not llvm-as < %t/scalable_fp_vector_atomicrmw_fadd.ll 2>&1 | FileCheck -check-prefix=ERR3 %s
; RUN: not llvm-as < %t/scalable_int_vector_atomicrmw_add.ll 2>&1 | FileCheck -check-prefix=ERR4 %s

;--- scalable_fp_vector_atomicrmw_xchg.ll
define <vscale x 2 x half> @scalable_fp_vector_atomicrmw_xchg(ptr %x, <vscale x 2 x half> %val) {
; ERR0: :41: error: atomicrmw operand may not be scalable
%atomic.xchg = atomicrmw xchg ptr %x, <vscale x 2 x half> %val seq_cst
ret <vscale x 2 x half> %atomic.xchg
}

;--- scalable_int_vector_atomicrmw_xchg.ll
define <vscale x 2 x i16> @scalable_int_vector_atomicrmw_xchg(ptr %x, <vscale x 2 x i16> %val) {
; ERR1: :41: error: atomicrmw operand may not be scalable
%atomic.xchg = atomicrmw xchg ptr %x, <vscale x 2 x i16> %val seq_cst
ret <vscale x 2 x i16> %atomic.xchg
}

;--- scalable_ptr_vector_atomicrmw_xchg.ll
define <vscale x 2 x ptr> @scalable_ptr_vector_atomicrmw_xchg(ptr %x, <vscale x 2 x ptr> %val) {
; ERR2: :41: error: atomicrmw operand may not be scalable
%atomic.xchg = atomicrmw xchg ptr %x, <vscale x 2 x ptr> %val seq_cst
ret <vscale x 2 x ptr> %atomic.xchg
}

;--- scalable_fp_vector_atomicrmw_fadd.ll
define <vscale x 2 x half> @scalable_fp_vector_atomicrmw_fadd(ptr %x, <vscale x 2 x half> %val) {
; ERR3: :41: error: atomicrmw operand may not be scalable
%atomic.fadd = atomicrmw fadd ptr %x, <vscale x 2 x half> %val seq_cst
ret <vscale x 2 x half> %atomic.fadd
}

;--- scalable_int_vector_atomicrmw_add.ll
define <vscale x 2 x i16> @scalable_int_vector_atomicrmw_add(ptr %x, <vscale x 2 x i16> %val) {
; ERR4: :39: error: atomicrmw operand may not be scalable
%atomic.add = atomicrmw add ptr %x, <vscale x 2 x i16> %val seq_cst
ret <vscale x 2 x i16> %atomic.add
}
7 changes: 7 additions & 0 deletions llvm/test/Assembler/invalid-atomicrmw-xchg-fp-vector.ll
@@ -0,0 +1,7 @@
; RUN: not llvm-as -disable-output %s 2>&1 | FileCheck %s

; CHECK: error: atomicrmw xchg operand must be an integer, floating point, or pointer type
define <2 x half> @fp_vector_atomicrmw(ptr %x, <2 x half> %val) {
%atomic.xchg = atomicrmw xchg ptr %x, <2 x half> %val seq_cst
ret <2 x half> %atomic.xchg
}
31 changes: 31 additions & 0 deletions llvm/test/CodeGen/AArch64/GlobalISel/combine-extract-vec-elt.mir
@@ -220,6 +220,37 @@ body: |
$x0 = COPY %extract(s64)
RET_ReallyLR implicit $x0

...
---
# This test checks that this combine runs after the insertvec->build_vector combine.
name: extract_from_insert2
tracksRegLiveness: true
liveins:
- { reg: '$x0' }
- { reg: '$x1' }
frameInfo:
maxAlignment: 1
body: |
bb.1:
liveins: $q0, $x0, $x1
; CHECK-LABEL: name: extract_from_insert2
; CHECK: liveins: $q0, $x0, $x1
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: %arg1:_(s64) = COPY $x0
; CHECK-NEXT: %arg2:_(s64) = COPY $x1
; CHECK-NEXT: %ins2:_(<2 x s64>) = G_BUILD_VECTOR %arg1(s64), %arg2(s64)
; CHECK-NEXT: $q0 = COPY %ins2(<2 x s64>)
; CHECK-NEXT: RET_ReallyLR implicit $q0
%arg0:_(<2 x s64>) = COPY $q0
%arg1:_(s64) = COPY $x0
%arg2:_(s64) = COPY $x1
%zero:_(s32) = G_CONSTANT i32 0
%one:_(s32) = G_CONSTANT i32 1
%ins1:_(<2 x s64>) = G_INSERT_VECTOR_ELT %arg0, %arg1(s64), %zero(s32)
%ins2:_(<2 x s64>) = G_INSERT_VECTOR_ELT %ins1, %arg2(s64), %one(s32)
$q0 = COPY %ins2(<2 x s64>)
RET_ReallyLR implicit $q0

...
---
name: extract_from_idx_negative
4 changes: 0 additions & 4 deletions llvm/test/CodeGen/AArch64/aarch64-known-bits-hadd.ll
@@ -12,7 +12,6 @@ define <8 x i16> @haddu_zext(<8 x i8> %a0, <8 x i8> %a1) {
; CHECK-NEXT: ushll v0.8h, v0.8b, #0
; CHECK-NEXT: ushll v1.8h, v1.8b, #0
; CHECK-NEXT: uhadd v0.8h, v0.8h, v1.8h
; CHECK-NEXT: bic v0.8h, #254, lsl #8
; CHECK-NEXT: ret
%x0 = zext <8 x i8> %a0 to <8 x i16>
%x1 = zext <8 x i8> %a1 to <8 x i16>
@@ -27,7 +26,6 @@ define <8 x i16> @rhaddu_zext(<8 x i8> %a0, <8 x i8> %a1) {
; CHECK-NEXT: ushll v0.8h, v0.8b, #0
; CHECK-NEXT: ushll v1.8h, v1.8b, #0
; CHECK-NEXT: urhadd v0.8h, v0.8h, v1.8h
; CHECK-NEXT: bic v0.8h, #254, lsl #8
; CHECK-NEXT: ret
%x0 = zext <8 x i8> %a0 to <8 x i16>
%x1 = zext <8 x i8> %a1 to <8 x i16>
@@ -42,7 +40,6 @@ define <8 x i16> @hadds_zext(<8 x i8> %a0, <8 x i8> %a1) {
; CHECK-NEXT: ushll v0.8h, v0.8b, #0
; CHECK-NEXT: ushll v1.8h, v1.8b, #0
; CHECK-NEXT: shadd v0.8h, v0.8h, v1.8h
; CHECK-NEXT: bic v0.8h, #254, lsl #8
; CHECK-NEXT: ret
%x0 = zext <8 x i8> %a0 to <8 x i16>
%x1 = zext <8 x i8> %a1 to <8 x i16>
@@ -57,7 +54,6 @@ define <8 x i16> @shaddu_zext(<8 x i8> %a0, <8 x i8> %a1) {
; CHECK-NEXT: ushll v0.8h, v0.8b, #0
; CHECK-NEXT: ushll v1.8h, v1.8b, #0
; CHECK-NEXT: srhadd v0.8h, v0.8h, v1.8h
; CHECK-NEXT: bic v0.8h, #254, lsl #8
; CHECK-NEXT: ret
%x0 = zext <8 x i8> %a0 to <8 x i16>
%x1 = zext <8 x i8> %a1 to <8 x i16>
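
The removed bic was masking bits that known-bits analysis now proves are already zero: averaging two zero-extended i8 values can never exceed 255, so the high half of each i16 lane stays clear. The bound, checked directly (a sketch of the reasoning, not the KnownBits implementation):

#include <cstdio>

int main() {
  unsigned Max = 255; // largest value after zext i8 -> i16
  unsigned HAdd = (Max + Max) / 2;      // halving add
  unsigned RHAdd = (Max + Max + 1) / 2; // rounding halving add
  // Both stay <= 255, so bits 8..15 of each lane are known zero and the
  // 'bic v0.8h, #254, lsl #8' mask was redundant.
  std::printf("%u %u\n", HAdd, RHAdd); // 255 255
  return 0;
}
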
115 changes: 115 additions & 0 deletions llvm/test/CodeGen/AArch64/atomicrmw-fadd-fp-vector.ll
@@ -0,0 +1,115 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4
; RUN: llc -mtriple=aarch64-- -O1 -fast-isel=0 -global-isel=false %s -o - | FileCheck -check-prefixes=CHECK,NOLSE %s
; RUN: llc -mtriple=aarch64-- -mattr=+lse -O1 -fast-isel=0 -global-isel=false %s -o - | FileCheck -check-prefixes=CHECK,LSE %s

define <2 x half> @test_atomicrmw_fadd_v2f16_align4(ptr addrspace(1) %ptr, <2 x half> %value) #0 {
; NOLSE-LABEL: test_atomicrmw_fadd_v2f16_align4:
; NOLSE: // %bb.0:
; NOLSE-NEXT: fcvtl v1.4s, v0.4h
; NOLSE-NEXT: ldr s0, [x0]
; NOLSE-NEXT: b .LBB0_2
; NOLSE-NEXT: .LBB0_1: // %atomicrmw.start
; NOLSE-NEXT: // in Loop: Header=BB0_2 Depth=1
; NOLSE-NEXT: fmov s0, w10
; NOLSE-NEXT: cmp w10, w9
; NOLSE-NEXT: b.eq .LBB0_5
; NOLSE-NEXT: .LBB0_2: // %atomicrmw.start
; NOLSE-NEXT: // =>This Loop Header: Depth=1
; NOLSE-NEXT: // Child Loop BB0_3 Depth 2
; NOLSE-NEXT: fcvtl v2.4s, v0.4h
; NOLSE-NEXT: fmov w9, s0
; NOLSE-NEXT: fadd v2.4s, v2.4s, v1.4s
; NOLSE-NEXT: fcvtn v2.4h, v2.4s
; NOLSE-NEXT: fmov w8, s2
; NOLSE-NEXT: .LBB0_3: // %atomicrmw.start
; NOLSE-NEXT: // Parent Loop BB0_2 Depth=1
; NOLSE-NEXT: // => This Inner Loop Header: Depth=2
; NOLSE-NEXT: ldaxr w10, [x0]
; NOLSE-NEXT: cmp w10, w9
; NOLSE-NEXT: b.ne .LBB0_1
; NOLSE-NEXT: // %bb.4: // %atomicrmw.start
; NOLSE-NEXT: // in Loop: Header=BB0_3 Depth=2
; NOLSE-NEXT: stlxr wzr, w8, [x0]
; NOLSE-NEXT: cbnz wzr, .LBB0_3
; NOLSE-NEXT: b .LBB0_1
; NOLSE-NEXT: .LBB0_5: // %atomicrmw.end
; NOLSE-NEXT: // kill: def $d0 killed $d0 killed $q0
; NOLSE-NEXT: ret
;
; LSE-LABEL: test_atomicrmw_fadd_v2f16_align4:
; LSE: // %bb.0:
; LSE-NEXT: fcvtl v1.4s, v0.4h
; LSE-NEXT: ldr s0, [x0]
; LSE-NEXT: .LBB0_1: // %atomicrmw.start
; LSE-NEXT: // =>This Inner Loop Header: Depth=1
; LSE-NEXT: fcvtl v2.4s, v0.4h
; LSE-NEXT: fmov w8, s0
; LSE-NEXT: mov w10, w8
; LSE-NEXT: fadd v2.4s, v2.4s, v1.4s
; LSE-NEXT: fcvtn v2.4h, v2.4s
; LSE-NEXT: fmov w9, s2
; LSE-NEXT: casal w10, w9, [x0]
; LSE-NEXT: fmov s0, w10
; LSE-NEXT: cmp w10, w8
; LSE-NEXT: b.ne .LBB0_1
; LSE-NEXT: // %bb.2: // %atomicrmw.end
; LSE-NEXT: // kill: def $d0 killed $d0 killed $q0
; LSE-NEXT: ret
%res = atomicrmw fadd ptr addrspace(1) %ptr, <2 x half> %value seq_cst, align 4
ret <2 x half> %res
}

define <2 x float> @test_atomicrmw_fadd_v2f32_align8(ptr addrspace(1) %ptr, <2 x float> %value) #0 {
; NOLSE-LABEL: test_atomicrmw_fadd_v2f32_align8:
; NOLSE: // %bb.0:
; NOLSE-NEXT: ldr d1, [x0]
; NOLSE-NEXT: b .LBB1_2
; NOLSE-NEXT: .LBB1_1: // %atomicrmw.start
; NOLSE-NEXT: // in Loop: Header=BB1_2 Depth=1
; NOLSE-NEXT: fmov d1, x10
; NOLSE-NEXT: cmp x10, x9
; NOLSE-NEXT: b.eq .LBB1_5
; NOLSE-NEXT: .LBB1_2: // %atomicrmw.start
; NOLSE-NEXT: // =>This Loop Header: Depth=1
; NOLSE-NEXT: // Child Loop BB1_3 Depth 2
; NOLSE-NEXT: fadd v2.2s, v1.2s, v0.2s
; NOLSE-NEXT: fmov x9, d1
; NOLSE-NEXT: fmov x8, d2
; NOLSE-NEXT: .LBB1_3: // %atomicrmw.start
; NOLSE-NEXT: // Parent Loop BB1_2 Depth=1
; NOLSE-NEXT: // => This Inner Loop Header: Depth=2
; NOLSE-NEXT: ldaxr x10, [x0]
; NOLSE-NEXT: cmp x10, x9
; NOLSE-NEXT: b.ne .LBB1_1
; NOLSE-NEXT: // %bb.4: // %atomicrmw.start
; NOLSE-NEXT: // in Loop: Header=BB1_3 Depth=2
; NOLSE-NEXT: stlxr wzr, x8, [x0]
; NOLSE-NEXT: cbnz wzr, .LBB1_3
; NOLSE-NEXT: b .LBB1_1
; NOLSE-NEXT: .LBB1_5: // %atomicrmw.end
; NOLSE-NEXT: fmov d0, d1
; NOLSE-NEXT: ret
;
; LSE-LABEL: test_atomicrmw_fadd_v2f32_align8:
; LSE: // %bb.0:
; LSE-NEXT: ldr d1, [x0]
; LSE-NEXT: .LBB1_1: // %atomicrmw.start
; LSE-NEXT: // =>This Inner Loop Header: Depth=1
; LSE-NEXT: fadd v2.2s, v1.2s, v0.2s
; LSE-NEXT: fmov x8, d1
; LSE-NEXT: mov x10, x8
; LSE-NEXT: fmov x9, d2
; LSE-NEXT: casal x10, x9, [x0]
; LSE-NEXT: fmov d1, x10
; LSE-NEXT: cmp x10, x8
; LSE-NEXT: b.ne .LBB1_1
; LSE-NEXT: // %bb.2: // %atomicrmw.end
; LSE-NEXT: fmov d0, d1
; LSE-NEXT: ret
%res = atomicrmw fadd ptr addrspace(1) %ptr, <2 x float> %value seq_cst, align 8
ret <2 x float> %res
}

attributes #0 = { nounwind }
;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
; CHECK: {{.*}}
307 changes: 307 additions & 0 deletions llvm/test/CodeGen/RISCV/prolog-epilogue.ll
@@ -0,0 +1,307 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=riscv32 -verify-machineinstrs < %s \
; RUN: | FileCheck %s -check-prefixes=RV32,RV32I
; RUN: llc -mtriple=riscv32 -verify-machineinstrs -mattr=+zba < %s \
; RUN: | FileCheck %s -check-prefixes=RV32,RV32ZBA
; RUN: llc -mtriple=riscv64 -verify-machineinstrs < %s \
; RUN: | FileCheck %s -check-prefixes=RV64,RV64I
; RUN: llc -mtriple=riscv64 -verify-machineinstrs -mattr=+zba < %s \
; RUN: | FileCheck %s -check-prefixes=RV64,RV64ZBA

declare void @callee(ptr)

define void @frame_16b() {
; RV32-LABEL: frame_16b:
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -16
; RV32-NEXT: .cfi_def_cfa_offset 16
; RV32-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
; RV32-NEXT: .cfi_offset ra, -4
; RV32-NEXT: li a0, 0
; RV32-NEXT: call callee
; RV32-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
; RV32-NEXT: addi sp, sp, 16
; RV32-NEXT: ret
;
; RV64-LABEL: frame_16b:
; RV64: # %bb.0:
; RV64-NEXT: addi sp, sp, -16
; RV64-NEXT: .cfi_def_cfa_offset 16
; RV64-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
; RV64-NEXT: .cfi_offset ra, -8
; RV64-NEXT: li a0, 0
; RV64-NEXT: call callee
; RV64-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
; RV64-NEXT: addi sp, sp, 16
; RV64-NEXT: ret
call void @callee(ptr null)
ret void
}

define void @frame_1024b() {
; RV32-LABEL: frame_1024b:
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -1024
; RV32-NEXT: .cfi_def_cfa_offset 1024
; RV32-NEXT: sw ra, 1020(sp) # 4-byte Folded Spill
; RV32-NEXT: .cfi_offset ra, -4
; RV32-NEXT: addi a0, sp, 12
; RV32-NEXT: call callee
; RV32-NEXT: lw ra, 1020(sp) # 4-byte Folded Reload
; RV32-NEXT: addi sp, sp, 1024
; RV32-NEXT: ret
;
; RV64-LABEL: frame_1024b:
; RV64: # %bb.0:
; RV64-NEXT: addi sp, sp, -1024
; RV64-NEXT: .cfi_def_cfa_offset 1024
; RV64-NEXT: sd ra, 1016(sp) # 8-byte Folded Spill
; RV64-NEXT: .cfi_offset ra, -8
; RV64-NEXT: addi a0, sp, 8
; RV64-NEXT: call callee
; RV64-NEXT: ld ra, 1016(sp) # 8-byte Folded Reload
; RV64-NEXT: addi sp, sp, 1024
; RV64-NEXT: ret
%a = alloca [1008 x i8]
call void @callee(ptr %a)
ret void
}

define void @frame_2048b() {
; RV32-LABEL: frame_2048b:
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -2032
; RV32-NEXT: .cfi_def_cfa_offset 2032
; RV32-NEXT: sw ra, 2028(sp) # 4-byte Folded Spill
; RV32-NEXT: .cfi_offset ra, -4
; RV32-NEXT: addi sp, sp, -16
; RV32-NEXT: .cfi_def_cfa_offset 2048
; RV32-NEXT: addi a0, sp, 12
; RV32-NEXT: call callee
; RV32-NEXT: addi sp, sp, 16
; RV32-NEXT: lw ra, 2028(sp) # 4-byte Folded Reload
; RV32-NEXT: addi sp, sp, 2032
; RV32-NEXT: ret
;
; RV64-LABEL: frame_2048b:
; RV64: # %bb.0:
; RV64-NEXT: addi sp, sp, -2032
; RV64-NEXT: .cfi_def_cfa_offset 2032
; RV64-NEXT: sd ra, 2024(sp) # 8-byte Folded Spill
; RV64-NEXT: .cfi_offset ra, -8
; RV64-NEXT: addi sp, sp, -16
; RV64-NEXT: .cfi_def_cfa_offset 2048
; RV64-NEXT: addi a0, sp, 8
; RV64-NEXT: call callee
; RV64-NEXT: addi sp, sp, 16
; RV64-NEXT: ld ra, 2024(sp) # 8-byte Folded Reload
; RV64-NEXT: addi sp, sp, 2032
; RV64-NEXT: ret
%a = alloca [2032 x i8]
call void @callee(ptr %a)
ret void
}

define void @frame_4096b() {
; RV32-LABEL: frame_4096b:
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -2032
; RV32-NEXT: .cfi_def_cfa_offset 2032
; RV32-NEXT: sw ra, 2028(sp) # 4-byte Folded Spill
; RV32-NEXT: .cfi_offset ra, -4
; RV32-NEXT: addi sp, sp, -2048
; RV32-NEXT: addi sp, sp, -16
; RV32-NEXT: .cfi_def_cfa_offset 4096
; RV32-NEXT: addi a0, sp, 12
; RV32-NEXT: call callee
; RV32-NEXT: addi sp, sp, 2032
; RV32-NEXT: addi sp, sp, 32
; RV32-NEXT: lw ra, 2028(sp) # 4-byte Folded Reload
; RV32-NEXT: addi sp, sp, 2032
; RV32-NEXT: ret
;
; RV64-LABEL: frame_4096b:
; RV64: # %bb.0:
; RV64-NEXT: addi sp, sp, -2032
; RV64-NEXT: .cfi_def_cfa_offset 2032
; RV64-NEXT: sd ra, 2024(sp) # 8-byte Folded Spill
; RV64-NEXT: .cfi_offset ra, -8
; RV64-NEXT: addi sp, sp, -2048
; RV64-NEXT: addi sp, sp, -16
; RV64-NEXT: .cfi_def_cfa_offset 4096
; RV64-NEXT: addi a0, sp, 8
; RV64-NEXT: call callee
; RV64-NEXT: addi sp, sp, 2032
; RV64-NEXT: addi sp, sp, 32
; RV64-NEXT: ld ra, 2024(sp) # 8-byte Folded Reload
; RV64-NEXT: addi sp, sp, 2032
; RV64-NEXT: ret
%a = alloca [4080 x i8]
call void @callee(ptr %a)
ret void
}

;; 2^12-16+2032
define void @frame_4kb() {
; RV32-LABEL: frame_4kb:
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -2032
; RV32-NEXT: .cfi_def_cfa_offset 2032
; RV32-NEXT: sw ra, 2028(sp) # 4-byte Folded Spill
; RV32-NEXT: .cfi_offset ra, -4
; RV32-NEXT: lui a0, 1
; RV32-NEXT: sub sp, sp, a0
; RV32-NEXT: .cfi_def_cfa_offset 6128
; RV32-NEXT: addi a0, sp, 12
; RV32-NEXT: call callee
; RV32-NEXT: lui a0, 1
; RV32-NEXT: add sp, sp, a0
; RV32-NEXT: lw ra, 2028(sp) # 4-byte Folded Reload
; RV32-NEXT: addi sp, sp, 2032
; RV32-NEXT: ret
;
; RV64-LABEL: frame_4kb:
; RV64: # %bb.0:
; RV64-NEXT: addi sp, sp, -2032
; RV64-NEXT: .cfi_def_cfa_offset 2032
; RV64-NEXT: sd ra, 2024(sp) # 8-byte Folded Spill
; RV64-NEXT: .cfi_offset ra, -8
; RV64-NEXT: lui a0, 1
; RV64-NEXT: sub sp, sp, a0
; RV64-NEXT: .cfi_def_cfa_offset 6128
; RV64-NEXT: addi a0, sp, 8
; RV64-NEXT: call callee
; RV64-NEXT: lui a0, 1
; RV64-NEXT: add sp, sp, a0
; RV64-NEXT: ld ra, 2024(sp) # 8-byte Folded Reload
; RV64-NEXT: addi sp, sp, 2032
; RV64-NEXT: ret
%a = alloca [6112 x i8]
call void @callee(ptr %a)
ret void
}

;; 2^13-16+2032
define void @frame_8kb() {
; RV32-LABEL: frame_8kb:
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -2032
; RV32-NEXT: .cfi_def_cfa_offset 2032
; RV32-NEXT: sw ra, 2028(sp) # 4-byte Folded Spill
; RV32-NEXT: .cfi_offset ra, -4
; RV32-NEXT: lui a0, 2
; RV32-NEXT: sub sp, sp, a0
; RV32-NEXT: .cfi_def_cfa_offset 10224
; RV32-NEXT: addi a0, sp, 12
; RV32-NEXT: call callee
; RV32-NEXT: lui a0, 2
; RV32-NEXT: add sp, sp, a0
; RV32-NEXT: lw ra, 2028(sp) # 4-byte Folded Reload
; RV32-NEXT: addi sp, sp, 2032
; RV32-NEXT: ret
;
; RV64-LABEL: frame_8kb:
; RV64: # %bb.0:
; RV64-NEXT: addi sp, sp, -2032
; RV64-NEXT: .cfi_def_cfa_offset 2032
; RV64-NEXT: sd ra, 2024(sp) # 8-byte Folded Spill
; RV64-NEXT: .cfi_offset ra, -8
; RV64-NEXT: lui a0, 2
; RV64-NEXT: sub sp, sp, a0
; RV64-NEXT: .cfi_def_cfa_offset 10224
; RV64-NEXT: addi a0, sp, 8
; RV64-NEXT: call callee
; RV64-NEXT: lui a0, 2
; RV64-NEXT: add sp, sp, a0
; RV64-NEXT: ld ra, 2024(sp) # 8-byte Folded Reload
; RV64-NEXT: addi sp, sp, 2032
; RV64-NEXT: ret
%a = alloca [10208 x i8]
call void @callee(ptr %a)
ret void
}

;; 2^14-16+2032
define void @frame_16kb() {
; RV32-LABEL: frame_16kb:
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -2032
; RV32-NEXT: .cfi_def_cfa_offset 2032
; RV32-NEXT: sw ra, 2028(sp) # 4-byte Folded Spill
; RV32-NEXT: .cfi_offset ra, -4
; RV32-NEXT: lui a0, 4
; RV32-NEXT: sub sp, sp, a0
; RV32-NEXT: .cfi_def_cfa_offset 18416
; RV32-NEXT: addi a0, sp, 12
; RV32-NEXT: call callee
; RV32-NEXT: lui a0, 4
; RV32-NEXT: add sp, sp, a0
; RV32-NEXT: lw ra, 2028(sp) # 4-byte Folded Reload
; RV32-NEXT: addi sp, sp, 2032
; RV32-NEXT: ret
;
; RV64-LABEL: frame_16kb:
; RV64: # %bb.0:
; RV64-NEXT: addi sp, sp, -2032
; RV64-NEXT: .cfi_def_cfa_offset 2032
; RV64-NEXT: sd ra, 2024(sp) # 8-byte Folded Spill
; RV64-NEXT: .cfi_offset ra, -8
; RV64-NEXT: lui a0, 4
; RV64-NEXT: sub sp, sp, a0
; RV64-NEXT: .cfi_def_cfa_offset 18416
; RV64-NEXT: addi a0, sp, 8
; RV64-NEXT: call callee
; RV64-NEXT: lui a0, 4
; RV64-NEXT: add sp, sp, a0
; RV64-NEXT: ld ra, 2024(sp) # 8-byte Folded Reload
; RV64-NEXT: addi sp, sp, 2032
; RV64-NEXT: ret
%a = alloca [18400 x i8]
call void @callee(ptr %a)
ret void
}

;; 2^15-16+2032
define void @frame_32kb() {
; RV32-LABEL: frame_32kb:
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -2032
; RV32-NEXT: .cfi_def_cfa_offset 2032
; RV32-NEXT: sw ra, 2028(sp) # 4-byte Folded Spill
; RV32-NEXT: .cfi_offset ra, -4
; RV32-NEXT: lui a0, 8
; RV32-NEXT: sub sp, sp, a0
; RV32-NEXT: .cfi_def_cfa_offset 34800
; RV32-NEXT: addi a0, sp, 12
; RV32-NEXT: call callee
; RV32-NEXT: lui a0, 8
; RV32-NEXT: add sp, sp, a0
; RV32-NEXT: lw ra, 2028(sp) # 4-byte Folded Reload
; RV32-NEXT: addi sp, sp, 2032
; RV32-NEXT: ret
;
; RV64-LABEL: frame_32kb:
; RV64: # %bb.0:
; RV64-NEXT: addi sp, sp, -2032
; RV64-NEXT: .cfi_def_cfa_offset 2032
; RV64-NEXT: sd ra, 2024(sp) # 8-byte Folded Spill
; RV64-NEXT: .cfi_offset ra, -8
; RV64-NEXT: lui a0, 8
; RV64-NEXT: sub sp, sp, a0
; RV64-NEXT: .cfi_def_cfa_offset 34800
; RV64-NEXT: addi a0, sp, 8
; RV64-NEXT: call callee
; RV64-NEXT: lui a0, 8
; RV64-NEXT: add sp, sp, a0
; RV64-NEXT: ld ra, 2024(sp) # 8-byte Folded Reload
; RV64-NEXT: addi sp, sp, 2032
; RV64-NEXT: ret
%a = alloca [34784 x i8]
call void @callee(ptr %a)
ret void
}
;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
; RV32I: {{.*}}
; RV32ZBA: {{.*}}
; RV64I: {{.*}}
; RV64ZBA: {{.*}}
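
The repeated 2032/16 splits in these prologues come from the addi encoding: its immediate is a 12-bit signed value, so a single adjustment can move sp by at most 2047 bytes, and 2032 is the largest multiple of 16 in range, preserving RISC-V's 16-byte stack alignment. Larger frames either chain a second addi or materialize the remainder with lui. A quick check of that arithmetic (illustrative only):

#include <cstdio>

int main() {
  const int MaxImm = 2047;              // 12-bit signed addi immediate
  const int FirstAdjust = MaxImm & ~15; // largest 16-byte-aligned step
  std::printf("first adjust = %d\n", FirstAdjust); // 2032
  // frame_2048b: a 2048-byte frame = 2032 + a second addi of 16.
  std::printf("remainder for 2048 = %d\n", 2048 - FirstAdjust); // 16
  // frame_4kb: 2032 spill area + lui a0, 1 (4096) = the 6128 CFA offset.
  std::printf("frame_4kb cfa = %d\n", FirstAdjust + 4096); // 6128
  return 0;
}
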
124 changes: 122 additions & 2 deletions llvm/test/CodeGen/RISCV/rvv/vfmv.v.f.ll
@@ -1,7 +1,7 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+v,+zfh,+zvfh \
; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+v,+zfh,+zvfh,+experimental-zfbfmin,+experimental-zvfbfmin \
; RUN: -verify-machineinstrs -target-abi=ilp32d | FileCheck %s
; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+v,+zfh,+zvfh \
; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+v,+zfh,+zvfh,+experimental-zfbfmin,+experimental-zvfbfmin \
; RUN: -verify-machineinstrs -target-abi=lp64d | FileCheck %s

declare <vscale x 1 x half> @llvm.riscv.vfmv.v.f.nxv1f16(
@@ -528,3 +528,123 @@ entry:

ret <vscale x 8 x double> %a
}

declare <vscale x 1 x bfloat> @llvm.riscv.vfmv.v.f.nxv1bf16(
<vscale x 1 x bfloat>,
bfloat,
iXLen);

define <vscale x 1 x bfloat> @intrinsic_vfmv.v.f_f_nxv1bf16(bfloat %0, iXLen %1) nounwind {
; CHECK-LABEL: intrinsic_vfmv.v.f_f_nxv1bf16:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, ma
; CHECK-NEXT: vfmv.v.f v8, fa0
; CHECK-NEXT: ret
entry:
%a = call <vscale x 1 x bfloat> @llvm.riscv.vfmv.v.f.nxv1bf16(
<vscale x 1 x bfloat> undef,
bfloat %0,
iXLen %1)

ret <vscale x 1 x bfloat> %a
}

declare <vscale x 2 x bfloat> @llvm.riscv.vfmv.v.f.nxv2bf16(
<vscale x 2 x bfloat>,
bfloat,
iXLen);

define <vscale x 2 x bfloat> @intrinsic_vfmv.v.f_f_nxv2bf16(bfloat %0, iXLen %1) nounwind {
; CHECK-LABEL: intrinsic_vfmv.v.f_f_nxv2bf16:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, ma
; CHECK-NEXT: vfmv.v.f v8, fa0
; CHECK-NEXT: ret
entry:
%a = call <vscale x 2 x bfloat> @llvm.riscv.vfmv.v.f.nxv2bf16(
<vscale x 2 x bfloat> undef,
bfloat %0,
iXLen %1)

ret <vscale x 2 x bfloat> %a
}

declare <vscale x 4 x bfloat> @llvm.riscv.vfmv.v.f.nxv4bf16(
<vscale x 4 x bfloat>,
bfloat,
iXLen);

define <vscale x 4 x bfloat> @intrinsic_vfmv.v.f_f_nxv4bf16(bfloat %0, iXLen %1) nounwind {
; CHECK-LABEL: intrinsic_vfmv.v.f_f_nxv4bf16:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, ma
; CHECK-NEXT: vfmv.v.f v8, fa0
; CHECK-NEXT: ret
entry:
%a = call <vscale x 4 x bfloat> @llvm.riscv.vfmv.v.f.nxv4bf16(
<vscale x 4 x bfloat> undef,
bfloat %0,
iXLen %1)

ret <vscale x 4 x bfloat> %a
}

declare <vscale x 8 x bfloat> @llvm.riscv.vfmv.v.f.nxv8bf16(
<vscale x 8 x bfloat>,
bfloat,
iXLen);

define <vscale x 8 x bfloat> @intrinsic_vfmv.v.f_f_nxv8bf16(bfloat %0, iXLen %1) nounwind {
; CHECK-LABEL: intrinsic_vfmv.v.f_f_nxv8bf16:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, ma
; CHECK-NEXT: vfmv.v.f v8, fa0
; CHECK-NEXT: ret
entry:
%a = call <vscale x 8 x bfloat> @llvm.riscv.vfmv.v.f.nxv8bf16(
<vscale x 8 x bfloat> undef,
bfloat %0,
iXLen %1)

ret <vscale x 8 x bfloat> %a
}

declare <vscale x 16 x bfloat> @llvm.riscv.vfmv.v.f.nxv16bf16(
<vscale x 16 x bfloat>,
bfloat,
iXLen);

define <vscale x 16 x bfloat> @intrinsic_vfmv.v.f_f_nxv16bf16(bfloat %0, iXLen %1) nounwind {
; CHECK-LABEL: intrinsic_vfmv.v.f_f_nxv16bf16:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, ma
; CHECK-NEXT: vfmv.v.f v8, fa0
; CHECK-NEXT: ret
entry:
%a = call <vscale x 16 x bfloat> @llvm.riscv.vfmv.v.f.nxv16bf16(
<vscale x 16 x bfloat> undef,
bfloat %0,
iXLen %1)

ret <vscale x 16 x bfloat> %a
}

declare <vscale x 32 x bfloat> @llvm.riscv.vfmv.v.f.nxv32bf16(
<vscale x 32 x bfloat>,
bfloat,
iXLen);

define <vscale x 32 x bfloat> @intrinsic_vfmv.v.f_f_nxv32bf16(bfloat %0, iXLen %1) nounwind {
; CHECK-LABEL: intrinsic_vfmv.v.f_f_nxv32bf16:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a0, e16, m8, ta, ma
; CHECK-NEXT: vfmv.v.f v8, fa0
; CHECK-NEXT: ret
entry:
%a = call <vscale x 32 x bfloat> @llvm.riscv.vfmv.v.f.nxv32bf16(
<vscale x 32 x bfloat> undef,
bfloat %0,
iXLen %1)

ret <vscale x 32 x bfloat> %a
}
259 changes: 259 additions & 0 deletions llvm/test/CodeGen/RISCV/stack-offset.ll
@@ -0,0 +1,259 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=riscv32 -verify-machineinstrs < %s \
; RUN: | FileCheck %s -check-prefixes=RV32,RV32I
; RUN: llc -mtriple=riscv32 -verify-machineinstrs -mattr=+zba < %s \
; RUN: | FileCheck %s -check-prefixes=RV32,RV32ZBA
; RUN: llc -mtriple=riscv64 -verify-machineinstrs < %s \
; RUN: | FileCheck %s -check-prefixes=RV64,RV64I
; RUN: llc -mtriple=riscv64 -verify-machineinstrs -mattr=+zba < %s \
; RUN: | FileCheck %s -check-prefixes=RV64,RV64ZBA

declare void @inspect(...)

define void @test() {
; RV32-LABEL: test:
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -2032
; RV32-NEXT: .cfi_def_cfa_offset 2032
; RV32-NEXT: sw ra, 2028(sp) # 4-byte Folded Spill
; RV32-NEXT: .cfi_offset ra, -4
; RV32-NEXT: addi sp, sp, -2048
; RV32-NEXT: addi sp, sp, -1120
; RV32-NEXT: .cfi_def_cfa_offset 5200
; RV32-NEXT: addi a0, sp, 12
; RV32-NEXT: addi a1, sp, 2047
; RV32-NEXT: addi a1, a1, 13
; RV32-NEXT: lui a2, 1
; RV32-NEXT: addi a2, a2, 12
; RV32-NEXT: add a2, sp, a2
; RV32-NEXT: lui a3, 1
; RV32-NEXT: addi a3, a3, 1036
; RV32-NEXT: add a3, sp, a3
; RV32-NEXT: call inspect
; RV32-NEXT: addi sp, sp, 2032
; RV32-NEXT: addi sp, sp, 1136
; RV32-NEXT: lw ra, 2028(sp) # 4-byte Folded Reload
; RV32-NEXT: addi sp, sp, 2032
; RV32-NEXT: ret
;
; RV64-LABEL: test:
; RV64: # %bb.0:
; RV64-NEXT: addi sp, sp, -2032
; RV64-NEXT: .cfi_def_cfa_offset 2032
; RV64-NEXT: sd ra, 2024(sp) # 8-byte Folded Spill
; RV64-NEXT: .cfi_offset ra, -8
; RV64-NEXT: addi sp, sp, -2048
; RV64-NEXT: addi sp, sp, -1120
; RV64-NEXT: .cfi_def_cfa_offset 5200
; RV64-NEXT: addi a0, sp, 8
; RV64-NEXT: addi a1, sp, 2047
; RV64-NEXT: addi a1, a1, 9
; RV64-NEXT: lui a2, 1
; RV64-NEXT: addiw a2, a2, 8
; RV64-NEXT: add a2, sp, a2
; RV64-NEXT: lui a3, 1
; RV64-NEXT: addiw a3, a3, 1032
; RV64-NEXT: add a3, sp, a3
; RV64-NEXT: call inspect
; RV64-NEXT: addi sp, sp, 2032
; RV64-NEXT: addi sp, sp, 1136
; RV64-NEXT: ld ra, 2024(sp) # 8-byte Folded Reload
; RV64-NEXT: addi sp, sp, 2032
; RV64-NEXT: ret
%p4 = alloca [64 x i8], align 1
%p3 = alloca [1024 x i8], align 1
%p2 = alloca [2048 x i8], align 1
%p1 = alloca [2048 x i8], align 1
call void (...) @inspect(ptr %p1, ptr %p2, ptr %p3, ptr %p4)
ret void
}

define void @align_8() {
; RV32-LABEL: align_8:
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -2032
; RV32-NEXT: .cfi_def_cfa_offset 2032
; RV32-NEXT: sw ra, 2028(sp) # 4-byte Folded Spill
; RV32-NEXT: .cfi_offset ra, -4
; RV32-NEXT: addi sp, sp, -2048
; RV32-NEXT: addi sp, sp, -32
; RV32-NEXT: .cfi_def_cfa_offset 4112
; RV32-NEXT: addi a0, sp, 7
; RV32-NEXT: lui a1, 1
; RV32-NEXT: addi a1, a1, 8
; RV32-NEXT: add a1, sp, a1
; RV32-NEXT: call inspect
; RV32-NEXT: addi sp, sp, 2032
; RV32-NEXT: addi sp, sp, 48
; RV32-NEXT: lw ra, 2028(sp) # 4-byte Folded Reload
; RV32-NEXT: addi sp, sp, 2032
; RV32-NEXT: ret
;
; RV64-LABEL: align_8:
; RV64: # %bb.0:
; RV64-NEXT: addi sp, sp, -2032
; RV64-NEXT: .cfi_def_cfa_offset 2032
; RV64-NEXT: sd ra, 2024(sp) # 8-byte Folded Spill
; RV64-NEXT: .cfi_offset ra, -8
; RV64-NEXT: addi sp, sp, -2048
; RV64-NEXT: addi sp, sp, -48
; RV64-NEXT: .cfi_def_cfa_offset 4128
; RV64-NEXT: addi a0, sp, 15
; RV64-NEXT: lui a1, 1
; RV64-NEXT: addiw a1, a1, 16
; RV64-NEXT: add a1, sp, a1
; RV64-NEXT: call inspect
; RV64-NEXT: addi sp, sp, 2032
; RV64-NEXT: addi sp, sp, 64
; RV64-NEXT: ld ra, 2024(sp) # 8-byte Folded Reload
; RV64-NEXT: addi sp, sp, 2032
; RV64-NEXT: ret
%p2 = alloca i8, align 8
%p1 = alloca [4097 x i8], align 1
call void (...) @inspect(ptr %p1, ptr %p2)
ret void
}

define void @align_4() {
; RV32-LABEL: align_4:
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -2032
; RV32-NEXT: .cfi_def_cfa_offset 2032
; RV32-NEXT: sw ra, 2028(sp) # 4-byte Folded Spill
; RV32-NEXT: .cfi_offset ra, -4
; RV32-NEXT: addi sp, sp, -2048
; RV32-NEXT: addi sp, sp, -32
; RV32-NEXT: .cfi_def_cfa_offset 4112
; RV32-NEXT: addi a0, sp, 7
; RV32-NEXT: lui a1, 1
; RV32-NEXT: addi a1, a1, 8
; RV32-NEXT: add a1, sp, a1
; RV32-NEXT: call inspect
; RV32-NEXT: addi sp, sp, 2032
; RV32-NEXT: addi sp, sp, 48
; RV32-NEXT: lw ra, 2028(sp) # 4-byte Folded Reload
; RV32-NEXT: addi sp, sp, 2032
; RV32-NEXT: ret
;
; RV64-LABEL: align_4:
; RV64: # %bb.0:
; RV64-NEXT: addi sp, sp, -2032
; RV64-NEXT: .cfi_def_cfa_offset 2032
; RV64-NEXT: sd ra, 2024(sp) # 8-byte Folded Spill
; RV64-NEXT: .cfi_offset ra, -8
; RV64-NEXT: addi sp, sp, -2048
; RV64-NEXT: addi sp, sp, -48
; RV64-NEXT: .cfi_def_cfa_offset 4128
; RV64-NEXT: addi a0, sp, 19
; RV64-NEXT: lui a1, 1
; RV64-NEXT: addiw a1, a1, 20
; RV64-NEXT: add a1, sp, a1
; RV64-NEXT: call inspect
; RV64-NEXT: addi sp, sp, 2032
; RV64-NEXT: addi sp, sp, 64
; RV64-NEXT: ld ra, 2024(sp) # 8-byte Folded Reload
; RV64-NEXT: addi sp, sp, 2032
; RV64-NEXT: ret
%p2 = alloca i8, align 4
%p1 = alloca [4097 x i8], align 1
call void (...) @inspect(ptr %p1, ptr %p2)
ret void
}

define void @align_2() {
; RV32-LABEL: align_2:
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -2032
; RV32-NEXT: .cfi_def_cfa_offset 2032
; RV32-NEXT: sw ra, 2028(sp) # 4-byte Folded Spill
; RV32-NEXT: .cfi_offset ra, -4
; RV32-NEXT: addi sp, sp, -2048
; RV32-NEXT: addi sp, sp, -32
; RV32-NEXT: .cfi_def_cfa_offset 4112
; RV32-NEXT: addi a0, sp, 9
; RV32-NEXT: lui a1, 1
; RV32-NEXT: addi a1, a1, 10
; RV32-NEXT: add a1, sp, a1
; RV32-NEXT: call inspect
; RV32-NEXT: addi sp, sp, 2032
; RV32-NEXT: addi sp, sp, 48
; RV32-NEXT: lw ra, 2028(sp) # 4-byte Folded Reload
; RV32-NEXT: addi sp, sp, 2032
; RV32-NEXT: ret
;
; RV64-LABEL: align_2:
; RV64: # %bb.0:
; RV64-NEXT: addi sp, sp, -2032
; RV64-NEXT: .cfi_def_cfa_offset 2032
; RV64-NEXT: sd ra, 2024(sp) # 8-byte Folded Spill
; RV64-NEXT: .cfi_offset ra, -8
; RV64-NEXT: addi sp, sp, -2048
; RV64-NEXT: addi sp, sp, -48
; RV64-NEXT: .cfi_def_cfa_offset 4128
; RV64-NEXT: addi a0, sp, 21
; RV64-NEXT: lui a1, 1
; RV64-NEXT: addiw a1, a1, 22
; RV64-NEXT: add a1, sp, a1
; RV64-NEXT: call inspect
; RV64-NEXT: addi sp, sp, 2032
; RV64-NEXT: addi sp, sp, 64
; RV64-NEXT: ld ra, 2024(sp) # 8-byte Folded Reload
; RV64-NEXT: addi sp, sp, 2032
; RV64-NEXT: ret
%p2 = alloca i8, align 2
%p1 = alloca [4097 x i8], align 1
call void (...) @inspect(ptr %p1, ptr %p2)
ret void
}


define void @align_1() {
; RV32-LABEL: align_1:
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -2032
; RV32-NEXT: .cfi_def_cfa_offset 2032
; RV32-NEXT: sw ra, 2028(sp) # 4-byte Folded Spill
; RV32-NEXT: .cfi_offset ra, -4
; RV32-NEXT: addi sp, sp, -2048
; RV32-NEXT: addi sp, sp, -32
; RV32-NEXT: .cfi_def_cfa_offset 4112
; RV32-NEXT: addi a0, sp, 10
; RV32-NEXT: lui a1, 1
; RV32-NEXT: addi a1, a1, 11
; RV32-NEXT: add a1, sp, a1
; RV32-NEXT: call inspect
; RV32-NEXT: addi sp, sp, 2032
; RV32-NEXT: addi sp, sp, 48
; RV32-NEXT: lw ra, 2028(sp) # 4-byte Folded Reload
; RV32-NEXT: addi sp, sp, 2032
; RV32-NEXT: ret
;
; RV64-LABEL: align_1:
; RV64: # %bb.0:
; RV64-NEXT: addi sp, sp, -2032
; RV64-NEXT: .cfi_def_cfa_offset 2032
; RV64-NEXT: sd ra, 2024(sp) # 8-byte Folded Spill
; RV64-NEXT: .cfi_offset ra, -8
; RV64-NEXT: addi sp, sp, -2048
; RV64-NEXT: addi sp, sp, -48
; RV64-NEXT: .cfi_def_cfa_offset 4128
; RV64-NEXT: addi a0, sp, 22
; RV64-NEXT: lui a1, 1
; RV64-NEXT: addiw a1, a1, 23
; RV64-NEXT: add a1, sp, a1
; RV64-NEXT: call inspect
; RV64-NEXT: addi sp, sp, 2032
; RV64-NEXT: addi sp, sp, 64
; RV64-NEXT: ld ra, 2024(sp) # 8-byte Folded Reload
; RV64-NEXT: addi sp, sp, 2032
; RV64-NEXT: ret
%p2 = alloca i8, align 1
%p1 = alloca [4097 x i8], align 1
call void (...) @inspect(ptr %p1, ptr %p2)
ret void
}
;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
; RV32I: {{.*}}
; RV32ZBA: {{.*}}
; RV64I: {{.*}}
; RV64ZBA: {{.*}}
84 changes: 84 additions & 0 deletions llvm/test/CodeGen/X86/atomicrmw-fadd-fp-vector.ll
@@ -0,0 +1,84 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4
; RUN: llc -mtriple x86_64-pc-linux < %s | FileCheck %s

define <2 x half> @test_atomicrmw_fadd_v2f16_align4(ptr addrspace(1) %ptr, <2 x half> %value) #0 {
; CHECK-LABEL: test_atomicrmw_fadd_v2f16_align4:
; CHECK: # %bb.0:
; CHECK-NEXT: pushq %rbp
; CHECK-NEXT: pushq %rbx
; CHECK-NEXT: subq $88, %rsp
; CHECK-NEXT: movq %rdi, %rbx
; CHECK-NEXT: movdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT: psrld $16, %xmm0
; CHECK-NEXT: movdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT: pinsrw $0, 2(%rdi), %xmm1
; CHECK-NEXT: pinsrw $0, (%rdi), %xmm0
; CHECK-NEXT: .p2align 4, 0x90
; CHECK-NEXT: .LBB0_1: # %atomicrmw.start
; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
; CHECK-NEXT: movdqa %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT: movdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
; CHECK-NEXT: callq __extendhfsf2@PLT
; CHECK-NEXT: movss %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
; CHECK-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
; CHECK-NEXT: callq __extendhfsf2@PLT
; CHECK-NEXT: addss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Folded Reload
; CHECK-NEXT: callq __truncsfhf2@PLT
; CHECK-NEXT: pextrw $0, %xmm0, %eax
; CHECK-NEXT: movzwl %ax, %ebp
; CHECK-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
; CHECK-NEXT: callq __extendhfsf2@PLT
; CHECK-NEXT: movss %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
; CHECK-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
; CHECK-NEXT: callq __extendhfsf2@PLT
; CHECK-NEXT: addss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Folded Reload
; CHECK-NEXT: callq __truncsfhf2@PLT
; CHECK-NEXT: pextrw $0, %xmm0, %ecx
; CHECK-NEXT: shll $16, %ecx
; CHECK-NEXT: orl %ebp, %ecx
; CHECK-NEXT: movdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
; CHECK-NEXT: pextrw $0, %xmm0, %edx
; CHECK-NEXT: shll $16, %edx
; CHECK-NEXT: movdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
; CHECK-NEXT: pextrw $0, %xmm0, %eax
; CHECK-NEXT: movzwl %ax, %eax
; CHECK-NEXT: orl %edx, %eax
; CHECK-NEXT: lock cmpxchgl %ecx, (%rbx)
; CHECK-NEXT: setne %cl
; CHECK-NEXT: pinsrw $0, %eax, %xmm0
; CHECK-NEXT: shrl $16, %eax
; CHECK-NEXT: pinsrw $0, %eax, %xmm1
; CHECK-NEXT: testb %cl, %cl
; CHECK-NEXT: jne .LBB0_1
; CHECK-NEXT: # %bb.2: # %atomicrmw.end
; CHECK-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
; CHECK-NEXT: addq $88, %rsp
; CHECK-NEXT: popq %rbx
; CHECK-NEXT: popq %rbp
; CHECK-NEXT: retq
%res = atomicrmw fadd ptr addrspace(1) %ptr, <2 x half> %value seq_cst, align 4
ret <2 x half> %res
}

define <2 x float> @test_atomicrmw_fadd_v2f32_align8(ptr addrspace(1) %ptr, <2 x float> %value) #0 {
; CHECK-LABEL: test_atomicrmw_fadd_v2f32_align8:
; CHECK: # %bb.0:
; CHECK-NEXT: movq {{.*#+}} xmm1 = mem[0],zero
; CHECK-NEXT: .p2align 4, 0x90
; CHECK-NEXT: .LBB1_1: # %atomicrmw.start
; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
; CHECK-NEXT: movq %xmm1, %rax
; CHECK-NEXT: addps %xmm0, %xmm1
; CHECK-NEXT: movq %xmm1, %rcx
; CHECK-NEXT: lock cmpxchgq %rcx, (%rdi)
; CHECK-NEXT: movq %rax, %xmm1
; CHECK-NEXT: jne .LBB1_1
; CHECK-NEXT: # %bb.2: # %atomicrmw.end
; CHECK-NEXT: movdqa %xmm1, %xmm0
; CHECK-NEXT: retq
%res = atomicrmw fadd ptr addrspace(1) %ptr, <2 x float> %value seq_cst, align 8
ret <2 x float> %res
}

attributes #0 = { nounwind }
2 changes: 1 addition & 1 deletion llvm/test/CodeGen/X86/fsafdo_test1.ll
@@ -6,7 +6,7 @@
; V01: .loc 1 9 5 is_stmt 1 discriminator 2 # foo.c:9:5
; V0: .loc 1 9 5 is_stmt 0 discriminator 11266 # foo.c:9:5
; V0: .loc 1 7 3 is_stmt 1 discriminator 11266 # foo.c:7:3
; V1: .loc 1 9 5 is_stmt 0 discriminator 258 # foo.c:9:5
; V1: .loc 1 9 5 is_stmt 0 discriminator 514 # foo.c:9:5
; V1: .loc 1 7 3 is_stmt 1 discriminator 258 # foo.c:7:3
; Check that variable __llvm_fs_discriminator__ is generated.
; V01: .type __llvm_fs_discriminator__,@object # @__llvm_fs_discriminator__
13 changes: 13 additions & 0 deletions llvm/test/CodeGen/X86/memset-minsize.ll
@@ -136,4 +136,17 @@ entry:
ret void
}

define void @small_memset_to_rep_stos_64(ptr %ptr) minsize nounwind {
; CHECK-LABEL: small_memset_to_rep_stos_64:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: pushq $16
; CHECK-NEXT: popq %rcx
; CHECK-NEXT: xorl %eax, %eax
; CHECK-NEXT: rep;stosq %rax, %es:(%rdi)
; CHECK-NEXT: retq
entry:
call void @llvm.memset.p0.i64(ptr align 8 %ptr, i8 0, i64 128, i1 false)
ret void
}

declare void @llvm.memset.p0.i32(ptr nocapture writeonly, i8, i32, i1)
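
For reference, the new test corresponds to clearing 128 bytes under minsize: rep;stosq stores rcx (16) copies of rax (0) as qwords, which encodes smaller than an unrolled run of mov stores. Roughly the C++ that produces this shape when built with -Oz (a sketch, not the test's literal origin):

#include <cstring>

// 128 bytes == 16 qwords; under minsize the backend prefers rep;stosq over
// sixteen discrete 8-byte stores.
void zero128(void *P) { std::memset(P, 0, 128); }
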