50 changes: 24 additions & 26 deletions libcxx/include/complex
Expand Up @@ -1019,9 +1019,9 @@ inline _LIBCPP_HIDE_FROM_ABI typename __libcpp_complex_overload_traits<_Tp>::_Co

template <class _Tp>
_LIBCPP_HIDE_FROM_ABI complex<_Tp> polar(const _Tp& __rho, const _Tp& __theta = _Tp()) {
if (std::__constexpr_isnan(__rho) || std::signbit(__rho))
if (std::isnan(__rho) || std::signbit(__rho))
return complex<_Tp>(_Tp(NAN), _Tp(NAN));
if (std::__constexpr_isnan(__theta)) {
if (std::isnan(__theta)) {
if (std::__constexpr_isinf(__rho))
return complex<_Tp>(__rho, __theta);
return complex<_Tp>(__theta, __theta);
Expand All @@ -1032,10 +1032,10 @@ _LIBCPP_HIDE_FROM_ABI complex<_Tp> polar(const _Tp& __rho, const _Tp& __theta =
return complex<_Tp>(_Tp(NAN), _Tp(NAN));
}
_Tp __x = __rho * std::cos(__theta);
if (std::__constexpr_isnan(__x))
if (std::isnan(__x))
__x = 0;
_Tp __y = __rho * std::sin(__theta);
if (std::__constexpr_isnan(__y))
if (std::isnan(__y))
__y = 0;
return complex<_Tp>(__x, __y);
}
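For illustration, a minimal sketch of what the branches above produce at a call site (the NaN result for negative rho is this implementation's choice, not a standard guarantee):

```cpp
#include <cassert>
#include <cmath>
#include <complex>

int main() {
  // Negative (or NaN) rho hits the first branch above and yields (NaN, NaN).
  std::complex<double> a = std::polar(-1.0, 0.5);
  assert(std::isnan(a.real()) && std::isnan(a.imag()));

  // Ordinary case: rho * (cos(theta), sin(theta)).
  std::complex<double> b = std::polar(2.0, 0.0);
  assert(b.real() == 2.0 && b.imag() == 0.0);
  return 0;
}
```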
Expand All @@ -1062,10 +1062,8 @@ _LIBCPP_HIDE_FROM_ABI complex<_Tp> sqrt(const complex<_Tp>& __x) {
return complex<_Tp>(_Tp(INFINITY), __x.imag());
if (std::__constexpr_isinf(__x.real())) {
if (__x.real() > _Tp(0))
return complex<_Tp>(
__x.real(), std::__constexpr_isnan(__x.imag()) ? __x.imag() : std::copysign(_Tp(0), __x.imag()));
return complex<_Tp>(
std::__constexpr_isnan(__x.imag()) ? __x.imag() : _Tp(0), std::copysign(__x.real(), __x.imag()));
return complex<_Tp>(__x.real(), std::isnan(__x.imag()) ? __x.imag() : std::copysign(_Tp(0), __x.imag()));
return complex<_Tp>(std::isnan(__x.imag()) ? __x.imag() : _Tp(0), std::copysign(__x.real(), __x.imag()));
}
return std::polar(std::sqrt(std::abs(__x)), std::arg(__x) / _Tp(2));
}
Expand All @@ -1080,9 +1078,9 @@ _LIBCPP_HIDE_FROM_ABI complex<_Tp> exp(const complex<_Tp>& __x) {
}
if (std::__constexpr_isinf(__x.real())) {
if (__x.real() < _Tp(0)) {
if (!std::__constexpr_isfinite(__i))
if (!std::isfinite(__i))
__i = _Tp(1);
} else if (__i == 0 || !std::__constexpr_isfinite(__i)) {
} else if (__i == 0 || !std::isfinite(__i)) {
if (std::__constexpr_isinf(__i))
__i = _Tp(NAN);
return complex<_Tp>(__x.real(), __i);
Expand Down Expand Up @@ -1131,13 +1129,13 @@ template <class _Tp>
_LIBCPP_HIDE_FROM_ABI complex<_Tp> asinh(const complex<_Tp>& __x) {
const _Tp __pi(atan2(+0., -0.));
if (std::__constexpr_isinf(__x.real())) {
if (std::__constexpr_isnan(__x.imag()))
if (std::isnan(__x.imag()))
return __x;
if (std::__constexpr_isinf(__x.imag()))
return complex<_Tp>(__x.real(), std::copysign(__pi * _Tp(0.25), __x.imag()));
return complex<_Tp>(__x.real(), std::copysign(_Tp(0), __x.imag()));
}
if (std::__constexpr_isnan(__x.real())) {
if (std::isnan(__x.real())) {
if (std::__constexpr_isinf(__x.imag()))
return complex<_Tp>(__x.imag(), __x.real());
if (__x.imag() == 0)
Expand All @@ -1156,7 +1154,7 @@ template <class _Tp>
_LIBCPP_HIDE_FROM_ABI complex<_Tp> acosh(const complex<_Tp>& __x) {
const _Tp __pi(atan2(+0., -0.));
if (std::__constexpr_isinf(__x.real())) {
if (std::__constexpr_isnan(__x.imag()))
if (std::isnan(__x.imag()))
return complex<_Tp>(std::abs(__x.real()), __x.imag());
if (std::__constexpr_isinf(__x.imag())) {
if (__x.real() > 0)
Expand All @@ -1168,7 +1166,7 @@ _LIBCPP_HIDE_FROM_ABI complex<_Tp> acosh(const complex<_Tp>& __x) {
return complex<_Tp>(-__x.real(), std::copysign(__pi, __x.imag()));
return complex<_Tp>(__x.real(), std::copysign(_Tp(0), __x.imag()));
}
if (std::__constexpr_isnan(__x.real())) {
if (std::isnan(__x.real())) {
if (std::__constexpr_isinf(__x.imag()))
return complex<_Tp>(std::abs(__x.imag()), __x.real());
return complex<_Tp>(__x.real(), __x.real());
Expand All @@ -1187,12 +1185,12 @@ _LIBCPP_HIDE_FROM_ABI complex<_Tp> atanh(const complex<_Tp>& __x) {
if (std::__constexpr_isinf(__x.imag())) {
return complex<_Tp>(std::copysign(_Tp(0), __x.real()), std::copysign(__pi / _Tp(2), __x.imag()));
}
if (std::__constexpr_isnan(__x.imag())) {
if (std::isnan(__x.imag())) {
if (std::__constexpr_isinf(__x.real()) || __x.real() == 0)
return complex<_Tp>(std::copysign(_Tp(0), __x.real()), __x.imag());
return complex<_Tp>(__x.imag(), __x.imag());
}
if (std::__constexpr_isnan(__x.real())) {
if (std::isnan(__x.real())) {
return complex<_Tp>(__x.real(), __x.real());
}
if (std::__constexpr_isinf(__x.real())) {
Expand All @@ -1209,11 +1207,11 @@ _LIBCPP_HIDE_FROM_ABI complex<_Tp> atanh(const complex<_Tp>& __x) {

template <class _Tp>
_LIBCPP_HIDE_FROM_ABI complex<_Tp> sinh(const complex<_Tp>& __x) {
if (std::__constexpr_isinf(__x.real()) && !std::__constexpr_isfinite(__x.imag()))
if (std::__constexpr_isinf(__x.real()) && !std::isfinite(__x.imag()))
return complex<_Tp>(__x.real(), _Tp(NAN));
if (__x.real() == 0 && !std::__constexpr_isfinite(__x.imag()))
if (__x.real() == 0 && !std::isfinite(__x.imag()))
return complex<_Tp>(__x.real(), _Tp(NAN));
if (__x.imag() == 0 && !std::__constexpr_isfinite(__x.real()))
if (__x.imag() == 0 && !std::isfinite(__x.real()))
return __x;
return complex<_Tp>(std::sinh(__x.real()) * std::cos(__x.imag()), std::cosh(__x.real()) * std::sin(__x.imag()));
}
Expand All @@ -1222,13 +1220,13 @@ _LIBCPP_HIDE_FROM_ABI complex<_Tp> sinh(const complex<_Tp>& __x) {

template <class _Tp>
_LIBCPP_HIDE_FROM_ABI complex<_Tp> cosh(const complex<_Tp>& __x) {
if (std::__constexpr_isinf(__x.real()) && !std::__constexpr_isfinite(__x.imag()))
if (std::__constexpr_isinf(__x.real()) && !std::isfinite(__x.imag()))
return complex<_Tp>(std::abs(__x.real()), _Tp(NAN));
if (__x.real() == 0 && !std::__constexpr_isfinite(__x.imag()))
if (__x.real() == 0 && !std::isfinite(__x.imag()))
return complex<_Tp>(_Tp(NAN), __x.real());
if (__x.real() == 0 && __x.imag() == 0)
return complex<_Tp>(_Tp(1), __x.imag());
if (__x.imag() == 0 && !std::__constexpr_isfinite(__x.real()))
if (__x.imag() == 0 && !std::isfinite(__x.real()))
return complex<_Tp>(std::abs(__x.real()), __x.imag());
return complex<_Tp>(std::cosh(__x.real()) * std::cos(__x.imag()), std::sinh(__x.real()) * std::sin(__x.imag()));
}
Expand All @@ -1238,11 +1236,11 @@ _LIBCPP_HIDE_FROM_ABI complex<_Tp> cosh(const complex<_Tp>& __x) {
template <class _Tp>
_LIBCPP_HIDE_FROM_ABI complex<_Tp> tanh(const complex<_Tp>& __x) {
if (std::__constexpr_isinf(__x.real())) {
if (!std::__constexpr_isfinite(__x.imag()))
if (!std::isfinite(__x.imag()))
return complex<_Tp>(std::copysign(_Tp(1), __x.real()), _Tp(0));
return complex<_Tp>(std::copysign(_Tp(1), __x.real()), std::copysign(_Tp(0), std::sin(_Tp(2) * __x.imag())));
}
if (std::__constexpr_isnan(__x.real()) && __x.imag() == 0)
if (std::isnan(__x.real()) && __x.imag() == 0)
return __x;
_Tp __2r(_Tp(2) * __x.real());
_Tp __2i(_Tp(2) * __x.imag());
Expand All @@ -1267,7 +1265,7 @@ template <class _Tp>
_LIBCPP_HIDE_FROM_ABI complex<_Tp> acos(const complex<_Tp>& __x) {
const _Tp __pi(atan2(+0., -0.));
if (std::__constexpr_isinf(__x.real())) {
if (std::__constexpr_isnan(__x.imag()))
if (std::isnan(__x.imag()))
return complex<_Tp>(__x.imag(), __x.real());
if (std::__constexpr_isinf(__x.imag())) {
if (__x.real() < _Tp(0))
Expand All @@ -1278,7 +1276,7 @@ _LIBCPP_HIDE_FROM_ABI complex<_Tp> acos(const complex<_Tp>& __x) {
return complex<_Tp>(__pi, std::signbit(__x.imag()) ? -__x.real() : __x.real());
return complex<_Tp>(_Tp(0), std::signbit(__x.imag()) ? __x.real() : -__x.real());
}
if (std::__constexpr_isnan(__x.real())) {
if (std::isnan(__x.real())) {
if (std::__constexpr_isinf(__x.imag()))
return complex<_Tp>(__x.real(), -__x.imag());
return complex<_Tp>(__x.real(), __x.real());
Expand Down
8 changes: 0 additions & 8 deletions libcxx/include/string
Expand Up @@ -3462,14 +3462,6 @@ inline _LIBCPP_CONSTEXPR_SINCE_CXX20 void basic_string<_CharT, _Traits, _Allocat

// find

template <class _Traits>
struct _LIBCPP_HIDDEN __traits_eq {
typedef typename _Traits::char_type char_type;
_LIBCPP_HIDE_FROM_ABI bool operator()(const char_type& __x, const char_type& __y) _NOEXCEPT {
return _Traits::eq(__x, __y);
}
};

template <class _CharT, class _Traits, class _Allocator>
_LIBCPP_CONSTEXPR_SINCE_CXX20 typename basic_string<_CharT, _Traits, _Allocator>::size_type
basic_string<_CharT, _Traits, _Allocator>::find(const value_type* __s, size_type __pos, size_type __n) const _NOEXCEPT {
Expand Down
2 changes: 0 additions & 2 deletions libcxx/test/libcxx/numerics/c.math/constexpr-fns.pass.cpp
Expand Up @@ -20,9 +20,7 @@

#include "test_macros.h"

static_assert(std::__constexpr_isnan(0.) == false, "");
static_assert(std::__constexpr_isinf(0.0) == false, "");
static_assert(std::__constexpr_isfinite(0.0) == true, "");

int main(int, char**)
{
Expand Down
7 changes: 1 addition & 6 deletions lld/COFF/Chunks.cpp
Expand Up @@ -842,14 +842,9 @@ const uint8_t arm64Thunk[] = {
0x00, 0x02, 0x1f, 0xd6, // br x16
};

size_t RangeExtensionThunkARM64::getSize() const {
assert(ctx.config.machine == ARM64);
(void)&ctx;
return sizeof(arm64Thunk);
}
size_t RangeExtensionThunkARM64::getSize() const { return sizeof(arm64Thunk); }

void RangeExtensionThunkARM64::writeTo(uint8_t *buf) const {
assert(ctx.config.machine == ARM64);
memcpy(buf, arm64Thunk, sizeof(arm64Thunk));
applyArm64Addr(buf + 0, target->getRVA(), rva, 12);
applyArm64Imm(buf + 4, target->getRVA() & 0xfff, 0);
Expand Down
10 changes: 6 additions & 4 deletions lld/COFF/Chunks.h
Expand Up @@ -615,20 +615,22 @@ class RangeExtensionThunkARM : public NonSectionCodeChunk {
COFFLinkerContext &ctx;
};

// A range extension thunk used for both ARM64EC and ARM64 machine types.
class RangeExtensionThunkARM64 : public NonSectionCodeChunk {
public:
explicit RangeExtensionThunkARM64(COFFLinkerContext &ctx, Defined *t)
: target(t), ctx(ctx) {
explicit RangeExtensionThunkARM64(MachineTypes machine, Defined *t)
: target(t), machine(machine) {
setAlignment(4);
assert(llvm::COFF::isAnyArm64(machine));
}
size_t getSize() const override;
void writeTo(uint8_t *buf) const override;
MachineTypes getMachine() const override { return ARM64; }
MachineTypes getMachine() const override { return machine; }

Defined *target;

private:
COFFLinkerContext &ctx;
MachineTypes machine;
};

// Windows-specific.
Expand Down
38 changes: 22 additions & 16 deletions lld/COFF/Writer.cpp
Expand Up @@ -219,10 +219,12 @@ class Writer {
void sortECChunks();
void removeUnusedSections();
void assignAddresses();
bool isInRange(uint16_t relType, uint64_t s, uint64_t p, int margin);
bool isInRange(uint16_t relType, uint64_t s, uint64_t p, int margin,
MachineTypes machine);
std::pair<Defined *, bool> getThunk(DenseMap<uint64_t, Defined *> &lastThunks,
Defined *target, uint64_t p,
uint16_t type, int margin);
uint16_t type, int margin,
MachineTypes machine);
bool createThunks(OutputSection *os, int margin);
bool verifyRanges(const std::vector<Chunk *> chunks);
void createECCodeMap();
Expand Down Expand Up @@ -396,8 +398,9 @@ void OutputSection::addContributingPartialSection(PartialSection *sec) {

// Check whether the target address S is in range from a relocation
// of type relType at address P.
bool Writer::isInRange(uint16_t relType, uint64_t s, uint64_t p, int margin) {
if (ctx.config.machine == ARMNT) {
bool Writer::isInRange(uint16_t relType, uint64_t s, uint64_t p, int margin,
MachineTypes machine) {
if (machine == ARMNT) {
int64_t diff = AbsoluteDifference(s, p + 4) + margin;
switch (relType) {
case IMAGE_REL_ARM_BRANCH20T:
Expand All @@ -408,7 +411,7 @@ bool Writer::isInRange(uint16_t relType, uint64_t s, uint64_t p, int margin) {
default:
return true;
}
} else if (ctx.config.machine == ARM64) {
} else if (isAnyArm64(machine)) {
int64_t diff = AbsoluteDifference(s, p) + margin;
switch (relType) {
case IMAGE_REL_ARM64_BRANCH26:
Expand All @@ -421,25 +424,25 @@ bool Writer::isInRange(uint16_t relType, uint64_t s, uint64_t p, int margin) {
return true;
}
} else {
llvm_unreachable("Unexpected architecture");
return true;
}
}

// Return the last thunk for the given target if it is in range,
// or create a new one.
std::pair<Defined *, bool>
Writer::getThunk(DenseMap<uint64_t, Defined *> &lastThunks, Defined *target,
uint64_t p, uint16_t type, int margin) {
uint64_t p, uint16_t type, int margin, MachineTypes machine) {
Defined *&lastThunk = lastThunks[target->getRVA()];
if (lastThunk && isInRange(type, lastThunk->getRVA(), p, margin))
if (lastThunk && isInRange(type, lastThunk->getRVA(), p, margin, machine))
return {lastThunk, false};
Chunk *c;
switch (ctx.config.machine) {
case ARMNT:
switch (getMachineArchType(machine)) {
case Triple::thumb:
c = make<RangeExtensionThunkARM>(ctx, target);
break;
case ARM64:
c = make<RangeExtensionThunkARM64>(ctx, target);
case Triple::aarch64:
c = make<RangeExtensionThunkARM64>(machine, target);
break;
default:
llvm_unreachable("Unexpected architecture");
Expand Down Expand Up @@ -471,6 +474,7 @@ bool Writer::createThunks(OutputSection *os, int margin) {
SectionChunk *sc = dyn_cast_or_null<SectionChunk>(os->chunks[i]);
if (!sc)
continue;
MachineTypes machine = sc->getMachine();
size_t thunkInsertionSpot = i + 1;

// Try to get a good enough estimate of where new thunks will be placed.
Expand All @@ -497,11 +501,12 @@ bool Writer::createThunks(OutputSection *os, int margin) {

uint64_t s = sym->getRVA();

if (isInRange(rel.Type, s, p, margin))
if (isInRange(rel.Type, s, p, margin, machine))
continue;

// If the target isn't in range, hook it up to an existing or new thunk.
auto [thunk, wasNew] = getThunk(lastThunks, sym, p, rel.Type, margin);
auto [thunk, wasNew] =
getThunk(lastThunks, sym, p, rel.Type, margin, machine);
if (wasNew) {
Chunk *thunkChunk = thunk->getChunk();
thunkChunk->setRVA(
Expand Down Expand Up @@ -603,6 +608,7 @@ bool Writer::verifyRanges(const std::vector<Chunk *> chunks) {
SectionChunk *sc = dyn_cast_or_null<SectionChunk>(c);
if (!sc)
continue;
MachineTypes machine = sc->getMachine();

ArrayRef<coff_relocation> relocs = sc->getRelocs();
for (const coff_relocation &rel : relocs) {
Expand All @@ -615,7 +621,7 @@ bool Writer::verifyRanges(const std::vector<Chunk *> chunks) {
uint64_t p = sc->getRVA() + rel.VirtualAddress;
uint64_t s = sym->getRVA();

if (!isInRange(rel.Type, s, p, 0))
if (!isInRange(rel.Type, s, p, 0, machine))
return false;
}
}
Expand All @@ -625,7 +631,7 @@ bool Writer::verifyRanges(const std::vector<Chunk *> chunks) {
// Assign addresses and add thunks if necessary.
void Writer::finalizeAddresses() {
assignAddresses();
if (ctx.config.machine != ARMNT && ctx.config.machine != ARM64)
if (ctx.config.machine != ARMNT && !isAnyArm64(ctx.config.machine))
return;

size_t origNumChunks = 0;
Expand Down
186 changes: 186 additions & 0 deletions lld/test/COFF/arm64ec-range-thunks.s
@@ -0,0 +1,186 @@
# REQUIRES: aarch64, x86
# RUN: split-file %s %t.dir && cd %t.dir

# RUN: llvm-mc -filetype=obj -triple=arm64ec-windows funcs.s -o funcs-arm64ec.obj
# RUN: llvm-mc -filetype=obj -triple=aarch64-windows native-funcs.s -o funcs-aarch64.obj
# RUN: llvm-mc -filetype=obj -triple=x86_64-windows space.s -o space-x86_64.obj
# RUN: llvm-mc -filetype=obj -triple=aarch64-windows space.s -o space-aarch64.obj
# RUN: llvm-mc -filetype=obj -triple=arm64ec-windows %S/Inputs/loadconfig-arm64ec.s -o loadconfig-arm64ec.obj


# Test generating range extension thunks for ARM64EC code. Place some x86_64 chunks in the middle
# and make sure that the thunks stay within ARM64EC code range.

# RUN: lld-link -machine:arm64ec -noentry -dll funcs-arm64ec.obj space-x86_64.obj loadconfig-arm64ec.obj -out:test.dll \
# RUN: -verbose 2>&1 | FileCheck -check-prefix=VERBOSE %s
# VERBOSE: Added 3 thunks with margin {{.*}} in 1 passes

# RUN: llvm-objdump -d test.dll | FileCheck --check-prefix=DISASM %s

# DISASM: Disassembly of section .code1:
# DISASM-EMPTY:
# DISASM-NEXT: 0000000180003000 <.code1>:
# DISASM-NEXT: 180003000: 36000040 tbz w0, #0x0, 0x180003008 <.code1+0x8>
# DISASM-NEXT: 180003004: d65f03c0 ret
# DISASM-NEXT: 180003008: b0000050 adrp x16, 0x18000c000
# DISASM-NEXT: 18000300c: 91000210 add x16, x16, #0x0
# DISASM-NEXT: 180003010: d61f0200 br x16
# DISASM-EMPTY:
# DISASM-NEXT: Disassembly of section .code2:
# DISASM-EMPTY:
# DISASM-NEXT: 0000000180004000 <.code2>:
# DISASM-NEXT: ...
# DISASM-EMPTY:
# DISASM-NEXT: Disassembly of section .code3:
# DISASM-EMPTY:
# DISASM-NEXT: 0000000180005000 <.code3>:
# DISASM-NEXT: ...
# DISASM-NEXT: 18000c000: 36000060 tbz w0, #0x0, 0x18000c00c <.code3+0x700c>
# DISASM-NEXT: 18000c004: d65f03c0 ret
# DISASM-NEXT: 18000c008: 00000000 udf #0x0
# DISASM-NEXT: 18000c00c: 90000050 adrp x16, 0x180014000 <.code3+0xf000>
# DISASM-NEXT: 18000c010: 91006210 add x16, x16, #0x18
# DISASM-NEXT: 18000c014: d61f0200 br x16
# DISASM-NEXT: ...
# DISASM-NEXT: 180014018: 36000040 tbz w0, #0x0, 0x180014020 <.code3+0xf020>
# DISASM-NEXT: 18001401c: d65f03c0 ret
# DISASM-NEXT: 180014020: f0ffff70 adrp x16, 0x180003000 <.code1>
# DISASM-NEXT: 180014024: 91000210 add x16, x16, #0x0
# DISASM-NEXT: 180014028: d61f0200 br x16

# RUN: llvm-readobj --coff-load-config test.dll | FileCheck --check-prefix=LOADCFG %s

# LOADCFG: CodeMap [
# LOADCFG-NEXT: 0x3000 - 0x3014 ARM64EC
# LOADCFG-NEXT: 0x4000 - 0x4300 X64
# LOADCFG-NEXT: 0x5000 - 0x1402C ARM64EC
# LOADCFG-NEXT: ]


# A similar test using a hybrid binary with native placeholder chunks.

# RUN: lld-link -machine:arm64x -noentry -dll funcs-arm64ec.obj space-aarch64.obj loadconfig-arm64ec.obj -out:testx.dll \
# RUN: -verbose 2>&1 | FileCheck -check-prefix=VERBOSE %s
# RUN: llvm-objdump -d testx.dll | FileCheck --check-prefix=DISASM %s

# RUN: llvm-readobj --coff-load-config testx.dll | FileCheck --check-prefix=LOADCFGX %s

# LOADCFGX: CodeMap [
# LOADCFGX-NEXT: 0x3000 - 0x3014 ARM64EC
# LOADCFGX-NEXT: 0x4000 - 0x4300 ARM64
# LOADCFGX-NEXT: 0x5000 - 0x1402C ARM64EC
# LOADCFGX-NEXT: ]


# Test a hybrid ARM64X binary which requires range extension thunks for both native and EC relocations.

# RUN: lld-link -machine:arm64x -noentry -dll funcs-arm64ec.obj funcs-aarch64.obj loadconfig-arm64ec.obj -out:testx2.dll \
# RUN: -verbose 2>&1 | FileCheck -check-prefix=VERBOSEX %s
# VERBOSEX: Added 5 thunks with margin {{.*}} in 1 passes

# RUN: llvm-objdump -d testx2.dll | FileCheck --check-prefix=DISASMX %s

# DISASMX: Disassembly of section .code1:
# DISASMX-EMPTY:
# DISASMX-NEXT: 0000000180003000 <.code1>:
# DISASMX-NEXT: 180003000: 36000040 tbz w0, #0x0, 0x180003008 <.code1+0x8>
# DISASMX-NEXT: 180003004: d65f03c0 ret
# DISASMX-NEXT: 180003008: b0000050 adrp x16, 0x18000c000
# DISASMX-NEXT: 18000300c: 91000210 add x16, x16, #0x0
# DISASMX-NEXT: 180003010: d61f0200 br x16
# DISASMX-EMPTY:
# DISASMX-NEXT: Disassembly of section .code2:
# DISASMX-EMPTY:
# DISASMX-NEXT: 0000000180004000 <.code2>:
# DISASMX-NEXT: 180004000: 36000040 tbz w0, #0x0, 0x180004008 <.code2+0x8>
# DISASMX-NEXT: 180004004: d65f03c0 ret
# DISASMX-NEXT: 180004008: b0000090 adrp x16, 0x180015000
# DISASMX-NEXT: 18000400c: 91000210 add x16, x16, #0x0
# DISASMX-NEXT: 180004010: d61f0200 br x16
# DISASMX-EMPTY:
# DISASMX-NEXT: Disassembly of section .code3:
# DISASMX-EMPTY:
# DISASMX-NEXT: 0000000180005000 <.code3>:
# DISASMX-NEXT: ...
# DISASMX-NEXT: 18000c000: 36000060 tbz w0, #0x0, 0x18000c00c <.code3+0x700c>
# DISASMX-NEXT: 18000c004: d65f03c0 ret
# DISASMX-NEXT: 18000c008: 00000000 udf #0x0
# DISASMX-NEXT: 18000c00c: 90000050 adrp x16, 0x180014000 <.code3+0xf000>
# DISASMX-NEXT: 18000c010: 91006210 add x16, x16, #0x18
# DISASMX-NEXT: 18000c014: d61f0200 br x16
# DISASMX-NEXT: ...
# DISASMX-NEXT: 180014018: 36000040 tbz w0, #0x0, 0x180014020 <.code3+0xf020>
# DISASMX-NEXT: 18001401c: d65f03c0 ret
# DISASMX-NEXT: 180014020: f0ffff70 adrp x16, 0x180003000 <.code1>
# DISASMX-NEXT: 180014024: 91000210 add x16, x16, #0x0
# DISASMX-NEXT: 180014028: d61f0200 br x16
# DISASMX-EMPTY:
# DISASMX-NEXT: Disassembly of section .code4:
# DISASMX-EMPTY:
# DISASMX-NEXT: 0000000180015000 <.code4>:
# DISASMX-NEXT: 180015000: 36000040 tbz w0, #0x0, 0x180015008 <.code4+0x8>
# DISASMX-NEXT: 180015004: d65f03c0 ret
# DISASMX-NEXT: 180015008: f0ffff70 adrp x16, 0x180004000 <.code2>
# DISASMX-NEXT: 18001500c: 91000210 add x16, x16, #0x0
# DISASMX-NEXT: 180015010: d61f0200 br x16

# RUN: llvm-readobj --coff-load-config testx2.dll | FileCheck --check-prefix=LOADCFGX2 %s

# LOADCFGX2: CodeMap [
# LOADCFGX2-NEXT: 0x3000 - 0x3014 ARM64EC
# LOADCFGX2-NEXT: 0x4000 - 0x4014 ARM64
# LOADCFGX2-NEXT: 0x5000 - 0x1402C ARM64EC
# LOADCFGX2-NEXT: 0x15000 - 0x15014 ARM64
# LOADCFGX2-NEXT: ]


#--- funcs.s
.globl main
.globl func1
.globl func2

.section .code1, "xr"
main:
tbz w0, #0, func1
ret

.section .code3$a, "xr"
.space 0x7000

.section .code3$b, "xr"
func1:
tbz w0, #0, func2
ret
.space 1

.section .code3$c, "xr"
.space 0x8000

.section .code3$d, "xr"
.align 2
func2:
tbz w0, #0, main
ret

#--- space.s
.section .code2$a, "xr"
.space 0x100
.section .code2$b, "xr"
.space 0x100
.section .code2$c, "xr"
.space 0x100

#--- native-funcs.s
.globl nmain
.globl nfunc

.section .code2, "xr"
nmain:
tbz w0, #0, nfunc
ret

.section .code4, "xr"
.align 2
nfunc:
tbz w0, #0, nmain
ret
4 changes: 0 additions & 4 deletions lldb/test/API/tools/lldb-dap/console/TestDAP_console.py
Expand Up @@ -37,7 +37,6 @@ def check_lldb_command(
),
)

@skipIfWindows
def test_scopes_variables_setVariable_evaluate(self):
"""
Tests that the "scopes" request causes the currently selected
Expand Down Expand Up @@ -80,7 +79,6 @@ def test_scopes_variables_setVariable_evaluate(self):

self.check_lldb_command("frame select", "frame #1", "frame 1 is selected")

@skipIfWindows
def test_custom_escape_prefix(self):
program = self.getBuildArtifact("a.out")
self.build_and_launch(program, commandEscapePrefix="::")
Expand All @@ -96,7 +94,6 @@ def test_custom_escape_prefix(self):
command_escape_prefix="::",
)

@skipIfWindows
def test_empty_escape_prefix(self):
program = self.getBuildArtifact("a.out")
self.build_and_launch(program, commandEscapePrefix="")
Expand Down Expand Up @@ -151,7 +148,6 @@ def test_exit_status_message_sigterm(self):
"Exit status does not contain message 'exited with status'",
)

@skipIfWindows
def test_exit_status_message_ok(self):
program = self.getBuildArtifact("a.out")
self.build_and_launch(program, commandEscapePrefix="")
Expand Down
Expand Up @@ -7,7 +7,6 @@


class TestDAP_redirection_to_console(lldbdap_testcase.DAPTestCaseBase):
@skipIfWindows
def test(self):
"""
Without proper stderr and stdout redirection, the following code would throw an
Expand Down
2 changes: 0 additions & 2 deletions lldb/test/API/tools/lldb-dap/coreFile/TestDAP_coreFile.py
Expand Up @@ -12,7 +12,6 @@


class TestDAP_coreFile(lldbdap_testcase.DAPTestCaseBase):
@skipIfWindows
@skipIfLLVMTargetMissing("X86")
def test_core_file(self):
current_dir = os.path.dirname(__file__)
Expand Down Expand Up @@ -58,7 +57,6 @@ def test_core_file(self):
self.dap_server.request_next(threadId=32259)
self.assertEqual(self.get_stackFrames(), expected_frames)

@skipIfWindows
@skipIfLLVMTargetMissing("X86")
def test_core_file_source_mapping(self):
"""Test that sourceMap property is correctly applied when loading a core"""
Expand Down
30 changes: 10 additions & 20 deletions lldb/test/API/tools/lldb-dap/launch/TestDAP_launch.py
Expand Up @@ -9,10 +9,10 @@
import lldbdap_testcase
import time
import os
import re


class TestDAP_launch(lldbdap_testcase.DAPTestCaseBase):
@skipIfWindows
def test_default(self):
"""
Tests the default launch of a simple program. No arguments,
Expand All @@ -27,7 +27,6 @@ def test_default(self):
lines = output.splitlines()
self.assertIn(program, lines[0], "make sure program path is in first argument")

@skipIfWindows
def test_termination(self):
"""
Tests the correct termination of lldb-dap upon a 'disconnect'
Expand All @@ -47,7 +46,6 @@ def test_termination(self):
# Check the return code
self.assertEqual(self.dap_server.process.poll(), 0)

@skipIfWindows
def test_stopOnEntry(self):
"""
Tests the default launch of a simple program that stops at the
Expand All @@ -66,7 +64,6 @@ def test_stopOnEntry(self):
reason, "breakpoint", 'verify stop isn\'t "main" breakpoint'
)

@skipIfWindows
def test_cwd(self):
"""
Tests the default launch of a simple program with a current working
Expand All @@ -92,15 +89,17 @@ def test_cwd(self):
)
self.assertTrue(found, "verified program working directory")

@skipIfWindows
def test_debuggerRoot(self):
"""
Tests the "debuggerRoot" will change the working directory of
the lldb-dap debug adaptor.
"""
program = self.getBuildArtifact("a.out")
program_parent_dir = os.path.realpath(os.path.dirname(os.path.dirname(program)))
commands = ["platform shell echo cwd = $PWD"]

var = "%cd%" if lldbplatformutil.getHostPlatform() == "windows" else "$PWD"
commands = [f"platform shell echo cwd = {var}"]

self.build_and_launch(
program, debuggerRoot=program_parent_dir, initCommands=commands
)
Expand All @@ -114,14 +113,13 @@ def test_debuggerRoot(self):
found = True
self.assertEqual(
program_parent_dir,
line[len(prefix) :],
line.strip()[len(prefix) :],
"lldb-dap working dir '%s' == '%s'"
% (program_parent_dir, line[6:]),
% (program_parent_dir, line[len(prefix) :]),
)
self.assertTrue(found, "verified lldb-dap working directory")
self.continue_to_exit()

@skipIfWindows
def test_sourcePath(self):
"""
Tests the "sourcePath" will set the target.source-map.
Expand All @@ -146,7 +144,6 @@ def test_sourcePath(self):
self.assertTrue(found, 'found "sourcePath" in console output')
self.continue_to_exit()

@skipIfWindows
def test_disableSTDIO(self):
"""
Tests the default launch of a simple program with STDIO disabled.
Expand Down Expand Up @@ -182,7 +179,6 @@ def test_shellExpandArguments_enabled(self):
quote_path, line, 'verify "%s" expanded to "%s"' % (glob, program)
)

@skipIfWindows
def test_shellExpandArguments_disabled(self):
"""
Tests the default launch of a simple program with shell expansion
Expand All @@ -204,7 +200,6 @@ def test_shellExpandArguments_disabled(self):
quote_path, line, 'verify "%s" stayed to "%s"' % (glob, glob)
)

@skipIfWindows
def test_args(self):
"""
Tests launch of a simple program with arguments
Expand All @@ -229,7 +224,6 @@ def test_args(self):
'arg[%i] "%s" not in "%s"' % (i + 1, quoted_arg, lines[i]),
)

@skipIfWindows
def test_environment(self):
"""
Tests launch of a simple program with environment variables
Expand Down Expand Up @@ -258,7 +252,6 @@ def test_environment(self):
found, '"%s" must exist in program environment (%s)' % (var, lines)
)

@skipIfWindows
@skipIf(
archs=["arm", "aarch64"]
) # failed run https://lab.llvm.org/buildbot/#/builders/96/builds/6933
Expand Down Expand Up @@ -344,7 +337,6 @@ def test_commands(self):
self.verify_commands("exitCommands", output, exitCommands)
self.verify_commands("terminateCommands", output, terminateCommands)

@skipIfWindows
def test_extra_launch_commands(self):
"""
Tests the "launchCommands" with extra launching settings
Expand Down Expand Up @@ -409,7 +401,6 @@ def test_extra_launch_commands(self):
output = self.get_console(timeout=lldbdap_testcase.DAPTestCaseBase.timeoutval)
self.verify_commands("exitCommands", output, exitCommands)

@skipIfWindows
def test_failing_launch_commands(self):
"""
Tests "launchCommands" failures prevents a launch.
Expand All @@ -418,7 +409,8 @@ def test_failing_launch_commands(self):
program = self.getBuildArtifact("a.out")

# Run an invalid launch command, in this case a bad path.
launchCommands = ['!target create "/bad/path%s"' % (program)]
bad_path = os.path.join("bad", "path")
launchCommands = ['!target create "%s%s"' % (bad_path, program)]

initCommands = ["target list", "platform list"]
preRunCommands = ["image list a.out", "image dump sections a.out"]
Expand Down Expand Up @@ -447,9 +439,8 @@ def test_failing_launch_commands(self):
        # Verify all "launchCommands" were found in console output
# The launch should fail due to the invalid command.
self.verify_commands("launchCommands", output, launchCommands)
self.assertRegex(output, r"bad/path/.*does not exist")
self.assertRegex(output, re.escape(bad_path) + r".*does not exist")

@skipIfWindows
@skipIfNetBSD # Hangs on NetBSD as well
@skipIf(archs=["arm", "aarch64"], oslist=["linux"])
def test_terminate_commands(self):
Expand All @@ -476,7 +467,6 @@ def test_terminate_commands(self):
)
self.verify_commands("terminateCommands", output, terminateCommands)

@skipIfWindows
def test_version(self):
"""
Tests that "initialize" response contains the "version" string the same
Expand Down
4 changes: 4 additions & 0 deletions lldb/test/API/tools/lldb-dap/launch/main.c
@@ -1,6 +1,10 @@
#include <stdio.h>
#include <stdlib.h>
#ifdef _WIN32
#include <direct.h>
#else
#include <unistd.h>
#endif

int main(int argc, char const *argv[], char const *envp[]) {
for (int i = 0; i < argc; ++i)
Expand Down
1 change: 1 addition & 0 deletions llvm/benchmarks/CMakeLists.txt
Expand Up @@ -5,3 +5,4 @@ set(LLVM_LINK_COMPONENTS
add_benchmark(DummyYAML DummyYAML.cpp PARTIAL_SOURCES_INTENDED)
add_benchmark(xxhash xxhash.cpp PARTIAL_SOURCES_INTENDED)
add_benchmark(GetIntrinsicForClangBuiltin GetIntrinsicForClangBuiltin.cpp PARTIAL_SOURCES_INTENDED)
add_benchmark(FormatVariadicBM FormatVariadicBM.cpp PARTIAL_SOURCES_INTENDED)
63 changes: 63 additions & 0 deletions llvm/benchmarks/FormatVariadicBM.cpp
@@ -0,0 +1,63 @@
//===- FormatVariadicBM.cpp - formatv() benchmark --------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#include "benchmark/benchmark.h"
#include "llvm/Support/FormatVariadic.h"
#include <algorithm>
#include <string>
#include <vector>

using namespace llvm;
using namespace std;

// Generate a list of format strings that have `NumReplacements` replacements
// by permuting the replacements and some literal text.
static vector<string> getFormatStrings(int NumReplacements) {
vector<string> Components;
for (int I = 0; I < NumReplacements; I++)
Components.push_back("{" + to_string(I) + "}");
// Intersperse these with some other literal text (_).
const string_view Literal = "____";
for (char C : Literal)
Components.push_back(string(1, C));

vector<string> Formats;
do {
string Concat;
for (const string &C : Components)
Concat += C;
Formats.emplace_back(Concat);
} while (next_permutation(Components.begin(), Components.end()));
return Formats;
}

// Generate the set of formats to exercise outside the benchmark code.
static const vector<vector<string>> Formats = {
getFormatStrings(1), getFormatStrings(2), getFormatStrings(3),
getFormatStrings(4), getFormatStrings(5),
};

// Benchmark formatv() for a variety of format strings and 1-5 replacements.
static void BM_FormatVariadic(benchmark::State &state) {
for (auto _ : state) {
for (const string &Fmt : Formats[0])
formatv(Fmt.c_str(), 1).str();
for (const string &Fmt : Formats[1])
formatv(Fmt.c_str(), 1, 2).str();
for (const string &Fmt : Formats[2])
formatv(Fmt.c_str(), 1, 2, 3).str();
for (const string &Fmt : Formats[3])
formatv(Fmt.c_str(), 1, 2, 3, 4).str();
for (const string &Fmt : Formats[4])
formatv(Fmt.c_str(), 1, 2, 3, 4, 5).str();
}
}

BENCHMARK(BM_FormatVariadic);

BENCHMARK_MAIN();
2 changes: 1 addition & 1 deletion llvm/docs/CodeOfConduct.rst
Expand Up @@ -197,7 +197,7 @@ Unported License`_.

.. _Django Project: https://www.djangoproject.com/conduct/
.. _Speak Up! project: http://speakup.io/coc.html
.. _sexual and gender-based violence:
.. _sexual and gender-based violence: https://hr.un.org/sites/hr.un.org/files/SEA%20Glossary%20%20%5BSecond%20Edition%20-%202017%5D%20-%20English_0.pdf
.. _hate crimes: https://hatecrime.osce.org
.. _hate speech: https://www.un.org/en/genocideprevention/documents/UN%20Strategy%20and%20Plan%20of%20Action%20on%20Hate%20Speech%2018%20June%20SYNOPSIS.pdf
.. _Creative Commons Attribution 3.0 Unported License: http://creativecommons.org/licenses/by/3.0/
7 changes: 3 additions & 4 deletions llvm/docs/TestSuiteGuide.md
Expand Up @@ -16,10 +16,9 @@ Quickstart
environment:

```bash
% mkdir venv
% virtualenv venv
% . venv/bin/activate
% pip install svn+https://llvm.org/svn/llvm-project/llvm/trunk/utils/lit
% python3 -m venv .venv
% . .venv/bin/activate
% pip install git+https://github.com/llvm/llvm-project.git#subdirectory=llvm/utils/lit
% lit --version
lit 0.8.0dev
```
Expand Down
39 changes: 22 additions & 17 deletions llvm/include/llvm/Support/FormatVariadic.h
Expand Up @@ -67,23 +67,20 @@ class formatv_object_base {
protected:
StringRef Fmt;
ArrayRef<support::detail::format_adapter *> Adapters;

static bool consumeFieldLayout(StringRef &Spec, AlignStyle &Where,
size_t &Align, char &Pad);

static std::pair<ReplacementItem, StringRef>
splitLiteralAndReplacement(StringRef Fmt);
bool Validate;

formatv_object_base(StringRef Fmt,
ArrayRef<support::detail::format_adapter *> Adapters)
: Fmt(Fmt), Adapters(Adapters) {}
ArrayRef<support::detail::format_adapter *> Adapters,
bool Validate)
: Fmt(Fmt), Adapters(Adapters), Validate(Validate) {}

formatv_object_base(formatv_object_base const &rhs) = delete;
formatv_object_base(formatv_object_base &&rhs) = default;

public:
void format(raw_ostream &S) const {
for (auto &R : parseFormatString(Fmt)) {
const auto Replacements = parseFormatString(Fmt, Adapters.size(), Validate);
for (const auto &R : Replacements) {
if (R.Type == ReplacementType::Empty)
continue;
if (R.Type == ReplacementType::Literal) {
Expand All @@ -101,9 +98,10 @@ class formatv_object_base {
Align.format(S, R.Options);
}
}
static SmallVector<ReplacementItem, 2> parseFormatString(StringRef Fmt);

static std::optional<ReplacementItem> parseReplacementItem(StringRef Spec);
// Parse and optionally validate format string (in debug builds).
static SmallVector<ReplacementItem, 2>
parseFormatString(StringRef Fmt, size_t NumArgs, bool Validate);

std::string str() const {
std::string Result;
Expand Down Expand Up @@ -149,8 +147,8 @@ template <typename Tuple> class formatv_object : public formatv_object_base {
};

public:
formatv_object(StringRef Fmt, Tuple &&Params)
: formatv_object_base(Fmt, ParameterPointers),
formatv_object(StringRef Fmt, Tuple &&Params, bool Validate)
: formatv_object_base(Fmt, ParameterPointers, Validate),
Parameters(std::move(Params)) {
ParameterPointers = std::apply(create_adapters(), Parameters);
}
Expand Down Expand Up @@ -247,15 +245,22 @@ template <typename Tuple> class formatv_object : public formatv_object_base {
// assertion. Otherwise, it will try to do something reasonable, but in general
// the details of what that is are undefined.
//

// formatv() with validation enable/disable controlled by the first argument.
template <typename... Ts>
inline auto formatv(const char *Fmt, Ts &&...Vals)
inline auto formatv(bool Validate, const char *Fmt, Ts &&...Vals)
-> formatv_object<decltype(std::make_tuple(
support::detail::build_format_adapter(std::forward<Ts>(Vals))...))> {
using ParamTuple = decltype(std::make_tuple(
support::detail::build_format_adapter(std::forward<Ts>(Vals))...));
return formatv_object<ParamTuple>(
Fmt, std::make_tuple(support::detail::build_format_adapter(
std::forward<Ts>(Vals))...));
auto Params = std::make_tuple(
support::detail::build_format_adapter(std::forward<Ts>(Vals))...);
return formatv_object<ParamTuple>(Fmt, std::move(Params), Validate);
}

// formatv() with validation enabled.
template <typename... Ts> inline auto formatv(const char *Fmt, Ts &&...Vals) {
return formatv<Ts...>(true, Fmt, std::forward<Ts>(Vals)...);
}

} // end namespace llvm
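For illustration, a minimal usage sketch of the two overloads declared above (values and output are hypothetical):

```cpp
#include "llvm/Support/FormatVariadic.h"
#include "llvm/Support/raw_ostream.h"

int main() {
  // Default overload: validation enabled, so argument count and replacement
  // indices are checked (asserting in debug builds on a mismatch).
  llvm::outs() << llvm::formatv("{0} + {1} = {2}\n", 20, 22, 42);

  // Explicit overload: the leading bool toggles validation, e.g. for a format
  // string that intentionally ignores some of its arguments.
  llvm::outs() << llvm::formatv(/*Validate=*/false, "only {0}\n", 1, 2);
  return 0;
}
```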
Expand Down
21 changes: 13 additions & 8 deletions llvm/include/llvm/Transforms/IPO/Attributor.h
Expand Up @@ -294,11 +294,16 @@ struct RangeTy {
return *this;
}

/// Comparison for sorting ranges by offset.
/// Comparison for sorting ranges.
///
/// Returns true if the offset \p L is less than that of \p R.
inline static bool OffsetLessThan(const RangeTy &L, const RangeTy &R) {
return L.Offset < R.Offset;
/// Returns true if the offset of \p L is less than that of \p R. If the two
  /// offsets are the same, compare the sizes instead.
inline static bool LessThan(const RangeTy &L, const RangeTy &R) {
if (L.Offset < R.Offset)
return true;
if (L.Offset == R.Offset)
return L.Size < R.Size;
return false;
}
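A small sketch of the ordering this comparator produces (illustrative, not part of the change):

```cpp
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/Transforms/IPO/Attributor.h"

void sortRangesExample() {
  using llvm::AA::RangeTy;
  // LessThan orders by Offset first and breaks ties by Size, so the result
  // here is {0,4}, {0,8}, {8,4}.
  llvm::SmallVector<RangeTy, 3> Rs = {{8, 4}, {0, 8}, {0, 4}};
  llvm::sort(Rs, RangeTy::LessThan);
}
```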

/// Constants used to represent special offsets or sizes.
Expand Down Expand Up @@ -5809,7 +5814,7 @@ struct AAPointerInfo : public AbstractAttribute {
// Helpers required for std::set_difference
using value_type = RangeTy;
void push_back(const RangeTy &R) {
assert((Ranges.empty() || RangeTy::OffsetLessThan(Ranges.back(), R)) &&
assert((Ranges.empty() || RangeTy::LessThan(Ranges.back(), R)) &&
"Ensure the last element is the greatest.");
Ranges.push_back(R);
}
Expand All @@ -5818,7 +5823,7 @@ struct AAPointerInfo : public AbstractAttribute {
static void set_difference(const RangeList &L, const RangeList &R,
RangeList &D) {
std::set_difference(L.begin(), L.end(), R.begin(), R.end(),
std::back_inserter(D), RangeTy::OffsetLessThan);
std::back_inserter(D), RangeTy::LessThan);
}

unsigned size() const { return Ranges.size(); }
Expand Down Expand Up @@ -5856,7 +5861,7 @@ struct AAPointerInfo : public AbstractAttribute {

/// Insert \p R at the given iterator \p Pos, and merge if necessary.
///
/// This assumes that all ranges before \p Pos are OffsetLessThan \p R, and
/// This assumes that all ranges before \p Pos are LessThan \p R, and
/// then maintains the sorted order for the suffix list.
///
/// \return The place of insertion and true iff anything changed.
Expand All @@ -5868,7 +5873,7 @@ struct AAPointerInfo : public AbstractAttribute {
}

// Maintain this as a sorted vector of unique entries.
auto LB = std::lower_bound(Pos, Ranges.end(), R, RangeTy::OffsetLessThan);
auto LB = std::lower_bound(Pos, Ranges.end(), R, RangeTy::LessThan);
if (LB == Ranges.end() || LB->Offset != R.Offset)
return std::make_pair(Ranges.insert(LB, R), true);
bool Changed = *LB != R;
Expand Down
6 changes: 2 additions & 4 deletions llvm/lib/Analysis/CMakeLists.txt
Expand Up @@ -163,8 +163,6 @@ add_llvm_component_library(LLVMAnalysis
TargetParser
)

include(CheckCXXSymbolExists)
check_cxx_symbol_exists(logf128 math.h HAS_LOGF128)
if(HAS_LOGF128)
target_compile_definitions(LLVMAnalysis PRIVATE HAS_LOGF128)
if(LLVM_HAS_LOGF128)
target_compile_definitions(LLVMAnalysis PRIVATE HAS_LOGF128)
endif()
15 changes: 7 additions & 8 deletions llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp
Expand Up @@ -2061,10 +2061,8 @@ void DwarfDebug::beginInstruction(const MachineInstr *MI) {
unsigned LastAsmLine =
Asm->OutStreamer->getContext().getCurrentDwarfLoc().getLine();

bool PrevInstInSameSection =
(!PrevInstBB ||
PrevInstBB->getSectionID() == MI->getParent()->getSectionID());
if (DL == PrevInstLoc && PrevInstInSameSection) {
bool PrevInstInDiffBB = PrevInstBB && PrevInstBB != MI->getParent();
if (DL == PrevInstLoc && !PrevInstInDiffBB) {
// If we have an ongoing unspecified location, nothing to do here.
if (!DL)
return;
Expand Down Expand Up @@ -2093,8 +2091,7 @@ void DwarfDebug::beginInstruction(const MachineInstr *MI) {
// possibly debug information; we want it to have a source location.
// - Instruction is at the top of a block; we don't want to inherit the
// location from the physically previous (maybe unrelated) block.
if (UnknownLocations == Enable || PrevLabel ||
(PrevInstBB && PrevInstBB != MI->getParent())) {
if (UnknownLocations == Enable || PrevLabel || PrevInstInDiffBB) {
// Preserve the file and column numbers, if we can, to save space in
// the encoded line table.
// Do not update PrevInstLoc, it remembers the last non-0 line.
Expand All @@ -2119,9 +2116,11 @@ void DwarfDebug::beginInstruction(const MachineInstr *MI) {
PrologEndLoc = DebugLoc();
}
// If the line changed, we call that a new statement; unless we went to
// line 0 and came back, in which case it is not a new statement.
// line 0 and came back, in which case it is not a new statement. We also
// mark is_stmt for the first non-0 line in each BB, in case a predecessor BB
// ends with a different line.
unsigned OldLine = PrevInstLoc ? PrevInstLoc.getLine() : LastAsmLine;
if (DL.getLine() && DL.getLine() != OldLine)
if (DL.getLine() && (DL.getLine() != OldLine || PrevInstInDiffBB))
Flags |= DWARF2_FLAG_IS_STMT;

const MDNode *Scope = DL.getScope();
Expand Down
3 changes: 2 additions & 1 deletion llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
Expand Up @@ -7031,7 +7031,8 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I,
// If ISD::IS_FPCLASS should be expanded, do it right now, because the
// expansion can use illegal types. Making expansion early allows
// legalizing these types prior to selection.
if (!TLI.isOperationLegalOrCustom(ISD::IS_FPCLASS, ArgVT)) {
if (!TLI.isOperationLegal(ISD::IS_FPCLASS, ArgVT) &&
!TLI.isOperationCustom(ISD::IS_FPCLASS, ArgVT)) {
SDValue Result = TLI.expandIS_FPCLASS(DestVT, Op, Test, Flags, sdl, DAG);
setValue(&I, Result);
return;
Expand Down
12 changes: 10 additions & 2 deletions llvm/lib/IR/BasicBlock.cpp
Expand Up @@ -975,8 +975,16 @@ void BasicBlock::spliceDebugInfoImpl(BasicBlock::iterator Dest, BasicBlock *Src,
if (ReadFromTail && Src->getMarker(Last)) {
DbgMarker *FromLast = Src->getMarker(Last);
if (LastIsEnd) {
Dest->adoptDbgRecords(Src, Last, true);
// adoptDbgRecords will release any trailers.
if (Dest == end()) {
      // Absorb the trailing markers from Src.
assert(FromLast == Src->getTrailingDbgRecords());
createMarker(Dest)->absorbDebugValues(*FromLast, true);
FromLast->eraseFromParent();
Src->deleteTrailingDbgRecords();
} else {
// adoptDbgRecords will release any trailers.
Dest->adoptDbgRecords(Src, Last, true);
}
assert(!Src->getTrailingDbgRecords());
} else {
// FIXME: can we use adoptDbgRecords here to reduce allocations?
Expand Down
85 changes: 72 additions & 13 deletions llvm/lib/Support/FormatVariadic.cpp
Expand Up @@ -25,8 +25,8 @@ static std::optional<AlignStyle> translateLocChar(char C) {
LLVM_BUILTIN_UNREACHABLE;
}

bool formatv_object_base::consumeFieldLayout(StringRef &Spec, AlignStyle &Where,
size_t &Align, char &Pad) {
static bool consumeFieldLayout(StringRef &Spec, AlignStyle &Where,
size_t &Align, char &Pad) {
Where = AlignStyle::Right;
Align = 0;
Pad = ' ';
Expand All @@ -35,8 +35,7 @@ bool formatv_object_base::consumeFieldLayout(StringRef &Spec, AlignStyle &Where,

if (Spec.size() > 1) {
// A maximum of 2 characters at the beginning can be used for something
// other
// than the width.
// other than the width.
// If Spec[1] is a loc char, then Spec[0] is a pad char and Spec[2:...]
// contains the width.
// Otherwise, if Spec[0] is a loc char, then Spec[1:...] contains the width.
Expand All @@ -55,8 +54,7 @@ bool formatv_object_base::consumeFieldLayout(StringRef &Spec, AlignStyle &Where,
return !Failed;
}

std::optional<ReplacementItem>
formatv_object_base::parseReplacementItem(StringRef Spec) {
static std::optional<ReplacementItem> parseReplacementItem(StringRef Spec) {
StringRef RepString = Spec.trim("{}");

// If the replacement sequence does not start with a non-negative integer,
Expand All @@ -82,15 +80,14 @@ formatv_object_base::parseReplacementItem(StringRef Spec) {
RepString = StringRef();
}
RepString = RepString.trim();
if (!RepString.empty()) {
assert(false && "Unexpected characters found in replacement string!");
}
assert(RepString.empty() &&
"Unexpected characters found in replacement string!");

return ReplacementItem{Spec, Index, Align, Where, Pad, Options};
}

std::pair<ReplacementItem, StringRef>
formatv_object_base::splitLiteralAndReplacement(StringRef Fmt) {
static std::pair<ReplacementItem, StringRef>
splitLiteralAndReplacement(StringRef Fmt) {
while (!Fmt.empty()) {
// Everything up until the first brace is a literal.
if (Fmt.front() != '{') {
Expand Down Expand Up @@ -143,15 +140,77 @@ formatv_object_base::splitLiteralAndReplacement(StringRef Fmt) {
return std::make_pair(ReplacementItem{Fmt}, StringRef());
}

#ifndef NDEBUG
#define ENABLE_VALIDATION 1
#else
#define ENABLE_VALIDATION 0 // Conveniently enable validation in release mode.
#endif

SmallVector<ReplacementItem, 2>
formatv_object_base::parseFormatString(StringRef Fmt) {
formatv_object_base::parseFormatString(StringRef Fmt, size_t NumArgs,
bool Validate) {
SmallVector<ReplacementItem, 2> Replacements;
ReplacementItem I;

#if ENABLE_VALIDATION
const StringRef SavedFmtStr = Fmt;
size_t NumExpectedArgs = 0;
#endif

while (!Fmt.empty()) {
ReplacementItem I;
std::tie(I, Fmt) = splitLiteralAndReplacement(Fmt);
if (I.Type != ReplacementType::Empty)
Replacements.push_back(I);
#if ENABLE_VALIDATION
if (I.Type == ReplacementType::Format)
NumExpectedArgs = std::max(NumExpectedArgs, I.Index + 1);
#endif
}

#if ENABLE_VALIDATION
if (!Validate)
return Replacements;

// Perform additional validation. Verify that the number of arguments matches
// the number of replacement indices and that there are no holes in the
// replacement indices.

// When validation fails, return an array of replacement items that
  // will print an error message as the output of this formatv() (used when
// validation is enabled in release mode).
auto getErrorReplacements = [SavedFmtStr](StringLiteral ErrorMsg) {
return SmallVector<ReplacementItem, 2>{
ReplacementItem("Invalid formatv() call: "), ReplacementItem(ErrorMsg),
ReplacementItem(" for format string: "), ReplacementItem(SavedFmtStr)};
};

if (NumExpectedArgs != NumArgs) {
errs() << formatv(
"Expected {0} Args, but got {1} for format string '{2}'\n",
NumExpectedArgs, NumArgs, SavedFmtStr);
assert(0 && "Invalid formatv() call");
return getErrorReplacements("Unexpected number of arguments");
}

// Find the number of unique indices seen. All replacement indices
// are < NumExpectedArgs.
SmallVector<bool> Indices(NumExpectedArgs);
size_t Count = 0;
for (const ReplacementItem &I : Replacements) {
if (I.Type != ReplacementType::Format || Indices[I.Index])
continue;
Indices[I.Index] = true;
++Count;
}

if (Count != NumExpectedArgs) {
errs() << formatv(
"Replacement field indices cannot have holes for format string '{0}'\n",
SavedFmtStr);
assert(0 && "Invalid format string");
return getErrorReplacements("Replacement indices have holes");
}
#endif // ENABLE_VALIDATION
return Replacements;
}
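For illustration, a hypothetical call that trips the new check (the exact diagnostic text is whatever the code above emits):

```cpp
// Only one argument is supplied but the highest replacement index implies
// two, so with validation enabled this asserts in debug builds; in a build
// where ENABLE_VALIDATION is on without asserts, the returned replacements
// render the "Invalid formatv() call: ..." message instead.
std::string S = llvm::formatv("{0} and {1}", 42).str();
```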

Expand Down
7 changes: 4 additions & 3 deletions llvm/lib/Support/ModRef.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
//===----------------------------------------------------------------------===//

#include "llvm/Support/ModRef.h"
#include "llvm/ADT/STLExtras.h"

using namespace llvm;

Expand All @@ -33,7 +34,7 @@ raw_ostream &llvm::operator<<(raw_ostream &OS, ModRefInfo MR) {
}

raw_ostream &llvm::operator<<(raw_ostream &OS, MemoryEffects ME) {
for (IRMemLocation Loc : MemoryEffects::locations()) {
interleaveComma(MemoryEffects::locations(), OS, [&](IRMemLocation Loc) {
switch (Loc) {
case IRMemLocation::ArgMem:
OS << "ArgMem: ";
Expand All @@ -45,7 +46,7 @@ raw_ostream &llvm::operator<<(raw_ostream &OS, MemoryEffects ME) {
OS << "Other: ";
break;
}
OS << ME.getModRef(Loc) << ", ";
}
OS << ME.getModRef(Loc);
});
return OS;
}
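A sketch of the visible effect (output line is illustrative; the ModRef strings come from the ModRefInfo printer above this hunk):

```cpp
#include "llvm/Support/ModRef.h"
#include "llvm/Support/raw_ostream.h"

int main() {
  llvm::MemoryEffects ME = llvm::MemoryEffects::readOnly();
  // interleaveComma only inserts ", " between entries, so the old trailing
  // ", " is gone, e.g.: "ArgMem: Ref, InaccessibleMem: Ref, Other: Ref"
  llvm::errs() << ME << "\n";
  return 0;
}
```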
1 change: 1 addition & 0 deletions llvm/lib/Support/Z3Solver.cpp
Expand Up @@ -19,6 +19,7 @@ using namespace llvm;
#include "llvm/ADT/Twine.h"

#include <set>
#include <unordered_map>

#include <z3.h>

Expand Down
59 changes: 42 additions & 17 deletions llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -27096,21 +27096,37 @@ AArch64TargetLowering::shouldExpandAtomicLoadInIR(LoadInst *LI) const {
: AtomicExpansionKind::LLSC;
}

// Return true if the atomic operation expansion will lower to use a library
// call, and is thus ineligible to use an LLSC expansion.
static bool rmwOpMayLowerToLibcall(const AArch64Subtarget &Subtarget,
const AtomicRMWInst *RMW) {
if (!RMW->isFloatingPointOperation())
return false;
switch (RMW->getType()->getScalarType()->getTypeID()) {
case Type::FloatTyID:
case Type::DoubleTyID:
case Type::HalfTyID:
case Type::BFloatTyID:
// Will use soft float
return !Subtarget.hasFPARMv8();
default:
// fp128 will emit library calls.
return true;
}

llvm_unreachable("covered type switch");
}

// The "default" for integer RMW operations is to expand to an LL/SC loop.
// However, with the LSE instructions (or outline-atomics mode, which provides
// library routines in place of the LSE-instructions), we can directly emit many
// operations instead.
//
// Floating-point operations are always emitted to a cmpxchg loop, because they
// may trigger a trap which aborts an LLSC sequence.
TargetLowering::AtomicExpansionKind
AArch64TargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const {
unsigned Size = AI->getType()->getPrimitiveSizeInBits();
Type *Ty = AI->getType();
unsigned Size = Ty->getPrimitiveSizeInBits();
assert(Size <= 128 && "AtomicExpandPass should've handled larger sizes.");

if (AI->isFloatingPointOperation())
return AtomicExpansionKind::CmpXChg;

bool CanUseLSE128 = Subtarget->hasLSE128() && Size == 128 &&
(AI->getOperation() == AtomicRMWInst::Xchg ||
AI->getOperation() == AtomicRMWInst::Or ||
Expand All @@ -27120,7 +27136,8 @@ AArch64TargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const {

// Nand is not supported in LSE.
// Leave 128 bits to LLSC or CmpXChg.
if (AI->getOperation() != AtomicRMWInst::Nand && Size < 128) {
if (AI->getOperation() != AtomicRMWInst::Nand && Size < 128 &&
!AI->isFloatingPointOperation()) {
if (Subtarget->hasLSE())
return AtomicExpansionKind::None;
if (Subtarget->outlineAtomics()) {
Expand All @@ -27146,7 +27163,7 @@ AArch64TargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const {
// succeed. So at -O0 lower this operation to a CAS loop. Also worthwhile if
// we have a single CAS instruction that can replace the loop.
if (getTargetMachine().getOptLevel() == CodeGenOptLevel::None ||
Subtarget->hasLSE())
Subtarget->hasLSE() || rmwOpMayLowerToLibcall(*Subtarget, AI))
return AtomicExpansionKind::CmpXChg;

return AtomicExpansionKind::LLSC;
Expand Down Expand Up @@ -27193,10 +27210,14 @@ Value *AArch64TargetLowering::emitLoadLinked(IRBuilderBase &Builder,

Value *Lo = Builder.CreateExtractValue(LoHi, 0, "lo");
Value *Hi = Builder.CreateExtractValue(LoHi, 1, "hi");
Lo = Builder.CreateZExt(Lo, ValueTy, "lo64");
Hi = Builder.CreateZExt(Hi, ValueTy, "hi64");
return Builder.CreateOr(
Lo, Builder.CreateShl(Hi, ConstantInt::get(ValueTy, 64)), "val64");

auto *Int128Ty = Type::getInt128Ty(Builder.getContext());
Lo = Builder.CreateZExt(Lo, Int128Ty, "lo64");
Hi = Builder.CreateZExt(Hi, Int128Ty, "hi64");

Value *Or = Builder.CreateOr(
Lo, Builder.CreateShl(Hi, ConstantInt::get(Int128Ty, 64)), "val64");
return Builder.CreateBitCast(Or, ValueTy);
}

Type *Tys[] = { Addr->getType() };
Expand All @@ -27207,8 +27228,8 @@ Value *AArch64TargetLowering::emitLoadLinked(IRBuilderBase &Builder,
const DataLayout &DL = M->getDataLayout();
IntegerType *IntEltTy = Builder.getIntNTy(DL.getTypeSizeInBits(ValueTy));
CallInst *CI = Builder.CreateCall(Ldxr, Addr);
CI->addParamAttr(
0, Attribute::get(Builder.getContext(), Attribute::ElementType, ValueTy));
CI->addParamAttr(0, Attribute::get(Builder.getContext(),
Attribute::ElementType, IntEltTy));
Value *Trunc = Builder.CreateTrunc(CI, IntEltTy);

return Builder.CreateBitCast(Trunc, ValueTy);
Expand All @@ -27234,9 +27255,13 @@ Value *AArch64TargetLowering::emitStoreConditional(IRBuilderBase &Builder,
IsRelease ? Intrinsic::aarch64_stlxp : Intrinsic::aarch64_stxp;
Function *Stxr = Intrinsic::getDeclaration(M, Int);
Type *Int64Ty = Type::getInt64Ty(M->getContext());
Type *Int128Ty = Type::getInt128Ty(M->getContext());

Value *CastVal = Builder.CreateBitCast(Val, Int128Ty);

Value *Lo = Builder.CreateTrunc(Val, Int64Ty, "lo");
Value *Hi = Builder.CreateTrunc(Builder.CreateLShr(Val, 64), Int64Ty, "hi");
Value *Lo = Builder.CreateTrunc(CastVal, Int64Ty, "lo");
Value *Hi =
Builder.CreateTrunc(Builder.CreateLShr(CastVal, 64), Int64Ty, "hi");
return Builder.CreateCall(Stxr, {Lo, Hi, Addr});
}

Expand Down
63 changes: 62 additions & 1 deletion llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
Expand Up @@ -1073,6 +1073,33 @@ static bool isAllActivePredicate(Value *Pred) {
m_ConstantInt<AArch64SVEPredPattern::all>()));
}

// Simplify a unary operation where the predicate has all inactive lanes by
// replacing the instruction with its operand.
static std::optional<Instruction *>
instCombineSVENoActiveReplace(InstCombiner &IC, IntrinsicInst &II,
bool hasInactiveVector) {
int PredOperand = hasInactiveVector ? 1 : 0;
int ReplaceOperand = hasInactiveVector ? 0 : 1;
if (match(II.getOperand(PredOperand), m_ZeroInt())) {
IC.replaceInstUsesWith(II, II.getOperand(ReplaceOperand));
return IC.eraseInstFromFunction(II);
}
return std::nullopt;
}

// Simplify a unary operation where the predicate has all inactive lanes, or
// replace the unused first operand with undef when all lanes are active.
static std::optional<Instruction *>
instCombineSVEAllOrNoActiveUnary(InstCombiner &IC, IntrinsicInst &II) {
if (isAllActivePredicate(II.getOperand(1)) &&
!isa<llvm::UndefValue>(II.getOperand(0)) &&
!isa<llvm::PoisonValue>(II.getOperand(0))) {
Value *Undef = llvm::UndefValue::get(II.getType());
return IC.replaceOperand(II, 0, Undef);
}
return instCombineSVENoActiveReplace(IC, II, true);
}

// Erase a unary operation where the predicate has all inactive lanes.
static std::optional<Instruction *>
instCombineSVENoActiveUnaryErase(InstCombiner &IC, IntrinsicInst &II,
Expand Down Expand Up @@ -2109,7 +2136,41 @@ AArch64TTIImpl::instCombineIntrinsic(InstCombiner &IC,
switch (IID) {
default:
break;

case Intrinsic::aarch64_sve_fcvt_bf16f32:
case Intrinsic::aarch64_sve_fcvt_f16f32:
case Intrinsic::aarch64_sve_fcvt_f16f64:
case Intrinsic::aarch64_sve_fcvt_f32f16:
case Intrinsic::aarch64_sve_fcvt_f32f64:
case Intrinsic::aarch64_sve_fcvt_f64f16:
case Intrinsic::aarch64_sve_fcvt_f64f32:
case Intrinsic::aarch64_sve_fcvtlt_f32f16:
case Intrinsic::aarch64_sve_fcvtlt_f64f32:
case Intrinsic::aarch64_sve_fcvtnt_bf16f32:
case Intrinsic::aarch64_sve_fcvtnt_f16f32:
case Intrinsic::aarch64_sve_fcvtnt_f32f64:
case Intrinsic::aarch64_sve_fcvtx_f32f64:
case Intrinsic::aarch64_sve_fcvtxnt_f32f64:
case Intrinsic::aarch64_sve_fcvtzs:
case Intrinsic::aarch64_sve_fcvtzs_i32f16:
case Intrinsic::aarch64_sve_fcvtzs_i32f64:
case Intrinsic::aarch64_sve_fcvtzs_i64f16:
case Intrinsic::aarch64_sve_fcvtzs_i64f32:
case Intrinsic::aarch64_sve_fcvtzu:
case Intrinsic::aarch64_sve_fcvtzu_i32f16:
case Intrinsic::aarch64_sve_fcvtzu_i32f64:
case Intrinsic::aarch64_sve_fcvtzu_i64f16:
case Intrinsic::aarch64_sve_fcvtzu_i64f32:
case Intrinsic::aarch64_sve_scvtf:
case Intrinsic::aarch64_sve_scvtf_f16i32:
case Intrinsic::aarch64_sve_scvtf_f16i64:
case Intrinsic::aarch64_sve_scvtf_f32i64:
case Intrinsic::aarch64_sve_scvtf_f64i32:
case Intrinsic::aarch64_sve_ucvtf:
case Intrinsic::aarch64_sve_ucvtf_f16i32:
case Intrinsic::aarch64_sve_ucvtf_f16i64:
case Intrinsic::aarch64_sve_ucvtf_f32i64:
case Intrinsic::aarch64_sve_ucvtf_f64i32:
return instCombineSVEAllOrNoActiveUnary(IC, II);
case Intrinsic::aarch64_sve_st1_scatter:
case Intrinsic::aarch64_sve_st1_scatter_scalar_offset:
case Intrinsic::aarch64_sve_st1_scatter_sxtw:
Expand Down
10 changes: 5 additions & 5 deletions llvm/lib/Target/AMDGPU/AMDGPU.h
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@ void initializeAMDGPURegBankSelectPass(PassRegistry &);
// SI Passes
FunctionPass *createGCNDPPCombinePass();
FunctionPass *createSIAnnotateControlFlowLegacyPass();
FunctionPass *createSIFoldOperandsPass();
FunctionPass *createSIFoldOperandsLegacyPass();
FunctionPass *createSIPeepholeSDWAPass();
FunctionPass *createSILowerI1CopiesLegacyPass();
FunctionPass *createAMDGPUGlobalISelDivergenceLoweringPass();
Expand Down Expand Up @@ -157,11 +157,11 @@ struct AMDGPULowerBufferFatPointersPass
void initializeAMDGPURewriteOutArgumentsPass(PassRegistry &);
extern char &AMDGPURewriteOutArgumentsID;

void initializeGCNDPPCombinePass(PassRegistry &);
extern char &GCNDPPCombineID;
void initializeGCNDPPCombineLegacyPass(PassRegistry &);
extern char &GCNDPPCombineLegacyID;

void initializeSIFoldOperandsPass(PassRegistry &);
extern char &SIFoldOperandsID;
void initializeSIFoldOperandsLegacyPass(PassRegistry &);
extern char &SIFoldOperandsLegacyID;

void initializeSIPeepholeSDWAPass(PassRegistry &);
extern char &SIPeepholeSDWAID;
17 changes: 11 additions & 6 deletions llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
Expand Up @@ -426,12 +426,17 @@ AMDGPUTargetLowering::AMDGPUTargetLowering(const TargetMachine &TM,
// FIXME: These IS_FPCLASS vector fp types are marked custom so it reaches
// scalarization code. Can be removed when IS_FPCLASS expand isn't called by
// default unless marked custom/legal.
setOperationAction(
ISD::IS_FPCLASS,
{MVT::v2f16, MVT::v3f16, MVT::v4f16, MVT::v16f16, MVT::v2f32, MVT::v3f32,
MVT::v4f32, MVT::v5f32, MVT::v6f32, MVT::v7f32, MVT::v8f32, MVT::v16f32,
MVT::v2f64, MVT::v3f64, MVT::v4f64, MVT::v8f64, MVT::v16f64},
Custom);
setOperationAction(ISD::IS_FPCLASS,
{MVT::v2f32, MVT::v3f32, MVT::v4f32, MVT::v5f32,
MVT::v6f32, MVT::v7f32, MVT::v8f32, MVT::v16f32,
MVT::v2f64, MVT::v3f64, MVT::v4f64, MVT::v8f64,
MVT::v16f64},
Custom);

if (isTypeLegal(MVT::f16))
setOperationAction(ISD::IS_FPCLASS,
{MVT::v2f16, MVT::v3f16, MVT::v4f16, MVT::v16f16},
Custom);

// Expand to fneg + fadd.
setOperationAction(ISD::FSUB, MVT::f64, Expand);
2 changes: 2 additions & 0 deletions llvm/lib/Target/AMDGPU/AMDGPUPassRegistry.def
Expand Up @@ -97,4 +97,6 @@ FUNCTION_PASS_WITH_PARAMS(
MACHINE_FUNCTION_PASS("amdgpu-isel", AMDGPUISelDAGToDAGPass(*this))
MACHINE_FUNCTION_PASS("si-fix-sgpr-copies", SIFixSGPRCopiesPass())
MACHINE_FUNCTION_PASS("si-i1-copies", SILowerI1CopiesPass())
MACHINE_FUNCTION_PASS("si-fold-operands", SIFoldOperandsPass());
MACHINE_FUNCTION_PASS("gcn-dpp-combine", GCNDPPCombinePass())
#undef MACHINE_FUNCTION_PASS
1,803 changes: 1,273 additions & 530 deletions llvm/lib/Target/AMDGPU/AMDGPUSplitModule.cpp

Large diffs are not rendered by default.

12 changes: 7 additions & 5 deletions llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
Expand Up @@ -28,12 +28,14 @@
#include "AMDGPUTargetObjectFile.h"
#include "AMDGPUTargetTransformInfo.h"
#include "AMDGPUUnifyDivergentExitNodes.h"
#include "GCNDPPCombine.h"
#include "GCNIterativeScheduler.h"
#include "GCNSchedStrategy.h"
#include "GCNVOPDUtils.h"
#include "R600.h"
#include "R600TargetMachine.h"
#include "SIFixSGPRCopies.h"
#include "SIFoldOperands.h"
#include "SIMachineFunctionInfo.h"
#include "SIMachineScheduler.h"
#include "TargetInfo/AMDGPUTargetInfo.h"
Expand Down Expand Up @@ -402,15 +404,15 @@ extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeAMDGPUTarget() {
initializeR600VectorRegMergerPass(*PR);
initializeGlobalISel(*PR);
initializeAMDGPUDAGToDAGISelLegacyPass(*PR);
initializeGCNDPPCombinePass(*PR);
initializeGCNDPPCombineLegacyPass(*PR);
initializeSILowerI1CopiesLegacyPass(*PR);
initializeAMDGPUGlobalISelDivergenceLoweringPass(*PR);
initializeSILowerWWMCopiesPass(*PR);
initializeAMDGPUMarkLastScratchLoadPass(*PR);
initializeSILowerSGPRSpillsPass(*PR);
initializeSIFixSGPRCopiesLegacyPass(*PR);
initializeSIFixVGPRCopiesPass(*PR);
initializeSIFoldOperandsPass(*PR);
initializeSIFoldOperandsLegacyPass(*PR);
initializeSIPeepholeSDWAPass(*PR);
initializeSIShrinkInstructionsPass(*PR);
initializeSIOptimizeExecMaskingPreRAPass(*PR);
Expand Down Expand Up @@ -1270,15 +1272,15 @@ void GCNPassConfig::addMachineSSAOptimization() {
// instructions leftover after the operands are folded as well.
//
// XXX - Can we get away without running DeadMachineInstructionElim again?
addPass(&SIFoldOperandsID);
addPass(&SIFoldOperandsLegacyID);
if (EnableDPPCombine)
addPass(&GCNDPPCombineID);
addPass(&GCNDPPCombineLegacyID);
addPass(&SILoadStoreOptimizerID);
if (isPassEnabled(EnableSDWAPeephole)) {
addPass(&SIPeepholeSDWAID);
addPass(&EarlyMachineLICMID);
addPass(&MachineCSEID);
addPass(&SIFoldOperandsID);
addPass(&SIFoldOperandsLegacyID);
}
addPass(&DeadMachineInstructionElimID);
addPass(createSIShrinkInstructionsPass());
55 changes: 41 additions & 14 deletions llvm/lib/Target/AMDGPU/GCNDPPCombine.cpp
Expand Up @@ -37,6 +37,7 @@
// The mov_dpp instruction should reside in the same BB as all its uses
//===----------------------------------------------------------------------===//

#include "GCNDPPCombine.h"
#include "AMDGPU.h"
#include "GCNSubtarget.h"
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
Expand All @@ -51,7 +52,7 @@ STATISTIC(NumDPPMovsCombined, "Number of DPP moves combined.");

namespace {

class GCNDPPCombine : public MachineFunctionPass {
class GCNDPPCombine {
MachineRegisterInfo *MRI;
const SIInstrInfo *TII;
const GCNSubtarget *ST;
Expand All @@ -76,12 +77,18 @@ class GCNDPPCombine : public MachineFunctionPass {

bool combineDPPMov(MachineInstr &MI) const;

int getDPPOp(unsigned Op, bool IsShrinkable) const;
bool isShrinkable(MachineInstr &MI) const;

public:
bool run(MachineFunction &MF);
};

class GCNDPPCombineLegacy : public MachineFunctionPass {
public:
static char ID;

GCNDPPCombine() : MachineFunctionPass(ID) {
initializeGCNDPPCombinePass(*PassRegistry::getPassRegistry());
}
GCNDPPCombineLegacy() : MachineFunctionPass(ID) {}

bool runOnMachineFunction(MachineFunction &MF) override;

Expand All @@ -96,22 +103,19 @@ class GCNDPPCombine : public MachineFunctionPass {
return MachineFunctionProperties()
.set(MachineFunctionProperties::Property::IsSSA);
}

private:
int getDPPOp(unsigned Op, bool IsShrinkable) const;
bool isShrinkable(MachineInstr &MI) const;
};

} // end anonymous namespace

INITIALIZE_PASS(GCNDPPCombine, DEBUG_TYPE, "GCN DPP Combine", false, false)
INITIALIZE_PASS(GCNDPPCombineLegacy, DEBUG_TYPE, "GCN DPP Combine", false,
false)

char GCNDPPCombine::ID = 0;
char GCNDPPCombineLegacy::ID = 0;

char &llvm::GCNDPPCombineID = GCNDPPCombine::ID;
char &llvm::GCNDPPCombineLegacyID = GCNDPPCombineLegacy::ID;

FunctionPass *llvm::createGCNDPPCombinePass() {
return new GCNDPPCombine();
return new GCNDPPCombineLegacy();
}

bool GCNDPPCombine::isShrinkable(MachineInstr &MI) const {
Expand Down Expand Up @@ -749,9 +753,16 @@ bool GCNDPPCombine::combineDPPMov(MachineInstr &MovMI) const {
return !Rollback;
}

bool GCNDPPCombine::runOnMachineFunction(MachineFunction &MF) {
bool GCNDPPCombineLegacy::runOnMachineFunction(MachineFunction &MF) {
if (skipFunction(MF.getFunction()))
return false;

return GCNDPPCombine().run(MF);
}

bool GCNDPPCombine::run(MachineFunction &MF) {
ST = &MF.getSubtarget<GCNSubtarget>();
if (!ST->hasDPP() || skipFunction(MF.getFunction()))
if (!ST->hasDPP())
return false;

MRI = &MF.getRegInfo();
Expand Down Expand Up @@ -781,3 +792,19 @@ bool GCNDPPCombine::runOnMachineFunction(MachineFunction &MF) {
}
return Changed;
}

PreservedAnalyses GCNDPPCombinePass::run(MachineFunction &MF,
MachineFunctionAnalysisManager &) {
if (MF.getFunction().hasOptNone())
return PreservedAnalyses::all();

MFPropsModifier _(*this, MF);

bool Changed = GCNDPPCombine().run(MF);
if (!Changed)
return PreservedAnalyses::all();

auto PA = getMachineFunctionPassPreservedAnalyses();
PA.preserveSet<CFGAnalyses>();
return PA;
}
28 changes: 28 additions & 0 deletions llvm/lib/Target/AMDGPU/GCNDPPCombine.h
@@ -0,0 +1,28 @@
//===--- GCNDPPCombine.h - Optimization for DPP instructions -------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#ifndef LLVM_LIB_TARGET_AMDGPU_GCNDPPCOMBINE_H
#define LLVM_LIB_TARGET_AMDGPU_GCNDPPCOMBINE_H

#include "llvm/CodeGen/MachinePassManager.h"

namespace llvm {
class GCNDPPCombinePass : public PassInfoMixin<GCNDPPCombinePass> {
public:
PreservedAnalyses run(MachineFunction &MF,
MachineFunctionAnalysisManager &MAM);

MachineFunctionProperties getRequiredProperties() {
return MachineFunctionProperties().set(
MachineFunctionProperties::Property::IsSSA);
}
};

} // end namespace llvm

#endif // LLVM_LIB_TARGET_AMDGPU_GCNDPPCOMBINE_H
116 changes: 67 additions & 49 deletions llvm/lib/Target/AMDGPU/SIFoldOperands.cpp
Expand Up @@ -8,6 +8,7 @@
//===----------------------------------------------------------------------===//
//

#include "SIFoldOperands.h"
#include "AMDGPU.h"
#include "GCNSubtarget.h"
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
Expand Down Expand Up @@ -66,9 +67,8 @@ struct FoldCandidate {
bool needsShrink() const { return ShrinkOpcode != -1; }
};

class SIFoldOperands : public MachineFunctionPass {
class SIFoldOperandsImpl {
public:
static char ID;
MachineRegisterInfo *MRI;
const SIInstrInfo *TII;
const SIRegisterInfo *TRI;
Expand Down Expand Up @@ -121,11 +121,22 @@ class SIFoldOperands : public MachineFunctionPass {
bool tryOptimizeAGPRPhis(MachineBasicBlock &MBB);

public:
SIFoldOperands() : MachineFunctionPass(ID) {
initializeSIFoldOperandsPass(*PassRegistry::getPassRegistry());
}
SIFoldOperandsImpl() = default;

bool run(MachineFunction &MF);
};

bool runOnMachineFunction(MachineFunction &MF) override;
class SIFoldOperandsLegacy : public MachineFunctionPass {
public:
static char ID;

SIFoldOperandsLegacy() : MachineFunctionPass(ID) {}

bool runOnMachineFunction(MachineFunction &MF) override {
if (skipFunction(MF.getFunction()))
return false;
return SIFoldOperandsImpl().run(MF);
}

StringRef getPassName() const override { return "SI Fold Operands"; }

Expand All @@ -137,12 +148,12 @@ class SIFoldOperands : public MachineFunctionPass {

} // End anonymous namespace.

INITIALIZE_PASS(SIFoldOperands, DEBUG_TYPE,
"SI Fold Operands", false, false)
INITIALIZE_PASS(SIFoldOperandsLegacy, DEBUG_TYPE, "SI Fold Operands", false,
false)

char SIFoldOperands::ID = 0;
char SIFoldOperandsLegacy::ID = 0;

char &llvm::SIFoldOperandsID = SIFoldOperands::ID;
char &llvm::SIFoldOperandsLegacyID = SIFoldOperandsLegacy::ID;

static const TargetRegisterClass *getRegOpRC(const MachineRegisterInfo &MRI,
const TargetRegisterInfo &TRI,
Expand Down Expand Up @@ -177,8 +188,8 @@ static unsigned macToMad(unsigned Opc) {

// TODO: Add heuristic that the frame index might not fit in the addressing mode
// immediate offset to avoid materializing in loops.
bool SIFoldOperands::frameIndexMayFold(const MachineInstr &UseMI, int OpNo,
const MachineOperand &OpToFold) const {
bool SIFoldOperandsImpl::frameIndexMayFold(
const MachineInstr &UseMI, int OpNo, const MachineOperand &OpToFold) const {
if (!OpToFold.isFI())
return false;

Expand All @@ -196,11 +207,11 @@ bool SIFoldOperands::frameIndexMayFold(const MachineInstr &UseMI, int OpNo,
return OpNo == VIdx && SIdx == -1;
}

FunctionPass *llvm::createSIFoldOperandsPass() {
return new SIFoldOperands();
FunctionPass *llvm::createSIFoldOperandsLegacyPass() {
return new SIFoldOperandsLegacy();
}

bool SIFoldOperands::canUseImmWithOpSel(FoldCandidate &Fold) const {
bool SIFoldOperandsImpl::canUseImmWithOpSel(FoldCandidate &Fold) const {
MachineInstr *MI = Fold.UseMI;
MachineOperand &Old = MI->getOperand(Fold.UseOpNo);
const uint64_t TSFlags = MI->getDesc().TSFlags;
Expand Down Expand Up @@ -230,7 +241,7 @@ bool SIFoldOperands::canUseImmWithOpSel(FoldCandidate &Fold) const {
return true;
}

bool SIFoldOperands::tryFoldImmWithOpSel(FoldCandidate &Fold) const {
bool SIFoldOperandsImpl::tryFoldImmWithOpSel(FoldCandidate &Fold) const {
MachineInstr *MI = Fold.UseMI;
MachineOperand &Old = MI->getOperand(Fold.UseOpNo);
unsigned Opcode = MI->getOpcode();
Expand Down Expand Up @@ -354,7 +365,7 @@ bool SIFoldOperands::tryFoldImmWithOpSel(FoldCandidate &Fold) const {
return false;
}

bool SIFoldOperands::updateOperand(FoldCandidate &Fold) const {
bool SIFoldOperandsImpl::updateOperand(FoldCandidate &Fold) const {
MachineInstr *MI = Fold.UseMI;
MachineOperand &Old = MI->getOperand(Fold.UseOpNo);
assert(Old.isReg());
Expand Down Expand Up @@ -464,9 +475,9 @@ static void appendFoldCandidate(SmallVectorImpl<FoldCandidate> &FoldList,
FoldList.emplace_back(MI, OpNo, FoldOp, Commuted, ShrinkOp);
}

bool SIFoldOperands::tryAddToFoldList(SmallVectorImpl<FoldCandidate> &FoldList,
MachineInstr *MI, unsigned OpNo,
MachineOperand *OpToFold) const {
bool SIFoldOperandsImpl::tryAddToFoldList(
SmallVectorImpl<FoldCandidate> &FoldList, MachineInstr *MI, unsigned OpNo,
MachineOperand *OpToFold) const {
const unsigned Opc = MI->getOpcode();

auto tryToFoldAsFMAAKorMK = [&]() {
Expand Down Expand Up @@ -645,16 +656,16 @@ bool SIFoldOperands::tryAddToFoldList(SmallVectorImpl<FoldCandidate> &FoldList,
return true;
}

bool SIFoldOperands::isUseSafeToFold(const MachineInstr &MI,
const MachineOperand &UseMO) const {
bool SIFoldOperandsImpl::isUseSafeToFold(const MachineInstr &MI,
const MachineOperand &UseMO) const {
// Operands of SDWA instructions must be registers.
return !TII->isSDWA(MI);
}

// Find a def of the UseReg, check if it is a reg_sequence and find initializers
// for each subreg, tracking it to foldable inline immediate if possible.
// Returns true on success.
bool SIFoldOperands::getRegSeqInit(
bool SIFoldOperandsImpl::getRegSeqInit(
SmallVectorImpl<std::pair<MachineOperand *, unsigned>> &Defs,
Register UseReg, uint8_t OpTy) const {
MachineInstr *Def = MRI->getVRegDef(UseReg);
Expand Down Expand Up @@ -686,7 +697,7 @@ bool SIFoldOperands::getRegSeqInit(
return true;
}

bool SIFoldOperands::tryToFoldACImm(
bool SIFoldOperandsImpl::tryToFoldACImm(
const MachineOperand &OpToFold, MachineInstr *UseMI, unsigned UseOpIdx,
SmallVectorImpl<FoldCandidate> &FoldList) const {
const MCInstrDesc &Desc = UseMI->getDesc();
Expand Down Expand Up @@ -752,12 +763,10 @@ bool SIFoldOperands::tryToFoldACImm(
return true;
}

void SIFoldOperands::foldOperand(
MachineOperand &OpToFold,
MachineInstr *UseMI,
int UseOpIdx,
SmallVectorImpl<FoldCandidate> &FoldList,
SmallVectorImpl<MachineInstr *> &CopiesToReplace) const {
void SIFoldOperandsImpl::foldOperand(
MachineOperand &OpToFold, MachineInstr *UseMI, int UseOpIdx,
SmallVectorImpl<FoldCandidate> &FoldList,
SmallVectorImpl<MachineInstr *> &CopiesToReplace) const {
const MachineOperand *UseOp = &UseMI->getOperand(UseOpIdx);

if (!isUseSafeToFold(*UseMI, *UseOp))
Expand Down Expand Up @@ -1187,7 +1196,7 @@ static void mutateCopyOp(MachineInstr &MI, const MCInstrDesc &NewDesc) {
}

MachineOperand *
SIFoldOperands::getImmOrMaterializedImm(MachineOperand &Op) const {
SIFoldOperandsImpl::getImmOrMaterializedImm(MachineOperand &Op) const {
// If this has a subregister, it obviously is a register source.
if (!Op.isReg() || Op.getSubReg() != AMDGPU::NoSubRegister ||
!Op.getReg().isVirtual())
Expand All @@ -1206,7 +1215,7 @@ SIFoldOperands::getImmOrMaterializedImm(MachineOperand &Op) const {
// Try to simplify operations with a constant that may appear after instruction
// selection.
// TODO: See if a frame index with a fixed offset can fold.
bool SIFoldOperands::tryConstantFoldOp(MachineInstr *MI) const {
bool SIFoldOperandsImpl::tryConstantFoldOp(MachineInstr *MI) const {
if (!MI->allImplicitDefsAreDead())
return false;

Expand Down Expand Up @@ -1307,7 +1316,7 @@ bool SIFoldOperands::tryConstantFoldOp(MachineInstr *MI) const {
}

// Try to fold an instruction into a simpler one
bool SIFoldOperands::tryFoldCndMask(MachineInstr &MI) const {
bool SIFoldOperandsImpl::tryFoldCndMask(MachineInstr &MI) const {
unsigned Opc = MI.getOpcode();
if (Opc != AMDGPU::V_CNDMASK_B32_e32 && Opc != AMDGPU::V_CNDMASK_B32_e64 &&
Opc != AMDGPU::V_CNDMASK_B64_PSEUDO)
Expand Down Expand Up @@ -1346,7 +1355,7 @@ bool SIFoldOperands::tryFoldCndMask(MachineInstr &MI) const {
return true;
}

bool SIFoldOperands::tryFoldZeroHighBits(MachineInstr &MI) const {
bool SIFoldOperandsImpl::tryFoldZeroHighBits(MachineInstr &MI) const {
if (MI.getOpcode() != AMDGPU::V_AND_B32_e64 &&
MI.getOpcode() != AMDGPU::V_AND_B32_e32)
return false;
Expand All @@ -1368,8 +1377,8 @@ bool SIFoldOperands::tryFoldZeroHighBits(MachineInstr &MI) const {
return true;
}

bool SIFoldOperands::foldInstOperand(MachineInstr &MI,
MachineOperand &OpToFold) const {
bool SIFoldOperandsImpl::foldInstOperand(MachineInstr &MI,
MachineOperand &OpToFold) const {
// We need mutate the operands of new mov instructions to add implicit
// uses of EXEC, but adding them invalidates the use_iterator, so defer
// this.
Expand Down Expand Up @@ -1442,7 +1451,7 @@ bool SIFoldOperands::foldInstOperand(MachineInstr &MI,
return true;
}

bool SIFoldOperands::tryFoldFoldableCopy(
bool SIFoldOperandsImpl::tryFoldFoldableCopy(
MachineInstr &MI, MachineOperand *&CurrentKnownM0Val) const {
// Specially track simple redefs of m0 to the same value in a block, so we
// can erase the later ones.
Expand Down Expand Up @@ -1519,7 +1528,8 @@ bool SIFoldOperands::tryFoldFoldableCopy(

// Clamp patterns are canonically selected to v_max_* instructions, so only
// handle them.
const MachineOperand *SIFoldOperands::isClamp(const MachineInstr &MI) const {
const MachineOperand *
SIFoldOperandsImpl::isClamp(const MachineInstr &MI) const {
unsigned Op = MI.getOpcode();
switch (Op) {
case AMDGPU::V_MAX_F32_e64:
Expand Down Expand Up @@ -1567,7 +1577,7 @@ const MachineOperand *SIFoldOperands::isClamp(const MachineInstr &MI) const {
}

// FIXME: Clamp for v_mad_mixhi_f16 handled during isel.
bool SIFoldOperands::tryFoldClamp(MachineInstr &MI) {
bool SIFoldOperandsImpl::tryFoldClamp(MachineInstr &MI) {
const MachineOperand *ClampSrc = isClamp(MI);
if (!ClampSrc || !MRI->hasOneNonDBGUser(ClampSrc->getReg()))
return false;
Expand Down Expand Up @@ -1662,7 +1672,7 @@ static int getOModValue(unsigned Opc, int64_t Val) {
// FIXME: Does this need to check IEEE mode bit? SNaNs are generally not
// handled, so will anything other than that break?
std::pair<const MachineOperand *, int>
SIFoldOperands::isOMod(const MachineInstr &MI) const {
SIFoldOperandsImpl::isOMod(const MachineInstr &MI) const {
unsigned Op = MI.getOpcode();
switch (Op) {
case AMDGPU::V_MUL_F64_e64:
Expand Down Expand Up @@ -1740,7 +1750,7 @@ SIFoldOperands::isOMod(const MachineInstr &MI) const {
}

// FIXME: Does this need to check IEEE bit on function?
bool SIFoldOperands::tryFoldOMod(MachineInstr &MI) {
bool SIFoldOperandsImpl::tryFoldOMod(MachineInstr &MI) {
const MachineOperand *RegOp;
int OMod;
std::tie(RegOp, OMod) = isOMod(MI);
Expand Down Expand Up @@ -1779,7 +1789,7 @@ bool SIFoldOperands::tryFoldOMod(MachineInstr &MI) {

// Try to fold a reg_sequence with vgpr output and agpr inputs into an
// instruction which can take an agpr. So far that means a store.
bool SIFoldOperands::tryFoldRegSequence(MachineInstr &MI) {
bool SIFoldOperandsImpl::tryFoldRegSequence(MachineInstr &MI) {
assert(MI.isRegSequence());
auto Reg = MI.getOperand(0).getReg();

Expand Down Expand Up @@ -1926,7 +1936,7 @@ static bool isAGPRCopy(const SIRegisterInfo &TRI,
// loop:
// %3:areg = PHI %2:areg, %entry, %X:areg,
// %4:areg = (instr using %3:areg)
bool SIFoldOperands::tryFoldPhiAGPR(MachineInstr &PHI) {
bool SIFoldOperandsImpl::tryFoldPhiAGPR(MachineInstr &PHI) {
assert(PHI.isPHI());

Register PhiOut = PHI.getOperand(0).getReg();
Expand Down Expand Up @@ -2030,7 +2040,7 @@ bool SIFoldOperands::tryFoldPhiAGPR(MachineInstr &PHI) {
}

// Attempt to convert VGPR load to an AGPR load.
bool SIFoldOperands::tryFoldLoad(MachineInstr &MI) {
bool SIFoldOperandsImpl::tryFoldLoad(MachineInstr &MI) {
assert(MI.mayLoad());
if (!ST->hasGFX90AInsts() || MI.getNumExplicitDefs() != 1)
return false;
Expand Down Expand Up @@ -2117,7 +2127,7 @@ bool SIFoldOperands::tryFoldLoad(MachineInstr &MI) {
// %0:areg = PHI %tmp_agpr, %a, %x, %c
// %1:areg = PHI %tmp_agpr, %a, %y, %c
// %2:areg = PHI %tmp_agpr, %a, %z, %c
bool SIFoldOperands::tryOptimizeAGPRPhis(MachineBasicBlock &MBB) {
bool SIFoldOperandsImpl::tryOptimizeAGPRPhis(MachineBasicBlock &MBB) {
// This is only really needed on GFX908 where AGPR-AGPR copies are
// unreasonably difficult.
if (ST->hasGFX90AInsts())
Expand Down Expand Up @@ -2182,10 +2192,7 @@ bool SIFoldOperands::tryOptimizeAGPRPhis(MachineBasicBlock &MBB) {
return Changed;
}

bool SIFoldOperands::runOnMachineFunction(MachineFunction &MF) {
if (skipFunction(MF.getFunction()))
return false;

bool SIFoldOperandsImpl::run(MachineFunction &MF) {
MRI = &MF.getRegInfo();
ST = &MF.getSubtarget<GCNSubtarget>();
TII = ST->getInstrInfo();
Expand Down Expand Up @@ -2246,3 +2253,14 @@ bool SIFoldOperands::runOnMachineFunction(MachineFunction &MF) {

return Changed;
}

PreservedAnalyses SIFoldOperandsPass::run(MachineFunction &MF,
MachineFunctionAnalysisManager &) {
bool Changed = SIFoldOperandsImpl().run(MF);
if (!Changed)
return PreservedAnalyses::all();
auto PA = getMachineFunctionPassPreservedAnalyses();
PA.preserveSet<CFGAnalyses>();
return PA;
}
23 changes: 23 additions & 0 deletions llvm/lib/Target/AMDGPU/SIFoldOperands.h
@@ -0,0 +1,23 @@
//===- SIFoldOperands.h ------------------------------------------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#ifndef LLVM_LIB_TARGET_AMDGPU_SIFOLDOPERANDS_H
#define LLVM_LIB_TARGET_AMDGPU_SIFOLDOPERANDS_H

#include "llvm/CodeGen/MachinePassManager.h"

namespace llvm {
class SIFoldOperandsPass : public PassInfoMixin<SIFoldOperandsPass> {
public:
SIFoldOperandsPass() = default;
PreservedAnalyses run(MachineFunction &MF,
MachineFunctionAnalysisManager &MFAM);
};
} // namespace llvm

#endif // LLVM_LIB_TARGET_AMDGPU_SIFOLDOPERANDS_H
9 changes: 7 additions & 2 deletions llvm/lib/Target/AVR/AVRISelDAGToDAG.cpp
Expand Up @@ -122,8 +122,13 @@ bool AVRDAGToDAGISel::SelectAddr(SDNode *Op, SDValue N, SDValue &Base,
// offset allowed.
MVT VT = cast<MemSDNode>(Op)->getMemoryVT().getSimpleVT();

// We only accept offsets that fit in 6 bits (unsigned).
if (isUInt<6>(RHSC) && (VT == MVT::i8 || VT == MVT::i16)) {
// We only accept offsets that fit in 6 bits (unsigned), with the exception
// of 16-bit loads - those can only go up to 62, because we desugar them
// into a pair of 8-bit loads like `ldd rx, RHSC` + `ldd ry, RHSC + 1`.
bool OkI8 = VT == MVT::i8 && RHSC <= 63;
bool OkI16 = VT == MVT::i16 && RHSC <= 62;

if (OkI8 || OkI16) {
Base = N.getOperand(0);
Disp = CurDAG->getTargetConstant(RHSC, dl, MVT::i8);

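
A quick sanity check of the rule in the comment above (a standalone sketch, not AVR backend code): an access of AccessBytes bytes at displacement D needs every byte displacement up to D + AccessBytes - 1 to fit LDD's 6-bit unsigned field, hence 63 for i8 but only 62 for i16.

#include <cassert>

// True if an AccessBytes-wide access at unsigned displacement Offset can be
// encoded with `ldd`, whose displacement field is 6 bits (0..63).
static bool fitsLddDisplacement(unsigned Offset, unsigned AccessBytes) {
  return Offset + (AccessBytes - 1) <= 63;
}

int main() {
  assert(fitsLddDisplacement(63, 1));  // i8 at 63: single byte, encodable
  assert(fitsLddDisplacement(62, 2));  // i16 at 62: bytes 62 and 63, encodable
  assert(!fitsLddDisplacement(63, 2)); // i16 at 63: second byte at 64 does not fit
  return 0;
}
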
7 changes: 7 additions & 0 deletions llvm/lib/Target/PowerPC/PPCISelLowering.cpp
Expand Up @@ -1221,6 +1221,7 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM,
setOperationAction(ISD::IS_FPCLASS, MVT::f32, Custom);
setOperationAction(ISD::IS_FPCLASS, MVT::f64, Custom);
setOperationAction(ISD::IS_FPCLASS, MVT::f128, Custom);
setOperationAction(ISD::IS_FPCLASS, MVT::ppcf128, Custom);
}

// 128 bit shifts can be accomplished via 3 instructions for SHL and
Expand Down Expand Up @@ -11479,6 +11480,12 @@ SDValue PPCTargetLowering::LowerIS_FPCLASS(SDValue Op,
uint64_t RHSC = Op.getConstantOperandVal(1);
SDLoc Dl(Op);
FPClassTest Category = static_cast<FPClassTest>(RHSC);
if (LHS.getValueType() == MVT::ppcf128) {
// The higher part determines the value class.
LHS = DAG.getNode(ISD::EXTRACT_ELEMENT, Dl, MVT::f64, LHS,
DAG.getConstant(1, Dl, MVT::i32));
}

return getDataClassTest(LHS, Category, Dl, DAG, Subtarget);
}
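
The ppcf128 handling above leans on a property of the IBM double-double format: the value class (NaN, infinity, zero, and so on) of the pair is that of its high double, since the low part only contributes a much smaller correction. A rough scalar illustration of that assumption, not the PPC lowering itself:

#include <cmath>
#include <cstdio>

// Toy classifier for a double-double (Hi, Lo) pair: only Hi matters.
static const char *classify(double Hi, double /*Lo*/) {
  if (std::isnan(Hi)) return "nan";
  if (std::isinf(Hi)) return "inf";
  if (Hi == 0.0)      return "zero";
  return "finite nonzero";
}

int main() {
  std::printf("%s\n", classify(INFINITY, -123.0)); // inf
  std::printf("%s\n", classify(0.0, 0.0));         // zero
  std::printf("%s\n", classify(1.0, 1e-20));       // finite nonzero
  return 0;
}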

2 changes: 1 addition & 1 deletion llvm/lib/Target/RISCV/MCTargetDesc/RISCVMCCodeEmitter.cpp
Expand Up @@ -172,7 +172,7 @@ void RISCVMCCodeEmitter::expandTLSDESCCall(const MCInst &MI,
const RISCVMCExpr *Expr = dyn_cast<RISCVMCExpr>(SrcSymbol.getExpr());
MCRegister Link = MI.getOperand(0).getReg();
MCRegister Dest = MI.getOperand(1).getReg();
MCRegister Imm = MI.getOperand(2).getImm();
int64_t Imm = MI.getOperand(2).getImm();
Fixups.push_back(MCFixup::create(
0, Expr, MCFixupKind(RISCV::fixup_riscv_tlsdesc_call), MI.getLoc()));
MCInst Call =
56 changes: 48 additions & 8 deletions llvm/lib/Target/RISCV/RISCVISelLowering.cpp
Expand Up @@ -395,7 +395,6 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
ISD::FADD, ISD::FSUB,
ISD::FMUL, ISD::FMA,
ISD::FDIV, ISD::FSQRT,
ISD::FABS, ISD::FNEG,
ISD::STRICT_FMA, ISD::STRICT_FADD,
ISD::STRICT_FSUB, ISD::STRICT_FMUL,
ISD::STRICT_FDIV, ISD::STRICT_FSQRT,
Expand All @@ -416,8 +415,8 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
setOperationAction(ISD::BR_CC, MVT::bf16, Expand);
setOperationAction(ZfhminZfbfminPromoteOps, MVT::bf16, Promote);
setOperationAction(ISD::FREM, MVT::bf16, Promote);
// FIXME: Need to promote bf16 FCOPYSIGN to f32, but the
// DAGCombiner::visitFP_ROUND probably needs improvements first.
setOperationAction(ISD::FABS, MVT::bf16, Expand);
setOperationAction(ISD::FNEG, MVT::bf16, Expand);
setOperationAction(ISD::FCOPYSIGN, MVT::bf16, Expand);
}

Expand All @@ -433,8 +432,8 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
setOperationAction({ISD::STRICT_LRINT, ISD::STRICT_LLRINT,
ISD::STRICT_LROUND, ISD::STRICT_LLROUND},
MVT::f16, Legal);
// FIXME: Need to promote f16 FCOPYSIGN to f32, but the
// DAGCombiner::visitFP_ROUND probably needs improvements first.
setOperationAction(ISD::FABS, MVT::f16, Expand);
setOperationAction(ISD::FNEG, MVT::f16, Expand);
setOperationAction(ISD::FCOPYSIGN, MVT::f16, Expand);
}

Expand Down Expand Up @@ -10328,6 +10327,50 @@ SDValue RISCVTargetLowering::lowerVECTOR_REVERSE(SDValue Op,
Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
}

MVT XLenVT = Subtarget.getXLenVT();
auto [Mask, VL] = getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget);

// On some uarchs vrgather.vv will read from every input register for each
// output register, regardless of the indices. However, to reverse a vector,
// each output register only needs to read from one register. So decompose it
// into LMUL * M1 vrgather.vvs to get O(LMUL) performance instead of
// O(LMUL^2).
//
// vsetvli a1, zero, e64, m4, ta, ma
// vrgatherei16.vv v12, v8, v16
// ->
// vsetvli a1, zero, e64, m1, ta, ma
// vrgather.vv v15, v8, v16
// vrgather.vv v14, v9, v16
// vrgather.vv v13, v10, v16
// vrgather.vv v12, v11, v16
if (ContainerVT.bitsGT(getLMUL1VT(ContainerVT)) &&
ContainerVT.getVectorElementCount().isKnownMultipleOf(2)) {
auto [Lo, Hi] = DAG.SplitVector(Vec, DL);
Lo = DAG.getNode(ISD::VECTOR_REVERSE, DL, Lo.getSimpleValueType(), Lo);
Hi = DAG.getNode(ISD::VECTOR_REVERSE, DL, Hi.getSimpleValueType(), Hi);
SDValue Concat = DAG.getNode(ISD::CONCAT_VECTORS, DL, ContainerVT, Hi, Lo);

// Fixed length vectors might not fit exactly into their container, and so
// leave a gap in the front of the vector after being reversed. Slide this
// away.
//
// x x x x 3 2 1 0 <- v4i16 @ vlen=128
// 0 1 2 3 x x x x <- reverse
// x x x x 0 1 2 3 <- vslidedown.vx
if (VecVT.isFixedLengthVector()) {
SDValue Offset = DAG.getNode(
ISD::SUB, DL, XLenVT,
DAG.getElementCount(DL, XLenVT, ContainerVT.getVectorElementCount()),
DAG.getElementCount(DL, XLenVT, VecVT.getVectorElementCount()));
Concat =
getVSlidedown(DAG, Subtarget, DL, ContainerVT,
DAG.getUNDEF(ContainerVT), Concat, Offset, Mask, VL);
Concat = convertFromScalableVector(VecVT, Concat, DAG, Subtarget);
}
return Concat;
}
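
The split performed above relies on the identity reverse(v) == concat(reverse(high half), reverse(low half)), with the reversed high half landing in the low position. A standalone check of that identity (plain C++, illustration only):

#include <algorithm>
#include <array>
#include <cassert>

int main() {
  std::array<int, 8> V{0, 1, 2, 3, 4, 5, 6, 7};

  // Reference: reverse the whole vector.
  std::array<int, 8> Ref = V;
  std::reverse(Ref.begin(), Ref.end());

  // Decomposed form: reverse each half, then concatenate with the halves swapped.
  std::array<int, 4> Lo{V[0], V[1], V[2], V[3]}, Hi{V[4], V[5], V[6], V[7]};
  std::reverse(Lo.begin(), Lo.end());
  std::reverse(Hi.begin(), Hi.end());
  std::array<int, 8> Split{Hi[0], Hi[1], Hi[2], Hi[3], Lo[0], Lo[1], Lo[2], Lo[3]};

  assert(Split == Ref);
  return 0;
}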

unsigned EltSize = ContainerVT.getScalarSizeInBits();
unsigned MinSize = ContainerVT.getSizeInBits().getKnownMinValue();
unsigned VectorBitsMax = Subtarget.getRealMaxVLen();
Expand Down Expand Up @@ -10375,9 +10418,6 @@ SDValue RISCVTargetLowering::lowerVECTOR_REVERSE(SDValue Op,
IntVT = IntVT.changeVectorElementType(MVT::i16);
}

MVT XLenVT = Subtarget.getXLenVT();
auto [Mask, VL] = getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget);

// Calculate VLMAX-1 for the desired SEW.
SDValue VLMinus1 = DAG.getNode(
ISD::SUB, DL, XLenVT,
2 changes: 2 additions & 0 deletions llvm/lib/Target/RISCV/RISCVInstrInfoV.td
Expand Up @@ -1665,13 +1665,15 @@ defm VSLIDEUP_V : VSLD_IV_X_I<"vslideup", 0b001110, /*slidesUp=*/true>;
defm VSLIDE1UP_V : VSLD1_MV_X<"vslide1up", 0b001110>;
} // Constraints = "@earlyclobber $vd", RVVConstraint = SlideUp
defm VSLIDEDOWN_V : VSLD_IV_X_I<"vslidedown", 0b001111, /*slidesUp=*/false>;
let ActiveElementsAffectResult = 1 in
defm VSLIDE1DOWN_V : VSLD1_MV_X<"vslide1down", 0b001111>;
} // Predicates = [HasVInstructions]

let Predicates = [HasVInstructionsAnyF] in {
let Constraints = "@earlyclobber $vd", RVVConstraint = SlideUp in {
defm VFSLIDE1UP_V : VSLD1_FV_F<"vfslide1up", 0b001110>;
} // Constraints = "@earlyclobber $vd", RVVConstraint = SlideUp
let ActiveElementsAffectResult = 1 in
defm VFSLIDE1DOWN_V : VSLD1_FV_F<"vfslide1down", 0b001111>;
} // Predicates = [HasVInstructionsAnyF]

4 changes: 4 additions & 0 deletions llvm/lib/Target/Sparc/SparcInstrAliases.td
Expand Up @@ -601,6 +601,10 @@ def : InstAlias<"flush", (FLUSH), 0>;
// unimp -> unimp 0
def : InstAlias<"unimp", (UNIMP 0), 0>;

// Not in the spec, but we follow the Solaris behavior of treating `illtrap`
// and `unimp` as interchangeable at all times.
def : MnemonicAlias<"illtrap", "unimp">;

def : MnemonicAlias<"iflush", "flush">;

def : MnemonicAlias<"stub", "stb">;
19 changes: 17 additions & 2 deletions llvm/lib/Target/X86/X86InstCombineIntrinsic.cpp
Expand Up @@ -2963,14 +2963,29 @@ X86TTIImpl::instCombineIntrinsic(InstCombiner &IC, IntrinsicInst &II) const {

case Intrinsic::x86_avx_vpermilvar_ps:
case Intrinsic::x86_avx_vpermilvar_ps_256:
case Intrinsic::x86_avx512_vpermilvar_ps_512:
case Intrinsic::x86_avx512_vpermilvar_ps_512: {
if (Value *V = simplifyX86vpermilvar(II, IC.Builder)) {
return IC.replaceInstUsesWith(II, V);
}

KnownBits KnownMask(32);
if (IC.SimplifyDemandedBits(&II, 1, APInt(32, 0b00011), KnownMask))
return &II;
break;
}

case Intrinsic::x86_avx_vpermilvar_pd:
case Intrinsic::x86_avx_vpermilvar_pd_256:
case Intrinsic::x86_avx512_vpermilvar_pd_512:
case Intrinsic::x86_avx512_vpermilvar_pd_512: {
if (Value *V = simplifyX86vpermilvar(II, IC.Builder)) {
return IC.replaceInstUsesWith(II, V);
}

KnownBits KnownMask(64);
if (IC.SimplifyDemandedBits(&II, 1, APInt(64, 0b00010), KnownMask))
return &II;
break;
}

case Intrinsic::x86_avx2_permd:
case Intrinsic::x86_avx2_permps:
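
For context on the demanded-bits masks used in the vpermilvar cases above: the variable forms only read a couple of low bits of each control element, bits 1:0 for the ps variants and just bit 1 for the pd variants, so the remaining mask bits can be simplified away. A small scalar model of the pd selection within one 128-bit lane (illustration only, not the intrinsic itself):

#include <cstdint>
#include <cstdio>

int main() {
  double Lane[2] = {1.5, 2.5}; // one 128-bit lane holding two doubles
  for (std::uint64_t Mask = 0; Mask < 4; ++Mask) {
    // vpermilvar.pd selects element (Mask >> 1) & 1; bit 0 of the mask is ignored.
    double Picked = Lane[(Mask >> 1) & 1];
    std::printf("mask=%llu -> %g\n", static_cast<unsigned long long>(Mask), Picked);
  }
  return 0;
}
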
26 changes: 15 additions & 11 deletions llvm/lib/Transforms/Utils/SCCPSolver.cpp
Expand Up @@ -820,19 +820,21 @@ class SCCPInstVisitor : public InstVisitor<SCCPInstVisitor> {
markOverdefined(ValueState[V], V);
}

void trackValueOfArgument(Argument *A) {
ValueLatticeElement getArgAttributeVL(Argument *A) {
if (A->getType()->isIntOrIntVectorTy()) {
if (std::optional<ConstantRange> Range = A->getRange()) {
markConstantRange(ValueState[A], A, *Range);
return;
}
}
if (A->hasNonNullAttr()) {
markNotNull(ValueState[A], A);
return;
if (std::optional<ConstantRange> Range = A->getRange())
return ValueLatticeElement::getRange(*Range);
}
if (A->hasNonNullAttr())
return ValueLatticeElement::getNot(Constant::getNullValue(A->getType()));
// Assume nothing about the incoming arguments without attributes.
markOverdefined(A);
return ValueLatticeElement::getOverdefined();
}

void trackValueOfArgument(Argument *A) {
if (A->getType()->isStructTy())
return (void)markOverdefined(A);
mergeInValue(A, getArgAttributeVL(A));
}

bool isStructLatticeConstant(Function *F, StructType *STy);
Expand Down Expand Up @@ -1800,7 +1802,9 @@ void SCCPInstVisitor::handleCallArguments(CallBase &CB) {
getMaxWidenStepsOpts());
}
} else
mergeInValue(&*AI, getValueState(*CAI), getMaxWidenStepsOpts());
mergeInValue(&*AI,
getValueState(*CAI).intersect(getArgAttributeVL(&*AI)),
getMaxWidenStepsOpts());
}
}
}
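
To make the intersect step above concrete: the lattice value derived from the argument's attributes (its range/nonnull facts) is combined with whatever the solver already knows about the value flowing in from the call site, and the result can be strictly tighter than either input. A toy interval version (illustration only, not ValueLatticeElement):

#include <algorithm>
#include <cstdio>

struct Range { int Lo, Hi; }; // half-open interval [Lo, Hi)

static Range intersect(Range A, Range B) {
  return {std::max(A.Lo, B.Lo), std::min(A.Hi, B.Hi)};
}

int main() {
  Range FromAttr{0, 10};   // e.g. a range(i32 0, 10) attribute on the argument
  Range FromCaller{5, 20}; // what the solver knows about the incoming value
  Range Merged = intersect(FromAttr, FromCaller);
  std::printf("[%d, %d)\n", Merged.Lo, Merged.Hi); // [5, 10)
  return 0;
}
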
4 changes: 4 additions & 0 deletions llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
Expand Up @@ -6529,6 +6529,10 @@ LoopVectorizationCostModel::getInstructionCost(Instruction *I,
// Certain instructions can be cheaper to vectorize if they have a constant
// second vector operand. One example of this is shifts on x86.
Value *Op2 = I->getOperand(1);
if (!isa<Constant>(Op2) && PSE.getSE()->isSCEVable(Op2->getType()) &&
isa<SCEVConstant>(PSE.getSCEV(Op2))) {
Op2 = cast<SCEVConstant>(PSE.getSCEV(Op2))->getValue();
}
auto Op2Info = TTI.getOperandInfo(Op2);
if (Op2Info.Kind == TargetTransformInfo::OK_AnyValue &&
Legal->isInvariant(Op2))
72 changes: 36 additions & 36 deletions llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
Expand Up @@ -4757,13 +4757,12 @@ BoUpSLP::LoadsState BoUpSLP::canVectorizeLoads(
});
});
const unsigned AbsoluteDiff = std::abs(*Diff);
if (IsPossibleStrided &&
(IsAnyPointerUsedOutGraph ||
((Sz > MinProfitableStridedLoads ||
(AbsoluteDiff <= MaxProfitableLoadStride * Sz &&
has_single_bit(AbsoluteDiff))) &&
AbsoluteDiff > Sz) ||
*Diff == -(static_cast<int>(Sz) - 1))) {
if (IsPossibleStrided && (IsAnyPointerUsedOutGraph ||
((Sz > MinProfitableStridedLoads ||
(AbsoluteDiff <= MaxProfitableLoadStride * Sz &&
has_single_bit(AbsoluteDiff))) &&
AbsoluteDiff > Sz) ||
*Diff == -(static_cast<int>(Sz) - 1))) {
int Stride = *Diff / static_cast<int>(Sz - 1);
if (*Diff == Stride * static_cast<int>(Sz - 1)) {
Align Alignment =
Expand All @@ -4778,8 +4777,7 @@ BoUpSLP::LoadsState BoUpSLP::canVectorizeLoads(
if (Ptr == PtrN)
Dist = *Diff;
else if (Ptr != Ptr0)
Dist =
*getPointersDiff(ScalarTy, Ptr0, ScalarTy, Ptr, *DL, *SE);
Dist = *getPointersDiff(ScalarTy, Ptr0, ScalarTy, Ptr, *DL, *SE);
// If the strides are not the same or repeated, we can't
// vectorize.
if (((Dist / Stride) * Stride) != Dist ||
Expand Down Expand Up @@ -4822,14 +4820,14 @@ BoUpSLP::LoadsState BoUpSLP::canVectorizeLoads(
if (VectorizedCnt == VL.size() / VF) {
// Compare masked gather cost and loads + insertsubvector costs.
TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput;
auto [ScalarGEPCost, VectorGEPCost] = getGEPCosts(
TTI, PointerOps, PointerOps.front(), Instruction::GetElementPtr,
CostKind, ScalarTy, VecTy);
auto [ScalarGEPCost, VectorGEPCost] =
getGEPCosts(TTI, PointerOps, PointerOps.front(),
Instruction::GetElementPtr, CostKind, ScalarTy, VecTy);
InstructionCost MaskedGatherCost =
TTI.getGatherScatterOpCost(
Instruction::Load, VecTy,
cast<LoadInst>(VL0)->getPointerOperand(),
/*VariableMask=*/false, CommonAlignment, CostKind) +
TTI.getGatherScatterOpCost(Instruction::Load, VecTy,
cast<LoadInst>(VL0)->getPointerOperand(),
/*VariableMask=*/false, CommonAlignment,
CostKind) +
VectorGEPCost - ScalarGEPCost;
InstructionCost VecLdCost = 0;
auto *SubVecTy = getWidenedType(ScalarTy, VF);
Expand All @@ -4853,23 +4851,23 @@ BoUpSLP::LoadsState BoUpSLP::canVectorizeLoads(
getGEPCosts(TTI, ArrayRef(PointerOps).slice(I * VF, VF),
LI0->getPointerOperand(), Instruction::Load,
CostKind, ScalarTy, SubVecTy);
VecLdCost +=
TTI.getStridedMemoryOpCost(
Instruction::Load, SubVecTy, LI0->getPointerOperand(),
/*VariableMask=*/false, CommonAlignment, CostKind) +
VectorGEPCost - ScalarGEPCost;
VecLdCost += TTI.getStridedMemoryOpCost(Instruction::Load, SubVecTy,
LI0->getPointerOperand(),
/*VariableMask=*/false,
CommonAlignment, CostKind) +
VectorGEPCost - ScalarGEPCost;
break;
}
case LoadsState::ScatterVectorize: {
auto [ScalarGEPCost, VectorGEPCost] = getGEPCosts(
TTI, ArrayRef(PointerOps).slice(I * VF, VF),
LI0->getPointerOperand(), Instruction::GetElementPtr,
CostKind, ScalarTy, SubVecTy);
VecLdCost +=
TTI.getGatherScatterOpCost(
Instruction::Load, SubVecTy, LI0->getPointerOperand(),
/*VariableMask=*/false, CommonAlignment, CostKind) +
VectorGEPCost - ScalarGEPCost;
LI0->getPointerOperand(), Instruction::GetElementPtr, CostKind,
ScalarTy, SubVecTy);
VecLdCost += TTI.getGatherScatterOpCost(Instruction::Load, SubVecTy,
LI0->getPointerOperand(),
/*VariableMask=*/false,
CommonAlignment, CostKind) +
VectorGEPCost - ScalarGEPCost;
break;
}
case LoadsState::Gather:
Expand All @@ -4880,8 +4878,8 @@ BoUpSLP::LoadsState BoUpSLP::canVectorizeLoads(
for (int Idx : seq<int>(0, VL.size()))
ShuffleMask[Idx] = Idx / VF == I ? VL.size() + Idx % VF : Idx;
VecLdCost +=
::getShuffleCost(TTI, TTI::SK_InsertSubvector, VecTy,
ShuffleMask, CostKind, I * VF, SubVecTy);
::getShuffleCost(TTI, TTI::SK_InsertSubvector, VecTy, ShuffleMask,
CostKind, I * VF, SubVecTy);
}
// If masked gather cost is higher - better to vectorize, so
// consider it as a gather node. It will be better estimated
Expand All @@ -4897,10 +4895,9 @@ BoUpSLP::LoadsState BoUpSLP::canVectorizeLoads(
// increases the cost.
Loop *L = LI->getLoopFor(cast<LoadInst>(VL0)->getParent());
bool ProfitableGatherPointers =
L && Sz > 2 &&
static_cast<unsigned>(count_if(PointerOps, [L](Value *V) {
return L->isLoopInvariant(V);
})) <= Sz / 2;
L && Sz > 2 && static_cast<unsigned>(count_if(PointerOps, [L](Value *V) {
return L->isLoopInvariant(V);
})) <= Sz / 2;
if (ProfitableGatherPointers || all_of(PointerOps, [IsSorted](Value *P) {
auto *GEP = dyn_cast<GetElementPtrInst>(P);
return (IsSorted && !GEP && doesNotNeedToBeScheduled(P)) ||
Expand Down Expand Up @@ -5229,6 +5226,9 @@ BoUpSLP::getReorderingData(const TreeEntry &TE, bool TopToBottom) {
!TE.isAltShuffle())
return TE.ReorderIndices;
if (TE.State == TreeEntry::Vectorize && TE.getOpcode() == Instruction::PHI) {
if (!TE.ReorderIndices.empty())
return TE.ReorderIndices;

auto PHICompare = [&](unsigned I1, unsigned I2) {
Value *V1 = TE.Scalars[I1];
Value *V2 = TE.Scalars[I2];
Expand Down Expand Up @@ -5262,8 +5262,6 @@ BoUpSLP::getReorderingData(const TreeEntry &TE, bool TopToBottom) {
return false;
return true;
};
if (!TE.ReorderIndices.empty())
return TE.ReorderIndices;
DenseMap<unsigned, unsigned> PhiToId;
SmallVector<unsigned> Phis(TE.Scalars.size());
std::iota(Phis.begin(), Phis.end(), 0);
Expand Down Expand Up @@ -9465,6 +9463,8 @@ BoUpSLP::getEntryCost(const TreeEntry *E, ArrayRef<Value *> VectorizedVals,
return 0;
if (isa<InsertElementInst>(VL[0]))
return InstructionCost::getInvalid();
if (isa<CmpInst>(VL.front()))
ScalarTy = VL.front()->getType();
return processBuildVector<ShuffleCostEstimator, InstructionCost>(
E, ScalarTy, *TTI, VectorizedVals, *this, CheckedExtracts);
}