27 changes: 25 additions & 2 deletions lld/test/ELF/ppc32-tls-ie.s
Original file line number Diff line number Diff line change
Expand Up @@ -12,8 +12,8 @@
# IE-REL: FLAGS STATIC_TLS
## A non-preemptable symbol (b) has 0 st_shndx.
# IE-REL: .rela.dyn {
# IE-REL-NEXT: 0x20238 R_PPC_TPREL32 - 0xC
# IE-REL-NEXT: 0x20234 R_PPC_TPREL32 a 0x0
# IE-REL-NEXT: 0x20258 R_PPC_TPREL32 - 0xC
# IE-REL-NEXT: 0x20254 R_PPC_TPREL32 a 0x0
# IE-REL-NEXT: }

## &.got[3] - _GLOBAL_OFFSET_TABLE_ = 12
Expand Down Expand Up @@ -44,19 +44,39 @@ lbzx 10, 8, c@tls
# IE-NEXT: stbx 14, 4, 2
# IE-NEXT: sthx 15, 5, 2
# IE-NEXT: stwx 16, 6, 2
# IE-NEXT: lhax 17, 7, 2
# IE-NEXT: lwax 18, 8, 2
# IE-NEXT: lfsx 19, 9, 2
# IE-NEXT: lfdx 20, 10, 2
# IE-NEXT: stfsx 21, 11, 2
# IE-NEXT: stfdx 22, 12, 2

## In LE, these X-Form instructions are changed to their corresponding D-Form.
# LE-NEXT: lhz 12, -28660(2)
# LE-NEXT: lwz 13, -28660(3)
# LE-NEXT: stb 14, -28660(4)
# LE-NEXT: sth 15, -28660(5)
# LE-NEXT: stw 16, -28660(6)
# LE-NEXT: lha 17, -28660(7)
# LE-NEXT: lwa 18, -28660(8)
# LE-NEXT: lfs 19, -28660(9)
# LE-NEXT: lfd 20, -28660(10)
# LE-NEXT: stfs 21, -28660(11)
# LE-NEXT: stfd 22, -28660(12)

## X-Form TLS accesses matched by the IE-NEXT/LE-NEXT lines above.
lhzx 12, 2, s@tls
lwzx 13, 3, i@tls
stbx 14, 4, c@tls
sthx 15, 5, s@tls
stwx 16, 6, i@tls
lhax 17, 7, s@tls
lwax 18, 8, i@tls
lfsx 19, 9, f@tls
lfdx 20, 10, d@tls
stfsx 21, 11, f@tls
stfdx 22, 12, d@tls
## NOTE(review): the IE-NEXT/LE-NEXT lines above stop at stfdx/stfd; ldx and
## stdx below have no matching check line in this view -- confirm that this is
## intended.
ldx 23, 13, l@tls
stdx 24, 14, l@tls

.section .tbss
.globl a
Expand All @@ -66,3 +86,6 @@ a:
c:
s:
i:
f:
d:
l:
72 changes: 67 additions & 5 deletions lld/test/ELF/ppc64-tls-ie.s
Original file line number Diff line number Diff line change
Expand Up @@ -24,10 +24,12 @@

# IE-REL: FLAGS STATIC_TLS
# IE-REL: .rela.dyn {
# IE-REL-NEXT: 0x204C8 R_PPC64_TPREL64 c 0x0
# IE-REL-NEXT: 0x204D0 R_PPC64_TPREL64 s 0x0
# IE-REL-NEXT: 0x204D8 R_PPC64_TPREL64 i 0x0
# IE-REL-NEXT: 0x204E0 R_PPC64_TPREL64 l 0x0
# IE-REL-NEXT: 0x205A8 R_PPC64_TPREL64 c 0x0
# IE-REL-NEXT: 0x205B0 R_PPC64_TPREL64 s 0x0
# IE-REL-NEXT: 0x205B8 R_PPC64_TPREL64 i 0x0
# IE-REL-NEXT: 0x205C0 R_PPC64_TPREL64 l 0x0
# IE-REL-NEXT: 0x205C8 R_PPC64_TPREL64 f 0x0
# IE-REL-NEXT: 0x205D0 R_PPC64_TPREL64 d 0x0
# IE-REL-NEXT: }

# INPUT-REL: R_PPC64_GOT_TPREL16_HA c 0x0
Expand Down Expand Up @@ -152,10 +154,64 @@ test_ds:
ld 4, l@got@tprel(2)
stdx 3, 4, l@tls

## lhax on s through the GOT TPREL sequence; the LE checks expect
## nop + addis + D-Form lha.
# LE-LABEL: <test_lhax>:
# LE-NEXT: nop
# LE-NEXT: addis 3, 13, 0
# LE-NEXT: lha 3, -28670(3)
test_lhax:
addis 3, 2, s@got@tprel@ha
ld 3, s@got@tprel@l(3)
lhax 3, 3, s@tls

## lwax on i through the GOT TPREL sequence; the LE checks expect
## nop + addis + lwa.
# LE-LABEL: <test_lwax>:
# LE-NEXT: nop
# LE-NEXT: addis 3, 13, 0
# LE-NEXT: lwa 3, -28668(3)
test_lwax:
addis 3, 2, i@got@tprel@ha
ld 3, i@got@tprel@l(3)
lwax 3, 3, i@tls

## lfsx on f through the GOT TPREL sequence; the LE checks expect
## nop + addis + D-Form lfs.
# LE-LABEL: <test_lfsx>:
# LE-NEXT: nop
# LE-NEXT: addis 3, 13, 0
# LE-NEXT: lfs 3, -28656(3)
test_lfsx:
addis 3, 2, f@got@tprel@ha
ld 3, f@got@tprel@l(3)
lfsx 3, 3, f@tls

## lfdx on d through the GOT TPREL sequence; the LE checks expect
## nop + addis + D-Form lfd.
# LE-LABEL: <test_lfdx>:
# LE-NEXT: nop
# LE-NEXT: addis 3, 13, 0
# LE-NEXT: lfd 3, -28648(3)
test_lfdx:
addis 3, 2, d@got@tprel@ha
ld 3, d@got@tprel@l(3)
lfdx 3, 3, d@tls

## stfsx to f through the GOT TPREL sequence (address built in r4); the LE
## checks expect nop + addis + D-Form stfs.
# LE-LABEL: <test_stfsx>:
# LE-NEXT: nop
# LE-NEXT: addis 4, 13, 0
# LE-NEXT: stfs 3, -28656(4)
test_stfsx:
addis 4, 2, f@got@tprel@ha
ld 4, f@got@tprel@l(4)
stfsx 3, 4, f@tls

## stfdx to d through the GOT TPREL sequence (address built in r4); the LE
## checks expect nop + addis + D-Form stfd.
# LE-LABEL: <test_stfdx>:
# LE-NEXT: nop
# LE-NEXT: addis 4, 13, 0
# LE-NEXT: stfd 3, -28648(4)
test_stfdx:
addis 4, 2, d@got@tprel@ha
ld 4, d@got@tprel@l(4)
stfdx 3, 4, d@tls

# NOREL: There are no relocations in this file.

.section .tdata,"awT",@progbits
.globl c, s, i, l
.globl c, s, i, l, f, d
c:
.byte 97

Expand All @@ -170,3 +226,9 @@ i:
.p2align 3
l:
.quad 55
f:
.long 55

.p2align 3
d:
.quad 55
132 changes: 120 additions & 12 deletions lld/test/ELF/ppc64-tls-pcrel-ie.s
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,12 @@ SECTIONS {
.text_val 0x1002000 : { *(.text_val) }
.text_twoval 0x1003000 : { *(.text_twoval) }
.text_incrval 0x1004000 : { *(.text_incrval) }
.text_incrval_half 0x1005000 : { *(.text_incrval_half) }
.text_incrval_word 0x1006000 : { *(.text_incrval_word) }
.text_incrval_float 0x1007000 : { *(.text_incrval_float) }
.text_incrval_double 0x1008000 : { *(.text_incrval_double) }
.text_incrval_dword 0x1009000 : { *(.text_incrval_dword) }
.text_incrval_half_zero 0x1010000 : { *(.text_incrval_half_zero) }
}

#--- defs
Expand All @@ -42,26 +48,26 @@ y:

#--- asm
# IE-RELOC: Relocation section '.rela.dyn' at offset 0x10090 contains 2 entries:
# IE-RELOC: 00000000010040f0 0000000100000049 R_PPC64_TPREL64 0000000000000000 x + 0
# IE-RELOC: 00000000010040f8 0000000200000049 R_PPC64_TPREL64 0000000000000000 y + 0
# IE-RELOC: 00000000010100f0 0000000100000049 R_PPC64_TPREL64 0000000000000000 x + 0
# IE-RELOC-NEXT: 00000000010100f8 0000000200000049 R_PPC64_TPREL64 0000000000000000 y + 0

# IE-SYM: Symbol table '.dynsym' contains 3 entries:
# IE-SYM: 1: 0000000000000000 0 TLS GLOBAL DEFAULT UND x
# IE-SYM: 2: 0000000000000000 0 TLS GLOBAL DEFAULT UND y

# IE-GOT: Hex dump of section '.got':
# IE-GOT-NEXT: 0x010040e8 e8c00001 00000000 00000000 00000000
# IE-GOT-NEXT: 0x010100e8 e8800101 00000000 00000000 00000000

# LE-RELOC: There are no relocations in this file.

# LE-SYM: Symbol table '.symtab' contains 8 entries:
# LE-SYM: 6: 0000000000000000 0 TLS GLOBAL DEFAULT 6 x
# LE-SYM: 7: 0000000000000004 0 TLS GLOBAL DEFAULT 6 y
# LE-SYM: Symbol table '.symtab' contains 14 entries:
# LE-SYM: 0000000000000000 0 TLS GLOBAL DEFAULT [[#]] x
# LE-SYM: 0000000000000004 0 TLS GLOBAL DEFAULT [[#]] y

# LE-GOT: could not find section '.got'

# IE-LABEL: <IEAddr>:
# IE-NEXT: pld 3, 12528(0), 1
# IE-NEXT: pld 3, 61680(0), 1
# IE-NEXT: add 3, 3, 13
# IE-NEXT: blr
# LE-LABEL: <IEAddr>:
Expand All @@ -75,7 +81,7 @@ IEAddr:
blr

# IE-LABEL: <IEAddrCopy>:
# IE-NEXT: pld 3, 12512(0), 1
# IE-NEXT: pld 3, 61664(0), 1
# IE-NEXT: add 4, 3, 13
# IE-NEXT: blr
# LE-LABEL: <IEAddrCopy>:
Expand All @@ -89,7 +95,7 @@ IEAddrCopy:
blr

# IE-LABEL: <IEVal>:
# IE-NEXT: pld 3, 8432(0), 1
# IE-NEXT: pld 3, 57584(0), 1
# IE-NEXT: lwzx 3, 3, 13
# IE-NEXT: blr
# LE-LABEL: <IEVal>:
Expand All @@ -103,8 +109,8 @@ IEVal:
blr

# IE-LABEL: <IETwoVal>:
# IE-NEXT: pld 3, 4336(0), 1
# IE-NEXT: pld 4, 4336(0), 1
# IE-NEXT: pld 3, 53488(0), 1
# IE-NEXT: pld 4, 53488(0), 1
# IE-NEXT: lwzx 3, 3, 13
# IE-NEXT: lwzx 4, 4, 13
# IE-NEXT: blr
Expand All @@ -123,7 +129,7 @@ IETwoVal:
blr

# IE-LABEL: <IEIncrementVal>:
# IE-NEXT: pld 4, 248(0), 1
# IE-NEXT: pld 4, 49400(0), 1
# IE-NEXT: lwzx 3, 4, 13
# IE-NEXT: stwx 3, 4, 13
# IE-NEXT: blr
Expand All @@ -138,3 +144,105 @@ IEIncrementVal:
lwzx 3, 4, y@tls@pcrel
stwx 3, 4, y@tls@pcrel
blr

## Load (sign-extending halfword) and store y through a PC-relative IE access.
## IE keeps pld plus the X-Form lhax/sthx indexed by r13; LE expects paddi off
## r13 plus lha/sth with a 0 displacement.
# IE-LABEL: <IEIncrementValHalf>:
# IE-NEXT: pld 4, 45304(0), 1
# IE-NEXT: lhax 3, 4, 13
# IE-NEXT: sthx 3, 4, 13
# IE-NEXT: blr
# LE-LABEL: <IEIncrementValHalf>:
# LE-NEXT: paddi 4, 13, -28668, 0
# LE-NEXT: lha 3, 0(4)
# LE-NEXT: sth 3, 0(4)
# LE-NEXT: blr
.section .text_incrval_half, "ax", %progbits
IEIncrementValHalf:
pld 4, y@got@tprel@pcrel(0), 1
lhax 3, 4, y@tls@pcrel
sthx 3, 4, y@tls@pcrel
blr

## Load (lwax) and store (stwx) y through a PC-relative IE access.
## IE keeps pld plus the X-Form instructions indexed by r13; LE expects paddi
## off r13 plus lwa/stw with a 0 displacement.
# IE-LABEL: <IEIncrementValWord>:
# IE-NEXT: pld 4, 41208(0), 1
# IE-NEXT: lwax 3, 4, 13
# IE-NEXT: stwx 3, 4, 13
# IE-NEXT: blr
# LE-LABEL: <IEIncrementValWord>:
# LE-NEXT: paddi 4, 13, -28668, 0
# LE-NEXT: lwa 3, 0(4)
# LE-NEXT: stw 3, 0(4)
# LE-NEXT: blr
.section .text_incrval_word, "ax", %progbits
IEIncrementValWord:
pld 4, y@got@tprel@pcrel(0), 1
lwax 3, 4, y@tls@pcrel
stwx 3, 4, y@tls@pcrel
blr

## Single-precision FP load/store of y through a PC-relative IE access.
## IE keeps pld plus the X-Form lfsx/stfsx indexed by r13; LE expects paddi off
## r13 plus lfs/stfs with a 0 displacement.
# IE-LABEL: <IEIncrementValFloat>:
# IE-NEXT: pld 4, 37112(0), 1
# IE-NEXT: lfsx 3, 4, 13
# IE-NEXT: stfsx 3, 4, 13
# IE-NEXT: blr
# LE-LABEL: <IEIncrementValFloat>:
# LE-NEXT: paddi 4, 13, -28668, 0
# LE-NEXT: lfs 3, 0(4)
# LE-NEXT: stfs 3, 0(4)
# LE-NEXT: blr
.section .text_incrval_float, "ax", %progbits
IEIncrementValFloat:
pld 4, y@got@tprel@pcrel(0), 1
lfsx 3, 4, y@tls@pcrel
stfsx 3, 4, y@tls@pcrel
blr

## Double-precision FP load/store of y through a PC-relative IE access.
## IE keeps pld plus the X-Form lfdx/stfdx indexed by r13; LE expects paddi off
## r13 plus lfd/stfd with a 0 displacement.
# IE-LABEL: <IEIncrementValDouble>:
# IE-NEXT: pld 4, 33016(0), 1
# IE-NEXT: lfdx 3, 4, 13
# IE-NEXT: stfdx 3, 4, 13
# IE-NEXT: blr
# LE-LABEL: <IEIncrementValDouble>:
# LE-NEXT: paddi 4, 13, -28668, 0
# LE-NEXT: lfd 3, 0(4)
# LE-NEXT: stfd 3, 0(4)
# LE-NEXT: blr
.section .text_incrval_double, "ax", %progbits
IEIncrementValDouble:
pld 4, y@got@tprel@pcrel(0), 1
lfdx 3, 4, y@tls@pcrel
stfdx 3, 4, y@tls@pcrel
blr

## Doubleword load/store of y through a PC-relative IE access.
## IE keeps pld plus the X-Form ldx/stdx indexed by r13; LE expects paddi off
## r13 plus ld/std with a 0 displacement.
# IE-LABEL: <IEIncrementValDword>:
# IE-NEXT: pld 4, 28920(0), 1
# IE-NEXT: ldx 3, 4, 13
# IE-NEXT: stdx 3, 4, 13
# IE-NEXT: blr
# LE-LABEL: <IEIncrementValDword>:
# LE-NEXT: paddi 4, 13, -28668, 0
# LE-NEXT: ld 3, 0(4)
# LE-NEXT: std 3, 0(4)
# LE-NEXT: blr
.section .text_incrval_dword, "ax", %progbits
IEIncrementValDword:
pld 4, y@got@tprel@pcrel(0), 1
ldx 3, 4, y@tls@pcrel
stdx 3, 4, y@tls@pcrel
blr

## Load (zero-extending halfword) and store y through a PC-relative IE access.
## IE keeps pld plus the X-Form lhzx/sthx indexed by r13; LE expects paddi off
## r13 plus lhz/sth with a 0 displacement.
# IE-LABEL: <IEIncrementValHalfZero>:
# IE-NEXT: pld 4, 248(0), 1
# IE-NEXT: lhzx 3, 4, 13
# IE-NEXT: sthx 3, 4, 13
# IE-NEXT: blr
# LE-LABEL: <IEIncrementValHalfZero>:
# LE-NEXT: paddi 4, 13, -28668, 0
# LE-NEXT: lhz 3, 0(4)
# LE-NEXT: sth 3, 0(4)
# LE-NEXT: blr
.section .text_incrval_half_zero, "ax", %progbits
IEIncrementValHalfZero:
pld 4, y@got@tprel@pcrel(0), 1
lhzx 3, 4, y@tls@pcrel
sthx 3, 4, y@tls@pcrel
blr
2 changes: 2 additions & 0 deletions lldb/source/Host/windows/FileSystem.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -101,6 +101,8 @@ int FileSystem::Open(const char *path, int flags, int mode) {
std::wstring wpath;
if (!llvm::ConvertUTF8toWide(path, wpath))
return -1;
// All other bits are rejected by _wsopen_s
mode = mode & (_S_IREAD | _S_IWRITE);
int result;
::_wsopen_s(&result, wpath.c_str(), flags, _SH_DENYNO, mode);
return result;
Expand Down
1 change: 1 addition & 0 deletions llvm/include/llvm/Option/ArgList.h
Original file line number Diff line number Diff line change
Expand Up @@ -299,6 +299,7 @@ class ArgList {
/// \p Default if neither option is given. If both the option and its
/// negation are present, the last one wins.
bool hasFlag(OptSpecifier Pos, OptSpecifier Neg, bool Default) const;
/// hasFlagNoClaim - Same Pos/Neg/Default parameters as the hasFlag overload
/// directly above. NOTE(review): presumably resolves the flag without
/// claiming the matched argument, as the name suggests -- confirm against
/// the definition.
bool hasFlagNoClaim(OptSpecifier Pos, OptSpecifier Neg, bool Default) const;

/// hasFlag - Given an option \p Pos, an alias \p PosAlias and its negative
/// form \p Neg, return true if the option or its alias is present, false if
Expand Down
7 changes: 7 additions & 0 deletions llvm/lib/Option/ArgList.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -75,6 +75,13 @@ bool ArgList::hasFlag(OptSpecifier Pos, OptSpecifier Neg, bool Default) const {
return Default;
}

/// Resolve a Pos/Neg flag pair using getLastArgNoClaim for the lookup.
///
/// Returns \p Default when neither option is found; otherwise returns whether
/// the option of the last matching argument matches \p Pos.
bool ArgList::hasFlagNoClaim(OptSpecifier Pos, OptSpecifier Neg,
                             bool Default) const {
  Arg *LastMatch = getLastArgNoClaim(Pos, Neg);
  // Neither form was given: fall back to the caller's default.
  if (!LastMatch)
    return Default;
  return LastMatch->getOption().matches(Pos);
}

bool ArgList::hasFlag(OptSpecifier Pos, OptSpecifier PosAlias, OptSpecifier Neg,
bool Default) const {
if (Arg *A = getLastArg(Pos, PosAlias, Neg))
Expand Down
2 changes: 1 addition & 1 deletion llvm/lib/Target/AArch64/AArch64InstrInfo.td
Original file line number Diff line number Diff line change
Expand Up @@ -2253,7 +2253,7 @@ def : Pat<(int_aarch64_irg_sp i64:$Rm), (IRGstack SP, i64:$Rm)>;

// Large STG to be expanded into a loop. $sz is the size, $Rn is start address.
// $Rn_wback is one past the end of the range. $Rm is the loop counter.
let isCodeGenOnly=1, mayStore=1 in {
let isCodeGenOnly=1, mayStore=1, Defs=[NZCV] in {
def STGloop_wback
: Pseudo<(outs GPR64common:$Rm, GPR64sp:$Rn_wback), (ins i64imm:$sz, GPR64sp:$Rn),
[], "$Rn = $Rn_wback,@earlyclobber $Rn_wback,@earlyclobber $Rm" >,
Expand Down
9 changes: 9 additions & 0 deletions llvm/lib/Target/RISCV/RISCVExpandAtomicPseudoInsts.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -572,6 +572,15 @@ bool tryToFoldBNEOnCmpXchgResult(MachineBasicBlock &MBB,
if (!(BNEOp0 == DestReg && BNEOp1 == CmpValReg) &&
!(BNEOp0 == CmpValReg && BNEOp1 == DestReg))
return false;

// Make sure the branch is the only user of the AND.
if (MaskReg.isValid()) {
if (BNEOp0 == DestReg && !MBBI->getOperand(0).isKill())
return false;
if (BNEOp1 == DestReg && !MBBI->getOperand(1).isKill())
return false;
}

ToErase.push_back(&*MBBI);
LoopHeadBNETarget = MBBI->getOperand(2).getMBB();
MBBI = skipDebugInstructionsForward(std::next(MBBI), E);
Expand Down
3 changes: 2 additions & 1 deletion llvm/lib/Target/RISCV/RISCVFrameLowering.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -772,7 +772,8 @@ void RISCVFrameLowering::emitEpilogue(MachineFunction &MF,
if (FirstSPAdjustAmount)
StackSize = FirstSPAdjustAmount;

if (RVFI->isPushable(MF) && MBBI->getOpcode() == RISCV::CM_POP) {
if (RVFI->isPushable(MF) && MBBI != MBB.end() &&
MBBI->getOpcode() == RISCV::CM_POP) {
// Use available stack adjustment in pop instruction to deallocate stack
// space.
unsigned PushStack = RVFI->getRVPushRegs() * (STI.getXLen() / 8);
Expand Down
17 changes: 14 additions & 3 deletions llvm/lib/Target/X86/X86ISelLowering.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2272,8 +2272,10 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,

if (!Subtarget.useSoftFloat() &&
(Subtarget.hasAVXNECONVERT() || Subtarget.hasBF16())) {
addRegisterClass(MVT::v8bf16, &X86::VR128XRegClass);
addRegisterClass(MVT::v16bf16, &X86::VR256XRegClass);
addRegisterClass(MVT::v8bf16, Subtarget.hasAVX512() ? &X86::VR128XRegClass
: &X86::VR128RegClass);
addRegisterClass(MVT::v16bf16, Subtarget.hasAVX512() ? &X86::VR256XRegClass
: &X86::VR256RegClass);
// We set the type action of bf16 to TypeSoftPromoteHalf, but we don't
// provide the method to promote BUILD_VECTOR and INSERT_VECTOR_ELT.
// Set the operation action Custom to do the customization later.
Expand All @@ -2288,6 +2290,7 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom);
}
setOperationAction(ISD::FP_ROUND, MVT::v8bf16, Custom);
addLegalFPImmediate(APFloat::getZero(APFloat::BFloat()));
}

Expand All @@ -2299,6 +2302,7 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
setOperationAction(ISD::FMUL, MVT::v32bf16, Expand);
setOperationAction(ISD::FDIV, MVT::v32bf16, Expand);
setOperationAction(ISD::BUILD_VECTOR, MVT::v32bf16, Custom);
setOperationAction(ISD::FP_ROUND, MVT::v16bf16, Custom);
setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v32bf16, Custom);
}

Expand Down Expand Up @@ -11360,7 +11364,8 @@ X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const {
if (VT.getVectorElementType() == MVT::i1 && Subtarget.hasAVX512())
return LowerBUILD_VECTORvXi1(Op, DAG, Subtarget);

if (VT.getVectorElementType() == MVT::bf16 && Subtarget.hasBF16())
if (VT.getVectorElementType() == MVT::bf16 &&
(Subtarget.hasAVXNECONVERT() || Subtarget.hasBF16()))
return LowerBUILD_VECTORvXbf16(Op, DAG, Subtarget);

if (SDValue VectorConstant = materializeVectorConstant(Op, DAG, Subtarget))
Expand Down Expand Up @@ -24048,6 +24053,12 @@ SDValue X86TargetLowering::LowerFP_ROUND(SDValue Op, SelectionDAG &DAG) const {
return Res;
}

if (VT.getScalarType() == MVT::bf16) {
if (SVT.getScalarType() == MVT::f32 && isTypeLegal(VT))
return Op;
return SDValue();
}

if (VT.getScalarType() == MVT::f16 && !Subtarget.hasFP16()) {
if (!Subtarget.hasF16C() || SVT.getScalarType() != MVT::f32)
return SDValue();
Expand Down
10 changes: 10 additions & 0 deletions llvm/lib/Target/X86/X86InstrAVX512.td
Original file line number Diff line number Diff line change
Expand Up @@ -12976,6 +12976,11 @@ let Predicates = [HasBF16, HasVLX] in {
def : Pat<(v16bf16 (X86VBroadcast (v8bf16 VR128X:$src))),
(VPBROADCASTWZ256rr VR128X:$src)>;

// Select X86vfpround from v8f32 to v8bf16 (register source, then loaded
// source) to VCVTNEPS2BF16Z256.
def : Pat<(v8bf16 (X86vfpround (v8f32 VR256X:$src))),
(VCVTNEPS2BF16Z256rr VR256X:$src)>;
def : Pat<(v8bf16 (X86vfpround (loadv8f32 addr:$src))),
(VCVTNEPS2BF16Z256rm addr:$src)>;

// TODO: No scalar broadcast because we don't support legal scalar bf16 yet.
}

Expand All @@ -12985,6 +12990,11 @@ let Predicates = [HasBF16] in {

def : Pat<(v32bf16 (X86VBroadcast (v8bf16 VR128X:$src))),
(VPBROADCASTWZrr VR128X:$src)>;

// Select X86vfpround from v16f32 to v16bf16 (register source, then loaded
// source) to VCVTNEPS2BF16Z.
def : Pat<(v16bf16 (X86vfpround (v16f32 VR512:$src))),
(VCVTNEPS2BF16Zrr VR512:$src)>;
def : Pat<(v16bf16 (X86vfpround (loadv16f32 addr:$src))),
(VCVTNEPS2BF16Zrm addr:$src)>;
// TODO: No scalar broadcast because we don't support legal scalar bf16 yet.
}

Expand Down
5 changes: 5 additions & 0 deletions llvm/lib/Target/X86/X86InstrSSE.td
Original file line number Diff line number Diff line change
Expand Up @@ -8289,6 +8289,11 @@ let Predicates = [HasAVXNECONVERT] in {
f256mem>, T8PS;
let checkVEXPredicate = 1 in
defm VCVTNEPS2BF16 : VCVTNEPS2BF16_BASE, VEX, T8XS, ExplicitVEXPrefix;

// Select X86vfpround from v8f32 to v8bf16 (register source, then loaded
// source) to VCVTNEPS2BF16Y.
def : Pat<(v8bf16 (X86vfpround (v8f32 VR256:$src))),
(VCVTNEPS2BF16Yrr VR256:$src)>;
def : Pat<(v8bf16 (X86vfpround (loadv8f32 addr:$src))),
(VCVTNEPS2BF16Yrm addr:$src)>;
}

def : InstAlias<"vcvtneps2bf16x\t{$src, $dst|$dst, $src}",
Expand Down
59 changes: 59 additions & 0 deletions llvm/test/CodeGen/AArch64/memtag-loop-nzcv.ll
Original file line number Diff line number Diff line change
@@ -0,0 +1,59 @@
; RUN: llc -O2 -print-after-isel -mtriple=aarch64-linux-gnu %s -o /dev/null 2>&1 | FileCheck %s --check-prefixes=CHECK

; This test function includes a 256-byte buffer. We expect it to require its
; MTE tags to be set to a useful value on entry, and cleared again on exit. At
; the time of writing this test, the pseudo-instructions chosen are
; STGloop_wback and STGloop respectively, but if different pseudos are selected
; in future, that's not a problem. The important thing is that both should
; include that implicit-def of $nzcv, because these pseudo-instructions will
; expand into loops that use the flags for their termination tests.

; CHECK: STGloop_wback 256, {{.*}}, implicit-def dead $nzcv
; CHECK: STGloop 256, {{.*}}, implicit-def dead $nzcv

; @foo calls @read with the 256-byte stack buffer %3 and a length of 256, sums
; the first %9 bytes of the buffer (%9 is the value @read returned) into %5,
; and returns that sum srem 251. Attribute group #0 marks the function
; sanitize_memtag with +mte.
define i32 @foo(i32 noundef %0) #0 {
%2 = alloca i32, align 4
%3 = alloca [256 x i8], align 1
%4 = alloca i64, align 8
%5 = alloca i32, align 4
%6 = alloca i64, align 8
store i32 %0, ptr %2, align 4
%7 = load i32, ptr %2, align 4
%8 = getelementptr inbounds [256 x i8], ptr %3, i64 0, i64 0
%9 = call i64 @read(i32 noundef %7, ptr noundef %8, i64 noundef 256)
store i64 %9, ptr %4, align 8
store i32 0, ptr %5, align 4
store i64 0, ptr %6, align 8
br label %10

10: ; preds = %21, %1
%11 = load i64, ptr %6, align 8
%12 = load i64, ptr %4, align 8
%13 = icmp ult i64 %11, %12
br i1 %13, label %14, label %24

14: ; preds = %10
%15 = load i64, ptr %6, align 8
%16 = getelementptr inbounds [256 x i8], ptr %3, i64 0, i64 %15
%17 = load i8, ptr %16, align 1
%18 = zext i8 %17 to i32
%19 = load i32, ptr %5, align 4
%20 = add nsw i32 %19, %18
store i32 %20, ptr %5, align 4
br label %21

21: ; preds = %14
%22 = load i64, ptr %6, align 8
%23 = add i64 %22, 1
store i64 %23, ptr %6, align 8
br label %10

24: ; preds = %10
%25 = load i32, ptr %5, align 4
%26 = srem i32 %25, 251
ret i32 %26
}

declare i64 @read(i32 noundef, ptr noundef, i64 noundef)

attributes #0 = { sanitize_memtag "target-features"="+mte" }
48 changes: 48 additions & 0 deletions llvm/test/CodeGen/RISCV/pr65025.ll
Original file line number Diff line number Diff line change
@@ -0,0 +1,48 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2
; RUN: llc < %s -mtriple=riscv64 -mattr=+a | FileCheck %s

; An i8 cmpxchg whose success bit (%1) has two users: the select that picks
; %ptr or null, and the conditional branch. The assertions below show the
; masked value being recomputed after the LR/SC loop (and a2, a4, a5) before
; the final bne.
define ptr @cmpxchg_masked_and_branch1(ptr %ptr, i8 signext %cmp, i8 signext %val) nounwind {
; CHECK-LABEL: cmpxchg_masked_and_branch1:
; CHECK: # %bb.0: # %do_cmpxchg
; CHECK-NEXT: andi a3, a0, -4
; CHECK-NEXT: slli a4, a0, 3
; CHECK-NEXT: li a5, 255
; CHECK-NEXT: sllw a5, a5, a4
; CHECK-NEXT: andi a1, a1, 255
; CHECK-NEXT: sllw a1, a1, a4
; CHECK-NEXT: andi a2, a2, 255
; CHECK-NEXT: sllw a2, a2, a4
; CHECK-NEXT: .LBB0_3: # %do_cmpxchg
; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
; CHECK-NEXT: lr.w.aqrl a4, (a3)
; CHECK-NEXT: and a6, a4, a5
; CHECK-NEXT: bne a6, a1, .LBB0_5
; CHECK-NEXT: # %bb.4: # %do_cmpxchg
; CHECK-NEXT: # in Loop: Header=BB0_3 Depth=1
; CHECK-NEXT: xor a6, a4, a2
; CHECK-NEXT: and a6, a6, a5
; CHECK-NEXT: xor a6, a4, a6
; CHECK-NEXT: sc.w.rl a6, a6, (a3)
; CHECK-NEXT: bnez a6, .LBB0_3
; CHECK-NEXT: .LBB0_5: # %do_cmpxchg
; CHECK-NEXT: and a2, a4, a5
; CHECK-NEXT: bne a1, a2, .LBB0_2
; CHECK-NEXT: # %bb.1: # %returnptr
; CHECK-NEXT: xor a1, a1, a2
; CHECK-NEXT: snez a1, a1
; CHECK-NEXT: addi a1, a1, -1
; CHECK-NEXT: and a0, a1, a0
; CHECK-NEXT: ret
; CHECK-NEXT: .LBB0_2: # %exit
; CHECK-NEXT: li a0, 0
; CHECK-NEXT: ret
do_cmpxchg:
%0 = cmpxchg ptr %ptr, i8 %cmp, i8 %val seq_cst seq_cst
%1 = extractvalue { i8, i1 } %0, 1
%2 = select i1 %1, ptr %ptr, ptr null
br i1 %1, label %returnptr, label %exit
returnptr:
ret ptr %2
exit:
ret ptr null
}
158 changes: 158 additions & 0 deletions llvm/test/CodeGen/RISCV/zcmp-prolog-epilog-crash.mir
Original file line number Diff line number Diff line change
@@ -0,0 +1,158 @@
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 2
# REQUIRES: asserts
# RUN: llc %s -o - -mtriple=riscv32 -mattr=+zcmp -target-abi ilp32 -run-pass=prologepilog \
# RUN: -simplify-mir -verify-machineinstrs | FileCheck %s
#
# The function below uses bb.2 as both savePoint and restorePoint, and bb.2
# ends without a terminator (it falls through to bb.3).
# NOTE(review): presumably this is the shape that used to crash
# prologue/epilogue insertion with +zcmp -- confirm against the fix.

--- |
define hidden void @f(fp128 %a) local_unnamed_addr #0 {
entry:
%0 = bitcast fp128 %a to i128
%and.i = lshr i128 %0, 112
%1 = trunc i128 %and.i to i32
%2 = and i32 %1, 32767
%or.i = or i128 poison, 5192296858534827628530496329220096
br label %if.end.i

if.end.i: ; preds = %entry
br i1 poison, label %exit, label %if.then12.i

if.then12.i: ; preds = %if.end.i
%sub13.i = sub nuw nsw i32 16495, %2
%sh_prom.i = zext i32 %sub13.i to i128
%shr14.i = lshr i128 %or.i, %sh_prom.i
%conv15.i = trunc i128 %shr14.i to i32
br label %exit

exit: ; preds = %if.then12.i, %if.end.i
%retval.0.i = phi i32 [ %conv15.i, %if.then12.i ], [ -1, %if.end.i ]
ret void
}
...
---
name: f
alignment: 2
tracksRegLiveness: true
tracksDebugUserValues: true
liveins:
- { reg: '$x10' }
frameInfo:
maxAlignment: 1
localFrameSize: 32
savePoint: '%bb.2'
restorePoint: '%bb.2'
stack:
- { id: 0, size: 32, alignment: 1, local-offset: -32 }
machineFunctionInfo:
varArgsFrameIndex: 0
varArgsSaveSize: 0
body: |
; CHECK-LABEL: name: f
; CHECK: bb.0.entry:
; CHECK-NEXT: liveins: $x10
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: renamable $x10 = ADDI $x0, -1
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.1.if.end.i:
; CHECK-NEXT: liveins: $x10
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: BNE $x0, $x0, %bb.3
; CHECK-NEXT: PseudoBR %bb.2
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.2.if.then12.i:
; CHECK-NEXT: liveins: $x10
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: $x2 = frame-setup ADDI $x2, -32
; CHECK-NEXT: frame-setup CFI_INSTRUCTION def_cfa_offset 32
; CHECK-NEXT: SB $x0, $x2, 31 :: (store (s8) into %stack.0 + 31)
; CHECK-NEXT: SB $x0, $x2, 30 :: (store (s8) into %stack.0 + 30)
; CHECK-NEXT: SB $x0, $x2, 29 :: (store (s8) into %stack.0 + 29)
; CHECK-NEXT: SB $x0, $x2, 28 :: (store (s8) into %stack.0 + 28)
; CHECK-NEXT: SB $x0, $x2, 27 :: (store (s8) into %stack.0 + 27)
; CHECK-NEXT: SB $x0, $x2, 26 :: (store (s8) into %stack.0 + 26)
; CHECK-NEXT: SB $x0, $x2, 25 :: (store (s8) into %stack.0 + 25)
; CHECK-NEXT: SB $x0, $x2, 24 :: (store (s8) into %stack.0 + 24)
; CHECK-NEXT: SB $x0, $x2, 23 :: (store (s8) into %stack.0 + 23)
; CHECK-NEXT: SB $x0, $x2, 22 :: (store (s8) into %stack.0 + 22)
; CHECK-NEXT: SB $x0, $x2, 21 :: (store (s8) into %stack.0 + 21)
; CHECK-NEXT: SB $x0, $x2, 20 :: (store (s8) into %stack.0 + 20)
; CHECK-NEXT: SB $x0, $x2, 19 :: (store (s8) into %stack.0 + 19)
; CHECK-NEXT: SB $x0, $x2, 18 :: (store (s8) into %stack.0 + 18)
; CHECK-NEXT: SB $x0, $x2, 17 :: (store (s8) into %stack.0 + 17)
; CHECK-NEXT: SB $x0, $x2, 16 :: (store (s8) into %stack.0 + 16)
; CHECK-NEXT: SB renamable $x10, $x2, 0 :: (store (s8) into %stack.0)
; CHECK-NEXT: SB renamable $x10, $x2, 4 :: (store (s8) into %stack.0 + 4)
; CHECK-NEXT: renamable $x11 = SRLI renamable $x10, 24
; CHECK-NEXT: SB renamable $x11, $x2, 3 :: (store (s8) into %stack.0 + 3)
; CHECK-NEXT: renamable $x12 = SRLI renamable $x10, 16
; CHECK-NEXT: SB renamable $x12, $x2, 2 :: (store (s8) into %stack.0 + 2)
; CHECK-NEXT: renamable $x13 = SRLI renamable $x10, 8
; CHECK-NEXT: SB renamable $x13, $x2, 1 :: (store (s8) into %stack.0 + 1)
; CHECK-NEXT: SB renamable $x10, $x2, 8 :: (store (s8) into %stack.0 + 8)
; CHECK-NEXT: SB renamable $x11, $x2, 7 :: (store (s8) into %stack.0 + 7)
; CHECK-NEXT: SB renamable $x12, $x2, 6 :: (store (s8) into %stack.0 + 6)
; CHECK-NEXT: SB renamable $x13, $x2, 5 :: (store (s8) into %stack.0 + 5)
; CHECK-NEXT: SB killed renamable $x10, $x2, 12 :: (store (s8) into %stack.0 + 12)
; CHECK-NEXT: SB renamable $x11, $x2, 11 :: (store (s8) into %stack.0 + 11)
; CHECK-NEXT: SB renamable $x12, $x2, 10 :: (store (s8) into %stack.0 + 10)
; CHECK-NEXT: SB renamable $x13, $x2, 9 :: (store (s8) into %stack.0 + 9)
; CHECK-NEXT: SB killed renamable $x11, $x2, 15 :: (store (s8) into %stack.0 + 15)
; CHECK-NEXT: SB killed renamable $x12, $x2, 14 :: (store (s8) into %stack.0 + 14)
; CHECK-NEXT: SB killed renamable $x13, $x2, 13 :: (store (s8) into %stack.0 + 13)
; CHECK-NEXT: $x2 = frame-destroy ADDI $x2, 32
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.3.exit:
; CHECK-NEXT: PseudoRET
bb.0.entry:
liveins: $x10
renamable $x10 = ADDI $x0, -1
bb.1.if.end.i:
liveins: $x10
BNE $x0, $x0, %bb.3
PseudoBR %bb.2
bb.2.if.then12.i:
liveins: $x10
SB $x0, %stack.0, 31 :: (store (s8) into %stack.0 + 31)
SB $x0, %stack.0, 30 :: (store (s8) into %stack.0 + 30)
SB $x0, %stack.0, 29 :: (store (s8) into %stack.0 + 29)
SB $x0, %stack.0, 28 :: (store (s8) into %stack.0 + 28)
SB $x0, %stack.0, 27 :: (store (s8) into %stack.0 + 27)
SB $x0, %stack.0, 26 :: (store (s8) into %stack.0 + 26)
SB $x0, %stack.0, 25 :: (store (s8) into %stack.0 + 25)
SB $x0, %stack.0, 24 :: (store (s8) into %stack.0 + 24)
SB $x0, %stack.0, 23 :: (store (s8) into %stack.0 + 23)
SB $x0, %stack.0, 22 :: (store (s8) into %stack.0 + 22)
SB $x0, %stack.0, 21 :: (store (s8) into %stack.0 + 21)
SB $x0, %stack.0, 20 :: (store (s8) into %stack.0 + 20)
SB $x0, %stack.0, 19 :: (store (s8) into %stack.0 + 19)
SB $x0, %stack.0, 18 :: (store (s8) into %stack.0 + 18)
SB $x0, %stack.0, 17 :: (store (s8) into %stack.0 + 17)
SB $x0, %stack.0, 16 :: (store (s8) into %stack.0 + 16)
SB renamable $x10, %stack.0, 0 :: (store (s8) into %stack.0)
SB renamable $x10, %stack.0, 4 :: (store (s8) into %stack.0 + 4)
renamable $x11 = SRLI renamable $x10, 24
SB renamable $x11, %stack.0, 3 :: (store (s8) into %stack.0 + 3)
renamable $x12 = SRLI renamable $x10, 16
SB renamable $x12, %stack.0, 2 :: (store (s8) into %stack.0 + 2)
renamable $x13 = SRLI renamable $x10, 8
SB renamable $x13, %stack.0, 1 :: (store (s8) into %stack.0 + 1)
SB renamable $x10, %stack.0, 8 :: (store (s8) into %stack.0 + 8)
SB renamable $x11, %stack.0, 7 :: (store (s8) into %stack.0 + 7)
SB renamable $x12, %stack.0, 6 :: (store (s8) into %stack.0 + 6)
SB renamable $x13, %stack.0, 5 :: (store (s8) into %stack.0 + 5)
SB killed renamable $x10, %stack.0, 12 :: (store (s8) into %stack.0 + 12)
SB renamable $x11, %stack.0, 11 :: (store (s8) into %stack.0 + 11)
SB renamable $x12, %stack.0, 10 :: (store (s8) into %stack.0 + 10)
SB renamable $x13, %stack.0, 9 :: (store (s8) into %stack.0 + 9)
SB killed renamable $x11, %stack.0, 15 :: (store (s8) into %stack.0 + 15)
SB killed renamable $x12, %stack.0, 14 :: (store (s8) into %stack.0 + 14)
SB killed renamable $x13, %stack.0, 13 :: (store (s8) into %stack.0 + 13)
bb.3.exit:
PseudoRET
...
2 changes: 0 additions & 2 deletions llvm/test/CodeGen/X86/avxneconvert-intrinsics.ll
Original file line number Diff line number Diff line change
Expand Up @@ -198,7 +198,6 @@ define <8 x bfloat> @test_int_x86_vcvtneps2bf16128(<4 x float> %A) {
; CHECK-LABEL: test_int_x86_vcvtneps2bf16128:
; CHECK: # %bb.0:
; CHECK-NEXT: {vex} vcvtneps2bf16 %xmm0, %xmm0 # encoding: [0xc4,0xe2,0x7a,0x72,0xc0]
; CHECK-NEXT: # kill: def $xmm1 killed $xmm0
; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
%ret = call <8 x bfloat> @llvm.x86.vcvtneps2bf16128(<4 x float> %A)
ret <8 x bfloat> %ret
Expand All @@ -209,7 +208,6 @@ define <8 x bfloat> @test_int_x86_vcvtneps2bf16256(<8 x float> %A) {
; CHECK-LABEL: test_int_x86_vcvtneps2bf16256:
; CHECK: # %bb.0:
; CHECK-NEXT: {vex} vcvtneps2bf16 %ymm0, %xmm0 # encoding: [0xc4,0xe2,0x7e,0x72,0xc0]
; CHECK-NEXT: # kill: def $xmm1 killed $xmm0
; CHECK-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
%ret = call <8 x bfloat> @llvm.x86.vcvtneps2bf16256(<8 x float> %A)
Expand Down
1,187 changes: 1,064 additions & 123 deletions llvm/test/CodeGen/X86/bfloat.ll

Large diffs are not rendered by default.