diff --git a/llvm/include/llvm/CodeGen/SelectionDAGNodes.h b/llvm/include/llvm/CodeGen/SelectionDAGNodes.h index 011f81ffbecef0..32745ac660d696 100644 --- a/llvm/include/llvm/CodeGen/SelectionDAGNodes.h +++ b/llvm/include/llvm/CodeGen/SelectionDAGNodes.h @@ -1712,6 +1712,10 @@ SDValue peekThroughOneUseBitcasts(SDValue V); /// If \p V is not an extracted subvector, it is returned as-is. SDValue peekThroughExtractSubvectors(SDValue V); +/// Return the non-truncated source operand of \p V if it exists. +/// If \p V is not a truncation, it is returned as-is. +SDValue peekThroughTruncates(SDValue V); + /// Returns true if \p V is a bitwise not operation. Assumes that an all ones /// constant is canonicalized to be operand 1. bool isBitwiseNot(SDValue V, bool AllowUndefs = false); diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp index 3d45c3e2082b02..c848847f24f245 100644 --- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -20391,9 +20391,13 @@ SDValue DAGCombiner::visitSTORE(SDNode *N) { } // If this is a load followed by a store to the same location, then the store - // is dead/noop. + // is dead/noop. Peek through any truncates if canCombineTruncStore failed. + // TODO: Add big-endian truncate support with test coverage. // TODO: Can relax for unordered atomics (see D66309) - if (LoadSDNode *Ld = dyn_cast<LoadSDNode>(Value)) { + SDValue TruncVal = DAG.getDataLayout().isLittleEndian() + ? 
peekThroughTruncates(Value) + : Value; + if (auto *Ld = dyn_cast<LoadSDNode>(TruncVal)) { if (Ld->getBasePtr() == Ptr && ST->getMemoryVT() == Ld->getMemoryVT() && ST->isUnindexed() && ST->isSimple() && Ld->getAddressSpace() == ST->getAddressSpace() && diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp index c4d37b12f07357..5cf9497069f78a 100644 --- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp @@ -11040,6 +11040,12 @@ SDValue llvm::peekThroughExtractSubvectors(SDValue V) { return V; } +SDValue llvm::peekThroughTruncates(SDValue V) { + while (V.getOpcode() == ISD::TRUNCATE) + V = V.getOperand(0); + return V; +} + bool llvm::isBitwiseNot(SDValue V, bool AllowUndefs) { if (V.getOpcode() != ISD::XOR) return false; diff --git a/llvm/test/CodeGen/X86/illegal-bitfield-loadstore.ll b/llvm/test/CodeGen/X86/illegal-bitfield-loadstore.ll index 05ad92cc0b3307..7fb07c6b3163e7 100644 --- a/llvm/test/CodeGen/X86/illegal-bitfield-loadstore.ll +++ b/llvm/test/CodeGen/X86/illegal-bitfield-loadstore.ll @@ -5,21 +5,19 @@ define void @i24_or(ptr %a) { ; X86-LABEL: i24_or: ; X86: # %bb.0: -; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X86-NEXT: movzwl (%ecx), %edx -; X86-NEXT: movzbl 2(%ecx), %eax -; X86-NEXT: movb %al, 2(%ecx) -; X86-NEXT: shll $16, %eax -; X86-NEXT: orl %edx, %eax -; X86-NEXT: orl $384, %eax # imm = 0x180 -; X86-NEXT: movw %ax, (%ecx) +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movzwl (%eax), %ecx +; X86-NEXT: movzbl 2(%eax), %edx +; X86-NEXT: shll $16, %edx +; X86-NEXT: orl %ecx, %edx +; X86-NEXT: orl $384, %edx # imm = 0x180 +; X86-NEXT: movw %dx, (%eax) ; X86-NEXT: retl ; ; X64-LABEL: i24_or: ; X64: # %bb.0: ; X64-NEXT: movzwl (%rdi), %eax ; X64-NEXT: movzbl 2(%rdi), %ecx -; X64-NEXT: movb %cl, 2(%rdi) ; X64-NEXT: shll $16, %ecx ; X64-NEXT: orl %eax, %ecx ; X64-NEXT: orl $384, %ecx # imm = 0x180 @@ -35,21 +33,19 @@ define void @i24_and_or(ptr %a) { ; 
X86-LABEL: i24_and_or: ; X86: # %bb.0: ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-NEXT: movzwl (%eax), %edx -; X86-NEXT: movzbl 2(%eax), %ecx -; X86-NEXT: movb %cl, 2(%eax) -; X86-NEXT: shll $16, %ecx -; X86-NEXT: orl %edx, %ecx -; X86-NEXT: orl $384, %ecx # imm = 0x180 -; X86-NEXT: andl $-128, %ecx -; X86-NEXT: movw %cx, (%eax) +; X86-NEXT: movzwl (%eax), %ecx +; X86-NEXT: movzbl 2(%eax), %edx +; X86-NEXT: shll $16, %edx +; X86-NEXT: orl %ecx, %edx +; X86-NEXT: orl $384, %edx # imm = 0x180 +; X86-NEXT: andl $-128, %edx +; X86-NEXT: movw %dx, (%eax) ; X86-NEXT: retl ; ; X64-LABEL: i24_and_or: ; X64: # %bb.0: ; X64-NEXT: movzwl (%rdi), %eax ; X64-NEXT: movzbl 2(%rdi), %ecx -; X64-NEXT: movb %cl, 2(%rdi) ; X64-NEXT: shll $16, %ecx ; X64-NEXT: orl %eax, %ecx ; X64-NEXT: orl $384, %ecx # imm = 0x180 @@ -66,21 +62,20 @@ define void @i24_and_or(ptr %a) { define void @i24_insert_bit(ptr %a, i1 zeroext %bit) { ; X86-LABEL: i24_insert_bit: ; X86: # %bb.0: -; X86-NEXT: pushl %ebx +; X86-NEXT: pushl %esi ; X86-NEXT: .cfi_def_cfa_offset 8 -; X86-NEXT: .cfi_offset %ebx, -8 +; X86-NEXT: .cfi_offset %esi, -8 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx ; X86-NEXT: movzwl (%eax), %edx -; X86-NEXT: movzbl 2(%eax), %ebx -; X86-NEXT: movb %bl, 2(%eax) -; X86-NEXT: shll $16, %ebx -; X86-NEXT: orl %edx, %ebx +; X86-NEXT: movzbl 2(%eax), %esi +; X86-NEXT: shll $16, %esi +; X86-NEXT: orl %edx, %esi ; X86-NEXT: shll $13, %ecx -; X86-NEXT: andl $16769023, %ebx # imm = 0xFFDFFF -; X86-NEXT: orl %ecx, %ebx -; X86-NEXT: movw %bx, (%eax) -; X86-NEXT: popl %ebx +; X86-NEXT: andl $16769023, %esi # imm = 0xFFDFFF +; X86-NEXT: orl %ecx, %esi +; X86-NEXT: movw %si, (%eax) +; X86-NEXT: popl %esi ; X86-NEXT: .cfi_def_cfa_offset 4 ; X86-NEXT: retl ; @@ -88,7 +83,6 @@ define void @i24_insert_bit(ptr %a, i1 zeroext %bit) { ; X64: # %bb.0: ; X64-NEXT: movzwl (%rdi), %eax ; X64-NEXT: movzbl 2(%rdi), %ecx -; X64-NEXT: movb %cl, 2(%rdi) ; X64-NEXT: shll $16, %ecx ; 
X64-NEXT: orl %eax, %ecx ; X64-NEXT: shll $13, %esi @@ -114,8 +108,6 @@ define void @i56_or(ptr %a) { ; ; X64-LABEL: i56_or: ; X64: # %bb.0: -; X64-NEXT: movzwl 4(%rdi), %eax -; X64-NEXT: movw %ax, 4(%rdi) ; X64-NEXT: orl $384, (%rdi) # imm = 0x180 ; X64-NEXT: retq %aa = load i56, ptr %a, align 1 @@ -138,8 +130,6 @@ define void @i56_and_or(ptr %a) { ; X64: # %bb.0: ; X64-NEXT: movzwl 4(%rdi), %eax ; X64-NEXT: movzbl 6(%rdi), %ecx -; X64-NEXT: movb %cl, 6(%rdi) -; X64-NEXT: # kill: def $ecx killed $ecx def $rcx ; X64-NEXT: shll $16, %ecx ; X64-NEXT: orl %eax, %ecx ; X64-NEXT: shlq $32, %rcx @@ -175,8 +165,6 @@ define void @i56_insert_bit(ptr %a, i1 zeroext %bit) { ; X64: # %bb.0: ; X64-NEXT: movzwl 4(%rdi), %eax ; X64-NEXT: movzbl 6(%rdi), %ecx -; X64-NEXT: movb %cl, 6(%rdi) -; X64-NEXT: # kill: def $ecx killed $ecx def $rcx ; X64-NEXT: shll $16, %ecx ; X64-NEXT: orl %eax, %ecx ; X64-NEXT: shlq $32, %rcx