diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp index 0ed2bd9d6d629..041d7e5b4a4aa 100644 --- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -5531,8 +5531,6 @@ bool DAGCombiner::SearchForAndLoads(SDNode *N, // Some constants may need fixing up later if they are too large. if (auto *C = dyn_cast<ConstantSDNode>(Op)) { - if (Mask->getValueType(0) != C->getValueType(0)) - return false; if ((N->getOpcode() == ISD::OR || N->getOpcode() == ISD::XOR) && (Mask->getAPIntValue() & C->getAPIntValue()) != C->getAPIntValue()) NodesWithConsts.insert(N); @@ -5566,9 +5564,9 @@ bool DAGCombiner::SearchForAndLoads(SDNode *N, case ISD::AssertZext: { unsigned ActiveBits = Mask->getAPIntValue().countTrailingOnes(); EVT ExtVT = EVT::getIntegerVT(*DAG.getContext(), ActiveBits); - EVT VT = Op.getOpcode() == ISD::AssertZext - ? cast<VTSDNode>(Op.getOperand(1))->getVT() - : Op.getOperand(0).getValueType(); + EVT VT = Op.getOpcode() == ISD::AssertZext ? + cast<VTSDNode>(Op.getOperand(1))->getVT() : + Op.getOperand(0).getValueType(); // We can accept extending nodes if the mask is wider or an equal // width to the original type. @@ -5576,15 +5574,6 @@ bool DAGCombiner::SearchForAndLoads(SDNode *N, continue; break; } - case ISD::ANY_EXTEND: { - unsigned ActiveBits = Mask->getAPIntValue().countTrailingOnes(); - EVT ExtVT = EVT::getIntegerVT(*DAG.getContext(), ActiveBits); - EVT VT = Op.getOperand(0).getValueType(); - if (ExtVT.bitsGE(VT)) - break; - // Fallthrough to searching for nodes from the operands of the extend. - LLVM_FALLTHROUGH; - } case ISD::OR: case ISD::XOR: case ISD::AND: @@ -5644,14 +5633,12 @@ bool DAGCombiner::BackwardsPropagateMask(SDNode *N) { // masking. 
if (FixupNode) { LLVM_DEBUG(dbgs() << "First, need to fix up: "; FixupNode->dump()); - SDValue MaskOpT = DAG.getZExtOrTrunc(MaskOp, SDLoc(FixupNode), - FixupNode->getValueType(0)); - SDValue And = - DAG.getNode(ISD::AND, SDLoc(FixupNode), FixupNode->getValueType(0), - SDValue(FixupNode, 0), MaskOpT); + SDValue And = DAG.getNode(ISD::AND, SDLoc(FixupNode), + FixupNode->getValueType(0), + SDValue(FixupNode, 0), MaskOp); DAG.ReplaceAllUsesOfValueWith(SDValue(FixupNode, 0), And); if (And.getOpcode() == ISD ::AND) - DAG.UpdateNodeOperands(And.getNode(), SDValue(FixupNode, 0), MaskOpT); + DAG.UpdateNodeOperands(And.getNode(), SDValue(FixupNode, 0), MaskOp); } // Narrow any constants that need it. @@ -5660,12 +5647,10 @@ bool DAGCombiner::BackwardsPropagateMask(SDNode *N) { SDValue Op1 = LogicN->getOperand(1); if (isa<ConstantSDNode>(Op0)) - std::swap(Op0, Op1); + std::swap(Op0, Op1); - SDValue MaskOpT = - DAG.getZExtOrTrunc(MaskOp, SDLoc(Op1), Op1.getValueType()); - SDValue And = - DAG.getNode(ISD::AND, SDLoc(Op1), Op1.getValueType(), Op1, MaskOpT); + SDValue And = DAG.getNode(ISD::AND, SDLoc(Op1), Op1.getValueType(), + Op1, MaskOp); DAG.UpdateNodeOperands(LogicN, Op0, And); } @@ -5673,14 +5658,12 @@ bool DAGCombiner::BackwardsPropagateMask(SDNode *N) { // Create narrow loads. 
for (auto *Load : Loads) { LLVM_DEBUG(dbgs() << "Propagate AND back to: "; Load->dump()); - SDValue MaskOpT = - DAG.getZExtOrTrunc(MaskOp, SDLoc(Load), Load->getValueType(0)); SDValue And = DAG.getNode(ISD::AND, SDLoc(Load), Load->getValueType(0), - SDValue(Load, 0), MaskOpT); + SDValue(Load, 0), MaskOp); DAG.ReplaceAllUsesOfValueWith(SDValue(Load, 0), And); if (And.getOpcode() == ISD ::AND) And = SDValue( - DAG.UpdateNodeOperands(And.getNode(), SDValue(Load, 0), MaskOpT), 0); + DAG.UpdateNodeOperands(And.getNode(), SDValue(Load, 0), MaskOp), 0); SDValue NewLoad = reduceLoadWidth(And.getNode()); assert(NewLoad && "Shouldn't be masking the load if it can't be narrowed"); diff --git a/llvm/test/CodeGen/AArch64/combine-andintoload.ll b/llvm/test/CodeGen/AArch64/combine-andintoload.ll index d7a69fdd11936..693d318b272a2 100644 --- a/llvm/test/CodeGen/AArch64/combine-andintoload.ll +++ b/llvm/test/CodeGen/AArch64/combine-andintoload.ll @@ -5,14 +5,16 @@ define i64 @load32_and16_and(i32* %p, i64 %y) { ; CHECK-LABEL: load32_and16_and: ; CHECK: // %bb.0: -; CHECK-NEXT: ldrh w8, [x0] -; CHECK-NEXT: and w0, w1, w8 +; CHECK-NEXT: ldr w8, [x0] +; CHECK-NEXT: and w8, w1, w8 +; CHECK-NEXT: and x0, x8, #0xffff ; CHECK-NEXT: ret ; ; CHECKBE-LABEL: load32_and16_and: ; CHECKBE: // %bb.0: -; CHECKBE-NEXT: ldrh w8, [x0, #2] -; CHECKBE-NEXT: and w0, w1, w8 +; CHECKBE-NEXT: ldr w8, [x0] +; CHECKBE-NEXT: and w8, w1, w8 +; CHECKBE-NEXT: and x0, x8, #0xffff ; CHECKBE-NEXT: ret %x = load i32, i32* %p, align 4 %xz = zext i32 %x to i64 @@ -24,14 +26,16 @@ define i64 @load32_and16_and(i32* %p, i64 %y) { define i64 @load32_and16_andr(i32* %p, i64 %y) { ; CHECK-LABEL: load32_and16_andr: ; CHECK: // %bb.0: -; CHECK-NEXT: ldrh w8, [x0] -; CHECK-NEXT: and w0, w1, w8 +; CHECK-NEXT: ldr w8, [x0] +; CHECK-NEXT: and w8, w1, w8 +; CHECK-NEXT: and x0, x8, #0xffff ; CHECK-NEXT: ret ; ; CHECKBE-LABEL: load32_and16_andr: ; CHECKBE: // %bb.0: -; CHECKBE-NEXT: ldrh w8, [x0, #2] -; CHECKBE-NEXT: and w0, w1, w8 
+; CHECKBE-NEXT: ldr w8, [x0] +; CHECKBE-NEXT: and w8, w1, w8 +; CHECKBE-NEXT: and x0, x8, #0xffff ; CHECKBE-NEXT: ret %x = load i32, i32* %p, align 4 %xz = zext i32 %x to i64 @@ -43,14 +47,16 @@ define i64 @load32_and16_andr(i32* %p, i64 %y) { define i64 @load32_and16_and_sext(i32* %p, i64 %y) { ; CHECK-LABEL: load32_and16_and_sext: ; CHECK: // %bb.0: -; CHECK-NEXT: ldrh w8, [x0] -; CHECK-NEXT: and w0, w1, w8 +; CHECK-NEXT: ldr w8, [x0] +; CHECK-NEXT: and w8, w1, w8 +; CHECK-NEXT: and x0, x8, #0xffff ; CHECK-NEXT: ret ; ; CHECKBE-LABEL: load32_and16_and_sext: ; CHECKBE: // %bb.0: -; CHECKBE-NEXT: ldrh w8, [x0, #2] -; CHECKBE-NEXT: and w0, w1, w8 +; CHECKBE-NEXT: ldr w8, [x0] +; CHECKBE-NEXT: and w8, w1, w8 +; CHECKBE-NEXT: and x0, x8, #0xffff ; CHECKBE-NEXT: ret %x = load i32, i32* %p, align 4 %xz = sext i32 %x to i64 @@ -62,16 +68,16 @@ define i64 @load32_and16_and_sext(i32* %p, i64 %y) { define i64 @load32_and16_or(i32* %p, i64 %y) { ; CHECK-LABEL: load32_and16_or: ; CHECK: // %bb.0: -; CHECK-NEXT: ldrh w8, [x0] -; CHECK-NEXT: and w9, w1, #0xffff -; CHECK-NEXT: orr w0, w9, w8 +; CHECK-NEXT: ldr w8, [x0] +; CHECK-NEXT: orr w8, w1, w8 +; CHECK-NEXT: and x0, x8, #0xffff ; CHECK-NEXT: ret ; ; CHECKBE-LABEL: load32_and16_or: ; CHECKBE: // %bb.0: -; CHECKBE-NEXT: ldrh w8, [x0, #2] -; CHECKBE-NEXT: and w9, w1, #0xffff -; CHECKBE-NEXT: orr w0, w9, w8 +; CHECKBE-NEXT: ldr w8, [x0] +; CHECKBE-NEXT: orr w8, w1, w8 +; CHECKBE-NEXT: and x0, x8, #0xffff ; CHECKBE-NEXT: ret %x = load i32, i32* %p, align 4 %xz = zext i32 %x to i64 @@ -164,14 +170,16 @@ define i64 @load16_and16(i16* %p, i64 %y) { define i64 @load16_and8(i16* %p, i64 %y) { ; CHECK-LABEL: load16_and8: ; CHECK: // %bb.0: -; CHECK-NEXT: ldrb w8, [x0] -; CHECK-NEXT: and w0, w1, w8 +; CHECK-NEXT: ldrh w8, [x0] +; CHECK-NEXT: and w8, w1, w8 +; CHECK-NEXT: and x0, x8, #0xff ; CHECK-NEXT: ret ; ; CHECKBE-LABEL: load16_and8: ; CHECKBE: // %bb.0: -; CHECKBE-NEXT: ldrb w8, [x0, #1] -; CHECKBE-NEXT: and w0, w1, w8 +; 
CHECKBE-NEXT: ldrh w8, [x0] +; CHECKBE-NEXT: and w8, w1, w8 +; CHECKBE-NEXT: and x0, x8, #0xff ; CHECKBE-NEXT: ret %x = load i16, i16* %p, align 4 %xz = zext i16 %x to i64 @@ -224,13 +232,15 @@ define i64 @load8_and16_zext(i8* %p, i8 %y) { ; CHECK-LABEL: load8_and16_zext: ; CHECK: // %bb.0: ; CHECK-NEXT: ldrb w8, [x0] -; CHECK-NEXT: and w0, w1, w8 +; CHECK-NEXT: and w8, w1, w8 +; CHECK-NEXT: and x0, x8, #0xff ; CHECK-NEXT: ret ; ; CHECKBE-LABEL: load8_and16_zext: ; CHECKBE: // %bb.0: ; CHECKBE-NEXT: ldrb w8, [x0] -; CHECKBE-NEXT: and w0, w1, w8 +; CHECKBE-NEXT: and w8, w1, w8 +; CHECKBE-NEXT: and x0, x8, #0xff ; CHECKBE-NEXT: ret %x = load i8, i8* %p, align 4 %xz = zext i8 %x to i64 @@ -286,14 +296,16 @@ define i64 @load8_and16_or(i8* %p, i64 %y) { define i64 @load16_and8_manyext(i16* %p, i32 %y) { ; CHECK-LABEL: load16_and8_manyext: ; CHECK: // %bb.0: -; CHECK-NEXT: ldrb w8, [x0] -; CHECK-NEXT: and w0, w1, w8 +; CHECK-NEXT: ldrh w8, [x0] +; CHECK-NEXT: and w8, w1, w8 +; CHECK-NEXT: and x0, x8, #0xff ; CHECK-NEXT: ret ; ; CHECKBE-LABEL: load16_and8_manyext: ; CHECKBE: // %bb.0: -; CHECKBE-NEXT: ldrb w8, [x0, #1] -; CHECKBE-NEXT: and w0, w1, w8 +; CHECKBE-NEXT: ldrh w8, [x0] +; CHECKBE-NEXT: and w8, w1, w8 +; CHECKBE-NEXT: and x0, x8, #0xff ; CHECKBE-NEXT: ret %x = load i16, i16* %p, align 4 %xz = zext i16 %x to i32 @@ -306,16 +318,18 @@ define i64 @load16_and8_manyext(i16* %p, i32 %y) { define i64 @multiple_load(i16* %p, i32* %q) { ; CHECK-LABEL: multiple_load: ; CHECK: // %bb.0: -; CHECK-NEXT: ldrb w8, [x0] -; CHECK-NEXT: ldrb w9, [x1] -; CHECK-NEXT: and w0, w9, w8 +; CHECK-NEXT: ldrh w8, [x0] +; CHECK-NEXT: ldr w9, [x1] +; CHECK-NEXT: and w8, w9, w8 +; CHECK-NEXT: and x0, x8, #0xff ; CHECK-NEXT: ret ; ; CHECKBE-LABEL: multiple_load: ; CHECKBE: // %bb.0: -; CHECKBE-NEXT: ldrb w8, [x0, #1] -; CHECKBE-NEXT: ldrb w9, [x1, #3] -; CHECKBE-NEXT: and w0, w9, w8 +; CHECKBE-NEXT: ldrh w8, [x0] +; CHECKBE-NEXT: ldr w9, [x1] +; CHECKBE-NEXT: and w8, w9, w8 +; CHECKBE-NEXT: 
and x0, x8, #0xff ; CHECKBE-NEXT: ret %x = load i16, i16* %p, align 4 %xz = zext i16 %x to i64 @@ -329,16 +343,18 @@ define i64 @multiple_load(i16* %p, i32* %q) { define i64 @multiple_load_or(i16* %p, i32* %q) { ; CHECK-LABEL: multiple_load_or: ; CHECK: // %bb.0: -; CHECK-NEXT: ldrb w8, [x0] -; CHECK-NEXT: ldrb w9, [x1] -; CHECK-NEXT: orr w0, w9, w8 +; CHECK-NEXT: ldrh w8, [x0] +; CHECK-NEXT: ldr w9, [x1] +; CHECK-NEXT: orr w8, w9, w8 +; CHECK-NEXT: and x0, x8, #0xff ; CHECK-NEXT: ret ; ; CHECKBE-LABEL: multiple_load_or: ; CHECKBE: // %bb.0: -; CHECKBE-NEXT: ldrb w8, [x0, #1] -; CHECKBE-NEXT: ldrb w9, [x1, #3] -; CHECKBE-NEXT: orr w0, w9, w8 +; CHECKBE-NEXT: ldrh w8, [x0] +; CHECKBE-NEXT: ldr w9, [x1] +; CHECKBE-NEXT: orr w8, w9, w8 +; CHECKBE-NEXT: and x0, x8, #0xff ; CHECKBE-NEXT: ret %x = load i16, i16* %p, align 4 %xz = zext i16 %x to i64 @@ -352,16 +368,16 @@ define i64 @multiple_load_or(i16* %p, i32* %q) { define i64 @load32_and16_zexty(i32* %p, i32 %y) { ; CHECK-LABEL: load32_and16_zexty: ; CHECK: // %bb.0: -; CHECK-NEXT: ldrh w8, [x0] -; CHECK-NEXT: and w9, w1, #0xffff -; CHECK-NEXT: orr w0, w9, w8 +; CHECK-NEXT: ldr w8, [x0] +; CHECK-NEXT: orr w8, w1, w8 +; CHECK-NEXT: and x0, x8, #0xffff ; CHECK-NEXT: ret ; ; CHECKBE-LABEL: load32_and16_zexty: ; CHECKBE: // %bb.0: -; CHECKBE-NEXT: ldrh w8, [x0, #2] -; CHECKBE-NEXT: and w9, w1, #0xffff -; CHECKBE-NEXT: orr w0, w9, w8 +; CHECKBE-NEXT: ldr w8, [x0] +; CHECKBE-NEXT: orr w8, w1, w8 +; CHECKBE-NEXT: and x0, x8, #0xffff ; CHECKBE-NEXT: ret %x = load i32, i32* %p, align 4 %xz = zext i32 %x to i64 @@ -374,16 +390,16 @@ define i64 @load32_and16_zexty(i32* %p, i32 %y) { define i64 @load32_and16_sexty(i32* %p, i32 %y) { ; CHECK-LABEL: load32_and16_sexty: ; CHECK: // %bb.0: -; CHECK-NEXT: ldrh w8, [x0] -; CHECK-NEXT: and w9, w1, #0xffff -; CHECK-NEXT: orr w0, w9, w8 +; CHECK-NEXT: ldr w8, [x0] +; CHECK-NEXT: orr w8, w1, w8 +; CHECK-NEXT: and x0, x8, #0xffff ; CHECK-NEXT: ret ; ; CHECKBE-LABEL: load32_and16_sexty: ; 
CHECKBE: // %bb.0: -; CHECKBE-NEXT: ldrh w8, [x0, #2] -; CHECKBE-NEXT: and w9, w1, #0xffff -; CHECKBE-NEXT: orr w0, w9, w8 +; CHECKBE-NEXT: ldr w8, [x0] +; CHECKBE-NEXT: orr w8, w1, w8 +; CHECKBE-NEXT: and x0, x8, #0xffff ; CHECKBE-NEXT: ret %x = load i32, i32* %p, align 4 %xz = zext i32 %x to i64 diff --git a/llvm/test/CodeGen/X86/pr35763.ll b/llvm/test/CodeGen/X86/pr35763.ll index 53a0a0284d11d..8b3e91dc577ae 100644 --- a/llvm/test/CodeGen/X86/pr35763.ll +++ b/llvm/test/CodeGen/X86/pr35763.ll @@ -10,10 +10,10 @@ define dso_local void @PR35763() { ; CHECK-LABEL: PR35763: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: movzwl z(%rip), %eax -; CHECK-NEXT: movzwl z+2(%rip), %ecx -; CHECK-NEXT: orl %eax, %ecx -; CHECK-NEXT: movq %rcx, tf_3_var_136(%rip) +; CHECK-NEXT: movl z(%rip), %eax +; CHECK-NEXT: orl z+2(%rip), %eax +; CHECK-NEXT: movzwl %ax, %eax +; CHECK-NEXT: movq %rax, tf_3_var_136(%rip) ; CHECK-NEXT: movl z+6(%rip), %eax ; CHECK-NEXT: movzbl z+10(%rip), %ecx ; CHECK-NEXT: shlq $32, %rcx