diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
index 6cf0ccf8086a9..f558639a0ec0d 100644
--- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
+++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -1381,8 +1381,8 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM,
   setStackPointerRegisterToSaveRestore(isPPC64 ? PPC::X1 : PPC::R1);
 
   // We have target-specific dag combine patterns for the following nodes:
-  setTargetDAGCombine({ISD::ADD, ISD::SHL, ISD::SRA, ISD::SRL, ISD::MUL,
-                       ISD::FMA, ISD::SINT_TO_FP, ISD::BUILD_VECTOR});
+  setTargetDAGCombine({ISD::AND, ISD::ADD, ISD::SHL, ISD::SRA, ISD::SRL,
+                       ISD::MUL, ISD::FMA, ISD::SINT_TO_FP, ISD::BUILD_VECTOR});
   if (Subtarget.hasFPCVT())
     setTargetDAGCombine(ISD::UINT_TO_FP);
   setTargetDAGCombine({ISD::LOAD, ISD::STORE, ISD::BR_CC});
@@ -15496,6 +15496,31 @@ SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N,
   default: break;
   case ISD::ADD:
     return combineADD(N, DCI);
+  case ISD::AND: {
+    // We don't want (and (zext (shift...)), C) if C fits in the width of the
+    // original input as that will prevent us from selecting optimal rotates.
+    SDValue Op1 = N->getOperand(0);
+    SDValue Op2 = N->getOperand(1);
+    if ((Op1.getOpcode() != ISD::ZERO_EXTEND &&
+         Op1.getOpcode() != ISD::ANY_EXTEND) ||
+        !isa<ConstantSDNode>(Op2))
+      break;
+    SDValue NarrowOp = Op1.getOperand(0);
+    unsigned NarrowOpcode = NarrowOp.getOpcode();
+    if (NarrowOpcode != ISD::SHL && NarrowOpcode != ISD::SRL &&
+        NarrowOpcode != ISD::ROTL && NarrowOpcode != ISD::ROTR &&
+        NarrowOpcode != ISD::FSHL && NarrowOpcode != ISD::FSHR)
+      break;
+
+    uint64_t Imm = cast<ConstantSDNode>(Op2)->getZExtValue();
+    EVT NarrowVT = NarrowOp.getValueType();
+    // Make sure that the constant is narrow enough to fit in the narrow type.
+    if (Imm >= maxUIntN(NarrowVT.getSizeInBits()))
+      break;
+    SDValue ConstOp = DAG.getConstant(Imm, dl, NarrowVT);
+    SDValue NarrowAnd = DAG.getNode(ISD::AND, dl, NarrowVT, NarrowOp, ConstOp);
+    return DAG.getAnyExtOrTrunc(NarrowAnd, dl, N->getValueType(0));
+  }
   case ISD::SHL:
     return combineSHL(N, DCI);
   case ISD::SRA:
diff --git a/llvm/test/CodeGen/PowerPC/and-extend-combine.ll b/llvm/test/CodeGen/PowerPC/and-extend-combine.ll
new file mode 100644
index 0000000000000..b05d0097154a5
--- /dev/null
+++ b/llvm/test/CodeGen/PowerPC/and-extend-combine.ll
@@ -0,0 +1,23 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2
+; RUN: llc < %s -mtriple=powerpc64le-unknown-unknown -ppc-asm-full-reg-names \
+; RUN:   -mcpu=pwr8 -verify-machineinstrs | FileCheck %s
+define dso_local ptr @foo(i32 noundef zeroext %arg, ptr nocapture noundef readonly %arg1, ptr noundef writeonly %arg2) local_unnamed_addr {
+; CHECK-LABEL: foo:
+; CHECK:       # %bb.0: # %bb
+; CHECK-NEXT:    rlwinm r3, r3, 31, 17, 28
+; CHECK-NEXT:    ldx r4, r4, r3
+; CHECK-NEXT:    clrldi r3, r4, 56
+; CHECK-NEXT:    add r3, r5, r3
+; CHECK-NEXT:    std r4, 0(r5)
+; CHECK-NEXT:    blr
+bb:
+  %i = lshr i32 %arg, 1
+  %i3 = and i32 %i, 32760
+  %i4 = zext i32 %i3 to i64
+  %i5 = getelementptr inbounds i8, ptr %arg1, i64 %i4
+  %i6 = load i64, ptr %i5, align 8
+  %i7 = and i64 %i6, 255
+  store i64 %i6, ptr %arg2, align 8
+  %i8 = getelementptr inbounds i8, ptr %arg2, i64 %i7
+  ret ptr %i8
+}