From 82d53ed492bebd2a585fa4c37efc81ad1d6f68d9 Mon Sep 17 00:00:00 2001 From: Nemanja Ivanovic Date: Fri, 24 Feb 2017 18:03:16 +0000 Subject: [PATCH] [PowerPC] Use rldicr instruction for AND with an immediate if possible Emit clrrdi (extended mnemonic for rldicr) for AND-ing with masks that clear bits from the right hand side. Committing on behalf of Hiroshi Inoue. Differential Revision: https://reviews.llvm.org/D29388 llvm-svn: 296143 --- llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp | 13 ++++++++++++ .../PowerPC/fp128-bitcast-after-operation.ll | 20 ++++++------------- llvm/test/CodeGen/PowerPC/i64_fp_round.ll | 7 +++++-- llvm/test/CodeGen/PowerPC/srl-mask.ll | 11 ++++++++++ 4 files changed, 35 insertions(+), 16 deletions(-) diff --git a/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp b/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp index bef89c7b41136..9c72638023bb3 100644 --- a/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp +++ b/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp @@ -2714,6 +2714,19 @@ void PPCDAGToDAGISel::Select(SDNode *N) { CurDAG->SelectNodeTo(N, PPC::RLDICL, MVT::i64, Ops); return; } + // If this is a negated 64-bit zero-extension mask, + // i.e. the immediate is a sequence of ones from most significant side + // and all zero for the remainder, we should use rldicr. + if (isInt64Immediate(N->getOperand(1).getNode(), Imm64) && + isMask_64(~Imm64)) { + SDValue Val = N->getOperand(0); + MB = 63 - countTrailingOnes(~Imm64); + SH = 0; + SDValue Ops[] = { Val, getI32Imm(SH, dl), getI32Imm(MB, dl) }; + CurDAG->SelectNodeTo(N, PPC::RLDICR, MVT::i64, Ops); + return; + } + // AND X, 0 -> 0, not "rlwinm 32". 
if (isInt32Immediate(N->getOperand(1), Imm) && (Imm == 0)) { ReplaceUses(SDValue(N, 0), N->getOperand(1)); diff --git a/llvm/test/CodeGen/PowerPC/fp128-bitcast-after-operation.ll b/llvm/test/CodeGen/PowerPC/fp128-bitcast-after-operation.ll index 37120a56f4dab..c76c7b02bfeec 100644 --- a/llvm/test/CodeGen/PowerPC/fp128-bitcast-after-operation.ll +++ b/llvm/test/CodeGen/PowerPC/fp128-bitcast-after-operation.ll @@ -11,11 +11,9 @@ entry: ; PPC64-DAG: stxsdx 1, 0, [[ADDR_LO:[0-9]+]] ; PPC64-DAG: addi [[ADDR_HI]], [[SP:[0-9]+]], [[OFFSET_HI:-?[0-9]+]] ; PPC64-DAG: addi [[ADDR_LO]], [[SP]], [[OFFSET_LO:-?[0-9]+]] -; PPC64-DAG: li [[MASK_REG:[0-9]+]], 1 -; PPC64: sldi [[MASK_REG]], [[MASK_REG]], 63 ; PPC64-DAG: ld [[HI:[0-9]+]], [[OFFSET_LO]]([[SP]]) ; PPC64-DAG: ld [[LO:[0-9]+]], [[OFFSET_HI]]([[SP]]) -; PPC64: and [[FLIP_BIT:[0-9]+]], [[HI]], [[MASK_REG]] +; PPC64-DAG: rldicr [[FLIP_BIT:[0-9]+]], [[HI]], 0, 0 ; PPC64-DAG: xor 3, [[HI]], [[FLIP_BIT]] ; PPC64-DAG: xor 4, [[LO]], [[FLIP_BIT]] ; PPC64: blr @@ -23,9 +21,7 @@ entry: ; PPC64-P8-LABEL: test_abs: ; PPC64-P8-DAG: mfvsrd [[LO:[0-9]+]], 2 ; PPC64-P8-DAG: mfvsrd [[HI:[0-9]+]], 1 -; PPC64-P8-DAG: li [[MASK_REG:[0-9]+]], 1 -; PPC64-P8-DAG: sldi [[SHIFT_REG:[0-9]+]], [[MASK_REG]], 63 -; PPC64-P8: and [[FLIP_BIT:[0-9]+]], [[HI]], [[SHIFT_REG]] +; PPC64-P8-DAG: rldicr [[FLIP_BIT:[0-9]+]], [[HI]], 0, 0 ; PPC64-P8-DAG: xor 3, [[HI]], [[FLIP_BIT]] ; PPC64-P8-DAG: xor 4, [[LO]], [[FLIP_BIT]] ; PPC64-P8: blr @@ -66,7 +62,7 @@ entry: ; PPC64-P8-DAG: mfvsrd [[LO:[0-9]+]], 2 ; PPC64-P8-DAG: mfvsrd [[HI:[0-9]+]], 1 ; PPC64-P8-DAG: li [[IMM1:[0-9]+]], 1 -; PPC64-P8-DAG: sldi [[FLIP_BIT]], [[IMM1]], 63 +; PPC64-P8-DAG: sldi [[FLIP_BIT:[0-9]+]], [[IMM1]], 63 ; PPC64-P8-NOT: BARRIER ; PPC64-P8-DAG: xor 3, [[HI]], [[FLIP_BIT]] ; PPC64-P8-DAG: xor 4, [[LO]], [[FLIP_BIT]] @@ -93,29 +89,25 @@ entry: ; PPC64-LABEL: test_copysign: ; PPC64-DAG: stxsdx 1, 0, [[ADDR_REG:[0-9]+]] ; PPC64-DAG: addi [[ADDR_REG]], 1, [[OFFSET:-?[0-9]+]] -; 
PPC64-DAG: li [[SIGN:[0-9]+]], 1 -; PPC64-DAG: sldi [[SIGN]], [[SIGN]], 63 ; PPC64-DAG: li [[HI_TMP:[0-9]+]], 16399 ; PPC64-DAG: sldi [[CST_HI:[0-9]+]], [[HI_TMP]], 48 ; PPC64-DAG: li [[LO_TMP:[0-9]+]], 3019 ; PPC64-DAG: sldi [[CST_LO:[0-9]+]], [[LO_TMP]], 52 ; PPC64-NOT: BARRIER ; PPC64-DAG: ld [[X_HI:[0-9]+]], [[OFFSET]](1) -; PPC64-DAG: and [[NEW_HI_TMP:[0-9]+]], [[X_HI]], [[SIGN]] +; PPC64-DAG: rldicr [[NEW_HI_TMP:[0-9]+]], [[X_HI]], 0, 0 ; PPC64-DAG: or 3, [[NEW_HI_TMP]], [[CST_HI]] -; PPC64-DAG: xor 4, [[SIGN]], [[CST_LO]] +; PPC64-DAG: xor 4, [[NEW_HI_TMP]], [[CST_LO]] ; PPC64: blr ; PPC64-P8-LABEL: test_copysign: ; PPC64-P8-DAG: mfvsrd [[X_HI:[0-9]+]], 1 -; PPC64-P8-DAG: li [[SIGN:[0-9]+]], 1 -; PPC64-P8-DAG: sldi [[SIGN]], [[SIGN]], 63 ; PPC64-P8-DAG: li [[HI_TMP:[0-9]+]], 16399 ; PPC64-P8-DAG: sldi [[CST_HI:[0-9]+]], [[HI_TMP]], 48 ; PPC64-P8-DAG: li [[LO_TMP:[0-9]+]], 3019 ; PPC64-P8-DAG: sldi [[CST_LO:[0-9]+]], [[LO_TMP]], 52 ; PPC64-P8-NOT: BARRIER -; PPC64-P8-DAG: and [[NEW_HI_TMP:[0-9]+]], [[X_HI]], [[SIGN]] +; PPC64-P8-DAG: rldicr [[NEW_HI_TMP:[0-9]+]], [[X_HI]], 0, 0 ; PPC64-P8-DAG: or 3, [[NEW_HI_TMP]], [[CST_HI]] ; PPC64-P8-DAG: xor 4, [[NEW_HI_TMP]], [[CST_LO]] ; PPC64-P8: blr diff --git a/llvm/test/CodeGen/PowerPC/i64_fp_round.ll b/llvm/test/CodeGen/PowerPC/i64_fp_round.ll index 5e959f7356842..9fe7a3bfcbb70 100644 --- a/llvm/test/CodeGen/PowerPC/i64_fp_round.ll +++ b/llvm/test/CodeGen/PowerPC/i64_fp_round.ll @@ -19,11 +19,14 @@ entry: ; CHECK: addi [[REG2:[0-9]+]], [[REG1]], 1 ; CHECK: cmpldi [[REG2]], 1 ; CHECK: isel [[REG3:[0-9]+]], {{[0-9]+}}, 3, 1 +; CHECK-NO-ISEL: rldicr [[REG2:[0-9]+]], {{[0-9]+}}, 0, 52 ; CHECK-NO-ISEL: bc 12, 1, [[TRUE:.LBB[0-9]+]] -; CHECK-NO-ISEL: ori 11, 3, 0 +; CHECK-NO-ISEL: ori [[REG3:[0-9]+]], 3, 0 ; CHECK-NO-ISEL-NEXT: b [[SUCCESSOR:.LBB[0-9]+]] ; CHECK-NO-ISEL-NEXT: [[TRUE]] -; CHECK-NO-ISEL-NEXT: addi 11, 4, 0 +; CHECK-NO-ISEL-NEXT: addi [[REG3]], [[REG2]], 0 +; CHECK-NO-ISEL-NEXT: [[SUCCESSOR]] +; 
CHECK-NO-ISEL: std [[REG3]], -{{[0-9]+}}(1) ; CHECK: std [[REG3]], -{{[0-9]+}}(1) diff --git a/llvm/test/CodeGen/PowerPC/srl-mask.ll b/llvm/test/CodeGen/PowerPC/srl-mask.ll index e581eae0ee576..1a429b1bae361 100644 --- a/llvm/test/CodeGen/PowerPC/srl-mask.ll +++ b/llvm/test/CodeGen/PowerPC/srl-mask.ll @@ -12,5 +12,16 @@ entry: ; CHECK: blr } +; for AND with an immediate like (x & ~0xFFFF) +; we should use rldicr instruction +define i64 @bar(i64 %x) #0 { +entry: +; CHECK-LABEL: @bar + %a = and i64 %x, 18446744073709486080 +; CHECK: rldicr 3, 3, 0, 47 + ret i64 %a +; CHECK: blr +} + attributes #0 = { nounwind }