Skip to content

Commit

Permalink
[DAG] optimize negation of bool
Browse files Browse the repository at this point in the history
Use mask and negate for legalization of i1 source type with SIGN_EXTEND_INREG.
With the mask, this should be no worse than 2 shifts. The mask can be eliminated
in some cases, so that should be better than 2 shifts.

This change exposed some missing folds related to negation:
https://reviews.llvm.org/rL284239
https://reviews.llvm.org/rL284395

There may be others, so please let me know if you see any regressions.

Differential Revision: https://reviews.llvm.org/D25485

llvm-svn: 284611
  • Loading branch information
rotateright committed Oct 19, 2016
1 parent 3838032 commit 3a3aaf6
Show file tree
Hide file tree
Showing 14 changed files with 165 additions and 125 deletions.
21 changes: 19 additions & 2 deletions llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
Expand Up @@ -2926,10 +2926,27 @@ bool SelectionDAGLegalize::ExpandNode(SDNode *Node) {
Results.push_back(Tmp1);
break;
case ISD::SIGN_EXTEND_INREG: {
// NOTE: we could fall back on load/store here too for targets without
// SAR. However, it is doubtful that any exist.
EVT ExtraVT = cast<VTSDNode>(Node->getOperand(1))->getVT();
EVT VT = Node->getValueType(0);

// An in-register sign-extend of a boolean is a negation:
// 'true' (1) sign-extended is -1.
// 'false' (0) sign-extended is 0.
// However, we must mask the high bits of the source operand because the
// SIGN_EXTEND_INREG does not guarantee that the high bits are already zero.

// TODO: Do this for vectors too?
if (ExtraVT.getSizeInBits() == 1) {
SDValue One = DAG.getConstant(1, dl, VT);
SDValue And = DAG.getNode(ISD::AND, dl, VT, Node->getOperand(0), One);
SDValue Zero = DAG.getConstant(0, dl, VT);
SDValue Neg = DAG.getNode(ISD::SUB, dl, VT, Zero, And);
Results.push_back(Neg);
break;
}

// NOTE: we could fall back on load/store here too for targets without
// SRA. However, it is doubtful that any exist.
EVT ShiftAmountTy = TLI.getShiftAmountTy(VT, DAG.getDataLayout());
if (VT.isVector())
ShiftAmountTy = VT;
Expand Down
7 changes: 3 additions & 4 deletions llvm/test/CodeGen/ARM/negate-i1.ll
Expand Up @@ -5,8 +5,8 @@
define i32 @select_i32_neg1_or_0(i1 %a) {
; CHECK-LABEL: select_i32_neg1_or_0:
; CHECK-NEXT: @ BB#0:
; CHECK-NEXT: lsl r0, r0, #31
; CHECK-NEXT: asr r0, r0, #31
; CHECK-NEXT: and r0, r0, #1
; CHECK-NEXT: rsb r0, r0, #0
; CHECK-NEXT: mov pc, lr
;
%b = sext i1 %a to i32
Expand All @@ -16,8 +16,7 @@ define i32 @select_i32_neg1_or_0(i1 %a) {
define i32 @select_i32_neg1_or_0_zeroext(i1 zeroext %a) {
; CHECK-LABEL: select_i32_neg1_or_0_zeroext:
; CHECK-NEXT: @ BB#0:
; CHECK-NEXT: lsl r0, r0, #31
; CHECK-NEXT: asr r0, r0, #31
; CHECK-NEXT: rsb r0, r0, #0
; CHECK-NEXT: mov pc, lr
;
%b = sext i1 %a to i32
Expand Down
46 changes: 26 additions & 20 deletions llvm/test/CodeGen/Mips/llvm-ir/add.ll
Expand Up @@ -31,21 +31,27 @@
; RUN: llc < %s -march=mips -mcpu=mips64r6 -target-abi n64 -mattr=+micromips -O2 | FileCheck %s \
; RUN: -check-prefixes=ALL,MMR6,MM64


; FIXME: This code sequence is inefficient as it should be 'subu $[[T0]], $zero, $[[T0]]'.
; This sequence is even better as it's a single instruction. See D25485 for the rest of
; the cases where this sequence occurs.

define signext i1 @add_i1(i1 signext %a, i1 signext %b) {
entry:
; ALL-LABEL: add_i1:

; NOT-R2-R6: addu $[[T0:[0-9]+]], $4, $5
; NOT-R2-R6: sll $[[T0]], $[[T0]], 31
; NOT-R2-R6: sra $2, $[[T0]], 31
; NOT-R2-R6: addu $[[T0:[0-9]+]], $4, $5
; NOT-R2-R6: andi $[[T0]], $[[T0]], 1
; NOT-R2-R6: negu $2, $[[T0]]

; R2-R6: addu $[[T0:[0-9]+]], $4, $5
; R2-R6: sll $[[T0]], $[[T0]], 31
; R2-R6: sra $2, $[[T0]], 31
; R2-R6: addu $[[T0:[0-9]+]], $4, $5
; R2-R6: andi $[[T0]], $[[T0]], 1
; R2-R6: negu $2, $[[T0]]

; MMR6: addu16 $[[T0:[0-9]+]], $4, $5
; MMR6: sll $[[T1:[0-9]+]], $[[T0]], 31
; MMR6: sra $2, $[[T1]], 31
; MMR6: andi16 $[[T0]], $[[T0]], 1
; MMR6: li16 $[[T1:[0-9]+]], 0
; MMR6: subu16 $[[T0]], $[[T1]], $[[T0]]

%r = add i1 %a, %b
ret i1 %r
Expand Down Expand Up @@ -303,18 +309,18 @@ define signext i128 @add_i128_4(i128 signext %a) {

define signext i1 @add_i1_3(i1 signext %a) {
; ALL-LABEL: add_i1_3:

; ALL: sll $[[T0:[0-9]+]], $4, 31
; ALL: lui $[[T1:[0-9]+]], 32768

; GP32: addu $[[T0]], $[[T0]], $[[T1]]
; GP32: sra $[[T1]], $[[T0]], 31

; GP64: addu $[[T0]], $[[T0]], $[[T1]]
; GP64: sra $[[T1]], $[[T0]], 31

; MMR6: addu16 $[[T0]], $[[T0]], $[[T1]]
; MMR6: sra $[[T0]], $[[T0]], 31
; GP32: addiu $[[T0:[0-9]+]], $4, 1
; GP32: andi $[[T0]], $[[T0]], 1
; GP32: negu $2, $[[T0]]

; GP64: addiu $[[T0:[0-9]+]], $4, 1
; GP64: andi $[[T0]], $[[T0]], 1
; GP64: negu $2, $[[T0]]

; MMR6: addiur2 $[[T0:[0-9]+]], $4, 1
; MMR6: andi16 $[[T0]], $[[T0]], 1
; MMR6: li16 $[[T1:[0-9]+]], 0
; MMR6: subu16 $2, $[[T1]], $[[T0]]

%r = add i1 3, %a
ret i1 %r
Expand Down
51 changes: 36 additions & 15 deletions llvm/test/CodeGen/Mips/llvm-ir/mul.ll
Expand Up @@ -27,41 +27,47 @@
; RUN: llc < %s -march=mips -mcpu=mips32r6 -mattr=+micromips -relocation-model=pic | \
; RUN: FileCheck %s -check-prefixes=MM32,MM32R6
; RUN: llc < %s -march=mips -mcpu=mips64r6 -mattr=+micromips -target-abi n64 -relocation-model=pic | \
; RUN: FileCheck %s -check-prefix=64R6
; RUN: FileCheck %s -check-prefix=MM64R6

define signext i1 @mul_i1(i1 signext %a, i1 signext %b) {
entry:
; ALL-LABEL: mul_i1:

; M2: mult $4, $5
; M2: mflo $[[T0:[0-9]+]]
; M2: sll $[[T0]], $[[T0]], 31
; M2: sra $2, $[[T0]], 31
; M2: andi $[[T0]], $[[T0]], 1
; M2: negu $2, $[[T0]]

; 32R1-R5: mul $[[T0:[0-9]+]], $4, $5
; 32R1-R5: sll $[[T0]], $[[T0]], 31
; 32R1-R5: sra $2, $[[T0]], 31
; 32R1-R5: andi $[[T0]], $[[T0]], 1
; 32R1-R5: negu $2, $[[T0]]

; 32R6: mul $[[T0:[0-9]+]], $4, $5
; 32R6: sll $[[T0]], $[[T0]], 31
; 32R6: sra $2, $[[T0]], 31
; 32R6: andi $[[T0]], $[[T0]], 1
; 32R6: negu $2, $[[T0]]

; M4: mult $4, $5
; M4: mflo $[[T0:[0-9]+]]
; M4: sll $[[T0]], $[[T0]], 31
; M4: sra $2, $[[T0]], 31
; M4: andi $[[T0]], $[[T0]], 1
; M4: negu $2, $[[T0]]

; 64R1-R5: mul $[[T0:[0-9]+]], $4, $5
; 64R1-R5: sll $[[T0]], $[[T0]], 31
; 64R1-R5: sra $2, $[[T0]], 31
; 64R1-R5: andi $[[T0]], $[[T0]], 1
; 64R1-R5: negu $2, $[[T0]]

; 64R6: mul $[[T0:[0-9]+]], $4, $5
; 64R6: sll $[[T0]], $[[T0]], 31
; 64R6: sra $2, $[[T0]], 31
; 64R6: andi $[[T0]], $[[T0]], 1
; 64R6: negu $2, $[[T0]]

; MM64R6: mul $[[T0:[0-9]+]], $4, $5
; MM64R6: andi16 $[[T0]], $[[T0]], 1
; MM64R6: li16 $[[T1:[0-9]+]], 0
; MM64R6: subu16 $2, $[[T1]], $[[T0]]

; MM32: mul $[[T0:[0-9]+]], $4, $5
; MM32: sll $[[T0]], $[[T0]], 31
; MM32: sra $2, $[[T0]], 31
; MM32: andi16 $[[T0]], $[[T0]], 1
; MM32: li16 $[[T1:[0-9]+]], 0
; MM32: subu16 $2, $[[T1]], $[[T0]]

%r = mul i1 %a, %b
ret i1 %r
Expand Down Expand Up @@ -101,6 +107,9 @@ entry:
; 64R6: mul $[[T0:[0-9]+]], $4, $5
; 64R6: seb $2, $[[T0]]

; MM64R6: mul $[[T0:[0-9]+]], $4, $5
; MM64R6: seb $2, $[[T0]]

; MM32: mul $[[T0:[0-9]+]], $4, $5
; MM32: seb $2, $[[T0]]

Expand Down Expand Up @@ -142,6 +151,9 @@ entry:
; 64R6: mul $[[T0:[0-9]+]], $4, $5
; 64R6: seh $2, $[[T0]]

; MM64R6: mul $[[T0:[0-9]+]], $4, $5
; MM64R6: seh $2, $[[T0]]

; MM32: mul $[[T0:[0-9]+]], $4, $5
; MM32: seh $2, $[[T0]]

Expand All @@ -161,6 +173,7 @@ entry:

; 64R1-R5: mul $2, $4, $5
; 64R6: mul $2, $4, $5
; MM64R6: mul $2, $4, $5

; MM32: mul $2, $4, $5

Expand Down Expand Up @@ -204,6 +217,7 @@ entry:
; 64R1-R5: mflo $2

; 64R6: dmul $2, $4, $5
; MM64R6: dmul $2, $4, $5

; MM32R3: multu $[[T0:[0-9]+]], $7
; MM32R3: mflo $[[T1:[0-9]+]]
Expand Down Expand Up @@ -247,6 +261,13 @@ entry:
; 64R6: daddu $2, $[[T1]], $[[T0]]
; 64R6-DAG: dmul $3, $5, $7

; MM64R6-DAG: dmul $[[T1:[0-9]+]], $5, $6
; MM64R6: dmuhu $[[T2:[0-9]+]], $5, $7
; MM64R6: daddu $[[T3:[0-9]+]], $[[T2]], $[[T1]]
; MM64R6-DAG: dmul $[[T0:[0-9]+]], $4, $7
; MM64R6: daddu $2, $[[T1]], $[[T0]]
; MM64R6-DAG: dmul $3, $5, $7

; MM32: lw $25, %call16(__multi3)($16)

%r = mul i128 %a, %b
Expand Down
22 changes: 12 additions & 10 deletions llvm/test/CodeGen/Mips/llvm-ir/sdiv.ll
Expand Up @@ -40,26 +40,28 @@ entry:
; NOT-R6: div $zero, $4, $5
; NOT-R6: teq $5, $zero, 7
; NOT-R6: mflo $[[T0:[0-9]+]]
; FIXME: The sll/sra instructions are redundant since div is signed.
; NOT-R6: sll $[[T1:[0-9]+]], $[[T0]], 31
; NOT-R6: sra $2, $[[T1]], 31
; FIXME: The andi/negu instructions are redundant since div is signed.
; NOT-R6: andi $[[T0]], $[[T0]], 1
; NOT-R6: negu $2, $[[T0]]

; R6: div $[[T0:[0-9]+]], $4, $5
; R6: teq $5, $zero, 7
; FIXME: The sll/sra instructions are redundant since div is signed.
; R6: sll $[[T1:[0-9]+]], $[[T0]], 31
; R6: sra $2, $[[T1]], 31
; FIXME: The andi/negu instructions are redundant since div is signed.
; R6: andi $[[T0]], $[[T0]], 1
; R6: negu $2, $[[T0]]

; MMR3: div $zero, $4, $5
; MMR3: teq $5, $zero, 7
; MMR3: mflo $[[T0:[0-9]+]]
; MMR3: sll $[[T1:[0-9]+]], $[[T0]], 31
; MMR3: sra $2, $[[T1]], 31
; MMR3: andi16 $[[T0]], $[[T0]], 1
; MMR3: li16 $[[T1:[0-9]+]], 0
; MMR3: subu16 $2, $[[T1]], $[[T0]]

; MMR6: div $[[T0:[0-9]+]], $4, $5
; MMR6: teq $5, $zero, 7
; MMR6: sll $[[T1:[0-9]+]], $[[T0]], 31
; MMR6: sra $2, $[[T1]], 31
; MMR6: andi16 $[[T0]], $[[T0]], 1
; MMR6: li16 $[[T1:[0-9]+]], 0
; MMR6: subu16 $2, $[[T1]], $[[T0]]

%r = sdiv i1 %a, %b
ret i1 %r
Expand Down
18 changes: 10 additions & 8 deletions llvm/test/CodeGen/Mips/llvm-ir/srem.ll
Expand Up @@ -40,24 +40,26 @@ entry:
; NOT-R6: div $zero, $4, $5
; NOT-R6: teq $5, $zero, 7
; NOT-R6: mfhi $[[T0:[0-9]+]]
; NOT-R6: sll $[[T1:[0-9]+]], $[[T0]], 31
; NOT-R6: sra $2, $[[T1]], 31
; NOT-R6: andi $[[T0]], $[[T0]], 1
; NOT-R6: negu $2, $[[T0]]

; R6: mod $[[T0:[0-9]+]], $4, $5
; R6: teq $5, $zero, 7
; R6: sll $[[T3:[0-9]+]], $[[T0]], 31
; R6: sra $2, $[[T3]], 31
; R6: andi $[[T0]], $[[T0]], 1
; R6: negu $2, $[[T0]]

; MMR3: div $zero, $4, $5
; MMR3: teq $5, $zero, 7
; MMR3: mfhi $[[T0:[0-9]+]]
; MMR3: sll $[[T1:[0-9]+]], $[[T0]], 31
; MMR3: sra $2, $[[T1]], 31
; MMR3: andi16 $[[T0]], $[[T0]], 1
; MMR3: li16 $[[T1:[0-9]+]], 0
; MMR3: subu16 $2, $[[T1]], $[[T0]]

; MMR6: mod $[[T0:[0-9]+]], $4, $5
; MMR6: teq $5, $zero, 7
; MMR6: sll $[[T1:[0-9]+]], $[[T0]], 31
; MMR6: sra $2, $[[T1]], 31
; MMR6: andi16 $[[T0]], $[[T0]], 1
; MMR6: li16 $[[T1:[0-9]+]], 0
; MMR6: subu16 $2, $[[T1]], $[[T0]]

%r = srem i1 %a, %b
ret i1 %r
Expand Down
9 changes: 5 additions & 4 deletions llvm/test/CodeGen/Mips/llvm-ir/sub.ll
Expand Up @@ -36,12 +36,13 @@ entry:
; ALL-LABEL: sub_i1:

; NOT-MM: subu $[[T0:[0-9]+]], $4, $5
; NOT-MM: sll $[[T0]], $[[T0]], 31
; NOT-MM: sra $2, $[[T0]], 31
; NOT-MM: andi $[[T0]], $[[T0]], 1
; NOT-MM: negu $2, $[[T0]]

; MM: subu16 $[[T0:[0-9]+]], $4, $5
; MM: sll $[[T1:[0-9]+]], $[[T0]], 31
; MM: sra $[[T0]], $[[T1]], 31
; MM: andi16 $[[T0]], $[[T0]], 1
; MM: li16 $[[T1:[0-9]+]], 0
; MM: subu16 $2, $[[T1]], $[[T0]]

%r = sub i1 %a, %b
ret i1 %r
Expand Down
16 changes: 8 additions & 8 deletions llvm/test/CodeGen/Mips/llvm-ir/urem.ll
Expand Up @@ -42,30 +42,30 @@ entry:
; NOT-R6: divu $zero, $[[T1]], $[[T0]]
; NOT-R6: teq $[[T0]], $zero, 7
; NOT-R6: mfhi $[[T2:[0-9]+]]
; NOT-R6: sll $[[T3:[0-9]+]], $[[T2]], 31
; NOT-R6: sra $2, $[[T3]], 31
; NOT-R6: andi $[[T0]], $[[T0]], 1
; NOT-R6: negu $2, $[[T0]]

; R6: andi $[[T0:[0-9]+]], $5, 1
; R6: andi $[[T1:[0-9]+]], $4, 1
; R6: modu $[[T2:[0-9]+]], $[[T1]], $[[T0]]
; R6: teq $[[T0]], $zero, 7
; R6: sll $[[T3:[0-9]+]], $[[T2]], 31
; R6: sra $2, $[[T3]], 31
; R6: negu $2, $[[T2]]

; MMR3: andi16 $[[T0:[0-9]+]], $5, 1
; MMR3: andi16 $[[T1:[0-9]+]], $4, 1
; MMR3: divu $zero, $[[T1]], $[[T0]]
; MMR3: teq $[[T0]], $zero, 7
; MMR3: mfhi $[[T2:[0-9]+]]
; MMR3: sll $[[T3:[0-9]+]], $[[T2]], 31
; MMR3: sra $2, $[[T3]], 31
; MMR3: andi16 $[[T0]], $[[T0]], 1
; MMR3: li16 $[[T1:[0-9]+]], 0
; MMR3: subu16 $2, $[[T1]], $[[T0]]

; MMR6: andi16 $[[T0:[0-9]+]], $5, 1
; MMR6: andi16 $[[T1:[0-9]+]], $4, 1
; MMR6: modu $[[T2:[0-9]+]], $[[T1]], $[[T0]]
; MMR6: teq $[[T0]], $zero, 7
; MMR6: sll $[[T3:[0-9]+]], $[[T2]], 31
; MMR6: sra $2, $[[T3]], 31
; MMR6: li16 $[[T3:[0-9]+]], 0
; MMR6: subu16 $2, $[[T3]], $[[T2]]

%r = urem i1 %a, %b
ret i1 %r
Expand Down
3 changes: 2 additions & 1 deletion llvm/test/CodeGen/Mips/select.ll
Expand Up @@ -140,9 +140,10 @@ entry:
; 32R2-DAG: mtc1 $6, $[[F1:f0]]
; 32R2: movn.s $[[F1]], $[[F0]], $4

; 32R6: sltu $[[T0:[0-9]+]], $zero, $4
; 32R6: negu $[[T0]], $[[T0]]
; 32R6-DAG: mtc1 $5, $[[F0:f[0-9]+]]
; 32R6-DAG: mtc1 $6, $[[F1:f[0-9]+]]
; 32R6: sltu $[[T0:[0-9]+]], $zero, $4
; 32R6: mtc1 $[[T0]], $[[CC:f0]]
; 32R6: sel.s $[[CC]], $[[F1]], $[[F0]]

Expand Down

0 comments on commit 3a3aaf6

Please sign in to comment.