[X86] Teach the isel optimization for (x << C1) op C2 to (x op (C2>>C…

…1)) << C1 to consider cases where C2>>C1 can fit an unsigned 32-bit immediate For 64-bit operations we should consider if the immediate can be made to fit in an unsigned 32-bits immedate. For OR/XOR this allows us to load the immediate with MOV32ri instead of movabsq. For AND this allows us to fold the immediate. Differential Revision: https://reviews.llvm.org/D59867 llvm-svn: 357196
llvm · Mar 28, 2019 · c25c9b4 · c25c9b4
1 parent fc5ddee
commit c25c9b4
Show file tree

Hide file tree

Showing 2 changed files with 36 additions and 30 deletions.
diff --git a/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp b/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp
@@ -3501,39 +3501,45 @@ void X86DAGToDAGISel::Select(SDNode *Node) {
       break;
 
     int64_t Val = Cst->getSExtValue();
-    uint64_t ShlVal = ShlCst->getZExtValue();
+    uint64_t ShAmt = ShlCst->getZExtValue();
 
     // Make sure that we don't change the operation by removing bits.
     // This only matters for OR and XOR, AND is unaffected.
-    uint64_t RemovedBitsMask = (1ULL << ShlVal) - 1;
+    uint64_t RemovedBitsMask = (1ULL << ShAmt) - 1;
     if (Opcode != ISD::AND && (Val & RemovedBitsMask) != 0)
       break;
 
-    MVT CstVT = NVT;
-
     // Check the minimum bitwidth for the new constant.
-    // TODO: AND32ri is the same as AND64ri32 with zext imm.
-    // TODO: MOV32ri+OR64r is cheaper than MOV64ri64+OR64rr
     // TODO: Using 16 and 8 bit operations is also possible for or32 & xor32.
-    if (!isInt<8>(Val) && isInt<8>(Val >> ShlVal))
-      CstVT = MVT::i8;
-    else if (!isInt<32>(Val) && isInt<32>(Val >> ShlVal))
-      CstVT = MVT::i32;
+    auto CanShrinkImmediate = [&](int64_t &ShiftedVal) {
+      ShiftedVal = Val >> ShAmt;
+      if ((!isInt<8>(Val) && isInt<8>(ShiftedVal)) ||
+          (!isInt<32>(Val) && isInt<32>(ShiftedVal)))
+        return true;
+      // For 64-bit we can also try unsigned 32 bit immediates.
+      // AND32ri is the same as AND64ri32 with zext imm.
+      // MOV32ri+OR64r is cheaper than MOV64ri64+OR64rr
+      ShiftedVal = (uint64_t)Val >> ShAmt;
+      if (NVT == MVT::i64 && !isUInt<32>(Val) && isUInt<32>(ShiftedVal))
+        return true;
+      return false;
+    };
 
-    // Bail if there is no smaller encoding.
-    if (NVT == CstVT)
-      break;
+    int64_t ShiftedVal;
+    if (CanShrinkImmediate(ShiftedVal)) {
+      SDValue NewCst = CurDAG->getConstant(ShiftedVal, dl, NVT);
+      insertDAGNode(*CurDAG, SDValue(Node, 0), NewCst);
+      SDValue NewBinOp = CurDAG->getNode(Opcode, dl, NVT, N0->getOperand(0),
+                                         NewCst);
+      insertDAGNode(*CurDAG, SDValue(Node, 0), NewBinOp);
+      SDValue NewSHL = CurDAG->getNode(ISD::SHL, dl, NVT, NewBinOp,
+                                       N0->getOperand(1));
+      ReplaceNode(Node, NewSHL.getNode());
+      SelectCode(NewSHL.getNode());
+      return;
+    }
 
-    SDValue NewCst = CurDAG->getConstant(Val >> ShlVal, dl, NVT);
-    insertDAGNode(*CurDAG, SDValue(Node, 0), NewCst);
-    SDValue NewBinOp = CurDAG->getNode(Opcode, dl, NVT, N0->getOperand(0),
-                                       NewCst);
-    insertDAGNode(*CurDAG, SDValue(Node, 0), NewBinOp);
-    SDValue NewSHL = CurDAG->getNode(ISD::SHL, dl, NVT, NewBinOp,
-                                     N0->getOperand(1));
-    ReplaceNode(Node, NewSHL.getNode());
-    SelectCode(NewSHL.getNode());
-    return;
+    break;
   }
   case X86ISD::SMUL:
     // i16/i32/i64 are handled with isel patterns.

diff --git a/llvm/test/CodeGen/X86/narrow-shl-cst.ll b/llvm/test/CodeGen/X86/narrow-shl-cst.ll
@@ -166,9 +166,9 @@ define i64 @test13(i64 %x, i64* %y) nounwind {
 define i64 @test14(i64 %x, i64* %y) nounwind {
 ; CHECK-LABEL: test14:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    shlq $8, %rdi
-; CHECK-NEXT:    movabsq $1095216660480, %rax # imm = 0xFF00000000
-; CHECK-NEXT:    andq %rdi, %rax
+; CHECK-NEXT:    movq %rdi, %rax
+; CHECK-NEXT:    andl $-16777216, %eax # imm = 0xFF000000
+; CHECK-NEXT:    shlq $8, %rax
 ; CHECK-NEXT:    retq
   %and = shl i64 %x, 8
   %shl = and i64 %and, 1095216660480
@@ -178,9 +178,9 @@ define i64 @test14(i64 %x, i64* %y) nounwind {
 define i64 @test15(i64 %x, i64* %y) nounwind {
 ; CHECK-LABEL: test15:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    shlq $8, %rdi
-; CHECK-NEXT:    movabsq $1095216660480, %rax # imm = 0xFF00000000
+; CHECK-NEXT:    movl $4278190080, %eax # imm = 0xFF000000
 ; CHECK-NEXT:    orq %rdi, %rax
+; CHECK-NEXT:    shlq $8, %rax
 ; CHECK-NEXT:    retq
   %or = shl i64 %x, 8
   %shl = or i64 %or, 1095216660480
@@ -190,9 +190,9 @@ define i64 @test15(i64 %x, i64* %y) nounwind {
 define i64 @test16(i64 %x, i64* %y) nounwind {
 ; CHECK-LABEL: test16:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    shlq $8, %rdi
-; CHECK-NEXT:    movabsq $1095216660480, %rax # imm = 0xFF00000000
+; CHECK-NEXT:    movl $4278190080, %eax # imm = 0xFF000000
 ; CHECK-NEXT:    xorq %rdi, %rax
+; CHECK-NEXT:    shlq $8, %rax
 ; CHECK-NEXT:    retq
   %xor = shl i64 %x, 8
   %shl = xor i64 %xor, 1095216660480