diff --git a/llvm/include/llvm/CodeGen/TargetLowering.h b/llvm/include/llvm/CodeGen/TargetLowering.h index a4dc097446186..980d75ad91a0d 100644 --- a/llvm/include/llvm/CodeGen/TargetLowering.h +++ b/llvm/include/llvm/CodeGen/TargetLowering.h @@ -1804,6 +1804,13 @@ class TargetLoweringBase { /// where the sext is redundant, and use x directly. virtual bool shouldRemoveRedundantExtend(SDValue Op) const { return true; } + /// Indicates if any padding is guaranteed to go at the most significant bits + /// when storing the type to memory and the type size isn't equal to the store + /// size. + bool isPaddedAtMostSignificantBitsWhenStored(EVT VT) const { + return VT.isScalarInteger() && !VT.isByteSized(); + } + /// When splitting a value of the specified type into parts, does the Lo /// or Hi part come first? This usually follows the endianness, except /// for ppcf128, where the Hi part always comes first. diff --git a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp index 409d66adfd67d..4939976b8e287 100644 --- a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp @@ -4621,7 +4621,10 @@ SDValue TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1, LoadSDNode *Lod = cast(N0.getOperand(0)); APInt bestMask; unsigned bestWidth = 0, bestOffset = 0; - if (Lod->isSimple() && Lod->isUnindexed()) { + if (Lod->isSimple() && Lod->isUnindexed() && + (Lod->getMemoryVT().isByteSized() || + isPaddedAtMostSignificantBitsWhenStored(Lod->getMemoryVT()))) { + unsigned memWidth = Lod->getMemoryVT().getStoreSizeInBits(); unsigned origWidth = N0.getValueSizeInBits(); unsigned maskWidth = origWidth; // We can narrow (e.g.) 
16-bit extending loads on 32-bit target to @@ -4629,40 +4632,51 @@ SDValue TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1, if (Lod->getExtensionType() != ISD::NON_EXTLOAD) origWidth = Lod->getMemoryVT().getSizeInBits(); const APInt &Mask = N0.getConstantOperandAPInt(1); - for (unsigned width = origWidth / 2; width>=8; width /= 2) { + // Only consider power-of-2 widths (and at least one byte) as candidates + // for the narrowed load. + for (unsigned width = 8; width < origWidth; width *= 2) { + EVT newVT = EVT::getIntegerVT(*DAG.getContext(), width); + if (!shouldReduceLoadWidth(Lod, ISD::NON_EXTLOAD, newVT)) + continue; APInt newMask = APInt::getLowBitsSet(maskWidth, width); - for (unsigned offset=0; offsetgetAlign(), ptrOffset / 8); + if (allowsMemoryAccess( + *DAG.getContext(), Layout, newVT, Lod->getAddressSpace(), + NewAlign, Lod->getMemOperand()->getFlags(), &IsFast) && + IsFast) { + bestOffset = ptrOffset / 8; + bestMask = Mask.lshr(offset); + bestWidth = width; + break; + } } - newMask <<= width; + newMask <<= 8; } + if (bestWidth) + break; } } if (bestWidth) { EVT newVT = EVT::getIntegerVT(*DAG.getContext(), bestWidth); - if (newVT.isRound() && - shouldReduceLoadWidth(Lod, ISD::NON_EXTLOAD, newVT)) { - SDValue Ptr = Lod->getBasePtr(); - if (bestOffset != 0) - Ptr = DAG.getMemBasePlusOffset(Ptr, TypeSize::getFixed(bestOffset), - dl); - SDValue NewLoad = - DAG.getLoad(newVT, dl, Lod->getChain(), Ptr, - Lod->getPointerInfo().getWithOffset(bestOffset), - Lod->getOriginalAlign()); - return DAG.getSetCC(dl, VT, - DAG.getNode(ISD::AND, dl, newVT, NewLoad, - DAG.getConstant(bestMask.trunc(bestWidth), - dl, newVT)), - DAG.getConstant(0LL, dl, newVT), Cond); - } + SDValue Ptr = Lod->getBasePtr(); + if (bestOffset != 0) + Ptr = DAG.getObjectPtrOffset(dl, Ptr, TypeSize::getFixed(bestOffset)); + SDValue NewLoad = + DAG.getLoad(newVT, dl, Lod->getChain(), Ptr, + Lod->getPointerInfo().getWithOffset(bestOffset), + Lod->getOriginalAlign()); + SDValue And = + 
DAG.getNode(ISD::AND, dl, newVT, NewLoad, + DAG.getConstant(bestMask.trunc(bestWidth), dl, newVT)); + return DAG.getSetCC(dl, VT, And, DAG.getConstant(0LL, dl, newVT), Cond); } } diff --git a/llvm/test/CodeGen/ARM/simplifysetcc_narrow_load.ll b/llvm/test/CodeGen/ARM/simplifysetcc_narrow_load.ll index 2cba4b46f9a8a..838da59f9e412 100644 --- a/llvm/test/CodeGen/ARM/simplifysetcc_narrow_load.ll +++ b/llvm/test/CodeGen/ARM/simplifysetcc_narrow_load.ll @@ -40,7 +40,9 @@ define i1 @test_129_15_0(ptr %y) { ; ; CHECK-BE-LABEL: test_129_15_0: ; CHECK-BE: @ %bb.0: -; CHECK-BE-NEXT: ldrh r0, [r0, #14] +; CHECK-BE-NEXT: ldr r1, [r0, #12] +; CHECK-BE-NEXT: ldrb r0, [r0, #16] +; CHECK-BE-NEXT: orr r0, r0, r1, lsl #8 ; CHECK-BE-NEXT: mov r1, #255 ; CHECK-BE-NEXT: orr r1, r1, #32512 ; CHECK-BE-NEXT: ands r0, r0, r1 @@ -49,7 +51,7 @@ define i1 @test_129_15_0(ptr %y) { ; ; CHECK-V7-BE-LABEL: test_129_15_0: ; CHECK-V7-BE: @ %bb.0: -; CHECK-V7-BE-NEXT: ldrh r0, [r0, #14] +; CHECK-V7-BE-NEXT: ldrh r0, [r0, #15] ; CHECK-V7-BE-NEXT: bfc r0, #15, #17 ; CHECK-V7-BE-NEXT: cmp r0, #0 ; CHECK-V7-BE-NEXT: movwne r0, #1 @@ -119,14 +121,14 @@ define i1 @test_33_8_0(ptr %y) { ; ; CHECK-BE-LABEL: test_33_8_0: ; CHECK-BE: @ %bb.0: -; CHECK-BE-NEXT: ldrb r0, [r0, #3] +; CHECK-BE-NEXT: ldrb r0, [r0, #4] ; CHECK-BE-NEXT: cmp r0, #0 ; CHECK-BE-NEXT: movne r0, #1 ; CHECK-BE-NEXT: mov pc, lr ; ; CHECK-V7-BE-LABEL: test_33_8_0: ; CHECK-V7-BE: @ %bb.0: -; CHECK-V7-BE-NEXT: ldrb r0, [r0, #3] +; CHECK-V7-BE-NEXT: ldrb r0, [r0, #4] ; CHECK-V7-BE-NEXT: cmp r0, #0 ; CHECK-V7-BE-NEXT: movwne r0, #1 ; CHECK-V7-BE-NEXT: bx lr @@ -179,13 +181,13 @@ define i1 @test_33_1_31(ptr %y) { ; ; CHECK-BE-LABEL: test_33_1_31: ; CHECK-BE: @ %bb.0: -; CHECK-BE-NEXT: ldrb r0, [r0] +; CHECK-BE-NEXT: ldrb r0, [r0, #1] ; CHECK-BE-NEXT: lsr r0, r0, #7 ; CHECK-BE-NEXT: mov pc, lr ; ; CHECK-V7-BE-LABEL: test_33_1_31: ; CHECK-V7-BE: @ %bb.0: -; CHECK-V7-BE-NEXT: ldrb r0, [r0] +; CHECK-V7-BE-NEXT: ldrb r0, [r0, #1] ; CHECK-V7-BE-NEXT: 
lsr r0, r0, #7 ; CHECK-V7-BE-NEXT: bx lr %a = load i33, ptr %y @@ -209,13 +211,13 @@ define i1 @test_33_1_0(ptr %y) { ; ; CHECK-BE-LABEL: test_33_1_0: ; CHECK-BE: @ %bb.0: -; CHECK-BE-NEXT: ldrb r0, [r0, #3] +; CHECK-BE-NEXT: ldrb r0, [r0, #4] ; CHECK-BE-NEXT: and r0, r0, #1 ; CHECK-BE-NEXT: mov pc, lr ; ; CHECK-V7-BE-LABEL: test_33_1_0: ; CHECK-V7-BE: @ %bb.0: -; CHECK-V7-BE-NEXT: ldrb r0, [r0, #3] +; CHECK-V7-BE-NEXT: ldrb r0, [r0, #4] ; CHECK-V7-BE-NEXT: and r0, r0, #1 ; CHECK-V7-BE-NEXT: bx lr %a = load i33, ptr %y @@ -309,7 +311,7 @@ define i1 @test_48_16_8(ptr %y) { ; CHECK-LE-LABEL: test_48_16_8: ; CHECK-LE: @ %bb.0: ; CHECK-LE-NEXT: ldrh r0, [r0, #1] -; CHECK-LE-NEXT: cmp r0, #0 +; CHECK-LE-NEXT: lsls r0, r0, #8 ; CHECK-LE-NEXT: movne r0, #1 ; CHECK-LE-NEXT: mov pc, lr ; @@ -444,9 +446,7 @@ define i1 @test_48_17_0(ptr %y) { ; ; CHECK-V7-BE-LABEL: test_48_17_0: ; CHECK-V7-BE: @ %bb.0: -; CHECK-V7-BE-NEXT: ldr r1, [r0] -; CHECK-V7-BE-NEXT: ldrh r0, [r0, #4] -; CHECK-V7-BE-NEXT: orr r0, r0, r1, lsl #16 +; CHECK-V7-BE-NEXT: ldr r0, [r0, #2] ; CHECK-V7-BE-NEXT: bfc r0, #17, #15 ; CHECK-V7-BE-NEXT: cmp r0, #0 ; CHECK-V7-BE-NEXT: movwne r0, #1 @@ -506,15 +506,14 @@ define i1 @test_40_1_32(ptr %y) { ; ; CHECK-BE-LABEL: test_40_1_32: ; CHECK-BE: @ %bb.0: -; CHECK-BE-NEXT: ldr r0, [r0] -; CHECK-BE-NEXT: mov r1, #1 -; CHECK-BE-NEXT: and r0, r1, r0, lsr #24 +; CHECK-BE-NEXT: ldrb r0, [r0] +; CHECK-BE-NEXT: and r0, r0, #1 ; CHECK-BE-NEXT: mov pc, lr ; ; CHECK-V7-BE-LABEL: test_40_1_32: ; CHECK-V7-BE: @ %bb.0: -; CHECK-V7-BE-NEXT: ldr r0, [r0] -; CHECK-V7-BE-NEXT: ubfx r0, r0, #24, #1 +; CHECK-V7-BE-NEXT: ldrb r0, [r0] +; CHECK-V7-BE-NEXT: and r0, r0, #1 ; CHECK-V7-BE-NEXT: bx lr %a = load i40, ptr %y %b = and i40 %a, u0x100000000 diff --git a/llvm/test/CodeGen/PowerPC/simplifysetcc_narrow_load.ll b/llvm/test/CodeGen/PowerPC/simplifysetcc_narrow_load.ll index 1a03423fe6aec..49b8e2bc2f7b4 100644 --- a/llvm/test/CodeGen/PowerPC/simplifysetcc_narrow_load.ll +++ 
b/llvm/test/CodeGen/PowerPC/simplifysetcc_narrow_load.ll @@ -26,7 +26,7 @@ define i1 @test_129_15_0(ptr %y) { ; ; CHECK-BE-LABEL: test_129_15_0: ; CHECK-BE: # %bb.0: -; CHECK-BE-NEXT: lhz 3, 14(3) +; CHECK-BE-NEXT: lhz 3, 15(3) ; CHECK-BE-NEXT: clrlwi 3, 3, 17 ; CHECK-BE-NEXT: addic 4, 3, -1 ; CHECK-BE-NEXT: subfe 3, 4, 3 @@ -69,7 +69,7 @@ define i1 @test_33_8_0(ptr %y) { ; ; CHECK-BE-LABEL: test_33_8_0: ; CHECK-BE: # %bb.0: -; CHECK-BE-NEXT: lbz 3, 3(3) +; CHECK-BE-NEXT: lbz 3, 4(3) ; CHECK-BE-NEXT: addic 4, 3, -1 ; CHECK-BE-NEXT: subfe 3, 4, 3 ; CHECK-BE-NEXT: blr @@ -105,7 +105,7 @@ define i1 @test_33_1_31(ptr %y) { ; ; CHECK-BE-LABEL: test_33_1_31: ; CHECK-BE: # %bb.0: -; CHECK-BE-NEXT: lbz 3, 0(3) +; CHECK-BE-NEXT: lbz 3, 1(3) ; CHECK-BE-NEXT: srwi 3, 3, 7 ; CHECK-BE-NEXT: blr %a = load i33, ptr %y @@ -123,7 +123,7 @@ define i1 @test_33_1_0(ptr %y) { ; ; CHECK-BE-LABEL: test_33_1_0: ; CHECK-BE: # %bb.0: -; CHECK-BE-NEXT: lbz 3, 3(3) +; CHECK-BE-NEXT: lbz 3, 4(3) ; CHECK-BE-NEXT: clrlwi 3, 3, 31 ; CHECK-BE-NEXT: blr %a = load i33, ptr %y @@ -250,12 +250,10 @@ define i1 @test_48_17_0(ptr %y) { ; ; CHECK-BE-LABEL: test_48_17_0: ; CHECK-BE: # %bb.0: -; CHECK-BE-NEXT: lhz 4, 4(3) -; CHECK-BE-NEXT: lwz 3, 0(3) -; CHECK-BE-NEXT: clrlwi 4, 4, 16 -; CHECK-BE-NEXT: rlwimi 4, 3, 16, 15, 15 -; CHECK-BE-NEXT: addic 3, 4, -1 -; CHECK-BE-NEXT: subfe 3, 3, 4 +; CHECK-BE-NEXT: lwz 3, 2(3) +; CHECK-BE-NEXT: clrlwi 3, 3, 15 +; CHECK-BE-NEXT: addic 4, 3, -1 +; CHECK-BE-NEXT: subfe 3, 4, 3 ; CHECK-BE-NEXT: blr %a = load i48, ptr %y %b = and i48 %a, u0x1ffff @@ -292,8 +290,8 @@ define i1 @test_40_1_32(ptr %y) { ; ; CHECK-BE-LABEL: test_40_1_32: ; CHECK-BE: # %bb.0: -; CHECK-BE-NEXT: lwz 3, 0(3) -; CHECK-BE-NEXT: rlwinm 3, 3, 8, 31, 31 +; CHECK-BE-NEXT: lbz 3, 0(3) +; CHECK-BE-NEXT: clrlwi 3, 3, 31 ; CHECK-BE-NEXT: blr %a = load i40, ptr %y %b = and i40 %a, u0x100000000 @@ -325,7 +323,6 @@ define i1 @test_24_8_8(ptr %y) { ; CHECK-LE-LABEL: test_24_8_8: ; CHECK-LE: # %bb.0: ; 
CHECK-LE-NEXT: lbz 3, 1(3) -; CHECK-LE-NEXT: slwi 3, 3, 8 ; CHECK-LE-NEXT: addic 4, 3, -1 ; CHECK-LE-NEXT: subfe 3, 4, 3 ; CHECK-LE-NEXT: blr @@ -333,7 +330,6 @@ define i1 @test_24_8_8(ptr %y) { ; CHECK-BE-LABEL: test_24_8_8: ; CHECK-BE: # %bb.0: ; CHECK-BE-NEXT: lbz 3, 1(3) -; CHECK-BE-NEXT: slwi 3, 3, 8 ; CHECK-BE-NEXT: addic 4, 3, -1 ; CHECK-BE-NEXT: subfe 3, 4, 3 ; CHECK-BE-NEXT: blr @@ -346,18 +342,16 @@ define i1 @test_24_8_8(ptr %y) { define i1 @test_24_8_12(ptr %y) { ; CHECK-LE-LABEL: test_24_8_12: ; CHECK-LE: # %bb.0: -; CHECK-LE-NEXT: lhz 4, 0(3) -; CHECK-LE-NEXT: lbz 3, 2(3) -; CHECK-LE-NEXT: rlwinm 4, 4, 0, 16, 19 -; CHECK-LE-NEXT: rlwimi 4, 3, 16, 12, 15 -; CHECK-LE-NEXT: addic 3, 4, -1 -; CHECK-LE-NEXT: subfe 3, 3, 4 +; CHECK-LE-NEXT: lhz 3, 1(3) +; CHECK-LE-NEXT: rlwinm 3, 3, 0, 20, 27 +; CHECK-LE-NEXT: addic 4, 3, -1 +; CHECK-LE-NEXT: subfe 3, 4, 3 ; CHECK-LE-NEXT: blr ; ; CHECK-BE-LABEL: test_24_8_12: ; CHECK-BE: # %bb.0: ; CHECK-BE-NEXT: lhz 3, 0(3) -; CHECK-BE-NEXT: rlwinm 3, 3, 8, 12, 19 +; CHECK-BE-NEXT: rlwinm 3, 3, 0, 20, 27 ; CHECK-BE-NEXT: addic 4, 3, -1 ; CHECK-BE-NEXT: subfe 3, 4, 3 ; CHECK-BE-NEXT: blr @@ -371,7 +365,6 @@ define i1 @test_24_8_16(ptr %y) { ; CHECK-LE-LABEL: test_24_8_16: ; CHECK-LE: # %bb.0: ; CHECK-LE-NEXT: lbz 3, 2(3) -; CHECK-LE-NEXT: slwi 3, 3, 16 ; CHECK-LE-NEXT: addic 4, 3, -1 ; CHECK-LE-NEXT: subfe 3, 4, 3 ; CHECK-LE-NEXT: blr @@ -379,7 +372,6 @@ define i1 @test_24_8_16(ptr %y) { ; CHECK-BE-LABEL: test_24_8_16: ; CHECK-BE: # %bb.0: ; CHECK-BE-NEXT: lbz 3, 0(3) -; CHECK-BE-NEXT: slwi 3, 3, 16 ; CHECK-BE-NEXT: addic 4, 3, -1 ; CHECK-BE-NEXT: subfe 3, 4, 3 ; CHECK-BE-NEXT: blr