diff --git a/llvm/include/llvm/CodeGen/TargetLowering.h b/llvm/include/llvm/CodeGen/TargetLowering.h index f0e250c9fcbce1..0624dd394275a7 100644 --- a/llvm/include/llvm/CodeGen/TargetLowering.h +++ b/llvm/include/llvm/CodeGen/TargetLowering.h @@ -1652,6 +1652,10 @@ class TargetLoweringBase { return true; } + /// Return true (the default) if it is profitable to remove a sext_inreg(x) + /// where the sext is redundant, and use x directly. + virtual bool shouldRemoveRedundantExtend(SDValue Op) const { return true; } + /// When splitting a value of the specified type into parts, does the Lo /// or Hi part come first? This usually follows the endianness, except /// for ppcf128, where the Hi part always comes first. diff --git a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp index 49b127027bc9f3..e5f16c9acf1879 100644 --- a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp @@ -804,7 +804,8 @@ SDValue TargetLowering::SimplifyMultipleUseDemandedBits( SDValue Op0 = Op.getOperand(0); EVT ExVT = cast<VTSDNode>(Op.getOperand(1))->getVT(); unsigned ExBits = ExVT.getScalarSizeInBits(); - if (DemandedBits.getActiveBits() <= ExBits) + if (DemandedBits.getActiveBits() <= ExBits && + shouldRemoveRedundantExtend(Op)) return Op0; // If the input is already sign extended, just drop the extension. unsigned NumSignBits = DAG.ComputeNumSignBits(Op0, DemandedElts, Depth + 1); diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp index ad7b67a7b8507e..d8b692639507e4 100644 --- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp +++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp @@ -13655,6 +13655,22 @@ bool AArch64TargetLowering::shouldReduceLoadWidth(SDNode *Load, return true; } +// Treat a sext_inreg(extract(..)) as free if it has multiple uses. 
+bool AArch64TargetLowering::shouldRemoveRedundantExtend(SDValue Extend) const { + EVT VT = Extend.getValueType(); + if ((VT == MVT::i64 || VT == MVT::i32) && Extend->use_size()) { + SDValue Extract = Extend.getOperand(0); + if (Extract.getOpcode() == ISD::ANY_EXTEND && Extract.hasOneUse()) + Extract = Extract.getOperand(0); + if (Extract.getOpcode() == ISD::EXTRACT_VECTOR_ELT && Extract.hasOneUse()) { + EVT VecVT = Extract.getOperand(0).getValueType(); + if (VecVT.getScalarType() == MVT::i8 || VecVT.getScalarType() == MVT::i16) + return false; + } + } + return true; +} + // Truncations from 64-bit GPR to 32-bit GPR is free. bool AArch64TargetLowering::isTruncateFree(Type *Ty1, Type *Ty2) const { if (!Ty1->isIntegerTy() || !Ty2->isIntegerTy()) diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.h b/llvm/lib/Target/AArch64/AArch64ISelLowering.h index 891d52f1d1004f..a878b2b089473f 100644 --- a/llvm/lib/Target/AArch64/AArch64ISelLowering.h +++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.h @@ -618,6 +618,8 @@ class AArch64TargetLowering : public TargetLowering { bool shouldReduceLoadWidth(SDNode *Load, ISD::LoadExtType ExtTy, EVT NewVT) const override; + bool shouldRemoveRedundantExtend(SDValue Op) const override; + bool isTruncateFree(Type *Ty1, Type *Ty2) const override; bool isTruncateFree(EVT VT1, EVT VT2) const override; diff --git a/llvm/test/CodeGen/AArch64/extract-sext-zext.ll b/llvm/test/CodeGen/AArch64/extract-sext-zext.ll index 278a6e3becd9e4..f566ebb4f20892 100644 --- a/llvm/test/CodeGen/AArch64/extract-sext-zext.ll +++ b/llvm/test/CodeGen/AArch64/extract-sext-zext.ll @@ -371,18 +371,11 @@ define i32 @both_i16i32(<8 x i16> %x) { } define i32 @redundant_i16i32(<8 x i16> %x) { -; CHECK-ISEL-LABEL: redundant_i16i32: -; CHECK-ISEL: // %bb.0: -; CHECK-ISEL-NEXT: umov w8, v0.h[2] -; CHECK-ISEL-NEXT: smov w9, v0.h[2] -; CHECK-ISEL-NEXT: eor w0, w9, w8, lsl #16 -; CHECK-ISEL-NEXT: ret -; -; CHECK-GLOBAL-LABEL: redundant_i16i32: -; CHECK-GLOBAL: // %bb.0: -; 
CHECK-GLOBAL-NEXT: smov w8, v0.h[2] -; CHECK-GLOBAL-NEXT: eor w0, w8, w8, lsl #16 -; CHECK-GLOBAL-NEXT: ret +; CHECK-LABEL: redundant_i16i32: +; CHECK: // %bb.0: +; CHECK-NEXT: smov w8, v0.h[2] +; CHECK-NEXT: eor w0, w8, w8, lsl #16 +; CHECK-NEXT: ret %e = extractelement <8 x i16> %x, i64 2 %s = sext i16 %e to i32 %t = shl i32 %s, 16 @@ -406,20 +399,12 @@ define i32 @both_i8i32(<8 x i8> %x) { } define i32 @redundant_i8i32(<8 x i8> %x) { -; CHECK-ISEL-LABEL: redundant_i8i32: -; CHECK-ISEL: // %bb.0: -; CHECK-ISEL-NEXT: // kill: def $d0 killed $d0 def $q0 -; CHECK-ISEL-NEXT: umov w8, v0.b[2] -; CHECK-ISEL-NEXT: smov w9, v0.b[2] -; CHECK-ISEL-NEXT: eor w0, w9, w8, lsl #24 -; CHECK-ISEL-NEXT: ret -; -; CHECK-GLOBAL-LABEL: redundant_i8i32: -; CHECK-GLOBAL: // %bb.0: -; CHECK-GLOBAL-NEXT: // kill: def $d0 killed $d0 def $q0 -; CHECK-GLOBAL-NEXT: smov w8, v0.b[2] -; CHECK-GLOBAL-NEXT: eor w0, w8, w8, lsl #24 -; CHECK-GLOBAL-NEXT: ret +; CHECK-LABEL: redundant_i8i32: +; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0 +; CHECK-NEXT: smov w8, v0.b[2] +; CHECK-NEXT: eor w0, w8, w8, lsl #24 +; CHECK-NEXT: ret %e = extractelement <8 x i8> %x, i64 2 %s = sext i8 %e to i32 %t = shl i32 %s, 24 @@ -469,18 +454,11 @@ define i64 @both_i16i64(<8 x i16> %x) { } define i64 @redundant_i16i64(<8 x i16> %x) { -; CHECK-ISEL-LABEL: redundant_i16i64: -; CHECK-ISEL: // %bb.0: -; CHECK-ISEL-NEXT: umov w8, v0.h[2] -; CHECK-ISEL-NEXT: smov x9, v0.h[2] -; CHECK-ISEL-NEXT: eor x0, x9, x8, lsl #48 -; CHECK-ISEL-NEXT: ret -; -; CHECK-GLOBAL-LABEL: redundant_i16i64: -; CHECK-GLOBAL: // %bb.0: -; CHECK-GLOBAL-NEXT: smov x8, v0.h[2] -; CHECK-GLOBAL-NEXT: eor x0, x8, x8, lsl #48 -; CHECK-GLOBAL-NEXT: ret +; CHECK-LABEL: redundant_i16i64: +; CHECK: // %bb.0: +; CHECK-NEXT: smov x8, v0.h[2] +; CHECK-NEXT: eor x0, x8, x8, lsl #48 +; CHECK-NEXT: ret %e = extractelement <8 x i16> %x, i64 2 %s = sext i16 %e to i64 %t = shl i64 %s, 48 @@ -504,20 +482,12 @@ define i64 @both_i8i64(<8 x i8> %x) 
{ } define i64 @redundant_i8i64(<8 x i8> %x) { -; CHECK-ISEL-LABEL: redundant_i8i64: -; CHECK-ISEL: // %bb.0: -; CHECK-ISEL-NEXT: // kill: def $d0 killed $d0 def $q0 -; CHECK-ISEL-NEXT: umov w8, v0.b[2] -; CHECK-ISEL-NEXT: smov x9, v0.b[2] -; CHECK-ISEL-NEXT: eor x0, x9, x8, lsl #56 -; CHECK-ISEL-NEXT: ret -; -; CHECK-GLOBAL-LABEL: redundant_i8i64: -; CHECK-GLOBAL: // %bb.0: -; CHECK-GLOBAL-NEXT: // kill: def $d0 killed $d0 def $q0 -; CHECK-GLOBAL-NEXT: smov x8, v0.b[2] -; CHECK-GLOBAL-NEXT: eor x0, x8, x8, lsl #56 -; CHECK-GLOBAL-NEXT: ret +; CHECK-LABEL: redundant_i8i64: +; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0 +; CHECK-NEXT: smov x8, v0.b[2] +; CHECK-NEXT: eor x0, x8, x8, lsl #56 +; CHECK-NEXT: ret %e = extractelement <8 x i8> %x, i64 2 %s = sext i8 %e to i64 %t = shl i64 %s, 56