diff --git a/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp b/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp index ac804857be014..77d6e7e93fb00 100644 --- a/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp +++ b/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp @@ -5758,24 +5758,60 @@ AArch64InstructionSelector::emitConstantVector(Register Dst, Constant *CV, if (CV->getSplatValue()) { APInt DefBits = APInt::getSplat(DstSize, CV->getUniqueInteger()); - MachineInstr *NewOp; - bool Inv = false; - if ((NewOp = tryAdvSIMDModImm64(Dst, DstSize, DefBits, MIRBuilder)) || - (NewOp = tryAdvSIMDModImm32(Dst, DstSize, DefBits, MIRBuilder, Inv)) || - (NewOp = - tryAdvSIMDModImm321s(Dst, DstSize, DefBits, MIRBuilder, Inv)) || - (NewOp = tryAdvSIMDModImm16(Dst, DstSize, DefBits, MIRBuilder, Inv)) || - (NewOp = tryAdvSIMDModImm8(Dst, DstSize, DefBits, MIRBuilder)) || - (NewOp = tryAdvSIMDModImmFP(Dst, DstSize, DefBits, MIRBuilder))) - return NewOp; + auto TryMOVIWithBits = [&](APInt DefBits) -> MachineInstr * { + MachineInstr *NewOp; + bool Inv = false; + if ((NewOp = tryAdvSIMDModImm64(Dst, DstSize, DefBits, MIRBuilder)) || + (NewOp = + tryAdvSIMDModImm32(Dst, DstSize, DefBits, MIRBuilder, Inv)) || + (NewOp = + tryAdvSIMDModImm321s(Dst, DstSize, DefBits, MIRBuilder, Inv)) || + (NewOp = + tryAdvSIMDModImm16(Dst, DstSize, DefBits, MIRBuilder, Inv)) || + (NewOp = tryAdvSIMDModImm8(Dst, DstSize, DefBits, MIRBuilder)) || + (NewOp = tryAdvSIMDModImmFP(Dst, DstSize, DefBits, MIRBuilder))) + return NewOp; + + DefBits = ~DefBits; + Inv = true; + if ((NewOp = + tryAdvSIMDModImm32(Dst, DstSize, DefBits, MIRBuilder, Inv)) || + (NewOp = + tryAdvSIMDModImm321s(Dst, DstSize, DefBits, MIRBuilder, Inv)) || + (NewOp = tryAdvSIMDModImm16(Dst, DstSize, DefBits, MIRBuilder, Inv))) + return NewOp; + return nullptr; + }; - DefBits = ~DefBits; - Inv = true; - if ((NewOp = tryAdvSIMDModImm32(Dst, DstSize, DefBits, MIRBuilder, Inv)) || - (NewOp = - tryAdvSIMDModImm321s(Dst, DstSize, DefBits, MIRBuilder, Inv)) || - (NewOp = tryAdvSIMDModImm16(Dst, DstSize, DefBits, MIRBuilder, Inv))) + if (auto *NewOp = TryMOVIWithBits(DefBits)) return NewOp; + + // See if a fneg of the constant can be materialized with a MOVI, etc + auto TryWithFNeg = [&](APInt DefBits, int NumBits, + unsigned NegOpc) -> MachineInstr * { + // FNegate each sub-element of the constant + APInt Neg = APInt::getHighBitsSet(NumBits, 1).zext(DstSize); + APInt NegBits(DstSize, 0); + unsigned NumElts = DstSize / NumBits; + for (unsigned i = 0; i < NumElts; i++) + NegBits |= Neg << (NumBits * i); + NegBits = DefBits ^ NegBits; + + // Try to create the new constants with MOVI, and if so generate a fneg + // for it. + if (auto *NewOp = TryMOVIWithBits(NegBits)) { + Register NewDst = MRI.createVirtualRegister(&AArch64::FPR128RegClass); + NewOp->getOperand(0).setReg(NewDst); + return MIRBuilder.buildInstr(NegOpc, {Dst}, {NewDst}); + } + return nullptr; + }; + MachineInstr *R; + if ((R = TryWithFNeg(DefBits, 32, AArch64::FNEGv4f32)) || + (R = TryWithFNeg(DefBits, 64, AArch64::FNEGv2f64)) || + (STI.hasFullFP16() && + (R = TryWithFNeg(DefBits, 16, AArch64::FNEGv8f16)))) + return R; } auto *CPLoad = emitLoadFromConstantPool(CV, MIRBuilder); diff --git a/llvm/test/CodeGen/AArch64/neon-mov.ll b/llvm/test/CodeGen/AArch64/neon-mov.ll index 7effdc97993c1..170ba7292ae60 100644 --- a/llvm/test/CodeGen/AArch64/neon-mov.ll +++ b/llvm/test/CodeGen/AArch64/neon-mov.ll @@ -123,14 +123,14 @@ define <4 x i32> @movi4s_fneg() { ; ; CHECK-NOFP16-GI-LABEL: movi4s_fneg: ; CHECK-NOFP16-GI: // %bb.0: -; CHECK-NOFP16-GI-NEXT: adrp x8, .LCPI13_0 -; CHECK-NOFP16-GI-NEXT: ldr q0, [x8, :lo12:.LCPI13_0] +; CHECK-NOFP16-GI-NEXT: movi v0.4s, #240, lsl #8 +; CHECK-NOFP16-GI-NEXT: fneg v0.4s, v0.4s ; CHECK-NOFP16-GI-NEXT: ret ; ; CHECK-FP16-GI-LABEL: movi4s_fneg: ; CHECK-FP16-GI: // %bb.0: -; CHECK-FP16-GI-NEXT: adrp x8, .LCPI13_0 -; CHECK-FP16-GI-NEXT: ldr q0, [x8, :lo12:.LCPI13_0] +; CHECK-FP16-GI-NEXT: movi v0.4s, #240, lsl #8 +; CHECK-FP16-GI-NEXT: fneg v0.4s, v0.4s ; CHECK-FP16-GI-NEXT: ret ret <4 x i32> } @@ -322,8 +322,8 @@ define <8 x i16> @mvni8h_neg() { ; ; CHECK-FP16-GI-LABEL: mvni8h_neg: ; CHECK-FP16-GI: // %bb.0: -; CHECK-FP16-GI-NEXT: adrp x8, .LCPI32_0 -; CHECK-FP16-GI-NEXT: ldr q0, [x8, :lo12:.LCPI32_0] +; CHECK-FP16-GI-NEXT: movi v0.8h, #240 +; CHECK-FP16-GI-NEXT: fneg v0.8h, v0.8h ; CHECK-FP16-GI-NEXT: ret ret <8 x i16> } @@ -508,14 +508,14 @@ define <2 x double> @fmov2d_neg0() { ; ; CHECK-NOFP16-GI-LABEL: fmov2d_neg0: ; CHECK-NOFP16-GI: // %bb.0: -; CHECK-NOFP16-GI-NEXT: adrp x8, .LCPI51_0 -; CHECK-NOFP16-GI-NEXT: ldr q0, [x8, :lo12:.LCPI51_0] +; CHECK-NOFP16-GI-NEXT: movi v0.2d, #0000000000000000 +; CHECK-NOFP16-GI-NEXT: fneg v0.2d, v0.2d ; CHECK-NOFP16-GI-NEXT: ret ; ; CHECK-FP16-GI-LABEL: fmov2d_neg0: ; CHECK-FP16-GI: // %bb.0: -; CHECK-FP16-GI-NEXT: adrp x8, .LCPI51_0 -; CHECK-FP16-GI-NEXT: ldr q0, [x8, :lo12:.LCPI51_0] +; CHECK-FP16-GI-NEXT: movi v0.2d, #0000000000000000 +; CHECK-FP16-GI-NEXT: fneg v0.2d, v0.2d ; CHECK-FP16-GI-NEXT: ret ret <2 x double> }