diff --git a/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp b/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp index 14b0f9a564e01..394024693194c 100644 --- a/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp +++ b/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp @@ -5666,6 +5666,9 @@ AArch64InstructionSelector::emitConstantVector(Register Dst, Constant *CV, MachineRegisterInfo &MRI) { LLT DstTy = MRI.getType(Dst); unsigned DstSize = DstTy.getSizeInBits(); + assert((DstSize == 64 || DstSize == 128) && + "Unexpected vector constant size"); + if (CV->isNullValue()) { if (DstSize == 128) { auto Mov = @@ -5735,17 +5738,24 @@ AArch64InstructionSelector::emitConstantVector(Register Dst, Constant *CV, // Try to create the new constants with MOVI, and if so generate a fneg // for it. if (auto *NewOp = TryMOVIWithBits(NegBits)) { - Register NewDst = MRI.createVirtualRegister(&AArch64::FPR128RegClass); + Register NewDst = MRI.createVirtualRegister( + DstSize == 64 ? &AArch64::FPR64RegClass : &AArch64::FPR128RegClass); NewOp->getOperand(0).setReg(NewDst); return MIRBuilder.buildInstr(NegOpc, {Dst}, {NewDst}); } return nullptr; }; MachineInstr *R; - if ((R = TryWithFNeg(DefBits, 32, AArch64::FNEGv4f32)) || - (R = TryWithFNeg(DefBits, 64, AArch64::FNEGv2f64)) || + if ((R = TryWithFNeg(DefBits, 32, + DstSize == 64 ? AArch64::FNEGv2f32 + : AArch64::FNEGv4f32)) || + (R = TryWithFNeg(DefBits, 64, + DstSize == 64 ? AArch64::FNEGDr + : AArch64::FNEGv2f64)) || (STI.hasFullFP16() && - (R = TryWithFNeg(DefBits, 16, AArch64::FNEGv8f16)))) + (R = TryWithFNeg(DefBits, 16, + DstSize == 64 ? AArch64::FNEGv4f16 + : AArch64::FNEGv8f16)))) return R; } diff --git a/llvm/test/CodeGen/AArch64/neon-mov.ll b/llvm/test/CodeGen/AArch64/neon-mov.ll index 5be9394f61b30..4f657865e9f05 100644 --- a/llvm/test/CodeGen/AArch64/neon-mov.ll +++ b/llvm/test/CodeGen/AArch64/neon-mov.ll @@ -76,6 +76,15 @@ define <2 x i32> @movi2s_lsl16() { ret <2 x i32> } +define <2 x i32> @movi2s_fneg() { +; CHECK-LABEL: movi2s_fneg: +; CHECK: // %bb.0: +; CHECK-NEXT: movi v0.2s, #240, lsl #8 +; CHECK-NEXT: fneg v0.2s, v0.2s +; CHECK-NEXT: ret + ret <2 x i32> +} + define <2 x i32> @movi2s_lsl24() { ; CHECK-LABEL: movi2s_lsl24: ; CHECK: // %bb.0: @@ -149,6 +158,33 @@ define <4 x i16> @movi4h_lsl8() { ret <4 x i16> } +define <4 x i16> @movi4h_fneg() { +; CHECK-NOFP16-SD-LABEL: movi4h_fneg: +; CHECK-NOFP16-SD: // %bb.0: +; CHECK-NOFP16-SD-NEXT: movi v0.4h, #127, lsl #8 +; CHECK-NOFP16-SD-NEXT: fneg v0.2s, v0.2s +; CHECK-NOFP16-SD-NEXT: ret +; +; CHECK-FP16-SD-LABEL: movi4h_fneg: +; CHECK-FP16-SD: // %bb.0: +; CHECK-FP16-SD-NEXT: movi v0.4h, #127, lsl #8 +; CHECK-FP16-SD-NEXT: fneg v0.2s, v0.2s +; CHECK-FP16-SD-NEXT: ret +; +; CHECK-NOFP16-GI-LABEL: movi4h_fneg: +; CHECK-NOFP16-GI: // %bb.0: +; CHECK-NOFP16-GI-NEXT: adrp x8, .LCPI18_0 +; CHECK-NOFP16-GI-NEXT: ldr d0, [x8, :lo12:.LCPI18_0] +; CHECK-NOFP16-GI-NEXT: ret +; +; CHECK-FP16-GI-LABEL: movi4h_fneg: +; CHECK-FP16-GI: // %bb.0: +; CHECK-FP16-GI-NEXT: adrp x8, .LCPI18_0 +; CHECK-FP16-GI-NEXT: ldr d0, [x8, :lo12:.LCPI18_0] +; CHECK-FP16-GI-NEXT: ret + ret <4 x i16> +} + define <8 x i16> @movi8h_lsl0() { ; CHECK-LABEL: movi8h_lsl0: ; CHECK: // %bb.0: @@ -180,14 +216,14 @@ define <8 x i16> @movi8h_fneg() { ; ; CHECK-NOFP16-GI-LABEL: movi8h_fneg: ; CHECK-NOFP16-GI: // %bb.0: -; CHECK-NOFP16-GI-NEXT: adrp x8, .LCPI19_0 -; CHECK-NOFP16-GI-NEXT: ldr q0, [x8, :lo12:.LCPI19_0] +; CHECK-NOFP16-GI-NEXT: adrp x8, .LCPI21_0 +; CHECK-NOFP16-GI-NEXT: ldr q0, [x8, :lo12:.LCPI21_0] ; CHECK-NOFP16-GI-NEXT: ret ; ; CHECK-FP16-GI-LABEL: movi8h_fneg: ; CHECK-FP16-GI: // %bb.0: -; CHECK-FP16-GI-NEXT: adrp x8, .LCPI19_0 -; CHECK-FP16-GI-NEXT: ldr q0, [x8, :lo12:.LCPI19_0] +; CHECK-FP16-GI-NEXT: adrp x8, .LCPI21_0 +; CHECK-FP16-GI-NEXT: ldr q0, [x8, :lo12:.LCPI21_0] ; CHECK-FP16-GI-NEXT: ret ret <8 x i16> } @@ -275,6 +311,27 @@ define <4 x i16> @mvni4h_lsl8() { ret <4 x i16> } +define <4 x i16> @mvni4h_neg() { +; CHECK-NOFP16-SD-LABEL: mvni4h_neg: +; CHECK-NOFP16-SD: // %bb.0: +; CHECK-NOFP16-SD-NEXT: mov w8, #33008 // =0x80f0 +; CHECK-NOFP16-SD-NEXT: dup v0.4h, w8 +; CHECK-NOFP16-SD-NEXT: ret +; +; CHECK-FP16-LABEL: mvni4h_neg: +; CHECK-FP16: // %bb.0: +; CHECK-FP16-NEXT: movi v0.4h, #240 +; CHECK-FP16-NEXT: fneg v0.4h, v0.4h +; CHECK-FP16-NEXT: ret +; +; CHECK-NOFP16-GI-LABEL: mvni4h_neg: +; CHECK-NOFP16-GI: // %bb.0: +; CHECK-NOFP16-GI-NEXT: adrp x8, .LCPI32_0 +; CHECK-NOFP16-GI-NEXT: ldr d0, [x8, :lo12:.LCPI32_0] +; CHECK-NOFP16-GI-NEXT: ret + ret <4 x i16> +} + define <8 x i16> @mvni8h_lsl0() { ; CHECK-LABEL: mvni8h_lsl0: ; CHECK: // %bb.0: @@ -306,8 +363,8 @@ define <8 x i16> @mvni8h_neg() { ; ; CHECK-NOFP16-GI-LABEL: mvni8h_neg: ; CHECK-NOFP16-GI: // %bb.0: -; CHECK-NOFP16-GI-NEXT: adrp x8, .LCPI32_0 -; CHECK-NOFP16-GI-NEXT: ldr q0, [x8, :lo12:.LCPI32_0] +; CHECK-NOFP16-GI-NEXT: adrp x8, .LCPI35_0 +; CHECK-NOFP16-GI-NEXT: ldr q0, [x8, :lo12:.LCPI35_0] ; CHECK-NOFP16-GI-NEXT: ret ret <8 x i16> } @@ -486,6 +543,33 @@ define <2 x double> @fmov2d_neg0() { ret <2 x double> } +define <1 x double> @fmov1d_neg0() { +; CHECK-NOFP16-SD-LABEL: fmov1d_neg0: +; CHECK-NOFP16-SD: // %bb.0: +; CHECK-NOFP16-SD-NEXT: movi d0, #0000000000000000 +; CHECK-NOFP16-SD-NEXT: fneg d0, d0 +; CHECK-NOFP16-SD-NEXT: ret +; +; CHECK-FP16-SD-LABEL: fmov1d_neg0: +; CHECK-FP16-SD: // %bb.0: +; CHECK-FP16-SD-NEXT: movi d0, #0000000000000000 +; CHECK-FP16-SD-NEXT: fneg d0, d0 +; CHECK-FP16-SD-NEXT: ret +; +; CHECK-NOFP16-GI-LABEL: fmov1d_neg0: +; CHECK-NOFP16-GI: // %bb.0: +; CHECK-NOFP16-GI-NEXT: mov x8, #-9223372036854775808 // =0x8000000000000000 +; CHECK-NOFP16-GI-NEXT: fmov d0, x8 +; CHECK-NOFP16-GI-NEXT: ret +; +; CHECK-FP16-GI-LABEL: fmov1d_neg0: +; CHECK-FP16-GI: // %bb.0: +; CHECK-FP16-GI-NEXT: mov x8, #-9223372036854775808 // =0x8000000000000000 +; CHECK-FP16-GI-NEXT: fmov d0, x8 +; CHECK-FP16-GI-NEXT: ret + ret <1 x double> +} + define <2 x i32> @movi1d_1() { ; CHECK-NOFP16-SD-LABEL: movi1d_1: ; CHECK-NOFP16-SD: // %bb.0: @@ -499,14 +583,14 @@ define <2 x i32> @movi1d_1() { ; ; CHECK-NOFP16-GI-LABEL: movi1d_1: ; CHECK-NOFP16-GI: // %bb.0: -; CHECK-NOFP16-GI-NEXT: adrp x8, .LCPI52_0 -; CHECK-NOFP16-GI-NEXT: ldr d0, [x8, :lo12:.LCPI52_0] +; CHECK-NOFP16-GI-NEXT: adrp x8, .LCPI56_0 +; CHECK-NOFP16-GI-NEXT: ldr d0, [x8, :lo12:.LCPI56_0] ; CHECK-NOFP16-GI-NEXT: ret ; ; CHECK-FP16-GI-LABEL: movi1d_1: ; CHECK-FP16-GI: // %bb.0: -; CHECK-FP16-GI-NEXT: adrp x8, .LCPI52_0 -; CHECK-FP16-GI-NEXT: ldr d0, [x8, :lo12:.LCPI52_0] +; CHECK-FP16-GI-NEXT: adrp x8, .LCPI56_0 +; CHECK-FP16-GI-NEXT: ldr d0, [x8, :lo12:.LCPI56_0] ; CHECK-FP16-GI-NEXT: ret ret <2 x i32> } @@ -517,31 +601,31 @@ define <2 x i32> @movi1d() { ; CHECK-NOFP16-SD-LABEL: movi1d: ; CHECK-NOFP16-SD: // %bb.0: ; CHECK-NOFP16-SD-NEXT: movi d1, #0x00ffffffff0000 -; CHECK-NOFP16-SD-NEXT: adrp x8, .LCPI53_0 -; CHECK-NOFP16-SD-NEXT: ldr d0, [x8, :lo12:.LCPI53_0] +; CHECK-NOFP16-SD-NEXT: adrp x8, .LCPI57_0 +; CHECK-NOFP16-SD-NEXT: ldr d0, [x8, :lo12:.LCPI57_0] ; CHECK-NOFP16-SD-NEXT: b test_movi1d ; ; CHECK-FP16-SD-LABEL: movi1d: ; CHECK-FP16-SD: // %bb.0: ; CHECK-FP16-SD-NEXT: movi d1, #0x00ffffffff0000 -; CHECK-FP16-SD-NEXT: adrp x8, .LCPI53_0 -; CHECK-FP16-SD-NEXT: ldr d0, [x8, :lo12:.LCPI53_0] +; CHECK-FP16-SD-NEXT: adrp x8, .LCPI57_0 +; CHECK-FP16-SD-NEXT: ldr d0, [x8, :lo12:.LCPI57_0] ; CHECK-FP16-SD-NEXT: b test_movi1d ; ; CHECK-NOFP16-GI-LABEL: movi1d: ; CHECK-NOFP16-GI: // %bb.0: -; CHECK-NOFP16-GI-NEXT: adrp x8, .LCPI53_1 -; CHECK-NOFP16-GI-NEXT: adrp x9, .LCPI53_0 -; CHECK-NOFP16-GI-NEXT: ldr d0, [x8, :lo12:.LCPI53_1] -; CHECK-NOFP16-GI-NEXT: ldr d1, [x9, :lo12:.LCPI53_0] +; CHECK-NOFP16-GI-NEXT: adrp x8, .LCPI57_1 +; CHECK-NOFP16-GI-NEXT: adrp x9, .LCPI57_0 +; CHECK-NOFP16-GI-NEXT: ldr d0, [x8, :lo12:.LCPI57_1] +; CHECK-NOFP16-GI-NEXT: ldr d1, [x9, :lo12:.LCPI57_0] ; CHECK-NOFP16-GI-NEXT: b test_movi1d ; ; CHECK-FP16-GI-LABEL: movi1d: ; CHECK-FP16-GI: // %bb.0: -; CHECK-FP16-GI-NEXT: adrp x8, .LCPI53_1 -; CHECK-FP16-GI-NEXT: adrp x9, .LCPI53_0 -; CHECK-FP16-GI-NEXT: ldr d0, [x8, :lo12:.LCPI53_1] -; CHECK-FP16-GI-NEXT: ldr d1, [x9, :lo12:.LCPI53_0] +; CHECK-FP16-GI-NEXT: adrp x8, .LCPI57_1 +; CHECK-FP16-GI-NEXT: adrp x9, .LCPI57_0 +; CHECK-FP16-GI-NEXT: ldr d0, [x8, :lo12:.LCPI57_1] +; CHECK-FP16-GI-NEXT: ldr d1, [x9, :lo12:.LCPI57_0] ; CHECK-FP16-GI-NEXT: b test_movi1d %1 = tail call <2 x i32> @test_movi1d(<2 x i32> , <2 x i32> ) ret <2 x i32> %1