diff --git a/llvm/lib/Target/AArch64/AArch64ExpandImm.cpp b/llvm/lib/Target/AArch64/AArch64ExpandImm.cpp
index 98016271a9d00..2530daf6bb9dc 100644
--- a/llvm/lib/Target/AArch64/AArch64ExpandImm.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ExpandImm.cpp
@@ -239,6 +239,71 @@ static bool trySequenceOfOnes(uint64_t UImm,
   return true;
 }
 
+// Attempt to expand 64-bit immediate values that consist of shifted negated
+// components such as 0x1234'5678'edcb'a987, where the upper half is the
+// negation of the lower half. Immediates of this form can generally be
+// expanded via a sequence of MOVN+MOVK to expand the lower half, followed by
+// an EOR or EON to shift and negate the result to the upper half, for example:
+//   mov  x0, #-22137          // =0xffffffffffffa987
+//   movk x0, #60875, lsl #16  // =0xffffffffedcba987
+//   eor  x0, x0, x0, lsl #32  // =0xffffffffedcba987 ^ 0xedcba98700000000
+//                                =0x12345678edcba987.
+// The logic extends to other shift amounts in the range [17, 48) (outside that
+// range we get runs of ones/zeros that are optimised separately).
+//
+// When the lower half contains a 16-bit chunk of ones, such as
+// 0x0000'5678'ffff'a987, the intermediate MOVK is redundant.
+// Similarly, when it contains a 16-bit chunk of zeros, such as
+// 0xffff'5678'0000'a987, the expansion can instead be effected by expanding
+// the negation of the lower half and negating the result with an EON, e.g.:
+//   mov  x0, #-43400          // =0xffffffffffff5678
+//   eon  x0, x0, x0, lsl #32  // =0xffffffffffff5678 ^ ~0xffff567800000000
+//                                =0xffffffffffff5678 ^ 0x0000a987ffffffff
+//                                =0xffff56780000a987.
+// In any of these cases, the expansion with EOR/EON saves an instruction
+// compared to the default expansion based on MOV and MOVKs.
+static bool tryCopyWithNegation(uint64_t Imm, bool AllowThreeSequence,
+                                SmallVectorImpl<ImmInsnModel> &Insn) {
+  // Degenerate cases where Imm is a run of ones should be handled separately.
+  if (!Imm || llvm::isShiftedMask_64(Imm))
+    return false;
+
+  const unsigned Mask = 0xffff;
+
+  auto tryExpansion = [&](unsigned Opc, uint64_t C, unsigned N) {
+    assert((C >> 32) == 0xffffffffULL && "Invalid immediate");
+    const unsigned Imm0 = C & Mask;
+    const unsigned Imm16 = (C >> 16) & Mask;
+    if (Imm0 != Mask && Imm16 != Mask && !AllowThreeSequence)
+      return false;
+
+    if (Imm0 != Mask) {
+      Insn.push_back({AArch64::MOVNXi, Imm0 ^ Mask, 0});
+      if (Imm16 != Mask)
+        Insn.push_back({AArch64::MOVKXi, Imm16, 16});
+    } else {
+      Insn.push_back({AArch64::MOVNXi, Imm16 ^ Mask, 16});
+    }
+
+    Insn.push_back({Opc, 0, N});
+    return true;
+  };
+
+  for (unsigned N = 17; N < 48; ++N) {
+    // Attempt EOR.
+    uint64_t C = 0xffffffff00000000ULL | (Imm ^ (Imm << N));
+    if ((C ^ (C << N)) == Imm && tryExpansion(AArch64::EORXrs, C, N))
+      return true;
+
+    // Attempt EON.
+    C = 0xffffffff00000000ULL | (Imm ^ ~(~Imm << N));
+    if ((C ^ ~(C << N)) == Imm && tryExpansion(AArch64::EONXrs, C, N))
+      return true;
+  }
+
+  return false;
+}
+
 static uint64_t GetRunOfOnesStartingAt(uint64_t V, uint64_t StartPosition) {
   uint64_t NumOnes = llvm::countr_one(V >> StartPosition);
 
@@ -614,6 +679,10 @@ void AArch64_IMM::expandMOVImm(uint64_t Imm, unsigned BitSize,
   if (tryEorOfLogicalImmediates(UImm, Insn))
     return;
 
+  // Attempt to use a sequence of MOVN+EOR/EON (shifted register).
+  if (tryCopyWithNegation(Imm, /*AllowThreeSequence=*/false, Insn))
+    return;
+
   // FIXME: Add more two-instruction sequences.
 
   // Three instruction sequences.
@@ -641,6 +710,10 @@ void AArch64_IMM::expandMOVImm(uint64_t Imm, unsigned BitSize,
   if (BitSize == 64 && trySequenceOfOnes(UImm, Insn))
     return;
 
+  // Attempt to use a sequence of MOVN+MOVK+EOR/EON (shifted register).
+  if (tryCopyWithNegation(Imm, /*AllowThreeSequence=*/true, Insn))
+    return;
+
   // We found no possible two or three instruction sequence; use the general
   // four-instruction sequence.
   expandMOVImmSimple(Imm, BitSize, OneChunks, ZeroChunks, Insn);
diff --git a/llvm/lib/Target/AArch64/AArch64ExpandPseudoInsts.cpp b/llvm/lib/Target/AArch64/AArch64ExpandPseudoInsts.cpp
index 0f4bbfc3d610e..536260afb9482 100644
--- a/llvm/lib/Target/AArch64/AArch64ExpandPseudoInsts.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ExpandPseudoInsts.cpp
@@ -169,6 +169,8 @@ bool AArch64ExpandPseudo::expandMOVImm(MachineBasicBlock &MBB,
                 .addImm(I->Op2));
       }
       break;
+    case AArch64::EONXrs:
+    case AArch64::EORXrs:
     case AArch64::ORRWrs:
     case AArch64::ORRXrs: {
       Register DstReg = MI.getOperand(0).getReg();
diff --git a/llvm/test/CodeGen/AArch64/arm64-movi.ll b/llvm/test/CodeGen/AArch64/arm64-movi.ll
index c9074c2adbe3c..c4d33faa4eda4 100644
--- a/llvm/test/CodeGen/AArch64/arm64-movi.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-movi.ll
@@ -549,3 +549,165 @@ define i64 @orr_32_eor_64() nounwind {
 ; CHECK-NEXT:    ret
   ret i64 18446604367017541391
 }
+
+;==--------------------------------------------------------------------------==
+; Tests for EOR / EON with MOVN.
+;==--------------------------------------------------------------------------==
+
+define i64 @movn_0_eon_lsl_17() {
+; CHECK-LABEL: movn_0_eon_lsl_17:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov x0, #-61167 // =0xffffffffffff1111
+; CHECK-NEXT:    eon x0, x0, x0, lsl #17
+; CHECK-NEXT:    ret
+  ret i64 u0xfffffffe2222eeee
+}
+
+define i64 @movn_0_eon_lsl_32() {
+; CHECK-LABEL: movn_0_eon_lsl_32:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov x0, #-43691 // =0xffffffffffff5555
+; CHECK-NEXT:    eon x0, x0, x0, lsl #32
+; CHECK-NEXT:    ret
+  ret i64 u0xffff55550000aaaa
+}
+
+define i64 @movn_0_eon_lsl_47() {
+; CHECK-LABEL: movn_0_eon_lsl_47:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov x0, #-61167 // =0xffffffffffff1111
+; CHECK-NEXT:    eon x0, x0, x0, lsl #47
+; CHECK-NEXT:    ret
+  ret i64 u0x888880000000eeee
+}
+
+define i64 @movn_1_eon_lsl_17() {
+; CHECK-LABEL: movn_1_eon_lsl_17:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov x0, #-4008574977 // =0xffffffff1111ffff
+; CHECK-NEXT:    eon x0, x0, x0, lsl #17
+; CHECK-NEXT:    ret
+  ret i64 u0xfffe222311100000
+}
+
+define i64 @movn_1_eon_lsl_32() {
+; CHECK-LABEL: movn_1_eon_lsl_32:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov x0, #-2863267841 // =0xffffffff5555ffff
+; CHECK-NEXT:    eon x0, x0, x0, lsl #32
+; CHECK-NEXT:    ret
+  ret i64 u0x5555ffffaaaa0000
+}
+
+define i64 @movn_1_eon_lsl_46() {
+; CHECK-LABEL: movn_1_eon_lsl_46:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov x0, #-4008509441 // =0xffffffff1112ffff
+; CHECK-NEXT:    eon x0, x0, x0, lsl #46
+; CHECK-NEXT:    ret
+  ret i64 u0xbfffc000eeed0000
+}
+
+define i64 @movn_0_eor_lsl_17() {
+; CHECK-LABEL: movn_0_eor_lsl_17:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov x0, #-61167 // =0xffffffffffff1111
+; CHECK-NEXT:    eor x0, x0, x0, lsl #17
+; CHECK-NEXT:    ret
+  ret i64 u0x00000001dddd1111
+}
+
+define i64 @movn_0_eor_lsl_32() {
+; CHECK-LABEL: movn_0_eor_lsl_32:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov x0, #-43691 // =0xffffffffffff5555
+; CHECK-NEXT:    eor x0, x0, x0, lsl #32
+; CHECK-NEXT:    ret
+  ret i64 u0x0000aaaaffff5555
+}
+
+define i64 @movn_0_eor_lsl_47() {
+; CHECK-LABEL: movn_0_eor_lsl_47:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov x0, #-61167 // =0xffffffffffff1111
+; CHECK-NEXT:    eor x0, x0, x0, lsl #47
+; CHECK-NEXT:    ret
+  ret i64 u0x77777fffffff1111
+}
+
+define i64 @movn_1_eor_lsl_17() {
+; CHECK-LABEL: movn_1_eor_lsl_17:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov x0, #-4008574977 // =0xffffffff1111ffff
+; CHECK-NEXT:    eor x0, x0, x0, lsl #17
+; CHECK-NEXT:    ret
+  ret i64 u0x0001dddceeefffff
+}
+
+define i64 @movn_1_eor_lsl_32() {
+; CHECK-LABEL: movn_1_eor_lsl_32:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov x0, #-2863267841 // =0xffffffff5555ffff
+; CHECK-NEXT:    eor x0, x0, x0, lsl #32
+; CHECK-NEXT:    ret
+  ret i64 u0xaaaa00005555ffff
+}
+
+define i64 @movn_1_eor_lsl_46() {
+; CHECK-LABEL: movn_1_eor_lsl_46:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov x0, #-4008509441 // =0xffffffff1112ffff
+; CHECK-NEXT:    eor x0, x0, x0, lsl #46
+; CHECK-NEXT:    ret
+  ret i64 u0x40003fff1112ffff
+}
+
+define i64 @movn_movk_eon_lsl_17() {
+; CHECK-LABEL: movn_movk_eon_lsl_17:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov x0, #-43400 // =0xffffffffffff5678
+; CHECK-NEXT:    movk x0, #4660, lsl #16
+; CHECK-NEXT:    eon x0, x0, x0, lsl #17
+; CHECK-NEXT:    ret
+  ret i64 u0xfffe2468413ba987
+}
+
+define i64 @movn_movk_eon_lsl_47() {
+; CHECK-LABEL: movn_movk_eon_lsl_47:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov x0, #-61167 // =0xffffffffffff1111
+; CHECK-NEXT:    movk x0, #43981, lsl #16
+; CHECK-NEXT:    eon x0, x0, x0, lsl #47
+; CHECK-NEXT:    ret
+  ret i64 u0x888880005432eeee
+}
+
+define i64 @movn_movk_eor_lsl_17() {
+; CHECK-LABEL: movn_movk_eor_lsl_17:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov x0, #-43400 // =0xffffffffffff5678
+; CHECK-NEXT:    movk x0, #4660, lsl #16
+; CHECK-NEXT:    eor x0, x0, x0, lsl #17
+; CHECK-NEXT:    ret
+  ret i64 u0x0001db97bec45678
+}
+
+define i64 @movn_movk_eor_lsl_32() {
+; CHECK-LABEL: movn_movk_eor_lsl_32:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov x0, #-21846 // =0xffffffffffffaaaa
+; CHECK-NEXT:    movk x0, #52428, lsl #16
+; CHECK-NEXT:    eor x0, x0, x0, lsl #32
+; CHECK-NEXT:    ret
+  ret i64 u0x33335555ccccaaaa
+}
+
+define i64 @movn_movk_eor_lsl_47() {
+; CHECK-LABEL: movn_movk_eor_lsl_47:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov x0, #-61167 // =0xffffffffffff1111
+; CHECK-NEXT:    movk x0, #43981, lsl #16
+; CHECK-NEXT:    eor x0, x0, x0, lsl #47
+; CHECK-NEXT:    ret
+  ret i64 u0x77777fffabcd1111
+}
diff --git a/llvm/test/CodeGen/AArch64/srem-seteq-illegal-types.ll b/llvm/test/CodeGen/AArch64/srem-seteq-illegal-types.ll
index 884d668157e5f..bd4cc62255439 100644
--- a/llvm/test/CodeGen/AArch64/srem-seteq-illegal-types.ll
+++ b/llvm/test/CodeGen/AArch64/srem-seteq-illegal-types.ll
@@ -61,14 +61,13 @@ define <3 x i1> @test_srem_vec(<3 x i33> %X) nounwind {
 ; CHECK-NEXT:    sbfx x8, x0, #0, #33
 ; CHECK-NEXT:    sbfx x10, x1, #0, #33
 ; CHECK-NEXT:    movk x9, #29127, lsl #16
-; CHECK-NEXT:    mov x13, #7281 // =0x1c71
+; CHECK-NEXT:    mov x13, #-7282 // =0xffffffffffffe38e
 ; CHECK-NEXT:    sbfx x12, x2, #0, #33
 ; CHECK-NEXT:    movk x9, #50972, lsl #32
-; CHECK-NEXT:    movk x13, #29127, lsl #16
+; CHECK-NEXT:    movk x13, #36408, lsl #16
 ; CHECK-NEXT:    movk x9, #7281, lsl #48
-; CHECK-NEXT:    movk x13, #50972, lsl #32
+; CHECK-NEXT:    eon x13, x13, x13, lsl #33
 ; CHECK-NEXT:    smulh x11, x8, x9
-; CHECK-NEXT:    movk x13, #7281, lsl #48
 ; CHECK-NEXT:    smulh x9, x10, x9
 ; CHECK-NEXT:    smulh x13, x12, x13
 ; CHECK-NEXT:    add x11, x11, x11, lsr #63