From 2f5604b7b3afc3a5693e297168a2d93c9cc529d9 Mon Sep 17 00:00:00 2001 From: David Green Date: Mon, 9 Sep 2024 07:40:28 +0100 Subject: [PATCH] [ARM] Fix VBICimm and VORRimm generation under Big endian. This is a smaller follow-on to #105519 that fixes VBICimm and VORRimm too. The logic behind lowering vector immediates under big-endian Neon/MVE is to treat them in natural lane ordering (the same as little endian) and VECTOR_REG_CAST them to the correct type (as opposed to creating the constants in big-endian form and bitcasting them). This patch makes sure the same is done when creating VORRIMM and VBICIMM. --- llvm/lib/Target/ARM/ARMISelLowering.cpp | 8 ++++---- llvm/test/CodeGen/ARM/big-endian-vmov.ll | 10 ++++------ llvm/test/CodeGen/Thumb2/mve-vmovimm.ll | 10 ++++------ 3 files changed, 12 insertions(+), 16 deletions(-) diff --git a/llvm/lib/Target/ARM/ARMISelLowering.cpp b/llvm/lib/Target/ARM/ARMISelLowering.cpp index 8c82161bd15c6..67653a17512d2 100644 --- a/llvm/lib/Target/ARM/ARMISelLowering.cpp +++ b/llvm/lib/Target/ARM/ARMISelLowering.cpp @@ -14443,9 +14443,9 @@ static SDValue PerformANDCombine(SDNode *N, DAG, dl, VbicVT, VT, OtherModImm); if (Val.getNode()) { SDValue Input = - DAG.getNode(ISD::BITCAST, dl, VbicVT, N->getOperand(0)); + DAG.getNode(ARMISD::VECTOR_REG_CAST, dl, VbicVT, N->getOperand(0)); SDValue Vbic = DAG.getNode(ARMISD::VBICIMM, dl, VbicVT, Input, Val); - return DAG.getNode(ISD::BITCAST, dl, VT, Vbic); + return DAG.getNode(ARMISD::VECTOR_REG_CAST, dl, VT, Vbic); } } } @@ -14739,9 +14739,9 @@ static SDValue PerformORCombine(SDNode *N, SplatBitSize, DAG, dl, VorrVT, VT, OtherModImm); if (Val.getNode()) { SDValue Input = - DAG.getNode(ISD::BITCAST, dl, VorrVT, N->getOperand(0)); + DAG.getNode(ARMISD::VECTOR_REG_CAST, dl, VorrVT, N->getOperand(0)); SDValue Vorr = DAG.getNode(ARMISD::VORRIMM, dl, VorrVT, Input, Val); - return DAG.getNode(ISD::BITCAST, dl, VT, Vorr); + return DAG.getNode(ARMISD::VECTOR_REG_CAST, dl, VT, Vorr); } } } diff --git 
a/llvm/test/CodeGen/ARM/big-endian-vmov.ll b/llvm/test/CodeGen/ARM/big-endian-vmov.ll index 327d4334ad83a..e3647cf6234c8 100644 --- a/llvm/test/CodeGen/ARM/big-endian-vmov.ll +++ b/llvm/test/CodeGen/ARM/big-endian-vmov.ll @@ -219,7 +219,6 @@ define arm_aapcs_vfpcc <8 x i16> @vmvn_v16i8_m1() { ret <8 x i16> } -; FIXME: This is incorrect for BE define arm_aapcs_vfpcc <8 x i16> @and_v8i16_m1(<8 x i16> %a) { ; CHECK-LE-LABEL: and_v8i16_m1: ; CHECK-LE: @ %bb.0: @@ -228,15 +227,14 @@ define arm_aapcs_vfpcc <8 x i16> @and_v8i16_m1(<8 x i16> %a) { ; ; CHECK-BE-LABEL: and_v8i16_m1: ; CHECK-BE: @ %bb.0: -; CHECK-BE-NEXT: vrev64.32 q8, q0 +; CHECK-BE-NEXT: vrev64.16 q8, q0 ; CHECK-BE-NEXT: vbic.i32 q8, #0x10000 -; CHECK-BE-NEXT: vrev64.32 q0, q8 +; CHECK-BE-NEXT: vrev64.16 q0, q8 ; CHECK-BE-NEXT: bx lr %b = and <8 x i16> %a, ret <8 x i16> %b } -; FIXME: This is incorrect for BE define arm_aapcs_vfpcc <8 x i16> @or_v8i16_1(<8 x i16> %a) { ; CHECK-LE-LABEL: or_v8i16_1: ; CHECK-LE: @ %bb.0: @@ -245,9 +243,9 @@ define arm_aapcs_vfpcc <8 x i16> @or_v8i16_1(<8 x i16> %a) { ; ; CHECK-BE-LABEL: or_v8i16_1: ; CHECK-BE: @ %bb.0: -; CHECK-BE-NEXT: vrev64.32 q8, q0 +; CHECK-BE-NEXT: vrev64.16 q8, q0 ; CHECK-BE-NEXT: vorr.i32 q8, #0x10000 -; CHECK-BE-NEXT: vrev64.32 q0, q8 +; CHECK-BE-NEXT: vrev64.16 q0, q8 ; CHECK-BE-NEXT: bx lr %b = or <8 x i16> %a, ret <8 x i16> %b diff --git a/llvm/test/CodeGen/Thumb2/mve-vmovimm.ll b/llvm/test/CodeGen/Thumb2/mve-vmovimm.ll index 9cf92663e3b05..1970ff35f183f 100644 --- a/llvm/test/CodeGen/Thumb2/mve-vmovimm.ll +++ b/llvm/test/CodeGen/Thumb2/mve-vmovimm.ll @@ -1331,7 +1331,6 @@ entry: ret <2 x i64> %s } -; FIXME: This is incorrect for BE define arm_aapcs_vfpcc <8 x i16> @and_v8i16_m1(<8 x i16> %a) { ; CHECKLE-LABEL: and_v8i16_m1: ; CHECKLE: @ %bb.0: @@ -1340,15 +1339,14 @@ define arm_aapcs_vfpcc <8 x i16> @and_v8i16_m1(<8 x i16> %a) { ; ; CHECKBE-LABEL: and_v8i16_m1: ; CHECKBE: @ %bb.0: -; CHECKBE-NEXT: vrev64.32 q1, q0 +; CHECKBE-NEXT: vrev64.16 q1, 
q0 ; CHECKBE-NEXT: vbic.i32 q1, #0x10000 -; CHECKBE-NEXT: vrev64.32 q0, q1 +; CHECKBE-NEXT: vrev64.16 q0, q1 ; CHECKBE-NEXT: bx lr %b = and <8 x i16> %a, ret <8 x i16> %b } -; FIXME: This is incorrect for BE define arm_aapcs_vfpcc <8 x i16> @or_v8i16_1(<8 x i16> %a) { ; CHECKLE-LABEL: or_v8i16_1: ; CHECKLE: @ %bb.0: @@ -1357,9 +1355,9 @@ define arm_aapcs_vfpcc <8 x i16> @or_v8i16_1(<8 x i16> %a) { ; ; CHECKBE-LABEL: or_v8i16_1: ; CHECKBE: @ %bb.0: -; CHECKBE-NEXT: vrev64.32 q1, q0 +; CHECKBE-NEXT: vrev64.16 q1, q0 ; CHECKBE-NEXT: vorr.i32 q1, #0x10000 -; CHECKBE-NEXT: vrev64.32 q0, q1 +; CHECKBE-NEXT: vrev64.16 q0, q1 ; CHECKBE-NEXT: bx lr %b = or <8 x i16> %a, ret <8 x i16> %b