diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index 16bb7eb222723..9ceb91ea8017a 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -1131,13 +1131,6 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
     setOperationAction(ISD::VSCALE, MVT::i32, Custom);
 
     setTruncStoreAction(MVT::v4i16, MVT::v4i8, Custom);
-
-    setLoadExtAction(ISD::EXTLOAD, MVT::v4i16, MVT::v4i8, Custom);
-    setLoadExtAction(ISD::SEXTLOAD, MVT::v4i16, MVT::v4i8, Custom);
-    setLoadExtAction(ISD::ZEXTLOAD, MVT::v4i16, MVT::v4i8, Custom);
-    setLoadExtAction(ISD::EXTLOAD, MVT::v4i32, MVT::v4i8, Custom);
-    setLoadExtAction(ISD::SEXTLOAD, MVT::v4i32, MVT::v4i8, Custom);
-    setLoadExtAction(ISD::ZEXTLOAD, MVT::v4i32, MVT::v4i8, Custom);
   }
 
   if (Subtarget->hasSVE()) {
@@ -4483,40 +4476,6 @@ SDValue AArch64TargetLowering::LowerSTORE(SDValue Op,
   return SDValue();
 }
 
-// Custom lowering for extending v4i8 vector loads.
-SDValue AArch64TargetLowering::LowerLOAD(SDValue Op,
-                                         SelectionDAG &DAG) const {
-  SDLoc DL(Op);
-  LoadSDNode *LoadNode = cast<LoadSDNode>(Op);
-  assert(LoadNode && "Expected custom lowering of a load node");
-  EVT VT = Op->getValueType(0);
-  assert((VT == MVT::v4i16 || VT == MVT::v4i32) && "Expected v4i16 or v4i32");
-
-  if (LoadNode->getMemoryVT() != MVT::v4i8)
-    return SDValue();
-
-  unsigned ExtType;
-  if (LoadNode->getExtensionType() == ISD::SEXTLOAD)
-    ExtType = ISD::SIGN_EXTEND;
-  else if (LoadNode->getExtensionType() == ISD::ZEXTLOAD ||
-           LoadNode->getExtensionType() == ISD::EXTLOAD)
-    ExtType = ISD::ZERO_EXTEND;
-  else
-    return SDValue();
-
-  SDValue Load = DAG.getLoad(MVT::f32, DL, DAG.getEntryNode(),
-                             LoadNode->getBasePtr(), MachinePointerInfo());
-  SDValue Chain = Load.getValue(1);
-  SDValue Vec = DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, MVT::v2f32, Load);
-  SDValue BC = DAG.getNode(ISD::BITCAST, DL, MVT::v8i8, Vec);
-  SDValue Ext = DAG.getNode(ExtType, DL, MVT::v8i16, BC);
-  Ext = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, MVT::v4i16, Ext,
-                    DAG.getConstant(0, DL, MVT::i64));
-  if (VT == MVT::v4i32)
-    Ext = DAG.getNode(ExtType, DL, MVT::v4i32, Ext);
-  return DAG.getMergeValues({Ext, Chain}, DL);
-}
-
 // Generate SUBS and CSEL for integer abs.
 SDValue AArch64TargetLowering::LowerABS(SDValue Op, SelectionDAG &DAG) const {
   MVT VT = Op.getSimpleValueType();
@@ -4760,7 +4719,7 @@ SDValue AArch64TargetLowering::LowerOperation(SDValue Op,
   case ISD::LOAD:
     if (useSVEForFixedLengthVectorVT(Op.getValueType()))
       return LowerFixedLengthVectorLoadToSVE(Op, DAG);
-    return LowerLOAD(Op, DAG);
+    llvm_unreachable("Unexpected request to lower ISD::LOAD");
   case ISD::ADD:
     return LowerToPredicatedOp(Op, DAG, AArch64ISD::ADD_PRED);
   case ISD::AND:
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.h b/llvm/lib/Target/AArch64/AArch64ISelLowering.h
index 7daa61996739f..f3b2da8304303 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.h
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.h
@@ -851,7 +851,6 @@ class AArch64TargetLowering : public TargetLowering {
                           SmallVectorImpl<SDValue> &InVals, bool isThisReturn,
                           SDValue ThisVal) const;
 
-  SDValue LowerLOAD(SDValue Op, SelectionDAG &DAG) const;
   SDValue LowerSTORE(SDValue Op, SelectionDAG &DAG) const;
 
   SDValue LowerABS(SDValue Op, SelectionDAG &DAG) const;
diff --git a/llvm/test/CodeGen/AArch64/aarch64-load-ext.ll b/llvm/test/CodeGen/AArch64/aarch64-load-ext.ll
index d303ab8b80f3c..308352e3e2277 100644
--- a/llvm/test/CodeGen/AArch64/aarch64-load-ext.ll
+++ b/llvm/test/CodeGen/AArch64/aarch64-load-ext.ll
@@ -86,195 +86,27 @@ define <2 x i8> @test3(<2 x i8>* %v2i8_ptr) {
 define <4 x i8> @test4(<4 x i8>* %v4i8_ptr) {
 ; CHECK-LE-LABEL: test4:
 ; CHECK-LE: // %bb.0:
-; CHECK-LE-NEXT: ldr s0, [x0]
-; CHECK-LE-NEXT: ushll v0.8h, v0.8b, #0
+; CHECK-LE-NEXT: ld1 { v0.b }[0], [x0]
+; CHECK-LE-NEXT: add x8, x0, #1 // =1
+; CHECK-LE-NEXT: ld1 { v0.b }[2], [x8]
+; CHECK-LE-NEXT: add x8, x0, #2 // =2
+; CHECK-LE-NEXT: ld1 { v0.b }[4], [x8]
+; CHECK-LE-NEXT: add x8, x0, #3 // =3
+; CHECK-LE-NEXT: ld1 { v0.b }[6], [x8]
 ; CHECK-LE-NEXT: // kill: def $d0 killed $d0 killed $q0
 ; CHECK-LE-NEXT: ret
 ;
 ; CHECK-BE-LABEL: test4:
 ; CHECK-BE: // %bb.0:
-; CHECK-BE-NEXT: ldr s0, [x0]
-; CHECK-BE-NEXT: rev32 v0.8b, v0.8b
-; CHECK-BE-NEXT: ushll v0.8h, v0.8b, #0
+; CHECK-BE-NEXT: ld1 { v0.b }[0], [x0]
+; CHECK-BE-NEXT: add x8, x0, #1 // =1
+; CHECK-BE-NEXT: ld1 { v0.b }[2], [x8]
+; CHECK-BE-NEXT: add x8, x0, #2 // =2
+; CHECK-BE-NEXT: ld1 { v0.b }[4], [x8]
+; CHECK-BE-NEXT: add x8, x0, #3 // =3
+; CHECK-BE-NEXT: ld1 { v0.b }[6], [x8]
 ; CHECK-BE-NEXT: rev64 v0.4h, v0.4h
 ; CHECK-BE-NEXT: ret
   %v4i8 = load <4 x i8>, <4 x i8>* %v4i8_ptr
   ret <4 x i8> %v4i8
 }
-
-define <4 x i32> @fsext_v4i32(<4 x i8>* %a) {
-; CHECK-LE-LABEL: fsext_v4i32:
-; CHECK-LE: // %bb.0:
-; CHECK-LE-NEXT: ldr s0, [x0]
-; CHECK-LE-NEXT: sshll v0.8h, v0.8b, #0
-; CHECK-LE-NEXT: sshll v0.4s, v0.4h, #0
-; CHECK-LE-NEXT: ret
-;
-; CHECK-BE-LABEL: fsext_v4i32:
-; CHECK-BE: // %bb.0:
-; CHECK-BE-NEXT: ldr s0, [x0]
-; CHECK-BE-NEXT: rev32 v0.8b, v0.8b
-; CHECK-BE-NEXT: sshll v0.8h, v0.8b, #0
-; CHECK-BE-NEXT: sshll v0.4s, v0.4h, #0
-; CHECK-BE-NEXT: rev64 v0.4s, v0.4s
-; CHECK-BE-NEXT: ext v0.16b, v0.16b, v0.16b, #8
-; CHECK-BE-NEXT: ret
-  %x = load <4 x i8>, <4 x i8>* %a
-  %y = sext <4 x i8> %x to <4 x i32>
-  ret <4 x i32> %y
-}
-
-define <4 x i32> @fzext_v4i32(<4 x i8>* %a) {
-; CHECK-LE-LABEL: fzext_v4i32:
-; CHECK-LE: // %bb.0:
-; CHECK-LE-NEXT: ldr s0, [x0]
-; CHECK-LE-NEXT: ushll v0.8h, v0.8b, #0
-; CHECK-LE-NEXT: ushll v0.4s, v0.4h, #0
-; CHECK-LE-NEXT: ret
-;
-; CHECK-BE-LABEL: fzext_v4i32:
-; CHECK-BE: // %bb.0:
-; CHECK-BE-NEXT: ldr s0, [x0]
-; CHECK-BE-NEXT: rev32 v0.8b, v0.8b
-; CHECK-BE-NEXT: ushll v0.8h, v0.8b, #0
-; CHECK-BE-NEXT: ushll v0.4s, v0.4h, #0
-; CHECK-BE-NEXT: rev64 v0.4s, v0.4s
-; CHECK-BE-NEXT: ext v0.16b, v0.16b, v0.16b, #8
-; CHECK-BE-NEXT: ret
-  %x = load <4 x i8>, <4 x i8>* %a
-  %y = zext <4 x i8> %x to <4 x i32>
-  ret <4 x i32> %y
-}
-
-; TODO: This codegen could just be:
-;   ldrb w0, [x0]
-;
-define i32 @loadExti32(<4 x i8>* %ref) {
-; CHECK-LE-LABEL: loadExti32:
-; CHECK-LE: // %bb.0:
-; CHECK-LE-NEXT: ldr s0, [x0]
-; CHECK-LE-NEXT: ushll v0.8h, v0.8b, #0
-; CHECK-LE-NEXT: umov w8, v0.h[0]
-; CHECK-LE-NEXT: and w0, w8, #0xff
-; CHECK-LE-NEXT: ret
-;
-; CHECK-BE-LABEL: loadExti32:
-; CHECK-BE: // %bb.0:
-; CHECK-BE-NEXT: ldr s0, [x0]
-; CHECK-BE-NEXT: rev32 v0.8b, v0.8b
-; CHECK-BE-NEXT: ushll v0.8h, v0.8b, #0
-; CHECK-BE-NEXT: umov w8, v0.h[0]
-; CHECK-BE-NEXT: and w0, w8, #0xff
-; CHECK-BE-NEXT: ret
-  %a = load <4 x i8>, <4 x i8>* %ref
-  %vecext = extractelement <4 x i8> %a, i32 0
-  %conv = zext i8 %vecext to i32
-  ret i32 %conv
-}
-
-define <4 x i16> @fsext_v4i16(<4 x i8>* %a) {
-; CHECK-LE-LABEL: fsext_v4i16:
-; CHECK-LE: // %bb.0:
-; CHECK-LE-NEXT: ldr s0, [x0]
-; CHECK-LE-NEXT: sshll v0.8h, v0.8b, #0
-; CHECK-LE-NEXT: // kill: def $d0 killed $d0 killed $q0
-; CHECK-LE-NEXT: ret
-;
-; CHECK-BE-LABEL: fsext_v4i16:
-; CHECK-BE: // %bb.0:
-; CHECK-BE-NEXT: ldr s0, [x0]
-; CHECK-BE-NEXT: rev32 v0.8b, v0.8b
-; CHECK-BE-NEXT: sshll v0.8h, v0.8b, #0
-; CHECK-BE-NEXT: rev64 v0.4h, v0.4h
-; CHECK-BE-NEXT: ret
-  %x = load <4 x i8>, <4 x i8>* %a
-  %y = sext <4 x i8> %x to <4 x i16>
-  ret <4 x i16> %y
-}
-
-define <4 x i16> @fzext_v4i16(<4 x i8>* %a) {
-; CHECK-LE-LABEL: fzext_v4i16:
-; CHECK-LE: // %bb.0:
-; CHECK-LE-NEXT: ldr s0, [x0]
-; CHECK-LE-NEXT: ushll v0.8h, v0.8b, #0
-; CHECK-LE-NEXT: // kill: def $d0 killed $d0 killed $q0
-; CHECK-LE-NEXT: ret
-;
-; CHECK-BE-LABEL: fzext_v4i16:
-; CHECK-BE: // %bb.0:
-; CHECK-BE-NEXT: ldr s0, [x0]
-; CHECK-BE-NEXT: rev32 v0.8b, v0.8b
-; CHECK-BE-NEXT: ushll v0.8h, v0.8b, #0
-; CHECK-BE-NEXT: rev64 v0.4h, v0.4h
-; CHECK-BE-NEXT: ret
-  %x = load <4 x i8>, <4 x i8>* %a
-  %y = zext <4 x i8> %x to <4 x i16>
-  ret <4 x i16> %y
-}
-
-define <4 x i16> @anyext_v4i16(<4 x i8> *%a, <4 x i8> *%b) {
-; CHECK-LE-LABEL: anyext_v4i16:
-; CHECK-LE: // %bb.0:
-; CHECK-LE-NEXT: ldr s0, [x0]
-; CHECK-LE-NEXT: ldr s1, [x1]
-; CHECK-LE-NEXT: ushll v0.8h, v0.8b, #0
-; CHECK-LE-NEXT: ushll v1.8h, v1.8b, #0
-; CHECK-LE-NEXT: add v0.4h, v0.4h, v1.4h
-; CHECK-LE-NEXT: shl v0.4h, v0.4h, #8
-; CHECK-LE-NEXT: sshr v0.4h, v0.4h, #8
-; CHECK-LE-NEXT: ret
-;
-; CHECK-BE-LABEL: anyext_v4i16:
-; CHECK-BE: // %bb.0:
-; CHECK-BE-NEXT: ldr s0, [x0]
-; CHECK-BE-NEXT: ldr s1, [x1]
-; CHECK-BE-NEXT: rev32 v0.8b, v0.8b
-; CHECK-BE-NEXT: rev32 v1.8b, v1.8b
-; CHECK-BE-NEXT: ushll v0.8h, v0.8b, #0
-; CHECK-BE-NEXT: ushll v1.8h, v1.8b, #0
-; CHECK-BE-NEXT: add v0.4h, v0.4h, v1.4h
-; CHECK-BE-NEXT: shl v0.4h, v0.4h, #8
-; CHECK-BE-NEXT: sshr v0.4h, v0.4h, #8
-; CHECK-BE-NEXT: rev64 v0.4h, v0.4h
-; CHECK-BE-NEXT: ret
-  %x = load <4 x i8>, <4 x i8>* %a, align 4
-  %y = load <4 x i8>, <4 x i8>* %b, align 4
-  %z = add <4 x i8> %x, %y
-  %s = sext <4 x i8> %z to <4 x i16>
-  ret <4 x i16> %s
-}
-
-define <4 x i32> @anyext_v4i32(<4 x i8> *%a, <4 x i8> *%b) {
-; CHECK-LE-LABEL: anyext_v4i32:
-; CHECK-LE: // %bb.0:
-; CHECK-LE-NEXT: ldr s0, [x0]
-; CHECK-LE-NEXT: ldr s1, [x1]
-; CHECK-LE-NEXT: ushll v0.8h, v0.8b, #0
-; CHECK-LE-NEXT: ushll v1.8h, v1.8b, #0
-; CHECK-LE-NEXT: add v0.4h, v0.4h, v1.4h
-; CHECK-LE-NEXT: ushll v0.4s, v0.4h, #0
-; CHECK-LE-NEXT: shl v0.4s, v0.4s, #24
-; CHECK-LE-NEXT: sshr v0.4s, v0.4s, #24
-; CHECK-LE-NEXT: ret
-;
-; CHECK-BE-LABEL: anyext_v4i32:
-; CHECK-BE: // %bb.0:
-; CHECK-BE-NEXT: ldr s0, [x0]
-; CHECK-BE-NEXT: ldr s1, [x1]
-; CHECK-BE-NEXT: rev32 v0.8b, v0.8b
-; CHECK-BE-NEXT: rev32 v1.8b, v1.8b
-; CHECK-BE-NEXT: ushll v0.8h, v0.8b, #0
-; CHECK-BE-NEXT: ushll v1.8h, v1.8b, #0
-; CHECK-BE-NEXT: add v0.4h, v0.4h, v1.4h
-; CHECK-BE-NEXT: ushll v0.4s, v0.4h, #0
-; CHECK-BE-NEXT: shl v0.4s, v0.4s, #24
-; CHECK-BE-NEXT: sshr v0.4s, v0.4s, #24
-; CHECK-BE-NEXT: rev64 v0.4s, v0.4s
-; CHECK-BE-NEXT: ext v0.16b, v0.16b, v0.16b, #8
-; CHECK-BE-NEXT: ret
-  %x = load <4 x i8>, <4 x i8>* %a, align 4
-  %y = load <4 x i8>, <4 x i8>* %b, align 4
-  %z = add <4 x i8> %x, %y
-  %s = sext <4 x i8> %z to <4 x i32>
-  ret <4 x i32> %s
-}
diff --git a/llvm/test/CodeGen/AArch64/arm64-vshift.ll b/llvm/test/CodeGen/AArch64/arm64-vshift.ll
index 07b257043426d..c63f3399e636f 100644
--- a/llvm/test/CodeGen/AArch64/arm64-vshift.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-vshift.ll
@@ -1494,12 +1494,17 @@ define <8 x i16> @neon.ushl8h_no_constant_shift(<8 x i8>* %A) nounwind {
 }
 
 define <4 x i32> @neon.ushl8h_constant_shift_extend_not_2x(<4 x i8>* %A) nounwind {
-; CHECK-LABEL: neon.ushl8h_constant_shift_extend_not_2x:
-; CHECK: // %bb.0:
-; CHECK-NEXT: ldr s0, [x0]
-; CHECK-NEXT: ushll.8h v0, v0, #0
-; CHECK-NEXT: ushll.4s v0, v0, #1
-; CHECK-NEXT: ret
+;CHECK-LABEL: @neon.ushl8h_constant_shift_extend_not_2x
+;CHECK-NOT: ushll.8h v0,
+;CHECK: ldrb w8, [x0]
+;CHECK: fmov s0, w8
+;CHECK: ldrb w8, [x0, #1]
+;CHECK: mov.s v0[1], w8
+;CHECK: ldrb w8, [x0, #2]
+;CHECK: mov.s v0[2], w8
+;CHECK: ldrb w8, [x0, #3]
+;CHECK: mov.s v0[3], w8
+;CHECK: shl.4s v0, v0, #1
   %tmp1 = load <4 x i8>, <4 x i8>* %A
   %tmp2 = zext <4 x i8> %tmp1 to <4 x i32>
   %tmp3 = call <4 x i32> @llvm.aarch64.neon.ushl.v4i32(<4 x i32> %tmp2, <4 x i32> <i32 1, i32 1, i32 1, i32 1>)
@@ -1632,12 +1637,16 @@ define <8 x i16> @neon.sshll8h_constant_shift(<8 x i8>* %A) nounwind {
 }
 
 define <4 x i32> @neon.sshl4s_wrong_ext_constant_shift(<4 x i8>* %A) nounwind {
-; CHECK-LABEL: neon.sshl4s_wrong_ext_constant_shift:
-; CHECK: // %bb.0:
-; CHECK-NEXT: ldr s0, [x0]
-; CHECK-NEXT: sshll.8h v0, v0, #0
-; CHECK-NEXT: sshll.4s v0, v0, #1
-; CHECK-NEXT: ret
+;CHECK-LABEL: neon.sshl4s_wrong_ext_constant_shift
+;CHECK: ldrsb w8, [x0]
+;CHECK-NEXT: fmov s0, w8
+;CHECK-NEXT: ldrsb w8, [x0, #1]
+;CHECK-NEXT: mov.s v0[1], w8
+;CHECK-NEXT: ldrsb w8, [x0, #2]
+;CHECK-NEXT: mov.s v0[2], w8
+;CHECK-NEXT: ldrsb w8, [x0, #3]
+;CHECK-NEXT: mov.s v0[3], w8
+;CHECK-NEXT: shl.4s v0, v0, #1
   %tmp1 = load <4 x i8>, <4 x i8>* %A
   %tmp2 = sext <4 x i8> %tmp1 to <4 x i32>
   %tmp3 = call <4 x i32> @llvm.aarch64.neon.sshl.v4i32(<4 x i32> %tmp2, <4 x i32> <i32 1, i32 1, i32 1, i32 1>)
diff --git a/llvm/test/CodeGen/AArch64/neon-extload.ll b/llvm/test/CodeGen/AArch64/neon-extload.ll
new file mode 100644
index 0000000000000..321a1babb411d
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/neon-extload.ll
@@ -0,0 +1,145 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -verify-machineinstrs -mtriple=aarch64-none-linux-gnu -mattr=+neon | FileCheck %s --check-prefix=LE
+; RUN: llc < %s -verify-machineinstrs -mtriple=aarch64_be-none-linux-gnu -mattr=+neon | FileCheck %s --check-prefix=BE
+
+define <4 x i32> @fsext_v4i32(<4 x i8>* %a) {
+; LE-LABEL: fsext_v4i32:
+; LE: // %bb.0:
+; LE-NEXT: ldrsb w8, [x0]
+; LE-NEXT: ldrsb w9, [x0, #1]
+; LE-NEXT: ldrsb w10, [x0, #2]
+; LE-NEXT: ldrsb w11, [x0, #3]
+; LE-NEXT: fmov s0, w8
+; LE-NEXT: mov v0.s[1], w9
+; LE-NEXT: mov v0.s[2], w10
+; LE-NEXT: mov v0.s[3], w11
+; LE-NEXT: ret
+;
+; BE-LABEL: fsext_v4i32:
+; BE: // %bb.0:
+; BE-NEXT: ldrsb w8, [x0]
+; BE-NEXT: ldrsb w9, [x0, #1]
+; BE-NEXT: ldrsb w10, [x0, #2]
+; BE-NEXT: ldrsb w11, [x0, #3]
+; BE-NEXT: fmov s0, w8
+; BE-NEXT: mov v0.s[1], w9
+; BE-NEXT: mov v0.s[2], w10
+; BE-NEXT: mov v0.s[3], w11
+; BE-NEXT: rev64 v0.4s, v0.4s
+; BE-NEXT: ext v0.16b, v0.16b, v0.16b, #8
+; BE-NEXT: ret
+  %x = load <4 x i8>, <4 x i8>* %a
+  %y = sext <4 x i8> %x to <4 x i32>
+  ret <4 x i32> %y
+}
+
+define <4 x i32> @fzext_v4i32(<4 x i8>* %a) {
+; LE-LABEL: fzext_v4i32:
+; LE: // %bb.0:
+; LE-NEXT: ldrb w8, [x0]
+; LE-NEXT: ldrb w9, [x0, #1]
+; LE-NEXT: ldrb w10, [x0, #2]
+; LE-NEXT: ldrb w11, [x0, #3]
+; LE-NEXT: fmov s0, w8
+; LE-NEXT: mov v0.s[1], w9
+; LE-NEXT: mov v0.s[2], w10
+; LE-NEXT: mov v0.s[3], w11
+; LE-NEXT: ret
+;
+; BE-LABEL: fzext_v4i32:
+; BE: // %bb.0:
+; BE-NEXT: ldrb w8, [x0]
+; BE-NEXT: ldrb w9, [x0, #1]
+; BE-NEXT: ldrb w10, [x0, #2]
+; BE-NEXT: ldrb w11, [x0, #3]
+; BE-NEXT: fmov s0, w8
+; BE-NEXT: mov v0.s[1], w9
+; BE-NEXT: mov v0.s[2], w10
+; BE-NEXT: mov v0.s[3], w11
+; BE-NEXT: rev64 v0.4s, v0.4s
+; BE-NEXT: ext v0.16b, v0.16b, v0.16b, #8
+; BE-NEXT: ret
+  %x = load <4 x i8>, <4 x i8>* %a
+  %y = zext <4 x i8> %x to <4 x i32>
+  ret <4 x i32> %y
+}
+
+define i32 @loadExt.i32(<4 x i8>* %ref) {
+; CHECK-LABEL: loadExt.i32:
+; CHECK: ldrb
+; LE-LABEL: loadExt.i32:
+; LE: // %bb.0:
+; LE-NEXT: ldrb w0, [x0]
+; LE-NEXT: ret
+;
+; BE-LABEL: loadExt.i32:
+; BE: // %bb.0:
+; BE-NEXT: ldrb w0, [x0]
+; BE-NEXT: ret
+  %a = load <4 x i8>, <4 x i8>* %ref
+  %vecext = extractelement <4 x i8> %a, i32 0
+  %conv = zext i8 %vecext to i32
+  ret i32 %conv
+}
+
+define <4 x i16> @fsext_v4i16(<4 x i8>* %a) {
+; LE-LABEL: fsext_v4i16:
+; LE: // %bb.0:
+; LE-NEXT: ldrsb w8, [x0]
+; LE-NEXT: ldrsb w9, [x0, #1]
+; LE-NEXT: ldrsb w10, [x0, #2]
+; LE-NEXT: ldrsb w11, [x0, #3]
+; LE-NEXT: fmov s0, w8
+; LE-NEXT: mov v0.h[1], w9
+; LE-NEXT: mov v0.h[2], w10
+; LE-NEXT: mov v0.h[3], w11
+; LE-NEXT: // kill: def $d0 killed $d0 killed $q0
+; LE-NEXT: ret
+;
+; BE-LABEL: fsext_v4i16:
+; BE: // %bb.0:
+; BE-NEXT: ldrsb w8, [x0]
+; BE-NEXT: ldrsb w9, [x0, #1]
+; BE-NEXT: ldrsb w10, [x0, #2]
+; BE-NEXT: ldrsb w11, [x0, #3]
+; BE-NEXT: fmov s0, w8
+; BE-NEXT: mov v0.h[1], w9
+; BE-NEXT: mov v0.h[2], w10
+; BE-NEXT: mov v0.h[3], w11
+; BE-NEXT: rev64 v0.4h, v0.4h
+; BE-NEXT: ret
+  %x = load <4 x i8>, <4 x i8>* %a
+  %y = sext <4 x i8> %x to <4 x i16>
+  ret <4 x i16> %y
+}
+
+define <4 x i16> @fzext_v4i16(<4 x i8>* %a) {
+; LE-LABEL: fzext_v4i16:
+; LE: // %bb.0:
+; LE-NEXT: ldrb w8, [x0]
+; LE-NEXT: ldrb w9, [x0, #1]
+; LE-NEXT: ldrb w10, [x0, #2]
+; LE-NEXT: ldrb w11, [x0, #3]
+; LE-NEXT: fmov s0, w8
+; LE-NEXT: mov v0.h[1], w9
+; LE-NEXT: mov v0.h[2], w10
+; LE-NEXT: mov v0.h[3], w11
+; LE-NEXT: // kill: def $d0 killed $d0 killed $q0
+; LE-NEXT: ret
+;
+; BE-LABEL: fzext_v4i16:
+; BE: // %bb.0:
+; BE-NEXT: ldrb w8, [x0]
+; BE-NEXT: ldrb w9, [x0, #1]
+; BE-NEXT: ldrb w10, [x0, #2]
+; BE-NEXT: ldrb w11, [x0, #3]
+; BE-NEXT: fmov s0, w8
+; BE-NEXT: mov v0.h[1], w9
+; BE-NEXT: mov v0.h[2], w10
+; BE-NEXT: mov v0.h[3], w11
+; BE-NEXT: rev64 v0.4h, v0.4h
+; BE-NEXT: ret
+  %x = load <4 x i8>, <4 x i8>* %a
+  %y = zext <4 x i8> %x to <4 x i16>
+  ret <4 x i16> %y
+}
diff --git a/llvm/test/CodeGen/AArch64/sadd_sat_vec.ll b/llvm/test/CodeGen/AArch64/sadd_sat_vec.ll
index 9c654f6719b18..cefd4758b3747 100644
--- a/llvm/test/CodeGen/AArch64/sadd_sat_vec.ll
+++ b/llvm/test/CodeGen/AArch64/sadd_sat_vec.ll
@@ -112,10 +112,22 @@ define void @v8i8(<8 x i8>* %px, <8 x i8>* %py, <8 x i8>* %pz) nounwind {
 define void @v4i8(<4 x i8>* %px, <4 x i8>* %py, <4 x i8>* %pz) nounwind {
 ; CHECK-LABEL: v4i8:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: ldr s0, [x0]
-; CHECK-NEXT: ldr s1, [x1]
-; CHECK-NEXT: sshll v0.8h, v0.8b, #0
-; CHECK-NEXT: sshll v1.8h, v1.8b, #0
+; CHECK-NEXT: ldrsb w8, [x0]
+; CHECK-NEXT: ldrsb w9, [x1]
+; CHECK-NEXT: ldrsb w10, [x0, #1]
+; CHECK-NEXT: ldrsb w11, [x1, #1]
+; CHECK-NEXT: fmov s0, w8
+; CHECK-NEXT: fmov s1, w9
+; CHECK-NEXT: ldrsb w8, [x0, #2]
+; CHECK-NEXT: ldrsb w9, [x1, #2]
+; CHECK-NEXT: mov v0.h[1], w10
+; CHECK-NEXT: mov v1.h[1], w11
+; CHECK-NEXT: ldrsb w10, [x0, #3]
+; CHECK-NEXT: ldrsb w11, [x1, #3]
+; CHECK-NEXT: mov v0.h[2], w8
+; CHECK-NEXT: mov v1.h[2], w9
+; CHECK-NEXT: mov v0.h[3], w10
+; CHECK-NEXT: mov v1.h[3], w11
 ; CHECK-NEXT: shl v1.4h, v1.4h, #8
 ; CHECK-NEXT: shl v0.4h, v0.4h, #8
 ; CHECK-NEXT: sqadd v0.4h, v0.4h, v1.4h
diff --git a/llvm/test/CodeGen/AArch64/ssub_sat_vec.ll b/llvm/test/CodeGen/AArch64/ssub_sat_vec.ll
index 7c2e2330608e8..17af8a11aeee5 100644
--- a/llvm/test/CodeGen/AArch64/ssub_sat_vec.ll
+++ b/llvm/test/CodeGen/AArch64/ssub_sat_vec.ll
@@ -113,10 +113,22 @@ define void @v8i8(<8 x i8>* %px, <8 x i8>* %py, <8 x i8>* %pz) nounwind {
 define void @v4i8(<4 x i8>* %px, <4 x i8>* %py, <4 x i8>* %pz) nounwind {
 ; CHECK-LABEL: v4i8:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: ldr s0, [x0]
-; CHECK-NEXT: ldr s1, [x1]
-; CHECK-NEXT: sshll v0.8h, v0.8b, #0
-; CHECK-NEXT: sshll v1.8h, v1.8b, #0
+; CHECK-NEXT: ldrsb w8, [x0]
+; CHECK-NEXT: ldrsb w9, [x1]
+; CHECK-NEXT: ldrsb w10, [x0, #1]
+; CHECK-NEXT: ldrsb w11, [x1, #1]
+; CHECK-NEXT: fmov s0, w8
+; CHECK-NEXT: fmov s1, w9
+; CHECK-NEXT: ldrsb w8, [x0, #2]
+; CHECK-NEXT: ldrsb w9, [x1, #2]
+; CHECK-NEXT: mov v0.h[1], w10
+; CHECK-NEXT: mov v1.h[1], w11
+; CHECK-NEXT: ldrsb w10, [x0, #3]
+; CHECK-NEXT: ldrsb w11, [x1, #3]
+; CHECK-NEXT: mov v0.h[2], w8
+; CHECK-NEXT: mov v1.h[2], w9
+; CHECK-NEXT: mov v0.h[3], w10
+; CHECK-NEXT: mov v1.h[3], w11
 ; CHECK-NEXT: shl v1.4h, v1.4h, #8
 ; CHECK-NEXT: shl v0.4h, v0.4h, #8
 ; CHECK-NEXT: sqsub v0.4h, v0.4h, v1.4h
diff --git a/llvm/test/CodeGen/AArch64/uadd_sat_vec.ll b/llvm/test/CodeGen/AArch64/uadd_sat_vec.ll
index 2b52e4c934c9d..21427a6a92d7e 100644
--- a/llvm/test/CodeGen/AArch64/uadd_sat_vec.ll
+++ b/llvm/test/CodeGen/AArch64/uadd_sat_vec.ll
@@ -112,11 +112,23 @@ define void @v8i8(<8 x i8>* %px, <8 x i8>* %py, <8 x i8>* %pz) nounwind {
 define void @v4i8(<4 x i8>* %px, <4 x i8>* %py, <4 x i8>* %pz) nounwind {
 ; CHECK-LABEL: v4i8:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: ldr s0, [x0]
-; CHECK-NEXT: ldr s1, [x1]
+; CHECK-NEXT: ldrb w8, [x0]
+; CHECK-NEXT: ldrb w9, [x1]
+; CHECK-NEXT: ldrb w10, [x0, #1]
+; CHECK-NEXT: ldrb w11, [x1, #1]
+; CHECK-NEXT: ldrb w12, [x0, #2]
+; CHECK-NEXT: fmov s0, w8
+; CHECK-NEXT: ldrb w8, [x1, #2]
+; CHECK-NEXT: fmov s1, w9
+; CHECK-NEXT: mov v0.h[1], w10
+; CHECK-NEXT: ldrb w9, [x0, #3]
+; CHECK-NEXT: ldrb w10, [x1, #3]
+; CHECK-NEXT: mov v1.h[1], w11
+; CHECK-NEXT: mov v0.h[2], w12
+; CHECK-NEXT: mov v1.h[2], w8
+; CHECK-NEXT: mov v0.h[3], w9
+; CHECK-NEXT: mov v1.h[3], w10
 ; CHECK-NEXT: movi d2, #0xff00ff00ff00ff
-; CHECK-NEXT: ushll v0.8h, v0.8b, #0
-; CHECK-NEXT: ushll v1.8h, v1.8b, #0
 ; CHECK-NEXT: add v0.4h, v0.4h, v1.4h
 ; CHECK-NEXT: umin v0.4h, v0.4h, v2.4h
 ; CHECK-NEXT: xtn v0.8b, v0.8h
diff --git a/llvm/test/CodeGen/AArch64/usub_sat_vec.ll b/llvm/test/CodeGen/AArch64/usub_sat_vec.ll
index 63bbac3be3fb8..a0ab8040e8fc0 100644
--- a/llvm/test/CodeGen/AArch64/usub_sat_vec.ll
+++ b/llvm/test/CodeGen/AArch64/usub_sat_vec.ll
@@ -113,10 +113,22 @@ define void @v8i8(<8 x i8>* %px, <8 x i8>* %py, <8 x i8>* %pz) nounwind {
 define void @v4i8(<4 x i8>* %px, <4 x i8>* %py, <4 x i8>* %pz) nounwind {
 ; CHECK-LABEL: v4i8:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: ldr s0, [x0]
-; CHECK-NEXT: ldr s1, [x1]
-; CHECK-NEXT: ushll v0.8h, v0.8b, #0
-; CHECK-NEXT: ushll v1.8h, v1.8b, #0
+; CHECK-NEXT: ldrb w8, [x0]
+; CHECK-NEXT: ldrb w9, [x1]
+; CHECK-NEXT: ldrb w10, [x0, #1]
+; CHECK-NEXT: ldrb w11, [x1, #1]
+; CHECK-NEXT: fmov s0, w8
+; CHECK-NEXT: fmov s1, w9
+; CHECK-NEXT: ldrb w8, [x0, #2]
+; CHECK-NEXT: ldrb w9, [x1, #2]
+; CHECK-NEXT: mov v0.h[1], w10
+; CHECK-NEXT: mov v1.h[1], w11
+; CHECK-NEXT: ldrb w10, [x0, #3]
+; CHECK-NEXT: ldrb w11, [x1, #3]
+; CHECK-NEXT: mov v0.h[2], w8
+; CHECK-NEXT: mov v1.h[2], w9
+; CHECK-NEXT: mov v0.h[3], w10
+; CHECK-NEXT: mov v1.h[3], w11
 ; CHECK-NEXT: uqsub v0.4h, v0.4h, v1.4h
 ; CHECK-NEXT: xtn v0.8b, v0.8h
 ; CHECK-NEXT: str s0, [x2]