diff --git a/llvm/include/llvm/IR/IntrinsicsLoongArch.td b/llvm/include/llvm/IR/IntrinsicsLoongArch.td
index 84026aa9d3624..1c46965d995fe 100644
--- a/llvm/include/llvm/IR/IntrinsicsLoongArch.td
+++ b/llvm/include/llvm/IR/IntrinsicsLoongArch.td
@@ -1192,4 +1192,42 @@ def int_loongarch_lasx_xvstelm_w
 def int_loongarch_lasx_xvstelm_d
   : VecInt<[], [llvm_v4i64_ty, llvm_ptr_ty, llvm_i32_ty, llvm_i32_ty],
            [IntrWriteMem, IntrArgMemOnly, ImmArg<ArgIndex<2>>, ImmArg<ArgIndex<3>>]>;
+
+// LASX and LSX conversion
+def int_loongarch_lasx_cast_128_s
+  : VecInt<[llvm_v8f32_ty], [llvm_v4f32_ty], [IntrNoMem]>;
+def int_loongarch_lasx_cast_128_d
+  : VecInt<[llvm_v4f64_ty], [llvm_v2f64_ty], [IntrNoMem]>;
+def int_loongarch_lasx_cast_128
+  : VecInt<[llvm_v4i64_ty], [llvm_v2i64_ty], [IntrNoMem]>;
+def int_loongarch_lasx_concat_128_s
+  : VecInt<[llvm_v8f32_ty], [llvm_v4f32_ty, llvm_v4f32_ty], [IntrNoMem]>;
+def int_loongarch_lasx_concat_128_d
+  : VecInt<[llvm_v4f64_ty], [llvm_v2f64_ty, llvm_v2f64_ty], [IntrNoMem]>;
+def int_loongarch_lasx_concat_128
+  : VecInt<[llvm_v4i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty], [IntrNoMem]>;
+def int_loongarch_lasx_extract_128_lo_s
+  : VecInt<[llvm_v4f32_ty], [llvm_v8f32_ty], [IntrNoMem]>;
+def int_loongarch_lasx_extract_128_lo_d
+  : VecInt<[llvm_v2f64_ty], [llvm_v4f64_ty], [IntrNoMem]>;
+def int_loongarch_lasx_extract_128_lo
+  : VecInt<[llvm_v2i64_ty], [llvm_v4i64_ty], [IntrNoMem]>;
+def int_loongarch_lasx_extract_128_hi_s
+  : VecInt<[llvm_v4f32_ty], [llvm_v8f32_ty], [IntrNoMem]>;
+def int_loongarch_lasx_extract_128_hi_d
+  : VecInt<[llvm_v2f64_ty], [llvm_v4f64_ty], [IntrNoMem]>;
+def int_loongarch_lasx_extract_128_hi
+  : VecInt<[llvm_v2i64_ty], [llvm_v4i64_ty], [IntrNoMem]>;
+def int_loongarch_lasx_insert_128_lo_s
+  : VecInt<[llvm_v8f32_ty], [llvm_v8f32_ty, llvm_v4f32_ty], [IntrNoMem]>;
+def int_loongarch_lasx_insert_128_lo_d
+  : VecInt<[llvm_v4f64_ty], [llvm_v4f64_ty, llvm_v2f64_ty], [IntrNoMem]>;
+def int_loongarch_lasx_insert_128_lo
+  : VecInt<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_v2i64_ty], [IntrNoMem]>;
+def int_loongarch_lasx_insert_128_hi_s
+  : VecInt<[llvm_v8f32_ty], [llvm_v8f32_ty, llvm_v4f32_ty], [IntrNoMem]>;
+def int_loongarch_lasx_insert_128_hi_d
+  : VecInt<[llvm_v4f64_ty], [llvm_v4f64_ty, llvm_v2f64_ty], [IntrNoMem]>;
+def int_loongarch_lasx_insert_128_hi
+  : VecInt<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_v2i64_ty], [IntrNoMem]>;
 } // TargetPrefix = "loongarch"
diff --git a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
index 634914d3b3fd0..1ae5fc8bcfdbb 100644
--- a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
+++ b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
@@ -6168,6 +6168,11 @@ performINTRINSIC_WO_CHAINCombine(SDNode *N, SelectionDAG &DAG,
                        N->getOperand(1),
                        DAG.getNode(ISD::ANY_EXTEND, DL, Subtarget.getGRLenVT(),
                                    N->getOperand(2)));
+  case Intrinsic::loongarch_lasx_concat_128_s:
+  case Intrinsic::loongarch_lasx_concat_128_d:
+  case Intrinsic::loongarch_lasx_concat_128:
+    return DAG.getNode(ISD::CONCAT_VECTORS, DL, N->getValueType(0),
+                       N->getOperand(1), N->getOperand(2));
   }
   return SDValue();
 }
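For intuition (reviewer commentary, not part of the patch): the eighteen intrinsics form six families over three element types (`_s` for f32, `_d` for f64, and an unsuffixed v2i64/v4i64 form). A minimal scalar model of their lane semantics, treating a 256-bit LASX register as two 128-bit halves:

```cpp
#include <array>
#include <cstdint>

using V128 = std::array<uint64_t, 2>; // one 128-bit LSX register
using V256 = std::array<V128, 2>;     // one 256-bit LASX register: {lo, hi}

// cast_128*: widen a 128-bit value; the high half is undefined. The
// patterns below lower this to INSERT_SUBREG into IMPLICIT_DEF, so no
// instruction is emitted.
V256 cast_128(V128 v) { return {v, V128{}}; }

// concat_128*: low half from the first operand, high half from the second.
// The DAG combine above turns this into ISD::CONCAT_VECTORS, which selects
// to a single xvpermi.q.
V256 concat_128(V128 lo, V128 hi) { return {lo, hi}; }

// extract_128_{lo,hi}*: read one half. The low half is a free subregister
// copy; the high half costs one xvpermi.q.
V128 extract_128_lo(V256 v) { return v[0]; }
V128 extract_128_hi(V256 v) { return v[1]; }

// insert_128_{lo,hi}*: replace one half, keeping the other.
V256 insert_128_lo(V256 v, V128 lo) { return {lo, v[1]}; }
V256 insert_128_hi(V256 v, V128 hi) { return {v[0], hi}; }
```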
diff --git a/llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td b/llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td
index a79c01cbe577a..5fed7024a8233 100644
--- a/llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td
+++ b/llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td
@@ -2028,6 +2028,37 @@ defm : subvector_subreg_lowering<LSX128, v2i64, LASX256, v4i64, sub_128>;
 defm : subvector_subreg_lowering<LSX128, v4f32, LASX256, v8f32, sub_128>;
 defm : subvector_subreg_lowering<LSX128, v2f64, LASX256, v4f64, sub_128>;
 
+// LASX and LSX conversion
+def : Pat<(int_loongarch_lasx_cast_128_s (v4f32 LSX128:$src)),
+          (INSERT_SUBREG (IMPLICIT_DEF), LSX128:$src, sub_128)>;
+def : Pat<(int_loongarch_lasx_cast_128_d (v2f64 LSX128:$src)),
+          (INSERT_SUBREG (IMPLICIT_DEF), LSX128:$src, sub_128)>;
+def : Pat<(int_loongarch_lasx_cast_128 (v2i64 LSX128:$src)),
+          (INSERT_SUBREG (IMPLICIT_DEF), LSX128:$src, sub_128)>;
+def : Pat<(int_loongarch_lasx_extract_128_lo_s (v8f32 LASX256:$src)),
+          (EXTRACT_SUBREG LASX256:$src, sub_128)>;
+def : Pat<(int_loongarch_lasx_extract_128_lo_d (v4f64 LASX256:$src)),
+          (EXTRACT_SUBREG LASX256:$src, sub_128)>;
+def : Pat<(int_loongarch_lasx_extract_128_lo (v4i64 LASX256:$src)),
+          (EXTRACT_SUBREG LASX256:$src, sub_128)>;
+def : Pat<(int_loongarch_lasx_extract_128_hi_s (v8f32 LASX256:$src)),
+          (EXTRACT_SUBREG (XVPERMI_Q (IMPLICIT_DEF), LASX256:$src, 1), sub_128)>;
+def : Pat<(int_loongarch_lasx_extract_128_hi_d (v4f64 LASX256:$src)),
+          (EXTRACT_SUBREG (XVPERMI_Q (IMPLICIT_DEF), LASX256:$src, 1), sub_128)>;
+def : Pat<(int_loongarch_lasx_extract_128_hi (v4i64 LASX256:$src)),
+          (EXTRACT_SUBREG (XVPERMI_Q (IMPLICIT_DEF), LASX256:$src, 1), sub_128)>;
+def : Pat<(int_loongarch_lasx_insert_128_lo_s (v8f32 LASX256:$src), (v4f32 LSX128:$lo)),
+          (XVPERMI_Q LASX256:$src, (INSERT_SUBREG (IMPLICIT_DEF), LSX128:$lo, sub_128), 48)>;
+def : Pat<(int_loongarch_lasx_insert_128_lo_d (v4f64 LASX256:$src), (v2f64 LSX128:$lo)),
+          (XVPERMI_Q LASX256:$src, (INSERT_SUBREG (IMPLICIT_DEF), LSX128:$lo, sub_128), 48)>;
+def : Pat<(int_loongarch_lasx_insert_128_lo (v4i64 LASX256:$src), (v2i64 LSX128:$lo)),
+          (XVPERMI_Q LASX256:$src, (INSERT_SUBREG (IMPLICIT_DEF), LSX128:$lo, sub_128), 48)>;
+def : Pat<(int_loongarch_lasx_insert_128_hi_s (v8f32 LASX256:$src), (v4f32 LSX128:$lo)),
+          (XVPERMI_Q LASX256:$src, (INSERT_SUBREG (IMPLICIT_DEF), LSX128:$lo, sub_128), 2)>;
+def : Pat<(int_loongarch_lasx_insert_128_hi_d (v4f64 LASX256:$src), (v2f64 LSX128:$lo)),
+          (XVPERMI_Q LASX256:$src, (INSERT_SUBREG (IMPLICIT_DEF), LSX128:$lo, sub_128), 2)>;
+def : Pat<(int_loongarch_lasx_insert_128_hi (v4i64 LASX256:$src), (v2i64 LSX128:$lo)),
+          (XVPERMI_Q LASX256:$src, (INSERT_SUBREG (IMPLICIT_DEF), LSX128:$lo, sub_128), 2)>;
 } // Predicates = [HasExtLASX]
 
 /// Intrinsic pattern
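The bare immediates on XVPERMI_Q (1, 2, 48) are 128-bit lane selectors. As I read the ISA manual (again commentary, not part of the patch), imm[1:0] picks the result's low lane and imm[5:4] its high lane, where selector values 0/1 take a lane of $xj and 2/3 take a lane of the original $xd. A sketch that is consistent with every use above:

```cpp
#include <array>
#include <cstdint>

using V128 = std::array<uint64_t, 2>;
using V256 = std::array<V128, 2>; // {lane 0 (lo), lane 1 (hi)}

// Lane-level model of `xvpermi.q xd, xj, imm`.
V256 xvpermi_q(V256 xd, V256 xj, uint8_t imm) {
  // Selector: bit 1 chooses the register (0 = xj, 1 = xd), bit 0 the lane.
  auto pick = [&](unsigned sel) { return (sel & 2) ? xd[sel & 1] : xj[sel & 1]; };
  return {pick(imm & 3), pick((imm >> 4) & 3)};
}

// imm = 1  (0b00'0001): lo = xj.hi             -> extract_128_hi (+ subreg copy)
// imm = 2  (0b00'0010): lo = xd.lo, hi = xj.lo -> concat_128 / insert_128_hi
// imm = 48 (0b11'0000): lo = xj.lo, hi = xd.hi -> insert_128_lo
```

This is why insert_128_lo needs 48 (keep $xd's high lane, take $xj's low lane) while insert_128_hi and concat_128 share the immediate 2.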
diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-conversion.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-conversion.ll
new file mode 100644
index 0000000000000..7b418cf342714
--- /dev/null
+++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-conversion.ll
@@ -0,0 +1,234 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc --mtriple=loongarch32 --mattr=+32s,+lasx < %s | FileCheck %s
+; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s
+
+declare <8 x float> @llvm.loongarch.lasx.cast.128.s(<4 x float>)
+
+define <8 x float> @lasx_cast_128_s(<4 x float> %va) {
+; CHECK-LABEL: lasx_cast_128_s:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    # kill: def $vr0 killed $vr0 def $xr0
+; CHECK-NEXT:    ret
+entry:
+  %res = call <8 x float> @llvm.loongarch.lasx.cast.128.s(<4 x float> %va)
+  ret <8 x float> %res
+}
+
+declare <4 x double> @llvm.loongarch.lasx.cast.128.d(<2 x double>)
+
+define <4 x double> @lasx_cast_128_d(<2 x double> %va) {
+; CHECK-LABEL: lasx_cast_128_d:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    # kill: def $vr0 killed $vr0 def $xr0
+; CHECK-NEXT:    ret
+entry:
+  %res = call <4 x double> @llvm.loongarch.lasx.cast.128.d(<2 x double> %va)
+  ret <4 x double> %res
+}
+
+declare <4 x i64> @llvm.loongarch.lasx.cast.128(<2 x i64>)
+
+define <4 x i64> @lasx_cast_128(<2 x i64> %va) {
+; CHECK-LABEL: lasx_cast_128:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    # kill: def $vr0 killed $vr0 def $xr0
+; CHECK-NEXT:    ret
+entry:
+  %res = call <4 x i64> @llvm.loongarch.lasx.cast.128(<2 x i64> %va)
+  ret <4 x i64> %res
+}
+
+declare <8 x float> @llvm.loongarch.lasx.concat.128.s(<4 x float>, <4 x float>)
+
+define <8 x float> @lasx_concat_128_s(<4 x float> %va, <4 x float> %vb) {
+; CHECK-LABEL: lasx_concat_128_s:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    # kill: def $vr1 killed $vr1 def $xr1
+; CHECK-NEXT:    # kill: def $vr0 killed $vr0 def $xr0
+; CHECK-NEXT:    xvpermi.q $xr0, $xr1, 2
+; CHECK-NEXT:    ret
+entry:
+  %res = call <8 x float> @llvm.loongarch.lasx.concat.128.s(<4 x float> %va, <4 x float> %vb)
+  ret <8 x float> %res
+}
+
+declare <4 x double> @llvm.loongarch.lasx.concat.128.d(<2 x double>, <2 x double>)
+
+define <4 x double> @lasx_concat_128_d(<2 x double> %va, <2 x double> %vb) {
+; CHECK-LABEL: lasx_concat_128_d:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    # kill: def $vr1 killed $vr1 def $xr1
+; CHECK-NEXT:    # kill: def $vr0 killed $vr0 def $xr0
+; CHECK-NEXT:    xvpermi.q $xr0, $xr1, 2
+; CHECK-NEXT:    ret
+entry:
+  %res = call <4 x double> @llvm.loongarch.lasx.concat.128.d(<2 x double> %va, <2 x double> %vb)
+  ret <4 x double> %res
+}
+
+declare <4 x i64> @llvm.loongarch.lasx.concat.128(<2 x i64>, <2 x i64>)
+
+define <4 x i64> @lasx_concat_128(<2 x i64> %va, <2 x i64> %vb) {
+; CHECK-LABEL: lasx_concat_128:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    # kill: def $vr1 killed $vr1 def $xr1
+; CHECK-NEXT:    # kill: def $vr0 killed $vr0 def $xr0
+; CHECK-NEXT:    xvpermi.q $xr0, $xr1, 2
+; CHECK-NEXT:    ret
+entry:
+  %res = call <4 x i64> @llvm.loongarch.lasx.concat.128(<2 x i64> %va, <2 x i64> %vb)
+  ret <4 x i64> %res
+}
+
+declare <4 x float> @llvm.loongarch.lasx.extract.128.lo.s(<8 x float>)
+
+define <4 x float> @lasx_extract_128_lo_s(<8 x float> %va) {
+; CHECK-LABEL: lasx_extract_128_lo_s:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    # kill: def $vr0 killed $vr0 killed $xr0
+; CHECK-NEXT:    ret
+entry:
+  %res = call <4 x float> @llvm.loongarch.lasx.extract.128.lo.s(<8 x float> %va)
+  ret <4 x float> %res
+}
+
+declare <2 x double> @llvm.loongarch.lasx.extract.128.lo.d(<4 x double>)
+
+define <2 x double> @lasx_extract_128_lo_d(<4 x double> %va) {
+; CHECK-LABEL: lasx_extract_128_lo_d:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    # kill: def $vr0 killed $vr0 killed $xr0
+; CHECK-NEXT:    ret
+entry:
+  %res = call <2 x double> @llvm.loongarch.lasx.extract.128.lo.d(<4 x double> %va)
+  ret <2 x double> %res
+}
+
+declare <2 x i64> @llvm.loongarch.lasx.extract.128.lo(<4 x i64>)
+
+define <2 x i64> @lasx_extract_128_lo(<4 x i64> %va) {
+; CHECK-LABEL: lasx_extract_128_lo:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    # kill: def $vr0 killed $vr0 killed $xr0
+; CHECK-NEXT:    ret
+entry:
+  %res = call <2 x i64> @llvm.loongarch.lasx.extract.128.lo(<4 x i64> %va)
+  ret <2 x i64> %res
+}
+
+declare <4 x float> @llvm.loongarch.lasx.extract.128.hi.s(<8 x float>)
+
+define <4 x float> @lasx_extract_128_hi_s(<8 x float> %va) {
+; CHECK-LABEL: lasx_extract_128_hi_s:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    xvpermi.q $xr0, $xr0, 1
+; CHECK-NEXT:    # kill: def $vr0 killed $vr0 killed $xr0
+; CHECK-NEXT:    ret
+entry:
+  %res = call <4 x float> @llvm.loongarch.lasx.extract.128.hi.s(<8 x float> %va)
+  ret <4 x float> %res
+}
+
+declare <2 x double> @llvm.loongarch.lasx.extract.128.hi.d(<4 x double>)
+
+define <2 x double> @lasx_extract_128_hi_d(<4 x double> %va) {
+; CHECK-LABEL: lasx_extract_128_hi_d:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    xvpermi.q $xr0, $xr0, 1
+; CHECK-NEXT:    # kill: def $vr0 killed $vr0 killed $xr0
+; CHECK-NEXT:    ret
+entry:
+  %res = call <2 x double> @llvm.loongarch.lasx.extract.128.hi.d(<4 x double> %va)
+  ret <2 x double> %res
+}
+
+declare <2 x i64> @llvm.loongarch.lasx.extract.128.hi(<4 x i64>)
+
+define <2 x i64> @lasx_extract_128_hi(<4 x i64> %va) {
+; CHECK-LABEL: lasx_extract_128_hi:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    xvpermi.q $xr0, $xr0, 1
+; CHECK-NEXT:    # kill: def $vr0 killed $vr0 killed $xr0
+; CHECK-NEXT:    ret
+entry:
+  %res = call <2 x i64> @llvm.loongarch.lasx.extract.128.hi(<4 x i64> %va)
+  ret <2 x i64> %res
+}
+
+declare <8 x float> @llvm.loongarch.lasx.insert.128.lo.s(<8 x float>, <4 x float>)
+
+define <8 x float> @lasx_insert_128_lo_s(<8 x float> %va, <4 x float> %vb) {
+; CHECK-LABEL: lasx_insert_128_lo_s:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    # kill: def $vr1 killed $vr1 def $xr1
+; CHECK-NEXT:    xvpermi.q $xr0, $xr1, 48
+; CHECK-NEXT:    ret
+entry:
+  %res = call <8 x float> @llvm.loongarch.lasx.insert.128.lo.s(<8 x float> %va, <4 x float> %vb)
+  ret <8 x float> %res
+}
+
+declare <4 x double> @llvm.loongarch.lasx.insert.128.lo.d(<4 x double>, <2 x double>)
+
+define <4 x double> @lasx_insert_128_lo_d(<4 x double> %va, <2 x double> %vb) {
+; CHECK-LABEL: lasx_insert_128_lo_d:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    # kill: def $vr1 killed $vr1 def $xr1
+; CHECK-NEXT:    xvpermi.q $xr0, $xr1, 48
+; CHECK-NEXT:    ret
+entry:
+  %res = call <4 x double> @llvm.loongarch.lasx.insert.128.lo.d(<4 x double> %va, <2 x double> %vb)
+  ret <4 x double> %res
+}
+
+declare <4 x i64> @llvm.loongarch.lasx.insert.128.lo(<4 x i64>, <2 x i64>)
+
+define <4 x i64> @lasx_insert_128_lo(<4 x i64> %va, <2 x i64> %vb) {
+; CHECK-LABEL: lasx_insert_128_lo:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    # kill: def $vr1 killed $vr1 def $xr1
+; CHECK-NEXT:    xvpermi.q $xr0, $xr1, 48
+; CHECK-NEXT:    ret
+entry:
+  %res = call <4 x i64> @llvm.loongarch.lasx.insert.128.lo(<4 x i64> %va, <2 x i64> %vb)
+  ret <4 x i64> %res
+}
+
+declare <8 x float> @llvm.loongarch.lasx.insert.128.hi.s(<8 x float>, <4 x float>)
+
+define <8 x float> @lasx_insert_128_hi_s(<8 x float> %va, <4 x float> %vb) {
+; CHECK-LABEL: lasx_insert_128_hi_s:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    # kill: def $vr1 killed $vr1 def $xr1
+; CHECK-NEXT:    xvpermi.q $xr0, $xr1, 2
+; CHECK-NEXT:    ret
+entry:
+  %res = call <8 x float> @llvm.loongarch.lasx.insert.128.hi.s(<8 x float> %va, <4 x float> %vb)
+  ret <8 x float> %res
+}
+
+declare <4 x double> @llvm.loongarch.lasx.insert.128.hi.d(<4 x double>, <2 x double>)
+
+define <4 x double> @lasx_insert_128_hi_d(<4 x double> %va, <2 x double> %vb) {
+; CHECK-LABEL: lasx_insert_128_hi_d:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    # kill: def $vr1 killed $vr1 def $xr1
+; CHECK-NEXT:    xvpermi.q $xr0, $xr1, 2
+; CHECK-NEXT:    ret
+entry:
+  %res = call <4 x double> @llvm.loongarch.lasx.insert.128.hi.d(<4 x double> %va, <2 x double> %vb)
+  ret <4 x double> %res
+}
+
+declare <4 x i64> @llvm.loongarch.lasx.insert.128.hi(<4 x i64>, <2 x i64>)
+
+define <4 x i64> @lasx_insert_128_hi(<4 x i64> %va, <2 x i64> %vb) {
+; CHECK-LABEL: lasx_insert_128_hi:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    # kill: def $vr1 killed $vr1 def $xr1
+; CHECK-NEXT:    xvpermi.q $xr0, $xr1, 2
+; CHECK-NEXT:    ret
+entry:
+  %res = call <4 x i64> @llvm.loongarch.lasx.insert.128.hi(<4 x i64> %va, <2 x i64> %vb)
+  ret <4 x i64> %res
+}
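Finally, a hypothetical middle-end usage sketch (nothing below is in the patch; splitAndConcat is an invented name, and Intrinsic::getDeclaration is the pre-LLVM-20 spelling of what newer trees call Intrinsic::getOrInsertDeclaration). It splits a <4 x i64> LASX value into its two LSX halves and reassembles them:

```cpp
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/IntrinsicsLoongArch.h"
#include "llvm/IR/Module.h"

using namespace llvm;

// Round-trip a <4 x i64> LASX value through the new conversion intrinsics:
// extract both 128-bit halves, then concat them back together.
Value *splitAndConcat(IRBuilder<> &B, Module *M, Value *V) {
  Function *ExtLo =
      Intrinsic::getDeclaration(M, Intrinsic::loongarch_lasx_extract_128_lo);
  Function *ExtHi =
      Intrinsic::getDeclaration(M, Intrinsic::loongarch_lasx_extract_128_hi);
  Function *Concat =
      Intrinsic::getDeclaration(M, Intrinsic::loongarch_lasx_concat_128);
  Value *Lo = B.CreateCall(ExtLo, {V});   // <2 x i64>, free subregister copy
  Value *Hi = B.CreateCall(ExtHi, {V});   // <2 x i64>, one xvpermi.q
  return B.CreateCall(Concat, {Lo, Hi});  // <4 x i64>, one xvpermi.q
}
```

Per the tests above, the extract-lo call folds to a subregister copy and each of the other two calls becomes a single xvpermi.q.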