diff --git a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp index e8668860c2b38..add6bec686c71 100644 --- a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp +++ b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp @@ -2868,7 +2868,9 @@ LoongArchTargetLowering::lowerEXTRACT_VECTOR_ELT(SDValue Op, // of MaskVec is Idx, the rest do not matter. ResVec[0] will hold the // desired element. SDValue IdxCp = - DAG.getNode(LoongArchISD::MOVGR2FR_W_LA64, DL, MVT::f32, Idx); + Subtarget.is64Bit() + ? DAG.getNode(LoongArchISD::MOVGR2FR_W_LA64, DL, MVT::f32, Idx) + : DAG.getBitcast(MVT::f32, Idx); SDValue IdxVec = DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, MVT::v8f32, IdxCp); SDValue MaskVec = DAG.getBitcast((VecTy == MVT::v4f64) ? MVT::v4i64 : VecTy, IdxVec); diff --git a/llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td b/llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td index 2e8e11155c5fa..d7aafe7c58c5f 100644 --- a/llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td +++ b/llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td @@ -1915,21 +1915,21 @@ def : Pat<(i64 (bitconvert (f64 (vector_extract v4f64:$xj, uimm2:$imm)))), // Vector extraction with constant index. foreach imm = 16...31 in { defvar Imm = !and(imm, 15); - def : Pat<(i64 (vector_extract v32i8:$xj, imm)), + def : Pat<(GRLenVT (vector_extract v32i8:$xj, imm)), (VPICKVE2GR_B (EXTRACT_SUBREG (XVPERMI_D v32i8:$xj, 14), sub_128), Imm)>; } foreach imm = 8...15 in { defvar Imm = !and(imm, 7); - def : Pat<(i64 (vector_extract v16i16:$xj, imm)), + def : Pat<(GRLenVT (vector_extract v16i16:$xj, imm)), (VPICKVE2GR_H (EXTRACT_SUBREG (XVPERMI_D v16i16:$xj, 14), sub_128), Imm)>; } -def : Pat<(i64 (vector_extract v32i8:$xj, uimm4:$imm)), +def : Pat<(GRLenVT (vector_extract v32i8:$xj, uimm4:$imm)), (VPICKVE2GR_B (EXTRACT_SUBREG v32i8:$xj, sub_128), uimm4:$imm)>; -def : Pat<(i64 (vector_extract v16i16:$xj, uimm3:$imm)), +def : Pat<(GRLenVT (vector_extract v16i16:$xj, uimm3:$imm)), (VPICKVE2GR_H (EXTRACT_SUBREG v16i16:$xj, sub_128), uimm3:$imm)>; -def : Pat<(i64 (vector_extract v8i32:$xj, uimm3:$imm)), +def : Pat<(GRLenVT (vector_extract v8i32:$xj, uimm3:$imm)), (XVPICKVE2GR_W v8i32:$xj, uimm3:$imm)>; def : Pat<(i64 (vector_extract v4i64:$xj, uimm2:$imm)), (XVPICKVE2GR_D v4i64:$xj, uimm2:$imm)>; diff --git a/llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td b/llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td index 5421bba0424bf..ac8bbd9ad1752 100644 --- a/llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td +++ b/llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td @@ -2080,11 +2080,11 @@ def : Pat<(i64 (bitconvert (f64 (vector_extract v2f64:$vj, uimm1:$imm)))), (VPICKVE2GR_D v2f64:$vj, uimm1:$imm)>; // Vector extraction with constant index. -def : Pat<(i64 (vector_extract v16i8:$vj, uimm4:$imm)), +def : Pat<(GRLenVT (vector_extract v16i8:$vj, uimm4:$imm)), (VPICKVE2GR_B v16i8:$vj, uimm4:$imm)>; -def : Pat<(i64 (vector_extract v8i16:$vj, uimm3:$imm)), +def : Pat<(GRLenVT (vector_extract v8i16:$vj, uimm3:$imm)), (VPICKVE2GR_H v8i16:$vj, uimm3:$imm)>; -def : Pat<(i64 (vector_extract v4i32:$vj, uimm2:$imm)), +def : Pat<(GRLenVT (vector_extract v4i32:$vj, uimm2:$imm)), (VPICKVE2GR_W v4i32:$vj, uimm2:$imm)>; def : Pat<(i64 (vector_extract v2i64:$vj, uimm1:$imm)), (VPICKVE2GR_D v2i64:$vj, uimm1:$imm)>; @@ -2094,28 +2094,28 @@ def : Pat<(f64 (vector_extract v2f64:$vj, uimm1:$imm)), (f64 (EXTRACT_SUBREG (VREPLVEI_D v2f64:$vj, uimm1:$imm), sub_64))>; // Vector extraction with variable index. -def : Pat<(i64 (vector_extract v16i8:$vj, i64:$rk)), +def : Pat<(GRLenVT (vector_extract v16i8:$vj, GRLenVT:$rk)), (SRAI_W (COPY_TO_REGCLASS (f32 (EXTRACT_SUBREG (VREPLVE_B v16i8:$vj, - i64:$rk), + GRLenVT:$rk), sub_32)), - GPR), (i64 24))>; -def : Pat<(i64 (vector_extract v8i16:$vj, i64:$rk)), + GPR), (GRLenVT 24))>; +def : Pat<(GRLenVT (vector_extract v8i16:$vj, GRLenVT:$rk)), (SRAI_W (COPY_TO_REGCLASS (f32 (EXTRACT_SUBREG (VREPLVE_H v8i16:$vj, - i64:$rk), + GRLenVT:$rk), sub_32)), - GPR), (i64 16))>; -def : Pat<(i64 (vector_extract v4i32:$vj, i64:$rk)), - (COPY_TO_REGCLASS (f32 (EXTRACT_SUBREG (VREPLVE_W v4i32:$vj, i64:$rk), + GPR), (GRLenVT 16))>; +def : Pat<(GRLenVT (vector_extract v4i32:$vj, GRLenVT:$rk)), + (COPY_TO_REGCLASS (f32 (EXTRACT_SUBREG (VREPLVE_W v4i32:$vj, GRLenVT:$rk), sub_32)), GPR)>; def : Pat<(i64 (vector_extract v2i64:$vj, i64:$rk)), (COPY_TO_REGCLASS (f64 (EXTRACT_SUBREG (VREPLVE_D v2i64:$vj, i64:$rk), sub_64)), GPR)>; -def : Pat<(f32 (vector_extract v4f32:$vj, i64:$rk)), - (f32 (EXTRACT_SUBREG (VREPLVE_W v4f32:$vj, i64:$rk), sub_32))>; -def : Pat<(f64 (vector_extract v2f64:$vj, i64:$rk)), - (f64 (EXTRACT_SUBREG (VREPLVE_D v2f64:$vj, i64:$rk), sub_64))>; +def : Pat<(f32 (vector_extract v4f32:$vj, GRLenVT:$rk)), + (f32 (EXTRACT_SUBREG (VREPLVE_W v4f32:$vj, GRLenVT:$rk), sub_32))>; +def : Pat<(f64 (vector_extract v2f64:$vj, GRLenVT:$rk)), + (f64 (EXTRACT_SUBREG (VREPLVE_D v2f64:$vj, GRLenVT:$rk), sub_64))>; // vselect def : Pat<(v16i8 (vselect LSX128:$vd, (v16i8 (SplatPat_uimm8 uimm8:$imm)), diff --git a/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/extractelement.ll b/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/extractelement.ll index dddee35fb9e78..cf0496fb8fb89 100644 --- a/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/extractelement.ll +++ b/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/extractelement.ll @@ -1,12 +1,20 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 3 -; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s +; RUN: llc --mtriple=loongarch32 --mattr=+32s,+lasx < %s | FileCheck %s --check-prefixes=CHECK,LA32 +; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s --check-prefixes=CHECK,LA64 define void @extract_32xi8(ptr %src, ptr %dst) nounwind { -; CHECK-LABEL: extract_32xi8: -; CHECK: # %bb.0: -; CHECK-NEXT: xvld $xr0, $a0, 0 -; CHECK-NEXT: xvstelm.b $xr0, $a1, 0, 1 -; CHECK-NEXT: ret +; LA32-LABEL: extract_32xi8: +; LA32: # %bb.0: +; LA32-NEXT: xvld $xr0, $a0, 0 +; LA32-NEXT: vpickve2gr.b $a0, $vr0, 1 +; LA32-NEXT: st.b $a0, $a1, 0 +; LA32-NEXT: ret +; +; LA64-LABEL: extract_32xi8: +; LA64: # %bb.0: +; LA64-NEXT: xvld $xr0, $a0, 0 +; LA64-NEXT: xvstelm.b $xr0, $a1, 0, 1 +; LA64-NEXT: ret %v = load volatile <32 x i8>, ptr %src %e = extractelement <32 x i8> %v, i32 1 store i8 %e, ptr %dst @@ -14,11 +22,18 @@ define void @extract_32xi8(ptr %src, ptr %dst) nounwind { } define void @extract_16xi16(ptr %src, ptr %dst) nounwind { -; CHECK-LABEL: extract_16xi16: -; CHECK: # %bb.0: -; CHECK-NEXT: xvld $xr0, $a0, 0 -; CHECK-NEXT: xvstelm.h $xr0, $a1, 0, 1 -; CHECK-NEXT: ret +; LA32-LABEL: extract_16xi16: +; LA32: # %bb.0: +; LA32-NEXT: xvld $xr0, $a0, 0 +; LA32-NEXT: vpickve2gr.h $a0, $vr0, 1 +; LA32-NEXT: st.h $a0, $a1, 0 +; LA32-NEXT: ret +; +; LA64-LABEL: extract_16xi16: +; LA64: # %bb.0: +; LA64-NEXT: xvld $xr0, $a0, 0 +; LA64-NEXT: xvstelm.h $xr0, $a1, 0, 1 +; LA64-NEXT: ret %v = load volatile <16 x i16>, ptr %src %e = extractelement <16 x i16> %v, i32 1 store i16 %e, ptr %dst @@ -26,11 +41,18 @@ define void @extract_16xi16(ptr %src, ptr %dst) nounwind { } define void @extract_8xi32(ptr %src, ptr %dst) nounwind { -; CHECK-LABEL: extract_8xi32: -; CHECK: # %bb.0: -; CHECK-NEXT: xvld $xr0, $a0, 0 -; CHECK-NEXT: xvstelm.w $xr0, $a1, 0, 1 -; CHECK-NEXT: ret +; LA32-LABEL: extract_8xi32: +; LA32: # %bb.0: +; LA32-NEXT: xvld $xr0, $a0, 0 +; LA32-NEXT: xvpickve2gr.w $a0, $xr0, 1 +; LA32-NEXT: st.w $a0, $a1, 0 +; LA32-NEXT: ret +; +; LA64-LABEL: extract_8xi32: +; LA64: # %bb.0: +; LA64-NEXT: xvld $xr0, $a0, 0 +; LA64-NEXT: xvstelm.w $xr0, $a1, 0, 1 +; LA64-NEXT: ret %v = load volatile <8 x i32>, ptr %src %e = extractelement <8 x i32> %v, i32 1 store i32 %e, ptr %dst @@ -38,11 +60,20 @@ define void @extract_8xi32(ptr %src, ptr %dst) nounwind { } define void @extract_4xi64(ptr %src, ptr %dst) nounwind { -; CHECK-LABEL: extract_4xi64: -; CHECK: # %bb.0: -; CHECK-NEXT: xvld $xr0, $a0, 0 -; CHECK-NEXT: xvstelm.d $xr0, $a1, 0, 1 -; CHECK-NEXT: ret +; LA32-LABEL: extract_4xi64: +; LA32: # %bb.0: +; LA32-NEXT: xvld $xr0, $a0, 0 +; LA32-NEXT: xvpickve2gr.w $a0, $xr0, 2 +; LA32-NEXT: xvpickve2gr.w $a2, $xr0, 3 +; LA32-NEXT: st.w $a2, $a1, 4 +; LA32-NEXT: st.w $a0, $a1, 0 +; LA32-NEXT: ret +; +; LA64-LABEL: extract_4xi64: +; LA64: # %bb.0: +; LA64-NEXT: xvld $xr0, $a0, 0 +; LA64-NEXT: xvstelm.d $xr0, $a1, 0, 1 +; LA64-NEXT: ret %v = load volatile <4 x i64>, ptr %src %e = extractelement <4 x i64> %v, i32 1 store i64 %e, ptr %dst @@ -74,14 +105,24 @@ define void @extract_4xdouble(ptr %src, ptr %dst) nounwind { } define void @extract_32xi8_idx(ptr %src, ptr %dst, i32 %idx) nounwind { -; CHECK-LABEL: extract_32xi8_idx: -; CHECK: # %bb.0: -; CHECK-NEXT: xvld $xr0, $a0, 0 -; CHECK-NEXT: xvpermi.q $xr1, $xr0, 1 -; CHECK-NEXT: movgr2fr.w $fa2, $a2 -; CHECK-NEXT: xvshuf.b $xr0, $xr1, $xr0, $xr2 -; CHECK-NEXT: xvstelm.b $xr0, $a1, 0, 0 -; CHECK-NEXT: ret +; LA32-LABEL: extract_32xi8_idx: +; LA32: # %bb.0: +; LA32-NEXT: xvld $xr0, $a0, 0 +; LA32-NEXT: movgr2fr.w $fa1, $a2 +; LA32-NEXT: xvpermi.q $xr2, $xr0, 1 +; LA32-NEXT: xvshuf.b $xr0, $xr2, $xr0, $xr1 +; LA32-NEXT: vpickve2gr.b $a0, $vr0, 0 +; LA32-NEXT: st.b $a0, $a1, 0 +; LA32-NEXT: ret +; +; LA64-LABEL: extract_32xi8_idx: +; LA64: # %bb.0: +; LA64-NEXT: xvld $xr0, $a0, 0 +; LA64-NEXT: xvpermi.q $xr1, $xr0, 1 +; LA64-NEXT: movgr2fr.w $fa2, $a2 +; LA64-NEXT: xvshuf.b $xr0, $xr1, $xr0, $xr2 +; LA64-NEXT: xvstelm.b $xr0, $a1, 0, 0 +; LA64-NEXT: ret %v = load volatile <32 x i8>, ptr %src %e = extractelement <32 x i8> %v, i32 %idx store i8 %e, ptr %dst @@ -89,14 +130,24 @@ define void @extract_32xi8_idx(ptr %src, ptr %dst, i32 %idx) nounwind { } define void @extract_16xi16_idx(ptr %src, ptr %dst, i32 %idx) nounwind { -; CHECK-LABEL: extract_16xi16_idx: -; CHECK: # %bb.0: -; CHECK-NEXT: xvld $xr0, $a0, 0 -; CHECK-NEXT: xvpermi.q $xr1, $xr0, 1 -; CHECK-NEXT: movgr2fr.w $fa2, $a2 -; CHECK-NEXT: xvshuf.h $xr2, $xr1, $xr0 -; CHECK-NEXT: xvstelm.h $xr2, $a1, 0, 0 -; CHECK-NEXT: ret +; LA32-LABEL: extract_16xi16_idx: +; LA32: # %bb.0: +; LA32-NEXT: xvld $xr0, $a0, 0 +; LA32-NEXT: movgr2fr.w $fa1, $a2 +; LA32-NEXT: xvpermi.q $xr2, $xr0, 1 +; LA32-NEXT: xvshuf.h $xr1, $xr2, $xr0 +; LA32-NEXT: vpickve2gr.h $a0, $vr1, 0 +; LA32-NEXT: st.h $a0, $a1, 0 +; LA32-NEXT: ret +; +; LA64-LABEL: extract_16xi16_idx: +; LA64: # %bb.0: +; LA64-NEXT: xvld $xr0, $a0, 0 +; LA64-NEXT: xvpermi.q $xr1, $xr0, 1 +; LA64-NEXT: movgr2fr.w $fa2, $a2 +; LA64-NEXT: xvshuf.h $xr2, $xr1, $xr0 +; LA64-NEXT: xvstelm.h $xr2, $a1, 0, 0 +; LA64-NEXT: ret %v = load volatile <16 x i16>, ptr %src %e = extractelement <16 x i16> %v, i32 %idx store i16 %e, ptr %dst @@ -104,13 +155,22 @@ define void @extract_16xi16_idx(ptr %src, ptr %dst, i32 %idx) nounwind { } define void @extract_8xi32_idx(ptr %src, ptr %dst, i32 %idx) nounwind { -; CHECK-LABEL: extract_8xi32_idx: -; CHECK: # %bb.0: -; CHECK-NEXT: xvld $xr0, $a0, 0 -; CHECK-NEXT: xvreplgr2vr.w $xr1, $a2 -; CHECK-NEXT: xvperm.w $xr0, $xr0, $xr1 -; CHECK-NEXT: xvstelm.w $xr0, $a1, 0, 0 -; CHECK-NEXT: ret +; LA32-LABEL: extract_8xi32_idx: +; LA32: # %bb.0: +; LA32-NEXT: xvld $xr0, $a0, 0 +; LA32-NEXT: xvreplgr2vr.w $xr1, $a2 +; LA32-NEXT: xvperm.w $xr0, $xr0, $xr1 +; LA32-NEXT: xvpickve2gr.w $a0, $xr0, 0 +; LA32-NEXT: st.w $a0, $a1, 0 +; LA32-NEXT: ret +; +; LA64-LABEL: extract_8xi32_idx: +; LA64: # %bb.0: +; LA64-NEXT: xvld $xr0, $a0, 0 +; LA64-NEXT: xvreplgr2vr.w $xr1, $a2 +; LA64-NEXT: xvperm.w $xr0, $xr0, $xr1 +; LA64-NEXT: xvstelm.w $xr0, $a1, 0, 0 +; LA64-NEXT: ret %v = load volatile <8 x i32>, ptr %src %e = extractelement <8 x i32> %v, i32 %idx store i32 %e, ptr %dst @@ -118,14 +178,29 @@ define void @extract_8xi32_idx(ptr %src, ptr %dst, i32 %idx) nounwind { } define void @extract_4xi64_idx(ptr %src, ptr %dst, i32 %idx) nounwind { -; CHECK-LABEL: extract_4xi64_idx: -; CHECK: # %bb.0: -; CHECK-NEXT: xvld $xr0, $a0, 0 -; CHECK-NEXT: xvpermi.q $xr1, $xr0, 1 -; CHECK-NEXT: movgr2fr.w $fa2, $a2 -; CHECK-NEXT: xvshuf.d $xr2, $xr1, $xr0 -; CHECK-NEXT: xvstelm.d $xr2, $a1, 0, 0 -; CHECK-NEXT: ret +; LA32-LABEL: extract_4xi64_idx: +; LA32: # %bb.0: +; LA32-NEXT: xvld $xr0, $a0, 0 +; LA32-NEXT: add.w $a0, $a2, $a2 +; LA32-NEXT: addi.w $a2, $a0, 1 +; LA32-NEXT: xvreplgr2vr.w $xr1, $a2 +; LA32-NEXT: xvperm.w $xr1, $xr0, $xr1 +; LA32-NEXT: xvpickve2gr.w $a2, $xr1, 0 +; LA32-NEXT: xvreplgr2vr.w $xr1, $a0 +; LA32-NEXT: xvperm.w $xr0, $xr0, $xr1 +; LA32-NEXT: xvpickve2gr.w $a0, $xr0, 0 +; LA32-NEXT: st.w $a0, $a1, 0 +; LA32-NEXT: st.w $a2, $a1, 4 +; LA32-NEXT: ret +; +; LA64-LABEL: extract_4xi64_idx: +; LA64: # %bb.0: +; LA64-NEXT: xvld $xr0, $a0, 0 +; LA64-NEXT: xvpermi.q $xr1, $xr0, 1 +; LA64-NEXT: movgr2fr.w $fa2, $a2 +; LA64-NEXT: xvshuf.d $xr2, $xr1, $xr0 +; LA64-NEXT: xvstelm.d $xr2, $a1, 0, 0 +; LA64-NEXT: ret %v = load volatile <4 x i64>, ptr %src %e = extractelement <4 x i64> %v, i32 %idx store i64 %e, ptr %dst @@ -147,14 +222,23 @@ define void @extract_8xfloat_idx(ptr %src, ptr %dst, i32 %idx) nounwind { } define void @extract_4xdouble_idx(ptr %src, ptr %dst, i32 %idx) nounwind { -; CHECK-LABEL: extract_4xdouble_idx: -; CHECK: # %bb.0: -; CHECK-NEXT: xvld $xr0, $a0, 0 -; CHECK-NEXT: xvpermi.q $xr1, $xr0, 1 -; CHECK-NEXT: movgr2fr.w $fa2, $a2 -; CHECK-NEXT: xvshuf.d $xr2, $xr1, $xr0 -; CHECK-NEXT: xvstelm.d $xr2, $a1, 0, 0 -; CHECK-NEXT: ret +; LA32-LABEL: extract_4xdouble_idx: +; LA32: # %bb.0: +; LA32-NEXT: xvld $xr0, $a0, 0 +; LA32-NEXT: movgr2fr.w $fa1, $a2 +; LA32-NEXT: xvpermi.q $xr2, $xr0, 1 +; LA32-NEXT: xvshuf.d $xr1, $xr2, $xr0 +; LA32-NEXT: xvstelm.d $xr1, $a1, 0, 0 +; LA32-NEXT: ret +; +; LA64-LABEL: extract_4xdouble_idx: +; LA64: # %bb.0: +; LA64-NEXT: xvld $xr0, $a0, 0 +; LA64-NEXT: xvpermi.q $xr1, $xr0, 1 +; LA64-NEXT: movgr2fr.w $fa2, $a2 +; LA64-NEXT: xvshuf.d $xr2, $xr1, $xr0 +; LA64-NEXT: xvstelm.d $xr2, $a1, 0, 0 +; LA64-NEXT: ret %v = load volatile <4 x double>, ptr %src %e = extractelement <4 x double> %v, i32 %idx store double %e, ptr %dst @@ -162,13 +246,21 @@ define void @extract_4xdouble_idx(ptr %src, ptr %dst, i32 %idx) nounwind { } define void @eliminate_frame_index(<8 x i32> %a) nounwind { -; CHECK-LABEL: eliminate_frame_index: -; CHECK: # %bb.0: -; CHECK-NEXT: addi.d $sp, $sp, -1040 -; CHECK-NEXT: addi.d $a0, $sp, 524 -; CHECK-NEXT: xvstelm.w $xr0, $a0, 0, 1 -; CHECK-NEXT: addi.d $sp, $sp, 1040 -; CHECK-NEXT: ret +; LA32-LABEL: eliminate_frame_index: +; LA32: # %bb.0: +; LA32-NEXT: addi.w $sp, $sp, -1040 +; LA32-NEXT: xvpickve2gr.w $a0, $xr0, 1 +; LA32-NEXT: st.w $a0, $sp, 524 +; LA32-NEXT: addi.w $sp, $sp, 1040 +; LA32-NEXT: ret +; +; LA64-LABEL: eliminate_frame_index: +; LA64: # %bb.0: +; LA64-NEXT: addi.d $sp, $sp, -1040 +; LA64-NEXT: addi.d $a0, $sp, 524 +; LA64-NEXT: xvstelm.w $xr0, $a0, 0, 1 +; LA64-NEXT: addi.d $sp, $sp, 1040 +; LA64-NEXT: ret %1 = alloca [32 x [8 x i32]] %2 = getelementptr i8, ptr %1, i64 508 %b = extractelement <8 x i32> %a, i64 1 diff --git a/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/extractelement.ll b/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/extractelement.ll index c9c95f19c26f8..3fb55d4806160 100644 --- a/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/extractelement.ll +++ b/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/extractelement.ll @@ -1,12 +1,20 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 3 -; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s +; RUN: llc --mtriple=loongarch32 --mattr=+32s,+lsx < %s | FileCheck %s --check-prefixes=CHECK,LA32 +; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s --check-prefixes=CHECK,LA64 define void @extract_16xi8(ptr %src, ptr %dst) nounwind { -; CHECK-LABEL: extract_16xi8: -; CHECK: # %bb.0: -; CHECK-NEXT: vld $vr0, $a0, 0 -; CHECK-NEXT: vstelm.b $vr0, $a1, 0, 1 -; CHECK-NEXT: ret +; LA32-LABEL: extract_16xi8: +; LA32: # %bb.0: +; LA32-NEXT: vld $vr0, $a0, 0 +; LA32-NEXT: vpickve2gr.b $a0, $vr0, 1 +; LA32-NEXT: st.b $a0, $a1, 0 +; LA32-NEXT: ret +; +; LA64-LABEL: extract_16xi8: +; LA64: # %bb.0: +; LA64-NEXT: vld $vr0, $a0, 0 +; LA64-NEXT: vstelm.b $vr0, $a1, 0, 1 +; LA64-NEXT: ret %v = load volatile <16 x i8>, ptr %src %e = extractelement <16 x i8> %v, i32 1 store i8 %e, ptr %dst @@ -14,11 +22,18 @@ define void @extract_16xi8(ptr %src, ptr %dst) nounwind { } define void @extract_8xi16(ptr %src, ptr %dst) nounwind { -; CHECK-LABEL: extract_8xi16: -; CHECK: # %bb.0: -; CHECK-NEXT: vld $vr0, $a0, 0 -; CHECK-NEXT: vstelm.h $vr0, $a1, 0, 1 -; CHECK-NEXT: ret +; LA32-LABEL: extract_8xi16: +; LA32: # %bb.0: +; LA32-NEXT: vld $vr0, $a0, 0 +; LA32-NEXT: vpickve2gr.h $a0, $vr0, 1 +; LA32-NEXT: st.h $a0, $a1, 0 +; LA32-NEXT: ret +; +; LA64-LABEL: extract_8xi16: +; LA64: # %bb.0: +; LA64-NEXT: vld $vr0, $a0, 0 +; LA64-NEXT: vstelm.h $vr0, $a1, 0, 1 +; LA64-NEXT: ret %v = load volatile <8 x i16>, ptr %src %e = extractelement <8 x i16> %v, i32 1 store i16 %e, ptr %dst @@ -26,11 +41,18 @@ define void @extract_8xi16(ptr %src, ptr %dst) nounwind { } define void @extract_4xi32(ptr %src, ptr %dst) nounwind { -; CHECK-LABEL: extract_4xi32: -; CHECK: # %bb.0: -; CHECK-NEXT: vld $vr0, $a0, 0 -; CHECK-NEXT: vstelm.w $vr0, $a1, 0, 1 -; CHECK-NEXT: ret +; LA32-LABEL: extract_4xi32: +; LA32: # %bb.0: +; LA32-NEXT: vld $vr0, $a0, 0 +; LA32-NEXT: vpickve2gr.w $a0, $vr0, 1 +; LA32-NEXT: st.w $a0, $a1, 0 +; LA32-NEXT: ret +; +; LA64-LABEL: extract_4xi32: +; LA64: # %bb.0: +; LA64-NEXT: vld $vr0, $a0, 0 +; LA64-NEXT: vstelm.w $vr0, $a1, 0, 1 +; LA64-NEXT: ret %v = load volatile <4 x i32>, ptr %src %e = extractelement <4 x i32> %v, i32 1 store i32 %e, ptr %dst @@ -38,11 +60,20 @@ define void @extract_4xi32(ptr %src, ptr %dst) nounwind { } define void @extract_2xi64(ptr %src, ptr %dst) nounwind { -; CHECK-LABEL: extract_2xi64: -; CHECK: # %bb.0: -; CHECK-NEXT: vld $vr0, $a0, 0 -; CHECK-NEXT: vstelm.d $vr0, $a1, 0, 1 -; CHECK-NEXT: ret +; LA32-LABEL: extract_2xi64: +; LA32: # %bb.0: +; LA32-NEXT: vld $vr0, $a0, 0 +; LA32-NEXT: vpickve2gr.w $a0, $vr0, 2 +; LA32-NEXT: vpickve2gr.w $a2, $vr0, 3 +; LA32-NEXT: st.w $a2, $a1, 4 +; LA32-NEXT: st.w $a0, $a1, 0 +; LA32-NEXT: ret +; +; LA64-LABEL: extract_2xi64: +; LA64: # %bb.0: +; LA64-NEXT: vld $vr0, $a0, 0 +; LA64-NEXT: vstelm.d $vr0, $a1, 0, 1 +; LA64-NEXT: ret %v = load volatile <2 x i64>, ptr %src %e = extractelement <2 x i64> %v, i32 1 store i64 %e, ptr %dst @@ -74,15 +105,24 @@ define void @extract_2xdouble(ptr %src, ptr %dst) nounwind { } define void @extract_16xi8_idx(ptr %src, ptr %dst, i32 %idx) nounwind { -; CHECK-LABEL: extract_16xi8_idx: -; CHECK: # %bb.0: -; CHECK-NEXT: vld $vr0, $a0, 0 -; CHECK-NEXT: bstrpick.d $a0, $a2, 31, 0 -; CHECK-NEXT: vreplve.b $vr0, $vr0, $a0 -; CHECK-NEXT: movfr2gr.s $a0, $fa0 -; CHECK-NEXT: srai.w $a0, $a0, 24 -; CHECK-NEXT: st.b $a0, $a1, 0 -; CHECK-NEXT: ret +; LA32-LABEL: extract_16xi8_idx: +; LA32: # %bb.0: +; LA32-NEXT: vld $vr0, $a0, 0 +; LA32-NEXT: vreplve.b $vr0, $vr0, $a2 +; LA32-NEXT: movfr2gr.s $a0, $fa0 +; LA32-NEXT: srai.w $a0, $a0, 24 +; LA32-NEXT: st.b $a0, $a1, 0 +; LA32-NEXT: ret +; +; LA64-LABEL: extract_16xi8_idx: +; LA64: # %bb.0: +; LA64-NEXT: vld $vr0, $a0, 0 +; LA64-NEXT: bstrpick.d $a0, $a2, 31, 0 +; LA64-NEXT: vreplve.b $vr0, $vr0, $a0 +; LA64-NEXT: movfr2gr.s $a0, $fa0 +; LA64-NEXT: srai.w $a0, $a0, 24 +; LA64-NEXT: st.b $a0, $a1, 0 +; LA64-NEXT: ret %v = load volatile <16 x i8>, ptr %src %e = extractelement <16 x i8> %v, i32 %idx store i8 %e, ptr %dst @@ -90,15 +130,24 @@ define void @extract_16xi8_idx(ptr %src, ptr %dst, i32 %idx) nounwind { } define void @extract_8xi16_idx(ptr %src, ptr %dst, i32 %idx) nounwind { -; CHECK-LABEL: extract_8xi16_idx: -; CHECK: # %bb.0: -; CHECK-NEXT: vld $vr0, $a0, 0 -; CHECK-NEXT: bstrpick.d $a0, $a2, 31, 0 -; CHECK-NEXT: vreplve.h $vr0, $vr0, $a0 -; CHECK-NEXT: movfr2gr.s $a0, $fa0 -; CHECK-NEXT: srai.w $a0, $a0, 16 -; CHECK-NEXT: st.h $a0, $a1, 0 -; CHECK-NEXT: ret +; LA32-LABEL: extract_8xi16_idx: +; LA32: # %bb.0: +; LA32-NEXT: vld $vr0, $a0, 0 +; LA32-NEXT: vreplve.h $vr0, $vr0, $a2 +; LA32-NEXT: movfr2gr.s $a0, $fa0 +; LA32-NEXT: srai.w $a0, $a0, 16 +; LA32-NEXT: st.h $a0, $a1, 0 +; LA32-NEXT: ret +; +; LA64-LABEL: extract_8xi16_idx: +; LA64: # %bb.0: +; LA64-NEXT: vld $vr0, $a0, 0 +; LA64-NEXT: bstrpick.d $a0, $a2, 31, 0 +; LA64-NEXT: vreplve.h $vr0, $vr0, $a0 +; LA64-NEXT: movfr2gr.s $a0, $fa0 +; LA64-NEXT: srai.w $a0, $a0, 16 +; LA64-NEXT: st.h $a0, $a1, 0 +; LA64-NEXT: ret %v = load volatile <8 x i16>, ptr %src %e = extractelement <8 x i16> %v, i32 %idx store i16 %e, ptr %dst @@ -106,14 +155,22 @@ define void @extract_8xi16_idx(ptr %src, ptr %dst, i32 %idx) nounwind { } define void @extract_4xi32_idx(ptr %src, ptr %dst, i32 %idx) nounwind { -; CHECK-LABEL: extract_4xi32_idx: -; CHECK: # %bb.0: -; CHECK-NEXT: vld $vr0, $a0, 0 -; CHECK-NEXT: bstrpick.d $a0, $a2, 31, 0 -; CHECK-NEXT: vreplve.w $vr0, $vr0, $a0 -; CHECK-NEXT: movfr2gr.s $a0, $fa0 -; CHECK-NEXT: st.w $a0, $a1, 0 -; CHECK-NEXT: ret +; LA32-LABEL: extract_4xi32_idx: +; LA32: # %bb.0: +; LA32-NEXT: vld $vr0, $a0, 0 +; LA32-NEXT: vreplve.w $vr0, $vr0, $a2 +; LA32-NEXT: movfr2gr.s $a0, $fa0 +; LA32-NEXT: st.w $a0, $a1, 0 +; LA32-NEXT: ret +; +; LA64-LABEL: extract_4xi32_idx: +; LA64: # %bb.0: +; LA64-NEXT: vld $vr0, $a0, 0 +; LA64-NEXT: bstrpick.d $a0, $a2, 31, 0 +; LA64-NEXT: vreplve.w $vr0, $vr0, $a0 +; LA64-NEXT: movfr2gr.s $a0, $fa0 +; LA64-NEXT: st.w $a0, $a1, 0 +; LA64-NEXT: ret %v = load volatile <4 x i32>, ptr %src %e = extractelement <4 x i32> %v, i32 %idx store i32 %e, ptr %dst @@ -121,14 +178,27 @@ define void @extract_4xi32_idx(ptr %src, ptr %dst, i32 %idx) nounwind { } define void @extract_2xi64_idx(ptr %src, ptr %dst, i32 %idx) nounwind { -; CHECK-LABEL: extract_2xi64_idx: -; CHECK: # %bb.0: -; CHECK-NEXT: vld $vr0, $a0, 0 -; CHECK-NEXT: bstrpick.d $a0, $a2, 31, 0 -; CHECK-NEXT: vreplve.d $vr0, $vr0, $a0 -; CHECK-NEXT: movfr2gr.d $a0, $fa0 -; CHECK-NEXT: st.d $a0, $a1, 0 -; CHECK-NEXT: ret +; LA32-LABEL: extract_2xi64_idx: +; LA32: # %bb.0: +; LA32-NEXT: vld $vr0, $a0, 0 +; LA32-NEXT: add.w $a0, $a2, $a2 +; LA32-NEXT: addi.w $a2, $a0, 1 +; LA32-NEXT: vreplve.w $vr1, $vr0, $a2 +; LA32-NEXT: vreplve.w $vr0, $vr0, $a0 +; LA32-NEXT: movfr2gr.s $a0, $fa1 +; LA32-NEXT: movfr2gr.s $a2, $fa0 +; LA32-NEXT: st.w $a2, $a1, 0 +; LA32-NEXT: st.w $a0, $a1, 4 +; LA32-NEXT: ret +; +; LA64-LABEL: extract_2xi64_idx: +; LA64: # %bb.0: +; LA64-NEXT: vld $vr0, $a0, 0 +; LA64-NEXT: bstrpick.d $a0, $a2, 31, 0 +; LA64-NEXT: vreplve.d $vr0, $vr0, $a0 +; LA64-NEXT: movfr2gr.d $a0, $fa0 +; LA64-NEXT: st.d $a0, $a1, 0 +; LA64-NEXT: ret %v = load volatile <2 x i64>, ptr %src %e = extractelement <2 x i64> %v, i32 %idx store i64 %e, ptr %dst @@ -136,13 +206,20 @@ define void @extract_2xi64_idx(ptr %src, ptr %dst, i32 %idx) nounwind { } define void @extract_4xfloat_idx(ptr %src, ptr %dst, i32 %idx) nounwind { -; CHECK-LABEL: extract_4xfloat_idx: -; CHECK: # %bb.0: -; CHECK-NEXT: vld $vr0, $a0, 0 -; CHECK-NEXT: bstrpick.d $a0, $a2, 31, 0 -; CHECK-NEXT: vreplve.w $vr0, $vr0, $a0 -; CHECK-NEXT: fst.s $fa0, $a1, 0 -; CHECK-NEXT: ret +; LA32-LABEL: extract_4xfloat_idx: +; LA32: # %bb.0: +; LA32-NEXT: vld $vr0, $a0, 0 +; LA32-NEXT: vreplve.w $vr0, $vr0, $a2 +; LA32-NEXT: fst.s $fa0, $a1, 0 +; LA32-NEXT: ret +; +; LA64-LABEL: extract_4xfloat_idx: +; LA64: # %bb.0: +; LA64-NEXT: vld $vr0, $a0, 0 +; LA64-NEXT: bstrpick.d $a0, $a2, 31, 0 +; LA64-NEXT: vreplve.w $vr0, $vr0, $a0 +; LA64-NEXT: fst.s $fa0, $a1, 0 +; LA64-NEXT: ret %v = load volatile <4 x float>, ptr %src %e = extractelement <4 x float> %v, i32 %idx store float %e, ptr %dst @@ -150,13 +227,20 @@ define void @extract_4xfloat_idx(ptr %src, ptr %dst, i32 %idx) nounwind { } define void @extract_2xdouble_idx(ptr %src, ptr %dst, i32 %idx) nounwind { -; CHECK-LABEL: extract_2xdouble_idx: -; CHECK: # %bb.0: -; CHECK-NEXT: vld $vr0, $a0, 0 -; CHECK-NEXT: bstrpick.d $a0, $a2, 31, 0 -; CHECK-NEXT: vreplve.d $vr0, $vr0, $a0 -; CHECK-NEXT: fst.d $fa0, $a1, 0 -; CHECK-NEXT: ret +; LA32-LABEL: extract_2xdouble_idx: +; LA32: # %bb.0: +; LA32-NEXT: vld $vr0, $a0, 0 +; LA32-NEXT: vreplve.d $vr0, $vr0, $a2 +; LA32-NEXT: fst.d $fa0, $a1, 0 +; LA32-NEXT: ret +; +; LA64-LABEL: extract_2xdouble_idx: +; LA64: # %bb.0: +; LA64-NEXT: vld $vr0, $a0, 0 +; LA64-NEXT: bstrpick.d $a0, $a2, 31, 0 +; LA64-NEXT: vreplve.d $vr0, $vr0, $a0 +; LA64-NEXT: fst.d $fa0, $a1, 0 +; LA64-NEXT: ret %v = load volatile <2 x double>, ptr %src %e = extractelement <2 x double> %v, i32 %idx store double %e, ptr %dst @@ -164,13 +248,21 @@ define void @extract_2xdouble_idx(ptr %src, ptr %dst, i32 %idx) nounwind { } define void @eliminate_frame_index(<4 x i32> %a) nounwind { -; CHECK-LABEL: eliminate_frame_index: -; CHECK: # %bb.0: -; CHECK-NEXT: addi.d $sp, $sp, -1040 -; CHECK-NEXT: addi.d $a0, $sp, 524 -; CHECK-NEXT: vstelm.w $vr0, $a0, 0, 1 -; CHECK-NEXT: addi.d $sp, $sp, 1040 -; CHECK-NEXT: ret +; LA32-LABEL: eliminate_frame_index: +; LA32: # %bb.0: +; LA32-NEXT: addi.w $sp, $sp, -1040 +; LA32-NEXT: vpickve2gr.w $a0, $vr0, 1 +; LA32-NEXT: st.w $a0, $sp, 524 +; LA32-NEXT: addi.w $sp, $sp, 1040 +; LA32-NEXT: ret +; +; LA64-LABEL: eliminate_frame_index: +; LA64: # %bb.0: +; LA64-NEXT: addi.d $sp, $sp, -1040 +; LA64-NEXT: addi.d $a0, $sp, 524 +; LA64-NEXT: vstelm.w $vr0, $a0, 0, 1 +; LA64-NEXT: addi.d $sp, $sp, 1040 +; LA64-NEXT: ret %1 = alloca [64 x [4 x i32]] %2 = getelementptr i8, ptr %1, i64 508 %b = extractelement <4 x i32> %a, i64 1