-
Notifications
You must be signed in to change notification settings - Fork 15.2k
[LoongArch] Simply fix extractelement on LA32 #159564
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Conversation
@llvm/pr-subscribers-backend-loongarch Author: ZhaoQi (zhaoqi5) Changes — Patch is 28.59 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/159564.diff 5 Files Affected:
diff --git a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
index e8668860c2b38..9f2704ab2d600 100644
--- a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
+++ b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
@@ -2855,6 +2855,10 @@ LoongArchTargetLowering::lowerEXTRACT_VECTOR_ELT(SDValue Op,
case MVT::v16i16:
case MVT::v4i64:
case MVT::v4f64: {
+ // TODO: Similar optimization can be applied for la32.
+ if (!Subtarget.is64Bit())
+ return SDValue();
+
// Extract the high half subvector and place it to the low half of a new
// vector. It doesn't matter what the high half of the new vector is.
EVT HalfTy = VecTy.getHalfNumVectorElementsVT(*DAG.getContext());
diff --git a/llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td b/llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td
index 2e8e11155c5fa..d7aafe7c58c5f 100644
--- a/llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td
+++ b/llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td
@@ -1915,21 +1915,21 @@ def : Pat<(i64 (bitconvert (f64 (vector_extract v4f64:$xj, uimm2:$imm)))),
// Vector extraction with constant index.
foreach imm = 16...31 in {
defvar Imm = !and(imm, 15);
- def : Pat<(i64 (vector_extract v32i8:$xj, imm)),
+ def : Pat<(GRLenVT (vector_extract v32i8:$xj, imm)),
(VPICKVE2GR_B (EXTRACT_SUBREG (XVPERMI_D v32i8:$xj, 14), sub_128),
Imm)>;
}
foreach imm = 8...15 in {
defvar Imm = !and(imm, 7);
- def : Pat<(i64 (vector_extract v16i16:$xj, imm)),
+ def : Pat<(GRLenVT (vector_extract v16i16:$xj, imm)),
(VPICKVE2GR_H (EXTRACT_SUBREG (XVPERMI_D v16i16:$xj, 14), sub_128),
Imm)>;
}
-def : Pat<(i64 (vector_extract v32i8:$xj, uimm4:$imm)),
+def : Pat<(GRLenVT (vector_extract v32i8:$xj, uimm4:$imm)),
(VPICKVE2GR_B (EXTRACT_SUBREG v32i8:$xj, sub_128), uimm4:$imm)>;
-def : Pat<(i64 (vector_extract v16i16:$xj, uimm3:$imm)),
+def : Pat<(GRLenVT (vector_extract v16i16:$xj, uimm3:$imm)),
(VPICKVE2GR_H (EXTRACT_SUBREG v16i16:$xj, sub_128), uimm3:$imm)>;
-def : Pat<(i64 (vector_extract v8i32:$xj, uimm3:$imm)),
+def : Pat<(GRLenVT (vector_extract v8i32:$xj, uimm3:$imm)),
(XVPICKVE2GR_W v8i32:$xj, uimm3:$imm)>;
def : Pat<(i64 (vector_extract v4i64:$xj, uimm2:$imm)),
(XVPICKVE2GR_D v4i64:$xj, uimm2:$imm)>;
diff --git a/llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td b/llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td
index 5421bba0424bf..ac8bbd9ad1752 100644
--- a/llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td
+++ b/llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td
@@ -2080,11 +2080,11 @@ def : Pat<(i64 (bitconvert (f64 (vector_extract v2f64:$vj, uimm1:$imm)))),
(VPICKVE2GR_D v2f64:$vj, uimm1:$imm)>;
// Vector extraction with constant index.
-def : Pat<(i64 (vector_extract v16i8:$vj, uimm4:$imm)),
+def : Pat<(GRLenVT (vector_extract v16i8:$vj, uimm4:$imm)),
(VPICKVE2GR_B v16i8:$vj, uimm4:$imm)>;
-def : Pat<(i64 (vector_extract v8i16:$vj, uimm3:$imm)),
+def : Pat<(GRLenVT (vector_extract v8i16:$vj, uimm3:$imm)),
(VPICKVE2GR_H v8i16:$vj, uimm3:$imm)>;
-def : Pat<(i64 (vector_extract v4i32:$vj, uimm2:$imm)),
+def : Pat<(GRLenVT (vector_extract v4i32:$vj, uimm2:$imm)),
(VPICKVE2GR_W v4i32:$vj, uimm2:$imm)>;
def : Pat<(i64 (vector_extract v2i64:$vj, uimm1:$imm)),
(VPICKVE2GR_D v2i64:$vj, uimm1:$imm)>;
@@ -2094,28 +2094,28 @@ def : Pat<(f64 (vector_extract v2f64:$vj, uimm1:$imm)),
(f64 (EXTRACT_SUBREG (VREPLVEI_D v2f64:$vj, uimm1:$imm), sub_64))>;
// Vector extraction with variable index.
-def : Pat<(i64 (vector_extract v16i8:$vj, i64:$rk)),
+def : Pat<(GRLenVT (vector_extract v16i8:$vj, GRLenVT:$rk)),
(SRAI_W (COPY_TO_REGCLASS (f32 (EXTRACT_SUBREG (VREPLVE_B v16i8:$vj,
- i64:$rk),
+ GRLenVT:$rk),
sub_32)),
- GPR), (i64 24))>;
-def : Pat<(i64 (vector_extract v8i16:$vj, i64:$rk)),
+ GPR), (GRLenVT 24))>;
+def : Pat<(GRLenVT (vector_extract v8i16:$vj, GRLenVT:$rk)),
(SRAI_W (COPY_TO_REGCLASS (f32 (EXTRACT_SUBREG (VREPLVE_H v8i16:$vj,
- i64:$rk),
+ GRLenVT:$rk),
sub_32)),
- GPR), (i64 16))>;
-def : Pat<(i64 (vector_extract v4i32:$vj, i64:$rk)),
- (COPY_TO_REGCLASS (f32 (EXTRACT_SUBREG (VREPLVE_W v4i32:$vj, i64:$rk),
+ GPR), (GRLenVT 16))>;
+def : Pat<(GRLenVT (vector_extract v4i32:$vj, GRLenVT:$rk)),
+ (COPY_TO_REGCLASS (f32 (EXTRACT_SUBREG (VREPLVE_W v4i32:$vj, GRLenVT:$rk),
sub_32)),
GPR)>;
def : Pat<(i64 (vector_extract v2i64:$vj, i64:$rk)),
(COPY_TO_REGCLASS (f64 (EXTRACT_SUBREG (VREPLVE_D v2i64:$vj, i64:$rk),
sub_64)),
GPR)>;
-def : Pat<(f32 (vector_extract v4f32:$vj, i64:$rk)),
- (f32 (EXTRACT_SUBREG (VREPLVE_W v4f32:$vj, i64:$rk), sub_32))>;
-def : Pat<(f64 (vector_extract v2f64:$vj, i64:$rk)),
- (f64 (EXTRACT_SUBREG (VREPLVE_D v2f64:$vj, i64:$rk), sub_64))>;
+def : Pat<(f32 (vector_extract v4f32:$vj, GRLenVT:$rk)),
+ (f32 (EXTRACT_SUBREG (VREPLVE_W v4f32:$vj, GRLenVT:$rk), sub_32))>;
+def : Pat<(f64 (vector_extract v2f64:$vj, GRLenVT:$rk)),
+ (f64 (EXTRACT_SUBREG (VREPLVE_D v2f64:$vj, GRLenVT:$rk), sub_64))>;
// vselect
def : Pat<(v16i8 (vselect LSX128:$vd, (v16i8 (SplatPat_uimm8 uimm8:$imm)),
diff --git a/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/extractelement.ll b/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/extractelement.ll
index dddee35fb9e78..e1296549eecae 100644
--- a/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/extractelement.ll
+++ b/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/extractelement.ll
@@ -1,12 +1,20 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 3
-; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s
+; RUN: llc --mtriple=loongarch32 --mattr=+32s,+lasx < %s | FileCheck %s --check-prefixes=CHECK,LA32
+; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s --check-prefixes=CHECK,LA64
define void @extract_32xi8(ptr %src, ptr %dst) nounwind {
-; CHECK-LABEL: extract_32xi8:
-; CHECK: # %bb.0:
-; CHECK-NEXT: xvld $xr0, $a0, 0
-; CHECK-NEXT: xvstelm.b $xr0, $a1, 0, 1
-; CHECK-NEXT: ret
+; LA32-LABEL: extract_32xi8:
+; LA32: # %bb.0:
+; LA32-NEXT: xvld $xr0, $a0, 0
+; LA32-NEXT: vpickve2gr.b $a0, $vr0, 1
+; LA32-NEXT: st.b $a0, $a1, 0
+; LA32-NEXT: ret
+;
+; LA64-LABEL: extract_32xi8:
+; LA64: # %bb.0:
+; LA64-NEXT: xvld $xr0, $a0, 0
+; LA64-NEXT: xvstelm.b $xr0, $a1, 0, 1
+; LA64-NEXT: ret
%v = load volatile <32 x i8>, ptr %src
%e = extractelement <32 x i8> %v, i32 1
store i8 %e, ptr %dst
@@ -14,11 +22,18 @@ define void @extract_32xi8(ptr %src, ptr %dst) nounwind {
}
define void @extract_16xi16(ptr %src, ptr %dst) nounwind {
-; CHECK-LABEL: extract_16xi16:
-; CHECK: # %bb.0:
-; CHECK-NEXT: xvld $xr0, $a0, 0
-; CHECK-NEXT: xvstelm.h $xr0, $a1, 0, 1
-; CHECK-NEXT: ret
+; LA32-LABEL: extract_16xi16:
+; LA32: # %bb.0:
+; LA32-NEXT: xvld $xr0, $a0, 0
+; LA32-NEXT: vpickve2gr.h $a0, $vr0, 1
+; LA32-NEXT: st.h $a0, $a1, 0
+; LA32-NEXT: ret
+;
+; LA64-LABEL: extract_16xi16:
+; LA64: # %bb.0:
+; LA64-NEXT: xvld $xr0, $a0, 0
+; LA64-NEXT: xvstelm.h $xr0, $a1, 0, 1
+; LA64-NEXT: ret
%v = load volatile <16 x i16>, ptr %src
%e = extractelement <16 x i16> %v, i32 1
store i16 %e, ptr %dst
@@ -26,11 +41,18 @@ define void @extract_16xi16(ptr %src, ptr %dst) nounwind {
}
define void @extract_8xi32(ptr %src, ptr %dst) nounwind {
-; CHECK-LABEL: extract_8xi32:
-; CHECK: # %bb.0:
-; CHECK-NEXT: xvld $xr0, $a0, 0
-; CHECK-NEXT: xvstelm.w $xr0, $a1, 0, 1
-; CHECK-NEXT: ret
+; LA32-LABEL: extract_8xi32:
+; LA32: # %bb.0:
+; LA32-NEXT: xvld $xr0, $a0, 0
+; LA32-NEXT: xvpickve2gr.w $a0, $xr0, 1
+; LA32-NEXT: st.w $a0, $a1, 0
+; LA32-NEXT: ret
+;
+; LA64-LABEL: extract_8xi32:
+; LA64: # %bb.0:
+; LA64-NEXT: xvld $xr0, $a0, 0
+; LA64-NEXT: xvstelm.w $xr0, $a1, 0, 1
+; LA64-NEXT: ret
%v = load volatile <8 x i32>, ptr %src
%e = extractelement <8 x i32> %v, i32 1
store i32 %e, ptr %dst
@@ -38,11 +60,20 @@ define void @extract_8xi32(ptr %src, ptr %dst) nounwind {
}
define void @extract_4xi64(ptr %src, ptr %dst) nounwind {
-; CHECK-LABEL: extract_4xi64:
-; CHECK: # %bb.0:
-; CHECK-NEXT: xvld $xr0, $a0, 0
-; CHECK-NEXT: xvstelm.d $xr0, $a1, 0, 1
-; CHECK-NEXT: ret
+; LA32-LABEL: extract_4xi64:
+; LA32: # %bb.0:
+; LA32-NEXT: xvld $xr0, $a0, 0
+; LA32-NEXT: xvpickve2gr.w $a0, $xr0, 2
+; LA32-NEXT: xvpickve2gr.w $a2, $xr0, 3
+; LA32-NEXT: st.w $a2, $a1, 4
+; LA32-NEXT: st.w $a0, $a1, 0
+; LA32-NEXT: ret
+;
+; LA64-LABEL: extract_4xi64:
+; LA64: # %bb.0:
+; LA64-NEXT: xvld $xr0, $a0, 0
+; LA64-NEXT: xvstelm.d $xr0, $a1, 0, 1
+; LA64-NEXT: ret
%v = load volatile <4 x i64>, ptr %src
%e = extractelement <4 x i64> %v, i32 1
store i64 %e, ptr %dst
@@ -74,14 +105,33 @@ define void @extract_4xdouble(ptr %src, ptr %dst) nounwind {
}
define void @extract_32xi8_idx(ptr %src, ptr %dst, i32 %idx) nounwind {
-; CHECK-LABEL: extract_32xi8_idx:
-; CHECK: # %bb.0:
-; CHECK-NEXT: xvld $xr0, $a0, 0
-; CHECK-NEXT: xvpermi.q $xr1, $xr0, 1
-; CHECK-NEXT: movgr2fr.w $fa2, $a2
-; CHECK-NEXT: xvshuf.b $xr0, $xr1, $xr0, $xr2
-; CHECK-NEXT: xvstelm.b $xr0, $a1, 0, 0
-; CHECK-NEXT: ret
+; LA32-LABEL: extract_32xi8_idx:
+; LA32: # %bb.0:
+; LA32-NEXT: addi.w $sp, $sp, -96
+; LA32-NEXT: st.w $ra, $sp, 92 # 4-byte Folded Spill
+; LA32-NEXT: st.w $fp, $sp, 88 # 4-byte Folded Spill
+; LA32-NEXT: addi.w $fp, $sp, 96
+; LA32-NEXT: bstrins.w $sp, $zero, 4, 0
+; LA32-NEXT: xvld $xr0, $a0, 0
+; LA32-NEXT: addi.w $a0, $sp, 32
+; LA32-NEXT: bstrins.w $a0, $a2, 4, 0
+; LA32-NEXT: xvst $xr0, $sp, 32
+; LA32-NEXT: ld.b $a0, $a0, 0
+; LA32-NEXT: st.b $a0, $a1, 0
+; LA32-NEXT: addi.w $sp, $fp, -96
+; LA32-NEXT: ld.w $fp, $sp, 88 # 4-byte Folded Reload
+; LA32-NEXT: ld.w $ra, $sp, 92 # 4-byte Folded Reload
+; LA32-NEXT: addi.w $sp, $sp, 96
+; LA32-NEXT: ret
+;
+; LA64-LABEL: extract_32xi8_idx:
+; LA64: # %bb.0:
+; LA64-NEXT: xvld $xr0, $a0, 0
+; LA64-NEXT: xvpermi.q $xr1, $xr0, 1
+; LA64-NEXT: movgr2fr.w $fa2, $a2
+; LA64-NEXT: xvshuf.b $xr0, $xr1, $xr0, $xr2
+; LA64-NEXT: xvstelm.b $xr0, $a1, 0, 0
+; LA64-NEXT: ret
%v = load volatile <32 x i8>, ptr %src
%e = extractelement <32 x i8> %v, i32 %idx
store i8 %e, ptr %dst
@@ -89,14 +139,33 @@ define void @extract_32xi8_idx(ptr %src, ptr %dst, i32 %idx) nounwind {
}
define void @extract_16xi16_idx(ptr %src, ptr %dst, i32 %idx) nounwind {
-; CHECK-LABEL: extract_16xi16_idx:
-; CHECK: # %bb.0:
-; CHECK-NEXT: xvld $xr0, $a0, 0
-; CHECK-NEXT: xvpermi.q $xr1, $xr0, 1
-; CHECK-NEXT: movgr2fr.w $fa2, $a2
-; CHECK-NEXT: xvshuf.h $xr2, $xr1, $xr0
-; CHECK-NEXT: xvstelm.h $xr2, $a1, 0, 0
-; CHECK-NEXT: ret
+; LA32-LABEL: extract_16xi16_idx:
+; LA32: # %bb.0:
+; LA32-NEXT: addi.w $sp, $sp, -96
+; LA32-NEXT: st.w $ra, $sp, 92 # 4-byte Folded Spill
+; LA32-NEXT: st.w $fp, $sp, 88 # 4-byte Folded Spill
+; LA32-NEXT: addi.w $fp, $sp, 96
+; LA32-NEXT: bstrins.w $sp, $zero, 4, 0
+; LA32-NEXT: xvld $xr0, $a0, 0
+; LA32-NEXT: addi.w $a0, $sp, 32
+; LA32-NEXT: bstrins.w $a0, $a2, 4, 1
+; LA32-NEXT: xvst $xr0, $sp, 32
+; LA32-NEXT: ld.h $a0, $a0, 0
+; LA32-NEXT: st.h $a0, $a1, 0
+; LA32-NEXT: addi.w $sp, $fp, -96
+; LA32-NEXT: ld.w $fp, $sp, 88 # 4-byte Folded Reload
+; LA32-NEXT: ld.w $ra, $sp, 92 # 4-byte Folded Reload
+; LA32-NEXT: addi.w $sp, $sp, 96
+; LA32-NEXT: ret
+;
+; LA64-LABEL: extract_16xi16_idx:
+; LA64: # %bb.0:
+; LA64-NEXT: xvld $xr0, $a0, 0
+; LA64-NEXT: xvpermi.q $xr1, $xr0, 1
+; LA64-NEXT: movgr2fr.w $fa2, $a2
+; LA64-NEXT: xvshuf.h $xr2, $xr1, $xr0
+; LA64-NEXT: xvstelm.h $xr2, $a1, 0, 0
+; LA64-NEXT: ret
%v = load volatile <16 x i16>, ptr %src
%e = extractelement <16 x i16> %v, i32 %idx
store i16 %e, ptr %dst
@@ -104,13 +173,22 @@ define void @extract_16xi16_idx(ptr %src, ptr %dst, i32 %idx) nounwind {
}
define void @extract_8xi32_idx(ptr %src, ptr %dst, i32 %idx) nounwind {
-; CHECK-LABEL: extract_8xi32_idx:
-; CHECK: # %bb.0:
-; CHECK-NEXT: xvld $xr0, $a0, 0
-; CHECK-NEXT: xvreplgr2vr.w $xr1, $a2
-; CHECK-NEXT: xvperm.w $xr0, $xr0, $xr1
-; CHECK-NEXT: xvstelm.w $xr0, $a1, 0, 0
-; CHECK-NEXT: ret
+; LA32-LABEL: extract_8xi32_idx:
+; LA32: # %bb.0:
+; LA32-NEXT: xvld $xr0, $a0, 0
+; LA32-NEXT: xvreplgr2vr.w $xr1, $a2
+; LA32-NEXT: xvperm.w $xr0, $xr0, $xr1
+; LA32-NEXT: xvpickve2gr.w $a0, $xr0, 0
+; LA32-NEXT: st.w $a0, $a1, 0
+; LA32-NEXT: ret
+;
+; LA64-LABEL: extract_8xi32_idx:
+; LA64: # %bb.0:
+; LA64-NEXT: xvld $xr0, $a0, 0
+; LA64-NEXT: xvreplgr2vr.w $xr1, $a2
+; LA64-NEXT: xvperm.w $xr0, $xr0, $xr1
+; LA64-NEXT: xvstelm.w $xr0, $a1, 0, 0
+; LA64-NEXT: ret
%v = load volatile <8 x i32>, ptr %src
%e = extractelement <8 x i32> %v, i32 %idx
store i32 %e, ptr %dst
@@ -118,14 +196,29 @@ define void @extract_8xi32_idx(ptr %src, ptr %dst, i32 %idx) nounwind {
}
define void @extract_4xi64_idx(ptr %src, ptr %dst, i32 %idx) nounwind {
-; CHECK-LABEL: extract_4xi64_idx:
-; CHECK: # %bb.0:
-; CHECK-NEXT: xvld $xr0, $a0, 0
-; CHECK-NEXT: xvpermi.q $xr1, $xr0, 1
-; CHECK-NEXT: movgr2fr.w $fa2, $a2
-; CHECK-NEXT: xvshuf.d $xr2, $xr1, $xr0
-; CHECK-NEXT: xvstelm.d $xr2, $a1, 0, 0
-; CHECK-NEXT: ret
+; LA32-LABEL: extract_4xi64_idx:
+; LA32: # %bb.0:
+; LA32-NEXT: xvld $xr0, $a0, 0
+; LA32-NEXT: add.w $a0, $a2, $a2
+; LA32-NEXT: addi.w $a2, $a0, 1
+; LA32-NEXT: xvreplgr2vr.w $xr1, $a2
+; LA32-NEXT: xvperm.w $xr1, $xr0, $xr1
+; LA32-NEXT: xvpickve2gr.w $a2, $xr1, 0
+; LA32-NEXT: xvreplgr2vr.w $xr1, $a0
+; LA32-NEXT: xvperm.w $xr0, $xr0, $xr1
+; LA32-NEXT: xvpickve2gr.w $a0, $xr0, 0
+; LA32-NEXT: st.w $a0, $a1, 0
+; LA32-NEXT: st.w $a2, $a1, 4
+; LA32-NEXT: ret
+;
+; LA64-LABEL: extract_4xi64_idx:
+; LA64: # %bb.0:
+; LA64-NEXT: xvld $xr0, $a0, 0
+; LA64-NEXT: xvpermi.q $xr1, $xr0, 1
+; LA64-NEXT: movgr2fr.w $fa2, $a2
+; LA64-NEXT: xvshuf.d $xr2, $xr1, $xr0
+; LA64-NEXT: xvstelm.d $xr2, $a1, 0, 0
+; LA64-NEXT: ret
%v = load volatile <4 x i64>, ptr %src
%e = extractelement <4 x i64> %v, i32 %idx
store i64 %e, ptr %dst
@@ -147,14 +240,33 @@ define void @extract_8xfloat_idx(ptr %src, ptr %dst, i32 %idx) nounwind {
}
define void @extract_4xdouble_idx(ptr %src, ptr %dst, i32 %idx) nounwind {
-; CHECK-LABEL: extract_4xdouble_idx:
-; CHECK: # %bb.0:
-; CHECK-NEXT: xvld $xr0, $a0, 0
-; CHECK-NEXT: xvpermi.q $xr1, $xr0, 1
-; CHECK-NEXT: movgr2fr.w $fa2, $a2
-; CHECK-NEXT: xvshuf.d $xr2, $xr1, $xr0
-; CHECK-NEXT: xvstelm.d $xr2, $a1, 0, 0
-; CHECK-NEXT: ret
+; LA32-LABEL: extract_4xdouble_idx:
+; LA32: # %bb.0:
+; LA32-NEXT: addi.w $sp, $sp, -96
+; LA32-NEXT: st.w $ra, $sp, 92 # 4-byte Folded Spill
+; LA32-NEXT: st.w $fp, $sp, 88 # 4-byte Folded Spill
+; LA32-NEXT: addi.w $fp, $sp, 96
+; LA32-NEXT: bstrins.w $sp, $zero, 4, 0
+; LA32-NEXT: xvld $xr0, $a0, 0
+; LA32-NEXT: addi.w $a0, $sp, 32
+; LA32-NEXT: bstrins.w $a0, $a2, 4, 3
+; LA32-NEXT: xvst $xr0, $sp, 32
+; LA32-NEXT: fld.d $fa0, $a0, 0
+; LA32-NEXT: fst.d $fa0, $a1, 0
+; LA32-NEXT: addi.w $sp, $fp, -96
+; LA32-NEXT: ld.w $fp, $sp, 88 # 4-byte Folded Reload
+; LA32-NEXT: ld.w $ra, $sp, 92 # 4-byte Folded Reload
+; LA32-NEXT: addi.w $sp, $sp, 96
+; LA32-NEXT: ret
+;
+; LA64-LABEL: extract_4xdouble_idx:
+; LA64: # %bb.0:
+; LA64-NEXT: xvld $xr0, $a0, 0
+; LA64-NEXT: xvpermi.q $xr1, $xr0, 1
+; LA64-NEXT: movgr2fr.w $fa2, $a2
+; LA64-NEXT: xvshuf.d $xr2, $xr1, $xr0
+; LA64-NEXT: xvstelm.d $xr2, $a1, 0, 0
+; LA64-NEXT: ret
%v = load volatile <4 x double>, ptr %src
%e = extractelement <4 x double> %v, i32 %idx
store double %e, ptr %dst
@@ -162,13 +274,21 @@ define void @extract_4xdouble_idx(ptr %src, ptr %dst, i32 %idx) nounwind {
}
define void @eliminate_frame_index(<8 x i32> %a) nounwind {
-; CHECK-LABEL: eliminate_frame_index:
-; CHECK: # %bb.0:
-; CHECK-NEXT: addi.d $sp, $sp, -1040
-; CHECK-NEXT: addi.d $a0, $sp, 524
-; CHECK-NEXT: xvstelm.w $xr0, $a0, 0, 1
-; CHECK-NEXT: addi.d $sp, $sp, 1040
-; CHECK-NEXT: ret
+; LA32-LABEL: eliminate_frame_index:
+; LA32: # %bb.0:
+; LA32-NEXT: addi.w $sp, $sp, -1040
+; LA32-NEXT: xvpickve2gr.w $a0, $xr0, 1
+; LA32-NEXT: st.w $a0, $sp, 524
+; LA32-NEXT: addi.w $sp, $sp, 1040
+; LA32-NEXT: ret
+;
+; LA64-LABEL: eliminate_frame_index:
+; LA64: # %bb.0:
+; LA64-NEXT: addi.d $sp, $sp, -1040
+; LA64-NEXT: addi.d $a0, $sp, 524
+; LA64-NEXT: xvstelm.w $xr0, $a0, 0, 1
+; LA64-NEXT: addi.d $sp, $sp, 1040
+; LA64-NEXT: ret
%1 = alloca [32 x [8 x i32]]
%2 = getelementptr i8, ptr %1, i64 508
%b = extractelement <8 x i32> %a, i64 1
diff --git a/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/extractelement.ll b/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/extractelement.ll
index c9c95f19c26f8..3fb55d4806160 100644
--- a/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/extractelement.ll
+++ b/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/extractelement.ll
@@ -1,12 +1,20 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 3
-; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s
+; RUN: llc --mtriple=loongarch32 --mattr=+32s,+lsx < %s | FileCheck %s --check-prefixes=CHECK,LA32
+; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s --check-prefixes=CHECK,LA64
define void @extract_16xi8(ptr %src, ptr %dst) nounwind {
-; CHECK-LABEL: extract_16xi8:
-; CHECK: # %bb.0:
-; CHECK-NEXT: vld $vr0, $a0, 0
-; CHECK-NEXT: vstelm.b $vr0, $a1, 0, 1
-; CHECK-NEXT: ret
+; LA32-LABEL: extract_16xi8:
+; LA32: # %bb.0:
+; LA32-NEXT: vld $vr0, $a0, 0
+; LA32-NEXT: vpickve2gr.b $a0, $vr0, 1
+; LA32-NEXT: st.b $a0, $a1, 0
+; LA32-NEXT: ret
+;
+; LA64-LABEL: extract_16xi8:
+; LA64: # %bb.0:
+; LA64-NEXT: vld $vr0, $a0, 0
+; LA64-NEXT: vstelm.b $vr0, $a1, 0, 1
+; LA64-NEXT: ret
%v = load volatile <16 x i8>, ptr %src
%e = extractelement <16 x i8> %v, i32 1
store i8 %e, ptr %dst
@@ -14,11 +22,18 @@ define void @extract_16xi8(ptr %src, ptr %dst) nounwind {
}
define void @extract_8xi16(ptr %src, ptr %dst) nounwind {
-; CHECK-LABEL: extract_8xi16:
-; CHECK: # %bb.0:
-; CHECK-NEXT: vld $vr0, $a0, 0
-; CHECK-NEXT: vstelm.h $vr0, $a1, 0, 1
-; CHECK-NEXT: ret
+; LA32-LABEL: extract_8xi16:
+; LA32: # %bb.0:
+; LA32-NEXT: vld $vr0, $a0, 0
+; LA32-NEXT: vpickve2gr.h $a0, $vr0, 1
+; LA32-NEXT: st.h $a0, $a1, 0
+; LA32-NEXT: ret
+;
+; LA64-LABEL: extract_8xi16:
+; LA64: # %bb.0:
+; LA64-NEXT: vld $vr0, $a0, 0
+; LA64-NEXT: vstelm.h $vr0, $a1, 0, 1
+; LA64-NEXT: ret
%v = load volatile <8 x i16>, ptr %src
%e = extractelement <8 x i16> %v, i32 1
store i16 %e, ptr %dst
@@ -26,11 +41,18 @@ define void @extract_8xi16(ptr %src, ptr %dst) nounwind {
}
define void @extract_4xi32(ptr %src, ptr %dst) nounwind {
-; CHECK-LABEL: extract_4xi32:
-; CHECK: # %bb.0:
-; CHECK-NEXT: vld $vr0, $a0, 0
-; CHECK-NEXT: vstelm.w $vr0, $a1, 0, 1
-; CHECK-NEXT: ret
+; LA32-LABEL: extract_4xi32:
+; LA32: # %bb.0:
+; LA32-NEXT: vld $vr0, $a0, 0
+; LA32-NEXT: vpickve2gr.w $a0,...
[truncated]
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Thanks for fixing LA32. There’s an ongoing branch working on fixing the LA32 vector. Since the work isn’t complete yet, no PR has been opened. In the meantime, feel free to submit separate PRs for individual test issues that are blocking progress.
I am doing some optimization and want to add tests for LA32 in the meantime, but errors occurred. So I pushed this simple fix first. Vectorization on LA32 indeed still needs many fixes. Thanks for your continuous work on it.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
LGTM.
No description provided.