Skip to content

Conversation

zhaoqi5
Copy link
Contributor

@zhaoqi5 zhaoqi5 commented Sep 18, 2025

No description provided.

@llvmbot
Copy link
Member

llvmbot commented Sep 18, 2025

@llvm/pr-subscribers-backend-loongarch

Author: ZhaoQi (zhaoqi5)

Changes

Patch is 28.59 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/159564.diff

5 Files Affected:

  • (modified) llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp (+4)
  • (modified) llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td (+5-5)
  • (modified) llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td (+15-15)
  • (modified) llvm/test/CodeGen/LoongArch/lasx/ir-instruction/extractelement.ll (+187-67)
  • (modified) llvm/test/CodeGen/LoongArch/lsx/ir-instruction/extractelement.ll (+168-76)
diff --git a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
index e8668860c2b38..9f2704ab2d600 100644
--- a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
+++ b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
@@ -2855,6 +2855,10 @@ LoongArchTargetLowering::lowerEXTRACT_VECTOR_ELT(SDValue Op,
   case MVT::v16i16:
   case MVT::v4i64:
   case MVT::v4f64: {
+    // TODO: Similar optimization can be applied for la32.
+    if (!Subtarget.is64Bit())
+      return SDValue();
+
     // Extract the high half subvector and place it to the low half of a new
     // vector. It doesn't matter what the high half of the new vector is.
     EVT HalfTy = VecTy.getHalfNumVectorElementsVT(*DAG.getContext());
diff --git a/llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td b/llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td
index 2e8e11155c5fa..d7aafe7c58c5f 100644
--- a/llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td
+++ b/llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td
@@ -1915,21 +1915,21 @@ def : Pat<(i64 (bitconvert (f64 (vector_extract v4f64:$xj, uimm2:$imm)))),
 // Vector extraction with constant index.
 foreach imm = 16...31 in {
   defvar Imm = !and(imm, 15);
-  def : Pat<(i64 (vector_extract v32i8:$xj, imm)),
+  def : Pat<(GRLenVT (vector_extract v32i8:$xj, imm)),
             (VPICKVE2GR_B (EXTRACT_SUBREG (XVPERMI_D v32i8:$xj, 14), sub_128),
                 Imm)>;
 }
 foreach imm = 8...15 in {
   defvar Imm = !and(imm, 7);
-  def : Pat<(i64 (vector_extract v16i16:$xj, imm)),
+  def : Pat<(GRLenVT (vector_extract v16i16:$xj, imm)),
             (VPICKVE2GR_H (EXTRACT_SUBREG (XVPERMI_D v16i16:$xj, 14), sub_128),
                 Imm)>;
 }
-def : Pat<(i64 (vector_extract v32i8:$xj, uimm4:$imm)),
+def : Pat<(GRLenVT (vector_extract v32i8:$xj, uimm4:$imm)),
           (VPICKVE2GR_B (EXTRACT_SUBREG v32i8:$xj, sub_128), uimm4:$imm)>;
-def : Pat<(i64 (vector_extract v16i16:$xj, uimm3:$imm)),
+def : Pat<(GRLenVT (vector_extract v16i16:$xj, uimm3:$imm)),
           (VPICKVE2GR_H (EXTRACT_SUBREG v16i16:$xj, sub_128), uimm3:$imm)>;
-def : Pat<(i64 (vector_extract v8i32:$xj, uimm3:$imm)),
+def : Pat<(GRLenVT (vector_extract v8i32:$xj, uimm3:$imm)),
           (XVPICKVE2GR_W v8i32:$xj, uimm3:$imm)>;
 def : Pat<(i64 (vector_extract v4i64:$xj, uimm2:$imm)),
           (XVPICKVE2GR_D v4i64:$xj, uimm2:$imm)>;
diff --git a/llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td b/llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td
index 5421bba0424bf..ac8bbd9ad1752 100644
--- a/llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td
+++ b/llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td
@@ -2080,11 +2080,11 @@ def : Pat<(i64 (bitconvert (f64 (vector_extract v2f64:$vj, uimm1:$imm)))),
           (VPICKVE2GR_D v2f64:$vj, uimm1:$imm)>;
 
 // Vector extraction with constant index.
-def : Pat<(i64 (vector_extract v16i8:$vj, uimm4:$imm)),
+def : Pat<(GRLenVT (vector_extract v16i8:$vj, uimm4:$imm)),
           (VPICKVE2GR_B v16i8:$vj, uimm4:$imm)>;
-def : Pat<(i64 (vector_extract v8i16:$vj, uimm3:$imm)),
+def : Pat<(GRLenVT (vector_extract v8i16:$vj, uimm3:$imm)),
           (VPICKVE2GR_H v8i16:$vj, uimm3:$imm)>;
-def : Pat<(i64 (vector_extract v4i32:$vj, uimm2:$imm)),
+def : Pat<(GRLenVT (vector_extract v4i32:$vj, uimm2:$imm)),
           (VPICKVE2GR_W v4i32:$vj, uimm2:$imm)>;
 def : Pat<(i64 (vector_extract v2i64:$vj, uimm1:$imm)),
           (VPICKVE2GR_D v2i64:$vj, uimm1:$imm)>;
@@ -2094,28 +2094,28 @@ def : Pat<(f64 (vector_extract v2f64:$vj, uimm1:$imm)),
           (f64 (EXTRACT_SUBREG (VREPLVEI_D v2f64:$vj, uimm1:$imm), sub_64))>;
 
 // Vector extraction with variable index.
-def : Pat<(i64 (vector_extract v16i8:$vj, i64:$rk)),
+def : Pat<(GRLenVT (vector_extract v16i8:$vj, GRLenVT:$rk)),
           (SRAI_W (COPY_TO_REGCLASS (f32 (EXTRACT_SUBREG (VREPLVE_B v16i8:$vj,
-                                                                    i64:$rk),
+                                                                    GRLenVT:$rk),
                                                          sub_32)),
-                                    GPR), (i64 24))>;
-def : Pat<(i64 (vector_extract v8i16:$vj, i64:$rk)),
+                                    GPR), (GRLenVT 24))>;
+def : Pat<(GRLenVT (vector_extract v8i16:$vj, GRLenVT:$rk)),
           (SRAI_W (COPY_TO_REGCLASS (f32 (EXTRACT_SUBREG (VREPLVE_H v8i16:$vj,
-                                                                    i64:$rk),
+                                                                    GRLenVT:$rk),
                                                          sub_32)),
-                                    GPR), (i64 16))>;
-def : Pat<(i64 (vector_extract v4i32:$vj, i64:$rk)),
-          (COPY_TO_REGCLASS (f32 (EXTRACT_SUBREG (VREPLVE_W v4i32:$vj, i64:$rk),
+                                    GPR), (GRLenVT 16))>;
+def : Pat<(GRLenVT (vector_extract v4i32:$vj, GRLenVT:$rk)),
+          (COPY_TO_REGCLASS (f32 (EXTRACT_SUBREG (VREPLVE_W v4i32:$vj, GRLenVT:$rk),
                                                  sub_32)),
                             GPR)>;
 def : Pat<(i64 (vector_extract v2i64:$vj, i64:$rk)),
           (COPY_TO_REGCLASS (f64 (EXTRACT_SUBREG (VREPLVE_D v2i64:$vj, i64:$rk),
                                                  sub_64)),
                             GPR)>;
-def : Pat<(f32 (vector_extract v4f32:$vj, i64:$rk)),
-          (f32 (EXTRACT_SUBREG (VREPLVE_W v4f32:$vj, i64:$rk), sub_32))>;
-def : Pat<(f64 (vector_extract v2f64:$vj, i64:$rk)),
-          (f64 (EXTRACT_SUBREG (VREPLVE_D v2f64:$vj, i64:$rk), sub_64))>;
+def : Pat<(f32 (vector_extract v4f32:$vj, GRLenVT:$rk)),
+          (f32 (EXTRACT_SUBREG (VREPLVE_W v4f32:$vj, GRLenVT:$rk), sub_32))>;
+def : Pat<(f64 (vector_extract v2f64:$vj, GRLenVT:$rk)),
+          (f64 (EXTRACT_SUBREG (VREPLVE_D v2f64:$vj, GRLenVT:$rk), sub_64))>;
 
 // vselect
 def : Pat<(v16i8 (vselect LSX128:$vd, (v16i8 (SplatPat_uimm8 uimm8:$imm)),
diff --git a/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/extractelement.ll b/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/extractelement.ll
index dddee35fb9e78..e1296549eecae 100644
--- a/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/extractelement.ll
+++ b/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/extractelement.ll
@@ -1,12 +1,20 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 3
-; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s
+; RUN: llc --mtriple=loongarch32 --mattr=+32s,+lasx < %s | FileCheck %s --check-prefixes=CHECK,LA32
+; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s --check-prefixes=CHECK,LA64
 
 define void @extract_32xi8(ptr %src, ptr %dst) nounwind {
-; CHECK-LABEL: extract_32xi8:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    xvld $xr0, $a0, 0
-; CHECK-NEXT:    xvstelm.b $xr0, $a1, 0, 1
-; CHECK-NEXT:    ret
+; LA32-LABEL: extract_32xi8:
+; LA32:       # %bb.0:
+; LA32-NEXT:    xvld $xr0, $a0, 0
+; LA32-NEXT:    vpickve2gr.b $a0, $vr0, 1
+; LA32-NEXT:    st.b $a0, $a1, 0
+; LA32-NEXT:    ret
+;
+; LA64-LABEL: extract_32xi8:
+; LA64:       # %bb.0:
+; LA64-NEXT:    xvld $xr0, $a0, 0
+; LA64-NEXT:    xvstelm.b $xr0, $a1, 0, 1
+; LA64-NEXT:    ret
   %v = load volatile <32 x i8>, ptr %src
   %e = extractelement <32 x i8> %v, i32 1
   store i8 %e, ptr %dst
@@ -14,11 +22,18 @@ define void @extract_32xi8(ptr %src, ptr %dst) nounwind {
 }
 
 define void @extract_16xi16(ptr %src, ptr %dst) nounwind {
-; CHECK-LABEL: extract_16xi16:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    xvld $xr0, $a0, 0
-; CHECK-NEXT:    xvstelm.h $xr0, $a1, 0, 1
-; CHECK-NEXT:    ret
+; LA32-LABEL: extract_16xi16:
+; LA32:       # %bb.0:
+; LA32-NEXT:    xvld $xr0, $a0, 0
+; LA32-NEXT:    vpickve2gr.h $a0, $vr0, 1
+; LA32-NEXT:    st.h $a0, $a1, 0
+; LA32-NEXT:    ret
+;
+; LA64-LABEL: extract_16xi16:
+; LA64:       # %bb.0:
+; LA64-NEXT:    xvld $xr0, $a0, 0
+; LA64-NEXT:    xvstelm.h $xr0, $a1, 0, 1
+; LA64-NEXT:    ret
   %v = load volatile <16 x i16>, ptr %src
   %e = extractelement <16 x i16> %v, i32 1
   store i16 %e, ptr %dst
@@ -26,11 +41,18 @@ define void @extract_16xi16(ptr %src, ptr %dst) nounwind {
 }
 
 define void @extract_8xi32(ptr %src, ptr %dst) nounwind {
-; CHECK-LABEL: extract_8xi32:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    xvld $xr0, $a0, 0
-; CHECK-NEXT:    xvstelm.w $xr0, $a1, 0, 1
-; CHECK-NEXT:    ret
+; LA32-LABEL: extract_8xi32:
+; LA32:       # %bb.0:
+; LA32-NEXT:    xvld $xr0, $a0, 0
+; LA32-NEXT:    xvpickve2gr.w $a0, $xr0, 1
+; LA32-NEXT:    st.w $a0, $a1, 0
+; LA32-NEXT:    ret
+;
+; LA64-LABEL: extract_8xi32:
+; LA64:       # %bb.0:
+; LA64-NEXT:    xvld $xr0, $a0, 0
+; LA64-NEXT:    xvstelm.w $xr0, $a1, 0, 1
+; LA64-NEXT:    ret
   %v = load volatile <8 x i32>, ptr %src
   %e = extractelement <8 x i32> %v, i32 1
   store i32 %e, ptr %dst
@@ -38,11 +60,20 @@ define void @extract_8xi32(ptr %src, ptr %dst) nounwind {
 }
 
 define void @extract_4xi64(ptr %src, ptr %dst) nounwind {
-; CHECK-LABEL: extract_4xi64:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    xvld $xr0, $a0, 0
-; CHECK-NEXT:    xvstelm.d $xr0, $a1, 0, 1
-; CHECK-NEXT:    ret
+; LA32-LABEL: extract_4xi64:
+; LA32:       # %bb.0:
+; LA32-NEXT:    xvld $xr0, $a0, 0
+; LA32-NEXT:    xvpickve2gr.w $a0, $xr0, 2
+; LA32-NEXT:    xvpickve2gr.w $a2, $xr0, 3
+; LA32-NEXT:    st.w $a2, $a1, 4
+; LA32-NEXT:    st.w $a0, $a1, 0
+; LA32-NEXT:    ret
+;
+; LA64-LABEL: extract_4xi64:
+; LA64:       # %bb.0:
+; LA64-NEXT:    xvld $xr0, $a0, 0
+; LA64-NEXT:    xvstelm.d $xr0, $a1, 0, 1
+; LA64-NEXT:    ret
   %v = load volatile <4 x i64>, ptr %src
   %e = extractelement <4 x i64> %v, i32 1
   store i64 %e, ptr %dst
@@ -74,14 +105,33 @@ define void @extract_4xdouble(ptr %src, ptr %dst) nounwind {
 }
 
 define void @extract_32xi8_idx(ptr %src, ptr %dst, i32 %idx) nounwind {
-; CHECK-LABEL: extract_32xi8_idx:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    xvld $xr0, $a0, 0
-; CHECK-NEXT:    xvpermi.q $xr1, $xr0, 1
-; CHECK-NEXT:    movgr2fr.w $fa2, $a2
-; CHECK-NEXT:    xvshuf.b $xr0, $xr1, $xr0, $xr2
-; CHECK-NEXT:    xvstelm.b $xr0, $a1, 0, 0
-; CHECK-NEXT:    ret
+; LA32-LABEL: extract_32xi8_idx:
+; LA32:       # %bb.0:
+; LA32-NEXT:    addi.w $sp, $sp, -96
+; LA32-NEXT:    st.w $ra, $sp, 92 # 4-byte Folded Spill
+; LA32-NEXT:    st.w $fp, $sp, 88 # 4-byte Folded Spill
+; LA32-NEXT:    addi.w $fp, $sp, 96
+; LA32-NEXT:    bstrins.w $sp, $zero, 4, 0
+; LA32-NEXT:    xvld $xr0, $a0, 0
+; LA32-NEXT:    addi.w $a0, $sp, 32
+; LA32-NEXT:    bstrins.w $a0, $a2, 4, 0
+; LA32-NEXT:    xvst $xr0, $sp, 32
+; LA32-NEXT:    ld.b $a0, $a0, 0
+; LA32-NEXT:    st.b $a0, $a1, 0
+; LA32-NEXT:    addi.w $sp, $fp, -96
+; LA32-NEXT:    ld.w $fp, $sp, 88 # 4-byte Folded Reload
+; LA32-NEXT:    ld.w $ra, $sp, 92 # 4-byte Folded Reload
+; LA32-NEXT:    addi.w $sp, $sp, 96
+; LA32-NEXT:    ret
+;
+; LA64-LABEL: extract_32xi8_idx:
+; LA64:       # %bb.0:
+; LA64-NEXT:    xvld $xr0, $a0, 0
+; LA64-NEXT:    xvpermi.q $xr1, $xr0, 1
+; LA64-NEXT:    movgr2fr.w $fa2, $a2
+; LA64-NEXT:    xvshuf.b $xr0, $xr1, $xr0, $xr2
+; LA64-NEXT:    xvstelm.b $xr0, $a1, 0, 0
+; LA64-NEXT:    ret
   %v = load volatile <32 x i8>, ptr %src
   %e = extractelement <32 x i8> %v, i32 %idx
   store i8 %e, ptr %dst
@@ -89,14 +139,33 @@ define void @extract_32xi8_idx(ptr %src, ptr %dst, i32 %idx) nounwind {
 }
 
 define void @extract_16xi16_idx(ptr %src, ptr %dst, i32 %idx) nounwind {
-; CHECK-LABEL: extract_16xi16_idx:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    xvld $xr0, $a0, 0
-; CHECK-NEXT:    xvpermi.q $xr1, $xr0, 1
-; CHECK-NEXT:    movgr2fr.w $fa2, $a2
-; CHECK-NEXT:    xvshuf.h $xr2, $xr1, $xr0
-; CHECK-NEXT:    xvstelm.h $xr2, $a1, 0, 0
-; CHECK-NEXT:    ret
+; LA32-LABEL: extract_16xi16_idx:
+; LA32:       # %bb.0:
+; LA32-NEXT:    addi.w $sp, $sp, -96
+; LA32-NEXT:    st.w $ra, $sp, 92 # 4-byte Folded Spill
+; LA32-NEXT:    st.w $fp, $sp, 88 # 4-byte Folded Spill
+; LA32-NEXT:    addi.w $fp, $sp, 96
+; LA32-NEXT:    bstrins.w $sp, $zero, 4, 0
+; LA32-NEXT:    xvld $xr0, $a0, 0
+; LA32-NEXT:    addi.w $a0, $sp, 32
+; LA32-NEXT:    bstrins.w $a0, $a2, 4, 1
+; LA32-NEXT:    xvst $xr0, $sp, 32
+; LA32-NEXT:    ld.h $a0, $a0, 0
+; LA32-NEXT:    st.h $a0, $a1, 0
+; LA32-NEXT:    addi.w $sp, $fp, -96
+; LA32-NEXT:    ld.w $fp, $sp, 88 # 4-byte Folded Reload
+; LA32-NEXT:    ld.w $ra, $sp, 92 # 4-byte Folded Reload
+; LA32-NEXT:    addi.w $sp, $sp, 96
+; LA32-NEXT:    ret
+;
+; LA64-LABEL: extract_16xi16_idx:
+; LA64:       # %bb.0:
+; LA64-NEXT:    xvld $xr0, $a0, 0
+; LA64-NEXT:    xvpermi.q $xr1, $xr0, 1
+; LA64-NEXT:    movgr2fr.w $fa2, $a2
+; LA64-NEXT:    xvshuf.h $xr2, $xr1, $xr0
+; LA64-NEXT:    xvstelm.h $xr2, $a1, 0, 0
+; LA64-NEXT:    ret
   %v = load volatile <16 x i16>, ptr %src
   %e = extractelement <16 x i16> %v, i32 %idx
   store i16 %e, ptr %dst
@@ -104,13 +173,22 @@ define void @extract_16xi16_idx(ptr %src, ptr %dst, i32 %idx) nounwind {
 }
 
 define void @extract_8xi32_idx(ptr %src, ptr %dst, i32 %idx) nounwind {
-; CHECK-LABEL: extract_8xi32_idx:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    xvld $xr0, $a0, 0
-; CHECK-NEXT:    xvreplgr2vr.w $xr1, $a2
-; CHECK-NEXT:    xvperm.w $xr0, $xr0, $xr1
-; CHECK-NEXT:    xvstelm.w $xr0, $a1, 0, 0
-; CHECK-NEXT:    ret
+; LA32-LABEL: extract_8xi32_idx:
+; LA32:       # %bb.0:
+; LA32-NEXT:    xvld $xr0, $a0, 0
+; LA32-NEXT:    xvreplgr2vr.w $xr1, $a2
+; LA32-NEXT:    xvperm.w $xr0, $xr0, $xr1
+; LA32-NEXT:    xvpickve2gr.w $a0, $xr0, 0
+; LA32-NEXT:    st.w $a0, $a1, 0
+; LA32-NEXT:    ret
+;
+; LA64-LABEL: extract_8xi32_idx:
+; LA64:       # %bb.0:
+; LA64-NEXT:    xvld $xr0, $a0, 0
+; LA64-NEXT:    xvreplgr2vr.w $xr1, $a2
+; LA64-NEXT:    xvperm.w $xr0, $xr0, $xr1
+; LA64-NEXT:    xvstelm.w $xr0, $a1, 0, 0
+; LA64-NEXT:    ret
   %v = load volatile <8 x i32>, ptr %src
   %e = extractelement <8 x i32> %v, i32 %idx
   store i32 %e, ptr %dst
@@ -118,14 +196,29 @@ define void @extract_8xi32_idx(ptr %src, ptr %dst, i32 %idx) nounwind {
 }
 
 define void @extract_4xi64_idx(ptr %src, ptr %dst, i32 %idx) nounwind {
-; CHECK-LABEL: extract_4xi64_idx:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    xvld $xr0, $a0, 0
-; CHECK-NEXT:    xvpermi.q $xr1, $xr0, 1
-; CHECK-NEXT:    movgr2fr.w $fa2, $a2
-; CHECK-NEXT:    xvshuf.d $xr2, $xr1, $xr0
-; CHECK-NEXT:    xvstelm.d $xr2, $a1, 0, 0
-; CHECK-NEXT:    ret
+; LA32-LABEL: extract_4xi64_idx:
+; LA32:       # %bb.0:
+; LA32-NEXT:    xvld $xr0, $a0, 0
+; LA32-NEXT:    add.w $a0, $a2, $a2
+; LA32-NEXT:    addi.w $a2, $a0, 1
+; LA32-NEXT:    xvreplgr2vr.w $xr1, $a2
+; LA32-NEXT:    xvperm.w $xr1, $xr0, $xr1
+; LA32-NEXT:    xvpickve2gr.w $a2, $xr1, 0
+; LA32-NEXT:    xvreplgr2vr.w $xr1, $a0
+; LA32-NEXT:    xvperm.w $xr0, $xr0, $xr1
+; LA32-NEXT:    xvpickve2gr.w $a0, $xr0, 0
+; LA32-NEXT:    st.w $a0, $a1, 0
+; LA32-NEXT:    st.w $a2, $a1, 4
+; LA32-NEXT:    ret
+;
+; LA64-LABEL: extract_4xi64_idx:
+; LA64:       # %bb.0:
+; LA64-NEXT:    xvld $xr0, $a0, 0
+; LA64-NEXT:    xvpermi.q $xr1, $xr0, 1
+; LA64-NEXT:    movgr2fr.w $fa2, $a2
+; LA64-NEXT:    xvshuf.d $xr2, $xr1, $xr0
+; LA64-NEXT:    xvstelm.d $xr2, $a1, 0, 0
+; LA64-NEXT:    ret
   %v = load volatile <4 x i64>, ptr %src
   %e = extractelement <4 x i64> %v, i32 %idx
   store i64 %e, ptr %dst
@@ -147,14 +240,33 @@ define void @extract_8xfloat_idx(ptr %src, ptr %dst, i32 %idx) nounwind {
 }
 
 define void @extract_4xdouble_idx(ptr %src, ptr %dst, i32 %idx) nounwind {
-; CHECK-LABEL: extract_4xdouble_idx:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    xvld $xr0, $a0, 0
-; CHECK-NEXT:    xvpermi.q $xr1, $xr0, 1
-; CHECK-NEXT:    movgr2fr.w $fa2, $a2
-; CHECK-NEXT:    xvshuf.d $xr2, $xr1, $xr0
-; CHECK-NEXT:    xvstelm.d $xr2, $a1, 0, 0
-; CHECK-NEXT:    ret
+; LA32-LABEL: extract_4xdouble_idx:
+; LA32:       # %bb.0:
+; LA32-NEXT:    addi.w $sp, $sp, -96
+; LA32-NEXT:    st.w $ra, $sp, 92 # 4-byte Folded Spill
+; LA32-NEXT:    st.w $fp, $sp, 88 # 4-byte Folded Spill
+; LA32-NEXT:    addi.w $fp, $sp, 96
+; LA32-NEXT:    bstrins.w $sp, $zero, 4, 0
+; LA32-NEXT:    xvld $xr0, $a0, 0
+; LA32-NEXT:    addi.w $a0, $sp, 32
+; LA32-NEXT:    bstrins.w $a0, $a2, 4, 3
+; LA32-NEXT:    xvst $xr0, $sp, 32
+; LA32-NEXT:    fld.d $fa0, $a0, 0
+; LA32-NEXT:    fst.d $fa0, $a1, 0
+; LA32-NEXT:    addi.w $sp, $fp, -96
+; LA32-NEXT:    ld.w $fp, $sp, 88 # 4-byte Folded Reload
+; LA32-NEXT:    ld.w $ra, $sp, 92 # 4-byte Folded Reload
+; LA32-NEXT:    addi.w $sp, $sp, 96
+; LA32-NEXT:    ret
+;
+; LA64-LABEL: extract_4xdouble_idx:
+; LA64:       # %bb.0:
+; LA64-NEXT:    xvld $xr0, $a0, 0
+; LA64-NEXT:    xvpermi.q $xr1, $xr0, 1
+; LA64-NEXT:    movgr2fr.w $fa2, $a2
+; LA64-NEXT:    xvshuf.d $xr2, $xr1, $xr0
+; LA64-NEXT:    xvstelm.d $xr2, $a1, 0, 0
+; LA64-NEXT:    ret
   %v = load volatile <4 x double>, ptr %src
   %e = extractelement <4 x double> %v, i32 %idx
   store double %e, ptr %dst
@@ -162,13 +274,21 @@ define void @extract_4xdouble_idx(ptr %src, ptr %dst, i32 %idx) nounwind {
 }
 
 define void @eliminate_frame_index(<8 x i32> %a) nounwind {
-; CHECK-LABEL: eliminate_frame_index:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    addi.d $sp, $sp, -1040
-; CHECK-NEXT:    addi.d $a0, $sp, 524
-; CHECK-NEXT:    xvstelm.w $xr0, $a0, 0, 1
-; CHECK-NEXT:    addi.d $sp, $sp, 1040
-; CHECK-NEXT:    ret
+; LA32-LABEL: eliminate_frame_index:
+; LA32:       # %bb.0:
+; LA32-NEXT:    addi.w $sp, $sp, -1040
+; LA32-NEXT:    xvpickve2gr.w $a0, $xr0, 1
+; LA32-NEXT:    st.w $a0, $sp, 524
+; LA32-NEXT:    addi.w $sp, $sp, 1040
+; LA32-NEXT:    ret
+;
+; LA64-LABEL: eliminate_frame_index:
+; LA64:       # %bb.0:
+; LA64-NEXT:    addi.d $sp, $sp, -1040
+; LA64-NEXT:    addi.d $a0, $sp, 524
+; LA64-NEXT:    xvstelm.w $xr0, $a0, 0, 1
+; LA64-NEXT:    addi.d $sp, $sp, 1040
+; LA64-NEXT:    ret
   %1 = alloca [32 x [8 x i32]]
   %2 = getelementptr i8, ptr %1, i64 508
   %b = extractelement <8 x i32> %a, i64 1
diff --git a/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/extractelement.ll b/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/extractelement.ll
index c9c95f19c26f8..3fb55d4806160 100644
--- a/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/extractelement.ll
+++ b/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/extractelement.ll
@@ -1,12 +1,20 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 3
-; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s
+; RUN: llc --mtriple=loongarch32 --mattr=+32s,+lsx < %s | FileCheck %s --check-prefixes=CHECK,LA32
+; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s --check-prefixes=CHECK,LA64
 
 define void @extract_16xi8(ptr %src, ptr %dst) nounwind {
-; CHECK-LABEL: extract_16xi8:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    vld $vr0, $a0, 0
-; CHECK-NEXT:    vstelm.b $vr0, $a1, 0, 1
-; CHECK-NEXT:    ret
+; LA32-LABEL: extract_16xi8:
+; LA32:       # %bb.0:
+; LA32-NEXT:    vld $vr0, $a0, 0
+; LA32-NEXT:    vpickve2gr.b $a0, $vr0, 1
+; LA32-NEXT:    st.b $a0, $a1, 0
+; LA32-NEXT:    ret
+;
+; LA64-LABEL: extract_16xi8:
+; LA64:       # %bb.0:
+; LA64-NEXT:    vld $vr0, $a0, 0
+; LA64-NEXT:    vstelm.b $vr0, $a1, 0, 1
+; LA64-NEXT:    ret
   %v = load volatile <16 x i8>, ptr %src
   %e = extractelement <16 x i8> %v, i32 1
   store i8 %e, ptr %dst
@@ -14,11 +22,18 @@ define void @extract_16xi8(ptr %src, ptr %dst) nounwind {
 }
 
 define void @extract_8xi16(ptr %src, ptr %dst) nounwind {
-; CHECK-LABEL: extract_8xi16:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    vld $vr0, $a0, 0
-; CHECK-NEXT:    vstelm.h $vr0, $a1, 0, 1
-; CHECK-NEXT:    ret
+; LA32-LABEL: extract_8xi16:
+; LA32:       # %bb.0:
+; LA32-NEXT:    vld $vr0, $a0, 0
+; LA32-NEXT:    vpickve2gr.h $a0, $vr0, 1
+; LA32-NEXT:    st.h $a0, $a1, 0
+; LA32-NEXT:    ret
+;
+; LA64-LABEL: extract_8xi16:
+; LA64:       # %bb.0:
+; LA64-NEXT:    vld $vr0, $a0, 0
+; LA64-NEXT:    vstelm.h $vr0, $a1, 0, 1
+; LA64-NEXT:    ret
   %v = load volatile <8 x i16>, ptr %src
   %e = extractelement <8 x i16> %v, i32 1
   store i16 %e, ptr %dst
@@ -26,11 +41,18 @@ define void @extract_8xi16(ptr %src, ptr %dst) nounwind {
 }
 
 define void @extract_4xi32(ptr %src, ptr %dst) nounwind {
-; CHECK-LABEL: extract_4xi32:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    vld $vr0, $a0, 0
-; CHECK-NEXT:    vstelm.w $vr0, $a1, 0, 1
-; CHECK-NEXT:    ret
+; LA32-LABEL: extract_4xi32:
+; LA32:       # %bb.0:
+; LA32-NEXT:    vld $vr0, $a0, 0
+; LA32-NEXT:    vpickve2gr.w $a0,...
[truncated]

Copy link
Member

@heiher heiher left a comment

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Thanks for fixing LA32. There’s an ongoing branch working on fixing the LA32 vector. Since the work isn’t complete yet, no PR has been opened. In the meantime, feel free to submit separate PRs for individual test issues that are blocking progress.

https://github.com/heiher/llvm-project/commits/la32-vec

@zhaoqi5
Copy link
Contributor Author

zhaoqi5 commented Sep 18, 2025

Thanks for fixing LA32. There’s an ongoing branch working on fixing the LA32 vector. Since the work isn’t complete yet, no PR has been opened. In the meantime, feel free to submit separate PRs for individual test issues that are blocking progress.

https://github.com/heiher/llvm-project/commits/la32-vec

I am doing some optimization and want to add tests for la32 in the meantime, but errors occurred. So I pushed this simple fix first.

Vectorization on la32 indeed still needs many fixes. Thanks for your continuous work on it.

Copy link
Member

@heiher heiher left a comment

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

LGTM.

@zhaoqi5 zhaoqi5 merged commit 680c657 into main Sep 19, 2025
9 checks passed
@zhaoqi5 zhaoqi5 deleted the users/zhaoqi5/fix-la32-extractelement branch September 19, 2025 06:14
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Projects
None yet
Development

Successfully merging this pull request may close these issues.

3 participants