diff --git a/llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td b/llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td
index 613dea6093f5f..6a3717bd6dda7 100644
--- a/llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td
+++ b/llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td
@@ -1920,10 +1920,13 @@ foreach vt = [v32i8, v16i16, v8i32, v4i64, v8f32, v4f64] in
   def : Pat<(vt (loongarch_xvreplve0q LASX256:$xj)),
             (XVREPLVE0_Q LASX256:$xj)>;
 
-// VSTELM
+// XVSTELM
 defm : VstelmPat;
 defm : VstelmPat;
 defm : VstelmPat;
+let Predicates = [IsLA32] in {
+  defm : VstelmPat;
+} // Predicates = [IsLA32]
 defm : VstelmPat;
 defm : VstelmPat;
 defm : VstelmPat;
diff --git a/llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td b/llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td
index 4619c6bd248a6..1d60cb9360289 100644
--- a/llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td
+++ b/llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td
@@ -2061,9 +2061,13 @@ def : Pat<(lsxsplatf32 FPR32:$fj),
 def : Pat<(lsxsplatf64 FPR64:$fj),
           (VREPLVEI_D (SUBREG_TO_REG (i64 0), FPR64:$fj, sub_64), 0)>;
 
+// VSTELM
 defm : VstelmPat;
 defm : VstelmPat;
 defm : VstelmPat;
+let Predicates = [IsLA32] in {
+  defm : VstelmPat;
+} // Predicates = [IsLA32]
 defm : VstelmPat;
 defm : VstelmPat;
 defm : VstelmPat;
diff --git a/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/extractelement.ll b/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/extractelement.ll
index 60b51755681a4..d9d1f19726e62 100644
--- a/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/extractelement.ll
+++ b/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/extractelement.ll
@@ -27,18 +27,11 @@ define void @extract_16xi16(ptr %src, ptr %dst) nounwind {
 }
 
 define void @extract_8xi32(ptr %src, ptr %dst) nounwind {
-; LA32-LABEL: extract_8xi32:
-; LA32:       # %bb.0:
-; LA32-NEXT:    xvld $xr0, $a0, 0
-; LA32-NEXT:    xvpickve2gr.w $a0, $xr0, 1
-; LA32-NEXT:    st.w $a0, $a1, 0
-; LA32-NEXT:    ret
-;
-; LA64-LABEL: extract_8xi32:
-; LA64:       # %bb.0:
-; LA64-NEXT:    xvld $xr0, $a0, 0
-; LA64-NEXT:    xvstelm.w $xr0, $a1, 0, 1
-; LA64-NEXT:    ret
+; CHECK-LABEL: extract_8xi32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    xvld $xr0, $a0, 0
+; CHECK-NEXT:    xvstelm.w $xr0, $a1, 0, 1
+; CHECK-NEXT:    ret
   %v = load volatile <8 x i32>, ptr %src
   %e = extractelement <8 x i32> %v, i32 1
   store i32 %e, ptr %dst
@@ -49,10 +42,8 @@ define void @extract_4xi64(ptr %src, ptr %dst) nounwind {
 ; LA32-LABEL: extract_4xi64:
 ; LA32:       # %bb.0:
 ; LA32-NEXT:    xvld $xr0, $a0, 0
-; LA32-NEXT:    xvpickve2gr.w $a0, $xr0, 2
-; LA32-NEXT:    xvpickve2gr.w $a2, $xr0, 3
-; LA32-NEXT:    st.w $a2, $a1, 4
-; LA32-NEXT:    st.w $a0, $a1, 0
+; LA32-NEXT:    xvstelm.w $xr0, $a1, 4, 3
+; LA32-NEXT:    xvstelm.w $xr0, $a1, 0, 2
 ; LA32-NEXT:    ret
 ;
 ; LA64-LABEL: extract_4xi64:
@@ -139,22 +130,13 @@ define void @extract_16xi16_idx(ptr %src, ptr %dst, i32 %idx) nounwind {
 }
 
 define void @extract_8xi32_idx(ptr %src, ptr %dst, i32 %idx) nounwind {
-; LA32-LABEL: extract_8xi32_idx:
-; LA32:       # %bb.0:
-; LA32-NEXT:    xvld $xr0, $a0, 0
-; LA32-NEXT:    xvreplgr2vr.w $xr1, $a2
-; LA32-NEXT:    xvperm.w $xr0, $xr0, $xr1
-; LA32-NEXT:    xvpickve2gr.w $a0, $xr0, 0
-; LA32-NEXT:    st.w $a0, $a1, 0
-; LA32-NEXT:    ret
-;
-; LA64-LABEL: extract_8xi32_idx:
-; LA64:       # %bb.0:
-; LA64-NEXT:    xvld $xr0, $a0, 0
-; LA64-NEXT:    xvreplgr2vr.w $xr1, $a2
-; LA64-NEXT:    xvperm.w $xr0, $xr0, $xr1
-; LA64-NEXT:    xvstelm.w $xr0, $a1, 0, 0
-; LA64-NEXT:    ret
+; CHECK-LABEL: extract_8xi32_idx:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    xvld $xr0, $a0, 0
+; CHECK-NEXT:    xvreplgr2vr.w $xr1, $a2
+; CHECK-NEXT:    xvperm.w $xr0, $xr0, $xr1
+; CHECK-NEXT:    xvstelm.w $xr0, $a1, 0, 0
+; CHECK-NEXT:    ret
   %v = load volatile <8 x i32>, ptr %src
   %e = extractelement <8 x i32> %v, i32 %idx
   store i32 %e, ptr %dst
@@ -169,12 +151,10 @@ define void @extract_4xi64_idx(ptr %src, ptr %dst, i32 %idx) nounwind {
 ; LA32-NEXT:    addi.w $a2, $a0, 1
 ; LA32-NEXT:    xvreplgr2vr.w $xr1, $a2
 ; LA32-NEXT:    xvperm.w $xr1, $xr0, $xr1
-; LA32-NEXT:    xvpickve2gr.w $a2, $xr1, 0
-; LA32-NEXT:    xvreplgr2vr.w $xr1, $a0
-; LA32-NEXT:    xvperm.w $xr0, $xr0, $xr1
-; LA32-NEXT:    xvpickve2gr.w $a0, $xr0, 0
-; LA32-NEXT:    st.w $a0, $a1, 0
-; LA32-NEXT:    st.w $a2, $a1, 4
+; LA32-NEXT:    xvreplgr2vr.w $xr2, $a0
+; LA32-NEXT:    xvperm.w $xr0, $xr0, $xr2
+; LA32-NEXT:    xvstelm.w $xr0, $a1, 0, 0
+; LA32-NEXT:    xvstelm.w $xr1, $a1, 4, 0
 ; LA32-NEXT:    ret
 ;
 ; LA64-LABEL: extract_4xi64_idx:
@@ -233,8 +213,8 @@ define void @eliminate_frame_index(<8 x i32> %a) nounwind {
 ; LA32-LABEL: eliminate_frame_index:
 ; LA32:       # %bb.0:
 ; LA32-NEXT:    addi.w $sp, $sp, -1040
-; LA32-NEXT:    xvpickve2gr.w $a0, $xr0, 1
-; LA32-NEXT:    st.w $a0, $sp, 524
+; LA32-NEXT:    addi.w $a0, $sp, 524
+; LA32-NEXT:    xvstelm.w $xr0, $a0, 0, 1
 ; LA32-NEXT:    addi.w $sp, $sp, 1040
 ; LA32-NEXT:    ret
 ;
diff --git a/llvm/test/CodeGen/LoongArch/lasx/vec-reduce-add.ll b/llvm/test/CodeGen/LoongArch/lasx/vec-reduce-add.ll
index 3e815a174d232..a6d1b23e083e9 100644
--- a/llvm/test/CodeGen/LoongArch/lasx/vec-reduce-add.ll
+++ b/llvm/test/CodeGen/LoongArch/lasx/vec-reduce-add.ll
@@ -72,8 +72,7 @@ define void @vec_reduce_add_v8i32(ptr %src, ptr %dst) nounwind {
 ; LA32-NEXT:    xvhaddw.q.d $xr0, $xr0, $xr0
 ; LA32-NEXT:    xvpermi.d $xr1, $xr0, 2
 ; LA32-NEXT:    xvadd.d $xr0, $xr1, $xr0
-; LA32-NEXT:    xvpickve2gr.w $a0, $xr0, 0
-; LA32-NEXT:    st.w $a0, $a1, 0
+; LA32-NEXT:    xvstelm.w $xr0, $a1, 0, 0
 ; LA32-NEXT:    ret
 ;
 ; LA64-LABEL: vec_reduce_add_v8i32:
diff --git a/llvm/test/CodeGen/LoongArch/lasx/vec-reduce-and.ll b/llvm/test/CodeGen/LoongArch/lasx/vec-reduce-and.ll
index 23cc230f04503..614dcf4b500ff 100644
--- a/llvm/test/CodeGen/LoongArch/lasx/vec-reduce-and.ll
+++ b/llvm/test/CodeGen/LoongArch/lasx/vec-reduce-and.ll
@@ -45,30 +45,17 @@ define void @vec_reduce_and_v16i16(ptr %src, ptr %dst) nounwind {
 }
 
 define void @vec_reduce_and_v8i32(ptr %src, ptr %dst) nounwind {
-; LA32-LABEL: vec_reduce_and_v8i32:
-; LA32:       # %bb.0:
-; LA32-NEXT:    xvld $xr0, $a0, 0
-; LA32-NEXT:    xvpermi.q $xr1, $xr0, 1
-; LA32-NEXT:    vand.v $vr0, $vr0, $vr1
-; LA32-NEXT:    vbsrl.v $vr1, $vr0, 8
-; LA32-NEXT:    vand.v $vr0, $vr1, $vr0
-; LA32-NEXT:    vbsrl.v $vr1, $vr0, 4
-; LA32-NEXT:    vand.v $vr0, $vr1, $vr0
-; LA32-NEXT:    vpickve2gr.w $a0, $vr0, 0
-; LA32-NEXT:    st.w $a0, $a1, 0
-; LA32-NEXT:    ret
-;
-; LA64-LABEL: vec_reduce_and_v8i32:
-; LA64:       # %bb.0:
-; LA64-NEXT:    xvld $xr0, $a0, 0
-; LA64-NEXT:    xvpermi.q $xr1, $xr0, 1
-; LA64-NEXT:    vand.v $vr0, $vr0, $vr1
-; LA64-NEXT:    vbsrl.v $vr1, $vr0, 8
-; LA64-NEXT:    vand.v $vr0, $vr1, $vr0
-; LA64-NEXT:    vbsrl.v $vr1, $vr0, 4
-; LA64-NEXT:    vand.v $vr0, $vr1, $vr0
-; LA64-NEXT:    vstelm.w $vr0, $a1, 0, 0
-; LA64-NEXT:    ret
+; CHECK-LABEL: vec_reduce_and_v8i32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    xvld $xr0, $a0, 0
+; CHECK-NEXT:    xvpermi.q $xr1, $xr0, 1
+; CHECK-NEXT:    vand.v $vr0, $vr0, $vr1
+; CHECK-NEXT:    vbsrl.v $vr1, $vr0, 8
+; CHECK-NEXT:    vand.v $vr0, $vr1, $vr0
+; CHECK-NEXT:    vbsrl.v $vr1, $vr0, 4
+; CHECK-NEXT:    vand.v $vr0, $vr1, $vr0
+; CHECK-NEXT:    vstelm.w $vr0, $a1, 0, 0
+; CHECK-NEXT:    ret
   %v = load <8 x i32>, ptr %src
   %res = call i32 @llvm.vector.reduce.and.v8i32(<8 x i32> %v)
   store i32 %res, ptr %dst
diff --git a/llvm/test/CodeGen/LoongArch/lasx/vec-reduce-or.ll b/llvm/test/CodeGen/LoongArch/lasx/vec-reduce-or.ll
index d7d3afc6dd1da..e3e02b5ba8b7d 100644
--- a/llvm/test/CodeGen/LoongArch/lasx/vec-reduce-or.ll
+++ b/llvm/test/CodeGen/LoongArch/lasx/vec-reduce-or.ll
@@ -45,30 +45,17 @@ define void @vec_reduce_or_v16i16(ptr %src, ptr %dst) nounwind {
 }
 
 define void @vec_reduce_or_v8i32(ptr %src, ptr %dst) nounwind {
-; LA32-LABEL: vec_reduce_or_v8i32:
-; LA32:       # %bb.0:
-; LA32-NEXT:    xvld $xr0, $a0, 0
-; LA32-NEXT:    xvpermi.q $xr1, $xr0, 1
-; LA32-NEXT:    vor.v $vr0, $vr0, $vr1
-; LA32-NEXT:    vbsrl.v $vr1, $vr0, 8
-; LA32-NEXT:    vor.v $vr0, $vr1, $vr0
-; LA32-NEXT:    vbsrl.v $vr1, $vr0, 4
-; LA32-NEXT:    vor.v $vr0, $vr1, $vr0
-; LA32-NEXT:    vpickve2gr.w $a0, $vr0, 0
-; LA32-NEXT:    st.w $a0, $a1, 0
-; LA32-NEXT:    ret
-;
-; LA64-LABEL: vec_reduce_or_v8i32:
-; LA64:       # %bb.0:
-; LA64-NEXT:    xvld $xr0, $a0, 0
-; LA64-NEXT:    xvpermi.q $xr1, $xr0, 1
-; LA64-NEXT:    vor.v $vr0, $vr0, $vr1
-; LA64-NEXT:    vbsrl.v $vr1, $vr0, 8
-; LA64-NEXT:    vor.v $vr0, $vr1, $vr0
-; LA64-NEXT:    vbsrl.v $vr1, $vr0, 4
-; LA64-NEXT:    vor.v $vr0, $vr1, $vr0
-; LA64-NEXT:    vstelm.w $vr0, $a1, 0, 0
-; LA64-NEXT:    ret
+; CHECK-LABEL: vec_reduce_or_v8i32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    xvld $xr0, $a0, 0
+; CHECK-NEXT:    xvpermi.q $xr1, $xr0, 1
+; CHECK-NEXT:    vor.v $vr0, $vr0, $vr1
+; CHECK-NEXT:    vbsrl.v $vr1, $vr0, 8
+; CHECK-NEXT:    vor.v $vr0, $vr1, $vr0
+; CHECK-NEXT:    vbsrl.v $vr1, $vr0, 4
+; CHECK-NEXT:    vor.v $vr0, $vr1, $vr0
+; CHECK-NEXT:    vstelm.w $vr0, $a1, 0, 0
+; CHECK-NEXT:    ret
   %v = load <8 x i32>, ptr %src
   %res = call i32 @llvm.vector.reduce.or.v8i32(<8 x i32> %v)
   store i32 %res, ptr %dst
diff --git a/llvm/test/CodeGen/LoongArch/lasx/vec-reduce-smax.ll b/llvm/test/CodeGen/LoongArch/lasx/vec-reduce-smax.ll
index 8cbbb52884865..8024b5906e98b 100644
--- a/llvm/test/CodeGen/LoongArch/lasx/vec-reduce-smax.ll
+++ b/llvm/test/CodeGen/LoongArch/lasx/vec-reduce-smax.ll
@@ -45,30 +45,17 @@ define void @vec_reduce_smax_v16i16(ptr %src, ptr %dst) nounwind {
 }
 
 define void @vec_reduce_smax_v8i32(ptr %src, ptr %dst) nounwind {
-; LA32-LABEL: vec_reduce_smax_v8i32:
-; LA32:       # %bb.0:
-; LA32-NEXT:    xvld $xr0, $a0, 0
-; LA32-NEXT:    xvpermi.q $xr1, $xr0, 1
-; LA32-NEXT:    vmax.w $vr0, $vr0, $vr1
-; LA32-NEXT:    vbsrl.v $vr1, $vr0, 8
-; LA32-NEXT:    vmax.w $vr0, $vr1, $vr0
-; LA32-NEXT:    vbsrl.v $vr1, $vr0, 4
-; LA32-NEXT:    vmax.w $vr0, $vr1, $vr0
-; LA32-NEXT:    vpickve2gr.w $a0, $vr0, 0
-; LA32-NEXT:    st.w $a0, $a1, 0
-; LA32-NEXT:    ret
-;
-; LA64-LABEL: vec_reduce_smax_v8i32:
-; LA64:       # %bb.0:
-; LA64-NEXT:    xvld $xr0, $a0, 0
-; LA64-NEXT:    xvpermi.q $xr1, $xr0, 1
-; LA64-NEXT:    vmax.w $vr0, $vr0, $vr1
-; LA64-NEXT:    vbsrl.v $vr1, $vr0, 8
-; LA64-NEXT:    vmax.w $vr0, $vr1, $vr0
-; LA64-NEXT:    vbsrl.v $vr1, $vr0, 4
-; LA64-NEXT:    vmax.w $vr0, $vr1, $vr0
-; LA64-NEXT:    vstelm.w $vr0, $a1, 0, 0
-; LA64-NEXT:    ret
+; CHECK-LABEL: vec_reduce_smax_v8i32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    xvld $xr0, $a0, 0
+; CHECK-NEXT:    xvpermi.q $xr1, $xr0, 1
+; CHECK-NEXT:    vmax.w $vr0, $vr0, $vr1
+; CHECK-NEXT:    vbsrl.v $vr1, $vr0, 8
+; CHECK-NEXT:    vmax.w $vr0, $vr1, $vr0
+; CHECK-NEXT:    vbsrl.v $vr1, $vr0, 4
+; CHECK-NEXT:    vmax.w $vr0, $vr1, $vr0
+; CHECK-NEXT:    vstelm.w $vr0, $a1, 0, 0
+; CHECK-NEXT:    ret
   %v = load <8 x i32>, ptr %src
   %res = call i32 @llvm.vector.reduce.smax.v8i32(<8 x i32> %v)
   store i32 %res, ptr %dst
diff --git a/llvm/test/CodeGen/LoongArch/lasx/vec-reduce-smin.ll b/llvm/test/CodeGen/LoongArch/lasx/vec-reduce-smin.ll
index c34852aa8a28f..f1ce9b20adcc2 100644
--- a/llvm/test/CodeGen/LoongArch/lasx/vec-reduce-smin.ll
+++ b/llvm/test/CodeGen/LoongArch/lasx/vec-reduce-smin.ll
@@ -45,30 +45,17 @@ define void @vec_reduce_smin_v16i16(ptr %src, ptr %dst) nounwind {
 }
 
 define void @vec_reduce_smin_v8i32(ptr %src, ptr %dst) nounwind {
-; LA32-LABEL: vec_reduce_smin_v8i32:
-; LA32:       # %bb.0:
-; LA32-NEXT:    xvld $xr0, $a0, 0
-; LA32-NEXT:    xvpermi.q $xr1, $xr0, 1
-; LA32-NEXT:    vmin.w $vr0, $vr0, $vr1
-; LA32-NEXT:    vbsrl.v $vr1, $vr0, 8
-; LA32-NEXT:    vmin.w $vr0, $vr1, $vr0
-; LA32-NEXT:    vbsrl.v $vr1, $vr0, 4
-; LA32-NEXT:    vmin.w $vr0, $vr1, $vr0
-; LA32-NEXT:    vpickve2gr.w $a0, $vr0, 0
-; LA32-NEXT:    st.w $a0, $a1, 0
-; LA32-NEXT:    ret
-;
-; LA64-LABEL: vec_reduce_smin_v8i32:
-; LA64:       # %bb.0:
-; LA64-NEXT:    xvld $xr0, $a0, 0
-; LA64-NEXT:    xvpermi.q $xr1, $xr0, 1
-; LA64-NEXT:    vmin.w $vr0, $vr0, $vr1
-; LA64-NEXT:    vbsrl.v $vr1, $vr0, 8
-; LA64-NEXT:    vmin.w $vr0, $vr1, $vr0
-; LA64-NEXT:    vbsrl.v $vr1, $vr0, 4
-; LA64-NEXT:    vmin.w $vr0, $vr1, $vr0
-; LA64-NEXT:    vstelm.w $vr0, $a1, 0, 0
-; LA64-NEXT:    ret
+; CHECK-LABEL: vec_reduce_smin_v8i32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    xvld $xr0, $a0, 0
+; CHECK-NEXT:    xvpermi.q $xr1, $xr0, 1
+; CHECK-NEXT:    vmin.w $vr0, $vr0, $vr1
+; CHECK-NEXT:    vbsrl.v $vr1, $vr0, 8
+; CHECK-NEXT:    vmin.w $vr0, $vr1, $vr0
+; CHECK-NEXT:    vbsrl.v $vr1, $vr0, 4
+; CHECK-NEXT:    vmin.w $vr0, $vr1, $vr0
+; CHECK-NEXT:    vstelm.w $vr0, $a1, 0, 0
+; CHECK-NEXT:    ret
   %v = load <8 x i32>, ptr %src
   %res = call i32 @llvm.vector.reduce.smin.v8i32(<8 x i32> %v)
   store i32 %res, ptr %dst
diff --git a/llvm/test/CodeGen/LoongArch/lasx/vec-reduce-umax.ll b/llvm/test/CodeGen/LoongArch/lasx/vec-reduce-umax.ll
index c44f83a909a68..040e25a99cd5c 100644
--- a/llvm/test/CodeGen/LoongArch/lasx/vec-reduce-umax.ll
+++ b/llvm/test/CodeGen/LoongArch/lasx/vec-reduce-umax.ll
@@ -45,30 +45,17 @@ define void @vec_reduce_umax_v16i16(ptr %src, ptr %dst) nounwind {
 }
 
 define void @vec_reduce_umax_v8i32(ptr %src, ptr %dst) nounwind {
-; LA32-LABEL: vec_reduce_umax_v8i32:
-; LA32:       # %bb.0:
-; LA32-NEXT:    xvld $xr0, $a0, 0
-; LA32-NEXT:    xvpermi.q $xr1, $xr0, 1
-; LA32-NEXT:    vmax.wu $vr0, $vr0, $vr1
-; LA32-NEXT:    vbsrl.v $vr1, $vr0, 8
-; LA32-NEXT:    vmax.wu $vr0, $vr1, $vr0
-; LA32-NEXT:    vbsrl.v $vr1, $vr0, 4
-; LA32-NEXT:    vmax.wu $vr0, $vr1, $vr0
-; LA32-NEXT:    vpickve2gr.w $a0, $vr0, 0
-; LA32-NEXT:    st.w $a0, $a1, 0
-; LA32-NEXT:    ret
-;
-; LA64-LABEL: vec_reduce_umax_v8i32:
-; LA64:       # %bb.0:
-; LA64-NEXT:    xvld $xr0, $a0, 0
-; LA64-NEXT:    xvpermi.q $xr1, $xr0, 1
-; LA64-NEXT:    vmax.wu $vr0, $vr0, $vr1
-; LA64-NEXT:    vbsrl.v $vr1, $vr0, 8
-; LA64-NEXT:    vmax.wu $vr0, $vr1, $vr0
-; LA64-NEXT:    vbsrl.v $vr1, $vr0, 4
-; LA64-NEXT:    vmax.wu $vr0, $vr1, $vr0
-; LA64-NEXT:    vstelm.w $vr0, $a1, 0, 0
-; LA64-NEXT:    ret
+; CHECK-LABEL: vec_reduce_umax_v8i32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    xvld $xr0, $a0, 0
+; CHECK-NEXT:    xvpermi.q $xr1, $xr0, 1
+; CHECK-NEXT:    vmax.wu $vr0, $vr0, $vr1
+; CHECK-NEXT:    vbsrl.v $vr1, $vr0, 8
+; CHECK-NEXT:    vmax.wu $vr0, $vr1, $vr0
+; CHECK-NEXT:    vbsrl.v $vr1, $vr0, 4
+; CHECK-NEXT:    vmax.wu $vr0, $vr1, $vr0
+; CHECK-NEXT:    vstelm.w $vr0, $a1, 0, 0
+; CHECK-NEXT:    ret
   %v = load <8 x i32>, ptr %src
   %res = call i32 @llvm.vector.reduce.umax.v8i32(<8 x i32> %v)
   store i32 %res, ptr %dst
diff --git a/llvm/test/CodeGen/LoongArch/lasx/vec-reduce-umin.ll b/llvm/test/CodeGen/LoongArch/lasx/vec-reduce-umin.ll
index f91a1b34dffe9..a20b903eb5e69 100644
--- a/llvm/test/CodeGen/LoongArch/lasx/vec-reduce-umin.ll
+++ b/llvm/test/CodeGen/LoongArch/lasx/vec-reduce-umin.ll
@@ -45,30 +45,17 @@ define void @vec_reduce_umin_v16i16(ptr %src, ptr %dst) nounwind {
 }
 
 define void @vec_reduce_umin_v8i32(ptr %src, ptr %dst) nounwind {
-; LA32-LABEL: vec_reduce_umin_v8i32:
-; LA32:       # %bb.0:
-; LA32-NEXT:    xvld $xr0, $a0, 0
-; LA32-NEXT:    xvpermi.q $xr1, $xr0, 1
-; LA32-NEXT:    vmin.wu $vr0, $vr0, $vr1
-; LA32-NEXT:    vbsrl.v $vr1, $vr0, 8
-; LA32-NEXT:    vmin.wu $vr0, $vr1, $vr0
-; LA32-NEXT:    vbsrl.v $vr1, $vr0, 4
-; LA32-NEXT:    vmin.wu $vr0, $vr1, $vr0
-; LA32-NEXT:    vpickve2gr.w $a0, $vr0, 0
-; LA32-NEXT:    st.w $a0, $a1, 0
-; LA32-NEXT:    ret
-;
-; LA64-LABEL: vec_reduce_umin_v8i32:
-; LA64:       # %bb.0:
-; LA64-NEXT:    xvld $xr0, $a0, 0
-; LA64-NEXT:    xvpermi.q $xr1, $xr0, 1
-; LA64-NEXT:    vmin.wu $vr0, $vr0, $vr1
-; LA64-NEXT:    vbsrl.v $vr1, $vr0, 8
-; LA64-NEXT:    vmin.wu $vr0, $vr1, $vr0
-; LA64-NEXT:    vbsrl.v $vr1, $vr0, 4
-; LA64-NEXT:    vmin.wu $vr0, $vr1, $vr0
-; LA64-NEXT:    vstelm.w $vr0, $a1, 0, 0
-; LA64-NEXT:    ret
+; CHECK-LABEL: vec_reduce_umin_v8i32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    xvld $xr0, $a0, 0
+; CHECK-NEXT:    xvpermi.q $xr1, $xr0, 1
+; CHECK-NEXT:    vmin.wu $vr0, $vr0, $vr1
+; CHECK-NEXT:    vbsrl.v $vr1, $vr0, 8
+; CHECK-NEXT:    vmin.wu $vr0, $vr1, $vr0
+; CHECK-NEXT:    vbsrl.v $vr1, $vr0, 4
+; CHECK-NEXT:    vmin.wu $vr0, $vr1, $vr0
+; CHECK-NEXT:    vstelm.w $vr0, $a1, 0, 0
+; CHECK-NEXT:    ret
   %v = load <8 x i32>, ptr %src
   %res = call i32 @llvm.vector.reduce.umin.v8i32(<8 x i32> %v)
   store i32 %res, ptr %dst
diff --git a/llvm/test/CodeGen/LoongArch/lasx/vec-reduce-xor.ll b/llvm/test/CodeGen/LoongArch/lasx/vec-reduce-xor.ll
index af1a66b574c03..2145947523fa9 100644
--- a/llvm/test/CodeGen/LoongArch/lasx/vec-reduce-xor.ll
+++ b/llvm/test/CodeGen/LoongArch/lasx/vec-reduce-xor.ll
@@ -45,30 +45,17 @@ define void @vec_reduce_xor_v16i16(ptr %src, ptr %dst) nounwind {
 }
 
 define void @vec_reduce_xor_v8i32(ptr %src, ptr %dst) nounwind {
-; LA32-LABEL: vec_reduce_xor_v8i32:
-; LA32:       # %bb.0:
-; LA32-NEXT:    xvld $xr0, $a0, 0
-; LA32-NEXT:    xvpermi.q $xr1, $xr0, 1
-; LA32-NEXT:    vxor.v $vr0, $vr0, $vr1
-; LA32-NEXT:    vbsrl.v $vr1, $vr0, 8
-; LA32-NEXT:    vxor.v $vr0, $vr1, $vr0
-; LA32-NEXT:    vbsrl.v $vr1, $vr0, 4
-; LA32-NEXT:    vxor.v $vr0, $vr1, $vr0
-; LA32-NEXT:    vpickve2gr.w $a0, $vr0, 0
-; LA32-NEXT:    st.w $a0, $a1, 0
-; LA32-NEXT:    ret
-;
-; LA64-LABEL: vec_reduce_xor_v8i32:
-; LA64:       # %bb.0:
-; LA64-NEXT:    xvld $xr0, $a0, 0
-; LA64-NEXT:    xvpermi.q $xr1, $xr0, 1
-; LA64-NEXT:    vxor.v $vr0, $vr0, $vr1
-; LA64-NEXT:    vbsrl.v $vr1, $vr0, 8
-; LA64-NEXT:    vxor.v $vr0, $vr1, $vr0
-; LA64-NEXT:    vbsrl.v $vr1, $vr0, 4
-; LA64-NEXT:    vxor.v $vr0, $vr1, $vr0
-; LA64-NEXT:    vstelm.w $vr0, $a1, 0, 0
-; LA64-NEXT:    ret
+; CHECK-LABEL: vec_reduce_xor_v8i32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    xvld $xr0, $a0, 0
+; CHECK-NEXT:    xvpermi.q $xr1, $xr0, 1
+; CHECK-NEXT:    vxor.v $vr0, $vr0, $vr1
+; CHECK-NEXT:    vbsrl.v $vr1, $vr0, 8
+; CHECK-NEXT:    vxor.v $vr0, $vr1, $vr0
+; CHECK-NEXT:    vbsrl.v $vr1, $vr0, 4
+; CHECK-NEXT:    vxor.v $vr0, $vr1, $vr0
+; CHECK-NEXT:    vstelm.w $vr0, $a1, 0, 0
+; CHECK-NEXT:    ret
   %v = load <8 x i32>, ptr %src
   %res = call i32 @llvm.vector.reduce.xor.v8i32(<8 x i32> %v)
   store i32 %res, ptr %dst
diff --git a/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/extractelement.ll b/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/extractelement.ll
index b17a90e71e85a..144835d92d0fc 100644
--- a/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/extractelement.ll
+++ b/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/extractelement.ll
@@ -27,18 +27,11 @@ define void @extract_8xi16(ptr %src, ptr %dst) nounwind {
 }
 
 define void @extract_4xi32(ptr %src, ptr %dst) nounwind {
-; LA32-LABEL: extract_4xi32:
-; LA32:       # %bb.0:
-; LA32-NEXT:    vld $vr0, $a0, 0
-; LA32-NEXT:    vpickve2gr.w $a0, $vr0, 1
-; LA32-NEXT:    st.w $a0, $a1, 0
-; LA32-NEXT:    ret
-;
-; LA64-LABEL: extract_4xi32:
-; LA64:       # %bb.0:
-; LA64-NEXT:    vld $vr0, $a0, 0
-; LA64-NEXT:    vstelm.w $vr0, $a1, 0, 1
-; LA64-NEXT:    ret
+; CHECK-LABEL: extract_4xi32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vld $vr0, $a0, 0
+; CHECK-NEXT:    vstelm.w $vr0, $a1, 0, 1
+; CHECK-NEXT:    ret
   %v = load volatile <4 x i32>, ptr %src
   %e = extractelement <4 x i32> %v, i32 1
   store i32 %e, ptr %dst
@@ -49,10 +42,8 @@ define void @extract_2xi64(ptr %src, ptr %dst) nounwind {
 ; LA32-LABEL: extract_2xi64:
 ; LA32:       # %bb.0:
 ; LA32-NEXT:    vld $vr0, $a0, 0
-; LA32-NEXT:    vpickve2gr.w $a0, $vr0, 2
-; LA32-NEXT:    vpickve2gr.w $a2, $vr0, 3
-; LA32-NEXT:    st.w $a2, $a1, 4
-; LA32-NEXT:    st.w $a0, $a1, 0
+; LA32-NEXT:    vstelm.w $vr0, $a1, 4, 3
+; LA32-NEXT:    vstelm.w $vr0, $a1, 0, 2
 ; LA32-NEXT:    ret
 ;
 ; LA64-LABEL: extract_2xi64:
@@ -237,8 +228,8 @@ define void @eliminate_frame_index(<4 x i32> %a) nounwind {
 ; LA32-LABEL: eliminate_frame_index:
 ; LA32:       # %bb.0:
 ; LA32-NEXT:    addi.w $sp, $sp, -1040
-; LA32-NEXT:    vpickve2gr.w $a0, $vr0, 1
-; LA32-NEXT:    st.w $a0, $sp, 524
+; LA32-NEXT:    addi.w $a0, $sp, 524
+; LA32-NEXT:    vstelm.w $vr0, $a0, 0, 1
 ; LA32-NEXT:    addi.w $sp, $sp, 1040
 ; LA32-NEXT:    ret
 ;
diff --git a/llvm/test/CodeGen/LoongArch/lsx/vec-reduce-add.ll b/llvm/test/CodeGen/LoongArch/lsx/vec-reduce-add.ll
index 9c3a6f7be0542..43161d6b65717 100644
--- a/llvm/test/CodeGen/LoongArch/lsx/vec-reduce-add.ll
+++ b/llvm/test/CodeGen/LoongArch/lsx/vec-reduce-add.ll
@@ -194,8 +194,7 @@ define void @vec_reduce_add_v4i32(ptr %src, ptr %dst) nounwind {
 ; LA32-NEXT:    vld $vr0, $a0, 0
 ; LA32-NEXT:    vhaddw.d.w $vr0, $vr0, $vr0
 ; LA32-NEXT:    vhaddw.q.d $vr0, $vr0, $vr0
-; LA32-NEXT:    vpickve2gr.w $a0, $vr0, 0
-; LA32-NEXT:    st.w $a0, $a1, 0
+; LA32-NEXT:    vstelm.w $vr0, $a1, 0, 0
 ; LA32-NEXT:    ret
 ;
 ; LA64-LABEL: vec_reduce_add_v4i32:
@@ -220,8 +219,7 @@ define void @vec_reduce_add_v2i32(ptr %src, ptr %dst) nounwind {
 ; LA32-NEXT:    vinsgr2vr.w $vr0, $a2, 0
 ; LA32-NEXT:    vinsgr2vr.w $vr0, $a0, 1
 ; LA32-NEXT:    vhaddw.d.w $vr0, $vr0, $vr0
-; LA32-NEXT:    vpickve2gr.w $a0, $vr0, 0
-; LA32-NEXT:    st.w $a0, $a1, 0
+; LA32-NEXT:    vstelm.w $vr0, $a1, 0, 0
 ; LA32-NEXT:    ret
 ;
 ; LA64-LABEL: vec_reduce_add_v2i32:
diff --git a/llvm/test/CodeGen/LoongArch/lsx/vec-reduce-and.ll b/llvm/test/CodeGen/LoongArch/lsx/vec-reduce-and.ll
index 734ecba843a4e..0359d9bc15366 100644
--- a/llvm/test/CodeGen/LoongArch/lsx/vec-reduce-and.ll
+++ b/llvm/test/CodeGen/LoongArch/lsx/vec-reduce-and.ll
@@ -152,26 +152,15 @@ define void @vec_reduce_and_v2i16(ptr %src, ptr %dst) nounwind {
 }
 
 define void @vec_reduce_and_v4i32(ptr %src, ptr %dst) nounwind {
-; LA32-LABEL: vec_reduce_and_v4i32:
-; LA32:       # %bb.0:
-; LA32-NEXT:    vld $vr0, $a0, 0
-; LA32-NEXT:    vbsrl.v $vr1, $vr0, 8
-; LA32-NEXT:    vand.v $vr0, $vr1, $vr0
-; LA32-NEXT:    vbsrl.v $vr1, $vr0, 4
-; LA32-NEXT:    vand.v $vr0, $vr1, $vr0
-; LA32-NEXT:    vpickve2gr.w $a0, $vr0, 0
-; LA32-NEXT:    st.w $a0, $a1, 0
-; LA32-NEXT:    ret
-;
-; LA64-LABEL: vec_reduce_and_v4i32:
-; LA64:       # %bb.0:
-; LA64-NEXT:    vld $vr0, $a0, 0
-; LA64-NEXT:    vbsrl.v $vr1, $vr0, 8
-; LA64-NEXT:    vand.v $vr0, $vr1, $vr0
-; LA64-NEXT:    vbsrl.v $vr1, $vr0, 4
-; LA64-NEXT:    vand.v $vr0, $vr1, $vr0
-; LA64-NEXT:    vstelm.w $vr0, $a1, 0, 0
-; LA64-NEXT:    ret
+; CHECK-LABEL: vec_reduce_and_v4i32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vld $vr0, $a0, 0
+; CHECK-NEXT:    vbsrl.v $vr1, $vr0, 8
+; CHECK-NEXT:    vand.v $vr0, $vr1, $vr0
+; CHECK-NEXT:    vbsrl.v $vr1, $vr0, 4
+; CHECK-NEXT:    vand.v $vr0, $vr1, $vr0
+; CHECK-NEXT:    vstelm.w $vr0, $a1, 0, 0
+; CHECK-NEXT:    ret
   %v = load <4 x i32>, ptr %src
   %res = call i32 @llvm.vector.reduce.and.v4i32(<4 x i32> %v)
   store i32 %res, ptr %dst
@@ -188,8 +177,7 @@ define void @vec_reduce_and_v2i32(ptr %src, ptr %dst) nounwind {
 ; LA32-NEXT:    vinsgr2vr.w $vr1, $a0, 1
 ; LA32-NEXT:    vbsrl.v $vr1, $vr1, 4
 ; LA32-NEXT:    vand.v $vr0, $vr1, $vr0
-; LA32-NEXT:    vpickve2gr.w $a0, $vr0, 0
-; LA32-NEXT:    st.w $a0, $a1, 0
+; LA32-NEXT:    vstelm.w $vr0, $a1, 0, 0
 ; LA32-NEXT:    ret
 ;
 ; LA64-LABEL: vec_reduce_and_v2i32:
diff --git a/llvm/test/CodeGen/LoongArch/lsx/vec-reduce-or.ll b/llvm/test/CodeGen/LoongArch/lsx/vec-reduce-or.ll
index e833930830c3f..2323ad367b38c 100644
--- a/llvm/test/CodeGen/LoongArch/lsx/vec-reduce-or.ll
+++ b/llvm/test/CodeGen/LoongArch/lsx/vec-reduce-or.ll
@@ -152,26 +152,15 @@ define void @vec_reduce_or_v2i16(ptr %src, ptr %dst) nounwind {
 }
 
 define void @vec_reduce_or_v4i32(ptr %src, ptr %dst) nounwind {
-; LA32-LABEL: vec_reduce_or_v4i32:
-; LA32:       # %bb.0:
-; LA32-NEXT:    vld $vr0, $a0, 0
-; LA32-NEXT:    vbsrl.v $vr1, $vr0, 8
-; LA32-NEXT:    vor.v $vr0, $vr1, $vr0
-; LA32-NEXT:    vbsrl.v $vr1, $vr0, 4
-; LA32-NEXT:    vor.v $vr0, $vr1, $vr0
-; LA32-NEXT:    vpickve2gr.w $a0, $vr0, 0
-; LA32-NEXT:    st.w $a0, $a1, 0
-; LA32-NEXT:    ret
-;
-; LA64-LABEL: vec_reduce_or_v4i32:
-; LA64:       # %bb.0:
-; LA64-NEXT:    vld $vr0, $a0, 0
-; LA64-NEXT:    vbsrl.v $vr1, $vr0, 8
-; LA64-NEXT:    vor.v $vr0, $vr1, $vr0
-; LA64-NEXT:    vbsrl.v $vr1, $vr0, 4
-; LA64-NEXT:    vor.v $vr0, $vr1, $vr0
-; LA64-NEXT:    vstelm.w $vr0, $a1, 0, 0
-; LA64-NEXT:    ret
+; CHECK-LABEL: vec_reduce_or_v4i32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vld $vr0, $a0, 0
+; CHECK-NEXT:    vbsrl.v $vr1, $vr0, 8
+; CHECK-NEXT:    vor.v $vr0, $vr1, $vr0
+; CHECK-NEXT:    vbsrl.v $vr1, $vr0, 4
+; CHECK-NEXT:    vor.v $vr0, $vr1, $vr0
+; CHECK-NEXT:    vstelm.w $vr0, $a1, 0, 0
+; CHECK-NEXT:    ret
   %v = load <4 x i32>, ptr %src
   %res = call i32 @llvm.vector.reduce.or.v4i32(<4 x i32> %v)
   store i32 %res, ptr %dst
@@ -188,8 +177,7 @@ define void @vec_reduce_or_v2i32(ptr %src, ptr %dst) nounwind {
 ; LA32-NEXT:    vinsgr2vr.w $vr1, $a0, 1
 ; LA32-NEXT:    vbsrl.v $vr1, $vr1, 4
 ; LA32-NEXT:    vor.v $vr0, $vr1, $vr0
-; LA32-NEXT:    vpickve2gr.w $a0, $vr0, 0
-; LA32-NEXT:    st.w $a0, $a1, 0
+; LA32-NEXT:    vstelm.w $vr0, $a1, 0, 0
 ; LA32-NEXT:    ret
 ;
 ; LA64-LABEL: vec_reduce_or_v2i32:
diff --git a/llvm/test/CodeGen/LoongArch/lsx/vec-reduce-smax.ll b/llvm/test/CodeGen/LoongArch/lsx/vec-reduce-smax.ll
index 2220df68cddfd..c1f856a1aa998 100644
--- a/llvm/test/CodeGen/LoongArch/lsx/vec-reduce-smax.ll
+++ b/llvm/test/CodeGen/LoongArch/lsx/vec-reduce-smax.ll
@@ -152,26 +152,15 @@ define void @vec_reduce_smax_v2i16(ptr %src, ptr %dst) nounwind {
 }
 
 define void @vec_reduce_smax_v4i32(ptr %src, ptr %dst) nounwind {
-; LA32-LABEL: vec_reduce_smax_v4i32:
-; LA32:       # %bb.0:
-; LA32-NEXT:    vld $vr0, $a0, 0
-; LA32-NEXT:    vbsrl.v $vr1, $vr0, 8
-; LA32-NEXT:    vmax.w $vr0, $vr1, $vr0
-; LA32-NEXT:    vbsrl.v $vr1, $vr0, 4
-; LA32-NEXT:    vmax.w $vr0, $vr1, $vr0
-; LA32-NEXT:    vpickve2gr.w $a0, $vr0, 0
-; LA32-NEXT:    st.w $a0, $a1, 0
-; LA32-NEXT:    ret
-;
-; LA64-LABEL: vec_reduce_smax_v4i32:
-; LA64:       # %bb.0:
-; LA64-NEXT:    vld $vr0, $a0, 0
-; LA64-NEXT:    vbsrl.v $vr1, $vr0, 8
-; LA64-NEXT:    vmax.w $vr0, $vr1, $vr0
-; LA64-NEXT:    vbsrl.v $vr1, $vr0, 4
-; LA64-NEXT:    vmax.w $vr0, $vr1, $vr0
-; LA64-NEXT:    vstelm.w $vr0, $a1, 0, 0
-; LA64-NEXT:    ret
+; CHECK-LABEL: vec_reduce_smax_v4i32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vld $vr0, $a0, 0
+; CHECK-NEXT:    vbsrl.v $vr1, $vr0, 8
+; CHECK-NEXT:    vmax.w $vr0, $vr1, $vr0
+; CHECK-NEXT:    vbsrl.v $vr1, $vr0, 4
+; CHECK-NEXT:    vmax.w $vr0, $vr1, $vr0
+; CHECK-NEXT:    vstelm.w $vr0, $a1, 0, 0
+; CHECK-NEXT:    ret
   %v = load <4 x i32>, ptr %src
   %res = call i32 @llvm.vector.reduce.smax.v4i32(<4 x i32> %v)
   store i32 %res, ptr %dst
@@ -187,8 +176,7 @@ define void @vec_reduce_smax_v2i32(ptr %src, ptr %dst) nounwind {
 ; LA32-NEXT:    vinsgr2vr.w $vr0, $a0, 1
 ; LA32-NEXT:    vbsrl.v $vr1, $vr0, 4
 ; LA32-NEXT:    vmax.w $vr0, $vr1, $vr0
-; LA32-NEXT:    vpickve2gr.w $a0, $vr0, 0
-; LA32-NEXT:    st.w $a0, $a1, 0
+; LA32-NEXT:    vstelm.w $vr0, $a1, 0, 0
 ; LA32-NEXT:    ret
 ;
 ; LA64-LABEL: vec_reduce_smax_v2i32:
diff --git a/llvm/test/CodeGen/LoongArch/lsx/vec-reduce-smin.ll b/llvm/test/CodeGen/LoongArch/lsx/vec-reduce-smin.ll
index 50d76a3872e1e..3ed23503fbccd 100644
--- a/llvm/test/CodeGen/LoongArch/lsx/vec-reduce-smin.ll
+++ b/llvm/test/CodeGen/LoongArch/lsx/vec-reduce-smin.ll
@@ -152,26 +152,15 @@ define void @vec_reduce_smin_v2i16(ptr %src, ptr %dst) nounwind {
 }
 
 define void @vec_reduce_smin_v4i32(ptr %src, ptr %dst) nounwind {
-; LA32-LABEL: vec_reduce_smin_v4i32:
-; LA32:       # %bb.0:
-; LA32-NEXT:    vld $vr0, $a0, 0
-; LA32-NEXT:    vbsrl.v $vr1, $vr0, 8
-; LA32-NEXT:    vmin.w $vr0, $vr1, $vr0
-; LA32-NEXT:    vbsrl.v $vr1, $vr0, 4
-; LA32-NEXT:    vmin.w $vr0, $vr1, $vr0
-; LA32-NEXT:    vpickve2gr.w $a0, $vr0, 0
-; LA32-NEXT:    st.w $a0, $a1, 0
-; LA32-NEXT:    ret
-;
-; LA64-LABEL: vec_reduce_smin_v4i32:
-; LA64:       # %bb.0:
-; LA64-NEXT:    vld $vr0, $a0, 0
-; LA64-NEXT:    vbsrl.v $vr1, $vr0, 8
-; LA64-NEXT:    vmin.w $vr0, $vr1, $vr0
-; LA64-NEXT:    vbsrl.v $vr1, $vr0, 4
-; LA64-NEXT:    vmin.w $vr0, $vr1, $vr0
-; LA64-NEXT:    vstelm.w $vr0, $a1, 0, 0
-; LA64-NEXT:    ret
+; CHECK-LABEL: vec_reduce_smin_v4i32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vld $vr0, $a0, 0
+; CHECK-NEXT:    vbsrl.v $vr1, $vr0, 8
+; CHECK-NEXT:    vmin.w $vr0, $vr1, $vr0
+; CHECK-NEXT:    vbsrl.v $vr1, $vr0, 4
+; CHECK-NEXT:    vmin.w $vr0, $vr1, $vr0
+; CHECK-NEXT:    vstelm.w $vr0, $a1, 0, 0
+; CHECK-NEXT:    ret
   %v = load <4 x i32>, ptr %src
   %res = call i32 @llvm.vector.reduce.smin.v4i32(<4 x i32> %v)
   store i32 %res, ptr %dst
@@ -187,8 +176,7 @@ define void @vec_reduce_smin_v2i32(ptr %src, ptr %dst) nounwind {
 ; LA32-NEXT:    vinsgr2vr.w $vr0, $a0, 1
 ; LA32-NEXT:    vbsrl.v $vr1, $vr0, 4
 ; LA32-NEXT:    vmin.w $vr0, $vr1, $vr0
-; LA32-NEXT:    vpickve2gr.w $a0, $vr0, 0
-; LA32-NEXT:    st.w $a0, $a1, 0
+; LA32-NEXT:    vstelm.w $vr0, $a1, 0, 0
 ; LA32-NEXT:    ret
 ;
 ; LA64-LABEL: vec_reduce_smin_v2i32:
diff --git a/llvm/test/CodeGen/LoongArch/lsx/vec-reduce-umax.ll b/llvm/test/CodeGen/LoongArch/lsx/vec-reduce-umax.ll
index 88146c78a969d..5504d2c5d0b6d 100644
--- a/llvm/test/CodeGen/LoongArch/lsx/vec-reduce-umax.ll
+++ b/llvm/test/CodeGen/LoongArch/lsx/vec-reduce-umax.ll
@@ -152,26 +152,15 @@ define void @vec_reduce_umax_v2i16(ptr %src, ptr %dst) nounwind {
 }
 
 define void @vec_reduce_umax_v4i32(ptr %src, ptr %dst) nounwind {
-; LA32-LABEL: vec_reduce_umax_v4i32:
-; LA32:       # %bb.0:
-; LA32-NEXT:    vld $vr0, $a0, 0
-; LA32-NEXT:    vbsrl.v $vr1, $vr0, 8
-; LA32-NEXT:    vmax.wu $vr0, $vr1, $vr0
-; LA32-NEXT:    vbsrl.v $vr1, $vr0, 4
-; LA32-NEXT:    vmax.wu $vr0, $vr1, $vr0
-; LA32-NEXT:    vpickve2gr.w $a0, $vr0, 0
-; LA32-NEXT:    st.w $a0, $a1, 0
-; LA32-NEXT:    ret
-;
-; LA64-LABEL: vec_reduce_umax_v4i32:
-; LA64:       # %bb.0:
-; LA64-NEXT:    vld $vr0, $a0, 0
-; LA64-NEXT:    vbsrl.v $vr1, $vr0, 8
-; LA64-NEXT:    vmax.wu $vr0, $vr1, $vr0
-; LA64-NEXT:    vbsrl.v $vr1, $vr0, 4
-; LA64-NEXT:    vmax.wu $vr0, $vr1, $vr0
-; LA64-NEXT:    vstelm.w $vr0, $a1, 0, 0
-; LA64-NEXT:    ret
+; CHECK-LABEL: vec_reduce_umax_v4i32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vld $vr0, $a0, 0
+; CHECK-NEXT:    vbsrl.v $vr1, $vr0, 8
+; CHECK-NEXT:    vmax.wu $vr0, $vr1, $vr0
+; CHECK-NEXT:    vbsrl.v $vr1, $vr0, 4
+; CHECK-NEXT:    vmax.wu $vr0, $vr1, $vr0
+; CHECK-NEXT:    vstelm.w $vr0, $a1, 0, 0
+; CHECK-NEXT:    ret
   %v = load <4 x i32>, ptr %src
   %res = call i32 @llvm.vector.reduce.umax.v4i32(<4 x i32> %v)
   store i32 %res, ptr %dst
@@ -187,8 +176,7 @@ define void @vec_reduce_umax_v2i32(ptr %src, ptr %dst) nounwind {
 ; LA32-NEXT:    vinsgr2vr.w $vr0, $a0, 1
 ; LA32-NEXT:    vbsrl.v $vr1, $vr0, 4
 ; LA32-NEXT:    vmax.wu $vr0, $vr1, $vr0
-; LA32-NEXT:    vpickve2gr.w $a0, $vr0, 0
-; LA32-NEXT:    st.w $a0, $a1, 0
+; LA32-NEXT:    vstelm.w $vr0, $a1, 0, 0
 ; LA32-NEXT:    ret
 ;
 ; LA64-LABEL: vec_reduce_umax_v2i32:
diff --git a/llvm/test/CodeGen/LoongArch/lsx/vec-reduce-umin.ll b/llvm/test/CodeGen/LoongArch/lsx/vec-reduce-umin.ll
index e9d4b4aab6f91..7e5b4d83dbc14 100644
--- a/llvm/test/CodeGen/LoongArch/lsx/vec-reduce-umin.ll
+++ b/llvm/test/CodeGen/LoongArch/lsx/vec-reduce-umin.ll
@@ -152,26 +152,15 @@ define void @vec_reduce_umin_v2i16(ptr %src, ptr %dst) nounwind {
 }
 
 define void @vec_reduce_umin_v4i32(ptr %src, ptr %dst) nounwind {
-; LA32-LABEL: vec_reduce_umin_v4i32:
-; LA32:       # %bb.0:
-; LA32-NEXT:    vld $vr0, $a0, 0
-; LA32-NEXT:    vbsrl.v $vr1, $vr0, 8
-; LA32-NEXT:    vmin.wu $vr0, $vr1, $vr0
-; LA32-NEXT:    vbsrl.v $vr1, $vr0, 4
-; LA32-NEXT:    vmin.wu $vr0, $vr1, $vr0
-; LA32-NEXT:    vpickve2gr.w $a0, $vr0, 0
-; LA32-NEXT:    st.w $a0, $a1, 0
-; LA32-NEXT:    ret
-;
-; LA64-LABEL: vec_reduce_umin_v4i32:
-; LA64:       # %bb.0:
-; LA64-NEXT:    vld $vr0, $a0, 0
-; LA64-NEXT:    vbsrl.v $vr1, $vr0, 8
-; LA64-NEXT:    vmin.wu $vr0, $vr1, $vr0
-; LA64-NEXT:    vbsrl.v $vr1, $vr0, 4
-; LA64-NEXT:    vmin.wu $vr0, $vr1, $vr0
-; LA64-NEXT:    vstelm.w $vr0, $a1, 0, 0
-; LA64-NEXT:    ret
+; CHECK-LABEL: vec_reduce_umin_v4i32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vld $vr0, $a0, 0
+; CHECK-NEXT:    vbsrl.v $vr1, $vr0, 8
+; CHECK-NEXT:    vmin.wu $vr0, $vr1, $vr0
+; CHECK-NEXT:    vbsrl.v $vr1, $vr0, 4
+; CHECK-NEXT:    vmin.wu $vr0, $vr1, $vr0
+; CHECK-NEXT:    vstelm.w $vr0, $a1, 0, 0
+; CHECK-NEXT:    ret
   %v = load <4 x i32>, ptr %src
   %res = call i32 @llvm.vector.reduce.umin.v4i32(<4 x i32> %v)
   store i32 %res, ptr %dst
@@ -187,8 +176,7 @@ define void @vec_reduce_umin_v2i32(ptr %src, ptr %dst) nounwind {
 ; LA32-NEXT:    vinsgr2vr.w $vr0, $a0, 1
 ; LA32-NEXT:    vbsrl.v $vr1, $vr0, 4
 ; LA32-NEXT:    vmin.wu $vr0, $vr1, $vr0
-; LA32-NEXT:    vpickve2gr.w $a0, $vr0, 0
-; LA32-NEXT:    st.w $a0, $a1, 0
+; LA32-NEXT:    vstelm.w $vr0, $a1, 0, 0
 ; LA32-NEXT:    ret
 ;
 ; LA64-LABEL: vec_reduce_umin_v2i32:
diff --git a/llvm/test/CodeGen/LoongArch/lsx/vec-reduce-xor.ll b/llvm/test/CodeGen/LoongArch/lsx/vec-reduce-xor.ll
index ed965e9e10ee7..053f81d8b93df 100644
--- a/llvm/test/CodeGen/LoongArch/lsx/vec-reduce-xor.ll
+++ b/llvm/test/CodeGen/LoongArch/lsx/vec-reduce-xor.ll
@@ -152,26 +152,15 @@ define void @vec_reduce_xor_v2i16(ptr %src, ptr %dst) nounwind {
 }
 
 define void @vec_reduce_xor_v4i32(ptr %src, ptr %dst) nounwind {
-; LA32-LABEL: vec_reduce_xor_v4i32:
-; LA32:       # %bb.0:
-; LA32-NEXT:    vld $vr0, $a0, 0
-; LA32-NEXT:    vbsrl.v $vr1, $vr0, 8
-; LA32-NEXT:    vxor.v $vr0, $vr1, $vr0
-; LA32-NEXT:    vbsrl.v $vr1, $vr0, 4
-; LA32-NEXT:    vxor.v $vr0, $vr1, $vr0
-; LA32-NEXT:    vpickve2gr.w $a0, $vr0, 0
-; LA32-NEXT:    st.w $a0, $a1, 0
-; LA32-NEXT:    ret
-;
-; LA64-LABEL: vec_reduce_xor_v4i32:
-; LA64:       # %bb.0:
-; LA64-NEXT:    vld $vr0, $a0, 0
-; LA64-NEXT:    vbsrl.v $vr1, $vr0, 8
-; LA64-NEXT:    vxor.v $vr0, $vr1, $vr0
-; LA64-NEXT:    vbsrl.v $vr1, $vr0, 4
-; LA64-NEXT:    vxor.v $vr0, $vr1, $vr0
-; LA64-NEXT:    vstelm.w $vr0, $a1, 0, 0
-; LA64-NEXT:    ret
+; CHECK-LABEL: vec_reduce_xor_v4i32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vld $vr0, $a0, 0
+; CHECK-NEXT:    vbsrl.v $vr1, $vr0, 8
+; CHECK-NEXT:    vxor.v $vr0, $vr1, $vr0
+; CHECK-NEXT:    vbsrl.v $vr1, $vr0, 4
+; CHECK-NEXT:    vxor.v $vr0, $vr1, $vr0
+; CHECK-NEXT:    vstelm.w $vr0, $a1, 0, 0
+; CHECK-NEXT:    ret
   %v = load <4 x i32>, ptr %src
   %res = call i32 @llvm.vector.reduce.xor.v4i32(<4 x i32> %v)
   store i32 %res, ptr %dst
@@ -188,8 +177,7 @@ define void @vec_reduce_xor_v2i32(ptr %src, ptr %dst) nounwind {
 ; LA32-NEXT:    vinsgr2vr.w $vr1, $a0, 1
 ; LA32-NEXT:    vbsrl.v $vr1, $vr1, 4
 ; LA32-NEXT:    vxor.v $vr0, $vr1, $vr0
-; LA32-NEXT:    vpickve2gr.w $a0, $vr0, 0
-; LA32-NEXT:    st.w $a0, $a1, 0
+; LA32-NEXT:    vstelm.w $vr0, $a1, 0, 0
 ; LA32-NEXT:    ret
 ;
 ; LA64-LABEL: vec_reduce_xor_v2i32:
diff --git a/llvm/test/CodeGen/LoongArch/lsx/vec-trunc.ll b/llvm/test/CodeGen/LoongArch/lsx/vec-trunc.ll
index 314350acd23d6..c30ecd1c7a0e9 100644
--- a/llvm/test/CodeGen/LoongArch/lsx/vec-trunc.ll
+++ b/llvm/test/CodeGen/LoongArch/lsx/vec-trunc.ll
@@ -6,10 +6,8 @@ define void @load_trunc_2i64_to_2i32(ptr %ptr, ptr %dst) nounwind {
 ; LA32-LABEL: load_trunc_2i64_to_2i32:
 ; LA32:       # %bb.0:
 ; LA32-NEXT:    vld $vr0, $a0, 0
-; LA32-NEXT:    vpickve2gr.w $a0, $vr0, 2
-; LA32-NEXT:    st.w $a0, $a1, 4
-; LA32-NEXT:    vpickve2gr.w $a0, $vr0, 0
-; LA32-NEXT:    st.w $a0, $a1, 0
+; LA32-NEXT:    vstelm.w $vr0, $a1, 4, 2
+; LA32-NEXT:    vstelm.w $vr0, $a1, 0, 0
 ; LA32-NEXT:    ret
 ;
 ; LA64-LABEL: load_trunc_2i64_to_2i32:
@@ -31,8 +29,7 @@ define void @load_trunc_2i64_to_2i16(ptr %ptr, ptr %dst) nounwind {
 ; LA32-NEXT:    pcalau12i $a0, %pc_hi20(.LCPI1_0)
 ; LA32-NEXT:    vld $vr1, $a0, %pc_lo12(.LCPI1_0)
 ; LA32-NEXT:    vshuf.h $vr1, $vr0, $vr0
-; LA32-NEXT:    vpickve2gr.w $a0, $vr1, 0
-; LA32-NEXT:    st.w $a0, $a1, 0
+; LA32-NEXT:    vstelm.w $vr1, $a1, 0, 0
 ; LA32-NEXT:    ret
 ;
 ; LA64-LABEL: load_trunc_2i64_to_2i16:
@@ -78,10 +75,8 @@ define void @load_trunc_4i32_to_4i16(ptr %ptr, ptr %dst) nounwind {
 ; LA32:       # %bb.0:
 ; LA32-NEXT:    vld $vr0, $a0, 0
 ; LA32-NEXT:    vpickev.h $vr0, $vr0, $vr0
-; LA32-NEXT:    vpickve2gr.w $a0, $vr0, 1
-; LA32-NEXT:    st.w $a0, $a1, 4
-; LA32-NEXT:    vpickve2gr.w $a0, $vr0, 0
-; LA32-NEXT:    st.w $a0, $a1, 0
+; LA32-NEXT:    vstelm.w $vr0, $a1, 4, 1
+; LA32-NEXT:    vstelm.w $vr0, $a1, 0, 0
 ; LA32-NEXT:    ret
 ;
 ; LA64-LABEL: load_trunc_4i32_to_4i16:
@@ -103,8 +98,7 @@ define void @load_trunc_4i32_to_4i8(ptr %ptr, ptr %dst) nounwind {
 ; LA32-NEXT:    pcalau12i $a0, %pc_hi20(.LCPI4_0)
 ; LA32-NEXT:    vld $vr1, $a0, %pc_lo12(.LCPI4_0)
 ; LA32-NEXT:    vshuf.b $vr0, $vr0, $vr0, $vr1
-; LA32-NEXT:    vpickve2gr.w $a0, $vr0, 0
-; LA32-NEXT:    st.w $a0, $a1, 0
+; LA32-NEXT:    vstelm.w $vr0, $a1, 0, 0
 ; LA32-NEXT:    ret
 ;
 ; LA64-LABEL: load_trunc_4i32_to_4i8:
@@ -126,10 +120,8 @@ define void @load_trunc_8i16_to_8i8(ptr %ptr, ptr %dst) nounwind {
 ; LA32:       # %bb.0:
 ; LA32-NEXT:    vld $vr0, $a0, 0
 ; LA32-NEXT:    vpickev.b $vr0, $vr0, $vr0
-; LA32-NEXT:    vpickve2gr.w $a0, $vr0, 1
-; LA32-NEXT:    st.w $a0, $a1, 4
-; LA32-NEXT:    vpickve2gr.w $a0, $vr0, 0
-; LA32-NEXT:    st.w $a0, $a1, 0
+; LA32-NEXT:    vstelm.w $vr0, $a1, 4, 1
+; LA32-NEXT:    vstelm.w $vr0, $a1, 0, 0
 ; LA32-NEXT:    ret
 ;
 ; LA64-LABEL: load_trunc_8i16_to_8i8:
@@ -152,8 +144,7 @@ define void @load_trunc_2i32_to_2i16(ptr %ptr, ptr %dst) nounwind {
 ; LA32-NEXT:    vinsgr2vr.w $vr0, $a2, 0
 ; LA32-NEXT:    vinsgr2vr.w $vr0, $a0, 1
 ; LA32-NEXT:    vshuf4i.h $vr0, $vr0, 8
-; LA32-NEXT:    vpickve2gr.w $a0, $vr0, 0
-; LA32-NEXT:    st.w $a0, $a1, 0
+; LA32-NEXT:    vstelm.w $vr0, $a1, 0, 0
 ; LA32-NEXT:    ret
 ;
 ; LA64-LABEL: load_trunc_2i32_to_2i16:
@@ -205,8 +196,7 @@ define void @load_trunc_4i16_to_4i8(ptr %ptr, ptr %dst) nounwind {
 ; LA32-NEXT:    vinsgr2vr.w $vr0, $a2, 0
 ; LA32-NEXT:    vinsgr2vr.w $vr0, $a0, 1
 ; LA32-NEXT:    vpickev.b $vr0, $vr0, $vr0
-; LA32-NEXT:    vpickve2gr.w $a0, $vr0, 0
-; LA32-NEXT:    st.w $a0, $a1, 0
+; LA32-NEXT:    vstelm.w $vr0, $a1, 0, 0
 ; LA32-NEXT:    ret
 ;
 ; LA64-LABEL: load_trunc_4i16_to_4i8:
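
Editor's note: the angle-bracketed template arguments of every "defm : VstelmPat<...>;" line in the two .td hunks above were lost in extraction, leaving bare "defm : VstelmPat;"; the exact argument lists are not recoverable from this copy and should be taken from the LLVM tree. As a hedged sketch only, an instantiation of this multiclass has the shape below; the store node, vector type, instruction, and immediate-operand names are illustrative assumptions, not the verbatim arguments of this patch:

    // Illustrative sketch, not the verbatim patch: a VstelmPat
    // instantiation matches (store (vector_extract vec, idx), addr)
    // and selects a single vstelm.w store of one 32-bit element,
    // which is what the updated LA32 check lines above exercise.
    // All parameter names here are assumptions.
    let Predicates = [IsLA32] in {
      defm : VstelmPat<store, v4i32, VSTELM_W, simm8_lsl2, uimm2>;
    } // Predicates = [IsLA32]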