Skip to content

Conversation

ylzsx
Copy link
Contributor

@ylzsx ylzsx commented Sep 29, 2025

No description provided.

@llvmbot
Copy link
Member

llvmbot commented Sep 29, 2025

@llvm/pr-subscribers-backend-loongarch

Author: Zhaoxin Yang (ylzsx)

Changes

Patch is 20.03 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/161115.diff

2 Files Affected:

  • (added) llvm/test/CodeGen/LoongArch/lasx/rotl-rotr.ll (+283)
  • (added) llvm/test/CodeGen/LoongArch/lsx/rotl-rotr.ll (+283)
diff --git a/llvm/test/CodeGen/LoongArch/lasx/rotl-rotr.ll b/llvm/test/CodeGen/LoongArch/lasx/rotl-rotr.ll
new file mode 100644
index 0000000000000..f9f024dda973c
--- /dev/null
+++ b/llvm/test/CodeGen/LoongArch/lasx/rotl-rotr.ll
@@ -0,0 +1,283 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6
+; RUN: llc --mtriple=loongarch32 --mattr=+32s,+lasx < %s | FileCheck %s --check-prefixes=CHECK,LA32
+; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s --check-prefixes=CHECK,LA64
+
+define void @rotl_v32i8(ptr %dst, ptr %src, i8 signext %a0) nounwind { ; rotate-left idiom per i8 lane: (x << a) | (x >> (8 - a)), a = splat of %a0
+; CHECK-LABEL: rotl_v32i8:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    xvld $xr0, $a1, 0
+; CHECK-NEXT:    xvreplgr2vr.b $xr1, $a2
+; CHECK-NEXT:    xvrepli.b $xr2, 8
+; CHECK-NEXT:    xvsub.b $xr2, $xr2, $xr1
+; CHECK-NEXT:    xvsll.b $xr1, $xr0, $xr1
+; CHECK-NEXT:    xvsrl.b $xr0, $xr0, $xr2
+; CHECK-NEXT:    xvor.v $xr0, $xr1, $xr0
+; CHECK-NEXT:    xvst $xr0, $a0, 0
+; CHECK-NEXT:    ret
+  %v0 = load <32 x i8>, ptr %src
+  %v1.ele = insertelement <32 x i8> poison, i8 %a0, i8 0
+  %v1 = shufflevector <32 x i8> %v1.ele, <32 x i8> poison, <32 x i32> zeroinitializer ; all-zero mask broadcasts lane 0 (splat of %a0)
+  %v1.sub = sub <32 x i8> splat (i8 8), %v1 ; complementary shift amount: 8 - a
+  %b = shl <32 x i8> %v0, %v1
+  %c = lshr <32 x i8> %v0, %v1.sub
+  %d = or <32 x i8> %b, %c ; combine the two shifted halves
+  store <32 x i8> %d, ptr %dst
+  ret void
+}
+
+define void @rotr_v32i8(ptr %dst, ptr %src, i8 signext %a0) nounwind { ; rotate-right idiom per i8 lane: (x >> a) | (x << (8 - a)), a = splat of %a0
+; CHECK-LABEL: rotr_v32i8:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    xvld $xr0, $a1, 0
+; CHECK-NEXT:    xvreplgr2vr.b $xr1, $a2
+; CHECK-NEXT:    xvrepli.b $xr2, 8
+; CHECK-NEXT:    xvsub.b $xr2, $xr2, $xr1
+; CHECK-NEXT:    xvsrl.b $xr1, $xr0, $xr1
+; CHECK-NEXT:    xvsll.b $xr0, $xr0, $xr2
+; CHECK-NEXT:    xvor.v $xr0, $xr1, $xr0
+; CHECK-NEXT:    xvst $xr0, $a0, 0
+; CHECK-NEXT:    ret
+  %v0 = load <32 x i8>, ptr %src
+  %v1.ele = insertelement <32 x i8> poison, i8 %a0, i8 0
+  %v1 = shufflevector <32 x i8> %v1.ele, <32 x i8> poison, <32 x i32> zeroinitializer ; all-zero mask broadcasts lane 0 (splat of %a0)
+  %v1.sub = sub <32 x i8> splat (i8 8), %v1 ; complementary shift amount: 8 - a
+  %b = lshr <32 x i8> %v0, %v1
+  %c = shl <32 x i8> %v0, %v1.sub
+  %d = or <32 x i8> %b, %c ; combine the two shifted halves
+  store <32 x i8> %d, ptr %dst
+  ret void
+}
+
+define void @rotr_v32i8_imm(ptr %dst, ptr %src) nounwind { ; rotate-right idiom per i8 lane by the constant 2: (x >> 2) | (x << 6)
+; CHECK-LABEL: rotr_v32i8_imm:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    xvld $xr0, $a1, 0
+; CHECK-NEXT:    xvsrli.b $xr1, $xr0, 2
+; CHECK-NEXT:    xvslli.b $xr0, $xr0, 6
+; CHECK-NEXT:    xvor.v $xr0, $xr0, $xr1
+; CHECK-NEXT:    xvst $xr0, $a0, 0
+; CHECK-NEXT:    ret
+  %v0 = load <32 x i8>, ptr %src
+  %b = lshr <32 x i8> %v0, splat (i8 2)
+  %c = shl <32 x i8> %v0, splat (i8 6) ; 6 = 8 - 2, the complementary shift amount
+  %d = or <32 x i8> %b, %c ; combine the two shifted halves
+  store <32 x i8> %d, ptr %dst
+  ret void
+}
+
+define void @rotl_v16i16(ptr %dst, ptr %src, i16 signext %a0) nounwind { ; rotate-left idiom per i16 lane: (x << a) | (x >> (16 - a)), a = splat of %a0
+; CHECK-LABEL: rotl_v16i16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    xvld $xr0, $a1, 0
+; CHECK-NEXT:    xvreplgr2vr.h $xr1, $a2
+; CHECK-NEXT:    xvrepli.h $xr2, 16
+; CHECK-NEXT:    xvsub.h $xr2, $xr2, $xr1
+; CHECK-NEXT:    xvsll.h $xr1, $xr0, $xr1
+; CHECK-NEXT:    xvsrl.h $xr0, $xr0, $xr2
+; CHECK-NEXT:    xvor.v $xr0, $xr1, $xr0
+; CHECK-NEXT:    xvst $xr0, $a0, 0
+; CHECK-NEXT:    ret
+  %v0 = load <16 x i16>, ptr %src
+  %v1.ele = insertelement <16 x i16> poison, i16 %a0, i16 0
+  %v1 = shufflevector <16 x i16> %v1.ele, <16 x i16> poison, <16 x i32> zeroinitializer ; all-zero mask broadcasts lane 0 (splat of %a0)
+  %v1.sub = sub <16 x i16> splat (i16 16), %v1 ; complementary shift amount: 16 - a
+  %b = shl <16 x i16> %v0, %v1
+  %c = lshr <16 x i16> %v0, %v1.sub
+  %d = or <16 x i16> %b, %c ; combine the two shifted halves
+  store <16 x i16> %d, ptr %dst
+  ret void
+}
+
+define void @rotr_v16i16(ptr %dst, ptr %src, i16 signext %a0) nounwind { ; rotate-right idiom per i16 lane: (x >> a) | (x << (16 - a)), a = splat of %a0
+; CHECK-LABEL: rotr_v16i16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    xvld $xr0, $a1, 0
+; CHECK-NEXT:    xvreplgr2vr.h $xr1, $a2
+; CHECK-NEXT:    xvrepli.h $xr2, 16
+; CHECK-NEXT:    xvsub.h $xr2, $xr2, $xr1
+; CHECK-NEXT:    xvsrl.h $xr1, $xr0, $xr1
+; CHECK-NEXT:    xvsll.h $xr0, $xr0, $xr2
+; CHECK-NEXT:    xvor.v $xr0, $xr1, $xr0
+; CHECK-NEXT:    xvst $xr0, $a0, 0
+; CHECK-NEXT:    ret
+  %v0 = load <16 x i16>, ptr %src
+  %v1.ele = insertelement <16 x i16> poison, i16 %a0, i16 0
+  %v1 = shufflevector <16 x i16> %v1.ele, <16 x i16> poison, <16 x i32> zeroinitializer ; all-zero mask broadcasts lane 0 (splat of %a0)
+  %v1.sub = sub <16 x i16> splat (i16 16), %v1 ; complementary shift amount: 16 - a
+  %b = lshr <16 x i16> %v0, %v1
+  %c = shl <16 x i16> %v0, %v1.sub
+  %d = or <16 x i16> %b, %c ; combine the two shifted halves
+  store <16 x i16> %d, ptr %dst
+  ret void
+}
+
+define void @rotr_v16i16_imm(ptr %dst, ptr %src) nounwind { ; rotate-right idiom per i16 lane by the constant 2: (x >> 2) | (x << 14)
+; CHECK-LABEL: rotr_v16i16_imm:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    xvld $xr0, $a1, 0
+; CHECK-NEXT:    xvsrli.h $xr1, $xr0, 2
+; CHECK-NEXT:    xvslli.h $xr0, $xr0, 14
+; CHECK-NEXT:    xvor.v $xr0, $xr0, $xr1
+; CHECK-NEXT:    xvst $xr0, $a0, 0
+; CHECK-NEXT:    ret
+  %v0 = load <16 x i16>, ptr %src
+  %b = lshr <16 x i16> %v0, splat (i16 2)
+  %c = shl <16 x i16> %v0, splat (i16 14) ; 14 = 16 - 2, the complementary shift amount
+  %d = or <16 x i16> %b, %c ; combine the two shifted halves
+  store <16 x i16> %d, ptr %dst
+  ret void
+}
+
+define void @rotl_v8i32(ptr %dst, ptr %src, i32 signext %a0) nounwind { ; rotate-left idiom per i32 lane: (x << a) | (x >> (32 - a)), a = splat of %a0
+; CHECK-LABEL: rotl_v8i32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    xvld $xr0, $a1, 0
+; CHECK-NEXT:    xvreplgr2vr.w $xr1, $a2
+; CHECK-NEXT:    xvrepli.w $xr2, 32
+; CHECK-NEXT:    xvsub.w $xr2, $xr2, $xr1
+; CHECK-NEXT:    xvsll.w $xr1, $xr0, $xr1
+; CHECK-NEXT:    xvsrl.w $xr0, $xr0, $xr2
+; CHECK-NEXT:    xvor.v $xr0, $xr1, $xr0
+; CHECK-NEXT:    xvst $xr0, $a0, 0
+; CHECK-NEXT:    ret
+  %v0 = load <8 x i32>, ptr %src
+  %v1.ele = insertelement <8 x i32> poison, i32 %a0, i32 0
+  %v1 = shufflevector <8 x i32> %v1.ele, <8 x i32> poison, <8 x i32> zeroinitializer ; all-zero mask broadcasts lane 0 (splat of %a0)
+  %v1.sub = sub <8 x i32> splat (i32 32), %v1 ; complementary shift amount: 32 - a
+  %b = shl <8 x i32> %v0, %v1
+  %c = lshr <8 x i32> %v0, %v1.sub
+  %d = or <8 x i32> %b, %c ; combine the two shifted halves
+  store <8 x i32> %d, ptr %dst
+  ret void
+}
+
+define void @rotr_v8i32(ptr %dst, ptr %src, i32 signext %a0) nounwind { ; rotate-right idiom per i32 lane: (x >> a) | (x << (32 - a)), a = splat of %a0
+; CHECK-LABEL: rotr_v8i32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    xvld $xr0, $a1, 0
+; CHECK-NEXT:    xvreplgr2vr.w $xr1, $a2
+; CHECK-NEXT:    xvrepli.w $xr2, 32
+; CHECK-NEXT:    xvsub.w $xr2, $xr2, $xr1
+; CHECK-NEXT:    xvsrl.w $xr1, $xr0, $xr1
+; CHECK-NEXT:    xvsll.w $xr0, $xr0, $xr2
+; CHECK-NEXT:    xvor.v $xr0, $xr1, $xr0
+; CHECK-NEXT:    xvst $xr0, $a0, 0
+; CHECK-NEXT:    ret
+  %v0 = load <8 x i32>, ptr %src
+  %v1.ele = insertelement <8 x i32> poison, i32 %a0, i32 0
+  %v1 = shufflevector <8 x i32> %v1.ele, <8 x i32> poison, <8 x i32> zeroinitializer ; all-zero mask broadcasts lane 0 (splat of %a0)
+  %v1.sub = sub <8 x i32> splat (i32 32), %v1 ; complementary shift amount: 32 - a
+  %b = lshr <8 x i32> %v0, %v1
+  %c = shl <8 x i32> %v0, %v1.sub
+  %d = or <8 x i32> %b, %c ; combine the two shifted halves
+  store <8 x i32> %d, ptr %dst
+  ret void
+}
+
+define void @rotr_v8i32_imm(ptr %dst, ptr %src) nounwind { ; rotate-right idiom per i32 lane by the constant 2: (x >> 2) | (x << 30)
+; CHECK-LABEL: rotr_v8i32_imm:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    xvld $xr0, $a1, 0
+; CHECK-NEXT:    xvsrli.w $xr1, $xr0, 2
+; CHECK-NEXT:    xvslli.w $xr0, $xr0, 30
+; CHECK-NEXT:    xvor.v $xr0, $xr0, $xr1
+; CHECK-NEXT:    xvst $xr0, $a0, 0
+; CHECK-NEXT:    ret
+  %v0 = load <8 x i32>, ptr %src
+  %b = lshr <8 x i32> %v0, splat (i32 2)
+  %c = shl <8 x i32> %v0, splat (i32 30) ; 30 = 32 - 2, the complementary shift amount
+  %d = or <8 x i32> %b, %c ; combine the two shifted halves
+  store <8 x i32> %d, ptr %dst
+  ret void
+}
+
+define void @rotl_v4i64(ptr %dst, ptr %src, i64 %a0) nounwind { ; rotate-left idiom per i64 lane: (x << a) | (x >> (64 - a)); LA32 and LA64 expectations are listed separately
+; LA32-LABEL: rotl_v4i64:
+; LA32:       # %bb.0:
+; LA32-NEXT:    xvld $xr0, $a1, 0
+; LA32-NEXT:    vinsgr2vr.w $vr1, $a2, 0
+; LA32-NEXT:    vinsgr2vr.w $vr1, $a3, 1
+; LA32-NEXT:    xvreplve0.d $xr1, $xr1
+; LA32-NEXT:    xvrepli.d $xr2, 64
+; LA32-NEXT:    xvsub.d $xr2, $xr2, $xr1
+; LA32-NEXT:    xvsll.d $xr1, $xr0, $xr1
+; LA32-NEXT:    xvsrl.d $xr0, $xr0, $xr2
+; LA32-NEXT:    xvor.v $xr0, $xr1, $xr0
+; LA32-NEXT:    xvst $xr0, $a0, 0
+; LA32-NEXT:    ret
+;
+; LA64-LABEL: rotl_v4i64:
+; LA64:       # %bb.0:
+; LA64-NEXT:    xvld $xr0, $a1, 0
+; LA64-NEXT:    xvreplgr2vr.d $xr1, $a2
+; LA64-NEXT:    xvrepli.d $xr2, 64
+; LA64-NEXT:    xvsub.d $xr2, $xr2, $xr1
+; LA64-NEXT:    xvsll.d $xr1, $xr0, $xr1
+; LA64-NEXT:    xvsrl.d $xr0, $xr0, $xr2
+; LA64-NEXT:    xvor.v $xr0, $xr1, $xr0
+; LA64-NEXT:    xvst $xr0, $a0, 0
+; LA64-NEXT:    ret
+  %v0 = load <4 x i64>, ptr %src
+  %v1.ele = insertelement <4 x i64> poison, i64 %a0, i64 0
+  %v1 = shufflevector <4 x i64> %v1.ele, <4 x i64> poison, <4 x i32> zeroinitializer ; all-zero mask broadcasts lane 0 (splat of %a0)
+  %v1.sub = sub <4 x i64> splat (i64 64), %v1 ; complementary shift amount: 64 - a
+  %b = shl <4 x i64> %v0, %v1
+  %c = lshr <4 x i64> %v0, %v1.sub
+  %d = or <4 x i64> %b, %c ; combine the two shifted halves
+  store <4 x i64> %d, ptr %dst
+  ret void
+}
+
+define void @rotr_v4i64(ptr %dst, ptr %src, i64 %a0) nounwind { ; rotate-right idiom per i64 lane: (x >> a) | (x << (64 - a)); LA32 and LA64 expectations are listed separately
+; LA32-LABEL: rotr_v4i64:
+; LA32:       # %bb.0:
+; LA32-NEXT:    xvld $xr0, $a1, 0
+; LA32-NEXT:    vinsgr2vr.w $vr1, $a2, 0
+; LA32-NEXT:    vinsgr2vr.w $vr1, $a3, 1
+; LA32-NEXT:    xvreplve0.d $xr1, $xr1
+; LA32-NEXT:    xvrepli.d $xr2, 64
+; LA32-NEXT:    xvsub.d $xr2, $xr2, $xr1
+; LA32-NEXT:    xvsrl.d $xr1, $xr0, $xr1
+; LA32-NEXT:    xvsll.d $xr0, $xr0, $xr2
+; LA32-NEXT:    xvor.v $xr0, $xr1, $xr0
+; LA32-NEXT:    xvst $xr0, $a0, 0
+; LA32-NEXT:    ret
+;
+; LA64-LABEL: rotr_v4i64:
+; LA64:       # %bb.0:
+; LA64-NEXT:    xvld $xr0, $a1, 0
+; LA64-NEXT:    xvreplgr2vr.d $xr1, $a2
+; LA64-NEXT:    xvrepli.d $xr2, 64
+; LA64-NEXT:    xvsub.d $xr2, $xr2, $xr1
+; LA64-NEXT:    xvsrl.d $xr1, $xr0, $xr1
+; LA64-NEXT:    xvsll.d $xr0, $xr0, $xr2
+; LA64-NEXT:    xvor.v $xr0, $xr1, $xr0
+; LA64-NEXT:    xvst $xr0, $a0, 0
+; LA64-NEXT:    ret
+  %v0 = load <4 x i64>, ptr %src
+  %v1.ele = insertelement <4 x i64> poison, i64 %a0, i64 0
+  %v1 = shufflevector <4 x i64> %v1.ele, <4 x i64> poison, <4 x i32> zeroinitializer ; all-zero mask broadcasts lane 0 (splat of %a0)
+  %v1.sub = sub <4 x i64> splat (i64 64), %v1 ; complementary shift amount: 64 - a
+  %b = lshr <4 x i64> %v0, %v1
+  %c = shl <4 x i64> %v0, %v1.sub
+  %d = or <4 x i64> %b, %c ; combine the two shifted halves
+  store <4 x i64> %d, ptr %dst
+  ret void
+}
+
+define void @rotr_v4i64_imm(ptr %dst, ptr %src) nounwind { ; rotate-right idiom per i64 lane by the constant 2: (x >> 2) | (x << 62)
+; CHECK-LABEL: rotr_v4i64_imm:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    xvld $xr0, $a1, 0
+; CHECK-NEXT:    xvsrli.d $xr1, $xr0, 2
+; CHECK-NEXT:    xvslli.d $xr0, $xr0, 62
+; CHECK-NEXT:    xvor.v $xr0, $xr0, $xr1
+; CHECK-NEXT:    xvst $xr0, $a0, 0
+; CHECK-NEXT:    ret
+  %v0 = load <4 x i64>, ptr %src
+  %b = lshr <4 x i64> %v0, splat (i64 2)
+  %c = shl <4 x i64> %v0, splat (i64 62) ; 62 = 64 - 2, the complementary shift amount
+  %d = or <4 x i64> %b, %c ; combine the two shifted halves
+  store <4 x i64> %d, ptr %dst
+  ret void
+}
diff --git a/llvm/test/CodeGen/LoongArch/lsx/rotl-rotr.ll b/llvm/test/CodeGen/LoongArch/lsx/rotl-rotr.ll
new file mode 100644
index 0000000000000..79e74f35abafb
--- /dev/null
+++ b/llvm/test/CodeGen/LoongArch/lsx/rotl-rotr.ll
@@ -0,0 +1,283 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6
+; RUN: llc --mtriple=loongarch32 --mattr=+32s,+lsx < %s | FileCheck %s --check-prefixes=CHECK,LA32
+; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s --check-prefixes=CHECK,LA64
+
+define void @rotl_v16i8(ptr %dst, ptr %src, i8 signext %a0) nounwind { ; rotate-left idiom per i8 lane: (x << a) | (x >> (8 - a)), a = splat of %a0
+; CHECK-LABEL: rotl_v16i8:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vld $vr0, $a1, 0
+; CHECK-NEXT:    vreplgr2vr.b $vr1, $a2
+; CHECK-NEXT:    vrepli.b $vr2, 8
+; CHECK-NEXT:    vsub.b $vr2, $vr2, $vr1
+; CHECK-NEXT:    vsll.b $vr1, $vr0, $vr1
+; CHECK-NEXT:    vsrl.b $vr0, $vr0, $vr2
+; CHECK-NEXT:    vor.v $vr0, $vr1, $vr0
+; CHECK-NEXT:    vst $vr0, $a0, 0
+; CHECK-NEXT:    ret
+  %v0 = load <16 x i8>, ptr %src
+  %v1.ele = insertelement <16 x i8> poison, i8 %a0, i8 0
+  %v1 = shufflevector <16 x i8> %v1.ele, <16 x i8> poison, <16 x i32> zeroinitializer ; all-zero mask broadcasts lane 0 (splat of %a0)
+  %v1.sub = sub <16 x i8> splat (i8 8), %v1 ; complementary shift amount: 8 - a
+  %b = shl <16 x i8> %v0, %v1
+  %c = lshr <16 x i8> %v0, %v1.sub
+  %d = or <16 x i8> %b, %c ; combine the two shifted halves
+  store <16 x i8> %d, ptr %dst
+  ret void
+}
+
+define void @rotr_v16i8(ptr %dst, ptr %src, i8 signext %a0) nounwind { ; rotate-right idiom per i8 lane: (x >> a) | (x << (8 - a)), a = splat of %a0
+; CHECK-LABEL: rotr_v16i8:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vld $vr0, $a1, 0
+; CHECK-NEXT:    vreplgr2vr.b $vr1, $a2
+; CHECK-NEXT:    vrepli.b $vr2, 8
+; CHECK-NEXT:    vsub.b $vr2, $vr2, $vr1
+; CHECK-NEXT:    vsrl.b $vr1, $vr0, $vr1
+; CHECK-NEXT:    vsll.b $vr0, $vr0, $vr2
+; CHECK-NEXT:    vor.v $vr0, $vr1, $vr0
+; CHECK-NEXT:    vst $vr0, $a0, 0
+; CHECK-NEXT:    ret
+  %v0 = load <16 x i8>, ptr %src
+  %v1.ele = insertelement <16 x i8> poison, i8 %a0, i8 0
+  %v1 = shufflevector <16 x i8> %v1.ele, <16 x i8> poison, <16 x i32> zeroinitializer ; all-zero mask broadcasts lane 0 (splat of %a0)
+  %v1.sub = sub <16 x i8> splat (i8 8), %v1 ; complementary shift amount: 8 - a
+  %b = lshr <16 x i8> %v0, %v1
+  %c = shl <16 x i8> %v0, %v1.sub
+  %d = or <16 x i8> %b, %c ; combine the two shifted halves
+  store <16 x i8> %d, ptr %dst
+  ret void
+}
+
+define void @rotr_v16i8_imm(ptr %dst, ptr %src) nounwind { ; rotate-right idiom per i8 lane by the constant 2: (x >> 2) | (x << 6)
+; CHECK-LABEL: rotr_v16i8_imm:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vld $vr0, $a1, 0
+; CHECK-NEXT:    vsrli.b $vr1, $vr0, 2
+; CHECK-NEXT:    vslli.b $vr0, $vr0, 6
+; CHECK-NEXT:    vor.v $vr0, $vr0, $vr1
+; CHECK-NEXT:    vst $vr0, $a0, 0
+; CHECK-NEXT:    ret
+  %v0 = load <16 x i8>, ptr %src
+  %b = lshr <16 x i8> %v0, splat (i8 2)
+  %c = shl <16 x i8> %v0, splat (i8 6) ; 6 = 8 - 2, the complementary shift amount
+  %d = or <16 x i8> %b, %c ; combine the two shifted halves
+  store <16 x i8> %d, ptr %dst
+  ret void
+}
+
+define void @rotl_v8i16(ptr %dst, ptr %src, i16 signext %a0) nounwind { ; rotate-left idiom per i16 lane: (x << a) | (x >> (16 - a)), a = splat of %a0
+; CHECK-LABEL: rotl_v8i16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vld $vr0, $a1, 0
+; CHECK-NEXT:    vreplgr2vr.h $vr1, $a2
+; CHECK-NEXT:    vrepli.h $vr2, 16
+; CHECK-NEXT:    vsub.h $vr2, $vr2, $vr1
+; CHECK-NEXT:    vsll.h $vr1, $vr0, $vr1
+; CHECK-NEXT:    vsrl.h $vr0, $vr0, $vr2
+; CHECK-NEXT:    vor.v $vr0, $vr1, $vr0
+; CHECK-NEXT:    vst $vr0, $a0, 0
+; CHECK-NEXT:    ret
+  %v0 = load <8 x i16>, ptr %src
+  %v1.ele = insertelement <8 x i16> poison, i16 %a0, i16 0
+  %v1 = shufflevector <8 x i16> %v1.ele, <8 x i16> poison, <8 x i32> zeroinitializer ; all-zero mask broadcasts lane 0 (splat of %a0)
+  %v1.sub = sub <8 x i16> splat (i16 16), %v1 ; complementary shift amount: 16 - a
+  %b = shl <8 x i16> %v0, %v1
+  %c = lshr <8 x i16> %v0, %v1.sub
+  %d = or <8 x i16> %b, %c ; combine the two shifted halves
+  store <8 x i16> %d, ptr %dst
+  ret void
+}
+
+define void @rotr_v8i16(ptr %dst, ptr %src, i16 signext %a0) nounwind { ; rotate-right idiom per i16 lane: (x >> a) | (x << (16 - a)), a = splat of %a0
+; CHECK-LABEL: rotr_v8i16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vld $vr0, $a1, 0
+; CHECK-NEXT:    vreplgr2vr.h $vr1, $a2
+; CHECK-NEXT:    vrepli.h $vr2, 16
+; CHECK-NEXT:    vsub.h $vr2, $vr2, $vr1
+; CHECK-NEXT:    vsrl.h $vr1, $vr0, $vr1
+; CHECK-NEXT:    vsll.h $vr0, $vr0, $vr2
+; CHECK-NEXT:    vor.v $vr0, $vr1, $vr0
+; CHECK-NEXT:    vst $vr0, $a0, 0
+; CHECK-NEXT:    ret
+  %v0 = load <8 x i16>, ptr %src
+  %v1.ele = insertelement <8 x i16> poison, i16 %a0, i16 0
+  %v1 = shufflevector <8 x i16> %v1.ele, <8 x i16> poison, <8 x i32> zeroinitializer ; all-zero mask broadcasts lane 0 (splat of %a0)
+  %v1.sub = sub <8 x i16> splat (i16 16), %v1 ; complementary shift amount: 16 - a
+  %b = lshr <8 x i16> %v0, %v1
+  %c = shl <8 x i16> %v0, %v1.sub
+  %d = or <8 x i16> %b, %c ; combine the two shifted halves
+  store <8 x i16> %d, ptr %dst
+  ret void
+}
+
+define void @rotr_v8i16_imm(ptr %dst, ptr %src) nounwind { ; rotate-right idiom per i16 lane by the constant 2: (x >> 2) | (x << 14)
+; CHECK-LABEL: rotr_v8i16_imm:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vld $vr0, $a1, 0
+; CHECK-NEXT:    vsrli.h $vr1, $vr0, 2
+; CHECK-NEXT:    vslli.h $vr0, $vr0, 14
+; CHECK-NEXT:    vor.v $vr0, $vr0, $vr1
+; CHECK-NEXT:    vst $vr0, $a0, 0
+; CHECK-NEXT:    ret
+  %v0 = load <8 x i16>, ptr %src
+  %b = lshr <8 x i16> %v0, splat (i16 2)
+  %c = shl <8 x i16> %v0, splat (i16 14) ; 14 = 16 - 2, the complementary shift amount
+  %d = or <8 x i16> %b, %c ; combine the two shifted halves
+  store <8 x i16> %d, ptr %dst
+  ret void
+}
+
+define void @rotl_v4i32(ptr %dst, ptr %src, i32 signext %a0) nounwind { ; rotate-left idiom per i32 lane: (x << a) | (x >> (32 - a)), a = splat of %a0
+; CHECK-LABEL: rotl_v4i32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vld $vr0, $a1, 0
+; CHECK-NEXT:    vreplgr2vr.w $vr1, $a2
+; CHECK-NEXT:    vrepli.w $vr2, 32
+; CHECK-NEXT:    vsub.w $vr2, $vr2, $vr1
+; CHECK-NEXT:    vsll.w $vr1, $vr0, $vr1
+; CHECK-NEXT:    vsrl.w $vr0, $vr0, $vr2
+; CHECK-NEXT:    vor.v $vr0, $vr1, $vr0
+; CHECK-NEXT:    vst $vr0, $a0, 0
+; CHECK-NEXT:    ret
+  %v0 = load <4 x i32>, ptr %src
+  %v1.ele = insertelement <4 x i32> poison, i32 %a0, i32 0
+  %v1 = shufflevector <4 x i32> %v1.ele, <4 x i32> poison, <4 x i32> zeroinitializer ; all-zero mask broadcasts lane 0 (splat of %a0)
+  %v1.sub = sub <4 x i32> splat (i32 32), %v1 ; complementary shift amount: 32 - a
+  %b = shl <4 x i32> %v0, %v1
+  %c = lshr <4 x i32> %v0, %v1.sub
+  %d = or <4 x i32> %b, %c ; combine the two shifted halves
+  store <4 x i32> %d, ptr %dst
+  ret void
+}
+
+define void @rotr_v4i32(ptr %dst, ptr %src, i32 signext %a0) nounwind { ; rotate-right idiom per i32 lane: (x >> a) | (x << (32 - a)), a = splat of %a0
+; CHECK-LABEL: rotr_v4i32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vld $vr0, $a1, 0
+; CHECK-NEXT:    vreplgr2vr.w $vr1, $a2
+; CHECK-NEXT:    vrepli.w $vr2, 32
+; CHECK-NEXT:    vsub.w $vr2, $vr2, $vr1
+; CHECK-NEXT:    vsrl.w $vr1, $vr0, $vr1
+; CHECK-NEXT:    vsll.w $vr0, $vr0, $vr2
+; CHECK-NEXT:    vor.v $vr0, $vr1, $vr0
+; CHECK-NEXT:    vst $vr0, $a0, 0
+; CHECK-NEXT:    ret
+  %v0 = load <4 x i32>, ptr %src
+  %v1.ele = insertelement <4 x i32> poison, i32 %a0, i32 0
+  %v1 = shufflevector <4 x i32> %v1.ele, <4 x i32> poison, <4 x i32> zeroinitializer ; all-zero mask broadcasts lane 0 (splat of %a0)
+  %v1.sub = sub <4 x i32> splat (i32 32), %v1 ; complementary shift amount: 32 - a
+  %b = lshr <4 x i32> %v0, %v1
+  %c = shl <4 x i32> %v0, %v1.sub
+  %d = or <4 x i32> %b, %c ; combine the two shifted halves
+  store <4 x i32> %d, ptr %dst
+  ret void
+}
+
+define void @rotr_v4i32_imm(ptr %dst, ptr %src) nounwind { ; rotate-right idiom per i32 lane by the constant 2: (x >> 2) | (x << 30)
+; CHECK-LABEL: rotr_v4i32_imm:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vld $vr0, $a1, 0
+; CHECK-NEXT:    vsrli.w $vr1, $vr0, 2
+; CHECK-NEXT:    vslli.w $vr0, $vr0, 30
+; CHECK-NEXT:    vor.v $vr0, $vr0, $vr1
+; CHECK-NEXT:    vst $vr0, $a0, 0
+; CHECK-NEXT:    ret
+  %v0 = load <4 x i32>, ptr %src
+  %b = lshr <4 x i32> %v0, splat (i32 2)
+  %c = shl <4 x i32> %v0, splat (i32 30) ; 30 = 32 - 2, the complementary shift amount
+  %d = or <4 x i32> %b, %c ; combine the two shifted halves
+  store <4 x i32> %d, ptr %dst
+  ret void
+}
+
+define void @rotl_v2i64(ptr %dst, ptr %src, i64 %a0) nounwind { ; rotate-left idiom per i64 lane: (x << a) | (x >> (64 - a)); LA32 and LA64 expectations are listed separately
+; LA32-LABEL: rotl_v2i64:
+; LA32:       # %bb.0:
+; LA32-NEXT:    vld $vr0, $a1, 0
+; LA32-NEXT:    vinsgr2vr.w $vr1, $a2, 0
+; LA32-NEXT:    vinsgr2vr.w $vr1, $a3, 1
+; LA32-NEXT:    vreplvei.d $vr1, $vr1, 0
+; LA32-NEXT:    vrepli.d $vr2, 64
+; LA32-NEXT:    vsub.d $vr2, $vr2, $vr1
+; LA32-NEXT:    vsll.d $vr1, $vr0, $vr1
+; LA32-NEXT:    vsrl.d $vr0, $vr0, $vr2
+; LA32-NEXT:    vor.v $vr0, $vr1, $vr0
+; LA32-NEXT:    vst $vr0, $a0, 0
+; LA32-NEXT:    ret
+;
+; LA64-LABEL: rotl_v2i64:
+; LA64:       # %bb.0:
+; LA64-NEXT:    vld $vr0, $a1, 0
+; LA64-NEXT:    vreplgr2vr.d $vr1, $a2
+; LA64-NEXT:    vrepli.d $vr2, 64
+; LA64-NEXT:    vsub.d $vr2, $vr2, $vr1
+; LA64-NEXT:    vsll.d $vr1, $vr0, $vr1
+; LA64-NEXT:    vsrl.d $vr0, $vr0, $vr2
+; LA64-NEXT:    vor.v $vr0, $vr1, $vr0
+; LA64-NEXT:    vst $vr0, $a0, 0
+; LA64-NEXT:    ret
+  %v0 = load <2 x i64>, ptr %src
+  %v1.ele = insertelement <2 x i64> poison, i64 %a0, i64 0
+  %v1 = shufflevector <2 x i64> %v1.ele, <2 x i64> poison, <2 x i32> zeroinitializer ; all-zero mask broadcasts lane 0 (splat of %a0)
+  %v1.sub = sub <2 x i64> splat (i64 64), %v1 ; complementary shift amount: 64 - a
+  %b = shl <2 x i64> %v0, %v1
+  %c = lshr <2 x i64> %v0, %v1.sub
+  %d = or <2 x i64> %b, %c ; combine the two shifted halves
+  store <2 x i64> %d, ptr %dst
+  ret void
+}
+
+define void @rotr_v2i64(ptr %dst, ptr %src, i64 %a0) nounwind { ; rotate-right idiom per i64 lane: (x >> a) | (x << (64 - a)); LA32 and LA64 expectations are listed separately
+; LA32-LABEL: rotr_v2i64:
+; LA32:       # %bb.0:
+; LA32-NEXT:    vld $vr0, $a1, 0
+; LA32-NEXT:    vinsgr2vr.w $vr1, $a2, 0
+; LA32-NEXT:    vinsgr2vr.w $vr1, $a3, 1
+; LA32-NEXT:    vreplvei.d $vr1, $vr1, 0
+; LA32-NEXT:    vrepli.d $vr2, 64
+; LA32-NEXT:    vsub.d $vr2, $vr2, $vr1
+; LA32-NEXT:    vsrl.d $vr1, $vr0, $vr1
+; LA32-NEXT:    vsll.d $vr0, $vr0, $vr2
+; LA32-NEXT:    vor.v $vr0, $vr1, $vr0
+; LA32-NEXT:    vst $vr0, $a0, 0
+; LA32-NEXT:    ret
+;
+; LA64-LABEL: rotr_v2i64:
+; LA64:       # %bb.0:
+; LA64-NEXT:    vld $vr0, $a1, 0
+; LA64-NEXT:    vreplgr2vr.d $vr1, $a2
+; LA64-NEXT:    vrepli.d $vr2, 64
+; LA64-NEXT:    vsub.d $vr2, $vr2, $vr1
+; LA64-NEXT:    vsrl.d $vr1, $vr0, $vr1
+; LA64-NEXT:    vsll.d $vr0, $vr0, $vr2
+; LA64-NEXT:    vor.v $vr0, $vr1, $vr0
+; LA64-NEXT:    vst $vr0, $a0, 0
+; LA64-NEXT:    ret
+  %v0 = load <2 x i64>, ptr %src
+  %v1.ele = insertelement <2 x i64> poison, i64 %a0, i64 0
+  %v1 = shufflevector <2 x i64> %v1.ele, <2 x i64> poison, <2 x i32> zeroinitializer ; all-zero mask broadcasts lane 0 (splat of %a0)
+  %v1.sub = sub <2 x i64> splat (i64 64), %v1 ; complementary shift amount: 64 - a
+  %b = lshr <2 x i64> %v0, %v1
+  %c = shl <2 x i64> %v0, %v1.sub
+  %d = or <2 x i64> %b, %c ; combine the two shifted halves
+  store <2 x i64> %d, ptr %dst
+  ret void
+}
+
+define void @rotr_v2i64_imm(ptr %dst, ptr %src) nounwind {
+; CHECK-LABEL: rotr_v2i64_imm:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vld $vr0, $a1, 0
+; CHECK-NEXT:    vsrli.d $vr1, $vr0, 2
+; CHECK-NEXT:    vslli.d $vr0, $vr0, 62
+; CHECK-NEXT:    vor.v $vr0, $vr0, $vr1
+; CHECK-NEXT:    vst $vr0, $a0, 0
+; CHECK-NEXT:    ret
+  %v0 = load <2 x i64>, ptr %src
+  %b = lshr <2 x i64> %v0, splat (i64 2)
+  %c = shl <2 x i64> %v0, splat (i64 62)
+  %d = or <2 x i64> %b, %c
+  store <2 x i64...
[truncated]

@ylzsx ylzsx changed the title from "[LoongArch][NFC] Pre-commit tests for custom rotr" to "[LoongArch][NFC] Pre-commit tests for vector rotl/rotr" on Sep 29, 2025
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment

Projects

None yet

Development

Successfully merging this pull request may close these issues.

2 participants