diff --git a/llvm/test/CodeGen/LoongArch/lasx/rotl-rotr.ll b/llvm/test/CodeGen/LoongArch/lasx/rotl-rotr.ll
new file mode 100644
index 0000000000000..f9f024dda973c
--- /dev/null
+++ b/llvm/test/CodeGen/LoongArch/lasx/rotl-rotr.ll
@@ -0,0 +1,283 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6
+; RUN: llc --mtriple=loongarch32 --mattr=+32s,+lasx < %s | FileCheck %s --check-prefixes=CHECK,LA32
+; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s --check-prefixes=CHECK,LA64
+
+define void @rotl_v32i8(ptr %dst, ptr %src, i8 signext %a0) nounwind {
+; CHECK-LABEL: rotl_v32i8:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    xvld $xr0, $a1, 0
+; CHECK-NEXT:    xvreplgr2vr.b $xr1, $a2
+; CHECK-NEXT:    xvrepli.b $xr2, 8
+; CHECK-NEXT:    xvsub.b $xr2, $xr2, $xr1
+; CHECK-NEXT:    xvsll.b $xr1, $xr0, $xr1
+; CHECK-NEXT:    xvsrl.b $xr0, $xr0, $xr2
+; CHECK-NEXT:    xvor.v $xr0, $xr1, $xr0
+; CHECK-NEXT:    xvst $xr0, $a0, 0
+; CHECK-NEXT:    ret
+  %v0 = load <32 x i8>, ptr %src
+  %v1.ele = insertelement <32 x i8> poison, i8 %a0, i8 0
+  %v1 = shufflevector <32 x i8> %v1.ele, <32 x i8> poison, <32 x i32> zeroinitializer
+  %v1.sub = sub <32 x i8> splat (i8 8), %v1
+  %b = shl <32 x i8> %v0, %v1
+  %c = lshr <32 x i8> %v0, %v1.sub
+  %d = or <32 x i8> %b, %c
+  store <32 x i8> %d, ptr %dst
+  ret void
+}
+
+define void @rotr_v32i8(ptr %dst, ptr %src, i8 signext %a0) nounwind {
+; CHECK-LABEL: rotr_v32i8:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    xvld $xr0, $a1, 0
+; CHECK-NEXT:    xvreplgr2vr.b $xr1, $a2
+; CHECK-NEXT:    xvrepli.b $xr2, 8
+; CHECK-NEXT:    xvsub.b $xr2, $xr2, $xr1
+; CHECK-NEXT:    xvsrl.b $xr1, $xr0, $xr1
+; CHECK-NEXT:    xvsll.b $xr0, $xr0, $xr2
+; CHECK-NEXT:    xvor.v $xr0, $xr1, $xr0
+; CHECK-NEXT:    xvst $xr0, $a0, 0
+; CHECK-NEXT:    ret
+  %v0 = load <32 x i8>, ptr %src
+  %v1.ele = insertelement <32 x i8> poison, i8 %a0, i8 0
+  %v1 = shufflevector <32 x i8> %v1.ele, <32 x i8> poison, <32 x i32> zeroinitializer
+  %v1.sub = sub <32 x i8> splat (i8 8), %v1
+  %b = lshr <32 x i8> %v0, %v1
+  %c = shl <32 x i8> %v0, %v1.sub
+  %d = or <32 x i8> %b, %c
+  store <32 x i8> %d, ptr %dst
+  ret void
+}
+
+define void @rotr_v32i8_imm(ptr %dst, ptr %src) nounwind {
+; CHECK-LABEL: rotr_v32i8_imm:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    xvld $xr0, $a1, 0
+; CHECK-NEXT:    xvsrli.b $xr1, $xr0, 2
+; CHECK-NEXT:    xvslli.b $xr0, $xr0, 6
+; CHECK-NEXT:    xvor.v $xr0, $xr0, $xr1
+; CHECK-NEXT:    xvst $xr0, $a0, 0
+; CHECK-NEXT:    ret
+  %v0 = load <32 x i8>, ptr %src
+  %b = lshr <32 x i8> %v0, splat (i8 2)
+  %c = shl <32 x i8> %v0, splat (i8 6)
+  %d = or <32 x i8> %b, %c
+  store <32 x i8> %d, ptr %dst
+  ret void
+}
+
+define void @rotl_v16i16(ptr %dst, ptr %src, i16 signext %a0) nounwind {
+; CHECK-LABEL: rotl_v16i16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    xvld $xr0, $a1, 0
+; CHECK-NEXT:    xvreplgr2vr.h $xr1, $a2
+; CHECK-NEXT:    xvrepli.h $xr2, 16
+; CHECK-NEXT:    xvsub.h $xr2, $xr2, $xr1
+; CHECK-NEXT:    xvsll.h $xr1, $xr0, $xr1
+; CHECK-NEXT:    xvsrl.h $xr0, $xr0, $xr2
+; CHECK-NEXT:    xvor.v $xr0, $xr1, $xr0
+; CHECK-NEXT:    xvst $xr0, $a0, 0
+; CHECK-NEXT:    ret
+  %v0 = load <16 x i16>, ptr %src
+  %v1.ele = insertelement <16 x i16> poison, i16 %a0, i16 0
+  %v1 = shufflevector <16 x i16> %v1.ele, <16 x i16> poison, <16 x i32> zeroinitializer
+  %v1.sub = sub <16 x i16> splat (i16 16), %v1
+  %b = shl <16 x i16> %v0, %v1
+  %c = lshr <16 x i16> %v0, %v1.sub
+  %d = or <16 x i16> %b, %c
+  store <16 x i16> %d, ptr %dst
+  ret void
+}
+
+define void @rotr_v16i16(ptr %dst, ptr %src, i16 signext %a0) nounwind {
+; CHECK-LABEL: rotr_v16i16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    xvld $xr0, $a1, 0
+; CHECK-NEXT:    xvreplgr2vr.h $xr1, $a2
+; CHECK-NEXT:    xvrepli.h $xr2, 16
+; CHECK-NEXT:    xvsub.h $xr2, $xr2, $xr1
+; CHECK-NEXT:    xvsrl.h $xr1, $xr0, $xr1
+; CHECK-NEXT:    xvsll.h $xr0, $xr0, $xr2
+; CHECK-NEXT:    xvor.v $xr0, $xr1, $xr0
+; CHECK-NEXT:    xvst $xr0, $a0, 0
+; CHECK-NEXT:    ret
+  %v0 = load <16 x i16>, ptr %src
+  %v1.ele = insertelement <16 x i16> poison, i16 %a0, i16 0
+  %v1 = shufflevector <16 x i16> %v1.ele, <16 x i16> poison, <16 x i32> zeroinitializer
+  %v1.sub = sub <16 x i16> splat (i16 16), %v1
+  %b = lshr <16 x i16> %v0, %v1
+  %c = shl <16 x i16> %v0, %v1.sub
+  %d = or <16 x i16> %b, %c
+  store <16 x i16> %d, ptr %dst
+  ret void
+}
+
+define void @rotr_v16i16_imm(ptr %dst, ptr %src) nounwind {
+; CHECK-LABEL: rotr_v16i16_imm:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    xvld $xr0, $a1, 0
+; CHECK-NEXT:    xvsrli.h $xr1, $xr0, 2
+; CHECK-NEXT:    xvslli.h $xr0, $xr0, 14
+; CHECK-NEXT:    xvor.v $xr0, $xr0, $xr1
+; CHECK-NEXT:    xvst $xr0, $a0, 0
+; CHECK-NEXT:    ret
+  %v0 = load <16 x i16>, ptr %src
+  %b = lshr <16 x i16> %v0, splat (i16 2)
+  %c = shl <16 x i16> %v0, splat (i16 14)
+  %d = or <16 x i16> %b, %c
+  store <16 x i16> %d, ptr %dst
+  ret void
+}
+
+define void @rotl_v8i32(ptr %dst, ptr %src, i32 signext %a0) nounwind {
+; CHECK-LABEL: rotl_v8i32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    xvld $xr0, $a1, 0
+; CHECK-NEXT:    xvreplgr2vr.w $xr1, $a2
+; CHECK-NEXT:    xvrepli.w $xr2, 32
+; CHECK-NEXT:    xvsub.w $xr2, $xr2, $xr1
+; CHECK-NEXT:    xvsll.w $xr1, $xr0, $xr1
+; CHECK-NEXT:    xvsrl.w $xr0, $xr0, $xr2
+; CHECK-NEXT:    xvor.v $xr0, $xr1, $xr0
+; CHECK-NEXT:    xvst $xr0, $a0, 0
+; CHECK-NEXT:    ret
+  %v0 = load <8 x i32>, ptr %src
+  %v1.ele = insertelement <8 x i32> poison, i32 %a0, i32 0
+  %v1 = shufflevector <8 x i32> %v1.ele, <8 x i32> poison, <8 x i32> zeroinitializer
+  %v1.sub = sub <8 x i32> splat (i32 32), %v1
+  %b = shl <8 x i32> %v0, %v1
+  %c = lshr <8 x i32> %v0, %v1.sub
+  %d = or <8 x i32> %b, %c
+  store <8 x i32> %d, ptr %dst
+  ret void
+}
+
+define void @rotr_v8i32(ptr %dst, ptr %src, i32 signext %a0) nounwind {
+; CHECK-LABEL: rotr_v8i32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    xvld $xr0, $a1, 0
+; CHECK-NEXT:    xvreplgr2vr.w $xr1, $a2
+; CHECK-NEXT:    xvrepli.w $xr2, 32
+; CHECK-NEXT:    xvsub.w $xr2, $xr2, $xr1
+; CHECK-NEXT:    xvsrl.w $xr1, $xr0, $xr1
+; CHECK-NEXT:    xvsll.w $xr0, $xr0, $xr2
+; CHECK-NEXT:    xvor.v $xr0, $xr1, $xr0
+; CHECK-NEXT:    xvst $xr0, $a0, 0
+; CHECK-NEXT:    ret
+  %v0 = load <8 x i32>, ptr %src
+  %v1.ele = insertelement <8 x i32> poison, i32 %a0, i32 0
+  %v1 = shufflevector <8 x i32> %v1.ele, <8 x i32> poison, <8 x i32> zeroinitializer
+  %v1.sub = sub <8 x i32> splat (i32 32), %v1
+  %b = lshr <8 x i32> %v0, %v1
+  %c = shl <8 x i32> %v0, %v1.sub
+  %d = or <8 x i32> %b, %c
+  store <8 x i32> %d, ptr %dst
+  ret void
+}
+
+define void @rotr_v8i32_imm(ptr %dst, ptr %src) nounwind {
+; CHECK-LABEL: rotr_v8i32_imm:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    xvld $xr0, $a1, 0
+; CHECK-NEXT:    xvsrli.w $xr1, $xr0, 2
+; CHECK-NEXT:    xvslli.w $xr0, $xr0, 30
+; CHECK-NEXT:    xvor.v $xr0, $xr0, $xr1
+; CHECK-NEXT:    xvst $xr0, $a0, 0
+; CHECK-NEXT:    ret
+  %v0 = load <8 x i32>, ptr %src
+  %b = lshr <8 x i32> %v0, splat (i32 2)
+  %c = shl <8 x i32> %v0, splat (i32 30)
+  %d = or <8 x i32> %b, %c
+  store <8 x i32> %d, ptr %dst
+  ret void
+}
+
+define void @rotl_v4i64(ptr %dst, ptr %src, i64 %a0) nounwind {
+; LA32-LABEL: rotl_v4i64:
+; LA32:       # %bb.0:
+; LA32-NEXT:    xvld $xr0, $a1, 0
+; LA32-NEXT:    vinsgr2vr.w $vr1, $a2, 0
+; LA32-NEXT:    vinsgr2vr.w $vr1, $a3, 1
+; LA32-NEXT:    xvreplve0.d $xr1, $xr1
+; LA32-NEXT:    xvrepli.d $xr2, 64
+; LA32-NEXT:    xvsub.d $xr2, $xr2, $xr1
+; LA32-NEXT:    xvsll.d $xr1, $xr0, $xr1
+; LA32-NEXT:    xvsrl.d $xr0, $xr0, $xr2
+; LA32-NEXT:    xvor.v $xr0, $xr1, $xr0
+; LA32-NEXT:    xvst $xr0, $a0, 0
+; LA32-NEXT:    ret
+;
+; LA64-LABEL: rotl_v4i64:
+; LA64:       # %bb.0:
+; LA64-NEXT:    xvld $xr0, $a1, 0
+; LA64-NEXT:    xvreplgr2vr.d $xr1, $a2
+; LA64-NEXT:    xvrepli.d $xr2, 64
+; LA64-NEXT:    xvsub.d $xr2, $xr2, $xr1
+; LA64-NEXT:    xvsll.d $xr1, $xr0, $xr1
+; LA64-NEXT:    xvsrl.d $xr0, $xr0, $xr2
+; LA64-NEXT:    xvor.v $xr0, $xr1, $xr0
+; LA64-NEXT:    xvst $xr0, $a0, 0
+; LA64-NEXT:    ret
+  %v0 = load <4 x i64>, ptr %src
+  %v1.ele = insertelement <4 x i64> poison, i64 %a0, i64 0
+  %v1 = shufflevector <4 x i64> %v1.ele, <4 x i64> poison, <4 x i32> zeroinitializer
+  %v1.sub = sub <4 x i64> splat (i64 64), %v1
+  %b = shl <4 x i64> %v0, %v1
+  %c = lshr <4 x i64> %v0, %v1.sub
+  %d = or <4 x i64> %b, %c
+  store <4 x i64> %d, ptr %dst
+  ret void
+}
+
+define void @rotr_v4i64(ptr %dst, ptr %src, i64 %a0) nounwind {
+; LA32-LABEL: rotr_v4i64:
+; LA32:       # %bb.0:
+; LA32-NEXT:    xvld $xr0, $a1, 0
+; LA32-NEXT:    vinsgr2vr.w $vr1, $a2, 0
+; LA32-NEXT:    vinsgr2vr.w $vr1, $a3, 1
+; LA32-NEXT:    xvreplve0.d $xr1, $xr1
+; LA32-NEXT:    xvrepli.d $xr2, 64
+; LA32-NEXT:    xvsub.d $xr2, $xr2, $xr1
+; LA32-NEXT:    xvsrl.d $xr1, $xr0, $xr1
+; LA32-NEXT:    xvsll.d $xr0, $xr0, $xr2
+; LA32-NEXT:    xvor.v $xr0, $xr1, $xr0
+; LA32-NEXT:    xvst $xr0, $a0, 0
+; LA32-NEXT:    ret
+;
+; LA64-LABEL: rotr_v4i64:
+; LA64:       # %bb.0:
+; LA64-NEXT:    xvld $xr0, $a1, 0
+; LA64-NEXT:    xvreplgr2vr.d $xr1, $a2
+; LA64-NEXT:    xvrepli.d $xr2, 64
+; LA64-NEXT:    xvsub.d $xr2, $xr2, $xr1
+; LA64-NEXT:    xvsrl.d $xr1, $xr0, $xr1
+; LA64-NEXT:    xvsll.d $xr0, $xr0, $xr2
+; LA64-NEXT:    xvor.v $xr0, $xr1, $xr0
+; LA64-NEXT:    xvst $xr0, $a0, 0
+; LA64-NEXT:    ret
+  %v0 = load <4 x i64>, ptr %src
+  %v1.ele = insertelement <4 x i64> poison, i64 %a0, i64 0
+  %v1 = shufflevector <4 x i64> %v1.ele, <4 x i64> poison, <4 x i32> zeroinitializer
+  %v1.sub = sub <4 x i64> splat (i64 64), %v1
+  %b = lshr <4 x i64> %v0, %v1
+  %c = shl <4 x i64> %v0, %v1.sub
+  %d = or <4 x i64> %b, %c
+  store <4 x i64> %d, ptr %dst
+  ret void
+}
+
+define void @rotr_v4i64_imm(ptr %dst, ptr %src) nounwind {
+; CHECK-LABEL: rotr_v4i64_imm:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    xvld $xr0, $a1, 0
+; CHECK-NEXT:    xvsrli.d $xr1, $xr0, 2
+; CHECK-NEXT:    xvslli.d $xr0, $xr0, 62
+; CHECK-NEXT:    xvor.v $xr0, $xr0, $xr1
+; CHECK-NEXT:    xvst $xr0, $a0, 0
+; CHECK-NEXT:    ret
+  %v0 = load <4 x i64>, ptr %src
+  %b = lshr <4 x i64> %v0, splat (i64 2)
+  %c = shl <4 x i64> %v0, splat (i64 62)
+  %d = or <4 x i64> %b, %c
+  store <4 x i64> %d, ptr %dst
+  ret void
+}
diff --git a/llvm/test/CodeGen/LoongArch/lsx/rotl-rotr.ll b/llvm/test/CodeGen/LoongArch/lsx/rotl-rotr.ll
new file mode 100644
index 0000000000000..79e74f35abafb
--- /dev/null
+++ b/llvm/test/CodeGen/LoongArch/lsx/rotl-rotr.ll
@@ -0,0 +1,283 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6
+; RUN: llc --mtriple=loongarch32 --mattr=+32s,+lsx < %s | FileCheck %s --check-prefixes=CHECK,LA32
+; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s --check-prefixes=CHECK,LA64
+
+define void @rotl_v16i8(ptr %dst, ptr %src, i8 signext %a0) nounwind {
+; CHECK-LABEL: rotl_v16i8:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vld $vr0, $a1, 0
+; CHECK-NEXT:    vreplgr2vr.b $vr1, $a2
+; CHECK-NEXT:    vrepli.b $vr2, 8
+; CHECK-NEXT:    vsub.b $vr2, $vr2, $vr1
+; CHECK-NEXT:    vsll.b $vr1, $vr0, $vr1
+; CHECK-NEXT:    vsrl.b $vr0, $vr0, $vr2
+; CHECK-NEXT:    vor.v $vr0, $vr1, $vr0
+; CHECK-NEXT:    vst $vr0, $a0, 0
+; CHECK-NEXT:    ret
+  %v0 = load <16 x i8>, ptr %src
+  %v1.ele = insertelement <16 x i8> poison, i8 %a0, i8 0
+  %v1 = shufflevector <16 x i8> %v1.ele, <16 x i8> poison, <16 x i32> zeroinitializer
+  %v1.sub = sub <16 x i8> splat (i8 8), %v1
+  %b = shl <16 x i8> %v0, %v1
+  %c = lshr <16 x i8> %v0, %v1.sub
+  %d = or <16 x i8> %b, %c
+  store <16 x i8> %d, ptr %dst
+  ret void
+}
+
+define void @rotr_v16i8(ptr %dst, ptr %src, i8 signext %a0) nounwind {
+; CHECK-LABEL: rotr_v16i8:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vld $vr0, $a1, 0
+; CHECK-NEXT:    vreplgr2vr.b $vr1, $a2
+; CHECK-NEXT:    vrepli.b $vr2, 8
+; CHECK-NEXT:    vsub.b $vr2, $vr2, $vr1
+; CHECK-NEXT:    vsrl.b $vr1, $vr0, $vr1
+; CHECK-NEXT:    vsll.b $vr0, $vr0, $vr2
+; CHECK-NEXT:    vor.v $vr0, $vr1, $vr0
+; CHECK-NEXT:    vst $vr0, $a0, 0
+; CHECK-NEXT:    ret
+  %v0 = load <16 x i8>, ptr %src
+  %v1.ele = insertelement <16 x i8> poison, i8 %a0, i8 0
+  %v1 = shufflevector <16 x i8> %v1.ele, <16 x i8> poison, <16 x i32> zeroinitializer
+  %v1.sub = sub <16 x i8> splat (i8 8), %v1
+  %b = lshr <16 x i8> %v0, %v1
+  %c = shl <16 x i8> %v0, %v1.sub
+  %d = or <16 x i8> %b, %c
+  store <16 x i8> %d, ptr %dst
+  ret void
+}
+
+define void @rotr_v16i8_imm(ptr %dst, ptr %src) nounwind {
+; CHECK-LABEL: rotr_v16i8_imm:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vld $vr0, $a1, 0
+; CHECK-NEXT:    vsrli.b $vr1, $vr0, 2
+; CHECK-NEXT:    vslli.b $vr0, $vr0, 6
+; CHECK-NEXT:    vor.v $vr0, $vr0, $vr1
+; CHECK-NEXT:    vst $vr0, $a0, 0
+; CHECK-NEXT:    ret
+  %v0 = load <16 x i8>, ptr %src
+  %b = lshr <16 x i8> %v0, splat (i8 2)
+  %c = shl <16 x i8> %v0, splat (i8 6)
+  %d = or <16 x i8> %b, %c
+  store <16 x i8> %d, ptr %dst
+  ret void
+}
+
+define void @rotl_v8i16(ptr %dst, ptr %src, i16 signext %a0) nounwind {
+; CHECK-LABEL: rotl_v8i16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vld $vr0, $a1, 0
+; CHECK-NEXT:    vreplgr2vr.h $vr1, $a2
+; CHECK-NEXT:    vrepli.h $vr2, 16
+; CHECK-NEXT:    vsub.h $vr2, $vr2, $vr1
+; CHECK-NEXT:    vsll.h $vr1, $vr0, $vr1
+; CHECK-NEXT:    vsrl.h $vr0, $vr0, $vr2
+; CHECK-NEXT:    vor.v $vr0, $vr1, $vr0
+; CHECK-NEXT:    vst $vr0, $a0, 0
+; CHECK-NEXT:    ret
+  %v0 = load <8 x i16>, ptr %src
+  %v1.ele = insertelement <8 x i16> poison, i16 %a0, i16 0
+  %v1 = shufflevector <8 x i16> %v1.ele, <8 x i16> poison, <8 x i32> zeroinitializer
+  %v1.sub = sub <8 x i16> splat (i16 16), %v1
+  %b = shl <8 x i16> %v0, %v1
+  %c = lshr <8 x i16> %v0, %v1.sub
+  %d = or <8 x i16> %b, %c
+  store <8 x i16> %d, ptr %dst
+  ret void
+}
+
+define void @rotr_v8i16(ptr %dst, ptr %src, i16 signext %a0) nounwind {
+; CHECK-LABEL: rotr_v8i16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vld $vr0, $a1, 0
+; CHECK-NEXT:    vreplgr2vr.h $vr1, $a2
+; CHECK-NEXT:    vrepli.h $vr2, 16
+; CHECK-NEXT:    vsub.h $vr2, $vr2, $vr1
+; CHECK-NEXT:    vsrl.h $vr1, $vr0, $vr1
+; CHECK-NEXT:    vsll.h $vr0, $vr0, $vr2
+; CHECK-NEXT:    vor.v $vr0, $vr1, $vr0
+; CHECK-NEXT:    vst $vr0, $a0, 0
+; CHECK-NEXT:    ret
+  %v0 = load <8 x i16>, ptr %src
+  %v1.ele = insertelement <8 x i16> poison, i16 %a0, i16 0
+  %v1 = shufflevector <8 x i16> %v1.ele, <8 x i16> poison, <8 x i32> zeroinitializer
+  %v1.sub = sub <8 x i16> splat (i16 16), %v1
+  %b = lshr <8 x i16> %v0, %v1
+  %c = shl <8 x i16> %v0, %v1.sub
+  %d = or <8 x i16> %b, %c
+  store <8 x i16> %d, ptr %dst
+  ret void
+}
+
+define void @rotr_v8i16_imm(ptr %dst, ptr %src) nounwind {
+; CHECK-LABEL: rotr_v8i16_imm:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vld $vr0, $a1, 0
+; CHECK-NEXT:    vsrli.h $vr1, $vr0, 2
+; CHECK-NEXT:    vslli.h $vr0, $vr0, 14
+; CHECK-NEXT:    vor.v $vr0, $vr0, $vr1
+; CHECK-NEXT:    vst $vr0, $a0, 0
+; CHECK-NEXT:    ret
+  %v0 = load <8 x i16>, ptr %src
+  %b = lshr <8 x i16> %v0, splat (i16 2)
+  %c = shl <8 x i16> %v0, splat (i16 14)
+  %d = or <8 x i16> %b, %c
+  store <8 x i16> %d, ptr %dst
+  ret void
+}
+
+define void @rotl_v4i32(ptr %dst, ptr %src, i32 signext %a0) nounwind {
+; CHECK-LABEL: rotl_v4i32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vld $vr0, $a1, 0
+; CHECK-NEXT:    vreplgr2vr.w $vr1, $a2
+; CHECK-NEXT:    vrepli.w $vr2, 32
+; CHECK-NEXT:    vsub.w $vr2, $vr2, $vr1
+; CHECK-NEXT:    vsll.w $vr1, $vr0, $vr1
+; CHECK-NEXT:    vsrl.w $vr0, $vr0, $vr2
+; CHECK-NEXT:    vor.v $vr0, $vr1, $vr0
+; CHECK-NEXT:    vst $vr0, $a0, 0
+; CHECK-NEXT:    ret
+  %v0 = load <4 x i32>, ptr %src
+  %v1.ele = insertelement <4 x i32> poison, i32 %a0, i32 0
+  %v1 = shufflevector <4 x i32> %v1.ele, <4 x i32> poison, <4 x i32> zeroinitializer
+  %v1.sub = sub <4 x i32> splat (i32 32), %v1
+  %b = shl <4 x i32> %v0, %v1
+  %c = lshr <4 x i32> %v0, %v1.sub
+  %d = or <4 x i32> %b, %c
+  store <4 x i32> %d, ptr %dst
+  ret void
+}
+
+define void @rotr_v4i32(ptr %dst, ptr %src, i32 signext %a0) nounwind {
+; CHECK-LABEL: rotr_v4i32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vld $vr0, $a1, 0
+; CHECK-NEXT:    vreplgr2vr.w $vr1, $a2
+; CHECK-NEXT:    vrepli.w $vr2, 32
+; CHECK-NEXT:    vsub.w $vr2, $vr2, $vr1
+; CHECK-NEXT:    vsrl.w $vr1, $vr0, $vr1
+; CHECK-NEXT:    vsll.w $vr0, $vr0, $vr2
+; CHECK-NEXT:    vor.v $vr0, $vr1, $vr0
+; CHECK-NEXT:    vst $vr0, $a0, 0
+; CHECK-NEXT:    ret
+  %v0 = load <4 x i32>, ptr %src
+  %v1.ele = insertelement <4 x i32> poison, i32 %a0, i32 0
+  %v1 = shufflevector <4 x i32> %v1.ele, <4 x i32> poison, <4 x i32> zeroinitializer
+  %v1.sub = sub <4 x i32> splat (i32 32), %v1
+  %b = lshr <4 x i32> %v0, %v1
+  %c = shl <4 x i32> %v0, %v1.sub
+  %d = or <4 x i32> %b, %c
+  store <4 x i32> %d, ptr %dst
+  ret void
+}
+
+define void @rotr_v4i32_imm(ptr %dst, ptr %src) nounwind {
+; CHECK-LABEL: rotr_v4i32_imm:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vld $vr0, $a1, 0
+; CHECK-NEXT:    vsrli.w $vr1, $vr0, 2
+; CHECK-NEXT:    vslli.w $vr0, $vr0, 30
+; CHECK-NEXT:    vor.v $vr0, $vr0, $vr1
+; CHECK-NEXT:    vst $vr0, $a0, 0
+; CHECK-NEXT:    ret
+  %v0 = load <4 x i32>, ptr %src
+  %b = lshr <4 x i32> %v0, splat (i32 2)
+  %c = shl <4 x i32> %v0, splat (i32 30)
+  %d = or <4 x i32> %b, %c
+  store <4 x i32> %d, ptr %dst
+  ret void
+}
+
+define void @rotl_v2i64(ptr %dst, ptr %src, i64 %a0) nounwind {
+; LA32-LABEL: rotl_v2i64:
+; LA32:       # %bb.0:
+; LA32-NEXT:    vld $vr0, $a1, 0
+; LA32-NEXT:    vinsgr2vr.w $vr1, $a2, 0
+; LA32-NEXT:    vinsgr2vr.w $vr1, $a3, 1
+; LA32-NEXT:    vreplvei.d $vr1, $vr1, 0
+; LA32-NEXT:    vrepli.d $vr2, 64
+; LA32-NEXT:    vsub.d $vr2, $vr2, $vr1
+; LA32-NEXT:    vsll.d $vr1, $vr0, $vr1
+; LA32-NEXT:    vsrl.d $vr0, $vr0, $vr2
+; LA32-NEXT:    vor.v $vr0, $vr1, $vr0
+; LA32-NEXT:    vst $vr0, $a0, 0
+; LA32-NEXT:    ret
+;
+; LA64-LABEL: rotl_v2i64:
+; LA64:       # %bb.0:
+; LA64-NEXT:    vld $vr0, $a1, 0
+; LA64-NEXT:    vreplgr2vr.d $vr1, $a2
+; LA64-NEXT:    vrepli.d $vr2, 64
+; LA64-NEXT:    vsub.d $vr2, $vr2, $vr1
+; LA64-NEXT:    vsll.d $vr1, $vr0, $vr1
+; LA64-NEXT:    vsrl.d $vr0, $vr0, $vr2
+; LA64-NEXT:    vor.v $vr0, $vr1, $vr0
+; LA64-NEXT:    vst $vr0, $a0, 0
+; LA64-NEXT:    ret
+  %v0 = load <2 x i64>, ptr %src
+  %v1.ele = insertelement <2 x i64> poison, i64 %a0, i64 0
+  %v1 = shufflevector <2 x i64> %v1.ele, <2 x i64> poison, <2 x i32> zeroinitializer
+  %v1.sub = sub <2 x i64> splat (i64 64), %v1
+  %b = shl <2 x i64> %v0, %v1
+  %c = lshr <2 x i64> %v0, %v1.sub
+  %d = or <2 x i64> %b, %c
+  store <2 x i64> %d, ptr %dst
+  ret void
+}
+
+define void @rotr_v2i64(ptr %dst, ptr %src, i64 %a0) nounwind {
+; LA32-LABEL: rotr_v2i64:
+; LA32:       # %bb.0:
+; LA32-NEXT:    vld $vr0, $a1, 0
+; LA32-NEXT:    vinsgr2vr.w $vr1, $a2, 0
+; LA32-NEXT:    vinsgr2vr.w $vr1, $a3, 1
+; LA32-NEXT:    vreplvei.d $vr1, $vr1, 0
+; LA32-NEXT:    vrepli.d $vr2, 64
+; LA32-NEXT:    vsub.d $vr2, $vr2, $vr1
+; LA32-NEXT:    vsrl.d $vr1, $vr0, $vr1
+; LA32-NEXT:    vsll.d $vr0, $vr0, $vr2
+; LA32-NEXT:    vor.v $vr0, $vr1, $vr0
+; LA32-NEXT:    vst $vr0, $a0, 0
+; LA32-NEXT:    ret
+;
+; LA64-LABEL: rotr_v2i64:
+; LA64:       # %bb.0:
+; LA64-NEXT:    vld $vr0, $a1, 0
+; LA64-NEXT:    vreplgr2vr.d $vr1, $a2
+; LA64-NEXT:    vrepli.d $vr2, 64
+; LA64-NEXT:    vsub.d $vr2, $vr2, $vr1
+; LA64-NEXT:    vsrl.d $vr1, $vr0, $vr1
+; LA64-NEXT:    vsll.d $vr0, $vr0, $vr2
+; LA64-NEXT:    vor.v $vr0, $vr1, $vr0
+; LA64-NEXT:    vst $vr0, $a0, 0
+; LA64-NEXT:    ret
+  %v0 = load <2 x i64>, ptr %src
+  %v1.ele = insertelement <2 x i64> poison, i64 %a0, i64 0
+  %v1 = shufflevector <2 x i64> %v1.ele, <2 x i64> poison, <2 x i32> zeroinitializer
+  %v1.sub = sub <2 x i64> splat (i64 64), %v1
+  %b = lshr <2 x i64> %v0, %v1
+  %c = shl <2 x i64> %v0, %v1.sub
+  %d = or <2 x i64> %b, %c
+  store <2 x i64> %d, ptr %dst
+  ret void
+}
+
+define void @rotr_v2i64_imm(ptr %dst, ptr %src) nounwind {
+; CHECK-LABEL: rotr_v2i64_imm:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vld $vr0, $a1, 0
+; CHECK-NEXT:    vsrli.d $vr1, $vr0, 2
+; CHECK-NEXT:    vslli.d $vr0, $vr0, 62
+; CHECK-NEXT:    vor.v $vr0, $vr0, $vr1
+; CHECK-NEXT:    vst $vr0, $a0, 0
+; CHECK-NEXT:    ret
+  %v0 = load <2 x i64>, ptr %src
+  %b = lshr <2 x i64> %v0, splat (i64 2)
+  %c = shl <2 x i64> %v0, splat (i64 62)
+  %d = or <2 x i64> %b, %c
+  store <2 x i64> %d, ptr %dst
+  ret void
+}