; Patch summary: adds count-leading-ones ("ctlo") codegen tests for LoongArch
; vector types to two test files:
;   - lasx/ctpop-ctlz.ll: ctlo_v32i8, ctlo_v16i16, ctlo_v8i32, ctlo_v4i64
;   - lsx/ctpop-ctlz.ll:  ctlo_v16i8, ctlo_v8i16, ctlo_v4i32, ctlo_v2i64
; Each test loads a vector from %src, inverts every bit with
; `xor ..., splat (-1)`, calls @llvm.ctlz on the inverted value with `i1 false`
; as the second argument, and stores the result to %dst.
; Expected output per the CHECK lines: the i8 cases use [x]vxori.b with 255
; followed by [x]vclz.b; the i16/i32/i64 cases first materialize all-ones with
; [x]vrepli.b -1, then apply [x]vxor.v, then [x]vclz.{h,w,d}.
; NOTE(review): the CHECK lines look tool-generated -- presumably they should
; be regenerated rather than hand-edited; confirm against the test file header.
; NOTE(review): each hunk header declares 69 resulting lines, but the line
; breaks in this copy are collapsed; restore them before applying the patch.
diff --git a/llvm/test/CodeGen/LoongArch/lasx/ctpop-ctlz.ll b/llvm/test/CodeGen/LoongArch/lasx/ctpop-ctlz.ll index ba2118fb94f63..3cae541adfc5c 100644 --- a/llvm/test/CodeGen/LoongArch/lasx/ctpop-ctlz.ll +++ b/llvm/test/CodeGen/LoongArch/lasx/ctpop-ctlz.ll @@ -106,6 +106,69 @@ define void @ctlz_v4i64(ptr %src, ptr %dst) nounwind { ret void } +define void @ctlo_v32i8(ptr %src, ptr %dst) nounwind { +; CHECK-LABEL: ctlo_v32i8: +; CHECK: # %bb.0: +; CHECK-NEXT: xvld $xr0, $a0, 0 +; CHECK-NEXT: xvxori.b $xr0, $xr0, 255 +; CHECK-NEXT: xvclz.b $xr0, $xr0 +; CHECK-NEXT: xvst $xr0, $a1, 0 +; CHECK-NEXT: ret + %v = load <32 x i8>, ptr %src + %v.not = xor <32 x i8> %v, splat (i8 -1) + %res = call <32 x i8> @llvm.ctlz.v32i8(<32 x i8> %v.not, i1 false) + store <32 x i8> %res, ptr %dst + ret void +} + +define void @ctlo_v16i16(ptr %src, ptr %dst) nounwind { +; CHECK-LABEL: ctlo_v16i16: +; CHECK: # %bb.0: +; CHECK-NEXT: xvld $xr0, $a0, 0 +; CHECK-NEXT: xvrepli.b $xr1, -1 +; CHECK-NEXT: xvxor.v $xr0, $xr0, $xr1 +; CHECK-NEXT: xvclz.h $xr0, $xr0 +; CHECK-NEXT: xvst $xr0, $a1, 0 +; CHECK-NEXT: ret + %v = load <16 x i16>, ptr %src + %v.not = xor <16 x i16> %v, splat (i16 -1) + %res = call <16 x i16> @llvm.ctlz.v16i16(<16 x i16> %v.not, i1 false) + store <16 x i16> %res, ptr %dst + ret void +} + +define void @ctlo_v8i32(ptr %src, ptr %dst) nounwind { +; CHECK-LABEL: ctlo_v8i32: +; CHECK: # %bb.0: +; CHECK-NEXT: xvld $xr0, $a0, 0 +; CHECK-NEXT: xvrepli.b $xr1, -1 +; CHECK-NEXT: xvxor.v $xr0, $xr0, $xr1 +; CHECK-NEXT: xvclz.w $xr0, $xr0 +; CHECK-NEXT: xvst $xr0, $a1, 0 +; CHECK-NEXT: ret + %v = load <8 x i32>, ptr %src + %v.not = xor <8 x i32> %v, splat (i32 -1) + %res = call <8 x i32> @llvm.ctlz.v8i32(<8 x i32> %v.not, i1 false) + store <8 x i32> %res, ptr %dst + ret void +} + +define void @ctlo_v4i64(ptr %src, ptr %dst) nounwind { +; CHECK-LABEL: ctlo_v4i64: +; CHECK: # %bb.0: +; CHECK-NEXT: xvld $xr0, $a0, 0 +; CHECK-NEXT: xvrepli.b $xr1, -1 +; CHECK-NEXT: xvxor.v $xr0, $xr0, $xr1 +; 
CHECK-NEXT: xvclz.d $xr0, $xr0 +; CHECK-NEXT: xvst $xr0, $a1, 0 +; CHECK-NEXT: ret + %v = load <4 x i64>, ptr %src + %v.not = xor <4 x i64> %v, splat (i64 -1) + %res = call <4 x i64> @llvm.ctlz.v4i64(<4 x i64> %v.not, i1 false) + store <4 x i64> %res, ptr %dst + ret void +} + declare <32 x i8> @llvm.ctpop.v32i8(<32 x i8>) declare <16 x i16> @llvm.ctpop.v16i16(<16 x i16>) declare <8 x i32> @llvm.ctpop.v8i32(<8 x i32>) diff --git a/llvm/test/CodeGen/LoongArch/lsx/ctpop-ctlz.ll b/llvm/test/CodeGen/LoongArch/lsx/ctpop-ctlz.ll index a9a38e8f75f9c..42f554208ffc4 100644 --- a/llvm/test/CodeGen/LoongArch/lsx/ctpop-ctlz.ll +++ b/llvm/test/CodeGen/LoongArch/lsx/ctpop-ctlz.ll @@ -106,6 +106,69 @@ define void @ctlz_v2i64(ptr %src, ptr %dst) nounwind { ret void } +define void @ctlo_v16i8(ptr %src, ptr %dst) nounwind { +; CHECK-LABEL: ctlo_v16i8: +; CHECK: # %bb.0: +; CHECK-NEXT: vld $vr0, $a0, 0 +; CHECK-NEXT: vxori.b $vr0, $vr0, 255 +; CHECK-NEXT: vclz.b $vr0, $vr0 +; CHECK-NEXT: vst $vr0, $a1, 0 +; CHECK-NEXT: ret + %v = load <16 x i8>, ptr %src + %v.not = xor <16 x i8> %v, splat (i8 -1) + %res = call <16 x i8> @llvm.ctlz.v16i8(<16 x i8> %v.not, i1 false) + store <16 x i8> %res, ptr %dst + ret void +} + +define void @ctlo_v8i16(ptr %src, ptr %dst) nounwind { +; CHECK-LABEL: ctlo_v8i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vld $vr0, $a0, 0 +; CHECK-NEXT: vrepli.b $vr1, -1 +; CHECK-NEXT: vxor.v $vr0, $vr0, $vr1 +; CHECK-NEXT: vclz.h $vr0, $vr0 +; CHECK-NEXT: vst $vr0, $a1, 0 +; CHECK-NEXT: ret + %v = load <8 x i16>, ptr %src + %v.not = xor <8 x i16> %v, splat (i16 -1) + %res = call <8 x i16> @llvm.ctlz.v8i16(<8 x i16> %v.not, i1 false) + store <8 x i16> %res, ptr %dst + ret void +} + +define void @ctlo_v4i32(ptr %src, ptr %dst) nounwind { +; CHECK-LABEL: ctlo_v4i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vld $vr0, $a0, 0 +; CHECK-NEXT: vrepli.b $vr1, -1 +; CHECK-NEXT: vxor.v $vr0, $vr0, $vr1 +; CHECK-NEXT: vclz.w $vr0, $vr0 +; CHECK-NEXT: vst $vr0, $a1, 0 +; CHECK-NEXT: ret + %v = load 
<4 x i32>, ptr %src + %v.not = xor <4 x i32> %v, splat (i32 -1) + %res = call <4 x i32> @llvm.ctlz.v4i32(<4 x i32> %v.not, i1 false) + store <4 x i32> %res, ptr %dst + ret void +} + +define void @ctlo_v2i64(ptr %src, ptr %dst) nounwind { +; CHECK-LABEL: ctlo_v2i64: +; CHECK: # %bb.0: +; CHECK-NEXT: vld $vr0, $a0, 0 +; CHECK-NEXT: vrepli.b $vr1, -1 +; CHECK-NEXT: vxor.v $vr0, $vr0, $vr1 +; CHECK-NEXT: vclz.d $vr0, $vr0 +; CHECK-NEXT: vst $vr0, $a1, 0 +; CHECK-NEXT: ret + %v = load <2 x i64>, ptr %src + %v.not = xor <2 x i64> %v, splat (i64 -1) + %res = call <2 x i64> @llvm.ctlz.v2i64(<2 x i64> %v.not, i1 false) + store <2 x i64> %res, ptr %dst + ret void +} + declare <16 x i8> @llvm.ctpop.v16i8(<16 x i8>) declare <8 x i16> @llvm.ctpop.v8i16(<8 x i16>) declare <4 x i32> @llvm.ctpop.v4i32(<4 x i32>)