llvm/test/CodeGen/RISCV/rvv/sshl_sat_vec.ll: 480 additions & 0 deletions
@@ -0,0 +1,480 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=riscv64 -mattr=+v | FileCheck %s
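
; The sshl.sat intrinsics are currently scalarized: each lane is extracted to
; a scalar register, shifted left, shifted back with sra, and compared with
; the original value; on overflow the lane is clamped to the signed minimum
; or maximum according to its sign. i64 results are rebuilt with
; vmv.v.x/vmv.s.x, narrower results through a stack slot.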

declare <2 x i64> @llvm.sshl.sat.v2i64(<2 x i64>, <2 x i64>)
declare <4 x i32> @llvm.sshl.sat.v4i32(<4 x i32>, <4 x i32>)
declare <8 x i16> @llvm.sshl.sat.v8i16(<8 x i16>, <8 x i16>)
declare <16 x i8> @llvm.sshl.sat.v16i8(<16 x i8>, <16 x i8>)

define <2 x i64> @vec_v2i64(<2 x i64> %x, <2 x i64> %y) nounwind {
; CHECK-LABEL: vec_v2i64:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 0, e64, m1, ta, ma
; CHECK-NEXT: vmv.x.s a2, v8
; CHECK-NEXT: li a1, -1
; CHECK-NEXT: vmv.x.s a3, v9
; CHECK-NEXT: sll a0, a2, a3
; CHECK-NEXT: sra a3, a0, a3
; CHECK-NEXT: srli a1, a1, 1
; CHECK-NEXT: beq a2, a3, .LBB0_2
; CHECK-NEXT: # %bb.1:
; CHECK-NEXT: slti a0, a2, 0
; CHECK-NEXT: add a0, a0, a1
; CHECK-NEXT: .LBB0_2:
; CHECK-NEXT: vsetivli zero, 1, e64, m1, ta, ma
; CHECK-NEXT: vslidedown.vi v9, v9, 1
; CHECK-NEXT: vmv.x.s a4, v9
; CHECK-NEXT: vslidedown.vi v8, v8, 1
; CHECK-NEXT: vmv.x.s a3, v8
; CHECK-NEXT: sll a2, a3, a4
; CHECK-NEXT: sra a4, a2, a4
; CHECK-NEXT: beq a3, a4, .LBB0_4
; CHECK-NEXT: # %bb.3:
; CHECK-NEXT: slti a2, a3, 0
; CHECK-NEXT: add a2, a2, a1
; CHECK-NEXT: .LBB0_4:
; CHECK-NEXT: vsetivli zero, 2, e64, m1, ta, ma
; CHECK-NEXT: vmv.v.x v8, a2
; CHECK-NEXT: vsetvli zero, zero, e64, m1, tu, ma
; CHECK-NEXT: vmv.s.x v8, a0
; CHECK-NEXT: ret
%tmp = call <2 x i64> @llvm.sshl.sat.v2i64(<2 x i64> %x, <2 x i64> %y)
ret <2 x i64> %tmp
}

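; Lanes narrower than i64 are pre-shifted into the high bits of a scalar
; register (bits [63:32] here) so the same 64-bit shift/compare sequence
; applies; the saturated lanes are stored to the stack and reloaded with
; vle32.v.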
define <4 x i32> @vec_v4i32(<4 x i32> %x, <4 x i32> %y) nounwind {
; CHECK-LABEL: vec_v4i32:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 0, e32, m1, ta, ma
; CHECK-NEXT: vmv.x.s a0, v8
; CHECK-NEXT: slli a2, a0, 32
; CHECK-NEXT: li a0, -1
; CHECK-NEXT: vmv.x.s a3, v9
; CHECK-NEXT: sll a1, a2, a3
; CHECK-NEXT: sra a3, a1, a3
; CHECK-NEXT: srli a0, a0, 1
; CHECK-NEXT: beq a2, a3, .LBB1_2
; CHECK-NEXT: # %bb.1:
; CHECK-NEXT: slti a1, a2, 0
; CHECK-NEXT: add a1, a1, a0
; CHECK-NEXT: .LBB1_2:
; CHECK-NEXT: addi sp, sp, -16
; CHECK-NEXT: srli a1, a1, 32
; CHECK-NEXT: sw a1, 0(sp)
; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; CHECK-NEXT: vslidedown.vi v10, v9, 3
; CHECK-NEXT: vmv.x.s a3, v10
; CHECK-NEXT: vslidedown.vi v10, v8, 3
; CHECK-NEXT: vmv.x.s a1, v10
; CHECK-NEXT: slli a2, a1, 32
; CHECK-NEXT: sll a1, a2, a3
; CHECK-NEXT: sra a3, a1, a3
; CHECK-NEXT: beq a2, a3, .LBB1_4
; CHECK-NEXT: # %bb.3:
; CHECK-NEXT: slti a1, a2, 0
; CHECK-NEXT: add a1, a1, a0
; CHECK-NEXT: .LBB1_4:
; CHECK-NEXT: srli a3, a1, 32
; CHECK-NEXT: vslidedown.vi v10, v9, 2
; CHECK-NEXT: vmv.x.s a4, v10
; CHECK-NEXT: vslidedown.vi v10, v8, 2
; CHECK-NEXT: vmv.x.s a1, v10
; CHECK-NEXT: slli a2, a1, 32
; CHECK-NEXT: sll a1, a2, a4
; CHECK-NEXT: sra a4, a1, a4
; CHECK-NEXT: sw a3, 12(sp)
; CHECK-NEXT: beq a2, a4, .LBB1_6
; CHECK-NEXT: # %bb.5:
; CHECK-NEXT: slti a1, a2, 0
; CHECK-NEXT: add a1, a1, a0
; CHECK-NEXT: .LBB1_6:
; CHECK-NEXT: srli a3, a1, 32
; CHECK-NEXT: vslidedown.vi v9, v9, 1
; CHECK-NEXT: vmv.x.s a4, v9
; CHECK-NEXT: vslidedown.vi v8, v8, 1
; CHECK-NEXT: vmv.x.s a1, v8
; CHECK-NEXT: slli a2, a1, 32
; CHECK-NEXT: sll a1, a2, a4
; CHECK-NEXT: sra a4, a1, a4
; CHECK-NEXT: sw a3, 8(sp)
; CHECK-NEXT: beq a2, a4, .LBB1_8
; CHECK-NEXT: # %bb.7:
; CHECK-NEXT: slti a1, a2, 0
; CHECK-NEXT: add a1, a1, a0
; CHECK-NEXT: .LBB1_8:
; CHECK-NEXT: srli a0, a1, 32
; CHECK-NEXT: sw a0, 4(sp)
; CHECK-NEXT: mv a0, sp
; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT: vle32.v v8, (a0)
; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
%tmp = call <4 x i32> @llvm.sshl.sat.v4i32(<4 x i32> %x, <4 x i32> %y)
ret <4 x i32> %tmp
}

define <8 x i16> @vec_v8i16(<8 x i16> %x, <8 x i16> %y) nounwind {
; CHECK-LABEL: vec_v8i16:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 0, e16, m1, ta, ma
; CHECK-NEXT: vmv.x.s a0, v8
; CHECK-NEXT: slli a2, a0, 48
; CHECK-NEXT: li a0, -1
; CHECK-NEXT: vmv.x.s a3, v9
; CHECK-NEXT: sll a1, a2, a3
; CHECK-NEXT: sra a3, a1, a3
; CHECK-NEXT: srli a0, a0, 1
; CHECK-NEXT: beq a2, a3, .LBB2_2
; CHECK-NEXT: # %bb.1:
; CHECK-NEXT: slti a1, a2, 0
; CHECK-NEXT: add a1, a1, a0
; CHECK-NEXT: .LBB2_2:
; CHECK-NEXT: addi sp, sp, -16
; CHECK-NEXT: srli a1, a1, 48
; CHECK-NEXT: sh a1, 0(sp)
; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma
; CHECK-NEXT: vslidedown.vi v10, v9, 7
; CHECK-NEXT: vmv.x.s a3, v10
; CHECK-NEXT: vslidedown.vi v10, v8, 7
; CHECK-NEXT: vmv.x.s a1, v10
; CHECK-NEXT: slli a2, a1, 48
; CHECK-NEXT: sll a1, a2, a3
; CHECK-NEXT: sra a3, a1, a3
; CHECK-NEXT: beq a2, a3, .LBB2_4
; CHECK-NEXT: # %bb.3:
; CHECK-NEXT: slti a1, a2, 0
; CHECK-NEXT: add a1, a1, a0
; CHECK-NEXT: .LBB2_4:
; CHECK-NEXT: srli a3, a1, 48
; CHECK-NEXT: vslidedown.vi v10, v9, 6
; CHECK-NEXT: vmv.x.s a4, v10
; CHECK-NEXT: vslidedown.vi v10, v8, 6
; CHECK-NEXT: vmv.x.s a1, v10
; CHECK-NEXT: slli a2, a1, 48
; CHECK-NEXT: sll a1, a2, a4
; CHECK-NEXT: sra a4, a1, a4
; CHECK-NEXT: sh a3, 14(sp)
; CHECK-NEXT: beq a2, a4, .LBB2_6
; CHECK-NEXT: # %bb.5:
; CHECK-NEXT: slti a1, a2, 0
; CHECK-NEXT: add a1, a1, a0
; CHECK-NEXT: .LBB2_6:
; CHECK-NEXT: srli a3, a1, 48
; CHECK-NEXT: vslidedown.vi v10, v9, 5
; CHECK-NEXT: vmv.x.s a4, v10
; CHECK-NEXT: vslidedown.vi v10, v8, 5
; CHECK-NEXT: vmv.x.s a1, v10
; CHECK-NEXT: slli a2, a1, 48
; CHECK-NEXT: sll a1, a2, a4
; CHECK-NEXT: sra a4, a1, a4
; CHECK-NEXT: sh a3, 12(sp)
; CHECK-NEXT: beq a2, a4, .LBB2_8
; CHECK-NEXT: # %bb.7:
; CHECK-NEXT: slti a1, a2, 0
; CHECK-NEXT: add a1, a1, a0
; CHECK-NEXT: .LBB2_8:
; CHECK-NEXT: srli a3, a1, 48
; CHECK-NEXT: vslidedown.vi v10, v9, 4
; CHECK-NEXT: vmv.x.s a4, v10
; CHECK-NEXT: vslidedown.vi v10, v8, 4
; CHECK-NEXT: vmv.x.s a1, v10
; CHECK-NEXT: slli a2, a1, 48
; CHECK-NEXT: sll a1, a2, a4
; CHECK-NEXT: sra a4, a1, a4
; CHECK-NEXT: sh a3, 10(sp)
; CHECK-NEXT: beq a2, a4, .LBB2_10
; CHECK-NEXT: # %bb.9:
; CHECK-NEXT: slti a1, a2, 0
; CHECK-NEXT: add a1, a1, a0
; CHECK-NEXT: .LBB2_10:
; CHECK-NEXT: srli a3, a1, 48
; CHECK-NEXT: vslidedown.vi v10, v9, 3
; CHECK-NEXT: vmv.x.s a4, v10
; CHECK-NEXT: vslidedown.vi v10, v8, 3
; CHECK-NEXT: vmv.x.s a1, v10
; CHECK-NEXT: slli a2, a1, 48
; CHECK-NEXT: sll a1, a2, a4
; CHECK-NEXT: sra a4, a1, a4
; CHECK-NEXT: sh a3, 8(sp)
; CHECK-NEXT: beq a2, a4, .LBB2_12
; CHECK-NEXT: # %bb.11:
; CHECK-NEXT: slti a1, a2, 0
; CHECK-NEXT: add a1, a1, a0
; CHECK-NEXT: .LBB2_12:
; CHECK-NEXT: srli a3, a1, 48
; CHECK-NEXT: vslidedown.vi v10, v9, 2
; CHECK-NEXT: vmv.x.s a4, v10
; CHECK-NEXT: vslidedown.vi v10, v8, 2
; CHECK-NEXT: vmv.x.s a1, v10
; CHECK-NEXT: slli a2, a1, 48
; CHECK-NEXT: sll a1, a2, a4
; CHECK-NEXT: sra a4, a1, a4
; CHECK-NEXT: sh a3, 6(sp)
; CHECK-NEXT: beq a2, a4, .LBB2_14
; CHECK-NEXT: # %bb.13:
; CHECK-NEXT: slti a1, a2, 0
; CHECK-NEXT: add a1, a1, a0
; CHECK-NEXT: .LBB2_14:
; CHECK-NEXT: srli a3, a1, 48
; CHECK-NEXT: vslidedown.vi v9, v9, 1
; CHECK-NEXT: vmv.x.s a4, v9
; CHECK-NEXT: vslidedown.vi v8, v8, 1
; CHECK-NEXT: vmv.x.s a1, v8
; CHECK-NEXT: slli a2, a1, 48
; CHECK-NEXT: sll a1, a2, a4
; CHECK-NEXT: sra a4, a1, a4
; CHECK-NEXT: sh a3, 4(sp)
; CHECK-NEXT: beq a2, a4, .LBB2_16
; CHECK-NEXT: # %bb.15:
; CHECK-NEXT: slti a1, a2, 0
; CHECK-NEXT: add a1, a1, a0
; CHECK-NEXT: .LBB2_16:
; CHECK-NEXT: srli a0, a1, 48
; CHECK-NEXT: sh a0, 2(sp)
; CHECK-NEXT: mv a0, sp
; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma
; CHECK-NEXT: vle16.v v8, (a0)
; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
%tmp = call <8 x i16> @llvm.sshl.sat.v8i16(<8 x i16> %x, <8 x i16> %y)
ret <8 x i16> %tmp
}

define <16 x i8> @vec_v16i8(<16 x i8> %x, <16 x i8> %y) nounwind {
; CHECK-LABEL: vec_v16i8:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma
; CHECK-NEXT: vmv.x.s a0, v8
; CHECK-NEXT: slli a2, a0, 56
; CHECK-NEXT: li a0, -1
; CHECK-NEXT: vmv.x.s a3, v9
; CHECK-NEXT: sll a1, a2, a3
; CHECK-NEXT: sra a3, a1, a3
; CHECK-NEXT: srli a0, a0, 1
; CHECK-NEXT: beq a2, a3, .LBB3_2
; CHECK-NEXT: # %bb.1:
; CHECK-NEXT: slti a1, a2, 0
; CHECK-NEXT: add a1, a1, a0
; CHECK-NEXT: .LBB3_2:
; CHECK-NEXT: addi sp, sp, -16
; CHECK-NEXT: srli a1, a1, 56
; CHECK-NEXT: sb a1, 0(sp)
; CHECK-NEXT: vsetivli zero, 1, e8, m1, ta, ma
; CHECK-NEXT: vslidedown.vi v10, v9, 15
; CHECK-NEXT: vmv.x.s a3, v10
; CHECK-NEXT: vslidedown.vi v10, v8, 15
; CHECK-NEXT: vmv.x.s a1, v10
; CHECK-NEXT: slli a2, a1, 56
; CHECK-NEXT: sll a1, a2, a3
; CHECK-NEXT: sra a3, a1, a3
; CHECK-NEXT: beq a2, a3, .LBB3_4
; CHECK-NEXT: # %bb.3:
; CHECK-NEXT: slti a1, a2, 0
; CHECK-NEXT: add a1, a1, a0
; CHECK-NEXT: .LBB3_4:
; CHECK-NEXT: srli a3, a1, 56
; CHECK-NEXT: vslidedown.vi v10, v9, 14
; CHECK-NEXT: vmv.x.s a4, v10
; CHECK-NEXT: vslidedown.vi v10, v8, 14
; CHECK-NEXT: vmv.x.s a1, v10
; CHECK-NEXT: slli a2, a1, 56
; CHECK-NEXT: sll a1, a2, a4
; CHECK-NEXT: sra a4, a1, a4
; CHECK-NEXT: sb a3, 15(sp)
; CHECK-NEXT: beq a2, a4, .LBB3_6
; CHECK-NEXT: # %bb.5:
; CHECK-NEXT: slti a1, a2, 0
; CHECK-NEXT: add a1, a1, a0
; CHECK-NEXT: .LBB3_6:
; CHECK-NEXT: srli a3, a1, 56
; CHECK-NEXT: vslidedown.vi v10, v9, 13
; CHECK-NEXT: vmv.x.s a4, v10
; CHECK-NEXT: vslidedown.vi v10, v8, 13
; CHECK-NEXT: vmv.x.s a1, v10
; CHECK-NEXT: slli a2, a1, 56
; CHECK-NEXT: sll a1, a2, a4
; CHECK-NEXT: sra a4, a1, a4
; CHECK-NEXT: sb a3, 14(sp)
; CHECK-NEXT: beq a2, a4, .LBB3_8
; CHECK-NEXT: # %bb.7:
; CHECK-NEXT: slti a1, a2, 0
; CHECK-NEXT: add a1, a1, a0
; CHECK-NEXT: .LBB3_8:
; CHECK-NEXT: srli a3, a1, 56
; CHECK-NEXT: vslidedown.vi v10, v9, 12
; CHECK-NEXT: vmv.x.s a4, v10
; CHECK-NEXT: vslidedown.vi v10, v8, 12
; CHECK-NEXT: vmv.x.s a1, v10
; CHECK-NEXT: slli a2, a1, 56
; CHECK-NEXT: sll a1, a2, a4
; CHECK-NEXT: sra a4, a1, a4
; CHECK-NEXT: sb a3, 13(sp)
; CHECK-NEXT: beq a2, a4, .LBB3_10
; CHECK-NEXT: # %bb.9:
; CHECK-NEXT: slti a1, a2, 0
; CHECK-NEXT: add a1, a1, a0
; CHECK-NEXT: .LBB3_10:
; CHECK-NEXT: srli a3, a1, 56
; CHECK-NEXT: vslidedown.vi v10, v9, 11
; CHECK-NEXT: vmv.x.s a4, v10
; CHECK-NEXT: vslidedown.vi v10, v8, 11
; CHECK-NEXT: vmv.x.s a1, v10
; CHECK-NEXT: slli a2, a1, 56
; CHECK-NEXT: sll a1, a2, a4
; CHECK-NEXT: sra a4, a1, a4
; CHECK-NEXT: sb a3, 12(sp)
; CHECK-NEXT: beq a2, a4, .LBB3_12
; CHECK-NEXT: # %bb.11:
; CHECK-NEXT: slti a1, a2, 0
; CHECK-NEXT: add a1, a1, a0
; CHECK-NEXT: .LBB3_12:
; CHECK-NEXT: srli a3, a1, 56
; CHECK-NEXT: vslidedown.vi v10, v9, 10
; CHECK-NEXT: vmv.x.s a4, v10
; CHECK-NEXT: vslidedown.vi v10, v8, 10
; CHECK-NEXT: vmv.x.s a1, v10
; CHECK-NEXT: slli a2, a1, 56
; CHECK-NEXT: sll a1, a2, a4
; CHECK-NEXT: sra a4, a1, a4
; CHECK-NEXT: sb a3, 11(sp)
; CHECK-NEXT: beq a2, a4, .LBB3_14
; CHECK-NEXT: # %bb.13:
; CHECK-NEXT: slti a1, a2, 0
; CHECK-NEXT: add a1, a1, a0
; CHECK-NEXT: .LBB3_14:
; CHECK-NEXT: srli a3, a1, 56
; CHECK-NEXT: vslidedown.vi v10, v9, 9
; CHECK-NEXT: vmv.x.s a4, v10
; CHECK-NEXT: vslidedown.vi v10, v8, 9
; CHECK-NEXT: vmv.x.s a1, v10
; CHECK-NEXT: slli a2, a1, 56
; CHECK-NEXT: sll a1, a2, a4
; CHECK-NEXT: sra a4, a1, a4
; CHECK-NEXT: sb a3, 10(sp)
; CHECK-NEXT: beq a2, a4, .LBB3_16
; CHECK-NEXT: # %bb.15:
; CHECK-NEXT: slti a1, a2, 0
; CHECK-NEXT: add a1, a1, a0
; CHECK-NEXT: .LBB3_16:
; CHECK-NEXT: srli a3, a1, 56
; CHECK-NEXT: vslidedown.vi v10, v9, 8
; CHECK-NEXT: vmv.x.s a4, v10
; CHECK-NEXT: vslidedown.vi v10, v8, 8
; CHECK-NEXT: vmv.x.s a1, v10
; CHECK-NEXT: slli a2, a1, 56
; CHECK-NEXT: sll a1, a2, a4
; CHECK-NEXT: sra a4, a1, a4
; CHECK-NEXT: sb a3, 9(sp)
; CHECK-NEXT: beq a2, a4, .LBB3_18
; CHECK-NEXT: # %bb.17:
; CHECK-NEXT: slti a1, a2, 0
; CHECK-NEXT: add a1, a1, a0
; CHECK-NEXT: .LBB3_18:
; CHECK-NEXT: srli a3, a1, 56
; CHECK-NEXT: vslidedown.vi v10, v9, 7
; CHECK-NEXT: vmv.x.s a4, v10
; CHECK-NEXT: vslidedown.vi v10, v8, 7
; CHECK-NEXT: vmv.x.s a1, v10
; CHECK-NEXT: slli a2, a1, 56
; CHECK-NEXT: sll a1, a2, a4
; CHECK-NEXT: sra a4, a1, a4
; CHECK-NEXT: sb a3, 8(sp)
; CHECK-NEXT: beq a2, a4, .LBB3_20
; CHECK-NEXT: # %bb.19:
; CHECK-NEXT: slti a1, a2, 0
; CHECK-NEXT: add a1, a1, a0
; CHECK-NEXT: .LBB3_20:
; CHECK-NEXT: srli a3, a1, 56
; CHECK-NEXT: vslidedown.vi v10, v9, 6
; CHECK-NEXT: vmv.x.s a4, v10
; CHECK-NEXT: vslidedown.vi v10, v8, 6
; CHECK-NEXT: vmv.x.s a1, v10
; CHECK-NEXT: slli a2, a1, 56
; CHECK-NEXT: sll a1, a2, a4
; CHECK-NEXT: sra a4, a1, a4
; CHECK-NEXT: sb a3, 7(sp)
; CHECK-NEXT: beq a2, a4, .LBB3_22
; CHECK-NEXT: # %bb.21:
; CHECK-NEXT: slti a1, a2, 0
; CHECK-NEXT: add a1, a1, a0
; CHECK-NEXT: .LBB3_22:
; CHECK-NEXT: srli a3, a1, 56
; CHECK-NEXT: vslidedown.vi v10, v9, 5
; CHECK-NEXT: vmv.x.s a4, v10
; CHECK-NEXT: vslidedown.vi v10, v8, 5
; CHECK-NEXT: vmv.x.s a1, v10
; CHECK-NEXT: slli a2, a1, 56
; CHECK-NEXT: sll a1, a2, a4
; CHECK-NEXT: sra a4, a1, a4
; CHECK-NEXT: sb a3, 6(sp)
; CHECK-NEXT: beq a2, a4, .LBB3_24
; CHECK-NEXT: # %bb.23:
; CHECK-NEXT: slti a1, a2, 0
; CHECK-NEXT: add a1, a1, a0
; CHECK-NEXT: .LBB3_24:
; CHECK-NEXT: srli a3, a1, 56
; CHECK-NEXT: vslidedown.vi v10, v9, 4
; CHECK-NEXT: vmv.x.s a4, v10
; CHECK-NEXT: vslidedown.vi v10, v8, 4
; CHECK-NEXT: vmv.x.s a1, v10
; CHECK-NEXT: slli a2, a1, 56
; CHECK-NEXT: sll a1, a2, a4
; CHECK-NEXT: sra a4, a1, a4
; CHECK-NEXT: sb a3, 5(sp)
; CHECK-NEXT: beq a2, a4, .LBB3_26
; CHECK-NEXT: # %bb.25:
; CHECK-NEXT: slti a1, a2, 0
; CHECK-NEXT: add a1, a1, a0
; CHECK-NEXT: .LBB3_26:
; CHECK-NEXT: srli a3, a1, 56
; CHECK-NEXT: vslidedown.vi v10, v9, 3
; CHECK-NEXT: vmv.x.s a4, v10
; CHECK-NEXT: vslidedown.vi v10, v8, 3
; CHECK-NEXT: vmv.x.s a1, v10
; CHECK-NEXT: slli a2, a1, 56
; CHECK-NEXT: sll a1, a2, a4
; CHECK-NEXT: sra a4, a1, a4
; CHECK-NEXT: sb a3, 4(sp)
; CHECK-NEXT: beq a2, a4, .LBB3_28
; CHECK-NEXT: # %bb.27:
; CHECK-NEXT: slti a1, a2, 0
; CHECK-NEXT: add a1, a1, a0
; CHECK-NEXT: .LBB3_28:
; CHECK-NEXT: srli a3, a1, 56
; CHECK-NEXT: vslidedown.vi v10, v9, 2
; CHECK-NEXT: vmv.x.s a4, v10
; CHECK-NEXT: vslidedown.vi v10, v8, 2
; CHECK-NEXT: vmv.x.s a1, v10
; CHECK-NEXT: slli a2, a1, 56
; CHECK-NEXT: sll a1, a2, a4
; CHECK-NEXT: sra a4, a1, a4
; CHECK-NEXT: sb a3, 3(sp)
; CHECK-NEXT: beq a2, a4, .LBB3_30
; CHECK-NEXT: # %bb.29:
; CHECK-NEXT: slti a1, a2, 0
; CHECK-NEXT: add a1, a1, a0
; CHECK-NEXT: .LBB3_30:
; CHECK-NEXT: srli a3, a1, 56
; CHECK-NEXT: vslidedown.vi v9, v9, 1
; CHECK-NEXT: vmv.x.s a4, v9
; CHECK-NEXT: vslidedown.vi v8, v8, 1
; CHECK-NEXT: vmv.x.s a1, v8
; CHECK-NEXT: slli a2, a1, 56
; CHECK-NEXT: sll a1, a2, a4
; CHECK-NEXT: sra a4, a1, a4
; CHECK-NEXT: sb a3, 2(sp)
; CHECK-NEXT: beq a2, a4, .LBB3_32
; CHECK-NEXT: # %bb.31:
; CHECK-NEXT: slti a1, a2, 0
; CHECK-NEXT: add a1, a1, a0
; CHECK-NEXT: .LBB3_32:
; CHECK-NEXT: srli a0, a1, 56
; CHECK-NEXT: sb a0, 1(sp)
; CHECK-NEXT: mv a0, sp
; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, ma
; CHECK-NEXT: vle8.v v8, (a0)
; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
%tmp = call <16 x i8> @llvm.sshl.sat.v16i8(<16 x i8> %x, <16 x i8> %y)
ret <16 x i8> %tmp
}
llvm/test/CodeGen/RISCV/rvv/ushl_sat_vec.ll: 442 additions & 0 deletions
@@ -0,0 +1,442 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=riscv64 -mattr=+v | FileCheck %s
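
; The ushl.sat intrinsics are currently scalarized: each lane is shifted left,
; shifted back with srl, and xor'ed with the original value; seqz/addi turn
; that comparison into an all-ones mask on overflow, which is or'ed with the
; shifted value to saturate to the unsigned maximum. i64 results are rebuilt
; with vmv.v.x/vmv.s.x, narrower results through a stack slot.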

declare <2 x i64> @llvm.ushl.sat.v2i64(<2 x i64>, <2 x i64>)
declare <4 x i32> @llvm.ushl.sat.v4i32(<4 x i32>, <4 x i32>)
declare <8 x i16> @llvm.ushl.sat.v8i16(<8 x i16>, <8 x i16>)
declare <16 x i8> @llvm.ushl.sat.v16i8(<16 x i8>, <16 x i8>)

define <2 x i64> @vec_v2i64(<2 x i64> %x, <2 x i64> %y) nounwind {
; CHECK-LABEL: vec_v2i64:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 1, e64, m1, ta, ma
; CHECK-NEXT: vslidedown.vi v10, v9, 1
; CHECK-NEXT: vmv.x.s a0, v10
; CHECK-NEXT: vslidedown.vi v10, v8, 1
; CHECK-NEXT: vmv.x.s a1, v10
; CHECK-NEXT: sll a2, a1, a0
; CHECK-NEXT: srl a0, a2, a0
; CHECK-NEXT: xor a0, a1, a0
; CHECK-NEXT: seqz a0, a0
; CHECK-NEXT: addi a0, a0, -1
; CHECK-NEXT: or a0, a0, a2
; CHECK-NEXT: vsetivli zero, 2, e64, m1, ta, ma
; CHECK-NEXT: vmv.v.x v10, a0
; CHECK-NEXT: vmv.x.s a0, v9
; CHECK-NEXT: vmv.x.s a1, v8
; CHECK-NEXT: sll a2, a1, a0
; CHECK-NEXT: srl a0, a2, a0
; CHECK-NEXT: xor a0, a1, a0
; CHECK-NEXT: seqz a0, a0
; CHECK-NEXT: addi a0, a0, -1
; CHECK-NEXT: or a0, a0, a2
; CHECK-NEXT: vsetvli zero, zero, e64, m1, tu, ma
; CHECK-NEXT: vmv.s.x v10, a0
; CHECK-NEXT: vmv1r.v v8, v10
; CHECK-NEXT: ret
%tmp = call <2 x i64> @llvm.ushl.sat.v2i64(<2 x i64> %x, <2 x i64> %y)
ret <2 x i64> %tmp
}

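; As in the signed tests, narrower lanes are pre-shifted into the high bits
; (bits [63:32] for i32) and the results are reassembled through the stack
; with vle32.v.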
define <4 x i32> @vec_v4i32(<4 x i32> %x, <4 x i32> %y) nounwind {
; CHECK-LABEL: vec_v4i32:
; CHECK: # %bb.0:
; CHECK-NEXT: addi sp, sp, -16
; CHECK-NEXT: vsetivli zero, 0, e32, m1, ta, ma
; CHECK-NEXT: vmv.x.s a0, v9
; CHECK-NEXT: vmv.x.s a1, v8
; CHECK-NEXT: slli a1, a1, 32
; CHECK-NEXT: sll a2, a1, a0
; CHECK-NEXT: srl a0, a2, a0
; CHECK-NEXT: xor a0, a1, a0
; CHECK-NEXT: seqz a0, a0
; CHECK-NEXT: addi a0, a0, -1
; CHECK-NEXT: or a0, a0, a2
; CHECK-NEXT: srli a0, a0, 32
; CHECK-NEXT: sw a0, 0(sp)
; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; CHECK-NEXT: vslidedown.vi v10, v9, 3
; CHECK-NEXT: vmv.x.s a0, v10
; CHECK-NEXT: vslidedown.vi v10, v8, 3
; CHECK-NEXT: vmv.x.s a1, v10
; CHECK-NEXT: slli a1, a1, 32
; CHECK-NEXT: sll a2, a1, a0
; CHECK-NEXT: srl a0, a2, a0
; CHECK-NEXT: xor a0, a1, a0
; CHECK-NEXT: seqz a0, a0
; CHECK-NEXT: addi a0, a0, -1
; CHECK-NEXT: or a0, a0, a2
; CHECK-NEXT: srli a0, a0, 32
; CHECK-NEXT: sw a0, 12(sp)
; CHECK-NEXT: vslidedown.vi v10, v9, 2
; CHECK-NEXT: vmv.x.s a0, v10
; CHECK-NEXT: vslidedown.vi v10, v8, 2
; CHECK-NEXT: vmv.x.s a1, v10
; CHECK-NEXT: slli a1, a1, 32
; CHECK-NEXT: sll a2, a1, a0
; CHECK-NEXT: srl a0, a2, a0
; CHECK-NEXT: xor a0, a1, a0
; CHECK-NEXT: seqz a0, a0
; CHECK-NEXT: addi a0, a0, -1
; CHECK-NEXT: or a0, a0, a2
; CHECK-NEXT: srli a0, a0, 32
; CHECK-NEXT: sw a0, 8(sp)
; CHECK-NEXT: vslidedown.vi v9, v9, 1
; CHECK-NEXT: vmv.x.s a0, v9
; CHECK-NEXT: vslidedown.vi v8, v8, 1
; CHECK-NEXT: vmv.x.s a1, v8
; CHECK-NEXT: slli a1, a1, 32
; CHECK-NEXT: sll a2, a1, a0
; CHECK-NEXT: srl a0, a2, a0
; CHECK-NEXT: xor a0, a1, a0
; CHECK-NEXT: seqz a0, a0
; CHECK-NEXT: addi a0, a0, -1
; CHECK-NEXT: or a0, a0, a2
; CHECK-NEXT: srli a0, a0, 32
; CHECK-NEXT: sw a0, 4(sp)
; CHECK-NEXT: mv a0, sp
; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT: vle32.v v8, (a0)
; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
%tmp = call <4 x i32> @llvm.ushl.sat.v4i32(<4 x i32> %x, <4 x i32> %y)
ret <4 x i32> %tmp
}

define <8 x i16> @vec_v8i16(<8 x i16> %x, <8 x i16> %y) nounwind {
; CHECK-LABEL: vec_v8i16:
; CHECK: # %bb.0:
; CHECK-NEXT: addi sp, sp, -16
; CHECK-NEXT: vsetivli zero, 0, e16, m1, ta, ma
; CHECK-NEXT: vmv.x.s a0, v9
; CHECK-NEXT: vmv.x.s a1, v8
; CHECK-NEXT: slli a1, a1, 48
; CHECK-NEXT: sll a2, a1, a0
; CHECK-NEXT: srl a0, a2, a0
; CHECK-NEXT: xor a0, a1, a0
; CHECK-NEXT: seqz a0, a0
; CHECK-NEXT: addi a0, a0, -1
; CHECK-NEXT: or a0, a0, a2
; CHECK-NEXT: srli a0, a0, 48
; CHECK-NEXT: sh a0, 0(sp)
; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma
; CHECK-NEXT: vslidedown.vi v10, v9, 7
; CHECK-NEXT: vmv.x.s a0, v10
; CHECK-NEXT: vslidedown.vi v10, v8, 7
; CHECK-NEXT: vmv.x.s a1, v10
; CHECK-NEXT: slli a1, a1, 48
; CHECK-NEXT: sll a2, a1, a0
; CHECK-NEXT: srl a0, a2, a0
; CHECK-NEXT: xor a0, a1, a0
; CHECK-NEXT: seqz a0, a0
; CHECK-NEXT: addi a0, a0, -1
; CHECK-NEXT: or a0, a0, a2
; CHECK-NEXT: srli a0, a0, 48
; CHECK-NEXT: sh a0, 14(sp)
; CHECK-NEXT: vslidedown.vi v10, v9, 6
; CHECK-NEXT: vmv.x.s a0, v10
; CHECK-NEXT: vslidedown.vi v10, v8, 6
; CHECK-NEXT: vmv.x.s a1, v10
; CHECK-NEXT: slli a1, a1, 48
; CHECK-NEXT: sll a2, a1, a0
; CHECK-NEXT: srl a0, a2, a0
; CHECK-NEXT: xor a0, a1, a0
; CHECK-NEXT: seqz a0, a0
; CHECK-NEXT: addi a0, a0, -1
; CHECK-NEXT: or a0, a0, a2
; CHECK-NEXT: srli a0, a0, 48
; CHECK-NEXT: sh a0, 12(sp)
; CHECK-NEXT: vslidedown.vi v10, v9, 5
; CHECK-NEXT: vmv.x.s a0, v10
; CHECK-NEXT: vslidedown.vi v10, v8, 5
; CHECK-NEXT: vmv.x.s a1, v10
; CHECK-NEXT: slli a1, a1, 48
; CHECK-NEXT: sll a2, a1, a0
; CHECK-NEXT: srl a0, a2, a0
; CHECK-NEXT: xor a0, a1, a0
; CHECK-NEXT: seqz a0, a0
; CHECK-NEXT: addi a0, a0, -1
; CHECK-NEXT: or a0, a0, a2
; CHECK-NEXT: srli a0, a0, 48
; CHECK-NEXT: sh a0, 10(sp)
; CHECK-NEXT: vslidedown.vi v10, v9, 4
; CHECK-NEXT: vmv.x.s a0, v10
; CHECK-NEXT: vslidedown.vi v10, v8, 4
; CHECK-NEXT: vmv.x.s a1, v10
; CHECK-NEXT: slli a1, a1, 48
; CHECK-NEXT: sll a2, a1, a0
; CHECK-NEXT: srl a0, a2, a0
; CHECK-NEXT: xor a0, a1, a0
; CHECK-NEXT: seqz a0, a0
; CHECK-NEXT: addi a0, a0, -1
; CHECK-NEXT: or a0, a0, a2
; CHECK-NEXT: srli a0, a0, 48
; CHECK-NEXT: sh a0, 8(sp)
; CHECK-NEXT: vslidedown.vi v10, v9, 3
; CHECK-NEXT: vmv.x.s a0, v10
; CHECK-NEXT: vslidedown.vi v10, v8, 3
; CHECK-NEXT: vmv.x.s a1, v10
; CHECK-NEXT: slli a1, a1, 48
; CHECK-NEXT: sll a2, a1, a0
; CHECK-NEXT: srl a0, a2, a0
; CHECK-NEXT: xor a0, a1, a0
; CHECK-NEXT: seqz a0, a0
; CHECK-NEXT: addi a0, a0, -1
; CHECK-NEXT: or a0, a0, a2
; CHECK-NEXT: srli a0, a0, 48
; CHECK-NEXT: sh a0, 6(sp)
; CHECK-NEXT: vslidedown.vi v10, v9, 2
; CHECK-NEXT: vmv.x.s a0, v10
; CHECK-NEXT: vslidedown.vi v10, v8, 2
; CHECK-NEXT: vmv.x.s a1, v10
; CHECK-NEXT: slli a1, a1, 48
; CHECK-NEXT: sll a2, a1, a0
; CHECK-NEXT: srl a0, a2, a0
; CHECK-NEXT: xor a0, a1, a0
; CHECK-NEXT: seqz a0, a0
; CHECK-NEXT: addi a0, a0, -1
; CHECK-NEXT: or a0, a0, a2
; CHECK-NEXT: srli a0, a0, 48
; CHECK-NEXT: sh a0, 4(sp)
; CHECK-NEXT: vslidedown.vi v9, v9, 1
; CHECK-NEXT: vmv.x.s a0, v9
; CHECK-NEXT: vslidedown.vi v8, v8, 1
; CHECK-NEXT: vmv.x.s a1, v8
; CHECK-NEXT: slli a1, a1, 48
; CHECK-NEXT: sll a2, a1, a0
; CHECK-NEXT: srl a0, a2, a0
; CHECK-NEXT: xor a0, a1, a0
; CHECK-NEXT: seqz a0, a0
; CHECK-NEXT: addi a0, a0, -1
; CHECK-NEXT: or a0, a0, a2
; CHECK-NEXT: srli a0, a0, 48
; CHECK-NEXT: sh a0, 2(sp)
; CHECK-NEXT: mv a0, sp
; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma
; CHECK-NEXT: vle16.v v8, (a0)
; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
%tmp = call <8 x i16> @llvm.ushl.sat.v8i16(<8 x i16> %x, <8 x i16> %y)
ret <8 x i16> %tmp
}

define <16 x i8> @vec_v16i8(<16 x i8> %x, <16 x i8> %y) nounwind {
; CHECK-LABEL: vec_v16i8:
; CHECK: # %bb.0:
; CHECK-NEXT: addi sp, sp, -16
; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma
; CHECK-NEXT: vmv.x.s a0, v9
; CHECK-NEXT: vmv.x.s a1, v8
; CHECK-NEXT: slli a1, a1, 56
; CHECK-NEXT: sll a2, a1, a0
; CHECK-NEXT: srl a0, a2, a0
; CHECK-NEXT: xor a0, a1, a0
; CHECK-NEXT: seqz a0, a0
; CHECK-NEXT: addi a0, a0, -1
; CHECK-NEXT: or a0, a0, a2
; CHECK-NEXT: srli a0, a0, 56
; CHECK-NEXT: sb a0, 0(sp)
; CHECK-NEXT: vsetivli zero, 1, e8, m1, ta, ma
; CHECK-NEXT: vslidedown.vi v10, v9, 15
; CHECK-NEXT: vmv.x.s a0, v10
; CHECK-NEXT: vslidedown.vi v10, v8, 15
; CHECK-NEXT: vmv.x.s a1, v10
; CHECK-NEXT: slli a1, a1, 56
; CHECK-NEXT: sll a2, a1, a0
; CHECK-NEXT: srl a0, a2, a0
; CHECK-NEXT: xor a0, a1, a0
; CHECK-NEXT: seqz a0, a0
; CHECK-NEXT: addi a0, a0, -1
; CHECK-NEXT: or a0, a0, a2
; CHECK-NEXT: srli a0, a0, 56
; CHECK-NEXT: sb a0, 15(sp)
; CHECK-NEXT: vslidedown.vi v10, v9, 14
; CHECK-NEXT: vmv.x.s a0, v10
; CHECK-NEXT: vslidedown.vi v10, v8, 14
; CHECK-NEXT: vmv.x.s a1, v10
; CHECK-NEXT: slli a1, a1, 56
; CHECK-NEXT: sll a2, a1, a0
; CHECK-NEXT: srl a0, a2, a0
; CHECK-NEXT: xor a0, a1, a0
; CHECK-NEXT: seqz a0, a0
; CHECK-NEXT: addi a0, a0, -1
; CHECK-NEXT: or a0, a0, a2
; CHECK-NEXT: srli a0, a0, 56
; CHECK-NEXT: sb a0, 14(sp)
; CHECK-NEXT: vslidedown.vi v10, v9, 13
; CHECK-NEXT: vmv.x.s a0, v10
; CHECK-NEXT: vslidedown.vi v10, v8, 13
; CHECK-NEXT: vmv.x.s a1, v10
; CHECK-NEXT: slli a1, a1, 56
; CHECK-NEXT: sll a2, a1, a0
; CHECK-NEXT: srl a0, a2, a0
; CHECK-NEXT: xor a0, a1, a0
; CHECK-NEXT: seqz a0, a0
; CHECK-NEXT: addi a0, a0, -1
; CHECK-NEXT: or a0, a0, a2
; CHECK-NEXT: srli a0, a0, 56
; CHECK-NEXT: sb a0, 13(sp)
; CHECK-NEXT: vslidedown.vi v10, v9, 12
; CHECK-NEXT: vmv.x.s a0, v10
; CHECK-NEXT: vslidedown.vi v10, v8, 12
; CHECK-NEXT: vmv.x.s a1, v10
; CHECK-NEXT: slli a1, a1, 56
; CHECK-NEXT: sll a2, a1, a0
; CHECK-NEXT: srl a0, a2, a0
; CHECK-NEXT: xor a0, a1, a0
; CHECK-NEXT: seqz a0, a0
; CHECK-NEXT: addi a0, a0, -1
; CHECK-NEXT: or a0, a0, a2
; CHECK-NEXT: srli a0, a0, 56
; CHECK-NEXT: sb a0, 12(sp)
; CHECK-NEXT: vslidedown.vi v10, v9, 11
; CHECK-NEXT: vmv.x.s a0, v10
; CHECK-NEXT: vslidedown.vi v10, v8, 11
; CHECK-NEXT: vmv.x.s a1, v10
; CHECK-NEXT: slli a1, a1, 56
; CHECK-NEXT: sll a2, a1, a0
; CHECK-NEXT: srl a0, a2, a0
; CHECK-NEXT: xor a0, a1, a0
; CHECK-NEXT: seqz a0, a0
; CHECK-NEXT: addi a0, a0, -1
; CHECK-NEXT: or a0, a0, a2
; CHECK-NEXT: srli a0, a0, 56
; CHECK-NEXT: sb a0, 11(sp)
; CHECK-NEXT: vslidedown.vi v10, v9, 10
; CHECK-NEXT: vmv.x.s a0, v10
; CHECK-NEXT: vslidedown.vi v10, v8, 10
; CHECK-NEXT: vmv.x.s a1, v10
; CHECK-NEXT: slli a1, a1, 56
; CHECK-NEXT: sll a2, a1, a0
; CHECK-NEXT: srl a0, a2, a0
; CHECK-NEXT: xor a0, a1, a0
; CHECK-NEXT: seqz a0, a0
; CHECK-NEXT: addi a0, a0, -1
; CHECK-NEXT: or a0, a0, a2
; CHECK-NEXT: srli a0, a0, 56
; CHECK-NEXT: sb a0, 10(sp)
; CHECK-NEXT: vslidedown.vi v10, v9, 9
; CHECK-NEXT: vmv.x.s a0, v10
; CHECK-NEXT: vslidedown.vi v10, v8, 9
; CHECK-NEXT: vmv.x.s a1, v10
; CHECK-NEXT: slli a1, a1, 56
; CHECK-NEXT: sll a2, a1, a0
; CHECK-NEXT: srl a0, a2, a0
; CHECK-NEXT: xor a0, a1, a0
; CHECK-NEXT: seqz a0, a0
; CHECK-NEXT: addi a0, a0, -1
; CHECK-NEXT: or a0, a0, a2
; CHECK-NEXT: srli a0, a0, 56
; CHECK-NEXT: sb a0, 9(sp)
; CHECK-NEXT: vslidedown.vi v10, v9, 8
; CHECK-NEXT: vmv.x.s a0, v10
; CHECK-NEXT: vslidedown.vi v10, v8, 8
; CHECK-NEXT: vmv.x.s a1, v10
; CHECK-NEXT: slli a1, a1, 56
; CHECK-NEXT: sll a2, a1, a0
; CHECK-NEXT: srl a0, a2, a0
; CHECK-NEXT: xor a0, a1, a0
; CHECK-NEXT: seqz a0, a0
; CHECK-NEXT: addi a0, a0, -1
; CHECK-NEXT: or a0, a0, a2
; CHECK-NEXT: srli a0, a0, 56
; CHECK-NEXT: sb a0, 8(sp)
; CHECK-NEXT: vslidedown.vi v10, v9, 7
; CHECK-NEXT: vmv.x.s a0, v10
; CHECK-NEXT: vslidedown.vi v10, v8, 7
; CHECK-NEXT: vmv.x.s a1, v10
; CHECK-NEXT: slli a1, a1, 56
; CHECK-NEXT: sll a2, a1, a0
; CHECK-NEXT: srl a0, a2, a0
; CHECK-NEXT: xor a0, a1, a0
; CHECK-NEXT: seqz a0, a0
; CHECK-NEXT: addi a0, a0, -1
; CHECK-NEXT: or a0, a0, a2
; CHECK-NEXT: srli a0, a0, 56
; CHECK-NEXT: sb a0, 7(sp)
; CHECK-NEXT: vslidedown.vi v10, v9, 6
; CHECK-NEXT: vmv.x.s a0, v10
; CHECK-NEXT: vslidedown.vi v10, v8, 6
; CHECK-NEXT: vmv.x.s a1, v10
; CHECK-NEXT: slli a1, a1, 56
; CHECK-NEXT: sll a2, a1, a0
; CHECK-NEXT: srl a0, a2, a0
; CHECK-NEXT: xor a0, a1, a0
; CHECK-NEXT: seqz a0, a0
; CHECK-NEXT: addi a0, a0, -1
; CHECK-NEXT: or a0, a0, a2
; CHECK-NEXT: srli a0, a0, 56
; CHECK-NEXT: sb a0, 6(sp)
; CHECK-NEXT: vslidedown.vi v10, v9, 5
; CHECK-NEXT: vmv.x.s a0, v10
; CHECK-NEXT: vslidedown.vi v10, v8, 5
; CHECK-NEXT: vmv.x.s a1, v10
; CHECK-NEXT: slli a1, a1, 56
; CHECK-NEXT: sll a2, a1, a0
; CHECK-NEXT: srl a0, a2, a0
; CHECK-NEXT: xor a0, a1, a0
; CHECK-NEXT: seqz a0, a0
; CHECK-NEXT: addi a0, a0, -1
; CHECK-NEXT: or a0, a0, a2
; CHECK-NEXT: srli a0, a0, 56
; CHECK-NEXT: sb a0, 5(sp)
; CHECK-NEXT: vslidedown.vi v10, v9, 4
; CHECK-NEXT: vmv.x.s a0, v10
; CHECK-NEXT: vslidedown.vi v10, v8, 4
; CHECK-NEXT: vmv.x.s a1, v10
; CHECK-NEXT: slli a1, a1, 56
; CHECK-NEXT: sll a2, a1, a0
; CHECK-NEXT: srl a0, a2, a0
; CHECK-NEXT: xor a0, a1, a0
; CHECK-NEXT: seqz a0, a0
; CHECK-NEXT: addi a0, a0, -1
; CHECK-NEXT: or a0, a0, a2
; CHECK-NEXT: srli a0, a0, 56
; CHECK-NEXT: sb a0, 4(sp)
; CHECK-NEXT: vslidedown.vi v10, v9, 3
; CHECK-NEXT: vmv.x.s a0, v10
; CHECK-NEXT: vslidedown.vi v10, v8, 3
; CHECK-NEXT: vmv.x.s a1, v10
; CHECK-NEXT: slli a1, a1, 56
; CHECK-NEXT: sll a2, a1, a0
; CHECK-NEXT: srl a0, a2, a0
; CHECK-NEXT: xor a0, a1, a0
; CHECK-NEXT: seqz a0, a0
; CHECK-NEXT: addi a0, a0, -1
; CHECK-NEXT: or a0, a0, a2
; CHECK-NEXT: srli a0, a0, 56
; CHECK-NEXT: sb a0, 3(sp)
; CHECK-NEXT: vslidedown.vi v10, v9, 2
; CHECK-NEXT: vmv.x.s a0, v10
; CHECK-NEXT: vslidedown.vi v10, v8, 2
; CHECK-NEXT: vmv.x.s a1, v10
; CHECK-NEXT: slli a1, a1, 56
; CHECK-NEXT: sll a2, a1, a0
; CHECK-NEXT: srl a0, a2, a0
; CHECK-NEXT: xor a0, a1, a0
; CHECK-NEXT: seqz a0, a0
; CHECK-NEXT: addi a0, a0, -1
; CHECK-NEXT: or a0, a0, a2
; CHECK-NEXT: srli a0, a0, 56
; CHECK-NEXT: sb a0, 2(sp)
; CHECK-NEXT: vslidedown.vi v9, v9, 1
; CHECK-NEXT: vmv.x.s a0, v9
; CHECK-NEXT: vslidedown.vi v8, v8, 1
; CHECK-NEXT: vmv.x.s a1, v8
; CHECK-NEXT: slli a1, a1, 56
; CHECK-NEXT: sll a2, a1, a0
; CHECK-NEXT: srl a0, a2, a0
; CHECK-NEXT: xor a0, a1, a0
; CHECK-NEXT: seqz a0, a0
; CHECK-NEXT: addi a0, a0, -1
; CHECK-NEXT: or a0, a0, a2
; CHECK-NEXT: srli a0, a0, 56
; CHECK-NEXT: sb a0, 1(sp)
; CHECK-NEXT: mv a0, sp
; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, ma
; CHECK-NEXT: vle8.v v8, (a0)
; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
%tmp = call <16 x i8> @llvm.ushl.sat.v16i8(<16 x i8> %x, <16 x i8> %y)
ret <16 x i8> %tmp
}