44 changes: 2 additions & 42 deletions llvm/lib/Target/RISCV/RISCVInstrInfoB.td
@@ -671,8 +671,10 @@ def riscv_grevi : SDNode<"RISCVISD::GREVI", SDTIntBinOp, []>;
def riscv_greviw : SDNode<"RISCVISD::GREVIW", SDTIntBinOp, []>;
def riscv_gorci : SDNode<"RISCVISD::GORCI", SDTIntBinOp, []>;
def riscv_gorciw : SDNode<"RISCVISD::GORCIW", SDTIntBinOp, []>;
def riscv_shfli : SDNode<"RISCVISD::SHFLI", SDTIntBinOp, []>;

let Predicates = [HasStdExtZbp] in {
def : Pat<(riscv_shfli GPR:$rs1, timm:$shamt), (SHFLI GPR:$rs1, timm:$shamt)>;
def : Pat<(riscv_grevi GPR:$rs1, timm:$shamt), (GREVI GPR:$rs1, timm:$shamt)>;
def : Pat<(riscv_gorci GPR:$rs1, timm:$shamt), (GORCI GPR:$rs1, timm:$shamt)>;

@@ -789,48 +791,6 @@ let Predicates = [HasStdExtZbbOrZbp, IsRV64] in {
def : Pat<(i64 (and GPR:$rs, 0xFFFF)), (ZEXTH_RV64 GPR:$rs)>;
}

let Predicates = [HasStdExtZbp, IsRV32] in {
def : Pat<(or (or (and (shl GPR:$rs1, (i32 8)), (i32 0x00FF0000)),
(and GPR:$rs1, (i32 0xFF0000FF))),
(and (srl GPR:$rs1, (i32 8)), (i32 0x0000FF00))),
(SHFLI GPR:$rs1, (i32 8))>;
def : Pat<(or (or (and (shl GPR:$rs1, (i32 4)), (i32 0x0F000F00)),
(and GPR:$rs1, (i32 0xF00FF00F))),
(and (srl GPR:$rs1, (i32 4)), (i32 0x00F000F0))),
(SHFLI GPR:$rs1, (i32 4))>;
def : Pat<(or (or (and (shl GPR:$rs1, (i32 2)), (i32 0x30303030)),
(and GPR:$rs1, (i32 0xC3C3C3C3))),
(and (srl GPR:$rs1, (i32 2)), (i32 0x0C0C0C0C))),
(SHFLI GPR:$rs1, (i32 2))>;
def : Pat<(or (or (and (shl GPR:$rs1, (i32 1)), (i32 0x44444444)),
(and GPR:$rs1, (i32 0x99999999))),
(and (srl GPR:$rs1, (i32 1)), (i32 0x22222222))),
(SHFLI GPR:$rs1, (i32 1))>;
} // Predicates = [HasStdExtZbp, IsRV32]

let Predicates = [HasStdExtZbp, IsRV64] in {
def : Pat<(or (or (and (shl GPR:$rs1, (i64 16)), (i64 0x0000FFFF00000000)),
(and GPR:$rs1, (i64 0xFFFF00000000FFFF))),
(and (srl GPR:$rs1, (i64 16)), (i64 0x00000000FFFF0000))),
(SHFLI GPR:$rs1, (i64 16))>;
def : Pat<(or (or (and (shl GPR:$rs1, (i64 8)), (i64 0x00FF000000FF0000)),
(and GPR:$rs1, (i64 0xFF0000FFFF0000FF))),
(and (srl GPR:$rs1, (i64 8)), (i64 0x0000FF000000FF00))),
(SHFLI GPR:$rs1, (i64 8))>;
def : Pat<(or (or (and (shl GPR:$rs1, (i64 4)), (i64 0x0F000F000F000F00)),
(and GPR:$rs1, (i64 0xF00FF00FF00FF00F))),
(and (srl GPR:$rs1, (i64 4)), (i64 0x00F000F000F000F0))),
(SHFLI GPR:$rs1, (i64 4))>;
def : Pat<(or (or (and (shl GPR:$rs1, (i64 2)), (i64 0x3030303030303030)),
(and GPR:$rs1, (i64 0xC3C3C3C3C3C3C3C3))),
(and (srl GPR:$rs1, (i64 2)), (i64 0x0C0C0C0C0C0C0C0C))),
(SHFLI GPR:$rs1, (i64 2))>;
def : Pat<(or (or (and (shl GPR:$rs1, (i64 1)), (i64 0x4444444444444444)),
(and GPR:$rs1, (i64 0x9999999999999999))),
(and (srl GPR:$rs1, (i64 1)), (i64 0x2222222222222222))),
(SHFLI GPR:$rs1, (i64 1))>;
} // Predicates = [HasStdExtZbp, IsRV64]

let Predicates = [HasStdExtZba] in {
def : Pat<(add (shl GPR:$rs1, (XLenVT 1)), GPR:$rs2),
(SH1ADD GPR:$rs1, GPR:$rs2)>;
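
Illustrative aside, not part of the patch: the removed RV32/RV64 patterns above matched each SHFL bit-permutation stage as an explicit shift/and/or DAG; with the new riscv_shfli SDNode, that matching presumably happens earlier in the C++ lowering code (not shown in this diff), and only the final SHFLI selection pattern remains in TableGen. As a standalone check of what the shamt-1 stage computes, the C++ sketch below compares the mask expression from the removed RV32 pattern against an explicit per-nibble bit swap.

// Illustrative only, not part of the patch: verify that the mask expression
// from the removed RV32 SHFLI shamt-1 pattern equals an explicit per-nibble
// swap of bits 1 and 2 (the "zip.n" permutation).
#include <cassert>
#include <cstdint>

static uint32_t shfl1_masks(uint32_t x) {
  return ((x << 1) & 0x44444444u) | (x & 0x99999999u) | ((x >> 1) & 0x22222222u);
}

static uint32_t shfl1_explicit(uint32_t x) {
  uint32_t r = 0;
  for (int nib = 0; nib < 32; nib += 4) {
    uint32_t b0 = (x >> (nib + 0)) & 1, b1 = (x >> (nib + 1)) & 1;
    uint32_t b2 = (x >> (nib + 2)) & 1, b3 = (x >> (nib + 3)) & 1;
    // Stage 0 of the shuffle network: bits 1 and 2 trade places in each nibble.
    r |= (b0 << nib) | (b2 << (nib + 1)) | (b1 << (nib + 2)) | (b3 << (nib + 3));
  }
  return r;
}

int main() {
  const uint32_t tests[] = {0x00000000u, 0xFFFFFFFFu, 0x12345678u, 0x99999999u};
  for (uint32_t x : tests)
    assert(shfl1_masks(x) == shfl1_explicit(x));
  return 0;
}
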
110 changes: 8 additions & 102 deletions llvm/test/CodeGen/RISCV/rv64Zbp.ll
@@ -3430,36 +3430,12 @@ define signext i32 @shfl1_i32(i32 signext %a, i32 signext %b) nounwind {
;
; RV64IB-LABEL: shfl1_i32:
; RV64IB: # %bb.0:
; RV64IB-NEXT: lui a1, 629146
; RV64IB-NEXT: addiw a1, a1, -1639
; RV64IB-NEXT: and a1, a0, a1
; RV64IB-NEXT: slli a2, a0, 1
; RV64IB-NEXT: lui a3, 279620
; RV64IB-NEXT: addiw a3, a3, 1092
; RV64IB-NEXT: and a2, a2, a3
; RV64IB-NEXT: or a1, a2, a1
; RV64IB-NEXT: srli a0, a0, 1
; RV64IB-NEXT: lui a2, 139810
; RV64IB-NEXT: addiw a2, a2, 546
; RV64IB-NEXT: and a0, a0, a2
; RV64IB-NEXT: or a0, a1, a0
; RV64IB-NEXT: zip.n a0, a0
; RV64IB-NEXT: ret
;
; RV64IBP-LABEL: shfl1_i32:
; RV64IBP: # %bb.0:
; RV64IBP-NEXT: lui a1, 629146
; RV64IBP-NEXT: addiw a1, a1, -1639
; RV64IBP-NEXT: and a1, a0, a1
; RV64IBP-NEXT: slli a2, a0, 1
; RV64IBP-NEXT: lui a3, 279620
; RV64IBP-NEXT: addiw a3, a3, 1092
; RV64IBP-NEXT: and a2, a2, a3
; RV64IBP-NEXT: or a1, a2, a1
; RV64IBP-NEXT: srli a0, a0, 1
; RV64IBP-NEXT: lui a2, 139810
; RV64IBP-NEXT: addiw a2, a2, 546
; RV64IBP-NEXT: and a0, a0, a2
; RV64IBP-NEXT: or a0, a1, a0
; RV64IBP-NEXT: zip.n a0, a0
; RV64IBP-NEXT: ret
%and = and i32 %a, -1717986919
%shl = shl i32 %a, 1
@@ -3540,36 +3516,12 @@ define signext i32 @shfl2_i32(i32 signext %a, i32 signext %b) nounwind {
;
; RV64IB-LABEL: shfl2_i32:
; RV64IB: # %bb.0:
; RV64IB-NEXT: lui a1, 801852
; RV64IB-NEXT: addiw a1, a1, 963
; RV64IB-NEXT: and a1, a0, a1
; RV64IB-NEXT: slli a2, a0, 2
; RV64IB-NEXT: lui a3, 197379
; RV64IB-NEXT: addiw a3, a3, 48
; RV64IB-NEXT: and a2, a2, a3
; RV64IB-NEXT: or a1, a2, a1
; RV64IB-NEXT: srli a0, a0, 2
; RV64IB-NEXT: lui a2, 49345
; RV64IB-NEXT: addiw a2, a2, -1012
; RV64IB-NEXT: and a0, a0, a2
; RV64IB-NEXT: or a0, a0, a1
; RV64IB-NEXT: zip2.b a0, a0
; RV64IB-NEXT: ret
;
; RV64IBP-LABEL: shfl2_i32:
; RV64IBP: # %bb.0:
; RV64IBP-NEXT: lui a1, 801852
; RV64IBP-NEXT: addiw a1, a1, 963
; RV64IBP-NEXT: and a1, a0, a1
; RV64IBP-NEXT: slli a2, a0, 2
; RV64IBP-NEXT: lui a3, 197379
; RV64IBP-NEXT: addiw a3, a3, 48
; RV64IBP-NEXT: and a2, a2, a3
; RV64IBP-NEXT: or a1, a2, a1
; RV64IBP-NEXT: srli a0, a0, 2
; RV64IBP-NEXT: lui a2, 49345
; RV64IBP-NEXT: addiw a2, a2, -1012
; RV64IBP-NEXT: and a0, a0, a2
; RV64IBP-NEXT: or a0, a0, a1
; RV64IBP-NEXT: zip2.b a0, a0
; RV64IBP-NEXT: ret
%and = and i32 %a, -1010580541
%shl = shl i32 %a, 2
@@ -3652,36 +3604,12 @@ define signext i32 @shfl4_i32(i32 signext %a, i32 signext %b) nounwind {
;
; RV64IB-LABEL: shfl4_i32:
; RV64IB: # %bb.0:
; RV64IB-NEXT: lui a1, 983295
; RV64IB-NEXT: addiw a1, a1, 15
; RV64IB-NEXT: and a1, a0, a1
; RV64IB-NEXT: slli a2, a0, 4
; RV64IB-NEXT: lui a3, 61441
; RV64IB-NEXT: addiw a3, a3, -256
; RV64IB-NEXT: and a2, a2, a3
; RV64IB-NEXT: srli a0, a0, 4
; RV64IB-NEXT: lui a3, 3840
; RV64IB-NEXT: addiw a3, a3, 240
; RV64IB-NEXT: and a0, a0, a3
; RV64IB-NEXT: or a0, a0, a1
; RV64IB-NEXT: or a0, a0, a2
; RV64IB-NEXT: zip4.h a0, a0
; RV64IB-NEXT: ret
;
; RV64IBP-LABEL: shfl4_i32:
; RV64IBP: # %bb.0:
; RV64IBP-NEXT: lui a1, 983295
; RV64IBP-NEXT: addiw a1, a1, 15
; RV64IBP-NEXT: and a1, a0, a1
; RV64IBP-NEXT: slli a2, a0, 4
; RV64IBP-NEXT: lui a3, 61441
; RV64IBP-NEXT: addiw a3, a3, -256
; RV64IBP-NEXT: and a2, a2, a3
; RV64IBP-NEXT: srli a0, a0, 4
; RV64IBP-NEXT: lui a3, 3840
; RV64IBP-NEXT: addiw a3, a3, 240
; RV64IBP-NEXT: and a0, a0, a3
; RV64IBP-NEXT: or a0, a0, a1
; RV64IBP-NEXT: or a0, a0, a2
; RV64IBP-NEXT: zip4.h a0, a0
; RV64IBP-NEXT: ret
%and = and i32 %a, -267390961
%shl = shl i32 %a, 4
@@ -3761,34 +3689,12 @@ define signext i32 @shfl8_i32(i32 signext %a, i32 signext %b) nounwind {
;
; RV64IB-LABEL: shfl8_i32:
; RV64IB: # %bb.0:
; RV64IB-NEXT: lui a1, 1044480
; RV64IB-NEXT: addiw a1, a1, 255
; RV64IB-NEXT: and a1, a0, a1
; RV64IB-NEXT: slli a2, a0, 8
; RV64IB-NEXT: lui a3, 4080
; RV64IB-NEXT: and a2, a2, a3
; RV64IB-NEXT: srli a0, a0, 8
; RV64IB-NEXT: lui a3, 16
; RV64IB-NEXT: addiw a3, a3, -256
; RV64IB-NEXT: and a0, a0, a3
; RV64IB-NEXT: or a0, a1, a0
; RV64IB-NEXT: or a0, a0, a2
; RV64IB-NEXT: zip8.w a0, a0
; RV64IB-NEXT: ret
;
; RV64IBP-LABEL: shfl8_i32:
; RV64IBP: # %bb.0:
; RV64IBP-NEXT: lui a1, 1044480
; RV64IBP-NEXT: addiw a1, a1, 255
; RV64IBP-NEXT: and a1, a0, a1
; RV64IBP-NEXT: slli a2, a0, 8
; RV64IBP-NEXT: lui a3, 4080
; RV64IBP-NEXT: and a2, a2, a3
; RV64IBP-NEXT: srli a0, a0, 8
; RV64IBP-NEXT: lui a3, 16
; RV64IBP-NEXT: addiw a3, a3, -256
; RV64IBP-NEXT: and a0, a0, a3
; RV64IBP-NEXT: or a0, a1, a0
; RV64IBP-NEXT: or a0, a0, a2
; RV64IBP-NEXT: zip8.w a0, a0
; RV64IBP-NEXT: ret
%and = and i32 %a, -16776961
%shl = shl i32 %a, 8
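
Illustrative aside, not part of the patch: the dropped RV64IB/RV64IBP check lines above materialized each 32-bit shuffle mask with a lui/addiw pair before the shift-and-or sequence; after this change a single zip.n/zip2.b/zip4.h/zip8.w is selected instead. For reference, the C++ sketch below shows the standard lui/addiw split for the 0x99999999 mask from shfl1_i32; the asserted values come from the old check lines, everything else is illustrative.

// Illustrative only: split a 32-bit constant into the lui (upper 20 bits)
// and addiw (signed lower 12 bits) immediates, as the old check lines did.
#include <cassert>
#include <cstdint>

struct LuiAddiw { uint32_t Hi20; int32_t Lo12; };

static LuiAddiw splitConstant(uint32_t C) {
  int32_t Lo = (int32_t)(C & 0xFFF);
  if (Lo >= 0x800)
    Lo -= 0x1000;                                // addiw immediate is signed 12-bit
  uint32_t Hi = ((C + 0x800u) >> 12) & 0xFFFFFu; // round so Lo12 stays in range
  return {Hi, Lo};
}

int main() {
  // 0x99999999 keeps bits 0 and 3 of every nibble (see shfl1_i32).
  LuiAddiw S = splitConstant(0x99999999u);
  assert(S.Hi20 == 629146 && S.Lo12 == -1639);   // "lui a1, 629146; addiw a1, a1, -1639"
  assert((uint32_t)((S.Hi20 << 12) + (uint32_t)S.Lo12) == 0x99999999u);
  return 0;
}
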
261 changes: 261 additions & 0 deletions llvm/test/CodeGen/RISCV/rvv/zvlsseg-zero-vl.ll
@@ -0,0 +1,261 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=riscv64 -mattr=+d,+experimental-zvlsseg,+experimental-zfh \
; RUN: -verify-machineinstrs < %s | FileCheck %s

; Make sure we don't select a VL of 0 to X0 in the custom isel handlers we use
; for these intrinsics.
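; A VL of 0 must stay in a real register: in the vsetvli encoding, rs1 = x0
; with a non-x0 destination requests VLMAX rather than 0.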

declare {<vscale x 16 x i16>,<vscale x 16 x i16>} @llvm.riscv.vlseg2.nxv16i16(i16*, i64)
declare {<vscale x 16 x i16>,<vscale x 16 x i16>} @llvm.riscv.vlseg2.mask.nxv16i16(<vscale x 16 x i16>,<vscale x 16 x i16>, i16*, <vscale x 16 x i1>, i64)

define <vscale x 16 x i16> @test_vlseg2_mask_nxv16i16(i16* %base, <vscale x 16 x i1> %mask) {
; CHECK-LABEL: test_vlseg2_mask_nxv16i16:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: mv a1, zero
; CHECK-NEXT: vsetvli a2, a1, e16,m4,ta,mu
; CHECK-NEXT: vlseg2e16.v v4, (a0)
; CHECK-NEXT: vmv4r.v v8, v4
; CHECK-NEXT: vsetvli a1, a1, e16,m4,tu,mu
; CHECK-NEXT: vlseg2e16.v v4, (a0), v0.t
; CHECK-NEXT: # kill: def $v8m4 killed $v8m4 killed $v4m4_v8m4
; CHECK-NEXT: ret
entry:
%0 = tail call {<vscale x 16 x i16>,<vscale x 16 x i16>} @llvm.riscv.vlseg2.nxv16i16(i16* %base, i64 0)
%1 = extractvalue {<vscale x 16 x i16>,<vscale x 16 x i16>} %0, 0
%2 = tail call {<vscale x 16 x i16>,<vscale x 16 x i16>} @llvm.riscv.vlseg2.mask.nxv16i16(<vscale x 16 x i16> %1,<vscale x 16 x i16> %1, i16* %base, <vscale x 16 x i1> %mask, i64 0)
%3 = extractvalue {<vscale x 16 x i16>,<vscale x 16 x i16>} %2, 1
ret <vscale x 16 x i16> %3
}

declare {<vscale x 16 x i16>,<vscale x 16 x i16>} @llvm.riscv.vlsseg2.nxv16i16(i16*, i64, i64)
declare {<vscale x 16 x i16>,<vscale x 16 x i16>} @llvm.riscv.vlsseg2.mask.nxv16i16(<vscale x 16 x i16>,<vscale x 16 x i16>, i16*, i64, <vscale x 16 x i1>, i64)

define <vscale x 16 x i16> @test_vlsseg2_mask_nxv16i16(i16* %base, i64 %offset, <vscale x 16 x i1> %mask) {
; CHECK-LABEL: test_vlsseg2_mask_nxv16i16:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: mv a2, zero
; CHECK-NEXT: vsetvli a3, a2, e16,m4,ta,mu
; CHECK-NEXT: vlsseg2e16.v v4, (a0), a1
; CHECK-NEXT: vmv4r.v v8, v4
; CHECK-NEXT: vsetvli a2, a2, e16,m4,tu,mu
; CHECK-NEXT: vlsseg2e16.v v4, (a0), a1, v0.t
; CHECK-NEXT: # kill: def $v8m4 killed $v8m4 killed $v4m4_v8m4
; CHECK-NEXT: ret
entry:
%0 = tail call {<vscale x 16 x i16>,<vscale x 16 x i16>} @llvm.riscv.vlsseg2.nxv16i16(i16* %base, i64 %offset, i64 0)
%1 = extractvalue {<vscale x 16 x i16>,<vscale x 16 x i16>} %0, 0
%2 = tail call {<vscale x 16 x i16>,<vscale x 16 x i16>} @llvm.riscv.vlsseg2.mask.nxv16i16(<vscale x 16 x i16> %1,<vscale x 16 x i16> %1, i16* %base, i64 %offset, <vscale x 16 x i1> %mask, i64 0)
%3 = extractvalue {<vscale x 16 x i16>,<vscale x 16 x i16>} %2, 1
ret <vscale x 16 x i16> %3
}
declare {<vscale x 16 x i16>,<vscale x 16 x i16>} @llvm.riscv.vloxseg2.nxv16i16.nxv16i16(i16*, <vscale x 16 x i16>, i64)
declare {<vscale x 16 x i16>,<vscale x 16 x i16>} @llvm.riscv.vloxseg2.mask.nxv16i16.nxv16i16(<vscale x 16 x i16>,<vscale x 16 x i16>, i16*, <vscale x 16 x i16>, <vscale x 16 x i1>, i64)

define <vscale x 16 x i16> @test_vloxseg2_mask_nxv16i16_nxv16i16(i16* %base, <vscale x 16 x i16> %index, <vscale x 16 x i1> %mask) {
; CHECK-LABEL: test_vloxseg2_mask_nxv16i16_nxv16i16:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: mv a1, zero
; CHECK-NEXT: vsetvli a2, a1, e16,m4,ta,mu
; CHECK-NEXT: vloxseg2ei16.v v12, (a0), v8
; CHECK-NEXT: vmv4r.v v16, v12
; CHECK-NEXT: vsetvli a1, a1, e16,m4,tu,mu
; CHECK-NEXT: vloxseg2ei16.v v12, (a0), v8, v0.t
; CHECK-NEXT: vmv4r.v v8, v16
; CHECK-NEXT: ret
entry:
%0 = tail call {<vscale x 16 x i16>,<vscale x 16 x i16>} @llvm.riscv.vloxseg2.nxv16i16.nxv16i16(i16* %base, <vscale x 16 x i16> %index, i64 0)
%1 = extractvalue {<vscale x 16 x i16>,<vscale x 16 x i16>} %0, 0
%2 = tail call {<vscale x 16 x i16>,<vscale x 16 x i16>} @llvm.riscv.vloxseg2.mask.nxv16i16.nxv16i16(<vscale x 16 x i16> %1,<vscale x 16 x i16> %1, i16* %base, <vscale x 16 x i16> %index, <vscale x 16 x i1> %mask, i64 0)
%3 = extractvalue {<vscale x 16 x i16>,<vscale x 16 x i16>} %2, 1
ret <vscale x 16 x i16> %3
}

declare {<vscale x 16 x i16>,<vscale x 16 x i16>} @llvm.riscv.vluxseg2.nxv16i16.nxv16i16(i16*, <vscale x 16 x i16>, i64)
declare {<vscale x 16 x i16>,<vscale x 16 x i16>} @llvm.riscv.vluxseg2.mask.nxv16i16.nxv16i16(<vscale x 16 x i16>,<vscale x 16 x i16>, i16*, <vscale x 16 x i16>, <vscale x 16 x i1>, i64)

define <vscale x 16 x i16> @test_vluxseg2_mask_nxv16i16_nxv16i16(i16* %base, <vscale x 16 x i16> %index, <vscale x 16 x i1> %mask) {
; CHECK-LABEL: test_vluxseg2_mask_nxv16i16_nxv16i16:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: mv a1, zero
; CHECK-NEXT: vsetvli a2, a1, e16,m4,ta,mu
; CHECK-NEXT: vluxseg2ei16.v v12, (a0), v8
; CHECK-NEXT: vmv4r.v v16, v12
; CHECK-NEXT: vsetvli a1, a1, e16,m4,tu,mu
; CHECK-NEXT: vluxseg2ei16.v v12, (a0), v8, v0.t
; CHECK-NEXT: vmv4r.v v8, v16
; CHECK-NEXT: ret
entry:
%0 = tail call {<vscale x 16 x i16>,<vscale x 16 x i16>} @llvm.riscv.vluxseg2.nxv16i16.nxv16i16(i16* %base, <vscale x 16 x i16> %index, i64 0)
%1 = extractvalue {<vscale x 16 x i16>,<vscale x 16 x i16>} %0, 0
%2 = tail call {<vscale x 16 x i16>,<vscale x 16 x i16>} @llvm.riscv.vluxseg2.mask.nxv16i16.nxv16i16(<vscale x 16 x i16> %1,<vscale x 16 x i16> %1, i16* %base, <vscale x 16 x i16> %index, <vscale x 16 x i1> %mask, i64 0)
%3 = extractvalue {<vscale x 16 x i16>,<vscale x 16 x i16>} %2, 1
ret <vscale x 16 x i16> %3
}

declare {<vscale x 16 x i16>,<vscale x 16 x i16>, i64} @llvm.riscv.vlseg2ff.nxv16i16(i16*, i64)
declare {<vscale x 16 x i16>,<vscale x 16 x i16>, i64} @llvm.riscv.vlseg2ff.mask.nxv16i16(<vscale x 16 x i16>,<vscale x 16 x i16>, i16*, <vscale x 16 x i1>, i64)

define <vscale x 16 x i16> @test_vlseg2ff_nxv16i16(i16* %base, i64* %outvl) {
; CHECK-LABEL: test_vlseg2ff_nxv16i16:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: mv a2, zero
; CHECK-NEXT: vsetvli a2, a2, e16,m4,ta,mu
; CHECK-NEXT: vlseg2e16ff.v v4, (a0)
; CHECK-NEXT: csrr a0, vl
; CHECK-NEXT: sd a0, 0(a1)
; CHECK-NEXT: # kill: def $v8m4 killed $v8m4 killed $v4m4_v8m4
; CHECK-NEXT: ret
entry:
%0 = tail call {<vscale x 16 x i16>,<vscale x 16 x i16>, i64} @llvm.riscv.vlseg2ff.nxv16i16(i16* %base, i64 0)
%1 = extractvalue {<vscale x 16 x i16>,<vscale x 16 x i16>, i64} %0, 1
%2 = extractvalue {<vscale x 16 x i16>,<vscale x 16 x i16>, i64} %0, 2
store i64 %2, i64* %outvl
ret <vscale x 16 x i16> %1
}

define <vscale x 16 x i16> @test_vlseg2ff_mask_nxv16i16(<vscale x 16 x i16> %val, i16* %base, <vscale x 16 x i1> %mask, i64* %outvl) {
; CHECK-LABEL: test_vlseg2ff_mask_nxv16i16:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vmv4r.v v4, v8
; CHECK-NEXT: mv a2, zero
; CHECK-NEXT: vsetvli a2, a2, e16,m4,tu,mu
; CHECK-NEXT: vlseg2e16ff.v v4, (a0), v0.t
; CHECK-NEXT: csrr a0, vl
; CHECK-NEXT: sd a0, 0(a1)
; CHECK-NEXT: # kill: def $v8m4 killed $v8m4 killed $v4m4_v8m4
; CHECK-NEXT: ret
entry:
%0 = tail call {<vscale x 16 x i16>,<vscale x 16 x i16>, i64} @llvm.riscv.vlseg2ff.mask.nxv16i16(<vscale x 16 x i16> %val,<vscale x 16 x i16> %val, i16* %base, <vscale x 16 x i1> %mask, i64 0)
%1 = extractvalue {<vscale x 16 x i16>,<vscale x 16 x i16>, i64} %0, 1
%2 = extractvalue {<vscale x 16 x i16>,<vscale x 16 x i16>, i64} %0, 2
store i64 %2, i64* %outvl
ret <vscale x 16 x i16> %1
}

declare void @llvm.riscv.vsseg2.nxv16i16(<vscale x 16 x i16>,<vscale x 16 x i16>, i16*, i64)
declare void @llvm.riscv.vsseg2.mask.nxv16i16(<vscale x 16 x i16>,<vscale x 16 x i16>, i16*, <vscale x 16 x i1>, i64)

define void @test_vsseg2_nxv16i16(<vscale x 16 x i16> %val, i16* %base) {
; CHECK-LABEL: test_vsseg2_nxv16i16:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: # kill: def $v8m4 killed $v8m4 def $v8m4_v12m4
; CHECK-NEXT: vmv4r.v v12, v8
; CHECK-NEXT: mv a1, zero
; CHECK-NEXT: vsetvli a1, a1, e16,m4,ta,mu
; CHECK-NEXT: vsseg2e16.v v8, (a0)
; CHECK-NEXT: ret
entry:
tail call void @llvm.riscv.vsseg2.nxv16i16(<vscale x 16 x i16> %val,<vscale x 16 x i16> %val, i16* %base, i64 0)
ret void
}

define void @test_vsseg2_mask_nxv16i16(<vscale x 16 x i16> %val, i16* %base, <vscale x 16 x i1> %mask) {
; CHECK-LABEL: test_vsseg2_mask_nxv16i16:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: # kill: def $v8m4 killed $v8m4 def $v8m4_v12m4
; CHECK-NEXT: vmv4r.v v12, v8
; CHECK-NEXT: mv a1, zero
; CHECK-NEXT: vsetvli a1, a1, e16,m4,ta,mu
; CHECK-NEXT: vsseg2e16.v v8, (a0), v0.t
; CHECK-NEXT: ret
entry:
tail call void @llvm.riscv.vsseg2.mask.nxv16i16(<vscale x 16 x i16> %val,<vscale x 16 x i16> %val, i16* %base, <vscale x 16 x i1> %mask, i64 0)
ret void
}

declare void @llvm.riscv.vssseg2.nxv16i16(<vscale x 16 x i16>,<vscale x 16 x i16>, i16*, i64, i64)
declare void @llvm.riscv.vssseg2.mask.nxv16i16(<vscale x 16 x i16>,<vscale x 16 x i16>, i16*, i64, <vscale x 16 x i1>, i64)

define void @test_vssseg2_nxv16i16(<vscale x 16 x i16> %val, i16* %base, i64 %offset) {
; CHECK-LABEL: test_vssseg2_nxv16i16:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: # kill: def $v8m4 killed $v8m4 def $v8m4_v12m4
; CHECK-NEXT: vmv4r.v v12, v8
; CHECK-NEXT: mv a2, zero
; CHECK-NEXT: vsetvli a2, a2, e16,m4,ta,mu
; CHECK-NEXT: vssseg2e16.v v8, (a0), a1
; CHECK-NEXT: ret
entry:
tail call void @llvm.riscv.vssseg2.nxv16i16(<vscale x 16 x i16> %val,<vscale x 16 x i16> %val, i16* %base, i64 %offset, i64 0)
ret void
}

define void @test_vssseg2_mask_nxv16i16(<vscale x 16 x i16> %val, i16* %base, i64 %offset, <vscale x 16 x i1> %mask) {
; CHECK-LABEL: test_vssseg2_mask_nxv16i16:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: # kill: def $v8m4 killed $v8m4 def $v8m4_v12m4
; CHECK-NEXT: vmv4r.v v12, v8
; CHECK-NEXT: mv a2, zero
; CHECK-NEXT: vsetvli a2, a2, e16,m4,ta,mu
; CHECK-NEXT: vssseg2e16.v v8, (a0), a1, v0.t
; CHECK-NEXT: ret
entry:
tail call void @llvm.riscv.vssseg2.mask.nxv16i16(<vscale x 16 x i16> %val,<vscale x 16 x i16> %val, i16* %base, i64 %offset, <vscale x 16 x i1> %mask, i64 0)
ret void
}

declare void @llvm.riscv.vsoxseg2.nxv16i16.nxv16i16(<vscale x 16 x i16>,<vscale x 16 x i16>, i16*, <vscale x 16 x i16>, i64)
declare void @llvm.riscv.vsoxseg2.mask.nxv16i16.nxv16i16(<vscale x 16 x i16>,<vscale x 16 x i16>, i16*, <vscale x 16 x i16>, <vscale x 16 x i1>, i64)

define void @test_vsoxseg2_nxv16i16_nxv16i16(<vscale x 16 x i16> %val, i16* %base, <vscale x 16 x i16> %index) {
; CHECK-LABEL: test_vsoxseg2_nxv16i16_nxv16i16:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: # kill: def $v8m4 killed $v8m4 killed $v8m4_v12m4 def $v8m4_v12m4
; CHECK-NEXT: vmv4r.v v28, v12
; CHECK-NEXT: vmv4r.v v12, v8
; CHECK-NEXT: mv a1, zero
; CHECK-NEXT: vsetvli a1, a1, e16,m4,ta,mu
; CHECK-NEXT: vsoxseg2ei16.v v8, (a0), v28
; CHECK-NEXT: ret
entry:
tail call void @llvm.riscv.vsoxseg2.nxv16i16.nxv16i16(<vscale x 16 x i16> %val,<vscale x 16 x i16> %val, i16* %base, <vscale x 16 x i16> %index, i64 0)
ret void
}

define void @test_vsoxseg2_mask_nxv16i16_nxv16i16(<vscale x 16 x i16> %val, i16* %base, <vscale x 16 x i16> %index, <vscale x 16 x i1> %mask) {
; CHECK-LABEL: test_vsoxseg2_mask_nxv16i16_nxv16i16:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: # kill: def $v8m4 killed $v8m4 killed $v8m4_v12m4 def $v8m4_v12m4
; CHECK-NEXT: vmv4r.v v28, v12
; CHECK-NEXT: vmv4r.v v12, v8
; CHECK-NEXT: mv a1, zero
; CHECK-NEXT: vsetvli a1, a1, e16,m4,ta,mu
; CHECK-NEXT: vsoxseg2ei16.v v8, (a0), v28, v0.t
; CHECK-NEXT: ret
entry:
tail call void @llvm.riscv.vsoxseg2.mask.nxv16i16.nxv16i16(<vscale x 16 x i16> %val,<vscale x 16 x i16> %val, i16* %base, <vscale x 16 x i16> %index, <vscale x 16 x i1> %mask, i64 0)
ret void
}

declare void @llvm.riscv.vsuxseg2.nxv16i16.nxv16i16(<vscale x 16 x i16>,<vscale x 16 x i16>, i16*, <vscale x 16 x i16>, i64)
declare void @llvm.riscv.vsuxseg2.mask.nxv16i16.nxv16i16(<vscale x 16 x i16>,<vscale x 16 x i16>, i16*, <vscale x 16 x i16>, <vscale x 16 x i1>, i64)

define void @test_vsuxseg2_nxv16i16_nxv16i16(<vscale x 16 x i16> %val, i16* %base, <vscale x 16 x i16> %index) {
; CHECK-LABEL: test_vsuxseg2_nxv16i16_nxv16i16:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: # kill: def $v8m4 killed $v8m4 killed $v8m4_v12m4 def $v8m4_v12m4
; CHECK-NEXT: vmv4r.v v28, v12
; CHECK-NEXT: vmv4r.v v12, v8
; CHECK-NEXT: mv a1, zero
; CHECK-NEXT: vsetvli a1, a1, e16,m4,ta,mu
; CHECK-NEXT: vsuxseg2ei16.v v8, (a0), v28
; CHECK-NEXT: ret
entry:
tail call void @llvm.riscv.vsuxseg2.nxv16i16.nxv16i16(<vscale x 16 x i16> %val,<vscale x 16 x i16> %val, i16* %base, <vscale x 16 x i16> %index, i64 0)
ret void
}

define void @test_vsuxseg2_mask_nxv16i16_nxv16i16(<vscale x 16 x i16> %val, i16* %base, <vscale x 16 x i16> %index, <vscale x 16 x i1> %mask) {
; CHECK-LABEL: test_vsuxseg2_mask_nxv16i16_nxv16i16:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: # kill: def $v8m4 killed $v8m4 killed $v8m4_v12m4 def $v8m4_v12m4
; CHECK-NEXT: vmv4r.v v28, v12
; CHECK-NEXT: vmv4r.v v12, v8
; CHECK-NEXT: mv a1, zero
; CHECK-NEXT: vsetvli a1, a1, e16,m4,ta,mu
; CHECK-NEXT: vsuxseg2ei16.v v8, (a0), v28, v0.t
; CHECK-NEXT: ret
entry:
tail call void @llvm.riscv.vsuxseg2.mask.nxv16i16.nxv16i16(<vscale x 16 x i16> %val,<vscale x 16 x i16> %val, i16* %base, <vscale x 16 x i16> %index, <vscale x 16 x i1> %mask, i64 0)
ret void
}
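
Illustrative aside, not part of the patch: the reason a constant 0 VL cannot be folded to X0, expressed as a minimal C++ sketch. The names below are hypothetical and do not mirror the actual RISCVISelDAGToDAG handlers; only the encoding rule (x0 as the AVL source of vsetvli, with a non-x0 destination, means VLMAX) comes from the RVV specification.

// Hypothetical illustration only: decide how the AVL operand of vsetvli may
// be encoded for a segment load/store intrinsic with a known VL.
#include <cstdint>

enum class AVLOperand {
  X0,          // vsetvli rd, x0, ... : vl becomes VLMAX
  ScratchGPR,  // materialize the value in a GPR, e.g. "mv aN, zero" for 0
};

inline AVLOperand encodeAVL(bool RequestIsVLMax, uint64_t RequestedVL) {
  if (RequestIsVLMax)
    return AVLOperand::X0;      // only an explicit "use VLMAX" request may use x0
  (void)RequestedVL;            // any concrete value, including 0, needs a register
  return AVLOperand::ScratchGPR;
}

int main() {
  // The tests above pass VL = 0, which must not become x0.
  return encodeAVL(/*RequestIsVLMax=*/false, /*RequestedVL=*/0) ==
                 AVLOperand::ScratchGPR
             ? 0
             : 1;
}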