Skip to content

Commit

Permalink
[RISCV] Match trunc_vector_vl+sra_vl/srl_vl with splat shift amount to vnsra/vnsrl.
Browse files Browse the repository at this point in the history

Limited to splats because we would need to truncate the shift
amount vector otherwise.

I tried to do this with new ISD nodes and a DAG combine to
avoid such a large pattern, but we don't form the splat until
LegalizeDAG and need DAG combine to remove a scalable->fixed->scalable
cast before it becomes visible to the shift node. By the time that
happens we've already visited the truncate node and won't revisit it.

I think I have an idea how to improve i64 on RV32 that I'll save for a
follow-up.

Reviewed By: frasercrmck

Differential Revision: https://reviews.llvm.org/D102019
  • Loading branch information
topperc committed May 11, 2021
1 parent 668dccc commit dc00cbb
Show file tree
Hide file tree
Showing 2 changed files with 227 additions and 6 deletions.
44 changes: 38 additions & 6 deletions llvm/lib/Target/RISCV/RISCVInstrInfoVVLPatterns.td
Original file line number Diff line number Diff line change
Expand Up @@ -636,15 +636,47 @@ defm : VPatBinaryVL_VV_VX_VI<riscv_shl_vl, "PseudoVSLL", uimm5>;
defm : VPatBinaryVL_VV_VX_VI<riscv_srl_vl, "PseudoVSRL", uimm5>;
defm : VPatBinaryVL_VV_VX_VI<riscv_sra_vl, "PseudoVSRA", uimm5>;



// 12.7. Vector Narrowing Integer Right Shift Instructions
foreach vtiTofti = AllFractionableVF2IntVectors in {
defvar vti = vtiTofti.Vti;
defvar fti = vtiTofti.Fti;
def : Pat<(fti.Vector (riscv_trunc_vector_vl (vti.Vector vti.RegClass:$rs1),
foreach vtiTowti = AllWidenableIntVectors in {
defvar vti = vtiTowti.Vti;
defvar wti = vtiTowti.Wti;
def : Pat<(vti.Vector (riscv_trunc_vector_vl (wti.Vector wti.RegClass:$rs1),
(vti.Mask true_mask),
VLOpFrag)),
(!cast<Instruction>("PseudoVNSRL_WI_"#fti.LMul.MX)
vti.RegClass:$rs1, 0, GPR:$vl, fti.Log2SEW)>;
(!cast<Instruction>("PseudoVNSRL_WI_"#vti.LMul.MX)
wti.RegClass:$rs1, 0, GPR:$vl, vti.Log2SEW)>;

def : Pat<(vti.Vector
(riscv_trunc_vector_vl
(wti.Vector
(riscv_sra_vl wti.RegClass:$rs1, (SplatPat XLenVT:$rs2),
true_mask, VLOpFrag)), true_mask, VLOpFrag)),
(!cast<Instruction>("PseudoVNSRA_WX_"#vti.LMul.MX)
wti.RegClass:$rs1, GPR:$rs2, GPR:$vl, vti.Log2SEW)>;
def : Pat<(vti.Vector
(riscv_trunc_vector_vl
(wti.Vector
(riscv_sra_vl wti.RegClass:$rs1, (SplatPat_uimm5 uimm5:$rs2),
true_mask, VLOpFrag)), true_mask, VLOpFrag)),
(!cast<Instruction>("PseudoVNSRA_WI_"#vti.LMul.MX)
wti.RegClass:$rs1, uimm5:$rs2, GPR:$vl, vti.Log2SEW)>;

def : Pat<(vti.Vector
(riscv_trunc_vector_vl
(wti.Vector
(riscv_srl_vl wti.RegClass:$rs1, (SplatPat XLenVT:$rs2),
true_mask, VLOpFrag)), true_mask, VLOpFrag)),
(!cast<Instruction>("PseudoVNSRL_WX_"#vti.LMul.MX)
wti.RegClass:$rs1, GPR:$rs2, GPR:$vl, vti.Log2SEW)>;
def : Pat<(vti.Vector
(riscv_trunc_vector_vl
(wti.Vector
(riscv_srl_vl wti.RegClass:$rs1, (SplatPat_uimm5 uimm5:$rs2),
true_mask, VLOpFrag)), true_mask, VLOpFrag)),
(!cast<Instruction>("PseudoVNSRL_WI_"#vti.LMul.MX)
wti.RegClass:$rs1, uimm5:$rs2, GPR:$vl, vti.Log2SEW)>;
}

// 12.8. Vector Integer Comparison Instructions
Expand Down
189 changes: 189 additions & 0 deletions llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vnsra-vnsrl.ll
Original file line number Diff line number Diff line change
@@ -0,0 +1,189 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=riscv32 -mattr=+experimental-v -riscv-v-vector-bits-min=128 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32
; RUN: llc -mtriple=riscv64 -mattr=+experimental-v -riscv-v-vector-bits-min=128 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64

; ashr by a splatted scalar then trunc should select a single vnsra.wx
; (narrowing arithmetic right shift with scalar shift amount).
define <8 x i8> @vnsra_v8i16_v8i8_scalar(<8 x i16> %x, i16 %y) {
; CHECK-LABEL: vnsra_v8i16_v8i8_scalar:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli a1, 8, e8,mf2,ta,mu
; CHECK-NEXT: vnsra.wx v25, v8, a0
; CHECK-NEXT: vmv1r.v v8, v25
; CHECK-NEXT: ret
%insert = insertelement <8 x i16> undef, i16 %y, i16 0
%splat = shufflevector <8 x i16> %insert, <8 x i16> undef, <8 x i32> zeroinitializer
%a = ashr <8 x i16> %x, %splat
%b = trunc <8 x i16> %a to <8 x i8>
ret <8 x i8> %b
}

; Same as above at i32->i16: splat-scalar ashr + trunc selects vnsra.wx.
define <4 x i16> @vnsra_v4i32_v4i16_scalar(<4 x i32> %x, i32 %y) {
; CHECK-LABEL: vnsra_v4i32_v4i16_scalar:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli a1, 4, e16,mf2,ta,mu
; CHECK-NEXT: vnsra.wx v25, v8, a0
; CHECK-NEXT: vmv1r.v v8, v25
; CHECK-NEXT: ret
%insert = insertelement <4 x i32> undef, i32 %y, i32 0
%splat = shufflevector <4 x i32> %insert, <4 x i32> undef, <4 x i32> zeroinitializer
%a = ashr <4 x i32> %x, %splat
%b = trunc <4 x i32> %a to <4 x i16>
ret <4 x i16> %b
}

; i64 shift amount: on RV64 the scalar fits XLen so vnsra.wx is selected.
; On RV32 the i64 splat is materialized through the stack (vlse64.v), so the
; splat is not visible to the pattern and we get vsra.vv + vnsrl.wi 0 instead
; (the commit message notes an RV32 improvement is left for a follow-up).
define <2 x i32> @vnsra_v2i64_v2i32_scalar(<2 x i64> %x, i64 %y) {
; RV32-LABEL: vnsra_v2i64_v2i32_scalar:
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -16
; RV32-NEXT: .cfi_def_cfa_offset 16
; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
; RV32-NEXT: vsetivli a0, 2, e64,m1,ta,mu
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vlse64.v v25, (a0), zero
; RV32-NEXT: vsra.vv v25, v8, v25
; RV32-NEXT: vsetivli a0, 2, e32,mf2,ta,mu
; RV32-NEXT: vnsrl.wi v8, v25, 0
; RV32-NEXT: addi sp, sp, 16
; RV32-NEXT: ret
;
; RV64-LABEL: vnsra_v2i64_v2i32_scalar:
; RV64: # %bb.0:
; RV64-NEXT: vsetivli a1, 2, e32,mf2,ta,mu
; RV64-NEXT: vnsra.wx v25, v8, a0
; RV64-NEXT: vmv1r.v v8, v25
; RV64-NEXT: ret
%insert = insertelement <2 x i64> undef, i64 %y, i32 0
%splat = shufflevector <2 x i64> %insert, <2 x i64> undef, <2 x i32> zeroinitializer
%a = ashr <2 x i64> %x, %splat
%b = trunc <2 x i64> %a to <2 x i32>
ret <2 x i32> %b
}

; Immediate splat shift: ashr by 8 then trunc to i8. The result keeps only
; bits 8..15, so the logical form vnsrl.wi is equivalent and is what's selected.
define <8 x i8> @vnsra_v8i16_v8i8_imm(<8 x i16> %x) {
; CHECK-LABEL: vnsra_v8i16_v8i8_imm:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli a0, 8, e8,mf2,ta,mu
; CHECK-NEXT: vnsrl.wi v25, v8, 8
; CHECK-NEXT: vmv1r.v v8, v25
; CHECK-NEXT: ret
%a = ashr <8 x i16> %x, <i16 8, i16 8, i16 8, i16 8,i16 8, i16 8, i16 8, i16 8>
%b = trunc <8 x i16> %a to <8 x i8>
ret <8 x i8> %b
}

; Immediate splat shift at i32->i16: ashr by 16 + trunc folds to vnsrl.wi 16
; (the truncated result only sees bits 16..31, so srl is equivalent to sra).
define <4 x i16> @vnsra_v4i32_v4i16_imm(<4 x i32> %x) {
; CHECK-LABEL: vnsra_v4i32_v4i16_imm:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli a0, 4, e16,mf2,ta,mu
; CHECK-NEXT: vnsrl.wi v25, v8, 16
; CHECK-NEXT: vmv1r.v v8, v25
; CHECK-NEXT: ret
%a = ashr <4 x i32> %x, <i32 16, i32 16, i32 16, i32 16>
%b = trunc <4 x i32> %a to <4 x i16>
ret <4 x i16> %b
}

; Immediate splat shift at i64->i32: ashr by 31 + trunc selects vnsrl.wi 31
; (bits 31..62 land in the narrow result either way, so srl is equivalent).
define <2 x i32> @vnsra_v2i64_v2i32_imm(<2 x i64> %x) {
; CHECK-LABEL: vnsra_v2i64_v2i32_imm:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli a0, 2, e32,mf2,ta,mu
; CHECK-NEXT: vnsrl.wi v25, v8, 31
; CHECK-NEXT: vmv1r.v v8, v25
; CHECK-NEXT: ret
%a = ashr <2 x i64> %x, <i64 31, i64 31>
%b = trunc <2 x i64> %a to <2 x i32>
ret <2 x i32> %b
}

; lshr by a splatted scalar then trunc should select a single vnsrl.wx
; (narrowing logical right shift with scalar shift amount).
define <8 x i8> @vnsrl_v8i16_v8i8_scalar(<8 x i16> %x, i16 %y) {
; CHECK-LABEL: vnsrl_v8i16_v8i8_scalar:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli a1, 8, e8,mf2,ta,mu
; CHECK-NEXT: vnsrl.wx v25, v8, a0
; CHECK-NEXT: vmv1r.v v8, v25
; CHECK-NEXT: ret
%insert = insertelement <8 x i16> undef, i16 %y, i16 0
%splat = shufflevector <8 x i16> %insert, <8 x i16> undef, <8 x i32> zeroinitializer
%a = lshr <8 x i16> %x, %splat
%b = trunc <8 x i16> %a to <8 x i8>
ret <8 x i8> %b
}

; Same as above at i32->i16: splat-scalar lshr + trunc selects vnsrl.wx.
define <4 x i16> @vnsrl_v4i32_v4i16_scalar(<4 x i32> %x, i32 %y) {
; CHECK-LABEL: vnsrl_v4i32_v4i16_scalar:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli a1, 4, e16,mf2,ta,mu
; CHECK-NEXT: vnsrl.wx v25, v8, a0
; CHECK-NEXT: vmv1r.v v8, v25
; CHECK-NEXT: ret
%insert = insertelement <4 x i32> undef, i32 %y, i32 0
%splat = shufflevector <4 x i32> %insert, <4 x i32> undef, <4 x i32> zeroinitializer
%a = lshr <4 x i32> %x, %splat
%b = trunc <4 x i32> %a to <4 x i16>
ret <4 x i16> %b
}

; i64 shift amount: RV64 selects vnsrl.wx directly. On RV32 the i64 splat
; goes through the stack (vlse64.v), hiding the splat from the pattern, so
; we still get vsrl.vv + vnsrl.wi 0 (RV32 improvement left for a follow-up).
define <2 x i32> @vnsrl_v2i64_v2i32_scalar(<2 x i64> %x, i64 %y) {
; RV32-LABEL: vnsrl_v2i64_v2i32_scalar:
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -16
; RV32-NEXT: .cfi_def_cfa_offset 16
; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
; RV32-NEXT: vsetivli a0, 2, e64,m1,ta,mu
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vlse64.v v25, (a0), zero
; RV32-NEXT: vsrl.vv v25, v8, v25
; RV32-NEXT: vsetivli a0, 2, e32,mf2,ta,mu
; RV32-NEXT: vnsrl.wi v8, v25, 0
; RV32-NEXT: addi sp, sp, 16
; RV32-NEXT: ret
;
; RV64-LABEL: vnsrl_v2i64_v2i32_scalar:
; RV64: # %bb.0:
; RV64-NEXT: vsetivli a1, 2, e32,mf2,ta,mu
; RV64-NEXT: vnsrl.wx v25, v8, a0
; RV64-NEXT: vmv1r.v v8, v25
; RV64-NEXT: ret
%insert = insertelement <2 x i64> undef, i64 %y, i32 0
%splat = shufflevector <2 x i64> %insert, <2 x i64> undef, <2 x i32> zeroinitializer
%a = lshr <2 x i64> %x, %splat
%b = trunc <2 x i64> %a to <2 x i32>
ret <2 x i32> %b
}

; Immediate splat shift: lshr by 8 then trunc to i8 selects vnsrl.wi 8.
define <8 x i8> @vnsrl_v8i16_v8i8_imm(<8 x i16> %x) {
; CHECK-LABEL: vnsrl_v8i16_v8i8_imm:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli a0, 8, e8,mf2,ta,mu
; CHECK-NEXT: vnsrl.wi v25, v8, 8
; CHECK-NEXT: vmv1r.v v8, v25
; CHECK-NEXT: ret
%a = lshr <8 x i16> %x, <i16 8, i16 8, i16 8, i16 8,i16 8, i16 8, i16 8, i16 8>
%b = trunc <8 x i16> %a to <8 x i8>
ret <8 x i8> %b
}

; Immediate splat shift at i32->i16: lshr by 16 then trunc selects vnsrl.wi 16.
define <4 x i16> @vnsrl_v4i32_v4i16_imm(<4 x i32> %x) {
; CHECK-LABEL: vnsrl_v4i32_v4i16_imm:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli a0, 4, e16,mf2,ta,mu
; CHECK-NEXT: vnsrl.wi v25, v8, 16
; CHECK-NEXT: vmv1r.v v8, v25
; CHECK-NEXT: ret
%a = lshr <4 x i32> %x, <i32 16, i32 16, i32 16, i32 16>
%b = trunc <4 x i32> %a to <4 x i16>
ret <4 x i16> %b
}

; Immediate splat shift at i64->i32: lshr by 31 then trunc selects vnsrl.wi 31.
define <2 x i32> @vnsrl_v2i64_v2i32_imm(<2 x i64> %x) {
; CHECK-LABEL: vnsrl_v2i64_v2i32_imm:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli a0, 2, e32,mf2,ta,mu
; CHECK-NEXT: vnsrl.wi v25, v8, 31
; CHECK-NEXT: vmv1r.v v8, v25
; CHECK-NEXT: ret
%a = lshr <2 x i64> %x, <i64 31, i64 31>
%b = trunc <2 x i64> %a to <2 x i32>
ret <2 x i32> %b
}

0 comments on commit dc00cbb

Please sign in to comment.