-
Notifications
You must be signed in to change notification settings - Fork 11.1k
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
[RISCV] Match trunc_vector_vl+sra_vl/srl_vl with splat shift amount t…
…o vnsra/vnsrl. Limited to splats because we would need to truncate the shift amount vector otherwise. I tried to do this with new ISD nodes and a DAG combine to avoid such a large pattern, but we don't form the splat until LegalizeDAG and need DAG combine to remove a scalable->fixed->scalable cast before it becomes visible to the shift node. By the time that happens we've already visited the truncate node and won't revisit it. I think I have an idea how to improve i64 on RV32 I'll save for a follow up. Reviewed By: frasercrmck Differential Revision: https://reviews.llvm.org/D102019
- Loading branch information
Showing
2 changed files
with
227 additions
and
6 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
189 changes: 189 additions & 0 deletions
189
llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vnsra-vnsrl.ll
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change
---|---|---
@@ -0,0 +1,189 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=riscv32 -mattr=+experimental-v -riscv-v-vector-bits-min=128 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32
; RUN: llc -mtriple=riscv64 -mattr=+experimental-v -riscv-v-vector-bits-min=128 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64

; ashr by a splatted scalar followed by trunc: the CHECK lines show this
; selects vnsra.wx (narrowing arithmetic shift with an x-register amount).
define <8 x i8> @vnsra_v8i16_v8i8_scalar(<8 x i16> %x, i16 %y) {
; CHECK-LABEL: vnsra_v8i16_v8i8_scalar:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli a1, 8, e8,mf2,ta,mu
; CHECK-NEXT:    vnsra.wx v25, v8, a0
; CHECK-NEXT:    vmv1r.v v8, v25
; CHECK-NEXT:    ret
  %insert = insertelement <8 x i16> undef, i16 %y, i16 0
  %splat = shufflevector <8 x i16> %insert, <8 x i16> undef, <8 x i32> zeroinitializer
  %a = ashr <8 x i16> %x, %splat
  %b = trunc <8 x i16> %a to <8 x i8>
  ret <8 x i8> %b
}

; Same splat-shift-amount pattern at i32 -> i16; selects vnsra.wx.
define <4 x i16> @vnsra_v4i32_v4i16_scalar(<4 x i32> %x, i32 %y) {
; CHECK-LABEL: vnsra_v4i32_v4i16_scalar:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli a1, 4, e16,mf2,ta,mu
; CHECK-NEXT:    vnsra.wx v25, v8, a0
; CHECK-NEXT:    vmv1r.v v8, v25
; CHECK-NEXT:    ret
  %insert = insertelement <4 x i32> undef, i32 %y, i32 0
  %splat = shufflevector <4 x i32> %insert, <4 x i32> undef, <4 x i32> zeroinitializer
  %a = ashr <4 x i32> %x, %splat
  %b = trunc <4 x i32> %a to <4 x i16>
  ret <4 x i16> %b
}

; i64 shift amount: on RV32 the scalar lives in a GPR pair, so the splat goes
; through the stack (sw/sw + vlse64.v) and a full vsra.vv is used before the
; narrowing vnsrl.wi 0.  RV64 can use vnsra.wx directly.  (The commit message
; notes the RV32 case is a known follow-up for improvement.)
define <2 x i32> @vnsra_v2i64_v2i32_scalar(<2 x i64> %x, i64 %y) {
; RV32-LABEL: vnsra_v2i64_v2i32_scalar:
; RV32:       # %bb.0:
; RV32-NEXT:    addi sp, sp, -16
; RV32-NEXT:    .cfi_def_cfa_offset 16
; RV32-NEXT:    sw a1, 12(sp)
; RV32-NEXT:    sw a0, 8(sp)
; RV32-NEXT:    vsetivli a0, 2, e64,m1,ta,mu
; RV32-NEXT:    addi a0, sp, 8
; RV32-NEXT:    vlse64.v v25, (a0), zero
; RV32-NEXT:    vsra.vv v25, v8, v25
; RV32-NEXT:    vsetivli a0, 2, e32,mf2,ta,mu
; RV32-NEXT:    vnsrl.wi v8, v25, 0
; RV32-NEXT:    addi sp, sp, 16
; RV32-NEXT:    ret
;
; RV64-LABEL: vnsra_v2i64_v2i32_scalar:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetivli a1, 2, e32,mf2,ta,mu
; RV64-NEXT:    vnsra.wx v25, v8, a0
; RV64-NEXT:    vmv1r.v v8, v25
; RV64-NEXT:    ret
  %insert = insertelement <2 x i64> undef, i64 %y, i32 0
  %splat = shufflevector <2 x i64> %insert, <2 x i64> undef, <2 x i32> zeroinitializer
  %a = ashr <2 x i64> %x, %splat
  %b = trunc <2 x i64> %a to <2 x i32>
  ret <2 x i32> %b
}

; Immediate splat shift.  The CHECK shows vnsrl.wi rather than vnsra: the
; truncate keeps only bits below the sign-extension, so a logical narrowing
; shift is equivalent to the arithmetic one here.
define <8 x i8> @vnsra_v8i16_v8i8_imm(<8 x i16> %x) {
; CHECK-LABEL: vnsra_v8i16_v8i8_imm:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli a0, 8, e8,mf2,ta,mu
; CHECK-NEXT:    vnsrl.wi v25, v8, 8
; CHECK-NEXT:    vmv1r.v v8, v25
; CHECK-NEXT:    ret
  %a = ashr <8 x i16> %x, <i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8>
  %b = trunc <8 x i16> %a to <8 x i8>
  ret <8 x i8> %b
}

; Immediate splat shift at i32 -> i16; selects vnsrl.wi 16 (logical form is
; equivalent after truncation -- see vnsra_v8i16_v8i8_imm).
define <4 x i16> @vnsra_v4i32_v4i16_imm(<4 x i32> %x) {
; CHECK-LABEL: vnsra_v4i32_v4i16_imm:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli a0, 4, e16,mf2,ta,mu
; CHECK-NEXT:    vnsrl.wi v25, v8, 16
; CHECK-NEXT:    vmv1r.v v8, v25
; CHECK-NEXT:    ret
  %a = ashr <4 x i32> %x, <i32 16, i32 16, i32 16, i32 16>
  %b = trunc <4 x i32> %a to <4 x i16>
  ret <4 x i16> %b
}

; Immediate splat shift at i64 -> i32.  Shift amount 31 is the largest that
; fits the 5-bit uimm of vnsrl.wi; selects vnsrl.wi 31.
define <2 x i32> @vnsra_v2i64_v2i32_imm(<2 x i64> %x) {
; CHECK-LABEL: vnsra_v2i64_v2i32_imm:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli a0, 2, e32,mf2,ta,mu
; CHECK-NEXT:    vnsrl.wi v25, v8, 31
; CHECK-NEXT:    vmv1r.v v8, v25
; CHECK-NEXT:    ret
  %a = ashr <2 x i64> %x, <i64 31, i64 31>
  %b = trunc <2 x i64> %a to <2 x i32>
  ret <2 x i32> %b
}

; lshr by a splatted scalar followed by trunc; selects vnsrl.wx.
define <8 x i8> @vnsrl_v8i16_v8i8_scalar(<8 x i16> %x, i16 %y) {
; CHECK-LABEL: vnsrl_v8i16_v8i8_scalar:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli a1, 8, e8,mf2,ta,mu
; CHECK-NEXT:    vnsrl.wx v25, v8, a0
; CHECK-NEXT:    vmv1r.v v8, v25
; CHECK-NEXT:    ret
  %insert = insertelement <8 x i16> undef, i16 %y, i16 0
  %splat = shufflevector <8 x i16> %insert, <8 x i16> undef, <8 x i32> zeroinitializer
  %a = lshr <8 x i16> %x, %splat
  %b = trunc <8 x i16> %a to <8 x i8>
  ret <8 x i8> %b
}

; Same splat-shift-amount lshr pattern at i32 -> i16; selects vnsrl.wx.
define <4 x i16> @vnsrl_v4i32_v4i16_scalar(<4 x i32> %x, i32 %y) {
; CHECK-LABEL: vnsrl_v4i32_v4i16_scalar:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli a1, 4, e16,mf2,ta,mu
; CHECK-NEXT:    vnsrl.wx v25, v8, a0
; CHECK-NEXT:    vmv1r.v v8, v25
; CHECK-NEXT:    ret
  %insert = insertelement <4 x i32> undef, i32 %y, i32 0
  %splat = shufflevector <4 x i32> %insert, <4 x i32> undef, <4 x i32> zeroinitializer
  %a = lshr <4 x i32> %x, %splat
  %b = trunc <4 x i32> %a to <4 x i16>
  ret <4 x i16> %b
}

; i64 shift amount, lshr form.  RV32 splats the GPR pair through the stack and
; uses vsrl.vv + vnsrl.wi 0; RV64 uses vnsrl.wx directly (mirrors the vnsra
; scalar i64 case above).
define <2 x i32> @vnsrl_v2i64_v2i32_scalar(<2 x i64> %x, i64 %y) {
; RV32-LABEL: vnsrl_v2i64_v2i32_scalar:
; RV32:       # %bb.0:
; RV32-NEXT:    addi sp, sp, -16
; RV32-NEXT:    .cfi_def_cfa_offset 16
; RV32-NEXT:    sw a1, 12(sp)
; RV32-NEXT:    sw a0, 8(sp)
; RV32-NEXT:    vsetivli a0, 2, e64,m1,ta,mu
; RV32-NEXT:    addi a0, sp, 8
; RV32-NEXT:    vlse64.v v25, (a0), zero
; RV32-NEXT:    vsrl.vv v25, v8, v25
; RV32-NEXT:    vsetivli a0, 2, e32,mf2,ta,mu
; RV32-NEXT:    vnsrl.wi v8, v25, 0
; RV32-NEXT:    addi sp, sp, 16
; RV32-NEXT:    ret
;
; RV64-LABEL: vnsrl_v2i64_v2i32_scalar:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetivli a1, 2, e32,mf2,ta,mu
; RV64-NEXT:    vnsrl.wx v25, v8, a0
; RV64-NEXT:    vmv1r.v v8, v25
; RV64-NEXT:    ret
  %insert = insertelement <2 x i64> undef, i64 %y, i32 0
  %splat = shufflevector <2 x i64> %insert, <2 x i64> undef, <2 x i32> zeroinitializer
  %a = lshr <2 x i64> %x, %splat
  %b = trunc <2 x i64> %a to <2 x i32>
  ret <2 x i32> %b
}

; Immediate splat lshr; selects vnsrl.wi 8.
define <8 x i8> @vnsrl_v8i16_v8i8_imm(<8 x i16> %x) {
; CHECK-LABEL: vnsrl_v8i16_v8i8_imm:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli a0, 8, e8,mf2,ta,mu
; CHECK-NEXT:    vnsrl.wi v25, v8, 8
; CHECK-NEXT:    vmv1r.v v8, v25
; CHECK-NEXT:    ret
  %a = lshr <8 x i16> %x, <i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8>
  %b = trunc <8 x i16> %a to <8 x i8>
  ret <8 x i8> %b
}

; Immediate splat lshr at i32 -> i16; selects vnsrl.wi 16.
define <4 x i16> @vnsrl_v4i32_v4i16_imm(<4 x i32> %x) {
; CHECK-LABEL: vnsrl_v4i32_v4i16_imm:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli a0, 4, e16,mf2,ta,mu
; CHECK-NEXT:    vnsrl.wi v25, v8, 16
; CHECK-NEXT:    vmv1r.v v8, v25
; CHECK-NEXT:    ret
  %a = lshr <4 x i32> %x, <i32 16, i32 16, i32 16, i32 16>
  %b = trunc <4 x i32> %a to <4 x i16>
  ret <4 x i16> %b
}

; Immediate splat lshr at i64 -> i32; shift amount 31 fits the 5-bit uimm of
; vnsrl.wi.
define <2 x i32> @vnsrl_v2i64_v2i32_imm(<2 x i64> %x) {
; CHECK-LABEL: vnsrl_v2i64_v2i32_imm:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli a0, 2, e32,mf2,ta,mu
; CHECK-NEXT:    vnsrl.wi v25, v8, 31
; CHECK-NEXT:    vmv1r.v v8, v25
; CHECK-NEXT:    ret
  %a = lshr <2 x i64> %x, <i64 31, i64 31>
  %b = trunc <2 x i64> %a to <2 x i32>
  ret <2 x i32> %b
}