[RISCV] Lower unary zvbb ops for fixed vectors
This reuses the same strategy as other ops on fixed vectors, i.e. the ops are
custom lowered to scalable *_vl SD nodes.

Reviewed By: craig.topper

Differential Revision: https://reviews.llvm.org/D157294
lukel97 committed Aug 8, 2023
1 parent 44383ac commit 768740e
Showing 6 changed files with 462 additions and 7 deletions.
38 changes: 31 additions & 7 deletions llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -1058,13 +1058,20 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,

         setOperationAction(IntegerVPOps, VT, Custom);

-        // Lower CTLZ_ZERO_UNDEF and CTTZ_ZERO_UNDEF if element of VT in the
-        // range of f32.
-        EVT FloatVT = MVT::getVectorVT(MVT::f32, VT.getVectorElementCount());
-        if (isTypeLegal(FloatVT))
-          setOperationAction(
-              {ISD::CTLZ, ISD::CTLZ_ZERO_UNDEF, ISD::CTTZ_ZERO_UNDEF}, VT,
-              Custom);
+        if (Subtarget.hasStdExtZvbb()) {
+          setOperationAction({ISD::BITREVERSE, ISD::BSWAP, ISD::CTLZ,
+                              ISD::CTLZ_ZERO_UNDEF, ISD::CTTZ,
+                              ISD::CTTZ_ZERO_UNDEF, ISD::CTPOP},
+                             VT, Custom);
+        } else {
+          // Lower CTLZ_ZERO_UNDEF and CTTZ_ZERO_UNDEF if element of VT in the
+          // range of f32.
+          EVT FloatVT = MVT::getVectorVT(MVT::f32, VT.getVectorElementCount());
+          if (isTypeLegal(FloatVT))
+            setOperationAction(
+                {ISD::CTLZ, ISD::CTLZ_ZERO_UNDEF, ISD::CTTZ_ZERO_UNDEF}, VT,
+                Custom);
+        }
       }

       for (MVT VT : MVT::fp_fixedlen_vector_valuetypes()) {
@@ -4880,6 +4887,11 @@ static unsigned getRISCVVLOp(SDValue Op) {
   OP_CASE(SHL)
   OP_CASE(SRA)
   OP_CASE(SRL)
+  OP_CASE(BSWAP)
+  OP_CASE(CTTZ)
+  OP_CASE(CTLZ)
+  OP_CASE(CTPOP)
+  OP_CASE(BITREVERSE)
   OP_CASE(SADDSAT)
   OP_CASE(UADDSAT)
   OP_CASE(SSUBSAT)
@@ -4927,8 +4939,10 @@ static unsigned getRISCVVLOp(SDValue Op) {
   VP_CASE(CTLZ) // VP_CTLZ
   VP_CASE(CTTZ) // VP_CTTZ
   VP_CASE(CTPOP) // VP_CTPOP
+  case ISD::CTLZ_ZERO_UNDEF:
   case ISD::VP_CTLZ_ZERO_UNDEF:
     return RISCVISD::CTLZ_VL;
+  case ISD::CTTZ_ZERO_UNDEF:
   case ISD::VP_CTTZ_ZERO_UNDEF:
     return RISCVISD::CTTZ_VL;
   case ISD::FMA:
@@ -5156,6 +5170,10 @@ SDValue RISCVTargetLowering::LowerOperation(SDValue Op,
     return LowerIS_FPCLASS(Op, DAG);
   case ISD::BITREVERSE: {
     MVT VT = Op.getSimpleValueType();
+    if (VT.isFixedLengthVector()) {
+      assert(Subtarget.hasStdExtZvbb());
+      return lowerToScalableOp(Op, DAG);
+    }
     SDLoc DL(Op);
     assert(Subtarget.hasStdExtZbkb() && "Unexpected custom legalization");
     assert(Op.getOpcode() == ISD::BITREVERSE && "Unexpected opcode");
@@ -5668,6 +5686,8 @@ SDValue RISCVTargetLowering::LowerOperation(SDValue Op,
   case ISD::SREM:
   case ISD::UDIV:
   case ISD::UREM:
+  case ISD::BSWAP:
+  case ISD::CTPOP:
     return lowerToScalableOp(Op, DAG);
   case ISD::SHL:
   case ISD::SRA:
@@ -5702,7 +5722,11 @@ SDValue RISCVTargetLowering::LowerOperation(SDValue Op,
     return lowerABS(Op, DAG);
   case ISD::CTLZ:
   case ISD::CTLZ_ZERO_UNDEF:
+  case ISD::CTTZ:
   case ISD::CTTZ_ZERO_UNDEF:
+    if (Subtarget.hasStdExtZvbb())
+      return lowerToScalableOp(Op, DAG);
+    assert(Op.getOpcode() != ISD::CTTZ);
     return lowerCTLZ_CTTZ_ZERO_UNDEF(Op, DAG);
   case ISD::VSELECT:
     return lowerFixedLengthVectorSelectToRVV(Op, DAG);
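For readers unfamiliar with the path these hunks hook into, here is a minimal sketch of the strategy from the commit message: the fixed-length unary op is re-expressed on a scalable "container" type, emitted as the corresponding RISCVISD *_VL node (which carries an explicit mask and VL), and then converted back to the fixed-length type. This is an illustration only, not the upstream lowerToScalableOp; the helper names (getContainerForFixedLengthVector, convertToScalableVector, convertFromScalableVector, getDefaultVLOps) mirror utilities in RISCVISelLowering.cpp, but the signatures and the *_VL operand list shown here are simplified assumptions.

// Illustrative sketch only -- assumes it sits inside RISCVISelLowering.cpp so
// the fixed/scalable conversion helpers are in scope.
static SDValue lowerUnaryFixedVectorToVL(SDValue Op, SelectionDAG &DAG,
                                         const RISCVSubtarget &Subtarget,
                                         unsigned VLOpcode) {
  MVT VT = Op.getSimpleValueType();
  assert(VT.isFixedLengthVector() && "expected a fixed-length vector op");
  SDLoc DL(Op);

  // Pick a scalable "container" type wide enough to hold the fixed vector.
  MVT ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);

  // Re-express the fixed-length source value in the container type.
  SDValue Src =
      convertToScalableVector(ContainerVT, Op.getOperand(0), DAG, Subtarget);

  // *_VL nodes take an explicit mask and VL; for a fixed-length op the VL is
  // the fixed element count and the mask is all ones.
  auto [Mask, VL] = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);

  // E.g. VLOpcode == RISCVISD::BITREVERSE_VL when lowering ISD::BITREVERSE,
  // which later selects to vbrev.v under Zvbb.
  SDValue Res = DAG.getNode(VLOpcode, DL, ContainerVT, Src, Mask, VL);

  // Shrink the result back to the original fixed-length type.
  return convertFromScalableVector(VT, Res, DAG, Subtarget);
}

In the actual patch this shape is provided by the existing generic lowerToScalableOp; the new ops only need OP_CASE entries in getRISCVVLOp plus the Custom operation actions registered in the first hunk above.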
50 changes: 50 additions & 0 deletions llvm/test/CodeGen/RISCV/rvv/fixed-vectors-bitreverse.ll
@@ -3,6 +3,8 @@
; RUN: llc -mtriple=riscv64 -mattr=+v -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=2 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=RV64,LMULMAX2-RV64
; RUN: llc -mtriple=riscv32 -mattr=+v -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=1 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=RV32,LMULMAX1-RV32
; RUN: llc -mtriple=riscv64 -mattr=+v -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=1 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=RV64,LMULMAX1-RV64
; RUN: llc -mtriple=riscv32 -mattr=+v,+experimental-zvbb -riscv-v-vector-bits-min=128 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=ZVBB
; RUN: llc -mtriple=riscv64 -mattr=+v,+experimental-zvbb -riscv-v-vector-bits-min=128 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=ZVBB

define void @bitreverse_v8i16(ptr %x, ptr %y) {
; RV32-LABEL: bitreverse_v8i16:
@@ -66,6 +68,14 @@ define void @bitreverse_v8i16(ptr %x, ptr %y) {
; RV64-NEXT: vor.vv v8, v9, v8
; RV64-NEXT: vse16.v v8, (a0)
; RV64-NEXT: ret
;
; ZVBB-LABEL: bitreverse_v8i16:
; ZVBB: # %bb.0:
; ZVBB-NEXT: vsetivli zero, 8, e16, m1, ta, ma
; ZVBB-NEXT: vle16.v v8, (a0)
; ZVBB-NEXT: vbrev.v v8, v8
; ZVBB-NEXT: vse16.v v8, (a0)
; ZVBB-NEXT: ret
%a = load <8 x i16>, ptr %x
%b = load <8 x i16>, ptr %y
%c = call <8 x i16> @llvm.bitreverse.v8i16(<8 x i16> %a)
@@ -152,6 +162,14 @@ define void @bitreverse_v4i32(ptr %x, ptr %y) {
; RV64-NEXT: vor.vv v8, v9, v8
; RV64-NEXT: vse32.v v8, (a0)
; RV64-NEXT: ret
;
; ZVBB-LABEL: bitreverse_v4i32:
; ZVBB: # %bb.0:
; ZVBB-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; ZVBB-NEXT: vle32.v v8, (a0)
; ZVBB-NEXT: vbrev.v v8, v8
; ZVBB-NEXT: vse32.v v8, (a0)
; ZVBB-NEXT: ret
%a = load <4 x i32>, ptr %x
%b = load <4 x i32>, ptr %y
%c = call <4 x i32> @llvm.bitreverse.v4i32(<4 x i32> %a)
@@ -291,6 +309,14 @@ define void @bitreverse_v2i64(ptr %x, ptr %y) {
; RV64-NEXT: vor.vv v8, v9, v8
; RV64-NEXT: vse64.v v8, (a0)
; RV64-NEXT: ret
;
; ZVBB-LABEL: bitreverse_v2i64:
; ZVBB: # %bb.0:
; ZVBB-NEXT: vsetivli zero, 2, e64, m1, ta, ma
; ZVBB-NEXT: vle64.v v8, (a0)
; ZVBB-NEXT: vbrev.v v8, v8
; ZVBB-NEXT: vse64.v v8, (a0)
; ZVBB-NEXT: ret
%a = load <2 x i64>, ptr %x
%b = load <2 x i64>, ptr %y
%c = call <2 x i64> @llvm.bitreverse.v2i64(<2 x i64> %a)
@@ -465,6 +491,14 @@ define void @bitreverse_v16i16(ptr %x, ptr %y) {
; LMULMAX1-RV64-NEXT: vse16.v v9, (a0)
; LMULMAX1-RV64-NEXT: vse16.v v8, (a1)
; LMULMAX1-RV64-NEXT: ret
;
; ZVBB-LABEL: bitreverse_v16i16:
; ZVBB: # %bb.0:
; ZVBB-NEXT: vsetivli zero, 16, e16, m2, ta, ma
; ZVBB-NEXT: vle16.v v8, (a0)
; ZVBB-NEXT: vbrev.v v8, v8
; ZVBB-NEXT: vse16.v v8, (a0)
; ZVBB-NEXT: ret
%a = load <16 x i16>, ptr %x
%b = load <16 x i16>, ptr %y
%c = call <16 x i16> @llvm.bitreverse.v16i16(<16 x i16> %a)
@@ -683,6 +717,14 @@ define void @bitreverse_v8i32(ptr %x, ptr %y) {
; LMULMAX1-RV64-NEXT: vse32.v v9, (a0)
; LMULMAX1-RV64-NEXT: vse32.v v8, (a1)
; LMULMAX1-RV64-NEXT: ret
;
; ZVBB-LABEL: bitreverse_v8i32:
; ZVBB: # %bb.0:
; ZVBB-NEXT: vsetivli zero, 8, e32, m2, ta, ma
; ZVBB-NEXT: vle32.v v8, (a0)
; ZVBB-NEXT: vbrev.v v8, v8
; ZVBB-NEXT: vse32.v v8, (a0)
; ZVBB-NEXT: ret
%a = load <8 x i32>, ptr %x
%b = load <8 x i32>, ptr %y
%c = call <8 x i32> @llvm.bitreverse.v8i32(<8 x i32> %a)
@@ -1033,6 +1075,14 @@ define void @bitreverse_v4i64(ptr %x, ptr %y) {
; LMULMAX1-RV64-NEXT: vse64.v v8, (a0)
; LMULMAX1-RV64-NEXT: vse64.v v9, (a1)
; LMULMAX1-RV64-NEXT: ret
;
; ZVBB-LABEL: bitreverse_v4i64:
; ZVBB: # %bb.0:
; ZVBB-NEXT: vsetivli zero, 4, e64, m2, ta, ma
; ZVBB-NEXT: vle64.v v8, (a0)
; ZVBB-NEXT: vbrev.v v8, v8
; ZVBB-NEXT: vse64.v v8, (a0)
; ZVBB-NEXT: ret
%a = load <4 x i64>, ptr %x
%b = load <4 x i64>, ptr %y
%c = call <4 x i64> @llvm.bitreverse.v4i64(<4 x i64> %a)
50 changes: 50 additions & 0 deletions llvm/test/CodeGen/RISCV/rvv/fixed-vectors-bswap.ll
@@ -3,6 +3,8 @@
; RUN: llc -mtriple=riscv64 -mattr=+v -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=2 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64,LMULMAX2-RV64
; RUN: llc -mtriple=riscv32 -mattr=+v -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=1 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32,LMULMAX1-RV32
; RUN: llc -mtriple=riscv64 -mattr=+v -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=1 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64,LMULMAX1-RV64
; RUN: llc -mtriple=riscv32 -mattr=+v,+experimental-zvbb -riscv-v-vector-bits-min=128 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=ZVBB
; RUN: llc -mtriple=riscv64 -mattr=+v,+experimental-zvbb -riscv-v-vector-bits-min=128 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=ZVBB

define void @bswap_v8i16(ptr %x, ptr %y) {
; CHECK-LABEL: bswap_v8i16:
@@ -14,6 +16,14 @@ define void @bswap_v8i16(ptr %x, ptr %y) {
; CHECK-NEXT: vor.vv v8, v8, v9
; CHECK-NEXT: vse16.v v8, (a0)
; CHECK-NEXT: ret
;
; ZVBB-LABEL: bswap_v8i16:
; ZVBB: # %bb.0:
; ZVBB-NEXT: vsetivli zero, 8, e16, m1, ta, ma
; ZVBB-NEXT: vle16.v v8, (a0)
; ZVBB-NEXT: vrev8.v v8, v8
; ZVBB-NEXT: vse16.v v8, (a0)
; ZVBB-NEXT: ret
%a = load <8 x i16>, ptr %x
%b = load <8 x i16>, ptr %y
%c = call <8 x i16> @llvm.bswap.v8i16(<8 x i16> %a)
@@ -58,6 +68,14 @@ define void @bswap_v4i32(ptr %x, ptr %y) {
; RV64-NEXT: vor.vv v8, v8, v9
; RV64-NEXT: vse32.v v8, (a0)
; RV64-NEXT: ret
;
; ZVBB-LABEL: bswap_v4i32:
; ZVBB: # %bb.0:
; ZVBB-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; ZVBB-NEXT: vle32.v v8, (a0)
; ZVBB-NEXT: vrev8.v v8, v8
; ZVBB-NEXT: vse32.v v8, (a0)
; ZVBB-NEXT: ret
%a = load <4 x i32>, ptr %x
%b = load <4 x i32>, ptr %y
%c = call <4 x i32> @llvm.bswap.v4i32(<4 x i32> %a)
@@ -140,6 +158,14 @@ define void @bswap_v2i64(ptr %x, ptr %y) {
; RV64-NEXT: vor.vv v8, v8, v9
; RV64-NEXT: vse64.v v8, (a0)
; RV64-NEXT: ret
;
; ZVBB-LABEL: bswap_v2i64:
; ZVBB: # %bb.0:
; ZVBB-NEXT: vsetivli zero, 2, e64, m1, ta, ma
; ZVBB-NEXT: vle64.v v8, (a0)
; ZVBB-NEXT: vrev8.v v8, v8
; ZVBB-NEXT: vse64.v v8, (a0)
; ZVBB-NEXT: ret
%a = load <2 x i64>, ptr %x
%b = load <2 x i64>, ptr %y
%c = call <2 x i64> @llvm.bswap.v2i64(<2 x i64> %a)
@@ -200,6 +226,14 @@ define void @bswap_v16i16(ptr %x, ptr %y) {
; LMULMAX1-RV64-NEXT: vse16.v v9, (a0)
; LMULMAX1-RV64-NEXT: vse16.v v8, (a1)
; LMULMAX1-RV64-NEXT: ret
;
; ZVBB-LABEL: bswap_v16i16:
; ZVBB: # %bb.0:
; ZVBB-NEXT: vsetivli zero, 16, e16, m2, ta, ma
; ZVBB-NEXT: vle16.v v8, (a0)
; ZVBB-NEXT: vrev8.v v8, v8
; ZVBB-NEXT: vse16.v v8, (a0)
; ZVBB-NEXT: ret
%a = load <16 x i16>, ptr %x
%b = load <16 x i16>, ptr %y
%c = call <16 x i16> @llvm.bswap.v16i16(<16 x i16> %a)
@@ -304,6 +338,14 @@ define void @bswap_v8i32(ptr %x, ptr %y) {
; LMULMAX1-RV64-NEXT: vse32.v v9, (a0)
; LMULMAX1-RV64-NEXT: vse32.v v8, (a1)
; LMULMAX1-RV64-NEXT: ret
;
; ZVBB-LABEL: bswap_v8i32:
; ZVBB: # %bb.0:
; ZVBB-NEXT: vsetivli zero, 8, e32, m2, ta, ma
; ZVBB-NEXT: vle32.v v8, (a0)
; ZVBB-NEXT: vrev8.v v8, v8
; ZVBB-NEXT: vse32.v v8, (a0)
; ZVBB-NEXT: ret
%a = load <8 x i32>, ptr %x
%b = load <8 x i32>, ptr %y
%c = call <8 x i32> @llvm.bswap.v8i32(<8 x i32> %a)
@@ -510,6 +552,14 @@ define void @bswap_v4i64(ptr %x, ptr %y) {
; LMULMAX1-RV64-NEXT: vse64.v v9, (a0)
; LMULMAX1-RV64-NEXT: vse64.v v8, (a1)
; LMULMAX1-RV64-NEXT: ret
;
; ZVBB-LABEL: bswap_v4i64:
; ZVBB: # %bb.0:
; ZVBB-NEXT: vsetivli zero, 4, e64, m2, ta, ma
; ZVBB-NEXT: vle64.v v8, (a0)
; ZVBB-NEXT: vrev8.v v8, v8
; ZVBB-NEXT: vse64.v v8, (a0)
; ZVBB-NEXT: ret
%a = load <4 x i64>, ptr %x
%b = load <4 x i64>, ptr %y
%c = call <4 x i64> @llvm.bswap.v4i64(<4 x i64> %a)
