diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index 3b6dd0c11bbf9..43f08b453536c 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -2209,20 +2209,12 @@ bool RISCVTargetLowering::isExtractSubvectorCheap(EVT ResVT, EVT SrcVT,
     return true;
 
   // Convervatively only handle extracting half of a vector.
+  // TODO: We can do arbitrary slidedowns, but for now only support extracting
+  // the upper half of a vector until we have more test coverage.
   // TODO: For sizes which aren't multiples of VLEN sizes, this may not be
   // a cheap extract. However, this case is important in practice for
   // shuffled extracts of longer vectors. How resolve?
-  if ((ResElts * 2) != SrcElts)
-    return false;
-
-  // Slide can support arbitrary index, but we only treat vslidedown.vi as
-  // cheap.
-  if (Index >= 32)
-    return false;
-
-  // TODO: We can do arbitrary slidedowns, but for now only support extracting
-  // the upper half of a vector until we have more test coverage.
-  return Index == 0 || Index == ResElts;
+  return (ResElts * 2) == SrcElts && (Index == 0 || Index == ResElts);
 }
 
 MVT RISCVTargetLowering::getRegisterTypeForCallingConv(LLVMContext &Context,
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-shuffle-changes-length.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-shuffle-changes-length.ll
index df7b3eb8d4548..0bc2d34dada48 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-shuffle-changes-length.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-shuffle-changes-length.ll
@@ -307,243 +307,14 @@ define <32 x i32> @v32i32_v4i32(<4 x i32>) {
   ret <32 x i32> %2
 }
 
-; TODO: This case should be a simple vnsrl, but gets scalarized instead
 define <32 x i8> @vnsrl_v32i8_v64i8(<64 x i8> %in) {
-; RV32-LABEL: vnsrl_v32i8_v64i8:
-; RV32:       # %bb.0:
-; RV32-NEXT:    addi sp, sp, -128
-; RV32-NEXT:    .cfi_def_cfa_offset 128
-; RV32-NEXT:    sw ra, 124(sp) # 4-byte Folded Spill
-; RV32-NEXT:    sw s0, 120(sp) # 4-byte Folded Spill
-; RV32-NEXT:    .cfi_offset ra, -4
-; RV32-NEXT:    .cfi_offset s0, -8
-; RV32-NEXT:    addi s0, sp, 128
-; RV32-NEXT:    .cfi_def_cfa s0, 0
-; RV32-NEXT:    andi sp, sp, -64
-; RV32-NEXT:    li a0, 64
-; RV32-NEXT:    mv a1, sp
-; RV32-NEXT:    vsetvli zero, a0, e8, m4, ta, ma
-; RV32-NEXT:    vse8.v v8, (a1)
-; RV32-NEXT:    vsetivli zero, 1, e8, m1, ta, ma
-; RV32-NEXT:    vslidedown.vi v10, v8, 1
-; RV32-NEXT:    vmv.x.s a0, v10
-; RV32-NEXT:    li a1, 32
-; RV32-NEXT:    vsetvli zero, a1, e8, m2, ta, ma
-; RV32-NEXT:    vmv.v.x v10, a0
-; RV32-NEXT:    vsetivli zero, 1, e8, m1, ta, ma
-; RV32-NEXT:    vslidedown.vi v12, v8, 3
-; RV32-NEXT:    vmv.x.s a0, v12
-; RV32-NEXT:    vsetvli zero, a1, e8, m2, ta, ma
-; RV32-NEXT:    vslide1down.vx v10, v10, a0
-; RV32-NEXT:    vsetivli zero, 1, e8, m1, ta, ma
-; RV32-NEXT:    vslidedown.vi v12, v8, 5
-; RV32-NEXT:    vmv.x.s a0, v12
-; RV32-NEXT:    vsetvli zero, a1, e8, m2, ta, ma
-; RV32-NEXT:    vslide1down.vx v10, v10, a0
-; RV32-NEXT:    vsetivli zero, 1, e8, m1, ta, ma
-; RV32-NEXT:    vslidedown.vi v12, v8, 7
-; RV32-NEXT:    vmv.x.s a0, v12
-; RV32-NEXT:    vsetvli zero, a1, e8, m2, ta, ma
-; RV32-NEXT:    vslide1down.vx v10, v10, a0
-; RV32-NEXT:    vsetivli zero, 1, e8, m1, ta, ma
-; RV32-NEXT:    vslidedown.vi v12, v8, 9
-; RV32-NEXT:    vmv.x.s a0, v12
-; RV32-NEXT:    vsetvli zero, a1, e8, m2, ta, ma
-; RV32-NEXT:    vslide1down.vx v10, v10, a0
-; RV32-NEXT:    vsetivli zero, 1, e8, m1, ta, ma
-; RV32-NEXT:    vslidedown.vi v12, v8, 11
-; RV32-NEXT:    vmv.x.s a0, v12
-; RV32-NEXT:    vsetvli zero, a1, e8, m2, ta, ma
-; RV32-NEXT:    vslide1down.vx v10, v10, a0
-; RV32-NEXT:    vsetivli zero, 1, e8, m1, ta, ma
-; RV32-NEXT:    vslidedown.vi v12, v8, 13
-; RV32-NEXT:    vmv.x.s a0, v12
-; RV32-NEXT:    vsetvli zero, a1, e8, m2, ta, ma
-; RV32-NEXT:    vslide1down.vx v10, v10, a0
-; RV32-NEXT:    vsetivli zero, 1, e8, m1, ta, ma
-; RV32-NEXT:    vslidedown.vi v12, v8, 15
-; RV32-NEXT:    vmv.x.s a0, v12
-; RV32-NEXT:    vsetvli zero, a1, e8, m2, ta, ma
-; RV32-NEXT:    vslide1down.vx v10, v10, a0
-; RV32-NEXT:    vslidedown.vi v12, v8, 17
-; RV32-NEXT:    vmv.x.s a0, v12
-; RV32-NEXT:    vslide1down.vx v10, v10, a0
-; RV32-NEXT:    vslidedown.vi v12, v8, 19
-; RV32-NEXT:    vmv.x.s a0, v12
-; RV32-NEXT:    vslide1down.vx v10, v10, a0
-; RV32-NEXT:    vslidedown.vi v12, v8, 21
-; RV32-NEXT:    vmv.x.s a0, v12
-; RV32-NEXT:    vslide1down.vx v10, v10, a0
-; RV32-NEXT:    vslidedown.vi v12, v8, 23
-; RV32-NEXT:    vmv.x.s a0, v12
-; RV32-NEXT:    vslide1down.vx v10, v10, a0
-; RV32-NEXT:    vslidedown.vi v12, v8, 25
-; RV32-NEXT:    vmv.x.s a0, v12
-; RV32-NEXT:    vslide1down.vx v10, v10, a0
-; RV32-NEXT:    vslidedown.vi v12, v8, 27
-; RV32-NEXT:    vmv.x.s a0, v12
-; RV32-NEXT:    vslide1down.vx v10, v10, a0
-; RV32-NEXT:    vslidedown.vi v12, v8, 29
-; RV32-NEXT:    vmv.x.s a0, v12
-; RV32-NEXT:    vslide1down.vx v10, v10, a0
-; RV32-NEXT:    vslidedown.vi v8, v8, 31
-; RV32-NEXT:    vmv.x.s a0, v8
-; RV32-NEXT:    vslide1down.vx v8, v10, a0
-; RV32-NEXT:    lbu a0, 33(sp)
-; RV32-NEXT:    lbu a1, 35(sp)
-; RV32-NEXT:    lbu a2, 37(sp)
-; RV32-NEXT:    lbu a3, 39(sp)
-; RV32-NEXT:    vslide1down.vx v8, v8, a0
-; RV32-NEXT:    vslide1down.vx v8, v8, a1
-; RV32-NEXT:    vslide1down.vx v8, v8, a2
-; RV32-NEXT:    vslide1down.vx v8, v8, a3
-; RV32-NEXT:    lbu a0, 41(sp)
-; RV32-NEXT:    lbu a1, 43(sp)
-; RV32-NEXT:    lbu a2, 45(sp)
-; RV32-NEXT:    lbu a3, 47(sp)
-; RV32-NEXT:    vslide1down.vx v8, v8, a0
-; RV32-NEXT:    vslide1down.vx v8, v8, a1
-; RV32-NEXT:    vslide1down.vx v8, v8, a2
-; RV32-NEXT:    vslide1down.vx v8, v8, a3
-; RV32-NEXT:    lbu a0, 49(sp)
-; RV32-NEXT:    lbu a1, 51(sp)
-; RV32-NEXT:    lbu a2, 53(sp)
-; RV32-NEXT:    lbu a3, 55(sp)
-; RV32-NEXT:    vslide1down.vx v8, v8, a0
-; RV32-NEXT:    vslide1down.vx v8, v8, a1
-; RV32-NEXT:    vslide1down.vx v8, v8, a2
-; RV32-NEXT:    vslide1down.vx v8, v8, a3
-; RV32-NEXT:    lbu a0, 57(sp)
-; RV32-NEXT:    lbu a1, 59(sp)
-; RV32-NEXT:    lbu a2, 61(sp)
-; RV32-NEXT:    lbu a3, 63(sp)
-; RV32-NEXT:    vslide1down.vx v8, v8, a0
-; RV32-NEXT:    vslide1down.vx v8, v8, a1
-; RV32-NEXT:    vslide1down.vx v8, v8, a2
-; RV32-NEXT:    vslide1down.vx v8, v8, a3
-; RV32-NEXT:    addi sp, s0, -128
-; RV32-NEXT:    lw ra, 124(sp) # 4-byte Folded Reload
-; RV32-NEXT:    lw s0, 120(sp) # 4-byte Folded Reload
-; RV32-NEXT:    addi sp, sp, 128
-; RV32-NEXT:    ret
-;
-; RV64-LABEL: vnsrl_v32i8_v64i8:
-; RV64:       # %bb.0:
-; RV64-NEXT:    addi sp, sp, -128
-; RV64-NEXT:    .cfi_def_cfa_offset 128
-; RV64-NEXT:    sd ra, 120(sp) # 8-byte Folded Spill
-; RV64-NEXT:    sd s0, 112(sp) # 8-byte Folded Spill
-; RV64-NEXT:    .cfi_offset ra, -8
-; RV64-NEXT:    .cfi_offset s0, -16
-; RV64-NEXT:    addi s0, sp, 128
-; RV64-NEXT:    .cfi_def_cfa s0, 0
-; RV64-NEXT:    andi sp, sp, -64
-; RV64-NEXT:    li a0, 64
-; RV64-NEXT:    mv a1, sp
-; RV64-NEXT:    vsetvli zero, a0, e8, m4, ta, ma
-; RV64-NEXT:    vse8.v v8, (a1)
-; RV64-NEXT:    vsetivli zero, 1, e8, m1, ta, ma
-; RV64-NEXT:    vslidedown.vi v10, v8, 1
-; RV64-NEXT:    vmv.x.s a0, v10
-; RV64-NEXT:    li a1, 32
-; RV64-NEXT:    vsetvli zero, a1, e8, m2, ta, ma
-; RV64-NEXT:    vmv.v.x v10, a0
-; RV64-NEXT:    vsetivli zero, 1, e8, m1, ta, ma
-; RV64-NEXT:    vslidedown.vi v12, v8, 3
-; RV64-NEXT:    vmv.x.s a0, v12
-; RV64-NEXT:    vsetvli zero, a1, e8, m2, ta, ma
-; RV64-NEXT:    vslide1down.vx v10, v10, a0
-; RV64-NEXT:    vsetivli zero, 1, e8, m1, ta, ma
-; RV64-NEXT:    vslidedown.vi v12, v8, 5
-; RV64-NEXT:    vmv.x.s a0, v12
-; RV64-NEXT:    vsetvli zero, a1, e8, m2, ta, ma
-; RV64-NEXT:    vslide1down.vx v10, v10, a0
-; RV64-NEXT:    vsetivli zero, 1, e8, m1, ta, ma
-; RV64-NEXT:    vslidedown.vi v12, v8, 7
-; RV64-NEXT:    vmv.x.s a0, v12
-; RV64-NEXT:    vsetvli zero, a1, e8, m2, ta, ma
-; RV64-NEXT:    vslide1down.vx v10, v10, a0
-; RV64-NEXT:    vsetivli zero, 1, e8, m1, ta, ma
-; RV64-NEXT:    vslidedown.vi v12, v8, 9
-; RV64-NEXT:    vmv.x.s a0, v12
-; RV64-NEXT:    vsetvli zero, a1, e8, m2, ta, ma
-; RV64-NEXT:    vslide1down.vx v10, v10, a0
-; RV64-NEXT:    vsetivli zero, 1, e8, m1, ta, ma
-; RV64-NEXT:    vslidedown.vi v12, v8, 11
-; RV64-NEXT:    vmv.x.s a0, v12
-; RV64-NEXT:    vsetvli zero, a1, e8, m2, ta, ma
-; RV64-NEXT:    vslide1down.vx v10, v10, a0
-; RV64-NEXT:    vsetivli zero, 1, e8, m1, ta, ma
-; RV64-NEXT:    vslidedown.vi v12, v8, 13
-; RV64-NEXT:    vmv.x.s a0, v12
-; RV64-NEXT:    vsetvli zero, a1, e8, m2, ta, ma
-; RV64-NEXT:    vslide1down.vx v10, v10, a0
-; RV64-NEXT:    vsetivli zero, 1, e8, m1, ta, ma
-; RV64-NEXT:    vslidedown.vi v12, v8, 15
-; RV64-NEXT:    vmv.x.s a0, v12
-; RV64-NEXT:    vsetvli zero, a1, e8, m2, ta, ma
-; RV64-NEXT:    vslide1down.vx v10, v10, a0
-; RV64-NEXT:    vslidedown.vi v12, v8, 17
-; RV64-NEXT:    vmv.x.s a0, v12
-; RV64-NEXT:    vslide1down.vx v10, v10, a0
-; RV64-NEXT:    vslidedown.vi v12, v8, 19
-; RV64-NEXT:    vmv.x.s a0, v12
-; RV64-NEXT:    vslide1down.vx v10, v10, a0
-; RV64-NEXT:    vslidedown.vi v12, v8, 21
-; RV64-NEXT:    vmv.x.s a0, v12
-; RV64-NEXT:    vslide1down.vx v10, v10, a0
-; RV64-NEXT:    vslidedown.vi v12, v8, 23
-; RV64-NEXT:    vmv.x.s a0, v12
-; RV64-NEXT:    vslide1down.vx v10, v10, a0
-; RV64-NEXT:    vslidedown.vi v12, v8, 25
-; RV64-NEXT:    vmv.x.s a0, v12
-; RV64-NEXT:    vslide1down.vx v10, v10, a0
-; RV64-NEXT:    vslidedown.vi v12, v8, 27
-; RV64-NEXT:    vmv.x.s a0, v12
-; RV64-NEXT:    vslide1down.vx v10, v10, a0
-; RV64-NEXT:    vslidedown.vi v12, v8, 29
-; RV64-NEXT:    vmv.x.s a0, v12
-; RV64-NEXT:    vslide1down.vx v10, v10, a0
-; RV64-NEXT:    vslidedown.vi v8, v8, 31
-; RV64-NEXT:    vmv.x.s a0, v8
-; RV64-NEXT:    vslide1down.vx v8, v10, a0
-; RV64-NEXT:    lbu a0, 33(sp)
-; RV64-NEXT:    lbu a1, 35(sp)
-; RV64-NEXT:    lbu a2, 37(sp)
-; RV64-NEXT:    lbu a3, 39(sp)
-; RV64-NEXT:    vslide1down.vx v8, v8, a0
-; RV64-NEXT:    vslide1down.vx v8, v8, a1
-; RV64-NEXT:    vslide1down.vx v8, v8, a2
-; RV64-NEXT:    vslide1down.vx v8, v8, a3
-; RV64-NEXT:    lbu a0, 41(sp)
-; RV64-NEXT:    lbu a1, 43(sp)
-; RV64-NEXT:    lbu a2, 45(sp)
-; RV64-NEXT:    lbu a3, 47(sp)
-; RV64-NEXT:    vslide1down.vx v8, v8, a0
-; RV64-NEXT:    vslide1down.vx v8, v8, a1
-; RV64-NEXT:    vslide1down.vx v8, v8, a2
-; RV64-NEXT:    vslide1down.vx v8, v8, a3
-; RV64-NEXT:    lbu a0, 49(sp)
-; RV64-NEXT:    lbu a1, 51(sp)
-; RV64-NEXT:    lbu a2, 53(sp)
-; RV64-NEXT:    lbu a3, 55(sp)
-; RV64-NEXT:    vslide1down.vx v8, v8, a0
-; RV64-NEXT:    vslide1down.vx v8, v8, a1
-; RV64-NEXT:    vslide1down.vx v8, v8, a2
-; RV64-NEXT:    vslide1down.vx v8, v8, a3
-; RV64-NEXT:    lbu a0, 57(sp)
-; RV64-NEXT:    lbu a1, 59(sp)
-; RV64-NEXT:    lbu a2, 61(sp)
-; RV64-NEXT:    lbu a3, 63(sp)
-; RV64-NEXT:    vslide1down.vx v8, v8, a0
-; RV64-NEXT:    vslide1down.vx v8, v8, a1
-; RV64-NEXT:    vslide1down.vx v8, v8, a2
-; RV64-NEXT:    vslide1down.vx v8, v8, a3
-; RV64-NEXT:    addi sp, s0, -128
-; RV64-NEXT:    ld ra, 120(sp) # 8-byte Folded Reload
-; RV64-NEXT:    ld s0, 112(sp) # 8-byte Folded Reload
-; RV64-NEXT:    addi sp, sp, 128
-; RV64-NEXT:    ret
+; CHECK-LABEL: vnsrl_v32i8_v64i8:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    li a0, 32
+; CHECK-NEXT:    vsetvli zero, a0, e8, m2, ta, ma
+; CHECK-NEXT:    vnsrl.wi v12, v8, 8
+; CHECK-NEXT:    vmv.v.v v8, v12
+; CHECK-NEXT:    ret
   %res = shufflevector <64 x i8> %in, <64 x i8> poison, <32 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15, i32 17, i32 19, i32 21, i32 23, i32 25, i32 27, i32 29, i32 31, i32 33, i32 35, i32 37, i32 39, i32 41, i32 43, i32 45, i32 47, i32 49, i32 51, i32 53, i32 55, i32 57, i32 59, i32 61, i32 63>
   ret <32 x i8> %res
 }
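
Reviewer note, appended after the patch rather than part of it: the rewritten predicate reports an extract_subvector as cheap exactly when the result is one half of the source, taken either from the low half (Index == 0) or the high half (Index == ResElts). The old Index < 32 guard, which the removed comment tied to the vslidedown.vi immediate range, goes away together with the general slide case, so an upper-half extract is now accepted even when its start index would not fit that immediate. Treating the upper-half extract as cheap is what lets the deinterleaving shuffle in the test above lower to a single vnsrl.wi instead of the scalarized sequence being deleted. Below is a minimal standalone sketch of the predicate; the function name isHalfVectorExtractCheap and the small driver are illustrative only, not LLVM API.

// Illustrative model of the predicate introduced above; not part of LLVM.
#include <cstdio>

// Cheap iff the extract takes exactly one half of the source vector and
// starts at a half boundary: the low half (Index == 0) or the high half
// (Index == ResElts).
static bool isHalfVectorExtractCheap(unsigned ResElts, unsigned SrcElts,
                                     unsigned Index) {
  return (ResElts * 2) == SrcElts && (Index == 0 || Index == ResElts);
}

int main() {
  // <32 x i8> taken from <64 x i8>, as in vnsrl_v32i8_v64i8 above:
  std::printf("%d\n", isHalfVectorExtractCheap(32, 64, 0));  // 1: low half
  std::printf("%d\n", isHalfVectorExtractCheap(32, 64, 32)); // 1: high half
  // Rejected cases: a quarter-sized extract and a misaligned half extract.
  std::printf("%d\n", isHalfVectorExtractCheap(16, 64, 16)); // 0
  std::printf("%d\n", isHalfVectorExtractCheap(32, 64, 16)); // 0
  return 0;
}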