[DAGCombine] Fold Splat(bitcast(buildvector(x,..))) to splat(x)
This teaches the backend to fold
splat(bitcast(buildvector(x,..))) or
splat(bitcast(scalar_to_vector(x))) into a single splat of x.

This only handles splats of lane 0, which is only valid under
little-endian layouts, and needs to be a little careful with the types it
creates for the new buildvector.
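
For illustration, this is the pattern in its simplest form, taken from the
arm64-dup.ll test updated below (a lane-0 splat shuffle of a scalar bitcast):

  define <8 x i16> @bitcast_i64_v8i16(i64 %a) {
    %b = bitcast i64 %a to <4 x i16>
    %r = shufflevector <4 x i16> %b, <4 x i16> poison, <8 x i32> zeroinitializer
    ret <8 x i16> %r
  }

With the fold, the splat is rebuilt directly from the scalar %a, so AArch64
selects a single dup.8h from w0 instead of an fmov into a vector register
followed by a lane dup.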

Differential Revision: https://reviews.llvm.org/D139611
davemgreen committed Dec 12, 2022
1 parent 8005332 commit fd71692
Showing 6 changed files with 26 additions and 16 deletions.
17 changes: 17 additions & 0 deletions llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -23021,6 +23021,23 @@ SDValue DAGCombiner::visitVECTOR_SHUFFLE(SDNode *N) {
       if (auto *Idx = dyn_cast<ConstantSDNode>(N0.getOperand(2)))
         if (Idx->getAPIntValue() == SplatIndex)
           return DAG.getSplatBuildVector(VT, SDLoc(N), N0.getOperand(1));
+
+    // Look through a bitcast if LE and splatting lane 0, through to a
+    // scalar_to_vector or a build_vector.
+    if (N0.getOpcode() == ISD::BITCAST && N0.getOperand(0).hasOneUse() &&
+        SplatIndex == 0 && DAG.getDataLayout().isLittleEndian() &&
+        (N0.getOperand(0).getOpcode() == ISD::SCALAR_TO_VECTOR ||
+         N0.getOperand(0).getOpcode() == ISD::BUILD_VECTOR)) {
+      EVT N00VT = N0.getOperand(0).getValueType();
+      if (VT.getScalarSizeInBits() <= N00VT.getScalarSizeInBits() &&
+          VT.isInteger() && N00VT.isInteger()) {
+        EVT InVT =
+            TLI.getTypeToTransformTo(*DAG.getContext(), VT.getScalarType());
+        SDValue Op = DAG.getZExtOrTrunc(N0.getOperand(0).getOperand(0),
+                                        SDLoc(N), InVT);
+        return DAG.getSplatBuildVector(VT, SDLoc(N), Op);
+      }
+    }
   }

   // If this is a bit convert that changes the element type of the vector but
3 changes: 1 addition & 2 deletions llvm/test/CodeGen/AArch64/arm64-dup.ll
@@ -508,8 +508,7 @@ define <4 x i32> @dup_const24(<2 x i32> %A, <2 x i32> %B, <4 x i32> %C) nounwind
 define <8 x i16> @bitcast_i64_v8i16(i64 %a) {
 ; CHECK-LABEL: bitcast_i64_v8i16:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    fmov d0, x0
-; CHECK-NEXT:    dup.8h v0, v0[0]
+; CHECK-NEXT:    dup.8h v0, w0
 ; CHECK-NEXT:    ret
   %b = bitcast i64 %a to <4 x i16>
   %r = shufflevector <4 x i16> %b, <4 x i16> poison, <8 x i32> zeroinitializer
7 changes: 3 additions & 4 deletions llvm/test/CodeGen/AArch64/arm64-neon-3vdiff.ll
@@ -2538,12 +2538,11 @@ define <8 x i16> @cmplx_mul_combined_re_im(<8 x i16> noundef %a, i64 %scale.coer
 ; CHECK-LABEL: cmplx_mul_combined_re_im:
 ; CHECK:       // %bb.0: // %entry
 ; CHECK-NEXT:    lsr x8, x0, #16
+; CHECK-NEXT:    adrp x9, .LCPI196_0
 ; CHECK-NEXT:    fmov d4, x0
 ; CHECK-NEXT:    rev32 v5.8h, v0.8h
-; CHECK-NEXT:    fmov d1, x8
-; CHECK-NEXT:    adrp x8, .LCPI196_0
-; CHECK-NEXT:    dup v1.8h, v1.h[0]
-; CHECK-NEXT:    ldr q3, [x8, :lo12:.LCPI196_0]
+; CHECK-NEXT:    dup v1.8h, w8
+; CHECK-NEXT:    ldr q3, [x9, :lo12:.LCPI196_0]
 ; CHECK-NEXT:    sqneg v2.8h, v1.8h
 ; CHECK-NEXT:    tbl v1.16b, { v1.16b, v2.16b }, v3.16b
 ; CHECK-NEXT:    sqdmull v2.4s, v0.4h, v4.h[0]
4 changes: 0 additions & 4 deletions llvm/test/CodeGen/Thumb2/mve-vdup.ll
@@ -399,8 +399,6 @@ define arm_aapcs_vfpcc <8 x i16> @bitcast_i64_v8i16(i64 %a) {
 define arm_aapcs_vfpcc <8 x i16> @bitcast_i128_v8i16(i128 %a) {
 ; CHECK-LE-LABEL: bitcast_i128_v8i16:
 ; CHECK-LE:       @ %bb.0:
-; CHECK-LE-NEXT:    vmov.32 q0[0], r0
-; CHECK-LE-NEXT:    vmov.u16 r0, q0[0]
 ; CHECK-LE-NEXT:    vdup.16 q0, r0
 ; CHECK-LE-NEXT:    bx lr
 ;
@@ -549,8 +547,6 @@ define arm_aapcs_vfpcc <8 x i16> @bitcast_v2f64_v8i16(<2 x i64> %a) {
 define arm_aapcs_vfpcc <8 x i16> @other_max_case(i32 %blockSize) {
 ; CHECK-LE-LABEL: other_max_case:
 ; CHECK-LE:       @ %bb.0:
-; CHECK-LE-NEXT:    vmov.32 q0[0], r0
-; CHECK-LE-NEXT:    vmov.u16 r0, q0[0]
 ; CHECK-LE-NEXT:    vdup.16 q0, r0
 ; CHECK-LE-NEXT:    bx lr
 ;
7 changes: 3 additions & 4 deletions llvm/test/CodeGen/WebAssembly/simd-shuffle-bitcast.ll
@@ -1,8 +1,7 @@
 ; RUN: llc < %s -asm-verbose=false -verify-machineinstrs -disable-wasm-fallthrough-return-opt -wasm-disable-explicit-locals -wasm-keep-registers -mattr=+simd128 | FileCheck %s

 ; Test that a splat shuffle of an fp-to-int bitcasted vector correctly
-; optimizes and lowers to a single splat instruction. Without a custom
-; DAG combine, this ends up doing both a splat and a shuffle.
+; optimizes and lowers to a single splat instruction.

 target triple = "wasm32-unknown-unknown"

@@ -19,8 +18,8 @@ define <4 x i32> @f32x4_splat(float %x) {

 ; CHECK-LABEL: not_a_vec:
 ; CHECK-NEXT:  .functype not_a_vec (i64, i64) -> (v128){{$}}
-; CHECK-NEXT:  i64x2.splat $push[[L1:[0-9]+]]=, $0{{$}}
-; CHECK-NEXT:  i8x16.shuffle $push[[R:[0-9]+]]=, $pop[[L1]], $2, 0, 1, 2, 3
+; CHECK-NEXT:  i32.wrap_i64 $push[[L:[0-9]+]]=, $0
+; CHECK-NEXT:  i32x4.splat $push[[R:[0-9]+]]=, $pop[[L]]
 ; CHECK-NEXT:  return $pop[[R]]
 define <4 x i32> @not_a_vec(i128 %x) {
   %a = bitcast i128 %x to <4 x i32>
4 changes: 2 additions & 2 deletions llvm/test/CodeGen/X86/vector-shuffle-mmx.ll
@@ -74,8 +74,8 @@ define void @test2() nounwind {
 ; X64-LABEL: test2:
 ; X64:       ## %bb.0: ## %entry
 ; X64-NEXT:    movq _tmp_V2i@GOTPCREL(%rip), %rax
-; X64-NEXT:    movq {{.*#+}} xmm0 = mem[0],zero
-; X64-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,0,1,1]
+; X64-NEXT:    movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; X64-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
 ; X64-NEXT:    movq %xmm0, (%rax)
 ; X64-NEXT:    retq
 entry:
