Skip to content

Commit

Permalink
[DAG] foldShuffleOfConcatUndefs - ensure shuffles of upper (undef) su…
Browse files Browse the repository at this point in the history
…bvector elements are undef (PR50609)

shuffle(concat(x,undef),concat(y,undef)) -> concat(shuffle(x,y),shuffle(x,y))

If the original shuffle references any of the upper (undef) subvector elements, ensure the split shuffle masks use undef instead of an out-of-bounds value.

Fixes PR50609
  • Loading branch information
RKSimon committed Jun 8, 2021
1 parent dee1f0c commit 61a2d6b
Show file tree
Hide file tree
Showing 2 changed files with 45 additions and 0 deletions.
3 changes: 3 additions & 0 deletions llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -20427,6 +20427,9 @@ static SDValue foldShuffleOfConcatUndefs(ShuffleVectorSDNode *Shuf,
for (unsigned i = 0; i != NumElts; ++i) {
if (Mask[i] == -1)
continue;
// If we reference the upper (undef) subvector then the element is undef.
if ((Mask[i] % NumElts) >= HalfNumElts)
continue;
int M = Mask[i] < (int)NumElts ? Mask[i] : Mask[i] - (int)HalfNumElts;
if (i < HalfNumElts)
Mask0[i] = M;
Expand Down
42 changes: 42 additions & 0 deletions llvm/test/CodeGen/X86/pr50609.ll
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=sandybridge | FileCheck %s

; Regression test for PR50609.
; Builds a <16 x float> value in which only lane 0 is derived from the inputs
; (every other lane originates from undef), splats lane 0 into an <8 x float>,
; and stores that splat twice through @llvm.x86.avx.maskstore.ps.256: once at
; %RET under the low half of %__mask and once 8 floats later under the high half.
define void @PR50609(float* noalias nocapture %RET, float* noalias %aFOO, <16 x i32> %__mask) nounwind {
; CHECK-LABEL: PR50609:
; CHECK: # %bb.0: # %allocas
; CHECK-NEXT: leal 40(%rsi), %eax
; CHECK-NEXT: vmovq %rsi, %xmm2
; CHECK-NEXT: vmovd %eax, %xmm3
; CHECK-NEXT: vpsubq %xmm2, %xmm3, %xmm2
; CHECK-NEXT: vpsrad $31, %xmm2, %xmm3
; CHECK-NEXT: vpsrld $30, %xmm3, %xmm3
; CHECK-NEXT: vpaddd %xmm3, %xmm2, %xmm2
; CHECK-NEXT: vpsrad $2, %xmm2, %xmm2
; CHECK-NEXT: vcvtdq2ps %ymm2, %ymm2
; CHECK-NEXT: vpermilps {{.*#+}} xmm2 = xmm2[0,0,0,0]
; CHECK-NEXT: vinsertf128 $1, %xmm2, %ymm2, %ymm2
; CHECK-NEXT: vmaskmovps %ymm2, %ymm0, (%rdi)
; CHECK-NEXT: vmaskmovps %ymm2, %ymm1, 32(%rdi)
; CHECK-NEXT: vzeroupper
; CHECK-NEXT: retq
allocas:
; Place the integer value of %aFOO in lane 0 of a <16 x i64>; lanes 1-15 stay undef.
%aFOO_load_ptr2int = ptrtoint float* %aFOO to i64
%aFOO_load_ptr2int_broadcast = insertelement <16 x i64> undef, i64 %aFOO_load_ptr2int, i32 0
; Same for the address 10 floats past %aFOO.
%aFOO_load4_offset = getelementptr float, float* %aFOO, i64 10
%c_load_ptr2int = ptrtoint float* %aFOO_load4_offset to i64
%c_load_ptr2int_broadcast = insertelement <16 x i64> undef, i64 %c_load_ptr2int, i32 0
; Lane-wise difference of the two vectors, narrowed from i64 to i32.
%0 = sub <16 x i64> %c_load_ptr2int_broadcast, %aFOO_load_ptr2int_broadcast
%1 = trunc <16 x i64> %0 to <16 x i32>
; Only lane 0 is divided by 4; every other lane is divided by 1.
%2 = sdiv <16 x i32> %1, <i32 4, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
%3 = sitofp <16 x i32> %2 to <16 x float>
%ptr.i.i = bitcast float* %RET to i8*
; The all-zero shuffle mask selects lane 0 of %3 for each of the 8 result lanes.
; NOTE(review): presumably this 16-wide to 8-wide splat is what reaches the
; foldShuffleOfConcatUndefs DAG combine after type legalization - confirm
; against the PR50609 report.
%val0.i.i = shufflevector <16 x float> %3, <16 x float> undef, <8 x i32> zeroinitializer
; Split %__mask into its low (lanes 0-7) and high (lanes 8-15) halves.
%mask0.i.i = shufflevector <16 x i32> %__mask, <16 x i32> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
%mask1.i.i = shufflevector <16 x i32> %__mask, <16 x i32> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
; Masked store of the splat at %RET using the low mask half.
call void @llvm.x86.avx.maskstore.ps.256(i8* %ptr.i.i, <8 x i32> %mask0.i.i, <8 x float> %val0.i.i) #1
; Masked store of the same splat 8 floats past %RET using the high mask half.
%ptr1.i.i16 = getelementptr float, float* %RET, i64 8
%ptr1.i.i = bitcast float* %ptr1.i.i16 to i8*
call void @llvm.x86.avx.maskstore.ps.256(i8* %ptr1.i.i, <8 x i32> %mask1.i.i, <8 x float> %val0.i.i) #1
ret void
}
declare void @llvm.x86.avx.maskstore.ps.256(i8*, <8 x i32>, <8 x float>)

0 comments on commit 61a2d6b

Please sign in to comment.