-
Notifications
You must be signed in to change notification settings - Fork 11.6k
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
[DAG] foldShuffleOfConcatUndefs - ensure shuffles of upper (undef) subvector elements are undef (PR50609)
shuffle(concat(x,undef),concat(y,undef)) -> concat(shuffle(x,y),shuffle(x,y)). If the original shuffle references any of the upper (undef) subvector elements, ensure the split shuffle masks use undef instead of an out-of-bounds value. Fixes PR50609.
- Loading branch information
Showing
2 changed files
with
45 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,42 @@ | ||
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py | ||
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=sandybridge | FileCheck %s | ||
|
||
; Regression test for PR50609. | ||
; The function builds a <16 x float> in which only lane 0 is defined, splats | ||
; that lane into an <8 x float>, and stores the splat twice through the AVX | ||
; masked-store intrinsic, using the low and high halves of %__mask. | ||
; The CHECK lines below are autogenerated (see the NOTE at the top of the | ||
; file); regenerate them with utils/update_llc_test_checks.py rather than | ||
; editing them by hand. | ||
define void @PR50609(float* noalias nocapture %RET, float* noalias %aFOO, <16 x i32> %__mask) nounwind { | ||
; CHECK-LABEL: PR50609: | ||
; CHECK: # %bb.0: # %allocas | ||
; CHECK-NEXT: leal 40(%rsi), %eax | ||
; CHECK-NEXT: vmovq %rsi, %xmm2 | ||
; CHECK-NEXT: vmovd %eax, %xmm3 | ||
; CHECK-NEXT: vpsubq %xmm2, %xmm3, %xmm2 | ||
; CHECK-NEXT: vpsrad $31, %xmm2, %xmm3 | ||
; CHECK-NEXT: vpsrld $30, %xmm3, %xmm3 | ||
; CHECK-NEXT: vpaddd %xmm3, %xmm2, %xmm2 | ||
; CHECK-NEXT: vpsrad $2, %xmm2, %xmm2 | ||
; CHECK-NEXT: vcvtdq2ps %ymm2, %ymm2 | ||
; CHECK-NEXT: vpermilps {{.*#+}} xmm2 = xmm2[0,0,0,0] | ||
; CHECK-NEXT: vinsertf128 $1, %xmm2, %ymm2, %ymm2 | ||
; CHECK-NEXT: vmaskmovps %ymm2, %ymm0, (%rdi) | ||
; CHECK-NEXT: vmaskmovps %ymm2, %ymm1, 32(%rdi) | ||
; CHECK-NEXT: vzeroupper | ||
; CHECK-NEXT: retq | ||
allocas: | ||
; Address of %aFOO as an integer, placed in lane 0 of a <16 x i64>; lanes 1-15 stay undef. | ||
%aFOO_load_ptr2int = ptrtoint float* %aFOO to i64 | ||
%aFOO_load_ptr2int_broadcast = insertelement <16 x i64> undef, i64 %aFOO_load_ptr2int, i32 0 | ||
; Address of the element 10 floats past %aFOO, again only in lane 0. | ||
%aFOO_load4_offset = getelementptr float, float* %aFOO, i64 10 | ||
%c_load_ptr2int = ptrtoint float* %aFOO_load4_offset to i64 | ||
%c_load_ptr2int_broadcast = insertelement <16 x i64> undef, i64 %c_load_ptr2int, i32 0 | ||
; Lane 0 = difference of the two addresses; every other lane is undef - undef. | ||
%0 = sub <16 x i64> %c_load_ptr2int_broadcast, %aFOO_load_ptr2int_broadcast | ||
%1 = trunc <16 x i64> %0 to <16 x i32> | ||
; Only lane 0 is divided by 4; the remaining lanes are divided by 1. | ||
; NOTE(review): presumably this turns the byte distance into a float-element count - confirm. | ||
%2 = sdiv <16 x i32> %1, <i32 4, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1> | ||
%3 = sitofp <16 x i32> %2 to <16 x float> | ||
%ptr.i.i = bitcast float* %RET to i8* | ||
; All-zero shuffle mask: splat lane 0 of %3 across the 8 result lanes. | ||
%val0.i.i = shufflevector <16 x float> %3, <16 x float> undef, <8 x i32> zeroinitializer | ||
; Extract lanes 0-7 and lanes 8-15 of %__mask as two <8 x i32> store masks. | ||
%mask0.i.i = shufflevector <16 x i32> %__mask, <16 x i32> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7> | ||
%mask1.i.i = shufflevector <16 x i32> %__mask, <16 x i32> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> | ||
; First masked store: the splat goes to %RET under the low-half mask. | ||
call void @llvm.x86.avx.maskstore.ps.256(i8* %ptr.i.i, <8 x i32> %mask0.i.i, <8 x float> %val0.i.i) #1 | ||
; Second masked store: the same splat goes 8 floats past %RET under the high-half mask. | ||
%ptr1.i.i16 = getelementptr float, float* %RET, i64 8 | ||
%ptr1.i.i = bitcast float* %ptr1.i.i16 to i8* | ||
call void @llvm.x86.avx.maskstore.ps.256(i8* %ptr1.i.i, <8 x i32> %mask1.i.i, <8 x float> %val0.i.i) #1 | ||
ret void | ||
} | ||
; AVX 256-bit masked-store intrinsic called by @PR50609; the call sites pass (i8* destination, <8 x i32> mask, <8 x float> value). | ||
declare void @llvm.x86.avx.maskstore.ps.256(i8*, <8 x i32>, <8 x float>) |