Skip to content

Commit

Permalink
[AVX512] Only look at lower bit in constant scalar masks
Browse files Browse the repository at this point in the history
For scalar masked instructions, only the lower bit of the mask is relevant. So for constant masks we should either do an unmasked operation or no operation, depending on the value of the lower bit.
This patch handles cases where the lower bit is '1'.

Differential Revision: https://reviews.llvm.org/D32805

llvm-svn: 302546
  • Loading branch information
Guy Blank committed May 9, 2017
1 parent f7f6f82 commit 0c42d8c
Show file tree
Hide file tree
Showing 2 changed files with 10 additions and 8 deletions.
6 changes: 4 additions & 2 deletions llvm/lib/Target/X86/X86ISelLowering.cpp
Expand Up @@ -19021,8 +19021,10 @@ static SDValue getScalarMaskingNode(SDValue Op, SDValue Mask,
SDValue PreservedSrc,
const X86Subtarget &Subtarget,
SelectionDAG &DAG) {
if (isAllOnesConstant(Mask))
return Op;

if (auto *MaskConst = dyn_cast<ConstantSDNode>(Mask))
if (MaskConst->getZExtValue() & 0x1)
return Op;

MVT VT = Op.getSimpleValueType();
SDLoc dl(Op);
Expand Down
12 changes: 6 additions & 6 deletions llvm/test/CodeGen/X86/avx512-scalar_mask.ll
Expand Up @@ -26,6 +26,7 @@ define <4 x float>@test_var_maskz(<4 x float> %v0, <4 x float> %v1, <4 x float>
ret < 4 x float> %res
}

; FIXME: we should just return %xmm0 here.
define <4 x float>@test_const0_mask(<4 x float> %v0, <4 x float> %v1, <4 x float> %v2) {
; CHECK-LABEL: test_const0_mask:
; CHECK: ## BB#0:
Expand All @@ -36,6 +37,7 @@ define <4 x float>@test_const0_mask(<4 x float> %v0, <4 x float> %v1, <4 x float
ret < 4 x float> %res
}

; FIXME: we should zero the lower element of xmm0 and return it.
define <4 x float>@test_const0_maskz(<4 x float> %v0, <4 x float> %v1, <4 x float> %v2) {
; CHECK-LABEL: test_const0_maskz:
; CHECK: ## BB#0:
Expand All @@ -46,6 +48,7 @@ define <4 x float>@test_const0_maskz(<4 x float> %v0, <4 x float> %v1, <4 x floa
ret < 4 x float> %res
}

; FIXME: we should just return %xmm0 here.
define <4 x float>@test_const2_mask(<4 x float> %v0, <4 x float> %v1, <4 x float> %v2) {
; CHECK-LABEL: test_const2_mask:
; CHECK: ## BB#0:
Expand All @@ -56,6 +59,7 @@ define <4 x float>@test_const2_mask(<4 x float> %v0, <4 x float> %v1, <4 x float
ret < 4 x float> %res
}

; FIXME: we should zero the lower element of xmm0 and return it.
define <4 x float>@test_const2_maskz(<4 x float> %v0, <4 x float> %v1, <4 x float> %v2) {
; CHECK-LABEL: test_const2_maskz:
; CHECK: ## BB#0:
Expand Down Expand Up @@ -87,9 +91,7 @@ define <4 x float>@test_const_allone_maskz(<4 x float> %v0, <4 x float> %v1, <4
define <4 x float>@test_const_3_mask(<4 x float> %v0, <4 x float> %v1, <4 x float> %v2) {
; CHECK-LABEL: test_const_3_mask:
; CHECK: ## BB#0:
; CHECK-NEXT: kxnorw %k0, %k0, %k0
; CHECK-NEXT: kshiftrw $15, %k0, %k1
; CHECK-NEXT: vfmadd213ss %xmm2, %xmm1, %xmm0 {%k1}
; CHECK-NEXT: vfmadd213ss %xmm2, %xmm1, %xmm0
; CHECK-NEXT: retq
%res = call <4 x float> @llvm.x86.avx512.mask.vfmadd.ss(<4 x float> %v0,<4 x float> %v1, <4 x float> %v2, i8 3, i32 4)
ret < 4 x float> %res
Expand All @@ -98,9 +100,7 @@ define <4 x float>@test_const_3_mask(<4 x float> %v0, <4 x float> %v1, <4 x floa
define <4 x float>@test_const_3_maskz(<4 x float> %v0, <4 x float> %v1, <4 x float> %v2) {
; CHECK-LABEL: test_const_3_maskz:
; CHECK: ## BB#0:
; CHECK-NEXT: kxnorw %k0, %k0, %k0
; CHECK-NEXT: kshiftrw $15, %k0, %k1
; CHECK-NEXT: vfmadd213ss %xmm2, %xmm1, %xmm0 {%k1} {z}
; CHECK-NEXT: vfmadd213ss %xmm2, %xmm1, %xmm0
; CHECK-NEXT: retq
%res = call <4 x float> @llvm.x86.avx512.maskz.vfmadd.ss(<4 x float> %v0,<4 x float> %v1, <4 x float> %v2, i8 3, i32 4)
ret < 4 x float> %res
Expand Down

0 comments on commit 0c42d8c

Please sign in to comment.