Skip to content

Commit

Permalink
[DAG] SelectionDAG.computeKnownBits - add NSW/NUW flags support to ISD::SHL handling (#89877)
Browse files Browse the repository at this point in the history

Fixes #89414
  • Loading branch information
zxc12523 authored May 2, 2024
1 parent d00ed83 commit 171aeb2
Show file tree
Hide file tree
Showing 4 changed files with 142 additions and 53 deletions.
11 changes: 9 additions & 2 deletions llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -3527,16 +3527,23 @@ KnownBits SelectionDAG::computeKnownBits(SDValue Op, const APInt &DemandedElts,
Known.Zero.setBitsFrom(1);
break;
}
case ISD::SHL:
case ISD::SHL: {
Known = computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
Known2 = computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1);
Known = KnownBits::shl(Known, Known2);

bool NUW = Op->getFlags().hasNoUnsignedWrap();
bool NSW = Op->getFlags().hasNoSignedWrap();

bool ShAmtNonZero = Known2.isNonZero();

Known = KnownBits::shl(Known, Known2, NUW, NSW, ShAmtNonZero);

// Minimum shift low bits are known zero.
if (const APInt *ShMinAmt =
getValidMinimumShiftAmountConstant(Op, DemandedElts))
Known.Zero.setLowBits(ShMinAmt->getZExtValue());
break;
}
case ISD::SRL:
Known = computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
Known2 = computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1);
Expand Down
65 changes: 16 additions & 49 deletions llvm/test/CodeGen/X86/fold-int-pow2-with-fmul-or-fdiv.ll
Original file line number Diff line number Diff line change
Expand Up @@ -840,44 +840,18 @@ define double @fmul_pow_shl_cnt_fail_maybe_non_pow2(i64 %v, i64 %cnt) nounwind {
define <2 x float> @fmul_pow_shl_cnt_vec_fail_expensive_cast(<2 x i64> %cnt) nounwind {
; CHECK-SSE-LABEL: fmul_pow_shl_cnt_vec_fail_expensive_cast:
; CHECK-SSE: # %bb.0:
; CHECK-SSE-NEXT: pshufd {{.*#+}} xmm2 = xmm0[2,3,2,3]
; CHECK-SSE-NEXT: movdqa {{.*#+}} xmm3 = [2,2]
; CHECK-SSE-NEXT: movdqa %xmm3, %xmm1
; CHECK-SSE-NEXT: psllq %xmm2, %xmm1
; CHECK-SSE-NEXT: psllq %xmm0, %xmm3
; CHECK-SSE-NEXT: movq %xmm3, %rax
; CHECK-SSE-NEXT: testq %rax, %rax
; CHECK-SSE-NEXT: js .LBB12_1
; CHECK-SSE-NEXT: # %bb.2:
; CHECK-SSE-NEXT: xorps %xmm0, %xmm0
; CHECK-SSE-NEXT: cvtsi2ss %rax, %xmm0
; CHECK-SSE-NEXT: jmp .LBB12_3
; CHECK-SSE-NEXT: .LBB12_1:
; CHECK-SSE-NEXT: movq %rax, %rcx
; CHECK-SSE-NEXT: shrq %rcx
; CHECK-SSE-NEXT: andl $1, %eax
; CHECK-SSE-NEXT: orq %rcx, %rax
; CHECK-SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
; CHECK-SSE-NEXT: movdqa {{.*#+}} xmm2 = [2,2]
; CHECK-SSE-NEXT: movdqa %xmm2, %xmm3
; CHECK-SSE-NEXT: psllq %xmm1, %xmm3
; CHECK-SSE-NEXT: psllq %xmm0, %xmm2
; CHECK-SSE-NEXT: movq %xmm2, %rax
; CHECK-SSE-NEXT: xorps %xmm0, %xmm0
; CHECK-SSE-NEXT: cvtsi2ss %rax, %xmm0
; CHECK-SSE-NEXT: addss %xmm0, %xmm0
; CHECK-SSE-NEXT: .LBB12_3:
; CHECK-SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,3,2,3]
; CHECK-SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm3[2,3,2,3]
; CHECK-SSE-NEXT: movq %xmm1, %rax
; CHECK-SSE-NEXT: testq %rax, %rax
; CHECK-SSE-NEXT: js .LBB12_4
; CHECK-SSE-NEXT: # %bb.5:
; CHECK-SSE-NEXT: xorps %xmm1, %xmm1
; CHECK-SSE-NEXT: cvtsi2ss %rax, %xmm1
; CHECK-SSE-NEXT: jmp .LBB12_6
; CHECK-SSE-NEXT: .LBB12_4:
; CHECK-SSE-NEXT: movq %rax, %rcx
; CHECK-SSE-NEXT: shrq %rcx
; CHECK-SSE-NEXT: andl $1, %eax
; CHECK-SSE-NEXT: orq %rcx, %rax
; CHECK-SSE-NEXT: xorps %xmm1, %xmm1
; CHECK-SSE-NEXT: cvtsi2ss %rax, %xmm1
; CHECK-SSE-NEXT: addss %xmm1, %xmm1
; CHECK-SSE-NEXT: .LBB12_6:
; CHECK-SSE-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; CHECK-SSE-NEXT: mulps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; CHECK-SSE-NEXT: retq
Expand All @@ -886,18 +860,11 @@ define <2 x float> @fmul_pow_shl_cnt_vec_fail_expensive_cast(<2 x i64> %cnt) nou
; CHECK-AVX2: # %bb.0:
; CHECK-AVX2-NEXT: vpmovsxbq {{.*#+}} xmm1 = [2,2]
; CHECK-AVX2-NEXT: vpsllvq %xmm0, %xmm1, %xmm0
; CHECK-AVX2-NEXT: vpsrlq $1, %xmm0, %xmm1
; CHECK-AVX2-NEXT: vblendvpd %xmm0, %xmm1, %xmm0, %xmm1
; CHECK-AVX2-NEXT: vpextrq $1, %xmm1, %rax
; CHECK-AVX2-NEXT: vcvtsi2ss %rax, %xmm2, %xmm2
; CHECK-AVX2-NEXT: vmovq %xmm1, %rax
; CHECK-AVX2-NEXT: vcvtsi2ss %rax, %xmm3, %xmm1
; CHECK-AVX2-NEXT: vinsertps {{.*#+}} xmm1 = xmm1[0],xmm2[0],zero,zero
; CHECK-AVX2-NEXT: vaddps %xmm1, %xmm1, %xmm2
; CHECK-AVX2-NEXT: vpxor %xmm3, %xmm3, %xmm3
; CHECK-AVX2-NEXT: vpcmpgtq %xmm0, %xmm3, %xmm0
; CHECK-AVX2-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[1,3,2,3]
; CHECK-AVX2-NEXT: vblendvps %xmm0, %xmm2, %xmm1, %xmm0
; CHECK-AVX2-NEXT: vpextrq $1, %xmm0, %rax
; CHECK-AVX2-NEXT: vcvtsi2ss %rax, %xmm2, %xmm1
; CHECK-AVX2-NEXT: vmovq %xmm0, %rax
; CHECK-AVX2-NEXT: vcvtsi2ss %rax, %xmm2, %xmm0
; CHECK-AVX2-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0],xmm1[0],zero,zero
; CHECK-AVX2-NEXT: vbroadcastss {{.*#+}} xmm1 = [1.5E+1,1.5E+1,1.5E+1,1.5E+1]
; CHECK-AVX2-NEXT: vmulps %xmm1, %xmm0, %xmm0
; CHECK-AVX2-NEXT: retq
Expand All @@ -907,9 +874,9 @@ define <2 x float> @fmul_pow_shl_cnt_vec_fail_expensive_cast(<2 x i64> %cnt) nou
; CHECK-NO-FASTFMA-NEXT: vpmovsxbq {{.*#+}} xmm1 = [2,2]
; CHECK-NO-FASTFMA-NEXT: vpsllvq %xmm0, %xmm1, %xmm0
; CHECK-NO-FASTFMA-NEXT: vpextrq $1, %xmm0, %rax
; CHECK-NO-FASTFMA-NEXT: vcvtusi2ss %rax, %xmm2, %xmm1
; CHECK-NO-FASTFMA-NEXT: vcvtsi2ss %rax, %xmm2, %xmm1
; CHECK-NO-FASTFMA-NEXT: vmovq %xmm0, %rax
; CHECK-NO-FASTFMA-NEXT: vcvtusi2ss %rax, %xmm2, %xmm0
; CHECK-NO-FASTFMA-NEXT: vcvtsi2ss %rax, %xmm2, %xmm0
; CHECK-NO-FASTFMA-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0],xmm1[0],zero,zero
; CHECK-NO-FASTFMA-NEXT: vbroadcastss {{.*#+}} xmm1 = [1.5E+1,1.5E+1,1.5E+1,1.5E+1]
; CHECK-NO-FASTFMA-NEXT: vmulps %xmm1, %xmm0, %xmm0
Expand All @@ -919,7 +886,7 @@ define <2 x float> @fmul_pow_shl_cnt_vec_fail_expensive_cast(<2 x i64> %cnt) nou
; CHECK-FMA: # %bb.0:
; CHECK-FMA-NEXT: vpbroadcastq {{.*#+}} xmm1 = [2,2]
; CHECK-FMA-NEXT: vpsllvq %xmm0, %xmm1, %xmm0
; CHECK-FMA-NEXT: vcvtuqq2ps %xmm0, %xmm0
; CHECK-FMA-NEXT: vcvtqq2ps %xmm0, %xmm0
; CHECK-FMA-NEXT: vmulps {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %xmm0
; CHECK-FMA-NEXT: retq
%shl = shl nsw nuw <2 x i64> <i64 2, i64 2>, %cnt
Expand Down Expand Up @@ -986,7 +953,7 @@ define <4 x float> @fmul_pow_shl_cnt_vec_preserve_fma(<4 x i32> %cnt, <4 x float
; CHECK-FMA: # %bb.0:
; CHECK-FMA-NEXT: vpbroadcastd {{.*#+}} xmm2 = [2,2,2,2]
; CHECK-FMA-NEXT: vpsllvd %xmm0, %xmm2, %xmm0
; CHECK-FMA-NEXT: vcvtudq2ps %xmm0, %xmm0
; CHECK-FMA-NEXT: vcvtdq2ps %xmm0, %xmm0
; CHECK-FMA-NEXT: vfmadd132ps {{.*#+}} xmm0 = (xmm0 * mem) + xmm1
; CHECK-FMA-NEXT: retq
%shl = shl nsw nuw <4 x i32> <i32 2, i32 2, i32 2, i32 2>, %cnt
Expand Down
4 changes: 2 additions & 2 deletions llvm/test/CodeGen/X86/known-never-zero.ll
Original file line number Diff line number Diff line change
Expand Up @@ -1612,7 +1612,7 @@ define i32 @sext_known_nonzero(i16 %xx) {
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movl $256, %eax # imm = 0x100
; X86-NEXT: shll %cl, %eax
; X86-NEXT: cwtl
; X86-NEXT: movzwl %ax, %eax
; X86-NEXT: rep bsfl %eax, %eax
; X86-NEXT: retl
;
Expand All @@ -1622,7 +1622,7 @@ define i32 @sext_known_nonzero(i16 %xx) {
; X64-NEXT: movl $256, %eax # imm = 0x100
; X64-NEXT: # kill: def $cl killed $cl killed $ecx
; X64-NEXT: shll %cl, %eax
; X64-NEXT: cwtl
; X64-NEXT: movzwl %ax, %eax
; X64-NEXT: rep bsfl %eax, %eax
; X64-NEXT: retq
%x = shl nuw nsw i16 256, %xx
Expand Down
115 changes: 115 additions & 0 deletions llvm/test/CodeGen/X86/pr89877.ll
Original file line number Diff line number Diff line change
@@ -0,0 +1,115 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 3
; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+sse2 | FileCheck %s --check-prefixes=X86
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefixes=X64

define i32 @sext_known_nonzero(i16 %xx) {
; X86-LABEL: sext_known_nonzero:
; X86: # %bb.0:
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movl $256, %eax # imm = 0x100
; X86-NEXT: shll %cl, %eax
; X86-NEXT: cwtl
; X86-NEXT: testl %eax, %eax
; X86-NEXT: je .LBB0_1
; X86-NEXT: # %bb.2: # %cond.false
; X86-NEXT: rep bsfl %eax, %eax
; X86-NEXT: retl
; X86-NEXT: .LBB0_1:
; X86-NEXT: movl $32, %eax
; X86-NEXT: retl
;
; X64-LABEL: sext_known_nonzero:
; X64: # %bb.0:
; X64-NEXT: movl %edi, %ecx
; X64-NEXT: movl $256, %eax # imm = 0x100
; X64-NEXT: # kill: def $cl killed $cl killed $ecx
; X64-NEXT: shll %cl, %eax
; X64-NEXT: cwtl
; X64-NEXT: testl %eax, %eax
; X64-NEXT: je .LBB0_1
; X64-NEXT: # %bb.2: # %cond.false
; X64-NEXT: rep bsfl %eax, %eax
; X64-NEXT: retq
; X64-NEXT: .LBB0_1:
; X64-NEXT: movl $32, %eax
; X64-NEXT: retq
%x = shl i16 256, %xx
%z = sext i16 %x to i32
%r = call i32 @llvm.cttz.i32(i32 %z, i1 false)
ret i32 %r
}

define i32 @sext_known_nonzero_nuw(i16 %xx) {
; X86-LABEL: sext_known_nonzero_nuw:
; X86: # %bb.0:
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movl $256, %eax # imm = 0x100
; X86-NEXT: shll %cl, %eax
; X86-NEXT: cwtl
; X86-NEXT: rep bsfl %eax, %eax
; X86-NEXT: retl
;
; X64-LABEL: sext_known_nonzero_nuw:
; X64: # %bb.0:
; X64-NEXT: movl %edi, %ecx
; X64-NEXT: movl $256, %eax # imm = 0x100
; X64-NEXT: # kill: def $cl killed $cl killed $ecx
; X64-NEXT: shll %cl, %eax
; X64-NEXT: cwtl
; X64-NEXT: rep bsfl %eax, %eax
; X64-NEXT: retq
%x = shl nuw i16 256, %xx
%z = sext i16 %x to i32
%r = call i32 @llvm.cttz.i32(i32 %z, i1 false)
ret i32 %r
}

define i32 @sext_known_nonzero_nsw(i16 %xx) {
; X86-LABEL: sext_known_nonzero_nsw:
; X86: # %bb.0:
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movl $256, %eax # imm = 0x100
; X86-NEXT: shll %cl, %eax
; X86-NEXT: movzwl %ax, %eax
; X86-NEXT: rep bsfl %eax, %eax
; X86-NEXT: retl
;
; X64-LABEL: sext_known_nonzero_nsw:
; X64: # %bb.0:
; X64-NEXT: movl %edi, %ecx
; X64-NEXT: movl $256, %eax # imm = 0x100
; X64-NEXT: # kill: def $cl killed $cl killed $ecx
; X64-NEXT: shll %cl, %eax
; X64-NEXT: movzwl %ax, %eax
; X64-NEXT: rep bsfl %eax, %eax
; X64-NEXT: retq
%x = shl nsw i16 256, %xx
%z = sext i16 %x to i32
%r = call i32 @llvm.cttz.i32(i32 %z, i1 false)
ret i32 %r
}

define i32 @sext_known_nonzero_nuw_nsw(i16 %xx) {
; X86-LABEL: sext_known_nonzero_nuw_nsw:
; X86: # %bb.0:
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movl $256, %eax # imm = 0x100
; X86-NEXT: shll %cl, %eax
; X86-NEXT: movzwl %ax, %eax
; X86-NEXT: rep bsfl %eax, %eax
; X86-NEXT: retl
;
; X64-LABEL: sext_known_nonzero_nuw_nsw:
; X64: # %bb.0:
; X64-NEXT: movl %edi, %ecx
; X64-NEXT: movl $256, %eax # imm = 0x100
; X64-NEXT: # kill: def $cl killed $cl killed $ecx
; X64-NEXT: shll %cl, %eax
; X64-NEXT: movzwl %ax, %eax
; X64-NEXT: rep bsfl %eax, %eax
; X64-NEXT: retq
%x = shl nuw nsw i16 256, %xx
%z = sext i16 %x to i32
%r = call i32 @llvm.cttz.i32(i32 %z, i1 false)
ret i32 %r
}

0 comments on commit 171aeb2

Please sign in to comment.