Skip to content

Commit

Permalink
[DAGcombiner] Fix incorrect sinking of a truncate into the operand of…
Browse files Browse the repository at this point in the history
… a shift.

This fixes a regression introduced by revision 268094.
Revision 268094 added the following dag combine rule:
// trunc (shl x, K) -> shl (trunc x), K => K < vt.size / 2

That rule converts a truncate of a shift-by-constant into a shift of a truncated
value. We do this only if the shift count is less than half the size in bits of
the truncated value (K < vt.size / 2).

The problem is that the constraint on the shift count is incorrect, so the rule
doesn't work well in some cases involving vector types. The combine rule should
have been written instead like this:
// trunc (shl x, K) -> shl (trunc x), K => K < vt.getScalarSizeInBits()

Basically, if K is smaller than the "scalar size in bits" of the truncated value
then we know that by "sinking" the truncate into the operand of the shift we
would never accidentally make the shift undefined.

This patch fixes the check on the shift count, and adds test cases to make sure
that we don't regress the behavior.

Differential Revision: https://reviews.llvm.org/D24154

llvm-svn: 280482
  • Loading branch information
Andrea Di Biagio authored and Andrea Di Biagio committed Sep 2, 2016
1 parent 7454145 commit fd503e5
Show file tree
Hide file tree
Showing 2 changed files with 142 additions and 3 deletions.
6 changes: 3 additions & 3 deletions llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
Expand Up @@ -7188,15 +7188,15 @@ SDValue DAGCombiner::visitTRUNCATE(SDNode *N) {
}
}

// trunc (shl x, K) -> shl (trunc x), K => K < vt.size / 2
// trunc (shl x, K) -> shl (trunc x), K => K < VT.getScalarSizeInBits()
if (N0.getOpcode() == ISD::SHL && N0.hasOneUse() &&
(!LegalOperations || TLI.isOperationLegalOrCustom(ISD::SHL, VT)) &&
TLI.isTypeDesirableForOp(ISD::SHL, VT)) {
if (const ConstantSDNode *CAmt = isConstOrConstSplat(N0.getOperand(1))) {
uint64_t Amt = CAmt->getZExtValue();
unsigned Size = VT.getSizeInBits();
unsigned Size = VT.getScalarSizeInBits();

if (Amt < Size / 2) {
if (Amt < Size) {
SDLoc SL(N);
EVT AmtVT = TLI.getShiftAmountTy(VT, DAG.getDataLayout());

Expand Down
139 changes: 139 additions & 0 deletions llvm/test/CodeGen/X86/reduce-trunc-shl.ll
Expand Up @@ -26,3 +26,142 @@ define void @trunc_shl_7_v4i32_v4i64(<4 x i32> addrspace(1)* %out, <4 x i64> add
store <4 x i32> %trunc, <4 x i32> addrspace(1)* %out
ret void
}

define <8 x i16> @trunc_shl_v8i16_v8i32(<8 x i32> %a) {
; SSE2-LABEL: trunc_shl_v8i16_v8i32:
; SSE2: # BB#0:
; SSE2-NEXT: pslld $17, %xmm0
; SSE2-NEXT: pslld $17, %xmm1
; SSE2-NEXT: pslld $16, %xmm1
; SSE2-NEXT: psrad $16, %xmm1
; SSE2-NEXT: pslld $16, %xmm0
; SSE2-NEXT: psrad $16, %xmm0
; SSE2-NEXT: packssdw %xmm1, %xmm0
; SSE2-NEXT: retq
;
; AVX2-LABEL: trunc_shl_v8i16_v8i32:
; AVX2: # BB#0:
; AVX2-NEXT: vpslld $17, %ymm0, %ymm0
; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,4,5,8,9,12,13],zero,zero,zero,zero,zero,zero,zero,zero,ymm0[16,17,20,21,24,25,28,29],zero,zero,zero,zero,zero,zero,zero,zero
; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,2,3]
; AVX2-NEXT: # kill: %XMM0<def> %XMM0<kill> %YMM0<kill>
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: retq
%shl = shl <8 x i32> %a, <i32 17, i32 17, i32 17, i32 17, i32 17, i32 17, i32 17, i32 17>
%conv = trunc <8 x i32> %shl to <8 x i16>
ret <8 x i16> %conv
}

define void @trunc_shl_31_i32_i64(i32* %out, i64* %in) {
; SSE2-LABEL: trunc_shl_31_i32_i64:
; SSE2: # BB#0:
; SSE2-NEXT: movl (%rsi), %eax
; SSE2-NEXT: shll $31, %eax
; SSE2-NEXT: movl %eax, (%rdi)
; SSE2-NEXT: retq
;
; AVX2-LABEL: trunc_shl_31_i32_i64:
; AVX2: # BB#0:
; AVX2-NEXT: movl (%rsi), %eax
; AVX2-NEXT: shll $31, %eax
; AVX2-NEXT: movl %eax, (%rdi)
; AVX2-NEXT: retq
%val = load i64, i64* %in
%shl = shl i64 %val, 31
%trunc = trunc i64 %shl to i32
store i32 %trunc, i32* %out
ret void
}

define void @trunc_shl_32_i32_i64(i32* %out, i64* %in) {
; SSE2-LABEL: trunc_shl_32_i32_i64:
; SSE2: # BB#0:
; SSE2-NEXT: movl $0, (%rdi)
; SSE2-NEXT: retq
;
; AVX2-LABEL: trunc_shl_32_i32_i64:
; AVX2: # BB#0:
; AVX2-NEXT: movl $0, (%rdi)
; AVX2-NEXT: retq
%val = load i64, i64* %in
%shl = shl i64 %val, 32
%trunc = trunc i64 %shl to i32
store i32 %trunc, i32* %out
ret void
}

define void @trunc_shl_15_i16_i64(i16* %out, i64* %in) {
; SSE2-LABEL: trunc_shl_15_i16_i64:
; SSE2: # BB#0:
; SSE2-NEXT: movzwl (%rsi), %eax
; SSE2-NEXT: shlw $15, %ax
; SSE2-NEXT: movw %ax, (%rdi)
; SSE2-NEXT: retq
;
; AVX2-LABEL: trunc_shl_15_i16_i64:
; AVX2: # BB#0:
; AVX2-NEXT: movzwl (%rsi), %eax
; AVX2-NEXT: shlw $15, %ax
; AVX2-NEXT: movw %ax, (%rdi)
; AVX2-NEXT: retq
%val = load i64, i64* %in
%shl = shl i64 %val, 15
%trunc = trunc i64 %shl to i16
store i16 %trunc, i16* %out
ret void
}

define void @trunc_shl_16_i16_i64(i16* %out, i64* %in) {
; SSE2-LABEL: trunc_shl_16_i16_i64:
; SSE2: # BB#0:
; SSE2-NEXT: movw $0, (%rdi)
; SSE2-NEXT: retq
;
; AVX2-LABEL: trunc_shl_16_i16_i64:
; AVX2: # BB#0:
; AVX2-NEXT: movw $0, (%rdi)
; AVX2-NEXT: retq
%val = load i64, i64* %in
%shl = shl i64 %val, 16
%trunc = trunc i64 %shl to i16
store i16 %trunc, i16* %out
ret void
}

define void @trunc_shl_7_i8_i64(i8* %out, i64* %in) {
; SSE2-LABEL: trunc_shl_7_i8_i64:
; SSE2: # BB#0:
; SSE2-NEXT: movb (%rsi), %al
; SSE2-NEXT: shlb $7, %al
; SSE2-NEXT: movb %al, (%rdi)
; SSE2-NEXT: retq
;
; AVX2-LABEL: trunc_shl_7_i8_i64:
; AVX2: # BB#0:
; AVX2-NEXT: movb (%rsi), %al
; AVX2-NEXT: shlb $7, %al
; AVX2-NEXT: movb %al, (%rdi)
; AVX2-NEXT: retq
%val = load i64, i64* %in
%shl = shl i64 %val, 7
%trunc = trunc i64 %shl to i8
store i8 %trunc, i8* %out
ret void
}

define void @trunc_shl_8_i8_i64(i8* %out, i64* %in) {
; SSE2-LABEL: trunc_shl_8_i8_i64:
; SSE2: # BB#0:
; SSE2-NEXT: movb $0, (%rdi)
; SSE2-NEXT: retq
;
; AVX2-LABEL: trunc_shl_8_i8_i64:
; AVX2: # BB#0:
; AVX2-NEXT: movb $0, (%rdi)
; AVX2-NEXT: retq
%val = load i64, i64* %in
%shl = shl i64 %val, 8
%trunc = trunc i64 %shl to i8
store i8 %trunc, i8* %out
ret void
}

0 comments on commit fd503e5

Please sign in to comment.