Skip to content

Commit

Permalink
[DAGCombine] Don't fold a trunc if it feeds an anyext
Browse files Browse the repository at this point in the history
Legalization tends to create anyext(trunc) patterns. These should always be
combined into either a single trunc, a single ext, or nothing if the
types match exactly. But if we happen to combine the trunc first, we may pull
the trunc away from the anyext or make it implicit (e.g. via the
truncate(extract) -> extract(bitcast) fold), so the pair is no longer combined.

To prevent this, we avoid folding a trunc whose only use is an anyext, similarly
to how we already handle fpround(fpextend).

Differential Revision: https://reviews.llvm.org/D23893

llvm-svn: 280386
  • Loading branch information
mkuperst committed Sep 1, 2016
1 parent b28fe03 commit 65bc3c8
Show file tree
Hide file tree
Showing 3 changed files with 8 additions and 8 deletions.
4 changes: 4 additions & 0 deletions llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -7135,6 +7135,10 @@ SDValue DAGCombiner::visitTRUNCATE(SDNode *N) {
return N0.getOperand(0);
}

// If this trunc's only use is an any_extend (i.e. anyext(trunc)), don't fold
// it here; allow ourselves to be folded together with the anyext instead.
if (N->hasOneUse() && (N->use_begin()->getOpcode() == ISD::ANY_EXTEND))
return SDValue();

// Fold extract-and-trunc into a narrow extract. For example:
// i64 x = EXTRACT_VECTOR_ELT(v2i64 val, i32 1)
// i32 y = TRUNCATE(i64 x)
Expand Down
5 changes: 1 addition & 4 deletions llvm/test/CodeGen/X86/2011-10-21-widen-cmp.ll
Original file line number Diff line number Diff line change
Expand Up @@ -10,10 +10,7 @@ define void @cmp_2_floats() {
; CHECK-NEXT: cmpordps %xmm0, %xmm0
; CHECK-NEXT: pmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
; CHECK-NEXT: psllq $32, %xmm0
; CHECK-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
; CHECK-NEXT: psrad $31, %xmm0
; CHECK-NEXT: pblendw {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3],xmm1[4,5],xmm0[6,7]
; CHECK-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; CHECK-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,3,2,3]
; CHECK-NEXT: pslld $31, %xmm0
; CHECK-NEXT: blendvps %xmm0, %xmm0
; CHECK-NEXT: movlps %xmm0, (%rax)
Expand Down
7 changes: 3 additions & 4 deletions llvm/test/CodeGen/X86/mem-intrin-base-reg.ll
Original file line number Diff line number Diff line change
Expand Up @@ -8,15 +8,12 @@ target triple = "i686-pc-windows-msvc"
; for when this is necessary. Typically, we choose ESI for the base register,
; which all of the X86 string instructions use.

; The pattern of vector icmp and extractelement is used in these tests because
; it forces creation of an aligned stack temporary. Perhaps such temporaries
; shouldn't be aligned.

declare void @escape_vla_and_icmp(i8*, i1 zeroext)
declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture readonly, i32, i32, i1)
declare void @llvm.memset.p0i8.i32(i8* nocapture, i8, i32, i32, i1)

define i32 @memcpy_novla_vector(<4 x i32>* %vp0, i8* %a, i8* %b, i32 %n, i1 zeroext %cond) {
%foo = alloca <4 x i32>, align 16
call void @llvm.memcpy.p0i8.p0i8.i32(i8* %a, i8* %b, i32 128, i32 4, i1 false)
br i1 %cond, label %spill_vectors, label %no_vectors

Expand All @@ -42,6 +39,7 @@ spill_vectors:
; CHECK: rep;movsl

define i32 @memcpy_vla_vector(<4 x i32>* %vp0, i8* %a, i8* %b, i32 %n, i1 zeroext %cond) {
%foo = alloca <4 x i32>, align 16
call void @llvm.memcpy.p0i8.p0i8.i32(i8* %a, i8* %b, i32 128, i32 4, i1 false)
br i1 %cond, label %spill_vectors, label %no_vectors

Expand Down Expand Up @@ -70,6 +68,7 @@ spill_vectors:
; stosd doesn't clobber esi, so we can use it.

define i32 @memset_vla_vector(<4 x i32>* %vp0, i8* %a, i32 %n, i1 zeroext %cond) {
%foo = alloca <4 x i32>, align 16
call void @llvm.memset.p0i8.i32(i8* %a, i8 42, i32 128, i32 4, i1 false)
br i1 %cond, label %spill_vectors, label %no_vectors

Expand Down

0 comments on commit 65bc3c8

Please sign in to comment.