Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Original message: [X86] Fix wrong target-specific combine on SETCC nodes. Part of the folding logic implemented by function 'PerformISDSETCCCombine' only worked under the assumption that the input condition code was either SETNE or SETEQ. Unfortunately that assumption was incorrect, and in some cases the algorithm ended up incorrectly folding SETCC nodes. The incorrect folding only affected SETCC dag nodes where: - one of the operands was a build_vector of all zeroes; - the other operand was a SIGN_EXTEND from a vector of MVT::i1 elements; - the condition code was neither SETNE nor SETEQ. Example: (setcc (v4i32 (sign_extend v4i1:%A)), (v4i32 VectorOfAllZeroes), setge) Before this patch, the entire dag node sequence from the example was incorrectly folded to node %A. With this patch, the dag node sequence is folded to (xor %A, (v4i1 VectorOfAllOnes)). Added test setcc-combine.ll. Thanks to Greg Bedwell for spotting this issue. llvm-svn: 232804
- Loading branch information
Andrea Di Biagio
authored and
Andrea Di Biagio
committed
Mar 20, 2015
1 parent
df6c515
commit c25b68e
Showing
2 changed files
with
198 additions
and
26 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,166 @@ | ||
; RUN: llc -mtriple=x86_64-unknown-unknown -mcpu=generic < %s | FileCheck %s | ||
|
||
; sext(A < B) == 0, lane 1 returned as i32.
; A sign-extended i1 lane is either 0 or -1, so equality with zero is the
; logical NOT of the original compare. The expectations look for the vector
; compare immediately followed by a pxor with a RIP-relative operand.
define i32 @test_eq_1(<4 x i32> %A, <4 x i32> %B) {
; CHECK-LABEL: test_eq_1:
; CHECK: pcmpgtd %xmm0, %xmm1
; CHECK-NEXT: pxor {{.*}}(%rip), %xmm1
; CHECK: retq
  %lt = icmp slt <4 x i32> %A, %B
  %wide = sext <4 x i1> %lt to <4 x i32>
  %is.zero = icmp eq <4 x i32> %wide, zeroinitializer
  %lane1 = extractelement <4 x i1> %is.zero, i32 1
  %res = sext i1 %lane1 to i32
  ret i32 %res
}
|
||
; sext(A < B) != 0, lane 1 returned as i32.
; A sign-extended i1 lane is nonzero exactly when the i1 was true, so this
; setcc is equivalent to the original compare: no pxor may appear between the
; vector compare and the return.
define i32 @test_ne_1(<4 x i32> %A, <4 x i32> %B) {
; CHECK-LABEL: test_ne_1:
; CHECK: pcmpgtd %xmm0, %xmm1
; CHECK-NOT: pxor
; CHECK: retq
  %lt = icmp slt <4 x i32> %A, %B
  %wide = sext <4 x i1> %lt to <4 x i32>
  %nonzero = icmp ne <4 x i32> %wide, zeroinitializer
  %lane1 = extractelement <4 x i1> %nonzero, i32 1
  %res = sext i1 %lane1 to i32
  ret i32 %res
}
|
||
; sext(A < B) <= 0 (signed), lane 1 returned as i32.
; Both possible lane values (0 and -1) are <= 0, so the setcc is always true
; and the function is expected to collapse to returning the constant -1.
define i32 @test_le_1(<4 x i32> %A, <4 x i32> %B) {
; CHECK-LABEL: test_le_1:
; CHECK: movl $-1, %eax
; CHECK-NEXT: retq
  %lt = icmp slt <4 x i32> %A, %B
  %wide = sext <4 x i1> %lt to <4 x i32>
  %le.zero = icmp sle <4 x i32> %wide, zeroinitializer
  %lane1 = extractelement <4 x i1> %le.zero, i32 1
  %res = sext i1 %lane1 to i32
  ret i32 %res
}
|
||
; sext(A < B) >= 0 (signed), lane 1 returned as i32.
; A lane is >= 0 only when it is 0, i.e. when the i1 was false, so the setcc
; is the logical NOT of the original compare. This is the setge case called
; out in the commit message; a pxor is expected after the vector compare.
define i32 @test_ge_1(<4 x i32> %A, <4 x i32> %B) {
; CHECK-LABEL: test_ge_1:
; CHECK: pcmpgtd %xmm0, %xmm1
; CHECK: pxor {{.*}}(%rip), %xmm1
; CHECK: retq
  %lt = icmp slt <4 x i32> %A, %B
  %wide = sext <4 x i1> %lt to <4 x i32>
  %ge.zero = icmp sge <4 x i32> %wide, zeroinitializer
  %lane1 = extractelement <4 x i1> %ge.zero, i32 1
  %res = sext i1 %lane1 to i32
  ret i32 %res
}
|
||
; sext(A < B) < 0 (signed), lane 1 returned as i32.
; A sign-extended i1 lane is negative exactly when the i1 was true, so the
; setcc is equivalent to the original compare: the vector compare must be
; emitted and no pxor may follow it.
;
; The extract has to read %cmp1. Reading %cmp instead leaves the
; setcc-against-zero unused, so the expectations would pass without the
; combine under test ever being observed.
define i32 @test_lt_1(<4 x i32> %A, <4 x i32> %B) {
; CHECK-LABEL: test_lt_1:
; CHECK: pcmpgtd %xmm0, %xmm1
; CHECK-NOT: pxor
; CHECK: retq
entry:
  %cmp = icmp slt <4 x i32> %A, %B
  %sext = sext <4 x i1> %cmp to <4 x i32>
  %cmp1 = icmp slt <4 x i32> %sext, zeroinitializer
  %0 = extractelement <4 x i1> %cmp1, i32 1
  %1 = sext i1 %0 to i32
  ret i32 %1
}
|
||
; sext(A < B) > 0 (signed), lane 1 returned as i32.
; Neither possible lane value (0 or -1) is > 0, so the setcc is always false
; and the function is expected to collapse to returning zero.
define i32 @test_gt_1(<4 x i32> %A, <4 x i32> %B) {
; CHECK-LABEL: test_gt_1:
; CHECK: xorl %eax, %eax
; CHECK: retq
  %lt = icmp slt <4 x i32> %A, %B
  %wide = sext <4 x i1> %lt to <4 x i32>
  %gt.zero = icmp sgt <4 x i32> %wide, zeroinitializer
  %lane1 = extractelement <4 x i1> %gt.zero, i32 1
  %res = sext i1 %lane1 to i32
  ret i32 %res
}
|
||
; sext(B < A) == 0, lane 1 returned as i32.
; Same shape as the eq test with sext(A < B), but with the operands of the
; first compare swapped, which flips the registers in the expected pcmpgtd.
; Equality with zero is the NOT of the compare, so a pxor must directly follow.
define i32 @test_eq_2(<4 x i32> %A, <4 x i32> %B) {
; CHECK-LABEL: test_eq_2:
; CHECK: pcmpgtd %xmm1, %xmm0
; CHECK-NEXT: pxor {{.*}}(%rip), %xmm0
; CHECK: retq
  %b.lt.a = icmp slt <4 x i32> %B, %A
  %mask = sext <4 x i1> %b.lt.a to <4 x i32>
  %is.zero = icmp eq <4 x i32> %mask, zeroinitializer
  %elt = extractelement <4 x i1> %is.zero, i32 1
  %ext = sext i1 %elt to i32
  ret i32 %ext
}
|
||
; sext(B < A) != 0, lane 1 returned as i32.
; A nonzero sign-extended lane means the i1 was true, so the setcc equals the
; original compare; only the vector compare is expected, with no pxor before
; the return.
define i32 @test_ne_2(<4 x i32> %A, <4 x i32> %B) {
; CHECK-LABEL: test_ne_2:
; CHECK: pcmpgtd %xmm1, %xmm0
; CHECK-NOT: pxor
; CHECK: retq
  %b.lt.a = icmp slt <4 x i32> %B, %A
  %mask = sext <4 x i1> %b.lt.a to <4 x i32>
  %nonzero = icmp ne <4 x i32> %mask, zeroinitializer
  %elt = extractelement <4 x i1> %nonzero, i32 1
  %ext = sext i1 %elt to i32
  ret i32 %ext
}
|
||
; 0 <= sext(B < A) (signed), lane 1 returned as i32.
; Here the all-zeros vector is the LEFT operand of the setcc. 0 <= lane holds
; only when the lane is 0, i.e. when the i1 was false, so the result is the
; NOT of the original compare and a pxor is expected after the vector compare.
define i32 @test_le_2(<4 x i32> %A, <4 x i32> %B) {
; CHECK-LABEL: test_le_2:
; CHECK: pcmpgtd %xmm1, %xmm0
; CHECK: pxor {{.*}}(%rip), %xmm0
; CHECK: retq
  %b.lt.a = icmp slt <4 x i32> %B, %A
  %mask = sext <4 x i1> %b.lt.a to <4 x i32>
  %zero.le = icmp sle <4 x i32> zeroinitializer, %mask
  %elt = extractelement <4 x i1> %zero.le, i32 1
  %ext = sext i1 %elt to i32
  ret i32 %ext
}
|
||
; 0 >= sext(B < A) (signed), lane 1 returned as i32.
; The all-zeros vector is the left operand. Both possible lane values (0 and
; -1) satisfy 0 >= lane, so the setcc is always true and the function is
; expected to collapse to returning the constant -1.
define i32 @test_ge_2(<4 x i32> %A, <4 x i32> %B) {
; CHECK-LABEL: test_ge_2:
; CHECK: movl $-1, %eax
; CHECK: retq
  %b.lt.a = icmp slt <4 x i32> %B, %A
  %mask = sext <4 x i1> %b.lt.a to <4 x i32>
  %zero.ge = icmp sge <4 x i32> zeroinitializer, %mask
  %elt = extractelement <4 x i1> %zero.ge, i32 1
  %ext = sext i1 %elt to i32
  ret i32 %ext
}
|
||
; 0 < sext(B < A) (signed), lane 1 returned as i32.
; The all-zeros vector is the left operand. A sign-extended i1 lane is either
; 0 or -1, so 0 < lane can never hold: this is the operand-swapped form of
; "sext > 0" (see test_gt_1) and is expected to fold to the constant zero in
; the same way.
;
; The extract has to read %cmp1. Reading %cmp instead leaves the
; setcc-against-zero unused, and the test then only observes the plain vector
; compare rather than the combine it is named after.
; NOTE(review): the expectations below were updated to the constant-zero form
; to match the corrected extract; confirm against llc output.
define i32 @test_lt_2(<4 x i32> %A, <4 x i32> %B) {
; CHECK-LABEL: test_lt_2:
; CHECK: xorl %eax, %eax
; CHECK: retq
entry:
  %cmp = icmp slt <4 x i32> %B, %A
  %sext = sext <4 x i1> %cmp to <4 x i32>
  %cmp1 = icmp slt <4 x i32> zeroinitializer, %sext
  %0 = extractelement <4 x i1> %cmp1, i32 1
  %1 = sext i1 %0 to i32
  ret i32 %1
}
|
||
; 0 > sext(B < A) (signed), lane 1 returned as i32.
; The all-zeros vector is the left operand. 0 > lane holds exactly when the
; lane is -1, i.e. when the i1 was true, so the setcc equals the original
; compare: only the vector compare is expected, with no pxor before the return.
define i32 @test_gt_2(<4 x i32> %A, <4 x i32> %B) {
; CHECK-LABEL: test_gt_2:
; CHECK: pcmpgtd %xmm1, %xmm0
; CHECK-NOT: pxor
; CHECK: retq
  %b.lt.a = icmp slt <4 x i32> %B, %A
  %mask = sext <4 x i1> %b.lt.a to <4 x i32>
  %zero.gt = icmp sgt <4 x i32> zeroinitializer, %mask
  %elt = extractelement <4 x i1> %zero.gt, i32 1
  %ext = sext i1 %elt to i32
  ret i32 %ext
}