-
Notifications
You must be signed in to change notification settings - Fork 10.8k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[X86] Transform (xor x, SIGN_BIT)
-> (add x, SIGN_BIT)
32 bit and smaller scalars
#83659
Conversation
… smaller scalars We can use `lea` for `(add x, SIGN_BIT)` for scalars below 32-bits which is preferable to `xor`. `lea` can both be fused with surrounding code more easily and save register moves.
@llvm/pr-subscribers-backend-x86 Author: None (goldsteinn) Changes: (xor x, SIGN_BIT) -> (add x, SIGN_BIT). Patch is 30.72 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/83659.diff 9 Files Affected:
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index aea046b119d49d..7fd53e740fc81b 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -52128,7 +52128,19 @@ static SDValue combineXor(SDNode *N, SelectionDAG &DAG,
if (SDValue R = combineBMILogicOp(N, DAG, Subtarget))
return R;
- return combineFneg(N, DAG, DCI, Subtarget);
+ if (SDValue R = combineFneg(N, DAG, DCI, Subtarget))
+ return R;
+
+ // (xor x, SIGN_BIT) == (add x, SIGN_BIT), since we can lower `add` with `lea`
+ // which can be fused with shift/eliminate register moves, its preferable.
+ if (VT.isScalarInteger() && VT.getScalarSizeInBits() <= 32) {
+ if (auto *C = dyn_cast<ConstantSDNode>(N1)) {
+ if (C->getAPIntValue().isSignMask())
+ return DAG.getNode(ISD::ADD, SDLoc(N), VT, N0, N1);
+ }
+ }
+
+ return SDValue();
}
static SDValue combineBITREVERSE(SDNode *N, SelectionDAG &DAG,
diff --git a/llvm/test/CodeGen/X86/combine-fneg.ll b/llvm/test/CodeGen/X86/combine-fneg.ll
index e4a07348dc96c5..4a7712ef61fa55 100644
--- a/llvm/test/CodeGen/X86/combine-fneg.ll
+++ b/llvm/test/CodeGen/X86/combine-fneg.ll
@@ -124,9 +124,9 @@ define <2 x float> @fneg_bitcast(i64 %i) nounwind {
; X86-SSE1-NEXT: subl $16, %esp
; X86-SSE1-NEXT: movl $-2147483648, %eax # imm = 0x80000000
; X86-SSE1-NEXT: movl 12(%ebp), %ecx
-; X86-SSE1-NEXT: xorl %eax, %ecx
+; X86-SSE1-NEXT: addl %eax, %ecx
; X86-SSE1-NEXT: movl %ecx, {{[0-9]+}}(%esp)
-; X86-SSE1-NEXT: xorl 8(%ebp), %eax
+; X86-SSE1-NEXT: addl 8(%ebp), %eax
; X86-SSE1-NEXT: movl %eax, (%esp)
; X86-SSE1-NEXT: movaps (%esp), %xmm0
; X86-SSE1-NEXT: movl %ebp, %esp
@@ -137,9 +137,9 @@ define <2 x float> @fneg_bitcast(i64 %i) nounwind {
; X86-SSE2: # %bb.0:
; X86-SSE2-NEXT: movl $-2147483648, %eax # imm = 0x80000000
; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X86-SSE2-NEXT: xorl %eax, %ecx
+; X86-SSE2-NEXT: addl %eax, %ecx
; X86-SSE2-NEXT: movd %ecx, %xmm1
-; X86-SSE2-NEXT: xorl {{[0-9]+}}(%esp), %eax
+; X86-SSE2-NEXT: addl {{[0-9]+}}(%esp), %eax
; X86-SSE2-NEXT: movd %eax, %xmm0
; X86-SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; X86-SSE2-NEXT: retl
diff --git a/llvm/test/CodeGen/X86/horizontal-reduce-smin.ll b/llvm/test/CodeGen/X86/horizontal-reduce-smin.ll
index 47bb0957f3fbb6..ef98f08f185efa 100644
--- a/llvm/test/CodeGen/X86/horizontal-reduce-smin.ll
+++ b/llvm/test/CodeGen/X86/horizontal-reduce-smin.ll
@@ -208,7 +208,7 @@ define i16 @test_reduce_v8i16(<8 x i16> %a0) {
; X86-SSE42-NEXT: pxor {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
; X86-SSE42-NEXT: phminposuw %xmm0, %xmm0
; X86-SSE42-NEXT: movd %xmm0, %eax
-; X86-SSE42-NEXT: xorl $32768, %eax ## imm = 0x8000
+; X86-SSE42-NEXT: addl $-32768, %eax ## imm = 0x8000
; X86-SSE42-NEXT: ## kill: def $ax killed $ax killed $eax
; X86-SSE42-NEXT: retl
;
@@ -217,7 +217,7 @@ define i16 @test_reduce_v8i16(<8 x i16> %a0) {
; X86-AVX-NEXT: vpxor {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm0
; X86-AVX-NEXT: vphminposuw %xmm0, %xmm0
; X86-AVX-NEXT: vmovd %xmm0, %eax
-; X86-AVX-NEXT: xorl $32768, %eax ## imm = 0x8000
+; X86-AVX-NEXT: addl $-32768, %eax ## imm = 0x8000
; X86-AVX-NEXT: ## kill: def $ax killed $ax killed $eax
; X86-AVX-NEXT: retl
;
@@ -239,7 +239,7 @@ define i16 @test_reduce_v8i16(<8 x i16> %a0) {
; X64-SSE42-NEXT: pxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; X64-SSE42-NEXT: phminposuw %xmm0, %xmm0
; X64-SSE42-NEXT: movd %xmm0, %eax
-; X64-SSE42-NEXT: xorl $32768, %eax ## imm = 0x8000
+; X64-SSE42-NEXT: addl $-32768, %eax ## imm = 0x8000
; X64-SSE42-NEXT: ## kill: def $ax killed $ax killed $eax
; X64-SSE42-NEXT: retq
;
@@ -248,7 +248,7 @@ define i16 @test_reduce_v8i16(<8 x i16> %a0) {
; X64-AVX1OR2-NEXT: vpxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; X64-AVX1OR2-NEXT: vphminposuw %xmm0, %xmm0
; X64-AVX1OR2-NEXT: vmovd %xmm0, %eax
-; X64-AVX1OR2-NEXT: xorl $32768, %eax ## imm = 0x8000
+; X64-AVX1OR2-NEXT: addl $-32768, %eax ## imm = 0x8000
; X64-AVX1OR2-NEXT: ## kill: def $ax killed $ax killed $eax
; X64-AVX1OR2-NEXT: retq
;
@@ -257,7 +257,7 @@ define i16 @test_reduce_v8i16(<8 x i16> %a0) {
; X64-AVX512-NEXT: vpxord {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %xmm0
; X64-AVX512-NEXT: vphminposuw %xmm0, %xmm0
; X64-AVX512-NEXT: vmovd %xmm0, %eax
-; X64-AVX512-NEXT: xorl $32768, %eax ## imm = 0x8000
+; X64-AVX512-NEXT: addl $-32768, %eax ## imm = 0x8000
; X64-AVX512-NEXT: ## kill: def $ax killed $ax killed $eax
; X64-AVX512-NEXT: retq
%1 = shufflevector <8 x i16> %a0, <8 x i16> undef, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef>
@@ -743,7 +743,7 @@ define i16 @test_reduce_v16i16(<16 x i16> %a0) {
; X86-SSE42-NEXT: pxor {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
; X86-SSE42-NEXT: phminposuw %xmm0, %xmm0
; X86-SSE42-NEXT: movd %xmm0, %eax
-; X86-SSE42-NEXT: xorl $32768, %eax ## imm = 0x8000
+; X86-SSE42-NEXT: addl $-32768, %eax ## imm = 0x8000
; X86-SSE42-NEXT: ## kill: def $ax killed $ax killed $eax
; X86-SSE42-NEXT: retl
;
@@ -754,7 +754,7 @@ define i16 @test_reduce_v16i16(<16 x i16> %a0) {
; X86-AVX1-NEXT: vpxor {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm0
; X86-AVX1-NEXT: vphminposuw %xmm0, %xmm0
; X86-AVX1-NEXT: vmovd %xmm0, %eax
-; X86-AVX1-NEXT: xorl $32768, %eax ## imm = 0x8000
+; X86-AVX1-NEXT: addl $-32768, %eax ## imm = 0x8000
; X86-AVX1-NEXT: ## kill: def $ax killed $ax killed $eax
; X86-AVX1-NEXT: vzeroupper
; X86-AVX1-NEXT: retl
@@ -766,7 +766,7 @@ define i16 @test_reduce_v16i16(<16 x i16> %a0) {
; X86-AVX2-NEXT: vpxor {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm0
; X86-AVX2-NEXT: vphminposuw %xmm0, %xmm0
; X86-AVX2-NEXT: vmovd %xmm0, %eax
-; X86-AVX2-NEXT: xorl $32768, %eax ## imm = 0x8000
+; X86-AVX2-NEXT: addl $-32768, %eax ## imm = 0x8000
; X86-AVX2-NEXT: ## kill: def $ax killed $ax killed $eax
; X86-AVX2-NEXT: vzeroupper
; X86-AVX2-NEXT: retl
@@ -791,7 +791,7 @@ define i16 @test_reduce_v16i16(<16 x i16> %a0) {
; X64-SSE42-NEXT: pxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; X64-SSE42-NEXT: phminposuw %xmm0, %xmm0
; X64-SSE42-NEXT: movd %xmm0, %eax
-; X64-SSE42-NEXT: xorl $32768, %eax ## imm = 0x8000
+; X64-SSE42-NEXT: addl $-32768, %eax ## imm = 0x8000
; X64-SSE42-NEXT: ## kill: def $ax killed $ax killed $eax
; X64-SSE42-NEXT: retq
;
@@ -802,7 +802,7 @@ define i16 @test_reduce_v16i16(<16 x i16> %a0) {
; X64-AVX1-NEXT: vpxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; X64-AVX1-NEXT: vphminposuw %xmm0, %xmm0
; X64-AVX1-NEXT: vmovd %xmm0, %eax
-; X64-AVX1-NEXT: xorl $32768, %eax ## imm = 0x8000
+; X64-AVX1-NEXT: addl $-32768, %eax ## imm = 0x8000
; X64-AVX1-NEXT: ## kill: def $ax killed $ax killed $eax
; X64-AVX1-NEXT: vzeroupper
; X64-AVX1-NEXT: retq
@@ -814,7 +814,7 @@ define i16 @test_reduce_v16i16(<16 x i16> %a0) {
; X64-AVX2-NEXT: vpxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; X64-AVX2-NEXT: vphminposuw %xmm0, %xmm0
; X64-AVX2-NEXT: vmovd %xmm0, %eax
-; X64-AVX2-NEXT: xorl $32768, %eax ## imm = 0x8000
+; X64-AVX2-NEXT: addl $-32768, %eax ## imm = 0x8000
; X64-AVX2-NEXT: ## kill: def $ax killed $ax killed $eax
; X64-AVX2-NEXT: vzeroupper
; X64-AVX2-NEXT: retq
@@ -826,7 +826,7 @@ define i16 @test_reduce_v16i16(<16 x i16> %a0) {
; X64-AVX512-NEXT: vpxord {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %xmm0
; X64-AVX512-NEXT: vphminposuw %xmm0, %xmm0
; X64-AVX512-NEXT: vmovd %xmm0, %eax
-; X64-AVX512-NEXT: xorl $32768, %eax ## imm = 0x8000
+; X64-AVX512-NEXT: addl $-32768, %eax ## imm = 0x8000
; X64-AVX512-NEXT: ## kill: def $ax killed $ax killed $eax
; X64-AVX512-NEXT: vzeroupper
; X64-AVX512-NEXT: retq
@@ -1500,7 +1500,7 @@ define i16 @test_reduce_v32i16(<32 x i16> %a0) {
; X86-SSE42-NEXT: pxor {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
; X86-SSE42-NEXT: phminposuw %xmm0, %xmm0
; X86-SSE42-NEXT: movd %xmm0, %eax
-; X86-SSE42-NEXT: xorl $32768, %eax ## imm = 0x8000
+; X86-SSE42-NEXT: addl $-32768, %eax ## imm = 0x8000
; X86-SSE42-NEXT: ## kill: def $ax killed $ax killed $eax
; X86-SSE42-NEXT: retl
;
@@ -1514,7 +1514,7 @@ define i16 @test_reduce_v32i16(<32 x i16> %a0) {
; X86-AVX1-NEXT: vpxor {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm0
; X86-AVX1-NEXT: vphminposuw %xmm0, %xmm0
; X86-AVX1-NEXT: vmovd %xmm0, %eax
-; X86-AVX1-NEXT: xorl $32768, %eax ## imm = 0x8000
+; X86-AVX1-NEXT: addl $-32768, %eax ## imm = 0x8000
; X86-AVX1-NEXT: ## kill: def $ax killed $ax killed $eax
; X86-AVX1-NEXT: vzeroupper
; X86-AVX1-NEXT: retl
@@ -1527,7 +1527,7 @@ define i16 @test_reduce_v32i16(<32 x i16> %a0) {
; X86-AVX2-NEXT: vpxor {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm0
; X86-AVX2-NEXT: vphminposuw %xmm0, %xmm0
; X86-AVX2-NEXT: vmovd %xmm0, %eax
-; X86-AVX2-NEXT: xorl $32768, %eax ## imm = 0x8000
+; X86-AVX2-NEXT: addl $-32768, %eax ## imm = 0x8000
; X86-AVX2-NEXT: ## kill: def $ax killed $ax killed $eax
; X86-AVX2-NEXT: vzeroupper
; X86-AVX2-NEXT: retl
@@ -1556,7 +1556,7 @@ define i16 @test_reduce_v32i16(<32 x i16> %a0) {
; X64-SSE42-NEXT: pxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; X64-SSE42-NEXT: phminposuw %xmm0, %xmm0
; X64-SSE42-NEXT: movd %xmm0, %eax
-; X64-SSE42-NEXT: xorl $32768, %eax ## imm = 0x8000
+; X64-SSE42-NEXT: addl $-32768, %eax ## imm = 0x8000
; X64-SSE42-NEXT: ## kill: def $ax killed $ax killed $eax
; X64-SSE42-NEXT: retq
;
@@ -1570,7 +1570,7 @@ define i16 @test_reduce_v32i16(<32 x i16> %a0) {
; X64-AVX1-NEXT: vpxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; X64-AVX1-NEXT: vphminposuw %xmm0, %xmm0
; X64-AVX1-NEXT: vmovd %xmm0, %eax
-; X64-AVX1-NEXT: xorl $32768, %eax ## imm = 0x8000
+; X64-AVX1-NEXT: addl $-32768, %eax ## imm = 0x8000
; X64-AVX1-NEXT: ## kill: def $ax killed $ax killed $eax
; X64-AVX1-NEXT: vzeroupper
; X64-AVX1-NEXT: retq
@@ -1583,7 +1583,7 @@ define i16 @test_reduce_v32i16(<32 x i16> %a0) {
; X64-AVX2-NEXT: vpxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; X64-AVX2-NEXT: vphminposuw %xmm0, %xmm0
; X64-AVX2-NEXT: vmovd %xmm0, %eax
-; X64-AVX2-NEXT: xorl $32768, %eax ## imm = 0x8000
+; X64-AVX2-NEXT: addl $-32768, %eax ## imm = 0x8000
; X64-AVX2-NEXT: ## kill: def $ax killed $ax killed $eax
; X64-AVX2-NEXT: vzeroupper
; X64-AVX2-NEXT: retq
@@ -1597,7 +1597,7 @@ define i16 @test_reduce_v32i16(<32 x i16> %a0) {
; X64-AVX512-NEXT: vpxord {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %xmm0
; X64-AVX512-NEXT: vphminposuw %xmm0, %xmm0
; X64-AVX512-NEXT: vmovd %xmm0, %eax
-; X64-AVX512-NEXT: xorl $32768, %eax ## imm = 0x8000
+; X64-AVX512-NEXT: addl $-32768, %eax ## imm = 0x8000
; X64-AVX512-NEXT: ## kill: def $ax killed $ax killed $eax
; X64-AVX512-NEXT: vzeroupper
; X64-AVX512-NEXT: retq
@@ -1869,7 +1869,7 @@ define i16 @test_reduce_v16i16_v8i16(<16 x i16> %a0) {
; X86-SSE42-NEXT: pxor {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
; X86-SSE42-NEXT: phminposuw %xmm0, %xmm0
; X86-SSE42-NEXT: movd %xmm0, %eax
-; X86-SSE42-NEXT: xorl $32768, %eax ## imm = 0x8000
+; X86-SSE42-NEXT: addl $-32768, %eax ## imm = 0x8000
; X86-SSE42-NEXT: ## kill: def $ax killed $ax killed $eax
; X86-SSE42-NEXT: retl
;
@@ -1878,7 +1878,7 @@ define i16 @test_reduce_v16i16_v8i16(<16 x i16> %a0) {
; X86-AVX-NEXT: vpxor {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm0
; X86-AVX-NEXT: vphminposuw %xmm0, %xmm0
; X86-AVX-NEXT: vmovd %xmm0, %eax
-; X86-AVX-NEXT: xorl $32768, %eax ## imm = 0x8000
+; X86-AVX-NEXT: addl $-32768, %eax ## imm = 0x8000
; X86-AVX-NEXT: ## kill: def $ax killed $ax killed $eax
; X86-AVX-NEXT: vzeroupper
; X86-AVX-NEXT: retl
@@ -1901,7 +1901,7 @@ define i16 @test_reduce_v16i16_v8i16(<16 x i16> %a0) {
; X64-SSE42-NEXT: pxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; X64-SSE42-NEXT: phminposuw %xmm0, %xmm0
; X64-SSE42-NEXT: movd %xmm0, %eax
-; X64-SSE42-NEXT: xorl $32768, %eax ## imm = 0x8000
+; X64-SSE42-NEXT: addl $-32768, %eax ## imm = 0x8000
; X64-SSE42-NEXT: ## kill: def $ax killed $ax killed $eax
; X64-SSE42-NEXT: retq
;
@@ -1910,7 +1910,7 @@ define i16 @test_reduce_v16i16_v8i16(<16 x i16> %a0) {
; X64-AVX1OR2-NEXT: vpxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; X64-AVX1OR2-NEXT: vphminposuw %xmm0, %xmm0
; X64-AVX1OR2-NEXT: vmovd %xmm0, %eax
-; X64-AVX1OR2-NEXT: xorl $32768, %eax ## imm = 0x8000
+; X64-AVX1OR2-NEXT: addl $-32768, %eax ## imm = 0x8000
; X64-AVX1OR2-NEXT: ## kill: def $ax killed $ax killed $eax
; X64-AVX1OR2-NEXT: vzeroupper
; X64-AVX1OR2-NEXT: retq
@@ -1920,7 +1920,7 @@ define i16 @test_reduce_v16i16_v8i16(<16 x i16> %a0) {
; X64-AVX512-NEXT: vpxord {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %xmm0
; X64-AVX512-NEXT: vphminposuw %xmm0, %xmm0
; X64-AVX512-NEXT: vmovd %xmm0, %eax
-; X64-AVX512-NEXT: xorl $32768, %eax ## imm = 0x8000
+; X64-AVX512-NEXT: addl $-32768, %eax ## imm = 0x8000
; X64-AVX512-NEXT: ## kill: def $ax killed $ax killed $eax
; X64-AVX512-NEXT: vzeroupper
; X64-AVX512-NEXT: retq
@@ -1956,7 +1956,7 @@ define i16 @test_reduce_v32i16_v8i16(<32 x i16> %a0) {
; X86-SSE42-NEXT: pxor {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
; X86-SSE42-NEXT: phminposuw %xmm0, %xmm0
; X86-SSE42-NEXT: movd %xmm0, %eax
-; X86-SSE42-NEXT: xorl $32768, %eax ## imm = 0x8000
+; X86-SSE42-NEXT: addl $-32768, %eax ## imm = 0x8000
; X86-SSE42-NEXT: ## kill: def $ax killed $ax killed $eax
; X86-SSE42-NEXT: retl
;
@@ -1965,7 +1965,7 @@ define i16 @test_reduce_v32i16_v8i16(<32 x i16> %a0) {
; X86-AVX-NEXT: vpxor {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm0
; X86-AVX-NEXT: vphminposuw %xmm0, %xmm0
; X86-AVX-NEXT: vmovd %xmm0, %eax
-; X86-AVX-NEXT: xorl $32768, %eax ## imm = 0x8000
+; X86-AVX-NEXT: addl $-32768, %eax ## imm = 0x8000
; X86-AVX-NEXT: ## kill: def $ax killed $ax killed $eax
; X86-AVX-NEXT: vzeroupper
; X86-AVX-NEXT: retl
@@ -1988,7 +1988,7 @@ define i16 @test_reduce_v32i16_v8i16(<32 x i16> %a0) {
; X64-SSE42-NEXT: pxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; X64-SSE42-NEXT: phminposuw %xmm0, %xmm0
; X64-SSE42-NEXT: movd %xmm0, %eax
-; X64-SSE42-NEXT: xorl $32768, %eax ## imm = 0x8000
+; X64-SSE42-NEXT: addl $-32768, %eax ## imm = 0x8000
; X64-SSE42-NEXT: ## kill: def $ax killed $ax killed $eax
; X64-SSE42-NEXT: retq
;
@@ -1997,7 +1997,7 @@ define i16 @test_reduce_v32i16_v8i16(<32 x i16> %a0) {
; X64-AVX1OR2-NEXT: vpxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; X64-AVX1OR2-NEXT: vphminposuw %xmm0, %xmm0
; X64-AVX1OR2-NEXT: vmovd %xmm0, %eax
-; X64-AVX1OR2-NEXT: xorl $32768, %eax ## imm = 0x8000
+; X64-AVX1OR2-NEXT: addl $-32768, %eax ## imm = 0x8000
; X64-AVX1OR2-NEXT: ## kill: def $ax killed $ax killed $eax
; X64-AVX1OR2-NEXT: vzeroupper
; X64-AVX1OR2-NEXT: retq
@@ -2007,7 +2007,7 @@ define i16 @test_reduce_v32i16_v8i16(<32 x i16> %a0) {
; X64-AVX512-NEXT: vpxord {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %xmm0
; X64-AVX512-NEXT: vphminposuw %xmm0, %xmm0
; X64-AVX512-NEXT: vmovd %xmm0, %eax
-; X64-AVX512-NEXT: xorl $32768, %eax ## imm = 0x8000
+; X64-AVX512-NEXT: addl $-32768, %eax ## imm = 0x8000
; X64-AVX512-NEXT: ## kill: def $ax killed $ax killed $eax
; X64-AVX512-NEXT: vzeroupper
; X64-AVX512-NEXT: retq
diff --git a/llvm/test/CodeGen/X86/is_fpclass-fp80.ll b/llvm/test/CodeGen/X86/is_fpclass-fp80.ll
index ec2323ac2250c7..0abf44262f99b4 100644
--- a/llvm/test/CodeGen/X86/is_fpclass-fp80.ll
+++ b/llvm/test/CodeGen/X86/is_fpclass-fp80.ll
@@ -249,7 +249,7 @@ define i1 @is_inf_f80(x86_fp80 %x) {
; CHECK-32-NEXT: movl {{[0-9]+}}(%esp), %eax
; CHECK-32-NEXT: notl %eax
; CHECK-32-NEXT: movl $-2147483648, %ecx # imm = 0x80000000
-; CHECK-32-NEXT: xorl {{[0-9]+}}(%esp), %ecx
+; CHECK-32-NEXT: addl {{[0-9]+}}(%esp), %ecx
; CHECK-32-NEXT: andl $32767, %eax # imm = 0x7FFF
; CHECK-32-NEXT: orl {{[0-9]+}}(%esp), %eax
; CHECK-32-NEXT: orl %ecx, %eax
@@ -276,7 +276,7 @@ define i1 @is_posinf_f80(x86_fp80 %x) {
; CHECK-32: # %bb.0: # %entry
; CHECK-32-NEXT: movzwl {{[0-9]+}}(%esp), %eax
; CHECK-32-NEXT: movl $-2147483648, %ecx # imm = 0x80000000
-; CHECK-32-NEXT: xorl {{[0-9]+}}(%esp), %ecx
+; CHECK-32-NEXT: addl {{[0-9]+}}(%esp), %ecx
; CHECK-32-NEXT: xorl $32767, %eax # imm = 0x7FFF
; CHECK-32-NEXT: orl {{[0-9]+}}(%esp), %eax
; CHECK-32-NEXT: orl %ecx, %eax
@@ -303,7 +303,7 @@ define i1 @is_neginf_f80(x86_fp80 %x) {
; CHECK-32-NEXT: movzwl {{[0-9]+}}(%esp), %eax
; CHECK-32-NEXT: xorl $65535, %eax # imm = 0xFFFF
; CHECK-32-NEXT: movl $-2147483648, %ecx # imm = 0x80000000
-; CHECK-32-NEXT: xorl {{[0-9]+}}(%esp), %ecx
+; CHECK-32-NEXT: addl {{[0-9]+}}(%esp), %ecx
; CHECK-32-NEXT: orl {{[0-9]+}}(%esp), %eax
; CHECK-32-NEXT: orl %ecx, %eax
; CHECK-32-NEXT: sete %al
diff --git a/llvm/test/CodeGen/X86/sadd_sat.ll b/llvm/test/CodeGen/X86/sadd_sat.ll
index 5b9a42d1f0d91f..bec65b4bff60ec 100644
--- a/llvm/test/CodeGen/X86/sadd_sat.ll
+++ b/llvm/test/CodeGen/X86/sadd_sat.ll
@@ -76,7 +76,7 @@ define signext i16 @func16(i16 signext %x, i16 signext %y) nounwind {
; X86-NEXT: addw %cx, %dx
; X86-NEXT: movswl %dx, %edx
; X86-NEXT: sarl $15, %edx
-; X86-NEXT: xorl $-32768, %edx # imm = 0x8000
+; X86-NEXT: addl $-32768, %edx # imm = 0x8000
; X86-NEXT: addw %cx, %ax
; X86-NEXT: cmovol %edx, %eax
; X86-NEXT: # kill: def $ax killed $ax killed $eax
@@ -89,7 +89,7 @@ define signext i16 @func16(i16 signext %x, i16 signext %y) nounwind {
; X64-NEXT: leal (%rdi,%rsi), %eax
; X64-NEXT: cwtl
; X64-NEXT: sarl $15, %eax
-; X64-NEXT: xorl $-32768, %eax # imm = 0x8000
+; X64-NEXT: addl $-32768, %eax # imm = 0x8000
; X64-NEXT: addw %si, %di
; X64-NEXT: cmovnol %edi, %eax
; X64-NEXT: # kill: def $ax killed $ax killed $eax
diff --git a/llvm/test/CodeGen/X86/sadd_sat_plus.ll b/llvm/test/CodeGen/X86/sadd_sat_plus.ll
index deabeb27cdab83..efa1fd80f898f1 100644
--- a/llvm/test/CodeGen/X86/sadd_sat_plus.ll
+++ b/llvm/test/CodeGen/X86/sadd_sat_plus.ll
@@ -80,7 +80,7 @@ define signext i16 @func16(i16 signext %x, i16 signext %y, i16 signext %z) nounw
; X86-NEXT: addw %cx, %dx
; X86-NEXT: movswl %dx, %edx
; X86-NEXT: sarl $15, %edx
-; X86-NEXT: xorl $-32768, %edx # imm = 0x8000
+; X86-NEXT: addl $-32768, %edx # imm = 0x8000
; X86-NEXT: addw %cx, %ax
; X86-NEXT: cmovol %edx, %eax
; X86-NEXT: # kill: def $ax killed $ax killed $eax
@@ -94,7 +94,7 @@ define signext i16 @func16(i16 signext %x, i16 signext %y, i16 signext %z) nounw
; X64-NEXT: leal (%rdi,%rsi), %eax
; X64-NEXT: cwtl
; X64-NEXT: sarl $15, %eax
-; X64-NEXT: xorl $-32768, %eax # imm = 0x8000
+; X64-NEXT: addl $-32768, %eax # imm = 0x8000
; X64-NEXT: addw %si, %di
; X64-NEXT: cmovnol %edi, %eax
; X64-NEXT: # kill: def $ax killed $ax killed $eax
diff --git a/llvm/test/CodeGen/X86/sadd_sat_vec.ll b/llvm/test/CodeGen/X86/sadd_sat_vec.ll
index b2b242fa29818f..f7bead70c5d0f2 100644
--- a/llvm/test/CodeGen/X86/sadd_sat_vec.ll
+++ b/llvm/test/CodeGen/X86/sadd_sat_vec.ll
@@ -469,7 +469,7 @@ define void @v1i16(ptr %px, ptr %py, ptr %pz) nounwind {
; SSE-NEXT: leal (%rax,%rcx), %esi
; SSE-NEXT: movswl %si, %esi
; SSE-NEXT: sarl $15, %esi
-; SSE-NEXT: xorl $-32768, %esi # imm = 0x8000
+; SSE-NEXT: addl $-32768, %esi # imm = 0x8000
; SSE-NEXT: addw %cx, %ax
; SSE-NEXT: cmovol %esi, %eax
; SSE-NEXT: movw %ax, (%rdx)
@@ -482,7 +482,7 @@ define void @v1i16(ptr %px, ptr %py, ptr %pz) nounwind {
; AVX-NEXT: leal (%rax,%rcx), %esi
; AVX-NEXT: movswl %si, %esi
; AVX-NEXT: sarl $15, %esi
-; AVX-NEXT: xorl $-32768, %esi # imm = 0x8000
+; AVX-NEXT: addl $-32768, %esi # imm = 0x8000
; AVX-NEXT: addw %cx, %ax
; AVX-NEXT: cmovol %esi, %eax
; AVX-NEXT: movw %ax, (%rdx)
diff --git a/llvm/test/CodeGen/X86/vector-reduce-smin.ll b/llvm/test/CodeGen/X86/vector-reduce-smin.ll
index bb87740c215382..22312e7f0f4991 100644
--- a/llvm/test/CodeGen/X86/vector-redu...
[truncated]
|
(xor x, SIGN_BIT)
-> (add x, SIGN_BIT)
32 bit and smaller scalars
; X86-NEXT: retl | ||
; | ||
; X64-LABEL: sub_xor_sminval_i8: | ||
; X64: # %bb.0: | ||
; X64-NEXT: # kill: def $edi killed $edi def $rdi | ||
; X64-NEXT: subb %sil, %dil |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Is this correct? `leal -128(%rdi), %eax`
may generate a value out of i8
range.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I'm not sure. The prior impl has the same issue, but the new code gen does not have the same high bits as before:
https://alive2.llvm.org/ce/z/FTCyUY
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Both new/old have the same low 8-bits and may have different set high 24 bits. Different from each other and different than if we had stuck with xor
.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
It's not a problem for i8
but it is for zeroext i8
.
We already do some of this in X86DAGToDAGISel::matchAddressRecursively - maybe see why these cases weren't working there? |
if (C->getAPIntValue().isSignMask()) | ||
return DAG.getNode(ISD::ADD, SDLoc(N), VT, N0, N1); | ||
} | ||
} |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Use DAG.isADDLike ?
I see:
Which isn't quite the same, or is there another transform im missing? But it looks to me like whatever we are doing in |
I tested a patch that transforms |
Ahh, its a TD pattern:
|
I created: #83691, although all the cases caught here end up being missed. |
|
||
// (xor x, SIGN_BIT) == (add x, SIGN_BIT), since we can lower `add` with `lea` | ||
// which can be fused with shift/eliminate register moves, its preferable. | ||
if (VT.isScalarInteger() && VT.getScalarSizeInBits() <= 32) { |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Don't we already do this for ==32 and ==8 in tablegen? i16 is weird because its promoted before tablegen.
def : Pat<(xor GR8:$src1, -128),
(ADD8ri GR8:$src1, -128)>;
def : Pat<(xor GR16:$src1, -32768),
(ADD16ri GR16:$src1, -32768)>;
def : Pat<(xor GR32:$src1, -2147483648),
(ADD32ri GR32:$src1, -2147483648)>;
@@ -124,9 +124,9 @@ define <2 x float> @fneg_bitcast(i64 %i) nounwind { | |||
; X86-SSE1-NEXT: subl $16, %esp | |||
; X86-SSE1-NEXT: movl $-2147483648, %eax # imm = 0x80000000 | |||
; X86-SSE1-NEXT: movl 12(%ebp), %ecx | |||
; X86-SSE1-NEXT: xorl %eax, %ecx | |||
; X86-SSE1-NEXT: addl %eax, %ecx |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Why didn't the load or the immediate fold into this instruction?
Closing in favor of #83691 |
We can use
lea
for (add x, SIGN_BIT)
for scalars below 32-bits, which is preferable to
xor
. lea
can both be fused with surrounding code more easily and save register moves.