Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
96 changes: 96 additions & 0 deletions llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1262,6 +1262,41 @@ SDValue DAGCombiner::reassociateOpsCommutative(unsigned Opc, const SDLoc &DL,
if (N1 == N00 || N1 == N01)
return N0;
}

// Optimize (Constant XOR a) & b & ~c -> (Constant XOR a) & (b & ~c)
// This allows the andn operation to be done in parallel with the xor
if (Opc == ISD::AND && TLI.hasAndNot(N1)) {
// Look for pattern: AND(AND(XOR(Constant, a), b), NOT(c))
// Transform to: AND(XOR(Constant, a), AND(b, NOT(c)))

// Check if N1 is NOT(c) - i.e., XOR(c, -1)
if (N1.getOpcode() == ISD::XOR &&
DAG.isConstantIntBuildVectorOrConstantInt(N1.getOperand(1)) &&
isAllOnesConstant(N1.getOperand(1))) {

// Check if one operand of N0 is XOR(Constant, a)
SDValue XorOp, OtherOp;
if (N00.getOpcode() == ISD::XOR) {
XorOp = N00;
OtherOp = N01;
} else if (N01.getOpcode() == ISD::XOR) {
XorOp = N01;
OtherOp = N00;
} else {
return SDValue();
}

// Check if XOR has a constant operand
if (DAG.isConstantIntBuildVectorOrConstantInt(XorOp.getOperand(0)) ||
DAG.isConstantIntBuildVectorOrConstantInt(XorOp.getOperand(1))) {
// Transform: AND(AND(XOR(Constant, a), b), NOT(c))
// To: AND(XOR(Constant, a), AND(b, NOT(c)))
// This allows the andn (b & ~c) to be done in parallel with the xor
SDValue NewAnd = DAG.getNode(ISD::AND, DL, VT, OtherOp, N1);
return DAG.getNode(ISD::AND, DL, VT, XorOp, NewAnd);
}
}
}
if (Opc == ISD::XOR) {
// (N00 ^ N01) ^ N00 --> N01
if (N1 == N00)
Expand Down Expand Up @@ -7463,6 +7498,67 @@ SDValue DAGCombiner::visitAND(SDNode *N) {
if (SDValue NewSel = foldBinOpIntoSelect(N))
return NewSel;

// Optimize (Constant XOR a) & b & ~c -> (Constant XOR a) & (b & ~c)
// This allows the andn operation to be done in parallel with the xor
if (TLI.hasAndNot(N1) || TLI.hasAndNot(N0)) {
// Look for pattern: AND(AND(XOR(Constant, a), b), NOT(c))
// Transform to: AND(XOR(Constant, a), AND(b, NOT(c)))

// Handle both operand orders: N0=AND, N1=NOT and N0=NOT, N1=AND
SDValue AndOp, NotOp;
if (N0.getOpcode() == ISD::AND &&
N1.getOpcode() == ISD::XOR &&
DAG.isConstantIntBuildVectorOrConstantInt(N1.getOperand(1)) &&
isAllOnesConstant(N1.getOperand(1))) {
AndOp = N0;
NotOp = N1;
} else if (N1.getOpcode() == ISD::AND &&
N0.getOpcode() == ISD::XOR &&
DAG.isConstantIntBuildVectorOrConstantInt(N0.getOperand(1)) &&
isAllOnesConstant(N0.getOperand(1))) {
AndOp = N1;
NotOp = N0;
} else {
// Pattern doesn't match, continue to next optimization
}

// If we found a valid pattern, check if the AND node has one use
if (AndOp && NotOp && AndOp.hasOneUse()) {
SDValue AndOp0 = AndOp.getOperand(0);
SDValue AndOp1 = AndOp.getOperand(1);

// Check if one operand of AndOp is XOR(Constant, a)
SDValue XorOp, OtherOp;
if (AndOp0.getOpcode() == ISD::XOR) {
XorOp = AndOp0;
OtherOp = AndOp1;
} else if (AndOp1.getOpcode() == ISD::XOR) {
XorOp = AndOp1;
OtherOp = AndOp0;
} else {
// No XOR found in AND operands, continue to next optimization
}

// If we found XOR, check if it has a constant operand (and not all-ones constant to avoid NOT)
if (XorOp && OtherOp &&
((DAG.isConstantIntBuildVectorOrConstantInt(XorOp.getOperand(0)) &&
!isAllOnesConstant(XorOp.getOperand(0))) ||
(DAG.isConstantIntBuildVectorOrConstantInt(XorOp.getOperand(1)) &&
!isAllOnesConstant(XorOp.getOperand(1))))) {
// Prevent infinite loops: only apply if OtherOp is not also a NOT
if (!(OtherOp.getOpcode() == ISD::XOR &&
DAG.isConstantIntBuildVectorOrConstantInt(OtherOp.getOperand(1)) &&
isAllOnesConstant(OtherOp.getOperand(1)))) {
// Transform: AND(AND(XOR(Constant, a), b), NOT(c))
// To: AND(XOR(Constant, a), AND(b, NOT(c)))
// This allows the andn (b & ~c) to be done in parallel with the xor
SDValue NewAnd = DAG.getNode(ISD::AND, DL, VT, OtherOp, NotOp);
return DAG.getNode(ISD::AND, DL, VT, XorOp, NewAnd);
}
}
}
}

// reassociate and
if (SDValue RAND = reassociateOps(ISD::AND, DL, N0, N1, N->getFlags()))
return RAND;
Expand Down
1 change: 1 addition & 0 deletions llvm/lib/Target/X86/X86ISelLowering.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -51541,6 +51541,7 @@ static SDValue combineBMILogicOp(SDNode *N, SelectionDAG &DAG,
return SDValue();
}


/// Fold AND(Y, XOR(X, NEG(X))) -> ANDN(Y, BLSMSK(X)) if BMI is available.
static SDValue combineAndXorSubWithBMI(SDNode *And, const SDLoc &DL,
SelectionDAG &DAG,
Expand Down
63 changes: 63 additions & 0 deletions llvm/test/CodeGen/X86/constant-xor-and-andnot.ll
Original file line number Diff line number Diff line change
@@ -0,0 +1,63 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=x86_64-unknown-unknown -mattr=+bmi < %s | FileCheck %s

; Test the optimization described in issue #161630:
; (Constant XOR a) & b & ~c should be reassociated to (Constant XOR a) & (b & ~c)
; so that the andn can be done in parallel with the xor.

; Baseline i64 case: the inner 'and' takes the xor-with-constant as its first
; operand, and the outer 'and' takes the inverted %c as its second operand.
; In the expected output the andnq reads only %rsi and %rdx, so it does not
; depend on the result of the xorq.
define i64 @test_constant_xor_and_andnot(i64 %a, i64 %b, i64 %c) {
; CHECK-LABEL: test_constant_xor_and_andnot:
; CHECK: # %bb.0:
; CHECK-NEXT: xorq $1234, %rdi # imm = 0x4D2
; CHECK-NEXT: andnq %rsi, %rdx, %rax
; CHECK-NEXT: andq %rdi, %rax
; CHECK-NEXT: retq
%xor = xor i64 %a, 1234
%and1 = and i64 %xor, %b
%not_c = xor i64 %c, -1
%result = and i64 %and1, %not_c
ret i64 %result
}

; i32 variant: xor %a with the constant 5678, 'and' the result with %b, then
; 'and' with the inverted %c. The expected output uses the 32-bit forms
; (xorl / andnl / andl), with the andnl reading only %esi and %edx.
define i32 @test_constant_xor_and_andnot_32(i32 %a, i32 %b, i32 %c) {
; CHECK-LABEL: test_constant_xor_and_andnot_32:
; CHECK: # %bb.0:
; CHECK-NEXT: xorl $5678, %edi # imm = 0x162E
; CHECK-NEXT: andnl %esi, %edx, %eax
; CHECK-NEXT: andl %edi, %eax
; CHECK-NEXT: retq
%xor = xor i32 %a, 5678
%and1 = and i32 %xor, %b
%not_c = xor i32 %c, -1
%result = and i32 %and1, %not_c
ret i32 %result
}

; Test with the operands of the inner 'and' swapped: %b is the first operand
; and the xor-with-constant is the second. The expected output is the same
; instruction sequence as when the xor comes first.
define i64 @test_constant_xor_and_andnot_swapped(i64 %a, i64 %b, i64 %c) {
; CHECK-LABEL: test_constant_xor_and_andnot_swapped:
; CHECK: # %bb.0:
; CHECK-NEXT: xorq $1234, %rdi # imm = 0x4D2
; CHECK-NEXT: andnq %rsi, %rdx, %rax
; CHECK-NEXT: andq %rdi, %rax
; CHECK-NEXT: retq
%xor = xor i64 %a, 1234
%and1 = and i64 %b, %xor
%not_c = xor i64 %c, -1
%result = and i64 %and1, %not_c
ret i64 %result
}

; Test with the operands of the final (outer) 'and' swapped: the inverted %c
; is the first operand and the inner 'and' is the second. The expected output
; is the same instruction sequence as when the inner 'and' comes first.
define i64 @test_constant_xor_and_andnot_final_swapped(i64 %a, i64 %b, i64 %c) {
; CHECK-LABEL: test_constant_xor_and_andnot_final_swapped:
; CHECK: # %bb.0:
; CHECK-NEXT: xorq $1234, %rdi # imm = 0x4D2
; CHECK-NEXT: andnq %rsi, %rdx, %rax
; CHECK-NEXT: andq %rdi, %rax
; CHECK-NEXT: retq
%xor = xor i64 %a, 1234
%and1 = and i64 %xor, %b
%not_c = xor i64 %c, -1
%result = and i64 %not_c, %and1
ret i64 %result
}
12 changes: 6 additions & 6 deletions llvm/test/CodeGen/X86/pr108731.ll
Original file line number Diff line number Diff line change
Expand Up @@ -17,9 +17,9 @@ define i64 @test_i64(i64 %w, i64 %x, i64 %y, i64 %z) {
; BMI-LABEL: test_i64:
; BMI: # %bb.0: # %Entry
; BMI-NEXT: andq %rdx, %rsi
; BMI-NEXT: andnq %rdi, %rsi, %rax
; BMI-NEXT: andnq %rcx, %rdx, %rcx
; BMI-NEXT: andnq %rax, %rcx, %rax
; BMI-NEXT: andnq %rcx, %rdx, %rax
; BMI-NEXT: andnq %rdi, %rax, %rax
; BMI-NEXT: andnq %rax, %rsi, %rax
; BMI-NEXT: retq
Entry:
%and1 = and i64 %y, %x
Expand All @@ -46,9 +46,9 @@ define i32 @test_i32(i32 %w, i32 %x, i32 %y, i32 %z) {
; BMI-LABEL: test_i32:
; BMI: # %bb.0: # %Entry
; BMI-NEXT: andl %edx, %esi
; BMI-NEXT: andnl %edi, %esi, %eax
; BMI-NEXT: andnl %ecx, %edx, %ecx
; BMI-NEXT: andnl %eax, %ecx, %eax
; BMI-NEXT: andnl %ecx, %edx, %eax
; BMI-NEXT: andnl %edi, %eax, %eax
; BMI-NEXT: andnl %eax, %esi, %eax
; BMI-NEXT: retq
Entry:
%and1 = and i32 %y, %x
Expand Down