diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index c372919f44f709..9666d71288a349 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -52606,7 +52606,8 @@ static SDValue combineAddOfPMADDWD(SelectionDAG &DAG, SDValue N0, SDValue N1, /// Try to fold those constants into an 'add' instruction to reduce instruction /// count. We do this with CMOV rather the generic 'select' because there are /// earlier folds that may be used to turn select-of-constants into logic hacks. -static SDValue pushAddIntoCmovOfConsts(SDNode *N, SelectionDAG &DAG) { +static SDValue pushAddIntoCmovOfConsts(SDNode *N, SelectionDAG &DAG, + const X86Subtarget &Subtarget) { // If an operand is zero, add-of-0 gets simplified away, so that's clearly // better because we eliminate 1-2 instructions. This transform is still // an improvement without zero operands because we trade 2 move constants and @@ -52631,6 +52632,11 @@ static SDValue pushAddIntoCmovOfConsts(SDNode *N, SelectionDAG &DAG) { if (!isSuitableCmov(Cmov)) return SDValue(); + // Don't remove a load folding opportunity for the add. That would neutralize + // any improvements from removing constant materializations. + if (X86::mayFoldLoad(OtherOp, Subtarget)) + return SDValue(); + EVT VT = N->getValueType(0); SDLoc DL(N); SDValue FalseOp = Cmov.getOperand(0); @@ -52673,7 +52679,7 @@ static SDValue combineAdd(SDNode *N, SelectionDAG &DAG, SDValue Op1 = N->getOperand(1); SDLoc DL(N); - if (SDValue Select = pushAddIntoCmovOfConsts(N, DAG)) + if (SDValue Select = pushAddIntoCmovOfConsts(N, DAG, Subtarget)) return Select; if (SDValue MAdd = matchPMADDWD(DAG, Op0, Op1, DL, VT, Subtarget)) diff --git a/llvm/test/CodeGen/X86/add-cmov.ll b/llvm/test/CodeGen/X86/add-cmov.ll index a47cad269da967..492feff344152b 100644 --- a/llvm/test/CodeGen/X86/add-cmov.ll +++ b/llvm/test/CodeGen/X86/add-cmov.ll @@ -477,12 +477,11 @@ define void @complex_lea_alt8(i1 %b, i16* readnone %ptr, i64 %idx) { define i32 @loadfold_select_const_arms(i32* %x, i1 %y) { ; CHECK-LABEL: loadfold_select_const_arms: ; CHECK: # %bb.0: -; CHECK-NEXT: movl (%rdi), %eax -; CHECK-NEXT: leal -10(%rax), %ecx -; CHECK-NEXT: addl $10, %eax ; CHECK-NEXT: testb $1, %sil -; CHECK-NEXT: cmovel %ecx, %eax -; CHECK-NEXT: # kill: def $eax killed $eax killed $rax +; CHECK-NEXT: movl $10, %ecx +; CHECK-NEXT: movl $-10, %eax +; CHECK-NEXT: cmovnel %ecx, %eax +; CHECK-NEXT: addl (%rdi), %eax ; CHECK-NEXT: retq %cond = select i1 %y, i32 10, i32 -10 %t0 = load i32, i32* %x, align 4 @@ -522,12 +521,11 @@ define void @rmw_add_select_const_arm(i32* %x, i1 %y, i32 %z) { define void @rmw_select_const_arms(i32* %x, i1 %y) { ; CHECK-LABEL: rmw_select_const_arms: ; CHECK: # %bb.0: -; CHECK-NEXT: movl (%rdi), %eax -; CHECK-NEXT: leal -10(%rax), %ecx -; CHECK-NEXT: addl $10, %eax ; CHECK-NEXT: testb $1, %sil -; CHECK-NEXT: cmovel %ecx, %eax -; CHECK-NEXT: movl %eax, (%rdi) +; CHECK-NEXT: movl $10, %eax +; CHECK-NEXT: movl $-10, %ecx +; CHECK-NEXT: cmovnel %eax, %ecx +; CHECK-NEXT: addl %ecx, (%rdi) ; CHECK-NEXT: retq %cond = select i1 %y, i32 10, i32 -10 %t0 = load i32, i32* %x, align 4 @@ -557,13 +555,12 @@ define i32 @rmw_select_const_arms_extra_load_use(i32* %x, i1 %y) { define i32 @rmw_select_const_arms_extra_add_use(i32* %x, i1 %y) { ; CHECK-LABEL: rmw_select_const_arms_extra_add_use: ; CHECK: # %bb.0: -; CHECK-NEXT: movl (%rdi), %eax -; CHECK-NEXT: leal -10(%rax), %ecx -; CHECK-NEXT: addl $10, %eax ; CHECK-NEXT: testb $1, %sil -; CHECK-NEXT: cmovel %ecx, %eax +; CHECK-NEXT: movl $10, %ecx +; CHECK-NEXT: movl $-10, %eax +; CHECK-NEXT: cmovnel %ecx, %eax +; CHECK-NEXT: addl (%rdi), %eax ; CHECK-NEXT: movl %eax, (%rdi) -; CHECK-NEXT: # kill: def $eax killed $eax killed $rax ; CHECK-NEXT: retq %cond = select i1 %y, i32 10, i32 -10 %t0 = load i32, i32* %x, align 4