diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index c372919f44f709..9666d71288a349 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -52606,7 +52606,8 @@ static SDValue combineAddOfPMADDWD(SelectionDAG &DAG, SDValue N0, SDValue N1,
 /// Try to fold those constants into an 'add' instruction to reduce instruction
 /// count. We do this with CMOV rather the generic 'select' because there are
 /// earlier folds that may be used to turn select-of-constants into logic hacks.
-static SDValue pushAddIntoCmovOfConsts(SDNode *N, SelectionDAG &DAG) {
+static SDValue pushAddIntoCmovOfConsts(SDNode *N, SelectionDAG &DAG,
+                                       const X86Subtarget &Subtarget) {
   // If an operand is zero, add-of-0 gets simplified away, so that's clearly
   // better because we eliminate 1-2 instructions. This transform is still
   // an improvement without zero operands because we trade 2 move constants and
@@ -52631,6 +52632,11 @@ static SDValue pushAddIntoCmovOfConsts(SDNode *N, SelectionDAG &DAG) {
   if (!isSuitableCmov(Cmov))
     return SDValue();
 
+  // Bail out if OtherOp is a foldable load: pushing the add into the cmov arms
+  // would lose that fold and cancel the savings from dropping constant moves.
+  if (X86::mayFoldLoad(OtherOp, Subtarget))
+    return SDValue();
+
   EVT VT = N->getValueType(0);
   SDLoc DL(N);
   SDValue FalseOp = Cmov.getOperand(0);
@@ -52673,7 +52679,7 @@ static SDValue combineAdd(SDNode *N, SelectionDAG &DAG,
   SDValue Op1 = N->getOperand(1);
   SDLoc DL(N);
 
-  if (SDValue Select = pushAddIntoCmovOfConsts(N, DAG))
+  if (SDValue Select = pushAddIntoCmovOfConsts(N, DAG, Subtarget))
     return Select;
 
   if (SDValue MAdd = matchPMADDWD(DAG, Op0, Op1, DL, VT, Subtarget))
diff --git a/llvm/test/CodeGen/X86/add-cmov.ll b/llvm/test/CodeGen/X86/add-cmov.ll
index a47cad269da967..492feff344152b 100644
--- a/llvm/test/CodeGen/X86/add-cmov.ll
+++ b/llvm/test/CodeGen/X86/add-cmov.ll
@@ -477,12 +477,11 @@ define void @complex_lea_alt8(i1 %b, i16* readnone %ptr, i64 %idx) {
 define i32 @loadfold_select_const_arms(i32* %x, i1 %y) {
 ; CHECK-LABEL: loadfold_select_const_arms:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    movl (%rdi), %eax
-; CHECK-NEXT:    leal -10(%rax), %ecx
-; CHECK-NEXT:    addl $10, %eax
 ; CHECK-NEXT:    testb $1, %sil
-; CHECK-NEXT:    cmovel %ecx, %eax
-; CHECK-NEXT:    # kill: def $eax killed $eax killed $rax
+; CHECK-NEXT:    movl $10, %ecx
+; CHECK-NEXT:    movl $-10, %eax
+; CHECK-NEXT:    cmovnel %ecx, %eax
+; CHECK-NEXT:    addl (%rdi), %eax
 ; CHECK-NEXT:    retq
   %cond = select i1 %y, i32 10, i32 -10
   %t0 = load i32, i32* %x, align 4
@@ -522,12 +521,11 @@ define void @rmw_add_select_const_arm(i32* %x, i1 %y, i32 %z) {
 define void @rmw_select_const_arms(i32* %x, i1 %y) {
 ; CHECK-LABEL: rmw_select_const_arms:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    movl (%rdi), %eax
-; CHECK-NEXT:    leal -10(%rax), %ecx
-; CHECK-NEXT:    addl $10, %eax
 ; CHECK-NEXT:    testb $1, %sil
-; CHECK-NEXT:    cmovel %ecx, %eax
-; CHECK-NEXT:    movl %eax, (%rdi)
+; CHECK-NEXT:    movl $10, %eax
+; CHECK-NEXT:    movl $-10, %ecx
+; CHECK-NEXT:    cmovnel %eax, %ecx
+; CHECK-NEXT:    addl %ecx, (%rdi)
 ; CHECK-NEXT:    retq
   %cond = select i1 %y, i32 10, i32 -10
   %t0 = load i32, i32* %x, align 4
@@ -557,13 +555,12 @@ define i32 @rmw_select_const_arms_extra_load_use(i32* %x, i1 %y) {
 define i32 @rmw_select_const_arms_extra_add_use(i32* %x, i1 %y) {
 ; CHECK-LABEL: rmw_select_const_arms_extra_add_use:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    movl (%rdi), %eax
-; CHECK-NEXT:    leal -10(%rax), %ecx
-; CHECK-NEXT:    addl $10, %eax
 ; CHECK-NEXT:    testb $1, %sil
-; CHECK-NEXT:    cmovel %ecx, %eax
+; CHECK-NEXT:    movl $10, %ecx
+; CHECK-NEXT:    movl $-10, %eax
+; CHECK-NEXT:    cmovnel %ecx, %eax
+; CHECK-NEXT:    addl (%rdi), %eax
 ; CHECK-NEXT:    movl %eax, (%rdi)
-; CHECK-NEXT:    # kill: def $eax killed $eax killed $rax
 ; CHECK-NEXT:    retq
   %cond = select i1 %y, i32 10, i32 -10
   %t0 = load i32, i32* %x, align 4