diff --git a/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp b/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp
index 5a43b8b20db9c5..f26c194d31b9c2 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp
@@ -1113,10 +1113,27 @@ Instruction *InstCombinerImpl::foldSelectValueEquivalence(SelectInst &Sel,
   // replacement cycle.
   Value *CmpLHS = Cmp.getOperand(0), *CmpRHS = Cmp.getOperand(1);
   if (TrueVal != CmpLHS &&
-      isGuaranteedNotToBeUndefOrPoison(CmpRHS, SQ.AC, &Sel, &DT))
+      isGuaranteedNotToBeUndefOrPoison(CmpRHS, SQ.AC, &Sel, &DT)) {
     if (Value *V = SimplifyWithOpReplaced(TrueVal, CmpLHS, CmpRHS, SQ,
                                           /* AllowRefinement */ true))
       return replaceOperand(Sel, Swapped ? 2 : 1, V);
+
+    // Even if TrueVal does not simplify, we can directly replace a use of
+    // CmpLHS with CmpRHS, as long as the instruction is not used anywhere
+    // else and is safe to speculatively execute (we may end up executing it
+    // with different operands, which should not cause side-effects or trigger
+    // undefined behavior). Only do this if CmpRHS is a constant, as
+    // profitability is not clear for other cases.
+    // FIXME: The replacement could be performed recursively.
+    if (match(CmpRHS, m_ImmConstant()) && !match(CmpLHS, m_ImmConstant()))
+      if (auto *I = dyn_cast<Instruction>(TrueVal))
+        if (I->hasOneUse() && isSafeToSpeculativelyExecute(I))
+          for (Use &U : I->operands())
+            if (U == CmpLHS) {
+              replaceUse(U, CmpRHS);
+              return &Sel;
+            }
+  }
   if (TrueVal != CmpRHS &&
       isGuaranteedNotToBeUndefOrPoison(CmpLHS, SQ.AC, &Sel, &DT))
     if (Value *V = SimplifyWithOpReplaced(TrueVal, CmpRHS, CmpLHS, SQ,
diff --git a/llvm/test/Transforms/InstCombine/select-binop-cmp.ll b/llvm/test/Transforms/InstCombine/select-binop-cmp.ll
index 99670c4cc1d3b5..bbf7456ae81199 100644
--- a/llvm/test/Transforms/InstCombine/select-binop-cmp.ll
+++ b/llvm/test/Transforms/InstCombine/select-binop-cmp.ll
@@ -502,7 +502,7 @@ define i32 @select_xor_icmp_bad_2(i32 %x, i32 %y, i32 %z, i32 %k) {
 define i32 @select_xor_icmp_bad_3(i32 %x, i32 %y, i32 %z) {
 ; CHECK-LABEL: @select_xor_icmp_bad_3(
 ; CHECK-NEXT:    [[A:%.*]] = icmp eq i32 [[X:%.*]], 3
-; CHECK-NEXT:    [[B:%.*]] = xor i32 [[X]], [[Z:%.*]]
+; CHECK-NEXT:    [[B:%.*]] = xor i32 [[Z:%.*]], 3
 ; CHECK-NEXT:    [[C:%.*]] = select i1 [[A]], i32 [[B]], i32 [[Y:%.*]]
 ; CHECK-NEXT:    ret i32 [[C]]
 ;
@@ -541,7 +541,7 @@ define i32 @select_xor_icmp_bad_5(i32 %x, i32 %y, i32 %z) {
 define i32 @select_xor_icmp_bad_6(i32 %x, i32 %y, i32 %z) {
 ; CHECK-LABEL: @select_xor_icmp_bad_6(
 ; CHECK-NEXT:    [[A_NOT:%.*]] = icmp eq i32 [[X:%.*]], 1
-; CHECK-NEXT:    [[B:%.*]] = xor i32 [[X]], [[Z:%.*]]
+; CHECK-NEXT:    [[B:%.*]] = xor i32 [[Z:%.*]], 1
 ; CHECK-NEXT:    [[C:%.*]] = select i1 [[A_NOT]], i32 [[B]], i32 [[Y:%.*]]
 ; CHECK-NEXT:    ret i32 [[C]]
 ;
@@ -554,7 +554,7 @@ define i32 @select_xor_icmp_bad_6(i32 %x, i32 %y, i32 %z) {
 define <2 x i8> @select_xor_icmp_vec_bad(<2 x i8> %x, <2 x i8> %y, <2 x i8> %z) {
 ; CHECK-LABEL: @select_xor_icmp_vec_bad(
 ; CHECK-NEXT:    [[A:%.*]] = icmp eq <2 x i8> [[X:%.*]], <i8 5, i8 3>
-; CHECK-NEXT:    [[B:%.*]] = xor <2 x i8> [[X]], [[Z:%.*]]
+; CHECK-NEXT:    [[B:%.*]] = xor <2 x i8> [[Z:%.*]], <i8 5, i8 3>
 ; CHECK-NEXT:    [[C:%.*]] = select <2 x i1> [[A]], <2 x i8> [[B]], <2 x i8> [[Y:%.*]]
 ; CHECK-NEXT:    ret <2 x i8> [[C]]
 ;
@@ -581,7 +581,7 @@ define <2 x i8> @select_xor_icmp_vec_undef(<2 x i8> %x, <2 x i8> %y, <2 x i8> %z
 define i32 @select_mul_icmp_bad(i32 %x, i32 %y, i32 %z, i32 %k) {
 ; CHECK-LABEL: @select_mul_icmp_bad(
 ; CHECK-NEXT:    [[A:%.*]] = icmp eq i32 [[X:%.*]], 3
-; CHECK-NEXT:    [[B:%.*]] = mul i32 [[X]], [[Z:%.*]]
+; CHECK-NEXT:    [[B:%.*]] = mul i32 [[Z:%.*]], 3
 ; CHECK-NEXT:    [[C:%.*]] = select i1 [[A]], i32 [[B]], i32 [[Y:%.*]]
 ; CHECK-NEXT:    ret i32 [[C]]
 ;
@@ -594,7 +594,7 @@ define i32 @select_mul_icmp_bad(i32 %x, i32 %y, i32 %z, i32 %k) {
 define i32 @select_add_icmp_bad(i32 %x, i32 %y, i32 %z) {
 ; CHECK-LABEL: @select_add_icmp_bad(
 ; CHECK-NEXT:    [[A:%.*]] = icmp eq i32 [[X:%.*]], 1
-; CHECK-NEXT:    [[B:%.*]] = add i32 [[X]], [[Z:%.*]]
+; CHECK-NEXT:    [[B:%.*]] = add i32 [[Z:%.*]], 1
 ; CHECK-NEXT:    [[C:%.*]] = select i1 [[A]], i32 [[B]], i32 [[Y:%.*]]
 ; CHECK-NEXT:    ret i32 [[C]]
 ;
@@ -619,7 +619,7 @@ define i32 @select_and_icmp_zero(i32 %x, i32 %y, i32 %z) {
 define i32 @select_or_icmp_bad(i32 %x, i32 %y, i32 %z) {
 ; CHECK-LABEL: @select_or_icmp_bad(
 ; CHECK-NEXT:    [[A:%.*]] = icmp eq i32 [[X:%.*]], 3
-; CHECK-NEXT:    [[B:%.*]] = or i32 [[X]], [[Z:%.*]]
+; CHECK-NEXT:    [[B:%.*]] = or i32 [[Z:%.*]], 3
 ; CHECK-NEXT:    [[C:%.*]] = select i1 [[A]], i32 [[B]], i32 [[Y:%.*]]
 ; CHECK-NEXT:    ret i32 [[C]]
 ;
@@ -940,7 +940,7 @@ define float @select_fsub_fcmp_bad_2(float %x, float %y, float %z) {
 define i32 @select_sub_icmp_bad(i32 %x, i32 %y, i32 %z) {
 ; CHECK-LABEL: @select_sub_icmp_bad(
 ; CHECK-NEXT:    [[A:%.*]] = icmp eq i32 [[X:%.*]], 0
-; CHECK-NEXT:    [[B:%.*]] = sub i32 [[X]], [[Z:%.*]]
+; CHECK-NEXT:    [[B:%.*]] = sub i32 0, [[Z:%.*]]
 ; CHECK-NEXT:    [[C:%.*]] = select i1 [[A]], i32 [[B]], i32 [[Y:%.*]]
 ; CHECK-NEXT:    ret i32 [[C]]
 ;
@@ -953,7 +953,7 @@ define i32 @select_sub_icmp_bad(i32 %x, i32 %y, i32 %z) {
 define i32 @select_sub_icmp_bad_2(i32 %x, i32 %y, i32 %z) {
 ; CHECK-LABEL: @select_sub_icmp_bad_2(
 ; CHECK-NEXT:    [[A:%.*]] = icmp eq i32 [[X:%.*]], 1
-; CHECK-NEXT:    [[B:%.*]] = sub i32 [[Z:%.*]], [[X]]
+; CHECK-NEXT:    [[B:%.*]] = add i32 [[Z:%.*]], -1
 ; CHECK-NEXT:    [[C:%.*]] = select i1 [[A]], i32 [[B]], i32 [[Y:%.*]]
 ; CHECK-NEXT:    ret i32 [[C]]
 ;
@@ -1022,7 +1022,7 @@ define i32 @select_sub_icmp_bad_5(i32 %x, i32 %y, i32 %z, i32 %k) {
 define i32 @select_shl_icmp_bad(i32 %x, i32 %y, i32 %z) {
 ; CHECK-LABEL: @select_shl_icmp_bad(
 ; CHECK-NEXT:    [[A:%.*]] = icmp eq i32 [[X:%.*]], 1
-; CHECK-NEXT:    [[B:%.*]] = shl i32 [[Z:%.*]], [[X]]
+; CHECK-NEXT:    [[B:%.*]] = shl i32 [[Z:%.*]], 1
 ; CHECK-NEXT:    [[C:%.*]] = select i1 [[A]], i32 [[B]], i32 [[Y:%.*]]
 ; CHECK-NEXT:    ret i32 [[C]]
 ;
@@ -1035,7 +1035,7 @@ define i32 @select_shl_icmp_bad(i32 %x, i32 %y, i32 %z) {
 define i32 @select_lshr_icmp_bad(i32 %x, i32 %y, i32 %z) {
 ; CHECK-LABEL: @select_lshr_icmp_bad(
 ; CHECK-NEXT:    [[A:%.*]] = icmp eq i32 [[X:%.*]], 1
-; CHECK-NEXT:    [[B:%.*]] = lshr i32 [[Z:%.*]], [[X]]
+; CHECK-NEXT:    [[B:%.*]] = lshr i32 [[Z:%.*]], 1
 ; CHECK-NEXT:    [[C:%.*]] = select i1 [[A]], i32 [[B]], i32 [[Y:%.*]]
 ; CHECK-NEXT:    ret i32 [[C]]
 ;
@@ -1048,7 +1048,7 @@ define i32 @select_lshr_icmp_bad(i32 %x, i32 %y, i32 %z) {
 define i32 @select_ashr_icmp_bad(i32 %x, i32 %y, i32 %z) {
 ; CHECK-LABEL: @select_ashr_icmp_bad(
 ; CHECK-NEXT:    [[A:%.*]] = icmp eq i32 [[X:%.*]], 1
-; CHECK-NEXT:    [[B:%.*]] = ashr i32 [[Z:%.*]], [[X]]
+; CHECK-NEXT:    [[B:%.*]] = ashr i32 [[Z:%.*]], 1
 ; CHECK-NEXT:    [[C:%.*]] = select i1 [[A]], i32 [[B]], i32 [[Y:%.*]]
 ; CHECK-NEXT:    ret i32 [[C]]
 ;
@@ -1084,10 +1084,11 @@ define i32 @select_sdiv_icmp_bad(i32 %x, i32 %y, i32 %z) {
   ret i32 %C
 }
 
+; Can replace %x with 0, because sub is only used in the select.
 define i32 @select_replace_one_use(i32 %x, i32 %y) {
 ; CHECK-LABEL: @select_replace_one_use(
 ; CHECK-NEXT:    [[C:%.*]] = icmp eq i32 [[X:%.*]], 0
-; CHECK-NEXT:    [[SUB:%.*]] = sub i32 [[X]], [[Y:%.*]]
+; CHECK-NEXT:    [[SUB:%.*]] = sub i32 0, [[Y:%.*]]
 ; CHECK-NEXT:    [[S:%.*]] = select i1 [[C]], i32 [[SUB]], i32 [[Y]]
 ; CHECK-NEXT:    ret i32 [[S]]
 ;
@@ -1097,6 +1098,7 @@ define i32 @select_replace_one_use(i32 %x, i32 %y) {
   ret i32 %s
 }
 
+; Can not replace %x with 0, because %sub has other uses as well.
 define i32 @select_replace_multi_use(i32 %x, i32 %y) {
 ; CHECK-LABEL: @select_replace_multi_use(
 ; CHECK-NEXT:    [[C:%.*]] = icmp eq i32 [[X:%.*]], 0
@@ -1112,11 +1114,11 @@ define i32 @select_replace_multi_use(i32 %x, i32 %y) {
   ret i32 %s
 }
 
+; Case where the replacement allows the instruction to fold away.
 define i32 @select_replace_fold(i32 %x, i32 %y, i32 %z) {
 ; CHECK-LABEL: @select_replace_fold(
 ; CHECK-NEXT:    [[C:%.*]] = icmp eq i32 [[X:%.*]], 0
-; CHECK-NEXT:    [[FSHR:%.*]] = call i32 @llvm.fshr.i32(i32 [[Y:%.*]], i32 [[Z:%.*]], i32 [[X]])
-; CHECK-NEXT:    [[S:%.*]] = select i1 [[C]], i32 [[FSHR]], i32 [[Y]]
+; CHECK-NEXT:    [[S:%.*]] = select i1 [[C]], i32 [[Z:%.*]], i32 [[Y:%.*]]
 ; CHECK-NEXT:    ret i32 [[S]]
 ;
   %c = icmp eq i32 %x, 0
@@ -1125,6 +1127,9 @@ define i32 @select_replace_fold(i32 %x, i32 %y, i32 %z) {
   ret i32 %s
 }
 
+
+; Case where the use of %x is in a nested instruction.
+; FIXME: We only perform replacements one level up right now.
 define i32 @select_replace_nested(i32 %x, i32 %y, i32 %z) {
 ; CHECK-LABEL: @select_replace_nested(
 ; CHECK-NEXT:    [[C:%.*]] = icmp eq i32 [[X:%.*]], 0
@@ -1140,6 +1145,9 @@ define i32 @select_replace_nested(i32 %x, i32 %y, i32 %z) {
   ret i32 %s
 }
 
+; Do not replace with constant expressions. The profitability in this case is
+; unclear, and such replacements have historically lead to infinite combine
+; loops.
 define i32 @select_replace_constexpr(i32 %x, i32 %y, i32 %z) {
 ; CHECK-LABEL: @select_replace_constexpr(
 ; CHECK-NEXT:    [[C:%.*]] = icmp eq i32 [[X:%.*]], ptrtoint (i32* @g to i32)
@@ -1153,6 +1161,8 @@ define i32 @select_replace_constexpr(i32 %x, i32 %y, i32 %z) {
   ret i32 %s
 }
 
+; Don't replace with a potentially undef constant, as undef could evaluate
+; to different values for both uses.
 define <2 x i32> @select_replace_undef(<2 x i32> %x, <2 x i32> %y) {
 ; CHECK-LABEL: @select_replace_undef(
 ; CHECK-NEXT:    [[C:%.*]] = icmp eq <2 x i32> [[X:%.*]], <i32 0, i32 undef>
@@ -1166,10 +1176,11 @@ define <2 x i32> @select_replace_undef(<2 x i32> %x, <2 x i32> %y) {
   ret <2 x i32> %s
 }
 
+; We can replace the call arguments, as the call is speculatable.
 define i32 @select_replace_call_speculatable(i32 %x, i32 %y) {
 ; CHECK-LABEL: @select_replace_call_speculatable(
 ; CHECK-NEXT:    [[C:%.*]] = icmp eq i32 [[X:%.*]], 0
-; CHECK-NEXT:    [[CALL:%.*]] = call i32 @call_speculatable(i32 [[X]], i32 [[X]])
+; CHECK-NEXT:    [[CALL:%.*]] = call i32 @call_speculatable(i32 0, i32 0)
 ; CHECK-NEXT:    [[S:%.*]] = select i1 [[C]], i32 [[CALL]], i32 [[Y:%.*]]
 ; CHECK-NEXT:    ret i32 [[S]]
 ;
@@ -1179,6 +1190,8 @@ define i32 @select_replace_call_speculatable(i32 %x, i32 %y) {
   ret i32 %s
 }
 
+; We can't replace the call arguments, as the call is not speculatable. We
+; may end up changing side-effects or causing undefined behavior.
 define i32 @select_replace_call_non_speculatable(i32 %x, i32 %y) {
 ; CHECK-LABEL: @select_replace_call_non_speculatable(
 ; CHECK-NEXT:    [[C:%.*]] = icmp eq i32 [[X:%.*]], 0
@@ -1192,6 +1205,8 @@ define i32 @select_replace_call_non_speculatable(i32 %x, i32 %y) {
   ret i32 %s
 }
 
+; We can replace %x by 2 here, because division by two cannot cause UB.
+; FIXME: As we check speculation prior to replacement, we don't catch this.
 define i32 @select_replace_sdiv_speculatable(i32 %x, i32 %y) {
 ; CHECK-LABEL: @select_replace_sdiv_speculatable(
 ; CHECK-NEXT:    [[C:%.*]] = icmp eq i32 [[X:%.*]], 2
@@ -1205,6 +1220,7 @@ define i32 @select_replace_sdiv_speculatable(i32 %x, i32 %y) {
   ret i32 %s
 }
 
+; We cannot replace %x by -1, because division by -1 can cause UB.
 define i32 @select_replace_sdiv_non_speculatable(i32 %x, i32 %y) {
 ; CHECK-LABEL: @select_replace_sdiv_non_speculatable(
 ; CHECK-NEXT:    [[C:%.*]] = icmp eq i32 [[X:%.*]], -1
@@ -1218,6 +1234,8 @@ define i32 @select_replace_sdiv_non_speculatable(i32 %x, i32 %y) {
   ret i32 %s
 }
 
+; We can replace %x by 2 here, because division by two cannot cause UB.
+; FIXME: As we check speculation prior to replacement, we don't catch this.
 define i32 @select_replace_udiv_speculatable(i32 %x, i32 %y) {
 ; CHECK-LABEL: @select_replace_udiv_speculatable(
 ; CHECK-NEXT:    [[C:%.*]] = icmp eq i32 [[X:%.*]], 2
@@ -1231,6 +1249,8 @@ define i32 @select_replace_udiv_speculatable(i32 %x, i32 %y) {
   ret i32 %s
 }
 
+; We can't replace %x by 0 here, because that would cause UB. However,
+; replacing the udiv result by poisong is fine.
 define i32 @select_replace_udiv_non_speculatable(i32 %x, i32 %y) {
 ; CHECK-LABEL: @select_replace_udiv_non_speculatable(
 ; CHECK-NEXT:    [[C:%.*]] = icmp eq i32 [[X:%.*]], 0
@@ -1243,6 +1263,8 @@ define i32 @select_replace_udiv_non_speculatable(i32 %x, i32 %y) {
   ret i32 %s
 }
 
+; We can't replace %i in the phi node here, because it refers to %i from
+; the previous loop iteration, not the current one.
 define void @select_replace_phi(i32 %x) {
 ; CHECK-LABEL: @select_replace_phi(
 ; CHECK-NEXT:  entry:
diff --git a/llvm/test/Transforms/InstCombine/select-safe-transforms.ll b/llvm/test/Transforms/InstCombine/select-safe-transforms.ll
index 4bdf516d73f02c..35f100302d47b2 100644
--- a/llvm/test/Transforms/InstCombine/select-safe-transforms.ll
+++ b/llvm/test/Transforms/InstCombine/select-safe-transforms.ll
@@ -17,7 +17,7 @@ define i1 @cond_eq_and(i8 %X, i8 %Y, i8 noundef %C) {
 define i1 @cond_eq_and_const(i8 %X, i8 %Y) {
 ; CHECK-LABEL: @cond_eq_and_const(
 ; CHECK-NEXT:    [[COND:%.*]] = icmp eq i8 [[X:%.*]], 10
-; CHECK-NEXT:    [[LHS:%.*]] = icmp ult i8 [[X]], [[Y:%.*]]
+; CHECK-NEXT:    [[LHS:%.*]] = icmp ugt i8 [[Y:%.*]], 10
 ; CHECK-NEXT:    [[RES:%.*]] = select i1 [[COND]], i1 [[LHS]], i1 false
 ; CHECK-NEXT:    ret i1 [[RES]]
 ;
@@ -43,7 +43,7 @@ define i1 @cond_eq_or(i8 %X, i8 %Y, i8 noundef %C) {
 define i1 @cond_eq_or_const(i8 %X, i8 %Y) {
 ; CHECK-LABEL: @cond_eq_or_const(
 ; CHECK-NEXT:    [[COND:%.*]] = icmp ne i8 [[X:%.*]], 10
-; CHECK-NEXT:    [[LHS:%.*]] = icmp ult i8 [[X]], [[Y:%.*]]
+; CHECK-NEXT:    [[LHS:%.*]] = icmp ugt i8 [[Y:%.*]], 10
 ; CHECK-NEXT:    [[RES:%.*]] = select i1 [[COND]], i1 true, i1 [[LHS]]
 ; CHECK-NEXT:    ret i1 [[RES]]
 ;