diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index 6571ddd7cb12b..fce26dd6c21f5 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -14442,15 +14442,23 @@ AArch64TargetLowering::isDesirableToCommuteWithShift(const SDNode *N,
   SDValue ShiftLHS = N->getOperand(0);
   EVT VT = N->getValueType(0);
 
-  // If ShiftLHS is unsigned bit extraction: ((x >> C) & mask), then do not combine
-  // it with shift 'N' to let it be lowered to UBFX.
+  // If ShiftLHS is unsigned bit extraction: ((x >> C) & mask), then do not
+  // combine it with shift 'N' to let it be lowered to UBFX, except for
+  // the case ((x >> C) & mask) << C.
   if (ShiftLHS.getOpcode() == ISD::AND && (VT == MVT::i32 || VT == MVT::i64) &&
       isa<ConstantSDNode>(ShiftLHS.getOperand(1))) {
     uint64_t TruncMask = ShiftLHS.getConstantOperandVal(1);
-    if (isMask_64(TruncMask) &&
-        ShiftLHS.getOperand(0).getOpcode() == ISD::SRL &&
-        isa<ConstantSDNode>(ShiftLHS.getOperand(0).getOperand(1)))
-      return false;
+    if (isMask_64(TruncMask)) {
+      SDValue AndLHS = ShiftLHS.getOperand(0);
+      if (AndLHS.getOpcode() == ISD::SRL) {
+        if (auto *SRLC = dyn_cast<ConstantSDNode>(AndLHS.getOperand(1))) {
+          if (N->getOpcode() == ISD::SHL)
+            if (auto *SHLC = dyn_cast<ConstantSDNode>(N->getOperand(1)))
+              return SRLC->getZExtValue() == SHLC->getZExtValue();
+          return false;
+        }
+      }
+    }
   }
   return true;
 }
diff --git a/llvm/test/CodeGen/AArch64/shift-logic.ll b/llvm/test/CodeGen/AArch64/shift-logic.ll
index af684bbb8aff7..be1ddccf901b8 100644
--- a/llvm/test/CodeGen/AArch64/shift-logic.ll
+++ b/llvm/test/CodeGen/AArch64/shift-logic.ll
@@ -151,3 +151,46 @@ define i32 @lshr_or_extra_use(i32 %x, i32 %y, i32* %p) nounwind {
   %sh1 = lshr i32 %r, 7
   ret i32 %sh1
 }
+
+define i64 @desirable_to_commute1(i64 %x) {
+; CHECK-LABEL: desirable_to_commute1:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    and x0, x0, #0x7fff8
+; CHECK-NEXT:    ret
+  %s1 = lshr i64 %x, 3
+  %a = and i64 %s1, 65535
+  %s2 = shl i64 %a, 3
+  ret i64 %s2
+}
+
+define i64 @desirable_to_commute2(i64* %p, i64 %i) {
+; CHECK-LABEL: desirable_to_commute2:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    and x8, x1, #0x1ff8
+; CHECK-NEXT:    ldr x0, [x0, x8]
+; CHECK-NEXT:    ret
+  %lshr = lshr i64 %i, 3
+  %and = and i64 %lshr, 1023
+  %pidx = getelementptr i64, i64* %p, i64 %and
+  %r = load i64, i64* %pidx
+  ret i64 %r
+}
+
+; Shrinking demanded ops will shrink the shl to i32, so the
+; lshr and the shl will have different shift amount types.
+; Comparing the APInts directly would crash when the types differ.
+define void @apint_type_mismatch(i16 %a, i32* %p) {
+; CHECK-LABEL: apint_type_mismatch:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    and w8, w0, #0x7f8
+; CHECK-NEXT:    str w8, [x1]
+; CHECK-NEXT:    ret
+entry:
+  %lshr = lshr i16 %a, 3
+  %and = and i16 %lshr, 255
+  %zext = zext i16 %and to i64
+  %shl = shl i64 %zext, 3
+  %trunc = trunc i64 %shl to i32
+  store i32 %trunc, i32* %p
+  ret void
+}
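
For reference, a minimal standalone sketch (not part of the patch; the constants are illustrative) of why commuting is sound when the SRL and SHL amounts match: ((x >> C) & mask) << C is exactly the single AND x & (mask << C), which is what the desirable_to_commute tests above check for.

#include <cassert>
#include <cstdint>
#include <initializer_list>

int main() {
  const unsigned C = 3;         // matching lshr/shl shift amount
  const uint64_t Mask = 0xFFFF; // an isMask_64 value, as in desirable_to_commute1
  for (uint64_t X : {0x0ULL, 0x123456789ABCDEFULL, ~0x0ULL}) {
    // Extract bits [C, C+16), then shift them back into place...
    uint64_t Commuted = ((X >> C) & Mask) << C;
    // ...which is the same as masking in place with (Mask << C):
    assert(Commuted == (X & (Mask << C)));
  }
  return 0;
}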
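And a hedged sketch of the width hazard the apint_type_mismatch test guards against (assumes LLVM's ADT headers; the variable names and widths are made up to mirror the test): llvm::APInt::operator== asserts that both operands have equal bit widths, which is why the patch compares the two shift amounts through getZExtValue() rather than comparing the constants' APInt values directly.

#include "llvm/ADT/APInt.h"
#include <cassert>

int main() {
  llvm::APInt ShlAmt(32, 3); // shl amount after shrink-demanded-ops shrank the shl to i32
  llvm::APInt SrlAmt(16, 3); // lshr amount still carries the narrow i16 type
  // ShlAmt == SrlAmt; // would assert: APInt comparison requires equal bit widths
  assert(ShlAmt.getZExtValue() == SrlAmt.getZExtValue()); // width-agnostic compare
  return 0;
}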