diff --git a/llvm/lib/Target/ARM/ARMInstrInfo.td b/llvm/lib/Target/ARM/ARMInstrInfo.td
index 1efd57ea5e87a..9e491e726b418 100644
--- a/llvm/lib/Target/ARM/ARMInstrInfo.td
+++ b/llvm/lib/Target/ARM/ARMInstrInfo.td
@@ -307,7 +307,22 @@ def ARMLoopDec : SDNode<"ARMISD::LOOP_DEC", SDTIntBinOp, [SDNPHasChain]>;
 // because that's what (MVE) VSTRH.16 followed by VLDRB.8 would do. So the
 // bitconvert would have to emit a VREV16.8 instruction, whereas the
 // VECTOR_REG_CAST emits no code at all if the vector is already in a register.
-def ARMVectorRegCast : SDNode<"ARMISD::VECTOR_REG_CAST", SDTUnaryOp>;
+def ARMVectorRegCastImpl : SDNode<"ARMISD::VECTOR_REG_CAST", SDTUnaryOp>;
+
+// In little-endian, VECTOR_REG_CAST is often turned into bitconvert during
+// lowering (because in that situation they're identical). So an isel pattern
+// that needs to match something that's _logically_ a VECTOR_REG_CAST must
+// _physically_ match a different node type depending on endianness.
+//
+// This 'PatFrags' instance is a centralized facility to make that easy. It
+// matches VECTOR_REG_CAST in either endianness, and also bitconvert in the
+// endianness where it's equivalent.
+def ARMVectorRegCast: PatFrags<
+    (ops node:$x), [(ARMVectorRegCastImpl node:$x), (bitconvert node:$x)], [{
+      // Reject a match against bitconvert (aka ISD::BITCAST) if big-endian
+      return !(CurDAG->getDataLayout().isBigEndian() &&
+               N->getOpcode() == ISD::BITCAST);
+    }]>;
 
 //===----------------------------------------------------------------------===//
 // ARM Flag Definitions.
diff --git a/llvm/lib/Target/ARM/ARMInstrMVE.td b/llvm/lib/Target/ARM/ARMInstrMVE.td
index 0583a26dffeab..fffd86e0152b4 100644
--- a/llvm/lib/Target/ARM/ARMInstrMVE.td
+++ b/llvm/lib/Target/ARM/ARMInstrMVE.td
@@ -4013,9 +4013,15 @@ let Predicates = [HasMVEInt] in {
                 (VT (COPY_TO_REGCLASS (VT2 VCCR:$src), VCCR))>;
   }
 
+  // Here we match the specific SDNode type 'ARMVectorRegCastImpl'
+  // rather than the more general 'ARMVectorRegCast' which would also
+  // match some bitconverts. If we use the latter in cases where the
+  // input and output types are the same, the bitconvert gets elided
+  // and we end up generating a nonsense match of nothing.
+
   foreach VT = [ v16i8, v8i16, v8f16, v4i32, v4f32, v2i64, v2f64 ] in
     foreach VT2 = [ v16i8, v8i16, v8f16, v4i32, v4f32, v2i64, v2f64 ] in
-      def : Pat<(VT (ARMVectorRegCast (VT2 MQPR:$src))), (VT MQPR:$src)>;
+      def : Pat<(VT (ARMVectorRegCastImpl (VT2 MQPR:$src))), (VT MQPR:$src)>;
 
 } // end of MVE compares
 
diff --git a/llvm/lib/Target/ARM/ARMInstrNEON.td b/llvm/lib/Target/ARM/ARMInstrNEON.td
index d0801b84682fb..19a901bfc811e 100644
--- a/llvm/lib/Target/ARM/ARMInstrNEON.td
+++ b/llvm/lib/Target/ARM/ARMInstrNEON.td
@@ -7532,13 +7532,19 @@ let Predicates = [IsBE,HasNEON] in {
 }
 
 let Predicates = [HasNEON] in {
+  // Here we match the specific SDNode type 'ARMVectorRegCastImpl'
+  // rather than the more general 'ARMVectorRegCast' which would also
+  // match some bitconverts. If we use the latter in cases where the
+  // input and output types are the same, the bitconvert gets elided
+  // and we end up generating a nonsense match of nothing.
+
   foreach VT = [ v16i8, v8i16, v8f16, v4i32, v4f32, v2i64, v2f64 ] in
     foreach VT2 = [ v16i8, v8i16, v8f16, v4i32, v4f32, v2i64, v2f64 ] in
-      def : Pat<(VT (ARMVectorRegCast (VT2 QPR:$src))), (VT QPR:$src)>;
+      def : Pat<(VT (ARMVectorRegCastImpl (VT2 QPR:$src))), (VT QPR:$src)>;
 
   foreach VT = [ v8i8, v4i16, v4f16, v2i32, v2f32, v1i64, f64 ] in
     foreach VT2 = [ v8i8, v4i16, v4f16, v2i32, v2f32, v1i64, f64 ] in
-      def : Pat<(VT (ARMVectorRegCast (VT2 DPR:$src))), (VT DPR:$src)>;
+      def : Pat<(VT (ARMVectorRegCastImpl (VT2 DPR:$src))), (VT DPR:$src)>;
 }
 
 // Use VLD1/VST1 + VREV for non-word-aligned v2f64 load/store on Big Endian