diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index 2f8fe09c3dc98..3fdb9bf7e5171 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -26068,9 +26068,11 @@ SDValue DAGCombiner::visitCONCAT_VECTORS(SDNode *N) {
       // If the bitcast type isn't legal, it might be a trunc of a legal type;
       // look through the trunc so we can still do the transform:
       //   concat_vectors(trunc(scalar), undef) -> scalar_to_vector(scalar)
+      // However, this is only equivalent on little-endian targets.
       if (Scalar->getOpcode() == ISD::TRUNCATE &&
           !TLI.isTypeLegal(Scalar.getValueType()) &&
-          TLI.isTypeLegal(Scalar->getOperand(0).getValueType()))
+          TLI.isTypeLegal(Scalar->getOperand(0).getValueType()) &&
+          DAG.getDataLayout().isLittleEndian())
         Scalar = Scalar->getOperand(0);
 
       EVT SclTy = Scalar.getValueType();
diff --git a/llvm/test/CodeGen/SystemZ/vec-trunc-to-i16.ll b/llvm/test/CodeGen/SystemZ/vec-trunc-to-i16.ll
new file mode 100644
index 0000000000000..42d787d945145
--- /dev/null
+++ b/llvm/test/CodeGen/SystemZ/vec-trunc-to-i16.ll
@@ -0,0 +1,45 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s
+
+; Test that truncated scalars use the correct vector insert instruction.
+; On big-endian targets, concat_vectors should not skip truncates when
+; creating scalar_to_vector, as the bytes would be in the wrong position.
+
+; The i16 is a trunc of the i32 argument; it must be inserted with vlvgh (insert halfword), not as the untruncated i32 with vlvgf (insert fullword).
+define <16 x i8> @test_concat_trunc_i16(i32 %x) {
+; CHECK-LABEL: test_concat_trunc_i16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vlvgh %v24, %r2, 0
+; CHECK-NEXT:    br %r14
+  %t = trunc i32 %x to i16
+  %vec = bitcast i16 %t to <2 x i8>
+  %result = shufflevector <2 x i8> %vec, <2 x i8> poison, <16 x i32> <i32 0, i32 1, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
+  ret <16 x i8> %result
+}
+
+; Reduced from a Rust bug report: a loaded i16 is shuffled into elements 9-10 of a vector whose elements 0-3 hold a loaded i32; the i16 must still be inserted with vlvgh.
+define fastcc void @test_shuffle_with_trunc() {
+; CHECK-LABEL: test_shuffle_with_trunc:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    lh %r1, 0
+; CHECK-NEXT:    l %r0, 0
+; CHECK-NEXT:    vlvgh %v1, %r1, 0
+; CHECK-NEXT:    larl %r1, .LCPI1_0
+; CHECK-NEXT:    vl %v2, 0(%r1), 3
+; CHECK-NEXT:    vlvgf %v0, %r0, 0
+; CHECK-NEXT:    vperm %v0, %v0, %v1, %v2
+; CHECK-NEXT:    vst %v0, 0, 3
+; CHECK-NEXT:    br %r14
+  %1 = load i32, ptr null, align 8
+  %2 = load i16, ptr null, align 1
+  br label %3
+
+3:
+  %4 = bitcast i32 %1 to <4 x i8>
+  %5 = shufflevector <4 x i8> %4, <4 x i8> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
+  %6 = bitcast i16 %2 to <2 x i8>
+  %7 = shufflevector <2 x i8> %6, <2 x i8> zeroinitializer, <16 x i32> <i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 0, i32 1, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
+  %8 = shufflevector <16 x i8> %5, <16 x i8> %7, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 25, i32 26, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
+  store <16 x i8> %8, ptr null, align 8
+  ret void
+}