diff --git a/clang/include/clang/AST/OperationKinds.def b/clang/include/clang/AST/OperationKinds.def index c2dca895e8411..8a13ad988403b 100644 --- a/clang/include/clang/AST/OperationKinds.def +++ b/clang/include/clang/AST/OperationKinds.def @@ -364,6 +364,9 @@ CAST_OPERATION(IntToOCLSampler) // Truncate a vector type by dropping elements from the end (HLSL only). CAST_OPERATION(HLSLVectorTruncation) +// Truncate a matrix type by dropping elements from the end (HLSL only). +CAST_OPERATION(HLSLMatrixTruncation) + // Non-decaying array RValue cast (HLSL only). CAST_OPERATION(HLSLArrayRValue) diff --git a/clang/include/clang/Basic/DiagnosticSemaKinds.td b/clang/include/clang/Basic/DiagnosticSemaKinds.td index a6e60fe4692ee..063204b540461 100644 --- a/clang/include/clang/Basic/DiagnosticSemaKinds.td +++ b/clang/include/clang/Basic/DiagnosticSemaKinds.td @@ -4338,6 +4338,9 @@ def warn_param_typestate_mismatch : Warning< def warn_unknown_sanitizer_ignored : Warning< "unknown sanitizer '%0' ignored">, InGroup; +def warn_impcast_matrix_scalar : Warning< + "implicit conversion turns matrix to scalar: %0 to %1">, + InGroup, DefaultIgnore; def warn_impcast_vector_scalar : Warning< "implicit conversion turns vector to scalar: %0 to %1">, InGroup, DefaultIgnore; @@ -13245,6 +13248,9 @@ def err_hlsl_builtin_scalar_vector_mismatch def warn_hlsl_impcast_vector_truncation : Warning< "implicit conversion truncates vector: %0 to %1">, InGroup; +def warn_hlsl_impcast_matrix_truncation : Warning< + "implicit conversion truncates matrix: %0 to %1">, InGroup; + def warn_hlsl_availability : Warning< "%0 is only available %select{|in %4 environment }3on %1 %2 or newer">, InGroup, DefaultError; diff --git a/clang/include/clang/Sema/Overload.h b/clang/include/clang/Sema/Overload.h index 59bbd0fbd9e95..ab45328ee8ab7 100644 --- a/clang/include/clang/Sema/Overload.h +++ b/clang/include/clang/Sema/Overload.h @@ -198,6 +198,9 @@ class Sema; /// HLSL vector truncation. ICK_HLSL_Vector_Truncation, + /// HLSL Matrix truncation. + ICK_HLSL_Matrix_Truncation, + /// HLSL non-decaying array rvalue cast. ICK_HLSL_Array_RValue, diff --git a/clang/lib/AST/Expr.cpp b/clang/lib/AST/Expr.cpp index 1d914fa876759..159ea4867857d 100644 --- a/clang/lib/AST/Expr.cpp +++ b/clang/lib/AST/Expr.cpp @@ -1937,6 +1937,7 @@ bool CastExpr::CastConsistency() const { case CK_FixedPointToBoolean: case CK_HLSLArrayRValue: case CK_HLSLVectorTruncation: + case CK_HLSLMatrixTruncation: case CK_HLSLElementwiseCast: case CK_HLSLAggregateSplatCast: CheckNoBasePath: diff --git a/clang/lib/AST/ExprConstant.cpp b/clang/lib/AST/ExprConstant.cpp index 74f6e3acb6b39..44f82cd9b8168 100644 --- a/clang/lib/AST/ExprConstant.cpp +++ b/clang/lib/AST/ExprConstant.cpp @@ -11773,6 +11773,10 @@ bool VectorExprEvaluator::VisitCastExpr(const CastExpr *E) { Elements.push_back(Val.getVectorElt(I)); return Success(Elements, E); } + case CK_HLSLMatrixTruncation: { + // TODO: See #168935. Add matrix truncation support to expr constant. + return Error(E); + } case CK_HLSLAggregateSplatCast: { APValue Val; QualType ValTy; @@ -18011,6 +18015,10 @@ bool IntExprEvaluator::VisitCastExpr(const CastExpr *E) { return Error(E); return Success(Val.getVectorElt(0), E); } + case CK_HLSLMatrixTruncation: { + // TODO: See #168935. Add matrix truncation support to expr constant. 
+ return Error(E); + } case CK_HLSLElementwiseCast: { SmallVector SrcVals; SmallVector SrcTypes; @@ -18604,6 +18612,10 @@ bool FloatExprEvaluator::VisitCastExpr(const CastExpr *E) { return Error(E); return Success(Val.getVectorElt(0), E); } + case CK_HLSLMatrixTruncation: { + // TODO: See #168935. Add matrix truncation support to expr constant. + return Error(E); + } case CK_HLSLElementwiseCast: { SmallVector SrcVals; SmallVector SrcTypes; @@ -18761,6 +18773,7 @@ bool ComplexExprEvaluator::VisitCastExpr(const CastExpr *E) { case CK_IntegralToFixedPoint: case CK_MatrixCast: case CK_HLSLVectorTruncation: + case CK_HLSLMatrixTruncation: case CK_HLSLElementwiseCast: case CK_HLSLAggregateSplatCast: llvm_unreachable("invalid cast kind for complex value"); diff --git a/clang/lib/CIR/CodeGen/CIRGenExpr.cpp b/clang/lib/CIR/CodeGen/CIRGenExpr.cpp index 8607558c1cf7d..abfbca16cd60b 100644 --- a/clang/lib/CIR/CodeGen/CIRGenExpr.cpp +++ b/clang/lib/CIR/CodeGen/CIRGenExpr.cpp @@ -188,6 +188,7 @@ Address CIRGenFunction::emitPointerWithAlignment(const Expr *expr, case CK_HLSLArrayRValue: case CK_HLSLElementwiseCast: case CK_HLSLVectorTruncation: + case CK_HLSLMatrixTruncation: case CK_IntToOCLSampler: case CK_IntegralCast: case CK_IntegralComplexCast: @@ -1279,6 +1280,7 @@ LValue CIRGenFunction::emitCastLValue(const CastExpr *e) { case CK_IntegralToFixedPoint: case CK_MatrixCast: case CK_HLSLVectorTruncation: + case CK_HLSLMatrixTruncation: case CK_HLSLArrayRValue: case CK_HLSLElementwiseCast: case CK_HLSLAggregateSplatCast: diff --git a/clang/lib/CIR/CodeGen/CIRGenExprComplex.cpp b/clang/lib/CIR/CodeGen/CIRGenExprComplex.cpp index 9ed920085c8c6..fe06f8cc2c430 100644 --- a/clang/lib/CIR/CodeGen/CIRGenExprComplex.cpp +++ b/clang/lib/CIR/CodeGen/CIRGenExprComplex.cpp @@ -534,6 +534,7 @@ mlir::Value ComplexExprEmitter::emitCast(CastKind ck, Expr *op, case CK_IntegralToFixedPoint: case CK_MatrixCast: case CK_HLSLVectorTruncation: + case CK_HLSLMatrixTruncation: case CK_HLSLArrayRValue: case CK_HLSLElementwiseCast: case CK_HLSLAggregateSplatCast: diff --git a/clang/lib/CIR/CodeGen/CIRGenExprConstant.cpp b/clang/lib/CIR/CodeGen/CIRGenExprConstant.cpp index 6af87a0159f0a..7ce02f9b42af4 100644 --- a/clang/lib/CIR/CodeGen/CIRGenExprConstant.cpp +++ b/clang/lib/CIR/CodeGen/CIRGenExprConstant.cpp @@ -1012,6 +1012,7 @@ class ConstExprEmitter case CK_MatrixCast: case CK_HLSLArrayRValue: case CK_HLSLVectorTruncation: + case CK_HLSLMatrixTruncation: case CK_HLSLElementwiseCast: case CK_HLSLAggregateSplatCast: return {}; diff --git a/clang/lib/CodeGen/CGExpr.cpp b/clang/lib/CodeGen/CGExpr.cpp index f2451b16e78be..1737301c67021 100644 --- a/clang/lib/CodeGen/CGExpr.cpp +++ b/clang/lib/CodeGen/CGExpr.cpp @@ -5744,6 +5744,7 @@ LValue CodeGenFunction::EmitCastLValue(const CastExpr *E) { case CK_IntegralToFixedPoint: case CK_MatrixCast: case CK_HLSLVectorTruncation: + case CK_HLSLMatrixTruncation: case CK_HLSLArrayRValue: case CK_HLSLElementwiseCast: case CK_HLSLAggregateSplatCast: diff --git a/clang/lib/CodeGen/CGExprAgg.cpp b/clang/lib/CodeGen/CGExprAgg.cpp index 67b5f919d1b2a..7cc4d6c8f06f6 100644 --- a/clang/lib/CodeGen/CGExprAgg.cpp +++ b/clang/lib/CodeGen/CGExprAgg.cpp @@ -1036,7 +1036,7 @@ void AggExprEmitter::VisitCastExpr(CastExpr *E) { case CK_ZeroToOCLOpaqueType: case CK_MatrixCast: case CK_HLSLVectorTruncation: - + case CK_HLSLMatrixTruncation: case CK_IntToOCLSampler: case CK_FloatingToFixedPoint: case CK_FixedPointToFloating: @@ -1550,6 +1550,7 @@ static bool castPreservesZero(const CastExpr *CE) { case 
CK_NonAtomicToAtomic: case CK_AtomicToNonAtomic: case CK_HLSLVectorTruncation: + case CK_HLSLMatrixTruncation: case CK_HLSLElementwiseCast: case CK_HLSLAggregateSplatCast: return true; diff --git a/clang/lib/CodeGen/CGExprComplex.cpp b/clang/lib/CodeGen/CGExprComplex.cpp index f8a946a76554a..e6683d4c931b8 100644 --- a/clang/lib/CodeGen/CGExprComplex.cpp +++ b/clang/lib/CodeGen/CGExprComplex.cpp @@ -621,6 +621,7 @@ ComplexPairTy ComplexExprEmitter::EmitCast(CastKind CK, Expr *Op, case CK_IntegralToFixedPoint: case CK_MatrixCast: case CK_HLSLVectorTruncation: + case CK_HLSLMatrixTruncation: case CK_HLSLArrayRValue: case CK_HLSLElementwiseCast: case CK_HLSLAggregateSplatCast: diff --git a/clang/lib/CodeGen/CGExprConstant.cpp b/clang/lib/CodeGen/CGExprConstant.cpp index 6407afc3d9447..0eec4dba4824a 100644 --- a/clang/lib/CodeGen/CGExprConstant.cpp +++ b/clang/lib/CodeGen/CGExprConstant.cpp @@ -1333,6 +1333,7 @@ class ConstExprEmitter case CK_ZeroToOCLOpaqueType: case CK_MatrixCast: case CK_HLSLVectorTruncation: + case CK_HLSLMatrixTruncation: case CK_HLSLArrayRValue: case CK_HLSLElementwiseCast: case CK_HLSLAggregateSplatCast: diff --git a/clang/lib/CodeGen/CGExprScalar.cpp b/clang/lib/CodeGen/CGExprScalar.cpp index 714192db1b15c..a9e2ebdffa59a 100644 --- a/clang/lib/CodeGen/CGExprScalar.cpp +++ b/clang/lib/CodeGen/CGExprScalar.cpp @@ -2422,9 +2422,27 @@ static Value *EmitHLSLElementwiseCast(CodeGenFunction &CGF, LValue SrcVal, } return V; } + if (auto *MatTy = DestTy->getAs()) { + assert(LoadList.size() >= MatTy->getNumElementsFlattened() && + "Flattened type on RHS must have the same number or more elements " + "than vector on LHS."); + llvm::Value *V = + CGF.Builder.CreateLoad(CGF.CreateIRTemp(DestTy, "flatcast.tmp")); + // write to V. + for (unsigned I = 0, E = MatTy->getNumElementsFlattened(); I < E; I++) { + RValue RVal = CGF.EmitLoadOfLValue(LoadList[I], Loc); + assert(RVal.isScalar() && + "All flattened source values should be scalars."); + llvm::Value *Cast = + CGF.EmitScalarConversion(RVal.getScalarVal(), LoadList[I].getType(), + MatTy->getElementType(), Loc); + V = CGF.Builder.CreateInsertElement(V, Cast, I); + } + return V; + } // if its a builtin just do an extract element or load. 
   assert(DestTy->isBuiltinType() &&
-         "Destination type must be a vector or builtin type.");
+         "Destination type must be a vector, matrix, or builtin type.");
   RValue RVal = CGF.EmitLoadOfLValue(LoadList[0], Loc);
   assert(RVal.isScalar() && "All flattened source values should be scalars.");
   return CGF.EmitScalarConversion(RVal.getScalarVal(), LoadList[0].getType(),
@@ -2954,6 +2972,21 @@ Value *ScalarExprEmitter::VisitCastExpr(CastExpr *CE) {
     llvm::Value *Zero = llvm::Constant::getNullValue(CGF.SizeTy);
     return Builder.CreateExtractElement(Vec, Zero, "cast.vtrunc");
   }
+  case CK_HLSLMatrixTruncation: {
+    assert((DestTy->isMatrixType() || DestTy->isBuiltinType()) &&
+           "Destination type must be a matrix or builtin type.");
+    Value *Mat = Visit(E);
+    if (auto *MatTy = DestTy->getAs<ConstantMatrixType>()) {
+      SmallVector<int> Mask;
+      unsigned NumElts = MatTy->getNumElementsFlattened();
+      for (unsigned I = 0; I != NumElts; ++I)
+        Mask.push_back(I);
+
+      return Builder.CreateShuffleVector(Mat, Mask, "trunc");
+    }
+    llvm::Value *Zero = llvm::Constant::getNullValue(CGF.SizeTy);
+    return Builder.CreateExtractElement(Mat, Zero, "cast.mtrunc");
+  }
   case CK_HLSLElementwiseCast: {
     RValue RV = CGF.EmitAnyExpr(E);
     SourceLocation Loc = CE->getExprLoc();
diff --git a/clang/lib/Edit/RewriteObjCFoundationAPI.cpp b/clang/lib/Edit/RewriteObjCFoundationAPI.cpp
index 40f8348241ecc..e8d4660fd36b2 100644
--- a/clang/lib/Edit/RewriteObjCFoundationAPI.cpp
+++ b/clang/lib/Edit/RewriteObjCFoundationAPI.cpp
@@ -1085,6 +1085,7 @@ static bool rewriteToNumericBoxedExpression(const ObjCMessageExpr *Msg,
     llvm_unreachable("OpenCL-specific cast in Objective-C?");

   case CK_HLSLVectorTruncation:
+  case CK_HLSLMatrixTruncation:
   case CK_HLSLElementwiseCast:
   case CK_HLSLAggregateSplatCast:
     llvm_unreachable("HLSL-specific cast in Objective-C?");
diff --git a/clang/lib/Sema/SemaChecking.cpp b/clang/lib/Sema/SemaChecking.cpp
index cf407f7279c46..4f042f9300da7 100644
--- a/clang/lib/Sema/SemaChecking.cpp
+++ b/clang/lib/Sema/SemaChecking.cpp
@@ -37,6 +37,7 @@
 #include "clang/AST/TemplateBase.h"
 #include "clang/AST/TemplateName.h"
 #include "clang/AST/Type.h"
+#include "clang/AST/TypeBase.h"
 #include "clang/AST/TypeLoc.h"
 #include "clang/AST/UnresolvedSet.h"
 #include "clang/Basic/AddressSpaces.h"
@@ -12586,6 +12587,19 @@ void Sema::CheckImplicitConversion(Expr *E, QualType T, SourceLocation CC,
   if (auto VecTy = dyn_cast<VectorType>(Target))
     Target = VecTy->getElementType().getTypePtr();

+  if (isa<ConstantMatrixType>(Source)) {
+    if (!isa<ConstantMatrixType>(Target)) {
+      return DiagnoseImpCast(*this, E, T, CC, diag::warn_impcast_matrix_scalar);
+    } else if (getLangOpts().HLSL &&
+               Target->castAs<ConstantMatrixType>()->getNumElementsFlattened() <
+                   Source->castAs<ConstantMatrixType>()
+                       ->getNumElementsFlattened()) {
+      // Diagnose Matrix truncation but don't return. We may also want to
+      // diagnose an element conversion.
+      DiagnoseImpCast(*this, E, T, CC,
+                      diag::warn_hlsl_impcast_matrix_truncation);
+    }
+  }
   // Strip complex types.
   if (isa<ComplexType>(Source)) {
     if (!isa<ComplexType>(Target)) {
diff --git a/clang/lib/Sema/SemaExprCXX.cpp b/clang/lib/Sema/SemaExprCXX.cpp
index dc7ed4e9a48bc..be3ac296f2597 100644
--- a/clang/lib/Sema/SemaExprCXX.cpp
+++ b/clang/lib/Sema/SemaExprCXX.cpp
@@ -5197,6 +5197,7 @@ Sema::PerformImplicitConversion(Expr *From, QualType ToType,
   case ICK_Incompatible_Pointer_Conversion:
   case ICK_HLSL_Array_RValue:
   case ICK_HLSL_Vector_Truncation:
+  case ICK_HLSL_Matrix_Truncation:
   case ICK_HLSL_Vector_Splat:
     llvm_unreachable("Improper second standard conversion");
   }
@@ -5204,12 +5205,10 @@ Sema::PerformImplicitConversion(Expr *From, QualType ToType,
   if (SCS.Dimension != ICK_Identity) {
     // If SCS.Element is not ICK_Identity the To and From types must be HLSL
     // vectors or matrices.
-
-    // TODO: Support HLSL matrices.
-    assert((!From->getType()->isMatrixType() && !ToType->isMatrixType()) &&
-           "Dimension conversion for matrix types is not implemented yet.");
-    assert((ToType->isVectorType() || ToType->isBuiltinType()) &&
-           "Dimension conversion output must be vector or scalar type.");
+    assert(
+        (ToType->isVectorType() || ToType->isConstantMatrixType() ||
+         ToType->isBuiltinType()) &&
+        "Dimension conversion output must be vector, matrix, or scalar type.");

     switch (SCS.Dimension) {
     case ICK_HLSL_Vector_Splat: {
       // Vector splat from any arithmetic type to a vector.
@@ -5235,6 +5234,17 @@ Sema::PerformImplicitConversion(Expr *From, QualType ToType,
       break;
     }
+    case ICK_HLSL_Matrix_Truncation: {
+      auto *FromMat = From->getType()->castAs<ConstantMatrixType>();
+      QualType TruncTy = FromMat->getElementType();
+      if (auto *ToMat = ToType->getAs<ConstantMatrixType>())
+        TruncTy = Context.getConstantMatrixType(TruncTy, ToMat->getNumRows(),
+                                                ToMat->getNumColumns());
+      From = ImpCastExprToType(From, TruncTy, CK_HLSLMatrixTruncation,
+                               From->getValueKind())
+                 .get();
+      break;
+    }
     case ICK_Identity:
     default:
       llvm_unreachable("Improper element standard conversion");
diff --git a/clang/lib/Sema/SemaHLSL.cpp b/clang/lib/Sema/SemaHLSL.cpp
index 5555916c2536f..168bfc3da99e0 100644
--- a/clang/lib/Sema/SemaHLSL.cpp
+++ b/clang/lib/Sema/SemaHLSL.cpp
@@ -3728,7 +3728,6 @@ bool SemaHLSL::CanPerformAggregateSplatCast(Expr *Src, QualType DestTy) {
 }

 // Can we perform an HLSL Elementwise cast?
-// TODO: update this code when matrices are added; see issue #88060
 bool SemaHLSL::CanPerformElementwiseCast(Expr *Src, QualType DestTy) {

   // Don't handle casts where LHS and RHS are any combination of scalar/vector
@@ -3741,6 +3740,10 @@ bool SemaHLSL::CanPerformElementwiseCast(Expr *Src, QualType DestTy) {
       (DestTy->isScalarType() || DestTy->isVectorType()))
     return false;

+  if (SrcTy->isConstantMatrixType() &&
+      (DestTy->isScalarType() || DestTy->isConstantMatrixType()))
+    return false;
+
   llvm::SmallVector DestTypes;
   BuildFlattenedTypeList(DestTy, DestTypes);
   llvm::SmallVector SrcTypes;
diff --git a/clang/lib/Sema/SemaOverload.cpp b/clang/lib/Sema/SemaOverload.cpp
index 37f351174e3d0..0809ff252b5a6 100644
--- a/clang/lib/Sema/SemaOverload.cpp
+++ b/clang/lib/Sema/SemaOverload.cpp
@@ -162,6 +162,7 @@ ImplicitConversionRank clang::GetConversionRank(ImplicitConversionKind Kind) {
       ICR_C_Conversion_Extension,
       ICR_Conversion,
       ICR_HLSL_Dimension_Reduction,
+      ICR_HLSL_Dimension_Reduction,
       ICR_Conversion,
       ICR_HLSL_Scalar_Widening,
   };
@@ -224,6 +225,7 @@ static const char *GetImplicitConversionName(ImplicitConversionKind Kind) {
       "Incompatible pointer conversion",
       "Fixed point conversion",
       "HLSL vector truncation",
+      "HLSL matrix truncation",
       "Non-decaying array conversion",
       "HLSL vector splat",
   };
@@ -2046,9 +2048,10 @@ static bool IsFloatingPointConversion(Sema &S, QualType FromType,
   return true;
 }

-static bool IsVectorElementConversion(Sema &S, QualType FromType,
-                                      QualType ToType,
-                                      ImplicitConversionKind &ICK, Expr *From) {
+static bool IsVectorOrMatrixElementConversion(Sema &S, QualType FromType,
+                                              QualType ToType,
+                                              ImplicitConversionKind &ICK,
+                                              Expr *From) {
   if (S.Context.hasSameUnqualifiedType(FromType, ToType))
     return true;

@@ -2088,6 +2091,57 @@ static bool IsVectorElementConversion(Sema &S, QualType FromType,
   return false;
 }

+/// Determine whether the conversion from FromType to ToType is a valid
+/// matrix conversion.
+///
+/// \param ICK Will be set to the matrix conversion kind, if this is a matrix
+/// conversion.
+static bool IsMatrixConversion(Sema &S, QualType FromType, QualType ToType,
+                               ImplicitConversionKind &ICK,
+                               ImplicitConversionKind &ElConv, Expr *From,
+                               bool InOverloadResolution, bool CStyle) {
+  // The non-HLSL matrix conversion rules are not clear.
+  if (!S.getLangOpts().HLSL)
+    return false;
+
+  auto *ToMatrixType = ToType->getAs<ConstantMatrixType>();
+  auto *FromMatrixType = FromType->getAs<ConstantMatrixType>();
+
+  // If both types are matrices, handle possible matrix truncation and
+  // element conversion.
+ if (ToMatrixType && FromMatrixType) { + unsigned FromCols = FromMatrixType->getNumColumns(); + unsigned ToCols = ToMatrixType->getNumColumns(); + if (FromCols < ToCols) + return false; + + unsigned FromRows = FromMatrixType->getNumRows(); + unsigned ToRows = ToMatrixType->getNumRows(); + if (FromRows < ToRows) + return false; + + if (FromRows == ToRows && FromCols == ToCols) + ElConv = ICK_Identity; + else + ElConv = ICK_HLSL_Matrix_Truncation; + + QualType FromElTy = FromMatrixType->getElementType(); + QualType ToElTy = ToMatrixType->getElementType(); + if (S.Context.hasSameUnqualifiedType(FromElTy, ToElTy)) + return true; + return IsVectorOrMatrixElementConversion(S, FromElTy, ToElTy, ICK, From); + } + if (FromMatrixType && !ToMatrixType) { + ElConv = ICK_HLSL_Matrix_Truncation; + QualType FromElTy = FromMatrixType->getElementType(); + if (S.Context.hasSameUnqualifiedType(FromElTy, ToType)) + return true; + return IsVectorOrMatrixElementConversion(S, FromElTy, ToType, ICK, From); + } + + return false; +} + /// Determine whether the conversion from FromType to ToType is a valid /// vector conversion. /// @@ -2127,14 +2181,14 @@ static bool IsVectorConversion(Sema &S, QualType FromType, QualType ToType, QualType ToElTy = ToExtType->getElementType(); if (S.Context.hasSameUnqualifiedType(FromElTy, ToElTy)) return true; - return IsVectorElementConversion(S, FromElTy, ToElTy, ICK, From); + return IsVectorOrMatrixElementConversion(S, FromElTy, ToElTy, ICK, From); } if (FromExtType && !ToExtType) { ElConv = ICK_HLSL_Vector_Truncation; QualType FromElTy = FromExtType->getElementType(); if (S.Context.hasSameUnqualifiedType(FromElTy, ToType)) return true; - return IsVectorElementConversion(S, FromElTy, ToType, ICK, From); + return IsVectorOrMatrixElementConversion(S, FromElTy, ToType, ICK, From); } // Fallthrough for the case where ToType is a vector and FromType is not. 
} @@ -2161,7 +2215,8 @@ static bool IsVectorConversion(Sema &S, QualType FromType, QualType ToType, if (S.getLangOpts().HLSL) { ElConv = ICK_HLSL_Vector_Splat; QualType ToElTy = ToExtType->getElementType(); - return IsVectorElementConversion(S, FromType, ToElTy, ICK, From); + return IsVectorOrMatrixElementConversion(S, FromType, ToElTy, ICK, + From); } ICK = ICK_Vector_Splat; return true; @@ -2460,6 +2515,11 @@ static bool IsStandardConversion(Sema &S, Expr* From, QualType ToType, SCS.Second = SecondICK; SCS.Dimension = DimensionICK; FromType = ToType.getUnqualifiedType(); + } else if (IsMatrixConversion(S, FromType, ToType, SecondICK, DimensionICK, + From, InOverloadResolution, CStyle)) { + SCS.Second = SecondICK; + SCS.Dimension = DimensionICK; + FromType = ToType.getUnqualifiedType(); } else if (!S.getLangOpts().CPlusPlus && S.Context.typesAreCompatible(ToType, FromType)) { // Compatible conversions (Clang extension for C function overloading) @@ -6237,6 +6297,7 @@ static bool CheckConvertedConstantConversions(Sema &S, case ICK_Incompatible_Pointer_Conversion: case ICK_Fixed_Point_Conversion: case ICK_HLSL_Vector_Truncation: + case ICK_HLSL_Matrix_Truncation: return false; case ICK_Lvalue_To_Rvalue: diff --git a/clang/lib/StaticAnalyzer/Core/ExprEngineC.cpp b/clang/lib/StaticAnalyzer/Core/ExprEngineC.cpp index 4ddf8fd5b4b0f..db27c06cd18a3 100644 --- a/clang/lib/StaticAnalyzer/Core/ExprEngineC.cpp +++ b/clang/lib/StaticAnalyzer/Core/ExprEngineC.cpp @@ -560,6 +560,7 @@ void ExprEngine::VisitCast(const CastExpr *CastE, const Expr *Ex, case CK_VectorSplat: case CK_HLSLElementwiseCast: case CK_HLSLAggregateSplatCast: + case CK_HLSLMatrixTruncation: case CK_HLSLVectorTruncation: { QualType resultType = CastE->getType(); if (CastE->isGLValue()) diff --git a/clang/test/CodeGenHLSL/BasicFeatures/MatrixElementTypeCast.hlsl b/clang/test/CodeGenHLSL/BasicFeatures/MatrixElementTypeCast.hlsl new file mode 100644 index 0000000000000..081b8013efcbc --- /dev/null +++ b/clang/test/CodeGenHLSL/BasicFeatures/MatrixElementTypeCast.hlsl @@ -0,0 +1,186 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 6 +// RUN: %clang_cc1 -finclude-default-header -triple dxil-pc-shadermodel6.3-library -x hlsl -emit-llvm -disable-llvm-passes -fnative-half-type -fnative-int16-type -o - %s | FileCheck %s + + +// CHECK-LABEL: define hidden noundef <6 x i32> @_Z22elementwise_type_cast0u11matrix_typeILm3ELm2EfE( +// CHECK-SAME: <6 x float> noundef nofpclass(nan inf) [[F32:%.*]]) #[[ATTR0:[0-9]+]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[F32_ADDR:%.*]] = alloca [6 x float], align 4 +// CHECK-NEXT: [[I32:%.*]] = alloca [6 x i32], align 4 +// CHECK-NEXT: store <6 x float> [[F32]], ptr [[F32_ADDR]], align 4 +// CHECK-NEXT: [[TMP0:%.*]] = load <6 x float>, ptr [[F32_ADDR]], align 4 +// CHECK-NEXT: [[CONV:%.*]] = fptosi <6 x float> [[TMP0]] to <6 x i32> +// CHECK-NEXT: store <6 x i32> [[CONV]], ptr [[I32]], align 4 +// CHECK-NEXT: [[TMP1:%.*]] = load <6 x i32>, ptr [[I32]], align 4 +// CHECK-NEXT: ret <6 x i32> [[TMP1]] +// +int3x2 elementwise_type_cast0(float3x2 f32) { + int3x2 i32 = (int3x2)f32; + return i32; +} + +// CHECK-LABEL: define hidden noundef <6 x i32> @_Z22elementwise_type_cast1u11matrix_typeILm3ELm2EsE( +// CHECK-SAME: <6 x i16> noundef [[I16_32:%.*]]) #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[I16_32_ADDR:%.*]] = alloca [6 x i16], align 2 +// CHECK-NEXT: [[I32:%.*]] = alloca [6 x i32], align 4 +// CHECK-NEXT: store <6 x i16> [[I16_32]], ptr 
[[I16_32_ADDR]], align 2 +// CHECK-NEXT: [[TMP0:%.*]] = load <6 x i16>, ptr [[I16_32_ADDR]], align 2 +// CHECK-NEXT: [[CONV:%.*]] = sext <6 x i16> [[TMP0]] to <6 x i32> +// CHECK-NEXT: store <6 x i32> [[CONV]], ptr [[I32]], align 4 +// CHECK-NEXT: [[TMP1:%.*]] = load <6 x i32>, ptr [[I32]], align 4 +// CHECK-NEXT: ret <6 x i32> [[TMP1]] +// +int3x2 elementwise_type_cast1(int16_t3x2 i16_32) { + int3x2 i32 = (int3x2)i16_32; + return i32; +} + +// CHECK-LABEL: define hidden noundef <6 x i32> @_Z22elementwise_type_cast2u11matrix_typeILm3ELm2ElE( +// CHECK-SAME: <6 x i64> noundef [[I64_32:%.*]]) #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[I64_32_ADDR:%.*]] = alloca [6 x i64], align 8 +// CHECK-NEXT: [[I32:%.*]] = alloca [6 x i32], align 4 +// CHECK-NEXT: store <6 x i64> [[I64_32]], ptr [[I64_32_ADDR]], align 8 +// CHECK-NEXT: [[TMP0:%.*]] = load <6 x i64>, ptr [[I64_32_ADDR]], align 8 +// CHECK-NEXT: [[CONV:%.*]] = trunc <6 x i64> [[TMP0]] to <6 x i32> +// CHECK-NEXT: store <6 x i32> [[CONV]], ptr [[I32]], align 4 +// CHECK-NEXT: [[TMP1:%.*]] = load <6 x i32>, ptr [[I32]], align 4 +// CHECK-NEXT: ret <6 x i32> [[TMP1]] +// +int3x2 elementwise_type_cast2(int64_t3x2 i64_32) { + int3x2 i32 = (int3x2)i64_32; + return i32; +} + +// CHECK-LABEL: define hidden noundef <6 x i16> @_Z22elementwise_type_cast3u11matrix_typeILm2ELm3EDhE( +// CHECK-SAME: <6 x half> noundef nofpclass(nan inf) [[H23:%.*]]) #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[H23_ADDR:%.*]] = alloca [6 x half], align 2 +// CHECK-NEXT: [[I23:%.*]] = alloca [6 x i16], align 2 +// CHECK-NEXT: store <6 x half> [[H23]], ptr [[H23_ADDR]], align 2 +// CHECK-NEXT: [[TMP0:%.*]] = load <6 x half>, ptr [[H23_ADDR]], align 2 +// CHECK-NEXT: [[CONV:%.*]] = fptosi <6 x half> [[TMP0]] to <6 x i16> +// CHECK-NEXT: store <6 x i16> [[CONV]], ptr [[I23]], align 2 +// CHECK-NEXT: [[TMP1:%.*]] = load <6 x i16>, ptr [[I23]], align 2 +// CHECK-NEXT: ret <6 x i16> [[TMP1]] +// +int16_t2x3 elementwise_type_cast3(half2x3 h23) { + int16_t2x3 i23 = (int16_t2x3)h23; + return i23; +} + +// CHECK-LABEL: define hidden noundef <6 x i32> @_Z22elementwise_type_cast4u11matrix_typeILm3ELm2EdE( +// CHECK-SAME: <6 x double> noundef nofpclass(nan inf) [[D32:%.*]]) #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[D32_ADDR:%.*]] = alloca [6 x double], align 8 +// CHECK-NEXT: [[I32:%.*]] = alloca [6 x i32], align 4 +// CHECK-NEXT: store <6 x double> [[D32]], ptr [[D32_ADDR]], align 8 +// CHECK-NEXT: [[TMP0:%.*]] = load <6 x double>, ptr [[D32_ADDR]], align 8 +// CHECK-NEXT: [[CONV:%.*]] = fptosi <6 x double> [[TMP0]] to <6 x i32> +// CHECK-NEXT: store <6 x i32> [[CONV]], ptr [[I32]], align 4 +// CHECK-NEXT: [[TMP1:%.*]] = load <6 x i32>, ptr [[I32]], align 4 +// CHECK-NEXT: ret <6 x i32> [[TMP1]] +// +int3x2 elementwise_type_cast4(double3x2 d32) { + int3x2 i32 = (int3x2)d32; + return i32; +} + +// CHECK-LABEL: define hidden void @_Z5call2v( +// CHECK-SAME: ) #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[A:%.*]] = alloca [2 x [1 x i32]], align 4 +// CHECK-NEXT: [[B:%.*]] = alloca [2 x i32], align 4 +// CHECK-NEXT: [[AGG_TEMP:%.*]] = alloca [2 x [1 x i32]], align 4 +// CHECK-NEXT: [[FLATCAST_TMP:%.*]] = alloca <2 x i32>, align 4 +// CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[A]], ptr align 4 @__const._Z5call2v.A, i32 8, i1 false) +// CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[AGG_TEMP]], ptr align 4 [[A]], i32 8, i1 false) +// CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds [2 x [1 x 
i32]], ptr [[AGG_TEMP]], i32 0, i32 0, i32 0 +// CHECK-NEXT: [[GEP1:%.*]] = getelementptr inbounds [2 x [1 x i32]], ptr [[AGG_TEMP]], i32 0, i32 1, i32 0 +// CHECK-NEXT: [[TMP0:%.*]] = load <2 x i32>, ptr [[FLATCAST_TMP]], align 4 +// CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[GEP]], align 4 +// CHECK-NEXT: [[TMP2:%.*]] = insertelement <2 x i32> [[TMP0]], i32 [[TMP1]], i64 0 +// CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr [[GEP1]], align 4 +// CHECK-NEXT: [[TMP4:%.*]] = insertelement <2 x i32> [[TMP2]], i32 [[TMP3]], i64 1 +// CHECK-NEXT: store <2 x i32> [[TMP4]], ptr [[B]], align 4 +// CHECK-NEXT: ret void +// +void call2() { + int A[2][1] = {{1},{2}}; + int2x1 B = (int2x1)A; +} + +struct S { + int X; + float Y; +}; + +// CHECK-LABEL: define hidden void @_Z5call3v( +// CHECK-SAME: ) #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[S:%.*]] = alloca [[STRUCT_S:%.*]], align 1 +// CHECK-NEXT: [[A:%.*]] = alloca [2 x i32], align 4 +// CHECK-NEXT: [[AGG_TEMP:%.*]] = alloca [[STRUCT_S]], align 1 +// CHECK-NEXT: [[FLATCAST_TMP:%.*]] = alloca <2 x i32>, align 4 +// CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 1 [[S]], ptr align 1 @__const._Z5call3v.s, i32 8, i1 false) +// CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 1 [[AGG_TEMP]], ptr align 1 [[S]], i32 8, i1 false) +// CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[AGG_TEMP]], i32 0, i32 0 +// CHECK-NEXT: [[GEP1:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[AGG_TEMP]], i32 0, i32 1 +// CHECK-NEXT: [[TMP0:%.*]] = load <2 x i32>, ptr [[FLATCAST_TMP]], align 4 +// CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[GEP]], align 4 +// CHECK-NEXT: [[TMP2:%.*]] = insertelement <2 x i32> [[TMP0]], i32 [[TMP1]], i64 0 +// CHECK-NEXT: [[TMP3:%.*]] = load float, ptr [[GEP1]], align 4 +// CHECK-NEXT: [[CONV:%.*]] = fptosi float [[TMP3]] to i32 +// CHECK-NEXT: [[TMP4:%.*]] = insertelement <2 x i32> [[TMP2]], i32 [[CONV]], i64 1 +// CHECK-NEXT: store <2 x i32> [[TMP4]], ptr [[A]], align 4 +// CHECK-NEXT: ret void +// +void call3() { + S s = {1, 2.0}; + int2x1 A = (int2x1)s; +} + +struct BFields { + double D; + int E: 15; + int : 8; + float F; +}; + +struct Derived : BFields { + int G; +}; + +// CHECK-LABEL: define hidden void @_Z5call47Derived( +// CHECK-SAME: ptr noundef byval([[STRUCT_DERIVED:%.*]]) align 1 [[D:%.*]]) #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[A:%.*]] = alloca [4 x i32], align 4 +// CHECK-NEXT: [[AGG_TEMP:%.*]] = alloca [[STRUCT_DERIVED]], align 1 +// CHECK-NEXT: [[FLATCAST_TMP:%.*]] = alloca <4 x i32>, align 4 +// CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 1 [[AGG_TEMP]], ptr align 1 [[D]], i32 19, i1 false) +// CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds [[STRUCT_DERIVED]], ptr [[AGG_TEMP]], i32 0, i32 0 +// CHECK-NEXT: [[E:%.*]] = getelementptr inbounds nuw [[STRUCT_BFIELDS:%.*]], ptr [[GEP]], i32 0, i32 1 +// CHECK-NEXT: [[GEP1:%.*]] = getelementptr inbounds [[STRUCT_DERIVED]], ptr [[AGG_TEMP]], i32 0, i32 0, i32 0 +// CHECK-NEXT: [[GEP2:%.*]] = getelementptr inbounds [[STRUCT_DERIVED]], ptr [[AGG_TEMP]], i32 0, i32 0, i32 2 +// CHECK-NEXT: [[GEP3:%.*]] = getelementptr inbounds [[STRUCT_DERIVED]], ptr [[AGG_TEMP]], i32 0, i32 1 +// CHECK-NEXT: [[TMP0:%.*]] = load <4 x i32>, ptr [[FLATCAST_TMP]], align 4 +// CHECK-NEXT: [[TMP1:%.*]] = load double, ptr [[GEP1]], align 8 +// CHECK-NEXT: [[CONV:%.*]] = fptosi double [[TMP1]] to i32 +// CHECK-NEXT: [[TMP2:%.*]] = insertelement <4 x i32> [[TMP0]], i32 [[CONV]], i64 0 +// CHECK-NEXT: [[BF_LOAD:%.*]] = load i24, 
ptr [[E]], align 1 +// CHECK-NEXT: [[BF_SHL:%.*]] = shl i24 [[BF_LOAD]], 9 +// CHECK-NEXT: [[BF_ASHR:%.*]] = ashr i24 [[BF_SHL]], 9 +// CHECK-NEXT: [[BF_CAST:%.*]] = sext i24 [[BF_ASHR]] to i32 +// CHECK-NEXT: [[TMP3:%.*]] = insertelement <4 x i32> [[TMP2]], i32 [[BF_CAST]], i64 1 +// CHECK-NEXT: [[TMP4:%.*]] = load float, ptr [[GEP2]], align 4 +// CHECK-NEXT: [[CONV4:%.*]] = fptosi float [[TMP4]] to i32 +// CHECK-NEXT: [[TMP5:%.*]] = insertelement <4 x i32> [[TMP3]], i32 [[CONV4]], i64 2 +// CHECK-NEXT: [[TMP6:%.*]] = load i32, ptr [[GEP3]], align 4 +// CHECK-NEXT: [[TMP7:%.*]] = insertelement <4 x i32> [[TMP5]], i32 [[TMP6]], i64 3 +// CHECK-NEXT: store <4 x i32> [[TMP7]], ptr [[A]], align 4 +// CHECK-NEXT: ret void +// +void call4(Derived D) { + int2x2 A = (int2x2)D; +} diff --git a/clang/test/CodeGenHLSL/BasicFeatures/MatrixExplicitTruncation.hlsl b/clang/test/CodeGenHLSL/BasicFeatures/MatrixExplicitTruncation.hlsl new file mode 100644 index 0000000000000..f16d01e1d12ea --- /dev/null +++ b/clang/test/CodeGenHLSL/BasicFeatures/MatrixExplicitTruncation.hlsl @@ -0,0 +1,156 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 6 +// RUN: %clang_cc1 -triple dxil-pc-shadermodel6.7-library -disable-llvm-passes -emit-llvm -finclude-default-header -o - %s | FileCheck %s + +// CHECK-LABEL: define hidden noundef <12 x i32> @_Z10trunc_castu11matrix_typeILm4ELm4EiE( +// CHECK-SAME: <16 x i32> noundef [[I44:%.*]]) #[[ATTR0:[0-9]+]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[I44_ADDR:%.*]] = alloca [16 x i32], align 4 +// CHECK-NEXT: [[I34:%.*]] = alloca [12 x i32], align 4 +// CHECK-NEXT: store <16 x i32> [[I44]], ptr [[I44_ADDR]], align 4 +// CHECK-NEXT: [[TMP0:%.*]] = load <16 x i32>, ptr [[I44_ADDR]], align 4 +// CHECK-NEXT: [[TRUNC:%.*]] = shufflevector <16 x i32> [[TMP0]], <16 x i32> poison, <12 x i32> +// CHECK-NEXT: store <12 x i32> [[TRUNC]], ptr [[I34]], align 4 +// CHECK-NEXT: [[TMP1:%.*]] = load <12 x i32>, ptr [[I34]], align 4 +// CHECK-NEXT: ret <12 x i32> [[TMP1]] +// + int3x4 trunc_cast(int4x4 i44) { + int3x4 i34 = (int3x4)i44; + return i34; +} + +// CHECK-LABEL: define hidden noundef <12 x i32> @_Z11trunc_cast0u11matrix_typeILm4ELm4EiE( +// CHECK-SAME: <16 x i32> noundef [[I44:%.*]]) #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[I44_ADDR:%.*]] = alloca [16 x i32], align 4 +// CHECK-NEXT: [[I43:%.*]] = alloca [12 x i32], align 4 +// CHECK-NEXT: store <16 x i32> [[I44]], ptr [[I44_ADDR]], align 4 +// CHECK-NEXT: [[TMP0:%.*]] = load <16 x i32>, ptr [[I44_ADDR]], align 4 +// CHECK-NEXT: [[TRUNC:%.*]] = shufflevector <16 x i32> [[TMP0]], <16 x i32> poison, <12 x i32> +// CHECK-NEXT: store <12 x i32> [[TRUNC]], ptr [[I43]], align 4 +// CHECK-NEXT: [[TMP1:%.*]] = load <12 x i32>, ptr [[I43]], align 4 +// CHECK-NEXT: ret <12 x i32> [[TMP1]] +// + int4x3 trunc_cast0(int4x4 i44) { + int4x3 i43 = (int4x3)i44; + return i43; +} + +// CHECK-LABEL: define hidden noundef <9 x i32> @_Z11trunc_cast1u11matrix_typeILm4ELm4EiE( +// CHECK-SAME: <16 x i32> noundef [[I44:%.*]]) #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[I44_ADDR:%.*]] = alloca [16 x i32], align 4 +// CHECK-NEXT: [[I33:%.*]] = alloca [9 x i32], align 4 +// CHECK-NEXT: store <16 x i32> [[I44]], ptr [[I44_ADDR]], align 4 +// CHECK-NEXT: [[TMP0:%.*]] = load <16 x i32>, ptr [[I44_ADDR]], align 4 +// CHECK-NEXT: [[TRUNC:%.*]] = shufflevector <16 x i32> [[TMP0]], <16 x i32> poison, <9 x i32> +// CHECK-NEXT: store <9 x i32> [[TRUNC]], ptr [[I33]], align 4 +// 
CHECK-NEXT: [[TMP1:%.*]] = load <9 x i32>, ptr [[I33]], align 4 +// CHECK-NEXT: ret <9 x i32> [[TMP1]] +// + int3x3 trunc_cast1(int4x4 i44) { + int3x3 i33 = (int3x3)i44; + return i33; +} + +// CHECK-LABEL: define hidden noundef <6 x i32> @_Z11trunc_cast2u11matrix_typeILm4ELm4EiE( +// CHECK-SAME: <16 x i32> noundef [[I44:%.*]]) #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[I44_ADDR:%.*]] = alloca [16 x i32], align 4 +// CHECK-NEXT: [[I32:%.*]] = alloca [6 x i32], align 4 +// CHECK-NEXT: store <16 x i32> [[I44]], ptr [[I44_ADDR]], align 4 +// CHECK-NEXT: [[TMP0:%.*]] = load <16 x i32>, ptr [[I44_ADDR]], align 4 +// CHECK-NEXT: [[TRUNC:%.*]] = shufflevector <16 x i32> [[TMP0]], <16 x i32> poison, <6 x i32> +// CHECK-NEXT: store <6 x i32> [[TRUNC]], ptr [[I32]], align 4 +// CHECK-NEXT: [[TMP1:%.*]] = load <6 x i32>, ptr [[I32]], align 4 +// CHECK-NEXT: ret <6 x i32> [[TMP1]] +// + int3x2 trunc_cast2(int4x4 i44) { + int3x2 i32 = (int3x2)i44; + return i32; +} + +// CHECK-LABEL: define hidden noundef <6 x i32> @_Z11trunc_cast3u11matrix_typeILm4ELm4EiE( +// CHECK-SAME: <16 x i32> noundef [[I44:%.*]]) #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[I44_ADDR:%.*]] = alloca [16 x i32], align 4 +// CHECK-NEXT: [[I23:%.*]] = alloca [6 x i32], align 4 +// CHECK-NEXT: store <16 x i32> [[I44]], ptr [[I44_ADDR]], align 4 +// CHECK-NEXT: [[TMP0:%.*]] = load <16 x i32>, ptr [[I44_ADDR]], align 4 +// CHECK-NEXT: [[TRUNC:%.*]] = shufflevector <16 x i32> [[TMP0]], <16 x i32> poison, <6 x i32> +// CHECK-NEXT: store <6 x i32> [[TRUNC]], ptr [[I23]], align 4 +// CHECK-NEXT: [[TMP1:%.*]] = load <6 x i32>, ptr [[I23]], align 4 +// CHECK-NEXT: ret <6 x i32> [[TMP1]] +// + int2x3 trunc_cast3(int4x4 i44) { + int2x3 i23 = (int2x3)i44; + return i23; +} + +// CHECK-LABEL: define hidden noundef <4 x i32> @_Z11trunc_cast4u11matrix_typeILm4ELm4EiE( +// CHECK-SAME: <16 x i32> noundef [[I44:%.*]]) #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[I44_ADDR:%.*]] = alloca [16 x i32], align 4 +// CHECK-NEXT: [[I22:%.*]] = alloca [4 x i32], align 4 +// CHECK-NEXT: store <16 x i32> [[I44]], ptr [[I44_ADDR]], align 4 +// CHECK-NEXT: [[TMP0:%.*]] = load <16 x i32>, ptr [[I44_ADDR]], align 4 +// CHECK-NEXT: [[TRUNC:%.*]] = shufflevector <16 x i32> [[TMP0]], <16 x i32> poison, <4 x i32> +// CHECK-NEXT: store <4 x i32> [[TRUNC]], ptr [[I22]], align 4 +// CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, ptr [[I22]], align 4 +// CHECK-NEXT: ret <4 x i32> [[TMP1]] +// + int2x2 trunc_cast4(int4x4 i44) { + int2x2 i22 = (int2x2)i44; + return i22; +} + +// CHECK-LABEL: define hidden noundef <2 x i32> @_Z11trunc_cast5u11matrix_typeILm4ELm4EiE( +// CHECK-SAME: <16 x i32> noundef [[I44:%.*]]) #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[I44_ADDR:%.*]] = alloca [16 x i32], align 4 +// CHECK-NEXT: [[I21:%.*]] = alloca [2 x i32], align 4 +// CHECK-NEXT: store <16 x i32> [[I44]], ptr [[I44_ADDR]], align 4 +// CHECK-NEXT: [[TMP0:%.*]] = load <16 x i32>, ptr [[I44_ADDR]], align 4 +// CHECK-NEXT: [[TRUNC:%.*]] = shufflevector <16 x i32> [[TMP0]], <16 x i32> poison, <2 x i32> +// CHECK-NEXT: store <2 x i32> [[TRUNC]], ptr [[I21]], align 4 +// CHECK-NEXT: [[TMP1:%.*]] = load <2 x i32>, ptr [[I21]], align 4 +// CHECK-NEXT: ret <2 x i32> [[TMP1]] +// + int2x1 trunc_cast5(int4x4 i44) { + int2x1 i21 = (int2x1)i44; + return i21; +} + +// CHECK-LABEL: define hidden noundef i32 @_Z11trunc_cast6u11matrix_typeILm4ELm4EiE( +// CHECK-SAME: <16 x i32> noundef [[I44:%.*]]) #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// 
CHECK-NEXT: [[I44_ADDR:%.*]] = alloca [16 x i32], align 4 +// CHECK-NEXT: [[I1:%.*]] = alloca i32, align 4 +// CHECK-NEXT: store <16 x i32> [[I44]], ptr [[I44_ADDR]], align 4 +// CHECK-NEXT: [[TMP0:%.*]] = load <16 x i32>, ptr [[I44_ADDR]], align 4 +// CHECK-NEXT: [[CAST_MTRUNC:%.*]] = extractelement <16 x i32> [[TMP0]], i32 0 +// CHECK-NEXT: store i32 [[CAST_MTRUNC]], ptr [[I1]], align 4 +// CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[I1]], align 4 +// CHECK-NEXT: ret i32 [[TMP1]] +// + int trunc_cast6(int4x4 i44) { + int i1 = (int)i44; + return i1; +} + +// CHECK-LABEL: define hidden noundef i32 @_Z16trunc_multi_castu11matrix_typeILm4ELm4EiE( +// CHECK-SAME: <16 x i32> noundef [[I44:%.*]]) #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[I44_ADDR:%.*]] = alloca [16 x i32], align 4 +// CHECK-NEXT: [[I1:%.*]] = alloca i32, align 4 +// CHECK-NEXT: store <16 x i32> [[I44]], ptr [[I44_ADDR]], align 4 +// CHECK-NEXT: [[TMP0:%.*]] = load <16 x i32>, ptr [[I44_ADDR]], align 4 +// CHECK-NEXT: [[TRUNC:%.*]] = shufflevector <16 x i32> [[TMP0]], <16 x i32> poison, <12 x i32> +// CHECK-NEXT: [[CAST_MTRUNC:%.*]] = extractelement <12 x i32> [[TRUNC]], i32 0 +// CHECK-NEXT: store i32 [[CAST_MTRUNC]], ptr [[I1]], align 4 +// CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[I1]], align 4 +// CHECK-NEXT: ret i32 [[TMP1]] +// + int trunc_multi_cast(int4x4 i44) { + int i1 = (int)(int3x4)i44; + return i1; +} diff --git a/clang/test/CodeGenHLSL/BasicFeatures/MatrixImplicitTruncation.hlsl b/clang/test/CodeGenHLSL/BasicFeatures/MatrixImplicitTruncation.hlsl new file mode 100644 index 0000000000000..6a53d2e8ee96c --- /dev/null +++ b/clang/test/CodeGenHLSL/BasicFeatures/MatrixImplicitTruncation.hlsl @@ -0,0 +1,138 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 6 +// RUN: %clang_cc1 -triple dxil-pc-shadermodel6.7-library -disable-llvm-passes -emit-llvm -finclude-default-header -o - %s | FileCheck %s + +// CHECK-LABEL: define hidden noundef <12 x i32> @_Z10trunc_castu11matrix_typeILm4ELm4EiE( +// CHECK-SAME: <16 x i32> noundef [[I44:%.*]]) #[[ATTR0:[0-9]+]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[I44_ADDR:%.*]] = alloca [16 x i32], align 4 +// CHECK-NEXT: [[I34:%.*]] = alloca [12 x i32], align 4 +// CHECK-NEXT: store <16 x i32> [[I44]], ptr [[I44_ADDR]], align 4 +// CHECK-NEXT: [[TMP0:%.*]] = load <16 x i32>, ptr [[I44_ADDR]], align 4 +// CHECK-NEXT: [[TRUNC:%.*]] = shufflevector <16 x i32> [[TMP0]], <16 x i32> poison, <12 x i32> +// CHECK-NEXT: store <12 x i32> [[TRUNC]], ptr [[I34]], align 4 +// CHECK-NEXT: [[TMP1:%.*]] = load <12 x i32>, ptr [[I34]], align 4 +// CHECK-NEXT: ret <12 x i32> [[TMP1]] +// + int3x4 trunc_cast(int4x4 i44) { + int3x4 i34 = i44; + return i34; +} + +// CHECK-LABEL: define hidden noundef <12 x i32> @_Z11trunc_cast0u11matrix_typeILm4ELm4EiE( +// CHECK-SAME: <16 x i32> noundef [[I44:%.*]]) #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[I44_ADDR:%.*]] = alloca [16 x i32], align 4 +// CHECK-NEXT: [[I43:%.*]] = alloca [12 x i32], align 4 +// CHECK-NEXT: store <16 x i32> [[I44]], ptr [[I44_ADDR]], align 4 +// CHECK-NEXT: [[TMP0:%.*]] = load <16 x i32>, ptr [[I44_ADDR]], align 4 +// CHECK-NEXT: [[TRUNC:%.*]] = shufflevector <16 x i32> [[TMP0]], <16 x i32> poison, <12 x i32> +// CHECK-NEXT: store <12 x i32> [[TRUNC]], ptr [[I43]], align 4 +// CHECK-NEXT: [[TMP1:%.*]] = load <12 x i32>, ptr [[I43]], align 4 +// CHECK-NEXT: ret <12 x i32> [[TMP1]] +// + int4x3 trunc_cast0(int4x4 i44) { + int4x3 i43 = i44; + return 
i43; +} + +// CHECK-LABEL: define hidden noundef <9 x i32> @_Z11trunc_cast1u11matrix_typeILm4ELm4EiE( +// CHECK-SAME: <16 x i32> noundef [[I44:%.*]]) #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[I44_ADDR:%.*]] = alloca [16 x i32], align 4 +// CHECK-NEXT: [[I33:%.*]] = alloca [9 x i32], align 4 +// CHECK-NEXT: store <16 x i32> [[I44]], ptr [[I44_ADDR]], align 4 +// CHECK-NEXT: [[TMP0:%.*]] = load <16 x i32>, ptr [[I44_ADDR]], align 4 +// CHECK-NEXT: [[TRUNC:%.*]] = shufflevector <16 x i32> [[TMP0]], <16 x i32> poison, <9 x i32> +// CHECK-NEXT: store <9 x i32> [[TRUNC]], ptr [[I33]], align 4 +// CHECK-NEXT: [[TMP1:%.*]] = load <9 x i32>, ptr [[I33]], align 4 +// CHECK-NEXT: ret <9 x i32> [[TMP1]] +// + int3x3 trunc_cast1(int4x4 i44) { + int3x3 i33 = i44; + return i33; +} + +// CHECK-LABEL: define hidden noundef <6 x i32> @_Z11trunc_cast2u11matrix_typeILm4ELm4EiE( +// CHECK-SAME: <16 x i32> noundef [[I44:%.*]]) #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[I44_ADDR:%.*]] = alloca [16 x i32], align 4 +// CHECK-NEXT: [[I32:%.*]] = alloca [6 x i32], align 4 +// CHECK-NEXT: store <16 x i32> [[I44]], ptr [[I44_ADDR]], align 4 +// CHECK-NEXT: [[TMP0:%.*]] = load <16 x i32>, ptr [[I44_ADDR]], align 4 +// CHECK-NEXT: [[TRUNC:%.*]] = shufflevector <16 x i32> [[TMP0]], <16 x i32> poison, <6 x i32> +// CHECK-NEXT: store <6 x i32> [[TRUNC]], ptr [[I32]], align 4 +// CHECK-NEXT: [[TMP1:%.*]] = load <6 x i32>, ptr [[I32]], align 4 +// CHECK-NEXT: ret <6 x i32> [[TMP1]] +// + int3x2 trunc_cast2(int4x4 i44) { + int3x2 i32 = i44; + return i32; +} + +// CHECK-LABEL: define hidden noundef <6 x i32> @_Z11trunc_cast3u11matrix_typeILm4ELm4EiE( +// CHECK-SAME: <16 x i32> noundef [[I44:%.*]]) #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[I44_ADDR:%.*]] = alloca [16 x i32], align 4 +// CHECK-NEXT: [[I23:%.*]] = alloca [6 x i32], align 4 +// CHECK-NEXT: store <16 x i32> [[I44]], ptr [[I44_ADDR]], align 4 +// CHECK-NEXT: [[TMP0:%.*]] = load <16 x i32>, ptr [[I44_ADDR]], align 4 +// CHECK-NEXT: [[TRUNC:%.*]] = shufflevector <16 x i32> [[TMP0]], <16 x i32> poison, <6 x i32> +// CHECK-NEXT: store <6 x i32> [[TRUNC]], ptr [[I23]], align 4 +// CHECK-NEXT: [[TMP1:%.*]] = load <6 x i32>, ptr [[I23]], align 4 +// CHECK-NEXT: ret <6 x i32> [[TMP1]] +// + int2x3 trunc_cast3(int4x4 i44) { + int2x3 i23 = i44; + return i23; +} + +// CHECK-LABEL: define hidden noundef <4 x i32> @_Z11trunc_cast4u11matrix_typeILm4ELm4EiE( +// CHECK-SAME: <16 x i32> noundef [[I44:%.*]]) #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[I44_ADDR:%.*]] = alloca [16 x i32], align 4 +// CHECK-NEXT: [[I22:%.*]] = alloca [4 x i32], align 4 +// CHECK-NEXT: store <16 x i32> [[I44]], ptr [[I44_ADDR]], align 4 +// CHECK-NEXT: [[TMP0:%.*]] = load <16 x i32>, ptr [[I44_ADDR]], align 4 +// CHECK-NEXT: [[TRUNC:%.*]] = shufflevector <16 x i32> [[TMP0]], <16 x i32> poison, <4 x i32> +// CHECK-NEXT: store <4 x i32> [[TRUNC]], ptr [[I22]], align 4 +// CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, ptr [[I22]], align 4 +// CHECK-NEXT: ret <4 x i32> [[TMP1]] +// + int2x2 trunc_cast4(int4x4 i44) { + int2x2 i22 = i44; + return i22; +} + +// CHECK-LABEL: define hidden noundef <2 x i32> @_Z11trunc_cast5u11matrix_typeILm4ELm4EiE( +// CHECK-SAME: <16 x i32> noundef [[I44:%.*]]) #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[I44_ADDR:%.*]] = alloca [16 x i32], align 4 +// CHECK-NEXT: [[I21:%.*]] = alloca [2 x i32], align 4 +// CHECK-NEXT: store <16 x i32> [[I44]], ptr [[I44_ADDR]], align 4 +// CHECK-NEXT: 
[[TMP0:%.*]] = load <16 x i32>, ptr [[I44_ADDR]], align 4 +// CHECK-NEXT: [[TRUNC:%.*]] = shufflevector <16 x i32> [[TMP0]], <16 x i32> poison, <2 x i32> +// CHECK-NEXT: store <2 x i32> [[TRUNC]], ptr [[I21]], align 4 +// CHECK-NEXT: [[TMP1:%.*]] = load <2 x i32>, ptr [[I21]], align 4 +// CHECK-NEXT: ret <2 x i32> [[TMP1]] +// + int2x1 trunc_cast5(int4x4 i44) { + int2x1 i21 = i44; + return i21; +} + +// CHECK-LABEL: define hidden noundef i32 @_Z11trunc_cast6u11matrix_typeILm4ELm4EiE( +// CHECK-SAME: <16 x i32> noundef [[I44:%.*]]) #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[I44_ADDR:%.*]] = alloca [16 x i32], align 4 +// CHECK-NEXT: [[I1:%.*]] = alloca i32, align 4 +// CHECK-NEXT: store <16 x i32> [[I44]], ptr [[I44_ADDR]], align 4 +// CHECK-NEXT: [[TMP0:%.*]] = load <16 x i32>, ptr [[I44_ADDR]], align 4 +// CHECK-NEXT: [[CAST_MTRUNC:%.*]] = extractelement <16 x i32> [[TMP0]], i32 0 +// CHECK-NEXT: store i32 [[CAST_MTRUNC]], ptr [[I1]], align 4 +// CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[I1]], align 4 +// CHECK-NEXT: ret i32 [[TMP1]] +// + int trunc_cast6(int4x4 i44) { + int i1 = i44; + return i1; +} diff --git a/clang/test/SemaHLSL/MatrixElementOverloadResolution.hlsl b/clang/test/SemaHLSL/MatrixElementOverloadResolution.hlsl new file mode 100644 index 0000000000000..ebe4db7a9e26e --- /dev/null +++ b/clang/test/SemaHLSL/MatrixElementOverloadResolution.hlsl @@ -0,0 +1,287 @@ +// RUN: %clang_cc1 -triple dxil-pc-shadermodel6.3-library -fnative-half-type -finclude-default-header -Wconversion -verify -o - %s -DERROR=1 +// RUN: %clang_cc1 -triple dxil-pc-shadermodel6.3-library -fnative-half-type -finclude-default-header -Wno-conversion -ast-dump %s | FileCheck %s + +// This test verifies floating point type implicit conversion ranks for overload +// resolution. In HLSL the built-in type ranks are half < float < double. This +// applies to both scalar and matrix types. + +// HLSL allows implicit truncation fo types, so it differentiates between +// promotions (converting to larger types) and conversions (converting to +// smaller types). Promotions are preferred over conversions. Promotions prefer +// promoting to the next lowest type in the ranking order. Conversions prefer +// converting to the next highest type in the ranking order. 
+ +void HalfFloatDouble(double2x2 D); +void HalfFloatDouble(float2x2 F); +void HalfFloatDouble(half2x2 H); + +// CHECK: FunctionDecl {{.*}} used HalfFloatDouble 'void (double2x2)' +// CHECK: FunctionDecl {{.*}} used HalfFloatDouble 'void (float2x2)' +// CHECK: FunctionDecl {{.*}} used HalfFloatDouble 'void (half2x2)' + +void FloatDouble(double2x2 D); // expected-note {{candidate function}} +void FloatDouble(float2x2 F); // expected-note {{candidate function}} + +// CHECK: FunctionDecl {{.*}} used FloatDouble 'void (double2x2)' +// CHECK: FunctionDecl {{.*}} used FloatDouble 'void (float2x2)' + +void HalfDouble(double2x2 D); +void HalfDouble(half2x2 H); + +// CHECK: FunctionDecl {{.*}} used HalfDouble 'void (double2x2)' +// CHECK: FunctionDecl {{.*}} used HalfDouble 'void (half2x2)' + +void HalfFloat(float2x2 F); // expected-note {{candidate function}} +void HalfFloat(half2x2 H); // expected-note {{candidate function}} + +// CHECK: FunctionDecl {{.*}} used HalfFloat 'void (float2x2)' +// CHECK: FunctionDecl {{.*}} used HalfFloat 'void (half2x2)' + +void Double(double2x2 D); +void Float(float2x2 F); +void Half(half2x2 H); + +// CHECK: FunctionDecl {{.*}} used Double 'void (double2x2)' +// CHECK: FunctionDecl {{.*}} used Float 'void (float2x2)' +// CHECK: FunctionDecl {{.*}} used Half 'void (half2x2)' + +// Case 1: A function declared with overloads for half float and double types. +// (a) When called with half, it will resolve to half because half is an exact +// match. +// (b) When called with float it will resolve to float because float is an +// exact match. +// (c) When called with double it will resolve to double because it is an +// exact match. + +// CHECK-LABEL: FunctionDecl {{.*}} Case1 'void (half2x2, float2x2, double2x2)' +void Case1(half2x2 H, float2x2 F, double2x2 D) { + // CHECK: CallExpr {{.*}} 'void' + // CHECK-NEXT: ImplicitCastExpr {{.*}} 'void (*)(half2x2)' + // CHECK-NEXT: DeclRefExpr {{.*}} 'void (half2x2)' lvalue Function {{.*}} 'HalfFloatDouble' 'void (half2x2)' + HalfFloatDouble(H); + + // CHECK: CallExpr {{.*}} 'void' + // CHECK-NEXT: ImplicitCastExpr {{.*}} 'void (*)(float2x2)' + // CHECK-NEXT: DeclRefExpr {{.*}} 'void (float2x2)' lvalue Function {{.*}} 'HalfFloatDouble' 'void (float2x2)' + HalfFloatDouble(F); + + // CHECK: CallExpr {{.*}} 'void' + // CHECK-NEXT: ImplicitCastExpr {{.*}} 'void (*)(double2x2)' + // CHECK-NEXT: DeclRefExpr {{.*}} 'void (double2x2)' lvalue Function {{.*}} 'HalfFloatDouble' 'void (double2x2)' + HalfFloatDouble(D); +} + +// Case 2: A function declared with double and float overlaods. +// (a) When called with half, it fails to resulve the ambiguous promotion. +// (b) When called with float it will resolve to float because float is an +// exact match. +// (c) When called with double it will resolve to double because it is an +// exact match. 
+ +// CHECK-LABEL: FunctionDecl {{.*}} Case2 'void (half2x2, float2x2, double2x2)' +void Case2(half2x2 H, float2x2 F, double2x2 D) { +#if ERROR + FloatDouble(H); // expected-error {{call to 'FloatDouble' is ambiguous}} +#endif + + // CHECK: CallExpr {{.*}} 'void' + // CHECK-NEXT: ImplicitCastExpr {{.*}} 'void (*)(float2x2)' + // CHECK-NEXT: DeclRefExpr {{.*}} 'void (float2x2)' lvalue Function {{.*}} 'FloatDouble' 'void (float2x2)' + FloatDouble(F); + + // CHECK: CallExpr {{.*}} 'void' + // CHECK-NEXT: ImplicitCastExpr {{.*}} 'void (*)(double2x2)' + // CHECK-NEXT: DeclRefExpr {{.*}} 'void (double2x2)' lvalue Function {{.*}} 'FloatDouble' 'void (double2x2)' + FloatDouble(D); +} + +// Case 3: A function declared with half and double overloads +// (a) When called with half, it will resolve to half because it is an exact +// match. +// (b) When called with flaot, it will resolve to double because double is a +// valid promotion. +// (c) When called with double, it will resolve to double because it is an +// exact match. + +// CHECK-LABEL: FunctionDecl {{.*}} Case3 'void (half2x2, float2x2, double2x2)' +void Case3(half2x2 H, float2x2 F, double2x2 D) { + // CHECK: CallExpr {{.*}} 'void' + // CHECK-NEXT: ImplicitCastExpr {{.*}} 'void (*)(half2x2)' + // CHECK-NEXT: DeclRefExpr {{.*}} 'void (half2x2)' lvalue Function {{.*}} 'HalfDouble' 'void (half2x2)' + HalfDouble(H); + + // CHECK: CallExpr {{.*}} 'void' + // CHECK-NEXT: ImplicitCastExpr {{.*}} 'void (*)(double2x2)' + // CHECK-NEXT: DeclRefExpr {{.*}} 'void (double2x2)' lvalue Function {{.*}} 'HalfDouble' 'void (double2x2)' + HalfDouble(F); + + // CHECK: CallExpr {{.*}} 'void' + // CHECK-NEXT: ImplicitCastExpr {{.*}} 'void (*)(double2x2)' + // CHECK-NEXT: DeclRefExpr {{.*}} 'void (double2x2)' lvalue Function {{.*}} 'HalfDouble' 'void (double2x2)' + HalfDouble(D); +} + +// Case 4: A function declared with half and float overloads. +// (a) When called with half, it will resolve to half because half is an exact +// match. +// (b) When called with float it will resolve to float because float is an +// exact match. +// (c) When called with double it fails to resolve the ambigjuous conversion. + +// CHECK-LABEL: FunctionDecl {{.*}} Case4 'void (half2x2, float2x2, double2x2)' +void Case4(half2x2 H, float2x2 F, double2x2 D) { + // CHECK: CallExpr {{.*}} 'void' + // CHECK-NEXT: ImplicitCastExpr {{.*}} 'void (*)(half2x2)' + // CHECK-NEXT: DeclRefExpr {{.*}} 'void (half2x2)' lvalue Function {{.*}} 'HalfFloat' 'void (half2x2)' + HalfFloat(H); + + // CHECK: CallExpr {{.*}} 'void' + // CHECK-NEXT: ImplicitCastExpr {{.*}} 'void (*)(float2x2)' + // CHECK-NEXT: DeclRefExpr {{.*}} 'void (float2x2)' lvalue Function {{.*}} 'HalfFloat' 'void (float2x2)' + HalfFloat(F); + +#if ERROR + HalfFloat(D); // expected-error{{call to 'HalfFloat' is ambiguous}} +#endif +} + +// Case 5: A function declared with only a double overload. +// (a) When called with half, it will resolve to double because double is a +// valid promotion. +// (b) When called with float it will resolve to double because double is a +// valid promotion. +// (c) When called with double it will resolve to double because it is an +// exact match. 
+ +// CHECK-LABEL: FunctionDecl {{.*}} Case5 'void (half2x2, float2x2, double2x2)' +void Case5(half2x2 H, float2x2 F, double2x2 D) { + // CHECK: CallExpr {{.*}} 'void' + // CHECK-NEXT: ImplicitCastExpr {{.*}} 'void (*)(double2x2)' + // CHECK-NEXT: DeclRefExpr {{.*}} 'void (double2x2)' lvalue Function {{.*}} 'Double' 'void (double2x2)' + Double(H); + + // CHECK: CallExpr {{.*}} 'void' + // CHECK-NEXT: ImplicitCastExpr {{.*}} 'void (*)(double2x2)' + // CHECK-NEXT: DeclRefExpr {{.*}} 'void (double2x2)' lvalue Function {{.*}} 'Double' 'void (double2x2)' + Double(F); + + // CHECK: CallExpr {{.*}} 'void' + // CHECK-NEXT: ImplicitCastExpr {{.*}} 'void (*)(double2x2)' + // CHECK-NEXT: DeclRefExpr {{.*}} 'void (double2x2)' lvalue Function {{.*}} 'Double' 'void (double2x2)' + Double(D); +} + +// Case 6: A function declared with only a float overload. +// (a) When called with half, it will resolve to float because float is a +// valid promotion. +// (b) When called with float it will resolve to float because float is an +// exact match. +// (c) When called with double it will resolve to float because it is a +// valid conversion. + +// CHECK-LABEL: FunctionDecl {{.*}} Case6 'void (half2x2, float2x2, double2x2)' +void Case6(half2x2 H, float2x2 F, double2x2 D) { + // CHECK: CallExpr {{.*}} 'void' + // CHECK-NEXT: ImplicitCastExpr {{.*}} 'void (*)(float2x2)' + // CHECK-NEXT: DeclRefExpr {{.*}} 'void (float2x2)' lvalue Function {{.*}} 'Float' 'void (float2x2)' + Float(H); + + // CHECK: CallExpr {{.*}} 'void' + // CHECK-NEXT: ImplicitCastExpr {{.*}} 'void (*)(float2x2)' + // CHECK-NEXT: DeclRefExpr {{.*}} 'void (float2x2)' lvalue Function {{.*}} 'Float' 'void (float2x2)' + Float(F); + + // CHECK: CallExpr {{.*}} 'void' + // CHECK-NEXT: ImplicitCastExpr {{.*}} 'void (*)(float2x2)' + // CHECK-NEXT: DeclRefExpr {{.*}} 'void (float2x2)' lvalue Function {{.*}} 'Float' 'void (float2x2)' + Float(D); // TODO: See #168944. Make this an expect warning. {{implicit conversion loses floating-point precision: 'double2x2' (aka 'matrix') to 'matrix' (matrix of 2 'float' values)}} +} + +// Case 7: A function declared with only a half overload. +// (a) When called with half, it will resolve to half because half is an +// exact match +// (b) When called with float it will resolve to half because half is a +// valid conversion. +// (c) When called with double it will resolve to float because it is a +// valid conversion. + +// CHECK-LABEL: FunctionDecl {{.*}} Case7 'void (half2x2, float2x2, double2x2)' +void Case7(half2x2 H, float2x2 F, double2x2 D) { + // CHECK: CallExpr {{.*}} 'void' + // CHECK-NEXT: ImplicitCastExpr {{.*}} 'void (*)(half2x2)' + // CHECK-NEXT: DeclRefExpr {{.*}} 'void (half2x2)' lvalue Function {{.*}} 'Half' 'void (half2x2)' + Half(H); + + // CHECK: CallExpr {{.*}} 'void' + // CHECK-NEXT: ImplicitCastExpr {{.*}} 'void (*)(half2x2)' + // CHECK-NEXT: DeclRefExpr {{.*}} 'void (half2x2)' lvalue Function {{.*}} 'Half' 'void (half2x2)' + Half(F); // TODO: See #168944. Make this an expect warning. {{implicit conversion loses floating-point precision: 'float2x2' (aka 'matrix') to 'matrix' (matrix of 4 'half' values)}} + + // CHECK: CallExpr {{.*}} 'void' + // CHECK-NEXT: ImplicitCastExpr {{.*}} 'void (*)(half2x2)' + // CHECK-NEXT: DeclRefExpr {{.*}} 'void (half2x2)' lvalue Function {{.*}} 'Half' 'void (half2x2)' + Half(D); // TODO: See #168944. Make this an expect warning. 
{{implicit conversion loses floating-point precision: 'double2x2' (aka 'matrix') to 'matrix' (matrix of 4 'half' values)}} +} + +void fn3x2(float3x2) {} // expected-note{{candidate function}} +void fn2x2(float2x2) {} +void fn2x2IO(inout float2x2) {} +void fnI2x2IO(inout int2x2) {} + +void matOrVec(float4 F) {} +void matOrVec(float2x2 F) {} + +void matOrVec2(float3 F) {} // expected-note{{candidate function}} +void matOrVec2(float2x3 F) {} // expected-note{{candidate function}} + +export void Case8(float2x3 f23, float4x4 f44, float3x3 f33, float3x2 f32) { + int2x2 i22 = f23; + //CHECK: VarDecl {{.*}} i22 'int2x2':'matrix' cinit + //CHECK-NEXT: ImplicitCastExpr {{.*}} 'int2x2':'matrix' + //CHECK-NEXT: ImplicitCastExpr {{.*}} 'float2x3':'matrix' +#ifdef ERROR + int3x2 i32 = f23; // expected-error{{cannot initialize a variable of type 'matrix' with an lvalue of type 'matrix'}} + fn3x2(f23); // expected-error{{no matching function for call to 'fn3x2'}} +#endif + + fn2x2(f23); + //CHECK: DeclRefExpr {{.*}} 'void (float2x2)' lvalue Function {{.*}} 'fn2x2' 'void (float2x2)' + //CHECK-NEXT: ImplicitCastExpr {{.*}} 'matrix' + //CHECK-NEXT: ImplicitCastExpr {{.*}} 'float2x3':'matrix' + +#ifdef ERROR + fn2x2IO(f23); // expected-error{{assigning to 'matrix<[2 * ...], 3>' from incompatible type 'matrix<[2 * ...], 2>'}} + fnI2x2IO(f23); // expected-error{{assigning to 'matrix' from incompatible type 'matrix'}} +#endif + + matOrVec(f23); + //CHECK: DeclRefExpr {{.*}} 'void (float2x2)' lvalue Function {{.*}} 'matOrVec' 'void (float2x2)' + //CHECK-NEXT: ImplicitCastExpr {{.*}} 'matrix' + //CHECK-NEXT: ImplicitCastExpr {{.*}} 'float2x3':'matrix' + + matOrVec(f44); + //CHECK: DeclRefExpr {{.*}} 'void (float2x2)' lvalue Function {{.*}} 'matOrVec' 'void (float2x2)' + //CHECK-NEXT: ImplicitCastExpr {{.*}} 'matrix' + //CHECK-NEXT: ImplicitCastExpr {{.*}} 'float4x4':'matrix' + +#ifdef ERROR + matOrVec(2.0); // TODO: See #168960 this should be ambiguous once we implement ICK_HLSL_Matrix_Splat. 
+#endif + matOrVec2(f23); + //CHECK: DeclRefExpr {{.*}} 'void (float2x3)' lvalue Function {{.*}} 'matOrVec2' 'void (float2x3)' + //CHECK-NEXT: ImplicitCastExpr {{.*}} 'float2x3':'matrix' + + matOrVec2(f44); + //CHECK: DeclRefExpr {{.*}} 'void (float2x3)' lvalue Function {{.*}} 'matOrVec2' 'void (float2x3)' + //CHECK-NEXT: ImplicitCastExpr {{.*}} 'matrix' + //CHECK-NEXT: ImplicitCastExpr {{.*}} 'float4x4':'matrix' + + matOrVec2(f33); + //CHECK: DeclRefExpr {{.*}} 'void (float2x3)' lvalue Function {{.*}} 'matOrVec2' 'void (float2x3)' + //CHECK-NEXT: ImplicitCastExpr {{.*}} 'matrix' + //CHECK-NEXT: ImplicitCastExpr {{.*}} 'float3x3':'matrix' + +#ifdef ERROR + matOrVec2(f32); // expected-error{{no matching function for call to 'matOrVec2'}} +#endif +} diff --git a/clang/test/SemaHLSL/Types/BuiltinMatrix/MatrixCastErrors.hlsl b/clang/test/SemaHLSL/Types/BuiltinMatrix/MatrixCastErrors.hlsl new file mode 100644 index 0000000000000..59d432cd3eb00 --- /dev/null +++ b/clang/test/SemaHLSL/Types/BuiltinMatrix/MatrixCastErrors.hlsl @@ -0,0 +1,21 @@ +// RUN: %clang_cc1 -triple dxil-pc-shadermodel6.6-library -finclude-default-header -std=hlsl202x -verify %s + +// Note column is too large +export int3x2 shape_cast_error(float2x3 f23) { + int3x2 i32 = (int3x2)f23; + // expected-error@-1 {{conversion between matrix types 'int3x2' (aka 'matrix') and 'matrix' of different size is not allowed}} + return i32; +} +// Note row is too large +export int2x3 shape_cast_error2(float3x2 f32) { + int2x3 i23 = (int2x3)f32; + // expected-error@-1 {{conversion between matrix types 'int2x3' (aka 'matrix') and 'matrix' of different size is not allowed}} + return i23; +} + +// Note do the type change independent of the shape should still error +export int2x3 shape_cast_error3(float3x2 f32) { + int2x3 i23 = (int3x2)f32; + // expected-error@-1 {{cannot initialize a variable of type 'matrix<[...], 2, 3>' with an rvalue of type 'matrix<[...], 3, 2>}} + return i23; +} diff --git a/clang/test/SemaHLSL/Types/BuiltinMatrix/MatrixImplicitTruncCastWarnings.hlsl b/clang/test/SemaHLSL/Types/BuiltinMatrix/MatrixImplicitTruncCastWarnings.hlsl new file mode 100644 index 0000000000000..360c9f7f31b15 --- /dev/null +++ b/clang/test/SemaHLSL/Types/BuiltinMatrix/MatrixImplicitTruncCastWarnings.hlsl @@ -0,0 +1,50 @@ +// RUN: %clang_cc1 -triple dxil-pc-shadermodel6.6-library -finclude-default-header -Wconversion -verify %s + +export int3x4 trunc_cast(int4x4 i44) { + int3x4 i34 = i44; + // expected-warning@-1{{implicit conversion truncates matrix: 'int4x4' (aka 'matrix') to 'matrix'}} + return i34; +} + +export int4x3 trunc_cast0(int4x4 i44) { + int4x3 i43 = i44; + // expected-warning@-1{{implicit conversion truncates matrix: 'int4x4' (aka 'matrix') to 'matrix'}} + return i43; +} + +export int3x3 trunc_cast1(int4x4 i44) { + int3x3 i33 = i44; + // expected-warning@-1{{implicit conversion truncates matrix: 'int4x4' (aka 'matrix') to 'matrix'}} + return i33; +} + +export int3x2 trunc_cast2(int4x4 i44) { + int3x2 i32 = i44; + // expected-warning@-1{{implicit conversion truncates matrix: 'int4x4' (aka 'matrix') to 'matrix'}} + return i32; +} + +export int2x3 trunc_cast3(int4x4 i44) { + int2x3 i23 = i44; + // expected-warning@-1{{implicit conversion truncates matrix: 'int4x4' (aka 'matrix') to 'matrix'}} + return i23; +} + +export int2x2 trunc_cast4(int4x4 i44) { + int2x2 i22 = i44; + // expected-warning@-1{{implicit conversion truncates matrix: 'int4x4' (aka 'matrix') to 'matrix'}} + return i22; +} + +export int2x1 trunc_cast5(int4x4 i44) { + int2x1 i21 
= i44; + // expected-warning@-1{{implicit conversion truncates matrix: 'int4x4' (aka 'matrix') to 'matrix'}} + return i21; +} + +export int trunc_scalar_cast6(int4x4 i44) { + int i1 = i44; + // expected-warning@-1{{implicit conversion turns matrix to scalar: 'int4x4' (aka 'matrix') to 'int'}} + return i1; +} +
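
A minimal HLSL sketch (not part of the patch, for reference only) of the conversions enabled above, assuming the default header's matrix typedefs and the diagnostics added in DiagnosticSemaKinds.td; truncation keeps the leading elements of the flattened matrix and a matrix-to-scalar conversion keeps element zero, matching the shufflevector/extractelement patterns in the CodeGen tests:

export void truncation_examples(float4x4 M) {
  float2x2 A = (float2x2)M; // explicit truncation, lowered via CK_HLSLMatrixTruncation
  float2x2 B = M;           // implicit truncation; diagnosed as "implicit conversion
                            // truncates matrix" (see MatrixImplicitTruncCastWarnings.hlsl)
  int2x2   C = M;           // truncation plus a float-to-int element conversion,
                            // both handled by IsMatrixConversion
  float    S = M;           // matrix-to-scalar keeps element 0; diagnosed as
                            // "implicit conversion turns matrix to scalar" under -Wconversion
}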