diff --git a/fem/ceed/integrators/convection/convection.cpp b/fem/ceed/integrators/convection/convection.cpp
index 47adca54524..c980123baf2 100644
--- a/fem/ceed/integrators/convection/convection.cpp
+++ b/fem/ceed/integrators/convection/convection.cpp
@@ -25,17 +25,28 @@ namespace ceed
 #ifdef MFEM_USE_CEED
 struct ConvectionOperatorInfo : public OperatorInfo
 {
-   ConvectionContext ctx;
+   ConvectionContext ctx = {0};
    ConvectionOperatorInfo(const mfem::FiniteElementSpace &fes,
                           mfem::VectorCoefficient *VQ, double alpha,
-                          bool use_bdr)
+                          bool use_bdr = false, bool use_mf = false)
    {
       MFEM_VERIFY(VQ && VQ->GetVDim() == fes.GetMesh()->SpaceDimension(),
                   "Incorrect coefficient dimensions in ceed::ConvectionOperatorInfo!");
       ctx.dim = fes.GetMesh()->Dimension() - use_bdr;
       ctx.space_dim = fes.GetMesh()->SpaceDimension();
-      if (VectorConstantCoefficient *const_coeff =
-             dynamic_cast<VectorConstantCoefficient *>(VQ))
+      ctx.alpha = alpha;
+      if (!use_mf)
+      {
+         apply_func = ":f_apply_conv";
+         apply_qf = &f_apply_conv;
+      }
+      else
+      {
+         build_func = "";
+         build_qf = nullptr;
+      }
+      if (mfem::VectorConstantCoefficient *const_coeff =
+             dynamic_cast<mfem::VectorConstantCoefficient *>(VQ))
       {
          const int vdim = VQ->GetVDim();
          MFEM_VERIFY(vdim <= LIBCEED_CONV_COEFF_COMP_MAX,
@@ -45,20 +56,31 @@ struct ConvectionOperatorInfo : public OperatorInfo
          {
             ctx.coeff[i] = val[i];
          }
+         if (!use_mf)
+         {
+            build_func = ":f_build_conv_const";
+            build_qf = &f_build_conv_const;
+         }
+         else
+         {
+            apply_func = ":f_apply_conv_mf_const";
+            apply_qf = &f_apply_conv_mf_const;
+         }
+      }
+      else
+      {
+         if (!use_mf)
+         {
+            build_func = ":f_build_conv_quad";
+            build_qf = &f_build_conv_quad;
+         }
+         else
+         {
+            apply_func = ":f_apply_conv_mf_quad";
+            apply_qf = &f_apply_conv_mf_quad;
+         }
       }
-      ctx.alpha = alpha;
-
       header = "/integrators/convection/convection_qf.h";
-      build_func_const = ":f_build_conv_const";
-      build_qf_const = &f_build_conv_const;
-      build_func_quad = ":f_build_conv_quad";
-      build_qf_quad = &f_build_conv_quad;
-      apply_func = ":f_apply_conv";
-      apply_qf = &f_apply_conv;
-      apply_func_mf_const = ":f_apply_conv_mf_const";
-      apply_qf_mf_const = &f_apply_conv_mf_const;
-      apply_func_mf_quad = ":f_apply_conv_mf_quad";
-      apply_qf_mf_quad = &f_apply_conv_mf_quad;
       trial_op = EvalMode::Grad;
       test_op = EvalMode::Interp;
       qdatasize = ctx.dim;
@@ -89,7 +111,7 @@ MFConvectionIntegrator::MFConvectionIntegrator(
    const bool use_bdr)
 {
 #ifdef MFEM_USE_CEED
-   ConvectionOperatorInfo info(fes, VQ, alpha, use_bdr);
+   ConvectionOperatorInfo info(fes, VQ, alpha, use_bdr, true);
    Assemble(integ, info, fes, VQ, use_bdr, true);
 #else
    MFEM_ABORT("MFEM must be built with MFEM_USE_CEED=YES to use libCEED.");
diff --git a/fem/ceed/integrators/convection/convection_qf.h b/fem/ceed/integrators/convection/convection_qf.h
index 909ea919425..0dd11387c4a 100644
--- a/fem/ceed/integrators/convection/convection_qf.h
+++ b/fem/ceed/integrators/convection/convection_qf.h
@@ -16,12 +16,11 @@
 
 #define LIBCEED_CONV_COEFF_COMP_MAX 3
 
-/// A structure used to pass additional data to f_build_conv and f_apply_conv
 struct ConvectionContext
 {
    CeedInt dim, space_dim;
-   CeedScalar coeff[LIBCEED_CONV_COEFF_COMP_MAX];
    CeedScalar alpha;
+   CeedScalar coeff[LIBCEED_CONV_COEFF_COMP_MAX];
 };
 
 /// libCEED QFunction for building quadrature data for a convection operator
@@ -34,7 +33,7 @@ CEED_QFUNCTION(f_build_conv_const)(void *ctx, CeedInt Q,
    // in[0] is Jacobians with shape [dim, ncomp=space_dim, Q]
    // in[1] is quadrature weights, size (Q)
    //
-   // At every quadrature point, compute and store qw * α * c^T adj(J)^T.
+   // At every quadrature point, compute and store qw * α * c^T adj(J)^T
    const CeedScalar alpha  = bc->alpha;
    const CeedScalar *coeff = bc->coeff;
    const CeedScalar *J = in[0], *qw = in[1];
@@ -77,7 +76,7 @@ CEED_QFUNCTION(f_build_conv_const)(void *ctx, CeedInt Q,
 }
 
 /// libCEED QFunction for building quadrature data for a convection operator
-/// with a coefficient evaluated at quadrature points.
+/// with a coefficient evaluated at quadrature points
 CEED_QFUNCTION(f_build_conv_quad)(void *ctx, CeedInt Q,
                                   const CeedScalar *const *in,
                                   CeedScalar *const *out)
@@ -87,7 +86,7 @@ CEED_QFUNCTION(f_build_conv_quad)(void *ctx, CeedInt Q,
    // in[1] is Jacobians with shape [dim, ncomp=space_dim, Q]
    // in[2] is quadrature weights, size (Q)
    //
-   // At every quadrature point, compute and store qw * α * c^T adj(J)^T.
+   // At every quadrature point, compute and store qw * α * c^T adj(J)^T
    const CeedScalar alpha  = bc->alpha;
    const CeedScalar *c = in[0], *J = in[1], *qw = in[2];
    CeedScalar *qd = out[0];
@@ -127,7 +126,7 @@ CEED_QFUNCTION(f_build_conv_quad)(void *ctx, CeedInt Q,
    return 0;
 }
 
-/// libCEED QFunction for applying a conv operator
+/// libCEED QFunction for applying a convection operator
 CEED_QFUNCTION(f_apply_conv)(void *ctx, CeedInt Q,
                              const CeedScalar *const *in,
                              CeedScalar *const *out)
@@ -166,7 +165,8 @@ CEED_QFUNCTION(f_apply_conv)(void *ctx, CeedInt Q,
    return 0;
 }
 
-/// libCEED QFunction for applying a conv operator
+/// libCEED QFunction for applying a convection operator with a constant
+/// coefficient
 CEED_QFUNCTION(f_apply_conv_mf_const)(void *ctx, CeedInt Q,
                                       const CeedScalar *const *in,
                                       CeedScalar *const *out)
@@ -177,7 +177,7 @@ CEED_QFUNCTION(f_apply_conv_mf_const)(void *ctx, CeedInt Q,
    // in[2] is quadrature weights, size (Q)
    // out[0] has shape [ncomp=1, Q]
    //
-   // At every quadrature point, compute qw * α * c^T adj(J)^T.
+   // At every quadrature point, compute qw * α * c^T adj(J)^T
    const CeedScalar alpha  = bc->alpha;
    const CeedScalar *coeff = bc->coeff;
    const CeedScalar *ug = in[0], *J = in[1], *qw = in[2];
@@ -235,7 +235,8 @@ CEED_QFUNCTION(f_apply_conv_mf_const)(void *ctx, CeedInt Q,
    return 0;
 }
 
-/// libCEED QFunction for applying a conv operator
+/// libCEED QFunction for applying a convection operator with a coefficient
+/// evaluated at quadrature points
 CEED_QFUNCTION(f_apply_conv_mf_quad)(void *ctx, CeedInt Q,
                                      const CeedScalar *const *in,
                                      CeedScalar *const *out)
@@ -247,7 +248,7 @@ CEED_QFUNCTION(f_apply_conv_mf_quad)(void *ctx, CeedInt Q,
    // in[3] is quadrature weights, size (Q)
    // out[0] has shape [ncomp=1, Q]
    //
-   // At every quadrature point, compute qw * α * c^T adj(J)^T.
+   // At every quadrature point, compute qw * α * c^T adj(J)^T
    const CeedScalar alpha  = bc->alpha;
    const CeedScalar *ug = in[0], *c = in[1], *J = in[2], *qw = in[3];
    CeedScalar *vg = out[0];
diff --git a/fem/ceed/integrators/curlcurl/curlcurl.cpp b/fem/ceed/integrators/curlcurl/curlcurl.cpp
index e813d38cfcf..5c230e86b3a 100644
--- a/fem/ceed/integrators/curlcurl/curlcurl.cpp
+++ b/fem/ceed/integrators/curlcurl/curlcurl.cpp
@@ -25,10 +25,10 @@ namespace ceed
 #ifdef MFEM_USE_CEED
 struct CurlCurlOperatorInfo : public OperatorInfo
 {
-   CurlCurlContext ctx;
+   CurlCurlContext ctx = {0};
    template <typename CoeffType>
    CurlCurlOperatorInfo(const mfem::FiniteElementSpace &fes, CoeffType *Q,
-                        bool use_bdr)
+                        bool use_bdr = false, bool use_mf = false)
    {
       MFEM_VERIFY(fes.GetVDim() == 1,
                   "libCEED interface for vector FE does not support VDim > 1!");
@@ -37,65 +37,116 @@ struct CurlCurlOperatorInfo : public OperatorInfo
                   "CurlCurlIntegrator requires dim == 2 or dim == 3!");
       ctx.space_dim = fes.GetMesh()->SpaceDimension();
       ctx.curl_dim = (ctx.dim < 3) ? 1 : ctx.dim;
+      if (!use_mf)
+      {
+         apply_func = ":f_apply_curlcurl";
+         apply_qf = &f_apply_curlcurl;
+      }
+      else
+      {
+         build_func = "";
+         build_qf = nullptr;
+      }
       if (Q == nullptr)
       {
-         ctx.coeff_comp = 1;
          ctx.coeff[0] = 1.0;
+         if (!use_mf)
+         {
+            build_func = ":f_build_curlcurl_const_scalar";
+            build_qf = &f_build_curlcurl_const_scalar;
+         }
+         else
+         {
+            apply_func = ":f_apply_curlcurl_mf_const_scalar";
+            apply_qf = &f_apply_curlcurl_mf_const_scalar;
+         }
       }
       else
       {
-         InitCoefficient(*Q);
+         InitCoefficient(*Q, use_mf);
       }
-
       header = "/integrators/curlcurl/curlcurl_qf.h";
-      build_func_const = ":f_build_curlcurl_const";
-      build_qf_const = &f_build_curlcurl_const;
-      build_func_quad = ":f_build_curlcurl_quad";
-      build_qf_quad = &f_build_curlcurl_quad;
-      apply_func = ":f_apply_curlcurl";
-      apply_qf = &f_apply_curlcurl;
-      apply_func_mf_const = ":f_apply_curlcurl_mf_const";
-      apply_qf_mf_const = &f_apply_curlcurl_mf_const;
-      apply_func_mf_quad = ":f_apply_curlcurl_mf_quad";
-      apply_qf_mf_quad = &f_apply_curlcurl_mf_quad;
       trial_op = EvalMode::Curl;
       test_op = EvalMode::Curl;
       qdatasize = (ctx.curl_dim * (ctx.curl_dim + 1)) / 2;
    }
-   void InitCoefficient(mfem::Coefficient &Q)
+   void InitCoefficient(mfem::Coefficient &Q, bool use_mf)
    {
-      ctx.coeff_comp = 1;
-      if (ConstantCoefficient *const_coeff =
-             dynamic_cast<ConstantCoefficient *>(&Q))
+      if (mfem::ConstantCoefficient *const_coeff =
+             dynamic_cast<mfem::ConstantCoefficient *>(&Q))
       {
          ctx.coeff[0] = const_coeff->constant;
+         if (!use_mf)
+         {
+            build_func = ":f_build_curlcurl_const_scalar";
+            build_qf = &f_build_curlcurl_const_scalar;
+         }
+         else
+         {
+            apply_func = ":f_apply_curlcurl_mf_const_scalar";
+            apply_qf = &f_apply_curlcurl_mf_const_scalar;
+         }
+      }
+      else
+      {
+         if (!use_mf)
+         {
+            build_func = ":f_build_curlcurl_quad_scalar";
+            build_qf = &f_build_curlcurl_quad_scalar;
+         }
+         else
+         {
+            apply_func = ":f_apply_curlcurl_mf_quad_scalar";
+            apply_qf = &f_apply_curlcurl_mf_quad_scalar;
+         }
       }
    }
-   void InitCoefficient(mfem::VectorCoefficient &VQ)
+   void InitCoefficient(mfem::VectorCoefficient &VQ, bool use_mf)
    {
-      const int vdim = VQ.GetVDim();
-      ctx.coeff_comp = vdim;
-      if (VectorConstantCoefficient *const_coeff =
-             dynamic_cast<VectorConstantCoefficient *>(&VQ))
+      if (mfem::VectorConstantCoefficient *const_coeff =
+             dynamic_cast<mfem::VectorConstantCoefficient *>(&VQ))
       {
-         MFEM_VERIFY(ctx.coeff_comp <= LIBCEED_CURLCURL_COEFF_COMP_MAX,
+         const int vdim = VQ.GetVDim();
+         MFEM_VERIFY(vdim <= LIBCEED_CURLCURL_COEFF_COMP_MAX,
                      "VectorCoefficient dimension exceeds context storage!");
          const mfem::Vector &val = const_coeff->GetVec();
          for (int i = 0; i < vdim; i++)
          {
             ctx.coeff[i] = val[i];
          }
+         if (!use_mf)
+         {
+            build_func = ":f_build_curlcurl_const_vector";
+            build_qf = &f_build_curlcurl_const_vector;
+         }
+         else
+         {
+            apply_func = ":f_apply_curlcurl_mf_const_vector";
+            apply_qf = &f_apply_curlcurl_mf_const_vector;
+         }
+      }
+      else
+      {
+         if (!use_mf)
+         {
+            build_func = ":f_build_curlcurl_quad_vector";
+            build_qf = &f_build_curlcurl_quad_vector;
+         }
+         else
+         {
+            apply_func = ":f_apply_curlcurl_mf_quad_vector";
+            apply_qf = &f_apply_curlcurl_mf_quad_vector;
+         }
       }
    }
-   void InitCoefficient(mfem::MatrixCoefficient &MQ)
+   void InitCoefficient(mfem::MatrixCoefficient &MQ, bool use_mf)
    {
       // Assumes matrix coefficient is symmetric
-      const int vdim = MQ.GetVDim();
-      ctx.coeff_comp = (vdim * (vdim + 1)) / 2;
-      if (MatrixConstantCoefficient *const_coeff =
-             dynamic_cast<MatrixConstantCoefficient *>(&MQ))
+      if (mfem::MatrixConstantCoefficient *const_coeff =
+             dynamic_cast<mfem::MatrixConstantCoefficient *>(&MQ))
       {
-         MFEM_VERIFY(ctx.coeff_comp <= LIBCEED_CURLCURL_COEFF_COMP_MAX,
+         const int vdim = MQ.GetVDim();
+         MFEM_VERIFY((vdim * (vdim + 1)) / 2 <= LIBCEED_CURLCURL_COEFF_COMP_MAX,
                      "MatrixCoefficient dimensions exceed context storage!");
          const mfem::DenseMatrix &val = const_coeff->GetMatrix();
          for (int j = 0; j < vdim; j++)
@@ -106,6 +157,29 @@ struct CurlCurlOperatorInfo : public OperatorInfo
                ctx.coeff[idx] = val(i, j);
             }
          }
+         if (!use_mf)
+         {
+            build_func = ":f_build_curlcurl_const_matrix";
+            build_qf = &f_build_curlcurl_const_matrix;
+         }
+         else
+         {
+            apply_func = ":f_apply_curlcurl_mf_const_matrix";
+            apply_qf = &f_apply_curlcurl_mf_const_matrix;
+         }
+      }
+      else
+      {
+         if (!use_mf)
+         {
+            build_func = ":f_build_curlcurl_quad_matrix";
+            build_qf = &f_build_curlcurl_quad_matrix;
+         }
+         else
+         {
+            apply_func = ":f_apply_curlcurl_mf_quad_matrix";
+            apply_qf = &f_apply_curlcurl_mf_quad_matrix;
+         }
       }
    }
 };
@@ -134,7 +208,7 @@ MFCurlCurlIntegrator::MFCurlCurlIntegrator(
    const bool use_bdr)
 {
 #ifdef MFEM_USE_CEED
-   CurlCurlOperatorInfo info(fes, Q, use_bdr);
+   CurlCurlOperatorInfo info(fes, Q, use_bdr, true);
    Assemble(integ, info, fes, Q, use_bdr, true);
 #else
    MFEM_ABORT("MFEM must be built with MFEM_USE_CEED=YES to use libCEED.");
diff --git a/fem/ceed/integrators/curlcurl/curlcurl_qf.h b/fem/ceed/integrators/curlcurl/curlcurl_qf.h
index 4f8e0afbbad..6fbace54bbb 100644
--- a/fem/ceed/integrators/curlcurl/curlcurl_qf.h
+++ b/fem/ceed/integrators/curlcurl/curlcurl_qf.h
@@ -16,114 +16,194 @@
 
 #define LIBCEED_CURLCURL_COEFF_COMP_MAX 6
 
-/// A structure used to pass additional data to f_build_curlcurl and
-/// f_apply_curlcurl
 struct CurlCurlContext
 {
-   CeedInt dim, space_dim, curl_dim, coeff_comp;
+   CeedInt dim, space_dim, curl_dim;
    CeedScalar coeff[LIBCEED_CURLCURL_COEFF_COMP_MAX];
 };
 
-/// libCEED QFunction for building quadrature data for a curl-curl
-/// operator with a constant coefficient
-CEED_QFUNCTION(f_build_curlcurl_const)(void *ctx, CeedInt Q,
-                                       const CeedScalar *const *in,
-                                       CeedScalar *const *out)
+/// libCEED QFunction for building quadrature data for a curl-curl operator
+/// with a scalar constant coefficient
+CEED_QFUNCTION(f_build_curlcurl_const_scalar)(void *ctx, CeedInt Q,
+                                              const CeedScalar *const *in,
+                                              CeedScalar *const *out)
 {
    CurlCurlContext *bc = (CurlCurlContext *)ctx;
    // in[0] is Jacobians with shape [dim, ncomp=space_dim, Q]
    // in[1] is quadrature weights, size (Q)
    //
    // At every quadrature point, compute qw/det(J) J^T C J and store the
-   // symmetric part of the result. In 2D, compute and store qw * c / det(J).
+   // symmetric part of the result. In 2D, compute and store qw * c / det(J)
    const CeedScalar *coeff = bc->coeff;
    const CeedScalar *J = in[0], *qw = in[1];
    CeedScalar *qd = out[0];
-   switch (1000 * bc->space_dim + 100 * bc->dim + 10 * bc->curl_dim +
-           bc->coeff_comp)
+   switch (100 * bc->space_dim + 10 * bc->dim + bc->curl_dim)
    {
-      case 2211:
+      case 221:
          CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++)
          {
             const CeedScalar coeff0 = coeff[0];
             qd[i] = qw[i] * coeff0 / DetJ22(J + i, Q);
          }
          break;
-      case 3211:
+      case 321:
          CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++)
          {
             const CeedScalar coeff0 = coeff[0];
             qd[i] = qw[i] * coeff0 / DetJ32(J + i, Q);
          }
          break;
-      case 3336:
+      case 333:
          CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++)
          {
-            MultJtCJ33(J + i, Q, coeff, 1, 6, qw[i], Q, qd + i);
+            MultJtCJ33(J + i, Q, coeff, 1, 1, qw[i], Q, qd + i);
          }
          break;
-      case 3333:
+   }
+   return 0;
+}
+
+/// libCEED QFunction for building quadrature data for a curl-curl operator
+/// with a vector constant coefficient
+CEED_QFUNCTION(f_build_curlcurl_const_vector)(void *ctx, CeedInt Q,
+                                              const CeedScalar *const *in,
+                                              CeedScalar *const *out)
+{
+   CurlCurlContext *bc = (CurlCurlContext *)ctx;
+   // in[0] is Jacobians with shape [dim, ncomp=space_dim, Q]
+   // in[1] is quadrature weights, size (Q)
+   //
+   // At every quadrature point, compute qw/det(J) J^T C J and store the
+   // symmetric part of the result. In 2D, compute and store qw * c / det(J)
+   const CeedScalar *coeff = bc->coeff;
+   const CeedScalar *J = in[0], *qw = in[1];
+   CeedScalar *qd = out[0];
+   switch (100 * bc->space_dim + 10 * bc->dim + bc->curl_dim)
+   {
+      case 333:
          CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++)
          {
             MultJtCJ33(J + i, Q, coeff, 1, 3, qw[i], Q, qd + i);
          }
          break;
-      case 3331:
+   }
+   return 0;
+}
+
+/// libCEED QFunction for building quadrature data for a curl-curl operator
+/// with a matrix constant coefficient
+CEED_QFUNCTION(f_build_curlcurl_const_matrix)(void *ctx, CeedInt Q,
+                                              const CeedScalar *const *in,
+                                              CeedScalar *const *out)
+{
+   CurlCurlContext *bc = (CurlCurlContext *)ctx;
+   // in[0] is Jacobians with shape [dim, ncomp=space_dim, Q]
+   // in[1] is quadrature weights, size (Q)
+   //
+   // At every quadrature point, compute qw/det(J) J^T C J and store the
+   // symmetric part of the result. In 2D, compute and store qw * c / det(J)
+   const CeedScalar *coeff = bc->coeff;
+   const CeedScalar *J = in[0], *qw = in[1];
+   CeedScalar *qd = out[0];
+   switch (100 * bc->space_dim + 10 * bc->dim + bc->curl_dim)
+   {
+      case 333:
          CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++)
          {
-            MultJtCJ33(J + i, Q, coeff, 1, 1, qw[i], Q, qd + i);
+            MultJtCJ33(J + i, Q, coeff, 1, 6, qw[i], Q, qd + i);
          }
          break;
    }
    return 0;
 }
 
-/// libCEED QFunction for building quadrature data for a curl-curl
-/// operator with a coefficient evaluated at quadrature points.
-CEED_QFUNCTION(f_build_curlcurl_quad)(void *ctx, CeedInt Q,
-                                      const CeedScalar *const *in,
-                                      CeedScalar *const *out)
+/// libCEED QFunction for building quadrature data for a curl-curl operator
+/// with a scalar coefficient evaluated at quadrature points
+CEED_QFUNCTION(f_build_curlcurl_quad_scalar)(void *ctx, CeedInt Q,
+                                             const CeedScalar *const *in,
+                                             CeedScalar *const *out)
 {
    CurlCurlContext *bc = (CurlCurlContext *)ctx;
-   // in[0] is coefficients with shape [ncomp=coeff_comp, Q]
+   // in[0] is coefficients with shape [ncomp=1, Q]
    // in[1] is Jacobians with shape [dim, ncomp=space_dim, Q]
    // in[2] is quadrature weights, size (Q)
    //
    // At every quadrature point, compute qw/det(J) J^T C J and store the
-   // symmetric part of the result. In 2D, compute and store qw * c / det(J).
+   // symmetric part of the result. In 2D, compute and store qw * c / det(J)
    const CeedScalar *c = in[0], *J = in[1], *qw = in[2];
    CeedScalar *qd = out[0];
-   switch (1000 * bc->space_dim + 100 * bc->dim + 10 * bc->curl_dim +
-           bc->coeff_comp)
+   switch (100 * bc->space_dim + 10 * bc->dim + bc->curl_dim)
    {
-      case 2211:
+      case 221:
          CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++)
          {
             qd[i] = qw[i] * c[i] / DetJ22(J + i, Q);
          }
          break;
-      case 3211:
+      case 321:
          CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++)
          {
             qd[i] = qw[i] * c[i] / DetJ32(J + i, Q);
          }
          break;
-      case 3336:
+      case 333:
          CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++)
          {
-            MultJtCJ33(J + i, Q, c + i, Q, 6, qw[i], Q, qd + i);
+            MultJtCJ33(J + i, Q, c + i, Q, 1, qw[i], Q, qd + i);
          }
          break;
-      case 3333:
+   }
+   return 0;
+}
+
+/// libCEED QFunction for building quadrature data for a curl-curl operator
+/// with a vector coefficient evaluated at quadrature points
+CEED_QFUNCTION(f_build_curlcurl_quad_vector)(void *ctx, CeedInt Q,
+                                             const CeedScalar *const *in,
+                                             CeedScalar *const *out)
+{
+   CurlCurlContext *bc = (CurlCurlContext *)ctx;
+   // in[0] is coefficients with shape [ncomp=space_dim, Q]
+   // in[1] is Jacobians with shape [dim, ncomp=space_dim, Q]
+   // in[2] is quadrature weights, size (Q)
+   //
+   // At every quadrature point, compute qw/det(J) J^T C J and store the
+   // symmetric part of the result. In 2D, compute and store qw * c / det(J)
+   const CeedScalar *c = in[0], *J = in[1], *qw = in[2];
+   CeedScalar *qd = out[0];
+   switch (100 * bc->space_dim + 10 * bc->dim + bc->curl_dim)
+   {
+      case 333:
          CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++)
          {
             MultJtCJ33(J + i, Q, c + i, Q, 3, qw[i], Q, qd + i);
          }
          break;
-      case 3331:
+   }
+   return 0;
+}
+
+/// libCEED QFunction for building quadrature data for a curl-curl operator
+/// with a matrix coefficient evaluated at quadrature points
+CEED_QFUNCTION(f_build_curlcurl_quad_matrix)(void *ctx, CeedInt Q,
+                                             const CeedScalar *const *in,
+                                             CeedScalar *const *out)
+{
+   CurlCurlContext *bc = (CurlCurlContext *)ctx;
+   // in[0] is coefficients with shape [ncomp=space_dim*(space_dim+1)/2, Q]
+   // in[1] is Jacobians with shape [dim, ncomp=space_dim, Q]
+   // in[2] is quadrature weights, size (Q)
+   //
+   // At every quadrature point, compute qw/det(J) J^T C J and store the
+   // symmetric part of the result. In 2D, compute and store qw * c / det(J)
+   const CeedScalar *c = in[0], *J = in[1], *qw = in[2];
+   CeedScalar *qd = out[0];
+   switch (100 * bc->space_dim + 10 * bc->dim + bc->curl_dim)
+   {
+      case 333:
          CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++)
          {
-            MultJtCJ33(J + i, Q, c + i, Q, 1, qw[i], Q, qd + i);
+            MultJtCJ33(J + i, Q, c + i, Q, 6, qw[i], Q, qd + i);
          }
          break;
    }
@@ -162,24 +242,24 @@ CEED_QFUNCTION(f_apply_curlcurl)(void *ctx, CeedInt Q,
    return 0;
 }
 
-/// libCEED QFunction for applying a curl-curl operator
-CEED_QFUNCTION(f_apply_curlcurl_mf_const)(void *ctx, CeedInt Q,
-                                          const CeedScalar *const *in,
-                                          CeedScalar *const *out)
+/// libCEED QFunction for applying a curl-curl operator with a scalar constant
+/// coefficient
+CEED_QFUNCTION(f_apply_curlcurl_mf_const_scalar)(void *ctx, CeedInt Q,
+                                                 const CeedScalar *const *in,
+                                                 CeedScalar *const *out)
 {
    CurlCurlContext *bc = (CurlCurlContext *)ctx;
    // in[0], out[0] have shape [curl_dim, ncomp=1, Q]
    // in[1] is Jacobians with shape [dim, ncomp=space_dim, Q]
    // in[2] is quadrature weights, size (Q)
    //
-   // At every quadrature point, compute qw/det(J) J^T C J.
+   // At every quadrature point, compute qw/det(J) J^T C J
    const CeedScalar *coeff = bc->coeff;
    const CeedScalar *uc = in[0], *J = in[1], *qw = in[2];
    CeedScalar *vc = out[0];
-   switch (1000 * bc->space_dim + 100 * bc->dim + 10 * bc->curl_dim +
-           bc->coeff_comp)
+   switch (100 * bc->space_dim + 10 * bc->dim + bc->curl_dim)
    {
-      case 2211:
+      case 221:
          CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++)
          {
             const CeedScalar coeff0 = coeff[0];
@@ -187,7 +267,7 @@ CEED_QFUNCTION(f_apply_curlcurl_mf_const)(void *ctx, CeedInt Q,
             vc[i] = qd * uc[i];
          }
          break;
-      case 3211:
+      case 321:
          CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++)
          {
             const CeedScalar coeff0 = coeff[0];
@@ -195,11 +275,11 @@ CEED_QFUNCTION(f_apply_curlcurl_mf_const)(void *ctx, CeedInt Q,
             vc[i] = qd * uc[i];
          }
          break;
-      case 3336:
+      case 333:
          CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++)
          {
             CeedScalar qd[6];
-            MultJtCJ33(J + i, Q, coeff, 1, 6, qw[i], 1, qd);
+            MultJtCJ33(J + i, Q, coeff, 1, 1, qw[i], 1, qd);
             const CeedScalar uc0 = uc[i + Q * 0];
             const CeedScalar uc1 = uc[i + Q * 1];
             const CeedScalar uc2 = uc[i + Q * 2];
@@ -208,7 +288,28 @@ CEED_QFUNCTION(f_apply_curlcurl_mf_const)(void *ctx, CeedInt Q,
             vc[i + Q * 2] = qd[2] * uc0 + qd[4] * uc1 + qd[5] * uc2;
          }
          break;
-      case 3333:
+   }
+   return 0;
+}
+
+/// libCEED QFunction for applying a curl-curl operator with a vector constant
+/// coefficient
+CEED_QFUNCTION(f_apply_curlcurl_mf_const_vector)(void *ctx, CeedInt Q,
+                                                 const CeedScalar *const *in,
+                                                 CeedScalar *const *out)
+{
+   CurlCurlContext *bc = (CurlCurlContext *)ctx;
+   // in[0], out[0] have shape [curl_dim, ncomp=1, Q]
+   // in[1] is Jacobians with shape [dim, ncomp=space_dim, Q]
+   // in[2] is quadrature weights, size (Q)
+   //
+   // At every quadrature point, compute qw/det(J) J^T C J
+   const CeedScalar *coeff = bc->coeff;
+   const CeedScalar *uc = in[0], *J = in[1], *qw = in[2];
+   CeedScalar *vc = out[0];
+   switch (100 * bc->space_dim + 10 * bc->dim + bc->curl_dim)
+   {
+      case 333:
          CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++)
          {
             CeedScalar qd[6];
@@ -221,11 +322,32 @@ CEED_QFUNCTION(f_apply_curlcurl_mf_const)(void *ctx, CeedInt Q,
             vc[i + Q * 2] = qd[2] * uc0 + qd[4] * uc1 + qd[5] * uc2;
          }
          break;
-      case 3331:
+   }
+   return 0;
+}
+
+/// libCEED QFunction for applying a curl-curl operator with a matrix constant
+/// coefficient
+CEED_QFUNCTION(f_apply_curlcurl_mf_const_matrix)(void *ctx, CeedInt Q,
+                                                 const CeedScalar *const *in,
+                                                 CeedScalar *const *out)
+{
+   CurlCurlContext *bc = (CurlCurlContext *)ctx;
+   // in[0], out[0] have shape [curl_dim, ncomp=1, Q]
+   // in[1] is Jacobians with shape [dim, ncomp=space_dim, Q]
+   // in[2] is quadrature weights, size (Q)
+   //
+   // At every quadrature point, compute qw/det(J) J^T C J
+   const CeedScalar *coeff = bc->coeff;
+   const CeedScalar *uc = in[0], *J = in[1], *qw = in[2];
+   CeedScalar *vc = out[0];
+   switch (100 * bc->space_dim + 10 * bc->dim + bc->curl_dim)
+   {
+      case 333:
          CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++)
          {
             CeedScalar qd[6];
-            MultJtCJ33(J + i, Q, coeff, 1, 1, qw[i], 1, qd);
+            MultJtCJ33(J + i, Q, coeff, 1, 6, qw[i], 1, qd);
             const CeedScalar uc0 = uc[i + Q * 0];
             const CeedScalar uc1 = uc[i + Q * 1];
             const CeedScalar uc2 = uc[i + Q * 2];
@@ -238,42 +360,42 @@ CEED_QFUNCTION(f_apply_curlcurl_mf_const)(void *ctx, CeedInt Q,
    return 0;
 }
 
-/// libCEED QFunction for applying a curl-curl operator
-CEED_QFUNCTION(f_apply_curlcurl_mf_quad)(void *ctx, CeedInt Q,
-                                         const CeedScalar *const *in,
-                                         CeedScalar *const *out)
+/// libCEED QFunction for applying a curl-curl operator with a scalar
+/// coefficient evaluated at quadrature points
+CEED_QFUNCTION(f_apply_curlcurl_mf_quad_scalar)(void *ctx, CeedInt Q,
+                                                const CeedScalar *const *in,
+                                                CeedScalar *const *out)
 {
    CurlCurlContext *bc = (CurlCurlContext *)ctx;
    // in[0], out[0] have shape [curl_dim, ncomp=1, Q]
-   // in[1] is coefficients with shape [ncomp=coeff_comp, Q]
+   // in[1] is coefficients with shape [ncomp=1, Q]
    // in[2] is Jacobians with shape [dim, ncomp=space_dim, Q]
    // in[3] is quadrature weights, size (Q)
    //
-   // At every quadrature point, compute qw/det(J) J^T C J.
+   // At every quadrature point, compute qw/det(J) J^T C J
    const CeedScalar *uc = in[0], *c = in[1], *J = in[2], *qw = in[3];
    CeedScalar *vc = out[0];
-   switch (1000 * bc->space_dim + 100 * bc->dim + 10 * bc->curl_dim +
-           bc->coeff_comp)
+   switch (100 * bc->space_dim + 10 * bc->dim + bc->curl_dim)
    {
-      case 2211:
+      case 221:
          CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++)
          {
             const CeedScalar qd = qw[i] * c[i] / DetJ22(J + i, Q);
             vc[i] = qd * uc[i];
          }
          break;
-      case 3211:
+      case 321:
          CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++)
          {
             const CeedScalar qd = qw[i] * c[i] / DetJ32(J + i, Q);
             vc[i] = qd * uc[i];
          }
          break;
-      case 3336:
+      case 333:
          CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++)
          {
             CeedScalar qd[6];
-            MultJtCJ33(J + i, Q, c + i, Q, 6, qw[i], 1, qd);
+            MultJtCJ33(J + i, Q, c + i, Q, 1, qw[i], 1, qd);
             const CeedScalar uc0 = uc[i + Q * 0];
             const CeedScalar uc1 = uc[i + Q * 1];
             const CeedScalar uc2 = uc[i + Q * 2];
@@ -282,7 +404,28 @@ CEED_QFUNCTION(f_apply_curlcurl_mf_quad)(void *ctx, CeedInt Q,
             vc[i + Q * 2] = qd[2] * uc0 + qd[4] * uc1 + qd[5] * uc2;
          }
          break;
-      case 3333:
+   }
+   return 0;
+}
+
+/// libCEED QFunction for applying a curl-curl operator with a vector
+/// coefficient evaluated at quadrature points
+CEED_QFUNCTION(f_apply_curlcurl_mf_quad_vector)(void *ctx, CeedInt Q,
+                                                const CeedScalar *const *in,
+                                                CeedScalar *const *out)
+{
+   CurlCurlContext *bc = (CurlCurlContext *)ctx;
+   // in[0], out[0] have shape [curl_dim, ncomp=1, Q]
+   // in[1] is coefficients with shape [ncomp=space_dim, Q]
+   // in[2] is Jacobians with shape [dim, ncomp=space_dim, Q]
+   // in[3] is quadrature weights, size (Q)
+   //
+   // At every quadrature point, compute qw/det(J) J^T C J
+   const CeedScalar *uc = in[0], *c = in[1], *J = in[2], *qw = in[3];
+   CeedScalar *vc = out[0];
+   switch (100 * bc->space_dim + 10 * bc->dim + bc->curl_dim)
+   {
+      case 333:
          CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++)
          {
             CeedScalar qd[6];
@@ -295,11 +438,32 @@ CEED_QFUNCTION(f_apply_curlcurl_mf_quad)(void *ctx, CeedInt Q,
             vc[i + Q * 2] = qd[2] * uc0 + qd[4] * uc1 + qd[5] * uc2;
          }
          break;
-      case 3331:
+   }
+   return 0;
+}
+
+/// libCEED QFunction for applying a curl-curl operator with a matrix
+/// coefficient evaluated at quadrature points
+CEED_QFUNCTION(f_apply_curlcurl_mf_quad_matrix)(void *ctx, CeedInt Q,
+                                                const CeedScalar *const *in,
+                                                CeedScalar *const *out)
+{
+   CurlCurlContext *bc = (CurlCurlContext *)ctx;
+   // in[0], out[0] have shape [curl_dim, ncomp=1, Q]
+   // in[1] is coefficients with shape [ncomp=space_dim*(space_dim+1)/2, Q]
+   // in[2] is Jacobians with shape [dim, ncomp=space_dim, Q]
+   // in[3] is quadrature weights, size (Q)
+   //
+   // At every quadrature point, compute qw/det(J) J^T C J
+   const CeedScalar *uc = in[0], *c = in[1], *J = in[2], *qw = in[3];
+   CeedScalar *vc = out[0];
+   switch (100 * bc->space_dim + 10 * bc->dim + bc->curl_dim)
+   {
+      case 333:
          CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++)
          {
             CeedScalar qd[6];
-            MultJtCJ33(J + i, Q, c + i, Q, 1, qw[i], 1, qd);
+            MultJtCJ33(J + i, Q, c + i, Q, 6, qw[i], 1, qd);
             const CeedScalar uc0 = uc[i + Q * 0];
             const CeedScalar uc1 = uc[i + Q * 1];
             const CeedScalar uc2 = uc[i + Q * 2];
diff --git a/fem/ceed/integrators/diffusion/diffusion.cpp b/fem/ceed/integrators/diffusion/diffusion.cpp
index b9c74bc64a9..d2f56db828f 100644
--- a/fem/ceed/integrators/diffusion/diffusion.cpp
+++ b/fem/ceed/integrators/diffusion/diffusion.cpp
@@ -25,73 +25,124 @@ namespace ceed
 #ifdef MFEM_USE_CEED
 struct DiffusionOperatorInfo : public OperatorInfo
 {
-   DiffusionContext ctx;
+   DiffusionContext ctx = {0};
    template <typename CoeffType>
    DiffusionOperatorInfo(const mfem::FiniteElementSpace &fes, CoeffType *Q,
-                         bool use_bdr)
+                         bool use_bdr = false, bool use_mf = false)
    {
       ctx.dim = fes.GetMesh()->Dimension() - use_bdr;
       ctx.space_dim = fes.GetMesh()->SpaceDimension();
       ctx.vdim = fes.GetVDim();
+      if (!use_mf)
+      {
+         apply_func = ":f_apply_diff";
+         apply_qf = &f_apply_diff;
+      }
+      else
+      {
+         build_func = "";
+         build_qf = nullptr;
+      }
       if (Q == nullptr)
       {
-         ctx.coeff_comp = 1;
          ctx.coeff[0] = 1.0;
+         if (!use_mf)
+         {
+            build_func = ":f_build_diff_const_scalar";
+            build_qf = &f_build_diff_const_scalar;
+         }
+         else
+         {
+            apply_func = ":f_apply_diff_mf_const_scalar";
+            apply_qf = &f_apply_diff_mf_const_scalar;
+         }
       }
       else
       {
-         InitCoefficient(*Q);
+         InitCoefficient(*Q, use_mf);
       }
-
       header = "/integrators/diffusion/diffusion_qf.h";
-      build_func_const = ":f_build_diff_const";
-      build_qf_const = &f_build_diff_const;
-      build_func_quad = ":f_build_diff_quad";
-      build_qf_quad = &f_build_diff_quad;
-      apply_func = ":f_apply_diff";
-      apply_qf = &f_apply_diff;
-      apply_func_mf_const = ":f_apply_diff_mf_const";
-      apply_qf_mf_const = &f_apply_diff_mf_const;
-      apply_func_mf_quad = ":f_apply_diff_mf_quad";
-      apply_qf_mf_quad = &f_apply_diff_mf_quad;
       trial_op = EvalMode::Grad;
       test_op = EvalMode::Grad;
       qdatasize = (ctx.dim * (ctx.dim + 1)) / 2;
    }
-   void InitCoefficient(mfem::Coefficient &Q)
+   void InitCoefficient(mfem::Coefficient &Q, bool use_mf)
    {
-      ctx.coeff_comp = 1;
-      if (ConstantCoefficient *const_coeff =
-             dynamic_cast<ConstantCoefficient *>(&Q))
+      if (mfem::ConstantCoefficient *const_coeff =
+             dynamic_cast<mfem::ConstantCoefficient *>(&Q))
       {
          ctx.coeff[0] = const_coeff->constant;
+         if (!use_mf)
+         {
+            build_func = ":f_build_diff_const_scalar";
+            build_qf = &f_build_diff_const_scalar;
+         }
+         else
+         {
+            apply_func = ":f_apply_diff_mf_const_scalar";
+            apply_qf = &f_apply_diff_mf_const_scalar;
+         }
+      }
+      else
+      {
+         if (!use_mf)
+         {
+            build_func = ":f_build_diff_quad_scalar";
+            build_qf = &f_build_diff_quad_scalar;
+         }
+         else
+         {
+            apply_func = ":f_apply_diff_mf_quad_scalar";
+            apply_qf = &f_apply_diff_mf_quad_scalar;
+         }
       }
    }
-   void InitCoefficient(mfem::VectorCoefficient &VQ)
+   void InitCoefficient(mfem::VectorCoefficient &VQ, bool use_mf)
    {
-      const int vdim = VQ.GetVDim();
-      ctx.coeff_comp = vdim;
-      if (VectorConstantCoefficient *const_coeff =
-             dynamic_cast<VectorConstantCoefficient *>(&VQ))
+      if (mfem::VectorConstantCoefficient *const_coeff =
+             dynamic_cast<mfem::VectorConstantCoefficient *>(&VQ))
       {
-         MFEM_VERIFY(ctx.coeff_comp <= LIBCEED_DIFF_COEFF_COMP_MAX,
+         const int vdim = VQ.GetVDim();
+         MFEM_VERIFY(vdim <= LIBCEED_DIFF_COEFF_COMP_MAX,
                      "VectorCoefficient dimension exceeds context storage!");
          const mfem::Vector &val = const_coeff->GetVec();
          for (int i = 0; i < vdim; i++)
          {
             ctx.coeff[i] = val[i];
          }
+         if (!use_mf)
+         {
+            build_func = ":f_build_diff_const_vector";
+            build_qf = &f_build_diff_const_vector;
+         }
+         else
+         {
+            apply_func = ":f_apply_diff_mf_const_vector";
+            apply_qf = &f_apply_diff_mf_const_vector;
+         }
+      }
+      else
+      {
+         if (!use_mf)
+         {
+            build_func = ":f_build_diff_quad_vector";
+            build_qf = &f_build_diff_quad_vector;
+         }
+         else
+         {
+            apply_func = ":f_apply_diff_mf_quad_vector";
+            apply_qf = &f_apply_diff_mf_quad_vector;
+         }
       }
    }
-   void InitCoefficient(mfem::MatrixCoefficient &MQ)
+   void InitCoefficient(mfem::MatrixCoefficient &MQ, bool use_mf)
    {
       // Assumes matrix coefficient is symmetric
-      const int vdim = MQ.GetVDim();
-      ctx.coeff_comp = (vdim * (vdim + 1)) / 2;
-      if (MatrixConstantCoefficient *const_coeff =
-             dynamic_cast<MatrixConstantCoefficient *>(&MQ))
+      if (mfem::MatrixConstantCoefficient *const_coeff =
+             dynamic_cast<mfem::MatrixConstantCoefficient *>(&MQ))
       {
-         MFEM_VERIFY(ctx.coeff_comp <= LIBCEED_DIFF_COEFF_COMP_MAX,
+         const int vdim = MQ.GetVDim();
+         MFEM_VERIFY((vdim * (vdim + 1)) / 2 <= LIBCEED_DIFF_COEFF_COMP_MAX,
                      "MatrixCoefficient dimensions exceed context storage!");
          const mfem::DenseMatrix &val = const_coeff->GetMatrix();
          for (int j = 0; j < vdim; j++)
@@ -102,6 +153,29 @@ struct DiffusionOperatorInfo : public OperatorInfo
                ctx.coeff[idx] = val(i, j);
             }
          }
+         if (!use_mf)
+         {
+            build_func = ":f_build_diff_const_matrix";
+            build_qf = &f_build_diff_const_matrix;
+         }
+         else
+         {
+            apply_func = ":f_apply_diff_mf_const_matrix";
+            apply_qf = &f_apply_diff_mf_const_matrix;
+         }
+      }
+      else
+      {
+         if (!use_mf)
+         {
+            build_func = ":f_build_diff_quad_matrix";
+            build_qf = &f_build_diff_quad_matrix;
+         }
+         else
+         {
+            apply_func = ":f_apply_diff_mf_quad_matrix";
+            apply_qf = &f_apply_diff_mf_quad_matrix;
+         }
       }
    }
 };
@@ -145,7 +219,7 @@ MFDiffusionIntegrator::MFDiffusionIntegrator(
    const bool use_bdr)
 {
 #ifdef MFEM_USE_CEED
-   DiffusionOperatorInfo info(fes, Q, use_bdr);
+   DiffusionOperatorInfo info(fes, Q, use_bdr, true);
    Assemble(integ, info, fes, Q, use_bdr, true);
 #else
    MFEM_ABORT("MFEM must be built with MFEM_USE_CEED=YES to use libCEED.");
@@ -160,7 +234,7 @@ MFDiffusionIntegrator::MFDiffusionIntegrator(
    const bool use_bdr)
 {
 #ifdef MFEM_USE_CEED
-   DiffusionOperatorInfo info(fes, Q, use_bdr);
+   DiffusionOperatorInfo info(fes, Q, use_bdr, true);
    Assemble(integ, info, fes, Q, use_bdr, true);
 #else
    MFEM_ABORT("MFEM must be built with MFEM_USE_CEED=YES to use libCEED.");
diff --git a/fem/ceed/integrators/diffusion/diffusion_qf.h b/fem/ceed/integrators/diffusion/diffusion_qf.h
index 779cdacd040..9ab50a3ed78 100644
--- a/fem/ceed/integrators/diffusion/diffusion_qf.h
+++ b/fem/ceed/integrators/diffusion/diffusion_qf.h
@@ -16,107 +16,148 @@
 
 #define LIBCEED_DIFF_COEFF_COMP_MAX 6
 
-/// A structure used to pass additional data to f_build_diff and f_apply_diff
 struct DiffusionContext
 {
-   CeedInt dim, space_dim, vdim, coeff_comp;
+   CeedInt dim, space_dim, vdim;
    CeedScalar coeff[LIBCEED_DIFF_COEFF_COMP_MAX];
 };
 
 /// libCEED QFunction for building quadrature data for a diffusion operator
-/// with a constant coefficient
-CEED_QFUNCTION(f_build_diff_const)(void *ctx, CeedInt Q,
-                                   const CeedScalar *const *in,
-                                   CeedScalar *const *out)
+/// with a scalar constant coefficient
+CEED_QFUNCTION(f_build_diff_const_scalar)(void *ctx, CeedInt Q,
+                                          const CeedScalar *const *in,
+                                          CeedScalar *const *out)
 {
    DiffusionContext *bc = (DiffusionContext *)ctx;
    // in[0] is Jacobians with shape [dim, ncomp=space_dim, Q]
    // in[1] is quadrature weights, size (Q)
    //
    // At every quadrature point, compute qw/det(J) adj(J) C adj(J)^T and store
-   // the symmetric part of the result.
+   // the symmetric part of the result
    const CeedScalar *coeff = bc->coeff;
    const CeedScalar *J = in[0], *qw = in[1];
    CeedScalar *qd = out[0];
-   switch (100 * bc->space_dim + 10 * bc->dim + bc->coeff_comp)
+   switch (10 * bc->space_dim + bc->dim)
    {
-      case 111:
+      case 11:
          CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++)
          {
             const CeedScalar coeff0 = coeff[0];
             qd[i] = qw[i] * coeff0 / J[i];
          }
          break;
-      case 213:
+      case 21:
          CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++)
          {
-            MultAdjJCAdjJt21(J + i, Q, coeff, 1, 3, qw[i], Q, qd + i);
+            MultAdjJCAdjJt21(J + i, Q, coeff, 1, 1, qw[i], Q, qd + i);
          }
          break;
-      case 212:
+      case 22:
          CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++)
          {
-            MultAdjJCAdjJt21(J + i, Q, coeff, 1, 2, qw[i], Q, qd + i);
+            MultAdjJCAdjJt22(J + i, Q, coeff, 1, 1, qw[i], Q, qd + i);
          }
          break;
-      case 211:
+      case 32:
          CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++)
          {
-            MultAdjJCAdjJt21(J + i, Q, coeff, 1, 1, qw[i], Q, qd + i);
+            MultAdjJCAdjJt32(J + i, Q, coeff, 1, 1, qw[i], Q, qd + i);
          }
          break;
-      case 223:
+      case 33:
          CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++)
          {
-            MultAdjJCAdjJt22(J + i, Q, coeff, 1, 3, qw[i], Q, qd + i);
+            MultAdjJCAdjJt33(J + i, Q, coeff, 1, 1, qw[i], Q, qd + i);
          }
          break;
-      case 222:
+   }
+   return 0;
+}
+
+/// libCEED QFunction for building quadrature data for a diffusion operator
+/// with a vector constant coefficient
+CEED_QFUNCTION(f_build_diff_const_vector)(void *ctx, CeedInt Q,
+                                          const CeedScalar *const *in,
+                                          CeedScalar *const *out)
+{
+   DiffusionContext *bc = (DiffusionContext *)ctx;
+   // in[0] is Jacobians with shape [dim, ncomp=space_dim, Q]
+   // in[1] is quadrature weights, size (Q)
+   //
+   // At every quadrature point, compute qw/det(J) adj(J) C adj(J)^T and store
+   // the symmetric part of the result
+   const CeedScalar *coeff = bc->coeff;
+   const CeedScalar *J = in[0], *qw = in[1];
+   CeedScalar *qd = out[0];
+   switch (10 * bc->space_dim + bc->dim)
+   {
+      case 21:
          CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++)
          {
-            MultAdjJCAdjJt22(J + i, Q, coeff, 1, 2, qw[i], Q, qd + i);
+            MultAdjJCAdjJt21(J + i, Q, coeff, 1, 2, qw[i], Q, qd + i);
          }
          break;
-      case 221:
+      case 22:
          CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++)
          {
-            MultAdjJCAdjJt22(J + i, Q, coeff, 1, 1, qw[i], Q, qd + i);
+            MultAdjJCAdjJt22(J + i, Q, coeff, 1, 2, qw[i], Q, qd + i);
          }
          break;
-      case 326:
+      case 32:
          CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++)
          {
-            MultAdjJCAdjJt32(J + i, Q, coeff, 1, 6, qw[i], Q, qd + i);
+            MultAdjJCAdjJt32(J + i, Q, coeff, 1, 3, qw[i], Q, qd + i);
          }
          break;
-      case 323:
+      case 33:
          CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++)
          {
-            MultAdjJCAdjJt32(J + i, Q, coeff, 1, 3, qw[i], Q, qd + i);
+            MultAdjJCAdjJt33(J + i, Q, coeff, 1, 3, qw[i], Q, qd + i);
          }
          break;
-      case 321:
+   }
+   return 0;
+}
+
+/// libCEED QFunction for building quadrature data for a diffusion operator
+/// with a matrix constant coefficient
+CEED_QFUNCTION(f_build_diff_const_matrix)(void *ctx, CeedInt Q,
+                                          const CeedScalar *const *in,
+                                          CeedScalar *const *out)
+{
+   DiffusionContext *bc = (DiffusionContext *)ctx;
+   // in[0] is Jacobians with shape [dim, ncomp=space_dim, Q]
+   // in[1] is quadrature weights, size (Q)
+   //
+   // At every quadrature point, compute qw/det(J) adj(J) C adj(J)^T and store
+   // the symmetric part of the result
+   const CeedScalar *coeff = bc->coeff;
+   const CeedScalar *J = in[0], *qw = in[1];
+   CeedScalar *qd = out[0];
+   switch (10 * bc->space_dim + bc->dim)
+   {
+      case 21:
          CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++)
          {
-            MultAdjJCAdjJt32(J + i, Q, coeff, 1, 1, qw[i], Q, qd + i);
+            MultAdjJCAdjJt21(J + i, Q, coeff, 1, 3, qw[i], Q, qd + i);
          }
          break;
-      case 336:
+      case 22:
          CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++)
          {
-            MultAdjJCAdjJt33(J + i, Q, coeff, 1, 6, qw[i], Q, qd + i);
+            MultAdjJCAdjJt22(J + i, Q, coeff, 1, 3, qw[i], Q, qd + i);
          }
          break;
-      case 333:
+      case 32:
          CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++)
          {
-            MultAdjJCAdjJt33(J + i, Q, coeff, 1, 3, qw[i], Q, qd + i);
+            MultAdjJCAdjJt32(J + i, Q, coeff, 1, 6, qw[i], Q, qd + i);
          }
          break;
-      case 331:
+      case 33:
          CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++)
          {
-            MultAdjJCAdjJt33(J + i, Q, coeff, 1, 1, qw[i], Q, qd + i);
+            MultAdjJCAdjJt33(J + i, Q, coeff, 1, 6, qw[i], Q, qd + i);
          }
          break;
    }
@@ -124,105 +165,147 @@ CEED_QFUNCTION(f_build_diff_const)(void *ctx, CeedInt Q,
 }
 
 /// libCEED QFunction for building quadrature data for a diffusion operator
-/// with a coefficient evaluated at quadrature points.
-CEED_QFUNCTION(f_build_diff_quad)(void *ctx, CeedInt Q,
-                                  const CeedScalar *const *in,
-                                  CeedScalar *const *out)
+/// with a scalar coefficient evaluated at quadrature points
+CEED_QFUNCTION(f_build_diff_quad_scalar)(void *ctx, CeedInt Q,
+                                         const CeedScalar *const *in,
+                                         CeedScalar *const *out)
 {
    DiffusionContext *bc = (DiffusionContext *)ctx;
-   // in[0] is coefficients with shape [ncomp=coeff_comp, Q]
+   // in[0] is coefficients with shape [ncomp=1, Q]
    // in[1] is Jacobians with shape [dim, ncomp=space_dim, Q]
    // in[2] is quadrature weights, size (Q)
    //
    // At every quadrature point, compute qw/det(J) adj(J) C adj(J)^T and store
-   // the symmetric part of the result.
+   // the symmetric part of the result
    const CeedScalar *c = in[0], *J = in[1], *qw = in[2];
    CeedScalar *qd = out[0];
-   switch (100 * bc->space_dim + 10 * bc->dim + bc->coeff_comp)
+   switch (10 * bc->space_dim + bc->dim)
    {
-      case 111:
+      case 11:
          CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++)
          {
             qd[i] = qw[i] * c[i] / J[i];
          }
          break;
-      case 213:
+      case 21:
          CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++)
          {
-            MultAdjJCAdjJt21(J + i, Q, c + i, Q, 3, qw[i], Q, qd + i);
+            MultAdjJCAdjJt21(J + i, Q, c + i, Q, 1, qw[i], Q, qd + i);
          }
          break;
-      case 212:
+      case 22:
          CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++)
          {
-            MultAdjJCAdjJt21(J + i, Q, c + i, Q, 2, qw[i], Q, qd + i);
+            MultAdjJCAdjJt22(J + i, Q, c + i, Q, 1, qw[i], Q, qd + i);
          }
          break;
-      case 211:
+      case 32:
          CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++)
          {
-            MultAdjJCAdjJt21(J + i, Q, c + i, Q, 1, qw[i], Q, qd + i);
+            MultAdjJCAdjJt32(J + i, Q, c + i, Q, 1, qw[i], Q, qd + i);
          }
          break;
-      case 223:
+      case 33:
          CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++)
          {
-            MultAdjJCAdjJt22(J + i, Q, c + i, Q, 3, qw[i], Q, qd + i);
+            MultAdjJCAdjJt33(J + i, Q, c + i, Q, 1, qw[i], Q, qd + i);
          }
          break;
-      case 222:
+   }
+   return 0;
+}
+
+/// libCEED QFunction for building quadrature data for a diffusion operator
+/// with a vector coefficient evaluated at quadrature points
+CEED_QFUNCTION(f_build_diff_quad_vector)(void *ctx, CeedInt Q,
+                                         const CeedScalar *const *in,
+                                         CeedScalar *const *out)
+{
+   DiffusionContext *bc = (DiffusionContext *)ctx;
+   // in[0] is coefficients with shape [ncomp=space_dim, Q]
+   // in[1] is Jacobians with shape [dim, ncomp=space_dim, Q]
+   // in[2] is quadrature weights, size (Q)
+   //
+   // At every quadrature point, compute qw/det(J) adj(J) C adj(J)^T and store
+   // the symmetric part of the result
+   const CeedScalar *c = in[0], *J = in[1], *qw = in[2];
+   CeedScalar *qd = out[0];
+   switch (10 * bc->space_dim + bc->dim)
+   {
+      case 21:
          CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++)
          {
-            MultAdjJCAdjJt22(J + i, Q, c + i, Q, 2, qw[i], Q, qd + i);
+            MultAdjJCAdjJt21(J + i, Q, c + i, Q, 2, qw[i], Q, qd + i);
          }
          break;
-      case 221:
+      case 22:
          CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++)
          {
-            MultAdjJCAdjJt22(J + i, Q, c + i, Q, 1, qw[i], Q, qd + i);
+            MultAdjJCAdjJt22(J + i, Q, c + i, Q, 2, qw[i], Q, qd + i);
          }
          break;
-      case 326:
+      case 32:
          CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++)
          {
-            MultAdjJCAdjJt32(J + i, Q, c + i, Q, 6, qw[i], Q, qd + i);
+            MultAdjJCAdjJt32(J + i, Q, c + i, Q, 3, qw[i], Q, qd + i);
          }
          break;
-      case 323:
+      case 33:
          CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++)
          {
-            MultAdjJCAdjJt32(J + i, Q, c + i, Q, 3, qw[i], Q, qd + i);
+            MultAdjJCAdjJt33(J + i, Q, c + i, Q, 3, qw[i], Q, qd + i);
          }
          break;
-      case 321:
+   }
+   return 0;
+}
+
+/// libCEED QFunction for building quadrature data for a diffusion operator
+/// with a matrix coefficient evaluated at quadrature points
+CEED_QFUNCTION(f_build_diff_quad_matrix)(void *ctx, CeedInt Q,
+                                         const CeedScalar *const *in,
+                                         CeedScalar *const *out)
+{
+   DiffusionContext *bc = (DiffusionContext *)ctx;
+   // in[0] is coefficients with shape [ncomp=space_dim*(space_dim+1)/2, Q]
+   // in[1] is Jacobians with shape [dim, ncomp=space_dim, Q]
+   // in[2] is quadrature weights, size (Q)
+   //
+   // At every quadrature point, compute qw/det(J) adj(J) C adj(J)^T and store
+   // the symmetric part of the result
+   const CeedScalar *c = in[0], *J = in[1], *qw = in[2];
+   CeedScalar *qd = out[0];
+   switch (10 * bc->space_dim + bc->dim)
+   {
+      case 21:
          CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++)
          {
-            MultAdjJCAdjJt32(J + i, Q, c + i, Q, 1, qw[i], Q, qd + i);
+            MultAdjJCAdjJt21(J + i, Q, c + i, Q, 3, qw[i], Q, qd + i);
          }
          break;
-      case 336:
+      case 22:
          CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++)
          {
-            MultAdjJCAdjJt33(J + i, Q, c + i, Q, 6, qw[i], Q, qd + i);
+            MultAdjJCAdjJt22(J + i, Q, c + i, Q, 3, qw[i], Q, qd + i);
          }
          break;
-      case 333:
+      case 32:
          CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++)
          {
-            MultAdjJCAdjJt33(J + i, Q, c + i, Q, 3, qw[i], Q, qd + i);
+            MultAdjJCAdjJt32(J + i, Q, c + i, Q, 6, qw[i], Q, qd + i);
          }
          break;
-      case 331:
+      case 33:
          CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++)
          {
-            MultAdjJCAdjJt33(J + i, Q, c + i, Q, 1, qw[i], Q, qd + i);
+            MultAdjJCAdjJt33(J + i, Q, c + i, Q, 6, qw[i], Q, qd + i);
          }
          break;
    }
    return 0;
 }
 
-/// libCEED QFunction for applying a diff operator
+/// libCEED QFunction for applying a diffusion operator
 CEED_QFUNCTION(f_apply_diff)(void *ctx, CeedInt Q,
                              const CeedScalar *const *in,
                              CeedScalar *const *out)
@@ -328,23 +411,24 @@ CEED_QFUNCTION(f_apply_diff)(void *ctx, CeedInt Q,
    return 0;
 }
 
-/// libCEED QFunction for applying a diff operator
-CEED_QFUNCTION(f_apply_diff_mf_const)(void *ctx, CeedInt Q,
-                                      const CeedScalar *const *in,
-                                      CeedScalar *const *out)
+/// libCEED QFunction for applying a diffusion operator with a scalar constant
+/// coefficient
+CEED_QFUNCTION(f_apply_diff_mf_const_scalar)(void *ctx, CeedInt Q,
+                                             const CeedScalar *const *in,
+                                             CeedScalar *const *out)
 {
    DiffusionContext *bc = (DiffusionContext *)ctx;
    // in[0], out[0] have shape [dim, ncomp=vdim, Q]
    // in[1] is Jacobians with shape [dim, ncomp=space_dim, Q]
    // in[2] is quadrature weights, size (Q)
    //
-   // At every quadrature point, compute qw/det(J) adj(J) C adj(J)^T.
+   // At every quadrature point, compute qw/det(J) adj(J) C adj(J)^T
    const CeedScalar *coeff = bc->coeff;
    const CeedScalar *ug = in[0], *J = in[1], *qw = in[2];
    CeedScalar *vg = out[0];
-   switch (1000 * bc->space_dim + 100 * bc->dim + 10 * bc->coeff_comp + bc->vdim)
+   switch (100 * bc->space_dim + 10 * bc->dim + bc->vdim)
    {
-      case 1111:
+      case 111:
          CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++)
          {
             const CeedScalar coeff0 = coeff[0];
@@ -352,89 +436,145 @@ CEED_QFUNCTION(f_apply_diff_mf_const)(void *ctx, CeedInt Q,
             vg[i] = qd * ug[i];
          }
          break;
-      case 2131:
+      case 211:
          CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++)
          {
             CeedScalar qd;
-            MultAdjJCAdjJt21(J + i, Q, coeff, 1, 3, qw[i], 1, &qd);
+            MultAdjJCAdjJt21(J + i, Q, coeff, 1, 1, qw[i], 1, &qd);
             vg[i] = qd * ug[i];
          }
          break;
-      case 2132:
+      case 212:
          CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++)
          {
             CeedScalar qd;
-            MultAdjJCAdjJt21(J + i, Q, coeff, 1, 3, qw[i], 1, &qd);
+            MultAdjJCAdjJt21(J + i, Q, coeff, 1, 1, qw[i], 1, &qd);
             CeedPragmaSIMD for (CeedInt d = 0; d < 2; d++)
             {
                vg[i + Q * d] = qd * ug[i + Q * d];
             }
          }
          break;
-      case 2121:
+      case 221:
          CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++)
          {
-            CeedScalar qd;
-            MultAdjJCAdjJt21(J + i, Q, coeff, 1, 2, qw[i], 1, &qd);
-            vg[i] = qd * ug[i];
+            CeedScalar qd[3];
+            MultAdjJCAdjJt22(J + i, Q, coeff, 1, 1, qw[i], 1, qd);
+            const CeedScalar ug0 = ug[i + Q * 0];
+            const CeedScalar ug1 = ug[i + Q * 1];
+            vg[i + Q * 0] = qd[0] * ug0 + qd[1] * ug1;
+            vg[i + Q * 1] = qd[1] * ug0 + qd[2] * ug1;
          }
          break;
-      case 2122:
+      case 222:
          CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++)
          {
-            CeedScalar qd;
-            MultAdjJCAdjJt21(J + i, Q, coeff, 1, 2, qw[i], 1, &qd);
+            CeedScalar qd[3];
+            MultAdjJCAdjJt22(J + i, Q, coeff, 1, 1, qw[i], 1, qd);
             CeedPragmaSIMD for (CeedInt d = 0; d < 2; d++)
             {
-               vg[i + Q * d] = qd * ug[i + Q * d];
+               const CeedScalar ug0 = ug[i + Q * (d + 2 * 0)];
+               const CeedScalar ug1 = ug[i + Q * (d + 2 * 1)];
+               vg[i + Q * (d + 2 * 0)] = qd[0] * ug0 + qd[1] * ug1;
+               vg[i + Q * (d + 2 * 1)] = qd[1] * ug0 + qd[2] * ug1;
             }
          }
          break;
-      case 2111:
+      case 321:
          CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++)
          {
-            CeedScalar qd;
-            MultAdjJCAdjJt21(J + i, Q, coeff, 1, 1, qw[i], 1, &qd);
-            vg[i] = qd * ug[i];
+            CeedScalar qd[3];
+            MultAdjJCAdjJt32(J + i, Q, coeff, 1, 1, qw[i], 1, qd);
+            const CeedScalar ug0 = ug[i + Q * 0];
+            const CeedScalar ug1 = ug[i + Q * 1];
+            vg[i + Q * 0] = qd[0] * ug0 + qd[1] * ug1;
+            vg[i + Q * 1] = qd[1] * ug0 + qd[2] * ug1;
          }
          break;
-      case 2112:
+      case 323:
          CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++)
          {
-            CeedScalar qd;
-            MultAdjJCAdjJt21(J + i, Q, coeff, 1, 1, qw[i], 1, &qd);
-            CeedPragmaSIMD for (CeedInt d = 0; d < 2; d++)
+            CeedScalar qd[3];
+            MultAdjJCAdjJt32(J + i, Q, coeff, 1, 1, qw[i], 1, qd);
+            CeedPragmaSIMD for (CeedInt d = 0; d < 3; d++)
             {
-               vg[i + Q * d] = qd * ug[i + Q * d];
+               const CeedScalar ug0 = ug[i + Q * (d + 3 * 0)];
+               const CeedScalar ug1 = ug[i + Q * (d + 3 * 1)];
+               vg[i + Q * (d + 3 * 0)] = qd[0] * ug0 + qd[1] * ug1;
+               vg[i + Q * (d + 3 * 1)] = qd[1] * ug0 + qd[2] * ug1;
             }
          }
          break;
-      case 2231:
+      case 331:
          CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++)
          {
-            CeedScalar qd[3];
-            MultAdjJCAdjJt22(J + i, Q, coeff, 1, 3, qw[i], 1, qd);
+            CeedScalar qd[6];
+            MultAdjJCAdjJt33(J + i, Q, coeff, 1, 1, qw[i], 1, qd);
             const CeedScalar ug0 = ug[i + Q * 0];
             const CeedScalar ug1 = ug[i + Q * 1];
-            vg[i + Q * 0] = qd[0] * ug0 + qd[1] * ug1;
-            vg[i + Q * 1] = qd[1] * ug0 + qd[2] * ug1;
+            const CeedScalar ug2 = ug[i + Q * 2];
+            vg[i + Q * 0] = qd[0] * ug0 + qd[1] * ug1 + qd[2] * ug2;
+            vg[i + Q * 1] = qd[1] * ug0 + qd[3] * ug1 + qd[4] * ug2;
+            vg[i + Q * 2] = qd[2] * ug0 + qd[4] * ug1 + qd[5] * ug2;
          }
          break;
-      case 2232:
+      case 333:
          CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++)
          {
-            CeedScalar qd[3];
-            MultAdjJCAdjJt22(J + i, Q, coeff, 1, 3, qw[i], 1, qd);
+            CeedScalar qd[6];
+            MultAdjJCAdjJt33(J + i, Q, coeff, 1, 1, qw[i], 1, qd);
+            CeedPragmaSIMD for (CeedInt d = 0; d < 3; d++)
+            {
+               const CeedScalar ug0 = ug[i + Q * (d + 3 * 0)];
+               const CeedScalar ug1 = ug[i + Q * (d + 3 * 1)];
+               const CeedScalar ug2 = ug[i + Q * (d + 3 * 2)];
+               vg[i + Q * (d + 3 * 0)] = qd[0] * ug0 + qd[1] * ug1 + qd[2] * ug2;
+               vg[i + Q * (d + 3 * 1)] = qd[1] * ug0 + qd[3] * ug1 + qd[4] * ug2;
+               vg[i + Q * (d + 3 * 2)] = qd[2] * ug0 + qd[4] * ug1 + qd[5] * ug2;
+            }
+         }
+         break;
+   }
+   return 0;
+}
+
+/// libCEED QFunction for applying a diffusion operator with a vector constant
+/// coefficient
+CEED_QFUNCTION(f_apply_diff_mf_const_vector)(void *ctx, CeedInt Q,
+                                             const CeedScalar *const *in,
+                                             CeedScalar *const *out)
+{
+   DiffusionContext *bc = (DiffusionContext *)ctx;
+   // in[0], out[0] have shape [dim, ncomp=vdim, Q]
+   // in[1] is Jacobians with shape [dim, ncomp=space_dim, Q]
+   // in[2] is quadrature weights, size (Q)
+   //
+   // At every quadrature point, compute qw/det(J) adj(J) C adj(J)^T
+   const CeedScalar *coeff = bc->coeff;
+   const CeedScalar *ug = in[0], *J = in[1], *qw = in[2];
+   CeedScalar *vg = out[0];
+   switch (100 * bc->space_dim + 10 * bc->dim + bc->vdim)
+   {
+      case 211:
+         CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++)
+         {
+            CeedScalar qd;
+            MultAdjJCAdjJt21(J + i, Q, coeff, 1, 2, qw[i], 1, &qd);
+            vg[i] = qd * ug[i];
+         }
+         break;
+      case 212:
+         CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++)
+         {
+            CeedScalar qd;
+            MultAdjJCAdjJt21(J + i, Q, coeff, 1, 2, qw[i], 1, &qd);
             CeedPragmaSIMD for (CeedInt d = 0; d < 2; d++)
             {
-               const CeedScalar ug0 = ug[i + Q * (d + 2 * 0)];
-               const CeedScalar ug1 = ug[i + Q * (d + 2 * 1)];
-               vg[i + Q * (d + 2 * 0)] = qd[0] * ug0 + qd[1] * ug1;
-               vg[i + Q * (d + 2 * 1)] = qd[1] * ug0 + qd[2] * ug1;
+               vg[i + Q * d] = qd * ug[i + Q * d];
             }
          }
          break;
-      case 2221:
+      case 221:
          CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++)
          {
             CeedScalar qd[3];
@@ -445,7 +585,7 @@ CEED_QFUNCTION(f_apply_diff_mf_const)(void *ctx, CeedInt Q,
             vg[i + Q * 1] = qd[1] * ug0 + qd[2] * ug1;
          }
          break;
-      case 2222:
+      case 222:
          CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++)
          {
             CeedScalar qd[3];
@@ -459,47 +599,22 @@ CEED_QFUNCTION(f_apply_diff_mf_const)(void *ctx, CeedInt Q,
             }
          }
          break;
-      case 2211:
+      case 321:
          CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++)
          {
             CeedScalar qd[3];
-            MultAdjJCAdjJt22(J + i, Q, coeff, 1, 1, qw[i], 1, qd);
-            const CeedScalar ug0 = ug[i + Q * 0];
-            const CeedScalar ug1 = ug[i + Q * 1];
-            vg[i + Q * 0] = qd[0] * ug0 + qd[1] * ug1;
-            vg[i + Q * 1] = qd[1] * ug0 + qd[2] * ug1;
-         }
-         break;
-      case 2212:
-         CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++)
-         {
-            CeedScalar qd[3];
-            MultAdjJCAdjJt22(J + i, Q, coeff, 1, 1, qw[i], 1, qd);
-            CeedPragmaSIMD for (CeedInt d = 0; d < 2; d++)
-            {
-               const CeedScalar ug0 = ug[i + Q * (d + 2 * 0)];
-               const CeedScalar ug1 = ug[i + Q * (d + 2 * 1)];
-               vg[i + Q * (d + 2 * 0)] = qd[0] * ug0 + qd[1] * ug1;
-               vg[i + Q * (d + 2 * 1)] = qd[1] * ug0 + qd[2] * ug1;
-            }
-         }
-         break;
-      case 3261:
-         CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++)
-         {
-            CeedScalar qd[3];
-            MultAdjJCAdjJt32(J + i, Q, coeff, 1, 6, qw[i], 1, qd);
+            MultAdjJCAdjJt32(J + i, Q, coeff, 1, 3, qw[i], 1, qd);
             const CeedScalar ug0 = ug[i + Q * 0];
             const CeedScalar ug1 = ug[i + Q * 1];
             vg[i + Q * 0] = qd[0] * ug0 + qd[1] * ug1;
             vg[i + Q * 1] = qd[1] * ug0 + qd[2] * ug1;
          }
          break;
-      case 3263:
+      case 323:
          CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++)
          {
             CeedScalar qd[3];
-            MultAdjJCAdjJt32(J + i, Q, coeff, 1, 6, qw[i], 1, qd);
+            MultAdjJCAdjJt32(J + i, Q, coeff, 1, 3, qw[i], 1, qd);
             CeedPragmaSIMD for (CeedInt d = 0; d < 3; d++)
             {
                const CeedScalar ug0 = ug[i + Q * (d + 3 * 0)];
@@ -509,119 +624,130 @@ CEED_QFUNCTION(f_apply_diff_mf_const)(void *ctx, CeedInt Q,
             }
          }
          break;
-      case 3231:
+      case 331:
          CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++)
          {
-            CeedScalar qd[3];
-            MultAdjJCAdjJt32(J + i, Q, coeff, 1, 3, qw[i], 1, qd);
+            CeedScalar qd[6];
+            MultAdjJCAdjJt33(J + i, Q, coeff, 1, 3, qw[i], 1, qd);
             const CeedScalar ug0 = ug[i + Q * 0];
             const CeedScalar ug1 = ug[i + Q * 1];
-            vg[i + Q * 0] = qd[0] * ug0 + qd[1] * ug1;
-            vg[i + Q * 1] = qd[1] * ug0 + qd[2] * ug1;
+            const CeedScalar ug2 = ug[i + Q * 2];
+            vg[i + Q * 0] = qd[0] * ug0 + qd[1] * ug1 + qd[2] * ug2;
+            vg[i + Q * 1] = qd[1] * ug0 + qd[3] * ug1 + qd[4] * ug2;
+            vg[i + Q * 2] = qd[2] * ug0 + qd[4] * ug1 + qd[5] * ug2;
          }
          break;
-      case 3233:
+      case 333:
          CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++)
          {
-            CeedScalar qd[3];
-            MultAdjJCAdjJt32(J + i, Q, coeff, 1, 3, qw[i], 1, qd);
+            CeedScalar qd[6];
+            MultAdjJCAdjJt33(J + i, Q, coeff, 1, 3, qw[i], 1, qd);
             CeedPragmaSIMD for (CeedInt d = 0; d < 3; d++)
             {
                const CeedScalar ug0 = ug[i + Q * (d + 3 * 0)];
                const CeedScalar ug1 = ug[i + Q * (d + 3 * 1)];
-               vg[i + Q * (d + 3 * 0)] = qd[0] * ug0 + qd[1] * ug1;
-               vg[i + Q * (d + 3 * 1)] = qd[1] * ug0 + qd[2] * ug1;
+               const CeedScalar ug2 = ug[i + Q * (d + 3 * 2)];
+               vg[i + Q * (d + 3 * 0)] = qd[0] * ug0 + qd[1] * ug1 + qd[2] * ug2;
+               vg[i + Q * (d + 3 * 1)] = qd[1] * ug0 + qd[3] * ug1 + qd[4] * ug2;
+               vg[i + Q * (d + 3 * 2)] = qd[2] * ug0 + qd[4] * ug1 + qd[5] * ug2;
             }
          }
          break;
-      case 3211:
+   }
+   return 0;
+}
+
+/// libCEED QFunction for applying a diffusion operator with a matrix constant
+/// coefficient
+CEED_QFUNCTION(f_apply_diff_mf_const_matrix)(void *ctx, CeedInt Q,
+                                             const CeedScalar *const *in,
+                                             CeedScalar *const *out)
+{
+   DiffusionContext *bc = (DiffusionContext *)ctx;
+   // in[0], out[0] have shape [dim, ncomp=vdim, Q]
+   // in[1] is Jacobians with shape [dim, ncomp=space_dim, Q]
+   // in[2] is quadrature weights, size (Q)
+   //
+   // At every quadrature point, compute qw/det(J) adj(J) C adj(J)^T
+   const CeedScalar *coeff = bc->coeff;
+   const CeedScalar *ug = in[0], *J = in[1], *qw = in[2];
+   CeedScalar *vg = out[0];
+   switch (100 * bc->space_dim + 10 * bc->dim + bc->vdim)
+   {
+      case 211:
          CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++)
          {
-            CeedScalar qd[3];
-            MultAdjJCAdjJt32(J + i, Q, coeff, 1, 1, qw[i], 1, qd);
-            const CeedScalar ug0 = ug[i + Q * 0];
-            const CeedScalar ug1 = ug[i + Q * 1];
-            vg[i + Q * 0] = qd[0] * ug0 + qd[1] * ug1;
-            vg[i + Q * 1] = qd[1] * ug0 + qd[2] * ug1;
+            CeedScalar qd;
+            MultAdjJCAdjJt21(J + i, Q, coeff, 1, 3, qw[i], 1, &qd);
+            vg[i] = qd * ug[i];
          }
          break;
-      case 3213:
+      case 212:
          CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++)
          {
-            CeedScalar qd[3];
-            MultAdjJCAdjJt32(J + i, Q, coeff, 1, 1, qw[i], 1, qd);
-            CeedPragmaSIMD for (CeedInt d = 0; d < 3; d++)
+            CeedScalar qd;
+            MultAdjJCAdjJt21(J + i, Q, coeff, 1, 3, qw[i], 1, &qd);
+            CeedPragmaSIMD for (CeedInt d = 0; d < 2; d++)
             {
-               const CeedScalar ug0 = ug[i + Q * (d + 3 * 0)];
-               const CeedScalar ug1 = ug[i + Q * (d + 3 * 1)];
-               vg[i + Q * (d + 3 * 0)] = qd[0] * ug0 + qd[1] * ug1;
-               vg[i + Q * (d + 3 * 1)] = qd[1] * ug0 + qd[2] * ug1;
+               vg[i + Q * d] = qd * ug[i + Q * d];
             }
          }
          break;
-      case 3361:
+      case 221:
          CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++)
          {
-            CeedScalar qd[6];
-            MultAdjJCAdjJt33(J + i, Q, coeff, 1, 6, qw[i], 1, qd);
+            CeedScalar qd[3];
+            MultAdjJCAdjJt22(J + i, Q, coeff, 1, 3, qw[i], 1, qd);
             const CeedScalar ug0 = ug[i + Q * 0];
             const CeedScalar ug1 = ug[i + Q * 1];
-            const CeedScalar ug2 = ug[i + Q * 2];
-            vg[i + Q * 0] = qd[0] * ug0 + qd[1] * ug1 + qd[2] * ug2;
-            vg[i + Q * 1] = qd[1] * ug0 + qd[3] * ug1 + qd[4] * ug2;
-            vg[i + Q * 2] = qd[2] * ug0 + qd[4] * ug1 + qd[5] * ug2;
+            vg[i + Q * 0] = qd[0] * ug0 + qd[1] * ug1;
+            vg[i + Q * 1] = qd[1] * ug0 + qd[2] * ug1;
          }
          break;
-      case 3363:
+      case 222:
          CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++)
          {
-            CeedScalar qd[6];
-            MultAdjJCAdjJt33(J + i, Q, coeff, 1, 6, qw[i], 1, qd);
-            CeedPragmaSIMD for (CeedInt d = 0; d < 3; d++)
+            CeedScalar qd[3];
+            MultAdjJCAdjJt22(J + i, Q, coeff, 1, 3, qw[i], 1, qd);
+            CeedPragmaSIMD for (CeedInt d = 0; d < 2; d++)
             {
-               const CeedScalar ug0 = ug[i + Q * (d + 3 * 0)];
-               const CeedScalar ug1 = ug[i + Q * (d + 3 * 1)];
-               const CeedScalar ug2 = ug[i + Q * (d + 3 * 2)];
-               vg[i + Q * (d + 3 * 0)] = qd[0] * ug0 + qd[1] * ug1 + qd[2] * ug2;
-               vg[i + Q * (d + 3 * 1)] = qd[1] * ug0 + qd[3] * ug1 + qd[4] * ug2;
-               vg[i + Q * (d + 3 * 2)] = qd[2] * ug0 + qd[4] * ug1 + qd[5] * ug2;
+               const CeedScalar ug0 = ug[i + Q * (d + 2 * 0)];
+               const CeedScalar ug1 = ug[i + Q * (d + 2 * 1)];
+               vg[i + Q * (d + 2 * 0)] = qd[0] * ug0 + qd[1] * ug1;
+               vg[i + Q * (d + 2 * 1)] = qd[1] * ug0 + qd[2] * ug1;
             }
          }
          break;
-      case 3331:
+      case 321:
          CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++)
          {
-            CeedScalar qd[6];
-            MultAdjJCAdjJt33(J + i, Q, coeff, 1, 3, qw[i], 1, qd);
+            CeedScalar qd[3];
+            MultAdjJCAdjJt32(J + i, Q, coeff, 1, 6, qw[i], 1, qd);
             const CeedScalar ug0 = ug[i + Q * 0];
             const CeedScalar ug1 = ug[i + Q * 1];
-            const CeedScalar ug2 = ug[i + Q * 2];
-            vg[i + Q * 0] = qd[0] * ug0 + qd[1] * ug1 + qd[2] * ug2;
-            vg[i + Q * 1] = qd[1] * ug0 + qd[3] * ug1 + qd[4] * ug2;
-            vg[i + Q * 2] = qd[2] * ug0 + qd[4] * ug1 + qd[5] * ug2;
+            vg[i + Q * 0] = qd[0] * ug0 + qd[1] * ug1;
+            vg[i + Q * 1] = qd[1] * ug0 + qd[2] * ug1;
          }
          break;
-      case 3333:
+      case 323:
          CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++)
          {
-            CeedScalar qd[6];
-            MultAdjJCAdjJt33(J + i, Q, coeff, 1, 3, qw[i], 1, qd);
+            CeedScalar qd[3];
+            MultAdjJCAdjJt32(J + i, Q, coeff, 1, 6, qw[i], 1, qd);
             CeedPragmaSIMD for (CeedInt d = 0; d < 3; d++)
             {
                const CeedScalar ug0 = ug[i + Q * (d + 3 * 0)];
                const CeedScalar ug1 = ug[i + Q * (d + 3 * 1)];
-               const CeedScalar ug2 = ug[i + Q * (d + 3 * 2)];
-               vg[i + Q * (d + 3 * 0)] = qd[0] * ug0 + qd[1] * ug1 + qd[2] * ug2;
-               vg[i + Q * (d + 3 * 1)] = qd[1] * ug0 + qd[3] * ug1 + qd[4] * ug2;
-               vg[i + Q * (d + 3 * 2)] = qd[2] * ug0 + qd[4] * ug1 + qd[5] * ug2;
+               vg[i + Q * (d + 3 * 0)] = qd[0] * ug0 + qd[1] * ug1;
+               vg[i + Q * (d + 3 * 1)] = qd[1] * ug0 + qd[2] * ug1;
             }
          }
          break;
-      case 3311:
+      case 331:
          CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++)
          {
             CeedScalar qd[6];
-            MultAdjJCAdjJt33(J + i, Q, coeff, 1, 1, qw[i], 1, qd);
+            MultAdjJCAdjJt33(J + i, Q, coeff, 1, 6, qw[i], 1, qd);
             const CeedScalar ug0 = ug[i + Q * 0];
             const CeedScalar ug1 = ug[i + Q * 1];
             const CeedScalar ug2 = ug[i + Q * 2];
@@ -630,11 +756,11 @@ CEED_QFUNCTION(f_apply_diff_mf_const)(void *ctx, CeedInt Q,
             vg[i + Q * 2] = qd[2] * ug0 + qd[4] * ug1 + qd[5] * ug2;
          }
          break;
-      case 3313:
+      case 333:
          CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++)
          {
             CeedScalar qd[6];
-            MultAdjJCAdjJt33(J + i, Q, coeff, 1, 1, qw[i], 1, qd);
+            MultAdjJCAdjJt33(J + i, Q, coeff, 1, 6, qw[i], 1, qd);
             CeedPragmaSIMD for (CeedInt d = 0; d < 3; d++)
             {
                const CeedScalar ug0 = ug[i + Q * (d + 3 * 0)];
@@ -650,68 +776,31 @@ CEED_QFUNCTION(f_apply_diff_mf_const)(void *ctx, CeedInt Q,
    return 0;
 }
 
-/// libCEED QFunction for applying a diff operator
-CEED_QFUNCTION(f_apply_diff_mf_quad)(void *ctx, CeedInt Q,
-                                     const CeedScalar *const *in,
-                                     CeedScalar *const *out)
+/// libCEED QFunction for applying a diffusion operator with a scalar
+/// coefficient evaluated at quadrature points
+CEED_QFUNCTION(f_apply_diff_mf_quad_scalar)(void *ctx, CeedInt Q,
+                                            const CeedScalar *const *in,
+                                            CeedScalar *const *out)
 {
    DiffusionContext *bc = (DiffusionContext *)ctx;
    // in[0], out[0] have shape [dim, ncomp=vdim, Q]
-   // in[1] is coefficients with shape [ncomp=coeff_comp, Q]
+   // in[1] is coefficients with shape [ncomp=1, Q]
    // in[2] is Jacobians with shape [dim, ncomp=space_dim, Q]
    // in[3] is quadrature weights, size (Q)
    //
-   // At every quadrature point, compute qw/det(J) adj(J) C adj(J)^T.
+   // At every quadrature point, compute qw/det(J) adj(J) C adj(J)^T
    const CeedScalar *ug = in[0], *c = in[1], *J = in[2], *qw = in[3];
    CeedScalar *vg = out[0];
-   switch (1000 * bc->space_dim + 100 * bc->dim + 10 * bc->coeff_comp + bc->vdim)
+   switch (100 * bc->space_dim + 10 * bc->dim + bc->vdim)
    {
-      case 1111:
+      case 111:
          CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++)
          {
             const CeedScalar qd = qw[i] * c[i] / J[i];
             vg[i] = qd * ug[i];
          }
          break;
-      case 2131:
-         CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++)
-         {
-            CeedScalar qd;
-            MultAdjJCAdjJt21(J + i, Q, c + i, Q, 3, qw[i], 1, &qd);
-            vg[i] = qd * ug[i];
-         }
-         break;
-      case 2132:
-         CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++)
-         {
-            CeedScalar qd;
-            MultAdjJCAdjJt21(J + i, Q, c + i, Q, 3, qw[i], 1, &qd);
-            CeedPragmaSIMD for (CeedInt d = 0; d < 2; d++)
-            {
-               vg[i + Q * d] = qd * ug[i + Q * d];
-            }
-         }
-         break;
-      case 2121:
-         CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++)
-         {
-            CeedScalar qd;
-            MultAdjJCAdjJt21(J + i, Q, c + i, Q, 2, qw[i], 1, &qd);
-            vg[i] = qd * ug[i];
-         }
-         break;
-      case 2122:
-         CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++)
-         {
-            CeedScalar qd;
-            MultAdjJCAdjJt21(J + i, Q, c + i, Q, 2, qw[i], 1, &qd);
-            CeedPragmaSIMD for (CeedInt d = 0; d < 2; d++)
-            {
-               vg[i + Q * d] = qd * ug[i + Q * d];
-            }
-         }
-         break;
-      case 2111:
+      case 211:
          CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++)
          {
             CeedScalar qd;
@@ -719,7 +808,7 @@ CEED_QFUNCTION(f_apply_diff_mf_quad)(void *ctx, CeedInt Q,
             vg[i] = qd * ug[i];
          }
          break;
-      case 2112:
+      case 212:
          CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++)
          {
             CeedScalar qd;
@@ -730,22 +819,22 @@ CEED_QFUNCTION(f_apply_diff_mf_quad)(void *ctx, CeedInt Q,
             }
          }
          break;
-      case 2231:
+      case 221:
          CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++)
          {
             CeedScalar qd[3];
-            MultAdjJCAdjJt22(J + i, Q, c + i, Q, 3, qw[i], 1, qd);
+            MultAdjJCAdjJt22(J + i, Q, c + i, Q, 1, qw[i], 1, qd);
             const CeedScalar ug0 = ug[i + Q * 0];
             const CeedScalar ug1 = ug[i + Q * 1];
             vg[i + Q * 0] = qd[0] * ug0 + qd[1] * ug1;
             vg[i + Q * 1] = qd[1] * ug0 + qd[2] * ug1;
          }
          break;
-      case 2232:
+      case 222:
          CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++)
          {
             CeedScalar qd[3];
-            MultAdjJCAdjJt22(J + i, Q, c + i, Q, 3, qw[i], 1, qd);
+            MultAdjJCAdjJt22(J + i, Q, c + i, Q, 1, qw[i], 1, qd);
             CeedPragmaSIMD for (CeedInt d = 0; d < 2; d++)
             {
                const CeedScalar ug0 = ug[i + Q * (d + 2 * 0)];
@@ -755,122 +844,141 @@ CEED_QFUNCTION(f_apply_diff_mf_quad)(void *ctx, CeedInt Q,
             }
          }
          break;
-      case 2221:
+      case 321:
          CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++)
          {
             CeedScalar qd[3];
-            MultAdjJCAdjJt22(J + i, Q, c + i, Q, 2, qw[i], 1, qd);
+            MultAdjJCAdjJt32(J + i, Q, c + i, Q, 1, qw[i], 1, qd);
             const CeedScalar ug0 = ug[i + Q * 0];
             const CeedScalar ug1 = ug[i + Q * 1];
             vg[i + Q * 0] = qd[0] * ug0 + qd[1] * ug1;
             vg[i + Q * 1] = qd[1] * ug0 + qd[2] * ug1;
          }
          break;
-      case 2222:
+      case 323:
          CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++)
          {
             CeedScalar qd[3];
-            MultAdjJCAdjJt22(J + i, Q, c + i, Q, 2, qw[i], 1, qd);
-            CeedPragmaSIMD for (CeedInt d = 0; d < 2; d++)
+            MultAdjJCAdjJt32(J + i, Q, c + i, Q, 1, qw[i], 1, qd);
+            CeedPragmaSIMD for (CeedInt d = 0; d < 3; d++)
             {
-               const CeedScalar ug0 = ug[i + Q * (d + 2 * 0)];
-               const CeedScalar ug1 = ug[i + Q * (d + 2 * 1)];
-               vg[i + Q * (d + 2 * 0)] = qd[0] * ug0 + qd[1] * ug1;
-               vg[i + Q * (d + 2 * 1)] = qd[1] * ug0 + qd[2] * ug1;
+               const CeedScalar ug0 = ug[i + Q * (d + 3 * 0)];
+               const CeedScalar ug1 = ug[i + Q * (d + 3 * 1)];
+               vg[i + Q * (d + 3 * 0)] = qd[0] * ug0 + qd[1] * ug1;
+               vg[i + Q * (d + 3 * 1)] = qd[1] * ug0 + qd[2] * ug1;
             }
          }
          break;
-      case 2211:
+      case 331:
          CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++)
          {
-            CeedScalar qd[3];
-            MultAdjJCAdjJt22(J + i, Q, c + i, Q, 1, qw[i], 1, qd);
+            CeedScalar qd[6];
+            MultAdjJCAdjJt33(J + i, Q, c + i, Q, 1, qw[i], 1, qd);
             const CeedScalar ug0 = ug[i + Q * 0];
             const CeedScalar ug1 = ug[i + Q * 1];
-            vg[i + Q * 0] = qd[0] * ug0 + qd[1] * ug1;
-            vg[i + Q * 1] = qd[1] * ug0 + qd[2] * ug1;
+            const CeedScalar ug2 = ug[i + Q * 2];
+            vg[i + Q * 0] = qd[0] * ug0 + qd[1] * ug1 + qd[2] * ug2;
+            vg[i + Q * 1] = qd[1] * ug0 + qd[3] * ug1 + qd[4] * ug2;
+            vg[i + Q * 2] = qd[2] * ug0 + qd[4] * ug1 + qd[5] * ug2;
          }
          break;
-      case 2212:
+      case 333:
          CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++)
          {
-            CeedScalar qd[3];
-            MultAdjJCAdjJt22(J + i, Q, c + i, Q, 1, qw[i], 1, qd);
-            CeedPragmaSIMD for (CeedInt d = 0; d < 2; d++)
+            CeedScalar qd[6];
+            MultAdjJCAdjJt33(J + i, Q, c + i, Q, 1, qw[i], 1, qd);
+            CeedPragmaSIMD for (CeedInt d = 0; d < 3; d++)
             {
-               const CeedScalar ug0 = ug[i + Q * (d + 2 * 0)];
-               const CeedScalar ug1 = ug[i + Q * (d + 2 * 1)];
-               vg[i + Q * (d + 2 * 0)] = qd[0] * ug0 + qd[1] * ug1;
-               vg[i + Q * (d + 2 * 1)] = qd[1] * ug0 + qd[2] * ug1;
+               const CeedScalar ug0 = ug[i + Q * (d + 3 * 0)];
+               const CeedScalar ug1 = ug[i + Q * (d + 3 * 1)];
+               const CeedScalar ug2 = ug[i + Q * (d + 3 * 2)];
+               vg[i + Q * (d + 3 * 0)] = qd[0] * ug0 + qd[1] * ug1 + qd[2] * ug2;
+               vg[i + Q * (d + 3 * 1)] = qd[1] * ug0 + qd[3] * ug1 + qd[4] * ug2;
+               vg[i + Q * (d + 3 * 2)] = qd[2] * ug0 + qd[4] * ug1 + qd[5] * ug2;
             }
          }
          break;
-      case 3261:
+   }
+   return 0;
+}
+
+/// libCEED QFunction for applying a diffusion operator with a vector
+/// coefficient evaluated at quadrature points
+CEED_QFUNCTION(f_apply_diff_mf_quad_vector)(void *ctx, CeedInt Q,
+                                            const CeedScalar *const *in,
+                                            CeedScalar *const *out)
+{
+   DiffusionContext *bc = (DiffusionContext *)ctx;
+   // in[0], out[0] have shape [dim, ncomp=vdim, Q]
+   // in[1] is coefficients with shape [ncomp=space_dim, Q]
+   // in[2] is Jacobians with shape [dim, ncomp=space_dim, Q]
+   // in[3] is quadrature weights, size (Q)
+   //
+   // At every quadrature point, compute qw/det(J) adj(J) C adj(J)^T
+   const CeedScalar *ug = in[0], *c = in[1], *J = in[2], *qw = in[3];
+   CeedScalar *vg = out[0];
+   switch (100 * bc->space_dim + 10 * bc->dim + bc->vdim)
+   {
+      case 211:
          CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++)
          {
-            CeedScalar qd[3];
-            MultAdjJCAdjJt32(J + i, Q, c + i, Q, 6, qw[i], 1, qd);
-            const CeedScalar ug0 = ug[i + Q * 0];
-            const CeedScalar ug1 = ug[i + Q * 1];
-            vg[i + Q * 0] = qd[0] * ug0 + qd[1] * ug1;
-            vg[i + Q * 1] = qd[1] * ug0 + qd[2] * ug1;
+            CeedScalar qd;
+            MultAdjJCAdjJt21(J + i, Q, c + i, Q, 2, qw[i], 1, &qd);
+            vg[i] = qd * ug[i];
          }
          break;
-      case 3263:
+      case 212:
          CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++)
          {
-            CeedScalar qd[3];
-            MultAdjJCAdjJt32(J + i, Q, c + i, Q, 6, qw[i], 1, qd);
-            CeedPragmaSIMD for (CeedInt d = 0; d < 3; d++)
+            CeedScalar qd;
+            MultAdjJCAdjJt21(J + i, Q, c + i, Q, 2, qw[i], 1, &qd);
+            CeedPragmaSIMD for (CeedInt d = 0; d < 2; d++)
             {
-               const CeedScalar ug0 = ug[i + Q * (d + 3 * 0)];
-               const CeedScalar ug1 = ug[i + Q * (d + 3 * 1)];
-               vg[i + Q * (d + 3 * 0)] = qd[0] * ug0 + qd[1] * ug1;
-               vg[i + Q * (d + 3 * 1)] = qd[1] * ug0 + qd[2] * ug1;
+               vg[i + Q * d] = qd * ug[i + Q * d];
             }
          }
          break;
-      case 3231:
+      case 221:
          CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++)
          {
             CeedScalar qd[3];
-            MultAdjJCAdjJt32(J + i, Q, c + i, Q, 3, qw[i], 1, qd);
+            MultAdjJCAdjJt22(J + i, Q, c + i, Q, 2, qw[i], 1, qd);
             const CeedScalar ug0 = ug[i + Q * 0];
             const CeedScalar ug1 = ug[i + Q * 1];
             vg[i + Q * 0] = qd[0] * ug0 + qd[1] * ug1;
             vg[i + Q * 1] = qd[1] * ug0 + qd[2] * ug1;
          }
          break;
-      case 3233:
+      case 222:
          CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++)
          {
             CeedScalar qd[3];
-            MultAdjJCAdjJt32(J + i, Q, c + i, Q, 3, qw[i], 1, qd);
-            CeedPragmaSIMD for (CeedInt d = 0; d < 3; d++)
+            MultAdjJCAdjJt22(J + i, Q, c + i, Q, 2, qw[i], 1, qd);
+            CeedPragmaSIMD for (CeedInt d = 0; d < 2; d++)
             {
-               const CeedScalar ug0 = ug[i + Q * (d + 3 * 0)];
-               const CeedScalar ug1 = ug[i + Q * (d + 3 * 1)];
-               vg[i + Q * (d + 3 * 0)] = qd[0] * ug0 + qd[1] * ug1;
-               vg[i + Q * (d + 3 * 1)] = qd[1] * ug0 + qd[2] * ug1;
+               const CeedScalar ug0 = ug[i + Q * (d + 2 * 0)];
+               const CeedScalar ug1 = ug[i + Q * (d + 2 * 1)];
+               vg[i + Q * (d + 2 * 0)] = qd[0] * ug0 + qd[1] * ug1;
+               vg[i + Q * (d + 2 * 1)] = qd[1] * ug0 + qd[2] * ug1;
             }
          }
          break;
-      case 3211:
+      case 321:
          CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++)
          {
             CeedScalar qd[3];
-            MultAdjJCAdjJt32(J + i, Q, c + i, Q, 1, qw[i], 1, qd);
+            MultAdjJCAdjJt32(J + i, Q, c + i, Q, 3, qw[i], 1, qd);
             const CeedScalar ug0 = ug[i + Q * 0];
             const CeedScalar ug1 = ug[i + Q * 1];
             vg[i + Q * 0] = qd[0] * ug0 + qd[1] * ug1;
             vg[i + Q * 1] = qd[1] * ug0 + qd[2] * ug1;
          }
          break;
-      case 3213:
+      case 323:
          CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++)
          {
             CeedScalar qd[3];
-            MultAdjJCAdjJt32(J + i, Q, c + i, Q, 1, qw[i], 1, qd);
+            MultAdjJCAdjJt32(J + i, Q, c + i, Q, 3, qw[i], 1, qd);
             CeedPragmaSIMD for (CeedInt d = 0; d < 3; d++)
             {
                const CeedScalar ug0 = ug[i + Q * (d + 3 * 0)];
@@ -880,11 +988,11 @@ CEED_QFUNCTION(f_apply_diff_mf_quad)(void *ctx, CeedInt Q,
             }
          }
          break;
-      case 3361:
+      case 331:
          CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++)
          {
             CeedScalar qd[6];
-            MultAdjJCAdjJt33(J + i, Q, c + i, Q, 6, qw[i], 1, qd);
+            MultAdjJCAdjJt33(J + i, Q, c + i, Q, 3, qw[i], 1, qd);
             const CeedScalar ug0 = ug[i + Q * 0];
             const CeedScalar ug1 = ug[i + Q * 1];
             const CeedScalar ug2 = ug[i + Q * 2];
@@ -893,11 +1001,11 @@ CEED_QFUNCTION(f_apply_diff_mf_quad)(void *ctx, CeedInt Q,
             vg[i + Q * 2] = qd[2] * ug0 + qd[4] * ug1 + qd[5] * ug2;
          }
          break;
-      case 3363:
+      case 333:
          CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++)
          {
             CeedScalar qd[6];
-            MultAdjJCAdjJt33(J + i, Q, c + i, Q, 6, qw[i], 1, qd);
+            MultAdjJCAdjJt33(J + i, Q, c + i, Q, 3, qw[i], 1, qd);
             CeedPragmaSIMD for (CeedInt d = 0; d < 3; d++)
             {
                const CeedScalar ug0 = ug[i + Q * (d + 3 * 0)];
@@ -909,40 +1017,101 @@ CEED_QFUNCTION(f_apply_diff_mf_quad)(void *ctx, CeedInt Q,
             }
          }
          break;
-      case 3331:
+   }
+   return 0;
+}
+
+/// libCEED QFunction for applying a diffusion operator with a matrix
+/// coefficient evaluated at quadrature points
+CEED_QFUNCTION(f_apply_diff_mf_quad_matrix)(void *ctx, CeedInt Q,
+                                            const CeedScalar *const *in,
+                                            CeedScalar *const *out)
+{
+   DiffusionContext *bc = (DiffusionContext *)ctx;
+   // in[0], out[0] have shape [dim, ncomp=vdim, Q]
+   // in[1] is coefficients with shape [ncomp=space_dim*(space_dim+1)/2, Q]
+   // in[2] is Jacobians with shape [dim, ncomp=space_dim, Q]
+   // in[3] is quadrature weights, size (Q)
+   //
+   // At every quadrature point, compute qw/det(J) adj(J) C adj(J)^T
+   const CeedScalar *ug = in[0], *c = in[1], *J = in[2], *qw = in[3];
+   CeedScalar *vg = out[0];
+   switch (100 * bc->space_dim + 10 * bc->dim + bc->vdim)
+   {
+      case 211:
          CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++)
          {
-            CeedScalar qd[6];
-            MultAdjJCAdjJt33(J + i, Q, c + i, Q, 3, qw[i], 1, qd);
+            CeedScalar qd;
+            MultAdjJCAdjJt21(J + i, Q, c + i, Q, 3, qw[i], 1, &qd);
+            vg[i] = qd * ug[i];
+         }
+         break;
+      case 212:
+         CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++)
+         {
+            CeedScalar qd;
+            MultAdjJCAdjJt21(J + i, Q, c + i, Q, 3, qw[i], 1, &qd);
+            CeedPragmaSIMD for (CeedInt d = 0; d < 2; d++)
+            {
+               vg[i + Q * d] = qd * ug[i + Q * d];
+            }
+         }
+         break;
+      case 221:
+         CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++)
+         {
+            CeedScalar qd[3];
+            MultAdjJCAdjJt22(J + i, Q, c + i, Q, 3, qw[i], 1, qd);
             const CeedScalar ug0 = ug[i + Q * 0];
             const CeedScalar ug1 = ug[i + Q * 1];
-            const CeedScalar ug2 = ug[i + Q * 2];
-            vg[i + Q * 0] = qd[0] * ug0 + qd[1] * ug1 + qd[2] * ug2;
-            vg[i + Q * 1] = qd[1] * ug0 + qd[3] * ug1 + qd[4] * ug2;
-            vg[i + Q * 2] = qd[2] * ug0 + qd[4] * ug1 + qd[5] * ug2;
+            vg[i + Q * 0] = qd[0] * ug0 + qd[1] * ug1;
+            vg[i + Q * 1] = qd[1] * ug0 + qd[2] * ug1;
          }
          break;
-      case 3333:
+      case 222:
          CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++)
          {
-            CeedScalar qd[6];
-            MultAdjJCAdjJt33(J + i, Q, c + i, Q, 3, qw[i], 1, qd);
+            CeedScalar qd[3];
+            MultAdjJCAdjJt22(J + i, Q, c + i, Q, 3, qw[i], 1, qd);
+            CeedPragmaSIMD for (CeedInt d = 0; d < 2; d++)
+            {
+               const CeedScalar ug0 = ug[i + Q * (d + 2 * 0)];
+               const CeedScalar ug1 = ug[i + Q * (d + 2 * 1)];
+               vg[i + Q * (d + 2 * 0)] = qd[0] * ug0 + qd[1] * ug1;
+               vg[i + Q * (d + 2 * 1)] = qd[1] * ug0 + qd[2] * ug1;
+            }
+         }
+         break;
+      case 321:
+         CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++)
+         {
+            CeedScalar qd[3];
+            MultAdjJCAdjJt32(J + i, Q, c + i, Q, 6, qw[i], 1, qd);
+            const CeedScalar ug0 = ug[i + Q * 0];
+            const CeedScalar ug1 = ug[i + Q * 1];
+            vg[i + Q * 0] = qd[0] * ug0 + qd[1] * ug1;
+            vg[i + Q * 1] = qd[1] * ug0 + qd[2] * ug1;
+         }
+         break;
+      case 323:
+         CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++)
+         {
+            CeedScalar qd[3];
+            MultAdjJCAdjJt32(J + i, Q, c + i, Q, 6, qw[i], 1, qd);
             CeedPragmaSIMD for (CeedInt d = 0; d < 3; d++)
             {
                const CeedScalar ug0 = ug[i + Q * (d + 3 * 0)];
                const CeedScalar ug1 = ug[i + Q * (d + 3 * 1)];
-               const CeedScalar ug2 = ug[i + Q * (d + 3 * 2)];
-               vg[i + Q * (d + 3 * 0)] = qd[0] * ug0 + qd[1] * ug1 + qd[2] * ug2;
-               vg[i + Q * (d + 3 * 1)] = qd[1] * ug0 + qd[3] * ug1 + qd[4] * ug2;
-               vg[i + Q * (d + 3 * 2)] = qd[2] * ug0 + qd[4] * ug1 + qd[5] * ug2;
+               vg[i + Q * (d + 3 * 0)] = qd[0] * ug0 + qd[1] * ug1;
+               vg[i + Q * (d + 3 * 1)] = qd[1] * ug0 + qd[2] * ug1;
             }
          }
          break;
-      case 3311:
+      case 331:
          CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++)
          {
             CeedScalar qd[6];
-            MultAdjJCAdjJt33(J + i, Q, c + i, Q, 1, qw[i], 1, qd);
+            MultAdjJCAdjJt33(J + i, Q, c + i, Q, 6, qw[i], 1, qd);
             const CeedScalar ug0 = ug[i + Q * 0];
             const CeedScalar ug1 = ug[i + Q * 1];
             const CeedScalar ug2 = ug[i + Q * 2];
@@ -951,11 +1120,11 @@ CEED_QFUNCTION(f_apply_diff_mf_quad)(void *ctx, CeedInt Q,
             vg[i + Q * 2] = qd[2] * ug0 + qd[4] * ug1 + qd[5] * ug2;
          }
          break;
-      case 3313:
+      case 333:
          CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++)
          {
             CeedScalar qd[6];
-            MultAdjJCAdjJt33(J + i, Q, c + i, Q, 1, qw[i], 1, qd);
+            MultAdjJCAdjJt33(J + i, Q, c + i, Q, 6, qw[i], 1, qd);
             CeedPragmaSIMD for (CeedInt d = 0; d < 3; d++)
             {
                const CeedScalar ug0 = ug[i + Q * (d + 3 * 0)];
diff --git a/fem/ceed/integrators/divdiv/divdiv.cpp b/fem/ceed/integrators/divdiv/divdiv.cpp
index 237a6398cde..1a3acdadfb7 100644
--- a/fem/ceed/integrators/divdiv/divdiv.cpp
+++ b/fem/ceed/integrators/divdiv/divdiv.cpp
@@ -25,35 +25,67 @@ namespace ceed
 #ifdef MFEM_USE_CEED
 struct DivDivOperatorInfo : public OperatorInfo
 {
-   DivDivContext ctx;
+   DivDivContext ctx = {0};
    DivDivOperatorInfo(const mfem::FiniteElementSpace &fes, mfem::Coefficient *Q,
-                      bool use_bdr)
+                      bool use_bdr = false, bool use_mf = false)
    {
       MFEM_VERIFY(fes.GetVDim() == 1,
                   "libCEED interface for vector FE does not support VDim > 1!");
       ctx.dim = fes.GetMesh()->Dimension() - use_bdr;
       ctx.space_dim = fes.GetMesh()->SpaceDimension();
+      if (!use_mf)
+      {
+         apply_func = ":f_apply_divdiv";
+         apply_qf = &f_apply_divdiv;
+      }
+      else
+      {
+         build_func = "";
+         build_qf = nullptr;
+      }
       if (Q == nullptr)
       {
          ctx.coeff = 1.0;
+         if (!use_mf)
+         {
+            build_func = ":f_build_divdiv_const";
+            build_qf = &f_build_divdiv_const;
+         }
+         else
+         {
+            apply_func = ":f_apply_divdiv_mf_const";
+            apply_qf = &f_apply_divdiv_mf_const;
+         }
       }
-      else if (ConstantCoefficient *const_coeff =
-                  dynamic_cast<ConstantCoefficient *>(Q))
+      else if (mfem::ConstantCoefficient *const_coeff =
+                  dynamic_cast<mfem::ConstantCoefficient *>(Q))
       {
          ctx.coeff = const_coeff->constant;
+         if (!use_mf)
+         {
+            build_func = ":f_build_divdiv_const";
+            build_qf = &f_build_divdiv_const;
+         }
+         else
+         {
+            apply_func = ":f_apply_divdiv_mf_const";
+            apply_qf = &f_apply_divdiv_mf_const;
+         }
+      }
+      else
+      {
+         if (!use_mf)
+         {
+            build_func = ":f_build_divdiv_quad";
+            build_qf = &f_build_divdiv_quad;
+         }
+         else
+         {
+            apply_func = ":f_apply_divdiv_mf_quad";
+            apply_qf = &f_apply_divdiv_mf_quad;
+         }
       }
-
       header = "/integrators/divdiv/divdiv_qf.h";
-      build_func_const = ":f_build_divdiv_const";
-      build_qf_const = &f_build_divdiv_const;
-      build_func_quad = ":f_build_divdiv_quad";
-      build_qf_quad = &f_build_divdiv_quad;
-      apply_func = ":f_apply_divdiv";
-      apply_qf = &f_apply_divdiv;
-      apply_func_mf_const = ":f_apply_divdiv_mf_const";
-      apply_qf_mf_const = &f_apply_divdiv_mf_const;
-      apply_func_mf_quad = ":f_apply_divdiv_mf_quad";
-      apply_qf_mf_quad = &f_apply_divdiv_mf_quad;
       trial_op = EvalMode::Div;
       test_op = EvalMode::Div;
       qdatasize = 1;
@@ -80,7 +112,7 @@ MFDivDivIntegrator::MFDivDivIntegrator(const mfem::DivDivIntegrator &integ,
                                        const bool use_bdr)
 {
 #ifdef MFEM_USE_CEED
-   DivDivOperatorInfo info(fes, Q, use_bdr);
+   DivDivOperatorInfo info(fes, Q, use_bdr, true);
    Assemble(integ, info, fes, Q, use_bdr, true);
 #else
    MFEM_ABORT("MFEM must be built with MFEM_USE_CEED=YES to use libCEED.");
diff --git a/fem/ceed/integrators/divdiv/divdiv_qf.h b/fem/ceed/integrators/divdiv/divdiv_qf.h
index d78893c62ff..853aa0011ed 100644
--- a/fem/ceed/integrators/divdiv/divdiv_qf.h
+++ b/fem/ceed/integrators/divdiv/divdiv_qf.h
@@ -14,16 +14,14 @@
 
 #include "../util/util_qf.h"
 
-/// A structure used to pass additional data to f_build_divdiv and
-/// f_apply_divdiv
 struct DivDivContext
 {
    CeedInt dim, space_dim;
    CeedScalar coeff;
 };
 
-/// libCEED QFunction for building quadrature data for a div-div operator with
-/// a constant coefficient
+/// libCEED QFunction for building quadrature data for a div-div operator
+/// with a constant coefficient
 CEED_QFUNCTION(f_build_divdiv_const)(void *ctx, CeedInt Q,
                                      const CeedScalar *const *in,
                                      CeedScalar *const *out)
@@ -32,7 +30,7 @@ CEED_QFUNCTION(f_build_divdiv_const)(void *ctx, CeedInt Q,
    // in[0] is Jacobians with shape [dim, ncomp=space_dim, Q]
    // in[1] is quadrature weights, size (Q)
    //
-   // At every quadrature point, compute and store qw * c / det(J).
+   // At every quadrature point, compute and store qw * c / det(J)
    const CeedScalar coeff = bc->coeff;
    const CeedScalar *J = in[0], *qw = in[1];
    CeedScalar *qd = out[0];
@@ -72,8 +70,8 @@ CEED_QFUNCTION(f_build_divdiv_const)(void *ctx, CeedInt Q,
    return 0;
 }
 
-/// libCEED QFunction for building quadrature data for a div-div operator with
-/// a coefficient evaluated at quadrature points.
+/// libCEED QFunction for building quadrature data for a div-div operator
+/// with a coefficient evaluated at quadrature points
 CEED_QFUNCTION(f_build_divdiv_quad)(void *ctx, CeedInt Q,
                                     const CeedScalar *const *in,
                                     CeedScalar *const *out)
@@ -83,7 +81,7 @@ CEED_QFUNCTION(f_build_divdiv_quad)(void *ctx, CeedInt Q,
    // in[1] is Jacobians with shape [dim, ncomp=space_dim, Q]
    // in[2] is quadrature weights, size (Q)
    //
-   // At every quadrature point, compute and store qw * c / det(J).
+   // At every quadrature point, compute and store qw * c / det(J)
    const CeedScalar *c = in[0], *J = in[1], *qw = in[2];
    CeedScalar *qd = out[0];
    switch (10 * bc->space_dim + bc->dim)
@@ -137,16 +135,18 @@ CEED_QFUNCTION(f_apply_divdiv)(void *ctx, CeedInt Q,
    return 0;
 }
 
-/// libCEED QFunction for applying a div-div operator
+/// libCEED QFunction for applying a div-div operator with a constant
+/// coefficient
 CEED_QFUNCTION(f_apply_divdiv_mf_const)(void *ctx, CeedInt Q,
-                                        const CeedScalar *const *in, CeedScalar *const *out)
+                                        const CeedScalar *const *in,
+                                        CeedScalar *const *out)
 {
    DivDivContext *bc = (DivDivContext *)ctx;
    // in[0], out[0] have shape [ncomp=1, Q]
    // in[1] is Jacobians with shape [dim, ncomp=space_dim, Q]
    // in[2] is quadrature weights, size (Q)
    //
-   // At every quadrature point, compute qw * c / det(J).
+   // At every quadrature point, compute qw * c / det(J)
    const CeedScalar coeff = bc->coeff;
    const CeedScalar *ud = in[0], *J = in[1], *qw = in[2];
    CeedScalar *vd = out[0];
@@ -191,9 +191,11 @@ CEED_QFUNCTION(f_apply_divdiv_mf_const)(void *ctx, CeedInt Q,
    return 0;
 }
 
-/// libCEED QFunction for applying a div-div operator
+/// libCEED QFunction for applying a div-div operator with a coefficient
+/// evaluated at quadrature points
 CEED_QFUNCTION(f_apply_divdiv_mf_quad)(void *ctx, CeedInt Q,
-                                       const CeedScalar *const *in, CeedScalar *const *out)
+                                       const CeedScalar *const *in,
+                                       CeedScalar *const *out)
 {
    DivDivContext *bc = (DivDivContext *)ctx;
    // in[0], out[0] have shape [ncomp=1, Q]
@@ -201,7 +203,7 @@ CEED_QFUNCTION(f_apply_divdiv_mf_quad)(void *ctx, CeedInt Q,
    // in[2] is Jacobians with shape [dim, ncomp=space_dim, Q]
    // in[3] is quadrature weights, size (Q)
    //
-   // At every quadrature point, compute qw * c / det(J).
+   // At every quadrature point, compute qw * c / det(J)
    const CeedScalar *ud = in[0], *c = in[1], *J = in[2], *qw = in[3];
    CeedScalar *vd = out[0];
    switch (10 * bc->space_dim + bc->dim)
diff --git a/fem/ceed/integrators/mass/mass.cpp b/fem/ceed/integrators/mass/mass.cpp
index f6871d58a5a..6a8d67ddc03 100644
--- a/fem/ceed/integrators/mass/mass.cpp
+++ b/fem/ceed/integrators/mass/mass.cpp
@@ -25,34 +25,66 @@ namespace ceed
 #ifdef MFEM_USE_CEED
 struct MassOperatorInfo : public OperatorInfo
 {
-   MassContext ctx;
+   MassContext ctx = {0};
    MassOperatorInfo(const mfem::FiniteElementSpace &fes, mfem::Coefficient *Q,
-                    bool use_bdr)
+                    bool use_bdr = false, bool use_mf = false)
    {
       ctx.dim = fes.GetMesh()->Dimension() - use_bdr;
       ctx.space_dim = fes.GetMesh()->SpaceDimension();
       ctx.vdim = fes.GetVDim();
+      if (!use_mf)
+      {
+         apply_func = ":f_apply_mass";
+         apply_qf = &f_apply_mass;
+      }
+      else
+      {
+         build_func = "";
+         build_qf = nullptr;
+      }
       if (Q == nullptr)
       {
          ctx.coeff = 1.0;
+         if (!use_mf)
+         {
+            build_func = ":f_build_mass_const";
+            build_qf = &f_build_mass_const;
+         }
+         else
+         {
+            apply_func = ":f_apply_mass_mf_const";
+            apply_qf = &f_apply_mass_mf_const;
+         }
       }
-      else if (ConstantCoefficient *const_coeff =
-                  dynamic_cast<ConstantCoefficient *>(Q))
+      else if (mfem::ConstantCoefficient *const_coeff =
+                  dynamic_cast<mfem::ConstantCoefficient *>(Q))
       {
          ctx.coeff = const_coeff->constant;
+         if (!use_mf)
+         {
+            build_func = ":f_build_mass_const";
+            build_qf = &f_build_mass_const;
+         }
+         else
+         {
+            apply_func = ":f_apply_mass_mf_const";
+            apply_qf = &f_apply_mass_mf_const;
+         }
+      }
+      else
+      {
+         if (!use_mf)
+         {
+            build_func = ":f_build_mass_quad";
+            build_qf = &f_build_mass_quad;
+         }
+         else
+         {
+            apply_func = ":f_apply_mass_mf_quad";
+            apply_qf = &f_apply_mass_mf_quad;
+         }
       }
-
       header = "/integrators/mass/mass_qf.h";
-      build_func_const = ":f_build_mass_const";
-      build_qf_const = &f_build_mass_const;
-      build_func_quad = ":f_build_mass_quad";
-      build_qf_quad = &f_build_mass_quad;
-      apply_func = ":f_apply_mass";
-      apply_qf = &f_apply_mass;
-      apply_func_mf_const = ":f_apply_mass_mf_const";
-      apply_qf_mf_const = &f_apply_mass_mf_const;
-      apply_func_mf_quad = ":f_apply_mass_mf_quad";
-      apply_qf_mf_quad = &f_apply_mass_mf_quad;
       trial_op = EvalMode::Interp;
       test_op = EvalMode::Interp;
       qdatasize = 1;
@@ -92,7 +124,7 @@ MFMassIntegrator::MFMassIntegrator(const mfem::MassIntegrator &integ,
                                    const bool use_bdr)
 {
 #ifdef MFEM_USE_CEED
-   MassOperatorInfo info(fes, Q, use_bdr);
+   MassOperatorInfo info(fes, Q, use_bdr, true);
    Assemble(integ, info, fes, Q, use_bdr, true);
 #else
    MFEM_ABORT("MFEM must be built with MFEM_USE_CEED=YES to use libCEED.");
@@ -105,7 +137,7 @@ MFMassIntegrator::MFMassIntegrator(const mfem::VectorMassIntegrator &integ,
                                    const bool use_bdr)
 {
 #ifdef MFEM_USE_CEED
-   MassOperatorInfo info(fes, Q, use_bdr);
+   MassOperatorInfo info(fes, Q, use_bdr, true);
    Assemble(integ, info, fes, Q, use_bdr, true);
 #else
    MFEM_ABORT("MFEM must be built with MFEM_USE_CEED=YES to use libCEED.");
diff --git a/fem/ceed/integrators/mass/mass_qf.h b/fem/ceed/integrators/mass/mass_qf.h
index 93d904ff4bc..3cdd3b5e39a 100644
--- a/fem/ceed/integrators/mass/mass_qf.h
+++ b/fem/ceed/integrators/mass/mass_qf.h
@@ -14,15 +14,14 @@
 
 #include "../util/util_qf.h"
 
-/// A structure used to pass additional data to f_build_mass and f_apply_mass
 struct MassContext
 {
    CeedInt dim, space_dim, vdim;
    CeedScalar coeff;
 };
 
-/// libCEED QFunction for building quadrature data for a mass operator with a
-/// constant coefficient
+/// libCEED QFunction for building quadrature data for a mass operator
+/// with a constant coefficient
 CEED_QFUNCTION(f_build_mass_const)(void *ctx, CeedInt Q,
                                    const CeedScalar *const *in,
                                    CeedScalar *const *out)
@@ -30,7 +29,7 @@ CEED_QFUNCTION(f_build_mass_const)(void *ctx, CeedInt Q,
    // in[0] is Jacobians with shape [dim, ncomp=space_dim, Q]
    // in[1] is quadrature weights, size (Q)
    //
-   // At every quadrature point, compute and store qw * c * det(J).
+   // At every quadrature point, compute and store qw * c * det(J)
    MassContext *bc = (MassContext *)ctx;
    const CeedScalar coeff = bc->coeff;
    const CeedScalar *J = in[0], *qw = in[1];
@@ -71,8 +70,8 @@ CEED_QFUNCTION(f_build_mass_const)(void *ctx, CeedInt Q,
    return 0;
 }
 
-/// libCEED QFunction for building quadrature data for a mass operator with a
-/// coefficient evaluated at quadrature points.
+/// libCEED QFunction for building quadrature data for a mass operator
+/// with a coefficient evaluated at quadrature points
 CEED_QFUNCTION(f_build_mass_quad)(void *ctx, CeedInt Q,
                                   const CeedScalar *const *in,
                                   CeedScalar *const *out)
@@ -81,7 +80,7 @@ CEED_QFUNCTION(f_build_mass_quad)(void *ctx, CeedInt Q,
    // in[1] is Jacobians with shape [dim, ncomp=space_dim, Q]
    // in[2] is quadrature weights, size (Q)
    //
-   // At every quadrature point, compute and store qw * c * det(J).
+   // At every quadrature point, compute and store qw * c * det(J)
    MassContext *bc = (MassContext *)ctx;
    const CeedScalar *c = in[0], *J = in[1], *qw = in[2];
    CeedScalar *qd = out[0];
@@ -162,16 +161,18 @@ CEED_QFUNCTION(f_apply_mass)(void *ctx, CeedInt Q,
    return 0;
 }
 
-/// libCEED QFunction for applying a mass operator
+/// libCEED QFunction for applying a mass operator with a constant
+/// coefficient
 CEED_QFUNCTION(f_apply_mass_mf_const)(void *ctx, CeedInt Q,
-                                      const CeedScalar *const *in, CeedScalar *const *out)
+                                      const CeedScalar *const *in,
+                                      CeedScalar *const *out)
 {
    MassContext *bc = (MassContext *)ctx;
    // in[0], out[0] have shape [ncomp=vdim, Q]
    // in[1] is Jacobians with shape [dim, ncomp=space_dim, Q]
    // in[2] is quadrature weights, size (Q)
    //
-   // At every quadrature point, compute qw * c * det(J).
+   // At every quadrature point, compute qw * c * det(J)
    const CeedScalar coeff = bc->coeff;
    const CeedScalar *u = in[0], *J = in[1], *qw = in[2];
    CeedScalar *v = out[0];
@@ -256,16 +257,18 @@ CEED_QFUNCTION(f_apply_mass_mf_const)(void *ctx, CeedInt Q,
    return 0;
 }
 
-/// libCEED QFunction for applying a mass operator
+/// libCEED QFunction for applying a mass operator with a coefficient
+/// evaluated at quadrature points
 CEED_QFUNCTION(f_apply_mass_mf_quad)(void *ctx, CeedInt Q,
-                                     const CeedScalar *const *in, CeedScalar *const *out)
+                                     const CeedScalar *const *in,
+                                     CeedScalar *const *out)
 {
    MassContext *bc = (MassContext *)ctx;
    // in[0], out[0] have shape [ncomp=vdim, Q]
    // in[1] is Jacobians with shape [dim, ncomp=space_dim, Q]
    // in[2] is quadrature weights, size (Q)
    //
-   // At every quadrature point, compute qw * c * det(J).
+   // At every quadrature point, compute qw * c * det(J)
    const CeedScalar *u = in[0], *c = in[1], *J = in[2], *qw = in[3];
    CeedScalar *v = out[0];
    switch (100 * bc->space_dim + 10 * bc->dim + bc->vdim)
diff --git a/fem/ceed/integrators/mixedveccurl/mixedveccurl.cpp b/fem/ceed/integrators/mixedveccurl/mixedveccurl.cpp
index 352688b580f..93c11c8bf3d 100644
--- a/fem/ceed/integrators/mixedveccurl/mixedveccurl.cpp
+++ b/fem/ceed/integrators/mixedveccurl/mixedveccurl.cpp
@@ -23,94 +23,132 @@ namespace ceed
 {
 
 #ifdef MFEM_USE_CEED
-struct MixedVectorCurlOperatorInfo : public OperatorInfo
+struct MixedVectorCurlOperatorInfoBase : public OperatorInfo
 {
-   CurlCurlContext ctx;
+   CurlCurlContext ctx = {0};
    template <typename CoeffType>
-   MixedVectorCurlOperatorInfo(const mfem::FiniteElementSpace &trial_fes,
-                               const mfem::FiniteElementSpace &test_fes,
-                               CoeffType *Q, bool use_bdr, bool weak_curl)
+   MixedVectorCurlOperatorInfoBase(const mfem::FiniteElementSpace &trial_fes,
+                                   const mfem::FiniteElementSpace &test_fes,
+                                   CoeffType *Q, bool use_bdr = false,
+                                   bool use_mf = false)
    {
+      // Reuse H(div) quadrature functions for CurlCurlIntegrator
       MFEM_VERIFY(trial_fes.GetVDim() == 1 && test_fes.GetVDim() == 1,
                   "libCEED interface for vector FE does not support VDim > 1!");
       ctx.dim = trial_fes.GetMesh()->Dimension() - use_bdr;
       MFEM_VERIFY(ctx.dim == 3,
                   "MixedVectorCurlIntegrator and MixedVectorWeakCurlIntegrator "
                   "require dim == 3!");
-      MFEM_VERIFY(
-         weak_curl ||
-         (trial_fes.FEColl()->GetDerivMapType(ctx.dim) == mfem::FiniteElement::H_DIV &&
-          test_fes.FEColl()->GetMapType(ctx.dim) == mfem::FiniteElement::H_DIV),
-         "libCEED interface for MixedVectorCurlIntegrator requires "
-         "H(curl) domain and H(div) range FE spaces!");
-      MFEM_VERIFY(
-         !weak_curl ||
-         (trial_fes.FEColl()->GetMapType(ctx.dim) == mfem::FiniteElement::H_DIV &&
-          test_fes.FEColl()->GetDerivMapType(ctx.dim) == mfem::FiniteElement::H_DIV),
-         "libCEED interface for MixedVectorWeakCurlIntegrator requires "
-         "H(div) domain and H(curl) range FE spaces!");
       ctx.space_dim = trial_fes.GetMesh()->SpaceDimension();
       ctx.curl_dim = (ctx.dim < 3) ? 1 : ctx.dim;
+      if (!use_mf)
+      {
+         apply_func = ":f_apply_curlcurl";
+         apply_qf = &f_apply_curlcurl;
+      }
+      else
+      {
+         build_func = "";
+         build_qf = nullptr;
+      }
       if (Q == nullptr)
       {
-         ctx.coeff_comp = 1;
          ctx.coeff[0] = 1.0;
+         if (!use_mf)
+         {
+            build_func = ":f_build_curlcurl_const_scalar";
+            build_qf = &f_build_curlcurl_const_scalar;
+         }
+         else
+         {
+            apply_func = ":f_apply_curlcurl_mf_const_scalar";
+            apply_qf = &f_apply_curlcurl_mf_const_scalar;
+         }
       }
       else
       {
-         InitCoefficient(*Q);
+         InitCoefficient(*Q, use_mf);
       }
-
-      // Reuse H(div) quadrature functions for CurlCurlIntegrator
       header = "/integrators/curlcurl/curlcurl_qf.h";
-      build_func_const = ":f_build_curlcurl_const";
-      build_qf_const = &f_build_curlcurl_const;
-      build_func_quad = ":f_build_curlcurl_quad";
-      build_qf_quad = &f_build_curlcurl_quad;
-      apply_func = ":f_apply_curlcurl";
-      apply_qf = &f_apply_curlcurl;
-      apply_func_mf_const = ":f_apply_curlcurl_mf_const";
-      apply_qf_mf_const = &f_apply_curlcurl_mf_const;
-      apply_func_mf_quad = ":f_apply_curlcurl_mf_quad";
-      apply_qf_mf_quad = &f_apply_curlcurl_mf_quad;
-      trial_op = weak_curl ? EvalMode::Interp : EvalMode::Curl;
-      test_op = weak_curl ? EvalMode::Curl : EvalMode::Interp;
       qdatasize = (ctx.curl_dim * (ctx.curl_dim + 1)) / 2;
    }
-   void InitCoefficient(mfem::Coefficient &Q)
+   void InitCoefficient(mfem::Coefficient &Q, bool use_mf)
    {
-      ctx.coeff_comp = 1;
-      if (ConstantCoefficient *const_coeff =
-             dynamic_cast<ConstantCoefficient *>(&Q))
+      if (mfem::ConstantCoefficient *const_coeff =
+             dynamic_cast<mfem::ConstantCoefficient *>(&Q))
       {
          ctx.coeff[0] = const_coeff->constant;
+         if (!use_mf)
+         {
+            build_func = ":f_build_curlcurl_const_scalar";
+            build_qf = &f_build_curlcurl_const_scalar;
+         }
+         else
+         {
+            apply_func = ":f_apply_curlcurl_mf_const_scalar";
+            apply_qf = &f_apply_curlcurl_mf_const_scalar;
+         }
+      }
+      else
+      {
+         if (!use_mf)
+         {
+            build_func = ":f_build_curlcurl_quad_scalar";
+            build_qf = &f_build_curlcurl_quad_scalar;
+         }
+         else
+         {
+            apply_func = ":f_apply_curlcurl_mf_quad_scalar";
+            apply_qf = &f_apply_curlcurl_mf_quad_scalar;
+         }
       }
    }
-   void InitCoefficient(mfem::VectorCoefficient &VQ)
+   void InitCoefficient(mfem::VectorCoefficient &VQ, bool use_mf)
    {
-      const int vdim = VQ.GetVDim();
-      ctx.coeff_comp = vdim;
-      if (VectorConstantCoefficient *const_coeff =
-             dynamic_cast<VectorConstantCoefficient *>(&VQ))
+      if (mfem::VectorConstantCoefficient *const_coeff =
+             dynamic_cast<mfem::VectorConstantCoefficient *>(&VQ))
       {
-         MFEM_VERIFY(ctx.coeff_comp <= LIBCEED_CURLCURL_COEFF_COMP_MAX,
+         const int vdim = VQ.GetVDim();
+         MFEM_VERIFY(vdim <= LIBCEED_CURLCURL_COEFF_COMP_MAX,
                      "VectorCoefficient dimension exceeds context storage!");
          const mfem::Vector &val = const_coeff->GetVec();
          for (int i = 0; i < vdim; i++)
          {
             ctx.coeff[i] = val[i];
          }
+         if (!use_mf)
+         {
+            build_func = ":f_build_curlcurl_const_vector";
+            build_qf = &f_build_curlcurl_const_vector;
+         }
+         else
+         {
+            apply_func = ":f_apply_curlcurl_mf_const_vector";
+            apply_qf = &f_apply_curlcurl_mf_const_vector;
+         }
+      }
+      else
+      {
+         if (!use_mf)
+         {
+            build_func = ":f_build_curlcurl_quad_vector";
+            build_qf = &f_build_curlcurl_quad_vector;
+         }
+         else
+         {
+            apply_func = ":f_apply_curlcurl_mf_quad_vector";
+            apply_qf = &f_apply_curlcurl_mf_quad_vector;
+         }
       }
    }
-   void InitCoefficient(mfem::MatrixCoefficient &MQ)
+   void InitCoefficient(mfem::MatrixCoefficient &MQ, bool use_mf)
    {
       // Assumes matrix coefficient is symmetric
-      const int vdim = MQ.GetVDim();
-      ctx.coeff_comp = (vdim * (vdim + 1)) / 2;
-      if (MatrixConstantCoefficient *const_coeff =
-             dynamic_cast<MatrixConstantCoefficient *>(&MQ))
+      if (mfem::MatrixConstantCoefficient *const_coeff =
+             dynamic_cast<mfem::MatrixConstantCoefficient *>(&MQ))
       {
-         MFEM_VERIFY(ctx.coeff_comp <= LIBCEED_CURLCURL_COEFF_COMP_MAX,
+         const int vdim = MQ.GetVDim();
+         MFEM_VERIFY((vdim * (vdim + 1)) / 2 <= LIBCEED_CURLCURL_COEFF_COMP_MAX,
                      "MatrixCoefficient dimensions exceed context storage!");
          const mfem::DenseMatrix &val = const_coeff->GetMatrix();
          for (int j = 0; j < vdim; j++)
@@ -121,7 +159,68 @@ struct MixedVectorCurlOperatorInfo : public OperatorInfo
                ctx.coeff[idx] = val(i, j);
             }
          }
+         if (!use_mf)
+         {
+            build_func = ":f_build_curlcurl_const_matrix";
+            build_qf = &f_build_curlcurl_const_matrix;
+         }
+         else
+         {
+            apply_func = ":f_apply_curlcurl_mf_const_matrix";
+            apply_qf = &f_apply_curlcurl_mf_const_matrix;
+         }
       }
+      else
+      {
+         if (!use_mf)
+         {
+            build_func = ":f_build_curlcurl_quad_matrix";
+            build_qf = &f_build_curlcurl_quad_matrix;
+         }
+         else
+         {
+            apply_func = ":f_apply_curlcurl_mf_quad_matrix";
+            apply_qf = &f_apply_curlcurl_mf_quad_matrix;
+         }
+      }
+   }
+};
+
+struct MixedVectorCurlOperatorInfo : public MixedVectorCurlOperatorInfoBase
+{
+   template <typename CoeffType>
+   MixedVectorCurlOperatorInfo(const mfem::FiniteElementSpace &trial_fes,
+                               const mfem::FiniteElementSpace &test_fes,
+                               CoeffType *Q, bool use_bdr = false,
+                               bool use_mf = false)
+      : MixedVectorCurlOperatorInfoBase(trial_fes, test_fes, Q, use_bdr, use_mf)
+   {
+      MFEM_VERIFY(
+         trial_fes.FEColl()->GetDerivMapType(ctx.dim) == mfem::FiniteElement::H_DIV &&
+         test_fes.FEColl()->GetMapType(ctx.dim) == mfem::FiniteElement::H_DIV,
+         "libCEED interface for MixedVectorCurlIntegrator requires "
+         "H(curl) domain and H(div) range FE spaces!");
+      trial_op = EvalMode::Curl;
+      test_op = EvalMode::Interp;
+   }
+};
+
+struct MixedVectorWeakCurlOperatorInfo : public MixedVectorCurlOperatorInfoBase
+{
+   template <typename CoeffType>
+   MixedVectorWeakCurlOperatorInfo(const mfem::FiniteElementSpace &trial_fes,
+                                   const mfem::FiniteElementSpace &test_fes,
+                                   CoeffType *Q, bool use_bdr = false,
+                                   bool use_mf = false)
+      : MixedVectorCurlOperatorInfoBase(trial_fes, test_fes, Q, use_bdr, use_mf)
+   {
+      MFEM_VERIFY(
+         trial_fes.FEColl()->GetMapType(ctx.dim) == mfem::FiniteElement::H_DIV &&
+         test_fes.FEColl()->GetDerivMapType(ctx.dim) == mfem::FiniteElement::H_DIV,
+         "libCEED interface for MixedVectorWeakCurlIntegrator requires "
+         "H(div) domain and H(curl) range FE spaces!");
+      trial_op = EvalMode::Interp;
+      test_op = EvalMode::Curl;
    }
 };
 #endif
@@ -135,7 +234,7 @@ PAMixedVectorCurlIntegrator::PAMixedVectorCurlIntegrator(
    const bool use_bdr)
 {
 #ifdef MFEM_USE_CEED
-   MixedVectorCurlOperatorInfo info(trial_fes, test_fes, Q, use_bdr, false);
+   MixedVectorCurlOperatorInfo info(trial_fes, test_fes, Q, use_bdr);
    Assemble(integ, info, trial_fes, test_fes, Q, use_bdr);
 #else
    MFEM_ABORT("MFEM must be built with MFEM_USE_CEED=YES to use libCEED.");
@@ -151,7 +250,7 @@ MFMixedVectorCurlIntegrator::MFMixedVectorCurlIntegrator(
    const bool use_bdr)
 {
 #ifdef MFEM_USE_CEED
-   MixedVectorCurlOperatorInfo info(trial_fes, test_fes, Q, use_bdr, false);
+   MixedVectorCurlOperatorInfo info(trial_fes, test_fes, Q, use_bdr, true);
    Assemble(integ, info, trial_fes, test_fes, Q, use_bdr, true);
 #else
    MFEM_ABORT("MFEM must be built with MFEM_USE_CEED=YES to use libCEED.");
@@ -167,7 +266,7 @@ PAMixedVectorWeakCurlIntegrator::PAMixedVectorWeakCurlIntegrator(
    const bool use_bdr)
 {
 #ifdef MFEM_USE_CEED
-   MixedVectorCurlOperatorInfo info(trial_fes, test_fes, Q, use_bdr, true);
+   MixedVectorWeakCurlOperatorInfo info(trial_fes, test_fes, Q, use_bdr);
    Assemble(integ, info, trial_fes, test_fes, Q, use_bdr);
 #else
    MFEM_ABORT("MFEM must be built with MFEM_USE_CEED=YES to use libCEED.");
@@ -183,7 +282,7 @@ MFMixedVectorWeakCurlIntegrator::MFMixedVectorWeakCurlIntegrator(
    const bool use_bdr)
 {
 #ifdef MFEM_USE_CEED
-   MixedVectorCurlOperatorInfo info(trial_fes, test_fes, Q, use_bdr, true);
+   MixedVectorWeakCurlOperatorInfo info(trial_fes, test_fes, Q, use_bdr, true);
    Assemble(integ, info, trial_fes, test_fes, Q, use_bdr, true);
 #else
    MFEM_ABORT("MFEM must be built with MFEM_USE_CEED=YES to use libCEED.");
diff --git a/fem/ceed/integrators/mixedvecgrad/mixedvecgrad.cpp b/fem/ceed/integrators/mixedvecgrad/mixedvecgrad.cpp
index 52fe6b66886..d485fb81987 100644
--- a/fem/ceed/integrators/mixedvecgrad/mixedvecgrad.cpp
+++ b/fem/ceed/integrators/mixedvecgrad/mixedvecgrad.cpp
@@ -23,94 +23,132 @@ namespace ceed
 {
 
 #ifdef MFEM_USE_CEED
-struct MixedVectorGradientOperatorInfo : public OperatorInfo
+struct MixedVectorGradientOperatorInfoBase : public OperatorInfo
 {
-   DiffusionContext ctx;
+   DiffusionContext ctx = {0};
    template <typename CoeffType>
-   MixedVectorGradientOperatorInfo(const mfem::FiniteElementSpace &trial_fes,
-                                   const mfem::FiniteElementSpace &test_fes,
-                                   CoeffType *Q, bool use_bdr, bool mixed_vector_grad)
+   MixedVectorGradientOperatorInfoBase(const mfem::FiniteElementSpace &trial_fes,
+                                       const mfem::FiniteElementSpace &test_fes,
+                                       CoeffType *Q, bool use_bdr = false,
+                                       bool use_mf = false)
    {
+      // Reuse H(curl) quadrature functions for DiffusionIntegrator
       MFEM_VERIFY(trial_fes.GetVDim() == 1 && test_fes.GetVDim() == 1,
                   "libCEED interface for vector FE does not support VDim > 1!");
       ctx.dim = trial_fes.GetMesh()->Dimension() - use_bdr;
       MFEM_VERIFY(ctx.dim == 2 || ctx.dim == 3,
                   "MixedVectorGradientIntegrator and MixedVectorWeakDivergenceIntegrator "
                   "require dim == 2 or dim == 3!");
-      MFEM_VERIFY(
-         !mixed_vector_grad ||
-         (trial_fes.FEColl()->GetDerivMapType(ctx.dim) == mfem::FiniteElement::H_CURL &&
-          test_fes.FEColl()->GetMapType(ctx.dim) == mfem::FiniteElement::H_CURL),
-         "libCEED interface for MixedVectorGradientIntegrator requires "
-         "H^1 domain and H(curl) range FE spaces!");
-      MFEM_VERIFY(
-         mixed_vector_grad ||
-         (trial_fes.FEColl()->GetMapType(ctx.dim) == mfem::FiniteElement::H_CURL &&
-          test_fes.FEColl()->GetDerivMapType(ctx.dim) == mfem::FiniteElement::H_CURL),
-         "libCEED interface for MixedVectorWeakDivergenceIntegrator requires "
-         "H(curl) domain and H^1 range FE spaces!");
       ctx.space_dim = trial_fes.GetMesh()->SpaceDimension();
       ctx.vdim = 1;
+      if (!use_mf)
+      {
+         apply_func = ":f_apply_diff";
+         apply_qf = &f_apply_diff;
+      }
+      else
+      {
+         build_func = "";
+         build_qf = nullptr;
+      }
       if (Q == nullptr)
       {
-         ctx.coeff_comp = 1;
-         ctx.coeff[0] = mixed_vector_grad ? 1.0 : -1.0;
+         ctx.coeff[0] = 1.0;
+         if (!use_mf)
+         {
+            build_func = ":f_build_diff_const_scalar";
+            build_qf = &f_build_diff_const_scalar;
+         }
+         else
+         {
+            apply_func = ":f_apply_diff_mf_const_scalar";
+            apply_qf = &f_apply_diff_mf_const_scalar;
+         }
       }
       else
       {
-         InitCoefficient(*Q, mixed_vector_grad ? 1.0 : -1.0);
+         InitCoefficient(*Q, use_mf);
       }
-
-      // Reuse H(curl) quadrature functions for DiffusionIntegrator
       header = "/integrators/diffusion/diffusion_qf.h";
-      build_func_const = ":f_build_diff_const";
-      build_qf_const = &f_build_diff_const;
-      build_func_quad = ":f_build_diff_quad";
-      build_qf_quad = &f_build_diff_quad;
-      apply_func = ":f_apply_diff";
-      apply_qf = &f_apply_diff;
-      apply_func_mf_const = ":f_apply_diff_mf_const";
-      apply_qf_mf_const = &f_apply_diff_mf_const;
-      apply_func_mf_quad = ":f_apply_diff_mf_quad";
-      apply_qf_mf_quad = &f_apply_diff_mf_quad;
-      trial_op = mixed_vector_grad ? EvalMode::Grad : EvalMode::Interp;
-      test_op = mixed_vector_grad ? EvalMode::Interp : EvalMode::Grad;
       qdatasize = (ctx.dim * (ctx.dim + 1)) / 2;
    }
-   void InitCoefficient(mfem::Coefficient &Q, double sign)
+   void InitCoefficient(mfem::Coefficient &Q, bool use_mf)
    {
-      ctx.coeff_comp = 1;
-      if (ConstantCoefficient *const_coeff =
-             dynamic_cast<ConstantCoefficient *>(&Q))
+      if (mfem::ConstantCoefficient *const_coeff =
+             dynamic_cast<mfem::ConstantCoefficient *>(&Q))
       {
-         ctx.coeff[0] = sign * const_coeff->constant;
+         ctx.coeff[0] = const_coeff->constant;
+         if (!use_mf)
+         {
+            build_func = ":f_build_diff_const_scalar";
+            build_qf = &f_build_diff_const_scalar;
+         }
+         else
+         {
+            apply_func = ":f_apply_diff_mf_const_scalar";
+            apply_qf = &f_apply_diff_mf_const_scalar;
+         }
+      }
+      else
+      {
+         if (!use_mf)
+         {
+            build_func = ":f_build_diff_quad_scalar";
+            build_qf = &f_build_diff_quad_scalar;
+         }
+         else
+         {
+            apply_func = ":f_apply_diff_mf_quad_scalar";
+            apply_qf = &f_apply_diff_mf_quad_scalar;
+         }
       }
    }
-   void InitCoefficient(mfem::VectorCoefficient &VQ, double sign)
+   void InitCoefficient(mfem::VectorCoefficient &VQ, bool use_mf)
    {
-      const int vdim = VQ.GetVDim();
-      ctx.coeff_comp = vdim;
-      if (VectorConstantCoefficient *const_coeff =
-             dynamic_cast<VectorConstantCoefficient *>(&VQ))
+      if (mfem::VectorConstantCoefficient *const_coeff =
+             dynamic_cast<mfem::VectorConstantCoefficient *>(&VQ))
       {
-         MFEM_VERIFY(ctx.coeff_comp <= LIBCEED_DIFF_COEFF_COMP_MAX,
+         const int vdim = VQ.GetVDim();
+         MFEM_VERIFY(vdim <= LIBCEED_DIFF_COEFF_COMP_MAX,
                      "VectorCoefficient dimension exceeds context storage!");
          const mfem::Vector &val = const_coeff->GetVec();
          for (int i = 0; i < vdim; i++)
          {
-            ctx.coeff[i] = sign * val[i];
+            ctx.coeff[i] = val[i];
+         }
+         if (!use_mf)
+         {
+            build_func = ":f_build_diff_const_vector";
+            build_qf = &f_build_diff_const_vector;
+         }
+         else
+         {
+            apply_func = ":f_apply_diff_mf_const_vector";
+            apply_qf = &f_apply_diff_mf_const_vector;
+         }
+      }
+      else
+      {
+         if (!use_mf)
+         {
+            build_func = ":f_build_diff_quad_vector";
+            build_qf = &f_build_diff_quad_vector;
+         }
+         else
+         {
+            apply_func = ":f_apply_diff_mf_quad_vector";
+            apply_qf = &f_apply_diff_mf_quad_vector;
          }
       }
    }
-   void InitCoefficient(mfem::MatrixCoefficient &MQ, double sign)
+   void InitCoefficient(mfem::MatrixCoefficient &MQ, bool use_mf)
    {
       // Assumes matrix coefficient is symmetric
-      const int vdim = MQ.GetVDim();
-      ctx.coeff_comp = (vdim * (vdim + 1)) / 2;
-      if (MatrixConstantCoefficient *const_coeff =
-             dynamic_cast<MatrixConstantCoefficient *>(&MQ))
+      if (mfem::MatrixConstantCoefficient *const_coeff =
+             dynamic_cast<mfem::MatrixConstantCoefficient *>(&MQ))
       {
-         MFEM_VERIFY(ctx.coeff_comp <= LIBCEED_DIFF_COEFF_COMP_MAX,
+         const int vdim = MQ.GetVDim();
+         MFEM_VERIFY((vdim * (vdim + 1)) / 2 <= LIBCEED_DIFF_COEFF_COMP_MAX,
                      "MatrixCoefficient dimensions exceed context storage!");
          const mfem::DenseMatrix &val = const_coeff->GetMatrix();
          for (int j = 0; j < vdim; j++)
@@ -118,9 +156,76 @@ struct MixedVectorGradientOperatorInfo : public OperatorInfo
             for (int i = j; i < vdim; i++)
             {
                const int idx = (j * vdim) - (((j - 1) * j) / 2) + i - j;
-               ctx.coeff[idx] = sign * val(i, j);
+               ctx.coeff[idx] = val(i, j);
             }
          }
+         if (!use_mf)
+         {
+            build_func = ":f_build_diff_const_matrix";
+            build_qf = &f_build_diff_const_matrix;
+         }
+         else
+         {
+            apply_func = ":f_apply_diff_mf_const_matrix";
+            apply_qf = &f_apply_diff_mf_const_matrix;
+         }
+      }
+      else
+      {
+         if (!use_mf)
+         {
+            build_func = ":f_build_diff_quad_matrix";
+            build_qf = &f_build_diff_quad_matrix;
+         }
+         else
+         {
+            apply_func = ":f_apply_diff_mf_quad_matrix";
+            apply_qf = &f_apply_diff_mf_quad_matrix;
+         }
+      }
+   }
+};
+
+struct MixedVectorGradientOperatorInfo :
+   public MixedVectorGradientOperatorInfoBase
+{
+   template <typename CoeffType>
+   MixedVectorGradientOperatorInfo(const mfem::FiniteElementSpace &trial_fes,
+                                   const mfem::FiniteElementSpace &test_fes,
+                                   CoeffType *Q, bool use_bdr = false,
+                                   bool use_mf = false)
+      : MixedVectorGradientOperatorInfoBase(trial_fes, test_fes, Q, use_bdr, use_mf)
+   {
+      MFEM_VERIFY(
+         (trial_fes.FEColl()->GetDerivMapType(ctx.dim) == mfem::FiniteElement::H_CURL &&
+          test_fes.FEColl()->GetMapType(ctx.dim) == mfem::FiniteElement::H_CURL),
+         "libCEED interface for MixedVectorGradientIntegrator requires "
+         "H^1 domain and H(curl) range FE spaces!");
+      trial_op = EvalMode::Grad;
+      test_op = EvalMode::Interp;
+   }
+};
+
+struct MixedVectorWeakDivergenceOperatorInfo :
+   public MixedVectorGradientOperatorInfoBase
+{
+   template <typename CoeffType>
+   MixedVectorWeakDivergenceOperatorInfo(const mfem::FiniteElementSpace &trial_fes,
+                                         const mfem::FiniteElementSpace &test_fes,
+                                         CoeffType *Q, bool use_bdr = false,
+                                         bool use_mf = false)
+      : MixedVectorGradientOperatorInfoBase(trial_fes, test_fes, Q, use_bdr, use_mf)
+   {
+      MFEM_VERIFY(
+         (trial_fes.FEColl()->GetMapType(ctx.dim) == mfem::FiniteElement::H_CURL &&
+          test_fes.FEColl()->GetDerivMapType(ctx.dim) == mfem::FiniteElement::H_CURL),
+         "libCEED interface for MixedVectorWeakDivergenceIntegrator requires "
+         "H(curl) domain and H^1 range FE spaces!");
+      trial_op = EvalMode::Interp;
+      test_op = EvalMode::Grad;
+      for (int i = 0; i < LIBCEED_DIFF_COEFF_COMP_MAX; i++)
+      {
+         ctx.coeff[i] *= -1.0;
       }
    }
 };
@@ -135,7 +240,7 @@ PAMixedVectorGradientIntegrator::PAMixedVectorGradientIntegrator(
    const bool use_bdr)
 {
 #ifdef MFEM_USE_CEED
-   MixedVectorGradientOperatorInfo info(trial_fes, test_fes, Q, use_bdr, true);
+   MixedVectorGradientOperatorInfo info(trial_fes, test_fes, Q, use_bdr);
    Assemble(integ, info, trial_fes, test_fes, Q, use_bdr);
 #else
    MFEM_ABORT("MFEM must be built with MFEM_USE_CEED=YES to use libCEED.");
@@ -158,133 +263,79 @@ MFMixedVectorGradientIntegrator::MFMixedVectorGradientIntegrator(
 #endif
 }
 
-template <>
-PAMixedVectorWeakDivergenceIntegrator::PAMixedVectorWeakDivergenceIntegrator(
-   const mfem::MixedVectorWeakDivergenceIntegrator &integ,
-   const mfem::FiniteElementSpace &trial_fes,
-   const mfem::FiniteElementSpace &test_fes,
-   mfem::Coefficient *Q,
-   const bool use_bdr)
+namespace
 {
+
 #ifdef MFEM_USE_CEED
-   MixedVectorGradientOperatorInfo info(trial_fes, test_fes, Q, use_bdr, false);
-   if (Q)
-   {
-      // Does not inherit ownership of old Q
-      Q = new ProductCoefficient(-1.0, *Q);
-   }
-   Assemble(integ, info, trial_fes, test_fes, Q, use_bdr);
-   delete Q;
-#else
-   MFEM_ABORT("MFEM must be built with MFEM_USE_CEED=YES to use libCEED.");
-#endif
+mfem::Coefficient *NegativeCoeff(mfem::Coefficient &Q)
+{
+   return (dynamic_cast<mfem::ConstantCoefficient *>(&Q) != nullptr) ?
+          nullptr : new mfem::ProductCoefficient(-1.0, Q);
 }
 
-template <>
-PAMixedVectorWeakDivergenceIntegrator::PAMixedVectorWeakDivergenceIntegrator(
-   const mfem::MixedVectorWeakDivergenceIntegrator &integ,
-   const mfem::FiniteElementSpace &trial_fes,
-   const mfem::FiniteElementSpace &test_fes,
-   mfem::VectorCoefficient *Q,
-   const bool use_bdr)
+mfem::VectorCoefficient *NegativeCoeff(mfem::VectorCoefficient &Q)
 {
-#ifdef MFEM_USE_CEED
-   MixedVectorGradientOperatorInfo info(trial_fes, test_fes, Q, use_bdr, false);
-   if (Q)
-   {
-      // Does not inherit ownership of old Q
-      Q = new ScalarVectorProductCoefficient(-1.0, *Q);
-   }
-   Assemble(integ, info, trial_fes, test_fes, Q, use_bdr);
-   delete Q;
-#else
-   MFEM_ABORT("MFEM must be built with MFEM_USE_CEED=YES to use libCEED.");
-#endif
+   return (dynamic_cast<mfem::VectorConstantCoefficient *>(&Q) != nullptr) ?
+          nullptr : new mfem::ScalarVectorProductCoefficient(-1.0, Q);
 }
 
-template <>
-PAMixedVectorWeakDivergenceIntegrator::PAMixedVectorWeakDivergenceIntegrator(
-   const mfem::MixedVectorWeakDivergenceIntegrator &integ,
-   const mfem::FiniteElementSpace &trial_fes,
-   const mfem::FiniteElementSpace &test_fes,
-   mfem::MatrixCoefficient *Q,
-   const bool use_bdr)
+mfem::MatrixCoefficient *NegativeCoeff(mfem::MatrixCoefficient &Q)
 {
-#ifdef MFEM_USE_CEED
-   MixedVectorGradientOperatorInfo info(trial_fes, test_fes, Q, use_bdr, false);
-   if (Q)
-   {
-      // Does not inherit ownership of old Q
-      Q = new ScalarMatrixProductCoefficient(-1.0, *Q);
-   }
-   Assemble(integ, info, trial_fes, test_fes, Q, use_bdr);
-   delete Q;
-#else
-   MFEM_ABORT("MFEM must be built with MFEM_USE_CEED=YES to use libCEED.");
-#endif
+   return (dynamic_cast<mfem::MatrixConstantCoefficient *>(&Q) != nullptr) ?
+          nullptr : new mfem::ScalarMatrixProductCoefficient(-1.0, Q);
 }
+#endif
 
-template <>
-MFMixedVectorWeakDivergenceIntegrator::MFMixedVectorWeakDivergenceIntegrator(
+} // namespace
+
+template <typename CoeffType>
+PAMixedVectorWeakDivergenceIntegrator::PAMixedVectorWeakDivergenceIntegrator(
    const mfem::MixedVectorWeakDivergenceIntegrator &integ,
    const mfem::FiniteElementSpace &trial_fes,
    const mfem::FiniteElementSpace &test_fes,
-   mfem::Coefficient *Q,
+   CoeffType *Q,
    const bool use_bdr)
 {
 #ifdef MFEM_USE_CEED
-   MixedVectorGradientOperatorInfo info(trial_fes, test_fes, Q, use_bdr, false);
+   MixedVectorWeakDivergenceOperatorInfo info(trial_fes, test_fes, Q, use_bdr);
    if (Q)
    {
       // Does not inherit ownership of old Q
-      Q = new ProductCoefficient(-1.0, *Q);
+      auto *nQ = NegativeCoeff(*Q);
+      Assemble(integ, info, trial_fes, test_fes, nQ, use_bdr);
+      delete nQ;
    }
-   Assemble(integ, info, trial_fes, test_fes, Q, use_bdr, true);
-   delete Q;
-#else
-   MFEM_ABORT("MFEM must be built with MFEM_USE_CEED=YES to use libCEED.");
-#endif
-}
-
-template <>
-MFMixedVectorWeakDivergenceIntegrator::MFMixedVectorWeakDivergenceIntegrator(
-   const mfem::MixedVectorWeakDivergenceIntegrator &integ,
-   const mfem::FiniteElementSpace &trial_fes,
-   const mfem::FiniteElementSpace &test_fes,
-   mfem::VectorCoefficient *Q,
-   const bool use_bdr)
-{
-#ifdef MFEM_USE_CEED
-   MixedVectorGradientOperatorInfo info(trial_fes, test_fes, Q, use_bdr, false);
-   if (Q)
+   else
    {
-      // Does not inherit ownership of old Q
-      Q = new ScalarVectorProductCoefficient(-1.0, *Q);
+      Assemble(integ, info, trial_fes, test_fes, Q, use_bdr);
    }
-   Assemble(integ, info, trial_fes, test_fes, Q, use_bdr, true);
-   delete Q;
 #else
    MFEM_ABORT("MFEM must be built with MFEM_USE_CEED=YES to use libCEED.");
 #endif
 }
 
-template <>
+template <typename CoeffType>
 MFMixedVectorWeakDivergenceIntegrator::MFMixedVectorWeakDivergenceIntegrator(
    const mfem::MixedVectorWeakDivergenceIntegrator &integ,
    const mfem::FiniteElementSpace &trial_fes,
    const mfem::FiniteElementSpace &test_fes,
-   mfem::MatrixCoefficient *Q,
+   CoeffType *Q,
    const bool use_bdr)
 {
 #ifdef MFEM_USE_CEED
-   MixedVectorGradientOperatorInfo info(trial_fes, test_fes, Q, use_bdr, false);
+   MixedVectorWeakDivergenceOperatorInfo info(trial_fes, test_fes, Q, use_bdr,
+                                              true);
    if (Q)
    {
       // Does not inherit ownership of old Q
-      Q = new ScalarMatrixProductCoefficient(-1.0, *Q);
+      auto *nQ = NegativeCoeff(*Q);
+      Assemble(integ, info, trial_fes, test_fes, nQ, use_bdr, true);
+      delete nQ;
+   }
+   else
+   {
+      Assemble(integ, info, trial_fes, test_fes, Q, use_bdr, true);
    }
-   Assemble(integ, info, trial_fes, test_fes, Q, use_bdr, true);
-   delete Q;
 #else
    MFEM_ABORT("MFEM must be built with MFEM_USE_CEED=YES to use libCEED.");
 #endif
@@ -302,6 +353,19 @@ template PAMixedVectorGradientIntegrator::PAMixedVectorGradientIntegrator(
    const mfem::MixedVectorGradientIntegrator &, const mfem::FiniteElementSpace &,
    const mfem::FiniteElementSpace &, mfem::MatrixCoefficient *, const bool);
 
+template PAMixedVectorWeakDivergenceIntegrator::PAMixedVectorWeakDivergenceIntegrator(
+   const mfem::MixedVectorWeakDivergenceIntegrator &,
+   const mfem::FiniteElementSpace &, const mfem::FiniteElementSpace &,
+   mfem::Coefficient *, const bool);
+template PAMixedVectorWeakDivergenceIntegrator::PAMixedVectorWeakDivergenceIntegrator(
+   const mfem::MixedVectorWeakDivergenceIntegrator &,
+   const mfem::FiniteElementSpace &, const mfem::FiniteElementSpace &,
+   mfem::VectorCoefficient *, const bool);
+template PAMixedVectorWeakDivergenceIntegrator::PAMixedVectorWeakDivergenceIntegrator(
+   const mfem::MixedVectorWeakDivergenceIntegrator &,
+   const mfem::FiniteElementSpace &, const mfem::FiniteElementSpace &,
+   mfem::MatrixCoefficient *, const bool);
+
 template MFMixedVectorGradientIntegrator::MFMixedVectorGradientIntegrator(
    const mfem::MixedVectorGradientIntegrator &, const mfem::FiniteElementSpace &,
    const mfem::FiniteElementSpace &, mfem::Coefficient *, const bool);
@@ -312,6 +376,19 @@ template MFMixedVectorGradientIntegrator::MFMixedVectorGradientIntegrator(
    const mfem::MixedVectorGradientIntegrator &, const mfem::FiniteElementSpace &,
    const mfem::FiniteElementSpace &, mfem::MatrixCoefficient *, const bool);
 
+template MFMixedVectorWeakDivergenceIntegrator::MFMixedVectorWeakDivergenceIntegrator(
+   const mfem::MixedVectorWeakDivergenceIntegrator &,
+   const mfem::FiniteElementSpace &, const mfem::FiniteElementSpace &,
+   mfem::Coefficient *, const bool);
+template MFMixedVectorWeakDivergenceIntegrator::MFMixedVectorWeakDivergenceIntegrator(
+   const mfem::MixedVectorWeakDivergenceIntegrator &,
+   const mfem::FiniteElementSpace &, const mfem::FiniteElementSpace &,
+   mfem::VectorCoefficient *, const bool);
+template MFMixedVectorWeakDivergenceIntegrator::MFMixedVectorWeakDivergenceIntegrator(
+   const mfem::MixedVectorWeakDivergenceIntegrator &,
+   const mfem::FiniteElementSpace &, const mfem::FiniteElementSpace &,
+   mfem::MatrixCoefficient *, const bool);
+
 // @endcond
 
 } // namespace ceed
diff --git a/fem/ceed/integrators/nlconvection/nlconvection.cpp b/fem/ceed/integrators/nlconvection/nlconvection.cpp
index b71af29788d..c285051eed6 100644
--- a/fem/ceed/integrators/nlconvection/nlconvection.cpp
+++ b/fem/ceed/integrators/nlconvection/nlconvection.cpp
@@ -25,35 +25,68 @@ namespace ceed
 #ifdef MFEM_USE_CEED
 struct NLConvectionOperatorInfo : public OperatorInfo
 {
-   NLConvectionContext ctx;
+   NLConvectionContext ctx = {0};
    NLConvectionOperatorInfo(const mfem::FiniteElementSpace &fes,
-                            mfem::Coefficient *Q, bool use_bdr)
+                            mfem::Coefficient *Q, bool use_bdr = false,
+                            bool use_mf = false)
    {
       MFEM_VERIFY(fes.GetVDim() == fes.GetMesh()->SpaceDimension(),
                   "Missing coefficient in ceed::NLConvectionOperatorInfo!");
       ctx.dim = fes.GetMesh()->Dimension() - use_bdr;
       ctx.space_dim = fes.GetMesh()->SpaceDimension();
+      if (!use_mf)
+      {
+         apply_func = ":f_apply_conv";
+         apply_qf = &f_apply_conv;
+      }
+      else
+      {
+         build_func = "";
+         build_qf = nullptr;
+      }
       if (Q == nullptr)
       {
          ctx.coeff = 1.0;
+         if (!use_mf)
+         {
+            build_func = ":f_build_conv_const";
+            build_qf = &f_build_conv_const;
+         }
+         else
+         {
+            apply_func = ":f_apply_conv_mf_const";
+            apply_qf = &f_apply_conv_mf_const;
+         }
       }
-      else if (ConstantCoefficient *const_coeff =
-                  dynamic_cast<ConstantCoefficient *>(Q))
+      else if (mfem::ConstantCoefficient *const_coeff =
+                  dynamic_cast<mfem::ConstantCoefficient *>(Q))
       {
          ctx.coeff = const_coeff->constant;
+         if (!use_mf)
+         {
+            build_func = ":f_build_conv_const";
+            build_qf = &f_build_conv_const;
+         }
+         else
+         {
+            apply_func = ":f_apply_conv_mf_const";
+            apply_qf = &f_apply_conv_mf_const;
+         }
+      }
+      else
+      {
+         if (!use_mf)
+         {
+            build_func = ":f_build_conv_quad";
+            build_qf = &f_build_conv_quad;
+         }
+         else
+         {
+            apply_func = ":f_apply_conv_mf_quad";
+            apply_qf = &f_apply_conv_mf_quad;
+         }
       }
-
       header = "/integrators/nlconvection/nlconvection_qf.h";
-      build_func_const = ":f_build_conv_const";
-      build_qf_const = &f_build_conv_const;
-      build_func_quad = ":f_build_conv_quad";
-      build_qf_quad = &f_build_conv_quad;
-      apply_func = ":f_apply_conv";
-      apply_qf = &f_apply_conv;
-      apply_func_mf_const = ":f_apply_conv_mf_const";
-      apply_qf_mf_const = &f_apply_conv_mf_const;
-      apply_func_mf_quad = ":f_apply_conv_mf_quad";
-      apply_qf_mf_quad = &f_apply_conv_mf_quad;
       trial_op = EvalMode::InterpAndGrad;
       test_op = EvalMode::Interp;
       qdatasize = ctx.dim * ctx.space_dim;
@@ -82,7 +115,7 @@ MFVectorConvectionNLIntegrator::MFVectorConvectionNLIntegrator(
    const bool use_bdr)
 {
 #ifdef MFEM_USE_CEED
-   NLConvectionOperatorInfo info(fes, Q, use_bdr);
+   NLConvectionOperatorInfo info(fes, Q, use_bdr, true);
    Assemble(integ, info, fes, Q, use_bdr, true);
 #else
    MFEM_ABORT("MFEM must be built with MFEM_USE_CEED=YES to use libCEED.");
diff --git a/fem/ceed/integrators/nlconvection/nlconvection_qf.h b/fem/ceed/integrators/nlconvection/nlconvection_qf.h
index 227971c1fae..ee1782784b1 100644
--- a/fem/ceed/integrators/nlconvection/nlconvection_qf.h
+++ b/fem/ceed/integrators/nlconvection/nlconvection_qf.h
@@ -14,15 +14,14 @@
 
 #include "../util/util_qf.h"
 
-/// A structure used to pass additional data to f_build_conv and f_apply_conv
 struct NLConvectionContext
 {
    CeedInt dim, space_dim;
    CeedScalar coeff;
 };
 
-/// libCEED QFunction for building quadrature data for a convection operator
-/// with a constant coefficient
+/// libCEED QFunction for building quadrature data for a convection
+/// operator with a constant coefficient
 CEED_QFUNCTION(f_build_conv_const)(void *ctx, CeedInt Q,
                                    const CeedScalar *const *in,
                                    CeedScalar *const *out)
@@ -31,7 +30,7 @@ CEED_QFUNCTION(f_build_conv_const)(void *ctx, CeedInt Q,
    // in[0] is Jacobians with shape [dim, ncomp=space_dim, Q]
    // in[1] is quadrature weights, size (Q)
    //
-   // At every quadrature point, compute and store qw * c * adj(J)^T.
+   // At every quadrature point, compute and store qw * c * adj(J)^T
    const CeedScalar coeff = bc->coeff;
    const CeedScalar *J = in[0], *qw = in[1];
    CeedScalar *qd = out[0];
@@ -71,8 +70,8 @@ CEED_QFUNCTION(f_build_conv_const)(void *ctx, CeedInt Q,
    return 0;
 }
 
-/// libCEED QFunction for building quadrature data for a convection operator
-/// with a coefficient evaluated at quadrature points.
+/// libCEED QFunction for building quadrature data for a convection
+/// operator with a coefficient evaluated at quadrature points
 CEED_QFUNCTION(f_build_conv_quad)(void *ctx, CeedInt Q,
                                   const CeedScalar *const *in,
                                   CeedScalar *const *out)
@@ -82,7 +81,7 @@ CEED_QFUNCTION(f_build_conv_quad)(void *ctx, CeedInt Q,
    // in[1] is Jacobians with shape [dim, ncomp=space_dim, Q]
    // in[2] is quadrature weights, size (Q)
    //
-   // At every quadrature point, compute and store qw * c * adj(J)^T.
+   // At every quadrature point, compute and store qw * c * adj(J)^T
    const CeedScalar *c = in[0], *J = in[1], *qw = in[2];
    CeedScalar *qd = out[0];
    switch (10 * bc->space_dim + bc->dim)
@@ -121,7 +120,7 @@ CEED_QFUNCTION(f_build_conv_quad)(void *ctx, CeedInt Q,
    return 0;
 }
 
-/// libCEED QFunction for applying a conv operator
+/// libCEED QFunction for applying a convection operator
 CEED_QFUNCTION(f_apply_conv)(void *ctx, CeedInt Q,
                              const CeedScalar *const *in,
                              CeedScalar *const *out)
@@ -251,7 +250,8 @@ CEED_QFUNCTION(f_apply_conv)(void *ctx, CeedInt Q,
    return 0;
 }
 
-/// libCEED QFunction for applying a conv operator
+/// libCEED QFunction for applying a convection operator with a constant
+/// coefficient
 CEED_QFUNCTION(f_apply_conv_mf_const)(void *ctx, CeedInt Q,
                                       const CeedScalar *const *in,
                                       CeedScalar *const *out)
@@ -263,7 +263,7 @@ CEED_QFUNCTION(f_apply_conv_mf_const)(void *ctx, CeedInt Q,
    // in[3] is quadrature weights, size (Q)
    // out[0] has shape [ncomp=space_dim, Q]
    //
-   // At every quadrature point, compute qw * c * adj(J)^T.
+   // At every quadrature point, compute qw * c * adj(J)^T
    const CeedScalar coeff = bc->coeff;
    const CeedScalar *u = in[0], *ug = in[1], *J = in[2], *qw = in[3];
    CeedScalar *vg = out[0];
@@ -375,7 +375,8 @@ CEED_QFUNCTION(f_apply_conv_mf_const)(void *ctx, CeedInt Q,
    return 0;
 }
 
-/// libCEED QFunction for applying a conv operator
+/// libCEED QFunction for applying a convection operator with a coefficient
+/// evaluated at quadrature points
 CEED_QFUNCTION(f_apply_conv_mf_quad)(void *ctx, CeedInt Q,
                                      const CeedScalar *const *in,
                                      CeedScalar *const *out)
@@ -388,7 +389,7 @@ CEED_QFUNCTION(f_apply_conv_mf_quad)(void *ctx, CeedInt Q,
    // in[4] is quadrature weights, size (Q)
    // out[0] has shape [ncomp=space_dim, Q]
    //
-   // At every quadrature point, compute qw * c * adj(J)^T.
+   // At every quadrature point, compute qw * c * adj(J)^T
    const CeedScalar *u = in[0], *ug = in[1], *c = in[2], *J = in[3], *qw = in[4];
    CeedScalar *vg = out[0];
    switch (10 * bc->space_dim + bc->dim)
diff --git a/fem/ceed/integrators/util/util_qf.h b/fem/ceed/integrators/util/util_qf.h
index beb95cd3c0e..fa7ca763b29 100644
--- a/fem/ceed/integrators/util/util_qf.h
+++ b/fem/ceed/integrators/util/util_qf.h
@@ -73,7 +73,7 @@ CEED_QFUNCTION_HELPER void MultAdjJCAdjJt22(const CeedScalar *J,
                                             const CeedInt qd_stride,
                                             CeedScalar *qd)
 {
-   // compute qw/det(J) adj(J) C adj(J)^T and store the symmetric part of the result.
+   // compute qw/det(J) adj(J) C adj(J)^T and store the symmetric part of the result
    // J: 0 2   adj(J):  J22 -J12   qd: 0 1
    //    1 3           -J21  J11       1 2
    const CeedScalar J11 = J[J_stride * 0];
@@ -122,7 +122,7 @@ CEED_QFUNCTION_HELPER void MultAdjJCAdjJt21(const CeedScalar *J,
                                             const CeedInt qd_stride,
                                             CeedScalar *qd)
 {
-   // compute qw/det(J) adj(J) C adj(J)^T and store the symmetric part of the result.
+   // compute qw/det(J) adj(J) C adj(J)^T and store the symmetric part of the result
    // J: 0   adj(J): 1/sqrt(J^T J) J^T   qd: 0
    //    1
    const CeedScalar J11 = J[J_stride * 0];
@@ -160,7 +160,7 @@ CEED_QFUNCTION_HELPER void MultAdjJCAdjJt33(const CeedScalar *J,
                                             const CeedInt qd_stride,
                                             CeedScalar *qd)
 {
-   // compute qw/det(J) adj(J) C adj(J)^T and store the symmetric part of the result.
+   // compute qw/det(J) adj(J) C adj(J)^T and store the symmetric part of the result
    // J: 0 3 6   qd: 0 1 2
    //    1 4 7       1 3 4
    //    2 5 8       2 4 5
@@ -273,7 +273,7 @@ CEED_QFUNCTION_HELPER void MultAdjJCAdjJt32(const CeedScalar *J,
                                             const CeedInt qd_stride,
                                             CeedScalar *qd)
 {
-   // compute qw/det(J) adj(J) C adj(J)^T and store the symmetric part of the result.
+   // compute qw/det(J) adj(J) C adj(J)^T and store the symmetric part of the result
    // J: 0 3   qd: 0 1
    //    1 4       1 2
    //    2 5
@@ -373,7 +373,7 @@ CEED_QFUNCTION_HELPER void MultJtCJ22(const CeedScalar *J,
                                       const CeedInt qd_stride,
                                       CeedScalar *qd)
 {
-   // compute qw/det(J) J^T C J and store the symmetric part of the result.
+   // compute qw/det(J) J^T C J and store the symmetric part of the result
    // J: 0 2   qd: 0 1
    //    1 3       1 2
    const CeedScalar J11 = J[J_stride * 0];
@@ -422,7 +422,7 @@ CEED_QFUNCTION_HELPER void MultJtCJ21(const CeedScalar *J,
                                       const CeedInt qd_stride,
                                       CeedScalar *qd)
 {
-   // compute qw/det(J) J^T C J and store the symmetric part of the result.
+   // compute qw/det(J) J^T C J and store the symmetric part of the result
    // J: 0   qd: 0
    //    1
    const CeedScalar J11 = J[J_stride * 0];
@@ -460,7 +460,7 @@ CEED_QFUNCTION_HELPER void MultJtCJ33(const CeedScalar *J,
                                       const CeedInt qd_stride,
                                       CeedScalar *qd)
 {
-   // compute qw/det(J) J^T C J and store the symmetric part of the result.
+   // compute qw/det(J) J^T C J and store the symmetric part of the result
    // J: 0 3 6   qd: 0 1 2
    //    1 4 7       1 3 4
    //    2 5 8       2 4 5
@@ -566,7 +566,7 @@ CEED_QFUNCTION_HELPER void MultJtCJ32(const CeedScalar *J,
                                       const CeedInt qd_stride,
                                       CeedScalar *qd)
 {
-   // compute qw/det(J) J^T C J and store the symmetric part of the result.
+   // compute qw/det(J) J^T C J and store the symmetric part of the result
    // J: 0 3   qd: 0 1
    //    1 4       1 2
    //    2 5
@@ -639,7 +639,7 @@ CEED_QFUNCTION_HELPER void MultCtAdjJt22(const CeedScalar *J,
                                          const CeedInt qd_stride,
                                          CeedScalar *qd)
 {
-   // compute qw c^T adj(J)^T and store the result vector.
+   // compute qw c^T adj(J)^T and store the result vector
    // J: 0 2   adj(J):  J22 -J12
    //    1 3           -J21  J11
    const CeedScalar J11 = J[J_stride * 0];
@@ -660,7 +660,7 @@ CEED_QFUNCTION_HELPER void MultCtAdjJt21(const CeedScalar *J,
                                          const CeedInt qd_stride,
                                          CeedScalar *qd)
 {
-   // compute qw c^T adj(J)^T and store the result vector.
+   // compute qw c^T adj(J)^T and store the result vector
    // J: 0   adj(J): 1/sqrt(J^T J) J^T
    //    1
    const CeedScalar J11 = J[J_stride * 0];
@@ -679,7 +679,7 @@ CEED_QFUNCTION_HELPER void MultCtAdjJt33(const CeedScalar *J,
                                          const CeedInt qd_stride,
                                          CeedScalar *qd)
 {
-   // compute qw c^T adj(J)^T and store the result vector.
+   // compute qw c^T adj(J)^T and store the result vector
    // J: 0 3 6
    //    1 4 7
    //    2 5 8
@@ -717,7 +717,7 @@ CEED_QFUNCTION_HELPER void MultCtAdjJt32(const CeedScalar *J,
                                          const CeedInt qd_stride,
                                          CeedScalar *qd)
 {
-   // compute qw c^T adj(J)^T and store the result vector.
+   // compute qw c^T adj(J)^T and store the result vector
    // J: 0 3
    //    1 4
    //    2 5
@@ -750,7 +750,7 @@ CEED_QFUNCTION_HELPER void MultAdjJt22(const CeedScalar *J,
                                        const CeedInt qd_stride,
                                        CeedScalar *qd)
 {
-   // compute qw adj(J)^T and store the result matrix.
+   // compute qw adj(J)^T and store the result matrix
    // J: 0 2   adj(J):  J22 -J12   qd: 0 2
    //    1 3           -J21  J11       1 3
    const CeedScalar J11 = J[J_stride * 0];
@@ -769,7 +769,7 @@ CEED_QFUNCTION_HELPER void MultAdjJt21(const CeedScalar *J,
                                        const CeedInt qd_stride,
                                        CeedScalar *qd)
 {
-   // compute qw adj(J)^T and store the result matrix.
+   // compute qw adj(J)^T and store the result matrix
    // J: 0   adj(J):  1/sqrt(J^T J) J^T   qd: 0
    //    1                                    1
    const CeedScalar J11 = J[J_stride * 0];
@@ -785,7 +785,7 @@ CEED_QFUNCTION_HELPER void MultAdjJt33(const CeedScalar *J,
                                        const CeedInt qd_stride,
                                        CeedScalar *qd)
 {
-   // compute qw adj(J)^T and store the result matrix.
+   // compute qw adj(J)^T and store the result matrix
    // J: 0 3 6   qd: 0 3 6
    //    1 4 7       1 4 7
    //    2 5 8       2 5 8
@@ -824,7 +824,7 @@ CEED_QFUNCTION_HELPER void MultAdjJt32(const CeedScalar *J,
                                        const CeedInt qd_stride,
                                        CeedScalar *qd)
 {
-   // compute qw adj(J)^T and store the result matrix.
+   // compute qw adj(J)^T and store the result matrix
    // J: 0 3   qd: 0 3
    //    1 4       1 4
    //    2 5       2 5
diff --git a/fem/ceed/integrators/vecfemass/vecfemass.cpp b/fem/ceed/integrators/vecfemass/vecfemass.cpp
index 5cbb043e643..32df1e322b0 100644
--- a/fem/ceed/integrators/vecfemass/vecfemass.cpp
+++ b/fem/ceed/integrators/vecfemass/vecfemass.cpp
@@ -25,92 +25,150 @@ namespace ceed
 #ifdef MFEM_USE_CEED
 struct VectorFEMassOperatorInfo : public OperatorInfo
 {
-   VectorFEMassContext ctx;
+   VectorFEMassContext ctx = {0};
    template <typename CoeffType>
    VectorFEMassOperatorInfo(const mfem::FiniteElementSpace &fes, CoeffType *Q,
-                            bool use_bdr)
+                            bool use_bdr = false, bool use_mf = false)
    {
       MFEM_VERIFY(fes.GetVDim() == 1,
                   "libCEED interface for vector FE does not support VDim > 1!");
       ctx.dim = fes.GetMesh()->Dimension() - use_bdr;
       ctx.space_dim = fes.GetMesh()->SpaceDimension();
-      if (Q == nullptr)
+      bool is_hdiv = (fes.FEColl()->GetMapType(ctx.dim) ==
+                      mfem::FiniteElement::H_DIV);
+      MFEM_VERIFY(is_hdiv ||
+                  fes.FEColl()->GetMapType(ctx.dim) == mfem::FiniteElement::H_CURL,
+                  "VectorFEMassIntegrator requires H(div) or H(curl) FE space!");
+      if (!use_mf)
       {
-         ctx.coeff_comp = 1;
-         ctx.coeff[0] = 1.0;
+         apply_func = ":f_apply_vecfemass";
+         apply_qf = &f_apply_vecfemass;
       }
       else
       {
-         InitCoefficient(*Q);
+         build_func = "";
+         build_qf = nullptr;
       }
-
-      header = "/integrators/vecfemass/vecfemass_qf.h";
-      if (fes.FEColl()->GetMapType(ctx.dim) == mfem::FiniteElement::H_DIV)
+      if (Q == nullptr)
       {
-         build_func_const = ":f_build_hdivmass_const";
-         build_qf_const = &f_build_hdivmass_const;
-         build_func_quad = ":f_build_hdivmass_quad";
-         build_qf_quad = &f_build_hdivmass_quad;
-         apply_func = ":f_apply_vecfemass";
-         apply_qf = &f_apply_vecfemass;
-         apply_func_mf_const = ":f_apply_hdivmass_mf_const";
-         apply_qf_mf_const = &f_apply_hdivmass_mf_const;
-         apply_func_mf_quad = ":f_apply_hdivmass_mf_quad";
-         apply_qf_mf_quad = &f_apply_hdivmass_mf_quad;
+         ctx.coeff[0] = 1.0;
+         if (!use_mf)
+         {
+            build_func = is_hdiv ? ":f_build_hdivmass_const_scalar" :
+                         ":f_build_hcurlmass_const_scalar";
+            build_qf = is_hdiv ? &f_build_hdivmass_const_scalar :
+                       &f_build_hcurlmass_const_scalar;
+         }
+         else
+         {
+            apply_func = is_hdiv ? ":f_apply_hdivmass_mf_const_scalar" :
+                         ":f_apply_hcurlmass_mf_const_scalar";
+            apply_qf = is_hdiv ? &f_apply_hdivmass_mf_const_scalar :
+                       &f_apply_hcurlmass_mf_const_scalar;
+         }
       }
       else
       {
-         MFEM_VERIFY(fes.FEColl()->GetMapType(ctx.dim) == mfem::FiniteElement::H_CURL,
-                     "VectorFEMassIntegrator requires H(div) or H(curl) FE space!");
-         build_func_const = ":f_build_hcurlmass_const";
-         build_qf_const = &f_build_hcurlmass_const;
-         build_func_quad = ":f_build_hcurlmass_quad";
-         build_qf_quad = &f_build_hcurlmass_quad;
-         apply_func = ":f_apply_vecfemass";
-         apply_qf = &f_apply_vecfemass;
-         apply_func_mf_const = ":f_apply_hcurlmass_mf_const";
-         apply_qf_mf_const = &f_apply_hcurlmass_mf_const;
-         apply_func_mf_quad = ":f_apply_hcurlmass_mf_quad";
-         apply_qf_mf_quad = &f_apply_hcurlmass_mf_quad;
+         InitCoefficient(*Q, is_hdiv, use_mf);
       }
+      header = "/integrators/vecfemass/vecfemass_qf.h";
       trial_op = EvalMode::Interp;
       test_op = EvalMode::Interp;
       qdatasize = (ctx.dim * (ctx.dim + 1)) / 2;
    }
-   void InitCoefficient(mfem::Coefficient &Q)
+   void InitCoefficient(mfem::Coefficient &Q, bool is_hdiv, bool use_mf)
    {
-      ctx.coeff_comp = 1;
-      if (ConstantCoefficient *const_coeff =
-             dynamic_cast<ConstantCoefficient *>(&Q))
+      if (mfem::ConstantCoefficient *const_coeff =
+             dynamic_cast<mfem::ConstantCoefficient *>(&Q))
       {
          ctx.coeff[0] = const_coeff->constant;
+         if (!use_mf)
+         {
+            build_func = is_hdiv ? ":f_build_hdivmass_const_scalar" :
+                         ":f_build_hcurlmass_const_scalar";
+            build_qf = is_hdiv ? &f_build_hdivmass_const_scalar :
+                       &f_build_hcurlmass_const_scalar;
+         }
+         else
+         {
+            apply_func = is_hdiv ? ":f_apply_hdivmass_mf_const_scalar" :
+                         ":f_apply_hcurlmass_mf_const_scalar";
+            apply_qf = is_hdiv ? &f_apply_hdivmass_mf_const_scalar :
+                       &f_apply_hcurlmass_mf_const_scalar;
+         }
+      }
+      else
+      {
+         if (!use_mf)
+         {
+            build_func = is_hdiv ? ":f_build_hdivmass_quad_scalar" :
+                         ":f_build_hcurlmass_quad_scalar";
+            build_qf = is_hdiv ? &f_build_hdivmass_quad_scalar :
+                       &f_build_hcurlmass_quad_scalar;
+         }
+         else
+         {
+            apply_func = is_hdiv ? ":f_apply_hdivmass_mf_quad_scalar" :
+                         ":f_apply_hcurlmass_mf_quad_scalar";
+            apply_qf = is_hdiv ? &f_apply_hdivmass_mf_quad_scalar :
+                       &f_apply_hcurlmass_mf_quad_scalar;
+         }
       }
    }
-   void InitCoefficient(mfem::VectorCoefficient &VQ)
+   void InitCoefficient(mfem::VectorCoefficient &VQ, bool is_hdiv, bool use_mf)
    {
-      const int vdim = VQ.GetVDim();
-      ctx.coeff_comp = vdim;
-      if (VectorConstantCoefficient *const_coeff =
-             dynamic_cast<VectorConstantCoefficient *>(&VQ))
+      if (mfem::VectorConstantCoefficient *const_coeff =
+             dynamic_cast<mfem::VectorConstantCoefficient *>(&VQ))
       {
-         MFEM_VERIFY(ctx.coeff_comp <= LIBCEED_VECFEMASS_COEFF_COMP_MAX,
+         const int vdim = VQ.GetVDim();
+         MFEM_VERIFY(vdim <= LIBCEED_VECFEMASS_COEFF_COMP_MAX,
                      "VectorCoefficient dimension exceeds context storage!");
          const mfem::Vector &val = const_coeff->GetVec();
          for (int i = 0; i < vdim; i++)
          {
             ctx.coeff[i] = val[i];
          }
+         if (!use_mf)
+         {
+            build_func = is_hdiv ? ":f_build_hdivmass_const_vector" :
+                         ":f_build_hcurlmass_const_vector";
+            build_qf = is_hdiv ? &f_build_hdivmass_const_vector :
+                       &f_build_hcurlmass_const_vector;
+         }
+         else
+         {
+            apply_func = is_hdiv ? ":f_apply_hdivmass_mf_const_vector" :
+                         ":f_apply_hcurlmass_mf_const_vector";
+            apply_qf = is_hdiv ? &f_apply_hdivmass_mf_const_vector :
+                       &f_apply_hcurlmass_mf_const_vector;
+         }
+      }
+      else
+      {
+         if (!use_mf)
+         {
+            build_func = is_hdiv ? ":f_build_hdivmass_quad_vector" :
+                         ":f_build_hcurlmass_quad_vector";
+            build_qf = is_hdiv ? &f_build_hdivmass_quad_vector :
+                       &f_build_hcurlmass_quad_vector;
+         }
+         else
+         {
+            apply_func = is_hdiv ? ":f_apply_hdivmass_mf_quad_vector" :
+                         ":f_apply_hcurlmass_mf_quad_vector";
+            apply_qf = is_hdiv ? &f_apply_hdivmass_mf_quad_vector :
+                       &f_apply_hcurlmass_mf_quad_vector;
+         }
       }
    }
-   void InitCoefficient(mfem::MatrixCoefficient &MQ)
+   void InitCoefficient(mfem::MatrixCoefficient &MQ, bool is_hdiv, bool use_mf)
    {
       // Assumes matrix coefficient is symmetric
-      const int vdim = MQ.GetVDim();
-      ctx.coeff_comp = (vdim * (vdim + 1)) / 2;
-      if (MatrixConstantCoefficient *const_coeff =
-             dynamic_cast<MatrixConstantCoefficient *>(&MQ))
+      if (mfem::MatrixConstantCoefficient *const_coeff =
+             dynamic_cast<mfem::MatrixConstantCoefficient *>(&MQ))
       {
-         MFEM_VERIFY(ctx.coeff_comp <= LIBCEED_VECFEMASS_COEFF_COMP_MAX,
+         const int vdim = MQ.GetVDim();
+         MFEM_VERIFY((vdim * (vdim + 1)) / 2 <= LIBCEED_VECFEMASS_COEFF_COMP_MAX,
                      "MatrixCoefficient dimensions exceed context storage!");
          const mfem::DenseMatrix &val = const_coeff->GetMatrix();
          for (int j = 0; j < vdim; j++)
@@ -121,6 +179,37 @@ struct VectorFEMassOperatorInfo : public OperatorInfo
                ctx.coeff[idx] = val(i, j);
             }
          }
+         if (!use_mf)
+         {
+            build_func = is_hdiv ? ":f_build_hdivmass_const_matrix" :
+                         ":f_build_hcurlmass_const_matrix";
+            build_qf = is_hdiv ? &f_build_hdivmass_const_matrix :
+                       &f_build_hcurlmass_const_matrix;
+         }
+         else
+         {
+            apply_func = is_hdiv ? ":f_apply_hdivmass_mf_const_matrix" :
+                         ":f_apply_hcurlmass_mf_const_matrix";
+            apply_qf = is_hdiv ? &f_apply_hdivmass_mf_const_matrix :
+                       &f_apply_hcurlmass_mf_const_matrix;
+         }
+      }
+      else
+      {
+         if (!use_mf)
+         {
+            build_func = is_hdiv ? ":f_build_hdivmass_quad_matrix" :
+                         ":f_build_hcurlmass_quad_matrix";
+            build_qf = is_hdiv ? &f_build_hdivmass_quad_matrix :
+                       &f_build_hcurlmass_quad_matrix;
+         }
+         else
+         {
+            apply_func = is_hdiv ? ":f_apply_hdivmass_mf_quad_matrix" :
+                         ":f_apply_hcurlmass_mf_quad_matrix";
+            apply_qf = is_hdiv ? &f_apply_hdivmass_mf_quad_matrix :
+                       &f_apply_hcurlmass_mf_quad_matrix;
+         }
       }
    }
 };
@@ -149,7 +238,7 @@ MFVectorFEMassIntegrator::MFVectorFEMassIntegrator(
    const bool use_bdr)
 {
 #ifdef MFEM_USE_CEED
-   VectorFEMassOperatorInfo info(fes, Q, use_bdr);
+   VectorFEMassOperatorInfo info(fes, Q, use_bdr, true);
    Assemble(integ, info, fes, Q, use_bdr, true);
 #else
    MFEM_ABORT("MFEM must be built with MFEM_USE_CEED=YES to use libCEED.");
diff --git a/fem/ceed/integrators/vecfemass/vecfemass_qf.h b/fem/ceed/integrators/vecfemass/vecfemass_qf.h
index a921a580404..571316ba821 100644
--- a/fem/ceed/integrators/vecfemass/vecfemass_qf.h
+++ b/fem/ceed/integrators/vecfemass/vecfemass_qf.h
@@ -16,19 +16,17 @@
 
 #define LIBCEED_VECFEMASS_COEFF_COMP_MAX 6
 
-/// A structure used to pass additional data to f_build_vecfemass and
-/// f_apply_vecfemass
 struct VectorFEMassContext
 {
-   CeedInt dim, space_dim, coeff_comp;
+   CeedInt dim, space_dim;
    CeedScalar coeff[LIBCEED_VECFEMASS_COEFF_COMP_MAX];
 };
 
-/// libCEED QFunction for building quadrature data for a vector FE mass
-/// operator with a constant coefficient
-CEED_QFUNCTION(f_build_hdivmass_const)(void *ctx, CeedInt Q,
-                                       const CeedScalar *const *in,
-                                       CeedScalar *const *out)
+/// libCEED QFunction for building quadrature data for an H(div) mass operator
+/// with a scalar constant coefficient
+CEED_QFUNCTION(f_build_hdivmass_const_scalar)(void *ctx, CeedInt Q,
+                                              const CeedScalar *const *in,
+                                              CeedScalar *const *out)
 {
    VectorFEMassContext *bc = (VectorFEMassContext *)ctx;
    // in[0] is Jacobians with shape [dim, ncomp=space_dim, Q]
@@ -36,98 +34,144 @@ CEED_QFUNCTION(f_build_hdivmass_const)(void *ctx, CeedInt Q,
    //
    // At every quadrature point, compute qw/det(J) adj(J) C adj(J)^T (for
    // H(curl)) or qw/det(J) J^T C J (for H(div)) and store the symmetric part
-   // of the result.
+   // of the result
    const CeedScalar *coeff = bc->coeff;
    const CeedScalar *J = in[0], *qw = in[1];
    CeedScalar *qd = out[0];
-   switch (100 * bc->space_dim + 10 * bc->dim + bc->coeff_comp)
+   switch (10 * bc->space_dim + bc->dim)
    {
-      case 111:
+      case 11:
          CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++)
          {
             const CeedScalar coeff0 = coeff[0];
             qd[i] = qw[i] * coeff0 * J[i];
          }
          break;
-      case 213:
+      case 21:
          CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++)
          {
-            MultJtCJ21(J + i, Q, coeff, 1, 3, qw[i], Q, qd + i);
+            MultJtCJ21(J + i, Q, coeff, 1, 1, qw[i], Q, qd + i);
          }
          break;
-      case 212:
+      case 22:
          CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++)
          {
-            MultJtCJ21(J + i, Q, coeff, 1, 2, qw[i], Q, qd + i);
+            MultJtCJ22(J + i, Q, coeff, 1, 1, qw[i], Q, qd + i);
          }
          break;
-      case 211:
+      case 32:
          CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++)
          {
-            MultJtCJ21(J + i, Q, coeff, 1, 1, qw[i], Q, qd + i);
+            MultJtCJ32(J + i, Q, coeff, 1, 1, qw[i], Q, qd + i);
          }
          break;
-      case 223:
+      case 33:
          CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++)
          {
-            MultJtCJ22(J + i, Q, coeff, 1, 3, qw[i], Q, qd + i);
+            MultJtCJ33(J + i, Q, coeff, 1, 1, qw[i], Q, qd + i);
          }
          break;
-      case 222:
+   }
+   return 0;
+}
+
+/// libCEED QFunction for building quadrature data for an H(div) mass operator
+/// with a vector constant coefficient
+CEED_QFUNCTION(f_build_hdivmass_const_vector)(void *ctx, CeedInt Q,
+                                              const CeedScalar *const *in,
+                                              CeedScalar *const *out)
+{
+   VectorFEMassContext *bc = (VectorFEMassContext *)ctx;
+   // in[0] is Jacobians with shape [dim, ncomp=space_dim, Q]
+   // in[1] is quadrature weights, size (Q)
+   //
+   // At every quadrature point, compute qw/det(J) adj(J) C adj(J)^T (for
+   // H(curl)) or qw/det(J) J^T C J (for H(div)) and store the symmetric part
+   // of the result
+   const CeedScalar *coeff = bc->coeff;
+   const CeedScalar *J = in[0], *qw = in[1];
+   CeedScalar *qd = out[0];
+   switch (10 * bc->space_dim + bc->dim)
+   {
+      case 21:
          CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++)
          {
-            MultJtCJ22(J + i, Q, coeff, 1, 2, qw[i], Q, qd + i);
+            MultJtCJ21(J + i, Q, coeff, 1, 2, qw[i], Q, qd + i);
          }
          break;
-      case 221:
+      case 22:
          CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++)
          {
-            MultJtCJ22(J + i, Q, coeff, 1, 1, qw[i], Q, qd + i);
+            MultJtCJ22(J + i, Q, coeff, 1, 2, qw[i], Q, qd + i);
          }
          break;
-      case 326:
+      case 32:
          CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++)
          {
-            MultJtCJ32(J + i, Q, coeff, 1, 6, qw[i], Q, qd + i);
+            MultJtCJ32(J + i, Q, coeff, 1, 3, qw[i], Q, qd + i);
          }
          break;
-      case 323:
+      case 33:
          CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++)
          {
-            MultJtCJ32(J + i, Q, coeff, 1, 3, qw[i], Q, qd + i);
+            MultJtCJ33(J + i, Q, coeff, 1, 3, qw[i], Q, qd + i);
          }
          break;
-      case 321:
+   }
+   return 0;
+}
+
+/// libCEED QFunction for building quadrature data for an H(div) mass operator
+/// with a matrix constant coefficient
+CEED_QFUNCTION(f_build_hdivmass_const_matrix)(void *ctx, CeedInt Q,
+                                              const CeedScalar *const *in,
+                                              CeedScalar *const *out)
+{
+   VectorFEMassContext *bc = (VectorFEMassContext *)ctx;
+   // in[0] is Jacobians with shape [dim, ncomp=space_dim, Q]
+   // in[1] is quadrature weights, size (Q)
+   //
+   // At every quadrature point, compute qw/det(J) adj(J) C adj(J)^T (for
+   // H(curl)) or qw/det(J) J^T C J (for H(div)) and store the symmetric part
+   // of the result
+   const CeedScalar *coeff = bc->coeff;
+   const CeedScalar *J = in[0], *qw = in[1];
+   CeedScalar *qd = out[0];
+   switch (10 * bc->space_dim + bc->dim)
+   {
+      case 21:
          CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++)
          {
-            MultJtCJ32(J + i, Q, coeff, 1, 1, qw[i], Q, qd + i);
+            MultJtCJ21(J + i, Q, coeff, 1, 3, qw[i], Q, qd + i);
          }
          break;
-      case 336:
+      case 22:
          CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++)
          {
-            MultJtCJ33(J + i, Q, coeff, 1, 6, qw[i], Q, qd + i);
+            MultJtCJ22(J + i, Q, coeff, 1, 3, qw[i], Q, qd + i);
          }
          break;
-      case 333:
+      case 32:
          CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++)
          {
-            MultJtCJ33(J + i, Q, coeff, 1, 3, qw[i], Q, qd + i);
+            MultJtCJ32(J + i, Q, coeff, 1, 6, qw[i], Q, qd + i);
          }
          break;
-      case 331:
+      case 33:
          CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++)
          {
-            MultJtCJ33(J + i, Q, coeff, 1, 1, qw[i], Q, qd + i);
+            MultJtCJ33(J + i, Q, coeff, 1, 6, qw[i], Q, qd + i);
          }
          break;
    }
    return 0;
 }
 
-CEED_QFUNCTION(f_build_hcurlmass_const)(void *ctx, CeedInt Q,
-                                        const CeedScalar *const *in,
-                                        CeedScalar *const *out)
+/// libCEED QFunction for building quadrature data for an H(curl) mass operator
+/// with a scalar constant coefficient
+CEED_QFUNCTION(f_build_hcurlmass_const_scalar)(void *ctx, CeedInt Q,
+                                               const CeedScalar *const *in,
+                                               CeedScalar *const *out)
 {
    VectorFEMassContext *bc = (VectorFEMassContext *)ctx;
    // in[0] is Jacobians with shape [dim, ncomp=space_dim, Q]
@@ -135,287 +179,421 @@ CEED_QFUNCTION(f_build_hcurlmass_const)(void *ctx, CeedInt Q,
    //
    // At every quadrature point, compute qw/det(J) adj(J) C adj(J)^T (for
    // H(curl)) or qw/det(J) J^T C J (for H(div)) and store the symmetric part
-   // of the result.
+   // of the result
    const CeedScalar *coeff = bc->coeff;
    const CeedScalar *J = in[0], *qw = in[1];
    CeedScalar *qd = out[0];
-   switch (100 * bc->space_dim + 10 * bc->dim + bc->coeff_comp)
+   switch (10 * bc->space_dim + bc->dim)
    {
-      case 111:
+      case 11:
          CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++)
          {
             const CeedScalar coeff0 = coeff[0];
             qd[i] = qw[i] * coeff0 / J[i];
          }
          break;
-      case 213:
+      case 21:
          CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++)
          {
-            MultAdjJCAdjJt21(J + i, Q, coeff, 1, 3, qw[i], Q, qd + i);
+            MultAdjJCAdjJt21(J + i, Q, coeff, 1, 1, qw[i], Q, qd + i);
          }
          break;
-      case 212:
+      case 22:
          CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++)
          {
-            MultAdjJCAdjJt21(J + i, Q, coeff, 1, 2, qw[i], Q, qd + i);
+            MultAdjJCAdjJt22(J + i, Q, coeff, 1, 1, qw[i], Q, qd + i);
          }
          break;
-      case 211:
+      case 32:
          CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++)
          {
-            MultAdjJCAdjJt21(J + i, Q, coeff, 1, 1, qw[i], Q, qd + i);
+            MultAdjJCAdjJt32(J + i, Q, coeff, 1, 1, qw[i], Q, qd + i);
          }
          break;
-      case 223:
+      case 33:
          CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++)
          {
-            MultAdjJCAdjJt22(J + i, Q, coeff, 1, 3, qw[i], Q, qd + i);
+            MultAdjJCAdjJt33(J + i, Q, coeff, 1, 1, qw[i], Q, qd + i);
          }
          break;
-      case 222:
+   }
+   return 0;
+}
+
+/// libCEED QFunction for building quadrature data for an H(curl) mass operator
+/// with a vector constant coefficient
+CEED_QFUNCTION(f_build_hcurlmass_const_vector)(void *ctx, CeedInt Q,
+                                               const CeedScalar *const *in,
+                                               CeedScalar *const *out)
+{
+   VectorFEMassContext *bc = (VectorFEMassContext *)ctx;
+   // in[0] is Jacobians with shape [dim, ncomp=space_dim, Q]
+   // in[1] is quadrature weights, size (Q)
+   //
+   // At every quadrature point, compute qw/det(J) adj(J) C adj(J)^T (for
+   // H(curl)) or qw/det(J) J^T C J (for H(div)) and store the symmetric part
+   // of the result
+   const CeedScalar *coeff = bc->coeff;
+   const CeedScalar *J = in[0], *qw = in[1];
+   CeedScalar *qd = out[0];
+   switch (10 * bc->space_dim + bc->dim)
+   {
+      case 21:
          CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++)
          {
-            MultAdjJCAdjJt22(J + i, Q, coeff, 1, 2, qw[i], Q, qd + i);
+            MultAdjJCAdjJt21(J + i, Q, coeff, 1, 2, qw[i], Q, qd + i);
          }
          break;
-      case 221:
+      case 22:
          CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++)
          {
-            MultAdjJCAdjJt22(J + i, Q, coeff, 1, 1, qw[i], Q, qd + i);
+            MultAdjJCAdjJt22(J + i, Q, coeff, 1, 2, qw[i], Q, qd + i);
          }
          break;
-      case 326:
+      case 32:
          CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++)
          {
-            MultAdjJCAdjJt32(J + i, Q, coeff, 1, 6, qw[i], Q, qd + i);
+            MultAdjJCAdjJt32(J + i, Q, coeff, 1, 3, qw[i], Q, qd + i);
          }
          break;
-      case 323:
+      case 33:
          CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++)
          {
-            MultAdjJCAdjJt32(J + i, Q, coeff, 1, 3, qw[i], Q, qd + i);
+            MultAdjJCAdjJt33(J + i, Q, coeff, 1, 3, qw[i], Q, qd + i);
          }
          break;
-      case 321:
+   }
+   return 0;
+}
+
+/// libCEED QFunction for building quadrature data for an H(curl) mass operator
+/// with a matrix constant coefficient
+CEED_QFUNCTION(f_build_hcurlmass_const_matrix)(void *ctx, CeedInt Q,
+                                               const CeedScalar *const *in,
+                                               CeedScalar *const *out)
+{
+   VectorFEMassContext *bc = (VectorFEMassContext *)ctx;
+   // in[0] is Jacobians with shape [dim, ncomp=space_dim, Q]
+   // in[1] is quadrature weights, size (Q)
+   //
+   // At every quadrature point, compute qw/det(J) adj(J) C adj(J)^T (for
+   // H(curl)) or qw/det(J) J^T C J (for H(div)) and store the symmetric part
+   // of the result
+   const CeedScalar *coeff = bc->coeff;
+   const CeedScalar *J = in[0], *qw = in[1];
+   CeedScalar *qd = out[0];
+   switch (10 * bc->space_dim + bc->dim)
+   {
+      case 21:
          CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++)
          {
-            MultAdjJCAdjJt32(J + i, Q, coeff, 1, 1, qw[i], Q, qd + i);
+            MultAdjJCAdjJt21(J + i, Q, coeff, 1, 3, qw[i], Q, qd + i);
          }
          break;
-      case 336:
+      case 22:
          CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++)
          {
-            MultAdjJCAdjJt33(J + i, Q, coeff, 1, 6, qw[i], Q, qd + i);
+            MultAdjJCAdjJt22(J + i, Q, coeff, 1, 3, qw[i], Q, qd + i);
          }
          break;
-      case 333:
+      case 32:
          CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++)
          {
-            MultAdjJCAdjJt33(J + i, Q, coeff, 1, 3, qw[i], Q, qd + i);
+            MultAdjJCAdjJt32(J + i, Q, coeff, 1, 6, qw[i], Q, qd + i);
          }
          break;
-      case 331:
+      case 33:
          CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++)
          {
-            MultAdjJCAdjJt33(J + i, Q, coeff, 1, 1, qw[i], Q, qd + i);
+            MultAdjJCAdjJt33(J + i, Q, coeff, 1, 6, qw[i], Q, qd + i);
          }
          break;
    }
    return 0;
 }
 
-/// libCEED QFunction for building quadrature data for a vector FE mass
-/// operator with a coefficient evaluated at quadrature points.
-CEED_QFUNCTION(f_build_hdivmass_quad)(void *ctx, CeedInt Q,
-                                      const CeedScalar *const *in,
-                                      CeedScalar *const *out)
+/// libCEED QFunction for building quadrature data for an H(div) mass operator
+/// with a scalar coefficient evaluated at quadrature points
+CEED_QFUNCTION(f_build_hdivmass_quad_scalar)(void *ctx, CeedInt Q,
+                                             const CeedScalar *const *in,
+                                             CeedScalar *const *out)
 {
    VectorFEMassContext *bc = (VectorFEMassContext *)ctx;
-   // in[0] is coefficients with shape [ncomp=coeff_comp, Q]
+   // in[0] is coefficients with shape [ncomp=1, Q]
    // in[1] is Jacobians with shape [dim, ncomp=space_dim, Q]
    // in[2] is quadrature weights, size (Q)
    //
    // At every quadrature point, compute qw/det(J) adj(J) C adj(J)^T (for
    // H(curl)) or qw/det(J) J^T C J (for H(div)) and store the symmetric part
-   // of the result.
+   // of the result
    const CeedScalar *c = in[0], *J = in[1], *qw = in[2];
    CeedScalar *qd = out[0];
-   switch (100 * bc->space_dim + 10 * bc->dim + bc->coeff_comp)
+   switch (10 * bc->space_dim + bc->dim)
    {
-      case 111:
+      case 11:
          CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++)
          {
             qd[i] = qw[i] * c[i] * J[i];
          }
          break;
-      case 213:
+      case 21:
          CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++)
          {
-            MultJtCJ21(J + i, Q, c + i, Q, 3, qw[i], Q, qd + i);
+            MultJtCJ21(J + i, Q, c + i, Q, 1, qw[i], Q, qd + i);
          }
          break;
-      case 212:
+      case 22:
          CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++)
          {
-            MultJtCJ21(J + i, Q, c + i, Q, 2, qw[i], Q, qd + i);
+            MultJtCJ22(J + i, Q, c + i, Q, 1, qw[i], Q, qd + i);
          }
          break;
-      case 211:
+      case 32:
          CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++)
          {
-            MultJtCJ21(J + i, Q, c + i, Q, 1, qw[i], Q, qd + i);
+            MultJtCJ32(J + i, Q, c + i, Q, 1, qw[i], Q, qd + i);
          }
          break;
-      case 223:
+      case 33:
          CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++)
          {
-            MultJtCJ22(J + i, Q, c + i, Q, 3, qw[i], Q, qd + i);
+            MultJtCJ33(J + i, Q, c + i, Q, 1, qw[i], Q, qd + i);
          }
          break;
-      case 222:
+   }
+   return 0;
+}
+
+/// libCEED QFunction for building quadrature data for an H(div) mass operator
+/// with a vector coefficient evaluated at quadrature points
+CEED_QFUNCTION(f_build_hdivmass_quad_vector)(void *ctx, CeedInt Q,
+                                             const CeedScalar *const *in,
+                                             CeedScalar *const *out)
+{
+   VectorFEMassContext *bc = (VectorFEMassContext *)ctx;
+   // in[0] is coefficients with shape [ncomp=space_dim, Q]
+   // in[1] is Jacobians with shape [dim, ncomp=space_dim, Q]
+   // in[2] is quadrature weights, size (Q)
+   //
+   // At every quadrature point, compute qw/det(J) adj(J) C adj(J)^T (for
+   // H(curl)) or qw/det(J) J^T C J (for H(div)) and store the symmetric part
+   // of the result
+   const CeedScalar *c = in[0], *J = in[1], *qw = in[2];
+   CeedScalar *qd = out[0];
+   switch (10 * bc->space_dim + bc->dim)
+   {
+      case 21:
          CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++)
          {
-            MultJtCJ22(J + i, Q, c + i, Q, 2, qw[i], Q, qd + i);
+            MultJtCJ21(J + i, Q, c + i, Q, 2, qw[i], Q, qd + i);
          }
          break;
-      case 221:
+      case 22:
          CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++)
          {
-            MultJtCJ22(J + i, Q, c + i, Q, 1, qw[i], Q, qd + i);
+            MultJtCJ22(J + i, Q, c + i, Q, 2, qw[i], Q, qd + i);
          }
          break;
-      case 326:
+      case 32:
          CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++)
          {
-            MultJtCJ32(J + i, Q, c + i, Q, 6, qw[i], Q, qd + i);
+            MultJtCJ32(J + i, Q, c + i, Q, 3, qw[i], Q, qd + i);
          }
          break;
-      case 323:
+      case 33:
          CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++)
          {
-            MultJtCJ32(J + i, Q, c + i, Q, 3, qw[i], Q, qd + i);
+            MultJtCJ33(J + i, Q, c + i, Q, 3, qw[i], Q, qd + i);
          }
          break;
-      case 321:
+   }
+   return 0;
+}
+
+/// libCEED QFunction for building quadrature data for an H(div) mass operator
+/// with a matrix coefficient evaluated at quadrature points
+CEED_QFUNCTION(f_build_hdivmass_quad_matrix)(void *ctx, CeedInt Q,
+                                             const CeedScalar *const *in,
+                                             CeedScalar *const *out)
+{
+   VectorFEMassContext *bc = (VectorFEMassContext *)ctx;
+   // in[0] is coefficients with shape [ncomp=space_dim*(space_dim+1)/2, Q]
+   // in[1] is Jacobians with shape [dim, ncomp=space_dim, Q]
+   // in[2] is quadrature weights, size (Q)
+   //
+   // At every quadrature point, compute qw/det(J) adj(J) C adj(J)^T (for
+   // H(curl)) or qw/det(J) J^T C J (for H(div)) and store the symmetric part
+   // of the result
+   const CeedScalar *c = in[0], *J = in[1], *qw = in[2];
+   CeedScalar *qd = out[0];
+   switch (10 * bc->space_dim + bc->dim)
+   {
+      case 21:
          CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++)
          {
-            MultJtCJ32(J + i, Q, c + i, Q, 1, qw[i], Q, qd + i);
+            MultJtCJ21(J + i, Q, c + i, Q, 3, qw[i], Q, qd + i);
          }
          break;
-      case 336:
+      case 22:
          CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++)
          {
-            MultJtCJ33(J + i, Q, c + i, Q, 6, qw[i], Q, qd + i);
+            MultJtCJ22(J + i, Q, c + i, Q, 3, qw[i], Q, qd + i);
          }
          break;
-      case 333:
+      case 32:
          CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++)
          {
-            MultJtCJ33(J + i, Q, c + i, Q, 3, qw[i], Q, qd + i);
+            MultJtCJ32(J + i, Q, c + i, Q, 6, qw[i], Q, qd + i);
          }
          break;
-      case 331:
+      case 33:
          CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++)
          {
-            MultJtCJ33(J + i, Q, c + i, Q, 1, qw[i], Q, qd + i);
+            MultJtCJ33(J + i, Q, c + i, Q, 6, qw[i], Q, qd + i);
          }
          break;
    }
    return 0;
 }
 
-CEED_QFUNCTION(f_build_hcurlmass_quad)(void *ctx, CeedInt Q,
-                                       const CeedScalar *const *in,
-                                       CeedScalar *const *out)
+/// libCEED QFunction for building quadrature data for an H(curl) mass operator
+/// with a scalar coefficient evaluated at quadrature points
+CEED_QFUNCTION(f_build_hcurlmass_quad_scalar)(void *ctx, CeedInt Q,
+                                              const CeedScalar *const *in,
+                                              CeedScalar *const *out)
 {
    VectorFEMassContext *bc = (VectorFEMassContext *)ctx;
-   // in[0] is coefficients with shape [ncomp=coeff_comp, Q]
+   // in[0] is coefficients with shape [ncomp=1, Q]
    // in[1] is Jacobians with shape [dim, ncomp=space_dim, Q]
    // in[2] is quadrature weights, size (Q)
    //
    // At every quadrature point, compute qw/det(J) adj(J) C adj(J)^T (for
    // H(curl)) or qw/det(J) J^T C J (for H(div)) and store the symmetric part
-   // of the result.
+   // of the result
    const CeedScalar *c = in[0], *J = in[1], *qw = in[2];
    CeedScalar *qd = out[0];
-   switch (100 * bc->space_dim + 10 * bc->dim + bc->coeff_comp)
+   switch (10 * bc->space_dim + bc->dim)
    {
-      case 111:
+      case 11:
          CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++)
          {
             qd[i] = qw[i] * c[i] / J[i];
          }
          break;
-      case 213:
+      case 21:
          CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++)
          {
-            MultAdjJCAdjJt21(J + i, Q, c + i, Q, 3, qw[i], Q, qd + i);
+            MultAdjJCAdjJt21(J + i, Q, c + i, Q, 1, qw[i], Q, qd + i);
          }
          break;
-      case 212:
+      case 22:
          CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++)
          {
-            MultAdjJCAdjJt21(J + i, Q, c + i, Q, 2, qw[i], Q, qd + i);
+            MultAdjJCAdjJt22(J + i, Q, c + i, Q, 1, qw[i], Q, qd + i);
          }
          break;
-      case 211:
+      case 32:
          CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++)
          {
-            MultAdjJCAdjJt21(J + i, Q, c + i, Q, 1, qw[i], Q, qd + i);
+            MultAdjJCAdjJt32(J + i, Q, c + i, Q, 1, qw[i], Q, qd + i);
          }
          break;
-      case 223:
+      case 33:
          CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++)
          {
-            MultAdjJCAdjJt22(J + i, Q, c + i, Q, 3, qw[i], Q, qd + i);
+            MultAdjJCAdjJt33(J + i, Q, c + i, Q, 1, qw[i], Q, qd + i);
          }
          break;
-      case 222:
+   }
+   return 0;
+}
+
+/// libCEED QFunction for building quadrature data for an H(curl) mass operator
+/// with a vector coefficient evaluated at quadrature points
+CEED_QFUNCTION(f_build_hcurlmass_quad_vector)(void *ctx, CeedInt Q,
+                                              const CeedScalar *const *in,
+                                              CeedScalar *const *out)
+{
+   VectorFEMassContext *bc = (VectorFEMassContext *)ctx;
+   // in[0] is coefficients with shape [ncomp=space_dim, Q]
+   // in[1] is Jacobians with shape [dim, ncomp=space_dim, Q]
+   // in[2] is quadrature weights, size (Q)
+   //
+   // At every quadrature point, compute qw/det(J) adj(J) C adj(J)^T (for
+   // H(curl)) or qw/det(J) J^T C J (for H(div)) and store the symmetric part
+   // of the result
+   const CeedScalar *c = in[0], *J = in[1], *qw = in[2];
+   CeedScalar *qd = out[0];
+   switch (10 * bc->space_dim + bc->dim)
+   {
+      case 21:
          CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++)
          {
-            MultAdjJCAdjJt22(J + i, Q, c + i, Q, 2, qw[i], Q, qd + i);
+            MultAdjJCAdjJt21(J + i, Q, c + i, Q, 2, qw[i], Q, qd + i);
          }
          break;
-      case 221:
+      case 22:
          CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++)
          {
-            MultAdjJCAdjJt22(J + i, Q, c + i, Q, 1, qw[i], Q, qd + i);
+            MultAdjJCAdjJt22(J + i, Q, c + i, Q, 2, qw[i], Q, qd + i);
          }
          break;
-      case 326:
+      case 32:
          CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++)
          {
-            MultAdjJCAdjJt32(J + i, Q, c + i, Q, 6, qw[i], Q, qd + i);
+            MultAdjJCAdjJt32(J + i, Q, c + i, Q, 3, qw[i], Q, qd + i);
          }
          break;
-      case 323:
+      case 33:
          CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++)
          {
-            MultAdjJCAdjJt32(J + i, Q, c + i, Q, 3, qw[i], Q, qd + i);
+            MultAdjJCAdjJt33(J + i, Q, c + i, Q, 3, qw[i], Q, qd + i);
          }
          break;
-      case 321:
+   }
+   return 0;
+}
+
+/// libCEED QFunction for building quadrature data for an H(curl) mass operator
+/// with a matrix coefficient evaluated at quadrature points
+CEED_QFUNCTION(f_build_hcurlmass_quad_matrix)(void *ctx, CeedInt Q,
+                                              const CeedScalar *const *in,
+                                              CeedScalar *const *out)
+{
+   VectorFEMassContext *bc = (VectorFEMassContext *)ctx;
+   // in[0] is coefficients with shape [ncomp=space_dim*(space_dim+1)/2, Q]
+   // in[1] is Jacobians with shape [dim, ncomp=space_dim, Q]
+   // in[2] is quadrature weights, size (Q)
+   //
+   // At every quadrature point, compute qw/det(J) adj(J) C adj(J)^T (for
+   // H(curl)) or qw/det(J) J^T C J (for H(div)) and store the symmetric part
+   // of the result
+   const CeedScalar *c = in[0], *J = in[1], *qw = in[2];
+   CeedScalar *qd = out[0];
+   switch (10 * bc->space_dim + bc->dim)
+   {
+      case 21:
          CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++)
          {
-            MultAdjJCAdjJt32(J + i, Q, c + i, Q, 1, qw[i], Q, qd + i);
+            MultAdjJCAdjJt21(J + i, Q, c + i, Q, 3, qw[i], Q, qd + i);
          }
          break;
-      case 336:
+      case 22:
          CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++)
          {
-            MultAdjJCAdjJt33(J + i, Q, c + i, Q, 6, qw[i], Q, qd + i);
+            MultAdjJCAdjJt22(J + i, Q, c + i, Q, 3, qw[i], Q, qd + i);
          }
          break;
-      case 333:
+      case 32:
          CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++)
          {
-            MultAdjJCAdjJt33(J + i, Q, c + i, Q, 3, qw[i], Q, qd + i);
+            MultAdjJCAdjJt32(J + i, Q, c + i, Q, 6, qw[i], Q, qd + i);
          }
          break;
-      case 331:
+      case 33:
          CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++)
          {
-            MultAdjJCAdjJt33(J + i, Q, c + i, Q, 1, qw[i], Q, qd + i);
+            MultAdjJCAdjJt33(J + i, Q, c + i, Q, 6, qw[i], Q, qd + i);
          }
          break;
    }
@@ -463,10 +641,11 @@ CEED_QFUNCTION(f_apply_vecfemass)(void *ctx, CeedInt Q,
    return 0;
 }
 
-/// libCEED QFunction for applying a vector FE mass operator
-CEED_QFUNCTION(f_apply_hdivmass_mf_const)(void *ctx, CeedInt Q,
-                                          const CeedScalar *const *in,
-                                          CeedScalar *const *out)
+/// libCEED QFunction for applying an H(div) mass operator with a scalar
+/// constant coefficient
+CEED_QFUNCTION(f_apply_hdivmass_mf_const_scalar)(void *ctx, CeedInt Q,
+                                                 const CeedScalar *const *in,
+                                                 CeedScalar *const *out)
 {
    VectorFEMassContext *bc = (VectorFEMassContext *)ctx;
    // in[0], out[0] have shape [dim, ncomp=1, Q]
@@ -474,13 +653,13 @@ CEED_QFUNCTION(f_apply_hdivmass_mf_const)(void *ctx, CeedInt Q,
    // in[2] is quadrature weights, size (Q)
    //
    // At every quadrature point, compute qw/det(J) adj(J) C adj(J)^T (for
-   // H(curl)) or qw/det(J) J^T C J (for H(div)).
+   // H(curl)) or qw/det(J) J^T C J (for H(div))
    const CeedScalar *coeff = bc->coeff;
    const CeedScalar *u = in[0], *J = in[1], *qw = in[2];
    CeedScalar *v = out[0];
-   switch (100 * bc->space_dim + 10 * bc->dim + bc->coeff_comp)
+   switch (10 * bc->space_dim + bc->dim)
    {
-      case 111:
+      case 11:
          CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++)
          {
             const CeedScalar coeff0 = coeff[0];
@@ -488,23 +667,7 @@ CEED_QFUNCTION(f_apply_hdivmass_mf_const)(void *ctx, CeedInt Q,
             v[i] = qd * u[i];
          }
          break;
-      case 213:
-         CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++)
-         {
-            CeedScalar qd;
-            MultJtCJ21(J + i, Q, coeff, 1, 3, qw[i], 1, &qd);
-            v[i] = qd * u[i];
-         }
-         break;
-      case 212:
-         CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++)
-         {
-            CeedScalar qd;
-            MultJtCJ21(J + i, Q, coeff, 1, 2, qw[i], 1, &qd);
-            v[i] = qd * u[i];
-         }
-         break;
-      case 211:
+      case 21:
          CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++)
          {
             CeedScalar qd;
@@ -512,77 +675,98 @@ CEED_QFUNCTION(f_apply_hdivmass_mf_const)(void *ctx, CeedInt Q,
             v[i] = qd * u[i];
          }
          break;
-      case 223:
+      case 22:
          CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++)
          {
             CeedScalar qd[3];
-            MultJtCJ22(J + i, Q, coeff, 1, 3, qw[i], 1, qd);
+            MultJtCJ22(J + i, Q, coeff, 1, 1, qw[i], 1, qd);
             const CeedScalar u0 = u[i + Q * 0];
             const CeedScalar u1 = u[i + Q * 1];
             v[i + Q * 0] = qd[0] * u0 + qd[1] * u1;
             v[i + Q * 1] = qd[1] * u0 + qd[2] * u1;
          }
          break;
-      case 222:
+      case 32:
          CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++)
          {
             CeedScalar qd[3];
-            MultJtCJ22(J + i, Q, coeff, 1, 2, qw[i], 1, qd);
+            MultJtCJ32(J + i, Q, coeff, 1, 1, qw[i], 1, qd);
             const CeedScalar u0 = u[i + Q * 0];
             const CeedScalar u1 = u[i + Q * 1];
             v[i + Q * 0] = qd[0] * u0 + qd[1] * u1;
             v[i + Q * 1] = qd[1] * u0 + qd[2] * u1;
          }
          break;
-      case 221:
+      case 33:
          CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++)
          {
-            CeedScalar qd[3];
-            MultJtCJ22(J + i, Q, coeff, 1, 1, qw[i], 1, qd);
+            CeedScalar qd[6];
+            MultJtCJ33(J + i, Q, coeff, 1, 1, qw[i], 1, qd);
             const CeedScalar u0 = u[i + Q * 0];
             const CeedScalar u1 = u[i + Q * 1];
-            v[i + Q * 0] = qd[0] * u0 + qd[1] * u1;
-            v[i + Q * 1] = qd[1] * u0 + qd[2] * u1;
+            const CeedScalar u2 = u[i + Q * 2];
+            v[i + Q * 0] = qd[0] * u0 + qd[1] * u1 + qd[2] * u2;
+            v[i + Q * 1] = qd[1] * u0 + qd[3] * u1 + qd[4] * u2;
+            v[i + Q * 2] = qd[2] * u0 + qd[4] * u1 + qd[5] * u2;
          }
          break;
-      case 326:
+   }
+   return 0;
+}
+
+/// libCEED QFunction for applying an H(div) mass operator with a vector
+/// constant coefficient
+CEED_QFUNCTION(f_apply_hdivmass_mf_const_vector)(void *ctx, CeedInt Q,
+                                                 const CeedScalar *const *in,
+                                                 CeedScalar *const *out)
+{
+   VectorFEMassContext *bc = (VectorFEMassContext *)ctx;
+   // in[0], out[0] have shape [dim, ncomp=1, Q]
+   // in[1] is Jacobians with shape [dim, ncomp=space_dim, Q]
+   // in[2] is quadrature weights, size (Q)
+   //
+   // At every quadrature point, compute qw/det(J) adj(J) C adj(J)^T (for
+   // H(curl)) or qw/det(J) J^T C J (for H(div))
+   const CeedScalar *coeff = bc->coeff;
+   const CeedScalar *u = in[0], *J = in[1], *qw = in[2];
+   CeedScalar *v = out[0];
+   switch (10 * bc->space_dim + bc->dim)
+   {
+      case 21:
          CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++)
          {
-            CeedScalar qd[3];
-            MultJtCJ32(J + i, Q, coeff, 1, 6, qw[i], 1, qd);
-            const CeedScalar u0 = u[i + Q * 0];
-            const CeedScalar u1 = u[i + Q * 1];
-            v[i + Q * 0] = qd[0] * u0 + qd[1] * u1;
-            v[i + Q * 1] = qd[1] * u0 + qd[2] * u1;
+            CeedScalar qd;
+            MultJtCJ21(J + i, Q, coeff, 1, 2, qw[i], 1, &qd);
+            v[i] = qd * u[i];
          }
          break;
-      case 323:
+      case 22:
          CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++)
          {
             CeedScalar qd[3];
-            MultJtCJ32(J + i, Q, coeff, 1, 3, qw[i], 1, qd);
+            MultJtCJ22(J + i, Q, coeff, 1, 2, qw[i], 1, qd);
             const CeedScalar u0 = u[i + Q * 0];
             const CeedScalar u1 = u[i + Q * 1];
             v[i + Q * 0] = qd[0] * u0 + qd[1] * u1;
             v[i + Q * 1] = qd[1] * u0 + qd[2] * u1;
          }
          break;
-      case 321:
+      case 32:
          CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++)
          {
             CeedScalar qd[3];
-            MultJtCJ32(J + i, Q, coeff, 1, 1, qw[i], 1, qd);
+            MultJtCJ32(J + i, Q, coeff, 1, 3, qw[i], 1, qd);
             const CeedScalar u0 = u[i + Q * 0];
             const CeedScalar u1 = u[i + Q * 1];
             v[i + Q * 0] = qd[0] * u0 + qd[1] * u1;
             v[i + Q * 1] = qd[1] * u0 + qd[2] * u1;
          }
          break;
-      case 336:
+      case 33:
          CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++)
          {
             CeedScalar qd[6];
-            MultJtCJ33(J + i, Q, coeff, 1, 6, qw[i], 1, qd);
+            MultJtCJ33(J + i, Q, coeff, 1, 3, qw[i], 1, qd);
             const CeedScalar u0 = u[i + Q * 0];
             const CeedScalar u1 = u[i + Q * 1];
             const CeedScalar u2 = u[i + Q * 2];
@@ -591,24 +775,63 @@ CEED_QFUNCTION(f_apply_hdivmass_mf_const)(void *ctx, CeedInt Q,
             v[i + Q * 2] = qd[2] * u0 + qd[4] * u1 + qd[5] * u2;
          }
          break;
-      case 333:
+   }
+   return 0;
+}
+
+/// libCEED QFunction for applying an H(div) mass operator with a matrix
+/// constant coefficient
+CEED_QFUNCTION(f_apply_hdivmass_mf_const_matrix)(void *ctx, CeedInt Q,
+                                                 const CeedScalar *const *in,
+                                                 CeedScalar *const *out)
+{
+   VectorFEMassContext *bc = (VectorFEMassContext *)ctx;
+   // in[0], out[0] have shape [dim, ncomp=1, Q]
+   // in[1] is Jacobians with shape [dim, ncomp=space_dim, Q]
+   // in[2] is quadrature weights, size (Q)
+   //
+   // At every quadrature point, compute qw/det(J) adj(J) C adj(J)^T (for
+   // H(curl)) or qw/det(J) J^T C J (for H(div))
+   const CeedScalar *coeff = bc->coeff;
+   const CeedScalar *u = in[0], *J = in[1], *qw = in[2];
+   CeedScalar *v = out[0];
+   switch (10 * bc->space_dim + bc->dim)
+   {
+      case 21:
+         CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++)
+         {
+            CeedScalar qd;
+            MultJtCJ21(J + i, Q, coeff, 1, 3, qw[i], 1, &qd);
+            v[i] = qd * u[i];
+         }
+         break;
+      case 22:
          CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++)
          {
-            CeedScalar qd[6];
-            MultJtCJ33(J + i, Q, coeff, 1, 3, qw[i], 1, qd);
+            CeedScalar qd[3];
+            MultJtCJ22(J + i, Q, coeff, 1, 3, qw[i], 1, qd);
             const CeedScalar u0 = u[i + Q * 0];
             const CeedScalar u1 = u[i + Q * 1];
-            const CeedScalar u2 = u[i + Q * 2];
-            v[i + Q * 0] = qd[0] * u0 + qd[1] * u1 + qd[2] * u2;
-            v[i + Q * 1] = qd[1] * u0 + qd[3] * u1 + qd[4] * u2;
-            v[i + Q * 2] = qd[2] * u0 + qd[4] * u1 + qd[5] * u2;
+            v[i + Q * 0] = qd[0] * u0 + qd[1] * u1;
+            v[i + Q * 1] = qd[1] * u0 + qd[2] * u1;
+         }
+         break;
+      case 32:
+         CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++)
+         {
+            CeedScalar qd[3];
+            MultJtCJ32(J + i, Q, coeff, 1, 6, qw[i], 1, qd);
+            const CeedScalar u0 = u[i + Q * 0];
+            const CeedScalar u1 = u[i + Q * 1];
+            v[i + Q * 0] = qd[0] * u0 + qd[1] * u1;
+            v[i + Q * 1] = qd[1] * u0 + qd[2] * u1;
          }
          break;
-      case 331:
+      case 33:
          CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++)
          {
             CeedScalar qd[6];
-            MultJtCJ33(J + i, Q, coeff, 1, 1, qw[i], 1, qd);
+            MultJtCJ33(J + i, Q, coeff, 1, 6, qw[i], 1, qd);
             const CeedScalar u0 = u[i + Q * 0];
             const CeedScalar u1 = u[i + Q * 1];
             const CeedScalar u2 = u[i + Q * 2];
@@ -621,9 +844,11 @@ CEED_QFUNCTION(f_apply_hdivmass_mf_const)(void *ctx, CeedInt Q,
    return 0;
 }
 
-CEED_QFUNCTION(f_apply_hcurlmass_mf_const)(void *ctx, CeedInt Q,
-                                           const CeedScalar *const *in,
-                                           CeedScalar *const *out)
+/// libCEED QFunction for applying an H(curl) mass operator with a scalar
+/// constant coefficient
+CEED_QFUNCTION(f_apply_hcurlmass_mf_const_scalar)(void *ctx, CeedInt Q,
+                                                  const CeedScalar *const *in,
+                                                  CeedScalar *const *out)
 {
    VectorFEMassContext *bc = (VectorFEMassContext *)ctx;
    // in[0], out[0] have shape [dim, ncomp=1, Q]
@@ -631,13 +856,13 @@ CEED_QFUNCTION(f_apply_hcurlmass_mf_const)(void *ctx, CeedInt Q,
    // in[2] is quadrature weights, size (Q)
    //
    // At every quadrature point, compute qw/det(J) adj(J) C adj(J)^T (for
-   // H(curl)) or qw/det(J) J^T C J (for H(div)).
+   // H(curl)) or qw/det(J) J^T C J (for H(div))
    const CeedScalar *coeff = bc->coeff;
    const CeedScalar *u = in[0], *J = in[1], *qw = in[2];
    CeedScalar *v = out[0];
-   switch (100 * bc->space_dim + 10 * bc->dim + bc->coeff_comp)
+   switch (10 * bc->space_dim + bc->dim)
    {
-      case 111:
+      case 11:
          CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++)
          {
             const CeedScalar coeff0 = coeff[0];
@@ -645,23 +870,7 @@ CEED_QFUNCTION(f_apply_hcurlmass_mf_const)(void *ctx, CeedInt Q,
             v[i] = qd * u[i];
          }
          break;
-      case 213:
-         CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++)
-         {
-            CeedScalar qd;
-            MultAdjJCAdjJt21(J + i, Q, coeff, 1, 3, qw[i], 1, &qd);
-            v[i] = qd * u[i];
-         }
-         break;
-      case 212:
-         CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++)
-         {
-            CeedScalar qd;
-            MultAdjJCAdjJt21(J + i, Q, coeff, 1, 2, qw[i], 1, &qd);
-            v[i] = qd * u[i];
-         }
-         break;
-      case 211:
+      case 21:
          CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++)
          {
             CeedScalar qd;
@@ -669,77 +878,98 @@ CEED_QFUNCTION(f_apply_hcurlmass_mf_const)(void *ctx, CeedInt Q,
             v[i] = qd * u[i];
          }
          break;
-      case 223:
+      case 22:
          CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++)
          {
             CeedScalar qd[3];
-            MultAdjJCAdjJt22(J + i, Q, coeff, 1, 3, qw[i], 1, qd);
+            MultAdjJCAdjJt22(J + i, Q, coeff, 1, 1, qw[i], 1, qd);
             const CeedScalar u0 = u[i + Q * 0];
             const CeedScalar u1 = u[i + Q * 1];
             v[i + Q * 0] = qd[0] * u0 + qd[1] * u1;
             v[i + Q * 1] = qd[1] * u0 + qd[2] * u1;
          }
          break;
-      case 222:
+      case 32:
          CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++)
          {
             CeedScalar qd[3];
-            MultAdjJCAdjJt22(J + i, Q, coeff, 1, 2, qw[i], 1, qd);
+            MultAdjJCAdjJt32(J + i, Q, coeff, 1, 1, qw[i], 1, qd);
             const CeedScalar u0 = u[i + Q * 0];
             const CeedScalar u1 = u[i + Q * 1];
             v[i + Q * 0] = qd[0] * u0 + qd[1] * u1;
             v[i + Q * 1] = qd[1] * u0 + qd[2] * u1;
          }
          break;
-      case 221:
+      case 33:
          CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++)
          {
-            CeedScalar qd[3];
-            MultAdjJCAdjJt22(J + i, Q, coeff, 1, 1, qw[i], 1, qd);
+            CeedScalar qd[6];
+            MultAdjJCAdjJt33(J + i, Q, coeff, 1, 1, qw[i], 1, qd);
             const CeedScalar u0 = u[i + Q * 0];
             const CeedScalar u1 = u[i + Q * 1];
-            v[i + Q * 0] = qd[0] * u0 + qd[1] * u1;
-            v[i + Q * 1] = qd[1] * u0 + qd[2] * u1;
+            const CeedScalar u2 = u[i + Q * 2];
+            v[i + Q * 0] = qd[0] * u0 + qd[1] * u1 + qd[2] * u2;
+            v[i + Q * 1] = qd[1] * u0 + qd[3] * u1 + qd[4] * u2;
+            v[i + Q * 2] = qd[2] * u0 + qd[4] * u1 + qd[5] * u2;
          }
          break;
-      case 326:
+   }
+   return 0;
+}
+
+/// libCEED QFunction for applying an H(curl) mass operator with a vector
+/// constant coefficient
+CEED_QFUNCTION(f_apply_hcurlmass_mf_const_vector)(void *ctx, CeedInt Q,
+                                                  const CeedScalar *const *in,
+                                                  CeedScalar *const *out)
+{
+   VectorFEMassContext *bc = (VectorFEMassContext *)ctx;
+   // in[0], out[0] have shape [dim, ncomp=1, Q]
+   // in[1] is Jacobians with shape [dim, ncomp=space_dim, Q]
+   // in[2] is quadrature weights, size (Q)
+   //
+   // At every quadrature point, compute qw/det(J) adj(J) C adj(J)^T (for
+   // H(curl)) or qw/det(J) J^T C J (for H(div))
+   const CeedScalar *coeff = bc->coeff;
+   const CeedScalar *u = in[0], *J = in[1], *qw = in[2];
+   CeedScalar *v = out[0];
+   switch (10 * bc->space_dim + bc->dim)
+   {
+      case 21:
          CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++)
          {
-            CeedScalar qd[3];
-            MultAdjJCAdjJt32(J + i, Q, coeff, 1, 6, qw[i], 1, qd);
-            const CeedScalar u0 = u[i + Q * 0];
-            const CeedScalar u1 = u[i + Q * 1];
-            v[i + Q * 0] = qd[0] * u0 + qd[1] * u1;
-            v[i + Q * 1] = qd[1] * u0 + qd[2] * u1;
+            CeedScalar qd;
+            MultAdjJCAdjJt21(J + i, Q, coeff, 1, 2, qw[i], 1, &qd);
+            v[i] = qd * u[i];
          }
          break;
-      case 323:
+      case 22:
          CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++)
          {
             CeedScalar qd[3];
-            MultAdjJCAdjJt32(J + i, Q, coeff, 1, 3, qw[i], 1, qd);
+            MultAdjJCAdjJt22(J + i, Q, coeff, 1, 2, qw[i], 1, qd);
             const CeedScalar u0 = u[i + Q * 0];
             const CeedScalar u1 = u[i + Q * 1];
             v[i + Q * 0] = qd[0] * u0 + qd[1] * u1;
             v[i + Q * 1] = qd[1] * u0 + qd[2] * u1;
          }
          break;
-      case 321:
+      case 32:
          CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++)
          {
             CeedScalar qd[3];
-            MultAdjJCAdjJt32(J + i, Q, coeff, 1, 1, qw[i], 1, qd);
+            MultAdjJCAdjJt32(J + i, Q, coeff, 1, 3, qw[i], 1, qd);
             const CeedScalar u0 = u[i + Q * 0];
             const CeedScalar u1 = u[i + Q * 1];
             v[i + Q * 0] = qd[0] * u0 + qd[1] * u1;
             v[i + Q * 1] = qd[1] * u0 + qd[2] * u1;
          }
          break;
-      case 336:
+      case 33:
          CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++)
          {
             CeedScalar qd[6];
-            MultAdjJCAdjJt33(J + i, Q, coeff, 1, 6, qw[i], 1, qd);
+            MultAdjJCAdjJt33(J + i, Q, coeff, 1, 3, qw[i], 1, qd);
             const CeedScalar u0 = u[i + Q * 0];
             const CeedScalar u1 = u[i + Q * 1];
             const CeedScalar u2 = u[i + Q * 2];
@@ -748,24 +978,63 @@ CEED_QFUNCTION(f_apply_hcurlmass_mf_const)(void *ctx, CeedInt Q,
             v[i + Q * 2] = qd[2] * u0 + qd[4] * u1 + qd[5] * u2;
          }
          break;
-      case 333:
+   }
+   return 0;
+}
+
+/// libCEED QFunction for applying an H(curl) mass operator with a matrix
+/// constant coefficient
+CEED_QFUNCTION(f_apply_hcurlmass_mf_const_matrix)(void *ctx, CeedInt Q,
+                                                  const CeedScalar *const *in,
+                                                  CeedScalar *const *out)
+{
+   VectorFEMassContext *bc = (VectorFEMassContext *)ctx;
+   // in[0], out[0] have shape [dim, ncomp=1, Q]
+   // in[1] is Jacobians with shape [dim, ncomp=space_dim, Q]
+   // in[2] is quadrature weights, size (Q)
+   //
+   // At every quadrature point, compute qw/det(J) adj(J) C adj(J)^T (for
+   // H(curl)) or qw/det(J) J^T C J (for H(div))
+   const CeedScalar *coeff = bc->coeff;
+   const CeedScalar *u = in[0], *J = in[1], *qw = in[2];
+   CeedScalar *v = out[0];
+   switch (10 * bc->space_dim + bc->dim)
+   {
+      case 21:
          CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++)
          {
-            CeedScalar qd[6];
-            MultAdjJCAdjJt33(J + i, Q, coeff, 1, 3, qw[i], 1, qd);
+            CeedScalar qd;
+            MultAdjJCAdjJt21(J + i, Q, coeff, 1, 3, qw[i], 1, &qd);
+            v[i] = qd * u[i];
+         }
+         break;
+      case 22:
+         CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++)
+         {
+            CeedScalar qd[3];
+            MultAdjJCAdjJt22(J + i, Q, coeff, 1, 3, qw[i], 1, qd);
             const CeedScalar u0 = u[i + Q * 0];
             const CeedScalar u1 = u[i + Q * 1];
-            const CeedScalar u2 = u[i + Q * 2];
-            v[i + Q * 0] = qd[0] * u0 + qd[1] * u1 + qd[2] * u2;
-            v[i + Q * 1] = qd[1] * u0 + qd[3] * u1 + qd[4] * u2;
-            v[i + Q * 2] = qd[2] * u0 + qd[4] * u1 + qd[5] * u2;
+            v[i + Q * 0] = qd[0] * u0 + qd[1] * u1;
+            v[i + Q * 1] = qd[1] * u0 + qd[2] * u1;
+         }
+         break;
+      case 32:
+         CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++)
+         {
+            CeedScalar qd[3];
+            MultAdjJCAdjJt32(J + i, Q, coeff, 1, 6, qw[i], 1, qd);
+            const CeedScalar u0 = u[i + Q * 0];
+            const CeedScalar u1 = u[i + Q * 1];
+            v[i + Q * 0] = qd[0] * u0 + qd[1] * u1;
+            v[i + Q * 1] = qd[1] * u0 + qd[2] * u1;
          }
          break;
-      case 331:
+      case 33:
          CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++)
          {
             CeedScalar qd[6];
-            MultAdjJCAdjJt33(J + i, Q, coeff, 1, 1, qw[i], 1, qd);
+            MultAdjJCAdjJt33(J + i, Q, coeff, 1, 6, qw[i], 1, qd);
             const CeedScalar u0 = u[i + Q * 0];
             const CeedScalar u1 = u[i + Q * 1];
             const CeedScalar u2 = u[i + Q * 2];
@@ -778,47 +1047,32 @@ CEED_QFUNCTION(f_apply_hcurlmass_mf_const)(void *ctx, CeedInt Q,
    return 0;
 }
 
-/// libCEED QFunction for applying a vector FE mass operator
-CEED_QFUNCTION(f_apply_hdivmass_mf_quad)(void *ctx, CeedInt Q,
-                                         const CeedScalar *const *in,
-                                         CeedScalar *const *out)
+/// libCEED QFunction for applying an H(div) operator with a scalar
+/// coefficient evaluated at quadrature points
+CEED_QFUNCTION(f_apply_hdivmass_mf_quad_scalar)(void *ctx, CeedInt Q,
+                                                const CeedScalar *const *in,
+                                                CeedScalar *const *out)
 {
    VectorFEMassContext *bc = (VectorFEMassContext *)ctx;
    // in[0], out[0] have shape [dim, ncomp=1, Q]
-   // in[1] is coefficients with shape [ncomp=coeff_comp, Q]
+   // in[1] is coefficients with shape [ncomp=1, Q]
    // in[2] is Jacobians with shape [dim, ncomp=space_dim, Q]
    // in[3] is quadrature weights, size (Q)
    //
    // At every quadrature point, compute qw/det(J) adj(J) C adj(J)^T (for
-   // H(curl)) or qw/det(J) J^T C J (for H(div)).
+   // H(curl)) or qw/det(J) J^T C J (for H(div))
    const CeedScalar *u = in[0], *c = in[1], *J = in[2], *qw = in[3];
    CeedScalar *v = out[0];
-   switch (100 * bc->space_dim + 10 * bc->dim + bc->coeff_comp)
+   switch (10 * bc->space_dim + bc->dim)
    {
-      case 111:
+      case 11:
          CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++)
          {
             const CeedScalar qd = qw[i] * c[i] * J[i];
             v[i] = qd * u[i];
          }
          break;
-      case 213:
-         CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++)
-         {
-            CeedScalar qd;
-            MultJtCJ21(J + i, Q, c + i, Q, 3, qw[i], 1, &qd);
-            v[i] = qd * u[i];
-         }
-         break;
-      case 212:
-         CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++)
-         {
-            CeedScalar qd;
-            MultJtCJ21(J + i, Q, c + i, Q, 2, qw[i], 1, &qd);
-            v[i] = qd * u[i];
-         }
-         break;
-      case 211:
+      case 21:
          CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++)
          {
             CeedScalar qd;
@@ -826,77 +1080,98 @@ CEED_QFUNCTION(f_apply_hdivmass_mf_quad)(void *ctx, CeedInt Q,
             v[i] = qd * u[i];
          }
          break;
-      case 223:
+      case 22:
          CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++)
          {
             CeedScalar qd[3];
-            MultJtCJ22(J + i, Q, c + i, Q, 3, qw[i], 1, qd);
+            MultJtCJ22(J + i, Q, c + i, Q, 1, qw[i], 1, qd);
             const CeedScalar u0 = u[i + Q * 0];
             const CeedScalar u1 = u[i + Q * 1];
             v[i + Q * 0] = qd[0] * u0 + qd[1] * u1;
             v[i + Q * 1] = qd[1] * u0 + qd[2] * u1;
          }
          break;
-      case 222:
+      case 32:
          CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++)
          {
             CeedScalar qd[3];
-            MultJtCJ22(J + i, Q, c + i, Q, 2, qw[i], 1, qd);
+            MultJtCJ32(J + i, Q, c + i, Q, 1, qw[i], 1, qd);
             const CeedScalar u0 = u[i + Q * 0];
             const CeedScalar u1 = u[i + Q * 1];
             v[i + Q * 0] = qd[0] * u0 + qd[1] * u1;
             v[i + Q * 1] = qd[1] * u0 + qd[2] * u1;
          }
          break;
-      case 221:
+      case 33:
          CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++)
          {
-            CeedScalar qd[3];
-            MultJtCJ22(J + i, Q, c + i, Q, 1, qw[i], 1, qd);
+            CeedScalar qd[6];
+            MultJtCJ33(J + i, Q, c + i, Q, 1, qw[i], 1, qd);
             const CeedScalar u0 = u[i + Q * 0];
             const CeedScalar u1 = u[i + Q * 1];
-            v[i + Q * 0] = qd[0] * u0 + qd[1] * u1;
-            v[i + Q * 1] = qd[1] * u0 + qd[2] * u1;
+            const CeedScalar u2 = u[i + Q * 2];
+            v[i + Q * 0] = qd[0] * u0 + qd[1] * u1 + qd[2] * u2;
+            v[i + Q * 1] = qd[1] * u0 + qd[3] * u1 + qd[4] * u2;
+            v[i + Q * 2] = qd[2] * u0 + qd[4] * u1 + qd[5] * u2;
          }
          break;
-      case 326:
+   }
+   return 0;
+}
+
+/// libCEED QFunction for applying an H(div) operator with a vector
+/// coefficient evaluated at quadrature points
+CEED_QFUNCTION(f_apply_hdivmass_mf_quad_vector)(void *ctx, CeedInt Q,
+                                                const CeedScalar *const *in,
+                                                CeedScalar *const *out)
+{
+   VectorFEMassContext *bc = (VectorFEMassContext *)ctx;
+   // in[0], out[0] have shape [dim, ncomp=1, Q]
+   // in[1] is coefficients with shape [ncomp=space_dim, Q]
+   // in[2] is Jacobians with shape [dim, ncomp=space_dim, Q]
+   // in[3] is quadrature weights, size (Q)
+   //
+   // At every quadrature point, compute qw/det(J) adj(J) C adj(J)^T (for
+   // H(curl)) or qw/det(J) J^T C J (for H(div))
+   const CeedScalar *u = in[0], *c = in[1], *J = in[2], *qw = in[3];
+   CeedScalar *v = out[0];
+   switch (10 * bc->space_dim + bc->dim)
+   {
+      case 21:
          CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++)
          {
-            CeedScalar qd[3];
-            MultJtCJ32(J + i, Q, c + i, Q, 6, qw[i], 1, qd);
-            const CeedScalar u0 = u[i + Q * 0];
-            const CeedScalar u1 = u[i + Q * 1];
-            v[i + Q * 0] = qd[0] * u0 + qd[1] * u1;
-            v[i + Q * 1] = qd[1] * u0 + qd[2] * u1;
+            CeedScalar qd;
+            MultJtCJ21(J + i, Q, c + i, Q, 2, qw[i], 1, &qd);
+            v[i] = qd * u[i];
          }
          break;
-      case 323:
+      case 22:
          CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++)
          {
             CeedScalar qd[3];
-            MultJtCJ32(J + i, Q, c + i, Q, 3, qw[i], 1, qd);
+            MultJtCJ22(J + i, Q, c + i, Q, 2, qw[i], 1, qd);
             const CeedScalar u0 = u[i + Q * 0];
             const CeedScalar u1 = u[i + Q * 1];
             v[i + Q * 0] = qd[0] * u0 + qd[1] * u1;
             v[i + Q * 1] = qd[1] * u0 + qd[2] * u1;
          }
          break;
-      case 321:
+      case 32:
          CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++)
          {
             CeedScalar qd[3];
-            MultJtCJ32(J + i, Q, c + i, Q, 1, qw[i], 1, qd);
+            MultJtCJ32(J + i, Q, c + i, Q, 3, qw[i], 1, qd);
             const CeedScalar u0 = u[i + Q * 0];
             const CeedScalar u1 = u[i + Q * 1];
             v[i + Q * 0] = qd[0] * u0 + qd[1] * u1;
             v[i + Q * 1] = qd[1] * u0 + qd[2] * u1;
          }
          break;
-      case 336:
+      case 33:
          CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++)
          {
             CeedScalar qd[6];
-            MultJtCJ33(J + i, Q, c + i, Q, 6, qw[i], 1, qd);
+            MultJtCJ33(J + i, Q, c + i, Q, 3, qw[i], 1, qd);
             const CeedScalar u0 = u[i + Q * 0];
             const CeedScalar u1 = u[i + Q * 1];
             const CeedScalar u2 = u[i + Q * 2];
@@ -905,24 +1180,63 @@ CEED_QFUNCTION(f_apply_hdivmass_mf_quad)(void *ctx, CeedInt Q,
             v[i + Q * 2] = qd[2] * u0 + qd[4] * u1 + qd[5] * u2;
          }
          break;
-      case 333:
+   }
+   return 0;
+}
+
+/// libCEED QFunction for applying an H(div) operator with a matrix
+/// coefficient evaluated at quadrature points
+CEED_QFUNCTION(f_apply_hdivmass_mf_quad_matrix)(void *ctx, CeedInt Q,
+                                                const CeedScalar *const *in,
+                                                CeedScalar *const *out)
+{
+   VectorFEMassContext *bc = (VectorFEMassContext *)ctx;
+   // in[0], out[0] have shape [dim, ncomp=1, Q]
+   // in[1] is coefficients with shape [ncomp=space_dim*(space_dim+1)/2, Q]
+   // in[2] is Jacobians with shape [dim, ncomp=space_dim, Q]
+   // in[3] is quadrature weights, size (Q)
+   //
+   // At every quadrature point, compute qw/det(J) adj(J) C adj(J)^T (for
+   // H(curl)) or qw/det(J) J^T C J (for H(div))
+   const CeedScalar *u = in[0], *c = in[1], *J = in[2], *qw = in[3];
+   CeedScalar *v = out[0];
+   switch (10 * bc->space_dim + bc->dim)
+   {
+      case 21:
          CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++)
          {
-            CeedScalar qd[6];
-            MultJtCJ33(J + i, Q, c + i, Q, 3, qw[i], 1, qd);
+            CeedScalar qd;
+            MultJtCJ21(J + i, Q, c + i, Q, 3, qw[i], 1, &qd);
+            v[i] = qd * u[i];
+         }
+         break;
+      case 22:
+         CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++)
+         {
+            CeedScalar qd[3];
+            MultJtCJ22(J + i, Q, c + i, Q, 3, qw[i], 1, qd);
             const CeedScalar u0 = u[i + Q * 0];
             const CeedScalar u1 = u[i + Q * 1];
-            const CeedScalar u2 = u[i + Q * 2];
-            v[i + Q * 0] = qd[0] * u0 + qd[1] * u1 + qd[2] * u2;
-            v[i + Q * 1] = qd[1] * u0 + qd[3] * u1 + qd[4] * u2;
-            v[i + Q * 2] = qd[2] * u0 + qd[4] * u1 + qd[5] * u2;
+            v[i + Q * 0] = qd[0] * u0 + qd[1] * u1;
+            v[i + Q * 1] = qd[1] * u0 + qd[2] * u1;
          }
          break;
-      case 331:
+      case 32:
+         CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++)
+         {
+            CeedScalar qd[3];
+            MultJtCJ32(J + i, Q, c + i, Q, 6, qw[i], 1, qd);
+            const CeedScalar u0 = u[i + Q * 0];
+            const CeedScalar u1 = u[i + Q * 1];
+            v[i + Q * 0] = qd[0] * u0 + qd[1] * u1;
+            v[i + Q * 1] = qd[1] * u0 + qd[2] * u1;
+         }
+         break;
+      case 33:
          CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++)
          {
             CeedScalar qd[6];
-            MultJtCJ33(J + i, Q, c + i, Q, 1, qw[i], 1, qd);
+            MultJtCJ33(J + i, Q, c + i, Q, 6, qw[i], 1, qd);
             const CeedScalar u0 = u[i + Q * 0];
             const CeedScalar u1 = u[i + Q * 1];
             const CeedScalar u2 = u[i + Q * 2];
@@ -935,46 +1249,32 @@ CEED_QFUNCTION(f_apply_hdivmass_mf_quad)(void *ctx, CeedInt Q,
    return 0;
 }
 
-CEED_QFUNCTION(f_apply_hcurlmass_mf_quad)(void *ctx, CeedInt Q,
-                                          const CeedScalar *const *in,
-                                          CeedScalar *const *out)
+/// libCEED QFunction for applying an H(curl) operator with a scalar
+/// coefficient evaluated at quadrature points
+CEED_QFUNCTION(f_apply_hcurlmass_mf_quad_scalar)(void *ctx, CeedInt Q,
+                                                 const CeedScalar *const *in,
+                                                 CeedScalar *const *out)
 {
    VectorFEMassContext *bc = (VectorFEMassContext *)ctx;
    // in[0], out[0] have shape [dim, ncomp=1, Q]
-   // in[1] is coefficients with shape [ncomp=coeff_comp, Q]
+   // in[1] is coefficients with shape [ncomp=1, Q]
    // in[2] is Jacobians with shape [dim, ncomp=space_dim, Q]
    // in[3] is quadrature weights, size (Q)
    //
    // At every quadrature point, compute qw/det(J) adj(J) C adj(J)^T (for
-   // H(curl)) or qw/det(J) J^T C J (for H(div)).
+   // H(curl)) or qw/det(J) J^T C J (for H(div))
    const CeedScalar *u = in[0], *c = in[1], *J = in[2], *qw = in[3];
    CeedScalar *v = out[0];
-   switch (100 * bc->space_dim + 10 * bc->dim + bc->coeff_comp)
+   switch (10 * bc->space_dim + bc->dim)
    {
-      case 111:
+      case 11:
          CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++)
          {
             const CeedScalar qd = qw[i] * c[i] / J[i];
             v[i] = qd * u[i];
          }
          break;
-      case 213:
-         CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++)
-         {
-            CeedScalar qd;
-            MultAdjJCAdjJt21(J + i, Q, c + i, Q, 3, qw[i], 1, &qd);
-            v[i] = qd * u[i];
-         }
-         break;
-      case 212:
-         CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++)
-         {
-            CeedScalar qd;
-            MultAdjJCAdjJt21(J + i, Q, c + i, Q, 2, qw[i], 1, &qd);
-            v[i] = qd * u[i];
-         }
-         break;
-      case 211:
+      case 21:
          CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++)
          {
             CeedScalar qd;
@@ -982,77 +1282,98 @@ CEED_QFUNCTION(f_apply_hcurlmass_mf_quad)(void *ctx, CeedInt Q,
             v[i] = qd * u[i];
          }
          break;
-      case 223:
+      case 22:
          CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++)
          {
             CeedScalar qd[3];
-            MultAdjJCAdjJt22(J + i, Q, c + i, Q, 3, qw[i], 1, qd);
+            MultAdjJCAdjJt22(J + i, Q, c + i, Q, 1, qw[i], 1, qd);
             const CeedScalar u0 = u[i + Q * 0];
             const CeedScalar u1 = u[i + Q * 1];
             v[i + Q * 0] = qd[0] * u0 + qd[1] * u1;
             v[i + Q * 1] = qd[1] * u0 + qd[2] * u1;
          }
          break;
-      case 222:
+      case 32:
          CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++)
          {
             CeedScalar qd[3];
-            MultAdjJCAdjJt22(J + i, Q, c + i, Q, 2, qw[i], 1, qd);
+            MultAdjJCAdjJt32(J + i, Q, c + i, Q, 1, qw[i], 1, qd);
             const CeedScalar u0 = u[i + Q * 0];
             const CeedScalar u1 = u[i + Q * 1];
             v[i + Q * 0] = qd[0] * u0 + qd[1] * u1;
             v[i + Q * 1] = qd[1] * u0 + qd[2] * u1;
          }
          break;
-      case 221:
+      case 33:
          CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++)
          {
-            CeedScalar qd[3];
-            MultAdjJCAdjJt22(J + i, Q, c + i, Q, 1, qw[i], 1, qd);
+            CeedScalar qd[6];
+            MultAdjJCAdjJt33(J + i, Q, c + i, Q, 1, qw[i], 1, qd);
             const CeedScalar u0 = u[i + Q * 0];
             const CeedScalar u1 = u[i + Q * 1];
-            v[i + Q * 0] = qd[0] * u0 + qd[1] * u1;
-            v[i + Q * 1] = qd[1] * u0 + qd[2] * u1;
+            const CeedScalar u2 = u[i + Q * 2];
+            v[i + Q * 0] = qd[0] * u0 + qd[1] * u1 + qd[2] * u2;
+            v[i + Q * 1] = qd[1] * u0 + qd[3] * u1 + qd[4] * u2;
+            v[i + Q * 2] = qd[2] * u0 + qd[4] * u1 + qd[5] * u2;
          }
          break;
-      case 326:
+   }
+   return 0;
+}
+
+/// libCEED QFunction for applying an H(curl) operator with a vector
+/// coefficient evaluated at quadrature points
+CEED_QFUNCTION(f_apply_hcurlmass_mf_quad_vector)(void *ctx, CeedInt Q,
+                                                 const CeedScalar *const *in,
+                                                 CeedScalar *const *out)
+{
+   VectorFEMassContext *bc = (VectorFEMassContext *)ctx;
+   // in[0], out[0] have shape [dim, ncomp=1, Q]
+   // in[1] is coefficients with shape [ncomp=space_dim, Q]
+   // in[2] is Jacobians with shape [dim, ncomp=space_dim, Q]
+   // in[3] is quadrature weights, size (Q)
+   //
+   // At every quadrature point, compute qw/det(J) adj(J) C adj(J)^T (for
+   // H(curl)) or qw/det(J) J^T C J (for H(div))
+   const CeedScalar *u = in[0], *c = in[1], *J = in[2], *qw = in[3];
+   CeedScalar *v = out[0];
+   switch (10 * bc->space_dim + bc->dim)
+   {
+      case 21:
          CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++)
          {
-            CeedScalar qd[3];
-            MultAdjJCAdjJt32(J + i, Q, c + i, Q, 6, qw[i], 1, qd);
-            const CeedScalar u0 = u[i + Q * 0];
-            const CeedScalar u1 = u[i + Q * 1];
-            v[i + Q * 0] = qd[0] * u0 + qd[1] * u1;
-            v[i + Q * 1] = qd[1] * u0 + qd[2] * u1;
+            CeedScalar qd;
+            MultAdjJCAdjJt21(J + i, Q, c + i, Q, 2, qw[i], 1, &qd);
+            v[i] = qd * u[i];
          }
          break;
-      case 323:
+      case 22:
          CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++)
          {
             CeedScalar qd[3];
-            MultAdjJCAdjJt32(J + i, Q, c + i, Q, 3, qw[i], 1, qd);
+            MultAdjJCAdjJt22(J + i, Q, c + i, Q, 2, qw[i], 1, qd);
             const CeedScalar u0 = u[i + Q * 0];
             const CeedScalar u1 = u[i + Q * 1];
             v[i + Q * 0] = qd[0] * u0 + qd[1] * u1;
             v[i + Q * 1] = qd[1] * u0 + qd[2] * u1;
          }
          break;
-      case 321:
+      case 32:
          CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++)
          {
             CeedScalar qd[3];
-            MultAdjJCAdjJt32(J + i, Q, c + i, Q, 1, qw[i], 1, qd);
+            MultAdjJCAdjJt32(J + i, Q, c + i, Q, 3, qw[i], 1, qd);
             const CeedScalar u0 = u[i + Q * 0];
             const CeedScalar u1 = u[i + Q * 1];
             v[i + Q * 0] = qd[0] * u0 + qd[1] * u1;
             v[i + Q * 1] = qd[1] * u0 + qd[2] * u1;
          }
          break;
-      case 336:
+      case 33:
          CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++)
          {
             CeedScalar qd[6];
-            MultAdjJCAdjJt33(J + i, Q, c + i, Q, 6, qw[i], 1, qd);
+            MultAdjJCAdjJt33(J + i, Q, c + i, Q, 3, qw[i], 1, qd);
             const CeedScalar u0 = u[i + Q * 0];
             const CeedScalar u1 = u[i + Q * 1];
             const CeedScalar u2 = u[i + Q * 2];
@@ -1061,24 +1382,63 @@ CEED_QFUNCTION(f_apply_hcurlmass_mf_quad)(void *ctx, CeedInt Q,
             v[i + Q * 2] = qd[2] * u0 + qd[4] * u1 + qd[5] * u2;
          }
          break;
-      case 333:
+   }
+   return 0;
+}
+
+/// libCEED QFunction for applying an H(curl) operator with a matrix
+/// coefficient evaluated at quadrature points
+CEED_QFUNCTION(f_apply_hcurlmass_mf_quad_matrix)(void *ctx, CeedInt Q,
+                                                 const CeedScalar *const *in,
+                                                 CeedScalar *const *out)
+{
+   VectorFEMassContext *bc = (VectorFEMassContext *)ctx;
+   // in[0], out[0] have shape [dim, ncomp=1, Q]
+   // in[1] is coefficients with shape [ncomp=space_dim*(space_dim+1)/2, Q]
+   // in[2] is Jacobians with shape [dim, ncomp=space_dim, Q]
+   // in[3] is quadrature weights, size (Q)
+   //
+   // At every quadrature point, compute qw/det(J) adj(J) C adj(J)^T (for
+   // H(curl)) or qw/det(J) J^T C J (for H(div))
+   const CeedScalar *u = in[0], *c = in[1], *J = in[2], *qw = in[3];
+   CeedScalar *v = out[0];
+   switch (10 * bc->space_dim + bc->dim)
+   {
+      case 21:
          CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++)
          {
-            CeedScalar qd[6];
-            MultAdjJCAdjJt33(J + i, Q, c + i, Q, 3, qw[i], 1, qd);
+            CeedScalar qd;
+            MultAdjJCAdjJt21(J + i, Q, c + i, Q, 3, qw[i], 1, &qd);
+            v[i] = qd * u[i];
+         }
+         break;
+      case 22:
+         CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++)
+         {
+            CeedScalar qd[3];
+            MultAdjJCAdjJt22(J + i, Q, c + i, Q, 3, qw[i], 1, qd);
             const CeedScalar u0 = u[i + Q * 0];
             const CeedScalar u1 = u[i + Q * 1];
-            const CeedScalar u2 = u[i + Q * 2];
-            v[i + Q * 0] = qd[0] * u0 + qd[1] * u1 + qd[2] * u2;
-            v[i + Q * 1] = qd[1] * u0 + qd[3] * u1 + qd[4] * u2;
-            v[i + Q * 2] = qd[2] * u0 + qd[4] * u1 + qd[5] * u2;
+            v[i + Q * 0] = qd[0] * u0 + qd[1] * u1;
+            v[i + Q * 1] = qd[1] * u0 + qd[2] * u1;
+         }
+         break;
+      case 32:
+         CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++)
+         {
+            CeedScalar qd[3];
+            MultAdjJCAdjJt32(J + i, Q, c + i, Q, 6, qw[i], 1, qd);
+            const CeedScalar u0 = u[i + Q * 0];
+            const CeedScalar u1 = u[i + Q * 1];
+            v[i + Q * 0] = qd[0] * u0 + qd[1] * u1;
+            v[i + Q * 1] = qd[1] * u0 + qd[2] * u1;
          }
          break;
-      case 331:
+      case 33:
          CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++)
          {
             CeedScalar qd[6];
-            MultAdjJCAdjJt33(J + i, Q, c + i, Q, 1, qw[i], 1, qd);
+            MultAdjJCAdjJt33(J + i, Q, c + i, Q, 6, qw[i], 1, qd);
             const CeedScalar u0 = u[i + Q * 0];
             const CeedScalar u1 = u[i + Q * 1];
             const CeedScalar u2 = u[i + Q * 2];
diff --git a/fem/ceed/interface/coefficient.hpp b/fem/ceed/interface/coefficient.hpp
index 9ccc40ceec0..d4e067d7a07 100644
--- a/fem/ceed/interface/coefficient.hpp
+++ b/fem/ceed/interface/coefficient.hpp
@@ -74,7 +74,7 @@ inline void InitCoefficient(mfem::Coefficient *Q, mfem::Mesh &mesh,
                             const mfem::IntegrationRule &ir, bool use_bdr,
                             Coefficient *&coeff_ptr)
 {
-   if (Q == nullptr || dynamic_cast<ConstantCoefficient *>(Q))
+   if (Q == nullptr || dynamic_cast<mfem::ConstantCoefficient *>(Q))
    {
       // The constant coefficient case is handled by the QFunction context
       coeff_ptr = nullptr;
@@ -277,15 +277,15 @@ inline void InitCoefficientWithIndices(mfem::Coefficient *Q,
       // The constant coefficient case is handled by the QFunction context
       coeff_ptr = nullptr;
    }
-   else if (GridFunctionCoefficient *gf_coeff =
-               dynamic_cast<GridFunctionCoefficient *>(Q))
+   else if (mfem::GridFunctionCoefficient *gf_coeff =
+               dynamic_cast<mfem::GridFunctionCoefficient *>(Q))
    {
       GridCoefficient *ceed_coeff =
          new GridCoefficient(*gf_coeff->GetGridFunction());
       coeff_ptr = ceed_coeff;
    }
-   else if (QuadratureFunctionCoefficient *qf_coeff =
-               dynamic_cast<QuadratureFunctionCoefficient *>(Q))
+   else if (mfem::QuadratureFunctionCoefficient *qf_coeff =
+               dynamic_cast<mfem::QuadratureFunctionCoefficient *>(Q))
    {
       const int ne = use_bdr ? mesh.GetNBE() : mesh.GetNE();
       const int nq = ir.GetNPoints();
@@ -360,15 +360,15 @@ inline void InitCoefficientWithIndices(mfem::VectorCoefficient *VQ,
       // The constant coefficient case is handled by the QFunction context
       coeff_ptr = nullptr;
    }
-   else if (VectorGridFunctionCoefficient *vgf_coeff =
-               dynamic_cast<VectorGridFunctionCoefficient *>(VQ))
+   else if (mfem::VectorGridFunctionCoefficient *vgf_coeff =
+               dynamic_cast<mfem::VectorGridFunctionCoefficient *>(VQ))
    {
       GridCoefficient *ceed_coeff =
          new GridCoefficient(*vgf_coeff->GetGridFunction());
       coeff_ptr = ceed_coeff;
    }
-   else if (VectorQuadratureFunctionCoefficient *vqf_coeff =
-               dynamic_cast<VectorQuadratureFunctionCoefficient *>(VQ))
+   else if (mfem::VectorQuadratureFunctionCoefficient *vqf_coeff =
+               dynamic_cast<mfem::VectorQuadratureFunctionCoefficient *>(VQ))
    {
       const int vdim = vqf_coeff->GetVDim();
       const int ne = use_bdr ? mesh.GetNBE() : mesh.GetNE();
diff --git a/fem/ceed/interface/integrator.hpp b/fem/ceed/interface/integrator.hpp
index ca139825198..2ebad8e086f 100644
--- a/fem/ceed/interface/integrator.hpp
+++ b/fem/ceed/interface/integrator.hpp
@@ -36,34 +36,18 @@ struct OperatorInfo
 {
    /** The path to the QFunction header. */
    const char *header;
-   /** The name of the QFunction to build a partially assembled CeedOperator
-       with a constant Coefficient. */
-   const char *build_func_const;
-   /** The QFunction to build a partially assembled CeedOperator with a constant
-       Coefficient. */
-   CeedQFunctionUser build_qf_const;
-   /** The name of the QFunction to build a partially assembled CeedOperator
-       with a variable Coefficient. */
-   const char *build_func_quad;
-   /** The QFunction to build a partially assembled CeedOperator with a variable
-       Coefficient. */
-   CeedQFunctionUser build_qf_quad;
+   /** The name of the QFunction to build a partially assembled CeedOperator. */
+   const char *build_func;
+   /** The QFunction to build a partially assembled CeedOperator. */
+   CeedQFunctionUser build_qf;
    /** The name of the QFunction to apply a partially assembled CeedOperator. */
    const char *apply_func;
    /** The QFunction to apply a partially assembled CeedOperator. */
    CeedQFunctionUser apply_qf;
-   /** The name of the QFunction to apply a matrix-free CeedOperator with a
-       constant Coefficient. */
-   const char *apply_func_mf_const;
-   /** The QFunction to apply a matrix-free CeedOperator with a constant
-       Coefficient. */
-   CeedQFunctionUser apply_qf_mf_const;
-   /** The name of the QFunction to apply a matrix-free CeedOperator with a
-       variable Coefficient. */
-   const char *apply_func_mf_quad;
-   /** The QFunction to apply a matrix-free CeedOperator with a variable
-       Coefficient. */
-   CeedQFunctionUser apply_qf_mf_quad;
+   /** The name of the QFunction to apply a matrix-free CeedOperator. */
+   const char *apply_func_mf;
+   /** The QFunction to apply a matrix-free CeedOperator. */
+   CeedQFunctionUser apply_qf_mf;
    /** The EvalMode on the trial basis functions. */
    EvalMode trial_op;
    /** The EvalMode on the test basis functions. */
@@ -274,17 +258,11 @@ class Integrator : public Operator
                                 &qdata_restr);
          CeedVectorCreate(ceed, nelem * nqpts * qdatasize, &qdata);
 
-         std::string build_func = coeff ? info.build_func_quad
-                                  : info.build_func_const;
-         CeedQFunctionUser build_qf = coeff ? info.build_qf_quad
-                                      : info.build_qf_const;
-
          // Create the QFunction that builds the operator (i.e. computes its
          // quadrature data) and set its context data.
          CeedQFunction build_qfunc;
-         std::string qf_file = GetCeedPath() + info.header;
-         std::string qf = qf_file + build_func;
-         CeedQFunctionCreateInterior(ceed, 1, build_qf, qf.c_str(),
+         std::string qf = GetCeedPath() + info.header + info.build_func;
+         CeedQFunctionCreateInterior(ceed, 1, info.build_qf, qf.c_str(),
                                      &build_qfunc);
          if (coeff)
          {
@@ -335,17 +313,9 @@ class Integrator : public Operator
          coeff = nullptr;
       }
 
-      std::string apply_func = !use_mf ? info.apply_func :
-                               (coeff ? info.apply_func_mf_quad
-                                : info.apply_func_mf_const);
-      CeedQFunctionUser apply_qf = !use_mf ? info.apply_qf :
-                                   (coeff ? info.apply_qf_mf_quad
-                                    : info.apply_qf_mf_const);
-
       // Create the QFunction that defines the action of the operator.
-      std::string qf_file = GetCeedPath() + info.header;
-      std::string qf = qf_file + apply_func;
-      CeedQFunctionCreateInterior(ceed, 1, apply_qf, qf.c_str(),
+      std::string qf = GetCeedPath() + info.header + info.apply_func;
+      CeedQFunctionCreateInterior(ceed, 1, info.apply_qf, qf.c_str(),
                                   &apply_qfunc);
       // input
       switch (info.trial_op)