diff --git a/fem/ceed/integrators/convection/convection.cpp b/fem/ceed/integrators/convection/convection.cpp index 47adca54524..c980123baf2 100644 --- a/fem/ceed/integrators/convection/convection.cpp +++ b/fem/ceed/integrators/convection/convection.cpp @@ -25,17 +25,28 @@ namespace ceed #ifdef MFEM_USE_CEED struct ConvectionOperatorInfo : public OperatorInfo { - ConvectionContext ctx; + ConvectionContext ctx = {0}; ConvectionOperatorInfo(const mfem::FiniteElementSpace &fes, mfem::VectorCoefficient *VQ, double alpha, - bool use_bdr) + bool use_bdr = false, bool use_mf = false) { MFEM_VERIFY(VQ && VQ->GetVDim() == fes.GetMesh()->SpaceDimension(), "Incorrect coefficient dimensions in ceed::ConvectionOperatorInfo!"); ctx.dim = fes.GetMesh()->Dimension() - use_bdr; ctx.space_dim = fes.GetMesh()->SpaceDimension(); - if (VectorConstantCoefficient *const_coeff = - dynamic_cast(VQ)) + ctx.alpha = alpha; + if (!use_mf) + { + apply_func = ":f_apply_conv"; + apply_qf = &f_apply_conv; + } + else + { + build_func = ""; + build_qf = nullptr; + } + if (mfem::VectorConstantCoefficient *const_coeff = + dynamic_cast(VQ)) { const int vdim = VQ->GetVDim(); MFEM_VERIFY(vdim <= LIBCEED_CONV_COEFF_COMP_MAX, @@ -45,20 +56,31 @@ struct ConvectionOperatorInfo : public OperatorInfo { ctx.coeff[i] = val[i]; } + if (!use_mf) + { + build_func = ":f_build_conv_const"; + build_qf = &f_build_conv_const; + } + else + { + apply_func = ":f_apply_conv_mf_const"; + apply_qf = &f_apply_conv_mf_const; + } + } + else + { + if (!use_mf) + { + build_func = ":f_build_conv_quad"; + build_qf = &f_build_conv_quad; + } + else + { + apply_func = ":f_apply_conv_mf_quad"; + apply_qf = &f_apply_conv_mf_quad; + } } - ctx.alpha = alpha; - header = "/integrators/convection/convection_qf.h"; - build_func_const = ":f_build_conv_const"; - build_qf_const = &f_build_conv_const; - build_func_quad = ":f_build_conv_quad"; - build_qf_quad = &f_build_conv_quad; - apply_func = ":f_apply_conv"; - apply_qf = &f_apply_conv; - apply_func_mf_const = ":f_apply_conv_mf_const"; - apply_qf_mf_const = &f_apply_conv_mf_const; - apply_func_mf_quad = ":f_apply_conv_mf_quad"; - apply_qf_mf_quad = &f_apply_conv_mf_quad; trial_op = EvalMode::Grad; test_op = EvalMode::Interp; qdatasize = ctx.dim; @@ -89,7 +111,7 @@ MFConvectionIntegrator::MFConvectionIntegrator( const bool use_bdr) { #ifdef MFEM_USE_CEED - ConvectionOperatorInfo info(fes, VQ, alpha, use_bdr); + ConvectionOperatorInfo info(fes, VQ, alpha, use_bdr, true); Assemble(integ, info, fes, VQ, use_bdr, true); #else MFEM_ABORT("MFEM must be built with MFEM_USE_CEED=YES to use libCEED."); diff --git a/fem/ceed/integrators/convection/convection_qf.h b/fem/ceed/integrators/convection/convection_qf.h index 909ea919425..0dd11387c4a 100644 --- a/fem/ceed/integrators/convection/convection_qf.h +++ b/fem/ceed/integrators/convection/convection_qf.h @@ -16,12 +16,11 @@ #define LIBCEED_CONV_COEFF_COMP_MAX 3 -/// A structure used to pass additional data to f_build_conv and f_apply_conv struct ConvectionContext { CeedInt dim, space_dim; - CeedScalar coeff[LIBCEED_CONV_COEFF_COMP_MAX]; CeedScalar alpha; + CeedScalar coeff[LIBCEED_CONV_COEFF_COMP_MAX]; }; /// libCEED QFunction for building quadrature data for a convection operator @@ -34,7 +33,7 @@ CEED_QFUNCTION(f_build_conv_const)(void *ctx, CeedInt Q, // in[0] is Jacobians with shape [dim, ncomp=space_dim, Q] // in[1] is quadrature weights, size (Q) // - // At every quadrature point, compute and store qw * α * c^T adj(J)^T. + // At every quadrature point, compute and store qw * α * c^T adj(J)^T const CeedScalar alpha = bc->alpha; const CeedScalar *coeff = bc->coeff; const CeedScalar *J = in[0], *qw = in[1]; @@ -77,7 +76,7 @@ CEED_QFUNCTION(f_build_conv_const)(void *ctx, CeedInt Q, } /// libCEED QFunction for building quadrature data for a convection operator -/// with a coefficient evaluated at quadrature points. +/// with a coefficient evaluated at quadrature points CEED_QFUNCTION(f_build_conv_quad)(void *ctx, CeedInt Q, const CeedScalar *const *in, CeedScalar *const *out) @@ -87,7 +86,7 @@ CEED_QFUNCTION(f_build_conv_quad)(void *ctx, CeedInt Q, // in[1] is Jacobians with shape [dim, ncomp=space_dim, Q] // in[2] is quadrature weights, size (Q) // - // At every quadrature point, compute and store qw * α * c^T adj(J)^T. + // At every quadrature point, compute and store qw * α * c^T adj(J)^T const CeedScalar alpha = bc->alpha; const CeedScalar *c = in[0], *J = in[1], *qw = in[2]; CeedScalar *qd = out[0]; @@ -127,7 +126,7 @@ CEED_QFUNCTION(f_build_conv_quad)(void *ctx, CeedInt Q, return 0; } -/// libCEED QFunction for applying a conv operator +/// libCEED QFunction for applying a convection operator CEED_QFUNCTION(f_apply_conv)(void *ctx, CeedInt Q, const CeedScalar *const *in, CeedScalar *const *out) @@ -166,7 +165,8 @@ CEED_QFUNCTION(f_apply_conv)(void *ctx, CeedInt Q, return 0; } -/// libCEED QFunction for applying a conv operator +/// libCEED QFunction for applying a convection operator with a constant +/// coefficient CEED_QFUNCTION(f_apply_conv_mf_const)(void *ctx, CeedInt Q, const CeedScalar *const *in, CeedScalar *const *out) @@ -177,7 +177,7 @@ CEED_QFUNCTION(f_apply_conv_mf_const)(void *ctx, CeedInt Q, // in[2] is quadrature weights, size (Q) // out[0] has shape [ncomp=1, Q] // - // At every quadrature point, compute qw * α * c^T adj(J)^T. + // At every quadrature point, compute qw * α * c^T adj(J)^T const CeedScalar alpha = bc->alpha; const CeedScalar *coeff = bc->coeff; const CeedScalar *ug = in[0], *J = in[1], *qw = in[2]; @@ -235,7 +235,8 @@ CEED_QFUNCTION(f_apply_conv_mf_const)(void *ctx, CeedInt Q, return 0; } -/// libCEED QFunction for applying a conv operator +/// libCEED QFunction for applying a convection operator with a coefficient +/// evaluated at quadrature points CEED_QFUNCTION(f_apply_conv_mf_quad)(void *ctx, CeedInt Q, const CeedScalar *const *in, CeedScalar *const *out) @@ -247,7 +248,7 @@ CEED_QFUNCTION(f_apply_conv_mf_quad)(void *ctx, CeedInt Q, // in[3] is quadrature weights, size (Q) // out[0] has shape [ncomp=1, Q] // - // At every quadrature point, compute qw * α * c^T adj(J)^T. + // At every quadrature point, compute qw * α * c^T adj(J)^T const CeedScalar alpha = bc->alpha; const CeedScalar *ug = in[0], *c = in[1], *J = in[2], *qw = in[3]; CeedScalar *vg = out[0]; diff --git a/fem/ceed/integrators/curlcurl/curlcurl.cpp b/fem/ceed/integrators/curlcurl/curlcurl.cpp index e813d38cfcf..5c230e86b3a 100644 --- a/fem/ceed/integrators/curlcurl/curlcurl.cpp +++ b/fem/ceed/integrators/curlcurl/curlcurl.cpp @@ -25,10 +25,10 @@ namespace ceed #ifdef MFEM_USE_CEED struct CurlCurlOperatorInfo : public OperatorInfo { - CurlCurlContext ctx; + CurlCurlContext ctx = {0}; template CurlCurlOperatorInfo(const mfem::FiniteElementSpace &fes, CoeffType *Q, - bool use_bdr) + bool use_bdr = false, bool use_mf = false) { MFEM_VERIFY(fes.GetVDim() == 1, "libCEED interface for vector FE does not support VDim > 1!"); @@ -37,65 +37,116 @@ struct CurlCurlOperatorInfo : public OperatorInfo "CurlCurlIntegrator requires dim == 2 or dim == 3!"); ctx.space_dim = fes.GetMesh()->SpaceDimension(); ctx.curl_dim = (ctx.dim < 3) ? 1 : ctx.dim; + if (!use_mf) + { + apply_func = ":f_apply_curlcurl"; + apply_qf = &f_apply_curlcurl; + } + else + { + build_func = ""; + build_qf = nullptr; + } if (Q == nullptr) { - ctx.coeff_comp = 1; ctx.coeff[0] = 1.0; + if (!use_mf) + { + build_func = ":f_build_curlcurl_const_scalar"; + build_qf = &f_build_curlcurl_const_scalar; + } + else + { + apply_func = ":f_apply_curlcurl_mf_const_scalar"; + apply_qf = &f_apply_curlcurl_mf_const_scalar; + } } else { - InitCoefficient(*Q); + InitCoefficient(*Q, use_mf); } - header = "/integrators/curlcurl/curlcurl_qf.h"; - build_func_const = ":f_build_curlcurl_const"; - build_qf_const = &f_build_curlcurl_const; - build_func_quad = ":f_build_curlcurl_quad"; - build_qf_quad = &f_build_curlcurl_quad; - apply_func = ":f_apply_curlcurl"; - apply_qf = &f_apply_curlcurl; - apply_func_mf_const = ":f_apply_curlcurl_mf_const"; - apply_qf_mf_const = &f_apply_curlcurl_mf_const; - apply_func_mf_quad = ":f_apply_curlcurl_mf_quad"; - apply_qf_mf_quad = &f_apply_curlcurl_mf_quad; trial_op = EvalMode::Curl; test_op = EvalMode::Curl; qdatasize = (ctx.curl_dim * (ctx.curl_dim + 1)) / 2; } - void InitCoefficient(mfem::Coefficient &Q) + void InitCoefficient(mfem::Coefficient &Q, bool use_mf) { - ctx.coeff_comp = 1; - if (ConstantCoefficient *const_coeff = - dynamic_cast(&Q)) + if (mfem::ConstantCoefficient *const_coeff = + dynamic_cast(&Q)) { ctx.coeff[0] = const_coeff->constant; + if (!use_mf) + { + build_func = ":f_build_curlcurl_const_scalar"; + build_qf = &f_build_curlcurl_const_scalar; + } + else + { + apply_func = ":f_apply_curlcurl_mf_const_scalar"; + apply_qf = &f_apply_curlcurl_mf_const_scalar; + } + } + else + { + if (!use_mf) + { + build_func = ":f_build_curlcurl_quad_scalar"; + build_qf = &f_build_curlcurl_quad_scalar; + } + else + { + apply_func = ":f_apply_curlcurl_mf_quad_scalar"; + apply_qf = &f_apply_curlcurl_mf_quad_scalar; + } } } - void InitCoefficient(mfem::VectorCoefficient &VQ) + void InitCoefficient(mfem::VectorCoefficient &VQ, bool use_mf) { - const int vdim = VQ.GetVDim(); - ctx.coeff_comp = vdim; - if (VectorConstantCoefficient *const_coeff = - dynamic_cast(&VQ)) + if (mfem::VectorConstantCoefficient *const_coeff = + dynamic_cast(&VQ)) { - MFEM_VERIFY(ctx.coeff_comp <= LIBCEED_CURLCURL_COEFF_COMP_MAX, + const int vdim = VQ.GetVDim(); + MFEM_VERIFY(vdim <= LIBCEED_CURLCURL_COEFF_COMP_MAX, "VectorCoefficient dimension exceeds context storage!"); const mfem::Vector &val = const_coeff->GetVec(); for (int i = 0; i < vdim; i++) { ctx.coeff[i] = val[i]; } + if (!use_mf) + { + build_func = ":f_build_curlcurl_const_vector"; + build_qf = &f_build_curlcurl_const_vector; + } + else + { + apply_func = ":f_apply_curlcurl_mf_const_vector"; + apply_qf = &f_apply_curlcurl_mf_const_vector; + } + } + else + { + if (!use_mf) + { + build_func = ":f_build_curlcurl_quad_vector"; + build_qf = &f_build_curlcurl_quad_vector; + } + else + { + apply_func = ":f_apply_curlcurl_mf_quad_vector"; + apply_qf = &f_apply_curlcurl_mf_quad_vector; + } } } - void InitCoefficient(mfem::MatrixCoefficient &MQ) + void InitCoefficient(mfem::MatrixCoefficient &MQ, bool use_mf) { // Assumes matrix coefficient is symmetric - const int vdim = MQ.GetVDim(); - ctx.coeff_comp = (vdim * (vdim + 1)) / 2; - if (MatrixConstantCoefficient *const_coeff = - dynamic_cast(&MQ)) + if (mfem::MatrixConstantCoefficient *const_coeff = + dynamic_cast(&MQ)) { - MFEM_VERIFY(ctx.coeff_comp <= LIBCEED_CURLCURL_COEFF_COMP_MAX, + const int vdim = MQ.GetVDim(); + MFEM_VERIFY((vdim * (vdim + 1)) / 2 <= LIBCEED_CURLCURL_COEFF_COMP_MAX, "MatrixCoefficient dimensions exceed context storage!"); const mfem::DenseMatrix &val = const_coeff->GetMatrix(); for (int j = 0; j < vdim; j++) @@ -106,6 +157,29 @@ struct CurlCurlOperatorInfo : public OperatorInfo ctx.coeff[idx] = val(i, j); } } + if (!use_mf) + { + build_func = ":f_build_curlcurl_const_matrix"; + build_qf = &f_build_curlcurl_const_matrix; + } + else + { + apply_func = ":f_apply_curlcurl_mf_const_matrix"; + apply_qf = &f_apply_curlcurl_mf_const_matrix; + } + } + else + { + if (!use_mf) + { + build_func = ":f_build_curlcurl_quad_matrix"; + build_qf = &f_build_curlcurl_quad_matrix; + } + else + { + apply_func = ":f_apply_curlcurl_mf_quad_matrix"; + apply_qf = &f_apply_curlcurl_mf_quad_matrix; + } } } }; @@ -134,7 +208,7 @@ MFCurlCurlIntegrator::MFCurlCurlIntegrator( const bool use_bdr) { #ifdef MFEM_USE_CEED - CurlCurlOperatorInfo info(fes, Q, use_bdr); + CurlCurlOperatorInfo info(fes, Q, use_bdr, true); Assemble(integ, info, fes, Q, use_bdr, true); #else MFEM_ABORT("MFEM must be built with MFEM_USE_CEED=YES to use libCEED."); diff --git a/fem/ceed/integrators/curlcurl/curlcurl_qf.h b/fem/ceed/integrators/curlcurl/curlcurl_qf.h index 4f8e0afbbad..6fbace54bbb 100644 --- a/fem/ceed/integrators/curlcurl/curlcurl_qf.h +++ b/fem/ceed/integrators/curlcurl/curlcurl_qf.h @@ -16,114 +16,194 @@ #define LIBCEED_CURLCURL_COEFF_COMP_MAX 6 -/// A structure used to pass additional data to f_build_curlcurl and -/// f_apply_curlcurl struct CurlCurlContext { - CeedInt dim, space_dim, curl_dim, coeff_comp; + CeedInt dim, space_dim, curl_dim; CeedScalar coeff[LIBCEED_CURLCURL_COEFF_COMP_MAX]; }; -/// libCEED QFunction for building quadrature data for a curl-curl -/// operator with a constant coefficient -CEED_QFUNCTION(f_build_curlcurl_const)(void *ctx, CeedInt Q, - const CeedScalar *const *in, - CeedScalar *const *out) +/// libCEED QFunction for building quadrature data for a curl-curl operator +/// with a scalar constant coefficient +CEED_QFUNCTION(f_build_curlcurl_const_scalar)(void *ctx, CeedInt Q, + const CeedScalar *const *in, + CeedScalar *const *out) { CurlCurlContext *bc = (CurlCurlContext *)ctx; // in[0] is Jacobians with shape [dim, ncomp=space_dim, Q] // in[1] is quadrature weights, size (Q) // // At every quadrature point, compute qw/det(J) J^T C J and store the - // symmetric part of the result. In 2D, compute and store qw * c / det(J). + // symmetric part of the result. In 2D, compute and store qw * c / det(J) const CeedScalar *coeff = bc->coeff; const CeedScalar *J = in[0], *qw = in[1]; CeedScalar *qd = out[0]; - switch (1000 * bc->space_dim + 100 * bc->dim + 10 * bc->curl_dim + - bc->coeff_comp) + switch (100 * bc->space_dim + 10 * bc->dim + bc->curl_dim) { - case 2211: + case 221: CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++) { const CeedScalar coeff0 = coeff[0]; qd[i] = qw[i] * coeff0 / DetJ22(J + i, Q); } break; - case 3211: + case 321: CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++) { const CeedScalar coeff0 = coeff[0]; qd[i] = qw[i] * coeff0 / DetJ32(J + i, Q); } break; - case 3336: + case 333: CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++) { - MultJtCJ33(J + i, Q, coeff, 1, 6, qw[i], Q, qd + i); + MultJtCJ33(J + i, Q, coeff, 1, 1, qw[i], Q, qd + i); } break; - case 3333: + } + return 0; +} + +/// libCEED QFunction for building quadrature data for a curl-curl operator +/// with a vector constant coefficient +CEED_QFUNCTION(f_build_curlcurl_const_vector)(void *ctx, CeedInt Q, + const CeedScalar *const *in, + CeedScalar *const *out) +{ + CurlCurlContext *bc = (CurlCurlContext *)ctx; + // in[0] is Jacobians with shape [dim, ncomp=space_dim, Q] + // in[1] is quadrature weights, size (Q) + // + // At every quadrature point, compute qw/det(J) J^T C J and store the + // symmetric part of the result. In 2D, compute and store qw * c / det(J) + const CeedScalar *coeff = bc->coeff; + const CeedScalar *J = in[0], *qw = in[1]; + CeedScalar *qd = out[0]; + switch (100 * bc->space_dim + 10 * bc->dim + bc->curl_dim) + { + case 333: CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++) { MultJtCJ33(J + i, Q, coeff, 1, 3, qw[i], Q, qd + i); } break; - case 3331: + } + return 0; +} + +/// libCEED QFunction for building quadrature data for a curl-curl operator +/// with a matrix constant coefficient +CEED_QFUNCTION(f_build_curlcurl_const_matrix)(void *ctx, CeedInt Q, + const CeedScalar *const *in, + CeedScalar *const *out) +{ + CurlCurlContext *bc = (CurlCurlContext *)ctx; + // in[0] is Jacobians with shape [dim, ncomp=space_dim, Q] + // in[1] is quadrature weights, size (Q) + // + // At every quadrature point, compute qw/det(J) J^T C J and store the + // symmetric part of the result. In 2D, compute and store qw * c / det(J) + const CeedScalar *coeff = bc->coeff; + const CeedScalar *J = in[0], *qw = in[1]; + CeedScalar *qd = out[0]; + switch (100 * bc->space_dim + 10 * bc->dim + bc->curl_dim) + { + case 333: CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++) { - MultJtCJ33(J + i, Q, coeff, 1, 1, qw[i], Q, qd + i); + MultJtCJ33(J + i, Q, coeff, 1, 6, qw[i], Q, qd + i); } break; } return 0; } -/// libCEED QFunction for building quadrature data for a curl-curl -/// operator with a coefficient evaluated at quadrature points. -CEED_QFUNCTION(f_build_curlcurl_quad)(void *ctx, CeedInt Q, - const CeedScalar *const *in, - CeedScalar *const *out) +/// libCEED QFunction for building quadrature data for a curl-curl operator +/// with a scalar coefficient evaluated at quadrature points +CEED_QFUNCTION(f_build_curlcurl_quad_scalar)(void *ctx, CeedInt Q, + const CeedScalar *const *in, + CeedScalar *const *out) { CurlCurlContext *bc = (CurlCurlContext *)ctx; - // in[0] is coefficients with shape [ncomp=coeff_comp, Q] + // in[0] is coefficients with shape [ncomp=1, Q] // in[1] is Jacobians with shape [dim, ncomp=space_dim, Q] // in[2] is quadrature weights, size (Q) // // At every quadrature point, compute qw/det(J) J^T C J and store the - // symmetric part of the result. In 2D, compute and store qw * c / det(J). + // symmetric part of the result. In 2D, compute and store qw * c / det(J) const CeedScalar *c = in[0], *J = in[1], *qw = in[2]; CeedScalar *qd = out[0]; - switch (1000 * bc->space_dim + 100 * bc->dim + 10 * bc->curl_dim + - bc->coeff_comp) + switch (100 * bc->space_dim + 10 * bc->dim + bc->curl_dim) { - case 2211: + case 221: CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++) { qd[i] = qw[i] * c[i] / DetJ22(J + i, Q); } break; - case 3211: + case 321: CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++) { qd[i] = qw[i] * c[i] / DetJ32(J + i, Q); } break; - case 3336: + case 333: CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++) { - MultJtCJ33(J + i, Q, c + i, Q, 6, qw[i], Q, qd + i); + MultJtCJ33(J + i, Q, c + i, Q, 1, qw[i], Q, qd + i); } break; - case 3333: + } + return 0; +} + +/// libCEED QFunction for building quadrature data for a curl-curl operator +/// with a vector coefficient evaluated at quadrature points +CEED_QFUNCTION(f_build_curlcurl_quad_vector)(void *ctx, CeedInt Q, + const CeedScalar *const *in, + CeedScalar *const *out) +{ + CurlCurlContext *bc = (CurlCurlContext *)ctx; + // in[0] is coefficients with shape [ncomp=space_dim, Q] + // in[1] is Jacobians with shape [dim, ncomp=space_dim, Q] + // in[2] is quadrature weights, size (Q) + // + // At every quadrature point, compute qw/det(J) J^T C J and store the + // symmetric part of the result. In 2D, compute and store qw * c / det(J) + const CeedScalar *c = in[0], *J = in[1], *qw = in[2]; + CeedScalar *qd = out[0]; + switch (100 * bc->space_dim + 10 * bc->dim + bc->curl_dim) + { + case 333: CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++) { MultJtCJ33(J + i, Q, c + i, Q, 3, qw[i], Q, qd + i); } break; - case 3331: + } + return 0; +} + +/// libCEED QFunction for building quadrature data for a curl-curl operator +/// with a matrix coefficient evaluated at quadrature points +CEED_QFUNCTION(f_build_curlcurl_quad_matrix)(void *ctx, CeedInt Q, + const CeedScalar *const *in, + CeedScalar *const *out) +{ + CurlCurlContext *bc = (CurlCurlContext *)ctx; + // in[0] is coefficients with shape [ncomp=space_dim*(space_dim+1)/2, Q] + // in[1] is Jacobians with shape [dim, ncomp=space_dim, Q] + // in[2] is quadrature weights, size (Q) + // + // At every quadrature point, compute qw/det(J) J^T C J and store the + // symmetric part of the result. In 2D, compute and store qw * c / det(J) + const CeedScalar *c = in[0], *J = in[1], *qw = in[2]; + CeedScalar *qd = out[0]; + switch (100 * bc->space_dim + 10 * bc->dim + bc->curl_dim) + { + case 333: CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++) { - MultJtCJ33(J + i, Q, c + i, Q, 1, qw[i], Q, qd + i); + MultJtCJ33(J + i, Q, c + i, Q, 6, qw[i], Q, qd + i); } break; } @@ -162,24 +242,24 @@ CEED_QFUNCTION(f_apply_curlcurl)(void *ctx, CeedInt Q, return 0; } -/// libCEED QFunction for applying a curl-curl operator -CEED_QFUNCTION(f_apply_curlcurl_mf_const)(void *ctx, CeedInt Q, - const CeedScalar *const *in, - CeedScalar *const *out) +/// libCEED QFunction for applying a curl-curl operator with a scalar constant +/// coefficient +CEED_QFUNCTION(f_apply_curlcurl_mf_const_scalar)(void *ctx, CeedInt Q, + const CeedScalar *const *in, + CeedScalar *const *out) { CurlCurlContext *bc = (CurlCurlContext *)ctx; // in[0], out[0] have shape [curl_dim, ncomp=1, Q] // in[1] is Jacobians with shape [dim, ncomp=space_dim, Q] // in[2] is quadrature weights, size (Q) // - // At every quadrature point, compute qw/det(J) J^T C J. + // At every quadrature point, compute qw/det(J) J^T C J const CeedScalar *coeff = bc->coeff; const CeedScalar *uc = in[0], *J = in[1], *qw = in[2]; CeedScalar *vc = out[0]; - switch (1000 * bc->space_dim + 100 * bc->dim + 10 * bc->curl_dim + - bc->coeff_comp) + switch (100 * bc->space_dim + 10 * bc->dim + bc->curl_dim) { - case 2211: + case 221: CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++) { const CeedScalar coeff0 = coeff[0]; @@ -187,7 +267,7 @@ CEED_QFUNCTION(f_apply_curlcurl_mf_const)(void *ctx, CeedInt Q, vc[i] = qd * uc[i]; } break; - case 3211: + case 321: CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++) { const CeedScalar coeff0 = coeff[0]; @@ -195,11 +275,11 @@ CEED_QFUNCTION(f_apply_curlcurl_mf_const)(void *ctx, CeedInt Q, vc[i] = qd * uc[i]; } break; - case 3336: + case 333: CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++) { CeedScalar qd[6]; - MultJtCJ33(J + i, Q, coeff, 1, 6, qw[i], 1, qd); + MultJtCJ33(J + i, Q, coeff, 1, 1, qw[i], 1, qd); const CeedScalar uc0 = uc[i + Q * 0]; const CeedScalar uc1 = uc[i + Q * 1]; const CeedScalar uc2 = uc[i + Q * 2]; @@ -208,7 +288,28 @@ CEED_QFUNCTION(f_apply_curlcurl_mf_const)(void *ctx, CeedInt Q, vc[i + Q * 2] = qd[2] * uc0 + qd[4] * uc1 + qd[5] * uc2; } break; - case 3333: + } + return 0; +} + +/// libCEED QFunction for applying a curl-curl operator with a vector constant +/// coefficient +CEED_QFUNCTION(f_apply_curlcurl_mf_const_vector)(void *ctx, CeedInt Q, + const CeedScalar *const *in, + CeedScalar *const *out) +{ + CurlCurlContext *bc = (CurlCurlContext *)ctx; + // in[0], out[0] have shape [curl_dim, ncomp=1, Q] + // in[1] is Jacobians with shape [dim, ncomp=space_dim, Q] + // in[2] is quadrature weights, size (Q) + // + // At every quadrature point, compute qw/det(J) J^T C J + const CeedScalar *coeff = bc->coeff; + const CeedScalar *uc = in[0], *J = in[1], *qw = in[2]; + CeedScalar *vc = out[0]; + switch (100 * bc->space_dim + 10 * bc->dim + bc->curl_dim) + { + case 333: CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++) { CeedScalar qd[6]; @@ -221,11 +322,32 @@ CEED_QFUNCTION(f_apply_curlcurl_mf_const)(void *ctx, CeedInt Q, vc[i + Q * 2] = qd[2] * uc0 + qd[4] * uc1 + qd[5] * uc2; } break; - case 3331: + } + return 0; +} + +/// libCEED QFunction for applying a curl-curl operator with a matrix constant +/// coefficient +CEED_QFUNCTION(f_apply_curlcurl_mf_const_matrix)(void *ctx, CeedInt Q, + const CeedScalar *const *in, + CeedScalar *const *out) +{ + CurlCurlContext *bc = (CurlCurlContext *)ctx; + // in[0], out[0] have shape [curl_dim, ncomp=1, Q] + // in[1] is Jacobians with shape [dim, ncomp=space_dim, Q] + // in[2] is quadrature weights, size (Q) + // + // At every quadrature point, compute qw/det(J) J^T C J + const CeedScalar *coeff = bc->coeff; + const CeedScalar *uc = in[0], *J = in[1], *qw = in[2]; + CeedScalar *vc = out[0]; + switch (100 * bc->space_dim + 10 * bc->dim + bc->curl_dim) + { + case 333: CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++) { CeedScalar qd[6]; - MultJtCJ33(J + i, Q, coeff, 1, 1, qw[i], 1, qd); + MultJtCJ33(J + i, Q, coeff, 1, 6, qw[i], 1, qd); const CeedScalar uc0 = uc[i + Q * 0]; const CeedScalar uc1 = uc[i + Q * 1]; const CeedScalar uc2 = uc[i + Q * 2]; @@ -238,42 +360,42 @@ CEED_QFUNCTION(f_apply_curlcurl_mf_const)(void *ctx, CeedInt Q, return 0; } -/// libCEED QFunction for applying a curl-curl operator -CEED_QFUNCTION(f_apply_curlcurl_mf_quad)(void *ctx, CeedInt Q, - const CeedScalar *const *in, - CeedScalar *const *out) +/// libCEED QFunction for applying a curl-curl operator with a scalar +/// coefficient evaluated at quadrature points +CEED_QFUNCTION(f_apply_curlcurl_mf_quad_scalar)(void *ctx, CeedInt Q, + const CeedScalar *const *in, + CeedScalar *const *out) { CurlCurlContext *bc = (CurlCurlContext *)ctx; // in[0], out[0] have shape [curl_dim, ncomp=1, Q] - // in[1] is coefficients with shape [ncomp=coeff_comp, Q] + // in[1] is coefficients with shape [ncomp=1, Q] // in[2] is Jacobians with shape [dim, ncomp=space_dim, Q] // in[3] is quadrature weights, size (Q) // - // At every quadrature point, compute qw/det(J) J^T C J. + // At every quadrature point, compute qw/det(J) J^T C J const CeedScalar *uc = in[0], *c = in[1], *J = in[2], *qw = in[3]; CeedScalar *vc = out[0]; - switch (1000 * bc->space_dim + 100 * bc->dim + 10 * bc->curl_dim + - bc->coeff_comp) + switch (100 * bc->space_dim + 10 * bc->dim + bc->curl_dim) { - case 2211: + case 221: CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++) { const CeedScalar qd = qw[i] * c[i] / DetJ22(J + i, Q); vc[i] = qd * uc[i]; } break; - case 3211: + case 321: CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++) { const CeedScalar qd = qw[i] * c[i] / DetJ32(J + i, Q); vc[i] = qd * uc[i]; } break; - case 3336: + case 333: CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++) { CeedScalar qd[6]; - MultJtCJ33(J + i, Q, c + i, Q, 6, qw[i], 1, qd); + MultJtCJ33(J + i, Q, c + i, Q, 1, qw[i], 1, qd); const CeedScalar uc0 = uc[i + Q * 0]; const CeedScalar uc1 = uc[i + Q * 1]; const CeedScalar uc2 = uc[i + Q * 2]; @@ -282,7 +404,28 @@ CEED_QFUNCTION(f_apply_curlcurl_mf_quad)(void *ctx, CeedInt Q, vc[i + Q * 2] = qd[2] * uc0 + qd[4] * uc1 + qd[5] * uc2; } break; - case 3333: + } + return 0; +} + +/// libCEED QFunction for applying a curl-curl operator with a vector +/// coefficient evaluated at quadrature points +CEED_QFUNCTION(f_apply_curlcurl_mf_quad_vector)(void *ctx, CeedInt Q, + const CeedScalar *const *in, + CeedScalar *const *out) +{ + CurlCurlContext *bc = (CurlCurlContext *)ctx; + // in[0], out[0] have shape [curl_dim, ncomp=1, Q] + // in[1] is coefficients with shape [ncomp=space_dim, Q] + // in[2] is Jacobians with shape [dim, ncomp=space_dim, Q] + // in[3] is quadrature weights, size (Q) + // + // At every quadrature point, compute qw/det(J) J^T C J + const CeedScalar *uc = in[0], *c = in[1], *J = in[2], *qw = in[3]; + CeedScalar *vc = out[0]; + switch (100 * bc->space_dim + 10 * bc->dim + bc->curl_dim) + { + case 333: CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++) { CeedScalar qd[6]; @@ -295,11 +438,32 @@ CEED_QFUNCTION(f_apply_curlcurl_mf_quad)(void *ctx, CeedInt Q, vc[i + Q * 2] = qd[2] * uc0 + qd[4] * uc1 + qd[5] * uc2; } break; - case 3331: + } + return 0; +} + +/// libCEED QFunction for applying a curl-curl operator with a matrix +/// coefficient evaluated at quadrature points +CEED_QFUNCTION(f_apply_curlcurl_mf_quad_matrix)(void *ctx, CeedInt Q, + const CeedScalar *const *in, + CeedScalar *const *out) +{ + CurlCurlContext *bc = (CurlCurlContext *)ctx; + // in[0], out[0] have shape [curl_dim, ncomp=1, Q] + // in[1] is coefficients with shape [ncomp=space_dim*(space_dim+1)/2, Q] + // in[2] is Jacobians with shape [dim, ncomp=space_dim, Q] + // in[3] is quadrature weights, size (Q) + // + // At every quadrature point, compute qw/det(J) J^T C J + const CeedScalar *uc = in[0], *c = in[1], *J = in[2], *qw = in[3]; + CeedScalar *vc = out[0]; + switch (100 * bc->space_dim + 10 * bc->dim + bc->curl_dim) + { + case 333: CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++) { CeedScalar qd[6]; - MultJtCJ33(J + i, Q, c + i, Q, 1, qw[i], 1, qd); + MultJtCJ33(J + i, Q, c + i, Q, 6, qw[i], 1, qd); const CeedScalar uc0 = uc[i + Q * 0]; const CeedScalar uc1 = uc[i + Q * 1]; const CeedScalar uc2 = uc[i + Q * 2]; diff --git a/fem/ceed/integrators/diffusion/diffusion.cpp b/fem/ceed/integrators/diffusion/diffusion.cpp index b9c74bc64a9..d2f56db828f 100644 --- a/fem/ceed/integrators/diffusion/diffusion.cpp +++ b/fem/ceed/integrators/diffusion/diffusion.cpp @@ -25,73 +25,124 @@ namespace ceed #ifdef MFEM_USE_CEED struct DiffusionOperatorInfo : public OperatorInfo { - DiffusionContext ctx; + DiffusionContext ctx = {0}; template DiffusionOperatorInfo(const mfem::FiniteElementSpace &fes, CoeffType *Q, - bool use_bdr) + bool use_bdr = false, bool use_mf = false) { ctx.dim = fes.GetMesh()->Dimension() - use_bdr; ctx.space_dim = fes.GetMesh()->SpaceDimension(); ctx.vdim = fes.GetVDim(); + if (!use_mf) + { + apply_func = ":f_apply_diff"; + apply_qf = &f_apply_diff; + } + else + { + build_func = ""; + build_qf = nullptr; + } if (Q == nullptr) { - ctx.coeff_comp = 1; ctx.coeff[0] = 1.0; + if (!use_mf) + { + build_func = ":f_build_diff_const_scalar"; + build_qf = &f_build_diff_const_scalar; + } + else + { + apply_func = ":f_apply_diff_mf_const_scalar"; + apply_qf = &f_apply_diff_mf_const_scalar; + } } else { - InitCoefficient(*Q); + InitCoefficient(*Q, use_mf); } - header = "/integrators/diffusion/diffusion_qf.h"; - build_func_const = ":f_build_diff_const"; - build_qf_const = &f_build_diff_const; - build_func_quad = ":f_build_diff_quad"; - build_qf_quad = &f_build_diff_quad; - apply_func = ":f_apply_diff"; - apply_qf = &f_apply_diff; - apply_func_mf_const = ":f_apply_diff_mf_const"; - apply_qf_mf_const = &f_apply_diff_mf_const; - apply_func_mf_quad = ":f_apply_diff_mf_quad"; - apply_qf_mf_quad = &f_apply_diff_mf_quad; trial_op = EvalMode::Grad; test_op = EvalMode::Grad; qdatasize = (ctx.dim * (ctx.dim + 1)) / 2; } - void InitCoefficient(mfem::Coefficient &Q) + void InitCoefficient(mfem::Coefficient &Q, bool use_mf) { - ctx.coeff_comp = 1; - if (ConstantCoefficient *const_coeff = - dynamic_cast(&Q)) + if (mfem::ConstantCoefficient *const_coeff = + dynamic_cast(&Q)) { ctx.coeff[0] = const_coeff->constant; + if (!use_mf) + { + build_func = ":f_build_diff_const_scalar"; + build_qf = &f_build_diff_const_scalar; + } + else + { + apply_func = ":f_apply_diff_mf_const_scalar"; + apply_qf = &f_apply_diff_mf_const_scalar; + } + } + else + { + if (!use_mf) + { + build_func = ":f_build_diff_quad_scalar"; + build_qf = &f_build_diff_quad_scalar; + } + else + { + apply_func = ":f_apply_diff_mf_quad_scalar"; + apply_qf = &f_apply_diff_mf_quad_scalar; + } } } - void InitCoefficient(mfem::VectorCoefficient &VQ) + void InitCoefficient(mfem::VectorCoefficient &VQ, bool use_mf) { - const int vdim = VQ.GetVDim(); - ctx.coeff_comp = vdim; - if (VectorConstantCoefficient *const_coeff = - dynamic_cast(&VQ)) + if (mfem::VectorConstantCoefficient *const_coeff = + dynamic_cast(&VQ)) { - MFEM_VERIFY(ctx.coeff_comp <= LIBCEED_DIFF_COEFF_COMP_MAX, + const int vdim = VQ.GetVDim(); + MFEM_VERIFY(vdim <= LIBCEED_DIFF_COEFF_COMP_MAX, "VectorCoefficient dimension exceeds context storage!"); const mfem::Vector &val = const_coeff->GetVec(); for (int i = 0; i < vdim; i++) { ctx.coeff[i] = val[i]; } + if (!use_mf) + { + build_func = ":f_build_diff_const_vector"; + build_qf = &f_build_diff_const_vector; + } + else + { + apply_func = ":f_apply_diff_mf_const_vector"; + apply_qf = &f_apply_diff_mf_const_vector; + } + } + else + { + if (!use_mf) + { + build_func = ":f_build_diff_quad_vector"; + build_qf = &f_build_diff_quad_vector; + } + else + { + apply_func = ":f_apply_diff_mf_quad_vector"; + apply_qf = &f_apply_diff_mf_quad_vector; + } } } - void InitCoefficient(mfem::MatrixCoefficient &MQ) + void InitCoefficient(mfem::MatrixCoefficient &MQ, bool use_mf) { // Assumes matrix coefficient is symmetric - const int vdim = MQ.GetVDim(); - ctx.coeff_comp = (vdim * (vdim + 1)) / 2; - if (MatrixConstantCoefficient *const_coeff = - dynamic_cast(&MQ)) + if (mfem::MatrixConstantCoefficient *const_coeff = + dynamic_cast(&MQ)) { - MFEM_VERIFY(ctx.coeff_comp <= LIBCEED_DIFF_COEFF_COMP_MAX, + const int vdim = MQ.GetVDim(); + MFEM_VERIFY((vdim * (vdim + 1)) / 2 <= LIBCEED_DIFF_COEFF_COMP_MAX, "MatrixCoefficient dimensions exceed context storage!"); const mfem::DenseMatrix &val = const_coeff->GetMatrix(); for (int j = 0; j < vdim; j++) @@ -102,6 +153,29 @@ struct DiffusionOperatorInfo : public OperatorInfo ctx.coeff[idx] = val(i, j); } } + if (!use_mf) + { + build_func = ":f_build_diff_const_matrix"; + build_qf = &f_build_diff_const_matrix; + } + else + { + apply_func = ":f_apply_diff_mf_const_matrix"; + apply_qf = &f_apply_diff_mf_const_matrix; + } + } + else + { + if (!use_mf) + { + build_func = ":f_build_diff_quad_matrix"; + build_qf = &f_build_diff_quad_matrix; + } + else + { + apply_func = ":f_apply_diff_mf_quad_matrix"; + apply_qf = &f_apply_diff_mf_quad_matrix; + } } } }; @@ -145,7 +219,7 @@ MFDiffusionIntegrator::MFDiffusionIntegrator( const bool use_bdr) { #ifdef MFEM_USE_CEED - DiffusionOperatorInfo info(fes, Q, use_bdr); + DiffusionOperatorInfo info(fes, Q, use_bdr, true); Assemble(integ, info, fes, Q, use_bdr, true); #else MFEM_ABORT("MFEM must be built with MFEM_USE_CEED=YES to use libCEED."); @@ -160,7 +234,7 @@ MFDiffusionIntegrator::MFDiffusionIntegrator( const bool use_bdr) { #ifdef MFEM_USE_CEED - DiffusionOperatorInfo info(fes, Q, use_bdr); + DiffusionOperatorInfo info(fes, Q, use_bdr, true); Assemble(integ, info, fes, Q, use_bdr, true); #else MFEM_ABORT("MFEM must be built with MFEM_USE_CEED=YES to use libCEED."); diff --git a/fem/ceed/integrators/diffusion/diffusion_qf.h b/fem/ceed/integrators/diffusion/diffusion_qf.h index 779cdacd040..9ab50a3ed78 100644 --- a/fem/ceed/integrators/diffusion/diffusion_qf.h +++ b/fem/ceed/integrators/diffusion/diffusion_qf.h @@ -16,107 +16,148 @@ #define LIBCEED_DIFF_COEFF_COMP_MAX 6 -/// A structure used to pass additional data to f_build_diff and f_apply_diff struct DiffusionContext { - CeedInt dim, space_dim, vdim, coeff_comp; + CeedInt dim, space_dim, vdim; CeedScalar coeff[LIBCEED_DIFF_COEFF_COMP_MAX]; }; /// libCEED QFunction for building quadrature data for a diffusion operator -/// with a constant coefficient -CEED_QFUNCTION(f_build_diff_const)(void *ctx, CeedInt Q, - const CeedScalar *const *in, - CeedScalar *const *out) +/// with a scalar constant coefficient +CEED_QFUNCTION(f_build_diff_const_scalar)(void *ctx, CeedInt Q, + const CeedScalar *const *in, + CeedScalar *const *out) { DiffusionContext *bc = (DiffusionContext *)ctx; // in[0] is Jacobians with shape [dim, ncomp=space_dim, Q] // in[1] is quadrature weights, size (Q) // // At every quadrature point, compute qw/det(J) adj(J) C adj(J)^T and store - // the symmetric part of the result. + // the symmetric part of the result const CeedScalar *coeff = bc->coeff; const CeedScalar *J = in[0], *qw = in[1]; CeedScalar *qd = out[0]; - switch (100 * bc->space_dim + 10 * bc->dim + bc->coeff_comp) + switch (10 * bc->space_dim + bc->dim) { - case 111: + case 11: CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++) { const CeedScalar coeff0 = coeff[0]; qd[i] = qw[i] * coeff0 / J[i]; } break; - case 213: + case 21: CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++) { - MultAdjJCAdjJt21(J + i, Q, coeff, 1, 3, qw[i], Q, qd + i); + MultAdjJCAdjJt21(J + i, Q, coeff, 1, 1, qw[i], Q, qd + i); } break; - case 212: + case 22: CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++) { - MultAdjJCAdjJt21(J + i, Q, coeff, 1, 2, qw[i], Q, qd + i); + MultAdjJCAdjJt22(J + i, Q, coeff, 1, 1, qw[i], Q, qd + i); } break; - case 211: + case 32: CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++) { - MultAdjJCAdjJt21(J + i, Q, coeff, 1, 1, qw[i], Q, qd + i); + MultAdjJCAdjJt32(J + i, Q, coeff, 1, 1, qw[i], Q, qd + i); } break; - case 223: + case 33: CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++) { - MultAdjJCAdjJt22(J + i, Q, coeff, 1, 3, qw[i], Q, qd + i); + MultAdjJCAdjJt33(J + i, Q, coeff, 1, 1, qw[i], Q, qd + i); } break; - case 222: + } + return 0; +} + +/// libCEED QFunction for building quadrature data for a diffusion operator +/// with a vector constant coefficient +CEED_QFUNCTION(f_build_diff_const_vector)(void *ctx, CeedInt Q, + const CeedScalar *const *in, + CeedScalar *const *out) +{ + DiffusionContext *bc = (DiffusionContext *)ctx; + // in[0] is Jacobians with shape [dim, ncomp=space_dim, Q] + // in[1] is quadrature weights, size (Q) + // + // At every quadrature point, compute qw/det(J) adj(J) C adj(J)^T and store + // the symmetric part of the result + const CeedScalar *coeff = bc->coeff; + const CeedScalar *J = in[0], *qw = in[1]; + CeedScalar *qd = out[0]; + switch (10 * bc->space_dim + bc->dim) + { + case 21: CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++) { - MultAdjJCAdjJt22(J + i, Q, coeff, 1, 2, qw[i], Q, qd + i); + MultAdjJCAdjJt21(J + i, Q, coeff, 1, 2, qw[i], Q, qd + i); } break; - case 221: + case 22: CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++) { - MultAdjJCAdjJt22(J + i, Q, coeff, 1, 1, qw[i], Q, qd + i); + MultAdjJCAdjJt22(J + i, Q, coeff, 1, 2, qw[i], Q, qd + i); } break; - case 326: + case 32: CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++) { - MultAdjJCAdjJt32(J + i, Q, coeff, 1, 6, qw[i], Q, qd + i); + MultAdjJCAdjJt32(J + i, Q, coeff, 1, 3, qw[i], Q, qd + i); } break; - case 323: + case 33: CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++) { - MultAdjJCAdjJt32(J + i, Q, coeff, 1, 3, qw[i], Q, qd + i); + MultAdjJCAdjJt33(J + i, Q, coeff, 1, 3, qw[i], Q, qd + i); } break; - case 321: + } + return 0; +} + +/// libCEED QFunction for building quadrature data for a diffusion operator +/// with a matrix constant coefficient +CEED_QFUNCTION(f_build_diff_const_matrix)(void *ctx, CeedInt Q, + const CeedScalar *const *in, + CeedScalar *const *out) +{ + DiffusionContext *bc = (DiffusionContext *)ctx; + // in[0] is Jacobians with shape [dim, ncomp=space_dim, Q] + // in[1] is quadrature weights, size (Q) + // + // At every quadrature point, compute qw/det(J) adj(J) C adj(J)^T and store + // the symmetric part of the result + const CeedScalar *coeff = bc->coeff; + const CeedScalar *J = in[0], *qw = in[1]; + CeedScalar *qd = out[0]; + switch (10 * bc->space_dim + bc->dim) + { + case 21: CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++) { - MultAdjJCAdjJt32(J + i, Q, coeff, 1, 1, qw[i], Q, qd + i); + MultAdjJCAdjJt21(J + i, Q, coeff, 1, 3, qw[i], Q, qd + i); } break; - case 336: + case 22: CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++) { - MultAdjJCAdjJt33(J + i, Q, coeff, 1, 6, qw[i], Q, qd + i); + MultAdjJCAdjJt22(J + i, Q, coeff, 1, 3, qw[i], Q, qd + i); } break; - case 333: + case 32: CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++) { - MultAdjJCAdjJt33(J + i, Q, coeff, 1, 3, qw[i], Q, qd + i); + MultAdjJCAdjJt32(J + i, Q, coeff, 1, 6, qw[i], Q, qd + i); } break; - case 331: + case 33: CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++) { - MultAdjJCAdjJt33(J + i, Q, coeff, 1, 1, qw[i], Q, qd + i); + MultAdjJCAdjJt33(J + i, Q, coeff, 1, 6, qw[i], Q, qd + i); } break; } @@ -124,105 +165,147 @@ CEED_QFUNCTION(f_build_diff_const)(void *ctx, CeedInt Q, } /// libCEED QFunction for building quadrature data for a diffusion operator -/// with a coefficient evaluated at quadrature points. -CEED_QFUNCTION(f_build_diff_quad)(void *ctx, CeedInt Q, - const CeedScalar *const *in, - CeedScalar *const *out) +/// with a scalar coefficient evaluated at quadrature points +CEED_QFUNCTION(f_build_diff_quad_scalar)(void *ctx, CeedInt Q, + const CeedScalar *const *in, + CeedScalar *const *out) { DiffusionContext *bc = (DiffusionContext *)ctx; - // in[0] is coefficients with shape [ncomp=coeff_comp, Q] + // in[0] is coefficients with shape [ncomp=1, Q] // in[1] is Jacobians with shape [dim, ncomp=space_dim, Q] // in[2] is quadrature weights, size (Q) // // At every quadrature point, compute qw/det(J) adj(J) C adj(J)^T and store - // the symmetric part of the result. + // the symmetric part of the result const CeedScalar *c = in[0], *J = in[1], *qw = in[2]; CeedScalar *qd = out[0]; - switch (100 * bc->space_dim + 10 * bc->dim + bc->coeff_comp) + switch (10 * bc->space_dim + bc->dim) { - case 111: + case 11: CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++) { qd[i] = qw[i] * c[i] / J[i]; } break; - case 213: + case 21: CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++) { - MultAdjJCAdjJt21(J + i, Q, c + i, Q, 3, qw[i], Q, qd + i); + MultAdjJCAdjJt21(J + i, Q, c + i, Q, 1, qw[i], Q, qd + i); } break; - case 212: + case 22: CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++) { - MultAdjJCAdjJt21(J + i, Q, c + i, Q, 2, qw[i], Q, qd + i); + MultAdjJCAdjJt22(J + i, Q, c + i, Q, 1, qw[i], Q, qd + i); } break; - case 211: + case 32: CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++) { - MultAdjJCAdjJt21(J + i, Q, c + i, Q, 1, qw[i], Q, qd + i); + MultAdjJCAdjJt32(J + i, Q, c + i, Q, 1, qw[i], Q, qd + i); } break; - case 223: + case 33: CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++) { - MultAdjJCAdjJt22(J + i, Q, c + i, Q, 3, qw[i], Q, qd + i); + MultAdjJCAdjJt33(J + i, Q, c + i, Q, 1, qw[i], Q, qd + i); } break; - case 222: + } + return 0; +} + +/// libCEED QFunction for building quadrature data for a diffusion operator +/// with a vector coefficient evaluated at quadrature points +CEED_QFUNCTION(f_build_diff_quad_vector)(void *ctx, CeedInt Q, + const CeedScalar *const *in, + CeedScalar *const *out) +{ + DiffusionContext *bc = (DiffusionContext *)ctx; + // in[0] is coefficients with shape [ncomp=space_dim, Q] + // in[1] is Jacobians with shape [dim, ncomp=space_dim, Q] + // in[2] is quadrature weights, size (Q) + // + // At every quadrature point, compute qw/det(J) adj(J) C adj(J)^T and store + // the symmetric part of the result + const CeedScalar *c = in[0], *J = in[1], *qw = in[2]; + CeedScalar *qd = out[0]; + switch (10 * bc->space_dim + bc->dim) + { + case 21: CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++) { - MultAdjJCAdjJt22(J + i, Q, c + i, Q, 2, qw[i], Q, qd + i); + MultAdjJCAdjJt21(J + i, Q, c + i, Q, 2, qw[i], Q, qd + i); } break; - case 221: + case 22: CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++) { - MultAdjJCAdjJt22(J + i, Q, c + i, Q, 1, qw[i], Q, qd + i); + MultAdjJCAdjJt22(J + i, Q, c + i, Q, 2, qw[i], Q, qd + i); } break; - case 326: + case 32: CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++) { - MultAdjJCAdjJt32(J + i, Q, c + i, Q, 6, qw[i], Q, qd + i); + MultAdjJCAdjJt32(J + i, Q, c + i, Q, 3, qw[i], Q, qd + i); } break; - case 323: + case 33: CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++) { - MultAdjJCAdjJt32(J + i, Q, c + i, Q, 3, qw[i], Q, qd + i); + MultAdjJCAdjJt33(J + i, Q, c + i, Q, 3, qw[i], Q, qd + i); } break; - case 321: + } + return 0; +} + +/// libCEED QFunction for building quadrature data for a diffusion operator +/// with a matrix coefficient evaluated at quadrature points +CEED_QFUNCTION(f_build_diff_quad_matrix)(void *ctx, CeedInt Q, + const CeedScalar *const *in, + CeedScalar *const *out) +{ + DiffusionContext *bc = (DiffusionContext *)ctx; + // in[0] is coefficients with shape [ncomp=space_dim*(space_dim+1)/2, Q] + // in[1] is Jacobians with shape [dim, ncomp=space_dim, Q] + // in[2] is quadrature weights, size (Q) + // + // At every quadrature point, compute qw/det(J) adj(J) C adj(J)^T and store + // the symmetric part of the result + const CeedScalar *c = in[0], *J = in[1], *qw = in[2]; + CeedScalar *qd = out[0]; + switch (10 * bc->space_dim + bc->dim) + { + case 21: CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++) { - MultAdjJCAdjJt32(J + i, Q, c + i, Q, 1, qw[i], Q, qd + i); + MultAdjJCAdjJt21(J + i, Q, c + i, Q, 3, qw[i], Q, qd + i); } break; - case 336: + case 22: CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++) { - MultAdjJCAdjJt33(J + i, Q, c + i, Q, 6, qw[i], Q, qd + i); + MultAdjJCAdjJt22(J + i, Q, c + i, Q, 3, qw[i], Q, qd + i); } break; - case 333: + case 32: CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++) { - MultAdjJCAdjJt33(J + i, Q, c + i, Q, 3, qw[i], Q, qd + i); + MultAdjJCAdjJt32(J + i, Q, c + i, Q, 6, qw[i], Q, qd + i); } break; - case 331: + case 33: CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++) { - MultAdjJCAdjJt33(J + i, Q, c + i, Q, 1, qw[i], Q, qd + i); + MultAdjJCAdjJt33(J + i, Q, c + i, Q, 6, qw[i], Q, qd + i); } break; } return 0; } -/// libCEED QFunction for applying a diff operator +/// libCEED QFunction for applying a diffusion operator CEED_QFUNCTION(f_apply_diff)(void *ctx, CeedInt Q, const CeedScalar *const *in, CeedScalar *const *out) @@ -328,23 +411,24 @@ CEED_QFUNCTION(f_apply_diff)(void *ctx, CeedInt Q, return 0; } -/// libCEED QFunction for applying a diff operator -CEED_QFUNCTION(f_apply_diff_mf_const)(void *ctx, CeedInt Q, - const CeedScalar *const *in, - CeedScalar *const *out) +/// libCEED QFunction for applying a diffusion operator with a scalar constant +/// coefficient +CEED_QFUNCTION(f_apply_diff_mf_const_scalar)(void *ctx, CeedInt Q, + const CeedScalar *const *in, + CeedScalar *const *out) { DiffusionContext *bc = (DiffusionContext *)ctx; // in[0], out[0] have shape [dim, ncomp=vdim, Q] // in[1] is Jacobians with shape [dim, ncomp=space_dim, Q] // in[2] is quadrature weights, size (Q) // - // At every quadrature point, compute qw/det(J) adj(J) C adj(J)^T. + // At every quadrature point, compute qw/det(J) adj(J) C adj(J)^T const CeedScalar *coeff = bc->coeff; const CeedScalar *ug = in[0], *J = in[1], *qw = in[2]; CeedScalar *vg = out[0]; - switch (1000 * bc->space_dim + 100 * bc->dim + 10 * bc->coeff_comp + bc->vdim) + switch (100 * bc->space_dim + 10 * bc->dim + bc->vdim) { - case 1111: + case 111: CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++) { const CeedScalar coeff0 = coeff[0]; @@ -352,89 +436,145 @@ CEED_QFUNCTION(f_apply_diff_mf_const)(void *ctx, CeedInt Q, vg[i] = qd * ug[i]; } break; - case 2131: + case 211: CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++) { CeedScalar qd; - MultAdjJCAdjJt21(J + i, Q, coeff, 1, 3, qw[i], 1, &qd); + MultAdjJCAdjJt21(J + i, Q, coeff, 1, 1, qw[i], 1, &qd); vg[i] = qd * ug[i]; } break; - case 2132: + case 212: CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++) { CeedScalar qd; - MultAdjJCAdjJt21(J + i, Q, coeff, 1, 3, qw[i], 1, &qd); + MultAdjJCAdjJt21(J + i, Q, coeff, 1, 1, qw[i], 1, &qd); CeedPragmaSIMD for (CeedInt d = 0; d < 2; d++) { vg[i + Q * d] = qd * ug[i + Q * d]; } } break; - case 2121: + case 221: CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++) { - CeedScalar qd; - MultAdjJCAdjJt21(J + i, Q, coeff, 1, 2, qw[i], 1, &qd); - vg[i] = qd * ug[i]; + CeedScalar qd[3]; + MultAdjJCAdjJt22(J + i, Q, coeff, 1, 1, qw[i], 1, qd); + const CeedScalar ug0 = ug[i + Q * 0]; + const CeedScalar ug1 = ug[i + Q * 1]; + vg[i + Q * 0] = qd[0] * ug0 + qd[1] * ug1; + vg[i + Q * 1] = qd[1] * ug0 + qd[2] * ug1; } break; - case 2122: + case 222: CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++) { - CeedScalar qd; - MultAdjJCAdjJt21(J + i, Q, coeff, 1, 2, qw[i], 1, &qd); + CeedScalar qd[3]; + MultAdjJCAdjJt22(J + i, Q, coeff, 1, 1, qw[i], 1, qd); CeedPragmaSIMD for (CeedInt d = 0; d < 2; d++) { - vg[i + Q * d] = qd * ug[i + Q * d]; + const CeedScalar ug0 = ug[i + Q * (d + 2 * 0)]; + const CeedScalar ug1 = ug[i + Q * (d + 2 * 1)]; + vg[i + Q * (d + 2 * 0)] = qd[0] * ug0 + qd[1] * ug1; + vg[i + Q * (d + 2 * 1)] = qd[1] * ug0 + qd[2] * ug1; } } break; - case 2111: + case 321: CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++) { - CeedScalar qd; - MultAdjJCAdjJt21(J + i, Q, coeff, 1, 1, qw[i], 1, &qd); - vg[i] = qd * ug[i]; + CeedScalar qd[3]; + MultAdjJCAdjJt32(J + i, Q, coeff, 1, 1, qw[i], 1, qd); + const CeedScalar ug0 = ug[i + Q * 0]; + const CeedScalar ug1 = ug[i + Q * 1]; + vg[i + Q * 0] = qd[0] * ug0 + qd[1] * ug1; + vg[i + Q * 1] = qd[1] * ug0 + qd[2] * ug1; } break; - case 2112: + case 323: CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++) { - CeedScalar qd; - MultAdjJCAdjJt21(J + i, Q, coeff, 1, 1, qw[i], 1, &qd); - CeedPragmaSIMD for (CeedInt d = 0; d < 2; d++) + CeedScalar qd[3]; + MultAdjJCAdjJt32(J + i, Q, coeff, 1, 1, qw[i], 1, qd); + CeedPragmaSIMD for (CeedInt d = 0; d < 3; d++) { - vg[i + Q * d] = qd * ug[i + Q * d]; + const CeedScalar ug0 = ug[i + Q * (d + 3 * 0)]; + const CeedScalar ug1 = ug[i + Q * (d + 3 * 1)]; + vg[i + Q * (d + 3 * 0)] = qd[0] * ug0 + qd[1] * ug1; + vg[i + Q * (d + 3 * 1)] = qd[1] * ug0 + qd[2] * ug1; } } break; - case 2231: + case 331: CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++) { - CeedScalar qd[3]; - MultAdjJCAdjJt22(J + i, Q, coeff, 1, 3, qw[i], 1, qd); + CeedScalar qd[6]; + MultAdjJCAdjJt33(J + i, Q, coeff, 1, 1, qw[i], 1, qd); const CeedScalar ug0 = ug[i + Q * 0]; const CeedScalar ug1 = ug[i + Q * 1]; - vg[i + Q * 0] = qd[0] * ug0 + qd[1] * ug1; - vg[i + Q * 1] = qd[1] * ug0 + qd[2] * ug1; + const CeedScalar ug2 = ug[i + Q * 2]; + vg[i + Q * 0] = qd[0] * ug0 + qd[1] * ug1 + qd[2] * ug2; + vg[i + Q * 1] = qd[1] * ug0 + qd[3] * ug1 + qd[4] * ug2; + vg[i + Q * 2] = qd[2] * ug0 + qd[4] * ug1 + qd[5] * ug2; } break; - case 2232: + case 333: CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++) { - CeedScalar qd[3]; - MultAdjJCAdjJt22(J + i, Q, coeff, 1, 3, qw[i], 1, qd); + CeedScalar qd[6]; + MultAdjJCAdjJt33(J + i, Q, coeff, 1, 1, qw[i], 1, qd); + CeedPragmaSIMD for (CeedInt d = 0; d < 3; d++) + { + const CeedScalar ug0 = ug[i + Q * (d + 3 * 0)]; + const CeedScalar ug1 = ug[i + Q * (d + 3 * 1)]; + const CeedScalar ug2 = ug[i + Q * (d + 3 * 2)]; + vg[i + Q * (d + 3 * 0)] = qd[0] * ug0 + qd[1] * ug1 + qd[2] * ug2; + vg[i + Q * (d + 3 * 1)] = qd[1] * ug0 + qd[3] * ug1 + qd[4] * ug2; + vg[i + Q * (d + 3 * 2)] = qd[2] * ug0 + qd[4] * ug1 + qd[5] * ug2; + } + } + break; + } + return 0; +} + +/// libCEED QFunction for applying a diffusion operator with a vector constant +/// coefficient +CEED_QFUNCTION(f_apply_diff_mf_const_vector)(void *ctx, CeedInt Q, + const CeedScalar *const *in, + CeedScalar *const *out) +{ + DiffusionContext *bc = (DiffusionContext *)ctx; + // in[0], out[0] have shape [dim, ncomp=vdim, Q] + // in[1] is Jacobians with shape [dim, ncomp=space_dim, Q] + // in[2] is quadrature weights, size (Q) + // + // At every quadrature point, compute qw/det(J) adj(J) C adj(J)^T + const CeedScalar *coeff = bc->coeff; + const CeedScalar *ug = in[0], *J = in[1], *qw = in[2]; + CeedScalar *vg = out[0]; + switch (100 * bc->space_dim + 10 * bc->dim + bc->vdim) + { + case 211: + CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++) + { + CeedScalar qd; + MultAdjJCAdjJt21(J + i, Q, coeff, 1, 2, qw[i], 1, &qd); + vg[i] = qd * ug[i]; + } + break; + case 212: + CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++) + { + CeedScalar qd; + MultAdjJCAdjJt21(J + i, Q, coeff, 1, 2, qw[i], 1, &qd); CeedPragmaSIMD for (CeedInt d = 0; d < 2; d++) { - const CeedScalar ug0 = ug[i + Q * (d + 2 * 0)]; - const CeedScalar ug1 = ug[i + Q * (d + 2 * 1)]; - vg[i + Q * (d + 2 * 0)] = qd[0] * ug0 + qd[1] * ug1; - vg[i + Q * (d + 2 * 1)] = qd[1] * ug0 + qd[2] * ug1; + vg[i + Q * d] = qd * ug[i + Q * d]; } } break; - case 2221: + case 221: CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++) { CeedScalar qd[3]; @@ -445,7 +585,7 @@ CEED_QFUNCTION(f_apply_diff_mf_const)(void *ctx, CeedInt Q, vg[i + Q * 1] = qd[1] * ug0 + qd[2] * ug1; } break; - case 2222: + case 222: CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++) { CeedScalar qd[3]; @@ -459,47 +599,22 @@ CEED_QFUNCTION(f_apply_diff_mf_const)(void *ctx, CeedInt Q, } } break; - case 2211: + case 321: CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++) { CeedScalar qd[3]; - MultAdjJCAdjJt22(J + i, Q, coeff, 1, 1, qw[i], 1, qd); - const CeedScalar ug0 = ug[i + Q * 0]; - const CeedScalar ug1 = ug[i + Q * 1]; - vg[i + Q * 0] = qd[0] * ug0 + qd[1] * ug1; - vg[i + Q * 1] = qd[1] * ug0 + qd[2] * ug1; - } - break; - case 2212: - CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++) - { - CeedScalar qd[3]; - MultAdjJCAdjJt22(J + i, Q, coeff, 1, 1, qw[i], 1, qd); - CeedPragmaSIMD for (CeedInt d = 0; d < 2; d++) - { - const CeedScalar ug0 = ug[i + Q * (d + 2 * 0)]; - const CeedScalar ug1 = ug[i + Q * (d + 2 * 1)]; - vg[i + Q * (d + 2 * 0)] = qd[0] * ug0 + qd[1] * ug1; - vg[i + Q * (d + 2 * 1)] = qd[1] * ug0 + qd[2] * ug1; - } - } - break; - case 3261: - CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++) - { - CeedScalar qd[3]; - MultAdjJCAdjJt32(J + i, Q, coeff, 1, 6, qw[i], 1, qd); + MultAdjJCAdjJt32(J + i, Q, coeff, 1, 3, qw[i], 1, qd); const CeedScalar ug0 = ug[i + Q * 0]; const CeedScalar ug1 = ug[i + Q * 1]; vg[i + Q * 0] = qd[0] * ug0 + qd[1] * ug1; vg[i + Q * 1] = qd[1] * ug0 + qd[2] * ug1; } break; - case 3263: + case 323: CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++) { CeedScalar qd[3]; - MultAdjJCAdjJt32(J + i, Q, coeff, 1, 6, qw[i], 1, qd); + MultAdjJCAdjJt32(J + i, Q, coeff, 1, 3, qw[i], 1, qd); CeedPragmaSIMD for (CeedInt d = 0; d < 3; d++) { const CeedScalar ug0 = ug[i + Q * (d + 3 * 0)]; @@ -509,119 +624,130 @@ CEED_QFUNCTION(f_apply_diff_mf_const)(void *ctx, CeedInt Q, } } break; - case 3231: + case 331: CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++) { - CeedScalar qd[3]; - MultAdjJCAdjJt32(J + i, Q, coeff, 1, 3, qw[i], 1, qd); + CeedScalar qd[6]; + MultAdjJCAdjJt33(J + i, Q, coeff, 1, 3, qw[i], 1, qd); const CeedScalar ug0 = ug[i + Q * 0]; const CeedScalar ug1 = ug[i + Q * 1]; - vg[i + Q * 0] = qd[0] * ug0 + qd[1] * ug1; - vg[i + Q * 1] = qd[1] * ug0 + qd[2] * ug1; + const CeedScalar ug2 = ug[i + Q * 2]; + vg[i + Q * 0] = qd[0] * ug0 + qd[1] * ug1 + qd[2] * ug2; + vg[i + Q * 1] = qd[1] * ug0 + qd[3] * ug1 + qd[4] * ug2; + vg[i + Q * 2] = qd[2] * ug0 + qd[4] * ug1 + qd[5] * ug2; } break; - case 3233: + case 333: CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++) { - CeedScalar qd[3]; - MultAdjJCAdjJt32(J + i, Q, coeff, 1, 3, qw[i], 1, qd); + CeedScalar qd[6]; + MultAdjJCAdjJt33(J + i, Q, coeff, 1, 3, qw[i], 1, qd); CeedPragmaSIMD for (CeedInt d = 0; d < 3; d++) { const CeedScalar ug0 = ug[i + Q * (d + 3 * 0)]; const CeedScalar ug1 = ug[i + Q * (d + 3 * 1)]; - vg[i + Q * (d + 3 * 0)] = qd[0] * ug0 + qd[1] * ug1; - vg[i + Q * (d + 3 * 1)] = qd[1] * ug0 + qd[2] * ug1; + const CeedScalar ug2 = ug[i + Q * (d + 3 * 2)]; + vg[i + Q * (d + 3 * 0)] = qd[0] * ug0 + qd[1] * ug1 + qd[2] * ug2; + vg[i + Q * (d + 3 * 1)] = qd[1] * ug0 + qd[3] * ug1 + qd[4] * ug2; + vg[i + Q * (d + 3 * 2)] = qd[2] * ug0 + qd[4] * ug1 + qd[5] * ug2; } } break; - case 3211: + } + return 0; +} + +/// libCEED QFunction for applying a diffusion operator with a matrix constant +/// coefficient +CEED_QFUNCTION(f_apply_diff_mf_const_matrix)(void *ctx, CeedInt Q, + const CeedScalar *const *in, + CeedScalar *const *out) +{ + DiffusionContext *bc = (DiffusionContext *)ctx; + // in[0], out[0] have shape [dim, ncomp=vdim, Q] + // in[1] is Jacobians with shape [dim, ncomp=space_dim, Q] + // in[2] is quadrature weights, size (Q) + // + // At every quadrature point, compute qw/det(J) adj(J) C adj(J)^T + const CeedScalar *coeff = bc->coeff; + const CeedScalar *ug = in[0], *J = in[1], *qw = in[2]; + CeedScalar *vg = out[0]; + switch (100 * bc->space_dim + 10 * bc->dim + bc->vdim) + { + case 211: CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++) { - CeedScalar qd[3]; - MultAdjJCAdjJt32(J + i, Q, coeff, 1, 1, qw[i], 1, qd); - const CeedScalar ug0 = ug[i + Q * 0]; - const CeedScalar ug1 = ug[i + Q * 1]; - vg[i + Q * 0] = qd[0] * ug0 + qd[1] * ug1; - vg[i + Q * 1] = qd[1] * ug0 + qd[2] * ug1; + CeedScalar qd; + MultAdjJCAdjJt21(J + i, Q, coeff, 1, 3, qw[i], 1, &qd); + vg[i] = qd * ug[i]; } break; - case 3213: + case 212: CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++) { - CeedScalar qd[3]; - MultAdjJCAdjJt32(J + i, Q, coeff, 1, 1, qw[i], 1, qd); - CeedPragmaSIMD for (CeedInt d = 0; d < 3; d++) + CeedScalar qd; + MultAdjJCAdjJt21(J + i, Q, coeff, 1, 3, qw[i], 1, &qd); + CeedPragmaSIMD for (CeedInt d = 0; d < 2; d++) { - const CeedScalar ug0 = ug[i + Q * (d + 3 * 0)]; - const CeedScalar ug1 = ug[i + Q * (d + 3 * 1)]; - vg[i + Q * (d + 3 * 0)] = qd[0] * ug0 + qd[1] * ug1; - vg[i + Q * (d + 3 * 1)] = qd[1] * ug0 + qd[2] * ug1; + vg[i + Q * d] = qd * ug[i + Q * d]; } } break; - case 3361: + case 221: CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++) { - CeedScalar qd[6]; - MultAdjJCAdjJt33(J + i, Q, coeff, 1, 6, qw[i], 1, qd); + CeedScalar qd[3]; + MultAdjJCAdjJt22(J + i, Q, coeff, 1, 3, qw[i], 1, qd); const CeedScalar ug0 = ug[i + Q * 0]; const CeedScalar ug1 = ug[i + Q * 1]; - const CeedScalar ug2 = ug[i + Q * 2]; - vg[i + Q * 0] = qd[0] * ug0 + qd[1] * ug1 + qd[2] * ug2; - vg[i + Q * 1] = qd[1] * ug0 + qd[3] * ug1 + qd[4] * ug2; - vg[i + Q * 2] = qd[2] * ug0 + qd[4] * ug1 + qd[5] * ug2; + vg[i + Q * 0] = qd[0] * ug0 + qd[1] * ug1; + vg[i + Q * 1] = qd[1] * ug0 + qd[2] * ug1; } break; - case 3363: + case 222: CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++) { - CeedScalar qd[6]; - MultAdjJCAdjJt33(J + i, Q, coeff, 1, 6, qw[i], 1, qd); - CeedPragmaSIMD for (CeedInt d = 0; d < 3; d++) + CeedScalar qd[3]; + MultAdjJCAdjJt22(J + i, Q, coeff, 1, 3, qw[i], 1, qd); + CeedPragmaSIMD for (CeedInt d = 0; d < 2; d++) { - const CeedScalar ug0 = ug[i + Q * (d + 3 * 0)]; - const CeedScalar ug1 = ug[i + Q * (d + 3 * 1)]; - const CeedScalar ug2 = ug[i + Q * (d + 3 * 2)]; - vg[i + Q * (d + 3 * 0)] = qd[0] * ug0 + qd[1] * ug1 + qd[2] * ug2; - vg[i + Q * (d + 3 * 1)] = qd[1] * ug0 + qd[3] * ug1 + qd[4] * ug2; - vg[i + Q * (d + 3 * 2)] = qd[2] * ug0 + qd[4] * ug1 + qd[5] * ug2; + const CeedScalar ug0 = ug[i + Q * (d + 2 * 0)]; + const CeedScalar ug1 = ug[i + Q * (d + 2 * 1)]; + vg[i + Q * (d + 2 * 0)] = qd[0] * ug0 + qd[1] * ug1; + vg[i + Q * (d + 2 * 1)] = qd[1] * ug0 + qd[2] * ug1; } } break; - case 3331: + case 321: CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++) { - CeedScalar qd[6]; - MultAdjJCAdjJt33(J + i, Q, coeff, 1, 3, qw[i], 1, qd); + CeedScalar qd[3]; + MultAdjJCAdjJt32(J + i, Q, coeff, 1, 6, qw[i], 1, qd); const CeedScalar ug0 = ug[i + Q * 0]; const CeedScalar ug1 = ug[i + Q * 1]; - const CeedScalar ug2 = ug[i + Q * 2]; - vg[i + Q * 0] = qd[0] * ug0 + qd[1] * ug1 + qd[2] * ug2; - vg[i + Q * 1] = qd[1] * ug0 + qd[3] * ug1 + qd[4] * ug2; - vg[i + Q * 2] = qd[2] * ug0 + qd[4] * ug1 + qd[5] * ug2; + vg[i + Q * 0] = qd[0] * ug0 + qd[1] * ug1; + vg[i + Q * 1] = qd[1] * ug0 + qd[2] * ug1; } break; - case 3333: + case 323: CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++) { - CeedScalar qd[6]; - MultAdjJCAdjJt33(J + i, Q, coeff, 1, 3, qw[i], 1, qd); + CeedScalar qd[3]; + MultAdjJCAdjJt32(J + i, Q, coeff, 1, 6, qw[i], 1, qd); CeedPragmaSIMD for (CeedInt d = 0; d < 3; d++) { const CeedScalar ug0 = ug[i + Q * (d + 3 * 0)]; const CeedScalar ug1 = ug[i + Q * (d + 3 * 1)]; - const CeedScalar ug2 = ug[i + Q * (d + 3 * 2)]; - vg[i + Q * (d + 3 * 0)] = qd[0] * ug0 + qd[1] * ug1 + qd[2] * ug2; - vg[i + Q * (d + 3 * 1)] = qd[1] * ug0 + qd[3] * ug1 + qd[4] * ug2; - vg[i + Q * (d + 3 * 2)] = qd[2] * ug0 + qd[4] * ug1 + qd[5] * ug2; + vg[i + Q * (d + 3 * 0)] = qd[0] * ug0 + qd[1] * ug1; + vg[i + Q * (d + 3 * 1)] = qd[1] * ug0 + qd[2] * ug1; } } break; - case 3311: + case 331: CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++) { CeedScalar qd[6]; - MultAdjJCAdjJt33(J + i, Q, coeff, 1, 1, qw[i], 1, qd); + MultAdjJCAdjJt33(J + i, Q, coeff, 1, 6, qw[i], 1, qd); const CeedScalar ug0 = ug[i + Q * 0]; const CeedScalar ug1 = ug[i + Q * 1]; const CeedScalar ug2 = ug[i + Q * 2]; @@ -630,11 +756,11 @@ CEED_QFUNCTION(f_apply_diff_mf_const)(void *ctx, CeedInt Q, vg[i + Q * 2] = qd[2] * ug0 + qd[4] * ug1 + qd[5] * ug2; } break; - case 3313: + case 333: CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++) { CeedScalar qd[6]; - MultAdjJCAdjJt33(J + i, Q, coeff, 1, 1, qw[i], 1, qd); + MultAdjJCAdjJt33(J + i, Q, coeff, 1, 6, qw[i], 1, qd); CeedPragmaSIMD for (CeedInt d = 0; d < 3; d++) { const CeedScalar ug0 = ug[i + Q * (d + 3 * 0)]; @@ -650,68 +776,31 @@ CEED_QFUNCTION(f_apply_diff_mf_const)(void *ctx, CeedInt Q, return 0; } -/// libCEED QFunction for applying a diff operator -CEED_QFUNCTION(f_apply_diff_mf_quad)(void *ctx, CeedInt Q, - const CeedScalar *const *in, - CeedScalar *const *out) +/// libCEED QFunction for applying a diffusion operator with a scalar +/// coefficient evaluated at quadrature points +CEED_QFUNCTION(f_apply_diff_mf_quad_scalar)(void *ctx, CeedInt Q, + const CeedScalar *const *in, + CeedScalar *const *out) { DiffusionContext *bc = (DiffusionContext *)ctx; // in[0], out[0] have shape [dim, ncomp=vdim, Q] - // in[1] is coefficients with shape [ncomp=coeff_comp, Q] + // in[1] is coefficients with shape [ncomp=1, Q] // in[2] is Jacobians with shape [dim, ncomp=space_dim, Q] // in[3] is quadrature weights, size (Q) // - // At every quadrature point, compute qw/det(J) adj(J) C adj(J)^T. + // At every quadrature point, compute qw/det(J) adj(J) C adj(J)^T const CeedScalar *ug = in[0], *c = in[1], *J = in[2], *qw = in[3]; CeedScalar *vg = out[0]; - switch (1000 * bc->space_dim + 100 * bc->dim + 10 * bc->coeff_comp + bc->vdim) + switch (100 * bc->space_dim + 10 * bc->dim + bc->vdim) { - case 1111: + case 111: CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++) { const CeedScalar qd = qw[i] * c[i] / J[i]; vg[i] = qd * ug[i]; } break; - case 2131: - CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++) - { - CeedScalar qd; - MultAdjJCAdjJt21(J + i, Q, c + i, Q, 3, qw[i], 1, &qd); - vg[i] = qd * ug[i]; - } - break; - case 2132: - CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++) - { - CeedScalar qd; - MultAdjJCAdjJt21(J + i, Q, c + i, Q, 3, qw[i], 1, &qd); - CeedPragmaSIMD for (CeedInt d = 0; d < 2; d++) - { - vg[i + Q * d] = qd * ug[i + Q * d]; - } - } - break; - case 2121: - CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++) - { - CeedScalar qd; - MultAdjJCAdjJt21(J + i, Q, c + i, Q, 2, qw[i], 1, &qd); - vg[i] = qd * ug[i]; - } - break; - case 2122: - CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++) - { - CeedScalar qd; - MultAdjJCAdjJt21(J + i, Q, c + i, Q, 2, qw[i], 1, &qd); - CeedPragmaSIMD for (CeedInt d = 0; d < 2; d++) - { - vg[i + Q * d] = qd * ug[i + Q * d]; - } - } - break; - case 2111: + case 211: CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++) { CeedScalar qd; @@ -719,7 +808,7 @@ CEED_QFUNCTION(f_apply_diff_mf_quad)(void *ctx, CeedInt Q, vg[i] = qd * ug[i]; } break; - case 2112: + case 212: CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++) { CeedScalar qd; @@ -730,22 +819,22 @@ CEED_QFUNCTION(f_apply_diff_mf_quad)(void *ctx, CeedInt Q, } } break; - case 2231: + case 221: CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++) { CeedScalar qd[3]; - MultAdjJCAdjJt22(J + i, Q, c + i, Q, 3, qw[i], 1, qd); + MultAdjJCAdjJt22(J + i, Q, c + i, Q, 1, qw[i], 1, qd); const CeedScalar ug0 = ug[i + Q * 0]; const CeedScalar ug1 = ug[i + Q * 1]; vg[i + Q * 0] = qd[0] * ug0 + qd[1] * ug1; vg[i + Q * 1] = qd[1] * ug0 + qd[2] * ug1; } break; - case 2232: + case 222: CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++) { CeedScalar qd[3]; - MultAdjJCAdjJt22(J + i, Q, c + i, Q, 3, qw[i], 1, qd); + MultAdjJCAdjJt22(J + i, Q, c + i, Q, 1, qw[i], 1, qd); CeedPragmaSIMD for (CeedInt d = 0; d < 2; d++) { const CeedScalar ug0 = ug[i + Q * (d + 2 * 0)]; @@ -755,122 +844,141 @@ CEED_QFUNCTION(f_apply_diff_mf_quad)(void *ctx, CeedInt Q, } } break; - case 2221: + case 321: CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++) { CeedScalar qd[3]; - MultAdjJCAdjJt22(J + i, Q, c + i, Q, 2, qw[i], 1, qd); + MultAdjJCAdjJt32(J + i, Q, c + i, Q, 1, qw[i], 1, qd); const CeedScalar ug0 = ug[i + Q * 0]; const CeedScalar ug1 = ug[i + Q * 1]; vg[i + Q * 0] = qd[0] * ug0 + qd[1] * ug1; vg[i + Q * 1] = qd[1] * ug0 + qd[2] * ug1; } break; - case 2222: + case 323: CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++) { CeedScalar qd[3]; - MultAdjJCAdjJt22(J + i, Q, c + i, Q, 2, qw[i], 1, qd); - CeedPragmaSIMD for (CeedInt d = 0; d < 2; d++) + MultAdjJCAdjJt32(J + i, Q, c + i, Q, 1, qw[i], 1, qd); + CeedPragmaSIMD for (CeedInt d = 0; d < 3; d++) { - const CeedScalar ug0 = ug[i + Q * (d + 2 * 0)]; - const CeedScalar ug1 = ug[i + Q * (d + 2 * 1)]; - vg[i + Q * (d + 2 * 0)] = qd[0] * ug0 + qd[1] * ug1; - vg[i + Q * (d + 2 * 1)] = qd[1] * ug0 + qd[2] * ug1; + const CeedScalar ug0 = ug[i + Q * (d + 3 * 0)]; + const CeedScalar ug1 = ug[i + Q * (d + 3 * 1)]; + vg[i + Q * (d + 3 * 0)] = qd[0] * ug0 + qd[1] * ug1; + vg[i + Q * (d + 3 * 1)] = qd[1] * ug0 + qd[2] * ug1; } } break; - case 2211: + case 331: CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++) { - CeedScalar qd[3]; - MultAdjJCAdjJt22(J + i, Q, c + i, Q, 1, qw[i], 1, qd); + CeedScalar qd[6]; + MultAdjJCAdjJt33(J + i, Q, c + i, Q, 1, qw[i], 1, qd); const CeedScalar ug0 = ug[i + Q * 0]; const CeedScalar ug1 = ug[i + Q * 1]; - vg[i + Q * 0] = qd[0] * ug0 + qd[1] * ug1; - vg[i + Q * 1] = qd[1] * ug0 + qd[2] * ug1; + const CeedScalar ug2 = ug[i + Q * 2]; + vg[i + Q * 0] = qd[0] * ug0 + qd[1] * ug1 + qd[2] * ug2; + vg[i + Q * 1] = qd[1] * ug0 + qd[3] * ug1 + qd[4] * ug2; + vg[i + Q * 2] = qd[2] * ug0 + qd[4] * ug1 + qd[5] * ug2; } break; - case 2212: + case 333: CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++) { - CeedScalar qd[3]; - MultAdjJCAdjJt22(J + i, Q, c + i, Q, 1, qw[i], 1, qd); - CeedPragmaSIMD for (CeedInt d = 0; d < 2; d++) + CeedScalar qd[6]; + MultAdjJCAdjJt33(J + i, Q, c + i, Q, 1, qw[i], 1, qd); + CeedPragmaSIMD for (CeedInt d = 0; d < 3; d++) { - const CeedScalar ug0 = ug[i + Q * (d + 2 * 0)]; - const CeedScalar ug1 = ug[i + Q * (d + 2 * 1)]; - vg[i + Q * (d + 2 * 0)] = qd[0] * ug0 + qd[1] * ug1; - vg[i + Q * (d + 2 * 1)] = qd[1] * ug0 + qd[2] * ug1; + const CeedScalar ug0 = ug[i + Q * (d + 3 * 0)]; + const CeedScalar ug1 = ug[i + Q * (d + 3 * 1)]; + const CeedScalar ug2 = ug[i + Q * (d + 3 * 2)]; + vg[i + Q * (d + 3 * 0)] = qd[0] * ug0 + qd[1] * ug1 + qd[2] * ug2; + vg[i + Q * (d + 3 * 1)] = qd[1] * ug0 + qd[3] * ug1 + qd[4] * ug2; + vg[i + Q * (d + 3 * 2)] = qd[2] * ug0 + qd[4] * ug1 + qd[5] * ug2; } } break; - case 3261: + } + return 0; +} + +/// libCEED QFunction for applying a diffusion operator with a vector +/// coefficient evaluated at quadrature points +CEED_QFUNCTION(f_apply_diff_mf_quad_vector)(void *ctx, CeedInt Q, + const CeedScalar *const *in, + CeedScalar *const *out) +{ + DiffusionContext *bc = (DiffusionContext *)ctx; + // in[0], out[0] have shape [dim, ncomp=vdim, Q] + // in[1] is coefficients with shape [ncomp=space_dim, Q] + // in[2] is Jacobians with shape [dim, ncomp=space_dim, Q] + // in[3] is quadrature weights, size (Q) + // + // At every quadrature point, compute qw/det(J) adj(J) C adj(J)^T + const CeedScalar *ug = in[0], *c = in[1], *J = in[2], *qw = in[3]; + CeedScalar *vg = out[0]; + switch (100 * bc->space_dim + 10 * bc->dim + bc->vdim) + { + case 211: CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++) { - CeedScalar qd[3]; - MultAdjJCAdjJt32(J + i, Q, c + i, Q, 6, qw[i], 1, qd); - const CeedScalar ug0 = ug[i + Q * 0]; - const CeedScalar ug1 = ug[i + Q * 1]; - vg[i + Q * 0] = qd[0] * ug0 + qd[1] * ug1; - vg[i + Q * 1] = qd[1] * ug0 + qd[2] * ug1; + CeedScalar qd; + MultAdjJCAdjJt21(J + i, Q, c + i, Q, 2, qw[i], 1, &qd); + vg[i] = qd * ug[i]; } break; - case 3263: + case 212: CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++) { - CeedScalar qd[3]; - MultAdjJCAdjJt32(J + i, Q, c + i, Q, 6, qw[i], 1, qd); - CeedPragmaSIMD for (CeedInt d = 0; d < 3; d++) + CeedScalar qd; + MultAdjJCAdjJt21(J + i, Q, c + i, Q, 2, qw[i], 1, &qd); + CeedPragmaSIMD for (CeedInt d = 0; d < 2; d++) { - const CeedScalar ug0 = ug[i + Q * (d + 3 * 0)]; - const CeedScalar ug1 = ug[i + Q * (d + 3 * 1)]; - vg[i + Q * (d + 3 * 0)] = qd[0] * ug0 + qd[1] * ug1; - vg[i + Q * (d + 3 * 1)] = qd[1] * ug0 + qd[2] * ug1; + vg[i + Q * d] = qd * ug[i + Q * d]; } } break; - case 3231: + case 221: CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++) { CeedScalar qd[3]; - MultAdjJCAdjJt32(J + i, Q, c + i, Q, 3, qw[i], 1, qd); + MultAdjJCAdjJt22(J + i, Q, c + i, Q, 2, qw[i], 1, qd); const CeedScalar ug0 = ug[i + Q * 0]; const CeedScalar ug1 = ug[i + Q * 1]; vg[i + Q * 0] = qd[0] * ug0 + qd[1] * ug1; vg[i + Q * 1] = qd[1] * ug0 + qd[2] * ug1; } break; - case 3233: + case 222: CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++) { CeedScalar qd[3]; - MultAdjJCAdjJt32(J + i, Q, c + i, Q, 3, qw[i], 1, qd); - CeedPragmaSIMD for (CeedInt d = 0; d < 3; d++) + MultAdjJCAdjJt22(J + i, Q, c + i, Q, 2, qw[i], 1, qd); + CeedPragmaSIMD for (CeedInt d = 0; d < 2; d++) { - const CeedScalar ug0 = ug[i + Q * (d + 3 * 0)]; - const CeedScalar ug1 = ug[i + Q * (d + 3 * 1)]; - vg[i + Q * (d + 3 * 0)] = qd[0] * ug0 + qd[1] * ug1; - vg[i + Q * (d + 3 * 1)] = qd[1] * ug0 + qd[2] * ug1; + const CeedScalar ug0 = ug[i + Q * (d + 2 * 0)]; + const CeedScalar ug1 = ug[i + Q * (d + 2 * 1)]; + vg[i + Q * (d + 2 * 0)] = qd[0] * ug0 + qd[1] * ug1; + vg[i + Q * (d + 2 * 1)] = qd[1] * ug0 + qd[2] * ug1; } } break; - case 3211: + case 321: CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++) { CeedScalar qd[3]; - MultAdjJCAdjJt32(J + i, Q, c + i, Q, 1, qw[i], 1, qd); + MultAdjJCAdjJt32(J + i, Q, c + i, Q, 3, qw[i], 1, qd); const CeedScalar ug0 = ug[i + Q * 0]; const CeedScalar ug1 = ug[i + Q * 1]; vg[i + Q * 0] = qd[0] * ug0 + qd[1] * ug1; vg[i + Q * 1] = qd[1] * ug0 + qd[2] * ug1; } break; - case 3213: + case 323: CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++) { CeedScalar qd[3]; - MultAdjJCAdjJt32(J + i, Q, c + i, Q, 1, qw[i], 1, qd); + MultAdjJCAdjJt32(J + i, Q, c + i, Q, 3, qw[i], 1, qd); CeedPragmaSIMD for (CeedInt d = 0; d < 3; d++) { const CeedScalar ug0 = ug[i + Q * (d + 3 * 0)]; @@ -880,11 +988,11 @@ CEED_QFUNCTION(f_apply_diff_mf_quad)(void *ctx, CeedInt Q, } } break; - case 3361: + case 331: CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++) { CeedScalar qd[6]; - MultAdjJCAdjJt33(J + i, Q, c + i, Q, 6, qw[i], 1, qd); + MultAdjJCAdjJt33(J + i, Q, c + i, Q, 3, qw[i], 1, qd); const CeedScalar ug0 = ug[i + Q * 0]; const CeedScalar ug1 = ug[i + Q * 1]; const CeedScalar ug2 = ug[i + Q * 2]; @@ -893,11 +1001,11 @@ CEED_QFUNCTION(f_apply_diff_mf_quad)(void *ctx, CeedInt Q, vg[i + Q * 2] = qd[2] * ug0 + qd[4] * ug1 + qd[5] * ug2; } break; - case 3363: + case 333: CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++) { CeedScalar qd[6]; - MultAdjJCAdjJt33(J + i, Q, c + i, Q, 6, qw[i], 1, qd); + MultAdjJCAdjJt33(J + i, Q, c + i, Q, 3, qw[i], 1, qd); CeedPragmaSIMD for (CeedInt d = 0; d < 3; d++) { const CeedScalar ug0 = ug[i + Q * (d + 3 * 0)]; @@ -909,40 +1017,101 @@ CEED_QFUNCTION(f_apply_diff_mf_quad)(void *ctx, CeedInt Q, } } break; - case 3331: + } + return 0; +} + +/// libCEED QFunction for applying a diffusion operator with a matrix +/// coefficient evaluated at quadrature points +CEED_QFUNCTION(f_apply_diff_mf_quad_matrix)(void *ctx, CeedInt Q, + const CeedScalar *const *in, + CeedScalar *const *out) +{ + DiffusionContext *bc = (DiffusionContext *)ctx; + // in[0], out[0] have shape [dim, ncomp=vdim, Q] + // in[1] is coefficients with shape [ncomp=space_dim*(space_dim+1)/2, Q] + // in[2] is Jacobians with shape [dim, ncomp=space_dim, Q] + // in[3] is quadrature weights, size (Q) + // + // At every quadrature point, compute qw/det(J) adj(J) C adj(J)^T + const CeedScalar *ug = in[0], *c = in[1], *J = in[2], *qw = in[3]; + CeedScalar *vg = out[0]; + switch (100 * bc->space_dim + 10 * bc->dim + bc->vdim) + { + case 211: CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++) { - CeedScalar qd[6]; - MultAdjJCAdjJt33(J + i, Q, c + i, Q, 3, qw[i], 1, qd); + CeedScalar qd; + MultAdjJCAdjJt21(J + i, Q, c + i, Q, 3, qw[i], 1, &qd); + vg[i] = qd * ug[i]; + } + break; + case 212: + CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++) + { + CeedScalar qd; + MultAdjJCAdjJt21(J + i, Q, c + i, Q, 3, qw[i], 1, &qd); + CeedPragmaSIMD for (CeedInt d = 0; d < 2; d++) + { + vg[i + Q * d] = qd * ug[i + Q * d]; + } + } + break; + case 221: + CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++) + { + CeedScalar qd[3]; + MultAdjJCAdjJt22(J + i, Q, c + i, Q, 3, qw[i], 1, qd); const CeedScalar ug0 = ug[i + Q * 0]; const CeedScalar ug1 = ug[i + Q * 1]; - const CeedScalar ug2 = ug[i + Q * 2]; - vg[i + Q * 0] = qd[0] * ug0 + qd[1] * ug1 + qd[2] * ug2; - vg[i + Q * 1] = qd[1] * ug0 + qd[3] * ug1 + qd[4] * ug2; - vg[i + Q * 2] = qd[2] * ug0 + qd[4] * ug1 + qd[5] * ug2; + vg[i + Q * 0] = qd[0] * ug0 + qd[1] * ug1; + vg[i + Q * 1] = qd[1] * ug0 + qd[2] * ug1; } break; - case 3333: + case 222: CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++) { - CeedScalar qd[6]; - MultAdjJCAdjJt33(J + i, Q, c + i, Q, 3, qw[i], 1, qd); + CeedScalar qd[3]; + MultAdjJCAdjJt22(J + i, Q, c + i, Q, 3, qw[i], 1, qd); + CeedPragmaSIMD for (CeedInt d = 0; d < 2; d++) + { + const CeedScalar ug0 = ug[i + Q * (d + 2 * 0)]; + const CeedScalar ug1 = ug[i + Q * (d + 2 * 1)]; + vg[i + Q * (d + 2 * 0)] = qd[0] * ug0 + qd[1] * ug1; + vg[i + Q * (d + 2 * 1)] = qd[1] * ug0 + qd[2] * ug1; + } + } + break; + case 321: + CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++) + { + CeedScalar qd[3]; + MultAdjJCAdjJt32(J + i, Q, c + i, Q, 6, qw[i], 1, qd); + const CeedScalar ug0 = ug[i + Q * 0]; + const CeedScalar ug1 = ug[i + Q * 1]; + vg[i + Q * 0] = qd[0] * ug0 + qd[1] * ug1; + vg[i + Q * 1] = qd[1] * ug0 + qd[2] * ug1; + } + break; + case 323: + CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++) + { + CeedScalar qd[3]; + MultAdjJCAdjJt32(J + i, Q, c + i, Q, 6, qw[i], 1, qd); CeedPragmaSIMD for (CeedInt d = 0; d < 3; d++) { const CeedScalar ug0 = ug[i + Q * (d + 3 * 0)]; const CeedScalar ug1 = ug[i + Q * (d + 3 * 1)]; - const CeedScalar ug2 = ug[i + Q * (d + 3 * 2)]; - vg[i + Q * (d + 3 * 0)] = qd[0] * ug0 + qd[1] * ug1 + qd[2] * ug2; - vg[i + Q * (d + 3 * 1)] = qd[1] * ug0 + qd[3] * ug1 + qd[4] * ug2; - vg[i + Q * (d + 3 * 2)] = qd[2] * ug0 + qd[4] * ug1 + qd[5] * ug2; + vg[i + Q * (d + 3 * 0)] = qd[0] * ug0 + qd[1] * ug1; + vg[i + Q * (d + 3 * 1)] = qd[1] * ug0 + qd[2] * ug1; } } break; - case 3311: + case 331: CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++) { CeedScalar qd[6]; - MultAdjJCAdjJt33(J + i, Q, c + i, Q, 1, qw[i], 1, qd); + MultAdjJCAdjJt33(J + i, Q, c + i, Q, 6, qw[i], 1, qd); const CeedScalar ug0 = ug[i + Q * 0]; const CeedScalar ug1 = ug[i + Q * 1]; const CeedScalar ug2 = ug[i + Q * 2]; @@ -951,11 +1120,11 @@ CEED_QFUNCTION(f_apply_diff_mf_quad)(void *ctx, CeedInt Q, vg[i + Q * 2] = qd[2] * ug0 + qd[4] * ug1 + qd[5] * ug2; } break; - case 3313: + case 333: CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++) { CeedScalar qd[6]; - MultAdjJCAdjJt33(J + i, Q, c + i, Q, 1, qw[i], 1, qd); + MultAdjJCAdjJt33(J + i, Q, c + i, Q, 6, qw[i], 1, qd); CeedPragmaSIMD for (CeedInt d = 0; d < 3; d++) { const CeedScalar ug0 = ug[i + Q * (d + 3 * 0)]; diff --git a/fem/ceed/integrators/divdiv/divdiv.cpp b/fem/ceed/integrators/divdiv/divdiv.cpp index 237a6398cde..1a3acdadfb7 100644 --- a/fem/ceed/integrators/divdiv/divdiv.cpp +++ b/fem/ceed/integrators/divdiv/divdiv.cpp @@ -25,35 +25,67 @@ namespace ceed #ifdef MFEM_USE_CEED struct DivDivOperatorInfo : public OperatorInfo { - DivDivContext ctx; + DivDivContext ctx = {0}; DivDivOperatorInfo(const mfem::FiniteElementSpace &fes, mfem::Coefficient *Q, - bool use_bdr) + bool use_bdr = false, bool use_mf = false) { MFEM_VERIFY(fes.GetVDim() == 1, "libCEED interface for vector FE does not support VDim > 1!"); ctx.dim = fes.GetMesh()->Dimension() - use_bdr; ctx.space_dim = fes.GetMesh()->SpaceDimension(); + if (!use_mf) + { + apply_func = ":f_apply_divdiv"; + apply_qf = &f_apply_divdiv; + } + else + { + build_func = ""; + build_qf = nullptr; + } if (Q == nullptr) { ctx.coeff = 1.0; + if (!use_mf) + { + build_func = ":f_build_divdiv_const"; + build_qf = &f_build_divdiv_const; + } + else + { + apply_func = ":f_apply_divdiv_mf_const"; + apply_qf = &f_apply_divdiv_mf_const; + } } - else if (ConstantCoefficient *const_coeff = - dynamic_cast(Q)) + else if (mfem::ConstantCoefficient *const_coeff = + dynamic_cast(Q)) { ctx.coeff = const_coeff->constant; + if (!use_mf) + { + build_func = ":f_build_divdiv_const"; + build_qf = &f_build_divdiv_const; + } + else + { + apply_func = ":f_apply_divdiv_mf_const"; + apply_qf = &f_apply_divdiv_mf_const; + } + } + else + { + if (!use_mf) + { + build_func = ":f_build_divdiv_quad"; + build_qf = &f_build_divdiv_quad; + } + else + { + apply_func = ":f_apply_divdiv_mf_quad"; + apply_qf = &f_apply_divdiv_mf_quad; + } } - header = "/integrators/divdiv/divdiv_qf.h"; - build_func_const = ":f_build_divdiv_const"; - build_qf_const = &f_build_divdiv_const; - build_func_quad = ":f_build_divdiv_quad"; - build_qf_quad = &f_build_divdiv_quad; - apply_func = ":f_apply_divdiv"; - apply_qf = &f_apply_divdiv; - apply_func_mf_const = ":f_apply_divdiv_mf_const"; - apply_qf_mf_const = &f_apply_divdiv_mf_const; - apply_func_mf_quad = ":f_apply_divdiv_mf_quad"; - apply_qf_mf_quad = &f_apply_divdiv_mf_quad; trial_op = EvalMode::Div; test_op = EvalMode::Div; qdatasize = 1; @@ -80,7 +112,7 @@ MFDivDivIntegrator::MFDivDivIntegrator(const mfem::DivDivIntegrator &integ, const bool use_bdr) { #ifdef MFEM_USE_CEED - DivDivOperatorInfo info(fes, Q, use_bdr); + DivDivOperatorInfo info(fes, Q, use_bdr, true); Assemble(integ, info, fes, Q, use_bdr, true); #else MFEM_ABORT("MFEM must be built with MFEM_USE_CEED=YES to use libCEED."); diff --git a/fem/ceed/integrators/divdiv/divdiv_qf.h b/fem/ceed/integrators/divdiv/divdiv_qf.h index d78893c62ff..853aa0011ed 100644 --- a/fem/ceed/integrators/divdiv/divdiv_qf.h +++ b/fem/ceed/integrators/divdiv/divdiv_qf.h @@ -14,16 +14,14 @@ #include "../util/util_qf.h" -/// A structure used to pass additional data to f_build_divdiv and -/// f_apply_divdiv struct DivDivContext { CeedInt dim, space_dim; CeedScalar coeff; }; -/// libCEED QFunction for building quadrature data for a div-div operator with -/// a constant coefficient +/// libCEED QFunction for building quadrature data for a div-div operator +/// with a constant coefficient CEED_QFUNCTION(f_build_divdiv_const)(void *ctx, CeedInt Q, const CeedScalar *const *in, CeedScalar *const *out) @@ -32,7 +30,7 @@ CEED_QFUNCTION(f_build_divdiv_const)(void *ctx, CeedInt Q, // in[0] is Jacobians with shape [dim, ncomp=space_dim, Q] // in[1] is quadrature weights, size (Q) // - // At every quadrature point, compute and store qw * c / det(J). + // At every quadrature point, compute and store qw * c / det(J) const CeedScalar coeff = bc->coeff; const CeedScalar *J = in[0], *qw = in[1]; CeedScalar *qd = out[0]; @@ -72,8 +70,8 @@ CEED_QFUNCTION(f_build_divdiv_const)(void *ctx, CeedInt Q, return 0; } -/// libCEED QFunction for building quadrature data for a div-div operator with -/// a coefficient evaluated at quadrature points. +/// libCEED QFunction for building quadrature data for a div-div operator +/// with a coefficient evaluated at quadrature points CEED_QFUNCTION(f_build_divdiv_quad)(void *ctx, CeedInt Q, const CeedScalar *const *in, CeedScalar *const *out) @@ -83,7 +81,7 @@ CEED_QFUNCTION(f_build_divdiv_quad)(void *ctx, CeedInt Q, // in[1] is Jacobians with shape [dim, ncomp=space_dim, Q] // in[2] is quadrature weights, size (Q) // - // At every quadrature point, compute and store qw * c / det(J). + // At every quadrature point, compute and store qw * c / det(J) const CeedScalar *c = in[0], *J = in[1], *qw = in[2]; CeedScalar *qd = out[0]; switch (10 * bc->space_dim + bc->dim) @@ -137,16 +135,18 @@ CEED_QFUNCTION(f_apply_divdiv)(void *ctx, CeedInt Q, return 0; } -/// libCEED QFunction for applying a div-div operator +/// libCEED QFunction for applying a div-div operator with a constant +/// coefficient CEED_QFUNCTION(f_apply_divdiv_mf_const)(void *ctx, CeedInt Q, - const CeedScalar *const *in, CeedScalar *const *out) + const CeedScalar *const *in, + CeedScalar *const *out) { DivDivContext *bc = (DivDivContext *)ctx; // in[0], out[0] have shape [ncomp=1, Q] // in[1] is Jacobians with shape [dim, ncomp=space_dim, Q] // in[2] is quadrature weights, size (Q) // - // At every quadrature point, compute qw * c / det(J). + // At every quadrature point, compute qw * c / det(J) const CeedScalar coeff = bc->coeff; const CeedScalar *ud = in[0], *J = in[1], *qw = in[2]; CeedScalar *vd = out[0]; @@ -191,9 +191,11 @@ CEED_QFUNCTION(f_apply_divdiv_mf_const)(void *ctx, CeedInt Q, return 0; } -/// libCEED QFunction for applying a div-div operator +/// libCEED QFunction for applying a div-div operator with a coefficient +/// evaluated at quadrature points CEED_QFUNCTION(f_apply_divdiv_mf_quad)(void *ctx, CeedInt Q, - const CeedScalar *const *in, CeedScalar *const *out) + const CeedScalar *const *in, + CeedScalar *const *out) { DivDivContext *bc = (DivDivContext *)ctx; // in[0], out[0] have shape [ncomp=1, Q] @@ -201,7 +203,7 @@ CEED_QFUNCTION(f_apply_divdiv_mf_quad)(void *ctx, CeedInt Q, // in[2] is Jacobians with shape [dim, ncomp=space_dim, Q] // in[3] is quadrature weights, size (Q) // - // At every quadrature point, compute qw * c / det(J). + // At every quadrature point, compute qw * c / det(J) const CeedScalar *ud = in[0], *c = in[1], *J = in[2], *qw = in[3]; CeedScalar *vd = out[0]; switch (10 * bc->space_dim + bc->dim) diff --git a/fem/ceed/integrators/mass/mass.cpp b/fem/ceed/integrators/mass/mass.cpp index f6871d58a5a..6a8d67ddc03 100644 --- a/fem/ceed/integrators/mass/mass.cpp +++ b/fem/ceed/integrators/mass/mass.cpp @@ -25,34 +25,66 @@ namespace ceed #ifdef MFEM_USE_CEED struct MassOperatorInfo : public OperatorInfo { - MassContext ctx; + MassContext ctx = {0}; MassOperatorInfo(const mfem::FiniteElementSpace &fes, mfem::Coefficient *Q, - bool use_bdr) + bool use_bdr = false, bool use_mf = false) { ctx.dim = fes.GetMesh()->Dimension() - use_bdr; ctx.space_dim = fes.GetMesh()->SpaceDimension(); ctx.vdim = fes.GetVDim(); + if (!use_mf) + { + apply_func = ":f_apply_mass"; + apply_qf = &f_apply_mass; + } + else + { + build_func = ""; + build_qf = nullptr; + } if (Q == nullptr) { ctx.coeff = 1.0; + if (!use_mf) + { + build_func = ":f_build_mass_const"; + build_qf = &f_build_mass_const; + } + else + { + apply_func = ":f_apply_mass_mf_const"; + apply_qf = &f_apply_mass_mf_const; + } } - else if (ConstantCoefficient *const_coeff = - dynamic_cast(Q)) + else if (mfem::ConstantCoefficient *const_coeff = + dynamic_cast(Q)) { ctx.coeff = const_coeff->constant; + if (!use_mf) + { + build_func = ":f_build_mass_const"; + build_qf = &f_build_mass_const; + } + else + { + apply_func = ":f_apply_mass_mf_const"; + apply_qf = &f_apply_mass_mf_const; + } + } + else + { + if (!use_mf) + { + build_func = ":f_build_mass_quad"; + build_qf = &f_build_mass_quad; + } + else + { + apply_func = ":f_apply_mass_mf_quad"; + apply_qf = &f_apply_mass_mf_quad; + } } - header = "/integrators/mass/mass_qf.h"; - build_func_const = ":f_build_mass_const"; - build_qf_const = &f_build_mass_const; - build_func_quad = ":f_build_mass_quad"; - build_qf_quad = &f_build_mass_quad; - apply_func = ":f_apply_mass"; - apply_qf = &f_apply_mass; - apply_func_mf_const = ":f_apply_mass_mf_const"; - apply_qf_mf_const = &f_apply_mass_mf_const; - apply_func_mf_quad = ":f_apply_mass_mf_quad"; - apply_qf_mf_quad = &f_apply_mass_mf_quad; trial_op = EvalMode::Interp; test_op = EvalMode::Interp; qdatasize = 1; @@ -92,7 +124,7 @@ MFMassIntegrator::MFMassIntegrator(const mfem::MassIntegrator &integ, const bool use_bdr) { #ifdef MFEM_USE_CEED - MassOperatorInfo info(fes, Q, use_bdr); + MassOperatorInfo info(fes, Q, use_bdr, true); Assemble(integ, info, fes, Q, use_bdr, true); #else MFEM_ABORT("MFEM must be built with MFEM_USE_CEED=YES to use libCEED."); @@ -105,7 +137,7 @@ MFMassIntegrator::MFMassIntegrator(const mfem::VectorMassIntegrator &integ, const bool use_bdr) { #ifdef MFEM_USE_CEED - MassOperatorInfo info(fes, Q, use_bdr); + MassOperatorInfo info(fes, Q, use_bdr, true); Assemble(integ, info, fes, Q, use_bdr, true); #else MFEM_ABORT("MFEM must be built with MFEM_USE_CEED=YES to use libCEED."); diff --git a/fem/ceed/integrators/mass/mass_qf.h b/fem/ceed/integrators/mass/mass_qf.h index 93d904ff4bc..3cdd3b5e39a 100644 --- a/fem/ceed/integrators/mass/mass_qf.h +++ b/fem/ceed/integrators/mass/mass_qf.h @@ -14,15 +14,14 @@ #include "../util/util_qf.h" -/// A structure used to pass additional data to f_build_mass and f_apply_mass struct MassContext { CeedInt dim, space_dim, vdim; CeedScalar coeff; }; -/// libCEED QFunction for building quadrature data for a mass operator with a -/// constant coefficient +/// libCEED QFunction for building quadrature data for a mass operator +/// with a constant coefficient CEED_QFUNCTION(f_build_mass_const)(void *ctx, CeedInt Q, const CeedScalar *const *in, CeedScalar *const *out) @@ -30,7 +29,7 @@ CEED_QFUNCTION(f_build_mass_const)(void *ctx, CeedInt Q, // in[0] is Jacobians with shape [dim, ncomp=space_dim, Q] // in[1] is quadrature weights, size (Q) // - // At every quadrature point, compute and store qw * c * det(J). + // At every quadrature point, compute and store qw * c * det(J) MassContext *bc = (MassContext *)ctx; const CeedScalar coeff = bc->coeff; const CeedScalar *J = in[0], *qw = in[1]; @@ -71,8 +70,8 @@ CEED_QFUNCTION(f_build_mass_const)(void *ctx, CeedInt Q, return 0; } -/// libCEED QFunction for building quadrature data for a mass operator with a -/// coefficient evaluated at quadrature points. +/// libCEED QFunction for building quadrature data for a mass operator +/// with a coefficient evaluated at quadrature points CEED_QFUNCTION(f_build_mass_quad)(void *ctx, CeedInt Q, const CeedScalar *const *in, CeedScalar *const *out) @@ -81,7 +80,7 @@ CEED_QFUNCTION(f_build_mass_quad)(void *ctx, CeedInt Q, // in[1] is Jacobians with shape [dim, ncomp=space_dim, Q] // in[2] is quadrature weights, size (Q) // - // At every quadrature point, compute and store qw * c * det(J). + // At every quadrature point, compute and store qw * c * det(J) MassContext *bc = (MassContext *)ctx; const CeedScalar *c = in[0], *J = in[1], *qw = in[2]; CeedScalar *qd = out[0]; @@ -162,16 +161,18 @@ CEED_QFUNCTION(f_apply_mass)(void *ctx, CeedInt Q, return 0; } -/// libCEED QFunction for applying a mass operator +/// libCEED QFunction for applying a mass operator with a constant +/// coefficient CEED_QFUNCTION(f_apply_mass_mf_const)(void *ctx, CeedInt Q, - const CeedScalar *const *in, CeedScalar *const *out) + const CeedScalar *const *in, + CeedScalar *const *out) { MassContext *bc = (MassContext *)ctx; // in[0], out[0] have shape [ncomp=vdim, Q] // in[1] is Jacobians with shape [dim, ncomp=space_dim, Q] // in[2] is quadrature weights, size (Q) // - // At every quadrature point, compute qw * c * det(J). + // At every quadrature point, compute qw * c * det(J) const CeedScalar coeff = bc->coeff; const CeedScalar *u = in[0], *J = in[1], *qw = in[2]; CeedScalar *v = out[0]; @@ -256,16 +257,18 @@ CEED_QFUNCTION(f_apply_mass_mf_const)(void *ctx, CeedInt Q, return 0; } -/// libCEED QFunction for applying a mass operator +/// libCEED QFunction for applying a mass operator with a coefficient +/// evaluated at quadrature points CEED_QFUNCTION(f_apply_mass_mf_quad)(void *ctx, CeedInt Q, - const CeedScalar *const *in, CeedScalar *const *out) + const CeedScalar *const *in, + CeedScalar *const *out) { MassContext *bc = (MassContext *)ctx; // in[0], out[0] have shape [ncomp=vdim, Q] // in[1] is Jacobians with shape [dim, ncomp=space_dim, Q] // in[2] is quadrature weights, size (Q) // - // At every quadrature point, compute qw * c * det(J). + // At every quadrature point, compute qw * c * det(J) const CeedScalar *u = in[0], *c = in[1], *J = in[2], *qw = in[3]; CeedScalar *v = out[0]; switch (100 * bc->space_dim + 10 * bc->dim + bc->vdim) diff --git a/fem/ceed/integrators/mixedveccurl/mixedveccurl.cpp b/fem/ceed/integrators/mixedveccurl/mixedveccurl.cpp index 352688b580f..93c11c8bf3d 100644 --- a/fem/ceed/integrators/mixedveccurl/mixedveccurl.cpp +++ b/fem/ceed/integrators/mixedveccurl/mixedveccurl.cpp @@ -23,94 +23,132 @@ namespace ceed { #ifdef MFEM_USE_CEED -struct MixedVectorCurlOperatorInfo : public OperatorInfo +struct MixedVectorCurlOperatorInfoBase : public OperatorInfo { - CurlCurlContext ctx; + CurlCurlContext ctx = {0}; template - MixedVectorCurlOperatorInfo(const mfem::FiniteElementSpace &trial_fes, - const mfem::FiniteElementSpace &test_fes, - CoeffType *Q, bool use_bdr, bool weak_curl) + MixedVectorCurlOperatorInfoBase(const mfem::FiniteElementSpace &trial_fes, + const mfem::FiniteElementSpace &test_fes, + CoeffType *Q, bool use_bdr = false, + bool use_mf = false) { + // Reuse H(div) quadrature functions for CurlCurlIntegrator MFEM_VERIFY(trial_fes.GetVDim() == 1 && test_fes.GetVDim() == 1, "libCEED interface for vector FE does not support VDim > 1!"); ctx.dim = trial_fes.GetMesh()->Dimension() - use_bdr; MFEM_VERIFY(ctx.dim == 3, "MixedVectorCurlIntegrator and MixedVectorWeakCurlIntegrator " "require dim == 3!"); - MFEM_VERIFY( - weak_curl || - (trial_fes.FEColl()->GetDerivMapType(ctx.dim) == mfem::FiniteElement::H_DIV && - test_fes.FEColl()->GetMapType(ctx.dim) == mfem::FiniteElement::H_DIV), - "libCEED interface for MixedVectorCurlIntegrator requires " - "H(curl) domain and H(div) range FE spaces!"); - MFEM_VERIFY( - !weak_curl || - (trial_fes.FEColl()->GetMapType(ctx.dim) == mfem::FiniteElement::H_DIV && - test_fes.FEColl()->GetDerivMapType(ctx.dim) == mfem::FiniteElement::H_DIV), - "libCEED interface for MixedVectorWeakCurlIntegrator requires " - "H(div) domain and H(curl) range FE spaces!"); ctx.space_dim = trial_fes.GetMesh()->SpaceDimension(); ctx.curl_dim = (ctx.dim < 3) ? 1 : ctx.dim; + if (!use_mf) + { + apply_func = ":f_apply_curlcurl"; + apply_qf = &f_apply_curlcurl; + } + else + { + build_func = ""; + build_qf = nullptr; + } if (Q == nullptr) { - ctx.coeff_comp = 1; ctx.coeff[0] = 1.0; + if (!use_mf) + { + build_func = ":f_build_curlcurl_const_scalar"; + build_qf = &f_build_curlcurl_const_scalar; + } + else + { + apply_func = ":f_apply_curlcurl_mf_const_scalar"; + apply_qf = &f_apply_curlcurl_mf_const_scalar; + } } else { - InitCoefficient(*Q); + InitCoefficient(*Q, use_mf); } - - // Reuse H(div) quadrature functions for CurlCurlIntegrator header = "/integrators/curlcurl/curlcurl_qf.h"; - build_func_const = ":f_build_curlcurl_const"; - build_qf_const = &f_build_curlcurl_const; - build_func_quad = ":f_build_curlcurl_quad"; - build_qf_quad = &f_build_curlcurl_quad; - apply_func = ":f_apply_curlcurl"; - apply_qf = &f_apply_curlcurl; - apply_func_mf_const = ":f_apply_curlcurl_mf_const"; - apply_qf_mf_const = &f_apply_curlcurl_mf_const; - apply_func_mf_quad = ":f_apply_curlcurl_mf_quad"; - apply_qf_mf_quad = &f_apply_curlcurl_mf_quad; - trial_op = weak_curl ? EvalMode::Interp : EvalMode::Curl; - test_op = weak_curl ? EvalMode::Curl : EvalMode::Interp; qdatasize = (ctx.curl_dim * (ctx.curl_dim + 1)) / 2; } - void InitCoefficient(mfem::Coefficient &Q) + void InitCoefficient(mfem::Coefficient &Q, bool use_mf) { - ctx.coeff_comp = 1; - if (ConstantCoefficient *const_coeff = - dynamic_cast(&Q)) + if (mfem::ConstantCoefficient *const_coeff = + dynamic_cast(&Q)) { ctx.coeff[0] = const_coeff->constant; + if (!use_mf) + { + build_func = ":f_build_curlcurl_const_scalar"; + build_qf = &f_build_curlcurl_const_scalar; + } + else + { + apply_func = ":f_apply_curlcurl_mf_const_scalar"; + apply_qf = &f_apply_curlcurl_mf_const_scalar; + } + } + else + { + if (!use_mf) + { + build_func = ":f_build_curlcurl_quad_scalar"; + build_qf = &f_build_curlcurl_quad_scalar; + } + else + { + apply_func = ":f_apply_curlcurl_mf_quad_scalar"; + apply_qf = &f_apply_curlcurl_mf_quad_scalar; + } } } - void InitCoefficient(mfem::VectorCoefficient &VQ) + void InitCoefficient(mfem::VectorCoefficient &VQ, bool use_mf) { - const int vdim = VQ.GetVDim(); - ctx.coeff_comp = vdim; - if (VectorConstantCoefficient *const_coeff = - dynamic_cast(&VQ)) + if (mfem::VectorConstantCoefficient *const_coeff = + dynamic_cast(&VQ)) { - MFEM_VERIFY(ctx.coeff_comp <= LIBCEED_CURLCURL_COEFF_COMP_MAX, + const int vdim = VQ.GetVDim(); + MFEM_VERIFY(vdim <= LIBCEED_CURLCURL_COEFF_COMP_MAX, "VectorCoefficient dimension exceeds context storage!"); const mfem::Vector &val = const_coeff->GetVec(); for (int i = 0; i < vdim; i++) { ctx.coeff[i] = val[i]; } + if (!use_mf) + { + build_func = ":f_build_curlcurl_const_vector"; + build_qf = &f_build_curlcurl_const_vector; + } + else + { + apply_func = ":f_apply_curlcurl_mf_const_vector"; + apply_qf = &f_apply_curlcurl_mf_const_vector; + } + } + else + { + if (!use_mf) + { + build_func = ":f_build_curlcurl_quad_vector"; + build_qf = &f_build_curlcurl_quad_vector; + } + else + { + apply_func = ":f_apply_curlcurl_mf_quad_vector"; + apply_qf = &f_apply_curlcurl_mf_quad_vector; + } } } - void InitCoefficient(mfem::MatrixCoefficient &MQ) + void InitCoefficient(mfem::MatrixCoefficient &MQ, bool use_mf) { // Assumes matrix coefficient is symmetric - const int vdim = MQ.GetVDim(); - ctx.coeff_comp = (vdim * (vdim + 1)) / 2; - if (MatrixConstantCoefficient *const_coeff = - dynamic_cast(&MQ)) + if (mfem::MatrixConstantCoefficient *const_coeff = + dynamic_cast(&MQ)) { - MFEM_VERIFY(ctx.coeff_comp <= LIBCEED_CURLCURL_COEFF_COMP_MAX, + const int vdim = MQ.GetVDim(); + MFEM_VERIFY((vdim * (vdim + 1)) / 2 <= LIBCEED_CURLCURL_COEFF_COMP_MAX, "MatrixCoefficient dimensions exceed context storage!"); const mfem::DenseMatrix &val = const_coeff->GetMatrix(); for (int j = 0; j < vdim; j++) @@ -121,7 +159,68 @@ struct MixedVectorCurlOperatorInfo : public OperatorInfo ctx.coeff[idx] = val(i, j); } } + if (!use_mf) + { + build_func = ":f_build_curlcurl_const_matrix"; + build_qf = &f_build_curlcurl_const_matrix; + } + else + { + apply_func = ":f_apply_curlcurl_mf_const_matrix"; + apply_qf = &f_apply_curlcurl_mf_const_matrix; + } } + else + { + if (!use_mf) + { + build_func = ":f_build_curlcurl_quad_matrix"; + build_qf = &f_build_curlcurl_quad_matrix; + } + else + { + apply_func = ":f_apply_curlcurl_mf_quad_matrix"; + apply_qf = &f_apply_curlcurl_mf_quad_matrix; + } + } + } +}; + +struct MixedVectorCurlOperatorInfo : public MixedVectorCurlOperatorInfoBase +{ + template + MixedVectorCurlOperatorInfo(const mfem::FiniteElementSpace &trial_fes, + const mfem::FiniteElementSpace &test_fes, + CoeffType *Q, bool use_bdr = false, + bool use_mf = false) + : MixedVectorCurlOperatorInfoBase(trial_fes, test_fes, Q, use_bdr, use_mf) + { + MFEM_VERIFY( + trial_fes.FEColl()->GetDerivMapType(ctx.dim) == mfem::FiniteElement::H_DIV && + test_fes.FEColl()->GetMapType(ctx.dim) == mfem::FiniteElement::H_DIV, + "libCEED interface for MixedVectorCurlIntegrator requires " + "H(curl) domain and H(div) range FE spaces!"); + trial_op = EvalMode::Curl; + test_op = EvalMode::Interp; + } +}; + +struct MixedVectorWeakCurlOperatorInfo : public MixedVectorCurlOperatorInfoBase +{ + template + MixedVectorWeakCurlOperatorInfo(const mfem::FiniteElementSpace &trial_fes, + const mfem::FiniteElementSpace &test_fes, + CoeffType *Q, bool use_bdr = false, + bool use_mf = false) + : MixedVectorCurlOperatorInfoBase(trial_fes, test_fes, Q, use_bdr, use_mf) + { + MFEM_VERIFY( + trial_fes.FEColl()->GetMapType(ctx.dim) == mfem::FiniteElement::H_DIV && + test_fes.FEColl()->GetDerivMapType(ctx.dim) == mfem::FiniteElement::H_DIV, + "libCEED interface for MixedVectorWeakCurlIntegrator requires " + "H(div) domain and H(curl) range FE spaces!"); + trial_op = EvalMode::Interp; + test_op = EvalMode::Curl; } }; #endif @@ -135,7 +234,7 @@ PAMixedVectorCurlIntegrator::PAMixedVectorCurlIntegrator( const bool use_bdr) { #ifdef MFEM_USE_CEED - MixedVectorCurlOperatorInfo info(trial_fes, test_fes, Q, use_bdr, false); + MixedVectorCurlOperatorInfo info(trial_fes, test_fes, Q, use_bdr); Assemble(integ, info, trial_fes, test_fes, Q, use_bdr); #else MFEM_ABORT("MFEM must be built with MFEM_USE_CEED=YES to use libCEED."); @@ -151,7 +250,7 @@ MFMixedVectorCurlIntegrator::MFMixedVectorCurlIntegrator( const bool use_bdr) { #ifdef MFEM_USE_CEED - MixedVectorCurlOperatorInfo info(trial_fes, test_fes, Q, use_bdr, false); + MixedVectorCurlOperatorInfo info(trial_fes, test_fes, Q, use_bdr, true); Assemble(integ, info, trial_fes, test_fes, Q, use_bdr, true); #else MFEM_ABORT("MFEM must be built with MFEM_USE_CEED=YES to use libCEED."); @@ -167,7 +266,7 @@ PAMixedVectorWeakCurlIntegrator::PAMixedVectorWeakCurlIntegrator( const bool use_bdr) { #ifdef MFEM_USE_CEED - MixedVectorCurlOperatorInfo info(trial_fes, test_fes, Q, use_bdr, true); + MixedVectorWeakCurlOperatorInfo info(trial_fes, test_fes, Q, use_bdr); Assemble(integ, info, trial_fes, test_fes, Q, use_bdr); #else MFEM_ABORT("MFEM must be built with MFEM_USE_CEED=YES to use libCEED."); @@ -183,7 +282,7 @@ MFMixedVectorWeakCurlIntegrator::MFMixedVectorWeakCurlIntegrator( const bool use_bdr) { #ifdef MFEM_USE_CEED - MixedVectorCurlOperatorInfo info(trial_fes, test_fes, Q, use_bdr, true); + MixedVectorWeakCurlOperatorInfo info(trial_fes, test_fes, Q, use_bdr, true); Assemble(integ, info, trial_fes, test_fes, Q, use_bdr, true); #else MFEM_ABORT("MFEM must be built with MFEM_USE_CEED=YES to use libCEED."); diff --git a/fem/ceed/integrators/mixedvecgrad/mixedvecgrad.cpp b/fem/ceed/integrators/mixedvecgrad/mixedvecgrad.cpp index 52fe6b66886..d485fb81987 100644 --- a/fem/ceed/integrators/mixedvecgrad/mixedvecgrad.cpp +++ b/fem/ceed/integrators/mixedvecgrad/mixedvecgrad.cpp @@ -23,94 +23,132 @@ namespace ceed { #ifdef MFEM_USE_CEED -struct MixedVectorGradientOperatorInfo : public OperatorInfo +struct MixedVectorGradientOperatorInfoBase : public OperatorInfo { - DiffusionContext ctx; + DiffusionContext ctx = {0}; template - MixedVectorGradientOperatorInfo(const mfem::FiniteElementSpace &trial_fes, - const mfem::FiniteElementSpace &test_fes, - CoeffType *Q, bool use_bdr, bool mixed_vector_grad) + MixedVectorGradientOperatorInfoBase(const mfem::FiniteElementSpace &trial_fes, + const mfem::FiniteElementSpace &test_fes, + CoeffType *Q, bool use_bdr = false, + bool use_mf = false) { + // Reuse H(curl) quadrature functions for DiffusionIntegrator MFEM_VERIFY(trial_fes.GetVDim() == 1 && test_fes.GetVDim() == 1, "libCEED interface for vector FE does not support VDim > 1!"); ctx.dim = trial_fes.GetMesh()->Dimension() - use_bdr; MFEM_VERIFY(ctx.dim == 2 || ctx.dim == 3, "MixedVectorGradientIntegrator and MixedVectorWeakDivergenceIntegrator " "require dim == 2 or dim == 3!"); - MFEM_VERIFY( - !mixed_vector_grad || - (trial_fes.FEColl()->GetDerivMapType(ctx.dim) == mfem::FiniteElement::H_CURL && - test_fes.FEColl()->GetMapType(ctx.dim) == mfem::FiniteElement::H_CURL), - "libCEED interface for MixedVectorGradientIntegrator requires " - "H^1 domain and H(curl) range FE spaces!"); - MFEM_VERIFY( - mixed_vector_grad || - (trial_fes.FEColl()->GetMapType(ctx.dim) == mfem::FiniteElement::H_CURL && - test_fes.FEColl()->GetDerivMapType(ctx.dim) == mfem::FiniteElement::H_CURL), - "libCEED interface for MixedVectorWeakDivergenceIntegrator requires " - "H(curl) domain and H^1 range FE spaces!"); ctx.space_dim = trial_fes.GetMesh()->SpaceDimension(); ctx.vdim = 1; + if (!use_mf) + { + apply_func = ":f_apply_diff"; + apply_qf = &f_apply_diff; + } + else + { + build_func = ""; + build_qf = nullptr; + } if (Q == nullptr) { - ctx.coeff_comp = 1; - ctx.coeff[0] = mixed_vector_grad ? 1.0 : -1.0; + ctx.coeff[0] = 1.0; + if (!use_mf) + { + build_func = ":f_build_diff_const_scalar"; + build_qf = &f_build_diff_const_scalar; + } + else + { + apply_func = ":f_apply_diff_mf_const_scalar"; + apply_qf = &f_apply_diff_mf_const_scalar; + } } else { - InitCoefficient(*Q, mixed_vector_grad ? 1.0 : -1.0); + InitCoefficient(*Q, use_mf); } - - // Reuse H(curl) quadrature functions for DiffusionIntegrator header = "/integrators/diffusion/diffusion_qf.h"; - build_func_const = ":f_build_diff_const"; - build_qf_const = &f_build_diff_const; - build_func_quad = ":f_build_diff_quad"; - build_qf_quad = &f_build_diff_quad; - apply_func = ":f_apply_diff"; - apply_qf = &f_apply_diff; - apply_func_mf_const = ":f_apply_diff_mf_const"; - apply_qf_mf_const = &f_apply_diff_mf_const; - apply_func_mf_quad = ":f_apply_diff_mf_quad"; - apply_qf_mf_quad = &f_apply_diff_mf_quad; - trial_op = mixed_vector_grad ? EvalMode::Grad : EvalMode::Interp; - test_op = mixed_vector_grad ? EvalMode::Interp : EvalMode::Grad; qdatasize = (ctx.dim * (ctx.dim + 1)) / 2; } - void InitCoefficient(mfem::Coefficient &Q, double sign) + void InitCoefficient(mfem::Coefficient &Q, bool use_mf) { - ctx.coeff_comp = 1; - if (ConstantCoefficient *const_coeff = - dynamic_cast(&Q)) + if (mfem::ConstantCoefficient *const_coeff = + dynamic_cast(&Q)) { - ctx.coeff[0] = sign * const_coeff->constant; + ctx.coeff[0] = const_coeff->constant; + if (!use_mf) + { + build_func = ":f_build_diff_const_scalar"; + build_qf = &f_build_diff_const_scalar; + } + else + { + apply_func = ":f_apply_diff_mf_const_scalar"; + apply_qf = &f_apply_diff_mf_const_scalar; + } + } + else + { + if (!use_mf) + { + build_func = ":f_build_diff_quad_scalar"; + build_qf = &f_build_diff_quad_scalar; + } + else + { + apply_func = ":f_apply_diff_mf_quad_scalar"; + apply_qf = &f_apply_diff_mf_quad_scalar; + } } } - void InitCoefficient(mfem::VectorCoefficient &VQ, double sign) + void InitCoefficient(mfem::VectorCoefficient &VQ, bool use_mf) { - const int vdim = VQ.GetVDim(); - ctx.coeff_comp = vdim; - if (VectorConstantCoefficient *const_coeff = - dynamic_cast(&VQ)) + if (mfem::VectorConstantCoefficient *const_coeff = + dynamic_cast(&VQ)) { - MFEM_VERIFY(ctx.coeff_comp <= LIBCEED_DIFF_COEFF_COMP_MAX, + const int vdim = VQ.GetVDim(); + MFEM_VERIFY(vdim <= LIBCEED_DIFF_COEFF_COMP_MAX, "VectorCoefficient dimension exceeds context storage!"); const mfem::Vector &val = const_coeff->GetVec(); for (int i = 0; i < vdim; i++) { - ctx.coeff[i] = sign * val[i]; + ctx.coeff[i] = val[i]; + } + if (!use_mf) + { + build_func = ":f_build_diff_const_vector"; + build_qf = &f_build_diff_const_vector; + } + else + { + apply_func = ":f_apply_diff_mf_const_vector"; + apply_qf = &f_apply_diff_mf_const_vector; + } + } + else + { + if (!use_mf) + { + build_func = ":f_build_diff_quad_vector"; + build_qf = &f_build_diff_quad_vector; + } + else + { + apply_func = ":f_apply_diff_mf_quad_vector"; + apply_qf = &f_apply_diff_mf_quad_vector; } } } - void InitCoefficient(mfem::MatrixCoefficient &MQ, double sign) + void InitCoefficient(mfem::MatrixCoefficient &MQ, bool use_mf) { // Assumes matrix coefficient is symmetric - const int vdim = MQ.GetVDim(); - ctx.coeff_comp = (vdim * (vdim + 1)) / 2; - if (MatrixConstantCoefficient *const_coeff = - dynamic_cast(&MQ)) + if (mfem::MatrixConstantCoefficient *const_coeff = + dynamic_cast(&MQ)) { - MFEM_VERIFY(ctx.coeff_comp <= LIBCEED_DIFF_COEFF_COMP_MAX, + const int vdim = MQ.GetVDim(); + MFEM_VERIFY((vdim * (vdim + 1)) / 2 <= LIBCEED_DIFF_COEFF_COMP_MAX, "MatrixCoefficient dimensions exceed context storage!"); const mfem::DenseMatrix &val = const_coeff->GetMatrix(); for (int j = 0; j < vdim; j++) @@ -118,9 +156,76 @@ struct MixedVectorGradientOperatorInfo : public OperatorInfo for (int i = j; i < vdim; i++) { const int idx = (j * vdim) - (((j - 1) * j) / 2) + i - j; - ctx.coeff[idx] = sign * val(i, j); + ctx.coeff[idx] = val(i, j); } } + if (!use_mf) + { + build_func = ":f_build_diff_const_matrix"; + build_qf = &f_build_diff_const_matrix; + } + else + { + apply_func = ":f_apply_diff_mf_const_matrix"; + apply_qf = &f_apply_diff_mf_const_matrix; + } + } + else + { + if (!use_mf) + { + build_func = ":f_build_diff_quad_matrix"; + build_qf = &f_build_diff_quad_matrix; + } + else + { + apply_func = ":f_apply_diff_mf_quad_matrix"; + apply_qf = &f_apply_diff_mf_quad_matrix; + } + } + } +}; + +struct MixedVectorGradientOperatorInfo : + public MixedVectorGradientOperatorInfoBase +{ + template + MixedVectorGradientOperatorInfo(const mfem::FiniteElementSpace &trial_fes, + const mfem::FiniteElementSpace &test_fes, + CoeffType *Q, bool use_bdr = false, + bool use_mf = false) + : MixedVectorGradientOperatorInfoBase(trial_fes, test_fes, Q, use_bdr, use_mf) + { + MFEM_VERIFY( + (trial_fes.FEColl()->GetDerivMapType(ctx.dim) == mfem::FiniteElement::H_CURL && + test_fes.FEColl()->GetMapType(ctx.dim) == mfem::FiniteElement::H_CURL), + "libCEED interface for MixedVectorGradientIntegrator requires " + "H^1 domain and H(curl) range FE spaces!"); + trial_op = EvalMode::Grad; + test_op = EvalMode::Interp; + } +}; + +struct MixedVectorWeakDivergenceOperatorInfo : + public MixedVectorGradientOperatorInfoBase +{ + template + MixedVectorWeakDivergenceOperatorInfo(const mfem::FiniteElementSpace &trial_fes, + const mfem::FiniteElementSpace &test_fes, + CoeffType *Q, bool use_bdr = false, + bool use_mf = false) + : MixedVectorGradientOperatorInfoBase(trial_fes, test_fes, Q, use_bdr, use_mf) + { + MFEM_VERIFY( + (trial_fes.FEColl()->GetMapType(ctx.dim) == mfem::FiniteElement::H_CURL && + test_fes.FEColl()->GetDerivMapType(ctx.dim) == mfem::FiniteElement::H_CURL), + "libCEED interface for MixedVectorWeakDivergenceIntegrator requires " + "H(curl) domain and H^1 range FE spaces!"); + trial_op = EvalMode::Interp; + test_op = EvalMode::Grad; + for (int i = 0; i < LIBCEED_DIFF_COEFF_COMP_MAX; i++) + { + ctx.coeff[i] *= -1.0; } } }; @@ -135,7 +240,7 @@ PAMixedVectorGradientIntegrator::PAMixedVectorGradientIntegrator( const bool use_bdr) { #ifdef MFEM_USE_CEED - MixedVectorGradientOperatorInfo info(trial_fes, test_fes, Q, use_bdr, true); + MixedVectorGradientOperatorInfo info(trial_fes, test_fes, Q, use_bdr); Assemble(integ, info, trial_fes, test_fes, Q, use_bdr); #else MFEM_ABORT("MFEM must be built with MFEM_USE_CEED=YES to use libCEED."); @@ -158,133 +263,79 @@ MFMixedVectorGradientIntegrator::MFMixedVectorGradientIntegrator( #endif } -template <> -PAMixedVectorWeakDivergenceIntegrator::PAMixedVectorWeakDivergenceIntegrator( - const mfem::MixedVectorWeakDivergenceIntegrator &integ, - const mfem::FiniteElementSpace &trial_fes, - const mfem::FiniteElementSpace &test_fes, - mfem::Coefficient *Q, - const bool use_bdr) +namespace { + #ifdef MFEM_USE_CEED - MixedVectorGradientOperatorInfo info(trial_fes, test_fes, Q, use_bdr, false); - if (Q) - { - // Does not inherit ownership of old Q - Q = new ProductCoefficient(-1.0, *Q); - } - Assemble(integ, info, trial_fes, test_fes, Q, use_bdr); - delete Q; -#else - MFEM_ABORT("MFEM must be built with MFEM_USE_CEED=YES to use libCEED."); -#endif +mfem::Coefficient *NegativeCoeff(mfem::Coefficient &Q) +{ + return (dynamic_cast(&Q) != nullptr) ? + nullptr : new mfem::ProductCoefficient(-1.0, Q); } -template <> -PAMixedVectorWeakDivergenceIntegrator::PAMixedVectorWeakDivergenceIntegrator( - const mfem::MixedVectorWeakDivergenceIntegrator &integ, - const mfem::FiniteElementSpace &trial_fes, - const mfem::FiniteElementSpace &test_fes, - mfem::VectorCoefficient *Q, - const bool use_bdr) +mfem::VectorCoefficient *NegativeCoeff(mfem::VectorCoefficient &Q) { -#ifdef MFEM_USE_CEED - MixedVectorGradientOperatorInfo info(trial_fes, test_fes, Q, use_bdr, false); - if (Q) - { - // Does not inherit ownership of old Q - Q = new ScalarVectorProductCoefficient(-1.0, *Q); - } - Assemble(integ, info, trial_fes, test_fes, Q, use_bdr); - delete Q; -#else - MFEM_ABORT("MFEM must be built with MFEM_USE_CEED=YES to use libCEED."); -#endif + return (dynamic_cast(&Q) != nullptr) ? + nullptr : new mfem::ScalarVectorProductCoefficient(-1.0, Q); } -template <> -PAMixedVectorWeakDivergenceIntegrator::PAMixedVectorWeakDivergenceIntegrator( - const mfem::MixedVectorWeakDivergenceIntegrator &integ, - const mfem::FiniteElementSpace &trial_fes, - const mfem::FiniteElementSpace &test_fes, - mfem::MatrixCoefficient *Q, - const bool use_bdr) +mfem::MatrixCoefficient *NegativeCoeff(mfem::MatrixCoefficient &Q) { -#ifdef MFEM_USE_CEED - MixedVectorGradientOperatorInfo info(trial_fes, test_fes, Q, use_bdr, false); - if (Q) - { - // Does not inherit ownership of old Q - Q = new ScalarMatrixProductCoefficient(-1.0, *Q); - } - Assemble(integ, info, trial_fes, test_fes, Q, use_bdr); - delete Q; -#else - MFEM_ABORT("MFEM must be built with MFEM_USE_CEED=YES to use libCEED."); -#endif + return (dynamic_cast(&Q) != nullptr) ? + nullptr : new mfem::ScalarMatrixProductCoefficient(-1.0, Q); } +#endif -template <> -MFMixedVectorWeakDivergenceIntegrator::MFMixedVectorWeakDivergenceIntegrator( +} // namespace + +template +PAMixedVectorWeakDivergenceIntegrator::PAMixedVectorWeakDivergenceIntegrator( const mfem::MixedVectorWeakDivergenceIntegrator &integ, const mfem::FiniteElementSpace &trial_fes, const mfem::FiniteElementSpace &test_fes, - mfem::Coefficient *Q, + CoeffType *Q, const bool use_bdr) { #ifdef MFEM_USE_CEED - MixedVectorGradientOperatorInfo info(trial_fes, test_fes, Q, use_bdr, false); + MixedVectorWeakDivergenceOperatorInfo info(trial_fes, test_fes, Q, use_bdr); if (Q) { // Does not inherit ownership of old Q - Q = new ProductCoefficient(-1.0, *Q); + auto *nQ = NegativeCoeff(*Q); + Assemble(integ, info, trial_fes, test_fes, nQ, use_bdr); + delete nQ; } - Assemble(integ, info, trial_fes, test_fes, Q, use_bdr, true); - delete Q; -#else - MFEM_ABORT("MFEM must be built with MFEM_USE_CEED=YES to use libCEED."); -#endif -} - -template <> -MFMixedVectorWeakDivergenceIntegrator::MFMixedVectorWeakDivergenceIntegrator( - const mfem::MixedVectorWeakDivergenceIntegrator &integ, - const mfem::FiniteElementSpace &trial_fes, - const mfem::FiniteElementSpace &test_fes, - mfem::VectorCoefficient *Q, - const bool use_bdr) -{ -#ifdef MFEM_USE_CEED - MixedVectorGradientOperatorInfo info(trial_fes, test_fes, Q, use_bdr, false); - if (Q) + else { - // Does not inherit ownership of old Q - Q = new ScalarVectorProductCoefficient(-1.0, *Q); + Assemble(integ, info, trial_fes, test_fes, Q, use_bdr); } - Assemble(integ, info, trial_fes, test_fes, Q, use_bdr, true); - delete Q; #else MFEM_ABORT("MFEM must be built with MFEM_USE_CEED=YES to use libCEED."); #endif } -template <> +template MFMixedVectorWeakDivergenceIntegrator::MFMixedVectorWeakDivergenceIntegrator( const mfem::MixedVectorWeakDivergenceIntegrator &integ, const mfem::FiniteElementSpace &trial_fes, const mfem::FiniteElementSpace &test_fes, - mfem::MatrixCoefficient *Q, + CoeffType *Q, const bool use_bdr) { #ifdef MFEM_USE_CEED - MixedVectorGradientOperatorInfo info(trial_fes, test_fes, Q, use_bdr, false); + MixedVectorWeakDivergenceOperatorInfo info(trial_fes, test_fes, Q, use_bdr, + true); if (Q) { // Does not inherit ownership of old Q - Q = new ScalarMatrixProductCoefficient(-1.0, *Q); + auto *nQ = NegativeCoeff(*Q); + Assemble(integ, info, trial_fes, test_fes, nQ, use_bdr, true); + delete nQ; + } + else + { + Assemble(integ, info, trial_fes, test_fes, Q, use_bdr, true); } - Assemble(integ, info, trial_fes, test_fes, Q, use_bdr, true); - delete Q; #else MFEM_ABORT("MFEM must be built with MFEM_USE_CEED=YES to use libCEED."); #endif @@ -302,6 +353,19 @@ template PAMixedVectorGradientIntegrator::PAMixedVectorGradientIntegrator( const mfem::MixedVectorGradientIntegrator &, const mfem::FiniteElementSpace &, const mfem::FiniteElementSpace &, mfem::MatrixCoefficient *, const bool); +template PAMixedVectorWeakDivergenceIntegrator::PAMixedVectorWeakDivergenceIntegrator( + const mfem::MixedVectorWeakDivergenceIntegrator &, + const mfem::FiniteElementSpace &, const mfem::FiniteElementSpace &, + mfem::Coefficient *, const bool); +template PAMixedVectorWeakDivergenceIntegrator::PAMixedVectorWeakDivergenceIntegrator( + const mfem::MixedVectorWeakDivergenceIntegrator &, + const mfem::FiniteElementSpace &, const mfem::FiniteElementSpace &, + mfem::VectorCoefficient *, const bool); +template PAMixedVectorWeakDivergenceIntegrator::PAMixedVectorWeakDivergenceIntegrator( + const mfem::MixedVectorWeakDivergenceIntegrator &, + const mfem::FiniteElementSpace &, const mfem::FiniteElementSpace &, + mfem::MatrixCoefficient *, const bool); + template MFMixedVectorGradientIntegrator::MFMixedVectorGradientIntegrator( const mfem::MixedVectorGradientIntegrator &, const mfem::FiniteElementSpace &, const mfem::FiniteElementSpace &, mfem::Coefficient *, const bool); @@ -312,6 +376,19 @@ template MFMixedVectorGradientIntegrator::MFMixedVectorGradientIntegrator( const mfem::MixedVectorGradientIntegrator &, const mfem::FiniteElementSpace &, const mfem::FiniteElementSpace &, mfem::MatrixCoefficient *, const bool); +template MFMixedVectorWeakDivergenceIntegrator::MFMixedVectorWeakDivergenceIntegrator( + const mfem::MixedVectorWeakDivergenceIntegrator &, + const mfem::FiniteElementSpace &, const mfem::FiniteElementSpace &, + mfem::Coefficient *, const bool); +template MFMixedVectorWeakDivergenceIntegrator::MFMixedVectorWeakDivergenceIntegrator( + const mfem::MixedVectorWeakDivergenceIntegrator &, + const mfem::FiniteElementSpace &, const mfem::FiniteElementSpace &, + mfem::VectorCoefficient *, const bool); +template MFMixedVectorWeakDivergenceIntegrator::MFMixedVectorWeakDivergenceIntegrator( + const mfem::MixedVectorWeakDivergenceIntegrator &, + const mfem::FiniteElementSpace &, const mfem::FiniteElementSpace &, + mfem::MatrixCoefficient *, const bool); + // @endcond } // namespace ceed diff --git a/fem/ceed/integrators/nlconvection/nlconvection.cpp b/fem/ceed/integrators/nlconvection/nlconvection.cpp index b71af29788d..c285051eed6 100644 --- a/fem/ceed/integrators/nlconvection/nlconvection.cpp +++ b/fem/ceed/integrators/nlconvection/nlconvection.cpp @@ -25,35 +25,68 @@ namespace ceed #ifdef MFEM_USE_CEED struct NLConvectionOperatorInfo : public OperatorInfo { - NLConvectionContext ctx; + NLConvectionContext ctx = {0}; NLConvectionOperatorInfo(const mfem::FiniteElementSpace &fes, - mfem::Coefficient *Q, bool use_bdr) + mfem::Coefficient *Q, bool use_bdr = false, + bool use_mf = false) { MFEM_VERIFY(fes.GetVDim() == fes.GetMesh()->SpaceDimension(), "Missing coefficient in ceed::NLConvectionOperatorInfo!"); ctx.dim = fes.GetMesh()->Dimension() - use_bdr; ctx.space_dim = fes.GetMesh()->SpaceDimension(); + if (!use_mf) + { + apply_func = ":f_apply_conv"; + apply_qf = &f_apply_conv; + } + else + { + build_func = ""; + build_qf = nullptr; + } if (Q == nullptr) { ctx.coeff = 1.0; + if (!use_mf) + { + build_func = ":f_build_conv_const"; + build_qf = &f_build_conv_const; + } + else + { + apply_func = ":f_apply_conv_mf_const"; + apply_qf = &f_apply_conv_mf_const; + } } - else if (ConstantCoefficient *const_coeff = - dynamic_cast(Q)) + else if (mfem::ConstantCoefficient *const_coeff = + dynamic_cast(Q)) { ctx.coeff = const_coeff->constant; + if (!use_mf) + { + build_func = ":f_build_conv_const"; + build_qf = &f_build_conv_const; + } + else + { + apply_func = ":f_apply_conv_mf_const"; + apply_qf = &f_apply_conv_mf_const; + } + } + else + { + if (!use_mf) + { + build_func = ":f_build_conv_quad"; + build_qf = &f_build_conv_quad; + } + else + { + apply_func = ":f_apply_conv_mf_quad"; + apply_qf = &f_apply_conv_mf_quad; + } } - header = "/integrators/nlconvection/nlconvection_qf.h"; - build_func_const = ":f_build_conv_const"; - build_qf_const = &f_build_conv_const; - build_func_quad = ":f_build_conv_quad"; - build_qf_quad = &f_build_conv_quad; - apply_func = ":f_apply_conv"; - apply_qf = &f_apply_conv; - apply_func_mf_const = ":f_apply_conv_mf_const"; - apply_qf_mf_const = &f_apply_conv_mf_const; - apply_func_mf_quad = ":f_apply_conv_mf_quad"; - apply_qf_mf_quad = &f_apply_conv_mf_quad; trial_op = EvalMode::InterpAndGrad; test_op = EvalMode::Interp; qdatasize = ctx.dim * ctx.space_dim; @@ -82,7 +115,7 @@ MFVectorConvectionNLIntegrator::MFVectorConvectionNLIntegrator( const bool use_bdr) { #ifdef MFEM_USE_CEED - NLConvectionOperatorInfo info(fes, Q, use_bdr); + NLConvectionOperatorInfo info(fes, Q, use_bdr, true); Assemble(integ, info, fes, Q, use_bdr, true); #else MFEM_ABORT("MFEM must be built with MFEM_USE_CEED=YES to use libCEED."); diff --git a/fem/ceed/integrators/nlconvection/nlconvection_qf.h b/fem/ceed/integrators/nlconvection/nlconvection_qf.h index 227971c1fae..ee1782784b1 100644 --- a/fem/ceed/integrators/nlconvection/nlconvection_qf.h +++ b/fem/ceed/integrators/nlconvection/nlconvection_qf.h @@ -14,15 +14,14 @@ #include "../util/util_qf.h" -/// A structure used to pass additional data to f_build_conv and f_apply_conv struct NLConvectionContext { CeedInt dim, space_dim; CeedScalar coeff; }; -/// libCEED QFunction for building quadrature data for a convection operator -/// with a constant coefficient +/// libCEED QFunction for building quadrature data for a convection +/// operator with a constant coefficient CEED_QFUNCTION(f_build_conv_const)(void *ctx, CeedInt Q, const CeedScalar *const *in, CeedScalar *const *out) @@ -31,7 +30,7 @@ CEED_QFUNCTION(f_build_conv_const)(void *ctx, CeedInt Q, // in[0] is Jacobians with shape [dim, ncomp=space_dim, Q] // in[1] is quadrature weights, size (Q) // - // At every quadrature point, compute and store qw * c * adj(J)^T. + // At every quadrature point, compute and store qw * c * adj(J)^T const CeedScalar coeff = bc->coeff; const CeedScalar *J = in[0], *qw = in[1]; CeedScalar *qd = out[0]; @@ -71,8 +70,8 @@ CEED_QFUNCTION(f_build_conv_const)(void *ctx, CeedInt Q, return 0; } -/// libCEED QFunction for building quadrature data for a convection operator -/// with a coefficient evaluated at quadrature points. +/// libCEED QFunction for building quadrature data for a convection +/// operator with a coefficient evaluated at quadrature points CEED_QFUNCTION(f_build_conv_quad)(void *ctx, CeedInt Q, const CeedScalar *const *in, CeedScalar *const *out) @@ -82,7 +81,7 @@ CEED_QFUNCTION(f_build_conv_quad)(void *ctx, CeedInt Q, // in[1] is Jacobians with shape [dim, ncomp=space_dim, Q] // in[2] is quadrature weights, size (Q) // - // At every quadrature point, compute and store qw * c * adj(J)^T. + // At every quadrature point, compute and store qw * c * adj(J)^T const CeedScalar *c = in[0], *J = in[1], *qw = in[2]; CeedScalar *qd = out[0]; switch (10 * bc->space_dim + bc->dim) @@ -121,7 +120,7 @@ CEED_QFUNCTION(f_build_conv_quad)(void *ctx, CeedInt Q, return 0; } -/// libCEED QFunction for applying a conv operator +/// libCEED QFunction for applying a convection operator CEED_QFUNCTION(f_apply_conv)(void *ctx, CeedInt Q, const CeedScalar *const *in, CeedScalar *const *out) @@ -251,7 +250,8 @@ CEED_QFUNCTION(f_apply_conv)(void *ctx, CeedInt Q, return 0; } -/// libCEED QFunction for applying a conv operator +/// libCEED QFunction for applying a convection operator with a constant +/// coefficient CEED_QFUNCTION(f_apply_conv_mf_const)(void *ctx, CeedInt Q, const CeedScalar *const *in, CeedScalar *const *out) @@ -263,7 +263,7 @@ CEED_QFUNCTION(f_apply_conv_mf_const)(void *ctx, CeedInt Q, // in[3] is quadrature weights, size (Q) // out[0] has shape [ncomp=space_dim, Q] // - // At every quadrature point, compute qw * c * adj(J)^T. + // At every quadrature point, compute qw * c * adj(J)^T const CeedScalar coeff = bc->coeff; const CeedScalar *u = in[0], *ug = in[1], *J = in[2], *qw = in[3]; CeedScalar *vg = out[0]; @@ -375,7 +375,8 @@ CEED_QFUNCTION(f_apply_conv_mf_const)(void *ctx, CeedInt Q, return 0; } -/// libCEED QFunction for applying a conv operator +/// libCEED QFunction for applying a convection operator with a coefficient +/// evaluated at quadrature points CEED_QFUNCTION(f_apply_conv_mf_quad)(void *ctx, CeedInt Q, const CeedScalar *const *in, CeedScalar *const *out) @@ -388,7 +389,7 @@ CEED_QFUNCTION(f_apply_conv_mf_quad)(void *ctx, CeedInt Q, // in[4] is quadrature weights, size (Q) // out[0] has shape [ncomp=space_dim, Q] // - // At every quadrature point, compute qw * c * adj(J)^T. + // At every quadrature point, compute qw * c * adj(J)^T const CeedScalar *u = in[0], *ug = in[1], *c = in[2], *J = in[3], *qw = in[4]; CeedScalar *vg = out[0]; switch (10 * bc->space_dim + bc->dim) diff --git a/fem/ceed/integrators/util/util_qf.h b/fem/ceed/integrators/util/util_qf.h index beb95cd3c0e..fa7ca763b29 100644 --- a/fem/ceed/integrators/util/util_qf.h +++ b/fem/ceed/integrators/util/util_qf.h @@ -73,7 +73,7 @@ CEED_QFUNCTION_HELPER void MultAdjJCAdjJt22(const CeedScalar *J, const CeedInt qd_stride, CeedScalar *qd) { - // compute qw/det(J) adj(J) C adj(J)^T and store the symmetric part of the result. + // compute qw/det(J) adj(J) C adj(J)^T and store the symmetric part of the result // J: 0 2 adj(J): J22 -J12 qd: 0 1 // 1 3 -J21 J11 1 2 const CeedScalar J11 = J[J_stride * 0]; @@ -122,7 +122,7 @@ CEED_QFUNCTION_HELPER void MultAdjJCAdjJt21(const CeedScalar *J, const CeedInt qd_stride, CeedScalar *qd) { - // compute qw/det(J) adj(J) C adj(J)^T and store the symmetric part of the result. + // compute qw/det(J) adj(J) C adj(J)^T and store the symmetric part of the result // J: 0 adj(J): 1/sqrt(J^T J) J^T qd: 0 // 1 const CeedScalar J11 = J[J_stride * 0]; @@ -160,7 +160,7 @@ CEED_QFUNCTION_HELPER void MultAdjJCAdjJt33(const CeedScalar *J, const CeedInt qd_stride, CeedScalar *qd) { - // compute qw/det(J) adj(J) C adj(J)^T and store the symmetric part of the result. + // compute qw/det(J) adj(J) C adj(J)^T and store the symmetric part of the result // J: 0 3 6 qd: 0 1 2 // 1 4 7 1 3 4 // 2 5 8 2 4 5 @@ -273,7 +273,7 @@ CEED_QFUNCTION_HELPER void MultAdjJCAdjJt32(const CeedScalar *J, const CeedInt qd_stride, CeedScalar *qd) { - // compute qw/det(J) adj(J) C adj(J)^T and store the symmetric part of the result. + // compute qw/det(J) adj(J) C adj(J)^T and store the symmetric part of the result // J: 0 3 qd: 0 1 // 1 4 1 2 // 2 5 @@ -373,7 +373,7 @@ CEED_QFUNCTION_HELPER void MultJtCJ22(const CeedScalar *J, const CeedInt qd_stride, CeedScalar *qd) { - // compute qw/det(J) J^T C J and store the symmetric part of the result. + // compute qw/det(J) J^T C J and store the symmetric part of the result // J: 0 2 qd: 0 1 // 1 3 1 2 const CeedScalar J11 = J[J_stride * 0]; @@ -422,7 +422,7 @@ CEED_QFUNCTION_HELPER void MultJtCJ21(const CeedScalar *J, const CeedInt qd_stride, CeedScalar *qd) { - // compute qw/det(J) J^T C J and store the symmetric part of the result. + // compute qw/det(J) J^T C J and store the symmetric part of the result // J: 0 qd: 0 // 1 const CeedScalar J11 = J[J_stride * 0]; @@ -460,7 +460,7 @@ CEED_QFUNCTION_HELPER void MultJtCJ33(const CeedScalar *J, const CeedInt qd_stride, CeedScalar *qd) { - // compute qw/det(J) J^T C J and store the symmetric part of the result. + // compute qw/det(J) J^T C J and store the symmetric part of the result // J: 0 3 6 qd: 0 1 2 // 1 4 7 1 3 4 // 2 5 8 2 4 5 @@ -566,7 +566,7 @@ CEED_QFUNCTION_HELPER void MultJtCJ32(const CeedScalar *J, const CeedInt qd_stride, CeedScalar *qd) { - // compute qw/det(J) J^T C J and store the symmetric part of the result. + // compute qw/det(J) J^T C J and store the symmetric part of the result // J: 0 3 qd: 0 1 // 1 4 1 2 // 2 5 @@ -639,7 +639,7 @@ CEED_QFUNCTION_HELPER void MultCtAdjJt22(const CeedScalar *J, const CeedInt qd_stride, CeedScalar *qd) { - // compute qw c^T adj(J)^T and store the result vector. + // compute qw c^T adj(J)^T and store the result vector // J: 0 2 adj(J): J22 -J12 // 1 3 -J21 J11 const CeedScalar J11 = J[J_stride * 0]; @@ -660,7 +660,7 @@ CEED_QFUNCTION_HELPER void MultCtAdjJt21(const CeedScalar *J, const CeedInt qd_stride, CeedScalar *qd) { - // compute qw c^T adj(J)^T and store the result vector. + // compute qw c^T adj(J)^T and store the result vector // J: 0 adj(J): 1/sqrt(J^T J) J^T // 1 const CeedScalar J11 = J[J_stride * 0]; @@ -679,7 +679,7 @@ CEED_QFUNCTION_HELPER void MultCtAdjJt33(const CeedScalar *J, const CeedInt qd_stride, CeedScalar *qd) { - // compute qw c^T adj(J)^T and store the result vector. + // compute qw c^T adj(J)^T and store the result vector // J: 0 3 6 // 1 4 7 // 2 5 8 @@ -717,7 +717,7 @@ CEED_QFUNCTION_HELPER void MultCtAdjJt32(const CeedScalar *J, const CeedInt qd_stride, CeedScalar *qd) { - // compute qw c^T adj(J)^T and store the result vector. + // compute qw c^T adj(J)^T and store the result vector // J: 0 3 // 1 4 // 2 5 @@ -750,7 +750,7 @@ CEED_QFUNCTION_HELPER void MultAdjJt22(const CeedScalar *J, const CeedInt qd_stride, CeedScalar *qd) { - // compute qw adj(J)^T and store the result matrix. + // compute qw adj(J)^T and store the result matrix // J: 0 2 adj(J): J22 -J12 qd: 0 2 // 1 3 -J21 J11 1 3 const CeedScalar J11 = J[J_stride * 0]; @@ -769,7 +769,7 @@ CEED_QFUNCTION_HELPER void MultAdjJt21(const CeedScalar *J, const CeedInt qd_stride, CeedScalar *qd) { - // compute qw adj(J)^T and store the result matrix. + // compute qw adj(J)^T and store the result matrix // J: 0 adj(J): 1/sqrt(J^T J) J^T qd: 0 // 1 1 const CeedScalar J11 = J[J_stride * 0]; @@ -785,7 +785,7 @@ CEED_QFUNCTION_HELPER void MultAdjJt33(const CeedScalar *J, const CeedInt qd_stride, CeedScalar *qd) { - // compute qw adj(J)^T and store the result matrix. + // compute qw adj(J)^T and store the result matrix // J: 0 3 6 qd: 0 3 6 // 1 4 7 1 4 7 // 2 5 8 2 5 8 @@ -824,7 +824,7 @@ CEED_QFUNCTION_HELPER void MultAdjJt32(const CeedScalar *J, const CeedInt qd_stride, CeedScalar *qd) { - // compute qw adj(J)^T and store the result matrix. + // compute qw adj(J)^T and store the result matrix // J: 0 3 qd: 0 3 // 1 4 1 4 // 2 5 2 5 diff --git a/fem/ceed/integrators/vecfemass/vecfemass.cpp b/fem/ceed/integrators/vecfemass/vecfemass.cpp index 5cbb043e643..32df1e322b0 100644 --- a/fem/ceed/integrators/vecfemass/vecfemass.cpp +++ b/fem/ceed/integrators/vecfemass/vecfemass.cpp @@ -25,92 +25,150 @@ namespace ceed #ifdef MFEM_USE_CEED struct VectorFEMassOperatorInfo : public OperatorInfo { - VectorFEMassContext ctx; + VectorFEMassContext ctx = {0}; template VectorFEMassOperatorInfo(const mfem::FiniteElementSpace &fes, CoeffType *Q, - bool use_bdr) + bool use_bdr = false, bool use_mf = false) { MFEM_VERIFY(fes.GetVDim() == 1, "libCEED interface for vector FE does not support VDim > 1!"); ctx.dim = fes.GetMesh()->Dimension() - use_bdr; ctx.space_dim = fes.GetMesh()->SpaceDimension(); - if (Q == nullptr) + bool is_hdiv = (fes.FEColl()->GetMapType(ctx.dim) == + mfem::FiniteElement::H_DIV); + MFEM_VERIFY(is_hdiv || + fes.FEColl()->GetMapType(ctx.dim) == mfem::FiniteElement::H_CURL, + "VectorFEMassIntegrator requires H(div) or H(curl) FE space!"); + if (!use_mf) { - ctx.coeff_comp = 1; - ctx.coeff[0] = 1.0; + apply_func = ":f_apply_vecfemass"; + apply_qf = &f_apply_vecfemass; } else { - InitCoefficient(*Q); + build_func = ""; + build_qf = nullptr; } - - header = "/integrators/vecfemass/vecfemass_qf.h"; - if (fes.FEColl()->GetMapType(ctx.dim) == mfem::FiniteElement::H_DIV) + if (Q == nullptr) { - build_func_const = ":f_build_hdivmass_const"; - build_qf_const = &f_build_hdivmass_const; - build_func_quad = ":f_build_hdivmass_quad"; - build_qf_quad = &f_build_hdivmass_quad; - apply_func = ":f_apply_vecfemass"; - apply_qf = &f_apply_vecfemass; - apply_func_mf_const = ":f_apply_hdivmass_mf_const"; - apply_qf_mf_const = &f_apply_hdivmass_mf_const; - apply_func_mf_quad = ":f_apply_hdivmass_mf_quad"; - apply_qf_mf_quad = &f_apply_hdivmass_mf_quad; + ctx.coeff[0] = 1.0; + if (!use_mf) + { + build_func = is_hdiv ? ":f_build_hdivmass_const_scalar" : + ":f_build_hcurlmass_const_scalar"; + build_qf = is_hdiv ? &f_build_hdivmass_const_scalar : + &f_build_hcurlmass_const_scalar; + } + else + { + apply_func = is_hdiv ? ":f_apply_hdivmass_mf_const_scalar" : + ":f_apply_hcurlmass_mf_const_scalar"; + apply_qf = is_hdiv ? &f_apply_hdivmass_mf_const_scalar : + &f_apply_hcurlmass_mf_const_scalar; + } } else { - MFEM_VERIFY(fes.FEColl()->GetMapType(ctx.dim) == mfem::FiniteElement::H_CURL, - "VectorFEMassIntegrator requires H(div) or H(curl) FE space!"); - build_func_const = ":f_build_hcurlmass_const"; - build_qf_const = &f_build_hcurlmass_const; - build_func_quad = ":f_build_hcurlmass_quad"; - build_qf_quad = &f_build_hcurlmass_quad; - apply_func = ":f_apply_vecfemass"; - apply_qf = &f_apply_vecfemass; - apply_func_mf_const = ":f_apply_hcurlmass_mf_const"; - apply_qf_mf_const = &f_apply_hcurlmass_mf_const; - apply_func_mf_quad = ":f_apply_hcurlmass_mf_quad"; - apply_qf_mf_quad = &f_apply_hcurlmass_mf_quad; + InitCoefficient(*Q, is_hdiv, use_mf); } + header = "/integrators/vecfemass/vecfemass_qf.h"; trial_op = EvalMode::Interp; test_op = EvalMode::Interp; qdatasize = (ctx.dim * (ctx.dim + 1)) / 2; } - void InitCoefficient(mfem::Coefficient &Q) + void InitCoefficient(mfem::Coefficient &Q, bool is_hdiv, bool use_mf) { - ctx.coeff_comp = 1; - if (ConstantCoefficient *const_coeff = - dynamic_cast(&Q)) + if (mfem::ConstantCoefficient *const_coeff = + dynamic_cast(&Q)) { ctx.coeff[0] = const_coeff->constant; + if (!use_mf) + { + build_func = is_hdiv ? ":f_build_hdivmass_const_scalar" : + ":f_build_hcurlmass_const_scalar"; + build_qf = is_hdiv ? &f_build_hdivmass_const_scalar : + &f_build_hcurlmass_const_scalar; + } + else + { + apply_func = is_hdiv ? ":f_apply_hdivmass_mf_const_scalar" : + ":f_apply_hcurlmass_mf_const_scalar"; + apply_qf = is_hdiv ? &f_apply_hdivmass_mf_const_scalar : + &f_apply_hcurlmass_mf_const_scalar; + } + } + else + { + if (!use_mf) + { + build_func = is_hdiv ? ":f_build_hdivmass_quad_scalar" : + ":f_build_hcurlmass_quad_scalar"; + build_qf = is_hdiv ? &f_build_hdivmass_quad_scalar : + &f_build_hcurlmass_quad_scalar; + } + else + { + apply_func = is_hdiv ? ":f_apply_hdivmass_mf_quad_scalar" : + ":f_apply_hcurlmass_mf_quad_scalar"; + apply_qf = is_hdiv ? &f_apply_hdivmass_mf_quad_scalar : + &f_apply_hcurlmass_mf_quad_scalar; + } } } - void InitCoefficient(mfem::VectorCoefficient &VQ) + void InitCoefficient(mfem::VectorCoefficient &VQ, bool is_hdiv, bool use_mf) { - const int vdim = VQ.GetVDim(); - ctx.coeff_comp = vdim; - if (VectorConstantCoefficient *const_coeff = - dynamic_cast(&VQ)) + if (mfem::VectorConstantCoefficient *const_coeff = + dynamic_cast(&VQ)) { - MFEM_VERIFY(ctx.coeff_comp <= LIBCEED_VECFEMASS_COEFF_COMP_MAX, + const int vdim = VQ.GetVDim(); + MFEM_VERIFY(vdim <= LIBCEED_VECFEMASS_COEFF_COMP_MAX, "VectorCoefficient dimension exceeds context storage!"); const mfem::Vector &val = const_coeff->GetVec(); for (int i = 0; i < vdim; i++) { ctx.coeff[i] = val[i]; } + if (!use_mf) + { + build_func = is_hdiv ? ":f_build_hdivmass_const_vector" : + ":f_build_hcurlmass_const_vector"; + build_qf = is_hdiv ? &f_build_hdivmass_const_vector : + &f_build_hcurlmass_const_vector; + } + else + { + apply_func = is_hdiv ? ":f_apply_hdivmass_mf_const_vector" : + ":f_apply_hcurlmass_mf_const_vector"; + apply_qf = is_hdiv ? &f_apply_hdivmass_mf_const_vector : + &f_apply_hcurlmass_mf_const_vector; + } + } + else + { + if (!use_mf) + { + build_func = is_hdiv ? ":f_build_hdivmass_quad_vector" : + ":f_build_hcurlmass_quad_vector"; + build_qf = is_hdiv ? &f_build_hdivmass_quad_vector : + &f_build_hcurlmass_quad_vector; + } + else + { + apply_func = is_hdiv ? ":f_apply_hdivmass_mf_quad_vector" : + ":f_apply_hcurlmass_mf_quad_vector"; + apply_qf = is_hdiv ? &f_apply_hdivmass_mf_quad_vector : + &f_apply_hcurlmass_mf_quad_vector; + } } } - void InitCoefficient(mfem::MatrixCoefficient &MQ) + void InitCoefficient(mfem::MatrixCoefficient &MQ, bool is_hdiv, bool use_mf) { // Assumes matrix coefficient is symmetric - const int vdim = MQ.GetVDim(); - ctx.coeff_comp = (vdim * (vdim + 1)) / 2; - if (MatrixConstantCoefficient *const_coeff = - dynamic_cast(&MQ)) + if (mfem::MatrixConstantCoefficient *const_coeff = + dynamic_cast(&MQ)) { - MFEM_VERIFY(ctx.coeff_comp <= LIBCEED_VECFEMASS_COEFF_COMP_MAX, + const int vdim = MQ.GetVDim(); + MFEM_VERIFY((vdim * (vdim + 1)) / 2 <= LIBCEED_VECFEMASS_COEFF_COMP_MAX, "MatrixCoefficient dimensions exceed context storage!"); const mfem::DenseMatrix &val = const_coeff->GetMatrix(); for (int j = 0; j < vdim; j++) @@ -121,6 +179,37 @@ struct VectorFEMassOperatorInfo : public OperatorInfo ctx.coeff[idx] = val(i, j); } } + if (!use_mf) + { + build_func = is_hdiv ? ":f_build_hdivmass_const_matrix" : + ":f_build_hcurlmass_const_matrix"; + build_qf = is_hdiv ? &f_build_hdivmass_const_matrix : + &f_build_hcurlmass_const_matrix; + } + else + { + apply_func = is_hdiv ? ":f_apply_hdivmass_mf_const_matrix" : + ":f_apply_hcurlmass_mf_const_matrix"; + apply_qf = is_hdiv ? &f_apply_hdivmass_mf_const_matrix : + &f_apply_hcurlmass_mf_const_matrix; + } + } + else + { + if (!use_mf) + { + build_func = is_hdiv ? ":f_build_hdivmass_quad_matrix" : + ":f_build_hcurlmass_quad_matrix"; + build_qf = is_hdiv ? &f_build_hdivmass_quad_matrix : + &f_build_hcurlmass_quad_matrix; + } + else + { + apply_func = is_hdiv ? ":f_apply_hdivmass_mf_quad_matrix" : + ":f_apply_hcurlmass_mf_quad_matrix"; + apply_qf = is_hdiv ? &f_apply_hdivmass_mf_quad_matrix : + &f_apply_hcurlmass_mf_quad_matrix; + } } } }; @@ -149,7 +238,7 @@ MFVectorFEMassIntegrator::MFVectorFEMassIntegrator( const bool use_bdr) { #ifdef MFEM_USE_CEED - VectorFEMassOperatorInfo info(fes, Q, use_bdr); + VectorFEMassOperatorInfo info(fes, Q, use_bdr, true); Assemble(integ, info, fes, Q, use_bdr, true); #else MFEM_ABORT("MFEM must be built with MFEM_USE_CEED=YES to use libCEED."); diff --git a/fem/ceed/integrators/vecfemass/vecfemass_qf.h b/fem/ceed/integrators/vecfemass/vecfemass_qf.h index a921a580404..571316ba821 100644 --- a/fem/ceed/integrators/vecfemass/vecfemass_qf.h +++ b/fem/ceed/integrators/vecfemass/vecfemass_qf.h @@ -16,19 +16,17 @@ #define LIBCEED_VECFEMASS_COEFF_COMP_MAX 6 -/// A structure used to pass additional data to f_build_vecfemass and -/// f_apply_vecfemass struct VectorFEMassContext { - CeedInt dim, space_dim, coeff_comp; + CeedInt dim, space_dim; CeedScalar coeff[LIBCEED_VECFEMASS_COEFF_COMP_MAX]; }; -/// libCEED QFunction for building quadrature data for a vector FE mass -/// operator with a constant coefficient -CEED_QFUNCTION(f_build_hdivmass_const)(void *ctx, CeedInt Q, - const CeedScalar *const *in, - CeedScalar *const *out) +/// libCEED QFunction for building quadrature data for an H(div) mass operator +/// with a scalar constant coefficient +CEED_QFUNCTION(f_build_hdivmass_const_scalar)(void *ctx, CeedInt Q, + const CeedScalar *const *in, + CeedScalar *const *out) { VectorFEMassContext *bc = (VectorFEMassContext *)ctx; // in[0] is Jacobians with shape [dim, ncomp=space_dim, Q] @@ -36,98 +34,144 @@ CEED_QFUNCTION(f_build_hdivmass_const)(void *ctx, CeedInt Q, // // At every quadrature point, compute qw/det(J) adj(J) C adj(J)^T (for // H(curl)) or qw/det(J) J^T C J (for H(div)) and store the symmetric part - // of the result. + // of the result const CeedScalar *coeff = bc->coeff; const CeedScalar *J = in[0], *qw = in[1]; CeedScalar *qd = out[0]; - switch (100 * bc->space_dim + 10 * bc->dim + bc->coeff_comp) + switch (10 * bc->space_dim + bc->dim) { - case 111: + case 11: CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++) { const CeedScalar coeff0 = coeff[0]; qd[i] = qw[i] * coeff0 * J[i]; } break; - case 213: + case 21: CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++) { - MultJtCJ21(J + i, Q, coeff, 1, 3, qw[i], Q, qd + i); + MultJtCJ21(J + i, Q, coeff, 1, 1, qw[i], Q, qd + i); } break; - case 212: + case 22: CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++) { - MultJtCJ21(J + i, Q, coeff, 1, 2, qw[i], Q, qd + i); + MultJtCJ22(J + i, Q, coeff, 1, 1, qw[i], Q, qd + i); } break; - case 211: + case 32: CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++) { - MultJtCJ21(J + i, Q, coeff, 1, 1, qw[i], Q, qd + i); + MultJtCJ32(J + i, Q, coeff, 1, 1, qw[i], Q, qd + i); } break; - case 223: + case 33: CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++) { - MultJtCJ22(J + i, Q, coeff, 1, 3, qw[i], Q, qd + i); + MultJtCJ33(J + i, Q, coeff, 1, 1, qw[i], Q, qd + i); } break; - case 222: + } + return 0; +} + +/// libCEED QFunction for building quadrature data for an H(div) mass operator +/// with a vector constant coefficient +CEED_QFUNCTION(f_build_hdivmass_const_vector)(void *ctx, CeedInt Q, + const CeedScalar *const *in, + CeedScalar *const *out) +{ + VectorFEMassContext *bc = (VectorFEMassContext *)ctx; + // in[0] is Jacobians with shape [dim, ncomp=space_dim, Q] + // in[1] is quadrature weights, size (Q) + // + // At every quadrature point, compute qw/det(J) adj(J) C adj(J)^T (for + // H(curl)) or qw/det(J) J^T C J (for H(div)) and store the symmetric part + // of the result + const CeedScalar *coeff = bc->coeff; + const CeedScalar *J = in[0], *qw = in[1]; + CeedScalar *qd = out[0]; + switch (10 * bc->space_dim + bc->dim) + { + case 21: CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++) { - MultJtCJ22(J + i, Q, coeff, 1, 2, qw[i], Q, qd + i); + MultJtCJ21(J + i, Q, coeff, 1, 2, qw[i], Q, qd + i); } break; - case 221: + case 22: CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++) { - MultJtCJ22(J + i, Q, coeff, 1, 1, qw[i], Q, qd + i); + MultJtCJ22(J + i, Q, coeff, 1, 2, qw[i], Q, qd + i); } break; - case 326: + case 32: CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++) { - MultJtCJ32(J + i, Q, coeff, 1, 6, qw[i], Q, qd + i); + MultJtCJ32(J + i, Q, coeff, 1, 3, qw[i], Q, qd + i); } break; - case 323: + case 33: CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++) { - MultJtCJ32(J + i, Q, coeff, 1, 3, qw[i], Q, qd + i); + MultJtCJ33(J + i, Q, coeff, 1, 3, qw[i], Q, qd + i); } break; - case 321: + } + return 0; +} + +/// libCEED QFunction for building quadrature data for an H(div) mass operator +/// with a matrix constant coefficient +CEED_QFUNCTION(f_build_hdivmass_const_matrix)(void *ctx, CeedInt Q, + const CeedScalar *const *in, + CeedScalar *const *out) +{ + VectorFEMassContext *bc = (VectorFEMassContext *)ctx; + // in[0] is Jacobians with shape [dim, ncomp=space_dim, Q] + // in[1] is quadrature weights, size (Q) + // + // At every quadrature point, compute qw/det(J) adj(J) C adj(J)^T (for + // H(curl)) or qw/det(J) J^T C J (for H(div)) and store the symmetric part + // of the result + const CeedScalar *coeff = bc->coeff; + const CeedScalar *J = in[0], *qw = in[1]; + CeedScalar *qd = out[0]; + switch (10 * bc->space_dim + bc->dim) + { + case 21: CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++) { - MultJtCJ32(J + i, Q, coeff, 1, 1, qw[i], Q, qd + i); + MultJtCJ21(J + i, Q, coeff, 1, 3, qw[i], Q, qd + i); } break; - case 336: + case 22: CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++) { - MultJtCJ33(J + i, Q, coeff, 1, 6, qw[i], Q, qd + i); + MultJtCJ22(J + i, Q, coeff, 1, 3, qw[i], Q, qd + i); } break; - case 333: + case 32: CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++) { - MultJtCJ33(J + i, Q, coeff, 1, 3, qw[i], Q, qd + i); + MultJtCJ32(J + i, Q, coeff, 1, 6, qw[i], Q, qd + i); } break; - case 331: + case 33: CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++) { - MultJtCJ33(J + i, Q, coeff, 1, 1, qw[i], Q, qd + i); + MultJtCJ33(J + i, Q, coeff, 1, 6, qw[i], Q, qd + i); } break; } return 0; } -CEED_QFUNCTION(f_build_hcurlmass_const)(void *ctx, CeedInt Q, - const CeedScalar *const *in, - CeedScalar *const *out) +/// libCEED QFunction for building quadrature data for an H(curl) mass operator +/// with a scalar constant coefficient +CEED_QFUNCTION(f_build_hcurlmass_const_scalar)(void *ctx, CeedInt Q, + const CeedScalar *const *in, + CeedScalar *const *out) { VectorFEMassContext *bc = (VectorFEMassContext *)ctx; // in[0] is Jacobians with shape [dim, ncomp=space_dim, Q] @@ -135,287 +179,421 @@ CEED_QFUNCTION(f_build_hcurlmass_const)(void *ctx, CeedInt Q, // // At every quadrature point, compute qw/det(J) adj(J) C adj(J)^T (for // H(curl)) or qw/det(J) J^T C J (for H(div)) and store the symmetric part - // of the result. + // of the result const CeedScalar *coeff = bc->coeff; const CeedScalar *J = in[0], *qw = in[1]; CeedScalar *qd = out[0]; - switch (100 * bc->space_dim + 10 * bc->dim + bc->coeff_comp) + switch (10 * bc->space_dim + bc->dim) { - case 111: + case 11: CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++) { const CeedScalar coeff0 = coeff[0]; qd[i] = qw[i] * coeff0 / J[i]; } break; - case 213: + case 21: CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++) { - MultAdjJCAdjJt21(J + i, Q, coeff, 1, 3, qw[i], Q, qd + i); + MultAdjJCAdjJt21(J + i, Q, coeff, 1, 1, qw[i], Q, qd + i); } break; - case 212: + case 22: CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++) { - MultAdjJCAdjJt21(J + i, Q, coeff, 1, 2, qw[i], Q, qd + i); + MultAdjJCAdjJt22(J + i, Q, coeff, 1, 1, qw[i], Q, qd + i); } break; - case 211: + case 32: CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++) { - MultAdjJCAdjJt21(J + i, Q, coeff, 1, 1, qw[i], Q, qd + i); + MultAdjJCAdjJt32(J + i, Q, coeff, 1, 1, qw[i], Q, qd + i); } break; - case 223: + case 33: CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++) { - MultAdjJCAdjJt22(J + i, Q, coeff, 1, 3, qw[i], Q, qd + i); + MultAdjJCAdjJt33(J + i, Q, coeff, 1, 1, qw[i], Q, qd + i); } break; - case 222: + } + return 0; +} + +/// libCEED QFunction for building quadrature data for an H(curl) mass operator +/// with a vector constant coefficient +CEED_QFUNCTION(f_build_hcurlmass_const_vector)(void *ctx, CeedInt Q, + const CeedScalar *const *in, + CeedScalar *const *out) +{ + VectorFEMassContext *bc = (VectorFEMassContext *)ctx; + // in[0] is Jacobians with shape [dim, ncomp=space_dim, Q] + // in[1] is quadrature weights, size (Q) + // + // At every quadrature point, compute qw/det(J) adj(J) C adj(J)^T (for + // H(curl)) or qw/det(J) J^T C J (for H(div)) and store the symmetric part + // of the result + const CeedScalar *coeff = bc->coeff; + const CeedScalar *J = in[0], *qw = in[1]; + CeedScalar *qd = out[0]; + switch (10 * bc->space_dim + bc->dim) + { + case 21: CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++) { - MultAdjJCAdjJt22(J + i, Q, coeff, 1, 2, qw[i], Q, qd + i); + MultAdjJCAdjJt21(J + i, Q, coeff, 1, 2, qw[i], Q, qd + i); } break; - case 221: + case 22: CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++) { - MultAdjJCAdjJt22(J + i, Q, coeff, 1, 1, qw[i], Q, qd + i); + MultAdjJCAdjJt22(J + i, Q, coeff, 1, 2, qw[i], Q, qd + i); } break; - case 326: + case 32: CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++) { - MultAdjJCAdjJt32(J + i, Q, coeff, 1, 6, qw[i], Q, qd + i); + MultAdjJCAdjJt32(J + i, Q, coeff, 1, 3, qw[i], Q, qd + i); } break; - case 323: + case 33: CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++) { - MultAdjJCAdjJt32(J + i, Q, coeff, 1, 3, qw[i], Q, qd + i); + MultAdjJCAdjJt33(J + i, Q, coeff, 1, 3, qw[i], Q, qd + i); } break; - case 321: + } + return 0; +} + +/// libCEED QFunction for building quadrature data for an H(curl) mass operator +/// with a matrix constant coefficient +CEED_QFUNCTION(f_build_hcurlmass_const_matrix)(void *ctx, CeedInt Q, + const CeedScalar *const *in, + CeedScalar *const *out) +{ + VectorFEMassContext *bc = (VectorFEMassContext *)ctx; + // in[0] is Jacobians with shape [dim, ncomp=space_dim, Q] + // in[1] is quadrature weights, size (Q) + // + // At every quadrature point, compute qw/det(J) adj(J) C adj(J)^T (for + // H(curl)) or qw/det(J) J^T C J (for H(div)) and store the symmetric part + // of the result + const CeedScalar *coeff = bc->coeff; + const CeedScalar *J = in[0], *qw = in[1]; + CeedScalar *qd = out[0]; + switch (10 * bc->space_dim + bc->dim) + { + case 21: CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++) { - MultAdjJCAdjJt32(J + i, Q, coeff, 1, 1, qw[i], Q, qd + i); + MultAdjJCAdjJt21(J + i, Q, coeff, 1, 3, qw[i], Q, qd + i); } break; - case 336: + case 22: CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++) { - MultAdjJCAdjJt33(J + i, Q, coeff, 1, 6, qw[i], Q, qd + i); + MultAdjJCAdjJt22(J + i, Q, coeff, 1, 3, qw[i], Q, qd + i); } break; - case 333: + case 32: CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++) { - MultAdjJCAdjJt33(J + i, Q, coeff, 1, 3, qw[i], Q, qd + i); + MultAdjJCAdjJt32(J + i, Q, coeff, 1, 6, qw[i], Q, qd + i); } break; - case 331: + case 33: CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++) { - MultAdjJCAdjJt33(J + i, Q, coeff, 1, 1, qw[i], Q, qd + i); + MultAdjJCAdjJt33(J + i, Q, coeff, 1, 6, qw[i], Q, qd + i); } break; } return 0; } -/// libCEED QFunction for building quadrature data for a vector FE mass -/// operator with a coefficient evaluated at quadrature points. -CEED_QFUNCTION(f_build_hdivmass_quad)(void *ctx, CeedInt Q, - const CeedScalar *const *in, - CeedScalar *const *out) +/// libCEED QFunction for building quadrature data for an H(div) mass operator +/// with a scalar coefficient evaluated at quadrature points +CEED_QFUNCTION(f_build_hdivmass_quad_scalar)(void *ctx, CeedInt Q, + const CeedScalar *const *in, + CeedScalar *const *out) { VectorFEMassContext *bc = (VectorFEMassContext *)ctx; - // in[0] is coefficients with shape [ncomp=coeff_comp, Q] + // in[0] is coefficients with shape [ncomp=1, Q] // in[1] is Jacobians with shape [dim, ncomp=space_dim, Q] // in[2] is quadrature weights, size (Q) // // At every quadrature point, compute qw/det(J) adj(J) C adj(J)^T (for // H(curl)) or qw/det(J) J^T C J (for H(div)) and store the symmetric part - // of the result. + // of the result const CeedScalar *c = in[0], *J = in[1], *qw = in[2]; CeedScalar *qd = out[0]; - switch (100 * bc->space_dim + 10 * bc->dim + bc->coeff_comp) + switch (10 * bc->space_dim + bc->dim) { - case 111: + case 11: CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++) { qd[i] = qw[i] * c[i] * J[i]; } break; - case 213: + case 21: CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++) { - MultJtCJ21(J + i, Q, c + i, Q, 3, qw[i], Q, qd + i); + MultJtCJ21(J + i, Q, c + i, Q, 1, qw[i], Q, qd + i); } break; - case 212: + case 22: CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++) { - MultJtCJ21(J + i, Q, c + i, Q, 2, qw[i], Q, qd + i); + MultJtCJ22(J + i, Q, c + i, Q, 1, qw[i], Q, qd + i); } break; - case 211: + case 32: CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++) { - MultJtCJ21(J + i, Q, c + i, Q, 1, qw[i], Q, qd + i); + MultJtCJ32(J + i, Q, c + i, Q, 1, qw[i], Q, qd + i); } break; - case 223: + case 33: CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++) { - MultJtCJ22(J + i, Q, c + i, Q, 3, qw[i], Q, qd + i); + MultJtCJ33(J + i, Q, c + i, Q, 1, qw[i], Q, qd + i); } break; - case 222: + } + return 0; +} + +/// libCEED QFunction for building quadrature data for an H(div) mass operator +/// with a vector coefficient evaluated at quadrature points +CEED_QFUNCTION(f_build_hdivmass_quad_vector)(void *ctx, CeedInt Q, + const CeedScalar *const *in, + CeedScalar *const *out) +{ + VectorFEMassContext *bc = (VectorFEMassContext *)ctx; + // in[0] is coefficients with shape [ncomp=space_dim, Q] + // in[1] is Jacobians with shape [dim, ncomp=space_dim, Q] + // in[2] is quadrature weights, size (Q) + // + // At every quadrature point, compute qw/det(J) adj(J) C adj(J)^T (for + // H(curl)) or qw/det(J) J^T C J (for H(div)) and store the symmetric part + // of the result + const CeedScalar *c = in[0], *J = in[1], *qw = in[2]; + CeedScalar *qd = out[0]; + switch (10 * bc->space_dim + bc->dim) + { + case 21: CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++) { - MultJtCJ22(J + i, Q, c + i, Q, 2, qw[i], Q, qd + i); + MultJtCJ21(J + i, Q, c + i, Q, 2, qw[i], Q, qd + i); } break; - case 221: + case 22: CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++) { - MultJtCJ22(J + i, Q, c + i, Q, 1, qw[i], Q, qd + i); + MultJtCJ22(J + i, Q, c + i, Q, 2, qw[i], Q, qd + i); } break; - case 326: + case 32: CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++) { - MultJtCJ32(J + i, Q, c + i, Q, 6, qw[i], Q, qd + i); + MultJtCJ32(J + i, Q, c + i, Q, 3, qw[i], Q, qd + i); } break; - case 323: + case 33: CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++) { - MultJtCJ32(J + i, Q, c + i, Q, 3, qw[i], Q, qd + i); + MultJtCJ33(J + i, Q, c + i, Q, 3, qw[i], Q, qd + i); } break; - case 321: + } + return 0; +} + +/// libCEED QFunction for building quadrature data for an H(div) mass operator +/// with a matrix coefficient evaluated at quadrature points +CEED_QFUNCTION(f_build_hdivmass_quad_matrix)(void *ctx, CeedInt Q, + const CeedScalar *const *in, + CeedScalar *const *out) +{ + VectorFEMassContext *bc = (VectorFEMassContext *)ctx; + // in[0] is coefficients with shape [ncomp=space_dim*(space_dim+1)/2, Q] + // in[1] is Jacobians with shape [dim, ncomp=space_dim, Q] + // in[2] is quadrature weights, size (Q) + // + // At every quadrature point, compute qw/det(J) adj(J) C adj(J)^T (for + // H(curl)) or qw/det(J) J^T C J (for H(div)) and store the symmetric part + // of the result + const CeedScalar *c = in[0], *J = in[1], *qw = in[2]; + CeedScalar *qd = out[0]; + switch (10 * bc->space_dim + bc->dim) + { + case 21: CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++) { - MultJtCJ32(J + i, Q, c + i, Q, 1, qw[i], Q, qd + i); + MultJtCJ21(J + i, Q, c + i, Q, 3, qw[i], Q, qd + i); } break; - case 336: + case 22: CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++) { - MultJtCJ33(J + i, Q, c + i, Q, 6, qw[i], Q, qd + i); + MultJtCJ22(J + i, Q, c + i, Q, 3, qw[i], Q, qd + i); } break; - case 333: + case 32: CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++) { - MultJtCJ33(J + i, Q, c + i, Q, 3, qw[i], Q, qd + i); + MultJtCJ32(J + i, Q, c + i, Q, 6, qw[i], Q, qd + i); } break; - case 331: + case 33: CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++) { - MultJtCJ33(J + i, Q, c + i, Q, 1, qw[i], Q, qd + i); + MultJtCJ33(J + i, Q, c + i, Q, 6, qw[i], Q, qd + i); } break; } return 0; } -CEED_QFUNCTION(f_build_hcurlmass_quad)(void *ctx, CeedInt Q, - const CeedScalar *const *in, - CeedScalar *const *out) +/// libCEED QFunction for building quadrature data for an H(curl) mass operator +/// with a scalar coefficient evaluated at quadrature points +CEED_QFUNCTION(f_build_hcurlmass_quad_scalar)(void *ctx, CeedInt Q, + const CeedScalar *const *in, + CeedScalar *const *out) { VectorFEMassContext *bc = (VectorFEMassContext *)ctx; - // in[0] is coefficients with shape [ncomp=coeff_comp, Q] + // in[0] is coefficients with shape [ncomp=1, Q] // in[1] is Jacobians with shape [dim, ncomp=space_dim, Q] // in[2] is quadrature weights, size (Q) // // At every quadrature point, compute qw/det(J) adj(J) C adj(J)^T (for // H(curl)) or qw/det(J) J^T C J (for H(div)) and store the symmetric part - // of the result. + // of the result const CeedScalar *c = in[0], *J = in[1], *qw = in[2]; CeedScalar *qd = out[0]; - switch (100 * bc->space_dim + 10 * bc->dim + bc->coeff_comp) + switch (10 * bc->space_dim + bc->dim) { - case 111: + case 11: CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++) { qd[i] = qw[i] * c[i] / J[i]; } break; - case 213: + case 21: CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++) { - MultAdjJCAdjJt21(J + i, Q, c + i, Q, 3, qw[i], Q, qd + i); + MultAdjJCAdjJt21(J + i, Q, c + i, Q, 1, qw[i], Q, qd + i); } break; - case 212: + case 22: CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++) { - MultAdjJCAdjJt21(J + i, Q, c + i, Q, 2, qw[i], Q, qd + i); + MultAdjJCAdjJt22(J + i, Q, c + i, Q, 1, qw[i], Q, qd + i); } break; - case 211: + case 32: CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++) { - MultAdjJCAdjJt21(J + i, Q, c + i, Q, 1, qw[i], Q, qd + i); + MultAdjJCAdjJt32(J + i, Q, c + i, Q, 1, qw[i], Q, qd + i); } break; - case 223: + case 33: CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++) { - MultAdjJCAdjJt22(J + i, Q, c + i, Q, 3, qw[i], Q, qd + i); + MultAdjJCAdjJt33(J + i, Q, c + i, Q, 1, qw[i], Q, qd + i); } break; - case 222: + } + return 0; +} + +/// libCEED QFunction for building quadrature data for an H(curl) mass operator +/// with a vector coefficient evaluated at quadrature points +CEED_QFUNCTION(f_build_hcurlmass_quad_vector)(void *ctx, CeedInt Q, + const CeedScalar *const *in, + CeedScalar *const *out) +{ + VectorFEMassContext *bc = (VectorFEMassContext *)ctx; + // in[0] is coefficients with shape [ncomp=space_dim, Q] + // in[1] is Jacobians with shape [dim, ncomp=space_dim, Q] + // in[2] is quadrature weights, size (Q) + // + // At every quadrature point, compute qw/det(J) adj(J) C adj(J)^T (for + // H(curl)) or qw/det(J) J^T C J (for H(div)) and store the symmetric part + // of the result + const CeedScalar *c = in[0], *J = in[1], *qw = in[2]; + CeedScalar *qd = out[0]; + switch (10 * bc->space_dim + bc->dim) + { + case 21: CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++) { - MultAdjJCAdjJt22(J + i, Q, c + i, Q, 2, qw[i], Q, qd + i); + MultAdjJCAdjJt21(J + i, Q, c + i, Q, 2, qw[i], Q, qd + i); } break; - case 221: + case 22: CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++) { - MultAdjJCAdjJt22(J + i, Q, c + i, Q, 1, qw[i], Q, qd + i); + MultAdjJCAdjJt22(J + i, Q, c + i, Q, 2, qw[i], Q, qd + i); } break; - case 326: + case 32: CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++) { - MultAdjJCAdjJt32(J + i, Q, c + i, Q, 6, qw[i], Q, qd + i); + MultAdjJCAdjJt32(J + i, Q, c + i, Q, 3, qw[i], Q, qd + i); } break; - case 323: + case 33: CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++) { - MultAdjJCAdjJt32(J + i, Q, c + i, Q, 3, qw[i], Q, qd + i); + MultAdjJCAdjJt33(J + i, Q, c + i, Q, 3, qw[i], Q, qd + i); } break; - case 321: + } + return 0; +} + +/// libCEED QFunction for building quadrature data for an H(curl) mass operator +/// with a matrix coefficient evaluated at quadrature points +CEED_QFUNCTION(f_build_hcurlmass_quad_matrix)(void *ctx, CeedInt Q, + const CeedScalar *const *in, + CeedScalar *const *out) +{ + VectorFEMassContext *bc = (VectorFEMassContext *)ctx; + // in[0] is coefficients with shape [ncomp=space_dim*(space_dim+1)/2, Q] + // in[1] is Jacobians with shape [dim, ncomp=space_dim, Q] + // in[2] is quadrature weights, size (Q) + // + // At every quadrature point, compute qw/det(J) adj(J) C adj(J)^T (for + // H(curl)) or qw/det(J) J^T C J (for H(div)) and store the symmetric part + // of the result + const CeedScalar *c = in[0], *J = in[1], *qw = in[2]; + CeedScalar *qd = out[0]; + switch (10 * bc->space_dim + bc->dim) + { + case 21: CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++) { - MultAdjJCAdjJt32(J + i, Q, c + i, Q, 1, qw[i], Q, qd + i); + MultAdjJCAdjJt21(J + i, Q, c + i, Q, 3, qw[i], Q, qd + i); } break; - case 336: + case 22: CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++) { - MultAdjJCAdjJt33(J + i, Q, c + i, Q, 6, qw[i], Q, qd + i); + MultAdjJCAdjJt22(J + i, Q, c + i, Q, 3, qw[i], Q, qd + i); } break; - case 333: + case 32: CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++) { - MultAdjJCAdjJt33(J + i, Q, c + i, Q, 3, qw[i], Q, qd + i); + MultAdjJCAdjJt32(J + i, Q, c + i, Q, 6, qw[i], Q, qd + i); } break; - case 331: + case 33: CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++) { - MultAdjJCAdjJt33(J + i, Q, c + i, Q, 1, qw[i], Q, qd + i); + MultAdjJCAdjJt33(J + i, Q, c + i, Q, 6, qw[i], Q, qd + i); } break; } @@ -463,10 +641,11 @@ CEED_QFUNCTION(f_apply_vecfemass)(void *ctx, CeedInt Q, return 0; } -/// libCEED QFunction for applying a vector FE mass operator -CEED_QFUNCTION(f_apply_hdivmass_mf_const)(void *ctx, CeedInt Q, - const CeedScalar *const *in, - CeedScalar *const *out) +/// libCEED QFunction for applying an H(div) mass operator with a scalar +/// constant coefficient +CEED_QFUNCTION(f_apply_hdivmass_mf_const_scalar)(void *ctx, CeedInt Q, + const CeedScalar *const *in, + CeedScalar *const *out) { VectorFEMassContext *bc = (VectorFEMassContext *)ctx; // in[0], out[0] have shape [dim, ncomp=1, Q] @@ -474,13 +653,13 @@ CEED_QFUNCTION(f_apply_hdivmass_mf_const)(void *ctx, CeedInt Q, // in[2] is quadrature weights, size (Q) // // At every quadrature point, compute qw/det(J) adj(J) C adj(J)^T (for - // H(curl)) or qw/det(J) J^T C J (for H(div)). + // H(curl)) or qw/det(J) J^T C J (for H(div)) const CeedScalar *coeff = bc->coeff; const CeedScalar *u = in[0], *J = in[1], *qw = in[2]; CeedScalar *v = out[0]; - switch (100 * bc->space_dim + 10 * bc->dim + bc->coeff_comp) + switch (10 * bc->space_dim + bc->dim) { - case 111: + case 11: CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++) { const CeedScalar coeff0 = coeff[0]; @@ -488,23 +667,7 @@ CEED_QFUNCTION(f_apply_hdivmass_mf_const)(void *ctx, CeedInt Q, v[i] = qd * u[i]; } break; - case 213: - CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++) - { - CeedScalar qd; - MultJtCJ21(J + i, Q, coeff, 1, 3, qw[i], 1, &qd); - v[i] = qd * u[i]; - } - break; - case 212: - CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++) - { - CeedScalar qd; - MultJtCJ21(J + i, Q, coeff, 1, 2, qw[i], 1, &qd); - v[i] = qd * u[i]; - } - break; - case 211: + case 21: CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++) { CeedScalar qd; @@ -512,77 +675,98 @@ CEED_QFUNCTION(f_apply_hdivmass_mf_const)(void *ctx, CeedInt Q, v[i] = qd * u[i]; } break; - case 223: + case 22: CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++) { CeedScalar qd[3]; - MultJtCJ22(J + i, Q, coeff, 1, 3, qw[i], 1, qd); + MultJtCJ22(J + i, Q, coeff, 1, 1, qw[i], 1, qd); const CeedScalar u0 = u[i + Q * 0]; const CeedScalar u1 = u[i + Q * 1]; v[i + Q * 0] = qd[0] * u0 + qd[1] * u1; v[i + Q * 1] = qd[1] * u0 + qd[2] * u1; } break; - case 222: + case 32: CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++) { CeedScalar qd[3]; - MultJtCJ22(J + i, Q, coeff, 1, 2, qw[i], 1, qd); + MultJtCJ32(J + i, Q, coeff, 1, 1, qw[i], 1, qd); const CeedScalar u0 = u[i + Q * 0]; const CeedScalar u1 = u[i + Q * 1]; v[i + Q * 0] = qd[0] * u0 + qd[1] * u1; v[i + Q * 1] = qd[1] * u0 + qd[2] * u1; } break; - case 221: + case 33: CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++) { - CeedScalar qd[3]; - MultJtCJ22(J + i, Q, coeff, 1, 1, qw[i], 1, qd); + CeedScalar qd[6]; + MultJtCJ33(J + i, Q, coeff, 1, 1, qw[i], 1, qd); const CeedScalar u0 = u[i + Q * 0]; const CeedScalar u1 = u[i + Q * 1]; - v[i + Q * 0] = qd[0] * u0 + qd[1] * u1; - v[i + Q * 1] = qd[1] * u0 + qd[2] * u1; + const CeedScalar u2 = u[i + Q * 2]; + v[i + Q * 0] = qd[0] * u0 + qd[1] * u1 + qd[2] * u2; + v[i + Q * 1] = qd[1] * u0 + qd[3] * u1 + qd[4] * u2; + v[i + Q * 2] = qd[2] * u0 + qd[4] * u1 + qd[5] * u2; } break; - case 326: + } + return 0; +} + +/// libCEED QFunction for applying an H(div) mass operator with a vector +/// constant coefficient +CEED_QFUNCTION(f_apply_hdivmass_mf_const_vector)(void *ctx, CeedInt Q, + const CeedScalar *const *in, + CeedScalar *const *out) +{ + VectorFEMassContext *bc = (VectorFEMassContext *)ctx; + // in[0], out[0] have shape [dim, ncomp=1, Q] + // in[1] is Jacobians with shape [dim, ncomp=space_dim, Q] + // in[2] is quadrature weights, size (Q) + // + // At every quadrature point, compute qw/det(J) adj(J) C adj(J)^T (for + // H(curl)) or qw/det(J) J^T C J (for H(div)) + const CeedScalar *coeff = bc->coeff; + const CeedScalar *u = in[0], *J = in[1], *qw = in[2]; + CeedScalar *v = out[0]; + switch (10 * bc->space_dim + bc->dim) + { + case 21: CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++) { - CeedScalar qd[3]; - MultJtCJ32(J + i, Q, coeff, 1, 6, qw[i], 1, qd); - const CeedScalar u0 = u[i + Q * 0]; - const CeedScalar u1 = u[i + Q * 1]; - v[i + Q * 0] = qd[0] * u0 + qd[1] * u1; - v[i + Q * 1] = qd[1] * u0 + qd[2] * u1; + CeedScalar qd; + MultJtCJ21(J + i, Q, coeff, 1, 2, qw[i], 1, &qd); + v[i] = qd * u[i]; } break; - case 323: + case 22: CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++) { CeedScalar qd[3]; - MultJtCJ32(J + i, Q, coeff, 1, 3, qw[i], 1, qd); + MultJtCJ22(J + i, Q, coeff, 1, 2, qw[i], 1, qd); const CeedScalar u0 = u[i + Q * 0]; const CeedScalar u1 = u[i + Q * 1]; v[i + Q * 0] = qd[0] * u0 + qd[1] * u1; v[i + Q * 1] = qd[1] * u0 + qd[2] * u1; } break; - case 321: + case 32: CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++) { CeedScalar qd[3]; - MultJtCJ32(J + i, Q, coeff, 1, 1, qw[i], 1, qd); + MultJtCJ32(J + i, Q, coeff, 1, 3, qw[i], 1, qd); const CeedScalar u0 = u[i + Q * 0]; const CeedScalar u1 = u[i + Q * 1]; v[i + Q * 0] = qd[0] * u0 + qd[1] * u1; v[i + Q * 1] = qd[1] * u0 + qd[2] * u1; } break; - case 336: + case 33: CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++) { CeedScalar qd[6]; - MultJtCJ33(J + i, Q, coeff, 1, 6, qw[i], 1, qd); + MultJtCJ33(J + i, Q, coeff, 1, 3, qw[i], 1, qd); const CeedScalar u0 = u[i + Q * 0]; const CeedScalar u1 = u[i + Q * 1]; const CeedScalar u2 = u[i + Q * 2]; @@ -591,24 +775,63 @@ CEED_QFUNCTION(f_apply_hdivmass_mf_const)(void *ctx, CeedInt Q, v[i + Q * 2] = qd[2] * u0 + qd[4] * u1 + qd[5] * u2; } break; - case 333: + } + return 0; +} + +/// libCEED QFunction for applying an H(div) mass operator with a matrix +/// constant coefficient +CEED_QFUNCTION(f_apply_hdivmass_mf_const_matrix)(void *ctx, CeedInt Q, + const CeedScalar *const *in, + CeedScalar *const *out) +{ + VectorFEMassContext *bc = (VectorFEMassContext *)ctx; + // in[0], out[0] have shape [dim, ncomp=1, Q] + // in[1] is Jacobians with shape [dim, ncomp=space_dim, Q] + // in[2] is quadrature weights, size (Q) + // + // At every quadrature point, compute qw/det(J) adj(J) C adj(J)^T (for + // H(curl)) or qw/det(J) J^T C J (for H(div)) + const CeedScalar *coeff = bc->coeff; + const CeedScalar *u = in[0], *J = in[1], *qw = in[2]; + CeedScalar *v = out[0]; + switch (10 * bc->space_dim + bc->dim) + { + case 21: + CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++) + { + CeedScalar qd; + MultJtCJ21(J + i, Q, coeff, 1, 3, qw[i], 1, &qd); + v[i] = qd * u[i]; + } + break; + case 22: CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++) { - CeedScalar qd[6]; - MultJtCJ33(J + i, Q, coeff, 1, 3, qw[i], 1, qd); + CeedScalar qd[3]; + MultJtCJ22(J + i, Q, coeff, 1, 3, qw[i], 1, qd); const CeedScalar u0 = u[i + Q * 0]; const CeedScalar u1 = u[i + Q * 1]; - const CeedScalar u2 = u[i + Q * 2]; - v[i + Q * 0] = qd[0] * u0 + qd[1] * u1 + qd[2] * u2; - v[i + Q * 1] = qd[1] * u0 + qd[3] * u1 + qd[4] * u2; - v[i + Q * 2] = qd[2] * u0 + qd[4] * u1 + qd[5] * u2; + v[i + Q * 0] = qd[0] * u0 + qd[1] * u1; + v[i + Q * 1] = qd[1] * u0 + qd[2] * u1; + } + break; + case 32: + CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++) + { + CeedScalar qd[3]; + MultJtCJ32(J + i, Q, coeff, 1, 6, qw[i], 1, qd); + const CeedScalar u0 = u[i + Q * 0]; + const CeedScalar u1 = u[i + Q * 1]; + v[i + Q * 0] = qd[0] * u0 + qd[1] * u1; + v[i + Q * 1] = qd[1] * u0 + qd[2] * u1; } break; - case 331: + case 33: CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++) { CeedScalar qd[6]; - MultJtCJ33(J + i, Q, coeff, 1, 1, qw[i], 1, qd); + MultJtCJ33(J + i, Q, coeff, 1, 6, qw[i], 1, qd); const CeedScalar u0 = u[i + Q * 0]; const CeedScalar u1 = u[i + Q * 1]; const CeedScalar u2 = u[i + Q * 2]; @@ -621,9 +844,11 @@ CEED_QFUNCTION(f_apply_hdivmass_mf_const)(void *ctx, CeedInt Q, return 0; } -CEED_QFUNCTION(f_apply_hcurlmass_mf_const)(void *ctx, CeedInt Q, - const CeedScalar *const *in, - CeedScalar *const *out) +/// libCEED QFunction for applying an H(curl) mass operator with a scalar +/// constant coefficient +CEED_QFUNCTION(f_apply_hcurlmass_mf_const_scalar)(void *ctx, CeedInt Q, + const CeedScalar *const *in, + CeedScalar *const *out) { VectorFEMassContext *bc = (VectorFEMassContext *)ctx; // in[0], out[0] have shape [dim, ncomp=1, Q] @@ -631,13 +856,13 @@ CEED_QFUNCTION(f_apply_hcurlmass_mf_const)(void *ctx, CeedInt Q, // in[2] is quadrature weights, size (Q) // // At every quadrature point, compute qw/det(J) adj(J) C adj(J)^T (for - // H(curl)) or qw/det(J) J^T C J (for H(div)). + // H(curl)) or qw/det(J) J^T C J (for H(div)) const CeedScalar *coeff = bc->coeff; const CeedScalar *u = in[0], *J = in[1], *qw = in[2]; CeedScalar *v = out[0]; - switch (100 * bc->space_dim + 10 * bc->dim + bc->coeff_comp) + switch (10 * bc->space_dim + bc->dim) { - case 111: + case 11: CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++) { const CeedScalar coeff0 = coeff[0]; @@ -645,23 +870,7 @@ CEED_QFUNCTION(f_apply_hcurlmass_mf_const)(void *ctx, CeedInt Q, v[i] = qd * u[i]; } break; - case 213: - CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++) - { - CeedScalar qd; - MultAdjJCAdjJt21(J + i, Q, coeff, 1, 3, qw[i], 1, &qd); - v[i] = qd * u[i]; - } - break; - case 212: - CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++) - { - CeedScalar qd; - MultAdjJCAdjJt21(J + i, Q, coeff, 1, 2, qw[i], 1, &qd); - v[i] = qd * u[i]; - } - break; - case 211: + case 21: CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++) { CeedScalar qd; @@ -669,77 +878,98 @@ CEED_QFUNCTION(f_apply_hcurlmass_mf_const)(void *ctx, CeedInt Q, v[i] = qd * u[i]; } break; - case 223: + case 22: CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++) { CeedScalar qd[3]; - MultAdjJCAdjJt22(J + i, Q, coeff, 1, 3, qw[i], 1, qd); + MultAdjJCAdjJt22(J + i, Q, coeff, 1, 1, qw[i], 1, qd); const CeedScalar u0 = u[i + Q * 0]; const CeedScalar u1 = u[i + Q * 1]; v[i + Q * 0] = qd[0] * u0 + qd[1] * u1; v[i + Q * 1] = qd[1] * u0 + qd[2] * u1; } break; - case 222: + case 32: CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++) { CeedScalar qd[3]; - MultAdjJCAdjJt22(J + i, Q, coeff, 1, 2, qw[i], 1, qd); + MultAdjJCAdjJt32(J + i, Q, coeff, 1, 1, qw[i], 1, qd); const CeedScalar u0 = u[i + Q * 0]; const CeedScalar u1 = u[i + Q * 1]; v[i + Q * 0] = qd[0] * u0 + qd[1] * u1; v[i + Q * 1] = qd[1] * u0 + qd[2] * u1; } break; - case 221: + case 33: CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++) { - CeedScalar qd[3]; - MultAdjJCAdjJt22(J + i, Q, coeff, 1, 1, qw[i], 1, qd); + CeedScalar qd[6]; + MultAdjJCAdjJt33(J + i, Q, coeff, 1, 1, qw[i], 1, qd); const CeedScalar u0 = u[i + Q * 0]; const CeedScalar u1 = u[i + Q * 1]; - v[i + Q * 0] = qd[0] * u0 + qd[1] * u1; - v[i + Q * 1] = qd[1] * u0 + qd[2] * u1; + const CeedScalar u2 = u[i + Q * 2]; + v[i + Q * 0] = qd[0] * u0 + qd[1] * u1 + qd[2] * u2; + v[i + Q * 1] = qd[1] * u0 + qd[3] * u1 + qd[4] * u2; + v[i + Q * 2] = qd[2] * u0 + qd[4] * u1 + qd[5] * u2; } break; - case 326: + } + return 0; +} + +/// libCEED QFunction for applying an H(curl) mass operator with a vector +/// constant coefficient +CEED_QFUNCTION(f_apply_hcurlmass_mf_const_vector)(void *ctx, CeedInt Q, + const CeedScalar *const *in, + CeedScalar *const *out) +{ + VectorFEMassContext *bc = (VectorFEMassContext *)ctx; + // in[0], out[0] have shape [dim, ncomp=1, Q] + // in[1] is Jacobians with shape [dim, ncomp=space_dim, Q] + // in[2] is quadrature weights, size (Q) + // + // At every quadrature point, compute qw/det(J) adj(J) C adj(J)^T (for + // H(curl)) or qw/det(J) J^T C J (for H(div)) + const CeedScalar *coeff = bc->coeff; + const CeedScalar *u = in[0], *J = in[1], *qw = in[2]; + CeedScalar *v = out[0]; + switch (10 * bc->space_dim + bc->dim) + { + case 21: CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++) { - CeedScalar qd[3]; - MultAdjJCAdjJt32(J + i, Q, coeff, 1, 6, qw[i], 1, qd); - const CeedScalar u0 = u[i + Q * 0]; - const CeedScalar u1 = u[i + Q * 1]; - v[i + Q * 0] = qd[0] * u0 + qd[1] * u1; - v[i + Q * 1] = qd[1] * u0 + qd[2] * u1; + CeedScalar qd; + MultAdjJCAdjJt21(J + i, Q, coeff, 1, 2, qw[i], 1, &qd); + v[i] = qd * u[i]; } break; - case 323: + case 22: CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++) { CeedScalar qd[3]; - MultAdjJCAdjJt32(J + i, Q, coeff, 1, 3, qw[i], 1, qd); + MultAdjJCAdjJt22(J + i, Q, coeff, 1, 2, qw[i], 1, qd); const CeedScalar u0 = u[i + Q * 0]; const CeedScalar u1 = u[i + Q * 1]; v[i + Q * 0] = qd[0] * u0 + qd[1] * u1; v[i + Q * 1] = qd[1] * u0 + qd[2] * u1; } break; - case 321: + case 32: CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++) { CeedScalar qd[3]; - MultAdjJCAdjJt32(J + i, Q, coeff, 1, 1, qw[i], 1, qd); + MultAdjJCAdjJt32(J + i, Q, coeff, 1, 3, qw[i], 1, qd); const CeedScalar u0 = u[i + Q * 0]; const CeedScalar u1 = u[i + Q * 1]; v[i + Q * 0] = qd[0] * u0 + qd[1] * u1; v[i + Q * 1] = qd[1] * u0 + qd[2] * u1; } break; - case 336: + case 33: CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++) { CeedScalar qd[6]; - MultAdjJCAdjJt33(J + i, Q, coeff, 1, 6, qw[i], 1, qd); + MultAdjJCAdjJt33(J + i, Q, coeff, 1, 3, qw[i], 1, qd); const CeedScalar u0 = u[i + Q * 0]; const CeedScalar u1 = u[i + Q * 1]; const CeedScalar u2 = u[i + Q * 2]; @@ -748,24 +978,63 @@ CEED_QFUNCTION(f_apply_hcurlmass_mf_const)(void *ctx, CeedInt Q, v[i + Q * 2] = qd[2] * u0 + qd[4] * u1 + qd[5] * u2; } break; - case 333: + } + return 0; +} + +/// libCEED QFunction for applying an H(curl) mass operator with a matrix +/// constant coefficient +CEED_QFUNCTION(f_apply_hcurlmass_mf_const_matrix)(void *ctx, CeedInt Q, + const CeedScalar *const *in, + CeedScalar *const *out) +{ + VectorFEMassContext *bc = (VectorFEMassContext *)ctx; + // in[0], out[0] have shape [dim, ncomp=1, Q] + // in[1] is Jacobians with shape [dim, ncomp=space_dim, Q] + // in[2] is quadrature weights, size (Q) + // + // At every quadrature point, compute qw/det(J) adj(J) C adj(J)^T (for + // H(curl)) or qw/det(J) J^T C J (for H(div)) + const CeedScalar *coeff = bc->coeff; + const CeedScalar *u = in[0], *J = in[1], *qw = in[2]; + CeedScalar *v = out[0]; + switch (10 * bc->space_dim + bc->dim) + { + case 21: CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++) { - CeedScalar qd[6]; - MultAdjJCAdjJt33(J + i, Q, coeff, 1, 3, qw[i], 1, qd); + CeedScalar qd; + MultAdjJCAdjJt21(J + i, Q, coeff, 1, 3, qw[i], 1, &qd); + v[i] = qd * u[i]; + } + break; + case 22: + CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++) + { + CeedScalar qd[3]; + MultAdjJCAdjJt22(J + i, Q, coeff, 1, 3, qw[i], 1, qd); const CeedScalar u0 = u[i + Q * 0]; const CeedScalar u1 = u[i + Q * 1]; - const CeedScalar u2 = u[i + Q * 2]; - v[i + Q * 0] = qd[0] * u0 + qd[1] * u1 + qd[2] * u2; - v[i + Q * 1] = qd[1] * u0 + qd[3] * u1 + qd[4] * u2; - v[i + Q * 2] = qd[2] * u0 + qd[4] * u1 + qd[5] * u2; + v[i + Q * 0] = qd[0] * u0 + qd[1] * u1; + v[i + Q * 1] = qd[1] * u0 + qd[2] * u1; + } + break; + case 32: + CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++) + { + CeedScalar qd[3]; + MultAdjJCAdjJt32(J + i, Q, coeff, 1, 6, qw[i], 1, qd); + const CeedScalar u0 = u[i + Q * 0]; + const CeedScalar u1 = u[i + Q * 1]; + v[i + Q * 0] = qd[0] * u0 + qd[1] * u1; + v[i + Q * 1] = qd[1] * u0 + qd[2] * u1; } break; - case 331: + case 33: CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++) { CeedScalar qd[6]; - MultAdjJCAdjJt33(J + i, Q, coeff, 1, 1, qw[i], 1, qd); + MultAdjJCAdjJt33(J + i, Q, coeff, 1, 6, qw[i], 1, qd); const CeedScalar u0 = u[i + Q * 0]; const CeedScalar u1 = u[i + Q * 1]; const CeedScalar u2 = u[i + Q * 2]; @@ -778,47 +1047,32 @@ CEED_QFUNCTION(f_apply_hcurlmass_mf_const)(void *ctx, CeedInt Q, return 0; } -/// libCEED QFunction for applying a vector FE mass operator -CEED_QFUNCTION(f_apply_hdivmass_mf_quad)(void *ctx, CeedInt Q, - const CeedScalar *const *in, - CeedScalar *const *out) +/// libCEED QFunction for applying an H(div) operator with a scalar +/// coefficient evaluated at quadrature points +CEED_QFUNCTION(f_apply_hdivmass_mf_quad_scalar)(void *ctx, CeedInt Q, + const CeedScalar *const *in, + CeedScalar *const *out) { VectorFEMassContext *bc = (VectorFEMassContext *)ctx; // in[0], out[0] have shape [dim, ncomp=1, Q] - // in[1] is coefficients with shape [ncomp=coeff_comp, Q] + // in[1] is coefficients with shape [ncomp=1, Q] // in[2] is Jacobians with shape [dim, ncomp=space_dim, Q] // in[3] is quadrature weights, size (Q) // // At every quadrature point, compute qw/det(J) adj(J) C adj(J)^T (for - // H(curl)) or qw/det(J) J^T C J (for H(div)). + // H(curl)) or qw/det(J) J^T C J (for H(div)) const CeedScalar *u = in[0], *c = in[1], *J = in[2], *qw = in[3]; CeedScalar *v = out[0]; - switch (100 * bc->space_dim + 10 * bc->dim + bc->coeff_comp) + switch (10 * bc->space_dim + bc->dim) { - case 111: + case 11: CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++) { const CeedScalar qd = qw[i] * c[i] * J[i]; v[i] = qd * u[i]; } break; - case 213: - CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++) - { - CeedScalar qd; - MultJtCJ21(J + i, Q, c + i, Q, 3, qw[i], 1, &qd); - v[i] = qd * u[i]; - } - break; - case 212: - CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++) - { - CeedScalar qd; - MultJtCJ21(J + i, Q, c + i, Q, 2, qw[i], 1, &qd); - v[i] = qd * u[i]; - } - break; - case 211: + case 21: CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++) { CeedScalar qd; @@ -826,77 +1080,98 @@ CEED_QFUNCTION(f_apply_hdivmass_mf_quad)(void *ctx, CeedInt Q, v[i] = qd * u[i]; } break; - case 223: + case 22: CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++) { CeedScalar qd[3]; - MultJtCJ22(J + i, Q, c + i, Q, 3, qw[i], 1, qd); + MultJtCJ22(J + i, Q, c + i, Q, 1, qw[i], 1, qd); const CeedScalar u0 = u[i + Q * 0]; const CeedScalar u1 = u[i + Q * 1]; v[i + Q * 0] = qd[0] * u0 + qd[1] * u1; v[i + Q * 1] = qd[1] * u0 + qd[2] * u1; } break; - case 222: + case 32: CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++) { CeedScalar qd[3]; - MultJtCJ22(J + i, Q, c + i, Q, 2, qw[i], 1, qd); + MultJtCJ32(J + i, Q, c + i, Q, 1, qw[i], 1, qd); const CeedScalar u0 = u[i + Q * 0]; const CeedScalar u1 = u[i + Q * 1]; v[i + Q * 0] = qd[0] * u0 + qd[1] * u1; v[i + Q * 1] = qd[1] * u0 + qd[2] * u1; } break; - case 221: + case 33: CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++) { - CeedScalar qd[3]; - MultJtCJ22(J + i, Q, c + i, Q, 1, qw[i], 1, qd); + CeedScalar qd[6]; + MultJtCJ33(J + i, Q, c + i, Q, 1, qw[i], 1, qd); const CeedScalar u0 = u[i + Q * 0]; const CeedScalar u1 = u[i + Q * 1]; - v[i + Q * 0] = qd[0] * u0 + qd[1] * u1; - v[i + Q * 1] = qd[1] * u0 + qd[2] * u1; + const CeedScalar u2 = u[i + Q * 2]; + v[i + Q * 0] = qd[0] * u0 + qd[1] * u1 + qd[2] * u2; + v[i + Q * 1] = qd[1] * u0 + qd[3] * u1 + qd[4] * u2; + v[i + Q * 2] = qd[2] * u0 + qd[4] * u1 + qd[5] * u2; } break; - case 326: + } + return 0; +} + +/// libCEED QFunction for applying an H(div) operator with a vector +/// coefficient evaluated at quadrature points +CEED_QFUNCTION(f_apply_hdivmass_mf_quad_vector)(void *ctx, CeedInt Q, + const CeedScalar *const *in, + CeedScalar *const *out) +{ + VectorFEMassContext *bc = (VectorFEMassContext *)ctx; + // in[0], out[0] have shape [dim, ncomp=1, Q] + // in[1] is coefficients with shape [ncomp=space_dim, Q] + // in[2] is Jacobians with shape [dim, ncomp=space_dim, Q] + // in[3] is quadrature weights, size (Q) + // + // At every quadrature point, compute qw/det(J) adj(J) C adj(J)^T (for + // H(curl)) or qw/det(J) J^T C J (for H(div)) + const CeedScalar *u = in[0], *c = in[1], *J = in[2], *qw = in[3]; + CeedScalar *v = out[0]; + switch (10 * bc->space_dim + bc->dim) + { + case 21: CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++) { - CeedScalar qd[3]; - MultJtCJ32(J + i, Q, c + i, Q, 6, qw[i], 1, qd); - const CeedScalar u0 = u[i + Q * 0]; - const CeedScalar u1 = u[i + Q * 1]; - v[i + Q * 0] = qd[0] * u0 + qd[1] * u1; - v[i + Q * 1] = qd[1] * u0 + qd[2] * u1; + CeedScalar qd; + MultJtCJ21(J + i, Q, c + i, Q, 2, qw[i], 1, &qd); + v[i] = qd * u[i]; } break; - case 323: + case 22: CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++) { CeedScalar qd[3]; - MultJtCJ32(J + i, Q, c + i, Q, 3, qw[i], 1, qd); + MultJtCJ22(J + i, Q, c + i, Q, 2, qw[i], 1, qd); const CeedScalar u0 = u[i + Q * 0]; const CeedScalar u1 = u[i + Q * 1]; v[i + Q * 0] = qd[0] * u0 + qd[1] * u1; v[i + Q * 1] = qd[1] * u0 + qd[2] * u1; } break; - case 321: + case 32: CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++) { CeedScalar qd[3]; - MultJtCJ32(J + i, Q, c + i, Q, 1, qw[i], 1, qd); + MultJtCJ32(J + i, Q, c + i, Q, 3, qw[i], 1, qd); const CeedScalar u0 = u[i + Q * 0]; const CeedScalar u1 = u[i + Q * 1]; v[i + Q * 0] = qd[0] * u0 + qd[1] * u1; v[i + Q * 1] = qd[1] * u0 + qd[2] * u1; } break; - case 336: + case 33: CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++) { CeedScalar qd[6]; - MultJtCJ33(J + i, Q, c + i, Q, 6, qw[i], 1, qd); + MultJtCJ33(J + i, Q, c + i, Q, 3, qw[i], 1, qd); const CeedScalar u0 = u[i + Q * 0]; const CeedScalar u1 = u[i + Q * 1]; const CeedScalar u2 = u[i + Q * 2]; @@ -905,24 +1180,63 @@ CEED_QFUNCTION(f_apply_hdivmass_mf_quad)(void *ctx, CeedInt Q, v[i + Q * 2] = qd[2] * u0 + qd[4] * u1 + qd[5] * u2; } break; - case 333: + } + return 0; +} + +/// libCEED QFunction for applying an H(div) operator with a matrix +/// coefficient evaluated at quadrature points +CEED_QFUNCTION(f_apply_hdivmass_mf_quad_matrix)(void *ctx, CeedInt Q, + const CeedScalar *const *in, + CeedScalar *const *out) +{ + VectorFEMassContext *bc = (VectorFEMassContext *)ctx; + // in[0], out[0] have shape [dim, ncomp=1, Q] + // in[1] is coefficients with shape [ncomp=space_dim*(space_dim+1)/2, Q] + // in[2] is Jacobians with shape [dim, ncomp=space_dim, Q] + // in[3] is quadrature weights, size (Q) + // + // At every quadrature point, compute qw/det(J) adj(J) C adj(J)^T (for + // H(curl)) or qw/det(J) J^T C J (for H(div)) + const CeedScalar *u = in[0], *c = in[1], *J = in[2], *qw = in[3]; + CeedScalar *v = out[0]; + switch (10 * bc->space_dim + bc->dim) + { + case 21: CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++) { - CeedScalar qd[6]; - MultJtCJ33(J + i, Q, c + i, Q, 3, qw[i], 1, qd); + CeedScalar qd; + MultJtCJ21(J + i, Q, c + i, Q, 3, qw[i], 1, &qd); + v[i] = qd * u[i]; + } + break; + case 22: + CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++) + { + CeedScalar qd[3]; + MultJtCJ22(J + i, Q, c + i, Q, 3, qw[i], 1, qd); const CeedScalar u0 = u[i + Q * 0]; const CeedScalar u1 = u[i + Q * 1]; - const CeedScalar u2 = u[i + Q * 2]; - v[i + Q * 0] = qd[0] * u0 + qd[1] * u1 + qd[2] * u2; - v[i + Q * 1] = qd[1] * u0 + qd[3] * u1 + qd[4] * u2; - v[i + Q * 2] = qd[2] * u0 + qd[4] * u1 + qd[5] * u2; + v[i + Q * 0] = qd[0] * u0 + qd[1] * u1; + v[i + Q * 1] = qd[1] * u0 + qd[2] * u1; } break; - case 331: + case 32: + CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++) + { + CeedScalar qd[3]; + MultJtCJ32(J + i, Q, c + i, Q, 6, qw[i], 1, qd); + const CeedScalar u0 = u[i + Q * 0]; + const CeedScalar u1 = u[i + Q * 1]; + v[i + Q * 0] = qd[0] * u0 + qd[1] * u1; + v[i + Q * 1] = qd[1] * u0 + qd[2] * u1; + } + break; + case 33: CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++) { CeedScalar qd[6]; - MultJtCJ33(J + i, Q, c + i, Q, 1, qw[i], 1, qd); + MultJtCJ33(J + i, Q, c + i, Q, 6, qw[i], 1, qd); const CeedScalar u0 = u[i + Q * 0]; const CeedScalar u1 = u[i + Q * 1]; const CeedScalar u2 = u[i + Q * 2]; @@ -935,46 +1249,32 @@ CEED_QFUNCTION(f_apply_hdivmass_mf_quad)(void *ctx, CeedInt Q, return 0; } -CEED_QFUNCTION(f_apply_hcurlmass_mf_quad)(void *ctx, CeedInt Q, - const CeedScalar *const *in, - CeedScalar *const *out) +/// libCEED QFunction for applying an H(curl) operator with a scalar +/// coefficient evaluated at quadrature points +CEED_QFUNCTION(f_apply_hcurlmass_mf_quad_scalar)(void *ctx, CeedInt Q, + const CeedScalar *const *in, + CeedScalar *const *out) { VectorFEMassContext *bc = (VectorFEMassContext *)ctx; // in[0], out[0] have shape [dim, ncomp=1, Q] - // in[1] is coefficients with shape [ncomp=coeff_comp, Q] + // in[1] is coefficients with shape [ncomp=1, Q] // in[2] is Jacobians with shape [dim, ncomp=space_dim, Q] // in[3] is quadrature weights, size (Q) // // At every quadrature point, compute qw/det(J) adj(J) C adj(J)^T (for - // H(curl)) or qw/det(J) J^T C J (for H(div)). + // H(curl)) or qw/det(J) J^T C J (for H(div)) const CeedScalar *u = in[0], *c = in[1], *J = in[2], *qw = in[3]; CeedScalar *v = out[0]; - switch (100 * bc->space_dim + 10 * bc->dim + bc->coeff_comp) + switch (10 * bc->space_dim + bc->dim) { - case 111: + case 11: CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++) { const CeedScalar qd = qw[i] * c[i] / J[i]; v[i] = qd * u[i]; } break; - case 213: - CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++) - { - CeedScalar qd; - MultAdjJCAdjJt21(J + i, Q, c + i, Q, 3, qw[i], 1, &qd); - v[i] = qd * u[i]; - } - break; - case 212: - CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++) - { - CeedScalar qd; - MultAdjJCAdjJt21(J + i, Q, c + i, Q, 2, qw[i], 1, &qd); - v[i] = qd * u[i]; - } - break; - case 211: + case 21: CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++) { CeedScalar qd; @@ -982,77 +1282,98 @@ CEED_QFUNCTION(f_apply_hcurlmass_mf_quad)(void *ctx, CeedInt Q, v[i] = qd * u[i]; } break; - case 223: + case 22: CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++) { CeedScalar qd[3]; - MultAdjJCAdjJt22(J + i, Q, c + i, Q, 3, qw[i], 1, qd); + MultAdjJCAdjJt22(J + i, Q, c + i, Q, 1, qw[i], 1, qd); const CeedScalar u0 = u[i + Q * 0]; const CeedScalar u1 = u[i + Q * 1]; v[i + Q * 0] = qd[0] * u0 + qd[1] * u1; v[i + Q * 1] = qd[1] * u0 + qd[2] * u1; } break; - case 222: + case 32: CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++) { CeedScalar qd[3]; - MultAdjJCAdjJt22(J + i, Q, c + i, Q, 2, qw[i], 1, qd); + MultAdjJCAdjJt32(J + i, Q, c + i, Q, 1, qw[i], 1, qd); const CeedScalar u0 = u[i + Q * 0]; const CeedScalar u1 = u[i + Q * 1]; v[i + Q * 0] = qd[0] * u0 + qd[1] * u1; v[i + Q * 1] = qd[1] * u0 + qd[2] * u1; } break; - case 221: + case 33: CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++) { - CeedScalar qd[3]; - MultAdjJCAdjJt22(J + i, Q, c + i, Q, 1, qw[i], 1, qd); + CeedScalar qd[6]; + MultAdjJCAdjJt33(J + i, Q, c + i, Q, 1, qw[i], 1, qd); const CeedScalar u0 = u[i + Q * 0]; const CeedScalar u1 = u[i + Q * 1]; - v[i + Q * 0] = qd[0] * u0 + qd[1] * u1; - v[i + Q * 1] = qd[1] * u0 + qd[2] * u1; + const CeedScalar u2 = u[i + Q * 2]; + v[i + Q * 0] = qd[0] * u0 + qd[1] * u1 + qd[2] * u2; + v[i + Q * 1] = qd[1] * u0 + qd[3] * u1 + qd[4] * u2; + v[i + Q * 2] = qd[2] * u0 + qd[4] * u1 + qd[5] * u2; } break; - case 326: + } + return 0; +} + +/// libCEED QFunction for applying an H(curl) operator with a vector +/// coefficient evaluated at quadrature points +CEED_QFUNCTION(f_apply_hcurlmass_mf_quad_vector)(void *ctx, CeedInt Q, + const CeedScalar *const *in, + CeedScalar *const *out) +{ + VectorFEMassContext *bc = (VectorFEMassContext *)ctx; + // in[0], out[0] have shape [dim, ncomp=1, Q] + // in[1] is coefficients with shape [ncomp=space_dim, Q] + // in[2] is Jacobians with shape [dim, ncomp=space_dim, Q] + // in[3] is quadrature weights, size (Q) + // + // At every quadrature point, compute qw/det(J) adj(J) C adj(J)^T (for + // H(curl)) or qw/det(J) J^T C J (for H(div)) + const CeedScalar *u = in[0], *c = in[1], *J = in[2], *qw = in[3]; + CeedScalar *v = out[0]; + switch (10 * bc->space_dim + bc->dim) + { + case 21: CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++) { - CeedScalar qd[3]; - MultAdjJCAdjJt32(J + i, Q, c + i, Q, 6, qw[i], 1, qd); - const CeedScalar u0 = u[i + Q * 0]; - const CeedScalar u1 = u[i + Q * 1]; - v[i + Q * 0] = qd[0] * u0 + qd[1] * u1; - v[i + Q * 1] = qd[1] * u0 + qd[2] * u1; + CeedScalar qd; + MultAdjJCAdjJt21(J + i, Q, c + i, Q, 2, qw[i], 1, &qd); + v[i] = qd * u[i]; } break; - case 323: + case 22: CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++) { CeedScalar qd[3]; - MultAdjJCAdjJt32(J + i, Q, c + i, Q, 3, qw[i], 1, qd); + MultAdjJCAdjJt22(J + i, Q, c + i, Q, 2, qw[i], 1, qd); const CeedScalar u0 = u[i + Q * 0]; const CeedScalar u1 = u[i + Q * 1]; v[i + Q * 0] = qd[0] * u0 + qd[1] * u1; v[i + Q * 1] = qd[1] * u0 + qd[2] * u1; } break; - case 321: + case 32: CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++) { CeedScalar qd[3]; - MultAdjJCAdjJt32(J + i, Q, c + i, Q, 1, qw[i], 1, qd); + MultAdjJCAdjJt32(J + i, Q, c + i, Q, 3, qw[i], 1, qd); const CeedScalar u0 = u[i + Q * 0]; const CeedScalar u1 = u[i + Q * 1]; v[i + Q * 0] = qd[0] * u0 + qd[1] * u1; v[i + Q * 1] = qd[1] * u0 + qd[2] * u1; } break; - case 336: + case 33: CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++) { CeedScalar qd[6]; - MultAdjJCAdjJt33(J + i, Q, c + i, Q, 6, qw[i], 1, qd); + MultAdjJCAdjJt33(J + i, Q, c + i, Q, 3, qw[i], 1, qd); const CeedScalar u0 = u[i + Q * 0]; const CeedScalar u1 = u[i + Q * 1]; const CeedScalar u2 = u[i + Q * 2]; @@ -1061,24 +1382,63 @@ CEED_QFUNCTION(f_apply_hcurlmass_mf_quad)(void *ctx, CeedInt Q, v[i + Q * 2] = qd[2] * u0 + qd[4] * u1 + qd[5] * u2; } break; - case 333: + } + return 0; +} + +/// libCEED QFunction for applying an H(curl) operator with a matrix +/// coefficient evaluated at quadrature points +CEED_QFUNCTION(f_apply_hcurlmass_mf_quad_matrix)(void *ctx, CeedInt Q, + const CeedScalar *const *in, + CeedScalar *const *out) +{ + VectorFEMassContext *bc = (VectorFEMassContext *)ctx; + // in[0], out[0] have shape [dim, ncomp=1, Q] + // in[1] is coefficients with shape [ncomp=space_dim*(space_dim+1)/2, Q] + // in[2] is Jacobians with shape [dim, ncomp=space_dim, Q] + // in[3] is quadrature weights, size (Q) + // + // At every quadrature point, compute qw/det(J) adj(J) C adj(J)^T (for + // H(curl)) or qw/det(J) J^T C J (for H(div)) + const CeedScalar *u = in[0], *c = in[1], *J = in[2], *qw = in[3]; + CeedScalar *v = out[0]; + switch (10 * bc->space_dim + bc->dim) + { + case 21: CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++) { - CeedScalar qd[6]; - MultAdjJCAdjJt33(J + i, Q, c + i, Q, 3, qw[i], 1, qd); + CeedScalar qd; + MultAdjJCAdjJt21(J + i, Q, c + i, Q, 3, qw[i], 1, &qd); + v[i] = qd * u[i]; + } + break; + case 22: + CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++) + { + CeedScalar qd[3]; + MultAdjJCAdjJt22(J + i, Q, c + i, Q, 3, qw[i], 1, qd); const CeedScalar u0 = u[i + Q * 0]; const CeedScalar u1 = u[i + Q * 1]; - const CeedScalar u2 = u[i + Q * 2]; - v[i + Q * 0] = qd[0] * u0 + qd[1] * u1 + qd[2] * u2; - v[i + Q * 1] = qd[1] * u0 + qd[3] * u1 + qd[4] * u2; - v[i + Q * 2] = qd[2] * u0 + qd[4] * u1 + qd[5] * u2; + v[i + Q * 0] = qd[0] * u0 + qd[1] * u1; + v[i + Q * 1] = qd[1] * u0 + qd[2] * u1; + } + break; + case 32: + CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++) + { + CeedScalar qd[3]; + MultAdjJCAdjJt32(J + i, Q, c + i, Q, 6, qw[i], 1, qd); + const CeedScalar u0 = u[i + Q * 0]; + const CeedScalar u1 = u[i + Q * 1]; + v[i + Q * 0] = qd[0] * u0 + qd[1] * u1; + v[i + Q * 1] = qd[1] * u0 + qd[2] * u1; } break; - case 331: + case 33: CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++) { CeedScalar qd[6]; - MultAdjJCAdjJt33(J + i, Q, c + i, Q, 1, qw[i], 1, qd); + MultAdjJCAdjJt33(J + i, Q, c + i, Q, 6, qw[i], 1, qd); const CeedScalar u0 = u[i + Q * 0]; const CeedScalar u1 = u[i + Q * 1]; const CeedScalar u2 = u[i + Q * 2]; diff --git a/fem/ceed/interface/coefficient.hpp b/fem/ceed/interface/coefficient.hpp index 9ccc40ceec0..d4e067d7a07 100644 --- a/fem/ceed/interface/coefficient.hpp +++ b/fem/ceed/interface/coefficient.hpp @@ -74,7 +74,7 @@ inline void InitCoefficient(mfem::Coefficient *Q, mfem::Mesh &mesh, const mfem::IntegrationRule &ir, bool use_bdr, Coefficient *&coeff_ptr) { - if (Q == nullptr || dynamic_cast(Q)) + if (Q == nullptr || dynamic_cast(Q)) { // The constant coefficient case is handled by the QFunction context coeff_ptr = nullptr; @@ -277,15 +277,15 @@ inline void InitCoefficientWithIndices(mfem::Coefficient *Q, // The constant coefficient case is handled by the QFunction context coeff_ptr = nullptr; } - else if (GridFunctionCoefficient *gf_coeff = - dynamic_cast(Q)) + else if (mfem::GridFunctionCoefficient *gf_coeff = + dynamic_cast(Q)) { GridCoefficient *ceed_coeff = new GridCoefficient(*gf_coeff->GetGridFunction()); coeff_ptr = ceed_coeff; } - else if (QuadratureFunctionCoefficient *qf_coeff = - dynamic_cast(Q)) + else if (mfem::QuadratureFunctionCoefficient *qf_coeff = + dynamic_cast(Q)) { const int ne = use_bdr ? mesh.GetNBE() : mesh.GetNE(); const int nq = ir.GetNPoints(); @@ -360,15 +360,15 @@ inline void InitCoefficientWithIndices(mfem::VectorCoefficient *VQ, // The constant coefficient case is handled by the QFunction context coeff_ptr = nullptr; } - else if (VectorGridFunctionCoefficient *vgf_coeff = - dynamic_cast(VQ)) + else if (mfem::VectorGridFunctionCoefficient *vgf_coeff = + dynamic_cast(VQ)) { GridCoefficient *ceed_coeff = new GridCoefficient(*vgf_coeff->GetGridFunction()); coeff_ptr = ceed_coeff; } - else if (VectorQuadratureFunctionCoefficient *vqf_coeff = - dynamic_cast(VQ)) + else if (mfem::VectorQuadratureFunctionCoefficient *vqf_coeff = + dynamic_cast(VQ)) { const int vdim = vqf_coeff->GetVDim(); const int ne = use_bdr ? mesh.GetNBE() : mesh.GetNE(); diff --git a/fem/ceed/interface/integrator.hpp b/fem/ceed/interface/integrator.hpp index ca139825198..2ebad8e086f 100644 --- a/fem/ceed/interface/integrator.hpp +++ b/fem/ceed/interface/integrator.hpp @@ -36,34 +36,18 @@ struct OperatorInfo { /** The path to the QFunction header. */ const char *header; - /** The name of the QFunction to build a partially assembled CeedOperator - with a constant Coefficient. */ - const char *build_func_const; - /** The QFunction to build a partially assembled CeedOperator with a constant - Coefficient. */ - CeedQFunctionUser build_qf_const; - /** The name of the QFunction to build a partially assembled CeedOperator - with a variable Coefficient. */ - const char *build_func_quad; - /** The QFunction to build a partially assembled CeedOperator with a variable - Coefficient. */ - CeedQFunctionUser build_qf_quad; + /** The name of the QFunction to build a partially assembled CeedOperator. */ + const char *build_func; + /** The QFunction to build a partially assembled CeedOperator. */ + CeedQFunctionUser build_qf; /** The name of the QFunction to apply a partially assembled CeedOperator. */ const char *apply_func; /** The QFunction to apply a partially assembled CeedOperator. */ CeedQFunctionUser apply_qf; - /** The name of the QFunction to apply a matrix-free CeedOperator with a - constant Coefficient. */ - const char *apply_func_mf_const; - /** The QFunction to apply a matrix-free CeedOperator with a constant - Coefficient. */ - CeedQFunctionUser apply_qf_mf_const; - /** The name of the QFunction to apply a matrix-free CeedOperator with a - variable Coefficient. */ - const char *apply_func_mf_quad; - /** The QFunction to apply a matrix-free CeedOperator with a variable - Coefficient. */ - CeedQFunctionUser apply_qf_mf_quad; + /** The name of the QFunction to apply a matrix-free CeedOperator. */ + const char *apply_func_mf; + /** The QFunction to apply a matrix-free CeedOperator. */ + CeedQFunctionUser apply_qf_mf; /** The EvalMode on the trial basis functions. */ EvalMode trial_op; /** The EvalMode on the test basis functions. */ @@ -274,17 +258,11 @@ class Integrator : public Operator &qdata_restr); CeedVectorCreate(ceed, nelem * nqpts * qdatasize, &qdata); - std::string build_func = coeff ? info.build_func_quad - : info.build_func_const; - CeedQFunctionUser build_qf = coeff ? info.build_qf_quad - : info.build_qf_const; - // Create the QFunction that builds the operator (i.e. computes its // quadrature data) and set its context data. CeedQFunction build_qfunc; - std::string qf_file = GetCeedPath() + info.header; - std::string qf = qf_file + build_func; - CeedQFunctionCreateInterior(ceed, 1, build_qf, qf.c_str(), + std::string qf = GetCeedPath() + info.header + info.build_func; + CeedQFunctionCreateInterior(ceed, 1, info.build_qf, qf.c_str(), &build_qfunc); if (coeff) { @@ -335,17 +313,9 @@ class Integrator : public Operator coeff = nullptr; } - std::string apply_func = !use_mf ? info.apply_func : - (coeff ? info.apply_func_mf_quad - : info.apply_func_mf_const); - CeedQFunctionUser apply_qf = !use_mf ? info.apply_qf : - (coeff ? info.apply_qf_mf_quad - : info.apply_qf_mf_const); - // Create the QFunction that defines the action of the operator. - std::string qf_file = GetCeedPath() + info.header; - std::string qf = qf_file + apply_func; - CeedQFunctionCreateInterior(ceed, 1, apply_qf, qf.c_str(), + std::string qf = GetCeedPath() + info.header + info.apply_func; + CeedQFunctionCreateInterior(ceed, 1, info.apply_qf, qf.c_str(), &apply_qfunc); // input switch (info.trial_op)