Merge branch 'master' into navier-bookkeeping-fix

mfem · Jun 12, 2020 · 395e75f · 395e75f
2 parents 1875c34 + ae0c650
commit 395e75f
Show file tree

Hide file tree

Showing 188 changed files with 19,978 additions and 4,140 deletions.
diff --git a/.gitignore b/.gitignore
@@ -29,6 +29,8 @@ config/sample-runs-build.log
 doc/CodeDocumentation.conf
 doc/CodeDocumentation.html
 doc/CodeDocumentation
+doc/undoc.log
+doc/warnings.log
 
 # Temporary files created by the tests.
 *.stderr

diff --git a/.travis.yml b/.travis.yml
@@ -11,8 +11,6 @@
 
 language: cpp
 
-sudo: false
-
 stages:
   - checks
   - tests
@@ -370,8 +368,10 @@ script:
    # Compiler
    - if [ $MPI == "YES" ]; then
         export MYCXX=mpic++;
+        export MAKE_CXX_FLAG=MPICXX=$MYCXX;
      else
         export MYCXX="$CXX";
+        export MAKE_CXX_FLAG=CXX=$MYCXX;
      fi
 
    # Print the compiler version
@@ -384,12 +384,9 @@ script:
      if [ "$CODECOV" == "YES" ]; then
         CPPFLAGS="--coverage -g";
      fi;
-     if [ "$CXX" == "clang++" ]; then
-        export MFEM_PERF_SW=clang;
-     fi
 
    # Configure the library
-   - make config MFEM_USE_MPI=$MPI MFEM_DEBUG=$DEBUG MFEM_CXX="$MYCXX"
+   - make config MFEM_USE_MPI=$MPI MFEM_DEBUG=$DEBUG $MAKE_CXX_FLAG
         MFEM_MPI_NP=$NPROCS CPPFLAGS="$CPPFLAGS"
    # Show the configuration
    - make info

diff --git a/CHANGELOG b/CHANGELOG
@@ -23,6 +23,27 @@ Meshing improvements
   Hessian for r-adaptivity using discrete fields, and allows use of skewness
   and orientation based metrics.
 
+- Added support for r-adaptivity with more than one discrete field. This allows
+  the user to specify different discrete functions for controlling the
+  size, aspect-ratio, orientation, and skew of elements in the mesh.
+
+- Added TMOP capability for approximate tangential mesh relaxation.
+
+- Added support for reading periodic meshes in Gmsh format (version 2.2). See
+  for example the periodic-annulus-sector and periodic-torus-sector files in
+  the data directory.
+
+Performance improvements
+------------------------
+- Added support for explicit vectorization in the high-performance templated
+  code, which can now take advantage of specific intrinsics classes on the
+  following architectures:
+    - x86 (SSE/AVX/AVX2/AVX512),
+    - Power8 & Power9 (VSX),
+    - BG/Q (QPX).
+  These are now enabled by default, and can be disabled with MFEM_USE_SIMD=NO.
+  See the new file linalg/simd.hpp and the new directory linalg/simd.
+
 Improved GPU capabilities
 -------------------------
 - Added support for Chebyshev accelerated polynomial smoother on GPU.
@@ -35,6 +56,24 @@ Discretization improvements
 
 - Added support for simplices in GSLIB-FindPoints.
 
+- Added support for H1 and L2 element matrix assembly in the mass, convection,
+  diffusion, transpose, and the face DG trace integrators. This is compatible
+  with GPU device execution and is illustrated in Example 9/9p, see the option
+  '-ea'. When enabled, this level of assembly stores independent dense matrices
+  for the elements, and independent dense matrices for the faces in the DG case.
+
+- Added new partial assembly kernels for H(div) bilinear forms, as well as
+  VectorFEDivergenceIntegrator.
+
+- Improved the documentation of the GridFunction GetValue and GetVectorValue
+  methods. Expanded the GetValue and GetVectorValue methods which accept an
+  ElementTransformation argument to support evaluation on boundary elements
+  and, in the continuous field case, arbitrary mesh edges and faces.
+
+- Added new coefficient and vector coefficient classes for QuadratureFunctions.
+  Additionally, new LinearForm integrators were also added which make use of
+  these new QuadratureFunction coefficient classes.
+
 Linear and nonlinear solvers
 ----------------------------
 - Added power method to iteratively estimate the largest eigenvalue and the
@@ -43,6 +82,17 @@ Linear and nonlinear solvers
 - Added initial support for h- and p-multigrid solvers and preconditioners for
   matrix-based and matrix-free discretizations with basic GPU capability.
 
+- Added a new IterativeSolverMonitor class that allows monitoring the residual
+  and solution during the solving process of an IterativeSolver after every
+  iteration.
+
+- Block arrays of parallel matrices can now be merged into a single parallel
+  matrix with the function HypreParMatrixFromBlocks. This could be useful for
+  solving block systems with parallel direct solvers such as STRUMPACK.
+
+- In SLISolver, changed the residual inner product from (Br,r) to (Br,Br) so the
+  solver can work with non-SPD preconditioner B.
+
 New and updated examples and miniapps
 -------------------------------------
 - Added a new example, Example 25/25p, to demonstrate the use of a Perfectly
@@ -65,6 +115,12 @@ New and updated examples and miniapps
 - Added a new meshing miniapp, Minimal Surface, which solves Plateau's problem:
   the Dirichlet problem for the minimal surface equation.
 
+- Added partial assembly support to examples 4/4p and 5/5p, with diagonal
+  preconditioning.
+
+- Added a new test problem in example 24/24p, demonstrating a mixed bilinear
+  form for H(div) and L_2, with partial assembly support.
+
 Improved testing
 ----------------
 - Added a GitLab pipeline that automates PR testing on supercomputing systems
@@ -74,15 +130,17 @@ Improved testing
 
 Miscellaneous
 -------------
-- In SLISolver, changed the residual inner product from (Br,r) to (Br,Br) so the
-  solver can work with non-SPD preconditioner B.
-
 - Added support for ADIOS2 for parallel I/O with ParaView visualization. The
   classes adios2stream and ADIOS2DataCollection are introduced in mfem as the
   interfaces to generate ADIOS2 Binary Pack (BP4) directory datasets for the
   entire spatial and temporal data. In addition, ADIOS2 allows for setting a
   user-defined number of data substreams/subfiles. See examples 5, 9, 12, 16.
 
+- The integration order used in the ComputeLpError and ComputeElementLpError
+  methods of class GridFunction has been increased.
+
+- Various other simplifications, extensions, and bugfixes in the code.
+
 
 Version 4.1, released on March 10, 2020
 =======================================

diff --git a/INSTALL b/INSTALL
@@ -396,6 +396,12 @@ MFEM_USE_SIDRE = YES/NO
    blueprint specification. When enabled, this option requires installation of
    HDF5 (see also MFEM_USE_NETCDF), Conduit and LLNL's axom project.
 
+MFEM_USE_SIMD = YES/NO
+   Enables the high performance templated classes to use architecture dependent
+   SIMD intrinsics instead of the generic implementation of class AutoSIMD in
+   linalg/simd/auto.hpp. This option should be combined with suitable
+   compiler options, such as -march=native, to enable optimal vectorization.
+
 MFEM_USE_CONDUIT = YES/NO
    Enables support for converting MFEM Mesh and Grid Function objects to and
    from Conduit Mesh Blueprint Descriptions (https://github.com/LLNL/conduit/)
@@ -426,6 +432,8 @@ MFEM_USE_PUMI = YES/NO
    data management system that is capable of handling general non-manifold
    models and effectively supports automated adaptive analysis. PUMI enables
    support for parallel unstructured mesh modifications in MFEM.
+   The develop branch of the PUMI repository (https://github.com/SCOREC/core)
+   should be used for the most up-to-date features.
 
 MFEM_USE_UMPIRE = YES/NO
    Enables support for Umpire, a resource management library that allows the
@@ -609,8 +617,9 @@ The specific libraries and their options are:
 
 - PUMI (optional), used when MFEM_USE_PUMI = YES.
   URL: https://scorec.rpi.edu/pumi
+       https://github.com/SCOREC/core
   Options: PUMI_OPT, PUMI_LIB.
-  Versions: PUMI >= 2.2.0.
+  Versions: PUMI >= 2.2.3.
 
 - HiOp (optional), used when MFEM_USE_HIOP = YES.
   URL: https://github.com/LLNL/hiop

diff --git a/config/cmake/MFEMConfig.cmake.in b/config/cmake/MFEMConfig.cmake.in
@@ -47,6 +47,7 @@ set(MFEM_USE_OCCA @MFEM_USE_OCCA@)
 set(MFEM_USE_RAJA @MFEM_USE_RAJA@)
 set(MFEM_USE_CEED @MFEM_USE_CEED@)
 set(MFEM_USE_UMPIRE @MFEM_USE_UMPIRE@)
+set(MFEM_USE_SIMD @MFEM_USE_SIMD@)
 set(MFEM_USE_ADIOS2 @MFEM_USE_ADIOS2@)
 
 set(MFEM_CXX_COMPILER "@CMAKE_CXX_COMPILER@")

diff --git a/config/cmake/config.hpp.in b/config/cmake/config.hpp.in
@@ -107,6 +107,9 @@
 // Enable MFEM functionality based on the Sidre library
 #cmakedefine MFEM_USE_SIDRE
 
+// Enable the use of SIMD in the high performance templated classes
+#cmakedefine MFEM_USE_SIMD
+
 // Enable MFEM functionality based on Conduit
 #cmakedefine MFEM_USE_CONDUIT
 

diff --git a/config/cmake/modules/MfemCmakeUtilities.cmake b/config/cmake/modules/MfemCmakeUtilities.cmake
@@ -733,7 +733,7 @@ function(mfem_export_mk_files)
       MFEM_USE_SUPERLU MFEM_USE_STRUMPACK MFEM_USE_GNUTLS
       MFEM_USE_GSLIB MFEM_USE_NETCDF MFEM_USE_PETSC MFEM_USE_MPFR MFEM_USE_SIDRE
       MFEM_USE_CONDUIT MFEM_USE_PUMI MFEM_USE_CUDA MFEM_USE_OCCA MFEM_USE_RAJA
-      MFEM_USE_UMPIRE)
+      MFEM_USE_UMPIRE MFEM_USE_SIMD MFEM_USE_ADIOS2)
   foreach(var ${CONFIG_MK_BOOL_VARS})
     if (${var})
       set(${var} YES)
@@ -743,6 +743,7 @@ function(mfem_export_mk_files)
   endforeach()
   # TODO: Add support for MFEM_USE_CUDA=YES
   set(MFEM_CXX ${CMAKE_CXX_COMPILER})
+  set(MFEM_HOST_CXX ${MFEM_CXX})
   set(MFEM_CPPFLAGS "")
   string(STRIP "${CMAKE_CXX_FLAGS_${BUILD_TYPE}} ${CMAKE_CXX_FLAGS}"
          MFEM_CXXFLAGS)

diff --git a/config/config.hpp.in b/config/config.hpp.in
@@ -106,6 +106,9 @@
 // Enable Sidre support
 // #define MFEM_USE_SIDRE
 
+// Enable the use of SIMD in the high performance templated classes
+// #define MFEM_USE_SIMD
+
 // Enable Conduit support
 // #define MFEM_USE_CONDUIT
 

diff --git a/config/config.mk.in b/config/config.mk.in
@@ -49,10 +49,12 @@ MFEM_USE_RAJA          = @MFEM_USE_RAJA@
 MFEM_USE_OCCA          = @MFEM_USE_OCCA@
 MFEM_USE_CEED          = @MFEM_USE_CEED@
 MFEM_USE_UMPIRE        = @MFEM_USE_UMPIRE@
+MFEM_USE_SIMD          = @MFEM_USE_SIMD@
 MFEM_USE_ADIOS2        = @MFEM_USE_ADIOS2@
 
 # Compiler, compile options, and link options
 MFEM_CXX       = @MFEM_CXX@
+MFEM_HOST_CXX  = @MFEM_HOST_CXX@
 MFEM_CPPFLAGS  = @MFEM_CPPFLAGS@
 MFEM_CXXFLAGS  = @MFEM_CXXFLAGS@
 MFEM_TPLFLAGS  = @MFEM_TPLFLAGS@

diff --git a/config/defaults.cmake b/config/defaults.cmake
@@ -49,6 +49,7 @@ option(MFEM_USE_OCCA "Enable OCCA" OFF)
 option(MFEM_USE_RAJA "Enable RAJA" OFF)
 option(MFEM_USE_CEED "Enable CEED" OFF)
 option(MFEM_USE_UMPIRE "Enable Umpire" OFF)
+option(MFEM_USE_SIMD "Enable use of SIMD intrinsics" ON)
 option(MFEM_USE_ADIOS2 "Enable ADIOS2" OFF)
 
 set(MFEM_MPI_NP 4 CACHE STRING "Number of processes used for MPI tests")

diff --git a/config/defaults.mk b/config/defaults.mk
@@ -137,6 +137,7 @@ MFEM_USE_RAJA          = NO
 MFEM_USE_OCCA          = NO
 MFEM_USE_CEED          = NO
 MFEM_USE_UMPIRE        = NO
+MFEM_USE_SIMD          = YES
 MFEM_USE_ADIOS2        = NO
 
 # Compile and link options for zlib.

diff --git a/config/tconfig.hpp b/config/tconfig.hpp
@@ -29,20 +29,27 @@
 #define MFEM_ALWAYS_INLINE
 #endif
 
+// --- MFEM_VECTORIZE_LOOP (disabled)
+#if (__cplusplus >= 201103L) && !defined(MFEM_DEBUG) && defined(__GNUC__)
+//#define MFEM_VECTORIZE_LOOP _Pragma("GCC ivdep")
+#define MFEM_VECTORIZE_LOOP
+#else
+#define MFEM_VECTORIZE_LOOP
+#endif
+
+// MFEM_TEMPLATE_BLOCK_SIZE is the block size used by the template matrix-matrix
+// multiply, Mult_AB, defined in tmatrix.hpp. This parameter will generally
+// require tuning to determine a good value. It is probably highly influenced
+// by the SIMD width when Mult_AB is used with a SIMD type like AutoSIMD.
 #define MFEM_TEMPLATE_BLOCK_SIZE 4
-#define MFEM_SIMD_SIZE 32
+
 #define MFEM_TEMPLATE_ENABLE_SERIALIZE
 
 // #define MFEM_TEMPLATE_ELTRANS_HAS_NODE_DOFS
 // #define MFEM_TEMPLATE_ELTRANS_RESULT_HAS_NODES
 // #define MFEM_TEMPLATE_FIELD_EVAL_DATA_HAS_DOFS
 #define MFEM_TEMPLATE_INTRULE_COEFF_PRECOMP
 
-// derived macros
-#define MFEM_ROUNDUP(val,base) ((((val)+(base)-1)/(base))*(base))
-#define MFEM_ALIGN_SIZE(size,type) \
-   MFEM_ROUNDUP(size,(MFEM_SIMD_SIZE)/sizeof(type))
-
 #ifdef MFEM_COUNT_FLOPS
 namespace mfem
 {

diff --git a/data/periodic-annulus-sector.geo b/data/periodic-annulus-sector.geo
@@ -0,0 +1,37 @@
+SetFactory("OpenCASCADE");
+
+R1 = 1.0;
+R2 = 2.0;
+
+Point(1) = {0.0, 0, 0, 1.0};
+Point(2) = {R1, 0, 0, 1.0};
+Point(3) = {R2, 0, 0, 1.0};
+Point(4) = {R1*Cos(Pi/3), R1*Sin(Pi/3), 0, 1.0};
+Point(5) = {R2*Cos(Pi/3), R2*Sin(Pi/3), 0, 1.0};
+Line(1) = {2, 3};
+Line(2) = {4, 5};
+Circle(3) = {2, 1, 4};
+Circle(4) = {3, 1, 5};
+Curve Loop(5) = {1, 4, -2, -3};
+Plane Surface(1) = {5};
+
+Transfinite Curve{1} = 7;
+Transfinite Curve{2} = 7;
+Transfinite Curve{3} = 4;
+Transfinite Curve{4} = 10;
+
+// Set a rotation periodicity constraint:
+Periodic Line{1} = {2} Rotate{{0,0,1}, {0,0,0}, -Pi/3};
+
+// Tag curves and surfaces with positive integers
+Physical Curve(1) = {3};
+Physical Curve(2) = {4};
+Physical Curve(3) = {1};
+Physical Curve(4) = {2};
+Physical Surface(1) = {1};
+
+// Generate 2D mesh
+Mesh 2;
+Mesh.MshFileVersion = 2.2;
+
+Save "periodic-annulus-sector.msh";