diff --git a/patches/gromacs-2021.7.config b/patches/gromacs-2021.7.config deleted file mode 100644 index f69d5800af..0000000000 --- a/patches/gromacs-2021.7.config +++ /dev/null @@ -1,38 +0,0 @@ - - -function plumed_preliminary_test(){ -# check if the README contains the word GROMACS and if gromacs has been already configured - grep -q GROMACS README 1>/dev/null 2>/dev/null -} - -function plumed_patch_info(){ -cat << EOF -PLUMED can be incorporated into gromacs using the standard patching procedure. -Patching must be done in the gromacs root directory _before_ the cmake command is invoked. - -On clusters you may want to patch gromacs using the static version of plumed, in this case -building gromacs can result in multiple errors. One possible solution is to configure gromacs -with these additional options: - -cmake -DBUILD_SHARED_LIBS=OFF -DGMX_PREFER_STATIC_LIBS=ON - -To enable PLUMED in a gromacs simulation one should use -mdrun with an extra -plumed flag. The flag can be used to -specify the name of the PLUMED input file, e.g.: - -gmx mdrun -plumed plumed.dat - -For more information on gromacs you should visit http://www.gromacs.org - -EOF -} - -plumed_before_patch(){ - plumed_patch_info - - mv cmake/gmxVersionInfo.cmake cmake/gmxVersionInfo.cmake.preplumed - awk -v version="$PLUMED_VERSION" '/^set\(GMX_VERSION_STRING_OF_FORK/{gsub(/""/, "plumed-" version)}; {print}' cmake/gmxVersionInfo.cmake.preplumed > cmake/gmxVersionInfo.cmake -} - -PLUMED_PREPLUMED_IGNORE=cmake/gmxVersionInfo.cmake - diff --git a/patches/gromacs-2021.7.diff/src/gromacs/CMakeLists.txt b/patches/gromacs-2021.7.diff/src/gromacs/CMakeLists.txt deleted file mode 100644 index 47bf3c4792..0000000000 --- a/patches/gromacs-2021.7.diff/src/gromacs/CMakeLists.txt +++ /dev/null @@ -1,479 +0,0 @@ -# -# This file is part of the GROMACS molecular simulation package. -# -# Copyright (c) 2010,2011,2012,2013,2014 by the GROMACS development team. 
-# Copyright (c) 2015,2016,2017,2018,2019,2020, by the GROMACS development team, led by -# Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl, -# and including many others, as listed in the AUTHORS file in the -# top-level source directory and at http://www.gromacs.org. -# -# GROMACS is free software; you can redistribute it and/or -# modify it under the terms of the GNU Lesser General Public License -# as published by the Free Software Foundation; either version 2.1 -# of the License, or (at your option) any later version. -# -# GROMACS is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -# Lesser General Public License for more details. -# -# You should have received a copy of the GNU Lesser General Public -# License along with GROMACS; if not, see -# http://www.gnu.org/licenses, or write to the Free Software Foundation, -# Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. -# -# If you want to redistribute modifications to GROMACS, please -# consider that scientific software is very special. Version -# control is crucial - bugs must be traceable. We will be happy to -# consider code for inclusion in the official distribution, but -# derived work must not be called official GROMACS. Details are found -# in the README & COPYING files - if they are missing, get the -# official version at http://www.gromacs.org. -# -# To help us fund GROMACS development, we humbly ask that you cite -# the research papers on the package. Check out http://www.gromacs.org. 
- -include(${CMAKE_SOURCE_DIR}/Plumed.cmake) - -set(LIBGROMACS_SOURCES) - -if (GMX_CLANG_CUDA) - include(gmxClangCudaUtils) -endif() - -set_property(GLOBAL PROPERTY GMX_LIBGROMACS_SOURCES) -set_property(GLOBAL PROPERTY CUDA_SOURCES) -set_property(GLOBAL PROPERTY GMX_INSTALLED_HEADERS) -set_property(GLOBAL PROPERTY GMX_AVX_512_SOURCE) - -set(libgromacs_object_library_dependencies "") -function (_gmx_add_files_to_property PROPERTY) - foreach (_file ${ARGN}) - if (IS_ABSOLUTE "${_file}") - set_property(GLOBAL APPEND PROPERTY ${PROPERTY} ${_file}) - else() - set_property(GLOBAL APPEND PROPERTY ${PROPERTY} - ${CMAKE_CURRENT_LIST_DIR}/${_file}) - endif() - endforeach() -endfunction () - -function (gmx_add_libgromacs_sources) - _gmx_add_files_to_property(GMX_LIBGROMACS_SOURCES ${ARGN}) -endfunction () - -# Permit the configuration to disable compiling the many nbnxm kernels -# and others involved in force calculations. Currently only -# short-ranged and bonded kernels are disabled this way, but in future -# others may be appropriate. Thus the cmake option is not specific to -# nbnxm module. -option(GMX_USE_SIMD_KERNELS "Whether to compile NBNXM and other SIMD kernels" ON) -mark_as_advanced(GMX_USE_SIMD_KERNELS) - -# Add these contents first because linking their tests can take a lot -# of time, so we want lots of parallel work still available after -# linking starts. 
-add_subdirectory(utility) -# Add normal contents -add_subdirectory(gmxlib) -add_subdirectory(mdlib) -add_subdirectory(applied_forces) -add_subdirectory(listed_forces) -add_subdirectory(nbnxm) -add_subdirectory(commandline) -add_subdirectory(domdec) -add_subdirectory(ewald) -add_subdirectory(fft) -add_subdirectory(gpu_utils) -add_subdirectory(hardware) -add_subdirectory(linearalgebra) -add_subdirectory(math) -add_subdirectory(mdrun) -add_subdirectory(mdrunutility) -add_subdirectory(mdspan) -add_subdirectory(mdtypes) -add_subdirectory(onlinehelp) -add_subdirectory(options) -add_subdirectory(pbcutil) -add_subdirectory(random) -add_subdirectory(restraint) -add_subdirectory(tables) -add_subdirectory(taskassignment) -add_subdirectory(timing) -add_subdirectory(topology) -add_subdirectory(trajectory) -add_subdirectory(swap) -add_subdirectory(essentialdynamics) -add_subdirectory(pulling) -add_subdirectory(simd) -add_subdirectory(imd) -add_subdirectory(compat) -add_subdirectory(mimic) -add_subdirectory(modularsimulator) -if (NOT GMX_BUILD_MDRUN_ONLY) - add_subdirectory(gmxana) - add_subdirectory(gmxpreprocess) - add_subdirectory(correlationfunctions) - add_subdirectory(statistics) - add_subdirectory(analysisdata) - add_subdirectory(coordinateio) - add_subdirectory(trajectoryanalysis) - add_subdirectory(energyanalysis) - add_subdirectory(tools) -endif() - -get_property(PROPERTY_SOURCES GLOBAL PROPERTY GMX_LIBGROMACS_SOURCES) -list(APPEND LIBGROMACS_SOURCES ${GMXLIB_SOURCES} ${MDLIB_SOURCES} ${PROPERTY_SOURCES}) - -# This would be the standard way to include thread_mpi, but -# we want libgromacs to link the functions directly -#if(GMX_THREAD_MPI) -# add_subdirectory(thread_mpi) -#endif() -#target_link_libraries(gmx ${GMX_EXTRA_LIBRARIES} ${THREAD_MPI_LIB}) -tmpi_get_source_list(THREAD_MPI_SOURCES ${PROJECT_SOURCE_DIR}/src/external/thread_mpi/src) -add_library(thread_mpi OBJECT ${THREAD_MPI_SOURCES}) -target_compile_definitions(thread_mpi PRIVATE HAVE_CONFIG_H) -if(CYGWIN) - # 
Needs POSIX-isms for strdup, not just std-isms - target_compile_definitions(thread_mpi PRIVATE _POSIX_C_SOURCE=200809L) -endif() -gmx_target_compile_options(thread_mpi) -if (WIN32) - gmx_target_warning_suppression(thread_mpi /wd4996 HAS_NO_MSVC_UNSAFE_FUNCTION) -endif() -list(APPEND libgromacs_object_library_dependencies thread_mpi) - -configure_file(version.h.cmakein version.h) -if(GMX_INSTALL_LEGACY_API) - install(FILES - ${CMAKE_CURRENT_BINARY_DIR}/version.h - analysisdata.h - options.h - selection.h - trajectoryanalysis.h - DESTINATION include/gromacs) -endif() - -# This code is here instead of utility/CMakeLists.txt, because CMake -# custom commands and source file properties can only be set in the directory -# that contains the target that uses them. -# TODO: Generate a header instead that can be included from baseversion.cpp. -# That probably simplifies things somewhat. -set(GENERATED_VERSION_FILE utility/baseversion-gen.cpp) -gmx_configure_version_file( - utility/baseversion-gen.cpp.cmakein ${GENERATED_VERSION_FILE} - REMOTE_HASH - EXTRA_VARS - GMX_SOURCE_DOI - GMX_RELEASE_HASH - GMX_SOURCE_HASH - ) -list(APPEND LIBGROMACS_SOURCES ${GENERATED_VERSION_FILE}) - -# Mark some shared GPU implementation files to compile with CUDA if needed -if (GMX_GPU_CUDA) - get_property(CUDA_SOURCES GLOBAL PROPERTY CUDA_SOURCES) - set_source_files_properties(${CUDA_SOURCES} PROPERTIES CUDA_SOURCE_PROPERTY_FORMAT OBJ) -endif() - -if (GMX_GPU_CUDA) - # Work around FindCUDA that prevents using target_link_libraries() - # with keywords otherwise... 
- set(CUDA_LIBRARIES PRIVATE ${CUDA_LIBRARIES}) - if (NOT GMX_CLANG_CUDA) - gmx_cuda_add_library(libgromacs ${LIBGROMACS_SOURCES}) - else() - add_library(libgromacs ${LIBGROMACS_SOURCES}) - endif() - target_link_libraries(libgromacs PRIVATE ${CUDA_CUFFT_LIBRARIES}) -else() - add_library(libgromacs ${LIBGROMACS_SOURCES}) -endif() - -# Add these contents first because linking their tests can take a lot -# of time, so we want lots of parallel work still available after -# linking starts. -add_subdirectory(fileio) -add_subdirectory(selection) - -# Suppress a warning about our abuse of t_inputrec -gmx_source_file_warning_suppression(mdtypes/inputrec.cpp -Wno-class-memaccess HAS_NO_CLASS_MEMACCESS) - -# Handle the object libraries that contain the source file -# dependencies that need special handling because they are generated -# or external code. -foreach(object_library ${libgromacs_object_library_dependencies}) - if (BUILD_SHARED_LIBS) - set_target_properties(${object_library} PROPERTIES POSITION_INDEPENDENT_CODE true) - endif() - target_include_directories(${object_library} SYSTEM BEFORE PRIVATE ${PROJECT_SOURCE_DIR}/src/external/thread_mpi/include) - - # Add the sources from the object libraries to the main library. - target_sources(libgromacs PRIVATE $) -endforeach() -gmx_target_compile_options(libgromacs) -target_compile_definitions(libgromacs PRIVATE HAVE_CONFIG_H) -target_include_directories(libgromacs SYSTEM BEFORE PRIVATE ${PROJECT_SOURCE_DIR}/src/external/thread_mpi/include) - -if (GMX_GPU_OPENCL) - option(GMX_EXTERNAL_CLFFT "True if an external clFFT is required to be used" FALSE) - mark_as_advanced(GMX_EXTERNAL_CLFFT) - - # Default to using clFFT found on the system - # switch to quiet at the second run. 
- if (DEFINED clFFT_LIBRARY) - set (clFFT_FIND_QUIETLY TRUE) - endif() - find_package(clFFT) - if (NOT clFFT_FOUND) - if (GMX_EXTERNAL_CLFFT) - message(FATAL_ERROR "Did not find required external clFFT library, consider setting clFFT_ROOT_DIR") - endif() - - if(MSVC) - message(FATAL_ERROR -"An OpenCL build was requested with Visual Studio compiler, but GROMACS -requires clFFT, which was not found on your system. GROMACS does bundle -clFFT to help with building for OpenCL, but that clFFT has not yet been -ported to the more recent versions of that compiler that GROMACS itself -requires. Thus for now, OpenCL is not available with MSVC and the internal -build of clFFT in GROMACS 2019. Either change compiler, try installing -a clFFT package, or use the latest GROMACS 2018 point release.") - endif() - - # Fall back on the internal version - set (_clFFT_dir ../external/clFFT/src) - add_subdirectory(${_clFFT_dir} clFFT-build) - target_sources(libgromacs PRIVATE - $ - ) - target_include_directories(libgromacs SYSTEM PRIVATE ${_clFFT_dir}/include) - # Use the magic variable for how to link any library needed for - # dlopen, etc. which is -ldl where needed, and empty otherwise - # (e.g. Windows, BSD, Mac). - target_link_libraries(libgromacs PRIVATE "${CMAKE_DL_LIBS}") - else() - target_link_libraries(libgromacs PRIVATE clFFT) - endif() -endif() - -# Permit GROMACS code to include externally developed headers, such as -# the functionality from the nonstd project that we use for -# gmx::compat::optional. These are included as system headers so that -# no warnings are issued from them. -# -# TODO Perhaps generalize this for all headers from src/external -target_include_directories(libgromacs SYSTEM PRIVATE ${PROJECT_SOURCE_DIR}/src/external) - -if(SIMD_AVX_512_CXX_SUPPORTED AND NOT ("${GMX_SIMD_ACTIVE}" STREQUAL "AVX_512_KNL")) - # Since we might be overriding -march=core-avx2, add a flag so we don't warn for this specific file. 
- # On KNL this can cause illegal instruction because the compiler might use non KNL AVX instructions - # with the SIMD_AVX_512_CXX_FLAGS flags. - set_source_files_properties(hardware/identifyavx512fmaunits.cpp PROPERTIES COMPILE_FLAGS "${SIMD_AVX_512_CXX_FLAGS} ${CXX_NO_UNUSED_OPTION_WARNING_FLAGS}") -endif() - -# Do any special handling needed for .cpp files that use -# CUDA runtime headers -if (GMX_GPU_CUDA AND CMAKE_CXX_COMPILER_ID MATCHES "Clang") - # CUDA header cuda_runtime_api.h in at least CUDA 10.1 uses 0 - # where nullptr would be preferable. GROMACS can't fix these, so - # must suppress them. - GMX_TEST_CXXFLAG(CXXFLAGS_NO_ZERO_AS_NULL_POINTER_CONSTANT "-Wno-zero-as-null-pointer-constant" NVCC_CLANG_SUPPRESSIONS_CXXFLAGS) - - foreach(_compile_flag ${NVCC_CLANG_SUPPRESSIONS_CXXFLAGS}) - set(GMX_CUDA_CLANG_FLAGS "${GMX_CUDA_CLANG_FLAGS} ${_compile_flag}") - endforeach() - if (GMX_CLANG_CUDA) - foreach (_file ${LIBGROMACS_SOURCES}) - get_filename_component(_ext ${_file} EXT) - get_source_file_property(_cuda_source_format ${_file} CUDA_SOURCE_PROPERTY_FORMAT) - if ("${_ext}" STREQUAL ".cu" OR _cuda_source_format) - gmx_compile_cuda_file_with_clang(${_file}) - endif() - endforeach() - else() - get_property(CUDA_SOURCES GLOBAL PROPERTY CUDA_SOURCES) - set_source_files_properties(${CUDA_SOURCES} PROPERTIES COMPILE_FLAGS ${GMX_CUDA_CLANG_FLAGS}) - endif() -endif() - -# Only add the -fsycl flag to sources that really need it -if (GMX_GPU_SYCL) - get_property(SYCL_SOURCES GLOBAL PROPERTY SYCL_SOURCES) - set_source_files_properties(${SYCL_SOURCES} PROPERTIES COMPILE_FLAGS "${SYCL_CXX_FLAGS}") -endif() - -gmx_setup_tng_for_libgromacs() - -# We apply the SYCL flag explicitly just for libgromacs, since bugs in the beta versions of -# icpx/dpcpp leads to crashes if we try to link an library without any SYCL code with the -# -fsycl flag enabled. Once that bug is fixed, we should change it to simply add -# SYCL_CXX_FLAGS to GMX_SHARED_LINKER_FLAGS. 
-target_link_libraries(libgromacs - PRIVATE - ${EXTRAE_LIBRARIES} - ${GMX_EXTRA_LIBRARIES} - ${GMX_COMMON_LIBRARIES} - ${FFT_LIBRARIES} ${LINEAR_ALGEBRA_LIBRARIES} - ${THREAD_LIB} ${GMX_SHARED_LINKER_FLAGS} - ${SYCL_CXX_FLAGS} - ${OpenCL_LIBRARIES} - $<$:socket> - PUBLIC - ${GMX_PUBLIC_LIBRARIES} - ${PLUMED_LOAD} - ) -if (GMX_OPENMP) - target_link_libraries(libgromacs PUBLIC OpenMP::OpenMP_CXX) -endif() -set_target_properties(libgromacs PROPERTIES - OUTPUT_NAME "gromacs${GMX_LIBS_SUFFIX}" - SOVERSION ${LIBRARY_SOVERSION_MAJOR} - VERSION ${LIBRARY_VERSION} - ) - -gmx_manage_lmfit() -target_link_libraries(libgromacs PRIVATE lmfit) - -# Make sure we fix "everything" found by more recent versions of clang. -if (CMAKE_CXX_COMPILER_ID MATCHES "Clang" AND CMAKE_CXX_COMPILER_VERSION VERSION_GREATER "7") - target_compile_options(libgromacs PRIVATE $<$:-Weverything ${IGNORED_CLANG_ALL_WARNINGS}>) -endif() -if (CMAKE_CXX_COMPILER_ID STREQUAL "MSVC") - target_compile_options(libgromacs PRIVATE $<$:/analyze /analyze:stacksize 70000 - #Control flow warnings are disabled because the commond line output is insufficient. There is no tool - #to convert the xml report to e.g. HTML and even in Visual Studio the viewer doesn't work with cmake support. - /wd6001 #unitialized memory - /wd6011 #derefencing NULL - /wd6053 #prior call not zero-terminate - /wd6054 #might not be zero-terminated - /wd6385 #reading invalid data - /wd6386 #buffer overrun - /wd6387 #could be '0' - /wd28199 #uninitialized memory - # For compile time constant (e.g. 
templates) the following warnings have flase postives - /wd6239 #( && ) - /wd6240 #( && ) - /wd6294 #Ill-defined for-loop - /wd6326 #comparison of constant with other constant - /wd28020 #expression involving paramter is not true - # Misc - /wd6330 #incorrect type to function (warns for char (instead of unsigned) for isspace/isalpha/isdigit/..)) - /wd6993 #OpenMP ignored - #TODO - /wd6031 #return value ignored (important - mostly warnigns about sscanf) - /wd6244 #hides declaration (known issue - we ingore similar warnings for other compilers) - /wd6246 #hides declaration - > - ) -endif() - -if (GMX_CLANG_TIDY) - set_target_properties(libgromacs PROPERTIES CXX_CLANG_TIDY - "${CLANG_TIDY_EXE};-warnings-as-errors=*") -endif() - -# clang-3.6 warns about a number of issues that are not reported by more modern compilers -# and we know they are not real issues. So we only check that it can compile without error -# but ignore all warnings. -if (CMAKE_CXX_COMPILER_ID MATCHES "Clang" AND CMAKE_CXX_COMPILER_VERSION MATCHES "^3\.6") - target_compile_options(libgromacs PRIVATE $<$:-w>) -endif() - -# Only install the library in mdrun-only mode if it is actually necessary -# for the binary -if (NOT GMX_BUILD_MDRUN_ONLY OR BUILD_SHARED_LIBS) - install(TARGETS libgromacs - EXPORT libgromacs - LIBRARY - DESTINATION ${CMAKE_INSTALL_LIBDIR} - COMPONENT libraries - RUNTIME - DESTINATION ${CMAKE_INSTALL_BINDIR} - COMPONENT libraries - ARCHIVE - DESTINATION ${CMAKE_INSTALL_LIBDIR} - COMPONENT libraries - INCLUDES DESTINATION include) - target_compile_definitions(libgromacs PUBLIC $) - # legacy headers use c++17 features, so consumer codes need to use that standard, too - if(GMX_INSTALL_LEGACY_API) - target_compile_features(libgromacs INTERFACE cxx_std_${CMAKE_CXX_STANDARD}) - endif() - add_library(Gromacs::libgromacs ALIAS libgromacs) -endif() - -if (NOT GMX_BUILD_MDRUN_ONLY) - include(InstallLibInfo.cmake) -endif() - -# Technically, the user could want to do this for an OpenCL build -# 
using the CUDA runtime, but currently there's no reason to want to -# do that. -if (INSTALL_CUDART_LIB) #can be set manual by user - if (GMX_GPU_CUDA) - foreach(CUDA_LIB ${CUDA_LIBRARIES}) - string(REGEX MATCH "cudart" IS_CUDART ${CUDA_LIB}) - if(IS_CUDART) #libcuda should not be installed - #install also name-links (linker uses those) - file(GLOB CUDA_LIBS ${CUDA_LIB}*) - install(FILES ${CUDA_LIBS} DESTINATION - ${CMAKE_INSTALL_LIBDIR} COMPONENT libraries) - endif() - endforeach() - else() - message(WARNING "INSTALL_CUDART_LIB only makes sense when configuring for CUDA support") - endif() -endif() - -if(GMX_GPU_OPENCL) - # Install the utility headers - file(GLOB OPENCL_INSTALLED_FILES - gpu_utils/vectype_ops.clh - gpu_utils/device_utils.clh - ) - install(FILES ${OPENCL_INSTALLED_FILES} - DESTINATION ${GMX_INSTALL_OCLDIR}/gromacs/gpu_utils - COMPONENT libraries) - file(GLOB OPENCL_INSTALLED_FILES - pbcutil/ishift.h - ) - install(FILES ${OPENCL_INSTALLED_FILES} - DESTINATION ${GMX_INSTALL_OCLDIR}/gromacs/pbcutil - COMPONENT libraries) - - # Install the NBNXM source and headers - file(GLOB OPENCL_INSTALLED_FILES - nbnxm/constants.h - ) - install(FILES ${OPENCL_INSTALLED_FILES} - DESTINATION ${GMX_INSTALL_OCLDIR}/gromacs/nbnxm - COMPONENT libraries) - file(GLOB OPENCL_INSTALLED_FILES - nbnxm/opencl/nbnxm_ocl_kernels.cl - nbnxm/opencl/nbnxm_ocl_kernel.clh - nbnxm/opencl/nbnxm_ocl_kernel_pruneonly.clh - nbnxm/opencl/nbnxm_ocl_kernels.clh - nbnxm/opencl/nbnxm_ocl_kernels_fastgen.clh - nbnxm/opencl/nbnxm_ocl_kernels_fastgen_add_twincut.clh - nbnxm/opencl/nbnxm_ocl_kernel_utils.clh - nbnxm/opencl/nbnxm_ocl_consts.h - ) - install(FILES ${OPENCL_INSTALLED_FILES} - DESTINATION ${GMX_INSTALL_OCLDIR}/gromacs/nbnxm/opencl - COMPONENT libraries) - - # Install the PME source and headers - file(GLOB OPENCL_INSTALLED_FILES - ewald/pme_spread.clh - ewald/pme_solve.clh - ewald/pme_gather.clh - ewald/pme_gpu_calculate_splines.clh - ewald/pme_program.cl - ewald/pme_gpu_types.h - ) - 
install(FILES ${OPENCL_INSTALLED_FILES} - DESTINATION ${GMX_INSTALL_OCLDIR}/gromacs/ewald - COMPONENT libraries) -endif() diff --git a/patches/gromacs-2021.7.diff/src/gromacs/CMakeLists.txt.preplumed b/patches/gromacs-2021.7.diff/src/gromacs/CMakeLists.txt.preplumed deleted file mode 100644 index a4430e9dd6..0000000000 --- a/patches/gromacs-2021.7.diff/src/gromacs/CMakeLists.txt.preplumed +++ /dev/null @@ -1,476 +0,0 @@ -# -# This file is part of the GROMACS molecular simulation package. -# -# Copyright (c) 2010,2011,2012,2013,2014 by the GROMACS development team. -# Copyright (c) 2015,2016,2017,2018,2019,2020, by the GROMACS development team, led by -# Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl, -# and including many others, as listed in the AUTHORS file in the -# top-level source directory and at http://www.gromacs.org. -# -# GROMACS is free software; you can redistribute it and/or -# modify it under the terms of the GNU Lesser General Public License -# as published by the Free Software Foundation; either version 2.1 -# of the License, or (at your option) any later version. -# -# GROMACS is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -# Lesser General Public License for more details. -# -# You should have received a copy of the GNU Lesser General Public -# License along with GROMACS; if not, see -# http://www.gnu.org/licenses, or write to the Free Software Foundation, -# Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. -# -# If you want to redistribute modifications to GROMACS, please -# consider that scientific software is very special. Version -# control is crucial - bugs must be traceable. We will be happy to -# consider code for inclusion in the official distribution, but -# derived work must not be called official GROMACS. 
Details are found -# in the README & COPYING files - if they are missing, get the -# official version at http://www.gromacs.org. -# -# To help us fund GROMACS development, we humbly ask that you cite -# the research papers on the package. Check out http://www.gromacs.org. - -set(LIBGROMACS_SOURCES) - -if (GMX_CLANG_CUDA) - include(gmxClangCudaUtils) -endif() - -set_property(GLOBAL PROPERTY GMX_LIBGROMACS_SOURCES) -set_property(GLOBAL PROPERTY CUDA_SOURCES) -set_property(GLOBAL PROPERTY GMX_INSTALLED_HEADERS) -set_property(GLOBAL PROPERTY GMX_AVX_512_SOURCE) - -set(libgromacs_object_library_dependencies "") -function (_gmx_add_files_to_property PROPERTY) - foreach (_file ${ARGN}) - if (IS_ABSOLUTE "${_file}") - set_property(GLOBAL APPEND PROPERTY ${PROPERTY} ${_file}) - else() - set_property(GLOBAL APPEND PROPERTY ${PROPERTY} - ${CMAKE_CURRENT_LIST_DIR}/${_file}) - endif() - endforeach() -endfunction () - -function (gmx_add_libgromacs_sources) - _gmx_add_files_to_property(GMX_LIBGROMACS_SOURCES ${ARGN}) -endfunction () - -# Permit the configuration to disable compiling the many nbnxm kernels -# and others involved in force calculations. Currently only -# short-ranged and bonded kernels are disabled this way, but in future -# others may be appropriate. Thus the cmake option is not specific to -# nbnxm module. -option(GMX_USE_SIMD_KERNELS "Whether to compile NBNXM and other SIMD kernels" ON) -mark_as_advanced(GMX_USE_SIMD_KERNELS) - -# Add these contents first because linking their tests can take a lot -# of time, so we want lots of parallel work still available after -# linking starts. 
-add_subdirectory(utility) -# Add normal contents -add_subdirectory(gmxlib) -add_subdirectory(mdlib) -add_subdirectory(applied_forces) -add_subdirectory(listed_forces) -add_subdirectory(nbnxm) -add_subdirectory(commandline) -add_subdirectory(domdec) -add_subdirectory(ewald) -add_subdirectory(fft) -add_subdirectory(gpu_utils) -add_subdirectory(hardware) -add_subdirectory(linearalgebra) -add_subdirectory(math) -add_subdirectory(mdrun) -add_subdirectory(mdrunutility) -add_subdirectory(mdspan) -add_subdirectory(mdtypes) -add_subdirectory(onlinehelp) -add_subdirectory(options) -add_subdirectory(pbcutil) -add_subdirectory(random) -add_subdirectory(restraint) -add_subdirectory(tables) -add_subdirectory(taskassignment) -add_subdirectory(timing) -add_subdirectory(topology) -add_subdirectory(trajectory) -add_subdirectory(swap) -add_subdirectory(essentialdynamics) -add_subdirectory(pulling) -add_subdirectory(simd) -add_subdirectory(imd) -add_subdirectory(compat) -add_subdirectory(mimic) -add_subdirectory(modularsimulator) -if (NOT GMX_BUILD_MDRUN_ONLY) - add_subdirectory(gmxana) - add_subdirectory(gmxpreprocess) - add_subdirectory(correlationfunctions) - add_subdirectory(statistics) - add_subdirectory(analysisdata) - add_subdirectory(coordinateio) - add_subdirectory(trajectoryanalysis) - add_subdirectory(energyanalysis) - add_subdirectory(tools) -endif() - -get_property(PROPERTY_SOURCES GLOBAL PROPERTY GMX_LIBGROMACS_SOURCES) -list(APPEND LIBGROMACS_SOURCES ${GMXLIB_SOURCES} ${MDLIB_SOURCES} ${PROPERTY_SOURCES}) - -# This would be the standard way to include thread_mpi, but -# we want libgromacs to link the functions directly -#if(GMX_THREAD_MPI) -# add_subdirectory(thread_mpi) -#endif() -#target_link_libraries(gmx ${GMX_EXTRA_LIBRARIES} ${THREAD_MPI_LIB}) -tmpi_get_source_list(THREAD_MPI_SOURCES ${PROJECT_SOURCE_DIR}/src/external/thread_mpi/src) -add_library(thread_mpi OBJECT ${THREAD_MPI_SOURCES}) -target_compile_definitions(thread_mpi PRIVATE HAVE_CONFIG_H) -if(CYGWIN) - # 
Needs POSIX-isms for strdup, not just std-isms - target_compile_definitions(thread_mpi PRIVATE _POSIX_C_SOURCE=200809L) -endif() -gmx_target_compile_options(thread_mpi) -if (WIN32) - gmx_target_warning_suppression(thread_mpi /wd4996 HAS_NO_MSVC_UNSAFE_FUNCTION) -endif() -list(APPEND libgromacs_object_library_dependencies thread_mpi) - -configure_file(version.h.cmakein version.h) -if(GMX_INSTALL_LEGACY_API) - install(FILES - ${CMAKE_CURRENT_BINARY_DIR}/version.h - analysisdata.h - options.h - selection.h - trajectoryanalysis.h - DESTINATION include/gromacs) -endif() - -# This code is here instead of utility/CMakeLists.txt, because CMake -# custom commands and source file properties can only be set in the directory -# that contains the target that uses them. -# TODO: Generate a header instead that can be included from baseversion.cpp. -# That probably simplifies things somewhat. -set(GENERATED_VERSION_FILE utility/baseversion-gen.cpp) -gmx_configure_version_file( - utility/baseversion-gen.cpp.cmakein ${GENERATED_VERSION_FILE} - REMOTE_HASH - EXTRA_VARS - GMX_SOURCE_DOI - GMX_RELEASE_HASH - GMX_SOURCE_HASH - ) -list(APPEND LIBGROMACS_SOURCES ${GENERATED_VERSION_FILE}) - -# Mark some shared GPU implementation files to compile with CUDA if needed -if (GMX_GPU_CUDA) - get_property(CUDA_SOURCES GLOBAL PROPERTY CUDA_SOURCES) - set_source_files_properties(${CUDA_SOURCES} PROPERTIES CUDA_SOURCE_PROPERTY_FORMAT OBJ) -endif() - -if (GMX_GPU_CUDA) - # Work around FindCUDA that prevents using target_link_libraries() - # with keywords otherwise... 
- set(CUDA_LIBRARIES PRIVATE ${CUDA_LIBRARIES}) - if (NOT GMX_CLANG_CUDA) - gmx_cuda_add_library(libgromacs ${LIBGROMACS_SOURCES}) - else() - add_library(libgromacs ${LIBGROMACS_SOURCES}) - endif() - target_link_libraries(libgromacs PRIVATE ${CUDA_CUFFT_LIBRARIES}) -else() - add_library(libgromacs ${LIBGROMACS_SOURCES}) -endif() - -# Add these contents first because linking their tests can take a lot -# of time, so we want lots of parallel work still available after -# linking starts. -add_subdirectory(fileio) -add_subdirectory(selection) - -# Suppress a warning about our abuse of t_inputrec -gmx_source_file_warning_suppression(mdtypes/inputrec.cpp -Wno-class-memaccess HAS_NO_CLASS_MEMACCESS) - -# Handle the object libraries that contain the source file -# dependencies that need special handling because they are generated -# or external code. -foreach(object_library ${libgromacs_object_library_dependencies}) - if (BUILD_SHARED_LIBS) - set_target_properties(${object_library} PROPERTIES POSITION_INDEPENDENT_CODE true) - endif() - target_include_directories(${object_library} SYSTEM BEFORE PRIVATE ${PROJECT_SOURCE_DIR}/src/external/thread_mpi/include) - - # Add the sources from the object libraries to the main library. - target_sources(libgromacs PRIVATE $) -endforeach() -gmx_target_compile_options(libgromacs) -target_compile_definitions(libgromacs PRIVATE HAVE_CONFIG_H) -target_include_directories(libgromacs SYSTEM BEFORE PRIVATE ${PROJECT_SOURCE_DIR}/src/external/thread_mpi/include) - -if (GMX_GPU_OPENCL) - option(GMX_EXTERNAL_CLFFT "True if an external clFFT is required to be used" FALSE) - mark_as_advanced(GMX_EXTERNAL_CLFFT) - - # Default to using clFFT found on the system - # switch to quiet at the second run. 
- if (DEFINED clFFT_LIBRARY) - set (clFFT_FIND_QUIETLY TRUE) - endif() - find_package(clFFT) - if (NOT clFFT_FOUND) - if (GMX_EXTERNAL_CLFFT) - message(FATAL_ERROR "Did not find required external clFFT library, consider setting clFFT_ROOT_DIR") - endif() - - if(MSVC) - message(FATAL_ERROR -"An OpenCL build was requested with Visual Studio compiler, but GROMACS -requires clFFT, which was not found on your system. GROMACS does bundle -clFFT to help with building for OpenCL, but that clFFT has not yet been -ported to the more recent versions of that compiler that GROMACS itself -requires. Thus for now, OpenCL is not available with MSVC and the internal -build of clFFT in GROMACS 2019. Either change compiler, try installing -a clFFT package, or use the latest GROMACS 2018 point release.") - endif() - - # Fall back on the internal version - set (_clFFT_dir ../external/clFFT/src) - add_subdirectory(${_clFFT_dir} clFFT-build) - target_sources(libgromacs PRIVATE - $ - ) - target_include_directories(libgromacs SYSTEM PRIVATE ${_clFFT_dir}/include) - # Use the magic variable for how to link any library needed for - # dlopen, etc. which is -ldl where needed, and empty otherwise - # (e.g. Windows, BSD, Mac). - target_link_libraries(libgromacs PRIVATE "${CMAKE_DL_LIBS}") - else() - target_link_libraries(libgromacs PRIVATE clFFT) - endif() -endif() - -# Permit GROMACS code to include externally developed headers, such as -# the functionality from the nonstd project that we use for -# gmx::compat::optional. These are included as system headers so that -# no warnings are issued from them. -# -# TODO Perhaps generalize this for all headers from src/external -target_include_directories(libgromacs SYSTEM PRIVATE ${PROJECT_SOURCE_DIR}/src/external) - -if(SIMD_AVX_512_CXX_SUPPORTED AND NOT ("${GMX_SIMD_ACTIVE}" STREQUAL "AVX_512_KNL")) - # Since we might be overriding -march=core-avx2, add a flag so we don't warn for this specific file. 
- # On KNL this can cause illegal instruction because the compiler might use non KNL AVX instructions - # with the SIMD_AVX_512_CXX_FLAGS flags. - set_source_files_properties(hardware/identifyavx512fmaunits.cpp PROPERTIES COMPILE_FLAGS "${SIMD_AVX_512_CXX_FLAGS} ${CXX_NO_UNUSED_OPTION_WARNING_FLAGS}") -endif() - -# Do any special handling needed for .cpp files that use -# CUDA runtime headers -if (GMX_GPU_CUDA AND CMAKE_CXX_COMPILER_ID MATCHES "Clang") - # CUDA header cuda_runtime_api.h in at least CUDA 10.1 uses 0 - # where nullptr would be preferable. GROMACS can't fix these, so - # must suppress them. - GMX_TEST_CXXFLAG(CXXFLAGS_NO_ZERO_AS_NULL_POINTER_CONSTANT "-Wno-zero-as-null-pointer-constant" NVCC_CLANG_SUPPRESSIONS_CXXFLAGS) - - foreach(_compile_flag ${NVCC_CLANG_SUPPRESSIONS_CXXFLAGS}) - set(GMX_CUDA_CLANG_FLAGS "${GMX_CUDA_CLANG_FLAGS} ${_compile_flag}") - endforeach() - if (GMX_CLANG_CUDA) - foreach (_file ${LIBGROMACS_SOURCES}) - get_filename_component(_ext ${_file} EXT) - get_source_file_property(_cuda_source_format ${_file} CUDA_SOURCE_PROPERTY_FORMAT) - if ("${_ext}" STREQUAL ".cu" OR _cuda_source_format) - gmx_compile_cuda_file_with_clang(${_file}) - endif() - endforeach() - else() - get_property(CUDA_SOURCES GLOBAL PROPERTY CUDA_SOURCES) - set_source_files_properties(${CUDA_SOURCES} PROPERTIES COMPILE_FLAGS ${GMX_CUDA_CLANG_FLAGS}) - endif() -endif() - -# Only add the -fsycl flag to sources that really need it -if (GMX_GPU_SYCL) - get_property(SYCL_SOURCES GLOBAL PROPERTY SYCL_SOURCES) - set_source_files_properties(${SYCL_SOURCES} PROPERTIES COMPILE_FLAGS "${SYCL_CXX_FLAGS}") -endif() - -gmx_setup_tng_for_libgromacs() - -# We apply the SYCL flag explicitly just for libgromacs, since bugs in the beta versions of -# icpx/dpcpp leads to crashes if we try to link an library without any SYCL code with the -# -fsycl flag enabled. Once that bug is fixed, we should change it to simply add -# SYCL_CXX_FLAGS to GMX_SHARED_LINKER_FLAGS. 
-target_link_libraries(libgromacs - PRIVATE - ${EXTRAE_LIBRARIES} - ${GMX_EXTRA_LIBRARIES} - ${GMX_COMMON_LIBRARIES} - ${FFT_LIBRARIES} ${LINEAR_ALGEBRA_LIBRARIES} - ${THREAD_LIB} ${GMX_SHARED_LINKER_FLAGS} - ${SYCL_CXX_FLAGS} - ${OpenCL_LIBRARIES} - $<$:socket> - PUBLIC - ${GMX_PUBLIC_LIBRARIES} - ) -if (GMX_OPENMP) - target_link_libraries(libgromacs PUBLIC OpenMP::OpenMP_CXX) -endif() -set_target_properties(libgromacs PROPERTIES - OUTPUT_NAME "gromacs${GMX_LIBS_SUFFIX}" - SOVERSION ${LIBRARY_SOVERSION_MAJOR} - VERSION ${LIBRARY_VERSION} - ) - -gmx_manage_lmfit() -target_link_libraries(libgromacs PRIVATE lmfit) - -# Make sure we fix "everything" found by more recent versions of clang. -if (CMAKE_CXX_COMPILER_ID MATCHES "Clang" AND CMAKE_CXX_COMPILER_VERSION VERSION_GREATER "7") - target_compile_options(libgromacs PRIVATE $<$:-Weverything ${IGNORED_CLANG_ALL_WARNINGS}>) -endif() -if (CMAKE_CXX_COMPILER_ID STREQUAL "MSVC") - target_compile_options(libgromacs PRIVATE $<$:/analyze /analyze:stacksize 70000 - #Control flow warnings are disabled because the commond line output is insufficient. There is no tool - #to convert the xml report to e.g. HTML and even in Visual Studio the viewer doesn't work with cmake support. - /wd6001 #unitialized memory - /wd6011 #derefencing NULL - /wd6053 #prior call not zero-terminate - /wd6054 #might not be zero-terminated - /wd6385 #reading invalid data - /wd6386 #buffer overrun - /wd6387 #could be '0' - /wd28199 #uninitialized memory - # For compile time constant (e.g. 
templates) the following warnings have flase postives - /wd6239 #( && ) - /wd6240 #( && ) - /wd6294 #Ill-defined for-loop - /wd6326 #comparison of constant with other constant - /wd28020 #expression involving paramter is not true - # Misc - /wd6330 #incorrect type to function (warns for char (instead of unsigned) for isspace/isalpha/isdigit/..)) - /wd6993 #OpenMP ignored - #TODO - /wd6031 #return value ignored (important - mostly warnigns about sscanf) - /wd6244 #hides declaration (known issue - we ingore similar warnings for other compilers) - /wd6246 #hides declaration - > - ) -endif() - -if (GMX_CLANG_TIDY) - set_target_properties(libgromacs PROPERTIES CXX_CLANG_TIDY - "${CLANG_TIDY_EXE};-warnings-as-errors=*") -endif() - -# clang-3.6 warns about a number of issues that are not reported by more modern compilers -# and we know they are not real issues. So we only check that it can compile without error -# but ignore all warnings. -if (CMAKE_CXX_COMPILER_ID MATCHES "Clang" AND CMAKE_CXX_COMPILER_VERSION MATCHES "^3\.6") - target_compile_options(libgromacs PRIVATE $<$:-w>) -endif() - -# Only install the library in mdrun-only mode if it is actually necessary -# for the binary -if (NOT GMX_BUILD_MDRUN_ONLY OR BUILD_SHARED_LIBS) - install(TARGETS libgromacs - EXPORT libgromacs - LIBRARY - DESTINATION ${CMAKE_INSTALL_LIBDIR} - COMPONENT libraries - RUNTIME - DESTINATION ${CMAKE_INSTALL_BINDIR} - COMPONENT libraries - ARCHIVE - DESTINATION ${CMAKE_INSTALL_LIBDIR} - COMPONENT libraries - INCLUDES DESTINATION include) - target_compile_definitions(libgromacs PUBLIC $) - # legacy headers use c++17 features, so consumer codes need to use that standard, too - if(GMX_INSTALL_LEGACY_API) - target_compile_features(libgromacs INTERFACE cxx_std_${CMAKE_CXX_STANDARD}) - endif() - add_library(Gromacs::libgromacs ALIAS libgromacs) -endif() - -if (NOT GMX_BUILD_MDRUN_ONLY) - include(InstallLibInfo.cmake) -endif() - -# Technically, the user could want to do this for an OpenCL build -# 
using the CUDA runtime, but currently there's no reason to want to -# do that. -if (INSTALL_CUDART_LIB) #can be set manual by user - if (GMX_GPU_CUDA) - foreach(CUDA_LIB ${CUDA_LIBRARIES}) - string(REGEX MATCH "cudart" IS_CUDART ${CUDA_LIB}) - if(IS_CUDART) #libcuda should not be installed - #install also name-links (linker uses those) - file(GLOB CUDA_LIBS ${CUDA_LIB}*) - install(FILES ${CUDA_LIBS} DESTINATION - ${CMAKE_INSTALL_LIBDIR} COMPONENT libraries) - endif() - endforeach() - else() - message(WARNING "INSTALL_CUDART_LIB only makes sense when configuring for CUDA support") - endif() -endif() - -if(GMX_GPU_OPENCL) - # Install the utility headers - file(GLOB OPENCL_INSTALLED_FILES - gpu_utils/vectype_ops.clh - gpu_utils/device_utils.clh - ) - install(FILES ${OPENCL_INSTALLED_FILES} - DESTINATION ${GMX_INSTALL_OCLDIR}/gromacs/gpu_utils - COMPONENT libraries) - file(GLOB OPENCL_INSTALLED_FILES - pbcutil/ishift.h - ) - install(FILES ${OPENCL_INSTALLED_FILES} - DESTINATION ${GMX_INSTALL_OCLDIR}/gromacs/pbcutil - COMPONENT libraries) - - # Install the NBNXM source and headers - file(GLOB OPENCL_INSTALLED_FILES - nbnxm/constants.h - ) - install(FILES ${OPENCL_INSTALLED_FILES} - DESTINATION ${GMX_INSTALL_OCLDIR}/gromacs/nbnxm - COMPONENT libraries) - file(GLOB OPENCL_INSTALLED_FILES - nbnxm/opencl/nbnxm_ocl_kernels.cl - nbnxm/opencl/nbnxm_ocl_kernel.clh - nbnxm/opencl/nbnxm_ocl_kernel_pruneonly.clh - nbnxm/opencl/nbnxm_ocl_kernels.clh - nbnxm/opencl/nbnxm_ocl_kernels_fastgen.clh - nbnxm/opencl/nbnxm_ocl_kernels_fastgen_add_twincut.clh - nbnxm/opencl/nbnxm_ocl_kernel_utils.clh - nbnxm/opencl/nbnxm_ocl_consts.h - ) - install(FILES ${OPENCL_INSTALLED_FILES} - DESTINATION ${GMX_INSTALL_OCLDIR}/gromacs/nbnxm/opencl - COMPONENT libraries) - - # Install the PME source and headers - file(GLOB OPENCL_INSTALLED_FILES - ewald/pme_spread.clh - ewald/pme_solve.clh - ewald/pme_gather.clh - ewald/pme_gpu_calculate_splines.clh - ewald/pme_program.cl - ewald/pme_gpu_types.h - ) - 
install(FILES ${OPENCL_INSTALLED_FILES} - DESTINATION ${GMX_INSTALL_OCLDIR}/gromacs/ewald - COMPONENT libraries) -endif() diff --git a/patches/gromacs-2021.7.diff/src/gromacs/mdlib/expanded.cpp b/patches/gromacs-2021.7.diff/src/gromacs/mdlib/expanded.cpp deleted file mode 100644 index 14924afbc1..0000000000 --- a/patches/gromacs-2021.7.diff/src/gromacs/mdlib/expanded.cpp +++ /dev/null @@ -1,1646 +0,0 @@ -/* - * This file is part of the GROMACS molecular simulation package. - * - * Copyright (c) 2012-2018, The GROMACS development team. - * Copyright (c) 2019,2020,2021, by the GROMACS development team, led by - * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl, - * and including many others, as listed in the AUTHORS file in the - * top-level source directory and at http://www.gromacs.org. - * - * GROMACS is free software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public License - * as published by the Free Software Foundation; either version 2.1 - * of the License, or (at your option) any later version. - * - * GROMACS is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public - * License along with GROMACS; if not, see - * http://www.gnu.org/licenses, or write to the Free Software Foundation, - * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. - * - * If you want to redistribute modifications to GROMACS, please - * consider that scientific software is very special. Version - * control is crucial - bugs must be traceable. We will be happy to - * consider code for inclusion in the official distribution, but - * derived work must not be called official GROMACS. 
Details are found - * in the README & COPYING files - if they are missing, get the - * official version at http://www.gromacs.org. - * - * To help us fund GROMACS development, we humbly ask that you cite - * the research papers on the package. Check out http://www.gromacs.org. - */ - -/* PLUMED */ -#include "../../../Plumed.h" -extern int plumedswitch; -extern plumed plumedmain; -/* END PLUMED */ - -#include "gmxpre.h" - -#include "expanded.h" - -#include -#include - -#include - -#include "gromacs/domdec/domdec.h" -#include "gromacs/fileio/confio.h" -#include "gromacs/fileio/gmxfio.h" -#include "gromacs/fileio/xtcio.h" -#include "gromacs/gmxlib/network.h" -#include "gromacs/gmxlib/nrnb.h" -#include "gromacs/listed_forces/disre.h" -#include "gromacs/listed_forces/orires.h" -#include "gromacs/math/functions.h" -#include "gromacs/math/units.h" -#include "gromacs/math/vec.h" -#include "gromacs/mdlib/calcmu.h" -#include "gromacs/mdlib/constr.h" -#include "gromacs/mdlib/force.h" -#include "gromacs/mdlib/update.h" -#include "gromacs/mdtypes/enerdata.h" -#include "gromacs/mdtypes/forcerec.h" -#include "gromacs/mdtypes/inputrec.h" -#include "gromacs/mdtypes/md_enums.h" -#include "gromacs/mdtypes/mdatom.h" -#include "gromacs/mdtypes/state.h" -#include "gromacs/random/threefry.h" -#include "gromacs/random/uniformrealdistribution.h" -#include "gromacs/timing/wallcycle.h" -#include "gromacs/utility/fatalerror.h" -#include "gromacs/utility/gmxmpi.h" -#include "gromacs/utility/logger.h" -#include "gromacs/utility/smalloc.h" - -#include "expanded_internal.h" - -static void init_df_history_weights(df_history_t* dfhist, const t_expanded* expand, int nlim) -{ - int i; - dfhist->wl_delta = expand->init_wl_delta; - for (i = 0; i < nlim; i++) - { - dfhist->sum_weights[i] = expand->init_lambda_weights[i]; - dfhist->sum_dg[i] = expand->init_lambda_weights[i]; - } -} - -/* Eventually should contain all the functions needed to initialize expanded ensemble - before the md loop starts */ 
-void init_expanded_ensemble(gmx_bool bStateFromCP, const t_inputrec* ir, df_history_t* dfhist, const gmx::MDLogger& mdlog) -{ - if (!bStateFromCP) - { - init_df_history_weights(dfhist, ir->expandedvals, ir->fepvals->n_lambda); - } - if (plumedswitch) - { - if (ir->expandedvals->elamstats == elamstatsNO) - { - // No weight updating was chosen, use PLUMED weights - int plumedVersion=0; - plumed_cmd(plumedmain, "getApiVersion", &plumedVersion); - GMX_RELEASE_ASSERT( - plumedVersion >= 9, - "Please use PLUMED v2.8 or newer to use alchemical metadynamics with expanded ensemble"); - - GMX_LOG(mdlog.info).asParagraph().appendText( - "You requested an expanded ensemble simulation with lmc-stats = no and activated PLUMED.\n" - "As a result, this simulation will use the bias provided by PLUMED and ignore all\n" - "expanded ensemble settings related to weight updates.\n" - "If you want to use lambda weights updated by GROMACS in the expanded ensemble calculation,\n" - "set lmc-stats != no."); - } - else - { - GMX_LOG(mdlog.info).asParagraph().appendText( - "You requested an expanded ensemble simulation with lmc-stats != no and activated PLUMED.\n" - "As a result, this simulation will use lambda weights managed by GROMACS and will not\n" - "explicitly use the PLUMED bias in the expanded ensemble calculation.\n" - "If you want to use the PLUMED bias as lambda weights, set lmc-stats = no."); - } - } -} - -static void GenerateGibbsProbabilities(const real* ene, double* p_k, double* pks, int minfep, int maxfep) -{ - - int i; - real maxene; - - *pks = 0.0; - maxene = ene[minfep]; - /* find the maximum value */ - for (i = minfep; i <= maxfep; i++) - { - if (ene[i] > maxene) - { - maxene = ene[i]; - } - } - /* find the denominator */ - for (i = minfep; i <= maxfep; i++) - { - *pks += std::exp(ene[i] - maxene); - } - /*numerators*/ - for (i = minfep; i <= maxfep; i++) - { - p_k[i] = std::exp(ene[i] - maxene) / *pks; - } -} - -static void -GenerateWeightedGibbsProbabilities(const 
real* ene, double* p_k, double* pks, int nlim, real* nvals, real delta) -{ - - int i; - real maxene; - real* nene; - *pks = 0.0; - - snew(nene, nlim); - for (i = 0; i < nlim; i++) - { - if (nvals[i] == 0) - { - /* add the delta, since we need to make sure it's greater than zero, and - we need a non-arbitrary number? */ - nene[i] = ene[i] + std::log(nvals[i] + delta); - } - else - { - nene[i] = ene[i] + std::log(nvals[i]); - } - } - - /* find the maximum value */ - maxene = nene[0]; - for (i = 0; i < nlim; i++) - { - if (nene[i] > maxene) - { - maxene = nene[i]; - } - } - - /* subtract off the maximum, avoiding overflow */ - for (i = 0; i < nlim; i++) - { - nene[i] -= maxene; - } - - /* find the denominator */ - for (i = 0; i < nlim; i++) - { - *pks += std::exp(nene[i]); - } - - /*numerators*/ - for (i = 0; i < nlim; i++) - { - p_k[i] = std::exp(nene[i]) / *pks; - } - sfree(nene); -} - -static int FindMinimum(const real* min_metric, int N) -{ - - real min_val; - int min_nval, nval; - - min_nval = 0; - min_val = min_metric[0]; - - for (nval = 0; nval < N; nval++) - { - if (min_metric[nval] < min_val) - { - min_val = min_metric[nval]; - min_nval = nval; - } - } - return min_nval; -} - -static gmx_bool CheckHistogramRatios(int nhisto, const real* histo, real ratio) -{ - - int i; - real nmean; - gmx_bool bIfFlat; - - nmean = 0; - for (i = 0; i < nhisto; i++) - { - nmean += histo[i]; - } - - if (nmean == 0) - { - /* no samples! 
is bad!*/ - bIfFlat = FALSE; - return bIfFlat; - } - nmean /= static_cast(nhisto); - - bIfFlat = TRUE; - for (i = 0; i < nhisto; i++) - { - /* make sure that all points are in the ratio < x < 1/ratio range */ - if (!((histo[i] / nmean < 1.0 / ratio) && (histo[i] / nmean > ratio))) - { - bIfFlat = FALSE; - break; - } - } - return bIfFlat; -} - -static gmx_bool CheckIfDoneEquilibrating(int nlim, const t_expanded* expand, const df_history_t* dfhist, int64_t step) -{ - - int i, totalsamples; - gmx_bool bDoneEquilibrating = TRUE; - gmx_bool bIfFlat; - - /* If we are doing slow growth to get initial values, we haven't finished equilibrating */ - if (expand->lmc_forced_nstart > 0) - { - for (i = 0; i < nlim; i++) - { - if (dfhist->n_at_lam[i] - < expand->lmc_forced_nstart) /* we are still doing the initial sweep, so we're - definitely not done equilibrating*/ - { - bDoneEquilibrating = FALSE; - break; - } - } - } - else - { - /* assume we have equilibrated the weights, then check to see if any of the conditions are not met */ - bDoneEquilibrating = TRUE; - - /* calculate the total number of samples */ - switch (expand->elmceq) - { - case elmceqNO: - /* We have not equilibrated, and won't, ever. 
*/ - bDoneEquilibrating = FALSE; - break; - case elmceqYES: - /* we have equilibrated -- we're done */ - bDoneEquilibrating = TRUE; - break; - case elmceqSTEPS: - /* first, check if we are equilibrating by steps, if we're still under */ - if (step < expand->equil_steps) - { - bDoneEquilibrating = FALSE; - } - break; - case elmceqSAMPLES: - totalsamples = 0; - for (i = 0; i < nlim; i++) - { - totalsamples += dfhist->n_at_lam[i]; - } - if (totalsamples < expand->equil_samples) - { - bDoneEquilibrating = FALSE; - } - break; - case elmceqNUMATLAM: - for (i = 0; i < nlim; i++) - { - if (dfhist->n_at_lam[i] - < expand->equil_n_at_lam) /* we are still doing the initial sweep, so we're - definitely not done equilibrating*/ - { - bDoneEquilibrating = FALSE; - break; - } - } - break; - case elmceqWLDELTA: - if (EWL(expand->elamstats)) /* This check is in readir as well, but - just to be sure */ - { - if (dfhist->wl_delta > expand->equil_wl_delta) - { - bDoneEquilibrating = FALSE; - } - } - break; - case elmceqRATIO: - /* we can use the flatness as a judge of good weights, as long as - we're not doing minvar, or Wang-Landau. - But turn off for now until we figure out exactly how we do this. - */ - - if (!(EWL(expand->elamstats) || expand->elamstats == elamstatsMINVAR)) - { - /* we want to use flatness -avoiding- the forced-through samples. Plus, we need - to convert to floats for this histogram function. 
*/ - - real* modhisto; - snew(modhisto, nlim); - for (i = 0; i < nlim; i++) - { - modhisto[i] = 1.0 * (dfhist->n_at_lam[i] - expand->lmc_forced_nstart); - } - bIfFlat = CheckHistogramRatios(nlim, modhisto, expand->equil_ratio); - sfree(modhisto); - if (!bIfFlat) - { - bDoneEquilibrating = FALSE; - } - } - break; - default: bDoneEquilibrating = TRUE; break; - } - } - return bDoneEquilibrating; -} - -static gmx_bool UpdateWeights(int nlim, - t_expanded* expand, - df_history_t* dfhist, - int fep_state, - const real* scaled_lamee, - const real* weighted_lamee, - int64_t step) -{ - gmx_bool bSufficientSamples; - real acceptanceWeight; - int i; - int min_nvalm, min_nvalp, maxc; - real omega_m1_0, omega_p1_0; - real zero_sum_weights; - real *omegam_array, *weightsm_array, *omegap_array, *weightsp_array, *varm_array, *varp_array, - *dwp_array, *dwm_array; - real clam_varm, clam_varp, clam_osum, clam_weightsm, clam_weightsp, clam_minvar; - real * lam_variance, *lam_dg; - double* p_k; - double pks = 0; - - /* Future potential todos for this function (see #3848): - * - Update the names in the dhist structure to be clearer. Not done for now since this - * a bugfix update and we are mininizing other code changes. - * - Modularize the code some more. - * - potentially merge with accelerated weight histogram functionality, since it's very similar. 
- */ - /* if we have equilibrated the expanded ensemble weights, we are not updating them, so exit now */ - if (dfhist->bEquil) - { - return FALSE; - } - - if (CheckIfDoneEquilibrating(nlim, expand, dfhist, step)) - { - dfhist->bEquil = TRUE; - /* zero out the visited states so we know how many equilibrated states we have - from here on out.*/ - for (i = 0; i < nlim; i++) - { - dfhist->n_at_lam[i] = 0; - } - return TRUE; - } - - /* If we reached this far, we have not equilibrated yet, keep on - going resetting the weights */ - - if (EWL(expand->elamstats)) - { - if (expand->elamstats == elamstatsWL) /* Using standard Wang-Landau for weight updates */ - { - dfhist->sum_weights[fep_state] -= dfhist->wl_delta; - dfhist->wl_histo[fep_state] += 1.0; - } - else if (expand->elamstats == elamstatsWWL) - /* Using weighted Wang-Landau for weight updates. - * Very closly equivalent to accelerated weight histogram approach - * applied to expanded ensemble. */ - { - snew(p_k, nlim); - - /* first increment count */ - GenerateGibbsProbabilities(weighted_lamee, p_k, &pks, 0, nlim - 1); - for (i = 0; i < nlim; i++) - { - dfhist->wl_histo[i] += static_cast(p_k[i]); - } - - /* then increment weights (uses count) */ - pks = 0.0; - GenerateWeightedGibbsProbabilities(weighted_lamee, p_k, &pks, nlim, dfhist->wl_histo, - dfhist->wl_delta); - - for (i = 0; i < nlim; i++) - { - dfhist->sum_weights[i] -= dfhist->wl_delta * static_cast(p_k[i]); - } - /* Alternate definition, using logarithms. Shouldn't make very much difference! 
*/ - /* - real di; - for (i=0;iwl_delta*(real)p_k[i]; - dfhist->sum_weights[i] -= log(di); - } - */ - sfree(p_k); - } - - zero_sum_weights = dfhist->sum_weights[0]; - for (i = 0; i < nlim; i++) - { - dfhist->sum_weights[i] -= zero_sum_weights; - } - } - - if (expand->elamstats == elamstatsBARKER || expand->elamstats == elamstatsMETROPOLIS - || expand->elamstats == elamstatsMINVAR) - { - maxc = 2 * expand->c_range + 1; - - snew(lam_dg, nlim); - snew(lam_variance, nlim); - - snew(omegap_array, maxc); - snew(weightsp_array, maxc); - snew(varp_array, maxc); - snew(dwp_array, maxc); - - snew(omegam_array, maxc); - snew(weightsm_array, maxc); - snew(varm_array, maxc); - snew(dwm_array, maxc); - - /* unpack the values of the free energy differences and the - * variance in their estimates between nearby lambdas. We will - * only actually update 2 of these, the state we are currently - * at and the one we end up moving to - */ - - for (i = 0; i < nlim - 1; i++) - { /* only through the second to last */ - lam_dg[i] = dfhist->sum_dg[i + 1] - dfhist->sum_dg[i]; - lam_variance[i] = - gmx::square(dfhist->sum_variance[i + 1]) - gmx::square(dfhist->sum_variance[i]); - } - - /* accumulate running averages of thermodynamic averages for Bennett Acceptance Ratio-based - * estimates of the free energy . - * Rather than peforming self-consistent estimation of the free energies at each step, - * we keep track of an array of possible different free energies (cnvals), - * and we self-consistently choose the best one. The one that leads to a free energy estimate - * that is closest to itself is the best estimate of the free energy. It is essentially a - * parallellized version of self-consistent iteration. maxc is the number of these constants. */ - - for (int nval = 0; nval < maxc; nval++) - { - const real cnval = static_cast(nval - expand->c_range); - - /* Compute acceptance criterion weight to the state below this one for use in averages. 
- * Note we do not have to have just moved from that state to use this free energy - * estimate; these are essentially "virtual" moves. */ - - if (fep_state > 0) - { - const auto lambdaEnergyDifference = - cnval - (scaled_lamee[fep_state] - scaled_lamee[fep_state - 1]); - acceptanceWeight = - gmx::calculateAcceptanceWeight(expand->elamstats, lambdaEnergyDifference); - dfhist->accum_m[fep_state][nval] += acceptanceWeight; - dfhist->accum_m2[fep_state][nval] += acceptanceWeight * acceptanceWeight; - } - - // Compute acceptance criterion weight to transition to the next state - if (fep_state < nlim - 1) - { - const auto lambdaEnergyDifference = - -cnval + (scaled_lamee[fep_state + 1] - scaled_lamee[fep_state]); - acceptanceWeight = - gmx::calculateAcceptanceWeight(expand->elamstats, lambdaEnergyDifference); - dfhist->accum_p[fep_state][nval] += acceptanceWeight; - dfhist->accum_p2[fep_state][nval] += acceptanceWeight * acceptanceWeight; - } - - /* Determination of Metropolis transition and Barker transition weights */ - - int numObservationsCurrentState = dfhist->n_at_lam[fep_state]; - /* determine the number of observations above and below the current state */ - int numObservationsLowerState = 0; - if (fep_state > 0) - { - numObservationsLowerState = dfhist->n_at_lam[fep_state - 1]; - } - int numObservationsHigherState = 0; - if (fep_state < nlim - 1) - { - numObservationsHigherState = dfhist->n_at_lam[fep_state + 1]; - } - - /* Calculate the biases for each expanded ensemble state that minimize the total - * variance, as implemented in Martinez-Veracoechea and Escobedo, - * J. Phys. Chem. 
B 2008, 112, 8120-8128 - * - * The variance associated with the free energy estimate between two states i and j - * is calculated as - * Var(i,j) = {avg[xi(i->j)^2] / avg[xi(i->j)]^2 - 1} / numObservations(i->j) - * + {avg[xi(j->i)^2] / avg[xi(j->i)]^2 - 1} / numObservations(j->i) - * where xi(i->j) is the acceptance factor / weight associated with moving from state i to j - * As we are calculating the acceptance factor to the neighbors every time we're visiting - * a state, numObservations(i->j) == numObservations(i) and numObservations(j->i) == numObservations(j) - */ - - /* Accumulation of acceptance weight averages between the current state and the - * states +1 (p1) and -1 (m1), averaged at current state (0) - */ - real avgAcceptanceCurrentToLower = 0; - real avgAcceptanceCurrentToHigher = 0; - /* Accumulation of acceptance weight averages quantities between states 0 - * and states +1 and -1, squared - */ - real avgAcceptanceCurrentToLowerSquared = 0; - real avgAcceptanceCurrentToHigherSquared = 0; - /* Accumulation of free energy quantities from lower state (m1) to current state (0) and squared */ - real avgAcceptanceLowerToCurrent = 0; - real avgAcceptanceLowerToCurrentSquared = 0; - /* Accumulation of free energy quantities from upper state (p1) to current state (0) and squared */ - real avgAcceptanceHigherToCurrent = 0; - real avgAcceptanceHigherToCurrentSquared = 0; - - if (numObservationsCurrentState > 0) - { - avgAcceptanceCurrentToLower = dfhist->accum_m[fep_state][nval] / numObservationsCurrentState; - avgAcceptanceCurrentToHigher = - dfhist->accum_p[fep_state][nval] / numObservationsCurrentState; - avgAcceptanceCurrentToLowerSquared = - dfhist->accum_m2[fep_state][nval] / numObservationsCurrentState; - avgAcceptanceCurrentToHigherSquared = - dfhist->accum_p2[fep_state][nval] / numObservationsCurrentState; - } - - if ((fep_state > 0) && (numObservationsLowerState > 0)) - { - avgAcceptanceLowerToCurrent = - dfhist->accum_p[fep_state - 1][nval] / 
numObservationsLowerState; - avgAcceptanceLowerToCurrentSquared = - dfhist->accum_p2[fep_state - 1][nval] / numObservationsLowerState; - } - - if ((fep_state < nlim - 1) && (numObservationsHigherState > 0)) - { - avgAcceptanceHigherToCurrent = - dfhist->accum_m[fep_state + 1][nval] / numObservationsHigherState; - avgAcceptanceHigherToCurrentSquared = - dfhist->accum_m2[fep_state + 1][nval] / numObservationsHigherState; - } - /* These are accumulation of positive values (see definition of acceptance functions - * above), or of squares of positive values. - * We're taking this for granted in the following calculation, so make sure - * here that nothing weird happened. Although technically all values should be positive, - * because of floating point precisions, they might be numerically zero. */ - GMX_RELEASE_ASSERT( - avgAcceptanceCurrentToLower >= 0 && avgAcceptanceCurrentToLowerSquared >= 0 - && avgAcceptanceCurrentToHigher >= 0 - && avgAcceptanceCurrentToHigherSquared >= 0 && avgAcceptanceLowerToCurrent >= 0 - && avgAcceptanceLowerToCurrentSquared >= 0 && avgAcceptanceHigherToCurrent >= 0 - && avgAcceptanceHigherToCurrentSquared >= 0, - "By definition, the acceptance factors should all be nonnegative."); - - real varianceCurrentToLower = 0; - real varianceCurrentToHigher = 0; - real weightDifferenceToLower = 0; - real weightDifferenceToHigher = 0; - real varianceToLower = 0; - real varianceToHigher = 0; - - if (fep_state > 0) - { - if (numObservationsCurrentState > 0) - { - /* Calculate {avg[xi(i->j)^2] / avg[xi(i->j)]^2 - 1} - * - * Note that if avg[xi(i->j)] == 0, also avg[xi(i->j)^2] == 0 (since the - * acceptances are all positive!), and hence - * {avg[xi(i->j)^2] / avg[xi(i->j)]^2 - 1} -> 0 for avg[xi(i->j)] -> 0 - * We're catching that case explicitly to avoid numerical - * problems dividing by zero when the overlap between states is small (#3304) - */ - if (avgAcceptanceCurrentToLower > 0) - { - varianceCurrentToLower = - avgAcceptanceCurrentToLowerSquared 
- / (avgAcceptanceCurrentToLower * avgAcceptanceCurrentToLower) - - 1.0; - } - if (numObservationsLowerState > 0) - { - /* Calculate {avg[xi(i->j)^2] / avg[xi(i->j)]^2 - 1} - * - * Note that if avg[xi(i->j)] == 0, also avg[xi(i->j)^2] == 0 (since the - * acceptances are all positive!), and hence - * {avg[xi(i->j)^2] / avg[xi(i->j)]^2 - 1} -> 0 for avg[xi(i->j)] -> 0 - * We're catching that case explicitly to avoid numerical - * problems dividing by zero when the overlap between states is small (#3304) - */ - real varianceLowerToCurrent = 0; - if (avgAcceptanceLowerToCurrent > 0) - { - varianceLowerToCurrent = - avgAcceptanceLowerToCurrentSquared - / (avgAcceptanceLowerToCurrent * avgAcceptanceLowerToCurrent) - - 1.0; - } - /* Free energy difference to the state one state lower */ - /* if these either of these quantities are zero, the energies are */ - /* way too large for the dynamic range. We need an alternate guesstimate */ - if ((avgAcceptanceCurrentToLower == 0) || (avgAcceptanceLowerToCurrent == 0)) - { - weightDifferenceToLower = - (scaled_lamee[fep_state] - scaled_lamee[fep_state - 1]); - } - else - { - weightDifferenceToLower = (std::log(avgAcceptanceCurrentToLower) - - std::log(avgAcceptanceLowerToCurrent)) - + cnval; - } - /* Variance of the free energy difference to the one state lower */ - varianceToLower = - (1.0 / numObservationsCurrentState) * (varianceCurrentToLower) - + (1.0 / numObservationsLowerState) * (varianceLowerToCurrent); - } - } - } - - if (fep_state < nlim - 1) - { - if (numObservationsCurrentState > 0) - { - /* Calculate {avg[xi(i->j)^2] / avg[xi(i->j)]^2 - 1} - * - * Note that if avg[xi(i->j)] == 0, also avg[xi(i->j)^2] == 0 (since the - * acceptances are all positive!), and hence - * {avg[xi(i->j)^2] / avg[xi(i->j)]^2 - 1} -> 0 for avg[xi(i->j)] -> 0 - * We're catching that case explicitly to avoid numerical - * problems dividing by zero when the overlap between states is small (#3304) - */ - - if (avgAcceptanceCurrentToHigher > 0) -
{ - varianceCurrentToHigher = - avgAcceptanceCurrentToHigherSquared - / (avgAcceptanceCurrentToHigher * avgAcceptanceCurrentToHigher) - - 1.0; - } - if (numObservationsHigherState > 0) - { - /* Calculate {avg[xi(i->j)^2] / avg[xi(i->j)]^2 - 1} - * - * Note that if avg[xi(i->j)] == 0, also avg[xi(i->j)^2] == 0 (since the - * acceptances are all positive!), and hence - * {avg[xi(i->j)^2] / avg[xi(i->j)]^2 - 1} -> 0 for avg[xi(i->j)] -> 0 - * We're catching that case explicitly to avoid numerical - * problems dividing by zero when the overlap between states is small (#3304) - */ - real varianceHigherToCurrent = 0; - if (avgAcceptanceHigherToCurrent > 0) - { - varianceHigherToCurrent = - avgAcceptanceHigherToCurrentSquared - / (avgAcceptanceHigherToCurrent * avgAcceptanceHigherToCurrent) - - 1.0; - } - /* Free energy difference to the state one state higher */ - /* if these either of these quantities are zero, the energies are */ - /* way too large for the dynamic range. We need an alternate guesstimate */ - if ((avgAcceptanceHigherToCurrent == 0) || (avgAcceptanceCurrentToHigher == 0)) - { - weightDifferenceToHigher = - (scaled_lamee[fep_state + 1] - scaled_lamee[fep_state]); - } - else - { - weightDifferenceToHigher = (std::log(avgAcceptanceHigherToCurrent) - - std::log(avgAcceptanceCurrentToHigher)) - + cnval; - } - /* Variance of the free energy difference to the one state higher */ - varianceToHigher = - (1.0 / numObservationsHigherState) * (varianceHigherToCurrent) - + (1.0 / numObservationsCurrentState) * (varianceCurrentToHigher); - } - } - } - - if (numObservationsCurrentState > 0) - { - omegam_array[nval] = varianceCurrentToLower; - } - else - { - omegam_array[nval] = 0; - } - weightsm_array[nval] = weightDifferenceToLower; - varm_array[nval] = varianceToLower; - if (numObservationsLowerState > 0) - { - dwm_array[nval] = - fabs((cnval + std::log((1.0 * numObservationsCurrentState) / numObservationsLowerState)) - - lam_dg[fep_state - 1]); - } - else - { - 
dwm_array[nval] = std::fabs(cnval - lam_dg[fep_state - 1]); - } - - if (numObservationsCurrentState > 0) - { - omegap_array[nval] = varianceCurrentToHigher; - } - else - { - omegap_array[nval] = 0; - } - weightsp_array[nval] = weightDifferenceToHigher; - varp_array[nval] = varianceToHigher; - if ((numObservationsHigherState > 0) && (numObservationsCurrentState > 0)) - { - dwp_array[nval] = - fabs((cnval + std::log((1.0 * numObservationsHigherState) / numObservationsCurrentState)) - - lam_dg[fep_state]); - } - else - { - dwp_array[nval] = std::fabs(cnval - lam_dg[fep_state]); - } - } - - /* find the free energy estimate closest to the guessed weight's value */ - - min_nvalm = FindMinimum(dwm_array, maxc); - omega_m1_0 = omegam_array[min_nvalm]; - clam_weightsm = weightsm_array[min_nvalm]; - clam_varm = varm_array[min_nvalm]; - - min_nvalp = FindMinimum(dwp_array, maxc); - omega_p1_0 = omegap_array[min_nvalp]; - clam_weightsp = weightsp_array[min_nvalp]; - clam_varp = varp_array[min_nvalp]; - - clam_osum = omega_m1_0 + omega_p1_0; - clam_minvar = 0; - if (clam_osum > 0) - { - clam_minvar = 0.5 * std::log(clam_osum); - } - - if (fep_state > 0) - { - lam_dg[fep_state - 1] = clam_weightsm; - lam_variance[fep_state - 1] = clam_varm; - } - - if (fep_state < nlim - 1) - { - lam_dg[fep_state] = clam_weightsp; - lam_variance[fep_state] = clam_varp; - } - - if (expand->elamstats == elamstatsMINVAR) - { - bSufficientSamples = TRUE; - /* make sure the number of samples in each state are all - * past a user-specified threshold - */ - for (i = 0; i < nlim; i++) - { - if (dfhist->n_at_lam[i] < expand->minvarmin) - { - bSufficientSamples = FALSE; - } - } - if (bSufficientSamples) - { - dfhist->sum_minvar[fep_state] = clam_minvar; - if (fep_state == 0) - { - for (i = 0; i < nlim; i++) - { - dfhist->sum_minvar[i] += (expand->minvar_const - clam_minvar); - } - expand->minvar_const = clam_minvar; - dfhist->sum_minvar[fep_state] = 0.0; - } - else - { - dfhist->sum_minvar[fep_state] -= 
expand->minvar_const; - } - } - } - - /* we need to rezero minvar now, since it could change at fep_state = 0 */ - dfhist->sum_dg[0] = 0.0; - dfhist->sum_variance[0] = 0.0; - dfhist->sum_weights[0] = dfhist->sum_dg[0] + dfhist->sum_minvar[0]; /* should be zero */ - - for (i = 1; i < nlim; i++) - { - dfhist->sum_dg[i] = lam_dg[i - 1] + dfhist->sum_dg[i - 1]; - dfhist->sum_variance[i] = - std::sqrt(lam_variance[i - 1] + gmx::square(dfhist->sum_variance[i - 1])); - dfhist->sum_weights[i] = dfhist->sum_dg[i] + dfhist->sum_minvar[i]; - } - - sfree(lam_dg); - sfree(lam_variance); - - sfree(omegam_array); - sfree(weightsm_array); - sfree(varm_array); - sfree(dwm_array); - - sfree(omegap_array); - sfree(weightsp_array); - sfree(varp_array); - sfree(dwp_array); - } - return FALSE; -} - -static int ChooseNewLambda(int nlim, - const t_expanded* expand, - df_history_t* dfhist, - int fep_state, - const real* weighted_lamee, - double* p_k, - int64_t seed, - int64_t step) -{ - /* Choose new lambda value, and update transition matrix */ - - int i, ifep, minfep, maxfep, lamnew, lamtrial, starting_fep_state; - real r1, r2, de, trialprob, tprob = 0; - double * propose, *accept, *remainder; - double pks; - real pnorm; - gmx::ThreeFry2x64<0> rng( - seed, gmx::RandomDomain::ExpandedEnsemble); // We only draw once, so zero bits internal counter is fine - gmx::UniformRealDistribution dist; - - starting_fep_state = fep_state; - lamnew = fep_state; /* so that there is a default setting -- stays the same */ - - // Don't equilibrate weights when using Plumed - if (!plumedswitch || expand->elamstats != elamstatsNO) - { - if (!EWL(expand->elamstats)) /* ignore equilibrating the weights if using WL */ - { - if ((expand->lmc_forced_nstart > 0) && (dfhist->n_at_lam[nlim - 1] <= expand->lmc_forced_nstart)) - { - /* Use a marching method to run through the lambdas and get preliminary free energy data, - before starting 'free' sampling. 
We start free sampling when we have enough at each lambda */ - - /* if we have enough at this lambda, move on to the next one */ - - if (dfhist->n_at_lam[fep_state] == expand->lmc_forced_nstart) - { - lamnew = fep_state + 1; - if (lamnew == nlim) /* whoops, stepped too far! */ - { - lamnew -= 1; - } - } - else - { - lamnew = fep_state; - } - return lamnew; - } - } - } - - snew(propose, nlim); - snew(accept, nlim); - snew(remainder, nlim); - - for (i = 0; i < expand->lmc_repeats; i++) - { - rng.restart(step, i); - dist.reset(); - - for (ifep = 0; ifep < nlim; ifep++) - { - propose[ifep] = 0; - accept[ifep] = 0; - } - - if ((expand->elmcmove == elmcmoveGIBBS) || (expand->elmcmove == elmcmoveMETGIBBS)) - { - /* use the Gibbs sampler, with restricted range */ - if (expand->gibbsdeltalam < 0) - { - minfep = 0; - maxfep = nlim - 1; - } - else - { - minfep = fep_state - expand->gibbsdeltalam; - maxfep = fep_state + expand->gibbsdeltalam; - if (minfep < 0) - { - minfep = 0; - } - if (maxfep > nlim - 1) - { - maxfep = nlim - 1; - } - } - - GenerateGibbsProbabilities(weighted_lamee, p_k, &pks, minfep, maxfep); - - if (expand->elmcmove == elmcmoveGIBBS) - { - for (ifep = minfep; ifep <= maxfep; ifep++) - { - propose[ifep] = p_k[ifep]; - accept[ifep] = 1.0; - } - /* Gibbs sampling */ - r1 = dist(rng); - for (lamnew = minfep; lamnew <= maxfep; lamnew++) - { - if (r1 <= p_k[lamnew]) - { - break; - } - r1 -= p_k[lamnew]; - } - } - else if (expand->elmcmove == elmcmoveMETGIBBS) - { - - /* Metropolized Gibbs sampling */ - for (ifep = minfep; ifep <= maxfep; ifep++) - { - remainder[ifep] = 1 - p_k[ifep]; - } - - /* find the proposal probabilities */ - - if (remainder[fep_state] == 0) - { - /* only the current state has any probability */ - /* we have to stay at the current state */ - lamnew = fep_state; - } - else - { - for (ifep = minfep; ifep <= maxfep; ifep++) - { - if (ifep != fep_state) - { - propose[ifep] = p_k[ifep] / remainder[fep_state]; - } - else - { - propose[ifep] = 0; 
- } - } - - r1 = dist(rng); - for (lamtrial = minfep; lamtrial <= maxfep; lamtrial++) - { - pnorm = p_k[lamtrial] / remainder[fep_state]; - if (lamtrial != fep_state) - { - if (r1 <= pnorm) - { - break; - } - r1 -= pnorm; - } - } - - /* we have now selected lamtrial according to p(lamtrial)/1-p(fep_state) */ - tprob = 1.0; - /* trial probability is min{1,\frac{1 - p(old)}{1-p(new)} MRS 1/8/2008 */ - trialprob = (remainder[fep_state]) / (remainder[lamtrial]); - if (trialprob < tprob) - { - tprob = trialprob; - } - r2 = dist(rng); - if (r2 < tprob) - { - lamnew = lamtrial; - } - else - { - lamnew = fep_state; - } - } - - /* now figure out the acceptance probability for each */ - for (ifep = minfep; ifep <= maxfep; ifep++) - { - tprob = 1.0; - if (remainder[ifep] != 0) - { - trialprob = (remainder[fep_state]) / (remainder[ifep]); - } - else - { - trialprob = 1.0; /* this state is the only choice! */ - } - if (trialprob < tprob) - { - tprob = trialprob; - } - /* probability for fep_state=0, but that's fine, it's never proposed! */ - accept[ifep] = tprob; - } - } - - if (lamnew > maxfep) - { - /* it's possible some rounding is failing */ - if (gmx_within_tol(remainder[fep_state], 0, 50 * GMX_DOUBLE_EPS)) - { - /* numerical rounding error -- no state other than the original has weight */ - lamnew = fep_state; - } - else - { - /* probably not a numerical issue */ - int loc = 0; - int nerror = 200 + (maxfep - minfep + 1) * 60; - char* errorstr; - snew(errorstr, nerror); - /* if its greater than maxfep, then something went wrong -- probably underflow - in the calculation of sum weights. 
Generated detailed info for failure */ - loc += sprintf( - errorstr, - "Something wrong in choosing new lambda state with a Gibbs move -- " - "probably underflow in weight determination.\nDenominator is: " - "%3d%17.10e\n i dE numerator weights\n", - 0, pks); - for (ifep = minfep; ifep <= maxfep; ifep++) - { - loc += sprintf(&errorstr[loc], "%3d %17.10e%17.10e%17.10e\n", ifep, - weighted_lamee[ifep], p_k[ifep], dfhist->sum_weights[ifep]); - } - gmx_fatal(FARGS, "%s", errorstr); - } - } - } - else if ((expand->elmcmove == elmcmoveMETROPOLIS) || (expand->elmcmove == elmcmoveBARKER)) - { - /* use the metropolis sampler with trial +/- 1 */ - r1 = dist(rng); - if (r1 < 0.5) - { - if (fep_state == 0) - { - lamtrial = fep_state; - } - else - { - lamtrial = fep_state - 1; - } - } - else - { - if (fep_state == nlim - 1) - { - lamtrial = fep_state; - } - else - { - lamtrial = fep_state + 1; - } - } - - de = weighted_lamee[lamtrial] - weighted_lamee[fep_state]; - if (expand->elmcmove == elmcmoveMETROPOLIS) - { - tprob = 1.0; - if (de < 0) - { - tprob = std::exp(de); - } - propose[fep_state] = 0; - propose[lamtrial] = 1.0; /* note that this overwrites the above line if fep_state = ntrial, which only occurs at the ends */ - accept[fep_state] = - 1.0; /* doesn't actually matter, never proposed unless fep_state = ntrial, in which case it's 1.0 anyway */ - accept[lamtrial] = tprob; - } - else if (expand->elmcmove == elmcmoveBARKER) - { - if (de > 0) /* Numerically stable version */ - { - tprob = 1.0 / (1.0 + std::exp(-de)); - } - else if (de < 0) - { - tprob = std::exp(de) / (std::exp(de) + 1.0); - } - propose[fep_state] = (1 - tprob); - propose[lamtrial] += - tprob; /* we add, to account for the fact that at the end, they might be the same point */ - accept[fep_state] = 1.0; - accept[lamtrial] = 1.0; - } - - r2 = dist(rng); - if (r2 < tprob) - { - lamnew = lamtrial; - } - else - { - lamnew = fep_state; - } - } - - for (ifep = 0; ifep < nlim; ifep++) - { - 
dfhist->Tij[fep_state][ifep] += propose[ifep] * accept[ifep]; - dfhist->Tij[fep_state][fep_state] += propose[ifep] * (1.0 - accept[ifep]); - } - fep_state = lamnew; - } - - dfhist->Tij_empirical[starting_fep_state][lamnew] += 1.0; - - sfree(propose); - sfree(accept); - sfree(remainder); - - return lamnew; -} - -/* print out the weights to the log, along with current state */ -void PrintFreeEnergyInfoToFile(FILE* outfile, - const t_lambda* fep, - const t_expanded* expand, - const t_simtemp* simtemp, - const df_history_t* dfhist, - int fep_state, - int frequency, - int64_t step) -{ - int nlim, i, ifep, jfep; - real dw, dg, dv, Tprint; - const char* print_names[efptNR] = { " FEPL", "MassL", "CoulL", " VdwL", - "BondL", "RestT", "Temp.(K)" }; - gmx_bool bSimTemp = FALSE; - - nlim = fep->n_lambda; - if (simtemp != nullptr) - { - bSimTemp = TRUE; - } - - if (step % frequency == 0) - { - fprintf(outfile, " MC-lambda information\n"); - if (EWL(expand->elamstats) && (!(dfhist->bEquil))) - { - fprintf(outfile, " Wang-Landau incrementor is: %11.5g\n", dfhist->wl_delta); - } - fprintf(outfile, " N"); - for (i = 0; i < efptNR; i++) - { - if (fep->separate_dvdl[i]) - { - fprintf(outfile, "%7s", print_names[i]); - } - else if ((i == efptTEMPERATURE) && bSimTemp) - { - fprintf(outfile, "%10s", print_names[i]); /* more space for temperature formats */ - } - } - fprintf(outfile, " Count "); - if (expand->elamstats == elamstatsMINVAR) - { - fprintf(outfile, "W(in kT) G(in kT) dG(in kT) dV(in kT)\n"); - } - else - { - fprintf(outfile, "G(in kT) dG(in kT)\n"); - } - for (ifep = 0; ifep < nlim; ifep++) - { - if (ifep == nlim - 1) - { - dw = 0.0; - dg = 0.0; - dv = 0.0; - } - else - { - dw = dfhist->sum_weights[ifep + 1] - dfhist->sum_weights[ifep]; - dg = dfhist->sum_dg[ifep + 1] - dfhist->sum_dg[ifep]; - dv = std::sqrt(gmx::square(dfhist->sum_variance[ifep + 1]) - - gmx::square(dfhist->sum_variance[ifep])); - } - fprintf(outfile, "%3d", (ifep + 1)); - for (i = 0; i < efptNR; i++) - { - 
if (fep->separate_dvdl[i]) - { - fprintf(outfile, "%7.3f", fep->all_lambda[i][ifep]); - } - else if (i == efptTEMPERATURE && bSimTemp) - { - fprintf(outfile, "%9.3f", simtemp->temperatures[ifep]); - } - } - if (EWL(expand->elamstats) - && (!(dfhist->bEquil))) /* if performing WL and still haven't equilibrated */ - { - if (expand->elamstats == elamstatsWL) - { - fprintf(outfile, " %8d", static_cast(dfhist->wl_histo[ifep])); - } - else - { - fprintf(outfile, " %8.3f", dfhist->wl_histo[ifep]); - } - } - else /* we have equilibrated weights */ - { - fprintf(outfile, " %8d", dfhist->n_at_lam[ifep]); - } - if (expand->elamstats == elamstatsMINVAR) - { - fprintf(outfile, " %10.5f %10.5f %10.5f %10.5f", dfhist->sum_weights[ifep], - dfhist->sum_dg[ifep], dg, dv); - } - else - { - fprintf(outfile, " %10.5f %10.5f", dfhist->sum_weights[ifep], dw); - } - if (ifep == fep_state) - { - fprintf(outfile, " <<\n"); - } - else - { - fprintf(outfile, " \n"); - } - } - fprintf(outfile, "\n"); - - if ((step % expand->nstTij == 0) && (expand->nstTij > 0) && (step > 0)) - { - fprintf(outfile, " Transition Matrix\n"); - for (ifep = 0; ifep < nlim; ifep++) - { - fprintf(outfile, "%12d", (ifep + 1)); - } - fprintf(outfile, "\n"); - for (ifep = 0; ifep < nlim; ifep++) - { - for (jfep = 0; jfep < nlim; jfep++) - { - if (dfhist->n_at_lam[ifep] > 0) - { - if (expand->bSymmetrizedTMatrix) - { - Tprint = (dfhist->Tij[ifep][jfep] + dfhist->Tij[jfep][ifep]) - / (dfhist->n_at_lam[ifep] + dfhist->n_at_lam[jfep]); - } - else - { - Tprint = (dfhist->Tij[ifep][jfep]) / (dfhist->n_at_lam[ifep]); - } - } - else - { - Tprint = 0.0; - } - fprintf(outfile, "%12.8f", Tprint); - } - fprintf(outfile, "%3d\n", (ifep + 1)); - } - - fprintf(outfile, " Empirical Transition Matrix\n"); - for (ifep = 0; ifep < nlim; ifep++) - { - fprintf(outfile, "%12d", (ifep + 1)); - } - fprintf(outfile, "\n"); - for (ifep = 0; ifep < nlim; ifep++) - { - for (jfep = 0; jfep < nlim; jfep++) - { - if (dfhist->n_at_lam[ifep] > 0) - { - 
if (expand->bSymmetrizedTMatrix) - { - Tprint = (dfhist->Tij_empirical[ifep][jfep] + dfhist->Tij_empirical[jfep][ifep]) - / (dfhist->n_at_lam[ifep] + dfhist->n_at_lam[jfep]); - } - else - { - Tprint = dfhist->Tij_empirical[ifep][jfep] / (dfhist->n_at_lam[ifep]); - } - } - else - { - Tprint = 0.0; - } - fprintf(outfile, "%12.8f", Tprint); - } - fprintf(outfile, "%3d\n", (ifep + 1)); - } - } - } -} - -int ExpandedEnsembleDynamics(FILE* log, - const t_inputrec* ir, - const gmx_enerdata_t* enerd, - t_state* state, - t_extmass* MassQ, - int fep_state, - df_history_t* dfhist, - int64_t step, - rvec* v, - const t_mdatoms* mdatoms, - real* realFepState) -/* Note that the state variable is only needed for simulated tempering, not - Hamiltonian expanded ensemble. May be able to remove it after integrator refactoring. */ -{ - real * pfep_lamee, *scaled_lamee, *weighted_lamee; - double* p_k; - int i, nlim, lamnew, totalsamples; - real oneovert, maxscaled = 0, maxweighted = 0; - t_expanded* expand; - t_simtemp* simtemp; - gmx_bool bIfReset, bSwitchtoOneOverT, bDoneEquilibrating = FALSE; - - expand = ir->expandedvals; - simtemp = ir->simtempvals; - nlim = ir->fepvals->n_lambda; - - snew(scaled_lamee, nlim); - snew(weighted_lamee, nlim); - snew(pfep_lamee, nlim); - snew(p_k, nlim); - - /* update the count at the current lambda*/ - dfhist->n_at_lam[fep_state]++; - - /* need to calculate the PV term somewhere, but not needed here? Not until there's a lambda - state that's pressure controlled.*/ - /* - pVTerm = 0; - where does this PV term go? - for (i=0;iefep != efepNO) - { - for (i = 0; i < nlim; i++) - { - if (ir->bSimTemp) - { - /* Note -- this assumes no mass changes, since kinetic energy is not added . . . 
*/ - scaled_lamee[i] = enerd->foreignLambdaTerms.deltaH(i) / (simtemp->temperatures[i] * BOLTZ) - + enerd->term[F_EPOT] - * (1.0 / (simtemp->temperatures[i]) - - 1.0 / (simtemp->temperatures[fep_state])) - / BOLTZ; - } - else - { - scaled_lamee[i] = enerd->foreignLambdaTerms.deltaH(i) / (expand->mc_temp * BOLTZ); - /* mc_temp is currently set to the system reft unless otherwise defined */ - } - - /* save these energies for printing, so they don't get overwritten by the next step */ - /* they aren't overwritten in the non-free energy case, but we always print with these - for simplicity */ - } - } - else - { - if (ir->bSimTemp) - { - for (i = 0; i < nlim; i++) - { - scaled_lamee[i] = - enerd->term[F_EPOT] - * (1.0 / simtemp->temperatures[i] - 1.0 / simtemp->temperatures[fep_state]) / BOLTZ; - } - } - } - - for (i = 0; i < nlim; i++) - { - pfep_lamee[i] = scaled_lamee[i]; - - weighted_lamee[i] = dfhist->sum_weights[i] - scaled_lamee[i]; - if (i == 0) - { - maxscaled = scaled_lamee[i]; - maxweighted = weighted_lamee[i]; - } - else - { - if (scaled_lamee[i] > maxscaled) - { - maxscaled = scaled_lamee[i]; - } - if (weighted_lamee[i] > maxweighted) - { - maxweighted = weighted_lamee[i]; - } - } - } - - for (i = 0; i < nlim; i++) - { - scaled_lamee[i] -= maxscaled; - weighted_lamee[i] -= maxweighted; - } - - if (plumedswitch && expand->elamstats == elamstatsNO) - { - // Update weights at all lambda states with current values from Plumed. - // For acceptance criterion, expanded ensemble is expecting the weight at - // lambda i=0 to be zero. 
- real zeroBias = 0; - for (i = 0; i < nlim; i++) - { - *realFepState = i; - real bias = 0; - plumed_cmd(plumedmain, "prepareCalc", nullptr); - plumed_cmd(plumedmain, "performCalcNoForces", nullptr); - plumed_cmd(plumedmain, "getBias", &bias); - bias /= expand->mc_temp * BOLTZ; - if (i == 0) - { - zeroBias = bias; - } - dfhist->sum_weights[i] = -bias + zeroBias; - } - *realFepState = fep_state; - } - else // Don't update weights using different method when Plumed is active - { - /* update weights - we decide whether or not to actually do this inside */ - - bDoneEquilibrating = - UpdateWeights(nlim, expand, dfhist, fep_state, scaled_lamee, weighted_lamee, step); - if (bDoneEquilibrating) - { - if (log) - { - fprintf(log, "\nStep %" PRId64 ": Weights have equilibrated, using criteria: %s\n", - step, elmceq_names[expand->elmceq]); - } - } - } - - // Accept / reject is handled by GROMACS (possibly with Plumed weights). - lamnew = ChooseNewLambda(nlim, expand, dfhist, fep_state, weighted_lamee, p_k, - ir->expandedvals->lmc_seed, step); - /* if using simulated tempering, we need to adjust the temperatures */ - if (ir->bSimTemp && (lamnew != fep_state)) /* only need to change the temperatures if we change the state */ - { - int i, j, n, d; - real* buf_ngtc; - real told; - int nstart, nend, gt; - - snew(buf_ngtc, ir->opts.ngtc); - - for (i = 0; i < ir->opts.ngtc; i++) - { - if (ir->opts.ref_t[i] > 0) - { - told = ir->opts.ref_t[i]; - ir->opts.ref_t[i] = simtemp->temperatures[lamnew]; - buf_ngtc[i] = std::sqrt(ir->opts.ref_t[i] / told); /* using the buffer as temperature scaling */ - } - } - - /* we don't need to manipulate the ekind information, as it isn't due to be reset until the next step anyway */ - - nstart = 0; - nend = mdatoms->homenr; - for (n = nstart; n < nend; n++) - { - gt = 0; - if (mdatoms->cTC) - { - gt = mdatoms->cTC[n]; - } - for (d = 0; d < DIM; d++) - { - v[n][d] *= buf_ngtc[gt]; - } - } - - if (inputrecNptTrotter(ir) || inputrecNphTrotter(ir) || 
inputrecNvtTrotter(ir)) - { - /* we need to recalculate the masses if the temperature has changed */ - init_npt_masses(ir, state, MassQ, FALSE); - for (i = 0; i < state->nnhpres; i++) - { - for (j = 0; j < ir->opts.nhchainlength; j++) - { - state->nhpres_vxi[i + j] *= buf_ngtc[i]; - } - } - for (i = 0; i < ir->opts.ngtc; i++) - { - for (j = 0; j < ir->opts.nhchainlength; j++) - { - state->nosehoover_vxi[i + j] *= buf_ngtc[i]; - } - } - } - sfree(buf_ngtc); - } - - /* now check on the Wang-Landau updating critera */ - - if (EWL(expand->elamstats)) - { - bSwitchtoOneOverT = FALSE; - if (expand->bWLoneovert) - { - totalsamples = 0; - for (i = 0; i < nlim; i++) - { - totalsamples += dfhist->n_at_lam[i]; - } - oneovert = (1.0 * nlim) / totalsamples; - /* oneovert has decreasd by a bit since last time, so we actually make sure its within one of this number */ - /* switch to 1/t incrementing when wl_delta has decreased at least once, and wl_delta is now less than 1/t */ - if ((dfhist->wl_delta <= ((totalsamples) / (totalsamples - 1.00001)) * oneovert) - && (dfhist->wl_delta < expand->init_wl_delta)) - { - bSwitchtoOneOverT = TRUE; - } - } - if (bSwitchtoOneOverT) - { - dfhist->wl_delta = - oneovert; /* now we reduce by this each time, instead of only at flatness */ - } - else - { - bIfReset = CheckHistogramRatios(nlim, dfhist->wl_histo, expand->wl_ratio); - if (bIfReset) - { - for (i = 0; i < nlim; i++) - { - dfhist->wl_histo[i] = 0; - } - dfhist->wl_delta *= expand->wl_scale; - if (log) - { - fprintf(log, "\nStep %d: weights are now:", static_cast(step)); - for (i = 0; i < nlim; i++) - { - fprintf(log, " %.5f", dfhist->sum_weights[i]); - } - fprintf(log, "\n"); - } - } - } - } - sfree(pfep_lamee); - sfree(scaled_lamee); - sfree(weighted_lamee); - sfree(p_k); - - return lamnew; -} diff --git a/patches/gromacs-2021.7.diff/src/gromacs/mdlib/expanded.cpp.preplumed b/patches/gromacs-2021.7.diff/src/gromacs/mdlib/expanded.cpp.preplumed deleted file mode 100644 index 
d48016a4d6..0000000000 --- a/patches/gromacs-2021.7.diff/src/gromacs/mdlib/expanded.cpp.preplumed +++ /dev/null @@ -1,1580 +0,0 @@ -/* - * This file is part of the GROMACS molecular simulation package. - * - * Copyright (c) 2012-2018, The GROMACS development team. - * Copyright (c) 2019,2020,2021, by the GROMACS development team, led by - * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl, - * and including many others, as listed in the AUTHORS file in the - * top-level source directory and at http://www.gromacs.org. - * - * GROMACS is free software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public License - * as published by the Free Software Foundation; either version 2.1 - * of the License, or (at your option) any later version. - * - * GROMACS is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public - * License along with GROMACS; if not, see - * http://www.gnu.org/licenses, or write to the Free Software Foundation, - * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. - * - * If you want to redistribute modifications to GROMACS, please - * consider that scientific software is very special. Version - * control is crucial - bugs must be traceable. We will be happy to - * consider code for inclusion in the official distribution, but - * derived work must not be called official GROMACS. Details are found - * in the README & COPYING files - if they are missing, get the - * official version at http://www.gromacs.org. - * - * To help us fund GROMACS development, we humbly ask that you cite - * the research papers on the package. Check out http://www.gromacs.org. 
- */ -#include "gmxpre.h" - -#include "expanded.h" - -#include -#include - -#include - -#include "gromacs/domdec/domdec.h" -#include "gromacs/fileio/confio.h" -#include "gromacs/fileio/gmxfio.h" -#include "gromacs/fileio/xtcio.h" -#include "gromacs/gmxlib/network.h" -#include "gromacs/gmxlib/nrnb.h" -#include "gromacs/listed_forces/disre.h" -#include "gromacs/listed_forces/orires.h" -#include "gromacs/math/functions.h" -#include "gromacs/math/units.h" -#include "gromacs/math/vec.h" -#include "gromacs/mdlib/calcmu.h" -#include "gromacs/mdlib/constr.h" -#include "gromacs/mdlib/force.h" -#include "gromacs/mdlib/update.h" -#include "gromacs/mdtypes/enerdata.h" -#include "gromacs/mdtypes/forcerec.h" -#include "gromacs/mdtypes/inputrec.h" -#include "gromacs/mdtypes/md_enums.h" -#include "gromacs/mdtypes/mdatom.h" -#include "gromacs/mdtypes/state.h" -#include "gromacs/random/threefry.h" -#include "gromacs/random/uniformrealdistribution.h" -#include "gromacs/timing/wallcycle.h" -#include "gromacs/utility/fatalerror.h" -#include "gromacs/utility/gmxmpi.h" -#include "gromacs/utility/smalloc.h" - -#include "expanded_internal.h" - -static void init_df_history_weights(df_history_t* dfhist, const t_expanded* expand, int nlim) -{ - int i; - dfhist->wl_delta = expand->init_wl_delta; - for (i = 0; i < nlim; i++) - { - dfhist->sum_weights[i] = expand->init_lambda_weights[i]; - dfhist->sum_dg[i] = expand->init_lambda_weights[i]; - } -} - -/* Eventually should contain all the functions needed to initialize expanded ensemble - before the md loop starts */ -void init_expanded_ensemble(gmx_bool bStateFromCP, const t_inputrec* ir, df_history_t* dfhist) -{ - if (!bStateFromCP) - { - init_df_history_weights(dfhist, ir->expandedvals, ir->fepvals->n_lambda); - } -} - -static void GenerateGibbsProbabilities(const real* ene, double* p_k, double* pks, int minfep, int maxfep) -{ - - int i; - real maxene; - - *pks = 0.0; - maxene = ene[minfep]; - /* find the maximum value */ - for (i = minfep; i 
<= maxfep; i++) - { - if (ene[i] > maxene) - { - maxene = ene[i]; - } - } - /* find the denominator */ - for (i = minfep; i <= maxfep; i++) - { - *pks += std::exp(ene[i] - maxene); - } - /*numerators*/ - for (i = minfep; i <= maxfep; i++) - { - p_k[i] = std::exp(ene[i] - maxene) / *pks; - } -} - -static void -GenerateWeightedGibbsProbabilities(const real* ene, double* p_k, double* pks, int nlim, real* nvals, real delta) -{ - - int i; - real maxene; - real* nene; - *pks = 0.0; - - snew(nene, nlim); - for (i = 0; i < nlim; i++) - { - if (nvals[i] == 0) - { - /* add the delta, since we need to make sure it's greater than zero, and - we need a non-arbitrary number? */ - nene[i] = ene[i] + std::log(nvals[i] + delta); - } - else - { - nene[i] = ene[i] + std::log(nvals[i]); - } - } - - /* find the maximum value */ - maxene = nene[0]; - for (i = 0; i < nlim; i++) - { - if (nene[i] > maxene) - { - maxene = nene[i]; - } - } - - /* subtract off the maximum, avoiding overflow */ - for (i = 0; i < nlim; i++) - { - nene[i] -= maxene; - } - - /* find the denominator */ - for (i = 0; i < nlim; i++) - { - *pks += std::exp(nene[i]); - } - - /*numerators*/ - for (i = 0; i < nlim; i++) - { - p_k[i] = std::exp(nene[i]) / *pks; - } - sfree(nene); -} - -static int FindMinimum(const real* min_metric, int N) -{ - - real min_val; - int min_nval, nval; - - min_nval = 0; - min_val = min_metric[0]; - - for (nval = 0; nval < N; nval++) - { - if (min_metric[nval] < min_val) - { - min_val = min_metric[nval]; - min_nval = nval; - } - } - return min_nval; -} - -static gmx_bool CheckHistogramRatios(int nhisto, const real* histo, real ratio) -{ - - int i; - real nmean; - gmx_bool bIfFlat; - - nmean = 0; - for (i = 0; i < nhisto; i++) - { - nmean += histo[i]; - } - - if (nmean == 0) - { - /* no samples! 
is bad!*/ - bIfFlat = FALSE; - return bIfFlat; - } - nmean /= static_cast(nhisto); - - bIfFlat = TRUE; - for (i = 0; i < nhisto; i++) - { - /* make sure that all points are in the ratio < x < 1/ratio range */ - if (!((histo[i] / nmean < 1.0 / ratio) && (histo[i] / nmean > ratio))) - { - bIfFlat = FALSE; - break; - } - } - return bIfFlat; -} - -static gmx_bool CheckIfDoneEquilibrating(int nlim, const t_expanded* expand, const df_history_t* dfhist, int64_t step) -{ - - int i, totalsamples; - gmx_bool bDoneEquilibrating = TRUE; - gmx_bool bIfFlat; - - /* If we are doing slow growth to get initial values, we haven't finished equilibrating */ - if (expand->lmc_forced_nstart > 0) - { - for (i = 0; i < nlim; i++) - { - if (dfhist->n_at_lam[i] - < expand->lmc_forced_nstart) /* we are still doing the initial sweep, so we're - definitely not done equilibrating*/ - { - bDoneEquilibrating = FALSE; - break; - } - } - } - else - { - /* assume we have equilibrated the weights, then check to see if any of the conditions are not met */ - bDoneEquilibrating = TRUE; - - /* calculate the total number of samples */ - switch (expand->elmceq) - { - case elmceqNO: - /* We have not equilibrated, and won't, ever. 
*/ - bDoneEquilibrating = FALSE; - break; - case elmceqYES: - /* we have equilibrated -- we're done */ - bDoneEquilibrating = TRUE; - break; - case elmceqSTEPS: - /* first, check if we are equilibrating by steps, if we're still under */ - if (step < expand->equil_steps) - { - bDoneEquilibrating = FALSE; - } - break; - case elmceqSAMPLES: - totalsamples = 0; - for (i = 0; i < nlim; i++) - { - totalsamples += dfhist->n_at_lam[i]; - } - if (totalsamples < expand->equil_samples) - { - bDoneEquilibrating = FALSE; - } - break; - case elmceqNUMATLAM: - for (i = 0; i < nlim; i++) - { - if (dfhist->n_at_lam[i] - < expand->equil_n_at_lam) /* we are still doing the initial sweep, so we're - definitely not done equilibrating*/ - { - bDoneEquilibrating = FALSE; - break; - } - } - break; - case elmceqWLDELTA: - if (EWL(expand->elamstats)) /* This check is in readir as well, but - just to be sure */ - { - if (dfhist->wl_delta > expand->equil_wl_delta) - { - bDoneEquilibrating = FALSE; - } - } - break; - case elmceqRATIO: - /* we can use the flatness as a judge of good weights, as long as - we're not doing minvar, or Wang-Landau. - But turn off for now until we figure out exactly how we do this. - */ - - if (!(EWL(expand->elamstats) || expand->elamstats == elamstatsMINVAR)) - { - /* we want to use flatness -avoiding- the forced-through samples. Plus, we need - to convert to floats for this histogram function. 
*/ - - real* modhisto; - snew(modhisto, nlim); - for (i = 0; i < nlim; i++) - { - modhisto[i] = 1.0 * (dfhist->n_at_lam[i] - expand->lmc_forced_nstart); - } - bIfFlat = CheckHistogramRatios(nlim, modhisto, expand->equil_ratio); - sfree(modhisto); - if (!bIfFlat) - { - bDoneEquilibrating = FALSE; - } - } - break; - default: bDoneEquilibrating = TRUE; break; - } - } - return bDoneEquilibrating; -} - -static gmx_bool UpdateWeights(int nlim, - t_expanded* expand, - df_history_t* dfhist, - int fep_state, - const real* scaled_lamee, - const real* weighted_lamee, - int64_t step) -{ - gmx_bool bSufficientSamples; - real acceptanceWeight; - int i; - int min_nvalm, min_nvalp, maxc; - real omega_m1_0, omega_p1_0; - real zero_sum_weights; - real *omegam_array, *weightsm_array, *omegap_array, *weightsp_array, *varm_array, *varp_array, - *dwp_array, *dwm_array; - real clam_varm, clam_varp, clam_osum, clam_weightsm, clam_weightsp, clam_minvar; - real * lam_variance, *lam_dg; - double* p_k; - double pks = 0; - - /* Future potential todos for this function (see #3848): - * - Update the names in the dhist structure to be clearer. Not done for now since this - * a bugfix update and we are mininizing other code changes. - * - Modularize the code some more. - * - potentially merge with accelerated weight histogram functionality, since it's very similar. 
- */ - /* if we have equilibrated the expanded ensemble weights, we are not updating them, so exit now */ - if (dfhist->bEquil) - { - return FALSE; - } - - if (CheckIfDoneEquilibrating(nlim, expand, dfhist, step)) - { - dfhist->bEquil = TRUE; - /* zero out the visited states so we know how many equilibrated states we have - from here on out.*/ - for (i = 0; i < nlim; i++) - { - dfhist->n_at_lam[i] = 0; - } - return TRUE; - } - - /* If we reached this far, we have not equilibrated yet, keep on - going resetting the weights */ - - if (EWL(expand->elamstats)) - { - if (expand->elamstats == elamstatsWL) /* Using standard Wang-Landau for weight updates */ - { - dfhist->sum_weights[fep_state] -= dfhist->wl_delta; - dfhist->wl_histo[fep_state] += 1.0; - } - else if (expand->elamstats == elamstatsWWL) - /* Using weighted Wang-Landau for weight updates. - * Very closly equivalent to accelerated weight histogram approach - * applied to expanded ensemble. */ - { - snew(p_k, nlim); - - /* first increment count */ - GenerateGibbsProbabilities(weighted_lamee, p_k, &pks, 0, nlim - 1); - for (i = 0; i < nlim; i++) - { - dfhist->wl_histo[i] += static_cast(p_k[i]); - } - - /* then increment weights (uses count) */ - pks = 0.0; - GenerateWeightedGibbsProbabilities(weighted_lamee, p_k, &pks, nlim, dfhist->wl_histo, - dfhist->wl_delta); - - for (i = 0; i < nlim; i++) - { - dfhist->sum_weights[i] -= dfhist->wl_delta * static_cast(p_k[i]); - } - /* Alternate definition, using logarithms. Shouldn't make very much difference! 
*/ - /* - real di; - for (i=0;iwl_delta*(real)p_k[i]; - dfhist->sum_weights[i] -= log(di); - } - */ - sfree(p_k); - } - - zero_sum_weights = dfhist->sum_weights[0]; - for (i = 0; i < nlim; i++) - { - dfhist->sum_weights[i] -= zero_sum_weights; - } - } - - if (expand->elamstats == elamstatsBARKER || expand->elamstats == elamstatsMETROPOLIS - || expand->elamstats == elamstatsMINVAR) - { - maxc = 2 * expand->c_range + 1; - - snew(lam_dg, nlim); - snew(lam_variance, nlim); - - snew(omegap_array, maxc); - snew(weightsp_array, maxc); - snew(varp_array, maxc); - snew(dwp_array, maxc); - - snew(omegam_array, maxc); - snew(weightsm_array, maxc); - snew(varm_array, maxc); - snew(dwm_array, maxc); - - /* unpack the values of the free energy differences and the - * variance in their estimates between nearby lambdas. We will - * only actually update 2 of these, the state we are currently - * at and the one we end up moving to - */ - - for (i = 0; i < nlim - 1; i++) - { /* only through the second to last */ - lam_dg[i] = dfhist->sum_dg[i + 1] - dfhist->sum_dg[i]; - lam_variance[i] = - gmx::square(dfhist->sum_variance[i + 1]) - gmx::square(dfhist->sum_variance[i]); - } - - /* accumulate running averages of thermodynamic averages for Bennett Acceptance Ratio-based - * estimates of the free energy . - * Rather than peforming self-consistent estimation of the free energies at each step, - * we keep track of an array of possible different free energies (cnvals), - * and we self-consistently choose the best one. The one that leads to a free energy estimate - * that is closest to itself is the best estimate of the free energy. It is essentially a - * parallellized version of self-consistent iteration. maxc is the number of these constants. */ - - for (int nval = 0; nval < maxc; nval++) - { - const real cnval = static_cast(nval - expand->c_range); - - /* Compute acceptance criterion weight to the state below this one for use in averages. 
- * Note we do not have to have just moved from that state to use this free energy - * estimate; these are essentially "virtual" moves. */ - - if (fep_state > 0) - { - const auto lambdaEnergyDifference = - cnval - (scaled_lamee[fep_state] - scaled_lamee[fep_state - 1]); - acceptanceWeight = - gmx::calculateAcceptanceWeight(expand->elamstats, lambdaEnergyDifference); - dfhist->accum_m[fep_state][nval] += acceptanceWeight; - dfhist->accum_m2[fep_state][nval] += acceptanceWeight * acceptanceWeight; - } - - // Compute acceptance criterion weight to transition to the next state - if (fep_state < nlim - 1) - { - const auto lambdaEnergyDifference = - -cnval + (scaled_lamee[fep_state + 1] - scaled_lamee[fep_state]); - acceptanceWeight = - gmx::calculateAcceptanceWeight(expand->elamstats, lambdaEnergyDifference); - dfhist->accum_p[fep_state][nval] += acceptanceWeight; - dfhist->accum_p2[fep_state][nval] += acceptanceWeight * acceptanceWeight; - } - - /* Determination of Metropolis transition and Barker transition weights */ - - int numObservationsCurrentState = dfhist->n_at_lam[fep_state]; - /* determine the number of observations above and below the current state */ - int numObservationsLowerState = 0; - if (fep_state > 0) - { - numObservationsLowerState = dfhist->n_at_lam[fep_state - 1]; - } - int numObservationsHigherState = 0; - if (fep_state < nlim - 1) - { - numObservationsHigherState = dfhist->n_at_lam[fep_state + 1]; - } - - /* Calculate the biases for each expanded ensemble state that minimize the total - * variance, as implemented in Martinez-Veracoechea and Escobedo, - * J. Phys. Chem. 
B 2008, 112, 8120-8128 - * - * The variance associated with the free energy estimate between two states i and j - * is calculated as - * Var(i,j) = {avg[xi(i->j)^2] / avg[xi(i->j)]^2 - 1} / numObservations(i->j) - * + {avg[xi(j->i)^2] / avg[xi(j->i)]^2 - 1} / numObservations(j->i) - * where xi(i->j) is the acceptance factor / weight associated with moving from state i to j - * As we are calculating the acceptance factor to the neighbors every time we're visiting - * a state, numObservations(i->j) == numObservations(i) and numObservations(j->i) == numObservations(j) - */ - - /* Accumulation of acceptance weight averages between the current state and the - * states +1 (p1) and -1 (m1), averaged at current state (0) - */ - real avgAcceptanceCurrentToLower = 0; - real avgAcceptanceCurrentToHigher = 0; - /* Accumulation of acceptance weight averages quantities between states 0 - * and states +1 and -1, squared - */ - real avgAcceptanceCurrentToLowerSquared = 0; - real avgAcceptanceCurrentToHigherSquared = 0; - /* Accumulation of free energy quantities from lower state (m1) to current state (0) and squared */ - real avgAcceptanceLowerToCurrent = 0; - real avgAcceptanceLowerToCurrentSquared = 0; - /* Accumulation of free energy quantities from upper state (p1) to current state (0) and squared */ - real avgAcceptanceHigherToCurrent = 0; - real avgAcceptanceHigherToCurrentSquared = 0; - - if (numObservationsCurrentState > 0) - { - avgAcceptanceCurrentToLower = dfhist->accum_m[fep_state][nval] / numObservationsCurrentState; - avgAcceptanceCurrentToHigher = - dfhist->accum_p[fep_state][nval] / numObservationsCurrentState; - avgAcceptanceCurrentToLowerSquared = - dfhist->accum_m2[fep_state][nval] / numObservationsCurrentState; - avgAcceptanceCurrentToHigherSquared = - dfhist->accum_p2[fep_state][nval] / numObservationsCurrentState; - } - - if ((fep_state > 0) && (numObservationsLowerState > 0)) - { - avgAcceptanceLowerToCurrent = - dfhist->accum_p[fep_state - 1][nval] / 
numObservationsLowerState; - avgAcceptanceLowerToCurrentSquared = - dfhist->accum_p2[fep_state - 1][nval] / numObservationsLowerState; - } - - if ((fep_state < nlim - 1) && (numObservationsHigherState > 0)) - { - avgAcceptanceHigherToCurrent = - dfhist->accum_m[fep_state + 1][nval] / numObservationsHigherState; - avgAcceptanceHigherToCurrentSquared = - dfhist->accum_m2[fep_state + 1][nval] / numObservationsHigherState; - } - /* These are accumulation of positive values (see definition of acceptance functions - * above), or of squares of positive values. - * We're taking this for granted in the following calculation, so make sure - * here that nothing weird happened. Although technically all values should be positive, - * because of floating point precisions, they might be numerically zero. */ - GMX_RELEASE_ASSERT( - avgAcceptanceCurrentToLower >= 0 && avgAcceptanceCurrentToLowerSquared >= 0 - && avgAcceptanceCurrentToHigher >= 0 - && avgAcceptanceCurrentToHigherSquared >= 0 && avgAcceptanceLowerToCurrent >= 0 - && avgAcceptanceLowerToCurrentSquared >= 0 && avgAcceptanceHigherToCurrent >= 0 - && avgAcceptanceHigherToCurrentSquared >= 0, - "By definition, the acceptance factors should all be nonnegative."); - - real varianceCurrentToLower = 0; - real varianceCurrentToHigher = 0; - real weightDifferenceToLower = 0; - real weightDifferenceToHigher = 0; - real varianceToLower = 0; - real varianceToHigher = 0; - - if (fep_state > 0) - { - if (numObservationsCurrentState > 0) - { - /* Calculate {avg[xi(i->j)^2] / avg[xi(i->j)]^2 - 1} - * - * Note that if avg[xi(i->j)] == 0, also avg[xi(i->j)^2] == 0 (since the - * acceptances are all positive!), and hence - * {avg[xi(i->j)^2] / avg[xi(i->j)]^2 - 1} -> 0 for avg[xi(i->j)] -> 0 - * We're catching that case explicitly to avoid numerical - * problems dividing by zero when the overlap between states is small (#3304) - */ - if (avgAcceptanceCurrentToLower > 0) - { - varianceCurrentToLower = - avgAcceptanceCurrentToLowerSquared 
- / (avgAcceptanceCurrentToLower * avgAcceptanceCurrentToLower) - - 1.0; - } - if (numObservationsLowerState > 0) - { - /* Calculate {avg[xi(i->j)^2] / avg[xi(i->j)]^2 - 1} - * - * Note that if avg[xi(i->j)] == 0, also avg[xi(i->j)^2] == 0 (since the - * acceptances are all positive!), and hence - * {avg[xi(i->j)^2] / avg[xi(i->j)]^2 - 1} -> 0 for avg[xi(i->j)] -> 0 - * We're catching that case explicitly to avoid numerical - * problems dividing by zero when the overlap between states is small (#3304) - */ - real varianceLowerToCurrent = 0; - if (avgAcceptanceLowerToCurrent > 0) - { - varianceLowerToCurrent = - avgAcceptanceLowerToCurrentSquared - / (avgAcceptanceLowerToCurrent * avgAcceptanceLowerToCurrent) - - 1.0; - } - /* Free energy difference to the state one state lower */ - /* if these either of these quantities are zero, the energies are */ - /* way too large for the dynamic range. We need an alternate guesstimate */ - if ((avgAcceptanceCurrentToLower == 0) || (avgAcceptanceLowerToCurrent == 0)) - { - weightDifferenceToLower = - (scaled_lamee[fep_state] - scaled_lamee[fep_state - 1]); - } - else - { - weightDifferenceToLower = (std::log(avgAcceptanceCurrentToLower) - - std::log(avgAcceptanceLowerToCurrent)) - + cnval; - } - /* Variance of the free energy difference to the one state lower */ - varianceToLower = - (1.0 / numObservationsCurrentState) * (varianceCurrentToLower) - + (1.0 / numObservationsLowerState) * (varianceLowerToCurrent); - } - } - } - - if (fep_state < nlim - 1) - { - if (numObservationsCurrentState > 0) - { - /* Calculate {avg[xi(i->j)^2] / avg[xi(i->j)]^2 - 1} - * - * Note that if avg[xi(i->j)] == 0, also avg[xi(i->j)^2] == 0 (since the - * acceptances are all positive!), and hence - * {avg[xi(i->j)^2] / avg[xi(i->j)]^2 - 1} -> 0 for avg[xi(i->j)] -> 0 - * We're catching that case explicitly to avoid numerical - * problems dividing by zero when the overlap between states is small (#3304) - */ - - if (avgAcceptanceCurrentToHigher < 0) - 
{ - varianceCurrentToHigher = - avgAcceptanceCurrentToHigherSquared - / (avgAcceptanceCurrentToHigher * avgAcceptanceCurrentToHigher) - - 1.0; - } - if (numObservationsHigherState > 0) - { - /* Calculate {avg[xi(i->j)^2] / avg[xi(i->j)]^2 - 1} - * - * Note that if avg[xi(i->j)] == 0, also avg[xi(i->j)^2] == 0 (since the - * acceptances are all positive!), and hence - * {avg[xi(i->j)^2] / avg[xi(i->j)]^2 - 1} -> 0 for avg[xi(i->j)] -> 0 - * We're catching that case explicitly to avoid numerical - * problems dividing by zero when the overlap between states is small (#3304) - */ - real varianceHigherToCurrent = 0; - if (avgAcceptanceHigherToCurrent > 0) - { - varianceHigherToCurrent = - avgAcceptanceHigherToCurrentSquared - / (avgAcceptanceHigherToCurrent * avgAcceptanceHigherToCurrent) - - 1.0; - } - /* Free energy difference to the state one state higher */ - /* if these either of these quantities are zero, the energies are */ - /* way too large for the dynamic range. We need an alternate guesstimate */ - if ((avgAcceptanceHigherToCurrent == 0) || (avgAcceptanceCurrentToHigher == 0)) - { - weightDifferenceToHigher = - (scaled_lamee[fep_state + 1] - scaled_lamee[fep_state]); - } - else - { - weightDifferenceToHigher = (std::log(avgAcceptanceHigherToCurrent) - - std::log(avgAcceptanceCurrentToHigher)) - + cnval; - } - /* Variance of the free energy difference to the one state higher */ - varianceToHigher = - (1.0 / numObservationsHigherState) * (varianceHigherToCurrent) - + (1.0 / numObservationsCurrentState) * (varianceCurrentToHigher); - } - } - } - - if (numObservationsCurrentState > 0) - { - omegam_array[nval] = varianceCurrentToLower; - } - else - { - omegam_array[nval] = 0; - } - weightsm_array[nval] = weightDifferenceToLower; - varm_array[nval] = varianceToLower; - if (numObservationsLowerState > 0) - { - dwm_array[nval] = - fabs((cnval + std::log((1.0 * numObservationsCurrentState) / numObservationsLowerState)) - - lam_dg[fep_state - 1]); - } - else - { - 
dwm_array[nval] = std::fabs(cnval - lam_dg[fep_state - 1]); - } - - if (numObservationsCurrentState > 0) - { - omegap_array[nval] = varianceCurrentToHigher; - } - else - { - omegap_array[nval] = 0; - } - weightsp_array[nval] = weightDifferenceToHigher; - varp_array[nval] = varianceToHigher; - if ((numObservationsHigherState > 0) && (numObservationsCurrentState > 0)) - { - dwp_array[nval] = - fabs((cnval + std::log((1.0 * numObservationsHigherState) / numObservationsCurrentState)) - - lam_dg[fep_state]); - } - else - { - dwp_array[nval] = std::fabs(cnval - lam_dg[fep_state]); - } - } - - /* find the free energy estimate closest to the guessed weight's value */ - - min_nvalm = FindMinimum(dwm_array, maxc); - omega_m1_0 = omegam_array[min_nvalm]; - clam_weightsm = weightsm_array[min_nvalm]; - clam_varm = varm_array[min_nvalm]; - - min_nvalp = FindMinimum(dwp_array, maxc); - omega_p1_0 = omegap_array[min_nvalp]; - clam_weightsp = weightsp_array[min_nvalp]; - clam_varp = varp_array[min_nvalp]; - - clam_osum = omega_m1_0 + omega_p1_0; - clam_minvar = 0; - if (clam_osum > 0) - { - clam_minvar = 0.5 * std::log(clam_osum); - } - - if (fep_state > 0) - { - lam_dg[fep_state - 1] = clam_weightsm; - lam_variance[fep_state - 1] = clam_varm; - } - - if (fep_state < nlim - 1) - { - lam_dg[fep_state] = clam_weightsp; - lam_variance[fep_state] = clam_varp; - } - - if (expand->elamstats == elamstatsMINVAR) - { - bSufficientSamples = TRUE; - /* make sure the number of samples in each state are all - * past a user-specified threshold - */ - for (i = 0; i < nlim; i++) - { - if (dfhist->n_at_lam[i] < expand->minvarmin) - { - bSufficientSamples = FALSE; - } - } - if (bSufficientSamples) - { - dfhist->sum_minvar[fep_state] = clam_minvar; - if (fep_state == 0) - { - for (i = 0; i < nlim; i++) - { - dfhist->sum_minvar[i] += (expand->minvar_const - clam_minvar); - } - expand->minvar_const = clam_minvar; - dfhist->sum_minvar[fep_state] = 0.0; - } - else - { - dfhist->sum_minvar[fep_state] -= 
expand->minvar_const; - } - } - } - - /* we need to rezero minvar now, since it could change at fep_state = 0 */ - dfhist->sum_dg[0] = 0.0; - dfhist->sum_variance[0] = 0.0; - dfhist->sum_weights[0] = dfhist->sum_dg[0] + dfhist->sum_minvar[0]; /* should be zero */ - - for (i = 1; i < nlim; i++) - { - dfhist->sum_dg[i] = lam_dg[i - 1] + dfhist->sum_dg[i - 1]; - dfhist->sum_variance[i] = - std::sqrt(lam_variance[i - 1] + gmx::square(dfhist->sum_variance[i - 1])); - dfhist->sum_weights[i] = dfhist->sum_dg[i] + dfhist->sum_minvar[i]; - } - - sfree(lam_dg); - sfree(lam_variance); - - sfree(omegam_array); - sfree(weightsm_array); - sfree(varm_array); - sfree(dwm_array); - - sfree(omegap_array); - sfree(weightsp_array); - sfree(varp_array); - sfree(dwp_array); - } - return FALSE; -} - -static int ChooseNewLambda(int nlim, - const t_expanded* expand, - df_history_t* dfhist, - int fep_state, - const real* weighted_lamee, - double* p_k, - int64_t seed, - int64_t step) -{ - /* Choose new lambda value, and update transition matrix */ - - int i, ifep, minfep, maxfep, lamnew, lamtrial, starting_fep_state; - real r1, r2, de, trialprob, tprob = 0; - double * propose, *accept, *remainder; - double pks; - real pnorm; - gmx::ThreeFry2x64<0> rng( - seed, gmx::RandomDomain::ExpandedEnsemble); // We only draw once, so zero bits internal counter is fine - gmx::UniformRealDistribution dist; - - starting_fep_state = fep_state; - lamnew = fep_state; /* so that there is a default setting -- stays the same */ - - if (!EWL(expand->elamstats)) /* ignore equilibrating the weights if using WL */ - { - if ((expand->lmc_forced_nstart > 0) && (dfhist->n_at_lam[nlim - 1] <= expand->lmc_forced_nstart)) - { - /* Use a marching method to run through the lambdas and get preliminary free energy data, - before starting 'free' sampling. 
We start free sampling when we have enough at each lambda */ - - /* if we have enough at this lambda, move on to the next one */ - - if (dfhist->n_at_lam[fep_state] == expand->lmc_forced_nstart) - { - lamnew = fep_state + 1; - if (lamnew == nlim) /* whoops, stepped too far! */ - { - lamnew -= 1; - } - } - else - { - lamnew = fep_state; - } - return lamnew; - } - } - - snew(propose, nlim); - snew(accept, nlim); - snew(remainder, nlim); - - for (i = 0; i < expand->lmc_repeats; i++) - { - rng.restart(step, i); - dist.reset(); - - for (ifep = 0; ifep < nlim; ifep++) - { - propose[ifep] = 0; - accept[ifep] = 0; - } - - if ((expand->elmcmove == elmcmoveGIBBS) || (expand->elmcmove == elmcmoveMETGIBBS)) - { - /* use the Gibbs sampler, with restricted range */ - if (expand->gibbsdeltalam < 0) - { - minfep = 0; - maxfep = nlim - 1; - } - else - { - minfep = fep_state - expand->gibbsdeltalam; - maxfep = fep_state + expand->gibbsdeltalam; - if (minfep < 0) - { - minfep = 0; - } - if (maxfep > nlim - 1) - { - maxfep = nlim - 1; - } - } - - GenerateGibbsProbabilities(weighted_lamee, p_k, &pks, minfep, maxfep); - - if (expand->elmcmove == elmcmoveGIBBS) - { - for (ifep = minfep; ifep <= maxfep; ifep++) - { - propose[ifep] = p_k[ifep]; - accept[ifep] = 1.0; - } - /* Gibbs sampling */ - r1 = dist(rng); - for (lamnew = minfep; lamnew <= maxfep; lamnew++) - { - if (r1 <= p_k[lamnew]) - { - break; - } - r1 -= p_k[lamnew]; - } - } - else if (expand->elmcmove == elmcmoveMETGIBBS) - { - - /* Metropolized Gibbs sampling */ - for (ifep = minfep; ifep <= maxfep; ifep++) - { - remainder[ifep] = 1 - p_k[ifep]; - } - - /* find the proposal probabilities */ - - if (remainder[fep_state] == 0) - { - /* only the current state has any probability */ - /* we have to stay at the current state */ - lamnew = fep_state; - } - else - { - for (ifep = minfep; ifep <= maxfep; ifep++) - { - if (ifep != fep_state) - { - propose[ifep] = p_k[ifep] / remainder[fep_state]; - } - else - { - propose[ifep] = 0; - } 
- } - - r1 = dist(rng); - for (lamtrial = minfep; lamtrial <= maxfep; lamtrial++) - { - pnorm = p_k[lamtrial] / remainder[fep_state]; - if (lamtrial != fep_state) - { - if (r1 <= pnorm) - { - break; - } - r1 -= pnorm; - } - } - - /* we have now selected lamtrial according to p(lamtrial)/1-p(fep_state) */ - tprob = 1.0; - /* trial probability is min{1,\frac{1 - p(old)}{1-p(new)} MRS 1/8/2008 */ - trialprob = (remainder[fep_state]) / (remainder[lamtrial]); - if (trialprob < tprob) - { - tprob = trialprob; - } - r2 = dist(rng); - if (r2 < tprob) - { - lamnew = lamtrial; - } - else - { - lamnew = fep_state; - } - } - - /* now figure out the acceptance probability for each */ - for (ifep = minfep; ifep <= maxfep; ifep++) - { - tprob = 1.0; - if (remainder[ifep] != 0) - { - trialprob = (remainder[fep_state]) / (remainder[ifep]); - } - else - { - trialprob = 1.0; /* this state is the only choice! */ - } - if (trialprob < tprob) - { - tprob = trialprob; - } - /* probability for fep_state=0, but that's fine, it's never proposed! */ - accept[ifep] = tprob; - } - } - - if (lamnew > maxfep) - { - /* it's possible some rounding is failing */ - if (gmx_within_tol(remainder[fep_state], 0, 50 * GMX_DOUBLE_EPS)) - { - /* numerical rounding error -- no state other than the original has weight */ - lamnew = fep_state; - } - else - { - /* probably not a numerical issue */ - int loc = 0; - int nerror = 200 + (maxfep - minfep + 1) * 60; - char* errorstr; - snew(errorstr, nerror); - /* if its greater than maxfep, then something went wrong -- probably underflow - in the calculation of sum weights. 
Generated detailed info for failure */ - loc += sprintf( - errorstr, - "Something wrong in choosing new lambda state with a Gibbs move -- " - "probably underflow in weight determination.\nDenominator is: " - "%3d%17.10e\n i dE numerator weights\n", - 0, pks); - for (ifep = minfep; ifep <= maxfep; ifep++) - { - loc += sprintf(&errorstr[loc], "%3d %17.10e%17.10e%17.10e\n", ifep, - weighted_lamee[ifep], p_k[ifep], dfhist->sum_weights[ifep]); - } - gmx_fatal(FARGS, "%s", errorstr); - } - } - } - else if ((expand->elmcmove == elmcmoveMETROPOLIS) || (expand->elmcmove == elmcmoveBARKER)) - { - /* use the metropolis sampler with trial +/- 1 */ - r1 = dist(rng); - if (r1 < 0.5) - { - if (fep_state == 0) - { - lamtrial = fep_state; - } - else - { - lamtrial = fep_state - 1; - } - } - else - { - if (fep_state == nlim - 1) - { - lamtrial = fep_state; - } - else - { - lamtrial = fep_state + 1; - } - } - - de = weighted_lamee[lamtrial] - weighted_lamee[fep_state]; - if (expand->elmcmove == elmcmoveMETROPOLIS) - { - tprob = 1.0; - if (de < 0) - { - tprob = std::exp(de); - } - propose[fep_state] = 0; - propose[lamtrial] = 1.0; /* note that this overwrites the above line if fep_state = ntrial, which only occurs at the ends */ - accept[fep_state] = - 1.0; /* doesn't actually matter, never proposed unless fep_state = ntrial, in which case it's 1.0 anyway */ - accept[lamtrial] = tprob; - } - else if (expand->elmcmove == elmcmoveBARKER) - { - if (de > 0) /* Numerically stable version */ - { - tprob = 1.0 / (1.0 + std::exp(-de)); - } - else if (de < 0) - { - tprob = std::exp(de) / (std::exp(de) + 1.0); - } - propose[fep_state] = (1 - tprob); - propose[lamtrial] += - tprob; /* we add, to account for the fact that at the end, they might be the same point */ - accept[fep_state] = 1.0; - accept[lamtrial] = 1.0; - } - - r2 = dist(rng); - if (r2 < tprob) - { - lamnew = lamtrial; - } - else - { - lamnew = fep_state; - } - } - - for (ifep = 0; ifep < nlim; ifep++) - { - 
dfhist->Tij[fep_state][ifep] += propose[ifep] * accept[ifep]; - dfhist->Tij[fep_state][fep_state] += propose[ifep] * (1.0 - accept[ifep]); - } - fep_state = lamnew; - } - - dfhist->Tij_empirical[starting_fep_state][lamnew] += 1.0; - - sfree(propose); - sfree(accept); - sfree(remainder); - - return lamnew; -} - -/* print out the weights to the log, along with current state */ -void PrintFreeEnergyInfoToFile(FILE* outfile, - const t_lambda* fep, - const t_expanded* expand, - const t_simtemp* simtemp, - const df_history_t* dfhist, - int fep_state, - int frequency, - int64_t step) -{ - int nlim, i, ifep, jfep; - real dw, dg, dv, Tprint; - const char* print_names[efptNR] = { " FEPL", "MassL", "CoulL", " VdwL", - "BondL", "RestT", "Temp.(K)" }; - gmx_bool bSimTemp = FALSE; - - nlim = fep->n_lambda; - if (simtemp != nullptr) - { - bSimTemp = TRUE; - } - - if (step % frequency == 0) - { - fprintf(outfile, " MC-lambda information\n"); - if (EWL(expand->elamstats) && (!(dfhist->bEquil))) - { - fprintf(outfile, " Wang-Landau incrementor is: %11.5g\n", dfhist->wl_delta); - } - fprintf(outfile, " N"); - for (i = 0; i < efptNR; i++) - { - if (fep->separate_dvdl[i]) - { - fprintf(outfile, "%7s", print_names[i]); - } - else if ((i == efptTEMPERATURE) && bSimTemp) - { - fprintf(outfile, "%10s", print_names[i]); /* more space for temperature formats */ - } - } - fprintf(outfile, " Count "); - if (expand->elamstats == elamstatsMINVAR) - { - fprintf(outfile, "W(in kT) G(in kT) dG(in kT) dV(in kT)\n"); - } - else - { - fprintf(outfile, "G(in kT) dG(in kT)\n"); - } - for (ifep = 0; ifep < nlim; ifep++) - { - if (ifep == nlim - 1) - { - dw = 0.0; - dg = 0.0; - dv = 0.0; - } - else - { - dw = dfhist->sum_weights[ifep + 1] - dfhist->sum_weights[ifep]; - dg = dfhist->sum_dg[ifep + 1] - dfhist->sum_dg[ifep]; - dv = std::sqrt(gmx::square(dfhist->sum_variance[ifep + 1]) - - gmx::square(dfhist->sum_variance[ifep])); - } - fprintf(outfile, "%3d", (ifep + 1)); - for (i = 0; i < efptNR; i++) - { - 
if (fep->separate_dvdl[i]) - { - fprintf(outfile, "%7.3f", fep->all_lambda[i][ifep]); - } - else if (i == efptTEMPERATURE && bSimTemp) - { - fprintf(outfile, "%9.3f", simtemp->temperatures[ifep]); - } - } - if (EWL(expand->elamstats) - && (!(dfhist->bEquil))) /* if performing WL and still haven't equilibrated */ - { - if (expand->elamstats == elamstatsWL) - { - fprintf(outfile, " %8d", static_cast(dfhist->wl_histo[ifep])); - } - else - { - fprintf(outfile, " %8.3f", dfhist->wl_histo[ifep]); - } - } - else /* we have equilibrated weights */ - { - fprintf(outfile, " %8d", dfhist->n_at_lam[ifep]); - } - if (expand->elamstats == elamstatsMINVAR) - { - fprintf(outfile, " %10.5f %10.5f %10.5f %10.5f", dfhist->sum_weights[ifep], - dfhist->sum_dg[ifep], dg, dv); - } - else - { - fprintf(outfile, " %10.5f %10.5f", dfhist->sum_weights[ifep], dw); - } - if (ifep == fep_state) - { - fprintf(outfile, " <<\n"); - } - else - { - fprintf(outfile, " \n"); - } - } - fprintf(outfile, "\n"); - - if ((step % expand->nstTij == 0) && (expand->nstTij > 0) && (step > 0)) - { - fprintf(outfile, " Transition Matrix\n"); - for (ifep = 0; ifep < nlim; ifep++) - { - fprintf(outfile, "%12d", (ifep + 1)); - } - fprintf(outfile, "\n"); - for (ifep = 0; ifep < nlim; ifep++) - { - for (jfep = 0; jfep < nlim; jfep++) - { - if (dfhist->n_at_lam[ifep] > 0) - { - if (expand->bSymmetrizedTMatrix) - { - Tprint = (dfhist->Tij[ifep][jfep] + dfhist->Tij[jfep][ifep]) - / (dfhist->n_at_lam[ifep] + dfhist->n_at_lam[jfep]); - } - else - { - Tprint = (dfhist->Tij[ifep][jfep]) / (dfhist->n_at_lam[ifep]); - } - } - else - { - Tprint = 0.0; - } - fprintf(outfile, "%12.8f", Tprint); - } - fprintf(outfile, "%3d\n", (ifep + 1)); - } - - fprintf(outfile, " Empirical Transition Matrix\n"); - for (ifep = 0; ifep < nlim; ifep++) - { - fprintf(outfile, "%12d", (ifep + 1)); - } - fprintf(outfile, "\n"); - for (ifep = 0; ifep < nlim; ifep++) - { - for (jfep = 0; jfep < nlim; jfep++) - { - if (dfhist->n_at_lam[ifep] > 0) - { - 
if (expand->bSymmetrizedTMatrix) - { - Tprint = (dfhist->Tij_empirical[ifep][jfep] + dfhist->Tij_empirical[jfep][ifep]) - / (dfhist->n_at_lam[ifep] + dfhist->n_at_lam[jfep]); - } - else - { - Tprint = dfhist->Tij_empirical[ifep][jfep] / (dfhist->n_at_lam[ifep]); - } - } - else - { - Tprint = 0.0; - } - fprintf(outfile, "%12.8f", Tprint); - } - fprintf(outfile, "%3d\n", (ifep + 1)); - } - } - } -} - -int ExpandedEnsembleDynamics(FILE* log, - const t_inputrec* ir, - const gmx_enerdata_t* enerd, - t_state* state, - t_extmass* MassQ, - int fep_state, - df_history_t* dfhist, - int64_t step, - rvec* v, - const t_mdatoms* mdatoms) -/* Note that the state variable is only needed for simulated tempering, not - Hamiltonian expanded ensemble. May be able to remove it after integrator refactoring. */ -{ - real * pfep_lamee, *scaled_lamee, *weighted_lamee; - double* p_k; - int i, nlim, lamnew, totalsamples; - real oneovert, maxscaled = 0, maxweighted = 0; - t_expanded* expand; - t_simtemp* simtemp; - gmx_bool bIfReset, bSwitchtoOneOverT, bDoneEquilibrating = FALSE; - - expand = ir->expandedvals; - simtemp = ir->simtempvals; - nlim = ir->fepvals->n_lambda; - - snew(scaled_lamee, nlim); - snew(weighted_lamee, nlim); - snew(pfep_lamee, nlim); - snew(p_k, nlim); - - /* update the count at the current lambda*/ - dfhist->n_at_lam[fep_state]++; - - /* need to calculate the PV term somewhere, but not needed here? Not until there's a lambda - state that's pressure controlled.*/ - /* - pVTerm = 0; - where does this PV term go? - for (i=0;iefep != efepNO) - { - for (i = 0; i < nlim; i++) - { - if (ir->bSimTemp) - { - /* Note -- this assumes no mass changes, since kinetic energy is not added . . . 
*/ - scaled_lamee[i] = enerd->foreignLambdaTerms.deltaH(i) / (simtemp->temperatures[i] * BOLTZ) - + enerd->term[F_EPOT] - * (1.0 / (simtemp->temperatures[i]) - - 1.0 / (simtemp->temperatures[fep_state])) - / BOLTZ; - } - else - { - scaled_lamee[i] = enerd->foreignLambdaTerms.deltaH(i) / (expand->mc_temp * BOLTZ); - /* mc_temp is currently set to the system reft unless otherwise defined */ - } - - /* save these energies for printing, so they don't get overwritten by the next step */ - /* they aren't overwritten in the non-free energy case, but we always print with these - for simplicity */ - } - } - else - { - if (ir->bSimTemp) - { - for (i = 0; i < nlim; i++) - { - scaled_lamee[i] = - enerd->term[F_EPOT] - * (1.0 / simtemp->temperatures[i] - 1.0 / simtemp->temperatures[fep_state]) / BOLTZ; - } - } - } - - for (i = 0; i < nlim; i++) - { - pfep_lamee[i] = scaled_lamee[i]; - - weighted_lamee[i] = dfhist->sum_weights[i] - scaled_lamee[i]; - if (i == 0) - { - maxscaled = scaled_lamee[i]; - maxweighted = weighted_lamee[i]; - } - else - { - if (scaled_lamee[i] > maxscaled) - { - maxscaled = scaled_lamee[i]; - } - if (weighted_lamee[i] > maxweighted) - { - maxweighted = weighted_lamee[i]; - } - } - } - - for (i = 0; i < nlim; i++) - { - scaled_lamee[i] -= maxscaled; - weighted_lamee[i] -= maxweighted; - } - - /* update weights - we decide whether or not to actually do this inside */ - - bDoneEquilibrating = - UpdateWeights(nlim, expand, dfhist, fep_state, scaled_lamee, weighted_lamee, step); - if (bDoneEquilibrating) - { - if (log) - { - fprintf(log, "\nStep %" PRId64 ": Weights have equilibrated, using criteria: %s\n", - step, elmceq_names[expand->elmceq]); - } - } - - lamnew = ChooseNewLambda(nlim, expand, dfhist, fep_state, weighted_lamee, p_k, - ir->expandedvals->lmc_seed, step); - /* if using simulated tempering, we need to adjust the temperatures */ - if (ir->bSimTemp && (lamnew != fep_state)) /* only need to change the temperatures if we change the state */ - { - 
int i, j, n, d; - real* buf_ngtc; - real told; - int nstart, nend, gt; - - snew(buf_ngtc, ir->opts.ngtc); - - for (i = 0; i < ir->opts.ngtc; i++) - { - if (ir->opts.ref_t[i] > 0) - { - told = ir->opts.ref_t[i]; - ir->opts.ref_t[i] = simtemp->temperatures[lamnew]; - buf_ngtc[i] = std::sqrt(ir->opts.ref_t[i] / told); /* using the buffer as temperature scaling */ - } - } - - /* we don't need to manipulate the ekind information, as it isn't due to be reset until the next step anyway */ - - nstart = 0; - nend = mdatoms->homenr; - for (n = nstart; n < nend; n++) - { - gt = 0; - if (mdatoms->cTC) - { - gt = mdatoms->cTC[n]; - } - for (d = 0; d < DIM; d++) - { - v[n][d] *= buf_ngtc[gt]; - } - } - - if (inputrecNptTrotter(ir) || inputrecNphTrotter(ir) || inputrecNvtTrotter(ir)) - { - /* we need to recalculate the masses if the temperature has changed */ - init_npt_masses(ir, state, MassQ, FALSE); - for (i = 0; i < state->nnhpres; i++) - { - for (j = 0; j < ir->opts.nhchainlength; j++) - { - state->nhpres_vxi[i + j] *= buf_ngtc[i]; - } - } - for (i = 0; i < ir->opts.ngtc; i++) - { - for (j = 0; j < ir->opts.nhchainlength; j++) - { - state->nosehoover_vxi[i + j] *= buf_ngtc[i]; - } - } - } - sfree(buf_ngtc); - } - - /* now check on the Wang-Landau updating critera */ - - if (EWL(expand->elamstats)) - { - bSwitchtoOneOverT = FALSE; - if (expand->bWLoneovert) - { - totalsamples = 0; - for (i = 0; i < nlim; i++) - { - totalsamples += dfhist->n_at_lam[i]; - } - oneovert = (1.0 * nlim) / totalsamples; - /* oneovert has decreasd by a bit since last time, so we actually make sure its within one of this number */ - /* switch to 1/t incrementing when wl_delta has decreased at least once, and wl_delta is now less than 1/t */ - if ((dfhist->wl_delta <= ((totalsamples) / (totalsamples - 1.00001)) * oneovert) - && (dfhist->wl_delta < expand->init_wl_delta)) - { - bSwitchtoOneOverT = TRUE; - } - } - if (bSwitchtoOneOverT) - { - dfhist->wl_delta = - oneovert; /* now we reduce by this each 
time, instead of only at flatness */ - } - else - { - bIfReset = CheckHistogramRatios(nlim, dfhist->wl_histo, expand->wl_ratio); - if (bIfReset) - { - for (i = 0; i < nlim; i++) - { - dfhist->wl_histo[i] = 0; - } - dfhist->wl_delta *= expand->wl_scale; - if (log) - { - fprintf(log, "\nStep %d: weights are now:", static_cast(step)); - for (i = 0; i < nlim; i++) - { - fprintf(log, " %.5f", dfhist->sum_weights[i]); - } - fprintf(log, "\n"); - } - } - } - } - sfree(pfep_lamee); - sfree(scaled_lamee); - sfree(weighted_lamee); - sfree(p_k); - - return lamnew; -} diff --git a/patches/gromacs-2021.7.diff/src/gromacs/mdlib/expanded.h b/patches/gromacs-2021.7.diff/src/gromacs/mdlib/expanded.h deleted file mode 100644 index 7766a864fd..0000000000 --- a/patches/gromacs-2021.7.diff/src/gromacs/mdlib/expanded.h +++ /dev/null @@ -1,83 +0,0 @@ -/* - * This file is part of the GROMACS molecular simulation package. - * - * Copyright (c) 2017,2018,2019, by the GROMACS development team, led by - * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl, - * and including many others, as listed in the AUTHORS file in the - * top-level source directory and at http://www.gromacs.org. - * - * GROMACS is free software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public License - * as published by the Free Software Foundation; either version 2.1 - * of the License, or (at your option) any later version. - * - * GROMACS is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public - * License along with GROMACS; if not, see - * http://www.gnu.org/licenses, or write to the Free Software Foundation, - * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
- * - * If you want to redistribute modifications to GROMACS, please - * consider that scientific software is very special. Version - * control is crucial - bugs must be traceable. We will be happy to - * consider code for inclusion in the official distribution, but - * derived work must not be called official GROMACS. Details are found - * in the README & COPYING files - if they are missing, get the - * official version at http://www.gromacs.org. - * - * To help us fund GROMACS development, we humbly ask that you cite - * the research papers on the package. Check out http://www.gromacs.org. - */ -#ifndef GMX_MDLIB_EXPANDED_H -#define GMX_MDLIB_EXPANDED_H - -#include - -#include "gromacs/math/vectypes.h" -#include "gromacs/utility/basedefinitions.h" - -struct df_history_t; -struct gmx_enerdata_t; -struct t_expanded; -struct t_extmass; -struct t_inputrec; -struct t_lambda; -struct t_mdatoms; -struct t_simtemp; -class t_state; - -namespace gmx -{ -class MDLogger; -} // namespace gmx - -void init_npt_masses(const t_inputrec* ir, t_state* state, t_extmass* MassQ, gmx_bool bInit); - -void init_expanded_ensemble(gmx_bool bStateFromCP, const t_inputrec* ir, df_history_t* dfhist, const gmx::MDLogger& mdlog); - -int ExpandedEnsembleDynamics(FILE* log, - const t_inputrec* ir, - const gmx_enerdata_t* enerd, - t_state* state, - t_extmass* MassQ, - int fep_state, - df_history_t* dfhist, - int64_t step, - rvec* v, - const t_mdatoms* mdatoms, - real* realFepState); - -void PrintFreeEnergyInfoToFile(FILE* outfile, - const t_lambda* fep, - const t_expanded* expand, - const t_simtemp* simtemp, - const df_history_t* dfhist, - int fep_state, - int frequency, - int64_t step); - -#endif diff --git a/patches/gromacs-2021.7.diff/src/gromacs/mdlib/expanded.h.preplumed b/patches/gromacs-2021.7.diff/src/gromacs/mdlib/expanded.h.preplumed deleted file mode 100644 index 6f6bec9804..0000000000 --- a/patches/gromacs-2021.7.diff/src/gromacs/mdlib/expanded.h.preplumed +++ /dev/null @@ -1,77 +0,0 
@@ -/* - * This file is part of the GROMACS molecular simulation package. - * - * Copyright (c) 2017,2018,2019, by the GROMACS development team, led by - * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl, - * and including many others, as listed in the AUTHORS file in the - * top-level source directory and at http://www.gromacs.org. - * - * GROMACS is free software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public License - * as published by the Free Software Foundation; either version 2.1 - * of the License, or (at your option) any later version. - * - * GROMACS is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public - * License along with GROMACS; if not, see - * http://www.gnu.org/licenses, or write to the Free Software Foundation, - * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. - * - * If you want to redistribute modifications to GROMACS, please - * consider that scientific software is very special. Version - * control is crucial - bugs must be traceable. We will be happy to - * consider code for inclusion in the official distribution, but - * derived work must not be called official GROMACS. Details are found - * in the README & COPYING files - if they are missing, get the - * official version at http://www.gromacs.org. - * - * To help us fund GROMACS development, we humbly ask that you cite - * the research papers on the package. Check out http://www.gromacs.org. 
- */ -#ifndef GMX_MDLIB_EXPANDED_H -#define GMX_MDLIB_EXPANDED_H - -#include - -#include "gromacs/math/vectypes.h" -#include "gromacs/utility/basedefinitions.h" - -struct df_history_t; -struct gmx_enerdata_t; -struct t_expanded; -struct t_extmass; -struct t_inputrec; -struct t_lambda; -struct t_mdatoms; -struct t_simtemp; -class t_state; - -void init_npt_masses(const t_inputrec* ir, t_state* state, t_extmass* MassQ, gmx_bool bInit); - -void init_expanded_ensemble(gmx_bool bStateFromCP, const t_inputrec* ir, df_history_t* dfhist); - -int ExpandedEnsembleDynamics(FILE* log, - const t_inputrec* ir, - const gmx_enerdata_t* enerd, - t_state* state, - t_extmass* MassQ, - int fep_state, - df_history_t* dfhist, - int64_t step, - rvec* v, - const t_mdatoms* mdatoms); - -void PrintFreeEnergyInfoToFile(FILE* outfile, - const t_lambda* fep, - const t_expanded* expand, - const t_simtemp* simtemp, - const df_history_t* dfhist, - int fep_state, - int frequency, - int64_t step); - -#endif diff --git a/patches/gromacs-2021.7.diff/src/gromacs/mdlib/sim_util.cpp b/patches/gromacs-2021.7.diff/src/gromacs/mdlib/sim_util.cpp deleted file mode 100644 index 69625102ea..0000000000 --- a/patches/gromacs-2021.7.diff/src/gromacs/mdlib/sim_util.cpp +++ /dev/null @@ -1,2178 +0,0 @@ -/* - * This file is part of the GROMACS molecular simulation package. - * - * Copyright (c) 1991-2000, University of Groningen, The Netherlands. - * Copyright (c) 2001-2004, The GROMACS development team. - * Copyright (c) 2013-2019,2020,2021, by the GROMACS development team, led by - * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl, - * and including many others, as listed in the AUTHORS file in the - * top-level source directory and at http://www.gromacs.org. 
- * - * GROMACS is free software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public License - * as published by the Free Software Foundation; either version 2.1 - * of the License, or (at your option) any later version. - * - * GROMACS is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public - * License along with GROMACS; if not, see - * http://www.gnu.org/licenses, or write to the Free Software Foundation, - * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. - * - * If you want to redistribute modifications to GROMACS, please - * consider that scientific software is very special. Version - * control is crucial - bugs must be traceable. We will be happy to - * consider code for inclusion in the official distribution, but - * derived work must not be called official GROMACS. Details are found - * in the README & COPYING files - if they are missing, get the - * official version at http://www.gromacs.org. - * - * To help us fund GROMACS development, we humbly ask that you cite - * the research papers on the package. Check out http://www.gromacs.org. 
- */ -#include "gmxpre.h" - -#include "config.h" - -#include -#include -#include -#include - -#include -#include - -#include "gromacs/applied_forces/awh/awh.h" -#include "gromacs/domdec/dlbtiming.h" -#include "gromacs/domdec/domdec.h" -#include "gromacs/domdec/domdec_struct.h" -#include "gromacs/domdec/gpuhaloexchange.h" -#include "gromacs/domdec/partition.h" -#include "gromacs/essentialdynamics/edsam.h" -#include "gromacs/ewald/pme.h" -#include "gromacs/ewald/pme_pp.h" -#include "gromacs/ewald/pme_pp_comm_gpu.h" -#include "gromacs/gmxlib/network.h" -#include "gromacs/gmxlib/nonbonded/nb_free_energy.h" -#include "gromacs/gmxlib/nonbonded/nb_kernel.h" -#include "gromacs/gmxlib/nonbonded/nonbonded.h" -#include "gromacs/gpu_utils/gpu_utils.h" -#include "gromacs/imd/imd.h" -#include "gromacs/listed_forces/disre.h" -#include "gromacs/listed_forces/gpubonded.h" -#include "gromacs/listed_forces/listed_forces.h" -#include "gromacs/listed_forces/orires.h" -#include "gromacs/math/arrayrefwithpadding.h" -#include "gromacs/math/functions.h" -#include "gromacs/math/units.h" -#include "gromacs/math/vec.h" -#include "gromacs/math/vecdump.h" -#include "gromacs/mdlib/calcmu.h" -#include "gromacs/mdlib/calcvir.h" -#include "gromacs/mdlib/constr.h" -#include "gromacs/mdlib/dispersioncorrection.h" -#include "gromacs/mdlib/enerdata_utils.h" -#include "gromacs/mdlib/force.h" -#include "gromacs/mdlib/force_flags.h" -#include "gromacs/mdlib/forcerec.h" -#include "gromacs/mdlib/gmx_omp_nthreads.h" -#include "gromacs/mdlib/update.h" -#include "gromacs/mdlib/vsite.h" -#include "gromacs/mdlib/wall.h" -#include "gromacs/mdlib/wholemoleculetransform.h" -#include "gromacs/mdtypes/commrec.h" -#include "gromacs/mdtypes/enerdata.h" -#include "gromacs/mdtypes/forcebuffers.h" -#include "gromacs/mdtypes/forceoutput.h" -#include "gromacs/mdtypes/forcerec.h" -#include "gromacs/mdtypes/iforceprovider.h" -#include "gromacs/mdtypes/inputrec.h" -#include "gromacs/mdtypes/md_enums.h" -#include 
"gromacs/mdtypes/mdatom.h" -#include "gromacs/mdtypes/multipletimestepping.h" -#include "gromacs/mdtypes/simulation_workload.h" -#include "gromacs/mdtypes/state.h" -#include "gromacs/mdtypes/state_propagator_data_gpu.h" -#include "gromacs/nbnxm/gpu_data_mgmt.h" -#include "gromacs/nbnxm/nbnxm.h" -#include "gromacs/nbnxm/nbnxm_gpu.h" -#include "gromacs/pbcutil/ishift.h" -#include "gromacs/pbcutil/pbc.h" -#include "gromacs/pulling/pull.h" -#include "gromacs/pulling/pull_rotation.h" -#include "gromacs/timing/cyclecounter.h" -#include "gromacs/timing/gpu_timing.h" -#include "gromacs/timing/wallcycle.h" -#include "gromacs/timing/wallcyclereporting.h" -#include "gromacs/timing/walltime_accounting.h" -#include "gromacs/topology/topology.h" -#include "gromacs/utility/arrayref.h" -#include "gromacs/utility/basedefinitions.h" -#include "gromacs/utility/cstringutil.h" -#include "gromacs/utility/exceptions.h" -#include "gromacs/utility/fatalerror.h" -#include "gromacs/utility/fixedcapacityvector.h" -#include "gromacs/utility/gmxassert.h" -#include "gromacs/utility/gmxmpi.h" -#include "gromacs/utility/logger.h" -#include "gromacs/utility/smalloc.h" -#include "gromacs/utility/strconvert.h" -#include "gromacs/utility/sysinfo.h" - -#include "gpuforcereduction.h" - -using gmx::ArrayRef; -using gmx::AtomLocality; -using gmx::DomainLifetimeWorkload; -using gmx::ForceOutputs; -using gmx::ForceWithShiftForces; -using gmx::InteractionLocality; -using gmx::RVec; -using gmx::SimulationWorkload; -using gmx::StepWorkload; - -/* PLUMED */ -#include "../../../Plumed.h" -extern int plumedswitch; -extern plumed plumedmain; -/* END PLUMED */ - -// TODO: this environment variable allows us to verify before release -// that on less common architectures the total cost of polling is not larger than -// a blocking wait (so polling does not introduce overhead when the static -// PME-first ordering would suffice). 
-static const bool c_disableAlternatingWait = (getenv("GMX_DISABLE_ALTERNATING_GPU_WAIT") != nullptr); - -static void sum_forces(ArrayRef f, ArrayRef forceToAdd) -{ - GMX_ASSERT(f.size() >= forceToAdd.size(), "Accumulation buffer should be sufficiently large"); - const int end = forceToAdd.size(); - - int gmx_unused nt = gmx_omp_nthreads_get(emntDefault); -#pragma omp parallel for num_threads(nt) schedule(static) - for (int i = 0; i < end; i++) - { - rvec_inc(f[i], forceToAdd[i]); - } -} - -static void calc_virial(int start, - int homenr, - const rvec x[], - const gmx::ForceWithShiftForces& forceWithShiftForces, - tensor vir_part, - const matrix box, - t_nrnb* nrnb, - const t_forcerec* fr, - PbcType pbcType) -{ - /* The short-range virial from surrounding boxes */ - const rvec* fshift = as_rvec_array(forceWithShiftForces.shiftForces().data()); - calc_vir(SHIFTS, fr->shift_vec, fshift, vir_part, pbcType == PbcType::Screw, box); - inc_nrnb(nrnb, eNR_VIRIAL, SHIFTS); - - /* Calculate partial virial, for local atoms only, based on short range. - * Total virial is computed in global_stat, called from do_md - */ - const rvec* f = as_rvec_array(forceWithShiftForces.force().data()); - f_calc_vir(start, start + homenr, x, f, vir_part, box); - inc_nrnb(nrnb, eNR_VIRIAL, homenr); - - if (debug) - { - pr_rvecs(debug, 0, "vir_part", vir_part, DIM); - } -} - -static void pull_potential_wrapper(const t_commrec* cr, - const t_inputrec* ir, - const matrix box, - gmx::ArrayRef x, - gmx::ForceWithVirial* force, - const t_mdatoms* mdatoms, - gmx_enerdata_t* enerd, - pull_t* pull_work, - const real* lambda, - double t, - gmx_wallcycle_t wcycle) -{ - t_pbc pbc; - real dvdl; - - /* Calculate the center of mass forces, this requires communication, - * which is why pull_potential is called close to other communication. 
- */ - wallcycle_start(wcycle, ewcPULLPOT); - set_pbc(&pbc, ir->pbcType, box); - dvdl = 0; - enerd->term[F_COM_PULL] += - pull_potential(pull_work, mdatoms->massT, &pbc, cr, t, lambda[efptRESTRAINT], - as_rvec_array(x.data()), force, &dvdl); - enerd->dvdl_lin[efptRESTRAINT] += dvdl; - wallcycle_stop(wcycle, ewcPULLPOT); -} - -static void pme_receive_force_ener(t_forcerec* fr, - const t_commrec* cr, - gmx::ForceWithVirial* forceWithVirial, - gmx_enerdata_t* enerd, - bool useGpuPmePpComms, - bool receivePmeForceToGpu, - gmx_wallcycle_t wcycle) -{ - real e_q, e_lj, dvdl_q, dvdl_lj; - float cycles_ppdpme, cycles_seppme; - - cycles_ppdpme = wallcycle_stop(wcycle, ewcPPDURINGPME); - dd_cycles_add(cr->dd, cycles_ppdpme, ddCyclPPduringPME); - - /* In case of node-splitting, the PP nodes receive the long-range - * forces, virial and energy from the PME nodes here. - */ - wallcycle_start(wcycle, ewcPP_PMEWAITRECVF); - dvdl_q = 0; - dvdl_lj = 0; - gmx_pme_receive_f(fr->pmePpCommGpu.get(), cr, forceWithVirial, &e_q, &e_lj, &dvdl_q, &dvdl_lj, - useGpuPmePpComms, receivePmeForceToGpu, &cycles_seppme); - enerd->term[F_COUL_RECIP] += e_q; - enerd->term[F_LJ_RECIP] += e_lj; - enerd->dvdl_lin[efptCOUL] += dvdl_q; - enerd->dvdl_lin[efptVDW] += dvdl_lj; - - if (wcycle) - { - dd_cycles_add(cr->dd, cycles_seppme, ddCyclPME); - } - wallcycle_stop(wcycle, ewcPP_PMEWAITRECVF); -} - -static void print_large_forces(FILE* fp, - const t_mdatoms* md, - const t_commrec* cr, - int64_t step, - real forceTolerance, - ArrayRef x, - ArrayRef f) -{ - real force2Tolerance = gmx::square(forceTolerance); - gmx::index numNonFinite = 0; - for (int i = 0; i < md->homenr; i++) - { - real force2 = norm2(f[i]); - bool nonFinite = !std::isfinite(force2); - if (force2 >= force2Tolerance || nonFinite) - { - fprintf(fp, "step %" PRId64 " atom %6d x %8.3f %8.3f %8.3f force %12.5e\n", step, - ddglatnr(cr->dd, i), x[i][XX], x[i][YY], x[i][ZZ], std::sqrt(force2)); - } - if (nonFinite) - { - numNonFinite++; - } - } - 
if (numNonFinite > 0) - { - /* Note that with MPI this fatal call on one rank might interrupt - * the printing on other ranks. But we can only avoid that with - * an expensive MPI barrier that we would need at each step. - */ - gmx_fatal(FARGS, "At step %" PRId64 " detected non-finite forces on %td atoms", step, numNonFinite); - } -} - -//! When necessary, spreads forces on vsites and computes the virial for \p forceOutputs->forceWithShiftForces() -static void postProcessForceWithShiftForces(t_nrnb* nrnb, - gmx_wallcycle_t wcycle, - const matrix box, - ArrayRef x, - ForceOutputs* forceOutputs, - tensor vir_force, - const t_mdatoms& mdatoms, - const t_forcerec& fr, - gmx::VirtualSitesHandler* vsite, - const StepWorkload& stepWork) -{ - ForceWithShiftForces& forceWithShiftForces = forceOutputs->forceWithShiftForces(); - - /* If we have NoVirSum forces, but we do not calculate the virial, - * we later sum the forceWithShiftForces buffer together with - * the noVirSum buffer and spread the combined vsite forces at once. - */ - if (vsite && (!forceOutputs->haveForceWithVirial() || stepWork.computeVirial)) - { - using VirialHandling = gmx::VirtualSitesHandler::VirialHandling; - - auto f = forceWithShiftForces.force(); - auto fshift = forceWithShiftForces.shiftForces(); - const VirialHandling virialHandling = - (stepWork.computeVirial ? VirialHandling::Pbc : VirialHandling::None); - vsite->spreadForces(x, f, virialHandling, fshift, nullptr, nrnb, box, wcycle); - forceWithShiftForces.haveSpreadVsiteForces() = true; - } - - if (stepWork.computeVirial) - { - /* Calculation of the virial must be done after vsites! */ - calc_virial(0, mdatoms.homenr, as_rvec_array(x.data()), forceWithShiftForces, vir_force, - box, nrnb, &fr, fr.pbcType); - } -} - -//! 
Spread, compute virial for and sum forces, when necessary -static void postProcessForces(const t_commrec* cr, - int64_t step, - t_nrnb* nrnb, - gmx_wallcycle_t wcycle, - const matrix box, - ArrayRef x, - ForceOutputs* forceOutputs, - tensor vir_force, - const t_mdatoms* mdatoms, - const t_forcerec* fr, - gmx::VirtualSitesHandler* vsite, - const StepWorkload& stepWork) -{ - // Extract the final output force buffer, which is also the buffer for forces with shift forces - ArrayRef f = forceOutputs->forceWithShiftForces().force(); - - if (forceOutputs->haveForceWithVirial()) - { - auto& forceWithVirial = forceOutputs->forceWithVirial(); - - if (vsite) - { - /* Spread the mesh force on virtual sites to the other particles... - * This is parallellized. MPI communication is performed - * if the constructing atoms aren't local. - */ - GMX_ASSERT(!stepWork.computeVirial || f.data() != forceWithVirial.force_.data(), - "We need separate force buffers for shift and virial forces when " - "computing the virial"); - GMX_ASSERT(!stepWork.computeVirial - || forceOutputs->forceWithShiftForces().haveSpreadVsiteForces(), - "We should spread the force with shift forces separately when computing " - "the virial"); - const gmx::VirtualSitesHandler::VirialHandling virialHandling = - (stepWork.computeVirial ? 
gmx::VirtualSitesHandler::VirialHandling::NonLinear - : gmx::VirtualSitesHandler::VirialHandling::None); - matrix virial = { { 0 } }; - vsite->spreadForces(x, forceWithVirial.force_, virialHandling, {}, virial, nrnb, box, wcycle); - forceWithVirial.addVirialContribution(virial); - } - - if (stepWork.computeVirial) - { - /* Now add the forces, this is local */ - sum_forces(f, forceWithVirial.force_); - - /* Add the direct virial contributions */ - GMX_ASSERT( - forceWithVirial.computeVirial_, - "forceWithVirial should request virial computation when we request the virial"); - m_add(vir_force, forceWithVirial.getVirial(), vir_force); - - if (debug) - { - pr_rvecs(debug, 0, "vir_force", vir_force, DIM); - } - } - } - else - { - GMX_ASSERT(vsite == nullptr || forceOutputs->forceWithShiftForces().haveSpreadVsiteForces(), - "We should have spread the vsite forces (earlier)"); - } - - if (fr->print_force >= 0) - { - print_large_forces(stderr, mdatoms, cr, step, fr->print_force, x, f); - } -} - -static void do_nb_verlet(t_forcerec* fr, - const interaction_const_t* ic, - gmx_enerdata_t* enerd, - const StepWorkload& stepWork, - const InteractionLocality ilocality, - const int clearF, - const int64_t step, - t_nrnb* nrnb, - gmx_wallcycle_t wcycle) -{ - if (!stepWork.computeNonbondedForces) - { - /* skip non-bonded calculation */ - return; - } - - nonbonded_verlet_t* nbv = fr->nbv.get(); - - /* GPU kernel launch overhead is already timed separately */ - if (!nbv->useGpu()) - { - /* When dynamic pair-list pruning is requested, we need to prune - * at nstlistPrune steps. - */ - if (nbv->isDynamicPruningStepCpu(step)) - { - /* Prune the pair-list beyond fr->ic->rlistPrune using - * the current coordinates of the atoms. 
- */ - wallcycle_sub_start(wcycle, ewcsNONBONDED_PRUNING); - nbv->dispatchPruneKernelCpu(ilocality, fr->shift_vec); - wallcycle_sub_stop(wcycle, ewcsNONBONDED_PRUNING); - } - } - - nbv->dispatchNonbondedKernel(ilocality, *ic, stepWork, clearF, *fr, enerd, nrnb); -} - -static inline void clearRVecs(ArrayRef v, const bool useOpenmpThreading) -{ - int nth = gmx_omp_nthreads_get_simple_rvec_task(emntDefault, v.ssize()); - - /* Note that we would like to avoid this conditional by putting it - * into the omp pragma instead, but then we still take the full - * omp parallel for overhead (at least with gcc5). - */ - if (!useOpenmpThreading || nth == 1) - { - for (RVec& elem : v) - { - clear_rvec(elem); - } - } - else - { -#pragma omp parallel for num_threads(nth) schedule(static) - for (gmx::index i = 0; i < v.ssize(); i++) - { - clear_rvec(v[i]); - } - } -} - -/*! \brief Return an estimate of the average kinetic energy or 0 when unreliable - * - * \param groupOptions Group options, containing T-coupling options - */ -static real averageKineticEnergyEstimate(const t_grpopts& groupOptions) -{ - real nrdfCoupled = 0; - real nrdfUncoupled = 0; - real kineticEnergy = 0; - for (int g = 0; g < groupOptions.ngtc; g++) - { - if (groupOptions.tau_t[g] >= 0) - { - nrdfCoupled += groupOptions.nrdf[g]; - kineticEnergy += groupOptions.nrdf[g] * 0.5 * groupOptions.ref_t[g] * BOLTZ; - } - else - { - nrdfUncoupled += groupOptions.nrdf[g]; - } - } - - /* This conditional with > also catches nrdf=0 */ - if (nrdfCoupled > nrdfUncoupled) - { - return kineticEnergy * (nrdfCoupled + nrdfUncoupled) / nrdfCoupled; - } - else - { - return 0; - } -} - -/*! \brief This routine checks that the potential energy is finite. - * - * Always checks that the potential energy is finite. If step equals - * inputrec.init_step also checks that the magnitude of the potential energy - * is reasonable. Terminates with a fatal error when a check fails. 
- * Note that passing this check does not guarantee finite forces, - * since those use slightly different arithmetics. But in most cases - * there is just a narrow coordinate range where forces are not finite - * and energies are finite. - * - * \param[in] step The step number, used for checking and printing - * \param[in] enerd The energy data; the non-bonded group energies need to be added to - * enerd.term[F_EPOT] before calling this routine \param[in] inputrec The input record - */ -static void checkPotentialEnergyValidity(int64_t step, const gmx_enerdata_t& enerd, const t_inputrec& inputrec) -{ - /* Threshold valid for comparing absolute potential energy against - * the kinetic energy. Normally one should not consider absolute - * potential energy values, but with a factor of one million - * we should never get false positives. - */ - constexpr real c_thresholdFactor = 1e6; - - bool energyIsNotFinite = !std::isfinite(enerd.term[F_EPOT]); - real averageKineticEnergy = 0; - /* We only check for large potential energy at the initial step, - * because that is by far the most likely step for this too occur - * and because computing the average kinetic energy is not free. - * Note: nstcalcenergy >> 1 often does not allow to catch large energies - * before they become NaN. - */ - if (step == inputrec.init_step && EI_DYNAMICS(inputrec.eI)) - { - averageKineticEnergy = averageKineticEnergyEstimate(inputrec.opts); - } - - if (energyIsNotFinite - || (averageKineticEnergy > 0 && enerd.term[F_EPOT] > c_thresholdFactor * averageKineticEnergy)) - { - gmx_fatal( - FARGS, - "Step %" PRId64 - ": The total potential energy is %g, which is %s. The LJ and electrostatic " - "contributions to the energy are %g and %g, respectively. A %s potential energy " - "can be caused by overlapping interactions in bonded interactions or very large%s " - "coordinate values. 
Usually this is caused by a badly- or non-equilibrated initial " - "configuration, incorrect interactions or parameters in the topology.", - step, enerd.term[F_EPOT], energyIsNotFinite ? "not finite" : "extremely high", - enerd.term[F_LJ], enerd.term[F_COUL_SR], - energyIsNotFinite ? "non-finite" : "very high", energyIsNotFinite ? " or Nan" : ""); - } -} - -/*! \brief Return true if there are special forces computed this step. - * - * The conditionals exactly correspond to those in computeSpecialForces(). - */ -static bool haveSpecialForces(const t_inputrec& inputrec, - const gmx::ForceProviders& forceProviders, - const pull_t* pull_work, - const bool computeForces, - const gmx_edsam* ed) -{ - - return ((computeForces && forceProviders.hasForceProvider()) || // forceProviders - (inputrec.bPull && pull_have_potential(*pull_work)) || // pull - inputrec.bRot || // enforced rotation - (ed != nullptr) || // flooding - (inputrec.bIMD && computeForces)); // IMD -} - -/*! \brief Compute forces and/or energies for special algorithms - * - * The intention is to collect all calls to algorithms that compute - * forces on local atoms only and that do not contribute to the local - * virial sum (but add their virial contribution separately). - * Eventually these should likely all become ForceProviders. - * Within this function the intention is to have algorithms that do - * global communication at the end, so global barriers within the MD loop - * are as close together as possible. - * - * \param[in] fplog The log file - * \param[in] cr The communication record - * \param[in] inputrec The input record - * \param[in] awh The Awh module (nullptr if none in use). - * \param[in] enforcedRotation Enforced rotation module. - * \param[in] imdSession The IMD session - * \param[in] pull_work The pull work structure. 
- * \param[in] step The current MD step - * \param[in] t The current time - * \param[in,out] wcycle Wallcycle accounting struct - * \param[in,out] forceProviders Pointer to a list of force providers - * \param[in] box The unit cell - * \param[in] x The coordinates - * \param[in] mdatoms Per atom properties - * \param[in] lambda Array of free-energy lambda values - * \param[in] stepWork Step schedule flags - * \param[in,out] forceWithVirialMtsLevel0 Force and virial for MTS level0 forces - * \param[in,out] forceWithVirialMtsLevel1 Force and virial for MTS level1 forces, can be nullptr - * \param[in,out] enerd Energy buffer - * \param[in,out] ed Essential dynamics pointer - * \param[in] didNeighborSearch Tells if we did neighbor searching this step, used for ED sampling - * - * \todo Remove didNeighborSearch, which is used incorrectly. - * \todo Convert all other algorithms called here to ForceProviders. - */ -static void computeSpecialForces(FILE* fplog, - const t_commrec* cr, - const t_inputrec* inputrec, - gmx::Awh* awh, - gmx_enfrot* enforcedRotation, - gmx::ImdSession* imdSession, - pull_t* pull_work, - int64_t step, - double t, - gmx_wallcycle_t wcycle, - gmx::ForceProviders* forceProviders, - const matrix box, - gmx::ArrayRef x, - const t_mdatoms* mdatoms, - gmx::ArrayRef lambda, - const StepWorkload& stepWork, - gmx::ForceWithVirial* forceWithVirialMtsLevel0, - gmx::ForceWithVirial* forceWithVirialMtsLevel1, - gmx_enerdata_t* enerd, - gmx_edsam* ed, - bool didNeighborSearch) -{ - /* NOTE: Currently all ForceProviders only provide forces. - * When they also provide energies, remove this conditional. 
- */ - if (stepWork.computeForces) - { - gmx::ForceProviderInput forceProviderInput(x, *mdatoms, t, box, *cr); - gmx::ForceProviderOutput forceProviderOutput(forceWithVirialMtsLevel0, enerd); - - /* Collect forces from modules */ - forceProviders->calculateForces(forceProviderInput, &forceProviderOutput); - } - - if (inputrec->bPull && pull_have_potential(*pull_work)) - { - const int mtsLevel = forceGroupMtsLevel(inputrec->mtsLevels, gmx::MtsForceGroups::Pull); - if (mtsLevel == 0 || stepWork.computeSlowForces) - { - auto& forceWithVirial = (mtsLevel == 0) ? forceWithVirialMtsLevel0 : forceWithVirialMtsLevel1; - pull_potential_wrapper(cr, inputrec, box, x, forceWithVirial, mdatoms, enerd, pull_work, - lambda.data(), t, wcycle); - } - } - if (awh) - { - const int mtsLevel = forceGroupMtsLevel(inputrec->mtsLevels, gmx::MtsForceGroups::Pull); - if (mtsLevel == 0 || stepWork.computeSlowForces) - { - const bool needForeignEnergyDifferences = awh->needForeignEnergyDifferences(step); - std::vector foreignLambdaDeltaH, foreignLambdaDhDl; - if (needForeignEnergyDifferences) - { - enerd->foreignLambdaTerms.finalizePotentialContributions(enerd->dvdl_lin, lambda, - *inputrec->fepvals); - std::tie(foreignLambdaDeltaH, foreignLambdaDhDl) = enerd->foreignLambdaTerms.getTerms(cr); - } - - auto& forceWithVirial = (mtsLevel == 0) ? 
forceWithVirialMtsLevel0 : forceWithVirialMtsLevel1; - enerd->term[F_COM_PULL] += awh->applyBiasForcesAndUpdateBias( - inputrec->pbcType, mdatoms->massT, foreignLambdaDeltaH, foreignLambdaDhDl, box, - forceWithVirial, t, step, wcycle, fplog); - } - } - - rvec* f = as_rvec_array(forceWithVirialMtsLevel0->force_.data()); - - /* Add the forces from enforced rotation potentials (if any) */ - if (inputrec->bRot) - { - wallcycle_start(wcycle, ewcROTadd); - enerd->term[F_COM_PULL] += add_rot_forces(enforcedRotation, f, cr, step, t); - wallcycle_stop(wcycle, ewcROTadd); - } - - if (ed) - { - /* Note that since init_edsam() is called after the initialization - * of forcerec, edsam doesn't request the noVirSum force buffer. - * Thus if no other algorithm (e.g. PME) requires it, the forces - * here will contribute to the virial. - */ - do_flood(cr, inputrec, as_rvec_array(x.data()), f, ed, box, step, didNeighborSearch); - } - - /* Add forces from interactive molecular dynamics (IMD), if any */ - if (inputrec->bIMD && stepWork.computeForces) - { - imdSession->applyForces(f); - } -} - -/*! \brief Launch the prepare_step and spread stages of PME GPU. - * - * \param[in] pmedata The PME structure - * \param[in] box The box matrix - * \param[in] stepWork Step schedule flags - * \param[in] xReadyOnDevice Event synchronizer indicating that the coordinates are ready in the device memory. - * \param[in] lambdaQ The Coulomb lambda of the current state. - * \param[in] wcycle The wallcycle structure - */ -static inline void launchPmeGpuSpread(gmx_pme_t* pmedata, - const matrix box, - const StepWorkload& stepWork, - GpuEventSynchronizer* xReadyOnDevice, - const real lambdaQ, - gmx_wallcycle_t wcycle) -{ - pme_gpu_prepare_computation(pmedata, box, wcycle, stepWork); - pme_gpu_launch_spread(pmedata, xReadyOnDevice, wcycle, lambdaQ); -} - -/*! \brief Launch the FFT and gather stages of PME GPU - * - * This function only implements setting the output forces (no accumulation). 
- * - * \param[in] pmedata The PME structure - * \param[in] lambdaQ The Coulomb lambda of the current system state. - * \param[in] wcycle The wallcycle structure - * \param[in] stepWork Step schedule flags - */ -static void launchPmeGpuFftAndGather(gmx_pme_t* pmedata, - const real lambdaQ, - gmx_wallcycle_t wcycle, - const gmx::StepWorkload& stepWork) -{ - pme_gpu_launch_complex_transforms(pmedata, wcycle, stepWork); - pme_gpu_launch_gather(pmedata, wcycle, lambdaQ); -} - -/*! \brief - * Polling wait for either of the PME or nonbonded GPU tasks. - * - * Instead of a static order in waiting for GPU tasks, this function - * polls checking which of the two tasks completes first, and does the - * associated force buffer reduction overlapped with the other task. - * By doing that, unlike static scheduling order, it can always overlap - * one of the reductions, regardless of the GPU task completion order. - * - * \param[in] nbv Nonbonded verlet structure - * \param[in,out] pmedata PME module data - * \param[in,out] forceOutputsNonbonded Force outputs for the non-bonded forces and shift forces - * \param[in,out] forceOutputsPme Force outputs for the PME forces and virial - * \param[in,out] enerd Energy data structure results are reduced into - * \param[in] lambdaQ The Coulomb lambda of the current system state. - * \param[in] stepWork Step schedule flags - * \param[in] wcycle The wallcycle structure - */ -static void alternatePmeNbGpuWaitReduce(nonbonded_verlet_t* nbv, - gmx_pme_t* pmedata, - gmx::ForceOutputs* forceOutputsNonbonded, - gmx::ForceOutputs* forceOutputsPme, - gmx_enerdata_t* enerd, - const real lambdaQ, - const StepWorkload& stepWork, - gmx_wallcycle_t wcycle) -{ - bool isPmeGpuDone = false; - bool isNbGpuDone = false; - - gmx::ArrayRef pmeGpuForces; - - while (!isPmeGpuDone || !isNbGpuDone) - { - if (!isPmeGpuDone) - { - GpuTaskCompletion completionType = - (isNbGpuDone) ? 
GpuTaskCompletion::Wait : GpuTaskCompletion::Check; - isPmeGpuDone = pme_gpu_try_finish_task(pmedata, stepWork, wcycle, - &forceOutputsPme->forceWithVirial(), enerd, - lambdaQ, completionType); - } - - if (!isNbGpuDone) - { - auto& forceBuffersNonbonded = forceOutputsNonbonded->forceWithShiftForces(); - GpuTaskCompletion completionType = - (isPmeGpuDone) ? GpuTaskCompletion::Wait : GpuTaskCompletion::Check; - isNbGpuDone = Nbnxm::gpu_try_finish_task( - nbv->gpu_nbv, stepWork, AtomLocality::Local, enerd->grpp.ener[egLJSR].data(), - enerd->grpp.ener[egCOULSR].data(), forceBuffersNonbonded.shiftForces(), - completionType, wcycle); - - if (isNbGpuDone) - { - nbv->atomdata_add_nbat_f_to_f(AtomLocality::Local, forceBuffersNonbonded.force()); - } - } - } -} - -/*! \brief Set up the different force buffers; also does clearing. - * - * \param[in] forceHelperBuffers Helper force buffers - * \param[in] force force array - * \param[in] stepWork Step schedule flags - * \param[out] wcycle wallcycle recording structure - * - * \returns Cleared force output structure - */ -static ForceOutputs setupForceOutputs(ForceHelperBuffers* forceHelperBuffers, - gmx::ArrayRefWithPadding force, - const StepWorkload& stepWork, - gmx_wallcycle_t wcycle) -{ - wallcycle_sub_start(wcycle, ewcsCLEAR_FORCE_BUFFER); - - /* NOTE: We assume fr->shiftForces is all zeros here */ - gmx::ForceWithShiftForces forceWithShiftForces(force, stepWork.computeVirial, - forceHelperBuffers->shiftForces()); - - if (stepWork.computeForces) - { - /* Clear the short- and long-range forces */ - clearRVecs(forceWithShiftForces.force(), true); - - /* Clear the shift forces */ - clearRVecs(forceWithShiftForces.shiftForces(), false); - } - - /* If we need to compute the virial, we might need a separate - * force buffer for algorithms for which the virial is calculated - * directly, such as PME. Otherwise, forceWithVirial uses the - * the same force (f in legacy calls) buffer as other algorithms. 
- */ - const bool useSeparateForceWithVirialBuffer = - (stepWork.computeForces - && (stepWork.computeVirial && forceHelperBuffers->haveDirectVirialContributions())); - /* forceWithVirial uses the local atom range only */ - gmx::ForceWithVirial forceWithVirial( - useSeparateForceWithVirialBuffer ? forceHelperBuffers->forceBufferForDirectVirialContributions() - : force.unpaddedArrayRef(), - stepWork.computeVirial); - - if (useSeparateForceWithVirialBuffer) - { - /* TODO: update comment - * We only compute forces on local atoms. Note that vsites can - * spread to non-local atoms, but that part of the buffer is - * cleared separately in the vsite spreading code. - */ - clearRVecs(forceWithVirial.force_, true); - } - - wallcycle_sub_stop(wcycle, ewcsCLEAR_FORCE_BUFFER); - - return ForceOutputs(forceWithShiftForces, forceHelperBuffers->haveDirectVirialContributions(), - forceWithVirial); -} - - -/*! \brief Set up flags that have the lifetime of the domain indicating what type of work is there to compute. 
- */ -static DomainLifetimeWorkload setupDomainLifetimeWorkload(const t_inputrec& inputrec, - const t_forcerec& fr, - const pull_t* pull_work, - const gmx_edsam* ed, - const t_mdatoms& mdatoms, - const SimulationWorkload& simulationWork, - const StepWorkload& stepWork) -{ - DomainLifetimeWorkload domainWork; - // Note that haveSpecialForces is constant over the whole run - domainWork.haveSpecialForces = - haveSpecialForces(inputrec, *fr.forceProviders, pull_work, stepWork.computeForces, ed); - domainWork.haveCpuListedForceWork = false; - domainWork.haveCpuBondedWork = false; - for (const auto& listedForces : fr.listedForces) - { - if (listedForces.haveCpuListedForces(*fr.fcdata)) - { - domainWork.haveCpuListedForceWork = true; - } - if (listedForces.haveCpuBondeds()) - { - domainWork.haveCpuBondedWork = true; - } - } - domainWork.haveGpuBondedWork = ((fr.gpuBonded != nullptr) && fr.gpuBonded->haveInteractions()); - // Note that haveFreeEnergyWork is constant over the whole run - domainWork.haveFreeEnergyWork = (fr.efep != efepNO && mdatoms.nPerturbed != 0); - // We assume we have local force work if there are CPU - // force tasks including PME or nonbondeds. - domainWork.haveCpuLocalForceWork = - domainWork.haveSpecialForces || domainWork.haveCpuListedForceWork - || domainWork.haveFreeEnergyWork || simulationWork.useCpuNonbonded || simulationWork.useCpuPme - || simulationWork.haveEwaldSurfaceContribution || inputrec.nwall > 0; - - return domainWork; -} - -/*! \brief Set up force flag stuct from the force bitmask. - * - * \param[in] legacyFlags Force bitmask flags used to construct the new flags - * \param[in] mtsLevels The multiple time-stepping levels, either empty or 2 levels - * \param[in] step The current MD step - * \param[in] simulationWork Simulation workload description. - * \param[in] rankHasPmeDuty If this rank computes PME. - * - * \returns New Stepworkload description. 
- */ -static StepWorkload setupStepWorkload(const int legacyFlags, - ArrayRef mtsLevels, - const int64_t step, - const SimulationWorkload& simulationWork, - const bool rankHasPmeDuty) -{ - GMX_ASSERT(mtsLevels.empty() || mtsLevels.size() == 2, "Expect 0 or 2 MTS levels"); - const bool computeSlowForces = (mtsLevels.empty() || step % mtsLevels[1].stepFactor == 0); - - StepWorkload flags; - flags.stateChanged = ((legacyFlags & GMX_FORCE_STATECHANGED) != 0); - flags.haveDynamicBox = ((legacyFlags & GMX_FORCE_DYNAMICBOX) != 0); - flags.doNeighborSearch = ((legacyFlags & GMX_FORCE_NS) != 0); - flags.computeSlowForces = computeSlowForces; - flags.computeVirial = ((legacyFlags & GMX_FORCE_VIRIAL) != 0); - flags.computeEnergy = ((legacyFlags & GMX_FORCE_ENERGY) != 0); - flags.computeForces = ((legacyFlags & GMX_FORCE_FORCES) != 0); - flags.computeListedForces = ((legacyFlags & GMX_FORCE_LISTED) != 0); - flags.computeNonbondedForces = - ((legacyFlags & GMX_FORCE_NONBONDED) != 0) && simulationWork.computeNonbonded - && !(simulationWork.computeNonbondedAtMtsLevel1 && !computeSlowForces); - flags.computeDhdl = ((legacyFlags & GMX_FORCE_DHDL) != 0); - - if (simulationWork.useGpuBufferOps) - { - GMX_ASSERT(simulationWork.useGpuNonbonded, - "Can only offload buffer ops if nonbonded computation is also offloaded"); - } - flags.useGpuXBufferOps = simulationWork.useGpuBufferOps; - // on virial steps the CPU reduction path is taken - flags.useGpuFBufferOps = simulationWork.useGpuBufferOps && !flags.computeVirial; - flags.useGpuPmeFReduction = flags.computeSlowForces && flags.useGpuFBufferOps && simulationWork.useGpuPme - && (rankHasPmeDuty || simulationWork.useGpuPmePpCommunication); - flags.useGpuXHalo = simulationWork.useGpuHaloExchange; - flags.useGpuFHalo = simulationWork.useGpuHaloExchange && flags.useGpuFBufferOps; - - return flags; -} - - -/* \brief Launch end-of-step GPU tasks: buffer clearing and rolling pruning. 
- * - * TODO: eliminate \p useGpuPmeOnThisRank when this is - * incorporated in DomainLifetimeWorkload. - */ -static void launchGpuEndOfStepTasks(nonbonded_verlet_t* nbv, - gmx::GpuBonded* gpuBonded, - gmx_pme_t* pmedata, - gmx_enerdata_t* enerd, - const gmx::MdrunScheduleWorkload& runScheduleWork, - bool useGpuPmeOnThisRank, - int64_t step, - gmx_wallcycle_t wcycle) -{ - if (runScheduleWork.simulationWork.useGpuNonbonded && runScheduleWork.stepWork.computeNonbondedForces) - { - /* Launch pruning before buffer clearing because the API overhead of the - * clear kernel launches can leave the GPU idle while it could be running - * the prune kernel. - */ - if (nbv->isDynamicPruningStepGpu(step)) - { - nbv->dispatchPruneKernelGpu(step); - } - - /* now clear the GPU outputs while we finish the step on the CPU */ - wallcycle_start_nocount(wcycle, ewcLAUNCH_GPU); - wallcycle_sub_start_nocount(wcycle, ewcsLAUNCH_GPU_NONBONDED); - Nbnxm::gpu_clear_outputs(nbv->gpu_nbv, runScheduleWork.stepWork.computeVirial); - wallcycle_sub_stop(wcycle, ewcsLAUNCH_GPU_NONBONDED); - wallcycle_stop(wcycle, ewcLAUNCH_GPU); - } - - if (useGpuPmeOnThisRank) - { - pme_gpu_reinit_computation(pmedata, wcycle); - } - - if (runScheduleWork.domainWork.haveGpuBondedWork && runScheduleWork.stepWork.computeEnergy) - { - // in principle this should be included in the DD balancing region, - // but generally it is infrequent so we'll omit it for the sake of - // simpler code - gpuBonded->waitAccumulateEnergyTerms(enerd); - - gpuBonded->clearEnergies(); - } -} - -//! \brief Data structure to hold dipole-related data and staging arrays -struct DipoleData -{ - //! Dipole staging for fast summing over MPI - gmx::DVec muStaging[2] = { { 0.0, 0.0, 0.0 } }; - //! Dipole staging for states A and B (index 0 and 1 resp.) 
- gmx::RVec muStateAB[2] = { { 0.0_real, 0.0_real, 0.0_real } }; -}; - - -static void reduceAndUpdateMuTot(DipoleData* dipoleData, - const t_commrec* cr, - const bool haveFreeEnergy, - gmx::ArrayRef lambda, - rvec muTotal, - const DDBalanceRegionHandler& ddBalanceRegionHandler) -{ - if (PAR(cr)) - { - gmx_sumd(2 * DIM, dipoleData->muStaging[0], cr); - ddBalanceRegionHandler.reopenRegionCpu(); - } - for (int i = 0; i < 2; i++) - { - for (int j = 0; j < DIM; j++) - { - dipoleData->muStateAB[i][j] = dipoleData->muStaging[i][j]; - } - } - - if (!haveFreeEnergy) - { - copy_rvec(dipoleData->muStateAB[0], muTotal); - } - else - { - for (int j = 0; j < DIM; j++) - { - muTotal[j] = (1.0 - lambda[efptCOUL]) * dipoleData->muStateAB[0][j] - + lambda[efptCOUL] * dipoleData->muStateAB[1][j]; - } - } -} - -/*! \brief Combines MTS level0 and level1 force buffes into a full and MTS-combined force buffer. - * - * \param[in] numAtoms The number of atoms to combine forces for - * \param[in,out] forceMtsLevel0 Input: F_level0, output: F_level0 + F_level1 - * \param[in,out] forceMts Input: F_level1, output: F_level0 + mtsFactor * F_level1 - * \param[in] mtsFactor The factor between the level0 and level1 time step - */ -static void combineMtsForces(const int numAtoms, - ArrayRef forceMtsLevel0, - ArrayRef forceMts, - const real mtsFactor) -{ - const int gmx_unused numThreads = gmx_omp_nthreads_get(emntDefault); -#pragma omp parallel for num_threads(numThreads) schedule(static) - for (int i = 0; i < numAtoms; i++) - { - const RVec forceMtsLevel0Tmp = forceMtsLevel0[i]; - forceMtsLevel0[i] += forceMts[i]; - forceMts[i] = forceMtsLevel0Tmp + mtsFactor * forceMts[i]; - } -} - -/*! \brief Setup for the local and non-local GPU force reductions: - * reinitialization plus the registration of forces and dependencies. 
- * - * \param [in] runScheduleWork Schedule workload flag structure - * \param [in] cr Communication record object - * \param [in] fr Force record object - */ -static void setupGpuForceReductions(gmx::MdrunScheduleWorkload* runScheduleWork, - const t_commrec* cr, - t_forcerec* fr) -{ - - nonbonded_verlet_t* nbv = fr->nbv.get(); - gmx::StatePropagatorDataGpu* stateGpu = fr->stateGpu; - - // (re-)initialize local GPU force reduction - const bool accumulate = - runScheduleWork->domainWork.haveCpuLocalForceWork || havePPDomainDecomposition(cr); - const int atomStart = 0; - fr->gpuForceReduction[gmx::AtomLocality::Local]->reinit( - stateGpu->getForces(), nbv->getNumAtoms(AtomLocality::Local), nbv->getGridIndices(), - atomStart, accumulate, stateGpu->fReducedOnDevice()); - - // register forces and add dependencies - fr->gpuForceReduction[gmx::AtomLocality::Local]->registerNbnxmForce(nbv->getGpuForces()); - - if (runScheduleWork->simulationWork.useGpuPme - && (thisRankHasDuty(cr, DUTY_PME) || runScheduleWork->simulationWork.useGpuPmePpCommunication)) - { - void* forcePtr = thisRankHasDuty(cr, DUTY_PME) ? pme_gpu_get_device_f(fr->pmedata) - : // PME force buffer on same GPU - fr->pmePpCommGpu->getGpuForceStagingPtr(); // buffer received from other GPU - fr->gpuForceReduction[gmx::AtomLocality::Local]->registerRvecForce(forcePtr); - - GpuEventSynchronizer* const pmeSynchronizer = - (thisRankHasDuty(cr, DUTY_PME) ? pme_gpu_get_f_ready_synchronizer(fr->pmedata) - : // PME force buffer on same GPU - fr->pmePpCommGpu->getForcesReadySynchronizer()); // buffer received from other GPU - fr->gpuForceReduction[gmx::AtomLocality::Local]->addDependency(pmeSynchronizer); - } - - if ((runScheduleWork->domainWork.haveCpuLocalForceWork || havePPDomainDecomposition(cr)) - && !runScheduleWork->simulationWork.useGpuHaloExchange) - { - auto forcesReadyLocality = havePPDomainDecomposition(cr) ? 
AtomLocality::Local : AtomLocality::All; - const bool useGpuForceBufferOps = true; - fr->gpuForceReduction[gmx::AtomLocality::Local]->addDependency( - stateGpu->getForcesReadyOnDeviceEvent(forcesReadyLocality, useGpuForceBufferOps)); - } - - if (runScheduleWork->simulationWork.useGpuHaloExchange) - { - fr->gpuForceReduction[gmx::AtomLocality::Local]->addDependency( - cr->dd->gpuHaloExchange[0][0]->getForcesReadyOnDeviceEvent()); - } - - if (havePPDomainDecomposition(cr)) - { - // (re-)initialize non-local GPU force reduction - const bool accumulate = runScheduleWork->domainWork.haveCpuBondedWork - || runScheduleWork->domainWork.haveFreeEnergyWork; - const int atomStart = dd_numHomeAtoms(*cr->dd); - fr->gpuForceReduction[gmx::AtomLocality::NonLocal]->reinit( - stateGpu->getForces(), nbv->getNumAtoms(AtomLocality::NonLocal), - nbv->getGridIndices(), atomStart, accumulate); - - // register forces and add dependencies - fr->gpuForceReduction[gmx::AtomLocality::NonLocal]->registerNbnxmForce(nbv->getGpuForces()); - if (runScheduleWork->domainWork.haveCpuBondedWork || runScheduleWork->domainWork.haveFreeEnergyWork) - { - fr->gpuForceReduction[gmx::AtomLocality::NonLocal]->addDependency( - stateGpu->getForcesReadyOnDeviceEvent(AtomLocality::NonLocal, true)); - } - } -} - - -void do_force(FILE* fplog, - const t_commrec* cr, - const gmx_multisim_t* ms, - const t_inputrec* inputrec, - gmx::Awh* awh, - gmx_enfrot* enforcedRotation, - gmx::ImdSession* imdSession, - pull_t* pull_work, - int64_t step, - t_nrnb* nrnb, - gmx_wallcycle_t wcycle, - const gmx_localtop_t* top, - const matrix box, - gmx::ArrayRefWithPadding x, - history_t* hist, - gmx::ForceBuffersView* forceView, - tensor vir_force, - const t_mdatoms* mdatoms, - gmx_enerdata_t* enerd, - gmx::ArrayRef lambda, - t_forcerec* fr, - gmx::MdrunScheduleWorkload* runScheduleWork, - gmx::VirtualSitesHandler* vsite, - rvec muTotal, - double t, - gmx_edsam* ed, - int legacyFlags, - const DDBalanceRegionHandler& 
ddBalanceRegionHandler) -{ - auto force = forceView->forceWithPadding(); - GMX_ASSERT(force.unpaddedArrayRef().ssize() >= fr->natoms_force_constr, - "The size of the force buffer should be at least the number of atoms to compute " - "forces for"); - - nonbonded_verlet_t* nbv = fr->nbv.get(); - interaction_const_t* ic = fr->ic; - - gmx::StatePropagatorDataGpu* stateGpu = fr->stateGpu; - - const SimulationWorkload& simulationWork = runScheduleWork->simulationWork; - - runScheduleWork->stepWork = setupStepWorkload(legacyFlags, inputrec->mtsLevels, step, - simulationWork, thisRankHasDuty(cr, DUTY_PME)); - const StepWorkload& stepWork = runScheduleWork->stepWork; - - const bool useGpuPmeOnThisRank = - simulationWork.useGpuPme && thisRankHasDuty(cr, DUTY_PME) && stepWork.computeSlowForces; - - /* At a search step we need to start the first balancing region - * somewhere early inside the step after communication during domain - * decomposition (and not during the previous step as usual). - */ - if (stepWork.doNeighborSearch) - { - ddBalanceRegionHandler.openBeforeForceComputationCpu(DdAllowBalanceRegionReopen::yes); - } - - clear_mat(vir_force); - - if (fr->pbcType != PbcType::No) - { - /* Compute shift vectors every step, - * because of pressure coupling or box deformation! 
- */ - if (stepWork.haveDynamicBox && stepWork.stateChanged) - { - calc_shifts(box, fr->shift_vec); - } - - const bool fillGrid = (stepWork.doNeighborSearch && stepWork.stateChanged); - const bool calcCGCM = (fillGrid && !DOMAINDECOMP(cr)); - if (calcCGCM) - { - put_atoms_in_box_omp(fr->pbcType, box, x.unpaddedArrayRef().subArray(0, mdatoms->homenr), - gmx_omp_nthreads_get(emntDefault)); - inc_nrnb(nrnb, eNR_SHIFTX, mdatoms->homenr); - } - } - - nbnxn_atomdata_copy_shiftvec(stepWork.haveDynamicBox, fr->shift_vec, nbv->nbat.get()); - - const bool pmeSendCoordinatesFromGpu = - GMX_MPI && simulationWork.useGpuPmePpCommunication && !(stepWork.doNeighborSearch); - const bool reinitGpuPmePpComms = - GMX_MPI && simulationWork.useGpuPmePpCommunication && (stepWork.doNeighborSearch); - - const auto localXReadyOnDevice = (useGpuPmeOnThisRank || simulationWork.useGpuBufferOps) - ? stateGpu->getCoordinatesReadyOnDeviceEvent( - AtomLocality::Local, simulationWork, stepWork) - : nullptr; - - // Copy coordinate from the GPU if update is on the GPU and there - // are forces to be computed on the CPU, or for the computation of - // virial, or if host-side data will be transferred from this task - // to a remote task for halo exchange or PME-PP communication. At - // search steps the current coordinates are already on the host, - // hence copy is not needed. 
- const bool haveHostPmePpComms = - !thisRankHasDuty(cr, DUTY_PME) && !simulationWork.useGpuPmePpCommunication; - - GMX_ASSERT(simulationWork.useGpuHaloExchange - == ((cr->dd != nullptr) && (!cr->dd->gpuHaloExchange[0].empty())), - "The GPU halo exchange is active, but it has not been constructed."); - const bool haveHostHaloExchangeComms = - havePPDomainDecomposition(cr) && !simulationWork.useGpuHaloExchange; - - bool gmx_used_in_debug haveCopiedXFromGpu = false; - if (simulationWork.useGpuUpdate && !stepWork.doNeighborSearch - && (runScheduleWork->domainWork.haveCpuLocalForceWork || stepWork.computeVirial - || haveHostPmePpComms || haveHostHaloExchangeComms || simulationWork.computeMuTot)) - { - stateGpu->copyCoordinatesFromGpu(x.unpaddedArrayRef(), AtomLocality::Local); - haveCopiedXFromGpu = true; - } - - // If coordinates are to be sent to PME task from CPU memory, perform that send here. - // Otherwise the send will occur after H2D coordinate transfer. - if (GMX_MPI && !thisRankHasDuty(cr, DUTY_PME) && !pmeSendCoordinatesFromGpu && stepWork.computeSlowForces) - { - /* Send particle coordinates to the pme nodes */ - if (!stepWork.doNeighborSearch && simulationWork.useGpuUpdate) - { - stateGpu->waitCoordinatesReadyOnHost(AtomLocality::Local); - } - - gmx_pme_send_coordinates(fr, cr, box, as_rvec_array(x.unpaddedArrayRef().data()), lambda[efptCOUL], - lambda[efptVDW], (stepWork.computeVirial || stepWork.computeEnergy), - step, simulationWork.useGpuPmePpCommunication, reinitGpuPmePpComms, - pmeSendCoordinatesFromGpu, localXReadyOnDevice, wcycle); - } - - // Coordinates on the device are needed if PME or BufferOps are offloaded. - // The local coordinates can be copied right away. - // NOTE: Consider moving this copy to right after they are updated and constrained, - // if the later is not offloaded. - if (useGpuPmeOnThisRank || stepWork.useGpuXBufferOps) - { - if (stepWork.doNeighborSearch) - { - // TODO refactor this to do_md, after partitioning. 
- stateGpu->reinit(mdatoms->homenr, - cr->dd != nullptr ? dd_numAtomsZones(*cr->dd) : mdatoms->homenr); - if (useGpuPmeOnThisRank) - { - // TODO: This should be moved into PME setup function ( pme_gpu_prepare_computation(...) ) - pme_gpu_set_device_x(fr->pmedata, stateGpu->getCoordinates()); - } - } - // We need to copy coordinates when: - // 1. Update is not offloaded - // 2. The buffers were reinitialized on search step - if (!simulationWork.useGpuUpdate || stepWork.doNeighborSearch) - { - GMX_ASSERT(stateGpu != nullptr, "stateGpu should not be null"); - stateGpu->copyCoordinatesToGpu(x.unpaddedArrayRef(), AtomLocality::Local); - } - } - - // If coordinates are to be sent to PME task from GPU memory, perform that send here. - // Otherwise the send will occur before the H2D coordinate transfer. - if (!thisRankHasDuty(cr, DUTY_PME) && pmeSendCoordinatesFromGpu) - { - /* Send particle coordinates to the pme nodes */ - gmx_pme_send_coordinates(fr, cr, box, as_rvec_array(x.unpaddedArrayRef().data()), lambda[efptCOUL], - lambda[efptVDW], (stepWork.computeVirial || stepWork.computeEnergy), - step, simulationWork.useGpuPmePpCommunication, reinitGpuPmePpComms, - pmeSendCoordinatesFromGpu, localXReadyOnDevice, wcycle); - } - - if (useGpuPmeOnThisRank) - { - launchPmeGpuSpread(fr->pmedata, box, stepWork, localXReadyOnDevice, lambda[efptCOUL], wcycle); - } - - const gmx::DomainLifetimeWorkload& domainWork = runScheduleWork->domainWork; - - /* do gridding for pair search */ - if (stepWork.doNeighborSearch) - { - if (fr->wholeMoleculeTransform && stepWork.stateChanged) - { - fr->wholeMoleculeTransform->updateForAtomPbcJumps(x.unpaddedArrayRef(), box); - } - - // TODO - // - vzero is constant, do we need to pass it? 
- // - box_diag should be passed directly to nbnxn_put_on_grid - // - rvec vzero; - clear_rvec(vzero); - - rvec box_diag; - box_diag[XX] = box[XX][XX]; - box_diag[YY] = box[YY][YY]; - box_diag[ZZ] = box[ZZ][ZZ]; - - wallcycle_start(wcycle, ewcNS); - if (!DOMAINDECOMP(cr)) - { - wallcycle_sub_start(wcycle, ewcsNBS_GRID_LOCAL); - nbnxn_put_on_grid(nbv, box, 0, vzero, box_diag, nullptr, { 0, mdatoms->homenr }, -1, - fr->cginfo, x.unpaddedArrayRef(), 0, nullptr); - wallcycle_sub_stop(wcycle, ewcsNBS_GRID_LOCAL); - } - else - { - wallcycle_sub_start(wcycle, ewcsNBS_GRID_NONLOCAL); - nbnxn_put_on_grid_nonlocal(nbv, domdec_zones(cr->dd), fr->cginfo, x.unpaddedArrayRef()); - wallcycle_sub_stop(wcycle, ewcsNBS_GRID_NONLOCAL); - } - - nbv->setAtomProperties(gmx::constArrayRefFromArray(mdatoms->typeA, mdatoms->nr), - gmx::constArrayRefFromArray(mdatoms->chargeA, mdatoms->nr), fr->cginfo); - - wallcycle_stop(wcycle, ewcNS); - - /* initialize the GPU nbnxm atom data and bonded data structures */ - if (simulationWork.useGpuNonbonded) - { - // Note: cycle counting only nononbondeds, gpuBonded counts internally - wallcycle_start_nocount(wcycle, ewcLAUNCH_GPU); - wallcycle_sub_start_nocount(wcycle, ewcsLAUNCH_GPU_NONBONDED); - Nbnxm::gpu_init_atomdata(nbv->gpu_nbv, nbv->nbat.get()); - wallcycle_sub_stop(wcycle, ewcsLAUNCH_GPU_NONBONDED); - wallcycle_stop(wcycle, ewcLAUNCH_GPU); - - if (fr->gpuBonded) - { - /* Now we put all atoms on the grid, we can assign bonded - * interactions to the GPU, where the grid order is - * needed. Also the xq, f and fshift device buffers have - * been reallocated if needed, so the bonded code can - * learn about them. */ - // TODO the xq, f, and fshift buffers are now shared - // resources, so they should be maintained by a - // higher-level object than the nb module. 
- fr->gpuBonded->updateInteractionListsAndDeviceBuffers( - nbv->getGridIndices(), top->idef, Nbnxm::gpu_get_xq(nbv->gpu_nbv), - Nbnxm::gpu_get_f(nbv->gpu_nbv), Nbnxm::gpu_get_fshift(nbv->gpu_nbv)); - } - } - - // Need to run after the GPU-offload bonded interaction lists - // are set up to be able to determine whether there is bonded work. - runScheduleWork->domainWork = setupDomainLifetimeWorkload( - *inputrec, *fr, pull_work, ed, *mdatoms, simulationWork, stepWork); - - wallcycle_start_nocount(wcycle, ewcNS); - wallcycle_sub_start(wcycle, ewcsNBS_SEARCH_LOCAL); - /* Note that with a GPU the launch overhead of the list transfer is not timed separately */ - nbv->constructPairlist(InteractionLocality::Local, top->excls, step, nrnb); - - nbv->setupGpuShortRangeWork(fr->gpuBonded, InteractionLocality::Local); - - wallcycle_sub_stop(wcycle, ewcsNBS_SEARCH_LOCAL); - wallcycle_stop(wcycle, ewcNS); - - if (stepWork.useGpuXBufferOps) - { - nbv->atomdata_init_copy_x_to_nbat_x_gpu(); - } - - if (simulationWork.useGpuBufferOps) - { - setupGpuForceReductions(runScheduleWork, cr, fr); - } - } - else if (!EI_TPI(inputrec->eI) && stepWork.computeNonbondedForces) - { - if (stepWork.useGpuXBufferOps) - { - GMX_ASSERT(stateGpu, "stateGpu should be valid when buffer ops are offloaded"); - nbv->convertCoordinatesGpu(AtomLocality::Local, false, stateGpu->getCoordinates(), - localXReadyOnDevice); - } - else - { - if (simulationWork.useGpuUpdate) - { - GMX_ASSERT(stateGpu, "need a valid stateGpu object"); - GMX_ASSERT(haveCopiedXFromGpu, - "a wait should only be triggered if copy has been scheduled"); - stateGpu->waitCoordinatesReadyOnHost(AtomLocality::Local); - } - nbv->convertCoordinates(AtomLocality::Local, false, x.unpaddedArrayRef()); - } - } - - if (simulationWork.useGpuNonbonded && (stepWork.computeNonbondedForces || domainWork.haveGpuBondedWork)) - { - ddBalanceRegionHandler.openBeforeForceComputationGpu(); - - wallcycle_start(wcycle, ewcLAUNCH_GPU); - 
wallcycle_sub_start(wcycle, ewcsLAUNCH_GPU_NONBONDED); - Nbnxm::gpu_upload_shiftvec(nbv->gpu_nbv, nbv->nbat.get()); - if (stepWork.doNeighborSearch || !stepWork.useGpuXBufferOps) - { - Nbnxm::gpu_copy_xq_to_gpu(nbv->gpu_nbv, nbv->nbat.get(), AtomLocality::Local); - } - wallcycle_sub_stop(wcycle, ewcsLAUNCH_GPU_NONBONDED); - wallcycle_stop(wcycle, ewcLAUNCH_GPU); - // with X buffer ops offloaded to the GPU on all but the search steps - - // bonded work not split into separate local and non-local, so with DD - // we can only launch the kernel after non-local coordinates have been received. - if (domainWork.haveGpuBondedWork && !havePPDomainDecomposition(cr)) - { - fr->gpuBonded->setPbcAndlaunchKernel(fr->pbcType, box, fr->bMolPBC, stepWork); - } - - /* launch local nonbonded work on GPU */ - wallcycle_start_nocount(wcycle, ewcLAUNCH_GPU); - wallcycle_sub_start_nocount(wcycle, ewcsLAUNCH_GPU_NONBONDED); - do_nb_verlet(fr, ic, enerd, stepWork, InteractionLocality::Local, enbvClearFNo, step, nrnb, wcycle); - wallcycle_sub_stop(wcycle, ewcsLAUNCH_GPU_NONBONDED); - wallcycle_stop(wcycle, ewcLAUNCH_GPU); - } - - if (useGpuPmeOnThisRank) - { - // In PME GPU and mixed mode we launch FFT / gather after the - // X copy/transform to allow overlap as well as after the GPU NB - // launch to avoid FFT launch overhead hijacking the CPU and delaying - // the nonbonded kernel. 
- launchPmeGpuFftAndGather(fr->pmedata, lambda[efptCOUL], wcycle, stepWork); - } - - /* Communicate coordinates and sum dipole if necessary + - do non-local pair search */ - if (havePPDomainDecomposition(cr)) - { - if (stepWork.doNeighborSearch) - { - // TODO: fuse this branch with the above large stepWork.doNeighborSearch block - wallcycle_start_nocount(wcycle, ewcNS); - wallcycle_sub_start(wcycle, ewcsNBS_SEARCH_NONLOCAL); - /* Note that with a GPU the launch overhead of the list transfer is not timed separately */ - nbv->constructPairlist(InteractionLocality::NonLocal, top->excls, step, nrnb); - - nbv->setupGpuShortRangeWork(fr->gpuBonded, InteractionLocality::NonLocal); - wallcycle_sub_stop(wcycle, ewcsNBS_SEARCH_NONLOCAL); - wallcycle_stop(wcycle, ewcNS); - // TODO refactor this GPU halo exchange re-initialisation - // to location in do_md where GPU halo exchange is - // constructed at partitioning, after above stateGpu - // re-initialization has similarly been refactored - if (simulationWork.useGpuHaloExchange) - { - reinitGpuHaloExchange(*cr, stateGpu->getCoordinates(), stateGpu->getForces()); - } - } - else - { - if (stepWork.useGpuXHalo) - { - // The following must be called after local setCoordinates (which records an event - // when the coordinate data has been copied to the device). 
- communicateGpuHaloCoordinates(*cr, box, localXReadyOnDevice); - - if (domainWork.haveCpuBondedWork || domainWork.haveFreeEnergyWork) - { - // non-local part of coordinate buffer must be copied back to host for CPU work - stateGpu->copyCoordinatesFromGpu(x.unpaddedArrayRef(), AtomLocality::NonLocal); - } - } - else - { - if (simulationWork.useGpuUpdate) - { - GMX_ASSERT(haveCopiedXFromGpu, - "a wait should only be triggered if copy has been scheduled"); - stateGpu->waitCoordinatesReadyOnHost(AtomLocality::Local); - } - dd_move_x(cr->dd, box, x.unpaddedArrayRef(), wcycle); - } - - if (stepWork.useGpuXBufferOps) - { - if (!useGpuPmeOnThisRank && !stepWork.useGpuXHalo) - { - stateGpu->copyCoordinatesToGpu(x.unpaddedArrayRef(), AtomLocality::NonLocal); - } - nbv->convertCoordinatesGpu(AtomLocality::NonLocal, false, stateGpu->getCoordinates(), - stateGpu->getCoordinatesReadyOnDeviceEvent( - AtomLocality::NonLocal, simulationWork, stepWork)); - } - else - { - nbv->convertCoordinates(AtomLocality::NonLocal, false, x.unpaddedArrayRef()); - } - } - - if (simulationWork.useGpuNonbonded) - { - - if (stepWork.doNeighborSearch || !stepWork.useGpuXBufferOps) - { - wallcycle_start(wcycle, ewcLAUNCH_GPU); - wallcycle_sub_start(wcycle, ewcsLAUNCH_GPU_NONBONDED); - Nbnxm::gpu_copy_xq_to_gpu(nbv->gpu_nbv, nbv->nbat.get(), AtomLocality::NonLocal); - wallcycle_sub_stop(wcycle, ewcsLAUNCH_GPU_NONBONDED); - wallcycle_stop(wcycle, ewcLAUNCH_GPU); - } - - if (domainWork.haveGpuBondedWork) - { - fr->gpuBonded->setPbcAndlaunchKernel(fr->pbcType, box, fr->bMolPBC, stepWork); - } - - /* launch non-local nonbonded tasks on GPU */ - wallcycle_start_nocount(wcycle, ewcLAUNCH_GPU); - wallcycle_sub_start(wcycle, ewcsLAUNCH_GPU_NONBONDED); - do_nb_verlet(fr, ic, enerd, stepWork, InteractionLocality::NonLocal, enbvClearFNo, step, - nrnb, wcycle); - wallcycle_sub_stop(wcycle, ewcsLAUNCH_GPU_NONBONDED); - wallcycle_stop(wcycle, ewcLAUNCH_GPU); - } - } - - if (simulationWork.useGpuNonbonded && 
stepWork.computeNonbondedForces) - { - /* launch D2H copy-back F */ - wallcycle_start_nocount(wcycle, ewcLAUNCH_GPU); - wallcycle_sub_start_nocount(wcycle, ewcsLAUNCH_GPU_NONBONDED); - - if (havePPDomainDecomposition(cr)) - { - Nbnxm::gpu_launch_cpyback(nbv->gpu_nbv, nbv->nbat.get(), stepWork, AtomLocality::NonLocal); - } - Nbnxm::gpu_launch_cpyback(nbv->gpu_nbv, nbv->nbat.get(), stepWork, AtomLocality::Local); - wallcycle_sub_stop(wcycle, ewcsLAUNCH_GPU_NONBONDED); - - if (domainWork.haveGpuBondedWork && stepWork.computeEnergy) - { - fr->gpuBonded->launchEnergyTransfer(); - } - wallcycle_stop(wcycle, ewcLAUNCH_GPU); - } - - gmx::ArrayRef xWholeMolecules; - if (fr->wholeMoleculeTransform) - { - xWholeMolecules = fr->wholeMoleculeTransform->wholeMoleculeCoordinates(x.unpaddedArrayRef(), box); - } - - DipoleData dipoleData; - - if (simulationWork.computeMuTot) - { - const int start = 0; - - if (simulationWork.useGpuUpdate && !stepWork.doNeighborSearch) - { - GMX_ASSERT(haveCopiedXFromGpu, - "a wait should only be triggered if copy has been scheduled"); - stateGpu->waitCoordinatesReadyOnHost(AtomLocality::Local); - } - - /* Calculate total (local) dipole moment in a temporary common array. - * This makes it possible to sum them over nodes faster. - */ - gmx::ArrayRef xRef = - (xWholeMolecules.empty() ? x.unpaddedArrayRef() : xWholeMolecules); - calc_mu(start, mdatoms->homenr, xRef, mdatoms->chargeA, mdatoms->chargeB, - mdatoms->nChargePerturbed, dipoleData.muStaging[0], dipoleData.muStaging[1]); - - reduceAndUpdateMuTot(&dipoleData, cr, (fr->efep != efepNO), lambda, muTotal, ddBalanceRegionHandler); - } - - /* Reset energies */ - reset_enerdata(enerd); - - if (DOMAINDECOMP(cr) && !thisRankHasDuty(cr, DUTY_PME)) - { - wallcycle_start(wcycle, ewcPPDURINGPME); - dd_force_flop_start(cr->dd, nrnb); - } - - // For the rest of the CPU tasks that depend on GPU-update produced coordinates, - // this wait ensures that the D2H transfer is complete. 
- if ((simulationWork.useGpuUpdate) - && (runScheduleWork->domainWork.haveCpuLocalForceWork || stepWork.computeVirial)) - { - stateGpu->waitCoordinatesReadyOnHost(AtomLocality::Local); - } - - if (inputrec->bRot) - { - wallcycle_start(wcycle, ewcROT); - do_rotation(cr, enforcedRotation, box, as_rvec_array(x.unpaddedArrayRef().data()), t, step, - stepWork.doNeighborSearch); - wallcycle_stop(wcycle, ewcROT); - } - - /* Start the force cycle counter. - * Note that a different counter is used for dynamic load balancing. - */ - wallcycle_start(wcycle, ewcFORCE); - - /* Set up and clear force outputs: - * forceOutMtsLevel0: everything except what is in the other two outputs - * forceOutMtsLevel1: PME-mesh and listed-forces group 1 - * forceOutNonbonded: non-bonded forces - * Without multiple time stepping all point to the same object. - * With multiple time-stepping the use is different for MTS fast (level0 only) and slow steps. - */ - ForceOutputs forceOutMtsLevel0 = - setupForceOutputs(&fr->forceHelperBuffers[0], force, stepWork, wcycle); - - // Force output for MTS combined forces, only set at level1 MTS steps - std::optional forceOutMts = - (fr->useMts && stepWork.computeSlowForces) - ? std::optional(setupForceOutputs(&fr->forceHelperBuffers[1], - forceView->forceMtsCombinedWithPadding(), - stepWork, wcycle)) - : std::nullopt; - - ForceOutputs* forceOutMtsLevel1 = - fr->useMts ? (stepWork.computeSlowForces ? &forceOutMts.value() : nullptr) : &forceOutMtsLevel0; - - const bool nonbondedAtMtsLevel1 = runScheduleWork->simulationWork.computeNonbondedAtMtsLevel1; - - ForceOutputs* forceOutNonbonded = nonbondedAtMtsLevel1 ? forceOutMtsLevel1 : &forceOutMtsLevel0; - - if (inputrec->bPull && pull_have_constraint(*pull_work)) - { - clear_pull_forces(pull_work); - } - - /* We calculate the non-bonded forces, when done on the CPU, here. 
- * We do this before calling do_force_lowlevel, because in that - * function, the listed forces are calculated before PME, which - * does communication. With this order, non-bonded and listed - * force calculation imbalance can be balanced out by the domain - * decomposition load balancing. - */ - - const bool useOrEmulateGpuNb = simulationWork.useGpuNonbonded || fr->nbv->emulateGpu(); - - if (!useOrEmulateGpuNb) - { - do_nb_verlet(fr, ic, enerd, stepWork, InteractionLocality::Local, enbvClearFYes, step, nrnb, wcycle); - } - - if (fr->efep != efepNO && stepWork.computeNonbondedForces) - { - /* Calculate the local and non-local free energy interactions here. - * Happens here on the CPU both with and without GPU. - */ - nbv->dispatchFreeEnergyKernel(InteractionLocality::Local, fr, - as_rvec_array(x.unpaddedArrayRef().data()), - &forceOutNonbonded->forceWithShiftForces(), *mdatoms, - inputrec->fepvals, lambda, enerd, stepWork, nrnb); - - if (havePPDomainDecomposition(cr)) - { - nbv->dispatchFreeEnergyKernel(InteractionLocality::NonLocal, fr, - as_rvec_array(x.unpaddedArrayRef().data()), - &forceOutNonbonded->forceWithShiftForces(), *mdatoms, - inputrec->fepvals, lambda, enerd, stepWork, nrnb); - } - } - - if (stepWork.computeNonbondedForces && !useOrEmulateGpuNb) - { - if (havePPDomainDecomposition(cr)) - { - do_nb_verlet(fr, ic, enerd, stepWork, InteractionLocality::NonLocal, enbvClearFNo, step, - nrnb, wcycle); - } - - if (stepWork.computeForces) - { - /* Add all the non-bonded force to the normal force array. - * This can be split into a local and a non-local part when overlapping - * communication with calculation with domain decomposition. 
- */ - wallcycle_stop(wcycle, ewcFORCE); - nbv->atomdata_add_nbat_f_to_f(AtomLocality::All, - forceOutNonbonded->forceWithShiftForces().force()); - wallcycle_start_nocount(wcycle, ewcFORCE); - } - - /* If there are multiple fshift output buffers we need to reduce them */ - if (stepWork.computeVirial) - { - /* This is not in a subcounter because it takes a - negligible and constant-sized amount of time */ - nbnxn_atomdata_add_nbat_fshift_to_fshift( - *nbv->nbat, forceOutNonbonded->forceWithShiftForces().shiftForces()); - } - } - - // TODO Force flags should include haveFreeEnergyWork for this domain - if (stepWork.useGpuXHalo && (domainWork.haveCpuBondedWork || domainWork.haveFreeEnergyWork)) - { - wallcycle_stop(wcycle, ewcFORCE); - /* Wait for non-local coordinate data to be copied from device */ - stateGpu->waitCoordinatesReadyOnHost(AtomLocality::NonLocal); - wallcycle_start_nocount(wcycle, ewcFORCE); - } - - // Compute wall interactions, when present. - // Note: should be moved to special forces. - if (inputrec->nwall && stepWork.computeNonbondedForces) - { - /* foreign lambda component for walls */ - real dvdl_walls = do_walls(*inputrec, *fr, box, *mdatoms, x.unpaddedConstArrayRef(), - &forceOutMtsLevel0.forceWithVirial(), lambda[efptVDW], - enerd->grpp.ener[egLJSR].data(), nrnb); - enerd->dvdl_lin[efptVDW] += dvdl_walls; - } - - if (stepWork.computeListedForces) - { - /* Check whether we need to take into account PBC in listed interactions */ - bool needMolPbc = false; - for (const auto& listedForces : fr->listedForces) - { - if (listedForces.haveCpuListedForces(*fr->fcdata)) - { - needMolPbc = fr->bMolPBC; - } - } - - t_pbc pbc; - - if (needMolPbc) - { - /* Since all atoms are in the rectangular or triclinic unit-cell, - * only single box vector shifts (2 in x) are required. - */ - set_pbc_dd(&pbc, fr->pbcType, DOMAINDECOMP(cr) ? cr->dd->numCells : nullptr, TRUE, box); - } - - for (int mtsIndex = 0; mtsIndex < (fr->useMts && stepWork.computeSlowForces ? 
2 : 1); mtsIndex++) - { - ListedForces& listedForces = fr->listedForces[mtsIndex]; - ForceOutputs& forceOut = (mtsIndex == 0 ? forceOutMtsLevel0 : *forceOutMtsLevel1); - listedForces.calculate( - wcycle, box, inputrec->fepvals, cr, ms, x, xWholeMolecules, fr->fcdata.get(), - hist, &forceOut, fr, &pbc, enerd, nrnb, lambda.data(), mdatoms, - DOMAINDECOMP(cr) ? cr->dd->globalAtomIndices.data() : nullptr, stepWork); - } - } - - if (stepWork.computeSlowForces) - { - calculateLongRangeNonbondeds(fr, inputrec, cr, nrnb, wcycle, mdatoms, - x.unpaddedConstArrayRef(), &forceOutMtsLevel1->forceWithVirial(), - enerd, box, lambda.data(), as_rvec_array(dipoleData.muStateAB), - stepWork, ddBalanceRegionHandler); - } - - /* PLUMED */ - if(plumedswitch){ - int plumedNeedsEnergy; - plumed_cmd(plumedmain,"isEnergyNeeded",&plumedNeedsEnergy); - if(!plumedNeedsEnergy) plumed_cmd(plumedmain,"performCalc",nullptr); - } - /* END PLUMED */ - - wallcycle_stop(wcycle, ewcFORCE); - - // VdW dispersion correction, only computed on master rank to avoid double counting - if ((stepWork.computeEnergy || stepWork.computeVirial) && fr->dispersionCorrection && MASTER(cr)) - { - // Calculate long range corrections to pressure and energy - const DispersionCorrection::Correction correction = - fr->dispersionCorrection->calculate(box, lambda[efptVDW]); - - if (stepWork.computeEnergy) - { - enerd->term[F_DISPCORR] = correction.energy; - enerd->term[F_DVDL_VDW] += correction.dvdl; - enerd->dvdl_lin[efptVDW] += correction.dvdl; - } - if (stepWork.computeVirial) - { - correction.correctVirial(vir_force); - enerd->term[F_PDISPCORR] = correction.pressure; - } - } - - const bool needToReceivePmeResultsFromSeparateRank = - (PAR(cr) && !thisRankHasDuty(cr, DUTY_PME) && stepWork.computeSlowForces); - - /* When running free energy perturbations steered by AWH and doing PME calculations on the - * GPU we must wait for the PME calculation (dhdl) results to finish before sampling the - * FEP dimension with AWH. 
*/ - const bool needEarlyPmeResults = (awh != nullptr && awh->hasFepLambdaDimension() - && pme_run_mode(fr->pmedata) != PmeRunMode::None - && stepWork.computeEnergy && stepWork.computeSlowForces); - if (needEarlyPmeResults) - { - if (useGpuPmeOnThisRank) - { - pme_gpu_wait_and_reduce(fr->pmedata, stepWork, wcycle, - &forceOutMtsLevel1->forceWithVirial(), enerd, lambda[efptCOUL]); - } - else if (needToReceivePmeResultsFromSeparateRank) - { - /* In case of node-splitting, the PP nodes receive the long-range - * forces, virial and energy from the PME nodes here. - */ - pme_receive_force_ener(fr, cr, &forceOutMtsLevel1->forceWithVirial(), enerd, - simulationWork.useGpuPmePpCommunication, - stepWork.useGpuPmeFReduction, wcycle); - } - } - - computeSpecialForces(fplog, cr, inputrec, awh, enforcedRotation, imdSession, pull_work, step, t, - wcycle, fr->forceProviders, box, x.unpaddedArrayRef(), mdatoms, lambda, - stepWork, &forceOutMtsLevel0.forceWithVirial(), - forceOutMtsLevel1 ? &forceOutMtsLevel1->forceWithVirial() : nullptr, enerd, - ed, stepWork.doNeighborSearch); - - GMX_ASSERT(!(nonbondedAtMtsLevel1 && stepWork.useGpuFBufferOps), - "The schedule below does not allow for nonbonded MTS with GPU buffer ops"); - GMX_ASSERT(!(nonbondedAtMtsLevel1 && stepWork.useGpuFHalo), - "The schedule below does not allow for nonbonded MTS with GPU halo exchange"); - // Will store the amount of cycles spent waiting for the GPU that - // will be later used in the DLB accounting. 
- float cycles_wait_gpu = 0; - if (useOrEmulateGpuNb && stepWork.computeNonbondedForces) - { - auto& forceWithShiftForces = forceOutNonbonded->forceWithShiftForces(); - - /* wait for non-local forces (or calculate in emulation mode) */ - if (havePPDomainDecomposition(cr)) - { - if (simulationWork.useGpuNonbonded) - { - cycles_wait_gpu += Nbnxm::gpu_wait_finish_task( - nbv->gpu_nbv, stepWork, AtomLocality::NonLocal, enerd->grpp.ener[egLJSR].data(), - enerd->grpp.ener[egCOULSR].data(), forceWithShiftForces.shiftForces(), wcycle); - } - else - { - wallcycle_start_nocount(wcycle, ewcFORCE); - do_nb_verlet(fr, ic, enerd, stepWork, InteractionLocality::NonLocal, enbvClearFYes, - step, nrnb, wcycle); - wallcycle_stop(wcycle, ewcFORCE); - } - - if (stepWork.useGpuFBufferOps) - { - // TODO: move this into DomainLifetimeWorkload, including the second part of the - // condition The bonded and free energy CPU tasks can have non-local force - // contributions which are a dependency for the GPU force reduction. - bool haveNonLocalForceContribInCpuBuffer = - domainWork.haveCpuBondedWork || domainWork.haveFreeEnergyWork; - - if (haveNonLocalForceContribInCpuBuffer) - { - stateGpu->copyForcesToGpu(forceOutMtsLevel0.forceWithShiftForces().force(), - AtomLocality::NonLocal); - } - - - fr->gpuForceReduction[gmx::AtomLocality::NonLocal]->execute(); - - if (!stepWork.useGpuFHalo) - { - // copy from GPU input for dd_move_f() - stateGpu->copyForcesFromGpu(forceOutMtsLevel0.forceWithShiftForces().force(), - AtomLocality::NonLocal); - } - } - else - { - nbv->atomdata_add_nbat_f_to_f(AtomLocality::NonLocal, forceWithShiftForces.force()); - } - - if (fr->nbv->emulateGpu() && stepWork.computeVirial) - { - nbnxn_atomdata_add_nbat_fshift_to_fshift(*nbv->nbat, forceWithShiftForces.shiftForces()); - } - } - } - - /* Combining the forces for multiple time stepping before the halo exchange, when possible, - * avoids an extra halo exchange (when DD is used) and post-processing step. 
- */ - const bool combineMtsForcesBeforeHaloExchange = - (stepWork.computeForces && fr->useMts && stepWork.computeSlowForces - && (legacyFlags & GMX_FORCE_DO_NOT_NEED_NORMAL_FORCE) != 0 - && !(stepWork.computeVirial || simulationWork.useGpuNonbonded || useGpuPmeOnThisRank)); - if (combineMtsForcesBeforeHaloExchange) - { - const int numAtoms = havePPDomainDecomposition(cr) ? dd_numAtomsZones(*cr->dd) : mdatoms->homenr; - combineMtsForces(numAtoms, force.unpaddedArrayRef(), forceView->forceMtsCombined(), - inputrec->mtsLevels[1].stepFactor); - } - - if (havePPDomainDecomposition(cr)) - { - /* We are done with the CPU compute. - * We will now communicate the non-local forces. - * If we use a GPU this will overlap with GPU work, so in that case - * we do not close the DD force balancing region here. - */ - ddBalanceRegionHandler.closeAfterForceComputationCpu(); - - if (stepWork.computeForces) - { - - if (stepWork.useGpuFHalo) - { - if (domainWork.haveCpuLocalForceWork) - { - stateGpu->copyForcesToGpu(forceOutMtsLevel0.forceWithShiftForces().force(), - AtomLocality::Local); - } - communicateGpuHaloForces(*cr, domainWork.haveCpuLocalForceWork); - } - else - { - if (stepWork.useGpuFBufferOps) - { - stateGpu->waitForcesReadyOnHost(AtomLocality::NonLocal); - } - - // Without MTS or with MTS at slow steps with uncombined forces we need to - // communicate the fast forces - if (!fr->useMts || !combineMtsForcesBeforeHaloExchange) - { - dd_move_f(cr->dd, &forceOutMtsLevel0.forceWithShiftForces(), wcycle); - } - // With MTS we need to communicate the slow or combined (in forceOutMtsLevel1) forces - if (fr->useMts && stepWork.computeSlowForces) - { - dd_move_f(cr->dd, &forceOutMtsLevel1->forceWithShiftForces(), wcycle); - } - } - } - } - - // With both nonbonded and PME offloaded a GPU on the same rank, we use - // an alternating wait/reduction scheme. - // When running free energy perturbations steered by AWH and calculating PME on GPU, - // i.e. 
if needEarlyPmeResults == true, the PME results have already been reduced above. - bool alternateGpuWait = - (!c_disableAlternatingWait && useGpuPmeOnThisRank && simulationWork.useGpuNonbonded - && !DOMAINDECOMP(cr) && !stepWork.useGpuFBufferOps && !needEarlyPmeResults); - if (alternateGpuWait) - { - alternatePmeNbGpuWaitReduce(fr->nbv.get(), fr->pmedata, forceOutNonbonded, - forceOutMtsLevel1, enerd, lambda[efptCOUL], stepWork, wcycle); - } - - if (!alternateGpuWait && useGpuPmeOnThisRank && !needEarlyPmeResults) - { - pme_gpu_wait_and_reduce(fr->pmedata, stepWork, wcycle, - &forceOutMtsLevel1->forceWithVirial(), enerd, lambda[efptCOUL]); - } - - /* Wait for local GPU NB outputs on the non-alternating wait path */ - if (!alternateGpuWait && stepWork.computeNonbondedForces && simulationWork.useGpuNonbonded) - { - /* Measured overhead on CUDA and OpenCL with(out) GPU sharing - * is between 0.5 and 1.5 Mcycles. So 2 MCycles is an overestimate, - * but even with a step of 0.1 ms the difference is less than 1% - * of the step time. - */ - const float gpuWaitApiOverheadMargin = 2e6F; /* cycles */ - const float waitCycles = Nbnxm::gpu_wait_finish_task( - nbv->gpu_nbv, stepWork, AtomLocality::Local, enerd->grpp.ener[egLJSR].data(), - enerd->grpp.ener[egCOULSR].data(), - forceOutNonbonded->forceWithShiftForces().shiftForces(), wcycle); - - if (ddBalanceRegionHandler.useBalancingRegion()) - { - DdBalanceRegionWaitedForGpu waitedForGpu = DdBalanceRegionWaitedForGpu::yes; - if (stepWork.computeForces && waitCycles <= gpuWaitApiOverheadMargin) - { - /* We measured few cycles, it could be that the kernel - * and transfer finished earlier and there was no actual - * wait time, only API call overhead. - * Then the actual time could be anywhere between 0 and - * cycles_wait_est. We will use half of cycles_wait_est. 
- */ - waitedForGpu = DdBalanceRegionWaitedForGpu::no; - } - ddBalanceRegionHandler.closeAfterForceComputationGpu(cycles_wait_gpu, waitedForGpu); - } - } - - if (fr->nbv->emulateGpu()) - { - // NOTE: emulation kernel is not included in the balancing region, - // but emulation mode does not target performance anyway - wallcycle_start_nocount(wcycle, ewcFORCE); - do_nb_verlet(fr, ic, enerd, stepWork, InteractionLocality::Local, - DOMAINDECOMP(cr) ? enbvClearFNo : enbvClearFYes, step, nrnb, wcycle); - wallcycle_stop(wcycle, ewcFORCE); - } - - // If on GPU PME-PP comms path, receive forces from PME before GPU buffer ops - // TODO refactor this and unify with below default-path call to the same function - // When running free energy perturbations steered by AWH and calculating PME on GPU, - // i.e. if needEarlyPmeResults == true, the PME results have already been reduced above. - if (needToReceivePmeResultsFromSeparateRank && simulationWork.useGpuPmePpCommunication && !needEarlyPmeResults) - { - /* In case of node-splitting, the PP nodes receive the long-range - * forces, virial and energy from the PME nodes here. - */ - pme_receive_force_ener(fr, cr, &forceOutMtsLevel1->forceWithVirial(), enerd, - simulationWork.useGpuPmePpCommunication, - stepWork.useGpuPmeFReduction, wcycle); - } - - - /* Do the nonbonded GPU (or emulation) force buffer reduction - * on the non-alternating path. */ - GMX_ASSERT(!(nonbondedAtMtsLevel1 && stepWork.useGpuFBufferOps), - "The schedule below does not allow for nonbonded MTS with GPU buffer ops"); - if (useOrEmulateGpuNb && !alternateGpuWait) - { - if (stepWork.useGpuFBufferOps) - { - ArrayRef forceWithShift = forceOutNonbonded->forceWithShiftForces().force(); - - // Flag to specify whether the CPU force buffer has contributions to - // local atoms. This depends on whether there are CPU-based force tasks - // or when DD is active the halo exchange has resulted in contributions - // from the non-local part. 
- const bool haveLocalForceContribInCpuBuffer = - (domainWork.haveCpuLocalForceWork || havePPDomainDecomposition(cr)); - - // TODO: move these steps as early as possible: - // - CPU f H2D should be as soon as all CPU-side forces are done - // - wait for force reduction does not need to block host (at least not here, it's sufficient to wait - // before the next CPU task that consumes the forces: vsite spread or update) - // - copy is not perfomed if GPU force halo exchange is active, because it would overwrite the result - // of the halo exchange. In that case the copy is instead performed above, before the exchange. - // These should be unified. - if (haveLocalForceContribInCpuBuffer && !stepWork.useGpuFHalo) - { - // Note: AtomLocality::All is used for the non-DD case because, as in this - // case copyForcesToGpu() uses a separate stream, it allows overlap of - // CPU force H2D with GPU force tasks on all streams including those in the - // local stream which would otherwise be implicit dependencies for the - // transfer and would not overlap. - auto locality = havePPDomainDecomposition(cr) ? AtomLocality::Local : AtomLocality::All; - - stateGpu->copyForcesToGpu(forceWithShift, locality); - } - - if (stepWork.computeNonbondedForces) - { - fr->gpuForceReduction[gmx::AtomLocality::Local]->execute(); - } - - // Copy forces to host if they are needed for update or if virtual sites are enabled. - // If there are vsites, we need to copy forces every step to spread vsite forces on host. - // TODO: When the output flags will be included in step workload, this copy can be combined with the - // copy call done in sim_utils(...) for the output. - // NOTE: If there are virtual sites, the forces are modified on host after this D2H copy. Hence, - // they should not be copied in do_md(...) for the output. 
- if (!simulationWork.useGpuUpdate - || (simulationWork.useGpuUpdate && DOMAINDECOMP(cr) && haveHostPmePpComms) || vsite) - { - stateGpu->copyForcesFromGpu(forceWithShift, AtomLocality::Local); - stateGpu->waitForcesReadyOnHost(AtomLocality::Local); - } - } - else if (stepWork.computeNonbondedForces) - { - ArrayRef forceWithShift = forceOutNonbonded->forceWithShiftForces().force(); - nbv->atomdata_add_nbat_f_to_f(AtomLocality::Local, forceWithShift); - } - } - - launchGpuEndOfStepTasks(nbv, fr->gpuBonded, fr->pmedata, enerd, *runScheduleWork, - useGpuPmeOnThisRank, step, wcycle); - - if (DOMAINDECOMP(cr)) - { - dd_force_flop_stop(cr->dd, nrnb); - } - - const bool haveCombinedMtsForces = (stepWork.computeForces && fr->useMts && stepWork.computeSlowForces - && combineMtsForcesBeforeHaloExchange); - if (stepWork.computeForces) - { - postProcessForceWithShiftForces(nrnb, wcycle, box, x.unpaddedArrayRef(), &forceOutMtsLevel0, - vir_force, *mdatoms, *fr, vsite, stepWork); - - if (fr->useMts && stepWork.computeSlowForces && !haveCombinedMtsForces) - { - postProcessForceWithShiftForces(nrnb, wcycle, box, x.unpaddedArrayRef(), forceOutMtsLevel1, - vir_force, *mdatoms, *fr, vsite, stepWork); - } - } - - // TODO refactor this and unify with above GPU PME-PP / GPU update path call to the same function - // When running free energy perturbations steered by AWH and calculating PME on GPU, - // i.e. if needEarlyPmeResults == true, the PME results have already been reduced above. - if (needToReceivePmeResultsFromSeparateRank && !simulationWork.useGpuPmePpCommunication - && !needEarlyPmeResults) - { - /* In case of node-splitting, the PP nodes receive the long-range - * forces, virial and energy from the PME nodes here. 
- */ - pme_receive_force_ener(fr, cr, &forceOutMtsLevel1->forceWithVirial(), enerd, - simulationWork.useGpuPmePpCommunication, false, wcycle); - } - - if (stepWork.computeForces) - { - /* If we don't use MTS or if we already combined the MTS forces before, we only - * need to post-process one ForceOutputs object here, called forceOutCombined, - * otherwise we have to post-process two outputs and then combine them. - */ - ForceOutputs& forceOutCombined = (haveCombinedMtsForces ? forceOutMts.value() : forceOutMtsLevel0); - postProcessForces(cr, step, nrnb, wcycle, box, x.unpaddedArrayRef(), &forceOutCombined, - vir_force, mdatoms, fr, vsite, stepWork); - - if (fr->useMts && stepWork.computeSlowForces && !haveCombinedMtsForces) - { - postProcessForces(cr, step, nrnb, wcycle, box, x.unpaddedArrayRef(), forceOutMtsLevel1, - vir_force, mdatoms, fr, vsite, stepWork); - - combineMtsForces(mdatoms->homenr, force.unpaddedArrayRef(), - forceView->forceMtsCombined(), inputrec->mtsLevels[1].stepFactor); - } - } - - if (stepWork.computeEnergy) - { - /* Compute the final potential energy terms */ - accumulatePotentialEnergies(enerd, lambda, inputrec->fepvals); - - if (!EI_TPI(inputrec->eI)) - { - checkPotentialEnergyValidity(step, *enerd, *inputrec); - } - } - - /* In case we don't have constraints and are using GPUs, the next balancing - * region starts here. - * Some "special" work at the end of do_force_cuts?, such as vsite spread, - * virial calculation and COM pulling, is not thus not included in - * the balance timing, which is ok as most tasks do communication. 
- */ - ddBalanceRegionHandler.openBeforeForceComputationCpu(DdAllowBalanceRegionReopen::no); -} diff --git a/patches/gromacs-2021.7.diff/src/gromacs/mdlib/sim_util.cpp.preplumed b/patches/gromacs-2021.7.diff/src/gromacs/mdlib/sim_util.cpp.preplumed deleted file mode 100644 index 2571b0d216..0000000000 --- a/patches/gromacs-2021.7.diff/src/gromacs/mdlib/sim_util.cpp.preplumed +++ /dev/null @@ -1,2164 +0,0 @@ -/* - * This file is part of the GROMACS molecular simulation package. - * - * Copyright (c) 1991-2000, University of Groningen, The Netherlands. - * Copyright (c) 2001-2004, The GROMACS development team. - * Copyright (c) 2013-2019,2020,2021, by the GROMACS development team, led by - * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl, - * and including many others, as listed in the AUTHORS file in the - * top-level source directory and at http://www.gromacs.org. - * - * GROMACS is free software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public License - * as published by the Free Software Foundation; either version 2.1 - * of the License, or (at your option) any later version. - * - * GROMACS is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public - * License along with GROMACS; if not, see - * http://www.gnu.org/licenses, or write to the Free Software Foundation, - * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. - * - * If you want to redistribute modifications to GROMACS, please - * consider that scientific software is very special. Version - * control is crucial - bugs must be traceable. We will be happy to - * consider code for inclusion in the official distribution, but - * derived work must not be called official GROMACS. 
Details are found - * in the README & COPYING files - if they are missing, get the - * official version at http://www.gromacs.org. - * - * To help us fund GROMACS development, we humbly ask that you cite - * the research papers on the package. Check out http://www.gromacs.org. - */ -#include "gmxpre.h" - -#include "config.h" - -#include -#include -#include -#include - -#include -#include - -#include "gromacs/applied_forces/awh/awh.h" -#include "gromacs/domdec/dlbtiming.h" -#include "gromacs/domdec/domdec.h" -#include "gromacs/domdec/domdec_struct.h" -#include "gromacs/domdec/gpuhaloexchange.h" -#include "gromacs/domdec/partition.h" -#include "gromacs/essentialdynamics/edsam.h" -#include "gromacs/ewald/pme.h" -#include "gromacs/ewald/pme_pp.h" -#include "gromacs/ewald/pme_pp_comm_gpu.h" -#include "gromacs/gmxlib/network.h" -#include "gromacs/gmxlib/nonbonded/nb_free_energy.h" -#include "gromacs/gmxlib/nonbonded/nb_kernel.h" -#include "gromacs/gmxlib/nonbonded/nonbonded.h" -#include "gromacs/gpu_utils/gpu_utils.h" -#include "gromacs/imd/imd.h" -#include "gromacs/listed_forces/disre.h" -#include "gromacs/listed_forces/gpubonded.h" -#include "gromacs/listed_forces/listed_forces.h" -#include "gromacs/listed_forces/orires.h" -#include "gromacs/math/arrayrefwithpadding.h" -#include "gromacs/math/functions.h" -#include "gromacs/math/units.h" -#include "gromacs/math/vec.h" -#include "gromacs/math/vecdump.h" -#include "gromacs/mdlib/calcmu.h" -#include "gromacs/mdlib/calcvir.h" -#include "gromacs/mdlib/constr.h" -#include "gromacs/mdlib/dispersioncorrection.h" -#include "gromacs/mdlib/enerdata_utils.h" -#include "gromacs/mdlib/force.h" -#include "gromacs/mdlib/force_flags.h" -#include "gromacs/mdlib/forcerec.h" -#include "gromacs/mdlib/gmx_omp_nthreads.h" -#include "gromacs/mdlib/update.h" -#include "gromacs/mdlib/vsite.h" -#include "gromacs/mdlib/wall.h" -#include "gromacs/mdlib/wholemoleculetransform.h" -#include "gromacs/mdtypes/commrec.h" -#include 
"gromacs/mdtypes/enerdata.h" -#include "gromacs/mdtypes/forcebuffers.h" -#include "gromacs/mdtypes/forceoutput.h" -#include "gromacs/mdtypes/forcerec.h" -#include "gromacs/mdtypes/iforceprovider.h" -#include "gromacs/mdtypes/inputrec.h" -#include "gromacs/mdtypes/md_enums.h" -#include "gromacs/mdtypes/mdatom.h" -#include "gromacs/mdtypes/multipletimestepping.h" -#include "gromacs/mdtypes/simulation_workload.h" -#include "gromacs/mdtypes/state.h" -#include "gromacs/mdtypes/state_propagator_data_gpu.h" -#include "gromacs/nbnxm/gpu_data_mgmt.h" -#include "gromacs/nbnxm/nbnxm.h" -#include "gromacs/nbnxm/nbnxm_gpu.h" -#include "gromacs/pbcutil/ishift.h" -#include "gromacs/pbcutil/pbc.h" -#include "gromacs/pulling/pull.h" -#include "gromacs/pulling/pull_rotation.h" -#include "gromacs/timing/cyclecounter.h" -#include "gromacs/timing/gpu_timing.h" -#include "gromacs/timing/wallcycle.h" -#include "gromacs/timing/wallcyclereporting.h" -#include "gromacs/timing/walltime_accounting.h" -#include "gromacs/topology/topology.h" -#include "gromacs/utility/arrayref.h" -#include "gromacs/utility/basedefinitions.h" -#include "gromacs/utility/cstringutil.h" -#include "gromacs/utility/exceptions.h" -#include "gromacs/utility/fatalerror.h" -#include "gromacs/utility/fixedcapacityvector.h" -#include "gromacs/utility/gmxassert.h" -#include "gromacs/utility/gmxmpi.h" -#include "gromacs/utility/logger.h" -#include "gromacs/utility/smalloc.h" -#include "gromacs/utility/strconvert.h" -#include "gromacs/utility/sysinfo.h" - -#include "gpuforcereduction.h" - -using gmx::ArrayRef; -using gmx::AtomLocality; -using gmx::DomainLifetimeWorkload; -using gmx::ForceOutputs; -using gmx::ForceWithShiftForces; -using gmx::InteractionLocality; -using gmx::RVec; -using gmx::SimulationWorkload; -using gmx::StepWorkload; - -// TODO: this environment variable allows us to verify before release -// that on less common architectures the total cost of polling is not larger than -// a blocking wait (so polling does 
not introduce overhead when the static -// PME-first ordering would suffice). -static const bool c_disableAlternatingWait = (getenv("GMX_DISABLE_ALTERNATING_GPU_WAIT") != nullptr); - -static void sum_forces(ArrayRef f, ArrayRef forceToAdd) -{ - GMX_ASSERT(f.size() >= forceToAdd.size(), "Accumulation buffer should be sufficiently large"); - const int end = forceToAdd.size(); - - int gmx_unused nt = gmx_omp_nthreads_get(emntDefault); -#pragma omp parallel for num_threads(nt) schedule(static) - for (int i = 0; i < end; i++) - { - rvec_inc(f[i], forceToAdd[i]); - } -} - -static void calc_virial(int start, - int homenr, - const rvec x[], - const gmx::ForceWithShiftForces& forceWithShiftForces, - tensor vir_part, - const matrix box, - t_nrnb* nrnb, - const t_forcerec* fr, - PbcType pbcType) -{ - /* The short-range virial from surrounding boxes */ - const rvec* fshift = as_rvec_array(forceWithShiftForces.shiftForces().data()); - calc_vir(SHIFTS, fr->shift_vec, fshift, vir_part, pbcType == PbcType::Screw, box); - inc_nrnb(nrnb, eNR_VIRIAL, SHIFTS); - - /* Calculate partial virial, for local atoms only, based on short range. - * Total virial is computed in global_stat, called from do_md - */ - const rvec* f = as_rvec_array(forceWithShiftForces.force().data()); - f_calc_vir(start, start + homenr, x, f, vir_part, box); - inc_nrnb(nrnb, eNR_VIRIAL, homenr); - - if (debug) - { - pr_rvecs(debug, 0, "vir_part", vir_part, DIM); - } -} - -static void pull_potential_wrapper(const t_commrec* cr, - const t_inputrec* ir, - const matrix box, - gmx::ArrayRef x, - gmx::ForceWithVirial* force, - const t_mdatoms* mdatoms, - gmx_enerdata_t* enerd, - pull_t* pull_work, - const real* lambda, - double t, - gmx_wallcycle_t wcycle) -{ - t_pbc pbc; - real dvdl; - - /* Calculate the center of mass forces, this requires communication, - * which is why pull_potential is called close to other communication. 
- */ - wallcycle_start(wcycle, ewcPULLPOT); - set_pbc(&pbc, ir->pbcType, box); - dvdl = 0; - enerd->term[F_COM_PULL] += - pull_potential(pull_work, mdatoms->massT, &pbc, cr, t, lambda[efptRESTRAINT], - as_rvec_array(x.data()), force, &dvdl); - enerd->dvdl_lin[efptRESTRAINT] += dvdl; - wallcycle_stop(wcycle, ewcPULLPOT); -} - -static void pme_receive_force_ener(t_forcerec* fr, - const t_commrec* cr, - gmx::ForceWithVirial* forceWithVirial, - gmx_enerdata_t* enerd, - bool useGpuPmePpComms, - bool receivePmeForceToGpu, - gmx_wallcycle_t wcycle) -{ - real e_q, e_lj, dvdl_q, dvdl_lj; - float cycles_ppdpme, cycles_seppme; - - cycles_ppdpme = wallcycle_stop(wcycle, ewcPPDURINGPME); - dd_cycles_add(cr->dd, cycles_ppdpme, ddCyclPPduringPME); - - /* In case of node-splitting, the PP nodes receive the long-range - * forces, virial and energy from the PME nodes here. - */ - wallcycle_start(wcycle, ewcPP_PMEWAITRECVF); - dvdl_q = 0; - dvdl_lj = 0; - gmx_pme_receive_f(fr->pmePpCommGpu.get(), cr, forceWithVirial, &e_q, &e_lj, &dvdl_q, &dvdl_lj, - useGpuPmePpComms, receivePmeForceToGpu, &cycles_seppme); - enerd->term[F_COUL_RECIP] += e_q; - enerd->term[F_LJ_RECIP] += e_lj; - enerd->dvdl_lin[efptCOUL] += dvdl_q; - enerd->dvdl_lin[efptVDW] += dvdl_lj; - - if (wcycle) - { - dd_cycles_add(cr->dd, cycles_seppme, ddCyclPME); - } - wallcycle_stop(wcycle, ewcPP_PMEWAITRECVF); -} - -static void print_large_forces(FILE* fp, - const t_mdatoms* md, - const t_commrec* cr, - int64_t step, - real forceTolerance, - ArrayRef x, - ArrayRef f) -{ - real force2Tolerance = gmx::square(forceTolerance); - gmx::index numNonFinite = 0; - for (int i = 0; i < md->homenr; i++) - { - real force2 = norm2(f[i]); - bool nonFinite = !std::isfinite(force2); - if (force2 >= force2Tolerance || nonFinite) - { - fprintf(fp, "step %" PRId64 " atom %6d x %8.3f %8.3f %8.3f force %12.5e\n", step, - ddglatnr(cr->dd, i), x[i][XX], x[i][YY], x[i][ZZ], std::sqrt(force2)); - } - if (nonFinite) - { - numNonFinite++; - } - } - 
if (numNonFinite > 0) - { - /* Note that with MPI this fatal call on one rank might interrupt - * the printing on other ranks. But we can only avoid that with - * an expensive MPI barrier that we would need at each step. - */ - gmx_fatal(FARGS, "At step %" PRId64 " detected non-finite forces on %td atoms", step, numNonFinite); - } -} - -//! When necessary, spreads forces on vsites and computes the virial for \p forceOutputs->forceWithShiftForces() -static void postProcessForceWithShiftForces(t_nrnb* nrnb, - gmx_wallcycle_t wcycle, - const matrix box, - ArrayRef x, - ForceOutputs* forceOutputs, - tensor vir_force, - const t_mdatoms& mdatoms, - const t_forcerec& fr, - gmx::VirtualSitesHandler* vsite, - const StepWorkload& stepWork) -{ - ForceWithShiftForces& forceWithShiftForces = forceOutputs->forceWithShiftForces(); - - /* If we have NoVirSum forces, but we do not calculate the virial, - * we later sum the forceWithShiftForces buffer together with - * the noVirSum buffer and spread the combined vsite forces at once. - */ - if (vsite && (!forceOutputs->haveForceWithVirial() || stepWork.computeVirial)) - { - using VirialHandling = gmx::VirtualSitesHandler::VirialHandling; - - auto f = forceWithShiftForces.force(); - auto fshift = forceWithShiftForces.shiftForces(); - const VirialHandling virialHandling = - (stepWork.computeVirial ? VirialHandling::Pbc : VirialHandling::None); - vsite->spreadForces(x, f, virialHandling, fshift, nullptr, nrnb, box, wcycle); - forceWithShiftForces.haveSpreadVsiteForces() = true; - } - - if (stepWork.computeVirial) - { - /* Calculation of the virial must be done after vsites! */ - calc_virial(0, mdatoms.homenr, as_rvec_array(x.data()), forceWithShiftForces, vir_force, - box, nrnb, &fr, fr.pbcType); - } -} - -//! 
Spread, compute virial for and sum forces, when necessary -static void postProcessForces(const t_commrec* cr, - int64_t step, - t_nrnb* nrnb, - gmx_wallcycle_t wcycle, - const matrix box, - ArrayRef x, - ForceOutputs* forceOutputs, - tensor vir_force, - const t_mdatoms* mdatoms, - const t_forcerec* fr, - gmx::VirtualSitesHandler* vsite, - const StepWorkload& stepWork) -{ - // Extract the final output force buffer, which is also the buffer for forces with shift forces - ArrayRef f = forceOutputs->forceWithShiftForces().force(); - - if (forceOutputs->haveForceWithVirial()) - { - auto& forceWithVirial = forceOutputs->forceWithVirial(); - - if (vsite) - { - /* Spread the mesh force on virtual sites to the other particles... - * This is parallellized. MPI communication is performed - * if the constructing atoms aren't local. - */ - GMX_ASSERT(!stepWork.computeVirial || f.data() != forceWithVirial.force_.data(), - "We need separate force buffers for shift and virial forces when " - "computing the virial"); - GMX_ASSERT(!stepWork.computeVirial - || forceOutputs->forceWithShiftForces().haveSpreadVsiteForces(), - "We should spread the force with shift forces separately when computing " - "the virial"); - const gmx::VirtualSitesHandler::VirialHandling virialHandling = - (stepWork.computeVirial ? 
gmx::VirtualSitesHandler::VirialHandling::NonLinear - : gmx::VirtualSitesHandler::VirialHandling::None); - matrix virial = { { 0 } }; - vsite->spreadForces(x, forceWithVirial.force_, virialHandling, {}, virial, nrnb, box, wcycle); - forceWithVirial.addVirialContribution(virial); - } - - if (stepWork.computeVirial) - { - /* Now add the forces, this is local */ - sum_forces(f, forceWithVirial.force_); - - /* Add the direct virial contributions */ - GMX_ASSERT( - forceWithVirial.computeVirial_, - "forceWithVirial should request virial computation when we request the virial"); - m_add(vir_force, forceWithVirial.getVirial(), vir_force); - - if (debug) - { - pr_rvecs(debug, 0, "vir_force", vir_force, DIM); - } - } - } - else - { - GMX_ASSERT(vsite == nullptr || forceOutputs->forceWithShiftForces().haveSpreadVsiteForces(), - "We should have spread the vsite forces (earlier)"); - } - - if (fr->print_force >= 0) - { - print_large_forces(stderr, mdatoms, cr, step, fr->print_force, x, f); - } -} - -static void do_nb_verlet(t_forcerec* fr, - const interaction_const_t* ic, - gmx_enerdata_t* enerd, - const StepWorkload& stepWork, - const InteractionLocality ilocality, - const int clearF, - const int64_t step, - t_nrnb* nrnb, - gmx_wallcycle_t wcycle) -{ - if (!stepWork.computeNonbondedForces) - { - /* skip non-bonded calculation */ - return; - } - - nonbonded_verlet_t* nbv = fr->nbv.get(); - - /* GPU kernel launch overhead is already timed separately */ - if (!nbv->useGpu()) - { - /* When dynamic pair-list pruning is requested, we need to prune - * at nstlistPrune steps. - */ - if (nbv->isDynamicPruningStepCpu(step)) - { - /* Prune the pair-list beyond fr->ic->rlistPrune using - * the current coordinates of the atoms. 
- */ - wallcycle_sub_start(wcycle, ewcsNONBONDED_PRUNING); - nbv->dispatchPruneKernelCpu(ilocality, fr->shift_vec); - wallcycle_sub_stop(wcycle, ewcsNONBONDED_PRUNING); - } - } - - nbv->dispatchNonbondedKernel(ilocality, *ic, stepWork, clearF, *fr, enerd, nrnb); -} - -static inline void clearRVecs(ArrayRef v, const bool useOpenmpThreading) -{ - int nth = gmx_omp_nthreads_get_simple_rvec_task(emntDefault, v.ssize()); - - /* Note that we would like to avoid this conditional by putting it - * into the omp pragma instead, but then we still take the full - * omp parallel for overhead (at least with gcc5). - */ - if (!useOpenmpThreading || nth == 1) - { - for (RVec& elem : v) - { - clear_rvec(elem); - } - } - else - { -#pragma omp parallel for num_threads(nth) schedule(static) - for (gmx::index i = 0; i < v.ssize(); i++) - { - clear_rvec(v[i]); - } - } -} - -/*! \brief Return an estimate of the average kinetic energy or 0 when unreliable - * - * \param groupOptions Group options, containing T-coupling options - */ -static real averageKineticEnergyEstimate(const t_grpopts& groupOptions) -{ - real nrdfCoupled = 0; - real nrdfUncoupled = 0; - real kineticEnergy = 0; - for (int g = 0; g < groupOptions.ngtc; g++) - { - if (groupOptions.tau_t[g] >= 0) - { - nrdfCoupled += groupOptions.nrdf[g]; - kineticEnergy += groupOptions.nrdf[g] * 0.5 * groupOptions.ref_t[g] * BOLTZ; - } - else - { - nrdfUncoupled += groupOptions.nrdf[g]; - } - } - - /* This conditional with > also catches nrdf=0 */ - if (nrdfCoupled > nrdfUncoupled) - { - return kineticEnergy * (nrdfCoupled + nrdfUncoupled) / nrdfCoupled; - } - else - { - return 0; - } -} - -/*! \brief This routine checks that the potential energy is finite. - * - * Always checks that the potential energy is finite. If step equals - * inputrec.init_step also checks that the magnitude of the potential energy - * is reasonable. Terminates with a fatal error when a check fails. 
- * Note that passing this check does not guarantee finite forces, - * since those use slightly different arithmetics. But in most cases - * there is just a narrow coordinate range where forces are not finite - * and energies are finite. - * - * \param[in] step The step number, used for checking and printing - * \param[in] enerd The energy data; the non-bonded group energies need to be added to - * enerd.term[F_EPOT] before calling this routine \param[in] inputrec The input record - */ -static void checkPotentialEnergyValidity(int64_t step, const gmx_enerdata_t& enerd, const t_inputrec& inputrec) -{ - /* Threshold valid for comparing absolute potential energy against - * the kinetic energy. Normally one should not consider absolute - * potential energy values, but with a factor of one million - * we should never get false positives. - */ - constexpr real c_thresholdFactor = 1e6; - - bool energyIsNotFinite = !std::isfinite(enerd.term[F_EPOT]); - real averageKineticEnergy = 0; - /* We only check for large potential energy at the initial step, - * because that is by far the most likely step for this too occur - * and because computing the average kinetic energy is not free. - * Note: nstcalcenergy >> 1 often does not allow to catch large energies - * before they become NaN. - */ - if (step == inputrec.init_step && EI_DYNAMICS(inputrec.eI)) - { - averageKineticEnergy = averageKineticEnergyEstimate(inputrec.opts); - } - - if (energyIsNotFinite - || (averageKineticEnergy > 0 && enerd.term[F_EPOT] > c_thresholdFactor * averageKineticEnergy)) - { - gmx_fatal( - FARGS, - "Step %" PRId64 - ": The total potential energy is %g, which is %s. The LJ and electrostatic " - "contributions to the energy are %g and %g, respectively. A %s potential energy " - "can be caused by overlapping interactions in bonded interactions or very large%s " - "coordinate values. 
Usually this is caused by a badly- or non-equilibrated initial " - "configuration, incorrect interactions or parameters in the topology.", - step, enerd.term[F_EPOT], energyIsNotFinite ? "not finite" : "extremely high", - enerd.term[F_LJ], enerd.term[F_COUL_SR], - energyIsNotFinite ? "non-finite" : "very high", energyIsNotFinite ? " or Nan" : ""); - } -} - -/*! \brief Return true if there are special forces computed this step. - * - * The conditionals exactly correspond to those in computeSpecialForces(). - */ -static bool haveSpecialForces(const t_inputrec& inputrec, - const gmx::ForceProviders& forceProviders, - const pull_t* pull_work, - const bool computeForces, - const gmx_edsam* ed) -{ - - return ((computeForces && forceProviders.hasForceProvider()) || // forceProviders - (inputrec.bPull && pull_have_potential(*pull_work)) || // pull - inputrec.bRot || // enforced rotation - (ed != nullptr) || // flooding - (inputrec.bIMD && computeForces)); // IMD -} - -/*! \brief Compute forces and/or energies for special algorithms - * - * The intention is to collect all calls to algorithms that compute - * forces on local atoms only and that do not contribute to the local - * virial sum (but add their virial contribution separately). - * Eventually these should likely all become ForceProviders. - * Within this function the intention is to have algorithms that do - * global communication at the end, so global barriers within the MD loop - * are as close together as possible. - * - * \param[in] fplog The log file - * \param[in] cr The communication record - * \param[in] inputrec The input record - * \param[in] awh The Awh module (nullptr if none in use). - * \param[in] enforcedRotation Enforced rotation module. - * \param[in] imdSession The IMD session - * \param[in] pull_work The pull work structure. 
- * \param[in] step The current MD step - * \param[in] t The current time - * \param[in,out] wcycle Wallcycle accounting struct - * \param[in,out] forceProviders Pointer to a list of force providers - * \param[in] box The unit cell - * \param[in] x The coordinates - * \param[in] mdatoms Per atom properties - * \param[in] lambda Array of free-energy lambda values - * \param[in] stepWork Step schedule flags - * \param[in,out] forceWithVirialMtsLevel0 Force and virial for MTS level0 forces - * \param[in,out] forceWithVirialMtsLevel1 Force and virial for MTS level1 forces, can be nullptr - * \param[in,out] enerd Energy buffer - * \param[in,out] ed Essential dynamics pointer - * \param[in] didNeighborSearch Tells if we did neighbor searching this step, used for ED sampling - * - * \todo Remove didNeighborSearch, which is used incorrectly. - * \todo Convert all other algorithms called here to ForceProviders. - */ -static void computeSpecialForces(FILE* fplog, - const t_commrec* cr, - const t_inputrec* inputrec, - gmx::Awh* awh, - gmx_enfrot* enforcedRotation, - gmx::ImdSession* imdSession, - pull_t* pull_work, - int64_t step, - double t, - gmx_wallcycle_t wcycle, - gmx::ForceProviders* forceProviders, - const matrix box, - gmx::ArrayRef x, - const t_mdatoms* mdatoms, - gmx::ArrayRef lambda, - const StepWorkload& stepWork, - gmx::ForceWithVirial* forceWithVirialMtsLevel0, - gmx::ForceWithVirial* forceWithVirialMtsLevel1, - gmx_enerdata_t* enerd, - gmx_edsam* ed, - bool didNeighborSearch) -{ - /* NOTE: Currently all ForceProviders only provide forces. - * When they also provide energies, remove this conditional. 
- */ - if (stepWork.computeForces) - { - gmx::ForceProviderInput forceProviderInput(x, *mdatoms, t, box, *cr); - gmx::ForceProviderOutput forceProviderOutput(forceWithVirialMtsLevel0, enerd); - - /* Collect forces from modules */ - forceProviders->calculateForces(forceProviderInput, &forceProviderOutput); - } - - if (inputrec->bPull && pull_have_potential(*pull_work)) - { - const int mtsLevel = forceGroupMtsLevel(inputrec->mtsLevels, gmx::MtsForceGroups::Pull); - if (mtsLevel == 0 || stepWork.computeSlowForces) - { - auto& forceWithVirial = (mtsLevel == 0) ? forceWithVirialMtsLevel0 : forceWithVirialMtsLevel1; - pull_potential_wrapper(cr, inputrec, box, x, forceWithVirial, mdatoms, enerd, pull_work, - lambda.data(), t, wcycle); - } - } - if (awh) - { - const int mtsLevel = forceGroupMtsLevel(inputrec->mtsLevels, gmx::MtsForceGroups::Pull); - if (mtsLevel == 0 || stepWork.computeSlowForces) - { - const bool needForeignEnergyDifferences = awh->needForeignEnergyDifferences(step); - std::vector foreignLambdaDeltaH, foreignLambdaDhDl; - if (needForeignEnergyDifferences) - { - enerd->foreignLambdaTerms.finalizePotentialContributions(enerd->dvdl_lin, lambda, - *inputrec->fepvals); - std::tie(foreignLambdaDeltaH, foreignLambdaDhDl) = enerd->foreignLambdaTerms.getTerms(cr); - } - - auto& forceWithVirial = (mtsLevel == 0) ? 
forceWithVirialMtsLevel0 : forceWithVirialMtsLevel1; - enerd->term[F_COM_PULL] += awh->applyBiasForcesAndUpdateBias( - inputrec->pbcType, mdatoms->massT, foreignLambdaDeltaH, foreignLambdaDhDl, box, - forceWithVirial, t, step, wcycle, fplog); - } - } - - rvec* f = as_rvec_array(forceWithVirialMtsLevel0->force_.data()); - - /* Add the forces from enforced rotation potentials (if any) */ - if (inputrec->bRot) - { - wallcycle_start(wcycle, ewcROTadd); - enerd->term[F_COM_PULL] += add_rot_forces(enforcedRotation, f, cr, step, t); - wallcycle_stop(wcycle, ewcROTadd); - } - - if (ed) - { - /* Note that since init_edsam() is called after the initialization - * of forcerec, edsam doesn't request the noVirSum force buffer. - * Thus if no other algorithm (e.g. PME) requires it, the forces - * here will contribute to the virial. - */ - do_flood(cr, inputrec, as_rvec_array(x.data()), f, ed, box, step, didNeighborSearch); - } - - /* Add forces from interactive molecular dynamics (IMD), if any */ - if (inputrec->bIMD && stepWork.computeForces) - { - imdSession->applyForces(f); - } -} - -/*! \brief Launch the prepare_step and spread stages of PME GPU. - * - * \param[in] pmedata The PME structure - * \param[in] box The box matrix - * \param[in] stepWork Step schedule flags - * \param[in] xReadyOnDevice Event synchronizer indicating that the coordinates are ready in the device memory. - * \param[in] lambdaQ The Coulomb lambda of the current state. - * \param[in] wcycle The wallcycle structure - */ -static inline void launchPmeGpuSpread(gmx_pme_t* pmedata, - const matrix box, - const StepWorkload& stepWork, - GpuEventSynchronizer* xReadyOnDevice, - const real lambdaQ, - gmx_wallcycle_t wcycle) -{ - pme_gpu_prepare_computation(pmedata, box, wcycle, stepWork); - pme_gpu_launch_spread(pmedata, xReadyOnDevice, wcycle, lambdaQ); -} - -/*! \brief Launch the FFT and gather stages of PME GPU - * - * This function only implements setting the output forces (no accumulation). 
- * - * \param[in] pmedata The PME structure - * \param[in] lambdaQ The Coulomb lambda of the current system state. - * \param[in] wcycle The wallcycle structure - * \param[in] stepWork Step schedule flags - */ -static void launchPmeGpuFftAndGather(gmx_pme_t* pmedata, - const real lambdaQ, - gmx_wallcycle_t wcycle, - const gmx::StepWorkload& stepWork) -{ - pme_gpu_launch_complex_transforms(pmedata, wcycle, stepWork); - pme_gpu_launch_gather(pmedata, wcycle, lambdaQ); -} - -/*! \brief - * Polling wait for either of the PME or nonbonded GPU tasks. - * - * Instead of a static order in waiting for GPU tasks, this function - * polls checking which of the two tasks completes first, and does the - * associated force buffer reduction overlapped with the other task. - * By doing that, unlike static scheduling order, it can always overlap - * one of the reductions, regardless of the GPU task completion order. - * - * \param[in] nbv Nonbonded verlet structure - * \param[in,out] pmedata PME module data - * \param[in,out] forceOutputsNonbonded Force outputs for the non-bonded forces and shift forces - * \param[in,out] forceOutputsPme Force outputs for the PME forces and virial - * \param[in,out] enerd Energy data structure results are reduced into - * \param[in] lambdaQ The Coulomb lambda of the current system state. - * \param[in] stepWork Step schedule flags - * \param[in] wcycle The wallcycle structure - */ -static void alternatePmeNbGpuWaitReduce(nonbonded_verlet_t* nbv, - gmx_pme_t* pmedata, - gmx::ForceOutputs* forceOutputsNonbonded, - gmx::ForceOutputs* forceOutputsPme, - gmx_enerdata_t* enerd, - const real lambdaQ, - const StepWorkload& stepWork, - gmx_wallcycle_t wcycle) -{ - bool isPmeGpuDone = false; - bool isNbGpuDone = false; - - gmx::ArrayRef pmeGpuForces; - - while (!isPmeGpuDone || !isNbGpuDone) - { - if (!isPmeGpuDone) - { - GpuTaskCompletion completionType = - (isNbGpuDone) ? 
GpuTaskCompletion::Wait : GpuTaskCompletion::Check; - isPmeGpuDone = pme_gpu_try_finish_task(pmedata, stepWork, wcycle, - &forceOutputsPme->forceWithVirial(), enerd, - lambdaQ, completionType); - } - - if (!isNbGpuDone) - { - auto& forceBuffersNonbonded = forceOutputsNonbonded->forceWithShiftForces(); - GpuTaskCompletion completionType = - (isPmeGpuDone) ? GpuTaskCompletion::Wait : GpuTaskCompletion::Check; - isNbGpuDone = Nbnxm::gpu_try_finish_task( - nbv->gpu_nbv, stepWork, AtomLocality::Local, enerd->grpp.ener[egLJSR].data(), - enerd->grpp.ener[egCOULSR].data(), forceBuffersNonbonded.shiftForces(), - completionType, wcycle); - - if (isNbGpuDone) - { - nbv->atomdata_add_nbat_f_to_f(AtomLocality::Local, forceBuffersNonbonded.force()); - } - } - } -} - -/*! \brief Set up the different force buffers; also does clearing. - * - * \param[in] forceHelperBuffers Helper force buffers - * \param[in] force force array - * \param[in] stepWork Step schedule flags - * \param[out] wcycle wallcycle recording structure - * - * \returns Cleared force output structure - */ -static ForceOutputs setupForceOutputs(ForceHelperBuffers* forceHelperBuffers, - gmx::ArrayRefWithPadding force, - const StepWorkload& stepWork, - gmx_wallcycle_t wcycle) -{ - wallcycle_sub_start(wcycle, ewcsCLEAR_FORCE_BUFFER); - - /* NOTE: We assume fr->shiftForces is all zeros here */ - gmx::ForceWithShiftForces forceWithShiftForces(force, stepWork.computeVirial, - forceHelperBuffers->shiftForces()); - - if (stepWork.computeForces) - { - /* Clear the short- and long-range forces */ - clearRVecs(forceWithShiftForces.force(), true); - - /* Clear the shift forces */ - clearRVecs(forceWithShiftForces.shiftForces(), false); - } - - /* If we need to compute the virial, we might need a separate - * force buffer for algorithms for which the virial is calculated - * directly, such as PME. Otherwise, forceWithVirial uses the - * the same force (f in legacy calls) buffer as other algorithms. 
- */ - const bool useSeparateForceWithVirialBuffer = - (stepWork.computeForces - && (stepWork.computeVirial && forceHelperBuffers->haveDirectVirialContributions())); - /* forceWithVirial uses the local atom range only */ - gmx::ForceWithVirial forceWithVirial( - useSeparateForceWithVirialBuffer ? forceHelperBuffers->forceBufferForDirectVirialContributions() - : force.unpaddedArrayRef(), - stepWork.computeVirial); - - if (useSeparateForceWithVirialBuffer) - { - /* TODO: update comment - * We only compute forces on local atoms. Note that vsites can - * spread to non-local atoms, but that part of the buffer is - * cleared separately in the vsite spreading code. - */ - clearRVecs(forceWithVirial.force_, true); - } - - wallcycle_sub_stop(wcycle, ewcsCLEAR_FORCE_BUFFER); - - return ForceOutputs(forceWithShiftForces, forceHelperBuffers->haveDirectVirialContributions(), - forceWithVirial); -} - - -/*! \brief Set up flags that have the lifetime of the domain indicating what type of work is there to compute. 
- */ -static DomainLifetimeWorkload setupDomainLifetimeWorkload(const t_inputrec& inputrec, - const t_forcerec& fr, - const pull_t* pull_work, - const gmx_edsam* ed, - const t_mdatoms& mdatoms, - const SimulationWorkload& simulationWork, - const StepWorkload& stepWork) -{ - DomainLifetimeWorkload domainWork; - // Note that haveSpecialForces is constant over the whole run - domainWork.haveSpecialForces = - haveSpecialForces(inputrec, *fr.forceProviders, pull_work, stepWork.computeForces, ed); - domainWork.haveCpuListedForceWork = false; - domainWork.haveCpuBondedWork = false; - for (const auto& listedForces : fr.listedForces) - { - if (listedForces.haveCpuListedForces(*fr.fcdata)) - { - domainWork.haveCpuListedForceWork = true; - } - if (listedForces.haveCpuBondeds()) - { - domainWork.haveCpuBondedWork = true; - } - } - domainWork.haveGpuBondedWork = ((fr.gpuBonded != nullptr) && fr.gpuBonded->haveInteractions()); - // Note that haveFreeEnergyWork is constant over the whole run - domainWork.haveFreeEnergyWork = (fr.efep != efepNO && mdatoms.nPerturbed != 0); - // We assume we have local force work if there are CPU - // force tasks including PME or nonbondeds. - domainWork.haveCpuLocalForceWork = - domainWork.haveSpecialForces || domainWork.haveCpuListedForceWork - || domainWork.haveFreeEnergyWork || simulationWork.useCpuNonbonded || simulationWork.useCpuPme - || simulationWork.haveEwaldSurfaceContribution || inputrec.nwall > 0; - - return domainWork; -} - -/*! \brief Set up force flag stuct from the force bitmask. - * - * \param[in] legacyFlags Force bitmask flags used to construct the new flags - * \param[in] mtsLevels The multiple time-stepping levels, either empty or 2 levels - * \param[in] step The current MD step - * \param[in] simulationWork Simulation workload description. - * \param[in] rankHasPmeDuty If this rank computes PME. - * - * \returns New Stepworkload description. 
- */ -static StepWorkload setupStepWorkload(const int legacyFlags, - ArrayRef mtsLevels, - const int64_t step, - const SimulationWorkload& simulationWork, - const bool rankHasPmeDuty) -{ - GMX_ASSERT(mtsLevels.empty() || mtsLevels.size() == 2, "Expect 0 or 2 MTS levels"); - const bool computeSlowForces = (mtsLevels.empty() || step % mtsLevels[1].stepFactor == 0); - - StepWorkload flags; - flags.stateChanged = ((legacyFlags & GMX_FORCE_STATECHANGED) != 0); - flags.haveDynamicBox = ((legacyFlags & GMX_FORCE_DYNAMICBOX) != 0); - flags.doNeighborSearch = ((legacyFlags & GMX_FORCE_NS) != 0); - flags.computeSlowForces = computeSlowForces; - flags.computeVirial = ((legacyFlags & GMX_FORCE_VIRIAL) != 0); - flags.computeEnergy = ((legacyFlags & GMX_FORCE_ENERGY) != 0); - flags.computeForces = ((legacyFlags & GMX_FORCE_FORCES) != 0); - flags.computeListedForces = ((legacyFlags & GMX_FORCE_LISTED) != 0); - flags.computeNonbondedForces = - ((legacyFlags & GMX_FORCE_NONBONDED) != 0) && simulationWork.computeNonbonded - && !(simulationWork.computeNonbondedAtMtsLevel1 && !computeSlowForces); - flags.computeDhdl = ((legacyFlags & GMX_FORCE_DHDL) != 0); - - if (simulationWork.useGpuBufferOps) - { - GMX_ASSERT(simulationWork.useGpuNonbonded, - "Can only offload buffer ops if nonbonded computation is also offloaded"); - } - flags.useGpuXBufferOps = simulationWork.useGpuBufferOps; - // on virial steps the CPU reduction path is taken - flags.useGpuFBufferOps = simulationWork.useGpuBufferOps && !flags.computeVirial; - flags.useGpuPmeFReduction = flags.computeSlowForces && flags.useGpuFBufferOps && simulationWork.useGpuPme - && (rankHasPmeDuty || simulationWork.useGpuPmePpCommunication); - flags.useGpuXHalo = simulationWork.useGpuHaloExchange; - flags.useGpuFHalo = simulationWork.useGpuHaloExchange && flags.useGpuFBufferOps; - - return flags; -} - - -/* \brief Launch end-of-step GPU tasks: buffer clearing and rolling pruning. 
- * - * TODO: eliminate \p useGpuPmeOnThisRank when this is - * incorporated in DomainLifetimeWorkload. - */ -static void launchGpuEndOfStepTasks(nonbonded_verlet_t* nbv, - gmx::GpuBonded* gpuBonded, - gmx_pme_t* pmedata, - gmx_enerdata_t* enerd, - const gmx::MdrunScheduleWorkload& runScheduleWork, - bool useGpuPmeOnThisRank, - int64_t step, - gmx_wallcycle_t wcycle) -{ - if (runScheduleWork.simulationWork.useGpuNonbonded && runScheduleWork.stepWork.computeNonbondedForces) - { - /* Launch pruning before buffer clearing because the API overhead of the - * clear kernel launches can leave the GPU idle while it could be running - * the prune kernel. - */ - if (nbv->isDynamicPruningStepGpu(step)) - { - nbv->dispatchPruneKernelGpu(step); - } - - /* now clear the GPU outputs while we finish the step on the CPU */ - wallcycle_start_nocount(wcycle, ewcLAUNCH_GPU); - wallcycle_sub_start_nocount(wcycle, ewcsLAUNCH_GPU_NONBONDED); - Nbnxm::gpu_clear_outputs(nbv->gpu_nbv, runScheduleWork.stepWork.computeVirial); - wallcycle_sub_stop(wcycle, ewcsLAUNCH_GPU_NONBONDED); - wallcycle_stop(wcycle, ewcLAUNCH_GPU); - } - - if (useGpuPmeOnThisRank) - { - pme_gpu_reinit_computation(pmedata, wcycle); - } - - if (runScheduleWork.domainWork.haveGpuBondedWork && runScheduleWork.stepWork.computeEnergy) - { - // in principle this should be included in the DD balancing region, - // but generally it is infrequent so we'll omit it for the sake of - // simpler code - gpuBonded->waitAccumulateEnergyTerms(enerd); - - gpuBonded->clearEnergies(); - } -} - -//! \brief Data structure to hold dipole-related data and staging arrays -struct DipoleData -{ - //! Dipole staging for fast summing over MPI - gmx::DVec muStaging[2] = { { 0.0, 0.0, 0.0 } }; - //! Dipole staging for states A and B (index 0 and 1 resp.) 
- gmx::RVec muStateAB[2] = { { 0.0_real, 0.0_real, 0.0_real } }; -}; - - -static void reduceAndUpdateMuTot(DipoleData* dipoleData, - const t_commrec* cr, - const bool haveFreeEnergy, - gmx::ArrayRef lambda, - rvec muTotal, - const DDBalanceRegionHandler& ddBalanceRegionHandler) -{ - if (PAR(cr)) - { - gmx_sumd(2 * DIM, dipoleData->muStaging[0], cr); - ddBalanceRegionHandler.reopenRegionCpu(); - } - for (int i = 0; i < 2; i++) - { - for (int j = 0; j < DIM; j++) - { - dipoleData->muStateAB[i][j] = dipoleData->muStaging[i][j]; - } - } - - if (!haveFreeEnergy) - { - copy_rvec(dipoleData->muStateAB[0], muTotal); - } - else - { - for (int j = 0; j < DIM; j++) - { - muTotal[j] = (1.0 - lambda[efptCOUL]) * dipoleData->muStateAB[0][j] - + lambda[efptCOUL] * dipoleData->muStateAB[1][j]; - } - } -} - -/*! \brief Combines MTS level0 and level1 force buffes into a full and MTS-combined force buffer. - * - * \param[in] numAtoms The number of atoms to combine forces for - * \param[in,out] forceMtsLevel0 Input: F_level0, output: F_level0 + F_level1 - * \param[in,out] forceMts Input: F_level1, output: F_level0 + mtsFactor * F_level1 - * \param[in] mtsFactor The factor between the level0 and level1 time step - */ -static void combineMtsForces(const int numAtoms, - ArrayRef forceMtsLevel0, - ArrayRef forceMts, - const real mtsFactor) -{ - const int gmx_unused numThreads = gmx_omp_nthreads_get(emntDefault); -#pragma omp parallel for num_threads(numThreads) schedule(static) - for (int i = 0; i < numAtoms; i++) - { - const RVec forceMtsLevel0Tmp = forceMtsLevel0[i]; - forceMtsLevel0[i] += forceMts[i]; - forceMts[i] = forceMtsLevel0Tmp + mtsFactor * forceMts[i]; - } -} - -/*! \brief Setup for the local and non-local GPU force reductions: - * reinitialization plus the registration of forces and dependencies. 
- * - * \param [in] runScheduleWork Schedule workload flag structure - * \param [in] cr Communication record object - * \param [in] fr Force record object - */ -static void setupGpuForceReductions(gmx::MdrunScheduleWorkload* runScheduleWork, - const t_commrec* cr, - t_forcerec* fr) -{ - - nonbonded_verlet_t* nbv = fr->nbv.get(); - gmx::StatePropagatorDataGpu* stateGpu = fr->stateGpu; - - // (re-)initialize local GPU force reduction - const bool accumulate = - runScheduleWork->domainWork.haveCpuLocalForceWork || havePPDomainDecomposition(cr); - const int atomStart = 0; - fr->gpuForceReduction[gmx::AtomLocality::Local]->reinit( - stateGpu->getForces(), nbv->getNumAtoms(AtomLocality::Local), nbv->getGridIndices(), - atomStart, accumulate, stateGpu->fReducedOnDevice()); - - // register forces and add dependencies - fr->gpuForceReduction[gmx::AtomLocality::Local]->registerNbnxmForce(nbv->getGpuForces()); - - if (runScheduleWork->simulationWork.useGpuPme - && (thisRankHasDuty(cr, DUTY_PME) || runScheduleWork->simulationWork.useGpuPmePpCommunication)) - { - void* forcePtr = thisRankHasDuty(cr, DUTY_PME) ? pme_gpu_get_device_f(fr->pmedata) - : // PME force buffer on same GPU - fr->pmePpCommGpu->getGpuForceStagingPtr(); // buffer received from other GPU - fr->gpuForceReduction[gmx::AtomLocality::Local]->registerRvecForce(forcePtr); - - GpuEventSynchronizer* const pmeSynchronizer = - (thisRankHasDuty(cr, DUTY_PME) ? pme_gpu_get_f_ready_synchronizer(fr->pmedata) - : // PME force buffer on same GPU - fr->pmePpCommGpu->getForcesReadySynchronizer()); // buffer received from other GPU - fr->gpuForceReduction[gmx::AtomLocality::Local]->addDependency(pmeSynchronizer); - } - - if ((runScheduleWork->domainWork.haveCpuLocalForceWork || havePPDomainDecomposition(cr)) - && !runScheduleWork->simulationWork.useGpuHaloExchange) - { - auto forcesReadyLocality = havePPDomainDecomposition(cr) ? 
AtomLocality::Local : AtomLocality::All; - const bool useGpuForceBufferOps = true; - fr->gpuForceReduction[gmx::AtomLocality::Local]->addDependency( - stateGpu->getForcesReadyOnDeviceEvent(forcesReadyLocality, useGpuForceBufferOps)); - } - - if (runScheduleWork->simulationWork.useGpuHaloExchange) - { - fr->gpuForceReduction[gmx::AtomLocality::Local]->addDependency( - cr->dd->gpuHaloExchange[0][0]->getForcesReadyOnDeviceEvent()); - } - - if (havePPDomainDecomposition(cr)) - { - // (re-)initialize non-local GPU force reduction - const bool accumulate = runScheduleWork->domainWork.haveCpuBondedWork - || runScheduleWork->domainWork.haveFreeEnergyWork; - const int atomStart = dd_numHomeAtoms(*cr->dd); - fr->gpuForceReduction[gmx::AtomLocality::NonLocal]->reinit( - stateGpu->getForces(), nbv->getNumAtoms(AtomLocality::NonLocal), - nbv->getGridIndices(), atomStart, accumulate); - - // register forces and add dependencies - fr->gpuForceReduction[gmx::AtomLocality::NonLocal]->registerNbnxmForce(nbv->getGpuForces()); - if (runScheduleWork->domainWork.haveCpuBondedWork || runScheduleWork->domainWork.haveFreeEnergyWork) - { - fr->gpuForceReduction[gmx::AtomLocality::NonLocal]->addDependency( - stateGpu->getForcesReadyOnDeviceEvent(AtomLocality::NonLocal, true)); - } - } -} - - -void do_force(FILE* fplog, - const t_commrec* cr, - const gmx_multisim_t* ms, - const t_inputrec* inputrec, - gmx::Awh* awh, - gmx_enfrot* enforcedRotation, - gmx::ImdSession* imdSession, - pull_t* pull_work, - int64_t step, - t_nrnb* nrnb, - gmx_wallcycle_t wcycle, - const gmx_localtop_t* top, - const matrix box, - gmx::ArrayRefWithPadding x, - history_t* hist, - gmx::ForceBuffersView* forceView, - tensor vir_force, - const t_mdatoms* mdatoms, - gmx_enerdata_t* enerd, - gmx::ArrayRef lambda, - t_forcerec* fr, - gmx::MdrunScheduleWorkload* runScheduleWork, - gmx::VirtualSitesHandler* vsite, - rvec muTotal, - double t, - gmx_edsam* ed, - int legacyFlags, - const DDBalanceRegionHandler& 
ddBalanceRegionHandler) -{ - auto force = forceView->forceWithPadding(); - GMX_ASSERT(force.unpaddedArrayRef().ssize() >= fr->natoms_force_constr, - "The size of the force buffer should be at least the number of atoms to compute " - "forces for"); - - nonbonded_verlet_t* nbv = fr->nbv.get(); - interaction_const_t* ic = fr->ic; - - gmx::StatePropagatorDataGpu* stateGpu = fr->stateGpu; - - const SimulationWorkload& simulationWork = runScheduleWork->simulationWork; - - runScheduleWork->stepWork = setupStepWorkload(legacyFlags, inputrec->mtsLevels, step, - simulationWork, thisRankHasDuty(cr, DUTY_PME)); - const StepWorkload& stepWork = runScheduleWork->stepWork; - - const bool useGpuPmeOnThisRank = - simulationWork.useGpuPme && thisRankHasDuty(cr, DUTY_PME) && stepWork.computeSlowForces; - - /* At a search step we need to start the first balancing region - * somewhere early inside the step after communication during domain - * decomposition (and not during the previous step as usual). - */ - if (stepWork.doNeighborSearch) - { - ddBalanceRegionHandler.openBeforeForceComputationCpu(DdAllowBalanceRegionReopen::yes); - } - - clear_mat(vir_force); - - if (fr->pbcType != PbcType::No) - { - /* Compute shift vectors every step, - * because of pressure coupling or box deformation! 
- */ - if (stepWork.haveDynamicBox && stepWork.stateChanged) - { - calc_shifts(box, fr->shift_vec); - } - - const bool fillGrid = (stepWork.doNeighborSearch && stepWork.stateChanged); - const bool calcCGCM = (fillGrid && !DOMAINDECOMP(cr)); - if (calcCGCM) - { - put_atoms_in_box_omp(fr->pbcType, box, x.unpaddedArrayRef().subArray(0, mdatoms->homenr), - gmx_omp_nthreads_get(emntDefault)); - inc_nrnb(nrnb, eNR_SHIFTX, mdatoms->homenr); - } - } - - nbnxn_atomdata_copy_shiftvec(stepWork.haveDynamicBox, fr->shift_vec, nbv->nbat.get()); - - const bool pmeSendCoordinatesFromGpu = - GMX_MPI && simulationWork.useGpuPmePpCommunication && !(stepWork.doNeighborSearch); - const bool reinitGpuPmePpComms = - GMX_MPI && simulationWork.useGpuPmePpCommunication && (stepWork.doNeighborSearch); - - const auto localXReadyOnDevice = (useGpuPmeOnThisRank || simulationWork.useGpuBufferOps) - ? stateGpu->getCoordinatesReadyOnDeviceEvent( - AtomLocality::Local, simulationWork, stepWork) - : nullptr; - - // Copy coordinate from the GPU if update is on the GPU and there - // are forces to be computed on the CPU, or for the computation of - // virial, or if host-side data will be transferred from this task - // to a remote task for halo exchange or PME-PP communication. At - // search steps the current coordinates are already on the host, - // hence copy is not needed. 
- const bool haveHostPmePpComms = - !thisRankHasDuty(cr, DUTY_PME) && !simulationWork.useGpuPmePpCommunication; - - GMX_ASSERT(simulationWork.useGpuHaloExchange - == ((cr->dd != nullptr) && (!cr->dd->gpuHaloExchange[0].empty())), - "The GPU halo exchange is active, but it has not been constructed."); - const bool haveHostHaloExchangeComms = - havePPDomainDecomposition(cr) && !simulationWork.useGpuHaloExchange; - - bool gmx_used_in_debug haveCopiedXFromGpu = false; - if (simulationWork.useGpuUpdate && !stepWork.doNeighborSearch - && (runScheduleWork->domainWork.haveCpuLocalForceWork || stepWork.computeVirial - || haveHostPmePpComms || haveHostHaloExchangeComms || simulationWork.computeMuTot)) - { - stateGpu->copyCoordinatesFromGpu(x.unpaddedArrayRef(), AtomLocality::Local); - haveCopiedXFromGpu = true; - } - - // If coordinates are to be sent to PME task from CPU memory, perform that send here. - // Otherwise the send will occur after H2D coordinate transfer. - if (GMX_MPI && !thisRankHasDuty(cr, DUTY_PME) && !pmeSendCoordinatesFromGpu && stepWork.computeSlowForces) - { - /* Send particle coordinates to the pme nodes */ - if (!stepWork.doNeighborSearch && simulationWork.useGpuUpdate) - { - stateGpu->waitCoordinatesReadyOnHost(AtomLocality::Local); - } - - gmx_pme_send_coordinates(fr, cr, box, as_rvec_array(x.unpaddedArrayRef().data()), lambda[efptCOUL], - lambda[efptVDW], (stepWork.computeVirial || stepWork.computeEnergy), - step, simulationWork.useGpuPmePpCommunication, reinitGpuPmePpComms, - pmeSendCoordinatesFromGpu, localXReadyOnDevice, wcycle); - } - - // Coordinates on the device are needed if PME or BufferOps are offloaded. - // The local coordinates can be copied right away. - // NOTE: Consider moving this copy to right after they are updated and constrained, - // if the later is not offloaded. - if (useGpuPmeOnThisRank || stepWork.useGpuXBufferOps) - { - if (stepWork.doNeighborSearch) - { - // TODO refactor this to do_md, after partitioning. 
- stateGpu->reinit(mdatoms->homenr, - cr->dd != nullptr ? dd_numAtomsZones(*cr->dd) : mdatoms->homenr); - if (useGpuPmeOnThisRank) - { - // TODO: This should be moved into PME setup function ( pme_gpu_prepare_computation(...) ) - pme_gpu_set_device_x(fr->pmedata, stateGpu->getCoordinates()); - } - } - // We need to copy coordinates when: - // 1. Update is not offloaded - // 2. The buffers were reinitialized on search step - if (!simulationWork.useGpuUpdate || stepWork.doNeighborSearch) - { - GMX_ASSERT(stateGpu != nullptr, "stateGpu should not be null"); - stateGpu->copyCoordinatesToGpu(x.unpaddedArrayRef(), AtomLocality::Local); - } - } - - // If coordinates are to be sent to PME task from GPU memory, perform that send here. - // Otherwise the send will occur before the H2D coordinate transfer. - if (!thisRankHasDuty(cr, DUTY_PME) && pmeSendCoordinatesFromGpu) - { - /* Send particle coordinates to the pme nodes */ - gmx_pme_send_coordinates(fr, cr, box, as_rvec_array(x.unpaddedArrayRef().data()), lambda[efptCOUL], - lambda[efptVDW], (stepWork.computeVirial || stepWork.computeEnergy), - step, simulationWork.useGpuPmePpCommunication, reinitGpuPmePpComms, - pmeSendCoordinatesFromGpu, localXReadyOnDevice, wcycle); - } - - if (useGpuPmeOnThisRank) - { - launchPmeGpuSpread(fr->pmedata, box, stepWork, localXReadyOnDevice, lambda[efptCOUL], wcycle); - } - - const gmx::DomainLifetimeWorkload& domainWork = runScheduleWork->domainWork; - - /* do gridding for pair search */ - if (stepWork.doNeighborSearch) - { - if (fr->wholeMoleculeTransform && stepWork.stateChanged) - { - fr->wholeMoleculeTransform->updateForAtomPbcJumps(x.unpaddedArrayRef(), box); - } - - // TODO - // - vzero is constant, do we need to pass it? 
- // - box_diag should be passed directly to nbnxn_put_on_grid - // - rvec vzero; - clear_rvec(vzero); - - rvec box_diag; - box_diag[XX] = box[XX][XX]; - box_diag[YY] = box[YY][YY]; - box_diag[ZZ] = box[ZZ][ZZ]; - - wallcycle_start(wcycle, ewcNS); - if (!DOMAINDECOMP(cr)) - { - wallcycle_sub_start(wcycle, ewcsNBS_GRID_LOCAL); - nbnxn_put_on_grid(nbv, box, 0, vzero, box_diag, nullptr, { 0, mdatoms->homenr }, -1, - fr->cginfo, x.unpaddedArrayRef(), 0, nullptr); - wallcycle_sub_stop(wcycle, ewcsNBS_GRID_LOCAL); - } - else - { - wallcycle_sub_start(wcycle, ewcsNBS_GRID_NONLOCAL); - nbnxn_put_on_grid_nonlocal(nbv, domdec_zones(cr->dd), fr->cginfo, x.unpaddedArrayRef()); - wallcycle_sub_stop(wcycle, ewcsNBS_GRID_NONLOCAL); - } - - nbv->setAtomProperties(gmx::constArrayRefFromArray(mdatoms->typeA, mdatoms->nr), - gmx::constArrayRefFromArray(mdatoms->chargeA, mdatoms->nr), fr->cginfo); - - wallcycle_stop(wcycle, ewcNS); - - /* initialize the GPU nbnxm atom data and bonded data structures */ - if (simulationWork.useGpuNonbonded) - { - // Note: cycle counting only nononbondeds, gpuBonded counts internally - wallcycle_start_nocount(wcycle, ewcLAUNCH_GPU); - wallcycle_sub_start_nocount(wcycle, ewcsLAUNCH_GPU_NONBONDED); - Nbnxm::gpu_init_atomdata(nbv->gpu_nbv, nbv->nbat.get()); - wallcycle_sub_stop(wcycle, ewcsLAUNCH_GPU_NONBONDED); - wallcycle_stop(wcycle, ewcLAUNCH_GPU); - - if (fr->gpuBonded) - { - /* Now we put all atoms on the grid, we can assign bonded - * interactions to the GPU, where the grid order is - * needed. Also the xq, f and fshift device buffers have - * been reallocated if needed, so the bonded code can - * learn about them. */ - // TODO the xq, f, and fshift buffers are now shared - // resources, so they should be maintained by a - // higher-level object than the nb module. 
- fr->gpuBonded->updateInteractionListsAndDeviceBuffers( - nbv->getGridIndices(), top->idef, Nbnxm::gpu_get_xq(nbv->gpu_nbv), - Nbnxm::gpu_get_f(nbv->gpu_nbv), Nbnxm::gpu_get_fshift(nbv->gpu_nbv)); - } - } - - // Need to run after the GPU-offload bonded interaction lists - // are set up to be able to determine whether there is bonded work. - runScheduleWork->domainWork = setupDomainLifetimeWorkload( - *inputrec, *fr, pull_work, ed, *mdatoms, simulationWork, stepWork); - - wallcycle_start_nocount(wcycle, ewcNS); - wallcycle_sub_start(wcycle, ewcsNBS_SEARCH_LOCAL); - /* Note that with a GPU the launch overhead of the list transfer is not timed separately */ - nbv->constructPairlist(InteractionLocality::Local, top->excls, step, nrnb); - - nbv->setupGpuShortRangeWork(fr->gpuBonded, InteractionLocality::Local); - - wallcycle_sub_stop(wcycle, ewcsNBS_SEARCH_LOCAL); - wallcycle_stop(wcycle, ewcNS); - - if (stepWork.useGpuXBufferOps) - { - nbv->atomdata_init_copy_x_to_nbat_x_gpu(); - } - - if (simulationWork.useGpuBufferOps) - { - setupGpuForceReductions(runScheduleWork, cr, fr); - } - } - else if (!EI_TPI(inputrec->eI) && stepWork.computeNonbondedForces) - { - if (stepWork.useGpuXBufferOps) - { - GMX_ASSERT(stateGpu, "stateGpu should be valid when buffer ops are offloaded"); - nbv->convertCoordinatesGpu(AtomLocality::Local, false, stateGpu->getCoordinates(), - localXReadyOnDevice); - } - else - { - if (simulationWork.useGpuUpdate) - { - GMX_ASSERT(stateGpu, "need a valid stateGpu object"); - GMX_ASSERT(haveCopiedXFromGpu, - "a wait should only be triggered if copy has been scheduled"); - stateGpu->waitCoordinatesReadyOnHost(AtomLocality::Local); - } - nbv->convertCoordinates(AtomLocality::Local, false, x.unpaddedArrayRef()); - } - } - - if (simulationWork.useGpuNonbonded && (stepWork.computeNonbondedForces || domainWork.haveGpuBondedWork)) - { - ddBalanceRegionHandler.openBeforeForceComputationGpu(); - - wallcycle_start(wcycle, ewcLAUNCH_GPU); - 
wallcycle_sub_start(wcycle, ewcsLAUNCH_GPU_NONBONDED); - Nbnxm::gpu_upload_shiftvec(nbv->gpu_nbv, nbv->nbat.get()); - if (stepWork.doNeighborSearch || !stepWork.useGpuXBufferOps) - { - Nbnxm::gpu_copy_xq_to_gpu(nbv->gpu_nbv, nbv->nbat.get(), AtomLocality::Local); - } - wallcycle_sub_stop(wcycle, ewcsLAUNCH_GPU_NONBONDED); - wallcycle_stop(wcycle, ewcLAUNCH_GPU); - // with X buffer ops offloaded to the GPU on all but the search steps - - // bonded work not split into separate local and non-local, so with DD - // we can only launch the kernel after non-local coordinates have been received. - if (domainWork.haveGpuBondedWork && !havePPDomainDecomposition(cr)) - { - fr->gpuBonded->setPbcAndlaunchKernel(fr->pbcType, box, fr->bMolPBC, stepWork); - } - - /* launch local nonbonded work on GPU */ - wallcycle_start_nocount(wcycle, ewcLAUNCH_GPU); - wallcycle_sub_start_nocount(wcycle, ewcsLAUNCH_GPU_NONBONDED); - do_nb_verlet(fr, ic, enerd, stepWork, InteractionLocality::Local, enbvClearFNo, step, nrnb, wcycle); - wallcycle_sub_stop(wcycle, ewcsLAUNCH_GPU_NONBONDED); - wallcycle_stop(wcycle, ewcLAUNCH_GPU); - } - - if (useGpuPmeOnThisRank) - { - // In PME GPU and mixed mode we launch FFT / gather after the - // X copy/transform to allow overlap as well as after the GPU NB - // launch to avoid FFT launch overhead hijacking the CPU and delaying - // the nonbonded kernel. 
- launchPmeGpuFftAndGather(fr->pmedata, lambda[efptCOUL], wcycle, stepWork); - } - - /* Communicate coordinates and sum dipole if necessary + - do non-local pair search */ - if (havePPDomainDecomposition(cr)) - { - if (stepWork.doNeighborSearch) - { - // TODO: fuse this branch with the above large stepWork.doNeighborSearch block - wallcycle_start_nocount(wcycle, ewcNS); - wallcycle_sub_start(wcycle, ewcsNBS_SEARCH_NONLOCAL); - /* Note that with a GPU the launch overhead of the list transfer is not timed separately */ - nbv->constructPairlist(InteractionLocality::NonLocal, top->excls, step, nrnb); - - nbv->setupGpuShortRangeWork(fr->gpuBonded, InteractionLocality::NonLocal); - wallcycle_sub_stop(wcycle, ewcsNBS_SEARCH_NONLOCAL); - wallcycle_stop(wcycle, ewcNS); - // TODO refactor this GPU halo exchange re-initialisation - // to location in do_md where GPU halo exchange is - // constructed at partitioning, after above stateGpu - // re-initialization has similarly been refactored - if (simulationWork.useGpuHaloExchange) - { - reinitGpuHaloExchange(*cr, stateGpu->getCoordinates(), stateGpu->getForces()); - } - } - else - { - if (stepWork.useGpuXHalo) - { - // The following must be called after local setCoordinates (which records an event - // when the coordinate data has been copied to the device). 
- communicateGpuHaloCoordinates(*cr, box, localXReadyOnDevice); - - if (domainWork.haveCpuBondedWork || domainWork.haveFreeEnergyWork) - { - // non-local part of coordinate buffer must be copied back to host for CPU work - stateGpu->copyCoordinatesFromGpu(x.unpaddedArrayRef(), AtomLocality::NonLocal); - } - } - else - { - if (simulationWork.useGpuUpdate) - { - GMX_ASSERT(haveCopiedXFromGpu, - "a wait should only be triggered if copy has been scheduled"); - stateGpu->waitCoordinatesReadyOnHost(AtomLocality::Local); - } - dd_move_x(cr->dd, box, x.unpaddedArrayRef(), wcycle); - } - - if (stepWork.useGpuXBufferOps) - { - if (!useGpuPmeOnThisRank && !stepWork.useGpuXHalo) - { - stateGpu->copyCoordinatesToGpu(x.unpaddedArrayRef(), AtomLocality::NonLocal); - } - nbv->convertCoordinatesGpu(AtomLocality::NonLocal, false, stateGpu->getCoordinates(), - stateGpu->getCoordinatesReadyOnDeviceEvent( - AtomLocality::NonLocal, simulationWork, stepWork)); - } - else - { - nbv->convertCoordinates(AtomLocality::NonLocal, false, x.unpaddedArrayRef()); - } - } - - if (simulationWork.useGpuNonbonded) - { - - if (stepWork.doNeighborSearch || !stepWork.useGpuXBufferOps) - { - wallcycle_start(wcycle, ewcLAUNCH_GPU); - wallcycle_sub_start(wcycle, ewcsLAUNCH_GPU_NONBONDED); - Nbnxm::gpu_copy_xq_to_gpu(nbv->gpu_nbv, nbv->nbat.get(), AtomLocality::NonLocal); - wallcycle_sub_stop(wcycle, ewcsLAUNCH_GPU_NONBONDED); - wallcycle_stop(wcycle, ewcLAUNCH_GPU); - } - - if (domainWork.haveGpuBondedWork) - { - fr->gpuBonded->setPbcAndlaunchKernel(fr->pbcType, box, fr->bMolPBC, stepWork); - } - - /* launch non-local nonbonded tasks on GPU */ - wallcycle_start_nocount(wcycle, ewcLAUNCH_GPU); - wallcycle_sub_start(wcycle, ewcsLAUNCH_GPU_NONBONDED); - do_nb_verlet(fr, ic, enerd, stepWork, InteractionLocality::NonLocal, enbvClearFNo, step, - nrnb, wcycle); - wallcycle_sub_stop(wcycle, ewcsLAUNCH_GPU_NONBONDED); - wallcycle_stop(wcycle, ewcLAUNCH_GPU); - } - } - - if (simulationWork.useGpuNonbonded && 
stepWork.computeNonbondedForces) - { - /* launch D2H copy-back F */ - wallcycle_start_nocount(wcycle, ewcLAUNCH_GPU); - wallcycle_sub_start_nocount(wcycle, ewcsLAUNCH_GPU_NONBONDED); - - if (havePPDomainDecomposition(cr)) - { - Nbnxm::gpu_launch_cpyback(nbv->gpu_nbv, nbv->nbat.get(), stepWork, AtomLocality::NonLocal); - } - Nbnxm::gpu_launch_cpyback(nbv->gpu_nbv, nbv->nbat.get(), stepWork, AtomLocality::Local); - wallcycle_sub_stop(wcycle, ewcsLAUNCH_GPU_NONBONDED); - - if (domainWork.haveGpuBondedWork && stepWork.computeEnergy) - { - fr->gpuBonded->launchEnergyTransfer(); - } - wallcycle_stop(wcycle, ewcLAUNCH_GPU); - } - - gmx::ArrayRef xWholeMolecules; - if (fr->wholeMoleculeTransform) - { - xWholeMolecules = fr->wholeMoleculeTransform->wholeMoleculeCoordinates(x.unpaddedArrayRef(), box); - } - - DipoleData dipoleData; - - if (simulationWork.computeMuTot) - { - const int start = 0; - - if (simulationWork.useGpuUpdate && !stepWork.doNeighborSearch) - { - GMX_ASSERT(haveCopiedXFromGpu, - "a wait should only be triggered if copy has been scheduled"); - stateGpu->waitCoordinatesReadyOnHost(AtomLocality::Local); - } - - /* Calculate total (local) dipole moment in a temporary common array. - * This makes it possible to sum them over nodes faster. - */ - gmx::ArrayRef xRef = - (xWholeMolecules.empty() ? x.unpaddedArrayRef() : xWholeMolecules); - calc_mu(start, mdatoms->homenr, xRef, mdatoms->chargeA, mdatoms->chargeB, - mdatoms->nChargePerturbed, dipoleData.muStaging[0], dipoleData.muStaging[1]); - - reduceAndUpdateMuTot(&dipoleData, cr, (fr->efep != efepNO), lambda, muTotal, ddBalanceRegionHandler); - } - - /* Reset energies */ - reset_enerdata(enerd); - - if (DOMAINDECOMP(cr) && !thisRankHasDuty(cr, DUTY_PME)) - { - wallcycle_start(wcycle, ewcPPDURINGPME); - dd_force_flop_start(cr->dd, nrnb); - } - - // For the rest of the CPU tasks that depend on GPU-update produced coordinates, - // this wait ensures that the D2H transfer is complete. 
- if ((simulationWork.useGpuUpdate) - && (runScheduleWork->domainWork.haveCpuLocalForceWork || stepWork.computeVirial)) - { - stateGpu->waitCoordinatesReadyOnHost(AtomLocality::Local); - } - - if (inputrec->bRot) - { - wallcycle_start(wcycle, ewcROT); - do_rotation(cr, enforcedRotation, box, as_rvec_array(x.unpaddedArrayRef().data()), t, step, - stepWork.doNeighborSearch); - wallcycle_stop(wcycle, ewcROT); - } - - /* Start the force cycle counter. - * Note that a different counter is used for dynamic load balancing. - */ - wallcycle_start(wcycle, ewcFORCE); - - /* Set up and clear force outputs: - * forceOutMtsLevel0: everything except what is in the other two outputs - * forceOutMtsLevel1: PME-mesh and listed-forces group 1 - * forceOutNonbonded: non-bonded forces - * Without multiple time stepping all point to the same object. - * With multiple time-stepping the use is different for MTS fast (level0 only) and slow steps. - */ - ForceOutputs forceOutMtsLevel0 = - setupForceOutputs(&fr->forceHelperBuffers[0], force, stepWork, wcycle); - - // Force output for MTS combined forces, only set at level1 MTS steps - std::optional forceOutMts = - (fr->useMts && stepWork.computeSlowForces) - ? std::optional(setupForceOutputs(&fr->forceHelperBuffers[1], - forceView->forceMtsCombinedWithPadding(), - stepWork, wcycle)) - : std::nullopt; - - ForceOutputs* forceOutMtsLevel1 = - fr->useMts ? (stepWork.computeSlowForces ? &forceOutMts.value() : nullptr) : &forceOutMtsLevel0; - - const bool nonbondedAtMtsLevel1 = runScheduleWork->simulationWork.computeNonbondedAtMtsLevel1; - - ForceOutputs* forceOutNonbonded = nonbondedAtMtsLevel1 ? forceOutMtsLevel1 : &forceOutMtsLevel0; - - if (inputrec->bPull && pull_have_constraint(*pull_work)) - { - clear_pull_forces(pull_work); - } - - /* We calculate the non-bonded forces, when done on the CPU, here. 
- * We do this before calling do_force_lowlevel, because in that - * function, the listed forces are calculated before PME, which - * does communication. With this order, non-bonded and listed - * force calculation imbalance can be balanced out by the domain - * decomposition load balancing. - */ - - const bool useOrEmulateGpuNb = simulationWork.useGpuNonbonded || fr->nbv->emulateGpu(); - - if (!useOrEmulateGpuNb) - { - do_nb_verlet(fr, ic, enerd, stepWork, InteractionLocality::Local, enbvClearFYes, step, nrnb, wcycle); - } - - if (fr->efep != efepNO && stepWork.computeNonbondedForces) - { - /* Calculate the local and non-local free energy interactions here. - * Happens here on the CPU both with and without GPU. - */ - nbv->dispatchFreeEnergyKernel(InteractionLocality::Local, fr, - as_rvec_array(x.unpaddedArrayRef().data()), - &forceOutNonbonded->forceWithShiftForces(), *mdatoms, - inputrec->fepvals, lambda, enerd, stepWork, nrnb); - - if (havePPDomainDecomposition(cr)) - { - nbv->dispatchFreeEnergyKernel(InteractionLocality::NonLocal, fr, - as_rvec_array(x.unpaddedArrayRef().data()), - &forceOutNonbonded->forceWithShiftForces(), *mdatoms, - inputrec->fepvals, lambda, enerd, stepWork, nrnb); - } - } - - if (stepWork.computeNonbondedForces && !useOrEmulateGpuNb) - { - if (havePPDomainDecomposition(cr)) - { - do_nb_verlet(fr, ic, enerd, stepWork, InteractionLocality::NonLocal, enbvClearFNo, step, - nrnb, wcycle); - } - - if (stepWork.computeForces) - { - /* Add all the non-bonded force to the normal force array. - * This can be split into a local and a non-local part when overlapping - * communication with calculation with domain decomposition. 
- */ - wallcycle_stop(wcycle, ewcFORCE); - nbv->atomdata_add_nbat_f_to_f(AtomLocality::All, - forceOutNonbonded->forceWithShiftForces().force()); - wallcycle_start_nocount(wcycle, ewcFORCE); - } - - /* If there are multiple fshift output buffers we need to reduce them */ - if (stepWork.computeVirial) - { - /* This is not in a subcounter because it takes a - negligible and constant-sized amount of time */ - nbnxn_atomdata_add_nbat_fshift_to_fshift( - *nbv->nbat, forceOutNonbonded->forceWithShiftForces().shiftForces()); - } - } - - // TODO Force flags should include haveFreeEnergyWork for this domain - if (stepWork.useGpuXHalo && (domainWork.haveCpuBondedWork || domainWork.haveFreeEnergyWork)) - { - wallcycle_stop(wcycle, ewcFORCE); - /* Wait for non-local coordinate data to be copied from device */ - stateGpu->waitCoordinatesReadyOnHost(AtomLocality::NonLocal); - wallcycle_start_nocount(wcycle, ewcFORCE); - } - - // Compute wall interactions, when present. - // Note: should be moved to special forces. - if (inputrec->nwall && stepWork.computeNonbondedForces) - { - /* foreign lambda component for walls */ - real dvdl_walls = do_walls(*inputrec, *fr, box, *mdatoms, x.unpaddedConstArrayRef(), - &forceOutMtsLevel0.forceWithVirial(), lambda[efptVDW], - enerd->grpp.ener[egLJSR].data(), nrnb); - enerd->dvdl_lin[efptVDW] += dvdl_walls; - } - - if (stepWork.computeListedForces) - { - /* Check whether we need to take into account PBC in listed interactions */ - bool needMolPbc = false; - for (const auto& listedForces : fr->listedForces) - { - if (listedForces.haveCpuListedForces(*fr->fcdata)) - { - needMolPbc = fr->bMolPBC; - } - } - - t_pbc pbc; - - if (needMolPbc) - { - /* Since all atoms are in the rectangular or triclinic unit-cell, - * only single box vector shifts (2 in x) are required. - */ - set_pbc_dd(&pbc, fr->pbcType, DOMAINDECOMP(cr) ? cr->dd->numCells : nullptr, TRUE, box); - } - - for (int mtsIndex = 0; mtsIndex < (fr->useMts && stepWork.computeSlowForces ? 
2 : 1); mtsIndex++) - { - ListedForces& listedForces = fr->listedForces[mtsIndex]; - ForceOutputs& forceOut = (mtsIndex == 0 ? forceOutMtsLevel0 : *forceOutMtsLevel1); - listedForces.calculate( - wcycle, box, inputrec->fepvals, cr, ms, x, xWholeMolecules, fr->fcdata.get(), - hist, &forceOut, fr, &pbc, enerd, nrnb, lambda.data(), mdatoms, - DOMAINDECOMP(cr) ? cr->dd->globalAtomIndices.data() : nullptr, stepWork); - } - } - - if (stepWork.computeSlowForces) - { - calculateLongRangeNonbondeds(fr, inputrec, cr, nrnb, wcycle, mdatoms, - x.unpaddedConstArrayRef(), &forceOutMtsLevel1->forceWithVirial(), - enerd, box, lambda.data(), as_rvec_array(dipoleData.muStateAB), - stepWork, ddBalanceRegionHandler); - } - - wallcycle_stop(wcycle, ewcFORCE); - - // VdW dispersion correction, only computed on master rank to avoid double counting - if ((stepWork.computeEnergy || stepWork.computeVirial) && fr->dispersionCorrection && MASTER(cr)) - { - // Calculate long range corrections to pressure and energy - const DispersionCorrection::Correction correction = - fr->dispersionCorrection->calculate(box, lambda[efptVDW]); - - if (stepWork.computeEnergy) - { - enerd->term[F_DISPCORR] = correction.energy; - enerd->term[F_DVDL_VDW] += correction.dvdl; - enerd->dvdl_lin[efptVDW] += correction.dvdl; - } - if (stepWork.computeVirial) - { - correction.correctVirial(vir_force); - enerd->term[F_PDISPCORR] = correction.pressure; - } - } - - const bool needToReceivePmeResultsFromSeparateRank = - (PAR(cr) && !thisRankHasDuty(cr, DUTY_PME) && stepWork.computeSlowForces); - - /* When running free energy perturbations steered by AWH and doing PME calculations on the - * GPU we must wait for the PME calculation (dhdl) results to finish before sampling the - * FEP dimension with AWH. 
*/ - const bool needEarlyPmeResults = (awh != nullptr && awh->hasFepLambdaDimension() - && pme_run_mode(fr->pmedata) != PmeRunMode::None - && stepWork.computeEnergy && stepWork.computeSlowForces); - if (needEarlyPmeResults) - { - if (useGpuPmeOnThisRank) - { - pme_gpu_wait_and_reduce(fr->pmedata, stepWork, wcycle, - &forceOutMtsLevel1->forceWithVirial(), enerd, lambda[efptCOUL]); - } - else if (needToReceivePmeResultsFromSeparateRank) - { - /* In case of node-splitting, the PP nodes receive the long-range - * forces, virial and energy from the PME nodes here. - */ - pme_receive_force_ener(fr, cr, &forceOutMtsLevel1->forceWithVirial(), enerd, - simulationWork.useGpuPmePpCommunication, - stepWork.useGpuPmeFReduction, wcycle); - } - } - - computeSpecialForces(fplog, cr, inputrec, awh, enforcedRotation, imdSession, pull_work, step, t, - wcycle, fr->forceProviders, box, x.unpaddedArrayRef(), mdatoms, lambda, - stepWork, &forceOutMtsLevel0.forceWithVirial(), - forceOutMtsLevel1 ? &forceOutMtsLevel1->forceWithVirial() : nullptr, enerd, - ed, stepWork.doNeighborSearch); - - GMX_ASSERT(!(nonbondedAtMtsLevel1 && stepWork.useGpuFBufferOps), - "The schedule below does not allow for nonbonded MTS with GPU buffer ops"); - GMX_ASSERT(!(nonbondedAtMtsLevel1 && stepWork.useGpuFHalo), - "The schedule below does not allow for nonbonded MTS with GPU halo exchange"); - // Will store the amount of cycles spent waiting for the GPU that - // will be later used in the DLB accounting. 
- float cycles_wait_gpu = 0; - if (useOrEmulateGpuNb && stepWork.computeNonbondedForces) - { - auto& forceWithShiftForces = forceOutNonbonded->forceWithShiftForces(); - - /* wait for non-local forces (or calculate in emulation mode) */ - if (havePPDomainDecomposition(cr)) - { - if (simulationWork.useGpuNonbonded) - { - cycles_wait_gpu += Nbnxm::gpu_wait_finish_task( - nbv->gpu_nbv, stepWork, AtomLocality::NonLocal, enerd->grpp.ener[egLJSR].data(), - enerd->grpp.ener[egCOULSR].data(), forceWithShiftForces.shiftForces(), wcycle); - } - else - { - wallcycle_start_nocount(wcycle, ewcFORCE); - do_nb_verlet(fr, ic, enerd, stepWork, InteractionLocality::NonLocal, enbvClearFYes, - step, nrnb, wcycle); - wallcycle_stop(wcycle, ewcFORCE); - } - - if (stepWork.useGpuFBufferOps) - { - // TODO: move this into DomainLifetimeWorkload, including the second part of the - // condition The bonded and free energy CPU tasks can have non-local force - // contributions which are a dependency for the GPU force reduction. - bool haveNonLocalForceContribInCpuBuffer = - domainWork.haveCpuBondedWork || domainWork.haveFreeEnergyWork; - - if (haveNonLocalForceContribInCpuBuffer) - { - stateGpu->copyForcesToGpu(forceOutMtsLevel0.forceWithShiftForces().force(), - AtomLocality::NonLocal); - } - - - fr->gpuForceReduction[gmx::AtomLocality::NonLocal]->execute(); - - if (!stepWork.useGpuFHalo) - { - // copy from GPU input for dd_move_f() - stateGpu->copyForcesFromGpu(forceOutMtsLevel0.forceWithShiftForces().force(), - AtomLocality::NonLocal); - } - } - else - { - nbv->atomdata_add_nbat_f_to_f(AtomLocality::NonLocal, forceWithShiftForces.force()); - } - - if (fr->nbv->emulateGpu() && stepWork.computeVirial) - { - nbnxn_atomdata_add_nbat_fshift_to_fshift(*nbv->nbat, forceWithShiftForces.shiftForces()); - } - } - } - - /* Combining the forces for multiple time stepping before the halo exchange, when possible, - * avoids an extra halo exchange (when DD is used) and post-processing step. 
- */ - const bool combineMtsForcesBeforeHaloExchange = - (stepWork.computeForces && fr->useMts && stepWork.computeSlowForces - && (legacyFlags & GMX_FORCE_DO_NOT_NEED_NORMAL_FORCE) != 0 - && !(stepWork.computeVirial || simulationWork.useGpuNonbonded || useGpuPmeOnThisRank)); - if (combineMtsForcesBeforeHaloExchange) - { - const int numAtoms = havePPDomainDecomposition(cr) ? dd_numAtomsZones(*cr->dd) : mdatoms->homenr; - combineMtsForces(numAtoms, force.unpaddedArrayRef(), forceView->forceMtsCombined(), - inputrec->mtsLevels[1].stepFactor); - } - - if (havePPDomainDecomposition(cr)) - { - /* We are done with the CPU compute. - * We will now communicate the non-local forces. - * If we use a GPU this will overlap with GPU work, so in that case - * we do not close the DD force balancing region here. - */ - ddBalanceRegionHandler.closeAfterForceComputationCpu(); - - if (stepWork.computeForces) - { - - if (stepWork.useGpuFHalo) - { - if (domainWork.haveCpuLocalForceWork) - { - stateGpu->copyForcesToGpu(forceOutMtsLevel0.forceWithShiftForces().force(), - AtomLocality::Local); - } - communicateGpuHaloForces(*cr, domainWork.haveCpuLocalForceWork); - } - else - { - if (stepWork.useGpuFBufferOps) - { - stateGpu->waitForcesReadyOnHost(AtomLocality::NonLocal); - } - - // Without MTS or with MTS at slow steps with uncombined forces we need to - // communicate the fast forces - if (!fr->useMts || !combineMtsForcesBeforeHaloExchange) - { - dd_move_f(cr->dd, &forceOutMtsLevel0.forceWithShiftForces(), wcycle); - } - // With MTS we need to communicate the slow or combined (in forceOutMtsLevel1) forces - if (fr->useMts && stepWork.computeSlowForces) - { - dd_move_f(cr->dd, &forceOutMtsLevel1->forceWithShiftForces(), wcycle); - } - } - } - } - - // With both nonbonded and PME offloaded a GPU on the same rank, we use - // an alternating wait/reduction scheme. - // When running free energy perturbations steered by AWH and calculating PME on GPU, - // i.e. 
if needEarlyPmeResults == true, the PME results have already been reduced above. - bool alternateGpuWait = - (!c_disableAlternatingWait && useGpuPmeOnThisRank && simulationWork.useGpuNonbonded - && !DOMAINDECOMP(cr) && !stepWork.useGpuFBufferOps && !needEarlyPmeResults); - if (alternateGpuWait) - { - alternatePmeNbGpuWaitReduce(fr->nbv.get(), fr->pmedata, forceOutNonbonded, - forceOutMtsLevel1, enerd, lambda[efptCOUL], stepWork, wcycle); - } - - if (!alternateGpuWait && useGpuPmeOnThisRank && !needEarlyPmeResults) - { - pme_gpu_wait_and_reduce(fr->pmedata, stepWork, wcycle, - &forceOutMtsLevel1->forceWithVirial(), enerd, lambda[efptCOUL]); - } - - /* Wait for local GPU NB outputs on the non-alternating wait path */ - if (!alternateGpuWait && stepWork.computeNonbondedForces && simulationWork.useGpuNonbonded) - { - /* Measured overhead on CUDA and OpenCL with(out) GPU sharing - * is between 0.5 and 1.5 Mcycles. So 2 MCycles is an overestimate, - * but even with a step of 0.1 ms the difference is less than 1% - * of the step time. - */ - const float gpuWaitApiOverheadMargin = 2e6F; /* cycles */ - const float waitCycles = Nbnxm::gpu_wait_finish_task( - nbv->gpu_nbv, stepWork, AtomLocality::Local, enerd->grpp.ener[egLJSR].data(), - enerd->grpp.ener[egCOULSR].data(), - forceOutNonbonded->forceWithShiftForces().shiftForces(), wcycle); - - if (ddBalanceRegionHandler.useBalancingRegion()) - { - DdBalanceRegionWaitedForGpu waitedForGpu = DdBalanceRegionWaitedForGpu::yes; - if (stepWork.computeForces && waitCycles <= gpuWaitApiOverheadMargin) - { - /* We measured few cycles, it could be that the kernel - * and transfer finished earlier and there was no actual - * wait time, only API call overhead. - * Then the actual time could be anywhere between 0 and - * cycles_wait_est. We will use half of cycles_wait_est. 
- */ - waitedForGpu = DdBalanceRegionWaitedForGpu::no; - } - ddBalanceRegionHandler.closeAfterForceComputationGpu(cycles_wait_gpu, waitedForGpu); - } - } - - if (fr->nbv->emulateGpu()) - { - // NOTE: emulation kernel is not included in the balancing region, - // but emulation mode does not target performance anyway - wallcycle_start_nocount(wcycle, ewcFORCE); - do_nb_verlet(fr, ic, enerd, stepWork, InteractionLocality::Local, - DOMAINDECOMP(cr) ? enbvClearFNo : enbvClearFYes, step, nrnb, wcycle); - wallcycle_stop(wcycle, ewcFORCE); - } - - // If on GPU PME-PP comms path, receive forces from PME before GPU buffer ops - // TODO refactor this and unify with below default-path call to the same function - // When running free energy perturbations steered by AWH and calculating PME on GPU, - // i.e. if needEarlyPmeResults == true, the PME results have already been reduced above. - if (needToReceivePmeResultsFromSeparateRank && simulationWork.useGpuPmePpCommunication && !needEarlyPmeResults) - { - /* In case of node-splitting, the PP nodes receive the long-range - * forces, virial and energy from the PME nodes here. - */ - pme_receive_force_ener(fr, cr, &forceOutMtsLevel1->forceWithVirial(), enerd, - simulationWork.useGpuPmePpCommunication, - stepWork.useGpuPmeFReduction, wcycle); - } - - - /* Do the nonbonded GPU (or emulation) force buffer reduction - * on the non-alternating path. */ - GMX_ASSERT(!(nonbondedAtMtsLevel1 && stepWork.useGpuFBufferOps), - "The schedule below does not allow for nonbonded MTS with GPU buffer ops"); - if (useOrEmulateGpuNb && !alternateGpuWait) - { - if (stepWork.useGpuFBufferOps) - { - ArrayRef forceWithShift = forceOutNonbonded->forceWithShiftForces().force(); - - // Flag to specify whether the CPU force buffer has contributions to - // local atoms. This depends on whether there are CPU-based force tasks - // or when DD is active the halo exchange has resulted in contributions - // from the non-local part. 
- const bool haveLocalForceContribInCpuBuffer = - (domainWork.haveCpuLocalForceWork || havePPDomainDecomposition(cr)); - - // TODO: move these steps as early as possible: - // - CPU f H2D should be as soon as all CPU-side forces are done - // - wait for force reduction does not need to block host (at least not here, it's sufficient to wait - // before the next CPU task that consumes the forces: vsite spread or update) - // - copy is not perfomed if GPU force halo exchange is active, because it would overwrite the result - // of the halo exchange. In that case the copy is instead performed above, before the exchange. - // These should be unified. - if (haveLocalForceContribInCpuBuffer && !stepWork.useGpuFHalo) - { - // Note: AtomLocality::All is used for the non-DD case because, as in this - // case copyForcesToGpu() uses a separate stream, it allows overlap of - // CPU force H2D with GPU force tasks on all streams including those in the - // local stream which would otherwise be implicit dependencies for the - // transfer and would not overlap. - auto locality = havePPDomainDecomposition(cr) ? AtomLocality::Local : AtomLocality::All; - - stateGpu->copyForcesToGpu(forceWithShift, locality); - } - - if (stepWork.computeNonbondedForces) - { - fr->gpuForceReduction[gmx::AtomLocality::Local]->execute(); - } - - // Copy forces to host if they are needed for update or if virtual sites are enabled. - // If there are vsites, we need to copy forces every step to spread vsite forces on host. - // TODO: When the output flags will be included in step workload, this copy can be combined with the - // copy call done in sim_utils(...) for the output. - // NOTE: If there are virtual sites, the forces are modified on host after this D2H copy. Hence, - // they should not be copied in do_md(...) for the output. 
- if (!simulationWork.useGpuUpdate - || (simulationWork.useGpuUpdate && DOMAINDECOMP(cr) && haveHostPmePpComms) || vsite) - { - stateGpu->copyForcesFromGpu(forceWithShift, AtomLocality::Local); - stateGpu->waitForcesReadyOnHost(AtomLocality::Local); - } - } - else if (stepWork.computeNonbondedForces) - { - ArrayRef forceWithShift = forceOutNonbonded->forceWithShiftForces().force(); - nbv->atomdata_add_nbat_f_to_f(AtomLocality::Local, forceWithShift); - } - } - - launchGpuEndOfStepTasks(nbv, fr->gpuBonded, fr->pmedata, enerd, *runScheduleWork, - useGpuPmeOnThisRank, step, wcycle); - - if (DOMAINDECOMP(cr)) - { - dd_force_flop_stop(cr->dd, nrnb); - } - - const bool haveCombinedMtsForces = (stepWork.computeForces && fr->useMts && stepWork.computeSlowForces - && combineMtsForcesBeforeHaloExchange); - if (stepWork.computeForces) - { - postProcessForceWithShiftForces(nrnb, wcycle, box, x.unpaddedArrayRef(), &forceOutMtsLevel0, - vir_force, *mdatoms, *fr, vsite, stepWork); - - if (fr->useMts && stepWork.computeSlowForces && !haveCombinedMtsForces) - { - postProcessForceWithShiftForces(nrnb, wcycle, box, x.unpaddedArrayRef(), forceOutMtsLevel1, - vir_force, *mdatoms, *fr, vsite, stepWork); - } - } - - // TODO refactor this and unify with above GPU PME-PP / GPU update path call to the same function - // When running free energy perturbations steered by AWH and calculating PME on GPU, - // i.e. if needEarlyPmeResults == true, the PME results have already been reduced above. - if (needToReceivePmeResultsFromSeparateRank && !simulationWork.useGpuPmePpCommunication - && !needEarlyPmeResults) - { - /* In case of node-splitting, the PP nodes receive the long-range - * forces, virial and energy from the PME nodes here. 
- */ - pme_receive_force_ener(fr, cr, &forceOutMtsLevel1->forceWithVirial(), enerd, - simulationWork.useGpuPmePpCommunication, false, wcycle); - } - - if (stepWork.computeForces) - { - /* If we don't use MTS or if we already combined the MTS forces before, we only - * need to post-process one ForceOutputs object here, called forceOutCombined, - * otherwise we have to post-process two outputs and then combine them. - */ - ForceOutputs& forceOutCombined = (haveCombinedMtsForces ? forceOutMts.value() : forceOutMtsLevel0); - postProcessForces(cr, step, nrnb, wcycle, box, x.unpaddedArrayRef(), &forceOutCombined, - vir_force, mdatoms, fr, vsite, stepWork); - - if (fr->useMts && stepWork.computeSlowForces && !haveCombinedMtsForces) - { - postProcessForces(cr, step, nrnb, wcycle, box, x.unpaddedArrayRef(), forceOutMtsLevel1, - vir_force, mdatoms, fr, vsite, stepWork); - - combineMtsForces(mdatoms->homenr, force.unpaddedArrayRef(), - forceView->forceMtsCombined(), inputrec->mtsLevels[1].stepFactor); - } - } - - if (stepWork.computeEnergy) - { - /* Compute the final potential energy terms */ - accumulatePotentialEnergies(enerd, lambda, inputrec->fepvals); - - if (!EI_TPI(inputrec->eI)) - { - checkPotentialEnergyValidity(step, *enerd, *inputrec); - } - } - - /* In case we don't have constraints and are using GPUs, the next balancing - * region starts here. - * Some "special" work at the end of do_force_cuts?, such as vsite spread, - * virial calculation and COM pulling, is not thus not included in - * the balance timing, which is ok as most tasks do communication. 
- */ - ddBalanceRegionHandler.openBeforeForceComputationCpu(DdAllowBalanceRegionReopen::no); -} diff --git a/patches/gromacs-2021.7.diff/src/gromacs/mdrun/legacymdrunoptions.cpp b/patches/gromacs-2021.7.diff/src/gromacs/mdrun/legacymdrunoptions.cpp deleted file mode 100644 index 8706772915..0000000000 --- a/patches/gromacs-2021.7.diff/src/gromacs/mdrun/legacymdrunoptions.cpp +++ /dev/null @@ -1,201 +0,0 @@ -/* - * This file is part of the GROMACS molecular simulation package. - * - * Copyright (c) 1991-2000, University of Groningen, The Netherlands. - * Copyright (c) 2001-2004, The GROMACS development team. - * Copyright (c) 2011-2019,2020, by the GROMACS development team, led by - * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl, - * and including many others, as listed in the AUTHORS file in the - * top-level source directory and at http://www.gromacs.org. - * - * GROMACS is free software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public License - * as published by the Free Software Foundation; either version 2.1 - * of the License, or (at your option) any later version. - * - * GROMACS is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public - * License along with GROMACS; if not, see - * http://www.gnu.org/licenses, or write to the Free Software Foundation, - * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. - * - * If you want to redistribute modifications to GROMACS, please - * consider that scientific software is very special. Version - * control is crucial - bugs must be traceable. We will be happy to - * consider code for inclusion in the official distribution, but - * derived work must not be called official GROMACS. 
Details are found - * in the README & COPYING files - if they are missing, get the - * official version at http://www.gromacs.org. - * - * To help us fund GROMACS development, we humbly ask that you cite - * the research papers on the package. Check out http://www.gromacs.org. - */ -/*! \internal \file - * - * \brief This file declares helper functionality for legacy option handling for mdrun - * - * \author Berk Hess - * \author David van der Spoel - * \author Erik Lindahl - * \author Mark Abraham - * - * \ingroup module_mdrun - */ -#include "gmxpre.h" - -#include "legacymdrunoptions.h" - -#include - -#include "gromacs/math/functions.h" -#include "gromacs/utility/arrayref.h" -#include "gromacs/utility/arraysize.h" -#include "gromacs/utility/fatalerror.h" - -namespace gmx -{ - -/*! \brief Return whether the command-line parameter that - * will trigger a multi-simulation is set */ -static bool is_multisim_option_set(int argc, const char* const argv[]) -{ - for (int i = 0; i < argc; ++i) - { - if (strcmp(argv[i], "-multidir") == 0) - { - return true; - } - } - return false; -} - -int LegacyMdrunOptions::updateFromCommandLine(int argc, char** argv, ArrayRef desc) -{ - unsigned long PCA_Flags = PCA_CAN_SET_DEFFNM; - // With -multidir, the working directory still needs to be - // changed, so we can't check for the existence of files during - // parsing. It isn't useful to do any completion based on file - // system contents, either. - if (is_multisim_option_set(argc, argv)) - { - PCA_Flags |= PCA_DISABLE_INPUT_FILE_CHECKING; - } - - if (!parse_common_args(&argc, argv, PCA_Flags, ssize(filenames), filenames.data(), asize(pa), - pa, ssize(desc), desc.data(), 0, nullptr, &oenv)) - { - return 0; - } - - // Handle the options that permits the user to either declare - // which compatible GPUs are availble for use, or to select a GPU - // task assignment. 
Either could be in an environment variable (so - // that there is a way to customize it, when using MPI in - // heterogeneous contexts). - { - // TODO Argument parsing can't handle std::string. We should - // fix that by changing the parsing, once more of the roles of - // handling, validating and implementing defaults for user - // command-line options have been seperated. - hw_opt.gpuIdsAvailable = gpuIdsAvailable; - hw_opt.userGpuTaskAssignment = userGpuTaskAssignment; - - const char* env = getenv("GMX_GPU_ID"); - if (env != nullptr) - { - if (!hw_opt.gpuIdsAvailable.empty()) - { - gmx_fatal(FARGS, "GMX_GPU_ID and -gpu_id can not be used at the same time"); - } - hw_opt.gpuIdsAvailable = env; - } - - env = getenv("GMX_GPUTASKS"); - if (env != nullptr) - { - if (!hw_opt.userGpuTaskAssignment.empty()) - { - gmx_fatal(FARGS, "GMX_GPUTASKS and -gputasks can not be used at the same time"); - } - hw_opt.userGpuTaskAssignment = env; - } - - if (!hw_opt.gpuIdsAvailable.empty() && !hw_opt.userGpuTaskAssignment.empty()) - { - gmx_fatal(FARGS, "-gpu_id and -gputasks cannot be used at the same time"); - } - } - - hw_opt.threadAffinity = static_cast(nenum(thread_aff_opt_choices)); - - if (!opt2parg_bSet("-append", asize(pa), pa)) - { - mdrunOptions.appendingBehavior = AppendingBehavior::Auto; - } - else - { - if (opt2parg_bool("-append", asize(pa), pa)) - { - mdrunOptions.appendingBehavior = AppendingBehavior::Appending; - } - else - { - mdrunOptions.appendingBehavior = AppendingBehavior::NoAppending; - } - } - - mdrunOptions.rerun = opt2bSet("-rerun", ssize(filenames), filenames.data()); - mdrunOptions.ntompOptionIsSet = opt2parg_bSet("-ntomp", asize(pa), pa); - - domdecOptions.rankOrder = static_cast(nenum(ddrank_opt_choices)); - domdecOptions.dlbOption = static_cast(nenum(dddlb_opt_choices)); - domdecOptions.numCells[XX] = roundToInt(realddxyz[XX]); - domdecOptions.numCells[YY] = roundToInt(realddxyz[YY]); - domdecOptions.numCells[ZZ] = roundToInt(realddxyz[ZZ]); - - /* 
PLUMED */ - plumedswitch=0; - if (opt2bSet("-plumed", static_cast(filenames.size()), filenames.data())) plumedswitch=1; - if(plumedswitch){ - int real_precision=sizeof(real); - real energyUnits=1.0; - real lengthUnits=1.0; - real timeUnits=1.0; - - if(!plumed_installed()){ - gmx_fatal(FARGS,"Plumed is not available. Check your PLUMED_KERNEL variable."); - } - plumedmain=plumed_create(); - plumed_cmd(plumedmain,"setRealPrecision",&real_precision); - // this is not necessary for gromacs units: - plumed_cmd(plumedmain,"setMDEnergyUnits",&energyUnits); - plumed_cmd(plumedmain,"setMDLengthUnits",&lengthUnits); - plumed_cmd(plumedmain,"setMDTimeUnits",&timeUnits); - // - plumed_cmd(plumedmain,"setPlumedDat",ftp2fn(efDAT,static_cast(filenames.size()), filenames.data())); - plumedswitch=1; - } - /* PLUMED HREX*/ - if(getenv("PLUMED_HREX")) plumed_hrex=1; - if(plumed_hrex){ - if(!plumedswitch) gmx_fatal(FARGS,"-hrex (or PLUMED_HREX) requires -plumed"); - if(replExParams.exchangeInterval==0) gmx_fatal(FARGS,"-hrex (or PLUMED_HREX) replica exchange"); - if(replExParams.numExchanges!=0) gmx_fatal(FARGS,"-hrex (or PLUMED_HREX) not compatible with -nex"); - } - /* END PLUMED HREX */ - - /* END PLUMED */ - - return 1; -} - -LegacyMdrunOptions::~LegacyMdrunOptions() -{ - output_env_done(oenv); -} - -} // namespace gmx diff --git a/patches/gromacs-2021.7.diff/src/gromacs/mdrun/legacymdrunoptions.cpp.preplumed b/patches/gromacs-2021.7.diff/src/gromacs/mdrun/legacymdrunoptions.cpp.preplumed deleted file mode 100644 index 42d0a7df38..0000000000 --- a/patches/gromacs-2021.7.diff/src/gromacs/mdrun/legacymdrunoptions.cpp.preplumed +++ /dev/null @@ -1,168 +0,0 @@ -/* - * This file is part of the GROMACS molecular simulation package. - * - * Copyright (c) 1991-2000, University of Groningen, The Netherlands. - * Copyright (c) 2001-2004, The GROMACS development team. 
- * Copyright (c) 2011-2019,2020, by the GROMACS development team, led by - * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl, - * and including many others, as listed in the AUTHORS file in the - * top-level source directory and at http://www.gromacs.org. - * - * GROMACS is free software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public License - * as published by the Free Software Foundation; either version 2.1 - * of the License, or (at your option) any later version. - * - * GROMACS is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public - * License along with GROMACS; if not, see - * http://www.gnu.org/licenses, or write to the Free Software Foundation, - * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. - * - * If you want to redistribute modifications to GROMACS, please - * consider that scientific software is very special. Version - * control is crucial - bugs must be traceable. We will be happy to - * consider code for inclusion in the official distribution, but - * derived work must not be called official GROMACS. Details are found - * in the README & COPYING files - if they are missing, get the - * official version at http://www.gromacs.org. - * - * To help us fund GROMACS development, we humbly ask that you cite - * the research papers on the package. Check out http://www.gromacs.org. - */ -/*! 
\internal \file - * - * \brief This file declares helper functionality for legacy option handling for mdrun - * - * \author Berk Hess - * \author David van der Spoel - * \author Erik Lindahl - * \author Mark Abraham - * - * \ingroup module_mdrun - */ -#include "gmxpre.h" - -#include "legacymdrunoptions.h" - -#include - -#include "gromacs/math/functions.h" -#include "gromacs/utility/arrayref.h" -#include "gromacs/utility/arraysize.h" -#include "gromacs/utility/fatalerror.h" - -namespace gmx -{ - -/*! \brief Return whether the command-line parameter that - * will trigger a multi-simulation is set */ -static bool is_multisim_option_set(int argc, const char* const argv[]) -{ - for (int i = 0; i < argc; ++i) - { - if (strcmp(argv[i], "-multidir") == 0) - { - return true; - } - } - return false; -} - -int LegacyMdrunOptions::updateFromCommandLine(int argc, char** argv, ArrayRef desc) -{ - unsigned long PCA_Flags = PCA_CAN_SET_DEFFNM; - // With -multidir, the working directory still needs to be - // changed, so we can't check for the existence of files during - // parsing. It isn't useful to do any completion based on file - // system contents, either. - if (is_multisim_option_set(argc, argv)) - { - PCA_Flags |= PCA_DISABLE_INPUT_FILE_CHECKING; - } - - if (!parse_common_args(&argc, argv, PCA_Flags, ssize(filenames), filenames.data(), asize(pa), - pa, ssize(desc), desc.data(), 0, nullptr, &oenv)) - { - return 0; - } - - // Handle the options that permits the user to either declare - // which compatible GPUs are availble for use, or to select a GPU - // task assignment. Either could be in an environment variable (so - // that there is a way to customize it, when using MPI in - // heterogeneous contexts). - { - // TODO Argument parsing can't handle std::string. We should - // fix that by changing the parsing, once more of the roles of - // handling, validating and implementing defaults for user - // command-line options have been seperated. 
- hw_opt.gpuIdsAvailable = gpuIdsAvailable; - hw_opt.userGpuTaskAssignment = userGpuTaskAssignment; - - const char* env = getenv("GMX_GPU_ID"); - if (env != nullptr) - { - if (!hw_opt.gpuIdsAvailable.empty()) - { - gmx_fatal(FARGS, "GMX_GPU_ID and -gpu_id can not be used at the same time"); - } - hw_opt.gpuIdsAvailable = env; - } - - env = getenv("GMX_GPUTASKS"); - if (env != nullptr) - { - if (!hw_opt.userGpuTaskAssignment.empty()) - { - gmx_fatal(FARGS, "GMX_GPUTASKS and -gputasks can not be used at the same time"); - } - hw_opt.userGpuTaskAssignment = env; - } - - if (!hw_opt.gpuIdsAvailable.empty() && !hw_opt.userGpuTaskAssignment.empty()) - { - gmx_fatal(FARGS, "-gpu_id and -gputasks cannot be used at the same time"); - } - } - - hw_opt.threadAffinity = static_cast(nenum(thread_aff_opt_choices)); - - if (!opt2parg_bSet("-append", asize(pa), pa)) - { - mdrunOptions.appendingBehavior = AppendingBehavior::Auto; - } - else - { - if (opt2parg_bool("-append", asize(pa), pa)) - { - mdrunOptions.appendingBehavior = AppendingBehavior::Appending; - } - else - { - mdrunOptions.appendingBehavior = AppendingBehavior::NoAppending; - } - } - - mdrunOptions.rerun = opt2bSet("-rerun", ssize(filenames), filenames.data()); - mdrunOptions.ntompOptionIsSet = opt2parg_bSet("-ntomp", asize(pa), pa); - - domdecOptions.rankOrder = static_cast(nenum(ddrank_opt_choices)); - domdecOptions.dlbOption = static_cast(nenum(dddlb_opt_choices)); - domdecOptions.numCells[XX] = roundToInt(realddxyz[XX]); - domdecOptions.numCells[YY] = roundToInt(realddxyz[YY]); - domdecOptions.numCells[ZZ] = roundToInt(realddxyz[ZZ]); - - return 1; -} - -LegacyMdrunOptions::~LegacyMdrunOptions() -{ - output_env_done(oenv); -} - -} // namespace gmx diff --git a/patches/gromacs-2021.7.diff/src/gromacs/mdrun/legacymdrunoptions.h b/patches/gromacs-2021.7.diff/src/gromacs/mdrun/legacymdrunoptions.h deleted file mode 100644 index 13ee9b89f9..0000000000 --- 
a/patches/gromacs-2021.7.diff/src/gromacs/mdrun/legacymdrunoptions.h +++ /dev/null @@ -1,404 +0,0 @@ -/* - * This file is part of the GROMACS molecular simulation package. - * - * Copyright (c) 1991-2000, University of Groningen, The Netherlands. - * Copyright (c) 2001-2004, The GROMACS development team. - * Copyright (c) 2011-2020, by the GROMACS development team, led by - * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl, - * and including many others, as listed in the AUTHORS file in the - * top-level source directory and at http://www.gromacs.org. - * - * GROMACS is free software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public License - * as published by the Free Software Foundation; either version 2.1 - * of the License, or (at your option) any later version. - * - * GROMACS is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public - * License along with GROMACS; if not, see - * http://www.gnu.org/licenses, or write to the Free Software Foundation, - * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. - * - * If you want to redistribute modifications to GROMACS, please - * consider that scientific software is very special. Version - * control is crucial - bugs must be traceable. We will be happy to - * consider code for inclusion in the official distribution, but - * derived work must not be called official GROMACS. Details are found - * in the README & COPYING files - if they are missing, get the - * official version at http://www.gromacs.org. - * - * To help us fund GROMACS development, we humbly ask that you cite - * the research papers on the package. Check out http://www.gromacs.org. - */ -/*! 
\libinternal \file - * - * \brief This file declares helper functionality for legacy option handling for mdrun - * - * \author Berk Hess - * \author David van der Spoel - * \author Erik Lindahl - * \author Mark Abraham - * - * \ingroup module_mdrun - * \inlibraryapi - */ -#ifndef GMX_MDRUN_LEGACYMDRUNOPTIONS_H -#define GMX_MDRUN_LEGACYMDRUNOPTIONS_H - -#include "gromacs/commandline/filenm.h" -#include "gromacs/commandline/pargs.h" -#include "gromacs/domdec/options.h" -#include "gromacs/hardware/hw_info.h" -#include "gromacs/mdtypes/mdrunoptions.h" - -#include "replicaexchange.h" - -/* PLUMED */ -#include "../../../Plumed.h" -extern int plumedswitch; -extern plumed plumedmain; -/* END PLUMED */ - -/* PLUMED HREX */ -extern int plumed_hrex; -/* END PLUMED HREX */ - -namespace gmx - -{ - -/*! \libinternal - * \brief This class provides the same command-line option - * functionality to both CLI and API sessions. - * - * This class should not exist, but is necessary now to introduce - * support for the CLI and API without duplicating code. It should be - * eliminated following the TODOs below. - * - * \warning Instances provide lifetime scope for members that do not have - * effective lifetime management or which are frequently accessed unsafely. - * The caller is responsible for keeping a LegacyMdrunOptions object alive - * for as long as any consumers, direct or transitive. - * - * \todo Modules in mdrun should acquire proper option handling so - * that all of these declarations and defaults are local to the - * modules. - * - * \todo Contextual aspects, such as working directory - * and environment variable handling are more properly - * the role of SimulationContext, and should be moved there. - */ -class LegacyMdrunOptions -{ -public: - //! Ongoing collection of mdrun options - MdrunOptions mdrunOptions; - //! Options for the domain decomposition. - DomdecOptions domdecOptions; - //! Parallelism-related user options. - gmx_hw_opt_t hw_opt; - //! 
Command-line override for the duration of a neighbor list with the Verlet scheme. - int nstlist_cmdline = 0; - //! Parameters for replica-exchange simulations. - ReplicaExchangeParameters replExParams; - - //! Filename options to fill from command-line argument values. - std::vector filenames = { { { efTPR, nullptr, nullptr, ffREAD }, - { efTRN, "-o", nullptr, ffWRITE }, - { efCOMPRESSED, "-x", nullptr, ffOPTWR }, - { efCPT, "-cpi", nullptr, ffOPTRD | ffALLOW_MISSING }, - { efCPT, "-cpo", nullptr, ffOPTWR }, - { efSTO, "-c", "confout", ffWRITE }, - { efEDR, "-e", "ener", ffWRITE }, - { efLOG, "-g", "md", ffWRITE }, - { efXVG, "-dhdl", "dhdl", ffOPTWR }, - { efXVG, "-field", "field", ffOPTWR }, - { efXVG, "-table", "table", ffOPTRD }, - { efXVG, "-tablep", "tablep", ffOPTRD }, - { efXVG, "-tableb", "table", ffOPTRDMULT }, - { efTRX, "-rerun", "rerun", ffOPTRD }, - { efXVG, "-tpi", "tpi", ffOPTWR }, - { efXVG, "-tpid", "tpidist", ffOPTWR }, - { efEDI, "-ei", "sam", ffOPTRD }, - { efXVG, "-eo", "edsam", ffOPTWR }, - { efXVG, "-px", "pullx", ffOPTWR }, - { efXVG, "-pf", "pullf", ffOPTWR }, - { efXVG, "-ro", "rotation", ffOPTWR }, - { efLOG, "-ra", "rotangles", ffOPTWR }, - { efLOG, "-rs", "rotslabs", ffOPTWR }, - { efLOG, "-rt", "rottorque", ffOPTWR }, - { efMTX, "-mtx", "nm", ffOPTWR }, - { efRND, "-multidir", nullptr, ffOPTRDMULT }, - { efXVG, "-awh", "awhinit", ffOPTRD }, - { efDAT, "-plumed", "plumed", ffOPTRD }, /* PLUMED */ - { efDAT, "-membed", "membed", ffOPTRD }, - { efTOP, "-mp", "membed", ffOPTRD }, - { efNDX, "-mn", "membed", ffOPTRD }, - { efXVG, "-if", "imdforces", ffOPTWR }, - { efXVG, "-swap", "swapions", ffOPTWR } } }; - - //! Print a warning if any force is larger than this (in kJ/mol nm). - real pforce = -1; - - //! The value of the -append option - bool appendOption = true; - - /*! \brief Output context for writing text files - * - * \todo Clarify initialization, ownership, and lifetime. */ - gmx_output_env_t* oenv = nullptr; - - /*! 
\brief Command line options, defaults, docs and storage for them to fill. */ - /*! \{ */ - rvec realddxyz = { 0, 0, 0 }; - const char* ddrank_opt_choices[static_cast(DdRankOrder::Count) + 1] = { - nullptr, "interleave", "pp_pme", "cartesian", nullptr - }; - const char* dddlb_opt_choices[static_cast(DlbOption::Count) + 1] = { nullptr, "auto", "no", - "yes", nullptr }; - const char* thread_aff_opt_choices[static_cast(ThreadAffinity::Count) + 1] = { - nullptr, "auto", "on", "off", nullptr - }; - const char* nbpu_opt_choices[5] = { nullptr, "auto", "cpu", "gpu", nullptr }; - const char* pme_opt_choices[5] = { nullptr, "auto", "cpu", "gpu", nullptr }; - const char* pme_fft_opt_choices[5] = { nullptr, "auto", "cpu", "gpu", nullptr }; - const char* bonded_opt_choices[5] = { nullptr, "auto", "cpu", "gpu", nullptr }; - const char* update_opt_choices[5] = { nullptr, "auto", "cpu", "gpu", nullptr }; - const char* gpuIdsAvailable = ""; - const char* userGpuTaskAssignment = ""; - - - ImdOptions& imdOptions = mdrunOptions.imdOptions; - - t_pargs pa[49] = { - - { "-dd", FALSE, etRVEC, { &realddxyz }, "Domain decomposition grid, 0 is optimize" }, - { "-ddorder", FALSE, etENUM, { ddrank_opt_choices }, "DD rank order" }, - { "-npme", - FALSE, - etINT, - { &domdecOptions.numPmeRanks }, - "Number of separate ranks to be used for PME, -1 is guess" }, - { "-nt", - FALSE, - etINT, - { &hw_opt.nthreads_tot }, - "Total number of threads to start (0 is guess)" }, - { "-ntmpi", - FALSE, - etINT, - { &hw_opt.nthreads_tmpi }, - "Number of thread-MPI ranks to start (0 is guess)" }, - { "-ntomp", - FALSE, - etINT, - { &hw_opt.nthreads_omp }, - "Number of OpenMP threads per MPI rank to start (0 is guess)" }, - { "-ntomp_pme", - FALSE, - etINT, - { &hw_opt.nthreads_omp_pme }, - "Number of OpenMP threads per MPI rank to start (0 is -ntomp)" }, - { "-pin", - FALSE, - etENUM, - { thread_aff_opt_choices }, - "Whether mdrun should try to set thread affinities" }, - { "-pinoffset", - FALSE, - etINT, - { 
&hw_opt.core_pinning_offset }, - "The lowest logical core number to which mdrun should pin the first thread" }, - { "-pinstride", - FALSE, - etINT, - { &hw_opt.core_pinning_stride }, - "Pinning distance in logical cores for threads, use 0 to minimize the number of threads " - "per physical core" }, - { "-gpu_id", - FALSE, - etSTR, - { &gpuIdsAvailable }, - "List of unique GPU device IDs available to use" }, - { "-gputasks", - FALSE, - etSTR, - { &userGpuTaskAssignment }, - "List of GPU device IDs, mapping each PP task on each node to a device" }, - { "-ddcheck", - FALSE, - etBOOL, - { &domdecOptions.checkBondedInteractions }, - "Check for all bonded interactions with DD" }, - { "-ddbondcomm", - FALSE, - etBOOL, - { &domdecOptions.useBondedCommunication }, - "HIDDENUse special bonded atom communication when [TT]-rdd[tt] > cut-off" }, - { "-rdd", - FALSE, - etREAL, - { &domdecOptions.minimumCommunicationRange }, - "The maximum distance for bonded interactions with DD (nm), 0 is determine from initial " - "coordinates" }, - { "-rcon", - FALSE, - etREAL, - { &domdecOptions.constraintCommunicationRange }, - "Maximum distance for P-LINCS (nm), 0 is estimate" }, - { "-dlb", FALSE, etENUM, { dddlb_opt_choices }, "Dynamic load balancing (with DD)" }, - { "-dds", - FALSE, - etREAL, - { &domdecOptions.dlbScaling }, - "Fraction in (0,1) by whose reciprocal the initial DD cell size will be increased in " - "order to " - "provide a margin in which dynamic load balancing can act while preserving the minimum " - "cell size." }, - { "-ddcsx", - FALSE, - etSTR, - { &domdecOptions.cellSizeX }, - "HIDDENA string containing a vector of the relative sizes in the x " - "direction of the corresponding DD cells. Only effective with static " - "load balancing." }, - { "-ddcsy", - FALSE, - etSTR, - { &domdecOptions.cellSizeY }, - "HIDDENA string containing a vector of the relative sizes in the y " - "direction of the corresponding DD cells. Only effective with static " - "load balancing." 
}, - { "-ddcsz", - FALSE, - etSTR, - { &domdecOptions.cellSizeZ }, - "HIDDENA string containing a vector of the relative sizes in the z " - "direction of the corresponding DD cells. Only effective with static " - "load balancing." }, - { "-nb", FALSE, etENUM, { nbpu_opt_choices }, "Calculate non-bonded interactions on" }, - { "-nstlist", - FALSE, - etINT, - { &nstlist_cmdline }, - "Set nstlist when using a Verlet buffer tolerance (0 is guess)" }, - { "-tunepme", - FALSE, - etBOOL, - { &mdrunOptions.tunePme }, - "Optimize PME load between PP/PME ranks or GPU/CPU" }, - { "-pme", FALSE, etENUM, { pme_opt_choices }, "Perform PME calculations on" }, - { "-pmefft", FALSE, etENUM, { pme_fft_opt_choices }, "Perform PME FFT calculations on" }, - { "-bonded", FALSE, etENUM, { bonded_opt_choices }, "Perform bonded calculations on" }, - { "-update", FALSE, etENUM, { update_opt_choices }, "Perform update and constraints on" }, - { "-v", FALSE, etBOOL, { &mdrunOptions.verbose }, "Be loud and noisy" }, - { "-pforce", FALSE, etREAL, { &pforce }, "Print all forces larger than this (kJ/mol nm)" }, - { "-reprod", - FALSE, - etBOOL, - { &mdrunOptions.reproducible }, - "Try to avoid optimizations that affect binary reproducibility" }, - { "-cpt", - FALSE, - etREAL, - { &mdrunOptions.checkpointOptions.period }, - "Checkpoint interval (minutes)" }, - { "-cpnum", - FALSE, - etBOOL, - { &mdrunOptions.checkpointOptions.keepAndNumberCheckpointFiles }, - "Keep and number checkpoint files" }, - { "-append", - FALSE, - etBOOL, - { &appendOption }, - "Append to previous output files when continuing from checkpoint instead of adding the " - "simulation part number to all file names" }, - { "-nsteps", - FALSE, - etINT64, - { &mdrunOptions.numStepsCommandline }, - "Run this number of steps (-1 means infinite, -2 means use mdp option, smaller is " - "invalid)" }, - { "-maxh", - FALSE, - etREAL, - { &mdrunOptions.maximumHoursToRun }, - "Terminate after 0.99 times this time (hours)" }, - { "-replex", 
- FALSE, - etINT, - { &replExParams.exchangeInterval }, - "Attempt replica exchange periodically with this period (steps)" }, - { "-nex", - FALSE, - etINT, - { &replExParams.numExchanges }, - "Number of random exchanges to carry out each exchange interval (N^3 is one suggestion). " - " -nex zero or not specified gives neighbor replica exchange." }, - { "-reseed", - FALSE, - etINT, - { &replExParams.randomSeed }, - "Seed for replica exchange, -1 is generate a seed" }, - { "-hrex", FALSE, etBOOL, {&plumed_hrex}, /* PLUMED HREX */ - "Enable hamiltonian replica exchange" }, - { "-imdport", FALSE, etINT, { &imdOptions.port }, "HIDDENIMD listening port" }, - { "-imdwait", - FALSE, - etBOOL, - { &imdOptions.wait }, - "HIDDENPause the simulation while no IMD client is connected" }, - { "-imdterm", - FALSE, - etBOOL, - { &imdOptions.terminatable }, - "HIDDENAllow termination of the simulation from IMD client" }, - { "-imdpull", - FALSE, - etBOOL, - { &imdOptions.pull }, - "HIDDENAllow pulling in the simulation from IMD client" }, - { "-rerunvsite", - FALSE, - etBOOL, - { &mdrunOptions.rerunConstructVsites }, - "HIDDENRecalculate virtual site coordinates with [TT]-rerun[tt]" }, - { "-confout", - FALSE, - etBOOL, - { &mdrunOptions.writeConfout }, - "HIDDENWrite the last configuration with [TT]-c[tt] and force checkpointing at the last " - "step" }, - { "-stepout", - FALSE, - etINT, - { &mdrunOptions.verboseStepPrintInterval }, - "HIDDENFrequency of writing the remaining wall clock time for the run" }, - { "-resetstep", - FALSE, - etINT, - { &mdrunOptions.timingOptions.resetStep }, - "HIDDENReset cycle counters after these many time steps" }, - { "-resethway", - FALSE, - etBOOL, - { &mdrunOptions.timingOptions.resetHalfway }, - "HIDDENReset the cycle counters after half the number of steps or halfway " - "[TT]-maxh[tt]" } - }; - /*! \} */ - - //! Parses the command-line input and prepares to start mdrun. 
- int updateFromCommandLine(int argc, char** argv, ArrayRef desc); - - ~LegacyMdrunOptions(); -}; - -} // end namespace gmx - -#endif diff --git a/patches/gromacs-2021.7.diff/src/gromacs/mdrun/legacymdrunoptions.h.preplumed b/patches/gromacs-2021.7.diff/src/gromacs/mdrun/legacymdrunoptions.h.preplumed deleted file mode 100644 index 474f6f0396..0000000000 --- a/patches/gromacs-2021.7.diff/src/gromacs/mdrun/legacymdrunoptions.h.preplumed +++ /dev/null @@ -1,390 +0,0 @@ -/* - * This file is part of the GROMACS molecular simulation package. - * - * Copyright (c) 1991-2000, University of Groningen, The Netherlands. - * Copyright (c) 2001-2004, The GROMACS development team. - * Copyright (c) 2011-2020, by the GROMACS development team, led by - * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl, - * and including many others, as listed in the AUTHORS file in the - * top-level source directory and at http://www.gromacs.org. - * - * GROMACS is free software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public License - * as published by the Free Software Foundation; either version 2.1 - * of the License, or (at your option) any later version. - * - * GROMACS is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public - * License along with GROMACS; if not, see - * http://www.gnu.org/licenses, or write to the Free Software Foundation, - * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. - * - * If you want to redistribute modifications to GROMACS, please - * consider that scientific software is very special. Version - * control is crucial - bugs must be traceable. 
We will be happy to - * consider code for inclusion in the official distribution, but - * derived work must not be called official GROMACS. Details are found - * in the README & COPYING files - if they are missing, get the - * official version at http://www.gromacs.org. - * - * To help us fund GROMACS development, we humbly ask that you cite - * the research papers on the package. Check out http://www.gromacs.org. - */ -/*! \libinternal \file - * - * \brief This file declares helper functionality for legacy option handling for mdrun - * - * \author Berk Hess - * \author David van der Spoel - * \author Erik Lindahl - * \author Mark Abraham - * - * \ingroup module_mdrun - * \inlibraryapi - */ -#ifndef GMX_MDRUN_LEGACYMDRUNOPTIONS_H -#define GMX_MDRUN_LEGACYMDRUNOPTIONS_H - -#include "gromacs/commandline/filenm.h" -#include "gromacs/commandline/pargs.h" -#include "gromacs/domdec/options.h" -#include "gromacs/hardware/hw_info.h" -#include "gromacs/mdtypes/mdrunoptions.h" - -#include "replicaexchange.h" - -namespace gmx -{ - -/*! \libinternal - * \brief This class provides the same command-line option - * functionality to both CLI and API sessions. - * - * This class should not exist, but is necessary now to introduce - * support for the CLI and API without duplicating code. It should be - * eliminated following the TODOs below. - * - * \warning Instances provide lifetime scope for members that do not have - * effective lifetime management or which are frequently accessed unsafely. - * The caller is responsible for keeping a LegacyMdrunOptions object alive - * for as long as any consumers, direct or transitive. - * - * \todo Modules in mdrun should acquire proper option handling so - * that all of these declarations and defaults are local to the - * modules. - * - * \todo Contextual aspects, such as working directory - * and environment variable handling are more properly - * the role of SimulationContext, and should be moved there. 
- */ -class LegacyMdrunOptions -{ -public: - //! Ongoing collection of mdrun options - MdrunOptions mdrunOptions; - //! Options for the domain decomposition. - DomdecOptions domdecOptions; - //! Parallelism-related user options. - gmx_hw_opt_t hw_opt; - //! Command-line override for the duration of a neighbor list with the Verlet scheme. - int nstlist_cmdline = 0; - //! Parameters for replica-exchange simulations. - ReplicaExchangeParameters replExParams; - - //! Filename options to fill from command-line argument values. - std::vector filenames = { { { efTPR, nullptr, nullptr, ffREAD }, - { efTRN, "-o", nullptr, ffWRITE }, - { efCOMPRESSED, "-x", nullptr, ffOPTWR }, - { efCPT, "-cpi", nullptr, ffOPTRD | ffALLOW_MISSING }, - { efCPT, "-cpo", nullptr, ffOPTWR }, - { efSTO, "-c", "confout", ffWRITE }, - { efEDR, "-e", "ener", ffWRITE }, - { efLOG, "-g", "md", ffWRITE }, - { efXVG, "-dhdl", "dhdl", ffOPTWR }, - { efXVG, "-field", "field", ffOPTWR }, - { efXVG, "-table", "table", ffOPTRD }, - { efXVG, "-tablep", "tablep", ffOPTRD }, - { efXVG, "-tableb", "table", ffOPTRDMULT }, - { efTRX, "-rerun", "rerun", ffOPTRD }, - { efXVG, "-tpi", "tpi", ffOPTWR }, - { efXVG, "-tpid", "tpidist", ffOPTWR }, - { efEDI, "-ei", "sam", ffOPTRD }, - { efXVG, "-eo", "edsam", ffOPTWR }, - { efXVG, "-px", "pullx", ffOPTWR }, - { efXVG, "-pf", "pullf", ffOPTWR }, - { efXVG, "-ro", "rotation", ffOPTWR }, - { efLOG, "-ra", "rotangles", ffOPTWR }, - { efLOG, "-rs", "rotslabs", ffOPTWR }, - { efLOG, "-rt", "rottorque", ffOPTWR }, - { efMTX, "-mtx", "nm", ffOPTWR }, - { efRND, "-multidir", nullptr, ffOPTRDMULT }, - { efXVG, "-awh", "awhinit", ffOPTRD }, - { efDAT, "-membed", "membed", ffOPTRD }, - { efTOP, "-mp", "membed", ffOPTRD }, - { efNDX, "-mn", "membed", ffOPTRD }, - { efXVG, "-if", "imdforces", ffOPTWR }, - { efXVG, "-swap", "swapions", ffOPTWR } } }; - - //! Print a warning if any force is larger than this (in kJ/mol nm). - real pforce = -1; - - //! 
The value of the -append option - bool appendOption = true; - - /*! \brief Output context for writing text files - * - * \todo Clarify initialization, ownership, and lifetime. */ - gmx_output_env_t* oenv = nullptr; - - /*! \brief Command line options, defaults, docs and storage for them to fill. */ - /*! \{ */ - rvec realddxyz = { 0, 0, 0 }; - const char* ddrank_opt_choices[static_cast(DdRankOrder::Count) + 1] = { - nullptr, "interleave", "pp_pme", "cartesian", nullptr - }; - const char* dddlb_opt_choices[static_cast(DlbOption::Count) + 1] = { nullptr, "auto", "no", - "yes", nullptr }; - const char* thread_aff_opt_choices[static_cast(ThreadAffinity::Count) + 1] = { - nullptr, "auto", "on", "off", nullptr - }; - const char* nbpu_opt_choices[5] = { nullptr, "auto", "cpu", "gpu", nullptr }; - const char* pme_opt_choices[5] = { nullptr, "auto", "cpu", "gpu", nullptr }; - const char* pme_fft_opt_choices[5] = { nullptr, "auto", "cpu", "gpu", nullptr }; - const char* bonded_opt_choices[5] = { nullptr, "auto", "cpu", "gpu", nullptr }; - const char* update_opt_choices[5] = { nullptr, "auto", "cpu", "gpu", nullptr }; - const char* gpuIdsAvailable = ""; - const char* userGpuTaskAssignment = ""; - - - ImdOptions& imdOptions = mdrunOptions.imdOptions; - - t_pargs pa[48] = { - - { "-dd", FALSE, etRVEC, { &realddxyz }, "Domain decomposition grid, 0 is optimize" }, - { "-ddorder", FALSE, etENUM, { ddrank_opt_choices }, "DD rank order" }, - { "-npme", - FALSE, - etINT, - { &domdecOptions.numPmeRanks }, - "Number of separate ranks to be used for PME, -1 is guess" }, - { "-nt", - FALSE, - etINT, - { &hw_opt.nthreads_tot }, - "Total number of threads to start (0 is guess)" }, - { "-ntmpi", - FALSE, - etINT, - { &hw_opt.nthreads_tmpi }, - "Number of thread-MPI ranks to start (0 is guess)" }, - { "-ntomp", - FALSE, - etINT, - { &hw_opt.nthreads_omp }, - "Number of OpenMP threads per MPI rank to start (0 is guess)" }, - { "-ntomp_pme", - FALSE, - etINT, - { &hw_opt.nthreads_omp_pme }, - 
"Number of OpenMP threads per MPI rank to start (0 is -ntomp)" }, - { "-pin", - FALSE, - etENUM, - { thread_aff_opt_choices }, - "Whether mdrun should try to set thread affinities" }, - { "-pinoffset", - FALSE, - etINT, - { &hw_opt.core_pinning_offset }, - "The lowest logical core number to which mdrun should pin the first thread" }, - { "-pinstride", - FALSE, - etINT, - { &hw_opt.core_pinning_stride }, - "Pinning distance in logical cores for threads, use 0 to minimize the number of threads " - "per physical core" }, - { "-gpu_id", - FALSE, - etSTR, - { &gpuIdsAvailable }, - "List of unique GPU device IDs available to use" }, - { "-gputasks", - FALSE, - etSTR, - { &userGpuTaskAssignment }, - "List of GPU device IDs, mapping each PP task on each node to a device" }, - { "-ddcheck", - FALSE, - etBOOL, - { &domdecOptions.checkBondedInteractions }, - "Check for all bonded interactions with DD" }, - { "-ddbondcomm", - FALSE, - etBOOL, - { &domdecOptions.useBondedCommunication }, - "HIDDENUse special bonded atom communication when [TT]-rdd[tt] > cut-off" }, - { "-rdd", - FALSE, - etREAL, - { &domdecOptions.minimumCommunicationRange }, - "The maximum distance for bonded interactions with DD (nm), 0 is determine from initial " - "coordinates" }, - { "-rcon", - FALSE, - etREAL, - { &domdecOptions.constraintCommunicationRange }, - "Maximum distance for P-LINCS (nm), 0 is estimate" }, - { "-dlb", FALSE, etENUM, { dddlb_opt_choices }, "Dynamic load balancing (with DD)" }, - { "-dds", - FALSE, - etREAL, - { &domdecOptions.dlbScaling }, - "Fraction in (0,1) by whose reciprocal the initial DD cell size will be increased in " - "order to " - "provide a margin in which dynamic load balancing can act while preserving the minimum " - "cell size." }, - { "-ddcsx", - FALSE, - etSTR, - { &domdecOptions.cellSizeX }, - "HIDDENA string containing a vector of the relative sizes in the x " - "direction of the corresponding DD cells. Only effective with static " - "load balancing." 
}, - { "-ddcsy", - FALSE, - etSTR, - { &domdecOptions.cellSizeY }, - "HIDDENA string containing a vector of the relative sizes in the y " - "direction of the corresponding DD cells. Only effective with static " - "load balancing." }, - { "-ddcsz", - FALSE, - etSTR, - { &domdecOptions.cellSizeZ }, - "HIDDENA string containing a vector of the relative sizes in the z " - "direction of the corresponding DD cells. Only effective with static " - "load balancing." }, - { "-nb", FALSE, etENUM, { nbpu_opt_choices }, "Calculate non-bonded interactions on" }, - { "-nstlist", - FALSE, - etINT, - { &nstlist_cmdline }, - "Set nstlist when using a Verlet buffer tolerance (0 is guess)" }, - { "-tunepme", - FALSE, - etBOOL, - { &mdrunOptions.tunePme }, - "Optimize PME load between PP/PME ranks or GPU/CPU" }, - { "-pme", FALSE, etENUM, { pme_opt_choices }, "Perform PME calculations on" }, - { "-pmefft", FALSE, etENUM, { pme_fft_opt_choices }, "Perform PME FFT calculations on" }, - { "-bonded", FALSE, etENUM, { bonded_opt_choices }, "Perform bonded calculations on" }, - { "-update", FALSE, etENUM, { update_opt_choices }, "Perform update and constraints on" }, - { "-v", FALSE, etBOOL, { &mdrunOptions.verbose }, "Be loud and noisy" }, - { "-pforce", FALSE, etREAL, { &pforce }, "Print all forces larger than this (kJ/mol nm)" }, - { "-reprod", - FALSE, - etBOOL, - { &mdrunOptions.reproducible }, - "Try to avoid optimizations that affect binary reproducibility" }, - { "-cpt", - FALSE, - etREAL, - { &mdrunOptions.checkpointOptions.period }, - "Checkpoint interval (minutes)" }, - { "-cpnum", - FALSE, - etBOOL, - { &mdrunOptions.checkpointOptions.keepAndNumberCheckpointFiles }, - "Keep and number checkpoint files" }, - { "-append", - FALSE, - etBOOL, - { &appendOption }, - "Append to previous output files when continuing from checkpoint instead of adding the " - "simulation part number to all file names" }, - { "-nsteps", - FALSE, - etINT64, - { &mdrunOptions.numStepsCommandline }, - "Run 
this number of steps (-1 means infinite, -2 means use mdp option, smaller is " - "invalid)" }, - { "-maxh", - FALSE, - etREAL, - { &mdrunOptions.maximumHoursToRun }, - "Terminate after 0.99 times this time (hours)" }, - { "-replex", - FALSE, - etINT, - { &replExParams.exchangeInterval }, - "Attempt replica exchange periodically with this period (steps)" }, - { "-nex", - FALSE, - etINT, - { &replExParams.numExchanges }, - "Number of random exchanges to carry out each exchange interval (N^3 is one suggestion). " - " -nex zero or not specified gives neighbor replica exchange." }, - { "-reseed", - FALSE, - etINT, - { &replExParams.randomSeed }, - "Seed for replica exchange, -1 is generate a seed" }, - { "-imdport", FALSE, etINT, { &imdOptions.port }, "HIDDENIMD listening port" }, - { "-imdwait", - FALSE, - etBOOL, - { &imdOptions.wait }, - "HIDDENPause the simulation while no IMD client is connected" }, - { "-imdterm", - FALSE, - etBOOL, - { &imdOptions.terminatable }, - "HIDDENAllow termination of the simulation from IMD client" }, - { "-imdpull", - FALSE, - etBOOL, - { &imdOptions.pull }, - "HIDDENAllow pulling in the simulation from IMD client" }, - { "-rerunvsite", - FALSE, - etBOOL, - { &mdrunOptions.rerunConstructVsites }, - "HIDDENRecalculate virtual site coordinates with [TT]-rerun[tt]" }, - { "-confout", - FALSE, - etBOOL, - { &mdrunOptions.writeConfout }, - "HIDDENWrite the last configuration with [TT]-c[tt] and force checkpointing at the last " - "step" }, - { "-stepout", - FALSE, - etINT, - { &mdrunOptions.verboseStepPrintInterval }, - "HIDDENFrequency of writing the remaining wall clock time for the run" }, - { "-resetstep", - FALSE, - etINT, - { &mdrunOptions.timingOptions.resetStep }, - "HIDDENReset cycle counters after these many time steps" }, - { "-resethway", - FALSE, - etBOOL, - { &mdrunOptions.timingOptions.resetHalfway }, - "HIDDENReset the cycle counters after half the number of steps or halfway " - "[TT]-maxh[tt]" } - }; - /*! \} */ - - //! 
Parses the command-line input and prepares to start mdrun. - int updateFromCommandLine(int argc, char** argv, ArrayRef desc); - - ~LegacyMdrunOptions(); -}; - -} // end namespace gmx - -#endif diff --git a/patches/gromacs-2021.7.diff/src/gromacs/mdrun/md.cpp b/patches/gromacs-2021.7.diff/src/gromacs/mdrun/md.cpp deleted file mode 100644 index 5f27c78570..0000000000 --- a/patches/gromacs-2021.7.diff/src/gromacs/mdrun/md.cpp +++ /dev/null @@ -1,1936 +0,0 @@ -/* - * This file is part of the GROMACS molecular simulation package. - * - * Copyright (c) 1991-2000, University of Groningen, The Netherlands. - * Copyright (c) 2001-2004, The GROMACS development team. - * Copyright (c) 2011-2019,2020,2021, by the GROMACS development team, led by - * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl, - * and including many others, as listed in the AUTHORS file in the - * top-level source directory and at http://www.gromacs.org. - * - * GROMACS is free software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public License - * as published by the Free Software Foundation; either version 2.1 - * of the License, or (at your option) any later version. - * - * GROMACS is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public - * License along with GROMACS; if not, see - * http://www.gnu.org/licenses, or write to the Free Software Foundation, - * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. - * - * If you want to redistribute modifications to GROMACS, please - * consider that scientific software is very special. Version - * control is crucial - bugs must be traceable. 
We will be happy to - * consider code for inclusion in the official distribution, but - * derived work must not be called official GROMACS. Details are found - * in the README & COPYING files - if they are missing, get the - * official version at http://www.gromacs.org. - * - * To help us fund GROMACS development, we humbly ask that you cite - * the research papers on the package. Check out http://www.gromacs.org. - */ -/*! \internal \file - * - * \brief Implements the integrator for normal molecular dynamics simulations - * - * \author David van der Spoel - * \ingroup module_mdrun - */ -#include "gmxpre.h" - -#include -#include -#include -#include - -#include -#include -#include - -#include "gromacs/applied_forces/awh/awh.h" -#include "gromacs/commandline/filenm.h" -#include "gromacs/domdec/collect.h" -#include "gromacs/domdec/dlbtiming.h" -#include "gromacs/domdec/domdec.h" -#include "gromacs/domdec/domdec_network.h" -#include "gromacs/domdec/domdec_struct.h" -#include "gromacs/domdec/gpuhaloexchange.h" -#include "gromacs/domdec/mdsetup.h" -#include "gromacs/domdec/partition.h" -#include "gromacs/essentialdynamics/edsam.h" -#include "gromacs/ewald/pme_load_balancing.h" -#include "gromacs/ewald/pme_pp.h" -#include "gromacs/fileio/trxio.h" -#include "gromacs/gmxlib/network.h" -#include "gromacs/gmxlib/nrnb.h" -#include "gromacs/gpu_utils/device_stream_manager.h" -#include "gromacs/gpu_utils/gpu_utils.h" -#include "gromacs/math/units.h" -#include "gromacs/imd/imd.h" -#include "gromacs/listed_forces/listed_forces.h" -#include "gromacs/math/functions.h" -#include "gromacs/math/invertmatrix.h" -#include "gromacs/math/vec.h" -#include "gromacs/math/vectypes.h" -#include "gromacs/mdlib/checkpointhandler.h" -#include "gromacs/mdlib/compute_io.h" -#include "gromacs/mdlib/constr.h" -#include "gromacs/mdlib/coupling.h" -#include "gromacs/mdlib/ebin.h" -#include "gromacs/mdlib/enerdata_utils.h" -#include "gromacs/mdlib/energyoutput.h" -#include "gromacs/mdlib/expanded.h" 
-#include "gromacs/mdlib/force.h" -#include "gromacs/mdlib/force_flags.h" -#include "gromacs/mdlib/forcerec.h" -#include "gromacs/mdlib/freeenergyparameters.h" -#include "gromacs/mdlib/md_support.h" -#include "gromacs/mdlib/mdatoms.h" -#include "gromacs/mdlib/mdoutf.h" -#include "gromacs/mdlib/membed.h" -#include "gromacs/mdlib/resethandler.h" -#include "gromacs/mdlib/sighandler.h" -#include "gromacs/mdlib/simulationsignal.h" -#include "gromacs/mdlib/stat.h" -#include "gromacs/mdlib/stophandler.h" -#include "gromacs/mdlib/tgroup.h" -#include "gromacs/mdlib/trajectory_writing.h" -#include "gromacs/mdlib/update.h" -#include "gromacs/mdlib/update_constrain_gpu.h" -#include "gromacs/mdlib/vcm.h" -#include "gromacs/mdlib/vsite.h" -#include "gromacs/mdrunutility/handlerestart.h" -#include "gromacs/mdrunutility/multisim.h" -#include "gromacs/mdrunutility/printtime.h" -#include "gromacs/mdtypes/awh_history.h" -#include "gromacs/mdtypes/awh_params.h" -#include "gromacs/mdtypes/commrec.h" -#include "gromacs/mdtypes/df_history.h" -#include "gromacs/mdtypes/energyhistory.h" -#include "gromacs/mdtypes/fcdata.h" -#include "gromacs/mdtypes/forcebuffers.h" -#include "gromacs/mdtypes/forcerec.h" -#include "gromacs/mdtypes/group.h" -#include "gromacs/mdtypes/inputrec.h" -#include "gromacs/mdtypes/interaction_const.h" -#include "gromacs/mdtypes/md_enums.h" -#include "gromacs/mdtypes/mdatom.h" -#include "gromacs/mdtypes/mdrunoptions.h" -#include "gromacs/mdtypes/multipletimestepping.h" -#include "gromacs/mdtypes/observableshistory.h" -#include "gromacs/mdtypes/pullhistory.h" -#include "gromacs/mdtypes/simulation_workload.h" -#include "gromacs/mdtypes/state.h" -#include "gromacs/mdtypes/state_propagator_data_gpu.h" -#include "gromacs/modularsimulator/energydata.h" -#include "gromacs/nbnxm/gpu_data_mgmt.h" -#include "gromacs/nbnxm/nbnxm.h" -#include "gromacs/pbcutil/pbc.h" -#include "gromacs/pulling/output.h" -#include "gromacs/pulling/pull.h" -#include "gromacs/swap/swapcoords.h" 
-#include "gromacs/timing/wallcycle.h" -#include "gromacs/timing/walltime_accounting.h" -#include "gromacs/topology/atoms.h" -#include "gromacs/topology/idef.h" -#include "gromacs/topology/mtop_util.h" -#include "gromacs/topology/topology.h" -#include "gromacs/trajectory/trajectoryframe.h" -#include "gromacs/utility/basedefinitions.h" -#include "gromacs/utility/cstringutil.h" -#include "gromacs/utility/fatalerror.h" -#include "gromacs/utility/logger.h" -#include "gromacs/utility/real.h" -#include "gromacs/utility/smalloc.h" - -#include "legacysimulator.h" -#include "replicaexchange.h" -#include "shellfc.h" - -/* PLUMED */ -#include "../../../Plumed.h" -extern int plumedswitch; -extern plumed plumedmain; -/* END PLUMED */ - -/* PLUMED HREX */ -extern int plumed_hrex; -/* END PLUMED HREX */ - -using gmx::SimulationSignaller; - -void gmx::LegacySimulator::do_md() -{ - // TODO Historically, the EM and MD "integrators" used different - // names for the t_inputrec *parameter, but these must have the - // same name, now that it's a member of a struct. We use this ir - // alias to avoid a large ripple of nearly useless changes. - // t_inputrec is being replaced by IMdpOptionsProvider, so this - // will go away eventually. 
- t_inputrec* ir = inputrec; - int64_t step, step_rel; - double t, t0 = ir->init_t; - gmx_bool bGStatEveryStep, bGStat, bCalcVir, bCalcEnerStep, bCalcEner; - gmx_bool bNS = FALSE, bNStList, bStopCM, bFirstStep, bInitStep, bLastStep = FALSE; - gmx_bool bDoDHDL = FALSE, bDoFEP = FALSE, bDoExpanded = FALSE; - gmx_bool do_ene, do_log, do_verbose; - gmx_bool bMasterState; - unsigned int force_flags; - tensor force_vir = { { 0 } }, shake_vir = { { 0 } }, total_vir = { { 0 } }, pres = { { 0 } }; - int i, m; - rvec mu_tot; - matrix pressureCouplingMu, M; - gmx_repl_ex_t repl_ex = nullptr; - gmx_global_stat_t gstat; - gmx_shellfc_t* shellfc; - gmx_bool bSumEkinhOld, bDoReplEx, bDoReplExPrev, bExchanged, bNeedRepartition; - gmx_bool bTemp, bPres, bTrotter; - real dvdl_constr; - std::vector cbuf; - matrix lastbox; - int lamnew = 0; - /* for FEP */ - int nstfep = 0; - double cycles; - real saved_conserved_quantity = 0; - real last_ekin = 0; - t_extmass MassQ; - char sbuf[STEPSTRSIZE], sbuf2[STEPSTRSIZE]; - - /* PME load balancing data for GPU kernels */ - gmx_bool bPMETune = FALSE; - gmx_bool bPMETunePrinting = FALSE; - - bool bInteractiveMDstep = false; - - /* PLUMED */ - int plumedNeedsEnergy=0; - int plumedWantsToStop=0; - matrix plumed_vir; - real lambdaForce=0; - real realFepState=0; - /* END PLUMED */ - - /* Domain decomposition could incorrectly miss a bonded - interaction, but checking for that requires a global - communication stage, which does not otherwise happen in DD - code. So we do that alongside the first global energy reduction - after a new DD is made. These variables handle whether the - check happens, and the result it returns. */ - bool shouldCheckNumberOfBondedInteractions = false; - int totalNumberOfBondedInteractions = -1; - - SimulationSignals signals; - // Most global communnication stages don't propagate mdrun - // signals, and will use this object to achieve that. 
- SimulationSignaller nullSignaller(nullptr, nullptr, nullptr, false, false); - - if (!mdrunOptions.writeConfout) - { - // This is on by default, and the main known use case for - // turning it off is for convenience in benchmarking, which is - // something that should not show up in the general user - // interface. - GMX_LOG(mdlog.info) - .asParagraph() - .appendText( - "The -noconfout functionality is deprecated, and may be removed in a " - "future version."); - } - - /* md-vv uses averaged full step velocities for T-control - md-vv-avek uses averaged half step velocities for T-control (but full step ekin for P control) - md uses averaged half step kinetic energies to determine temperature unless defined otherwise by GMX_EKIN_AVE_VEL; */ - bTrotter = (EI_VV(ir->eI) - && (inputrecNptTrotter(ir) || inputrecNphTrotter(ir) || inputrecNvtTrotter(ir))); - - const bool bRerunMD = false; - - int nstglobalcomm = computeGlobalCommunicationPeriod(mdlog, ir, cr); - bGStatEveryStep = (nstglobalcomm == 1); - - const SimulationGroups* groups = &top_global->groups; - - std::unique_ptr ed = nullptr; - if (opt2bSet("-ei", nfile, fnm)) - { - /* Initialize essential dynamics sampling */ - ed = init_edsam(mdlog, opt2fn_null("-ei", nfile, fnm), opt2fn("-eo", nfile, fnm), top_global, - ir, cr, constr, state_global, observablesHistory, oenv, startingBehavior); - } - else if (observablesHistory->edsamHistory) - { - gmx_fatal(FARGS, - "The checkpoint is from a run with essential dynamics sampling, " - "but the current run did not specify the -ei option. " - "Either specify the -ei option to mdrun, or do not use this checkpoint file."); - } - - int* fep_state = MASTER(cr) ? &state_global->fep_state : nullptr; - gmx::ArrayRef lambda = MASTER(cr) ? 
state_global->lambda : gmx::ArrayRef(); - initialize_lambdas(fplog, *ir, MASTER(cr), fep_state, lambda); - Update upd(*ir, deform); - const bool doSimulatedAnnealing = initSimulatedAnnealing(ir, &upd); - const bool useReplicaExchange = (replExParams.exchangeInterval > 0); - - const t_fcdata& fcdata = *fr->fcdata; - - bool simulationsShareState = false; - int nstSignalComm = nstglobalcomm; - { - // TODO This implementation of ensemble orientation restraints is nasty because - // a user can't just do multi-sim with single-sim orientation restraints. - bool usingEnsembleRestraints = - (fcdata.disres->nsystems > 1) || ((ms != nullptr) && (fcdata.orires->nr != 0)); - bool awhUsesMultiSim = (ir->bDoAwh && ir->awhParams->shareBiasMultisim && (ms != nullptr)); - - // Replica exchange, ensemble restraints and AWH need all - // simulations to remain synchronized, so they need - // checkpoints and stop conditions to act on the same step, so - // the propagation of such signals must take place between - // simulations, not just within simulations. - // TODO: Make algorithm initializers set these flags. - simulationsShareState = useReplicaExchange || usingEnsembleRestraints || awhUsesMultiSim || (plumedswitch && ms); // PLUMED hack, if we have multiple sim and plumed we usually want them to be in sync - - if (simulationsShareState) - { - // Inter-simulation signal communication does not need to happen - // often, so we use a minimum of 200 steps to reduce overhead. 
- const int c_minimumInterSimulationSignallingInterval = 200; - nstSignalComm = ((c_minimumInterSimulationSignallingInterval + nstglobalcomm - 1) / nstglobalcomm) - * nstglobalcomm; - } - } - - if (startingBehavior != StartingBehavior::RestartWithAppending) - { - pleaseCiteCouplingAlgorithms(fplog, *ir); - } - gmx_mdoutf* outf = - init_mdoutf(fplog, nfile, fnm, mdrunOptions, cr, outputProvider, mdModulesNotifier, ir, - top_global, oenv, wcycle, startingBehavior, simulationsShareState, ms); - gmx::EnergyOutput energyOutput(mdoutf_get_fp_ene(outf), top_global, ir, pull_work, - mdoutf_get_fp_dhdl(outf), false, startingBehavior, - simulationsShareState, mdModulesNotifier); - - gstat = global_stat_init(ir); - - const auto& simulationWork = runScheduleWork->simulationWork; - const bool useGpuForPme = simulationWork.useGpuPme; - const bool useGpuForNonbonded = simulationWork.useGpuNonbonded; - const bool useGpuForBufferOps = simulationWork.useGpuBufferOps; - const bool useGpuForUpdate = simulationWork.useGpuUpdate; - - /* Check for polarizable models and flexible constraints */ - shellfc = init_shell_flexcon(fplog, top_global, constr ? constr->numFlexibleConstraints() : 0, - ir->nstcalcenergy, DOMAINDECOMP(cr), useGpuForPme); - - { - double io = compute_io(ir, top_global->natoms, *groups, energyOutput.numEnergyTerms(), 1); - if ((io > 2000) && MASTER(cr)) - { - fprintf(stderr, "\nWARNING: This run will generate roughly %.0f Mb of data\n\n", io); - } - } - - // Local state only becomes valid now. - std::unique_ptr stateInstance; - t_state* state; - - gmx_localtop_t top(top_global->ffparams); - - auto mdatoms = mdAtoms->mdatoms(); - - ForceBuffers f(fr->useMts, ((useGpuForNonbonded && useGpuForBufferOps) || useGpuForUpdate) - ? 
PinningPolicy::PinnedIfSupported - : PinningPolicy::CannotBePinned); - if (DOMAINDECOMP(cr)) - { - stateInstance = std::make_unique(); - state = stateInstance.get(); - dd_init_local_state(cr->dd, state_global, state); - - /* Distribute the charge groups over the nodes from the master node */ - dd_partition_system(fplog, mdlog, ir->init_step, cr, TRUE, 1, state_global, *top_global, ir, - imdSession, pull_work, state, &f, mdAtoms, &top, fr, vsite, constr, - nrnb, nullptr, FALSE); - shouldCheckNumberOfBondedInteractions = true; - upd.setNumAtoms(state->natoms); - } - else - { - state_change_natoms(state_global, state_global->natoms); - /* Copy the pointer to the global state */ - state = state_global; - - /* Generate and initialize new topology */ - mdAlgorithmsSetupAtomData(cr, ir, *top_global, &top, fr, &f, mdAtoms, constr, vsite, shellfc); - - upd.setNumAtoms(state->natoms); - } - - std::unique_ptr integrator; - - StatePropagatorDataGpu* stateGpu = fr->stateGpu; - - // TODO: the assertions below should be handled by UpdateConstraintsBuilder. 
- if (useGpuForUpdate) - { - GMX_RELEASE_ASSERT(!DOMAINDECOMP(cr) || ddUsesUpdateGroups(*cr->dd) || constr == nullptr - || constr->numConstraintsTotal() == 0, - "Constraints in domain decomposition are only supported with update " - "groups if using GPU update.\n"); - GMX_RELEASE_ASSERT(ir->eConstrAlg != econtSHAKE || constr == nullptr - || constr->numConstraintsTotal() == 0, - "SHAKE is not supported with GPU update."); - GMX_RELEASE_ASSERT(useGpuForPme || (useGpuForNonbonded && simulationWork.useGpuBufferOps), - "Either PME or short-ranged non-bonded interaction tasks must run on " - "the GPU to use GPU update.\n"); - GMX_RELEASE_ASSERT(ir->eI == eiMD, - "Only the md integrator is supported with the GPU update.\n"); - GMX_RELEASE_ASSERT( - ir->etc != etcNOSEHOOVER, - "Nose-Hoover temperature coupling is not supported with the GPU update.\n"); - GMX_RELEASE_ASSERT( - ir->epc == epcNO || ir->epc == epcPARRINELLORAHMAN || ir->epc == epcBERENDSEN - || ir->epc == epcCRESCALE, - "Only Parrinello-Rahman, Berendsen, and C-rescale pressure coupling are supported " - "with the GPU update.\n"); - GMX_RELEASE_ASSERT(!mdatoms->haveVsites, - "Virtual sites are not supported with the GPU update.\n"); - GMX_RELEASE_ASSERT(ed == nullptr, - "Essential dynamics is not supported with the GPU update.\n"); - GMX_RELEASE_ASSERT(!ir->bPull || !pull_have_constraint(*ir->pull), - "Constraints pulling is not supported with the GPU update.\n"); - GMX_RELEASE_ASSERT(fcdata.orires->nr == 0, - "Orientation restraints are not supported with the GPU update.\n"); - GMX_RELEASE_ASSERT( - ir->efep == efepNO - || (!haveFepPerturbedMasses(*top_global) && !havePerturbedConstraints(*top_global)), - "Free energy perturbation of masses and constraints are not supported with the GPU " - "update."); - - if (constr != nullptr && constr->numConstraintsTotal() > 0) - { - GMX_LOG(mdlog.info) - .asParagraph() - .appendText("Updating coordinates and applying constraints on the GPU."); - } - else - { - 
GMX_LOG(mdlog.info).asParagraph().appendText("Updating coordinates on the GPU."); - } - GMX_RELEASE_ASSERT(fr->deviceStreamManager != nullptr, - "Device stream manager should be initialized in order to use GPU " - "update-constraints."); - GMX_RELEASE_ASSERT( - fr->deviceStreamManager->streamIsValid(gmx::DeviceStreamType::UpdateAndConstraints), - "Update stream should be initialized in order to use GPU " - "update-constraints."); - integrator = std::make_unique( - *ir, *top_global, fr->deviceStreamManager->context(), - fr->deviceStreamManager->stream(gmx::DeviceStreamType::UpdateAndConstraints), - stateGpu->xUpdatedOnDevice(), wcycle); - - integrator->setPbc(PbcType::Xyz, state->box); - } - - if (useGpuForPme || (useGpuForNonbonded && useGpuForBufferOps) || useGpuForUpdate) - { - changePinningPolicy(&state->x, PinningPolicy::PinnedIfSupported); - } - if (useGpuForUpdate) - { - changePinningPolicy(&state->v, PinningPolicy::PinnedIfSupported); - } - - // NOTE: The global state is no longer used at this point. - // But state_global is still used as temporary storage space for writing - // the global state to file and potentially for replica exchange. - // (Global topology should persist.) 
- - update_mdatoms(mdatoms, state->lambda[efptMASS]); - - if (ir->bExpanded) - { - /* Check nstexpanded here, because the grompp check was broken */ - if (ir->expandedvals->nstexpanded % ir->nstcalcenergy != 0) - { - gmx_fatal(FARGS, - "With expanded ensemble, nstexpanded should be a multiple of nstcalcenergy"); - } - init_expanded_ensemble(startingBehavior != StartingBehavior::NewSimulation, ir, state->dfhist, mdlog); - } - - if (MASTER(cr)) - { - EnergyData::initializeEnergyHistory(startingBehavior, observablesHistory, &energyOutput); - } - - preparePrevStepPullCom(ir, pull_work, mdatoms->massT, state, state_global, cr, - startingBehavior != StartingBehavior::NewSimulation); - - // TODO: Remove this by converting AWH into a ForceProvider - auto awh = prepareAwhModule(fplog, *ir, state_global, cr, ms, - startingBehavior != StartingBehavior::NewSimulation, - shellfc != nullptr, opt2fn("-awh", nfile, fnm), pull_work); - - if (useReplicaExchange && MASTER(cr)) - { - repl_ex = init_replica_exchange(fplog, ms, top_global->natoms, ir, replExParams); - } - /* PME tuning is only supported in the Verlet scheme, with PME for - * Coulomb. It is not supported with only LJ PME. 
*/ - bPMETune = (mdrunOptions.tunePme && EEL_PME(fr->ic->eeltype) && !mdrunOptions.reproducible - && ir->cutoff_scheme != ecutsGROUP); - - pme_load_balancing_t* pme_loadbal = nullptr; - if (bPMETune) - { - pme_loadbal_init(&pme_loadbal, cr, mdlog, *ir, state->box, *fr->ic, *fr->nbv, fr->pmedata, - fr->nbv->useGpu()); - } - - if (!ir->bContinuation) - { - if (state->flags & (1U << estV)) - { - auto v = makeArrayRef(state->v); - /* Set the velocities of vsites, shells and frozen atoms to zero */ - for (i = 0; i < mdatoms->homenr; i++) - { - if (mdatoms->ptype[i] == eptVSite || mdatoms->ptype[i] == eptShell) - { - clear_rvec(v[i]); - } - else if (mdatoms->cFREEZE) - { - for (m = 0; m < DIM; m++) - { - if (ir->opts.nFreeze[mdatoms->cFREEZE[i]][m]) - { - v[i][m] = 0; - } - } - } - } - } - - if (constr) - { - /* Constrain the initial coordinates and velocities */ - do_constrain_first(fplog, constr, ir, mdatoms->nr, mdatoms->homenr, - state->x.arrayRefWithPadding(), state->v.arrayRefWithPadding(), - state->box, state->lambda[efptBONDED]); - } - if (vsite) - { - /* Construct the virtual sites for the initial configuration */ - vsite->construct(state->x, ir->delta_t, {}, state->box); - } - } - - if (ir->efep != efepNO) - { - /* Set free energy calculation frequency as the greatest common - * denominator of nstdhdl and repl_ex_nst. */ - nstfep = ir->fepvals->nstdhdl; - if (ir->bExpanded) - { - nstfep = std::gcd(ir->expandedvals->nstexpanded, nstfep); - } - if (useReplicaExchange) - { - nstfep = std::gcd(replExParams.exchangeInterval, nstfep); - } - if (ir->bDoAwh) - { - nstfep = std::gcd(ir->awhParams->nstSampleCoord, nstfep); - } - } - - /* Be REALLY careful about what flags you set here. You CANNOT assume - * this is the first step, since we might be restarting from a checkpoint, - * and in that case we should not do any modifications to the state. 
- */ - bStopCM = (ir->comm_mode != ecmNO && !ir->bContinuation); - - // When restarting from a checkpoint, it can be appropriate to - // initialize ekind from quantities in the checkpoint. Otherwise, - // compute_globals must initialize ekind before the simulation - // starts/restarts. However, only the master rank knows what was - // found in the checkpoint file, so we have to communicate in - // order to coordinate the restart. - // - // TODO Consider removing this communication if/when checkpoint - // reading directly follows .tpr reading, because all ranks can - // agree on hasReadEkinState at that time. - bool hasReadEkinState = MASTER(cr) ? state_global->ekinstate.hasReadEkinState : false; - if (PAR(cr)) - { - gmx_bcast(sizeof(hasReadEkinState), &hasReadEkinState, cr->mpi_comm_mygroup); - } - if (hasReadEkinState) - { - restore_ekinstate_from_state(cr, ekind, &state_global->ekinstate); - } - - unsigned int cglo_flags = - (CGLO_TEMPERATURE | CGLO_GSTAT | (EI_VV(ir->eI) ? CGLO_PRESSURE : 0) - | (EI_VV(ir->eI) ? CGLO_CONSTRAINT : 0) | (hasReadEkinState ? CGLO_READEKIN : 0)); - - bSumEkinhOld = FALSE; - - t_vcm vcm(top_global->groups, *ir); - reportComRemovalInfo(fplog, vcm); - - /* To minimize communication, compute_globals computes the COM velocity - * and the kinetic energy for the velocities without COM motion removed. - * Thus to get the kinetic energy without the COM contribution, we need - * to call compute_globals twice. - */ - for (int cgloIteration = 0; cgloIteration < (bStopCM ? 
2 : 1); cgloIteration++) - { - unsigned int cglo_flags_iteration = cglo_flags; - if (bStopCM && cgloIteration == 0) - { - cglo_flags_iteration |= CGLO_STOPCM; - cglo_flags_iteration &= ~CGLO_TEMPERATURE; - } - compute_globals(gstat, cr, ir, fr, ekind, makeConstArrayRef(state->x), - makeConstArrayRef(state->v), state->box, mdatoms, nrnb, &vcm, nullptr, - enerd, force_vir, shake_vir, total_vir, pres, constr, &nullSignaller, - state->box, &totalNumberOfBondedInteractions, &bSumEkinhOld, - cglo_flags_iteration - | (shouldCheckNumberOfBondedInteractions ? CGLO_CHECK_NUMBER_OF_BONDED_INTERACTIONS - : 0)); - if (cglo_flags_iteration & CGLO_STOPCM) - { - /* At initialization, do not pass x with acceleration-correction mode - * to avoid (incorrect) correction of the initial coordinates. - */ - auto x = (vcm.mode == ecmLINEAR_ACCELERATION_CORRECTION) ? ArrayRef() - : makeArrayRef(state->x); - process_and_stopcm_grp(fplog, &vcm, *mdatoms, x, makeArrayRef(state->v)); - inc_nrnb(nrnb, eNR_STOPCM, mdatoms->homenr); - } - } - checkNumberOfBondedInteractions(mdlog, cr, totalNumberOfBondedInteractions, top_global, &top, - makeConstArrayRef(state->x), state->box, - &shouldCheckNumberOfBondedInteractions); - if (ir->eI == eiVVAK) - { - /* a second call to get the half step temperature initialized as well */ - /* we do the same call as above, but turn the pressure off -- internally to - compute_globals, this is recognized as a velocity verlet half-step - kinetic energy calculation. 
This minimized excess variables, but - perhaps loses some logic?*/ - - compute_globals(gstat, cr, ir, fr, ekind, makeConstArrayRef(state->x), - makeConstArrayRef(state->v), state->box, mdatoms, nrnb, &vcm, nullptr, - enerd, force_vir, shake_vir, total_vir, pres, constr, &nullSignaller, - state->box, nullptr, &bSumEkinhOld, cglo_flags & ~CGLO_PRESSURE); - } - - /* Calculate the initial half step temperature, and save the ekinh_old */ - if (startingBehavior == StartingBehavior::NewSimulation) - { - for (i = 0; (i < ir->opts.ngtc); i++) - { - copy_mat(ekind->tcstat[i].ekinh, ekind->tcstat[i].ekinh_old); - } - } - - /* need to make an initiation call to get the Trotter variables set, as well as other constants - for non-trotter temperature control */ - auto trotter_seq = init_npt_vars(ir, state, &MassQ, bTrotter); - - if (MASTER(cr)) - { - if (!ir->bContinuation) - { - if (constr && ir->eConstrAlg == econtLINCS) - { - fprintf(fplog, "RMS relative constraint deviation after constraining: %.2e\n", - constr->rmsd()); - } - if (EI_STATE_VELOCITY(ir->eI)) - { - real temp = enerd->term[F_TEMP]; - if (ir->eI != eiVV) - { - /* Result of Ekin averaged over velocities of -half - * and +half step, while we only have -half step here. 
- */ - temp *= 2; - } - fprintf(fplog, "Initial temperature: %g K\n", temp); - } - } - - char tbuf[20]; - fprintf(stderr, "starting mdrun '%s'\n", *(top_global->name)); - if (ir->nsteps >= 0) - { - sprintf(tbuf, "%8.1f", (ir->init_step + ir->nsteps) * ir->delta_t); - } - else - { - sprintf(tbuf, "%s", "infinite"); - } - if (ir->init_step > 0) - { - fprintf(stderr, "%s steps, %s ps (continuing from step %s, %8.1f ps).\n", - gmx_step_str(ir->init_step + ir->nsteps, sbuf), tbuf, - gmx_step_str(ir->init_step, sbuf2), ir->init_step * ir->delta_t); - } - else - { - fprintf(stderr, "%s steps, %s ps.\n", gmx_step_str(ir->nsteps, sbuf), tbuf); - } - fprintf(fplog, "\n"); - } - - /* PLUMED */ - if(plumedswitch){ - /* detect plumed API version */ - int pversion=0; - plumed_cmd(plumedmain,"getApiVersion",&pversion); - /* setting kbT is only implemented with api>1) */ - real kbT=ir->opts.ref_t[0]*BOLTZ; - if(pversion>1) plumed_cmd(plumedmain,"setKbT",&kbT); - if(pversion>2){ - int res=1; - if( (startingBehavior != StartingBehavior::NewSimulation) ) plumed_cmd(plumedmain,"setRestart",&res); - } - - if(ms && ms->numSimulations_>1) { - if(MASTER(cr)) plumed_cmd(plumedmain,"GREX setMPIIntercomm",&ms->mastersComm_); - if(PAR(cr)){ - if(DOMAINDECOMP(cr)) { - plumed_cmd(plumedmain,"GREX setMPIIntracomm",&cr->dd->mpi_comm_all); - }else{ - plumed_cmd(plumedmain,"GREX setMPIIntracomm",&cr->mpi_comm_mysim); - } - } - plumed_cmd(plumedmain,"GREX init",nullptr); - } - if(PAR(cr)){ - if(DOMAINDECOMP(cr)) { - plumed_cmd(plumedmain,"setMPIComm",&cr->dd->mpi_comm_all); - } - } - plumed_cmd(plumedmain,"setNatoms",&top_global->natoms); - plumed_cmd(plumedmain,"setMDEngine","gromacs"); - plumed_cmd(plumedmain,"setLog",fplog); - real real_delta_t=ir->delta_t; - plumed_cmd(plumedmain,"setTimestep",&real_delta_t); - plumed_cmd(plumedmain,"init",nullptr); - - if(PAR(cr)){ - if(DOMAINDECOMP(cr)) { - int nat_home = dd_numHomeAtoms(*cr->dd); - plumed_cmd(plumedmain,"setAtomsNlocal",&nat_home); - 
plumed_cmd(plumedmain,"setAtomsGatindex",cr->dd->globalAtomIndices.data()); - } - } - realFepState = state->fep_state; - plumed_cmd(plumedmain, "setExtraCV lambda", &realFepState); - plumed_cmd(plumedmain, "setExtraCVForce lambda", &lambdaForce); - } - /* END PLUMED */ - - walltime_accounting_start_time(walltime_accounting); - wallcycle_start(wcycle, ewcRUN); - print_start(fplog, cr, walltime_accounting, "mdrun"); - - /*********************************************************** - * - * Loop over MD steps - * - ************************************************************/ - - bFirstStep = TRUE; - /* Skip the first Nose-Hoover integration when we get the state from tpx */ - bInitStep = startingBehavior == StartingBehavior::NewSimulation || EI_VV(ir->eI); - bSumEkinhOld = FALSE; - bExchanged = FALSE; - bNeedRepartition = FALSE; - bDoReplEx = FALSE; - - step = ir->init_step; - step_rel = 0; - - auto stopHandler = stopHandlerBuilder->getStopHandlerMD( - compat::not_null(&signals[eglsSTOPCOND]), simulationsShareState, - MASTER(cr), ir->nstlist, mdrunOptions.reproducible, nstSignalComm, - mdrunOptions.maximumHoursToRun, ir->nstlist == 0, fplog, step, bNS, walltime_accounting); - - auto checkpointHandler = std::make_unique( - compat::make_not_null(&signals[eglsCHKPT]), simulationsShareState, - ir->nstlist == 0, MASTER(cr), mdrunOptions.writeConfout, - mdrunOptions.checkpointOptions.period); - - const bool resetCountersIsLocal = true; - auto resetHandler = std::make_unique( - compat::make_not_null(&signals[eglsRESETCOUNTERS]), - !resetCountersIsLocal, ir->nsteps, MASTER(cr), mdrunOptions.timingOptions.resetHalfway, - mdrunOptions.maximumHoursToRun, mdlog, wcycle, walltime_accounting); - - const DDBalanceRegionHandler ddBalanceRegionHandler(cr); - - if (MASTER(cr) && isMultiSim(ms) && !useReplicaExchange) - { - logInitialMultisimStatus(ms, cr, mdlog, simulationsShareState, ir->nsteps, ir->init_step); - } - - /* and stop now if we should */ - bLastStep = (bLastStep || 
(ir->nsteps >= 0 && step_rel > ir->nsteps)); - while (!bLastStep) - { - - /* Determine if this is a neighbor search step */ - bNStList = (ir->nstlist > 0 && step % ir->nstlist == 0); - - if (bPMETune && bNStList) - { - // This has to be here because PME load balancing is called so early. - // TODO: Move to after all booleans are defined. - if (useGpuForUpdate && !bFirstStep) - { - stateGpu->copyCoordinatesFromGpu(state->x, AtomLocality::Local); - stateGpu->waitCoordinatesReadyOnHost(AtomLocality::Local); - } - /* PME grid + cut-off optimization with GPUs or PME nodes */ - pme_loadbal_do(pme_loadbal, cr, (mdrunOptions.verbose && MASTER(cr)) ? stderr : nullptr, - fplog, mdlog, *ir, fr, state->box, state->x, wcycle, step, step_rel, - &bPMETunePrinting, simulationWork.useGpuPmePpCommunication); - } - - wallcycle_start(wcycle, ewcSTEP); - - bLastStep = (step_rel == ir->nsteps); - t = t0 + step * ir->delta_t; - - // TODO Refactor this, so that nstfep does not need a default value of zero - if (ir->efep != efepNO || ir->bSimTemp) - { - /* find and set the current lambdas */ - state->lambda = currentLambdas(step, *(ir->fepvals), state->fep_state); - - bDoDHDL = do_per_step(step, ir->fepvals->nstdhdl); - bDoFEP = ((ir->efep != efepNO) && do_per_step(step, nstfep)); - bDoExpanded = (do_per_step(step, ir->expandedvals->nstexpanded) && (ir->bExpanded) - && (!bFirstStep)); - } - - bDoReplExPrev = bDoReplEx; - bDoReplEx = (useReplicaExchange && (step > 0) && !bLastStep - && do_per_step(step, replExParams.exchangeInterval)); - - if (doSimulatedAnnealing) - { - update_annealing_target_temp(ir, t, &upd); - } - - /* Stop Center of Mass motion */ - bStopCM = (ir->comm_mode != ecmNO && do_per_step(step, ir->nstcomm)); - - /* Determine whether or not to do Neighbour Searching */ - bNS = (bFirstStep || bNStList || bExchanged || bNeedRepartition); - - /* Note that the stopHandler will cause termination at nstglobalcomm - * steps. 
Since this concides with nstcalcenergy, nsttcouple and/or - * nstpcouple steps, we have computed the half-step kinetic energy - * of the previous step and can always output energies at the last step. - */ - bLastStep = bLastStep || stopHandler->stoppingAfterCurrentStep(bNS); - - /* do_log triggers energy and virial calculation. Because this leads - * to different code paths, forces can be different. Thus for exact - * continuation we should avoid extra log output. - * Note that the || bLastStep can result in non-exact continuation - * beyond the last step. But we don't consider that to be an issue. - */ - do_log = (do_per_step(step, ir->nstlog) - || (bFirstStep && startingBehavior == StartingBehavior::NewSimulation) || bLastStep); - do_verbose = mdrunOptions.verbose - && (step % mdrunOptions.verboseStepPrintInterval == 0 || bFirstStep || bLastStep); - - if (useGpuForUpdate && !bFirstStep && bNS) - { - // Copy velocities from the GPU on search steps to keep a copy on host (device buffers are reinitialized). - stateGpu->copyVelocitiesFromGpu(state->v, AtomLocality::Local); - stateGpu->waitVelocitiesReadyOnHost(AtomLocality::Local); - // Copy coordinate from the GPU when needed at the search step. - // NOTE: The cases when coordinates needed on CPU for force evaluation are handled in sim_utils. - // NOTE: If the coordinates are to be written into output file they are also copied separately before the output. 
- stateGpu->copyCoordinatesFromGpu(state->x, AtomLocality::Local); - stateGpu->waitCoordinatesReadyOnHost(AtomLocality::Local); - } - - if (bNS && !(bFirstStep && ir->bContinuation)) - { - bMasterState = FALSE; - /* Correct the new box if it is too skewed */ - if (inputrecDynamicBox(ir)) - { - if (correct_box(fplog, step, state->box)) - { - bMasterState = TRUE; - // If update is offloaded, it should be informed about the box size change - if (useGpuForUpdate) - { - integrator->setPbc(PbcType::Xyz, state->box); - } - } - } - if (DOMAINDECOMP(cr) && bMasterState) - { - dd_collect_state(cr->dd, state, state_global); - } - - if (DOMAINDECOMP(cr)) - { - /* Repartition the domain decomposition */ - dd_partition_system(fplog, mdlog, step, cr, bMasterState, nstglobalcomm, state_global, - *top_global, ir, imdSession, pull_work, state, &f, mdAtoms, &top, - fr, vsite, constr, nrnb, wcycle, do_verbose && !bPMETunePrinting); - shouldCheckNumberOfBondedInteractions = true; - upd.setNumAtoms(state->natoms); - - /* PLUMED */ - if(plumedswitch){ - int nat_home = dd_numHomeAtoms(*cr->dd); - plumed_cmd(plumedmain,"setAtomsNlocal",&nat_home); - plumed_cmd(plumedmain,"setAtomsGatindex",cr->dd->globalAtomIndices.data()); - } - /* END PLUMED */ - } - } - - // Allocate or re-size GPU halo exchange object, if necessary - if (bNS && havePPDomainDecomposition(cr) && simulationWork.useGpuHaloExchange) - { - GMX_RELEASE_ASSERT(fr->deviceStreamManager != nullptr, - "GPU device manager has to be initialized to use GPU " - "version of halo exchange."); - constructGpuHaloExchange(mdlog, *cr, *fr->deviceStreamManager, wcycle); - } - - if (MASTER(cr) && do_log) - { - gmx::EnergyOutput::printHeader(fplog, step, - t); /* can we improve the information printed here? 
*/ - } - - if (ir->efep != efepNO) - { - update_mdatoms(mdatoms, state->lambda[efptMASS]); - } - - if (bExchanged) - { - - /* We need the kinetic energy at minus the half step for determining - * the full step kinetic energy and possibly for T-coupling.*/ - /* This may not be quite working correctly yet . . . . */ - compute_globals(gstat, cr, ir, fr, ekind, makeConstArrayRef(state->x), - makeConstArrayRef(state->v), state->box, mdatoms, nrnb, &vcm, wcycle, - enerd, nullptr, nullptr, nullptr, nullptr, constr, &nullSignaller, - state->box, &totalNumberOfBondedInteractions, &bSumEkinhOld, - CGLO_GSTAT | CGLO_TEMPERATURE | CGLO_CHECK_NUMBER_OF_BONDED_INTERACTIONS); - checkNumberOfBondedInteractions(mdlog, cr, totalNumberOfBondedInteractions, top_global, - &top, makeConstArrayRef(state->x), state->box, - &shouldCheckNumberOfBondedInteractions); - } - clear_mat(force_vir); - - /* PLUMED HREX */ - gmx_bool bHREX = bDoReplEx && plumed_hrex; - - if (plumedswitch && bHREX) { - // gmx_enerdata_t *hrex_enerd; - int nlambda = enerd->foreignLambdaTerms.numLambdas(); - gmx_enerdata_t hrex_enerd(enerd->grpp.nener, nlambda == 0 ? 0 : nlambda - 1); - int repl = -1; - int nrepl = -1; - if (MASTER(cr)){ - repl = replica_exchange_get_repl(repl_ex); - nrepl = replica_exchange_get_nrepl(repl_ex); - } - - if (DOMAINDECOMP(cr)) { - dd_collect_state(cr->dd,state,state_global); - } else { - copy_state_serial(state, state_global); - } - - if(MASTER(cr)){ - if(repl%2==step/replExParams.exchangeInterval%2){ - if(repl-1>=0) exchange_state(ms,repl-1,state_global); - }else{ - if(repl+1box, state->x.arrayRefWithPadding(), &state->hist, - &f.view(), force_vir, mdatoms, &hrex_enerd, state->lambda, - fr, runScheduleWork, vsite, mu_tot, t, ed ? 
ed->getLegacyED() : nullptr, - GMX_FORCE_STATECHANGED | - GMX_FORCE_DYNAMICBOX | - GMX_FORCE_ALLFORCES | - GMX_FORCE_VIRIAL | - GMX_FORCE_ENERGY | - GMX_FORCE_DHDL | - GMX_FORCE_NS, - ddBalanceRegionHandler); - - plumed_cmd(plumedmain,"GREX cacheLocalUSwap",&(&hrex_enerd)->term[F_EPOT]); - - /* exchange back */ - if (DOMAINDECOMP(cr)) { - dd_collect_state(cr->dd,state,state_global); - } else { - copy_state_serial(state, state_global); - } - - if(MASTER(cr)){ - if(repl%2==step/replExParams.exchangeInterval%2){ - if(repl-1>=0) exchange_state(ms,repl-1,state_global); - }else{ - if(repl+1dd); - plumed_cmd(plumedmain,"setAtomsNlocal",&nat_home); - plumed_cmd(plumedmain,"setAtomsGatindex",cr->dd->globalAtomIndices.data()); - } - } - bNS=true; - } - /* END PLUMED HREX */ - - checkpointHandler->decideIfCheckpointingThisStep(bNS||bDoReplExPrev, bFirstStep, bLastStep); - - /* Determine the energy and pressure: - * at nstcalcenergy steps and at energy output steps (set below). - */ - if (EI_VV(ir->eI) && (!bInitStep)) - { - bCalcEnerStep = do_per_step(step, ir->nstcalcenergy); - bCalcVir = bCalcEnerStep - || (ir->epc != epcNO - && (do_per_step(step, ir->nstpcouple) || do_per_step(step - 1, ir->nstpcouple))); - } - else - { - bCalcEnerStep = do_per_step(step, ir->nstcalcenergy); - bCalcVir = bCalcEnerStep || (ir->epc != epcNO && do_per_step(step, ir->nstpcouple)); - } - bCalcEner = bCalcEnerStep; - - do_ene = (do_per_step(step, ir->nstenergy) || bLastStep); - - if (do_ene || do_log || bDoReplEx) - { - bCalcVir = TRUE; - bCalcEner = TRUE; - } - - /* Do we need global communication ? */ - bGStat = (bCalcVir || bCalcEner || bStopCM || do_per_step(step, nstglobalcomm) - || (EI_VV(ir->eI) && inputrecNvtTrotter(ir) && do_per_step(step - 1, nstglobalcomm))); - - force_flags = (GMX_FORCE_STATECHANGED | ((inputrecDynamicBox(ir)) ? GMX_FORCE_DYNAMICBOX : 0) - | GMX_FORCE_ALLFORCES | (bCalcVir ? GMX_FORCE_VIRIAL : 0) - | (bCalcEner ? GMX_FORCE_ENERGY : 0) | (bDoFEP ? 
GMX_FORCE_DHDL : 0)); - if (fr->useMts && !do_per_step(step, ir->nstfout)) - { - force_flags |= GMX_FORCE_DO_NOT_NEED_NORMAL_FORCE; - } - - if (shellfc) - { - /* Now is the time to relax the shells */ - relax_shell_flexcon(fplog, cr, ms, mdrunOptions.verbose, enforcedRotation, step, ir, - imdSession, pull_work, bNS, force_flags, &top, constr, enerd, - state->natoms, state->x.arrayRefWithPadding(), - state->v.arrayRefWithPadding(), state->box, state->lambda, - &state->hist, &f.view(), force_vir, mdatoms, nrnb, wcycle, shellfc, - fr, runScheduleWork, t, mu_tot, vsite, ddBalanceRegionHandler); - } - else - { - /* The AWH history need to be saved _before_ doing force calculations where the AWH bias - is updated (or the AWH update will be performed twice for one step when continuing). - It would be best to call this update function from do_md_trajectory_writing but that - would occur after do_force. One would have to divide the update_awh function into one - function applying the AWH force and one doing the AWH bias update. The update AWH - bias function could then be called after do_md_trajectory_writing (then containing - update_awh_history). The checkpointing will in the future probably moved to the start - of the md loop which will rid of this issue. */ - if (awh && checkpointHandler->isCheckpointingStep() && MASTER(cr)) - { - awh->updateHistory(state_global->awhHistory.get()); - } - - /* The coordinates (x) are shifted (to get whole molecules) - * in do_force. - * This is parallellized as well, and does communication too. 
- * Check comments in sim_util.c - */ - - /* PLUMED */ - plumedNeedsEnergy=0; - if(plumedswitch){ - int pversion=0; - plumed_cmd(plumedmain,"getApiVersion",&pversion); - long int lstep=step; plumed_cmd(plumedmain,"setStepLong",&lstep); - plumed_cmd(plumedmain,"setPositions",&state->x[0][0]); - plumed_cmd(plumedmain,"setMasses",&mdatoms->massT[0]); - plumed_cmd(plumedmain,"setCharges",&mdatoms->chargeA[0]); - plumed_cmd(plumedmain,"setBox",&state->box[0][0]); - plumed_cmd(plumedmain,"prepareCalc",nullptr); - plumed_cmd(plumedmain,"setStopFlag",&plumedWantsToStop); - int checkp=0; if(checkpointHandler->isCheckpointingStep()) checkp=1; - if(pversion>3) plumed_cmd(plumedmain,"doCheckPoint",&checkp); - plumed_cmd(plumedmain,"setForces",&f.view().force()[0][0]); - plumed_cmd(plumedmain,"isEnergyNeeded",&plumedNeedsEnergy); - if(plumedNeedsEnergy) force_flags |= GMX_FORCE_ENERGY | GMX_FORCE_VIRIAL; - clear_mat(plumed_vir); - plumed_cmd(plumedmain,"setVirial",&plumed_vir[0][0]); - } - /* END PLUMED */ - do_force(fplog, cr, ms, ir, awh.get(), enforcedRotation, imdSession, pull_work, step, - nrnb, wcycle, &top, state->box, state->x.arrayRefWithPadding(), &state->hist, - &f.view(), force_vir, mdatoms, enerd, state->lambda, fr, runScheduleWork, - vsite, mu_tot, t, ed ? ed->getLegacyED() : nullptr, - (bNS ? 
GMX_FORCE_NS : 0) | force_flags, ddBalanceRegionHandler); - /* PLUMED */ - if(plumedswitch){ - if(plumedNeedsEnergy){ - msmul(force_vir,2.0,plumed_vir); - plumed_cmd(plumedmain,"setEnergy",&enerd->term[F_EPOT]); - plumed_cmd(plumedmain,"performCalc",nullptr); - msmul(plumed_vir,0.5,force_vir); - } else { - msmul(plumed_vir,0.5,plumed_vir); - m_add(force_vir,plumed_vir,force_vir); - } - if(bDoReplEx) plumed_cmd(plumedmain,"GREX savePositions",nullptr); - if(plumedWantsToStop) ir->nsteps=step_rel+1; - if(bHREX) plumed_cmd(plumedmain,"GREX cacheLocalUNow",&enerd->term[F_EPOT]); - } - /* END PLUMED */ - } - - // VV integrators do not need the following velocity half step - // if it is the first step after starting from a checkpoint. - // That is, the half step is needed on all other steps, and - // also the first step when starting from a .tpr file. - if (EI_VV(ir->eI) && (!bFirstStep || startingBehavior == StartingBehavior::NewSimulation)) - /* ############### START FIRST UPDATE HALF-STEP FOR VV METHODS############### */ - { - rvec* vbuf = nullptr; - - wallcycle_start(wcycle, ewcUPDATE); - if (ir->eI == eiVV && bInitStep) - { - /* if using velocity verlet with full time step Ekin, - * take the first half step only to compute the - * virial for the first step. From there, - * revert back to the initial coordinates - * so that the input is actually the initial step. - */ - snew(vbuf, state->natoms); - copy_rvecn(state->v.rvec_array(), vbuf, 0, - state->natoms); /* should make this better for parallelizing? 
*/ - } - else - { - /* this is for NHC in the Ekin(t+dt/2) version of vv */ - trotter_update(ir, step, ekind, enerd, state, total_vir, mdatoms, &MassQ, - trotter_seq, ettTSEQ1); - } - - upd.update_coords(*ir, step, mdatoms, state, f.view().forceWithPadding(), fcdata, ekind, - M, etrtVELOCITY1, cr, constr != nullptr); - - wallcycle_stop(wcycle, ewcUPDATE); - constrain_velocities(constr, do_log, do_ene, step, state, nullptr, bCalcVir, shake_vir); - wallcycle_start(wcycle, ewcUPDATE); - /* if VV, compute the pressure and constraints */ - /* For VV2, we strictly only need this if using pressure - * control, but we really would like to have accurate pressures - * printed out. - * Think about ways around this in the future? - * For now, keep this choice in comments. - */ - /*bPres = (ir->eI==eiVV || inputrecNptTrotter(ir)); */ - /*bTemp = ((ir->eI==eiVV &&(!bInitStep)) || (ir->eI==eiVVAK && inputrecNptTrotter(ir)));*/ - bPres = TRUE; - bTemp = ((ir->eI == eiVV && (!bInitStep)) || (ir->eI == eiVVAK)); - if (bCalcEner && ir->eI == eiVVAK) - { - bSumEkinhOld = TRUE; - } - /* for vv, the first half of the integration actually corresponds to the previous step. - So we need information from the last step in the first half of the integration */ - if (bGStat || do_per_step(step - 1, nstglobalcomm)) - { - wallcycle_stop(wcycle, ewcUPDATE); - compute_globals(gstat, cr, ir, fr, ekind, makeConstArrayRef(state->x), - makeConstArrayRef(state->v), state->box, mdatoms, nrnb, &vcm, wcycle, - enerd, force_vir, shake_vir, total_vir, pres, constr, &nullSignaller, - state->box, &totalNumberOfBondedInteractions, &bSumEkinhOld, - (bGStat ? CGLO_GSTAT : 0) | (bCalcEner ? CGLO_ENERGY : 0) - | (bTemp ? CGLO_TEMPERATURE : 0) | (bPres ? CGLO_PRESSURE : 0) - | (bPres ? CGLO_CONSTRAINT : 0) | (bStopCM ? CGLO_STOPCM : 0) - | (shouldCheckNumberOfBondedInteractions ? 
CGLO_CHECK_NUMBER_OF_BONDED_INTERACTIONS - : 0) - | CGLO_SCALEEKIN); - /* explanation of above: - a) We compute Ekin at the full time step - if 1) we are using the AveVel Ekin, and it's not the - initial step, or 2) if we are using AveEkin, but need the full - time step kinetic energy for the pressure (always true now, since we want accurate statistics). - b) If we are using EkinAveEkin for the kinetic energy for the temperature control, we still feed in - EkinAveVel because it's needed for the pressure */ - checkNumberOfBondedInteractions(mdlog, cr, totalNumberOfBondedInteractions, - top_global, &top, makeConstArrayRef(state->x), - state->box, &shouldCheckNumberOfBondedInteractions); - if (bStopCM) - { - process_and_stopcm_grp(fplog, &vcm, *mdatoms, makeArrayRef(state->x), - makeArrayRef(state->v)); - inc_nrnb(nrnb, eNR_STOPCM, mdatoms->homenr); - } - wallcycle_start(wcycle, ewcUPDATE); - } - /* temperature scaling and pressure scaling to produce the extended variables at t+dt */ - if (!bInitStep) - { - if (bTrotter) - { - m_add(force_vir, shake_vir, - total_vir); /* we need the un-dispersion corrected total vir here */ - trotter_update(ir, step, ekind, enerd, state, total_vir, mdatoms, &MassQ, - trotter_seq, ettTSEQ2); - - /* TODO This is only needed when we're about to write - * a checkpoint, because we use it after the restart - * (in a kludge?). But what should we be doing if - * the startingBehavior is NewSimulation or bInitStep are true? 
*/ - if (inputrecNptTrotter(ir) || inputrecNphTrotter(ir)) - { - copy_mat(shake_vir, state->svir_prev); - copy_mat(force_vir, state->fvir_prev); - } - if ((inputrecNptTrotter(ir) || inputrecNvtTrotter(ir)) && ir->eI == eiVV) - { - /* update temperature and kinetic energy now that step is over - this is the v(t+dt) point */ - enerd->term[F_TEMP] = - sum_ekin(&(ir->opts), ekind, nullptr, (ir->eI == eiVV), FALSE); - enerd->term[F_EKIN] = trace(ekind->ekin); - } - } - else if (bExchanged) - { - wallcycle_stop(wcycle, ewcUPDATE); - /* We need the kinetic energy at minus the half step for determining - * the full step kinetic energy and possibly for T-coupling.*/ - /* This may not be quite working correctly yet . . . . */ - compute_globals(gstat, cr, ir, fr, ekind, makeConstArrayRef(state->x), - makeConstArrayRef(state->v), state->box, mdatoms, nrnb, &vcm, wcycle, - enerd, nullptr, nullptr, nullptr, nullptr, constr, &nullSignaller, - state->box, nullptr, &bSumEkinhOld, CGLO_GSTAT | CGLO_TEMPERATURE); - wallcycle_start(wcycle, ewcUPDATE); - } - } - /* if it's the initial step, we performed this first step just to get the constraint virial */ - if (ir->eI == eiVV && bInitStep) - { - copy_rvecn(vbuf, state->v.rvec_array(), 0, state->natoms); - sfree(vbuf); - } - wallcycle_stop(wcycle, ewcUPDATE); - } - - /* compute the conserved quantity */ - if (EI_VV(ir->eI)) - { - saved_conserved_quantity = NPT_energy(ir, state, &MassQ); - if (ir->eI == eiVV) - { - last_ekin = enerd->term[F_EKIN]; - } - if ((ir->eDispCorr != edispcEnerPres) && (ir->eDispCorr != edispcAllEnerPres)) - { - saved_conserved_quantity -= enerd->term[F_DISPCORR]; - } - /* sum up the foreign kinetic energy and dK/dl terms for vv. 
currently done every step so that dhdl is correct in the .edr */ - if (ir->efep != efepNO) - { - accumulateKineticLambdaComponents(enerd, state->lambda, *ir->fepvals); - } - } - - /* ######## END FIRST UPDATE STEP ############## */ - /* ######## If doing VV, we now have v(dt) ###### */ - if (bDoExpanded) - { - /* perform extended ensemble sampling in lambda - we don't - actually move to the new state before outputting - statistics, but if performing simulated tempering, we - do update the velocities and the tau_t. */ - - lamnew = ExpandedEnsembleDynamics(fplog, ir, enerd, state, &MassQ, state->fep_state, - state->dfhist, step, state->v.rvec_array(), mdatoms, &realFepState); - /* history is maintained in state->dfhist, but state_global is what is sent to trajectory and log output */ - if (MASTER(cr)) - { - copy_df_history(state_global->dfhist, state->dfhist); - } - } - - // Copy coordinate from the GPU for the output/checkpointing if the update is offloaded and - // coordinates have not already been copied for i) search or ii) CPU force tasks. - if (useGpuForUpdate && !bNS && !runScheduleWork->domainWork.haveCpuLocalForceWork - && (do_per_step(step, ir->nstxout) || do_per_step(step, ir->nstxout_compressed) - || checkpointHandler->isCheckpointingStep())) - { - stateGpu->copyCoordinatesFromGpu(state->x, AtomLocality::Local); - stateGpu->waitCoordinatesReadyOnHost(AtomLocality::Local); - } - // Copy velocities if needed for the output/checkpointing. - // NOTE: Copy on the search steps is done at the beginning of the step. 
- if (useGpuForUpdate && !bNS - && (do_per_step(step, ir->nstvout) || checkpointHandler->isCheckpointingStep())) - { - stateGpu->copyVelocitiesFromGpu(state->v, AtomLocality::Local); - stateGpu->waitVelocitiesReadyOnHost(AtomLocality::Local); - } - // Copy forces for the output if the forces were reduced on the GPU (not the case on virial steps) - // and update is offloaded hence forces are kept on the GPU for update and have not been - // already transferred in do_force(). - // TODO: There should be an improved, explicit mechanism that ensures this copy is only executed - // when the forces are ready on the GPU -- the same synchronizer should be used as the one - // prior to GPU update. - // TODO: When the output flags will be included in step workload, this copy can be combined with the - // copy call in do_force(...). - // NOTE: The forces should not be copied here if the vsites are present, since they were modified - // on host after the D2H copy in do_force(...). - if (runScheduleWork->stepWork.useGpuFBufferOps && (simulationWork.useGpuUpdate && !vsite) - && do_per_step(step, ir->nstfout)) - { - stateGpu->copyForcesFromGpu(f.view().force(), AtomLocality::Local); - stateGpu->waitForcesReadyOnHost(AtomLocality::Local); - } - /* Now we have the energies and forces corresponding to the - * coordinates at time t. We must output all of this before - * the update. - */ - do_md_trajectory_writing(fplog, cr, nfile, fnm, step, step_rel, t, ir, state, state_global, - observablesHistory, top_global, fr, outf, energyOutput, ekind, - f.view().force(), checkpointHandler->isCheckpointingStep(), - bRerunMD, bLastStep, mdrunOptions.writeConfout, bSumEkinhOld); - /* Check if IMD step and do IMD communication, if bIMD is TRUE. */ - bInteractiveMDstep = imdSession->run(step, bNS, state->box, state->x.rvec_array(), t); - - /* kludge -- virial is lost with restart for MTTK NPT control. Must reload (saved earlier). 
*/ - if (startingBehavior != StartingBehavior::NewSimulation && bFirstStep - && (inputrecNptTrotter(ir) || inputrecNphTrotter(ir))) - { - copy_mat(state->svir_prev, shake_vir); - copy_mat(state->fvir_prev, force_vir); - } - - stopHandler->setSignal(); - resetHandler->setSignal(walltime_accounting); - - if (bGStat || !PAR(cr)) - { - /* In parallel we only have to check for checkpointing in steps - * where we do global communication, - * otherwise the other nodes don't know. - */ - checkpointHandler->setSignal(walltime_accounting); - } - - /* ######### START SECOND UPDATE STEP ################# */ - - /* at the start of step, randomize or scale the velocities ((if vv. Restriction of Andersen - controlled in preprocessing */ - - if (ETC_ANDERSEN(ir->etc)) /* keep this outside of update_tcouple because of the extra info required to pass */ - { - gmx_bool bIfRandomize; - bIfRandomize = update_randomize_velocities(ir, step, cr, mdatoms, state->v, &upd, constr); - /* if we have constraints, we have to remove the kinetic energy parallel to the bonds */ - if (constr && bIfRandomize) - { - constrain_velocities(constr, do_log, do_ene, step, state, nullptr, false, nullptr); - } - } - /* Box is changed in update() when we do pressure coupling, - * but we should still use the old box for energy corrections and when - * writing it to the energy file, so it matches the trajectory files for - * the same timestep above. Make a copy in a separate array. - */ - copy_mat(state->box, lastbox); - - dvdl_constr = 0; - - if (!useGpuForUpdate) - { - wallcycle_start(wcycle, ewcUPDATE); - } - /* UPDATE PRESSURE VARIABLES IN TROTTER FORMULATION WITH CONSTRAINTS */ - if (bTrotter) - { - trotter_update(ir, step, ekind, enerd, state, total_vir, mdatoms, &MassQ, trotter_seq, ettTSEQ3); - /* We can only do Berendsen coupling after we have summed - * the kinetic energy or virial. 
Since the happens - * in global_state after update, we should only do it at - * step % nstlist = 1 with bGStatEveryStep=FALSE. - */ - } - else - { - update_tcouple(step, ir, state, ekind, &MassQ, mdatoms); - update_pcouple_before_coordinates(fplog, step, ir, state, pressureCouplingMu, M, bInitStep); - } - - if (EI_VV(ir->eI)) - { - /* velocity half-step update */ - upd.update_coords(*ir, step, mdatoms, state, f.view().forceWithPadding(), fcdata, ekind, - M, etrtVELOCITY2, cr, constr != nullptr); - } - - /* Above, initialize just copies ekinh into ekin, - * it doesn't copy position (for VV), - * and entire integrator for MD. - */ - - if (ir->eI == eiVVAK) - { - cbuf.resize(state->x.size()); - std::copy(state->x.begin(), state->x.end(), cbuf.begin()); - } - - /* With leap-frog type integrators we compute the kinetic energy - * at a whole time step as the average of the half-time step kinetic - * energies of two subsequent steps. Therefore we need to compute the - * half step kinetic energy also if we need energies at the next step. - */ - const bool needHalfStepKineticEnergy = - (!EI_VV(ir->eI) && (do_per_step(step + 1, nstglobalcomm) || step_rel + 1 == ir->nsteps)); - - // Parrinello-Rahman requires the pressure to be availible before the update to compute - // the velocity scaling matrix. Hence, it runs one step after the nstpcouple step. 
- const bool doParrinelloRahman = (ir->epc == epcPARRINELLORAHMAN - && do_per_step(step + ir->nstpcouple - 1, ir->nstpcouple)); - - if (useGpuForUpdate) - { - if (bNS && (bFirstStep || DOMAINDECOMP(cr))) - { - integrator->set(stateGpu->getCoordinates(), stateGpu->getVelocities(), - stateGpu->getForces(), top.idef, *mdatoms, ekind->ngtc); - - // Copy data to the GPU after buffers might have being reinitialized - stateGpu->copyVelocitiesToGpu(state->v, AtomLocality::Local); - stateGpu->copyCoordinatesToGpu(state->x, AtomLocality::Local); - } - - if (simulationWork.useGpuPme && !runScheduleWork->simulationWork.useGpuPmePpCommunication - && !thisRankHasDuty(cr, DUTY_PME)) - { - // The PME forces were recieved to the host, so have to be copied - stateGpu->copyForcesToGpu(f.view().force(), AtomLocality::All); - } - else if (!runScheduleWork->stepWork.useGpuFBufferOps) - { - // The buffer ops were not offloaded this step, so the forces are on the - // host and have to be copied - stateGpu->copyForcesToGpu(f.view().force(), AtomLocality::Local); - } - - const bool doTemperatureScaling = - (ir->etc != etcNO && do_per_step(step + ir->nsttcouple - 1, ir->nsttcouple)); - - // This applies Leap-Frog, LINCS and SETTLE in succession - integrator->integrate(stateGpu->getForcesReadyOnDeviceEvent( - AtomLocality::Local, runScheduleWork->stepWork.useGpuFBufferOps), - ir->delta_t, true, bCalcVir, shake_vir, doTemperatureScaling, - ekind->tcstat, doParrinelloRahman, ir->nstpcouple * ir->delta_t, M); - - // Copy velocities D2H after update if: - // - Globals are computed this step (includes the energy output steps). - // - Temperature is needed for the next step. 
- if (bGStat || needHalfStepKineticEnergy) - { - stateGpu->copyVelocitiesFromGpu(state->v, AtomLocality::Local); - stateGpu->waitVelocitiesReadyOnHost(AtomLocality::Local); - } - } - else - { - /* With multiple time stepping we need to do an additional normal - * update step to obtain the virial, as the actual MTS integration - * using an acceleration where the slow forces are multiplied by mtsFactor. - * Using that acceleration would result in a virial with the slow - * force contribution would be a factor mtsFactor too large. - */ - if (fr->useMts && bCalcVir && constr != nullptr) - { - upd.update_for_constraint_virial(*ir, *mdatoms, *state, f.view().forceWithPadding(), *ekind); - - constrain_coordinates(constr, do_log, do_ene, step, state, - upd.xp()->arrayRefWithPadding(), &dvdl_constr, bCalcVir, shake_vir); - } - - ArrayRefWithPadding forceCombined = - (fr->useMts && step % ir->mtsLevels[1].stepFactor == 0) - ? f.view().forceMtsCombinedWithPadding() - : f.view().forceWithPadding(); - upd.update_coords(*ir, step, mdatoms, state, forceCombined, fcdata, ekind, M, - etrtPOSITION, cr, constr != nullptr); - - wallcycle_stop(wcycle, ewcUPDATE); - - constrain_coordinates(constr, do_log, do_ene, step, state, upd.xp()->arrayRefWithPadding(), - &dvdl_constr, bCalcVir && !fr->useMts, shake_vir); - - upd.update_sd_second_half(*ir, step, &dvdl_constr, mdatoms, state, cr, nrnb, wcycle, - constr, do_log, do_ene); - upd.finish_update(*ir, mdatoms, state, wcycle, constr != nullptr); - } - - if (ir->bPull && ir->pull->bSetPbcRefToPrevStepCOM) - { - updatePrevStepPullCom(pull_work, state); - } - - if (ir->eI == eiVVAK) - { - /* erase F_EKIN and F_TEMP here? 
*/ - /* just compute the kinetic energy at the half step to perform a trotter step */ - compute_globals(gstat, cr, ir, fr, ekind, makeConstArrayRef(state->x), - makeConstArrayRef(state->v), state->box, mdatoms, nrnb, &vcm, wcycle, enerd, - force_vir, shake_vir, total_vir, pres, constr, &nullSignaller, lastbox, - nullptr, &bSumEkinhOld, (bGStat ? CGLO_GSTAT : 0) | CGLO_TEMPERATURE); - wallcycle_start(wcycle, ewcUPDATE); - trotter_update(ir, step, ekind, enerd, state, total_vir, mdatoms, &MassQ, trotter_seq, ettTSEQ4); - /* now we know the scaling, we can compute the positions again */ - std::copy(cbuf.begin(), cbuf.end(), state->x.begin()); - - upd.update_coords(*ir, step, mdatoms, state, f.view().forceWithPadding(), fcdata, ekind, - M, etrtPOSITION, cr, constr != nullptr); - wallcycle_stop(wcycle, ewcUPDATE); - - /* do we need an extra constraint here? just need to copy out of as_rvec_array(state->v.data()) to upd->xp? */ - /* are the small terms in the shake_vir here due - * to numerical errors, or are they important - * physically? I'm thinking they are just errors, but not completely sure. - * For now, will call without actually constraining, constr=nullptr*/ - upd.finish_update(*ir, mdatoms, state, wcycle, false); - } - if (EI_VV(ir->eI)) - { - /* this factor or 2 correction is necessary - because half of the constraint force is removed - in the vv step, so we have to double it. See - the Issue #1255. It is not yet clear - if the factor of 2 is exact, or just a very - good approximation, and this will be - investigated. The next step is to see if this - can be done adding a dhdl contribution from the - rattle step, but this is somewhat more - complicated with the current code. Will be - investigated, hopefully for 4.6.3. However, - this current solution is much better than - having it completely wrong. 
- */ - enerd->term[F_DVDL_CONSTR] += 2 * dvdl_constr; - } - else - { - enerd->term[F_DVDL_CONSTR] += dvdl_constr; - } - - if (vsite != nullptr) - { - wallcycle_start(wcycle, ewcVSITECONSTR); - vsite->construct(state->x, ir->delta_t, state->v, state->box); - wallcycle_stop(wcycle, ewcVSITECONSTR); - } - - /* ############## IF NOT VV, Calculate globals HERE ############ */ - /* With Leap-Frog we can skip compute_globals at - * non-communication steps, but we need to calculate - * the kinetic energy one step before communication. - */ - { - // Organize to do inter-simulation signalling on steps if - // and when algorithms require it. - const bool doInterSimSignal = (simulationsShareState && do_per_step(step, nstSignalComm)); - - if (bGStat || needHalfStepKineticEnergy || doInterSimSignal) - { - // Copy coordinates when needed to stop the CM motion. - if (useGpuForUpdate && !EI_VV(ir->eI) && bStopCM) - { - stateGpu->copyCoordinatesFromGpu(state->x, AtomLocality::Local); - stateGpu->waitCoordinatesReadyOnHost(AtomLocality::Local); - } - // Since we're already communicating at this step, we - // can propagate intra-simulation signals. Note that - // check_nstglobalcomm has the responsibility for - // choosing the value of nstglobalcomm that is one way - // bGStat becomes true, so we can't get into a - // situation where e.g. checkpointing can't be - // signalled. - bool doIntraSimSignal = true; - SimulationSignaller signaller(&signals, cr, ms, doInterSimSignal, doIntraSimSignal); - - compute_globals(gstat, cr, ir, fr, ekind, makeConstArrayRef(state->x), - makeConstArrayRef(state->v), state->box, mdatoms, nrnb, &vcm, - wcycle, enerd, force_vir, shake_vir, total_vir, pres, constr, - &signaller, lastbox, &totalNumberOfBondedInteractions, &bSumEkinhOld, - (bGStat ? CGLO_GSTAT : 0) | (!EI_VV(ir->eI) && bCalcEner ? CGLO_ENERGY : 0) - | (!EI_VV(ir->eI) && bStopCM ? CGLO_STOPCM : 0) - | (!EI_VV(ir->eI) ? CGLO_TEMPERATURE : 0) - | (!EI_VV(ir->eI) ? 
CGLO_PRESSURE : 0) | CGLO_CONSTRAINT - | (shouldCheckNumberOfBondedInteractions ? CGLO_CHECK_NUMBER_OF_BONDED_INTERACTIONS - : 0)); - checkNumberOfBondedInteractions(mdlog, cr, totalNumberOfBondedInteractions, - top_global, &top, makeConstArrayRef(state->x), - state->box, &shouldCheckNumberOfBondedInteractions); - if (!EI_VV(ir->eI) && bStopCM) - { - process_and_stopcm_grp(fplog, &vcm, *mdatoms, makeArrayRef(state->x), - makeArrayRef(state->v)); - inc_nrnb(nrnb, eNR_STOPCM, mdatoms->homenr); - - // TODO: The special case of removing CM motion should be dealt more gracefully - if (useGpuForUpdate) - { - stateGpu->copyCoordinatesToGpu(state->x, AtomLocality::Local); - // Here we block until the H2D copy completes because event sync with the - // force kernels that use the coordinates on the next steps is not implemented - // (not because of a race on state->x being modified on the CPU while H2D is in progress). - stateGpu->waitCoordinatesCopiedToDevice(AtomLocality::Local); - // If the COM removal changed the velocities on the CPU, this has to be accounted for. - if (vcm.mode != ecmNO) - { - stateGpu->copyVelocitiesToGpu(state->v, AtomLocality::Local); - } - } - } - } - } - - /* ############# END CALC EKIN AND PRESSURE ################# */ - - /* Note: this is OK, but there are some numerical precision issues with using the convergence of - the virial that should probably be addressed eventually. state->veta has better properies, - but what we actually need entering the new cycle is the new shake_vir value. Ideally, we could - generate the new shake_vir, but test the veta value for convergence. This will take some thought. */ - - if (ir->efep != efepNO && !EI_VV(ir->eI)) - { - /* Sum up the foreign energy and dK/dl terms for md and sd. 
- Currently done every step so that dH/dl is correct in the .edr */ - accumulateKineticLambdaComponents(enerd, state->lambda, *ir->fepvals); - } - - update_pcouple_after_coordinates(fplog, step, ir, mdatoms, pres, force_vir, shake_vir, - pressureCouplingMu, state, nrnb, upd.deform(), !useGpuForUpdate); - - const bool doBerendsenPressureCoupling = - (inputrec->epc == epcBERENDSEN && do_per_step(step, inputrec->nstpcouple)); - const bool doCRescalePressureCoupling = - (inputrec->epc == epcCRESCALE && do_per_step(step, inputrec->nstpcouple)); - if (useGpuForUpdate - && (doBerendsenPressureCoupling || doCRescalePressureCoupling || doParrinelloRahman)) - { - integrator->scaleCoordinates(pressureCouplingMu); - if (doCRescalePressureCoupling) - { - matrix pressureCouplingInvMu; - gmx::invertBoxMatrix(pressureCouplingMu, pressureCouplingInvMu); - integrator->scaleVelocities(pressureCouplingInvMu); - } - integrator->setPbc(PbcType::Xyz, state->box); - } - - /* ################# END UPDATE STEP 2 ################# */ - /* #### We now have r(t+dt) and v(t+dt/2) ############# */ - - /* The coordinates (x) were unshifted in update */ - if (!bGStat) - { - /* We will not sum ekinh_old, - * so signal that we still have to do it. 
- */ - bSumEkinhOld = TRUE; - } - - if (bCalcEner) - { - /* ######### BEGIN PREPARING EDR OUTPUT ########### */ - - /* use the directly determined last velocity, not actually the averaged half steps */ - if (bTrotter && ir->eI == eiVV) - { - enerd->term[F_EKIN] = last_ekin; - } - enerd->term[F_ETOT] = enerd->term[F_EPOT] + enerd->term[F_EKIN]; - - if (integratorHasConservedEnergyQuantity(ir)) - { - if (EI_VV(ir->eI)) - { - enerd->term[F_ECONSERVED] = enerd->term[F_ETOT] + saved_conserved_quantity; - } - else - { - enerd->term[F_ECONSERVED] = enerd->term[F_ETOT] + NPT_energy(ir, state, &MassQ); - } - } - /* ######### END PREPARING EDR OUTPUT ########### */ - } - - /* Output stuff */ - if (MASTER(cr)) - { - if (fplog && do_log && bDoExpanded) - { - /* only needed if doing expanded ensemble */ - PrintFreeEnergyInfoToFile(fplog, ir->fepvals, ir->expandedvals, - ir->bSimTemp ? ir->simtempvals : nullptr, - state_global->dfhist, state->fep_state, ir->nstlog, step); - } - if (bCalcEner) - { - energyOutput.addDataAtEnergyStep( - bDoDHDL, bCalcEnerStep, t, mdatoms->tmass, enerd, ir->fepvals, - ir->expandedvals, lastbox, - PTCouplingArrays{ state->boxv, state->nosehoover_xi, state->nosehoover_vxi, - state->nhpres_xi, state->nhpres_vxi }, - state->fep_state, shake_vir, force_vir, total_vir, pres, ekind, mu_tot, constr); - } - else - { - energyOutput.recordNonEnergyStep(); - } - - gmx_bool do_dr = do_per_step(step, ir->nstdisreout); - gmx_bool do_or = do_per_step(step, ir->nstorireout); - - if (doSimulatedAnnealing) - { - gmx::EnergyOutput::printAnnealingTemperatures(do_log ? fplog : nullptr, groups, - &(ir->opts)); - } - if (do_log || do_ene || do_dr || do_or) - { - energyOutput.printStepToEnergyFile(mdoutf_get_fp_ene(outf), do_ene, do_dr, do_or, - do_log ? 
fplog : nullptr, step, t, - fr->fcdata.get(), awh.get()); - } - if (do_log && ir->bDoAwh && awh->hasFepLambdaDimension()) - { - const bool isInitialOutput = false; - printLambdaStateToLog(fplog, state->lambda, isInitialOutput); - } - - if (ir->bPull) - { - pull_print_output(pull_work, step, t); - } - - if (do_per_step(step, ir->nstlog)) - { - if (fflush(fplog) != 0) - { - gmx_fatal(FARGS, "Cannot flush logfile - maybe you are out of disk space?"); - } - } - } - if (bDoExpanded) - { - /* Have to do this part _after_ outputting the logfile and the edr file */ - /* Gets written into the state at the beginning of next loop*/ - state->fep_state = lamnew; - if(plumedswitch) - { - realFepState = state->fep_state; - } - } - else if (ir->bDoAwh && awh->needForeignEnergyDifferences(step)) - { - state->fep_state = awh->fepLambdaState(); - } - /* Print the remaining wall clock time for the run */ - if (isMasterSimMasterRank(ms, MASTER(cr)) && (do_verbose || gmx_got_usr_signal()) && !bPMETunePrinting) - { - if (shellfc) - { - fprintf(stderr, "\n"); - } - print_time(stderr, walltime_accounting, step, ir, cr); - } - - /* Ion/water position swapping. - * Not done in last step since trajectory writing happens before this call - * in the MD loop and exchanges would be lost anyway. 
*/ - bNeedRepartition = FALSE; - if ((ir->eSwapCoords != eswapNO) && (step > 0) && !bLastStep && do_per_step(step, ir->swap->nstswap)) - { - bNeedRepartition = - do_swapcoords(cr, step, t, ir, swap, wcycle, as_rvec_array(state->x.data()), - state->box, MASTER(cr) && mdrunOptions.verbose, bRerunMD); - - if (bNeedRepartition && DOMAINDECOMP(cr)) - { - dd_collect_state(cr->dd, state, state_global); - } - } - - /* Replica exchange */ - bExchanged = FALSE; - if (bDoReplEx) - { - bExchanged = replica_exchange(fplog, cr, ms, repl_ex, state_global, enerd, state, step, t); - } - - if ((bExchanged || bNeedRepartition) && DOMAINDECOMP(cr)) - { - dd_partition_system(fplog, mdlog, step, cr, TRUE, 1, state_global, *top_global, ir, - imdSession, pull_work, state, &f, mdAtoms, &top, fr, vsite, constr, - nrnb, wcycle, FALSE); - shouldCheckNumberOfBondedInteractions = true; - upd.setNumAtoms(state->natoms); - } - - bFirstStep = FALSE; - bInitStep = FALSE; - - /* ####### SET VARIABLES FOR NEXT ITERATION IF THEY STILL NEED IT ###### */ - /* With all integrators, except VV, we need to retain the pressure - * at the current step for coupling at the next step. - */ - if ((state->flags & (1U << estPRES_PREV)) - && (bGStatEveryStep || (ir->nstpcouple > 0 && step % ir->nstpcouple == 0))) - { - /* Store the pressure in t_state for pressure coupling - * at the next MD step. 
- */ - copy_mat(pres, state->pres_prev); - } - - /* ####### END SET VARIABLES FOR NEXT ITERATION ###### */ - - if ((membed != nullptr) && (!bLastStep)) - { - rescale_membed(step_rel, membed, as_rvec_array(state_global->x.data())); - } - - cycles = wallcycle_stop(wcycle, ewcSTEP); - if (DOMAINDECOMP(cr) && wcycle) - { - dd_cycles_add(cr->dd, cycles, ddCyclStep); - } - - /* increase the MD step number */ - step++; - step_rel++; - -#if GMX_FAHCORE - if (MASTER(cr)) - { - fcReportProgress(ir->nsteps + ir->init_step, step); - } -#endif - - resetHandler->resetCounters(step, step_rel, mdlog, fplog, cr, fr->nbv.get(), nrnb, - fr->pmedata, pme_loadbal, wcycle, walltime_accounting); - - /* If bIMD is TRUE, the master updates the IMD energy record and sends positions to VMD client */ - imdSession->updateEnergyRecordAndSendPositionsAndEnergies(bInteractiveMDstep, step, bCalcEner); - } - /* End of main MD loop */ - - /* Closing TNG files can include compressing data. Therefore it is good to do that - * before stopping the time measurements. 
*/ - mdoutf_tng_close(outf); - - /* Stop measuring walltime */ - walltime_accounting_end_time(walltime_accounting); - - if (!thisRankHasDuty(cr, DUTY_PME)) - { - /* Tell the PME only node to finish */ - gmx_pme_send_finish(cr); - } - - if (MASTER(cr)) - { - if (ir->nstcalcenergy > 0) - { - energyOutput.printEnergyConservation(fplog, ir->simulation_part, EI_MD(ir->eI)); - - gmx::EnergyOutput::printAnnealingTemperatures(fplog, groups, &(ir->opts)); - energyOutput.printAverages(fplog, groups); - } - } - done_mdoutf(outf); - - if (bPMETune) - { - pme_loadbal_done(pme_loadbal, fplog, mdlog, fr->nbv->useGpu()); - } - - done_shellfc(fplog, shellfc, step_rel); - - if (useReplicaExchange && MASTER(cr)) - { - print_replica_exchange_statistics(fplog, repl_ex); - } - - walltime_accounting_set_nsteps_done(walltime_accounting, step_rel); - - global_stat_destroy(gstat); -} diff --git a/patches/gromacs-2021.7.diff/src/gromacs/mdrun/md.cpp.preplumed b/patches/gromacs-2021.7.diff/src/gromacs/mdrun/md.cpp.preplumed deleted file mode 100644 index bc367f6e57..0000000000 --- a/patches/gromacs-2021.7.diff/src/gromacs/mdrun/md.cpp.preplumed +++ /dev/null @@ -1,1728 +0,0 @@ -/* - * This file is part of the GROMACS molecular simulation package. - * - * Copyright (c) 1991-2000, University of Groningen, The Netherlands. - * Copyright (c) 2001-2004, The GROMACS development team. - * Copyright (c) 2011-2019,2020,2021, by the GROMACS development team, led by - * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl, - * and including many others, as listed in the AUTHORS file in the - * top-level source directory and at http://www.gromacs.org. - * - * GROMACS is free software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public License - * as published by the Free Software Foundation; either version 2.1 - * of the License, or (at your option) any later version. 
- * - * GROMACS is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public - * License along with GROMACS; if not, see - * http://www.gnu.org/licenses, or write to the Free Software Foundation, - * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. - * - * If you want to redistribute modifications to GROMACS, please - * consider that scientific software is very special. Version - * control is crucial - bugs must be traceable. We will be happy to - * consider code for inclusion in the official distribution, but - * derived work must not be called official GROMACS. Details are found - * in the README & COPYING files - if they are missing, get the - * official version at http://www.gromacs.org. - * - * To help us fund GROMACS development, we humbly ask that you cite - * the research papers on the package. Check out http://www.gromacs.org. - */ -/*! 
\internal \file - * - * \brief Implements the integrator for normal molecular dynamics simulations - * - * \author David van der Spoel - * \ingroup module_mdrun - */ -#include "gmxpre.h" - -#include -#include -#include -#include - -#include -#include -#include - -#include "gromacs/applied_forces/awh/awh.h" -#include "gromacs/commandline/filenm.h" -#include "gromacs/domdec/collect.h" -#include "gromacs/domdec/dlbtiming.h" -#include "gromacs/domdec/domdec.h" -#include "gromacs/domdec/domdec_network.h" -#include "gromacs/domdec/domdec_struct.h" -#include "gromacs/domdec/gpuhaloexchange.h" -#include "gromacs/domdec/mdsetup.h" -#include "gromacs/domdec/partition.h" -#include "gromacs/essentialdynamics/edsam.h" -#include "gromacs/ewald/pme_load_balancing.h" -#include "gromacs/ewald/pme_pp.h" -#include "gromacs/fileio/trxio.h" -#include "gromacs/gmxlib/network.h" -#include "gromacs/gmxlib/nrnb.h" -#include "gromacs/gpu_utils/device_stream_manager.h" -#include "gromacs/gpu_utils/gpu_utils.h" -#include "gromacs/imd/imd.h" -#include "gromacs/listed_forces/listed_forces.h" -#include "gromacs/math/functions.h" -#include "gromacs/math/invertmatrix.h" -#include "gromacs/math/vec.h" -#include "gromacs/math/vectypes.h" -#include "gromacs/mdlib/checkpointhandler.h" -#include "gromacs/mdlib/compute_io.h" -#include "gromacs/mdlib/constr.h" -#include "gromacs/mdlib/coupling.h" -#include "gromacs/mdlib/ebin.h" -#include "gromacs/mdlib/enerdata_utils.h" -#include "gromacs/mdlib/energyoutput.h" -#include "gromacs/mdlib/expanded.h" -#include "gromacs/mdlib/force.h" -#include "gromacs/mdlib/force_flags.h" -#include "gromacs/mdlib/forcerec.h" -#include "gromacs/mdlib/freeenergyparameters.h" -#include "gromacs/mdlib/md_support.h" -#include "gromacs/mdlib/mdatoms.h" -#include "gromacs/mdlib/mdoutf.h" -#include "gromacs/mdlib/membed.h" -#include "gromacs/mdlib/resethandler.h" -#include "gromacs/mdlib/sighandler.h" -#include "gromacs/mdlib/simulationsignal.h" -#include "gromacs/mdlib/stat.h" 
-#include "gromacs/mdlib/stophandler.h" -#include "gromacs/mdlib/tgroup.h" -#include "gromacs/mdlib/trajectory_writing.h" -#include "gromacs/mdlib/update.h" -#include "gromacs/mdlib/update_constrain_gpu.h" -#include "gromacs/mdlib/vcm.h" -#include "gromacs/mdlib/vsite.h" -#include "gromacs/mdrunutility/handlerestart.h" -#include "gromacs/mdrunutility/multisim.h" -#include "gromacs/mdrunutility/printtime.h" -#include "gromacs/mdtypes/awh_history.h" -#include "gromacs/mdtypes/awh_params.h" -#include "gromacs/mdtypes/commrec.h" -#include "gromacs/mdtypes/df_history.h" -#include "gromacs/mdtypes/energyhistory.h" -#include "gromacs/mdtypes/fcdata.h" -#include "gromacs/mdtypes/forcebuffers.h" -#include "gromacs/mdtypes/forcerec.h" -#include "gromacs/mdtypes/group.h" -#include "gromacs/mdtypes/inputrec.h" -#include "gromacs/mdtypes/interaction_const.h" -#include "gromacs/mdtypes/md_enums.h" -#include "gromacs/mdtypes/mdatom.h" -#include "gromacs/mdtypes/mdrunoptions.h" -#include "gromacs/mdtypes/multipletimestepping.h" -#include "gromacs/mdtypes/observableshistory.h" -#include "gromacs/mdtypes/pullhistory.h" -#include "gromacs/mdtypes/simulation_workload.h" -#include "gromacs/mdtypes/state.h" -#include "gromacs/mdtypes/state_propagator_data_gpu.h" -#include "gromacs/modularsimulator/energydata.h" -#include "gromacs/nbnxm/gpu_data_mgmt.h" -#include "gromacs/nbnxm/nbnxm.h" -#include "gromacs/pbcutil/pbc.h" -#include "gromacs/pulling/output.h" -#include "gromacs/pulling/pull.h" -#include "gromacs/swap/swapcoords.h" -#include "gromacs/timing/wallcycle.h" -#include "gromacs/timing/walltime_accounting.h" -#include "gromacs/topology/atoms.h" -#include "gromacs/topology/idef.h" -#include "gromacs/topology/mtop_util.h" -#include "gromacs/topology/topology.h" -#include "gromacs/trajectory/trajectoryframe.h" -#include "gromacs/utility/basedefinitions.h" -#include "gromacs/utility/cstringutil.h" -#include "gromacs/utility/fatalerror.h" -#include "gromacs/utility/logger.h" -#include 
"gromacs/utility/real.h" -#include "gromacs/utility/smalloc.h" - -#include "legacysimulator.h" -#include "replicaexchange.h" -#include "shellfc.h" - -using gmx::SimulationSignaller; - -void gmx::LegacySimulator::do_md() -{ - // TODO Historically, the EM and MD "integrators" used different - // names for the t_inputrec *parameter, but these must have the - // same name, now that it's a member of a struct. We use this ir - // alias to avoid a large ripple of nearly useless changes. - // t_inputrec is being replaced by IMdpOptionsProvider, so this - // will go away eventually. - t_inputrec* ir = inputrec; - int64_t step, step_rel; - double t, t0 = ir->init_t; - gmx_bool bGStatEveryStep, bGStat, bCalcVir, bCalcEnerStep, bCalcEner; - gmx_bool bNS = FALSE, bNStList, bStopCM, bFirstStep, bInitStep, bLastStep = FALSE; - gmx_bool bDoDHDL = FALSE, bDoFEP = FALSE, bDoExpanded = FALSE; - gmx_bool do_ene, do_log, do_verbose; - gmx_bool bMasterState; - unsigned int force_flags; - tensor force_vir = { { 0 } }, shake_vir = { { 0 } }, total_vir = { { 0 } }, pres = { { 0 } }; - int i, m; - rvec mu_tot; - matrix pressureCouplingMu, M; - gmx_repl_ex_t repl_ex = nullptr; - gmx_global_stat_t gstat; - gmx_shellfc_t* shellfc; - gmx_bool bSumEkinhOld, bDoReplEx, bExchanged, bNeedRepartition; - gmx_bool bTemp, bPres, bTrotter; - real dvdl_constr; - std::vector cbuf; - matrix lastbox; - int lamnew = 0; - /* for FEP */ - int nstfep = 0; - double cycles; - real saved_conserved_quantity = 0; - real last_ekin = 0; - t_extmass MassQ; - char sbuf[STEPSTRSIZE], sbuf2[STEPSTRSIZE]; - - /* PME load balancing data for GPU kernels */ - gmx_bool bPMETune = FALSE; - gmx_bool bPMETunePrinting = FALSE; - - bool bInteractiveMDstep = false; - - /* Domain decomposition could incorrectly miss a bonded - interaction, but checking for that requires a global - communication stage, which does not otherwise happen in DD - code. So we do that alongside the first global energy reduction - after a new DD is made. 
These variables handle whether the - check happens, and the result it returns. */ - bool shouldCheckNumberOfBondedInteractions = false; - int totalNumberOfBondedInteractions = -1; - - SimulationSignals signals; - // Most global communnication stages don't propagate mdrun - // signals, and will use this object to achieve that. - SimulationSignaller nullSignaller(nullptr, nullptr, nullptr, false, false); - - if (!mdrunOptions.writeConfout) - { - // This is on by default, and the main known use case for - // turning it off is for convenience in benchmarking, which is - // something that should not show up in the general user - // interface. - GMX_LOG(mdlog.info) - .asParagraph() - .appendText( - "The -noconfout functionality is deprecated, and may be removed in a " - "future version."); - } - - /* md-vv uses averaged full step velocities for T-control - md-vv-avek uses averaged half step velocities for T-control (but full step ekin for P control) - md uses averaged half step kinetic energies to determine temperature unless defined otherwise by GMX_EKIN_AVE_VEL; */ - bTrotter = (EI_VV(ir->eI) - && (inputrecNptTrotter(ir) || inputrecNphTrotter(ir) || inputrecNvtTrotter(ir))); - - const bool bRerunMD = false; - - int nstglobalcomm = computeGlobalCommunicationPeriod(mdlog, ir, cr); - bGStatEveryStep = (nstglobalcomm == 1); - - const SimulationGroups* groups = &top_global->groups; - - std::unique_ptr ed = nullptr; - if (opt2bSet("-ei", nfile, fnm)) - { - /* Initialize essential dynamics sampling */ - ed = init_edsam(mdlog, opt2fn_null("-ei", nfile, fnm), opt2fn("-eo", nfile, fnm), top_global, - ir, cr, constr, state_global, observablesHistory, oenv, startingBehavior); - } - else if (observablesHistory->edsamHistory) - { - gmx_fatal(FARGS, - "The checkpoint is from a run with essential dynamics sampling, " - "but the current run did not specify the -ei option. 
" - "Either specify the -ei option to mdrun, or do not use this checkpoint file."); - } - - int* fep_state = MASTER(cr) ? &state_global->fep_state : nullptr; - gmx::ArrayRef lambda = MASTER(cr) ? state_global->lambda : gmx::ArrayRef(); - initialize_lambdas(fplog, *ir, MASTER(cr), fep_state, lambda); - Update upd(*ir, deform); - const bool doSimulatedAnnealing = initSimulatedAnnealing(ir, &upd); - const bool useReplicaExchange = (replExParams.exchangeInterval > 0); - - const t_fcdata& fcdata = *fr->fcdata; - - bool simulationsShareState = false; - int nstSignalComm = nstglobalcomm; - { - // TODO This implementation of ensemble orientation restraints is nasty because - // a user can't just do multi-sim with single-sim orientation restraints. - bool usingEnsembleRestraints = - (fcdata.disres->nsystems > 1) || ((ms != nullptr) && (fcdata.orires->nr != 0)); - bool awhUsesMultiSim = (ir->bDoAwh && ir->awhParams->shareBiasMultisim && (ms != nullptr)); - - // Replica exchange, ensemble restraints and AWH need all - // simulations to remain synchronized, so they need - // checkpoints and stop conditions to act on the same step, so - // the propagation of such signals must take place between - // simulations, not just within simulations. - // TODO: Make algorithm initializers set these flags. - simulationsShareState = useReplicaExchange || usingEnsembleRestraints || awhUsesMultiSim; - - if (simulationsShareState) - { - // Inter-simulation signal communication does not need to happen - // often, so we use a minimum of 200 steps to reduce overhead. 
- const int c_minimumInterSimulationSignallingInterval = 200; - nstSignalComm = ((c_minimumInterSimulationSignallingInterval + nstglobalcomm - 1) / nstglobalcomm) - * nstglobalcomm; - } - } - - if (startingBehavior != StartingBehavior::RestartWithAppending) - { - pleaseCiteCouplingAlgorithms(fplog, *ir); - } - gmx_mdoutf* outf = - init_mdoutf(fplog, nfile, fnm, mdrunOptions, cr, outputProvider, mdModulesNotifier, ir, - top_global, oenv, wcycle, startingBehavior, simulationsShareState, ms); - gmx::EnergyOutput energyOutput(mdoutf_get_fp_ene(outf), top_global, ir, pull_work, - mdoutf_get_fp_dhdl(outf), false, startingBehavior, - simulationsShareState, mdModulesNotifier); - - gstat = global_stat_init(ir); - - const auto& simulationWork = runScheduleWork->simulationWork; - const bool useGpuForPme = simulationWork.useGpuPme; - const bool useGpuForNonbonded = simulationWork.useGpuNonbonded; - const bool useGpuForBufferOps = simulationWork.useGpuBufferOps; - const bool useGpuForUpdate = simulationWork.useGpuUpdate; - - /* Check for polarizable models and flexible constraints */ - shellfc = init_shell_flexcon(fplog, top_global, constr ? constr->numFlexibleConstraints() : 0, - ir->nstcalcenergy, DOMAINDECOMP(cr), useGpuForPme); - - { - double io = compute_io(ir, top_global->natoms, *groups, energyOutput.numEnergyTerms(), 1); - if ((io > 2000) && MASTER(cr)) - { - fprintf(stderr, "\nWARNING: This run will generate roughly %.0f Mb of data\n\n", io); - } - } - - // Local state only becomes valid now. - std::unique_ptr stateInstance; - t_state* state; - - gmx_localtop_t top(top_global->ffparams); - - auto mdatoms = mdAtoms->mdatoms(); - - ForceBuffers f(fr->useMts, ((useGpuForNonbonded && useGpuForBufferOps) || useGpuForUpdate) - ? 
PinningPolicy::PinnedIfSupported - : PinningPolicy::CannotBePinned); - if (DOMAINDECOMP(cr)) - { - stateInstance = std::make_unique(); - state = stateInstance.get(); - dd_init_local_state(cr->dd, state_global, state); - - /* Distribute the charge groups over the nodes from the master node */ - dd_partition_system(fplog, mdlog, ir->init_step, cr, TRUE, 1, state_global, *top_global, ir, - imdSession, pull_work, state, &f, mdAtoms, &top, fr, vsite, constr, - nrnb, nullptr, FALSE); - shouldCheckNumberOfBondedInteractions = true; - upd.setNumAtoms(state->natoms); - } - else - { - state_change_natoms(state_global, state_global->natoms); - /* Copy the pointer to the global state */ - state = state_global; - - /* Generate and initialize new topology */ - mdAlgorithmsSetupAtomData(cr, ir, *top_global, &top, fr, &f, mdAtoms, constr, vsite, shellfc); - - upd.setNumAtoms(state->natoms); - } - - std::unique_ptr integrator; - - StatePropagatorDataGpu* stateGpu = fr->stateGpu; - - // TODO: the assertions below should be handled by UpdateConstraintsBuilder. 
- if (useGpuForUpdate) - { - GMX_RELEASE_ASSERT(!DOMAINDECOMP(cr) || ddUsesUpdateGroups(*cr->dd) || constr == nullptr - || constr->numConstraintsTotal() == 0, - "Constraints in domain decomposition are only supported with update " - "groups if using GPU update.\n"); - GMX_RELEASE_ASSERT(ir->eConstrAlg != econtSHAKE || constr == nullptr - || constr->numConstraintsTotal() == 0, - "SHAKE is not supported with GPU update."); - GMX_RELEASE_ASSERT(useGpuForPme || (useGpuForNonbonded && simulationWork.useGpuBufferOps), - "Either PME or short-ranged non-bonded interaction tasks must run on " - "the GPU to use GPU update.\n"); - GMX_RELEASE_ASSERT(ir->eI == eiMD, - "Only the md integrator is supported with the GPU update.\n"); - GMX_RELEASE_ASSERT( - ir->etc != etcNOSEHOOVER, - "Nose-Hoover temperature coupling is not supported with the GPU update.\n"); - GMX_RELEASE_ASSERT( - ir->epc == epcNO || ir->epc == epcPARRINELLORAHMAN || ir->epc == epcBERENDSEN - || ir->epc == epcCRESCALE, - "Only Parrinello-Rahman, Berendsen, and C-rescale pressure coupling are supported " - "with the GPU update.\n"); - GMX_RELEASE_ASSERT(!mdatoms->haveVsites, - "Virtual sites are not supported with the GPU update.\n"); - GMX_RELEASE_ASSERT(ed == nullptr, - "Essential dynamics is not supported with the GPU update.\n"); - GMX_RELEASE_ASSERT(!ir->bPull || !pull_have_constraint(*ir->pull), - "Constraints pulling is not supported with the GPU update.\n"); - GMX_RELEASE_ASSERT(fcdata.orires->nr == 0, - "Orientation restraints are not supported with the GPU update.\n"); - GMX_RELEASE_ASSERT( - ir->efep == efepNO - || (!haveFepPerturbedMasses(*top_global) && !havePerturbedConstraints(*top_global)), - "Free energy perturbation of masses and constraints are not supported with the GPU " - "update."); - - if (constr != nullptr && constr->numConstraintsTotal() > 0) - { - GMX_LOG(mdlog.info) - .asParagraph() - .appendText("Updating coordinates and applying constraints on the GPU."); - } - else - { - 
GMX_LOG(mdlog.info).asParagraph().appendText("Updating coordinates on the GPU."); - } - GMX_RELEASE_ASSERT(fr->deviceStreamManager != nullptr, - "Device stream manager should be initialized in order to use GPU " - "update-constraints."); - GMX_RELEASE_ASSERT( - fr->deviceStreamManager->streamIsValid(gmx::DeviceStreamType::UpdateAndConstraints), - "Update stream should be initialized in order to use GPU " - "update-constraints."); - integrator = std::make_unique( - *ir, *top_global, fr->deviceStreamManager->context(), - fr->deviceStreamManager->stream(gmx::DeviceStreamType::UpdateAndConstraints), - stateGpu->xUpdatedOnDevice(), wcycle); - - integrator->setPbc(PbcType::Xyz, state->box); - } - - if (useGpuForPme || (useGpuForNonbonded && useGpuForBufferOps) || useGpuForUpdate) - { - changePinningPolicy(&state->x, PinningPolicy::PinnedIfSupported); - } - if (useGpuForUpdate) - { - changePinningPolicy(&state->v, PinningPolicy::PinnedIfSupported); - } - - // NOTE: The global state is no longer used at this point. - // But state_global is still used as temporary storage space for writing - // the global state to file and potentially for replica exchange. - // (Global topology should persist.) 
- - update_mdatoms(mdatoms, state->lambda[efptMASS]); - - if (ir->bExpanded) - { - /* Check nstexpanded here, because the grompp check was broken */ - if (ir->expandedvals->nstexpanded % ir->nstcalcenergy != 0) - { - gmx_fatal(FARGS, - "With expanded ensemble, nstexpanded should be a multiple of nstcalcenergy"); - } - init_expanded_ensemble(startingBehavior != StartingBehavior::NewSimulation, ir, state->dfhist); - } - - if (MASTER(cr)) - { - EnergyData::initializeEnergyHistory(startingBehavior, observablesHistory, &energyOutput); - } - - preparePrevStepPullCom(ir, pull_work, mdatoms->massT, state, state_global, cr, - startingBehavior != StartingBehavior::NewSimulation); - - // TODO: Remove this by converting AWH into a ForceProvider - auto awh = prepareAwhModule(fplog, *ir, state_global, cr, ms, - startingBehavior != StartingBehavior::NewSimulation, - shellfc != nullptr, opt2fn("-awh", nfile, fnm), pull_work); - - if (useReplicaExchange && MASTER(cr)) - { - repl_ex = init_replica_exchange(fplog, ms, top_global->natoms, ir, replExParams); - } - /* PME tuning is only supported in the Verlet scheme, with PME for - * Coulomb. It is not supported with only LJ PME. 
*/ - bPMETune = (mdrunOptions.tunePme && EEL_PME(fr->ic->eeltype) && !mdrunOptions.reproducible - && ir->cutoff_scheme != ecutsGROUP); - - pme_load_balancing_t* pme_loadbal = nullptr; - if (bPMETune) - { - pme_loadbal_init(&pme_loadbal, cr, mdlog, *ir, state->box, *fr->ic, *fr->nbv, fr->pmedata, - fr->nbv->useGpu()); - } - - if (!ir->bContinuation) - { - if (state->flags & (1U << estV)) - { - auto v = makeArrayRef(state->v); - /* Set the velocities of vsites, shells and frozen atoms to zero */ - for (i = 0; i < mdatoms->homenr; i++) - { - if (mdatoms->ptype[i] == eptVSite || mdatoms->ptype[i] == eptShell) - { - clear_rvec(v[i]); - } - else if (mdatoms->cFREEZE) - { - for (m = 0; m < DIM; m++) - { - if (ir->opts.nFreeze[mdatoms->cFREEZE[i]][m]) - { - v[i][m] = 0; - } - } - } - } - } - - if (constr) - { - /* Constrain the initial coordinates and velocities */ - do_constrain_first(fplog, constr, ir, mdatoms->nr, mdatoms->homenr, - state->x.arrayRefWithPadding(), state->v.arrayRefWithPadding(), - state->box, state->lambda[efptBONDED]); - } - if (vsite) - { - /* Construct the virtual sites for the initial configuration */ - vsite->construct(state->x, ir->delta_t, {}, state->box); - } - } - - if (ir->efep != efepNO) - { - /* Set free energy calculation frequency as the greatest common - * denominator of nstdhdl and repl_ex_nst. */ - nstfep = ir->fepvals->nstdhdl; - if (ir->bExpanded) - { - nstfep = std::gcd(ir->expandedvals->nstexpanded, nstfep); - } - if (useReplicaExchange) - { - nstfep = std::gcd(replExParams.exchangeInterval, nstfep); - } - if (ir->bDoAwh) - { - nstfep = std::gcd(ir->awhParams->nstSampleCoord, nstfep); - } - } - - /* Be REALLY careful about what flags you set here. You CANNOT assume - * this is the first step, since we might be restarting from a checkpoint, - * and in that case we should not do any modifications to the state. 
- */ - bStopCM = (ir->comm_mode != ecmNO && !ir->bContinuation); - - // When restarting from a checkpoint, it can be appropriate to - // initialize ekind from quantities in the checkpoint. Otherwise, - // compute_globals must initialize ekind before the simulation - // starts/restarts. However, only the master rank knows what was - // found in the checkpoint file, so we have to communicate in - // order to coordinate the restart. - // - // TODO Consider removing this communication if/when checkpoint - // reading directly follows .tpr reading, because all ranks can - // agree on hasReadEkinState at that time. - bool hasReadEkinState = MASTER(cr) ? state_global->ekinstate.hasReadEkinState : false; - if (PAR(cr)) - { - gmx_bcast(sizeof(hasReadEkinState), &hasReadEkinState, cr->mpi_comm_mygroup); - } - if (hasReadEkinState) - { - restore_ekinstate_from_state(cr, ekind, &state_global->ekinstate); - } - - unsigned int cglo_flags = - (CGLO_TEMPERATURE | CGLO_GSTAT | (EI_VV(ir->eI) ? CGLO_PRESSURE : 0) - | (EI_VV(ir->eI) ? CGLO_CONSTRAINT : 0) | (hasReadEkinState ? CGLO_READEKIN : 0)); - - bSumEkinhOld = FALSE; - - t_vcm vcm(top_global->groups, *ir); - reportComRemovalInfo(fplog, vcm); - - /* To minimize communication, compute_globals computes the COM velocity - * and the kinetic energy for the velocities without COM motion removed. - * Thus to get the kinetic energy without the COM contribution, we need - * to call compute_globals twice. - */ - for (int cgloIteration = 0; cgloIteration < (bStopCM ? 
2 : 1); cgloIteration++) - { - unsigned int cglo_flags_iteration = cglo_flags; - if (bStopCM && cgloIteration == 0) - { - cglo_flags_iteration |= CGLO_STOPCM; - cglo_flags_iteration &= ~CGLO_TEMPERATURE; - } - compute_globals(gstat, cr, ir, fr, ekind, makeConstArrayRef(state->x), - makeConstArrayRef(state->v), state->box, mdatoms, nrnb, &vcm, nullptr, - enerd, force_vir, shake_vir, total_vir, pres, constr, &nullSignaller, - state->box, &totalNumberOfBondedInteractions, &bSumEkinhOld, - cglo_flags_iteration - | (shouldCheckNumberOfBondedInteractions ? CGLO_CHECK_NUMBER_OF_BONDED_INTERACTIONS - : 0)); - if (cglo_flags_iteration & CGLO_STOPCM) - { - /* At initialization, do not pass x with acceleration-correction mode - * to avoid (incorrect) correction of the initial coordinates. - */ - auto x = (vcm.mode == ecmLINEAR_ACCELERATION_CORRECTION) ? ArrayRef() - : makeArrayRef(state->x); - process_and_stopcm_grp(fplog, &vcm, *mdatoms, x, makeArrayRef(state->v)); - inc_nrnb(nrnb, eNR_STOPCM, mdatoms->homenr); - } - } - checkNumberOfBondedInteractions(mdlog, cr, totalNumberOfBondedInteractions, top_global, &top, - makeConstArrayRef(state->x), state->box, - &shouldCheckNumberOfBondedInteractions); - if (ir->eI == eiVVAK) - { - /* a second call to get the half step temperature initialized as well */ - /* we do the same call as above, but turn the pressure off -- internally to - compute_globals, this is recognized as a velocity verlet half-step - kinetic energy calculation. 
This minimized excess variables, but - perhaps loses some logic?*/ - - compute_globals(gstat, cr, ir, fr, ekind, makeConstArrayRef(state->x), - makeConstArrayRef(state->v), state->box, mdatoms, nrnb, &vcm, nullptr, - enerd, force_vir, shake_vir, total_vir, pres, constr, &nullSignaller, - state->box, nullptr, &bSumEkinhOld, cglo_flags & ~CGLO_PRESSURE); - } - - /* Calculate the initial half step temperature, and save the ekinh_old */ - if (startingBehavior == StartingBehavior::NewSimulation) - { - for (i = 0; (i < ir->opts.ngtc); i++) - { - copy_mat(ekind->tcstat[i].ekinh, ekind->tcstat[i].ekinh_old); - } - } - - /* need to make an initiation call to get the Trotter variables set, as well as other constants - for non-trotter temperature control */ - auto trotter_seq = init_npt_vars(ir, state, &MassQ, bTrotter); - - if (MASTER(cr)) - { - if (!ir->bContinuation) - { - if (constr && ir->eConstrAlg == econtLINCS) - { - fprintf(fplog, "RMS relative constraint deviation after constraining: %.2e\n", - constr->rmsd()); - } - if (EI_STATE_VELOCITY(ir->eI)) - { - real temp = enerd->term[F_TEMP]; - if (ir->eI != eiVV) - { - /* Result of Ekin averaged over velocities of -half - * and +half step, while we only have -half step here. 
- */ - temp *= 2; - } - fprintf(fplog, "Initial temperature: %g K\n", temp); - } - } - - char tbuf[20]; - fprintf(stderr, "starting mdrun '%s'\n", *(top_global->name)); - if (ir->nsteps >= 0) - { - sprintf(tbuf, "%8.1f", (ir->init_step + ir->nsteps) * ir->delta_t); - } - else - { - sprintf(tbuf, "%s", "infinite"); - } - if (ir->init_step > 0) - { - fprintf(stderr, "%s steps, %s ps (continuing from step %s, %8.1f ps).\n", - gmx_step_str(ir->init_step + ir->nsteps, sbuf), tbuf, - gmx_step_str(ir->init_step, sbuf2), ir->init_step * ir->delta_t); - } - else - { - fprintf(stderr, "%s steps, %s ps.\n", gmx_step_str(ir->nsteps, sbuf), tbuf); - } - fprintf(fplog, "\n"); - } - - walltime_accounting_start_time(walltime_accounting); - wallcycle_start(wcycle, ewcRUN); - print_start(fplog, cr, walltime_accounting, "mdrun"); - - /*********************************************************** - * - * Loop over MD steps - * - ************************************************************/ - - bFirstStep = TRUE; - /* Skip the first Nose-Hoover integration when we get the state from tpx */ - bInitStep = startingBehavior == StartingBehavior::NewSimulation || EI_VV(ir->eI); - bSumEkinhOld = FALSE; - bExchanged = FALSE; - bNeedRepartition = FALSE; - - step = ir->init_step; - step_rel = 0; - - auto stopHandler = stopHandlerBuilder->getStopHandlerMD( - compat::not_null(&signals[eglsSTOPCOND]), simulationsShareState, - MASTER(cr), ir->nstlist, mdrunOptions.reproducible, nstSignalComm, - mdrunOptions.maximumHoursToRun, ir->nstlist == 0, fplog, step, bNS, walltime_accounting); - - auto checkpointHandler = std::make_unique( - compat::make_not_null(&signals[eglsCHKPT]), simulationsShareState, - ir->nstlist == 0, MASTER(cr), mdrunOptions.writeConfout, - mdrunOptions.checkpointOptions.period); - - const bool resetCountersIsLocal = true; - auto resetHandler = std::make_unique( - compat::make_not_null(&signals[eglsRESETCOUNTERS]), - !resetCountersIsLocal, ir->nsteps, MASTER(cr), 
mdrunOptions.timingOptions.resetHalfway, - mdrunOptions.maximumHoursToRun, mdlog, wcycle, walltime_accounting); - - const DDBalanceRegionHandler ddBalanceRegionHandler(cr); - - if (MASTER(cr) && isMultiSim(ms) && !useReplicaExchange) - { - logInitialMultisimStatus(ms, cr, mdlog, simulationsShareState, ir->nsteps, ir->init_step); - } - - /* and stop now if we should */ - bLastStep = (bLastStep || (ir->nsteps >= 0 && step_rel > ir->nsteps)); - while (!bLastStep) - { - - /* Determine if this is a neighbor search step */ - bNStList = (ir->nstlist > 0 && step % ir->nstlist == 0); - - if (bPMETune && bNStList) - { - // This has to be here because PME load balancing is called so early. - // TODO: Move to after all booleans are defined. - if (useGpuForUpdate && !bFirstStep) - { - stateGpu->copyCoordinatesFromGpu(state->x, AtomLocality::Local); - stateGpu->waitCoordinatesReadyOnHost(AtomLocality::Local); - } - /* PME grid + cut-off optimization with GPUs or PME nodes */ - pme_loadbal_do(pme_loadbal, cr, (mdrunOptions.verbose && MASTER(cr)) ? 
stderr : nullptr, - fplog, mdlog, *ir, fr, state->box, state->x, wcycle, step, step_rel, - &bPMETunePrinting, simulationWork.useGpuPmePpCommunication); - } - - wallcycle_start(wcycle, ewcSTEP); - - bLastStep = (step_rel == ir->nsteps); - t = t0 + step * ir->delta_t; - - // TODO Refactor this, so that nstfep does not need a default value of zero - if (ir->efep != efepNO || ir->bSimTemp) - { - /* find and set the current lambdas */ - state->lambda = currentLambdas(step, *(ir->fepvals), state->fep_state); - - bDoDHDL = do_per_step(step, ir->fepvals->nstdhdl); - bDoFEP = ((ir->efep != efepNO) && do_per_step(step, nstfep)); - bDoExpanded = (do_per_step(step, ir->expandedvals->nstexpanded) && (ir->bExpanded) - && (!bFirstStep)); - } - - bDoReplEx = (useReplicaExchange && (step > 0) && !bLastStep - && do_per_step(step, replExParams.exchangeInterval)); - - if (doSimulatedAnnealing) - { - update_annealing_target_temp(ir, t, &upd); - } - - /* Stop Center of Mass motion */ - bStopCM = (ir->comm_mode != ecmNO && do_per_step(step, ir->nstcomm)); - - /* Determine whether or not to do Neighbour Searching */ - bNS = (bFirstStep || bNStList || bExchanged || bNeedRepartition); - - /* Note that the stopHandler will cause termination at nstglobalcomm - * steps. Since this concides with nstcalcenergy, nsttcouple and/or - * nstpcouple steps, we have computed the half-step kinetic energy - * of the previous step and can always output energies at the last step. - */ - bLastStep = bLastStep || stopHandler->stoppingAfterCurrentStep(bNS); - - /* do_log triggers energy and virial calculation. Because this leads - * to different code paths, forces can be different. Thus for exact - * continuation we should avoid extra log output. - * Note that the || bLastStep can result in non-exact continuation - * beyond the last step. But we don't consider that to be an issue. 
- */ - do_log = (do_per_step(step, ir->nstlog) - || (bFirstStep && startingBehavior == StartingBehavior::NewSimulation) || bLastStep); - do_verbose = mdrunOptions.verbose - && (step % mdrunOptions.verboseStepPrintInterval == 0 || bFirstStep || bLastStep); - - if (useGpuForUpdate && !bFirstStep && bNS) - { - // Copy velocities from the GPU on search steps to keep a copy on host (device buffers are reinitialized). - stateGpu->copyVelocitiesFromGpu(state->v, AtomLocality::Local); - stateGpu->waitVelocitiesReadyOnHost(AtomLocality::Local); - // Copy coordinate from the GPU when needed at the search step. - // NOTE: The cases when coordinates needed on CPU for force evaluation are handled in sim_utils. - // NOTE: If the coordinates are to be written into output file they are also copied separately before the output. - stateGpu->copyCoordinatesFromGpu(state->x, AtomLocality::Local); - stateGpu->waitCoordinatesReadyOnHost(AtomLocality::Local); - } - - if (bNS && !(bFirstStep && ir->bContinuation)) - { - bMasterState = FALSE; - /* Correct the new box if it is too skewed */ - if (inputrecDynamicBox(ir)) - { - if (correct_box(fplog, step, state->box)) - { - bMasterState = TRUE; - // If update is offloaded, it should be informed about the box size change - if (useGpuForUpdate) - { - integrator->setPbc(PbcType::Xyz, state->box); - } - } - } - if (DOMAINDECOMP(cr) && bMasterState) - { - dd_collect_state(cr->dd, state, state_global); - } - - if (DOMAINDECOMP(cr)) - { - /* Repartition the domain decomposition */ - dd_partition_system(fplog, mdlog, step, cr, bMasterState, nstglobalcomm, state_global, - *top_global, ir, imdSession, pull_work, state, &f, mdAtoms, &top, - fr, vsite, constr, nrnb, wcycle, do_verbose && !bPMETunePrinting); - shouldCheckNumberOfBondedInteractions = true; - upd.setNumAtoms(state->natoms); - } - } - - // Allocate or re-size GPU halo exchange object, if necessary - if (bNS && havePPDomainDecomposition(cr) && simulationWork.useGpuHaloExchange) - { - 
GMX_RELEASE_ASSERT(fr->deviceStreamManager != nullptr, - "GPU device manager has to be initialized to use GPU " - "version of halo exchange."); - constructGpuHaloExchange(mdlog, *cr, *fr->deviceStreamManager, wcycle); - } - - if (MASTER(cr) && do_log) - { - gmx::EnergyOutput::printHeader(fplog, step, - t); /* can we improve the information printed here? */ - } - - if (ir->efep != efepNO) - { - update_mdatoms(mdatoms, state->lambda[efptMASS]); - } - - if (bExchanged) - { - - /* We need the kinetic energy at minus the half step for determining - * the full step kinetic energy and possibly for T-coupling.*/ - /* This may not be quite working correctly yet . . . . */ - compute_globals(gstat, cr, ir, fr, ekind, makeConstArrayRef(state->x), - makeConstArrayRef(state->v), state->box, mdatoms, nrnb, &vcm, wcycle, - enerd, nullptr, nullptr, nullptr, nullptr, constr, &nullSignaller, - state->box, &totalNumberOfBondedInteractions, &bSumEkinhOld, - CGLO_GSTAT | CGLO_TEMPERATURE | CGLO_CHECK_NUMBER_OF_BONDED_INTERACTIONS); - checkNumberOfBondedInteractions(mdlog, cr, totalNumberOfBondedInteractions, top_global, - &top, makeConstArrayRef(state->x), state->box, - &shouldCheckNumberOfBondedInteractions); - } - clear_mat(force_vir); - - checkpointHandler->decideIfCheckpointingThisStep(bNS, bFirstStep, bLastStep); - - /* Determine the energy and pressure: - * at nstcalcenergy steps and at energy output steps (set below). 
- */ - if (EI_VV(ir->eI) && (!bInitStep)) - { - bCalcEnerStep = do_per_step(step, ir->nstcalcenergy); - bCalcVir = bCalcEnerStep - || (ir->epc != epcNO - && (do_per_step(step, ir->nstpcouple) || do_per_step(step - 1, ir->nstpcouple))); - } - else - { - bCalcEnerStep = do_per_step(step, ir->nstcalcenergy); - bCalcVir = bCalcEnerStep || (ir->epc != epcNO && do_per_step(step, ir->nstpcouple)); - } - bCalcEner = bCalcEnerStep; - - do_ene = (do_per_step(step, ir->nstenergy) || bLastStep); - - if (do_ene || do_log || bDoReplEx) - { - bCalcVir = TRUE; - bCalcEner = TRUE; - } - - /* Do we need global communication ? */ - bGStat = (bCalcVir || bCalcEner || bStopCM || do_per_step(step, nstglobalcomm) - || (EI_VV(ir->eI) && inputrecNvtTrotter(ir) && do_per_step(step - 1, nstglobalcomm))); - - force_flags = (GMX_FORCE_STATECHANGED | ((inputrecDynamicBox(ir)) ? GMX_FORCE_DYNAMICBOX : 0) - | GMX_FORCE_ALLFORCES | (bCalcVir ? GMX_FORCE_VIRIAL : 0) - | (bCalcEner ? GMX_FORCE_ENERGY : 0) | (bDoFEP ? GMX_FORCE_DHDL : 0)); - if (fr->useMts && !do_per_step(step, ir->nstfout)) - { - force_flags |= GMX_FORCE_DO_NOT_NEED_NORMAL_FORCE; - } - - if (shellfc) - { - /* Now is the time to relax the shells */ - relax_shell_flexcon(fplog, cr, ms, mdrunOptions.verbose, enforcedRotation, step, ir, - imdSession, pull_work, bNS, force_flags, &top, constr, enerd, - state->natoms, state->x.arrayRefWithPadding(), - state->v.arrayRefWithPadding(), state->box, state->lambda, - &state->hist, &f.view(), force_vir, mdatoms, nrnb, wcycle, shellfc, - fr, runScheduleWork, t, mu_tot, vsite, ddBalanceRegionHandler); - } - else - { - /* The AWH history need to be saved _before_ doing force calculations where the AWH bias - is updated (or the AWH update will be performed twice for one step when continuing). - It would be best to call this update function from do_md_trajectory_writing but that - would occur after do_force. 
One would have to divide the update_awh function into one - function applying the AWH force and one doing the AWH bias update. The update AWH - bias function could then be called after do_md_trajectory_writing (then containing - update_awh_history). The checkpointing will in the future probably moved to the start - of the md loop which will rid of this issue. */ - if (awh && checkpointHandler->isCheckpointingStep() && MASTER(cr)) - { - awh->updateHistory(state_global->awhHistory.get()); - } - - /* The coordinates (x) are shifted (to get whole molecules) - * in do_force. - * This is parallellized as well, and does communication too. - * Check comments in sim_util.c - */ - do_force(fplog, cr, ms, ir, awh.get(), enforcedRotation, imdSession, pull_work, step, - nrnb, wcycle, &top, state->box, state->x.arrayRefWithPadding(), &state->hist, - &f.view(), force_vir, mdatoms, enerd, state->lambda, fr, runScheduleWork, - vsite, mu_tot, t, ed ? ed->getLegacyED() : nullptr, - (bNS ? GMX_FORCE_NS : 0) | force_flags, ddBalanceRegionHandler); - } - - // VV integrators do not need the following velocity half step - // if it is the first step after starting from a checkpoint. - // That is, the half step is needed on all other steps, and - // also the first step when starting from a .tpr file. - if (EI_VV(ir->eI) && (!bFirstStep || startingBehavior == StartingBehavior::NewSimulation)) - /* ############### START FIRST UPDATE HALF-STEP FOR VV METHODS############### */ - { - rvec* vbuf = nullptr; - - wallcycle_start(wcycle, ewcUPDATE); - if (ir->eI == eiVV && bInitStep) - { - /* if using velocity verlet with full time step Ekin, - * take the first half step only to compute the - * virial for the first step. From there, - * revert back to the initial coordinates - * so that the input is actually the initial step. - */ - snew(vbuf, state->natoms); - copy_rvecn(state->v.rvec_array(), vbuf, 0, - state->natoms); /* should make this better for parallelizing? 
*/ - } - else - { - /* this is for NHC in the Ekin(t+dt/2) version of vv */ - trotter_update(ir, step, ekind, enerd, state, total_vir, mdatoms, &MassQ, - trotter_seq, ettTSEQ1); - } - - upd.update_coords(*ir, step, mdatoms, state, f.view().forceWithPadding(), fcdata, ekind, - M, etrtVELOCITY1, cr, constr != nullptr); - - wallcycle_stop(wcycle, ewcUPDATE); - constrain_velocities(constr, do_log, do_ene, step, state, nullptr, bCalcVir, shake_vir); - wallcycle_start(wcycle, ewcUPDATE); - /* if VV, compute the pressure and constraints */ - /* For VV2, we strictly only need this if using pressure - * control, but we really would like to have accurate pressures - * printed out. - * Think about ways around this in the future? - * For now, keep this choice in comments. - */ - /*bPres = (ir->eI==eiVV || inputrecNptTrotter(ir)); */ - /*bTemp = ((ir->eI==eiVV &&(!bInitStep)) || (ir->eI==eiVVAK && inputrecNptTrotter(ir)));*/ - bPres = TRUE; - bTemp = ((ir->eI == eiVV && (!bInitStep)) || (ir->eI == eiVVAK)); - if (bCalcEner && ir->eI == eiVVAK) - { - bSumEkinhOld = TRUE; - } - /* for vv, the first half of the integration actually corresponds to the previous step. - So we need information from the last step in the first half of the integration */ - if (bGStat || do_per_step(step - 1, nstglobalcomm)) - { - wallcycle_stop(wcycle, ewcUPDATE); - compute_globals(gstat, cr, ir, fr, ekind, makeConstArrayRef(state->x), - makeConstArrayRef(state->v), state->box, mdatoms, nrnb, &vcm, wcycle, - enerd, force_vir, shake_vir, total_vir, pres, constr, &nullSignaller, - state->box, &totalNumberOfBondedInteractions, &bSumEkinhOld, - (bGStat ? CGLO_GSTAT : 0) | (bCalcEner ? CGLO_ENERGY : 0) - | (bTemp ? CGLO_TEMPERATURE : 0) | (bPres ? CGLO_PRESSURE : 0) - | (bPres ? CGLO_CONSTRAINT : 0) | (bStopCM ? CGLO_STOPCM : 0) - | (shouldCheckNumberOfBondedInteractions ? 
CGLO_CHECK_NUMBER_OF_BONDED_INTERACTIONS - : 0) - | CGLO_SCALEEKIN); - /* explanation of above: - a) We compute Ekin at the full time step - if 1) we are using the AveVel Ekin, and it's not the - initial step, or 2) if we are using AveEkin, but need the full - time step kinetic energy for the pressure (always true now, since we want accurate statistics). - b) If we are using EkinAveEkin for the kinetic energy for the temperature control, we still feed in - EkinAveVel because it's needed for the pressure */ - checkNumberOfBondedInteractions(mdlog, cr, totalNumberOfBondedInteractions, - top_global, &top, makeConstArrayRef(state->x), - state->box, &shouldCheckNumberOfBondedInteractions); - if (bStopCM) - { - process_and_stopcm_grp(fplog, &vcm, *mdatoms, makeArrayRef(state->x), - makeArrayRef(state->v)); - inc_nrnb(nrnb, eNR_STOPCM, mdatoms->homenr); - } - wallcycle_start(wcycle, ewcUPDATE); - } - /* temperature scaling and pressure scaling to produce the extended variables at t+dt */ - if (!bInitStep) - { - if (bTrotter) - { - m_add(force_vir, shake_vir, - total_vir); /* we need the un-dispersion corrected total vir here */ - trotter_update(ir, step, ekind, enerd, state, total_vir, mdatoms, &MassQ, - trotter_seq, ettTSEQ2); - - /* TODO This is only needed when we're about to write - * a checkpoint, because we use it after the restart - * (in a kludge?). But what should we be doing if - * the startingBehavior is NewSimulation or bInitStep are true? 
*/ - if (inputrecNptTrotter(ir) || inputrecNphTrotter(ir)) - { - copy_mat(shake_vir, state->svir_prev); - copy_mat(force_vir, state->fvir_prev); - } - if ((inputrecNptTrotter(ir) || inputrecNvtTrotter(ir)) && ir->eI == eiVV) - { - /* update temperature and kinetic energy now that step is over - this is the v(t+dt) point */ - enerd->term[F_TEMP] = - sum_ekin(&(ir->opts), ekind, nullptr, (ir->eI == eiVV), FALSE); - enerd->term[F_EKIN] = trace(ekind->ekin); - } - } - else if (bExchanged) - { - wallcycle_stop(wcycle, ewcUPDATE); - /* We need the kinetic energy at minus the half step for determining - * the full step kinetic energy and possibly for T-coupling.*/ - /* This may not be quite working correctly yet . . . . */ - compute_globals(gstat, cr, ir, fr, ekind, makeConstArrayRef(state->x), - makeConstArrayRef(state->v), state->box, mdatoms, nrnb, &vcm, wcycle, - enerd, nullptr, nullptr, nullptr, nullptr, constr, &nullSignaller, - state->box, nullptr, &bSumEkinhOld, CGLO_GSTAT | CGLO_TEMPERATURE); - wallcycle_start(wcycle, ewcUPDATE); - } - } - /* if it's the initial step, we performed this first step just to get the constraint virial */ - if (ir->eI == eiVV && bInitStep) - { - copy_rvecn(vbuf, state->v.rvec_array(), 0, state->natoms); - sfree(vbuf); - } - wallcycle_stop(wcycle, ewcUPDATE); - } - - /* compute the conserved quantity */ - if (EI_VV(ir->eI)) - { - saved_conserved_quantity = NPT_energy(ir, state, &MassQ); - if (ir->eI == eiVV) - { - last_ekin = enerd->term[F_EKIN]; - } - if ((ir->eDispCorr != edispcEnerPres) && (ir->eDispCorr != edispcAllEnerPres)) - { - saved_conserved_quantity -= enerd->term[F_DISPCORR]; - } - /* sum up the foreign kinetic energy and dK/dl terms for vv. 
currently done every step so that dhdl is correct in the .edr */ - if (ir->efep != efepNO) - { - accumulateKineticLambdaComponents(enerd, state->lambda, *ir->fepvals); - } - } - - /* ######## END FIRST UPDATE STEP ############## */ - /* ######## If doing VV, we now have v(dt) ###### */ - if (bDoExpanded) - { - /* perform extended ensemble sampling in lambda - we don't - actually move to the new state before outputting - statistics, but if performing simulated tempering, we - do update the velocities and the tau_t. */ - - lamnew = ExpandedEnsembleDynamics(fplog, ir, enerd, state, &MassQ, state->fep_state, - state->dfhist, step, state->v.rvec_array(), mdatoms); - /* history is maintained in state->dfhist, but state_global is what is sent to trajectory and log output */ - if (MASTER(cr)) - { - copy_df_history(state_global->dfhist, state->dfhist); - } - } - - // Copy coordinate from the GPU for the output/checkpointing if the update is offloaded and - // coordinates have not already been copied for i) search or ii) CPU force tasks. - if (useGpuForUpdate && !bNS && !runScheduleWork->domainWork.haveCpuLocalForceWork - && (do_per_step(step, ir->nstxout) || do_per_step(step, ir->nstxout_compressed) - || checkpointHandler->isCheckpointingStep())) - { - stateGpu->copyCoordinatesFromGpu(state->x, AtomLocality::Local); - stateGpu->waitCoordinatesReadyOnHost(AtomLocality::Local); - } - // Copy velocities if needed for the output/checkpointing. - // NOTE: Copy on the search steps is done at the beginning of the step. 
- if (useGpuForUpdate && !bNS - && (do_per_step(step, ir->nstvout) || checkpointHandler->isCheckpointingStep())) - { - stateGpu->copyVelocitiesFromGpu(state->v, AtomLocality::Local); - stateGpu->waitVelocitiesReadyOnHost(AtomLocality::Local); - } - // Copy forces for the output if the forces were reduced on the GPU (not the case on virial steps) - // and update is offloaded hence forces are kept on the GPU for update and have not been - // already transferred in do_force(). - // TODO: There should be an improved, explicit mechanism that ensures this copy is only executed - // when the forces are ready on the GPU -- the same synchronizer should be used as the one - // prior to GPU update. - // TODO: When the output flags will be included in step workload, this copy can be combined with the - // copy call in do_force(...). - // NOTE: The forces should not be copied here if the vsites are present, since they were modified - // on host after the D2H copy in do_force(...). - if (runScheduleWork->stepWork.useGpuFBufferOps && (simulationWork.useGpuUpdate && !vsite) - && do_per_step(step, ir->nstfout)) - { - stateGpu->copyForcesFromGpu(f.view().force(), AtomLocality::Local); - stateGpu->waitForcesReadyOnHost(AtomLocality::Local); - } - /* Now we have the energies and forces corresponding to the - * coordinates at time t. We must output all of this before - * the update. - */ - do_md_trajectory_writing(fplog, cr, nfile, fnm, step, step_rel, t, ir, state, state_global, - observablesHistory, top_global, fr, outf, energyOutput, ekind, - f.view().force(), checkpointHandler->isCheckpointingStep(), - bRerunMD, bLastStep, mdrunOptions.writeConfout, bSumEkinhOld); - /* Check if IMD step and do IMD communication, if bIMD is TRUE. */ - bInteractiveMDstep = imdSession->run(step, bNS, state->box, state->x.rvec_array(), t); - - /* kludge -- virial is lost with restart for MTTK NPT control. Must reload (saved earlier). 
*/ - if (startingBehavior != StartingBehavior::NewSimulation && bFirstStep - && (inputrecNptTrotter(ir) || inputrecNphTrotter(ir))) - { - copy_mat(state->svir_prev, shake_vir); - copy_mat(state->fvir_prev, force_vir); - } - - stopHandler->setSignal(); - resetHandler->setSignal(walltime_accounting); - - if (bGStat || !PAR(cr)) - { - /* In parallel we only have to check for checkpointing in steps - * where we do global communication, - * otherwise the other nodes don't know. - */ - checkpointHandler->setSignal(walltime_accounting); - } - - /* ######### START SECOND UPDATE STEP ################# */ - - /* at the start of step, randomize or scale the velocities ((if vv. Restriction of Andersen - controlled in preprocessing */ - - if (ETC_ANDERSEN(ir->etc)) /* keep this outside of update_tcouple because of the extra info required to pass */ - { - gmx_bool bIfRandomize; - bIfRandomize = update_randomize_velocities(ir, step, cr, mdatoms, state->v, &upd, constr); - /* if we have constraints, we have to remove the kinetic energy parallel to the bonds */ - if (constr && bIfRandomize) - { - constrain_velocities(constr, do_log, do_ene, step, state, nullptr, false, nullptr); - } - } - /* Box is changed in update() when we do pressure coupling, - * but we should still use the old box for energy corrections and when - * writing it to the energy file, so it matches the trajectory files for - * the same timestep above. Make a copy in a separate array. - */ - copy_mat(state->box, lastbox); - - dvdl_constr = 0; - - if (!useGpuForUpdate) - { - wallcycle_start(wcycle, ewcUPDATE); - } - /* UPDATE PRESSURE VARIABLES IN TROTTER FORMULATION WITH CONSTRAINTS */ - if (bTrotter) - { - trotter_update(ir, step, ekind, enerd, state, total_vir, mdatoms, &MassQ, trotter_seq, ettTSEQ3); - /* We can only do Berendsen coupling after we have summed - * the kinetic energy or virial. 
Since the happens - * in global_state after update, we should only do it at - * step % nstlist = 1 with bGStatEveryStep=FALSE. - */ - } - else - { - update_tcouple(step, ir, state, ekind, &MassQ, mdatoms); - update_pcouple_before_coordinates(fplog, step, ir, state, pressureCouplingMu, M, bInitStep); - } - - if (EI_VV(ir->eI)) - { - /* velocity half-step update */ - upd.update_coords(*ir, step, mdatoms, state, f.view().forceWithPadding(), fcdata, ekind, - M, etrtVELOCITY2, cr, constr != nullptr); - } - - /* Above, initialize just copies ekinh into ekin, - * it doesn't copy position (for VV), - * and entire integrator for MD. - */ - - if (ir->eI == eiVVAK) - { - cbuf.resize(state->x.size()); - std::copy(state->x.begin(), state->x.end(), cbuf.begin()); - } - - /* With leap-frog type integrators we compute the kinetic energy - * at a whole time step as the average of the half-time step kinetic - * energies of two subsequent steps. Therefore we need to compute the - * half step kinetic energy also if we need energies at the next step. - */ - const bool needHalfStepKineticEnergy = - (!EI_VV(ir->eI) && (do_per_step(step + 1, nstglobalcomm) || step_rel + 1 == ir->nsteps)); - - // Parrinello-Rahman requires the pressure to be availible before the update to compute - // the velocity scaling matrix. Hence, it runs one step after the nstpcouple step. 
- const bool doParrinelloRahman = (ir->epc == epcPARRINELLORAHMAN - && do_per_step(step + ir->nstpcouple - 1, ir->nstpcouple)); - - if (useGpuForUpdate) - { - if (bNS && (bFirstStep || DOMAINDECOMP(cr))) - { - integrator->set(stateGpu->getCoordinates(), stateGpu->getVelocities(), - stateGpu->getForces(), top.idef, *mdatoms, ekind->ngtc); - - // Copy data to the GPU after buffers might have being reinitialized - stateGpu->copyVelocitiesToGpu(state->v, AtomLocality::Local); - stateGpu->copyCoordinatesToGpu(state->x, AtomLocality::Local); - } - - if (simulationWork.useGpuPme && !runScheduleWork->simulationWork.useGpuPmePpCommunication - && !thisRankHasDuty(cr, DUTY_PME)) - { - // The PME forces were recieved to the host, so have to be copied - stateGpu->copyForcesToGpu(f.view().force(), AtomLocality::All); - } - else if (!runScheduleWork->stepWork.useGpuFBufferOps) - { - // The buffer ops were not offloaded this step, so the forces are on the - // host and have to be copied - stateGpu->copyForcesToGpu(f.view().force(), AtomLocality::Local); - } - - const bool doTemperatureScaling = - (ir->etc != etcNO && do_per_step(step + ir->nsttcouple - 1, ir->nsttcouple)); - - // This applies Leap-Frog, LINCS and SETTLE in succession - integrator->integrate(stateGpu->getForcesReadyOnDeviceEvent( - AtomLocality::Local, runScheduleWork->stepWork.useGpuFBufferOps), - ir->delta_t, true, bCalcVir, shake_vir, doTemperatureScaling, - ekind->tcstat, doParrinelloRahman, ir->nstpcouple * ir->delta_t, M); - - // Copy velocities D2H after update if: - // - Globals are computed this step (includes the energy output steps). - // - Temperature is needed for the next step. 
- if (bGStat || needHalfStepKineticEnergy) - { - stateGpu->copyVelocitiesFromGpu(state->v, AtomLocality::Local); - stateGpu->waitVelocitiesReadyOnHost(AtomLocality::Local); - } - } - else - { - /* With multiple time stepping we need to do an additional normal - * update step to obtain the virial, as the actual MTS integration - * using an acceleration where the slow forces are multiplied by mtsFactor. - * Using that acceleration would result in a virial with the slow - * force contribution would be a factor mtsFactor too large. - */ - if (fr->useMts && bCalcVir && constr != nullptr) - { - upd.update_for_constraint_virial(*ir, *mdatoms, *state, f.view().forceWithPadding(), *ekind); - - constrain_coordinates(constr, do_log, do_ene, step, state, - upd.xp()->arrayRefWithPadding(), &dvdl_constr, bCalcVir, shake_vir); - } - - ArrayRefWithPadding forceCombined = - (fr->useMts && step % ir->mtsLevels[1].stepFactor == 0) - ? f.view().forceMtsCombinedWithPadding() - : f.view().forceWithPadding(); - upd.update_coords(*ir, step, mdatoms, state, forceCombined, fcdata, ekind, M, - etrtPOSITION, cr, constr != nullptr); - - wallcycle_stop(wcycle, ewcUPDATE); - - constrain_coordinates(constr, do_log, do_ene, step, state, upd.xp()->arrayRefWithPadding(), - &dvdl_constr, bCalcVir && !fr->useMts, shake_vir); - - upd.update_sd_second_half(*ir, step, &dvdl_constr, mdatoms, state, cr, nrnb, wcycle, - constr, do_log, do_ene); - upd.finish_update(*ir, mdatoms, state, wcycle, constr != nullptr); - } - - if (ir->bPull && ir->pull->bSetPbcRefToPrevStepCOM) - { - updatePrevStepPullCom(pull_work, state); - } - - if (ir->eI == eiVVAK) - { - /* erase F_EKIN and F_TEMP here? 
*/ - /* just compute the kinetic energy at the half step to perform a trotter step */ - compute_globals(gstat, cr, ir, fr, ekind, makeConstArrayRef(state->x), - makeConstArrayRef(state->v), state->box, mdatoms, nrnb, &vcm, wcycle, enerd, - force_vir, shake_vir, total_vir, pres, constr, &nullSignaller, lastbox, - nullptr, &bSumEkinhOld, (bGStat ? CGLO_GSTAT : 0) | CGLO_TEMPERATURE); - wallcycle_start(wcycle, ewcUPDATE); - trotter_update(ir, step, ekind, enerd, state, total_vir, mdatoms, &MassQ, trotter_seq, ettTSEQ4); - /* now we know the scaling, we can compute the positions again */ - std::copy(cbuf.begin(), cbuf.end(), state->x.begin()); - - upd.update_coords(*ir, step, mdatoms, state, f.view().forceWithPadding(), fcdata, ekind, - M, etrtPOSITION, cr, constr != nullptr); - wallcycle_stop(wcycle, ewcUPDATE); - - /* do we need an extra constraint here? just need to copy out of as_rvec_array(state->v.data()) to upd->xp? */ - /* are the small terms in the shake_vir here due - * to numerical errors, or are they important - * physically? I'm thinking they are just errors, but not completely sure. - * For now, will call without actually constraining, constr=NULL*/ - upd.finish_update(*ir, mdatoms, state, wcycle, false); - } - if (EI_VV(ir->eI)) - { - /* this factor or 2 correction is necessary - because half of the constraint force is removed - in the vv step, so we have to double it. See - the Issue #1255. It is not yet clear - if the factor of 2 is exact, or just a very - good approximation, and this will be - investigated. The next step is to see if this - can be done adding a dhdl contribution from the - rattle step, but this is somewhat more - complicated with the current code. Will be - investigated, hopefully for 4.6.3. However, - this current solution is much better than - having it completely wrong. 
- */ - enerd->term[F_DVDL_CONSTR] += 2 * dvdl_constr; - } - else - { - enerd->term[F_DVDL_CONSTR] += dvdl_constr; - } - - if (vsite != nullptr) - { - wallcycle_start(wcycle, ewcVSITECONSTR); - vsite->construct(state->x, ir->delta_t, state->v, state->box); - wallcycle_stop(wcycle, ewcVSITECONSTR); - } - - /* ############## IF NOT VV, Calculate globals HERE ############ */ - /* With Leap-Frog we can skip compute_globals at - * non-communication steps, but we need to calculate - * the kinetic energy one step before communication. - */ - { - // Organize to do inter-simulation signalling on steps if - // and when algorithms require it. - const bool doInterSimSignal = (simulationsShareState && do_per_step(step, nstSignalComm)); - - if (bGStat || needHalfStepKineticEnergy || doInterSimSignal) - { - // Copy coordinates when needed to stop the CM motion. - if (useGpuForUpdate && !EI_VV(ir->eI) && bStopCM) - { - stateGpu->copyCoordinatesFromGpu(state->x, AtomLocality::Local); - stateGpu->waitCoordinatesReadyOnHost(AtomLocality::Local); - } - // Since we're already communicating at this step, we - // can propagate intra-simulation signals. Note that - // check_nstglobalcomm has the responsibility for - // choosing the value of nstglobalcomm that is one way - // bGStat becomes true, so we can't get into a - // situation where e.g. checkpointing can't be - // signalled. - bool doIntraSimSignal = true; - SimulationSignaller signaller(&signals, cr, ms, doInterSimSignal, doIntraSimSignal); - - compute_globals(gstat, cr, ir, fr, ekind, makeConstArrayRef(state->x), - makeConstArrayRef(state->v), state->box, mdatoms, nrnb, &vcm, - wcycle, enerd, force_vir, shake_vir, total_vir, pres, constr, - &signaller, lastbox, &totalNumberOfBondedInteractions, &bSumEkinhOld, - (bGStat ? CGLO_GSTAT : 0) | (!EI_VV(ir->eI) && bCalcEner ? CGLO_ENERGY : 0) - | (!EI_VV(ir->eI) && bStopCM ? CGLO_STOPCM : 0) - | (!EI_VV(ir->eI) ? CGLO_TEMPERATURE : 0) - | (!EI_VV(ir->eI) ? 
CGLO_PRESSURE : 0) | CGLO_CONSTRAINT - | (shouldCheckNumberOfBondedInteractions ? CGLO_CHECK_NUMBER_OF_BONDED_INTERACTIONS - : 0)); - checkNumberOfBondedInteractions(mdlog, cr, totalNumberOfBondedInteractions, - top_global, &top, makeConstArrayRef(state->x), - state->box, &shouldCheckNumberOfBondedInteractions); - if (!EI_VV(ir->eI) && bStopCM) - { - process_and_stopcm_grp(fplog, &vcm, *mdatoms, makeArrayRef(state->x), - makeArrayRef(state->v)); - inc_nrnb(nrnb, eNR_STOPCM, mdatoms->homenr); - - // TODO: The special case of removing CM motion should be dealt more gracefully - if (useGpuForUpdate) - { - stateGpu->copyCoordinatesToGpu(state->x, AtomLocality::Local); - // Here we block until the H2D copy completes because event sync with the - // force kernels that use the coordinates on the next steps is not implemented - // (not because of a race on state->x being modified on the CPU while H2D is in progress). - stateGpu->waitCoordinatesCopiedToDevice(AtomLocality::Local); - // If the COM removal changed the velocities on the CPU, this has to be accounted for. - if (vcm.mode != ecmNO) - { - stateGpu->copyVelocitiesToGpu(state->v, AtomLocality::Local); - } - } - } - } - } - - /* ############# END CALC EKIN AND PRESSURE ################# */ - - /* Note: this is OK, but there are some numerical precision issues with using the convergence of - the virial that should probably be addressed eventually. state->veta has better properies, - but what we actually need entering the new cycle is the new shake_vir value. Ideally, we could - generate the new shake_vir, but test the veta value for convergence. This will take some thought. */ - - if (ir->efep != efepNO && !EI_VV(ir->eI)) - { - /* Sum up the foreign energy and dK/dl terms for md and sd. 
- Currently done every step so that dH/dl is correct in the .edr */ - accumulateKineticLambdaComponents(enerd, state->lambda, *ir->fepvals); - } - - update_pcouple_after_coordinates(fplog, step, ir, mdatoms, pres, force_vir, shake_vir, - pressureCouplingMu, state, nrnb, upd.deform(), !useGpuForUpdate); - - const bool doBerendsenPressureCoupling = - (inputrec->epc == epcBERENDSEN && do_per_step(step, inputrec->nstpcouple)); - const bool doCRescalePressureCoupling = - (inputrec->epc == epcCRESCALE && do_per_step(step, inputrec->nstpcouple)); - if (useGpuForUpdate - && (doBerendsenPressureCoupling || doCRescalePressureCoupling || doParrinelloRahman)) - { - integrator->scaleCoordinates(pressureCouplingMu); - if (doCRescalePressureCoupling) - { - matrix pressureCouplingInvMu; - gmx::invertBoxMatrix(pressureCouplingMu, pressureCouplingInvMu); - integrator->scaleVelocities(pressureCouplingInvMu); - } - integrator->setPbc(PbcType::Xyz, state->box); - } - - /* ################# END UPDATE STEP 2 ################# */ - /* #### We now have r(t+dt) and v(t+dt/2) ############# */ - - /* The coordinates (x) were unshifted in update */ - if (!bGStat) - { - /* We will not sum ekinh_old, - * so signal that we still have to do it. 
- */ - bSumEkinhOld = TRUE; - } - - if (bCalcEner) - { - /* ######### BEGIN PREPARING EDR OUTPUT ########### */ - - /* use the directly determined last velocity, not actually the averaged half steps */ - if (bTrotter && ir->eI == eiVV) - { - enerd->term[F_EKIN] = last_ekin; - } - enerd->term[F_ETOT] = enerd->term[F_EPOT] + enerd->term[F_EKIN]; - - if (integratorHasConservedEnergyQuantity(ir)) - { - if (EI_VV(ir->eI)) - { - enerd->term[F_ECONSERVED] = enerd->term[F_ETOT] + saved_conserved_quantity; - } - else - { - enerd->term[F_ECONSERVED] = enerd->term[F_ETOT] + NPT_energy(ir, state, &MassQ); - } - } - /* ######### END PREPARING EDR OUTPUT ########### */ - } - - /* Output stuff */ - if (MASTER(cr)) - { - if (fplog && do_log && bDoExpanded) - { - /* only needed if doing expanded ensemble */ - PrintFreeEnergyInfoToFile(fplog, ir->fepvals, ir->expandedvals, - ir->bSimTemp ? ir->simtempvals : nullptr, - state_global->dfhist, state->fep_state, ir->nstlog, step); - } - if (bCalcEner) - { - energyOutput.addDataAtEnergyStep( - bDoDHDL, bCalcEnerStep, t, mdatoms->tmass, enerd, ir->fepvals, - ir->expandedvals, lastbox, - PTCouplingArrays{ state->boxv, state->nosehoover_xi, state->nosehoover_vxi, - state->nhpres_xi, state->nhpres_vxi }, - state->fep_state, shake_vir, force_vir, total_vir, pres, ekind, mu_tot, constr); - } - else - { - energyOutput.recordNonEnergyStep(); - } - - gmx_bool do_dr = do_per_step(step, ir->nstdisreout); - gmx_bool do_or = do_per_step(step, ir->nstorireout); - - if (doSimulatedAnnealing) - { - gmx::EnergyOutput::printAnnealingTemperatures(do_log ? fplog : nullptr, groups, - &(ir->opts)); - } - if (do_log || do_ene || do_dr || do_or) - { - energyOutput.printStepToEnergyFile(mdoutf_get_fp_ene(outf), do_ene, do_dr, do_or, - do_log ? 
fplog : nullptr, step, t, - fr->fcdata.get(), awh.get()); - } - if (do_log && ir->bDoAwh && awh->hasFepLambdaDimension()) - { - const bool isInitialOutput = false; - printLambdaStateToLog(fplog, state->lambda, isInitialOutput); - } - - if (ir->bPull) - { - pull_print_output(pull_work, step, t); - } - - if (do_per_step(step, ir->nstlog)) - { - if (fflush(fplog) != 0) - { - gmx_fatal(FARGS, "Cannot flush logfile - maybe you are out of disk space?"); - } - } - } - if (bDoExpanded) - { - /* Have to do this part _after_ outputting the logfile and the edr file */ - /* Gets written into the state at the beginning of next loop*/ - state->fep_state = lamnew; - } - else if (ir->bDoAwh && awh->needForeignEnergyDifferences(step)) - { - state->fep_state = awh->fepLambdaState(); - } - /* Print the remaining wall clock time for the run */ - if (isMasterSimMasterRank(ms, MASTER(cr)) && (do_verbose || gmx_got_usr_signal()) && !bPMETunePrinting) - { - if (shellfc) - { - fprintf(stderr, "\n"); - } - print_time(stderr, walltime_accounting, step, ir, cr); - } - - /* Ion/water position swapping. - * Not done in last step since trajectory writing happens before this call - * in the MD loop and exchanges would be lost anyway. 
*/ - bNeedRepartition = FALSE; - if ((ir->eSwapCoords != eswapNO) && (step > 0) && !bLastStep && do_per_step(step, ir->swap->nstswap)) - { - bNeedRepartition = - do_swapcoords(cr, step, t, ir, swap, wcycle, as_rvec_array(state->x.data()), - state->box, MASTER(cr) && mdrunOptions.verbose, bRerunMD); - - if (bNeedRepartition && DOMAINDECOMP(cr)) - { - dd_collect_state(cr->dd, state, state_global); - } - } - - /* Replica exchange */ - bExchanged = FALSE; - if (bDoReplEx) - { - bExchanged = replica_exchange(fplog, cr, ms, repl_ex, state_global, enerd, state, step, t); - } - - if ((bExchanged || bNeedRepartition) && DOMAINDECOMP(cr)) - { - dd_partition_system(fplog, mdlog, step, cr, TRUE, 1, state_global, *top_global, ir, - imdSession, pull_work, state, &f, mdAtoms, &top, fr, vsite, constr, - nrnb, wcycle, FALSE); - shouldCheckNumberOfBondedInteractions = true; - upd.setNumAtoms(state->natoms); - } - - bFirstStep = FALSE; - bInitStep = FALSE; - - /* ####### SET VARIABLES FOR NEXT ITERATION IF THEY STILL NEED IT ###### */ - /* With all integrators, except VV, we need to retain the pressure - * at the current step for coupling at the next step. - */ - if ((state->flags & (1U << estPRES_PREV)) - && (bGStatEveryStep || (ir->nstpcouple > 0 && step % ir->nstpcouple == 0))) - { - /* Store the pressure in t_state for pressure coupling - * at the next MD step. 
- */ - copy_mat(pres, state->pres_prev); - } - - /* ####### END SET VARIABLES FOR NEXT ITERATION ###### */ - - if ((membed != nullptr) && (!bLastStep)) - { - rescale_membed(step_rel, membed, as_rvec_array(state_global->x.data())); - } - - cycles = wallcycle_stop(wcycle, ewcSTEP); - if (DOMAINDECOMP(cr) && wcycle) - { - dd_cycles_add(cr->dd, cycles, ddCyclStep); - } - - /* increase the MD step number */ - step++; - step_rel++; - -#if GMX_FAHCORE - if (MASTER(cr)) - { - fcReportProgress(ir->nsteps + ir->init_step, step); - } -#endif - - resetHandler->resetCounters(step, step_rel, mdlog, fplog, cr, fr->nbv.get(), nrnb, - fr->pmedata, pme_loadbal, wcycle, walltime_accounting); - - /* If bIMD is TRUE, the master updates the IMD energy record and sends positions to VMD client */ - imdSession->updateEnergyRecordAndSendPositionsAndEnergies(bInteractiveMDstep, step, bCalcEner); - } - /* End of main MD loop */ - - /* Closing TNG files can include compressing data. Therefore it is good to do that - * before stopping the time measurements. 
*/ - mdoutf_tng_close(outf); - - /* Stop measuring walltime */ - walltime_accounting_end_time(walltime_accounting); - - if (!thisRankHasDuty(cr, DUTY_PME)) - { - /* Tell the PME only node to finish */ - gmx_pme_send_finish(cr); - } - - if (MASTER(cr)) - { - if (ir->nstcalcenergy > 0) - { - energyOutput.printEnergyConservation(fplog, ir->simulation_part, EI_MD(ir->eI)); - - gmx::EnergyOutput::printAnnealingTemperatures(fplog, groups, &(ir->opts)); - energyOutput.printAverages(fplog, groups); - } - } - done_mdoutf(outf); - - if (bPMETune) - { - pme_loadbal_done(pme_loadbal, fplog, mdlog, fr->nbv->useGpu()); - } - - done_shellfc(fplog, shellfc, step_rel); - - if (useReplicaExchange && MASTER(cr)) - { - print_replica_exchange_statistics(fplog, repl_ex); - } - - walltime_accounting_set_nsteps_done(walltime_accounting, step_rel); - - global_stat_destroy(gstat); -} diff --git a/patches/gromacs-2021.7.diff/src/gromacs/mdrun/minimize.cpp b/patches/gromacs-2021.7.diff/src/gromacs/mdrun/minimize.cpp deleted file mode 100644 index 0d628b98d4..0000000000 --- a/patches/gromacs-2021.7.diff/src/gromacs/mdrun/minimize.cpp +++ /dev/null @@ -1,2958 +0,0 @@ -/* - * This file is part of the GROMACS molecular simulation package. - * - * Copyright (c) 1991-2000, University of Groningen, The Netherlands. - * Copyright (c) 2001-2004, The GROMACS development team. - * Copyright (c) 2013,2014,2015,2016,2017 The GROMACS development team. - * Copyright (c) 2018,2019,2020,2021, by the GROMACS development team, led by - * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl, - * and including many others, as listed in the AUTHORS file in the - * top-level source directory and at http://www.gromacs.org. - * - * GROMACS is free software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public License - * as published by the Free Software Foundation; either version 2.1 - * of the License, or (at your option) any later version. 
- * - * GROMACS is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public - * License along with GROMACS; if not, see - * http://www.gnu.org/licenses, or write to the Free Software Foundation, - * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. - * - * If you want to redistribute modifications to GROMACS, please - * consider that scientific software is very special. Version - * control is crucial - bugs must be traceable. We will be happy to - * consider code for inclusion in the official distribution, but - * derived work must not be called official GROMACS. Details are found - * in the README & COPYING files - if they are missing, get the - * official version at http://www.gromacs.org. - * - * To help us fund GROMACS development, we humbly ask that you cite - * the research papers on the package. Check out http://www.gromacs.org. - */ -/*! 
\internal \file - * - * \brief This file defines integrators for energy minimization - * - * \author Berk Hess - * \author Erik Lindahl - * \ingroup module_mdrun - */ -#include "gmxpre.h" - -#include "config.h" - -#include -#include -#include - -#include -#include -#include - -#include "gromacs/commandline/filenm.h" -#include "gromacs/domdec/collect.h" -#include "gromacs/domdec/dlbtiming.h" -#include "gromacs/domdec/domdec.h" -#include "gromacs/domdec/domdec_struct.h" -#include "gromacs/domdec/mdsetup.h" -#include "gromacs/domdec/partition.h" -#include "gromacs/ewald/pme_pp.h" -#include "gromacs/fileio/confio.h" -#include "gromacs/fileio/mtxio.h" -#include "gromacs/gmxlib/network.h" -#include "gromacs/gmxlib/nrnb.h" -#include "gromacs/imd/imd.h" -#include "gromacs/linearalgebra/sparsematrix.h" -#include "gromacs/listed_forces/listed_forces.h" -#include "gromacs/math/functions.h" -#include "gromacs/math/vec.h" -#include "gromacs/mdlib/constr.h" -#include "gromacs/mdlib/coupling.h" -#include "gromacs/mdlib/dispersioncorrection.h" -#include "gromacs/mdlib/ebin.h" -#include "gromacs/mdlib/enerdata_utils.h" -#include "gromacs/mdlib/energyoutput.h" -#include "gromacs/mdlib/force.h" -#include "gromacs/mdlib/force_flags.h" -#include "gromacs/mdlib/forcerec.h" -#include "gromacs/mdlib/gmx_omp_nthreads.h" -#include "gromacs/mdlib/md_support.h" -#include "gromacs/mdlib/mdatoms.h" -#include "gromacs/mdlib/stat.h" -#include "gromacs/mdlib/tgroup.h" -#include "gromacs/mdlib/trajectory_writing.h" -#include "gromacs/mdlib/update.h" -#include "gromacs/mdlib/vsite.h" -#include "gromacs/mdrunutility/handlerestart.h" -#include "gromacs/mdrunutility/multisim.h" /*PLUMED*/ -#include "gromacs/mdrunutility/printtime.h" -#include "gromacs/mdtypes/checkpointdata.h" -#include "gromacs/mdtypes/commrec.h" -#include "gromacs/mdtypes/forcebuffers.h" -#include "gromacs/mdtypes/forcerec.h" -#include "gromacs/mdtypes/inputrec.h" -#include "gromacs/mdtypes/interaction_const.h" -#include 
"gromacs/mdtypes/md_enums.h" -#include "gromacs/mdtypes/mdatom.h" -#include "gromacs/mdtypes/mdrunoptions.h" -#include "gromacs/mdtypes/state.h" -#include "gromacs/pbcutil/pbc.h" -#include "gromacs/timing/wallcycle.h" -#include "gromacs/timing/walltime_accounting.h" -#include "gromacs/topology/mtop_util.h" -#include "gromacs/topology/topology.h" -#include "gromacs/utility/cstringutil.h" -#include "gromacs/utility/exceptions.h" -#include "gromacs/utility/fatalerror.h" -#include "gromacs/utility/logger.h" -#include "gromacs/utility/smalloc.h" - -#include "legacysimulator.h" -#include "shellfc.h" - -using gmx::ArrayRef; -using gmx::MdrunScheduleWorkload; -using gmx::RVec; -using gmx::VirtualSitesHandler; - -/* PLUMED */ -#include "../../../Plumed.h" -extern int plumedswitch; -extern plumed plumedmain; -/* END PLUMED */ - -//! Utility structure for manipulating states during EM -typedef struct em_state -{ - //! Copy of the global state - t_state s; - //! Force array - gmx::ForceBuffers f; - //! Potential energy - real epot; - //! Norm of the force - real fnorm; - //! Maximum force - real fmax; - //! Direction - int a_fmax; -} em_state_t; - -//! Print the EM starting conditions -static void print_em_start(FILE* fplog, - const t_commrec* cr, - gmx_walltime_accounting_t walltime_accounting, - gmx_wallcycle_t wcycle, - const char* name) -{ - walltime_accounting_start_time(walltime_accounting); - wallcycle_start(wcycle, ewcRUN); - print_start(fplog, cr, walltime_accounting, name); -} - -//! Stop counting time for EM -static void em_time_end(gmx_walltime_accounting_t walltime_accounting, gmx_wallcycle_t wcycle) -{ - wallcycle_stop(wcycle, ewcRUN); - - walltime_accounting_end_time(walltime_accounting); -} - -//! 
Printing a log file and console header -static void sp_header(FILE* out, const char* minimizer, real ftol, int nsteps) -{ - fprintf(out, "\n"); - fprintf(out, "%s:\n", minimizer); - fprintf(out, " Tolerance (Fmax) = %12.5e\n", ftol); - fprintf(out, " Number of steps = %12d\n", nsteps); -} - -//! Print warning message -static void warn_step(FILE* fp, real ftol, real fmax, gmx_bool bLastStep, gmx_bool bConstrain) -{ - constexpr bool realIsDouble = GMX_DOUBLE; - char buffer[2048]; - - if (!std::isfinite(fmax)) - { - sprintf(buffer, - "\nEnergy minimization has stopped because the force " - "on at least one atom is not finite. This usually means " - "atoms are overlapping. Modify the input coordinates to " - "remove atom overlap or use soft-core potentials with " - "the free energy code to avoid infinite forces.\n%s", - !realIsDouble ? "You could also be lucky that switching to double precision " - "is sufficient to obtain finite forces.\n" - : ""); - } - else if (bLastStep) - { - sprintf(buffer, - "\nEnergy minimization reached the maximum number " - "of steps before the forces reached the requested " - "precision Fmax < %g.\n", - ftol); - } - else - { - sprintf(buffer, - "\nEnergy minimization has stopped, but the forces have " - "not converged to the requested precision Fmax < %g (which " - "may not be possible for your system). It stopped " - "because the algorithm tried to make a new step whose size " - "was too small, or there was no change in the energy since " - "last step. Either way, we regard the minimization as " - "converged to within the available machine precision, " - "given your starting configuration and EM parameters.\n%s%s", - ftol, - !realIsDouble ? "\nDouble precision normally gives you higher accuracy, but " - "this is often not needed for preparing to run molecular " - "dynamics.\n" - : "", - bConstrain ? 
"You might need to increase your constraint accuracy, or turn\n" - "off constraints altogether (set constraints = none in mdp file)\n" - : ""); - } - - fputs(wrap_lines(buffer, 78, 0, FALSE), stderr); - fputs(wrap_lines(buffer, 78, 0, FALSE), fp); -} - -//! Print message about convergence of the EM -static void print_converged(FILE* fp, - const char* alg, - real ftol, - int64_t count, - gmx_bool bDone, - int64_t nsteps, - const em_state_t* ems, - double sqrtNumAtoms) -{ - char buf[STEPSTRSIZE]; - - if (bDone) - { - fprintf(fp, "\n%s converged to Fmax < %g in %s steps\n", alg, ftol, gmx_step_str(count, buf)); - } - else if (count < nsteps) - { - fprintf(fp, - "\n%s converged to machine precision in %s steps,\n" - "but did not reach the requested Fmax < %g.\n", - alg, gmx_step_str(count, buf), ftol); - } - else - { - fprintf(fp, "\n%s did not converge to Fmax < %g in %s steps.\n", alg, ftol, - gmx_step_str(count, buf)); - } - -#if GMX_DOUBLE - fprintf(fp, "Potential Energy = %21.14e\n", ems->epot); - fprintf(fp, "Maximum force = %21.14e on atom %d\n", ems->fmax, ems->a_fmax + 1); - fprintf(fp, "Norm of force = %21.14e\n", ems->fnorm / sqrtNumAtoms); -#else - fprintf(fp, "Potential Energy = %14.7e\n", ems->epot); - fprintf(fp, "Maximum force = %14.7e on atom %d\n", ems->fmax, ems->a_fmax + 1); - fprintf(fp, "Norm of force = %14.7e\n", ems->fnorm / sqrtNumAtoms); -#endif -} - -//! Compute the norm and max of the force array in parallel -static void get_f_norm_max(const t_commrec* cr, - t_grpopts* opts, - t_mdatoms* mdatoms, - gmx::ArrayRef f, - real* fnorm, - real* fmax, - int* a_fmax) -{ - double fnorm2, *sum; - real fmax2, fam; - int la_max, a_max, start, end, i, m, gf; - - /* This routine finds the largest force and returns it. - * On parallel machines the global max is taken. 
- */ - fnorm2 = 0; - fmax2 = 0; - la_max = -1; - start = 0; - end = mdatoms->homenr; - if (mdatoms->cFREEZE) - { - for (i = start; i < end; i++) - { - gf = mdatoms->cFREEZE[i]; - fam = 0; - for (m = 0; m < DIM; m++) - { - if (!opts->nFreeze[gf][m]) - { - fam += gmx::square(f[i][m]); - } - } - fnorm2 += fam; - if (fam > fmax2) - { - fmax2 = fam; - la_max = i; - } - } - } - else - { - for (i = start; i < end; i++) - { - fam = norm2(f[i]); - fnorm2 += fam; - if (fam > fmax2) - { - fmax2 = fam; - la_max = i; - } - } - } - - if (la_max >= 0 && DOMAINDECOMP(cr)) - { - a_max = cr->dd->globalAtomIndices[la_max]; - } - else - { - a_max = la_max; - } - if (PAR(cr)) - { - snew(sum, 2 * cr->nnodes + 1); - sum[2 * cr->nodeid] = fmax2; - sum[2 * cr->nodeid + 1] = a_max; - sum[2 * cr->nnodes] = fnorm2; - gmx_sumd(2 * cr->nnodes + 1, sum, cr); - fnorm2 = sum[2 * cr->nnodes]; - /* Determine the global maximum */ - for (i = 0; i < cr->nnodes; i++) - { - if (sum[2 * i] > fmax2) - { - fmax2 = sum[2 * i]; - a_max = gmx::roundToInt(sum[2 * i + 1]); - } - } - sfree(sum); - } - - if (fnorm) - { - *fnorm = sqrt(fnorm2); - } - if (fmax) - { - *fmax = sqrt(fmax2); - } - if (a_fmax) - { - *a_fmax = a_max; - } -} - -//! Compute the norm of the force -static void get_state_f_norm_max(const t_commrec* cr, t_grpopts* opts, t_mdatoms* mdatoms, em_state_t* ems) -{ - get_f_norm_max(cr, opts, mdatoms, ems->f.view().force(), &ems->fnorm, &ems->fmax, &ems->a_fmax); -} - -//! 
Initialize the energy minimization -static void init_em(FILE* fplog, - const gmx::MDLogger& mdlog, - const char* title, - const t_commrec* cr, - const gmx_multisim_t *ms, /* PLUMED */ - t_inputrec* ir, - gmx::ImdSession* imdSession, - pull_t* pull_work, - t_state* state_global, - const gmx_mtop_t* top_global, - em_state_t* ems, - gmx_localtop_t* top, - t_nrnb* nrnb, - t_forcerec* fr, - gmx::MDAtoms* mdAtoms, - gmx_global_stat_t* gstat, - VirtualSitesHandler* vsite, - gmx::Constraints* constr, - gmx_shellfc_t** shellfc) -{ - real dvdl_constr; - - if (fplog) - { - fprintf(fplog, "Initiating %s\n", title); - } - - if (MASTER(cr)) - { - state_global->ngtc = 0; - } - int* fep_state = MASTER(cr) ? &state_global->fep_state : nullptr; - gmx::ArrayRef lambda = MASTER(cr) ? state_global->lambda : gmx::ArrayRef(); - initialize_lambdas(fplog, *ir, MASTER(cr), fep_state, lambda); - - if (ir->eI == eiNM) - { - GMX_ASSERT(shellfc != nullptr, "With NM we always support shells"); - - *shellfc = - init_shell_flexcon(stdout, top_global, constr ? constr->numFlexibleConstraints() : 0, - ir->nstcalcenergy, DOMAINDECOMP(cr), thisRankHasDuty(cr, DUTY_PME)); - } - else - { - GMX_ASSERT(EI_ENERGY_MINIMIZATION(ir->eI), - "This else currently only handles energy minimizers, consider if your algorithm " - "needs shell/flexible-constraint support"); - - /* With energy minimization, shells and flexible constraints are - * automatically minimized when treated like normal DOFS. 
- */ - if (shellfc != nullptr) - { - *shellfc = nullptr; - } - } - - if (DOMAINDECOMP(cr)) - { - dd_init_local_state(cr->dd, state_global, &ems->s); - - /* Distribute the charge groups over the nodes from the master node */ - dd_partition_system(fplog, mdlog, ir->init_step, cr, TRUE, 1, state_global, *top_global, ir, - imdSession, pull_work, &ems->s, &ems->f, mdAtoms, top, fr, vsite, - constr, nrnb, nullptr, FALSE); - dd_store_state(cr->dd, &ems->s); - } - else - { - state_change_natoms(state_global, state_global->natoms); - /* Just copy the state */ - ems->s = *state_global; - state_change_natoms(&ems->s, ems->s.natoms); - - mdAlgorithmsSetupAtomData(cr, ir, *top_global, top, fr, &ems->f, mdAtoms, constr, vsite, - shellfc ? *shellfc : nullptr); - } - - update_mdatoms(mdAtoms->mdatoms(), ems->s.lambda[efptMASS]); - - if (constr) - { - // TODO how should this cross-module support dependency be managed? - if (ir->eConstrAlg == econtSHAKE && gmx_mtop_ftype_count(top_global, F_CONSTR) > 0) - { - gmx_fatal(FARGS, "Can not do energy minimization with %s, use %s\n", - econstr_names[econtSHAKE], econstr_names[econtLINCS]); - } - - if (!ir->bContinuation) - { - /* Constrain the starting coordinates */ - bool needsLogging = true; - bool computeEnergy = true; - bool computeVirial = false; - dvdl_constr = 0; - constr->apply(needsLogging, computeEnergy, -1, 0, 1.0, ems->s.x.arrayRefWithPadding(), - ems->s.x.arrayRefWithPadding(), ArrayRef(), ems->s.box, - ems->s.lambda[efptFEP], &dvdl_constr, gmx::ArrayRefWithPadding(), - computeVirial, nullptr, gmx::ConstraintVariable::Positions); - } - } - - if (PAR(cr)) - { - *gstat = global_stat_init(ir); - } - else - { - *gstat = nullptr; - } - - calc_shifts(ems->s.box, fr->shift_vec); - - /* PLUMED */ - if(plumedswitch){ - if(ms && ms->numSimulations_>1) { - if(MASTER(cr)) plumed_cmd(plumedmain,"GREX setMPIIntercomm",&ms->mastersComm_); - if(PAR(cr)){ - if(DOMAINDECOMP(cr)) { - plumed_cmd(plumedmain,"GREX 
setMPIIntracomm",&cr->dd->mpi_comm_all); - }else{ - plumed_cmd(plumedmain,"GREX setMPIIntracomm",&cr->mpi_comm_mysim); - } - } - plumed_cmd(plumedmain,"GREX init",nullptr); - } - if(PAR(cr)){ - if(DOMAINDECOMP(cr)) { - plumed_cmd(plumedmain,"setMPIComm",&cr->dd->mpi_comm_all); - }else{ - plumed_cmd(plumedmain,"setMPIComm",&cr->mpi_comm_mysim); - } - } - plumed_cmd(plumedmain,"setNatoms",&top_global->natoms); - plumed_cmd(plumedmain,"setMDEngine","gromacs"); - plumed_cmd(plumedmain,"setLog",fplog); - real real_delta_t; - real_delta_t=ir->delta_t; - plumed_cmd(plumedmain,"setTimestep",&real_delta_t); - plumed_cmd(plumedmain,"init",nullptr); - - if(PAR(cr)){ - if(DOMAINDECOMP(cr)) { - int nat_home = dd_numHomeAtoms(*cr->dd); - plumed_cmd(plumedmain,"setAtomsNlocal",&nat_home); - plumed_cmd(plumedmain,"setAtomsGatindex",cr->dd->globalAtomIndices.data()); - - } - } - } - /* END PLUMED */ -} - -//! Finalize the minimization -static void finish_em(const t_commrec* cr, - gmx_mdoutf_t outf, - gmx_walltime_accounting_t walltime_accounting, - gmx_wallcycle_t wcycle) -{ - if (!thisRankHasDuty(cr, DUTY_PME)) - { - /* Tell the PME only node to finish */ - gmx_pme_send_finish(cr); - } - - done_mdoutf(outf); - - em_time_end(walltime_accounting, wcycle); -} - -//! Swap two different EM states during minimization -static void swap_em_state(em_state_t** ems1, em_state_t** ems2) -{ - em_state_t* tmp; - - tmp = *ems1; - *ems1 = *ems2; - *ems2 = tmp; -} - -//! 
Save the EM trajectory -static void write_em_traj(FILE* fplog, - const t_commrec* cr, - gmx_mdoutf_t outf, - gmx_bool bX, - gmx_bool bF, - const char* confout, - const gmx_mtop_t* top_global, - t_inputrec* ir, - int64_t step, - em_state_t* state, - t_state* state_global, - ObservablesHistory* observablesHistory) -{ - int mdof_flags = 0; - - if (bX) - { - mdof_flags |= MDOF_X; - } - if (bF) - { - mdof_flags |= MDOF_F; - } - - /* If we want IMD output, set appropriate MDOF flag */ - if (ir->bIMD) - { - mdof_flags |= MDOF_IMD; - } - - gmx::WriteCheckpointDataHolder checkpointDataHolder; - mdoutf_write_to_trajectory_files(fplog, cr, outf, mdof_flags, top_global->natoms, step, - static_cast(step), &state->s, state_global, - observablesHistory, state->f.view().force(), &checkpointDataHolder); - - if (confout != nullptr) - { - if (DOMAINDECOMP(cr)) - { - /* If bX=true, x was collected to state_global in the call above */ - if (!bX) - { - auto globalXRef = MASTER(cr) ? state_global->x : gmx::ArrayRef(); - dd_collect_vec(cr->dd, state->s.ddp_count, state->s.ddp_count_cg_gl, state->s.cg_gl, - state->s.x, globalXRef); - } - } - else - { - /* Copy the local state pointer */ - state_global = &state->s; - } - - if (MASTER(cr)) - { - if (ir->pbcType != PbcType::No && !ir->bPeriodicMols && DOMAINDECOMP(cr)) - { - /* Make molecules whole only for confout writing */ - do_pbc_mtop(ir->pbcType, state->s.box, top_global, state_global->x.rvec_array()); - } - - write_sto_conf_mtop(confout, *top_global->name, top_global, - state_global->x.rvec_array(), nullptr, ir->pbcType, state->s.box); - } - } -} - -//! 
\brief Do one minimization step -// -// \returns true when the step succeeded, false when a constraint error occurred -static bool do_em_step(const t_commrec* cr, - t_inputrec* ir, - t_mdatoms* md, - em_state_t* ems1, - real a, - gmx::ArrayRefWithPadding force, - em_state_t* ems2, - gmx::Constraints* constr, - int64_t count) - -{ - t_state *s1, *s2; - int start, end; - real dvdl_constr; - int nthreads gmx_unused; - - bool validStep = true; - - s1 = &ems1->s; - s2 = &ems2->s; - - if (DOMAINDECOMP(cr) && s1->ddp_count != cr->dd->ddp_count) - { - gmx_incons("state mismatch in do_em_step"); - } - - s2->flags = s1->flags; - - if (s2->natoms != s1->natoms) - { - state_change_natoms(s2, s1->natoms); - ems2->f.resize(s2->natoms); - } - if (DOMAINDECOMP(cr) && s2->cg_gl.size() != s1->cg_gl.size()) - { - s2->cg_gl.resize(s1->cg_gl.size()); - } - - copy_mat(s1->box, s2->box); - /* Copy free energy state */ - s2->lambda = s1->lambda; - copy_mat(s1->box, s2->box); - - start = 0; - end = md->homenr; - - nthreads = gmx_omp_nthreads_get(emntUpdate); -#pragma omp parallel num_threads(nthreads) - { - const rvec* x1 = s1->x.rvec_array(); - rvec* x2 = s2->x.rvec_array(); - const rvec* f = as_rvec_array(force.unpaddedArrayRef().data()); - - int gf = 0; -#pragma omp for schedule(static) nowait - for (int i = start; i < end; i++) - { - try - { - if (md->cFREEZE) - { - gf = md->cFREEZE[i]; - } - for (int m = 0; m < DIM; m++) - { - if (ir->opts.nFreeze[gf][m]) - { - x2[i][m] = x1[i][m]; - } - else - { - x2[i][m] = x1[i][m] + a * f[i][m]; - } - } - } - GMX_CATCH_ALL_AND_EXIT_WITH_FATAL_ERROR - } - - if (s2->flags & (1 << estCGP)) - { - /* Copy the CG p vector */ - const rvec* p1 = s1->cg_p.rvec_array(); - rvec* p2 = s2->cg_p.rvec_array(); -#pragma omp for schedule(static) nowait - for (int i = start; i < end; i++) - { - // Trivial OpenMP block that does not throw - copy_rvec(p1[i], p2[i]); - } - } - - if (DOMAINDECOMP(cr)) - { - /* OpenMP does not supported unsigned loop variables */ 
-#pragma omp for schedule(static) nowait - for (gmx::index i = 0; i < gmx::ssize(s2->cg_gl); i++) - { - s2->cg_gl[i] = s1->cg_gl[i]; - } - } - } - - if (DOMAINDECOMP(cr)) - { - s2->ddp_count = s1->ddp_count; - s2->ddp_count_cg_gl = s1->ddp_count_cg_gl; - } - - if (constr) - { - dvdl_constr = 0; - validStep = constr->apply( - TRUE, TRUE, count, 0, 1.0, s1->x.arrayRefWithPadding(), s2->x.arrayRefWithPadding(), - ArrayRef(), s2->box, s2->lambda[efptBONDED], &dvdl_constr, - gmx::ArrayRefWithPadding(), false, nullptr, gmx::ConstraintVariable::Positions); - - if (cr->nnodes > 1) - { - /* This global reduction will affect performance at high - * parallelization, but we can not really avoid it. - * But usually EM is not run at high parallelization. - */ - int reductionBuffer = static_cast(!validStep); - gmx_sumi(1, &reductionBuffer, cr); - validStep = (reductionBuffer == 0); - } - - // We should move this check to the different minimizers - if (!validStep && ir->eI != eiSteep) - { - gmx_fatal(FARGS, - "The coordinates could not be constrained. Minimizer '%s' can not handle " - "constraint failures, use minimizer '%s' before using '%s'.", - EI(ir->eI), EI(eiSteep), EI(ir->eI)); - } - } - - return validStep; -} - -//! Prepare EM for using domain decomposition parallellization -static void em_dd_partition_system(FILE* fplog, - const gmx::MDLogger& mdlog, - int step, - const t_commrec* cr, - const gmx_mtop_t* top_global, - t_inputrec* ir, - gmx::ImdSession* imdSession, - pull_t* pull_work, - em_state_t* ems, - gmx_localtop_t* top, - gmx::MDAtoms* mdAtoms, - t_forcerec* fr, - VirtualSitesHandler* vsite, - gmx::Constraints* constr, - t_nrnb* nrnb, - gmx_wallcycle_t wcycle) -{ - /* Repartition the domain decomposition */ - dd_partition_system(fplog, mdlog, step, cr, FALSE, 1, nullptr, *top_global, ir, imdSession, pull_work, - &ems->s, &ems->f, mdAtoms, top, fr, vsite, constr, nrnb, wcycle, FALSE); - dd_store_state(cr->dd, &ems->s); -} - -namespace -{ - -/*! 
\brief Class to handle the work of setting and doing an energy evaluation. - * - * This class is a mere aggregate of parameters to pass to evaluate an - * energy, so that future changes to names and types of them consume - * less time when refactoring other code. - * - * Aggregate initialization is used, for which the chief risk is that - * if a member is added at the end and not all initializer lists are - * updated, then the member will be value initialized, which will - * typically mean initialization to zero. - * - * Use a braced initializer list to construct one of these. */ -class EnergyEvaluator -{ -public: - /*! \brief Evaluates an energy on the state in \c ems. - * - * \todo In practice, the same objects mu_tot, vir, and pres - * are always passed to this function, so we would rather have - * them as data members. However, their C-array types are - * unsuited for aggregate initialization. When the types - * improve, the call signature of this method can be reduced. - */ - void run(em_state_t* ems, rvec mu_tot, tensor vir, tensor pres, int64_t count, gmx_bool bFirst); - //! Handles logging (deprecated). - FILE* fplog; - //! Handles logging. - const gmx::MDLogger& mdlog; - //! Handles communication. - const t_commrec* cr; - //! Coordinates multi-simulations. - const gmx_multisim_t* ms; - //! Holds the simulation topology. - const gmx_mtop_t* top_global; - //! Holds the domain topology. - gmx_localtop_t* top; - //! User input options. - t_inputrec* inputrec; - //! The Interactive Molecular Dynamics session. - gmx::ImdSession* imdSession; - //! The pull work object. - pull_t* pull_work; - //! Manages flop accounting. - t_nrnb* nrnb; - //! Manages wall cycle accounting. - gmx_wallcycle_t wcycle; - //! Coordinates global reduction. - gmx_global_stat_t gstat; - //! Handles virtual sites. - VirtualSitesHandler* vsite; - //! Handles constraints. - gmx::Constraints* constr; - //! Per-atom data for this domain. - gmx::MDAtoms* mdAtoms; - //! 
Handles how to calculate the forces. - t_forcerec* fr; - //! Schedule of force-calculation work each step for this task. - MdrunScheduleWorkload* runScheduleWork; - //! Stores the computed energies. - gmx_enerdata_t* enerd; -}; - -void EnergyEvaluator::run(em_state_t* ems, rvec mu_tot, tensor vir, tensor pres, int64_t count, gmx_bool bFirst) -{ - real t; - gmx_bool bNS; - tensor force_vir, shake_vir, ekin; - real dvdl_constr; - real terminate = 0; - - /* Set the time to the initial time, the time does not change during EM */ - t = inputrec->init_t; - - if (bFirst || (DOMAINDECOMP(cr) && ems->s.ddp_count < cr->dd->ddp_count)) - { - /* This is the first state or an old state used before the last ns */ - bNS = TRUE; - } - else - { - bNS = FALSE; - if (inputrec->nstlist > 0) - { - bNS = TRUE; - } - } - - if (vsite) - { - vsite->construct(ems->s.x, 1, {}, ems->s.box); - } - - if (DOMAINDECOMP(cr) && bNS) - { - /* Repartition the domain decomposition */ - em_dd_partition_system(fplog, mdlog, count, cr, top_global, inputrec, imdSession, pull_work, - ems, top, mdAtoms, fr, vsite, constr, nrnb, wcycle); - } - - /* Calc force & energy on new trial position */ - /* do_force always puts the charge groups in the box and shifts again - * We do not unshift, so molecules are always whole in congrad.c - */ - /* PLUMED */ - int plumedNeedsEnergy=0; - matrix plumed_vir; - if(plumedswitch){ - long int lstep=count; plumed_cmd(plumedmain,"setStepLong",&lstep); - plumed_cmd(plumedmain,"setPositions",&ems->s.x[0][0]); - plumed_cmd(plumedmain,"setMasses",&mdAtoms->mdatoms()->massT[0]); - plumed_cmd(plumedmain,"setCharges",&mdAtoms->mdatoms()->chargeA[0]); - plumed_cmd(plumedmain,"setBox",&ems->s.box[0][0]); - plumed_cmd(plumedmain,"prepareCalc",nullptr); - plumed_cmd(plumedmain,"setForces",&ems->f.view().force()[0][0]); - plumed_cmd(plumedmain,"isEnergyNeeded",&plumedNeedsEnergy); - clear_mat(plumed_vir); - plumed_cmd(plumedmain,"setVirial",&plumed_vir[0][0]); - } - /* END PLUMED */ - - 
do_force(fplog, cr, ms, inputrec, nullptr, nullptr, imdSession, pull_work, count, nrnb, wcycle, - top, ems->s.box, ems->s.x.arrayRefWithPadding(), &ems->s.hist, &ems->f.view(), force_vir, - mdAtoms->mdatoms(), enerd, ems->s.lambda, fr, runScheduleWork, vsite, mu_tot, t, nullptr, - GMX_FORCE_STATECHANGED | GMX_FORCE_ALLFORCES | GMX_FORCE_VIRIAL | GMX_FORCE_ENERGY - | (bNS ? GMX_FORCE_NS : 0), - DDBalanceRegionHandler(cr)); - - /* PLUMED */ - if(plumedswitch){ - if(plumedNeedsEnergy) { - msmul(force_vir,2.0,plumed_vir); - plumed_cmd(plumedmain,"setEnergy",&enerd->term[F_EPOT]); - plumed_cmd(plumedmain,"performCalc",nullptr); - msmul(plumed_vir,0.5,force_vir); - } else { - msmul(plumed_vir,0.5,plumed_vir); - m_add(force_vir,plumed_vir,force_vir); - } - } - /* END PLUMED */ - - /* Clear the unused shake virial and pressure */ - clear_mat(shake_vir); - clear_mat(pres); - - /* Communicate stuff when parallel */ - if (PAR(cr) && inputrec->eI != eiNM) - { - wallcycle_start(wcycle, ewcMoveE); - - global_stat(gstat, cr, enerd, force_vir, shake_vir, inputrec, nullptr, nullptr, nullptr, 1, - &terminate, nullptr, FALSE, CGLO_ENERGY | CGLO_PRESSURE | CGLO_CONSTRAINT); - - wallcycle_stop(wcycle, ewcMoveE); - } - - if (fr->dispersionCorrection) - { - /* Calculate long range corrections to pressure and energy */ - const DispersionCorrection::Correction correction = - fr->dispersionCorrection->calculate(ems->s.box, ems->s.lambda[efptVDW]); - - enerd->term[F_DISPCORR] = correction.energy; - enerd->term[F_EPOT] += correction.energy; - enerd->term[F_PRES] += correction.pressure; - enerd->term[F_DVDL] += correction.dvdl; - } - else - { - enerd->term[F_DISPCORR] = 0; - } - - ems->epot = enerd->term[F_EPOT]; - - if (constr) - { - /* Project out the constraint components of the force */ - bool needsLogging = false; - bool computeEnergy = false; - bool computeVirial = true; - dvdl_constr = 0; - auto f = ems->f.view().forceWithPadding(); - constr->apply(needsLogging, computeEnergy, count, 0, 
1.0, ems->s.x.arrayRefWithPadding(), f, - f.unpaddedArrayRef(), ems->s.box, ems->s.lambda[efptBONDED], &dvdl_constr, - gmx::ArrayRefWithPadding(), computeVirial, shake_vir, - gmx::ConstraintVariable::ForceDispl); - enerd->term[F_DVDL_CONSTR] += dvdl_constr; - m_add(force_vir, shake_vir, vir); - } - else - { - copy_mat(force_vir, vir); - } - - clear_mat(ekin); - enerd->term[F_PRES] = calc_pres(fr->pbcType, inputrec->nwall, ems->s.box, ekin, vir, pres); - - if (inputrec->efep != efepNO) - { - accumulateKineticLambdaComponents(enerd, ems->s.lambda, *inputrec->fepvals); - } - - if (EI_ENERGY_MINIMIZATION(inputrec->eI)) - { - get_state_f_norm_max(cr, &(inputrec->opts), mdAtoms->mdatoms(), ems); - } -} - -} // namespace - -//! Parallel utility summing energies and forces -static double reorder_partsum(const t_commrec* cr, - t_grpopts* opts, - const gmx_mtop_t* top_global, - const em_state_t* s_min, - const em_state_t* s_b) -{ - if (debug) - { - fprintf(debug, "Doing reorder_partsum\n"); - } - - auto fm = s_min->f.view().force(); - auto fb = s_b->f.view().force(); - - /* Collect fm in a global vector fmg. - * This conflicts with the spirit of domain decomposition, - * but to fully optimize this a much more complicated algorithm is required. 
- */ - const int natoms = top_global->natoms; - rvec* fmg; - snew(fmg, natoms); - - gmx::ArrayRef indicesMin = s_min->s.cg_gl; - int i = 0; - for (int a : indicesMin) - { - copy_rvec(fm[i], fmg[a]); - i++; - } - gmx_sum(top_global->natoms * 3, fmg[0], cr); - - /* Now we will determine the part of the sum for the cgs in state s_b */ - gmx::ArrayRef indicesB = s_b->s.cg_gl; - - double partsum = 0; - i = 0; - int gf = 0; - gmx::ArrayRef grpnrFREEZE = - top_global->groups.groupNumbers[SimulationAtomGroupType::Freeze]; - for (int a : indicesB) - { - if (!grpnrFREEZE.empty()) - { - gf = grpnrFREEZE[i]; - } - for (int m = 0; m < DIM; m++) - { - if (!opts->nFreeze[gf][m]) - { - partsum += (fb[i][m] - fmg[a][m]) * fb[i][m]; - } - } - i++; - } - - sfree(fmg); - - return partsum; -} - -//! Print some stuff, like beta, whatever that means. -static real pr_beta(const t_commrec* cr, - t_grpopts* opts, - t_mdatoms* mdatoms, - const gmx_mtop_t* top_global, - const em_state_t* s_min, - const em_state_t* s_b) -{ - double sum; - - /* This is just the classical Polak-Ribiere calculation of beta; - * it looks a bit complicated since we take freeze groups into account, - * and might have to sum it in parallel runs. - */ - - if (!DOMAINDECOMP(cr) - || (s_min->s.ddp_count == cr->dd->ddp_count && s_b->s.ddp_count == cr->dd->ddp_count)) - { - auto fm = s_min->f.view().force(); - auto fb = s_b->f.view().force(); - sum = 0; - int gf = 0; - /* This part of code can be incorrect with DD, - * since the atom ordering in s_b and s_min might differ. 
- */ - for (int i = 0; i < mdatoms->homenr; i++) - { - if (mdatoms->cFREEZE) - { - gf = mdatoms->cFREEZE[i]; - } - for (int m = 0; m < DIM; m++) - { - if (!opts->nFreeze[gf][m]) - { - sum += (fb[i][m] - fm[i][m]) * fb[i][m]; - } - } - } - } - else - { - /* We need to reorder cgs while summing */ - sum = reorder_partsum(cr, opts, top_global, s_min, s_b); - } - if (PAR(cr)) - { - gmx_sumd(1, &sum, cr); - } - - return sum / gmx::square(s_min->fnorm); -} - -namespace gmx -{ - -void LegacySimulator::do_cg() -{ - const char* CG = "Polak-Ribiere Conjugate Gradients"; - - gmx_localtop_t top(top_global->ffparams); - gmx_global_stat_t gstat; - double tmp, minstep; - real stepsize; - real a, b, c, beta = 0.0; - real epot_repl = 0; - real pnorm; - gmx_bool converged, foundlower; - rvec mu_tot = { 0 }; - gmx_bool do_log = FALSE, do_ene = FALSE, do_x, do_f; - tensor vir, pres; - int number_steps, neval = 0, nstcg = inputrec->nstcgsteep; - int m, step, nminstep; - auto mdatoms = mdAtoms->mdatoms(); - - GMX_LOG(mdlog.info) - .asParagraph() - .appendText( - "Note that activating conjugate gradient energy minimization via the " - "integrator .mdp option and the command gmx mdrun may " - "be available in a different form in a future version of GROMACS, " - "e.g. 
gmx minimize and an .mdp option."); - - step = 0; - - if (MASTER(cr)) - { - // In CG, the state is extended with a search direction - state_global->flags |= (1 << estCGP); - - // Ensure the extra per-atom state array gets allocated - state_change_natoms(state_global, state_global->natoms); - - // Initialize the search direction to zero - for (RVec& cg_p : state_global->cg_p) - { - cg_p = { 0, 0, 0 }; - } - } - - /* Create 4 states on the stack and extract pointers that we will swap */ - em_state_t s0{}, s1{}, s2{}, s3{}; - em_state_t* s_min = &s0; - em_state_t* s_a = &s1; - em_state_t* s_b = &s2; - em_state_t* s_c = &s3; - - /* Init em and store the local state in s_min */ - init_em(fplog, mdlog, CG, cr, ms /*PLUMED*/, inputrec, imdSession, pull_work, state_global, top_global, s_min, - &top, nrnb, fr, mdAtoms, &gstat, vsite, constr, nullptr); - const bool simulationsShareState = false; - gmx_mdoutf* outf = init_mdoutf(fplog, nfile, fnm, mdrunOptions, cr, outputProvider, - mdModulesNotifier, inputrec, top_global, nullptr, wcycle, - StartingBehavior::NewSimulation, simulationsShareState, ms); - gmx::EnergyOutput energyOutput(mdoutf_get_fp_ene(outf), top_global, inputrec, pull_work, - nullptr, false, StartingBehavior::NewSimulation, - simulationsShareState, mdModulesNotifier); - - /* Print to log file */ - print_em_start(fplog, cr, walltime_accounting, wcycle, CG); - - /* Max number of steps */ - number_steps = inputrec->nsteps; - - if (MASTER(cr)) - { - sp_header(stderr, CG, inputrec->em_tol, number_steps); - } - if (fplog) - { - sp_header(fplog, CG, inputrec->em_tol, number_steps); - } - - EnergyEvaluator energyEvaluator{ fplog, mdlog, cr, ms, top_global, &top, - inputrec, imdSession, pull_work, nrnb, wcycle, gstat, - vsite, constr, mdAtoms, fr, runScheduleWork, enerd }; - /* Call the force routine and some auxiliary (neighboursearching etc.) 
*/ - /* do_force always puts the charge groups in the box and shifts again - * We do not unshift, so molecules are always whole in congrad.c - */ - energyEvaluator.run(s_min, mu_tot, vir, pres, -1, TRUE); - - if (MASTER(cr)) - { - /* Copy stuff to the energy bin for easy printing etc. */ - matrix nullBox = {}; - energyOutput.addDataAtEnergyStep(false, false, static_cast(step), mdatoms->tmass, - enerd, nullptr, nullptr, nullBox, PTCouplingArrays(), 0, - nullptr, nullptr, vir, pres, nullptr, mu_tot, constr); - - EnergyOutput::printHeader(fplog, step, step); - energyOutput.printStepToEnergyFile(mdoutf_get_fp_ene(outf), TRUE, FALSE, FALSE, fplog, step, - step, fr->fcdata.get(), nullptr); - } - - /* Estimate/guess the initial stepsize */ - stepsize = inputrec->em_stepsize / s_min->fnorm; - - if (MASTER(cr)) - { - double sqrtNumAtoms = sqrt(static_cast(state_global->natoms)); - fprintf(stderr, " F-max = %12.5e on atom %d\n", s_min->fmax, s_min->a_fmax + 1); - fprintf(stderr, " F-Norm = %12.5e\n", s_min->fnorm / sqrtNumAtoms); - fprintf(stderr, "\n"); - /* and copy to the log file too... */ - fprintf(fplog, " F-max = %12.5e on atom %d\n", s_min->fmax, s_min->a_fmax + 1); - fprintf(fplog, " F-Norm = %12.5e\n", s_min->fnorm / sqrtNumAtoms); - fprintf(fplog, "\n"); - } - /* Start the loop over CG steps. - * Each successful step is counted, and we continue until - * we either converge or reach the max number of steps. - */ - converged = FALSE; - for (step = 0; (number_steps < 0 || step <= number_steps) && !converged; step++) - { - - /* start taking steps in a new direction - * First time we enter the routine, beta=0, and the direction is - * simply the negative gradient. 
- */ - - /* Calculate the new direction in p, and the gradient in this direction, gpa */ - gmx::ArrayRef pm = s_min->s.cg_p; - gmx::ArrayRef sfm = s_min->f.view().force(); - double gpa = 0; - int gf = 0; - for (int i = 0; i < mdatoms->homenr; i++) - { - if (mdatoms->cFREEZE) - { - gf = mdatoms->cFREEZE[i]; - } - for (m = 0; m < DIM; m++) - { - if (!inputrec->opts.nFreeze[gf][m]) - { - pm[i][m] = sfm[i][m] + beta * pm[i][m]; - gpa -= pm[i][m] * sfm[i][m]; - /* f is negative gradient, thus the sign */ - } - else - { - pm[i][m] = 0; - } - } - } - - /* Sum the gradient along the line across CPUs */ - if (PAR(cr)) - { - gmx_sumd(1, &gpa, cr); - } - - /* Calculate the norm of the search vector */ - get_f_norm_max(cr, &(inputrec->opts), mdatoms, pm, &pnorm, nullptr, nullptr); - - /* Just in case stepsize reaches zero due to numerical precision... */ - if (stepsize <= 0) - { - stepsize = inputrec->em_stepsize / pnorm; - } - - /* - * Double check the value of the derivative in the search direction. - * If it is positive it must be due to the old information in the - * CG formula, so just remove that and start over with beta=0. - * This corresponds to a steepest descent step. 
- */ - if (gpa > 0) - { - beta = 0; - step--; /* Don't count this step since we are restarting */ - continue; /* Go back to the beginning of the big for-loop */ - } - - /* Calculate minimum allowed stepsize, before the average (norm) - * relative change in coordinate is smaller than precision - */ - minstep = 0; - auto s_min_x = makeArrayRef(s_min->s.x); - for (int i = 0; i < mdatoms->homenr; i++) - { - for (m = 0; m < DIM; m++) - { - tmp = fabs(s_min_x[i][m]); - if (tmp < 1.0) - { - tmp = 1.0; - } - tmp = pm[i][m] / tmp; - minstep += tmp * tmp; - } - } - /* Add up from all CPUs */ - if (PAR(cr)) - { - gmx_sumd(1, &minstep, cr); - } - - minstep = GMX_REAL_EPS / sqrt(minstep / (3 * top_global->natoms)); - - if (stepsize < minstep) - { - converged = TRUE; - break; - } - - /* Write coordinates if necessary */ - do_x = do_per_step(step, inputrec->nstxout); - do_f = do_per_step(step, inputrec->nstfout); - - write_em_traj(fplog, cr, outf, do_x, do_f, nullptr, top_global, inputrec, step, s_min, - state_global, observablesHistory); - - /* Take a step downhill. - * In theory, we should minimize the function along this direction. - * That is quite possible, but it turns out to take 5-10 function evaluations - * for each line. However, we dont really need to find the exact minimum - - * it is much better to start a new CG step in a modified direction as soon - * as we are close to it. This will save a lot of energy evaluations. - * - * In practice, we just try to take a single step. - * If it worked (i.e. lowered the energy), we increase the stepsize but - * the continue straight to the next CG step without trying to find any minimum. - * If it didn't work (higher energy), there must be a minimum somewhere between - * the old position and the new one. - * - * Due to the finite numerical accuracy, it turns out that it is a good idea - * to even accept a SMALL increase in energy, if the derivative is still downhill. - * This leads to lower final energies in the tests I've done. 
/ Erik - */ - s_a->epot = s_min->epot; - a = 0.0; - c = a + stepsize; /* reference position along line is zero */ - - if (DOMAINDECOMP(cr) && s_min->s.ddp_count < cr->dd->ddp_count) - { - em_dd_partition_system(fplog, mdlog, step, cr, top_global, inputrec, imdSession, - pull_work, s_min, &top, mdAtoms, fr, vsite, constr, nrnb, wcycle); - } - - /* Take a trial step (new coords in s_c) */ - do_em_step(cr, inputrec, mdatoms, s_min, c, s_min->s.cg_p.constArrayRefWithPadding(), s_c, - constr, -1); - - neval++; - /* Calculate energy for the trial step */ - energyEvaluator.run(s_c, mu_tot, vir, pres, -1, FALSE); - - /* Calc derivative along line */ - const rvec* pc = s_c->s.cg_p.rvec_array(); - gmx::ArrayRef sfc = s_c->f.view().force(); - double gpc = 0; - for (int i = 0; i < mdatoms->homenr; i++) - { - for (m = 0; m < DIM; m++) - { - gpc -= pc[i][m] * sfc[i][m]; /* f is negative gradient, thus the sign */ - } - } - /* Sum the gradient along the line across CPUs */ - if (PAR(cr)) - { - gmx_sumd(1, &gpc, cr); - } - - /* This is the max amount of increase in energy we tolerate */ - tmp = std::sqrt(GMX_REAL_EPS) * fabs(s_a->epot); - - /* Accept the step if the energy is lower, or if it is not significantly higher - * and the line derivative is still negative. - */ - if (s_c->epot < s_a->epot || (gpc < 0 && s_c->epot < (s_a->epot + tmp))) - { - foundlower = TRUE; - /* Great, we found a better energy. Increase step for next iteration - * if we are still going down, decrease it otherwise - */ - if (gpc < 0) - { - stepsize *= 1.618034; /* The golden section */ - } - else - { - stepsize *= 0.618034; /* 1/golden section */ - } - } - else - { - /* New energy is the same or higher. We will have to do some work - * to find a smaller value in the interval. Take smaller step next time! - */ - foundlower = FALSE; - stepsize *= 0.618034; - } - - - /* OK, if we didn't find a lower value we will have to locate one now - there must - * be one in the interval [a=0,c]. 
- * The same thing is valid here, though: Don't spend dozens of iterations to find - * the line minimum. We try to interpolate based on the derivative at the endpoints, - * and only continue until we find a lower value. In most cases this means 1-2 iterations. - * - * I also have a safeguard for potentially really pathological functions so we never - * take more than 20 steps before we give up ... - * - * If we already found a lower value we just skip this step and continue to the update. - */ - double gpb; - if (!foundlower) - { - nminstep = 0; - - do - { - /* Select a new trial point. - * If the derivatives at points a & c have different sign we interpolate to zero, - * otherwise just do a bisection. - */ - if (gpa < 0 && gpc > 0) - { - b = a + gpa * (a - c) / (gpc - gpa); - } - else - { - b = 0.5 * (a + c); - } - - /* safeguard if interpolation close to machine accuracy causes errors: - * never go outside the interval - */ - if (b <= a || b >= c) - { - b = 0.5 * (a + c); - } - - if (DOMAINDECOMP(cr) && s_min->s.ddp_count != cr->dd->ddp_count) - { - /* Reload the old state */ - em_dd_partition_system(fplog, mdlog, -1, cr, top_global, inputrec, imdSession, pull_work, - s_min, &top, mdAtoms, fr, vsite, constr, nrnb, wcycle); - } - - /* Take a trial step to this new point - new coords in s_b */ - do_em_step(cr, inputrec, mdatoms, s_min, b, - s_min->s.cg_p.constArrayRefWithPadding(), s_b, constr, -1); - - neval++; - /* Calculate energy for the trial step */ - energyEvaluator.run(s_b, mu_tot, vir, pres, -1, FALSE); - - /* p does not change within a step, but since the domain decomposition - * might change, we have to use cg_p of s_b here. 
- */ - const rvec* pb = s_b->s.cg_p.rvec_array(); - gmx::ArrayRef sfb = s_b->f.view().force(); - gpb = 0; - for (int i = 0; i < mdatoms->homenr; i++) - { - for (m = 0; m < DIM; m++) - { - gpb -= pb[i][m] * sfb[i][m]; /* f is negative gradient, thus the sign */ - } - } - /* Sum the gradient along the line across CPUs */ - if (PAR(cr)) - { - gmx_sumd(1, &gpb, cr); - } - - if (debug) - { - fprintf(debug, "CGE: EpotA %f EpotB %f EpotC %f gpb %f\n", s_a->epot, s_b->epot, - s_c->epot, gpb); - } - - epot_repl = s_b->epot; - - /* Keep one of the intervals based on the value of the derivative at the new point */ - if (gpb > 0) - { - /* Replace c endpoint with b */ - swap_em_state(&s_b, &s_c); - c = b; - gpc = gpb; - } - else - { - /* Replace a endpoint with b */ - swap_em_state(&s_b, &s_a); - a = b; - gpa = gpb; - } - - /* - * Stop search as soon as we find a value smaller than the endpoints. - * Never run more than 20 steps, no matter what. - */ - nminstep++; - } while ((epot_repl > s_a->epot || epot_repl > s_c->epot) && (nminstep < 20)); - - if (std::fabs(epot_repl - s_min->epot) < fabs(s_min->epot) * GMX_REAL_EPS || nminstep >= 20) - { - /* OK. We couldn't find a significantly lower energy. - * If beta==0 this was steepest descent, and then we give up. - * If not, set beta=0 and restart with steepest descent before quitting. - */ - if (beta == 0.0) - { - /* Converged */ - converged = TRUE; - break; - } - else - { - /* Reset memory before giving up */ - beta = 0.0; - continue; - } - } - - /* Select min energy state of A & C, put the best in B. 
- */ - if (s_c->epot < s_a->epot) - { - if (debug) - { - fprintf(debug, "CGE: C (%f) is lower than A (%f), moving C to B\n", s_c->epot, - s_a->epot); - } - swap_em_state(&s_b, &s_c); - gpb = gpc; - } - else - { - if (debug) - { - fprintf(debug, "CGE: A (%f) is lower than C (%f), moving A to B\n", s_a->epot, - s_c->epot); - } - swap_em_state(&s_b, &s_a); - gpb = gpa; - } - } - else - { - if (debug) - { - fprintf(debug, "CGE: Found a lower energy %f, moving C to B\n", s_c->epot); - } - swap_em_state(&s_b, &s_c); - gpb = gpc; - } - - /* new search direction */ - /* beta = 0 means forget all memory and restart with steepest descents. */ - if (nstcg && ((step % nstcg) == 0)) - { - beta = 0.0; - } - else - { - /* s_min->fnorm cannot be zero, because then we would have converged - * and broken out. - */ - - /* Polak-Ribiere update. - * Change to fnorm2/fnorm2_old for Fletcher-Reeves - */ - beta = pr_beta(cr, &inputrec->opts, mdatoms, top_global, s_min, s_b); - } - /* Limit beta to prevent oscillations */ - if (fabs(beta) > 5.0) - { - beta = 0.0; - } - - - /* update positions */ - swap_em_state(&s_min, &s_b); - gpa = gpb; - - /* Print it if necessary */ - if (MASTER(cr)) - { - if (mdrunOptions.verbose) - { - double sqrtNumAtoms = sqrt(static_cast(state_global->natoms)); - fprintf(stderr, "\rStep %d, Epot=%12.6e, Fnorm=%9.3e, Fmax=%9.3e (atom %d)\n", step, - s_min->epot, s_min->fnorm / sqrtNumAtoms, s_min->fmax, s_min->a_fmax + 1); - fflush(stderr); - } - /* Store the new (lower) energies */ - matrix nullBox = {}; - energyOutput.addDataAtEnergyStep(false, false, static_cast(step), mdatoms->tmass, - enerd, nullptr, nullptr, nullBox, PTCouplingArrays(), 0, - nullptr, nullptr, vir, pres, nullptr, mu_tot, constr); - - do_log = do_per_step(step, inputrec->nstlog); - do_ene = do_per_step(step, inputrec->nstenergy); - - imdSession->fillEnergyRecord(step, TRUE); - - if (do_log) - { - EnergyOutput::printHeader(fplog, step, step); - } - 
energyOutput.printStepToEnergyFile(mdoutf_get_fp_ene(outf), do_ene, FALSE, FALSE, - do_log ? fplog : nullptr, step, step, - fr->fcdata.get(), nullptr); - } - - /* Send energies and positions to the IMD client if bIMD is TRUE. */ - if (MASTER(cr) && imdSession->run(step, TRUE, state_global->box, state_global->x.rvec_array(), 0)) - { - imdSession->sendPositionsAndEnergies(); - } - - /* Stop when the maximum force lies below tolerance. - * If we have reached machine precision, converged is already set to true. - */ - converged = converged || (s_min->fmax < inputrec->em_tol); - - } /* End of the loop */ - - if (converged) - { - step--; /* we never took that last step in this case */ - } - if (s_min->fmax > inputrec->em_tol) - { - if (MASTER(cr)) - { - warn_step(fplog, inputrec->em_tol, s_min->fmax, step - 1 == number_steps, FALSE); - } - converged = FALSE; - } - - if (MASTER(cr)) - { - /* If we printed energy and/or logfile last step (which was the last step) - * we don't have to do it again, but otherwise print the final values. - */ - if (!do_log) - { - /* Write final value to log since we didn't do anything the last step */ - EnergyOutput::printHeader(fplog, step, step); - } - if (!do_ene || !do_log) - { - /* Write final energy file entries */ - energyOutput.printStepToEnergyFile(mdoutf_get_fp_ene(outf), !do_ene, FALSE, FALSE, - !do_log ? fplog : nullptr, step, step, - fr->fcdata.get(), nullptr); - } - } - - /* Print some stuff... */ - if (MASTER(cr)) - { - fprintf(stderr, "\nwriting lowest energy coordinates.\n"); - } - - /* IMPORTANT! - * For accurate normal mode calculation it is imperative that we - * store the last conformation into the full precision binary trajectory. - * - * However, we should only do it if we did NOT already write this step - * above (which we did if do_x or do_f was true). - */ - /* Note that with 0 < nstfout != nstxout we can end up with two frames - * in the trajectory with the same step number. 
- */ - do_x = !do_per_step(step, inputrec->nstxout); - do_f = (inputrec->nstfout > 0 && !do_per_step(step, inputrec->nstfout)); - - write_em_traj(fplog, cr, outf, do_x, do_f, ftp2fn(efSTO, nfile, fnm), top_global, inputrec, - step, s_min, state_global, observablesHistory); - - - if (MASTER(cr)) - { - double sqrtNumAtoms = sqrt(static_cast(state_global->natoms)); - print_converged(stderr, CG, inputrec->em_tol, step, converged, number_steps, s_min, sqrtNumAtoms); - print_converged(fplog, CG, inputrec->em_tol, step, converged, number_steps, s_min, sqrtNumAtoms); - - fprintf(fplog, "\nPerformed %d energy evaluations in total.\n", neval); - } - - finish_em(cr, outf, walltime_accounting, wcycle); - - /* To print the actual number of steps we needed somewhere */ - walltime_accounting_set_nsteps_done(walltime_accounting, step); -} - - -void LegacySimulator::do_lbfgs() -{ - static const char* LBFGS = "Low-Memory BFGS Minimizer"; - em_state_t ems; - gmx_localtop_t top(top_global->ffparams); - gmx_global_stat_t gstat; - int ncorr, nmaxcorr, point, cp, neval, nminstep; - double stepsize, step_taken, gpa, gpb, gpc, tmp, minstep; - real * rho, *alpha, *p, *s, **dx, **dg; - real a, b, c, maxdelta, delta; - real diag, Epot0; - real dgdx, dgdg, sq, yr, beta; - gmx_bool converged; - rvec mu_tot = { 0 }; - gmx_bool do_log, do_ene, do_x, do_f, foundlower, *frozen; - tensor vir, pres; - int start, end, number_steps; - int i, k, m, n, gf, step; - int mdof_flags; - auto mdatoms = mdAtoms->mdatoms(); - - GMX_LOG(mdlog.info) - .asParagraph() - .appendText( - "Note that activating L-BFGS energy minimization via the " - "integrator .mdp option and the command gmx mdrun may " - "be available in a different form in a future version of GROMACS, " - "e.g. 
gmx minimize and an .mdp option."); - - if (PAR(cr)) - { - gmx_fatal(FARGS, "L-BFGS minimization only supports a single rank"); - } - - if (nullptr != constr) - { - gmx_fatal( - FARGS, - "The combination of constraints and L-BFGS minimization is not implemented. Either " - "do not use constraints, or use another minimizer (e.g. steepest descent)."); - } - - n = 3 * state_global->natoms; - nmaxcorr = inputrec->nbfgscorr; - - snew(frozen, n); - - snew(p, n); - snew(rho, nmaxcorr); - snew(alpha, nmaxcorr); - - snew(dx, nmaxcorr); - for (i = 0; i < nmaxcorr; i++) - { - snew(dx[i], n); - } - - snew(dg, nmaxcorr); - for (i = 0; i < nmaxcorr; i++) - { - snew(dg[i], n); - } - - step = 0; - neval = 0; - - /* Init em */ - init_em(fplog, mdlog, LBFGS, cr, ms /*PLUMED*/, inputrec, imdSession, pull_work, state_global, top_global, - &ems, &top, nrnb, fr, mdAtoms, &gstat, vsite, constr, nullptr); - const bool simulationsShareState = false; - gmx_mdoutf* outf = init_mdoutf(fplog, nfile, fnm, mdrunOptions, cr, outputProvider, - mdModulesNotifier, inputrec, top_global, nullptr, wcycle, - StartingBehavior::NewSimulation, simulationsShareState, ms); - gmx::EnergyOutput energyOutput(mdoutf_get_fp_ene(outf), top_global, inputrec, pull_work, - nullptr, false, StartingBehavior::NewSimulation, - simulationsShareState, mdModulesNotifier); - - start = 0; - end = mdatoms->homenr; - - /* We need 4 working states */ - em_state_t s0{}, s1{}, s2{}, s3{}; - em_state_t* sa = &s0; - em_state_t* sb = &s1; - em_state_t* sc = &s2; - em_state_t* last = &s3; - /* Initialize by copying the state from ems (we could skip x and f here) */ - *sa = ems; - *sb = ems; - *sc = ems; - - /* Print to log file */ - print_em_start(fplog, cr, walltime_accounting, wcycle, LBFGS); - - do_log = do_ene = do_x = do_f = TRUE; - - /* Max number of steps */ - number_steps = inputrec->nsteps; - - /* Create a 3*natoms index to tell whether each degree of freedom is frozen */ - gf = 0; - for (i = start; i < end; i++) - { - if 
(mdatoms->cFREEZE) - { - gf = mdatoms->cFREEZE[i]; - } - for (m = 0; m < DIM; m++) - { - frozen[3 * i + m] = (inputrec->opts.nFreeze[gf][m] != 0); - } - } - if (MASTER(cr)) - { - sp_header(stderr, LBFGS, inputrec->em_tol, number_steps); - } - if (fplog) - { - sp_header(fplog, LBFGS, inputrec->em_tol, number_steps); - } - - if (vsite) - { - vsite->construct(state_global->x, 1, {}, state_global->box); - } - - /* Call the force routine and some auxiliary (neighboursearching etc.) */ - /* do_force always puts the charge groups in the box and shifts again - * We do not unshift, so molecules are always whole - */ - neval++; - EnergyEvaluator energyEvaluator{ fplog, mdlog, cr, ms, top_global, &top, - inputrec, imdSession, pull_work, nrnb, wcycle, gstat, - vsite, constr, mdAtoms, fr, runScheduleWork, enerd }; - energyEvaluator.run(&ems, mu_tot, vir, pres, -1, TRUE); - - if (MASTER(cr)) - { - /* Copy stuff to the energy bin for easy printing etc. */ - matrix nullBox = {}; - energyOutput.addDataAtEnergyStep(false, false, static_cast(step), mdatoms->tmass, - enerd, nullptr, nullptr, nullBox, PTCouplingArrays(), 0, - nullptr, nullptr, vir, pres, nullptr, mu_tot, constr); - - EnergyOutput::printHeader(fplog, step, step); - energyOutput.printStepToEnergyFile(mdoutf_get_fp_ene(outf), TRUE, FALSE, FALSE, fplog, step, - step, fr->fcdata.get(), nullptr); - } - - /* Set the initial step. - * since it will be multiplied by the non-normalized search direction - * vector (force vector the first time), we scale it by the - * norm of the force. - */ - - if (MASTER(cr)) - { - double sqrtNumAtoms = sqrt(static_cast(state_global->natoms)); - fprintf(stderr, "Using %d BFGS correction steps.\n\n", nmaxcorr); - fprintf(stderr, " F-max = %12.5e on atom %d\n", ems.fmax, ems.a_fmax + 1); - fprintf(stderr, " F-Norm = %12.5e\n", ems.fnorm / sqrtNumAtoms); - fprintf(stderr, "\n"); - /* and copy to the log file too... 
*/ - fprintf(fplog, "Using %d BFGS correction steps.\n\n", nmaxcorr); - fprintf(fplog, " F-max = %12.5e on atom %d\n", ems.fmax, ems.a_fmax + 1); - fprintf(fplog, " F-Norm = %12.5e\n", ems.fnorm / sqrtNumAtoms); - fprintf(fplog, "\n"); - } - - // Point is an index to the memory of search directions, where 0 is the first one. - point = 0; - - // Set initial search direction to the force (-gradient), or 0 for frozen particles. - real* fInit = static_cast(ems.f.view().force().data()[0]); - for (i = 0; i < n; i++) - { - if (!frozen[i]) - { - dx[point][i] = fInit[i]; /* Initial search direction */ - } - else - { - dx[point][i] = 0; - } - } - - // Stepsize will be modified during the search, and actually it is not critical - // (the main efficiency in the algorithm comes from changing directions), but - // we still need an initial value, so estimate it as the inverse of the norm - // so we take small steps where the potential fluctuates a lot. - stepsize = 1.0 / ems.fnorm; - - /* Start the loop over BFGS steps. - * Each successful step is counted, and we continue until - * we either converge or reach the max number of steps. 
- */ - - ncorr = 0; - - /* Set the gradient from the force */ - converged = FALSE; - for (step = 0; (number_steps < 0 || step <= number_steps) && !converged; step++) - { - - /* Write coordinates if necessary */ - do_x = do_per_step(step, inputrec->nstxout); - do_f = do_per_step(step, inputrec->nstfout); - - mdof_flags = 0; - if (do_x) - { - mdof_flags |= MDOF_X; - } - - if (do_f) - { - mdof_flags |= MDOF_F; - } - - if (inputrec->bIMD) - { - mdof_flags |= MDOF_IMD; - } - - gmx::WriteCheckpointDataHolder checkpointDataHolder; - mdoutf_write_to_trajectory_files(fplog, cr, outf, mdof_flags, top_global->natoms, step, - static_cast(step), &ems.s, state_global, observablesHistory, - ems.f.view().force(), &checkpointDataHolder); - - /* Do the linesearching in the direction dx[point][0..(n-1)] */ - - /* make s a pointer to current search direction - point=0 first time we get here */ - s = dx[point]; - - real* xx = static_cast(ems.s.x.rvec_array()[0]); - real* ff = static_cast(ems.f.view().force().data()[0]); - - // calculate line gradient in position A - for (gpa = 0, i = 0; i < n; i++) - { - gpa -= s[i] * ff[i]; - } - - /* Calculate minimum allowed stepsize along the line, before the average (norm) - * relative change in coordinate is smaller than precision - */ - for (minstep = 0, i = 0; i < n; i++) - { - tmp = fabs(xx[i]); - if (tmp < 1.0) - { - tmp = 1.0; - } - tmp = s[i] / tmp; - minstep += tmp * tmp; - } - minstep = GMX_REAL_EPS / sqrt(minstep / n); - - if (stepsize < minstep) - { - converged = TRUE; - break; - } - - // Before taking any steps along the line, store the old position - *last = ems; - real* lastx = static_cast(last->s.x.data()[0]); - real* lastf = static_cast(last->f.view().force().data()[0]); - Epot0 = ems.epot; - - *sa = ems; - - /* Take a step downhill. - * In theory, we should find the actual minimum of the function in this - * direction, somewhere along the line. 
- * That is quite possible, but it turns out to take 5-10 function evaluations - * for each line. However, we dont really need to find the exact minimum - - * it is much better to start a new BFGS step in a modified direction as soon - * as we are close to it. This will save a lot of energy evaluations. - * - * In practice, we just try to take a single step. - * If it worked (i.e. lowered the energy), we increase the stepsize but - * continue straight to the next BFGS step without trying to find any minimum, - * i.e. we change the search direction too. If the line was smooth, it is - * likely we are in a smooth region, and then it makes sense to take longer - * steps in the modified search direction too. - * - * If it didn't work (higher energy), there must be a minimum somewhere between - * the old position and the new one. Then we need to start by finding a lower - * value before we change search direction. Since the energy was apparently - * quite rough, we need to decrease the step size. - * - * Due to the finite numerical accuracy, it turns out that it is a good idea - * to accept a SMALL increase in energy, if the derivative is still downhill. - * This leads to lower final energies in the tests I've done. / Erik - */ - - // State "A" is the first position along the line. - // reference position along line is initially zero - a = 0.0; - - // Check stepsize first. We do not allow displacements - // larger than emstep. - // - do - { - // Pick a new position C by adding stepsize to A. 
- c = a + stepsize; - - // Calculate what the largest change in any individual coordinate - // would be (translation along line * gradient along line) - maxdelta = 0; - for (i = 0; i < n; i++) - { - delta = c * s[i]; - if (delta > maxdelta) - { - maxdelta = delta; - } - } - // If any displacement is larger than the stepsize limit, reduce the step - if (maxdelta > inputrec->em_stepsize) - { - stepsize *= 0.1; - } - } while (maxdelta > inputrec->em_stepsize); - - // Take a trial step and move the coordinate array xc[] to position C - real* xc = static_cast(sc->s.x.rvec_array()[0]); - for (i = 0; i < n; i++) - { - xc[i] = lastx[i] + c * s[i]; - } - - neval++; - // Calculate energy for the trial step in position C - energyEvaluator.run(sc, mu_tot, vir, pres, step, FALSE); - - // Calc line gradient in position C - real* fc = static_cast(sc->f.view().force()[0]); - for (gpc = 0, i = 0; i < n; i++) - { - gpc -= s[i] * fc[i]; /* f is negative gradient, thus the sign */ - } - /* Sum the gradient along the line across CPUs */ - if (PAR(cr)) - { - gmx_sumd(1, &gpc, cr); - } - - // This is the max amount of increase in energy we tolerate. - // By allowing VERY small changes (close to numerical precision) we - // frequently find even better (lower) final energies. - tmp = std::sqrt(GMX_REAL_EPS) * fabs(sa->epot); - - // Accept the step if the energy is lower in the new position C (compared to A), - // or if it is not significantly higher and the line derivative is still negative. - foundlower = sc->epot < sa->epot || (gpc < 0 && sc->epot < (sa->epot + tmp)); - // If true, great, we found a better energy. We no longer try to alter the - // stepsize, but simply accept this new better position. The we select a new - // search direction instead, which will be much more efficient than continuing - // to take smaller steps along a line. Set fnorm based on the new C position, - // which will be used to update the stepsize to 1/fnorm further down. 
- - // If false, the energy is NOT lower in point C, i.e. it will be the same - // or higher than in point A. In this case it is pointless to move to point C, - // so we will have to do more iterations along the same line to find a smaller - // value in the interval [A=0.0,C]. - // Here, A is still 0.0, but that will change when we do a search in the interval - // [0.0,C] below. That search we will do by interpolation or bisection rather - // than with the stepsize, so no need to modify it. For the next search direction - // it will be reset to 1/fnorm anyway. - - if (!foundlower) - { - // OK, if we didn't find a lower value we will have to locate one now - there must - // be one in the interval [a,c]. - // The same thing is valid here, though: Don't spend dozens of iterations to find - // the line minimum. We try to interpolate based on the derivative at the endpoints, - // and only continue until we find a lower value. In most cases this means 1-2 iterations. - // I also have a safeguard for potentially really pathological functions so we never - // take more than 20 steps before we give up. - // If we already found a lower value we just skip this step and continue to the update. - real fnorm = 0; - nminstep = 0; - do - { - // Select a new trial point B in the interval [A,C]. - // If the derivatives at points a & c have different sign we interpolate to zero, - // otherwise just do a bisection since there might be multiple minima/maxima - // inside the interval. 
- if (gpa < 0 && gpc > 0) - { - b = a + gpa * (a - c) / (gpc - gpa); - } - else - { - b = 0.5 * (a + c); - } - - /* safeguard if interpolation close to machine accuracy causes errors: - * never go outside the interval - */ - if (b <= a || b >= c) - { - b = 0.5 * (a + c); - } - - // Take a trial step to point B - real* xb = static_cast(sb->s.x.rvec_array()[0]); - for (i = 0; i < n; i++) - { - xb[i] = lastx[i] + b * s[i]; - } - - neval++; - // Calculate energy for the trial step in point B - energyEvaluator.run(sb, mu_tot, vir, pres, step, FALSE); - fnorm = sb->fnorm; - - // Calculate gradient in point B - real* fb = static_cast(sb->f.view().force()[0]); - for (gpb = 0, i = 0; i < n; i++) - { - gpb -= s[i] * fb[i]; /* f is negative gradient, thus the sign */ - } - /* Sum the gradient along the line across CPUs */ - if (PAR(cr)) - { - gmx_sumd(1, &gpb, cr); - } - - // Keep one of the intervals [A,B] or [B,C] based on the value of the derivative - // at the new point B, and rename the endpoints of this new interval A and C. - if (gpb > 0) - { - /* Replace c endpoint with b */ - c = b; - /* copy state b to c */ - *sc = *sb; - } - else - { - /* Replace a endpoint with b */ - a = b; - /* copy state b to a */ - *sa = *sb; - } - - /* - * Stop search as soon as we find a value smaller than the endpoints, - * or if the tolerance is below machine precision. - * Never run more than 20 steps, no matter what. - */ - nminstep++; - } while ((sb->epot > sa->epot || sb->epot > sc->epot) && (nminstep < 20)); - - if (std::fabs(sb->epot - Epot0) < GMX_REAL_EPS || nminstep >= 20) - { - /* OK. We couldn't find a significantly lower energy. - * If ncorr==0 this was steepest descent, and then we give up. - * If not, reset memory to restart as steepest descent before quitting. 
- */ - if (ncorr == 0) - { - /* Converged */ - converged = TRUE; - break; - } - else - { - /* Reset memory */ - ncorr = 0; - /* Search in gradient direction */ - for (i = 0; i < n; i++) - { - dx[point][i] = ff[i]; - } - /* Reset stepsize */ - stepsize = 1.0 / fnorm; - continue; - } - } - - /* Select min energy state of A & C, put the best in xx/ff/Epot - */ - if (sc->epot < sa->epot) - { - /* Use state C */ - ems = *sc; - step_taken = c; - } - else - { - /* Use state A */ - ems = *sa; - step_taken = a; - } - } - else - { - /* found lower */ - /* Use state C */ - ems = *sc; - step_taken = c; - } - - /* Update the memory information, and calculate a new - * approximation of the inverse hessian - */ - - /* Have new data in Epot, xx, ff */ - if (ncorr < nmaxcorr) - { - ncorr++; - } - - for (i = 0; i < n; i++) - { - dg[point][i] = lastf[i] - ff[i]; - dx[point][i] *= step_taken; - } - - dgdg = 0; - dgdx = 0; - for (i = 0; i < n; i++) - { - dgdg += dg[point][i] * dg[point][i]; - dgdx += dg[point][i] * dx[point][i]; - } - - diag = dgdx / dgdg; - - rho[point] = 1.0 / dgdx; - point++; - - if (point >= nmaxcorr) - { - point = 0; - } - - /* Update */ - for (i = 0; i < n; i++) - { - p[i] = ff[i]; - } - - cp = point; - - /* Recursive update. 
First go back over the memory points */ - for (k = 0; k < ncorr; k++) - { - cp--; - if (cp < 0) - { - cp = ncorr - 1; - } - - sq = 0; - for (i = 0; i < n; i++) - { - sq += dx[cp][i] * p[i]; - } - - alpha[cp] = rho[cp] * sq; - - for (i = 0; i < n; i++) - { - p[i] -= alpha[cp] * dg[cp][i]; - } - } - - for (i = 0; i < n; i++) - { - p[i] *= diag; - } - - /* And then go forward again */ - for (k = 0; k < ncorr; k++) - { - yr = 0; - for (i = 0; i < n; i++) - { - yr += p[i] * dg[cp][i]; - } - - beta = rho[cp] * yr; - beta = alpha[cp] - beta; - - for (i = 0; i < n; i++) - { - p[i] += beta * dx[cp][i]; - } - - cp++; - if (cp >= ncorr) - { - cp = 0; - } - } - - for (i = 0; i < n; i++) - { - if (!frozen[i]) - { - dx[point][i] = p[i]; - } - else - { - dx[point][i] = 0; - } - } - - /* Print it if necessary */ - if (MASTER(cr)) - { - if (mdrunOptions.verbose) - { - double sqrtNumAtoms = sqrt(static_cast(state_global->natoms)); - fprintf(stderr, "\rStep %d, Epot=%12.6e, Fnorm=%9.3e, Fmax=%9.3e (atom %d)\n", step, - ems.epot, ems.fnorm / sqrtNumAtoms, ems.fmax, ems.a_fmax + 1); - fflush(stderr); - } - /* Store the new (lower) energies */ - matrix nullBox = {}; - energyOutput.addDataAtEnergyStep(false, false, static_cast(step), mdatoms->tmass, - enerd, nullptr, nullptr, nullBox, PTCouplingArrays(), 0, - nullptr, nullptr, vir, pres, nullptr, mu_tot, constr); - - do_log = do_per_step(step, inputrec->nstlog); - do_ene = do_per_step(step, inputrec->nstenergy); - - imdSession->fillEnergyRecord(step, TRUE); - - if (do_log) - { - EnergyOutput::printHeader(fplog, step, step); - } - energyOutput.printStepToEnergyFile(mdoutf_get_fp_ene(outf), do_ene, FALSE, FALSE, - do_log ? fplog : nullptr, step, step, - fr->fcdata.get(), nullptr); - } - - /* Send x and E to IMD client, if bIMD is TRUE. 
*/ - if (imdSession->run(step, TRUE, state_global->box, state_global->x.rvec_array(), 0) && MASTER(cr)) - { - imdSession->sendPositionsAndEnergies(); - } - - // Reset stepsize in we are doing more iterations - stepsize = 1.0; - - /* Stop when the maximum force lies below tolerance. - * If we have reached machine precision, converged is already set to true. - */ - converged = converged || (ems.fmax < inputrec->em_tol); - - } /* End of the loop */ - - if (converged) - { - step--; /* we never took that last step in this case */ - } - if (ems.fmax > inputrec->em_tol) - { - if (MASTER(cr)) - { - warn_step(fplog, inputrec->em_tol, ems.fmax, step - 1 == number_steps, FALSE); - } - converged = FALSE; - } - - /* If we printed energy and/or logfile last step (which was the last step) - * we don't have to do it again, but otherwise print the final values. - */ - if (!do_log) /* Write final value to log since we didn't do anythin last step */ - { - EnergyOutput::printHeader(fplog, step, step); - } - if (!do_ene || !do_log) /* Write final energy file entries */ - { - energyOutput.printStepToEnergyFile(mdoutf_get_fp_ene(outf), !do_ene, FALSE, FALSE, - !do_log ? fplog : nullptr, step, step, fr->fcdata.get(), - nullptr); - } - - /* Print some stuff... */ - if (MASTER(cr)) - { - fprintf(stderr, "\nwriting lowest energy coordinates.\n"); - } - - /* IMPORTANT! - * For accurate normal mode calculation it is imperative that we - * store the last conformation into the full precision binary trajectory. - * - * However, we should only do it if we did NOT already write this step - * above (which we did if do_x or do_f was true). 
- */ - do_x = !do_per_step(step, inputrec->nstxout); - do_f = !do_per_step(step, inputrec->nstfout); - write_em_traj(fplog, cr, outf, do_x, do_f, ftp2fn(efSTO, nfile, fnm), top_global, inputrec, - step, &ems, state_global, observablesHistory); - - if (MASTER(cr)) - { - double sqrtNumAtoms = sqrt(static_cast(state_global->natoms)); - print_converged(stderr, LBFGS, inputrec->em_tol, step, converged, number_steps, &ems, sqrtNumAtoms); - print_converged(fplog, LBFGS, inputrec->em_tol, step, converged, number_steps, &ems, sqrtNumAtoms); - - fprintf(fplog, "\nPerformed %d energy evaluations in total.\n", neval); - } - - finish_em(cr, outf, walltime_accounting, wcycle); - - /* To print the actual number of steps we needed somewhere */ - walltime_accounting_set_nsteps_done(walltime_accounting, step); -} - -void LegacySimulator::do_steep() -{ - const char* SD = "Steepest Descents"; - gmx_localtop_t top(top_global->ffparams); - gmx_global_stat_t gstat; - real stepsize; - real ustep; - gmx_bool bDone, bAbort, do_x, do_f; - tensor vir, pres; - rvec mu_tot = { 0 }; - int nsteps; - int count = 0; - int steps_accepted = 0; - auto mdatoms = mdAtoms->mdatoms(); - - GMX_LOG(mdlog.info) - .asParagraph() - .appendText( - "Note that activating steepest-descent energy minimization via the " - "integrator .mdp option and the command gmx mdrun may " - "be available in a different form in a future version of GROMACS, " - "e.g. 
gmx minimize and an .mdp option."); - - /* Create 2 states on the stack and extract pointers that we will swap */ - em_state_t s0{}, s1{}; - em_state_t* s_min = &s0; - em_state_t* s_try = &s1; - - /* Init em and store the local state in s_try */ - init_em(fplog, mdlog, SD, cr, ms /*PLUMED*/, inputrec, imdSession, pull_work, state_global, top_global, s_try, - &top, nrnb, fr, mdAtoms, &gstat, vsite, constr, nullptr); - const bool simulationsShareState = false; - gmx_mdoutf* outf = init_mdoutf(fplog, nfile, fnm, mdrunOptions, cr, outputProvider, - mdModulesNotifier, inputrec, top_global, nullptr, wcycle, - StartingBehavior::NewSimulation, simulationsShareState, ms); - gmx::EnergyOutput energyOutput(mdoutf_get_fp_ene(outf), top_global, inputrec, pull_work, - nullptr, false, StartingBehavior::NewSimulation, - simulationsShareState, mdModulesNotifier); - - /* Print to log file */ - print_em_start(fplog, cr, walltime_accounting, wcycle, SD); - - /* Set variables for stepsize (in nm). This is the largest - * step that we are going to make in any direction. 
- */ - ustep = inputrec->em_stepsize; - stepsize = 0; - - /* Max number of steps */ - nsteps = inputrec->nsteps; - - if (MASTER(cr)) - { - /* Print to the screen */ - sp_header(stderr, SD, inputrec->em_tol, nsteps); - } - if (fplog) - { - sp_header(fplog, SD, inputrec->em_tol, nsteps); - } - EnergyEvaluator energyEvaluator{ fplog, mdlog, cr, ms, top_global, &top, - inputrec, imdSession, pull_work, nrnb, wcycle, gstat, - vsite, constr, mdAtoms, fr, runScheduleWork, enerd }; - - /**** HERE STARTS THE LOOP **** - * count is the counter for the number of steps - * bDone will be TRUE when the minimization has converged - * bAbort will be TRUE when nsteps steps have been performed or when - * the stepsize becomes smaller than is reasonable for machine precision - */ - count = 0; - bDone = FALSE; - bAbort = FALSE; - while (!bDone && !bAbort) - { - bAbort = (nsteps >= 0) && (count == nsteps); - - /* set new coordinates, except for first step */ - bool validStep = true; - if (count > 0) - { - validStep = do_em_step(cr, inputrec, mdatoms, s_min, stepsize, - s_min->f.view().forceWithPadding(), s_try, constr, count); - } - - if (validStep) - { - energyEvaluator.run(s_try, mu_tot, vir, pres, count, count == 0); - } - else - { - // Signal constraint error during stepping with energy=inf - s_try->epot = std::numeric_limits::infinity(); - } - - if (MASTER(cr)) - { - EnergyOutput::printHeader(fplog, count, count); - } - - if (count == 0) - { - s_min->epot = s_try->epot; - } - - /* Print it if necessary */ - if (MASTER(cr)) - { - if (mdrunOptions.verbose) - { - fprintf(stderr, "Step=%5d, Dmax= %6.1e nm, Epot= %12.5e Fmax= %11.5e, atom= %d%c", - count, ustep, s_try->epot, s_try->fmax, s_try->a_fmax + 1, - ((count == 0) || (s_try->epot < s_min->epot)) ? 
'\n' : '\r'); - fflush(stderr); - } - - if ((count == 0) || (s_try->epot < s_min->epot)) - { - /* Store the new (lower) energies */ - matrix nullBox = {}; - energyOutput.addDataAtEnergyStep(false, false, static_cast(count), - mdatoms->tmass, enerd, nullptr, nullptr, nullBox, - PTCouplingArrays(), 0, nullptr, nullptr, vir, pres, - nullptr, mu_tot, constr); - - imdSession->fillEnergyRecord(count, TRUE); - - const bool do_dr = do_per_step(steps_accepted, inputrec->nstdisreout); - const bool do_or = do_per_step(steps_accepted, inputrec->nstorireout); - energyOutput.printStepToEnergyFile(mdoutf_get_fp_ene(outf), TRUE, do_dr, do_or, - fplog, count, count, fr->fcdata.get(), nullptr); - fflush(fplog); - } - } - - /* Now if the new energy is smaller than the previous... - * or if this is the first step! - * or if we did random steps! - */ - - if ((count == 0) || (s_try->epot < s_min->epot)) - { - steps_accepted++; - - /* Test whether the convergence criterion is met... */ - bDone = (s_try->fmax < inputrec->em_tol); - - /* Copy the arrays for force, positions and energy */ - /* The 'Min' array always holds the coords and forces of the minimal - sampled energy */ - swap_em_state(&s_min, &s_try); - if (count > 0) - { - ustep *= 1.2; - } - - /* Write to trn, if necessary */ - do_x = do_per_step(steps_accepted, inputrec->nstxout); - do_f = do_per_step(steps_accepted, inputrec->nstfout); - write_em_traj(fplog, cr, outf, do_x, do_f, nullptr, top_global, inputrec, count, s_min, - state_global, observablesHistory); - } - else - { - /* If energy is not smaller make the step smaller... 
*/ - ustep *= 0.5; - - if (DOMAINDECOMP(cr) && s_min->s.ddp_count != cr->dd->ddp_count) - { - /* Reload the old state */ - em_dd_partition_system(fplog, mdlog, count, cr, top_global, inputrec, imdSession, - pull_work, s_min, &top, mdAtoms, fr, vsite, constr, nrnb, wcycle); - } - } - - // If the force is very small after finishing minimization, - // we risk dividing by zero when calculating the step size. - // So we check first if the minimization has stopped before - // trying to obtain a new step size. - if (!bDone) - { - /* Determine new step */ - stepsize = ustep / s_min->fmax; - } - - /* Check if stepsize is too small, with 1 nm as a characteristic length */ -#if GMX_DOUBLE - if (count == nsteps || ustep < 1e-12) -#else - if (count == nsteps || ustep < 1e-6) -#endif - { - if (MASTER(cr)) - { - warn_step(fplog, inputrec->em_tol, s_min->fmax, count == nsteps, constr != nullptr); - } - bAbort = TRUE; - } - - /* Send IMD energies and positions, if bIMD is TRUE. */ - if (imdSession->run(count, TRUE, MASTER(cr) ? state_global->box : nullptr, - MASTER(cr) ? state_global->x.rvec_array() : nullptr, 0) - && MASTER(cr)) - { - imdSession->sendPositionsAndEnergies(); - } - - count++; - } /* End of the loop */ - - /* Print some data... 
*/ - if (MASTER(cr)) - { - fprintf(stderr, "\nwriting lowest energy coordinates.\n"); - } - write_em_traj(fplog, cr, outf, TRUE, inputrec->nstfout != 0, ftp2fn(efSTO, nfile, fnm), - top_global, inputrec, count, s_min, state_global, observablesHistory); - - if (MASTER(cr)) - { - double sqrtNumAtoms = sqrt(static_cast(state_global->natoms)); - - print_converged(stderr, SD, inputrec->em_tol, count, bDone, nsteps, s_min, sqrtNumAtoms); - print_converged(fplog, SD, inputrec->em_tol, count, bDone, nsteps, s_min, sqrtNumAtoms); - } - - finish_em(cr, outf, walltime_accounting, wcycle); - - /* To print the actual number of steps we needed somewhere */ - inputrec->nsteps = count; - - walltime_accounting_set_nsteps_done(walltime_accounting, count); -} - -void LegacySimulator::do_nm() -{ - const char* NM = "Normal Mode Analysis"; - int nnodes; - gmx_localtop_t top(top_global->ffparams); - gmx_global_stat_t gstat; - tensor vir, pres; - rvec mu_tot = { 0 }; - rvec* dfdx; - gmx_bool bSparse; /* use sparse matrix storage format */ - size_t sz; - gmx_sparsematrix_t* sparse_matrix = nullptr; - real* full_matrix = nullptr; - - /* added with respect to mdrun */ - int row, col; - real der_range = 10.0 * std::sqrt(GMX_REAL_EPS); - real x_min; - bool bIsMaster = MASTER(cr); - auto mdatoms = mdAtoms->mdatoms(); - - GMX_LOG(mdlog.info) - .asParagraph() - .appendText( - "Note that activating normal-mode analysis via the integrator " - ".mdp option and the command gmx mdrun may " - "be available in a different form in a future version of GROMACS, " - "e.g. 
gmx normal-modes."); - - if (constr != nullptr) - { - gmx_fatal( - FARGS, - "Constraints present with Normal Mode Analysis, this combination is not supported"); - } - - gmx_shellfc_t* shellfc; - - em_state_t state_work{}; - - /* Init em and store the local state in state_minimum */ - init_em(fplog, mdlog, NM, cr, ms /*PLUMED*/, inputrec, imdSession, pull_work, state_global, top_global, - &state_work, &top, nrnb, fr, mdAtoms, &gstat, vsite, constr, &shellfc); - const bool simulationsShareState = false; - gmx_mdoutf* outf = init_mdoutf(fplog, nfile, fnm, mdrunOptions, cr, outputProvider, - mdModulesNotifier, inputrec, top_global, nullptr, wcycle, - StartingBehavior::NewSimulation, simulationsShareState, ms); - - std::vector atom_index = get_atom_index(top_global); - std::vector fneg(atom_index.size(), { 0, 0, 0 }); - snew(dfdx, atom_index.size()); - -#if !GMX_DOUBLE - if (bIsMaster) - { - fprintf(stderr, - "NOTE: This version of GROMACS has been compiled in single precision,\n" - " which MIGHT not be accurate enough for normal mode analysis.\n" - " GROMACS now uses sparse matrix storage, so the memory requirements\n" - " are fairly modest even if you recompile in double precision.\n\n"); - } -#endif - - /* Check if we can/should use sparse storage format. - * - * Sparse format is only useful when the Hessian itself is sparse, which it - * will be when we use a cutoff. - * For small systems (n<1000) it is easier to always use full matrix format, though. 
- */ - if (EEL_FULL(fr->ic->eeltype) || fr->rlist == 0.0) - { - GMX_LOG(mdlog.warning) - .appendText("Non-cutoff electrostatics used, forcing full Hessian format."); - bSparse = FALSE; - } - else if (atom_index.size() < 1000) - { - GMX_LOG(mdlog.warning) - .appendTextFormatted("Small system size (N=%zu), using full Hessian format.", - atom_index.size()); - bSparse = FALSE; - } - else - { - GMX_LOG(mdlog.warning).appendText("Using compressed symmetric sparse Hessian format."); - bSparse = TRUE; - } - - /* Number of dimensions, based on real atoms, that is not vsites or shell */ - sz = DIM * atom_index.size(); - - fprintf(stderr, "Allocating Hessian memory...\n\n"); - - if (bSparse) - { - sparse_matrix = gmx_sparsematrix_init(sz); - sparse_matrix->compressed_symmetric = TRUE; - } - else - { - snew(full_matrix, sz * sz); - } - - /* Write start time and temperature */ - print_em_start(fplog, cr, walltime_accounting, wcycle, NM); - - /* fudge nr of steps to nr of atoms */ - inputrec->nsteps = atom_index.size() * 2; - - if (bIsMaster) - { - fprintf(stderr, "starting normal mode calculation '%s'\n%" PRId64 " steps.\n\n", - *(top_global->name), inputrec->nsteps); - } - - nnodes = cr->nnodes; - - /* Make evaluate_energy do a single node force calculation */ - cr->nnodes = 1; - EnergyEvaluator energyEvaluator{ fplog, mdlog, cr, ms, top_global, &top, - inputrec, imdSession, pull_work, nrnb, wcycle, gstat, - vsite, constr, mdAtoms, fr, runScheduleWork, enerd }; - energyEvaluator.run(&state_work, mu_tot, vir, pres, -1, TRUE); - cr->nnodes = nnodes; - - /* if forces are not small, warn user */ - get_state_f_norm_max(cr, &(inputrec->opts), mdatoms, &state_work); - - GMX_LOG(mdlog.warning).appendTextFormatted("Maximum force:%12.5e", state_work.fmax); - if (state_work.fmax > 1.0e-3) - { - GMX_LOG(mdlog.warning) - .appendText( - "The force is probably not small enough to " - "ensure that you are at a minimum.\n" - "Be aware that negative eigenvalues may occur\n" - "when the 
resulting matrix is diagonalized."); - } - - /*********************************************************** - * - * Loop over all pairs in matrix - * - * do_force called twice. Once with positive and - * once with negative displacement - * - ************************************************************/ - - /* Steps are divided one by one over the nodes */ - bool bNS = true; - auto state_work_x = makeArrayRef(state_work.s.x); - auto state_work_f = state_work.f.view().force(); - for (index aid = cr->nodeid; aid < ssize(atom_index); aid += nnodes) - { - size_t atom = atom_index[aid]; - for (size_t d = 0; d < DIM; d++) - { - int64_t step = 0; - int force_flags = GMX_FORCE_STATECHANGED | GMX_FORCE_ALLFORCES; - double t = 0; - - x_min = state_work_x[atom][d]; - - for (unsigned int dx = 0; (dx < 2); dx++) - { - if (dx == 0) - { - state_work_x[atom][d] = x_min - der_range; - } - else - { - state_work_x[atom][d] = x_min + der_range; - } - - /* Make evaluate_energy do a single node force calculation */ - cr->nnodes = 1; - if (shellfc) - { - /* Now is the time to relax the shells */ - relax_shell_flexcon(fplog, cr, ms, mdrunOptions.verbose, nullptr, step, inputrec, - imdSession, pull_work, bNS, force_flags, &top, constr, enerd, - state_work.s.natoms, state_work.s.x.arrayRefWithPadding(), - state_work.s.v.arrayRefWithPadding(), state_work.s.box, - state_work.s.lambda, &state_work.s.hist, &state_work.f.view(), - vir, mdatoms, nrnb, wcycle, shellfc, fr, runScheduleWork, t, - mu_tot, vsite, DDBalanceRegionHandler(nullptr)); - bNS = false; - step++; - } - else - { - energyEvaluator.run(&state_work, mu_tot, vir, pres, aid * 2 + dx, FALSE); - } - - cr->nnodes = nnodes; - - if (dx == 0) - { - std::copy(state_work_f.begin(), state_work_f.begin() + atom_index.size(), - fneg.begin()); - } - } - - /* x is restored to original */ - state_work_x[atom][d] = x_min; - - for (size_t j = 0; j < atom_index.size(); j++) - { - for (size_t k = 0; (k < DIM); k++) - { - dfdx[j][k] = 
-(state_work_f[atom_index[j]][k] - fneg[j][k]) / (2 * der_range); - } - } - - if (!bIsMaster) - { -#if GMX_MPI -# define mpi_type GMX_MPI_REAL - MPI_Send(dfdx[0], atom_index.size() * DIM, mpi_type, MASTER(cr), cr->nodeid, - cr->mpi_comm_mygroup); -#endif - } - else - { - for (index node = 0; (node < nnodes && aid + node < ssize(atom_index)); node++) - { - if (node > 0) - { -#if GMX_MPI - MPI_Status stat; - MPI_Recv(dfdx[0], atom_index.size() * DIM, mpi_type, node, node, - cr->mpi_comm_mygroup, &stat); -# undef mpi_type -#endif - } - - row = (aid + node) * DIM + d; - - for (size_t j = 0; j < atom_index.size(); j++) - { - for (size_t k = 0; k < DIM; k++) - { - col = j * DIM + k; - - if (bSparse) - { - if (col >= row && dfdx[j][k] != 0.0) - { - gmx_sparsematrix_increment_value(sparse_matrix, row, col, dfdx[j][k]); - } - } - else - { - full_matrix[row * sz + col] = dfdx[j][k]; - } - } - } - } - } - - if (mdrunOptions.verbose && fplog) - { - fflush(fplog); - } - } - /* write progress */ - if (bIsMaster && mdrunOptions.verbose) - { - fprintf(stderr, "\rFinished step %d out of %td", - std::min(atom + nnodes, atom_index.size()), ssize(atom_index)); - fflush(stderr); - } - } - - if (bIsMaster) - { - fprintf(stderr, "\n\nWriting Hessian...\n"); - gmx_mtxio_write(ftp2fn(efMTX, nfile, fnm), sz, sz, full_matrix, sparse_matrix); - } - - finish_em(cr, outf, walltime_accounting, wcycle); - - walltime_accounting_set_nsteps_done(walltime_accounting, atom_index.size() * 2); -} - -} // namespace gmx diff --git a/patches/gromacs-2021.7.diff/src/gromacs/mdrun/minimize.cpp.preplumed b/patches/gromacs-2021.7.diff/src/gromacs/mdrun/minimize.cpp.preplumed deleted file mode 100644 index b6b9376e3a..0000000000 --- a/patches/gromacs-2021.7.diff/src/gromacs/mdrun/minimize.cpp.preplumed +++ /dev/null @@ -1,2880 +0,0 @@ -/* - * This file is part of the GROMACS molecular simulation package. - * - * Copyright (c) 1991-2000, University of Groningen, The Netherlands. 
- * Copyright (c) 2001-2004, The GROMACS development team. - * Copyright (c) 2013,2014,2015,2016,2017 The GROMACS development team. - * Copyright (c) 2018,2019,2020,2021, by the GROMACS development team, led by - * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl, - * and including many others, as listed in the AUTHORS file in the - * top-level source directory and at http://www.gromacs.org. - * - * GROMACS is free software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public License - * as published by the Free Software Foundation; either version 2.1 - * of the License, or (at your option) any later version. - * - * GROMACS is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public - * License along with GROMACS; if not, see - * http://www.gnu.org/licenses, or write to the Free Software Foundation, - * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. - * - * If you want to redistribute modifications to GROMACS, please - * consider that scientific software is very special. Version - * control is crucial - bugs must be traceable. We will be happy to - * consider code for inclusion in the official distribution, but - * derived work must not be called official GROMACS. Details are found - * in the README & COPYING files - if they are missing, get the - * official version at http://www.gromacs.org. - * - * To help us fund GROMACS development, we humbly ask that you cite - * the research papers on the package. Check out http://www.gromacs.org. - */ -/*! 
\internal \file - * - * \brief This file defines integrators for energy minimization - * - * \author Berk Hess - * \author Erik Lindahl - * \ingroup module_mdrun - */ -#include "gmxpre.h" - -#include "config.h" - -#include -#include -#include - -#include -#include -#include - -#include "gromacs/commandline/filenm.h" -#include "gromacs/domdec/collect.h" -#include "gromacs/domdec/dlbtiming.h" -#include "gromacs/domdec/domdec.h" -#include "gromacs/domdec/domdec_struct.h" -#include "gromacs/domdec/mdsetup.h" -#include "gromacs/domdec/partition.h" -#include "gromacs/ewald/pme_pp.h" -#include "gromacs/fileio/confio.h" -#include "gromacs/fileio/mtxio.h" -#include "gromacs/gmxlib/network.h" -#include "gromacs/gmxlib/nrnb.h" -#include "gromacs/imd/imd.h" -#include "gromacs/linearalgebra/sparsematrix.h" -#include "gromacs/listed_forces/listed_forces.h" -#include "gromacs/math/functions.h" -#include "gromacs/math/vec.h" -#include "gromacs/mdlib/constr.h" -#include "gromacs/mdlib/coupling.h" -#include "gromacs/mdlib/dispersioncorrection.h" -#include "gromacs/mdlib/ebin.h" -#include "gromacs/mdlib/enerdata_utils.h" -#include "gromacs/mdlib/energyoutput.h" -#include "gromacs/mdlib/force.h" -#include "gromacs/mdlib/force_flags.h" -#include "gromacs/mdlib/forcerec.h" -#include "gromacs/mdlib/gmx_omp_nthreads.h" -#include "gromacs/mdlib/md_support.h" -#include "gromacs/mdlib/mdatoms.h" -#include "gromacs/mdlib/stat.h" -#include "gromacs/mdlib/tgroup.h" -#include "gromacs/mdlib/trajectory_writing.h" -#include "gromacs/mdlib/update.h" -#include "gromacs/mdlib/vsite.h" -#include "gromacs/mdrunutility/handlerestart.h" -#include "gromacs/mdrunutility/printtime.h" -#include "gromacs/mdtypes/checkpointdata.h" -#include "gromacs/mdtypes/commrec.h" -#include "gromacs/mdtypes/forcebuffers.h" -#include "gromacs/mdtypes/forcerec.h" -#include "gromacs/mdtypes/inputrec.h" -#include "gromacs/mdtypes/interaction_const.h" -#include "gromacs/mdtypes/md_enums.h" -#include "gromacs/mdtypes/mdatom.h" 
-#include "gromacs/mdtypes/mdrunoptions.h" -#include "gromacs/mdtypes/state.h" -#include "gromacs/pbcutil/pbc.h" -#include "gromacs/timing/wallcycle.h" -#include "gromacs/timing/walltime_accounting.h" -#include "gromacs/topology/mtop_util.h" -#include "gromacs/topology/topology.h" -#include "gromacs/utility/cstringutil.h" -#include "gromacs/utility/exceptions.h" -#include "gromacs/utility/fatalerror.h" -#include "gromacs/utility/logger.h" -#include "gromacs/utility/smalloc.h" - -#include "legacysimulator.h" -#include "shellfc.h" - -using gmx::ArrayRef; -using gmx::MdrunScheduleWorkload; -using gmx::RVec; -using gmx::VirtualSitesHandler; - -//! Utility structure for manipulating states during EM -typedef struct em_state -{ - //! Copy of the global state - t_state s; - //! Force array - gmx::ForceBuffers f; - //! Potential energy - real epot; - //! Norm of the force - real fnorm; - //! Maximum force - real fmax; - //! Direction - int a_fmax; -} em_state_t; - -//! Print the EM starting conditions -static void print_em_start(FILE* fplog, - const t_commrec* cr, - gmx_walltime_accounting_t walltime_accounting, - gmx_wallcycle_t wcycle, - const char* name) -{ - walltime_accounting_start_time(walltime_accounting); - wallcycle_start(wcycle, ewcRUN); - print_start(fplog, cr, walltime_accounting, name); -} - -//! Stop counting time for EM -static void em_time_end(gmx_walltime_accounting_t walltime_accounting, gmx_wallcycle_t wcycle) -{ - wallcycle_stop(wcycle, ewcRUN); - - walltime_accounting_end_time(walltime_accounting); -} - -//! Printing a log file and console header -static void sp_header(FILE* out, const char* minimizer, real ftol, int nsteps) -{ - fprintf(out, "\n"); - fprintf(out, "%s:\n", minimizer); - fprintf(out, " Tolerance (Fmax) = %12.5e\n", ftol); - fprintf(out, " Number of steps = %12d\n", nsteps); -} - -//! 
Print warning message -static void warn_step(FILE* fp, real ftol, real fmax, gmx_bool bLastStep, gmx_bool bConstrain) -{ - constexpr bool realIsDouble = GMX_DOUBLE; - char buffer[2048]; - - if (!std::isfinite(fmax)) - { - sprintf(buffer, - "\nEnergy minimization has stopped because the force " - "on at least one atom is not finite. This usually means " - "atoms are overlapping. Modify the input coordinates to " - "remove atom overlap or use soft-core potentials with " - "the free energy code to avoid infinite forces.\n%s", - !realIsDouble ? "You could also be lucky that switching to double precision " - "is sufficient to obtain finite forces.\n" - : ""); - } - else if (bLastStep) - { - sprintf(buffer, - "\nEnergy minimization reached the maximum number " - "of steps before the forces reached the requested " - "precision Fmax < %g.\n", - ftol); - } - else - { - sprintf(buffer, - "\nEnergy minimization has stopped, but the forces have " - "not converged to the requested precision Fmax < %g (which " - "may not be possible for your system). It stopped " - "because the algorithm tried to make a new step whose size " - "was too small, or there was no change in the energy since " - "last step. Either way, we regard the minimization as " - "converged to within the available machine precision, " - "given your starting configuration and EM parameters.\n%s%s", - ftol, - !realIsDouble ? "\nDouble precision normally gives you higher accuracy, but " - "this is often not needed for preparing to run molecular " - "dynamics.\n" - : "", - bConstrain ? "You might need to increase your constraint accuracy, or turn\n" - "off constraints altogether (set constraints = none in mdp file)\n" - : ""); - } - - fputs(wrap_lines(buffer, 78, 0, FALSE), stderr); - fputs(wrap_lines(buffer, 78, 0, FALSE), fp); -} - -//! 
Print message about convergence of the EM -static void print_converged(FILE* fp, - const char* alg, - real ftol, - int64_t count, - gmx_bool bDone, - int64_t nsteps, - const em_state_t* ems, - double sqrtNumAtoms) -{ - char buf[STEPSTRSIZE]; - - if (bDone) - { - fprintf(fp, "\n%s converged to Fmax < %g in %s steps\n", alg, ftol, gmx_step_str(count, buf)); - } - else if (count < nsteps) - { - fprintf(fp, - "\n%s converged to machine precision in %s steps,\n" - "but did not reach the requested Fmax < %g.\n", - alg, gmx_step_str(count, buf), ftol); - } - else - { - fprintf(fp, "\n%s did not converge to Fmax < %g in %s steps.\n", alg, ftol, - gmx_step_str(count, buf)); - } - -#if GMX_DOUBLE - fprintf(fp, "Potential Energy = %21.14e\n", ems->epot); - fprintf(fp, "Maximum force = %21.14e on atom %d\n", ems->fmax, ems->a_fmax + 1); - fprintf(fp, "Norm of force = %21.14e\n", ems->fnorm / sqrtNumAtoms); -#else - fprintf(fp, "Potential Energy = %14.7e\n", ems->epot); - fprintf(fp, "Maximum force = %14.7e on atom %d\n", ems->fmax, ems->a_fmax + 1); - fprintf(fp, "Norm of force = %14.7e\n", ems->fnorm / sqrtNumAtoms); -#endif -} - -//! Compute the norm and max of the force array in parallel -static void get_f_norm_max(const t_commrec* cr, - t_grpopts* opts, - t_mdatoms* mdatoms, - gmx::ArrayRef f, - real* fnorm, - real* fmax, - int* a_fmax) -{ - double fnorm2, *sum; - real fmax2, fam; - int la_max, a_max, start, end, i, m, gf; - - /* This routine finds the largest force and returns it. - * On parallel machines the global max is taken. 
- */ - fnorm2 = 0; - fmax2 = 0; - la_max = -1; - start = 0; - end = mdatoms->homenr; - if (mdatoms->cFREEZE) - { - for (i = start; i < end; i++) - { - gf = mdatoms->cFREEZE[i]; - fam = 0; - for (m = 0; m < DIM; m++) - { - if (!opts->nFreeze[gf][m]) - { - fam += gmx::square(f[i][m]); - } - } - fnorm2 += fam; - if (fam > fmax2) - { - fmax2 = fam; - la_max = i; - } - } - } - else - { - for (i = start; i < end; i++) - { - fam = norm2(f[i]); - fnorm2 += fam; - if (fam > fmax2) - { - fmax2 = fam; - la_max = i; - } - } - } - - if (la_max >= 0 && DOMAINDECOMP(cr)) - { - a_max = cr->dd->globalAtomIndices[la_max]; - } - else - { - a_max = la_max; - } - if (PAR(cr)) - { - snew(sum, 2 * cr->nnodes + 1); - sum[2 * cr->nodeid] = fmax2; - sum[2 * cr->nodeid + 1] = a_max; - sum[2 * cr->nnodes] = fnorm2; - gmx_sumd(2 * cr->nnodes + 1, sum, cr); - fnorm2 = sum[2 * cr->nnodes]; - /* Determine the global maximum */ - for (i = 0; i < cr->nnodes; i++) - { - if (sum[2 * i] > fmax2) - { - fmax2 = sum[2 * i]; - a_max = gmx::roundToInt(sum[2 * i + 1]); - } - } - sfree(sum); - } - - if (fnorm) - { - *fnorm = sqrt(fnorm2); - } - if (fmax) - { - *fmax = sqrt(fmax2); - } - if (a_fmax) - { - *a_fmax = a_max; - } -} - -//! Compute the norm of the force -static void get_state_f_norm_max(const t_commrec* cr, t_grpopts* opts, t_mdatoms* mdatoms, em_state_t* ems) -{ - get_f_norm_max(cr, opts, mdatoms, ems->f.view().force(), &ems->fnorm, &ems->fmax, &ems->a_fmax); -} - -//! 
Initialize the energy minimization -static void init_em(FILE* fplog, - const gmx::MDLogger& mdlog, - const char* title, - const t_commrec* cr, - t_inputrec* ir, - gmx::ImdSession* imdSession, - pull_t* pull_work, - t_state* state_global, - const gmx_mtop_t* top_global, - em_state_t* ems, - gmx_localtop_t* top, - t_nrnb* nrnb, - t_forcerec* fr, - gmx::MDAtoms* mdAtoms, - gmx_global_stat_t* gstat, - VirtualSitesHandler* vsite, - gmx::Constraints* constr, - gmx_shellfc_t** shellfc) -{ - real dvdl_constr; - - if (fplog) - { - fprintf(fplog, "Initiating %s\n", title); - } - - if (MASTER(cr)) - { - state_global->ngtc = 0; - } - int* fep_state = MASTER(cr) ? &state_global->fep_state : nullptr; - gmx::ArrayRef lambda = MASTER(cr) ? state_global->lambda : gmx::ArrayRef(); - initialize_lambdas(fplog, *ir, MASTER(cr), fep_state, lambda); - - if (ir->eI == eiNM) - { - GMX_ASSERT(shellfc != nullptr, "With NM we always support shells"); - - *shellfc = - init_shell_flexcon(stdout, top_global, constr ? constr->numFlexibleConstraints() : 0, - ir->nstcalcenergy, DOMAINDECOMP(cr), thisRankHasDuty(cr, DUTY_PME)); - } - else - { - GMX_ASSERT(EI_ENERGY_MINIMIZATION(ir->eI), - "This else currently only handles energy minimizers, consider if your algorithm " - "needs shell/flexible-constraint support"); - - /* With energy minimization, shells and flexible constraints are - * automatically minimized when treated like normal DOFS. 
- */ - if (shellfc != nullptr) - { - *shellfc = nullptr; - } - } - - if (DOMAINDECOMP(cr)) - { - dd_init_local_state(cr->dd, state_global, &ems->s); - - /* Distribute the charge groups over the nodes from the master node */ - dd_partition_system(fplog, mdlog, ir->init_step, cr, TRUE, 1, state_global, *top_global, ir, - imdSession, pull_work, &ems->s, &ems->f, mdAtoms, top, fr, vsite, - constr, nrnb, nullptr, FALSE); - dd_store_state(cr->dd, &ems->s); - } - else - { - state_change_natoms(state_global, state_global->natoms); - /* Just copy the state */ - ems->s = *state_global; - state_change_natoms(&ems->s, ems->s.natoms); - - mdAlgorithmsSetupAtomData(cr, ir, *top_global, top, fr, &ems->f, mdAtoms, constr, vsite, - shellfc ? *shellfc : nullptr); - } - - update_mdatoms(mdAtoms->mdatoms(), ems->s.lambda[efptMASS]); - - if (constr) - { - // TODO how should this cross-module support dependency be managed? - if (ir->eConstrAlg == econtSHAKE && gmx_mtop_ftype_count(top_global, F_CONSTR) > 0) - { - gmx_fatal(FARGS, "Can not do energy minimization with %s, use %s\n", - econstr_names[econtSHAKE], econstr_names[econtLINCS]); - } - - if (!ir->bContinuation) - { - /* Constrain the starting coordinates */ - bool needsLogging = true; - bool computeEnergy = true; - bool computeVirial = false; - dvdl_constr = 0; - constr->apply(needsLogging, computeEnergy, -1, 0, 1.0, ems->s.x.arrayRefWithPadding(), - ems->s.x.arrayRefWithPadding(), ArrayRef(), ems->s.box, - ems->s.lambda[efptFEP], &dvdl_constr, gmx::ArrayRefWithPadding(), - computeVirial, nullptr, gmx::ConstraintVariable::Positions); - } - } - - if (PAR(cr)) - { - *gstat = global_stat_init(ir); - } - else - { - *gstat = nullptr; - } - - calc_shifts(ems->s.box, fr->shift_vec); -} - -//! 
Finalize the minimization -static void finish_em(const t_commrec* cr, - gmx_mdoutf_t outf, - gmx_walltime_accounting_t walltime_accounting, - gmx_wallcycle_t wcycle) -{ - if (!thisRankHasDuty(cr, DUTY_PME)) - { - /* Tell the PME only node to finish */ - gmx_pme_send_finish(cr); - } - - done_mdoutf(outf); - - em_time_end(walltime_accounting, wcycle); -} - -//! Swap two different EM states during minimization -static void swap_em_state(em_state_t** ems1, em_state_t** ems2) -{ - em_state_t* tmp; - - tmp = *ems1; - *ems1 = *ems2; - *ems2 = tmp; -} - -//! Save the EM trajectory -static void write_em_traj(FILE* fplog, - const t_commrec* cr, - gmx_mdoutf_t outf, - gmx_bool bX, - gmx_bool bF, - const char* confout, - const gmx_mtop_t* top_global, - t_inputrec* ir, - int64_t step, - em_state_t* state, - t_state* state_global, - ObservablesHistory* observablesHistory) -{ - int mdof_flags = 0; - - if (bX) - { - mdof_flags |= MDOF_X; - } - if (bF) - { - mdof_flags |= MDOF_F; - } - - /* If we want IMD output, set appropriate MDOF flag */ - if (ir->bIMD) - { - mdof_flags |= MDOF_IMD; - } - - gmx::WriteCheckpointDataHolder checkpointDataHolder; - mdoutf_write_to_trajectory_files(fplog, cr, outf, mdof_flags, top_global->natoms, step, - static_cast(step), &state->s, state_global, - observablesHistory, state->f.view().force(), &checkpointDataHolder); - - if (confout != nullptr) - { - if (DOMAINDECOMP(cr)) - { - /* If bX=true, x was collected to state_global in the call above */ - if (!bX) - { - auto globalXRef = MASTER(cr) ? 
state_global->x : gmx::ArrayRef(); - dd_collect_vec(cr->dd, state->s.ddp_count, state->s.ddp_count_cg_gl, state->s.cg_gl, - state->s.x, globalXRef); - } - } - else - { - /* Copy the local state pointer */ - state_global = &state->s; - } - - if (MASTER(cr)) - { - if (ir->pbcType != PbcType::No && !ir->bPeriodicMols && DOMAINDECOMP(cr)) - { - /* Make molecules whole only for confout writing */ - do_pbc_mtop(ir->pbcType, state->s.box, top_global, state_global->x.rvec_array()); - } - - write_sto_conf_mtop(confout, *top_global->name, top_global, - state_global->x.rvec_array(), nullptr, ir->pbcType, state->s.box); - } - } -} - -//! \brief Do one minimization step -// -// \returns true when the step succeeded, false when a constraint error occurred -static bool do_em_step(const t_commrec* cr, - t_inputrec* ir, - t_mdatoms* md, - em_state_t* ems1, - real a, - gmx::ArrayRefWithPadding force, - em_state_t* ems2, - gmx::Constraints* constr, - int64_t count) - -{ - t_state *s1, *s2; - int start, end; - real dvdl_constr; - int nthreads gmx_unused; - - bool validStep = true; - - s1 = &ems1->s; - s2 = &ems2->s; - - if (DOMAINDECOMP(cr) && s1->ddp_count != cr->dd->ddp_count) - { - gmx_incons("state mismatch in do_em_step"); - } - - s2->flags = s1->flags; - - if (s2->natoms != s1->natoms) - { - state_change_natoms(s2, s1->natoms); - ems2->f.resize(s2->natoms); - } - if (DOMAINDECOMP(cr) && s2->cg_gl.size() != s1->cg_gl.size()) - { - s2->cg_gl.resize(s1->cg_gl.size()); - } - - copy_mat(s1->box, s2->box); - /* Copy free energy state */ - s2->lambda = s1->lambda; - copy_mat(s1->box, s2->box); - - start = 0; - end = md->homenr; - - nthreads = gmx_omp_nthreads_get(emntUpdate); -#pragma omp parallel num_threads(nthreads) - { - const rvec* x1 = s1->x.rvec_array(); - rvec* x2 = s2->x.rvec_array(); - const rvec* f = as_rvec_array(force.unpaddedArrayRef().data()); - - int gf = 0; -#pragma omp for schedule(static) nowait - for (int i = start; i < end; i++) - { - try - { - if (md->cFREEZE) - { 
- gf = md->cFREEZE[i]; - } - for (int m = 0; m < DIM; m++) - { - if (ir->opts.nFreeze[gf][m]) - { - x2[i][m] = x1[i][m]; - } - else - { - x2[i][m] = x1[i][m] + a * f[i][m]; - } - } - } - GMX_CATCH_ALL_AND_EXIT_WITH_FATAL_ERROR - } - - if (s2->flags & (1 << estCGP)) - { - /* Copy the CG p vector */ - const rvec* p1 = s1->cg_p.rvec_array(); - rvec* p2 = s2->cg_p.rvec_array(); -#pragma omp for schedule(static) nowait - for (int i = start; i < end; i++) - { - // Trivial OpenMP block that does not throw - copy_rvec(p1[i], p2[i]); - } - } - - if (DOMAINDECOMP(cr)) - { - /* OpenMP does not supported unsigned loop variables */ -#pragma omp for schedule(static) nowait - for (gmx::index i = 0; i < gmx::ssize(s2->cg_gl); i++) - { - s2->cg_gl[i] = s1->cg_gl[i]; - } - } - } - - if (DOMAINDECOMP(cr)) - { - s2->ddp_count = s1->ddp_count; - s2->ddp_count_cg_gl = s1->ddp_count_cg_gl; - } - - if (constr) - { - dvdl_constr = 0; - validStep = constr->apply( - TRUE, TRUE, count, 0, 1.0, s1->x.arrayRefWithPadding(), s2->x.arrayRefWithPadding(), - ArrayRef(), s2->box, s2->lambda[efptBONDED], &dvdl_constr, - gmx::ArrayRefWithPadding(), false, nullptr, gmx::ConstraintVariable::Positions); - - if (cr->nnodes > 1) - { - /* This global reduction will affect performance at high - * parallelization, but we can not really avoid it. - * But usually EM is not run at high parallelization. - */ - int reductionBuffer = static_cast(!validStep); - gmx_sumi(1, &reductionBuffer, cr); - validStep = (reductionBuffer == 0); - } - - // We should move this check to the different minimizers - if (!validStep && ir->eI != eiSteep) - { - gmx_fatal(FARGS, - "The coordinates could not be constrained. Minimizer '%s' can not handle " - "constraint failures, use minimizer '%s' before using '%s'.", - EI(ir->eI), EI(eiSteep), EI(ir->eI)); - } - } - - return validStep; -} - -//! 
Prepare EM for using domain decomposition parallellization -static void em_dd_partition_system(FILE* fplog, - const gmx::MDLogger& mdlog, - int step, - const t_commrec* cr, - const gmx_mtop_t* top_global, - t_inputrec* ir, - gmx::ImdSession* imdSession, - pull_t* pull_work, - em_state_t* ems, - gmx_localtop_t* top, - gmx::MDAtoms* mdAtoms, - t_forcerec* fr, - VirtualSitesHandler* vsite, - gmx::Constraints* constr, - t_nrnb* nrnb, - gmx_wallcycle_t wcycle) -{ - /* Repartition the domain decomposition */ - dd_partition_system(fplog, mdlog, step, cr, FALSE, 1, nullptr, *top_global, ir, imdSession, pull_work, - &ems->s, &ems->f, mdAtoms, top, fr, vsite, constr, nrnb, wcycle, FALSE); - dd_store_state(cr->dd, &ems->s); -} - -namespace -{ - -/*! \brief Class to handle the work of setting and doing an energy evaluation. - * - * This class is a mere aggregate of parameters to pass to evaluate an - * energy, so that future changes to names and types of them consume - * less time when refactoring other code. - * - * Aggregate initialization is used, for which the chief risk is that - * if a member is added at the end and not all initializer lists are - * updated, then the member will be value initialized, which will - * typically mean initialization to zero. - * - * Use a braced initializer list to construct one of these. */ -class EnergyEvaluator -{ -public: - /*! \brief Evaluates an energy on the state in \c ems. - * - * \todo In practice, the same objects mu_tot, vir, and pres - * are always passed to this function, so we would rather have - * them as data members. However, their C-array types are - * unsuited for aggregate initialization. When the types - * improve, the call signature of this method can be reduced. - */ - void run(em_state_t* ems, rvec mu_tot, tensor vir, tensor pres, int64_t count, gmx_bool bFirst); - //! Handles logging (deprecated). - FILE* fplog; - //! Handles logging. - const gmx::MDLogger& mdlog; - //! Handles communication. 
- const t_commrec* cr; - //! Coordinates multi-simulations. - const gmx_multisim_t* ms; - //! Holds the simulation topology. - const gmx_mtop_t* top_global; - //! Holds the domain topology. - gmx_localtop_t* top; - //! User input options. - t_inputrec* inputrec; - //! The Interactive Molecular Dynamics session. - gmx::ImdSession* imdSession; - //! The pull work object. - pull_t* pull_work; - //! Manages flop accounting. - t_nrnb* nrnb; - //! Manages wall cycle accounting. - gmx_wallcycle_t wcycle; - //! Coordinates global reduction. - gmx_global_stat_t gstat; - //! Handles virtual sites. - VirtualSitesHandler* vsite; - //! Handles constraints. - gmx::Constraints* constr; - //! Per-atom data for this domain. - gmx::MDAtoms* mdAtoms; - //! Handles how to calculate the forces. - t_forcerec* fr; - //! Schedule of force-calculation work each step for this task. - MdrunScheduleWorkload* runScheduleWork; - //! Stores the computed energies. - gmx_enerdata_t* enerd; -}; - -void EnergyEvaluator::run(em_state_t* ems, rvec mu_tot, tensor vir, tensor pres, int64_t count, gmx_bool bFirst) -{ - real t; - gmx_bool bNS; - tensor force_vir, shake_vir, ekin; - real dvdl_constr; - real terminate = 0; - - /* Set the time to the initial time, the time does not change during EM */ - t = inputrec->init_t; - - if (bFirst || (DOMAINDECOMP(cr) && ems->s.ddp_count < cr->dd->ddp_count)) - { - /* This is the first state or an old state used before the last ns */ - bNS = TRUE; - } - else - { - bNS = FALSE; - if (inputrec->nstlist > 0) - { - bNS = TRUE; - } - } - - if (vsite) - { - vsite->construct(ems->s.x, 1, {}, ems->s.box); - } - - if (DOMAINDECOMP(cr) && bNS) - { - /* Repartition the domain decomposition */ - em_dd_partition_system(fplog, mdlog, count, cr, top_global, inputrec, imdSession, pull_work, - ems, top, mdAtoms, fr, vsite, constr, nrnb, wcycle); - } - - /* Calc force & energy on new trial position */ - /* do_force always puts the charge groups in the box and shifts again - * We do 
not unshift, so molecules are always whole in congrad.c - */ - do_force(fplog, cr, ms, inputrec, nullptr, nullptr, imdSession, pull_work, count, nrnb, wcycle, - top, ems->s.box, ems->s.x.arrayRefWithPadding(), &ems->s.hist, &ems->f.view(), force_vir, - mdAtoms->mdatoms(), enerd, ems->s.lambda, fr, runScheduleWork, vsite, mu_tot, t, nullptr, - GMX_FORCE_STATECHANGED | GMX_FORCE_ALLFORCES | GMX_FORCE_VIRIAL | GMX_FORCE_ENERGY - | (bNS ? GMX_FORCE_NS : 0), - DDBalanceRegionHandler(cr)); - - /* Clear the unused shake virial and pressure */ - clear_mat(shake_vir); - clear_mat(pres); - - /* Communicate stuff when parallel */ - if (PAR(cr) && inputrec->eI != eiNM) - { - wallcycle_start(wcycle, ewcMoveE); - - global_stat(gstat, cr, enerd, force_vir, shake_vir, inputrec, nullptr, nullptr, nullptr, 1, - &terminate, nullptr, FALSE, CGLO_ENERGY | CGLO_PRESSURE | CGLO_CONSTRAINT); - - wallcycle_stop(wcycle, ewcMoveE); - } - - if (fr->dispersionCorrection) - { - /* Calculate long range corrections to pressure and energy */ - const DispersionCorrection::Correction correction = - fr->dispersionCorrection->calculate(ems->s.box, ems->s.lambda[efptVDW]); - - enerd->term[F_DISPCORR] = correction.energy; - enerd->term[F_EPOT] += correction.energy; - enerd->term[F_PRES] += correction.pressure; - enerd->term[F_DVDL] += correction.dvdl; - } - else - { - enerd->term[F_DISPCORR] = 0; - } - - ems->epot = enerd->term[F_EPOT]; - - if (constr) - { - /* Project out the constraint components of the force */ - bool needsLogging = false; - bool computeEnergy = false; - bool computeVirial = true; - dvdl_constr = 0; - auto f = ems->f.view().forceWithPadding(); - constr->apply(needsLogging, computeEnergy, count, 0, 1.0, ems->s.x.arrayRefWithPadding(), f, - f.unpaddedArrayRef(), ems->s.box, ems->s.lambda[efptBONDED], &dvdl_constr, - gmx::ArrayRefWithPadding(), computeVirial, shake_vir, - gmx::ConstraintVariable::ForceDispl); - enerd->term[F_DVDL_CONSTR] += dvdl_constr; - m_add(force_vir, shake_vir, 
vir); - } - else - { - copy_mat(force_vir, vir); - } - - clear_mat(ekin); - enerd->term[F_PRES] = calc_pres(fr->pbcType, inputrec->nwall, ems->s.box, ekin, vir, pres); - - if (inputrec->efep != efepNO) - { - accumulateKineticLambdaComponents(enerd, ems->s.lambda, *inputrec->fepvals); - } - - if (EI_ENERGY_MINIMIZATION(inputrec->eI)) - { - get_state_f_norm_max(cr, &(inputrec->opts), mdAtoms->mdatoms(), ems); - } -} - -} // namespace - -//! Parallel utility summing energies and forces -static double reorder_partsum(const t_commrec* cr, - t_grpopts* opts, - const gmx_mtop_t* top_global, - const em_state_t* s_min, - const em_state_t* s_b) -{ - if (debug) - { - fprintf(debug, "Doing reorder_partsum\n"); - } - - auto fm = s_min->f.view().force(); - auto fb = s_b->f.view().force(); - - /* Collect fm in a global vector fmg. - * This conflicts with the spirit of domain decomposition, - * but to fully optimize this a much more complicated algorithm is required. - */ - const int natoms = top_global->natoms; - rvec* fmg; - snew(fmg, natoms); - - gmx::ArrayRef indicesMin = s_min->s.cg_gl; - int i = 0; - for (int a : indicesMin) - { - copy_rvec(fm[i], fmg[a]); - i++; - } - gmx_sum(top_global->natoms * 3, fmg[0], cr); - - /* Now we will determine the part of the sum for the cgs in state s_b */ - gmx::ArrayRef indicesB = s_b->s.cg_gl; - - double partsum = 0; - i = 0; - int gf = 0; - gmx::ArrayRef grpnrFREEZE = - top_global->groups.groupNumbers[SimulationAtomGroupType::Freeze]; - for (int a : indicesB) - { - if (!grpnrFREEZE.empty()) - { - gf = grpnrFREEZE[i]; - } - for (int m = 0; m < DIM; m++) - { - if (!opts->nFreeze[gf][m]) - { - partsum += (fb[i][m] - fmg[a][m]) * fb[i][m]; - } - } - i++; - } - - sfree(fmg); - - return partsum; -} - -//! Print some stuff, like beta, whatever that means. 
-static real pr_beta(const t_commrec* cr, - t_grpopts* opts, - t_mdatoms* mdatoms, - const gmx_mtop_t* top_global, - const em_state_t* s_min, - const em_state_t* s_b) -{ - double sum; - - /* This is just the classical Polak-Ribiere calculation of beta; - * it looks a bit complicated since we take freeze groups into account, - * and might have to sum it in parallel runs. - */ - - if (!DOMAINDECOMP(cr) - || (s_min->s.ddp_count == cr->dd->ddp_count && s_b->s.ddp_count == cr->dd->ddp_count)) - { - auto fm = s_min->f.view().force(); - auto fb = s_b->f.view().force(); - sum = 0; - int gf = 0; - /* This part of code can be incorrect with DD, - * since the atom ordering in s_b and s_min might differ. - */ - for (int i = 0; i < mdatoms->homenr; i++) - { - if (mdatoms->cFREEZE) - { - gf = mdatoms->cFREEZE[i]; - } - for (int m = 0; m < DIM; m++) - { - if (!opts->nFreeze[gf][m]) - { - sum += (fb[i][m] - fm[i][m]) * fb[i][m]; - } - } - } - } - else - { - /* We need to reorder cgs while summing */ - sum = reorder_partsum(cr, opts, top_global, s_min, s_b); - } - if (PAR(cr)) - { - gmx_sumd(1, &sum, cr); - } - - return sum / gmx::square(s_min->fnorm); -} - -namespace gmx -{ - -void LegacySimulator::do_cg() -{ - const char* CG = "Polak-Ribiere Conjugate Gradients"; - - gmx_localtop_t top(top_global->ffparams); - gmx_global_stat_t gstat; - double tmp, minstep; - real stepsize; - real a, b, c, beta = 0.0; - real epot_repl = 0; - real pnorm; - gmx_bool converged, foundlower; - rvec mu_tot = { 0 }; - gmx_bool do_log = FALSE, do_ene = FALSE, do_x, do_f; - tensor vir, pres; - int number_steps, neval = 0, nstcg = inputrec->nstcgsteep; - int m, step, nminstep; - auto mdatoms = mdAtoms->mdatoms(); - - GMX_LOG(mdlog.info) - .asParagraph() - .appendText( - "Note that activating conjugate gradient energy minimization via the " - "integrator .mdp option and the command gmx mdrun may " - "be available in a different form in a future version of GROMACS, " - "e.g. 
gmx minimize and an .mdp option."); - - step = 0; - - if (MASTER(cr)) - { - // In CG, the state is extended with a search direction - state_global->flags |= (1 << estCGP); - - // Ensure the extra per-atom state array gets allocated - state_change_natoms(state_global, state_global->natoms); - - // Initialize the search direction to zero - for (RVec& cg_p : state_global->cg_p) - { - cg_p = { 0, 0, 0 }; - } - } - - /* Create 4 states on the stack and extract pointers that we will swap */ - em_state_t s0{}, s1{}, s2{}, s3{}; - em_state_t* s_min = &s0; - em_state_t* s_a = &s1; - em_state_t* s_b = &s2; - em_state_t* s_c = &s3; - - /* Init em and store the local state in s_min */ - init_em(fplog, mdlog, CG, cr, inputrec, imdSession, pull_work, state_global, top_global, s_min, - &top, nrnb, fr, mdAtoms, &gstat, vsite, constr, nullptr); - const bool simulationsShareState = false; - gmx_mdoutf* outf = init_mdoutf(fplog, nfile, fnm, mdrunOptions, cr, outputProvider, - mdModulesNotifier, inputrec, top_global, nullptr, wcycle, - StartingBehavior::NewSimulation, simulationsShareState, ms); - gmx::EnergyOutput energyOutput(mdoutf_get_fp_ene(outf), top_global, inputrec, pull_work, - nullptr, false, StartingBehavior::NewSimulation, - simulationsShareState, mdModulesNotifier); - - /* Print to log file */ - print_em_start(fplog, cr, walltime_accounting, wcycle, CG); - - /* Max number of steps */ - number_steps = inputrec->nsteps; - - if (MASTER(cr)) - { - sp_header(stderr, CG, inputrec->em_tol, number_steps); - } - if (fplog) - { - sp_header(fplog, CG, inputrec->em_tol, number_steps); - } - - EnergyEvaluator energyEvaluator{ fplog, mdlog, cr, ms, top_global, &top, - inputrec, imdSession, pull_work, nrnb, wcycle, gstat, - vsite, constr, mdAtoms, fr, runScheduleWork, enerd }; - /* Call the force routine and some auxiliary (neighboursearching etc.) 
*/ - /* do_force always puts the charge groups in the box and shifts again - * We do not unshift, so molecules are always whole in congrad.c - */ - energyEvaluator.run(s_min, mu_tot, vir, pres, -1, TRUE); - - if (MASTER(cr)) - { - /* Copy stuff to the energy bin for easy printing etc. */ - matrix nullBox = {}; - energyOutput.addDataAtEnergyStep(false, false, static_cast(step), mdatoms->tmass, - enerd, nullptr, nullptr, nullBox, PTCouplingArrays(), 0, - nullptr, nullptr, vir, pres, nullptr, mu_tot, constr); - - EnergyOutput::printHeader(fplog, step, step); - energyOutput.printStepToEnergyFile(mdoutf_get_fp_ene(outf), TRUE, FALSE, FALSE, fplog, step, - step, fr->fcdata.get(), nullptr); - } - - /* Estimate/guess the initial stepsize */ - stepsize = inputrec->em_stepsize / s_min->fnorm; - - if (MASTER(cr)) - { - double sqrtNumAtoms = sqrt(static_cast(state_global->natoms)); - fprintf(stderr, " F-max = %12.5e on atom %d\n", s_min->fmax, s_min->a_fmax + 1); - fprintf(stderr, " F-Norm = %12.5e\n", s_min->fnorm / sqrtNumAtoms); - fprintf(stderr, "\n"); - /* and copy to the log file too... */ - fprintf(fplog, " F-max = %12.5e on atom %d\n", s_min->fmax, s_min->a_fmax + 1); - fprintf(fplog, " F-Norm = %12.5e\n", s_min->fnorm / sqrtNumAtoms); - fprintf(fplog, "\n"); - } - /* Start the loop over CG steps. - * Each successful step is counted, and we continue until - * we either converge or reach the max number of steps. - */ - converged = FALSE; - for (step = 0; (number_steps < 0 || step <= number_steps) && !converged; step++) - { - - /* start taking steps in a new direction - * First time we enter the routine, beta=0, and the direction is - * simply the negative gradient. 
- */ - - /* Calculate the new direction in p, and the gradient in this direction, gpa */ - gmx::ArrayRef pm = s_min->s.cg_p; - gmx::ArrayRef sfm = s_min->f.view().force(); - double gpa = 0; - int gf = 0; - for (int i = 0; i < mdatoms->homenr; i++) - { - if (mdatoms->cFREEZE) - { - gf = mdatoms->cFREEZE[i]; - } - for (m = 0; m < DIM; m++) - { - if (!inputrec->opts.nFreeze[gf][m]) - { - pm[i][m] = sfm[i][m] + beta * pm[i][m]; - gpa -= pm[i][m] * sfm[i][m]; - /* f is negative gradient, thus the sign */ - } - else - { - pm[i][m] = 0; - } - } - } - - /* Sum the gradient along the line across CPUs */ - if (PAR(cr)) - { - gmx_sumd(1, &gpa, cr); - } - - /* Calculate the norm of the search vector */ - get_f_norm_max(cr, &(inputrec->opts), mdatoms, pm, &pnorm, nullptr, nullptr); - - /* Just in case stepsize reaches zero due to numerical precision... */ - if (stepsize <= 0) - { - stepsize = inputrec->em_stepsize / pnorm; - } - - /* - * Double check the value of the derivative in the search direction. - * If it is positive it must be due to the old information in the - * CG formula, so just remove that and start over with beta=0. - * This corresponds to a steepest descent step. 
- */ - if (gpa > 0) - { - beta = 0; - step--; /* Don't count this step since we are restarting */ - continue; /* Go back to the beginning of the big for-loop */ - } - - /* Calculate minimum allowed stepsize, before the average (norm) - * relative change in coordinate is smaller than precision - */ - minstep = 0; - auto s_min_x = makeArrayRef(s_min->s.x); - for (int i = 0; i < mdatoms->homenr; i++) - { - for (m = 0; m < DIM; m++) - { - tmp = fabs(s_min_x[i][m]); - if (tmp < 1.0) - { - tmp = 1.0; - } - tmp = pm[i][m] / tmp; - minstep += tmp * tmp; - } - } - /* Add up from all CPUs */ - if (PAR(cr)) - { - gmx_sumd(1, &minstep, cr); - } - - minstep = GMX_REAL_EPS / sqrt(minstep / (3 * top_global->natoms)); - - if (stepsize < minstep) - { - converged = TRUE; - break; - } - - /* Write coordinates if necessary */ - do_x = do_per_step(step, inputrec->nstxout); - do_f = do_per_step(step, inputrec->nstfout); - - write_em_traj(fplog, cr, outf, do_x, do_f, nullptr, top_global, inputrec, step, s_min, - state_global, observablesHistory); - - /* Take a step downhill. - * In theory, we should minimize the function along this direction. - * That is quite possible, but it turns out to take 5-10 function evaluations - * for each line. However, we dont really need to find the exact minimum - - * it is much better to start a new CG step in a modified direction as soon - * as we are close to it. This will save a lot of energy evaluations. - * - * In practice, we just try to take a single step. - * If it worked (i.e. lowered the energy), we increase the stepsize but - * the continue straight to the next CG step without trying to find any minimum. - * If it didn't work (higher energy), there must be a minimum somewhere between - * the old position and the new one. - * - * Due to the finite numerical accuracy, it turns out that it is a good idea - * to even accept a SMALL increase in energy, if the derivative is still downhill. - * This leads to lower final energies in the tests I've done. 
/ Erik - */ - s_a->epot = s_min->epot; - a = 0.0; - c = a + stepsize; /* reference position along line is zero */ - - if (DOMAINDECOMP(cr) && s_min->s.ddp_count < cr->dd->ddp_count) - { - em_dd_partition_system(fplog, mdlog, step, cr, top_global, inputrec, imdSession, - pull_work, s_min, &top, mdAtoms, fr, vsite, constr, nrnb, wcycle); - } - - /* Take a trial step (new coords in s_c) */ - do_em_step(cr, inputrec, mdatoms, s_min, c, s_min->s.cg_p.constArrayRefWithPadding(), s_c, - constr, -1); - - neval++; - /* Calculate energy for the trial step */ - energyEvaluator.run(s_c, mu_tot, vir, pres, -1, FALSE); - - /* Calc derivative along line */ - const rvec* pc = s_c->s.cg_p.rvec_array(); - gmx::ArrayRef sfc = s_c->f.view().force(); - double gpc = 0; - for (int i = 0; i < mdatoms->homenr; i++) - { - for (m = 0; m < DIM; m++) - { - gpc -= pc[i][m] * sfc[i][m]; /* f is negative gradient, thus the sign */ - } - } - /* Sum the gradient along the line across CPUs */ - if (PAR(cr)) - { - gmx_sumd(1, &gpc, cr); - } - - /* This is the max amount of increase in energy we tolerate */ - tmp = std::sqrt(GMX_REAL_EPS) * fabs(s_a->epot); - - /* Accept the step if the energy is lower, or if it is not significantly higher - * and the line derivative is still negative. - */ - if (s_c->epot < s_a->epot || (gpc < 0 && s_c->epot < (s_a->epot + tmp))) - { - foundlower = TRUE; - /* Great, we found a better energy. Increase step for next iteration - * if we are still going down, decrease it otherwise - */ - if (gpc < 0) - { - stepsize *= 1.618034; /* The golden section */ - } - else - { - stepsize *= 0.618034; /* 1/golden section */ - } - } - else - { - /* New energy is the same or higher. We will have to do some work - * to find a smaller value in the interval. Take smaller step next time! - */ - foundlower = FALSE; - stepsize *= 0.618034; - } - - - /* OK, if we didn't find a lower value we will have to locate one now - there must - * be one in the interval [a=0,c]. 
- * The same thing is valid here, though: Don't spend dozens of iterations to find - * the line minimum. We try to interpolate based on the derivative at the endpoints, - * and only continue until we find a lower value. In most cases this means 1-2 iterations. - * - * I also have a safeguard for potentially really pathological functions so we never - * take more than 20 steps before we give up ... - * - * If we already found a lower value we just skip this step and continue to the update. - */ - double gpb; - if (!foundlower) - { - nminstep = 0; - - do - { - /* Select a new trial point. - * If the derivatives at points a & c have different sign we interpolate to zero, - * otherwise just do a bisection. - */ - if (gpa < 0 && gpc > 0) - { - b = a + gpa * (a - c) / (gpc - gpa); - } - else - { - b = 0.5 * (a + c); - } - - /* safeguard if interpolation close to machine accuracy causes errors: - * never go outside the interval - */ - if (b <= a || b >= c) - { - b = 0.5 * (a + c); - } - - if (DOMAINDECOMP(cr) && s_min->s.ddp_count != cr->dd->ddp_count) - { - /* Reload the old state */ - em_dd_partition_system(fplog, mdlog, -1, cr, top_global, inputrec, imdSession, pull_work, - s_min, &top, mdAtoms, fr, vsite, constr, nrnb, wcycle); - } - - /* Take a trial step to this new point - new coords in s_b */ - do_em_step(cr, inputrec, mdatoms, s_min, b, - s_min->s.cg_p.constArrayRefWithPadding(), s_b, constr, -1); - - neval++; - /* Calculate energy for the trial step */ - energyEvaluator.run(s_b, mu_tot, vir, pres, -1, FALSE); - - /* p does not change within a step, but since the domain decomposition - * might change, we have to use cg_p of s_b here. 
- */ - const rvec* pb = s_b->s.cg_p.rvec_array(); - gmx::ArrayRef sfb = s_b->f.view().force(); - gpb = 0; - for (int i = 0; i < mdatoms->homenr; i++) - { - for (m = 0; m < DIM; m++) - { - gpb -= pb[i][m] * sfb[i][m]; /* f is negative gradient, thus the sign */ - } - } - /* Sum the gradient along the line across CPUs */ - if (PAR(cr)) - { - gmx_sumd(1, &gpb, cr); - } - - if (debug) - { - fprintf(debug, "CGE: EpotA %f EpotB %f EpotC %f gpb %f\n", s_a->epot, s_b->epot, - s_c->epot, gpb); - } - - epot_repl = s_b->epot; - - /* Keep one of the intervals based on the value of the derivative at the new point */ - if (gpb > 0) - { - /* Replace c endpoint with b */ - swap_em_state(&s_b, &s_c); - c = b; - gpc = gpb; - } - else - { - /* Replace a endpoint with b */ - swap_em_state(&s_b, &s_a); - a = b; - gpa = gpb; - } - - /* - * Stop search as soon as we find a value smaller than the endpoints. - * Never run more than 20 steps, no matter what. - */ - nminstep++; - } while ((epot_repl > s_a->epot || epot_repl > s_c->epot) && (nminstep < 20)); - - if (std::fabs(epot_repl - s_min->epot) < fabs(s_min->epot) * GMX_REAL_EPS || nminstep >= 20) - { - /* OK. We couldn't find a significantly lower energy. - * If beta==0 this was steepest descent, and then we give up. - * If not, set beta=0 and restart with steepest descent before quitting. - */ - if (beta == 0.0) - { - /* Converged */ - converged = TRUE; - break; - } - else - { - /* Reset memory before giving up */ - beta = 0.0; - continue; - } - } - - /* Select min energy state of A & C, put the best in B. 
- */ - if (s_c->epot < s_a->epot) - { - if (debug) - { - fprintf(debug, "CGE: C (%f) is lower than A (%f), moving C to B\n", s_c->epot, - s_a->epot); - } - swap_em_state(&s_b, &s_c); - gpb = gpc; - } - else - { - if (debug) - { - fprintf(debug, "CGE: A (%f) is lower than C (%f), moving A to B\n", s_a->epot, - s_c->epot); - } - swap_em_state(&s_b, &s_a); - gpb = gpa; - } - } - else - { - if (debug) - { - fprintf(debug, "CGE: Found a lower energy %f, moving C to B\n", s_c->epot); - } - swap_em_state(&s_b, &s_c); - gpb = gpc; - } - - /* new search direction */ - /* beta = 0 means forget all memory and restart with steepest descents. */ - if (nstcg && ((step % nstcg) == 0)) - { - beta = 0.0; - } - else - { - /* s_min->fnorm cannot be zero, because then we would have converged - * and broken out. - */ - - /* Polak-Ribiere update. - * Change to fnorm2/fnorm2_old for Fletcher-Reeves - */ - beta = pr_beta(cr, &inputrec->opts, mdatoms, top_global, s_min, s_b); - } - /* Limit beta to prevent oscillations */ - if (fabs(beta) > 5.0) - { - beta = 0.0; - } - - - /* update positions */ - swap_em_state(&s_min, &s_b); - gpa = gpb; - - /* Print it if necessary */ - if (MASTER(cr)) - { - if (mdrunOptions.verbose) - { - double sqrtNumAtoms = sqrt(static_cast(state_global->natoms)); - fprintf(stderr, "\rStep %d, Epot=%12.6e, Fnorm=%9.3e, Fmax=%9.3e (atom %d)\n", step, - s_min->epot, s_min->fnorm / sqrtNumAtoms, s_min->fmax, s_min->a_fmax + 1); - fflush(stderr); - } - /* Store the new (lower) energies */ - matrix nullBox = {}; - energyOutput.addDataAtEnergyStep(false, false, static_cast(step), mdatoms->tmass, - enerd, nullptr, nullptr, nullBox, PTCouplingArrays(), 0, - nullptr, nullptr, vir, pres, nullptr, mu_tot, constr); - - do_log = do_per_step(step, inputrec->nstlog); - do_ene = do_per_step(step, inputrec->nstenergy); - - imdSession->fillEnergyRecord(step, TRUE); - - if (do_log) - { - EnergyOutput::printHeader(fplog, step, step); - } - 
energyOutput.printStepToEnergyFile(mdoutf_get_fp_ene(outf), do_ene, FALSE, FALSE, - do_log ? fplog : nullptr, step, step, - fr->fcdata.get(), nullptr); - } - - /* Send energies and positions to the IMD client if bIMD is TRUE. */ - if (MASTER(cr) && imdSession->run(step, TRUE, state_global->box, state_global->x.rvec_array(), 0)) - { - imdSession->sendPositionsAndEnergies(); - } - - /* Stop when the maximum force lies below tolerance. - * If we have reached machine precision, converged is already set to true. - */ - converged = converged || (s_min->fmax < inputrec->em_tol); - - } /* End of the loop */ - - if (converged) - { - step--; /* we never took that last step in this case */ - } - if (s_min->fmax > inputrec->em_tol) - { - if (MASTER(cr)) - { - warn_step(fplog, inputrec->em_tol, s_min->fmax, step - 1 == number_steps, FALSE); - } - converged = FALSE; - } - - if (MASTER(cr)) - { - /* If we printed energy and/or logfile last step (which was the last step) - * we don't have to do it again, but otherwise print the final values. - */ - if (!do_log) - { - /* Write final value to log since we didn't do anything the last step */ - EnergyOutput::printHeader(fplog, step, step); - } - if (!do_ene || !do_log) - { - /* Write final energy file entries */ - energyOutput.printStepToEnergyFile(mdoutf_get_fp_ene(outf), !do_ene, FALSE, FALSE, - !do_log ? fplog : nullptr, step, step, - fr->fcdata.get(), nullptr); - } - } - - /* Print some stuff... */ - if (MASTER(cr)) - { - fprintf(stderr, "\nwriting lowest energy coordinates.\n"); - } - - /* IMPORTANT! - * For accurate normal mode calculation it is imperative that we - * store the last conformation into the full precision binary trajectory. - * - * However, we should only do it if we did NOT already write this step - * above (which we did if do_x or do_f was true). - */ - /* Note that with 0 < nstfout != nstxout we can end up with two frames - * in the trajectory with the same step number. 
- */ - do_x = !do_per_step(step, inputrec->nstxout); - do_f = (inputrec->nstfout > 0 && !do_per_step(step, inputrec->nstfout)); - - write_em_traj(fplog, cr, outf, do_x, do_f, ftp2fn(efSTO, nfile, fnm), top_global, inputrec, - step, s_min, state_global, observablesHistory); - - - if (MASTER(cr)) - { - double sqrtNumAtoms = sqrt(static_cast(state_global->natoms)); - print_converged(stderr, CG, inputrec->em_tol, step, converged, number_steps, s_min, sqrtNumAtoms); - print_converged(fplog, CG, inputrec->em_tol, step, converged, number_steps, s_min, sqrtNumAtoms); - - fprintf(fplog, "\nPerformed %d energy evaluations in total.\n", neval); - } - - finish_em(cr, outf, walltime_accounting, wcycle); - - /* To print the actual number of steps we needed somewhere */ - walltime_accounting_set_nsteps_done(walltime_accounting, step); -} - - -void LegacySimulator::do_lbfgs() -{ - static const char* LBFGS = "Low-Memory BFGS Minimizer"; - em_state_t ems; - gmx_localtop_t top(top_global->ffparams); - gmx_global_stat_t gstat; - int ncorr, nmaxcorr, point, cp, neval, nminstep; - double stepsize, step_taken, gpa, gpb, gpc, tmp, minstep; - real * rho, *alpha, *p, *s, **dx, **dg; - real a, b, c, maxdelta, delta; - real diag, Epot0; - real dgdx, dgdg, sq, yr, beta; - gmx_bool converged; - rvec mu_tot = { 0 }; - gmx_bool do_log, do_ene, do_x, do_f, foundlower, *frozen; - tensor vir, pres; - int start, end, number_steps; - int i, k, m, n, gf, step; - int mdof_flags; - auto mdatoms = mdAtoms->mdatoms(); - - GMX_LOG(mdlog.info) - .asParagraph() - .appendText( - "Note that activating L-BFGS energy minimization via the " - "integrator .mdp option and the command gmx mdrun may " - "be available in a different form in a future version of GROMACS, " - "e.g. 
gmx minimize and an .mdp option."); - - if (PAR(cr)) - { - gmx_fatal(FARGS, "L-BFGS minimization only supports a single rank"); - } - - if (nullptr != constr) - { - gmx_fatal( - FARGS, - "The combination of constraints and L-BFGS minimization is not implemented. Either " - "do not use constraints, or use another minimizer (e.g. steepest descent)."); - } - - n = 3 * state_global->natoms; - nmaxcorr = inputrec->nbfgscorr; - - snew(frozen, n); - - snew(p, n); - snew(rho, nmaxcorr); - snew(alpha, nmaxcorr); - - snew(dx, nmaxcorr); - for (i = 0; i < nmaxcorr; i++) - { - snew(dx[i], n); - } - - snew(dg, nmaxcorr); - for (i = 0; i < nmaxcorr; i++) - { - snew(dg[i], n); - } - - step = 0; - neval = 0; - - /* Init em */ - init_em(fplog, mdlog, LBFGS, cr, inputrec, imdSession, pull_work, state_global, top_global, - &ems, &top, nrnb, fr, mdAtoms, &gstat, vsite, constr, nullptr); - const bool simulationsShareState = false; - gmx_mdoutf* outf = init_mdoutf(fplog, nfile, fnm, mdrunOptions, cr, outputProvider, - mdModulesNotifier, inputrec, top_global, nullptr, wcycle, - StartingBehavior::NewSimulation, simulationsShareState, ms); - gmx::EnergyOutput energyOutput(mdoutf_get_fp_ene(outf), top_global, inputrec, pull_work, - nullptr, false, StartingBehavior::NewSimulation, - simulationsShareState, mdModulesNotifier); - - start = 0; - end = mdatoms->homenr; - - /* We need 4 working states */ - em_state_t s0{}, s1{}, s2{}, s3{}; - em_state_t* sa = &s0; - em_state_t* sb = &s1; - em_state_t* sc = &s2; - em_state_t* last = &s3; - /* Initialize by copying the state from ems (we could skip x and f here) */ - *sa = ems; - *sb = ems; - *sc = ems; - - /* Print to log file */ - print_em_start(fplog, cr, walltime_accounting, wcycle, LBFGS); - - do_log = do_ene = do_x = do_f = TRUE; - - /* Max number of steps */ - number_steps = inputrec->nsteps; - - /* Create a 3*natoms index to tell whether each degree of freedom is frozen */ - gf = 0; - for (i = start; i < end; i++) - { - if (mdatoms->cFREEZE) 
- { - gf = mdatoms->cFREEZE[i]; - } - for (m = 0; m < DIM; m++) - { - frozen[3 * i + m] = (inputrec->opts.nFreeze[gf][m] != 0); - } - } - if (MASTER(cr)) - { - sp_header(stderr, LBFGS, inputrec->em_tol, number_steps); - } - if (fplog) - { - sp_header(fplog, LBFGS, inputrec->em_tol, number_steps); - } - - if (vsite) - { - vsite->construct(state_global->x, 1, {}, state_global->box); - } - - /* Call the force routine and some auxiliary (neighboursearching etc.) */ - /* do_force always puts the charge groups in the box and shifts again - * We do not unshift, so molecules are always whole - */ - neval++; - EnergyEvaluator energyEvaluator{ fplog, mdlog, cr, ms, top_global, &top, - inputrec, imdSession, pull_work, nrnb, wcycle, gstat, - vsite, constr, mdAtoms, fr, runScheduleWork, enerd }; - energyEvaluator.run(&ems, mu_tot, vir, pres, -1, TRUE); - - if (MASTER(cr)) - { - /* Copy stuff to the energy bin for easy printing etc. */ - matrix nullBox = {}; - energyOutput.addDataAtEnergyStep(false, false, static_cast(step), mdatoms->tmass, - enerd, nullptr, nullptr, nullBox, PTCouplingArrays(), 0, - nullptr, nullptr, vir, pres, nullptr, mu_tot, constr); - - EnergyOutput::printHeader(fplog, step, step); - energyOutput.printStepToEnergyFile(mdoutf_get_fp_ene(outf), TRUE, FALSE, FALSE, fplog, step, - step, fr->fcdata.get(), nullptr); - } - - /* Set the initial step. - * since it will be multiplied by the non-normalized search direction - * vector (force vector the first time), we scale it by the - * norm of the force. - */ - - if (MASTER(cr)) - { - double sqrtNumAtoms = sqrt(static_cast(state_global->natoms)); - fprintf(stderr, "Using %d BFGS correction steps.\n\n", nmaxcorr); - fprintf(stderr, " F-max = %12.5e on atom %d\n", ems.fmax, ems.a_fmax + 1); - fprintf(stderr, " F-Norm = %12.5e\n", ems.fnorm / sqrtNumAtoms); - fprintf(stderr, "\n"); - /* and copy to the log file too... 
*/ - fprintf(fplog, "Using %d BFGS correction steps.\n\n", nmaxcorr); - fprintf(fplog, " F-max = %12.5e on atom %d\n", ems.fmax, ems.a_fmax + 1); - fprintf(fplog, " F-Norm = %12.5e\n", ems.fnorm / sqrtNumAtoms); - fprintf(fplog, "\n"); - } - - // Point is an index to the memory of search directions, where 0 is the first one. - point = 0; - - // Set initial search direction to the force (-gradient), or 0 for frozen particles. - real* fInit = static_cast(ems.f.view().force().data()[0]); - for (i = 0; i < n; i++) - { - if (!frozen[i]) - { - dx[point][i] = fInit[i]; /* Initial search direction */ - } - else - { - dx[point][i] = 0; - } - } - - // Stepsize will be modified during the search, and actually it is not critical - // (the main efficiency in the algorithm comes from changing directions), but - // we still need an initial value, so estimate it as the inverse of the norm - // so we take small steps where the potential fluctuates a lot. - stepsize = 1.0 / ems.fnorm; - - /* Start the loop over BFGS steps. - * Each successful step is counted, and we continue until - * we either converge or reach the max number of steps. 
- */ - - ncorr = 0; - - /* Set the gradient from the force */ - converged = FALSE; - for (step = 0; (number_steps < 0 || step <= number_steps) && !converged; step++) - { - - /* Write coordinates if necessary */ - do_x = do_per_step(step, inputrec->nstxout); - do_f = do_per_step(step, inputrec->nstfout); - - mdof_flags = 0; - if (do_x) - { - mdof_flags |= MDOF_X; - } - - if (do_f) - { - mdof_flags |= MDOF_F; - } - - if (inputrec->bIMD) - { - mdof_flags |= MDOF_IMD; - } - - gmx::WriteCheckpointDataHolder checkpointDataHolder; - mdoutf_write_to_trajectory_files(fplog, cr, outf, mdof_flags, top_global->natoms, step, - static_cast(step), &ems.s, state_global, observablesHistory, - ems.f.view().force(), &checkpointDataHolder); - - /* Do the linesearching in the direction dx[point][0..(n-1)] */ - - /* make s a pointer to current search direction - point=0 first time we get here */ - s = dx[point]; - - real* xx = static_cast(ems.s.x.rvec_array()[0]); - real* ff = static_cast(ems.f.view().force().data()[0]); - - // calculate line gradient in position A - for (gpa = 0, i = 0; i < n; i++) - { - gpa -= s[i] * ff[i]; - } - - /* Calculate minimum allowed stepsize along the line, before the average (norm) - * relative change in coordinate is smaller than precision - */ - for (minstep = 0, i = 0; i < n; i++) - { - tmp = fabs(xx[i]); - if (tmp < 1.0) - { - tmp = 1.0; - } - tmp = s[i] / tmp; - minstep += tmp * tmp; - } - minstep = GMX_REAL_EPS / sqrt(minstep / n); - - if (stepsize < minstep) - { - converged = TRUE; - break; - } - - // Before taking any steps along the line, store the old position - *last = ems; - real* lastx = static_cast(last->s.x.data()[0]); - real* lastf = static_cast(last->f.view().force().data()[0]); - Epot0 = ems.epot; - - *sa = ems; - - /* Take a step downhill. - * In theory, we should find the actual minimum of the function in this - * direction, somewhere along the line. 
- * That is quite possible, but it turns out to take 5-10 function evaluations - * for each line. However, we dont really need to find the exact minimum - - * it is much better to start a new BFGS step in a modified direction as soon - * as we are close to it. This will save a lot of energy evaluations. - * - * In practice, we just try to take a single step. - * If it worked (i.e. lowered the energy), we increase the stepsize but - * continue straight to the next BFGS step without trying to find any minimum, - * i.e. we change the search direction too. If the line was smooth, it is - * likely we are in a smooth region, and then it makes sense to take longer - * steps in the modified search direction too. - * - * If it didn't work (higher energy), there must be a minimum somewhere between - * the old position and the new one. Then we need to start by finding a lower - * value before we change search direction. Since the energy was apparently - * quite rough, we need to decrease the step size. - * - * Due to the finite numerical accuracy, it turns out that it is a good idea - * to accept a SMALL increase in energy, if the derivative is still downhill. - * This leads to lower final energies in the tests I've done. / Erik - */ - - // State "A" is the first position along the line. - // reference position along line is initially zero - a = 0.0; - - // Check stepsize first. We do not allow displacements - // larger than emstep. - // - do - { - // Pick a new position C by adding stepsize to A. 
- c = a + stepsize; - - // Calculate what the largest change in any individual coordinate - // would be (translation along line * gradient along line) - maxdelta = 0; - for (i = 0; i < n; i++) - { - delta = c * s[i]; - if (delta > maxdelta) - { - maxdelta = delta; - } - } - // If any displacement is larger than the stepsize limit, reduce the step - if (maxdelta > inputrec->em_stepsize) - { - stepsize *= 0.1; - } - } while (maxdelta > inputrec->em_stepsize); - - // Take a trial step and move the coordinate array xc[] to position C - real* xc = static_cast(sc->s.x.rvec_array()[0]); - for (i = 0; i < n; i++) - { - xc[i] = lastx[i] + c * s[i]; - } - - neval++; - // Calculate energy for the trial step in position C - energyEvaluator.run(sc, mu_tot, vir, pres, step, FALSE); - - // Calc line gradient in position C - real* fc = static_cast(sc->f.view().force()[0]); - for (gpc = 0, i = 0; i < n; i++) - { - gpc -= s[i] * fc[i]; /* f is negative gradient, thus the sign */ - } - /* Sum the gradient along the line across CPUs */ - if (PAR(cr)) - { - gmx_sumd(1, &gpc, cr); - } - - // This is the max amount of increase in energy we tolerate. - // By allowing VERY small changes (close to numerical precision) we - // frequently find even better (lower) final energies. - tmp = std::sqrt(GMX_REAL_EPS) * fabs(sa->epot); - - // Accept the step if the energy is lower in the new position C (compared to A), - // or if it is not significantly higher and the line derivative is still negative. - foundlower = sc->epot < sa->epot || (gpc < 0 && sc->epot < (sa->epot + tmp)); - // If true, great, we found a better energy. We no longer try to alter the - // stepsize, but simply accept this new better position. The we select a new - // search direction instead, which will be much more efficient than continuing - // to take smaller steps along a line. Set fnorm based on the new C position, - // which will be used to update the stepsize to 1/fnorm further down. 
- - // If false, the energy is NOT lower in point C, i.e. it will be the same - // or higher than in point A. In this case it is pointless to move to point C, - // so we will have to do more iterations along the same line to find a smaller - // value in the interval [A=0.0,C]. - // Here, A is still 0.0, but that will change when we do a search in the interval - // [0.0,C] below. That search we will do by interpolation or bisection rather - // than with the stepsize, so no need to modify it. For the next search direction - // it will be reset to 1/fnorm anyway. - - if (!foundlower) - { - // OK, if we didn't find a lower value we will have to locate one now - there must - // be one in the interval [a,c]. - // The same thing is valid here, though: Don't spend dozens of iterations to find - // the line minimum. We try to interpolate based on the derivative at the endpoints, - // and only continue until we find a lower value. In most cases this means 1-2 iterations. - // I also have a safeguard for potentially really pathological functions so we never - // take more than 20 steps before we give up. - // If we already found a lower value we just skip this step and continue to the update. - real fnorm = 0; - nminstep = 0; - do - { - // Select a new trial point B in the interval [A,C]. - // If the derivatives at points a & c have different sign we interpolate to zero, - // otherwise just do a bisection since there might be multiple minima/maxima - // inside the interval. 
- if (gpa < 0 && gpc > 0) - { - b = a + gpa * (a - c) / (gpc - gpa); - } - else - { - b = 0.5 * (a + c); - } - - /* safeguard if interpolation close to machine accuracy causes errors: - * never go outside the interval - */ - if (b <= a || b >= c) - { - b = 0.5 * (a + c); - } - - // Take a trial step to point B - real* xb = static_cast(sb->s.x.rvec_array()[0]); - for (i = 0; i < n; i++) - { - xb[i] = lastx[i] + b * s[i]; - } - - neval++; - // Calculate energy for the trial step in point B - energyEvaluator.run(sb, mu_tot, vir, pres, step, FALSE); - fnorm = sb->fnorm; - - // Calculate gradient in point B - real* fb = static_cast(sb->f.view().force()[0]); - for (gpb = 0, i = 0; i < n; i++) - { - gpb -= s[i] * fb[i]; /* f is negative gradient, thus the sign */ - } - /* Sum the gradient along the line across CPUs */ - if (PAR(cr)) - { - gmx_sumd(1, &gpb, cr); - } - - // Keep one of the intervals [A,B] or [B,C] based on the value of the derivative - // at the new point B, and rename the endpoints of this new interval A and C. - if (gpb > 0) - { - /* Replace c endpoint with b */ - c = b; - /* copy state b to c */ - *sc = *sb; - } - else - { - /* Replace a endpoint with b */ - a = b; - /* copy state b to a */ - *sa = *sb; - } - - /* - * Stop search as soon as we find a value smaller than the endpoints, - * or if the tolerance is below machine precision. - * Never run more than 20 steps, no matter what. - */ - nminstep++; - } while ((sb->epot > sa->epot || sb->epot > sc->epot) && (nminstep < 20)); - - if (std::fabs(sb->epot - Epot0) < GMX_REAL_EPS || nminstep >= 20) - { - /* OK. We couldn't find a significantly lower energy. - * If ncorr==0 this was steepest descent, and then we give up. - * If not, reset memory to restart as steepest descent before quitting. 
- */ - if (ncorr == 0) - { - /* Converged */ - converged = TRUE; - break; - } - else - { - /* Reset memory */ - ncorr = 0; - /* Search in gradient direction */ - for (i = 0; i < n; i++) - { - dx[point][i] = ff[i]; - } - /* Reset stepsize */ - stepsize = 1.0 / fnorm; - continue; - } - } - - /* Select min energy state of A & C, put the best in xx/ff/Epot - */ - if (sc->epot < sa->epot) - { - /* Use state C */ - ems = *sc; - step_taken = c; - } - else - { - /* Use state A */ - ems = *sa; - step_taken = a; - } - } - else - { - /* found lower */ - /* Use state C */ - ems = *sc; - step_taken = c; - } - - /* Update the memory information, and calculate a new - * approximation of the inverse hessian - */ - - /* Have new data in Epot, xx, ff */ - if (ncorr < nmaxcorr) - { - ncorr++; - } - - for (i = 0; i < n; i++) - { - dg[point][i] = lastf[i] - ff[i]; - dx[point][i] *= step_taken; - } - - dgdg = 0; - dgdx = 0; - for (i = 0; i < n; i++) - { - dgdg += dg[point][i] * dg[point][i]; - dgdx += dg[point][i] * dx[point][i]; - } - - diag = dgdx / dgdg; - - rho[point] = 1.0 / dgdx; - point++; - - if (point >= nmaxcorr) - { - point = 0; - } - - /* Update */ - for (i = 0; i < n; i++) - { - p[i] = ff[i]; - } - - cp = point; - - /* Recursive update. 
First go back over the memory points */ - for (k = 0; k < ncorr; k++) - { - cp--; - if (cp < 0) - { - cp = ncorr - 1; - } - - sq = 0; - for (i = 0; i < n; i++) - { - sq += dx[cp][i] * p[i]; - } - - alpha[cp] = rho[cp] * sq; - - for (i = 0; i < n; i++) - { - p[i] -= alpha[cp] * dg[cp][i]; - } - } - - for (i = 0; i < n; i++) - { - p[i] *= diag; - } - - /* And then go forward again */ - for (k = 0; k < ncorr; k++) - { - yr = 0; - for (i = 0; i < n; i++) - { - yr += p[i] * dg[cp][i]; - } - - beta = rho[cp] * yr; - beta = alpha[cp] - beta; - - for (i = 0; i < n; i++) - { - p[i] += beta * dx[cp][i]; - } - - cp++; - if (cp >= ncorr) - { - cp = 0; - } - } - - for (i = 0; i < n; i++) - { - if (!frozen[i]) - { - dx[point][i] = p[i]; - } - else - { - dx[point][i] = 0; - } - } - - /* Print it if necessary */ - if (MASTER(cr)) - { - if (mdrunOptions.verbose) - { - double sqrtNumAtoms = sqrt(static_cast(state_global->natoms)); - fprintf(stderr, "\rStep %d, Epot=%12.6e, Fnorm=%9.3e, Fmax=%9.3e (atom %d)\n", step, - ems.epot, ems.fnorm / sqrtNumAtoms, ems.fmax, ems.a_fmax + 1); - fflush(stderr); - } - /* Store the new (lower) energies */ - matrix nullBox = {}; - energyOutput.addDataAtEnergyStep(false, false, static_cast(step), mdatoms->tmass, - enerd, nullptr, nullptr, nullBox, PTCouplingArrays(), 0, - nullptr, nullptr, vir, pres, nullptr, mu_tot, constr); - - do_log = do_per_step(step, inputrec->nstlog); - do_ene = do_per_step(step, inputrec->nstenergy); - - imdSession->fillEnergyRecord(step, TRUE); - - if (do_log) - { - EnergyOutput::printHeader(fplog, step, step); - } - energyOutput.printStepToEnergyFile(mdoutf_get_fp_ene(outf), do_ene, FALSE, FALSE, - do_log ? fplog : nullptr, step, step, - fr->fcdata.get(), nullptr); - } - - /* Send x and E to IMD client, if bIMD is TRUE. 
*/ - if (imdSession->run(step, TRUE, state_global->box, state_global->x.rvec_array(), 0) && MASTER(cr)) - { - imdSession->sendPositionsAndEnergies(); - } - - // Reset stepsize in we are doing more iterations - stepsize = 1.0; - - /* Stop when the maximum force lies below tolerance. - * If we have reached machine precision, converged is already set to true. - */ - converged = converged || (ems.fmax < inputrec->em_tol); - - } /* End of the loop */ - - if (converged) - { - step--; /* we never took that last step in this case */ - } - if (ems.fmax > inputrec->em_tol) - { - if (MASTER(cr)) - { - warn_step(fplog, inputrec->em_tol, ems.fmax, step - 1 == number_steps, FALSE); - } - converged = FALSE; - } - - /* If we printed energy and/or logfile last step (which was the last step) - * we don't have to do it again, but otherwise print the final values. - */ - if (!do_log) /* Write final value to log since we didn't do anythin last step */ - { - EnergyOutput::printHeader(fplog, step, step); - } - if (!do_ene || !do_log) /* Write final energy file entries */ - { - energyOutput.printStepToEnergyFile(mdoutf_get_fp_ene(outf), !do_ene, FALSE, FALSE, - !do_log ? fplog : nullptr, step, step, fr->fcdata.get(), - nullptr); - } - - /* Print some stuff... */ - if (MASTER(cr)) - { - fprintf(stderr, "\nwriting lowest energy coordinates.\n"); - } - - /* IMPORTANT! - * For accurate normal mode calculation it is imperative that we - * store the last conformation into the full precision binary trajectory. - * - * However, we should only do it if we did NOT already write this step - * above (which we did if do_x or do_f was true). 
- */ - do_x = !do_per_step(step, inputrec->nstxout); - do_f = !do_per_step(step, inputrec->nstfout); - write_em_traj(fplog, cr, outf, do_x, do_f, ftp2fn(efSTO, nfile, fnm), top_global, inputrec, - step, &ems, state_global, observablesHistory); - - if (MASTER(cr)) - { - double sqrtNumAtoms = sqrt(static_cast(state_global->natoms)); - print_converged(stderr, LBFGS, inputrec->em_tol, step, converged, number_steps, &ems, sqrtNumAtoms); - print_converged(fplog, LBFGS, inputrec->em_tol, step, converged, number_steps, &ems, sqrtNumAtoms); - - fprintf(fplog, "\nPerformed %d energy evaluations in total.\n", neval); - } - - finish_em(cr, outf, walltime_accounting, wcycle); - - /* To print the actual number of steps we needed somewhere */ - walltime_accounting_set_nsteps_done(walltime_accounting, step); -} - -void LegacySimulator::do_steep() -{ - const char* SD = "Steepest Descents"; - gmx_localtop_t top(top_global->ffparams); - gmx_global_stat_t gstat; - real stepsize; - real ustep; - gmx_bool bDone, bAbort, do_x, do_f; - tensor vir, pres; - rvec mu_tot = { 0 }; - int nsteps; - int count = 0; - int steps_accepted = 0; - auto mdatoms = mdAtoms->mdatoms(); - - GMX_LOG(mdlog.info) - .asParagraph() - .appendText( - "Note that activating steepest-descent energy minimization via the " - "integrator .mdp option and the command gmx mdrun may " - "be available in a different form in a future version of GROMACS, " - "e.g. 
gmx minimize and an .mdp option."); - - /* Create 2 states on the stack and extract pointers that we will swap */ - em_state_t s0{}, s1{}; - em_state_t* s_min = &s0; - em_state_t* s_try = &s1; - - /* Init em and store the local state in s_try */ - init_em(fplog, mdlog, SD, cr, inputrec, imdSession, pull_work, state_global, top_global, s_try, - &top, nrnb, fr, mdAtoms, &gstat, vsite, constr, nullptr); - const bool simulationsShareState = false; - gmx_mdoutf* outf = init_mdoutf(fplog, nfile, fnm, mdrunOptions, cr, outputProvider, - mdModulesNotifier, inputrec, top_global, nullptr, wcycle, - StartingBehavior::NewSimulation, simulationsShareState, ms); - gmx::EnergyOutput energyOutput(mdoutf_get_fp_ene(outf), top_global, inputrec, pull_work, - nullptr, false, StartingBehavior::NewSimulation, - simulationsShareState, mdModulesNotifier); - - /* Print to log file */ - print_em_start(fplog, cr, walltime_accounting, wcycle, SD); - - /* Set variables for stepsize (in nm). This is the largest - * step that we are going to make in any direction. 
- */ - ustep = inputrec->em_stepsize; - stepsize = 0; - - /* Max number of steps */ - nsteps = inputrec->nsteps; - - if (MASTER(cr)) - { - /* Print to the screen */ - sp_header(stderr, SD, inputrec->em_tol, nsteps); - } - if (fplog) - { - sp_header(fplog, SD, inputrec->em_tol, nsteps); - } - EnergyEvaluator energyEvaluator{ fplog, mdlog, cr, ms, top_global, &top, - inputrec, imdSession, pull_work, nrnb, wcycle, gstat, - vsite, constr, mdAtoms, fr, runScheduleWork, enerd }; - - /**** HERE STARTS THE LOOP **** - * count is the counter for the number of steps - * bDone will be TRUE when the minimization has converged - * bAbort will be TRUE when nsteps steps have been performed or when - * the stepsize becomes smaller than is reasonable for machine precision - */ - count = 0; - bDone = FALSE; - bAbort = FALSE; - while (!bDone && !bAbort) - { - bAbort = (nsteps >= 0) && (count == nsteps); - - /* set new coordinates, except for first step */ - bool validStep = true; - if (count > 0) - { - validStep = do_em_step(cr, inputrec, mdatoms, s_min, stepsize, - s_min->f.view().forceWithPadding(), s_try, constr, count); - } - - if (validStep) - { - energyEvaluator.run(s_try, mu_tot, vir, pres, count, count == 0); - } - else - { - // Signal constraint error during stepping with energy=inf - s_try->epot = std::numeric_limits::infinity(); - } - - if (MASTER(cr)) - { - EnergyOutput::printHeader(fplog, count, count); - } - - if (count == 0) - { - s_min->epot = s_try->epot; - } - - /* Print it if necessary */ - if (MASTER(cr)) - { - if (mdrunOptions.verbose) - { - fprintf(stderr, "Step=%5d, Dmax= %6.1e nm, Epot= %12.5e Fmax= %11.5e, atom= %d%c", - count, ustep, s_try->epot, s_try->fmax, s_try->a_fmax + 1, - ((count == 0) || (s_try->epot < s_min->epot)) ? 
'\n' : '\r'); - fflush(stderr); - } - - if ((count == 0) || (s_try->epot < s_min->epot)) - { - /* Store the new (lower) energies */ - matrix nullBox = {}; - energyOutput.addDataAtEnergyStep(false, false, static_cast(count), - mdatoms->tmass, enerd, nullptr, nullptr, nullBox, - PTCouplingArrays(), 0, nullptr, nullptr, vir, pres, - nullptr, mu_tot, constr); - - imdSession->fillEnergyRecord(count, TRUE); - - const bool do_dr = do_per_step(steps_accepted, inputrec->nstdisreout); - const bool do_or = do_per_step(steps_accepted, inputrec->nstorireout); - energyOutput.printStepToEnergyFile(mdoutf_get_fp_ene(outf), TRUE, do_dr, do_or, - fplog, count, count, fr->fcdata.get(), nullptr); - fflush(fplog); - } - } - - /* Now if the new energy is smaller than the previous... - * or if this is the first step! - * or if we did random steps! - */ - - if ((count == 0) || (s_try->epot < s_min->epot)) - { - steps_accepted++; - - /* Test whether the convergence criterion is met... */ - bDone = (s_try->fmax < inputrec->em_tol); - - /* Copy the arrays for force, positions and energy */ - /* The 'Min' array always holds the coords and forces of the minimal - sampled energy */ - swap_em_state(&s_min, &s_try); - if (count > 0) - { - ustep *= 1.2; - } - - /* Write to trn, if necessary */ - do_x = do_per_step(steps_accepted, inputrec->nstxout); - do_f = do_per_step(steps_accepted, inputrec->nstfout); - write_em_traj(fplog, cr, outf, do_x, do_f, nullptr, top_global, inputrec, count, s_min, - state_global, observablesHistory); - } - else - { - /* If energy is not smaller make the step smaller... 
*/ - ustep *= 0.5; - - if (DOMAINDECOMP(cr) && s_min->s.ddp_count != cr->dd->ddp_count) - { - /* Reload the old state */ - em_dd_partition_system(fplog, mdlog, count, cr, top_global, inputrec, imdSession, - pull_work, s_min, &top, mdAtoms, fr, vsite, constr, nrnb, wcycle); - } - } - - // If the force is very small after finishing minimization, - // we risk dividing by zero when calculating the step size. - // So we check first if the minimization has stopped before - // trying to obtain a new step size. - if (!bDone) - { - /* Determine new step */ - stepsize = ustep / s_min->fmax; - } - - /* Check if stepsize is too small, with 1 nm as a characteristic length */ -#if GMX_DOUBLE - if (count == nsteps || ustep < 1e-12) -#else - if (count == nsteps || ustep < 1e-6) -#endif - { - if (MASTER(cr)) - { - warn_step(fplog, inputrec->em_tol, s_min->fmax, count == nsteps, constr != nullptr); - } - bAbort = TRUE; - } - - /* Send IMD energies and positions, if bIMD is TRUE. */ - if (imdSession->run(count, TRUE, MASTER(cr) ? state_global->box : nullptr, - MASTER(cr) ? state_global->x.rvec_array() : nullptr, 0) - && MASTER(cr)) - { - imdSession->sendPositionsAndEnergies(); - } - - count++; - } /* End of the loop */ - - /* Print some data... 
*/ - if (MASTER(cr)) - { - fprintf(stderr, "\nwriting lowest energy coordinates.\n"); - } - write_em_traj(fplog, cr, outf, TRUE, inputrec->nstfout != 0, ftp2fn(efSTO, nfile, fnm), - top_global, inputrec, count, s_min, state_global, observablesHistory); - - if (MASTER(cr)) - { - double sqrtNumAtoms = sqrt(static_cast(state_global->natoms)); - - print_converged(stderr, SD, inputrec->em_tol, count, bDone, nsteps, s_min, sqrtNumAtoms); - print_converged(fplog, SD, inputrec->em_tol, count, bDone, nsteps, s_min, sqrtNumAtoms); - } - - finish_em(cr, outf, walltime_accounting, wcycle); - - /* To print the actual number of steps we needed somewhere */ - inputrec->nsteps = count; - - walltime_accounting_set_nsteps_done(walltime_accounting, count); -} - -void LegacySimulator::do_nm() -{ - const char* NM = "Normal Mode Analysis"; - int nnodes; - gmx_localtop_t top(top_global->ffparams); - gmx_global_stat_t gstat; - tensor vir, pres; - rvec mu_tot = { 0 }; - rvec* dfdx; - gmx_bool bSparse; /* use sparse matrix storage format */ - size_t sz; - gmx_sparsematrix_t* sparse_matrix = nullptr; - real* full_matrix = nullptr; - - /* added with respect to mdrun */ - int row, col; - real der_range = 10.0 * std::sqrt(GMX_REAL_EPS); - real x_min; - bool bIsMaster = MASTER(cr); - auto mdatoms = mdAtoms->mdatoms(); - - GMX_LOG(mdlog.info) - .asParagraph() - .appendText( - "Note that activating normal-mode analysis via the integrator " - ".mdp option and the command gmx mdrun may " - "be available in a different form in a future version of GROMACS, " - "e.g. 
gmx normal-modes."); - - if (constr != nullptr) - { - gmx_fatal( - FARGS, - "Constraints present with Normal Mode Analysis, this combination is not supported"); - } - - gmx_shellfc_t* shellfc; - - em_state_t state_work{}; - - /* Init em and store the local state in state_minimum */ - init_em(fplog, mdlog, NM, cr, inputrec, imdSession, pull_work, state_global, top_global, - &state_work, &top, nrnb, fr, mdAtoms, &gstat, vsite, constr, &shellfc); - const bool simulationsShareState = false; - gmx_mdoutf* outf = init_mdoutf(fplog, nfile, fnm, mdrunOptions, cr, outputProvider, - mdModulesNotifier, inputrec, top_global, nullptr, wcycle, - StartingBehavior::NewSimulation, simulationsShareState, ms); - - std::vector atom_index = get_atom_index(top_global); - std::vector fneg(atom_index.size(), { 0, 0, 0 }); - snew(dfdx, atom_index.size()); - -#if !GMX_DOUBLE - if (bIsMaster) - { - fprintf(stderr, - "NOTE: This version of GROMACS has been compiled in single precision,\n" - " which MIGHT not be accurate enough for normal mode analysis.\n" - " GROMACS now uses sparse matrix storage, so the memory requirements\n" - " are fairly modest even if you recompile in double precision.\n\n"); - } -#endif - - /* Check if we can/should use sparse storage format. - * - * Sparse format is only useful when the Hessian itself is sparse, which it - * will be when we use a cutoff. - * For small systems (n<1000) it is easier to always use full matrix format, though. 
- */ - if (EEL_FULL(fr->ic->eeltype) || fr->rlist == 0.0) - { - GMX_LOG(mdlog.warning) - .appendText("Non-cutoff electrostatics used, forcing full Hessian format."); - bSparse = FALSE; - } - else if (atom_index.size() < 1000) - { - GMX_LOG(mdlog.warning) - .appendTextFormatted("Small system size (N=%zu), using full Hessian format.", - atom_index.size()); - bSparse = FALSE; - } - else - { - GMX_LOG(mdlog.warning).appendText("Using compressed symmetric sparse Hessian format."); - bSparse = TRUE; - } - - /* Number of dimensions, based on real atoms, that is not vsites or shell */ - sz = DIM * atom_index.size(); - - fprintf(stderr, "Allocating Hessian memory...\n\n"); - - if (bSparse) - { - sparse_matrix = gmx_sparsematrix_init(sz); - sparse_matrix->compressed_symmetric = TRUE; - } - else - { - snew(full_matrix, sz * sz); - } - - /* Write start time and temperature */ - print_em_start(fplog, cr, walltime_accounting, wcycle, NM); - - /* fudge nr of steps to nr of atoms */ - inputrec->nsteps = atom_index.size() * 2; - - if (bIsMaster) - { - fprintf(stderr, "starting normal mode calculation '%s'\n%" PRId64 " steps.\n\n", - *(top_global->name), inputrec->nsteps); - } - - nnodes = cr->nnodes; - - /* Make evaluate_energy do a single node force calculation */ - cr->nnodes = 1; - EnergyEvaluator energyEvaluator{ fplog, mdlog, cr, ms, top_global, &top, - inputrec, imdSession, pull_work, nrnb, wcycle, gstat, - vsite, constr, mdAtoms, fr, runScheduleWork, enerd }; - energyEvaluator.run(&state_work, mu_tot, vir, pres, -1, TRUE); - cr->nnodes = nnodes; - - /* if forces are not small, warn user */ - get_state_f_norm_max(cr, &(inputrec->opts), mdatoms, &state_work); - - GMX_LOG(mdlog.warning).appendTextFormatted("Maximum force:%12.5e", state_work.fmax); - if (state_work.fmax > 1.0e-3) - { - GMX_LOG(mdlog.warning) - .appendText( - "The force is probably not small enough to " - "ensure that you are at a minimum.\n" - "Be aware that negative eigenvalues may occur\n" - "when the 
resulting matrix is diagonalized."); - } - - /*********************************************************** - * - * Loop over all pairs in matrix - * - * do_force called twice. Once with positive and - * once with negative displacement - * - ************************************************************/ - - /* Steps are divided one by one over the nodes */ - bool bNS = true; - auto state_work_x = makeArrayRef(state_work.s.x); - auto state_work_f = state_work.f.view().force(); - for (index aid = cr->nodeid; aid < ssize(atom_index); aid += nnodes) - { - size_t atom = atom_index[aid]; - for (size_t d = 0; d < DIM; d++) - { - int64_t step = 0; - int force_flags = GMX_FORCE_STATECHANGED | GMX_FORCE_ALLFORCES; - double t = 0; - - x_min = state_work_x[atom][d]; - - for (unsigned int dx = 0; (dx < 2); dx++) - { - if (dx == 0) - { - state_work_x[atom][d] = x_min - der_range; - } - else - { - state_work_x[atom][d] = x_min + der_range; - } - - /* Make evaluate_energy do a single node force calculation */ - cr->nnodes = 1; - if (shellfc) - { - /* Now is the time to relax the shells */ - relax_shell_flexcon(fplog, cr, ms, mdrunOptions.verbose, nullptr, step, inputrec, - imdSession, pull_work, bNS, force_flags, &top, constr, enerd, - state_work.s.natoms, state_work.s.x.arrayRefWithPadding(), - state_work.s.v.arrayRefWithPadding(), state_work.s.box, - state_work.s.lambda, &state_work.s.hist, &state_work.f.view(), - vir, mdatoms, nrnb, wcycle, shellfc, fr, runScheduleWork, t, - mu_tot, vsite, DDBalanceRegionHandler(nullptr)); - bNS = false; - step++; - } - else - { - energyEvaluator.run(&state_work, mu_tot, vir, pres, aid * 2 + dx, FALSE); - } - - cr->nnodes = nnodes; - - if (dx == 0) - { - std::copy(state_work_f.begin(), state_work_f.begin() + atom_index.size(), - fneg.begin()); - } - } - - /* x is restored to original */ - state_work_x[atom][d] = x_min; - - for (size_t j = 0; j < atom_index.size(); j++) - { - for (size_t k = 0; (k < DIM); k++) - { - dfdx[j][k] = 
-(state_work_f[atom_index[j]][k] - fneg[j][k]) / (2 * der_range); - } - } - - if (!bIsMaster) - { -#if GMX_MPI -# define mpi_type GMX_MPI_REAL - MPI_Send(dfdx[0], atom_index.size() * DIM, mpi_type, MASTER(cr), cr->nodeid, - cr->mpi_comm_mygroup); -#endif - } - else - { - for (index node = 0; (node < nnodes && aid + node < ssize(atom_index)); node++) - { - if (node > 0) - { -#if GMX_MPI - MPI_Status stat; - MPI_Recv(dfdx[0], atom_index.size() * DIM, mpi_type, node, node, - cr->mpi_comm_mygroup, &stat); -# undef mpi_type -#endif - } - - row = (aid + node) * DIM + d; - - for (size_t j = 0; j < atom_index.size(); j++) - { - for (size_t k = 0; k < DIM; k++) - { - col = j * DIM + k; - - if (bSparse) - { - if (col >= row && dfdx[j][k] != 0.0) - { - gmx_sparsematrix_increment_value(sparse_matrix, row, col, dfdx[j][k]); - } - } - else - { - full_matrix[row * sz + col] = dfdx[j][k]; - } - } - } - } - } - - if (mdrunOptions.verbose && fplog) - { - fflush(fplog); - } - } - /* write progress */ - if (bIsMaster && mdrunOptions.verbose) - { - fprintf(stderr, "\rFinished step %d out of %td", - std::min(atom + nnodes, atom_index.size()), ssize(atom_index)); - fflush(stderr); - } - } - - if (bIsMaster) - { - fprintf(stderr, "\n\nWriting Hessian...\n"); - gmx_mtxio_write(ftp2fn(efMTX, nfile, fnm), sz, sz, full_matrix, sparse_matrix); - } - - finish_em(cr, outf, walltime_accounting, wcycle); - - walltime_accounting_set_nsteps_done(walltime_accounting, atom_index.size() * 2); -} - -} // namespace gmx diff --git a/patches/gromacs-2021.7.diff/src/gromacs/mdrun/replicaexchange.cpp b/patches/gromacs-2021.7.diff/src/gromacs/mdrun/replicaexchange.cpp deleted file mode 100644 index a75917ae09..0000000000 --- a/patches/gromacs-2021.7.diff/src/gromacs/mdrun/replicaexchange.cpp +++ /dev/null @@ -1,1493 +0,0 @@ -/* - * This file is part of the GROMACS molecular simulation package. - * - * Copyright (c) 1991-2000, University of Groningen, The Netherlands. 
- * Copyright (c) 2001-2004, The GROMACS development team. - * Copyright (c) 2011-2019,2020, by the GROMACS development team, led by - * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl, - * and including many others, as listed in the AUTHORS file in the - * top-level source directory and at http://www.gromacs.org. - * - * GROMACS is free software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public License - * as published by the Free Software Foundation; either version 2.1 - * of the License, or (at your option) any later version. - * - * GROMACS is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public - * License along with GROMACS; if not, see - * http://www.gnu.org/licenses, or write to the Free Software Foundation, - * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. - * - * If you want to redistribute modifications to GROMACS, please - * consider that scientific software is very special. Version - * control is crucial - bugs must be traceable. We will be happy to - * consider code for inclusion in the official distribution, but - * derived work must not be called official GROMACS. Details are found - * in the README & COPYING files - if they are missing, get the - * official version at http://www.gromacs.org. - * - * To help us fund GROMACS development, we humbly ask that you cite - * the research papers on the package. Check out http://www.gromacs.org. - */ - -/*! \internal \file - * - * \brief Implements the replica exchange routines. 
- * - * \author David van der Spoel - * \author Mark Abraham - * \ingroup module_mdrun - */ -#include "gmxpre.h" - -#include "replicaexchange.h" - -#include "config.h" - -#include - -#include - -#include "gromacs/domdec/collect.h" -#include "gromacs/gmxlib/network.h" -#include "gromacs/math/units.h" -#include "gromacs/math/vec.h" -#include "gromacs/mdrunutility/multisim.h" -#include "gromacs/mdtypes/commrec.h" -#include "gromacs/mdtypes/enerdata.h" -#include "gromacs/mdtypes/inputrec.h" -#include "gromacs/mdtypes/md_enums.h" -#include "gromacs/mdtypes/state.h" -#include "gromacs/random/threefry.h" -#include "gromacs/random/uniformintdistribution.h" -#include "gromacs/random/uniformrealdistribution.h" -#include "gromacs/utility/fatalerror.h" -#include "gromacs/utility/pleasecite.h" -#include "gromacs/utility/smalloc.h" - - -/* PLUMED */ -#include "../../../Plumed.h" -extern int plumedswitch; -extern plumed plumedmain; -/* END PLUMED */ - -/* PLUMED HREX */ -extern int plumed_hrex; -/* END PLUMED HREX */ - -//! Helps cut off probability values. -constexpr int c_probabilityCutoff = 100; - -/* we don't bother evaluating if events are more rare than exp(-100) = 3.7x10^-44 */ - -//! Rank in the multisimulation -#define MSRANK(ms, nodeid) (nodeid) - -//! Enum for replica exchange flavours -enum -{ - ereTEMP, - ereLAMBDA, - ereENDSINGLE, - ereTL, - ereNR -}; -/*! \brief Strings describing replica exchange flavours. - * - * end_single_marker merely notes the end of single variable replica - * exchange. All types higher than it are multiple replica exchange - * methods. - * - * Eventually, should add 'pressure', 'temperature and pressure', - * 'lambda_and_pressure', 'temperature_lambda_pressure'?; Let's wait - * until we feel better about the pressure control methods giving - * exact ensembles. Right now, we assume constant pressure */ -static const char* erename[ereNR] = { "temperature", "lambda", "end_single_marker", - "temperature and lambda" }; - -//! 
Working data for replica exchange. -struct gmx_repl_ex -{ - //! Replica ID - int repl; - //! Total number of replica - int nrepl; - //! Temperature - real temp; - //! Replica exchange type from ere enum - int type; - //! Quantity, e.g. temperature or lambda; first index is ere, second index is replica ID - real** q; - //! Use constant pressure and temperature - gmx_bool bNPT; - //! Replica pressures - real* pres; - //! Replica indices - int* ind; - //! Used for keeping track of all the replica swaps - int* allswaps; - //! Replica exchange interval (number of steps) - int nst; - //! Number of exchanges per interval - int nex; - //! Random seed - int seed; - //! Number of even and odd replica change attempts - int nattempt[2]; - //! Sum of probabilities - real* prob_sum; - //! Number of moves between replicas i and j - int** nmoves; - //! i-th element of the array is the number of exchanges between replica i-1 and i - int* nexchange; - - /*! \brief Helper arrays for replica exchange; allocated here - * so they don't have to be allocated each time */ - //! \{ - int* destinations; - int** cyclic; - int** order; - int* tmpswap; - gmx_bool* incycle; - gmx_bool* bEx; - //! \} - - //! Helper arrays to hold the quantities that are exchanged. - //! \{ - real* prob; - real* Epot; - real* beta; - real* Vol; - real** de; - //! \} -}; - -// TODO We should add Doxygen here some time. -//! 
\cond - -static gmx_bool repl_quantity(const gmx_multisim_t* ms, struct gmx_repl_ex* re, int ere, real q) -{ - real* qall; - gmx_bool bDiff; - int s; - - snew(qall, ms->numSimulations_); - qall[re->repl] = q; - gmx_sum_sim(ms->numSimulations_, qall, ms); - - /* PLUMED */ - //bDiff = FALSE; - //for (s = 1; s < ms->numSimulations_; s++) - //{ - // if (qall[s] != qall[0]) - // { - bDiff = TRUE; - // } - //} - /* PLUMED */ - - if (bDiff) - { - /* Set the replica exchange type and quantities */ - re->type = ere; - - snew(re->q[ere], re->nrepl); - for (s = 0; s < ms->numSimulations_; s++) - { - re->q[ere][s] = qall[s]; - } - } - sfree(qall); - return bDiff; -} - -gmx_repl_ex_t init_replica_exchange(FILE* fplog, - const gmx_multisim_t* ms, - int numAtomsInSystem, - const t_inputrec* ir, - const ReplicaExchangeParameters& replExParams) -{ - real pres; - int i, j; - struct gmx_repl_ex* re; - gmx_bool bTemp; - gmx_bool bLambda = FALSE; - - fprintf(fplog, "\nInitializing Replica Exchange\n"); - - if (!isMultiSim(ms) || ms->numSimulations_ == 1) - { - gmx_fatal(FARGS, - "Nothing to exchange with only one replica, maybe you forgot to set the " - "-multidir option of mdrun?"); - } - if (replExParams.numExchanges < 0) - { - gmx_fatal(FARGS, "Replica exchange number of exchanges needs to be positive"); - } - - if (!EI_DYNAMICS(ir->eI)) - { - gmx_fatal(FARGS, "Replica exchange is only supported by dynamical simulations"); - /* Note that PAR(cr) is defined by cr->nnodes > 1, which is - * distinct from isMultiSim(ms). A multi-simulation only runs - * with real MPI parallelism, but this does not imply PAR(cr) - * is true! - * - * Since we are using a dynamical integrator, the only - * decomposition is DD, so PAR(cr) and DOMAINDECOMP(cr) are - * synonymous. The only way for cr->nnodes > 1 to be true is - * if we are using DD. 
*/ - } - - snew(re, 1); - - re->repl = ms->simulationIndex_; - re->nrepl = ms->numSimulations_; - snew(re->q, ereENDSINGLE); - - fprintf(fplog, "Repl There are %d replicas:\n", re->nrepl); - - /* We only check that the number of atoms in the systms match. - * This, of course, do not guarantee that the systems are the same, - * but it does guarantee that we can perform replica exchange. - */ - check_multi_int(fplog, ms, numAtomsInSystem, "the number of atoms", FALSE); - check_multi_int(fplog, ms, ir->eI, "the integrator", FALSE); - check_multi_int64(fplog, ms, ir->init_step + ir->nsteps, "init_step+nsteps", FALSE); - const int nst = replExParams.exchangeInterval; - check_multi_int64(fplog, ms, (ir->init_step + nst - 1) / nst, - "first exchange step: init_step/-replex", FALSE); - check_multi_int(fplog, ms, ir->etc, "the temperature coupling", FALSE); - check_multi_int(fplog, ms, ir->opts.ngtc, "the number of temperature coupling groups", FALSE); - check_multi_int(fplog, ms, ir->epc, "the pressure coupling", FALSE); - check_multi_int(fplog, ms, ir->efep, "free energy", FALSE); - check_multi_int(fplog, ms, ir->fepvals->n_lambda, "number of lambda states", FALSE); - - re->temp = ir->opts.ref_t[0]; - for (i = 1; (i < ir->opts.ngtc); i++) - { - if (ir->opts.ref_t[i] != re->temp) - { - fprintf(fplog, - "\nWARNING: The temperatures of the different temperature coupling groups are " - "not identical\n\n"); - fprintf(stderr, - "\nWARNING: The temperatures of the different temperature coupling groups are " - "not identical\n\n"); - } - } - - re->type = -1; - bTemp = repl_quantity(ms, re, ereTEMP, re->temp); - if (ir->efep != efepNO) - { - bLambda = repl_quantity(ms, re, ereLAMBDA, static_cast(ir->fepvals->init_fep_state)); - } - if (re->type == -1) /* nothing was assigned */ - { - gmx_fatal(FARGS, - "The properties of the %d systems are all the same, there is nothing to exchange", - re->nrepl); - } - if (bLambda && bTemp) - { - re->type = ereTL; - } - - if (bTemp) - { - 
please_cite(fplog, "Sugita1999a"); - if (ir->epc != epcNO) - { - re->bNPT = TRUE; - fprintf(fplog, "Repl Using Constant Pressure REMD.\n"); - please_cite(fplog, "Okabe2001a"); - } - if (ir->etc == etcBERENDSEN) - { - gmx_fatal(FARGS, - "REMD with the %s thermostat does not produce correct potential energy " - "distributions, consider using the %s thermostat instead", - ETCOUPLTYPE(ir->etc), ETCOUPLTYPE(etcVRESCALE)); - } - } - if (bLambda) - { - if (ir->fepvals->delta_lambda != 0) /* check this? */ - { - gmx_fatal(FARGS, "delta_lambda is not zero"); - } - } - if (re->bNPT) - { - snew(re->pres, re->nrepl); - if (ir->epct == epctSURFACETENSION) - { - pres = ir->ref_p[ZZ][ZZ]; - } - else - { - pres = 0; - j = 0; - for (i = 0; i < DIM; i++) - { - if (ir->compress[i][i] != 0) - { - pres += ir->ref_p[i][i]; - j++; - } - } - pres /= j; - } - re->pres[re->repl] = pres; - gmx_sum_sim(re->nrepl, re->pres, ms); - } - - /* Make an index for increasing replica order */ - /* only makes sense if one or the other is varying, not both! - if both are varying, we trust the order the person gave. */ - snew(re->ind, re->nrepl); - for (i = 0; i < re->nrepl; i++) - { - re->ind[i] = i; - } - - /* PLUMED */ - // plumed2: check if we want alternative patterns (i.e. for bias-exchange metaD) - // in those cases replicas can share the same temperature. - /* - if (re->type < ereENDSINGLE) - { - - for (i = 0; i < re->nrepl; i++) - { - for (j = i + 1; j < re->nrepl; j++) - { - if (re->q[re->type][re->ind[j]] < re->q[re->type][re->ind[i]]) - {*/ - /* Unordered replicas are supposed to work, but there - * is still an issues somewhere. - * Note that at this point still re->ind[i]=i. 
- */ - /* - gmx_fatal(FARGS, - "Replicas with indices %d < %d have %ss %g > %g, please order your " - "replicas on increasing %s", - i, j, erename[re->type], re->q[re->type][i], re->q[re->type][j], - erename[re->type]); - } - else if (re->q[re->type][re->ind[j]] == re->q[re->type][re->ind[i]]) - { - gmx_fatal(FARGS, "Two replicas have identical %ss", erename[re->type]); - } - } - } - } - */ - /* END PLUMED */ - - /* keep track of all the swaps, starting with the initial placement. */ - snew(re->allswaps, re->nrepl); - for (i = 0; i < re->nrepl; i++) - { - re->allswaps[i] = re->ind[i]; - } - - switch (re->type) - { - case ereTEMP: - fprintf(fplog, "\nReplica exchange in temperature\n"); - for (i = 0; i < re->nrepl; i++) - { - fprintf(fplog, " %5.1f", re->q[re->type][re->ind[i]]); - } - fprintf(fplog, "\n"); - break; - case ereLAMBDA: - fprintf(fplog, "\nReplica exchange in lambda\n"); - for (i = 0; i < re->nrepl; i++) - { - fprintf(fplog, " %3d", static_cast(re->q[re->type][re->ind[i]])); - } - fprintf(fplog, "\n"); - break; - case ereTL: - fprintf(fplog, "\nReplica exchange in temperature and lambda state\n"); - for (i = 0; i < re->nrepl; i++) - { - fprintf(fplog, " %5.1f", re->q[ereTEMP][re->ind[i]]); - } - fprintf(fplog, "\n"); - for (i = 0; i < re->nrepl; i++) - { - fprintf(fplog, " %5d", static_cast(re->q[ereLAMBDA][re->ind[i]])); - } - fprintf(fplog, "\n"); - break; - default: gmx_incons("Unknown replica exchange quantity"); - } - if (re->bNPT) - { - fprintf(fplog, "\nRepl p"); - for (i = 0; i < re->nrepl; i++) - { - fprintf(fplog, " %5.2f", re->pres[re->ind[i]]); - } - - for (i = 0; i < re->nrepl; i++) - { - if ((i > 0) && (re->pres[re->ind[i]] < re->pres[re->ind[i - 1]])) - { - fprintf(fplog, - "\nWARNING: The reference pressures decrease with increasing " - "temperatures\n\n"); - fprintf(stderr, - "\nWARNING: The reference pressures decrease with increasing " - "temperatures\n\n"); - } - } - } - re->nst = nst; - if (replExParams.randomSeed == -1) - { - if 
(isMasterSim(ms)) - { - re->seed = static_cast(gmx::makeRandomSeed()); - } - else - { - re->seed = 0; - } - gmx_sumi_sim(1, &(re->seed), ms); - } - else - { - re->seed = replExParams.randomSeed; - } - fprintf(fplog, "\nReplica exchange interval: %d\n", re->nst); - fprintf(fplog, "\nReplica random seed: %d\n", re->seed); - - re->nattempt[0] = 0; - re->nattempt[1] = 0; - - snew(re->prob_sum, re->nrepl); - snew(re->nexchange, re->nrepl); - snew(re->nmoves, re->nrepl); - for (i = 0; i < re->nrepl; i++) - { - snew(re->nmoves[i], re->nrepl); - } - fprintf(fplog, "Replica exchange information below: ex and x = exchange, pr = probability\n"); - - /* generate space for the helper functions so we don't have to snew each time */ - - snew(re->destinations, re->nrepl); - snew(re->incycle, re->nrepl); - snew(re->tmpswap, re->nrepl); - snew(re->cyclic, re->nrepl); - snew(re->order, re->nrepl); - for (i = 0; i < re->nrepl; i++) - { - snew(re->cyclic[i], re->nrepl + 1); - snew(re->order[i], re->nrepl); - } - /* allocate space for the functions storing the data for the replicas */ - /* not all of these arrays needed in all cases, but they don't take - up much space, since the max size is nrepl**2 */ - snew(re->prob, re->nrepl); - snew(re->bEx, re->nrepl); - snew(re->beta, re->nrepl); - snew(re->Vol, re->nrepl); - snew(re->Epot, re->nrepl); - snew(re->de, re->nrepl); - for (i = 0; i < re->nrepl; i++) - { - snew(re->de[i], re->nrepl); - } - re->nex = replExParams.numExchanges; - return re; -} - -static void exchange_reals(const gmx_multisim_t gmx_unused* ms, int gmx_unused b, real* v, int n) -{ - real* buf; - int i; - - if (v) - { - snew(buf, n); -#if GMX_MPI - /* - MPI_Sendrecv(v, n*sizeof(real),MPI_BYTE,MSRANK(ms,b),0, - buf,n*sizeof(real),MPI_BYTE,MSRANK(ms,b),0, - ms->mastersComm_,MPI_STATUS_IGNORE); - */ - { - MPI_Request mpi_req; - - MPI_Isend(v, n * sizeof(real), MPI_BYTE, MSRANK(ms, b), 0, ms->mastersComm_, &mpi_req); - MPI_Recv(buf, n * sizeof(real), MPI_BYTE, MSRANK(ms, b), 
0, ms->mastersComm_, MPI_STATUS_IGNORE); - MPI_Wait(&mpi_req, MPI_STATUS_IGNORE); - } -#endif - for (i = 0; i < n; i++) - { - v[i] = buf[i]; - } - sfree(buf); - } -} - - -static void exchange_doubles(const gmx_multisim_t gmx_unused* ms, int gmx_unused b, double* v, int n) -{ - double* buf; - int i; - - if (v) - { - snew(buf, n); -#if GMX_MPI - /* - MPI_Sendrecv(v, n*sizeof(double),MPI_BYTE,MSRANK(ms,b),0, - buf,n*sizeof(double),MPI_BYTE,MSRANK(ms,b),0, - ms->mastersComm_,MPI_STATUS_IGNORE); - */ - { - MPI_Request mpi_req; - - MPI_Isend(v, n * sizeof(double), MPI_BYTE, MSRANK(ms, b), 0, ms->mastersComm_, &mpi_req); - MPI_Recv(buf, n * sizeof(double), MPI_BYTE, MSRANK(ms, b), 0, ms->mastersComm_, - MPI_STATUS_IGNORE); - MPI_Wait(&mpi_req, MPI_STATUS_IGNORE); - } -#endif - for (i = 0; i < n; i++) - { - v[i] = buf[i]; - } - sfree(buf); - } -} - -static void exchange_rvecs(const gmx_multisim_t gmx_unused* ms, int gmx_unused b, rvec* v, int n) -{ - rvec* buf; - int i; - - if (v) - { - snew(buf, n); -#if GMX_MPI - /* - MPI_Sendrecv(v[0], n*sizeof(rvec),MPI_BYTE,MSRANK(ms,b),0, - buf[0],n*sizeof(rvec),MPI_BYTE,MSRANK(ms,b),0, - ms->mastersComm_,MPI_STATUS_IGNORE); - */ - { - MPI_Request mpi_req; - - MPI_Isend(v[0], n * sizeof(rvec), MPI_BYTE, MSRANK(ms, b), 0, ms->mastersComm_, &mpi_req); - MPI_Recv(buf[0], n * sizeof(rvec), MPI_BYTE, MSRANK(ms, b), 0, ms->mastersComm_, - MPI_STATUS_IGNORE); - MPI_Wait(&mpi_req, MPI_STATUS_IGNORE); - } -#endif - for (i = 0; i < n; i++) - { - copy_rvec(buf[i], v[i]); - } - sfree(buf); - } -} - -/* PLUMED HREX */ -void exchange_state(const gmx_multisim_t* ms, int b, t_state* state) -/* END PLUMED HREX */ -{ - /* When t_state changes, this code should be updated. 
*/ - int ngtc, nnhpres; - ngtc = state->ngtc * state->nhchainlength; - nnhpres = state->nnhpres * state->nhchainlength; - exchange_rvecs(ms, b, state->box, DIM); - exchange_rvecs(ms, b, state->box_rel, DIM); - exchange_rvecs(ms, b, state->boxv, DIM); - exchange_reals(ms, b, &(state->veta), 1); - exchange_reals(ms, b, &(state->vol0), 1); - exchange_rvecs(ms, b, state->svir_prev, DIM); - exchange_rvecs(ms, b, state->fvir_prev, DIM); - exchange_rvecs(ms, b, state->pres_prev, DIM); - exchange_doubles(ms, b, state->nosehoover_xi.data(), ngtc); - exchange_doubles(ms, b, state->nosehoover_vxi.data(), ngtc); - exchange_doubles(ms, b, state->nhpres_xi.data(), nnhpres); - exchange_doubles(ms, b, state->nhpres_vxi.data(), nnhpres); - exchange_doubles(ms, b, state->therm_integral.data(), state->ngtc); - exchange_doubles(ms, b, &state->baros_integral, 1); - exchange_rvecs(ms, b, state->x.rvec_array(), state->natoms); - exchange_rvecs(ms, b, state->v.rvec_array(), state->natoms); -} - -/* PLUMED HREX */ -void copy_state_serial(const t_state* src, t_state* dest) -/* END PLUMED HREX */ -{ - if (dest != src) - { - /* Currently the local state is always a pointer to the global - * in serial, so we should never end up here. - * TODO: Implement a (trivial) t_state copy once converted to C++. 
- */ - GMX_RELEASE_ASSERT(false, "State copying is currently not implemented in replica exchange"); - } -} - -static void scale_velocities(gmx::ArrayRef velocities, real fac) -{ - for (auto& v : velocities) - { - v *= fac; - } -} - -static void print_transition_matrix(FILE* fplog, int n, int** nmoves, const int* nattempt) -{ - int i, j, ntot; - float Tprint; - - ntot = nattempt[0] + nattempt[1]; - fprintf(fplog, "\n"); - fprintf(fplog, "Repl"); - for (i = 0; i < n; i++) - { - fprintf(fplog, " "); /* put the title closer to the center */ - } - fprintf(fplog, "Empirical Transition Matrix\n"); - - fprintf(fplog, "Repl"); - for (i = 0; i < n; i++) - { - fprintf(fplog, "%8d", (i + 1)); - } - fprintf(fplog, "\n"); - - for (i = 0; i < n; i++) - { - fprintf(fplog, "Repl"); - for (j = 0; j < n; j++) - { - Tprint = 0.0; - if (nmoves[i][j] > 0) - { - Tprint = nmoves[i][j] / (2.0 * ntot); - } - fprintf(fplog, "%8.4f", Tprint); - } - fprintf(fplog, "%3d\n", i); - } -} - -static void print_ind(FILE* fplog, const char* leg, int n, int* ind, const gmx_bool* bEx) -{ - int i; - - fprintf(fplog, "Repl %2s %2d", leg, ind[0]); - for (i = 1; i < n; i++) - { - fprintf(fplog, " %c %2d", (bEx != nullptr && bEx[i]) ? 'x' : ' ', ind[i]); - } - fprintf(fplog, "\n"); -} - -static void print_allswitchind(FILE* fplog, int n, int* pind, int* allswaps, int* tmpswap) -{ - int i; - - for (i = 0; i < n; i++) - { - tmpswap[i] = allswaps[i]; - } - for (i = 0; i < n; i++) - { - allswaps[i] = tmpswap[pind[i]]; - } - - fprintf(fplog, "\nAccepted Exchanges: "); - for (i = 0; i < n; i++) - { - fprintf(fplog, "%d ", pind[i]); - } - fprintf(fplog, "\n"); - - /* the "Order After Exchange" is the state label corresponding to the configuration that - started in state listed in order, i.e. 
- - 3 0 1 2 - - means that the: - configuration starting in simulation 3 is now in simulation 0, - configuration starting in simulation 0 is now in simulation 1, - configuration starting in simulation 1 is now in simulation 2, - configuration starting in simulation 2 is now in simulation 3 - */ - fprintf(fplog, "Order After Exchange: "); - for (i = 0; i < n; i++) - { - fprintf(fplog, "%d ", allswaps[i]); - } - fprintf(fplog, "\n\n"); -} - -static void print_prob(FILE* fplog, const char* leg, int n, real* prob) -{ - int i; - char buf[8]; - - fprintf(fplog, "Repl %2s ", leg); - for (i = 1; i < n; i++) - { - if (prob[i] >= 0) - { - sprintf(buf, "%4.2f", prob[i]); - fprintf(fplog, " %3s", buf[0] == '1' ? "1.0" : buf + 1); - } - else - { - fprintf(fplog, " "); - } - } - fprintf(fplog, "\n"); -} - -static void print_count(FILE* fplog, const char* leg, int n, int* count) -{ - int i; - - fprintf(fplog, "Repl %2s ", leg); - for (i = 1; i < n; i++) - { - fprintf(fplog, " %4d", count[i]); - } - fprintf(fplog, "\n"); -} - -static real calc_delta(FILE* fplog, gmx_bool bPrint, struct gmx_repl_ex* re, int a, int b, int ap, int bp) -{ - - real ediff, dpV, delta = 0; - real* Epot = re->Epot; - real* Vol = re->Vol; - real** de = re->de; - real* beta = re->beta; - - /* Two cases; we are permuted and not. In all cases, setting ap = a and bp = b will reduce - to the non permuted case */ - - switch (re->type) - { - case ereTEMP: - /* - * Okabe et. al. Chem. Phys. Lett. 335 (2001) 435-439 - */ - ediff = Epot[b] - Epot[a]; - delta = -(beta[bp] - beta[ap]) * ediff; - break; - case ereLAMBDA: - /* two cases: when we are permuted, and not. 
*/ - /* non-permuted: - ediff = E_new - E_old - = [H_b(x_a) + H_a(x_b)] - [H_b(x_b) + H_a(x_a)] - = [H_b(x_a) - H_a(x_a)] + [H_a(x_b) - H_b(x_b)] - = de[b][a] + de[a][b] */ - - /* permuted: - ediff = E_new - E_old - = [H_bp(x_a) + H_ap(x_b)] - [H_bp(x_b) + H_ap(x_a)] - = [H_bp(x_a) - H_ap(x_a)] + [H_ap(x_b) - H_bp(x_b)] - = [H_bp(x_a) - H_a(x_a) + H_a(x_a) - H_ap(x_a)] + [H_ap(x_b) - H_b(x_b) + H_b(x_b) - H_bp(x_b)] - = [H_bp(x_a) - H_a(x_a)] - [H_ap(x_a) - H_a(x_a)] + [H_ap(x_b) - H_b(x_b)] - H_bp(x_b) - H_b(x_b)] - = (de[bp][a] - de[ap][a]) + (de[ap][b] - de[bp][b]) */ - /* but, in the current code implementation, we flip configurations, not indices . . . - So let's examine that. - = [H_b(x_ap) - H_a(x_a)] - [H_a(x_ap) - H_a(x_a)] + [H_a(x_bp) - H_b(x_b)] - H_b(x_bp) - H_b(x_b)] - = [H_b(x_ap) - H_a(x_ap)] + [H_a(x_bp) - H_b(x_pb)] - = (de[b][ap] - de[a][ap]) + (de[a][bp] - de[b][bp] - So, if we exchange b<=> bp and a<=> ap, we return to the same result. - So the simple solution is to flip the - position of perturbed and original indices in the tests. - */ - - ediff = (de[bp][a] - de[ap][a]) + (de[ap][b] - de[bp][b]); - delta = ediff * beta[a]; /* assume all same temperature in this case */ - break; - case ereTL: - /* not permuted: */ - /* delta = reduced E_new - reduced E_old - = [beta_b H_b(x_a) + beta_a H_a(x_b)] - [beta_b H_b(x_b) + beta_a H_a(x_a)] - = [beta_b H_b(x_a) - beta_a H_a(x_a)] + [beta_a H_a(x_b) - beta_b H_b(x_b)] - = [beta_b dH_b(x_a) + beta_b H_a(x_a) - beta_a H_a(x_a)] + - [beta_a dH_a(x_b) + beta_a H_b(x_b) - beta_b H_b(x_b)] - = [beta_b dH_b(x_a) + [beta_a dH_a(x_b) + - beta_b (H_a(x_a) - H_b(x_b)]) - beta_a (H_a(x_a) - H_b(x_b)) - = beta_b dH_b(x_a) + beta_a dH_a(x_b) - (beta_b - beta_a)(H_b(x_b) - H_a(x_a) */ - /* delta = beta[b]*de[b][a] + beta[a]*de[a][b] - (beta[b] - beta[a])*(Epot[b] - Epot[a]; */ - /* permuted (big breath!) 
*/ - /* delta = reduced E_new - reduced E_old - = [beta_bp H_bp(x_a) + beta_ap H_ap(x_b)] - [beta_bp H_bp(x_b) + beta_ap H_ap(x_a)] - = [beta_bp H_bp(x_a) - beta_ap H_ap(x_a)] + [beta_ap H_ap(x_b) - beta_bp H_bp(x_b)] - = [beta_bp H_bp(x_a) - beta_ap H_ap(x_a)] + [beta_ap H_ap(x_b) - beta_bp H_bp(x_b)] - - beta_pb H_a(x_a) + beta_ap H_a(x_a) + beta_pb H_a(x_a) - beta_ap H_a(x_a) - - beta_ap H_b(x_b) + beta_bp H_b(x_b) + beta_ap H_b(x_b) - beta_bp H_b(x_b) - = [(beta_bp H_bp(x_a) - beta_bp H_a(x_a)) - (beta_ap H_ap(x_a) - beta_ap H_a(x_a))] + - [(beta_ap H_ap(x_b) - beta_ap H_b(x_b)) - (beta_bp H_bp(x_b) - beta_bp H_b(x_b))] - + beta_pb H_a(x_a) - beta_ap H_a(x_a) + beta_ap H_b(x_b) - beta_bp H_b(x_b) - = [beta_bp (H_bp(x_a) - H_a(x_a)) - beta_ap (H_ap(x_a) - H_a(x_a))] + - [beta_ap (H_ap(x_b) - H_b(x_b)) - beta_bp (H_bp(x_b) - H_b(x_b))] - + beta_pb (H_a(x_a) - H_b(x_b)) - beta_ap (H_a(x_a) - H_b(x_b)) - = ([beta_bp de[bp][a] - beta_ap de[ap][a]) + beta_ap de[ap][b] - beta_bp de[bp][b]) - + (beta_pb-beta_ap)(H_a(x_a) - H_b(x_b)) */ - delta = beta[bp] * (de[bp][a] - de[bp][b]) + beta[ap] * (de[ap][b] - de[ap][a]) - - (beta[bp] - beta[ap]) * (Epot[b] - Epot[a]); - break; - default: gmx_incons("Unknown replica exchange quantity"); - } - if (bPrint) - { - fprintf(fplog, "Repl %d <-> %d dE_term = %10.3e (kT)\n", a, b, delta); - } -/* PLUMED HREX */ -/* this is necessary because with plumed HREX the energy contribution is - already taken into account */ - if(plumed_hrex) delta=0.0; -/* END PLUMED HREX */ - if (re->bNPT) - { - /* revist the calculation for 5.0. Might be some improvements. 
*/ - dpV = (beta[ap] * re->pres[ap] - beta[bp] * re->pres[bp]) * (Vol[b] - Vol[a]) / PRESFAC; - if (bPrint) - { - fprintf(fplog, " dpV = %10.3e d = %10.3e\n", dpV, delta + dpV); - } - delta += dpV; - } - return delta; -} - -static void test_for_replica_exchange(FILE* fplog, - const gmx_multisim_t* ms, - struct gmx_repl_ex* re, - const gmx_enerdata_t* enerd, - real vol, - int64_t step, - real time) -{ - int m, i, j, a, b, ap, bp, i0, i1, tmp; - real delta = 0; - gmx_bool bPrint, bMultiEx; - gmx_bool* bEx = re->bEx; - real* prob = re->prob; - int* pind = re->destinations; /* permuted index */ - gmx_bool bEpot = FALSE; - gmx_bool bDLambda = FALSE; - gmx_bool bVol = FALSE; - gmx::ThreeFry2x64<64> rng(re->seed, gmx::RandomDomain::ReplicaExchange); - gmx::UniformRealDistribution uniformRealDist; - gmx::UniformIntDistribution uniformNreplDist(0, re->nrepl - 1); - - bMultiEx = (re->nex > 1); /* multiple exchanges at each state */ - fprintf(fplog, "Replica exchange at step %" PRId64 " time %.5f\n", step, time); - - if (re->bNPT) - { - for (i = 0; i < re->nrepl; i++) - { - re->Vol[i] = 0; - } - bVol = TRUE; - re->Vol[re->repl] = vol; - } - if ((re->type == ereTEMP || re->type == ereTL)) - { - for (i = 0; i < re->nrepl; i++) - { - re->Epot[i] = 0; - } - bEpot = TRUE; - re->Epot[re->repl] = enerd->term[F_EPOT]; - /* temperatures of different states*/ - for (i = 0; i < re->nrepl; i++) - { - re->beta[i] = 1.0 / (re->q[ereTEMP][i] * BOLTZ); - } - } - else - { - for (i = 0; i < re->nrepl; i++) - { - re->beta[i] = 1.0 / (re->temp * BOLTZ); /* we have a single temperature */ - } - } - if (re->type == ereLAMBDA || re->type == ereTL) - { - bDLambda = TRUE; - /* lambda differences. 
*/ - /* de[i][j] is the energy of the jth simulation in the ith Hamiltonian - minus the energy of the jth simulation in the jth Hamiltonian */ - for (i = 0; i < re->nrepl; i++) - { - for (j = 0; j < re->nrepl; j++) - { - re->de[i][j] = 0; - } - } - for (i = 0; i < re->nrepl; i++) - { - re->de[i][re->repl] = enerd->foreignLambdaTerms.deltaH(re->q[ereLAMBDA][i]); - } - } - - /* now actually do the communication */ - if (bVol) - { - gmx_sum_sim(re->nrepl, re->Vol, ms); - } - if (bEpot) - { - gmx_sum_sim(re->nrepl, re->Epot, ms); - } - if (bDLambda) - { - for (i = 0; i < re->nrepl; i++) - { - gmx_sum_sim(re->nrepl, re->de[i], ms); - } - } - - /* make a duplicate set of indices for shuffling */ - for (i = 0; i < re->nrepl; i++) - { - pind[i] = re->ind[i]; - } - - rng.restart(step, 0); - - /* PLUMED */ - int plumed_test_exchange_pattern=0; - if(plumed_test_exchange_pattern && plumed_hrex) gmx_fatal(FARGS,"hrex not compatible with ad hoc exchange patterns"); - /* END PLUMED */ - - if (bMultiEx) - { - /* multiple random switch exchange */ - int nself = 0; - - - for (i = 0; i < re->nex + nself; i++) - { - // For now this is superfluous, but just in case we ever add more - // calls in different branches it is safer to always reset the distribution. - uniformNreplDist.reset(); - - /* randomly select a pair */ - /* in theory, could reduce this by identifying only which switches had a nonneglibible - probability of occurring (log p > -100) and only operate on those switches */ - /* find out which state it is from, and what label that state currently has. Likely - more work that useful. */ - i0 = uniformNreplDist(rng); - i1 = uniformNreplDist(rng); - if (i0 == i1) - { - nself++; - continue; /* self-exchange, back up and do it again */ - } - - a = re->ind[i0]; /* what are the indices of these states? 
*/ - b = re->ind[i1]; - ap = pind[i0]; - bp = pind[i1]; - - bPrint = FALSE; /* too noisy */ - /* calculate the energy difference */ - /* if the code changes to flip the STATES, rather than the configurations, - use the commented version of the code */ - /* delta = calc_delta(fplog,bPrint,re,a,b,ap,bp); */ - delta = calc_delta(fplog, bPrint, re, ap, bp, a, b); - - /* we actually only use the first space in the prob and bEx array, - since there are actually many switches between pairs. */ - - if (delta <= 0) - { - /* accepted */ - prob[0] = 1; - bEx[0] = TRUE; - } - else - { - if (delta > c_probabilityCutoff) - { - prob[0] = 0; - } - else - { - prob[0] = exp(-delta); - } - // roll a number to determine if accepted. For now it is superfluous to - // reset, but just in case we ever add more calls in different branches - // it is safer to always reset the distribution. - uniformRealDist.reset(); - bEx[0] = uniformRealDist(rng) < prob[0]; - } - re->prob_sum[0] += prob[0]; - - if (bEx[0]) - { - /* swap the states */ - tmp = pind[i0]; - pind[i0] = pind[i1]; - pind[i1] = tmp; - } - } - re->nattempt[0]++; /* keep track of total permutation trials here */ - print_allswitchind(fplog, re->nrepl, pind, re->allswaps, re->tmpswap); - } - else - { - /* standard nearest neighbor replica exchange */ - - m = (step / re->nst) % 2; - /* PLUMED */ - if(plumedswitch){ - int partner=re->repl; - plumed_cmd(plumedmain,"getExchangesFlag",&plumed_test_exchange_pattern); - if(plumed_test_exchange_pattern>0){ - int *list; - snew(list,re->nrepl); - plumed_cmd(plumedmain,"setNumberOfReplicas",&(re->nrepl)); - plumed_cmd(plumedmain,"getExchangesList",list); - for(i=0; inrepl; i++) re->ind[i]=list[i]; - sfree(list); - } - - for(i=1; inrepl; i++) { - if (i % 2 != m) continue; - a = re->ind[i-1]; - b = re->ind[i]; - if(re->repl==a) partner=b; - if(re->repl==b) partner=a; - } - plumed_cmd(plumedmain,"GREX setPartner",&partner); - plumed_cmd(plumedmain,"GREX calculate",nullptr); - 
plumed_cmd(plumedmain,"GREX shareAllDeltaBias",nullptr); - } - /* END PLUMED */ - for (i = 1; i < re->nrepl; i++) - { - a = re->ind[i - 1]; - b = re->ind[i]; - - bPrint = (re->repl == a || re->repl == b); - if (i % 2 == m) - { - delta = calc_delta(fplog, bPrint, re, a, b, a, b); - /* PLUMED */ - if(plumedswitch){ - real adb,bdb,dplumed; - char buf[300]; - sprintf(buf,"GREX getDeltaBias %d",a); plumed_cmd(plumedmain,buf,&adb); - sprintf(buf,"GREX getDeltaBias %d",b); plumed_cmd(plumedmain,buf,&bdb); - dplumed=adb*re->beta[a]+bdb*re->beta[b]; - delta+=dplumed; - if (bPrint) - fprintf(fplog,"dplumed = %10.3e dE_Term = %10.3e (kT)\n",dplumed,delta); - } - /* END PLUMED */ - if (delta <= 0) - { - /* accepted */ - prob[i] = 1; - bEx[i] = TRUE; - } - else - { - if (delta > c_probabilityCutoff) - { - prob[i] = 0; - } - else - { - prob[i] = exp(-delta); - } - // roll a number to determine if accepted. For now it is superfluous to - // reset, but just in case we ever add more calls in different branches - // it is safer to always reset the distribution. 
- uniformRealDist.reset(); - bEx[i] = uniformRealDist(rng) < prob[i]; - } - re->prob_sum[i] += prob[i]; - - if (bEx[i]) - { - /* PLUMED */ - if(!plumed_test_exchange_pattern) { - /* standard neighbour swapping */ - /* swap these two */ - tmp = pind[i - 1]; - pind[i - 1] = pind[i]; - pind[i] = tmp; - re->nexchange[i]++; /* statistics for back compatibility */ - } else { - /* alternative swapping patterns */ - tmp = pind[a]; - pind[a] = pind[b]; - pind[b] = tmp; - re->nexchange[i]++; /* statistics for back compatibility */ - } - /* END PLUMED */ - } - } - else - { - prob[i] = -1; - bEx[i] = FALSE; - } - } - /* print some statistics */ - print_ind(fplog, "ex", re->nrepl, re->ind, bEx); - print_prob(fplog, "pr", re->nrepl, prob); - fprintf(fplog, "\n"); - re->nattempt[m]++; - } - - /* PLUMED */ - if(plumed_test_exchange_pattern>0) { - for (i = 0; i < re->nrepl; i++) - { - re->ind[i] = i; - } - } - /* END PLUMED */ - - /* record which moves were made and accepted */ - for (i = 0; i < re->nrepl; i++) - { - re->nmoves[re->ind[i]][pind[i]] += 1; - re->nmoves[pind[i]][re->ind[i]] += 1; - } - fflush(fplog); /* make sure we can see what the last exchange was */ -} - -static void cyclic_decomposition(const int* destinations, int** cyclic, gmx_bool* incycle, const int nrepl, int* nswap) -{ - - int i, j, c, p; - int maxlen = 1; - for (i = 0; i < nrepl; i++) - { - incycle[i] = FALSE; - } - for (i = 0; i < nrepl; i++) /* one cycle for each replica */ - { - if (incycle[i]) - { - cyclic[i][0] = -1; - continue; - } - cyclic[i][0] = i; - incycle[i] = TRUE; - c = 1; - p = i; - for (j = 0; j < nrepl; j++) /* potentially all cycles are part, but we will break first */ - { - p = destinations[p]; /* start permuting */ - if (p == i) - { - cyclic[i][c] = -1; - if (c > maxlen) - { - maxlen = c; - } - break; /* we've reached the original element, the cycle is complete, and we marked the end. 
*/ - } - else - { - cyclic[i][c] = p; /* each permutation gives a new member of the cycle */ - incycle[p] = TRUE; - c++; - } - } - } - *nswap = maxlen - 1; - - if (debug) - { - for (i = 0; i < nrepl; i++) - { - fprintf(debug, "Cycle %d:", i); - for (j = 0; j < nrepl; j++) - { - if (cyclic[i][j] < 0) - { - break; - } - fprintf(debug, "%2d", cyclic[i][j]); - } - fprintf(debug, "\n"); - } - fflush(debug); - } -} - -static void compute_exchange_order(int** cyclic, int** order, const int nrepl, const int maxswap) -{ - int i, j; - - for (j = 0; j < maxswap; j++) - { - for (i = 0; i < nrepl; i++) - { - if (cyclic[i][j + 1] >= 0) - { - order[cyclic[i][j + 1]][j] = cyclic[i][j]; - order[cyclic[i][j]][j] = cyclic[i][j + 1]; - } - } - for (i = 0; i < nrepl; i++) - { - if (order[i][j] < 0) - { - order[i][j] = i; /* if it's not exchanging, it should stay this round*/ - } - } - } - - if (debug) - { - fprintf(debug, "Replica Exchange Order\n"); - for (i = 0; i < nrepl; i++) - { - fprintf(debug, "Replica %d:", i); - for (j = 0; j < maxswap; j++) - { - if (order[i][j] < 0) - { - break; - } - fprintf(debug, "%2d", order[i][j]); - } - fprintf(debug, "\n"); - } - fflush(debug); - } -} - -static void prepare_to_do_exchange(struct gmx_repl_ex* re, const int replica_id, int* maxswap, gmx_bool* bThisReplicaExchanged) -{ - int i, j; - /* Hold the cyclic decomposition of the (multiple) replica - * exchange. */ - gmx_bool bAnyReplicaExchanged = FALSE; - *bThisReplicaExchanged = FALSE; - - for (i = 0; i < re->nrepl; i++) - { - if (re->destinations[i] != re->ind[i]) - { - /* only mark as exchanged if the index has been shuffled */ - bAnyReplicaExchanged = TRUE; - break; - } - } - if (bAnyReplicaExchanged) - { - /* reinitialize the placeholder arrays */ - for (i = 0; i < re->nrepl; i++) - { - for (j = 0; j < re->nrepl; j++) - { - re->cyclic[i][j] = -1; - re->order[i][j] = -1; - } - } - - /* Identify the cyclic decomposition of the permutation (very - * fast if neighbor replica exchange). 
*/ - cyclic_decomposition(re->destinations, re->cyclic, re->incycle, re->nrepl, maxswap); - - /* Now translate the decomposition into a replica exchange - * order at each step. */ - compute_exchange_order(re->cyclic, re->order, re->nrepl, *maxswap); - - /* Did this replica do any exchange at any point? */ - for (j = 0; j < *maxswap; j++) - { - if (replica_id != re->order[replica_id][j]) - { - *bThisReplicaExchanged = TRUE; - break; - } - } - } -} - -gmx_bool replica_exchange(FILE* fplog, - const t_commrec* cr, - const gmx_multisim_t* ms, - struct gmx_repl_ex* re, - t_state* state, - const gmx_enerdata_t* enerd, - t_state* state_local, - int64_t step, - real time) -{ - int j; - int replica_id = 0; - int exchange_partner; - int maxswap = 0; - /* Number of rounds of exchanges needed to deal with any multiple - * exchanges. */ - /* Where each replica ends up after the exchange attempt(s). */ - /* The order in which multiple exchanges will occur. */ - gmx_bool bThisReplicaExchanged = FALSE; - - /* PLUMED */ - if(plumedswitch)plumed_cmd(plumedmain,"GREX prepare",nullptr); - /* END PLUMED */ - - if (MASTER(cr)) - { - replica_id = re->repl; - test_for_replica_exchange(fplog, ms, re, enerd, det(state_local->box), step, time); - prepare_to_do_exchange(re, replica_id, &maxswap, &bThisReplicaExchanged); - } - /* Do intra-simulation broadcast so all processors belonging to - * each simulation know whether they need to participate in - * collecting the state. Otherwise, they might as well get on with - * the next thing to do. 
*/ - if (DOMAINDECOMP(cr)) - { -#if GMX_MPI - MPI_Bcast(&bThisReplicaExchanged, sizeof(gmx_bool), MPI_BYTE, MASTERRANK(cr), cr->mpi_comm_mygroup); -#endif - } - - if (bThisReplicaExchanged) - { - /* Exchange the states */ - /* Collect the global state on the master node */ - if (DOMAINDECOMP(cr)) - { - dd_collect_state(cr->dd, state_local, state); - } - else - { - copy_state_serial(state_local, state); - } - - if (MASTER(cr)) - { - /* There will be only one swap cycle with standard replica - * exchange, but there may be multiple swap cycles if we - * allow multiple swaps. */ - - for (j = 0; j < maxswap; j++) - { - exchange_partner = re->order[replica_id][j]; - - if (exchange_partner != replica_id) - { - /* Exchange the global states between the master nodes */ - if (debug) - { - fprintf(debug, "Exchanging %d with %d\n", replica_id, exchange_partner); - } - exchange_state(ms, exchange_partner, state); - } - } - /* For temperature-type replica exchange, we need to scale - * the velocities. 
*/ - if (re->type == ereTEMP || re->type == ereTL) - { - scale_velocities(state->v, std::sqrt(re->q[ereTEMP][replica_id] - / re->q[ereTEMP][re->destinations[replica_id]])); - } - } - - /* With domain decomposition the global state is distributed later */ - if (!DOMAINDECOMP(cr)) - { - /* Copy the global state to the local state data structure */ - copy_state_serial(state, state_local); - } - } - - return bThisReplicaExchanged; -} - -void print_replica_exchange_statistics(FILE* fplog, struct gmx_repl_ex* re) -{ - int i; - - fprintf(fplog, "\nReplica exchange statistics\n"); - - if (re->nex == 0) - { - fprintf(fplog, "Repl %d attempts, %d odd, %d even\n", re->nattempt[0] + re->nattempt[1], - re->nattempt[1], re->nattempt[0]); - - fprintf(fplog, "Repl average probabilities:\n"); - for (i = 1; i < re->nrepl; i++) - { - if (re->nattempt[i % 2] == 0) - { - re->prob[i] = 0; - } - else - { - re->prob[i] = re->prob_sum[i] / re->nattempt[i % 2]; - } - } - print_ind(fplog, "", re->nrepl, re->ind, nullptr); - print_prob(fplog, "", re->nrepl, re->prob); - - fprintf(fplog, "Repl number of exchanges:\n"); - print_ind(fplog, "", re->nrepl, re->ind, nullptr); - print_count(fplog, "", re->nrepl, re->nexchange); - - fprintf(fplog, "Repl average number of exchanges:\n"); - for (i = 1; i < re->nrepl; i++) - { - if (re->nattempt[i % 2] == 0) - { - re->prob[i] = 0; - } - else - { - re->prob[i] = (static_cast(re->nexchange[i])) / re->nattempt[i % 2]; - } - } - print_ind(fplog, "", re->nrepl, re->ind, nullptr); - print_prob(fplog, "", re->nrepl, re->prob); - - fprintf(fplog, "\n"); - } - /* print the transition matrix */ - print_transition_matrix(fplog, re->nrepl, re->nmoves, re->nattempt); -} - -/* PLUMED HREX */ -int replica_exchange_get_repl(const gmx_repl_ex_t re){ - return re->repl; -}; - -int replica_exchange_get_nrepl(const gmx_repl_ex_t re){ - return re->nrepl; -}; -/* END PLUMED HREX */ -//! 
\endcond diff --git a/patches/gromacs-2021.7.diff/src/gromacs/mdrun/replicaexchange.cpp.preplumed b/patches/gromacs-2021.7.diff/src/gromacs/mdrun/replicaexchange.cpp.preplumed deleted file mode 100644 index c40161d9ef..0000000000 --- a/patches/gromacs-2021.7.diff/src/gromacs/mdrun/replicaexchange.cpp.preplumed +++ /dev/null @@ -1,1389 +0,0 @@ -/* - * This file is part of the GROMACS molecular simulation package. - * - * Copyright (c) 1991-2000, University of Groningen, The Netherlands. - * Copyright (c) 2001-2004, The GROMACS development team. - * Copyright (c) 2011-2019,2020, by the GROMACS development team, led by - * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl, - * and including many others, as listed in the AUTHORS file in the - * top-level source directory and at http://www.gromacs.org. - * - * GROMACS is free software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public License - * as published by the Free Software Foundation; either version 2.1 - * of the License, or (at your option) any later version. - * - * GROMACS is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public - * License along with GROMACS; if not, see - * http://www.gnu.org/licenses, or write to the Free Software Foundation, - * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. - * - * If you want to redistribute modifications to GROMACS, please - * consider that scientific software is very special. Version - * control is crucial - bugs must be traceable. We will be happy to - * consider code for inclusion in the official distribution, but - * derived work must not be called official GROMACS. 
Details are found - * in the README & COPYING files - if they are missing, get the - * official version at http://www.gromacs.org. - * - * To help us fund GROMACS development, we humbly ask that you cite - * the research papers on the package. Check out http://www.gromacs.org. - */ - -/*! \internal \file - * - * \brief Implements the replica exchange routines. - * - * \author David van der Spoel - * \author Mark Abraham - * \ingroup module_mdrun - */ -#include "gmxpre.h" - -#include "replicaexchange.h" - -#include "config.h" - -#include - -#include - -#include "gromacs/domdec/collect.h" -#include "gromacs/gmxlib/network.h" -#include "gromacs/math/units.h" -#include "gromacs/math/vec.h" -#include "gromacs/mdrunutility/multisim.h" -#include "gromacs/mdtypes/commrec.h" -#include "gromacs/mdtypes/enerdata.h" -#include "gromacs/mdtypes/inputrec.h" -#include "gromacs/mdtypes/md_enums.h" -#include "gromacs/mdtypes/state.h" -#include "gromacs/random/threefry.h" -#include "gromacs/random/uniformintdistribution.h" -#include "gromacs/random/uniformrealdistribution.h" -#include "gromacs/utility/fatalerror.h" -#include "gromacs/utility/pleasecite.h" -#include "gromacs/utility/smalloc.h" - -//! Helps cut off probability values. -constexpr int c_probabilityCutoff = 100; - -/* we don't bother evaluating if events are more rare than exp(-100) = 3.7x10^-44 */ - -//! Rank in the multisimulation -#define MSRANK(ms, nodeid) (nodeid) - -//! Enum for replica exchange flavours -enum -{ - ereTEMP, - ereLAMBDA, - ereENDSINGLE, - ereTL, - ereNR -}; -/*! \brief Strings describing replica exchange flavours. - * - * end_single_marker merely notes the end of single variable replica - * exchange. All types higher than it are multiple replica exchange - * methods. - * - * Eventually, should add 'pressure', 'temperature and pressure', - * 'lambda_and_pressure', 'temperature_lambda_pressure'?; Let's wait - * until we feel better about the pressure control methods giving - * exact ensembles. 
Right now, we assume constant pressure */ -static const char* erename[ereNR] = { "temperature", "lambda", "end_single_marker", - "temperature and lambda" }; - -//! Working data for replica exchange. -struct gmx_repl_ex -{ - //! Replica ID - int repl; - //! Total number of replica - int nrepl; - //! Temperature - real temp; - //! Replica exchange type from ere enum - int type; - //! Quantity, e.g. temperature or lambda; first index is ere, second index is replica ID - real** q; - //! Use constant pressure and temperature - gmx_bool bNPT; - //! Replica pressures - real* pres; - //! Replica indices - int* ind; - //! Used for keeping track of all the replica swaps - int* allswaps; - //! Replica exchange interval (number of steps) - int nst; - //! Number of exchanges per interval - int nex; - //! Random seed - int seed; - //! Number of even and odd replica change attempts - int nattempt[2]; - //! Sum of probabilities - real* prob_sum; - //! Number of moves between replicas i and j - int** nmoves; - //! i-th element of the array is the number of exchanges between replica i-1 and i - int* nexchange; - - /*! \brief Helper arrays for replica exchange; allocated here - * so they don't have to be allocated each time */ - //! \{ - int* destinations; - int** cyclic; - int** order; - int* tmpswap; - gmx_bool* incycle; - gmx_bool* bEx; - //! \} - - //! Helper arrays to hold the quantities that are exchanged. - //! \{ - real* prob; - real* Epot; - real* beta; - real* Vol; - real** de; - //! \} -}; - -// TODO We should add Doxygen here some time. -//! 
\cond - -static gmx_bool repl_quantity(const gmx_multisim_t* ms, struct gmx_repl_ex* re, int ere, real q) -{ - real* qall; - gmx_bool bDiff; - int s; - - snew(qall, ms->numSimulations_); - qall[re->repl] = q; - gmx_sum_sim(ms->numSimulations_, qall, ms); - - bDiff = FALSE; - for (s = 1; s < ms->numSimulations_; s++) - { - if (qall[s] != qall[0]) - { - bDiff = TRUE; - } - } - - if (bDiff) - { - /* Set the replica exchange type and quantities */ - re->type = ere; - - snew(re->q[ere], re->nrepl); - for (s = 0; s < ms->numSimulations_; s++) - { - re->q[ere][s] = qall[s]; - } - } - sfree(qall); - return bDiff; -} - -gmx_repl_ex_t init_replica_exchange(FILE* fplog, - const gmx_multisim_t* ms, - int numAtomsInSystem, - const t_inputrec* ir, - const ReplicaExchangeParameters& replExParams) -{ - real pres; - int i, j; - struct gmx_repl_ex* re; - gmx_bool bTemp; - gmx_bool bLambda = FALSE; - - fprintf(fplog, "\nInitializing Replica Exchange\n"); - - if (!isMultiSim(ms) || ms->numSimulations_ == 1) - { - gmx_fatal(FARGS, - "Nothing to exchange with only one replica, maybe you forgot to set the " - "-multidir option of mdrun?"); - } - if (replExParams.numExchanges < 0) - { - gmx_fatal(FARGS, "Replica exchange number of exchanges needs to be positive"); - } - - if (!EI_DYNAMICS(ir->eI)) - { - gmx_fatal(FARGS, "Replica exchange is only supported by dynamical simulations"); - /* Note that PAR(cr) is defined by cr->nnodes > 1, which is - * distinct from isMultiSim(ms). A multi-simulation only runs - * with real MPI parallelism, but this does not imply PAR(cr) - * is true! - * - * Since we are using a dynamical integrator, the only - * decomposition is DD, so PAR(cr) and DOMAINDECOMP(cr) are - * synonymous. The only way for cr->nnodes > 1 to be true is - * if we are using DD. 
*/ - } - - snew(re, 1); - - re->repl = ms->simulationIndex_; - re->nrepl = ms->numSimulations_; - snew(re->q, ereENDSINGLE); - - fprintf(fplog, "Repl There are %d replicas:\n", re->nrepl); - - /* We only check that the number of atoms in the systms match. - * This, of course, do not guarantee that the systems are the same, - * but it does guarantee that we can perform replica exchange. - */ - check_multi_int(fplog, ms, numAtomsInSystem, "the number of atoms", FALSE); - check_multi_int(fplog, ms, ir->eI, "the integrator", FALSE); - check_multi_int64(fplog, ms, ir->init_step + ir->nsteps, "init_step+nsteps", FALSE); - const int nst = replExParams.exchangeInterval; - check_multi_int64(fplog, ms, (ir->init_step + nst - 1) / nst, - "first exchange step: init_step/-replex", FALSE); - check_multi_int(fplog, ms, ir->etc, "the temperature coupling", FALSE); - check_multi_int(fplog, ms, ir->opts.ngtc, "the number of temperature coupling groups", FALSE); - check_multi_int(fplog, ms, ir->epc, "the pressure coupling", FALSE); - check_multi_int(fplog, ms, ir->efep, "free energy", FALSE); - check_multi_int(fplog, ms, ir->fepvals->n_lambda, "number of lambda states", FALSE); - - re->temp = ir->opts.ref_t[0]; - for (i = 1; (i < ir->opts.ngtc); i++) - { - if (ir->opts.ref_t[i] != re->temp) - { - fprintf(fplog, - "\nWARNING: The temperatures of the different temperature coupling groups are " - "not identical\n\n"); - fprintf(stderr, - "\nWARNING: The temperatures of the different temperature coupling groups are " - "not identical\n\n"); - } - } - - re->type = -1; - bTemp = repl_quantity(ms, re, ereTEMP, re->temp); - if (ir->efep != efepNO) - { - bLambda = repl_quantity(ms, re, ereLAMBDA, static_cast(ir->fepvals->init_fep_state)); - } - if (re->type == -1) /* nothing was assigned */ - { - gmx_fatal(FARGS, - "The properties of the %d systems are all the same, there is nothing to exchange", - re->nrepl); - } - if (bLambda && bTemp) - { - re->type = ereTL; - } - - if (bTemp) - { - 
please_cite(fplog, "Sugita1999a"); - if (ir->epc != epcNO) - { - re->bNPT = TRUE; - fprintf(fplog, "Repl Using Constant Pressure REMD.\n"); - please_cite(fplog, "Okabe2001a"); - } - if (ir->etc == etcBERENDSEN) - { - gmx_fatal(FARGS, - "REMD with the %s thermostat does not produce correct potential energy " - "distributions, consider using the %s thermostat instead", - ETCOUPLTYPE(ir->etc), ETCOUPLTYPE(etcVRESCALE)); - } - } - if (bLambda) - { - if (ir->fepvals->delta_lambda != 0) /* check this? */ - { - gmx_fatal(FARGS, "delta_lambda is not zero"); - } - } - if (re->bNPT) - { - snew(re->pres, re->nrepl); - if (ir->epct == epctSURFACETENSION) - { - pres = ir->ref_p[ZZ][ZZ]; - } - else - { - pres = 0; - j = 0; - for (i = 0; i < DIM; i++) - { - if (ir->compress[i][i] != 0) - { - pres += ir->ref_p[i][i]; - j++; - } - } - pres /= j; - } - re->pres[re->repl] = pres; - gmx_sum_sim(re->nrepl, re->pres, ms); - } - - /* Make an index for increasing replica order */ - /* only makes sense if one or the other is varying, not both! - if both are varying, we trust the order the person gave. */ - snew(re->ind, re->nrepl); - for (i = 0; i < re->nrepl; i++) - { - re->ind[i] = i; - } - - if (re->type < ereENDSINGLE) - { - - for (i = 0; i < re->nrepl; i++) - { - for (j = i + 1; j < re->nrepl; j++) - { - if (re->q[re->type][re->ind[j]] < re->q[re->type][re->ind[i]]) - { - /* Unordered replicas are supposed to work, but there - * is still an issues somewhere. - * Note that at this point still re->ind[i]=i. - */ - gmx_fatal(FARGS, - "Replicas with indices %d < %d have %ss %g > %g, please order your " - "replicas on increasing %s", - i, j, erename[re->type], re->q[re->type][i], re->q[re->type][j], - erename[re->type]); - } - else if (re->q[re->type][re->ind[j]] == re->q[re->type][re->ind[i]]) - { - gmx_fatal(FARGS, "Two replicas have identical %ss", erename[re->type]); - } - } - } - } - - /* keep track of all the swaps, starting with the initial placement. 
*/ - snew(re->allswaps, re->nrepl); - for (i = 0; i < re->nrepl; i++) - { - re->allswaps[i] = re->ind[i]; - } - - switch (re->type) - { - case ereTEMP: - fprintf(fplog, "\nReplica exchange in temperature\n"); - for (i = 0; i < re->nrepl; i++) - { - fprintf(fplog, " %5.1f", re->q[re->type][re->ind[i]]); - } - fprintf(fplog, "\n"); - break; - case ereLAMBDA: - fprintf(fplog, "\nReplica exchange in lambda\n"); - for (i = 0; i < re->nrepl; i++) - { - fprintf(fplog, " %3d", static_cast(re->q[re->type][re->ind[i]])); - } - fprintf(fplog, "\n"); - break; - case ereTL: - fprintf(fplog, "\nReplica exchange in temperature and lambda state\n"); - for (i = 0; i < re->nrepl; i++) - { - fprintf(fplog, " %5.1f", re->q[ereTEMP][re->ind[i]]); - } - fprintf(fplog, "\n"); - for (i = 0; i < re->nrepl; i++) - { - fprintf(fplog, " %5d", static_cast(re->q[ereLAMBDA][re->ind[i]])); - } - fprintf(fplog, "\n"); - break; - default: gmx_incons("Unknown replica exchange quantity"); - } - if (re->bNPT) - { - fprintf(fplog, "\nRepl p"); - for (i = 0; i < re->nrepl; i++) - { - fprintf(fplog, " %5.2f", re->pres[re->ind[i]]); - } - - for (i = 0; i < re->nrepl; i++) - { - if ((i > 0) && (re->pres[re->ind[i]] < re->pres[re->ind[i - 1]])) - { - fprintf(fplog, - "\nWARNING: The reference pressures decrease with increasing " - "temperatures\n\n"); - fprintf(stderr, - "\nWARNING: The reference pressures decrease with increasing " - "temperatures\n\n"); - } - } - } - re->nst = nst; - if (replExParams.randomSeed == -1) - { - if (isMasterSim(ms)) - { - re->seed = static_cast(gmx::makeRandomSeed()); - } - else - { - re->seed = 0; - } - gmx_sumi_sim(1, &(re->seed), ms); - } - else - { - re->seed = replExParams.randomSeed; - } - fprintf(fplog, "\nReplica exchange interval: %d\n", re->nst); - fprintf(fplog, "\nReplica random seed: %d\n", re->seed); - - re->nattempt[0] = 0; - re->nattempt[1] = 0; - - snew(re->prob_sum, re->nrepl); - snew(re->nexchange, re->nrepl); - snew(re->nmoves, re->nrepl); - for (i = 0; i < 
re->nrepl; i++) - { - snew(re->nmoves[i], re->nrepl); - } - fprintf(fplog, "Replica exchange information below: ex and x = exchange, pr = probability\n"); - - /* generate space for the helper functions so we don't have to snew each time */ - - snew(re->destinations, re->nrepl); - snew(re->incycle, re->nrepl); - snew(re->tmpswap, re->nrepl); - snew(re->cyclic, re->nrepl); - snew(re->order, re->nrepl); - for (i = 0; i < re->nrepl; i++) - { - snew(re->cyclic[i], re->nrepl + 1); - snew(re->order[i], re->nrepl); - } - /* allocate space for the functions storing the data for the replicas */ - /* not all of these arrays needed in all cases, but they don't take - up much space, since the max size is nrepl**2 */ - snew(re->prob, re->nrepl); - snew(re->bEx, re->nrepl); - snew(re->beta, re->nrepl); - snew(re->Vol, re->nrepl); - snew(re->Epot, re->nrepl); - snew(re->de, re->nrepl); - for (i = 0; i < re->nrepl; i++) - { - snew(re->de[i], re->nrepl); - } - re->nex = replExParams.numExchanges; - return re; -} - -static void exchange_reals(const gmx_multisim_t gmx_unused* ms, int gmx_unused b, real* v, int n) -{ - real* buf; - int i; - - if (v) - { - snew(buf, n); -#if GMX_MPI - /* - MPI_Sendrecv(v, n*sizeof(real),MPI_BYTE,MSRANK(ms,b),0, - buf,n*sizeof(real),MPI_BYTE,MSRANK(ms,b),0, - ms->mastersComm_,MPI_STATUS_IGNORE); - */ - { - MPI_Request mpi_req; - - MPI_Isend(v, n * sizeof(real), MPI_BYTE, MSRANK(ms, b), 0, ms->mastersComm_, &mpi_req); - MPI_Recv(buf, n * sizeof(real), MPI_BYTE, MSRANK(ms, b), 0, ms->mastersComm_, MPI_STATUS_IGNORE); - MPI_Wait(&mpi_req, MPI_STATUS_IGNORE); - } -#endif - for (i = 0; i < n; i++) - { - v[i] = buf[i]; - } - sfree(buf); - } -} - - -static void exchange_doubles(const gmx_multisim_t gmx_unused* ms, int gmx_unused b, double* v, int n) -{ - double* buf; - int i; - - if (v) - { - snew(buf, n); -#if GMX_MPI - /* - MPI_Sendrecv(v, n*sizeof(double),MPI_BYTE,MSRANK(ms,b),0, - buf,n*sizeof(double),MPI_BYTE,MSRANK(ms,b),0, - 
ms->mastersComm_,MPI_STATUS_IGNORE); - */ - { - MPI_Request mpi_req; - - MPI_Isend(v, n * sizeof(double), MPI_BYTE, MSRANK(ms, b), 0, ms->mastersComm_, &mpi_req); - MPI_Recv(buf, n * sizeof(double), MPI_BYTE, MSRANK(ms, b), 0, ms->mastersComm_, - MPI_STATUS_IGNORE); - MPI_Wait(&mpi_req, MPI_STATUS_IGNORE); - } -#endif - for (i = 0; i < n; i++) - { - v[i] = buf[i]; - } - sfree(buf); - } -} - -static void exchange_rvecs(const gmx_multisim_t gmx_unused* ms, int gmx_unused b, rvec* v, int n) -{ - rvec* buf; - int i; - - if (v) - { - snew(buf, n); -#if GMX_MPI - /* - MPI_Sendrecv(v[0], n*sizeof(rvec),MPI_BYTE,MSRANK(ms,b),0, - buf[0],n*sizeof(rvec),MPI_BYTE,MSRANK(ms,b),0, - ms->mastersComm_,MPI_STATUS_IGNORE); - */ - { - MPI_Request mpi_req; - - MPI_Isend(v[0], n * sizeof(rvec), MPI_BYTE, MSRANK(ms, b), 0, ms->mastersComm_, &mpi_req); - MPI_Recv(buf[0], n * sizeof(rvec), MPI_BYTE, MSRANK(ms, b), 0, ms->mastersComm_, - MPI_STATUS_IGNORE); - MPI_Wait(&mpi_req, MPI_STATUS_IGNORE); - } -#endif - for (i = 0; i < n; i++) - { - copy_rvec(buf[i], v[i]); - } - sfree(buf); - } -} - -static void exchange_state(const gmx_multisim_t* ms, int b, t_state* state) -{ - /* When t_state changes, this code should be updated. 
*/ - int ngtc, nnhpres; - ngtc = state->ngtc * state->nhchainlength; - nnhpres = state->nnhpres * state->nhchainlength; - exchange_rvecs(ms, b, state->box, DIM); - exchange_rvecs(ms, b, state->box_rel, DIM); - exchange_rvecs(ms, b, state->boxv, DIM); - exchange_reals(ms, b, &(state->veta), 1); - exchange_reals(ms, b, &(state->vol0), 1); - exchange_rvecs(ms, b, state->svir_prev, DIM); - exchange_rvecs(ms, b, state->fvir_prev, DIM); - exchange_rvecs(ms, b, state->pres_prev, DIM); - exchange_doubles(ms, b, state->nosehoover_xi.data(), ngtc); - exchange_doubles(ms, b, state->nosehoover_vxi.data(), ngtc); - exchange_doubles(ms, b, state->nhpres_xi.data(), nnhpres); - exchange_doubles(ms, b, state->nhpres_vxi.data(), nnhpres); - exchange_doubles(ms, b, state->therm_integral.data(), state->ngtc); - exchange_doubles(ms, b, &state->baros_integral, 1); - exchange_rvecs(ms, b, state->x.rvec_array(), state->natoms); - exchange_rvecs(ms, b, state->v.rvec_array(), state->natoms); -} - -static void copy_state_serial(const t_state* src, t_state* dest) -{ - if (dest != src) - { - /* Currently the local state is always a pointer to the global - * in serial, so we should never end up here. - * TODO: Implement a (trivial) t_state copy once converted to C++. 
- */ - GMX_RELEASE_ASSERT(false, "State copying is currently not implemented in replica exchange"); - } -} - -static void scale_velocities(gmx::ArrayRef velocities, real fac) -{ - for (auto& v : velocities) - { - v *= fac; - } -} - -static void print_transition_matrix(FILE* fplog, int n, int** nmoves, const int* nattempt) -{ - int i, j, ntot; - float Tprint; - - ntot = nattempt[0] + nattempt[1]; - fprintf(fplog, "\n"); - fprintf(fplog, "Repl"); - for (i = 0; i < n; i++) - { - fprintf(fplog, " "); /* put the title closer to the center */ - } - fprintf(fplog, "Empirical Transition Matrix\n"); - - fprintf(fplog, "Repl"); - for (i = 0; i < n; i++) - { - fprintf(fplog, "%8d", (i + 1)); - } - fprintf(fplog, "\n"); - - for (i = 0; i < n; i++) - { - fprintf(fplog, "Repl"); - for (j = 0; j < n; j++) - { - Tprint = 0.0; - if (nmoves[i][j] > 0) - { - Tprint = nmoves[i][j] / (2.0 * ntot); - } - fprintf(fplog, "%8.4f", Tprint); - } - fprintf(fplog, "%3d\n", i); - } -} - -static void print_ind(FILE* fplog, const char* leg, int n, int* ind, const gmx_bool* bEx) -{ - int i; - - fprintf(fplog, "Repl %2s %2d", leg, ind[0]); - for (i = 1; i < n; i++) - { - fprintf(fplog, " %c %2d", (bEx != nullptr && bEx[i]) ? 'x' : ' ', ind[i]); - } - fprintf(fplog, "\n"); -} - -static void print_allswitchind(FILE* fplog, int n, int* pind, int* allswaps, int* tmpswap) -{ - int i; - - for (i = 0; i < n; i++) - { - tmpswap[i] = allswaps[i]; - } - for (i = 0; i < n; i++) - { - allswaps[i] = tmpswap[pind[i]]; - } - - fprintf(fplog, "\nAccepted Exchanges: "); - for (i = 0; i < n; i++) - { - fprintf(fplog, "%d ", pind[i]); - } - fprintf(fplog, "\n"); - - /* the "Order After Exchange" is the state label corresponding to the configuration that - started in state listed in order, i.e. 
- - 3 0 1 2 - - means that the: - configuration starting in simulation 3 is now in simulation 0, - configuration starting in simulation 0 is now in simulation 1, - configuration starting in simulation 1 is now in simulation 2, - configuration starting in simulation 2 is now in simulation 3 - */ - fprintf(fplog, "Order After Exchange: "); - for (i = 0; i < n; i++) - { - fprintf(fplog, "%d ", allswaps[i]); - } - fprintf(fplog, "\n\n"); -} - -static void print_prob(FILE* fplog, const char* leg, int n, real* prob) -{ - int i; - char buf[8]; - - fprintf(fplog, "Repl %2s ", leg); - for (i = 1; i < n; i++) - { - if (prob[i] >= 0) - { - sprintf(buf, "%4.2f", prob[i]); - fprintf(fplog, " %3s", buf[0] == '1' ? "1.0" : buf + 1); - } - else - { - fprintf(fplog, " "); - } - } - fprintf(fplog, "\n"); -} - -static void print_count(FILE* fplog, const char* leg, int n, int* count) -{ - int i; - - fprintf(fplog, "Repl %2s ", leg); - for (i = 1; i < n; i++) - { - fprintf(fplog, " %4d", count[i]); - } - fprintf(fplog, "\n"); -} - -static real calc_delta(FILE* fplog, gmx_bool bPrint, struct gmx_repl_ex* re, int a, int b, int ap, int bp) -{ - - real ediff, dpV, delta = 0; - real* Epot = re->Epot; - real* Vol = re->Vol; - real** de = re->de; - real* beta = re->beta; - - /* Two cases; we are permuted and not. In all cases, setting ap = a and bp = b will reduce - to the non permuted case */ - - switch (re->type) - { - case ereTEMP: - /* - * Okabe et. al. Chem. Phys. Lett. 335 (2001) 435-439 - */ - ediff = Epot[b] - Epot[a]; - delta = -(beta[bp] - beta[ap]) * ediff; - break; - case ereLAMBDA: - /* two cases: when we are permuted, and not. 
*/ - /* non-permuted: - ediff = E_new - E_old - = [H_b(x_a) + H_a(x_b)] - [H_b(x_b) + H_a(x_a)] - = [H_b(x_a) - H_a(x_a)] + [H_a(x_b) - H_b(x_b)] - = de[b][a] + de[a][b] */ - - /* permuted: - ediff = E_new - E_old - = [H_bp(x_a) + H_ap(x_b)] - [H_bp(x_b) + H_ap(x_a)] - = [H_bp(x_a) - H_ap(x_a)] + [H_ap(x_b) - H_bp(x_b)] - = [H_bp(x_a) - H_a(x_a) + H_a(x_a) - H_ap(x_a)] + [H_ap(x_b) - H_b(x_b) + H_b(x_b) - H_bp(x_b)] - = [H_bp(x_a) - H_a(x_a)] - [H_ap(x_a) - H_a(x_a)] + [H_ap(x_b) - H_b(x_b)] - H_bp(x_b) - H_b(x_b)] - = (de[bp][a] - de[ap][a]) + (de[ap][b] - de[bp][b]) */ - /* but, in the current code implementation, we flip configurations, not indices . . . - So let's examine that. - = [H_b(x_ap) - H_a(x_a)] - [H_a(x_ap) - H_a(x_a)] + [H_a(x_bp) - H_b(x_b)] - H_b(x_bp) - H_b(x_b)] - = [H_b(x_ap) - H_a(x_ap)] + [H_a(x_bp) - H_b(x_pb)] - = (de[b][ap] - de[a][ap]) + (de[a][bp] - de[b][bp] - So, if we exchange b<=> bp and a<=> ap, we return to the same result. - So the simple solution is to flip the - position of perturbed and original indices in the tests. - */ - - ediff = (de[bp][a] - de[ap][a]) + (de[ap][b] - de[bp][b]); - delta = ediff * beta[a]; /* assume all same temperature in this case */ - break; - case ereTL: - /* not permuted: */ - /* delta = reduced E_new - reduced E_old - = [beta_b H_b(x_a) + beta_a H_a(x_b)] - [beta_b H_b(x_b) + beta_a H_a(x_a)] - = [beta_b H_b(x_a) - beta_a H_a(x_a)] + [beta_a H_a(x_b) - beta_b H_b(x_b)] - = [beta_b dH_b(x_a) + beta_b H_a(x_a) - beta_a H_a(x_a)] + - [beta_a dH_a(x_b) + beta_a H_b(x_b) - beta_b H_b(x_b)] - = [beta_b dH_b(x_a) + [beta_a dH_a(x_b) + - beta_b (H_a(x_a) - H_b(x_b)]) - beta_a (H_a(x_a) - H_b(x_b)) - = beta_b dH_b(x_a) + beta_a dH_a(x_b) - (beta_b - beta_a)(H_b(x_b) - H_a(x_a) */ - /* delta = beta[b]*de[b][a] + beta[a]*de[a][b] - (beta[b] - beta[a])*(Epot[b] - Epot[a]; */ - /* permuted (big breath!) 
*/ - /* delta = reduced E_new - reduced E_old - = [beta_bp H_bp(x_a) + beta_ap H_ap(x_b)] - [beta_bp H_bp(x_b) + beta_ap H_ap(x_a)] - = [beta_bp H_bp(x_a) - beta_ap H_ap(x_a)] + [beta_ap H_ap(x_b) - beta_bp H_bp(x_b)] - = [beta_bp H_bp(x_a) - beta_ap H_ap(x_a)] + [beta_ap H_ap(x_b) - beta_bp H_bp(x_b)] - - beta_pb H_a(x_a) + beta_ap H_a(x_a) + beta_pb H_a(x_a) - beta_ap H_a(x_a) - - beta_ap H_b(x_b) + beta_bp H_b(x_b) + beta_ap H_b(x_b) - beta_bp H_b(x_b) - = [(beta_bp H_bp(x_a) - beta_bp H_a(x_a)) - (beta_ap H_ap(x_a) - beta_ap H_a(x_a))] + - [(beta_ap H_ap(x_b) - beta_ap H_b(x_b)) - (beta_bp H_bp(x_b) - beta_bp H_b(x_b))] - + beta_pb H_a(x_a) - beta_ap H_a(x_a) + beta_ap H_b(x_b) - beta_bp H_b(x_b) - = [beta_bp (H_bp(x_a) - H_a(x_a)) - beta_ap (H_ap(x_a) - H_a(x_a))] + - [beta_ap (H_ap(x_b) - H_b(x_b)) - beta_bp (H_bp(x_b) - H_b(x_b))] - + beta_pb (H_a(x_a) - H_b(x_b)) - beta_ap (H_a(x_a) - H_b(x_b)) - = ([beta_bp de[bp][a] - beta_ap de[ap][a]) + beta_ap de[ap][b] - beta_bp de[bp][b]) - + (beta_pb-beta_ap)(H_a(x_a) - H_b(x_b)) */ - delta = beta[bp] * (de[bp][a] - de[bp][b]) + beta[ap] * (de[ap][b] - de[ap][a]) - - (beta[bp] - beta[ap]) * (Epot[b] - Epot[a]); - break; - default: gmx_incons("Unknown replica exchange quantity"); - } - if (bPrint) - { - fprintf(fplog, "Repl %d <-> %d dE_term = %10.3e (kT)\n", a, b, delta); - } - if (re->bNPT) - { - /* revist the calculation for 5.0. Might be some improvements. 
*/ - dpV = (beta[ap] * re->pres[ap] - beta[bp] * re->pres[bp]) * (Vol[b] - Vol[a]) / PRESFAC; - if (bPrint) - { - fprintf(fplog, " dpV = %10.3e d = %10.3e\n", dpV, delta + dpV); - } - delta += dpV; - } - return delta; -} - -static void test_for_replica_exchange(FILE* fplog, - const gmx_multisim_t* ms, - struct gmx_repl_ex* re, - const gmx_enerdata_t* enerd, - real vol, - int64_t step, - real time) -{ - int m, i, j, a, b, ap, bp, i0, i1, tmp; - real delta = 0; - gmx_bool bPrint, bMultiEx; - gmx_bool* bEx = re->bEx; - real* prob = re->prob; - int* pind = re->destinations; /* permuted index */ - gmx_bool bEpot = FALSE; - gmx_bool bDLambda = FALSE; - gmx_bool bVol = FALSE; - gmx::ThreeFry2x64<64> rng(re->seed, gmx::RandomDomain::ReplicaExchange); - gmx::UniformRealDistribution uniformRealDist; - gmx::UniformIntDistribution uniformNreplDist(0, re->nrepl - 1); - - bMultiEx = (re->nex > 1); /* multiple exchanges at each state */ - fprintf(fplog, "Replica exchange at step %" PRId64 " time %.5f\n", step, time); - - if (re->bNPT) - { - for (i = 0; i < re->nrepl; i++) - { - re->Vol[i] = 0; - } - bVol = TRUE; - re->Vol[re->repl] = vol; - } - if ((re->type == ereTEMP || re->type == ereTL)) - { - for (i = 0; i < re->nrepl; i++) - { - re->Epot[i] = 0; - } - bEpot = TRUE; - re->Epot[re->repl] = enerd->term[F_EPOT]; - /* temperatures of different states*/ - for (i = 0; i < re->nrepl; i++) - { - re->beta[i] = 1.0 / (re->q[ereTEMP][i] * BOLTZ); - } - } - else - { - for (i = 0; i < re->nrepl; i++) - { - re->beta[i] = 1.0 / (re->temp * BOLTZ); /* we have a single temperature */ - } - } - if (re->type == ereLAMBDA || re->type == ereTL) - { - bDLambda = TRUE; - /* lambda differences. 
*/ - /* de[i][j] is the energy of the jth simulation in the ith Hamiltonian - minus the energy of the jth simulation in the jth Hamiltonian */ - for (i = 0; i < re->nrepl; i++) - { - for (j = 0; j < re->nrepl; j++) - { - re->de[i][j] = 0; - } - } - for (i = 0; i < re->nrepl; i++) - { - re->de[i][re->repl] = enerd->foreignLambdaTerms.deltaH(re->q[ereLAMBDA][i]); - } - } - - /* now actually do the communication */ - if (bVol) - { - gmx_sum_sim(re->nrepl, re->Vol, ms); - } - if (bEpot) - { - gmx_sum_sim(re->nrepl, re->Epot, ms); - } - if (bDLambda) - { - for (i = 0; i < re->nrepl; i++) - { - gmx_sum_sim(re->nrepl, re->de[i], ms); - } - } - - /* make a duplicate set of indices for shuffling */ - for (i = 0; i < re->nrepl; i++) - { - pind[i] = re->ind[i]; - } - - rng.restart(step, 0); - - if (bMultiEx) - { - /* multiple random switch exchange */ - int nself = 0; - - - for (i = 0; i < re->nex + nself; i++) - { - // For now this is superfluous, but just in case we ever add more - // calls in different branches it is safer to always reset the distribution. - uniformNreplDist.reset(); - - /* randomly select a pair */ - /* in theory, could reduce this by identifying only which switches had a nonneglibible - probability of occurring (log p > -100) and only operate on those switches */ - /* find out which state it is from, and what label that state currently has. Likely - more work that useful. */ - i0 = uniformNreplDist(rng); - i1 = uniformNreplDist(rng); - if (i0 == i1) - { - nself++; - continue; /* self-exchange, back up and do it again */ - } - - a = re->ind[i0]; /* what are the indices of these states? 
*/ - b = re->ind[i1]; - ap = pind[i0]; - bp = pind[i1]; - - bPrint = FALSE; /* too noisy */ - /* calculate the energy difference */ - /* if the code changes to flip the STATES, rather than the configurations, - use the commented version of the code */ - /* delta = calc_delta(fplog,bPrint,re,a,b,ap,bp); */ - delta = calc_delta(fplog, bPrint, re, ap, bp, a, b); - - /* we actually only use the first space in the prob and bEx array, - since there are actually many switches between pairs. */ - - if (delta <= 0) - { - /* accepted */ - prob[0] = 1; - bEx[0] = TRUE; - } - else - { - if (delta > c_probabilityCutoff) - { - prob[0] = 0; - } - else - { - prob[0] = exp(-delta); - } - // roll a number to determine if accepted. For now it is superfluous to - // reset, but just in case we ever add more calls in different branches - // it is safer to always reset the distribution. - uniformRealDist.reset(); - bEx[0] = uniformRealDist(rng) < prob[0]; - } - re->prob_sum[0] += prob[0]; - - if (bEx[0]) - { - /* swap the states */ - tmp = pind[i0]; - pind[i0] = pind[i1]; - pind[i1] = tmp; - } - } - re->nattempt[0]++; /* keep track of total permutation trials here */ - print_allswitchind(fplog, re->nrepl, pind, re->allswaps, re->tmpswap); - } - else - { - /* standard nearest neighbor replica exchange */ - - m = (step / re->nst) % 2; - for (i = 1; i < re->nrepl; i++) - { - a = re->ind[i - 1]; - b = re->ind[i]; - - bPrint = (re->repl == a || re->repl == b); - if (i % 2 == m) - { - delta = calc_delta(fplog, bPrint, re, a, b, a, b); - if (delta <= 0) - { - /* accepted */ - prob[i] = 1; - bEx[i] = TRUE; - } - else - { - if (delta > c_probabilityCutoff) - { - prob[i] = 0; - } - else - { - prob[i] = exp(-delta); - } - // roll a number to determine if accepted. For now it is superfluous to - // reset, but just in case we ever add more calls in different branches - // it is safer to always reset the distribution. 
- uniformRealDist.reset(); - bEx[i] = uniformRealDist(rng) < prob[i]; - } - re->prob_sum[i] += prob[i]; - - if (bEx[i]) - { - /* swap these two */ - tmp = pind[i - 1]; - pind[i - 1] = pind[i]; - pind[i] = tmp; - re->nexchange[i]++; /* statistics for back compatibility */ - } - } - else - { - prob[i] = -1; - bEx[i] = FALSE; - } - } - /* print some statistics */ - print_ind(fplog, "ex", re->nrepl, re->ind, bEx); - print_prob(fplog, "pr", re->nrepl, prob); - fprintf(fplog, "\n"); - re->nattempt[m]++; - } - - /* record which moves were made and accepted */ - for (i = 0; i < re->nrepl; i++) - { - re->nmoves[re->ind[i]][pind[i]] += 1; - re->nmoves[pind[i]][re->ind[i]] += 1; - } - fflush(fplog); /* make sure we can see what the last exchange was */ -} - -static void cyclic_decomposition(const int* destinations, int** cyclic, gmx_bool* incycle, const int nrepl, int* nswap) -{ - - int i, j, c, p; - int maxlen = 1; - for (i = 0; i < nrepl; i++) - { - incycle[i] = FALSE; - } - for (i = 0; i < nrepl; i++) /* one cycle for each replica */ - { - if (incycle[i]) - { - cyclic[i][0] = -1; - continue; - } - cyclic[i][0] = i; - incycle[i] = TRUE; - c = 1; - p = i; - for (j = 0; j < nrepl; j++) /* potentially all cycles are part, but we will break first */ - { - p = destinations[p]; /* start permuting */ - if (p == i) - { - cyclic[i][c] = -1; - if (c > maxlen) - { - maxlen = c; - } - break; /* we've reached the original element, the cycle is complete, and we marked the end. 
*/ - } - else - { - cyclic[i][c] = p; /* each permutation gives a new member of the cycle */ - incycle[p] = TRUE; - c++; - } - } - } - *nswap = maxlen - 1; - - if (debug) - { - for (i = 0; i < nrepl; i++) - { - fprintf(debug, "Cycle %d:", i); - for (j = 0; j < nrepl; j++) - { - if (cyclic[i][j] < 0) - { - break; - } - fprintf(debug, "%2d", cyclic[i][j]); - } - fprintf(debug, "\n"); - } - fflush(debug); - } -} - -static void compute_exchange_order(int** cyclic, int** order, const int nrepl, const int maxswap) -{ - int i, j; - - for (j = 0; j < maxswap; j++) - { - for (i = 0; i < nrepl; i++) - { - if (cyclic[i][j + 1] >= 0) - { - order[cyclic[i][j + 1]][j] = cyclic[i][j]; - order[cyclic[i][j]][j] = cyclic[i][j + 1]; - } - } - for (i = 0; i < nrepl; i++) - { - if (order[i][j] < 0) - { - order[i][j] = i; /* if it's not exchanging, it should stay this round*/ - } - } - } - - if (debug) - { - fprintf(debug, "Replica Exchange Order\n"); - for (i = 0; i < nrepl; i++) - { - fprintf(debug, "Replica %d:", i); - for (j = 0; j < maxswap; j++) - { - if (order[i][j] < 0) - { - break; - } - fprintf(debug, "%2d", order[i][j]); - } - fprintf(debug, "\n"); - } - fflush(debug); - } -} - -static void prepare_to_do_exchange(struct gmx_repl_ex* re, const int replica_id, int* maxswap, gmx_bool* bThisReplicaExchanged) -{ - int i, j; - /* Hold the cyclic decomposition of the (multiple) replica - * exchange. */ - gmx_bool bAnyReplicaExchanged = FALSE; - *bThisReplicaExchanged = FALSE; - - for (i = 0; i < re->nrepl; i++) - { - if (re->destinations[i] != re->ind[i]) - { - /* only mark as exchanged if the index has been shuffled */ - bAnyReplicaExchanged = TRUE; - break; - } - } - if (bAnyReplicaExchanged) - { - /* reinitialize the placeholder arrays */ - for (i = 0; i < re->nrepl; i++) - { - for (j = 0; j < re->nrepl; j++) - { - re->cyclic[i][j] = -1; - re->order[i][j] = -1; - } - } - - /* Identify the cyclic decomposition of the permutation (very - * fast if neighbor replica exchange). 
*/ - cyclic_decomposition(re->destinations, re->cyclic, re->incycle, re->nrepl, maxswap); - - /* Now translate the decomposition into a replica exchange - * order at each step. */ - compute_exchange_order(re->cyclic, re->order, re->nrepl, *maxswap); - - /* Did this replica do any exchange at any point? */ - for (j = 0; j < *maxswap; j++) - { - if (replica_id != re->order[replica_id][j]) - { - *bThisReplicaExchanged = TRUE; - break; - } - } - } -} - -gmx_bool replica_exchange(FILE* fplog, - const t_commrec* cr, - const gmx_multisim_t* ms, - struct gmx_repl_ex* re, - t_state* state, - const gmx_enerdata_t* enerd, - t_state* state_local, - int64_t step, - real time) -{ - int j; - int replica_id = 0; - int exchange_partner; - int maxswap = 0; - /* Number of rounds of exchanges needed to deal with any multiple - * exchanges. */ - /* Where each replica ends up after the exchange attempt(s). */ - /* The order in which multiple exchanges will occur. */ - gmx_bool bThisReplicaExchanged = FALSE; - - if (MASTER(cr)) - { - replica_id = re->repl; - test_for_replica_exchange(fplog, ms, re, enerd, det(state_local->box), step, time); - prepare_to_do_exchange(re, replica_id, &maxswap, &bThisReplicaExchanged); - } - /* Do intra-simulation broadcast so all processors belonging to - * each simulation know whether they need to participate in - * collecting the state. Otherwise, they might as well get on with - * the next thing to do. 
*/ - if (DOMAINDECOMP(cr)) - { -#if GMX_MPI - MPI_Bcast(&bThisReplicaExchanged, sizeof(gmx_bool), MPI_BYTE, MASTERRANK(cr), cr->mpi_comm_mygroup); -#endif - } - - if (bThisReplicaExchanged) - { - /* Exchange the states */ - /* Collect the global state on the master node */ - if (DOMAINDECOMP(cr)) - { - dd_collect_state(cr->dd, state_local, state); - } - else - { - copy_state_serial(state_local, state); - } - - if (MASTER(cr)) - { - /* There will be only one swap cycle with standard replica - * exchange, but there may be multiple swap cycles if we - * allow multiple swaps. */ - - for (j = 0; j < maxswap; j++) - { - exchange_partner = re->order[replica_id][j]; - - if (exchange_partner != replica_id) - { - /* Exchange the global states between the master nodes */ - if (debug) - { - fprintf(debug, "Exchanging %d with %d\n", replica_id, exchange_partner); - } - exchange_state(ms, exchange_partner, state); - } - } - /* For temperature-type replica exchange, we need to scale - * the velocities. 
*/ - if (re->type == ereTEMP || re->type == ereTL) - { - scale_velocities(state->v, std::sqrt(re->q[ereTEMP][replica_id] - / re->q[ereTEMP][re->destinations[replica_id]])); - } - } - - /* With domain decomposition the global state is distributed later */ - if (!DOMAINDECOMP(cr)) - { - /* Copy the global state to the local state data structure */ - copy_state_serial(state, state_local); - } - } - - return bThisReplicaExchanged; -} - -void print_replica_exchange_statistics(FILE* fplog, struct gmx_repl_ex* re) -{ - int i; - - fprintf(fplog, "\nReplica exchange statistics\n"); - - if (re->nex == 0) - { - fprintf(fplog, "Repl %d attempts, %d odd, %d even\n", re->nattempt[0] + re->nattempt[1], - re->nattempt[1], re->nattempt[0]); - - fprintf(fplog, "Repl average probabilities:\n"); - for (i = 1; i < re->nrepl; i++) - { - if (re->nattempt[i % 2] == 0) - { - re->prob[i] = 0; - } - else - { - re->prob[i] = re->prob_sum[i] / re->nattempt[i % 2]; - } - } - print_ind(fplog, "", re->nrepl, re->ind, nullptr); - print_prob(fplog, "", re->nrepl, re->prob); - - fprintf(fplog, "Repl number of exchanges:\n"); - print_ind(fplog, "", re->nrepl, re->ind, nullptr); - print_count(fplog, "", re->nrepl, re->nexchange); - - fprintf(fplog, "Repl average number of exchanges:\n"); - for (i = 1; i < re->nrepl; i++) - { - if (re->nattempt[i % 2] == 0) - { - re->prob[i] = 0; - } - else - { - re->prob[i] = (static_cast(re->nexchange[i])) / re->nattempt[i % 2]; - } - } - print_ind(fplog, "", re->nrepl, re->ind, nullptr); - print_prob(fplog, "", re->nrepl, re->prob); - - fprintf(fplog, "\n"); - } - /* print the transition matrix */ - print_transition_matrix(fplog, re->nrepl, re->nmoves, re->nattempt); -} - -//! 
\endcond diff --git a/patches/gromacs-2021.7.diff/src/gromacs/mdrun/replicaexchange.h b/patches/gromacs-2021.7.diff/src/gromacs/mdrun/replicaexchange.h deleted file mode 100644 index 108632d94d..0000000000 --- a/patches/gromacs-2021.7.diff/src/gromacs/mdrun/replicaexchange.h +++ /dev/null @@ -1,118 +0,0 @@ -/* - * This file is part of the GROMACS molecular simulation package. - * - * Copyright (c) 1991-2000, University of Groningen, The Netherlands. - * Copyright (c) 2001-2004, The GROMACS development team. - * Copyright (c) 2011,2012,2013,2014,2015 by the GROMACS development team. - * Copyright (c) 2017,2018,2019,2020, by the GROMACS development team, led by - * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl, - * and including many others, as listed in the AUTHORS file in the - * top-level source directory and at http://www.gromacs.org. - * - * GROMACS is free software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public License - * as published by the Free Software Foundation; either version 2.1 - * of the License, or (at your option) any later version. - * - * GROMACS is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public - * License along with GROMACS; if not, see - * http://www.gnu.org/licenses, or write to the Free Software Foundation, - * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. - * - * If you want to redistribute modifications to GROMACS, please - * consider that scientific software is very special. Version - * control is crucial - bugs must be traceable. We will be happy to - * consider code for inclusion in the official distribution, but - * derived work must not be called official GROMACS. 
Details are found - * in the README & COPYING files - if they are missing, get the - * official version at http://www.gromacs.org. - * - * To help us fund GROMACS development, we humbly ask that you cite - * the research papers on the package. Check out http://www.gromacs.org. - */ -/*! \libinternal \file - * - * \brief Declares the routines for replica exchange. - * - * \author David van der Spoel - * \author Mark Abraham - * - * \ingroup module_mdrun - */ -#ifndef GMX_MDRUN_REPLICAEXCHANGE_H -#define GMX_MDRUN_REPLICAEXCHANGE_H - -#include - -#include "gromacs/utility/basedefinitions.h" -#include "gromacs/utility/real.h" - -struct gmx_enerdata_t; -struct gmx_multisim_t; -struct t_commrec; -struct t_inputrec; -class t_state; - -/*! \libinternal - * \brief The parameters for the replica exchange algorithm. */ -struct ReplicaExchangeParameters -{ - //! Interval in steps at which to attempt exchanges, 0 means no replica exchange. - int exchangeInterval = 0; - //! The number of exchanges to attempt at an exchange step. - int numExchanges = 0; - //! The random seed, -1 means generate a seed. - int randomSeed = -1; -}; - -//! Abstract type for replica exchange -typedef struct gmx_repl_ex* gmx_repl_ex_t; - -/*! \brief Setup function. - * - * Should only be called on the master ranks */ -gmx_repl_ex_t init_replica_exchange(FILE* fplog, - const gmx_multisim_t* ms, - int numAtomsInSystem, - const t_inputrec* ir, - const ReplicaExchangeParameters& replExParams); - -/*! \brief Attempts replica exchange. - * - * Should be called on all ranks. When running each replica in - * parallel, this routine collects the state on the master rank before - * exchange. With domain decomposition, the global state after - * exchange is stored in state and still needs to be redistributed - * over the ranks. - * - * \returns TRUE if the state has been exchanged. 
- */ -gmx_bool replica_exchange(FILE* fplog, - const t_commrec* cr, - const gmx_multisim_t* ms, - gmx_repl_ex_t re, - t_state* state, - const gmx_enerdata_t* enerd, - t_state* state_local, - int64_t step, - real time); - -/*! \brief Prints replica exchange statistics to the log file. - * - * Should only be called on the master ranks */ -void print_replica_exchange_statistics(FILE* fplog, gmx_repl_ex_t re); - -/* PLUMED HREX */ -extern int replica_exchange_get_repl(const gmx_repl_ex_t re); -extern int replica_exchange_get_nrepl(const gmx_repl_ex_t re); -extern void pd_collect_state(const t_commrec *cr, t_state *state); -extern void exchange_state(const gmx_multisim_t *ms, int b, t_state *state); -extern void copy_state_serial(const t_state *src, t_state *dest); -/* END PLUMED HREX */ - -#endif diff --git a/patches/gromacs-2021.7.diff/src/gromacs/mdrun/replicaexchange.h.preplumed b/patches/gromacs-2021.7.diff/src/gromacs/mdrun/replicaexchange.h.preplumed deleted file mode 100644 index e8cb9bdce3..0000000000 --- a/patches/gromacs-2021.7.diff/src/gromacs/mdrun/replicaexchange.h.preplumed +++ /dev/null @@ -1,110 +0,0 @@ -/* - * This file is part of the GROMACS molecular simulation package. - * - * Copyright (c) 1991-2000, University of Groningen, The Netherlands. - * Copyright (c) 2001-2004, The GROMACS development team. - * Copyright (c) 2011,2012,2013,2014,2015 by the GROMACS development team. - * Copyright (c) 2017,2018,2019,2020, by the GROMACS development team, led by - * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl, - * and including many others, as listed in the AUTHORS file in the - * top-level source directory and at http://www.gromacs.org. - * - * GROMACS is free software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public License - * as published by the Free Software Foundation; either version 2.1 - * of the License, or (at your option) any later version. 
- * - * GROMACS is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public - * License along with GROMACS; if not, see - * http://www.gnu.org/licenses, or write to the Free Software Foundation, - * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. - * - * If you want to redistribute modifications to GROMACS, please - * consider that scientific software is very special. Version - * control is crucial - bugs must be traceable. We will be happy to - * consider code for inclusion in the official distribution, but - * derived work must not be called official GROMACS. Details are found - * in the README & COPYING files - if they are missing, get the - * official version at http://www.gromacs.org. - * - * To help us fund GROMACS development, we humbly ask that you cite - * the research papers on the package. Check out http://www.gromacs.org. - */ -/*! \libinternal \file - * - * \brief Declares the routines for replica exchange. - * - * \author David van der Spoel - * \author Mark Abraham - * - * \ingroup module_mdrun - */ -#ifndef GMX_MDRUN_REPLICAEXCHANGE_H -#define GMX_MDRUN_REPLICAEXCHANGE_H - -#include - -#include "gromacs/utility/basedefinitions.h" -#include "gromacs/utility/real.h" - -struct gmx_enerdata_t; -struct gmx_multisim_t; -struct t_commrec; -struct t_inputrec; -class t_state; - -/*! \libinternal - * \brief The parameters for the replica exchange algorithm. */ -struct ReplicaExchangeParameters -{ - //! Interval in steps at which to attempt exchanges, 0 means no replica exchange. - int exchangeInterval = 0; - //! The number of exchanges to attempt at an exchange step. - int numExchanges = 0; - //! The random seed, -1 means generate a seed. - int randomSeed = -1; -}; - -//! 
Abstract type for replica exchange -typedef struct gmx_repl_ex* gmx_repl_ex_t; - -/*! \brief Setup function. - * - * Should only be called on the master ranks */ -gmx_repl_ex_t init_replica_exchange(FILE* fplog, - const gmx_multisim_t* ms, - int numAtomsInSystem, - const t_inputrec* ir, - const ReplicaExchangeParameters& replExParams); - -/*! \brief Attempts replica exchange. - * - * Should be called on all ranks. When running each replica in - * parallel, this routine collects the state on the master rank before - * exchange. With domain decomposition, the global state after - * exchange is stored in state and still needs to be redistributed - * over the ranks. - * - * \returns TRUE if the state has been exchanged. - */ -gmx_bool replica_exchange(FILE* fplog, - const t_commrec* cr, - const gmx_multisim_t* ms, - gmx_repl_ex_t re, - t_state* state, - const gmx_enerdata_t* enerd, - t_state* state_local, - int64_t step, - real time); - -/*! \brief Prints replica exchange statistics to the log file. - * - * Should only be called on the master ranks */ -void print_replica_exchange_statistics(FILE* fplog, gmx_repl_ex_t re); - -#endif diff --git a/patches/gromacs-2021.7.diff/src/gromacs/mdrun/rerun.cpp b/patches/gromacs-2021.7.diff/src/gromacs/mdrun/rerun.cpp deleted file mode 100644 index e1de8813ef..0000000000 --- a/patches/gromacs-2021.7.diff/src/gromacs/mdrun/rerun.cpp +++ /dev/null @@ -1,805 +0,0 @@ -/* - * This file is part of the GROMACS molecular simulation package. - * - * Copyright (c) 2018,2019,2020,2021, by the GROMACS development team, led by - * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl, - * and including many others, as listed in the AUTHORS file in the - * top-level source directory and at http://www.gromacs.org. 
- * - * GROMACS is free software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public License - * as published by the Free Software Foundation; either version 2.1 - * of the License, or (at your option) any later version. - * - * GROMACS is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public - * License along with GROMACS; if not, see - * http://www.gnu.org/licenses, or write to the Free Software Foundation, - * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. - * - * If you want to redistribute modifications to GROMACS, please - * consider that scientific software is very special. Version - * control is crucial - bugs must be traceable. We will be happy to - * consider code for inclusion in the official distribution, but - * derived work must not be called official GROMACS. Details are found - * in the README & COPYING files - if they are missing, get the - * official version at http://www.gromacs.org. - * - * To help us fund GROMACS development, we humbly ask that you cite - * the research papers on the package. Check out http://www.gromacs.org. - */ -/*! 
\internal \file - * - * \brief Implements the loop for simulation reruns - * - * \author Pascal Merz - * \ingroup module_mdrun - */ -#include "gmxpre.h" - -#include -#include -#include -#include - -#include -#include - -#include "gromacs/applied_forces/awh/awh.h" -#include "gromacs/commandline/filenm.h" -#include "gromacs/domdec/collect.h" -#include "gromacs/domdec/dlbtiming.h" -#include "gromacs/domdec/domdec.h" -#include "gromacs/domdec/domdec_network.h" -#include "gromacs/domdec/domdec_struct.h" -#include "gromacs/domdec/mdsetup.h" -#include "gromacs/domdec/partition.h" -#include "gromacs/essentialdynamics/edsam.h" -#include "gromacs/ewald/pme_load_balancing.h" -#include "gromacs/ewald/pme_pp.h" -#include "gromacs/fileio/trxio.h" -#include "gromacs/gmxlib/network.h" -#include "gromacs/gmxlib/nrnb.h" -#include "gromacs/gpu_utils/gpu_utils.h" -#include "gromacs/math/units.h" -#include "gromacs/listed_forces/listed_forces.h" -#include "gromacs/math/functions.h" -#include "gromacs/math/utilities.h" -#include "gromacs/math/vec.h" -#include "gromacs/math/vectypes.h" -#include "gromacs/mdlib/checkpointhandler.h" -#include "gromacs/mdlib/compute_io.h" -#include "gromacs/mdlib/constr.h" -#include "gromacs/mdlib/ebin.h" -#include "gromacs/mdlib/enerdata_utils.h" -#include "gromacs/mdlib/energyoutput.h" -#include "gromacs/mdlib/expanded.h" -#include "gromacs/mdlib/force.h" -#include "gromacs/mdlib/force_flags.h" -#include "gromacs/mdlib/forcerec.h" -#include "gromacs/mdlib/freeenergyparameters.h" -#include "gromacs/mdlib/md_support.h" -#include "gromacs/mdlib/mdatoms.h" -#include "gromacs/mdlib/mdoutf.h" -#include "gromacs/mdlib/membed.h" -#include "gromacs/mdlib/resethandler.h" -#include "gromacs/mdlib/sighandler.h" -#include "gromacs/mdlib/simulationsignal.h" -#include "gromacs/mdlib/stat.h" -#include "gromacs/mdlib/stophandler.h" -#include "gromacs/mdlib/tgroup.h" -#include "gromacs/mdlib/trajectory_writing.h" -#include "gromacs/mdlib/update.h" -#include 
"gromacs/mdlib/vcm.h" -#include "gromacs/mdlib/vsite.h" -#include "gromacs/mdrunutility/handlerestart.h" -#include "gromacs/mdrunutility/multisim.h" -#include "gromacs/mdrunutility/printtime.h" -#include "gromacs/mdtypes/awh_history.h" -#include "gromacs/mdtypes/awh_params.h" -#include "gromacs/mdtypes/commrec.h" -#include "gromacs/mdtypes/df_history.h" -#include "gromacs/mdtypes/energyhistory.h" -#include "gromacs/mdtypes/forcebuffers.h" -#include "gromacs/mdtypes/forcerec.h" -#include "gromacs/mdtypes/group.h" -#include "gromacs/mdtypes/inputrec.h" -#include "gromacs/mdtypes/interaction_const.h" -#include "gromacs/mdtypes/md_enums.h" -#include "gromacs/mdtypes/mdatom.h" -#include "gromacs/mdtypes/mdrunoptions.h" -#include "gromacs/mdtypes/observableshistory.h" -#include "gromacs/mdtypes/simulation_workload.h" -#include "gromacs/mdtypes/state.h" -#include "gromacs/mimic/utilities.h" -#include "gromacs/pbcutil/pbc.h" -#include "gromacs/pulling/output.h" -#include "gromacs/pulling/pull.h" -#include "gromacs/swap/swapcoords.h" -#include "gromacs/timing/wallcycle.h" -#include "gromacs/timing/walltime_accounting.h" -#include "gromacs/topology/atoms.h" -#include "gromacs/topology/idef.h" -#include "gromacs/topology/mtop_util.h" -#include "gromacs/topology/topology.h" -#include "gromacs/trajectory/trajectoryframe.h" -#include "gromacs/utility/basedefinitions.h" -#include "gromacs/utility/cstringutil.h" -#include "gromacs/utility/fatalerror.h" -#include "gromacs/utility/logger.h" -#include "gromacs/utility/real.h" - -#include "legacysimulator.h" -#include "replicaexchange.h" -#include "shellfc.h" - -/* PLUMED */ -#include "../../../Plumed.h" -extern int plumedswitch; -extern plumed plumedmain; -/* END PLUMED */ - -using gmx::SimulationSignaller; -using gmx::VirtualSitesHandler; - -/*! 
\brief Copy the state from \p rerunFrame to \p globalState and, if requested, construct vsites - * - * \param[in] rerunFrame The trajectory frame to compute energy/forces for - * \param[in,out] globalState The global state container - * \param[in] constructVsites When true, vsite coordinates are constructed - * \param[in] vsite Vsite setup, can be nullptr when \p constructVsites = false - * \param[in] timeStep Time step, used for constructing vsites - */ -static void prepareRerunState(const t_trxframe& rerunFrame, - t_state* globalState, - bool constructVsites, - const VirtualSitesHandler* vsite, - double timeStep) -{ - auto x = makeArrayRef(globalState->x); - auto rerunX = arrayRefFromArray(reinterpret_cast(rerunFrame.x), globalState->natoms); - std::copy(rerunX.begin(), rerunX.end(), x.begin()); - copy_mat(rerunFrame.box, globalState->box); - - if (constructVsites) - { - GMX_ASSERT(vsite, "Need valid vsite for constructing vsites"); - - vsite->construct(globalState->x, timeStep, globalState->v, globalState->box); - } -} - -void gmx::LegacySimulator::do_rerun() -{ - // TODO Historically, the EM and MD "integrators" used different - // names for the t_inputrec *parameter, but these must have the - // same name, now that it's a member of a struct. We use this ir - // alias to avoid a large ripple of nearly useless changes. - // t_inputrec is being replaced by IMdpOptionsProvider, so this - // will go away eventually. 
- t_inputrec* ir = inputrec; - int64_t step, step_rel; - double t; - bool isLastStep = false; - bool doFreeEnergyPerturbation = false; - unsigned int force_flags; - tensor force_vir, shake_vir, total_vir, pres; - t_trxstatus* status = nullptr; - rvec mu_tot; - t_trxframe rerun_fr; - gmx_localtop_t top(top_global->ffparams); - ForceBuffers f; - gmx_global_stat_t gstat; - gmx_shellfc_t* shellfc; - - double cycles; - - /* PLUMED */ - int plumedNeedsEnergy=0; - int plumedWantsToStop=0; - matrix plumed_vir; - real lambdaForce=0; - real realFepState=0; - /* END PLUMED */ - - /* Domain decomposition could incorrectly miss a bonded - interaction, but checking for that requires a global - communication stage, which does not otherwise happen in DD - code. So we do that alongside the first global energy reduction - after a new DD is made. These variables handle whether the - check happens, and the result it returns. */ - bool shouldCheckNumberOfBondedInteractions = false; - int totalNumberOfBondedInteractions = -1; - - SimulationSignals signals; - // Most global communnication stages don't propagate mdrun - // signals, and will use this object to achieve that. - SimulationSignaller nullSignaller(nullptr, nullptr, nullptr, false, false); - - GMX_LOG(mdlog.info) - .asParagraph() - .appendText( - "Note that it is planned that the command gmx mdrun -rerun will " - "be available in a different form in a future version of GROMACS, " - "e.g. gmx rerun -f."); - - if (ir->efep != efepNO - && (mdAtoms->mdatoms()->nMassPerturbed > 0 || (constr && constr->havePerturbedConstraints()))) - { - gmx_fatal(FARGS, - "Perturbed masses or constraints are not supported by rerun. 
" - "Either make a .tpr without mass and constraint perturbation, " - "or use GROMACS 2018.4, 2018.5 or later 2018 version."); - } - if (ir->bExpanded) - { - gmx_fatal(FARGS, "Expanded ensemble not supported by rerun."); - } - if (ir->bSimTemp) - { - gmx_fatal(FARGS, "Simulated tempering not supported by rerun."); - } - if (ir->bDoAwh) - { - gmx_fatal(FARGS, "AWH not supported by rerun."); - } - if (replExParams.exchangeInterval > 0) - { - gmx_fatal(FARGS, "Replica exchange not supported by rerun."); - } - if (opt2bSet("-ei", nfile, fnm) || observablesHistory->edsamHistory != nullptr) - { - gmx_fatal(FARGS, "Essential dynamics not supported by rerun."); - } - if (ir->bIMD) - { - gmx_fatal(FARGS, "Interactive MD not supported by rerun."); - } - if (isMultiSim(ms)) - { - gmx_fatal(FARGS, "Multiple simulations not supported by rerun."); - } - if (std::any_of(ir->opts.annealing, ir->opts.annealing + ir->opts.ngtc, - [](int i) { return i != eannNO; })) - { - gmx_fatal(FARGS, "Simulated annealing not supported by rerun."); - } - - /* Rerun can't work if an output file name is the same as the input file name. - * If this is the case, the user will get an error telling them what the issue is. 
- */ - if (strcmp(opt2fn("-rerun", nfile, fnm), opt2fn("-o", nfile, fnm)) == 0 - || strcmp(opt2fn("-rerun", nfile, fnm), opt2fn("-x", nfile, fnm)) == 0) - { - gmx_fatal(FARGS, - "When using mdrun -rerun, the name of the input trajectory file " - "%s cannot be identical to the name of an output file (whether " - "given explicitly with -o or -x, or by default)", - opt2fn("-rerun", nfile, fnm)); - } - - /* Settings for rerun */ - ir->nstlist = 1; - ir->nstcalcenergy = 1; - int nstglobalcomm = 1; - const bool bNS = true; - - ir->nstxout_compressed = 0; - const SimulationGroups* groups = &top_global->groups; - if (ir->eI == eiMimic) - { - auto nonConstGlobalTopology = const_cast(top_global); - nonConstGlobalTopology->intermolecularExclusionGroup = genQmmmIndices(*top_global); - } - int* fep_state = MASTER(cr) ? &state_global->fep_state : nullptr; - gmx::ArrayRef lambda = MASTER(cr) ? state_global->lambda : gmx::ArrayRef(); - initialize_lambdas(fplog, *ir, MASTER(cr), fep_state, lambda); - const bool simulationsShareState = false; - gmx_mdoutf* outf = init_mdoutf(fplog, nfile, fnm, mdrunOptions, cr, outputProvider, - mdModulesNotifier, ir, top_global, oenv, wcycle, - StartingBehavior::NewSimulation, simulationsShareState, ms); - gmx::EnergyOutput energyOutput(mdoutf_get_fp_ene(outf), top_global, ir, pull_work, - mdoutf_get_fp_dhdl(outf), true, StartingBehavior::NewSimulation, - simulationsShareState, mdModulesNotifier); - - gstat = global_stat_init(ir); - - /* Check for polarizable models and flexible constraints */ - shellfc = init_shell_flexcon(fplog, top_global, constr ? constr->numFlexibleConstraints() : 0, - ir->nstcalcenergy, DOMAINDECOMP(cr), - runScheduleWork->simulationWork.useGpuPme); - - { - double io = compute_io(ir, top_global->natoms, *groups, energyOutput.numEnergyTerms(), 1); - if ((io > 2000) && MASTER(cr)) - { - fprintf(stderr, "\nWARNING: This run will generate roughly %.0f Mb of data\n\n", io); - } - } - - // Local state only becomes valid now. 
- std::unique_ptr stateInstance; - t_state* state; - - if (DOMAINDECOMP(cr)) - { - stateInstance = std::make_unique(); - state = stateInstance.get(); - dd_init_local_state(cr->dd, state_global, state); - - /* Distribute the charge groups over the nodes from the master node */ - dd_partition_system(fplog, mdlog, ir->init_step, cr, TRUE, 1, state_global, *top_global, ir, - imdSession, pull_work, state, &f, mdAtoms, &top, fr, vsite, constr, - nrnb, nullptr, FALSE); - shouldCheckNumberOfBondedInteractions = true; - } - else - { - state_change_natoms(state_global, state_global->natoms); - /* Copy the pointer to the global state */ - state = state_global; - - mdAlgorithmsSetupAtomData(cr, ir, *top_global, &top, fr, &f, mdAtoms, constr, vsite, shellfc); - } - - auto mdatoms = mdAtoms->mdatoms(); - - // NOTE: The global state is no longer used at this point. - // But state_global is still used as temporary storage space for writing - // the global state to file and potentially for replica exchange. - // (Global topology should persist.) - - update_mdatoms(mdatoms, state->lambda[efptMASS]); - - if (ir->efep != efepNO && ir->fepvals->nstdhdl != 0) - { - doFreeEnergyPerturbation = true; - } - - { - int cglo_flags = - (CGLO_GSTAT - | (shouldCheckNumberOfBondedInteractions ? 
CGLO_CHECK_NUMBER_OF_BONDED_INTERACTIONS : 0)); - bool bSumEkinhOld = false; - t_vcm* vcm = nullptr; - compute_globals(gstat, cr, ir, fr, ekind, makeConstArrayRef(state->x), - makeConstArrayRef(state->v), state->box, mdatoms, nrnb, vcm, nullptr, enerd, - force_vir, shake_vir, total_vir, pres, constr, &nullSignaller, state->box, - &totalNumberOfBondedInteractions, &bSumEkinhOld, cglo_flags); - } - checkNumberOfBondedInteractions(mdlog, cr, totalNumberOfBondedInteractions, top_global, &top, - makeConstArrayRef(state->x), state->box, - &shouldCheckNumberOfBondedInteractions); - - if (MASTER(cr)) - { - fprintf(stderr, - "starting md rerun '%s', reading coordinates from" - " input trajectory '%s'\n\n", - *(top_global->name), opt2fn("-rerun", nfile, fnm)); - if (mdrunOptions.verbose) - { - fprintf(stderr, - "Calculated time to finish depends on nsteps from " - "run input file,\nwhich may not correspond to the time " - "needed to process input trajectory.\n\n"); - } - fprintf(fplog, "\n"); - } - - /* PLUMED */ - if(plumedswitch){ - /* detect plumed API version */ - int pversion=0; - plumed_cmd(plumedmain,"getApiVersion",&pversion); - /* setting kbT is only implemented with api>1) */ - real kbT=ir->opts.ref_t[0]*BOLTZ; - if(pversion>1) plumed_cmd(plumedmain,"setKbT",&kbT); - if(pversion>2){ - int res=1; - if( (startingBehavior != StartingBehavior::NewSimulation) ) plumed_cmd(plumedmain,"setRestart",&res); - } - - if(PAR(cr)){ - if(DOMAINDECOMP(cr)) { - plumed_cmd(plumedmain,"setMPIComm",&cr->dd->mpi_comm_all); - } - } - plumed_cmd(plumedmain,"setNatoms",&top_global->natoms); - plumed_cmd(plumedmain,"setMDEngine","gromacs"); - plumed_cmd(plumedmain,"setLog",fplog); - real real_delta_t=ir->delta_t; - plumed_cmd(plumedmain,"setTimestep",&real_delta_t); - plumed_cmd(plumedmain,"init",nullptr); - - if(PAR(cr)){ - if(DOMAINDECOMP(cr)) { - int nat_home = dd_numHomeAtoms(*cr->dd); - plumed_cmd(plumedmain,"setAtomsNlocal",&nat_home); - 
plumed_cmd(plumedmain,"setAtomsGatindex",cr->dd->globalAtomIndices.data()); - } - } - realFepState = state->fep_state; - plumed_cmd(plumedmain, "setExtraCV lambda", &realFepState); - plumed_cmd(plumedmain, "setExtraCVForce lambda", &lambdaForce); - } - /* END PLUMED */ - - walltime_accounting_start_time(walltime_accounting); - wallcycle_start(wcycle, ewcRUN); - print_start(fplog, cr, walltime_accounting, "mdrun"); - - /*********************************************************** - * - * Loop over MD steps - * - ************************************************************/ - - if (constr) - { - GMX_LOG(mdlog.info) - .asParagraph() - .appendText("Simulations has constraints. Rerun does not recalculate constraints."); - } - - rerun_fr.natoms = 0; - if (MASTER(cr)) - { - isLastStep = !read_first_frame(oenv, &status, opt2fn("-rerun", nfile, fnm), &rerun_fr, TRX_NEED_X); - if (rerun_fr.natoms != top_global->natoms) - { - gmx_fatal(FARGS, - "Number of atoms in trajectory (%d) does not match the " - "run input file (%d)\n", - rerun_fr.natoms, top_global->natoms); - } - - if (ir->pbcType != PbcType::No) - { - if (!rerun_fr.bBox) - { - gmx_fatal(FARGS, - "Rerun trajectory frame step %" PRId64 - " time %f " - "does not contain a box, while pbc is used", - rerun_fr.step, rerun_fr.time); - } - if (max_cutoff2(ir->pbcType, rerun_fr.box) < gmx::square(fr->rlist)) - { - gmx_fatal(FARGS, - "Rerun trajectory frame step %" PRId64 - " time %f " - "has too small box dimensions", - rerun_fr.step, rerun_fr.time); - } - } - } - - GMX_LOG(mdlog.info) - .asParagraph() - .appendText( - "Rerun does not report kinetic energy, total energy, temperature, virial and " - "pressure."); - - if (PAR(cr)) - { - rerun_parallel_comm(cr, &rerun_fr, &isLastStep); - } - - if (ir->pbcType != PbcType::No) - { - /* Set the shift vectors. - * Necessary here when have a static box different from the tpr box. 
- */ - calc_shifts(rerun_fr.box, fr->shift_vec); - } - - step = ir->init_step; - step_rel = 0; - - auto stopHandler = stopHandlerBuilder->getStopHandlerMD( - compat::not_null(&signals[eglsSTOPCOND]), false, MASTER(cr), - ir->nstlist, mdrunOptions.reproducible, nstglobalcomm, mdrunOptions.maximumHoursToRun, - ir->nstlist == 0, fplog, step, bNS, walltime_accounting); - - // we don't do counter resetting in rerun - finish will always be valid - walltime_accounting_set_valid_finish(walltime_accounting); - - const DDBalanceRegionHandler ddBalanceRegionHandler(cr); - - /* and stop now if we should */ - isLastStep = (isLastStep || (ir->nsteps >= 0 && step_rel > ir->nsteps)); - while (!isLastStep) - { - wallcycle_start(wcycle, ewcSTEP); - - if (rerun_fr.bStep) - { - step = rerun_fr.step; - step_rel = step - ir->init_step; - } - if (rerun_fr.bTime) - { - t = rerun_fr.time; - } - else - { - t = step; - } - - if (ir->efep != efepNO && MASTER(cr)) - { - if (rerun_fr.bLambda) - { - ir->fepvals->init_lambda = rerun_fr.lambda; - } - else - { - if (rerun_fr.bFepState) - { - state->fep_state = rerun_fr.fep_state; - } - } - - state_global->lambda = currentLambdas(step, *(ir->fepvals), state->fep_state); - } - - if (MASTER(cr)) - { - const bool constructVsites = ((vsite != nullptr) && mdrunOptions.rerunConstructVsites); - if (constructVsites && DOMAINDECOMP(cr)) - { - gmx_fatal(FARGS, - "Vsite recalculation with -rerun is not implemented with domain " - "decomposition, " - "use a single rank"); - } - prepareRerunState(rerun_fr, state_global, constructVsites, vsite, ir->delta_t); - } - - isLastStep = isLastStep || stopHandler->stoppingAfterCurrentStep(bNS); - - if (DOMAINDECOMP(cr)) - { - /* Repartition the domain decomposition */ - const bool bMasterState = true; - dd_partition_system(fplog, mdlog, step, cr, bMasterState, nstglobalcomm, state_global, - *top_global, ir, imdSession, pull_work, state, &f, mdAtoms, &top, - fr, vsite, constr, nrnb, wcycle, mdrunOptions.verbose); - 
shouldCheckNumberOfBondedInteractions = true; - /* PLUMED */ - if(plumedswitch){ - int nat_home = dd_numHomeAtoms(*cr->dd); - plumed_cmd(plumedmain,"setAtomsNlocal",&nat_home); - plumed_cmd(plumedmain,"setAtomsGatindex",cr->dd->globalAtomIndices.data()); - } - /* END PLUMED */ - } - - if (MASTER(cr)) - { - EnergyOutput::printHeader(fplog, step, t); /* can we improve the information printed here? */ - } - - if (ir->efep != efepNO) - { - update_mdatoms(mdatoms, state->lambda[efptMASS]); - } - - force_flags = (GMX_FORCE_STATECHANGED | GMX_FORCE_DYNAMICBOX | GMX_FORCE_ALLFORCES - | GMX_FORCE_VIRIAL | // TODO: Get rid of this once #2649 and #3400 are solved - GMX_FORCE_ENERGY | (doFreeEnergyPerturbation ? GMX_FORCE_DHDL : 0)); - - if (shellfc) - { - /* Now is the time to relax the shells */ - relax_shell_flexcon(fplog, cr, ms, mdrunOptions.verbose, enforcedRotation, step, ir, - imdSession, pull_work, bNS, force_flags, &top, constr, enerd, - state->natoms, state->x.arrayRefWithPadding(), - state->v.arrayRefWithPadding(), state->box, state->lambda, - &state->hist, &f.view(), force_vir, mdatoms, nrnb, wcycle, shellfc, - fr, runScheduleWork, t, mu_tot, vsite, ddBalanceRegionHandler); - } - else - { - /* The coordinates (x) are shifted (to get whole molecules) - * in do_force. - * This is parallellized as well, and does communication too. 
- * Check comments in sim_util.c - */ - Awh* awh = nullptr; - gmx_edsam* ed = nullptr; - /* PLUMED */ - plumedNeedsEnergy=0; - if(plumedswitch){ - int pversion=0; - plumed_cmd(plumedmain,"getApiVersion",&pversion); - long int lstep=step; plumed_cmd(plumedmain,"setStepLong",&lstep); - plumed_cmd(plumedmain,"setPositions",&state->x[0][0]); - plumed_cmd(plumedmain,"setMasses",&mdatoms->massT[0]); - plumed_cmd(plumedmain,"setCharges",&mdatoms->chargeA[0]); - plumed_cmd(plumedmain,"setBox",&state->box[0][0]); - plumed_cmd(plumedmain,"prepareCalc",nullptr); - plumed_cmd(plumedmain,"setStopFlag",&plumedWantsToStop); - plumed_cmd(plumedmain,"setForces",&f.view().force()[0][0]); - plumed_cmd(plumedmain,"isEnergyNeeded",&plumedNeedsEnergy); - if(plumedNeedsEnergy) force_flags |= GMX_FORCE_ENERGY | GMX_FORCE_VIRIAL; - clear_mat(plumed_vir); - plumed_cmd(plumedmain,"setVirial",&plumed_vir[0][0]); - } - /* END PLUMED */ - do_force(fplog, cr, ms, ir, awh, enforcedRotation, imdSession, pull_work, step, nrnb, - wcycle, &top, state->box, state->x.arrayRefWithPadding(), &state->hist, - &f.view(), force_vir, mdatoms, enerd, state->lambda, fr, runScheduleWork, - vsite, mu_tot, t, ed, GMX_FORCE_NS | force_flags, ddBalanceRegionHandler); - /* PLUMED */ - if(plumedswitch){ - if(plumedNeedsEnergy){ - msmul(force_vir,2.0,plumed_vir); - plumed_cmd(plumedmain,"setEnergy",&enerd->term[F_EPOT]); - plumed_cmd(plumedmain,"performCalc",nullptr); - msmul(plumed_vir,0.5,force_vir); - } else { - msmul(plumed_vir,0.5,plumed_vir); - m_add(force_vir,plumed_vir,force_vir); - } - if(plumedWantsToStop) isLastStep = true; - } - /* END PLUMED */ - } - - /* Now we have the energies and forces corresponding to the - * coordinates at time t. 
- */ - { - const bool isCheckpointingStep = false; - const bool doRerun = true; - const bool bSumEkinhOld = false; - do_md_trajectory_writing(fplog, cr, nfile, fnm, step, step_rel, t, ir, state, - state_global, observablesHistory, top_global, fr, outf, - energyOutput, ekind, f.view().force(), isCheckpointingStep, - doRerun, isLastStep, mdrunOptions.writeConfout, bSumEkinhOld); - } - - stopHandler->setSignal(); - - if (vsite != nullptr) - { - wallcycle_start(wcycle, ewcVSITECONSTR); - vsite->construct(state->x, ir->delta_t, state->v, state->box); - wallcycle_stop(wcycle, ewcVSITECONSTR); - } - - { - const bool doInterSimSignal = false; - const bool doIntraSimSignal = true; - bool bSumEkinhOld = false; - t_vcm* vcm = nullptr; - SimulationSignaller signaller(&signals, cr, ms, doInterSimSignal, doIntraSimSignal); - - compute_globals(gstat, cr, ir, fr, ekind, makeConstArrayRef(state->x), - makeConstArrayRef(state->v), state->box, mdatoms, nrnb, vcm, wcycle, - enerd, force_vir, shake_vir, total_vir, pres, constr, &signaller, - state->box, &totalNumberOfBondedInteractions, &bSumEkinhOld, - CGLO_GSTAT | CGLO_ENERGY - | (shouldCheckNumberOfBondedInteractions ? CGLO_CHECK_NUMBER_OF_BONDED_INTERACTIONS - : 0)); - checkNumberOfBondedInteractions(mdlog, cr, totalNumberOfBondedInteractions, top_global, - &top, makeConstArrayRef(state->x), state->box, - &shouldCheckNumberOfBondedInteractions); - } - - /* Note: this is OK, but there are some numerical precision issues with using the convergence of - the virial that should probably be addressed eventually. state->veta has better properies, - but what we actually need entering the new cycle is the new shake_vir value. Ideally, we could - generate the new shake_vir, but test the veta value for convergence. This will take some thought. 
*/ - - /* Output stuff */ - if (MASTER(cr)) - { - const bool bCalcEnerStep = true; - energyOutput.addDataAtEnergyStep( - doFreeEnergyPerturbation, bCalcEnerStep, t, mdatoms->tmass, enerd, ir->fepvals, - ir->expandedvals, state->box, - PTCouplingArrays({ state->boxv, state->nosehoover_xi, state->nosehoover_vxi, - state->nhpres_xi, state->nhpres_vxi }), - state->fep_state, shake_vir, force_vir, total_vir, pres, ekind, mu_tot, constr); - - const bool do_ene = true; - const bool do_log = true; - Awh* awh = nullptr; - const bool do_dr = ir->nstdisreout != 0; - const bool do_or = ir->nstorireout != 0; - - EnergyOutput::printAnnealingTemperatures(do_log ? fplog : nullptr, groups, &(ir->opts)); - energyOutput.printStepToEnergyFile(mdoutf_get_fp_ene(outf), do_ene, do_dr, do_or, - do_log ? fplog : nullptr, step, t, fr->fcdata.get(), awh); - - if (ir->bPull) - { - pull_print_output(pull_work, step, t); - } - - if (do_per_step(step, ir->nstlog)) - { - if (fflush(fplog) != 0) - { - gmx_fatal(FARGS, "Cannot flush logfile - maybe you are out of disk space?"); - } - } - } - - /* Print the remaining wall clock time for the run */ - if (isMasterSimMasterRank(ms, MASTER(cr)) && (mdrunOptions.verbose || gmx_got_usr_signal())) - { - if (shellfc) - { - fprintf(stderr, "\n"); - } - print_time(stderr, walltime_accounting, step, ir, cr); - } - - /* Ion/water position swapping. - * Not done in last step since trajectory writing happens before this call - * in the MD loop and exchanges would be lost anyway. 
*/ - if ((ir->eSwapCoords != eswapNO) && (step > 0) && !isLastStep && do_per_step(step, ir->swap->nstswap)) - { - const bool doRerun = true; - do_swapcoords(cr, step, t, ir, swap, wcycle, rerun_fr.x, rerun_fr.box, - MASTER(cr) && mdrunOptions.verbose, doRerun); - } - - if (MASTER(cr)) - { - /* read next frame from input trajectory */ - isLastStep = !read_next_frame(oenv, status, &rerun_fr); - } - - if (PAR(cr)) - { - rerun_parallel_comm(cr, &rerun_fr, &isLastStep); - } - - cycles = wallcycle_stop(wcycle, ewcSTEP); - if (DOMAINDECOMP(cr) && wcycle) - { - dd_cycles_add(cr->dd, cycles, ddCyclStep); - } - - if (!rerun_fr.bStep) - { - /* increase the MD step number */ - step++; - step_rel++; - } - } - /* End of main MD loop */ - - /* Closing TNG files can include compressing data. Therefore it is good to do that - * before stopping the time measurements. */ - mdoutf_tng_close(outf); - - /* Stop measuring walltime */ - walltime_accounting_end_time(walltime_accounting); - - if (MASTER(cr)) - { - close_trx(status); - } - - if (!thisRankHasDuty(cr, DUTY_PME)) - { - /* Tell the PME only node to finish */ - gmx_pme_send_finish(cr); - } - - done_mdoutf(outf); - - done_shellfc(fplog, shellfc, step_rel); - - walltime_accounting_set_nsteps_done(walltime_accounting, step_rel); -} diff --git a/patches/gromacs-2021.7.diff/src/gromacs/mdrun/rerun.cpp.preplumed b/patches/gromacs-2021.7.diff/src/gromacs/mdrun/rerun.cpp.preplumed deleted file mode 100644 index 36333d3c94..0000000000 --- a/patches/gromacs-2021.7.diff/src/gromacs/mdrun/rerun.cpp.preplumed +++ /dev/null @@ -1,712 +0,0 @@ -/* - * This file is part of the GROMACS molecular simulation package. - * - * Copyright (c) 2018,2019,2020,2021, by the GROMACS development team, led by - * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl, - * and including many others, as listed in the AUTHORS file in the - * top-level source directory and at http://www.gromacs.org. 
- * - * GROMACS is free software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public License - * as published by the Free Software Foundation; either version 2.1 - * of the License, or (at your option) any later version. - * - * GROMACS is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public - * License along with GROMACS; if not, see - * http://www.gnu.org/licenses, or write to the Free Software Foundation, - * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. - * - * If you want to redistribute modifications to GROMACS, please - * consider that scientific software is very special. Version - * control is crucial - bugs must be traceable. We will be happy to - * consider code for inclusion in the official distribution, but - * derived work must not be called official GROMACS. Details are found - * in the README & COPYING files - if they are missing, get the - * official version at http://www.gromacs.org. - * - * To help us fund GROMACS development, we humbly ask that you cite - * the research papers on the package. Check out http://www.gromacs.org. - */ -/*! 
\internal \file - * - * \brief Implements the loop for simulation reruns - * - * \author Pascal Merz - * \ingroup module_mdrun - */ -#include "gmxpre.h" - -#include -#include -#include -#include - -#include -#include - -#include "gromacs/applied_forces/awh/awh.h" -#include "gromacs/commandline/filenm.h" -#include "gromacs/domdec/collect.h" -#include "gromacs/domdec/dlbtiming.h" -#include "gromacs/domdec/domdec.h" -#include "gromacs/domdec/domdec_network.h" -#include "gromacs/domdec/domdec_struct.h" -#include "gromacs/domdec/mdsetup.h" -#include "gromacs/domdec/partition.h" -#include "gromacs/essentialdynamics/edsam.h" -#include "gromacs/ewald/pme_load_balancing.h" -#include "gromacs/ewald/pme_pp.h" -#include "gromacs/fileio/trxio.h" -#include "gromacs/gmxlib/network.h" -#include "gromacs/gmxlib/nrnb.h" -#include "gromacs/gpu_utils/gpu_utils.h" -#include "gromacs/listed_forces/listed_forces.h" -#include "gromacs/math/functions.h" -#include "gromacs/math/utilities.h" -#include "gromacs/math/vec.h" -#include "gromacs/math/vectypes.h" -#include "gromacs/mdlib/checkpointhandler.h" -#include "gromacs/mdlib/compute_io.h" -#include "gromacs/mdlib/constr.h" -#include "gromacs/mdlib/ebin.h" -#include "gromacs/mdlib/enerdata_utils.h" -#include "gromacs/mdlib/energyoutput.h" -#include "gromacs/mdlib/expanded.h" -#include "gromacs/mdlib/force.h" -#include "gromacs/mdlib/force_flags.h" -#include "gromacs/mdlib/forcerec.h" -#include "gromacs/mdlib/freeenergyparameters.h" -#include "gromacs/mdlib/md_support.h" -#include "gromacs/mdlib/mdatoms.h" -#include "gromacs/mdlib/mdoutf.h" -#include "gromacs/mdlib/membed.h" -#include "gromacs/mdlib/resethandler.h" -#include "gromacs/mdlib/sighandler.h" -#include "gromacs/mdlib/simulationsignal.h" -#include "gromacs/mdlib/stat.h" -#include "gromacs/mdlib/stophandler.h" -#include "gromacs/mdlib/tgroup.h" -#include "gromacs/mdlib/trajectory_writing.h" -#include "gromacs/mdlib/update.h" -#include "gromacs/mdlib/vcm.h" -#include 
"gromacs/mdlib/vsite.h" -#include "gromacs/mdrunutility/handlerestart.h" -#include "gromacs/mdrunutility/multisim.h" -#include "gromacs/mdrunutility/printtime.h" -#include "gromacs/mdtypes/awh_history.h" -#include "gromacs/mdtypes/awh_params.h" -#include "gromacs/mdtypes/commrec.h" -#include "gromacs/mdtypes/df_history.h" -#include "gromacs/mdtypes/energyhistory.h" -#include "gromacs/mdtypes/forcebuffers.h" -#include "gromacs/mdtypes/forcerec.h" -#include "gromacs/mdtypes/group.h" -#include "gromacs/mdtypes/inputrec.h" -#include "gromacs/mdtypes/interaction_const.h" -#include "gromacs/mdtypes/md_enums.h" -#include "gromacs/mdtypes/mdatom.h" -#include "gromacs/mdtypes/mdrunoptions.h" -#include "gromacs/mdtypes/observableshistory.h" -#include "gromacs/mdtypes/simulation_workload.h" -#include "gromacs/mdtypes/state.h" -#include "gromacs/mimic/utilities.h" -#include "gromacs/pbcutil/pbc.h" -#include "gromacs/pulling/output.h" -#include "gromacs/pulling/pull.h" -#include "gromacs/swap/swapcoords.h" -#include "gromacs/timing/wallcycle.h" -#include "gromacs/timing/walltime_accounting.h" -#include "gromacs/topology/atoms.h" -#include "gromacs/topology/idef.h" -#include "gromacs/topology/mtop_util.h" -#include "gromacs/topology/topology.h" -#include "gromacs/trajectory/trajectoryframe.h" -#include "gromacs/utility/basedefinitions.h" -#include "gromacs/utility/cstringutil.h" -#include "gromacs/utility/fatalerror.h" -#include "gromacs/utility/logger.h" -#include "gromacs/utility/real.h" - -#include "legacysimulator.h" -#include "replicaexchange.h" -#include "shellfc.h" - -using gmx::SimulationSignaller; -using gmx::VirtualSitesHandler; - -/*! 
\brief Copy the state from \p rerunFrame to \p globalState and, if requested, construct vsites - * - * \param[in] rerunFrame The trajectory frame to compute energy/forces for - * \param[in,out] globalState The global state container - * \param[in] constructVsites When true, vsite coordinates are constructed - * \param[in] vsite Vsite setup, can be nullptr when \p constructVsites = false - * \param[in] timeStep Time step, used for constructing vsites - */ -static void prepareRerunState(const t_trxframe& rerunFrame, - t_state* globalState, - bool constructVsites, - const VirtualSitesHandler* vsite, - double timeStep) -{ - auto x = makeArrayRef(globalState->x); - auto rerunX = arrayRefFromArray(reinterpret_cast(rerunFrame.x), globalState->natoms); - std::copy(rerunX.begin(), rerunX.end(), x.begin()); - copy_mat(rerunFrame.box, globalState->box); - - if (constructVsites) - { - GMX_ASSERT(vsite, "Need valid vsite for constructing vsites"); - - vsite->construct(globalState->x, timeStep, globalState->v, globalState->box); - } -} - -void gmx::LegacySimulator::do_rerun() -{ - // TODO Historically, the EM and MD "integrators" used different - // names for the t_inputrec *parameter, but these must have the - // same name, now that it's a member of a struct. We use this ir - // alias to avoid a large ripple of nearly useless changes. - // t_inputrec is being replaced by IMdpOptionsProvider, so this - // will go away eventually. 
- t_inputrec* ir = inputrec; - int64_t step, step_rel; - double t; - bool isLastStep = false; - bool doFreeEnergyPerturbation = false; - unsigned int force_flags; - tensor force_vir, shake_vir, total_vir, pres; - t_trxstatus* status = nullptr; - rvec mu_tot; - t_trxframe rerun_fr; - gmx_localtop_t top(top_global->ffparams); - ForceBuffers f; - gmx_global_stat_t gstat; - gmx_shellfc_t* shellfc; - - double cycles; - - /* Domain decomposition could incorrectly miss a bonded - interaction, but checking for that requires a global - communication stage, which does not otherwise happen in DD - code. So we do that alongside the first global energy reduction - after a new DD is made. These variables handle whether the - check happens, and the result it returns. */ - bool shouldCheckNumberOfBondedInteractions = false; - int totalNumberOfBondedInteractions = -1; - - SimulationSignals signals; - // Most global communnication stages don't propagate mdrun - // signals, and will use this object to achieve that. - SimulationSignaller nullSignaller(nullptr, nullptr, nullptr, false, false); - - GMX_LOG(mdlog.info) - .asParagraph() - .appendText( - "Note that it is planned that the command gmx mdrun -rerun will " - "be available in a different form in a future version of GROMACS, " - "e.g. gmx rerun -f."); - - if (ir->efep != efepNO - && (mdAtoms->mdatoms()->nMassPerturbed > 0 || (constr && constr->havePerturbedConstraints()))) - { - gmx_fatal(FARGS, - "Perturbed masses or constraints are not supported by rerun. 
" - "Either make a .tpr without mass and constraint perturbation, " - "or use GROMACS 2018.4, 2018.5 or later 2018 version."); - } - if (ir->bExpanded) - { - gmx_fatal(FARGS, "Expanded ensemble not supported by rerun."); - } - if (ir->bSimTemp) - { - gmx_fatal(FARGS, "Simulated tempering not supported by rerun."); - } - if (ir->bDoAwh) - { - gmx_fatal(FARGS, "AWH not supported by rerun."); - } - if (replExParams.exchangeInterval > 0) - { - gmx_fatal(FARGS, "Replica exchange not supported by rerun."); - } - if (opt2bSet("-ei", nfile, fnm) || observablesHistory->edsamHistory != nullptr) - { - gmx_fatal(FARGS, "Essential dynamics not supported by rerun."); - } - if (ir->bIMD) - { - gmx_fatal(FARGS, "Interactive MD not supported by rerun."); - } - if (isMultiSim(ms)) - { - gmx_fatal(FARGS, "Multiple simulations not supported by rerun."); - } - if (std::any_of(ir->opts.annealing, ir->opts.annealing + ir->opts.ngtc, - [](int i) { return i != eannNO; })) - { - gmx_fatal(FARGS, "Simulated annealing not supported by rerun."); - } - - /* Rerun can't work if an output file name is the same as the input file name. - * If this is the case, the user will get an error telling them what the issue is. 
- */ - if (strcmp(opt2fn("-rerun", nfile, fnm), opt2fn("-o", nfile, fnm)) == 0 - || strcmp(opt2fn("-rerun", nfile, fnm), opt2fn("-x", nfile, fnm)) == 0) - { - gmx_fatal(FARGS, - "When using mdrun -rerun, the name of the input trajectory file " - "%s cannot be identical to the name of an output file (whether " - "given explicitly with -o or -x, or by default)", - opt2fn("-rerun", nfile, fnm)); - } - - /* Settings for rerun */ - ir->nstlist = 1; - ir->nstcalcenergy = 1; - int nstglobalcomm = 1; - const bool bNS = true; - - ir->nstxout_compressed = 0; - const SimulationGroups* groups = &top_global->groups; - if (ir->eI == eiMimic) - { - auto nonConstGlobalTopology = const_cast(top_global); - nonConstGlobalTopology->intermolecularExclusionGroup = genQmmmIndices(*top_global); - } - int* fep_state = MASTER(cr) ? &state_global->fep_state : nullptr; - gmx::ArrayRef lambda = MASTER(cr) ? state_global->lambda : gmx::ArrayRef(); - initialize_lambdas(fplog, *ir, MASTER(cr), fep_state, lambda); - const bool simulationsShareState = false; - gmx_mdoutf* outf = init_mdoutf(fplog, nfile, fnm, mdrunOptions, cr, outputProvider, - mdModulesNotifier, ir, top_global, oenv, wcycle, - StartingBehavior::NewSimulation, simulationsShareState, ms); - gmx::EnergyOutput energyOutput(mdoutf_get_fp_ene(outf), top_global, ir, pull_work, - mdoutf_get_fp_dhdl(outf), true, StartingBehavior::NewSimulation, - simulationsShareState, mdModulesNotifier); - - gstat = global_stat_init(ir); - - /* Check for polarizable models and flexible constraints */ - shellfc = init_shell_flexcon(fplog, top_global, constr ? constr->numFlexibleConstraints() : 0, - ir->nstcalcenergy, DOMAINDECOMP(cr), - runScheduleWork->simulationWork.useGpuPme); - - { - double io = compute_io(ir, top_global->natoms, *groups, energyOutput.numEnergyTerms(), 1); - if ((io > 2000) && MASTER(cr)) - { - fprintf(stderr, "\nWARNING: This run will generate roughly %.0f Mb of data\n\n", io); - } - } - - // Local state only becomes valid now. 
- std::unique_ptr stateInstance; - t_state* state; - - if (DOMAINDECOMP(cr)) - { - stateInstance = std::make_unique(); - state = stateInstance.get(); - dd_init_local_state(cr->dd, state_global, state); - - /* Distribute the charge groups over the nodes from the master node */ - dd_partition_system(fplog, mdlog, ir->init_step, cr, TRUE, 1, state_global, *top_global, ir, - imdSession, pull_work, state, &f, mdAtoms, &top, fr, vsite, constr, - nrnb, nullptr, FALSE); - shouldCheckNumberOfBondedInteractions = true; - } - else - { - state_change_natoms(state_global, state_global->natoms); - /* Copy the pointer to the global state */ - state = state_global; - - mdAlgorithmsSetupAtomData(cr, ir, *top_global, &top, fr, &f, mdAtoms, constr, vsite, shellfc); - } - - auto mdatoms = mdAtoms->mdatoms(); - - // NOTE: The global state is no longer used at this point. - // But state_global is still used as temporary storage space for writing - // the global state to file and potentially for replica exchange. - // (Global topology should persist.) - - update_mdatoms(mdatoms, state->lambda[efptMASS]); - - if (ir->efep != efepNO && ir->fepvals->nstdhdl != 0) - { - doFreeEnergyPerturbation = true; - } - - { - int cglo_flags = - (CGLO_GSTAT - | (shouldCheckNumberOfBondedInteractions ? 
CGLO_CHECK_NUMBER_OF_BONDED_INTERACTIONS : 0)); - bool bSumEkinhOld = false; - t_vcm* vcm = nullptr; - compute_globals(gstat, cr, ir, fr, ekind, makeConstArrayRef(state->x), - makeConstArrayRef(state->v), state->box, mdatoms, nrnb, vcm, nullptr, enerd, - force_vir, shake_vir, total_vir, pres, constr, &nullSignaller, state->box, - &totalNumberOfBondedInteractions, &bSumEkinhOld, cglo_flags); - } - checkNumberOfBondedInteractions(mdlog, cr, totalNumberOfBondedInteractions, top_global, &top, - makeConstArrayRef(state->x), state->box, - &shouldCheckNumberOfBondedInteractions); - - if (MASTER(cr)) - { - fprintf(stderr, - "starting md rerun '%s', reading coordinates from" - " input trajectory '%s'\n\n", - *(top_global->name), opt2fn("-rerun", nfile, fnm)); - if (mdrunOptions.verbose) - { - fprintf(stderr, - "Calculated time to finish depends on nsteps from " - "run input file,\nwhich may not correspond to the time " - "needed to process input trajectory.\n\n"); - } - fprintf(fplog, "\n"); - } - - walltime_accounting_start_time(walltime_accounting); - wallcycle_start(wcycle, ewcRUN); - print_start(fplog, cr, walltime_accounting, "mdrun"); - - /*********************************************************** - * - * Loop over MD steps - * - ************************************************************/ - - if (constr) - { - GMX_LOG(mdlog.info) - .asParagraph() - .appendText("Simulations has constraints. 
Rerun does not recalculate constraints."); - } - - rerun_fr.natoms = 0; - if (MASTER(cr)) - { - isLastStep = !read_first_frame(oenv, &status, opt2fn("-rerun", nfile, fnm), &rerun_fr, TRX_NEED_X); - if (rerun_fr.natoms != top_global->natoms) - { - gmx_fatal(FARGS, - "Number of atoms in trajectory (%d) does not match the " - "run input file (%d)\n", - rerun_fr.natoms, top_global->natoms); - } - - if (ir->pbcType != PbcType::No) - { - if (!rerun_fr.bBox) - { - gmx_fatal(FARGS, - "Rerun trajectory frame step %" PRId64 - " time %f " - "does not contain a box, while pbc is used", - rerun_fr.step, rerun_fr.time); - } - if (max_cutoff2(ir->pbcType, rerun_fr.box) < gmx::square(fr->rlist)) - { - gmx_fatal(FARGS, - "Rerun trajectory frame step %" PRId64 - " time %f " - "has too small box dimensions", - rerun_fr.step, rerun_fr.time); - } - } - } - - GMX_LOG(mdlog.info) - .asParagraph() - .appendText( - "Rerun does not report kinetic energy, total energy, temperature, virial and " - "pressure."); - - if (PAR(cr)) - { - rerun_parallel_comm(cr, &rerun_fr, &isLastStep); - } - - if (ir->pbcType != PbcType::No) - { - /* Set the shift vectors. - * Necessary here when have a static box different from the tpr box. 
- */ - calc_shifts(rerun_fr.box, fr->shift_vec); - } - - step = ir->init_step; - step_rel = 0; - - auto stopHandler = stopHandlerBuilder->getStopHandlerMD( - compat::not_null(&signals[eglsSTOPCOND]), false, MASTER(cr), - ir->nstlist, mdrunOptions.reproducible, nstglobalcomm, mdrunOptions.maximumHoursToRun, - ir->nstlist == 0, fplog, step, bNS, walltime_accounting); - - // we don't do counter resetting in rerun - finish will always be valid - walltime_accounting_set_valid_finish(walltime_accounting); - - const DDBalanceRegionHandler ddBalanceRegionHandler(cr); - - /* and stop now if we should */ - isLastStep = (isLastStep || (ir->nsteps >= 0 && step_rel > ir->nsteps)); - while (!isLastStep) - { - wallcycle_start(wcycle, ewcSTEP); - - if (rerun_fr.bStep) - { - step = rerun_fr.step; - step_rel = step - ir->init_step; - } - if (rerun_fr.bTime) - { - t = rerun_fr.time; - } - else - { - t = step; - } - - if (ir->efep != efepNO && MASTER(cr)) - { - if (rerun_fr.bLambda) - { - ir->fepvals->init_lambda = rerun_fr.lambda; - } - else - { - if (rerun_fr.bFepState) - { - state->fep_state = rerun_fr.fep_state; - } - } - - state_global->lambda = currentLambdas(step, *(ir->fepvals), state->fep_state); - } - - if (MASTER(cr)) - { - const bool constructVsites = ((vsite != nullptr) && mdrunOptions.rerunConstructVsites); - if (constructVsites && DOMAINDECOMP(cr)) - { - gmx_fatal(FARGS, - "Vsite recalculation with -rerun is not implemented with domain " - "decomposition, " - "use a single rank"); - } - prepareRerunState(rerun_fr, state_global, constructVsites, vsite, ir->delta_t); - } - - isLastStep = isLastStep || stopHandler->stoppingAfterCurrentStep(bNS); - - if (DOMAINDECOMP(cr)) - { - /* Repartition the domain decomposition */ - const bool bMasterState = true; - dd_partition_system(fplog, mdlog, step, cr, bMasterState, nstglobalcomm, state_global, - *top_global, ir, imdSession, pull_work, state, &f, mdAtoms, &top, - fr, vsite, constr, nrnb, wcycle, mdrunOptions.verbose); - 
shouldCheckNumberOfBondedInteractions = true; - } - - if (MASTER(cr)) - { - EnergyOutput::printHeader(fplog, step, t); /* can we improve the information printed here? */ - } - - if (ir->efep != efepNO) - { - update_mdatoms(mdatoms, state->lambda[efptMASS]); - } - - force_flags = (GMX_FORCE_STATECHANGED | GMX_FORCE_DYNAMICBOX | GMX_FORCE_ALLFORCES - | GMX_FORCE_VIRIAL | // TODO: Get rid of this once #2649 and #3400 are solved - GMX_FORCE_ENERGY | (doFreeEnergyPerturbation ? GMX_FORCE_DHDL : 0)); - - if (shellfc) - { - /* Now is the time to relax the shells */ - relax_shell_flexcon(fplog, cr, ms, mdrunOptions.verbose, enforcedRotation, step, ir, - imdSession, pull_work, bNS, force_flags, &top, constr, enerd, - state->natoms, state->x.arrayRefWithPadding(), - state->v.arrayRefWithPadding(), state->box, state->lambda, - &state->hist, &f.view(), force_vir, mdatoms, nrnb, wcycle, shellfc, - fr, runScheduleWork, t, mu_tot, vsite, ddBalanceRegionHandler); - } - else - { - /* The coordinates (x) are shifted (to get whole molecules) - * in do_force. - * This is parallellized as well, and does communication too. - * Check comments in sim_util.c - */ - Awh* awh = nullptr; - gmx_edsam* ed = nullptr; - do_force(fplog, cr, ms, ir, awh, enforcedRotation, imdSession, pull_work, step, nrnb, - wcycle, &top, state->box, state->x.arrayRefWithPadding(), &state->hist, - &f.view(), force_vir, mdatoms, enerd, state->lambda, fr, runScheduleWork, - vsite, mu_tot, t, ed, GMX_FORCE_NS | force_flags, ddBalanceRegionHandler); - } - - /* Now we have the energies and forces corresponding to the - * coordinates at time t. 
- */ - { - const bool isCheckpointingStep = false; - const bool doRerun = true; - const bool bSumEkinhOld = false; - do_md_trajectory_writing(fplog, cr, nfile, fnm, step, step_rel, t, ir, state, - state_global, observablesHistory, top_global, fr, outf, - energyOutput, ekind, f.view().force(), isCheckpointingStep, - doRerun, isLastStep, mdrunOptions.writeConfout, bSumEkinhOld); - } - - stopHandler->setSignal(); - - if (vsite != nullptr) - { - wallcycle_start(wcycle, ewcVSITECONSTR); - vsite->construct(state->x, ir->delta_t, state->v, state->box); - wallcycle_stop(wcycle, ewcVSITECONSTR); - } - - { - const bool doInterSimSignal = false; - const bool doIntraSimSignal = true; - bool bSumEkinhOld = false; - t_vcm* vcm = nullptr; - SimulationSignaller signaller(&signals, cr, ms, doInterSimSignal, doIntraSimSignal); - - compute_globals(gstat, cr, ir, fr, ekind, makeConstArrayRef(state->x), - makeConstArrayRef(state->v), state->box, mdatoms, nrnb, vcm, wcycle, - enerd, force_vir, shake_vir, total_vir, pres, constr, &signaller, - state->box, &totalNumberOfBondedInteractions, &bSumEkinhOld, - CGLO_GSTAT | CGLO_ENERGY - | (shouldCheckNumberOfBondedInteractions ? CGLO_CHECK_NUMBER_OF_BONDED_INTERACTIONS - : 0)); - checkNumberOfBondedInteractions(mdlog, cr, totalNumberOfBondedInteractions, top_global, - &top, makeConstArrayRef(state->x), state->box, - &shouldCheckNumberOfBondedInteractions); - } - - /* Note: this is OK, but there are some numerical precision issues with using the convergence of - the virial that should probably be addressed eventually. state->veta has better properies, - but what we actually need entering the new cycle is the new shake_vir value. Ideally, we could - generate the new shake_vir, but test the veta value for convergence. This will take some thought. 
*/ - - /* Output stuff */ - if (MASTER(cr)) - { - const bool bCalcEnerStep = true; - energyOutput.addDataAtEnergyStep( - doFreeEnergyPerturbation, bCalcEnerStep, t, mdatoms->tmass, enerd, ir->fepvals, - ir->expandedvals, state->box, - PTCouplingArrays({ state->boxv, state->nosehoover_xi, state->nosehoover_vxi, - state->nhpres_xi, state->nhpres_vxi }), - state->fep_state, shake_vir, force_vir, total_vir, pres, ekind, mu_tot, constr); - - const bool do_ene = true; - const bool do_log = true; - Awh* awh = nullptr; - const bool do_dr = ir->nstdisreout != 0; - const bool do_or = ir->nstorireout != 0; - - EnergyOutput::printAnnealingTemperatures(do_log ? fplog : nullptr, groups, &(ir->opts)); - energyOutput.printStepToEnergyFile(mdoutf_get_fp_ene(outf), do_ene, do_dr, do_or, - do_log ? fplog : nullptr, step, t, fr->fcdata.get(), awh); - - if (ir->bPull) - { - pull_print_output(pull_work, step, t); - } - - if (do_per_step(step, ir->nstlog)) - { - if (fflush(fplog) != 0) - { - gmx_fatal(FARGS, "Cannot flush logfile - maybe you are out of disk space?"); - } - } - } - - /* Print the remaining wall clock time for the run */ - if (isMasterSimMasterRank(ms, MASTER(cr)) && (mdrunOptions.verbose || gmx_got_usr_signal())) - { - if (shellfc) - { - fprintf(stderr, "\n"); - } - print_time(stderr, walltime_accounting, step, ir, cr); - } - - /* Ion/water position swapping. - * Not done in last step since trajectory writing happens before this call - * in the MD loop and exchanges would be lost anyway. 
*/ - if ((ir->eSwapCoords != eswapNO) && (step > 0) && !isLastStep && do_per_step(step, ir->swap->nstswap)) - { - const bool doRerun = true; - do_swapcoords(cr, step, t, ir, swap, wcycle, rerun_fr.x, rerun_fr.box, - MASTER(cr) && mdrunOptions.verbose, doRerun); - } - - if (MASTER(cr)) - { - /* read next frame from input trajectory */ - isLastStep = !read_next_frame(oenv, status, &rerun_fr); - } - - if (PAR(cr)) - { - rerun_parallel_comm(cr, &rerun_fr, &isLastStep); - } - - cycles = wallcycle_stop(wcycle, ewcSTEP); - if (DOMAINDECOMP(cr) && wcycle) - { - dd_cycles_add(cr->dd, cycles, ddCyclStep); - } - - if (!rerun_fr.bStep) - { - /* increase the MD step number */ - step++; - step_rel++; - } - } - /* End of main MD loop */ - - /* Closing TNG files can include compressing data. Therefore it is good to do that - * before stopping the time measurements. */ - mdoutf_tng_close(outf); - - /* Stop measuring walltime */ - walltime_accounting_end_time(walltime_accounting); - - if (MASTER(cr)) - { - close_trx(status); - } - - if (!thisRankHasDuty(cr, DUTY_PME)) - { - /* Tell the PME only node to finish */ - gmx_pme_send_finish(cr); - } - - done_mdoutf(outf); - - done_shellfc(fplog, shellfc, step_rel); - - walltime_accounting_set_nsteps_done(walltime_accounting, step_rel); -} diff --git a/patches/gromacs-2021.7.diff/src/gromacs/mdrun/runner.cpp b/patches/gromacs-2021.7.diff/src/gromacs/mdrun/runner.cpp deleted file mode 100644 index eaaf3ae457..0000000000 --- a/patches/gromacs-2021.7.diff/src/gromacs/mdrun/runner.cpp +++ /dev/null @@ -1,2394 +0,0 @@ -/* - * This file is part of the GROMACS molecular simulation package. - * - * Copyright (c) 1991-2000, University of Groningen, The Netherlands. - * Copyright (c) 2001-2004, The GROMACS development team. 
- * Copyright (c) 2011-2019,2020,2021, by the GROMACS development team, led by - * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl, - * and including many others, as listed in the AUTHORS file in the - * top-level source directory and at http://www.gromacs.org. - * - * GROMACS is free software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public License - * as published by the Free Software Foundation; either version 2.1 - * of the License, or (at your option) any later version. - * - * GROMACS is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public - * License along with GROMACS; if not, see - * http://www.gnu.org/licenses, or write to the Free Software Foundation, - * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. - * - * If you want to redistribute modifications to GROMACS, please - * consider that scientific software is very special. Version - * control is crucial - bugs must be traceable. We will be happy to - * consider code for inclusion in the official distribution, but - * derived work must not be called official GROMACS. Details are found - * in the README & COPYING files - if they are missing, get the - * official version at http://www.gromacs.org. - * - * To help us fund GROMACS development, we humbly ask that you cite - * the research papers on the package. Check out http://www.gromacs.org. - */ -/*! \internal \file - * - * \brief Implements the MD runner routine calling all integrators. 
- * - * \author David van der Spoel - * \ingroup module_mdrun - */ -#include "gmxpre.h" - -#include "runner.h" - -#include "config.h" - -#include -#include -#include -#include -#include - -#include -#include - -#include "gromacs/commandline/filenm.h" -#include "gromacs/domdec/builder.h" -#include "gromacs/domdec/domdec.h" -#include "gromacs/domdec/domdec_struct.h" -#include "gromacs/domdec/gpuhaloexchange.h" -#include "gromacs/domdec/localatomsetmanager.h" -#include "gromacs/domdec/partition.h" -#include "gromacs/ewald/ewald_utils.h" -#include "gromacs/ewald/pme_gpu_program.h" -#include "gromacs/ewald/pme_only.h" -#include "gromacs/ewald/pme_pp_comm_gpu.h" -#include "gromacs/fileio/checkpoint.h" -#include "gromacs/fileio/gmxfio.h" -#include "gromacs/fileio/oenv.h" -#include "gromacs/fileio/tpxio.h" -#include "gromacs/gmxlib/network.h" -#include "gromacs/gmxlib/nrnb.h" -#include "gromacs/gpu_utils/device_stream_manager.h" -#include "gromacs/hardware/cpuinfo.h" -#include "gromacs/hardware/detecthardware.h" -#include "gromacs/hardware/device_management.h" -#include "gromacs/hardware/hardwaretopology.h" -#include "gromacs/hardware/printhardware.h" -#include "gromacs/imd/imd.h" -#include "gromacs/listed_forces/disre.h" -#include "gromacs/listed_forces/gpubonded.h" -#include "gromacs/listed_forces/listed_forces.h" -#include "gromacs/listed_forces/orires.h" -#include "gromacs/math/functions.h" -#include "gromacs/math/utilities.h" -#include "gromacs/math/vec.h" -#include "gromacs/mdlib/boxdeformation.h" -#include "gromacs/mdlib/broadcaststructs.h" -#include "gromacs/mdlib/calc_verletbuf.h" -#include "gromacs/mdlib/dispersioncorrection.h" -#include "gromacs/mdlib/enerdata_utils.h" -#include "gromacs/mdlib/force.h" -#include "gromacs/mdlib/forcerec.h" -#include "gromacs/mdlib/gmx_omp_nthreads.h" -#include "gromacs/mdlib/gpuforcereduction.h" -#include "gromacs/mdlib/makeconstraints.h" -#include "gromacs/mdlib/md_support.h" -#include "gromacs/mdlib/mdatoms.h" -#include 
"gromacs/mdlib/sighandler.h" -#include "gromacs/mdlib/stophandler.h" -#include "gromacs/mdlib/tgroup.h" -#include "gromacs/mdlib/updategroups.h" -#include "gromacs/mdlib/vsite.h" -#include "gromacs/mdrun/mdmodules.h" -#include "gromacs/mdrun/simulationcontext.h" -#include "gromacs/mdrun/simulationinput.h" -#include "gromacs/mdrun/simulationinputhandle.h" -#include "gromacs/mdrunutility/handlerestart.h" -#include "gromacs/mdrunutility/logging.h" -#include "gromacs/mdrunutility/multisim.h" -#include "gromacs/mdrunutility/printtime.h" -#include "gromacs/mdrunutility/threadaffinity.h" -#include "gromacs/mdtypes/checkpointdata.h" -#include "gromacs/mdtypes/commrec.h" -#include "gromacs/mdtypes/enerdata.h" -#include "gromacs/mdtypes/fcdata.h" -#include "gromacs/mdtypes/forcerec.h" -#include "gromacs/mdtypes/group.h" -#include "gromacs/mdtypes/inputrec.h" -#include "gromacs/mdtypes/interaction_const.h" -#include "gromacs/mdtypes/md_enums.h" -#include "gromacs/mdtypes/mdatom.h" -#include "gromacs/mdtypes/mdrunoptions.h" -#include "gromacs/mdtypes/observableshistory.h" -#include "gromacs/mdtypes/simulation_workload.h" -#include "gromacs/mdtypes/state.h" -#include "gromacs/mdtypes/state_propagator_data_gpu.h" -#include "gromacs/modularsimulator/modularsimulator.h" -#include "gromacs/nbnxm/gpu_data_mgmt.h" -#include "gromacs/nbnxm/nbnxm.h" -#include "gromacs/nbnxm/pairlist_tuning.h" -#include "gromacs/pbcutil/pbc.h" -#include "gromacs/pulling/output.h" -#include "gromacs/pulling/pull.h" -#include "gromacs/pulling/pull_rotation.h" -#include "gromacs/restraint/manager.h" -#include "gromacs/restraint/restraintmdmodule.h" -#include "gromacs/restraint/restraintpotential.h" -#include "gromacs/swap/swapcoords.h" -#include "gromacs/taskassignment/decidegpuusage.h" -#include "gromacs/taskassignment/decidesimulationworkload.h" -#include "gromacs/taskassignment/resourcedivision.h" -#include "gromacs/taskassignment/taskassignment.h" -#include "gromacs/taskassignment/usergpuids.h" 
-#include "gromacs/timing/gpu_timing.h" -#include "gromacs/timing/wallcycle.h" -#include "gromacs/timing/wallcyclereporting.h" -#include "gromacs/topology/mtop_util.h" -#include "gromacs/trajectory/trajectoryframe.h" -#include "gromacs/utility/basenetwork.h" -#include "gromacs/utility/cstringutil.h" -#include "gromacs/utility/exceptions.h" -#include "gromacs/utility/fatalerror.h" -#include "gromacs/utility/filestream.h" -#include "gromacs/utility/gmxassert.h" -#include "gromacs/utility/gmxmpi.h" -#include "gromacs/utility/keyvaluetree.h" -#include "gromacs/utility/logger.h" -#include "gromacs/utility/loggerbuilder.h" -#include "gromacs/utility/mdmodulenotification.h" -#include "gromacs/utility/physicalnodecommunicator.h" -#include "gromacs/utility/pleasecite.h" -#include "gromacs/utility/programcontext.h" -#include "gromacs/utility/smalloc.h" -#include "gromacs/utility/stringutil.h" - -#include "isimulator.h" -#include "membedholder.h" -#include "replicaexchange.h" -#include "simulatorbuilder.h" - -/* PLUMED */ -#include "../../../Plumed.h" -int plumedswitch; -plumed plumedmain; -/* END PLUMED */ - -/* PLUMED HREX */ -int plumed_hrex; -/* END PLUMED HREX */ - -namespace gmx -{ - - -/*! \brief Manage any development feature flag variables encountered - * - * The use of dev features indicated by environment variables is - * logged in order to ensure that runs with such features enabled can - * be identified from their log and standard output. Any cross - * dependencies are also checked, and if unsatisfied, a fatal error - * issued. - * - * Note that some development features overrides are applied already here: - * the GPU communication flags are set to false in non-tMPI and non-CUDA builds. - * - * \param[in] mdlog Logger object. - * \param[in] useGpuForNonbonded True if the nonbonded task is offloaded in this run. - * \param[in] pmeRunMode The PME run mode for this run - * \returns The object populated with development feature flags. 
- */ -static DevelopmentFeatureFlags manageDevelopmentFeatures(const gmx::MDLogger& mdlog, - const bool useGpuForNonbonded, - const PmeRunMode pmeRunMode) -{ - DevelopmentFeatureFlags devFlags; - - // Some builds of GCC 5 give false positive warnings that these - // getenv results are ignored when clearly they are used. -#pragma GCC diagnostic push -#pragma GCC diagnostic ignored "-Wunused-result" - - devFlags.enableGpuBufferOps = - GMX_GPU_CUDA && useGpuForNonbonded && (getenv("GMX_USE_GPU_BUFFER_OPS") != nullptr); - devFlags.enableGpuHaloExchange = GMX_GPU_CUDA && GMX_THREAD_MPI && getenv("GMX_GPU_DD_COMMS") != nullptr; - devFlags.forceGpuUpdateDefault = (getenv("GMX_FORCE_UPDATE_DEFAULT_GPU") != nullptr) || GMX_FAHCORE; - devFlags.enableGpuPmePPComm = - GMX_GPU_CUDA && GMX_THREAD_MPI && getenv("GMX_GPU_PME_PP_COMMS") != nullptr; - -#pragma GCC diagnostic pop - - if (devFlags.enableGpuBufferOps) - { - GMX_LOG(mdlog.warning) - .asParagraph() - .appendTextFormatted( - "This run uses the 'GPU buffer ops' feature, enabled by the " - "GMX_USE_GPU_BUFFER_OPS environment variable."); - } - - if (devFlags.forceGpuUpdateDefault) - { - GMX_LOG(mdlog.warning) - .asParagraph() - .appendTextFormatted( - "This run will default to '-update gpu' as requested by the " - "GMX_FORCE_UPDATE_DEFAULT_GPU environment variable. 
GPU update with domain " - "decomposition lacks substantial testing and should be used with caution."); - } - - if (devFlags.enableGpuHaloExchange) - { - if (useGpuForNonbonded) - { - if (!devFlags.enableGpuBufferOps) - { - GMX_LOG(mdlog.warning) - .asParagraph() - .appendTextFormatted( - "Enabling GPU buffer operations required by GMX_GPU_DD_COMMS " - "(equivalent with GMX_USE_GPU_BUFFER_OPS=1)."); - devFlags.enableGpuBufferOps = true; - } - GMX_LOG(mdlog.warning) - .asParagraph() - .appendTextFormatted( - "This run has requested the 'GPU halo exchange' feature, enabled by " - "the " - "GMX_GPU_DD_COMMS environment variable."); - } - else - { - GMX_LOG(mdlog.warning) - .asParagraph() - .appendTextFormatted( - "GMX_GPU_DD_COMMS environment variable detected, but the 'GPU " - "halo exchange' feature will not be enabled as nonbonded interactions " - "are not offloaded."); - devFlags.enableGpuHaloExchange = false; - } - } - - if (devFlags.enableGpuPmePPComm) - { - if (pmeRunMode == PmeRunMode::GPU) - { - if (!devFlags.enableGpuBufferOps) - { - GMX_LOG(mdlog.warning) - .asParagraph() - .appendTextFormatted( - "Enabling GPU buffer operations required by GMX_GPU_PME_PP_COMMS " - "(equivalent with GMX_USE_GPU_BUFFER_OPS=1)."); - devFlags.enableGpuBufferOps = true; - } - GMX_LOG(mdlog.warning) - .asParagraph() - .appendTextFormatted( - "This run uses the 'GPU PME-PP communications' feature, enabled " - "by the GMX_GPU_PME_PP_COMMS environment variable."); - } - else - { - std::string clarification; - if (pmeRunMode == PmeRunMode::Mixed) - { - clarification = - "PME FFT and gather are not offloaded to the GPU (PME is running in mixed " - "mode)."; - } - else - { - clarification = "PME is not offloaded to the GPU."; - } - GMX_LOG(mdlog.warning) - .asParagraph() - .appendText( - "GMX_GPU_PME_PP_COMMS environment variable detected, but the " - "'GPU PME-PP communications' feature was not enabled as " - + clarification); - devFlags.enableGpuPmePPComm = false; - } - } - - return 
devFlags; -} - -/*! \brief Barrier for safe simultaneous thread access to mdrunner data - * - * Used to ensure that the master thread does not modify mdrunner during copy - * on the spawned threads. */ -static void threadMpiMdrunnerAccessBarrier() -{ -#if GMX_THREAD_MPI - MPI_Barrier(MPI_COMM_WORLD); -#endif -} - -Mdrunner Mdrunner::cloneOnSpawnedThread() const -{ - auto newRunner = Mdrunner(std::make_unique()); - - // All runners in the same process share a restraint manager resource because it is - // part of the interface to the client code, which is associated only with the - // original thread. Handles to the same resources can be obtained by copy. - { - newRunner.restraintManager_ = std::make_unique(*restraintManager_); - } - - // Copy members of master runner. - // \todo Replace with builder when Simulation context and/or runner phases are better defined. - // Ref https://gitlab.com/gromacs/gromacs/-/issues/2587 and https://gitlab.com/gromacs/gromacs/-/issues/2375 - newRunner.hw_opt = hw_opt; - newRunner.filenames = filenames; - - newRunner.hwinfo_ = hwinfo_; - newRunner.oenv = oenv; - newRunner.mdrunOptions = mdrunOptions; - newRunner.domdecOptions = domdecOptions; - newRunner.nbpu_opt = nbpu_opt; - newRunner.pme_opt = pme_opt; - newRunner.pme_fft_opt = pme_fft_opt; - newRunner.bonded_opt = bonded_opt; - newRunner.update_opt = update_opt; - newRunner.nstlist_cmdline = nstlist_cmdline; - newRunner.replExParams = replExParams; - newRunner.pforce = pforce; - // Give the spawned thread the newly created valid communicator - // for the simulation. - newRunner.libraryWorldCommunicator = MPI_COMM_WORLD; - newRunner.simulationCommunicator = MPI_COMM_WORLD; - newRunner.ms = ms; - newRunner.startingBehavior = startingBehavior; - newRunner.stopHandlerBuilder_ = std::make_unique(*stopHandlerBuilder_); - newRunner.inputHolder_ = inputHolder_; - - threadMpiMdrunnerAccessBarrier(); - - return newRunner; -} - -/*! \brief The callback used for running on spawned threads. 
- * - * Obtains the pointer to the master mdrunner object from the one - * argument permitted to the thread-launch API call, copies it to make - * a new runner for this thread, reinitializes necessary data, and - * proceeds to the simulation. */ -static void mdrunner_start_fn(const void* arg) -{ - try - { - auto masterMdrunner = reinterpret_cast(arg); - /* copy the arg list to make sure that it's thread-local. This - doesn't copy pointed-to items, of course; fnm, cr and fplog - are reset in the call below, all others should be const. */ - gmx::Mdrunner mdrunner = masterMdrunner->cloneOnSpawnedThread(); - mdrunner.mdrunner(); - } - GMX_CATCH_ALL_AND_EXIT_WITH_FATAL_ERROR -} - - -void Mdrunner::spawnThreads(int numThreadsToLaunch) -{ -#if GMX_THREAD_MPI - /* now spawn new threads that start mdrunner_start_fn(), while - the main thread returns. Thread affinity is handled later. */ - if (tMPI_Init_fn(TRUE, numThreadsToLaunch, TMPI_AFFINITY_NONE, mdrunner_start_fn, - static_cast(this)) - != TMPI_SUCCESS) - { - GMX_THROW(gmx::InternalError("Failed to spawn thread-MPI threads")); - } - - // Give the master thread the newly created valid communicator for - // the simulation. - libraryWorldCommunicator = MPI_COMM_WORLD; - simulationCommunicator = MPI_COMM_WORLD; - threadMpiMdrunnerAccessBarrier(); -#else - GMX_UNUSED_VALUE(numThreadsToLaunch); - GMX_UNUSED_VALUE(mdrunner_start_fn); -#endif -} - -} // namespace gmx - -/*! \brief Initialize variables for Verlet scheme simulation */ -static void prepare_verlet_scheme(FILE* fplog, - t_commrec* cr, - t_inputrec* ir, - int nstlist_cmdline, - const gmx_mtop_t* mtop, - const matrix box, - bool makeGpuPairList, - const gmx::CpuInfo& cpuinfo) -{ - // We checked the cut-offs in grompp, but double-check here. - // We have PME+LJcutoff kernels for rcoulomb>rvdw. 
- if (EEL_PME_EWALD(ir->coulombtype) && ir->vdwtype == eelCUT) - { - GMX_RELEASE_ASSERT(ir->rcoulomb >= ir->rvdw, - "With Verlet lists and PME we should have rcoulomb>=rvdw"); - } - else - { - GMX_RELEASE_ASSERT(ir->rcoulomb == ir->rvdw, - "With Verlet lists and no PME rcoulomb and rvdw should be identical"); - } - /* For NVE simulations, we will retain the initial list buffer */ - if (EI_DYNAMICS(ir->eI) && ir->verletbuf_tol > 0 && !(EI_MD(ir->eI) && ir->etc == etcNO)) - { - /* Update the Verlet buffer size for the current run setup */ - - /* Here we assume SIMD-enabled kernels are being used. But as currently - * calc_verlet_buffer_size gives the same results for 4x8 and 4x4 - * and 4x2 gives a larger buffer than 4x4, this is ok. - */ - ListSetupType listType = - (makeGpuPairList ? ListSetupType::Gpu : ListSetupType::CpuSimdWhenSupported); - VerletbufListSetup listSetup = verletbufGetSafeListSetup(listType); - - const real rlist_new = - calcVerletBufferSize(*mtop, det(box), *ir, ir->nstlist, ir->nstlist - 1, -1, listSetup); - - if (rlist_new != ir->rlist) - { - if (fplog != nullptr) - { - fprintf(fplog, - "\nChanging rlist from %g to %g for non-bonded %dx%d atom kernels\n\n", - ir->rlist, rlist_new, listSetup.cluster_size_i, listSetup.cluster_size_j); - } - ir->rlist = rlist_new; - } - } - - if (nstlist_cmdline > 0 && (!EI_DYNAMICS(ir->eI) || ir->verletbuf_tol <= 0)) - { - gmx_fatal(FARGS, "Can not set nstlist without %s", - !EI_DYNAMICS(ir->eI) ? "dynamics" : "verlet-buffer-tolerance"); - } - - if (EI_DYNAMICS(ir->eI)) - { - /* Set or try nstlist values */ - increaseNstlist(fplog, cr, ir, nstlist_cmdline, mtop, box, makeGpuPairList, cpuinfo); - } -} - -/*! 
\brief Override the nslist value in inputrec - * - * with value passed on the command line (if any) - */ -static void override_nsteps_cmdline(const gmx::MDLogger& mdlog, int64_t nsteps_cmdline, t_inputrec* ir) -{ - assert(ir); - - /* override with anything else than the default -2 */ - if (nsteps_cmdline > -2) - { - char sbuf_steps[STEPSTRSIZE]; - char sbuf_msg[STRLEN]; - - ir->nsteps = nsteps_cmdline; - if (EI_DYNAMICS(ir->eI) && nsteps_cmdline != -1) - { - sprintf(sbuf_msg, - "Overriding nsteps with value passed on the command line: %s steps, %.3g ps", - gmx_step_str(nsteps_cmdline, sbuf_steps), fabs(nsteps_cmdline * ir->delta_t)); - } - else - { - sprintf(sbuf_msg, "Overriding nsteps with value passed on the command line: %s steps", - gmx_step_str(nsteps_cmdline, sbuf_steps)); - } - - GMX_LOG(mdlog.warning).asParagraph().appendText(sbuf_msg); - } - else if (nsteps_cmdline < -2) - { - gmx_fatal(FARGS, "Invalid nsteps value passed on the command line: %" PRId64, nsteps_cmdline); - } - /* Do nothing if nsteps_cmdline == -2 */ -} - -namespace gmx -{ - -/*! \brief Return whether GPU acceleration of nonbondeds is supported with the given settings. - * - * If not, and if a warning may be issued, logs a warning about - * falling back to CPU code. With thread-MPI, only the first - * call to this function should have \c issueWarning true. */ -static bool gpuAccelerationOfNonbondedIsUseful(const MDLogger& mdlog, const t_inputrec& ir, bool issueWarning) -{ - bool gpuIsUseful = true; - std::string warning; - - if (ir.opts.ngener - ir.nwall > 1) - { - /* The GPU code does not support more than one energy group. - * If the user requested GPUs explicitly, a fatal error is given later. - */ - gpuIsUseful = false; - warning = - "Multiple energy groups is not implemented for GPUs, falling back to the CPU. 
" - "For better performance, run on the GPU without energy groups and then do " - "gmx mdrun -rerun option on the trajectory with an energy group .tpr file."; - } - - if (EI_TPI(ir.eI)) - { - gpuIsUseful = false; - warning = "TPI is not implemented for GPUs."; - } - - if (!gpuIsUseful && issueWarning) - { - GMX_LOG(mdlog.warning).asParagraph().appendText(warning); - } - - return gpuIsUseful; -} - -//! Initializes the logger for mdrun. -static gmx::LoggerOwner buildLogger(FILE* fplog, const bool isSimulationMasterRank) -{ - gmx::LoggerBuilder builder; - if (fplog != nullptr) - { - builder.addTargetFile(gmx::MDLogger::LogLevel::Info, fplog); - } - if (isSimulationMasterRank) - { - builder.addTargetStream(gmx::MDLogger::LogLevel::Warning, &gmx::TextOutputFile::standardError()); - } - return builder.build(); -} - -//! Make a TaskTarget from an mdrun argument string. -static TaskTarget findTaskTarget(const char* optionString) -{ - TaskTarget returnValue = TaskTarget::Auto; - - if (strncmp(optionString, "auto", 3) == 0) - { - returnValue = TaskTarget::Auto; - } - else if (strncmp(optionString, "cpu", 3) == 0) - { - returnValue = TaskTarget::Cpu; - } - else if (strncmp(optionString, "gpu", 3) == 0) - { - returnValue = TaskTarget::Gpu; - } - else - { - GMX_ASSERT(false, "Option string should have been checked for sanity already"); - } - - return returnValue; -} - -//! Finish run, aggregate data to print performance info. -static void finish_run(FILE* fplog, - const gmx::MDLogger& mdlog, - const t_commrec* cr, - const t_inputrec* inputrec, - t_nrnb nrnb[], - gmx_wallcycle_t wcycle, - gmx_walltime_accounting_t walltime_accounting, - nonbonded_verlet_t* nbv, - const gmx_pme_t* pme, - gmx_bool bWriteStat) -{ - double delta_t = 0; - double nbfs = 0, mflop = 0; - double elapsed_time, elapsed_time_over_all_ranks, elapsed_time_over_all_threads, - elapsed_time_over_all_threads_over_all_ranks; - /* Control whether it is valid to print a report. 
Only the - simulation master may print, but it should not do so if the run - terminated e.g. before a scheduled reset step. This is - complicated by the fact that PME ranks are unaware of the - reason why they were sent a pmerecvqxFINISH. To avoid - communication deadlocks, we always do the communication for the - report, even if we've decided not to write the report, because - how long it takes to finish the run is not important when we've - decided not to report on the simulation performance. - - Further, we only report performance for dynamical integrators, - because those are the only ones for which we plan to - consider doing any optimizations. */ - bool printReport = EI_DYNAMICS(inputrec->eI) && SIMMASTER(cr); - - if (printReport && !walltime_accounting_get_valid_finish(walltime_accounting)) - { - GMX_LOG(mdlog.warning) - .asParagraph() - .appendText("Simulation ended prematurely, no performance report will be written."); - printReport = false; - } - - t_nrnb* nrnb_tot; - std::unique_ptr nrnbTotalStorage; - if (cr->nnodes > 1) - { - nrnbTotalStorage = std::make_unique(); - nrnb_tot = nrnbTotalStorage.get(); -#if GMX_MPI - MPI_Allreduce(nrnb->n, nrnb_tot->n, eNRNB, MPI_DOUBLE, MPI_SUM, cr->mpi_comm_mysim); -#endif - } - else - { - nrnb_tot = nrnb; - } - - elapsed_time = walltime_accounting_get_time_since_reset(walltime_accounting); - elapsed_time_over_all_threads = - walltime_accounting_get_time_since_reset_over_all_threads(walltime_accounting); - if (cr->nnodes > 1) - { -#if GMX_MPI - /* reduce elapsed_time over all MPI ranks in the current simulation */ - MPI_Allreduce(&elapsed_time, &elapsed_time_over_all_ranks, 1, MPI_DOUBLE, MPI_SUM, - cr->mpi_comm_mysim); - elapsed_time_over_all_ranks /= cr->nnodes; - /* Reduce elapsed_time_over_all_threads over all MPI ranks in the - * current simulation. 
*/ - MPI_Allreduce(&elapsed_time_over_all_threads, &elapsed_time_over_all_threads_over_all_ranks, - 1, MPI_DOUBLE, MPI_SUM, cr->mpi_comm_mysim); -#endif - } - else - { - elapsed_time_over_all_ranks = elapsed_time; - elapsed_time_over_all_threads_over_all_ranks = elapsed_time_over_all_threads; - } - - if (printReport) - { - print_flop(fplog, nrnb_tot, &nbfs, &mflop); - } - - if (thisRankHasDuty(cr, DUTY_PP) && DOMAINDECOMP(cr)) - { - print_dd_statistics(cr, inputrec, fplog); - } - - /* TODO Move the responsibility for any scaling by thread counts - * to the code that handled the thread region, so that there's a - * mechanism to keep cycle counting working during the transition - * to task parallelism. */ - int nthreads_pp = gmx_omp_nthreads_get(emntNonbonded); - int nthreads_pme = gmx_omp_nthreads_get(emntPME); - wallcycle_scale_by_num_threads(wcycle, thisRankHasDuty(cr, DUTY_PME) && !thisRankHasDuty(cr, DUTY_PP), - nthreads_pp, nthreads_pme); - auto cycle_sum(wallcycle_sum(cr, wcycle)); - - if (printReport) - { - auto nbnxn_gpu_timings = - (nbv != nullptr && nbv->useGpu()) ? 
Nbnxm::gpu_get_timings(nbv->gpu_nbv) : nullptr; - gmx_wallclock_gpu_pme_t pme_gpu_timings = {}; - - if (pme_gpu_task_enabled(pme)) - { - pme_gpu_get_timings(pme, &pme_gpu_timings); - } - wallcycle_print(fplog, mdlog, cr->nnodes, cr->npmenodes, nthreads_pp, nthreads_pme, - elapsed_time_over_all_ranks, wcycle, cycle_sum, nbnxn_gpu_timings, - &pme_gpu_timings); - - if (EI_DYNAMICS(inputrec->eI)) - { - delta_t = inputrec->delta_t; - } - - if (fplog) - { - print_perf(fplog, elapsed_time_over_all_threads_over_all_ranks, elapsed_time_over_all_ranks, - walltime_accounting_get_nsteps_done_since_reset(walltime_accounting), - delta_t, nbfs, mflop); - } - if (bWriteStat) - { - print_perf(stderr, elapsed_time_over_all_threads_over_all_ranks, elapsed_time_over_all_ranks, - walltime_accounting_get_nsteps_done_since_reset(walltime_accounting), - delta_t, nbfs, mflop); - } - } -} - -int Mdrunner::mdrunner() -{ - matrix box; - t_forcerec* fr = nullptr; - real ewaldcoeff_q = 0; - real ewaldcoeff_lj = 0; - int nChargePerturbed = -1, nTypePerturbed = 0; - gmx_wallcycle_t wcycle; - gmx_walltime_accounting_t walltime_accounting = nullptr; - MembedHolder membedHolder(filenames.size(), filenames.data()); - - /* CAUTION: threads may be started later on in this function, so - cr doesn't reflect the final parallel state right now */ - gmx_mtop_t mtop; - - /* TODO: inputrec should tell us whether we use an algorithm, not a file option */ - const bool doEssentialDynamics = opt2bSet("-ei", filenames.size(), filenames.data()); - const bool doRerun = mdrunOptions.rerun; - - // Handle task-assignment related user options. - EmulateGpuNonbonded emulateGpuNonbonded = - (getenv("GMX_EMULATE_GPU") != nullptr ? 
EmulateGpuNonbonded::Yes : EmulateGpuNonbonded::No); - - std::vector userGpuTaskAssignment; - try - { - userGpuTaskAssignment = parseUserTaskAssignmentString(hw_opt.userGpuTaskAssignment); - } - GMX_CATCH_ALL_AND_EXIT_WITH_FATAL_ERROR - auto nonbondedTarget = findTaskTarget(nbpu_opt); - auto pmeTarget = findTaskTarget(pme_opt); - auto pmeFftTarget = findTaskTarget(pme_fft_opt); - auto bondedTarget = findTaskTarget(bonded_opt); - auto updateTarget = findTaskTarget(update_opt); - - FILE* fplog = nullptr; - // If we are appending, we don't write log output because we need - // to check that the old log file matches what the checkpoint file - // expects. Otherwise, we should start to write log output now if - // there is a file ready for it. - if (logFileHandle != nullptr && startingBehavior != StartingBehavior::RestartWithAppending) - { - fplog = gmx_fio_getfp(logFileHandle); - } - const bool isSimulationMasterRank = findIsSimulationMasterRank(ms, simulationCommunicator); - gmx::LoggerOwner logOwner(buildLogger(fplog, isSimulationMasterRank)); - gmx::MDLogger mdlog(logOwner.logger()); - - gmx_print_detected_hardware(fplog, isSimulationMasterRank && isMasterSim(ms), mdlog, hwinfo_); - - std::vector gpuIdsToUse = makeGpuIdsToUse(hwinfo_->deviceInfoList, hw_opt.gpuIdsAvailable); - const int numDevicesToUse = gmx::ssize(gpuIdsToUse); - - // Print citation requests after all software/hardware printing - pleaseCiteGromacs(fplog); - - // Note: legacy program logic relies on checking whether these pointers are assigned. - // Objects may or may not be allocated later. - std::unique_ptr inputrec; - std::unique_ptr globalState; - - auto partialDeserializedTpr = std::make_unique(); - - if (isSimulationMasterRank) - { - // Allocate objects to be initialized by later function calls. 
- /* Only the master rank has the global state */ - globalState = std::make_unique(); - inputrec = std::make_unique(); - - /* Read (nearly) all data required for the simulation - * and keep the partly serialized tpr contents to send to other ranks later - */ - applyGlobalSimulationState(*inputHolder_.get(), partialDeserializedTpr.get(), - globalState.get(), inputrec.get(), &mtop); - } - - /* Check and update the hardware options for internal consistency */ - checkAndUpdateHardwareOptions(mdlog, &hw_opt, isSimulationMasterRank, domdecOptions.numPmeRanks, - inputrec.get()); - - if (GMX_THREAD_MPI && isSimulationMasterRank) - { - bool useGpuForNonbonded = false; - bool useGpuForPme = false; - try - { - GMX_RELEASE_ASSERT(inputrec != nullptr, "Keep the compiler happy"); - - // If the user specified the number of ranks, then we must - // respect that, but in default mode, we need to allow for - // the number of GPUs to choose the number of ranks. - auto canUseGpuForNonbonded = buildSupportsNonbondedOnGpu(nullptr); - useGpuForNonbonded = decideWhetherToUseGpusForNonbondedWithThreadMpi( - nonbondedTarget, numDevicesToUse, userGpuTaskAssignment, emulateGpuNonbonded, - canUseGpuForNonbonded, - gpuAccelerationOfNonbondedIsUseful(mdlog, *inputrec, GMX_THREAD_MPI), - hw_opt.nthreads_tmpi); - useGpuForPme = decideWhetherToUseGpusForPmeWithThreadMpi( - useGpuForNonbonded, pmeTarget, pmeFftTarget, numDevicesToUse, userGpuTaskAssignment, - *hwinfo_, *inputrec, hw_opt.nthreads_tmpi, domdecOptions.numPmeRanks); - } - GMX_CATCH_ALL_AND_EXIT_WITH_FATAL_ERROR - - /* Determine how many thread-MPI ranks to start. - * - * TODO Over-writing the user-supplied value here does - * prevent any possible subsequent checks from working - * correctly. */ - hw_opt.nthreads_tmpi = - get_nthreads_mpi(hwinfo_, &hw_opt, numDevicesToUse, useGpuForNonbonded, useGpuForPme, - inputrec.get(), &mtop, mdlog, membedHolder.doMembed()); - - // Now start the threads for thread MPI. 
- spawnThreads(hw_opt.nthreads_tmpi); - // The spawned threads enter mdrunner() and execution of - // master and spawned threads joins at the end of this block. - } - - GMX_RELEASE_ASSERT(!GMX_MPI || ms || simulationCommunicator != MPI_COMM_NULL, - "Must have valid communicator unless running a multi-simulation"); - CommrecHandle crHandle = init_commrec(simulationCommunicator); - t_commrec* cr = crHandle.get(); - GMX_RELEASE_ASSERT(cr != nullptr, "Must have valid commrec"); - - PhysicalNodeCommunicator physicalNodeComm(libraryWorldCommunicator, gmx_physicalnode_id_hash()); - - // If we detected the topology on this system, double-check that it makes sense - if (hwinfo_->hardwareTopology->isThisSystem()) - { - hardwareTopologyDoubleCheckDetection(mdlog, *hwinfo_->hardwareTopology); - } - - if (PAR(cr)) - { - /* now broadcast everything to the non-master nodes/threads: */ - if (!isSimulationMasterRank) - { - // Until now, only the master rank has a non-null pointer. - // On non-master ranks, allocate the object that will receive data in the following call. - inputrec = std::make_unique(); - } - init_parallel(cr->mpiDefaultCommunicator, MASTER(cr), inputrec.get(), &mtop, - partialDeserializedTpr.get()); - } - GMX_RELEASE_ASSERT(inputrec != nullptr, "All ranks should have a valid inputrec now"); - partialDeserializedTpr.reset(nullptr); - - // Now the number of ranks is known to all ranks, and each knows - // the inputrec read by the master rank. The ranks can now all run - // the task-deciding functions and will agree on the result - // without needing to communicate. - const bool useDomainDecomposition = (PAR(cr) && !(EI_TPI(inputrec->eI) || inputrec->eI == eiNM)); - - // Note that these variables describe only their own node. - // - // Note that when bonded interactions run on a GPU they always run - // alongside a nonbonded task, so do not influence task assignment - // even though they affect the force calculation workload. 
- bool useGpuForNonbonded = false; - bool useGpuForPme = false; - bool useGpuForBonded = false; - bool useGpuForUpdate = false; - bool gpusWereDetected = hwinfo_->ngpu_compatible_tot > 0; - try - { - // It's possible that there are different numbers of GPUs on - // different nodes, which is the user's responsibility to - // handle. If unsuitable, we will notice that during task - // assignment. - auto canUseGpuForNonbonded = buildSupportsNonbondedOnGpu(nullptr); - useGpuForNonbonded = decideWhetherToUseGpusForNonbonded( - nonbondedTarget, userGpuTaskAssignment, emulateGpuNonbonded, canUseGpuForNonbonded, - gpuAccelerationOfNonbondedIsUseful(mdlog, *inputrec, !GMX_THREAD_MPI), gpusWereDetected); - useGpuForPme = decideWhetherToUseGpusForPme( - useGpuForNonbonded, pmeTarget, pmeFftTarget, userGpuTaskAssignment, *hwinfo_, - *inputrec, cr->sizeOfDefaultCommunicator, domdecOptions.numPmeRanks, gpusWereDetected); - useGpuForBonded = decideWhetherToUseGpusForBonded(useGpuForNonbonded, useGpuForPme, - bondedTarget, *inputrec, mtop, - domdecOptions.numPmeRanks, gpusWereDetected); - } - GMX_CATCH_ALL_AND_EXIT_WITH_FATAL_ERROR - - const PmeRunMode pmeRunMode = determinePmeRunMode(useGpuForPme, pmeFftTarget, *inputrec); - - // Initialize development feature flags that enabled by environment variable - // and report those features that are enabled. - const DevelopmentFeatureFlags devFlags = - manageDevelopmentFeatures(mdlog, useGpuForNonbonded, pmeRunMode); - - const bool useModularSimulator = - checkUseModularSimulator(false, inputrec.get(), doRerun, mtop, ms, replExParams, - nullptr, doEssentialDynamics, membedHolder.doMembed() && (plumedswitch==0) /* PLUMED */); - - // Build restraints. - // TODO: hide restraint implementation details from Mdrunner. - // There is nothing unique about restraints at this point as far as the - // Mdrunner is concerned. 
The Mdrunner should just be getting a sequence of - // factory functions from the SimulationContext on which to call mdModules_->add(). - // TODO: capture all restraints into a single RestraintModule, passed to the runner builder. - for (auto&& restraint : restraintManager_->getRestraints()) - { - auto module = RestraintMDModule::create(restraint, restraint->sites()); - mdModules_->add(std::move(module)); - } - - // TODO: Error handling - mdModules_->assignOptionsToModules(*inputrec->params, nullptr); - // now that the MdModules know their options, they know which callbacks to sign up to - mdModules_->subscribeToSimulationSetupNotifications(); - const auto& mdModulesNotifier = mdModules_->notifier().simulationSetupNotifications_; - - if (inputrec->internalParameters != nullptr) - { - mdModulesNotifier.notify(*inputrec->internalParameters); - } - - if (fplog != nullptr) - { - pr_inputrec(fplog, 0, "Input Parameters", inputrec.get(), FALSE); - fprintf(fplog, "\n"); - } - - if (SIMMASTER(cr)) - { - /* In rerun, set velocities to zero if present */ - if (doRerun && ((globalState->flags & (1 << estV)) != 0)) - { - // rerun does not use velocities - GMX_LOG(mdlog.info) - .asParagraph() - .appendText( - "Rerun trajectory contains velocities. Rerun does only evaluate " - "potential energy and forces. The velocities will be ignored."); - for (int i = 0; i < globalState->natoms; i++) - { - clear_rvec(globalState->v[i]); - } - globalState->flags &= ~(1 << estV); - } - - /* now make sure the state is initialized and propagated */ - set_state_entries(globalState.get(), inputrec.get(), useModularSimulator); - } - - /* NM and TPI parallelize over force/energy calculations, not atoms, - * so we need to initialize and broadcast the global state. 
- */ - if (inputrec->eI == eiNM || inputrec->eI == eiTPI) - { - if (!MASTER(cr)) - { - globalState = std::make_unique(); - } - broadcastStateWithoutDynamics(cr->mpiDefaultCommunicator, DOMAINDECOMP(cr), PAR(cr), - globalState.get()); - } - - /* A parallel command line option consistency check that we can - only do after any threads have started. */ - if (!PAR(cr) - && (domdecOptions.numCells[XX] > 1 || domdecOptions.numCells[YY] > 1 - || domdecOptions.numCells[ZZ] > 1 || domdecOptions.numPmeRanks > 0)) - { - gmx_fatal(FARGS, - "The -dd or -npme option request a parallel simulation, " -#if !GMX_MPI - "but %s was compiled without threads or MPI enabled", - output_env_get_program_display_name(oenv)); -#elif GMX_THREAD_MPI - "but the number of MPI-threads (option -ntmpi) is not set or is 1"); -#else - "but %s was not started through mpirun/mpiexec or only one rank was requested " - "through mpirun/mpiexec", - output_env_get_program_display_name(oenv)); -#endif - } - - if (doRerun && (EI_ENERGY_MINIMIZATION(inputrec->eI) || eiNM == inputrec->eI)) - { - gmx_fatal(FARGS, - "The .mdp file specified an energy mininization or normal mode algorithm, and " - "these are not compatible with mdrun -rerun"); - } - - if (!(EEL_PME(inputrec->coulombtype) || EVDW_PME(inputrec->vdwtype))) - { - if (domdecOptions.numPmeRanks > 0) - { - gmx_fatal_collective(FARGS, cr->mpiDefaultCommunicator, MASTER(cr), - "PME-only ranks are requested, but the system does not use PME " - "for electrostatics or LJ"); - } - - domdecOptions.numPmeRanks = 0; - } - - if (useGpuForNonbonded && domdecOptions.numPmeRanks < 0) - { - /* With NB GPUs we don't automatically use PME-only CPU ranks. PME ranks can - * improve performance with many threads per GPU, since our OpenMP - * scaling is bad, but it's difficult to automate the setup. 
- */ - domdecOptions.numPmeRanks = 0; - } - if (useGpuForPme) - { - if (domdecOptions.numPmeRanks < 0) - { - domdecOptions.numPmeRanks = 0; - // TODO possibly print a note that one can opt-in for a separate PME GPU rank? - } - else - { - GMX_RELEASE_ASSERT(domdecOptions.numPmeRanks <= 1, - "PME GPU decomposition is not supported"); - } - } - - /* NMR restraints must be initialized before load_checkpoint, - * since with time averaging the history is added to t_state. - * For proper consistency check we therefore need to extend - * t_state here. - * So the PME-only nodes (if present) will also initialize - * the distance restraints. - */ - - /* This needs to be called before read_checkpoint to extend the state */ - t_disresdata* disresdata; - snew(disresdata, 1); - init_disres(fplog, &mtop, inputrec.get(), DisResRunMode::MDRun, - MASTER(cr) ? DDRole::Master : DDRole::Agent, - PAR(cr) ? NumRanks::Multiple : NumRanks::Single, cr->mpi_comm_mysim, ms, disresdata, - globalState.get(), replExParams.exchangeInterval > 0); - - t_oriresdata* oriresdata; - snew(oriresdata, 1); - init_orires(fplog, &mtop, inputrec.get(), cr, ms, globalState.get(), oriresdata); - - auto deform = prepareBoxDeformation( - globalState != nullptr ? globalState->box : box, MASTER(cr) ? DDRole::Master : DDRole::Agent, - PAR(cr) ? NumRanks::Multiple : NumRanks::Single, cr->mpi_comm_mygroup, *inputrec); - -#if GMX_FAHCORE - /* We have to remember the generation's first step before reading checkpoint. - This way, we can report to the F@H core both the generation's first step - and the restored first step, thus making it able to distinguish between - an interruption/resume and start of the n-th generation simulation. - Having this information, the F@H core can correctly calculate and report - the progress. 
- */ - int gen_first_step = 0; - if (MASTER(cr)) - { - gen_first_step = inputrec->init_step; - } -#endif - - ObservablesHistory observablesHistory = {}; - - auto modularSimulatorCheckpointData = std::make_unique(); - if (startingBehavior != StartingBehavior::NewSimulation) - { - /* Check if checkpoint file exists before doing continuation. - * This way we can use identical input options for the first and subsequent runs... - */ - if (mdrunOptions.numStepsCommandline > -2) - { - /* Temporarily set the number of steps to unlimited to avoid - * triggering the nsteps check in load_checkpoint(). - * This hack will go away soon when the -nsteps option is removed. - */ - inputrec->nsteps = -1; - } - - // Finish applying initial simulation state information from external sources on all ranks. - // Reconcile checkpoint file data with Mdrunner state established up to this point. - applyLocalState(*inputHolder_.get(), logFileHandle, cr, domdecOptions.numCells, - inputrec.get(), globalState.get(), &observablesHistory, - mdrunOptions.reproducible, mdModules_->notifier(), - modularSimulatorCheckpointData.get(), useModularSimulator); - // TODO: (#3652) Synchronize filesystem state, SimulationInput contents, and program - // invariants - // on all code paths. - // Write checkpoint or provide hook to update SimulationInput. - // If there was a checkpoint file, SimulationInput contains more information - // than if there wasn't. At this point, we have synchronized the in-memory - // state with the filesystem state only for restarted simulations. We should - // be calling applyLocalState unconditionally and expect that the completeness - // of SimulationInput is not dependent on its creation method. - - if (startingBehavior == StartingBehavior::RestartWithAppending && logFileHandle) - { - // Now we can start normal logging to the truncated log file. 
- fplog = gmx_fio_getfp(logFileHandle); - prepareLogAppending(fplog); - logOwner = buildLogger(fplog, MASTER(cr)); - mdlog = logOwner.logger(); - } - } - -#if GMX_FAHCORE - if (MASTER(cr)) - { - fcRegisterSteps(inputrec->nsteps + inputrec->init_step, gen_first_step); - } -#endif - - if (mdrunOptions.numStepsCommandline > -2) - { - GMX_LOG(mdlog.info) - .asParagraph() - .appendText( - "The -nsteps functionality is deprecated, and may be removed in a future " - "version. " - "Consider using gmx convert-tpr -nsteps or changing the appropriate .mdp " - "file field."); - } - /* override nsteps with value set on the commandline */ - override_nsteps_cmdline(mdlog, mdrunOptions.numStepsCommandline, inputrec.get()); - - if (isSimulationMasterRank) - { - copy_mat(globalState->box, box); - } - - if (PAR(cr)) - { - gmx_bcast(sizeof(box), box, cr->mpiDefaultCommunicator); - } - - if (inputrec->cutoff_scheme != ecutsVERLET) - { - gmx_fatal(FARGS, - "This group-scheme .tpr file can no longer be run by mdrun. Please update to the " - "Verlet scheme, or use an earlier version of GROMACS if necessary."); - } - /* Update rlist and nstlist. */ - /* Note: prepare_verlet_scheme is calling increaseNstlist(...), which (while attempting to - * increase rlist) tries to check if the newly chosen value fits with the DD scheme. As this is - * run before any DD scheme is set up, this check is never executed. See #3334 for more details. - */ - prepare_verlet_scheme(fplog, cr, inputrec.get(), nstlist_cmdline, &mtop, box, - useGpuForNonbonded || (emulateGpuNonbonded == EmulateGpuNonbonded::Yes), - *hwinfo_->cpuInfo); - - // This builder is necessary while we have multi-part construction - // of DD. Before DD is constructed, we use the existence of - // the builder object to indicate that further construction of DD - // is needed. 
- std::unique_ptr ddBuilder; - if (useDomainDecomposition) - { - ddBuilder = std::make_unique( - mdlog, cr, domdecOptions, mdrunOptions, mtop, *inputrec, box, - positionsFromStatePointer(globalState.get())); - } - else - { - /* PME, if used, is done on all nodes with 1D decomposition */ - cr->nnodes = cr->sizeOfDefaultCommunicator; - cr->sim_nodeid = cr->rankInDefaultCommunicator; - cr->nodeid = cr->rankInDefaultCommunicator; - cr->npmenodes = 0; - cr->duty = (DUTY_PP | DUTY_PME); - - if (inputrec->pbcType == PbcType::Screw) - { - gmx_fatal(FARGS, "pbc=screw is only implemented with domain decomposition"); - } - } - - // Produce the task assignment for this rank - done after DD is constructed - GpuTaskAssignments gpuTaskAssignments = GpuTaskAssignmentsBuilder::build( - gpuIdsToUse, userGpuTaskAssignment, *hwinfo_, simulationCommunicator, physicalNodeComm, - nonbondedTarget, pmeTarget, bondedTarget, updateTarget, useGpuForNonbonded, - useGpuForPme, thisRankHasDuty(cr, DUTY_PP), - // TODO cr->duty & DUTY_PME should imply that a PME - // algorithm is active, but currently does not. - EEL_PME(inputrec->coulombtype) && thisRankHasDuty(cr, DUTY_PME)); - - // Get the device handles for the modules, nullptr when no task is assigned. - int deviceId = -1; - DeviceInformation* deviceInfo = gpuTaskAssignments.initDevice(&deviceId); - - // timing enabling - TODO put this in gpu_utils (even though generally this is just option handling?) - bool useTiming = true; - - if (GMX_GPU_CUDA) - { - /* WARNING: CUDA timings are incorrect with multiple streams. - * This is the main reason why they are disabled by default. - */ - // TODO: Consider turning on by default when we can detect nr of streams. - useTiming = (getenv("GMX_ENABLE_GPU_TIMING") != nullptr); - } - else if (GMX_GPU_OPENCL) - { - useTiming = (getenv("GMX_DISABLE_GPU_TIMING") == nullptr); - } - - // TODO Currently this is always built, yet DD partition code - // checks if it is built before using it. 
Probably it should - // become an MDModule that is made only when another module - // requires it (e.g. pull, CompEl, density fitting), so that we - // don't update the local atom sets unilaterally every step. - LocalAtomSetManager atomSets; - if (ddBuilder) - { - // TODO Pass the GPU streams to ddBuilder to use in buffer - // transfers (e.g. halo exchange) - cr->dd = ddBuilder->build(&atomSets); - // The builder's job is done, so destruct it - ddBuilder.reset(nullptr); - // Note that local state still does not exist yet. - } - // Ensure that all atoms within the same update group are in the - // same periodic image. Otherwise, a simulation that did not use - // update groups (e.g. a single-rank simulation) cannot always be - // correctly restarted in a way that does use update groups - // (e.g. a multi-rank simulation). - if (isSimulationMasterRank) - { - const bool useUpdateGroups = cr->dd ? ddUsesUpdateGroups(*cr->dd) : false; - if (useUpdateGroups) - { - putUpdateGroupAtomsInSamePeriodicImage(*cr->dd, mtop, globalState->box, globalState->x); - } - } - - // The GPU update is decided here because we need to know whether the constraints or - // SETTLEs can span accross the domain borders (i.e. whether or not update groups are - // defined). This is only known after DD is initialized, hence decision on using GPU - // update is done so late. - try - { - const bool useUpdateGroups = cr->dd ? 
ddUsesUpdateGroups(*cr->dd) : false; - const bool haveFrozenAtoms = inputrecFrozenAtoms(inputrec.get()); - - useGpuForUpdate = decideWhetherToUseGpuForUpdate( - useDomainDecomposition, useUpdateGroups, pmeRunMode, domdecOptions.numPmeRanks > 0, - useGpuForNonbonded, updateTarget, gpusWereDetected, *inputrec, mtop, - doEssentialDynamics, gmx_mtop_ftype_count(mtop, F_ORIRES) > 0, - replExParams.exchangeInterval > 0, haveFrozenAtoms, doRerun, devFlags, mdlog); - } - GMX_CATCH_ALL_AND_EXIT_WITH_FATAL_ERROR - - const bool printHostName = (cr->nnodes > 1); - gpuTaskAssignments.reportGpuUsage(mdlog, printHostName, useGpuForBonded, pmeRunMode, useGpuForUpdate); - - const bool disableNonbondedCalculation = (getenv("GMX_NO_NONBONDED") != nullptr); - if (disableNonbondedCalculation) - { - /* turn off non-bonded calculations */ - GMX_LOG(mdlog.warning) - .asParagraph() - .appendText( - "Found environment variable GMX_NO_NONBONDED.\n" - "Disabling nonbonded calculations."); - } - - MdrunScheduleWorkload runScheduleWork; - - bool useGpuDirectHalo = decideWhetherToUseGpuForHalo( - devFlags, havePPDomainDecomposition(cr), useGpuForNonbonded, useModularSimulator, - doRerun, EI_ENERGY_MINIMIZATION(inputrec->eI)); - - // Also populates the simulation constant workload description. - runScheduleWork.simulationWork = createSimulationWorkload( - *inputrec, disableNonbondedCalculation, devFlags, useGpuForNonbonded, pmeRunMode, - useGpuForBonded, useGpuForUpdate, useGpuDirectHalo); - - std::unique_ptr deviceStreamManager = nullptr; - - if (deviceInfo != nullptr) - { - if (DOMAINDECOMP(cr) && thisRankHasDuty(cr, DUTY_PP)) - { - dd_setup_dlb_resource_sharing(cr, deviceId); - } - deviceStreamManager = std::make_unique( - *deviceInfo, havePPDomainDecomposition(cr), runScheduleWork.simulationWork, useTiming); - } - - // If the user chose a task assignment, give them some hints - // where appropriate. 
- if (!userGpuTaskAssignment.empty()) - { - gpuTaskAssignments.logPerformanceHints(mdlog, numDevicesToUse); - } - - if (PAR(cr)) - { - /* After possible communicator splitting in make_dd_communicators. - * we can set up the intra/inter node communication. - */ - gmx_setup_nodecomm(fplog, cr); - } - -#if GMX_MPI - if (isMultiSim(ms)) - { - GMX_LOG(mdlog.warning) - .asParagraph() - .appendTextFormatted( - "This is simulation %d out of %d running as a composite GROMACS\n" - "multi-simulation job. Setup for this simulation:\n", - ms->simulationIndex_, ms->numSimulations_); - } - GMX_LOG(mdlog.warning) - .appendTextFormatted("Using %d MPI %s\n", cr->nnodes, -# if GMX_THREAD_MPI - cr->nnodes == 1 ? "thread" : "threads" -# else - cr->nnodes == 1 ? "process" : "processes" -# endif - ); - fflush(stderr); -#endif - - // If mdrun -pin auto honors any affinity setting that already - // exists. If so, it is nice to provide feedback about whether - // that existing affinity setting was from OpenMP or something - // else, so we run this code both before and after we initialize - // the OpenMP support. - gmx_check_thread_affinity_set(mdlog, &hw_opt, hwinfo_->nthreads_hw_avail, FALSE); - /* Check and update the number of OpenMP threads requested */ - checkAndUpdateRequestedNumOpenmpThreads(&hw_opt, *hwinfo_, cr, ms, physicalNodeComm.size_, - pmeRunMode, mtop, *inputrec); - - gmx_omp_nthreads_init(mdlog, cr, hwinfo_->nthreads_hw_avail, physicalNodeComm.size_, - hw_opt.nthreads_omp, hw_opt.nthreads_omp_pme, !thisRankHasDuty(cr, DUTY_PP)); - - // Enable FP exception detection, but not in - // Release mode and not for compilers with known buggy FP - // exception support (clang with any optimization) or suspected - // buggy FP exception support (gcc 7.* with optimization). 
-#if !defined NDEBUG \ - && !((defined __clang__ || (defined(__GNUC__) && !defined(__ICC) && __GNUC__ == 7)) \ - && defined __OPTIMIZE__) - const bool bEnableFPE = true; -#else - const bool bEnableFPE = false; -#endif - // FIXME - reconcile with gmx_feenableexcept() call from CommandLineModuleManager::run() - if (bEnableFPE) - { - gmx_feenableexcept(); - } - - /* Now that we know the setup is consistent, check for efficiency */ - check_resource_division_efficiency(hwinfo_, gpuTaskAssignments.thisRankHasAnyGpuTask(), - mdrunOptions.ntompOptionIsSet, cr, mdlog); - - /* getting number of PP/PME threads on this MPI / tMPI rank. - PME: env variable should be read only on one node to make sure it is - identical everywhere; - */ - const int numThreadsOnThisRank = thisRankHasDuty(cr, DUTY_PP) ? gmx_omp_nthreads_get(emntNonbonded) - : gmx_omp_nthreads_get(emntPME); - checkHardwareOversubscription(numThreadsOnThisRank, cr->nodeid, *hwinfo_->hardwareTopology, - physicalNodeComm, mdlog); - - // Enable Peer access between GPUs where available - // Only for DD, only master PP rank needs to perform setup, and only if thread MPI plus - // any of the GPU communication features are active. - if (DOMAINDECOMP(cr) && MASTER(cr) && thisRankHasDuty(cr, DUTY_PP) && GMX_THREAD_MPI - && (runScheduleWork.simulationWork.useGpuHaloExchange - || runScheduleWork.simulationWork.useGpuPmePpCommunication)) - { - setupGpuDevicePeerAccess(gpuIdsToUse, mdlog); - } - - if (hw_opt.threadAffinity != ThreadAffinity::Off) - { - /* Before setting affinity, check whether the affinity has changed - * - which indicates that probably the OpenMP library has changed it - * since we first checked). 
- */ - gmx_check_thread_affinity_set(mdlog, &hw_opt, hwinfo_->nthreads_hw_avail, TRUE); - - int numThreadsOnThisNode, intraNodeThreadOffset; - analyzeThreadsOnThisNode(physicalNodeComm, numThreadsOnThisRank, &numThreadsOnThisNode, - &intraNodeThreadOffset); - - /* Set the CPU affinity */ - gmx_set_thread_affinity(mdlog, cr, &hw_opt, *hwinfo_->hardwareTopology, numThreadsOnThisRank, - numThreadsOnThisNode, intraNodeThreadOffset, nullptr); - } - - if (mdrunOptions.timingOptions.resetStep > -1) - { - GMX_LOG(mdlog.info) - .asParagraph() - .appendText( - "The -resetstep functionality is deprecated, and may be removed in a " - "future version."); - } - wcycle = wallcycle_init(fplog, mdrunOptions.timingOptions.resetStep, cr); - - if (PAR(cr)) - { - /* Master synchronizes its value of reset_counters with all nodes - * including PME only nodes */ - int64_t reset_counters = wcycle_get_reset_counters(wcycle); - gmx_bcast(sizeof(reset_counters), &reset_counters, cr->mpi_comm_mysim); - wcycle_set_reset_counters(wcycle, reset_counters); - } - - // Membrane embedding must be initialized before we call init_forcerec() - membedHolder.initializeMembed(fplog, filenames.size(), filenames.data(), &mtop, inputrec.get(), - globalState.get(), cr, &mdrunOptions.checkpointOptions.period); - - const bool thisRankHasPmeGpuTask = gpuTaskAssignments.thisRankHasPmeGpuTask(); - std::unique_ptr mdAtoms; - std::unique_ptr vsite; - std::unique_ptr gpuBonded; - - t_nrnb nrnb; - if (thisRankHasDuty(cr, DUTY_PP)) - { - mdModulesNotifier.notify(*cr); - mdModulesNotifier.notify(&atomSets); - mdModulesNotifier.notify(inputrec->pbcType); - mdModulesNotifier.notify(SimulationTimeStep{ inputrec->delta_t }); - /* Initiate forcerecord */ - fr = new t_forcerec; - fr->forceProviders = mdModules_->initForceProviders(); - init_forcerec(fplog, mdlog, fr, inputrec.get(), &mtop, cr, box, - opt2fn("-table", filenames.size(), filenames.data()), - opt2fn("-tablep", filenames.size(), filenames.data()), - 
opt2fns("-tableb", filenames.size(), filenames.data()), pforce); - // Dirty hack, for fixing disres and orires should be made mdmodules - fr->fcdata->disres = disresdata; - fr->fcdata->orires = oriresdata; - - // Save a handle to device stream manager to use elsewhere in the code - // TODO: Forcerec is not a correct place to store it. - fr->deviceStreamManager = deviceStreamManager.get(); - - if (runScheduleWork.simulationWork.useGpuPmePpCommunication && !thisRankHasDuty(cr, DUTY_PME)) - { - GMX_RELEASE_ASSERT( - deviceStreamManager != nullptr, - "GPU device stream manager should be valid in order to use PME-PP direct " - "communications."); - GMX_RELEASE_ASSERT( - deviceStreamManager->streamIsValid(DeviceStreamType::PmePpTransfer), - "GPU PP-PME stream should be valid in order to use GPU PME-PP direct " - "communications."); - fr->pmePpCommGpu = std::make_unique( - cr->mpi_comm_mysim, cr->dd->pme_nodeid, deviceStreamManager->context(), - deviceStreamManager->stream(DeviceStreamType::PmePpTransfer)); - } - - fr->nbv = Nbnxm::init_nb_verlet(mdlog, inputrec.get(), fr, cr, *hwinfo_, - runScheduleWork.simulationWork.useGpuNonbonded, - deviceStreamManager.get(), &mtop, box, wcycle); - // TODO: Move the logic below to a GPU bonded builder - if (runScheduleWork.simulationWork.useGpuBonded) - { - GMX_RELEASE_ASSERT(deviceStreamManager != nullptr, - "GPU device stream manager should be valid in order to use GPU " - "version of bonded forces."); - gpuBonded = std::make_unique( - mtop.ffparams, fr->ic->epsfac * fr->fudgeQQ, deviceStreamManager->context(), - deviceStreamManager->bondedStream(havePPDomainDecomposition(cr)), wcycle); - fr->gpuBonded = gpuBonded.get(); - } - - /* Initialize the mdAtoms structure. - * mdAtoms is not filled with atom data, - * as this can not be done now with domain decomposition. 
- */ - mdAtoms = makeMDAtoms(fplog, mtop, *inputrec, thisRankHasPmeGpuTask); - if (globalState && thisRankHasPmeGpuTask) - { - // The pinning of coordinates in the global state object works, because we only use - // PME on GPU without DD or on a separate PME rank, and because the local state pointer - // points to the global state object without DD. - // FIXME: MD and EM separately set up the local state - this should happen in the same - // function, which should also perform the pinning. - changePinningPolicy(&globalState->x, pme_get_pinning_policy()); - } - - /* Initialize the virtual site communication */ - vsite = makeVirtualSitesHandler(mtop, cr, fr->pbcType); - - calc_shifts(box, fr->shift_vec); - - /* With periodic molecules the charge groups should be whole at start up - * and the virtual sites should not be far from their proper positions. - */ - if (!inputrec->bContinuation && MASTER(cr) - && !(inputrec->pbcType != PbcType::No && inputrec->bPeriodicMols)) - { - /* Make molecules whole at start of run */ - if (fr->pbcType != PbcType::No) - { - do_pbc_first_mtop(fplog, inputrec->pbcType, box, &mtop, globalState->x.rvec_array()); - } - if (vsite) - { - /* Correct initial vsite positions are required - * for the initial distribution in the domain decomposition - * and for the initial shell prediction. 
- */ - constructVirtualSitesGlobal(mtop, globalState->x); - } - } - - if (EEL_PME(fr->ic->eeltype) || EVDW_PME(fr->ic->vdwtype)) - { - ewaldcoeff_q = fr->ic->ewaldcoeff_q; - ewaldcoeff_lj = fr->ic->ewaldcoeff_lj; - } - } - else - { - /* This is a PME only node */ - - GMX_ASSERT(globalState == nullptr, - "We don't need the state on a PME only rank and expect it to be unitialized"); - - ewaldcoeff_q = calc_ewaldcoeff_q(inputrec->rcoulomb, inputrec->ewald_rtol); - ewaldcoeff_lj = calc_ewaldcoeff_lj(inputrec->rvdw, inputrec->ewald_rtol_lj); - } - - gmx_pme_t* sepPmeData = nullptr; - // This reference hides the fact that PME data is owned by runner on PME-only ranks and by forcerec on other ranks - GMX_ASSERT(thisRankHasDuty(cr, DUTY_PP) == (fr != nullptr), - "Double-checking that only PME-only ranks have no forcerec"); - gmx_pme_t*& pmedata = fr ? fr->pmedata : sepPmeData; - - // TODO should live in ewald module once its testing is improved - // - // Later, this program could contain kernels that might be later - // re-used as auto-tuning progresses, or subsequent simulations - // are invoked. - PmeGpuProgramStorage pmeGpuProgram; - if (thisRankHasPmeGpuTask) - { - GMX_RELEASE_ASSERT( - (deviceStreamManager != nullptr), - "GPU device stream manager should be initialized in order to use GPU for PME."); - GMX_RELEASE_ASSERT((deviceInfo != nullptr), - "GPU device should be initialized in order to use GPU for PME."); - pmeGpuProgram = buildPmeGpuProgram(deviceStreamManager->context()); - } - - /* Initiate PME if necessary, - * either on all nodes or on dedicated PME nodes only. 
*/ - if (EEL_PME(inputrec->coulombtype) || EVDW_PME(inputrec->vdwtype)) - { - if (mdAtoms && mdAtoms->mdatoms()) - { - nChargePerturbed = mdAtoms->mdatoms()->nChargePerturbed; - if (EVDW_PME(inputrec->vdwtype)) - { - nTypePerturbed = mdAtoms->mdatoms()->nTypePerturbed; - } - } - if (cr->npmenodes > 0) - { - /* The PME only nodes need to know nChargePerturbed(FEP on Q) and nTypePerturbed(FEP on LJ)*/ - gmx_bcast(sizeof(nChargePerturbed), &nChargePerturbed, cr->mpi_comm_mysim); - gmx_bcast(sizeof(nTypePerturbed), &nTypePerturbed, cr->mpi_comm_mysim); - } - - if (thisRankHasDuty(cr, DUTY_PME)) - { - try - { - // TODO: This should be in the builder. - GMX_RELEASE_ASSERT(!runScheduleWork.simulationWork.useGpuPme - || (deviceStreamManager != nullptr), - "Device stream manager should be valid in order to use GPU " - "version of PME."); - GMX_RELEASE_ASSERT( - !runScheduleWork.simulationWork.useGpuPme - || deviceStreamManager->streamIsValid(DeviceStreamType::Pme), - "GPU PME stream should be valid in order to use GPU version of PME."); - - const DeviceContext* deviceContext = runScheduleWork.simulationWork.useGpuPme - ? &deviceStreamManager->context() - : nullptr; - const DeviceStream* pmeStream = - runScheduleWork.simulationWork.useGpuPme - ? &deviceStreamManager->stream(DeviceStreamType::Pme) - : nullptr; - - pmedata = gmx_pme_init(cr, getNumPmeDomains(cr->dd), inputrec.get(), - nChargePerturbed != 0, nTypePerturbed != 0, - mdrunOptions.reproducible, ewaldcoeff_q, ewaldcoeff_lj, - gmx_omp_nthreads_get(emntPME), pmeRunMode, nullptr, - deviceContext, pmeStream, pmeGpuProgram.get(), mdlog); - } - GMX_CATCH_ALL_AND_EXIT_WITH_FATAL_ERROR - } - } - - - if (EI_DYNAMICS(inputrec->eI)) - { - /* Turn on signal handling on all nodes */ - /* - * (A user signal from the PME nodes (if any) - * is communicated to the PP nodes. 
- */ - signal_handler_install(); - } - - pull_t* pull_work = nullptr; - if (thisRankHasDuty(cr, DUTY_PP)) - { - /* Assumes uniform use of the number of OpenMP threads */ - walltime_accounting = walltime_accounting_init(gmx_omp_nthreads_get(emntDefault)); - - if (inputrec->bPull) - { - /* Initialize pull code */ - pull_work = init_pull(fplog, inputrec->pull.get(), inputrec.get(), &mtop, cr, &atomSets, - inputrec->fepvals->init_lambda); - if (inputrec->pull->bXOutAverage || inputrec->pull->bFOutAverage) - { - initPullHistory(pull_work, &observablesHistory); - } - if (EI_DYNAMICS(inputrec->eI) && MASTER(cr)) - { - init_pull_output_files(pull_work, filenames.size(), filenames.data(), oenv, startingBehavior); - } - } - - std::unique_ptr enforcedRotation; - if (inputrec->bRot) - { - /* Initialize enforced rotation code */ - enforcedRotation = init_rot(fplog, inputrec.get(), filenames.size(), filenames.data(), - cr, &atomSets, globalState.get(), &mtop, oenv, mdrunOptions, - startingBehavior); - } - - t_swap* swap = nullptr; - if (inputrec->eSwapCoords != eswapNO) - { - /* Initialize ion swapping code */ - swap = init_swapcoords(fplog, inputrec.get(), - opt2fn_master("-swap", filenames.size(), filenames.data(), cr), - &mtop, globalState.get(), &observablesHistory, cr, &atomSets, - oenv, mdrunOptions, startingBehavior); - } - - /* Let makeConstraints know whether we have essential dynamics constraints. 
*/ - auto constr = makeConstraints(mtop, *inputrec, pull_work, doEssentialDynamics, fplog, cr, - ms, &nrnb, wcycle, fr->bMolPBC); - - /* Energy terms and groups */ - gmx_enerdata_t enerd(mtop.groups.groups[SimulationAtomGroupType::EnergyOutput].size(), - inputrec->fepvals->n_lambda); - - // cos acceleration is only supported by md, but older tpr - // files might still combine it with other integrators - GMX_RELEASE_ASSERT(inputrec->cos_accel == 0.0 || inputrec->eI == eiMD, - "cos_acceleration is only supported by integrator=md"); - - /* Kinetic energy data */ - gmx_ekindata_t ekind; - init_ekindata(fplog, &mtop, &(inputrec->opts), &ekind, inputrec->cos_accel); - - /* Set up interactive MD (IMD) */ - auto imdSession = - makeImdSession(inputrec.get(), cr, wcycle, &enerd, ms, &mtop, mdlog, - MASTER(cr) ? globalState->x.rvec_array() : nullptr, filenames.size(), - filenames.data(), oenv, mdrunOptions.imdOptions, startingBehavior); - - if (DOMAINDECOMP(cr)) - { - GMX_RELEASE_ASSERT(fr, "fr was NULL while cr->duty was DUTY_PP"); - /* This call is not included in init_domain_decomposition mainly - * because fr->cginfo_mb is set later. 
- */ - dd_init_bondeds(fplog, cr->dd, mtop, vsite.get(), inputrec.get(), - domdecOptions.checkBondedInteractions, fr->cginfo_mb); - } - - if (runScheduleWork.simulationWork.useGpuBufferOps) - { - fr->gpuForceReduction[gmx::AtomLocality::Local] = std::make_unique( - deviceStreamManager->context(), - deviceStreamManager->stream(gmx::DeviceStreamType::NonBondedLocal), wcycle); - fr->gpuForceReduction[gmx::AtomLocality::NonLocal] = std::make_unique( - deviceStreamManager->context(), - deviceStreamManager->stream(gmx::DeviceStreamType::NonBondedNonLocal), wcycle); - } - - std::unique_ptr stateGpu; - if (gpusWereDetected - && ((runScheduleWork.simulationWork.useGpuPme && thisRankHasDuty(cr, DUTY_PME)) - || runScheduleWork.simulationWork.useGpuBufferOps)) - { - GpuApiCallBehavior transferKind = (inputrec->eI == eiMD && !doRerun && !useModularSimulator) - ? GpuApiCallBehavior::Async - : GpuApiCallBehavior::Sync; - GMX_RELEASE_ASSERT(deviceStreamManager != nullptr, - "GPU device stream manager should be initialized to use GPU."); - stateGpu = std::make_unique( - *deviceStreamManager, transferKind, pme_gpu_get_block_size(fr->pmedata), wcycle); - fr->stateGpu = stateGpu.get(); - } - - GMX_ASSERT(stopHandlerBuilder_, "Runner must provide StopHandlerBuilder to simulator."); - SimulatorBuilder simulatorBuilder; - - simulatorBuilder.add(SimulatorStateData(globalState.get(), &observablesHistory, &enerd, &ekind)); - simulatorBuilder.add(std::move(membedHolder)); - simulatorBuilder.add(std::move(stopHandlerBuilder_)); - simulatorBuilder.add(SimulatorConfig(mdrunOptions, startingBehavior, &runScheduleWork)); - - - simulatorBuilder.add(SimulatorEnv(fplog, cr, ms, mdlog, oenv)); - simulatorBuilder.add(Profiling(&nrnb, walltime_accounting, wcycle)); - simulatorBuilder.add(ConstraintsParam( - constr.get(), enforcedRotation ? enforcedRotation->getLegacyEnfrot() : nullptr, - vsite.get())); - // TODO: Separate `fr` to a separate add, and make the `build` handle the coupling sensibly. 
- simulatorBuilder.add(LegacyInput(static_cast(filenames.size()), filenames.data(), - inputrec.get(), fr)); - simulatorBuilder.add(ReplicaExchangeParameters(replExParams)); - simulatorBuilder.add(InteractiveMD(imdSession.get())); - simulatorBuilder.add(SimulatorModules(mdModules_->outputProvider(), mdModules_->notifier())); - simulatorBuilder.add(CenterOfMassPulling(pull_work)); - // Todo move to an MDModule - simulatorBuilder.add(IonSwapping(swap)); - simulatorBuilder.add(TopologyData(&mtop, mdAtoms.get())); - simulatorBuilder.add(BoxDeformationHandle(deform.get())); - simulatorBuilder.add(std::move(modularSimulatorCheckpointData)); - - /* PLUMED */ - if(plumedswitch){ - if(useModularSimulator) gmx_fatal(FARGS, "PLUMED is not yet compatible with GROMACS new modular simulator"); - /* detect plumed API version */ - int pversion=0; - plumed_cmd(plumedmain,"getApiVersion",&pversion); - if(pversion>5) { - int nth = gmx_omp_nthreads_get(emntDefault); - plumed_cmd(plumedmain,"setNumOMPthreads",&nth); - } - /* set GPU device id */ - if(pversion>9) { - plumed_cmd(plumedmain,"setGpuDeviceId", &deviceId); - } - if(useGpuForUpdate) { - GMX_LOG(mdlog.warning) - .asParagraph() - .appendTextFormatted( - "This simulation is resident on GPU (-update gpu)\n" - "but also runs PLUMED (-plumed ). Unless plumed actions are performed\n" - "only on neighbour list search and/or file writing steps, this will lead to WRONG RESULTS.\n" - "Stop it and run it again with -update cpu.\n"); - } - } - /* END PLUMED */ - - // build and run simulator object based on user-input - auto simulator = simulatorBuilder.build(useModularSimulator); - simulator->run(); - - if (fr->pmePpCommGpu) - { - // destroy object since it is no longer required. (This needs to be done while the GPU context still exists.) 
- fr->pmePpCommGpu.reset(); - } - - if (inputrec->bPull) - { - finish_pull(pull_work); - } - finish_swapcoords(swap); - } - else - { - GMX_RELEASE_ASSERT(pmedata, "pmedata was NULL while cr->duty was not DUTY_PP"); - /* do PME only */ - walltime_accounting = walltime_accounting_init(gmx_omp_nthreads_get(emntPME)); - gmx_pmeonly(pmedata, cr, &nrnb, wcycle, walltime_accounting, inputrec.get(), pmeRunMode, - deviceStreamManager.get()); - } - - wallcycle_stop(wcycle, ewcRUN); - - /* Finish up, write some stuff - * if rerunMD, don't write last frame again - */ - finish_run(fplog, mdlog, cr, inputrec.get(), &nrnb, wcycle, walltime_accounting, - fr ? fr->nbv.get() : nullptr, pmedata, EI_DYNAMICS(inputrec->eI) && !isMultiSim(ms)); - - // clean up cycle counter - wallcycle_destroy(wcycle); - - deviceStreamManager.reset(nullptr); - // Free PME data - if (pmedata) - { - gmx_pme_destroy(pmedata); - pmedata = nullptr; - } - - // FIXME: this is only here to manually unpin mdAtoms->chargeA_ and state->x, - // before we destroy the GPU context(s) - // Pinned buffers are associated with contexts in CUDA. - // As soon as we destroy GPU contexts after mdrunner() exits, these lines should go. - mdAtoms.reset(nullptr); - globalState.reset(nullptr); - mdModules_.reset(nullptr); // destruct force providers here as they might also use the GPU - gpuBonded.reset(nullptr); - /* Free pinned buffers in *fr */ - delete fr; - fr = nullptr; - // TODO convert to C++ so we can get rid of these frees - sfree(disresdata); - sfree(oriresdata); - - if (!hwinfo_->deviceInfoList.empty()) - { - /* stop the GPU profiler (only CUDA) */ - stopGpuProfiler(); - } - - /* With tMPI we need to wait for all ranks to finish deallocation before - * destroying the CUDA context as some tMPI ranks may be sharing - * GPU and context. - * - * This is not a concern in OpenCL where we use one context per rank. 
- * - * Note: it is safe to not call the barrier on the ranks which do not use GPU, - * but it is easier and more futureproof to call it on the whole node. - * - * Note that this function needs to be called even if GPUs are not used - * in this run because the PME ranks have no knowledge of whether GPUs - * are used or not, but all ranks need to enter the barrier below. - * \todo Remove this physical node barrier after making sure - * that it's not needed anymore (with a shared GPU run). - */ - if (GMX_THREAD_MPI) - { - physicalNodeComm.barrier(); - } - releaseDevice(deviceInfo); - - /* Does what it says */ - print_date_and_time(fplog, cr->nodeid, "Finished mdrun", gmx_gettime()); - walltime_accounting_destroy(walltime_accounting); - - /* PLUMED */ - if(plumedswitch){ - plumed_finalize(plumedmain); - } - /* END PLUMED */ - - // Ensure log file content is written - if (logFileHandle) - { - gmx_fio_flush(logFileHandle); - } - - /* Reset FPEs (important for unit tests) by disabling them. Assumes no - * exceptions were enabled before function was called. */ - if (bEnableFPE) - { - gmx_fedisableexcept(); - } - - auto rc = static_cast(gmx_get_stop_condition()); - -#if GMX_THREAD_MPI - /* we need to join all threads. The sub-threads join when they - exit this function, but the master thread needs to be told to - wait for that. */ - if (MASTER(cr)) - { - tMPI_Finalize(); - } -#endif - return rc; -} // namespace gmx - -Mdrunner::~Mdrunner() -{ - // Clean up of the Manager. - // This will end up getting called on every thread-MPI rank, which is unnecessary, - // but okay as long as threads synchronize some time before adding or accessing - // a new set of restraints. 
- if (restraintManager_) - { - restraintManager_->clear(); - GMX_ASSERT(restraintManager_->countRestraints() == 0, - "restraints added during runner life time should be cleared at runner " - "destruction."); - } -}; - -void Mdrunner::addPotential(std::shared_ptr puller, const std::string& name) -{ - GMX_ASSERT(restraintManager_, "Mdrunner must have a restraint manager."); - // Not sure if this should be logged through the md logger or something else, - // but it is helpful to have some sort of INFO level message sent somewhere. - // std::cout << "Registering restraint named " << name << std::endl; - - // When multiple restraints are used, it may be wasteful to register them separately. - // Maybe instead register an entire Restraint Manager as a force provider. - restraintManager_->addToSpec(std::move(puller), name); -} - -Mdrunner::Mdrunner(std::unique_ptr mdModules) : mdModules_(std::move(mdModules)) {} - -Mdrunner::Mdrunner(Mdrunner&&) noexcept = default; - -//NOLINTNEXTLINE(performance-noexcept-move-constructor) working around GCC bug 58265 in CentOS 7 -Mdrunner& Mdrunner::operator=(Mdrunner&& /*handle*/) noexcept(BUGFREE_NOEXCEPT_STRING) = default; - -class Mdrunner::BuilderImplementation -{ -public: - BuilderImplementation() = delete; - BuilderImplementation(std::unique_ptr mdModules, compat::not_null context); - ~BuilderImplementation(); - - BuilderImplementation& setExtraMdrunOptions(const MdrunOptions& options, - real forceWarningThreshold, - StartingBehavior startingBehavior); - - void addHardwareDetectionResult(const gmx_hw_info_t* hwinfo); - - void addDomdec(const DomdecOptions& options); - - void addInput(SimulationInputHandle inputHolder); - - void addVerletList(int nstlist); - - void addReplicaExchange(const ReplicaExchangeParameters& params); - - void addNonBonded(const char* nbpu_opt); - - void addPME(const char* pme_opt_, const char* pme_fft_opt_); - - void addBondedTaskAssignment(const char* bonded_opt); - - void addUpdateTaskAssignment(const 
char* update_opt); - - void addHardwareOptions(const gmx_hw_opt_t& hardwareOptions); - - void addFilenames(ArrayRef filenames); - - void addOutputEnvironment(gmx_output_env_t* outputEnvironment); - - void addLogFile(t_fileio* logFileHandle); - - void addStopHandlerBuilder(std::unique_ptr builder); - - Mdrunner build(); - -private: - // Default parameters copied from runner.h - // \todo Clarify source(s) of default parameters. - - const char* nbpu_opt_ = nullptr; - const char* pme_opt_ = nullptr; - const char* pme_fft_opt_ = nullptr; - const char* bonded_opt_ = nullptr; - const char* update_opt_ = nullptr; - - MdrunOptions mdrunOptions_; - - DomdecOptions domdecOptions_; - - ReplicaExchangeParameters replicaExchangeParameters_; - - //! Command-line override for the duration of a neighbor list with the Verlet scheme. - int nstlist_ = 0; - - //! World communicator, used for hardware detection and task assignment - MPI_Comm libraryWorldCommunicator_ = MPI_COMM_NULL; - - //! Multisim communicator handle. - gmx_multisim_t* multiSimulation_; - - //! mdrun communicator - MPI_Comm simulationCommunicator_ = MPI_COMM_NULL; - - //! Print a warning if any force is larger than this (in kJ/mol nm). - real forceWarningThreshold_ = -1; - - //! Whether the simulation will start afresh, or restart with/without appending. - StartingBehavior startingBehavior_ = StartingBehavior::NewSimulation; - - //! The modules that comprise the functionality of mdrun. - std::unique_ptr mdModules_; - - //! Detected hardware. - const gmx_hw_info_t* hwinfo_ = nullptr; - - //! \brief Parallelism information. - gmx_hw_opt_t hardwareOptions_; - - //! filename options for simulation. - ArrayRef filenames_; - - /*! \brief Handle to output environment. - * - * \todo gmx_output_env_t needs lifetime management. - */ - gmx_output_env_t* outputEnvironment_ = nullptr; - - /*! \brief Non-owning handle to MD log file. - * - * \todo Context should own output facilities for client. 
- * \todo Improve log file handle management. - * \internal - * Code managing the FILE* relies on the ability to set it to - * nullptr to check whether the filehandle is valid. - */ - t_fileio* logFileHandle_ = nullptr; - - /*! - * \brief Builder for simulation stop signal handler. - */ - std::unique_ptr stopHandlerBuilder_ = nullptr; - - /*! - * \brief Sources for initial simulation state. - * - * See issue #3652 for near-term refinements to the SimulationInput interface. - * - * See issue #3379 for broader discussion on API aspects of simulation inputs and outputs. - */ - SimulationInputHandle inputHolder_; -}; - -Mdrunner::BuilderImplementation::BuilderImplementation(std::unique_ptr mdModules, - compat::not_null context) : - mdModules_(std::move(mdModules)) -{ - libraryWorldCommunicator_ = context->libraryWorldCommunicator_; - simulationCommunicator_ = context->simulationCommunicator_; - multiSimulation_ = context->multiSimulation_.get(); -} - -Mdrunner::BuilderImplementation::~BuilderImplementation() = default; - -Mdrunner::BuilderImplementation& -Mdrunner::BuilderImplementation::setExtraMdrunOptions(const MdrunOptions& options, - const real forceWarningThreshold, - const StartingBehavior startingBehavior) -{ - mdrunOptions_ = options; - forceWarningThreshold_ = forceWarningThreshold; - startingBehavior_ = startingBehavior; - return *this; -} - -void Mdrunner::BuilderImplementation::addDomdec(const DomdecOptions& options) -{ - domdecOptions_ = options; -} - -void Mdrunner::BuilderImplementation::addVerletList(int nstlist) -{ - nstlist_ = nstlist; -} - -void Mdrunner::BuilderImplementation::addReplicaExchange(const ReplicaExchangeParameters& params) -{ - replicaExchangeParameters_ = params; -} - -Mdrunner Mdrunner::BuilderImplementation::build() -{ - auto newRunner = Mdrunner(std::move(mdModules_)); - - newRunner.mdrunOptions = mdrunOptions_; - newRunner.pforce = forceWarningThreshold_; - newRunner.startingBehavior = startingBehavior_; - newRunner.domdecOptions 
= domdecOptions_; - - // \todo determine an invariant to check or confirm that all gmx_hw_opt_t objects are valid - newRunner.hw_opt = hardwareOptions_; - - // No invariant to check. This parameter exists to optionally override other behavior. - newRunner.nstlist_cmdline = nstlist_; - - newRunner.replExParams = replicaExchangeParameters_; - - newRunner.filenames = filenames_; - - newRunner.libraryWorldCommunicator = libraryWorldCommunicator_; - - newRunner.simulationCommunicator = simulationCommunicator_; - - // nullptr is a valid value for the multisim handle - newRunner.ms = multiSimulation_; - - if (hwinfo_) - { - newRunner.hwinfo_ = hwinfo_; - } - else - { - GMX_THROW(gmx::APIError( - "MdrunnerBuilder::addHardwareDetectionResult() is required before build()")); - } - - if (inputHolder_) - { - newRunner.inputHolder_ = std::move(inputHolder_); - } - else - { - GMX_THROW(gmx::APIError("MdrunnerBuilder::addInput() is required before build().")); - } - - // \todo Clarify ownership and lifetime management for gmx_output_env_t - // \todo Update sanity checking when output environment has clearly specified invariants. - // Initialization and default values for oenv are not well specified in the current version. 
- if (outputEnvironment_) - { - newRunner.oenv = outputEnvironment_; - } - else - { - GMX_THROW(gmx::APIError( - "MdrunnerBuilder::addOutputEnvironment() is required before build()")); - } - - newRunner.logFileHandle = logFileHandle_; - - if (nbpu_opt_) - { - newRunner.nbpu_opt = nbpu_opt_; - } - else - { - GMX_THROW(gmx::APIError("MdrunnerBuilder::addNonBonded() is required before build()")); - } - - if (pme_opt_ && pme_fft_opt_) - { - newRunner.pme_opt = pme_opt_; - newRunner.pme_fft_opt = pme_fft_opt_; - } - else - { - GMX_THROW(gmx::APIError("MdrunnerBuilder::addElectrostatics() is required before build()")); - } - - if (bonded_opt_) - { - newRunner.bonded_opt = bonded_opt_; - } - else - { - GMX_THROW(gmx::APIError( - "MdrunnerBuilder::addBondedTaskAssignment() is required before build()")); - } - - if (update_opt_) - { - newRunner.update_opt = update_opt_; - } - else - { - GMX_THROW(gmx::APIError( - "MdrunnerBuilder::addUpdateTaskAssignment() is required before build() ")); - } - - - newRunner.restraintManager_ = std::make_unique(); - - if (stopHandlerBuilder_) - { - newRunner.stopHandlerBuilder_ = std::move(stopHandlerBuilder_); - } - else - { - newRunner.stopHandlerBuilder_ = std::make_unique(); - } - - return newRunner; -} - -void Mdrunner::BuilderImplementation::addHardwareDetectionResult(const gmx_hw_info_t* hwinfo) -{ - hwinfo_ = hwinfo; -} - -void Mdrunner::BuilderImplementation::addNonBonded(const char* nbpu_opt) -{ - nbpu_opt_ = nbpu_opt; -} - -void Mdrunner::BuilderImplementation::addPME(const char* pme_opt, const char* pme_fft_opt) -{ - pme_opt_ = pme_opt; - pme_fft_opt_ = pme_fft_opt; -} - -void Mdrunner::BuilderImplementation::addBondedTaskAssignment(const char* bonded_opt) -{ - bonded_opt_ = bonded_opt; -} - -void Mdrunner::BuilderImplementation::addUpdateTaskAssignment(const char* update_opt) -{ - update_opt_ = update_opt; -} - -void Mdrunner::BuilderImplementation::addHardwareOptions(const gmx_hw_opt_t& hardwareOptions) -{ - hardwareOptions_ = 
hardwareOptions; -} - -void Mdrunner::BuilderImplementation::addFilenames(ArrayRef filenames) -{ - filenames_ = filenames; -} - -void Mdrunner::BuilderImplementation::addOutputEnvironment(gmx_output_env_t* outputEnvironment) -{ - outputEnvironment_ = outputEnvironment; -} - -void Mdrunner::BuilderImplementation::addLogFile(t_fileio* logFileHandle) -{ - logFileHandle_ = logFileHandle; -} - -void Mdrunner::BuilderImplementation::addStopHandlerBuilder(std::unique_ptr builder) -{ - stopHandlerBuilder_ = std::move(builder); -} - -void Mdrunner::BuilderImplementation::addInput(SimulationInputHandle inputHolder) -{ - inputHolder_ = std::move(inputHolder); -} - -MdrunnerBuilder::MdrunnerBuilder(std::unique_ptr mdModules, - compat::not_null context) : - impl_{ std::make_unique(std::move(mdModules), context) } -{ -} - -MdrunnerBuilder::~MdrunnerBuilder() = default; - -MdrunnerBuilder& MdrunnerBuilder::addHardwareDetectionResult(const gmx_hw_info_t* hwinfo) -{ - impl_->addHardwareDetectionResult(hwinfo); - return *this; -} - -MdrunnerBuilder& MdrunnerBuilder::addSimulationMethod(const MdrunOptions& options, - real forceWarningThreshold, - const StartingBehavior startingBehavior) -{ - impl_->setExtraMdrunOptions(options, forceWarningThreshold, startingBehavior); - return *this; -} - -MdrunnerBuilder& MdrunnerBuilder::addDomainDecomposition(const DomdecOptions& options) -{ - impl_->addDomdec(options); - return *this; -} - -MdrunnerBuilder& MdrunnerBuilder::addNeighborList(int nstlist) -{ - impl_->addVerletList(nstlist); - return *this; -} - -MdrunnerBuilder& MdrunnerBuilder::addReplicaExchange(const ReplicaExchangeParameters& params) -{ - impl_->addReplicaExchange(params); - return *this; -} - -MdrunnerBuilder& MdrunnerBuilder::addNonBonded(const char* nbpu_opt) -{ - impl_->addNonBonded(nbpu_opt); - return *this; -} - -MdrunnerBuilder& MdrunnerBuilder::addElectrostatics(const char* pme_opt, const char* pme_fft_opt) -{ - // The builder method may become more general in the 
future, but in this version, - // parameters for PME electrostatics are both required and the only parameters - // available. - if (pme_opt && pme_fft_opt) - { - impl_->addPME(pme_opt, pme_fft_opt); - } - else - { - GMX_THROW( - gmx::InvalidInputError("addElectrostatics() arguments must be non-null pointers.")); - } - return *this; -} - -MdrunnerBuilder& MdrunnerBuilder::addBondedTaskAssignment(const char* bonded_opt) -{ - impl_->addBondedTaskAssignment(bonded_opt); - return *this; -} - -MdrunnerBuilder& MdrunnerBuilder::addUpdateTaskAssignment(const char* update_opt) -{ - impl_->addUpdateTaskAssignment(update_opt); - return *this; -} - -Mdrunner MdrunnerBuilder::build() -{ - return impl_->build(); -} - -MdrunnerBuilder& MdrunnerBuilder::addHardwareOptions(const gmx_hw_opt_t& hardwareOptions) -{ - impl_->addHardwareOptions(hardwareOptions); - return *this; -} - -MdrunnerBuilder& MdrunnerBuilder::addFilenames(ArrayRef filenames) -{ - impl_->addFilenames(filenames); - return *this; -} - -MdrunnerBuilder& MdrunnerBuilder::addOutputEnvironment(gmx_output_env_t* outputEnvironment) -{ - impl_->addOutputEnvironment(outputEnvironment); - return *this; -} - -MdrunnerBuilder& MdrunnerBuilder::addLogFile(t_fileio* logFileHandle) -{ - impl_->addLogFile(logFileHandle); - return *this; -} - -MdrunnerBuilder& MdrunnerBuilder::addStopHandlerBuilder(std::unique_ptr builder) -{ - impl_->addStopHandlerBuilder(std::move(builder)); - return *this; -} - -MdrunnerBuilder& MdrunnerBuilder::addInput(SimulationInputHandle input) -{ - impl_->addInput(std::move(input)); - return *this; -} - -MdrunnerBuilder::MdrunnerBuilder(MdrunnerBuilder&&) noexcept = default; - -MdrunnerBuilder& MdrunnerBuilder::operator=(MdrunnerBuilder&&) noexcept = default; - -} // namespace gmx diff --git a/patches/gromacs-2021.7.diff/src/gromacs/mdrun/runner.cpp.preplumed b/patches/gromacs-2021.7.diff/src/gromacs/mdrun/runner.cpp.preplumed deleted file mode 100644 index 232d994e1a..0000000000 --- 
a/patches/gromacs-2021.7.diff/src/gromacs/mdrun/runner.cpp.preplumed +++ /dev/null @@ -1,2352 +0,0 @@ -/* - * This file is part of the GROMACS molecular simulation package. - * - * Copyright (c) 1991-2000, University of Groningen, The Netherlands. - * Copyright (c) 2001-2004, The GROMACS development team. - * Copyright (c) 2011-2019,2020,2021, by the GROMACS development team, led by - * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl, - * and including many others, as listed in the AUTHORS file in the - * top-level source directory and at http://www.gromacs.org. - * - * GROMACS is free software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public License - * as published by the Free Software Foundation; either version 2.1 - * of the License, or (at your option) any later version. - * - * GROMACS is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public - * License along with GROMACS; if not, see - * http://www.gnu.org/licenses, or write to the Free Software Foundation, - * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. - * - * If you want to redistribute modifications to GROMACS, please - * consider that scientific software is very special. Version - * control is crucial - bugs must be traceable. We will be happy to - * consider code for inclusion in the official distribution, but - * derived work must not be called official GROMACS. Details are found - * in the README & COPYING files - if they are missing, get the - * official version at http://www.gromacs.org. - * - * To help us fund GROMACS development, we humbly ask that you cite - * the research papers on the package. Check out http://www.gromacs.org. - */ -/*! 
\internal \file - * - * \brief Implements the MD runner routine calling all integrators. - * - * \author David van der Spoel - * \ingroup module_mdrun - */ -#include "gmxpre.h" - -#include "runner.h" - -#include "config.h" - -#include -#include -#include -#include -#include - -#include -#include - -#include "gromacs/commandline/filenm.h" -#include "gromacs/domdec/builder.h" -#include "gromacs/domdec/domdec.h" -#include "gromacs/domdec/domdec_struct.h" -#include "gromacs/domdec/gpuhaloexchange.h" -#include "gromacs/domdec/localatomsetmanager.h" -#include "gromacs/domdec/partition.h" -#include "gromacs/ewald/ewald_utils.h" -#include "gromacs/ewald/pme_gpu_program.h" -#include "gromacs/ewald/pme_only.h" -#include "gromacs/ewald/pme_pp_comm_gpu.h" -#include "gromacs/fileio/checkpoint.h" -#include "gromacs/fileio/gmxfio.h" -#include "gromacs/fileio/oenv.h" -#include "gromacs/fileio/tpxio.h" -#include "gromacs/gmxlib/network.h" -#include "gromacs/gmxlib/nrnb.h" -#include "gromacs/gpu_utils/device_stream_manager.h" -#include "gromacs/hardware/cpuinfo.h" -#include "gromacs/hardware/detecthardware.h" -#include "gromacs/hardware/device_management.h" -#include "gromacs/hardware/hardwaretopology.h" -#include "gromacs/hardware/printhardware.h" -#include "gromacs/imd/imd.h" -#include "gromacs/listed_forces/disre.h" -#include "gromacs/listed_forces/gpubonded.h" -#include "gromacs/listed_forces/listed_forces.h" -#include "gromacs/listed_forces/orires.h" -#include "gromacs/math/functions.h" -#include "gromacs/math/utilities.h" -#include "gromacs/math/vec.h" -#include "gromacs/mdlib/boxdeformation.h" -#include "gromacs/mdlib/broadcaststructs.h" -#include "gromacs/mdlib/calc_verletbuf.h" -#include "gromacs/mdlib/dispersioncorrection.h" -#include "gromacs/mdlib/enerdata_utils.h" -#include "gromacs/mdlib/force.h" -#include "gromacs/mdlib/forcerec.h" -#include "gromacs/mdlib/gmx_omp_nthreads.h" -#include "gromacs/mdlib/gpuforcereduction.h" -#include "gromacs/mdlib/makeconstraints.h" 
-#include "gromacs/mdlib/md_support.h" -#include "gromacs/mdlib/mdatoms.h" -#include "gromacs/mdlib/sighandler.h" -#include "gromacs/mdlib/stophandler.h" -#include "gromacs/mdlib/tgroup.h" -#include "gromacs/mdlib/updategroups.h" -#include "gromacs/mdlib/vsite.h" -#include "gromacs/mdrun/mdmodules.h" -#include "gromacs/mdrun/simulationcontext.h" -#include "gromacs/mdrun/simulationinput.h" -#include "gromacs/mdrun/simulationinputhandle.h" -#include "gromacs/mdrunutility/handlerestart.h" -#include "gromacs/mdrunutility/logging.h" -#include "gromacs/mdrunutility/multisim.h" -#include "gromacs/mdrunutility/printtime.h" -#include "gromacs/mdrunutility/threadaffinity.h" -#include "gromacs/mdtypes/checkpointdata.h" -#include "gromacs/mdtypes/commrec.h" -#include "gromacs/mdtypes/enerdata.h" -#include "gromacs/mdtypes/fcdata.h" -#include "gromacs/mdtypes/forcerec.h" -#include "gromacs/mdtypes/group.h" -#include "gromacs/mdtypes/inputrec.h" -#include "gromacs/mdtypes/interaction_const.h" -#include "gromacs/mdtypes/md_enums.h" -#include "gromacs/mdtypes/mdatom.h" -#include "gromacs/mdtypes/mdrunoptions.h" -#include "gromacs/mdtypes/observableshistory.h" -#include "gromacs/mdtypes/simulation_workload.h" -#include "gromacs/mdtypes/state.h" -#include "gromacs/mdtypes/state_propagator_data_gpu.h" -#include "gromacs/modularsimulator/modularsimulator.h" -#include "gromacs/nbnxm/gpu_data_mgmt.h" -#include "gromacs/nbnxm/nbnxm.h" -#include "gromacs/nbnxm/pairlist_tuning.h" -#include "gromacs/pbcutil/pbc.h" -#include "gromacs/pulling/output.h" -#include "gromacs/pulling/pull.h" -#include "gromacs/pulling/pull_rotation.h" -#include "gromacs/restraint/manager.h" -#include "gromacs/restraint/restraintmdmodule.h" -#include "gromacs/restraint/restraintpotential.h" -#include "gromacs/swap/swapcoords.h" -#include "gromacs/taskassignment/decidegpuusage.h" -#include "gromacs/taskassignment/decidesimulationworkload.h" -#include "gromacs/taskassignment/resourcedivision.h" -#include 
"gromacs/taskassignment/taskassignment.h" -#include "gromacs/taskassignment/usergpuids.h" -#include "gromacs/timing/gpu_timing.h" -#include "gromacs/timing/wallcycle.h" -#include "gromacs/timing/wallcyclereporting.h" -#include "gromacs/topology/mtop_util.h" -#include "gromacs/trajectory/trajectoryframe.h" -#include "gromacs/utility/basenetwork.h" -#include "gromacs/utility/cstringutil.h" -#include "gromacs/utility/exceptions.h" -#include "gromacs/utility/fatalerror.h" -#include "gromacs/utility/filestream.h" -#include "gromacs/utility/gmxassert.h" -#include "gromacs/utility/gmxmpi.h" -#include "gromacs/utility/keyvaluetree.h" -#include "gromacs/utility/logger.h" -#include "gromacs/utility/loggerbuilder.h" -#include "gromacs/utility/mdmodulenotification.h" -#include "gromacs/utility/physicalnodecommunicator.h" -#include "gromacs/utility/pleasecite.h" -#include "gromacs/utility/programcontext.h" -#include "gromacs/utility/smalloc.h" -#include "gromacs/utility/stringutil.h" - -#include "isimulator.h" -#include "membedholder.h" -#include "replicaexchange.h" -#include "simulatorbuilder.h" - -namespace gmx -{ - - -/*! \brief Manage any development feature flag variables encountered - * - * The use of dev features indicated by environment variables is - * logged in order to ensure that runs with such features enabled can - * be identified from their log and standard output. Any cross - * dependencies are also checked, and if unsatisfied, a fatal error - * issued. - * - * Note that some development features overrides are applied already here: - * the GPU communication flags are set to false in non-tMPI and non-CUDA builds. - * - * \param[in] mdlog Logger object. - * \param[in] useGpuForNonbonded True if the nonbonded task is offloaded in this run. - * \param[in] pmeRunMode The PME run mode for this run - * \returns The object populated with development feature flags. 
- */ -static DevelopmentFeatureFlags manageDevelopmentFeatures(const gmx::MDLogger& mdlog, - const bool useGpuForNonbonded, - const PmeRunMode pmeRunMode) -{ - DevelopmentFeatureFlags devFlags; - - // Some builds of GCC 5 give false positive warnings that these - // getenv results are ignored when clearly they are used. -#pragma GCC diagnostic push -#pragma GCC diagnostic ignored "-Wunused-result" - - devFlags.enableGpuBufferOps = - GMX_GPU_CUDA && useGpuForNonbonded && (getenv("GMX_USE_GPU_BUFFER_OPS") != nullptr); - devFlags.enableGpuHaloExchange = GMX_GPU_CUDA && GMX_THREAD_MPI && getenv("GMX_GPU_DD_COMMS") != nullptr; - devFlags.forceGpuUpdateDefault = (getenv("GMX_FORCE_UPDATE_DEFAULT_GPU") != nullptr) || GMX_FAHCORE; - devFlags.enableGpuPmePPComm = - GMX_GPU_CUDA && GMX_THREAD_MPI && getenv("GMX_GPU_PME_PP_COMMS") != nullptr; - -#pragma GCC diagnostic pop - - if (devFlags.enableGpuBufferOps) - { - GMX_LOG(mdlog.warning) - .asParagraph() - .appendTextFormatted( - "This run uses the 'GPU buffer ops' feature, enabled by the " - "GMX_USE_GPU_BUFFER_OPS environment variable."); - } - - if (devFlags.forceGpuUpdateDefault) - { - GMX_LOG(mdlog.warning) - .asParagraph() - .appendTextFormatted( - "This run will default to '-update gpu' as requested by the " - "GMX_FORCE_UPDATE_DEFAULT_GPU environment variable. 
GPU update with domain " - "decomposition lacks substantial testing and should be used with caution."); - } - - if (devFlags.enableGpuHaloExchange) - { - if (useGpuForNonbonded) - { - if (!devFlags.enableGpuBufferOps) - { - GMX_LOG(mdlog.warning) - .asParagraph() - .appendTextFormatted( - "Enabling GPU buffer operations required by GMX_GPU_DD_COMMS " - "(equivalent with GMX_USE_GPU_BUFFER_OPS=1)."); - devFlags.enableGpuBufferOps = true; - } - GMX_LOG(mdlog.warning) - .asParagraph() - .appendTextFormatted( - "This run has requested the 'GPU halo exchange' feature, enabled by " - "the " - "GMX_GPU_DD_COMMS environment variable."); - } - else - { - GMX_LOG(mdlog.warning) - .asParagraph() - .appendTextFormatted( - "GMX_GPU_DD_COMMS environment variable detected, but the 'GPU " - "halo exchange' feature will not be enabled as nonbonded interactions " - "are not offloaded."); - devFlags.enableGpuHaloExchange = false; - } - } - - if (devFlags.enableGpuPmePPComm) - { - if (pmeRunMode == PmeRunMode::GPU) - { - if (!devFlags.enableGpuBufferOps) - { - GMX_LOG(mdlog.warning) - .asParagraph() - .appendTextFormatted( - "Enabling GPU buffer operations required by GMX_GPU_PME_PP_COMMS " - "(equivalent with GMX_USE_GPU_BUFFER_OPS=1)."); - devFlags.enableGpuBufferOps = true; - } - GMX_LOG(mdlog.warning) - .asParagraph() - .appendTextFormatted( - "This run uses the 'GPU PME-PP communications' feature, enabled " - "by the GMX_GPU_PME_PP_COMMS environment variable."); - } - else - { - std::string clarification; - if (pmeRunMode == PmeRunMode::Mixed) - { - clarification = - "PME FFT and gather are not offloaded to the GPU (PME is running in mixed " - "mode)."; - } - else - { - clarification = "PME is not offloaded to the GPU."; - } - GMX_LOG(mdlog.warning) - .asParagraph() - .appendText( - "GMX_GPU_PME_PP_COMMS environment variable detected, but the " - "'GPU PME-PP communications' feature was not enabled as " - + clarification); - devFlags.enableGpuPmePPComm = false; - } - } - - return 
devFlags; -} - -/*! \brief Barrier for safe simultaneous thread access to mdrunner data - * - * Used to ensure that the master thread does not modify mdrunner during copy - * on the spawned threads. */ -static void threadMpiMdrunnerAccessBarrier() -{ -#if GMX_THREAD_MPI - MPI_Barrier(MPI_COMM_WORLD); -#endif -} - -Mdrunner Mdrunner::cloneOnSpawnedThread() const -{ - auto newRunner = Mdrunner(std::make_unique()); - - // All runners in the same process share a restraint manager resource because it is - // part of the interface to the client code, which is associated only with the - // original thread. Handles to the same resources can be obtained by copy. - { - newRunner.restraintManager_ = std::make_unique(*restraintManager_); - } - - // Copy members of master runner. - // \todo Replace with builder when Simulation context and/or runner phases are better defined. - // Ref https://gitlab.com/gromacs/gromacs/-/issues/2587 and https://gitlab.com/gromacs/gromacs/-/issues/2375 - newRunner.hw_opt = hw_opt; - newRunner.filenames = filenames; - - newRunner.hwinfo_ = hwinfo_; - newRunner.oenv = oenv; - newRunner.mdrunOptions = mdrunOptions; - newRunner.domdecOptions = domdecOptions; - newRunner.nbpu_opt = nbpu_opt; - newRunner.pme_opt = pme_opt; - newRunner.pme_fft_opt = pme_fft_opt; - newRunner.bonded_opt = bonded_opt; - newRunner.update_opt = update_opt; - newRunner.nstlist_cmdline = nstlist_cmdline; - newRunner.replExParams = replExParams; - newRunner.pforce = pforce; - // Give the spawned thread the newly created valid communicator - // for the simulation. - newRunner.libraryWorldCommunicator = MPI_COMM_WORLD; - newRunner.simulationCommunicator = MPI_COMM_WORLD; - newRunner.ms = ms; - newRunner.startingBehavior = startingBehavior; - newRunner.stopHandlerBuilder_ = std::make_unique(*stopHandlerBuilder_); - newRunner.inputHolder_ = inputHolder_; - - threadMpiMdrunnerAccessBarrier(); - - return newRunner; -} - -/*! \brief The callback used for running on spawned threads. 
- * - * Obtains the pointer to the master mdrunner object from the one - * argument permitted to the thread-launch API call, copies it to make - * a new runner for this thread, reinitializes necessary data, and - * proceeds to the simulation. */ -static void mdrunner_start_fn(const void* arg) -{ - try - { - auto masterMdrunner = reinterpret_cast(arg); - /* copy the arg list to make sure that it's thread-local. This - doesn't copy pointed-to items, of course; fnm, cr and fplog - are reset in the call below, all others should be const. */ - gmx::Mdrunner mdrunner = masterMdrunner->cloneOnSpawnedThread(); - mdrunner.mdrunner(); - } - GMX_CATCH_ALL_AND_EXIT_WITH_FATAL_ERROR -} - - -void Mdrunner::spawnThreads(int numThreadsToLaunch) -{ -#if GMX_THREAD_MPI - /* now spawn new threads that start mdrunner_start_fn(), while - the main thread returns. Thread affinity is handled later. */ - if (tMPI_Init_fn(TRUE, numThreadsToLaunch, TMPI_AFFINITY_NONE, mdrunner_start_fn, - static_cast(this)) - != TMPI_SUCCESS) - { - GMX_THROW(gmx::InternalError("Failed to spawn thread-MPI threads")); - } - - // Give the master thread the newly created valid communicator for - // the simulation. - libraryWorldCommunicator = MPI_COMM_WORLD; - simulationCommunicator = MPI_COMM_WORLD; - threadMpiMdrunnerAccessBarrier(); -#else - GMX_UNUSED_VALUE(numThreadsToLaunch); - GMX_UNUSED_VALUE(mdrunner_start_fn); -#endif -} - -} // namespace gmx - -/*! \brief Initialize variables for Verlet scheme simulation */ -static void prepare_verlet_scheme(FILE* fplog, - t_commrec* cr, - t_inputrec* ir, - int nstlist_cmdline, - const gmx_mtop_t* mtop, - const matrix box, - bool makeGpuPairList, - const gmx::CpuInfo& cpuinfo) -{ - // We checked the cut-offs in grompp, but double-check here. - // We have PME+LJcutoff kernels for rcoulomb>rvdw. 
- if (EEL_PME_EWALD(ir->coulombtype) && ir->vdwtype == eelCUT) - { - GMX_RELEASE_ASSERT(ir->rcoulomb >= ir->rvdw, - "With Verlet lists and PME we should have rcoulomb>=rvdw"); - } - else - { - GMX_RELEASE_ASSERT(ir->rcoulomb == ir->rvdw, - "With Verlet lists and no PME rcoulomb and rvdw should be identical"); - } - /* For NVE simulations, we will retain the initial list buffer */ - if (EI_DYNAMICS(ir->eI) && ir->verletbuf_tol > 0 && !(EI_MD(ir->eI) && ir->etc == etcNO)) - { - /* Update the Verlet buffer size for the current run setup */ - - /* Here we assume SIMD-enabled kernels are being used. But as currently - * calc_verlet_buffer_size gives the same results for 4x8 and 4x4 - * and 4x2 gives a larger buffer than 4x4, this is ok. - */ - ListSetupType listType = - (makeGpuPairList ? ListSetupType::Gpu : ListSetupType::CpuSimdWhenSupported); - VerletbufListSetup listSetup = verletbufGetSafeListSetup(listType); - - const real rlist_new = - calcVerletBufferSize(*mtop, det(box), *ir, ir->nstlist, ir->nstlist - 1, -1, listSetup); - - if (rlist_new != ir->rlist) - { - if (fplog != nullptr) - { - fprintf(fplog, - "\nChanging rlist from %g to %g for non-bonded %dx%d atom kernels\n\n", - ir->rlist, rlist_new, listSetup.cluster_size_i, listSetup.cluster_size_j); - } - ir->rlist = rlist_new; - } - } - - if (nstlist_cmdline > 0 && (!EI_DYNAMICS(ir->eI) || ir->verletbuf_tol <= 0)) - { - gmx_fatal(FARGS, "Can not set nstlist without %s", - !EI_DYNAMICS(ir->eI) ? "dynamics" : "verlet-buffer-tolerance"); - } - - if (EI_DYNAMICS(ir->eI)) - { - /* Set or try nstlist values */ - increaseNstlist(fplog, cr, ir, nstlist_cmdline, mtop, box, makeGpuPairList, cpuinfo); - } -} - -/*! 
\brief Override the nslist value in inputrec - * - * with value passed on the command line (if any) - */ -static void override_nsteps_cmdline(const gmx::MDLogger& mdlog, int64_t nsteps_cmdline, t_inputrec* ir) -{ - assert(ir); - - /* override with anything else than the default -2 */ - if (nsteps_cmdline > -2) - { - char sbuf_steps[STEPSTRSIZE]; - char sbuf_msg[STRLEN]; - - ir->nsteps = nsteps_cmdline; - if (EI_DYNAMICS(ir->eI) && nsteps_cmdline != -1) - { - sprintf(sbuf_msg, - "Overriding nsteps with value passed on the command line: %s steps, %.3g ps", - gmx_step_str(nsteps_cmdline, sbuf_steps), fabs(nsteps_cmdline * ir->delta_t)); - } - else - { - sprintf(sbuf_msg, "Overriding nsteps with value passed on the command line: %s steps", - gmx_step_str(nsteps_cmdline, sbuf_steps)); - } - - GMX_LOG(mdlog.warning).asParagraph().appendText(sbuf_msg); - } - else if (nsteps_cmdline < -2) - { - gmx_fatal(FARGS, "Invalid nsteps value passed on the command line: %" PRId64, nsteps_cmdline); - } - /* Do nothing if nsteps_cmdline == -2 */ -} - -namespace gmx -{ - -/*! \brief Return whether GPU acceleration of nonbondeds is supported with the given settings. - * - * If not, and if a warning may be issued, logs a warning about - * falling back to CPU code. With thread-MPI, only the first - * call to this function should have \c issueWarning true. */ -static bool gpuAccelerationOfNonbondedIsUseful(const MDLogger& mdlog, const t_inputrec& ir, bool issueWarning) -{ - bool gpuIsUseful = true; - std::string warning; - - if (ir.opts.ngener - ir.nwall > 1) - { - /* The GPU code does not support more than one energy group. - * If the user requested GPUs explicitly, a fatal error is given later. - */ - gpuIsUseful = false; - warning = - "Multiple energy groups is not implemented for GPUs, falling back to the CPU. 
" - "For better performance, run on the GPU without energy groups and then do " - "gmx mdrun -rerun option on the trajectory with an energy group .tpr file."; - } - - if (EI_TPI(ir.eI)) - { - gpuIsUseful = false; - warning = "TPI is not implemented for GPUs."; - } - - if (!gpuIsUseful && issueWarning) - { - GMX_LOG(mdlog.warning).asParagraph().appendText(warning); - } - - return gpuIsUseful; -} - -//! Initializes the logger for mdrun. -static gmx::LoggerOwner buildLogger(FILE* fplog, const bool isSimulationMasterRank) -{ - gmx::LoggerBuilder builder; - if (fplog != nullptr) - { - builder.addTargetFile(gmx::MDLogger::LogLevel::Info, fplog); - } - if (isSimulationMasterRank) - { - builder.addTargetStream(gmx::MDLogger::LogLevel::Warning, &gmx::TextOutputFile::standardError()); - } - return builder.build(); -} - -//! Make a TaskTarget from an mdrun argument string. -static TaskTarget findTaskTarget(const char* optionString) -{ - TaskTarget returnValue = TaskTarget::Auto; - - if (strncmp(optionString, "auto", 3) == 0) - { - returnValue = TaskTarget::Auto; - } - else if (strncmp(optionString, "cpu", 3) == 0) - { - returnValue = TaskTarget::Cpu; - } - else if (strncmp(optionString, "gpu", 3) == 0) - { - returnValue = TaskTarget::Gpu; - } - else - { - GMX_ASSERT(false, "Option string should have been checked for sanity already"); - } - - return returnValue; -} - -//! Finish run, aggregate data to print performance info. -static void finish_run(FILE* fplog, - const gmx::MDLogger& mdlog, - const t_commrec* cr, - const t_inputrec* inputrec, - t_nrnb nrnb[], - gmx_wallcycle_t wcycle, - gmx_walltime_accounting_t walltime_accounting, - nonbonded_verlet_t* nbv, - const gmx_pme_t* pme, - gmx_bool bWriteStat) -{ - double delta_t = 0; - double nbfs = 0, mflop = 0; - double elapsed_time, elapsed_time_over_all_ranks, elapsed_time_over_all_threads, - elapsed_time_over_all_threads_over_all_ranks; - /* Control whether it is valid to print a report. 
Only the - simulation master may print, but it should not do so if the run - terminated e.g. before a scheduled reset step. This is - complicated by the fact that PME ranks are unaware of the - reason why they were sent a pmerecvqxFINISH. To avoid - communication deadlocks, we always do the communication for the - report, even if we've decided not to write the report, because - how long it takes to finish the run is not important when we've - decided not to report on the simulation performance. - - Further, we only report performance for dynamical integrators, - because those are the only ones for which we plan to - consider doing any optimizations. */ - bool printReport = EI_DYNAMICS(inputrec->eI) && SIMMASTER(cr); - - if (printReport && !walltime_accounting_get_valid_finish(walltime_accounting)) - { - GMX_LOG(mdlog.warning) - .asParagraph() - .appendText("Simulation ended prematurely, no performance report will be written."); - printReport = false; - } - - t_nrnb* nrnb_tot; - std::unique_ptr nrnbTotalStorage; - if (cr->nnodes > 1) - { - nrnbTotalStorage = std::make_unique(); - nrnb_tot = nrnbTotalStorage.get(); -#if GMX_MPI - MPI_Allreduce(nrnb->n, nrnb_tot->n, eNRNB, MPI_DOUBLE, MPI_SUM, cr->mpi_comm_mysim); -#endif - } - else - { - nrnb_tot = nrnb; - } - - elapsed_time = walltime_accounting_get_time_since_reset(walltime_accounting); - elapsed_time_over_all_threads = - walltime_accounting_get_time_since_reset_over_all_threads(walltime_accounting); - if (cr->nnodes > 1) - { -#if GMX_MPI - /* reduce elapsed_time over all MPI ranks in the current simulation */ - MPI_Allreduce(&elapsed_time, &elapsed_time_over_all_ranks, 1, MPI_DOUBLE, MPI_SUM, - cr->mpi_comm_mysim); - elapsed_time_over_all_ranks /= cr->nnodes; - /* Reduce elapsed_time_over_all_threads over all MPI ranks in the - * current simulation. 
*/ - MPI_Allreduce(&elapsed_time_over_all_threads, &elapsed_time_over_all_threads_over_all_ranks, - 1, MPI_DOUBLE, MPI_SUM, cr->mpi_comm_mysim); -#endif - } - else - { - elapsed_time_over_all_ranks = elapsed_time; - elapsed_time_over_all_threads_over_all_ranks = elapsed_time_over_all_threads; - } - - if (printReport) - { - print_flop(fplog, nrnb_tot, &nbfs, &mflop); - } - - if (thisRankHasDuty(cr, DUTY_PP) && DOMAINDECOMP(cr)) - { - print_dd_statistics(cr, inputrec, fplog); - } - - /* TODO Move the responsibility for any scaling by thread counts - * to the code that handled the thread region, so that there's a - * mechanism to keep cycle counting working during the transition - * to task parallelism. */ - int nthreads_pp = gmx_omp_nthreads_get(emntNonbonded); - int nthreads_pme = gmx_omp_nthreads_get(emntPME); - wallcycle_scale_by_num_threads(wcycle, thisRankHasDuty(cr, DUTY_PME) && !thisRankHasDuty(cr, DUTY_PP), - nthreads_pp, nthreads_pme); - auto cycle_sum(wallcycle_sum(cr, wcycle)); - - if (printReport) - { - auto nbnxn_gpu_timings = - (nbv != nullptr && nbv->useGpu()) ? 
Nbnxm::gpu_get_timings(nbv->gpu_nbv) : nullptr; - gmx_wallclock_gpu_pme_t pme_gpu_timings = {}; - - if (pme_gpu_task_enabled(pme)) - { - pme_gpu_get_timings(pme, &pme_gpu_timings); - } - wallcycle_print(fplog, mdlog, cr->nnodes, cr->npmenodes, nthreads_pp, nthreads_pme, - elapsed_time_over_all_ranks, wcycle, cycle_sum, nbnxn_gpu_timings, - &pme_gpu_timings); - - if (EI_DYNAMICS(inputrec->eI)) - { - delta_t = inputrec->delta_t; - } - - if (fplog) - { - print_perf(fplog, elapsed_time_over_all_threads_over_all_ranks, elapsed_time_over_all_ranks, - walltime_accounting_get_nsteps_done_since_reset(walltime_accounting), - delta_t, nbfs, mflop); - } - if (bWriteStat) - { - print_perf(stderr, elapsed_time_over_all_threads_over_all_ranks, elapsed_time_over_all_ranks, - walltime_accounting_get_nsteps_done_since_reset(walltime_accounting), - delta_t, nbfs, mflop); - } - } -} - -int Mdrunner::mdrunner() -{ - matrix box; - t_forcerec* fr = nullptr; - real ewaldcoeff_q = 0; - real ewaldcoeff_lj = 0; - int nChargePerturbed = -1, nTypePerturbed = 0; - gmx_wallcycle_t wcycle; - gmx_walltime_accounting_t walltime_accounting = nullptr; - MembedHolder membedHolder(filenames.size(), filenames.data()); - - /* CAUTION: threads may be started later on in this function, so - cr doesn't reflect the final parallel state right now */ - gmx_mtop_t mtop; - - /* TODO: inputrec should tell us whether we use an algorithm, not a file option */ - const bool doEssentialDynamics = opt2bSet("-ei", filenames.size(), filenames.data()); - const bool doRerun = mdrunOptions.rerun; - - // Handle task-assignment related user options. - EmulateGpuNonbonded emulateGpuNonbonded = - (getenv("GMX_EMULATE_GPU") != nullptr ? 
EmulateGpuNonbonded::Yes : EmulateGpuNonbonded::No); - - std::vector userGpuTaskAssignment; - try - { - userGpuTaskAssignment = parseUserTaskAssignmentString(hw_opt.userGpuTaskAssignment); - } - GMX_CATCH_ALL_AND_EXIT_WITH_FATAL_ERROR - auto nonbondedTarget = findTaskTarget(nbpu_opt); - auto pmeTarget = findTaskTarget(pme_opt); - auto pmeFftTarget = findTaskTarget(pme_fft_opt); - auto bondedTarget = findTaskTarget(bonded_opt); - auto updateTarget = findTaskTarget(update_opt); - - FILE* fplog = nullptr; - // If we are appending, we don't write log output because we need - // to check that the old log file matches what the checkpoint file - // expects. Otherwise, we should start to write log output now if - // there is a file ready for it. - if (logFileHandle != nullptr && startingBehavior != StartingBehavior::RestartWithAppending) - { - fplog = gmx_fio_getfp(logFileHandle); - } - const bool isSimulationMasterRank = findIsSimulationMasterRank(ms, simulationCommunicator); - gmx::LoggerOwner logOwner(buildLogger(fplog, isSimulationMasterRank)); - gmx::MDLogger mdlog(logOwner.logger()); - - gmx_print_detected_hardware(fplog, isSimulationMasterRank && isMasterSim(ms), mdlog, hwinfo_); - - std::vector gpuIdsToUse = makeGpuIdsToUse(hwinfo_->deviceInfoList, hw_opt.gpuIdsAvailable); - const int numDevicesToUse = gmx::ssize(gpuIdsToUse); - - // Print citation requests after all software/hardware printing - pleaseCiteGromacs(fplog); - - // Note: legacy program logic relies on checking whether these pointers are assigned. - // Objects may or may not be allocated later. - std::unique_ptr inputrec; - std::unique_ptr globalState; - - auto partialDeserializedTpr = std::make_unique(); - - if (isSimulationMasterRank) - { - // Allocate objects to be initialized by later function calls. 
- /* Only the master rank has the global state */ - globalState = std::make_unique(); - inputrec = std::make_unique(); - - /* Read (nearly) all data required for the simulation - * and keep the partly serialized tpr contents to send to other ranks later - */ - applyGlobalSimulationState(*inputHolder_.get(), partialDeserializedTpr.get(), - globalState.get(), inputrec.get(), &mtop); - } - - /* Check and update the hardware options for internal consistency */ - checkAndUpdateHardwareOptions(mdlog, &hw_opt, isSimulationMasterRank, domdecOptions.numPmeRanks, - inputrec.get()); - - if (GMX_THREAD_MPI && isSimulationMasterRank) - { - bool useGpuForNonbonded = false; - bool useGpuForPme = false; - try - { - GMX_RELEASE_ASSERT(inputrec != nullptr, "Keep the compiler happy"); - - // If the user specified the number of ranks, then we must - // respect that, but in default mode, we need to allow for - // the number of GPUs to choose the number of ranks. - auto canUseGpuForNonbonded = buildSupportsNonbondedOnGpu(nullptr); - useGpuForNonbonded = decideWhetherToUseGpusForNonbondedWithThreadMpi( - nonbondedTarget, numDevicesToUse, userGpuTaskAssignment, emulateGpuNonbonded, - canUseGpuForNonbonded, - gpuAccelerationOfNonbondedIsUseful(mdlog, *inputrec, GMX_THREAD_MPI), - hw_opt.nthreads_tmpi); - useGpuForPme = decideWhetherToUseGpusForPmeWithThreadMpi( - useGpuForNonbonded, pmeTarget, pmeFftTarget, numDevicesToUse, userGpuTaskAssignment, - *hwinfo_, *inputrec, hw_opt.nthreads_tmpi, domdecOptions.numPmeRanks); - } - GMX_CATCH_ALL_AND_EXIT_WITH_FATAL_ERROR - - /* Determine how many thread-MPI ranks to start. - * - * TODO Over-writing the user-supplied value here does - * prevent any possible subsequent checks from working - * correctly. */ - hw_opt.nthreads_tmpi = - get_nthreads_mpi(hwinfo_, &hw_opt, numDevicesToUse, useGpuForNonbonded, useGpuForPme, - inputrec.get(), &mtop, mdlog, membedHolder.doMembed()); - - // Now start the threads for thread MPI. 
- spawnThreads(hw_opt.nthreads_tmpi); - // The spawned threads enter mdrunner() and execution of - // master and spawned threads joins at the end of this block. - } - - GMX_RELEASE_ASSERT(!GMX_MPI || ms || simulationCommunicator != MPI_COMM_NULL, - "Must have valid communicator unless running a multi-simulation"); - CommrecHandle crHandle = init_commrec(simulationCommunicator); - t_commrec* cr = crHandle.get(); - GMX_RELEASE_ASSERT(cr != nullptr, "Must have valid commrec"); - - PhysicalNodeCommunicator physicalNodeComm(libraryWorldCommunicator, gmx_physicalnode_id_hash()); - - // If we detected the topology on this system, double-check that it makes sense - if (hwinfo_->hardwareTopology->isThisSystem()) - { - hardwareTopologyDoubleCheckDetection(mdlog, *hwinfo_->hardwareTopology); - } - - if (PAR(cr)) - { - /* now broadcast everything to the non-master nodes/threads: */ - if (!isSimulationMasterRank) - { - // Until now, only the master rank has a non-null pointer. - // On non-master ranks, allocate the object that will receive data in the following call. - inputrec = std::make_unique(); - } - init_parallel(cr->mpiDefaultCommunicator, MASTER(cr), inputrec.get(), &mtop, - partialDeserializedTpr.get()); - } - GMX_RELEASE_ASSERT(inputrec != nullptr, "All ranks should have a valid inputrec now"); - partialDeserializedTpr.reset(nullptr); - - // Now the number of ranks is known to all ranks, and each knows - // the inputrec read by the master rank. The ranks can now all run - // the task-deciding functions and will agree on the result - // without needing to communicate. - const bool useDomainDecomposition = (PAR(cr) && !(EI_TPI(inputrec->eI) || inputrec->eI == eiNM)); - - // Note that these variables describe only their own node. - // - // Note that when bonded interactions run on a GPU they always run - // alongside a nonbonded task, so do not influence task assignment - // even though they affect the force calculation workload. 
- bool useGpuForNonbonded = false; - bool useGpuForPme = false; - bool useGpuForBonded = false; - bool useGpuForUpdate = false; - bool gpusWereDetected = hwinfo_->ngpu_compatible_tot > 0; - try - { - // It's possible that there are different numbers of GPUs on - // different nodes, which is the user's responsibility to - // handle. If unsuitable, we will notice that during task - // assignment. - auto canUseGpuForNonbonded = buildSupportsNonbondedOnGpu(nullptr); - useGpuForNonbonded = decideWhetherToUseGpusForNonbonded( - nonbondedTarget, userGpuTaskAssignment, emulateGpuNonbonded, canUseGpuForNonbonded, - gpuAccelerationOfNonbondedIsUseful(mdlog, *inputrec, !GMX_THREAD_MPI), gpusWereDetected); - useGpuForPme = decideWhetherToUseGpusForPme( - useGpuForNonbonded, pmeTarget, pmeFftTarget, userGpuTaskAssignment, *hwinfo_, - *inputrec, cr->sizeOfDefaultCommunicator, domdecOptions.numPmeRanks, gpusWereDetected); - useGpuForBonded = decideWhetherToUseGpusForBonded(useGpuForNonbonded, useGpuForPme, - bondedTarget, *inputrec, mtop, - domdecOptions.numPmeRanks, gpusWereDetected); - } - GMX_CATCH_ALL_AND_EXIT_WITH_FATAL_ERROR - - const PmeRunMode pmeRunMode = determinePmeRunMode(useGpuForPme, pmeFftTarget, *inputrec); - - // Initialize development feature flags that enabled by environment variable - // and report those features that are enabled. - const DevelopmentFeatureFlags devFlags = - manageDevelopmentFeatures(mdlog, useGpuForNonbonded, pmeRunMode); - - const bool useModularSimulator = - checkUseModularSimulator(false, inputrec.get(), doRerun, mtop, ms, replExParams, - nullptr, doEssentialDynamics, membedHolder.doMembed()); - - // Build restraints. - // TODO: hide restraint implementation details from Mdrunner. - // There is nothing unique about restraints at this point as far as the - // Mdrunner is concerned. The Mdrunner should just be getting a sequence of - // factory functions from the SimulationContext on which to call mdModules_->add(). 
- // TODO: capture all restraints into a single RestraintModule, passed to the runner builder. - for (auto&& restraint : restraintManager_->getRestraints()) - { - auto module = RestraintMDModule::create(restraint, restraint->sites()); - mdModules_->add(std::move(module)); - } - - // TODO: Error handling - mdModules_->assignOptionsToModules(*inputrec->params, nullptr); - // now that the MdModules know their options, they know which callbacks to sign up to - mdModules_->subscribeToSimulationSetupNotifications(); - const auto& mdModulesNotifier = mdModules_->notifier().simulationSetupNotifications_; - - if (inputrec->internalParameters != nullptr) - { - mdModulesNotifier.notify(*inputrec->internalParameters); - } - - if (fplog != nullptr) - { - pr_inputrec(fplog, 0, "Input Parameters", inputrec.get(), FALSE); - fprintf(fplog, "\n"); - } - - if (SIMMASTER(cr)) - { - /* In rerun, set velocities to zero if present */ - if (doRerun && ((globalState->flags & (1 << estV)) != 0)) - { - // rerun does not use velocities - GMX_LOG(mdlog.info) - .asParagraph() - .appendText( - "Rerun trajectory contains velocities. Rerun does only evaluate " - "potential energy and forces. The velocities will be ignored."); - for (int i = 0; i < globalState->natoms; i++) - { - clear_rvec(globalState->v[i]); - } - globalState->flags &= ~(1 << estV); - } - - /* now make sure the state is initialized and propagated */ - set_state_entries(globalState.get(), inputrec.get(), useModularSimulator); - } - - /* NM and TPI parallelize over force/energy calculations, not atoms, - * so we need to initialize and broadcast the global state. - */ - if (inputrec->eI == eiNM || inputrec->eI == eiTPI) - { - if (!MASTER(cr)) - { - globalState = std::make_unique(); - } - broadcastStateWithoutDynamics(cr->mpiDefaultCommunicator, DOMAINDECOMP(cr), PAR(cr), - globalState.get()); - } - - /* A parallel command line option consistency check that we can - only do after any threads have started. 
*/ - if (!PAR(cr) - && (domdecOptions.numCells[XX] > 1 || domdecOptions.numCells[YY] > 1 - || domdecOptions.numCells[ZZ] > 1 || domdecOptions.numPmeRanks > 0)) - { - gmx_fatal(FARGS, - "The -dd or -npme option request a parallel simulation, " -#if !GMX_MPI - "but %s was compiled without threads or MPI enabled", - output_env_get_program_display_name(oenv)); -#elif GMX_THREAD_MPI - "but the number of MPI-threads (option -ntmpi) is not set or is 1"); -#else - "but %s was not started through mpirun/mpiexec or only one rank was requested " - "through mpirun/mpiexec", - output_env_get_program_display_name(oenv)); -#endif - } - - if (doRerun && (EI_ENERGY_MINIMIZATION(inputrec->eI) || eiNM == inputrec->eI)) - { - gmx_fatal(FARGS, - "The .mdp file specified an energy mininization or normal mode algorithm, and " - "these are not compatible with mdrun -rerun"); - } - - if (!(EEL_PME(inputrec->coulombtype) || EVDW_PME(inputrec->vdwtype))) - { - if (domdecOptions.numPmeRanks > 0) - { - gmx_fatal_collective(FARGS, cr->mpiDefaultCommunicator, MASTER(cr), - "PME-only ranks are requested, but the system does not use PME " - "for electrostatics or LJ"); - } - - domdecOptions.numPmeRanks = 0; - } - - if (useGpuForNonbonded && domdecOptions.numPmeRanks < 0) - { - /* With NB GPUs we don't automatically use PME-only CPU ranks. PME ranks can - * improve performance with many threads per GPU, since our OpenMP - * scaling is bad, but it's difficult to automate the setup. - */ - domdecOptions.numPmeRanks = 0; - } - if (useGpuForPme) - { - if (domdecOptions.numPmeRanks < 0) - { - domdecOptions.numPmeRanks = 0; - // TODO possibly print a note that one can opt-in for a separate PME GPU rank? - } - else - { - GMX_RELEASE_ASSERT(domdecOptions.numPmeRanks <= 1, - "PME GPU decomposition is not supported"); - } - } - - /* NMR restraints must be initialized before load_checkpoint, - * since with time averaging the history is added to t_state. 
- * For proper consistency check we therefore need to extend - * t_state here. - * So the PME-only nodes (if present) will also initialize - * the distance restraints. - */ - - /* This needs to be called before read_checkpoint to extend the state */ - t_disresdata* disresdata; - snew(disresdata, 1); - init_disres(fplog, &mtop, inputrec.get(), DisResRunMode::MDRun, - MASTER(cr) ? DDRole::Master : DDRole::Agent, - PAR(cr) ? NumRanks::Multiple : NumRanks::Single, cr->mpi_comm_mysim, ms, disresdata, - globalState.get(), replExParams.exchangeInterval > 0); - - t_oriresdata* oriresdata; - snew(oriresdata, 1); - init_orires(fplog, &mtop, inputrec.get(), cr, ms, globalState.get(), oriresdata); - - auto deform = prepareBoxDeformation( - globalState != nullptr ? globalState->box : box, MASTER(cr) ? DDRole::Master : DDRole::Agent, - PAR(cr) ? NumRanks::Multiple : NumRanks::Single, cr->mpi_comm_mygroup, *inputrec); - -#if GMX_FAHCORE - /* We have to remember the generation's first step before reading checkpoint. - This way, we can report to the F@H core both the generation's first step - and the restored first step, thus making it able to distinguish between - an interruption/resume and start of the n-th generation simulation. - Having this information, the F@H core can correctly calculate and report - the progress. - */ - int gen_first_step = 0; - if (MASTER(cr)) - { - gen_first_step = inputrec->init_step; - } -#endif - - ObservablesHistory observablesHistory = {}; - - auto modularSimulatorCheckpointData = std::make_unique(); - if (startingBehavior != StartingBehavior::NewSimulation) - { - /* Check if checkpoint file exists before doing continuation. - * This way we can use identical input options for the first and subsequent runs... - */ - if (mdrunOptions.numStepsCommandline > -2) - { - /* Temporarily set the number of steps to unlimited to avoid - * triggering the nsteps check in load_checkpoint(). - * This hack will go away soon when the -nsteps option is removed. 
- */ - inputrec->nsteps = -1; - } - - // Finish applying initial simulation state information from external sources on all ranks. - // Reconcile checkpoint file data with Mdrunner state established up to this point. - applyLocalState(*inputHolder_.get(), logFileHandle, cr, domdecOptions.numCells, - inputrec.get(), globalState.get(), &observablesHistory, - mdrunOptions.reproducible, mdModules_->notifier(), - modularSimulatorCheckpointData.get(), useModularSimulator); - // TODO: (#3652) Synchronize filesystem state, SimulationInput contents, and program - // invariants - // on all code paths. - // Write checkpoint or provide hook to update SimulationInput. - // If there was a checkpoint file, SimulationInput contains more information - // than if there wasn't. At this point, we have synchronized the in-memory - // state with the filesystem state only for restarted simulations. We should - // be calling applyLocalState unconditionally and expect that the completeness - // of SimulationInput is not dependent on its creation method. - - if (startingBehavior == StartingBehavior::RestartWithAppending && logFileHandle) - { - // Now we can start normal logging to the truncated log file. - fplog = gmx_fio_getfp(logFileHandle); - prepareLogAppending(fplog); - logOwner = buildLogger(fplog, MASTER(cr)); - mdlog = logOwner.logger(); - } - } - -#if GMX_FAHCORE - if (MASTER(cr)) - { - fcRegisterSteps(inputrec->nsteps + inputrec->init_step, gen_first_step); - } -#endif - - if (mdrunOptions.numStepsCommandline > -2) - { - GMX_LOG(mdlog.info) - .asParagraph() - .appendText( - "The -nsteps functionality is deprecated, and may be removed in a future " - "version. 
" - "Consider using gmx convert-tpr -nsteps or changing the appropriate .mdp " - "file field."); - } - /* override nsteps with value set on the commandline */ - override_nsteps_cmdline(mdlog, mdrunOptions.numStepsCommandline, inputrec.get()); - - if (isSimulationMasterRank) - { - copy_mat(globalState->box, box); - } - - if (PAR(cr)) - { - gmx_bcast(sizeof(box), box, cr->mpiDefaultCommunicator); - } - - if (inputrec->cutoff_scheme != ecutsVERLET) - { - gmx_fatal(FARGS, - "This group-scheme .tpr file can no longer be run by mdrun. Please update to the " - "Verlet scheme, or use an earlier version of GROMACS if necessary."); - } - /* Update rlist and nstlist. */ - /* Note: prepare_verlet_scheme is calling increaseNstlist(...), which (while attempting to - * increase rlist) tries to check if the newly chosen value fits with the DD scheme. As this is - * run before any DD scheme is set up, this check is never executed. See #3334 for more details. - */ - prepare_verlet_scheme(fplog, cr, inputrec.get(), nstlist_cmdline, &mtop, box, - useGpuForNonbonded || (emulateGpuNonbonded == EmulateGpuNonbonded::Yes), - *hwinfo_->cpuInfo); - - // This builder is necessary while we have multi-part construction - // of DD. Before DD is constructed, we use the existence of - // the builder object to indicate that further construction of DD - // is needed. 
- std::unique_ptr ddBuilder; - if (useDomainDecomposition) - { - ddBuilder = std::make_unique( - mdlog, cr, domdecOptions, mdrunOptions, mtop, *inputrec, box, - positionsFromStatePointer(globalState.get())); - } - else - { - /* PME, if used, is done on all nodes with 1D decomposition */ - cr->nnodes = cr->sizeOfDefaultCommunicator; - cr->sim_nodeid = cr->rankInDefaultCommunicator; - cr->nodeid = cr->rankInDefaultCommunicator; - cr->npmenodes = 0; - cr->duty = (DUTY_PP | DUTY_PME); - - if (inputrec->pbcType == PbcType::Screw) - { - gmx_fatal(FARGS, "pbc=screw is only implemented with domain decomposition"); - } - } - - // Produce the task assignment for this rank - done after DD is constructed - GpuTaskAssignments gpuTaskAssignments = GpuTaskAssignmentsBuilder::build( - gpuIdsToUse, userGpuTaskAssignment, *hwinfo_, simulationCommunicator, physicalNodeComm, - nonbondedTarget, pmeTarget, bondedTarget, updateTarget, useGpuForNonbonded, - useGpuForPme, thisRankHasDuty(cr, DUTY_PP), - // TODO cr->duty & DUTY_PME should imply that a PME - // algorithm is active, but currently does not. - EEL_PME(inputrec->coulombtype) && thisRankHasDuty(cr, DUTY_PME)); - - // Get the device handles for the modules, nullptr when no task is assigned. - int deviceId = -1; - DeviceInformation* deviceInfo = gpuTaskAssignments.initDevice(&deviceId); - - // timing enabling - TODO put this in gpu_utils (even though generally this is just option handling?) - bool useTiming = true; - - if (GMX_GPU_CUDA) - { - /* WARNING: CUDA timings are incorrect with multiple streams. - * This is the main reason why they are disabled by default. - */ - // TODO: Consider turning on by default when we can detect nr of streams. - useTiming = (getenv("GMX_ENABLE_GPU_TIMING") != nullptr); - } - else if (GMX_GPU_OPENCL) - { - useTiming = (getenv("GMX_DISABLE_GPU_TIMING") == nullptr); - } - - // TODO Currently this is always built, yet DD partition code - // checks if it is built before using it. 
Probably it should - // become an MDModule that is made only when another module - // requires it (e.g. pull, CompEl, density fitting), so that we - // don't update the local atom sets unilaterally every step. - LocalAtomSetManager atomSets; - if (ddBuilder) - { - // TODO Pass the GPU streams to ddBuilder to use in buffer - // transfers (e.g. halo exchange) - cr->dd = ddBuilder->build(&atomSets); - // The builder's job is done, so destruct it - ddBuilder.reset(nullptr); - // Note that local state still does not exist yet. - } - // Ensure that all atoms within the same update group are in the - // same periodic image. Otherwise, a simulation that did not use - // update groups (e.g. a single-rank simulation) cannot always be - // correctly restarted in a way that does use update groups - // (e.g. a multi-rank simulation). - if (isSimulationMasterRank) - { - const bool useUpdateGroups = cr->dd ? ddUsesUpdateGroups(*cr->dd) : false; - if (useUpdateGroups) - { - putUpdateGroupAtomsInSamePeriodicImage(*cr->dd, mtop, globalState->box, globalState->x); - } - } - - // The GPU update is decided here because we need to know whether the constraints or - // SETTLEs can span accross the domain borders (i.e. whether or not update groups are - // defined). This is only known after DD is initialized, hence decision on using GPU - // update is done so late. - try - { - const bool useUpdateGroups = cr->dd ? 
ddUsesUpdateGroups(*cr->dd) : false; - const bool haveFrozenAtoms = inputrecFrozenAtoms(inputrec.get()); - - useGpuForUpdate = decideWhetherToUseGpuForUpdate( - useDomainDecomposition, useUpdateGroups, pmeRunMode, domdecOptions.numPmeRanks > 0, - useGpuForNonbonded, updateTarget, gpusWereDetected, *inputrec, mtop, - doEssentialDynamics, gmx_mtop_ftype_count(mtop, F_ORIRES) > 0, - replExParams.exchangeInterval > 0, haveFrozenAtoms, doRerun, devFlags, mdlog); - } - GMX_CATCH_ALL_AND_EXIT_WITH_FATAL_ERROR - - const bool printHostName = (cr->nnodes > 1); - gpuTaskAssignments.reportGpuUsage(mdlog, printHostName, useGpuForBonded, pmeRunMode, useGpuForUpdate); - - const bool disableNonbondedCalculation = (getenv("GMX_NO_NONBONDED") != nullptr); - if (disableNonbondedCalculation) - { - /* turn off non-bonded calculations */ - GMX_LOG(mdlog.warning) - .asParagraph() - .appendText( - "Found environment variable GMX_NO_NONBONDED.\n" - "Disabling nonbonded calculations."); - } - - MdrunScheduleWorkload runScheduleWork; - - bool useGpuDirectHalo = decideWhetherToUseGpuForHalo( - devFlags, havePPDomainDecomposition(cr), useGpuForNonbonded, useModularSimulator, - doRerun, EI_ENERGY_MINIMIZATION(inputrec->eI)); - - // Also populates the simulation constant workload description. - runScheduleWork.simulationWork = createSimulationWorkload( - *inputrec, disableNonbondedCalculation, devFlags, useGpuForNonbonded, pmeRunMode, - useGpuForBonded, useGpuForUpdate, useGpuDirectHalo); - - std::unique_ptr deviceStreamManager = nullptr; - - if (deviceInfo != nullptr) - { - if (DOMAINDECOMP(cr) && thisRankHasDuty(cr, DUTY_PP)) - { - dd_setup_dlb_resource_sharing(cr, deviceId); - } - deviceStreamManager = std::make_unique( - *deviceInfo, havePPDomainDecomposition(cr), runScheduleWork.simulationWork, useTiming); - } - - // If the user chose a task assignment, give them some hints - // where appropriate. 
- if (!userGpuTaskAssignment.empty()) - { - gpuTaskAssignments.logPerformanceHints(mdlog, numDevicesToUse); - } - - if (PAR(cr)) - { - /* After possible communicator splitting in make_dd_communicators. - * we can set up the intra/inter node communication. - */ - gmx_setup_nodecomm(fplog, cr); - } - -#if GMX_MPI - if (isMultiSim(ms)) - { - GMX_LOG(mdlog.warning) - .asParagraph() - .appendTextFormatted( - "This is simulation %d out of %d running as a composite GROMACS\n" - "multi-simulation job. Setup for this simulation:\n", - ms->simulationIndex_, ms->numSimulations_); - } - GMX_LOG(mdlog.warning) - .appendTextFormatted("Using %d MPI %s\n", cr->nnodes, -# if GMX_THREAD_MPI - cr->nnodes == 1 ? "thread" : "threads" -# else - cr->nnodes == 1 ? "process" : "processes" -# endif - ); - fflush(stderr); -#endif - - // If mdrun -pin auto honors any affinity setting that already - // exists. If so, it is nice to provide feedback about whether - // that existing affinity setting was from OpenMP or something - // else, so we run this code both before and after we initialize - // the OpenMP support. - gmx_check_thread_affinity_set(mdlog, &hw_opt, hwinfo_->nthreads_hw_avail, FALSE); - /* Check and update the number of OpenMP threads requested */ - checkAndUpdateRequestedNumOpenmpThreads(&hw_opt, *hwinfo_, cr, ms, physicalNodeComm.size_, - pmeRunMode, mtop, *inputrec); - - gmx_omp_nthreads_init(mdlog, cr, hwinfo_->nthreads_hw_avail, physicalNodeComm.size_, - hw_opt.nthreads_omp, hw_opt.nthreads_omp_pme, !thisRankHasDuty(cr, DUTY_PP)); - - // Enable FP exception detection, but not in - // Release mode and not for compilers with known buggy FP - // exception support (clang with any optimization) or suspected - // buggy FP exception support (gcc 7.* with optimization). 
-#if !defined NDEBUG \ - && !((defined __clang__ || (defined(__GNUC__) && !defined(__ICC) && __GNUC__ == 7)) \ - && defined __OPTIMIZE__) - const bool bEnableFPE = true; -#else - const bool bEnableFPE = false; -#endif - // FIXME - reconcile with gmx_feenableexcept() call from CommandLineModuleManager::run() - if (bEnableFPE) - { - gmx_feenableexcept(); - } - - /* Now that we know the setup is consistent, check for efficiency */ - check_resource_division_efficiency(hwinfo_, gpuTaskAssignments.thisRankHasAnyGpuTask(), - mdrunOptions.ntompOptionIsSet, cr, mdlog); - - /* getting number of PP/PME threads on this MPI / tMPI rank. - PME: env variable should be read only on one node to make sure it is - identical everywhere; - */ - const int numThreadsOnThisRank = thisRankHasDuty(cr, DUTY_PP) ? gmx_omp_nthreads_get(emntNonbonded) - : gmx_omp_nthreads_get(emntPME); - checkHardwareOversubscription(numThreadsOnThisRank, cr->nodeid, *hwinfo_->hardwareTopology, - physicalNodeComm, mdlog); - - // Enable Peer access between GPUs where available - // Only for DD, only master PP rank needs to perform setup, and only if thread MPI plus - // any of the GPU communication features are active. - if (DOMAINDECOMP(cr) && MASTER(cr) && thisRankHasDuty(cr, DUTY_PP) && GMX_THREAD_MPI - && (runScheduleWork.simulationWork.useGpuHaloExchange - || runScheduleWork.simulationWork.useGpuPmePpCommunication)) - { - setupGpuDevicePeerAccess(gpuIdsToUse, mdlog); - } - - if (hw_opt.threadAffinity != ThreadAffinity::Off) - { - /* Before setting affinity, check whether the affinity has changed - * - which indicates that probably the OpenMP library has changed it - * since we first checked). 
- */ - gmx_check_thread_affinity_set(mdlog, &hw_opt, hwinfo_->nthreads_hw_avail, TRUE); - - int numThreadsOnThisNode, intraNodeThreadOffset; - analyzeThreadsOnThisNode(physicalNodeComm, numThreadsOnThisRank, &numThreadsOnThisNode, - &intraNodeThreadOffset); - - /* Set the CPU affinity */ - gmx_set_thread_affinity(mdlog, cr, &hw_opt, *hwinfo_->hardwareTopology, numThreadsOnThisRank, - numThreadsOnThisNode, intraNodeThreadOffset, nullptr); - } - - if (mdrunOptions.timingOptions.resetStep > -1) - { - GMX_LOG(mdlog.info) - .asParagraph() - .appendText( - "The -resetstep functionality is deprecated, and may be removed in a " - "future version."); - } - wcycle = wallcycle_init(fplog, mdrunOptions.timingOptions.resetStep, cr); - - if (PAR(cr)) - { - /* Master synchronizes its value of reset_counters with all nodes - * including PME only nodes */ - int64_t reset_counters = wcycle_get_reset_counters(wcycle); - gmx_bcast(sizeof(reset_counters), &reset_counters, cr->mpi_comm_mysim); - wcycle_set_reset_counters(wcycle, reset_counters); - } - - // Membrane embedding must be initialized before we call init_forcerec() - membedHolder.initializeMembed(fplog, filenames.size(), filenames.data(), &mtop, inputrec.get(), - globalState.get(), cr, &mdrunOptions.checkpointOptions.period); - - const bool thisRankHasPmeGpuTask = gpuTaskAssignments.thisRankHasPmeGpuTask(); - std::unique_ptr mdAtoms; - std::unique_ptr vsite; - std::unique_ptr gpuBonded; - - t_nrnb nrnb; - if (thisRankHasDuty(cr, DUTY_PP)) - { - mdModulesNotifier.notify(*cr); - mdModulesNotifier.notify(&atomSets); - mdModulesNotifier.notify(inputrec->pbcType); - mdModulesNotifier.notify(SimulationTimeStep{ inputrec->delta_t }); - /* Initiate forcerecord */ - fr = new t_forcerec; - fr->forceProviders = mdModules_->initForceProviders(); - init_forcerec(fplog, mdlog, fr, inputrec.get(), &mtop, cr, box, - opt2fn("-table", filenames.size(), filenames.data()), - opt2fn("-tablep", filenames.size(), filenames.data()), - 
opt2fns("-tableb", filenames.size(), filenames.data()), pforce); - // Dirty hack, for fixing disres and orires should be made mdmodules - fr->fcdata->disres = disresdata; - fr->fcdata->orires = oriresdata; - - // Save a handle to device stream manager to use elsewhere in the code - // TODO: Forcerec is not a correct place to store it. - fr->deviceStreamManager = deviceStreamManager.get(); - - if (runScheduleWork.simulationWork.useGpuPmePpCommunication && !thisRankHasDuty(cr, DUTY_PME)) - { - GMX_RELEASE_ASSERT( - deviceStreamManager != nullptr, - "GPU device stream manager should be valid in order to use PME-PP direct " - "communications."); - GMX_RELEASE_ASSERT( - deviceStreamManager->streamIsValid(DeviceStreamType::PmePpTransfer), - "GPU PP-PME stream should be valid in order to use GPU PME-PP direct " - "communications."); - fr->pmePpCommGpu = std::make_unique( - cr->mpi_comm_mysim, cr->dd->pme_nodeid, deviceStreamManager->context(), - deviceStreamManager->stream(DeviceStreamType::PmePpTransfer)); - } - - fr->nbv = Nbnxm::init_nb_verlet(mdlog, inputrec.get(), fr, cr, *hwinfo_, - runScheduleWork.simulationWork.useGpuNonbonded, - deviceStreamManager.get(), &mtop, box, wcycle); - // TODO: Move the logic below to a GPU bonded builder - if (runScheduleWork.simulationWork.useGpuBonded) - { - GMX_RELEASE_ASSERT(deviceStreamManager != nullptr, - "GPU device stream manager should be valid in order to use GPU " - "version of bonded forces."); - gpuBonded = std::make_unique( - mtop.ffparams, fr->ic->epsfac * fr->fudgeQQ, deviceStreamManager->context(), - deviceStreamManager->bondedStream(havePPDomainDecomposition(cr)), wcycle); - fr->gpuBonded = gpuBonded.get(); - } - - /* Initialize the mdAtoms structure. - * mdAtoms is not filled with atom data, - * as this can not be done now with domain decomposition. 
- */ - mdAtoms = makeMDAtoms(fplog, mtop, *inputrec, thisRankHasPmeGpuTask); - if (globalState && thisRankHasPmeGpuTask) - { - // The pinning of coordinates in the global state object works, because we only use - // PME on GPU without DD or on a separate PME rank, and because the local state pointer - // points to the global state object without DD. - // FIXME: MD and EM separately set up the local state - this should happen in the same - // function, which should also perform the pinning. - changePinningPolicy(&globalState->x, pme_get_pinning_policy()); - } - - /* Initialize the virtual site communication */ - vsite = makeVirtualSitesHandler(mtop, cr, fr->pbcType); - - calc_shifts(box, fr->shift_vec); - - /* With periodic molecules the charge groups should be whole at start up - * and the virtual sites should not be far from their proper positions. - */ - if (!inputrec->bContinuation && MASTER(cr) - && !(inputrec->pbcType != PbcType::No && inputrec->bPeriodicMols)) - { - /* Make molecules whole at start of run */ - if (fr->pbcType != PbcType::No) - { - do_pbc_first_mtop(fplog, inputrec->pbcType, box, &mtop, globalState->x.rvec_array()); - } - if (vsite) - { - /* Correct initial vsite positions are required - * for the initial distribution in the domain decomposition - * and for the initial shell prediction. 
- */ - constructVirtualSitesGlobal(mtop, globalState->x); - } - } - - if (EEL_PME(fr->ic->eeltype) || EVDW_PME(fr->ic->vdwtype)) - { - ewaldcoeff_q = fr->ic->ewaldcoeff_q; - ewaldcoeff_lj = fr->ic->ewaldcoeff_lj; - } - } - else - { - /* This is a PME only node */ - - GMX_ASSERT(globalState == nullptr, - "We don't need the state on a PME only rank and expect it to be unitialized"); - - ewaldcoeff_q = calc_ewaldcoeff_q(inputrec->rcoulomb, inputrec->ewald_rtol); - ewaldcoeff_lj = calc_ewaldcoeff_lj(inputrec->rvdw, inputrec->ewald_rtol_lj); - } - - gmx_pme_t* sepPmeData = nullptr; - // This reference hides the fact that PME data is owned by runner on PME-only ranks and by forcerec on other ranks - GMX_ASSERT(thisRankHasDuty(cr, DUTY_PP) == (fr != nullptr), - "Double-checking that only PME-only ranks have no forcerec"); - gmx_pme_t*& pmedata = fr ? fr->pmedata : sepPmeData; - - // TODO should live in ewald module once its testing is improved - // - // Later, this program could contain kernels that might be later - // re-used as auto-tuning progresses, or subsequent simulations - // are invoked. - PmeGpuProgramStorage pmeGpuProgram; - if (thisRankHasPmeGpuTask) - { - GMX_RELEASE_ASSERT( - (deviceStreamManager != nullptr), - "GPU device stream manager should be initialized in order to use GPU for PME."); - GMX_RELEASE_ASSERT((deviceInfo != nullptr), - "GPU device should be initialized in order to use GPU for PME."); - pmeGpuProgram = buildPmeGpuProgram(deviceStreamManager->context()); - } - - /* Initiate PME if necessary, - * either on all nodes or on dedicated PME nodes only. 
*/ - if (EEL_PME(inputrec->coulombtype) || EVDW_PME(inputrec->vdwtype)) - { - if (mdAtoms && mdAtoms->mdatoms()) - { - nChargePerturbed = mdAtoms->mdatoms()->nChargePerturbed; - if (EVDW_PME(inputrec->vdwtype)) - { - nTypePerturbed = mdAtoms->mdatoms()->nTypePerturbed; - } - } - if (cr->npmenodes > 0) - { - /* The PME only nodes need to know nChargePerturbed(FEP on Q) and nTypePerturbed(FEP on LJ)*/ - gmx_bcast(sizeof(nChargePerturbed), &nChargePerturbed, cr->mpi_comm_mysim); - gmx_bcast(sizeof(nTypePerturbed), &nTypePerturbed, cr->mpi_comm_mysim); - } - - if (thisRankHasDuty(cr, DUTY_PME)) - { - try - { - // TODO: This should be in the builder. - GMX_RELEASE_ASSERT(!runScheduleWork.simulationWork.useGpuPme - || (deviceStreamManager != nullptr), - "Device stream manager should be valid in order to use GPU " - "version of PME."); - GMX_RELEASE_ASSERT( - !runScheduleWork.simulationWork.useGpuPme - || deviceStreamManager->streamIsValid(DeviceStreamType::Pme), - "GPU PME stream should be valid in order to use GPU version of PME."); - - const DeviceContext* deviceContext = runScheduleWork.simulationWork.useGpuPme - ? &deviceStreamManager->context() - : nullptr; - const DeviceStream* pmeStream = - runScheduleWork.simulationWork.useGpuPme - ? &deviceStreamManager->stream(DeviceStreamType::Pme) - : nullptr; - - pmedata = gmx_pme_init(cr, getNumPmeDomains(cr->dd), inputrec.get(), - nChargePerturbed != 0, nTypePerturbed != 0, - mdrunOptions.reproducible, ewaldcoeff_q, ewaldcoeff_lj, - gmx_omp_nthreads_get(emntPME), pmeRunMode, nullptr, - deviceContext, pmeStream, pmeGpuProgram.get(), mdlog); - } - GMX_CATCH_ALL_AND_EXIT_WITH_FATAL_ERROR - } - } - - - if (EI_DYNAMICS(inputrec->eI)) - { - /* Turn on signal handling on all nodes */ - /* - * (A user signal from the PME nodes (if any) - * is communicated to the PP nodes. 
- */ - signal_handler_install(); - } - - pull_t* pull_work = nullptr; - if (thisRankHasDuty(cr, DUTY_PP)) - { - /* Assumes uniform use of the number of OpenMP threads */ - walltime_accounting = walltime_accounting_init(gmx_omp_nthreads_get(emntDefault)); - - if (inputrec->bPull) - { - /* Initialize pull code */ - pull_work = init_pull(fplog, inputrec->pull.get(), inputrec.get(), &mtop, cr, &atomSets, - inputrec->fepvals->init_lambda); - if (inputrec->pull->bXOutAverage || inputrec->pull->bFOutAverage) - { - initPullHistory(pull_work, &observablesHistory); - } - if (EI_DYNAMICS(inputrec->eI) && MASTER(cr)) - { - init_pull_output_files(pull_work, filenames.size(), filenames.data(), oenv, startingBehavior); - } - } - - std::unique_ptr enforcedRotation; - if (inputrec->bRot) - { - /* Initialize enforced rotation code */ - enforcedRotation = init_rot(fplog, inputrec.get(), filenames.size(), filenames.data(), - cr, &atomSets, globalState.get(), &mtop, oenv, mdrunOptions, - startingBehavior); - } - - t_swap* swap = nullptr; - if (inputrec->eSwapCoords != eswapNO) - { - /* Initialize ion swapping code */ - swap = init_swapcoords(fplog, inputrec.get(), - opt2fn_master("-swap", filenames.size(), filenames.data(), cr), - &mtop, globalState.get(), &observablesHistory, cr, &atomSets, - oenv, mdrunOptions, startingBehavior); - } - - /* Let makeConstraints know whether we have essential dynamics constraints. 
*/ - auto constr = makeConstraints(mtop, *inputrec, pull_work, doEssentialDynamics, fplog, cr, - ms, &nrnb, wcycle, fr->bMolPBC); - - /* Energy terms and groups */ - gmx_enerdata_t enerd(mtop.groups.groups[SimulationAtomGroupType::EnergyOutput].size(), - inputrec->fepvals->n_lambda); - - // cos acceleration is only supported by md, but older tpr - // files might still combine it with other integrators - GMX_RELEASE_ASSERT(inputrec->cos_accel == 0.0 || inputrec->eI == eiMD, - "cos_acceleration is only supported by integrator=md"); - - /* Kinetic energy data */ - gmx_ekindata_t ekind; - init_ekindata(fplog, &mtop, &(inputrec->opts), &ekind, inputrec->cos_accel); - - /* Set up interactive MD (IMD) */ - auto imdSession = - makeImdSession(inputrec.get(), cr, wcycle, &enerd, ms, &mtop, mdlog, - MASTER(cr) ? globalState->x.rvec_array() : nullptr, filenames.size(), - filenames.data(), oenv, mdrunOptions.imdOptions, startingBehavior); - - if (DOMAINDECOMP(cr)) - { - GMX_RELEASE_ASSERT(fr, "fr was NULL while cr->duty was DUTY_PP"); - /* This call is not included in init_domain_decomposition mainly - * because fr->cginfo_mb is set later. 
- */ - dd_init_bondeds(fplog, cr->dd, mtop, vsite.get(), inputrec.get(), - domdecOptions.checkBondedInteractions, fr->cginfo_mb); - } - - if (runScheduleWork.simulationWork.useGpuBufferOps) - { - fr->gpuForceReduction[gmx::AtomLocality::Local] = std::make_unique( - deviceStreamManager->context(), - deviceStreamManager->stream(gmx::DeviceStreamType::NonBondedLocal), wcycle); - fr->gpuForceReduction[gmx::AtomLocality::NonLocal] = std::make_unique( - deviceStreamManager->context(), - deviceStreamManager->stream(gmx::DeviceStreamType::NonBondedNonLocal), wcycle); - } - - std::unique_ptr stateGpu; - if (gpusWereDetected - && ((runScheduleWork.simulationWork.useGpuPme && thisRankHasDuty(cr, DUTY_PME)) - || runScheduleWork.simulationWork.useGpuBufferOps)) - { - GpuApiCallBehavior transferKind = (inputrec->eI == eiMD && !doRerun && !useModularSimulator) - ? GpuApiCallBehavior::Async - : GpuApiCallBehavior::Sync; - GMX_RELEASE_ASSERT(deviceStreamManager != nullptr, - "GPU device stream manager should be initialized to use GPU."); - stateGpu = std::make_unique( - *deviceStreamManager, transferKind, pme_gpu_get_block_size(fr->pmedata), wcycle); - fr->stateGpu = stateGpu.get(); - } - - GMX_ASSERT(stopHandlerBuilder_, "Runner must provide StopHandlerBuilder to simulator."); - SimulatorBuilder simulatorBuilder; - - simulatorBuilder.add(SimulatorStateData(globalState.get(), &observablesHistory, &enerd, &ekind)); - simulatorBuilder.add(std::move(membedHolder)); - simulatorBuilder.add(std::move(stopHandlerBuilder_)); - simulatorBuilder.add(SimulatorConfig(mdrunOptions, startingBehavior, &runScheduleWork)); - - - simulatorBuilder.add(SimulatorEnv(fplog, cr, ms, mdlog, oenv)); - simulatorBuilder.add(Profiling(&nrnb, walltime_accounting, wcycle)); - simulatorBuilder.add(ConstraintsParam( - constr.get(), enforcedRotation ? enforcedRotation->getLegacyEnfrot() : nullptr, - vsite.get())); - // TODO: Separate `fr` to a separate add, and make the `build` handle the coupling sensibly. 
- simulatorBuilder.add(LegacyInput(static_cast(filenames.size()), filenames.data(), - inputrec.get(), fr)); - simulatorBuilder.add(ReplicaExchangeParameters(replExParams)); - simulatorBuilder.add(InteractiveMD(imdSession.get())); - simulatorBuilder.add(SimulatorModules(mdModules_->outputProvider(), mdModules_->notifier())); - simulatorBuilder.add(CenterOfMassPulling(pull_work)); - // Todo move to an MDModule - simulatorBuilder.add(IonSwapping(swap)); - simulatorBuilder.add(TopologyData(&mtop, mdAtoms.get())); - simulatorBuilder.add(BoxDeformationHandle(deform.get())); - simulatorBuilder.add(std::move(modularSimulatorCheckpointData)); - - // build and run simulator object based on user-input - auto simulator = simulatorBuilder.build(useModularSimulator); - simulator->run(); - - if (fr->pmePpCommGpu) - { - // destroy object since it is no longer required. (This needs to be done while the GPU context still exists.) - fr->pmePpCommGpu.reset(); - } - - if (inputrec->bPull) - { - finish_pull(pull_work); - } - finish_swapcoords(swap); - } - else - { - GMX_RELEASE_ASSERT(pmedata, "pmedata was NULL while cr->duty was not DUTY_PP"); - /* do PME only */ - walltime_accounting = walltime_accounting_init(gmx_omp_nthreads_get(emntPME)); - gmx_pmeonly(pmedata, cr, &nrnb, wcycle, walltime_accounting, inputrec.get(), pmeRunMode, - deviceStreamManager.get()); - } - - wallcycle_stop(wcycle, ewcRUN); - - /* Finish up, write some stuff - * if rerunMD, don't write last frame again - */ - finish_run(fplog, mdlog, cr, inputrec.get(), &nrnb, wcycle, walltime_accounting, - fr ? 
fr->nbv.get() : nullptr, pmedata, EI_DYNAMICS(inputrec->eI) && !isMultiSim(ms)); - - // clean up cycle counter - wallcycle_destroy(wcycle); - - deviceStreamManager.reset(nullptr); - // Free PME data - if (pmedata) - { - gmx_pme_destroy(pmedata); - pmedata = nullptr; - } - - // FIXME: this is only here to manually unpin mdAtoms->chargeA_ and state->x, - // before we destroy the GPU context(s) - // Pinned buffers are associated with contexts in CUDA. - // As soon as we destroy GPU contexts after mdrunner() exits, these lines should go. - mdAtoms.reset(nullptr); - globalState.reset(nullptr); - mdModules_.reset(nullptr); // destruct force providers here as they might also use the GPU - gpuBonded.reset(nullptr); - /* Free pinned buffers in *fr */ - delete fr; - fr = nullptr; - // TODO convert to C++ so we can get rid of these frees - sfree(disresdata); - sfree(oriresdata); - - if (!hwinfo_->deviceInfoList.empty()) - { - /* stop the GPU profiler (only CUDA) */ - stopGpuProfiler(); - } - - /* With tMPI we need to wait for all ranks to finish deallocation before - * destroying the CUDA context as some tMPI ranks may be sharing - * GPU and context. - * - * This is not a concern in OpenCL where we use one context per rank. - * - * Note: it is safe to not call the barrier on the ranks which do not use GPU, - * but it is easier and more futureproof to call it on the whole node. - * - * Note that this function needs to be called even if GPUs are not used - * in this run because the PME ranks have no knowledge of whether GPUs - * are used or not, but all ranks need to enter the barrier below. - * \todo Remove this physical node barrier after making sure - * that it's not needed anymore (with a shared GPU run). 
- */ - if (GMX_THREAD_MPI) - { - physicalNodeComm.barrier(); - } - releaseDevice(deviceInfo); - - /* Does what it says */ - print_date_and_time(fplog, cr->nodeid, "Finished mdrun", gmx_gettime()); - walltime_accounting_destroy(walltime_accounting); - - // Ensure log file content is written - if (logFileHandle) - { - gmx_fio_flush(logFileHandle); - } - - /* Reset FPEs (important for unit tests) by disabling them. Assumes no - * exceptions were enabled before function was called. */ - if (bEnableFPE) - { - gmx_fedisableexcept(); - } - - auto rc = static_cast(gmx_get_stop_condition()); - -#if GMX_THREAD_MPI - /* we need to join all threads. The sub-threads join when they - exit this function, but the master thread needs to be told to - wait for that. */ - if (MASTER(cr)) - { - tMPI_Finalize(); - } -#endif - return rc; -} // namespace gmx - -Mdrunner::~Mdrunner() -{ - // Clean up of the Manager. - // This will end up getting called on every thread-MPI rank, which is unnecessary, - // but okay as long as threads synchronize some time before adding or accessing - // a new set of restraints. - if (restraintManager_) - { - restraintManager_->clear(); - GMX_ASSERT(restraintManager_->countRestraints() == 0, - "restraints added during runner life time should be cleared at runner " - "destruction."); - } -}; - -void Mdrunner::addPotential(std::shared_ptr puller, const std::string& name) -{ - GMX_ASSERT(restraintManager_, "Mdrunner must have a restraint manager."); - // Not sure if this should be logged through the md logger or something else, - // but it is helpful to have some sort of INFO level message sent somewhere. - // std::cout << "Registering restraint named " << name << std::endl; - - // When multiple restraints are used, it may be wasteful to register them separately. - // Maybe instead register an entire Restraint Manager as a force provider. 
- restraintManager_->addToSpec(std::move(puller), name); -} - -Mdrunner::Mdrunner(std::unique_ptr mdModules) : mdModules_(std::move(mdModules)) {} - -Mdrunner::Mdrunner(Mdrunner&&) noexcept = default; - -//NOLINTNEXTLINE(performance-noexcept-move-constructor) working around GCC bug 58265 in CentOS 7 -Mdrunner& Mdrunner::operator=(Mdrunner&& /*handle*/) noexcept(BUGFREE_NOEXCEPT_STRING) = default; - -class Mdrunner::BuilderImplementation -{ -public: - BuilderImplementation() = delete; - BuilderImplementation(std::unique_ptr mdModules, compat::not_null context); - ~BuilderImplementation(); - - BuilderImplementation& setExtraMdrunOptions(const MdrunOptions& options, - real forceWarningThreshold, - StartingBehavior startingBehavior); - - void addHardwareDetectionResult(const gmx_hw_info_t* hwinfo); - - void addDomdec(const DomdecOptions& options); - - void addInput(SimulationInputHandle inputHolder); - - void addVerletList(int nstlist); - - void addReplicaExchange(const ReplicaExchangeParameters& params); - - void addNonBonded(const char* nbpu_opt); - - void addPME(const char* pme_opt_, const char* pme_fft_opt_); - - void addBondedTaskAssignment(const char* bonded_opt); - - void addUpdateTaskAssignment(const char* update_opt); - - void addHardwareOptions(const gmx_hw_opt_t& hardwareOptions); - - void addFilenames(ArrayRef filenames); - - void addOutputEnvironment(gmx_output_env_t* outputEnvironment); - - void addLogFile(t_fileio* logFileHandle); - - void addStopHandlerBuilder(std::unique_ptr builder); - - Mdrunner build(); - -private: - // Default parameters copied from runner.h - // \todo Clarify source(s) of default parameters. - - const char* nbpu_opt_ = nullptr; - const char* pme_opt_ = nullptr; - const char* pme_fft_opt_ = nullptr; - const char* bonded_opt_ = nullptr; - const char* update_opt_ = nullptr; - - MdrunOptions mdrunOptions_; - - DomdecOptions domdecOptions_; - - ReplicaExchangeParameters replicaExchangeParameters_; - - //! 
Command-line override for the duration of a neighbor list with the Verlet scheme. - int nstlist_ = 0; - - //! World communicator, used for hardware detection and task assignment - MPI_Comm libraryWorldCommunicator_ = MPI_COMM_NULL; - - //! Multisim communicator handle. - gmx_multisim_t* multiSimulation_; - - //! mdrun communicator - MPI_Comm simulationCommunicator_ = MPI_COMM_NULL; - - //! Print a warning if any force is larger than this (in kJ/mol nm). - real forceWarningThreshold_ = -1; - - //! Whether the simulation will start afresh, or restart with/without appending. - StartingBehavior startingBehavior_ = StartingBehavior::NewSimulation; - - //! The modules that comprise the functionality of mdrun. - std::unique_ptr mdModules_; - - //! Detected hardware. - const gmx_hw_info_t* hwinfo_ = nullptr; - - //! \brief Parallelism information. - gmx_hw_opt_t hardwareOptions_; - - //! filename options for simulation. - ArrayRef filenames_; - - /*! \brief Handle to output environment. - * - * \todo gmx_output_env_t needs lifetime management. - */ - gmx_output_env_t* outputEnvironment_ = nullptr; - - /*! \brief Non-owning handle to MD log file. - * - * \todo Context should own output facilities for client. - * \todo Improve log file handle management. - * \internal - * Code managing the FILE* relies on the ability to set it to - * nullptr to check whether the filehandle is valid. - */ - t_fileio* logFileHandle_ = nullptr; - - /*! - * \brief Builder for simulation stop signal handler. - */ - std::unique_ptr stopHandlerBuilder_ = nullptr; - - /*! - * \brief Sources for initial simulation state. - * - * See issue #3652 for near-term refinements to the SimulationInput interface. - * - * See issue #3379 for broader discussion on API aspects of simulation inputs and outputs. 
- */ - SimulationInputHandle inputHolder_; -}; - -Mdrunner::BuilderImplementation::BuilderImplementation(std::unique_ptr mdModules, - compat::not_null context) : - mdModules_(std::move(mdModules)) -{ - libraryWorldCommunicator_ = context->libraryWorldCommunicator_; - simulationCommunicator_ = context->simulationCommunicator_; - multiSimulation_ = context->multiSimulation_.get(); -} - -Mdrunner::BuilderImplementation::~BuilderImplementation() = default; - -Mdrunner::BuilderImplementation& -Mdrunner::BuilderImplementation::setExtraMdrunOptions(const MdrunOptions& options, - const real forceWarningThreshold, - const StartingBehavior startingBehavior) -{ - mdrunOptions_ = options; - forceWarningThreshold_ = forceWarningThreshold; - startingBehavior_ = startingBehavior; - return *this; -} - -void Mdrunner::BuilderImplementation::addDomdec(const DomdecOptions& options) -{ - domdecOptions_ = options; -} - -void Mdrunner::BuilderImplementation::addVerletList(int nstlist) -{ - nstlist_ = nstlist; -} - -void Mdrunner::BuilderImplementation::addReplicaExchange(const ReplicaExchangeParameters& params) -{ - replicaExchangeParameters_ = params; -} - -Mdrunner Mdrunner::BuilderImplementation::build() -{ - auto newRunner = Mdrunner(std::move(mdModules_)); - - newRunner.mdrunOptions = mdrunOptions_; - newRunner.pforce = forceWarningThreshold_; - newRunner.startingBehavior = startingBehavior_; - newRunner.domdecOptions = domdecOptions_; - - // \todo determine an invariant to check or confirm that all gmx_hw_opt_t objects are valid - newRunner.hw_opt = hardwareOptions_; - - // No invariant to check. This parameter exists to optionally override other behavior. 
- newRunner.nstlist_cmdline = nstlist_; - - newRunner.replExParams = replicaExchangeParameters_; - - newRunner.filenames = filenames_; - - newRunner.libraryWorldCommunicator = libraryWorldCommunicator_; - - newRunner.simulationCommunicator = simulationCommunicator_; - - // nullptr is a valid value for the multisim handle - newRunner.ms = multiSimulation_; - - if (hwinfo_) - { - newRunner.hwinfo_ = hwinfo_; - } - else - { - GMX_THROW(gmx::APIError( - "MdrunnerBuilder::addHardwareDetectionResult() is required before build()")); - } - - if (inputHolder_) - { - newRunner.inputHolder_ = std::move(inputHolder_); - } - else - { - GMX_THROW(gmx::APIError("MdrunnerBuilder::addInput() is required before build().")); - } - - // \todo Clarify ownership and lifetime management for gmx_output_env_t - // \todo Update sanity checking when output environment has clearly specified invariants. - // Initialization and default values for oenv are not well specified in the current version. - if (outputEnvironment_) - { - newRunner.oenv = outputEnvironment_; - } - else - { - GMX_THROW(gmx::APIError( - "MdrunnerBuilder::addOutputEnvironment() is required before build()")); - } - - newRunner.logFileHandle = logFileHandle_; - - if (nbpu_opt_) - { - newRunner.nbpu_opt = nbpu_opt_; - } - else - { - GMX_THROW(gmx::APIError("MdrunnerBuilder::addNonBonded() is required before build()")); - } - - if (pme_opt_ && pme_fft_opt_) - { - newRunner.pme_opt = pme_opt_; - newRunner.pme_fft_opt = pme_fft_opt_; - } - else - { - GMX_THROW(gmx::APIError("MdrunnerBuilder::addElectrostatics() is required before build()")); - } - - if (bonded_opt_) - { - newRunner.bonded_opt = bonded_opt_; - } - else - { - GMX_THROW(gmx::APIError( - "MdrunnerBuilder::addBondedTaskAssignment() is required before build()")); - } - - if (update_opt_) - { - newRunner.update_opt = update_opt_; - } - else - { - GMX_THROW(gmx::APIError( - "MdrunnerBuilder::addUpdateTaskAssignment() is required before build() ")); - } - - - 
newRunner.restraintManager_ = std::make_unique(); - - if (stopHandlerBuilder_) - { - newRunner.stopHandlerBuilder_ = std::move(stopHandlerBuilder_); - } - else - { - newRunner.stopHandlerBuilder_ = std::make_unique(); - } - - return newRunner; -} - -void Mdrunner::BuilderImplementation::addHardwareDetectionResult(const gmx_hw_info_t* hwinfo) -{ - hwinfo_ = hwinfo; -} - -void Mdrunner::BuilderImplementation::addNonBonded(const char* nbpu_opt) -{ - nbpu_opt_ = nbpu_opt; -} - -void Mdrunner::BuilderImplementation::addPME(const char* pme_opt, const char* pme_fft_opt) -{ - pme_opt_ = pme_opt; - pme_fft_opt_ = pme_fft_opt; -} - -void Mdrunner::BuilderImplementation::addBondedTaskAssignment(const char* bonded_opt) -{ - bonded_opt_ = bonded_opt; -} - -void Mdrunner::BuilderImplementation::addUpdateTaskAssignment(const char* update_opt) -{ - update_opt_ = update_opt; -} - -void Mdrunner::BuilderImplementation::addHardwareOptions(const gmx_hw_opt_t& hardwareOptions) -{ - hardwareOptions_ = hardwareOptions; -} - -void Mdrunner::BuilderImplementation::addFilenames(ArrayRef filenames) -{ - filenames_ = filenames; -} - -void Mdrunner::BuilderImplementation::addOutputEnvironment(gmx_output_env_t* outputEnvironment) -{ - outputEnvironment_ = outputEnvironment; -} - -void Mdrunner::BuilderImplementation::addLogFile(t_fileio* logFileHandle) -{ - logFileHandle_ = logFileHandle; -} - -void Mdrunner::BuilderImplementation::addStopHandlerBuilder(std::unique_ptr builder) -{ - stopHandlerBuilder_ = std::move(builder); -} - -void Mdrunner::BuilderImplementation::addInput(SimulationInputHandle inputHolder) -{ - inputHolder_ = std::move(inputHolder); -} - -MdrunnerBuilder::MdrunnerBuilder(std::unique_ptr mdModules, - compat::not_null context) : - impl_{ std::make_unique(std::move(mdModules), context) } -{ -} - -MdrunnerBuilder::~MdrunnerBuilder() = default; - -MdrunnerBuilder& MdrunnerBuilder::addHardwareDetectionResult(const gmx_hw_info_t* hwinfo) -{ - 
impl_->addHardwareDetectionResult(hwinfo); - return *this; -} - -MdrunnerBuilder& MdrunnerBuilder::addSimulationMethod(const MdrunOptions& options, - real forceWarningThreshold, - const StartingBehavior startingBehavior) -{ - impl_->setExtraMdrunOptions(options, forceWarningThreshold, startingBehavior); - return *this; -} - -MdrunnerBuilder& MdrunnerBuilder::addDomainDecomposition(const DomdecOptions& options) -{ - impl_->addDomdec(options); - return *this; -} - -MdrunnerBuilder& MdrunnerBuilder::addNeighborList(int nstlist) -{ - impl_->addVerletList(nstlist); - return *this; -} - -MdrunnerBuilder& MdrunnerBuilder::addReplicaExchange(const ReplicaExchangeParameters& params) -{ - impl_->addReplicaExchange(params); - return *this; -} - -MdrunnerBuilder& MdrunnerBuilder::addNonBonded(const char* nbpu_opt) -{ - impl_->addNonBonded(nbpu_opt); - return *this; -} - -MdrunnerBuilder& MdrunnerBuilder::addElectrostatics(const char* pme_opt, const char* pme_fft_opt) -{ - // The builder method may become more general in the future, but in this version, - // parameters for PME electrostatics are both required and the only parameters - // available. 
- if (pme_opt && pme_fft_opt) - { - impl_->addPME(pme_opt, pme_fft_opt); - } - else - { - GMX_THROW( - gmx::InvalidInputError("addElectrostatics() arguments must be non-null pointers.")); - } - return *this; -} - -MdrunnerBuilder& MdrunnerBuilder::addBondedTaskAssignment(const char* bonded_opt) -{ - impl_->addBondedTaskAssignment(bonded_opt); - return *this; -} - -MdrunnerBuilder& MdrunnerBuilder::addUpdateTaskAssignment(const char* update_opt) -{ - impl_->addUpdateTaskAssignment(update_opt); - return *this; -} - -Mdrunner MdrunnerBuilder::build() -{ - return impl_->build(); -} - -MdrunnerBuilder& MdrunnerBuilder::addHardwareOptions(const gmx_hw_opt_t& hardwareOptions) -{ - impl_->addHardwareOptions(hardwareOptions); - return *this; -} - -MdrunnerBuilder& MdrunnerBuilder::addFilenames(ArrayRef filenames) -{ - impl_->addFilenames(filenames); - return *this; -} - -MdrunnerBuilder& MdrunnerBuilder::addOutputEnvironment(gmx_output_env_t* outputEnvironment) -{ - impl_->addOutputEnvironment(outputEnvironment); - return *this; -} - -MdrunnerBuilder& MdrunnerBuilder::addLogFile(t_fileio* logFileHandle) -{ - impl_->addLogFile(logFileHandle); - return *this; -} - -MdrunnerBuilder& MdrunnerBuilder::addStopHandlerBuilder(std::unique_ptr builder) -{ - impl_->addStopHandlerBuilder(std::move(builder)); - return *this; -} - -MdrunnerBuilder& MdrunnerBuilder::addInput(SimulationInputHandle input) -{ - impl_->addInput(std::move(input)); - return *this; -} - -MdrunnerBuilder::MdrunnerBuilder(MdrunnerBuilder&&) noexcept = default; - -MdrunnerBuilder& MdrunnerBuilder::operator=(MdrunnerBuilder&&) noexcept = default; - -} // namespace gmx