diff --git a/.travis.yml b/.travis.yml index ec3ddc5..e292727 100644 --- a/.travis.yml +++ b/.travis.yml @@ -9,24 +9,54 @@ addons: - valgrind cache: directories: - - .travis_helpers/ulfm-install + - ulfm-install before_install: - - cd .travis_helpers - - source ./fetchULFMmpi.sh - - cd ../ #Always end back at the root directory + - echo "Configuring ULFM" + - if [ -f ulfm-install/lib/libmpi.so ]; then + echo "libmpi.so found -- nothing to build."; + cd ulfm-install; + else + ROOT=`pwd`; + mkdir ulfm-install; + echo "Downloading ULFM from repo"; + git clone --recursive https://bitbucket.org/icldistcomp/ulfm2.git ulfm-src/; + echo " - Configuring and building ULFM."; + cd ulfm-src; + echo " - Running autogen.pl"; + ./autogen.pl >../ulfm-install/ulfm_build_output.txt 2>&1; + echo " - Running configure"; + ./configure --prefix=$ROOT/ulfm-install >>../ulfm-install/ulfm_build_output.txt 2>&1; + echo " - Running make"; + make -j4 >>../ulfm-install/ulfm_build_output.txt 2>&1; + echo " - Running make install"; + make install >>../ulfm-install/ulfm_build_output.txt 2>&1; + echo " - Finished installing ULFM"; + cd ../ulfm-install/; + fi + + #Expect that any changes to the above still puts me in the install's home dir + - export MPI_HOME=`pwd` + - export PATH=$MPI_HOME/bin/:$PATH + - export LD_LIBRARY_PATH=$MPI_HOME/lib:$LD_LIBRARY_PATH + - export DYLD_LIBRARY_PATH=$MPI_HOME/lib:$DYLD_LIBRARY_PATH + - export MANPATH=$MPI_HOME/share/man:$MANPATH + + - export MPICC="`which mpicc`" + - export MPICXX="`which mpic++`" + + #Allow oversubscription for tests, since we're potentially single core + - export OMPI_MCA_rmaps_base_oversubscribe=1 + + - tail -n50 ./ulfm_build_output.txt + - cd ../ #End back at root +install: + - mkdir build && cd build + - cmake ../ -DBUILD_TESTING=ON && make -j4 VERBOSE=1 script: - - cd .travis_helpers - - source fetchULFMmpi.sh #Just updates path if ULFM was built properly in before_install - - cd ../ - - mkdir build - - cd build - - cmake ../ 
-DBUILD_TESTING=ON - - make -j4 VERBOSE=1 - make test - - cd ../ #Always end back at the root directory. +after_success: + - echo "Success, printing run logs:" + - cat Testing/Temporary/LastTest.log after_failure: - echo "Failure occured, printing run logs:" - - pwd - - cat build/Testing/Temporary/LastTest.log - - echo "Printing ULFM build log tail. If no output, ULFM was built before this test run" - - tail -n100 .travis_helpers/build_output.txt + - cat Testing/Temporary/LastTest.log diff --git a/.travis_helpers/fetchULFMmpi.sh b/.travis_helpers/fetchULFMmpi.sh deleted file mode 100644 index a5611cc..0000000 --- a/.travis_helpers/fetchULFMmpi.sh +++ /dev/null @@ -1,37 +0,0 @@ -#!/bin/bash - -if [ -f ulfm-install/lib/libmpi.so ]; then - echo "libmpich.so found -- nothing to build." - cd ulfm-install -else - ROOT=`pwd` - echo "Downloading ULFM from repo" - wget https://bitbucket.org/icldistcomp/ulfm2/get/ulfm2.0rc.tar.bz2 - tar -xjf ulfm2.0rc.tar.bz2 - mv icldist* ulfm-src/ - echo " - Configuring and building ULFM." - cd ulfm-src - echo " - Running autogen.pl" - ./autogen.pl > ../build_output.txt - echo " - Running configure" - ./configure --prefix=$ROOT/ulfm-install >> ../build_output.txt - echo " - Running make" - make -j4 >> ../build_output.txt - echo " - Running make install" - make install >> ../build_output.txt - echo " - Finished installing ULFM" - cd ../ulfm-install/ -fi - -#Expect that any changes to the above still puts me in the install's home dir -export MPI_HOME=`pwd` -export PATH=$MPI_HOME/bin/:$PATH -export LD_LIBRARY_PATH=$MPI_HOME/lib:$LD_LIBRARY_PATH -export DYLD_LIBRARY_PATH=$MPI_HOME/lib:$DYLD_LIBRARY_PATH -export MANPATH=$MPI_HOME/share/man:$MANPATH - -export MPICC="`which mpicc`" -export MPICXX="`which mpic++`" - -#Assuming the install's home dir is one above current. 
-cd ../ diff --git a/CMakeLists.txt b/CMakeLists.txt index e90822f..b866e11 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -25,8 +25,8 @@ set(CMAKE_SHARED_LIBRARY_LINK_C_FLAGS) set(CMAKE_ARCHIVE_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}/lib) set(CMAKE_LIBRARY_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}/lib) -set(CMAKE_BUILD_TYPE Release) -#set(CMAKE_BUILD_TYPE Debug) +#set(CMAKE_BUILD_TYPE Release) +set(CMAKE_BUILD_TYPE Debug) #set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -Wall -O0 -ggdb") #ENABLE_TESTING @@ -109,4 +109,7 @@ if(BUILD_TESTING) add_subdirectory(test/subset_internal) add_subdirectory(test/subset_merging) add_subdirectory(test/request_tracking) + add_subdirectory(test/request_cancelled) + add_subdirectory(test/no_jump) + add_subdirectory(test/issend) endif() diff --git a/examples/01_hello_world/fenix/CMakeLists.txt b/examples/01_hello_world/fenix/CMakeLists.txt index 22658d2..df8d7a1 100644 --- a/examples/01_hello_world/fenix/CMakeLists.txt +++ b/examples/01_hello_world/fenix/CMakeLists.txt @@ -16,5 +16,5 @@ if(BUILD_TESTING) add_executable(fenix_hello_world-debug fenix_hello_world.c) target_link_libraries(fenix_hello_world-debug fenix ${MPI_C_LIBRARIES}) add_test(NAME hello_world - COMMAND mpirun --oversubscribe -np 3 fenix_hello_world-debug "1") + COMMAND mpirun -mca mpi_ft_detector_timeout 1 -np 3 fenix_hello_world-debug "1") endif() diff --git a/examples/01_hello_world/fenix/fenix_hello_world.c b/examples/01_hello_world/fenix/fenix_hello_world.c index 374a80f..9008ee7 100644 --- a/examples/01_hello_world/fenix/fenix_hello_world.c +++ b/examples/01_hello_world/fenix/fenix_hello_world.c @@ -65,7 +65,6 @@ const int kKillID = 1; int main(int argc, char **argv) { -#warning "It's a good idea to complain when not enough parameters! Should add this code to other examples too." 
if (argc < 2) { printf("Usage: %s <# spare ranks> \n", *argv); exit(0); @@ -108,6 +107,19 @@ int main(int argc, char **argv) { printf("hello world: %s, old rank (MPI_COMM_WORLD): %d, new rank: %d, active ranks: %d, ranks before process failure: %d\n", processor_name, old_rank, new_rank, new_world_size, old_world_size); + + int *fails, num_fails; + num_fails = Fenix_Process_fail_list(&fails); + + char fails_str[100]; + int fails_len = snprintf(fails_str, sizeof(fails_str), "Rank %d sees failed processes [", new_rank); + for(int i = 0; i < num_fails && fails_len < (int) sizeof(fails_str); i++){ + fails_len += snprintf(fails_str + fails_len, sizeof(fails_str) - fails_len, "%s%d", (i==0 ? "" : ", "), fails[i]); + } + /* Append at an offset: sprintf(buf, "%s...", buf) overlaps source and destination (undefined behavior). Output is bounded by the buffer and printed through a literal format string. */ + printf("%s]\n", fails_str); + + Fenix_Finalize(); MPI_Finalize(); diff --git a/examples/02_send_recv/fenix/CMakeLists.txt b/examples/02_send_recv/fenix/CMakeLists.txt index f3f197f..78b07d5 100644 --- a/examples/02_send_recv/fenix/CMakeLists.txt +++ b/examples/02_send_recv/fenix/CMakeLists.txt @@ -16,7 +16,7 @@ if(BUILD_TESTING) add_executable(fenix_ring-debug fenix_ring.c) target_link_libraries(fenix_ring-debug fenix ${MPI_C_LIBRARIES}) add_test(NAME ring - COMMAND mpirun --oversubscribe -np 5 fenix_ring-debug 1 2) + COMMAND mpirun -mca mpi_ft_detector_timeout 1 -np 5 fenix_ring-debug 1 2) set_tests_properties(ring PROPERTIES FAIL_REGULAR_EXPRESSION "FAILURE") endif() diff --git a/examples/05_subset_create/CMakeLists.txt b/examples/05_subset_create/CMakeLists.txt index c8d37ee..10d9864 100644 --- a/examples/05_subset_create/CMakeLists.txt +++ b/examples/05_subset_create/CMakeLists.txt @@ -16,7 +16,7 @@ if(BUILD_TESTING) add_executable(fenix_subset_create-debug subset_create.c) target_link_libraries(fenix_subset_create-debug fenix ${MPI_C_LIBRARIES}) add_test(NAME subset_create - COMMAND mpirun -np 5 --oversubscribe fenix_subset_create-debug 1) + COMMAND mpirun -mca mpi_ft_detector_timeout 1 -np 5 fenix_subset_create-debug 1) set_tests_properties(subset_create PROPERTIES FAIL_REGULAR_EXPRESSION "FAILURE") endif() diff --git
a/examples/05_subset_create/subset_create.c b/examples/05_subset_create/subset_create.c index fcd0624..c819318 100644 --- a/examples/05_subset_create/subset_create.c +++ b/examples/05_subset_create/subset_create.c @@ -73,6 +73,11 @@ fprintf(stderr, "Started\n"); int subset[500]; MPI_Status status; + if (argc < 2) { + printf("Usage: %s <# spare ranks> \n", *argv); + exit(0); + } + int fenix_role; MPI_Comm world_comm; MPI_Comm new_comm; diff --git a/examples/06_subset_createv/CMakeLists.txt b/examples/06_subset_createv/CMakeLists.txt index 0cc4a5a..72112eb 100644 --- a/examples/06_subset_createv/CMakeLists.txt +++ b/examples/06_subset_createv/CMakeLists.txt @@ -16,7 +16,7 @@ if(BUILD_TESTING) add_executable(fenix_subset_createv-debug subset_createv.c) target_link_libraries(fenix_subset_createv-debug fenix ${MPI_C_LIBRARIES}) add_test(NAME subset_createv - COMMAND mpirun -np 5 --oversubscribe fenix_subset_createv-debug 1) + COMMAND mpirun -mca mpi_ft_detector_timeout 1 -np 5 fenix_subset_createv-debug 1) set_tests_properties(subset_createv PROPERTIES FAIL_REGULAR_EXPRESSION "FAILURE") endif() diff --git a/examples/06_subset_createv/subset_createv.c b/examples/06_subset_createv/subset_createv.c index e1a8631..182dd59 100644 --- a/examples/06_subset_createv/subset_createv.c +++ b/examples/06_subset_createv/subset_createv.c @@ -73,6 +73,11 @@ int main(int argc, char **argv) { int subset[1000]; MPI_Status status; + if (argc < 2) { + printf("Usage: %s <# spare ranks> \n", *argv); + exit(0); + } + int fenix_role; MPI_Comm world_comm; MPI_Comm new_comm; diff --git a/include/fenix.h b/include/fenix.h index 94d1130..7a1e382 100644 --- a/include/fenix.h +++ b/include/fenix.h @@ -88,6 +88,7 @@ extern "C" { #define FENIX_ERROR_SUBSET_STRIDE -25 #define FENIX_ERROR_NODATA_FOUND -30 #define FENIX_ERROR_INTERN -40 +#define FENIX_ERROR_CANCELLED -50 #define FENIX_WARNING_SPARE_RANKS_DEPLETED 100 #define FENIX_WARNING_PARTIAL_RESTORE 101 @@ -216,6 +217,10 @@ int 
Fenix_Data_group_delete(int group_id); int Fenix_Data_member_delete(int group_id, int member_id); +int Fenix_Process_fail_list(int** fail_list); + +int Fenix_check_cancelled(MPI_Request *request, MPI_Status *status); + #if defined(c_plusplus) || defined(__cplusplus) } #endif diff --git a/include/fenix_ext.h b/include/fenix_ext.h index 9e92454..785a108 100644 --- a/include/fenix_ext.h +++ b/include/fenix_ext.h @@ -62,7 +62,6 @@ #include "fenix_opt.h" #include "fenix_data_group.h" #include "fenix_process_recovery.h" -#include "fenix_request_store.h" typedef struct { int num_inital_ranks; // Keeps the global MPI rank ID at Fenix_init @@ -71,7 +70,6 @@ typedef struct { int resume_mode; // Defines how program resumes after process recovery int spawn_policy; // Indicate dynamic process spawning int spare_ranks; // Spare ranks entered by user to repair failed ranks - int replace_comm_flag; // Internal global variable to describe the status of MPI communicator int repair_result; // Internal global variable to store the result of MPI communicator repair int finalized; jmp_buf *recover_environment; // Calling environment to fill the jmp_buf structure @@ -81,17 +79,28 @@ typedef struct { int role; // Role of rank: initial, survivor or repair int fenix_init_flag; - fenix_request_store_t request_store; + int fail_world_size; + int* fail_world; + + //Save the pointer to role and error of Fenix_Init + int *ret_role; + int *ret_error; fenix_callback_list_t* callback_list; // singly linked list for user-defined Fenix callback functions //fenix_communicator_list_t* communicator_list; // singly linked list for Fenix resilient communicators fenix_debug_opt_t options; // This is reserved to store the user options - MPI_Comm *world; // Duplicate of the MPI communicator provided by user - MPI_Comm *new_world; // Global MPI communicator identical to g_world but without spare ranks + MPI_Comm world; // Duplicate of the MPI communicator provided by user + MPI_Comm new_world; // Global MPI 
communicator identical to g_world but without spare ranks MPI_Comm *user_world; // MPI communicator with repaired ranks - MPI_Comm original_comm; // Keep the information of the original global MPI Communicator (this will be umodified until Fenix_finalize) MPI_Op agree_op; // This is reserved for the global agreement call for Fenix data recovery API + + + MPI_Errhandler mpi_errhandler; // This stores callback info for our custom error handler + int ignore_errs; // Set this to return errors instead of using the error handler normally. (Don't forget to unset!) + int print_unhandled; // Set this to print the error string for MPI errors of an unhandled return type. + + fenix_data_recovery_t *data_recovery; // Global pointer for Fenix Data Recovery Data Structure } fenix_t; diff --git a/include/fenix_process_recovery.h b/include/fenix_process_recovery.h index 6d73382..90f2075 100644 --- a/include/fenix_process_recovery.h +++ b/include/fenix_process_recovery.h @@ -120,6 +120,6 @@ void __fenix_finalize(); void __fenix_finalize_spare(); -void __fenix_test_MPI(int, const char *); +void __fenix_test_MPI(MPI_Comm*, int*, ...); #endif diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 85a21fc..7d413a1 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -34,7 +34,6 @@ fenix_data_member.c fenix_data_subset.c fenix_comm_list.c fenix_callbacks.c -fenix_request_store.c globals.c ) diff --git a/src/fenix.c b/src/fenix.c index 022ec1d..3590297 100644 --- a/src/fenix.c +++ b/src/fenix.c @@ -181,3 +181,23 @@ int Fenix_Data_group_delete(int group_id) { int Fenix_Data_member_delete(int group_id, int member_id) { return __fenix_member_delete(group_id, member_id); } + +int Fenix_Process_fail_list(int** fail_list){ + *fail_list = fenix.fail_world; + return fenix.fail_world_size; +} + +int Fenix_check_cancelled(MPI_Request *request, MPI_Status *status){ + + //We know this may return as "COMM_REVOKED", but we know the error was already handled + int old_ignore_setting = 
fenix.ignore_errs; + fenix.ignore_errs = 1; + + int flag; + int ret = PMPI_Test(request, &flag, status); + + fenix.ignore_errs = old_ignore_setting; + + /* Request was (potentially) cancelled if ret is MPI_ERR_PROC_FAILED or MPI_ERR_REVOKED */ + return ret == MPI_ERR_PROC_FAILED || ret == MPI_ERR_REVOKED; +} diff --git a/src/fenix_callbacks.c b/src/fenix_callbacks.c index a69a4dc..f693080 100644 --- a/src/fenix_callbacks.c +++ b/src/fenix_callbacks.c @@ -84,7 +84,7 @@ void __fenix_callback_invoke_all(int error) { fenix_callback_list_t *current = fenix.callback_list; while (current != NULL) { - (current->callback->x)((MPI_Comm) * fenix.new_world, error, + (current->callback->x)((MPI_Comm) fenix.new_world, error, (void *) current->callback->y); current = current->next; } diff --git a/src/fenix_comm_list.c b/src/fenix_comm_list.c index 95f2579..f9fe0cf 100644 --- a/src/fenix_comm_list.c +++ b/src/fenix_comm_list.c @@ -58,7 +58,8 @@ #include #include #include - +#include + fenix_comm_list_t my_list = {NULL, NULL}; int __fenix_comm_push(MPI_Comm *comm) { diff --git a/src/fenix_data_group.c b/src/fenix_data_group.c index f0ae9d7..7fec469 100644 --- a/src/fenix_data_group.c +++ b/src/fenix_data_group.c @@ -77,7 +77,7 @@ fenix_data_recovery_t * __fenix_data_recovery_init() { if (fenix.options.verbose == 41) { verbose_print("c-rank: %d, role: %d, g-count: %zu, g-size: %zu\n", - __fenix_get_current_rank(*fenix.world), fenix.role, data_recovery->count, + __fenix_get_current_rank(fenix.world), fenix.role, data_recovery->count, data_recovery->total_size); } @@ -94,7 +94,7 @@ int __fenix_member_delete(int groupid, int memberid) { if (fenix.options.verbose == 38) { verbose_print("c-rank: %d, role: %d, group_index: %d, member_index: %d\n", - __fenix_get_current_rank(*fenix.new_world), fenix.role, group_index, + __fenix_get_current_rank(fenix.new_world), fenix.role, group_index, member_index); } @@ -124,7 +124,7 @@ int __fenix_member_delete(int groupid, int memberid) { fenix_member_entry_t *mentry =
&(member->member_entry[member_index]); verbose_print("c-rank: %d, role: %d, m-count: %zu, m-state: %d", - __fenix_get_current_rank(*fenix.new_world), fenix.role, + __fenix_get_current_rank(fenix.new_world), fenix.role, member->count, mentry->state); } @@ -172,7 +172,7 @@ int __fenix_group_delete(int groupid) { if (fenix.options.verbose == 37) { verbose_print("c-rank: %d, group_index: %d\n", - __fenix_get_current_rank(*fenix.new_world), group_index); + __fenix_get_current_rank(fenix.new_world), group_index); } if (group_index == -1) { @@ -221,7 +221,7 @@ void __fenix_data_recovery_reinit(fenix_data_recovery_t *data_recovery, if (fenix.options.verbose == 48) { verbose_print("c-rank: %d, role: %d, g-size: %zu\n", - __fenix_get_current_rank(*fenix.new_world), fenix.role, + __fenix_get_current_rank(fenix.new_world), fenix.role, data_recovery->total_size); } } diff --git a/src/fenix_data_member.c b/src/fenix_data_member.c index 8e47d84..5cf604a 100644 --- a/src/fenix_data_member.c +++ b/src/fenix_data_member.c @@ -75,7 +75,7 @@ fenix_member_t *__fenix_data_member_init() { if (fenix.options.verbose == 42) { verbose_print("c-rank: %d, role: %d, m-count: %zu, m-size: %zu\n", - __fenix_get_current_rank(*fenix.world), fenix.role, member->count, + __fenix_get_current_rank(fenix.world), fenix.role, member->count, member->total_size); } @@ -88,7 +88,7 @@ fenix_member_t *__fenix_data_member_init() { if (fenix.options.verbose == 42) { verbose_print("c-rank: %d, role: %d, m-memberid: %d, m-state: %d\n", - __fenix_get_current_rank(*fenix.world), fenix.role, + __fenix_get_current_rank(fenix.world), fenix.role, mentry->memberid, mentry->state); } } @@ -176,7 +176,7 @@ void __fenix_ensure_member_capacity(fenix_member_t *m) { if (fenix.options.verbose == 52) { verbose_print("c-rank: %d, role: %d, m-count: %zu, m-size: %zu\n", - __fenix_get_current_rank(*fenix.new_world), fenix.role, + __fenix_get_current_rank(fenix.new_world), fenix.role, member->count, member->total_size); } @@ -189,7 
+189,7 @@ void __fenix_ensure_member_capacity(fenix_member_t *m) { if (fenix.options.verbose == 52) { verbose_print( "c-rank: %d, role: %d, member[%d] m-memberid: %d, m-state: %d\n", - __fenix_get_current_rank(*fenix.new_world), fenix.role, + __fenix_get_current_rank(fenix.new_world), fenix.role, member_index, mentry->memberid, mentry->state); } } @@ -276,7 +276,7 @@ void __fenix_data_member_reinit(fenix_member_t *m, fenix_two_container_packet_t sizeof(fenix_member_entry_t)); if (fenix.options.verbose == 50) { verbose_print("c-rank: %d, role: %d, m-count: %zu, m-size: %zu\n", - __fenix_get_current_rank(*fenix.new_world), fenix.role, + __fenix_get_current_rank(fenix.new_world), fenix.role, member->count, member->total_size); } @@ -289,7 +289,7 @@ void __fenix_data_member_reinit(fenix_member_t *m, fenix_two_container_packet_t mentry->state = mystatus; if (fenix.options.verbose == 50) { verbose_print("c-rank: %d, role: %d, m-memberid: %d, m-state: %d\n", - __fenix_get_current_rank(*fenix.new_world), fenix.role, + __fenix_get_current_rank(fenix.new_world), fenix.role, mentry->memberid, mentry->state); } } diff --git a/src/fenix_data_recovery.c b/src/fenix_data_recovery.c index 56778b9..7c1c706 100644 --- a/src/fenix_data_recovery.c +++ b/src/fenix_data_recovery.c @@ -81,8 +81,7 @@ int __fenix_group_create( int groupid, MPI_Comm comm, int timestart, int depth, if (fenix.options.verbose == 12) { - verbose_print("c-rank: %d, group_index: %d\n", __fenix_get_current_rank(*fenix.new_world), group_index); - + verbose_print("c-rank: %d, group_index: %d\n", __fenix_get_current_rank(fenix.new_world), group_index); } @@ -103,7 +102,7 @@ int __fenix_group_create( int groupid, MPI_Comm comm, int timestart, int depth, /* If so, recover the data and set the recovery */ /* for member recovery. 
*/ - int i, group_position; + int i; int remote_need_recovery; fenix_group_t *group; MPI_Status status; @@ -142,14 +141,14 @@ int __fenix_group_create( int groupid, MPI_Comm comm, int timestart, int depth, if ( fenix.options.verbose == 12) { verbose_print( "c-rank: %d, g-groupid: %d, g-timestart: %d, g-depth: %d\n", - __fenix_get_current_rank(*fenix.new_world), group->groupid, + __fenix_get_current_rank(fenix.new_world), group->groupid, group->timestart, group->depth); } } else { /* Already created. Renew the MPI communicator */ - group = ( data_recovery->group[group_position] ); + group = ( data_recovery->group[group_index] ); group->comm = comm; /* Renew communicator */ MPI_Comm_rank(comm, &(group->current_rank)); @@ -199,7 +198,7 @@ int __fenix_member_create(int groupid, int memberid, void *data, int count, MPI_ if (fenix.options.verbose == 13) { verbose_print("c-rank: %d, group_index: %d, member_index: %d\n", - __fenix_get_current_rank(*fenix.new_world), + __fenix_get_current_rank(fenix.new_world), group_index, member_index); } @@ -298,7 +297,7 @@ int __fenix_member_store(int groupid, int memberid, Fenix_Data_subset specifier) if (fenix.options.verbose == 18 && fenix.data_recovery->group[group_index]->current_rank== 0 ) { verbose_print( "c-rank: %d, role: %d, group_index: %d, member_index: %d memberid: %d\n", - __fenix_get_current_rank(*fenix.new_world), fenix.role, group_index, + __fenix_get_current_rank(fenix.new_world), fenix.role, group_index, member_index, memberid); } @@ -338,7 +337,7 @@ int __fenix_member_istore(int groupid, int memberid, Fenix_Data_subset specifier if (fenix.options.verbose == 18 && fenix.data_recovery->group[group_index]->current_rank== 0 ) { verbose_print( "c-rank: %d, role: %d, group_index: %d, member_index: %d memberid: %d\n", - __fenix_get_current_rank(*fenix.new_world), fenix.role, group_index, + __fenix_get_current_rank(fenix.new_world), fenix.role, group_index, member_index, memberid); } @@ -395,7 +394,7 @@ void 
__fenix_subset(fenix_group_t *group, fenix_member_entry_t *me, Fenix_Data_s lentry_packet.entry_count = lentry->count; lentry_packet.entry_size = subset_total_size; - int current_rank = __fenix_get_current_rank(*fenix.new_world); + int current_rank = __fenix_get_current_rank(fenix.new_world); int current_role = fenix.role; MPI_Sendrecv(&lentry_packet, sizeof(member_store_packet_t), MPI_BYTE, ge->out_rank, @@ -545,7 +544,7 @@ int __fenix_data_commit(int groupid, int *timestamp) { int retval = -1; int group_index = __fenix_search_groupid(groupid, fenix.data_recovery ); if (fenix.options.verbose == 22) { - verbose_print("c-rank: %d, role: %d, group_index: %d\n", __fenix_get_current_rank(*fenix.new_world), fenix.role, group_index); + verbose_print("c-rank: %d, role: %d, group_index: %d\n", __fenix_get_current_rank(fenix.new_world), fenix.role, group_index); } if (group_index == -1) { debug_print("ERROR Fenix_Data_commit: group_id <%d> does not exist\n", groupid); @@ -577,7 +576,7 @@ int __fenix_data_commit_barrier(int groupid, int *timestamp) { int group_index = __fenix_search_groupid(groupid, fenix.data_recovery ); if (fenix.options.verbose == 23) { verbose_print("c-rank: %d, role: %d, group_index: %d\n", - __fenix_get_current_rank(*fenix.new_world), fenix.role, group_index); + __fenix_get_current_rank(fenix.new_world), fenix.role, group_index); } if (group_index == -1) { debug_print("ERROR Fenix_Data_commit: group_id <%d> does not exist\n", groupid); @@ -617,7 +616,7 @@ int __fenix_member_restore(int groupid, int memberid, void *data, int maxcount, if (fenix.options.verbose == 25) { verbose_print("c-rank: %d, role: %d, group_index: %d, member_index: %d\n", - __fenix_get_current_rank(*fenix.new_world), fenix.role, group_index, + __fenix_get_current_rank(fenix.new_world), fenix.role, group_index, member_index); } @@ -651,7 +650,7 @@ int __fenix_member_restore_from_rank(int groupid, int memberid, void *target_buf if (fenix.options.verbose == 25) { verbose_print("c-rank: 
%d, role: %d, group_index: %d, member_index: %d\n", - __fenix_get_current_rank(*fenix.new_world), fenix.role, group_index, + __fenix_get_current_rank(fenix.new_world), fenix.role, group_index, member_index); } @@ -747,7 +746,7 @@ int __fenix_get_snapshot_at_position(int groupid, int position, int *timestamp) int group_index = __fenix_search_groupid(groupid, fenix.data_recovery ); if (fenix.options.verbose == 33) { verbose_print("c-rank: %d, role: %d, group_index: %d\n", - __fenix_get_current_rank(*fenix.new_world), fenix.role, group_index); + __fenix_get_current_rank(fenix.new_world), fenix.role, group_index); } if (group_index == -1) { debug_print("ERROR Fenix_Data_commit: group_id <%d> does not exist\n", groupid); @@ -780,7 +779,7 @@ int __fenix_member_get_attribute(int groupid, int memberid, int attributename, if (fenix.options.verbose == 34) { verbose_print("c-rank: %d, role: %d, group_index: %d, member_index: %d\n", - __fenix_get_current_rank(*fenix.new_world), fenix.role, group_index, + __fenix_get_current_rank(fenix.new_world), fenix.role, group_index, member_index); } if (group_index == -1) { @@ -794,8 +793,9 @@ int __fenix_member_get_attribute(int groupid, int memberid, int attributename, } else { fenix_group_t *group = (fenix.data_recovery->group[group_index]); fenix_member_t *member = group->member; + fenix_member_entry_t *mentry = &(member->member_entry[member_index]); - int retval = group->vtbl.member_get_attribute(group, member, attributename, + int retval = group->vtbl.member_get_attribute(group, mentry, attributename, attributevalue, flag, sourcerank); } @@ -822,7 +822,7 @@ int __fenix_member_set_attribute(int groupid, int memberid, int attributename, if (fenix.options.verbose == 35) { verbose_print("c-rank: %d, role: %d, group_index: %d, member_index: %d\n", - __fenix_get_current_rank(*fenix.new_world), fenix.role, group_index, + __fenix_get_current_rank(fenix.new_world), fenix.role, group_index, member_index); } @@ -934,14 +934,14 @@ void 
__feninx_dr_print_store() { int *local_data = current->group[group]->member->member_entry[member].version->local_entry[version].data; for (local = 0; local < local_data_count; local++) { //printf("*** store rank[%d] group[%d] member[%d] local[%d]: %d\n", - //get_current_rank(*fenix.new_world), group, member, local, + //get_current_rank(fenix.new_world), group, member, local, //local_data[local]); } int remote_data_count = current->group[group]->member->member_entry[member].version->remote_entry[version].count; int *remote_data = current->group[group]->member->member_entry[member].version->remote_entry[version].data; for (remote = 0; remote < remote_data_count; remote++) { printf("*** store rank[%d] group[%d] member[%d] remote[%d]: %d\n", - __fenix_get_current_rank(*fenix.new_world), group, member, remote, + __fenix_get_current_rank(fenix.new_world), group, member, remote, remote_data[remote]); } } @@ -961,7 +961,7 @@ void __fenix_dr_print_restore() { int local_data_count = current->group[0]->member->member_entry[0].version->local_entry[0].count; int remote_data_count = current->group[0]->member->member_entry[0].version->remote_entry[0].count; printf("*** restore rank: %d; group: %d; member: %d; local: %d; remote: %d\n", - __fenix_get_current_rank(*fenix.new_world), group_count, member_count, + __fenix_get_current_rank(fenix.new_world), group_count, member_count, local_data_count, remote_data_count); } @@ -977,7 +977,7 @@ void __fenix_dr_print_datastructure() { return; } - printf("\n\ncurrent_rank: %d\n", __fenix_get_current_rank(*fenix.new_world)); + printf("\n\ncurrent_rank: %d\n", __fenix_get_current_rank(fenix.new_world)); int group_size = current->total_size; for (group_index = 0; group_index < group_size; group_index++) { int depth = current->group[group_index]->depth; diff --git a/src/fenix_mpi_override.c b/src/fenix_mpi_override.c index 559230c..a3592a7 100644 --- a/src/fenix_mpi_override.c +++ b/src/fenix_mpi_override.c @@ -59,292 +59,18 @@ #include 
#include "fenix_ext.h" -static inline -MPI_Comm __fenix_replace_comm(MPI_Comm comm) -{ - if(fenix.replace_comm_flag && - comm == fenix.original_comm && - fenix.fenix_init_flag) - return *fenix.new_world; - else - return comm; -} - static inline int __fenix_notify_newcomm(int ret, MPI_Comm *newcomm) { - if (ret != MPI_SUCCESS || - !fenix.fenix_init_flag || - *newcomm == MPI_COMM_NULL) return ret; - ret = PMPI_Comm_set_errhandler(*newcomm, MPI_ERRORS_RETURN); - if (ret != MPI_SUCCESS) { - fprintf(stderr, "[fenix error] Did not manage to set error handler\n"); - PMPI_Comm_free(newcomm); - ret = MPI_ERR_INTERN; - } else { -#warning "Calling fenix comm push and fenix init may not have been called... check other places in this function" - if (__fenix_comm_push(newcomm) != FENIX_SUCCESS) { - fprintf(stderr, "[fenix error] Did not manage to push communicator\n"); - PMPI_Comm_free(newcomm); - ret = MPI_ERR_INTERN; - } - } - return ret; -} - -// This inlined function is used to avoid a function call for each MPI -// operation call in the case where no failures are detected. -static inline -void __fenix_test_MPI_inline(int ret, const char *msg) -{ - if(ret == MPI_SUCCESS) return; - __fenix_test_MPI(ret, msg); -} - -int MPI_Comm_size(MPI_Comm comm, int *size) -{ - int ret; - ret = PMPI_Comm_size(__fenix_replace_comm(comm), size); - __fenix_test_MPI_inline(ret, "MPI_Comm_size"); - return ret; -} - -int MPI_Comm_dup(MPI_Comm comm, MPI_Comm *newcomm) -{ - int ret; - ret = PMPI_Comm_dup(__fenix_replace_comm(comm), newcomm); - ret = __fenix_notify_newcomm(ret, newcomm); - __fenix_test_MPI_inline(ret, "MPI_Comm_dup"); - return ret; -} - -int MPI_Comm_split(MPI_Comm comm, int color, int key, MPI_Comm *newcomm) -{ - int ret; - ret = PMPI_Comm_split(__fenix_replace_comm(comm), color, key, newcomm); - ret = __fenix_notify_newcomm(ret, newcomm); - __fenix_test_MPI_inline(ret, "MPI_Comm_split"); - return ret; -} - -/* #warning "For MPI >= 3.0, const void * is used!" 
*/ -#if MPI_VERSION < 3 -#define MPI_CONST_TYPE -#else -#define MPI_CONST_TYPE const -#endif - -int MPI_Alltoallv(MPI_CONST_TYPE void* sendbuf, MPI_CONST_TYPE int sendcounts[], - MPI_CONST_TYPE int sdispls[], MPI_Datatype sendtype, - void *recvbuf, MPI_CONST_TYPE int recvcounts[], - MPI_CONST_TYPE int rdispls[], MPI_Datatype recvtype, - MPI_Comm comm) -{ - int ret; - ret = PMPI_Alltoallv(sendbuf, sendcounts, sdispls, sendtype, recvbuf, - recvcounts, rdispls, recvtype, - __fenix_replace_comm(comm)); - __fenix_test_MPI_inline(ret, "MPI_Alltoallv"); - return ret; -} - -int MPI_Allgather(MPI_CONST_TYPE void* sendbuf, int sendcount, MPI_Datatype sendtype, - void *recvbuf, int recvcount, MPI_Datatype recvtype, - MPI_Comm comm) -{ - int ret; - ret = PMPI_Allgather(sendbuf, sendcount, sendtype, recvbuf, recvcount, - recvtype, __fenix_replace_comm(comm)); - __fenix_test_MPI_inline(ret, "MPI_Allgather"); - return ret; -} - -int MPI_Comm_rank(MPI_Comm comm, int *rank) -{ - int ret; - ret = PMPI_Comm_rank(__fenix_replace_comm(comm), rank); - __fenix_test_MPI_inline(ret, "MPI_Comm_rank"); - return ret; -} - - -int MPI_Allreduce(MPI_CONST_TYPE void* sendbuf, void *recvbuf, int count, - MPI_Datatype type, MPI_Op op, MPI_Comm comm) -{ - int ret; - ret = PMPI_Allreduce(sendbuf, recvbuf, count, type, op, __fenix_replace_comm(comm)); - __fenix_test_MPI_inline(ret, "MPI_Allreduce"); - return ret; -} - -int MPI_Reduce(MPI_CONST_TYPE void* sendbuf, void *recvbuf, int count, MPI_Datatype type, - MPI_Op op, int root, MPI_Comm comm) -{ - int ret; - ret = PMPI_Reduce(sendbuf, recvbuf, count, type, op, root, __fenix_replace_comm(comm)); - __fenix_test_MPI_inline(ret, "MPI_Reduce"); - return ret; -} - -int MPI_Barrier(MPI_Comm comm) -{ - int ret; - ret = PMPI_Barrier(__fenix_replace_comm(comm)); - __fenix_test_MPI_inline(ret, "MPI_Barrier"); - return ret; -} - -int MPI_Bcast(void *buf, int count, MPI_Datatype type, int root, MPI_Comm comm) -{ - int ret; - ret = PMPI_Bcast(buf, count, type, 
root, __fenix_replace_comm(comm)); - __fenix_test_MPI_inline(ret, "MPI_Bcast"); - return ret; -} - -int MPI_Recv(void *buf, int count, MPI_Datatype type, int source, int tag, - MPI_Comm comm, MPI_Status *status) -{ - int ret; - ret = PMPI_Recv(buf, count, type, source, tag, __fenix_replace_comm(comm), - status); - __fenix_test_MPI_inline(ret, "MPI_Recv"); - return ret; -} - -int MPI_Send(MPI_CONST_TYPE void* buf, int count, MPI_Datatype type, int dest, - int tag, MPI_Comm comm) -{ - int ret; - ret = PMPI_Send(buf, count, type, dest, tag, __fenix_replace_comm(comm)); - __fenix_test_MPI_inline(ret, "MPI_Send"); - return ret; -} - -int MPI_Sendrecv(MPI_CONST_TYPE void* sendbuf, int sendcount, - MPI_Datatype sendtype, int dest, int sendtag, - void *recvbuf, int recvcount, MPI_Datatype recvtype, - int source, int recvtag, - MPI_Comm comm, MPI_Status *status) -{ - int ret; - ret = PMPI_Sendrecv(sendbuf, sendcount, sendtype, dest, sendtag, recvbuf, - recvcount, recvtype, source, recvtag, - __fenix_replace_comm(comm), status); - __fenix_test_MPI_inline(ret, "MPI_Sendrecv"); - return ret; -} - -static inline -void __fenix_override_request(int ret, MPI_Request *request) -{ - if(ret != MPI_SUCCESS) return; - - assert(*request != MPI_REQUEST_NULL); - - // insert 'request' in the request_store - // get location of 'request' in store and return in 'fenix_request' - *((int *)request) = __fenix_request_store_add(&fenix.request_store, - request); -} - -int MPI_Isend(MPI_CONST_TYPE void* buf, int count, MPI_Datatype datatype, - int dest, int tag, MPI_Comm comm, MPI_Request *request) -{ - int ret; - ret = PMPI_Isend(buf, count, datatype, dest, tag, - __fenix_replace_comm(comm), request); - __fenix_override_request(ret, request); - __fenix_test_MPI_inline(ret, "MPI_Isend"); - return ret; -} - -int MPI_Irecv(void *buf, int count, MPI_Datatype datatype, - int source, int tag, MPI_Comm comm, MPI_Request *request) -{ - int ret; - ret = PMPI_Irecv(buf, count, datatype, source, tag, - 
__fenix_replace_comm(comm), request); - __fenix_override_request(ret, request); - __fenix_test_MPI_inline(ret, "MPI_Irecv"); - return ret; -} - -int MPI_Wait(MPI_Request *fenix_request, MPI_Status *status) -{ - int ret; - MPI_Request request = MPI_REQUEST_NULL; - if(*fenix_request != MPI_REQUEST_NULL) - __fenix_request_store_get(&fenix.request_store, - *((int *) fenix_request), - &request); - - ret = PMPI_Wait(&request, status); - if(ret == MPI_SUCCESS) { - __fenix_request_store_remove(&fenix.request_store, - *((int *) fenix_request)); - assert(request == MPI_REQUEST_NULL); - *fenix_request = MPI_REQUEST_NULL; - } - __fenix_test_MPI_inline(ret, "MPI_Wait"); - return ret; -} - -#warning "Fix tabs in source code" - -int MPI_Waitall(int count, MPI_Request array_of_fenix_requests[], - MPI_Status *array_of_statuses) -{ - // The list (array_of_requests) may contain null or inactive handles. - int ret, i; - for(i=0 ; i= __fenix_get_world_size(comm)) { @@ -148,15 +164,8 @@ int __fenix_preinit(int *role, MPI_Comm comm, MPI_Comm *new_comm, int *argc, cha fenix.spare_ranks); } -#warning "There is no reason why fenix.world (and other) need to be malloced..." - fenix.world = (MPI_Comm *) s_malloc(sizeof(MPI_Comm)); - - MPI_Comm_dup(comm, fenix.world); - fenix.data_recovery = __fenix_data_recovery_init(); - fenix.new_world = (MPI_Comm *) s_malloc(sizeof(MPI_Comm)); - /*****************************************************/ /* Note: fenix.new_world is only valid for the */ /* active MPI ranks. 
Spare ranks do not */ @@ -174,10 +183,10 @@ int __fenix_preinit(int *role, MPI_Comm comm, MPI_Comm *new_comm, int *argc, cha } if ( __fenix_spare_rank() != 1) { - fenix.num_inital_ranks = __fenix_get_world_size(*fenix.new_world); + fenix.num_inital_ranks = __fenix_get_world_size(fenix.new_world); if (fenix.options.verbose == 0) { verbose_print("rank: %d, role: %d, number_initial_ranks: %d\n", - __fenix_get_current_rank(*fenix.world), fenix.role, + __fenix_get_current_rank(fenix.world), fenix.role, fenix.num_inital_ranks); } @@ -186,7 +195,7 @@ int __fenix_preinit(int *role, MPI_Comm comm, MPI_Comm *new_comm, int *argc, cha if (fenix.options.verbose == 0) { verbose_print("rank: %d, role: %d, number_initial_ranks: %d\n", - __fenix_get_current_rank(*fenix.world), fenix.role, + __fenix_get_current_rank(fenix.world), fenix.role, fenix.num_inital_ranks); } } @@ -198,19 +207,19 @@ int __fenix_preinit(int *role, MPI_Comm comm, MPI_Comm *new_comm, int *argc, cha int a; int myrank; MPI_Status mpi_status; - ret = PMPI_Recv(&a, 1, MPI_INT, MPI_ANY_SOURCE, MPI_ANY_TAG, *fenix.world, + ret = PMPI_Recv(&a, 1, MPI_INT, MPI_ANY_SOURCE, MPI_ANY_TAG, fenix.world, &mpi_status); // listen for a failure if (ret == MPI_SUCCESS) { if (fenix.options.verbose == 0) { verbose_print("Finalize the program; rank: %d, role: %d\n", - __fenix_get_current_rank(*fenix.world), fenix.role); + __fenix_get_current_rank(fenix.world), fenix.role); } __fenix_finalize_spare(); } else { fenix.repair_result = __fenix_repair_ranks(); if (fenix.options.verbose == 0) { verbose_print("spare rank exiting from MPI_Recv - repair ranks; rank: %d, role: %d\n", - __fenix_get_current_rank(*fenix.world), fenix.role); + __fenix_get_current_rank(fenix.world), fenix.role); } } fenix.role = FENIX_ROLE_RECOVERED_RANK; @@ -222,10 +231,9 @@ int __fenix_preinit(int *role, MPI_Comm comm, MPI_Comm *new_comm, int *argc, cha int __fenix_create_new_world() { int ret; - ret = PMPI_Comm_set_errhandler(*fenix.world, MPI_ERRORS_RETURN); 
if ( __fenix_spare_rank() == 1) { - int current_rank = __fenix_get_current_rank(*fenix.world); + int current_rank = __fenix_get_current_rank(fenix.world); /*************************************************************************/ /** MPI_UNDEFINED makes the new communicator "undefined" at spare ranks **/ @@ -234,26 +242,25 @@ int __fenix_create_new_world() /*************************************************************************/ if (fenix.options.verbose == 1) { - verbose_print("rank: %d, role: %d\n", __fenix_get_current_rank(*fenix.world), + verbose_print("rank: %d, role: %d\n", __fenix_get_current_rank(fenix.world), fenix.role); } - ret = PMPI_Comm_split(*fenix.world, MPI_UNDEFINED, current_rank, - fenix.new_world); + ret = PMPI_Comm_split(fenix.world, MPI_UNDEFINED, current_rank, + &fenix.new_world); if (ret != MPI_SUCCESS) { debug_print("MPI_Comm_split: %d\n", ret); } } else { - int current_rank = __fenix_get_current_rank(*fenix.world); + int current_rank = __fenix_get_current_rank(fenix.world); if (fenix.options.verbose == 1) { - verbose_print("rank: %d, role: %d\n", __fenix_get_current_rank(*fenix.world), + verbose_print("rank: %d, role: %d\n", __fenix_get_current_rank(fenix.world), fenix.role); } - ret = PMPI_Comm_split(*fenix.world, 0, current_rank, fenix.new_world); + ret = PMPI_Comm_split(fenix.world, 0, current_rank, &fenix.new_world); if (ret != MPI_SUCCESS) { debug_print("MPI_Comm_split: %d\n", ret); } - MPI_Comm_set_errhandler(*fenix.new_world, MPI_ERRORS_RETURN); } return ret; @@ -282,17 +289,10 @@ int __fenix_repair_ranks() while (!repair_success) { repair_success = 1; - ret = MPIX_Comm_shrink(*fenix.world, &world_without_failures); - /* if (ret != MPI_SUCCESS) { debug_print("MPI_Comm_shrink. 
repair_ranks\n"); } */ - if (ret != MPI_SUCCESS) { - repair_success = 0; - goto END_LOOP; - } - - ret = MPI_Comm_set_errhandler(world_without_failures, MPI_ERRORS_RETURN); + ret = MPIX_Comm_shrink(fenix.world, &world_without_failures); + //if (ret != MPI_SUCCESS) { debug_print("MPI_Comm_shrink. repair_ranks\n"); } if (ret != MPI_SUCCESS) { repair_success = 0; - MPI_Comm_free(&world_without_failures); goto END_LOOP; } @@ -300,36 +300,34 @@ int __fenix_repair_ranks() /* Free up the storage for active process communicator */ /*********************************************************/ if ( __fenix_spare_rank() != 1) { - PMPI_Comm_free(fenix.new_world); - if ( fenix.replace_comm_flag == 0) { - PMPI_Comm_free(fenix.user_world); - } + PMPI_Comm_free(&fenix.new_world); + PMPI_Comm_free(fenix.user_world); } /*********************************************************/ /* Need closer look above */ /*********************************************************/ /* current_rank means the global MPI rank before failure */ - current_rank = __fenix_get_current_rank(*fenix.world); + current_rank = __fenix_get_current_rank(fenix.world); survivor_world_size = __fenix_get_world_size(world_without_failures); - world_size = __fenix_get_world_size(*fenix.world); - fail_world_size = world_size - survivor_world_size; + world_size = __fenix_get_world_size(fenix.world); + fenix.fail_world_size = world_size - survivor_world_size; if (fenix.options.verbose == 2) { verbose_print( "current_rank: %d, role: %d, world_size: %d, fail_world_size: %d, survivor_world_size: %d\n", - __fenix_get_current_rank(*fenix.world), fenix.role, world_size, - fail_world_size, survivor_world_size); + __fenix_get_current_rank(fenix.world), fenix.role, world_size, + fenix.fail_world_size, survivor_world_size); } - if (fenix.spare_ranks < fail_world_size) { + if (fenix.spare_ranks < fenix.fail_world_size) { /* Not enough spare ranks */ if (fenix.options.verbose == 2) { verbose_print( "current_rank: %d, role: %d, 
spare_ranks: %d, fail_world_size: %d\n", - __fenix_get_current_rank(*fenix.world), fenix.role, fenix.spare_ranks, - fail_world_size); + __fenix_get_current_rank(fenix.world), fenix.role, fenix.spare_ranks, + fenix.fail_world_size); } if (fenix.spawn_policy == 1) { @@ -355,12 +353,12 @@ int __fenix_repair_ranks() int index; for (index = 0; index < survivor_world_size; index++) { verbose_print("current_rank: %d, role: %d, survivor_world[%d]: %d\n", - __fenix_get_current_rank(*fenix.world), fenix.role, index, + __fenix_get_current_rank(fenix.world), fenix.role, index, survivor_world[index]); } } - /* if (ret != MPI_SUCCESS) { debug_print("MPI_Allgather. repair_ranks\n"); } */ + //if (ret != MPI_SUCCESS) { debug_print("MPI_Allgather. repair_ranks\n"); } if (ret != MPI_SUCCESS) { repair_success = 0; if (ret == MPI_ERR_PROC_FAILED) { @@ -379,7 +377,7 @@ int __fenix_repair_ranks() ret = PMPI_Allreduce(&survived_flag, &fenix.num_survivor_ranks, 1, MPI_INT, MPI_SUM, world_without_failures); - /* if (ret != MPI_SUCCESS) { debug_print("MPI_Allreduce. repair_ranks\n"); } */ + //if (ret != MPI_SUCCESS) { debug_print("MPI_Allreduce. 
repair_ranks\n"); } if (ret != MPI_SUCCESS) { repair_success = 0; if (ret == MPI_ERR_PROC_FAILED) { @@ -393,22 +391,25 @@ int __fenix_repair_ranks() fenix.num_inital_ranks = 0; /* recovered ranks must be the number of spare ranks */ - fenix.num_recovered_ranks = fail_world_size; + fenix.num_recovered_ranks = fenix.fail_world_size; if (fenix.options.verbose == 2) { verbose_print("current_rank: %d, role: %d, recovered_ranks: %d\n", - __fenix_get_current_rank(*fenix.world), fenix.role, + __fenix_get_current_rank(fenix.world), fenix.role, fenix.num_recovered_ranks); } - - fail_world = (int *) s_malloc(fail_world_size * sizeof(int)); - fail_world = __fenix_get_fail_ranks(survivor_world, survivor_world_size, - fail_world_size); + + if(fenix.role != FENIX_ROLE_INITIAL_RANK){ + free(fenix.fail_world); + } + fenix.fail_world = (int *) s_malloc(fenix.fail_world_size * sizeof(int)); + fenix.fail_world = __fenix_get_fail_ranks(survivor_world, survivor_world_size, + fenix.fail_world_size); if (fenix.options.verbose == 2) { int index; - for (index = 0; index < fail_world_size; index++) { - verbose_print("fail_world[%d]: %d\n", index, fail_world[index]); + for (index = 0; index < fenix.fail_world_size; index++) { + verbose_print("fail_world[%d]: %d\n", index, fenix.fail_world[index]); } } @@ -418,30 +419,28 @@ int __fenix_repair_ranks() if (fenix.options.verbose == 2) { verbose_print("current_rank: %d, role: %d, active_ranks: %d\n", - __fenix_get_current_rank(*fenix.world), fenix.role, + __fenix_get_current_rank(fenix.world), fenix.role, active_ranks); } /* Assign new rank for reordering */ if (current_rank >= active_ranks) { // reorder ranks int rank_offset = ((world_size - 1) - current_rank); - if (rank_offset < fail_world_size) { + if (rank_offset < fenix.fail_world_size) { if (fenix.options.verbose == 11) { verbose_print("reorder ranks; current_rank: %d -> new_rank: %d\n", - current_rank, fail_world[rank_offset]); + current_rank, fenix.fail_world[rank_offset]); } - 
current_rank = fail_world[rank_offset]; + current_rank = fenix.fail_world[rank_offset]; } } - free(fail_world); - /************************************/ /* Update the number of spare ranks */ /************************************/ fenix.spare_ranks = 0; - /* debug_print("not enough spare ranks to repair rank failures. repair_ranks\n"); */ + //debug_print("not enough spare ranks to repair rank failures. repair_ranks\n"); } /****************************************************************/ @@ -458,7 +457,7 @@ int __fenix_repair_ranks() ret = PMPI_Allgather(¤t_rank, 1, MPI_INT, survivor_world, 1, MPI_INT, world_without_failures); - /* if (ret != MPI_SUCCESS) { debug_print("MPI_Allgather. repair_ranks\n"); } */ + //if (ret != MPI_SUCCESS) { debug_print("MPI_Allgather. repair_ranks\n"); } if (ret != MPI_SUCCESS) { repair_success = 0; if (ret == MPI_ERR_PROC_FAILED) { @@ -476,7 +475,7 @@ int __fenix_repair_ranks() ret = PMPI_Allreduce(&survived_flag, &fenix.num_survivor_ranks, 1, MPI_INT, MPI_SUM, world_without_failures); - /* if (ret != MPI_SUCCESS) { debug_print("MPI_Allreduce. repair_ranks\n"); } */ + //if (ret != MPI_SUCCESS) { debug_print("MPI_Allreduce. 
repair_ranks\n"); } if (ret != MPI_SUCCESS) { repair_success = 0; if (ret != MPI_ERR_PROC_FAILED) { @@ -488,16 +487,20 @@ int __fenix_repair_ranks() } fenix.num_inital_ranks = 0; - fenix.num_recovered_ranks = fail_world_size; + fenix.num_recovered_ranks = fenix.fail_world_size; + + if(fenix.role != FENIX_ROLE_INITIAL_RANK){ + free(fenix.fail_world); + } - fail_world = (int *) s_malloc(fail_world_size * sizeof(int)); - fail_world = __fenix_get_fail_ranks(survivor_world, survivor_world_size, fail_world_size); + fenix.fail_world = (int *) s_malloc(fenix.fail_world_size * sizeof(int)); + fenix.fail_world = __fenix_get_fail_ranks(survivor_world, survivor_world_size, fenix.fail_world_size); free(survivor_world); if (fenix.options.verbose == 2) { int index; - for (index = 0; index < fail_world_size; index++) { - verbose_print("fail_world[%d]: %d\n", index, fail_world[index]); + for (index = 0; index < fenix.fail_world_size; index++) { + verbose_print("fail_world[%d]: %d\n", index, fenix.fail_world[index]); } } @@ -505,29 +508,27 @@ int __fenix_repair_ranks() if (fenix.options.verbose == 2) { verbose_print("current_rank: %d, role: %d, active_ranks: %d\n", - __fenix_get_current_rank(*fenix.world), fenix.role, active_ranks); + __fenix_get_current_rank(fenix.world), fenix.role, active_ranks); } if (current_rank >= active_ranks) { // reorder ranks int rank_offset = ((world_size - 1) - current_rank); - if (rank_offset < fail_world_size) { + if (rank_offset < fenix.fail_world_size) { if (fenix.options.verbose == 2) { verbose_print("reorder ranks; current_rank: %d -> new_rank: %d\n", - current_rank, fail_world[rank_offset]); + current_rank, fenix.fail_world[rank_offset]); } - current_rank = fail_world[rank_offset]; + current_rank = fenix.fail_world[rank_offset]; } } - free(fail_world); - /************************************/ /* Update the number of spare ranks */ /************************************/ - fenix.spare_ranks = fenix.spare_ranks - fail_world_size; + fenix.spare_ranks 
= fenix.spare_ranks - fenix.fail_world_size; if (fenix.options.verbose == 2) { verbose_print("current_rank: %d, role: %d, spare_ranks: %d\n", - __fenix_get_current_rank(*fenix.world), fenix.role, + __fenix_get_current_rank(fenix.world), fenix.role, fenix.spare_ranks); } } @@ -537,10 +538,10 @@ int __fenix_repair_ranks() /*********************************************************/ if (!flag_g_world_freed) { - ret = PMPI_Comm_free(fenix.world); + ret = PMPI_Comm_free(&fenix.world); if (ret != MPI_SUCCESS) { flag_g_world_freed = 1; } } - ret = PMPI_Comm_split(world_without_failures, 0, current_rank, fenix.world); + ret = PMPI_Comm_split(world_without_failures, 0, current_rank, &fenix.world); /* if (ret != MPI_SUCCESS) { debug_print("MPI_Comm_split. repair_ranks\n"); } */ if (ret != MPI_SUCCESS) { @@ -557,12 +558,12 @@ int __fenix_repair_ranks() /* Need special treatment for error handling */ __fenix_create_new_world(); - ret = PMPI_Barrier(*fenix.world); + ret = PMPI_Barrier(fenix.world); /* if (ret != MPI_SUCCESS) { debug_print("MPI_Barrier. 
repair_ranks\n"); } */ if (ret != MPI_SUCCESS) { repair_success = 0; if (ret != MPI_ERR_PROC_FAILED) { - MPIX_Comm_revoke(*fenix.world); + MPIX_Comm_revoke(fenix.world); } } @@ -574,7 +575,7 @@ int __fenix_repair_ranks() /*******************************************************/ /* - if (__fenix_get_current_rank(*fenix.world) == FENIX_ROOT) { + if (__fenix_get_current_rank(fenix.world) == FENIX_ROOT) { LDEBUG("Fenix: communicators repaired\n"); } */ @@ -603,8 +604,8 @@ int* __fenix_get_fail_ranks(int *survivor_world, int survivor_world_size, int fa int __fenix_spare_rank() { int result = -1; - int current_rank = __fenix_get_current_rank(*fenix.world); - int new_world_size = __fenix_get_world_size(*fenix.world) - fenix.spare_ranks; + int current_rank = __fenix_get_current_rank(fenix.world); + int new_world_size = __fenix_get_world_size(fenix.world) - fenix.spare_ranks; if (current_rank >= new_world_size) { if (fenix.options.verbose == 6) { verbose_print("current_rank: %d, new_world_size: %d\n", current_rank, new_world_size); @@ -618,19 +619,13 @@ void __fenix_postinit(int *error) { //if (fenix.options.verbose == 9) { - // verbose_print(" postinit: current_rank: %d, role: %d\n", __fenix_get_current_rank(*fenix.new_world), + // verbose_print(" postinit: current_rank: %d, role: %d\n", __fenix_get_current_rank(fenix.new_world), // fenix.role); //} - PMPI_Barrier(*fenix.new_world); - + PMPI_Barrier(fenix.new_world); - if (fenix.replace_comm_flag == 0) { - PMPI_Comm_dup(*fenix.new_world, fenix.user_world); - PMPI_Comm_set_errhandler(*fenix.user_world, MPI_ERRORS_RETURN); - } else { - PMPI_Comm_set_errhandler(*fenix.new_world, MPI_ERRORS_RETURN); - } + PMPI_Comm_dup(fenix.new_world, fenix.user_world); if (fenix.repair_result != 0) { *error = fenix.repair_result; @@ -647,7 +642,7 @@ void __fenix_postinit(int *error) __fenix_callback_invoke_all(*error); } if (fenix.options.verbose == 9) { - verbose_print("After barrier. 
current_rank: %d, role: %d\n", __fenix_get_current_rank(*fenix.new_world), + verbose_print("After barrier. current_rank: %d, role: %d\n", __fenix_get_current_rank(fenix.new_world), fenix.role); } } @@ -659,21 +654,24 @@ void __fenix_finalize() // By setting fenix.finalized to 1 we are skipping the longjump // after recovery. fenix.finalized = 1; + + //We don't want to handle failures in here as normally, we just want to continue trying to finalize. + fenix.ignore_errs = 1; - int ret = MPI_Barrier( *fenix.new_world ); + int ret = MPI_Barrier( fenix.new_world ); if (ret != MPI_SUCCESS) { __fenix_finalize(); return; } - if (__fenix_get_current_rank(*fenix.world) == 0) { + if (__fenix_get_current_rank(fenix.world) == 0) { int spare_rank; - MPI_Comm_size(*fenix.world, &spare_rank); + MPI_Comm_size(fenix.world, &spare_rank); spare_rank--; int a; int i; for (i = 0; i < fenix.spare_ranks; i++) { - int ret = MPI_Send(&a, 1, MPI_INT, spare_rank, 1, *fenix.world); + int ret = MPI_Send(&a, 1, MPI_INT, spare_rank, 1, fenix.world); if (ret != MPI_SUCCESS) { __fenix_finalize(); return; @@ -682,18 +680,20 @@ void __fenix_finalize() } } - ret = MPI_Barrier(*fenix.world); + ret = MPI_Barrier(fenix.world); if (ret != MPI_SUCCESS) { __fenix_finalize(); return; } MPI_Op_free( &fenix.agree_op ); - MPI_Comm_set_errhandler( *fenix.world, MPI_ERRORS_ARE_FATAL ); - MPI_Comm_free( fenix.world ); - MPI_Comm_free( fenix.new_world ); - free( fenix.world ); - free( fenix.new_world ); + MPI_Comm_set_errhandler( fenix.world, MPI_ERRORS_ARE_FATAL ); + MPI_Comm_free( &fenix.world ); + MPI_Comm_free( &fenix.new_world ); + + if(fenix.role != FENIX_ROLE_INITIAL_RANK){ + free(fenix.fail_world); + } /* Free Callbacks */ __fenix_callback_destroy( fenix.callback_list ); @@ -701,26 +701,18 @@ void __fenix_finalize() /* Free data recovery interface */ __fenix_data_recovery_destroy( fenix.data_recovery ); - /* Free the request store */ - __fenix_request_store_destroy(&fenix.request_store); - 
fenix.fenix_init_flag = 0; } void __fenix_finalize_spare() { fenix.fenix_init_flag = 0; - int ret = PMPI_Barrier(*fenix.world); + int ret = PMPI_Barrier(fenix.world); if (ret != MPI_SUCCESS) { debug_print("MPI_Barrier: %d\n", ret); } MPI_Op_free(&fenix.agree_op); - MPI_Comm_set_errhandler(*fenix.world, MPI_ERRORS_ARE_FATAL); - MPI_Comm_free(fenix.world); - - /* This communicator is invalid for spare processes */ - /* MPI_Comm_free(fenix.new_world); */ - free(fenix.world); - free(fenix.new_world); + MPI_Comm_set_errhandler(fenix.world, MPI_ERRORS_ARE_FATAL); + MPI_Comm_free(&fenix.world); /* Free callbacks */ __fenix_callback_destroy( fenix.callback_list ); @@ -735,50 +727,66 @@ void __fenix_finalize_spare() exit(0); } -void __fenix_test_MPI(int ret, const char *msg) +void __fenix_test_MPI(MPI_Comm *pcomm, int *pret, ...) { + int ret_repair; int index; - if(!fenix.fenix_init_flag || ret == MPI_SUCCESS || __fenix_spare_rank() == 1) { + int ret = *pret; + if(!fenix.fenix_init_flag || __fenix_spare_rank() == 1 || fenix.ignore_errs) { return; } switch (ret) { case MPI_ERR_PROC_FAILED: - MPIX_Comm_revoke(*fenix.world); - MPIX_Comm_revoke(*fenix.new_world); + MPIX_Comm_revoke(fenix.world); + MPIX_Comm_revoke(fenix.new_world); - if (fenix.replace_comm_flag == 0) { - MPIX_Comm_revoke(*fenix.user_world); - } + MPIX_Comm_revoke(*fenix.user_world); - __fenix_request_store_waitall_removeall(&fenix.request_store); __fenix_comm_list_destroy(); fenix.repair_result = __fenix_repair_ranks(); break; case MPI_ERR_REVOKED: - __fenix_request_store_waitall_removeall(&fenix.request_store); - __fenix_comm_list_destroy(); fenix.repair_result = __fenix_repair_ranks(); break; case MPI_ERR_INTERN: printf("Fenix detected error: MPI_ERR_INTERN\n"); - default: + default: + if(fenix.print_unhandled){ + int len; + char errstr[MPI_MAX_ERROR_STRING]; + MPI_Error_string(ret, errstr, &len); + fprintf(stderr, "UNHANDLED ERR: %s\n", errstr); + } return; break; #ifdef MPICH - 
MPIX_Comm_revoke(*fenix.world); - MPIX_Comm_revoke(*fenix.new_world); + MPIX_Comm_revoke(fenix.world); + MPIX_Comm_revoke(fenix.new_world); //MPIX_Comm_revoke(*fenix.user_world); fenix.repair_result = __fenix_repair_ranks(); #endif } fenix.role = FENIX_ROLE_SURVIVOR_RANK; - if(!fenix.finalized) - longjmp(*fenix.recover_environment, 1); + if(!fenix.finalized) { + switch(fenix.resume_mode) { + case __FENIX_RESUME_AT_INIT: + longjmp(*fenix.recover_environment, 1); + break; + case __FENIX_RESUME_NO_JUMP: + *(fenix.ret_role) = FENIX_ROLE_SURVIVOR_RANK; + __fenix_postinit(fenix.ret_error); + break; + default: + printf("Fenix detected error: Unknown resume mode\n"); + assert(false); + break; + } + } } diff --git a/src/fenix_request_store.h b/src/fenix_request_store.h deleted file mode 100644 index 2c85a9a..0000000 --- a/src/fenix_request_store.h +++ /dev/null @@ -1,223 +0,0 @@ -/* -//@HEADER -// ************************************************************************ -// -// -// _|_|_|_| _|_|_|_| _| _| _|_|_| _| _| -// _| _| _|_| _| _| _| _| -// _|_|_| _|_|_| _| _| _| _| _| -// _| _| _| _|_| _| _| _| -// _| _|_|_|_| _| _| _|_|_| _| _| -// -// -// -// -// Copyright (C) 2016 Rutgers University and Sandia Corporation -// -// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, -// the U.S. Government retains certain rights in this software. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are -// met: -// -// 1. Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// -// 2. Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the distribution. -// -// 3. 
Neither the name of the Corporation nor the names of the -// contributors may be used to endorse or promote products derived from -// this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF -// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING -// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -// -// Author Marc Gamell, Eric Valenzuela, Keita Teranishi, Manish Parashar -// and Michael Heroux -// -// Questions? 
Contact Keita Teranishi (knteran@sandia.gov) and -// Marc Gamell (mgamell@cac.rutgers.edu) -// -// ************************************************************************ -//@HEADER -*/ - - - -#ifndef __FENIX_REQUEST_STORE_H__ -#define __FENIX_REQUEST_STORE_H__ - -#include -#include -#include - -#include "fenix_stack.h" - -/* - - MPI_REQUEST_NULL = 0; - - user MPI_Irecv(&req); - fenix PMPI_Irecv(&req); - MPI returns req=222; - fenix store_add(222); - store returns "-123"; - fenix returns req=-123; - - user MPI_Wait(-123); - fenix store_get(-123); - store_get {req_id = 0; req=222;} - store_get returns req=222; - fenix PMPI_Wait(222); - - */ - - -typedef struct { - char valid; - MPI_Request r; -} __fenix_request_t; - -#define __fenix_dynamic_array_type __fenix_request_t -#define __fenix_dynamic_array_typename req -#include "fenix_dynamic_array.h" -#undef __fenix_dynamic_array_type -#undef __fenix_dynamic_array_typename - -typedef struct { - __fenix_req_dynamic_array_t reqs; // list of requests - int first_unused_position; // first position in 'reqs' that has never been used - __fenix_stack_t freed_list; // list of positions in 'reqs' that are not used anymore -} fenix_request_store_t; - -static inline -void __fenix_request_store_init(fenix_request_store_t *s) -{ - s->first_unused_position = 0; - __fenix_int_stack_init(&(s->freed_list), 100); - __fenix_req_dynamic_array_init(&(s->reqs), 500); -} - -static inline -void __fenix_request_store_destroy(fenix_request_store_t *s) -{ - int valid_count = 0, i; - for(i=0 ; ifirst_unused_position ; i++) - if(s->reqs.elements[i].valid) valid_count++; - if(valid_count > 0) - printf("[Fenix warning] __fenix_request_store_destroy. store contains valid elements (valid elems %d, first_unused_pos %d)\n", valid_count, s->first_unused_position); - __fenix_req_dynamic_array_destroy(&(s->reqs)); - __fenix_int_stack_destroy(&(s->freed_list)); - s->first_unused_position = 0; -} - -// returns request_id (i.e. 
position in the s->reqs.elements array) -static inline -int __fenix_request_store_add(fenix_request_store_t *s, - MPI_Request *r) -{ - assert(*r != MPI_REQUEST_NULL); - int position = -1; - if(s->freed_list.size > 0) { - position = __fenix_int_stack_pop(&(s->freed_list)); - } else { - position = s->first_unused_position++; - } - assert(position >= 0); - __fenix_req_dynamic_array_inc(&(s->reqs)); - __fenix_request_t *f = &(s->reqs.elements[position]); - assert(!f->valid); - memcpy(&(f->r), r, sizeof(MPI_Request)); - f->valid = 1; - - // Cannot return a position that is equivalent to MPI_REQUEST_NULL - MPI_Request r_test; - *((int *)&r_test) = position; - if(r_test == MPI_REQUEST_NULL) { - position = -123; - { - *((int *)&r_test) = position; - assert(r_test != MPI_REQUEST_NULL); - } - } - return position; -} - -static inline -void __fenix_request_store_get(fenix_request_store_t *s, - int request_id, - MPI_Request *r) -{ - { - MPI_Request r_test; - *((int *)&r_test) = request_id; - assert(r_test != MPI_REQUEST_NULL); - } - if(request_id == -123) { - MPI_Request r_test = MPI_REQUEST_NULL; - request_id = *((int*) &r_test); - } - __fenix_request_t *f = &(s->reqs.elements[request_id]); - assert(f->valid); - memcpy(r, &(f->r), sizeof(MPI_Request)); - assert(*r != MPI_REQUEST_NULL); -} - -static inline -void __fenix_request_store_remove(fenix_request_store_t *s, - int request_id) -{ - { - MPI_Request r_test; - *((int *)&r_test) = request_id; - assert(r_test != MPI_REQUEST_NULL); - } - if(request_id == -123) { - MPI_Request r_test = MPI_REQUEST_NULL; - request_id = *((int*) &r_test); - } - __fenix_request_t *f = &(s->reqs.elements[request_id]); - assert(f->valid); - f->valid = 0; - - __fenix_int_stack_push(&(s->freed_list), request_id); -} - - -static inline -void __fenix_request_store_getremove(fenix_request_store_t *s, - int request_id, - MPI_Request *r) -{ - { - MPI_Request r_test; - *((int *)&r_test) = request_id; - assert(r_test != MPI_REQUEST_NULL); - } - if(request_id 
== -123) { - MPI_Request r_test = MPI_REQUEST_NULL; - request_id = *((int*) &r_test); - } - __fenix_request_t *f = &(s->reqs.elements[request_id]); - assert(f->valid); - memcpy(r, &(f->r), sizeof(MPI_Request)); - assert(*r != MPI_REQUEST_NULL); - f->valid = 0; - __fenix_int_stack_push(&(s->freed_list), request_id); -} - -void __fenix_request_store_waitall_removeall(fenix_request_store_t *s); - -#endif // __FENIX_REQUEST_STORE_H__ diff --git a/test/issend/CMakeLists.txt b/test/issend/CMakeLists.txt new file mode 100644 index 0000000..1375b5d --- /dev/null +++ b/test/issend/CMakeLists.txt @@ -0,0 +1,15 @@ +# +# This file is part of Fenix +# Copyright (c) 2016 Rutgers University and Sandia Corporation. +# This software is distributed under the BSD License. +# Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +# the U.S. Government retains certain rights in this software. +# For more information, see the LICENSE file in the top Fenix +# directory. +# + +set(CMAKE_BUILD_TYPE Debug) +add_executable(fenix_issend_test fenix_issend_test.c) +target_link_libraries(fenix_issend_test fenix ${MPI_C_LIBRARIES}) + +add_test(NAME issend COMMAND mpirun -mca mpi_ft_detector_timeout 1 -np 5 fenix_issend_test "1") diff --git a/test/issend/fenix_issend_test.c b/test/issend/fenix_issend_test.c new file mode 100644 index 0000000..7e45e5c --- /dev/null +++ b/test/issend/fenix_issend_test.c @@ -0,0 +1,139 @@ +/* +//@HEADER +// ************************************************************************ +// +// +// _|_|_|_| _|_|_|_| _| _| _|_|_| _| _| +// _| _| _|_| _| _| _| _| +// _|_|_| _|_|_| _| _| _| _| _| +// _| _| _| _|_| _| _| _| +// _| _|_|_|_| _| _| _|_|_| _| _| +// +// +// +// +// Copyright (C) 2016 Rutgers University and Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. 
+// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Author Marc Gamell, Eric Valenzuela, Keita Teranishi, Manish Parashar, +// Michael Heroux, and Matthew Whitlock +// +// Questions? 
Contact Keita Teranishi (knteran@sandia.gov) and +// Marc Gamell (mgamell@cac.rutgers.edu) +// +// ************************************************************************ +//@HEADER +*/ + +#include +#include +#include +#include +#include +#include +#include + +const int kKillID = 1; + +int main(int argc, char **argv) { + +#warning "It's a good idea to complain when not enough parameters! Should add this code to other examples too." + if (argc < 2) { + printf("Usage: %s <# spare ranks> \n", *argv); + exit(0); + } + + int old_world_size, new_world_size = - 1; + int old_rank = 1, new_rank = - 1; + int spare_ranks = atoi(argv[1]); + int buffer; + + MPI_Init(&argc, &argv); + + MPI_Barrier(MPI_COMM_WORLD); + MPI_Comm world_comm; + MPI_Comm_dup(MPI_COMM_WORLD, &world_comm); + MPI_Comm_size(world_comm, &old_world_size); + MPI_Comm_rank(world_comm, &old_rank); + + MPI_Info info; + MPI_Info_create(&info); + MPI_Info_set(info, "FENIX_RESUME_MODE", "NO_JUMP"); + + int fenix_status; + int recovered = 0; + MPI_Comm new_comm; + int error; + Fenix_Init(&fenix_status, world_comm, &new_comm, &argc, &argv, spare_ranks, 0, info, &error); + + MPI_Comm_size(new_comm, &new_world_size); + MPI_Comm_rank(new_comm, &new_rank); + + if (old_rank == kKillID) { + assert(fenix_status == FENIX_ROLE_INITIAL_RANK); + pid_t pid = getpid(); + kill(pid, SIGTERM); + } + + if(new_rank == kKillID) { + assert(fenix_status == FENIX_ROLE_RECOVERED_RANK); + int rval = 44; + MPI_Status status; + MPI_Recv(&rval, 1, MPI_INT, kKillID-1, 1, new_comm, &status); + assert(rval == 33); + printf("Rank %d received new value %d actual value was %d\n", new_rank, rval, 44); + } + else if(new_rank == kKillID-1) { + int sval = 33; + MPI_Request req; + MPI_Issend(&sval, 1, MPI_INT, kKillID, 1, new_comm, &req); + assert(fenix_status == FENIX_ROLE_INITIAL_RANK); + MPI_Wait(&req, MPI_STATUS_IGNORE); + + assert(fenix_status == FENIX_ROLE_SURVIVOR_RANK); + MPI_Issend(&sval, 1, MPI_INT, kKillID, 1, new_comm, &req); + 
MPI_Wait(&req, MPI_STATUS_IGNORE); + } + else { + assert(fenix_status == FENIX_ROLE_INITIAL_RANK); + MPI_Barrier(new_comm); + assert(fenix_status == FENIX_ROLE_SURVIVOR_RANK); + } + + MPI_Barrier(new_comm); + + Fenix_Finalize(); + MPI_Finalize(); + + return 0; +} + diff --git a/test/no_jump/CMakeLists.txt b/test/no_jump/CMakeLists.txt new file mode 100644 index 0000000..fb830f5 --- /dev/null +++ b/test/no_jump/CMakeLists.txt @@ -0,0 +1,15 @@ +# +# This file is part of Fenix +# Copyright (c) 2016 Rutgers University and Sandia Corporation. +# This software is distributed under the BSD License. +# Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +# the U.S. Government retains certain rights in this software. +# For more information, see the LICENSE file in the top Fenix +# directory. +# + +set(CMAKE_BUILD_TYPE Debug) +add_executable(fenix_no_jump_test fenix_no_jump_test.c) +target_link_libraries(fenix_no_jump_test fenix ${MPI_C_LIBRARIES}) + +add_test(NAME no_jump COMMAND mpirun -mca mpi_ft_detector_timeout 1 -np 5 fenix_no_jump_test "1") diff --git a/src/fenix_request_store.c b/test/no_jump/fenix_no_jump_test.c similarity index 53% rename from src/fenix_request_store.c rename to test/no_jump/fenix_no_jump_test.c index fb222be..82187a3 100644 --- a/src/fenix_request_store.c +++ b/test/no_jump/fenix_no_jump_test.c @@ -44,8 +44,8 @@ // NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. // -// Author Marc Gamell, Eric Valenzuela, Keita Teranishi, Manish Parashar -// and Michael Heroux +// Author Marc Gamell, Eric Valenzuela, Keita Teranishi, Manish Parashar, +// Michael Heroux, and Matthew Whitlock // // Questions? 
Contact Keita Teranishi (knteran@sandia.gov) and // Marc Gamell (mgamell@cac.rutgers.edu) @@ -54,28 +54,85 @@ //@HEADER */ +#include +#include +#include +#include +#include +#include #include -#include "fenix_request_store.h" - -void __fenix_request_store_waitall_removeall(fenix_request_store_t *s) -{ - int i; - for(i=0 ; ifirst_unused_position ; i++) { - __fenix_request_t *f = &(s->reqs.elements[i]); - if(f->valid) { -#warning "What to do with requests upon failure? Wait or Cancel?" - PMPI_Cancel(&(f->r)); - if(i == MPI_REQUEST_NULL) // This may look ugly and - // produce a warning, but it is - // necessary to make sure an - // MPI_Request NULL does not - // collide in the request store - __fenix_request_store_remove(s, -123); - else - __fenix_request_store_remove(s, i); - } - } - - s->first_unused_position = 0; - __fenix_int_stack_clear(&(s->freed_list)); + +const int kKillID = 1; + +int main(int argc, char **argv) { + + if (argc < 2) { + printf("Usage: %s <# spare ranks> \n", *argv); + exit(0); + } + + int old_world_size, new_world_size = - 1; + int old_rank = 1, new_rank = - 1; + int spare_ranks = atoi(argv[1]); + int buffer; + + MPI_Init(&argc, &argv); + + MPI_Barrier(MPI_COMM_WORLD); + MPI_Comm world_comm; + MPI_Comm_dup(MPI_COMM_WORLD, &world_comm); + MPI_Comm_size(world_comm, &old_world_size); + MPI_Comm_rank(world_comm, &old_rank); + + MPI_Info info; + MPI_Info_create(&info); + MPI_Info_set(info, "FENIX_RESUME_MODE", "NO_JUMP"); + + int fenix_status; + int recovered = 0; + MPI_Comm new_comm; + int error; + Fenix_Init(&fenix_status, world_comm, &new_comm, &argc, &argv, spare_ranks, 0, info, &error); + + MPI_Comm_size(new_comm, &new_world_size); + MPI_Comm_rank(new_comm, &new_rank); + + if (old_rank == kKillID) { + assert(fenix_status == FENIX_ROLE_INITIAL_RANK); + pid_t pid = getpid(); + kill(pid, SIGTERM); + } + + if(new_rank == kKillID) { + assert(fenix_status == FENIX_ROLE_RECOVERED_RANK); + int sval = 33; + MPI_Send(&sval, 1, MPI_INT, kKillID-1, 1, 
new_comm); + } + else if(new_rank == kKillID-1) { + assert(fenix_status == FENIX_ROLE_INITIAL_RANK); + int rval = 44; + MPI_Status status; + MPI_Recv(&rval, 1, MPI_INT, kKillID, 1, new_comm, &status); + + assert(fenix_status == FENIX_ROLE_SURVIVOR_RANK); + assert(rval == 44); + printf("Rank %d did not receive new value. old value is %d\n", new_rank, rval); + + MPI_Recv(&rval, 1, MPI_INT, kKillID, 1, new_comm, &status); + assert(rval == 33); + printf("Rank %d received new value %d\n", new_rank, rval); + } + else { + assert(fenix_status == FENIX_ROLE_INITIAL_RANK); + MPI_Barrier(new_comm); + assert(fenix_status == FENIX_ROLE_SURVIVOR_RANK); + } + + MPI_Barrier(new_comm); + + Fenix_Finalize(); + MPI_Finalize(); + + return 0; } + diff --git a/test/request_cancelled/CMakeLists.txt b/test/request_cancelled/CMakeLists.txt new file mode 100644 index 0000000..88af22b --- /dev/null +++ b/test/request_cancelled/CMakeLists.txt @@ -0,0 +1,15 @@ +# +# This file is part of Fenix +# Copyright (c) 2016 Rutgers University and Sandia Corporation. +# This software is distributed under the BSD License. +# Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +# the U.S. Government retains certain rights in this software. +# For more information, see the LICENSE file in the top Fenix +# directory. 
+# + +set(CMAKE_BUILD_TYPE Debug) +add_executable(fenix_request_cancelled_test fenix_req_cancelled_test.c) +target_link_libraries(fenix_request_cancelled_test fenix ${MPI_C_LIBRARIES}) + +add_test(NAME request_cancelled COMMAND mpirun -mca mpi_ft_detector_timeout 1 -np 5 fenix_request_cancelled_test "1") diff --git a/test/request_cancelled/fenix_req_cancelled_test.c b/test/request_cancelled/fenix_req_cancelled_test.c new file mode 100644 index 0000000..3d7c89a --- /dev/null +++ b/test/request_cancelled/fenix_req_cancelled_test.c @@ -0,0 +1,141 @@ +/* +//@HEADER +// ************************************************************************ +// +// +// _|_|_|_| _|_|_|_| _| _| _|_|_| _| _| +// _| _| _|_| _| _| _| _| +// _|_|_| _|_|_| _| _| _| _| _| +// _| _| _| _|_| _| _| _| +// _| _|_|_|_| _| _| _|_|_| _| _| +// +// +// +// +// Copyright (C) 2016 Rutgers University and Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. 
+// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Author Marc Gamell, Eric Valenzuela, Keita Teranishi, Manish Parashar, +// Michael Heroux, and Matthew Whitlock +// +// Questions? Contact Keita Teranishi (knteran@sandia.gov) and +// Marc Gamell (mgamell@cac.rutgers.edu) +// +// ************************************************************************ +//@HEADER +*/ + +#include +#include +#include +#include +#include +#include + +const int kKillID = 1; + +int main(int argc, char **argv) { + + fprintf(stderr, "This is actually running\n"); + if (argc < 2) { + printf("Usage: %s <# spare ranks> \n", *argv); + exit(0); + } + + int old_world_size, new_world_size = - 1; + int old_rank = 1, new_rank = - 1; + int spare_ranks = atoi(argv[1]); + int buffer; + + MPI_Init(&argc, &argv); + + MPI_Barrier(MPI_COMM_WORLD); + MPI_Comm world_comm; + MPI_Comm_dup(MPI_COMM_WORLD, &world_comm); + MPI_Comm_size(world_comm, &old_world_size); + MPI_Comm_rank(world_comm, &old_rank); + + int fenix_status; + int recovered = 0; + MPI_Comm new_comm; + int error; + MPI_Request req = MPI_REQUEST_NULL; + fprintf(stderr, "Before Fenix init\n"); + Fenix_Init(&fenix_status, world_comm, &new_comm, &argc, &argv, spare_ranks, 0, MPI_INFO_NULL, &error); + fprintf(stderr, 
"After Fenix init\n"); + + MPI_Comm_size(new_comm, &new_world_size); + MPI_Comm_rank(new_comm, &new_rank); + + if (fenix_status != FENIX_ROLE_INITIAL_RANK) { + recovered = 1; + } else { + MPI_Irecv(&buffer, 1, MPI_INT, (new_rank+1)%new_world_size, 1, new_comm, &req); + //Kill rank dies before being able to send + if(new_rank == 0 || new_rank == 2) MPI_Send(&buffer, 1, MPI_INT, old_rank==0 ? new_world_size-1 : new_rank-1, 1, new_comm); + MPI_Barrier(new_comm); + } + + + if (old_rank == kKillID && recovered == 0) { + fprintf(stderr, "Before kill\n"); + pid_t pid = getpid(); + kill(pid, SIGTERM); + } + + + MPI_Barrier(new_comm); + + //After recovery, the slow ranks send + if(new_rank == 1 || new_rank == 3 ) MPI_Send(&buffer, 1, MPI_INT, new_rank==0 ? new_world_size-1 : new_rank-1, 1, new_comm); + + MPI_Barrier(new_comm); //Lots of barriers to demonstrate a specific ordering of events. + + //Check result of old requests - cannot wait, must MPI_Test only on old pre-failure requests for now + if(new_rank != kKillID){ + int flag; + int cancelled = Fenix_check_cancelled(&req, MPI_STATUS_IGNORE); + if(cancelled){ + printf("Rank %d's request was NOT satisfied before the failure\n", new_rank); + MPI_Irecv(&buffer, 1, MPI_INT, (new_rank+1)%new_world_size, 1, new_comm, &req); //We can re-launch the IRecv if we know the + //other ranks are going to send now + } else { + printf("Rank %d's request was satisfied before the failure\n", new_rank); + } + + } + + Fenix_Finalize(); + MPI_Finalize(); + + return 0; +} diff --git a/test/request_tracking/CMakeLists.txt b/test/request_tracking/CMakeLists.txt index 9dc93df..c8269b2 100644 --- a/test/request_tracking/CMakeLists.txt +++ b/test/request_tracking/CMakeLists.txt @@ -13,4 +13,4 @@ add_executable(fenix_request_tracking_test fenix_request_tracking_test.c) target_link_libraries(fenix_request_tracking_test fenix ${MPI_C_LIBRARIES}) add_test(NAME request_tracking - COMMAND mpirun --oversubscribe -np 3 fenix_request_tracking_test) + 
COMMAND mpirun -np 3 fenix_request_tracking_test) diff --git a/test/subset_internal/fenix_subset_internal_test.c b/test/subset_internal/fenix_subset_internal_test.c index acd9104..bd37d38 100644 --- a/test/subset_internal/fenix_subset_internal_test.c +++ b/test/subset_internal/fenix_subset_internal_test.c @@ -119,6 +119,11 @@ int main(int argc, char **argv) int space_size; double *d_space; + if (argc < 6) { + printf("Usage: %s <# blocks> \n", *argv); + exit(0); + } + space_size = atoi(argv[1]); num_blocks = atoi(argv[2]); start_offset = atoi(argv[3]);