From b898e743d32fae3eca75d4025d2fa55c638b924d Mon Sep 17 00:00:00 2001 From: Eduard Valeyev Date: Mon, 20 Aug 2018 13:08:58 -0400 Subject: [PATCH 1/2] RMI::begin takes an intracomm to be able to madness::initialize in a subset of nodes --- src/madness/world/world.cc | 4 ++-- src/madness/world/worldrmi.cc | 22 +++++++++++----------- src/madness/world/worldrmi.h | 6 +++--- 3 files changed, 16 insertions(+), 16 deletions(-) diff --git a/src/madness/world/world.cc b/src/madness/world/world.cc index 135f95bfc80..3364ec53060 100644 --- a/src/madness/world/world.cc +++ b/src/madness/world/world.cc @@ -195,8 +195,8 @@ namespace madness { start_cpu_time = cpu_time(); start_wall_time = wall_time(); ThreadPool::begin(); // Must have thread pool before any AM arrives - if(SafeMPI::COMM_WORLD.Get_size() > 1) { - RMI::begin(); // Must have RMI while still running single threaded + if(comm.Get_size() > 1) { + RMI::begin(comm); // Must have RMI while still running single threaded // N.B. sync everyone up before messages start flying // this is needed to avoid hangs with some MPIs, e.g. Intel MPI on commodity hardware comm.Barrier(); diff --git a/src/madness/world/worldrmi.cc b/src/madness/world/worldrmi.cc index b8c191146a1..b0d57dd9c95 100644 --- a/src/madness/world/worldrmi.cc +++ b/src/madness/world/worldrmi.cc @@ -221,13 +221,13 @@ namespace madness { // recv_req[i].Cancel(); // } // } - for (decltype(nrecv_) i=0; i(addresses_inout); int n = *len; // produce zero if addresses do not match; zero address trumps everything else - for(decltype(n) i=0; i!=n; ++i) { + for(size_t i=0; i!=n; ++i) { if (in[i] == 0 || inout[i] == 0 || in[i] != inout[i]) inout[i] = 0; } } } // namespace detail - void RMI::assert_aslr_off() { + void RMI::assert_aslr_off(const SafeMPI::Intracomm& comm) { unsigned long my_address = reinterpret_cast(&assert_aslr_off); MADNESS_ASSERT(my_address != 0ul); MPI_Op compare_fn_addresses_op = SafeMPI::Op_create(&detail::compare_fn_addresses, 1); unsigned long zero_if_addresses_differ; - SafeMPI::COMM_WORLD.Reduce(&my_address, &zero_if_addresses_differ, 1, MPI_UNSIGNED_LONG, compare_fn_addresses_op, 0); - if (SafeMPI::COMM_WORLD.Get_rank() == 0) { + comm.Reduce(&my_address, &zero_if_addresses_differ, 1, MPI_UNSIGNED_LONG, compare_fn_addresses_op, 0); + if (comm.Get_rank() == 0) { if (zero_if_addresses_differ == 0) { MADNESS_EXCEPTION("Address Space Layout Randomization (ASLR) detected, please turn off or disable by providing appropriate linker flags (see MADNESS_DISABLEPIE_LINKER_FLAG)",0); } @@ -380,10 +380,10 @@ namespace madness { SafeMPI::Op_free(compare_fn_addresses_op); } - void RMI::begin() { + void RMI::begin(const SafeMPI::Intracomm& comm) { // complain loudly and throw if ASLR is on ... RMI requires ASLR to be off - assert_aslr_off(); + assert_aslr_off(comm); testsome_backoff_us = 5; const char* buf = getenv("MAD_BACKOFF_US"); @@ -404,7 +404,7 @@ namespace madness { tbb_rmi_parent_task = new (tbb::task::allocate_root()) tbb::empty_task; tbb_rmi_parent_task->set_ref_count(2); - task_ptr = new (tbb_rmi_parent_task->allocate_child()) RmiTask(); + task_ptr = new (tbb_rmi_parent_task->allocate_child()) RmiTask(comm); tbb::task::enqueue(*task_ptr, tbb::priority_high); task_ptr->comm.Barrier(); @@ -436,7 +436,7 @@ namespace madness { tbb::task::destroy(*empty_root); task_ptr->comm.Barrier(); #else - task_ptr = new RmiTask(); + task_ptr = new RmiTask(comm); task_ptr->start(); #endif // HAVE_INTEL_TBB } diff --git a/src/madness/world/worldrmi.h b/src/madness/world/worldrmi.h index 15df2338054..199560ee178 100644 --- a/src/madness/world/worldrmi.h +++ b/src/madness/world/worldrmi.h @@ -222,7 +222,7 @@ namespace madness { void process_some(); - RmiTask(); + RmiTask(const SafeMPI::Intracomm& comm = SafeMPI::COMM_WORLD); virtual ~RmiTask(); static void set_rmi_task_is_running(bool flag = true); @@ -344,9 +344,9 @@ namespace madness { return task_ptr->isend(buf, nbyte, dest, func, attr); } - static void assert_aslr_off(); // will complain to std::cerr and throw if ASLR is on + static void assert_aslr_off(const SafeMPI::Intracomm& comm = SafeMPI::COMM_WORLD); // will complain to std::cerr and throw if ASLR is on - static void begin(); + static void begin(const SafeMPI::Intracomm& comm = SafeMPI::COMM_WORLD); static void end() { if(task_ptr) { From b47bd4c6ba5c29d75529b10a93c69c6bd0402599 Mon Sep 17 00:00:00 2001 From: Eduard Valeyev Date: Mon, 20 Aug 2018 13:20:19 -0400 Subject: [PATCH 2/2] amended b898e743d32fae3eca75d4025d2fa55c638b924d --- src/madness/world/world.cc | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/src/madness/world/world.cc b/src/madness/world/world.cc index 3364ec53060..684604012a5 100644 --- a/src/madness/world/world.cc +++ b/src/madness/world/world.cc @@ -211,7 +211,7 @@ namespace madness { #endif // HAVE_ELEMENTAL madness_initialized_ = true; - if(SafeMPI::COMM_WORLD.Get_rank() == 0) + if(comm.Get_rank() == 0) std::cout << "MADNESS runtime initialized with " << ThreadPool::size() << " threads in the pool and affinity " << sbind << "\n"; @@ -220,6 +220,7 @@ namespace madness { void finalize() { World::default_world->gop.fence(); + const auto world_size = World::default_world->size(); // Destroy the default world delete World::default_world; @@ -229,7 +230,7 @@ namespace madness { elem::Finalize(); #endif - if(SafeMPI::COMM_WORLD.Get_size() > 1) + if(world_size > 1) RMI::end(); ThreadPool::end(); detail::WorldMpi::finalize();