From c8669d9ddcd077a20d9be7aca36bd7e63df41a08 Mon Sep 17 00:00:00 2001 From: Sterling Harper Date: Thu, 11 Jun 2015 17:07:01 -0600 Subject: [PATCH] Make ProjectionTransfer Parallel #1913 --- .../transfers/MultiAppProjectionTransfer.C | 232 +++++++++++++++--- 1 file changed, 202 insertions(+), 30 deletions(-) diff --git a/framework/src/transfers/MultiAppProjectionTransfer.C b/framework/src/transfers/MultiAppProjectionTransfer.C index b21dd5f90370..27aca9c0d6ca 100644 --- a/framework/src/transfers/MultiAppProjectionTransfer.C +++ b/framework/src/transfers/MultiAppProjectionTransfer.C @@ -14,11 +14,13 @@ #include "MultiAppProjectionTransfer.h" #include "FEProblem.h" #include "AddVariableAction.h" +#include "MooseError.h" #include "libmesh/quadrature_gauss.h" #include "libmesh/dof_map.h" #include "libmesh/mesh_function.h" #include "libmesh/mesh_tools.h" #include "libmesh/string_to_enum.h" +#include "libmesh/parallel_algebra.h" void assemble_l2_from(EquationSystems & es, const std::string & system_name) @@ -229,44 +231,93 @@ MultiAppProjectionTransfer::assembleL2To(EquationSystems & es, const std::string void MultiAppProjectionTransfer::assembleL2From(EquationSystems & es, const std::string & system_name) { - unsigned int n_apps = _multi_app->numGlobalApps(); - std::vector *> from_slns(n_apps, NULL); - std::vector from_fns(n_apps, NULL); - std::vector from_bbs(n_apps, NULL); - - // get bounding box, mesh function and solution for each subapp - for (unsigned int i = 0; i < n_apps; i++) + /******************** + First step, get the bounding boxes and mesh functions for all the sub apps on + this processor. + ********************/ + const unsigned int n_global_apps = _multi_app->numGlobalApps(); + const unsigned int n_local_apps = _multi_app->numLocalApps(); + std::vector *> from_slns(n_local_apps, NULL); + std::vector from_fns(n_local_apps, NULL); + std::vector > bb_points(n_local_apps); + std::vector from_bbs(n_local_apps); + + unsigned int i_local = 0; + for (unsigned int i_global = 0; i_global < n_global_apps; i_global++) { - if (!_multi_app->hasLocalApp(i)) + if (!_multi_app->hasLocalApp(i_global)) continue; MPI_Comm swapped = Moose::swapLibMeshComm(_multi_app->comm()); - FEProblem & from_problem = *_multi_app->appProblem(i); + // Get the bounding box. + FEProblem & from_problem = *_multi_app->appProblem(i_global); EquationSystems & from_es = from_problem.es(); MeshBase & from_mesh = from_es.get_mesh(); - MeshTools::BoundingBox * app_box = new MeshTools::BoundingBox(MeshTools::processor_bounding_box(from_mesh, from_mesh.processor_id())); - from_bbs[i] = app_box; + MeshTools::BoundingBox app_box = MeshTools::BoundingBox(MeshTools::processor_bounding_box(from_mesh, from_mesh.processor_id())); + + // Cast the bounding box into a pair of points to simplify MPI + // communication. Translate the bounding box to the app's position. + bb_points[i_local] = static_cast >(app_box); + bb_points[i_local].first = bb_points[i_local].first + _multi_app->position(i_global); + bb_points[i_local].second = bb_points[i_local].second + _multi_app->position(i_global); + //from_bbs[i_local] = app_box; + from_bbs[i_local] = static_cast(bb_points[i_local]); + // Get a serialized copy of the subapp's solution vector. MooseVariable & from_var = from_problem.getVariable(0, _from_var_name); System & from_sys = from_var.sys().system(); unsigned int from_var_num = from_sys.variable_number(from_var.name()); NumericVector * serialized_from_solution = NumericVector::build(from_sys.comm()).release(); serialized_from_solution->init(from_sys.n_dofs(), false, SERIAL); - // Need to pull down a full copy of this vector on every processor so we can get values in parallel from_sys.solution->localize(*serialized_from_solution); - from_slns[i] = serialized_from_solution; + from_slns[i_local] = serialized_from_solution; + // Get the subapp's mesh function. MeshFunction * from_func = new MeshFunction(from_es, *serialized_from_solution, from_sys.get_dof_map(), from_var_num); from_func->init(Trees::ELEMENTS); from_func->enable_out_of_mesh_mode(OutOfMeshValue); - from_fns[i] = from_func; + from_fns[i_local] = from_func; Moose::swapLibMeshComm(swapped); + + i_local++; + } + + /******************** + Next, serialize the bounding boxes, and keep track of how many apps (i.e. how + many bounding boxes) each processor has. + ********************/ + std::vector apps_per_proc(1, n_local_apps); + + _communicator.allgather(bb_points); + _communicator.allgather(apps_per_proc, true); + + if (apps_per_proc.size() != n_processors()) + mooseError("Transfer failed to gather data from all processors."); + + unsigned int n_sources = 0; + for (unsigned int i=0; i bboxes(n_sources); + for (unsigned int i=0; i(bb_points[i]); + } + /******************** + Now, check all the elements local to this processor and see if they overlap + with any of the bounding boxes from other processors. Keep track of which + elements overlap with which processors. Build vectors of quadrature points to + send to other processors for mesh function evaluations. + ********************/ const MeshBase& mesh = es.get_mesh(); const unsigned int dim = mesh.mesh_dimension(); @@ -276,21 +327,131 @@ MultiAppProjectionTransfer::assembleL2From(EquationSystems & es, const std::stri UniquePtr fe(FEBase::build(dim, fe_type)); QGauss qrule(dim, fe_type.default_quadrature_order()); fe->attach_quadrature_rule(&qrule); - const std::vector & JxW = fe->get_JxW(); - const std::vector > & phi = fe->get_phi(); const std::vector & xyz = fe->get_xyz(); + std::vector > outgoing_qps(n_processors()); + std::vector< std::unordered_map > element_index_map(n_processors()); + + for (unsigned int i_proc = 0, app0 = 0; + i_proc < n_processors(); + i_proc++, app0 += apps_per_proc[i_proc]) + { + MeshBase::const_element_iterator el = mesh.active_local_elements_begin(); + const MeshBase::const_element_iterator end_el = mesh.active_local_elements_end(); + for ( ; el != end_el; ++el) + { + const Elem* elem = *el; + fe->reinit (elem); + + bool qp_hit = false; + for (unsigned int i_app = 0; + i_app < apps_per_proc[i_proc] && ! qp_hit; i_app++) + { + for (unsigned int qp = 0; + qp < qrule.n_points() && ! qp_hit; qp ++) + { + Point qpt = xyz[qp]; + if (bboxes[app0 + i_app].contains_point(qpt)) + qp_hit = true; + } + } + + if (qp_hit) + { + // This processor's bounding box contains at least one qudrature point + // from this element. + element_index_map[i_proc][elem->id()] = outgoing_qps[i_proc].size(); + for (unsigned int qp = 0; qp < qrule.n_points(); qp ++) + { + Point qpt = xyz[qp]; + outgoing_qps[i_proc].push_back(qpt); + } + } + } + } + + /******************** + Request quadrature point evaluations from other processors and handle requests + sent to this processor. + ********************/ + std::vector > incoming_evals(n_processors()); + std::vector > incoming_app_ids(n_processors()); + for (unsigned int i_proc = 0; i_proc < n_processors(); i_proc++) + { + if (i_proc == processor_id()) + continue; + + //_console << "Processor " << processor_id() << " sending " << outgoing_qps[i_proc].size() << " points to processor " << i_proc << std::endl; + _communicator.send(i_proc, outgoing_qps[i_proc]); + } + + for (unsigned int i_proc = 0; i_proc < n_processors(); i_proc++) + { + std::vector incoming_qps; + if (i_proc == processor_id()) + incoming_qps = outgoing_qps[i_proc]; + else + _communicator.receive(i_proc, incoming_qps); + + std::vector outgoing_evals(incoming_qps.size(), OutOfMeshValue); + std::vector outgoing_ids(incoming_qps.size(), -1); // -1 = largest unsigned int + for (unsigned int qp = 0; qp < incoming_qps.size(); qp++) + { + Point qpt = incoming_qps[qp]; + + // Loop until we've found the lowest-ranked app that actually contains + // the quadrature point. + for (unsigned int i_global = 0, i_local = 0; + i_global < n_global_apps && outgoing_evals[qp] == OutOfMeshValue; + i_global++) + { + if (!_multi_app->hasLocalApp(i_global)) + continue; + if (from_bbs[i_local].contains_point(qpt)) + { + outgoing_evals[qp] = (* from_fns[i_local])(qpt - _multi_app->position(i_global)); + outgoing_ids[qp] = i_global; + } + i_local ++; + } + } + + if (i_proc == processor_id()) + { + incoming_evals[i_proc] = outgoing_evals; + incoming_app_ids[i_proc] = outgoing_ids; + } + else + { + _communicator.send(i_proc, outgoing_evals); + _communicator.send(i_proc, outgoing_ids); + } + } + + /******************** + Gather all of the qp evaluations, find the best one for each qp, and define + the system. + ********************/ + for (unsigned int i_proc = 0; i_proc < n_processors(); i_proc++) + { + if (i_proc == processor_id()) + continue; + + _communicator.receive(i_proc, incoming_evals[i_proc]); + _communicator.receive(i_proc, incoming_app_ids[i_proc]); + } const DofMap& dof_map = system.get_dof_map(); DenseMatrix Ke; DenseVector Fe; std::vector dof_indices; + const std::vector & JxW = fe->get_JxW(); + const std::vector > & phi = fe->get_phi(); MeshBase::const_element_iterator el = mesh.active_local_elements_begin(); const MeshBase::const_element_iterator end_el = mesh.active_local_elements_end(); for ( ; el != end_el; ++el) { const Elem* elem = *el; - fe->reinit (elem); dof_map.dof_indices (elem, dof_indices); @@ -300,24 +461,36 @@ MultiAppProjectionTransfer::assembleL2From(EquationSystems & es, const std::stri for (unsigned int qp = 0; qp < qrule.n_points(); qp++) { Point qpt = xyz[qp]; - Real f = 0.; - for (unsigned int app = 0; app < n_apps; app++) + + unsigned int lowest_app_rank = -1; // -1 = largest unsigned int + Real meshfun_eval = 0.; + for (unsigned int i_proc = 0; i_proc < n_processors(); i_proc++) { - Point pt = qpt - _multi_app->position(app); - if (from_bbs[app] != NULL && from_bbs[app]->contains_point(pt)) - { - MPI_Comm swapped = Moose::swapLibMeshComm(_multi_app->comm()); - f = (*from_fns[app])(pt); - Moose::swapLibMeshComm(swapped); - break; - } + std::unordered_map & map = element_index_map[i_proc]; + // Ignore this processor if the element wasn't found in it's bounding + // box. + if (map.find(elem->id()) == map.end()) + continue; + unsigned int qp0 = map[elem->id()]; + + // Ignore this processor if it's app has a higher rank than the + // previously found lowest app rank. + if (incoming_app_ids[i_proc][qp0 + qp] >= lowest_app_rank) + continue; + + // Ignore this processor if the qp was actually outside the processor's + // mesh. + if (incoming_evals[i_proc][qp0 + qp] == OutOfMeshValue) + continue; + + meshfun_eval = incoming_evals[i_proc][qp0 + qp]; } // Now compute the element matrix and RHS contributions. for (unsigned int i=0; i