From b2062b6494309970e2c06673fa5a3aeebcaa9292 Mon Sep 17 00:00:00 2001 From: Sterling Harper Date: Tue, 21 Jul 2015 16:58:17 -0600 Subject: [PATCH] Fix ProjectionTransfer load imbalance crash refs #1913 --- .../transfers/MultiAppProjectionTransfer.h | 2 - .../transfers/MultiAppProjectionTransfer.C | 331 +++++++++++------- 2 files changed, 205 insertions(+), 128 deletions(-) diff --git a/framework/include/transfers/MultiAppProjectionTransfer.h b/framework/include/transfers/MultiAppProjectionTransfer.h index babe8549c85f..1a02ece93146 100644 --- a/framework/include/transfers/MultiAppProjectionTransfer.h +++ b/framework/include/transfers/MultiAppProjectionTransfer.h @@ -56,8 +56,6 @@ class MultiAppProjectionTransfer : public MultiAppTransfer friend void assemble_l2(EquationSystems & es, const std::string & system_name); - NumericVector * _serialized_master_solution; - }; diff --git a/framework/src/transfers/MultiAppProjectionTransfer.C b/framework/src/transfers/MultiAppProjectionTransfer.C index f9ea97041c1a..329e0ec6f1b3 100644 --- a/framework/src/transfers/MultiAppProjectionTransfer.C +++ b/framework/src/transfers/MultiAppProjectionTransfer.C @@ -95,31 +95,106 @@ MultiAppProjectionTransfer::initialSetup() void MultiAppProjectionTransfer::assembleL2(EquationSystems & es, const std::string & system_name) { + // Get the system and mesh from the input arguments. + LinearImplicitSystem & system = es.get_system(system_name); + MeshBase & to_mesh = es.get_mesh(); + + // Get the meshfunction evaluations and the map that was stashed in the es. + std::vector & final_evals = * es.parameters.get*>("final_evals"); + std::map & element_map = * es.parameters.get*>("element_map"); + + // Setup system vectors and matrices. + FEType fe_type = system.variable_type(0); + UniquePtr fe(FEBase::build(to_mesh.mesh_dimension(), fe_type)); + QGauss qrule(to_mesh.mesh_dimension(), fe_type.default_quadrature_order()); + fe->attach_quadrature_rule(&qrule); + const DofMap& dof_map = system.get_dof_map(); + DenseMatrix Ke; + DenseVector Fe; + std::vector dof_indices; + const std::vector & JxW = fe->get_JxW(); + const std::vector > & phi = fe->get_phi(); + + const MeshBase::const_element_iterator end_el = to_mesh.active_local_elements_end(); + for (MeshBase::const_element_iterator el = to_mesh.active_local_elements_begin(); el != end_el; ++el) + { + const Elem* elem = *el; + fe->reinit (elem); + + dof_map.dof_indices (elem, dof_indices); + Ke.resize (dof_indices.size(), dof_indices.size()); + Fe.resize (dof_indices.size()); + + for (unsigned int qp = 0; qp < qrule.n_points(); qp++) + { + Real meshfun_eval = 0.; + if (element_map.find(elem->id()) != element_map.end()) + { + // We have evaluations for this element. + meshfun_eval = final_evals[element_map[elem->id()] + qp]; + } + + // Now compute the element matrix and RHS contributions. + for (unsigned int i=0; iadd_matrix(Ke, dof_indices); + system.rhs->add_vector(Fe, dof_indices); + } + } +} + + +void +MultiAppProjectionTransfer::execute() +{ + _console << "Beginning projection transfer " << name() << std::endl; + + getAppInfo(); + //////////////////// - // In order to assemble the system we need to evaluate the sub app solutions - // at quadrature points in the master domain. This is complicated by the fact - // that each app is on its own MPI communicator. + // We are going to project the solutions by solving some linear systems. In + // order to assemble the systems, we need to evaluate the "from" domain + // solutions at quadrature points in the "to" domain. Some parallel + // communication is necessary because each processor doesn't necessarily have + // all the "from" information it needs to set its "to" values. We don't want + // to use a bunch of big all-to-all broadcasts, so we'll use bounding boxes to + // figure out which processors have the information we need and only + // communicate with those processors. // // Each processor will - // 1. Check its local quadrature points in the "to" domain to see which "from" - // domains they might be in. - // 2. Send quadrature points to the processors that might have a "from" domain - // that contains those points. + // 1. Check its local quadrature points in the "to" domains to see which + // "from" domains they might be in. + // 2. Send quadrature points to the processors with "from" domains that might + // contain those points. // 3. Recieve quadrature points from other processors, evaluate its mesh // functions at those points, and send the values back to the proper // processor // 4. Recieve mesh function evaluations from all relevant processors and // decide which one to use at every quadrature point (the lowest global app // index always wins) - // 5. And assemble the L2 system at its local quadrature points + // 5. And use the mesh function evaluations to assemble and solve an L2 + // projection system on its local elements. //////////////////// //////////////////// - // Get the bounding boxes for all of the processors. Then, check all the - // elements local to this processor and see if they overlap with any of the - // bounding boxes from other processors. Keep track of which elements overlap - // with which processors. Build vectors of quadrature points to send to other - // processors for mesh function evaluations. + // For every combination of global "from" problem and local "to" problem, find + // which "from" bounding boxes overlap with which "to" elements. Keep track + // of which processors own bounding boxes that overlap with which elements. + // Build vectors of quadrature points to send to other processors for mesh + // function evaluations. //////////////////// // Get the bounding boxes for the "from" domains. @@ -128,60 +203,61 @@ MultiAppProjectionTransfer::assembleL2(EquationSystems & es, const std::string & // Figure out how many "from" domains each processor owns. std::vector froms_per_proc = getFromsPerProc(); - // Figure out which "to" domain we are projecting to. - unsigned int i_to = es.parameters.get("to_index"); + std::vector > outgoing_qps(n_processors()); + std::vector, unsigned int> > element_index_map(n_processors()); + // element_index_map[i_to, element_id] = index + // outgoing_qps[index] is the first quadrature point in element - const MeshBase& mesh = es.get_mesh(); - const unsigned int dim = mesh.mesh_dimension(); + for (unsigned int i_to = 0; i_to < _to_problems.size(); i_to++) + { + MeshBase & to_mesh = _to_meshes[i_to]->getMesh(); - LinearImplicitSystem & system = es.get_system(system_name); + LinearImplicitSystem & system = * _proj_sys[i_to]; - FEType fe_type = system.variable_type(0); - UniquePtr fe(FEBase::build(dim, fe_type)); - QGauss qrule(dim, fe_type.default_quadrature_order()); - fe->attach_quadrature_rule(&qrule); - const std::vector & xyz = fe->get_xyz(); - std::vector > outgoing_qps(n_processors()); - std::vector > element_index_map(n_processors()); - // element_index_map[element_id] = index - // outgoing_qps[index] is the first quadrature point in element + FEType fe_type = system.variable_type(0); + UniquePtr fe(FEBase::build(to_mesh.mesh_dimension(), fe_type)); + QGauss qrule(to_mesh.mesh_dimension(), fe_type.default_quadrature_order()); + fe->attach_quadrature_rule(&qrule); + const std::vector & xyz = fe->get_xyz(); - MeshBase::const_element_iterator el = mesh.local_elements_begin(); - const MeshBase::const_element_iterator end_el = mesh.local_elements_end(); + MeshBase::const_element_iterator el = to_mesh.local_elements_begin(); + const MeshBase::const_element_iterator end_el = to_mesh.local_elements_end(); - unsigned int from0 = 0; - for (processor_id_type i_proc = 0; - i_proc < n_processors(); - from0 += froms_per_proc[i_proc], i_proc++) - { - for (el = mesh.local_elements_begin(); el != end_el; ++el) + unsigned int from0 = 0; + for (processor_id_type i_proc = 0; + i_proc < n_processors(); + from0 += froms_per_proc[i_proc], i_proc++) { - const Elem* elem = *el; - fe->reinit (elem); - - bool qp_hit = false; - for (unsigned int i_from = 0; - i_from < froms_per_proc[i_proc] && ! qp_hit; i_from++) + for (el = to_mesh.local_elements_begin(); el != end_el; el++) { - for (unsigned int qp = 0; - qp < qrule.n_points() && ! qp_hit; qp ++) + const Elem* elem = *el; + fe->reinit (elem); + + bool qp_hit = false; + for (unsigned int i_from = 0; + i_from < froms_per_proc[i_proc] && ! qp_hit; i_from++) { - Point qpt = xyz[qp]; - if (bboxes[from0 + i_from].contains_point(qpt + _to_positions[i_to])) - qp_hit = true; + for (unsigned int qp = 0; + qp < qrule.n_points() && ! qp_hit; qp ++) + { + Point qpt = xyz[qp]; + if (bboxes[from0 + i_from].contains_point(qpt + _to_positions[i_to])) + qp_hit = true; + } } - } - if (qp_hit) - { - // The selected processor's bounding box contains at least one - // quadrature point from this element. Send all qps from this element - // and remember where they are in the array using the map. - element_index_map[i_proc][elem->id()] = outgoing_qps[i_proc].size(); - for (unsigned int qp = 0; qp < qrule.n_points(); qp ++) + if (qp_hit) { - Point qpt = xyz[qp]; - outgoing_qps[i_proc].push_back(qpt + _to_positions[i_to]); + // The selected processor's bounding box contains at least one + // quadrature point from this element. Send all qps from this element + // and remember where they are in the array using the map. + std::pair key(i_to, elem->id()); + element_index_map[i_proc][key] = outgoing_qps[i_proc].size(); + for (unsigned int qp = 0; qp < qrule.n_points(); qp ++) + { + Point qpt = xyz[qp]; + outgoing_qps[i_proc].push_back(qpt + _to_positions[i_to]); + } } } } @@ -268,10 +344,10 @@ MultiAppProjectionTransfer::assembleL2(EquationSystems & es, const std::string & switch (_direction) { case TO_MULTIAPP: - outgoing_ids[qp] = _local2global_map[i_from]; + outgoing_ids[qp] = 0; break; case FROM_MULTIAPP: - outgoing_ids[qp] = 0; + outgoing_ids[qp] = _local2global_map[i_from]; break; } } @@ -291,8 +367,7 @@ MultiAppProjectionTransfer::assembleL2(EquationSystems & es, const std::string & } //////////////////// - // Gather all of the qp evaluations, find the best one for each qp, and - // assemble the system. + // Gather all of the qp evaluations and pick out the best ones for each qp. //////////////////// for (processor_id_type i_proc = 0; i_proc < n_processors(); i_proc++) { @@ -303,89 +378,94 @@ MultiAppProjectionTransfer::assembleL2(EquationSystems & es, const std::string & _communicator.receive(i_proc, incoming_app_ids[i_proc]); } - const DofMap& dof_map = system.get_dof_map(); - DenseMatrix Ke; - DenseVector Fe; - std::vector dof_indices; - const std::vector & JxW = fe->get_JxW(); - const std::vector > & phi = fe->get_phi(); + std::vector > final_evals(_to_problems.size()); + std::vector > trimmed_element_maps(_to_problems.size()); - for (el = mesh.active_local_elements_begin(); el != end_el; ++el) + for (unsigned int i_to = 0; i_to < _to_problems.size(); i_to++) { - const Elem* elem = *el; - fe->reinit (elem); + MeshBase & to_mesh = _to_meshes[i_to]->getMesh(); + LinearImplicitSystem & system = * _proj_sys[i_to]; - dof_map.dof_indices (elem, dof_indices); - Ke.resize (dof_indices.size(), dof_indices.size()); - Fe.resize (dof_indices.size()); + FEType fe_type = system.variable_type(0); + UniquePtr fe(FEBase::build(to_mesh.mesh_dimension(), fe_type)); + QGauss qrule(to_mesh.mesh_dimension(), fe_type.default_quadrature_order()); + fe->attach_quadrature_rule(&qrule); + const std::vector & xyz = fe->get_xyz(); - for (unsigned int qp = 0; qp < qrule.n_points(); qp++) + MeshBase::const_element_iterator el = to_mesh.local_elements_begin(); + const MeshBase::const_element_iterator end_el = to_mesh.local_elements_end(); + + for (el = to_mesh.active_local_elements_begin(); el != end_el; el++) { - Point qpt = xyz[qp]; + const Elem* elem = *el; + fe->reinit (elem); - unsigned int lowest_app_rank = -1; // -1 = largest unsigned int - Real meshfun_eval = 0.; - for (unsigned int i_proc = 0; i_proc < n_processors(); i_proc++) - { - // Ignore the selected processor if the element wasn't found in it's - // bounding box. - std::map & map = element_index_map[i_proc]; - if (map.find(elem->id()) == map.end()) - continue; - unsigned int qp0 = map[elem->id()]; - - // Ignore the selected processor if it's app has a higher rank than the - // previously found lowest app rank. - if (incoming_app_ids[i_proc][qp0 + qp] >= lowest_app_rank) - continue; - - // Ignore the selected processor if the qp was actually outside the - // processor's subapp's mesh. - if (incoming_evals[i_proc][qp0 + qp] == OutOfMeshValue) - continue; - - meshfun_eval = incoming_evals[i_proc][qp0 + qp]; - } + bool element_is_evaled = false; + std::vector evals(qrule.n_points(), 0.); - // Now compute the element matrix and RHS contributions. - for (unsigned int i=0; i, unsigned int> & map = element_index_map[i_proc]; + std::pair key(i_to, elem->id()); + if (map.find(key) == map.end()) + continue; + unsigned int qp0 = map[key]; + + // Ignore the selected processor if it's app has a higher rank than the + // previously found lowest app rank. + if (incoming_app_ids[i_proc][qp0 + qp] >= lowest_app_rank) + continue; + + // Ignore the selected processor if the qp was actually outside the + // processor's subapp's mesh. + if (incoming_evals[i_proc][qp0 + qp] == OutOfMeshValue) + continue; + + // This is the best meshfunction evaluation so far, save it. + element_is_evaled = true; + evals[qp] = incoming_evals[i_proc][qp0 + qp]; + } } - dof_map.constrain_element_matrix_and_vector(Ke, Fe, dof_indices); - if (_compute_matrix) - system.matrix->add_matrix(Ke, dof_indices); - system.rhs->add_vector(Fe, dof_indices); + // If we found good evaluations for any of the qps in this element, save + // those evaluations for later. + if (element_is_evaled) + { + trimmed_element_maps[i_to][elem->id()] = final_evals[i_to].size(); + for (unsigned int qp = 0; qp < qrule.n_points(); qp++) + { + final_evals[i_to].push_back(evals[qp]); + } + } } } - for (unsigned int i = 0; i < _from_problems.size(); i++) - { - delete local_meshfuns[i]; - delete serialized_from_solutions[i]; - } -} - - -void -MultiAppProjectionTransfer::execute() -{ - _console << "Beginning projection transfer " << name() << std::endl; - - getAppInfo(); + //////////////////// + // We now have just one or zero mesh function values at all of our local + // quadrature points. Stash those values (and a map linking them to element + // ids) in the equation systems parameters and project the solution. + //////////////////// for (unsigned int i_to = 0; i_to < _to_problems.size(); i_to++) { + _to_es[i_to]->parameters.set*>("final_evals") = & final_evals[i_to]; + _to_es[i_to]->parameters.set*>("element_map") = & trimmed_element_maps[i_to]; projectSolution(i_to); + _to_es[i_to]->parameters.set*>("final_evals") = NULL; + _to_es[i_to]->parameters.set*>("element_map") = NULL; + } + + for (unsigned int i = 0; i < _from_problems.size(); i++) + { + delete local_meshfuns[i]; + delete serialized_from_solutions[i]; } _console << "Finished projection transfer " << name() << std::endl; @@ -399,7 +479,6 @@ MultiAppProjectionTransfer::projectSolution(unsigned int i_to) LinearImplicitSystem & ls = *_proj_sys[i_to]; // activate the current transfer proj_es.parameters.set("transfer") = this; - proj_es.parameters.set("to_index") = i_to; // TODO: specify solver params in an input file // solver tolerance