Skip to content

Commit

Permalink
Address some potential MPI bugs (#688)
Browse files Browse the repository at this point in the history
* Fix two potential MPI bugs.

There were two conditions under which a function could return before its end,
even though the code after the return makes MPI calls or modifies the MPI state.
This could lead to MPI hanging if it gets left in an invalid or bad
state.
  • Loading branch information
mgjarrett committed May 27, 2022
1 parent da2fe93 commit f6bf598
Show file tree
Hide file tree
Showing 2 changed files with 6 additions and 1 deletion.
5 changes: 4 additions & 1 deletion armi/mpiActions.py
Original file line number Diff line number Diff line change
Expand Up @@ -420,6 +420,7 @@ def invokeHook(self):
)
)

actionResult = None
try:
action = mpiComm.scatter(self._actions, root=0)
# create a new communicator that only has these specific dudes running
Expand All @@ -430,7 +431,7 @@ def invokeHook(self):
context.MPI_SIZE = context.MPI_COMM.Get_size()
context.MPI_NODENAMES = context.MPI_COMM.allgather(context.MPI_NODENAME)
if hasAction:
return action.invoke(self.o, self.r, self.cs)
actionResult = action.invoke(self.o, self.r, self.cs)
finally:
# restore the global variables
context.MPI_DISTRIBUTABLE = canDistribute
Expand All @@ -439,6 +440,8 @@ def invokeHook(self):
context.MPI_SIZE = mpiSize
context.MPI_NODENAMES = mpiNodeNames

return actionResult


class MpiActionError(Exception):
"""Exception class raised when error conditions occur during an MpiAction."""
Expand Down
2 changes: 2 additions & 0 deletions armi/utils/pathTools.py
Original file line number Diff line number Diff line change
Expand Up @@ -240,6 +240,8 @@ def cleanPath(path, mpiRank=0):
"""
valid = False
if not os.path.exists(path):
if context.MPI_SIZE > 1:
context.MPI_COMM.barrier()
return True

for validPath in [
Expand Down

0 comments on commit f6bf598

Please sign in to comment.