Skip to content

Commit

Permalink
[REEF-726] Fix a race condition with completed Containers
Browse files Browse the repository at this point in the history
This removes the direct release of Evaluators in `YarnContainerManager` and
instead delegates that job to `EvaluatorManager` when a container completes.

JIRA:
  [REEF-726](https://issues.apache.org/jira/browse/REEF-726)

Pull Request:
  This closes apache#476
  • Loading branch information
afchung authored and Markus Weimer committed Sep 16, 2015
1 parent f90bb12 commit bcc1652
Show file tree
Hide file tree
Showing 2 changed files with 1 addition and 3 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -556,7 +556,6 @@ public void onResourceStatusMessage(final ResourceStatusEvent resourceStatusEven
.append(this.task.get().getId())
.append("] was running when the Evaluator crashed.");
}
this.isResourceReleased = true;

if (resourceStatusEvent.getState() == ReefServiceProtos.State.KILLED) {
this.onEvaluatorException(new EvaluatorKilledByResourceManagerException(this.evaluatorId,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -343,8 +343,6 @@ private void onContainerStatus(final ContainerStatus value) {
status.setState(ReefServiceProtos.State.FAILED);
}
status.setExitCode(value.getExitStatus());
// remove the completed container (can be either done/killed/failed) from book keeping
this.containers.removeAndGet(containerId);
break;
default:
LOG.info("Container running");
Expand All @@ -356,6 +354,7 @@ private void onContainerStatus(final ContainerStatus value) {
status.setDiagnostics(value.getDiagnostics());
}

// The ResourceStatusHandler should close and release the Evaluator for us if the state is a terminal state.
this.reefEventHandlers.onResourceStatus(status.build());
}
}
Expand Down

0 comments on commit bcc1652

Please sign in to comment.