Skip to content

Commit

Permalink
Merge pull request #1 from markhamstra/yeweizhang-spy-1407
Browse files Browse the repository at this point in the history
tweaking
  • Loading branch information
yeweizhang committed Jan 10, 2018
2 parents ad3c5fd + 909e416 commit d8ee9c6
Show file tree
Hide file tree
Showing 2 changed files with 10 additions and 9 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -462,14 +462,14 @@ private[spark] class TaskSchedulerImpl(
}

override def executorLost(executorId: String, reason: ExecutorLossReason): Unit = {
var failedExecutor: Option[String] = None
var dagSchedulerShouldRemoveExecutor: Boolean = false

synchronized {
if (executorIdToRunningTaskIds.contains(executorId)) {
val hostPort = executorIdToHost(executorId)
logExecutorLoss(executorId, hostPort, reason)
removeExecutor(executorId, reason)
failedExecutor = Some(executorId)
dagSchedulerShouldRemoveExecutor = true
} else {
executorIdToHost.get(executorId) match {
case Some(hostPort) =>
Expand All @@ -489,14 +489,14 @@ private[spark] class TaskSchedulerImpl(
}
}
}
// Call dagScheduler.executorLost without holding the lock on this to prevent deadlock
// When an executor is never used, we should ask dag scheduler to clean it up so that it can be
// removed from BlockManager
if (failedExecutor.isDefined) {
// Call dagScheduler.executorLost without holding the lock on this to prevent deadlock.
if (dagSchedulerShouldRemoveExecutor) {
dagScheduler.executorLost(executorId)
backend.reviveOffers()
} else {
logInfo(s"Call BlockManager Master to remove executor $executorId")
// When an executor is never used, we should ask BlockManagerMaster to remove this executor
// from its accounting.
logInfo(s"Call BlockManagerMaster to remove executor $executorId")
sc.env.blockManager.master.removeExecutorAsync(executorId)
}
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -271,8 +271,9 @@ class CoarseGrainedSchedulerBackend(scheduler: TaskSchedulerImpl, val rpcEnv: Rp
// SPARK-15262: If an executor is still alive even after the scheduler has removed
// its metadata, we may receive a heartbeat from that executor and tell its block
// manager to reregister itself. If that happens, the block manager master will know
// about the executor, but the scheduler will not. Therefore, we should remove the
// executor from the block manager when we hit this case.
// about the executor, but the scheduler will not. (The same state will occur if an
// executor has been dynamically allocated but never assigned any task: SPARK-21876.)
// Therefore, we should remove the executor from the block manager when we hit this case.
scheduler.sc.env.blockManager.master.removeExecutorAsync(executorId)
logInfo(s"Asked to remove non-existent executor $executorId")
}
Expand Down

0 comments on commit d8ee9c6

Please sign in to comment.