Permalink
Browse files

command.rb: Add an exponential backoff when spinning up new workers fails

Helps with #2, though it doesn't do much besides stop trying to spin
up tons of processes.
  • Loading branch information...
1 parent d1e4697 commit e2d29c1f01465003360be015ba6916f1f0370640 @gdb gdb committed Sep 27, 2012
Showing with 31 additions and 6 deletions.
  1. +2 −1 lib/einhorn.rb
  2. +29 −5 lib/einhorn/command.rb
View
@@ -55,7 +55,8 @@ def self.default_state
:command_socket_as_fd => false,
:socket_path => nil,
:pidfile => nil,
- :lockfile => nil
+ :lockfile => nil,
+ :consecutive_deaths_before_ack => 0
}
end
end
View
@@ -29,9 +29,17 @@ def self.mourn(pid)
Einhorn::State.children.delete(pid)
+ # Unacked worker
+ if spec[:type] == :worker && !spec[:acked]
+ Einhorn::State.consecutive_deaths_before_ack += 1
+ extra = ' before it was ACKed'
+ else
+ extra = nil
+ end
+
case type = spec[:type]
when :worker
- Einhorn.log_info("===> Exited worker #{pid.inspect}")
+ Einhorn.log_info("===> Exited worker #{pid.inspect}#{extra}")
when :state_passer
Einhorn.log_debug("===> Exited state passing process #{pid.inspect}")
else
@@ -77,8 +85,15 @@ def self.register_ack(pid)
return
end
+ if Einhorn::State.consecutive_deaths_before_ack > 0
+ extra = ", breaking the streak of #{Einhorn::State.consecutive_deaths_before_ack} consecutive unacked workers dying"
+ else
+ extra = nil
+ end
+ Einhorn::State.consecutive_deaths_before_ack = 0
+
spec[:acked] = true
- Einhorn.log_info("Up to #{Einhorn::WorkerPool.ack_count} / #{Einhorn::WorkerPool.ack_target} #{Einhorn::State.ack_mode[:type]} ACKs")
+ Einhorn.log_info("Up to #{Einhorn::WorkerPool.ack_count} / #{Einhorn::WorkerPool.ack_target} #{Einhorn::State.ack_mode[:type]} ACKs#{extra}")
# Could call cull here directly instead, I believe.
Einhorn::Event.break_loop
end
@@ -322,14 +337,23 @@ def self.replenish_gradually
return if Einhorn::TransientState.has_outstanding_spinup_timer
return unless Einhorn::WorkerPool.missing_worker_count > 0
- spinup_interval = Einhorn::State.config[:seconds]
+ # Exponentially backoff automated spinup if we're just having
+ # things die before ACKing
+ spinup_interval = Einhorn::State.config[:seconds] * (1.5 ** Einhorn::State.consecutive_deaths_before_ack)
seconds_ago = (Time.now - Einhorn::State.last_spinup).to_f
if seconds_ago > spinup_interval
- Einhorn.log_debug("Last spinup was #{seconds_ago}s ago, and spinup_interval is #{spinup_interval}, so spinning up a new process")
+ msg = "Last spinup was #{seconds_ago}s ago, and spinup_interval is #{spinup_interval}s, so spinning up a new process"
+
+ if Einhorn::State.consecutive_deaths_before_ack > 0
+ Einhorn.log_info("#{msg} (there have been #{Einhorn::State.consecutive_deaths_before_ack} consecutive unacked worker deaths)")
+ else
+ Einhorn.log_debug(msg)
+ end
+
spinup
else
- Einhorn.log_debug("Last spinup was #{seconds_ago}s ago, and spinup_interval is #{spinup_interval}, so not spinning up a new process")
+ Einhorn.log_debug("Last spinup was #{seconds_ago}s ago, and spinup_interval is #{spinup_interval}s, so not spinning up a new process")
end
Einhorn::TransientState.has_outstanding_spinup_timer = true

0 comments on commit e2d29c1

Please sign in to comment.