Skip to content
This repository has been archived by the owner on Mar 14, 2019. It is now read-only.

Commit

Permalink
Merge pull request #30 from stitchfix/redesign-monitoring-internals
Browse files Browse the repository at this point in the history
Redesign monitoring internals
  • Loading branch information
davetron5000 committed Jan 31, 2016
2 parents 467ef0a + b853ea9 commit e9c4231
Show file tree
Hide file tree
Showing 18 changed files with 135 additions and 138 deletions.
30 changes: 30 additions & 0 deletions lib/monitoring/check_result.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
module Monitoring
class CheckResult
attr_reader :resque_name, # Name of the resque the check checked
:scope, # Refined scope inside this resque, if relavent (e.g. queue name or class name)
:check_name, # Name of the thing checked
:check_count # Count related to what was being checked

def initialize(resque_name: nil,check_name: nil,check_count: nil, scope: nil)
@resque_name = required! resque_name, "resque_name"
@check_name = required! check_name, "check_name"
@check_count = required! check_count, "check_count", :to_i
@scope = optional scope
end

private

def required!(value, name, conversion = :to_s)
raise ArgumentError, "#{name} is required" if value.nil?
value.send(conversion)
end

def optional(value)
if value.nil?
nil
else
value.to_s
end
end
end
end
1 change: 1 addition & 0 deletions lib/monitoring/checker.rb
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ class Checker
def initialize(resques: RESQUES)
@resques = resques
end
# Should return an array of CheckResult representing the results of the check
def check!
raise
end
Expand Down
8 changes: 5 additions & 3 deletions lib/monitoring/failed_job_check.rb
Original file line number Diff line number Diff line change
Expand Up @@ -2,9 +2,11 @@
module Monitoring
class FailedJobCheck < Monitoring::Checker
def check!
Hash[@resques.all.map { |resque_instance|
[resque_instance.name,resque_instance.jobs_failed]
}]
@resques.all.map { |resque_instance|
CheckResult.new(resque_name: resque_instance.name,
check_name: "resque.failed_jobs",
check_count: resque_instance.jobs_failed.size)
}
end
end
end
10 changes: 5 additions & 5 deletions lib/monitoring/librato_notifier.rb
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
module Monitoring
class LibratoNotifier < Notifier
def initialize(prefix: nil, logger: Rails.logger, type: :count, unit: "")
@prefix = validate_prefix!(prefix)
def initialize(logger: Rails.logger, type: :count, unit: "")
@logger = logger
@type = type
@unit = unit || ""
Expand All @@ -11,9 +10,10 @@ def initialize(prefix: nil, logger: Rails.logger, type: :count, unit: "")
#
# results:: a hash where the keys represent the source (the resque instance name) and the values
# are lists of items to be counted. The items won't be examined, just counted and used in the metric
def notify!(results)
results.each do |resque_name,items|
log_to_librato(resque_name,@type,@prefix,items.size)
def notify!(check_results)
check_results.each do |check_result|
source = [check_result.resque_name,check_result.scope].compact.join(".")
log_to_librato(source,@type,check_result.check_name,check_result.check_count)
end
end

Expand Down
3 changes: 2 additions & 1 deletion lib/monitoring/monitor.rb
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
module Monitoring
class Monitor
def initialize(checker: checker,notifier: notifier)
def initialize(checker: nil,notifier: nil)
raise ArgumentError, "both checker and notifier are required" if checker.nil? || notifier.nil?
@checker = checker
@notifier = notifier
end
Expand Down
4 changes: 3 additions & 1 deletion lib/monitoring/notifier.rb
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
module Monitoring
class Notifier
def notify!(results)

# This will be given an array of CheckResult instances
def notify!(check_results)
raise
end
end
Expand Down
16 changes: 0 additions & 16 deletions lib/monitoring/per_queue_librato_notifier.rb

This file was deleted.

12 changes: 9 additions & 3 deletions lib/monitoring/queue_size_check.rb
Original file line number Diff line number Diff line change
Expand Up @@ -2,9 +2,15 @@
module Monitoring
class QueueSizeCheck < Monitoring::Checker
def check!
Hash[@resques.all.map { |resque_instance|
[resque_instance.name,resque_instance.jobs_waiting]
}]
@resques.all.map { |resque_instance|
resque_instance.jobs_waiting.keys.sort.map { |queue_name|
jobs = resque_instance.jobs_waiting[queue_name]
CheckResult.new(resque_name: resque_instance.name,
scope: queue_name,
check_name: "resque.queue_size",
check_count: jobs.size)
}
}.flatten
end
end
end
8 changes: 5 additions & 3 deletions lib/monitoring/stale_worker_check.rb
Original file line number Diff line number Diff line change
@@ -1,9 +1,11 @@
module Monitoring
class StaleWorkerCheck < Monitoring::Checker
def check!
Hash[@resques.all.map { |resque_instance|
[resque_instance.name,resque_instance.jobs_running.select(&:too_long?)]
}]
@resques.all.map { |resque_instance|
CheckResult.new(resque_name: resque_instance.name,
check_name: "resque.stale_workers",
check_count: resque_instance.jobs_running.select(&:too_long?).size)
}
end
end
end
6 changes: 3 additions & 3 deletions lib/tasks/monitor.rake
Original file line number Diff line number Diff line change
Expand Up @@ -3,15 +3,15 @@ namespace :monitor do
task :failed => :environment do
monitor = Monitoring::Monitor.new(
checker: Monitoring::FailedJobCheck.new,
notifier: Monitoring::LibratoNotifier.new(prefix: "resque.failed_jobs", unit: "jobs"))
notifier: Monitoring::LibratoNotifier.new(unit: "jobs"))
monitor.monitor!
end

desc "Check the number of stale workers and stat the results to the log in a way Librato can understand"
task :stale_workers => :environment do
monitor = Monitoring::Monitor.new(
checker: Monitoring::StaleWorkerCheck.new,
notifier: Monitoring::LibratoNotifier.new(prefix: "resque.stale_workers", type: :measure, unit: "workers"))
notifier: Monitoring::LibratoNotifier.new(type: :measure, unit: "workers"))

monitor.monitor!
end
Expand All @@ -20,7 +20,7 @@ namespace :monitor do
task :queue_sizes => :environment do
monitor = Monitoring::Monitor.new(
checker: Monitoring::QueueSizeCheck.new,
notifier: Monitoring::PerQueueLibratoNotifier.new(prefix: "resque.queue_size", type: :count, unit: "jobs"))
notifier: Monitoring::LibratoNotifier.new(unit: "jobs"))

monitor.monitor!
end
Expand Down
8 changes: 4 additions & 4 deletions test/integration/monitoring_test.rb
Original file line number Diff line number Diff line change
Expand Up @@ -60,9 +60,9 @@ class MonitoringTest < ActionDispatch::IntegrationTest

Rake::Task['monitor:queue_sizes'].invoke

assert_equal "source=test1.cache count#resque.queue_size=2jobs" , logger.infos[0]
assert_equal "source=test1.mail count#resque.queue_size=4jobs" , logger.infos[1]
assert_equal "source=test2.admin count#resque.queue_size=2jobs" , logger.infos[2]
assert_equal "source=test2.mail count#resque.queue_size=1jobs" , logger.infos[3]
assert_equal "source=test1.cache count#resque.queue_size=2jobs" , logger.infos[0], "0" + logger.infos.inspect
assert_equal "source=test1.mail count#resque.queue_size=4jobs" , logger.infos[1], "1" + logger.infos.inspect
assert_equal "source=test2.admin count#resque.queue_size=2jobs" , logger.infos[2], "2" + logger.infos.inspect
assert_equal "source=test2.mail count#resque.queue_size=1jobs" , logger.infos[3], "3" + logger.infos.inspect
end
end
21 changes: 12 additions & 9 deletions test/lib/monitoring/failed_job_check_test.rb
Original file line number Diff line number Diff line change
@@ -1,9 +1,11 @@
require 'quick_test_helper'
require 'support/resque_helpers'
require 'support/monitoring_helpers'
require 'minitest/autorun'
require 'resque'

lib_require 'monitoring/checker'
lib_require 'monitoring/check_result'
lib_require 'monitoring/failed_job_check'
rails_require 'models/resque_instance'
rails_require 'models/job'
Expand All @@ -14,12 +16,13 @@ module Monitoring
end
class Monitoring::FailedJobCheckTest < MiniTest::Test
include ResqueHelpers
include MonitoringHelpers

def setup_resques(test1: 1, test2: 2)
def setup_resques(test1: ["BazJob"], test2: ["FooJob","BarJob"])
Redis.new.flushall
Resques.new([
add_failed_jobs(num_failed: test1, resque_instance: resque_instance("test1",:resque)),
add_failed_jobs(num_failed: test2, resque_instance: resque_instance("test2",:resque2)),
add_failed_jobs(job_class_names: test1, resque_instance: resque_instance("test1",:resque)),
add_failed_jobs(job_class_names: test2, resque_instance: resque_instance("test2",:resque2)),
])
end

Expand All @@ -28,22 +31,22 @@ def test_type
end

def test_failed_jobs
resques = setup_resques(test1: 1, test2: 2)
resques = setup_resques
check = Monitoring::FailedJobCheck.new(resques: resques)

results = check.check!

assert_equal 1,results["test1"].size,results["test1"].inspect
assert_equal 2,results["test2"].size,results["test2"].inspect
assert_check_result results[0], resque_name: "test1", check_count: 1
assert_check_result results[1], resque_name: "test2", check_count: 2
end

def test_no_failed_jobs
resques = setup_resques(test1: 0, test2: 0)
resques = setup_resques(test1: [], test2: [])
check = Monitoring::FailedJobCheck.new(resques: resques)

results = check.check!

assert_equal 0,results["test1"].size,results.inspect
assert_equal 0,results["test2"].size,results.inspect
assert_check_result results[0], resque_name: "test1", check_count: 0
assert_check_result results[1], resque_name: "test2", check_count: 0
end
end
51 changes: 27 additions & 24 deletions test/lib/monitoring/librato_notifier_test.rb
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
require 'support/fake_logger'

lib_require 'monitoring/notifier'
lib_require 'monitoring/check_result'
lib_require 'monitoring/librato_notifier'

module Monitoring
Expand All @@ -14,26 +15,14 @@ def test_type
assert Monitoring::LibratoNotifier.ancestors.include?(Monitoring::Notifier)
end

def test_requires_a_prefix
assert_raises ArgumentError do
Monitoring::LibratoNotifier.new(logger: FakeLogger.new)
end
end

def test_prefix_should_just_have_alpha_nums_and_dots
assert_raises ArgumentError do
Monitoring::LibratoNotifier.new(prefix: "foo bar", logger: FakeLogger.new)
end
end

def test_logs_results
logger = FakeLogger.new
notifier = Monitoring::LibratoNotifier.new(prefix: "foo.bar", logger: logger, unit: "jobs")
notifier.notify!({
"test1" => [ Object.new, Object.new, Object.new ],
"test2" => [ Object.new ],
"test3" => [],
})
notifier = Monitoring::LibratoNotifier.new(logger: logger, unit: "jobs")
notifier.notify!([
Monitoring::CheckResult.new(resque_name: "test1", check_name: "foo.bar", check_count: 3),
Monitoring::CheckResult.new(resque_name: "test2", check_name: "foo.bar", check_count: 1),
Monitoring::CheckResult.new(resque_name: "test3", check_name: "foo.bar", check_count: 0),
])

assert_equal "source=test1 count#foo.bar=3jobs", logger.infos[0]
assert_equal "source=test2 count#foo.bar=1jobs", logger.infos[1]
Expand All @@ -42,15 +31,29 @@ def test_logs_results

def test_logs_results_as_measure
logger = FakeLogger.new
notifier = Monitoring::LibratoNotifier.new(prefix: "foo.bar", logger: logger, type: :measure, unit: "workers")
notifier.notify!({
"test1" => [ Object.new, Object.new, Object.new ],
"test2" => [ Object.new ],
"test3" => [],
})
notifier = Monitoring::LibratoNotifier.new(logger: logger, type: :measure, unit: "workers")
notifier.notify!([
Monitoring::CheckResult.new(resque_name: "test1", check_name: "foo.bar", check_count: 3),
Monitoring::CheckResult.new(resque_name: "test2", check_name: "foo.bar", check_count: 1),
Monitoring::CheckResult.new(resque_name: "test3", check_name: "foo.bar", check_count: 0),
])

assert_equal "source=test1 measure#foo.bar=3workers", logger.infos[0]
assert_equal "source=test2 measure#foo.bar=1workers", logger.infos[1]
assert_equal "source=test3 measure#foo.bar=0workers", logger.infos[2]
end

def test_logs_results_with_scope
logger = FakeLogger.new
notifier = Monitoring::LibratoNotifier.new(logger: logger, unit: "workers")
notifier.notify!([
Monitoring::CheckResult.new(resque_name: "test1", scope: "baz", check_name: "foo.bar", check_count: 3),
Monitoring::CheckResult.new(resque_name: "test2", scope: "baz", check_name: "foo.bar", check_count: 1),
Monitoring::CheckResult.new(resque_name: "test3", scope: "baz", check_name: "foo.bar", check_count: 0),
])

assert_equal "source=test1.baz count#foo.bar=3workers", logger.infos[0]
assert_equal "source=test2.baz count#foo.bar=1workers", logger.infos[1]
assert_equal "source=test3.baz count#foo.bar=0workers", logger.infos[2]
end
end
54 changes: 0 additions & 54 deletions test/lib/monitoring/per_queue_librato_notifier_test.rb

This file was deleted.

0 comments on commit e9c4231

Please sign in to comment.