diff --git a/lib/monitoring/check_result.rb b/lib/monitoring/check_result.rb new file mode 100644 index 0000000..a0da65a --- /dev/null +++ b/lib/monitoring/check_result.rb @@ -0,0 +1,30 @@ +module Monitoring + class CheckResult + attr_reader :resque_name, # Name of the resque the check checked + :scope, # Refined scope inside this resque, if relavent (e.g. queue name or class name) + :check_name, # Name of the thing checked + :check_count # Count related to what was being checked + + def initialize(resque_name: nil,check_name: nil,check_count: nil, scope: nil) + @resque_name = required! resque_name, "resque_name" + @check_name = required! check_name, "check_name" + @check_count = required! check_count, "check_count", :to_i + @scope = optional scope + end + + private + + def required!(value, name, conversion = :to_s) + raise ArgumentError, "#{name} is required" if value.nil? + value.send(conversion) + end + + def optional(value) + if value.nil? + nil + else + value.to_s + end + end + end +end diff --git a/lib/monitoring/checker.rb b/lib/monitoring/checker.rb index f439966..ccc2a39 100644 --- a/lib/monitoring/checker.rb +++ b/lib/monitoring/checker.rb @@ -3,6 +3,7 @@ class Checker def initialize(resques: RESQUES) @resques = resques end + # Should return an array of CheckResult representing the results of the check def check! raise end diff --git a/lib/monitoring/failed_job_check.rb b/lib/monitoring/failed_job_check.rb index ecb7bbe..5f1a62a 100644 --- a/lib/monitoring/failed_job_check.rb +++ b/lib/monitoring/failed_job_check.rb @@ -2,9 +2,11 @@ module Monitoring class FailedJobCheck < Monitoring::Checker def check! - Hash[@resques.all.map { |resque_instance| - [resque_instance.name,resque_instance.jobs_failed] - }] + @resques.all.map { |resque_instance| + CheckResult.new(resque_name: resque_instance.name, + check_name: "resque.failed_jobs", + check_count: resque_instance.jobs_failed.size) + } end end end diff --git a/lib/monitoring/librato_notifier.rb b/lib/monitoring/librato_notifier.rb index 91fea4f..12601f5 100644 --- a/lib/monitoring/librato_notifier.rb +++ b/lib/monitoring/librato_notifier.rb @@ -1,7 +1,6 @@ module Monitoring class LibratoNotifier < Notifier - def initialize(prefix: nil, logger: Rails.logger, type: :count, unit: "") - @prefix = validate_prefix!(prefix) + def initialize(logger: Rails.logger, type: :count, unit: "") @logger = logger @type = type @unit = unit || "" @@ -11,9 +10,10 @@ def initialize(prefix: nil, logger: Rails.logger, type: :count, unit: "") # # results:: a hash where the keys represent the source (the resque instance name) and the values # are lists of items to be counted. The items won't be examined, just counted and used in the metric - def notify!(results) - results.each do |resque_name,items| - log_to_librato(resque_name,@type,@prefix,items.size) + def notify!(check_results) + check_results.each do |check_result| + source = [check_result.resque_name,check_result.scope].compact.join(".") + log_to_librato(source,@type,check_result.check_name,check_result.check_count) end end diff --git a/lib/monitoring/monitor.rb b/lib/monitoring/monitor.rb index 74fbdf5..acc788c 100644 --- a/lib/monitoring/monitor.rb +++ b/lib/monitoring/monitor.rb @@ -1,6 +1,7 @@ module Monitoring class Monitor - def initialize(checker: checker,notifier: notifier) + def initialize(checker: nil,notifier: nil) + raise ArgumentError, "both checker and notifier are required" if checker.nil? || notifier.nil? @checker = checker @notifier = notifier end diff --git a/lib/monitoring/notifier.rb b/lib/monitoring/notifier.rb index d62282c..d9cef6b 100644 --- a/lib/monitoring/notifier.rb +++ b/lib/monitoring/notifier.rb @@ -1,6 +1,8 @@ module Monitoring class Notifier - def notify!(results) + + # This will be given an array of CheckResult instances + def notify!(check_results) raise end end diff --git a/lib/monitoring/per_queue_librato_notifier.rb b/lib/monitoring/per_queue_librato_notifier.rb deleted file mode 100644 index 073d271..0000000 --- a/lib/monitoring/per_queue_librato_notifier.rb +++ /dev/null @@ -1,16 +0,0 @@ -module Monitoring - class PerQueueLibratoNotifier < LibratoNotifier - # Log metrics based on the hash passed in, which is assumed to be organized by queue. - # - # results:: a hash where the keys represent the source (the resque instance name) and the values - # are themselves hashes. The keys of *those* hashes are the names of queues, and their values - # are lists of items to be counted. The items won't be examined, just counted and used in the metric - def notify!(results) - results.each do |resque_name,by_queue| - by_queue.sort_by(&:to_s).each do |queue_name,items| - log_to_librato("#{resque_name}.#{queue_name}",@type,@prefix,items.size) - end - end - end - end -end diff --git a/lib/monitoring/queue_size_check.rb b/lib/monitoring/queue_size_check.rb index 81b3285..524273a 100644 --- a/lib/monitoring/queue_size_check.rb +++ b/lib/monitoring/queue_size_check.rb @@ -2,9 +2,15 @@ module Monitoring class QueueSizeCheck < Monitoring::Checker def check! - Hash[@resques.all.map { |resque_instance| - [resque_instance.name,resque_instance.jobs_waiting] - }] + @resques.all.map { |resque_instance| + resque_instance.jobs_waiting.keys.sort.map { |queue_name| + jobs = resque_instance.jobs_waiting[queue_name] + CheckResult.new(resque_name: resque_instance.name, + scope: queue_name, + check_name: "resque.queue_size", + check_count: jobs.size) + } + }.flatten end end end diff --git a/lib/monitoring/stale_worker_check.rb b/lib/monitoring/stale_worker_check.rb index 263eca1..97c2de8 100644 --- a/lib/monitoring/stale_worker_check.rb +++ b/lib/monitoring/stale_worker_check.rb @@ -1,9 +1,11 @@ module Monitoring class StaleWorkerCheck < Monitoring::Checker def check! - Hash[@resques.all.map { |resque_instance| - [resque_instance.name,resque_instance.jobs_running.select(&:too_long?)] - }] + @resques.all.map { |resque_instance| + CheckResult.new(resque_name: resque_instance.name, + check_name: "resque.stale_workers", + check_count: resque_instance.jobs_running.select(&:too_long?).size) + } end end end diff --git a/lib/tasks/monitor.rake b/lib/tasks/monitor.rake index 60e6b9f..a739115 100644 --- a/lib/tasks/monitor.rake +++ b/lib/tasks/monitor.rake @@ -3,7 +3,7 @@ namespace :monitor do task :failed => :environment do monitor = Monitoring::Monitor.new( checker: Monitoring::FailedJobCheck.new, - notifier: Monitoring::LibratoNotifier.new(prefix: "resque.failed_jobs", unit: "jobs")) + notifier: Monitoring::LibratoNotifier.new(unit: "jobs")) monitor.monitor! end @@ -11,7 +11,7 @@ namespace :monitor do task :stale_workers => :environment do monitor = Monitoring::Monitor.new( checker: Monitoring::StaleWorkerCheck.new, - notifier: Monitoring::LibratoNotifier.new(prefix: "resque.stale_workers", type: :measure, unit: "workers")) + notifier: Monitoring::LibratoNotifier.new(type: :measure, unit: "workers")) monitor.monitor! end @@ -20,7 +20,7 @@ namespace :monitor do task :queue_sizes => :environment do monitor = Monitoring::Monitor.new( checker: Monitoring::QueueSizeCheck.new, - notifier: Monitoring::PerQueueLibratoNotifier.new(prefix: "resque.queue_size", type: :count, unit: "jobs")) + notifier: Monitoring::LibratoNotifier.new(unit: "jobs")) monitor.monitor! end diff --git a/test/integration/monitoring_test.rb b/test/integration/monitoring_test.rb index 6f36f9f..824d0b3 100644 --- a/test/integration/monitoring_test.rb +++ b/test/integration/monitoring_test.rb @@ -60,9 +60,9 @@ class MonitoringTest < ActionDispatch::IntegrationTest Rake::Task['monitor:queue_sizes'].invoke - assert_equal "source=test1.cache count#resque.queue_size=2jobs" , logger.infos[0] - assert_equal "source=test1.mail count#resque.queue_size=4jobs" , logger.infos[1] - assert_equal "source=test2.admin count#resque.queue_size=2jobs" , logger.infos[2] - assert_equal "source=test2.mail count#resque.queue_size=1jobs" , logger.infos[3] + assert_equal "source=test1.cache count#resque.queue_size=2jobs" , logger.infos[0], "0" + logger.infos.inspect + assert_equal "source=test1.mail count#resque.queue_size=4jobs" , logger.infos[1], "1" + logger.infos.inspect + assert_equal "source=test2.admin count#resque.queue_size=2jobs" , logger.infos[2], "2" + logger.infos.inspect + assert_equal "source=test2.mail count#resque.queue_size=1jobs" , logger.infos[3], "3" + logger.infos.inspect end end diff --git a/test/lib/monitoring/failed_job_check_test.rb b/test/lib/monitoring/failed_job_check_test.rb index 4b7aa34..eff72da 100644 --- a/test/lib/monitoring/failed_job_check_test.rb +++ b/test/lib/monitoring/failed_job_check_test.rb @@ -1,9 +1,11 @@ require 'quick_test_helper' require 'support/resque_helpers' +require 'support/monitoring_helpers' require 'minitest/autorun' require 'resque' lib_require 'monitoring/checker' +lib_require 'monitoring/check_result' lib_require 'monitoring/failed_job_check' rails_require 'models/resque_instance' rails_require 'models/job' @@ -14,12 +16,13 @@ module Monitoring end class Monitoring::FailedJobCheckTest < MiniTest::Test include ResqueHelpers + include MonitoringHelpers - def setup_resques(test1: 1, test2: 2) + def setup_resques(test1: ["BazJob"], test2: ["FooJob","BarJob"]) Redis.new.flushall Resques.new([ - add_failed_jobs(num_failed: test1, resque_instance: resque_instance("test1",:resque)), - add_failed_jobs(num_failed: test2, resque_instance: resque_instance("test2",:resque2)), + add_failed_jobs(job_class_names: test1, resque_instance: resque_instance("test1",:resque)), + add_failed_jobs(job_class_names: test2, resque_instance: resque_instance("test2",:resque2)), ]) end @@ -28,22 +31,22 @@ def test_type end def test_failed_jobs - resques = setup_resques(test1: 1, test2: 2) + resques = setup_resques check = Monitoring::FailedJobCheck.new(resques: resques) results = check.check! - assert_equal 1,results["test1"].size,results["test1"].inspect - assert_equal 2,results["test2"].size,results["test2"].inspect + assert_check_result results[0], resque_name: "test1", check_count: 1 + assert_check_result results[1], resque_name: "test2", check_count: 2 end def test_no_failed_jobs - resques = setup_resques(test1: 0, test2: 0) + resques = setup_resques(test1: [], test2: []) check = Monitoring::FailedJobCheck.new(resques: resques) results = check.check! - assert_equal 0,results["test1"].size,results.inspect - assert_equal 0,results["test2"].size,results.inspect + assert_check_result results[0], resque_name: "test1", check_count: 0 + assert_check_result results[1], resque_name: "test2", check_count: 0 end end diff --git a/test/lib/monitoring/librato_notifier_test.rb b/test/lib/monitoring/librato_notifier_test.rb index 856f818..425d79a 100644 --- a/test/lib/monitoring/librato_notifier_test.rb +++ b/test/lib/monitoring/librato_notifier_test.rb @@ -4,6 +4,7 @@ require 'support/fake_logger' lib_require 'monitoring/notifier' +lib_require 'monitoring/check_result' lib_require 'monitoring/librato_notifier' module Monitoring @@ -14,26 +15,14 @@ def test_type assert Monitoring::LibratoNotifier.ancestors.include?(Monitoring::Notifier) end - def test_requires_a_prefix - assert_raises ArgumentError do - Monitoring::LibratoNotifier.new(logger: FakeLogger.new) - end - end - - def test_prefix_should_just_have_alpha_nums_and_dots - assert_raises ArgumentError do - Monitoring::LibratoNotifier.new(prefix: "foo bar", logger: FakeLogger.new) - end - end - def test_logs_results logger = FakeLogger.new - notifier = Monitoring::LibratoNotifier.new(prefix: "foo.bar", logger: logger, unit: "jobs") - notifier.notify!({ - "test1" => [ Object.new, Object.new, Object.new ], - "test2" => [ Object.new ], - "test3" => [], - }) + notifier = Monitoring::LibratoNotifier.new(logger: logger, unit: "jobs") + notifier.notify!([ + Monitoring::CheckResult.new(resque_name: "test1", check_name: "foo.bar", check_count: 3), + Monitoring::CheckResult.new(resque_name: "test2", check_name: "foo.bar", check_count: 1), + Monitoring::CheckResult.new(resque_name: "test3", check_name: "foo.bar", check_count: 0), + ]) assert_equal "source=test1 count#foo.bar=3jobs", logger.infos[0] assert_equal "source=test2 count#foo.bar=1jobs", logger.infos[1] @@ -42,15 +31,29 @@ def test_logs_results def test_logs_results_as_measure logger = FakeLogger.new - notifier = Monitoring::LibratoNotifier.new(prefix: "foo.bar", logger: logger, type: :measure, unit: "workers") - notifier.notify!({ - "test1" => [ Object.new, Object.new, Object.new ], - "test2" => [ Object.new ], - "test3" => [], - }) + notifier = Monitoring::LibratoNotifier.new(logger: logger, type: :measure, unit: "workers") + notifier.notify!([ + Monitoring::CheckResult.new(resque_name: "test1", check_name: "foo.bar", check_count: 3), + Monitoring::CheckResult.new(resque_name: "test2", check_name: "foo.bar", check_count: 1), + Monitoring::CheckResult.new(resque_name: "test3", check_name: "foo.bar", check_count: 0), + ]) assert_equal "source=test1 measure#foo.bar=3workers", logger.infos[0] assert_equal "source=test2 measure#foo.bar=1workers", logger.infos[1] assert_equal "source=test3 measure#foo.bar=0workers", logger.infos[2] end + + def test_logs_results_with_scope + logger = FakeLogger.new + notifier = Monitoring::LibratoNotifier.new(logger: logger, unit: "workers") + notifier.notify!([ + Monitoring::CheckResult.new(resque_name: "test1", scope: "baz", check_name: "foo.bar", check_count: 3), + Monitoring::CheckResult.new(resque_name: "test2", scope: "baz", check_name: "foo.bar", check_count: 1), + Monitoring::CheckResult.new(resque_name: "test3", scope: "baz", check_name: "foo.bar", check_count: 0), + ]) + + assert_equal "source=test1.baz count#foo.bar=3workers", logger.infos[0] + assert_equal "source=test2.baz count#foo.bar=1workers", logger.infos[1] + assert_equal "source=test3.baz count#foo.bar=0workers", logger.infos[2] + end end diff --git a/test/lib/monitoring/per_queue_librato_notifier_test.rb b/test/lib/monitoring/per_queue_librato_notifier_test.rb deleted file mode 100644 index edf2fb4..0000000 --- a/test/lib/monitoring/per_queue_librato_notifier_test.rb +++ /dev/null @@ -1,54 +0,0 @@ -require 'quick_test_helper' -require 'minitest/autorun' -require 'resque' -require 'support/fake_logger' - -lib_require 'monitoring/notifier' -lib_require 'monitoring/librato_notifier' -lib_require 'monitoring/per_queue_librato_notifier' - -module Monitoring -end -class Monitoring::PerQueueLibratoNotifierTest < MiniTest::Test - - def test_type - assert Monitoring::PerQueueLibratoNotifier.ancestors.include?(Monitoring::Notifier) - end - - def test_requires_a_prefix - assert_raises ArgumentError do - Monitoring::PerQueueLibratoNotifier.new(logger: FakeLogger.new) - end - end - - def test_prefix_should_just_have_alpha_nums_and_dots - assert_raises ArgumentError do - Monitoring::PerQueueLibratoNotifier.new(prefix: "foo bar", logger: FakeLogger.new) - end - end - - def test_logs_results - logger = FakeLogger.new - notifier = Monitoring::PerQueueLibratoNotifier.new(prefix: "foo.bar", logger: logger, unit: "jobs") - notifier.notify!( - "test1" => { - "mail" => [ Object.new, Object.new, Object.new ], - "cache" => [ Object.new, Object.new, Object.new, Object.new ], - }, - "test2" => { - "mail" => [ Object.new ], - }, - "test3" => { - "mail" => [], - "cache" => [ Object.new], - } - ) - - # Sorts by queue within a resque for predictability - assert_equal "source=test1.cache count#foo.bar=4jobs", logger.infos[0] - assert_equal "source=test1.mail count#foo.bar=3jobs", logger.infos[1] - assert_equal "source=test2.mail count#foo.bar=1jobs", logger.infos[2] - assert_equal "source=test3.cache count#foo.bar=1jobs", logger.infos[3] - assert_equal "source=test3.mail count#foo.bar=0jobs", logger.infos[4] - end -end diff --git a/test/lib/monitoring/queue_size_check_test.rb b/test/lib/monitoring/queue_size_check_test.rb index 80794dd..5ef6c1e 100644 --- a/test/lib/monitoring/queue_size_check_test.rb +++ b/test/lib/monitoring/queue_size_check_test.rb @@ -1,10 +1,12 @@ require 'quick_test_helper' require 'support/resque_helpers' +require 'support/monitoring_helpers' require 'minitest/autorun' require 'resque' lib_require 'monitoring/checker' lib_require 'monitoring/queue_size_check' +lib_require 'monitoring/check_result' rails_require 'models/resque_instance' rails_require 'models/job' rails_require 'models/running_job' @@ -14,6 +16,7 @@ module Monitoring end class Monitoring::QueueSizeCheckTest < MiniTest::Test include ResqueHelpers + include MonitoringHelpers def setup_resques(test1: {}, test2: {}) Redis.new.flushall @@ -32,12 +35,12 @@ def test_jobs_in_queue test2: { mail: 3, admin: 2 }) check = Monitoring::QueueSizeCheck.new(resques: resques) - results = check.check! + results = check.check!.sort_by { |result| "#{result.resque_name}.#{result.scope}" } - assert_equal 10, results["test1"]["mail"].size - assert_equal 4 , results["test1"]["cache"].size + assert_check_result results[0], resque_name: "test1" , scope: "cache" , check_count: 4 + assert_check_result results[1], resque_name: "test1" , scope: "mail" , check_count: 10 + assert_check_result results[2], resque_name: "test2" , scope: "admin" , check_count: 2 + assert_check_result results[3], resque_name: "test2" , scope: "mail" , check_count: 3 - assert_equal 3 , results["test2"]["mail"].size - assert_equal 2 , results["test2"]["admin"].size end end diff --git a/test/lib/monitoring/stale_worker_check_test.rb b/test/lib/monitoring/stale_worker_check_test.rb index 9c30439..69f9e36 100644 --- a/test/lib/monitoring/stale_worker_check_test.rb +++ b/test/lib/monitoring/stale_worker_check_test.rb @@ -5,8 +5,10 @@ require 'support/explicit_interface_implementation' require 'support/resque_helpers' +require 'support/monitoring_helpers' lib_require 'monitoring/checker' +lib_require 'monitoring/check_result' lib_require 'monitoring/stale_worker_check' rails_require 'models/resque_instance' rails_require 'models/job' @@ -17,6 +19,7 @@ module Monitoring end class Monitoring::StaleWorkerCheckTest < MiniTest::Test include ResqueHelpers + include MonitoringHelpers def setup_resques(test1: 1, test2: 2) Redis.new.flushall @@ -36,17 +39,18 @@ def test_stale_workers results = check.check! - assert_equal 1,results["test1"].size,results["test1"].inspect - assert_equal 2,results["test2"].size,results["test2"].inspect + assert_check_result results[0], resque_name: "test1", check_count: 1 + assert_check_result results[1], resque_name: "test2", check_count: 2 end def test_no_stale_workers resques = setup_resques(test1: 0, test2: 0) check = Monitoring::StaleWorkerCheck.new(resques: resques) + results = check.check! + assert_check_result results[0], resque_name: "test1", check_count: 0 + assert_check_result results[1], resque_name: "test2", check_count: 0 - assert_equal 0,results["test1"].size,results.inspect - assert_equal 0,results["test2"].size,results.inspect end end diff --git a/test/support/monitoring_helpers.rb b/test/support/monitoring_helpers.rb new file mode 100644 index 0000000..7f07bac --- /dev/null +++ b/test/support/monitoring_helpers.rb @@ -0,0 +1,7 @@ +module MonitoringHelpers + def assert_check_result(check_result, resque_name: nil, scope: nil, check_count: nil, message: nil) + assert_equal resque_name , check_result.resque_name , message.to_s + assert_equal scope , check_result.scope , message.to_s + assert_equal check_count , check_result.check_count , message.to_s + end +end diff --git a/test/support/resque_helpers.rb b/test/support/resque_helpers.rb index 32f04aa..de23954 100644 --- a/test/support/resque_helpers.rb +++ b/test/support/resque_helpers.rb @@ -5,11 +5,14 @@ def resque_instance(name,namespace) ResqueInstance.new(name: name, resque_data_store: resque_data_store) end - def add_failed_jobs(num_failed: num_failed, resque_instance: nil) - num_failed.times do |i| + def add_failed_jobs(num_failed: nil, resque_instance: nil, job_class_names: nil) + raise 'you must supply num_failed or job_class_names' if num_failed.nil? && job_class_names.nil? + job_class_names ||= num_failed.times.map { "Baz" } + + job_class_names.each_with_index do |class_name,i| resque_instance.resque_data_store.push_to_failed_queue(Resque.encode( failed_at: Time.now.utc.iso8601, - payload: { class: "Baz", args: [ i ]}, + payload: { class: class_name, args: [ i ]}, exception: "Resque::TermException", error: "SIGTERM", backtrace: [ "foo","bar","blah"],