Skip to content
This repository has been archived by the owner on Mar 14, 2019. It is now read-only.

Commit

Permalink
Merge pull request #31 from stitchfix/failed-jobs-by-class
Browse files Browse the repository at this point in the history
stat failures by class
  • Loading branch information
davetron5000 committed Jan 31, 2016
2 parents e9c4231 + 7539d09 commit 0d983c2
Show file tree
Hide file tree
Showing 4 changed files with 92 additions and 0 deletions.
16 changes: 16 additions & 0 deletions lib/monitoring/failed_job_by_class_check.rb
@@ -0,0 +1,16 @@
require 'ostruct'
module Monitoring
  # Checker that counts failed jobs per Resque instance, broken down by the
  # job class recorded in each failed job's payload.
  class FailedJobByClassCheck < Monitoring::Checker
    # Builds one CheckResult for every (resque instance, job class) pair that
    # has at least one failed job.
    #
    # Results follow the order in which +@resques.all+ yields the instances
    # and, within an instance, are sorted by class name. An instance with no
    # failed jobs contributes no results.
    #
    # NOTE(review): @resques is not assigned in this class; presumably it is
    # set up by Monitoring::Checker -- confirm.
    #
    # Returns a flat Array of CheckResult.
    def check!
      @resques.all.flat_map do |resque_instance|
        # Jobs whose payload carries no "class" entry are grouped under "NoClass".
        failures_by_class = resque_instance.jobs_failed.group_by { |job| job.payload["class"] || "NoClass" }

        failures_by_class.sort_by { |class_name, _jobs| class_name }.map do |class_name, jobs|
          CheckResult.new(resque_name: resque_instance.name,
                          check_name: "resque.failed_jobs",
                          # NOTE(review): presumably ActiveSupport's String#parameterize -- confirm.
                          scope: class_name.parameterize,
                          check_count: jobs.size)
        end
      end
    end
  end
end
8 changes: 8 additions & 0 deletions lib/tasks/monitor.rake
Expand Up @@ -7,6 +7,14 @@ namespace :monitor do
monitor.monitor!
end

# Runs FailedJobByClassCheck and hands its results to a LibratoNotifier
# configured with the "jobs" unit.
desc "Check the number of failed jobs and stat the results per class in the failed queue"
task failed_by_class: :environment do
  Monitoring::Monitor.new(
    checker: Monitoring::FailedJobByClassCheck.new,
    notifier: Monitoring::LibratoNotifier.new(unit: "jobs")
  ).monitor!
end

desc "Check the number of stale workers and stat the results to the log in a way Librato can understand"
task :stale_workers => :environment do
monitor = Monitoring::Monitor.new(
Expand Down
17 changes: 17 additions & 0 deletions test/integration/monitoring_test.rb
Expand Up @@ -34,6 +34,23 @@ class MonitoringTest < ActionDispatch::IntegrationTest
assert_equal "source=test2 count#resque.failed_jobs=4jobs",logger.infos[1]
end

# End-to-end: running the monitor:failed_by_class rake task logs one line per
# resque instance and job class, ordered by instance and then class name.
test "failed by class check to librato" do
  logger = FakeLogger.new
  Rails.logger = logger

  # Two resque instances; the second includes a failed job without a class name.
  first_resque = add_failed_jobs(job_class_names: ["FooJob","BarJob","FooJob"],
                                 resque_instance: resque_instance("test1",:resque))
  second_resque = add_failed_jobs(job_class_names: ["BazJob","BazJob",nil,"BazJob"],
                                  resque_instance: resque_instance("test2",:resque2))
  Object.const_set(:RESQUES, Resques.new([first_resque, second_resque]))

  Rake::Task['monitor:failed_by_class'].invoke

  expected_lines = [
    "source=test1.barjob count#resque.failed_jobs=1jobs",
    "source=test1.foojob count#resque.failed_jobs=2jobs",
    "source=test2.bazjob count#resque.failed_jobs=3jobs",
    "source=test2.noclass count#resque.failed_jobs=1jobs",
  ]
  expected_lines.each_with_index do |expected_line, index|
    assert_equal expected_line, logger.infos[index]
  end
end

test "stale workers to librato" do
logger = FakeLogger.new
Rails.logger = logger
Expand Down
51 changes: 51 additions & 0 deletions test/lib/monitoring/failed_job_by_class_check_test.rb
@@ -0,0 +1,51 @@

require 'quick_test_helper'
require 'support/resque_helpers'
require 'support/monitoring_helpers'
require 'minitest/autorun'
require 'resque'

lib_require 'monitoring/checker'
lib_require 'monitoring/check_result'
lib_require 'monitoring/failed_job_by_class_check'

rails_require 'models/resque_instance'
rails_require 'models/job'
rails_require 'models/failed_job'
rails_require 'models/resques'

# Make sure the Monitoring namespace exists before the test class below is
# declared with the compact `Monitoring::...` name, which does not create the
# enclosing module on its own.
module Monitoring
end
# Unit tests for Monitoring::FailedJobByClassCheck.
#
# Inherits from Minitest::Test rather than the legacy MiniTest::Test alias,
# which recent Minitest releases no longer define by default.
class Monitoring::FailedJobByClassCheckTest < Minitest::Test
  include ResqueHelpers
  include MonitoringHelpers

  # Flushes Redis and builds a Resques collection of two instances ("test1"
  # and "test2") seeded with failed jobs.
  #
  # test1 - Array of job class names (nil allowed) to fail on "test1".
  # test2 - Array of job class names (nil allowed) to fail on "test2".
  #
  # Returns the Resques collection.
  def setup_resques(test1: ["BazJob", nil], test2: ["FooJob", "FooJob", "BarJob"])
    # Presumably clears state left behind by earlier tests -- FLUSHALL wipes every Redis database.
    Redis.new.flushall
    Resques.new([
      add_failed_jobs(job_class_names: test1, resque_instance: resque_instance("test1", :resque)),
      add_failed_jobs(job_class_names: test2, resque_instance: resque_instance("test2", :resque2)),
    ])
  end

  # One result per (instance, class), ordered by instance and then class name;
  # a nil class name is reported under the "noclass" scope.
  def test_failed_jobs
    resques = setup_resques
    check = Monitoring::FailedJobByClassCheck.new(resques: resques)

    results = check.check!

    # Pin the count so unexpected extra results cannot slip through unnoticed.
    assert_equal 4, results.size
    assert_check_result results[0], resque_name: "test1", scope: "bazjob", check_count: 1
    assert_check_result results[1], resque_name: "test1", scope: "noclass", check_count: 1
    assert_check_result results[2], resque_name: "test2", scope: "barjob", check_count: 1
    assert_check_result results[3], resque_name: "test2", scope: "foojob", check_count: 2
  end

  # With no failed jobs on any instance the check yields no results at all.
  def test_no_failed_jobs
    resques = setup_resques(test1: [], test2: [])
    check = Monitoring::FailedJobByClassCheck.new(resques: resques)

    results = check.check!

    assert_empty results
  end
end

0 comments on commit 0d983c2

Please sign in to comment.