Permalink
Browse files

Merge branch 'master' into mongo_split

  • Loading branch information...
itsderek23 committed Mar 19, 2013
2 parents 5bd9081 + e46cbfa commit 98da3d31791f7c55a79864cd67ef51ec991080e8
@@ -25,7 +25,7 @@ def build_report
base_url = "#{option(:elasticsearch_host)}:#{option(:elasticsearch_port)}/_cluster/health"
response = JSON.parse(Net::HTTP.get(URI.parse(base_url)))
- report(:status => response['status'])
+ report(:status => status(response['status']))
report(:number_of_nodes => response['number_of_nodes'])
report(:number_of_data_nodes => response['number_of_data_nodes'])
report(:active_primary_shards => response['active_primary_shards'])
@@ -47,6 +47,19 @@ def build_report
rescue Errno::ECONNREFUSED
error("Unable to connect", "Please ensure the host and port are correct. Current URL: \n\n#{base_url}")
end
+
+ # Generates a status string like "2 (green)" so triggers can be run off the status.
+ # Maps the cluster health color to a numeric code and returns the pair as
+ # "<code> (<color>)": 'green' => 2, 'yellow' => 1, 'red' => 0.
+ # Any other value has no code, so the result starts with a space, e.g. " (blue)".
+ def status(color)
+ severity = { 'green' => 2, 'yellow' => 1, 'red' => 0 }[color]
+ "#{severity} (#{color})"
+ end
end
@@ -34,63 +34,54 @@ class MysqlReplicationMonitor < Scout::Plugin
attr_accessor :connection
+ # Runs "show slave status" and reports two values: "Seconds Behind Master"
+ # and "Replication Running" (1 or 0). On the new side of this hunk the alert
+ # calls are removed; an outage is expressed through the reported values.
def build_report
+ # Defaults used unless a branch below overrides them. report(res) sits after
+ # the begin/rescue, so these values are also reported when no slave row is
+ # returned or when a Mysql::Error is rescued.
+ res={"Seconds Behind Master" => -1, "Replication Running"=>0}
begin
- self.connection = Mysql.new(option(:host), option(:username), option(:password), nil, option(:port).to_i)
- h = connection.query("show slave status").fetch_hash
+ self.connection=Mysql.new(option(:host),option(:username),option(:password),nil,option(:port).to_i)
+ h=connection.query("show slave status").fetch_hash
down_at = memory(:down_at)
if h.nil?
error("Replication not configured")
- elsif h["Seconds_Behind_Master"].nil? and !down_at
- unless in_ignore_window?
- alert("Replication not running", alert_body(h))
- down_at = Time.now
- end
- elsif h["Slave_IO_Running"] == "Yes" and h["Slave_SQL_Running"] == "Yes"
- if down_at
- alert("Replication running again","Replication was not running for #{(Time.now - down_at).to_i} seconds")
- down_at = nil
- end
- elsif !down_at
- unless in_ignore_window?
- alert("Replication not running", alert_body(h))
- down_at = Time.now
+ else
+ # No lag value and no outage recorded yet: record the outage start, unless
+ # the ignore window is active (then the slave is reported as running).
+ if h["Seconds_Behind_Master"].nil? and !down_at
+ if in_ignore_window?
+ res["Replication Running"]=1
+ else
+ res["Replication Running"]=0
+ down_at = Time.now
+ end
+ elsif h["Slave_IO_Running"] == "Yes" and h["Slave_SQL_Running"] == "Yes"
+ # NOTE(review): this branch is also reached when Seconds_Behind_Master is
+ # nil and down_at is already set; the -1 default is then replaced by nil.
+ res["Seconds Behind Master"] = h["Seconds_Behind_Master"]
+ res["Replication Running"]=1
+ # Both threads report "Yes": forget the remembered outage start.
+ down_at = nil if down_at
+ # A thread is not running and no outage is recorded yet.
+ elsif !down_at
+ if in_ignore_window?
+ res["Replication Running"]=1
+ else
+ down_at = Time.now
+ res["Replication Running"]=0
+ end
+ # When down_at is already set and the threads are not both running, no
+ # branch matches and the defaults (-1 / 0) are reported.
+ end
end
- report("Seconds Behind Master" => h["Seconds_Behind_Master"]) if h && h["Seconds_Behind_Master"]
- remember(:down_at, down_at)
- rescue Mysql::Error => e
- error("Unable to connect to MySQL", e.to_s)
+ remember(:down_at,down_at)
+ rescue Mysql::Error=>e
+ error("Unable to connect to MySQL",e.to_s)
end
+ report(res)
end
+ # Returns true when the current time lies inside the window configured by
+ # the :ignore_window_start and :ignore_window_end options, and false when
+ # either option is unset. Both times are parsed against today's date; a
+ # window whose start is not earlier than its end is treated as wrapping
+ # past midnight.
def in_ignore_window?
- if s = option(:ignore_window_start) && e = option(:ignore_window_end)
+ # Read the two options separately. `s = a && e = b` parses as
+ # `s = (a && (e = b))`, which stored the END value in s, made start_time
+ # equal end_time and caused the method to return true at almost any time.
+ s=option(:ignore_window_start)
+ e=option(:ignore_window_end)
+ if s && e
start_time = Time.parse("#{Date.today} #{s}")
end_time = Time.parse("#{Date.today} #{e}")
- if start_time < end_time
- return (Time.now > start_time and Time.now < end_time)
+ if start_time<end_time
+ return(Time.now > start_time and Time.now < end_time)
else
- return (Time.now > start_time or Time.now < end_time)
+ return(Time.now > start_time or Time.now < end_time)
end
else
false
end
end
- def alert_body(h)
- """
-IO Slave Running: #{h["Slave_IO_Running"]}
-SQL Slave Running: #{h["Slave_SQL_Running"]}
-
-Last Errno: #{h["Last_Errno"]}
-Last Error: #{h["Last_Error"]}
-
-Last IO Errno: #{h["Last_IO_Errno"]}
-Last IO Error: #{h["Last_IO_Error"]}
-
-Last_SQL_Errno: #{h["Last_SQL_Errno"]}
-Last SQL Error: #{h["Last_SQL_Error"]}
-"""
- end
-end
+end
@@ -2,3 +2,10 @@ metadata:
"Seconds Behind Master":
units: seconds
precision: 0
+ "Replication Running":
+ precision: 0
+
+triggers:
+ - type: peak
+ dname: "Replication Running"
+ low_value: 0
@@ -1,4 +1,4 @@
-class PostgresqlMonitoring< Scout::Plugin
+class PostgresqlMonitoring < Scout::Plugin
# need the ruby-pg gem
needs 'pg'
@@ -29,7 +29,6 @@ class PostgresqlMonitoring< Scout::Plugin
NON_COUNTER_ENTRIES = ["numbackends"]
def build_report
- now = Time.now
report = {}
begin
@@ -16,8 +16,9 @@ class SidekiqMonitor < Scout::Plugin
notes: Redis database ID to pass to the client library.
default: 0
username:
- name: Username
- notes: If you're using Redis' username/password authentication
+ name: RedisToGo Username
+ notes: If you're using RedisToGo.
+ attributes: advanced
password:
name: Password
notes: If you're using Redis' username/password authentication.
@@ -0,0 +1,96 @@
+require 'time'
+require 'date'
+# Scout plugin that checks MySQL replication on a slave host by running
+# "show slave status". It raises alerts when replication stops or resumes and
+# reports "Seconds Behind Master" when the server provides a value.
+class MysqlReplicationMonitor < Scout::Plugin
+ needs 'mysql'
+
+ OPTIONS=<<-EOS
+ host:
+ name: Host
+ notes: The slave host to monitor
+ default: 127.0.0.1
+ port:
+ name: Port
+ notes: The port number on the slave host
+ default: 3306
+ username:
+ name: Username
+ notes: The MySQL username to use
+ default: root
+ password:
+ name: Password
+ notes: The password for the mysql user
+ default:
+ attributes: password
+ ignore_window_start:
+ name: Ignore Window Start
+ notes: Time to start ignoring replication failures. Useful for disabling replication for backups. For Example, 7:00pm
+ default:
+ ignore_window_end:
+ name: Ignore Window End
+ notes: Time to resume alerting on replication failure. For Example, 2:00am
+ default:
+ EOS
+
+ attr_accessor :connection
+
+ # Connects with the configured host/username/password/port, reads the slave
+ # status row and:
+ # * records an error when no row is returned;
+ # * alerts "Replication not running" when an outage is first seen outside
+ # the ignore window, and stores the time in down_at;
+ # * alerts "Replication running again" with the downtime in seconds once
+ # both threads report "Yes", and clears down_at;
+ # * reports "Seconds Behind Master" whenever the row carries a value.
+ # down_at is read with memory(:down_at) and written back with remember.
+ # A Mysql::Error is rescued and recorded as an error.
+ def build_report
+ begin
+ self.connection = Mysql.new(option(:host), option(:username), option(:password), nil, option(:port).to_i)
+ h = connection.query("show slave status").fetch_hash
+ down_at = memory(:down_at)
+ if h.nil?
+ error("Replication not configured")
+ elsif h["Seconds_Behind_Master"].nil? and !down_at
+ unless in_ignore_window?
+ alert("Replication not running", alert_body(h))
+ down_at = Time.now
+ end
+ elsif h["Slave_IO_Running"] == "Yes" and h["Slave_SQL_Running"] == "Yes"
+ if down_at
+ alert("Replication running again","Replication was not running for #{(Time.now - down_at).to_i} seconds")
+ down_at = nil
+ end
+ elsif !down_at
+ unless in_ignore_window?
+ alert("Replication not running", alert_body(h))
+ down_at = Time.now
+ end
+ end
+ report("Seconds Behind Master" => h["Seconds_Behind_Master"]) if h && h["Seconds_Behind_Master"]
+ remember(:down_at, down_at)
+ rescue Mysql::Error => e
+ error("Unable to connect to MySQL", e.to_s)
+ end
+ end
+
+ # Returns true when the current time lies inside the window configured by
+ # the :ignore_window_start and :ignore_window_end options, and false when
+ # either option is unset. Both times are parsed against today's date; a
+ # window whose start is not earlier than its end is treated as wrapping
+ # past midnight.
+ def in_ignore_window?
+ # Read the two options separately. The previous single-line form
+ # `s = a && e = b` parsed as `s = (a && (e = b))`, which stored the END
+ # value in s, made start_time equal end_time and caused the method to
+ # return true at almost any time once both options were set.
+ s = option(:ignore_window_start)
+ e = option(:ignore_window_end)
+ if s && e
+ start_time = Time.parse("#{Date.today} #{s}")
+ end_time = Time.parse("#{Date.today} #{e}")
+
+ if start_time < end_time
+ return (Time.now > start_time and Time.now < end_time)
+ else
+ return (Time.now > start_time or Time.now < end_time)
+ end
+ else
+ false
+ end
+ end
+
+ # Builds the alert text from the slave status row h: the IO/SQL thread
+ # states followed by the last general, IO and SQL error numbers and messages.
+ # The triple quotes are adjacent string literals, so the text begins and
+ # ends with a newline.
+ def alert_body(h)
+ """
+IO Slave Running: #{h["Slave_IO_Running"]}
+SQL Slave Running: #{h["Slave_SQL_Running"]}
+
+Last Errno: #{h["Last_Errno"]}
+Last Error: #{h["Last_Error"]}
+
+Last IO Errno: #{h["Last_IO_Errno"]}
+Last IO Error: #{h["Last_IO_Error"]}
+
+Last_SQL_Errno: #{h["Last_SQL_Errno"]}
+Last SQL Error: #{h["Last_SQL_Error"]}
+"""
+ end
+end
@@ -0,0 +1,4 @@
+metadata:
+ "Seconds Behind Master":
+ units: seconds
+ precision: 0
@@ -0,0 +1,114 @@
+require File.expand_path('../../test_helper.rb', __FILE__)
+require File.expand_path('../mysql_replication_monitor.rb', __FILE__)
+
+require 'mysql'
+
+# Unit tests for MysqlReplicationMonitor. Each test stubs the result of
+# "show slave status" with a fixture row and inspects what the plugin run returns.
+class MysqlReplicationMonitorTest < Test::Unit::TestCase
+
+ def setup
+ @options=parse_defaults("mysql_replication_monitor")
+ end
+
+ # Both threads running with a lag value: the lag is reported.
+ def test_replication_success
+ outcome = run_plugin_with(FIXTURES[:success])
+ assert_equal 1, outcome[:reports].first['Seconds Behind Master']
+ end
+
+ # No slave status row at all: exactly one error is recorded.
+ def test_replication_not_configured
+ outcome = run_plugin_with(nil)
+ assert_equal 1, outcome[:errors].size
+ end
+
+ # SQL thread stopped: one alert, and the lag value is still reported.
+ def test_replication_failure
+ outcome = run_plugin_with(FIXTURES[:failure])
+ assert_equal 1, outcome[:alerts].size
+ assert_equal 1, outcome[:reports].first['Seconds Behind Master']
+ end
+
+ # Lag value is NULL: one alert is raised.
+ def test_replication_failure_nil_seconds_behind
+ outcome = run_plugin_with(FIXTURES[:failure_nil_seconds_behind])
+ assert_equal 1, outcome[:alerts].size
+ end
+
+ FIXTURES=YAML.load(<<-EOS)
+ :success:
+ Slave_IO_Running: 'Yes'
+ Slave_SQL_Running: 'Yes'
+ Seconds_Behind_Master: 1
+ :failure:
+ Slave_IO_Running: 'Yes'
+ Slave_SQL_Running: 'No'
+ Seconds_Behind_Master: 1
+ :failure_nil_seconds_behind:
+ Slave_IO_Running: 'Yes'
+ Slave_SQL_Running: 'Yes'
+ Seconds_Behind_Master: NULL
+ :full:
+ Slave_IO_State: Waiting for master to send event
+ Master_Host: mysql002.int
+ Master_User: replication
+ Master_Port: 3306
+ Connect_Retry: 60
+ Master_Log_File: mysql-bin.000006
+ Read_Master_Log_Pos: 505440314
+ Relay_Log_File: slave100-relay.000068
+ Relay_Log_Pos: 505440459
+ Relay_Master_Log_File: mysql-bin.000006
+ Slave_IO_Running: 'Yes'
+ Slave_SQL_Running: 'Yes'
+ Replicate_Do_DB:
+ Replicate_Ignore_DB:
+ Replicate_Do_Table:
+ Replicate_Ignore_Table:
+ Replicate_Wild_Do_Table:
+ Replicate_Wild_Ignore_Table:
+ Last_Errno: 0
+ Last_Error:
+ Skip_Counter: 0
+ Exec_Master_Log_Pos: 505440314
+ Relay_Log_Space: 505440656
+ Until_Condition: None
+ Until_Log_File:
+ Until_Log_Pos: 0
+ Master_SSL_Allowed: 'No'
+ Master_SSL_CA_File:
+ Master_SSL_CA_Path:
+ Master_SSL_Cert:
+ Master_SSL_Cipher:
+ Master_SSL_Key:
+ Seconds_Behind_Master: 1
+ Master_SSL_Verify_Server_Cert: 'No'
+ Last_IO_Errno: 0
+ Last_IO_Error:
+ Last_SQL_Errno: 0
+ Last_SQL_Error:
+ EOS
+
+ private
+
+ # Creates the plugin with no last run and empty memory, makes the single
+ # expected "show slave status" query return a result whose fetch_hash yields
+ # slave_status, then runs the plugin and returns what run() returns.
+ def run_plugin_with(slave_status)
+ @plugin = MysqlReplicationMonitor.new(nil, {}, @options)
+ result_set = Mysql::Result.new
+ result_set.stubs(:fetch_hash).returns(slave_status)
+ Mysql.any_instance.stubs(:query).with("show slave status").returns(result_set).once
+ @plugin.run()
+ end
+
+end

0 comments on commit 98da3d3

Please sign in to comment.