Permalink
Browse files

retrying iostat on Errno::EPIPE exceptions, zookeeper cleanup

  • Loading branch information...
1 parent d371e45 commit c3d8a3c26b0824e8b7fb96766203bc4ec798abd8 @itsderek23 itsderek23 committed Aug 5, 2011
Showing with 14 additions and 112 deletions.
  1. +14 −4 iostat/iostat.rb
  2. +0 −60 zookeeper_info/zookeeper-info.rb
  3. +0 −48 zookeeper_info/zookeeper-info.yml
View
@@ -49,10 +49,20 @@ def default_device
# Looks up the /proc/diskstats entry for +dev+.
#
# Each line of /proc/diskstats is split on whitespace and zipped with COLUMNS
# (defined outside this hunk) into a Hash; fields that parse as Integer are
# converted, all other fields stay Strings.
#
# dev - object responding to include?; the first entry whose 'name' satisfies
#       dev.include?(name) is returned immediately.
#
# Returns that entry; otherwise the last entry kept by the dm-0 fallback
# (only recorded when @default_device_used is truthy); otherwise nil.
# With the added lines applied, Errno::EPIPE is retried once and re-raised
# if it occurs a second time.
def iostat(dev)
# If LVM is in use, `mount` output doesn't map to `/proc/diskstats` entries. In that case, fall back to dm-0 as the default device.
lvm = nil
- IO.readlines('/proc/diskstats').each do |line|
- entry = Hash[*COLUMNS.zip(line.strip.split(/\s+/).collect { |v| Integer(v) rescue v }).flatten]
- return entry if dev.include?(entry['name'])
- lvm = entry if (@default_device_used and 'dm-0'.include?(entry['name']))
+ retried = false # becomes true once the single permitted retry has been used
+ begin
+ IO.readlines('/proc/diskstats').each do |line|
+ entry = Hash[*COLUMNS.zip(line.strip.split(/\s+/).collect { |v| Integer(v) rescue v }).flatten]
+ return entry if dev.include?(entry['name'])
+ lvm = entry if (@default_device_used and 'dm-0'.include?(entry['name'])) # remember the fallback, keep scanning for a direct match
+ end
+ rescue Errno::EPIPE # first EPIPE re-runs the begin block; a second one propagates
+ if retried
+ raise
+ else
+ retried = true
+ retry
+ end
end
return lvm
end
@@ -1,60 +0,0 @@
-class ZookeeperMonitor < Scout::Plugin
- needs 'socket'
-
- OPTIONS=<<-EOS
- port:
- name: Port
- notes: ZooKeeper listening port
- default: 2181
- EOS
-
-# Run the 4-letter "srvr" command to grab the server stats from the running service
-#
-# This is what the output of the command in bash looks like:
-# bash$ echo srvr | nc localhost 2181
-#
-# Zookeeper version: 3.3.3-cdh3u0--1, built on 03/26/2011 00:21 GMT
-# Latency min/avg/max: 0/0/0
-# Received: 68
-# Sent: 67
-# Outstanding: 0
-# Zxid: 0x400000002
-# Mode: follower
-# Node count: 4
-
- def build_report
- # Start every metric at nil; a metric whose line never appears in the response stays nil
- lat_min, lat_avg, lat_max, received, sent, outstanding, node_count, mode = nil
-
- # Any exception raised below (e.g. wrong port, unresponsive service) is rescued and reported via error()
- begin
- # TCPSocket reference: http://www.ruby-doc.org/stdlib/libdoc/socket/rdoc/index.html
- socket = TCPSocket.open("localhost", "#{option(:port)}")
- socket.print("srvr")
- stats = socket.read # NOTE(review): the socket is never closed in this method
-
- # Pull each metric out of its matching response line with a regex
- stats.each_line do |line|
- # A single regex captures min, avg and max latency together (thanks to Dan's regex-fu)
- lat_min, lat_avg, lat_max = $1, $2, $3 if line =~ /^Latency min\/avg\/max:\s+(\d+)+\/+(\d+)+\/+(\d+)/
- received = $1 if line =~ /^Received:\s+(\d+)/
- sent = $1 if line =~ /^Sent:\s+(\d+)/
- outstanding = $1 if line =~ /^Outstanding:\s+(\d+)/
- node_count = $1 if line =~ /^Node count:\s+(\d+)/
- mode = $1 if line =~ /^Mode:\s+(\w+)/
- end
-
- # Build the output report: received/sent as per-minute counters, the rest as plain values
- counter(:received, received.to_i, :per => :minute)
- counter(:sent, sent.to_i, :per => :minute)
- report({:lat_min => lat_min, :lat_avg => lat_avg, :lat_max => lat_max,
- :outstanding => outstanding, :node_count => node_count, :mode => mode })
-
- rescue Exception => e
- # NOTE(review): the body is single-quoted, so \n stays literal; String + e may raise TypeError on Ruby 1.9+ - confirm
- error(:subject => 'Poopytime!', :body => 'Apparently, the zookeeper service is not running on the specified port.\nFull error is:\n' + e)
- end
-
- end
-
-end
@@ -1,48 +0,0 @@
-metadata: !map:HashWithIndifferentAccess
- lat_avg: !map:HashWithIndifferentAccess
- units: ms
- delimiter: ","
- precision: "0"
- label: Latency Average
- node_count: !map:HashWithIndifferentAccess
- units: nodes
- delimiter: ","
- precision: "0"
- label: Node count
- sent: !map:HashWithIndifferentAccess
- units: pkts/min
- delimiter: ","
- precision: "2"
- label: Packets Sent
- mode: !map:HashWithIndifferentAccess
- units: ""
- delimiter: ","
- precision: "1"
- label: Node Mode
- lat_max: !map:HashWithIndifferentAccess
- units: ms
- delimiter: ","
- precision: "0"
- label: Latency Maximum
- outstanding: !map:HashWithIndifferentAccess
- units: requests
- delimiter: ","
- precision: "0"
- label: Outstanding
- received: !map:HashWithIndifferentAccess
- units: pkts/min
- delimiter: ","
- precision: "2"
- label: Packets Received
- lat_min: !map:HashWithIndifferentAccess
- units: ms
- delimiter: ","
- precision: "0"
- label: Latency Minimum
-
-triggers:
-- max_value: 900.0
- type: plateau
- dname: outstanding
- population_size: 0
- duration: 10

0 comments on commit c3d8a3c

Please sign in to comment.