Skip to content
Browse files

Merge branch 'master' of git@github.com:infochimps/graphiterb

  • Loading branch information...
2 parents 699a76f + b110784 commit d3a9edd220a740ee828313dab1733397b0308e9e @dhruvbansal dhruvbansal committed Aug 17, 2010
Showing with 189 additions and 0 deletions.
  1. +51 −0 examples/api_call_monitor.rb
  2. +113 −0 examples/file_monitor.rb
  3. +2 −0 examples/run_servers.sh
  4. +23 −0 examples/storage_monitor.rb
View
51 examples/api_call_monitor.rb
@@ -0,0 +1,51 @@
+#!/usr/bin/env ruby
+$: << File.dirname(__FILE__)+'/../lib/'
+require 'graphiterb'
+require 'graphiterb/graphite_script'
+
+WC_EXEC = '/usr/bin/wc'
+
# Reports request and response-status counts scraped from the apeyeye access
# log.
#
# For every monitored item two metrics are emitted per iteration: the total
# number of matching log lines, and the growth of that total since the
# previous iteration.
class ApiCallMonitor < Graphiterb::GraphiteLogger
  API_CALLS_TO_MONITOR   = %w[trstrank wordbag influence conversation].freeze
  # Entries are regular-expression fragments, so '4..' matches any 4xx status.
  ERROR_CODES_TO_MONITOR = %w[4.. 5.. 200].freeze
  # Access log that every count is taken from.
  ACCESS_LOG             = '/var/www/apeyeye/shared/log/apeyeye-access.log'.freeze

  def initialize(*args)
    super(*args)
    @current_total = {}
    @prev_total    = {}
  end

  # Counts the log lines requesting the given API and records the count as
  # the current total for that API.
  #
  # @param api [String] path segment following /soc/net/tw/
  # @return [Integer] number of matching log lines (0 if the log is unreadable)
  def calls(api)
    @current_total[api] = count_log_lines("GET /soc/net/tw/#{api}")
  end

  # Counts the log lines whose request is followed by the given status code
  # pattern and records the count as the current total for that pattern.
  #
  # @param error_code [String] regular-expression fragment for the status code
  # @return [Integer] number of matching log lines (0 if the log is unreadable)
  def errors(error_code)
    # The doubled backslash keeps the dot in "HTTP/1.x" literal in the pattern.
    @current_total[error_code] =
      count_log_lines("GET /soc/net/tw/.*HTTP/1\\.[0-1]..#{error_code}")
  end

  # Growth of an item's total since the last time this method was called for
  # it. The first call for an item yields 0, and a shrinking total (for
  # example after the log is truncated) is clamped to 0.
  #
  # @param item [String] an API name or status-code pattern already counted
  #   via #calls or #errors in this iteration
  # @return [Integer]
  def rate(item)
    current  = @current_total[item].to_i
    previous = (@prev_total[item] ||= @current_total[item]).to_i
    @prev_total[item] = @current_total[item]
    [0, current - previous].max
  end

  # Appends [metric_name, value] pairs for every monitored API and status
  # code. The total is computed before the rate because #rate reads the value
  # stored by #calls / #errors.
  def get_metrics(metrics, iter, since)
    API_CALLS_TO_MONITOR.each do |api|
      metrics << [scope_name(hostname, api, 'total_accesses'), calls(api)]
      metrics << [scope_name(hostname, api, 'accesses'),       rate(api)]
    end
    ERROR_CODES_TO_MONITOR.each do |code|
      label = code.tr('.', 'x')
      metrics << [scope_name(hostname, label, 'total_errors'), errors(code)]
      metrics << [scope_name(hostname, label, 'errors'),       rate(code)]
    end
  end

  private

  # Number of lines in ACCESS_LOG matching the extended regular expression.
  # grep is invoked directly (no shell), so the pattern needs no quoting.
  # A missing log makes grep print nothing on stdout, which counts as 0.
  def count_log_lines(pattern)
    IO.popen(['grep', '-c', '-E', '-e', pattern, ACCESS_LOG], &:read).to_i
  rescue SystemCallError => e
    warn "Could not count #{pattern.inspect} in #{ACCESS_LOG}: #{e.message}"
    0
  end
end
+
+
# Check the configured reporting interval, then start the monitor.
if Settings.update_delay < 60
  warn "Update delay is #{Settings.update_delay} seconds. You probably want something larger: some of these checks are data-intensive"
end
# Settings.die presumably aborts the script -- confirm against the Settings implementation.
if Settings.update_delay >= 300
  Settings.die "Update delay is #{Settings.update_delay} seconds. You need to radio in at least as often as /usr/local/share/graphite/conf/storage-schemas says -- this is typically 5 minutes."
end

ApiCallMonitor.new('apeyeye', :iters => nil, :time => Settings.update_delay).run!
View
113 examples/file_monitor.rb
@@ -0,0 +1,113 @@
+#!/usr/bin/env ruby
+$: << File.dirname(__FILE__)+'/../lib/'
+require 'graphiterb'
+Settings.define :work_dir, :description => "Base directory where scrapers store files. (Ex: /data/ripd/com.tw)", :required => true
+require 'graphiterb/graphite_script'
+
+#
+# Usage:
+#
+# nohup ~/ics/backend/graphiterb/bin/file_monitor.rb --work_dir=/data/ripd/com.tw --carbon_server=whatever --update_delay=120 > /data/log/file_monitor.log 2>&1 &
+#
+
+WC_EXEC = '/usr/bin/wc'
+
require 'shellwords'

# A directory tree sampled for a bounded number of its files, with helpers
# that report how many files there are, how large they are and how many lines
# they contain.
class FilePool
  # Path to sample for files
  attr_accessor :path
  # Regexp a file's full path must match to belong to the pool
  attr_accessor :filter_re
  # A recent file was modified within this window.
  # NOTE(review): nothing in this class reads this attribute; recency is
  # decided by FilePool.recent? and its +window+ argument.
  attr_accessor :recent_window
  # Only consider the last this-many files
  MAX_FILES = 30
  # Default age, in seconds, below which a file counts as recent
  DEFAULT_RECENT_WINDOW = 3600

  # @param path [String] directory to sample
  # @param filter_re [Regexp] pattern a file's path must match
  # @param options [Hash] accepted for compatibility; currently unused
  def initialize(path, filter_re = /.*/, options = {})
    self.path      = path
    self.filter_re = filter_re
  end

  # Name for this pool, suitable for inclusion in a metrics handle:
  # dots become underscores, slashes become dots, and leading/trailing dots
  # are dropped ("/data/ripd/com.tw" => "data.ripd.com_tw").
  def name
    path.gsub(/\./, '_').gsub(%r{/}, '.').gsub(/(^\.|\.$)/, '')
  end

  #
  # Lists the files in the pool: the last MAX_FILES paths (in sort order)
  # under +path+ that match +filter_re+.
  # @param filter_block files only keeps filenames that pass this filter;
  #   without a block every sampled file is returned
  # @return [Array<String>]
  #
  def files(&filter_block)
    sampled = Dir[File.join(path, '**/*')].
      reject { |f| File.directory?(f) }.
      select { |f| f =~ filter_re }.
      sort.reverse.first(MAX_FILES)
    # select(&nil) would hand back an Enumerator, so only filter when asked to.
    filter_block ? sampled.select(&filter_block) : sampled
  end

  # Number of files in the pool that pass the optional filter.
  def num_files(&filter_block)
    files(&filter_block).count
  end

  # Sizes in bytes of the pool's files; files that can no longer be stat'ed
  # are left out.
  def sizes(&filter_block)
    files(&filter_block).map { |f| size_of(f) }.compact
  end

  # Combined size in bytes of the pool's files.
  def size(&filter_block)
    sizes(&filter_block).sum
  end

  # Mean file size in bytes, or 0.0 when the pool has no measurable files.
  def avg_size(&filter_block)
    measured = sizes(&filter_block)
    return 0.0 if measured.empty?
    measured.sum.to_f / measured.size
  end

  # Runs +command+ with shell-escaped +args+ and returns its standard output
  # split into lines.
  # @return [Array<String>, nil] nil if the command could not be run
  def lines_in_result_of(command, *args)
    escaped_args = args.map { |arg| Shellwords.escape(arg.to_s) }
    `#{command} #{escaped_args.join(' ')}`.chomp.split(/[\r\n]+/)
  rescue StandardError => e
    warn(e, e.backtrace)
    nil
  end

  # Total number of lines in the pool's files, as reported by WC_EXEC -l.
  def line_counts(&filter_block)
    targets = files(&filter_block)
    return 0 if targets.empty?
    rows = lines_in_result_of(WC_EXEC, '-l', *targets)
    return 0 unless rows
    # Given several files, wc ends its output with a grand-total row; adding
    # it to the per-file rows would count every line twice.
    rows = rows[0...-1] if targets.size > 1
    rows.map { |row| row[/^\s*(\d+)\s+/, 1] }.compact.map(&:to_i).sum
  end

  # Whether +file+ was modified less than +window+ seconds ago. A file that
  # cannot be stat'ed (for instance, removed in the meantime) is not recent.
  def self.recent?(file, window = DEFAULT_RECENT_WINDOW)
    (Time.now - File.mtime(file)) < window
  rescue SystemCallError
    false
  end

  # A filter, usable as the block of #files and friends, that keeps only
  # recently modified files.
  def self.recency_filter(window = DEFAULT_RECENT_WINDOW)
    proc { |file| recent?(file, window) }
  end

  private

  # Size of +file+ in bytes, or nil if it cannot be stat'ed.
  def size_of(file)
    File.size(file)
  rescue SystemCallError
    nil
  end
end
+
# Emits file-count, file-size and line-count metrics for the recently
# modified files of every directory directly under Settings.work_dir.
class FileMonitor < Graphiterb::GraphiteSystemLogger
  # Files under each pool directory must match this pattern to be sampled.
  POOL_FILE_RE = %r{20\d*/.*\.(?:tsv|json|xml)}

  # Base directory whose immediate subdirectories become pools
  attr_accessor :path
  # Hash of directory path => FilePool
  attr_accessor :pools

  def initialize(*args)
    super(*args)
    self.path  = Settings.work_dir
    self.pools = {}
    populate_pools!
  end

  # Registers a FilePool for each subdirectory of +path+ that does not have
  # one yet. Existing pools are kept, so this is safe to call repeatedly.
  def populate_pools!
    Dir[File.join(path, '*')].select { |d| File.directory?(d) }.each do |dir|
      pools[dir] ||= FilePool.new(dir, POOL_FILE_RE)
    end
  end

  # Appends [metric_name, value] pairs for every pool, considering only
  # files that pass FilePool.recency_filter.
  def get_metrics(metrics, iter, since)
    # Pick up directories created since the previous iteration.
    populate_pools!
    recent = FilePool.recency_filter
    pools.each_value do |pool|
      metrics << [scope_name(pool.name, hostname, 'active_files'),     pool.num_files(&recent)]
      metrics << [scope_name(pool.name, hostname, 'active_file_size'), pool.size(&recent)]
      metrics << [scope_name(pool.name, hostname, 'line_counts'),      pool.line_counts(&recent)]
    end
  end
end
+
# Check the configured reporting interval, then start the monitor.
if Settings.update_delay < 60
  warn "Update delay is #{Settings.update_delay} seconds. You probably want something larger: some of the metrics are expensive."
end
if Settings.update_delay >= 300
  warn "Update delay is #{Settings.update_delay} seconds. You probably want something smaller: need to report in faster than the value in the graphite/conf/storage-schemas."
end
FileMonitor.new('scraper', :iters => nil, :time => Settings.update_delay).run!
View
2 examples/run_servers.sh
@@ -0,0 +1,2 @@
# Start the Graphite development web server and the carbon-cache daemon in the
# background, appending their output to log files under ./storage/log.
# All paths are relative, so this is presumably meant to be run from the
# Graphite source root -- confirm.
# $PWD (upper case) is the variable the shell maintains; $pwd is normally unset.
PYTHONPATH="$PWD/whisper" nohup ./bin/run-graphite-devel-server.py --libs="$PWD/webapp/" /usr/local/share/graphite/ >> ./storage/log/webapp/server.log 2>&1 &
PYTHONPATH="$PWD/whisper" nohup ./carbon/bin/carbon-cache.py --debug start >> ./storage/log/carbon-cache/console.log 2>&1 &
View
23 examples/storage_monitor.rb
@@ -0,0 +1,23 @@
+#!/usr/bin/env ruby
+$: << File.dirname(__FILE__)+'/../lib/'
+require 'rubygems'
+require 'graphiterb/graphite_script'
+
# Emits the available space of every /dev/* filesystem listed by df.
class AvailSpaceMonitor < Graphiterb::GraphiteLogger
  # -P forces the POSIX one-line-per-filesystem layout (plain df wraps long
  # device names onto a second line); -k pins the unit to 1024-byte blocks.
  DF_COMMAND = '/bin/df -Pk'.freeze

  # Runs df and returns one array of whitespace-separated fields per
  # filesystem whose device name starts with /dev/.
  #
  # @return [Array<Array<String>>] empty if df could not be run
  def diskfree
    `#{DF_COMMAND}`.chomp.split("\n").
      grep(%r{^/dev/}).
      map { |line| line.split(/\s+/) }
  rescue SystemCallError => e
    warn "Could not run #{DF_COMMAND}: #{e.message}"
    []
  end

  # Appends one [metric_name, available_blocks] pair per filesystem. The
  # device path becomes part of the metric name with slashes turned into dots.
  def get_metrics(metrics, iter, since)
    # df columns: filesystem, size, used, available, percent used, mount point
    diskfree.each do |handle, _size, _used, available, _percent_used, _location|
      metrics << ["system.#{hostname}#{handle.tr('/', '.')}.available", available.to_i]
    end
  end
end
+
# Check the configured reporting interval, then start the monitor.
if Settings.update_delay < 30
  warn "Update delay is #{Settings.update_delay} seconds. You probably want something larger: some of the metrics are expensive."
end
if Settings.update_delay >= 60
  warn "Update delay is #{Settings.update_delay} seconds. You probably want something smaller: need to report in faster than the value in the graphite/conf/storage-schemas."
end

AvailSpaceMonitor.new('system').run!

0 comments on commit d3a9edd

Please sign in to comment.
Something went wrong with that request. Please try again.