Skip to content

Commit

Permalink
Add profiler; improved exception logging.
Browse files Browse the repository at this point in the history
  • Loading branch information
rustyio committed Apr 9, 2015
1 parent ea36596 commit e58c635
Show file tree
Hide file tree
Showing 11 changed files with 101 additions and 37 deletions.
1 change: 1 addition & 0 deletions Gemfile
Original file line number Diff line number Diff line change
Expand Up @@ -29,5 +29,6 @@ gem 'spring' , :group => :development
gem 'sqlite3' , :group => :development
gem 'pry' , :group => :development
gem 'pry-byebug' , :group => :development
gem 'ruby-prof' , :group => :stress

gem 'sdoc' , '~> 0.4.0', :group => :doc
2 changes: 2 additions & 0 deletions Gemfile.lock
Original file line number Diff line number Diff line change
Expand Up @@ -184,6 +184,7 @@ GEM
rest-client (1.7.2)
mime-types (>= 1.16, < 3.0)
netrc (~> 0.7)
ruby-prof (0.15.6)
sass (3.2.19)
sass-rails (4.0.5)
railties (>= 4.0.0, < 5.0)
Expand Down Expand Up @@ -244,6 +245,7 @@ DEPENDENCIES
rails_12factor
rails_stdout_logging
rest-client (~> 1.7.2)
ruby-prof
sass-rails (~> 4.0.3)
sdoc (~> 0.4.0)
spring
Expand Down
4 changes: 3 additions & 1 deletion Procfile.stress-test
Original file line number Diff line number Diff line change
@@ -1,2 +1,4 @@
imap_test_server: rake imap:test_server
imap_client: rake imap:client STRESS_TEST_MODE=true
imap_client_1: rake imap:client STRESS_TEST_MODE=true ENABLE_PROFILER=$ENABLE_PROFILER
imap_client_2: rake imap:client STRESS_TEST_MODE=true ENABLE_PROFILER=false
imap_client_3: rake imap:client STRESS_TEST_MODE=true ENABLE_PROFILER=false
6 changes: 5 additions & 1 deletion app/processes/common/csv_log.rb
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,11 @@ def csv_log(path)
def close_csv_logs
_csv_log_initialize
@csv_log_paths.each do |path|
@csv_log_filehandles[path].close
begin
@csv_log_filehandles[path].close
rescue IOError
# May fire if we've already closed the stream elsewhere.
end
end
end

Expand Down
40 changes: 37 additions & 3 deletions app/processes/imap_client/daemon.rb
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ class ImapClient::Daemon
include Common::DbConnection
include Common::CsvLog

attr_accessor :stress_test_mode, :chaos_mode
attr_accessor :stress_test_mode, :chaos_mode, :enable_profiler
attr_accessor :num_worker_threads, :max_user_threads, :max_email_size, :tracer_interval, :num_tracers
attr_accessor :server_tag, :server_rhash
attr_accessor :heartbeat_thread, :discovery_thread
Expand All @@ -41,6 +41,7 @@ def initialize(options = {})
self.max_email_size = options.fetch(:max_email_size)
self.tracer_interval = options.fetch(:tracer_interval)
self.num_tracers = options.fetch(:num_tracers)
self.enable_profiler = options.fetch(:enable_profiler)

# Load balancing stuff.
self.server_tag = SecureRandom.hex(10)
Expand All @@ -57,6 +58,7 @@ def initialize(options = {})
def run
trap_signals
force_class_loading
maybe_start_profiling

# If stress testing, start a log.
if self.stress_test_mode
Expand All @@ -83,9 +85,9 @@ def run
discovery_thread && discovery_thread.terminate
claim_thread && claim_thread.terminate
terminate_worker_pool
user_threads.values.map(&:terminate)
user_threads.values.map(&:join)
terminate_user_threads
close_csv_logs
stop_profiling
end


Expand Down Expand Up @@ -304,6 +306,19 @@ def action_disconnect_user(options)
Log.exception(e)
end

# Private: Stop and disconnect all user threads.
def terminate_user_threads
user_threads.values.each do |thread|
# Terminate the thread, ignore exceptions.
thread.terminate rescue nil
end

user_threads.values.each do |thread|
# Join the thread, ignore exceptions.
thread.join if thread.alive?
end
end

# Private: Run a function, then restart a user thread.
#
# See UserThread#schedule for more details.
Expand All @@ -329,4 +344,23 @@ def action_callback(options)
options[:thread].run
end
end

# Private.
def maybe_start_profiling
return unless enable_profiler
Log.info("Starting profiler...")
require 'ruby-prof'
RubyProf.start
end

# Private.
def stop_profiling
return unless enable_profiler
Log.info("Stopping profiler...")
result = RubyProf.stop
printer = RubyProf::CallStackPrinter.new(result)
File.open("./tmp/profile.html", 'w') do |file|
printer.print(file)
end
end
end
4 changes: 1 addition & 3 deletions app/processes/imap_client/process_uid.rb
Original file line number Diff line number Diff line change
Expand Up @@ -41,9 +41,6 @@ def run
maybe.run { deploy_webhook }
maybe.run { update_daemon_stats }
maybe.finish
rescue => e
user_thread.log_exception(e)
user_thread.stop!
end

# Private: The User model.
Expand Down Expand Up @@ -91,6 +88,7 @@ def fetch_internal_date_and_size
# Save the internal_date and message_size for later.
self.internal_date = Time.parse(response.attr["INTERNALDATE"])
self.message_size = (response.attr["RFC822.SIZE"] || 0).to_i

return true
rescue Timeout::Error => e
# If this email triggered a timeout, then skip it.
Expand Down
48 changes: 30 additions & 18 deletions app/processes/imap_client/user_thread.rb
Original file line number Diff line number Diff line change
Expand Up @@ -36,15 +36,15 @@ def run
update_uid_validity if running?
main_loop if running?
rescue => e
Log.error("Encountered error for #{user.email}.")
log_exception(e)
stat_exception(e)
self.daemon.increment_error_count(user.id)
stop!
ensure
stop!
daemon.schedule_work(:disconnect_user, :hash => user.id, :user_id => user.id)
disconnect
Log.error("Disconnected #{user.email}.")
Log.info("Disconnected #{user.email}.")
end

# Private: Schedule a block of code to run in a worker thread
Expand All @@ -70,7 +70,7 @@ def schedule(&block)
sleep
end

# Private: Log exceptions differently if we're stress testing.
# Private: Log exceptions. Be less verbose if we're stress testing.
def log_exception(e)
imap_exceptions = [
Net::IMAP::Error,
Expand All @@ -82,12 +82,20 @@ def log_exception(e)

# Minimally log imap exceptions when stress testing.
if imap_exceptions.include?(e.class) && self.daemon.stress_test_mode
Log.error("#{e.class} - #{e.to_s}")
Log.info("#{e.class} - #{e.to_s}")
else
Log.info("Encountered error for #{user.email}: #{e.class} - #{e.to_s}")
Log.exception(e)
end
end

# Private: Log operations metrics. Be less verbose if we're stress testing.
def stat_exception(e)
return if self.daemon.stress_test_mode
category = "error." + e.class.to_s.gsub("::", "_")
Log.librato(:count, category, 1)
end

private unless Rails.env.test?

# Private: Exponentially backoff based on the number of errors we
Expand All @@ -114,21 +122,16 @@ def authenticate
schedule do
user.update_attributes!(:last_login_at => Time.now)
end
rescue OAuth::Error => e
rescue OAuth::Error,
Net::IMAP::NoResponseError,
Net::IMAP::ByeResponseError,
OAuth2::Error => e
# If we encounter an OAuth error during authentication, then the
# credentials are probably invalid. We don't want to log every
# occurrance of this, and we don't want to discard any information
# or disconnect the user, so we'll just back off from reconnecting
# again.
self.daemon.increment_error_count(user.id)
stop!
rescue OAuth2::Error => e
# Ditto for OAuth 2.0
self.daemon.increment_error_count(user.id)
stop!
rescue Net::IMAP::NoResponseError => e
# Ditto, some servers trigger this exception when an agent is not
# authorized.
# occurrance of this, and we don't want to archive the user, so
# we'll just back off from reconnecting again.
stat_exception(e)
Log.info("Encountered error for #{user.email}: #{e.class} - #{e.to_s}")
self.daemon.increment_error_count(user.id)
stop!
end
Expand Down Expand Up @@ -241,8 +244,11 @@ def wait_for_email
client.idle_done()
end
end
rescue Net::IMAP::Error => e
rescue Net::IMAP::Error,
EOFError => e
# Recover gracefully.
stat_exception(e)
Log.info("Encountered error for #{user.email}: #{e.class} - #{e.to_s}")
self.daemon.increment_error_count(user.id)
stop!
end
Expand All @@ -253,6 +259,12 @@ def wait_for_email
# + uid - The UID of the email.
def process_uid(uid)
ProcessUid.new(self, uid).run
rescue Timeout::Error => e
# Recover gracefully.
stat_exception(e)
Log.info("Encountered error for #{user.email}: #{e.class} - #{e.to_s}")
self.daemon.increment_error_count(user.id)
stop!
end

# Private: Logout the user, disconnect the client.
Expand Down
2 changes: 2 additions & 0 deletions config/application.rb
Original file line number Diff line number Diff line change
Expand Up @@ -29,5 +29,7 @@ class Application < Rails::Application
else
config.encryption_cipher = nil
end

config.log_level = String(ENV['LOG_LEVEL'] || "info").upcase
end
end
1 change: 1 addition & 0 deletions lib/tasks/imap.rake
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ namespace :imap do
config[:tracer_interval] = (ENV['TRACER_INTERVAL'] || 10 * 60).to_i
config[:num_tracers] = (ENV['NUM_TRACERS'] || 3).to_i
config[:enable_chaos] = (ENV['ENABLE_CHAOS'] || "true") == "true"
config[:enable_profiler] = (ENV['ENABLE_PROFILER'] || "false") == "true"

require 'imap_client'
ImapClient::Daemon.new(config).run
Expand Down
2 changes: 1 addition & 1 deletion script/analyze-stress-test.R
Original file line number Diff line number Diff line change
Expand Up @@ -89,7 +89,7 @@ p2 <- ggplot() +

## Save the plots.
cat("Saving plots.\n")
pdf("./stress-test-results.pdf")
pdf("./tmp/stress-test-results.pdf")
print(p1)
print(p2)
dev.off()
28 changes: 18 additions & 10 deletions script/stress-test
Original file line number Diff line number Diff line change
@@ -1,5 +1,11 @@
#!/usr/bin/env bash

[ -z "$LENGTH_OF_TEST" ] && LENGTH_OF_TEST="1"
[ -z "$EMAILS_PER_MINUTE" ] && EMAILS_PER_MINUTE="500"
[ -z "$ENABLE_CHAOS" ] && ENABLE_CHAOS="true"
[ -z "$ENABLE_PROFILER" ] && ENABLE_PROFILER="false"
[ -z "$RUBY_PROF_MEASURE_MODE" ] && RUBY_PROF_MEASURE_MODE="process"

clear

echo
Expand All @@ -22,27 +28,29 @@ echo "Do not be alarmed; you can ignore the errors."
echo
echo "Some useful environment variables to set:"
echo
echo "+ LENGTH_OF_TEST - Total number of minutes to run test. [$LENGTH_OF_TEST]"
echo "+ EMAILS_PER_MINUTE - Rate of fake email generation. [$EMAILS_PER_MINUTE]"
echo "+ ENABLE_CHAOS - 'true' if we should generate bad IMAP responses. [$ENABLE_CHAOS]"
echo "+ LENGTH_OF_TEST - Total number of minutes to run test. [$LENGTH_OF_TEST]"
echo "+ EMAILS_PER_MINUTE - Rate of fake email generation. [$EMAILS_PER_MINUTE]"
echo "+ ENABLE_CHAOS - 'true' if we should generate bad IMAP responses. [$ENABLE_CHAOS]"
echo "+ ENABLE_PROFILER - 'true' to start the ruby-prof profiler. [$ENABLE_PROFILER]"
echo "+ RUBY_PROF_MEASURE_MODE - What should ruby-prof measure? [$RUBY_PROF_MEASURE_MODE]"
echo " - Can be 'wall', 'process', 'cpu', 'allocations', 'memory', 'gc_time', or 'gc_runs'"
echo " - See https://github.com/ruby-prof/ruby-prof"
echo
echo "Press enter to continue, Control-C to exit."
read

echo "Beginning stress test. Please wait."

# Remove old log files.
rm ./log/stress/*

# Begin the stress test.
RAILS_ENV=stress LOG_LEVEL=info \
foreman s \
-f Procfile.stress-test \
-c imap_client=3,imap_test_server=1

RAILS_ENV=stress foreman s -f Procfile.stress-test

# Generate some plots.

Rscript script/analyze-stress-test.R

echo
echo "Results are in 'stress-test-results.pdf'."
echo "Stress test results are in 'tmp/stress-test-results.pdf'."
echo "If enabled, profiler results are in 'tmp/profile.html'."
echo

0 comments on commit e58c635

Please sign in to comment.