Skip to content

Commit

Permalink
EmrEtlRunner: on job failure, now logging overall jobflow and individ…
Browse files Browse the repository at this point in the history
…ual step statuses (closes #1153)
  • Loading branch information
alexanderdean committed Dec 4, 2014
1 parent 6406bcf commit 20d6fa0
Show file tree
Hide file tree
Showing 3 changed files with 64 additions and 1 deletion.
1 change: 1 addition & 0 deletions 3-enrich/emr-etl-runner/Gemfile
Expand Up @@ -23,6 +23,7 @@ gem "contracts", "~> 0.4"
gem "elasticity", "~> 3.0.4"
gem "sluice", "~> 0.2.1"
gem "awrence", "~> 0.1.0"
gem "time_diff", "~> 0.3.0"

group :development do
gem "rspec", "~> 2.14", ">= 2.14.1"
Expand Down
16 changes: 16 additions & 0 deletions 3-enrich/emr-etl-runner/Gemfile.lock
@@ -1,6 +1,12 @@
GEM
remote: https://rubygems.org/
specs:
activesupport (4.1.8)
i18n (~> 0.6, >= 0.6.9)
json (~> 1.7, >= 1.7.7)
minitest (~> 5.1)
thread_safe (~> 0.1)
tzinfo (~> 1.1)
awrence (0.1.0)
builder (3.2.2)
contracts (0.4)
Expand Down Expand Up @@ -38,10 +44,13 @@ GEM
fog-json (1.0.0)
multi_json (~> 1.0)
formatador (0.2.5)
i18n (0.6.11)
inflecto (0.0.2)
ipaddress (0.8.0)
json (1.8.1)
mime-types (2.3)
mini_portile (0.6.0)
minitest (5.4.3)
multi_json (1.10.1)
net-scp (1.2.1)
net-ssh (>= 2.6.5)
Expand Down Expand Up @@ -69,7 +78,13 @@ GEM
term-ansicolor (1.3.0)
tins (~> 1.0)
thor (0.19.1)
thread_safe (0.3.4)
time_diff (0.3.0)
activesupport
i18n
tins (1.3.0)
tzinfo (1.2.2)
thread_safe (~> 0.1)
unf (0.1.4)
unf_ext
unf_ext (0.0.6)
Expand All @@ -84,3 +99,4 @@ DEPENDENCIES
elasticity (~> 3.0.4)
rspec (~> 2.14, >= 2.14.1)
sluice (~> 0.2.1)
time_diff (~> 0.3.0)
48 changes: 47 additions & 1 deletion 3-enrich/emr-etl-runner/lib/snowplow-emr-etl-runner/emr_job.rb
Expand Up @@ -279,7 +279,7 @@ def run()
status = wait_for()

if !status
raise EmrExecutionError, "EMR jobflow #{jobflow_id} failed, check Amazon EMR console and Hadoop logs for details (help: https://github.com/snowplow/snowplow/wiki/Troubleshooting-jobs-on-Elastic-MapReduce). Data files not archived."
raise EmrExecutionError, get_failure_details()
end

logger.debug "EMR jobflow #{jobflow_id} completed successfully."
Expand Down Expand Up @@ -354,6 +354,52 @@ def wait_for()
success
end

# Prettified string containing failure details
# for this job flow.
Contract None => String
def get_failure_details()

js = @jobflow.status

[
"EMR jobflow #{js.jobflow_id} failed, check Amazon EMR console and Hadoop logs for details (help: https://github.com/snowplow/snowplow/wiki/Troubleshooting-jobs-on-Elastic-MapReduce). Data files not archived.",
"#{js.name}: #{js.state} [#{js.last_state_change_reason}] ~ #{self.class.get_elapsed_time(js.started_at, js.ended_at)} #{self.class.get_timespan(js.started_at, js.ended_at)}"
].concat(js.steps
.sort { |a,b|
a.started_at <=> b.started_at
}
.each_with_index
.map { |s,i|
" - #{i + 1}. #{s.name}: #{s.state} ~ #{self.class.get_elapsed_time(s.started_at, s.ended_at)} #{self.class.get_timespan(s.started_at, s.ended_at)}"
})
.join("\n")
end

# Gets the time span.
#
# Parameters:
# +start+:: start time
# +_end+:: end time
Contract Maybe[Time], Maybe[Time] => String
def self.get_timespan(start, _end)
"[#{start} - #{_end}]"
end

# Gets the elapsed time in a
# human-readable format.
#
# Parameters:
# +start+:: start time
# +_end+:: end time
Contract Maybe[Time], Maybe[Time] => String
def self.get_elapsed_time(start, _end)
if start.nil? or _end.nil?
"elapsed time n/a"
else
Time.diff(start, _end, '%H %N %S')[:diff]
end
end

# We need to partition our output buckets by run ID
# Note buckets already have trailing slashes
#
Expand Down

0 comments on commit 20d6fa0

Please sign in to comment.