Permalink
Browse files

Update with latest error handling, timeouts, and local lexers

  • Loading branch information...
1 parent 0b2049c commit 02c05ffe8c7464c5c63b385ccf83a7c97dc315c1 @tnm committed Sep 25, 2012
Showing with 78 additions and 66 deletions.
  1. +11 −1 Rakefile
  2. +4 −25 bench.rb
  3. +8 −0 cache-lexers.rb
  4. BIN lexers
  5. +12 −15 lib/pygments/mentos.py
  6. +43 −25 lib/pygments/popen.rb
View
@@ -28,10 +28,20 @@ end
# ==========================================================
task :bench do
- sh "ruby bench.rb"
+ sh "ruby bench.rb"
end
# ==========================================================
+# Cache lexers
+# ==========================================================
+
+# Write all the lexers to a file for easy lookup
+task :lexers do
+ sh "ruby cache-lexers.rb"
+end
+
+
+# ==========================================================
# Vendor
# ==========================================================
View
@@ -3,7 +3,7 @@
include Benchmark
# number of iterations
-num = ARGV[0] ? ARGV[0].to_i : 25
+num = ARGV[0] ? ARGV[0].to_i : 10
# we can also repeat the code itself
repeats = ARGV[1] ? ARGV[1].to_i : 1
@@ -15,29 +15,8 @@
puts "Iterations: " + num.to_s + "\n"
Benchmark.bm(40) do |x|
- x.report("pygments popen ") { for i in 1..num; Pygments.highlight(code, :lexer => 'ruby'); end }
- x.report("pygments popen (process already started) ") { for i in 1..num; Pygments.highlight(code, :lexer => 'ruby'); end }
- x.report("pygments popen (process already started 2) ") { for i in 1..num; Pygments.highlight(code, :lexer => 'ruby'); end }
+ x.report("pygments popen ") { for i in 1..num; Pygments.highlight(code, :lexer => 'python'); end }
+ x.report("pygments popen (process already started) ") { for i in 1..num; Pygments.highlight(code, :lexer => 'python'); end }
+ x.report("pygments popen (process already started 2) ") { for i in 1..num; Pygments.highlight(code, :lexer => 'python'); end }
end
-# $ ruby bench.rb 50
-# Benchmarking....
-# Size: 698 bytes
-# Iterations: 50
-# user system total real
-# pygments popen 0.010000 0.010000 0.020000 ( 0.460370)
-# pygments popen (process already started) 0.010000 0.000000 0.010000 ( 0.272975)
-# pygments popen (process already started 2) 0.000000 0.000000 0.000000 ( 0.273589)
-#
-# $ ruby bench.rb 10
-# Benchmarking....
-# Size: 15523 bytes
-# Iterations: 10
-# user system total real
-# pygments popen 0.000000 0.000000 0.000000 ( 0.819419)
-# pygments popen (process already started) 0.010000 0.000000 0.010000 ( 0.676515)
-# pygments popen (process already started 2) 0.000000 0.010000 0.010000 ( 0.674189)
-#
-#
-#
-#
View
@@ -0,0 +1,8 @@
+require File.join(File.dirname(__FILE__), '/lib/pygments.rb')
+
+# Simple marshalling
+serialized_lexers = Marshal.dump(Pygments.lexers!)
+
+# Write to a file
+File.open("lexers", 'w') { |file| file.write(serialized_lexers) }
+
View
BIN lexers
Binary file not shown.
View
@@ -74,28 +74,31 @@ def return_lexer(self, lexer, args, inputs, code=None):
"""
if lexer:
- return lexers.get_lexer_by_name(lexer)
+ if inputs:
+ return lexers.get_lexer_by_name(lexer, **inputs)
+ else:
+ return lexers.get_lexer_by_name(lexer)
if inputs:
if 'lexer' in inputs:
- return lexers.get_lexer_by_name(inputs['lexer'])
+ return lexers.get_lexer_by_name(inputs['lexer'], **inputs)
elif 'mimetype' in inputs:
- return lexers.get_lexer_for_mimetype(inputs['mimetype'])
+ return lexers.get_lexer_for_mimetype(inputs['mimetype'], **inputs)
elif 'filename' in inputs:
name = inputs['filename']
# If we have code and a filename, pygments allows us to guess
# with both. This is better than just guessing with code.
if code:
- return lexers.guess_lexer_for_filename(name, code)
+ return lexers.guess_lexer_for_filename(name, code, **inputs)
else:
- return lexers.get_lexer_for_filename(name)
+ return lexers.get_lexer_for_filename(name, **inputs)
# If all we got is code, try anyway.
if code:
- return lexers.guess_lexer(code)
+ return lexers.guess_lexer(code, **inputs)
else:
_write_error("No lexer")
@@ -226,10 +229,10 @@ def _check_and_return_text(self, text, start_id, end_id):
id_regex = re.compile('[A-Z]{8}')
if not id_regex.match(start_id) and not id_regex.match(end_id):
- _write_error("ID check failed. Not a id.")
+ _write_error("ID check failed. Not an ID.")
if not start_id == end_id:
- _write_error("id check failed. id's did not match.")
+ _write_error("ID check failed. ID's did not match.")
# Passed the sanity check. Remove the id's and return
text = text[10:-10]
@@ -320,13 +323,7 @@ def main():
mentos = Mentos()
- # close fd's. mentos is a long-running process
- # and inherits fd's from its unicorn parent
- # (and, thus, burdens like mysql) — we don't want that here.
-
- # An optimization: we can check to see the max FD
- # a process can open and run the os.close() iteration against that.
- # If it's infinite, we default to 65536.
+ # close fd's inherited from the ruby parent
maxfd = resource.getrlimit(resource.RLIMIT_NOFILE)[1]
if maxfd == resource.RLIM_INFINITY:
maxfd = 65536
View
@@ -3,6 +3,7 @@
require 'yajl'
require 'timeout'
require 'logger'
+require 'time'
# Error class
class MentosError < IOError
@@ -35,7 +36,7 @@ def start(pygments_path = File.expand_path('../../../vendor/pygments-main/', __F
# A pipe to the mentos python process. #popen4 gives us
# the pid and three IO objects to write and read.
@pid, @in, @out, @err = popen4(File.expand_path('../mentos.py', __FILE__))
- @log.info "Starting pid #{@pid.to_s} with fd #{@out.to_i.to_s}."
+ @log.info "[#{Time.now.iso8601}] Starting pid #{@pid.to_s} with fd #{@out.to_i.to_s}."
end
# Stop the child process by issuing a kill -9.
@@ -58,21 +59,26 @@ def stop(reason)
rescue Errno::ESRCH, Errno::ECHILD
end
end
- @log.info "Killing pid: #{@pid.to_s}. Reason: #{reason}"
+ @log.info "[#{Time.now.iso8601}] Killing pid: #{@pid.to_s}. Reason: #{reason}"
@pid = nil
end
# Check for a @pid variable, and then hit `kill -0` with the pid to
# check if the pid is still in the process table. If this function
# gives us an ENOENT or ESRCH, we can also safely return false (no process
- # to worry about).
+ # to worry about). Defensively, if EPERM is raised, in an odd/rare
+ # dying process situation (e.g., mentos is checking on the pid of a dead
+ # process and the pid has already been re-used) we'll want to raise
+ # that as a more informative Mentos exception.
#
# Returns true if the child is alive.
def alive?
return true if @pid && Process.kill(0, @pid)
false
rescue Errno::ENOENT, Errno::ESRCH
false
+ rescue Errno::EPERM
+ raise MentosError, "EPERM checking if child process is alive."
end
# Public: Get an array of available Pygments formatters
@@ -91,10 +97,27 @@ def formatters
end
end
- # Public: Get back all available lexers
+
+ # Public: Get all lexers from a serialized array. This avoids needing to spawn
+ # mentos when it's not really needed (e.g., one-off jobs, loading the Rails env, etc).
+ #
+ # Should be preferred to #lexers!
#
# Returns an array of lexers
def lexers
+ begin
+ lexer_file = File.expand_path('../../../lexers', __FILE__)
+ raw = File.open(lexer_file, "r").read
+ Marshal.load(raw)
+ rescue Errno::ENOENT
+ raise MentosError, "Error loading lexer file. Was it created and vendored?"
+ end
+ end
+
+ # Public: Get back all available lexers from mentos itself
+ #
+ # Returns an array of lexers
+ def lexers!
mentos(:get_all_lexers).inject(Hash.new) do |hash, lxr|
name = lxr[0]
hash[name] = {
@@ -168,14 +191,8 @@ def mentos(method, args=[], kwargs={}, original_code=nil)
start unless alive?
begin
- # Timeout requests that take too long. The timeout duration is based on what
- # method we get. Highlights are given more time than the others (the other
- # calls, like get_all_lexers, should not take long).
- if method == 'highlight'
- timeout_time = 3
- else
- timeout_time = 1
- end
+ # Timeout requests that take too long.
+ timeout_time = 8
Timeout::timeout(timeout_time) do
# For sanity checking on both sides of the pipe when highlighting, we prepend and
@@ -211,8 +228,9 @@ def mentos(method, args=[], kwargs={}, original_code=nil)
# Finally, return what we got.
return_result(res, method)
end
- rescue Timeout::Error => boom
- @log.error "Timeout on a mentos #{method} call"
+ rescue Timeout::Error
+ # If we time out, we need to clear out the pipe and start over.
+ @log.error "[#{Time.now.iso8601}] Timeout on a mentos #{method} call"
stop "Timeout on mentos #{method} call."
end
@@ -239,34 +257,34 @@ def handle_header_and_return(header, id)
if header["method"] == "highlight"
# Make sure we have a result back; else consider this an error.
if res.nil?
- @log.warn "No highlight result back from mentos."
+ @log.warn "[#{Time.now.iso8601}] No highlight result back from mentos."
stop "No highlight result back from mentos."
raise MentosError, "No highlight result back from mentos."
end
# Remove the newline from Python
res = res[0..-2]
- @log.info "Highlight in process."
+ @log.info "[#{Time.now.iso8601}] Highlight in process."
# Get the id's
start_id = res[0..7]
end_id = res[-8..-1]
# Sanity check.
if not (start_id == id and end_id == id)
- @log.error "ID's did not match. Aborting."
+ @log.error "[#{Time.now.iso8601}] ID's did not match. Aborting."
stop "ID's did not match. Aborting."
raise MentosError, "ID's did not match. Aborting. " + res.to_s
else
# We're good. Remove the padding
res = res[10..-11]
- @log.info "Highlighting complete."
+ @log.info "[#{Time.now.iso8601}] Highlighting complete."
res
end
end
res
else
- @log.error "No header data back."
+ @log.error "[#{Time.now.iso8601}] No header data back."
stop "No header data back."
raise MentosError, "No header received back."
end
@@ -287,7 +305,7 @@ def add_ids(code, id)
# Returns nothing.
def write_data(out_header, code=nil)
@in.write(out_header)
- @log.info "Out header: #{out_header.to_s}"
+ @log.info "[#{Time.now.iso8601}] Out header: #{out_header.to_s}"
@in.write(code) if code
end
@@ -311,18 +329,18 @@ def get_header
# Sanity check the size
if not size_check(size)
- @log.error "Size returned from Mentos invalid."
+ @log.error "[#{Time.now.iso8601}] Size returned from Mentos invalid."
stop "Size returned from Mentos invalid."
raise MentosError, "Size returned from Mentos invalid."
end
# Read the number of bytes we should be expecting. We first
# convert the string of bits into an integer.
header_bytes = size.to_s.to_i(2) + 1
- @log.info "Size in: #{size.to_s} (#{header_bytes.to_s})"
+ @log.info "[#{Time.now.iso8601}] Size in: #{size.to_s} (#{header_bytes.to_s})"
@out.read(header_bytes)
rescue
- @log.error "Failed to get header."
+ @log.error "[#{Time.now.iso8601}] Failed to get header."
stop "Failed to get header."
raise MentosError, "Failed to get header."
end
@@ -339,13 +357,13 @@ def return_result(res, method)
# Convert a text header into JSON for easy access.
def header_to_json(header)
- @log.info "In header:" + header.to_s
+ @log.info "[#{Time.now.iso8601}] In header: #{header.to_s} "
header = Yajl.load(header)
if header["error"]
# Raise this as a Ruby exception of the MentosError class.
# Stop so we don't leave the pipe in an inconsistent state.
- @log.error "Failed to convert header to JSON."
+ @log.error "[#{Time.now.iso8601}] Failed to convert header to JSON."
stop header["error"]
raise MentosError, header["error"]
else

0 comments on commit 02c05ff

Please sign in to comment.