Permalink
Browse files

Change gem name and packaging layout.

This should probably have been split up into separate commits,
but instead is just one massive commit. Changes include:

  - rename FastSpawn to POSIX::Spawn
  - rename fastspawn.c to posix-spawn.c
  - rename fastspawn.so to posix_spawn_ext.so
  - rename fastspawn_* C functions to posixspawn_*

  - add posix-spawn.rb wrapper
  - add posix/spawn/version.rb for current version
  - add Gemfile to bundle install dependencies

  - use rake/gempackagetask to build gems
  - use rake-compiler's rake/extensiontask to build extensions

  - update gemspec
    - remove date, as it is added during packaging
    - add required description
    - add required email for each author
    - compute spec.files inside gemspec to avoid regenerate+commit for each change
    - read version from version.rb for same reason
    - add dev dependency on rake-compiler
  • Loading branch information...
1 parent 3951756 commit b18838018829b473839969f52ddd4e5056c9aeb6 @tmm1 tmm1 committed Mar 1, 2011
View
12 .gitignore
@@ -1,7 +1,5 @@
-/ext/Makefile
-/ext/fastspawn.bundle
-/ext/fastspawn.o
-/ext/fastspawn.so
-/ext/ruby-*
-/lib/fastspawn.bundle
-/lib/fastspawn.so
+Gemfile.lock
+ext/Makefile
+lib/posix_spawn_ext.*
+tmp
+pkg
View
2 Gemfile
@@ -0,0 +1,2 @@
+source :rubygems
+gemspec
View
1 HACKING
@@ -2,6 +2,7 @@ Clone the project:
git clone http://github.com/rtomayko/fastspawn.git
cd fastspawn
+ bundle install
Rake tasks can be run without further setup:
View
4 README
@@ -1,12 +1,12 @@
-fastspawn
+posix-spawn
fork(2) calls slow down as the parent process uses more memory due to the need
to copy page tables. In many common uses of fork(), where it is followed by one
of the exec family of functions to spawn child processes (Kernel#system,
IO::popen, Process::spawn, etc.), it's possible to remove this overhead by using
special process spawning interfaces (posix_spawn(), vfork(), etc.)
-The fastspawn library aims to implement a subset of the Ruby 1.9 Process::spawn
+The posix-spawn library aims to implement a subset of the Ruby 1.9 Process::spawn
interface in a way that takes advantage of fast process spawning interfaces when
available and provide sane fallbacks on systems that do not.
View
86 Rakefile
@@ -1,34 +1,24 @@
-require 'date'
-require 'digest/md5'
-require 'rake/clean'
-
task :default => :test
# ==========================================================
-# Ruby Extension
+# Packaging
# ==========================================================
-dlext = Config::MAKEFILE_CONFIG['DLEXT']
-digest = Digest::MD5.hexdigest(`#{RUBY} --version`)
+GEMSPEC = eval(File.read('posix-spawn.gemspec'))
-file "ext/ruby-#{digest}" do |f|
- rm_f FileList["ext/ruby-*"]
- touch f.name
+require 'rake/gempackagetask'
+Rake::GemPackageTask.new(GEMSPEC) do |pkg|
end
-CLEAN.include "ext/ruby-*"
-file 'ext/Makefile' => FileList['ext/*.{c,h,rb}', "ext/ruby-#{digest}"] do
- chdir('ext') { ruby 'extconf.rb' }
-end
-CLEAN.include 'ext/Makefile', 'ext/mkmf.log'
+# ==========================================================
+# Ruby Extension
+# ==========================================================
-file "ext/fastspawn.#{dlext}" => FileList["ext/Makefile"] do |f|
- chdir('ext') { sh 'make clean && make && rm -rf conftest.dSYM' }
+require 'rake/extensiontask'
+Rake::ExtensionTask.new('posix_spawn_ext', GEMSPEC) do |ext|
+ ext.ext_dir = 'ext'
end
-CLEAN.include 'ext/*.{o,bundle,so,dll}'
-
-desc 'Build the fastspawn extension'
-task :build => "ext/fastspawn.#{dlext}"
+task :build => :compile
# ==========================================================
# Testing
@@ -37,60 +27,10 @@ task :build => "ext/fastspawn.#{dlext}"
require 'rake/testtask'
Rake::TestTask.new 'test' do |t|
t.test_files = FileList['test/test_*.rb']
- t.ruby_opts += ['-Ilib:ext']
end
task :test => :build
-desc "Run some benchmarks"
+desc 'Run some benchmarks'
task :benchmark => :build do
- ruby "-Ilib:ext", "bin/fastspawn-bm"
-end
-
-# ==========================================================
-# Packaging
-# ==========================================================
-
-require 'rubygems'
-$spec = eval(File.read('fastspawn.gemspec'))
-package = "pkg/fastspawn-#{$spec.version}.gem"
-
-desc 'Build packages'
-task :package => package
-
-directory 'pkg/'
-file package => %w[pkg/ fastspawn.gemspec] + $spec.files do |f|
- sh "gem build fastspawn.gemspec"
- mv File.basename(f.name), f.name
-end
-
-# ==========================================================
-# Gemspec Generation
-# ==========================================================
-
-def source_version
- line = File.read('lib/fastspawn.rb')[/^\s*VERSION = .*/]
- line.match(/.*VERSION = '(.*)'/)[1]
+ ruby '-Ilib', 'bin/posix-spawn-bm'
end
-
-file 'fastspawn.gemspec' => FileList['Rakefile','lib/fastspawn.rb'] do |f|
- # read spec file and split out manifest section
- spec = File.read(f.name)
- head, manifest, tail = spec.split(" # = MANIFEST =\n")
- head.sub!(/\.version = '.*'/, ".version = '#{source_version}'")
- head.sub!(/\.date = '.*'/, ".date = '#{Date.today.to_s}'")
- # determine file list from git ls-files
- files = `git ls-files`.
- split("\n").
- sort.
- reject{ |file| file =~ /^\./ }.
- map{ |file| " #{file}" }.
- join("\n")
- # piece file back together and write...
- manifest = " s.files = %w[\n#{files}\n ]\n"
- spec = [head,manifest,tail].join(" # = MANIFEST =\n")
- File.open(f.name, 'w') { |io| io.write(spec) }
- puts "updated #{f.name}"
-end
-
-desc 'Build the fastspawn.gemspec if needed'
-task :gemspec => 'fastspawn.gemspec'
View
8 bin/fastspawn-bm → bin/posix-spawn-bm
@@ -1,6 +1,6 @@
#!/usr/bin/env ruby
-# Usage: fastspawn-bm
-require 'fastspawn'
+# Usage: posix-spawn-bm
+require 'posix-spawn'
require 'benchmark'
allocate = 100 * (1024 ** 2)
@@ -17,13 +17,13 @@ puts "benchmarking the various spawns over #{iterations} runs" +
bm 40 do |x|
x.report("fspawn (fork/exec):") do
iterations.times do
- pid = FastSpawn.fspawn('true')
+ pid = POSIX::Spawn.fspawn('true')
Process.wait(pid)
end
end
x.report("pspawn (posix_spawn):") do
iterations.times do
- pid = FastSpawn.pspawn('true')
+ pid = POSIX::Spawn.pspawn('true')
Process.wait(pid)
end
end
View
4 ext/extconf.rb
@@ -1,8 +1,6 @@
require 'mkmf'
-dir_config('fastspawn')
-
# warnings save lives
$CFLAGS << " -Wall "
-create_makefile('fastspawn')
+create_makefile('posix_spawn_ext')
View
38 ext/fastspawn.c → ext/posix-spawn.c
@@ -35,7 +35,8 @@
extern char **environ;
#endif
-static VALUE rb_mFastSpawn;
+static VALUE rb_mPOSIX;
+static VALUE rb_mPOSIXSpawn;
/* Determine the fd number for a Ruby object VALUE.
*
@@ -49,7 +50,7 @@ static VALUE rb_mFastSpawn;
* does not map to an fd.
*/
static int
-fastspawn_obj_to_fd(VALUE obj)
+posixspawn_obj_to_fd(VALUE obj)
{
int fd = -1;
switch (TYPE(obj)) {
@@ -90,15 +91,15 @@ fastspawn_obj_to_fd(VALUE obj)
* no operation was performed.
*/
static int
-fastspawn_file_actions_addclose(VALUE key, VALUE val, posix_spawn_file_actions_t *fops)
+posixspawn_file_actions_addclose(VALUE key, VALUE val, posix_spawn_file_actions_t *fops)
{
int fd;
/* we only care about { (IO|FD|:in|:out|:err) => :close } */
if (TYPE(val) != T_SYMBOL || SYM2ID(val) != rb_intern("close"))
return ST_CONTINUE;
- fd = fastspawn_obj_to_fd(key);
+ fd = posixspawn_obj_to_fd(key);
if (fd >= 0) {
posix_spawn_file_actions_addclose(fops, fd);
return ST_DELETE;
@@ -116,15 +117,15 @@ fastspawn_file_actions_addclose(VALUE key, VALUE val, posix_spawn_file_actions_t
* no operation was performed.
*/
static int
-fastspawn_file_actions_reopen(VALUE key, VALUE val, posix_spawn_file_actions_t *fops)
+posixspawn_file_actions_reopen(VALUE key, VALUE val, posix_spawn_file_actions_t *fops)
{
int fd, newfd;
- newfd = fastspawn_obj_to_fd(key);
+ newfd = posixspawn_obj_to_fd(key);
if (newfd < 0)
return ST_CONTINUE;
- fd = fastspawn_obj_to_fd(val);
+ fd = posixspawn_obj_to_fd(val);
if (fd < 0)
return ST_CONTINUE;
@@ -142,14 +143,14 @@ fastspawn_file_actions_reopen(VALUE key, VALUE val, posix_spawn_file_actions_t *
* if not.
*/
static int
-fastspawn_file_actions_operations_iter(VALUE key, VALUE val, posix_spawn_file_actions_t *fops)
+posixspawn_file_actions_operations_iter(VALUE key, VALUE val, posix_spawn_file_actions_t *fops)
{
int act;
- act = fastspawn_file_actions_addclose(key, val, fops);
+ act = posixspawn_file_actions_addclose(key, val, fops);
if (act != ST_CONTINUE) return act;
- act = fastspawn_file_actions_reopen(key, val, fops);
+ act = posixspawn_file_actions_reopen(key, val, fops);
if (act != ST_CONTINUE) return act;
return ST_CONTINUE;
@@ -163,10 +164,10 @@ fastspawn_file_actions_operations_iter(VALUE key, VALUE val, posix_spawn_file_ac
* Returns nothing.
*/
static void
-fastspawn_file_actions_init(posix_spawn_file_actions_t *fops, VALUE options)
+posixspawn_file_actions_init(posix_spawn_file_actions_t *fops, VALUE options)
{
posix_spawn_file_actions_init(fops);
- rb_hash_foreach(options, fastspawn_file_actions_operations_iter, (VALUE)fops);
+ rb_hash_foreach(options, posixspawn_file_actions_operations_iter, (VALUE)fops);
}
static int
@@ -228,7 +229,7 @@ each_env_i(VALUE key, VALUE val, VALUE arg)
}
/*
- * FastSpawn#_pspawn(env, argv, options)
+ * POSIX::Spawn#_pspawn(env, argv, options)
*
* env - Hash of the new environment.
* argv - The [[cmdname, argv0], argv1, ...] exec array.
@@ -237,7 +238,7 @@ each_env_i(VALUE key, VALUE val, VALUE arg)
* Returns the pid of the newly spawned process.
*/
static VALUE
-rb_fastspawn_pspawn(VALUE self, VALUE env, VALUE argv, VALUE options)
+rb_posixspawn_pspawn(VALUE self, VALUE env, VALUE argv, VALUE options)
{
int i, ret;
long argc = RARRAY_LEN(argv);
@@ -285,7 +286,7 @@ rb_fastspawn_pspawn(VALUE self, VALUE env, VALUE argv, VALUE options)
cargv[i] = StringValuePtr(RARRAY_PTR(argv)[i]);
cargv[argc] = NULL;
- fastspawn_file_actions_init(&fops, options);
+ posixspawn_file_actions_init(&fops, options);
posix_spawnattr_init(&attr);
#if defined(POSIX_SPAWN_USEVFORK) || defined(__linux__)
@@ -323,10 +324,11 @@ rb_fastspawn_pspawn(VALUE self, VALUE env, VALUE argv, VALUE options)
}
void
-Init_fastspawn()
+Init_posix_spawn_ext()
{
- rb_mFastSpawn = rb_define_module("FastSpawn");
- rb_define_method(rb_mFastSpawn, "_pspawn", rb_fastspawn_pspawn, 3);
+ rb_mPOSIX = rb_define_module("POSIX");
+ rb_mPOSIXSpawn = rb_define_module_under(rb_mPOSIX, "Spawn");
+ rb_define_method(rb_mPOSIXSpawn, "_pspawn", rb_posixspawn_pspawn, 3);
}
/* vim: set noexpandtab sts=0 ts=4 sw=4: */
View
31 fastspawn.gemspec
@@ -1,31 +0,0 @@
-Gem::Specification.new do |s|
- s.name = 'fastspawn'
- s.version = '0.2.0'
- s.summary = "Fast process spawner"
- s.date = '2011-02-28'
- s.email = 'r@tomayko.com'
- s.homepage = 'http://github.com/rtomayko/fastspawn'
- s.has_rdoc = false
- s.authors = ["Ryan Tomayko", "Aman Gupta"]
- # = MANIFEST =
- s.files = %w[
- COPYING
- HACKING
- README
- Rakefile
- bin/fastspawn-bm
- ext/extconf.rb
- ext/fastspawn.c
- fastspawn.gemspec
- lib/fastspawn.rb
- lib/fastspawn/process.rb
- test/test_fastspawn.rb
- test/test_fastspawn_process.rb
- ]
- # = MANIFEST =
- s.test_files = []
- s.extra_rdoc_files = ["COPYING"]
- s.extensions = ["ext/extconf.rb"]
- s.executables = []
- s.require_paths = ["lib"]
-end
View
113 lib/fastspawn.rb
@@ -1,113 +0,0 @@
-module FastSpawn
- VERSION = '0.2.0'
- extend self
-
- # fail fast when extension methods already defined due to twice-loading
- raise LoadError, "fastspawn extension already loaded" if method_defined?(:vspawn)
-
- # Spawn a child process using posix_spawn.
- #
- # argv - Array of command line arguments passed to exec.
- #
- # Returns the pid of the newly spawned process.
- # Raises NotImplemented when pfork is not supported on the current platform.
- def pspawn(*argv)
- env, argv, options = extract_process_spawn_arguments(*argv)
- _pspawn(env, argv, options)
- end
-
- # Spawn a child process using a normal fork + exec.
- #
- # Returns the pid of the newly spawned process.
- def fspawn(*argv)
- env, argv, options = extract_process_spawn_arguments(*argv)
- fork do
- exec(*argv)
- exit! 1
- end
- end
-
- # Turns the various varargs incantations supported by Process::spawn into a
- # simple [env, argv, options] tuple. This just makes life easier for the
- # extension functions.
- #
- # The following method signature is supported:
- # Process::spawn([env], command, ..., [options])
- #
- # The env and options hashes are optional. The command may be a variable
- # number of strings or an Array full of strings that make up the new process's
- # argv.
- #
- # Returns an [env, argv, options] tuple. All elements are guaranteed to be
- # non-nil. When no env or options are given, empty hashes are returned.
- def extract_process_spawn_arguments(*args)
- # pop the options hash off the end if it's there
- options =
- if args[-1].respond_to?(:to_hash)
- args.pop.to_hash
- else
- {}
- end
- flatten_process_spawn_options!(options)
-
- # shift the environ hash off the front if it's there and account for
- # possible :env key in options hash.
- env =
- if args[0].respond_to?(:to_hash)
- args.shift.to_hash
- else
- {}
- end
- env.merge!(options.delete(:env)) if options.key?(:env)
-
- # remaining arguments are the argv supporting a number of variations.
- argv = adjust_process_spawn_argv(args)
-
- [env, argv, options]
- end
-
- # Convert { [fd1, fd2, ...] => (:close|fd) } options to individual keys,
- # like: { fd1 => :close, fd2 => :close }. This just makes life easier for the
- # spawn implementations.
- #
- # options - The options hash. This is modified in place.
- #
- # Returns the modified options hash.
- def flatten_process_spawn_options!(options)
- options.to_a.each do |key, value|
- if key.respond_to?(:to_ary)
- key.to_ary.each { |fd| options[fd] = value }
- options.delete(key)
- end
- end
- end
-
- # Converts the various supported command argument variations into a
- # standard argv suitable for use with exec. This includes detecting commands
- # to be run through the shell (single argument strings with spaces).
- #
- # The args array may follow any of these variations:
- #
- # 'true' => [['true', 'true']]
- # 'echo', 'hello', 'world' => [['echo', 'echo'], 'hello', 'world']
- # 'echo hello world' => [['/bin/sh', '/bin/sh'], '-c', 'echo hello world']
- # ['echo', 'fuuu'], 'hello' => [['echo', 'fuuu'], 'hello']
- #
- # Returns a [[cmdname, argv0], argv1, ...] array.
- def adjust_process_spawn_argv(args)
- if args.size == 1 && args[0] =~ /[ |>]/
- # single string with these characters means run it through the shell
- [['/bin/sh', '/bin/sh'], '-c', args[0]]
- elsif !args[0].respond_to?(:to_ary)
- # [argv0, argv1, ...]
- [[args[0], args[0]], *args[1..-1]]
- else
- # [[cmdname, argv0], argv1, ...]
- args
- end
- end
-end
-
-# fastspawn extension methods replace ruby versions
-require 'fastspawn.so'
-require 'fastspawn/process'
View
244 lib/fastspawn/process.rb
@@ -1,244 +0,0 @@
-module FastSpawn
- # FastSpawn::Process includes logic for executing child processes and
- # reading/writing from their standard input, output, and error streams.
- #
- # Create an run a process to completion:
- #
- # >> process = FastSpawn::Process.new(['git', '--help'])
- #
- # Retrieve stdout or stderr output:
- #
- # >> process.out
- # => "usage: git [--version] [--exec-path[=GIT_EXEC_PATH]]\n ..."
- # >> process.err
- # => ""
- #
- # Check process exit status information:
- #
- # >> process.status
- # => #<Process::Status: pid=80718,exited(0)>
- #
- # FastSpawn::Process is designed to take all input in a single string and
- # provides all output as single strings. It is therefore not well suited
- # to streaming large quantities of data in and out of commands.
- #
- # Q: Why not use popen3 or hand-roll fork/exec code?
- #
- # - It's more efficient than popen3 and provides meaningful process
- # hierarchies because it performs a single fork/exec. (popen3 double forks
- # to avoid needing to collect the exit status and also calls
- # Process::detach which creates a Ruby Thread!!!!).
- #
- # - It's more portable than hand rolled pipe, fork, exec code because
- # fork(2) and exec(2) aren't available on all platforms. In those cases,
- # FastSpawn::Process falls back to using whatever janky substitutes the platform
- # provides.
- #
- # - It handles all max pipe buffer hang cases, which is non trivial to
- # implement correctly and must be accounted for with either popen3 or
- # hand rolled fork/exec code.
- class Process
- # Create and execute a new process.
- #
- # argv - Array of [command, arg1, ...] strings to use as the new
- # process's argv. When argv is a String, the shell is used
- # to interpret the command.
- # env - The new process's environment variables. This is merged with
- # the current environment as if by ENV.merge(env).
- # options - Additional options:
- # :input => str to write str to the process's stdin.
- # :timeout => int number of seconds before we given up.
- # :max => total number of output bytes
- # A subset of Process::spawn options are also supported on all
- # platforms:
- # :chdir => str to start the process in different working dir.
- #
- # Returns a new Process instance that has already executed to completion.
- # The out, err, and status attributes are immediately available.
- def initialize(argv, env={}, options={})
- @argv = argv
- @env = env
-
- @options = options.dup
- @input = @options.delete(:input)
- @timeout = @options.delete(:timeout)
- @max = @options.delete(:max)
- @options.delete(:chdir) if @options[:chdir].nil?
-
- exec!
- end
-
- # All data written to the child process's stdout stream as a String.
- attr_reader :out
-
- # All data written to the child process's stderr stream as a String.
- attr_reader :err
-
- # A Process::Status object with information on how the child exited.
- attr_reader :status
-
- # Total command execution time (wall-clock time)
- attr_reader :runtime
-
- # Determine if the process did exit with a zero exit status.
- def success?
- @status && @status.success?
- end
-
- private
- # Execute command, write input, and read output. This is called
- # immediately when a new instance of this object is initialized.
- def exec!
- # when argv is a string, use /bin/sh to interpret command
- argv = @argv
- argv = ['/bin/sh', '-c', argv.to_str] if argv.respond_to?(:to_str)
-
- # spawn the process and hook up the pipes
- pid, stdin, stdout, stderr = popen4(@env, *(argv + [@options]))
-
- # async read from all streams into buffers
- @out, @err = read_and_write(@input, stdin, stdout, stderr, @timeout, @max)
-
- # grab exit status
- @status = waitpid(pid)
- rescue Object => boom
- [stdin, stdout, stderr].each { |fd| fd.close rescue nil }
- if @status.nil?
- ::Process.kill('TERM', pid) rescue nil
- @status = waitpid(pid) rescue nil
- end
- raise
- end
-
- # Exception raised when the total number of bytes output on the command's
- # stderr and stdout streams exceeds the maximum output size (:max option).
- class MaximumOutputExceeded < StandardError
- end
-
- # Exception raised when timeout is exceeded.
- class TimeoutExceeded < StandardError
- end
-
- # Maximum buffer size for reading
- BUFSIZE = (32 * 1024)
-
- # Start a select loop writing any input on the child's stdin and reading
- # any output from the child's stdout or stderr.
- #
- # input - String input to write on stdin. May be nil.
- # stdin - The write side IO object for the child's stdin stream.
- # stdout - The read side IO object for the child's stdout stream.
- # stderr - The read side IO object for the child's stderr stream.
- # timeout - An optional Numeric specifying the total number of seconds
- # the read/write operations should occur for.
- #
- # Returns an [out, err] tuple where both elements are strings with all
- # data written to the stdout and stderr streams, respectively.
- # Raises TimeoutExceeded when all data has not been read / written within
- # the duration specified in the timeout argument.
- # Raises MaximumOutputExceeded when the total number of bytes output
- # exceeds the amount specified by the max argument.
- def read_and_write(input, stdin, stdout, stderr, timeout=nil, max=nil)
- input ||= ''
- max = nil if max && max <= 0
- out, err = '', ''
- offset = 0
-
- timeout = nil if timeout && timeout <= 0.0
- @runtime = 0.0
- start = Time.now
-
- writers = [stdin]
- readers = [stdout, stderr]
- t = timeout
- while readers.any? || writers.any?
- ready = IO.select(readers, writers, readers + writers, t)
- raise TimeoutExceeded if ready.nil?
-
- # write to stdin stream
- ready[1].each do |fd|
- begin
- boom = nil
- size = fd.write_nonblock(input)
- input = input[size, input.size]
- rescue Errno::EPIPE => boom
- rescue Errno::EAGAIN, Errno::EINTR
- end
- if boom || input.size == 0
- stdin.close
- writers.delete(stdin)
- end
- end
-
- # read from stdout and stderr streams
- ready[0].each do |fd|
- buf = (fd == stdout) ? out : err
- begin
- buf << fd.readpartial(BUFSIZE)
- rescue Errno::EAGAIN, Errno::EINTR
- rescue EOFError
- readers.delete(fd)
- fd.close
- end
- end
-
- # keep tabs on the total amount of time we've spent here
- @runtime = Time.now - start
- if timeout
- t = timeout - @runtime
- raise TimeoutExceeded if t < 0.0
- end
-
- # maybe we've hit our max output
- if max && ready[0].any? && (out.size + err.size) > max
- raise MaximumOutputExceeded
- end
- end
-
- [out, err]
- end
-
- # Start a process with spawn options and return
- # popen4([env], command, arg1, arg2, [opt])
- #
- # env - The child process's environment as a Hash.
- # command - The command and zero or more arguments.
- # options - An options hash.
- #
- # See Ruby 1.9 IO.popen and Process::spawn docs for more info:
- # http://www.ruby-doc.org/core-1.9/classes/IO.html#M001640
- #
- # Returns a [pid, stdin, stderr, stdout] tuple where pid is the child
- # process's pid, stdin is a writeable IO object, and stdout + stderr are
- # readable IO objects.
- def popen4(*argv)
- # create some pipes (see pipe(2) manual -- the ruby docs suck)
- ird, iwr = IO.pipe
- ord, owr = IO.pipe
- erd, ewr = IO.pipe
-
- # spawn the child process with either end of pipes hooked together
- opts =
- ((argv.pop if argv[-1].is_a?(Hash)) || {}).merge(
- # redirect fds # close other sides
- :in => ird, iwr => :close,
- :out => owr, ord => :close,
- :err => ewr, erd => :close
- )
- pid = FastSpawn.pspawn(*(argv + [opts]))
-
- [pid, iwr, ord, erd]
- ensure
- # we're in the parent, close child-side fds
- [ird, owr, ewr].each { |fd| fd.close }
- end
-
- # Wait for the child process to exit
- #
- # Returns the Process::Status object obtained by reaping the process.
- def waitpid(pid)
- ::Process::waitpid(pid)
- $?
- end
- end
-end
View
1 lib/posix-spawn.rb
@@ -0,0 +1 @@
+require File.expand_path("../posix/spawn", __FILE__)
View
113 lib/posix/spawn.rb
@@ -0,0 +1,113 @@
+require File.expand_path('../../posix_spawn_ext', __FILE__)
+require File.expand_path('../spawn/version', __FILE__)
+require File.expand_path('../spawn/process', __FILE__)
+
+module POSIX
+ module Spawn
+ extend self
+
+ # Spawn a child process using posix_spawn.
+ #
+ # argv - Array of command line arguments passed to exec.
+ #
+ # Returns the pid of the newly spawned process.
+ # Raises NotImplemented when posix_spawn is not supported on the current platform.
+ def pspawn(*argv)
+ env, argv, options = extract_process_spawn_arguments(*argv)
+ _pspawn(env, argv, options)
+ end
+
+ # Spawn a child process using a normal fork + exec.
+ #
+ # Returns the pid of the newly spawned process.
+ def fspawn(*argv)
+ env, argv, options = extract_process_spawn_arguments(*argv)
+ fork do
+ exec(*argv)
+ exit! 1
+ end
+ end
+
+ private
+
+ # Turns the various varargs incantations supported by Process::spawn into a
+ # simple [env, argv, options] tuple. This just makes life easier for the
+ # extension functions.
+ #
+ # The following method signature is supported:
+ # Process::spawn([env], command, ..., [options])
+ #
+ # The env and options hashes are optional. The command may be a variable
+ # number of strings or an Array full of strings that make up the new process's
+ # argv.
+ #
+ # Returns an [env, argv, options] tuple. All elements are guaranteed to be
+ # non-nil. When no env or options are given, empty hashes are returned.
+ def extract_process_spawn_arguments(*args)
+ # pop the options hash off the end if it's there
+ options =
+ if args[-1].respond_to?(:to_hash)
+ args.pop.to_hash
+ else
+ {}
+ end
+ flatten_process_spawn_options!(options)
+
+ # shift the environ hash off the front if it's there and account for
+ # possible :env key in options hash.
+ env =
+ if args[0].respond_to?(:to_hash)
+ args.shift.to_hash
+ else
+ {}
+ end
+ env.merge!(options.delete(:env)) if options.key?(:env)
+
+ # remaining arguments are the argv supporting a number of variations.
+ argv = adjust_process_spawn_argv(args)
+
+ [env, argv, options]
+ end
+
+ # Convert { [fd1, fd2, ...] => (:close|fd) } options to individual keys,
+ # like: { fd1 => :close, fd2 => :close }. This just makes life easier for the
+ # spawn implementations.
+ #
+ # options - The options hash. This is modified in place.
+ #
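+ # Returns the Array of original key/value pairs (the result of each), not the
+ # hash itself; callers should rely on options being modified in place.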
+ def flatten_process_spawn_options!(options)
+ options.to_a.each do |key, value|
+ if key.respond_to?(:to_ary)
+ key.to_ary.each { |fd| options[fd] = value }
+ options.delete(key)
+ end
+ end
+ end
+
+ # Converts the various supported command argument variations into a
+ # standard argv suitable for use with exec. This includes detecting commands
+ # to be run through the shell (single argument strings containing a space,
+ # "|", or ">").
+ #
+ # The args array may follow any of these variations:
+ #
+ # 'true' => [['true', 'true']]
+ # 'echo', 'hello', 'world' => [['echo', 'echo'], 'hello', 'world']
+ # 'echo hello world' => [['/bin/sh', '/bin/sh'], '-c', 'echo hello world']
+ # ['echo', 'fuuu'], 'hello' => [['echo', 'fuuu'], 'hello']
+ #
+ # Returns a [[cmdname, argv0], argv1, ...] array.
+ def adjust_process_spawn_argv(args)
+ if args.size == 1 && args[0] =~ /[ |>]/
+ # single string with these characters means run it through the shell
+ [['/bin/sh', '/bin/sh'], '-c', args[0]]
+ elsif !args[0].respond_to?(:to_ary)
+ # [argv0, argv1, ...]
+ [[args[0], args[0]], *args[1..-1]]
+ else
+ # [[cmdname, argv0], argv1, ...]
+ args
+ end
+ end
+ end
+end
View
246 lib/posix/spawn/process.rb
@@ -0,0 +1,246 @@
+module POSIX
+ module Spawn
+ # POSIX::Spawn::Process includes logic for executing child processes and
+ # reading/writing from their standard input, output, and error streams.
+ #
+ # Create and run a process to completion:
+ #
+ # >> process = POSIX::Spawn::Process.new(['git', '--help'])
+ #
+ # Retrieve stdout or stderr output:
+ #
+ # >> process.out
+ # => "usage: git [--version] [--exec-path[=GIT_EXEC_PATH]]\n ..."
+ # >> process.err
+ # => ""
+ #
+ # Check process exit status information:
+ #
+ # >> process.status
+ # => #<Process::Status: pid=80718,exited(0)>
+ #
+ # POSIX::Spawn::Process is designed to take all input in a single string and
+ # provides all output as single strings. It is therefore not well suited
+ # to streaming large quantities of data in and out of commands.
+ #
+ # Q: Why not use popen3 or hand-roll fork/exec code?
+ #
+ # - It's more efficient than popen3 and provides meaningful process
+ # hierarchies because it performs a single fork/exec. (popen3 double forks
+ # to avoid needing to collect the exit status and also calls
+ # Process::detach which creates a Ruby Thread!!!!).
+ #
+ # - It's more portable than hand rolled pipe, fork, exec code because
+ # fork(2) and exec(2) aren't available on all platforms. In those cases,
+ # POSIX::Spawn::Process falls back to using whatever janky substitutes the platform
+ # provides.
+ #
+ # - It handles all max pipe buffer hang cases, which is non trivial to
+ # implement correctly and must be accounted for with either popen3 or
+ # hand rolled fork/exec code.
+ class Process
+ # Create and execute a new process.
+ #
+ # argv - Array of [command, arg1, ...] strings to use as the new
+ # process's argv. When argv is a String, the shell is used
+ # to interpret the command.
+ # env - The new process's environment variables. This is merged with
+ # the current environment as if by ENV.merge(env).
+ # options - Additional options:
+ # :input => str to write str to the process's stdin.
+ # :timeout => int number of seconds before we give up.
+ # :max => total number of output bytes
+ # A subset of Process::spawn options are also supported on all
+ # platforms:
+ # :chdir => str to start the process in different working dir.
+ #
+ # Returns a new Process instance that has already executed to completion.
+ # The out, err, and status attributes are immediately available.
+ def initialize(argv, env={}, options={})
+ @argv = argv
+ @env = env
+
+ @options = options.dup
+ @input = @options.delete(:input)
+ @timeout = @options.delete(:timeout)
+ @max = @options.delete(:max)
+ @options.delete(:chdir) if @options[:chdir].nil?
+
+ exec!
+ end
+
+ # All data written to the child process's stdout stream as a String.
+ attr_reader :out
+
+ # All data written to the child process's stderr stream as a String.
+ attr_reader :err
+
+ # A Process::Status object with information on how the child exited.
+ attr_reader :status
+
+ # Total command execution time (wall-clock time)
+ attr_reader :runtime
+
+ # Determine if the process did exit with a zero exit status.
+ def success?
+ @status && @status.success?
+ end
+
+ private
+ # Execute command, write input, and read output. This is called
+ # immediately when a new instance of this object is initialized.
+ def exec!
+ # when argv is a string, use /bin/sh to interpret command
+ argv = @argv
+ argv = ['/bin/sh', '-c', argv.to_str] if argv.respond_to?(:to_str)
+
+ # spawn the process and hook up the pipes
+ pid, stdin, stdout, stderr = popen4(@env, *(argv + [@options]))
+
+ # async read from all streams into buffers
+ @out, @err = read_and_write(@input, stdin, stdout, stderr, @timeout, @max)
+
+ # grab exit status
+ @status = waitpid(pid)
+ rescue Object => boom
+ [stdin, stdout, stderr].each { |fd| fd.close rescue nil }
+ if @status.nil?
+ ::Process.kill('TERM', pid) rescue nil
+ @status = waitpid(pid) rescue nil
+ end
+ raise
+ end
+
+ # Exception raised when the total number of bytes output on the command's
+ # stderr and stdout streams exceeds the maximum output size (:max option).
+ class MaximumOutputExceeded < StandardError
+ end
+
+ # Exception raised when timeout is exceeded.
+ class TimeoutExceeded < StandardError
+ end
+
+ # Maximum buffer size for reading
+ BUFSIZE = (32 * 1024)
+
+ # Start a select loop writing any input on the child's stdin and reading
+ # any output from the child's stdout or stderr.
+ #
+ # input - String input to write on stdin. May be nil.
+ # stdin - The write side IO object for the child's stdin stream.
+ # stdout - The read side IO object for the child's stdout stream.
+ # stderr - The read side IO object for the child's stderr stream.
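+ # timeout - An optional Numeric specifying the total number of seconds
+ # the read/write operations should occur for.
+ # max - An optional Numeric limit on the combined size of the stdout and
+ # stderr output. Ignored when nil, zero, or negative.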
+ #
+ # Returns an [out, err] tuple where both elements are strings with all
+ # data written to the stdout and stderr streams, respectively.
+ # Raises TimeoutExceeded when all data has not been read / written within
+ # the duration specified in the timeout argument.
+ # Raises MaximumOutputExceeded when the total number of bytes output
+ # exceeds the amount specified by the max argument.
+ def read_and_write(input, stdin, stdout, stderr, timeout=nil, max=nil)
+ input ||= ''
+ max = nil if max && max <= 0
+ out, err = '', ''
+ offset = 0
+
+ timeout = nil if timeout && timeout <= 0.0
+ @runtime = 0.0
+ start = Time.now
+
+ writers = [stdin]
+ readers = [stdout, stderr]
+ t = timeout
+ while readers.any? || writers.any?
+ ready = IO.select(readers, writers, readers + writers, t)
+ raise TimeoutExceeded if ready.nil?
+
+ # write to stdin stream
+ ready[1].each do |fd|
+ begin
+ boom = nil
+ size = fd.write_nonblock(input)
+ input = input[size, input.size]
+ rescue Errno::EPIPE => boom
+ rescue Errno::EAGAIN, Errno::EINTR
+ end
+ if boom || input.size == 0
+ stdin.close
+ writers.delete(stdin)
+ end
+ end
+
+ # read from stdout and stderr streams
+ ready[0].each do |fd|
+ buf = (fd == stdout) ? out : err
+ begin
+ buf << fd.readpartial(BUFSIZE)
+ rescue Errno::EAGAIN, Errno::EINTR
+ rescue EOFError
+ readers.delete(fd)
+ fd.close
+ end
+ end
+
+ # keep tabs on the total amount of time we've spent here
+ @runtime = Time.now - start
+ if timeout
+ t = timeout - @runtime
+ raise TimeoutExceeded if t < 0.0
+ end
+
+ # maybe we've hit our max output
+ if max && ready[0].any? && (out.size + err.size) > max
+ raise MaximumOutputExceeded
+ end
+ end
+
+ [out, err]
+ end
+
+ # Start a process with spawn options and return its pid along with pipes
+ # connected to its standard streams:
+ # popen4([env], command, arg1, arg2, [opt])
+ #
+ # env - The child process's environment as a Hash.
+ # command - The command and zero or more arguments.
+ # options - An options hash.
+ #
+ # See Ruby 1.9 IO.popen and Process::spawn docs for more info:
+ # http://www.ruby-doc.org/core-1.9/classes/IO.html#M001640
+ #
+ # Returns a [pid, stdin, stdout, stderr] tuple where pid is the child
+ # process's pid, stdin is a writeable IO object, and stdout + stderr are
+ # readable IO objects.
+ def popen4(*argv)
+ # create some pipes (see pipe(2) manual -- the ruby docs suck)
+ ird, iwr = IO.pipe
+ ord, owr = IO.pipe
+ erd, ewr = IO.pipe
+
+ # spawn the child process with either end of pipes hooked together
+ opts =
+ ((argv.pop if argv[-1].is_a?(Hash)) || {}).merge(
+ # redirect fds # close other sides
+ :in => ird, iwr => :close,
+ :out => owr, ord => :close,
+ :err => ewr, erd => :close
+ )
+ pid = POSIX::Spawn.pspawn(*(argv + [opts]))
+
+ [pid, iwr, ord, erd]
+ ensure
+ # we're in the parent, close child-side fds
+ [ird, owr, ewr].each { |fd| fd.close }
+ end
+
+ # Wait for the child process to exit
+ #
+ # Returns the Process::Status object obtained by reaping the process.
+ def waitpid(pid)
+ ::Process::waitpid(pid)
+ $?
+ end
+ end
+ end
+end
View
5 lib/posix/spawn/version.rb
@@ -0,0 +1,5 @@
+module POSIX
+ module Spawn
+ VERSION = '0.2.0'
+ end
+end
View
23 posix-spawn.gemspec
@@ -0,0 +1,23 @@
+require File.expand_path('../lib/posix/spawn/version', __FILE__)
+
+Gem::Specification.new do |s|
+ s.name = 'posix-spawn'
+ s.version = POSIX::Spawn::VERSION
+
+ s.summary = 'posix_spawnp(2) for ruby'
+ s.description = 'posix-spawn uses posix_spawnp(2) for faster process spawning'
+
+ s.homepage = 'http://github.com/rtomayko/fastspawn'
+ s.has_rdoc = false
+
+ s.authors = ['Ryan Tomayko', 'Aman Gupta']
+ s.email = ['r@tomayko.com', 'aman@tmm1.net']
+
+ s.add_development_dependency 'rake-compiler', '0.7.6'
+
+ s.extensions = ['ext/extconf.rb']
+ s.require_paths = ['lib']
+
+ s.files = `git ls-files`.split("\n")
+ s.extra_rdoc_files = %w[ COPYING HACKING ]
+end
View
6 test/test_fastspawn_process.rb → test/test_process.rb
@@ -1,8 +1,8 @@
require 'test/unit'
-require 'fastspawn'
+require 'posix-spawn'
-class FastSpawnProcessTest < Test::Unit::TestCase
- include FastSpawn
+class ProcessTest < Test::Unit::TestCase
+ include POSIX::Spawn
def test_argv_array_execs
p = Process.new(['printf', '%s %s %s', '1', '2', '3 4'])
View
12 test/test_fastspawn.rb → test/test_spawn.rb
@@ -1,12 +1,12 @@
require 'test/unit'
-require 'fastspawn'
+require 'posix-spawn'
-class FastSpawnTest < Test::Unit::TestCase
- include FastSpawn
+class SpawnTest < Test::Unit::TestCase
+ include POSIX::Spawn
- def test_fastspawn_methods_exposed_at_module_level
- assert FastSpawn.respond_to?(:pspawn)
- assert FastSpawn.respond_to?(:_pspawn)
+ def test_spawn_methods_exposed_at_module_level
+ assert POSIX::Spawn.respond_to?(:pspawn)
+ assert POSIX::Spawn.respond_to?(:_pspawn)
end
def test_fspawn

0 comments on commit b188380

Please sign in to comment.