Skip to content

HTTPS clone URL

Subversion checkout URL

You can clone with
or
.
Download ZIP
Browse files

Release v0.0.25

  • Loading branch information...
commit a97486859630b8304caac51878e61f6dd773f4cb 1 parent a59648f
@pgte authored
View
8 ChangeLog
@@ -1,3 +1,11 @@
+2010.10.06 v0.0.25
+
+* Fixed race condition between launching workers and asynchronous binding on master socket
+* On sudden master death, somehow the workers should eventually realize that their master has died and kill themselves.
+* added fugue.workerPids()
+* added options.worker_to_master_ping_interval
+* Worker notifies master that he is also listening, so master can more accurately count him in in order to invoke the options.started callback.
+
2010.10.05 v0.0.22
* Now workers file descriptors (stdin, stdout, stderr) are the same as the master process, so there is no longer the need to pipe them from worker into master.
View
2  Makefile
@@ -4,7 +4,7 @@ build:
node-waf configure build
test: build
- node tools/test.js test_zero_workers test_one_worker test_if_i_can_reach_many_workers test_app_reload test_working_dir test_master_socket_is_protected test_leaks
+ node tools/test.js test_zero_workers test_one_worker test_if_i_can_reach_many_workers test_app_reload test_working_dir test_master_socket_is_protected test_leaks test_worker_dies_after_master_sudden_death
test-fail:
node tools/test.js test_should_fail
View
7 README.markdown
@@ -48,12 +48,13 @@ For UNIX sockets:
* started : callback to be invoked when all workers are up and running
* working_path : absolute path for the working dir
* tmp_path : the absolute path for the temp dir. defaults to '/tmp'
-* log_file : the full path of the log file if you wish stdout to be redirected there. All workers + master will write here. If absent does not touch stdout.
-* master_log_file : alternative path for the log file for the master only.
+* log_file : the full path of the log file if you wish stdout to be redirected there. All workers + master will write here. If absent does not touch stdout
+* master_log_file : alternative path for the log file for the master only
* uid : unix user id for workers. Defaults to current user
* gid : unix group id for workers. Defaults to current group
* daemonize : to fork and detach
-* master_pid_path : master PID file path. If not passed in, no pid file is written.
+* master_pid_path : master PID file path. If not passed in, no pid file is written
+* worker_to_master_ping_interval : the interval by which children ping master to know if it's alive. in milliseconds. defaults to 30000 (30 seconds)
### How to reload
View
6 TODO
@@ -7,8 +7,8 @@
7. Monitor request timeouts (as Unicorn does) - only plausible for HTTP Servers, though (??)
8. (DONE) Don't hardcode path for unix socket. Make it a temporary random file and pass it around on environment?
9. Somehow delete master sockets that are not being used. This can be tricky, since master socket paths are passed around to workers and new masters.
-10. Tests for these features: set gid, uid; set working dir; Redirect stdout to log files; daemonize properly.
-11. On sudden master death, somehow the workers should eventually realize that their master has died and kill themselves.
-12. Worker should notify master that he is also listening, so master can more accurately count him in in order to invoke the options.started callback.
+10. (DONE) Tests for these features: set gid, uid; set working dir; Redirect stdout to log files; daemonize properly.
+11. (DONE) On sudden master death, somehow the workers should eventually realize that their master has died and kill themselves.
+12. (DONE) Worker should notify master that he is also listening, so master can more accurately count him in in order to invoke the options.started callback.
13. Worker should have a timeout when shutting down to prevent a client from hanging the process forever. Should be an option. default = 1 min?
14. Support a development mode, where any changes to the code trigger the master restart, much like nodemon - http://github.com/remy/nodemon
View
175 lib/fugue.js
@@ -16,6 +16,13 @@ var workerId = exports.workerId = function() {
var masterSocketPath = exports.masterSocketPath = function() {
return master_socket_path;
}
+exports.workerPids = function() {
+ var worker_pids = [];
+ workers.forEach(function(worker) {
+ worker_pids.push(worker.pid);
+ });
+ return worker_pids;
+}
var stop = exports.stop = function() {
if (isMaster()) {
@@ -65,7 +72,8 @@ exports.start = function(server, port, host, worker_count, options) {
uid : process.getuid(),
gid : process.getgid(),
daemonize: false,
- verbose: false
+ verbose: false,
+ worker_to_master_ping_interval: 30000
}
if (options) {
(function(obj1, obj2) {
@@ -206,85 +214,93 @@ exports.start = function(server, port, host, worker_count, options) {
master_server = net.createServer(function(conn) {
conn.on('data', function(data) {
- log('got data for master_server');
- if (data.toString() == 'GIMME_SOCKET') {
- log('serving server socket file descriptor ' + server_socket);
- conn.write('HERE_YOU_GO', 'ascii', server_socket);
- workers_started ++;
- if (options.started && workers_started == worker_count) options.started();
+ switch(data.toString()) {
+ case 'GIMME_SOCKET':
+ log('serving server socket file descriptor ' + server_socket);
+ conn.write('HERE_YOU_GO', 'ascii', server_socket);
+ break
+ case 'LISTENING':
+ workers_started ++;
+ if (options.started && workers_started == worker_count) options.started();
+ break;
+ case 'PING':
+ conn.write('PONG');
+ break;
}
});
});
- master_server.listen(master_socket_path);
-
- workers = [];
- workers_started = 0;
+ master_server.listen(master_socket_path, function() {
+ workers = [];
+ workers_started = 0;
+
+ var spawn_worker = function(worker_idx) {
+ // prepare environment for worker
+ var env = {};
+ for (var i in process.env) {
+ env[i] = process.env[i];
+ }
+ env._FUGUE_WORKER = "" + (worker_idx + 1);
+ var args = process.argv;
+
+ // spawn worker process
+ var stdio = process.binding("stdio");
+ var fds = [ stdio.stdinFD, process.stdout.fd, stdio.stderrFD ];
+ var new_worker = workers[worker_idx] = spawn(args[0], args.slice(1), {env: env, customFds: fds});
+ // listen for when the worker dies and bring him back to life
+ new_worker.on('exit', function() {
+ log('Child ' + worker_idx + ' died. Respawning it.');
+ spawn_worker(worker_idx);
+ });
- var spawn_worker = function(worker_idx) {
- // prepare environment for worker
- var env = {};
- for (var i in process.env) {
- env[i] = process.env[i];
}
- env._FUGUE_WORKER = "" + (worker_idx + 1);
- var args = process.argv;
-
- // spawn worker process
- var stdio = process.binding("stdio");
- var fds = [ stdio.stdinFD, process.stdout.fd, stdio.stderrFD ];
- var new_worker = workers[worker_idx] = spawn(args[0], args.slice(1), {env: env, customFds: fds});
- // listen for when the worker dies and bring him back to life
- new_worker.on('exit', function() {
- log('Child ' + worker_idx + ' died. Respawning it.');
- spawn_worker(worker_idx);
- });
- }
+ log('Spawning workers...');
+ process.env._FUGUE_WORKER_COUNT = worker_count;
+ for (var worker_idx = 0; worker_idx < worker_count; worker_idx ++) {
+ spawn_worker(worker_idx);
+ }
+ log('spawned.')
- log('Spawning workers...');
- process.env._FUGUE_WORKER_COUNT = worker_count;
- for (var worker_idx = 0; worker_idx < worker_count; worker_idx ++) {
- spawn_worker(worker_idx);
- }
- log('spawned.')
+ killer = function() {
+ stop();
+ process.nextTick(function() {
+ process.exit();
+ });
+ }
- killer = function() {
- stop();
- process.nextTick(function() {
- process.exit();
- });
- }
+ // Listen for process exits
+ process.on('SIGINT', killer);
+ process.on('SIGHUP', killer);
+ process.on('SIGTERM', killer);
+
+ // Listen to SIGUSR2 for master restarts
+ respawner = function() {
+ log('Got SIGUSR2, respawning self');
+ // respawn self
+ var env = {};
+ for(var i in process.env){
+ env[i] = process.env[i];
+ }
+ env._FUGUE_ORIG_MASTER_PID = process.pid.toString();
+ var args = process.argv;
- // Listen for process exits
- process.on('SIGINT', killer);
- process.on('SIGHUP', killer);
- process.on('SIGTERM', killer);
-
- // Listen to SIGUSR2 for master restarts
- respawner = function() {
- log('Got SIGUSR2, respawning self');
- // respawn self
- var env = {};
- for(var i in process.env){
- env[i] = process.env[i];
+ // spawn worker process
+ spawned = spawn(args[0], args.slice(1), env);
+ spawned.stdout.on('data', function(data) {
+ log("New master goes: " + data);
+ });
}
- env._FUGUE_ORIG_MASTER_PID = process.pid.toString();
- var args = process.argv;
+ process.on('SIGUSR2', respawner);
- // spawn worker process
- spawned = spawn(args[0], args.slice(1), env);
- spawned.stdout.on('data', function(data) {
- log("New master goes: " + data);
- });
- }
- process.on('SIGUSR2', respawner);
+ // Save master PID
+ if (options.master_pid_path) {
+ fs.writeFile(options.master_pid_path, process.pid.toString(), function(error) {
+ if (error) throw "Error saving master PID file in " + options.master_pid_path + ': ' + error;
+ });
+ }
+
+ });
- // Save master PID
- if (options.master_pid_path) {
- fs.writeFile(options.master_pid_path, process.pid.toString(), function(error) {
- if (error) throw "Error saving master PID file in " + options.master_pid_path + ': ' + error;
- });
- }
} else { // Worker
@@ -315,7 +331,8 @@ exports.start = function(server, port, host, worker_count, options) {
log("Worker " + worker_id + " exiting.");
});
- var connection = net.createConnection(master_socket_path).on('connect', function() {
+ var connection = net.createConnection(master_socket_path);
+ connection.on('connect', function() {
connection.write('GIMME_SOCKET');
connection.on('fd', function(fd) {
@@ -340,9 +357,29 @@ exports.start = function(server, port, host, worker_count, options) {
}
server.listenFD(fd);
+
+ connection.end('LISTENING');
});
});
+
+ // Setup to ping master and die if it fails
+ setInterval(function() {
+ var ping_connection = net.createConnection(master_socket_path);
+ ping_connection.on('connect', function() {
+ ping_connection.write('PING');
+ ping_connection.on('data', function(data) {
+ if (data.toString() == 'PONG') {
+ ping_connection.end();
+ }
+ })
+ });
+ ping_connection.on('error', function(error) {
+ log('ping error to server: '+error);
+ log('going to shutdown because of it');
+ worker_killer();
+ });
+ }, options.worker_to_master_ping_interval);
// Track connections so we don't die in the middle of serving one
server.on('connection', function(connection) {
View
2  package.json
@@ -1,6 +1,6 @@
{ "name" : "fugue"
, "description" : "Unicorn for node for node"
-, "version" : "0.0.24"
+, "version" : "0.0.25"
, "homepage" : "http://www.metaduck.com/fugue"
, "author" : "Pedro Teixeira <pedro.teixeira@gmail.com> (http://www.metaduck.com)"
, "contributors" :
View
19 test/test_if_i_can_reach_many_workers.js
@@ -10,9 +10,18 @@ var fugue = require(path.join(__dirname, '..', 'lib', 'fugue.js'));
var expected_data = 'here is some data';
server = net.createServer(function(conn) {
- //console.log('worker '+fugue.workerId()+' got connection');
- conn.end(fugue.workerId().toString(), 'ascii');
- server.watcher.stop();
+ if (!fugue.isMaster()) {
+ //console.log('worker '+fugue.workerId()+' got connection');
+ conn.write(fugue.workerId().toString());
+ try {
+ conn.end();
+ } catch(exp) {
+ // do nothing if error closing
+ }
+ process.nextTick(function() {
+ server.watcher.stop();
+ });
+ }
});
var port = 4001;
@@ -50,11 +59,11 @@ exports.run = function(next) {
var got_some_data = false;
client.on('data', function(workerId) {
worker_marks[workerId] = true;
- client.destroy();
+ client.end();
if (all_workers_contacted()) {
//console.log('all workers contacted');
clearTimeout(timeout);
- if (next) next();
+ next();
} else
if (workers_tried < max_tries) try_next_worker();
});
View
111 test/test_worker_dies_after_master_sudden_death.js
@@ -0,0 +1,111 @@
+// Testing to see if I have downtime and I change workers when reloading app
+
+
+var path = require('path'),
+ net = require('net'),
+ child_process = require('child_process'),
+ fs = require('fs'),
+ assert = require('assert');
+var fugue = require(path.join(__dirname, '..', 'lib', 'fugue.js'));
+
+var port = 4001;
+var master_debug_port = 4002;
+
+var master_pid_path = '/tmp/fugue_master_test.pid';
+
+exports.run = function(next) {
+
+ if (!process.env._FUGUE_TEST_APP_RELOAD) {
+ // spawner
+ // spawn new test
+ var env = {};
+ for(var i in process.env){
+ env[i] = process.env[i];
+ }
+ env._FUGUE_TEST_APP_RELOAD = true;
+ var args = process.argv;
+ var spawned = child_process.spawn(args[0], args.slice(1), env);
+ spawned.stdout.on('data', function(data) {
+ console.log('master: '+data.toString());
+ });
+ spawned.stderr.on('data', function(data) {
+ console.log('master: '+data.toString());
+ });
+
+ var pids = {};
+ var pid_count = 0;
+
+ var make_call = function() {
+ var client = net.createConnection(port);
+ client.on('data', function(pid) {
+ pid = pid.toString();
+ if (!pids[pid]) {
+ pids[pid] = true;
+ pid_count ++;
+ }
+ client.end();
+ });
+ client.on('error', function(error) {
+ throw error;
+ });
+ }
+
+ // wait sometime before starting to make calls
+ setTimeout(function() {
+
+ // get the worker pids
+ var client = net.createConnection(master_debug_port);
+ client.on('data', function(worker_pids) {
+ worker_pids = worker_pids.toString();
+ worker_pids = worker_pids.split(',');
+ assert.equal(2, worker_pids.length, "expected 2 worker pids and I get " + worker_pids.length);
+ client.end();
+
+ // kill master without it knowing
+ spawned.kill('SIGKILL');
+
+ // wait some time to let the workers die
+ setTimeout(function() {
+ worker_pids.forEach(function(worker_pid) {
+ var command = 'ps '+worker_pid+' | grep '+worker_pid;
+ child_process.exec(command, function(error, stdout, stderr) {
+ assert.equal('', stderr, 'error executing command ' + command);
+ assert.equal('', stdout, 'looks like worker processes are still working. command output was: '+stdout);
+ next();
+ });
+ });
+ }, 3000);
+ });
+
+ }, 2000);
+
+ } else {
+ // spawned
+ server = net.createServer(function(conn) {
+ conn.end('Hello');
+ });
+
+ if (fugue.isMaster()) {
+ net.createServer(function(conn) {
+ conn.end(fugue.workerPids().join(','));
+ }).listen(master_debug_port);
+ }
+
+ fugue.start(server, port, null, 2, {verbose: false , master_pid_path : master_pid_path, worker_to_master_ping_interval: 1000 } );
+
+ }
+
+}
+
+exports.teardown = function() {
+ if (!process.env._FUGUE_TEST_APP_RELOAD) {
+ pid = parseInt(fs.readFileSync(master_pid_path));
+ //console.log('killing '+pid);
+ if (pid) {
+ child_process.exec('kill '+pid, function(error, stdout, stderr) {
+ assert.ok(!error, error);
+ });
+ }
+ }
+
+}

0 comments on commit a974868

Please sign in to comment.
Something went wrong with that request. Please try again.