Skip to content

HTTPS clone URL

Subversion checkout URL

You can clone with HTTPS or Subversion.

Download ZIP
Browse files

add "ncore" tool

  • Loading branch information...
commit 9557229702c5bc1e7658422b8624e4cfe6097ede 1 parent bb354f0
@davepacheco davepacheco authored
View
91 README.md
@@ -172,6 +172,97 @@ The dump itself is just a JSON object. This module automatically fills in the f
*plus* any information added with `panicDbg.set` or `panicDbg.add`.
+Generating dumps from outside the program
+-----------------------------------------
+
+node-panic includes a tool called "ncore" for causing a node program that's
+already loaded node-panic to dump core on demand *without* any other cooperation
+from the program itself. That is, even if the program is stuck inside an
+infinite loop, "ncore" can interrupt it to take a core dump.
+
+Caveat: this tool can be very dangerous! Since it uses SIGUSR1, invoking it on
+non-node processes can result in all kinds of failure. (On Illumos systems,
+"ncore" will automatically detect this case and bail out.) Additionally, if
+another program on the same system is using the node debugger, ncore will fail.
+"ncore" tries to avoid hijacking another debugger session, but this check is
+inherently racy. Because of these risks, this tool should be viewed as a last
+resort, but it can be extremely valuable when needed.
+
+Let's take a look at how it works:
+
+ $ cat examples/example-loop.js
+ /*
+ * example-loop.js: example of using "ncore" tool to generate a node core
+ */
+
+ var mod_panic = require('panic');
+
+ function func()
+ {
+ for (var ii = 0; ; ii++)
+ panicDbg.set('func-iter', ii);
+ }
+
+ console.log('starting infinite loop; use "ncore" tool to generate core');
+ func();
+
+Now run the program:
+
+ $ node examples/example-loop.js
+ starting infinite loop; use "ncore" tool to generate core
+
+In another shell, run "ncore" on the given program:
+
+ $ ncore 1369
+ attempting to attach to process 1369 ... . ok.
+
+And back in the first shell we see:
+
+ Hit SIGUSR1 - starting debugger agent.
+ debugger listening on port 5858[2011-09-13 19:20:38.265 UTC] CRIT PANIC:
+ explicit panic: EXCEPTION: Error: Error: core dump initiated at user request
+ at caPanic (/Users/dap/work/node-panic/lib/panic.js:55:9)
+ at eval at func (/Users/dap/work/node-panic/examples/example-loop.js:9:23)
+ at ExecutionState.evaluateGlobal (native)
+ at DebugCommandProcessor.evaluateRequest_ (native)
+ at DebugCommandProcessor.processDebugJSONRequest (native)
+ at DebugCommandProcessor.processDebugRequest (native)
+ at func (/Users/dap/work/node-panic/examples/example-loop.js:9:23)
+ at Object.<anonymous>
+ (/Users/dap/work/node-panic/examples/example-loop.js:14:1)
+ at Module._compile (module.js:402:26)
+ at Object..js (module.js:408:10)
+ [2011-09-13 19:20:38.265 UTC] CRIT writing core dump to
+ /Users/dap/work/node-panic/ncore.1369
+ [2011-09-13 19:20:38.294 UTC] CRIT finished writing core dump
+
+And we now have a core dump from the process somewhere in the middle of the
+loop:
+
+ $ json < ncore.1369
+ {
+ "dbg.format-version": "0.1",
+ "init.process.argv": [
+ "node",
+ "/Users/dap/work/node-panic/examples/example-loop.js"
+ ],
+ "init.process.pid": 1369,
+ "init.process.cwd": "/Users/dap/work/node-panic",
+ ...
+ "func-iter": 604762552,
+ "panic.error": "EXCEPTION: Error: Error: core dump initiated at user request\n
+ at caPanic (/Users/dap/work/node-panic/lib/panic.js:55:9)\n at eval at func
+ (/Users/dap/work/node-panic/examples/example-loop.js:9:23)\n at
+ ExecutionState.evaluateGlobal (native)\n at
+ DebugCommandProcessor.evaluateRequest_ (native)\n at
+ DebugCommandProcessor.processDebugJSONRequest (native)\n at
+ DebugCommandProcessor.processDebugRequest (native)\n at func
+ (/Users/dap/work/node-panic/examples/example-loop.js:9:23)\n at
+ Object.<anonymous> (/Users/dap/work/node-panic/examples/example-loop.js:14:1)\n
+ at Module._compile (module.js:402:26)\n at Object..js (module.js:408:10)",
+ }
+
+
Notes
-----
View
159 cmd/ncore.js
@@ -0,0 +1,159 @@
+#!/usr/bin/env node
+
+/*
+ * ncore.js: generate a core file from a running node program. See usage.
+ */
+
+var mod_child = require('child_process');
+var mod_debug = require('_debugger');
+var mod_net = require('net');
+var mod_subr = require('../lib/subr');
+
+var cacPid; /* target pid (process.argv[2]), set by cacCheckArgs */
+var cacClient; /* debugger client, created by cacDebugConnect */
+var cacStages = []; /* stage functions, run in order by main() */
+var cacTries = 30; /* debugger connection attempts remaining */
+var cacSignaled = false; /* true once SIGUSR1 has been sent to the target */
+var cacUsage = mod_subr.caSprintf([
+ 'usage: %s %s PID',
+ '',
+ 'Cause the specified process to dump core and exit. The target process ',
+ 'MUST be a node process and MUST contain the symbol "caPanic". ',
+ 'Additionally, no other node process on the system may be running under ',
+ 'the debugger. That is, the node debug port (5858) must be available. ',
+ 'This tool will attempt to verify these conditions, but such checks are ',
+ 'necessarily subject to races and so should not be relied upon.'
+].join('\n'), process.argv[0], process.argv[1]);
+
+cacStages.push(cacCheckArgs); /* read the target pid from argv */
+cacStages.push(cacCheckTarget); /* use pargs to confirm the target is node */
+cacStages.push(cacCheckPort); /* make sure the debug port is free */
+cacStages.push(cacDebugEnable); /* send SIGUSR1 to the target */
+cacStages.push(cacDebugConnect); /* attach a debugger client */
+cacStages.push(cacCheckPid); /* confirm we attached to the target pid */
+cacStages.push(cacSendPanic); /* invoke caPanic() in the target */
+
+function die()
+{
+ var msg = mod_subr.caSprintf.apply(null, arguments);
+
+ console.error('%s', msg);
+
+ if (cacSignaled)
+ console.error('WARNING: SIGUSR1 sent to pid %s, but ' +
+ 'debug attach failed.', cacPid);
+
+ process.exit(1);
+}
+
+function cacCheckArgs(unused, next)
+{
+ if (process.argv.length < 3)
+ die(cacUsage);
+
+ cacPid = process.argv[2];
+ next();
+}
+
+function cacCheckTarget(unused, next)
+{
+ var cmd = mod_subr.caSprintf('pargs %s | grep "argv\\[0\\]"', cacPid);
+
+ mod_child.exec(cmd, function (error, stdout, stderr) {
+ if (error) {
+ if (!/pargs: command not found/.test(stderr))
+ die('pargs code %s: %s', error.code, stderr);
+
+ console.error('WARNING: no "pargs" present; cannot ' +
+ 'confirm process %s is "node"', cacPid);
+ } else if (!/^argv\[0\]: (.*\/)?node\n$/.test(stdout))
+ die('target process is not node: %s', stdout);
+
+ next();
+ });
+}
+
+function cacCheckPort(unused, next)
+{
+ var server = mod_net.createServer(function () {});
+
+ server.on('error', function (err) {
+ die('debug port already in use (error %s)\n' +
+ 'won\'t try to attach to target', err.code);
+ });
+
+ server.listen(mod_debug.port, 'localhost', function () {
+ server.on('close', next);
+ server.close();
+ });
+}
+
+function cacDebugEnable(unused, next)
+{
+ process.kill(cacPid, 'SIGUSR1');
+ cacSignaled = true;
+ next();
+}
+
+function cacDebugConnect(unused, next)
+{
+ process.stderr.write(mod_subr.caSprintf(
+ 'attempting to attach to process %s ... ', cacPid));
+
+ cacClient = new mod_debug.Client();
+
+ cacClient.on('error', function (err) {
+ if (--cacTries === 0)
+ die('FAILED\nexceeded retry limit with error %s ',
+ err.code);
+
+ process.stderr.write('.');
+ setTimeout(function () {
+ cacClient.connect(mod_debug.port);
+ }, 1000);
+ });
+
+ cacClient.on('ready', function () {
+ process.stderr.write(' ok.\n');
+ next();
+ });
+
+ cacClient.connect(mod_debug.port);
+}
+
+function cacCheckPid(unused, next)
+{
+ cacClient.reqEval('process.pid', function (res) {
+ if (!res.success || res.body.type != 'number')
+ die('failed to get target pid: %j', res);
+
+ if (res.body.value != cacPid)
+ die('connected to wrong pid: %j', res.body.value);
+
+ next();
+ });
+}
+
+function cacSendPanic(unused, next)
+{
+ cacClient.reqEval('caPanic("core dump initiated at user request")',
+ function (res) {
+ if (!res.success)
+ die('core dump FAILED: %j', res);
+ die('core dumped');
+ });
+}
+
+function main()
+{
+ mod_subr.caRunStages(cacStages, null, function (err) {
+ if (err) {
+ die('fatal error: %r', err);
+ process.exit(1);
+ }
+
+ process.exit(0);
+ });
+}
+
+main();
View
14 examples/example-loop.js
@@ -0,0 +1,14 @@
+/*
+ * example-loop.js: example of using "ncore" tool to generate a node core
+ */
+
+var mod_panic = require('panic'); /* defines the global panicDbg */
+
+function func()
+{
+ for (var ii = 0; ; ii++) /* no exit condition: loops forever */
+ panicDbg.set('func-iter', ii); /* value is included in any dump */
+}
+
+console.log('starting infinite loop; use "ncore" tool to generate core');
+func(); /* never returns */
View
6 lib/panic.js
@@ -225,6 +225,12 @@ caDebugState.prototype.dump = function ()
if (!global.panicDbg)
global.panicDbg = new caDebugState();
+/*
+ * We expose "caPanic" as a global for the "ncore" tool, which uses the debugger
+ * interface to invoke it.
+ */
+global.caPanic = caPanic;
+
exports.enablePanicOnCrash = caEnablePanicOnCrash;
exports.panic = caPanic;
exports.caPanicSave = caPanicSave; /* for testing only */
View
42 lib/subr.js
@@ -52,6 +52,47 @@ function caRemoveCircularRefs(obj)
}
/*
+ * caRunStages is given an array "stages" of functions, an initial argument
+ * "arg", and a callback "callback". Each stage represents some task,
+ * asynchronous or not, which should be completed before the next stage is
+ * started. Each stage is invoked with the result of the previous stage and can
+ * abort this process if it encounters an error. When all stages have
+ * completed, "callback" is invoked with the error and results of the last stage
+ * that was run.
+ *
+ * More precisely: the first function of "stages" may be invoked during
+ * caRunStages or immediately after (asynchronously). Each stage is invoked as
+ * stage(arg, callback), where "arg" is the result of the previous stage (or
+ * the "arg" specified to caRunStages, for the first stage) and "callback"
+ * should be invoked when the stage is complete. "callback" should be invoked
+ * as callback(err, result), where "err" is a non-null instance of Error iff an
+ * error was encountered and null otherwise, and "result" is an arbitrary object
+ * to be passed to the next stage. The "callback" given to caRunStages is
+ * invoked after the last stage has been run with the arguments given to that
+ * stage's completion callback.
+ */
+function caRunStages(stages, arg, callback)
+{
+ var stage, next;
+
+ next = function (err, result) {
+ var nextfunc;
+
+ if (err)
+ return (callback(err, result));
+
+ nextfunc = stages[stage++];
+ if (!nextfunc)
+ return (callback(null, result));
+
+ return (nextfunc(result, next));
+ };
+
+ stage = 0;
+ next(null, arg);
+}
+
+/*
* Stripped down version of s[n]printf(3c). We make a best effort to throw an
* exception when given a format string we don't understand, rather than
* ignoring it, so that we won't break existing programs if/when we go implement
@@ -205,4 +246,5 @@ function dumpException(ex)
exports.caFormatDate = caFormatDate;
exports.caRemoveCircularRefs = caRemoveCircularRefs;
+exports.caRunStages = caRunStages;
exports.caSprintf = caSprintf;
View
5 package.json
@@ -1,6 +1,6 @@
{
"name": "panic",
- "version": "0.1.0",
+ "version": "0.1.1",
"description": "Postmortem debugging facility",
"author": "Joyent (joyent.com)",
"engines": { "node": "*" },
@@ -8,5 +8,8 @@
"repository": {
"type": "git",
"url": "http://github.com/joyent/node-panic.git"
+ },
+ "bin": {
+ "ncore": "cmd/ncore.js"
}
}
Please sign in to comment.
Something went wrong with that request. Please try again.