Skip to content

Commit

Permalink
OS-5442 metadata agent cannot deal with stale sockets showing up post…
Browse files Browse the repository at this point in the history
… OS-4897

Reviewed by: Julien Gilli <julien.gilli@joyent.com>
  • Loading branch information
joshwilsdon committed Jun 8, 2016
1 parent 28a2e8d commit d8bf58a
Show file tree
Hide file tree
Showing 4 changed files with 459 additions and 147 deletions.
1 change: 1 addition & 0 deletions src/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -110,6 +110,7 @@ JS_CHECK_TARGETS=\
vm/tests/test-openonerrlogger.js \
vm/tests/test-reboot.js \
vm/tests/test-reprovision.js \
vm/tests/test-snapshots.js \
vm/tests/test-spoof-opts.js \
vm/tests/test-tmpfs.js \
vm/tests/test-update.js \
Expand Down
147 changes: 119 additions & 28 deletions src/vm/lib/metadata/agent.js
Original file line number Diff line number Diff line change
Expand Up @@ -216,7 +216,9 @@ function zoneExists(zonename, callback) {
}

/*
* Takes a zoneConnections entry (or undefined) and calls callback with:
* Takes a zoneConnections entry (or undefined) and an 'opts' object that
* contains at least a 'log' property which is a bunyan logger, and then
* calls callback with:
*
* callback(<Error Object>);
*
Expand All @@ -228,35 +230,124 @@ function zoneExists(zonename, callback) {
*
* callback(null, true);
*
* - when the conn entry has a sockpath that's been removed.
* - when the conn entry has a sockpath that's been removed or
* when the conn entry sockpath has a different fs.stat() signature
*
*/
function checkStaleSocket(conn, callback) {
assert.optionalObject(conn, 'conn');
function checkStaleSocket(zoneConn, opts, callback) {
assert.optionalObject(zoneConn, 'zoneConn');
assert.object(opts, 'opts');
assert.object(opts.log, 'opts.log');
assert.func(callback, 'callback');

if (!conn || !conn.sockpath) {
// if there's no conn, it can't be stale
if (!zoneConn || !zoneConn.sockpath) {
// if there's no zoneConn, it can't be stale
callback(null, false);
return;
}

assert.string(conn.sockpath, 'conn.sockpath');
assert.string(zoneConn.sockpath, 'zoneConn.sockpath');
assert.object(zoneConn.sockstat, 'zoneConn.sockstat');

fs.stat(zoneConn.sockpath, function _onSockpathStat(err, stats) {
var field;
var fields = ['dev', 'ino']; // fields to compare in fs.Stats

fs.stat(conn.sockpath, function _onSockpathStat(err, stats) {
if (err) {
if (err.code === 'ENOENT') {
opts.log.trace({
sockpath: zoneConn.sockpath
}, 'ENOENT on sockpath: stale');
callback(null, true); // stale
return;
}
callback(err);
return;
}
callback(null, false); // no error in stat means exists: not stale

// Check for changes in the fs.stat() signature since we created this
// socket. If it has changed, that means our handle to it is stale and
// we should recreate it.
for (field = 0; field < fields.length; field++) {
if (zoneConn.sockstat[fields[field]] !== stats[fields[field]]) {
opts.log.debug({
field: fields[field],
sockpath: zoneConn.sockpath,
new_sockstat: stats,
old_sockstat: zoneConn.sockstat
}, 'change in sockpath fs.stat signature: stale');
callback(null, true); // stale
return;
}
}
if ((zoneConn.sockstat.ctime
&& zoneConn.sockstat.ctime.getTime()) !== stats.ctime.getTime()) {
opts.log.debug({
field: 'ctime',
sockpath: zoneConn.sockpath,
new_sockstat: stats,
old_sockstat: zoneConn.sockstat
}, 'change in sockpath fs.stat signature: stale');
callback(null, true); // stale
return;
}

// no error in stat, and no diff fields means exists: not stale
opts.log.trace({
sockpath: zoneConn.sockpath
}, 'sockpath still exists and fs.stat signature matches: not stale');
callback(null, false);
return;
});
}

/*
* This function does an fs.stat() on the 'sockpath' of the zoneConn and
* attaches the fs.Stats result to the 'zoneConn' object as .sockstat. If there
* is an error with fs.stat() conn.sockstat will be set to an empty object.
*
* After the stat has completed, callback() will be called. Any error from
* fs.stat() will be passed as the first and only argument to callback().
*/
function addConnSockStat(zoneConn, callback) {
assert.object(zoneConn, 'zoneConn');
assert.string(zoneConn.sockpath, 'zoneConn.sockpath');
assert.func(callback, 'callback');

fs.stat(zoneConn.sockpath, function _statSock(e, st) {
if (e) {
// If there was an error w/ the stat, it's most likely because
// the state of the world has changed. We'll fill in sockstat
// with an empty object here so that checkStaleSocket will report
// this as stale.
zoneConn.sockstat = {};
} else {
zoneConn.sockstat = st;
}
callback(e);
});
}

function closeZoneConnection(zoneConn) {
assert.object(zoneConn, 'zoneConn');

// Ensure we don't have *both* .serverSocket and .conn but that we do have
// at least one of the two.
assert.ok(!(zoneConn.serverSocket && zoneConn.conn),
'should not have both .conn and .serverSocket');
assert.ok((zoneConn.serverSocket || zoneConn.conn),
'should have either .serverSocket or .conn');

// .serverSocket is a net.Server for non-KVM
if (zoneConn.serverSocket) {
zoneConn.serverSocket.close();
}
// .conn is a net.Socket client connection to a KVM/qemu device
if (zoneConn.conn) {
zoneConn.conn.destroy();
}
}

/*
* Call as:
*
Expand Down Expand Up @@ -509,9 +600,7 @@ MetadataAgent.prototype.purgeZoneCache = function purgeZoneCache(zonename) {
delete self.zlog[zonename];
}
if (self.zoneConnections.hasOwnProperty(zonename)) {
if (self.zoneConnections[zonename].conn) {
self.zoneConnections[zonename].conn.close();
}
closeZoneConnection(self.zoneConnections[zonename]);
delete self.zoneConnections[zonename];
}
if (self.zones.hasOwnProperty(zonename)) {
Expand All @@ -537,9 +626,9 @@ MetadataAgent.prototype.checkMissedSysevents = function checkMissedSysevents() {
}, 'loaded VM kstats');

Object.keys(results).forEach(function (zonename) {
var conn = self.zoneConnections[zonename]; // may be undefined
var zoneConn = self.zoneConnections[zonename]; // may be undefined

checkStaleSocket(conn, function (e, isStale) {
checkStaleSocket(zoneConn, {log: self.log}, function (e, isStale) {
if (e) {
// This currently can only happen when fs.stat fails. We'll
// just have to assume the socket is not stale if we can't
Expand All @@ -549,9 +638,8 @@ MetadataAgent.prototype.checkMissedSysevents = function checkMissedSysevents() {
} else if (isStale) {
self.log.debug({zonename: zonename}, 'stale socket detected'
+ ' cleaning up');
if (self.zoneConnections[zonename].conn) {
self.zoneConnections[zonename].conn.close();
}

closeZoneConnection(self.zoneConnections[zonename]);
delete self.zoneConnections[zonename];
_assumeBooted(zonename);
} else if (!self.zones[zonename]) {
Expand Down Expand Up @@ -688,15 +776,15 @@ MetadataAgent.prototype.start = function () {
self.startPeriodicChecks();

zwatch.on('zone_transition', function (msg) {
var conn = self.zoneConnections[msg.zonename];
var zoneConn = self.zoneConnections[msg.zonename];
var when = new Date(msg.when / 1000000);

// ignore everything except start
if (msg.cmd !== 'start') {
return;
}

checkStaleSocket(conn, function (e, isStale) {
checkStaleSocket(zoneConn, {log: self.log}, function (e, isStale) {
if (e) {
// This currently can only happen when fs.stat fails. We'll
// just have to assume the socket is not stale if we can't
Expand All @@ -706,9 +794,7 @@ MetadataAgent.prototype.start = function () {
} else if (isStale) {
self.log.debug({zonename: msg.zonename},
'stale socket detected cleaning up');
if (self.zoneConnections[msg.zonename].conn) {
self.zoneConnections[msg.zonename].conn.close();
}
closeZoneConnection(self.zoneConnections[msg.zonename]);
delete self.zoneConnections[msg.zonename];
}

Expand Down Expand Up @@ -813,11 +899,13 @@ MetadataAgent.prototype.createKVMServer = function (zopts, callback) {
}
self.zoneConnections[zopts.zone] = {};

kvmstream = new net.Stream();
kvmstream = new net.Socket();

// refuse to overwrite an existing connection
assert.ok(!self.zoneConnections[zopts.zone].hasOwnProperty('conn'),
'should not have existing connection when creating new');
'should not have existing conn when creating new');
assert.ok(!self.zoneConnections[zopts.zone].hasOwnProperty('serverSocket'),
'should not have existing serverSocket when creating new');

// replace the placeholder entry with a real one.
self.zoneConnections[zopts.zone] = {
Expand Down Expand Up @@ -857,7 +945,7 @@ MetadataAgent.prototype.createKVMServer = function (zopts, callback) {
zlog.info('listening on fd %d', fd);
self.zoneConnections[zopts.zone].fd = fd;

callback();
addConnSockStat(self.zoneConnections[zopts.zone], callback);
};

MetadataAgent.prototype.startZoneSocketServer =
Expand Down Expand Up @@ -1077,10 +1165,13 @@ function createZoneSocket(zopts, callback) {

// refuse to overwrite an existing connection
assert.ok(!self.zoneConnections[zopts.zone].hasOwnProperty('conn'),
'should not have existing connection when creating new');
'should not have existing conn when creating new');
assert.ok(!self.zoneConnections[zopts.zone]
.hasOwnProperty('serverSocket'),
'should not have existing serverSocket when creating new');

self.zoneConnections[zopts.zone] = {
conn: server,
serverSocket: server,
fd: fd, // so it's in the core for debugging
sockpath: path.join(zopts.zoneroot, zopts.path)
};
Expand Down Expand Up @@ -1121,7 +1212,7 @@ function createZoneSocket(zopts, callback) {
zlog.info('listening on fd %d', fd);
self.addDebug(zopts.zone, 'last_zsock_listen_success');

callback();
addConnSockStat(self.zoneConnections[zopts.zone], callback);
});
});
};
Expand Down
3 changes: 2 additions & 1 deletion src/vm/runtest
Original file line number Diff line number Diff line change
Expand Up @@ -76,7 +76,8 @@ done
imgadm sources --add-docker-hub
export DOCKER_ALPINE_UUID=$(imgadm list -o uuid,tags \
| grep "\"docker:repo\":\"alpine\"" \
| cut -d' ' -f1)
| cut -d' ' -f1 \
| head -1)
if [[ -z ${DOCKER_ALPINE_UUID} ]]; then
imgadm import alpine:latest
export DOCKER_ALPINE_UUID=$(imgadm list -o uuid,tags \
Expand Down
Loading

0 comments on commit d8bf58a

Please sign in to comment.