Permalink
Browse files

Bug 746042 - Clientproxy should ensure buildbot stops after every job…

… run on a tegra. r=bear

--HG--
extra : rebase_source : 74dfafe71c56a697ab4312f1c89f6a699aa2259d
  • Loading branch information...
1 parent 957e453 commit f1d4994bc83ac4e4c5422ef530a42f5fe71aeffa Justin Wood committed Apr 13, 2012
Showing with 104 additions and 6 deletions.
  1. +26 −3 sut_tools/clientproxy.py
  2. +11 −3 sut_tools/reboot.py
  3. +67 −0 sut_tools/sut_lib.py
View
@@ -19,14 +19,16 @@
import datetime
import traceback
import subprocess
+import datetime
from optparse import OptionParser
from Queue import Empty
from logging.handlers import RotatingFileHandler
from multiprocessing import Process, Queue, current_process, get_logger, log_to_stderr
-from sut_lib import checkSlaveAlive, checkSlaveActive, stopSlave, getOurIP, getIPAddress, \
- dumpException, runCommand, loadOptions, getLastLine, setFlag
+from sut_lib import checkSlaveAlive, checkSlaveActive, stopSlave, getOurIP, \
+ getIPAddress, dumpException, runCommand, loadOptions, \
+ getLastLine, setFlag, clearFlag, gracefulSlave
"""clientproxy.py
@@ -241,6 +243,7 @@ def monitorEvents(options, events):
pidFile = os.path.join(options.bbpath, 'twistd.pid')
flagFile = os.path.join(options.bbpath, 'proxy.flg')
errorFile = os.path.join(options.bbpath, 'error.flg')
+ forceRebootFile = os.path.join(options.bbpath, 'forceReboot.flg')
bbEnv = { 'PATH': os.getenv('PATH'),
'SUT_NAME': options.tegra,
'SUT_IP': options.tegraIP,
@@ -323,11 +326,21 @@ def monitorEvents(options, events):
if nChatty > maxChatty:
nChatty = 0
- if state == 'reboot':
+ if state == 'forceReboot':
+ log.warning("Force Reboot Necessary, Stopping Buildslave")
+ if bbActive:
+ stopSlave(pidFile)
+ bbActive = False
+ if tegraActive:
+ sendReboot(options.tegraIP, sutDataPort)
+ elif state == 'reboot':
tegraActive = False
if not os.path.isfile(flagFile):
log.warning('Tegra rebooting, stopping buildslave')
events.put(('stop',))
+ if os.path.isfile(forceRebootFile):
+ log.debug("Told to Force Reboot but we noticed tegra rebooting, clearing")
+ clearFlag(forceRebootFile)
elif state == 'stop' or state == 'offline':
stopSlave(pidFile)
bbActive = False
@@ -438,6 +451,16 @@ def monitorEvents(options, events):
else:
log.warning('buildslave should be active but pidfile not found, marking as offline')
events.put(('offline',))
+
+ if os.path.isfile(forceRebootFile):
+ n = datetime.datetime.now()
+ forceRebootTS = datetime.datetime.strptime(
+ getLastLine(forceRebootFile)[:19],
+ '%Y-%m-%d %H:%M:%S')
+ if forceRebootTS + datetime.timedelta(minutes=10) < n:
+ events.put(('forceReboot',))
+ gracefulSlave(options.tegra)
+ clearFlag(forceRebootFile)
else:
if os.path.isfile(pidFile):
if checkSlaveAlive(options.bbpath):
View
@@ -1,12 +1,17 @@
#!/usr/bin/env python
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this file,
+# You can obtain one at http://mozilla.org/MPL/2.0/.
import os, sys
import devicemanagerSUT as devicemanager
import socket
import random
import time
+import datetime
from sut_lib import getOurIP, calculatePort, clearFlag, setFlag, waitForDevice
+from sut_lib import getLastLine
if (len(sys.argv) <> 2):
print "usage: reboot.py <ip address>"
@@ -15,9 +20,13 @@
cwd = os.getcwd()
proxyFile = os.path.join(cwd, '..', 'proxy.flg')
errorFile = os.path.join(cwd, '..', 'error.flg')
+forceRebootFile = os.path.join(cwd, '..', 'forceReboot.flg')
proxyIP = getOurIP()
proxyPort = calculatePort()
+stringedNow = datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')
+setFlag(forceRebootFile, stringedNow)
+
print "connecting to: %s" % sys.argv[1]
dm = devicemanager.DeviceManagerSUT(sys.argv[1])
dm.debug = 5
@@ -35,12 +44,11 @@
try:
waitForDevice(dm, waitTime=600)
except SystemExit:
- clearFlag(proxyFile)
setFlag(errorFile, "Remote Device Error: call for device reboot failed")
- sys.exit(1)
-
clearFlag(proxyFile)
+time.sleep(20*60) # Let Buildbot Die or ForcedRebootFlag kill us
+
#if status is None or not status:
# print "Remote Device Error: call for device reboot failed"
# sys.exit(1)
View
@@ -17,6 +17,10 @@
import traceback
import subprocess
import random
+import re
+import urllib2
+import cStringIO as StringIO
+import gzip
import devicemanagerSUT as devicemanager
from optparse import OptionParser
@@ -583,3 +587,66 @@ def loadOptions(defaults=None):
return options
+class DefaultErrorHandler(urllib2.HTTPDefaultErrorHandler):
+ def http_error_default(self, req, fp, code, msg, headers):
+ result = urllib2.HTTPError(req.get_full_url(), code, msg, headers, fp)
+ result.status = code
+ return result
+
def fetchUrl(url, debug=False):
    """Fetch *url* and return the response body, or None on failure.

    The request advertises gzip support; when the response carries a
    ``content-encoding: gzip`` header the body is decompressed before it
    is returned.  The opener is built with DefaultErrorHandler, which
    returns HTTP errors rather than raising them, so the body of an HTTP
    error response is returned like any other body.

    url   -- address to fetch
    debug -- accepted for interface compatibility; not used by this function

    Any exception raised while fetching or decoding is logged (with
    traceback) and None is returned.
    """
    result = None
    opener = urllib2.build_opener(DefaultErrorHandler())
    opener.addheaders.append(('Accept-Encoding', 'gzip'))

    try:
        response = opener.open(url)
        raw_data = response.read()

        if response.headers.get('content-encoding', None) == 'gzip':
            result = gzip.GzipFile(fileobj=StringIO.StringIO(raw_data)).read()
        else:
            result = raw_data
    except Exception:
        # Best-effort fetch: report the failure and fall through to return
        # None.  Catching Exception (not a bare except) lets
        # KeyboardInterrupt and SystemExit propagate to the caller.
        log.error('Error fetching url [%s]' % url, exc_info=True)

    return result
+
def gracefulSlave(tegra):
    """Ask the buildbot master to gracefully shut down this tegra's slave.

    An adapted version of graceful_shutdown from briar-patch remote.py.
    The master host, slave port and slave name are read from
    /builds/<tegra>/buildbot.tac; the slave's page on the master's web
    interface is fetched and then its ``/shutdown`` URL is requested.

    tegra -- tegra name, used as the directory name under /builds

    Returns True when the shutdown URL was fetched, False when the tac
    file could not be parsed or either fetch failed.
    """
    tegra_dir = os.path.join("/builds", tegra)

    def get_tac():
        """Return (master, port, slavename) from buildbot.tac, or None."""
        log.debug("Determining host's master")
        p, o = runCommand(["cat", "%s/buildbot.tac" % tegra_dir])
        data = "\r\n".join(o) + "\r\n"
        master = re.search('^buildmaster_host\s*=\s*["\'](.*)["\']', data, re.M)
        port = re.search('^port\s*=\s*(\d+)', data, re.M)
        host = re.search('^slavename\s*=\s*["\'](.*)["\']', data, re.M)
        if master and port and host:
            return master.group(1), int(port.group(1)), host.group(1)
        # Falls through to an implicit None when any field is missing.

    tacinfo = get_tac()

    if tacinfo is None:
        log.error("Couldn't get info from buildbot.tac; host is disabled?")
        return False

    # host is the buildmaster, hostname is this slave's name on the master.
    host, port, hostname = tacinfo
    # HTTP port is host port - 1000
    port -= 1000
    # Look at the host's page
    url = "http://%s:%i/buildslaves/%s" % (host, port, hostname)
    log.info("Fetching host page %s" % url)
    data = fetchUrl('%s?numbuilds=0' % url)

    if data is None:
        return False

    if "Graceful Shutdown" not in data:
        # This is a plain function, so there is no ``self``; report the
        # slave name parsed from the tac file.  The missing form is logged
        # but the shutdown request below is still attempted.
        log.error("no shutdown form for %s" % (hostname))

    log.info("Setting Graceful Directive")
    data = fetchUrl("%s/shutdown" % url)
    if data is None:
        return False

    return True

0 comments on commit f1d4994

Please sign in to comment.