Skip to content
This repository has been archived by the owner on Sep 15, 2021. It is now read-only.

Commit

Permalink
Bug 746042 - Clientproxy should ensure buildbot stops after every job…
Browse files Browse the repository at this point in the history
… run on a tegra. r=bear
  • Loading branch information
Justin Wood committed Apr 13, 2012
1 parent b162cd7 commit 282a3f6
Show file tree
Hide file tree
Showing 3 changed files with 103 additions and 6 deletions.
29 changes: 26 additions & 3 deletions sut_tools/clientproxy.py
Original file line number Original file line Diff line number Diff line change
Expand Up @@ -19,14 +19,16 @@
import datetime import datetime
import traceback import traceback
import subprocess import subprocess
import datetime


from optparse import OptionParser from optparse import OptionParser
from Queue import Empty from Queue import Empty
from logging.handlers import RotatingFileHandler from logging.handlers import RotatingFileHandler
from multiprocessing import Process, Queue, current_process, get_logger, log_to_stderr from multiprocessing import Process, Queue, current_process, get_logger, log_to_stderr


from sut_lib import checkSlaveAlive, checkSlaveActive, stopSlave, getOurIP, getIPAddress, \ from sut_lib import checkSlaveAlive, checkSlaveActive, stopSlave, getOurIP, \
dumpException, runCommand, loadOptions, getLastLine, setFlag getIPAddress, dumpException, runCommand, loadOptions, \
getLastLine, setFlag, clearFlag, gracefulSlave




"""clientproxy.py """clientproxy.py
Expand Down Expand Up @@ -241,6 +243,7 @@ def monitorEvents(options, events):
pidFile = os.path.join(options.bbpath, 'twistd.pid') pidFile = os.path.join(options.bbpath, 'twistd.pid')
flagFile = os.path.join(options.bbpath, 'proxy.flg') flagFile = os.path.join(options.bbpath, 'proxy.flg')
errorFile = os.path.join(options.bbpath, 'error.flg') errorFile = os.path.join(options.bbpath, 'error.flg')
forceRebootFile = os.path.join(options.bbpath, 'forceReboot.flg')
bbEnv = { 'PATH': os.getenv('PATH'), bbEnv = { 'PATH': os.getenv('PATH'),
'SUT_NAME': options.tegra, 'SUT_NAME': options.tegra,
'SUT_IP': options.tegraIP, 'SUT_IP': options.tegraIP,
Expand Down Expand Up @@ -324,11 +327,21 @@ def monitorEvents(options, events):
if nChatty > maxChatty: if nChatty > maxChatty:
nChatty = 0 nChatty = 0


if state == 'reboot': if state == 'forceReboot':
log.warning("Force Reboot Necessary, Stopping Buildslave")
if bbActive:
stopSlave(pidFile)
bbActive = False
if tegraActive:
sendReboot(options.tegraIP, sutDataPort)
elif state == 'reboot':
tegraActive = False tegraActive = False
if not os.path.isfile(flagFile): if not os.path.isfile(flagFile):
log.warning('Tegra rebooting, stopping buildslave') log.warning('Tegra rebooting, stopping buildslave')
events.put(('stop',)) events.put(('stop',))
if os.path.isfile(forceRebootFile):
log.debug("Told to Force Reboot but we noticed tegra rebooting, clearing")
clearFlag(forceRebootFile)
elif state == 'stop' or state == 'offline': elif state == 'stop' or state == 'offline':
stopSlave(pidFile) stopSlave(pidFile)
bbActive = False bbActive = False
Expand Down Expand Up @@ -444,6 +457,16 @@ def monitorEvents(options, events):
else: else:
log.warning('buildslave should be active but pidfile not found, marking as offline') log.warning('buildslave should be active but pidfile not found, marking as offline')
events.put(('offline',)) events.put(('offline',))

if os.path.isfile(forceRebootFile):
n = datetime.datetime.now()
forceRebootTS = datetime.datetime.strptime(
getLastLine(forceRebootFile)[:19],
'%Y-%m-%d %H:%M:%S')
if forceRebootTS + datetime.timedelta(minutes=10) < n:
events.put(('forceReboot',))
gracefulSlave(options.tegra)
clearFlag(forceRebootFile)
else: else:
if os.path.isfile(pidFile): if os.path.isfile(pidFile):
if checkSlaveAlive(options.bbpath): if checkSlaveAlive(options.bbpath):
Expand Down
13 changes: 10 additions & 3 deletions sut_tools/reboot.py
Original file line number Original file line Diff line number Diff line change
@@ -1,12 +1,17 @@
#!/usr/bin/env python #!/usr/bin/env python


# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this file,
# You can obtain one at http://mozilla.org/MPL/2.0/.


import os, sys import os, sys
import devicemanagerSUT as devicemanager import devicemanagerSUT as devicemanager
import socket import socket
import random import random
import time import time
import datetime
from sut_lib import getOurIP, calculatePort, clearFlag, setFlag, waitForDevice from sut_lib import getOurIP, calculatePort, clearFlag, setFlag, waitForDevice
from sut_lib import getLastLine


if (len(sys.argv) <> 2): if (len(sys.argv) <> 2):
print "usage: reboot.py <ip address>" print "usage: reboot.py <ip address>"
Expand All @@ -15,9 +20,13 @@
cwd = os.getcwd() cwd = os.getcwd()
proxyFile = os.path.join(cwd, '..', 'proxy.flg') proxyFile = os.path.join(cwd, '..', 'proxy.flg')
errorFile = os.path.join(cwd, '..', 'error.flg') errorFile = os.path.join(cwd, '..', 'error.flg')
forceRebootFile = os.path.join(cwd, '..', 'forceReboot.flg')
proxyIP = getOurIP() proxyIP = getOurIP()
proxyPort = calculatePort() proxyPort = calculatePort()


stringedNow = datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')
setFlag(forceRebootFile, stringedNow)

print "connecting to: %s" % sys.argv[1] print "connecting to: %s" % sys.argv[1]
dm = devicemanager.DeviceManagerSUT(sys.argv[1]) dm = devicemanager.DeviceManagerSUT(sys.argv[1])
dm.debug = 5 dm.debug = 5
Expand All @@ -35,13 +44,11 @@
try: try:
waitForDevice(dm, waitTime=600) waitForDevice(dm, waitTime=600)
except SystemExit: except SystemExit:
clearFlag(proxyFile)
setFlag(errorFile, "Remote Device Error: call for device reboot failed") setFlag(errorFile, "Remote Device Error: call for device reboot failed")
sys.exit(1)

clearFlag(proxyFile) clearFlag(proxyFile)


sys.stdout.flush() sys.stdout.flush()
time.sleep(20*60) # Let Buildbot Die or ForcedRebootFlag kill us


#if status is None or not status: #if status is None or not status:
# print "Remote Device Error: call for device reboot failed" # print "Remote Device Error: call for device reboot failed"
Expand Down
67 changes: 67 additions & 0 deletions sut_tools/sut_lib.py
Original file line number Original file line Diff line number Diff line change
Expand Up @@ -17,6 +17,10 @@
import traceback import traceback
import subprocess import subprocess
import random import random
import re
import urllib2
import cStringIO as StringIO
import gzip
import devicemanagerSUT as devicemanager import devicemanagerSUT as devicemanager


from optparse import OptionParser from optparse import OptionParser
Expand Down Expand Up @@ -591,3 +595,66 @@ def loadOptions(defaults=None):


return options return options


class DefaultErrorHandler(urllib2.HTTPDefaultErrorHandler):
def http_error_default(self, req, fp, code, msg, headers):
result = urllib2.HTTPError(req.get_full_url(), code, msg, headers, fp)
result.status = code
return result

def fetchUrl(url, debug=False):
    """Fetch *url* and return its body as a string, or None on failure.

    The request advertises gzip support; a response whose Content-Encoding
    header is 'gzip' is decompressed before being returned.

    The opener is built with DefaultErrorHandler, which returns HTTP error
    responses instead of raising them, so an HTTP error status does not by
    itself make this function return None -- the error body is returned.

    url   -- the URL to fetch
    debug -- unused; kept so existing callers keep working

    Any exception raised while fetching or decoding is logged (with
    traceback) and None is returned.
    """
    result = None
    opener = urllib2.build_opener(DefaultErrorHandler())
    opener.addheaders.append(('Accept-Encoding', 'gzip'))

    try:
        response = opener.open(url)
        try:
            raw_data = response.read()
            encoding = response.headers.get('content-encoding', None)
        finally:
            # Always release the connection, even if the read fails.
            response.close()

        if encoding == 'gzip':
            result = gzip.GzipFile(fileobj=StringIO.StringIO(raw_data)).read()
        else:
            result = raw_data
    except Exception:
        # Deliberately best-effort, but do not swallow SystemExit or
        # KeyboardInterrupt the way a bare "except:" would.
        log.error('Error fetching url [%s]' % url, exc_info=True)

    return result

def gracefulSlave(tegra):
    """Ask the buildbot master to gracefully shut down this tegra's slave.

    Reads /builds/<tegra>/buildbot.tac to find the master host, the slave
    port and the slave name, then requests the slave's page on the master's
    web interface followed by its "/shutdown" URL.

    tegra -- name of the tegra; also the name of its directory under /builds

    Returns True when the shutdown request was sent and a response was
    received. Returns False when buildbot.tac could not be parsed, when
    either fetch failed, or when the slave page has no "Graceful Shutdown"
    form.
    """
    # An adapted version of graceful_shutdown from briar-patch remote.py
    tegra_dir = os.path.join("/builds", tegra)

    def get_tac():
        """Return (master, port, slavename) from buildbot.tac, or None."""
        log.debug("Determining host's master")
        # NOTE(review): runCommand presumably returns (process, output
        # lines) -- confirm against its definition.
        p, o = runCommand(["cat", "%s/buildbot.tac" % tegra_dir])
        data = "\r\n".join(o) + "\r\n"
        master = re.search(r'^buildmaster_host\s*=\s*["\'](.*)["\']', data, re.M)
        port = re.search(r'^port\s*=\s*(\d+)', data, re.M)
        host = re.search(r'^slavename\s*=\s*["\'](.*)["\']', data, re.M)
        if master and port and host:
            return master.group(1), int(port.group(1)), host.group(1)
        return None

    tacinfo = get_tac()

    if tacinfo is None:
        log.error("Couldn't get info from buildbot.tac; host is disabled?")
        return False

    master, port, slavename = tacinfo
    # HTTP port is host port - 1000
    port -= 1000
    # Look at the host's page
    url = "http://%s:%i/buildslaves/%s" % (master, port, slavename)
    log.info("Fetching host page %s" % url)
    data = fetchUrl('%s?numbuilds=0' % url)

    if data is None:
        return False

    if "Graceful Shutdown" not in data:
        # This is a plain function, so there is no "self"; the name parsed
        # from buildbot.tac is what identifies the slave. Without the form
        # there is nothing to submit, so report failure.
        log.error("no shutdown form for %s" % (slavename))
        return False

    log.info("Setting Graceful Directive")
    data = fetchUrl("%s/shutdown" % url)
    if data is None:
        return False

    return True

0 comments on commit 282a3f6

Please sign in to comment.