diff --git a/config.ini.example b/config.ini.example index ba9557b..f869131 100644 --- a/config.ini.example +++ b/config.ini.example @@ -86,6 +86,11 @@ keep = 50 # corresponds to 15 minutes timeout = 900 +[firefox] +# How long to wait for firefox before killing it, in seconds. The value below +# corresponds to 15 minutes +timeout = 900 + [mqproxy] # Where to keep the proxy database for windows queue entries db = /Users/hurley/src/stoneridge/testroot/mqproxy.db @@ -112,6 +117,10 @@ download_suffix = dmg # firefox for win and linux, and FirefoxNightly.app/Contents/MacOS for mac firefox_path = FirefoxNightly.app/Contents/MacOS +# The name of the firefox binary, firefox for mac and linux, firefox.exe +# for windows +firefox = firefox + # The name of the xpcshell binary, xpcshell for mac and linux, xpcshell.exe # for windows xpcshell = xpcshell diff --git a/head.js b/head.js index ffb64e3..58eb896 100644 --- a/head.js +++ b/head.js @@ -7,36 +7,24 @@ * test suite. This must be run under xpcshell running in JS v1.8 mode. */ +/*jshint curly:true, indent:4, latedef:true, undef:true, + trailing:true, es5:true, esnext:true*/ +/*global Components:true, run_test:true, _SR_OUT_FILE:true, + do_save_results:true*/ + var STONERIDGE_FINISHED = null; -var STONERIDGE_RESULTS = null; var Cc = Components.classes; var Ci = Components.interfaces; var Cr = Components.results; -/* - * Store some results for writing once we're all done - */ -function do_write_result(key, start, stop) { - var startms = start.valueOf(); - var stopms = stop.valueOf(); - - var val = {'start':startms, 'stop':stopms, 'total':stopms - startms}; - - if (STONERIDGE_RESULTS.hasOwnProperty(key)) { - STONERIDGE_RESULTS[key].push(val); - } else { - STONERIDGE_RESULTS[key] = [val]; - } -} - /* * This is used to indicate that the tests are done. Now that we know we're * done, we can write the results to disk for the python harness to do its thing * with. */ function do_test_finish() { - STONERIDGE_FINISHED = true; + STONERIDGE_FINISHED = true; } /* @@ -45,45 +33,27 @@ function do_test_finish() { */ function do_test_pending() {} -function _do_save_results() { - // Create a file pointing to our output directory - var ofile = Cc["@mozilla.org/file/local;1"].createInstance(Ci.nsILocalFile); - ofile.initWithPath(_SR_OUT_SUBDIR); - - // And use the file determined by our caller - ofile.append(_SR_OUT_FILE); - - // Now get an output stream for our file - var ostream = Cc["@mozilla.org/network/file-output-stream;1"]. - createInstance(Ci.nsIFileOutputStream); - ostream.init(ofile, -1, -1, 0); - - var jstring = JSON.stringify(STONERIDGE_RESULTS); - ostream.write(jstring, jstring.length); - ostream.close(); -} - function make_channel(url) { - var ios = Cc["@mozilla.org/network/io-service;1"].getService(Ci.nsIIOService); - return ios.newChannel(url, "", null); + var ios = Cc["@mozilla.org/network/io-service;1"].getService(Ci.nsIIOService); + return ios.newChannel(url, "", null); } /* * The main entry point for all stone ridge tests */ function do_stoneridge() { - STONERIDGE_FINISHED = false; - STONERIDGE_RESULTS = {}; + STONERIDGE_FINISHED = false; - run_test(); + run_test(); - // Pump the event loop until we're told to stop - var thread = Cc["@mozilla.org/thread-manager;1"]. 
- getService().currentThread; - while (!STONERIDGE_FINISHED) - thread.processNextEvent(true); - while (thread.hasPendingEvents()) - thread.processNextEvent(true); + // Pump the event loop until we're told to stop + var thread = Cc["@mozilla.org/thread-manager;1"].getService().currentThread; + while (!STONERIDGE_FINISHED) { + thread.processNextEvent(true); + } + while (thread.hasPendingEvents()) { + thread.processNextEvent(true); + } - _do_save_results(); + do_save_results(_SR_OUT_FILE); } diff --git a/linux/init/client/srworker b/linux/init/client/srworker deleted file mode 100755 index 5bf70e3..0000000 --- a/linux/init/client/srworker +++ /dev/null @@ -1,45 +0,0 @@ -#!/bin/bash -# -# srworker Stone Ridge worker process -# -# chkconfig: 2345 98 09 -# description: srworker is responsible for running tests - -### BEGIN INIT INFO -# Provides: srworker -# Required-Start: $local_fs $network -# Required-Stop: $local_fs $network -# Default-Start: 2 3 4 5 -# Default-Stop: 0 1 6 -# Short-Description: Start and stop stoneridge worker -# Description: stoneridge worker runs tests -### END INIT INFO - -source /etc/default/stoneridge - -PID=$SRHOME/srworker.pid -LOG=$SRHOME/srworker.log - -start() { - python $SRRUN $SRROOT/srworker.py --config $CONFFILE --pidfile $PID --log $LOG -} - -stop() { - kill $(cat $PID) -} - -case "$1" in - start) - start - ;; - stop) - stop - ;; - restart|force-reload|reload) - stop - start - ;; - *) - echo "Usage: $0 {start|stop|restart|reload|force-reload}" - exit 2 -esac diff --git a/linux/user/etc_gdm_custom.conf b/linux/user/etc_gdm_custom.conf new file mode 100644 index 0000000..c79cab3 --- /dev/null +++ b/linux/user/etc_gdm_custom.conf @@ -0,0 +1,16 @@ +# GDM configuration storage + +[daemon] +AutomaticLoginEnable=true +AutomaticLogin=hurley + +[security] + +[xdmcp] + +[greeter] + +[chooser] + +[debug] + diff --git a/linux/user/srterm.py b/linux/user/srterm.py new file mode 100755 index 0000000..f6eb49f --- /dev/null +++ b/linux/user/srterm.py @@ -0,0 +1,9 @@ +#!/usr/bin/env python + +import subprocess + +SRWRAPPER = '/home/hurley/srhome/stoneridge/linux/user/srwrapper.py' + +p = subprocess.Popen(['/usr/bin/gnome-terminal', '-t', 'Stone Ridge', + '-e', SRWRAPPER]) +p.wait() diff --git a/linux/user/srwrapper.py b/linux/user/srwrapper.py new file mode 100755 index 0000000..50d713e --- /dev/null +++ b/linux/user/srwrapper.py @@ -0,0 +1,23 @@ +#!/usr/bin/env python + +import subprocess +import sys +import time + +SRHOME = '/home/hurley/srhome' +SRPYTHON = '%s/stoneridge' % (SRHOME,) +SRRUN = '%s/srrun.py' % (SRPYTHON,) +SRWORKER = '%s/srworker.py' % (SRPYTHON,) +SRINI = '%s/stoneridge.ini' % (SRHOME,) +LOG = '%s/srworker.log' % (SRHOME,) + +cli = [sys.executable, SRRUN, SRWORKER, '--config', SRINI, '--log', LOG] + +p = subprocess.Popen(cli) +p.wait() + +while True: + # Sleep indefinitely in case of failure, so we choose when to kill the + # terminal. This isn't particularly useful on the actual infrastructure, + # but it works great for debugging errors during testing. 
+    time.sleep(60)
diff --git a/linux/user/stoneridge.desktop b/linux/user/stoneridge.desktop
new file mode 100644
index 0000000..cb3ba8f
--- /dev/null
+++ b/linux/user/stoneridge.desktop
@@ -0,0 +1,10 @@
+
+[Desktop Entry]
+Type=Application
+Exec=/home/hurley/srhome/stoneridge/linux/user/srterm.py
+Hidden=false
+X-GNOME-Autostart-enabled=true
+Name[en_US]=Stone Ridge
+Name=Stone Ridge
+Comment[en_US]=Start a terminal running the stone ridge worker
+Comment=Start a terminal running the stone ridge worker
diff --git a/osx/org.mozilla.srworker.plist b/osx/org.mozilla.srworker.plist
deleted file mode 100644
index a332a52..0000000
--- a/osx/org.mozilla.srworker.plist
+++ /dev/null
@@ -1,27 +0,0 @@
-<?xml version="1.0" encoding="UTF-8"?>
-<!DOCTYPE plist PUBLIC "-//Apple//DTD PLIST 1.0//EN" "http://www.apple.com/DTDs/PropertyList-1.0.dtd">
-<plist version="1.0">
-<dict>
-    <key>Label</key>
-    <string>org.mozilla.srworker</string>
-    <key>ProgramArguments</key>
-    <array>
-        <string>/usr/local/bin/python</string>
-        <string>/Users/hurley/srhome/stoneridge/srworker.py</string>
-        <string>--config</string>
-        <string>/Users/hurley/srhome/stoneridge.ini</string>
-        <string>--nodaemon</string>
-        <string>--log</string>
-        <string>/Users/hurley/srhome/srworker.log</string>
-    </array>
-    <key>KeepAlive</key>
-    <true/>
-    <key>WorkingDirectory</key>
-    <string>/Users/hurley/srhome/stoneridge</string>
-    <key>EnvironmentVariables</key>
-    <dict>
-        <key>PATH</key>
-        <string>/usr/local/share/python:/usr/local/bin:/usr/local/sbin:/usr/bin:/usr/sbin:/bin:/sbin</string>
-    </dict>
-</dict>
-</plist>
diff --git a/osx/terminal_setup.sh b/osx/terminal_setup.sh
new file mode 100755
index 0000000..5d84c8e
--- /dev/null
+++ b/osx/terminal_setup.sh
@@ -0,0 +1,10 @@
+#!/bin/sh
+
+defaults write com.apple.Terminal 'Window Settings' -dict-add 'Stone Ridge' '{ CommandString = "/usr/local/bin/python /Users/hurley/srhome/stoneridge/srworker.py --config /Users/hurley/srhome/stoneridge.ini --log /Users/hurley/srhome/srworker.log"; ProfileCurrentVersion = "2.02"; RunCommandAsShell = 0; name = "Stone Ridge"; type = "Window Settings"; }'
+defaults write com.apple.Terminal 'Default Window Settings' 'Stone Ridge'
+defaults write com.apple.Terminal 'Startup Window Settings' 'Stone Ridge'
+osascript -e 'tell app "System Events"
+make login item at end with properties {path:"/Applications/Utilities/Terminal.app", hidden:false}
+end tell'
+echo "Setting auto login user (needs root privs)"
+sudo defaults write /Library/Preferences/com.apple.loginwindow.plist autoLoginUser hurley
diff --git a/pageloader/README b/pageloader/README
new file mode 100644
index 0000000..3fb905c
--- /dev/null
+++ b/pageloader/README
@@ -0,0 +1,65 @@
+Pageload Test Component
+=======================
+
+Usage:
+
+  ./firefox -sr file:///path/to/manifest.txt [-srargs...]
+
+See ./firefox -help for other arguments.
+
+
+Manifest file format
+====================
+
+Comments in the manifest file start with a #. Each line may be:
+
+* a URL (absolute or relative to the manifest)
+
+  This URL is added to the list of tests.
+
+* one or more flags, followed by whitespace, followed by a URL
+
+  The only flag supported currently is '%', which indicates that
+  a test will do its own timing. (See Self-timing Tests below.)
+
+* "include" followed by whitespace, followed by a URL
+
+  Parse the given manifest file.
+
+Self-timing Tests
+=================
+
+Most timing tests are interested in timing how long it takes the page
+to load; that is, from the start of page loading until the 'load'
+event is dispatched. By default, this is what the pageloader will
+time. However, if a test URL has the % flag, the test is expected to
+report its own timing. For this purpose, the pageloader will provide
+a function named "tpRecordTime" in the test's global object that it
+should call once it has performed whatever timing it wants to do.
+The given value will be used as the timing result for this test.
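+
+Note that this stone ridge port does not currently parse the line-based
+format described above: plLoadURLsFromURI() in chrome/srpl.js reads the
+manifest as JSON (see the tests/*.page files in this repo). A minimal
+manifest in that format, with a placeholder URL, looks like:
+
+  [
+    {"url": "http://example.com/"}
+  ]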
+ +Output format +============= + +The result is a dump to stdout via dump() -- +browser.dom.window.dump.enabled must be set to true in the profile. + +Sample output: + +__start_tp_report +_x_x_mozilla_page_load,778.5,NaN,NaN +_x_x_mozilla_page_load_details,avgmedian|778.5|average|766.75|minimum|NaN|maximum|NaN|stddev|NaN|0;file:///c:/proj/mozilla-cvs/perf/tp2/base/www.cnn.com/index.html;778.5;766.75;722;1027;1027;788;777;722;780|... +__end_tp_report + +Note that the minimum, maximum, stddev are not calculated; they're +always reported as NaN. (They were the minimum and maximum values of +any sampled value, and the standard deviation across all sampled +values -- not very useful.) + +TODO +==== + +* Command line option to choose whether to run with or without browser chrome. Currently runs without. + +* Tinderbox-dropping style output + * better yet would be to teach tinderbox about JSON diff --git a/pageloader/chrome.manifest b/pageloader/chrome.manifest new file mode 100644 index 0000000..6ae8261 --- /dev/null +++ b/pageloader/chrome.manifest @@ -0,0 +1,4 @@ +content srpl chrome/ +component {E17FB86D-1CEB-4B67-8A6C-5B97AD068A7F} components/sr-cmdline.js +contract @mozilla.org/commandlinehandler/general-startup;1?type=sr {E17FB86D-1CEB-4B67-8A6C-5B97AD068A7F} +category command-line-handler m-sr @mozilla.org/commandlinehandler/general-startup;1?type=sr diff --git a/pageloader/chrome/srpl.js b/pageloader/chrome/srpl.js new file mode 100644 index 0000000..340fcd0 --- /dev/null +++ b/pageloader/chrome/srpl.js @@ -0,0 +1,301 @@ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +/*jshint es5:true, esnext:true*/ + +try { + if (Cc === undefined) { + var Cc = Components.classes; + var Ci = Components.interfaces; + } +} catch (ex) {} + +var winWidth = 1024; +var winHeight = 768; + +var pages; +var pageIndex; +var start_time; +var timeout = -1; +var delay = 250; +var timeoutEvent = -1; +var running = false; + +var useMozAfterPaint = false; +var gPaintListener = false; + +var content; + +var pageUrls; + +var outputFile = null; + +// the io service +var gIOS = null; + +function plInit() { + if (running) { + return; + } + running = true; + + try { + var args = window.arguments[0].wrappedJSObject; + + outputFile = args.outputfile; + if (!outputFile) { + dumpLine('sr: no output file, quitting'); + plStop(true); + } + + var manifestURI = args.manifest; + if (args.width) winWidth = parseInt(args.width, 10); + if (args.height) winHeight = parseInt(args.height, 10); + if (args.timeout) timeout = parseInt(args.timeout, 10); + if (args.delay) delay = parseInt(args.delay, 10); + if (args.mozafterpaint) useMozAfterPaint = true; + + gIOS = Cc["@mozilla.org/network/io-service;1"] + .getService(Ci.nsIIOService); + var fileURI = gIOS.newURI(manifestURI, null, null); + pages = plLoadURLsFromURI(fileURI); + + if (!pages) { + dumpLine('sr: could not load URLs, quitting'); + plStop(true); + } + + if (pages.length === 0) { + dumpLine('sr: no pages to test, quitting'); + plStop(true); + } + + pageUrls = pages.map(function(p) { return p.url; }); + + pageIndex = 0; + + window.resizeTo(winWidth, winHeight); + + content = document.getElementById('contentPageloader'); + + setTimeout(plLoadPage, delay); + } catch(e) { + dumpLine(e); + plStop(true); + } +} + +// load the current page, start timing +var removeLastAddedListener = null; +var removeLastAddedMsgListener = 
null; +function plLoadPage() { + var pageName = pages[pageIndex].url; + + if (removeLastAddedListener) + removeLastAddedListener(); + + if (removeLastAddedMsgListener) + removeLastAddedMsgListener(); + + // XXX we use a capturing event here -- load events don't bubble up + // to the element. See bug 390263. + content.addEventListener('load', plLoadHandler, true); + removeLastAddedListener = function() { + content.removeEventListener('load', plLoadHandler, true); + if (useMozAfterPaint) { + window.removeEventListener("MozAfterPaint", plPainted, true); + gPaintListener = false; + } + }; + + // If the test browser is remote (e10s / IPC) we need to use messages to watch for page load + if (content.getAttribute("remote") == "true") { + content.messageManager.addMessageListener('PageLoader:Load', plLoadHandlerMessage); + content.messageManager.addMessageListener('PageLoader:RecordTime', plRecordTimeMessage); + if (useMozAfterPaint) + content.messageManager.addMessageListener('PageLoader:MozAfterPaint', plPaintHandler); + removeLastAddedMsgListener = function() { + content.messageManager.removeMessageListener('PageLoader:Load', plLoadHandlerMessage); + content.messageManager.removeMessageListener('PageLoader:RecordTime', plRecordTimeMessage); + if (useMozAfterPaint) + content.messageManager.removeMessageListener('PageLoader:MozAfterPaint', plPaintHandler); + }; + } + + if (timeout > 0) { + timeoutEvent = setTimeout(loadFail, timeout); + } + startAndLoadURI(pageName); +} + +function startAndLoadURI(pageName) { + start_time = Date.now(); + content.loadURI(pageName); +} + +function loadFail() { + var pageName = pages[pageIndex].url; + dumpLine("__FAILTimeout exceeded on " + pageName + "__FAIL"); + plStop(true); +} + +function plNextPage() { + var doNextPage = false; + if (pageIndex < pages.length-1) { + pageIndex++; + doNextPage = true; + } + + if (doNextPage === true) { + // Force cycle collection (like you do) + var tccstart = new Date(); + window.QueryInterface(Components.interfaces.nsIInterfaceRequestor) + .getInterface(Components.interfaces.nsIDOMWindowUtils) + .garbageCollect(); + + setTimeout(plLoadPage, delay); + } else { + plStop(false); + } +} + +function plRecordTime(start, end) { + do_write_result(pageUrls[pageIndex], start, end); +} + +// the onload handler +function plLoadHandler(evt) { + // make sure we pick up the right load event + if (evt.type != 'load' || + evt.originalTarget.defaultView.frameElement) + return; + + content.removeEventListener('load', plLoadHandler, true); + setTimeout(waitForPainted, 0); +} + +// This is called after we have received a load event, now we wait for painted +function waitForPainted() { + + var utils = window.QueryInterface(Components.interfaces.nsIInterfaceRequestor) + .getInterface(Components.interfaces.nsIDOMWindowUtils); + + if (!utils.isMozAfterPaintPending || !useMozAfterPaint) { + _loadHandler(); + return; + } + + if (gPaintListener === false) + window.addEventListener("MozAfterPaint", plPainted, true); + gPaintListener = true; +} + +function plPainted() { + window.removeEventListener("MozAfterPaint", plPainted, true); + gPaintListener = false; + _loadHandler(); +} + +function _loadHandler() { + if (timeout > 0) { + clearTimeout(timeoutEvent); + } + var docElem = content.contentDocument.documentElement; + var width; + if ("getBoundingClientRect" in docElem) { + width = docElem.getBoundingClientRect().width; + } else if ("offsetWidth" in docElem) { + width = docElem.offsetWidth; + } + + var end_time = Date.now(); + + plRecordTime(start_time, 
end_time); + + plNextPage(); +} + +// the onload handler used for remote (e10s) browser +function plLoadHandlerMessage(message) { + _loadHandlerMessage(); +} + +// the mozafterpaint handler for remote (e10s) browser +function plPaintHandler(message) { + _loadHandlerMessage(); +} + +// the core handler for remote (e10s) browser +function _loadHandlerMessage() { + if (timeout > 0) { + clearTimeout(timeoutEvent); + } + + var end_time = Date.now(); + + if ((end_time - start_time) >= 0) { + plRecordTime(start_time, end_time); + + plNextPage(); + } +} + +// the record time handler used for remote (e10s) browser +function plRecordTimeMessage(message) { + gTime = message.json.time; + if (useMozAfterPaint) { + gStartTime = message.json.startTime; + } + _loadHandlerMessage(); +} + +function plStop(force) { + try { + if (force === false) { + pageIndex = 0; + + do_save_results(outputFile); + } + } catch (e) { + dumpLine(e); + } + + if (content) { + content.removeEventListener('load', plLoadHandler, true); + if (useMozAfterPaint) + content.removeEventListener("MozAfterPaint", plPainted, true); + + if (content.getAttribute("remote") == "true") { + content.messageManager.removeMessageListener('PageLoader:Load', plLoadHandlerMessage); + content.messageManager.removeMessageListener('PageLoader:RecordTime', plRecordTimeMessage); + if (useMozAfterPaint) + content.messageManager.removeMessageListener('PageLoader:MozAfterPaint', plPaintHandler); + + content.messageManager.loadFrameScript("data:,removeEventListener('load', _contentLoadHandler, true);", false); + } + } + + goQuitApplication(); +} + +/* Returns array */ +function plLoadURLsFromURI(manifestUri) { + var fstream = Cc["@mozilla.org/network/file-input-stream;1"] + .createInstance(Ci.nsIFileInputStream); + var uriFile = manifestUri.QueryInterface(Ci.nsIFileURL); + + fstream.init(uriFile.file, -1, 0, 0); + var istream = Cc["@mozilla.org/scriptableinputstream;1"] + .createInstance(Ci.nsIScriptableInputStream); + istream.init(fstream); + + var json = istream.read(istream.available()); + + return JSON.parse(json); +} + +function dumpLine(str) { + dump(str); + dump("\n"); +} diff --git a/pageloader/chrome/srpl.xul b/pageloader/chrome/srpl.xul new file mode 100644 index 0000000..3884c63 --- /dev/null +++ b/pageloader/chrome/srpl.xul @@ -0,0 +1,58 @@ + + + + + + + + + + + + + + diff --git a/pageloader/chrome/srquit.js b/pageloader/chrome/srquit.js new file mode 100644 index 0000000..2ab6106 --- /dev/null +++ b/pageloader/chrome/srquit.js @@ -0,0 +1,111 @@ +/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; -*- */ +/* ***** BEGIN LICENSE BLOCK ***** + * Version: MPL 1.1/GPL 2.0/LGPL 2.1 + * + * The contents of this file are subject to the Mozilla Public License Version + * 1.1 (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * http://www.mozilla.org/MPL/ + * + * Software distributed under the License is distributed on an "AS IS" basis, + * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License + * for the specific language governing rights and limitations under the + * License. + * + * The Original Code is The Original Code is Mozilla Automated Testing Code + * + * The Initial Developer of the Original Code is + * Mozilla Corporation. + * Portions created by the Initial Developer are Copyright (C) 2005 + * the Initial Developer. All Rights Reserved. 
+ * + * Contributor(s): Bob Clary + * + * Alternatively, the contents of this file may be used under the terms of + * either the GNU General Public License Version 2 or later (the "GPL"), or + * the GNU Lesser General Public License Version 2.1 or later (the "LGPL"), + * in which case the provisions of the GPL or the LGPL are applicable instead + * of those above. If you wish to allow use of your version of this file only + * under the terms of either the GPL or the LGPL, and not to allow others to + * use your version of this file under the terms of the MPL, indicate your + * decision by deleting the provisions above and replace them with the notice + * and other provisions required by the GPL or the LGPL. If you do not delete + * the provisions above, a recipient may use your version of this file under + * the terms of any one of the MPL, the GPL or the LGPL. + * + * ***** END LICENSE BLOCK ***** */ + +/* + From mozilla/toolkit/content + These files did not have a license +*/ + +/*jshint es5:true, esnext:true*/ + +function canQuitApplication() +{ + var os = Components.classes["@mozilla.org/observer-service;1"] + .getService(Components.interfaces.nsIObserverService); + if (!os) + { + return true; + } + + try + { + var cancelQuit = Components.classes["@mozilla.org/supports-PRBool;1"] + .createInstance(Components.interfaces.nsISupportsPRBool); + os.notifyObservers(cancelQuit, "quit-application-requested", null); + + // Something aborted the quit process. + if (cancelQuit.data) + { + return false; + } + } + catch (ex) + { + } + return true; +} + +function goQuitApplication() +{ + if (!canQuitApplication()) + { + return false; + } + + const kAppStartup = '@mozilla.org/toolkit/app-startup;1'; + const kAppShell = '@mozilla.org/appshell/appShellService;1'; + var appService; + var forceQuit; + + if (kAppStartup in Components.classes) + { + appService = Components.classes[kAppStartup]. + getService(Components.interfaces.nsIAppStartup); + forceQuit = Components.interfaces.nsIAppStartup.eForceQuit; + } + else if (kAppShell in Components.classes) + { + appService = Components.classes[kAppShell]. + getService(Components.interfaces.nsIAppShellService); + forceQuit = Components.interfaces.nsIAppShellService.eForceQuit; + } + else + { + throw 'goQuitApplication: no AppStartup/appShell'; + } + + try + { + appService.quit(forceQuit); + } + catch(ex) + { + throw('goQuitApplication: ' + ex); + } + + return true; +} diff --git a/pageloader/components/sr-cmdline.js b/pageloader/components/sr-cmdline.js new file mode 100644 index 0000000..9d5d8e3 --- /dev/null +++ b/pageloader/components/sr-cmdline.js @@ -0,0 +1,186 @@ +/* ***** BEGIN LICENSE BLOCK ***** + * Version: MPL 1.1/GPL 2.0/LGPL 2.1 + * + * The contents of this file are subject to the Mozilla Public License Version + * 1.1 (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * http://www.mozilla.org/MPL/ + * + * Software distributed under the License is distributed on an "AS IS" basis, + * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License + * for the specific language governing rights and limitations under the + * License. + * + * The Original Code is DOM Inspector. + * + * The Initial Developer of the Original Code is + * Christopher A. Aillon . + * Portions created by the Initial Developer are Copyright (C) 2003 + * the Initial Developer. All Rights Reserved. + * + * Contributor(s): + * Christopher A. Aillon + * L. 
David Baron, Mozilla Corporation (modified for reftest) + * Vladimir Vukicevic, Mozilla Corporation (modified for tp) + * Nick Hurley, Mozilla Corporation (modified for stoneridge) + * + * Alternatively, the contents of this file may be used under the terms of + * either the GNU General Public License Version 2 or later (the "GPL"), or + * the GNU Lesser General Public License Version 2.1 or later (the "LGPL"), + * in which case the provisions of the GPL or the LGPL are applicable instead + * of those above. If you wish to allow use of your version of this file only + * under the terms of either the GPL or the LGPL, and not to allow others to + * use your version of this file under the terms of the MPL, indicate your + * decision by deleting the provisions above and replace them with the notice + * and other provisions required by the GPL or the LGPL. If you do not delete + * the provisions above, a recipient may use your version of this file under + * the terms of any one of the MPL, the GPL or the LGPL. + * + * ***** END LICENSE BLOCK ***** */ + +/*jshint es5:true, esnext:true*/ + +// This only implements nsICommandLineHandler, since it needs +// to handle multiple arguments. + +const SR_CMDLINE_CONTRACTID = "@mozilla.org/commandlinehandler/general-startup;1?type=sr"; +const SR_CMDLINE_CLSID = Components.ID('{E17FB86D-1CEB-4B67-8A6C-5B97AD068A7F}'); +const CATMAN_CONTRACTID = "@mozilla.org/categorymanager;1"; +const nsISupports = Components.interfaces.nsISupports; + +const nsICategoryManager = Components.interfaces.nsICategoryManager; +const nsICommandLine = Components.interfaces.nsICommandLine; +const nsICommandLineHandler = Components.interfaces.nsICommandLineHandler; +const nsIComponentRegistrar = Components.interfaces.nsIComponentRegistrar; +const nsISupportsString = Components.interfaces.nsISupportsString; +const nsIWindowWatcher = Components.interfaces.nsIWindowWatcher; + +function PageLoaderCmdLineHandler() {} +PageLoaderCmdLineHandler.prototype = +{ + /* nsISupports */ + QueryInterface : function handler_QI(iid) { + if (iid.equals(nsISupports)) + return this; + + if (nsICommandLineHandler && iid.equals(nsICommandLineHandler)) + return this; + + throw Components.results.NS_ERROR_NO_INTERFACE; + }, + + /* nsICommandLineHandler */ + handle : function handler_handle(cmdLine) { + var args = {}; + try { + var uristr = cmdLine.handleFlagWithParam("sr", false); + if (uristr === null) + return; + try { + args.manifest = cmdLine.resolveURI(uristr).spec; + } catch (e) { + return; + } + + args.width = cmdLine.handleFlagWithParam("srwidth", false); + args.height = cmdLine.handleFlagWithParam("srheight", false); + args.timeout = cmdLine.handleFlagWithParam("srtimeout", false); + args.delay = cmdLine.handleFlagWithParam("srdelay", false); + args.mozafterpaint = cmdLine.handleFlag("srmozafterpaint", false); + args.outputfile = cmdLine.handleFlagWithParam("sroutput", false); + if (args.outputfile === null) { + return; + } + } + catch (e) { + return; + } + + // get our data through xpconnect + args.wrappedJSObject = args; + + var wwatch = Components.classes["@mozilla.org/embedcomp/window-watcher;1"] + .getService(nsIWindowWatcher); + wwatch.openWindow(null, "chrome://srpl/content/srpl.xul", + "_blank", "chrome,dialog=no,all", args); + cmdLine.preventDefault = true; + }, + + // NWGH: Modify the flags to have output filename + helpInfo : + " -sr Run stone ridge pageload tests on given manifest\n" + + " -sroutput Save output to \n" + + " -srwidth width Width of window\n" + + " -srheight height Height 
of window\n" +
+        "  -srtimeout <msec>    Max amount of time given for a page to load, quit if " +
+        "exceeded\n" +
+        "  -srdelay <msec>      Amount of time to wait between each pageload\n" +
+        "  -srmozafterpaint     Measure time after receiving MozAfterPaint event " +
+        "instead of load event\n"
+};
+
+
+var PageLoaderCmdLineFactory =
+{
+  createInstance : function(outer, iid)
+  {
+    if (outer !== null) {
+      throw Components.results.NS_ERROR_NO_AGGREGATION;
+    }
+
+    return new PageLoaderCmdLineHandler().QueryInterface(iid);
+  }
+};
+
+function NSGetFactory(cid) {
+  if (!cid.equals(SR_CMDLINE_CLSID))
+    throw Components.results.NS_ERROR_NOT_IMPLEMENTED;
+
+  return PageLoaderCmdLineFactory;
+}
+
+var PageLoaderCmdLineModule =
+{
+  registerSelf : function(compMgr, fileSpec, location, type)
+  {
+    compMgr = compMgr.QueryInterface(nsIComponentRegistrar);
+
+    compMgr.registerFactoryLocation(SR_CMDLINE_CLSID,
+                                    "Stone Ridge PageLoader CommandLine Service",
+                                    SR_CMDLINE_CONTRACTID,
+                                    fileSpec,
+                                    location,
+                                    type);
+
+    var catman = Components.classes[CATMAN_CONTRACTID].getService(nsICategoryManager);
+    catman.addCategoryEntry("command-line-handler",
+                            "m-sr",
+                            SR_CMDLINE_CONTRACTID, true, true);
+  },
+
+  unregisterSelf : function(compMgr, fileSpec, location)
+  {
+    compMgr = compMgr.QueryInterface(nsIComponentRegistrar);
+
+    compMgr.unregisterFactoryLocation(SR_CMDLINE_CLSID, fileSpec);
+    var catman = Components.classes[CATMAN_CONTRACTID].getService(nsICategoryManager);
+    catman.deleteCategoryEntry("command-line-handler",
+                               "m-sr", true);
+  },
+  getClassObject : function(compMgr, cid, iid)
+  {
+    return NSGetFactory(cid);
+  },
+
+  canUnload : function(compMgr)
+  {
+    return true;
+  }
+};
+
+
+function NSGetModule(compMgr, fileSpec) {
+  return PageLoaderCmdLineModule;
+}
diff --git a/pageloader/install.rdf b/pageloader/install.rdf
new file mode 100644
index 0000000..6f005f2
--- /dev/null
+++ b/pageloader/install.rdf
@@ -0,0 +1,20 @@
+<?xml version="1.0"?>
+
+<RDF xmlns="http://www.w3.org/1999/02/22-rdf-syntax-ns#"
+     xmlns:em="http://www.mozilla.org/2004/em-rdf#">
+  <Description about="urn:mozilla:install-manifest">
+    <em:id>pageloader@mozilla.org</em:id>
+    <em:version>1.0</em:version>
+    <em:targetApplication>
+      <Description>
+        <em:id>toolkit@mozilla.org</em:id>
+        <em:minVersion>2.0b3pre</em:minVersion>
+        <em:maxVersion>*</em:maxVersion>
+      </Description>
+    </em:targetApplication>
+
+    <em:name>PageLoader extension</em:name>
+    <em:description>Cycles through pages and measures load times</em:description>
+    <em:creator>Vladimir Vukicevic</em:creator>
+  </Description>
+</RDF>
diff --git a/srdata.js b/srdata.js
new file mode 100644
index 0000000..a81925e
--- /dev/null
+++ b/srdata.js
@@ -0,0 +1,64 @@
+/*
+ * This Source Code Form is subject to the terms of the Mozilla Public License,
+ * v. 2.0. If a copy of the MPL was not distributed with this file, You can
+ * obtain one at http://mozilla.org/MPL/2.0/.
+ *
+ * This file defines the commonly-used functionality needed by a stone ridge
+ * test suite. This must be run under xpcshell running in JS v1.8 mode.
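+ *
+ * (Note: srunpacker.py now also copies this file into firefox's chrome
+ * directory, so the pageloader shares do_write_result() and
+ * do_save_results(); it is no longer loaded by xpcshell alone.)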
+ */ + +/*jshint curly:true, indent:4, latedef:true, undef:true, + trailing:true, es5:true, esnext:true*/ +/*global Components:true*/ + +var STONERIDGE_RESULTS = null; + +/* + * Store some results for writing once we're all done + */ +function do_write_result(key, start, stop) { + var startms; + var stopms; + + if (STONERIDGE_RESULTS === null) { + STONERIDGE_RESULTS = {}; + } + + if (start instanceof Date) { + startms = start.valueOf(); + } else { + startms = start; + } + + if (stop instanceof Date) { + stopms = stop.valueOf(); + } else { + stopms = stop; + } + + var val = {'start': startms, 'stop': stopms, 'total': stopms - startms}; + + if (STONERIDGE_RESULTS.hasOwnProperty(key)) { + STONERIDGE_RESULTS[key].push(val); + } else { + STONERIDGE_RESULTS[key] = [val]; + } +} + +function do_save_results(output_file) { + var cc = Components.classes; + var ci = Components.interfaces; + + // Create a file pointing to our output file + var ofile = cc["@mozilla.org/file/local;1"].createInstance(ci.nsILocalFile); + ofile.initWithPath(output_file); + + // Now get an output stream for our file + var ostream = cc["@mozilla.org/network/file-output-stream;1"] + .createInstance(ci.nsIFileOutputStream); + ostream.init(ofile, -1, -1, 0); + + var jstring = JSON.stringify(STONERIDGE_RESULTS); + ostream.write(jstring, jstring.length); + ostream.close(); +} diff --git a/srnamed.py b/srnamed.py index 370182f..60dcc09 100644 --- a/srnamed.py +++ b/srnamed.py @@ -3,13 +3,13 @@ import sys import time -from dnsproxy import DnsProxyServer, UdpDnsHandler, DnsProxyException +from dnsproxy import DnsProxyServer, DnsProxyException import stoneridge listen_ip = None - +dnssrv = None IGNORE_HOSTS = ( 'puppet1.private.scl3.mozilla.com.', @@ -25,54 +25,31 @@ } -class NeckoDnsHandler(UdpDnsHandler): - def handle(self): - self.data = self.rfile.read() - self.transaction_id = self.data[0] - self.flags = self.data[1] - self.qa_counts = self.data[4:6] - self.domain = '' - operation_code = (ord(self.data[2]) >> 3) & 15 - if operation_code == self.STANDARD_QUERY_OPERATION_CODE: - self.wire_domain = self.data[12:] - self.domain = self._domain(self.wire_domain) - else: - logging.debug("DNS request with non-zero operation code: %s", - operation_code) - real_ip = self.server.passthrough_filter(self.domain) - if real_ip: - message = 'passthrough' - ip = real_ip - else: - message = 'handle' - ip = listen_ip - logging.debug('dnsproxy: %s(%s) -> %s', message, self.domain, ip) - self.reply(self.get_dns_reply(ip)) - - -def necko_passthrough(host): - logging.debug('passthrough: checking %s' % (host,)) +def srlookup(host): + logging.debug('srlookup: checking %s' % (host,)) if host in IGNORE_HOSTS: logging.debug('attempting to ignore %s' % (host,)) try: return socket.gethostbyname(host) except: - logging.error('Could not get actual IP for %s, faking it!' 
% - (host,)) + logging.error('Could not get actual IP for %s' % (host,)) + # This should result in NXDOMAIN + return None if host in SR_HOSTS: logging.debug('stone ridge host detected: %s' % (host,)) return SR_HOSTS[host] logging.debug('host not found in our exception lists') - return None + + return dnssrv.server_address[0] def daemon(): + global dnssrv logging.debug('about to start proxy server') try: - with(DnsProxyServer(False, handler=NeckoDnsHandler, - passthrough_filter=necko_passthrough)): + with DnsProxyServer(srlookup, listen_ip) as dnssrv: logging.debug('proxy server started') while True: time.sleep(1) diff --git a/srrun.py b/srrun.py index 5665ae9..eaad676 100644 --- a/srrun.py +++ b/srrun.py @@ -11,9 +11,25 @@ mypath = os.path.abspath(__file__) mydir = os.path.split(mypath)[0] -if platform.system().lower() == 'windows': +if os.getenv('VIRTUAL_ENV'): + # If we're running in a virtualenv, then we're doing development, and we + # want to use the virtualenv's python, no matter what system we're running + # on, since the virtualenv is the only python guaranteed to have all our + # required third-party modules installed. srpython = sys.executable +elif platform.system().lower() == 'windows': + # Windows doesn't have any special installation, since python doesn't come + # on windows by default. + srpython = sys.executable +elif platform.system().lower() == 'darwin': + # For Mac, we need to make sure we use the homebrew-installed python, + # instead of the system one, which is out of date. + srpython = '/usr/local/bin/python' else: + # This should handle linux, where we install our own built python in the + # srhome directory to ensure we're using a modern-enough python instead of + # whatever may have been installed with the system (which may or may not + # be modern enough for our purposes). 
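+    # For example, with this file at /home/hurley/srhome/stoneridge/srrun.py
+    # (the layout used elsewhere in this repo), the interpreter we pick up
+    # lives under /home/hurley/srhome/bin.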
srhome = os.path.join(mydir, '..') srhome = os.path.abspath(srhome) srbin = os.path.join(srhome, 'bin') diff --git a/srrunner.py b/srrunner.py index 2253d97..f037d37 100644 --- a/srrunner.py +++ b/srrunner.py @@ -37,11 +37,20 @@ def _build_testlist(self): logging.debug('searching for all tests in %s' % (self.testroot,)) if stoneridge.get_config('test', 'enabled'): - tests = ['fake.js'] + tests = [] + if os.path.exists(os.path.join(self.testroot, 'fake.js')): + tests.append('fake.js') else: - tests = [os.path.basename(f) for f in - glob.glob(os.path.join(self.testroot, '*.js'))] - tests.remove('fake.js') + jstests = [os.path.basename(f) for f in + glob.glob(os.path.join(self.testroot, '*.js'))] + try: + jstests.remove('fake.js') + except ValueError: + # Don't care if fake.js isn't in the list + pass + pagetests = [os.path.basename(f) for f in + glob.glob(os.path.join(self.testroot, '*.page'))] + tests = jstests + pagetests logging.debug('tests found %s' % (tests,)) return tests @@ -82,32 +91,46 @@ def run(self): # Ensure our output directory exists outdir = stoneridge.get_config('run', 'out') installroot = stoneridge.get_config('stoneridge', 'root') - escaped_outdir = outdir.replace('\\', '\\\\') for test in tests: logging.debug('test: %s' % (test,)) - outfile = '%s.out' % (test,) + outfile = os.path.join(outdir, '%s.out' % (test,)) logging.debug('outfile: %s' % (outfile,)) - args = preargs + [ - '-e', 'const _SR_OUT_SUBDIR = "%s";' % (escaped_outdir,), - '-e', 'const _SR_OUT_FILE = "%s";' % (outfile,), - '-f', os.path.join(installroot, 'head.js'), - '-f', os.path.join(self.testroot, test), - '-e', 'do_stoneridge(); quit(0);' - ] - logging.debug('xpcshell args: %s' % (args,)) + if test.endswith('.js'): + escaped_outfile = outfile.replace('\\', '\\\\') + args = preargs + [ + '-e', 'const _SR_OUT_FILE = "%s";' % (escaped_outfile,), + '-f', os.path.join(installroot, 'srdata.js'), + '-f', os.path.join(installroot, 'head.js'), + '-f', os.path.join(self.testroot, test), + '-e', 'do_stoneridge(); quit(0);' + ] + logging.debug('xpcshell args: %s' % (args,)) + runner = stoneridge.run_xpcshell + else: + args = [ + '-sr', os.path.join(self.testroot, test), + '-sroutput', outfile, + # -srwidth, , + # -srheight, , + # -srtimeout, , + # -srdelay, , + # -srmozafterpaint + ] + runner = stoneridge.run_firefox + if self.unittest: logging.debug('Not running processes: in unit test mode') else: - xpcshell_out_file = '%s.xpcshell.out' % (test,) - xpcshell_out_file = os.path.join(outdir, xpcshell_out_file) - logging.debug('xpcshell output at %s' % (xpcshell_out_file,)) + process_out_file = '%s.process.out' % (test,) + process_out_file = os.path.join(outdir, process_out_file) + logging.debug('process output at %s' % (process_out_file,)) timed_out = False - with file(xpcshell_out_file, 'wb') as f: + with file(process_out_file, 'wb') as f: try: - res, _ = stoneridge.run_xpcshell(args, stdout=f) - except stoneridge.XpcshellTimeout: - logging.exception('xpcshell timed out!') + res = runner(args, f) + except stoneridge.TestProcessTimeout: + logging.exception('test process timed out!') timed_out = True res = None if res or timed_out: diff --git a/srunpacker.py b/srunpacker.py index 37d0843..cf5ea73 100644 --- a/srunpacker.py +++ b/srunpacker.py @@ -48,9 +48,9 @@ def __init__(self): self.testzip = os.path.join(downloaddir, 'tests.zip') logging.debug('test zip file: %s' % (self.testzip,)) - def _copy_tree(self, unzipdir, name): - logging.debug('_copy_tree(%s, %s)' % (unzipdir, name)) - srcdir = os.path.join(unzipdir, 
'bin', name) + def _copy_tree(self, srcdir, name): + logging.debug('_copy_tree(%s, %s)' % (srcdir, name)) + srcdir = os.path.join(srcdir, name) files = os.listdir(srcdir) dstdir = os.path.join(self.bindir, name) logging.debug('srcdir: %s' % (srcdir,)) @@ -85,8 +85,9 @@ def run(self): z.extractall(unzipdir, members) # Put the xpcshell binary where it belongs + unzipbin = os.path.join(unzipdir, 'bin') xpcshell_bin = stoneridge.get_config('machine', 'xpcshell') - xpcshell = os.path.join(unzipdir, 'bin', xpcshell_bin) + xpcshell = os.path.join(unzipbin, xpcshell_bin) logging.debug('xpcshell: %s' % (xpcshell,)) # Apparently xpcshell stopped being executable in the tests zip at some @@ -99,11 +100,39 @@ def run(self): # Put our components into place logging.debug('copying components') - self._copy_tree(unzipdir, 'components') + self._copy_tree(unzipbin, 'components') # Put the plugins in place, in case we need them logging.debug('copying plugins') - self._copy_tree(unzipdir, 'plugins') + self._copy_tree(unzipbin, 'plugins') + + # Put the pageloader components into place + srroot = stoneridge.get_config('stoneridge', 'root') + pageloader = os.path.join(srroot, 'pageloader') + self._copy_tree(pageloader, 'components') + self._copy_tree(pageloader, 'chrome') + + # Now we need to put srdata.js into the appropriate place for it to be + # picked up by the pageloader + chrome = os.path.join(self.bindir, 'chrome') + srdatasrc = os.path.join(srroot, 'srdata.js') + srdatadst = os.path.join(chrome, 'srdata.js') + if os.path.exists(srdatadst): + os.unlink(srdatadst) + logging.debug('copy srdata.js %s -> %s' % (srdatasrc, srdatadst)) + shutil.copyfile(srdatasrc, srdatadst) + + # Finally, we need to update chrome.manifest with the appropriate bits + # from our local pageloader + plmanifest = os.path.join(pageloader, 'chrome.manifest') + fxmanifest = os.path.join(self.bindir, 'chrome.manifest') + logging.debug('append %s to %s' % (plmanifest, fxmanifest)) + with file(fxmanifest, 'rb') as f: + lines = f.readlines() + with file(plmanifest, 'rb') as f: + lines.extend(f.readlines()) + with file(fxmanifest, 'wb') as f: + f.writelines(lines) def unpack_firefox(self): logging.critical('Base unpack_firefox called!') diff --git a/srworker.py b/srworker.py index f05c2af..bffd38e 100644 --- a/srworker.py +++ b/srworker.py @@ -219,17 +219,17 @@ def run_test(self): self.run_process('uploader') -def daemon(): - osname = stoneridge.get_config('machine', 'os') - queue = stoneridge.CLIENT_QUEUES[osname] - - worker = StoneRidgeWorker(queue) - worker.run() - - @stoneridge.main def main(): - parser = stoneridge.DaemonArgumentParser() + parser = stoneridge.ArgumentParser() parser.parse_args() - parser.start_daemon(daemon) + osname = stoneridge.get_config('machine', 'os') + queue = stoneridge.CLIENT_QUEUES[osname] + + while True: + try: + worker = StoneRidgeWorker(queue) + worker.run() + except: + logging.exception('Worker failed') diff --git a/stoneridge.py b/stoneridge.py index 0883766..54e8fb8 100644 --- a/stoneridge.py +++ b/stoneridge.py @@ -218,62 +218,130 @@ def get_config_bool(section, option): return value -class XpcshellTimeout(Exception): - def __init__(self, timeout_secs, xpcshell_stdout): - self.timeout_secs = timeout_secs - self.xpcshell_output_fd = xpcshell_stdout - Exception.__init__(self, - 'Killed xpcshell after %s seconds' % - (timeout_secs,)) +# Supporting variables for running test processes (xpcshell & firefox in +# pageloader mode) +_test_process_environ = None +_bindir = None +_binaries = { + 'firefox': 
None, + 'xpcshell': None +} +_timeouts = { + 'firefox': None, + 'xpcshell': None +} -_xpcshell = None -_xpcshell_environ = None +class TestProcessTimeout(Exception): + """Exception type for when we manually time out the test process + """ + def __init__(self, process_type, timeout_secs, process_stdout): + self.process_type = process_type + self.timeout_secs = timeout_secs + self.process_output_fd = process_stdout + Exception.__init__(self, + 'Killed test process %s after %s seconds' % + (process_type, timeout_secs)) -def run_xpcshell(args, stdout=subprocess.PIPE): - """Run xpcshell with the appropriate args. +def _ensure_bindir(): + """Make sure our bindir for the tests exists. """ - global _xpcshell - global _xpcshell_environ + global _bindir - bindir = get_config('run', 'bin') - if bindir is None: - return (None, []) + if _bindir is None: + _bindir = get_config('run', 'bin') + if _bindir is None: + raise Exception('Missing bindir for tests!') + if not os.path.exists(_bindir): + raise Exception('Missing bindir for tests!') - if not os.path.exists(bindir): - return (None, []) - if _xpcshell_environ is None: - _xpcshell_environ = copy.copy(os.environ) - ldlibpath = _xpcshell_environ.get('LD_LIBRARY_PATH') +def _ensure_test_process_environ(): + """Make sure we have an environment for our test process containing the + appropriate LD_LIBRARY_PATH. + """ + global _test_process_environ + + if _test_process_environ is None: + _test_process_environ = copy.copy(os.environ) + ldlibpath = _test_process_environ.get('LD_LIBRARY_PATH') if ldlibpath: - ldlibpath = os.path.pathsep.join([bindir, ldlibpath]) + ldlibpath = os.path.pathsep.join([_bindir, ldlibpath]) else: - ldlibpath = bindir - _xpcshell_environ['LD_LIBRARY_PATH'] = ldlibpath + ldlibpath = _bindir + _test_process_environ['LD_LIBRARY_PATH'] = ldlibpath - if _xpcshell is None: - xpcshell_bin = get_config('machine', 'xpcshell') - _xpcshell = os.path.join(bindir, xpcshell_bin) - xpcargs = [_xpcshell] + args - logging.debug('Running xpcshell: %s' % (xpcargs,)) +def _ensure_binary(proctype): + """Make sure we know where the binary for our test lives. + + proctype - one of 'xpcshell' or 'firefox' + """ + if _binaries[proctype] is None: + binary = get_config('machine', proctype) + _binaries[proctype] = os.path.join(_bindir, binary) + if not os.path.exists(_binaries[proctype]): + _binaries[proctype] = None + raise Exception('Missing binary for %s' % (proctype,)) - xpcshell_timeout = get_config_int('xpcshell', 'timeout') - xpcshell_start = int(time.time()) - proc = Process(xpcargs, stdout=stdout, cwd=bindir, env=_xpcshell_environ) +def _ensure_timeout(proctype): + """Make sure we know how long to wait before timing out the test process. + We default to 15 minutes (900 seconds) + + proctype - one of 'xpcshell' or 'firefox' + """ + if _timeouts[proctype] is None: + _timeouts[proctype] = get_config_int(proctype, 'timeout', 900) + - while (int(time.time()) - xpcshell_start) < xpcshell_timeout: +def _run_test_process(proctype, args, stdout): + """Run a test process, either xpcshell or firefox. 
+ + proctype - one of 'xpcshell' or 'firefox' + args - list of arguments to be passed to the process + stdout - where to shove the stdout data from the process + """ + start = int(time.time()) + + procargs = [_binaries[proctype]] + args + + proc = Process(procargs, stdout=stdout, cwd=_bindir, + env=_test_process_environ) + + timeout = _timeouts[proctype] + while (int(time.time()) - start) < timeout: time.sleep(5) if proc.poll() is not None: - return (proc.returncode, proc.stdout) + return proc.returncode # If we get here, that means we hit the timeout proc.kill() - raise XpcshellTimeout(xpcshell_timeout, proc.stdout) + raise TestProcessTimeout(proctype, timeout, proc.stdout) + + +def run_firefox(args, stdout): + """Run firefox with the appropriate args + """ + _ensure_bindir() + _ensure_test_process_environ() + _ensure_binary('firefox') + _ensure_timeout('firefox') + + return _run_test_process('firefox', args, stdout) + + +def run_xpcshell(args, stdout): + """Run xpcshell with the appropriate args. + """ + _ensure_bindir() + _ensure_test_process_environ() + _ensure_binary('xpcshell') + _ensure_timeout('xpcshell') + + return _run_test_process('xpcshell', args, stdout) _os_version = None diff --git a/tests/pinterest.page b/tests/pinterest.page new file mode 100644 index 0000000..6595420 --- /dev/null +++ b/tests/pinterest.page @@ -0,0 +1,3 @@ +[ + {"url": "http://pinterest.com"} +] diff --git a/tools/wprexplode.py b/tools/wprexplode.py new file mode 100644 index 0000000..eb3d99f --- /dev/null +++ b/tools/wprexplode.py @@ -0,0 +1,99 @@ +import argparse +import hashlib +import os +import sys +import urlparse + +import httparchive + +VHOST_CONFIG = """ + RewriteEngine On + RewriteOptions Inherit + ServerName %(host)s + DocumentRoot %(toplevel)s/%(host)s + +""" + + +def explode_archive(wprfile, archiveroot): + """Explode a HTTP Archive into its component pages. For a file named + foo.har, the exploded http archive will be put in a directory named foo + with the same parent directory as foo.har. Beneath that will be directories + named for each host in the archive, each containing the full paths to the + files from that host massaged to be consumed by apache's mod_asis + """ + # Make our directory to explode the HAR into + hardir = os.path.splitext(os.path.basename(wprfile))[0] + toplevel = os.path.join(archiveroot, hardir) + os.mkdir(toplevel) + + # Keep track of the hosts in here so we can make http conf for them + hosts = set() + + # Load our HAR file + har = httparchive.HttpArchive.Load(wprfile) + + for request in har.get_requests(): + # Keep track of this host + hosts.add(request.host) + + # Figure out where to put this file + hostdir = os.path.join(toplevel, request.host) + url = urlparse.urlparse(request.path) + + # Make sure our destination directory exists + if not os.path.exists(hostdir): + os.makedirs(hostdir) + + # Make sure we have a file name + name = url.path + url.params + if not name: + name = '/' + + if url.query: + name = name + '?' 
+ url.query + + # Hash everything up + name = hashlib.sha1(name).hexdigest() + + # Make apache recognize the file as an "asis" file + name += '.asis' + + # Write our data out + fname = os.path.join(hostdir, name) + response = har[request] + if os.path.exists(fname): + sys.stderr.write('WARNING: Replacing %s%s\n' % (request.host, + url.path)) + with file(fname, 'w') as f: + # Special "Status:" header for apache to set the HTTP status + f.write('Status: %s %s\n' % (response.status, response.reason)) + + # The rest of the headers are pretty standard + for k, v in sorted(response.headers): + if k.lower() == 'transfer-encoding' and v.lower() == 'chunked': + # Don't use chunked transfer-encoding, it breaks things + continue + f.write('%s: %s\n' % (k, v)) + + # Apache expects a blank line to separate headers and content + f.write('\n') + + # This should give us the raw file (compressed if appropriate) + f.write(''.join(response.response_data)) + + # Now write out the httpd configuration specific to this archive + httpd_conf_name = '%s.conf' % (hardir,) + httpd_conf = os.path.join(archiveroot, httpd_conf_name) + with file(httpd_conf, 'w') as f: + for host in hosts: + f.write(VHOST_CONFIG % {'host': host, 'toplevel': toplevel}) + + +if __name__ == '__main__': + parser = argparse.ArgumentParser() + parser.add_argument('--archive', dest='archive', required=True) + parser.add_argument('--outdir', dest='outdir', required=True) + args = parser.parse_args() + + explode_archive(args.archive, args.outdir) diff --git a/windows/srwebworker.py b/windows/srwebworker.py index 157c18e..37748d1 100644 --- a/windows/srwebworker.py +++ b/windows/srwebworker.py @@ -6,6 +6,7 @@ import json import logging import requests +import time import srworker import stoneridge @@ -17,26 +18,47 @@ def __init__(self): self.setup() def run(self): - res = requests.get(self.url) + handled = True - if res.status_code != 200: - logging.error('Got non-200 response: %s %s (text %s)' % - (res.status_code, res.reason, res.text)) - return + while True: + if not handled: + time.sleep(5) - logging.debug('Got response %s' % (res.text,)) + handled = False - if not res.text: - logging.debug('No entries waiting!') - return + try: + res = requests.get(self.url) + except: + logging.exception('Error getting events') + continue - args = json.loads(res.text) + if res.status_code != 200: + logging.error('Got non-200 response: %s %s (text %s)' % + (res.status_code, res.reason, res.text)) + continue - logging.debug('Handling request') + logging.debug('Got response %s' % (res.text,)) - self.handle(**args) + if not res.text: + logging.debug('No entries waiting!') + continue - logging.debug('Done') + try: + args = json.loads(res.text) + except: + logging.exception('Error loading result as json') + continue + + logging.debug('Handling request') + + handled = True + try: + self.handle(**args) + except: + logging.exception('Error handling request') + continue + + logging.debug('Done') @stoneridge.main @@ -45,7 +67,4 @@ def main(): parser.parse_args() worker = StoneRidgeWebWorker() - try: - worker.run() - except: - logging.exception('Error running this time') + worker.run() diff --git a/windows/srworker.bat b/windows/srworker.bat new file mode 100644 index 0000000..8bd4ffa --- /dev/null +++ b/windows/srworker.bat @@ -0,0 +1,2 @@ +cd C:\srhome\stoneridge +C:\Python27\python.exe C:\srhome\stoneridge\srrun.py C:\srhome\stoneridge\windows\srwebworker.py --config C:\srhome\stoneridge.ini --log C:\srhome\srworker.log diff --git a/wpr/PKG-INFO b/wpr/PKG-INFO new file 
mode 100644 index 0000000..9863d0b --- /dev/null +++ b/wpr/PKG-INFO @@ -0,0 +1,10 @@ +Metadata-Version: 1.0 +Name: webpagereplay +Version: 1.1.2 +Summary: Record and replay web content +Home-page: http://code.google.com/p/web-page-replay/ +Author: Web Page Replay Project Authors +Author-email: web-page-replay-dev@googlegroups.com +License: Apache License 2.0 +Description: UNKNOWN +Platform: UNKNOWN diff --git a/wpr/cachemissarchive.py b/wpr/cachemissarchive.py new file mode 100755 index 0000000..e54880f --- /dev/null +++ b/wpr/cachemissarchive.py @@ -0,0 +1,260 @@ +#!/usr/bin/env python +# Copyright 2011 Google Inc. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Create and view cache miss archives. + +Usage: +./cachemissarchive.py + +This will print out some statistics of the cache archive. +""" + +import logging +import os +import sys +from perftracker import runner_cfg +import persistentmixin + + +def format_request(request, join_val=' ', use_path=True, + use_request_body=False, headers=False): + if use_path: + request_parts = [request.command, request.host + request.path] + else: + request_parts = [request.command, request.host] + if use_request_body: + request_parts.append(request.request_body) + if headers: + request_parts.append(request.headers) + return join_val.join([str(x) for x in request_parts]) + + +class CacheMissArchive(persistentmixin.PersistentMixin): + """Archives cache misses from playback mode. + + Uses runner_cfg.urls for tracking the current page url. + + Attributes: + archive_file: output file to store cache miss data + current_page_url: any cache misses will be marked as caused by this URL + page_urls: the list of urls to record and keep track of + archive: dict of cache misses, where the key is a page URL and + the value is a list of ArchivedHttpRequest objects + request_counts: dict that records the number of times a request is issued in + both record and replay mode + """ + + def __init__(self, archive_file): + """Initialize CacheMissArchive. + + Args: + archive_file: output file to store data + """ + self.archive_file = archive_file + self.current_page_url = None + + # TODO: Pass in urls to CacheMissArchive without runner_cfg dependency + if runner_cfg.urls: + self.page_urls = runner_cfg.urls + + # { URL: [archived_http_request, ...], ... } + self.archive = {} + + # { archived_http_request: (num_record_requests, num_replay_requests), ... } + self.request_counts = {} + + def record_cache_miss(self, request, page_url=None): + """Records a cache miss for given request. + + Args: + request: instance of ArchivedHttpRequest that causes a cache miss + page_url: specify the referer URL that caused this cache miss + """ + if not page_url: + page_url = self.current_page_url + logging.debug('Cache miss on %s', request) + self._append_archive(page_url, request) + + def set_urls_list(self, urls): + self.page_urls = urls + + def record_request(self, request, is_record_mode, is_cache_miss=False): + """Records the request into the cache archive. 
+
+    Should be updated on every HTTP request.
+
+    Also updates the current page_url contained in runner_cfg.urls.
+
+    Args:
+      request: instance of ArchivedHttpRequest
+      is_record_mode: indicates whether WPR is on record mode
+      is_cache_miss: if True, records the request as a cache miss
+    """
+    self._record_request(request, is_record_mode)
+
+    page_url = request.host + request.path
+
+    for url in self.page_urls:
+      if self._match_urls(page_url, url):
+        self.current_page_url = url
+        logging.debug('Updated current url to %s', self.current_page_url)
+        break
+
+    if is_cache_miss:
+      self.record_cache_miss(request)
+
+  def _record_request(self, request, is_record_mode):
+    """Adds 1 to the appropriate request count.
+
+    Args:
+      request: instance of ArchivedHttpRequest
+      is_record_mode: indicates whether WPR is on record mode
+    """
+    num_record, num_replay = self.request_counts.get(request, (0, 0))
+    if is_record_mode:
+      num_record += 1
+    else:
+      num_replay += 1
+    self.request_counts[request] = (num_record, num_replay)
+
+  def request_diff(self, is_show_all=False):
+    """Calculates which requests were sent in record mode but not in replay
+    mode, and vice versa.
+
+    Args:
+      is_show_all: If False, only includes instances where the number of
+        requests issued in record/replay mode differs. If True, also includes
+        the instances where the counts are equal.
+    Returns:
+      A string displaying difference in requests between record and replay modes
+    """
+    str_list = ['Diff of requests sent in record mode versus replay mode\n']
+    less = []
+    equal = []
+    more = []
+
+    for request, (num_record, num_replay) in self.request_counts.items():
+      format_req = format_request(request, join_val=' ',
+                                  use_path=True, use_request_body=False)
+      request_line = '%s record: %d, replay: %d' % (
+          format_req, num_record, num_replay)
+      if num_record < num_replay:
+        less.append(request_line)
+      elif num_record == num_replay:
+        equal.append(request_line)
+      else:
+        more.append(request_line)
+
+    if is_show_all:
+      str_list.extend(sorted(equal))
+
+    str_list.append('')
+    str_list.extend(sorted(less))
+    str_list.append('')
+    str_list.extend(sorted(more))
+
+    return '\n'.join(str_list)
+
+  def _match_urls(self, url_1, url_2):
+    """Returns true if urls match.
+
+    Args:
+      url_1: url string (e.g. 'http://www.cnn.com')
+      url_2: same as url_1
+    Returns:
+      True if the two urls match, false otherwise
+    """
+    scheme = 'http://'
+    if url_1.startswith(scheme):
+      url_1 = url_1[len(scheme):]
+    if url_2.startswith(scheme):
+      url_2 = url_2[len(scheme):]
+    return url_1 == url_2
+
+  def _append_archive(self, page_url, request):
+    """Appends the corresponding (page_url, request) pair to the archive dictionary.
+
+    Args:
+      page_url: page_url string (e.g. 
'http://www.cnn.com') + request: instance of ArchivedHttpRequest + """ + self.archive.setdefault(page_url, []) + self.archive[page_url].append(request) + + def __repr__(self): + return repr((self.archive_file, self.archive)) + + def Persist(self): + self.current_page_url = None + persistentmixin.PersistentMixin.Persist(self, self.archive_file) + + def get_total_referers(self): + return len(self.archive) + + def get_total_cache_misses(self): + count = 0 + for k in self.archive: + count += len(self.archive[k]) + return count + + def get_total_referer_cache_misses(self): + count = 0 + if self.page_urls: + count = sum(len(v) for k, v in self.archive.items() + if k in self.page_urls) + return count + + def get_cache_misses(self, page_url, join_val=' ', + use_path=False, use_request_body=False): + """Returns a list of cache miss requests from the page_url. + + Args: + page_url: url of the request (e.g. http://www.zappos.com/) + join_val: value to join output string with + use_path: true if path is to be included in output display + use_request_body: true if request_body is to be included in output display + Returns: + A list of cache miss requests (in textual representation) from page_url + """ + misses = [] + if page_url in self.archive: + cache_misses = self.archive[page_url] + for k in cache_misses: + misses.append(format_request(k, join_val, use_path, use_request_body)) + return misses + + def get_all_cache_misses(self, use_path=False): + """Format cache misses into concise visualization.""" + all_cache_misses = '' + for page_url in self.archive: + misses = self.get_cache_misses(page_url, use_path=use_path) + all_cache_misses = '%s%s --->\n %s\n\n' % ( + all_cache_misses, page_url, '\n '.join(misses)) + return all_cache_misses + + +if __name__ == '__main__': + archive_file = sys.argv[1] + cache_archive = CacheMissArchive.Load(archive_file) + + print 'Total cache misses: %d' % cache_archive.get_total_cache_misses() + print 'Total page_urls cache misses: %d' % ( + cache_archive.get_total_referer_cache_misses()) + print 'Total referers: %d\n' % cache_archive.get_total_referers() + print 'Referers are:' + for ref in cache_archive.archive: + print '%s with %d cache misses' % (ref, len(cache_archive.archive[ref])) + print + print cache_archive.get_all_cache_misses(use_path=True) + print diff --git a/wpr/cachemissarchive_test.py b/wpr/cachemissarchive_test.py new file mode 100755 index 0000000..c624e9d --- /dev/null +++ b/wpr/cachemissarchive_test.py @@ -0,0 +1,123 @@ +#!/usr/bin/env python +# Copyright 2011 Google Inc. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
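# Editor's note: a minimal usage sketch of the CacheMissArchive API exercised
# by the tests below; this is not part of the original patch, and the archive
# file name, host, and path are invented for illustration.
#
#   import cachemissarchive
#   from mockhttprequest import ArchivedHttpRequest
#
#   archive = cachemissarchive.CacheMissArchive('example-archive')
#   archive.set_urls_list(['http://www.example.com/'])
#   request = ArchivedHttpRequest('GET', 'www.example.com', '/', None, {})
#   archive.record_request(request, is_record_mode=True)   # counts: (1, 0)
#   archive.record_request(request, is_record_mode=False)  # counts: (1, 1)
#   print archive.request_diff(is_show_all=True)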
+ +import ast +import cachemissarchive +from mockhttprequest import ArchivedHttpRequest +import os +import unittest +import util + + +def get_mock_requests(): + keepends = True + return util.resource_string('mock-archive.txt').splitlines(keepends) + + +class CacheMissArchiveTest(unittest.TestCase): + + HEADERS = [('accept-encoding', 'gzip,deflate')] + REQUEST = ArchivedHttpRequest( + 'GET', 'www.test.com', '/', None, HEADERS) + + def setUp(self): + self.load_mock_archive() + + def load_mock_archive(self): + self.cache_archive = cachemissarchive.CacheMissArchive('mock-archive') + self.num_requests = 0 + urls_list = [ + 'http://www.zappos.com/', + 'http://www.msn.com/', + 'http://www.amazon.com/', + 'http://www.google.com/', + ] + self.cache_archive.set_urls_list(urls_list) + for line in get_mock_requests(): + # Each line contains: (command, host, path, request_body, headers) + # Delimited by '%' + args = line.split('%') + headers = ast.literal_eval(args[4].strip('\n ')) + request = ArchivedHttpRequest( + args[0], args[1], args[2], args[3], headers) + self.cache_archive.record_request(request, is_record_mode=False, + is_cache_miss=True) + self.num_requests += 1 + + def test_init(self): + empty_archive = cachemissarchive.CacheMissArchive('empty-archive') + self.assert_(not empty_archive.archive) + + def test_record_cache_miss(self): + cache_archive = cachemissarchive.CacheMissArchive('empty-archive') + referer = 'mock_referer' + cache_archive.record_cache_miss(self.REQUEST, page_url=referer) + self.assert_(cache_archive.archive[referer]) + + def test__match_urls(self): + self.assert_(self.cache_archive._match_urls( + 'http://www.cnn.com', 'http://www.cnn.com')) + self.assert_(self.cache_archive._match_urls( + 'http://www.cnn.com', 'www.cnn.com')) + self.assert_(not self.cache_archive._match_urls( + 'http://www.zappos.com', 'http://www.cnn.com')) + self.assert_(not self.cache_archive._match_urls( + 'www.zappos.com', 'www.amazon.com')) + + def test_get_total_referers_small(self): + cache_archive = cachemissarchive.CacheMissArchive('empty-archive') + self.assertEqual(cache_archive.get_total_referers(), 0) + referer = 'mock_referer' + cache_archive.record_cache_miss(self.REQUEST, page_url=referer) + self.assertEqual(cache_archive.get_total_referers(), 1) + + def test_get_total_referers_large(self): + self.assertEqual(self.cache_archive.get_total_referers(), 4) + + def test_get_total_cache_misses(self): + self.assertEqual(self.cache_archive.get_total_cache_misses(), + self.num_requests) + + def test_get_total_referer_cache_misses(self): + self.assertEqual(self.cache_archive.get_total_referer_cache_misses(), + self.num_requests) + + def test_record_request(self): + request = self.REQUEST + cache_archive = cachemissarchive.CacheMissArchive('empty-archive') + self.assertEqual(len(cache_archive.request_counts), 0) + + cache_archive.record_request(request, is_record_mode=True, + is_cache_miss=False) + self.assertEqual(len(cache_archive.request_counts), 1) + self.assertEqual(cache_archive.request_counts[request], (1, 0)) + + cache_archive.record_request(request, is_record_mode=False, + is_cache_miss=False) + self.assertEqual(len(cache_archive.request_counts), 1) + self.assertEqual(cache_archive.request_counts[request], (1, 1)) + + def test_get_cache_misses(self): + self.assertEqual( + len(self.cache_archive.get_cache_misses('http://www.zappos.com/')), 5) + self.assertEqual( + len(self.cache_archive.get_cache_misses('http://www.msn.com/')), 3) + self.assertEqual( + 
len(self.cache_archive.get_cache_misses('http://www.google.com/')), 1) + self.assertEqual( + len(self.cache_archive.get_cache_misses('http://www.amazon.com/')), 1) + +if __name__ == '__main__': + unittest.main() diff --git a/wpr/customhandlers.py b/wpr/customhandlers.py index dad6324..0c4a358 100644 --- a/wpr/customhandlers.py +++ b/wpr/customhandlers.py @@ -13,64 +13,102 @@ # See the License for the specific language governing permissions and # limitations under the License. +"""Handle special HTTP requests. + +/web-page-replay-generate-[RESPONSE_CODE] + - Return the given RESPONSE_CODE. +/web-page-replay-post-image-[FILENAME] + - Save the posted image to local disk. +/web-page-replay-command-[record|replay|status] + - Optional. Enable by calling custom_handlers.add_server_manager_handler(...). + - Change the server mode to either record or replay. + + When switching to record, the http_archive is cleared. + + When switching to replay, the http_archive is maintained. +""" + import base64 +import httparchive +import httplib +import json import logging import os -GENERATOR_URL_PREFIX = '/web-page-replay-generate-' -POST_IMAGE_URL_PREFIX = '/web-page-replay-post-image-' +COMMON_URL_PREFIX = '/web-page-replay-' +COMMAND_URL_PREFIX = COMMON_URL_PREFIX + 'command-' +GENERATOR_URL_PREFIX = COMMON_URL_PREFIX + 'generate-' +POST_IMAGE_URL_PREFIX = COMMON_URL_PREFIX + 'post-image-' IMAGE_DATA_PREFIX = 'data:image/png;base64,' +def SimpleResponse(status): + """Return an ArchivedHttpResponse with |status| code and a simple text body.""" + return httparchive.create_response(status) + + +def JsonResponse(data): + """Return an ArchivedHttpResponse with |data| encoded as json in the body.""" + status = 200 + reason = 'OK' + headers = [('content-type', 'application/json')] + body = json.dumps(data) + return httparchive.create_response(status, reason, headers, body) + + class CustomHandlers(object): def __init__(self, screenshot_dir=None): - if screenshot_dir and not os.path.exists(screenshot_dir): - try: - os.makedirs(screenshot_dir) - except: - logging.error('%s does not exist and could not be created.', - screenshot_dir) - screenshot_dir = None - self.screenshot_dir = screenshot_dir + """Initialize CustomHandlers. + + Args: + screenshot_dir: a path to which screenshots are saved. + """ + self.handlers = [ + (GENERATOR_URL_PREFIX, self.get_generator_url_response_code)] + if screenshot_dir: + if not os.path.exists(screenshot_dir): + try: + os.makedirs(screenshot_dir) + except OSError: + logging.error('Unable to create screenshot dir: %s', screenshot_dir) + screenshot_dir = None + if screenshot_dir: + self.screenshot_dir = screenshot_dir + self.handlers.append( + (POST_IMAGE_URL_PREFIX, self.handle_possible_post_image)) def handle(self, request): - """Handles special URLs needed for the benchmark. + """Dispatches requests to matching handlers. Args: request: an http request Returns: - If request is for a special URL, a 3-digit integer like 404. - Otherwise, None. + ArchivedHttpResponse or None.
""" - response_code = self.get_generator_url_response_code(request.path) - if response_code: - return response_code - - response_code = self.handle_possible_post_image(request) - if response_code: - return response_code - + for prefix, handler in self.handlers: + if request.path.startswith(prefix): + return handler(request, request.path[len(prefix):]) return None - def get_generator_url_response_code(self, request_path): + def get_generator_url_response_code(self, request, url_suffix): """Parse special generator URLs for the embedded response code. Clients like perftracker can use URLs of this form to request a response with a particular response code. Args: - request_path: a string like "/foo", or "/web-page-replay-generator-404" + request: an ArchivedHttpRequest instance + url_suffix: string that is after the handler prefix (e.g. 304) Returns: - On a match, a 3-digit integer like 404. + On a match, an ArchivedHttpResponse. Otherwise, None. """ - prefix, response_code = request_path[:-3], request_path[-3:] - if prefix == GENERATOR_URL_PREFIX and response_code.isdigit(): - return int(response_code) - return None + try: + response_code = int(url_suffix) + return SimpleResponse(response_code) + except ValueError: + return None - def handle_possible_post_image(self, request): + def handle_possible_post_image(self, request, url_suffix): """If sent, saves embedded image to local directory. Expects a special url containing the filename. If sent, saves the base64 @@ -78,24 +116,20 @@ def handle_possible_post_image(self, request): passing in screenshot_dir to the initializer for this class. Args: - request: an http request - + request: an ArchivedHttpRequest instance + url_suffix: string that is after the handler prefix (e.g. 'foo.png') Returns: - On a match, a 3-digit integer response code. - False otherwise. + On a match, an ArchivedHttpResponse. + Otherwise, None. """ - if not self.screenshot_dir: - return None - - prefix = request.path[:len(POST_IMAGE_URL_PREFIX)] - basename = request.path[len(POST_IMAGE_URL_PREFIX):] - if prefix != POST_IMAGE_URL_PREFIX or not basename: + basename = url_suffix + if not basename: return None data = request.request_body if not data.startswith(IMAGE_DATA_PREFIX): logging.error('Unexpected image format for: %s', basename) - return 400 + return SimpleResponse(400) data = data[len(IMAGE_DATA_PREFIX):] png = base64.b64decode(data) @@ -103,8 +137,47 @@ def handle_possible_post_image(self, request): '%s-%s.png' % (request.host, basename)) if not os.access(self.screenshot_dir, os.W_OK): logging.error('Unable to write to: %s', filename) - return 400 + return SimpleResponse(400) with file(filename, 'w') as f: f.write(png) - return 200 + return SimpleResponse(200) + + def add_server_manager_handler(self, server_manager): + """Add the ability to change the server mode (e.g. to record mode). + Args: + server_manager: a servermanager.ServerManager instance. + """ + self.server_manager = server_manager + self.handlers.append( + (COMMAND_URL_PREFIX, self.handle_server_manager_command)) + + def handle_server_manager_command(self, request, url_suffix): + """Parse special URLs for the embedded server manager command. + + Clients like webpagetest.org can use URLs of this form to change + the replay server from record mode to replay mode. + + This handler is not in the default list of handlers. Call + add_server_manager_handler to add it. + + In the future, this could be expanded to save or serve archive files. 
+ + Args: + request: an ArchivedHttpRequest instance + url_suffix: string that is after the handler prefix (e.g. 'record') + Returns: + On a match, an ArchivedHttpResponse. + Otherwise, None. + """ + command = url_suffix + if command == 'record': + self.server_manager.SetRecordMode() + return SimpleResponse(200) + elif command == 'replay': + self.server_manager.SetReplayMode() + return SimpleResponse(200) + elif command == 'status': + is_record_mode = self.server_manager.IsRecordMode() + return JsonResponse({'is_record_mode': is_record_mode}) + return None diff --git a/wpr/daemonserver.py b/wpr/daemonserver.py old mode 100755 new mode 100644 diff --git a/wpr/deterministic.js b/wpr/deterministic.js new file mode 100644 index 0000000..291c0c5 --- /dev/null +++ b/wpr/deterministic.js @@ -0,0 +1,42 @@ +(function () { + var orig_date = Date; + var random_count = 0; + var date_count = 0; + var random_seed = 0.462; + var time_seed = 1204251968254; + var random_count_threshold = 25; + var date_count_threshold = 25; + Math.random = function() { + random_count++; + if (random_count > random_count_threshold){ + random_seed += 0.1; + random_count = 1; + } + return (random_seed % 1); + }; + Date = function() { + if (this instanceof Date) { + date_count++; + if (date_count > date_count_threshold){ + time_seed += 50; + date_count = 1; + } + switch (arguments.length) { + case 0: return new orig_date(time_seed); + case 1: return new orig_date(arguments[0]); + default: return new orig_date(arguments[0], arguments[1], + arguments.length >= 3 ? arguments[2] : 1, + arguments.length >= 4 ? arguments[3] : 0, + arguments.length >= 5 ? arguments[4] : 0, + arguments.length >= 6 ? arguments[5] : 0, + arguments.length >= 7 ? arguments[6] : 0); + } + } + return new Date().toString(); + }; + Date.__proto__ = orig_date; + Date.prototype.constructor = Date; + orig_date.now = function() { + return new Date().getTime(); + }; +})(); diff --git a/wpr/dnsproxy.py b/wpr/dnsproxy.py old mode 100755 new mode 100644 index 6f940aa..b8fe951 --- a/wpr/dnsproxy.py +++ b/wpr/dnsproxy.py @@ -16,13 +16,16 @@ import daemonserver import errno import logging -import platformsettings import socket import SocketServer import threading import third_party +import dns.flags +import dns.message +import dns.rcode import dns.resolver +import dns.rdatatype import ipaddr @@ -31,18 +34,21 @@ class DnsProxyException(Exception): class RealDnsLookup(object): - def __init__(self, name_servers=None): + def __init__(self, name_servers): + if '127.0.0.1' in name_servers: + raise DnsProxyException( + 'Invalid nameserver: 127.0.0.1 (causes an infinite loop)') self.resolver = dns.resolver.get_default_resolver() - self.resolver.nameservers = [ - platformsettings.get_platform_settings().get_original_primary_dns()] + self.resolver.nameservers = name_servers self.dns_cache_lock = threading.Lock() self.dns_cache = {} - def __call__(self, hostname): + def __call__(self, hostname, rdtype=dns.rdatatype.A): """Return real IP for a host. Args: hostname: a hostname ending with a period (e.g. "www.google.com.") + rdtype: the query type (1 for 'A', 28 for 'AAAA') Returns: the IP address as a string (e.g. "192.168.25.2") """ @@ -50,54 +56,72 @@ def __call__(self, hostname): ip = self.dns_cache.get(hostname) self.dns_cache_lock.release() if ip: - logging.debug('_real_dns_lookup(%s) cache hit! -> %s', hostname, ip) return ip try: - answers = self.resolver.query(hostname, 'A') - except (dns.resolver.NoAnswer, - dns.resolver.NXDOMAIN, - dns.resolver.Timeout) as ex: + answers = self.resolver.query(hostname, rdtype) + except dns.resolver.NXDOMAIN: + return None + except (dns.resolver.NoAnswer, dns.resolver.Timeout) as ex: logging.debug('_real_dns_lookup(%s) -> None (%s)', hostname, ex.__class__.__name__) return None if answers: ip = str(answers[0]) - logging.debug('_real_dns_lookup(%s) -> %s', hostname, ip) self.dns_cache_lock.acquire() self.dns_cache[hostname] = ip self.dns_cache_lock.release() return ip + def ClearCache(self): + """Clear the DNS cache.""" + self.dns_cache_lock.acquire() + self.dns_cache.clear() + self.dns_cache_lock.release() + -class DnsPrivatePassthroughFilter: - """Allow private hosts to resolve to their real IPs.""" - def __init__(self, real_dns_lookup, skip_passthrough_hosts=()): - """Initialize DnsPrivatePassthroughFilter. +class PrivateIpDnsLookup(object): + """Resolve private hosts to their real IPs and others to the Web proxy IP. + + Hosts in the given http_archive will resolve to the Web proxy IP without + checking the real IP. + + This only supports IPv4 lookups. + """ + def __init__(self, web_proxy_ip, real_dns_lookup, http_archive): + """Initialize PrivateIpDnsLookup. Args: + web_proxy_ip: the IP address returned by __call__ for non-private hosts. real_dns_lookup: a function that resolves a host to an IP. - skip_passthrough_hosts: an iterable of hosts that skip - the private determination (i.e. avoids a real dns lookup - for them). + http_archive: an instance of HttpArchive. + Hosts in the archive will always resolve to the web_proxy_ip """ + self.web_proxy_ip = web_proxy_ip self.real_dns_lookup = real_dns_lookup - self.skip_passthrough_hosts = set( - host + '.' for host in skip_passthrough_hosts) + self.http_archive = http_archive + self.InitializeArchiveHosts() def __call__(self, host): - """Return real IP for host if private. + """Return real IPv4 for private hosts and Web proxy IP otherwise. Args: host: a hostname ending with a period (e.g. "www.google.com.") Returns: - If private, the real IP address as a string (e.g. 192.168.25.2) - Otherwise, None. + IP address as a string or None (if lookup fails) """ - if host not in self.skip_passthrough_hosts: + ip = self.web_proxy_ip + if host not in self.archive_hosts: + real_ip = self.real_dns_lookup(host) - if real_ip and ipaddr.IPv4Address(real_ip).is_private: - return real_ip - return None + if real_ip: + if ipaddr.IPAddress(real_ip).is_private: + ip = real_ip + else: + ip = None + return ip + + def InitializeArchiveHosts(self): + """Recompute the archive_hosts from the http_archive.""" + self.archive_hosts = set('%s.' % req.host for req in self.http_archive) class UdpDnsHandler(SocketServer.DatagramRequestHandler): @@ -110,6 +134,13 @@ class UdpDnsHandler(SocketServer.DatagramRequestHandler): STANDARD_QUERY_OPERATION_CODE = 0 def handle(self): + """Handle a DNS query. + + IPv6 requests (with rdtype AAAA) receive mismatched IPv4 responses + (with rdtype A). To properly support IPv6, the http proxy would + need both types of addresses. By default, Windows XP does not + support IPv6.
+ """ self.data = self.rfile.read() self.transaction_id = self.data[0] self.flags = self.data[1] @@ -122,15 +153,17 @@ def handle(self): else: logging.debug("DNS request with non-zero operation code: %s", operation_code) - real_ip = self.server.passthrough_filter(self.domain) - if real_ip: - message = 'passthrough' - ip = real_ip + ip = self.server.dns_lookup(self.domain) + if ip is None: + logging.debug('dnsproxy: %s -> NXDOMAIN', self.domain) + response = self.get_dns_no_such_name_response() else: - message = 'handle' - ip = self.server.server_address[0] - logging.debug('dnsproxy: %s(%s) -> %s', message, self.domain, ip) - self.reply(self.get_dns_reply(ip)) + if ip == self.server.server_address[0]: + logging.debug('dnsproxy: %s -> %s (replay web proxy)', self.domain, ip) + else: + logging.debug('dnsproxy: %s -> %s', self.domain, ip) + response = self.get_dns_response(ip) + self.wfile.write(response) @classmethod def _domain(cls, wire_domain): @@ -143,10 +176,7 @@ def _domain(cls, wire_domain): length = ord(wire_domain[index]) return domain - def reply(self, buf): - self.wfile.write(buf) - - def get_dns_reply(self, ip): + def get_dns_response(self, ip): packet = '' if self.domain: packet = ( @@ -164,48 +194,35 @@ def get_dns_reply(self, ip): ) return packet + def get_dns_no_such_name_response(self): + query_message = dns.message.from_wire(self.data) + response_message = dns.message.make_response(query_message) + response_message.flags |= dns.flags.AA | dns.flags.RA + response_message.set_rcode(dns.rcode.NXDOMAIN) + return response_message.to_wire() class DnsProxyServer(SocketServer.ThreadingUDPServer, daemonserver.DaemonServer): - def __init__(self, use_forwarding, passthrough_filter=None, host='', port=53, handler=UdpDnsHandler): + def __init__(self, dns_lookup=None, host='', port=53): """Initialize DnsProxyServer. Args: - use_forwarding: a boolean that if true, changes primary DNS to host. - passthrough_filter: a function that resolves a host to its real IP, - or None, if it should resolve to the dnsproxy's address. + dns_lookup: a function that resolves a host to an IP address. host: a host string (name or IP) to bind the dns proxy and to which DNS requests will be resolved. port: an integer port on which to bind the proxy. 
- self.use_forwarding = use_forwarding - self.passthrough_filter = passthrough_filter or (lambda host: None) - self.platform_settings = platformsettings.get_platform_settings() try: SocketServer.ThreadingUDPServer.__init__( - self, (host, port), handler) + self, (host, port), UdpDnsHandler) except socket.error, (error_number, msg): if error_number == errno.EACCES: raise DnsProxyException( 'Unable to bind DNS server on (%s:%s)' % (host, port)) raise + self.dns_lookup = dns_lookup or (lambda host: self.server_address[0]) logging.info('Started DNS server on %s...', self.server_address) - if self.use_forwarding: - self.platform_settings.set_primary_dns(host) def cleanup(self): - if self.use_forwarding: - self.platform_settings.restore_primary_dns() self.shutdown() logging.info('Shutdown DNS server') - - -class DummyDnsServer(): - def __init__(self, use_forwarding, passthrough_filter=None, host='', port=53): - pass - - def __enter__(self): - pass - - def __exit__(self, unused_exc_type, unused_exc_val, unused_exc_tb): - pass diff --git a/wpr/httparchive.py b/wpr/httparchive.py index 96e25ea..efd693c 100755 --- a/wpr/httparchive.py +++ b/wpr/httparchive.py @@ -32,125 +32,185 @@ """ import difflib +import email.utils +import httplib import httpzlib +import json import logging import optparse import os import persistentmixin -import re import StringIO import subprocess +import sys import tempfile +import urlparse - -HTML_RE = re.compile(r'<html[^>]*>', re.IGNORECASE) -HEAD_RE = re.compile(r'<head[^>]*>', re.IGNORECASE) -DETERMINISTIC_SCRIPT = """ - -""" +import platformsettings class HttpArchiveException(Exception): + """Base class for all exceptions in httparchive.""" pass -class InjectionFailedException(HttpArchiveException): - def __init__(self, text): - self.text = text - - def __str__(self): - return repr(text) - -def _InsertScriptAfter(matchobj): - return matchobj.group(0) + DETERMINISTIC_SCRIPT - class HttpArchive(dict, persistentmixin.PersistentMixin): """Dict with ArchivedHttpRequest keys and ArchivedHttpResponse values. PersistentMixin adds CreateNew(filename), Load(filename), and Persist(). + + Attributes: + server_rtt: dict of {hostname: server rtt in seconds} """ - def get_requests(self, command=None, host=None, path=None): - """Retruns a list of all requests matching giving params.""" - return [r for r in self if r.matches(command, host, path)] + def __init__(self): + self.server_rtt = {} + + def get_server_rtt(self, server): + """Retrieves the round trip time (rtt) to the server. + + Args: + server: the hostname of the server + + Returns: + round trip time to the server in seconds, or 0 if unavailable + """ + if server not in self.server_rtt: + platform_settings = platformsettings.get_platform_settings() + self.server_rtt[server] = platform_settings.ping(server) + return self.server_rtt[server] + + def get(self, request, default=None): + """Return the archived response for a given request. + + Does extra checking for handling some HTTP request headers. + + Args: + request: instance of ArchivedHttpRequest + default: default value to return if request is not found + + Returns: + Instance of ArchivedHttpResponse or default if no matching + response is found + """ + if request in self: + return self[request] + return self.get_conditional_response(request, default) + + def get_conditional_response(self, request, default): + """Get the response based on the conditional HTTP request headers. + + Args: + request: an ArchivedHttpRequest representing the original request. + default: the default ArchivedHttpResponse to return if no match is + found for the original request with the conditional headers removed. + + Returns: + an ArchivedHttpResponse with a status of 200, 304 (not modified), or + 412 (precondition failed) + """ + response = default + if request.is_conditional(): + stripped_request = request.create_request_without_conditions() + if stripped_request in self: + response = self[stripped_request] + if response.status == 200: + status = self.get_conditional_status(request, response) + if status != 200: + response = create_response(status) + return response + + def get_conditional_status(self, request, response): + status = 200 + last_modified = email.utils.parsedate( + response.get_header_case_insensitive('last-modified')) + response_etag = response.get_header_case_insensitive('etag') + is_get_or_head = request.command.upper() in ('GET', 'HEAD') + + match_value = request.headers.get('if-match', None) + if match_value: + if self.is_etag_match(match_value, response_etag): + status = 200 + else: + status = 412 # precondition failed + none_match_value = request.headers.get('if-none-match', None) + if none_match_value: + if self.is_etag_match(none_match_value, response_etag): + status = 304 + elif is_get_or_head: + status = 200 + else: + status = 412 + if is_get_or_head and last_modified: + for header in ('if-modified-since', 'if-unmodified-since'): + date = email.utils.parsedate(request.headers.get(header, None)) + if date: + if ((header == 'if-modified-since' and last_modified > date) or + (header == 'if-unmodified-since' and last_modified < date)): + if status != 412: + status = 200 + else: + status = 304 # not modified + return status + + def is_etag_match(self, request_etag, response_etag): + """Determines whether the entity tags of the request/response match.
+ + Args: + request_etag: the value string of the "if-(none)-match:" + portion of the request header + response_etag: the etag value of the response + + Returns: + True on match, False otherwise + """ + response_etag = response_etag.strip('" ') + for etag in request_etag.split(','): + etag = etag.strip('" ') + if etag in ('*', response_etag): + return True + return False + + def get_requests(self, command=None, host=None, path=None, use_query=True): + """Return a list of requests that match the given args.""" + return [r for r in self if r.matches(command, host, path, + use_query=use_query)] def ls(self, command=None, host=None, path=None): """List all URLs that match given params.""" - out = StringIO.StringIO() - for request in self.get_requests(command, host, path): - print >>out, '%s %s%s %s' % (request.command, request.host, request.path, - request.headers) - return out.getvalue() + return ''.join(sorted( + '%s\n' % r for r in self.get_requests(command, host, path))) def cat(self, command=None, host=None, path=None): """Print the contents of all URLs that match given params.""" out = StringIO.StringIO() for request in self.get_requests(command, host, path): - print >>out, '%s %s %s\nrequest headers:\n' % ( - request.command, request.host, request.path) - for k, v in sorted(request.headers): - print >>out, " %s: %s" % (k, v) + print >>out, str(request) + print >>out, 'Untrimmed request headers:' + for k in request.headers: + print >>out, ' %s: %s' % (k, request.headers[k]) if request.request_body: print >>out, request.request_body - print >>out, '-' * 70 + print >>out, '---- Response Info', '-' * 51 response = self[request] - print >>out, 'Status: %s\nReason: %s\nheaders:\n' % ( - response.status, response.reason) - for k, v in sorted(response.headers): - print >>out, " %s: %s" % (k, v) - headers = dict(response.headers) + chunk_lengths = [len(x) for x in response.response_data] + print >>out, ('Status: %s\n' + 'Reason: %s\n' + 'Headers delay: %s\n' + 'Response headers:') % ( + response.status, response.reason, response.delays['headers']) + for k, v in response.headers: + print >>out, ' %s: %s' % (k, v) + print >>out, ('Chunk count: %s\n' + 'Chunk lengths: %s\n' + 'Chunk delays: %s') % ( + len(chunk_lengths), chunk_lengths, response.delays['data']) body = response.get_data_as_text() + print >>out, '---- Response Data', '-' * 51 if body: - print >>out, '-' * 70 print >>out, body + else: + print >>out, '[binary data]' print >>out, '=' * 70 return out.getvalue() @@ -172,76 +232,209 @@ def edit(self, command=None, host=None, path=None): response = self[matching_requests[0]] tmp_file = tempfile.NamedTemporaryFile(delete=False) - tmp_file.write(response.get_data_as_text()) + tmp_file.write(response.get_response_as_text()) tmp_file.close() subprocess.check_call([editor, tmp_file.name]) - response.set_data(''.join(open(tmp_file.name).readlines())) + response.set_response_from_text(''.join(open(tmp_file.name).readlines())) os.remove(tmp_file.name) + def _format_request_lines(self, req): + """Format request to make diffs easier to read. + + Args: + req: an ArchivedHttpRequest + Returns: + Example: + ['GET www.example.com/path\n', 'Header-Key: header value\n', ...] 
+ """ + parts = ['%s %s%s\n' % (req.command, req.host, req.path)] + if req.request_body: + parts.append('%s\n' % req.request_body) + for k, v in req.trimmed_headers: + k = '-'.join(x.capitalize() for x in k.split('-')) + parts.append('%s: %s\n' % (k, v)) + return parts + + def find_closest_request(self, request, use_path=False): + """Find the closest matching request in the archive to the given request. + + Args: + request: an ArchivedHttpRequest + use_path: If True, closest matching request's path component must match. + (Note: this refers to the 'path' component within the URL, not the + query string component.) + If use_path=False, candidate will NOT match in example below + e.g. request = GET www.test.com/path?aaa + candidate = GET www.test.com/diffpath?aaa + Returns: + If a close match is found, return the instance of ArchivedHttpRequest. + Otherwise, return None. + """ + best_match = None + request_lines = self._format_request_lines(request) + matcher = difflib.SequenceMatcher(b=''.join(request_lines)) + path = None + if use_path: + path = request.path + for candidate in self.get_requests(request.command, request.host, path, + use_query=not use_path): + candidate_lines = self._format_request_lines(candidate) + matcher.set_seq1(''.join(candidate_lines)) + best_match = max(best_match, (matcher.ratio(), candidate)) + if best_match: + return best_match[1] + return None + def diff(self, request): - request_repr = request.verbose_repr() - best_similarity = None - best_candidate_repr = None - for candidate in self.get_requests(request.command, request.host): - candidate_repr = candidate.verbose_repr() - similarity = difflib.SequenceMatcher(a=request_repr, - b=candidate_repr).ratio() - if best_similarity is None or similarity > best_similarity: - best_similarity = similarity - best_candidate_repr = candidate_repr - - delta = None - if best_candidate_repr: - delta = ''.join(difflib.ndiff(best_candidate_repr.splitlines(1), - request_repr.splitlines(1))) - return delta + """Diff the given request to the closest matching request in the archive. + + Args: + request: an ArchivedHttpRequest + Returns: + If a close match is found, return a textual diff between the requests. + Otherwise, return None. + """ + request_lines = self._format_request_lines(request) + closest_request = self.find_closest_request(request) + if closest_request: + closest_request_lines = self._format_request_lines(closest_request) + return ''.join(difflib.ndiff(closest_request_lines, request_lines)) + return None class ArchivedHttpRequest(object): - def __init__(self, command, host, path, request_body, headers): + """Record all the state that goes into a request. + + ArchivedHttpRequest instances are considered immutable so they can + serve as keys for HttpArchive instances. + (The immutability is not enforced.) + + Upon creation, the headers are "trimmed" (i.e. edited or dropped) + and saved to self.trimmed_headers to allow requests to match in a wider + variety of playback situations (e.g. using different user agents). + + For unpickling, 'trimmed_headers' is recreated from 'headers'. That + allows for changes to the trim function and can help with debugging. + """ + CONDITIONAL_HEADERS = [ + 'if-none-match', 'if-match', + 'if-modified-since', 'if-unmodified-since'] + + def __init__(self, command, host, path, request_body, headers, is_ssl=False): + """Initialize an ArchivedHttpRequest. + + Args: + command: a string (e.g. 'GET' or 'POST'). + host: a host name (e.g. 'www.google.com'). + path: a request path (e.g. 
'/search?q=dogs'). + request_body: a request body string for a POST or None. + headers: {key: value, ...} where key and value are strings. + is_ssl: a boolean which is True iff request is make via SSL. + """ self.command = command self.host = host self.path = path self.request_body = request_body - self.headers = self._FuzzHeaders(headers) + self.headers = headers + self.is_ssl = is_ssl + self.trimmed_headers = self._TrimHeaders(headers) + + def __str__(self): + scheme = 'https' if self.is_ssl else 'http' + return '%s %s://%s%s %s' % ( + self.command, scheme, self.host, self.path, self.trimmed_headers) def __repr__(self): return repr((self.command, self.host, self.path, self.request_body, - self.headers)) + self.trimmed_headers, self.is_ssl)) def __hash__(self): - return hash(self.__repr__()) + """Return a integer hash to use for hashed collections including dict.""" + return hash(repr(self)) def __eq__(self, other): - return self.__repr__() == other.__repr__() + """Define the __eq__ method to match the hash behavior.""" + return repr(self) == repr(other) def __setstate__(self, state): + """Influence how to unpickle. + + "headers" are the original request headers. + "trimmed_headers" are the trimmed headers used for matching requests + during replay. + + Args: + state: a dictionary for __dict__ + """ + if 'full_headers' in state: + # Fix older version of archive. + state['headers'] = state['full_headers'] + del state['full_headers'] if 'headers' not in state: - error_msg = ('Archived HTTP requests are missing headers. Your HTTP ' - 'archive is likely from a previous version and must be ' - 'recorded again.') - raise Exception(error_msg) - self.__dict__ = state - - def matches(self, command=None, host=None, path=None): - """Returns true iff the request matches all parameters.""" + raise HttpArchiveException( + 'Archived HTTP request is missing "headers". The HTTP archive is' + ' likely from a previous version and must be re-recorded.') + state['trimmed_headers'] = self._TrimHeaders(dict(state['headers'])) + if 'is_ssl' not in state: + state['is_ssl'] = False + self.__dict__.update(state) + + def __getstate__(self): + """Influence how to pickle. + + Returns: + a dict to use for pickling + """ + state = self.__dict__.copy() + del state['trimmed_headers'] + return state + + def matches(self, command=None, host=None, path_with_query=None, + use_query=True): + """Returns true iff the request matches all parameters. + + Args: + command: a string (e.g. 'GET' or 'POST'). + host: a host name (e.g. 'www.google.com'). + path_with_query: a request path with query string (e.g. '/search?q=dogs') + use_query: + If use_query is True, request matching uses both the hierarchical path + and query string component. + If use_query is False, request matching only uses the hierarchical path + + e.g. 
req1 = GET www.test.com/index?aaaa + req2 = GET www.test.com/index?bbbb + + If use_query is True, req1.matches(req2) evaluates to False + If use_query is False, req1.matches(req2) evaluates to True + + Returns: + True iff the request matches all parameters + """ + path_match = path_with_query == self.path + if not use_query: + self_path = urlparse.urlparse('http://%s%s' % ( + self.host or '', self.path or '')).path + other_path = urlparse.urlparse('http://%s%s' % ( + host or '', path_with_query or '')).path + path_match = self_path == other_path return ((command is None or command == self.command) and (host is None or host == self.host) and - (path is None or path == self.path)) - - def verbose_repr(self): - return '\n'.join([str(x) for x in - [self.command, self.host, self.path, self.request_body] + self.headers]) + (path_with_query is None or path_match)) - def _FuzzHeaders(self, headers): + @classmethod + def _TrimHeaders(cls, headers): """Removes headers that are known to cause problems during replay. These headers are removed for the following reasons: - accept: Causes problems with www.bing.com. During record, CSS is fetched with *. During replay, it's text/css. + - accept-charset, accept-language, referer: vary between clients. - connection, method, scheme, url, version: Cause problems with spdy. - cookie: Extremely sensitive to request/response order. + - keep-alive: Not supported by Web Page Replay. - user-agent: Changes with every Chrome version. + - proxy-connection: Sent for proxy requests. Another variant to consider is dropping only the value from the header. However, this is particularly bad for the cookie header, because the @@ -249,53 +442,131 @@ def _FuzzHeaders(self, headers): is made. Args: - headers: Dictionary of String -> String headers to values. + headers: {header_key: header_value, ...} Returns: - Dictionary of headers, with undesirable headers removed. + [(header_key, header_value), ...] # (with undesirable headers removed) """ - fuzzed_headers = headers.copy() - undesirable_keys = ['accept', 'connection', 'cookie', 'method', 'scheme', - 'url', 'version', 'user-agent'] - keys_to_delete = [] - for key in fuzzed_headers: - if key.lower() in undesirable_keys: - keys_to_delete.append(key) - for key in keys_to_delete: - del fuzzed_headers[key] - return [(k, fuzzed_headers[k]) for k in sorted(fuzzed_headers.keys())] - + # TODO(tonyg): Strip sdch from the request headers because we can't + # guarantee that the dictionary will be recorded, so replay may not work. 
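# Editor's illustration (not part of the original patch): a worked example of
# what _TrimHeaders produces. Given
#     {'Host': 'www.example.com', 'Cookie': 'a=1',
#      'accept-encoding': 'gzip,deflate,sdch'}
# the method returns the sorted, trimmed header tuples
#     [('Host', 'www.example.com'), ('accept-encoding', 'gzip,deflate')]
# since 'cookie' is an undesirable key, 'sdch' is stripped, and the trailing
# comma left behind by the strip is cleaned up below.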
+ if 'accept-encoding' in headers: + headers['accept-encoding'] = headers['accept-encoding'].replace( + 'sdch', '') + # A little clean-up + if headers['accept-encoding'].endswith(','): + headers['accept-encoding'] = headers['accept-encoding'][:-1] + undesirable_keys = [ + 'accept', 'accept-charset', 'accept-language', + 'connection', 'cookie', 'keep-alive', 'method', + 'referer', 'scheme', 'url', 'version', 'user-agent', 'proxy-connection'] + return sorted([(k, v) for k, v in headers.items() + if k.lower() not in undesirable_keys]) + + def is_conditional(self): + """Return True if any conditional headers are present in the request.""" + for header in self.CONDITIONAL_HEADERS: + if header in self.headers: + return True + return False + + def create_request_without_conditions(self): + stripped_headers = dict((k, v) for k, v in self.headers.iteritems() + if k.lower() not in self.CONDITIONAL_HEADERS) + return ArchivedHttpRequest( + self.command, self.host, self.path, self.request_body, + stripped_headers, self.is_ssl) class ArchivedHttpResponse(object): - """HTTPResponse objects. - - ArchivedHttpReponse instances have the following attributes: - version: HTTP protocol version used by server. - 10 for HTTP/1.0, 11 for HTTP/1.1 (same as httplib). - status: Status code returned by server (e.g. 200). - reason: Reason phrase returned by server (e.g. "OK"). - headers: list of (header, value) tuples. - response_data: list of content chunks. Concatenating all the content chunks - gives the complete contents (i.e. the chunks do not have any lengths or - delimiters). - """ + """All the data needed to recreate an HTTP response.""" # CHUNK_EDIT_SEPARATOR is used to edit and view text content. # It is not sent in responses. It is added by get_data_as_text() # and removed by set_data(). CHUNK_EDIT_SEPARATOR = '[WEB_PAGE_REPLAY_CHUNK_BOUNDARY]' - def __init__(self, version, status, reason, headers, response_data): + # DELAY_EDIT_SEPARATOR is used to edit and view server delays. + DELAY_EDIT_SEPARATOR = ('\n[WEB_PAGE_REPLAY_EDIT_ARCHIVE --- ' + 'Delays are above. Response content is below.]\n') + + def __init__(self, version, status, reason, headers, response_data, + delays=None): + """Initialize an ArchivedHttpResponse. + + Args: + version: HTTP protocol version used by server. + 10 for HTTP/1.0, 11 for HTTP/1.1 (same as httplib). + status: Status code returned by server (e.g. 200). + reason: Reason phrase returned by server (e.g. "OK"). + headers: list of (header, value) tuples. + response_data: list of content chunks. + Concatenating the chunks gives the complete contents + (i.e. the chunks do not have any lengths or delimiters). + Do not include the final, zero-length chunk that marks the end. + delays: dict of (ms) delays before "headers" and "data". For example, + {'headers': 50, 'data': [0, 10, 10]} + """ self.version = version self.status = status self.reason = reason self.headers = headers self.response_data = response_data + self.delays = delays + self.fix_delays() + + def fix_delays(self): + """Initialize delays, or check the number of data delays.""" + expected_num_delays = len(self.response_data) + if not self.delays: + self.delays = { + 'headers': 0, + 'data': [0] * expected_num_delays + } + else: + num_delays = len(self.delays['data']) + if num_delays != expected_num_delays: + raise HttpArchiveException( + 'Server delay length mismatch: %d (expected %d): %s' % ( + num_delays, expected_num_delays, self.delays['data'])) + + def __repr__(self): + return repr((self.version, self.status, self.reason, sorted(self.headers), + self.response_data)) - def get_header(self, key): + def __hash__(self): + """Return an integer hash to use for hashed collections including dict.""" + return hash(repr(self)) + + def __eq__(self, other): + """Define the __eq__ method to match the hash behavior.""" + return repr(self) == repr(other) + + def __setstate__(self, state): + """Influence how to unpickle. + + Args: + state: a dictionary for __dict__ + """ + if 'server_delays' in state: + state['delays'] = { + 'headers': 0, + 'data': state['server_delays'] + } + del state['server_delays'] + elif 'delays' not in state: + state['delays'] = None + self.__dict__.update(state) + self.fix_delays() + + def get_header(self, key, default=None): for k, v in self.headers: if key == k: return v + return default + + def get_header_case_insensitive(self, key): for k, v in self.headers: if key.lower() == k.lower(): return v return None def set_header(self, key, value): @@ -317,6 +588,9 @@ def is_gzip(self): def is_compressed(self): return self.get_header('content-encoding') in ('gzip', 'deflate') + def is_chunked(self): + return self.get_header('transfer-encoding') == 'chunked' + def get_data_as_text(self): """Return content as a single string. @@ -334,8 +608,25 @@ def get_data_as_text(self): uncompressed_chunks = self.response_data return self.CHUNK_EDIT_SEPARATOR.join(uncompressed_chunks) + def get_delays_as_text(self): + """Return delays as editable text.""" + return json.dumps(self.delays, indent=2) + + def get_response_as_text(self): + """Returns response content as a single string. + + Server delays are separated on a per-chunk basis. Delays are in + milliseconds. Response content begins after DELAY_EDIT_SEPARATOR. + """ + data = self.get_data_as_text() + if data is None: + logging.warning('Data can not be represented as text.') + data = '' + delays = self.get_delays_as_text() + return self.DELAY_EDIT_SEPARATOR.join((delays, data)) + def set_data(self, text): - """Inverse of set_data_as_text(). + """Inverse of get_data_as_text(). Split on CHUNK_EDIT_SEPARATOR and compress if needed. """ @@ -344,26 +635,55 @@ def set_data(self, text): self.response_data = httpzlib.compress_chunks(text_chunks, self.is_gzip()) else: self.response_data = text_chunks - if not self.get_header('transfer-encoding'): + if not self.is_chunked(): content_length = sum(len(c) for c in self.response_data) self.set_header('content-length', str(content_length)) - def inject_deterministic_script(self): - """Inject deterministic script immediately after <head> or <html>.""" - content_type = self.get_header('content-type') - if not content_type or not content_type.startswith('text/html'): + def set_delays(self, delays_text): + """Inverse of get_delays_as_text(). + + Args: + delays_text: JSON encoded text such as the following: + { + "headers": 80, + "data": [6, 55, 0] + } + Times are in milliseconds. + Each data delay corresponds with one response_data value. + """ + try: + self.delays = json.loads(delays_text) + except (ValueError, KeyError) as e: + logging.critical('Unable to parse delays %s: %s', delays_text, e) + self.fix_delays() + + def set_response_from_text(self, text): + """Inverse of get_response_as_text(). + + Modifies the state of the archive according to the textual representation. + """ + try: + delays, data = text.split(self.DELAY_EDIT_SEPARATOR) + except ValueError: + logging.critical( + 'Error parsing text representation. Skipping edits.') return - text = self.get_data_as_text() - if text: - text, is_injected = HEAD_RE.subn(_InsertScriptAfter, text, 1) - if not is_injected: - text, is_injected = HTML_RE.subn(_InsertScriptAfter, text, 1) - if not is_injected: - raise InjectionFailedException(text) - self.set_data(text) + self.set_delays(delays) + self.set_data(data) -if __name__ == '__main__': +def create_response(status, reason=None, headers=None, body=None): + """Convenience method for creating simple ArchivedHttpResponse objects.""" + if reason is None: + reason = httplib.responses.get(status, 'Unknown') + if headers is None: + headers = [('content-type', 'text/plain')] + if body is None: + body = "%s %s" % (status, reason) + return ArchivedHttpResponse(11, status, reason, headers, [body]) + + +def main(): class PlainHelpFormatter(optparse.IndentedHelpFormatter): def format_description(self, description): if description: @@ -412,3 +732,8 @@ def format_description(self, description): http_archive.Persist(replay_file) else: option_parser.error('Unknown command "%s"' % command) + return 0 + + +if __name__ == '__main__': + sys.exit(main()) diff --git a/wpr/httparchive_test.py b/wpr/httparchive_test.py new file mode 100755 index 0000000..5a7b759 --- /dev/null +++ b/wpr/httparchive_test.py @@ -0,0 +1,347 @@ +#!/usr/bin/env python +# Copyright 2011 Google Inc. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import ast +import httparchive +import os +import unittest + + +def create_request(headers): + return httparchive.ArchivedHttpRequest( + 'GET', 'www.test.com', '/', None, headers) + +def create_response(headers): + return httparchive.ArchivedHttpResponse( + 11, 200, 'OK', headers, '') + + +class HttpArchiveTest(unittest.TestCase): + + REQUEST_HEADERS = {} + REQUEST = create_request(REQUEST_HEADERS) + + # Used for if-(un)modified-since checks + DATE_PAST = 'Wed, 13 Jul 2011 03:58:08 GMT' + DATE_PRESENT = 'Wed, 20 Jul 2011 04:58:08 GMT' + DATE_FUTURE = 'Wed, 27 Jul 2011 05:58:08 GMT' + DATE_INVALID = 'This is an invalid date!!' + + # etag values + ETAG_VALID = 'etag' + ETAG_INVALID = 'This is an invalid etag value!!'
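  # Editor's sketch (illustrative only; this constant is hypothetical and not
  # used by the tests): with RESPONSE below carrying
  # 'last-modified: DATE_PRESENT', the conditional date checks exercised in
  # the tests resolve as follows.
  EXPECTED_CONDITIONAL_STATUS = {
      'if-modified-since': {'past': 200, 'present': 304, 'future': 304},
      'if-unmodified-since': {'past': 304, 'present': 304, 'future': 200},
  }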
+ + RESPONSE_HEADERS = [('last-modified', DATE_PRESENT), ('etag', ETAG_VALID)] + RESPONSE = create_response(RESPONSE_HEADERS) + + def setUp(self): + self.archive = httparchive.HttpArchive() + self.archive[self.REQUEST] = self.RESPONSE + + # Also add an identical POST request for testing + request = httparchive.ArchivedHttpRequest( + 'POST', 'www.test.com', '/', None, self.REQUEST_HEADERS) + self.archive[request] = self.RESPONSE + + def tearDown(self): + pass + + def test_init(self): + archive = httparchive.HttpArchive() + self.assertEqual(len(archive), 0) + + def test__TrimHeaders(self): + request = httparchive.ArchivedHttpRequest + header1 = {'accept-encoding': 'gzip,deflate'} + self.assertEqual(request._TrimHeaders(header1), + [(k, v) for k, v in header1.items()]) + + header2 = {'referer': 'www.google.com'} + self.assertEqual(request._TrimHeaders(header2), []) + + header3 = {'referer': 'www.google.com', 'cookie': 'cookie_monster!', + 'hello': 'world'} + self.assertEqual(request._TrimHeaders(header3), [('hello', 'world')]) + + def test_matches(self): + headers = {} + request1 = httparchive.ArchivedHttpRequest( + 'GET', 'www.test.com', '/index.html?hello=world', None, headers) + request2 = httparchive.ArchivedHttpRequest( + 'GET', 'www.test.com', '/index.html?foo=bar', None, headers) + + self.assert_(not request1.matches( + request2.command, request2.host, request2.path, use_query=True)) + self.assert_(request1.matches( + request2.command, request2.host, request2.path, use_query=False)) + + self.assert_(request1.matches( + request2.command, request2.host, None, use_query=True)) + self.assert_(request1.matches( + request2.command, None, request2.path, use_query=False)) + + empty_request = httparchive.ArchivedHttpRequest( + None, None, None, None, headers) + self.assert_(not empty_request.matches( + request2.command, request2.host, None, use_query=True)) + self.assert_(not empty_request.matches( + request2.command, None, request2.path, use_query=False)) + + def setup_find_closest_request(self): + headers = {} + request1 = httparchive.ArchivedHttpRequest( + 'GET', 'www.test.com', '/a?hello=world', None, headers) + request2 = httparchive.ArchivedHttpRequest( + 'GET', 'www.test.com', '/a?foo=bar', None, headers) + request3 = httparchive.ArchivedHttpRequest( + 'GET', 'www.test.com', '/b?hello=world', None, headers) + + archive = httparchive.HttpArchive() + # Add requests 2 and 3 and find closest match with request1 + archive[request2] = self.RESPONSE + archive[request3] = self.RESPONSE + + return archive, request1, request2, request3 + + def test_find_closest_request(self): + archive, request1, request2, request3 = self.setup_find_closest_request() + + # Request 3 is the closest match to request 1 + self.assertEqual( + request3, archive.find_closest_request(request1, use_path=False)) + # However, if we match strictly on path, request2 is the only match + self.assertEqual( + request2, archive.find_closest_request(request1, use_path=True)) + + def test_find_closest_request_delete_simple(self): + archive, request1, request2, request3 = self.setup_find_closest_request() + + del archive[request3] + self.assertEqual( + request2, archive.find_closest_request(request1, use_path=False)) + self.assertEqual( + request2, archive.find_closest_request(request1, use_path=True)) + + def test_find_closest_request_delete_complex(self): + archive, request1, request2, request3 = self.setup_find_closest_request() + + del archive[request2] + self.assertEqual( + request3, archive.find_closest_request(request1, 
use_path=False)) + self.assertEqual( + None, archive.find_closest_request(request1, use_path=True)) + + def test_get_simple(self): + request = self.REQUEST + response = self.RESPONSE + archive = self.archive + + self.assertEqual(archive.get(request), response) + + false_request_headers = {'foo': 'bar'} + false_request = create_request(false_request_headers) + self.assertEqual(archive.get(false_request, default=None), None) + + def test_get_modified_headers(self): + request = self.REQUEST + response = self.RESPONSE + archive = self.archive + not_modified_response = httparchive.create_response(304) + + # Fail check and return response again + request_headers = {'if-modified-since': self.DATE_PAST} + request = create_request(request_headers) + self.assertEqual(archive.get(request), response) + + # Succeed check and return 304 Not Modified + request_headers = {'if-modified-since': self.DATE_FUTURE} + request = create_request(request_headers) + self.assertEqual(archive.get(request), not_modified_response) + + # Succeed check and return 304 Not Modified + request_headers = {'if-modified-since': self.DATE_PRESENT} + request = create_request(request_headers) + self.assertEqual(archive.get(request), not_modified_response) + + # Invalid date, fail check and return response again + request_headers = {'if-modified-since': self.DATE_INVALID} + request = create_request(request_headers) + self.assertEqual(archive.get(request), response) + + # fail check since the request is not a GET or HEAD request (as per RFC) + request_headers = {'if-modified-since': self.DATE_FUTURE} + request = httparchive.ArchivedHttpRequest( + 'POST', 'www.test.com', '/', None, request_headers) + self.assertEqual(archive.get(request), response) + + def test_get_unmodified_headers(self): + request = self.REQUEST + response = self.RESPONSE + archive = self.archive + not_modified_response = httparchive.create_response(304) + + # Succeed check + request_headers = {'if-unmodified-since': self.DATE_PAST} + request = create_request(request_headers) + self.assertEqual(archive.get(request), not_modified_response) + + # Fail check + request_headers = {'if-unmodified-since': self.DATE_FUTURE} + request = create_request(request_headers) + self.assertEqual(archive.get(request), response) + + # Succeed check + request_headers = {'if-unmodified-since': self.DATE_PRESENT} + request = create_request(request_headers) + self.assertEqual(archive.get(request), not_modified_response) + + # Fail check + request_headers = {'if-unmodified-since': self.DATE_INVALID} + request = create_request(request_headers) + self.assertEqual(archive.get(request), response) + + # Fail check since the request is not a GET or HEAD request (as per RFC) + request_headers = {'if-modified-since': self.DATE_PAST} + request = httparchive.ArchivedHttpRequest( + 'POST', 'www.test.com', '/', None, request_headers) + self.assertEqual(archive.get(request), response) + + def test_get_etags(self): + request = self.REQUEST + response = self.RESPONSE + archive = self.archive + not_modified_response = httparchive.create_response(304) + precondition_failed_response = httparchive.create_response(412) + + # if-match headers + request_headers = {'if-match': self.ETAG_VALID} + request = create_request(request_headers) + self.assertEqual(archive.get(request), response) + + request_headers = {'if-match': self.ETAG_INVALID} + request = create_request(request_headers) + self.assertEqual(archive.get(request), precondition_failed_response) + + # if-none-match headers + request_headers = 
{'if-none-match': self.ETAG_VALID} + request = create_request(request_headers) + self.assertEqual(archive.get(request), not_modified_response) + + request_headers = {'if-none-match': self.ETAG_INVALID} + request = create_request(request_headers) + self.assertEqual(archive.get(request), response) + + def test_get_multiple_match_headers(self): + request = self.REQUEST + response = self.RESPONSE + archive = self.archive + not_modified_response = httparchive.create_response(304) + precondition_failed_response = httparchive.create_response(412) + + # if-match headers + # If the request would, without the If-Match header field, + # result in anything other than a 2xx or 412 status, + # then the If-Match header MUST be ignored. + + request_headers = { + 'if-match': self.ETAG_VALID, + 'if-modified-since': self.DATE_PAST, + } + request = create_request(request_headers) + self.assertEqual(archive.get(request), response) + + # Invalid etag, precondition failed + request_headers = { + 'if-match': self.ETAG_INVALID, + 'if-modified-since': self.DATE_PAST, + } + request = create_request(request_headers) + self.assertEqual(archive.get(request), precondition_failed_response) + + # 304 response; ignore if-match header + request_headers = { + 'if-match': self.ETAG_VALID, + 'if-modified-since': self.DATE_FUTURE, + } + request = create_request(request_headers) + self.assertEqual(archive.get(request), not_modified_response) + + # 304 response; ignore if-match header + request_headers = { + 'if-match': self.ETAG_INVALID, + 'if-modified-since': self.DATE_PRESENT, + } + request = create_request(request_headers) + self.assertEqual(archive.get(request), not_modified_response) + + # Invalid etag, precondition failed + request_headers = { + 'if-match': self.ETAG_INVALID, + 'if-modified-since': self.DATE_INVALID, + } + request = create_request(request_headers) + self.assertEqual(archive.get(request), precondition_failed_response) + + def test_get_multiple_none_match_headers(self): + request = self.REQUEST + response = self.RESPONSE + archive = self.archive + not_modified_response = httparchive.create_response(304) + precondition_failed_response = httparchive.create_response(412) + + # if-none-match headers + # If the request would, without the If-None-Match header field, + # result in anything other than a 2xx or 304 status, + # then the If-None-Match header MUST be ignored. 
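    # Editor's illustration (not part of the original patch): the five cases
    # below, tabulated.
    #     etag matches?   if-modified-since      archive.get() returns
    #     yes             DATE_PAST (fails)      full 200 response
    #     no              DATE_PAST (fails)      full 200 response
    #     yes             DATE_FUTURE (passes)   304 not modified
    #     no              DATE_PRESENT (passes)  304 not modified
    #     no              DATE_INVALID           full 200 response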
+
+    request_headers = {
+        'if-none-match': self.ETAG_VALID,
+        'if-modified-since': self.DATE_PAST,
+    }
+    request = create_request(request_headers)
+    self.assertEqual(archive.get(request), response)
+
+    request_headers = {
+        'if-none-match': self.ETAG_INVALID,
+        'if-modified-since': self.DATE_PAST,
+    }
+    request = create_request(request_headers)
+    self.assertEqual(archive.get(request), response)
+
+    # Etag match and date check succeeds; expect 304 Not Modified.
+    request_headers = {
+        'if-none-match': self.ETAG_VALID,
+        'if-modified-since': self.DATE_FUTURE,
+    }
+    request = create_request(request_headers)
+    self.assertEqual(archive.get(request), not_modified_response)
+
+    request_headers = {
+        'if-none-match': self.ETAG_INVALID,
+        'if-modified-since': self.DATE_PRESENT,
+    }
+    request = create_request(request_headers)
+    self.assertEqual(archive.get(request), not_modified_response)
+
+    request_headers = {
+        'if-none-match': self.ETAG_INVALID,
+        'if-modified-since': self.DATE_INVALID,
+    }
+    request = create_request(request_headers)
+    self.assertEqual(archive.get(request), response)
+
+
+if __name__ == '__main__':
+  unittest.main()
diff --git a/wpr/httpclient.py b/wpr/httpclient.py
old mode 100755
new mode 100644
index 2790288..7bcc3a5
--- a/wpr/httpclient.py
+++ b/wpr/httpclient.py
@@ -15,9 +15,71 @@
 """Retrieve web resources over http."""
 
+import copy
 import httparchive
 import httplib
 import logging
+import os
+import platformsettings
+import re
+import util
+
+
+HTML_RE = re.compile(r'^.{,256}?<html.*?>', re.IGNORECASE | re.DOTALL)
+HEAD_RE = re.compile(r'^.{,256}?<head.*?>', re.IGNORECASE | re.DOTALL)
+TIMER = platformsettings.get_platform_settings().timer
+
+
+class HttpClientException(Exception):
+  """Base class for all exceptions in httpclient."""
+  pass
+
+
+def GetInjectScript(scripts):
+  """Loads |scripts| from disk and returns a string of their content."""
+  lines = []
+  for script in scripts:
+    if os.path.exists(script):
+      lines.append(open(script).read())
+    elif util.resource_exists(script):
+      lines.append(util.resource_string(script))
+    else:
+      raise HttpClientException('Script does not exist: %s' % script)
+  return ''.join(lines)
+
+
+def _InjectScripts(response, inject_script):
+  """Injects |inject_script| immediately after <head> or <html>.
+
+  Copies |response| if it is modified.
+
+  Args:
+    response: an ArchivedHttpResponse
+    inject_script: JavaScript string (e.g. "Math.random = function(){...}")
+  Returns:
+    an ArchivedHttpResponse
+  """
+  if type(response) == tuple:
+    logging.warn('tuple response: %s', response)
+  content_type = response.get_header('content-type')
+  if content_type and content_type.startswith('text/html'):
+    text = response.get_data_as_text()
+
+    def InsertScriptAfter(matchobj):
+      return '%s<script>%s</script>' % (matchobj.group(0), inject_script)
+
+    if text and inject_script not in text:
+      text, is_injected = HEAD_RE.subn(InsertScriptAfter, text, 1)
+      if not is_injected:
+        text, is_injected = HTML_RE.subn(InsertScriptAfter, text, 1)
+      if not is_injected:
+        logging.warning('Failed to inject scripts.')
+        logging.debug('Response content: %s', text)
+      else:
+        response = copy.deepcopy(response)
+        response.set_data(text)
+  return response
+
 
 class DetailedHTTPResponse(httplib.HTTPResponse):
   """Preserve details relevant to replaying responses.
@@ -27,21 +89,31 @@ class DetailedHTTPResponse(httplib.HTTPResponse):
   """
 
   def read_chunks(self):
-    """Return an array of data.
+    """Return the response body content and timing data.
 
-    The returned chunked have the chunk size and CRLFs stripped off.
+ The returned chunks have the chunk size and CRLFs stripped off. If the response was compressed, the returned data is still compressed. Returns: - [response_body] # non-chunked responses - [response_body_chunk_1, response_body_chunk_2, ...] # chunked responses + (chunks, delays) + chunks: + [response_body] # non-chunked responses + [chunk_1, chunk_2, ...] # chunked responses + delays: + [0] # non-chunked responses + [chunk_1_first_byte_delay, ...] # chunked responses + + The delay for the first body item should be recorded by the caller. """ buf = [] + chunks = [] + delays = [] if not self.chunked: - chunks = [self.read()] + chunks.append(self.read()) + delays.append(0) else: + start = TIMER() try: - chunks = [] while True: line = self.fp.readline() chunk_size = self._read_chunk_size(line) @@ -49,8 +121,10 @@ def read_chunks(self): raise httplib.IncompleteRead(''.join(chunks)) if chunk_size == 0: break + delays.append(TIMER() - start) chunks.append(self._safe_read(chunk_size)) self._safe_read(2) # skip the CRLF at the end of the chunk + start = TIMER() # Ignore any trailers. while True: @@ -59,7 +133,7 @@ def read_chunks(self): break finally: self.close() - return chunks + return chunks, delays @classmethod def _read_chunk_size(cls, line): @@ -78,118 +152,223 @@ class DetailedHTTPConnection(httplib.HTTPConnection): response_class = DetailedHTTPResponse +class DetailedHTTPSResponse(DetailedHTTPResponse): + """Preserve details relevant to replaying SSL responses.""" + pass + +class DetailedHTTPSConnection(httplib.HTTPSConnection): + """Preserve details relevant to replaying SSL connections.""" + response_class = DetailedHTTPSResponse + + class RealHttpFetch(object): - def __init__(self, real_dns_lookup): + def __init__(self, real_dns_lookup, get_server_rtt): + """Initialize RealHttpFetch. + + Args: + real_dns_lookup: a function that resolves a host to an IP. + get_server_rtt: a function that returns the round-trip time of a host. + """ self._real_dns_lookup = real_dns_lookup + self._get_server_rtt = get_server_rtt - def __call__(self, request, headers): - """Fetch an HTTP request and return the response and response_body. + def __call__(self, request): + """Fetch an HTTP request. Args: - request: an instance of an ArchivedHttpRequest - headers: a dict of HTTP headers + request: an ArchivedHttpRequest Returns: - (instance of httplib.HTTPResponse, - [response_body_chunk_1, response_body_chunk_2, ...]) - # If the response did not use chunked encoding, there is only one chunk. + an ArchivedHttpResponse """ - # TODO(tonyg): Strip sdch from the request headers because we can't - # guarantee that the dictionary will be recorded, so replay may not work. 
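The chunks returned by read_chunks() above follow standard HTTP/1.1 chunked framing, with each chunk's hex size line and CRLF terminators stripped, and delays[] recording the wait before each chunk arrived. A minimal standalone sketch of that framing; parse_chunked is an illustrative name, not part of the patch:

def parse_chunked(raw):
  # Parse an HTTP/1.1 chunked body, e.g. '4\r\nWiki\r\n5\r\npedia\r\n0\r\n\r\n'.
  # Mirrors what read_chunks() strips: the hex size line and the CRLF
  # that terminates each chunk's data.
  chunks, pos = [], 0
  while True:
    eol = raw.index('\r\n', pos)
    size = int(raw[pos:eol].split(';')[0], 16)  # size line may carry extensions
    if size == 0:
      break  # a zero-length chunk ends the body; trailers may follow
    chunks.append(raw[eol + 2:eol + 2 + size])
    pos = eol + 2 + size + 2  # skip the chunk data plus its trailing CRLF
  return chunks

assert parse_chunked('4\r\nWiki\r\n5\r\npedia\r\n0\r\n\r\n') == ['Wiki', 'pedia']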
- if 'accept-encoding' in headers: - headers['accept-encoding'] = headers['accept-encoding'].replace( - 'sdch', '') - - logging.debug('RealHttpRequest: %s %s', request.host, request.path) + logging.debug('RealHttpFetch: %s %s', request.host, request.path) host_ip = self._real_dns_lookup(request.host) if not host_ip: logging.critical('Unable to find host ip for name: %s', request.host) - return None, None - try: - connection = DetailedHTTPConnection(host_ip) - connection.request( - request.command, - request.path, - request.request_body, - headers) - response = connection.getresponse() - chunks = response.read_chunks() - return response, chunks - except Exception, e: - logging.critical('Could not fetch %s: %s', request, e) - import traceback - logging.critical(traceback.format_exc()) - return None, None + return None + retries = 3 + while True: + try: + if request.is_ssl: + connection = DetailedHTTPSConnection(host_ip) + else: + connection = DetailedHTTPConnection(host_ip) + start = TIMER() + connection.request( + request.command, + request.path, + request.request_body, + request.headers) + response = connection.getresponse() + headers_delay = int((TIMER() - start) * 1000) + headers_delay -= self._get_server_rtt(request.host) + + chunks, chunk_delays = response.read_chunks() + delays = { + 'headers': headers_delay, + 'data': chunk_delays + } + archived_http_response = httparchive.ArchivedHttpResponse( + response.version, + response.status, + response.reason, + response.getheaders(), + chunks, + delays) + return archived_http_response + except Exception, e: + if retries: + retries -= 1 + logging.warning('Retrying fetch %s: %s', request, e) + continue + logging.critical('Could not fetch %s: %s', request, e) + return None class RecordHttpArchiveFetch(object): """Make real HTTP fetches and save responses in the given HttpArchive.""" - def __init__(self, http_archive, real_dns_lookup, use_deterministic_script): + def __init__(self, http_archive, real_dns_lookup, inject_script, + cache_misses=None): """Initialize RecordHttpArchiveFetch. Args: - http_archve: an instance of a HttpArchive + http_archive: an instance of a HttpArchive real_dns_lookup: a function that resolves a host to an IP. - use_deterministic_script: If True, attempt to inject a script, - when appropriate, to make JavaScript more deterministic. + inject_script: script string to inject in all pages + cache_misses: instance of CacheMissArchive """ self.http_archive = http_archive - self.real_http_fetch = RealHttpFetch(real_dns_lookup) - self.use_deterministic_script = use_deterministic_script + self.real_http_fetch = RealHttpFetch(real_dns_lookup, + http_archive.get_server_rtt) + self.inject_script = inject_script + self.cache_misses = cache_misses - def __call__(self, request, request_headers): + def __call__(self, request): """Fetch the request and return the response. Args: - request: an instance of an ArchivedHttpRequest. - request_headers: a dict of HTTP headers. + request: an ArchivedHttpRequest. 
+ Returns: + an ArchivedHttpResponse """ - response, response_chunks = self.real_http_fetch(request, request_headers) - if response is None: - return None - archived_http_response = httparchive.ArchivedHttpResponse( - response.version, - response.status, - response.reason, - response.getheaders(), - response_chunks) - if self.use_deterministic_script: - try: - archived_http_response.inject_deterministic_script() - except httparchive.InjectionFailedException as err: - logging.error('Failed to inject deterministic script for %s', request) - logging.debug('Request content: %s', err.text) + if self.cache_misses: + self.cache_misses.record_request( + request, is_record_mode=True, is_cache_miss=False) + + # If request is already in the archive, return the archived response. + if request in self.http_archive: + logging.debug('Repeated request found: %s', request) + response = self.http_archive[request] + else: + response = self.real_http_fetch(request) + if response is None: + return None + self.http_archive[request] = response + if self.inject_script: + response = _InjectScripts(response, self.inject_script) logging.debug('Recorded: %s', request) - self.http_archive[request] = archived_http_response - return archived_http_response + return response class ReplayHttpArchiveFetch(object): """Serve responses from the given HttpArchive.""" - def __init__(self, http_archive, use_diff_on_unknown_requests=False): + def __init__(self, http_archive, inject_script, + use_diff_on_unknown_requests=False, cache_misses=None, + use_closest_match=False): """Initialize ReplayHttpArchiveFetch. Args: - http_archve: an instance of a HttpArchive + http_archive: an instance of a HttpArchive + inject_script: script string to inject in all pages use_diff_on_unknown_requests: If True, log unknown requests with a diff to requests that look similar. + cache_misses: Instance of CacheMissArchive. + Callback updates archive on cache misses + use_closest_match: If True, on replay mode, serve the closest match + in the archive instead of giving a 404. """ self.http_archive = http_archive + self.inject_script = inject_script self.use_diff_on_unknown_requests = use_diff_on_unknown_requests + self.cache_misses = cache_misses + self.use_closest_match = use_closest_match - def __call__(self, request, request_headers=None): + def __call__(self, request): """Fetch the request and return the response. Args: request: an instance of an ArchivedHttpRequest. - request_headers: a dict of HTTP headers. 
+    Returns:
+      Instance of ArchivedHttpResponse (if found) or None
     """
     response = self.http_archive.get(request)
+
+    if self.use_closest_match and not response:
+      closest_request = self.http_archive.find_closest_request(
+          request, use_path=True)
+      if closest_request:
+        response = self.http_archive.get(closest_request)
+        if response:
+          logging.info('Request not found: %s\nUsing closest match: %s',
+                       request, closest_request)
+
+    if self.cache_misses:
+      self.cache_misses.record_request(
+          request, is_record_mode=False, is_cache_miss=not response)
+
     if not response:
+      reason = str(request)
       if self.use_diff_on_unknown_requests:
-        reason = self.http_archive.diff(request) or request
-      else:
-        reason = request
+        diff = self.http_archive.diff(request)
+        if diff:
+          reason += (
+              "\nNearest request diff "
+              "('-' for archived request, '+' for current request):\n%s" % diff)
       logging.warning('Could not replay: %s', reason)
+    else:
+      response = _InjectScripts(response, self.inject_script)
     return response
+
+
+class ControllableHttpArchiveFetch(object):
+  """Controllable fetch function that can swap between record and replay."""
+
+  def __init__(self, http_archive, real_dns_lookup,
+               inject_script, use_diff_on_unknown_requests,
+               use_record_mode, cache_misses, use_closest_match):
+    """Initialize HttpArchiveFetch.
+
+    Args:
+      http_archive: an instance of a HttpArchive
+      real_dns_lookup: a function that resolves a host to an IP.
+      inject_script: script string to inject in all pages.
+      use_diff_on_unknown_requests: If True, log unknown requests
+        with a diff to requests that look similar.
+      use_record_mode: If True, start the server in record mode.
+      cache_misses: Instance of CacheMissArchive.
+      use_closest_match: If True, in replay mode, serve the closest match
+        in the archive instead of giving a 404.
+ """ + self.record_fetch = RecordHttpArchiveFetch( + http_archive, real_dns_lookup, inject_script, + cache_misses) + self.replay_fetch = ReplayHttpArchiveFetch( + http_archive, inject_script, use_diff_on_unknown_requests, cache_misses, + use_closest_match) + if use_record_mode: + self.SetRecordMode() + else: + self.SetReplayMode() + + def SetRecordMode(self): + self.fetch = self.record_fetch + self.is_record_mode = True + + def SetReplayMode(self): + self.fetch = self.replay_fetch + self.is_record_mode = False + + def __call__(self, *args, **kwargs): + """Forward calls to Replay/Record fetch functions depending on mode.""" + return self.fetch(*args, **kwargs) diff --git a/wpr/httpproxy.py b/wpr/httpproxy.py old mode 100755 new mode 100644 index 42546db..a375fcd --- a/wpr/httpproxy.py +++ b/wpr/httpproxy.py @@ -16,13 +16,22 @@ import BaseHTTPServer import daemonserver import httparchive -import httpclient # wpr httplib wrapper import logging import os -import socket import SocketServer +import ssl import subprocess import time +import urlparse + + +class HttpProxyError(Exception): + """Module catch-all error.""" + pass + +class HttpProxyServerError(HttpProxyError): + """Raised for errors like 'Address already in use'.""" + pass class HttpArchiveHandler(BaseHTTPServer.BaseHTTPRequestHandler): @@ -52,20 +61,25 @@ def get_archived_http_request(self): logging.error('Request without host header') return None + parsed = urlparse.urlparse(self.path) + query = '?%s' % parsed.query if parsed.query else '' + fragment = '#%s' % parsed.fragment if parsed.fragment else '' + full_path = '%s%s%s' % (parsed.path, query, fragment) + return httparchive.ArchivedHttpRequest( self.command, host, - self.path, + full_path, self.read_request_body(), - self.get_header_dict()) + self.get_header_dict(), + self.server.is_ssl) def send_archived_http_response(self, response): try: # We need to set the server name before we start the response. - headers = dict(response.headers) - use_chunked = 'transfer-encoding' in headers - has_content_length = 'content-length' in headers - self.server_version = headers.get('server', 'WebPageReplay') + is_chunked = response.is_chunked() + has_content_length = response.get_header('content-length') is not None + self.server_version = response.get_header('server', 'WebPageReplay') self.sys_version = '' if response.version == 10: @@ -73,10 +87,15 @@ def send_archived_http_response(self, response): # If we don't have chunked encoding and there is no content length, # we need to manually compute the content-length. - if not use_chunked and not has_content_length: + if not is_chunked and not has_content_length: content_length = sum(len(c) for c in response.response_data) response.headers.append(('content-length', str(content_length))) + use_delays = (self.server.use_delays and + not self.server.http_archive_fetch.is_record_mode) + if use_delays: + logging.debug('Using delays: %s', response.delays) + time.sleep(response.delays['headers'] / 1000.0) self.send_response(response.status, response.reason) # TODO(mbelshe): This is lame - each write is a packet! for header, value in response.headers: @@ -84,16 +103,16 @@ def send_archived_http_response(self, response): self.send_header(header, value) self.end_headers() - for chunk in response.response_data: - if use_chunked: + for chunk, delay in zip(response.response_data, response.delays['data']): + if use_delays: + time.sleep(delay / 1000.0) + if is_chunked: # Write chunk length (hex) and data (e.g. "A\r\nTESSELATED\r\n"). 
self.wfile.write('%x\r\n%s\r\n' % (len(chunk), chunk)) else: self.wfile.write(chunk) - if use_chunked and (not response.response_data or - response.response_data[-1]): - # Write last chunk as a zero-length chunk with no data. - self.wfile.write('0\r\n\r\n') + if is_chunked: + self.wfile.write('0\r\n\r\n') # write final, zero-length chunk. self.wfile.flush() # TODO(mbelshe): This connection close doesn't seem to work. @@ -102,9 +121,7 @@ def send_archived_http_response(self, response): except Exception, e: logging.error('Error sending response for %s/%s: %s', - self.headers['host'], - self.path, - e) + self.headers['host'], self.path, e) def do_POST(self): self.do_GET() @@ -112,17 +129,12 @@ def do_POST(self): def do_HEAD(self): self.do_GET() - def send_error(self, response_code, message=None): + def send_error(self, status): """Override the default send error with a version that doesn't unnecessarily close the connection. """ - body = "Not Found" - self.send_response(response_code, message) - self.send_header('content-type', 'text/plain') - self.send_header('content-length', str(len(body))) - self.end_headers() - self.wfile.write(body) - self.wfile.flush() + response = httparchive.create_response(status) + self.send_archived_http_response(response) def do_GET(self): start_time = time.time() @@ -130,11 +142,9 @@ def do_GET(self): if request is None: self.send_error(500) return - response_code = self.server.custom_handlers.handle(request) - if response_code: - self.send_error(response_code) - return - response = self.server.http_archive_fetch(request, self.get_header_dict()) + response = self.server.custom_handlers.handle(request) + if not response: + response = self.server.http_archive_fetch(request) if response: self.send_archived_http_response(response) request_time_ms = (time.time() - start_time) * 1000.0; @@ -146,21 +156,29 @@ def do_GET(self): class HttpProxyServer(SocketServer.ThreadingMixIn, BaseHTTPServer.HTTPServer, daemonserver.DaemonServer): - def __init__(self, http_archive_fetch, custom_handlers, - host='localhost', port=80): - self.http_archive_fetch = http_archive_fetch - self.custom_handlers = custom_handlers + HANDLER = HttpArchiveHandler - # Increase the listen queue size. The default, 5, is set in - # SocketServer.TCPServer (the parent of BaseHTTPServer.HTTPServer). - # Since we're intercepting many domains through this single server, - # it is quite possible to get more than 5 concurrent connection requests. - self.request_queue_size = 128 + # Increase the request queue size. The default value, 5, is set in + # SocketServer.TCPServer (the parent of BaseHTTPServer.HTTPServer). + # Since we're intercepting many domains through this single server, + # it is quite possible to get more than 5 concurrent requests. 
+  request_queue_size = 128
+
+  def __init__(self, http_archive_fetch, custom_handlers,
+               host='localhost', port=80, use_delays=False,
+               is_ssl=False):
     try:
-      BaseHTTPServer.HTTPServer.__init__(self, (host, port), HttpArchiveHandler)
+      BaseHTTPServer.HTTPServer.__init__(self, (host, port), self.HANDLER)
     except Exception, e:
-      logging.critical('Could not start HTTPServer on port %d: %s', port, e)
+      raise HttpProxyServerError('Could not start HTTPServer on port %d: %s' %
+                                 (port, e))
+    self.http_archive_fetch = http_archive_fetch
+    self.custom_handlers = custom_handlers
+    self.use_delays = use_delays
+    self.is_ssl = is_ssl
+
+    protocol = 'HTTPS' if self.is_ssl else 'HTTP'
+    logging.info('Started %s server on %s...', protocol, self.server_address)
 
   def cleanup(self):
     try:
@@ -168,3 +186,16 @@ def cleanup(self):
     except KeyboardInterrupt, e:
       pass
     logging.info('Stopped HTTP server')
+
+
+class HttpsProxyServer(HttpProxyServer):
+  """SSL server."""
+
+  def __init__(self, http_archive_fetch, custom_handlers, certfile,
+               host='localhost', port=443, use_delays=False):
+    HttpProxyServer.__init__(
+        self, http_archive_fetch, custom_handlers, host, port,
+        use_delays, is_ssl=True)
+    self.socket = ssl.wrap_socket(
+        self.socket, certfile=certfile, server_side=True)
+    # Ancestor class, daemonserver, calls serve_forever() during its __init__.
diff --git a/wpr/httpzlib.py b/wpr/httpzlib.py
old mode 100755
new mode 100644
diff --git a/wpr/mock-archive.txt b/wpr/mock-archive.txt
new file mode 100644
index 0000000..a90bb03
--- /dev/null
+++ b/wpr/mock-archive.txt
@@ -0,0 +1,10 @@
+GET%www.zappos.com%/%%[('accept-encoding', 'gzip,deflate'), ('host', 'www.zappos.com')]
+GET%www.zappos.com%/css/print.20110525145237.css%%[('accept-encoding', 'gzip,deflate'), ('host', 'www.zappos.com')]
+GET%www.zappos.com%/favicon.ico%%[('accept-encoding', 'gzip,deflate'), ('host', 'www.zappos.com')]
+GET%www.zappos.com%/hydra/hydra.p.20110607.js%%[('accept-encoding', 'gzip,deflate'), ('host', 'www.zappos.com')]
+GET%www.zappos.com%/imgs/shadebg.20110525145241.png%%[('accept-encoding', 'gzip,deflate'), ('host', 'www.zappos.com')]
+GET%www.msn.com%/%%[('accept-encoding', 'gzip,deflate'), ('host', 'www.msn.com')]
+GET%www.msn.com%/?euid=&userGroup=W:default&PM=z:1%%[('accept-encoding', 'gzip,deflate'), ('host', 'www.msn.com'), ('x-requested-with', 'XMLHttpRequest')]
+GET%www.msn.com%/?euid=342%%[('accept-encoding', 'gzip,deflate'), ('host', 'www.msn.com'), ('x-requested-with', 'XMLHttpRequest')]
+GET%www.amazon.com%/%%[('accept-encoding', 'gzip,deflate'), ('host', 'www.amazon.com')]
+GET%www.google.com%/%%[('accept-encoding', 'gzip,deflate'), ('host', 'www.google.com')]
diff --git a/wpr/mockhttprequest.py b/wpr/mockhttprequest.py
new file mode 100644
index 0000000..ac5df99
--- /dev/null
+++ b/wpr/mockhttprequest.py
@@ -0,0 +1,59 @@
+#!/usr/bin/env python
+# Copyright 2010 Google Inc. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
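Each mock-archive.txt line added above is a %-separated record: command, host, path, request body, and a Python-literal list of header tuples. A minimal reader for that layout; parse_mock_archive_line is a hypothetical helper for illustration, and it assumes no literal '%' in the first four fields:

import ast

def parse_mock_archive_line(line):
  # COMMAND%host%path%request_body%[(header, value), ...]
  command, host, path, body, headers = line.strip().split('%', 4)
  return command, host, path, body or None, ast.literal_eval(headers)

cmd, host, path, body, headers = parse_mock_archive_line(
    "GET%www.google.com%/%%[('accept-encoding', 'gzip,deflate'), "
    "('host', 'www.google.com')]")
assert cmd == 'GET' and body is None and headers[1][0] == 'host'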
+ +"""Mock instance of ArchivedHttpRequest used for testing.""" + + +class ArchivedHttpRequest(object): + """Mock instance of ArchivedHttpRequest in HttpArchive.""" + + def __init__(self, command, host, path, request_body, headers): + """Initialize an ArchivedHttpRequest. + + Args: + command: a string (e.g. 'GET' or 'POST'). + host: a host name (e.g. 'www.google.com'). + path: a request path (e.g. '/search?q=dogs'). + request_body: a request body string for a POST or None. + headers: [(header1, value1), ...] list of tuples + """ + self.command = command + self.host = host + self.path = path + self.request_body = request_body + self.headers = headers + self.trimmed_headers = headers + + def __str__(self): + return '%s %s%s %s' % (self.command, self.host, self.path, + self.trimmed_headers) + + def __repr__(self): + return repr((self.command, self.host, self.path, self.request_body, + self.trimmed_headers)) + + def __hash__(self): + """Return a integer hash to use for hashed collections including dict.""" + return hash(repr(self)) + + def __eq__(self, other): + """Define the __eq__ method to match the hash behavior.""" + return repr(self) == repr(other) + + def matches(self, command=None, host=None, path=None): + """Returns true iff the request matches all parameters.""" + return ((command is None or command == self.command) and + (host is None or host == self.host) and + (path is None or path == self.path)) diff --git a/wpr/perftracker/__init__.py b/wpr/perftracker/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/wpr/perftracker/app/appengine_config.py b/wpr/perftracker/app/appengine_config.py index e6c00bf..1e21316 100644 --- a/wpr/perftracker/app/appengine_config.py +++ b/wpr/perftracker/app/appengine_config.py @@ -1,3 +1,17 @@ +#!/usr/bin/env python +# Copyright 2012 Google Inc. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. def webapp_add_wsgi_middleware(app): from google.appengine.ext.appstats import recording diff --git a/wpr/perftracker/extension/background.html b/wpr/perftracker/extension/background.html index 171a66c..595fe6c 100644 --- a/wpr/perftracker/extension/background.html +++ b/wpr/perftracker/extension/background.html @@ -410,6 +410,8 @@ setIds_[LoadType.cold]); chrome.benchmarking.clearCache(); + chrome.benchmarking.clearHostResolverCache(); + chrome.benchmarking.clearPredictorCache(); chrome.benchmarking.closeConnections(); me_.asyncClearCookies(); diff --git a/wpr/perftracker/runner.py b/wpr/perftracker/runner.py index ce12f3c..2da4730 100755 --- a/wpr/perftracker/runner.py +++ b/wpr/perftracker/runner.py @@ -15,6 +15,11 @@ description = """ This is a script for running automated network tests of chrome. + + There is an optional -e flag that instead runs an automated + web-page-replay test. It runs WPR record mode on the set of URLs specified + in the config file, then runs replay mode on the same set of URLs and + records any cache misses to . 
""" import sys @@ -129,6 +134,7 @@ def _XvfbPidFilename(slave_build_name): """ return os.path.join(tempfile.gettempdir(), 'xvfb-%s.pid' % slave_build_name) + def StartVirtualX(slave_build_name, build_dir): """Start a virtual X server and set the DISPLAY environment variable so sub processes will use the virtual X server. Also start icewm. This only works @@ -224,7 +230,11 @@ def GetVersion(): class TestInstance: def __init__(self, network, log_level, log_file, record, - diff_unknown_requests, screenshot_dir): + diff_unknown_requests, screenshot_dir, cache_miss_file=None, + use_deterministic_script=False, + use_chrome_deterministic_js=True, + use_closest_match=False, + use_server_delay=False): self.network = network self.log_level = log_level self.log_file = log_file @@ -233,6 +243,11 @@ def __init__(self, network, log_level, log_file, record, self.spdy_proxy_process = None self.diff_unknown_requests = diff_unknown_requests self.screenshot_dir = screenshot_dir + self.cache_miss_file = cache_miss_file + self.use_deterministic_script = use_deterministic_script + self.use_chrome_deterministic_js = use_chrome_deterministic_js + self.use_closest_match = use_closest_match + self.use_server_delay = use_server_delay def GenerateConfigFile(self, notes=''): # The PerfTracker extension requires this name in order to kick off. @@ -289,21 +304,28 @@ def StartProxy(self): init_cwnd = 10 protocol = self.network['protocol'] if 'spdy' in protocol: - port = BACKEND_SERVER_PORT - init_cwnd = 32 + port = BACKEND_SERVER_PORT + init_cwnd = 32 if protocol == 'http-base': - init_cwnd = 3 # See RFC3390 + init_cwnd = 3 # See RFC3390 cmdline = [ REPLAY_PATH, '--no-dns_forwarding', - '--no-deterministic_script', '--port', str(port), '--shaping_port', str(SERVER_PORT), - '--init_cwnd', str(init_cwnd), '--log_level', self.log_level, + '--init_cwnd', str(init_cwnd), ] + if self.cache_miss_file: + cmdline += ['-e', self.cache_miss_file] + if self.use_closest_match: + cmdline += ['--use_closest_match'] + if self.use_server_delay: + cmdline += ['--use_server_delay'] + if not self.use_deterministic_script: + cmdline += ['--inject_scripts=""'] if self.log_file: cmdline += ['--log_file', self.log_file] if self.network['bandwidth_kbps']['down']: @@ -314,15 +336,15 @@ def StartProxy(self): cmdline += ['-m', str(self.network['round_trip_time_ms'])] if self.network['packet_loss_percent']: cmdline += ['-p', str(self.network['packet_loss_percent'] / 100.0)] - if self.diff_unknown_requests: - cmdline.append('--diff_unknown_requests') + if not self.diff_unknown_requests: + cmdline.append('--no-diff_unknown_requests') if self.screenshot_dir: cmdline += ['-I', self.screenshot_dir] if self.record: cmdline.append('-r') cmdline.append(runner_cfg.replay_data_archive) - logging.debug('Starting Web-Page-Replay: %s', ' '.join(cmdline)) + logging.info('Starting Web-Page-Replay: %s', ' '.join(cmdline)) self.proxy_process = subprocess.Popen(cmdline) def StopProxy(self): @@ -404,16 +426,10 @@ def RunChrome(self, chrome_cmdline): runner_cfg.chrome_path, '--activate-on-launch', '--disable-background-networking', - # Stop the translate bar from appearing at the top of the page. When # it's there, the screenshots are shorter than they should be. '--disable-translate', - # TODO(tonyg): These are disabled to reduce noise. It would be nice to - # make the model realistic and stable enough to enable them. 
-      '--disable-preconnect',
-      '--dns-prefetch-disable',
-
       '--enable-benchmarking',
       '--enable-logging',
       '--enable-experimental-extension-apis',
@@ -423,11 +439,14 @@ def RunChrome(self, chrome_cmdline):
       '--load-extension=' + PERFTRACKER_EXTENSION_PATH,
       '--log-level=0',
       '--no-first-run',
-      '--no-js-randomness',
       '--no-proxy-server',
       '--start-maximized',
       '--user-data-dir=' + profile_dir,
     ]
+    if self.use_chrome_deterministic_js:
+      cmdline += ['--no-js-randomness']
+    if self.cache_miss_file:
+      cmdline += ['--no-sandbox']
 
     spdy_mode = None
     if self.network['protocol'] == 'spdy':
@@ -441,7 +460,7 @@ def RunChrome(self, chrome_cmdline):
       cmdline.extend(chrome_cmdline.split(' '))
     cmdline.append(start_file_url)
 
-    logging.debug('Starting Chrome: %s', ' '.join(cmdline))
+    logging.info('Starting Chrome: %s', ' '.join(cmdline))
     chrome = subprocess.Popen(cmdline, preexec_fn=switch_away_from_root)
     returncode = chrome.wait()
     if returncode:
@@ -491,7 +510,7 @@ def ConfigureLogging(log_level_name, log_file_name):
     logging.getLogger().addHandler(fh)
 
 
-def main(options):
+def main(options, cache_miss_file):
   # When in record mode, override most of the configuration.
   if options.record:
     runner_cfg.replay_data_archive = options.record
@@ -513,7 +532,10 @@ def main(options):
     logging.debug("Running network configuration: %s", network)
     test = TestInstance(
         network, options.log_level, options.log_file, options.record,
-        options.diff_unknown_requests, options.screenshot_dir)
+        options.diff_unknown_requests, options.screenshot_dir,
+        cache_miss_file, options.use_deterministic_script,
+        options.use_chrome_deterministic_js, options.use_closest_match,
+        options.use_server_delay)
     test.RunTest(options.notes, options.chrome_cmdline)
     if not options.infinite or options.record:
       break
@@ -547,10 +569,10 @@ def format_description(self, description):
                           action='store',
                           type='string',
                           help='Log file to use in addition to writing logs to stderr.')
-  option_parser.add_option('-r', '--record', default='',
-                           action='store',
-                           type='string',
-                           help=('Record URLs in runner_cfg to this file.'))
+  option_parser.add_option('-r', '--record', default=False,
+                           action='store_true',
+                           dest='do_record',
+                           help=('Record URLs to file specified by runner_cfg.'))
   option_parser.add_option('-i', '--infinite', default=False,
                            action='store_true',
                            help='Loop infinitely, repeating the test.')
@@ -566,14 +588,43 @@ def format_description(self, description):
                           action='store',
                           type='string',
                           help='Username for logging into appengine.')
-  option_parser.add_option('-D', '--diff_unknown_requests', default=False,
-                           action='store_true',
-                           help='During replay, show a unified diff of any unknown requests against '
+  option_parser.add_option('-D', '--no-diff_unknown_requests', default=True,
+                           action='store_false',
+                           dest='diff_unknown_requests',
+                           help='During replay, do not show a diff of any unknown requests against '
                                 'their nearest match in the archive.')
   option_parser.add_option('-I', '--screenshot_dir', default=None,
                            action='store',
                            type='string',
                            help='Save PNG images of the loaded page in the given directory.')
+  option_parser.add_option('-d', '--deterministic_script', default=False,
+                           action='store_true',
+                           dest='use_deterministic_script',
+                           help='During a record, inject JavaScript to make sources of '
+                                'entropy such as Date() and Math.random() deterministic. CAUTION: '
+                                'Without this option many web pages will not replay properly.')
+  option_parser.add_option('-j', '--no_chrome_deterministic_js', default=True,
+                           action='store_false',
+                           dest='use_chrome_deterministic_js',
+                           help='Disable Chrome\'s deterministic implementations of JavaScript, '
+                                'which make sources of entropy such as Date() and Math.random() '
+                                'deterministic.')
+  option_parser.add_option('-e', '--cache_miss_file', default=None,
+                           action='store',
+                           dest='cache_miss_file',
+                           type='string',
+                           help='Archive file to record cache misses in replay mode.')
+  option_parser.add_option('-C', '--use_closest_match', default=False,
+                           action='store_true',
+                           dest='use_closest_match',
+                           help='During replay, if a request is not found, serve the closest match '
+                                'in the archive instead of giving a 404.')
+  option_parser.add_option('-U', '--use_server_delay', default=False,
+                           action='store_true',
+                           dest='use_server_delay',
+                           help='During replay, simulate server delay by delaying response time to '
+                                'requests.')
+
   options, args = option_parser.parse_args()
 
@@ -593,4 +644,14 @@ def format_description(self, description):
   else:
     options.login_url = ''
 
-  sys.exit(main(options))
+  # Run the recording round, if specified.
+  if options.do_record and options.cache_miss_file:
+    logging.debug("Running in record mode")
+    options.record = runner_cfg.replay_data_archive
+    main(options, options.cache_miss_file)
+    options.do_record = False
+
+  options.record = None
+  # Run the replay round.
+  logging.debug("Running in replay mode")
+  sys.exit(main(options, options.cache_miss_file))
diff --git a/wpr/persistentmixin.py b/wpr/persistentmixin.py
old mode 100755
new mode 100644
diff --git a/wpr/platformsettings.py b/wpr/platformsettings.py
old mode 100755
new mode 100644
index b1e34c8..9d023d3
--- a/wpr/platformsettings.py
+++ b/wpr/platformsettings.py
@@ -21,7 +21,9 @@
 import re
 import socket
 import subprocess
+import sys
 import tempfile
+import time
 
 
 class PlatformSettingsError(Exception):
@@ -39,6 +41,22 @@ class DnsUpdateError(PlatformSettingsError):
   pass
 
 
+class NotAdministratorError(PlatformSettingsError):
+  """Raised when not running as administrator."""
+  pass
+
+
+class CalledProcessError(PlatformSettingsError):
+  """Raised when a _check_output() process returns a non-zero exit status."""
+  def __init__(self, returncode, cmd):
+    self.returncode = returncode
+    self.cmd = cmd
+
+  def __str__(self):
+    return 'Command "%s" returned non-zero exit status %d' % (
+        ' '.join(self.cmd), self.returncode)
+
+
 def _check_output(*args):
   """Run Popen(*args) and return its output as a byte string.
 
@@ -49,7 +67,7 @@ def _check_output(*args):
   Args:
     *args: sequence of program arguments
   Raises:
-    subprocess.CalledProcessError if the program returns non-zero exit status.
+    CalledProcessError if the program returns non-zero exit status.
   Returns:
     output as a byte string.
   """
@@ -60,31 +78,34 @@ def _check_output(*args):
   output = process.communicate()[0]
   retcode = process.poll()
   if retcode:
-    raise subprocess.CalledProcessError(retcode, command_args, output=output)
+    raise CalledProcessError(retcode, command_args)
   return output
 
 
 class PlatformSettings(object):
-  _IPFW_BIN = None
-  _IPFW_QUEUE_SLOTS = 100
+  _CERT_FILE = 'wpr_cert.pem'
 
   # Some platforms do not shape traffic with the loopback address.
_USE_REAL_IP_FOR_TRAFFIC_SHAPING = False def __init__(self): self.original_primary_dns = None + self.original_cwnd = None # original TCP congestion window def get_primary_dns(self): - raise NotImplementedError() + raise NotImplementedError + + def _set_primary_dns(self): + raise NotImplementedError def get_original_primary_dns(self): - if not self.original_primary_dns: + if self.original_primary_dns is None: self.original_primary_dns = self.get_primary_dns() + logging.info('Saved original system DNS (%s)', self.original_primary_dns) return self.original_primary_dns def set_primary_dns(self, dns): - if not self.original_primary_dns: - self.original_primary_dns = self.get_primary_dns() + self.get_original_primary_dns() self._set_primary_dns(dns) if self.get_primary_dns() == dns: logging.info('Changed system DNS to %s', dns) @@ -92,30 +113,40 @@ def set_primary_dns(self, dns): raise self._get_dns_update_error() def restore_primary_dns(self): - if not self.original_primary_dns: - raise DnsUpdateError('Cannot restore because never set.') - self.set_primary_dns(self.original_primary_dns) - self.original_primary_dns = None + if self.original_primary_dns is not None: + self.set_primary_dns(self.original_primary_dns) + self.original_primary_dns = None - def ipfw(self, *args): - if self._IPFW_BIN: - ipfw_args = [self._IPFW_BIN] + [str(a) for a in args] - logging.debug(' '.join(ipfw_args)) - subprocess.check_call(ipfw_args) - else: - raise NotImplementedError() + def get_cwnd(self): + return None - def is_cwnd_available(self): - return False + def _set_cwnd(self, args): + pass - def set_cwnd(self, args): - logging.error("Platform does not support setting cwnd.") + def get_original_cwnd(self): + if not self.original_cwnd: + self.original_cwnd = self.get_cwnd() + return self.original_cwnd - def get_cwnd(self): - logging.error("Platform does not support getting cwnd.") + def set_cwnd(self, cwnd): + self.get_original_cwnd() + self._set_cwnd(cwnd) + if self.get_cwnd() == cwnd: + logging.info("Changed cwnd to %s", cwnd) + else: + logging.error("Unable to update cwnd to %s", cwnd) + + def restore_cwnd(self): + if self.original_cwnd is not None: + self.set_cwnd(self.original_cwnd) + self.original_cwnd = None - def get_ipfw_queue_slots(self): - return self._IPFW_QUEUE_SLOTS + def _ipfw_bin(self): + raise NotImplementedError + + def ipfw(self, *args): + ipfw_args = [self._ipfw_bin()] + [str(a) for a in args] + return _check_output(*ipfw_args) def get_server_ip_address(self, is_server_mode=False): """Returns the IP address to use for dnsproxy, httpproxy, and ipfw.""" @@ -135,14 +166,54 @@ def configure_loopback(self): def unconfigure_loopback(self): pass + def get_system_logging_handler(self): + """Return a handler for the logging module (optional).""" + return None + + def ping(self, hostname): + """Pings the hostname by calling the OS system ping command. + Also stores the result internally. + + Args: + hostname: hostname of the server to be pinged + Returns: + round trip time to the server in seconds, or 0 if unable to calculate RTT + """ + raise NotImplementedError + + def rerun_as_administrator(self): + """If needed, rerun the program with administrative privileges. + + Raises NotAdministratorError if unable to rerun. 
+ """ + pass + + def get_certfile_name(self): + """Get the file name for a temporary self-signed certificate.""" + raise NotImplementedError + + def create_certfile(self, certfile): + """Create a certfile for serving SSL traffic.""" + raise NotImplementedError + + def timer(self): + """Return the current time in seconds as a floating point number.""" + return time.time() + class PosixPlatformSettings(PlatformSettings): - _IPFW_BIN = 'ipfw' + PING_PATTERN = r'rtt min/avg/max/mdev = \d+\.\d+/(\d+\.\d+)/\d+\.\d+/\d+\.\d+' + PING_CMD = ('ping', '-c', '3', '-i', '0.2', '-W', '1') + # For OsX Lion non-root: + PING_RESTRICTED_CMD = ('ping', '-c', '1', '-i', '1', '-W', '1') def _get_dns_update_error(self): return DnsUpdateError('Did you run under sudo?') def _sysctl(self, *args): + sysctl = '/usr/sbin/sysctl' + if not os.path.exists(sysctl): + sysctl = '/sbin/sysctl' sysctl = subprocess.Popen( ['sysctl'] + [str(a) for a in args], stdin=subprocess.PIPE, stdout=subprocess.PIPE) @@ -150,7 +221,11 @@ def _sysctl(self, *args): return sysctl.returncode, stdout def has_sysctl(self, name): - return self._sysctl(name)[0] == 0 + if not hasattr(self, 'has_sysctl_cache'): + self.has_sysctl_cache = {} + if name not in self.has_sysctl_cache: + self.has_sysctl_cache[name] = self._sysctl(name)[0] == 0 + return self.has_sysctl_cache[name] def set_sysctl(self, name, value): rv = self._sysctl('%s=%s' % (name, value))[0] @@ -165,17 +240,97 @@ def get_sysctl(self, name): logging.error("Unable to get sysctl %s: %s", name, rv) return None + def _check_output(self, *args): + """Allow tests to override this.""" + return _check_output(*args) + + def _ping(self, hostname): + """Return ping output or None if ping fails. + + Initially pings 'localhost' to test for ping command that works. + If the tests fails, subsequent calls will return None without calling ping. + + Args: + hostname: host to ping + Returns: + ping stdout string, or None if ping unavailable + Raises: + CalledProcessError if ping returns non-zero exit + """ + if not hasattr(self, 'ping_cmd'): + test_host = 'localhost' + for self.ping_cmd in (self.PING_CMD, self.PING_RESTRICTED_CMD): + try: + if self._ping(test_host): + break + except (CalledProcessError, OSError) as e: + last_ping_error = e + else: + logging.critical('Ping configuration failed: %s', last_ping_error) + self.ping_cmd = None + if self.ping_cmd: + cmd = list(self.ping_cmd) + [hostname] + return self._check_output(*cmd) + return None + + def ping(self, hostname): + """Pings the hostname by calling the OS system ping command. + + Args: + hostname: hostname of the server to be pinged + Returns: + round trip time to the server in milliseconds, or 0 if unavailable + """ + rtt = 0 + output = None + try: + output = self._ping(hostname) + except CalledProcessError as e: + logging.critical('Ping failed: %s', e) + if output: + match = re.search(self.PING_PATTERN, output) + if match: + rtt = float(match.groups()[0]) + else: + logging.warning('Unable to ping %s: %s', hostname, output) + return rtt + + def rerun_as_administrator(self): + """If needed, rerun the program with administrative privileges. + + Raises NotAdministratorError if unable to rerun. 
+ """ + if os.geteuid() != 0: + logging.warn("Rerunning with sudo: %s", sys.argv) + os.execv('/usr/bin/sudo', ['--'] + sys.argv) + + def get_certfile_name(self): + """Get the file name for a temporary self-signed certificate.""" + return os.path.join(tempfile.gettempdir(), self._CERT_FILE) + + def create_certfile(self, certfile): + """Create a certfile for serving SSL traffic.""" + if not os.path.exists(certfile): + _check_output( + '/usr/bin/openssl', 'req', '-batch', '-new', '-x509', '-days', '365', + '-nodes', '-out', certfile, '-keyout', certfile) + + def _ipfw_bin(self): + for ipfw in ['/usr/local/sbin/ipfw', '/sbin/ipfw']: + if os.path.exists(ipfw): + return ipfw + raise PlatformSettingsError("ipfw not found.") class OsxPlatformSettings(PosixPlatformSettings): LOCAL_SLOWSTART_MIB_NAME = 'net.inet.tcp.local_slowstart_flightsize' def _scutil(self, cmd): scutil = subprocess.Popen( - ['scutil'], stdin=subprocess.PIPE, stdout=subprocess.PIPE) + ['/usr/sbin/scutil'], stdin=subprocess.PIPE, stdout=subprocess.PIPE) return scutil.communicate(cmd)[0] def _ifconfig(self, *args): - return _check_output('ifconfig', *args) + return _check_output('/sbin/ifconfig', *args) def set_sysctl(self, name, value): rv = self._sysctl('-w', '%s=%s' % (name, value))[0] @@ -194,7 +349,7 @@ def _get_dns_service_key(self): key_value = line.split(' : ') if key_value[0] == ' PrimaryService': return 'State:/Network/Service/%s/DNS' % key_value[1] - raise self._get_dns_update_error() + raise DnsReadError('Unable to find DNS service key: %s', output) def get_primary_dns(self): # { @@ -205,9 +360,14 @@ def get_primary_dns(self): # DomainName : apple.co.uk # } output = self._scutil('show %s' % self._get_dns_service_key()) - primary_line = output.split('\n')[2] - line_parts = primary_line.split(' ') - return line_parts[-1] + match = re.search( + br'ServerAddresses\s+:\s+\s+{\s+0\s+:\s+((\d{1,3}\.){3}\d{1,3})', + output) + if match: + return match.group(1) + else: + raise DnsReadError('Unable to find primary DNS server: %s', output) + def _set_primary_dns(self, dns): command = '\n'.join([ @@ -217,22 +377,18 @@ def _set_primary_dns(self, dns): ]) self._scutil(command) + def get_cwnd(self): + return int(self.get_sysctl(self.LOCAL_SLOWSTART_MIB_NAME)) + + def _set_cwnd(self, size): + self.set_sysctl(self.LOCAL_SLOWSTART_MIB_NAME, size) + def get_loopback_mtu(self): config = self._ifconfig('lo0') match = re.search(r'\smtu\s+(\d+)', config) if match: return int(match.group(1)) - else: - return None - - def is_cwnd_available(self): - return True - - def set_cwnd(self, size): - self.set_sysctl(self.LOCAL_SLOWSTART_MIB_NAME, size) - - def get_cwnd(self): - return int(self.get_sysctl(self.LOCAL_SLOWSTART_MIB_NAME)) + return None def configure_loopback(self): """Configure loopback to use reasonably sized frames. @@ -240,19 +396,20 @@ def configure_loopback(self): OS X uses jumbo frames by default (16KB). """ TARGET_LOOPBACK_MTU = 1500 - loopback_mtu = self.get_loopback_mtu() - if loopback_mtu and loopback_mtu != TARGET_LOOPBACK_MTU: - self.saved_loopback_mtu = loopback_mtu + self.original_loopback_mtu = self.get_loopback_mtu() + if self.original_loopback_mtu == TARGET_LOOPBACK_MTU: + self.original_loopback_mtu = None + if self.original_loopback_mtu is not None: self._ifconfig('lo0', 'mtu', TARGET_LOOPBACK_MTU) logging.debug('Set loopback MTU to %d (was %d)', - TARGET_LOOPBACK_MTU, loopback_mtu) + TARGET_LOOPBACK_MTU, self.original_loopback_mtu) else: logging.error('Unable to read loopback mtu. 
Setting left unchanged.') def unconfigure_loopback(self): - if hasattr(self, 'saved_loopback_mtu') and self.saved_loopback_mtu: - self._ifconfig('lo0', 'mtu', self.saved_loopback_mtu) - logging.debug('Restore loopback MTU to %d', self.saved_loopback_mtu) + if self.original_loopback_mtu is not None: + self._ifconfig('lo0', 'mtu', self.original_loopback_mtu) + logging.debug('Restore loopback MTU to %d', self.original_loopback_mtu) class LinuxPlatformSettings(PosixPlatformSettings): @@ -280,7 +437,6 @@ class LinuxPlatformSettings(PosixPlatformSettings): TCP_INIT_CWND = 'net.ipv4.tcp_init_cwnd' TCP_BASE_MSS = 'net.ipv4.tcp_base_mss' TCP_MTU_PROBING = 'net.ipv4.tcp_mtu_probing' - _IPFW_QUEUE_SLOTS = 500 def get_primary_dns(self): try: @@ -294,7 +450,12 @@ def get_primary_dns(self): def _set_primary_dns(self, dns): """Replace the first nameserver entry with the one given.""" - self._write_resolve_conf(dns) + try: + self._write_resolve_conf(dns) + except OSError, e: + if 'Permission denied' in e: + raise self._get_dns_update_error() + raise def _write_resolve_conf(self, dns): is_first_nameserver_replaced = False @@ -306,17 +467,19 @@ def _write_resolve_conf(self, dns): else: print line, if not is_first_nameserver_replaced: - raise DnsUpdateError('Could not find a suitable namserver entry in %s' % + raise DnsUpdateError('Could not find a suitable nameserver entry in %s' % self.RESOLV_CONF) - def is_cwnd_available(self): - return self.has_sysctl(self.TCP_INIT_CWND) + def get_cwnd(self): + if self.has_sysctl(self.TCP_INIT_CWND): + return self.get_sysctl(self.TCP_INIT_CWND) + else: + return None - def set_cwnd(self, args): - self.set_sysctl(self.TCP_INIT_CWND, str(args)) + def _set_cwnd(self, args): + if self.has_sysctl(self.TCP_INIT_CWND): + self.set_sysctl(self.TCP_INIT_CWND, str(args)) - def get_cwnd(self): - return self.get_sysctl(self.TCP_INIT_CWND) def configure_loopback(self): """ @@ -351,27 +514,26 @@ def _netsh_show_dns(self): """Return DNS information: Example output: + Configuration for interface "Local Area Connection 3" + DNS servers configured through DHCP: None + Register with which suffix: Primary only - Configuration for interface "Local Area Connection 3" - DNS servers configured through DHCP: None - Register with which suffix: Primary only - - Configuration for interface "Wireless Network Connection 2" - DNS servers configured through DHCP: 192.168.1.1 - Register with which suffix: Primary only + Configuration for interface "Wireless Network Connection 2" + DNS servers configured through DHCP: 192.168.1.1 + Register with which suffix: Primary only """ return _check_output('netsh', 'interface', 'ip', 'show', 'dns') - def _netsh_get_interface_names(self): - return re.findall(r'"(.+?)"', self._netsh_show_dns()) - def get_primary_dns(self): match = re.search(r':\s+(\d+\.\d+\.\d+\.\d+)', self._netsh_show_dns()) return match and match.group(1) or None def _set_primary_dns(self, dns): - vbs = """Set objWMIService = GetObject("winmgmts:{impersonationLevel=impersonate}!\\\\.\\root\\cimv2") -Set colNetCards = objWMIService.ExecQuery("Select * From Win32_NetworkAdapterConfiguration Where IPEnabled = True") + vbs = """ +Set objWMIService = GetObject( _ + "winmgmts:{impersonationLevel=impersonate}!\\\\.\\root\\cimv2") +Set colNetCards = objWMIService.ExecQuery( _ + "Select * From Win32_NetworkAdapterConfiguration Where IPEnabled = True") For Each objNetCard in colNetCards arrDNSServers = Array("%s") objNetCard.SetDNSServerSearchOrder(arrDNSServers) @@ -394,14 +556,16 @@ def _ipconfig(self, 
*args): def get_mac_address(self, ip): """Return the MAC address for the given ip.""" + ip_re = re.compile(r'^\s*IP(?:v4)? Address[ .]+:\s+([0-9.]+)') for line in self._ipconfig('/all').splitlines(): if line[:1].isalnum(): current_ip = None current_mac = None elif ':' in line: line = line.strip() - if line.startswith('IP Address'): - current_ip = line.split(':', 1)[1].lstrip() + ip_match = ip_re.match(line) + if ip_match: + current_ip = ip_match.group(1) elif line.startswith('Physical Address'): current_mac = line.split(':', 1)[1].lstrip() if current_ip == ip and current_mac: @@ -409,7 +573,6 @@ def get_mac_address(self, ip): return None def configure_loopback(self): - # TODO(slamm): use/set ip address that is compat with replay.py self.ip = self.get_server_ip_address() self.mac_address = self.get_mac_address(self.ip) if self.mac_address: @@ -424,8 +587,56 @@ def unconfigure_loopback(self): self._arp('-d', self.ip) self._route('delete', self.ip, self.ip, 'mask', '255.255.255.255') + def get_system_logging_handler(self): + """Return a handler for the logging module (optional). + + For Windows, output can be viewed with DebugView. + http://technet.microsoft.com/en-us/sysinternals/bb896647.aspx + """ + import ctypes + output_debug_string = ctypes.windll.kernel32.OutputDebugStringA + output_debug_string.argtypes = [ctypes.c_char_p] + class DebugViewHandler(logging.Handler): + def emit(self, record): + output_debug_string("[wpr] " + self.format(record)) + return DebugViewHandler() + + def rerun_as_administrator(self): + """If needed, rerun the program with administrative privileges. + + Raises NotAdministratorError if unable to rerun. + """ + import ctypes + if ctypes.windll.shell32.IsUserAnAdmin(): + raise NotAdministratorError('Rerun with administrator privileges.') + #os.execv('runas', sys.argv) # TODO: replace needed Windows magic + + def get_certfile_name(self): + """Get the file name for a temporary self-signed certificate.""" + raise PlatformSettingsError('Certificate file does not exist.') + + def create_certfile(self, certfile): + """Create a certfile for serving SSL traffic and return its name. + + TODO: Check for Windows SDK makecert.exe tool. + """ + raise PlatformSettingsError('Certificate file does not exist.') + + def timer(self): + """Return the current time in seconds as a floating point number. + + From time module documentation: + On Windows, this function [time.clock()] returns wall-clock + seconds elapsed since the first call to this function, as a + floating point number, based on the Win32 function + QueryPerformanceCounter(). The resolution is typically better + than one microsecond. + """ + return time.clock() + class WindowsXpPlatformSettings(WindowsPlatformSettings): - _IPFW_BIN = r'third_party\ipfw_win32\ipfw.exe' + def _ipfw_bin(self): + return r'third_party\ipfw_win32\ipfw.exe' def _new_platform_settings(): diff --git a/wpr/platformsettings_test.py b/wpr/platformsettings_test.py new file mode 100755 index 0000000..9142a23 --- /dev/null +++ b/wpr/platformsettings_test.py @@ -0,0 +1,245 @@ +#!/usr/bin/env python +# Copyright 2011 Google Inc. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Unit tests for platformsettings. + +Usage: +$ ./platformsettings_test.py +""" + +import unittest + +import platformsettings + +WINDOWS_7_IP = '172.11.25.170' +WINDOWS_7_MAC = '00-1A-44-DA-88-C0' +WINDOWS_7_IPCONFIG = """ +Windows IP Configuration + + Host Name . . . . . . . . . . . . : THEHOST1-W + Primary Dns Suffix . . . . . . . : something.example.com + Node Type . . . . . . . . . . . . : Hybrid + IP Routing Enabled. . . . . . . . : No + WINS Proxy Enabled. . . . . . . . : No + DNS Suffix Search List. . . . . . : example.com + another.example.com + +Ethernet adapter Local Area Connection: + + Connection-specific DNS Suffix . : somethingexample.com + Description . . . . . . . . . . . : Int PRO/1000 MT Network Connection + Physical Address. . . . . . . . . : %(mac_addr)s + DHCP Enabled. . . . . . . . . . . : Yes + Autoconfiguration Enabled . . . . : Yes + IPv6 Address. . . . . . . . . . . : 1234:0:1000:1200:839f:d256:3a6c:210(Preferred) + Temporary IPv6 Address. . . . . . : 2143:0:2100:1800:38f9:2d65:a3c6:120(Preferred) + Link-local IPv6 Address . . . . . : abcd::1234:1a33:b2cc:238%%18(Preferred) + IPv4 Address. . . . . . . . . . . : %(ip_addr)s(Preferred) + Subnet Mask . . . . . . . . . . . : 255.255.248.0 + Lease Obtained. . . . . . . . . . : Thursday, April 28, 2011 9:40:22 PM + Lease Expires . . . . . . . . . . : Tuesday, May 10, 2011 12:15:48 PM + Default Gateway . . . . . . . . . : abcd::2:37ee:ef70:56%%18 + 172.11.25.254 + DHCP Server . . . . . . . . . . . : 172.11.22.33 + DNS Servers . . . . . . . . . . . : 8.8.4.4 + NetBIOS over Tcpip. . . . . . . . : Enabled +""" % { 'ip_addr': WINDOWS_7_IP, 'mac_addr': WINDOWS_7_MAC } + +WINDOWS_XP_IP = '172.1.2.3' +WINDOWS_XP_MAC = '00-34-B8-1F-FA-70' +WINDOWS_XP_IPCONFIG = """ +Windows IP Configuration + + Host Name . . . . . . . . . . . . : HOSTY-0 + Primary Dns Suffix . . . . . . . : + Node Type . . . . . . . . . . . . : Unknown + IP Routing Enabled. . . . . . . . : No + WINS Proxy Enabled. . . . . . . . : No + DNS Suffix Search List. . . . . . : example.com + +Ethernet adapter Local Area Connection 2: + + Connection-specific DNS Suffix . : example.com + Description . . . . . . . . . . . : Int Adapter (PILA8470B) + Physical Address. . . . . . . . . : %(mac_addr)s + Dhcp Enabled. . . . . . . . . . . : Yes + Autoconfiguration Enabled . . . . : Yes + IP Address. . . . . . . . . . . . : %(ip_addr)s + Subnet Mask . . . . . . . . . . . : 255.255.254.0 + Default Gateway . . . . . . . . . : 172.1.2.254 + DHCP Server . . . . . . . . . . . : 172.1.3.241 + DNS Servers . . . . . . . . . . . : 172.1.3.241 + 8.8.8.8 + 8.8.4.4 + Lease Obtained. . . . . . . . . . : Thursday, April 07, 2011 9:14:55 AM + Lease Expires . . . . . . . . . . 
: Thursday, April 07, 2011 1:14:55 PM +""" % { 'ip_addr': WINDOWS_XP_IP, 'mac_addr': WINDOWS_XP_MAC } + + +# scutil show State:/Network/Global/IPv4 +OSX_IPV4_STATE = """ + { + PrimaryInterface : en1 + PrimaryService : 8824452C-FED4-4C09-9256-40FB146739E0 + Router : 192.168.1.1 +} +""" + +# scutil show State:/Network/Service/[PRIMARY_SERVICE_KEY]/DNS +OSX_DNS_STATE_LION = """ + { + DomainName : mtv.corp.google.com + SearchDomains : { + 0 : mtv.corp.google.com + 1 : corp.google.com + 2 : prod.google.com + 3 : prodz.google.com + 4 : google.com + } + ServerAddresses : { + 0 : 172.72.255.1 + 1 : 172.49.117.57 + 2 : 172.54.116.57 + } +} +""" + +OSX_DNS_STATE_SNOW_LEOPARD = """ + { + ServerAddresses : { + 0 : 172.27.1.1 + 1 : 172.94.117.57 + 2 : 172.45.116.57 + } + DomainName : mtv.corp.google.com + SearchDomains : { + 0 : mtv.corp.google.com + 1 : corp.google.com + 2 : prod.google.com + 3 : prodz.google.com + 4 : google.com + } +} +""" + + +class Win7Settings(platformsettings.WindowsPlatformSettings): + @classmethod + def _ipconfig(cls, *args): + if args == ('/all',): + return WINDOWS_7_IPCONFIG + raise RuntimeError + +class WinXpSettings(platformsettings.WindowsPlatformSettings): + @classmethod + def _ipconfig(cls, *args): + if args == ('/all',): + return WINDOWS_XP_IPCONFIG + raise RuntimeError + + +class WindowsPlatformSettingsTest(unittest.TestCase): + def test_get_mac_address_xp(self): + self.assertEqual(WINDOWS_XP_MAC, + WinXpSettings().get_mac_address(WINDOWS_XP_IP)) + + def test_get_mac_address_7(self): + self.assertEqual(WINDOWS_7_MAC, + Win7Settings().get_mac_address(WINDOWS_7_IP)) + + +class OsxSettings(platformsettings.OsxPlatformSettings): + def __init__(self): + super(OsxSettings, self).__init__() + self.ipv4_state = OSX_IPV4_STATE + self.dns_state = None # varies by test + + def _scutil(self, cmd): + if cmd == 'show State:/Network/Global/IPv4': + return self.ipv4_state + elif cmd.startswith('show State:/Network/Service/'): + return self.dns_state + raise RuntimeError("Unrecognized cmd: %s", cmd) + + +class OsxPlatformSettingsTest(unittest.TestCase): + def setUp(self): + self.settings = OsxSettings() + + def test_get_primary_dns_lion(self): + self.settings.dns_state = OSX_DNS_STATE_LION + self.assertEqual('172.72.255.1', self.settings.get_primary_dns()) + + def test_get_primary_dns_snow_leopard(self): + self.settings.dns_state = OSX_DNS_STATE_SNOW_LEOPARD + self.assertEqual('172.27.1.1', self.settings.get_primary_dns()) + + def test_get_primary_dns_unexpected_ipv4_state_raises(self): + self.settings.ipv4_state = 'Some error' + self.settings.dns_state = OSX_DNS_STATE_SNOW_LEOPARD + self.assertRaises(platformsettings.DnsReadError, + self.settings.get_primary_dns) + + def test_get_primary_dns_unexpected_dns_state_raises(self): + self.settings.dns_state = 'Some other error' + self.assertRaises(platformsettings.DnsReadError, + self.settings.get_primary_dns) + + +PING_OUTPUT = '''PING www.a.shifen.com (119.75.218.77) 56(84) bytes of data. 
+ +--- www.a.shifen.com ping statistics --- +3 packets transmitted, 3 received, 0% packet loss, time 2204ms +rtt min/avg/max/mdev = 191.206/191.649/191.980/0.325 ms +''' +PING_AVG = 191.649 + +class PingSettings(platformsettings.PosixPlatformSettings): + def __init__(self): + super(PingSettings, self).__init__() + self.working_cmd = None + self.working_output = None + + def _check_output(self, *args): + if self.working_cmd and ' '.join(self.working_cmd) == ' '.join(args[:-1]): + return self.working_output + raise platformsettings.CalledProcessError(99, args) + +class PingTest(unittest.TestCase): + def setUp(self): + self.settings = PingSettings() + + def testNoWorkingPingReturnsZero(self): + self.assertEqual(0, self.settings.ping('www.noworking.com')) + + def testRegularPingCmdReturnsValue(self): + self.settings.working_cmd = self.settings.PING_CMD + self.settings.working_output = PING_OUTPUT + self.assertEqual(PING_AVG, self.settings.ping('www.regular.com')) + + def testRestrictedPingCmdReturnsValue(self): + self.settings.working_cmd = self.settings.PING_RESTRICTED_CMD + self.settings.working_output = PING_OUTPUT + self.assertEqual(PING_AVG, self.settings.ping('www.restricted.com')) + + def testNoWorkingPingConfiguresOnce(self): + self.settings.ping('www.first.com') + def AssertNotCalled(*args): + self.fail('Unexpected _check_output call.') + self.settings._check_output = AssertNotCalled + self.settings.ping('www.second.com') + +if __name__ == '__main__': + unittest.main() diff --git a/wpr/replay.py b/wpr/replay.py index 0c00ed5..63b7307 100755 --- a/wpr/replay.py +++ b/wpr/replay.py @@ -41,11 +41,11 @@ import logging import optparse -import socket +import os import sys -import time import traceback +import cachemissarchive import customhandlers import dnsproxy import httparchive @@ -53,131 +53,246 @@ import httpproxy import platformsettings import replayspdyserver +import servermanager import trafficshaper - if sys.version < '2.6': print 'Need Python 2.6 or greater.' sys.exit(1) -def resolve_dns_to_remote_replay_server(platform_settings, dnsproxy_ip): - """Set the primary dns nameserver to the replay dnsproxy. - - Restore the original primary dns nameserver on exit. +def configure_logging(platform_settings, log_level_name, log_file_name=None): + """Configure logging level and format. Args: - platform_settings: an instance of platformsettings.PlatformSettings - dnsproxy_ip: the ip address to use as the primary dns server. + log_level_name: 'debug', 'info', 'warning', 'error', or 'critical'. + log_file_name: a file name """ - try: - platform_settings.set_primary_dns(dnsproxy_ip) - while True: - time.sleep(1) - except KeyboardInterrupt: - logging.info('Shutting down.') - finally: - platform_settings.restore_primary_dns() + if logging.root.handlers: + logging.critical('A logging method (e.g. 
"logging.warn(...)")' + ' was called before logging was configured.') + log_level = getattr(logging, log_level_name.upper()) + log_format = '%(asctime)s %(levelname)s %(message)s' + logging.basicConfig(level=log_level, format=log_format) + logger = logging.getLogger() + if log_file_name: + fh = logging.FileHandler(log_file_name) + fh.setLevel(log_level) + fh.setFormatter(logging.Formatter(log_format)) + logger.addHandler(fh) + system_handler = platform_settings.get_system_logging_handler() + if system_handler: + logger.addHandler(system_handler) -def main(options, replay_filename): - exit_status = 0 - platform_settings = platformsettings.get_platform_settings() - if options.server: - resolve_dns_to_remote_replay_server(platform_settings, options.server) - return exit_status - host = platform_settings.get_server_ip_address(options.server_mode) - - web_server_class = httpproxy.HttpProxyServer - web_server_kwargs = { - 'host': host, - 'port': options.port, - } - if options.spdy: - assert not options.record, 'spdy cannot be used with --record.' - web_server_class = replayspdyserver.ReplaySpdyServer - web_server_kwargs['use_ssl'] = options.spdy != 'no-ssl' - web_server_kwargs['certfile'] = options.certfile - web_server_kwargs['keyfile'] = options.keyfile +def AddDnsForward(server_manager, platform_settings, host): + """Forward DNS traffic.""" + server_manager.AppendStartStopFunctions( + [platform_settings.set_primary_dns, host], + [platform_settings.restore_primary_dns]) - if options.record: - http_archive = httparchive.HttpArchive() - http_archive.AssertWritable(replay_filename) - else: - http_archive = httparchive.HttpArchive.Load(replay_filename) - logging.info('Loaded %d responses from %s', - len(http_archive), replay_filename) +def AddDnsProxy(server_manager, options, host, real_dns_lookup, http_archive): + dns_lookup = None + if options.dns_private_passthrough: + dns_lookup = dnsproxy.PrivateIpDnsLookup( + host, real_dns_lookup, http_archive) + server_manager.AppendRecordCallback(dns_lookup.InitializeArchiveHosts) + server_manager.AppendReplayCallback(dns_lookup.InitializeArchiveHosts) + server_manager.Append(dnsproxy.DnsProxyServer, dns_lookup, host) - custom_handlers = customhandlers.CustomHandlers(options.screenshot_dir) - real_dns_lookup = dnsproxy.RealDnsLookup() - if options.record: - http_archive_fetch = httpclient.RecordHttpArchiveFetch( - http_archive, real_dns_lookup, options.deterministic_script) - else: +def AddWebProxy(server_manager, options, host, real_dns_lookup, http_archive, + cache_misses): + inject_script = httpclient.GetInjectScript(options.inject_scripts.split(',')) + http_custom_handlers = customhandlers.CustomHandlers(options.screenshot_dir) + if options.spdy: + assert not options.record, 'spdy cannot be used with --record.' 
http_archive_fetch = httpclient.ReplayHttpArchiveFetch( - http_archive, options.diff_unknown_requests) - - dns_passthrough_filter = None - if options.dns_private_passthrough: - skip_passthrough_hosts = set(request.host for request in http_archive) - dns_passthrough_filter = dnsproxy.DnsPrivatePassthroughFilter( - real_dns_lookup, skip_passthrough_hosts) - - dns_class = dnsproxy.DummyDnsServer - if options.dns_forwarding: - dns_class = dnsproxy.DnsProxyServer + http_archive, + inject_script, + options.diff_unknown_requests, + cache_misses=cache_misses, + use_closest_match=options.use_closest_match) + server_manager.Append( + replayspdyserver.ReplaySpdyServer, http_archive_fetch, + http_custom_handlers, host=host, port=options.port, + certfile=options.certfile) + else: + http_custom_handlers.add_server_manager_handler(server_manager) + http_archive_fetch = httpclient.ControllableHttpArchiveFetch( + http_archive, real_dns_lookup, + inject_script, + options.diff_unknown_requests, options.record, + cache_misses=cache_misses, use_closest_match=options.use_closest_match) + server_manager.AppendRecordCallback(http_archive_fetch.SetRecordMode) + server_manager.AppendReplayCallback(http_archive_fetch.SetReplayMode) + server_manager.Append( + httpproxy.HttpProxyServer, http_archive_fetch, http_custom_handlers, + host=host, port=options.port, use_delays=options.use_server_delay) + if options.ssl: + server_manager.Append( + httpproxy.HttpsProxyServer, http_archive_fetch, + http_custom_handlers, options.certfile, + host=host, port=options.ssl_port, use_delays=options.use_server_delay) + + +def AddTrafficShaper(server_manager, options, host): + if options.HasTrafficShaping(): + server_manager.Append( + trafficshaper.TrafficShaper, host=host, port=options.shaping_port, + ssl_port=(options.ssl_shaping_port if options.ssl else None), + up_bandwidth=options.up, down_bandwidth=options.down, + delay_ms=options.delay_ms, packet_loss_rate=options.packet_loss_rate, + init_cwnd=options.init_cwnd, use_loopback=not options.server_mode) + + +class OptionsWrapper(object): + """Add checks, updates, and methods to option values. + + Example: + options, args = option_parser.parse_args() + options = OptionsWrapper(options, option_parser) # run checks and updates + if options.record and options.HasTrafficShaping(): + [...] + """ + _TRAFFICSHAPING_OPTIONS = set( + ['down', 'up', 'delay_ms', 'packet_loss_rate', 'init_cwnd', 'net']) + _CONFLICTING_OPTIONS = ( + ('record', ('down', 'up', 'delay_ms', 'packet_loss_rate', 'net', + 'spdy', 'use_server_delay')), + ('net', ('down', 'up', 'delay_ms')), + ('server', ('server_mode',)), + ) + # The --net values come from http://www.webpagetest.org/. 
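  # [Editor's note -- hedged usage sketch; see the preset table below] Each
  # preset row is copied into --down/--up/--delay_ms by _MassageValues(), so,
  # assuming the option names in this file, these two invocations should
  # behave identically:
  #
  #     $ ./replay.py --net dsl archive.wpr
  #     $ ./replay.py --down 1536Kbit/s --up 384Kbit/s --delay_ms 50 archive.wpr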
+ # https://sites.google.com/a/webpagetest.org/docs/other-resources/2011-fcc-broadband-data + _NET_CONFIGS = ( + # key --down --up --delay_ms + ('dsl', ('1536Kbit/s', '384Kbit/s', '50')), + ('cable', ( '5Mbit/s', '1Mbit/s', '28')), + ('fios', ( '20Mbit/s', '5Mbit/s', '4')), + ) + NET_CHOICES = [key for key, values in _NET_CONFIGS] + + def __init__(self, options, parser): + self._options = options + self._parser = parser + self._nondefaults = set([ + name for name, value in parser.defaults.items() + if getattr(options, name) != value]) + self._CheckConflicts() + self._MassageValues() + + def _CheckConflicts(self): + """Give an error if mutually exclusive options are used.""" + for option, bad_options in self._CONFLICTING_OPTIONS: + if option in self._nondefaults: + for bad_option in bad_options: + if bad_option in self._nondefaults: + self._parser.error('Option --%s cannot be used with --%s.' % + (bad_option, option)) + + def _MassageValues(self): + """Set options that depend on the values of other options.""" + for net_choice, values in self._NET_CONFIGS: + if net_choice == self.net: + self._options.down, self._options.up, self._options.delay_ms = values + if not self.shaping_port: + self._options.shaping_port = self.port + if not self.ssl_shaping_port: + self._options.ssl_shaping_port = self.ssl_port + if not self.ssl: + self._options.certfile = None + + def __getattr__(self, name): + """Make the original option values available.""" + return getattr(self._options, name) + + def HasTrafficShaping(self): + """Returns True iff the options require traffic shaping.""" + return bool(self._TRAFFICSHAPING_OPTIONS & self._nondefaults) + + def IsRootRequired(self): + """Returns True iff the options require root access.""" + return (self.HasTrafficShaping() or + self.dns_forwarding or + self.port < 1024 or + self.ssl_port < 1024) + + +def replay(options, replay_filename): + platform_settings = platformsettings.get_platform_settings() + if options.IsRootRequired(): + platform_settings.rerun_as_administrator() + configure_logging(platform_settings, options.log_level, options.log_file) + server_manager = servermanager.ServerManager(options.record) + cache_misses = None + if options.cache_miss_file: + if os.path.exists(options.cache_miss_file): + logging.warning('Cache Miss Archive file %s already exists; ' + 'replay will load and append entries to archive file', + options.cache_miss_file) + cache_misses = cachemissarchive.CacheMissArchive.Load( + options.cache_miss_file) + else: + cache_misses = cachemissarchive.CacheMissArchive( + options.cache_miss_file) + if options.server: + AddDnsForward(server_manager, platform_settings, options.server) + else: + host = platform_settings.get_server_ip_address(options.server_mode) + real_dns_lookup = dnsproxy.RealDnsLookup( + name_servers=[platform_settings.get_original_primary_dns()]) + if options.record: + http_archive = httparchive.HttpArchive() + http_archive.AssertWritable(replay_filename) + else: + http_archive = httparchive.HttpArchive.Load(replay_filename) + logging.info('Loaded %d responses from %s', + len(http_archive), replay_filename) + server_manager.AppendRecordCallback(real_dns_lookup.ClearCache) + server_manager.AppendRecordCallback(http_archive.clear) + + if options.dns_forwarding: + if not options.server_mode: + AddDnsForward(server_manager, platform_settings, host) + AddDnsProxy(server_manager, options, host, real_dns_lookup, http_archive) + if options.ssl and options.certfile is None: + options.certfile = platform_settings.get_certfile_name() + 
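    # [Editor's note -- hedged sketch] AppendStartStopFunctions() (defined in
    # servermanager.py later in this diff) wraps a (start, stop) pair in an
    # anonymous context manager, so the call below is roughly equivalent to:
    #
    #     class CertfileContext(object):
    #         def __enter__(self):
    #             platform_settings.create_certfile(options.certfile)
    #         def __exit__(self, exc_type, exc_value, tb):
    #             os.unlink(options.certfile)
    #
    #     server_manager.Append(CertfileContext)
    #
    # This guarantees the temporary certfile is removed even if a later
    # server fails to start.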
server_manager.AppendStartStopFunctions( + [platform_settings.create_certfile, options.certfile], + [os.unlink, options.certfile]) + AddWebProxy(server_manager, options, host, real_dns_lookup, + http_archive, cache_misses) + AddTrafficShaper(server_manager, options, host) + exit_status = 0 try: - with dns_class(options.dns_forwarding, dns_passthrough_filter, host): - with web_server_class(http_archive_fetch, custom_handlers, - **web_server_kwargs): - with trafficshaper.TrafficShaper( - host=host, - port=options.shaping_port, - up_bandwidth=options.up, - down_bandwidth=options.down, - delay_ms=options.delay_ms, - packet_loss_rate=options.packet_loss_rate, - init_cwnd=options.init_cwnd): - while True: - time.sleep(1) + server_manager.Run() except KeyboardInterrupt: logging.info('Shutting down.') except (dnsproxy.DnsProxyException, - trafficshaper.TrafficShaperException) as e: - logging.critical(e) + trafficshaper.TrafficShaperException, + platformsettings.NotAdministratorError, + platformsettings.DnsUpdateError) as e: + logging.critical('%s: %s', e.__class__.__name__, e) exit_status = 1 except: - print traceback.format_exc() + logging.critical(traceback.format_exc()) exit_status = 2 + if options.record: http_archive.Persist(replay_filename) logging.info('Saved %d responses to %s', len(http_archive), replay_filename) + if cache_misses: + cache_misses.Persist() + logging.info('Saved %d cache misses and %d requests to %s', + cache_misses.get_total_cache_misses(), + len(cache_misses.request_counts.keys()), + options.cache_miss_file) return exit_status -def configure_logging(log_level_name, log_file_name=None): - """Configure logging level and format. - - Args: - log_level_name: 'debug', 'info', 'warning', 'error', or 'critical'. - log_file_name: a file name - """ - if logging.root.handlers: - logging.critical('A logging method (e.g. "logging.warn(...)")' - ' was called before logging was configured.') - log_level = getattr(logging, log_level_name.upper()) - log_format = '%(asctime)s %(levelname)s %(message)s' - logging.basicConfig(level=log_level, format=log_format) - if log_file_name: - fh = logging.FileHandler(log_file_name) - fh.setLevel(log_level) - fh.setFormatter(logging.Formatter(log_format)) - logging.getLogger().addHandler(fh) - - -if __name__ == '__main__': +def main(): class PlainHelpFormatter(optparse.IndentedHelpFormatter): def format_description(self, description): if description: @@ -190,10 +305,9 @@ def format_description(self, description): description=__doc__, epilog='http://code.google.com/p/web-page-replay/') - option_parser.add_option('-s', '--spdy', default=False, - action='store', - type='string', - help='Use spdy to replay relay_file. --spdy="no-ssl" uses SPDY without SSL.') + option_parser.add_option('--spdy', default=False, + action='store_true', + help='Replay via SPDY. (Can be combined with --no-ssl).') option_parser.add_option('-r', '--record', default=False, action='store_true', help='Download real responses and record them to replay_file') @@ -206,6 +320,12 @@ def format_description(self, description): action='store', type='string', help='Log file to use in addition to writting logs to stderr.') + option_parser.add_option('-e', '--cache_miss_file', default=None, + action='store', + dest='cache_miss_file', + type='string', + help='Archive file to record cache misses as pickled objects.' 
+      'Cache misses occur when a request cannot be served in replay mode.')

   network_group = optparse.OptionGroup(option_parser,
       'Network Simulation Options',
@@ -230,6 +350,12 @@ def format_description(self, description):
       action='store',
       type='string',
       help='Set initial cwnd (linux only, requires kernel patch)')
+  network_group.add_option('--net', default=None,
+      action='store',
+      type='choice',
+      choices=OptionsWrapper.NET_CHOICES,
+      help='Select a set of network options: %s.' % ', '.join(
+          OptionsWrapper.NET_CHOICES))
   option_parser.add_option_group(network_group)

   harness_group = optparse.OptionGroup(option_parser,
@@ -246,17 +372,28 @@ def format_description(self, description):
       'without changing the primary DNS nameserver. '
       'Other hosts may connect to this using "replay.py --server" '
       'or by pointing their DNS to this server.')
-  harness_group.add_option('-n', '--no-deterministic_script', default=True,
+  harness_group.add_option('-i', '--inject_scripts', default='deterministic.js',
+      action='store',
+      dest='inject_scripts',
+      help='A comma separated list of JavaScript sources to inject in all '
+          'pages. By default a script is injected that makes sources of '
+          'entropy such as Date() and Math.random() deterministic. '
+          'CAUTION: Without deterministic.js, many pages will not replay.')
+  harness_group.add_option('-D', '--no-diff_unknown_requests', default=True,
       action='store_false',
-      dest='deterministic_script',
-      help='During a record, do not inject JavaScript to make sources of '
-          'entropy such as Date() and Math.random() deterministic. CAUTION: '
-          'With this option many web pages will not replay properly.')
-  harness_group.add_option('-D', '--diff_unknown_requests', default=False,
-      action='store_true',
       dest='diff_unknown_requests',
-      help='During replay, show a unified diff of any unknown requests against '
+      help='During replay, do not show a diff of unknown requests against '
          'their nearest match in the archive.')
+  harness_group.add_option('-C', '--use_closest_match', default=False,
+      action='store_true',
+      dest='use_closest_match',
+      help='During replay, if a request is not found, serve the closest match '
+          'in the archive instead of giving a 404.')
+  harness_group.add_option('-U', '--use_server_delay', default=False,
+      action='store_true',
+      dest='use_server_delay',
+      help='During replay, simulate server delay by delaying response time to '
+          'requests.')
   harness_group.add_option('-I', '--screenshot_dir', default=None,
       action='store',
       type='string',
@@ -270,33 +407,39 @@ def format_description(self, description):
   harness_group.add_option('-x', '--no-dns_forwarding', default=True,
       action='store_false',
       dest='dns_forwarding',
-      help='Don\'t forward DNS requests to the local replay server.'
+      help='Don\'t forward DNS requests to the local replay server. '
          'CAUTION: With this option an external mechanism must be used to '
          'forward traffic to the replay server.')
   harness_group.add_option('-o', '--port', default=80,
       action='store',
       type='int',
       help='Port number to listen on.')
-  harness_group.add_option('--shaping_port', default=0,
+  harness_group.add_option('--ssl_port', default=443,
       action='store',
       type='int',
-      help='Port to apply traffic shaping to.
\'0\' means use the same ' - 'port as the listen port (--port)') - harness_group.add_option('-c', '--certfile', default='', + help='SSL port number to listen on.') + harness_group.add_option('--shaping_port', default=None, action='store', - dest='certfile', - type='string', - help='Certificate file for use with SSL') - harness_group.add_option('-k', '--keyfile', default='', + type='int', + help='Port on which to apply traffic shaping. Defaults to the ' + 'listen port (--port)') + harness_group.add_option('--ssl_shaping_port', default=None, + action='store', + type='int', + help='SSL port on which to apply traffic shaping. Defaults to the ' + 'SSL listen port (--ssl_port)') + harness_group.add_option('-c', '--certfile', default=None, action='store', - dest='keyfile', type='string', - help='Key file for use with SSL') + help='Certificate file to use with SSL (gets auto-generated if needed).') + harness_group.add_option('--no-ssl', default=True, + action='store_false', + dest='ssl', + help='Do not setup an SSL proxy.') option_parser.add_option_group(harness_group) options, args = option_parser.parse_args() - - configure_logging(options.log_level, options.log_file) + options = OptionsWrapper(options, option_parser) if options.server: replay_filename = None @@ -305,23 +448,8 @@ def format_description(self, description): else: replay_filename = args[0] - if options.record: - if options.up != '0': - option_parser.error('Option --up cannot be used with --record.') - if options.down != '0': - option_parser.error('Option --down cannot be used with --record.') - if options.delay_ms != '0': - option_parser.error('Option --delay_ms cannot be used with --record.') - if options.packet_loss_rate != '0': - option_parser.error( - 'Option --packet_loss_rate cannot be used with --record.') - if options.spdy: - option_parser.error('Option --spdy cannot be used with --record.') - - if options.server and options.server_mode: - option_parser.error('Cannot run with both --server and --server_mode') - - if options.shaping_port == 0: - options.shaping_port = options.port - - sys.exit(main(options, replay_filename)) + return replay(options, replay_filename) + + +if __name__ == '__main__': + sys.exit(main()) diff --git a/wpr/replayspdyserver.py b/wpr/replayspdyserver.py index 76a5295..bdcd96e 100755 --- a/wpr/replayspdyserver.py +++ b/wpr/replayspdyserver.py @@ -32,8 +32,12 @@ class ReplaySpdyServer(daemonserver.DaemonServer): def __init__(self, http_archive_fetch, custom_handlers, - host='localhost', port=80, - use_ssl=True, certfile=None, keyfile=None): + host='localhost', port=80, certfile=None, keyfile=None): + """Initialize ReplaySpdyServer. + + The private key may be stored in |certfile|. If so, |keyfile| + may be left unset. + """ #TODO(lzheng): figure out how to get the log level from main. 
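        # [Editor's note -- hedged usage sketch] With this signature, SSL is
        # implied by certfile (see use_ssl below); for example:
        #
        #     ReplaySpdyServer(fetch, handlers, port=80)  # plain SPDY
        #     ReplaySpdyServer(fetch, handlers, port=443,
        #                      certfile='replay_cert.pem')  # SPDY over SSL
        #
        # 'fetch', 'handlers', and 'replay_cert.pem' are placeholder names.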
self.log = logging.getLogger('ReplaySpdyServer') self.log.setLevel(logging.INFO) @@ -41,17 +45,10 @@ def __init__(self, http_archive_fetch, custom_handlers, self.custom_handlers = custom_handlers self.host = host self.port = port - self.use_ssl = use_ssl - if self.use_ssl and (not certfile or not keyfile): - self.log.error('SPDY SSL mode requires a keyfile and certificate file') - raise Exception('keyfile or certfile missing') - self.spdy_server = spdy_server.SpdyServer(host, - port, - self.use_ssl, - certfile, - keyfile, - self.request_handler, - self.log) + self.use_ssl = certfile is not None + self.spdy_server = spdy_server.SpdyServer( + host, port, self.use_ssl, certfile, keyfile, self.request_handler, + self.log) def serve_forever(self): self.log.info('Replaying with SPDY on %s:%d', self.host, self.port) @@ -66,54 +63,52 @@ def request_handler(self, method, uri, hdrs, res_start, req_pause): Based on method, host and uri to fetch the matching response and reply to browser using spdy. """ + dummy = http_common.dummy + def simple_responder(code, content): + res_hdrs = [('content-type', 'text/html'), ('version', 'HTTP/1.1')] + res_body, res_done = res_start(str(code), content, res_hdrs, dummy) + res_body(None) + res_done(None) + host = '' - for (name, value) in hdrs: + for name, value in hdrs: if name.lower() == 'host': host = value self.log.debug("request: %s, uri: %s, method: %s", host, uri, method) - dummy = http_common.dummy if method == 'GET': - request = httparchive.ArchivedHttpRequest(method, host, uri, None) + request = httparchive.ArchivedHttpRequest( + method, host, uri, None, dict(hdrs)) response_code = self.custom_handlers.handle(request) if response_code: - self.send_simple_response(response_code, "Handled by custom handlers") + simple_responder(response_code, "Handled by custom handlers") return dummy, dummy response = self.http_archive_fetch(request) if response: res_hdrs = [('version', 'HTTP/1.1')] - for (name, value) in response.headers: + for name, value in response.headers: name_lower = name.lower() - if name.lower() == CONTENT_LENGTH: + if name_lower == CONTENT_LENGTH: res_hdrs.append((name, str(value))) - elif name_lower == STATUS: - pass - elif name_lower == VERSION: + elif name_lower in (STATUS, VERSION): pass else: - res_hdrs.append((name, value)) - res_body, res_done = res_start(str(response.status), - response.reason, - res_hdrs, dummy) + res_hdrs.append((name_lower, value)) + res_body, res_done = res_start( + str(response.status), response.reason, res_hdrs, dummy) body = '' for item in response.response_data: res_body(item) res_done(None) else: self.log.error("404 returned: %s %s", method, uri) - self.send_simple_response(404, "file not found") + simple_responder(404, "file not found") else: # TODO(lzheng): Add support for other methods. self.log.error("method: %s is not supported: %s", method, uri) - self.send_simple_response(500, "Not supported") - + simple_responder(500, "Not supported") return dummy, dummy - def send_simple_response(self, code, phrase): - res_hdrs = [('Content-Type', 'text/html'), ('version', 'HTTP/1.1')] - res_body, res_done = res_start(str(code), phrase, res_hdrs, dummy) - res_body(None) - res_done(None) if __name__ == "__main__": logging.basicConfig() diff --git a/wpr/servermanager.py b/wpr/servermanager.py new file mode 100644 index 0000000..decaca3 --- /dev/null +++ b/wpr/servermanager.py @@ -0,0 +1,131 @@ +#!/usr/bin/env python +# Copyright 2011 Google Inc. All Rights Reserved. 
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+"""Control "replay.py --server_mode" (e.g. switch from record to replay)."""
+
+import sys
+import time
+
+class ServerManager(object):
+  """Run servers until interrupted or an exception is raised.
+
+  Servers start in the order they are appended and stop in the
+  opposite order. Servers are started by calling the initializer
+  passed to ServerManager.Append() and by calling __enter__(). Once a
+  server's initializer is called successfully, the __exit__() function
+  is guaranteed to be called when ServerManager.Run() completes.
+  """
+
+  def __init__(self, is_record_mode):
+    """Initialize a server manager."""
+    self.initializers = []
+    self.record_callbacks = []
+    self.replay_callbacks = []
+    self.is_record_mode = is_record_mode
+
+  def Append(self, initializer, *init_args, **init_kwargs):
+    """Append a server to the end of the list to run.
+
+    Servers start in the order they are appended and stop in the
+    opposite order.
+
+    Args:
+      initializer: a function that returns a server instance.
+          A server needs to implement the with-statement interface.
+      init_args: positional arguments for the initializer.
+      init_kwargs: keyword arguments for the initializer.
+    """
+    self.initializers.append((initializer, init_args, init_kwargs))
+
+  def AppendStartStopFunctions(self, start_spec, stop_spec):
+    """Append functions to call before and after the main run-loop.
+
+    If the start function succeeds, then the stop function will be
+    called when shutting down.
+
+    Args:
+      start_spec: (start_func, start_arg_1, start_arg_2, ...)
+          # The arguments are optional.
+      stop_spec: (stop_func, stop_arg_1, stop_arg_2, ...)
+          # The arguments are optional.
+    """
+    class Context(object):
+      def __enter__(self):
+        start_spec[0](*start_spec[1:])
+      def __exit__(self, type, value, traceback):
+        stop_spec[0](*stop_spec[1:])
+    self.Append(Context)
+
+  def AppendRecordCallback(self, func):
+    """Append a function to the list to call when switching to record mode.
+
+    Args:
+      func: a function that takes no arguments and returns no value.
+    """
+    self.record_callbacks.append(func)
+
+  def AppendReplayCallback(self, func):
+    """Append a function to the list to call when switching to replay mode.
+
+    Args:
+      func: a function that takes no arguments and returns no value.
+    """
+    self.replay_callbacks.append(func)
+
+  def IsRecordMode(self):
+    """Return True if the server manager is in record mode."""
+    return self.is_record_mode
+
+  def SetRecordMode(self):
+    """Call all the functions that have been registered to enter record mode."""
+    self.is_record_mode = True
+    for record_func in self.record_callbacks:
+      record_func()
+
+  def SetReplayMode(self):
+    """Call all the functions that have been registered to enter replay mode."""
+    self.is_record_mode = False
+    for replay_func in self.replay_callbacks:
+      replay_func()
+
+  def Run(self):
+    """Create the servers and loop.
+
+    The loop quits if a server raises an exception.
+ + Raises: + any exception raised by the servers + """ + server_exits = [] + exception_info = (None, None, None) + try: + for initializer, init_args, init_kwargs in self.initializers: + server = initializer(*init_args, **init_kwargs) + server_exits.insert(0, server.__exit__) + server.__enter__() + + while True: + time.sleep(1) + except: + exception_info = sys.exc_info() + finally: + for server_exit in server_exits: + try: + if server_exit(*exception_info): + exception_info = (None, None, None) + except: + exception_info = sys.exc_info() + if exception_info != (None, None, None): + raise exception_info[0], exception_info[1], exception_info[2] diff --git a/wpr/setup.cfg b/wpr/setup.cfg new file mode 100644 index 0000000..861a9f5 --- /dev/null +++ b/wpr/setup.cfg @@ -0,0 +1,5 @@ +[egg_info] +tag_build = +tag_date = 0 +tag_svn_revision = 0 + diff --git a/wpr/setup.py b/wpr/setup.py new file mode 100644 index 0000000..d6c5348 --- /dev/null +++ b/wpr/setup.py @@ -0,0 +1,57 @@ +#!/usr/bin/env python +# Copyright 2012 Google Inc. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Creates a distributable python package. + +Creating new packages: + 1. Generate the package, dist/webpagereplay-X.X.tar.gz: + python setup.py sdist + 2. Upload the package file to the following: + http://code.google.com/p/web-page-replay/downloads/entry + +Installing packages: + $ easy_install http://web-page-replay.googlecode.com/files/webpagereplay-X.X.tar.gz + - The replay and httparchive commands are now on your PATH. +""" + +import setuptools + +setuptools.setup( + name='webpagereplay', + version='1.1.2', + description='Record and replay web content', + author='Web Page Replay Project Authors', + author_email='web-page-replay-dev@googlegroups.com', + url='http://code.google.com/p/web-page-replay/', + license='Apache License 2.0', + install_requires=['dnspython>=1.8'], + packages=[ + '', + 'perftracker', + 'third_party', + 'third_party.ipaddr', + 'third_party.nbhttp' + ], + package_dir={'': '.'}, + package_data={ + '': ['*.js', '*.txt', 'COPYING', 'LICENSE'], + }, + entry_points={ + 'console_scripts': [ + 'httparchive = httparchive:main', + 'replay = replay:main', + ] + }, + ) diff --git a/wpr/third_party/dns/README.web-page-replay b/wpr/third_party/dns/README.web-page-replay new file mode 100644 index 0000000..6d445fe --- /dev/null +++ b/wpr/third_party/dns/README.web-page-replay @@ -0,0 +1,12 @@ +Name: A DNS toolkit for Python +Short Name: dnspython +URL: http://www.dnspython.org/ +Version: 1.8.0 (found in ./version.py) +License: ISC +License File: LICENSE + +Description: +Used by Web Page Replay's dnsproxy module to create and handle dns queries. + +Local Modifications: +None. \ No newline at end of file diff --git a/wpr/third_party/ipaddr/README.web-page-replay b/wpr/third_party/ipaddr/README.web-page-replay new file mode 100644 index 0000000..4b42084 --- /dev/null +++ b/wpr/third_party/ipaddr/README.web-page-replay @@ -0,0 +1,12 @@ +Name: An IPv4/IPv6 manipulation library in Python. 
+Short Name: ipaddr-py +URL: https://code.google.com/p/ipaddr-py/ +Version: 2.1.10 (ipaddr.__version__) +License: Apache (v2.0) +License File: COPYING + +Description: +Used by Web Page Replay to check if an IP address is private. + +Local Modifications: +Cherry picked revision 728996d6b1d4 to add license boilerplate to test-2to3.sh. diff --git a/wpr/third_party/ipaddr/ipaddr.py b/wpr/third_party/ipaddr/ipaddr.py index d7eb222..ad27ae9 100644 --- a/wpr/third_party/ipaddr/ipaddr.py +++ b/wpr/third_party/ipaddr/ipaddr.py @@ -22,7 +22,7 @@ """ -__version__ = 'trunk' +__version__ = '2.1.10' import struct @@ -134,7 +134,7 @@ def v4_int_to_packed(address): """ if address > _BaseV4._ALL_ONES: raise ValueError('Address too large for IPv4') - return struct.pack('!I', address) + return Bytes(struct.pack('!I', address)) def v6_int_to_packed(address): @@ -146,7 +146,7 @@ def v6_int_to_packed(address): Returns: The binary representation of this address. """ - return struct.pack('!QQ', address >> 64, address & (2**64 - 1)) + return Bytes(struct.pack('!QQ', address >> 64, address & (2**64 - 1))) def _find_address_range(addresses): @@ -270,12 +270,12 @@ def _collapse_address_list_recursive(addresses): Example: - ip1 = IPv4Network'1.1.0.0/24') - ip2 = IPv4Network'1.1.1.0/24') - ip3 = IPv4Network'1.1.2.0/24') - ip4 = IPv4Network'1.1.3.0/24') - ip5 = IPv4Network'1.1.4.0/24') - ip6 = IPv4Network'1.1.0.1/22') + ip1 = IPv4Network('1.1.0.0/24') + ip2 = IPv4Network('1.1.1.0/24') + ip3 = IPv4Network('1.1.2.0/24') + ip4 = IPv4Network('1.1.3.0/24') + ip5 = IPv4Network('1.1.4.0/24') + ip6 = IPv4Network('1.1.0.1/22') _collapse_address_list_recursive([ip1, ip2, ip3, ip4, ip5, ip6]) -> [IPv4Network('1.1.0.0/22'), IPv4Network('1.1.4.0/24')] @@ -368,15 +368,27 @@ def collapse_address_list(addresses): # backwards compatibility CollapseAddrList = collapse_address_list -# Test whether this Python implementation supports byte objects that -# are not identical to str ones. -# We need to exclude platforms where bytes == str so that we can -# distinguish between packed representations and strings, for example -# b'12::' (the IPv4 address 49.50.58.58) and '12::' (an IPv6 address). +# We need to distinguish between the string and packed-bytes representations +# of an IP address. For example, b'0::1' is the IPv4 address 48.58.58.49, +# while '0::1' is an IPv6 address. +# +# In Python 3, the native 'bytes' type already provides this functionality, +# so we use it directly. For earlier implementations where bytes is not a +# distinct type, we create a subclass of str to serve as a tag. +# +# Usage example (Python 2): +# ip = ipaddr.IPAddress(ipaddr.Bytes('xxxx')) +# +# Usage example (Python 3): +# ip = ipaddr.IPAddress(b'xxxx') try: - _compat_has_real_bytes = bytes is not str -except NameError: # 255 or (octet_str[0] == '0' and len(octet_str) > 1): + raise ValueError + return octet_int + def _string_from_ip_int(self, ip_int): """Turns a 32-bit integer into dotted decimal notation. @@ -1059,37 +1089,6 @@ def _string_from_ip_int(self, ip_int): ip_int >>= 8 return '.'.join(octets) - def _is_valid_ip(self, address): - """Validate the dotted decimal notation IP/netmask string. - - Args: - address: A string, either representing a quad-dotted ip - or an integer which is a valid IPv4 IP address. - - Returns: - A boolean, True if the string is a valid dotted decimal IP - string. - - """ - octets = address.split('.') - if len(octets) == 1: - # We have an integer rather than a dotted decimal IP. 
- try: - return int(address) >= 0 and int(address) <= self._ALL_ONES - except ValueError: - return False - - if len(octets) != 4: - return False - - for octet in octets: - try: - if not 0 <= int(octet) <= 255: - return False - except ValueError: - return False - return True - @property def max_prefixlen(self): return self._max_prefixlen @@ -1190,7 +1189,6 @@ def __init__(self, address): AddressValueError: If ipaddr isn't a valid IPv4 address. """ - _BaseIP.__init__(self, address) _BaseV4.__init__(self, address) # Efficient constructor from integer. @@ -1201,17 +1199,16 @@ def __init__(self, address): return # Constructing from a packed address - if _compat_has_real_bytes: - if isinstance(address, bytes) and len(address) == 4: - self._ip = struct.unpack('!I', address)[0] - return + if isinstance(address, Bytes): + try: + self._ip, = struct.unpack('!I', address) + except struct.error: + raise AddressValueError(address) # Wrong length. + return # Assume input argument to be string or any object representation # which converts into a formatted IP string. addr_str = str(address) - if not self._is_valid_ip(addr_str): - raise AddressValueError(addr_str) - self._ip = self._ip_int_from_string(addr_str) @@ -1276,25 +1273,14 @@ def __init__(self, address, strict=False): _BaseNet.__init__(self, address) _BaseV4.__init__(self, address) - # Efficient constructor from integer. - if isinstance(address, (int, long)): - self._ip = address - self.ip = IPv4Address(self._ip) + # Constructing from an integer or packed bytes. + if isinstance(address, (int, long, Bytes)): + self.ip = IPv4Address(address) + self._ip = self.ip._ip self._prefixlen = self._max_prefixlen self.netmask = IPv4Address(self._ALL_ONES) - if address < 0 or address > self._ALL_ONES: - raise AddressValueError(address) return - # Constructing from a packed address - if _compat_has_real_bytes: - if isinstance(address, bytes) and len(address) == 4: - self._ip = struct.unpack('!I', address)[0] - self.ip = IPv4Address(self._ip) - self._prefixlen = self._max_prefixlen - self.netmask = IPv4Address(self._ALL_ONES) - return - # Assume input argument to be string or any object representation # which converts into a formatted IP prefix string. addr = str(address).split('/') @@ -1302,9 +1288,6 @@ def __init__(self, address, strict=False): if len(addr) > 2: raise AddressValueError(address) - if not self._is_valid_ip(addr[0]): - raise AddressValueError(addr[0]) - self._ip = self._ip_int_from_string(addr[0]) self.ip = IPv4Address(self._ip) @@ -1338,6 +1321,8 @@ def __init__(self, address, strict=False): if self.ip != self.network: raise ValueError('%s has host bits set' % self.ip) + if self._prefixlen == (self._max_prefixlen - 1): + self.iterhosts = self.__iter__ def _is_hostmask(self, ip_str): """Test if the IP string is a hostmask (rather than a netmask). @@ -1403,12 +1388,14 @@ class _BaseV6(object): """ _ALL_ONES = (2**IPV6LENGTH) - 1 + _HEXTET_COUNT = 8 + _HEX_DIGITS = frozenset('0123456789ABCDEFabcdef') def __init__(self, address): self._version = 6 self._max_prefixlen = IPV6LENGTH - def _ip_int_from_string(self, ip_str=None): + def _ip_int_from_string(self, ip_str): """Turn an IPv6 ip_str into an integer. Args: @@ -1418,35 +1405,95 @@ def _ip_int_from_string(self, ip_str=None): A long, the IPv6 ip_str. Raises: - AddressValueError: if ip_str isn't a valid IP Address. + AddressValueError: if ip_str isn't a valid IPv6 Address. 
""" - if not ip_str: - ip_str = str(self.ip) + parts = ip_str.split(':') - ip_int = 0 - - # Do we have an IPv4 mapped (::ffff:a.b.c.d) or compact (::a.b.c.d) - # ip_str? - fields = ip_str.split(':') - if fields[-1].count('.') == 3: - ipv4_string = fields.pop() - ipv4_int = IPv4Network(ipv4_string)._ip - octets = [] - for _ in xrange(2): - octets.append(hex(ipv4_int & 0xFFFF).lstrip('0x').rstrip('L')) - ipv4_int >>= 16 - fields.extend(reversed(octets)) - ip_str = ':'.join(fields) - - fields = self._explode_shorthand_ip_string(ip_str).split(':') - for field in fields: - try: - ip_int = (ip_int << 16) + int(field or '0', 16) - except ValueError: + # An IPv6 address needs at least 2 colons (3 parts). + if len(parts) < 3: + raise AddressValueError(ip_str) + + # If the address has an IPv4-style suffix, convert it to hexadecimal. + if '.' in parts[-1]: + ipv4_int = IPv4Address(parts.pop())._ip + parts.append('%x' % ((ipv4_int >> 16) & 0xFFFF)) + parts.append('%x' % (ipv4_int & 0xFFFF)) + + # An IPv6 address can't have more than 8 colons (9 parts). + if len(parts) > self._HEXTET_COUNT + 1: + raise AddressValueError(ip_str) + + # Disregarding the endpoints, find '::' with nothing in between. + # This indicates that a run of zeroes has been skipped. + try: + skip_index, = ( + [i for i in xrange(1, len(parts) - 1) if not parts[i]] or + [None]) + except ValueError: + # Can't have more than one '::' + raise AddressValueError(ip_str) + + # parts_hi is the number of parts to copy from above/before the '::' + # parts_lo is the number of parts to copy from below/after the '::' + if skip_index is not None: + # If we found a '::', then check if it also covers the endpoints. + parts_hi = skip_index + parts_lo = len(parts) - skip_index - 1 + if not parts[0]: + parts_hi -= 1 + if parts_hi: + raise AddressValueError(ip_str) # ^: requires ^:: + if not parts[-1]: + parts_lo -= 1 + if parts_lo: + raise AddressValueError(ip_str) # :$ requires ::$ + parts_skipped = self._HEXTET_COUNT - (parts_hi + parts_lo) + if parts_skipped < 1: + raise AddressValueError(ip_str) + else: + # Otherwise, allocate the entire address to parts_hi. The endpoints + # could still be empty, but _parse_hextet() will check for that. + if len(parts) != self._HEXTET_COUNT: raise AddressValueError(ip_str) + parts_hi = len(parts) + parts_lo = 0 + parts_skipped = 0 + + try: + # Now, parse the hextets into a 128-bit integer. + ip_int = 0L + for i in xrange(parts_hi): + ip_int <<= 16 + ip_int |= self._parse_hextet(parts[i]) + ip_int <<= 16 * parts_skipped + for i in xrange(-parts_lo, 0): + ip_int <<= 16 + ip_int |= self._parse_hextet(parts[i]) + return ip_int + except ValueError: + raise AddressValueError(ip_str) - return ip_int + def _parse_hextet(self, hextet_str): + """Convert an IPv6 hextet string into an integer. + + Args: + hextet_str: A string, the number to parse. + + Returns: + The hextet as an integer. + + Raises: + ValueError: if the input isn't strictly a hex number from [0..FFFF]. + + """ + # Whitelist the characters, since int() allows a lot of bizarre stuff. + if not self._HEX_DIGITS.issuperset(hextet_str): + raise ValueError + hextet_int = int(hextet_str, 16) + if hextet_int > 0xFFFF: + raise ValueError + return hextet_int def _compress_hextets(self, hextets): """Compresses a list of hextets. 
@@ -1522,7 +1569,7 @@ def _string_from_ip_int(self, ip_int=None): hextets = self._compress_hextets(hextets) return ':'.join(hextets) - def _explode_shorthand_ip_string(self, ip_str=None): + def _explode_shorthand_ip_string(self): """Expand a shortened IPv6 address. Args: @@ -1532,108 +1579,20 @@ def _explode_shorthand_ip_string(self, ip_str=None): A string, the expanded IPv6 address. """ - if not ip_str: + if isinstance(self, _BaseNet): + ip_str = str(self.ip) + else: ip_str = str(self) - if isinstance(self, _BaseNet): - ip_str = str(self.ip) - - if self._is_shorthand_ip(ip_str): - new_ip = [] - hextet = ip_str.split('::') - - if len(hextet) > 1: - sep = len(hextet[0].split(':')) + len(hextet[1].split(':')) - new_ip = hextet[0].split(':') - - for _ in xrange(8 - sep): - new_ip.append('0000') - new_ip += hextet[1].split(':') - - else: - new_ip = ip_str.split(':') - # Now need to make sure every hextet is 4 lower case characters. - # If a hextet is < 4 characters, we've got missing leading 0's. - ret_ip = [] - for hextet in new_ip: - ret_ip.append(('0' * (4 - len(hextet)) + hextet).lower()) - return ':'.join(ret_ip) - # We've already got a longhand ip_str. - return ip_str - - def _is_valid_ip(self, ip_str): - """Ensure we have a valid IPv6 address. - - Probably not as exhaustive as it should be. - - Args: - ip_str: A string, the IPv6 address. - - Returns: - A boolean, True if this is a valid IPv6 address. - - """ - # We need to have at least one ':'. - if ':' not in ip_str: - return False - - # We can only have one '::' shortener. - if ip_str.count('::') > 1: - return False - - # '::' should be encompassed by start, digits or end. - if ':::' in ip_str: - return False - - # A single colon can neither start nor end an address. - if ((ip_str.startswith(':') and not ip_str.startswith('::')) or - (ip_str.endswith(':') and not ip_str.endswith('::'))): - return False - - # If we have no concatenation, we need to have 8 fields with 7 ':'. - if '::' not in ip_str and ip_str.count(':') != 7: - # We might have an IPv4 mapped address. - if ip_str.count('.') != 3: - return False - - ip_str = self._explode_shorthand_ip_string(ip_str) - - # Now that we have that all squared away, let's check that each of the - # hextets are between 0x0 and 0xFFFF. - for hextet in ip_str.split(':'): - if hextet.count('.') == 3: - # If we have an IPv4 mapped address, the IPv4 portion has to - # be at the end of the IPv6 portion. - if not ip_str.split(':')[-1] == hextet: - return False - try: - IPv4Network(hextet) - except AddressValueError: - return False - else: - try: - # a value error here means that we got a bad hextet, - # something like 0xzzzz - if int(hextet, 16) < 0x0 or int(hextet, 16) > 0xFFFF: - return False - except ValueError: - return False - return True - - def _is_shorthand_ip(self, ip_str=None): - """Determine if the address is shortened. - Args: - ip_str: A string, the IPv6 address. - - Returns: - A boolean, True if the address is shortened. - - """ - if ip_str.count('::') == 1: - return True - if filter(lambda x: len(x) < 4, ip_str.split(':')): - return True - return False + ip_int = self._ip_int_from_string(ip_str) + parts = [] + for i in xrange(self._HEXTET_COUNT): + parts.append('%04x' % (ip_int & 0xFFFF)) + ip_int >>= 16 + parts.reverse() + if isinstance(self, _BaseNet): + return '%s/%d' % (':'.join(parts), self.prefixlen) + return ':'.join(parts) @property def max_prefixlen(self): @@ -1749,13 +1708,9 @@ def ipv4_mapped(self): IPv4 mapped address. Return None otherwise. 
""" - hextets = self._explode_shorthand_ip_string().split(':') - if hextets[-3] != 'ffff': - return None - try: - return IPv4Address(int('%s%s' % (hextets[-2], hextets[-1]), 16)) - except AddressValueError: + if (self._ip >> 32) != 0xFFFF: return None + return IPv4Address(self._ip & 0xFFFFFFFF) @property def teredo(self): @@ -1764,14 +1719,13 @@ def teredo(self): Returns: Tuple of the (server, client) IPs or None if the address doesn't appear to be a teredo address (doesn't start with - 2001) + 2001::/32) """ - bits = self._explode_shorthand_ip_string().split(':') - if not bits[0] == '2001': + if (self._ip >> 96) != 0x20010000: return None - return (IPv4Address(int(''.join(bits[2:4]), 16)), - IPv4Address(int(''.join(bits[6:]), 16) ^ 0xFFFFFFFF)) + return (IPv4Address((self._ip >> 64) & 0xFFFFFFFF), + IPv4Address(~self._ip & 0xFFFFFFFF)) @property def sixtofour(self): @@ -1782,10 +1736,9 @@ def sixtofour(self): address doesn't appear to contain a 6to4 embedded address. """ - bits = self._explode_shorthand_ip_string().split(':') - if not bits[0] == '2002': + if (self._ip >> 112) != 0x2002: return None - return IPv4Address(int(''.join(bits[1:3]), 16)) + return IPv4Address((self._ip >> 80) & 0xFFFFFFFF) class IPv6Address(_BaseV6, _BaseIP): @@ -1810,7 +1763,6 @@ def __init__(self, address): AddressValueError: If address isn't a valid IPv6 address. """ - _BaseIP.__init__(self, address) _BaseV6.__init__(self, address) # Efficient constructor from integer. @@ -1821,11 +1773,13 @@ def __init__(self, address): return # Constructing from a packed address - if _compat_has_real_bytes: - if isinstance(address, bytes) and len(address) == 16: - tmp = struct.unpack('!QQ', address) - self._ip = (tmp[0] << 64) | tmp[1] - return + if isinstance(address, Bytes): + try: + hi, lo = struct.unpack('!QQ', address) + except struct.error: + raise AddressValueError(address) # Wrong length. + self._ip = (hi << 64) | lo + return # Assume input argument to be string or any object representation # which converts into a formatted IP string. @@ -1833,9 +1787,6 @@ def __init__(self, address): if not addr_str: raise AddressValueError('') - if not self._is_valid_ip(addr_str): - raise AddressValueError(addr_str) - self._ip = self._ip_int_from_string(addr_str) @@ -1889,26 +1840,14 @@ def __init__(self, address, strict=False): _BaseNet.__init__(self, address) _BaseV6.__init__(self, address) - # Efficient constructor from integer. - if isinstance(address, (int, long)): - self._ip = address - self.ip = IPv6Address(self._ip) + # Constructing from an integer or packed bytes. + if isinstance(address, (int, long, Bytes)): + self.ip = IPv6Address(address) + self._ip = self.ip._ip self._prefixlen = self._max_prefixlen self.netmask = IPv6Address(self._ALL_ONES) - if address < 0 or address > self._ALL_ONES: - raise AddressValueError(address) return - # Constructing from a packed address - if _compat_has_real_bytes: - if isinstance(address, bytes) and len(address) == 16: - tmp = struct.unpack('!QQ', address) - self._ip = (tmp[0] << 64) | tmp[1] - self.ip = IPv6Address(self._ip) - self._prefixlen = self._max_prefixlen - self.netmask = IPv6Address(self._ALL_ONES) - return - # Assume input argument to be string or any object representation # which converts into a formatted IP prefix string. 
addr = str(address).split('/') @@ -1916,8 +1855,8 @@ def __init__(self, address, strict=False): if len(addr) > 2: raise AddressValueError(address) - if not self._is_valid_ip(addr[0]): - raise AddressValueError(addr[0]) + self._ip = self._ip_int_from_string(addr[0]) + self.ip = IPv6Address(self._ip) if len(addr) == 2: if self._is_valid_netmask(addr[1]): @@ -1929,13 +1868,12 @@ def __init__(self, address, strict=False): self.netmask = IPv6Address(self._ip_int_from_prefix(self._prefixlen)) - self._ip = self._ip_int_from_string(addr[0]) - self.ip = IPv6Address(self._ip) - if strict: if self.ip != self.network: raise ValueError('%s has host bits set' % self.ip) + if self._prefixlen == (self._max_prefixlen - 1): + self.iterhosts = self.__iter__ def _is_valid_netmask(self, prefixlen): """Verify that the netmask/prefixlen is valid. diff --git a/wpr/third_party/ipaddr/ipaddr_test.py b/wpr/third_party/ipaddr/ipaddr_test.py index 64bc2b4..9446889 100755 --- a/wpr/third_party/ipaddr/ipaddr_test.py +++ b/wpr/third_party/ipaddr/ipaddr_test.py @@ -23,10 +23,10 @@ import ipaddr # Compatibility function to cast str to bytes objects -if ipaddr._compat_has_real_bytes: - _cb = lambda bytestr: bytes(bytestr, 'charmap') +if issubclass(ipaddr.Bytes, str): + _cb = ipaddr.Bytes else: - _cb = str + _cb = lambda bytestr: bytes(bytestr, 'charmap') class IpaddrUnitTest(unittest.TestCase): @@ -68,25 +68,72 @@ def testAddressIntMath(self): ipaddr.IPv6Address('::1')) def testInvalidStrings(self): - self.assertRaises(ValueError, ipaddr.IPNetwork, '') - self.assertRaises(ValueError, ipaddr.IPNetwork, 'www.google.com') - self.assertRaises(ValueError, ipaddr.IPNetwork, '1.2.3') - self.assertRaises(ValueError, ipaddr.IPNetwork, '1.2.3.4.5') - self.assertRaises(ValueError, ipaddr.IPNetwork, '301.2.2.2') - self.assertRaises(ValueError, ipaddr.IPNetwork, '1:2:3:4:5:6:7') - self.assertRaises(ValueError, ipaddr.IPNetwork, '1:2:3:4:5:6:7:') - self.assertRaises(ValueError, ipaddr.IPNetwork, ':2:3:4:5:6:7:8') - self.assertRaises(ValueError, ipaddr.IPNetwork, '1:2:3:4:5:6:7:8:9') - self.assertRaises(ValueError, ipaddr.IPNetwork, '1:2:3:4:5:6:7:8:') - self.assertRaises(ValueError, ipaddr.IPNetwork, '1::3:4:5:6::8') - self.assertRaises(ValueError, ipaddr.IPNetwork, 'a:') - self.assertRaises(ValueError, ipaddr.IPNetwork, ':') - self.assertRaises(ValueError, ipaddr.IPNetwork, ':::') - self.assertRaises(ValueError, ipaddr.IPNetwork, '::a:') - self.assertRaises(ValueError, ipaddr.IPNetwork, '1ffff::') - self.assertRaises(ValueError, ipaddr.IPNetwork, '0xa::') - self.assertRaises(ValueError, ipaddr.IPNetwork, '1:2:3:4:5:6:1a.2.3.4') - self.assertRaises(ValueError, ipaddr.IPNetwork, '1:2:3:4:5:1.2.3.4:8') + def AssertInvalidIP(ip_str): + self.assertRaises(ValueError, ipaddr.IPAddress, ip_str) + AssertInvalidIP("") + AssertInvalidIP("016.016.016.016") + AssertInvalidIP("016.016.016") + AssertInvalidIP("016.016") + AssertInvalidIP("016") + AssertInvalidIP("000.000.000.000") + AssertInvalidIP("000") + AssertInvalidIP("0x0a.0x0a.0x0a.0x0a") + AssertInvalidIP("0x0a.0x0a.0x0a") + AssertInvalidIP("0x0a.0x0a") + AssertInvalidIP("0x0a") + AssertInvalidIP("42.42.42.42.42") + AssertInvalidIP("42.42.42") + AssertInvalidIP("42.42") + AssertInvalidIP("42") + AssertInvalidIP("42..42.42") + AssertInvalidIP("42..42.42.42") + AssertInvalidIP("42.42.42.42.") + AssertInvalidIP("42.42.42.42...") + AssertInvalidIP(".42.42.42.42") + AssertInvalidIP("...42.42.42.42") + AssertInvalidIP("42.42.42.-0") + AssertInvalidIP("42.42.42.+0") + AssertInvalidIP(".") + 
AssertInvalidIP("...") + AssertInvalidIP("bogus") + AssertInvalidIP("bogus.com") + AssertInvalidIP("192.168.0.1.com") + AssertInvalidIP("12345.67899.-54321.-98765") + AssertInvalidIP("257.0.0.0") + AssertInvalidIP("42.42.42.-42") + AssertInvalidIP("3ffe::1.net") + AssertInvalidIP("3ffe::1::1") + AssertInvalidIP("1::2::3::4:5") + AssertInvalidIP("::7:6:5:4:3:2:") + AssertInvalidIP(":6:5:4:3:2:1::") + AssertInvalidIP("2001::db:::1") + AssertInvalidIP("FEDC:9878") + AssertInvalidIP("+1.+2.+3.4") + AssertInvalidIP("1.2.3.4e0") + AssertInvalidIP("::7:6:5:4:3:2:1:0") + AssertInvalidIP("7:6:5:4:3:2:1:0::") + AssertInvalidIP("9:8:7:6:5:4:3::2:1") + AssertInvalidIP("0:1:2:3::4:5:6:7") + AssertInvalidIP("3ffe:0:0:0:0:0:0:0:1") + AssertInvalidIP("3ffe::10000") + AssertInvalidIP("3ffe::goog") + AssertInvalidIP("3ffe::-0") + AssertInvalidIP("3ffe::+0") + AssertInvalidIP("3ffe::-1") + AssertInvalidIP(":") + AssertInvalidIP(":::") + AssertInvalidIP("::1.2.3") + AssertInvalidIP("::1.2.3.4.5") + AssertInvalidIP("::1.2.3.4:") + AssertInvalidIP("1.2.3.4::") + AssertInvalidIP("2001:db8::1:") + AssertInvalidIP(":2001:db8::1") + AssertInvalidIP(":1:2:3:4:5:6:7") + AssertInvalidIP("1:2:3:4:5:6:7:") + AssertInvalidIP(":1:2:3:4:5:6:") + AssertInvalidIP("192.0.2.1/32") + AssertInvalidIP("2001:db8::1/128") + self.assertRaises(ipaddr.AddressValueError, ipaddr.IPv4Network, '') self.assertRaises(ipaddr.AddressValueError, ipaddr.IPv4Network, 'google.com') @@ -188,26 +235,25 @@ def testIpFromInt(self): self.assertEqual(ipaddr.IPNetwork(self.ipv4.ip).version, 4) self.assertEqual(ipaddr.IPNetwork(self.ipv6.ip).version, 6) - if ipaddr._compat_has_real_bytes: # on python3+ - def testIpFromPacked(self): - ip = ipaddr.IPNetwork - - self.assertEqual(self.ipv4.ip, - ip(_cb('\x01\x02\x03\x04')).ip) - self.assertEqual(ip('255.254.253.252'), - ip(_cb('\xff\xfe\xfd\xfc'))) - self.assertRaises(ValueError, ipaddr.IPNetwork, _cb('\x00' * 3)) - self.assertRaises(ValueError, ipaddr.IPNetwork, _cb('\x00' * 5)) - self.assertEqual(self.ipv6.ip, - ip(_cb('\x20\x01\x06\x58\x02\x2a\xca\xfe' - '\x02\x00\x00\x00\x00\x00\x00\x01')).ip) - self.assertEqual(ip('ffff:2:3:4:ffff::'), - ip(_cb('\xff\xff\x00\x02\x00\x03\x00\x04' + - '\xff\xff' + '\x00' * 6))) - self.assertEqual(ip('::'), - ip(_cb('\x00' * 16))) - self.assertRaises(ValueError, ip, _cb('\x00' * 15)) - self.assertRaises(ValueError, ip, _cb('\x00' * 17)) + def testIpFromPacked(self): + ip = ipaddr.IPNetwork + + self.assertEqual(self.ipv4.ip, + ip(_cb('\x01\x02\x03\x04')).ip) + self.assertEqual(ip('255.254.253.252'), + ip(_cb('\xff\xfe\xfd\xfc'))) + self.assertRaises(ValueError, ipaddr.IPNetwork, _cb('\x00' * 3)) + self.assertRaises(ValueError, ipaddr.IPNetwork, _cb('\x00' * 5)) + self.assertEqual(self.ipv6.ip, + ip(_cb('\x20\x01\x06\x58\x02\x2a\xca\xfe' + '\x02\x00\x00\x00\x00\x00\x00\x01')).ip) + self.assertEqual(ip('ffff:2:3:4:ffff::'), + ip(_cb('\xff\xff\x00\x02\x00\x03\x00\x04' + + '\xff\xff' + '\x00' * 6))) + self.assertEqual(ip('::'), + ip(_cb('\x00' * 16))) + self.assertRaises(ValueError, ip, _cb('\x00' * 15)) + self.assertRaises(ValueError, ip, _cb('\x00' * 17)) def testGetIp(self): self.assertEqual(int(self.ipv4.ip), 16909060) @@ -287,6 +333,11 @@ def testIterSubnets(self): self.assertEqual(self.ipv4.subnet(), list(self.ipv4.iter_subnets())) self.assertEqual(self.ipv6.subnet(), list(self.ipv6.iter_subnets())) + def testIterHosts(self): + self.assertEqual([ipaddr.IPv4Address('2.0.0.0'), + ipaddr.IPv4Address('2.0.0.1')], + list(ipaddr.IPNetwork('2.0.0.0/31').iterhosts())) + def 
testFancySubnetting(self): self.assertEqual(sorted(self.ipv4.subnet(prefixlen_diff=3)), sorted(self.ipv4.subnet(new_prefix=27))) @@ -893,7 +944,7 @@ def testCompressIPv6Address(self): '2001:0:0:4:0:0:0:8': '2001:0:0:4::8/128', '2001:0:0:4:5:6:7:8': '2001::4:5:6:7:8/128', '2001:0:3:4:5:6:7:8': '2001:0:3:4:5:6:7:8/128', - '2001:0::3:4:5:6:7:8': '2001:0:3:4:5:6:7:8/128', + '2001:0:3:4:5:6:7:8': '2001:0:3:4:5:6:7:8/128', '0:0:3:0:0:0:0:ffff': '0:0:3::ffff/128', '0:0:0:4:0:0:0:ffff': '::4:0:0:0:ffff/128', '0:0:0:0:5:0:0:ffff': '::5:0:0:ffff/128', @@ -903,6 +954,12 @@ def testCompressIPv6Address(self): '0:0:0:0:0:0:0:1': '::1/128', '2001:0658:022a:cafe:0000:0000:0000:0000/66': '2001:658:22a:cafe::/66', + '::1.2.3.4': '::102:304/128', + '1:2:3:4:5:ffff:1.2.3.4': '1:2:3:4:5:ffff:102:304/128', + '::7:6:5:4:3:2:1': '0:7:6:5:4:3:2:1/128', + '::7:6:5:4:3:2:0': '0:7:6:5:4:3:2:0/128', + '7:6:5:4:3:2:1::': '7:6:5:4:3:2:1:0/128', + '0:6:5:4:3:2:1::': '0:6:5:4:3:2:1:0/128', } for uncompressed, compressed in test_addresses.items(): self.assertEqual(compressed, str(ipaddr.IPv6Network(uncompressed))) @@ -910,9 +967,9 @@ def testCompressIPv6Address(self): def testExplodeShortHandIpStr(self): addr1 = ipaddr.IPv6Network('2001::1') addr2 = ipaddr.IPv6Address('2001:0:5ef5:79fd:0:59d:a0e5:ba1') - self.assertEqual('2001:0000:0000:0000:0000:0000:0000:0001', - addr1._explode_shorthand_ip_string(str(addr1.ip))) - self.assertEqual('0000:0000:0000:0000:0000:0000:0000:0001', + self.assertEqual('2001:0000:0000:0000:0000:0000:0000:0001/128', + addr1.exploded) + self.assertEqual('0000:0000:0000:0000:0000:0000:0000:0001/128', ipaddr.IPv6Network('::1/128').exploded) # issue 77 self.assertEqual('2001:0000:5ef5:79fd:0000:059d:a0e5:0ba1', @@ -957,7 +1014,7 @@ def testBackwardsCompability(self): self.assertEqual(ipaddr.IPNetwork('::/121').Supernet(), ipaddr.IPNetwork('::/120')) - self.assertEqual(ipaddr.IPNetwork('10.0.0.02').IsRFC1918(), True) + self.assertEqual(ipaddr.IPNetwork('10.0.0.2').IsRFC1918(), True) self.assertEqual(ipaddr.IPNetwork('10.0.0.0').IsMulticast(), False) self.assertEqual(ipaddr.IPNetwork('127.255.255.255').IsLoopback(), True) self.assertEqual(ipaddr.IPNetwork('169.255.255.255').IsLinkLocal(), @@ -1017,19 +1074,6 @@ def testNetworkElementCaching(self): self.assertTrue(self.ipv6._cache.has_key('broadcast')) self.assertTrue(self.ipv6._cache.has_key('hostmask')) - def testIsValidIp(self): - ip = ipaddr.IPv6Address('::') - self.assertTrue(ip._is_valid_ip('2001:658:22a:cafe:200::1')) - self.assertTrue(ip._is_valid_ip('::ffff:10.10.0.0')) - self.assertTrue(ip._is_valid_ip('::ffff:192.168.0.0')) - self.assertFalse(ip._is_valid_ip('2001:658:22a::::1')) - self.assertFalse(ip._is_valid_ip(':658:22a:cafe:200::1')) - self.assertFalse(ip._is_valid_ip('2001:658:22a:cafe:200:')) - self.assertFalse(ip._is_valid_ip('2001:658:22a:cafe:200:127.0.0.1::1')) - self.assertFalse(ip._is_valid_ip('2001:658:22a:cafe:200::127.0.1')) - self.assertFalse(ip._is_valid_ip('2001:658:22a:zzzz:200::1')) - self.assertFalse(ip._is_valid_ip('2001:658:22a:cafe1:200::1')) - def testTeredo(self): # stolen from wikipedia server = ipaddr.IPv4Address('65.54.227.120') @@ -1039,6 +1083,8 @@ def testTeredo(self): ipaddr.IPAddress(teredo_addr).teredo) bad_addr = '2000::4136:e378:8000:63bf:3fff:fdd2' self.assertFalse(ipaddr.IPAddress(bad_addr).teredo) + bad_addr = '2001:0001:4136:e378:8000:63bf:3fff:fdd2' + self.assertFalse(ipaddr.IPAddress(bad_addr).teredo) # i77 teredo_addr = ipaddr.IPv6Address('2001:0:5ef5:79fd:0:59d:a0e5:ba1') diff --git 
a/wpr/third_party/ipaddr/test-2to3.sh b/wpr/third_party/ipaddr/test-2to3.sh index 408d665..5196083 100755 --- a/wpr/third_party/ipaddr/test-2to3.sh +++ b/wpr/third_party/ipaddr/test-2to3.sh @@ -1,5 +1,19 @@ #!/bin/sh - +# Copyright 2007 Google Inc. +# Licensed to PSF under a Contributor Agreement. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or +# implied. See the License for the specific language governing +# permissions and limitations under the License. +# # Converts the python2 ipaddr files to python3 and runs the unit tests # with both python versions. diff --git a/wpr/third_party/ipfw_win32/LICENSE b/wpr/third_party/ipfw_win32/LICENSE new file mode 100644 index 0000000..c1df6fe --- /dev/null +++ b/wpr/third_party/ipfw_win32/LICENSE @@ -0,0 +1,25 @@ +/*- + * Copyright (c) 1998-2010 Luigi Rizzo, Universita` di Pisa + * All rights reserved + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ diff --git a/wpr/third_party/ipfw_win32/README.web-page-replay b/wpr/third_party/ipfw_win32/README.web-page-replay new file mode 100644 index 0000000..8bf15c6 --- /dev/null +++ b/wpr/third_party/ipfw_win32/README.web-page-replay @@ -0,0 +1,12 @@ +Name: Windows XP NDIS module for Dummynet. +Short Name: ipfw3 +URL: http://info.iet.unipi.it/~luigi/dummynet/ +Version: 20100322 v.3.0.0.2 +License: BSD +License File: LICENSE + +Description: +Used by Web Page Replay to simulate network delays and bandwidth throttling on Windows XP. + +Local Modifications: +Dropped files: cyg-ipfw.exe, cygwin1.dll, testme.bat, wget.exe. 
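For orientation, the dummynet primitives behind this module work roughly as sketched below. This is a minimal illustration only: Web Page Replay drives ipfw through platformsettings (see the trafficshaper.py changes later in this patch) rather than shelling out directly, and the pipe number and shaping values here are made-up examples, assuming an ipfw binary on the PATH.

    #!/usr/bin/env python
    import subprocess

    def simulate_slow_link(pipe='1', bandwidth='750Kbit/s', delay_ms=40, plr='0'):
        # Route all IP traffic through one dummynet pipe.
        subprocess.check_call(['ipfw', 'add', 'pipe', pipe,
                               'ip', 'from', 'any', 'to', 'any'])
        # Then shape that pipe: bandwidth cap, propagation delay, packet loss.
        subprocess.check_call(['ipfw', 'pipe', pipe, 'config',
                               'bw', bandwidth,
                               'delay', '%dms' % delay_ms,
                               'plr', plr])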
\ No newline at end of file diff --git a/wpr/third_party/nbhttp/README.web-page-replay b/wpr/third_party/nbhttp/README.web-page-replay index b606e41..64d9eb0 100644 --- a/wpr/third_party/nbhttp/README.web-page-replay +++ b/wpr/third_party/nbhttp/README.web-page-replay @@ -1,5 +1,16 @@ -Source code home: https://github.com/mnot/nbhttp.git - commit 3f5d9b4f38c6579199cb - tree 47b3e9909bf633a098fb - parent 59b7793ef70f4fcf46ad -This directory contains files only from nbhttp/src directory. Please see each file header or LICENSE file (which is extracted from file headers) for license information. +Name: Tools for building non-blocking HTTP components +Short Name: nbhttp +URL: https://github.com/mnot/nbhttp/tree/spdy +Revision: commit 3f5d9b4f38c6579199cb + tree 47b3e9909bf633a098fb + parent 59b7793ef70f4fcf46ad +License: MIT/X11 (BSD like) +License File: LICENSE + +Description: +nbhttp is used to add support for spdy/2. + +Local Modifications: +Copied license from README to LICENSE. +Only included files from the nbhttp/src directory. +Moved license boilerplate to tops of files for Chrome license check. \ No newline at end of file diff --git a/wpr/third_party/nbhttp/__init__.py b/wpr/third_party/nbhttp/__init__.py index a909c83..ce1e048 100644 --- a/wpr/third_party/nbhttp/__init__.py +++ b/wpr/third_party/nbhttp/__init__.py @@ -4,6 +4,28 @@ Non-blocking HTTP components. """ +__copyright__ = """\ +Copyright (c) 2008-2009 Mark Nottingham + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +""" + from client import Client from server import Server from push_tcp import run, stop, schedule diff --git a/wpr/third_party/nbhttp/client.py b/wpr/third_party/nbhttp/client.py index f91f0dc..e912362 100644 --- a/wpr/third_party/nbhttp/client.py +++ b/wpr/third_party/nbhttp/client.py @@ -1,5 +1,27 @@ #!/usr/bin/env python +__copyright__ = """\ +Copyright (c) 2008-2009 Mark Nottingham + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. 
+ +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +""" + """ Non-Blocking HTTP Client @@ -63,27 +85,6 @@ """ __author__ = "Mark Nottingham " -__copyright__ = """\ -Copyright (c) 2008-2009 Mark Nottingham - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. -""" from urlparse import urlsplit, urlunsplit diff --git a/wpr/third_party/nbhttp/push_tcp.py b/wpr/third_party/nbhttp/push_tcp.py index 43017da..547e6bc 100644 --- a/wpr/third_party/nbhttp/push_tcp.py +++ b/wpr/third_party/nbhttp/push_tcp.py @@ -2,6 +2,28 @@ import traceback +__copyright__ = """\ +Copyright (c) 2008-2009 Mark Nottingham + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. 
+""" + """ push-based asynchronous TCP @@ -122,27 +144,6 @@ """ __author__ = "Mark Nottingham " -__copyright__ = """\ -Copyright (c) 2008-2009 Mark Nottingham - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. -""" import sys import socket diff --git a/wpr/third_party/nbhttp/server.py b/wpr/third_party/nbhttp/server.py index 6f5a0be..7e43845 100755 --- a/wpr/third_party/nbhttp/server.py +++ b/wpr/third_party/nbhttp/server.py @@ -1,5 +1,27 @@ #!/usr/bin/env python +__copyright__ = """\ +Copyright (c) 2008-2009 Mark Nottingham + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +""" + """ Non-Blocking HTTP Server @@ -63,27 +85,6 @@ """ __author__ = "Mark Nottingham " -__copyright__ = """\ -Copyright (c) 2008-2009 Mark Nottingham - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. -""" import os import sys diff --git a/wpr/third_party/nbhttp/spdy_client.py b/wpr/third_party/nbhttp/spdy_client.py index 4c9af41..58856ec 100644 --- a/wpr/third_party/nbhttp/spdy_client.py +++ b/wpr/third_party/nbhttp/spdy_client.py @@ -1,5 +1,27 @@ #!/usr/bin/env python +__copyright__ = """\ +Copyright (c) 2008-2009 Mark Nottingham + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +""" + """ Non-Blocking SPDY Client @@ -65,27 +87,6 @@ # FIXME: update docs for API change (move res_start) __author__ = "Mark Nottingham " -__copyright__ = """\ -Copyright (c) 2008-2009 Mark Nottingham - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. -""" from urlparse import urlsplit diff --git a/wpr/third_party/nbhttp/spdy_common.py b/wpr/third_party/nbhttp/spdy_common.py index 09b3416..a978b79 100644 --- a/wpr/third_party/nbhttp/spdy_common.py +++ b/wpr/third_party/nbhttp/spdy_common.py @@ -33,10 +33,18 @@ import struct -import c_zlib +compressed_hdrs = True +try: + import c_zlib +except TypeError: + # c_zlib loads "libz". However, that fails on Windows. + compressed_hdrs = False + import sys + print >>sys.stderr, ( + 'WARNING: spdy_common: import c_zlib failed. 
Using uncompressed headers.') + from http_common import dummy -compressed_hdrs = True # There is a null character ('\0') at the end of the dictionary. The '\0' might # be removed in future spdy versions. dictionary = \ diff --git a/wpr/third_party/nbhttp/spdy_server.py b/wpr/third_party/nbhttp/spdy_server.py index 5fe084b..931645e 100755 --- a/wpr/third_party/nbhttp/spdy_server.py +++ b/wpr/third_party/nbhttp/spdy_server.py @@ -1,5 +1,27 @@ #!/usr/bin/env python +__copyright__ = """\ +Copyright (c) 2008-2009 Mark Nottingham + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +""" + """ Non-Blocking SPDY Server @@ -63,27 +85,6 @@ """ __author__ = "Mark Nottingham " -__copyright__ = """\ -Copyright (c) 2008-2009 Mark Nottingham - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. -""" import os import sys diff --git a/wpr/trafficshaper.py b/wpr/trafficshaper.py old mode 100755 new mode 100644 index 8ff74c2..9b6f894 --- a/wpr/trafficshaper.py +++ b/wpr/trafficshaper.py @@ -37,30 +37,37 @@ def __str__(self): class TrafficShaper(object): + """Manages network traffic shaping.""" - _UPLOAD_PIPE = '1' # Enforces overall upload bandwidth. - _UPLOAD_QUEUE = '2' # Shares upload bandwidth among source ports. - _DOWNLOAD_PIPE = '3' # Enforces overall download bandwidth. - _DOWNLOAD_QUEUE = '4' # Shares download bandwidth among destination ports. + # Pick webpagetest-compatible values (details: http://goo.gl/oghTg). + _UPLOAD_PIPE = '10' # Enforces overall upload bandwidth. + _UPLOAD_QUEUE = '10' # Shares upload bandwidth among source ports. 
+ _UPLOAD_RULE = '5000' # Specifies when the upload queue is used. + _DOWNLOAD_PIPE = '11' # Enforces overall download bandwidth. + _DOWNLOAD_QUEUE = '11' # Shares download bandwidth among destination ports. + _DOWNLOAD_RULE = '5100' # Specifies when the download queue is used. + _QUEUE_SLOTS = 100 # Number of packets to queue. _BANDWIDTH_RE = re.compile(BANDWIDTH_PATTERN) - """Manages network traffic shaping.""" def __init__(self, dont_use=None, host='127.0.0.1', port='80', + ssl_port='443', dns_port='53', up_bandwidth='0', down_bandwidth='0', delay_ms='0', packet_loss_rate='0', - init_cwnd='0'): + init_cwnd='0', + use_loopback=True): """Start shaping traffic. Args: host: a host string (name or IP) for the web proxy. port: a port string (e.g. '80') for the web proxy. + ssl_port: a port string (e.g. '443') for the SSL web proxy. dns_port: a port string for the dns proxy (for unit testing). up_bandwidth: Upload bandwidth down_bandwidth: Download bandwidth @@ -68,43 +75,47 @@ def __init__(self, delay_ms: Propagation delay in milliseconds. '0' means no delay. packet_loss_rate: Packet loss rate in range [0..1]. '0' means no loss. init_cwnd: the initial cwnd setting. '0' means no change. + use_loopback: True iff shaping is done on the loopback (or equiv) adapter. """ assert dont_use is None # Force args to be named. self.platformsettings = platformsettings.get_platform_settings() self.host = host self.port = port + self.ssl_port = ssl_port self.dns_port = dns_port self.up_bandwidth = up_bandwidth self.down_bandwidth = down_bandwidth self.delay_ms = delay_ms self.packet_loss_rate = packet_loss_rate self.init_cwnd = init_cwnd + self.use_loopback = use_loopback if not self._BANDWIDTH_RE.match(self.up_bandwidth): raise BandwidthValueError(self.up_bandwidth) if not self._BANDWIDTH_RE.match(self.down_bandwidth): raise BandwidthValueError(self.down_bandwidth) - + self.is_shaping = False def __enter__(self): - self.platformsettings.configure_loopback() + if self.use_loopback: + self.platformsettings.configure_loopback() if self.init_cwnd != '0': - if self.platformsettings.is_cwnd_available(): - self.original_cwnd = self.platformsettings.get_cwnd() - self.platformsettings.set_cwnd(self.init_cwnd) - else: - logging.error('Platform does not support setting cwnd.') + self.platformsettings.set_cwnd(self.init_cwnd) try: - self.platformsettings.ipfw('-q', 'flush') + ipfw_list = self.platformsettings.ipfw('list') + if not ipfw_list.startswith('65535 '): + logging.warn('ipfw has existing rules:\n%s', ipfw_list) + self._delete_rules(ipfw_list) except: pass if (self.up_bandwidth == '0' and self.down_bandwidth == '0' and self.delay_ms == '0' and self.packet_loss_rate == '0'): + logging.info('Skipped shaping traffic.') return if not self.dns_port and not self.port: raise TrafficShaperException('No ports on which to shape traffic.') - ports = ','.join(str(p) for p in (self.port, self.dns_port) if p) - queue_size = self.platformsettings.get_ipfw_queue_slots() + ports = ','.join( + str(p) for p in (self.port, self.ssl_port, self.dns_port) if p) half_delay_ms = int(self.delay_ms) / 2 # split over up/down links try: @@ -120,18 +131,19 @@ def __enter__(self): 'config', 'pipe', self._UPLOAD_PIPE, 'plr', self.packet_loss_rate, - 'queue', queue_size, + 'queue', self._QUEUE_SLOTS, 'mask', 'src-port', '0xffff', ) self.platformsettings.ipfw( - 'add', + 'add', self._UPLOAD_RULE, 'queue', self._UPLOAD_QUEUE, 'ip', 'from', 'any', 'to', self.host, - 'out', + self.use_loopback and 'out' or 'in', 'dst-port', ports, ) + 
self.is_shaping = True # Configure download shaping. self.platformsettings.ipfw( @@ -145,11 +157,11 @@ def __enter__(self): 'config', 'pipe', self._DOWNLOAD_PIPE, 'plr', self.packet_loss_rate, - 'queue', queue_size, + 'queue', self._QUEUE_SLOTS, 'mask', 'dst-port', '0xffff', ) self.platformsettings.ipfw( - 'add', + 'add', self._DOWNLOAD_RULE, 'queue', self._DOWNLOAD_QUEUE, 'ip', 'from', self.host, @@ -162,12 +174,22 @@ def __enter__(self): raise TrafficShaperException('Unable to shape traffic: %s' % e) def __exit__(self, unused_exc_type, unused_exc_val, unused_exc_tb): - self.platformsettings.unconfigure_loopback() - if (self.init_cwnd != '0' and - self.platformsettings.is_cwnd_available()): - self.platformsettings.set_cwnd(self.original_cwnd) - try: - self.platformsettings.ipfw('-q', 'flush') - logging.info('Stopped shaping traffic') - except Exception, e: - raise TrafficShaperException('Unable to stop shaping traffic: %s' % e) + if self.use_loopback: + self.platformsettings.unconfigure_loopback() + self.platformsettings.restore_cwnd() + if self.is_shaping: + try: + self._delete_rules() + logging.info('Stopped shaping traffic') + except Exception, e: + raise TrafficShaperException('Unable to stop shaping traffic: %s' % e) + + def _delete_rules(self, ipfw_list=None): + if ipfw_list is None: + ipfw_list = self.platformsettings.ipfw('list') + existing_rules = set( + r.split()[0].lstrip('0') for r in ipfw_list.splitlines()) + delete_rules = [r for r in (self._DOWNLOAD_RULE, self._UPLOAD_RULE) + if r in existing_rules] + if delete_rules: + self.platformsettings.ipfw('delete', *delete_rules) diff --git a/wpr/trafficshaper_test.py b/wpr/trafficshaper_test.py index d7f4179..2c35393 100755 --- a/wpr/trafficshaper_test.py +++ b/wpr/trafficshaper_test.py @@ -25,24 +25,14 @@ import platformsettings import socket import SocketServer -import sys -import time import trafficshaper import unittest +RESPONSE_SIZE_KEY = 'response-size:' TEST_DNS_PORT = 5555 TEST_HTTP_PORT = 8888 -RESPONSE_SIZE_KEY = 'response-size:' - - -# from timeit.py -if sys.platform == "win32": - # On Windows, the best timer is time.clock() - DEFAULT_TIMER = time.clock -else: - # On most other platforms the best timer is time.time() - DEFAULT_TIMER = time.time +TIMER = platformsettings.get_platform_settings().timer def GetElapsedMs(start_time, end_time): @@ -100,7 +90,7 @@ class TimedUdpServer(SocketServer.ThreadingUDPServer, # Override SocketServer.TcpServer setting to avoid intermittent errors. allow_reuse_address = True - def __init__(self, host, port, timer=DEFAULT_TIMER): + def __init__(self, host, port, timer=TIMER): SocketServer.ThreadingUDPServer.__init__( self, (host, port), TimedUdpHandler) self.timer = timer @@ -116,7 +106,7 @@ class TimedTcpServer(SocketServer.ThreadingTCPServer, # Override SocketServer.TcpServer setting to avoid intermittent errors. 
allow_reuse_address = True - def __init__(self, host, port, timer=DEFAULT_TIMER): + def __init__(self, host, port, timer=TIMER): SocketServer.ThreadingTCPServer.__init__( self, (host, port), TimedTcpHandler) self.timer = timer @@ -162,7 +152,7 @@ def setUp(self): self.host = platform_settings.get_server_ip_address() self.port = TEST_HTTP_PORT self.tcp_socket_creator = TcpTestSocketCreator(self.host, self.port) - self.timer = DEFAULT_TIMER + self.timer = TIMER def TrafficShaper(self, **kwargs): return trafficshaper.TrafficShaper( @@ -236,7 +226,7 @@ def setUp(self): platform_settings = platformsettings.get_platform_settings() self.host = platform_settings.get_server_ip_address() self.dns_port = TEST_DNS_PORT - self.timer = DEFAULT_TIMER + self.timer = TIMER def TrafficShaper(self, **kwargs): return trafficshaper.TrafficShaper( diff --git a/wpr/util.py b/wpr/util.py new file mode 100644 index 0000000..486d5c2 --- /dev/null +++ b/wpr/util.py @@ -0,0 +1,47 @@ +#!/usr/bin/env python +# Copyright 2012 Google Inc. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + + +"""Miscellaneous utility functions.""" + + +try: + # pkg_resources (part of setuptools) is needed when WPR is + # distributed as a package. (Resources may need to be extracted from + # the package.) + + import pkg_resources + + def resource_exists(resource_name): + return pkg_resources.resource_exists(__name__, resource_name) + + def resource_string(resource_name): + return pkg_resources.resource_string(__name__, resource_name) + +except ImportError: + # Import of pkg_resources failed, so fall back to getting resources + # from the file system. 
+ + import os + + def _resource_path(resource_name): + _replay_dir = os.path.dirname(os.path.abspath(__file__)) + return os.path.join(_replay_dir, resource_name) + + def resource_exists(resource_name): + return os.path.exists(_resource_path(resource_name)) + + def resource_string(resource_name): + return open(_resource_path(resource_name)).read() diff --git a/wpr/webpagereplay.egg-info/PKG-INFO b/wpr/webpagereplay.egg-info/PKG-INFO new file mode 100644 index 0000000..9863d0b --- /dev/null +++ b/wpr/webpagereplay.egg-info/PKG-INFO @@ -0,0 +1,10 @@ +Metadata-Version: 1.0 +Name: webpagereplay +Version: 1.1.2 +Summary: Record and replay web content +Home-page: http://code.google.com/p/web-page-replay/ +Author: Web Page Replay Project Authors +Author-email: web-page-replay-dev@googlegroups.com +License: Apache License 2.0 +Description: UNKNOWN +Platform: UNKNOWN diff --git a/wpr/webpagereplay.egg-info/SOURCES.txt b/wpr/webpagereplay.egg-info/SOURCES.txt new file mode 100644 index 0000000..16c5f1e --- /dev/null +++ b/wpr/webpagereplay.egg-info/SOURCES.txt @@ -0,0 +1,209 @@ +.gitignore +COPYING +cachemissarchive.py +cachemissarchive_test.py +customhandlers.py +daemonserver.py +deterministic.js +dnsproxy.py +httparchive.py +httparchive_test.py +httpclient.py +httpproxy.py +httpzlib.py +mock-archive.txt +mockhttprequest.py +persistentmixin.py +platformsettings.py +platformsettings_test.py +replay.py +replayspdyserver.py +servermanager.py +setup.py +trafficshaper.py +trafficshaper_test.py +util.py +./cachemissarchive.py +./cachemissarchive_test.py +./customhandlers.py +./daemonserver.py +./dnsproxy.py +./httparchive.py +./httparchive_test.py +./httpclient.py +./httpproxy.py +./httpzlib.py +./mockhttprequest.py +./persistentmixin.py +./platformsettings.py +./platformsettings_test.py +./replay.py +./replayspdyserver.py +./servermanager.py +./trafficshaper.py +./trafficshaper_test.py +./util.py +./perftracker/__init__.py +./perftracker/runner.py +./perftracker/runner_cfg.py +./third_party/__init__.py +./third_party/ipaddr/ipaddr.py +./third_party/ipaddr/ipaddr_test.py +./third_party/ipaddr/setup.py +./third_party/nbhttp/__init__.py +./third_party/nbhttp/c_zlib.py +./third_party/nbhttp/client.py +./third_party/nbhttp/error.py +./third_party/nbhttp/http_common.py +./third_party/nbhttp/push_tcp.py +./third_party/nbhttp/server.py +./third_party/nbhttp/spdy_client.py +./third_party/nbhttp/spdy_common.py +./third_party/nbhttp/spdy_server.py +perftracker/README +perftracker/__init__.py +perftracker/runner.py +perftracker/runner_cfg.py +perftracker/app/app.yaml +perftracker/app/appengine_config.py +perftracker/app/index.yaml +perftracker/app/json.py +perftracker/app/main.py +perftracker/app/models.py +perftracker/app/suite.html +perftracker/app/jst/jsevalcontext.js +perftracker/app/jst/jstemplate.js +perftracker/app/jst/jstemplate_test.js +perftracker/app/jst/util.js +perftracker/app/scripts/util.js +perftracker/app/styles/style.css +perftracker/app/templates/compare_set.html +perftracker/app/templates/index.html +perftracker/app/templates/search.html +perftracker/app/templates/view_set.html +perftracker/app/templates/view_summary.html +perftracker/extension/background.html +perftracker/extension/manifest.json +perftracker/extension/script.js +perftracker/extension/server.js +perftracker/extension/start.js +third_party/__init__.py +third_party/dns/LICENSE +third_party/dns/README.web-page-replay +third_party/dns/__init__.py +third_party/dns/dnssec.py +third_party/dns/e164.py +third_party/dns/edns.py 
+third_party/dns/entropy.py +third_party/dns/exception.py +third_party/dns/flags.py +third_party/dns/inet.py +third_party/dns/ipv4.py +third_party/dns/ipv6.py +third_party/dns/message.py +third_party/dns/name.py +third_party/dns/namedict.py +third_party/dns/node.py +third_party/dns/opcode.py +third_party/dns/query.py +third_party/dns/rcode.py +third_party/dns/rdata.py +third_party/dns/rdataclass.py +third_party/dns/rdataset.py +third_party/dns/rdatatype.py +third_party/dns/renderer.py +third_party/dns/resolver.py +third_party/dns/reversename.py +third_party/dns/rrset.py +third_party/dns/set.py +third_party/dns/tokenizer.py +third_party/dns/tsig.py +third_party/dns/tsigkeyring.py +third_party/dns/ttl.py +third_party/dns/update.py +third_party/dns/version.py +third_party/dns/zone.py +third_party/dns/rdtypes/__init__.py +third_party/dns/rdtypes/dsbase.py +third_party/dns/rdtypes/keybase.py +third_party/dns/rdtypes/mxbase.py +third_party/dns/rdtypes/nsbase.py +third_party/dns/rdtypes/sigbase.py +third_party/dns/rdtypes/txtbase.py +third_party/dns/rdtypes/ANY/AFSDB.py +third_party/dns/rdtypes/ANY/CERT.py +third_party/dns/rdtypes/ANY/CNAME.py +third_party/dns/rdtypes/ANY/DLV.py +third_party/dns/rdtypes/ANY/DNAME.py +third_party/dns/rdtypes/ANY/DNSKEY.py +third_party/dns/rdtypes/ANY/DS.py +third_party/dns/rdtypes/ANY/GPOS.py +third_party/dns/rdtypes/ANY/HINFO.py +third_party/dns/rdtypes/ANY/HIP.py +third_party/dns/rdtypes/ANY/ISDN.py +third_party/dns/rdtypes/ANY/KEY.py +third_party/dns/rdtypes/ANY/LOC.py +third_party/dns/rdtypes/ANY/MX.py +third_party/dns/rdtypes/ANY/NS.py +third_party/dns/rdtypes/ANY/NSEC.py +third_party/dns/rdtypes/ANY/NSEC3.py +third_party/dns/rdtypes/ANY/NSEC3PARAM.py +third_party/dns/rdtypes/ANY/NXT.py +third_party/dns/rdtypes/ANY/PTR.py +third_party/dns/rdtypes/ANY/RP.py +third_party/dns/rdtypes/ANY/RRSIG.py +third_party/dns/rdtypes/ANY/RT.py +third_party/dns/rdtypes/ANY/SIG.py +third_party/dns/rdtypes/ANY/SOA.py +third_party/dns/rdtypes/ANY/SPF.py +third_party/dns/rdtypes/ANY/SSHFP.py +third_party/dns/rdtypes/ANY/TXT.py +third_party/dns/rdtypes/ANY/X25.py +third_party/dns/rdtypes/ANY/__init__.py +third_party/dns/rdtypes/IN/A.py +third_party/dns/rdtypes/IN/AAAA.py +third_party/dns/rdtypes/IN/APL.py +third_party/dns/rdtypes/IN/DHCID.py +third_party/dns/rdtypes/IN/IPSECKEY.py +third_party/dns/rdtypes/IN/KX.py +third_party/dns/rdtypes/IN/NAPTR.py +third_party/dns/rdtypes/IN/NSAP.py +third_party/dns/rdtypes/IN/NSAP_PTR.py +third_party/dns/rdtypes/IN/PX.py +third_party/dns/rdtypes/IN/SRV.py +third_party/dns/rdtypes/IN/WKS.py +third_party/dns/rdtypes/IN/__init__.py +third_party/ipaddr/COPYING +third_party/ipaddr/MANIFEST.in +third_party/ipaddr/OWNERS +third_party/ipaddr/README +third_party/ipaddr/README.web-page-replay +third_party/ipaddr/ipaddr.py +third_party/ipaddr/ipaddr_test.py +third_party/ipaddr/setup.py +third_party/ipaddr/test-2to3.sh +third_party/ipfw_win32/LICENSE +third_party/ipfw_win32/README.txt +third_party/ipfw_win32/README.web-page-replay +third_party/ipfw_win32/ipfw.exe +third_party/ipfw_win32/ipfw.sys +third_party/ipfw_win32/netipfw.inf +third_party/ipfw_win32/netipfw_m.inf +third_party/nbhttp/LICENSE +third_party/nbhttp/README.web-page-replay +third_party/nbhttp/__init__.py +third_party/nbhttp/c_zlib.py +third_party/nbhttp/client.py +third_party/nbhttp/error.py +third_party/nbhttp/http_common.py +third_party/nbhttp/push_tcp.py +third_party/nbhttp/server.py +third_party/nbhttp/spdy_client.py +third_party/nbhttp/spdy_common.py +third_party/nbhttp/spdy_server.py 
+webpagereplay.egg-info/PKG-INFO +webpagereplay.egg-info/SOURCES.txt +webpagereplay.egg-info/dependency_links.txt +webpagereplay.egg-info/entry_points.txt +webpagereplay.egg-info/requires.txt +webpagereplay.egg-info/top_level.txt \ No newline at end of file diff --git a/wpr/webpagereplay.egg-info/dependency_links.txt b/wpr/webpagereplay.egg-info/dependency_links.txt new file mode 100644 index 0000000..8b13789 --- /dev/null +++ b/wpr/webpagereplay.egg-info/dependency_links.txt @@ -0,0 +1 @@ + diff --git a/wpr/webpagereplay.egg-info/entry_points.txt b/wpr/webpagereplay.egg-info/entry_points.txt new file mode 100644 index 0000000..5ec8e3a --- /dev/null +++ b/wpr/webpagereplay.egg-info/entry_points.txt @@ -0,0 +1,4 @@ +[console_scripts] +httparchive = httparchive:main +replay = replay:main + diff --git a/wpr/webpagereplay.egg-info/requires.txt b/wpr/webpagereplay.egg-info/requires.txt new file mode 100644 index 0000000..2413754 --- /dev/null +++ b/wpr/webpagereplay.egg-info/requires.txt @@ -0,0 +1 @@ +dnspython>=1.8 \ No newline at end of file diff --git a/wpr/webpagereplay.egg-info/top_level.txt b/wpr/webpagereplay.egg-info/top_level.txt new file mode 100644 index 0000000..d956a0e --- /dev/null +++ b/wpr/webpagereplay.egg-info/top_level.txt @@ -0,0 +1,3 @@ + +third_party +perftracker
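The reworked TrafficShaper above is a context manager: __enter__ installs the numbered dummynet pipes, queues, and rules, and __exit__ deletes only the rules it added (via _delete_rules). A minimal usage sketch, assuming the wpr modules are importable; the host, ports, and shaping values below are illustrative, not taken from this commit:

    import trafficshaper

    shaper = trafficshaper.TrafficShaper(
        host='127.0.0.1',        # web proxy address
        port='80',
        ssl_port='443',
        up_bandwidth='1Mbit/s',
        down_bandwidth='5Mbit/s',
        delay_ms='80',           # split in half across the up and down links
        packet_loss_rate='0',
        use_loopback=True)

    with shaper:
        pass  # run the browser or measurement while shaping is active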