Permalink
Browse files

pywb proxy: add accept-datetime header to all requests to avoid interstitial in openwayback, add custom user-agent

nginx: proxy request headers
  • Loading branch information...
1 parent a426f8a commit dd032089bfd3dd2f69e2d7b264701ecf3f35e063 @ikreymer ikreymer committed Dec 21, 2015
Showing with 11 additions and 4 deletions.
  1. +1 −0 nginx/nginx.conf
  2. +10 −4 pywb/archivereplayview.py
View
@@ -88,6 +88,7 @@ http {
set $targethost $1;
resolver 8.8.8.8;
proxy_pass $1$2$is_args$args;
+ proxy_pass_request_headers on;
#proxy_redirect ~^/(.*)$ $scheme://$host:$server_port/$targethost/$1;
#proxy_redirect ~^(http.*)$ $scheme://$host:$server_port/$1;
@@ -3,7 +3,7 @@
from pywb.webapp.replay_views import ReplayView, CaptureException
from pywb.rewrite.url_rewriter import UrlRewriter
from pywb.rewrite.rewrite_live import LiveRewriter
-from pywb.utils.timeutils import timestamp_to_sec
+from pywb.utils.timeutils import timestamp_to_sec, timestamp_to_http_date
from pywb.utils.loaders import BlockLoader
from redisclient import redisclient
@@ -71,6 +71,7 @@ def __init__(self, config):
self.archive_template = config['archive_template']
self.archive_name = config['archive_name']
self.reverse_proxy_prefix = config.get('reverse_proxy_prefix', '')
+ self.user_agent = config.get('user_agent', 'netcapsule for ({})')
# init redis here only
redisclient.init_redis(config)
@@ -84,7 +85,7 @@ def unrewrite_header(self, response, name):
if m:
response.headers[name] = m.group(4)
- def _do_req(self, urls, host, env, skip_hosts):
+ def _do_req(self, urls, host, cdx, env, skip_hosts):
response = None
headers = {}
@@ -93,7 +94,12 @@ def _do_req(self, urls, host, env, skip_hosts):
# disable gzip, as mosaic won't support it!
# TODO: maybe ungzip later
if any(exclude in user_agent for exclude in NO_GZIP_UAS):
- headers={'Accept-Encoding': 'identity'}
+ headers['Accept-Encoding'] = 'identity'
+
+ # needed to avoid interstitial in openwayback
+ headers['Accept-Datetime'] = timestamp_to_http_date(cdx['timestamp'])
+
+ headers['User-Agent'] = self.user_agent.format(user_agent)
for url in urls:
if self.reverse_proxy_prefix:
@@ -135,7 +141,7 @@ def __call__(self, cdx, skip_hosts, cdx_loader, wbrequest):
try_urls, host, archive_name = self._get_urls_to_try(cdx, skip_hosts, wbrequest)
try:
- response = self._do_req(try_urls, host, wbrequest.env, skip_hosts)
+ response = self._do_req(try_urls, host, cdx, wbrequest.env, skip_hosts)
except Exception as e:
print(e)
response = None

0 comments on commit dd03208

Please sign in to comment.