Skip to content

HTTPS clone URL

Subversion checkout URL

You can clone with HTTPS or Subversion.
Download ZIP
Browse files

Add --noauth option to turn off checks on URL authority

  • Loading branch information...
commit b84fbdc774ffc517c8f338afd0b765e89f53f81a 1 parent 62c6ab8
@zimeon zimeon authored
Showing with 16 additions and 2 deletions.
  1. +5 −0 resync-client
  2. +11 −2 resync/client.py
View
5 resync-client
@@ -104,6 +104,9 @@ def main():
# FIXME - to have multiple --link, one for each link
opt.add_option('--multifile', '-m', action='store_true',
help="disable reading and output of sitemapindex for multifile sitemap")
+ opt.add_option('--noauth', action='store_true',
+ help="disable checking of URL paths to ensure that the sitemaps refer "\
+ "only to resources on the same server/sub-path etc. Use with care.")
opt.add_option('--warc', action='store_true',
help="write dumps in WARC format (instead of ZIP+Sitemap default)")
opt.add_option('--dryrun', '-n', action='store_true',
@@ -158,6 +161,8 @@ def main():
c.dump_format='warc'
if (args.multifile):
c.allow_multifile=not args.multifile
+ if (args.noauth):
+ c.noauth=args.noauth
if (args.max_sitemap_entries):
c.max_sitemap_entries=args.max_sitemap_entries
View
13 resync/client.py
@@ -14,6 +14,7 @@
from resync.sitemap import Sitemap
from resync.dump import Dump
from resync.resource_change import ResourceChange
+from resync.url_authority import UrlAuthority
class ClientFatalError(Exception):
"""Non-recoverable error in client, should include message to user"""
@@ -32,6 +33,7 @@ def __init__(self, checksum=False, verbose=False, dryrun=False, logger=None):
self.sitemap_name = 'sitemap.xml'
self.dump_format = None
self.allow_multifile = True
+ self.noauth = False
self.max_sitemap_entries = None
@property
@@ -106,10 +108,17 @@ def sync_or_audit(self, allow_deletion=False, audit_only=False):
status = "NOT IN SYNC"
print "Status: %s (same=%d, updated=%d, deleted=%d, created=%d)" %\
(status,len(same),len(updated),len(deleted),len(created))
-
if (audit_only):
return
- ### 4. Grab files to do sync
+ ### 4. Check that sitemap has authority over URIs listed
+ uauth = UrlAuthority(self.sitemap)
+ for resource in src_inventory:
+ if (not uauth.has_authority_over(resource.uri)):
+ if (self.noauth):
+ self.logger.warning("Sitemap (%s) mentions resource at a location it does not have authority over (%s)" % (self.sitemap,resource.uri))
+ else:
+ raise ClientFatalError("Aborting as sitemap (%s) mentions resource at a location it does not have authority over (%s), override with --noauth" % (self.sitemap,resource.uri))
+ ### 5. Grab files to do sync
for resource in updated:
uri = resource.uri
file = self.mapper.src_to_dst(uri)
Please sign in to comment.
Something went wrong with that request. Please try again.