Skip to content

Commit

Permalink
Find map + tests
Browse files Browse the repository at this point in the history
  • Loading branch information
zimeon committed Apr 11, 2017
1 parent dfa120c commit bd0db88
Show file tree
Hide file tree
Showing 13 changed files with 223 additions and 15 deletions.
42 changes: 30 additions & 12 deletions resync/client.py
Original file line number Diff line number Diff line change
Expand Up @@ -97,9 +97,8 @@ def read_resource_list(self, uri):
mapper=self.mapper)
resource_list.read(uri=uri)
except Exception as e:
raise ClientFatalError(
"Can't read source resource list from %s (%s)" %
(uri, str(e)))
raise ClientError("Can't read source resource list from %s (%s)" %
(uri, str(e)))
self.logger.debug("Finished reading resource list")
return(resource_list)

Expand All @@ -110,7 +109,7 @@ def find_resource_list_from_source_description(self, uri):
source description in another location, but a ClientFatalError if
a source description is found but there is some problem using it.
"""
self.logger.info("Reading capability list %s" % (uri))
self.logger.info("Reading source description %s" % (uri))
try:
sd = SourceDescription()
sd.read(uri=uri)
Expand All @@ -125,11 +124,25 @@ def find_resource_list_from_source_description(self, uri):
raise ClientFatalError(
"Source description %s has multiple sources" % (uri))
self.logger.info("Finished reading source description")
raise ClientFatalError('debug')
return(cl)
# Now read capability list
cluri = sd.resources.first().uri
uri = urljoin(uri, cluri) # FIXME - Should relative URI handling be elsewhere?
self.logger.info("Reading capability list %s" % (uri))
try:
cl = CapabilityList()
cl.read(uri=uri)
except Exception as e:
raise ClientError(
"Can't read capability list from %s (%s)" %
(uri, str(e)))
if (not cl.has_capability('resourcelist')):
raise ClientFatalError(
"Capability list %s does not describe a resource list" % (uri))
rluri = cl.capability_info('resourcelist').uri
return(urljoin(uri, rluri))

def find_resource_list(self):
"""Look for resource list by hueristics.
"""Find resource list by heuristics, returns ResourceList object.
1. Use explicitly specified self.sitemap_name (and
fail if that doesn't work)
Expand All @@ -146,23 +159,28 @@ def find_resource_list(self):
return(self.read_resource_list(self.sitemap_name))
# 2 & 3
parts = urlsplit(self.sitemap)
uri_host = urlunsplit([parts[0],parts[1],'','',''])
uri_host = urlunsplit([parts[0], parts[1], '', '', ''])
errors = []
for uri in [urljoin(self.sitemap, '.well-known/resourcesync'),
urljoin(uri_host, '.well-known/resourcesync')]:
uri = uri.lstrip('file:///') # urljoin adds this for local files
try:
return(self.find_resource_list_from_source_description(uri))
rluri = self.find_resource_list_from_source_description(uri)
return(self.read_resource_list(rluri))
except ClientError as e:
pass
errors.append(str(e))
# 4, 5 & 6
for uri in [urljoin(self.sitemap, 'resourcelist.xml'),
urljoin(self.sitemap, 'sitemap.xml'),
urljoin(uri_host, 'sitemap.xml')]:
uri = uri.lstrip('file:///') # urljoin adds this for local files
try:
return(self.read_resource_list(uri))
except ClientError as e:
pass
errors.append(str(e))
raise ClientFatalError(
"Failed to find source resource list from common patterns")
"Failed to find source resource list from common patterns (%s)" %
". ".join(errors))

def build_resource_list(self, paths=None, set_path=False):
"""Return a resource list for files on local disk.
Expand Down
4 changes: 4 additions & 0 deletions resync/resource_set.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,10 @@ def add(self, resource, replace=False):
"Attempt to add resource already in this set")
self[uri] = resource

def first(self):
    """Return the first item yielded when iterating over this set.

    StopIteration from next() propagates if iteration yields nothing.
    """
    items = iter(self)
    return next(items)


class ResourceSetDupeError(Exception):
"""Exception for case of attempt to add duplicate resource."""
Expand Down
38 changes: 37 additions & 1 deletion tests/test_client.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
from tests.testcase_with_tmpdir import TestCase
from tests.capture_stdout import capture_stdout
from tests.webserver_context import webserver

import unittest
import re
Expand All @@ -8,7 +9,7 @@
import sys
import os.path

from resync.client import Client, ClientFatalError
from resync.client import Client, ClientError, ClientFatalError
from resync.resource import Resource
from resync.resource_list import ResourceList
from resync.change_list import ChangeList
Expand Down Expand Up @@ -36,6 +37,41 @@ def test03_sitemap_uri(self):
self.assertEqual(c.sitemap_uri('/abcd2'), '/abcd2')
self.assertEqual(c.sitemap_uri('scheme:/abcd3'), 'scheme:/abcd3')

def test04_read_resource_list(self):
    """Check Client.read_resource_list() for readable and unreadable URIs."""
    c = Client()
    # A plain relative path and a scheme-only file: URI must both load
    # the three-entry resource list
    rl = c.read_resource_list('tests/testdata/client/dir1/resourcelist.xml')
    self.assertEqual(len(rl), 3)
    rl = c.read_resource_list('file:tests/testdata/client/dir1/resourcelist.xml')
    self.assertEqual(len(rl), 3)
    # file:// and file:/// forms of the same relative path, and a missing
    # file, must all be reported as ClientError
    self.assertRaises(ClientError, c.read_resource_list, 'file://tests/testdata/client/dir1/resourcelist.xml')
    self.assertRaises(ClientError, c.read_resource_list, 'file:///tests/testdata/client/dir1/resourcelist.xml')
    self.assertRaises(ClientError, c.read_resource_list, 'DOES_NOT_EXIST')

def test05_find_resource_list(self):
    """Check find_resource_list() discovery on disk and over HTTP.

    Each case sets the source mapping and compares the `up` attribute of
    the resource list that was found with the expected fixture name.
    """
    c = Client()
    # Filesystem tests
    filesystem_cases = (
        ('tests/testdata/find/find1', 'find1'),
        ('tests/testdata/find/find2', 'find2'),
        ('tests/testdata/find/find3', 'find3'),
    )
    for source, expected_up in filesystem_cases:
        c.set_mappings([source, 'xxx'])
        self.assertEqual(c.find_resource_list().up, expected_up)
    # Tests requiring a server; each entry is a document root plus the
    # (source, expected `up`) pairs to check while that server is running
    server_cases = (
        ('tests/testdata/find',
         (('http://localhost:9999/find1', 'find1'),
          ('http://localhost:9999/find2', 'find2'),
          ('http://localhost:9999/find3', 'find3'))),
        ('tests/testdata/find/find1',
         (('http://localhost:9999/data', 'find1'),)),
        ('tests/testdata/find/find3',
         (('http://localhost:9999/data/data1', 'find3'),)),
    )
    for doc_root, checks in server_cases:
        with webserver(doc_root, 'localhost', 9999):
            for source, expected_up in checks:
                c.set_mappings([source, 'xxx'])
                self.assertEqual(c.find_resource_list().up, expected_up)

def test10_baseline_or_audit(self):
# FIXME - this is the guts of the client, tough to test, need to work
# through more cases...
Expand Down
2 changes: 1 addition & 1 deletion tests/testcase_with_tmpdir.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@ def setUpClass(cls):
def tearDownClass(cls):
# Cleanup
if (not os.path.isdir(cls._tmpdir)):
raise Exception("Ooops, no tempdir (%s) to clean up?" % (tmpdir))
raise Exception("Ooops, no tempdir (%s) to clean up?" % (cls._tmpdir))
shutil.rmtree(cls._tmpdir)
try:
cls.extraTearUpClass()
Expand Down
2 changes: 1 addition & 1 deletion tests/testdata/client/dir1/resourcelist.xml
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
<?xml version="1.0" encoding="UTF-8"?>
<urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9"
xmlns:rs="http://www.openarchives.org/rs/terms/">
<rs:ln rel="resourcesync"
<rs:ln rel="up"
href="file:tests/testdata/client/dir1/caps1.xml"/>
<rs:md capability="resourcelist"
modified="2013-01-03T15:00:00Z"/>
Expand Down
8 changes: 8 additions & 0 deletions tests/testdata/find/find1/.well-known/resourcelist.xml
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
<?xml version="1.0" encoding="UTF-8"?>
<urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9"
xmlns:rs="http://www.openarchives.org/rs/terms/">
<rs:ln rel="up"
href="find1"/>
<rs:md capability="resourcelist"
modified="2017-04-11T10:00:00Z"/>
</urlset>
11 changes: 11 additions & 0 deletions tests/testdata/find/find1/.well-known/resourcesync
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
<?xml version="1.0" encoding="UTF-8"?>
<urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9"
xmlns:rs="http://www.openarchives.org/rs/terms/">
<rs:ln rel="describedby"
href="http://resync.library.cornell.edu/"/>
<rs:md capability="description"/>
<url>
<loc>../data/capabilitylist.xml</loc>
<rs:md capability="capabilitylist"/>
</url>
</urlset>
9 changes: 9 additions & 0 deletions tests/testdata/find/find1/data/capabilitylist.xml
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
<?xml version="1.0" encoding="UTF-8"?>
<urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9"
xmlns:rs="http://www.openarchives.org/rs/terms/">
<rs:md capability="capabilitylist"/>
<url>
<loc>resourcelist.xml</loc>
<rs:md capability="resourcelist"/>
</url>
</urlset>
11 changes: 11 additions & 0 deletions tests/testdata/find/find1/data/resourcelist.xml
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
<?xml version="1.0" encoding="UTF-8"?>
<urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9"
xmlns:rs="http://www.openarchives.org/rs/terms/">
<rs:ln rel="up" href="find1"/>
<rs:md capability="resourcelist"
modified="2017-04-11T11:22:33Z"/>
<url>
<loc>a_resource</loc>
<lastmod>2017-05-28T00:00:00Z</lastmod>
</url>
</urlset>
11 changes: 11 additions & 0 deletions tests/testdata/find/find2/resourcelist.xml
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
<?xml version="1.0" encoding="UTF-8"?>
<urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9"
xmlns:rs="http://www.openarchives.org/rs/terms/">
<rs:ln rel="up" href="find2"/>
<rs:md capability="resourcelist"
modified="2017-04-11T11:22:33Z"/>
<url>
<loc>a_resource</loc>
<lastmod>2017-05-28T00:00:00Z</lastmod>
</url>
</urlset>
1 change: 1 addition & 0 deletions tests/testdata/find/find3/data/data1/res.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Text
11 changes: 11 additions & 0 deletions tests/testdata/find/find3/sitemap.xml
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
<?xml version="1.0" encoding="UTF-8"?>
<urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9"
xmlns:rs="http://www.openarchives.org/rs/terms/">
<rs:ln rel="up" href="find3"/>
<rs:md capability="resourcelist"
modified="2017-04-11T11:22:33Z"/>
<url>
<loc>a_resource</loc>
<lastmod>2017-05-28T00:00:00Z</lastmod>
</url>
</urlset>
88 changes: 88 additions & 0 deletions tests/webserver_context.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,88 @@
"""Provides context manager that runs local webserver."""
import contextlib
import os
import posixpath
import requests
import signal
import time
from multiprocessing import Process
from http.server import HTTPServer, SimpleHTTPRequestHandler
try: # python3
from urllib.parse import unquote
except ImportError: # python2
from urlparse import unquote


class MyHTTPRequestHandler(SimpleHTTPRequestHandler):
    """SimpleHTTPRequestHandler that resolves paths under cls._base_path."""

    # Directory that request paths are joined onto in translate_path()
    _base_path = '/'

    def translate_path(self, path):
        """Map a /-separated request PATH to a local filename.

        The query string and fragment are dropped, the path is unquoted
        and normalized, and each remaining simple component is joined
        onto self._base_path. Components that mean special things to the
        local file system (e.g. drive or directory names) are ignored.
        An explicit trailing slash on the request is kept on the result.

        **Copied from http.server.SimpleHTTPRequestHandler with modification
        of path**
        """
        # abandon query parameters, then any fragment
        path = path.partition('?')[0]
        path = path.partition('#')[0]
        # Record an explicit trailing slash before normalizing, because
        # normpath() would drop it. Issue17324
        keep_slash = path.rstrip().endswith('/')
        try:
            path = unquote(path, errors='surrogatepass')
        except UnicodeDecodeError:
            path = unquote(path)
        local = self._base_path
        for part in posixpath.normpath(path).split('/'):
            if not part:
                # Empty strings come from leading or doubled slashes
                continue
            if os.path.dirname(part) or part in (os.curdir, os.pardir):
                # Ignore components that are not a simple file/directory name
                continue
            local = os.path.join(local, part)
        return (local + '/') if keep_slash else local


def run_webserver(host='', port=9999):
    """Serve HTTP requests on (host, port) using MyHTTPRequestHandler.

    Blocks in serve_forever(), so it is meant to be run in a process of
    its own.
    """
    HTTPServer((host, port), MyHTTPRequestHandler).serve_forever()


@contextlib.contextmanager
def webserver(dir='/tmp', host='', port=9999):
    """Context Manager that provides a webserver serving files from dir.

    The server runs in a child process for the duration of the ``with``
    block. The child is terminated and reaped on exit, including when the
    readiness probe or the body of the block raises.

    Parameters:
        dir -- directory that request paths are resolved against
        host -- host/interface passed to the server; '' is probed via
            localhost
        port -- TCP port the server listens on
    """
    # NOTE(review): the base path reaches the child only through this class
    # attribute, which appears to rely on the child inheriting the parent's
    # state (fork start method) - confirm on platforms that use spawn.
    MyHTTPRequestHandler._base_path = dir
    p = Process(target=run_webserver, args=(host, port))
    p.start()

    try:
        # Probe the address the server was actually started on rather than
        # a fixed localhost:9999
        url = "http://%s:%s/" % (host or 'localhost', port)
        # Wait for the server to be launched
        for j in range(0, 10):
            try:
                requests.get(url, timeout=0.1)
                break
            except (requests.exceptions.ConnectionError,
                    requests.exceptions.Timeout):
                # Not accepting connections or not answering yet; retry
                pass
            time.sleep(0.1)
        yield
    finally:
        # Closing the server; join so the port is released before the
        # caller starts another server on it
        p.terminate()
        p.join()


if __name__ == '__main__':
    # Manual check: start the server with its default arguments, then let
    # the context manager stop it again
    with webserver():
        print('Started server...')
        # Things with server go in here
    print('Exited server')

0 comments on commit bd0db88

Please sign in to comment.