Skip to content

Commit

Permalink
ETags (fixes #75)
Browse files Browse the repository at this point in the history
* Implement ETags only for LDP-NR
* Remove redundant Flask ETag
* Implement If-Match and If-None-Match for GET, PUT, PATCH, DELETE
* Implement If-Modified-Since, If-Unmodified-Since for the above
* Tests for ETags and conditional header for GET and PUT
  • Loading branch information
Stefano Cossu committed Oct 14, 2018
1 parent 7a07f73 commit b628f01
Show file tree
Hide file tree
Showing 4 changed files with 586 additions and 67 deletions.
314 changes: 255 additions & 59 deletions lakesuperior/endpoints/ldp.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
Blueprint, Response, g, make_response, render_template,
request, send_file)
from rdflib import Graph, plugin, parser#, serializer
from werkzeug.http import parse_date

from lakesuperior import env
from lakesuperior.api import resource as rsrc_api
Expand Down Expand Up @@ -140,46 +141,63 @@ def get_resource(uid, out_fmt=None):
"""
out_headers = std_headers.copy()
repr_options = defaultdict(dict)

# First check if it's not a 404 or a 410.
try:
if not rsrc_api.exists(uid):
return '', 404
except TombstoneError as e:
return _tombstone_response(e, uid)

# Then process the condition headers.
cond_ret = _process_cond_headers(uid, request.headers)
if cond_ret:
return cond_ret

# Then, business as usual.
# Evaluate which representation is requested.
if 'prefer' in request.headers:
prefer = g.tbox.parse_rfc7240(request.headers['prefer'])
logger.debug('Parsed Prefer header: {}'.format(pformat(prefer)))
if 'return' in prefer:
repr_options = parse_repr_options(prefer['return'])

try:
rsrc = rsrc_api.get(uid, repr_options)
except ResourceNotExistsError as e:
return str(e), 404
except TombstoneError as e:
return _tombstone_response(e, uid)
else:
if out_fmt is None:
rdf_mimetype = _best_rdf_mimetype()
out_fmt = (
'rdf'
if isinstance(rsrc, LdpRs) or rdf_mimetype is not None
else 'non_rdf')
out_headers.update(_headers_from_metadata(rsrc, out_fmt))
uri = g.tbox.uid_to_uri(uid)
if out_fmt == 'rdf':
if locals().get('rdf_mimetype', None) is None:
rdf_mimetype = DEFAULT_RDF_MIMETYPE
ggr = g.tbox.globalize_graph(rsrc.out_graph)
ggr.namespace_manager = nsm
return _negotiate_content(
ggr, rdf_mimetype, out_headers, uid=uid, uri=uri)
else:
if not getattr(rsrc, 'local_path', False):
return ('{} has no binary content.'.format(rsrc.uid), 404)
rsrc = rsrc_api.get(uid, repr_options)

logger.debug('Streaming out binary content.')
rsp = make_response(send_file(
rsrc.local_path, as_attachment=True,
attachment_filename=rsrc.filename,
mimetype=rsrc.mimetype), 200, out_headers)
rsp.headers.add('Link',
'<{}/fcr:metadata>; rel="describedby"'.format(uri))
return rsp
if out_fmt is None:
rdf_mimetype = _best_rdf_mimetype()
out_fmt = (
'rdf'
if isinstance(rsrc, LdpRs) or rdf_mimetype is not None
else 'non_rdf')
out_headers.update(_headers_from_metadata(rsrc, out_fmt))
uri = g.tbox.uid_to_uri(uid)

# RDF output.
if out_fmt == 'rdf':
if locals().get('rdf_mimetype', None) is None:
rdf_mimetype = DEFAULT_RDF_MIMETYPE
ggr = g.tbox.globalize_graph(rsrc.out_graph)
ggr.namespace_manager = nsm
return _negotiate_content(
ggr, rdf_mimetype, out_headers, uid=uid, uri=uri)

# Datastream.
else:
if not getattr(rsrc, 'local_path', False):
return ('{} has no binary content.'.format(rsrc.uid), 404)

logger.debug('Streaming out binary content.')
rsp = make_response(send_file(
rsrc.local_path, as_attachment=True,
attachment_filename=rsrc.filename,
mimetype=rsrc.mimetype), 200, out_headers)
# This seems necessary to prevent Flask from setting an additional ETag.
if 'ETag' in out_headers:
rsp.set_etag(out_headers['ETag'])
rsp.headers.add('Link',
'<{}/fcr:metadata>; rel="describedby"'.format(uri))
return rsp


@ldp.route('/<path:uid>/fcr:versions', methods=['GET'])
Expand Down Expand Up @@ -262,14 +280,13 @@ def post_resource(parent_uid):
except ServerManagedTermError as e:
return str(e), 412

hdr = {'Location' : uri}
uri = g.tbox.uid_to_uri(rsrc.uid)
rsp_headers.update(_headers_from_metadata(rsrc))
rsp_headers['Location'] = uri

if mimetype and rdf_fmt is None:
hdr['Link'] = '<{0}/fcr:metadata>; rel="describedby"; anchor="{0}"'\
.format(uri)

rsp_headers.update(hdr)
rsp_headers['Link'] = (f'<{uri}/fcr:metadata>; rel="describedby"; '
f'anchor="{uri}"')

return uri, 201, rsp_headers

Expand All @@ -286,7 +303,9 @@ def put_resource(uid):
# Parse headers.
logger.debug('Request headers: {}'.format(request.headers))

rsp_headers = {'Content-Type' : 'text/plain; charset=utf-8'}
cond_ret = _process_cond_headers(uid, request.headers, False)
if cond_ret:
return cond_ret

handling, disposition = set_post_put_params()
stream, mimetype = _bistream_from_req()
Expand Down Expand Up @@ -314,6 +333,9 @@ def put_resource(uid):
except TombstoneError as e:
return _tombstone_response(e, uid)

rsp_headers = _headers_from_metadata(rsrc)
rsp_headers['Content-Type'] = 'text/plain; charset=utf-8'

uri = g.tbox.uid_to_uri(uid)
if evt == RES_CREATED:
rsp_code = 201
Expand All @@ -336,6 +358,18 @@ def patch_resource(uid, is_metadata=False):
Update an existing resource with a SPARQL-UPDATE payload.
"""
# First check if it's not a 404 or a 410.
try:
if not rsrc_api.exists(uid):
return '', 404
except TombstoneError as e:
return _tombstone_response(e, uid)

# Then process the condition headers.
cond_ret = _process_cond_headers(uid, request.headers, False)
if cond_ret:
return cond_ret

rsp_headers = {'Content-Type' : 'text/plain; charset=utf-8'}
if request.mimetype != 'application/sparql-update':
return 'Provided content type is not a valid parsable format: {}'\
Expand All @@ -345,10 +379,6 @@ def patch_resource(uid, is_metadata=False):
local_update_str = g.tbox.localize_ext_str(update_str, nsc['fcres'][uid])
try:
rsrc = rsrc_api.update(uid, local_update_str, is_metadata)
except ResourceNotExistsError as e:
return str(e), 404
except TombstoneError as e:
return _tombstone_response(e, uid)
except (ServerManagedTermError, SingleSubjectError) as e:
return str(e), 412
except InvalidResourceError as e:
Expand Down Expand Up @@ -377,6 +407,18 @@ def delete_resource(uid):
must be deleted as well, or the ``Prefer:no-tombstone`` header can be used.
The latter will forget (completely delete) the resource immediately.
"""
# First check if it's not a 404 or a 410.
try:
if not rsrc_api.exists(uid):
return '', 404
except TombstoneError as e:
return _tombstone_response(e, uid)

# Then process the condition headers.
cond_ret = _process_cond_headers(uid, request.headers, False)
if cond_ret:
return cond_ret

headers = std_headers.copy()

if 'prefer' in request.headers:
Expand All @@ -385,12 +427,7 @@ def delete_resource(uid):
else:
leave_tstone = True

try:
rsrc_api.delete(uid, leave_tstone)
except ResourceNotExistsError as e:
return str(e), 404
except TombstoneError as e:
return _tombstone_response(e, uid)
rsrc_api.delete(uid, leave_tstone)

return '', 204, headers

Expand Down Expand Up @@ -623,17 +660,11 @@ def _headers_from_metadata(rsrc, out_fmt='text/turtle'):
"""
rsp_headers = defaultdict(list)

digest = rsrc.metadata.value(rsrc.uri, nsc['premis'].hasMessageDigest)
digest_p = rsrc.metadata.value(nsc['premis'].hasMessageDigest)
# Only add ETag and digest if output is not RDF.
if digest:
digest_components = digest.split(':')
cksum_hex = digest_components[-1]
cksum = bytearray.fromhex(cksum_hex)
digest_algo = digest_components[-2]
etag_str = cksum_hex
rsp_headers['ETag'] = etag_str
rsp_headers['Digest'] = '{}={}'.format(
digest_algo.upper(), b64encode(cksum).decode('ascii'))
if digest_p:
rsp_headers['ETag'], rsp_headers['Digest'] = (
_digest_headers(digest_p))


last_updated_term = rsrc.metadata.value(nsc['fcrepo'].lastModified)
Expand All @@ -650,3 +681,168 @@ def _headers_from_metadata(rsrc, out_fmt='text/turtle'):

return rsp_headers


def _digest_headers(digest):
"""
Format ETag and Digest headers from resource checksum.
:param str digest: Resource digest. For an extracted IMR, this is the
value of the ``premis:hasMessageDigest`` property.
"""
digest_components = digest.split(':')
cksum_hex = digest_components[-1]
cksum = bytearray.fromhex(cksum_hex)
digest_algo = digest_components[-2]
etag_str = cksum_hex
digest_str = '{}={}'.format(
digest_algo.upper(), b64encode(cksum).decode('ascii'))

return etag_str, digest_str


def _condition_hdr_match(uid, headers, safe=True):
"""
Conditional header evaluation for HEAD, GET, PUT and DELETE requests.
Determine whether any conditional headers, and which, is/are imposed in the
request (``If-Match``, ``If-None-Match``, ``If-Modified-Since``,
``If-Unmodified-Since``, or none) and what the most relevant condition
evaluates to (``True`` or ``False``).
`RFC 7232 <https://tools.ietf.org/html/rfc7232#section-3.1>`__ does not
indicate an exact condition precedence, except that the ETag
matching conditions void the timestamp-based ones. This function
adopts the following precedence:
- ``If-Match`` is evaluated first if present;
- Else, ``If-None-Match`` is evaluated if present;
- Else, ``If-Modified-Since`` and ``If-Unmodified-Since``
are evaluated if present. If both conditions are present they are
both returned so they can be furher evaluated, e.g. using a logical AND
to allow time-range conditions, where the two terms indicate the early
and late boundary, respectively.
Note that the above mentioned RFC mentions several cases in which these
conditions are ignored, e.g. for a 404 in some cases, or for certain
HTTP methods for ``If-Modified-Since``. This must be implemented by the
calling function.
:param str uid: UID of the resource requested.
:param werkzeug.datastructures.EnvironHeaders headers: Incoming request
headers.
:param bool safe: Whether a "safe" method is being processed. Defaults to
True.
:rtype: dict (str, bool)
:return: Dictionary whose keys are the conditional header names that
have been evaluated, and whose boolean values indicate whether each
condition is met. If no valid conditional header is found, an empty
dict is returned.
"""
# ETag-based conditions.
# This ignores headers with empty values.
if headers.get('if-match') or headers.get('if-none-match'):
cond_hdr = 'if-match' if headers.get('if-match') else 'if-none-match'

# Wildcard matching for unsafe methods. Cannot be part of a list of
# ETags nor be enclosed in quotes.
if not safe and headers.get(cond_hdr) == '*':
return {cond_hdr: (cond_hdr == 'if-match') == rsrc_api.exists(uid)}

req_etags = [
et.strip('\'" ') for et in headers.get(cond_hdr).split(',')]

try:
rsrc_meta = rsrc_api.get_metadata(uid)
except ResourceNotExistsError:
rsrc_meta = Imr(nsc['fcres'][uid])

digest_prop = rsrc_meta.value(nsc['premis'].hasMessageDigest)
if digest_prop:
etag, _ = _digest_headers(digest_prop)
if cond_hdr == 'if-match':
is_match = etag in req_etags
else:
is_match = etag not in req_etags
else:
is_match = cond_hdr == 'if-none-match'

return {cond_hdr: is_match}

# Timestmp-based conditions.
ret = {}
if headers.get('if-modified-since') or headers.get('if-unmodified-since'):

try:
rsrc_meta = rsrc_api.get_metadata(uid)
except ResourceNotExistsError:
return {
'if-modified-since': False,
'if-unmodified-since': False
}

lastmod_str = rsrc_meta.value(nsc['fcrepo'].lastModified)
lastmod_date = arrow.get(lastmod_str)

mod_since_date = parse_date(headers.get('if-modified-since'))
# If date is not in a RFC 5322 format
# (https://tools.ietf.org/html/rfc5322#section-3.3) it evaluates to
# None.
if mod_since_date:
cond_hdr = 'if-modified-since'
ret[cond_hdr] = lastmod_ts > mod_since_date

unmod_since_date = parse_date(headers.get('if-unmodified-since'))
if unmod_since_date:
cond_hdr = 'if-unmodified-since'
ret[cond_hdr] = lastmod_ts < unmod_since_date

return ret


def _process_cond_headers(uid, headers, safe=True):
    """
    Process the outcome of the evaluation of conditional headers.

    This yields different responses between safe methods (``HEAD``, ``GET``,
    etc.) and unsafe ones (``PUT``, ``DELETE``, etc.).

    The presence of an ETag-based condition (``If-Match`` or
    ``If-None-Match``), whether satisfied or not, voids the timestamp-based
    ones.

    :param str uid: Resource UID.
    :param werkzeug.datastructures.EnvironHeaders headers: Incoming request
        headers.
    :param bool safe: Whether a "safe" method is being processed. Defaults to
        True.

    :return: A response to send back immediately if a condition is not met
        or if the resource is a tombstone; ``None`` if the request can be
        processed normally.
    """
    try:
        cond_match = _condition_hdr_match(uid, headers, safe)
    except TombstoneError as e:
        return _tombstone_response(e, uid)

    # No valid conditional header was found.
    if not cond_match:
        return None

    # ETag-based conditions.
    if 'if-match' in cond_match or 'if-none-match' in cond_match:
        # If an expected list of tags is not matched, the response is
        # "Precondition Failed".
        if not cond_match.get('if-match', True):
            return '', 412
        # A tag that should not have matched yields "Not Modified" for safe
        # methods and "Precondition Failed" for unsafe ones.
        if not cond_match.get('if-none-match', True):
            return '', (304 if safe else 412)
        return None

    # Timestamp-based conditions.
    # A failed If-Unmodified-Since is a failed precondition regardless of the
    # method (RFC 7232, sections 3.4 and 6).
    if not cond_match.get('if-unmodified-since', True):
        return '', 412
    # Note that If-Modified-Since is only evaluated for safe methods.
    if safe and not cond_match.get('if-modified-since', True):
        return '', 304

    return None

Loading

0 comments on commit b628f01

Please sign in to comment.