|
|
@@ -11,9 +11,45 @@ |
|
|
from annotator import document
|
|
|
|
|
|
|
|
|
def is_file(uri):
|
|
|
"""Return True if the given uri is a local file uri."""
|
|
|
return uri and uri.lower().startswith("file:///")
|
|
|
def _format_document_link(href, title, link_text, hostname):
|
|
|
"""Return a document link for the given components.
|
|
|
|
|
|
Helper function for the .document_link property below.
|
|
|
|
|
|
The given href, title, link_text and hostname are assumed to be already
|
|
|
safely escaped. The returned string will be a Markup object so that
|
|
|
it can be rendered in Jinja2 templates without further escaped occurring.
|
|
|
|
|
|
"""
|
|
|
if hostname and hostname in link_text:
|
|
|
hostname = ""
|
|
|
|
|
|
def truncate(content, length=50):
|
|
|
"""Truncate the given string to at most length chars."""
|
|
|
if len(content) <= length:
|
|
|
return content
|
|
|
else:
|
|
|
return content[:length] + jinja2.Markup("…")
|
|
|
|
|
|
hostname = truncate(hostname)
|
|
|
link_text = truncate(link_text)
|
|
|
|
|
|
if href and hostname:
|
|
|
link = ('<a href="{href}" title="{title}">{link_text}</a> '
|
|
|
'({hostname})'.format(href=href, title=title,
|
|
|
link_text=link_text, hostname=hostname))
|
|
|
elif hostname and not href:
|
|
|
link = ('<a title="{title}">{link_text}</a> ({hostname})'.format(
|
|
|
title=title, link_text=link_text, hostname=hostname))
|
|
|
elif href and not hostname:
|
|
|
link = '<a href="{href}" title="{title}">{link_text}</a>'.format(
|
|
|
href=href, title=title, link_text=link_text)
|
|
|
else:
|
|
|
link = '<a title="{title}">{link_text}</a>'.format(
|
|
|
title=title, link_text=link_text)
|
|
|
|
|
|
return jinja2.Markup(link)
|
|
|
|
|
|
|
|
|
|
|
|
class Annotation(annotation.Annotation):
|
|
|
@@ -165,29 +201,174 @@ class Annotation(annotation.Annotation): |
|
|
def get_analysis(cls):
|
|
|
return cls.__analysis__
|
|
|
|
|
|
@property
|
|
|
def uri(self):
|
|
|
"""Return this annotation's URI or an empty string.
|
|
|
|
|
|
The uri is escaped and safe to be rendered.
|
|
|
|
|
|
The uri is a Markup object so it won't be double-escaped.
|
|
|
|
|
|
"""
|
|
|
uri_ = self.get("uri")
|
|
|
if uri_:
|
|
|
|
|
|
# Convert non-string URIs into strings.
|
|
|
# If the URI is already a unicode string this will do nothing.
|
|
|
# We're assuming that URI cannot be a byte string.
|
|
|
uri_ = unicode(uri_)
|
|
|
|
|
|
return jinja2.escape(uri_)
|
|
|
else:
|
|
|
return ""
|
|
|
|
|
|
@property
|
|
|
def filename(self):
|
|
|
"""Return the filename of this annotation's document, or "".
|
|
|
|
|
|
If the annotated URI is a file:// URI then return the filename part
|
|
|
of it, otherwise return "".
|
|
|
|
|
|
The filename is escaped and safe to be rendered.
|
|
|
|
|
|
If it contains escaped characters then the filename will be a
|
|
|
Markup object so it won't be double-escaped.
|
|
|
|
|
|
"""
|
|
|
if self.uri.lower().startswith("file:///"):
|
|
|
# self.uri is already escaped so we don't need to escape it again
|
|
|
# here.
|
|
|
return self.uri.split("/")[-1]
|
|
|
else:
|
|
|
return ""
|
|
|
|
|
|
@property
|
|
|
def title(self):
|
|
|
"""A title for this annotation."""
|
|
|
"""Return a title for this annotation.
|
|
|
|
|
|
Return the annotated document's title or if the document has no title
|
|
|
then return its filename (if it's a file:// URI) or its URI for
|
|
|
non-file URIs.
|
|
|
|
|
|
The title is escaped and safe to be rendered.
|
|
|
|
|
|
If it contains escaped characters then the title will be a
|
|
|
Markup object, so that it won't be double-escaped.
|
|
|
|
|
|
"""
|
|
|
document_ = self.get("document")
|
|
|
if document_:
|
|
|
return document_.get("title", "")
|
|
|
try:
|
|
|
title = document_["title"]
|
|
|
except (KeyError, TypeError):
|
|
|
# Sometimes document_ has no "title" key or isn't a dict at
|
|
|
# all.
|
|
|
title = ""
|
|
|
if title:
|
|
|
# Convert non-string titles into strings.
|
|
|
# We're assuming that title cannot be a byte string.
|
|
|
title = unicode(title)
|
|
|
|
|
|
return jinja2.escape(title)
|
|
|
|
|
|
if self.filename:
|
|
|
# self.filename is already escaped so we don't need to escape
|
|
|
# it again here, but we do want to unquote it for readability.
|
|
|
return urllib2.unquote(self.filename)
|
|
|
else:
|
|
|
return ""
|
|
|
# self.uri is already escaped so we don't need to escape
|
|
|
# it again here, but we do want to unquote it for readability.
|
|
|
return urllib2.unquote(self.uri)
|
|
|
|
|
|
@property
|
|
|
def filename(self):
|
|
|
if is_file(self.uri):
|
|
|
return self.uri.split("/")[-1] or ""
|
|
|
def hostname_or_filename(self):
|
|
|
"""Return the hostname of this annotation's document.
|
|
|
|
|
|
Returns the hostname part of the annotated document's URI, e.g.
|
|
|
"www.example.com" for "http://www.example.com/example.html".
|
|
|
|
|
|
If the URI is a file:// URI then return the filename part of it
|
|
|
instead.
|
|
|
|
|
|
The returned hostname or filename is escaped and safe to be rendered.
|
|
|
|
|
|
If it contains escaped characters the returned value will be a Markup
|
|
|
object so that it doesn't get double-escaped.
|
|
|
|
|
|
"""
|
|
|
if self.filename:
|
|
|
# self.filename is already escaped, doesn't need to be escaped
|
|
|
# again here.
|
|
|
return self.filename
|
|
|
else:
|
|
|
# self.uri is already escaped, doesn't need to be escaped again.
|
|
|
hostname = urlparse.urlparse(self.uri).hostname
|
|
|
|
|
|
# urlparse()'s .hostname is sometimes None.
|
|
|
hostname = hostname or ""
|
|
|
|
|
|
return hostname
|
|
|
|
|
|
@property
|
|
|
def href(self):
|
|
|
"""Return an href for this annotation's document, or "".
|
|
|
|
|
|
Returns a value suitable for use as the value of the href attribute in
|
|
|
an <a> element in an HTML document.
|
|
|
|
|
|
Returns an empty string if the annotation doesn't have a document with
|
|
|
an http(s):// URI.
|
|
|
|
|
|
The href is escaped and safe to be rendered.
|
|
|
|
|
|
If it contains escaped characters the returned value will be a
|
|
|
Markup object so that it doesn't get double-escaped.
|
|
|
|
|
|
"""
|
|
|
uri = self.uri # self.uri is already escaped.
|
|
|
if (uri.lower().startswith("http://") or
|
|
|
uri.lower().startswith("https://")):
|
|
|
return uri
|
|
|
else:
|
|
|
return ""
|
|
|
|
|
|
@property
|
|
|
def link_text(self):
|
|
|
"""Return some link text for this annotation's document.
|
|
|
|
|
|
Return a text representation of this annotation's document suitable
|
|
|
for use as the link text in a link like <a ...>{link_text}</a>.
|
|
|
|
|
|
Returns the document's title if it has one, or failing that uses part
|
|
|
of the annotated URI if the annotation has one.
|
|
|
|
|
|
The link text is escaped and safe for rendering.
|
|
|
|
|
|
If it contains escaped characters the returned value will be a
|
|
|
Markup object so it doesn't get double-escaped.
|
|
|
|
|
|
"""
|
|
|
# self.title is already escaped.
|
|
|
title = self.title
|
|
|
|
|
|
# Sometimes self.title is the annotated document's URI (if the document
|
|
|
# has no title). In those cases we want to remove the http(s):// from
|
|
|
# the front and unquote it for link text.
|
|
|
lower = title.lower()
|
|
|
if lower.startswith("http://") or lower.startswith("https://"):
|
|
|
parts = urlparse.urlparse(title)
|
|
|
return urllib2.unquote(parts.netloc + parts.path)
|
|
|
|
|
|
else:
|
|
|
return title
|
|
|
|
|
|
@property
|
|
|
def document_link(self):
|
|
|
"""Return a link to this annotation's document.
|
|
|
|
|
|
Returns HTML strings like:
|
|
|
|
|
|
<a href="{href}" title="{title}">{link_text}</a> ({domain})
|
|
|
<a href="{href}" title="{title}">{link_text}</a> ({hostname})
|
|
|
|
|
|
where:
|
|
|
|
|
|
@@ -199,87 +380,25 @@ def document_link(self): |
|
|
not the full path.
|
|
|
- {link_text} is the same as {title}, but truncated with … if
|
|
|
it's too long
|
|
|
- {domain} is the domain name of the document's uri without
|
|
|
- {hostname} is the hostname name of the document's uri without
|
|
|
the scheme (http(s)://) and www parts, e.g. "example.com".
|
|
|
If it's a local file:// uri then the filename is used as the domain.
|
|
|
If the domain is too long it is truncated with ….
|
|
|
If it's a local file:// uri then the filename is used as the
|
|
|
hostname.
|
|
|
If the hostname is too long it is truncated with ….
|
|
|
|
|
|
The ({domain}) part will be missing if it wouldn't be any different
|
|
|
The ({hostname}) part will be missing if it wouldn't be any different
|
|
|
from the {link_text} part.
|
|
|
|
|
|
The href="{href}" will be missing if there's no http(s) uri to link to
|
|
|
for this annotation's document.
|
|
|
|
|
|
User-supplied values are escaped so the string is safe for raw
|
|
|
rendering (the returned string is actually a jinja2.Markup object and
|
|
|
rendering (the returned string is actually a Markup object and
|
|
|
won't be escaped by Jinja2 when rendering).
|
|
|
|
|
|
"""
|
|
|
uri = jinja2.escape(self.uri) or ""
|
|
|
|
|
|
if (uri.lower().startswith("http://") or
|
|
|
uri.lower().startswith("https://")):
|
|
|
href = uri
|
|
|
else:
|
|
|
href = ""
|
|
|
|
|
|
if self.title:
|
|
|
title = jinja2.escape(self.title)
|
|
|
link_text = title
|
|
|
if is_file(uri):
|
|
|
domain = jinja2.escape(self.filename)
|
|
|
else:
|
|
|
domain = urlparse.urlparse(uri).hostname
|
|
|
else:
|
|
|
if is_file(uri):
|
|
|
title = urllib2.unquote(jinja2.escape(self.filename))
|
|
|
link_text = title
|
|
|
domain = ""
|
|
|
else:
|
|
|
title = urllib2.unquote(uri)
|
|
|
parts = urlparse.urlparse(uri)
|
|
|
link_text = urllib2.unquote(parts.netloc + parts.path)
|
|
|
domain = ""
|
|
|
if domain == title:
|
|
|
domain = ""
|
|
|
|
|
|
def truncate(content, length=50):
|
|
|
"""Truncate the given string to at most length chars."""
|
|
|
if len(content) <= length:
|
|
|
return content
|
|
|
else:
|
|
|
return content[:length] + jinja2.Markup("…")
|
|
|
|
|
|
if link_text:
|
|
|
link_text = truncate(link_text)
|
|
|
|
|
|
if domain:
|
|
|
domain = truncate(domain)
|
|
|
|
|
|
assert title, "The title should never be empty"
|
|
|
assert link_text, "The link text should never be empty"
|
|
|
if href and domain:
|
|
|
link = ('<a href="{href}" title="{title}">{link_text}</a> '
|
|
|
'({domain})'.format(href=href, title=title,
|
|
|
link_text=link_text, domain=domain))
|
|
|
elif domain and not href:
|
|
|
link = ('<a title="{title}">{link_text}</a> ({domain})'.format(
|
|
|
title=title, link_text=link_text, domain=domain))
|
|
|
elif href and not domain:
|
|
|
link = '<a href="{href}" title="{title}">{link_text}</a>'.format(
|
|
|
href=href, title=title, link_text=link_text)
|
|
|
elif (not href) and (not domain):
|
|
|
link = '<a title="{title}">{link_text}</a>'.format(
|
|
|
title=title, link_text=link_text)
|
|
|
else:
|
|
|
assert False, "We should never get here"
|
|
|
|
|
|
return jinja2.Markup(link)
|
|
|
|
|
|
@property
|
|
|
def uri(self):
|
|
|
"""This annotation's URI or an empty string."""
|
|
|
return self.get("uri", "")
|
|
|
return _format_document_link(
|
|
|
self.href, self.title, self.link_text, self.hostname_or_filename)
|
|
|
|
|
|
@property
|
|
|
def description(self):
|
|
|
|