Skip to content

Commit

Permalink
mf2: remember which content is HTML, keep its newlines, and don't translate them to <br>
Browse files Browse the repository at this point in the history
For #130, also re #80. I highly suspect this will cause a regression somewhere, but I'm not quite sure where yet. :/
  • Loading branch information
snarfed committed Feb 26, 2018
1 parent 0b94583 commit 6260e32
Show file tree
Hide file tree
Showing 5 changed files with 35 additions and 14 deletions.
1 change: 1 addition & 0 deletions README.md
Expand Up @@ -243,6 +243,7 @@ Changelog
* Add `fetch_mf2` kwarg to `json_to_object()` for fetching additional pages over HTTP to determine authorship.
* Generate explicit blank `p-name` in HTML to prevent old flawed [implied p-name handling](http://microformats.org/wiki/microformats2-implied-properties) [#131](https://github.com/snarfed/granary/issues/131).
* Fix `share` verb handling in `activity_to_json()` and `activities_to_html()` [#134](https://github.com/snarfed/granary/issues/134).
* Remember which content contains HTML, preserve newlines in it, and don't translate those newlines to `<br>`s ([#130](https://github.com/snarfed/granary/issues/130)).
* Atom:
* Fix timezone bugs in `updated` and `published`.
* JSON Feed:
Expand Down
22 changes: 14 additions & 8 deletions granary/microformats2.py
Expand Up @@ -103,7 +103,7 @@ def get_string_urls(objs):
return urls


def get_html(val, keep_newlines=False):
def get_html(val):
"""Returns a string value that may have HTML markup.
Args:
Expand All @@ -119,8 +119,6 @@ def get_html(val, keep_newlines=False):
# https://github.com/snarfed/granary/issues/80
# https://indiewebcamp.com/note#Indieweb_whitespace_thinking
html = val['html']
if not keep_newlines:
html = html.replace('\n', ' ')
return html.strip()

return get_text(val)
Expand Down Expand Up @@ -462,7 +460,14 @@ def html_to_activities(html, url=None, actor=None):
parsed = mf2py.parse(doc=html, url=url)
hfeed = mf2util.find_first_entry(parsed, ['h-feed'])
items = hfeed.get('children', []) if hfeed else parsed.get('items', [])
return [{'object': json_to_object(item, actor=actor)} for item in items]

activities = []
for item in items:
obj = json_to_object(item, actor=actor)
obj['content_is_html'] = True
activities.append({'object': obj})

return activities


def activities_to_html(activities):
Expand Down Expand Up @@ -571,7 +576,7 @@ def json_to_html(obj, parent_props=None):
children.append(json_to_html(target, ['u-' + mftype + '-of']))

# set up content and name
content_html = get_html(prop.get('content', {}), keep_newlines=True)
content_html = get_html(prop.get('content', {}))
content_classes = []

if content_html:
Expand Down Expand Up @@ -743,9 +748,10 @@ def render_content(obj, include_location=True, synthesize_content=True,

content += orig[last_end:]

# convert newlines to <br>s
# do this *after* linkifying tags so we don't have to shuffle indices over
content = content.replace('\n', '<br />\n')
if not obj.get('content_is_html'):
# convert newlines to <br>s
# do this *after* linkifying tags so we don't have to shuffle indices over
content = content.replace('\n', '<br />\n')

# linkify embedded links. ignore the "mention" tags that we added ourselves.
# TODO: fix the bug in test_linkify_broken() in webutil/util_test.py, then
Expand Down
18 changes: 16 additions & 2 deletions granary/test/test_atom.py
Expand Up @@ -528,12 +528,19 @@ def test_html_to_atom(self):
<activity:object-type>http://activitystrea.ms/schema/1.0/note</activity:object-type>
<id>http://my/post</id>
<title>my content</title>
<title>my content
x
y
z</title>
<content type="xhtml">
<div xmlns="http://www.w3.org/1999/xhtml">
my content
<pre> x
y
z
</pre>
</div>
</content>
Expand Down Expand Up @@ -561,7 +568,14 @@ def test_html_to_atom(self):
<article class="h-entry">
<a class="u-url" href="http://my/post" />
<div class="e-content">my content</div>
<div class="e-content">
my content
<pre>
x
y
z
</pre>
</div>
</article>
</div>
""", 'https://my.site/feed'),
Expand Down
6 changes: 3 additions & 3 deletions granary/test/test_microformats2.py
Expand Up @@ -641,11 +641,11 @@ def test_json_to_object_converts_text_newlines_to_brs(self):
'properties': {'content': [{'value': 'asdf\nqwer'}]},
}))

def test_json_to_object_drops_html_newlines(self):
"""HTML newlines should be discarded."""
def test_json_to_object_keeps_html_newlines(self):
"""HTML newlines should be preserved."""
self.assert_equals({
'objectType': 'note',
'content': 'asdf qwer',
'content': 'asdf\nqwer',
}, microformats2.json_to_object({
'properties': {'content': [{'html': 'asdf\nqwer', 'value': ''}]},
}))
Expand Down
2 changes: 1 addition & 1 deletion granary/test/testdata/repost.as-from-mf2.json
Expand Up @@ -4,7 +4,7 @@
"id": "tag:example.com,2001:3344",
"published": "2012-12-05T00:58:26+00:00",
"url": "http://example.com/this/repost",
"content": "Shared <a href=\"http://example.com/original/post\">a post</a> by <span class=\"h-card\"> <a class=\"p-name u-url\" href=\"http://example.com/bob\">Bob</a> </span> The original post",
"content": "Shared <a href=\"http://example.com/original/post\">a post</a> by <span class=\"h-card\">\n \n<a class=\"p-name u-url\" href=\"http://example.com/bob\">Bob</a>\n \n </span>\nThe original post",
"object": {
"author": {
"objectType": "person",
Expand Down

0 comments on commit 6260e32

Please sign in to comment.