Merge pull request #85 from kylewm/brevity2

Delegate tweet truncation to brevity
snarfed · Apr 23, 2016 · 2f00e42 · 2f00e42
2 parents 2fd8ef0 + 32bf4eb
commit 2f00e42
Show file tree

Hide file tree

Showing 4 changed files with 48 additions and 91 deletions.
diff --git a/granary/test/test_twitter.py b/granary/test/test_twitter.py
@@ -1283,31 +1283,31 @@ def test_tweet_truncate(self):
       u'manually may already be getting them')
     expected = (
       u'Hey #indieweb, the coming storm of webmention Spam may not '
-      u'be far away. Those of us that have input fields to… '
-      u'(https://ben.thatmustbe.me/note/2015/1/31/1/)')
-    result = self.twitter._truncate(orig, 'https://ben.thatmustbe.me/note/2015/1/31/1/', False)
+      u'be far away. Those of us that have input fields to send… '
+      u'https://ben.thatmustbe.me/note/2015/1/31/1/')
+    result = self.twitter._truncate(orig, 'https://ben.thatmustbe.me/note/2015/1/31/1/', 'note', False)
     self.assertEquals(expected, result)
 
     orig = expected = (
       u'Despite names,\n'
       u'ind.ie&indie.vc are NOT #indieweb @indiewebcamp\n'
       u'indiewebcamp.com/2014-review#Indie_Term_Re-use\n'
       u'@iainspad @sashtown @thomatronic (ttk.me t4_81)')
-    result = self.twitter._truncate(orig, None, False)
+    result = self.twitter._truncate(orig, None, 'note', False)
     self.assertEquals(expected, result)
 
     orig = expected = (
       u'@davewiner I stubbed a page on the wiki for '
       u'https://indiewebcamp.com/River4. Edits/improvmnts from users are '
       u'welcome! @kevinmarks @julien51 @aaronpk')
-    result = self.twitter._truncate(orig, None, False)
+    result = self.twitter._truncate(orig, None, 'note', False)
     self.assertEquals(expected, result)
 
     orig = expected = (
       u'This is a long tweet with (foo.com/parenthesized-urls) and urls '
       u'that wikipedia.org/Contain_(Parentheses), a url with a query '
       u'string;foo.withknown.com/example?query=parameters')
-    result = self.twitter._truncate(orig, None, False)
+    result = self.twitter._truncate(orig, None, 'note', False)
     self.assertEquals(expected, result)
 
     orig = (
@@ -1318,7 +1318,7 @@ def test_tweet_truncate(self):
       u'This is a long tweet with (foo.com/parenthesized-urls) and urls '
       u'that wikipedia.org/Contain_(Parentheses), that is one charc too '
       u'long:…')
-    result = self.twitter._truncate(orig, None, False)
+    result = self.twitter._truncate(orig, None, 'note', False)
     self.assertEquals(expected, result)
 
     # test case-insensitive link matching
@@ -1329,26 +1329,26 @@ def test_tweet_truncate(self):
     expected = (
       u'The Telegram Bot API is the best bot API ever. Everyone should learn '
       u'from it, especially Matrix.org… '
-      u'(https://unrelenting.technology/notes/2015-09-05-00-35-13)')
+      u'https://unrelenting.technology/notes/2015-09-05-00-35-13')
     result = self.twitter._truncate(
-      orig, 'https://unrelenting.technology/notes/2015-09-05-00-35-13', False)
+      orig, 'https://unrelenting.technology/notes/2015-09-05-00-35-13', 'note', False)
     self.assertEquals(expected, result)
 
     twitter.MAX_TWEET_LENGTH = 20
     twitter.TCO_LENGTH = 5
 
     orig = u'url http://foo.co/bar ellipsize http://foo.co/baz'
     expected = u'url http://foo.co/bar ellipsize…'
-    result = self.twitter._truncate(orig, None, False)
+    result = self.twitter._truncate(orig, None, 'note', False)
     self.assertEquals(expected, result)
 
     orig = u'too long\nextra whitespace\tbut should include url'
-    expected = u'too long… (http://obj.ca)'
-    result = self.twitter._truncate(orig, 'http://obj.ca', False)
+    expected = u'too long… http://obj.ca'
+    result = self.twitter._truncate(orig, 'http://obj.ca', 'note', False)
     self.assertEquals(expected, result)
 
     orig = expected = u'trailing slash http://www.foo.co/'
-    result = self.twitter._truncate(orig, None, False)
+    result = self.twitter._truncate(orig, None, 'note', False)
     self.assertEquals(expected, result)
 
   def test_no_ellipsize_real_tweet(self):
@@ -1388,12 +1388,12 @@ def test_ellipsize_real_tweet(self):
             'manually may already be getting them')
 
     content = (u'Hey #indieweb, the coming storm of webmention Spam may not '
-               u'be far away. Those of us that have input fields to… '
-               u'(https://ben.thatmustbe.me/note/2015/1/31/1/)')
+               u'be far away. Those of us that have input fields to send… '
+               u'https://ben.thatmustbe.me/note/2015/1/31/1/')
 
     preview = (u'Hey #indieweb, the coming storm of webmention Spam may not '
-               u'be far away. Those of us that have input fields to… '
-               u'(<a href="https://ben.thatmustbe.me/note/2015/1/31/1/">ben.thatmustbe.me/note/2015/1/31...</a>)')
+               u'be far away. Those of us that have input fields to send… '
+               u'<a href="https://ben.thatmustbe.me/note/2015/1/31/1/">ben.thatmustbe.me/note/2015/1/31...</a>')
 
     self.expect_urlopen(twitter.API_POST_TWEET, TWEET,
                         params={'status': content.encode('utf-8')})
@@ -1408,6 +1408,19 @@ def test_ellipsize_real_tweet(self):
     actual_preview = self.twitter.preview_create(obj, include_link=True).content
     self.assertEquals(preview, actual_preview)
 
+  def test_tweet_article_has_different_format(self):
+    """Article previews use the format "The Title: url" (title, colon,
+    linkified url) instead of the parenthesized "The Title (url)".
+    """
+    preview = self.twitter.preview_create({
+      'objectType': 'article',
+      'displayName': 'The Article Title',
+      'url': 'http://example.com/article',
+    }, include_link=True).content
+    self.assertEquals(
+      'The Article Title: <a href="http://example.com/article">example.com/'
+      'article</a>', preview)
+
   def test_create_tweet_note_prefers_summary_then_content_then_name(self):
     obj = copy.deepcopy(OBJECT)
 
@@ -1455,7 +1468,7 @@ def test_create_tweet_include_link(self):
     twitter.TCO_LENGTH = 5
 
     self.expect_urlopen(twitter.API_POST_TWEET, TWEET,
-                        params={'status': 'too long… (http://obj.ca)'})
+                        params={'status': 'too long… http://obj.ca'})
     self.mox.ReplayAll()
 
     obj = copy.deepcopy(OBJECT)
@@ -1466,7 +1479,8 @@ def test_create_tweet_include_link(self):
         })
     self.twitter.create(obj, include_link=True)
     result = self.twitter.preview_create(obj, include_link=True)
-    self.assertIn(u'too long… (<a href="http://obj.ca">obj.ca</a>)',result.content)
+    self.assertIn(u'too long… <a href="http://obj.ca">obj.ca</a>',
+                  result.content)
 
   def test_create_recognize_note(self):
     """Use post-type-discovery to recognize a note with non-trivial html content.

diff --git a/granary/twitter.py b/granary/twitter.py
@@ -29,6 +29,7 @@
 
 from bs4 import BeautifulSoup
 import requests
+import brevity
 
 import source
 from oauth_dropins import twitter_auth
@@ -621,7 +622,7 @@ def _create(self, obj, preview=None, include_link=False, ignore_formatting=False
     # truncate and ellipsize content if it's over the character
     # count. URLs will be t.co-wrapped, so include that when counting.
     include_url = obj.get('url') if include_link else None
-    content = self._truncate(content, include_url, has_media)
+    content = self._truncate(content, include_url, type, has_media)
 
     # linkify defaults to Twitter's link shortening behavior
     preview_content = util.linkify(content, pretty=True, skip_bare_cc_tlds=True)
@@ -728,87 +729,27 @@ def _create(self, obj, preview=None, include_link=False, ignore_formatting=False
 
     return source.creation_result(resp)
 
-  def _truncate(self, content, include_url, has_media):
+  def _truncate(self, content, include_url, type, has_media):
     """Shorten tweet content to fit within the 140 character limit.
 
     Args:
       content: string
       include_url: string
+      type: string, object type; 'article' uses the article format, else note
       has_media: boolean
 
     Return: string, the possibly shortened and ellipsized tweet text
     """
-    def rstrip_cruft(text):
-      return text.rstrip().rstrip(',;.')
-
-    def trunc_to_nearest_word(text, length):
-      # try stripping trailing whitespace first
-      text = rstrip_cruft(text)
-      if len(text) <= length:
-        return text
-      # walk backwards until we find a delimiter
-      for j in xrange(length, -1, -1):
-        if text[j] in ',.;: \t\r\n':
-          return rstrip_cruft(text[:j])
-
-    links, splits = util.tokenize_links(content, skip_bare_cc_tlds=True)
-    max = MAX_TWEET_LENGTH
-    if include_url:
-      max -= TCO_LENGTH + 3
+    if type == 'article':
+      format = brevity.FORMAT_ARTICLE
+    else:
+      format = brevity.FORMAT_NOTE
     if has_media:
-      # twitter includes a pic.twitter.com link (and space) for pictures or
-      # video - one link total, regardless of number of pictures - so account
-      # for that.
-      max -= TCO_LENGTH + 1
-
-    tokens = []
-    for i in xrange(len(links)):
-      if splits[i]:
-        tokens.append(('text', splits[i]))
-      tokens.append(('link', links[i]))
-    if splits[-1]:
-      tokens.append(('text', splits[-1]))
-
-    length = 0
-    shortened = []
-    truncated = False
-
-    for i, (toktype, token) in enumerate(tokens):
-      tokmax = max - length
-
-      # links are all or nothing, either add it or don't
-      if toktype == 'link':
-        # account for ellipsis if this is not the last token
-        if i < len(tokens) - 1:
-          tokmax -= 1
-        if TCO_LENGTH > tokmax:
-          truncated = True
-          break
-        length += TCO_LENGTH
-        shortened.append(token)
-      # truncate text to the nearest word
-      else:
-        # account for ellipsis if this is not the last token, or it
-        # will be truncated
-        if i < len(tokens) - 1 or len(token) > tokmax:
-          tokmax -= 1
-        if len(token) > tokmax:
-          token = trunc_to_nearest_word(token, tokmax)
-          if token:
-            length += len(token)
-            shortened.append(token)
-          truncated = True
-          break
-        else:
-          length += len(token)
-          shortened.append(token)
-
-    content = ''.join(shortened)
-    if truncated:
-      content = content.rstrip() + u'…'
-    if include_url:
-      content += ' (%s)' % include_url
-    return content
+      format += '+' + brevity.FORMAT_MEDIA
+
+    return brevity.shorten(
+      content, permalink=include_url, permashortlink=include_url,
+      target_length=MAX_TWEET_LENGTH, link_length=TCO_LENGTH, format=format)
 
   def upload_images(self, urls):
     """Uploads one or more images from web URLs.

diff --git a/requirements.txt b/requirements.txt
@@ -6,3 +6,4 @@ mf2py>=0.2.7
 mf2util>=0.3.3
 oauth-dropins>=1.3
 requests<2.6.0
+brevity>=0.2.8
diff --git a/setup.py b/setup.py
@@ -48,6 +48,7 @@ def __init__(self, *args, **kwargs):
           'mf2util>=0.3.3',
           'oauth-dropins>=1.3',
           'requests<2.6.0',
+          'brevity>=0.2.8',
       ],
       test_loader='setup:TestLoader',
       test_suite='granary.test',