From 0a97bc209c8d8b6927dad8b524288cc65011e052 Mon Sep 17 00:00:00 2001
From: Ryan Barrett
Date: Sat, 18 Feb 2017 10:34:35 -0800
Subject: [PATCH] twitter entity index handling bug fix for multi code point unicode chars

...at least i think?
---
 granary/test/test_twitter.py | 21 +++++++++++++++++++++
 1 file changed, 21 insertions(+)

diff --git a/granary/test/test_twitter.py b/granary/test/test_twitter.py
index 45ba3c52..bd962b66 100644
--- a/granary/test/test_twitter.py
+++ b/granary/test/test_twitter.py
@@ -1089,6 +1089,27 @@ def test_quote_tweet_to_activity_without_quoted_tweet_url_entity(self):
     self.assert_equals('I agree with this https://t.co/ww6HD8KroG',
                        self.twitter.tweet_to_activity(quote_tweet)['object']['content'])
 
+  def test_tweet_to_object_multi_byte_unicode_chars(self):
+    # the first three chars in the expected content and in the text are the
+    # '100' emoji, U+1F4AF: a single non-BMP code point (a surrogate pair in
+    # UTF-16) that some editor fonts render blank. indices count it as one char.
+    self.assert_equals(
+      u'💯💯💯 (by @itsmaeril)',
+      self.twitter.tweet_to_object({
+        'id_str': '831552681210556416',
+        'text': u'💯💯💯 (by @itsmaeril) https://t.co/pWrOHzuHkP',
+        'entities': {
+          'user_mentions': [{
+            'screen_name': 'itsmaeril',
+            'indices': [8, 18]
+          }],
+          'media': [{
+            'indices': [20, 43],
+            'media_url': 'http://pbs.twimg.com/media/C4pEu77UkAAVy9l.jpg',
+          }]
+        },
+      })['content'])
+
   def test_tweet_to_object_full(self):
     self.assert_equals(OBJECT, self.twitter.tweet_to_object(TWEET))
 