Skip to content
This repository has been archived by the owner on Nov 9, 2017. It is now read-only.

Commit

Permalink
Move code for handling unicode issues in embeds into link.py
Browse files Browse the repository at this point in the history
media.py should have minimal (if any) knowledge of having to UTF-8
encode dicts before storing them on Things
  • Loading branch information
David Ehrmann committed Feb 26, 2014
1 parent 5212718 commit 79d6e08
Show file tree
Hide file tree
Showing 2 changed files with 30 additions and 17 deletions.
20 changes: 3 additions & 17 deletions r2/r2/lib/media.py
Original file line number Diff line number Diff line change
Expand Up @@ -271,8 +271,8 @@ def _set_media(link, force=False):
link.thumbnail_url = upload_media(thumbnail)
link.thumbnail_size = thumbnail.size

link.media_object = media_object
link.secure_media_object = secure_media_object
link.set_media_object(media_object)
link.set_secure_media_object(secure_media_object)
link._commit()


Expand Down Expand Up @@ -442,20 +442,6 @@ def __init__(self, url, can_embed_securely):
self.url = url
self.can_embed_securely = can_embed_securely

@classmethod
def _utf8_encode(cls, input):
    """Recursively UTF-8 encode the unicode strings in a decoded JSON value.

    Dicts (keys and values) and lists are rebuilt with their contents
    encoded, ``unicode`` objects are turned into UTF-8 byte strings, and
    any other value is returned unchanged.
    """
    # Bind the recursive call once; it is applied to keys, values and items.
    encode = cls._utf8_encode

    if isinstance(input, dict):
        return dict((encode(key), encode(value))
                    for key, value in input.iteritems())

    if isinstance(input, list):
        return [encode(element) for element in input]

    if isinstance(input, unicode):
        return input.encode('utf-8')

    return input

def _fetch_from_embedly(self, secure):
params = urllib.urlencode({
"url": self.url,
Expand All @@ -465,7 +451,7 @@ def _fetch_from_embedly(self, secure):
"secure": "true" if secure else "false",
})
content = requests.get(self.EMBEDLY_API_URL + "?" + params).content
return json.loads(content, object_hook=self._utf8_encode)
return json.loads(content)

def _make_media_object(self, oembed):
if oembed.get("type") in ("video", "rich"):
Expand Down
27 changes: 27 additions & 0 deletions r2/r2/models/link.py
Original file line number Diff line number Diff line change
Expand Up @@ -630,6 +630,33 @@ def author_slow(self):
# If available, that should be used instead of calling this
return Account._byID(self.author_id, data=True, return_dict=False)

@classmethod
def _utf8_encode(cls, value):
    """
    Build a copy of ``value`` in which every unicode string is replaced
    by its UTF-8 encoding.

    Dicts (both keys and values) and lists are rebuilt recursively; any
    other non-unicode object is handed back as-is rather than copied.
    """
    if isinstance(value, dict):
        encoded = {}
        # Distinct loop names so the ``value`` parameter is not shadowed.
        for key, item in value.iteritems():
            encoded[cls._utf8_encode(key)] = cls._utf8_encode(item)
        return encoded

    if isinstance(value, list):
        return [cls._utf8_encode(element) for element in value]

    if isinstance(value, unicode):
        return value.encode('utf-8')

    return value

# There's an issue where pickling fails for collections whose string values
# contain Unicode code points between 128 and 256. Encoding the strings as
# UTF-8 before storing them works around this.
def set_media_object(self, value):
    """Assign ``media_object`` the result of ``Link._utf8_encode(value)``."""
    encoded = Link._utf8_encode(value)
    self.media_object = encoded

def set_secure_media_object(self, value):
    """Assign ``secure_media_object`` the result of ``Link._utf8_encode(value)``."""
    encoded = Link._utf8_encode(value)
    self.secure_media_object = encoded


class LinksByUrl(tdb_cassandra.View):
_use_db = True
_connection_pool = 'main'
Expand Down

0 comments on commit 79d6e08

Please sign in to comment.