diff --git a/shotgun_api3/shotgun.py b/shotgun_api3/shotgun.py index 4a54e72a..bee32129 100755 --- a/shotgun_api3/shotgun.py +++ b/shotgun_api3/shotgun.py @@ -2251,6 +2251,23 @@ def upload(self, entity_type, entity_id, path, field_name=None, display_name=Non """ # Basic validations of the file to upload. path = os.path.abspath(os.path.expanduser(path or "")) + + # We need to check for string encodings that we aren't going to be able + # to support later in the upload process. If the given path wasn't already + # unicode, we will try to decode it as utf-8, and if that fails then we + # have to raise a sane exception. This will always work for ascii and utf-8 + # encoded strings, but will fail on some others if the string includes + # non-ascii characters. + if not isinstance(path, unicode): + try: + path = path.decode("utf-8") + except UnicodeDecodeError: + raise ShotgunError( + "Could not upload the given file path. It is encoded as " + "something other than utf-8 or ascii. To upload this file, " + "it can be string encoded as utf-8, or given as unicode: %s" % path + ) + if not os.path.isfile(path): raise ShotgunError("Path must be a valid file, got '%s'" % path) if os.path.getsize(path) == 0: @@ -2365,10 +2382,27 @@ def _upload_to_sg(self, entity_type, entity_id, path, field_name, display_name, params.update(self._auth_params()) + # If we ended up with a unicode string path, we need to encode it + # as a utf-8 string. If we don't, there's a chance that there will + # be an attempt later on to encode it as an ascii string, and + # that will fail ungracefully if the path contains any non-ascii + # characters. + # + # On Windows, if the path contains non-ascii characters, the calls + # to open later in this method will fail to find the file if given + # a non-ascii-encoded string path. In that case, we're going to have + # to call open on the unicode path, but we'll use the encoded string + # for everything else. 
+ path_to_open = path + if isinstance(path, unicode): + path = path.encode("utf-8") + if sys.platform != "win32": + path_to_open = path + if is_thumbnail: url = urlparse.urlunparse((self.config.scheme, self.config.server, "/upload/publish_thumbnail", None, None, None)) - params["thumb_image"] = open(path, "rb") + params["thumb_image"] = open(path_to_open, "rb") if field_name == "filmstrip_thumb_image" or field_name == "filmstrip_image": params["filmstrip"] = True @@ -2385,7 +2419,7 @@ def _upload_to_sg(self, entity_type, entity_id, path, field_name, display_name, if tag_list: params["tag_list"] = tag_list - params["file"] = open(path, "rb") + params["file"] = open(path_to_open, "rb") result = self._send_form(url, params) @@ -3929,7 +3963,15 @@ def encode(self, params, files, boundary=None, buffer=None): buffer.write('Content-Disposition: form-data; name="%s"' % key) buffer.write('\r\n\r\n%s\r\n' % value) for (key, fd) in files: - filename = fd.name.split('/')[-1] + # On Windows, it's possible that we were forced to open a file + # with non-ascii characters as unicode. In that case, we need to + # encode it as a utf-8 string to remove unicode from the equation. + # If we don't, the mix of unicode and strings going into the + # buffer can cause UnicodeEncodeErrors to be raised. 
+ filename = fd.name + if isinstance(filename, unicode): + filename = filename.encode("utf-8") + filename = filename.split('/')[-1] content_type = mimetypes.guess_type(filename)[0] content_type = content_type or 'application/octet-stream' file_size = os.fstat(fd.fileno())[stat.ST_SIZE] diff --git "a/tests/No\303\253l.jpg" "b/tests/No\303\253l.jpg" new file mode 100644 index 00000000..7a8aaf96 Binary files /dev/null and "b/tests/No\303\253l.jpg" differ diff --git a/tests/test_api.py b/tests/test_api.py index a5408412..b84f35dc 100644 --- a/tests/test_api.py +++ b/tests/test_api.py @@ -15,6 +15,7 @@ import urlparse import urllib2 import warnings +import glob import shotgun_api3 from shotgun_api3.lib.httplib2 import Http, SSLHandshakeError @@ -218,6 +219,63 @@ def test_upload_download(self): {"id":123, "type":"Shot"}) self.assertRaises(TypeError, self.sg.download_attachment) + # test upload of non-ascii, unicode path + u_path = os.path.abspath( + os.path.expanduser( + glob.glob(os.path.join(unicode(this_dir), u'No*l.jpg'))[0] + ) + ) + + # If this is a problem, it'll raise with a UnicodeEncodeError. We + # don't need to check the results of the upload itself -- we're + # only checking that the non-ascii string encoding doesn't trip + # us up the way it used to. + self.sg.upload( + "Ticket", + self.ticket['id'], + u_path, + 'attachments', + tag_list="monkeys, everywhere, send, help" + ) + + # Also make sure that we can pass in a utf-8 encoded string path + # with non-ascii characters and have it work properly. This is + # primarily a concern on Windows, as it doesn't handle that + # situation as well as OS X and Linux. + self.sg.upload( + "Ticket", + self.ticket['id'], + u_path.encode("utf-8"), + 'attachments', + tag_list="monkeys, everywhere, send, help" + ) + + # Make sure that non-utf-8 encoded paths raise when they can't be + # converted to utf-8. 
+ u_path = os.path.abspath( + os.path.expanduser( + glob.glob(os.path.join(unicode(this_dir), u'*.shift-jis'))[0] + ) + ) + self.assertRaises( + shotgun_api3.ShotgunError, + self.sg.upload, + "Ticket", + self.ticket['id'], + u_path.encode("shift-jis"), + 'attachments', + tag_list="monkeys, everywhere, send, help" + ) + + # But it should work in all cases if a unicode string is used. + self.sg.upload( + "Ticket", + self.ticket['id'], + u_path, + 'attachments', + tag_list="monkeys, everywhere, send, help" + ) + # cleanup os.remove(file_path) diff --git "a/tests/\343\201\224.shift-jis" "b/tests/\343\201\224.shift-jis" new file mode 100644 index 00000000..7a8aaf96 Binary files /dev/null and "b/tests/\343\201\224.shift-jis" differ