2007-08-13 Michal Ludvig <michal@logix.cz>

* S3/S3.py: Added function urlencode_string() that encodes non-ascii characters in object name before sending it to S3. git-svn-id: https://s3tools.svn.sourceforge.net/svnroot/s3tools/s3cmd/trunk@134 830e0280-6d2a-0410-9c65-932aecc39d9d
s3tools · Aug 13, 2007 · c0e0c04 · c0e0c04
1 parent c5a458d
commit c0e0c04
Show file tree

Hide file tree

Showing 2 changed files with 43 additions and 1 deletion.
diff --git a/ChangeLog b/ChangeLog
@@ -1,3 +1,8 @@
+2007-08-13  Michal Ludvig  <michal@logix.cz>
+
+	* S3/S3.py: Added function urlencode_string() that encodes
+	  non-ascii characters in object name before sending it to S3.
+
 2007-08-13  Michal Ludvig  <michal@logix.cz>
 
 	* README: Updated Amazon S3 pricing overview

diff --git a/S3/S3.py b/S3/S3.py
@@ -180,12 +180,49 @@ def object_delete_uri(self, uri):
 		return self.object_delete(uri.bucket(), uri.object())
 
 	## Low level methods
+	def urlencode_string(self, string):
+		encoded = ""
+		## List of characters that must be escaped for S3
+		## Haven't found this in any official docs
+		## but my tests show it's more less correct.
+		## If you start getting InvalidSignature errors
+		## from S3 check the error headers returned
+		## from S3 to see whether the list hasn't
+		## changed.
+		for c in string:	# I'm not sure how to know in what encoding 
+					# 'object' is. Apparently "type(object)==str"
+					# but the contents is a string of unicode
+					# bytes, e.g. '\xc4\x8d\xc5\xafr\xc3\xa1k'
+					# Don't know what it will do on non-utf8 
+					# systems.
+					#           [hope that sounds reassuring ;-)]
+			o = ord(c)
+			if (o <= 32 or		# Space and below
+			    o == 0x22 or	# "
+			    o == 0x23 or	# #
+			    o == 0x25 or	# %
+			    o == 0x2B or	# + (or it would become <space>)
+			    o == 0x3C or	# <
+			    o == 0x3E or	# >
+			    o == 0x3F or	# ?
+			    o == 0x5B or	# [
+			    o == 0x5C or	# \
+			    o == 0x5D or	# ]
+			    o == 0x5E or	# ^
+			    o == 0x60 or	# `
+			    o >= 123):   	# { and above, including >= 128 for UTF-8
+				encoded += "%%%02X" % o
+			else:
+				encoded += c
+		debug("String '%s' encoded to '%s'" % (string, encoded))
+		return encoded
+
 	def create_request(self, operation, bucket = None, object = None, headers = None, **params):
 		resource = "/"
 		if bucket:
 			resource += str(bucket)
 			if object:
-				resource += "/"+str(object)
+				resource += "/" + self.urlencode_string(object)
 
 		if not headers:
 			headers = SortedDict()