Skip to content

HTTPS clone URL

Subversion checkout URL

You can clone with
or
.
Download ZIP
Browse files

Initial commit.

  • Loading branch information...
commit a56dd32817bcaa9370f3f95fdd7c7c28c62fdcee 0 parents
@lericson authored
Showing with 533 additions and 0 deletions.
  1. +23 −0 LICENSE
  2. +1 −0  MANIFEST.in
  3. +1 −0  README
  4. +12 −0 TODO
  5. +32 −0 setup.py
  6. +464 −0 simples3.py
23 LICENSE
@@ -0,0 +1,23 @@
+Copyright (c) 2008, Ludvig Ericson
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+
+ * Redistributions of source code must retain the above copyright notice, this
+ list of conditions and the following disclaimer.
+
+ * Redistributions in binary form must reproduce the above copyright notice,
+ this list of conditions and the following disclaimer in the documentation
+ and/or other materials provided with the distribution.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
+ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
+ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
1  MANIFEST.in
@@ -0,0 +1 @@
+include LICENSE README TODO setup.py simples3.py
1  README
@@ -0,0 +1 @@
+I like boobs.
12 TODO
@@ -0,0 +1,12 @@
+Already written: A Django file storage adapter. Kept closed because it's not
+polished. If there's a huge demand, I'll most likely publish it.
+
+Deleting buckets? Creating buckets? These are both fairly simple to implement,
+but they somehow feel outside of the scope of a simple API.
+
+Perhaps some unittests.
+
+Perhaps a CLI à la FTP.
+
+Some way to stream upload files, using Transfer-Encoding: chunked. This would
+mean skipping urllib2 for that request entirely, as it's simply incompatible.
32 setup.py
@@ -0,0 +1,32 @@
+from distutils.core import setup, Extension
+
+import simples3
+long_description = """
+`simples3` is a fairly simple, decently quick interface to Amazon's S3 storage
+service.
+
+It grew out of frustration with other libraries that were either written too
+pragmatically (slow), too bloatedly, or just half-done.
+
+The module aims for:
+
+ * simplicity,
+ * decent speed,
+ * non-intrusiveness.
+
+It really is designed to fit into programmer memory. The three basic operations
+are as easy as with dictionaries.
+
+Out of simplicity comes no dependencies - the code relies solely on Python
+standard libraries.
+
+The perhaps greatest setback is that it requires Python 2.5, nothing more,
+nothing less.
+""" + simples3.__doc__
+
+setup(name="simples3", version="0.1",
+ url="http://lericson.se/",
+ author="Ludvig Ericson", author_email="ludvig@lericson.se",
+ description="Simple, quick Amazon AWS S3 interface",
+ long_description=long_description,
+ py_modules=["simples3"])
464 simples3.py
@@ -0,0 +1,464 @@
+r"""Simple Amazon AWS S3 interface.
+
+And I do mean that.
+
+Paste your stuff here and run this module in itself to do the tests::
+
+ >>> s = S3Bucket("mybucket",
+ ... access_key="ACCESS KEY",
+ ... secret_key="SECRET KEY")
+ ...
+ >>> print s # doctest: +ELLIPSIS
+ <S3Bucket ... at 'https://s3.amazonaws.com/...'>
+
+or if you'd like to use virtual host S3::
+
+ >>> s = S3Bucket("mybucket",
+ ... access_key="ACCESS KEY",
+ ... secret_key="SECRET KEY",
+ ... base_url="http://yo.se")
+ >>> print s # doctest: +ELLIPSIS
+ <S3Bucket ... at 'http...'>
+
+ >>> s = S3Bucket("s3-dev.yo.se",
+ ... access_key="0KG1EY709BMR775A1ER2",
+ ... secret_key="ylE3bXT6Rwj2ko0+fT2P1itzHSiHm7N8Aj4NCm8b",
+ ... base_url="https://s3-dev.yo.se")
+
+Note the missing trailing slash above; it's important. Think of it as
+"The prefix to which all calls are made." Also the scheme can be `https` or
+regular `http`, or any other urllib2-compatible scheme (that is: you can
+register your own.)
+
+Now, let's start doing something useful. Start out by putting a simple file
+onto there::
+
+ >>> s.put("my file", "my content")
+
+Alright, and fetch it back::
+
+ >>> f = s.get("my file")
+ >>> f.read()
+ 'my content'
+
+Nice and tidy, but what if we want to know more about our fetched file? Easy::
+
+ >>> f.s3_info["modify"] # doctest: +ELLIPSIS
+ datetime.datetime(...)
+ >>> f.s3_info["mimetype"]
+ 'application/x-octet-stream'
+ >>> f.s3_info.keys()
+ ['mimetype', 'modify', 'headers', 'date', 'size', 'metadata']
+ >>> f.close()
+
+Note that the type was octet stream. That's simply because we didn't specify
+anything else. Do that using the `mimetype` keyword argument::
+
+ >>> s.put("my new file!", "Improved content!\nMultiple lines!",
+ ... mimetype="text/plain")
+
+Let's be cool and use the very Pythonic API to do the fetch::
+
+ >>> f = s["my new file!"]
+ >>> print f.read()
+ Improved content!
+ Multiple lines!
+ >>> f.s3_info["mimetype"]
+ 'text/plain'
+ >>> f.close()
+
+Great job, huh. Now, let's delete it::
+
+ >>> del s["my new file!"]
+
+Could've used the `delete` method instead, but we didn't.
+
+If you just want to know about a key, ask and ye shall receive::
+
+ >>> from pprint import pprint
+ >>> s["This is a testfile."] = S3File("Hi!", metadata={"hairdo": "Secret"})
+ >>> pprint(s.info("This is a testfile.")) # doctest: +ELLIPSIS
+ {'date': datetime.datetime(...),
+ 'headers': {'content-length': '3',
+ 'content-type': 'application/x-octet-stream',
+ 'date': '...',
+ 'etag': '"..."',
+ 'last-modified': '...',
+ 'server': 'AmazonS3',
+ 'x-amz-id-2': '...',
+ 'x-amz-meta-hairdo': 'Secret',
+ 'x-amz-request-id': '...'},
+ 'metadata': {'hairdo': 'Secret'},
+ 'mimetype': 'application/x-octet-stream',
+ 'modify': datetime.datetime(...),
+ 'size': 3}
+
+Notable is that you got the metadata parsed out in the `metadata` key. You
+might also have noticed how the file was uploaded, using an `S3File` object
+like that. That's a nicer way to do it, in a way.
+
+The `S3File` simply takes its keyword arguments, and passes them on to `put`
+later. Other than that, it's a str subclass.
+
+And the last dict-like behavior is in tests::
+
+ >>> "This is a testfile." in s
+ True
+ >>> del s["This is a testfile."]
+ >>> "This is a testfile." in s
+ False
+
+You can also set a canned ACL using `put`, which is simple, too::
+
+ >>> s.put("test/foo", "test", acl="public-read")
+ >>> s.put("test/bar", "rawr", acl="public-read")
+
+Boom. What's more? Listing the bucket::
+
+ >>> for (key, modify, etag, size) in s.listdir(prefix="test/"):
+ ... print "%r (%r) is size %r, modified %r" % (key, etag, size, modify)
+ ... # doctest: +ELLIPSIS
+ 'test/bar' ('"..."') is size 4, modified datetime.datetime(...)
+ 'test/foo' ('"..."') is size 4, modified datetime.datetime(...)
+
+That about sums it up.
+"""
+
+import time
+import hmac
+import md5
+import sha
+import re
+import urllib
+import urllib2
+import datetime
+
+# strftime/strptime patterns for the two timestamp styles this module
+# handles: the HTTP "Date"/"Last-Modified" headers (rfc822_fmt) and the
+# <LastModified> element of bucket listings (iso8601_fmt).
+rfc822_fmt = '%a, %d %b %Y %H:%M:%S GMT'
+iso8601_fmt = '%Y-%m-%dT%H:%M:%S.000Z'
+
+def _amz_canonicalize(headers):
+ r"""Canonicalize AMZ headers in that certain AWS way.
+
+ >>> _amz_canonicalize({"x-amz-test": "test"})
+ 'x-amz-test:test\n'
+ >>> _amz_canonicalize({})
+ ''
+ """
+ rv = {}
+ for header, value in headers.iteritems():
+ header = header.lower()
+ if header.startswith("x-amz-"):
+ rv.setdefault(header, []).append(value)
+ return "".join(":".join((h, ",".join(v))) + "\n" for h, v in rv.iteritems())
+
+def metadata_headers(metadata):
+ return dict(("X-AMZ-Meta-" + h, v) for h, v in metadata.iteritems())
+
+def headers_metadata(headers):
+ return dict((h[11:], v) for h, v in headers.iteritems()
+ if h.lower().startswith("x-amz-meta-"))
+
+def _rfc822_dt(v): return datetime.datetime.strptime(v, rfc822_fmt)
+def _iso8601_dt(v): return datetime.datetime.strptime(v, iso8601_fmt)
+
+def aws_md5(data):
+ """Make an AWS-style MD5 hash (digest in base64).
+
+ >>> aws_md5("Hello!")
+ 'lS0sVtBIWVgzZ0e83ZhZDQ=='
+ """
+ return md5.new(data).digest().encode("base64")[:-1]
+
+def aws_urlquote(value):
+ r"""AWS-style quote a URL part.
+
+ >>> aws_urlquote("/bucket/a key")
+ '/bucket/a%20key'
+ >>> aws_urlquote(u"/bucket/\xe5der")
+ '/bucket/%C3%A5der'
+ """
+ if isinstance(value, unicode):
+ value = value.encode("utf-8")
+ return urllib.quote(value, "/")
+
+def info_dict(headers):
+ return {"size": int(headers["content-length"]),
+ "mimetype": headers.get("content-type"),
+ "date": _rfc822_dt(headers["date"]),
+ "modify": _rfc822_dt(headers["last-modified"]),
+ "headers": headers,
+ "metadata": headers_metadata(headers)}
+
+def name(o):
+ """Find the name of *o*.
+
+ Functions:
+ >>> name(name)
+ 'name'
+ >>> def my_fun(): pass
+ >>> name(my_fun)
+ 'my_fun'
+
+ Classes:
+ >>> name(Exception)
+ 'Exception'
+ >>> class MyKlass(object): pass
+ >>> name(MyKlass)
+ 'MyKlass'
+
+ Instances:
+ >>> name(Exception()), name(MyKlass())
+ ('Exception', 'MyKlass')
+
+ Types:
+ >>> name(str), name(object), name(int)
+ ('str', 'object', 'int')
+
+ Type instances:
+ >>> name("Hello"), name(True), name(None), name(Ellipsis)
+ ('str', 'bool', 'NoneType', 'ellipsis')
+ """
+ if hasattr(o, "__name__"): return o.__name__
+ for o in getattr(o, "__mro__", o.__class__.__mro__):
+ return name(o)
+
+class S3Error(Exception):
+ def __init__(self, message, **kwds):
+ self.message = message
+ self.extra = kwds.copy()
+
+ def __str__(self):
+ rv = self.message
+ if self.extra:
+ key_it = iter(self.extra)
+ rv += " ("
+ prep = ""
+ while len(rv) < 79:
+ try:
+ key = key_it.next()
+ except StopIteration:
+ break
+ rv += prep + key + "=" + repr(self.extra[key])
+ prep = ", "
+ rv += ")"
+ return rv
+
+ @classmethod
+ def from_urllib(cls, e):
+ """Try to read the real error from AWS."""
+ self = cls("HTTP error", code=e.code, url=e.filename)
+ self.code = e.code
+ self.fp = fp = e.fp
+ if fp:
+ self.data = data = fp.read()
+ begin, end = data.find("<Message>"), data.find("</Message>")
+ if min(begin, end) >= 0:
+ self.full_message = msg = data[begin + 9:end]
+ self.message = msg[:50]
+ if self.message != msg:
+ self.message += "..."
+ return self
+
+# Placeholder subclass of urllib2.HTTPHandler that adds no behaviour of its
+# own. S3Bucket passes it to urllib2.build_opener when creating its opener.
+class StreamHTTPHandler(urllib2.HTTPHandler):
+ pass
+
+# Placeholder subclass of urllib2.HTTPSHandler that adds no behaviour of its
+# own. S3Bucket passes it to urllib2.build_opener when creating its opener.
+class StreamHTTPSHandler(urllib2.HTTPSHandler):
+ pass
+
+class AnyMethodRequest(urllib2.Request):
+ def __init__(self, method, *args, **kwds):
+ self.method = method
+ urllib2.Request.__init__(self, *args, **kwds)
+
+ def get_method(self):
+ return self.method
+
+class S3File(str):
+ def __new__(cls, value, **kwds):
+ return super(S3File, cls).__new__(cls, value)
+
+ def __init__(self, value, **kwds):
+ kwds["data"] = value
+ self.kwds = kwds
+
+ def put_into(self, bucket, key):
+ return bucket.put(key, **self.kwds)
+
+class S3Bucket(object):
+ amazon_s3_base = "https://s3.amazonaws.com/"
+ listdir_re = re.compile(r"^<Key>(.+?)</Key>"
+ r"<LastModified>(.{24})</LastModified>"
+ r"<ETag>(.+?)</ETag><Size>(\d+?)</Size>$")
+
+ def __init__(self, name, access_key=None, secret_key=None, base_url=None):
+ self.opener = urllib2.build_opener(StreamHTTPHandler, StreamHTTPSHandler)
+ self.name = name
+ self.access_key = access_key
+ self.secret_key = secret_key
+ if not base_url:
+ self.base_url = self.amazon_s3_base + aws_urlquote(name)
+ else:
+ self.base_url = base_url
+
+ def __str__(self):
+ return "<%s %s at %r>" % (self.__class__.__name__, self.name, self.base_url)
+
+ def __repr__(self):
+ return self.__class__.__name__ + "(%r, access_key=%r, base_url=%r)" % (
+ self.name, self.access_key, self.base_url)
+
+ def __getitem__(self, name): return self.get(name)
+ def __delitem__(self, name): return self.delete(name)
+ def __setitem__(self, name, value):
+ if hasattr(value, "put_into"):
+ return value.put_into(self, name)
+ else:
+ return self.put(name, value)
+ def __contains__(self, name):
+ try:
+ self.info(name)
+ except KeyError:
+ return False
+ else:
+ return True
+
+ def sign_description(self, desc):
+ """AWS-style sign data."""
+ hasher = hmac.new(self.secret_key, desc.encode("utf-8"), sha)
+ return hasher.digest().encode("base64")[:-1]
+
+ def make_description(self, method, key=None, data=None,
+ headers={}, subresource=None, bucket=None):
+ # The signature descriptor is detalied in the developer's PDF on p. 65.
+ # Calculate canonicalized resource.
+ res = "/"
+ if bucket or bucket is None:
+ res += aws_urlquote(bucket or self.name)
+ res += "/"
+ if key:
+ res += aws_urlquote(key)
+ if subresource:
+ res += "?" + subresource
+ # Make description. :/
+ return "\n".join((method, headers.get("Content-MD5", ""),
+ headers.get("Content-Type", ""), headers.get("Date", ""))) + "\n" +\
+ _amz_canonicalize(headers) + res
+
+ def get_request_signature(self, method, key=None, data=None,
+ headers={}, subresource=None, bucket=None):
+ return self.sign_description(self.make_description(method, key=key,
+ data=data, headers=headers, subresource=subresource, bucket=bucket))
+
+ def new_request(self, method, key=None, args=None, data=None, headers={}):
+ url = self.base_url + "/"
+ headers = headers.copy()
+ if key:
+ url += aws_urlquote(key)
+ if args:
+ if hasattr(args, "iteritems"):
+ args_items = args.iteritems()
+ elif hasattr(args, "items"):
+ args_items = args.items()
+ else:
+ args_items = args
+ url += "?" + ";".join("=".join(map(urllib.quote_plus, item))
+ for item in args_items)
+ if data and "Content-MD5" not in headers:
+ headers["Content-MD5"] = aws_md5(data)
+ if "Date" not in headers:
+ headers["Date"] = time.strftime(rfc822_fmt, time.gmtime())
+ if "Authorization" not in headers:
+ sign = self.get_request_signature(method, key=key, data=data,
+ headers=headers)
+ headers["Authorization"] = "AWS %s:%s" % (self.access_key, sign)
+ return AnyMethodRequest(method, url, data=data, headers=headers)
+
+ def open_request(self, request, errors=True):
+ try:
+ return self.opener.open(request)
+ except urllib2.HTTPError, e:
+ if errors:
+ raise S3Error.from_urllib(e)
+ else:
+ return e
+
+ def make_request(self, method, key=None, args=None, data=None, headers={}):
+ request = self.new_request(method, key=key, args=args,
+ data=data, headers=headers)
+ return self.open_request(request)
+
+ def get(self, key):
+ response = self.make_request("GET", key=key)
+ response.s3_info = info_dict(dict(response.info()))
+ return response
+
+ def info(self, key):
+ request = self.new_request("HEAD", key=key)
+ response = self.open_request(request, errors=False)
+ if response.code == 404:
+ raise KeyError(key)
+ elif response.code != 200:
+ raise response # is an exception
+ rv = info_dict(dict(response.info()))
+ response.close()
+ return rv
+
+ def put(self, key, data=None, acl=None, metadata={},
+ mimetype="application/x-octet-stream"):
+ headers = {"Content-Type": mimetype}
+ headers.update(metadata_headers(metadata))
+ headers.update({"Content-Length": str(len(data)),
+ "Content-MD5": aws_md5(data)})
+ if acl:
+ headers["X-AMZ-ACL"] = acl
+ self.make_request("PUT", key=key, data=data, headers=headers).close()
+
+ def delete(self, key):
+ try:
+ self.make_request("DELETE", key=key).close()
+ except S3Error, e:
+ if e.code == 204:
+ return True
+ elif e.code == 404:
+ raise KeyError(key)
+ raise
+ else:
+ raise KeyError(key)
+
+ def listdir(self, prefix=None, marker=None, limit=None, delimiter=None):
+ """List contents of bucket.
+
+ *prefix*, if given, predicats `key.startswith(prefix)`.
+ *marker*, if given, predicates `key > marker`, lexicographically.
+ *limit*, if given, predicates `len(keys) <= limit`.
+ """
+ mapping = (("prefix", prefix),
+ ("marker", marker),
+ ("max-keys", limit),
+ ("delimiter", delimiter))
+ args = dict((k, v) for (k, v) in mapping if v is not None)
+ response = self.make_request("GET", args=args)
+ buffer = ""
+ while True:
+ data = response.read(4096)
+ buffer += data
+ while True:
+ pos_end = buffer.find("</Contents>")
+ if pos_end == -1:
+ break
+ piece = buffer[buffer.index("<Contents>") + 10:pos_end]
+ buffer = buffer[pos_end + 10:]
+ info = piece[:piece.index("<Owner>")]
+ mo = self.listdir_re.match(info)
+ if not mo:
+ raise ValueError("unexpected: %r" % (piece,))
+ key, modify, etag, size = mo.groups()
+ # FIXME A little brittle I would say...
+ etag = etag.replace("&quot;", '"')
+ yield key, _iso8601_dt(modify), etag, int(size)
+ if not data:
+ break
+
+# Running the module directly executes the doctests in its docstrings.
+if __name__ == "__main__":
+ import doctest
+ doctest.testmod()
Please sign in to comment.
Something went wrong with that request. Please try again.