-
Notifications
You must be signed in to change notification settings - Fork 2.2k
/
s3_handler.py
92 lines (69 loc) · 3.02 KB
/
s3_handler.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
# Copyright 2013-2022 Lawrence Livermore National Security, LLC and other
# Spack Project Developers. See the top-level COPYRIGHT file for details.
#
# SPDX-License-Identifier: (Apache-2.0 OR MIT)
import urllib.error
import urllib.request
import urllib.response
from io import BufferedReader, IOBase
import spack.util.s3 as s3_util
import spack.util.url as url_util
# NOTE(opadron): Workaround issue in boto where its StreamingBody
# implementation is missing several APIs expected from IOBase. These missing
# APIs prevent the streams returned by boto from being passed as-are along to
# urllib.
#
# https://github.com/boto/botocore/issues/879
# https://github.com/python/cpython/pull/3249
class WrapStream(BufferedReader):
def __init__(self, raw):
# In botocore >=1.23.47, StreamingBody inherits from IOBase, so we
# only add missing attributes in older versions.
# https://github.com/boto/botocore/commit/a624815eabac50442ed7404f3c4f2664cd0aa784
if not isinstance(raw, IOBase):
raw.readable = lambda: True
raw.writable = lambda: False
raw.seekable = lambda: False
raw.closed = False
raw.flush = lambda: None
super(WrapStream, self).__init__(raw)
def detach(self):
self.raw = None
def read(self, *args, **kwargs):
return self.raw.read(*args, **kwargs)
def __getattr__(self, key):
return getattr(self.raw, key)
def _s3_open(url):
parsed = url_util.parse(url)
s3 = s3_util.create_s3_session(parsed, connection=s3_util.get_mirror_connection(parsed))
bucket = parsed.netloc
key = parsed.path
if key.startswith("/"):
key = key[1:]
obj = s3.get_object(Bucket=bucket, Key=key)
# NOTE(opadron): Apply workaround here (see above)
stream = WrapStream(obj["Body"])
headers = obj["ResponseMetadata"]["HTTPHeaders"]
return url, headers, stream
class UrllibS3Handler(urllib.request.HTTPSHandler):
def s3_open(self, req):
orig_url = req.get_full_url()
from botocore.exceptions import ClientError # type: ignore[import]
try:
url, headers, stream = _s3_open(orig_url)
return urllib.response.addinfourl(stream, headers, url)
except ClientError as err:
# if no such [KEY], but [KEY]/index.html exists,
# return that, instead.
if err.response["Error"]["Code"] == "NoSuchKey":
try:
_, headers, stream = _s3_open(url_util.join(orig_url, "index.html"))
return urllib.response.addinfourl(stream, headers, orig_url)
except ClientError as err2:
if err.response["Error"]["Code"] == "NoSuchKey":
# raise original error
raise urllib.error.URLError(err) from err
raise urllib.error.URLError(err2) from err2
raise urllib.error.URLError(err) from err
S3OpenerDirector = urllib.request.build_opener(UrllibS3Handler())
open = S3OpenerDirector.open