forked from pandas-dev/pandas
-
Notifications
You must be signed in to change notification settings - Fork 2
/
s3.py
49 lines (39 loc) · 1.63 KB
/
s3.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
""" s3 support for remote file interactivity """
from typing import IO, Any, Optional, Tuple
from urllib.parse import urlparse as parse_url
from pandas._typing import FilePathOrBuffer
from pandas.compat._optional import import_optional_dependency
s3fs = import_optional_dependency(
"s3fs", extra="The s3fs package is required to handle s3 files."
)
def _strip_schema(url):
"""Returns the url without the s3:// part"""
result = parse_url(url, allow_fragments=False)
return result.netloc + result.path
def get_file_and_filesystem(
    filepath_or_buffer: FilePathOrBuffer, mode: Optional[str] = None
) -> Tuple[IO, Any]:
    """
    Open an S3 object and return it together with the filesystem used.

    A credentialed ``s3fs.S3FileSystem`` is tried first; if opening fails
    with one of the errors listed below, the open is retried once with an
    anonymous filesystem.

    Parameters
    ----------
    filepath_or_buffer : str
        S3 URL of the object; the scheme prefix is removed before opening.
    mode : str, optional
        Mode passed to ``S3FileSystem.open``. Defaults to ``"rb"``.

    Returns
    -------
    tuple of (file, filesystem)
        The opened file object and the ``S3FileSystem`` instance that
        opened it (credentialed, or anonymous if the fallback was taken).

    Raises
    ------
    Exception
        Whatever the anonymous retry raises is propagated unchanged.
    """
    from botocore.exceptions import NoCredentialsError

    if mode is None:
        mode = "rb"

    # The bucket/key path is the same for both attempts; compute it once.
    path = _strip_schema(filepath_or_buffer)

    fs = s3fs.S3FileSystem(anon=False)
    try:
        file = fs.open(path, mode)
    except (FileNotFoundError, PermissionError, NoCredentialsError):
        # boto3 has troubles when trying to access a public file
        # when credentialed...
        # An OSError is raised if you have credentials, but they
        # aren't valid for that bucket.
        # NOTE(review): s3fs is understood to report this as
        # FileNotFoundError or PermissionError depending on version, so
        # both OSError subclasses are caught -- confirm against the
        # supported s3fs releases.
        # A NoCredentialsError is raised if you don't have creds
        # for that bucket.
        fs = s3fs.S3FileSystem(anon=True)
        file = fs.open(path, mode)
    return file, fs
def get_filepath_or_buffer(
    filepath_or_buffer: FilePathOrBuffer,
    encoding: Optional[str] = None,
    compression: Optional[str] = None,
    mode: Optional[str] = None,
) -> Tuple[IO, Optional[str], Optional[str], bool]:
    """
    Open an S3 object and return it in a four-element tuple.

    Parameters
    ----------
    filepath_or_buffer : str
        S3 URL of the object to open.
    encoding : str, optional
        Accepted for signature compatibility; not used here.
    compression : str, optional
        Passed through unchanged as the third element of the result.
    mode : str, optional
        Forwarded to ``get_file_and_filesystem``.

    Returns
    -------
    tuple
        ``(file, None, compression, True)``.
    """
    # Only the opened handle is needed; the filesystem object is discarded.
    handle, _ = get_file_and_filesystem(filepath_or_buffer, mode=mode)
    # The encoding slot is always None.
    # NOTE(review): the trailing True is presumably a "caller should close
    # the handle" flag -- confirm against the caller.
    result = (handle, None, compression, True)
    return result