-
-
Notifications
You must be signed in to change notification settings - Fork 17.7k
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
closes #11915

Author: Tom Augspurger <tom.augspurger88@gmail.com>

Closes #13137 from TomAugspurger/s3fs and squashes the following commits:

92ac063 [Tom Augspurger] CI: Update deps, docs
81690b5 [Tom Augspurger] COMPAT/REF: Use s3fs for s3 IO
- Loading branch information
1 parent
8c798c0
commit dc4b070
Showing
14 changed files
with
72 additions
and
120 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -11,7 +11,7 @@ sqlalchemy | |
lxml=3.2.1 | ||
scipy | ||
xlsxwriter | ||
boto | ||
s3fs | ||
bottleneck | ||
html5lib | ||
beautiful-soup | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -11,7 +11,7 @@ sqlalchemy=0.9.6 | |
lxml=3.2.1 | ||
scipy | ||
xlsxwriter=0.4.6 | ||
boto=2.36.0 | ||
s3fs | ||
bottleneck | ||
psycopg2=2.5.2 | ||
patsy | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -13,7 +13,7 @@ numexpr | |
pytables | ||
sqlalchemy | ||
lxml | ||
boto | ||
s3fs | ||
bottleneck | ||
psycopg2 | ||
pymysql | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -17,7 +17,7 @@ sqlalchemy | |
pymysql | ||
psycopg2 | ||
xarray | ||
boto | ||
s3fs | ||
|
||
# incompat with conda ATM | ||
# beautiful-soup |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -12,7 +12,7 @@ matplotlib | |
jinja2 | ||
bottleneck | ||
xarray | ||
boto | ||
s3fs | ||
|
||
# incompat with conda ATM | ||
# beautiful-soup |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,110 +1,35 @@ | ||
""" s3 support for remote file interactivity """ | ||
|
||
import os | ||
from pandas import compat | ||
from pandas.compat import BytesIO | ||
|
||
try: | ||
import boto | ||
from boto.s3 import key | ||
import s3fs | ||
from botocore.exceptions import NoCredentialsError | ||
except: | ||
raise ImportError("boto is required to handle s3 files") | ||
raise ImportError("The s3fs library is required to handle s3 files") | ||
|
||
if compat.PY3: | ||
from urllib.parse import urlparse as parse_url | ||
else: | ||
from urlparse import urlparse as parse_url | ||
|
||
|
||
class BotoFileLikeReader(key.Key): | ||
"""boto Key modified to be more file-like | ||
This modification of the boto Key will read through a supplied | ||
S3 key once, then stop. The unmodified boto Key object will repeatedly | ||
cycle through a file in S3: after reaching the end of the file, | ||
boto will close the file. Then the next call to `read` or `next` will | ||
re-open the file and start reading from the beginning. | ||
Also adds a `readline` function which will split the returned | ||
values by the `\n` character. | ||
""" | ||
|
||
def __init__(self, *args, **kwargs): | ||
encoding = kwargs.pop("encoding", None) # Python 2 compat | ||
super(BotoFileLikeReader, self).__init__(*args, **kwargs) | ||
# Add a flag to mark the end of the read. | ||
self.finished_read = False | ||
self.buffer = "" | ||
self.lines = [] | ||
if encoding is None and compat.PY3: | ||
encoding = "utf-8" | ||
self.encoding = encoding | ||
self.lines = [] | ||
|
||
def next(self): | ||
return self.readline() | ||
|
||
__next__ = next | ||
|
||
def read(self, *args, **kwargs): | ||
if self.finished_read: | ||
return b'' if compat.PY3 else '' | ||
return super(BotoFileLikeReader, self).read(*args, **kwargs) | ||
|
||
def close(self, *args, **kwargs): | ||
self.finished_read = True | ||
return super(BotoFileLikeReader, self).close(*args, **kwargs) | ||
|
||
def seekable(self): | ||
"""Needed for reading by bz2""" | ||
return False | ||
|
||
def readline(self): | ||
"""Split the contents of the Key by '\n' characters.""" | ||
if self.lines: | ||
retval = self.lines[0] | ||
self.lines = self.lines[1:] | ||
return retval | ||
if self.finished_read: | ||
if self.buffer: | ||
retval, self.buffer = self.buffer, "" | ||
return retval | ||
else: | ||
raise StopIteration | ||
|
||
if self.encoding: | ||
self.buffer = "{}{}".format( | ||
self.buffer, self.read(8192).decode(self.encoding)) | ||
else: | ||
self.buffer = "{}{}".format(self.buffer, self.read(8192)) | ||
|
||
split_buffer = self.buffer.split("\n") | ||
self.lines.extend(split_buffer[:-1]) | ||
self.buffer = split_buffer[-1] | ||
|
||
return self.readline() | ||
def _strip_schema(url): | ||
"""Returns the url without the s3:// part""" | ||
result = parse_url(url) | ||
return result.netloc + result.path | ||
|
||
|
||
def get_filepath_or_buffer(filepath_or_buffer, encoding=None, | ||
compression=None): | ||
|
||
# Assuming AWS_ACCESS_KEY_ID, AWS_SECRET_ACCESS_KEY and AWS_S3_HOST | ||
# are environment variables | ||
parsed_url = parse_url(filepath_or_buffer) | ||
s3_host = os.environ.get('AWS_S3_HOST', 's3.amazonaws.com') | ||
|
||
fs = s3fs.S3FileSystem(anon=False) | ||
try: | ||
conn = boto.connect_s3(host=s3_host) | ||
except boto.exception.NoAuthHandlerFound: | ||
conn = boto.connect_s3(host=s3_host, anon=True) | ||
|
||
b = conn.get_bucket(parsed_url.netloc, validate=False) | ||
if compat.PY2 and compression: | ||
k = boto.s3.key.Key(b, parsed_url.path) | ||
filepath_or_buffer = BytesIO(k.get_contents_as_string( | ||
encoding=encoding)) | ||
else: | ||
k = BotoFileLikeReader(b, parsed_url.path, encoding=encoding) | ||
k.open('r') # Expose read errors immediately | ||
filepath_or_buffer = k | ||
filepath_or_buffer = fs.open(_strip_schema(filepath_or_buffer)) | ||
except (OSError, NoCredentialsError): | ||
# boto3 has troubles when trying to access a public file | ||
# when credentialed... | ||
# An OSError is raised if you have credentials, but they | ||
# aren't valid for that bucket. | ||
# A NoCredentialsError is raised if you don't have creds | ||
# for that bucket. | ||
fs = s3fs.S3FileSystem(anon=True) | ||
filepath_or_buffer = fs.open(_strip_schema(filepath_or_buffer)) | ||
return filepath_or_buffer, None, compression |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters