Skip to content

Commit

Permalink
Move S3 client retrieval to a method
Browse files Browse the repository at this point in the history
so to specify the endpoint more easily.

Some changes are not strictly necessary in test_s3utils.py but to
- use the client more consistently
- use lowercase for bucket names because GCS does not allow uppercase
  • Loading branch information
hsinfang committed May 22, 2020
1 parent 9b98482 commit da4c496
Show file tree
Hide file tree
Showing 4 changed files with 54 additions and 45 deletions.
7 changes: 4 additions & 3 deletions python/lsst/daf/butler/core/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,7 @@
import lsst.utils
from lsst.utils import doImport
from .location import ButlerURI
from .s3utils import getS3Client

yaml.add_representer(collections.defaultdict, Representer.represent_dict)

Expand Down Expand Up @@ -108,7 +109,7 @@ def extractFile(self, filename):
elif fileuri.scheme == "s3":
if boto3 is None:
raise ModuleNotFoundError("Could not find boto3. Are you sure it is installed?")
s3 = boto3.client("s3")
s3 = getS3Client()
try:
response = s3.get_object(Bucket=fileuri.netloc, Key=fileuri.relativeToPathRoot)
except (s3.exceptions.NoSuchKey, s3.exceptions.NoSuchBucket) as err:
Expand Down Expand Up @@ -266,7 +267,7 @@ def __initFromS3YamlFile(self, url):
"Are you sure it is installed?")

uri = ButlerURI(url)
s3 = boto3.client("s3")
s3 = getS3Client()
try:
response = s3.get_object(Bucket=uri.netloc, Key=uri.relativeToPathRoot)
except (s3.exceptions.NoSuchKey, s3.exceptions.NoSuchBucket) as err:
Expand Down Expand Up @@ -851,7 +852,7 @@ def dumpToS3File(self, uri, *, overwrite=True):
if uri.scheme != "s3":
raise ValueError(f"Must provide S3 URI not {uri}")

s3 = boto3.client("s3")
s3 = getS3Client()

if not overwrite:
from .s3utils import s3CheckFileExists
Expand Down
37 changes: 28 additions & 9 deletions python/lsst/daf/butler/core/s3utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.

__all__ = ("s3CheckFileExists", "bucketExists", "setAwsEnvCredentials",
__all__ = ("getS3Client", "s3CheckFileExists", "bucketExists", "setAwsEnvCredentials",
"unsetAwsEnvCredentials")

import os
Expand All @@ -32,6 +32,24 @@
from .location import ButlerURI, Location


def getS3Client():
"""Create S3 client with AWS (default) or GCS if a GCS access key
is found in the environment variable.
Returns
-------
s3client : `botocore.client.S3`
A client of the S3 service.
"""
if boto3 is None:
raise ModuleNotFoundError("Could not find boto3. "
"Are you sure it is installed?")

key = os.environ.get("AWS_ACCESS_KEY_ID", "")
endpoint = "https://storage.googleapis.com" if key.startswith("GOOG") else None
return boto3.client("s3", endpoint_url=endpoint)


def s3CheckFileExists(path, bucket=None, client=None):
"""Returns (True, filesize) if file exists in the bucket and (False, -1) if
the file is not found.
Expand Down Expand Up @@ -62,11 +80,11 @@ def s3CheckFileExists(path, bucket=None, client=None):
configuration.html#configuring-credentials
"""
if boto3 is None:
raise ModuleNotFoundError(("Could not find boto3. "
"Are you sure it is installed?"))
raise ModuleNotFoundError("Could not find boto3. "
"Are you sure it is installed?")

if client is None:
client = boto3.client('s3')
client = getS3Client()

if isinstance(path, str):
if bucket is not None:
Expand Down Expand Up @@ -120,14 +138,15 @@ def bucketExists(bucketName, client=None):
configuration.html#configuring-credentials
"""
if boto3 is None:
raise ModuleNotFoundError(("Could not find boto3. "
"Are you sure it is installed?"))
raise ModuleNotFoundError("Could not find boto3. "
"Are you sure it is installed?")

s3 = boto3.client("s3")
if client is None:
client = getS3Client()
try:
s3.get_bucket_location(Bucket=bucketName)
client.get_bucket_location(Bucket=bucketName)
return True
except s3.exceptions.NoSuchBucket:
except client.exceptions.NoSuchBucket:
return False


Expand Down
5 changes: 2 additions & 3 deletions python/lsst/daf/butler/datastores/s3Datastore.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,6 @@

__all__ = ("S3Datastore", )

import boto3
import logging
import os
import pathlib
Expand All @@ -48,7 +47,7 @@
)

from .fileLikeDatastore import FileLikeDatastore
from lsst.daf.butler.core.s3utils import s3CheckFileExists, bucketExists
from lsst.daf.butler.core.s3utils import getS3Client, s3CheckFileExists, bucketExists

if TYPE_CHECKING:
from .fileLikeDatastore import DatastoreFileGetInformation
Expand Down Expand Up @@ -91,7 +90,7 @@ def __init__(self, config: Union[DatastoreConfig, str],
bridgeManager: DatastoreRegistryBridgeManager, butlerRoot: str = None):
super().__init__(config, bridgeManager, butlerRoot)

self.client = boto3.client("s3")
self.client = getS3Client()
if not bucketExists(self.locationFactory.netloc):
# PosixDatastore creates the root directory if one does not exist.
# Calling s3 client.create_bucket is possible but also requires
Expand Down
50 changes: 20 additions & 30 deletions tests/test_s3utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,6 @@

try:
import boto3
import botocore
from moto import mock_s3
except ImportError:
boto3 = None
Expand All @@ -33,7 +32,7 @@ def mock_s3(cls):
"""
return cls

from lsst.daf.butler.core.s3utils import (bucketExists, s3CheckFileExists,
from lsst.daf.butler.core.s3utils import (getS3Client, bucketExists, s3CheckFileExists,
setAwsEnvCredentials, unsetAwsEnvCredentials)
from lsst.daf.butler.core.location import Location, ButlerURI

Expand All @@ -43,50 +42,41 @@ def mock_s3(cls):
class S3UtilsTestCase(unittest.TestCase):
"""Test for the S3 related utilities.
"""
bucketName = "testBucketName"
bucketName = "test_bucket_name"
fileName = "testFileName"

def setUp(self):
# set up some fake credentials if they do not exist
self.usingDummyCredentials = setAwsEnvCredentials()

s3 = boto3.client("s3")
self.client = getS3Client()
try:
s3.create_bucket(Bucket=self.bucketName)
s3.put_object(Bucket=self.bucketName, Key=self.fileName,
Body=b"test content")
except s3.exceptions.BucketAlreadyExists:
self.client.create_bucket(Bucket=self.bucketName)
self.client.put_object(Bucket=self.bucketName, Key=self.fileName,
Body=b"test content")
except self.client.exceptions.BucketAlreadyExists:
pass

def tearDown(self):
s3 = boto3.resource('s3')
bucket = s3.Bucket(self.bucketName)
try:
bucket.objects.all().delete()
except botocore.exceptions.ClientError as err:
errorcode = err.response["ResponseMetadata"]["HTTPStatusCode"]
if errorcode == 404:
# the key does not exists - pass
pass
else:
raise

bucket = s3.Bucket(self.bucketName)
bucket.delete()
objects = self.client.list_objects(Bucket=self.bucketName)
if 'Contents' in objects:
for item in objects['Contents']:
self.client.delete_object(Bucket=self.bucketName, Key=item['Key'])

self.client.delete_bucket(Bucket=self.bucketName)

# unset any potentially set dummy credentials
if self.usingDummyCredentials:
unsetAwsEnvCredentials()

def testBucketExists(self):
self.assertTrue(bucketExists(f"{self.bucketName}"))
self.assertFalse(bucketExists(f"{self.bucketName}_NO_EXIST"))
self.assertFalse(bucketExists(f"{self.bucketName}_no_exist"))

def testFileExists(self):
s3 = boto3.client('s3')
self.assertTrue(s3CheckFileExists(client=s3, bucket=self.bucketName,
self.assertTrue(s3CheckFileExists(client=self.client, bucket=self.bucketName,
path=self.fileName)[0])
self.assertFalse(s3CheckFileExists(client=s3, bucket=self.bucketName,
self.assertFalse(s3CheckFileExists(client=self.client, bucket=self.bucketName,
path=self.fileName+"_NO_EXIST")[0])

datastoreRootUri = f"s3://{self.bucketName}/"
Expand All @@ -95,13 +85,13 @@ def testFileExists(self):
buri = ButlerURI(uri)
location = Location(datastoreRootUri, self.fileName)

self.assertTrue(s3CheckFileExists(client=s3, path=buri)[0])
self.assertTrue(s3CheckFileExists(client=self.client, path=buri)[0])
# just to make sure the overloaded keyword works correctly
self.assertTrue(s3CheckFileExists(buri, client=s3)[0])
self.assertTrue(s3CheckFileExists(client=s3, path=location)[0])
self.assertTrue(s3CheckFileExists(buri, client=self.client)[0])
self.assertTrue(s3CheckFileExists(client=self.client, path=location)[0])

# make sure supplying strings resolves correctly too
self.assertTrue(s3CheckFileExists(uri, client=s3))
self.assertTrue(s3CheckFileExists(uri, client=self.client))
self.assertTrue(s3CheckFileExists(uri))


Expand Down

0 comments on commit da4c496

Please sign in to comment.