Skip to content

Commit

Permalink
Merge pull request #73 from lsst/tickets/DM-41879
Browse files Browse the repository at this point in the history
DM-41879: Implement S3 URL signing
  • Loading branch information
dhirving committed Dec 8, 2023
2 parents d2ae093 + 60ba628 commit a713f88
Show file tree
Hide file tree
Showing 12 changed files with 150 additions and 35 deletions.
4 changes: 2 additions & 2 deletions .github/workflows/build.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ jobs:
runs-on: ubuntu-latest
strategy:
matrix:
python-version: ["3.10", "3.11", "3.12"]
python-version: ["3.11", "3.12"]

steps:
- uses: actions/checkout@v3
Expand Down Expand Up @@ -98,7 +98,7 @@ jobs:
- name: Set up Python
uses: actions/setup-python@v4
with:
python-version: "3.10"
python-version: "3.11"
cache: "pip"
cache-dependency-path: "setup.cfg"

Expand Down
1 change: 1 addition & 0 deletions doc/changes/DM-41879.feature.rst
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Added methods to ResourcePath for generating presigned HTTP URLs: `generate_presigned_get_url` and `generate_presigned_put_url`. These are currently only implemented for S3.
3 changes: 3 additions & 0 deletions doc/changes/DM-41879.removal.rst
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
Deprecated ``clean_test_environment``, ``setAwsEnvCredentials``, and ``unsetAwsEnvCredentials`` from the ``s3utils`` submodule. The new function ``clean_test_environment_for_s3`` replaces these.

Dropped support for Python 3.10
4 changes: 2 additions & 2 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -15,17 +15,17 @@ classifiers = [
"License :: OSI Approved :: BSD License",
"Operating System :: OS Independent",
"Programming Language :: Python :: 3",
"Programming Language :: Python :: 3.10",
"Programming Language :: Python :: 3.11",
"Programming Language :: Python :: 3.12",
]
keywords = ["lsst"]
dependencies = [
"lsst-utils",
"importlib_resources",
"deprecated"
]
dynamic = ["version"]
requires-python = ">=3.10.0"
requires-python = ">=3.11.0"

[project.urls]
"Homepage" = "https://github.com/lsst/resources"
Expand Down
32 changes: 32 additions & 0 deletions python/lsst/resources/_resourcePath.py
Original file line number Diff line number Diff line change
Expand Up @@ -1428,6 +1428,38 @@ def _openImpl(self, mode: str = "r", *, encoding: str | None = None) -> Iterator
if "r" not in mode or "+" in mode:
self.write(out_bytes, overwrite=("x" not in mode))

def generate_presigned_get_url(self, *, expiration_time_seconds: int) -> str:
"""Return a pre-signed URL that can be used to retrieve this resource
using an HTTP GET without supplying any access credentials.
Parameters
----------
expiration_time_seconds : `int`
Number of seconds until the generated URL is no longer valid.
Returns
-------
url : `str`
HTTP URL signed for GET.
"""
raise NotImplementedError(f"URL signing is not supported for '{self.scheme}'")

def generate_presigned_put_url(self, *, expiration_time_seconds: int) -> str:
"""Return a pre-signed URL that can be used to upload a file to this
path using an HTTP PUT without supplying any access credentials.
Parameters
----------
expiration_time_seconds : `int`
Number of seconds until the generated URL is no longer valid.
Returns
-------
url : `str`
HTTP URL signed for PUT.
"""
raise NotImplementedError(f"URL signing is not supported for '{self.scheme}'")


ResourcePathExpression = str | urllib.parse.ParseResult | ResourcePath | Path
"""Type-annotation alias for objects that can be coerced to ResourcePath.
Expand Down
6 changes: 5 additions & 1 deletion python/lsst/resources/http.py
Original file line number Diff line number Diff line change
Expand Up @@ -920,7 +920,11 @@ def write(self, data: bytes, overwrite: bool = True) -> None:
raise FileExistsError(f"Remote resource {self} exists and overwrite has been disabled")

# Ensure the parent directory exists.
self.parent().mkdir()
# This is only meaningful and appropriate for WebDAV, not the general
# HTTP case. e.g. for S3 HTTP URLs, the underlying service has no
# concept of 'directories' at all.
if self.is_webdav_endpoint:
self.parent().mkdir()

# Upload the data.
log.debug("Writing data to remote resource: %s", self.geturl())
Expand Down
15 changes: 15 additions & 0 deletions python/lsst/resources/s3.py
Original file line number Diff line number Diff line change
Expand Up @@ -430,3 +430,18 @@ def _openImpl(
# BytesIO in the inheritance tree
with io.TextIOWrapper(cast(io.BytesIO, handle), encoding=encoding, write_through=True) as sub:
yield sub

def generate_presigned_get_url(self, *, expiration_time_seconds: int) -> str:
# Docstring inherited
return self._generate_presigned_url("get_object", expiration_time_seconds)

def generate_presigned_put_url(self, *, expiration_time_seconds: int) -> str:
# Docstring inherited
return self._generate_presigned_url("put_object", expiration_time_seconds)

def _generate_presigned_url(self, method: str, expiration_time_seconds: int) -> str:
return self.client.generate_presigned_url(
method,
Params={"Bucket": self.netloc, "Key": self.relativeToPathRoot},
ExpiresIn=expiration_time_seconds,
)
55 changes: 54 additions & 1 deletion python/lsst/resources/s3utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,23 +24,28 @@
"retryable_io_errors",
"retryable_client_errors",
"_TooManyRequestsError",
"clean_test_environment_for_s3",
)

import functools
import os
import re
from collections.abc import Callable
from collections.abc import Callable, Iterator
from contextlib import contextmanager
from http.client import HTTPException, ImproperConnectionState
from types import ModuleType
from typing import TYPE_CHECKING, Any, cast
from unittest.mock import patch

from botocore.exceptions import ClientError
from botocore.handlers import validate_bucket_name
from deprecated.sphinx import deprecated
from urllib3.exceptions import HTTPError, RequestError

if TYPE_CHECKING:
from unittest import TestCase


try:
import boto3
except ImportError:
Expand Down Expand Up @@ -117,6 +122,12 @@ class _TooManyRequestsError(Exception):
max_retry_time = 60


@deprecated(
reason="This has been replaced by a new function, clean_test_environment_for_s3()."
" Will be removed after v26.2023.5000",
version="26.2023.5000",
category=FutureWarning,
)
def clean_test_environment(testcase: TestCase) -> None:
"""Clear S3_ENDPOINT_URL then restore it at the end of a test.
Expand All @@ -138,6 +149,35 @@ def cleanup() -> None:
testcase.addCleanup(cleanup)


@contextmanager
def clean_test_environment_for_s3() -> Iterator[None]:
"""Reset S3 environment to ensure that unit tests with a mock S3 can't
accidentally reference real infrastructure
"""
with patch.dict(
os.environ,
{
"AWS_ACCESS_KEY_ID": "test-access-key",
"AWS_SECRET_ACCESS_KEY": "test-secret-access-key",
},
) as patched_environ:
for var in (
"S3_ENDPOINT_URL",
"AWS_SECURITY_TOKEN",
"AWS_SESSION_TOKEN",
"AWS_PROFILE",
"AWS_SHARED_CREDENTIALS_FILE",
"AWS_CONFIG_FILE",
):
patched_environ.pop(var, None)
# Clear the cached boto3 S3 client instances.
# This helps us avoid a potential situation where the client could be
# instantiated before moto mocks are installed, which would prevent the
# mocks from taking effect.
_get_s3_client.cache_clear()
yield


def getS3Client() -> boto3.client:
"""Create a S3 client with AWS (default) or the specified endpoint.
Expand Down Expand Up @@ -292,6 +332,13 @@ def bucketExists(bucketName: str, client: boto3.client | None = None) -> bool:
return False


@deprecated(
reason="This function could accidentally leave real credentials in the environment during testing."
" A new function, clean_test_environment_for_s3(), can be used to set up mock credentials."
" Will be removed after v26.2023.5000",
version="26.2023.5000",
category=FutureWarning,
)
def setAwsEnvCredentials(
accessKeyId: str = "dummyAccessKeyId", secretAccessKey: str = "dummySecretAccessKey"
) -> bool:
Expand Down Expand Up @@ -323,6 +370,12 @@ def setAwsEnvCredentials(
return False


@deprecated(
reason="This has been replaced by a new function, clean_test_environment_for_s3()."
" Will be removed after v26.2023.5000",
version="26.2023.5000",
category=FutureWarning,
)
def unsetAwsEnvCredentials() -> None:
"""Unset AWS credential environment variables.
Expand Down
1 change: 1 addition & 0 deletions requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -10,3 +10,4 @@ responses >= 0.12.0
urllib3 >= 1.25.10
requests >= 2.26.0
defusedxml
deprecated
37 changes: 31 additions & 6 deletions tests/test_s3.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,10 +9,12 @@
# Use of this source code is governed by a 3-clause BSD-style
# license that can be found in the LICENSE file.

import time
import unittest
from urllib.parse import parse_qs, urlparse

from lsst.resources import ResourcePath
from lsst.resources.s3utils import clean_test_environment
from lsst.resources.s3utils import clean_test_environment_for_s3
from lsst.resources.tests import GenericReadWriteTestCase, GenericTestCase

try:
Expand Down Expand Up @@ -45,14 +47,10 @@ class S3ReadWriteTestCase(GenericReadWriteTestCase, unittest.TestCase):
"""The mocked s3 interface from moto."""

def setUp(self):
self.enterContext(clean_test_environment_for_s3())
# Enable S3 mocking of tests.
self.mock_s3.start()

clean_test_environment(self)

# set up some fake credentials if they do not exist
# self.usingDummyCredentials = setAwsEnvCredentials()

# MOTO needs to know that we expect Bucket bucketname to exist
s3 = boto3.resource("s3")
s3.create_bucket(Bucket=self.netloc)
Expand Down Expand Up @@ -139,6 +137,33 @@ def test_handle(self):
result = handle.read(1024)
self.assertEqual(result, 1024 * b"a")

def test_url_signing(self):
s3_path = self.root_uri.join("url-signing-test.txt")

put_url = s3_path.generate_presigned_put_url(expiration_time_seconds=1800)
self._check_presigned_url(put_url, 1800)
get_url = s3_path.generate_presigned_get_url(expiration_time_seconds=3600)
self._check_presigned_url(get_url, 3600)

# Moto monkeypatches the 'requests' library to mock access to presigned
# URLs, so we are able to use HttpResourcePath to access the URLs in
# this test
test_data = b"test123"
ResourcePath(put_url).write(test_data)
retrieved = ResourcePath(get_url).read()
self.assertEqual(retrieved, test_data)

def _check_presigned_url(self, url: str, expiration_time_seconds: int):
parsed = urlparse(url)
self.assertEqual(parsed.scheme, "https")

actual_expiration_timestamp = int(parse_qs(parsed.query)["Expires"][0])
current_time = int(time.time())
expected_expiration_timestamp = current_time + expiration_time_seconds
# Allow some flex in the expiration time in case this test process goes
# out to lunch for a while on a busy CI machine
self.assertLessEqual(abs(expected_expiration_timestamp - actual_expiration_timestamp), 120)


if __name__ == "__main__":
unittest.main()
26 changes: 3 additions & 23 deletions tests/test_s3utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,44 +23,28 @@
import unittest
from unittest import mock

from lsst.resources.s3utils import clean_test_environment

try:
import boto3
from botocore.exceptions import ParamValidationError
from moto import mock_s3
except ImportError:
boto3 = None

def mock_s3(cls):
"""No-op decorator in case moto mock_s3 can not be imported."""
return cls


from lsst.resources import ResourcePath
from lsst.resources.location import Location
from lsst.resources.s3utils import (
bucketExists,
getS3Client,
s3CheckFileExists,
setAwsEnvCredentials,
unsetAwsEnvCredentials,
)
from lsst.resources.s3utils import bucketExists, clean_test_environment_for_s3, getS3Client, s3CheckFileExists


@unittest.skipIf(not boto3, "Warning: boto3 AWS SDK not found!")
@mock_s3
class S3UtilsTestCase(unittest.TestCase):
"""Test for the S3 related utilities."""

bucketName = "test_bucket_name"
fileName = "testFileName"

def setUp(self):
# set up some fake credentials if they do not exist
self.usingDummyCredentials = setAwsEnvCredentials()

clean_test_environment(self)
self.enterContext(clean_test_environment_for_s3())
self.enterContext(mock_s3())

self.client = getS3Client()
try:
Expand All @@ -77,10 +61,6 @@ def tearDown(self):

self.client.delete_bucket(Bucket=self.bucketName)

# unset any potentially set dummy credentials
if self.usingDummyCredentials:
unsetAwsEnvCredentials()

def testBucketExists(self):
self.assertTrue(bucketExists(f"{self.bucketName}"))
self.assertFalse(bucketExists(f"{self.bucketName}_no_exist"))
Expand Down
1 change: 1 addition & 0 deletions types.txt
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
types-Deprecated
types-requests
types-setuptools
types-urllib3
Expand Down

0 comments on commit a713f88

Please sign in to comment.