✨(backends) add support for the AWS S3 backend #213

Merged · 2 commits · Jan 5, 2023
8 changes: 8 additions & 0 deletions .env.dist
@@ -33,6 +33,14 @@ RALPH_APP_DIR=/app/.ralph
# RALPH_BACKENDS__STORAGE__SWIFT__OS_REGION_NAME=RegionOne
# RALPH_BACKENDS__STORAGE__SWIFT__OS_STORAGE_URL=http://swift:8080/v1/KEY_cd238e84310a46e58af7f1d515887d88/test_container

# S3 storage backend

# RALPH_BACKENDS__STORAGE__S3__ACCESS_KEY_ID=
# RALPH_BACKENDS__STORAGE__S3__SECRET_ACCESS_KEY=
# RALPH_BACKENDS__STORAGE__S3__SESSION_TOKEN=
# RALPH_BACKENDS__STORAGE__S3__DEFAULT_REGION=
# RALPH_BACKENDS__STORAGE__S3__BUCKET_NAME=

# ES database backend

RALPH_BACKENDS__DATABASE__ES__HOSTS=http://elasticsearch:9200
1 change: 1 addition & 0 deletions CHANGELOG.md
@@ -12,6 +12,7 @@ and this project adheres to

- Add a new `auth` subcommand to generate required credentials file for the LRS
- Add an official Helm Chart (experimental)
- Implement support for AWS S3 storage backend

### Changed

19 changes: 19 additions & 0 deletions docs/backends.md
@@ -100,6 +100,25 @@ Secondary parameters are required to work with the target container:
- `os_tenant_name`: the name of the tenant of your container
- `os_tenant_id`: the identifier of the tenant of your container

### Amazon S3

S3 is the Amazon Simple Storage Service. This storage backend fully supports
both read and write operations for streaming and storing log archives.

#### Backend parameters

The primary required parameters correspond to those used for standard AWS CLI
authentication:

- `access_key_id`: the access key for your AWS account
- `secret_access_key`: the secret key for your AWS account
- `session_token`: the session key for your AWS account (only required when
  using temporary credentials)

Secondary parameters are required to work with the target bucket:

- `default_region`: the region where your bucket is located
- `bucket_name`: the name of your S3 bucket
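
These parameters map to `RALPH_BACKENDS__STORAGE__S3__*` environment
variables, following the same convention as the `.env.dist` excerpt above. A
minimal configuration sketch (all values below are placeholders):

```bash
RALPH_BACKENDS__STORAGE__S3__ACCESS_KEY_ID=AKIAXXXXXXXXXXXXXXXX
RALPH_BACKENDS__STORAGE__S3__SECRET_ACCESS_KEY=xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx
RALPH_BACKENDS__STORAGE__S3__DEFAULT_REGION=eu-west-1
RALPH_BACKENDS__STORAGE__S3__BUCKET_NAME=my-log-archives
```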

### File system

The file system backend is a dummy template that can be used to develop your
7 changes: 6 additions & 1 deletion setup.cfg
@@ -42,6 +42,9 @@ backend-ldp =
requests>=2.0.0
backend-mongo =
pymongo[srv]>=4.0.0
backend-s3 =
boto3>=1.24.70
botocore>=1.27.71
backend-swift =
python-keystoneclient>=5.0.0
python-swiftclient>=4.0.0
@@ -68,6 +71,7 @@ dev =
mkdocs-click==0.8.0
mkdocs-material==8.5.11
mkdocstrings[python-legacy]==0.19.1
moto==4.0.3
pydocstyle==6.1.1
pyfakefs==5.0.0
pylint==2.15.9
@@ -115,7 +119,8 @@ exclude =

[pydocstyle]
convention = google
match = ^src/ralph.*\.py
match_dir = ^(?!tests).*
match = ^(?!(setup)\.(py)$).*\.(py)$

[isort]
known_ralph=ralph
19 changes: 13 additions & 6 deletions src/ralph/api/auth.py
@@ -49,21 +49,28 @@ class UserCredentials(AuthenticatedUser):


class ServerUsersCredentials(BaseModel):
"""Custom root pydantic model describing expected list of all server users
credentials as stored in the credentials file."""
"""Custom root pydantic model.

Describes expected list of all server users credentials as stored in
the credentials file.

Attributes:
__root__ (List): Custom root consisting of the
list of all server users credentials.
"""

__root__: list[UserCredentials]

def __add__(self, other):
def __add__(self, other): # noqa: D105
return ServerUsersCredentials.parse_obj(self.__root__ + other.__root__)

def __getitem__(self, item: int):
def __getitem__(self, item: int): # noqa: D105
return self.__root__[item]

def __len__(self):
def __len__(self): # noqa: D105
return len(self.__root__)

def __iter__(self):
def __iter__(self): # noqa: D105
return iter(self.__root__)

@root_validator
145 changes: 145 additions & 0 deletions src/ralph/backends/storage/s3.py
@@ -0,0 +1,145 @@
"""S3 storage backend for Ralph."""

import logging

import boto3
from botocore.exceptions import ClientError, ParamValidationError

from ralph.conf import settings
from ralph.exceptions import BackendException, BackendParameterException
from ralph.utils import now

from ..mixins import HistoryMixin
from .base import BaseStorage

s3_settings = settings.BACKENDS.STORAGE.S3
logger = logging.getLogger(__name__)


class S3Storage(
HistoryMixin, BaseStorage
): # pylint: disable=too-many-instance-attributes
"""AWS S3 storage backend."""

name = "s3"

# pylint: disable=too-many-arguments

def __init__(
self,
access_key_id: str = s3_settings.ACCESS_KEY_ID,
secret_access_key: str = s3_settings.SECRET_ACCESS_KEY,
session_token: str = s3_settings.SESSION_TOKEN,
default_region: str = s3_settings.DEFAULT_REGION,
bucket_name: str = s3_settings.BUCKET_NAME,
):
"""Instantiates the AWS S3 client."""
self.access_key_id = access_key_id
self.secret_access_key = secret_access_key
self.session_token = session_token
self.default_region = default_region
self.bucket_name = bucket_name

self.client = boto3.client(
"s3",
aws_access_key_id=self.access_key_id,
aws_secret_access_key=self.secret_access_key,
aws_session_token=self.session_token,
region_name=self.default_region,
)

# Check whether bucket exists and is accessible
try:
self.client.head_bucket(Bucket=self.bucket_name)
except ClientError as err:
error_msg = err.response["Error"]["Message"]
msg = "Unable to connect to the requested bucket: %s"
logger.error(msg, error_msg)
raise BackendParameterException(msg % error_msg) from err

def list(self, details=False, new=False):
"""Lists archives in the storage backend."""
archives_to_skip = set()
if new:
archives_to_skip = set(self.get_command_history(self.name, "fetch"))

try:
paginator = self.client.get_paginator("list_objects_v2")
page_iterator = paginator.paginate(Bucket=self.bucket_name)
for archives in page_iterator:
if "Contents" not in archives:
continue
for archive in archives["Contents"]:
if new and archive["Key"] in archives_to_skip:
continue
if details:
archive["LastModified"] = archive["LastModified"].strftime(
"%Y-%m-%d %H:%M:%S"
)
yield archive
else:
yield archive["Key"]
except ClientError as err:
error_msg = err.response["Error"]["Message"]
msg = "Failed to list the bucket %s: %s"
logger.error(msg, self.bucket_name, error_msg)
raise BackendException(msg % (self.bucket_name, error_msg)) from err

def url(self, name):
"""Gets `name` file absolute URL."""
return f"{self.bucket_name}.s3.{self.default_region}.amazonaws.com/{name}"

def read(self, name, chunk_size: int = 4096):
"""Reads `name` file and yields its content by chunks of a given size."""
logger.debug("Getting archive: %s", name)

try:
obj = self.client.get_object(Bucket=self.bucket_name, Key=name)
except ClientError as err:
error_msg = err.response["Error"]["Message"]
msg = "Failed to download %s: %s"
logger.error(msg, name, error_msg)
raise BackendException(msg % (name, error_msg)) from err

size = 0
for chunk in obj["Body"].iter_chunks(chunk_size):
logger.debug("Chunk length %s", len(chunk))
size += len(chunk)
yield chunk

# Archive fetched, add a new entry to the history
self.append_to_history(
{
"backend": self.name,
"command": "fetch",
"id": name,
"size": size,
"fetched_at": now(),
}
)

def write(self, stream, name, overwrite=False):
"""Writes data from `stream` to the `name` target."""
if not overwrite and name in list(self.list()):
msg = "%s already exists and overwrite is not allowed"
logger.error(msg, name)
raise FileExistsError(msg % name)

logger.debug("Creating archive: %s", name)

try:
self.client.upload_fileobj(stream, self.bucket_name, name)
except (ClientError, ParamValidationError) as exc:
msg = "Failed to upload"
logger.error(msg)
raise BackendException(msg) from exc

# Archive written, add a new entry to the history
self.append_to_history(
{
"backend": self.name,
"command": "push",
"id": name,
"pushed_at": now(),
}
)
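
For reference, here is a minimal usage sketch of this backend. The
credentials, bucket and archive names are hypothetical placeholders, and
instantiation requires a reachable bucket since `__init__` performs a
`head_bucket` check:

```python
from ralph.backends.storage.s3 import S3Storage

# Instantiate the backend; omitted parameters default to the
# RALPH_BACKENDS__STORAGE__S3__* settings.
storage = S3Storage(
    access_key_id="AKIAXXXXXXXXXXXXXXXX",      # placeholder credentials
    secret_access_key="xxxxxxxxxxxxxxxxxxxx",  # placeholder credentials
    session_token=None,                        # only for temporary credentials
    default_region="eu-west-1",
    bucket_name="my-log-archives",
)

# List the names of the archives stored in the bucket.
for key in storage.list():
    print(key)

# Stream an archive to a local file, chunk by chunk.
with open("2023-01-05.gz", "wb") as local_archive:
    for chunk in storage.read("2023-01-05.gz"):
        local_archive.write(chunk)

# Push a local file to the bucket; raises FileExistsError if the key
# already exists and overwrite is False.
with open("2023-01-05.gz", "rb") as local_archive:
    storage.write(local_archive, "2023-01-05.gz", overwrite=True)
```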
13 changes: 13 additions & 0 deletions src/ralph/conf.py
@@ -145,12 +145,25 @@ class SWIFTStorageBackendSettings(InstantiableSettingsItem):
OS_IDENTITY_API_VERSION: str = "3"


class S3StorageBackendSettings(InstantiableSettingsItem):
"""Represents the S3 storage backend configuration settings."""

_class_path: str = "ralph.backends.storage.s3.S3Storage"

ACCESS_KEY_ID: str = None
SECRET_ACCESS_KEY: str = None
SESSION_TOKEN: str = None
DEFAULT_REGION: str = None
BUCKET_NAME: str = None


class StorageBackendSettings(BaseModel):
"""Pydantic model for storage backend configuration settings."""

LDP: LDPStorageBackendSettings = LDPStorageBackendSettings()
FS: FSStorageBackendSettings = FSStorageBackendSettings()
SWIFT: SWIFTStorageBackendSettings = SWIFTStorageBackendSettings()
S3: S3StorageBackendSettings = S3StorageBackendSettings()


# Active storage backend Settings.
Expand Down