Skip to content

Commit

Permalink
✨(backends) add support for AWS S3
Browse files Browse the repository at this point in the history
Implement s3 backend using low-level interface client from the boto3 library
  • Loading branch information
wilbrdt committed Sep 16, 2022
1 parent f9e10e6 commit cc26794
Show file tree
Hide file tree
Showing 10 changed files with 595 additions and 4 deletions.
8 changes: 8 additions & 0 deletions .env.dist
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,14 @@ RALPH_APP_DIR=/app/.ralph
# RALPH_BACKENDS__STORAGE__SWIFT__OS_REGION_NAME=RegionOne
# RALPH_BACKENDS__STORAGE__SWIFT__OS_STORAGE_URL=http://swift:8080/v1/KEY_cd238e84310a46e58af7f1d515887d88/test_container

# S3 storage backend

# RALPH_BACKENDS__STORAGE__S3__ACCESS_KEY_ID=
# RALPH_BACKENDS__STORAGE__S3__SECRET_ACCESS_KEY=
# RALPH_BACKENDS__STORAGE__S3__SESSION_TOKEN=
# RALPH_BACKENDS__STORAGE__S3__DEFAULT_REGION=
# RALPH_BACKENDS__STORAGE__S3__BUCKET_NAME=

# ES database backend

# RALPH_BACKENDS__DATABASE__ES__HOSTS=http://elasticsearch:9200
Expand Down
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ and this project adheres to
- Add `host` and `port` options for the `runserver` cli command
- Add support for database selection when running the Ralph LRS server
- Implement support for xAPI statement forwarding
- Implement support for AWS S3 storage backend

### Changed

Expand Down
18 changes: 18 additions & 0 deletions docs/backends.md
Original file line number Diff line number Diff line change
Expand Up @@ -100,6 +100,24 @@ Secondary parameters are required to work with the target container:
- `os_tenant_name`: the name of the tenant of your container
- `os_tenant_id`: the identifier of the tenant of your container

### Amazon S3

S3 is the Amazon Simple Storage Service. This storage backend is fully
supported (read and write operations) to stream and store log archives.

#### Backend parameters

Primarily required parameters correspond to a standard authentication with AWS CLI:

- `access_key_id`: the access key for your AWS account
- `secret_access_key`: the secret key for your AWS account
- `session_token`: the session key for your AWS account (only needed when you are using temporary credentials).

Secondary parameters are required to work with the target container:

- `default_region`: the region where your container is
- `bucket_name`: the name of your S3 bucket

### File system

The file system backend is a dummy template that can be used to develop your
Expand Down
3 changes: 3 additions & 0 deletions setup.cfg
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,8 @@ classifiers =
[options]
include_package_data = True
install_requires =
boto3==1.24.70
botocore==1.27.71
bcrypt==3.2.2
click==8.1.3
click-option-group==0.5.3
Expand Down Expand Up @@ -72,6 +74,7 @@ dev =
mkdocs-click==0.8.0
mkdocs-material==8.4.1
mkdocstrings[python-legacy]==0.19.0
moto==4.0.3
pyfakefs==4.5.6
pylint==2.14.5
pytest==7.1.2
Expand Down
150 changes: 150 additions & 0 deletions src/ralph/backends/storage/s3.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,150 @@
"""S3 storage backend for Ralph"""

import logging

import boto3
from botocore.exceptions import ClientError, ParamValidationError

from ralph.conf import settings
from ralph.exceptions import BackendException, BackendParameterException
from ralph.utils import now

from ..mixins import HistoryMixin
from .base import BaseStorage

s3_settings = settings.BACKENDS.STORAGE.S3
logger = logging.getLogger(__name__)


class S3Storage(
HistoryMixin, BaseStorage
): # pylint: disable=too-many-instance-attributes
"""AWS S3 storage backend."""

name = "s3"

# pylint: disable=too-many-arguments

def __init__(
self,
access_key_id: str = s3_settings.ACCESS_KEY_ID,
secret_access_key: str = s3_settings.SECRET_ACCESS_KEY,
session_token: str = s3_settings.SESSION_TOKEN,
default_region: str = s3_settings.DEFAULT_REGION,
bucket_name: str = s3_settings.BUCKET_NAME,
):
"""Instantiates the AWS S3 client."""

self.access_key_id = access_key_id
self.secret_access_key = secret_access_key
self.session_token = session_token
self.default_region = default_region
self.bucket_name = bucket_name

self.client = boto3.client(
"s3",
aws_access_key_id=self.access_key_id,
aws_secret_access_key=self.secret_access_key,
aws_session_token=self.session_token,
region_name=self.default_region,
)

# Check whether bucket exists and is accessible
try:
self.client.head_bucket(Bucket=self.bucket_name)
except ClientError as err:
error_msg = err.response["Error"]["Message"]
msg = "Unable to connect to the requested bucket: %s"
logger.error(msg, error_msg)
raise BackendParameterException(msg % error_msg) from err

def list(self, details=False, new=False):
"""Lists archives in the storage backend."""

archives_to_skip = set()
if new:
archives_to_skip = set(self.get_command_history(self.name, "fetch"))

try:
paginator = self.client.get_paginator("list_objects_v2")
page_iterator = paginator.paginate(Bucket=self.bucket_name)
for archives in page_iterator:
if "Contents" not in archives:
continue
for archive in archives["Contents"]:
if new and archive["Key"] in archives_to_skip:
continue
if details:
archive["LastModified"] = archive["LastModified"].strftime(
"%Y-%m-%d %H:%M:%S"
)
yield archive
else:
yield archive["Key"]
except ClientError as err:
error_msg = err.response["Error"]["Message"]
msg = "Failed to list the bucket %s: %s"
logger.error(msg, self.bucket_name, error_msg)
raise BackendException(msg % (self.bucket_name, error_msg)) from err

def url(self, name):
"""Gets `name` file absolute URL."""

return f"{self.bucket_name}.s3.{self.default_region}.amazonaws.com/{name}"

def read(self, name, chunk_size: int = 4096):
"""Reads `name` file and yields its content by chunks of a given size."""

logger.debug("Getting archive: %s", name)

try:
obj = self.client.get_object(Bucket=self.bucket_name, Key=name)
except ClientError as err:
error_msg = err.response["Error"]["Message"]
msg = "Failed to download %s: %s"
logger.error(msg, name, error_msg)
raise BackendException(msg % (name, error_msg)) from err

size = 0
for chunk in obj["Body"].iter_chunks(chunk_size):
logger.debug("Chunk length %s", len(chunk))
size += len(chunk)
yield chunk

# Archive fetched, add a new entry to the history
self.append_to_history(
{
"backend": self.name,
"command": "fetch",
"id": name,
"size": size,
"fetched_at": now(),
}
)

def write(self, stream, name, overwrite=False):
"""Writes data from `stream` to the `name` target."""

if not overwrite and name in list(self.list()):
msg = "%s already exists and overwrite is not allowed"
logger.error(msg, name)
raise FileExistsError(msg % name)

logger.debug("Creating archive: %s", name)

try:
self.client.upload_fileobj(stream, self.bucket_name, name)
except (ClientError, ParamValidationError) as exc:
msg = "Failed to upload"
logger.error(msg)
raise BackendException(msg) from exc

# Archive written, add a new entry to the history
self.append_to_history(
{
"backend": self.name,
"command": "push",
"id": name,
"pushed_at": now(),
}
)
13 changes: 13 additions & 0 deletions src/ralph/conf.py
Original file line number Diff line number Diff line change
Expand Up @@ -139,12 +139,25 @@ class SWIFTStorageBackendSettings(InstantiableSettingsItem):
OS_IDENTITY_API_VERSION: str = "3"


class S3StorageBackendSettings(InstantiableSettingsItem):
"""Represents the S3 storage backend configuration settings."""

_class_path: str = "ralph.backends.storage.s3.S3Storage"

ACCESS_KEY_ID: str = None
SECRET_ACCESS_KEY: str = None
SESSION_TOKEN: str = None
DEFAULT_REGION: str = None
BUCKET_NAME: str = None


class StorageBackendSettings(BaseModel):
"""Represents storage backend configuration settings."""

LDP: LDPStorageBackendSettings = LDPStorageBackendSettings()
FS: FSStorageBackendSettings = FSStorageBackendSettings()
SWIFT: SWIFTStorageBackendSettings = SWIFTStorageBackendSettings()
S3: S3StorageBackendSettings = S3StorageBackendSettings()


# Active storage backend Settings.
Expand Down
Loading

0 comments on commit cc26794

Please sign in to comment.