Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[v22.3.x] cloud_storage: Extend tags in AWS S3 #7668

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
16 changes: 13 additions & 3 deletions src/v/archival/ntp_archiver_service.cc
Original file line number Diff line number Diff line change
Expand Up @@ -464,7 +464,9 @@ ss::future<cloud_storage::upload_result> ntp_archiver::upload_manifest() {
ctxlog.debug,
"Uploading manifest, path: {}",
manifest().get_manifest_path());
co_return co_await _remote.upload_manifest(_bucket, manifest(), fib);
auto tags = cloud_storage::remote::get_manifest_tags(_ntp, _rev);
co_return co_await _remote.upload_manifest(
_bucket, manifest(), fib, std::move(tags));
}

remote_segment_path
Expand Down Expand Up @@ -528,13 +530,15 @@ ntp_archiver::upload_segment(upload_candidate candidate) {
_io_priority));
};

auto tags = cloud_storage::remote::get_segment_tags(_ntp, _rev);
co_return co_await _remote.upload_segment(
_bucket,
path,
candidate.content_length,
reset_func,
fib,
lazy_abort_source);
lazy_abort_source,
std::move(tags));
}

ss::future<cloud_storage::upload_result>
Expand All @@ -561,7 +565,13 @@ ntp_archiver::upload_tx(upload_candidate candidate) {

cloud_storage::tx_range_manifest manifest(path, tx_range);

co_return co_await _remote.upload_manifest(_bucket, manifest, fib);
// Note: tx-manifest is uploaded using 'remote::upload_manifest'
// method but it has the same tags as the segment because it has
// the same lifetime as corresponding segment and associated with
// the segment.
auto tags = cloud_storage::remote::get_segment_tags(_ntp, _rev);
co_return co_await _remote.upload_manifest(
_bucket, manifest, fib, std::move(tags));
}

ss::future<ntp_archiver::scheduled_upload>
Expand Down
3 changes: 2 additions & 1 deletion src/v/archival/service.cc
Original file line number Diff line number Diff line change
Expand Up @@ -228,8 +228,9 @@ ss::future<> scheduler_service_impl::upload_topic_manifest(
rev);
auto key = tm.get_manifest_path();
vlog(ctxlog.debug, "Topic manifest object key is '{}'", key);
auto tags = cloud_storage::remote::get_manifest_tags(topic_ns, rev);
auto res = co_await _remote.local().upload_manifest(
_conf.bucket_name, tm, fib);
_conf.bucket_name, tm, fib, std::move(tags));
uploaded = res == cloud_storage::upload_result::success;
if (!uploaded) {
vlog(ctxlog.warn, "Topic manifest upload timed out: {}", key);
Expand Down
47 changes: 43 additions & 4 deletions src/v/cloud_storage/remote.cc
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@
#include "cloud_storage/materialized_segments.h"
#include "cloud_storage/remote_segment.h"
#include "cloud_storage/types.h"
#include "model/metadata.h"
#include "net/connection.h"
#include "s3/client.h"
#include "ssx/sformat.h"
Expand Down Expand Up @@ -310,13 +311,13 @@ ss::future<download_result> remote::do_download_manifest(
ss::future<upload_result> remote::upload_manifest(
const s3::bucket_name& bucket,
const base_manifest& manifest,
retry_chain_node& parent) {
retry_chain_node& parent,
std::vector<s3::object_tag> tags) {
gate_guard guard{_gate};
retry_chain_node fib(&parent);
retry_chain_logger ctxlog(cst_log, fib);
auto key = manifest.get_manifest_path();
auto path = s3::object_key(key());
std::vector<s3::object_tag> tags = {{"rp-type", "partition-manifest"}};
auto lease = co_await _pool.acquire();
auto permit = fib.retry();
vlog(ctxlog.debug, "Uploading manifest {} to the {}", path, bucket());
Expand Down Expand Up @@ -394,11 +395,11 @@ ss::future<upload_result> remote::upload_segment(
uint64_t content_length,
const reset_input_stream& reset_str,
retry_chain_node& parent,
lazy_abort_source& lazy_abort_source) {
lazy_abort_source& lazy_abort_source,
std::vector<s3::object_tag> tags) {
gate_guard guard{_gate};
retry_chain_node fib(&parent);
retry_chain_logger ctxlog(cst_log, fib);
std::vector<s3::object_tag> tags = {{"rp-type", "segment"}};
auto permit = fib.retry();
vlog(
ctxlog.debug,
Expand Down Expand Up @@ -793,4 +794,42 @@ cloud_roles::credentials auth_refresh_bg_op::build_static_credentials() const {
_region_name};
}

std::vector<s3::object_tag> remote::get_manifest_tags(
const model::ntp& ntp, model::initial_revision_id rev) {
auto tags = default_partition_manifest_tags;
tags.push_back({.key = "rp-ns", .value = ntp.ns()});
tags.push_back({.key = "rp-topic", .value = ntp.tp.topic()});
tags.push_back(
{.key = "rp-part", .value = ssx::sformat("{}", ntp.tp.partition())});
tags.push_back({.key = "rp-rev", .value = ssx::sformat("{}", rev)});
return tags;
}

std::vector<s3::object_tag> remote::get_manifest_tags(
const model::topic_namespace& tns, model::initial_revision_id rev) {
auto tags = default_topic_manifest_tags;
tags.push_back({.key = "rp-ns", .value = tns.ns()});
tags.push_back({.key = "rp-topic", .value = tns.tp()});
tags.push_back({.key = "rp-rev", .value = ssx::sformat("{}", rev)});
return tags;
}

std::vector<s3::object_tag> remote::get_segment_tags(
const model::ntp& ntp, model::initial_revision_id rev) {
auto tags = default_segment_tags;
tags.push_back({.key = "rp-ns", .value = ntp.ns()});
tags.push_back({.key = "rp-topic", .value = ntp.tp.topic()});
tags.push_back(
{.key = "rp-part", .value = ssx::sformat("{}", ntp.tp.partition())});
tags.push_back({.key = "rp-rev", .value = ssx::sformat("{}", rev)});
return tags;
}

const std::vector<s3::object_tag> remote::default_segment_tags = {
{"rp-type", "segment"}};
const std::vector<s3::object_tag> remote::default_topic_manifest_tags = {
{"rp-type", "topic-manifest"}};
const std::vector<s3::object_tag> remote::default_partition_manifest_tags = {
{"rp-type", "partition-manifest"}};

} // namespace cloud_storage
22 changes: 20 additions & 2 deletions src/v/cloud_storage/remote.h
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
#include "cloud_storage/fwd.h"
#include "cloud_storage/probe.h"
#include "cloud_storage/types.h"
#include "model/metadata.h"
#include "random/simple_time_jitter.h"
#include "s3/client.h"
#include "storage/segment_reader.h"
Expand Down Expand Up @@ -105,6 +106,11 @@ class auth_refresh_bg_op {
/// things like reconnects, backpressure and backoff.
class remote : public ss::peering_sharded_service<remote> {
public:
/// Default tags applied to objects
static const std::vector<s3::object_tag> default_segment_tags;
static const std::vector<s3::object_tag> default_partition_manifest_tags;
static const std::vector<s3::object_tag> default_topic_manifest_tags;

/// Functor that returns fresh input_stream object that can be used
/// to re-upload and will return all data that needs to be uploaded
using reset_input_stream = ss::noncopyable_function<
Expand Down Expand Up @@ -196,7 +202,8 @@ class remote : public ss::peering_sharded_service<remote> {
ss::future<upload_result> upload_manifest(
const s3::bucket_name& bucket,
const base_manifest& manifest,
retry_chain_node& parent);
retry_chain_node& parent,
std::vector<s3::object_tag> tags = default_partition_manifest_tags);

/// \brief Upload segment to S3
///
Expand All @@ -212,7 +219,8 @@ class remote : public ss::peering_sharded_service<remote> {
uint64_t content_length,
const reset_input_stream& reset_str,
retry_chain_node& parent,
lazy_abort_source& lazy_abort_source);
lazy_abort_source& lazy_abort_source,
std::vector<s3::object_tag> tags = default_segment_tags);

/// \brief Download segment from S3
///
Expand Down Expand Up @@ -254,6 +262,16 @@ class remote : public ss::peering_sharded_service<remote> {

materialized_segments& materialized() { return *_materialized; }

/// Add partition manifest tags (includes partition id)
static std::vector<s3::object_tag>
get_manifest_tags(const model::ntp& ntp, model::initial_revision_id rev);
/// Add topic manifest tags (no partition id)
static std::vector<s3::object_tag> get_manifest_tags(
const model::topic_namespace& ntp, model::initial_revision_id rev);
/// Add segment level tags
static std::vector<s3::object_tag>
get_segment_tags(const model::ntp& ntp, model::initial_revision_id rev);

private:
ss::future<> propagate_credentials(cloud_roles::credentials credentials);
s3::client_pool _pool;
Expand Down