Skip to content

Commit

Permalink
apacheGH-41797: [C++][S3] Remove GetBucketRegion hack for never AWS S…
Browse files Browse the repository at this point in the history
…DK versions

To get the region a S3 bucket resides on, it is required to issue a HeadBucket region and parse the response headers for a certain header value.

Unfortunately, the AWS SDK doesn't let us access arbitrary headers on successful responses for S3 model requests, which had us implement a workaround by calling lower-level SDK APIs.

However, the SDK recently added a `GetBucketRegion` method on `HeadBucketRequest`, which obsoletes the need for this workaround. We now use this method if the available AWS SDK version is recent enough.
  • Loading branch information
pitrou committed May 23, 2024
1 parent 420c01a commit e4e6ef2
Showing 1 changed file with 59 additions and 22 deletions.
81 changes: 59 additions & 22 deletions cpp/src/arrow/filesystem/s3fs.cc
Original file line number Diff line number Diff line change
Expand Up @@ -601,44 +601,81 @@ class S3Client : public Aws::S3::S3Client {
public:
using Aws::S3::S3Client::S3Client;

static inline constexpr auto kBucketRegionHeaderName = "x-amz-bucket-region";

std::string GetBucketRegionFromHeaders(
const Aws::Http::HeaderValueCollection& headers) {
const auto it = headers.find(ToAwsString(kBucketRegionHeaderName));
if (it != headers.end()) {
return std::string(FromAwsString(it->second));
}
return std::string();
}

template <typename ErrorType>
Result<std::string> GetBucketRegionFromError(
const std::string& bucket, const Aws::Client::AWSError<ErrorType>& error) {
std::string region = GetBucketRegionFromHeaders(error.GetResponseHeaders());
if (!region.empty()) {
return region;
} else if (error.GetResponseCode() == Aws::Http::HttpResponseCode::NOT_FOUND) {
return Status::IOError("Bucket '", bucket, "' not found");
} else {
return ErrorToStatus(
std::forward_as_tuple("When resolving region for bucket '", bucket, "': "),
"HeadBucket", error);
}
}

#if ARROW_AWS_SDK_VERSION_CHECK(1, 11, 212)
// HeadBucketResult::GetBucketRegion appeared in AWS SDK 1.11.212
Result<std::string> GetBucketRegion(const std::string& bucket,
const S3Model::HeadBucketRequest& request) {
auto outcome = this->HeadBucket(request);
if (!outcome.IsSuccess()) {
return GetBucketRegionFromError(bucket, outcome.GetError());
}
auto&& region = std::move(outcome).GetResult().GetBucketRegion();
if (region.empty()) {
return Status::IOError("When resolving region for bucket '", request.GetBucket(),
"': missing 'x-amz-bucket-region' header in response");
}
return region;
}
#else
// To get a bucket's region, we must extract the "x-amz-bucket-region" header
// from the response to a HEAD bucket request.
// Unfortunately, the S3Client APIs don't let us access the headers of successful
// responses. So we have to cook a AWS request and issue it ourselves.

Result<std::string> GetBucketRegion(const S3Model::HeadBucketRequest& request) {
Result<std::string> GetBucketRegion(const std::string& bucket,
const S3Model::HeadBucketRequest& request) {
auto uri = GeneratePresignedUrl(request.GetBucket(),
/*key=*/"", Aws::Http::HttpMethod::HTTP_HEAD);
// NOTE: The signer region argument isn't passed here, as there's no easy
// way of computing it (the relevant method is private).
auto outcome = MakeRequest(uri, request, Aws::Http::HttpMethod::HTTP_HEAD,
Aws::Auth::SIGV4_SIGNER);
const auto code = outcome.IsSuccess() ? outcome.GetResult().GetResponseCode()
: outcome.GetError().GetResponseCode();
const auto& headers = outcome.IsSuccess()
? outcome.GetResult().GetHeaderValueCollection()
: outcome.GetError().GetResponseHeaders();

const auto it = headers.find(ToAwsString("x-amz-bucket-region"));
if (it == headers.end()) {
if (code == Aws::Http::HttpResponseCode::NOT_FOUND) {
return Status::IOError("Bucket '", request.GetBucket(), "' not found");
} else if (!outcome.IsSuccess()) {
return ErrorToStatus(std::forward_as_tuple("When resolving region for bucket '",
request.GetBucket(), "': "),
"HeadBucket", outcome.GetError());
} else {
return Status::IOError("When resolving region for bucket '", request.GetBucket(),
"': missing 'x-amz-bucket-region' header in response");
}
if (!outcome.IsSuccess()) {
return GetBucketRegionFromError(bucket, outcome.GetError());
}
std::string region =
GetBucketRegionFromHeaders(outcome.GetResult().GetHeaderValueCollection());
if (!region.empty()) {
return region;
} else if (outcome.GetResult().GetResponseCode() ==
Aws::Http::HttpResponseCode::NOT_FOUND) {
return Status::IOError("Bucket '", request.GetBucket(), "' not found");
} else {
return Status::IOError("When resolving region for bucket '", request.GetBucket(),
"': missing 'x-amz-bucket-region' header in response");
}
return std::string(FromAwsString(it->second));
}
#endif

Result<std::string> GetBucketRegion(const std::string& bucket) {
S3Model::HeadBucketRequest req;
req.SetBucket(ToAwsString(bucket));
return GetBucketRegion(req);
return GetBucketRegion(bucket, req);
}

S3Model::CompleteMultipartUploadOutcome CompleteMultipartUploadWithErrorFixup(
Expand Down

0 comments on commit e4e6ef2

Please sign in to comment.