Skip to content

Commit

Permalink
Support remote datasets from s3 compliant data sources (#7453)
Browse files Browse the repository at this point in the history
  • Loading branch information
frcroth committed Nov 29, 2023
1 parent e3f007f commit 4487180
Show file tree
Hide file tree
Showing 2 changed files with 24 additions and 7 deletions.
1 change: 1 addition & 0 deletions CHANGELOG.unreleased.md
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@ For upgrade instructions, please check the [migration guide](MIGRATIONS.released
- Added support for reading uint24 rgb layers in datasets with zarr2/zarr3/n5/neuroglancerPrecomputed format, as used for voxelytics predictions. [#7413](https://github.com/scalableminds/webknossos/pull/7413)
- Adding a remote dataset can now be done by providing a Neuroglancer URI. [#7416](https://github.com/scalableminds/webknossos/pull/7416)
- Added a filter to the Task List->Stats column to quickly filter for tasks with "Pending", "In-Progress" or "Finished" instances. [#7430](https://github.com/scalableminds/webknossos/pull/7430)
- Added support for S3-compliant object storage services (e.g. MinIO) as a storage backend for remote datasets. [#7453](https://github.com/scalableminds/webknossos/pull/7453)

### Changed
- An appropriate error is returned when requesting an API version that is higher than the current version. [#7424](https://github.com/scalableminds/webknossos/pull/7424)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -8,9 +8,11 @@ import com.amazonaws.auth.{
BasicAWSCredentials,
EnvironmentVariableCredentialsProvider
}
import com.amazonaws.client.builder.AwsClientBuilder.EndpointConfiguration
import com.amazonaws.regions.Regions
import com.amazonaws.services.s3.{AmazonS3, AmazonS3ClientBuilder}
import com.amazonaws.services.s3.model.{GetObjectRequest, S3Object}
import com.amazonaws.util.AwsHostNameUtils
import com.scalableminds.util.tools.Fox
import com.scalableminds.webknossos.datastore.storage.{RemoteSourceDescriptor, S3AccessKeyCredential}
import net.liftweb.common.{Box, Failure, Full}
Expand All @@ -27,8 +29,8 @@ class S3DataVault(s3AccessKeyCredential: Option[S3AccessKeyCredential], uri: URI
case None => throw new Exception(s"Could not parse S3 bucket for ${uri.toString}")
}

// S3 client used by this vault. It is built lazily (on first access) and receives the
// vault's URI in addition to the optional credential, so that the client factory can
// inspect the host when configuring the client.
private lazy val client: AmazonS3 =
  S3DataVault.getAmazonS3Client(s3AccessKeyCredential, uri)

// Builds a GetObjectRequest for `key` in `bucketName`, restricted to the given byte range.
// `range.start` and `range.end` are passed to `withRange` unchanged.
// NOTE(review): the AWS SDK documents both bounds of `withRange` as inclusive — confirm
// that callers construct `range` accordingly (an exclusive `until` range would request
// one byte more than intended).
private def getRangeRequest(bucketName: String, key: String, range: NumericRange[Long]): GetObjectRequest = {
  val request = new GetObjectRequest(bucketName, key)
  request.withRange(range.start, range.end)
}
Expand Down Expand Up @@ -117,7 +119,8 @@ object S3DataVault {

// Path-style URIs carry the bucket as the first path segment:
//   https://s3.region-code.amazonaws.com/bucket-name/key-name
// Any dotted host outside of amazonaws.com (S3-compatible services such as MinIO) is
// treated as path-style as well.
// Returns false when the URI has no parsable host (`getHost` is null), instead of
// failing with a NullPointerException.
private def isPathStyle(uri: URI): Boolean =
  Option(uri.getHost).exists { host =>
    host.matches("s3(.[\\w\\-_]+)?.amazonaws.com") ||
    (!host.contains("amazonaws.com") && host.contains("."))
  }

// S3://bucket-name/key-name
private def isShortStyle(uri: URI): Boolean =
Expand All @@ -143,12 +146,25 @@ object S3DataVault {
new AnonymousAWSCredentialsProvider
}

// True for path-style URIs whose host does not end in ".amazonaws.com", i.e. URIs that
// point at an S3-compatible service rather than at AWS itself.
private def isNonAmazonHost(uri: URI): Boolean =
  isPathStyle(uri) && !uri.getHost.endsWith(".amazonaws.com")

// Builds the S3 client for the given URI.
//
// Credentials and global bucket access are configured in both cases. After that:
//   - non-Amazon host: path-style access is enabled and the endpoint is set explicitly to
//     the URI's authority (host[:port]); the signing region is whatever
//     AwsHostNameUtils.parseRegion derives from that authority.
//   - otherwise: the default AWS region is used.
// Region and endpoint configuration are set in separate branches on purpose; the AWS
// client builder documents them as mutually exclusive.
//
// NOTE(review): the custom endpoint is always addressed via plain "http://", regardless
// of the URI's scheme — confirm whether TLS endpoints need to be supported here.
private def getAmazonS3Client(credentialOpt: Option[S3AccessKeyCredential], uri: URI): AmazonS3 = {
  val builder = AmazonS3ClientBuilder.standard
    .withCredentials(getCredentialsProvider(credentialOpt))
    .withForceGlobalBucketAccessEnabled(true)
  if (isNonAmazonHost(uri))
    builder
      .withPathStyleAccessEnabled(true)
      .withEndpointConfiguration(
        new EndpointConfiguration(
          s"http://${uri.getAuthority}",
          AwsHostNameUtils.parseRegion(uri.getAuthority, "s3")
        )
      )
      .build()
  else builder.withRegion(Regions.DEFAULT_REGION).build()
}

}

Expand Down

0 comments on commit 4487180

Please sign in to comment.