From bffc1b972cb172cb8deae1bb47eeb0282bd60cc8 Mon Sep 17 00:00:00 2001 From: frcroth Date: Wed, 22 Nov 2023 14:17:32 +0100 Subject: [PATCH 1/4] Support remote datasets from s3 compliant data sources --- build.sbt | 3 +++ .../datastore/datavault/S3DataVault.scala | 27 ++++++++++++++----- 2 files changed, 23 insertions(+), 7 deletions(-) diff --git a/build.sbt b/build.sbt index 558c4aed4c4..b3575880e7b 100644 --- a/build.sbt +++ b/build.sbt @@ -24,6 +24,9 @@ ThisBuild / dependencyCheckAssemblyAnalyzerEnabled := Some(false) PlayKeys.devSettings := Seq("play.server.akka.requestTimeout" -> "10000s", "play.server.http.idleTimeout" -> "10000s") +// Don't warn about unused imports in REPL +Compile / console / scalacOptions -= "-Wunused:imports" + scapegoatIgnoredFiles := Seq(".*/Tables.scala", ".*/Routes.scala", ".*/.*mail.*template\\.scala") scapegoatDisabledInspections := Seq("FinalModifierOnCaseClass", "UnusedMethodParameter", "UnsafeTraversableMethods") diff --git a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/datavault/S3DataVault.scala b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/datavault/S3DataVault.scala index 5f0b84f5870..bff1de8f19d 100644 --- a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/datavault/S3DataVault.scala +++ b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/datavault/S3DataVault.scala @@ -8,9 +8,11 @@ import com.amazonaws.auth.{ BasicAWSCredentials, EnvironmentVariableCredentialsProvider } +import com.amazonaws.client.builder.AwsClientBuilder.EndpointConfiguration import com.amazonaws.regions.Regions import com.amazonaws.services.s3.{AmazonS3, AmazonS3ClientBuilder} import com.amazonaws.services.s3.model.{GetObjectRequest, S3Object} +import com.amazonaws.util.AwsHostNameUtils import com.scalableminds.util.tools.Fox import com.scalableminds.webknossos.datastore.storage.{RemoteSourceDescriptor, S3AccessKeyCredential} import net.liftweb.common.{Box, Failure, Full} @@ -27,8 +29,8 
@@ class S3DataVault(s3AccessKeyCredential: Option[S3AccessKeyCredential], uri: URI case None => throw new Exception(s"Could not parse S3 bucket for ${uri.toString}") } - val client: AmazonS3 = - S3DataVault.getAmazonS3Client(s3AccessKeyCredential) + private lazy val client: AmazonS3 = + S3DataVault.getAmazonS3Client(s3AccessKeyCredential, uri) private def getRangeRequest(bucketName: String, key: String, range: NumericRange[Long]): GetObjectRequest = new GetObjectRequest(bucketName, key).withRange(range.start, range.end) @@ -117,7 +119,8 @@ object S3DataVault { // https://s3.region-code.amazonaws.com/bucket-name/key-name private def isPathStyle(uri: URI): Boolean = - uri.getHost.matches("s3(.[\\w\\-_]+)?.amazonaws.com") + uri.getHost.matches("s3(.[\\w\\-_]+)?.amazonaws.com") || + (!uri.getHost.contains("amazonaws.com") && uri.getHost.contains(".")) // S3://bucket-name/key-name private def isShortStyle(uri: URI): Boolean = @@ -143,12 +146,22 @@ object S3DataVault { new AnonymousAWSCredentialsProvider } - private def getAmazonS3Client(credentialOpt: Option[S3AccessKeyCredential]): AmazonS3 = - AmazonS3ClientBuilder.standard + private def getAmazonS3Client(credentialOpt: Option[S3AccessKeyCredential], uri: URI): AmazonS3 = { + val basic = AmazonS3ClientBuilder.standard .withCredentials(getCredentialsProvider(credentialOpt)) - .withRegion(Regions.DEFAULT_REGION) .withForceGlobalBucketAccessEnabled(true) - .build + if (isPathStyle(uri) && !uri.getHost.endsWith(".amazonaws.com")) + basic + .withPathStyleAccessEnabled(true) + .withEndpointConfiguration( + new EndpointConfiguration( + s"http://${uri.getAuthority}", + AwsHostNameUtils.parseRegion(uri.getAuthority, "s3") + ) + ) + .build() + else basic.withRegion(Regions.DEFAULT_REGION).build() + } } From 16363c5987a6997601ee151e20beb1562df2405d Mon Sep 17 00:00:00 2001 From: frcroth Date: Wed, 22 Nov 2023 14:28:48 +0100 Subject: [PATCH 2/4] Update changelog --- CHANGELOG.unreleased.md | 1 + 1 file changed, 1 insertion(+) 
diff --git a/CHANGELOG.unreleased.md b/CHANGELOG.unreleased.md index e61162a29fb..8586634a6b9 100644 --- a/CHANGELOG.unreleased.md +++ b/CHANGELOG.unreleased.md @@ -15,6 +15,7 @@ For upgrade instructions, please check the [migration guide](MIGRATIONS.released - Added support for reading uint24 rgb layers in datasets with zarr2/zarr3/n5/neuroglancerPrecomputed format, as used for voxelytics predictions. [#7413](https://github.com/scalableminds/webknossos/pull/7413) - Adding a remote dataset can now be done by providing a Neuroglancer URI. [#7416](https://github.com/scalableminds/webknossos/pull/7416) - Added a filter to the Task List->Stats column to quickly filter for tasks with "Prending", "In-Progress" or "Finished" instances. [#7430](https://github.com/scalableminds/webknossos/pull/7430) +- Added support for S3-compliant object storage services (e.g. MinIO) as a storage backend for remote datasets. [#7453](https://github.com/scalableminds/webknossos/pull/7453) ### Changed - An appropriate error is returned when requesting an API version that is higher that the current version. 
[#7424](https://github.com/scalableminds/webknossos/pull/7424) From 1a21c1720008e22ec60f1309e6d3d4694641d1e6 Mon Sep 17 00:00:00 2001 From: frcroth Date: Wed, 22 Nov 2023 14:30:26 +0100 Subject: [PATCH 3/4] Revert build.sbt change --- build.sbt | 3 --- 1 file changed, 3 deletions(-) diff --git a/build.sbt b/build.sbt index b3575880e7b..558c4aed4c4 100644 --- a/build.sbt +++ b/build.sbt @@ -24,9 +24,6 @@ ThisBuild / dependencyCheckAssemblyAnalyzerEnabled := Some(false) PlayKeys.devSettings := Seq("play.server.akka.requestTimeout" -> "10000s", "play.server.http.idleTimeout" -> "10000s") -// Don't warn about unused imports in REPL -Compile / console / scalacOptions -= "-Wunused:imports" - scapegoatIgnoredFiles := Seq(".*/Tables.scala", ".*/Routes.scala", ".*/.*mail.*template\\.scala") scapegoatDisabledInspections := Seq("FinalModifierOnCaseClass", "UnusedMethodParameter", "UnsafeTraversableMethods") From 970b8ab6758e35e8915a5a88870fad9f6456f55f Mon Sep 17 00:00:00 2001 From: frcroth Date: Mon, 27 Nov 2023 11:13:02 +0100 Subject: [PATCH 4/4] Improve code readability --- .../webknossos/datastore/datavault/S3DataVault.scala | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/datavault/S3DataVault.scala b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/datavault/S3DataVault.scala index bff1de8f19d..957ff8f4bd7 100644 --- a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/datavault/S3DataVault.scala +++ b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/datavault/S3DataVault.scala @@ -146,11 +146,14 @@ object S3DataVault { new AnonymousAWSCredentialsProvider } + private def isNonAmazonHost(uri: URI): Boolean = + isPathStyle(uri) && !uri.getHost.endsWith(".amazonaws.com") + private def getAmazonS3Client(credentialOpt: Option[S3AccessKeyCredential], uri: URI): AmazonS3 = { val basic = AmazonS3ClientBuilder.standard
.withCredentials(getCredentialsProvider(credentialOpt)) .withForceGlobalBucketAccessEnabled(true) - if (isPathStyle(uri) && !uri.getHost.endsWith(".amazonaws.com")) + if (isNonAmazonHost(uri)) basic .withPathStyleAccessEnabled(true) .withEndpointConfiguration(