Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Speed up Zarr3 data reading by fixing VaultPath equality check #7363

Merged
merged 7 commits into from
Oct 5, 2023
Merged
Show file tree
Hide file tree
Changes from 6 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions CHANGELOG.unreleased.md
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@ For upgrade instructions, please check the [migration guide](MIGRATIONS.released
### Added

- Added social media link previews for links to datasets and annotations (only if they are public or if the links contain sharing tokens). [#7331](https://github.com/scalableminds/webknossos/pull/7331)
- Loading sharded zarr3 datasets is now significantly faster. [#7363](https://github.com/scalableminds/webknossos/pull/7363)

### Changed
- Updated backend code to Scala 2.13, with upgraded Dependencies for optimized performance. [#7327](https://github.com/scalableminds/webknossos/pull/7327)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -89,8 +89,7 @@ class Zarr3Array(vaultPath: VaultPath,
override protected lazy val chunkReader: ChunkReader =
new Zarr3ChunkReader(header, this)

private val shardIndexCache: AlfuCache[VaultPath, Array[Byte]] =
AlfuCache()
private val parsedShardIndexCache: AlfuCache[VaultPath, Array[(Long, Long)]] = AlfuCache()

private def shardShape =
header.outerChunkSize // Only valid for one hierarchy of sharding codecs, describes total voxel size of a shard
Expand All @@ -105,7 +104,7 @@ class Zarr3Array(vaultPath: VaultPath,
private def checkSumLength = 4 // 32-bit checksum
private def getShardIndexSize = shardIndexEntryLength * chunksPerShard + checkSumLength

private def getChunkIndexInShardIndex(chunkIndex: Array[Int], shardCoordinates: Array[Int]) = {
private def getChunkIndexInShardIndex(chunkIndex: Array[Int], shardCoordinates: Array[Int]): Int = {
val shardOffset = shardCoordinates.zip(indexShape).map { case (sc, is) => sc * is }
indexShape.tails.toList
.dropRight(1)
Expand All @@ -114,12 +113,18 @@ class Zarr3Array(vaultPath: VaultPath,
.sum
}

private def readAndParseShardIndex(shardPath: VaultPath)(implicit ec: ExecutionContext): Fox[Array[(Long, Long)]] =
for {
shardIndexRaw <- readShardIndex(shardPath)
parsed = parseShardIndex(shardIndexRaw)
} yield parsed

private def readShardIndex(shardPath: VaultPath)(implicit ec: ExecutionContext) =
shardPath.readLastBytes(getShardIndexSize)

private def parseShardIndex(index: Array[Byte]): Seq[(Long, Long)] = {
private def parseShardIndex(index: Array[Byte]): Array[(Long, Long)] = {
val decodedIndex = shardingCodec match {
case Some(shardingCodec: ShardingCodec) =>
case Some(_: ShardingCodec) =>
indexCodecs.foldRight(index)((c, bytes) =>
c match {
case codec: BytesToBytesCodec => codec.decode(bytes)
Expand All @@ -133,7 +138,7 @@ class Zarr3Array(vaultPath: VaultPath,
// BigInt constructor is big endian, sharding index stores values little endian, thus reverse is used.
(BigInt(bytes.take(8).reverse).toLong, BigInt(bytes.slice(8, 16).reverse).toLong)
})
.toSeq
.toArray
}

private def chunkIndexToShardIndex(chunkIndex: Array[Int]) =
Expand All @@ -150,9 +155,9 @@ class Zarr3Array(vaultPath: VaultPath,
shardCoordinates <- Fox.option2Fox(chunkIndexToShardIndex(chunkIndex).headOption)
shardFilename = getChunkFilename(shardCoordinates)
shardPath = vaultPath / shardFilename
shardIndex <- shardIndexCache.getOrLoad(shardPath, readShardIndex)
parsedShardIndex <- parsedShardIndexCache.getOrLoad(shardPath, readAndParseShardIndex)
chunkIndexInShardIndex = getChunkIndexInShardIndex(chunkIndex, shardCoordinates)
(chunkOffset, chunkLength) = parseShardIndex(shardIndex)(chunkIndexInShardIndex)
(chunkOffset, chunkLength) = parsedShardIndex(chunkIndexInShardIndex)
_ <- Fox.bool2Fox(!(chunkOffset == -1 && chunkLength == -1)) ~> Fox.empty // -1 signifies empty/missing chunk
range = Range.Long(chunkOffset, chunkOffset + chunkLength, 1)
} yield (shardPath, range)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ package com.scalableminds.webknossos.datastore.datavault
import com.scalableminds.util.tools.Fox
import com.scalableminds.util.tools.Fox.box2Fox
import net.liftweb.util.Helpers.tryo
import org.apache.commons.lang3.builder.HashCodeBuilder

import java.nio.ByteBuffer
import java.nio.file.{Files, Path}
Expand Down Expand Up @@ -41,6 +42,10 @@ class FileSystemDataVault extends DataVault {
}
} else Fox.empty

override def hashCode(): Int =
new HashCodeBuilder(19, 31).toHashCode

override def equals(obj: Any): Boolean = true
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

:trollface:

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

note to self: this should still do the typecheck with match

}

object FileSystemDataVault {
Expand Down
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
package com.scalableminds.webknossos.datastore.datavault

import com.scalableminds.util.tools.Fox
import org.apache.commons.lang3.builder.HashCodeBuilder

import java.net.URI
import java.nio.file.Path
Expand All @@ -27,6 +28,17 @@ class FileSystemVaultPath(basePath: Path, dataVault: FileSystemDataVault)
override def toString: String = basePath.toString

def exists: Boolean = basePath.toFile.exists()

private def getBasePath: Path = basePath
private def getDataVault: DataVault = dataVault

override def equals(obj: Any): Boolean = obj match {
case other: FileSystemVaultPath => other.getBasePath == basePath && other.getDataVault == dataVault
case _ => false
}

override def hashCode(): Int =
new HashCodeBuilder(13, 37).append(basePath).append(dataVault).toHashCode
}

object FileSystemVaultPath {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ import com.google.cloud.storage.{BlobId, BlobInfo, Storage, StorageException, St
import com.scalableminds.util.tools.Fox
import com.scalableminds.webknossos.datastore.storage.{GoogleServiceAccountCredential, RemoteSourceDescriptor}
import net.liftweb.util.Helpers.tryo
import org.apache.commons.lang3.builder.HashCodeBuilder

import java.io.ByteArrayInputStream
import java.net.URI
Expand Down Expand Up @@ -51,9 +52,9 @@ class GoogleCloudDataVault(uri: URI, credential: Option[GoogleServiceAccountCred
case SuffixLength(l) =>
val blobReader = storage.reader(blobId)
blobReader.seek(-l)
val bb = ByteBuffer.allocateDirect(l.toInt)
val bb = ByteBuffer.allocateDirect(l)
blobReader.read(bb)
val arr = new Array[Byte](l.toInt)
val arr = new Array[Byte](l)
bb.position(0)
bb.get(arr)
Fox.successful(arr)
Expand All @@ -71,6 +72,16 @@ class GoogleCloudDataVault(uri: URI, credential: Option[GoogleServiceAccountCred
} yield (bytes, encoding)
}

private def getUri = uri
private def getCredential = credential

override def equals(obj: Any): Boolean = obj match {
case other: GoogleCloudDataVault => other.getUri == uri && other.getCredential == credential
case _ => false
}

override def hashCode(): Int =
new HashCodeBuilder(17, 31).append(uri).append(credential).toHashCode
}

object GoogleCloudDataVault {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@ import com.scalableminds.webknossos.datastore.storage.{
RemoteSourceDescriptor
}
import com.typesafe.scalalogging.LazyLogging
import org.apache.commons.lang3.builder.HashCodeBuilder
import play.api.http.Status
import play.api.libs.ws.{WSAuthScheme, WSClient, WSResponse}

Expand Down Expand Up @@ -92,6 +93,15 @@ class HttpsDataVault(credential: Option[DataVaultCredential], ws: WSClient) exte
}
}

private def getCredential = credential

override def equals(obj: Any): Boolean = obj match {
case other: HttpsDataVault => other.getCredential == credential
case _ => false
}

override def hashCode(): Int =
new HashCodeBuilder(17, 31).append(credential).toHashCode
}

object HttpsDataVault {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@ import com.scalableminds.util.tools.Fox
import com.scalableminds.webknossos.datastore.storage.{RemoteSourceDescriptor, S3AccessKeyCredential}
import net.liftweb.common.{Box, Failure, Full}
import org.apache.commons.io.IOUtils
import org.apache.commons.lang3.builder.HashCodeBuilder

import java.net.URI
import scala.collection.immutable.NumericRange
Expand Down Expand Up @@ -78,6 +79,17 @@ class S3DataVault(s3AccessKeyCredential: Option[S3AccessKeyCredential], uri: URI
(bytes, encodingString) <- performRequest(request)
encoding <- Encoding.fromRfc7231String(encodingString)
} yield (bytes, encoding)

private def getUri = uri
private def getCredential = s3AccessKeyCredential

override def equals(obj: Any): Boolean = obj match {
case other: S3DataVault => other.getUri == uri && other.getCredential == s3AccessKeyCredential
case _ => false
}

override def hashCode(): Int =
new HashCodeBuilder(17, 31).append(uri.toString).append(s3AccessKeyCredential).toHashCode
}

object S3DataVault {
Expand Down Expand Up @@ -137,6 +149,7 @@ object S3DataVault {
.withRegion(Regions.DEFAULT_REGION)
.withForceGlobalBucketAccessEnabled(true)
.build

}

class AnonymousAWSCredentialsProvider extends AWSCredentialsProvider {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ import com.scalableminds.util.tools.Fox
import com.scalableminds.util.tools.Fox.box2Fox
import com.typesafe.scalalogging.LazyLogging
import net.liftweb.util.Helpers.tryo
import org.apache.commons.lang3.builder.HashCodeBuilder

import java.io.{ByteArrayInputStream, ByteArrayOutputStream, IOException}
import java.net.URI
Expand Down Expand Up @@ -76,4 +77,14 @@ class VaultPath(uri: URI, dataVault: DataVault) extends LazyLogging {
override def toString: String = uri.toString

def summary: String = s"VaultPath: ${this.toString} for ${dataVault.getClass.getSimpleName}"

private def getDataVault: DataVault = dataVault

override def equals(obj: Any): Boolean = obj match {
case other: VaultPath => other.toUri == toUri && other.getDataVault == dataVault
case _ => false
}

override def hashCode(): Int =
new HashCodeBuilder(17, 31).append(uri.toString).append(dataVault).toHashCode
}