Skip to content

Commit

Permalink
Speed up Zarr3 data reading by fixing VaultPath equality check (#7363)
Browse files Browse the repository at this point in the history
* Speed up Zarr3 data reading by fixing VaultPath equality check

* make equals checks collision-safe

* fix test

* changelog

* filesystemdatavault is equal only to other filesystemdatavaults
  • Loading branch information
fm3 committed Oct 5, 2023
1 parent 0d813d6 commit 815c34e
Show file tree
Hide file tree
Showing 8 changed files with 82 additions and 10 deletions.
1 change: 1 addition & 0 deletions CHANGELOG.unreleased.md
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@ For upgrade instructions, please check the [migration guide](MIGRATIONS.released
### Added

- Added social media link previews for links to datasets and annotations (only if they are public or if the links contain sharing tokens). [#7331](https://github.com/scalableminds/webknossos/pull/7331)
- Loading sharded zarr3 datasets is now significantly faster. [#7363](https://github.com/scalableminds/webknossos/pull/7363)

### Changed
- Updated backend code to Scala 2.13, with upgraded Dependencies for optimized performance. [#7327](https://github.com/scalableminds/webknossos/pull/7327)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -89,8 +89,7 @@ class Zarr3Array(vaultPath: VaultPath,
override protected lazy val chunkReader: ChunkReader =
new Zarr3ChunkReader(header, this)

private val shardIndexCache: AlfuCache[VaultPath, Array[Byte]] =
AlfuCache()
private val parsedShardIndexCache: AlfuCache[VaultPath, Array[(Long, Long)]] = AlfuCache()

private def shardShape =
header.outerChunkSize // Only valid for one hierarchy of sharding codecs, describes total voxel size of a shard
Expand All @@ -105,7 +104,7 @@ class Zarr3Array(vaultPath: VaultPath,
private def checkSumLength = 4 // 32-bit checksum
private def getShardIndexSize = shardIndexEntryLength * chunksPerShard + checkSumLength

private def getChunkIndexInShardIndex(chunkIndex: Array[Int], shardCoordinates: Array[Int]) = {
private def getChunkIndexInShardIndex(chunkIndex: Array[Int], shardCoordinates: Array[Int]): Int = {
val shardOffset = shardCoordinates.zip(indexShape).map { case (sc, is) => sc * is }
indexShape.tails.toList
.dropRight(1)
Expand All @@ -114,12 +113,18 @@ class Zarr3Array(vaultPath: VaultPath,
.sum
}

private def readAndParseShardIndex(shardPath: VaultPath)(implicit ec: ExecutionContext): Fox[Array[(Long, Long)]] =
for {
shardIndexRaw <- readShardIndex(shardPath)
parsed = parseShardIndex(shardIndexRaw)
} yield parsed

private def readShardIndex(shardPath: VaultPath)(implicit ec: ExecutionContext) =
shardPath.readLastBytes(getShardIndexSize)

private def parseShardIndex(index: Array[Byte]): Seq[(Long, Long)] = {
private def parseShardIndex(index: Array[Byte]): Array[(Long, Long)] = {
val decodedIndex = shardingCodec match {
case Some(shardingCodec: ShardingCodec) =>
case Some(_: ShardingCodec) =>
indexCodecs.foldRight(index)((c, bytes) =>
c match {
case codec: BytesToBytesCodec => codec.decode(bytes)
Expand All @@ -133,7 +138,7 @@ class Zarr3Array(vaultPath: VaultPath,
// BigInt constructor is big endian, sharding index stores values little endian, thus reverse is used.
(BigInt(bytes.take(8).reverse).toLong, BigInt(bytes.slice(8, 16).reverse).toLong)
})
.toSeq
.toArray
}

private def chunkIndexToShardIndex(chunkIndex: Array[Int]) =
Expand All @@ -150,9 +155,9 @@ class Zarr3Array(vaultPath: VaultPath,
shardCoordinates <- Fox.option2Fox(chunkIndexToShardIndex(chunkIndex).headOption)
shardFilename = getChunkFilename(shardCoordinates)
shardPath = vaultPath / shardFilename
shardIndex <- shardIndexCache.getOrLoad(shardPath, readShardIndex)
parsedShardIndex <- parsedShardIndexCache.getOrLoad(shardPath, readAndParseShardIndex)
chunkIndexInShardIndex = getChunkIndexInShardIndex(chunkIndex, shardCoordinates)
(chunkOffset, chunkLength) = parseShardIndex(shardIndex)(chunkIndexInShardIndex)
(chunkOffset, chunkLength) = parsedShardIndex(chunkIndexInShardIndex)
_ <- Fox.bool2Fox(!(chunkOffset == -1 && chunkLength == -1)) ~> Fox.empty // -1 signifies empty/missing chunk
range = Range.Long(chunkOffset, chunkOffset + chunkLength, 1)
} yield (shardPath, range)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ package com.scalableminds.webknossos.datastore.datavault
import com.scalableminds.util.tools.Fox
import com.scalableminds.util.tools.Fox.box2Fox
import net.liftweb.util.Helpers.tryo
import org.apache.commons.lang3.builder.HashCodeBuilder

import java.nio.ByteBuffer
import java.nio.file.{Files, Path}
Expand Down Expand Up @@ -41,6 +42,14 @@ class FileSystemDataVault extends DataVault {
}
} else Fox.empty

override def hashCode(): Int =
new HashCodeBuilder(19, 31).toHashCode

override def equals(obj: Any): Boolean = obj match {
case _: FileSystemDataVault => true
case _ => false
}

}

object FileSystemDataVault {
Expand Down
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
package com.scalableminds.webknossos.datastore.datavault

import com.scalableminds.util.tools.Fox
import org.apache.commons.lang3.builder.HashCodeBuilder

import java.net.URI
import java.nio.file.Path
Expand All @@ -27,6 +28,17 @@ class FileSystemVaultPath(basePath: Path, dataVault: FileSystemDataVault)
override def toString: String = basePath.toString

def exists: Boolean = basePath.toFile.exists()

private def getBasePath: Path = basePath
private def getDataVault: DataVault = dataVault

override def equals(obj: Any): Boolean = obj match {
case other: FileSystemVaultPath => other.getBasePath == basePath && other.getDataVault == dataVault
case _ => false
}

override def hashCode(): Int =
new HashCodeBuilder(13, 37).append(basePath).append(dataVault).toHashCode
}

object FileSystemVaultPath {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ import com.google.cloud.storage.{BlobId, BlobInfo, Storage, StorageException, St
import com.scalableminds.util.tools.Fox
import com.scalableminds.webknossos.datastore.storage.{GoogleServiceAccountCredential, RemoteSourceDescriptor}
import net.liftweb.util.Helpers.tryo
import org.apache.commons.lang3.builder.HashCodeBuilder

import java.io.ByteArrayInputStream
import java.net.URI
Expand Down Expand Up @@ -51,9 +52,9 @@ class GoogleCloudDataVault(uri: URI, credential: Option[GoogleServiceAccountCred
case SuffixLength(l) =>
val blobReader = storage.reader(blobId)
blobReader.seek(-l)
val bb = ByteBuffer.allocateDirect(l.toInt)
val bb = ByteBuffer.allocateDirect(l)
blobReader.read(bb)
val arr = new Array[Byte](l.toInt)
val arr = new Array[Byte](l)
bb.position(0)
bb.get(arr)
Fox.successful(arr)
Expand All @@ -71,6 +72,16 @@ class GoogleCloudDataVault(uri: URI, credential: Option[GoogleServiceAccountCred
} yield (bytes, encoding)
}

private def getUri = uri
private def getCredential = credential

override def equals(obj: Any): Boolean = obj match {
case other: GoogleCloudDataVault => other.getUri == uri && other.getCredential == credential
case _ => false
}

override def hashCode(): Int =
new HashCodeBuilder(17, 31).append(uri).append(credential).toHashCode
}

object GoogleCloudDataVault {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@ import com.scalableminds.webknossos.datastore.storage.{
RemoteSourceDescriptor
}
import com.typesafe.scalalogging.LazyLogging
import org.apache.commons.lang3.builder.HashCodeBuilder
import play.api.http.Status
import play.api.libs.ws.{WSAuthScheme, WSClient, WSResponse}

Expand Down Expand Up @@ -92,6 +93,15 @@ class HttpsDataVault(credential: Option[DataVaultCredential], ws: WSClient) exte
}
}

private def getCredential = credential

override def equals(obj: Any): Boolean = obj match {
case other: HttpsDataVault => other.getCredential == credential
case _ => false
}

override def hashCode(): Int =
new HashCodeBuilder(17, 31).append(credential).toHashCode
}

object HttpsDataVault {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@ import com.scalableminds.util.tools.Fox
import com.scalableminds.webknossos.datastore.storage.{RemoteSourceDescriptor, S3AccessKeyCredential}
import net.liftweb.common.{Box, Failure, Full}
import org.apache.commons.io.IOUtils
import org.apache.commons.lang3.builder.HashCodeBuilder

import java.net.URI
import scala.collection.immutable.NumericRange
Expand Down Expand Up @@ -78,6 +79,17 @@ class S3DataVault(s3AccessKeyCredential: Option[S3AccessKeyCredential], uri: URI
(bytes, encodingString) <- performRequest(request)
encoding <- Encoding.fromRfc7231String(encodingString)
} yield (bytes, encoding)

private def getUri = uri
private def getCredential = s3AccessKeyCredential

override def equals(obj: Any): Boolean = obj match {
case other: S3DataVault => other.getUri == uri && other.getCredential == s3AccessKeyCredential
case _ => false
}

override def hashCode(): Int =
new HashCodeBuilder(17, 31).append(uri.toString).append(s3AccessKeyCredential).toHashCode
}

object S3DataVault {
Expand Down Expand Up @@ -137,6 +149,7 @@ object S3DataVault {
.withRegion(Regions.DEFAULT_REGION)
.withForceGlobalBucketAccessEnabled(true)
.build

}

class AnonymousAWSCredentialsProvider extends AWSCredentialsProvider {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ import com.scalableminds.util.tools.Fox
import com.scalableminds.util.tools.Fox.box2Fox
import com.typesafe.scalalogging.LazyLogging
import net.liftweb.util.Helpers.tryo
import org.apache.commons.lang3.builder.HashCodeBuilder

import java.io.{ByteArrayInputStream, ByteArrayOutputStream, IOException}
import java.net.URI
Expand Down Expand Up @@ -76,4 +77,14 @@ class VaultPath(uri: URI, dataVault: DataVault) extends LazyLogging {
override def toString: String = uri.toString

def summary: String = s"VaultPath: ${this.toString} for ${dataVault.getClass.getSimpleName}"

private def getDataVault: DataVault = dataVault

override def equals(obj: Any): Boolean = obj match {
case other: VaultPath => other.toUri == toUri && other.getDataVault == dataVault
case _ => false
}

override def hashCode(): Int =
new HashCodeBuilder(17, 31).append(uri.toString).append(dataVault).toHashCode
}

0 comments on commit 815c34e

Please sign in to comment.