Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Implement index_location setting for Zarr3 Datasets #7553

Merged
merged 11 commits into from
Jan 22, 2024
3 changes: 2 additions & 1 deletion CHANGELOG.unreleased.md
Original file line number Diff line number Diff line change
Expand Up @@ -33,10 +33,11 @@ For upgrade instructions, please check the [migration guide](MIGRATIONS.released
- When clicking a segment in the viewport, it is automatically focused in the segment list. A corresponding context menu entry was added as well. [#7512](https://github.com/scalableminds/webknossos/pull/7512)
- Updated the isValidName route in the API to return 200 for valid and invalid names. With this, the API version was bumped up to 6. [#7550](https://github.com/scalableminds/webknossos/pull/7550)
- The metadata for ND datasets and their annotation has changed: upper bound of additionalAxes is now stored as an exclusive value, called "end" in the NML format. [#7547](https://github.com/scalableminds/webknossos/pull/7547)
- Added support for the *index_location* parameter in sharded Zarr 3 datasets. [#7553](https://github.com/scalableminds/webknossos/pull/7553)

### Fixed
- Datasets with annotations can now be deleted. The concerning annotations can no longer be viewed but still be downloaded. [#7429](https://github.com/scalableminds/webknossos/pull/7429)
- Fixed several deprecation warning for using antd's Tabs.TabPane components. [#7469](https://github.com/scalableminds/webknossos/pull/7469)
- Fixed several deprecation warnings for using antd's Tabs.TabPane components. [#7469](https://github.com/scalableminds/webknossos/pull/7469)
- Fixed problems when requests for loading data failed (could impact volume data consistency and rendering). [#7477](https://github.com/scalableminds/webknossos/pull/7477)
- The settings page for non-wkw datasets no longer shows a wall of non-applying errors. [#7475](https://github.com/scalableminds/webknossos/pull/7475)
- Fixed a bug where dataset deletion for ND datasets and datasets with coordinate transforms would not free the name even if no referencing annotations exist. [#7495](https://github.com/scalableminds/webknossos/pull/7495)
Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
package com.scalableminds.webknossos.datastore.datareaders.zarr3

import com.scalableminds.util.enumeration.ExtendedEnumeration
import com.scalableminds.util.tools.ByteUtils
import com.scalableminds.webknossos.datastore.datareaders.{
BloscCompressor,
Expand All @@ -12,7 +13,7 @@ import com.scalableminds.webknossos.datastore.datareaders.{
}
import com.scalableminds.webknossos.datastore.helpers.JsonImplicits
import com.typesafe.scalalogging.LazyLogging
import play.api.libs.json.{Format, JsObject, JsResult, JsSuccess, JsValue, Json, OFormat, Reads, Writes}
import play.api.libs.json.{Format, JsObject, JsResult, JsString, JsSuccess, JsValue, Json, OFormat, Reads, Writes}
import play.api.libs.json.Json.WithDefaultValues
import ucar.ma2.{Array => MultiArray}

Expand All @@ -38,6 +39,17 @@ object TransposeSetting {
// F-order (column-major) transpose corresponds to reversing the axis order: [rank-1, rank-2, ..., 0].
def fOrderFromRank(rank: Int): IntArrayTransposeSetting = IntArrayTransposeSetting(Array.range(rank - 1, -1, -1))
}

/** Placement of the shard index within a sharded Zarr 3 file: either at the
  * start or at the end of the shard (see the Zarr v3 sharding-indexed codec spec).
  */
object IndexLocationSetting extends ExtendedEnumeration {
  type IndexLocationSetting = Value
  val start, end = Value

  // JSON representation is the bare string name ("start" / "end").
  implicit object IndexLocationSettingFormat extends Format[IndexLocationSetting] {
    override def reads(json: JsValue): JsResult[IndexLocationSetting] =
      for {
        stringValue <- json.validate[String]
      } yield withName(stringValue)
    override def writes(setting: IndexLocationSetting): JsValue = JsString(setting.toString)
  }
}

// Root marker trait of the Zarr 3 codec hierarchy (subtypes such as ArrayToBytesCodec
// and BytesToBytesCodec are visible in the definitions below).
trait Codec

/*
Expand Down Expand Up @@ -169,7 +181,8 @@ class Crc32CCodec extends BytesToBytesCodec with ByteUtils with LazyLogging {

class ShardingCodec(val chunk_shape: Array[Int],
val codecs: Seq[CodecConfiguration],
val index_codecs: Seq[CodecConfiguration])
val index_codecs: Seq[CodecConfiguration],
val index_location: IndexLocationSetting.IndexLocationSetting = IndexLocationSetting.end)
extends ArrayToBytesCodec {

// https://zarr-specs.readthedocs.io/en/latest/v3/codecs/sharding-indexed/v1.0.html
Expand Down Expand Up @@ -250,6 +263,8 @@ case object Crc32CCodecConfiguration extends CodecConfiguration {
override val includeConfiguration: Boolean = false
val name = "crc32c"

val checkSumByteLength = 4 // 32 Bit Codec => 4 Byte

// The crc32c codec carries no configuration payload, so parsing always succeeds
// with the singleton instance regardless of the JSON content.
implicit object Crc32CCodecConfigurationReads extends Reads[Crc32CCodecConfiguration.type] {
override def reads(json: JsValue): JsResult[Crc32CCodecConfiguration.type] = JsSuccess(Crc32CCodecConfiguration)
}
Expand All @@ -276,7 +291,9 @@ object CodecSpecification {

/** Configuration of the Zarr v3 sharding codec.
  *
  * Defect fixed: the rendered span contained both the stale pre-change parameter
  * line (`index_codecs: Seq[CodecConfiguration])`) and the new signature, leaving a
  * dangling duplicate line; reconstructed as the single, valid post-change definition.
  *
  * @param chunk_shape    shape of the inner chunks within one shard
  * @param codecs         codec pipeline applied to each inner chunk
  * @param index_codecs   codec pipeline applied to the shard index
  * @param index_location where the shard index is stored inside the shard file
  *                       ("start" or "end"); defaults to end
  */
final case class ShardingCodecConfiguration(chunk_shape: Array[Int],
                                            codecs: Seq[CodecConfiguration],
                                            index_codecs: Seq[CodecConfiguration],
                                            index_location: IndexLocationSetting.IndexLocationSetting =
                                              IndexLocationSetting.end)
    extends CodecConfiguration {
  override def name: String = ShardingCodecConfiguration.name
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -72,8 +72,8 @@ class Zarr3Array(vaultPath: VaultPath,
case GzipCodecConfiguration(level) => new GzipCodec(level)
case ZstdCodecConfiguration(level, checksum) => new ZstdCodec(level, checksum)
case Crc32CCodecConfiguration => new Crc32CCodec
case ShardingCodecConfiguration(chunk_shape, codecs, index_codecs) =>
new ShardingCodec(chunk_shape, codecs, index_codecs)
case ShardingCodecConfiguration(chunk_shape, codecs, index_codecs, index_location) =>
new ShardingCodec(chunk_shape, codecs, index_codecs, index_location)
}
val shardingCodecOpt: Option[ShardingCodec] = outerCodecs.flatMap {
case codec: ShardingCodec => Some(codec)
Expand Down Expand Up @@ -104,9 +104,6 @@ class Zarr3Array(vaultPath: VaultPath,
// Total number of chunks in one shard (product of the per-axis chunk counts in indexShape).
private lazy val chunksPerShard = indexShape.product
// Bytes per shard-index entry: presumably two 64-bit values per chunk — matches
// parseShardIndex returning (Long, Long) pairs; TODO confirm (offset, nbytes) semantics.
private def shardIndexEntryLength = 16

private def checkSumLength = 4 // 32-bit checksum
private def getShardIndexSize = shardIndexEntryLength * chunksPerShard + checkSumLength

private def getChunkIndexInShardIndex(chunkIndex: Array[Int], shardCoordinates: Array[Int]): Int = {
val shardOffset = shardCoordinates.zip(indexShape).map { case (sc, is) => sc * is }
indexShape.tails.toList
Expand All @@ -122,8 +119,22 @@ class Zarr3Array(vaultPath: VaultPath,
parsed = parseShardIndex(shardIndexRaw)
} yield parsed

private lazy val shardIndexChecksumLength =
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I guess this works for most datasets, but not for all. Although it doesn't make a lot of sense, it is spec-compliant to have multiple crc32c checksums. In the future, there might be other codecs that change the size of the shard index.

A more correct way would be to compute the size of the index based on the entire index_codecs pipeline. For that, zarrita uses the compute_encoded_size methods.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

At least, there should be some assertions that only supported index configurations are usable.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

shardingCodec match {
case Some(codec) =>
if (codec.index_codecs.exists(_.name == "crc32c")) Crc32CCodecConfiguration.checkSumByteLength
else 0
case None => 0
}
// Size in bytes of the serialized shard index: one 16-byte entry per chunk plus
// the checksum bytes, if a crc32c index codec is configured (see shardIndexChecksumLength).
private def getShardIndexSize = shardIndexEntryLength * chunksPerShard + shardIndexChecksumLength

// Reads the raw shard-index bytes from the shard file. Depending on the sharding
// codec's index_location setting, the index lives at the start or at the end of the shard.
// Defect fixed: the rendered span left the stale pre-change body line
// (`shardPath.readLastBytes(getShardIndexSize)`) dangling between the signature and
// the new match expression; reconstructed as the single, valid post-change method.
private def readShardIndex(shardPath: VaultPath)(implicit ec: ExecutionContext) =
  shardingCodec match {
    case Some(codec) if codec.index_location == IndexLocationSetting.start =>
      // Index at the start: read the first getShardIndexSize bytes.
      shardPath.readBytes(Some(Range.Long(0, getShardIndexSize.toLong, 1)))
    case Some(codec) if codec.index_location == IndexLocationSetting.end =>
      // Index at the end: read the trailing getShardIndexSize bytes.
      shardPath.readLastBytes(getShardIndexSize)
    case _ => Fox.failure("No sharding codec found")
  }

private def parseShardIndex(index: Array[Byte]): Array[(Long, Long)] = {
val decodedIndex = shardingCodec match {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -77,8 +77,8 @@ case class Zarr3ArrayHeader(

/** Effective chunk size for reads: when a sharding codec is configured, the inner
  * chunk shape of the first sharding codec wins; otherwise the outer (array-level)
  * chunk size is used.
  *
  * Defects fixed: the rendered span interleaved the stale three-argument pattern
  * (`ShardingCodecConfiguration(chunk_shape, _, _)`) with the new four-argument one,
  * producing duplicate cases; also replaced `flatMap { ... }.headOption` with the
  * equivalent but lazier and more idiomatic `collectFirst`.
  */
private def getChunkSize: Array[Int] =
  codecs.collectFirst {
    case ShardingCodecConfiguration(chunk_shape, _, _, _) => chunk_shape
  }.getOrElse(outerChunkSize)
Expand Down Expand Up @@ -195,7 +195,8 @@ object Zarr3ArrayHeader extends JsonImplicits {
chunk_shape <- config("chunk_shape").validate[Array[Int]]
codecs = readCodecs(config("codecs"))
index_codecs = readCodecs(config("index_codecs"))
} yield ShardingCodecConfiguration(chunk_shape, codecs, index_codecs)
index_location <- config("index_location").validate[IndexLocationSetting.IndexLocationSetting]
} yield ShardingCodecConfiguration(chunk_shape, codecs, index_codecs, index_location)

private def readCodecs(value: JsValue): Seq[CodecConfiguration] = {
val rawCodecSpecs: Seq[JsValue] = value match {
Expand Down