Skip to content

Commit

Permalink
Skip typing data chunk arrays if no rechunking is needed (#7370)
Browse files Browse the repository at this point in the history
* WIP: skip typing the array if no partial copying is needed

* do the thing

* changelog

* cleanup

* fix flat shape for MultiArray
  • Loading branch information
fm3 committed Oct 16, 2023
1 parent 74347e6 commit bdd6a47
Show file tree
Hide file tree
Showing 5 changed files with 82 additions and 47 deletions.
2 changes: 1 addition & 1 deletion CHANGELOG.unreleased.md
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ For upgrade instructions, please check the [migration guide](MIGRATIONS.released
### Added
- Added a new tool that allows either measuring the distance of a path or a non-self-crossing area. [#7258](https://github.com/scalableminds/webknossos/pull/7258)
- Added social media link previews for links to datasets and annotations (only if they are public or if the links contain sharing tokens). [#7331](https://github.com/scalableminds/webknossos/pull/7331)
- Loading sharded zarr3 datasets is now significantly faster. [#7363](https://github.com/scalableminds/webknossos/pull/7363)
- Loading sharded zarr3 datasets is now significantly faster. [#7363](https://github.com/scalableminds/webknossos/pull/7363) and [#7370](https://github.com/scalableminds/webknossos/pull/7370)
- Higher-dimension coordinates (e.g., for the t axis) are now encoded in the URL, too, so that reloading the page will keep you at your current position. Only relevant for 4D datasets. [#7328](https://github.com/scalableminds/webknossos/pull/7328)

### Changed
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -13,13 +13,20 @@ import scala.concurrent.ExecutionContext
class ChunkReader(header: DatasetHeader) {

private lazy val chunkTyper = ChunkTyper.createFromHeader(header)
private lazy val shortcutChunkTyper = new ShortcutChunkTyper(header)

def read(path: VaultPath, chunkShapeFromMetadata: Array[Int], range: Option[NumericRange[Long]])(
implicit ec: ExecutionContext): Fox[MultiArray] =
def read(path: VaultPath,
chunkShapeFromMetadata: Array[Int],
range: Option[NumericRange[Long]],
useSkipTypingShortcut: Boolean)(implicit ec: ExecutionContext): Fox[MultiArray] =
for {
chunkBytesAndShapeBox: Box[(Array[Byte], Option[Array[Int]])] <- readChunkBytesAndShape(path, range).futureBox
chunkShape: Array[Int] = chunkBytesAndShapeBox.toOption.flatMap(_._2).getOrElse(chunkShapeFromMetadata)
typed <- chunkBytesAndShapeBox.map(_._1) match {
case Full(chunkBytes) if useSkipTypingShortcut =>
shortcutChunkTyper.wrapAndType(chunkBytes, chunkShape).toFox ?~> "chunk.shortcutWrapAndType.failed"
case Empty if useSkipTypingShortcut =>
shortcutChunkTyper.createFromFillValue(chunkShape).toFox ?~> "chunk.shortcutCreateFromFillValue.failed"
case Full(chunkBytes) =>
chunkTyper.wrapAndType(chunkBytes, chunkShape).toFox ?~> "chunk.wrapAndType.failed"
case Empty =>
Expand Down
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
package com.scalableminds.webknossos.datastore.datareaders

import com.typesafe.scalalogging.LazyLogging
import net.liftweb.common.Box
import net.liftweb.util.Helpers.tryo

Expand Down Expand Up @@ -57,7 +56,7 @@ class DoubleChunkTyper(val header: DatasetHeader) extends ChunkTyper {
}.get)
}

class ShortChunkTyper(val header: DatasetHeader) extends ChunkTyper with LazyLogging {
class ShortChunkTyper(val header: DatasetHeader) extends ChunkTyper {

val ma2DataType: MADataType = MADataType.SHORT

Expand Down Expand Up @@ -117,3 +116,19 @@ class FloatChunkTyper(val header: DatasetHeader) extends ChunkTyper {
MultiArray.factory(ma2DataType, chunkSizeOrdered(chunkShape), typedStorage)
}.get)
}

// In the no-partial-copy shortcut, the MultiArray shape is never used, so it is simply set to flat.
// The element type is always BYTE.
class ShortcutChunkTyper(val header: DatasetHeader) extends ChunkTyper {

  // The shortcut hands raw chunk bytes through untouched, so the element type is fixed to BYTE,
  // regardless of the dataset's actual element type.
  val ma2DataType: MADataType = MADataType.BYTE

  /** Wraps the raw chunk bytes in a flat (one-dimensional) MultiArray without any type conversion.
    * The chunkShape parameter is intentionally ignored: in the no-partial-copy shortcut the
    * MultiArray shape is never consumed downstream, so a flat shape of bytes.length suffices.
    */
  def wrapAndType(bytes: Array[Byte], chunkShape: Array[Int]): Box[MultiArray] = tryo {
    MultiArray.factory(ma2DataType, Array(bytes.length), bytes)
  }

  /** Creates a flat, fill-value-initialized byte MultiArray covering the whole chunk.
    * The flat length is the element count of the chunk times the dataset's bytes per element.
    */
  override def createFromFillValue(chunkShape: Array[Int]): Box[MultiArray] = {
    val flatLength = chunkShape.product * header.bytesPerElement
    MultiArrayUtils.createFilledArray(ma2DataType, Array(flatLength), header.fillValueNumber)
  }
}
Original file line number Diff line number Diff line change
Expand Up @@ -83,13 +83,14 @@ class DatasetArray(vaultPath: VaultPath,
// returns byte array in fortran-order with little-endian values
private def readBytes(shape: Array[Int], offset: Array[Int])(implicit ec: ExecutionContext): Fox[Array[Byte]] =
for {
typedData <- readAsFortranOrder(shape, offset)
asBytes <- BytesConverter.toByteArray(typedData, header.resolvedDataType, ByteOrder.LITTLE_ENDIAN)
typedMultiArray <- readAsFortranOrder(shape, offset)
asBytes <- BytesConverter.toByteArray(typedMultiArray, header.resolvedDataType, ByteOrder.LITTLE_ENDIAN)
} yield asBytes

// Read from array. Note that shape and offset should be passed in XYZ order, left-padded with 0 and 1 respectively.
// This function will internally adapt to the array's axis order so that XYZ data in fortran-order is returned.
private def readAsFortranOrder(shape: Array[Int], offset: Array[Int])(implicit ec: ExecutionContext): Fox[Object] = {
private def readAsFortranOrder(shape: Array[Int], offset: Array[Int])(
implicit ec: ExecutionContext): Fox[MultiArray] = {
val totalOffset: Array[Int] = offset.zip(header.voxelOffset).map { case (o, v) => o - v }
val chunkIndices = ChunkUtils.computeChunkIndices(axisOrder.permuteIndicesReverse(header.datasetShape),
axisOrder.permuteIndicesReverse(header.chunkSize),
Expand All @@ -98,12 +99,13 @@ class DatasetArray(vaultPath: VaultPath,
if (partialCopyingIsNotNeeded(shape, totalOffset, chunkIndices)) {
for {
chunkIndex <- chunkIndices.headOption.toFox
sourceChunk: MultiArray <- getSourceChunkDataWithCache(axisOrder.permuteIndices(chunkIndex))
} yield sourceChunk.getStorage
sourceChunk: MultiArray <- getSourceChunkDataWithCache(axisOrder.permuteIndices(chunkIndex),
useSkipTypingShortcut = true)
} yield sourceChunk
} else {
val targetBuffer = MultiArrayUtils.createDataBuffer(header.resolvedDataType, shape)
val targetInCOrder: MultiArray =
MultiArrayUtils.orderFlippedView(MultiArrayUtils.createArrayWithGivenStorage(targetBuffer, shape.reverse))
val targetMultiArray = MultiArrayUtils.createArrayWithGivenStorage(targetBuffer, shape.reverse)
val targetInCOrder: MultiArray = MultiArrayUtils.orderFlippedView(targetMultiArray)
val copiedFuture = Fox.combined(chunkIndices.map { chunkIndex: Array[Int] =>
for {
sourceChunk: MultiArray <- getSourceChunkDataWithCache(axisOrder.permuteIndices(chunkIndex))
Expand All @@ -119,7 +121,7 @@ class DatasetArray(vaultPath: VaultPath,
})
for {
_ <- copiedFuture
} yield targetBuffer
} yield targetMultiArray
}
}

Expand All @@ -133,20 +135,23 @@ class DatasetArray(vaultPath: VaultPath,
// Builds the shared-cache key for one chunk: data source, layer, and vault path scope the key,
// and the comma-joined chunk index makes it unique within the array.
private def chunkContentsCacheKey(chunkIndex: Array[Int]): String = {
  val indexPart = chunkIndex.mkString(",")
  s"${dataSourceId}__${layerName}__${vaultPath}__chunk_$indexPart"
}

private def getSourceChunkDataWithCache(chunkIndex: Array[Int])(implicit ec: ExecutionContext): Fox[MultiArray] =
sharedChunkContentsCache.getOrLoad(chunkContentsCacheKey(chunkIndex), _ => readSourceChunkData(chunkIndex))
private def getSourceChunkDataWithCache(chunkIndex: Array[Int], useSkipTypingShortcut: Boolean = false)(
implicit ec: ExecutionContext): Fox[MultiArray] =
sharedChunkContentsCache.getOrLoad(chunkContentsCacheKey(chunkIndex),
_ => readSourceChunkData(chunkIndex, useSkipTypingShortcut))

private def readSourceChunkData(chunkIndex: Array[Int])(implicit ec: ExecutionContext): Fox[MultiArray] =
private def readSourceChunkData(chunkIndex: Array[Int], useSkipTypingShortcut: Boolean)(
implicit ec: ExecutionContext): Fox[MultiArray] =
if (header.isSharded) {
for {
(shardPath, chunkRange) <- getShardedChunkPathAndRange(chunkIndex) ?~> "chunk.getShardedPathAndRange.failed"
chunkShape = header.chunkSizeAtIndex(chunkIndex)
multiArray <- chunkReader.read(shardPath, chunkShape, Some(chunkRange))
multiArray <- chunkReader.read(shardPath, chunkShape, Some(chunkRange), useSkipTypingShortcut)
} yield multiArray
} else {
val chunkPath = vaultPath / getChunkFilename(chunkIndex)
val chunkShape = header.chunkSizeAtIndex(chunkIndex)
chunkReader.read(chunkPath, chunkShape, None)
chunkReader.read(chunkPath, chunkShape, None, useSkipTypingShortcut)
}

protected def getChunkFilename(chunkIndex: Array[Int]): String =
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -6,38 +6,46 @@ import net.liftweb.common.Box
import net.liftweb.util.Helpers.tryo

import java.nio.{ByteBuffer, ByteOrder}
import ucar.ma2.{Array => MultiArray}

object BytesConverter {
def toByteArray(array: Object, dataType: ArrayDataType, byteOrder: ByteOrder): Box[Array[Byte]] = tryo {
def toByteArray(multiArray: MultiArray, dataType: ArrayDataType, byteOrder: ByteOrder): Box[Array[Byte]] = tryo {
val array = multiArray.getStorage
val bytesPerElement = bytesPerElementFor(dataType)
dataType match {
case ArrayDataType.u1 | ArrayDataType.i1 =>
array.asInstanceOf[Array[Byte]]
case ArrayDataType.u2 | ArrayDataType.i2 =>
val arrayTyped = array.asInstanceOf[Array[Short]]
val byteBuffer = makeByteBuffer(arrayTyped.length * bytesPerElement, byteOrder)
byteBuffer.asShortBuffer().put(arrayTyped)
byteBuffer.array()
case ArrayDataType.u4 | ArrayDataType.i4 =>
val arrayTyped = array.asInstanceOf[Array[Int]]
val byteBuffer = makeByteBuffer(arrayTyped.length * bytesPerElement, byteOrder)
byteBuffer.asIntBuffer().put(arrayTyped)
byteBuffer.array()
case ArrayDataType.i8 | ArrayDataType.u8 =>
val arrayTyped = array.asInstanceOf[Array[Long]]
val byteBuffer = makeByteBuffer(arrayTyped.length * bytesPerElement, byteOrder)
byteBuffer.asLongBuffer().put(arrayTyped)
byteBuffer.array()
case ArrayDataType.f4 =>
val arrayTyped = array.asInstanceOf[Array[Float]]
val byteBuffer = makeByteBuffer(arrayTyped.length * bytesPerElement, byteOrder)
byteBuffer.asFloatBuffer().put(arrayTyped)
byteBuffer.array()
case ArrayDataType.f8 =>
val arrayTyped = array.asInstanceOf[Array[Double]]
val byteBuffer = makeByteBuffer(arrayTyped.length * bytesPerElement, byteOrder)
byteBuffer.asDoubleBuffer().put(arrayTyped)
byteBuffer.array()
// If the multiArray dtype size is 1, use the array directly.
// This may happen due to the skipTyping shortcut, even for non-uint8 datasets
if (multiArray.getDataType.getSize == 1) {
array.asInstanceOf[Array[Byte]]
} else {
dataType match {
case ArrayDataType.u1 | ArrayDataType.i1 =>
array.asInstanceOf[Array[Byte]]
case ArrayDataType.u2 | ArrayDataType.i2 =>
val arrayTyped = array.asInstanceOf[Array[Short]]
val byteBuffer = makeByteBuffer(arrayTyped.length * bytesPerElement, byteOrder)
byteBuffer.asShortBuffer().put(arrayTyped)
byteBuffer.array()
case ArrayDataType.u4 | ArrayDataType.i4 =>
val arrayTyped = array.asInstanceOf[Array[Int]]
val byteBuffer = makeByteBuffer(arrayTyped.length * bytesPerElement, byteOrder)
byteBuffer.asIntBuffer().put(arrayTyped)
byteBuffer.array()
case ArrayDataType.i8 | ArrayDataType.u8 =>
val arrayTyped = array.asInstanceOf[Array[Long]]
val byteBuffer = makeByteBuffer(arrayTyped.length * bytesPerElement, byteOrder)
byteBuffer.asLongBuffer().put(arrayTyped)
byteBuffer.array()
case ArrayDataType.f4 =>
val arrayTyped = array.asInstanceOf[Array[Float]]
val byteBuffer = makeByteBuffer(arrayTyped.length * bytesPerElement, byteOrder)
byteBuffer.asFloatBuffer().put(arrayTyped)
byteBuffer.array()
case ArrayDataType.f8 =>
val arrayTyped = array.asInstanceOf[Array[Double]]
val byteBuffer = makeByteBuffer(arrayTyped.length * bytesPerElement, byteOrder)
byteBuffer.asDoubleBuffer().put(arrayTyped)
byteBuffer.array()
}
}
}

Expand Down

0 comments on commit bdd6a47

Please sign in to comment.