Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion rendercanvas/contexts/bitmapcontext.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@ def set_bitmap(self, bitmap):
"""Set the rendered bitmap image.

Call this in the draw event. The bitmap must be an object that can be
conveted to a memoryview, like a numpy array. It must represent a 2D
converted to a memoryview, like a numpy array. It must represent a 2D
image in either grayscale or rgba format, with uint8 values
"""

Expand Down
200 changes: 171 additions & 29 deletions rendercanvas/contexts/wgpucontext.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
import time
from typing import Sequence

from .basecontext import BaseContext
Expand Down Expand Up @@ -99,7 +100,6 @@ def configure(
# "tone_mapping": tone_mapping,
"alpha_mode": alpha_mode,
}

# Let subclass finnish the configuration, then store the config
self._configure(config)
self._config = config
Expand Down Expand Up @@ -189,11 +189,28 @@ def __init__(self, present_info: dict):
# The last used texture
self._texture = None

# A ring-buffer to download the rendered images to the CPU/RAM. The
# image is first copied from the texture to an available copy-buffer.
# This is very fast (which is why we don't have a ring of textures).
# Mapping the buffers to RAM takes time, and we want to wait for this
# asynchronously.
#
# I feel that using just one buffer is sufficient. Adding more costs
# memory, and does not necessarily improve the FPS. It can actually
# strain the GPU more, because it would be busy mapping multiple buffers
# at once. I leave the ring-mechanism in-place for now, so we can
# experiment with it.
self._downloaders = [None] # Put as many None's as you want buffers

def _get_capabilities(self):
"""Get dict of capabilities and cache the result."""

import wgpu

# Store usage flags now that we have the wgpu namespace
self._our_texture_usage = wgpu.TextureUsage.COPY_SRC
self._our_buffer_usage = wgpu.BufferUsage.COPY_DST | wgpu.BufferUsage.MAP_READ

capabilities = {}

# Query format capabilities from the info provided by the canvas
Expand Down Expand Up @@ -260,8 +277,14 @@ def _configure(self, config: dict):
f"Configure: unsupported alpha-mode: {alpha_mode} not in {cap_alpha_modes}"
)

# (re)create downloaders
self._downloaders[:] = [
ImageDownloader(config["device"], self._our_buffer_usage)
]

def _unconfigure(self) -> None:
self._drop_texture()
self._downloaders[:] = [None for _ in self._downloaders]

def _get_current_texture(self):
# When the texture is active right now, we could either:
Expand All @@ -271,8 +294,6 @@ def _get_current_texture(self):
# Right now we return the existing texture, so user can retrieve it in different render passes that write to the same frame.

if self._texture is None:
import wgpu

width, height = self.physical_size
width, height = max(width, 1), max(height, 1)

Expand All @@ -283,7 +304,7 @@ def _get_current_texture(self):
label="present",
size=(width, height, 1),
format=self._config["format"],
usage=self._config["usage"] | wgpu.TextureUsage.COPY_SRC,
usage=self._config["usage"] | self._our_texture_usage,
)

return self._texture
Expand All @@ -292,17 +313,56 @@ def _rc_present(self) -> None:
if not self._texture:
return {"method": "skip"}

bitmap = self._get_bitmap()
# TODO: in some cases, like offscreen backend, we don't want to skip the first frame!

# Get bitmap from oldest downloader
bitmap = None
downloader = self._downloaders.pop(0)
try:
bitmap = downloader.get_bitmap()
finally:
self._downloaders.append(downloader)

# Select new downloader
downloader = self._downloaders[-1]
downloader.initiate_download(self._texture)

self._drop_texture()
return {"method": "bitmap", "format": "rgba-u8", "data": bitmap}
if bitmap is None:
return {"method": "skip"}
else:
return {"method": "bitmap", "format": "rgba-u8", "data": bitmap}

def _rc_close(self):
self._drop_texture()


class ImageDownloader:
"""A helper class that wraps a copy-buffer to async-download an image from a texture."""

def __init__(self, device, buffer_usage):
self._device = device
self._buffer_usage = buffer_usage
self._buffer = None
self._time = 0

def initiate_download(self, texture):
# TODO: assert not waiting

def _get_bitmap(self):
texture = self._texture
device = texture._device
self._parse_texture_metadata(texture)
nbytes = self._padded_stride * self._texture_size[1]
self._ensure_size(nbytes)
self._copy_texture(texture)

# Note: the buffer.map_async() method by default also does a flush, to hide a bug in wgpu-core (https://github.com/gfx-rs/wgpu/issues/5173).
# That bug does not affect this use-case, so we use a special (undocumented :/) map-mode to prevent wgpu-py from doing its sync thing.
self._awaitable = self._buffer.map_async("READ_NOSYNC", 0, nbytes)

def _parse_texture_metadata(self, texture):
size = texture.size
format = texture.format
nchannels = 4 # we expect rgba or bgra

if not format.startswith(("rgba", "bgra")):
raise RuntimeError(f"Image present unsupported texture format {format}.")
if "8" in format:
Expand All @@ -316,21 +376,6 @@ def _get_bitmap(self):
f"Image present unsupported texture format bitdepth {format}."
)

data = device.queue.read_texture(
{
"texture": texture,
"mip_level": 0,
"origin": (0, 0, 0),
},
{
"offset": 0,
"bytes_per_row": bytes_per_pixel * size[0],
"rows_per_image": size[1],
},
size,
)

# Derive struct dtype from wgpu texture format
memoryview_type = "B"
if "float" in format:
memoryview_type = "e" if "16" in format else "f"
Expand All @@ -344,10 +389,107 @@ def _get_bitmap(self):
if "sint" in format:
memoryview_type = memoryview_type.lower()

# Represent as memory object to avoid numpy dependency
# Equivalent: np.frombuffer(data, np.uint8).reshape(size[1], size[0], nchannels)
plain_stride = bytes_per_pixel * size[0]
extra_stride = (256 - plain_stride % 256) % 256
padded_stride = plain_stride + extra_stride

self._memoryview_type = memoryview_type
self._nchannels = nchannels
self._plain_stride = plain_stride
self._padded_stride = padded_stride
self._texture_size = size

def _ensure_size(self, required_size):
# Get buffer and decide whether we can still use it
buffer = self._buffer
if buffer is None:
pass # No buffer
elif required_size > buffer.size:
buffer = None # Buffer too small
elif required_size < 0.25 * buffer.size:
buffer = None # Buffer too large
elif required_size > 0.75 * buffer.size:
self._time = time.perf_counter() # Size is fine
elif time.perf_counter() - self._time > 5.0:
buffer = None # Too large too long

# Create a new buffer if we need one
if buffer is None:
buffer_size = required_size
buffer_size += (4096 - buffer_size % 4096) % 4096
self._buffer = self._device.create_buffer(
label="copy-buffer", size=buffer_size, usage=self._buffer_usage
)

return data.cast(memoryview_type, (size[1], size[0], nchannels))
def _copy_texture(self, texture):
source = {
"texture": texture,
"mip_level": 0,
"origin": (0, 0, 0),
}

def _rc_close(self):
self._drop_texture()
destination = {
"buffer": self._buffer,
"offset": 0,
"bytes_per_row": self._padded_stride,
"rows_per_image": self._texture_size[1],
}

# Copy data to temp buffer
encoder = self._device.create_command_encoder()
encoder.copy_texture_to_buffer(source, destination, texture.size)
command_buffer = encoder.finish()
self._device.queue.submit([command_buffer])

def get_bitmap(self):
if self._buffer is None: # todo: more explicit state tracking
return None

memoryview_type = self._memoryview_type
plain_stride = self._plain_stride
padded_stride = self._padded_stride

nbytes = plain_stride * self._texture_size[1]
plain_shape = (self._texture_size[1], self._texture_size[0], self._nchannels)

# Download from mappable buffer
# Because we use `copy=False``, we *must* copy the data.
if self._buffer.map_state == "pending":
self._awaitable.sync_wait()
mapped_data = self._buffer.read_mapped(copy=False)

# Copy the data
if padded_stride > plain_stride:
# Copy per row
data = memoryview(bytearray(nbytes)).cast(mapped_data.format)
i_start = 0
for i in range(self._texture_size[1]):
row = mapped_data[i * padded_stride : i * padded_stride + plain_stride]
data[i_start : i_start + plain_stride] = row
i_start += plain_stride
else:
# Copy as a whole
data = memoryview(bytearray(mapped_data)).cast(mapped_data.format)

# Alternative copy solution using Numpy.
# I expected this to be faster, but does not really seem to be. Seems not worth it
# since we technically don't depend on Numpy. Leaving here for reference.
# import numpy as np
# mapped_data = np.asarray(mapped_data)[:data_length]
# data = np.empty(nbytes, dtype=mapped_data.dtype)
# mapped_data.shape = -1, padded_stride
# data.shape = -1, plain_stride
# data[:] = mapped_data[:, :plain_stride]
# data.shape = -1
# data = memoryview(data)

# Since we use read_mapped(copy=False), we must unmap it *after* we've copied the data.
self._buffer.unmap()

# Derive struct dtype from wgpu texture format

# Represent as memory object to avoid numpy dependency
# Equivalent: np.frombuffer(data, np.uint8).reshape(plain_shape)
data = data.cast(memoryview_type, plain_shape)

return data
2 changes: 1 addition & 1 deletion rendercanvas/stub.py
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,7 @@ def _rc_call_later(self, delay, callback):

class StubCanvasGroup(BaseCanvasGroup):
"""
The ``CanvasGroup`` representss a group of canvas objects from the same class, that share a loop.
The ``CanvasGroup`` represents a group of canvas objects from the same class, that share a loop.

The initial/default loop is passed when the ``CanvasGroup`` is instantiated.

Expand Down
Loading