Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Remove manual blend stack spilling and rely on scratch memory instead #77

Merged
merged 1 commit into from
Jul 13, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
17 changes: 0 additions & 17 deletions piet-gpu/shader/coarse.comp
Original file line number Diff line number Diff line change
Expand Up @@ -148,12 +148,6 @@ void main() {
uint part_start_ix = 0;
uint ready_ix = 0;

// Leave room for the fine rasterizer scratch allocation.
Alloc scratch_alloc = slice_mem(cmd_alloc, 0, Alloc_size);
cmd_ref.offset += Alloc_size;

uint num_begin_slots = 0;
uint begin_slot = 0;
bool mem_ok = mem_error == NO_ERROR;
while (true) {
for (uint i = 0; i < N_SLICE; i++) {
Expand Down Expand Up @@ -373,8 +367,6 @@ void main() {
if (clip_depth < 32) {
clip_one_mask &= ~(1 << clip_depth);
}
begin_slot++;
num_begin_slots = max(num_begin_slots, begin_slot);
}
clip_depth++;
break;
Expand All @@ -386,7 +378,6 @@ void main() {
}
Cmd_Solid_write(cmd_alloc, cmd_ref);
cmd_ref.offset += 4;
begin_slot--;
Cmd_EndClip_write(cmd_alloc, cmd_ref);
cmd_ref.offset += 4;
}
Expand Down Expand Up @@ -414,13 +405,5 @@ void main() {
}
if (bin_tile_x + tile_x < conf.width_in_tiles && bin_tile_y + tile_y < conf.height_in_tiles) {
Cmd_End_write(cmd_alloc, cmd_ref);
if (num_begin_slots > 0) {
// Write scratch allocation: one state per BeginClip per rasterizer chunk.
uint scratch_size = num_begin_slots * TILE_WIDTH_PX * TILE_HEIGHT_PX * CLIP_STATE_SIZE * 4;
MallocResult scratch = malloc(scratch_size);
// Ignore scratch.failed; we don't use the allocation and kernel4
// checks for memory overflow before using it.
alloc_write(scratch_alloc, scratch_alloc.offset, scratch.alloc);
}
}
}
Binary file modified piet-gpu/shader/coarse.spv
Binary file not shown.
29 changes: 11 additions & 18 deletions piet-gpu/shader/kernel4.comp
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,7 @@ layout(rgba8, set = 0, binding = 3) uniform restrict readonly image2D images[1];
#include "ptcl.h"
#include "tile.h"

#define MAX_BLEND_STACK 128
mediump vec3 tosRGB(mediump vec3 rgb) {
bvec3 cutoff = greaterThanEqual(rgb, vec3(0.0031308));
mediump vec3 below = vec3(12.92)*rgb;
Expand Down Expand Up @@ -90,13 +91,11 @@ void main() {
Alloc cmd_alloc = slice_mem(conf.ptcl_alloc, tile_ix * PTCL_INITIAL_ALLOC, PTCL_INITIAL_ALLOC);
CmdRef cmd_ref = CmdRef(cmd_alloc.offset);

// Read scratch space allocation, written first in the command list.
Alloc scratch_alloc = alloc_read(cmd_alloc, cmd_ref.offset);
cmd_ref.offset += Alloc_size;

uvec2 xy_uint = uvec2(gl_LocalInvocationID.x + TILE_WIDTH_PX * gl_WorkGroupID.x, gl_LocalInvocationID.y + TILE_HEIGHT_PX * gl_WorkGroupID.y);
vec2 xy = vec2(xy_uint);
mediump vec4 rgba[CHUNK];
uint blend_stack[MAX_BLEND_STACK][CHUNK];
mediump float blend_alpha_stack[MAX_BLEND_STACK][CHUNK];
for (uint i = 0; i < CHUNK; i++) {
rgba[i] = vec4(0.0);
// TODO: remove this debug image support when the actual image method is plumbed.
Expand Down Expand Up @@ -208,29 +207,23 @@ void main() {
cmd_ref.offset += 4 + CmdImage_size;
break;
case Cmd_BeginClip:
uint base_ix = (scratch_alloc.offset >> 2) + CLIP_STATE_SIZE * (clip_depth * TILE_WIDTH_PX * TILE_HEIGHT_PX +
gl_LocalInvocationID.x + TILE_WIDTH_PX * gl_LocalInvocationID.y);
for (uint k = 0; k < CHUNK; k++) {
uvec2 offset = chunk_offset(k);
uint srgb = packsRGB(vec4(rgba[k]));
mediump float alpha = clamp(abs(area[k]), 0.0, 1.0);
write_mem(scratch_alloc, base_ix + 0 + CLIP_STATE_SIZE * (offset.x + offset.y * TILE_WIDTH_PX), srgb);
write_mem(scratch_alloc, base_ix + 1 + CLIP_STATE_SIZE * (offset.x + offset.y * TILE_WIDTH_PX), floatBitsToUint(alpha));
// We reject any inputs that might overflow in render_ctx.rs.
// The following is a sanity check so we don't corrupt memory should there be malformed inputs.
uint d = min(clip_depth, MAX_BLEND_STACK - 1);
blend_stack[d][k] = packsRGB(vec4(rgba[k]));
blend_alpha_stack[d][k] = clamp(abs(area[k]), 0.0, 1.0);
rgba[k] = vec4(0.0);
}
clip_depth++;
cmd_ref.offset += 4;
break;
case Cmd_EndClip:
clip_depth--;
base_ix = (scratch_alloc.offset >> 2) + CLIP_STATE_SIZE * (clip_depth * TILE_WIDTH_PX * TILE_HEIGHT_PX +
gl_LocalInvocationID.x + TILE_WIDTH_PX * gl_LocalInvocationID.y);
for (uint k = 0; k < CHUNK; k++) {
uvec2 offset = chunk_offset(k);
uint srgb = read_mem(scratch_alloc, base_ix + 0 + CLIP_STATE_SIZE * (offset.x + offset.y * TILE_WIDTH_PX));
uint alpha = read_mem(scratch_alloc, base_ix + 1 + CLIP_STATE_SIZE * (offset.x + offset.y * TILE_WIDTH_PX));
mediump vec4 bg = unpacksRGB(srgb);
mediump vec4 fg = rgba[k] * area[k] * uintBitsToFloat(alpha);
uint d = min(clip_depth, MAX_BLEND_STACK - 1);
mediump vec4 bg = unpacksRGB(blend_stack[d][k]);
mediump vec4 fg = rgba[k] * area[k] * blend_alpha_stack[d][k];
rgba[k] = bg * (1.0 - fg.a) + fg;
}
cmd_ref.offset += 4;
Expand Down
Binary file modified piet-gpu/shader/kernel4.spv
Binary file not shown.
Binary file modified piet-gpu/shader/kernel4_idx.spv
Binary file not shown.
2 changes: 2 additions & 0 deletions piet-gpu/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,8 @@ const WIDTH_IN_TILES: usize = 128;
const HEIGHT_IN_TILES: usize = 96;
const PTCL_INITIAL_ALLOC: usize = 1024;

// Maximum clip/blend stack depth; the render context panics when the clip
// stack would grow past this. Same value as the MAX_BLEND_STACK define in
// kernel4.comp, which sizes the per-invocation blend stack arrays.
// NOTE(review): presumably the two must be kept in sync by hand — confirm.
const MAX_BLEND_STACK: usize = 128;

const N_CIRCLES: usize = 0;

pub fn render_svg(rc: &mut impl RenderContext, filename: &str, scale: f64) {
Expand Down
4 changes: 4 additions & 0 deletions piet-gpu/src/render_ctx.rs
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ use piet::{
Color, Error, FixedGradient, FontFamily, HitTestPoint, ImageFormat, InterpolationMode,
IntoBrush, LineMetric, RenderContext, StrokeStyle, Text, TextLayout, TextLayoutBuilder,
};
use crate::MAX_BLEND_STACK;

use piet_gpu_types::encoder::{Encode, Encoder};
use piet_gpu_types::scene::{
Expand Down Expand Up @@ -211,6 +212,9 @@ impl RenderContext for PietGpuRenderContext {
self.elements.push(Element::BeginClip(Clip {
bbox: Default::default(),
}));
if self.clip_stack.len() >= MAX_BLEND_STACK {
panic!("Maximum clip/blend stack size {} exceeded", MAX_BLEND_STACK);
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm not convinced this should be a panic. For one, it is actually pretty likely that the clip stack depth in fine rasterization is significantly less than the depth in the scene graph, because of optimizations (all empty or all alpha = 1.0 tiles). But I can see the rationale of catching it here, so the GPU doesn't need to handle overflow cases.

}
self.clip_stack.push(ClipElement {
bbox: None,
begin_ix,
Expand Down