Skip to content

Commit

Permalink
Merge pull request #45 from linebender/clip_scratch
Browse files Browse the repository at this point in the history
Scratch buffer for clip stack
  • Loading branch information
raphlinus authored Nov 25, 2020
2 parents 180047d + a60c2dd commit 4bbc7de
Show file tree
Hide file tree
Showing 5 changed files with 107 additions and 20 deletions.
2 changes: 1 addition & 1 deletion piet-gpu-hal/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -148,7 +148,7 @@ pub trait CmdBuf<D: Device> {
/// This is readily supported in Vulkan, but for portability it is remarkably
/// tricky (unimplemented in gfx-hal right now). Possibly best to write a compute
/// kernel, or organize the code not to need it.
unsafe fn clear_buffer(&self, buffer: &D::Buffer);
unsafe fn clear_buffer(&self, buffer: &D::Buffer, size: Option<u64>);

unsafe fn copy_buffer(&self, src: &D::Buffer, dst: &D::Buffer);

Expand Down
5 changes: 3 additions & 2 deletions piet-gpu-hal/src/vulkan.rs
Original file line number Diff line number Diff line change
Expand Up @@ -902,9 +902,10 @@ impl crate::CmdBuf<VkDevice> for CmdBuf {
);
}

unsafe fn clear_buffer(&self, buffer: &Buffer) {
unsafe fn clear_buffer(&self, buffer: &Buffer, size: Option<u64>) {
let device = &self.device.device;
device.cmd_fill_buffer(self.cmd_buf, buffer.buffer, 0, vk::WHOLE_SIZE, 0);
let size = size.unwrap_or(vk::WHOLE_SIZE);
device.cmd_fill_buffer(self.cmd_buf, buffer.buffer, 0, size, 0);
}

unsafe fn copy_buffer(&self, src: &Buffer, dst: &Buffer) {
Expand Down
74 changes: 61 additions & 13 deletions piet-gpu/shader/kernel4.comp
Original file line number Diff line number Diff line change
Expand Up @@ -22,13 +22,39 @@ layout(set = 0, binding = 1) buffer TileBuf {
uint[] tile;
};

layout(rgba8, set = 0, binding = 2) uniform writeonly image2D image;
layout(set = 0, binding = 2) buffer ClipScratchBuf {
uint[] clip_scratch;
};

layout(rgba8, set = 0, binding = 3) uniform writeonly image2D image;

#include "ptcl.h"
#include "tile.h"

#define BLEND_STACK_SIZE 4

// Layout of clip_scratch buffer:
// [0] is the alloc bump offset (in units of 32 bit words, initially 0)
// Starting at 1 is a sequence of frames.
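// Each frame is TILE_WIDTH_PX * TILE_HEIGHT_PX 32-bit words (the saved blend
// stack entries for one tile), followed by one word holding the index of the
// previously allocated frame for this tile (0 if there is none).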

#define CLIP_LINK_OFFSET (TILE_WIDTH_PX * TILE_HEIGHT_PX)
#define CLIP_BUF_SIZE (CLIP_LINK_OFFSET + 1)

shared uint sh_clip_alloc;

// Allocate a scratch buffer for clipping. Unlike offsets in the rest of the code,
// it counts 32-bit words.
//
// `link` is written into the new frame's link slot (CLIP_LINK_OFFSET words past
// the start of the frame). Returns the word index in clip_scratch at which the
// new frame starts; every invocation that calls this gets the same value.
uint alloc_clip_buf(uint link) {
// Only invocation (0, 0) performs the allocation; the result is handed to the
// other invocations through the shared variable sh_clip_alloc.
if (gl_LocalInvocationID.x == 0 && gl_LocalInvocationID.y == 0) {
// Bump the allocation counter stored in clip_scratch[0]. The +1 skips over
// that counter word, since frames start at index 1.
uint alloc = atomicAdd(clip_scratch[0], CLIP_BUF_SIZE) + 1;
sh_clip_alloc = alloc;
// Record the caller-supplied link in the word that follows the frame data.
// NOTE(review): no check here that alloc + CLIP_BUF_SIZE stays within the
// size of clip_scratch — confirm the buffer is sized for the worst case.
clip_scratch[alloc + CLIP_LINK_OFFSET] = link;
}
// Wait until invocation (0, 0) has published sh_clip_alloc before reading it.
// NOTE(review): barrier() must be reached by all invocations, so callers
// presumably invoke this from workgroup-uniform control flow — confirm.
barrier();
return sh_clip_alloc;
}

// Calculate coverage based on backdrop + coverage of each line segment
float[CHUNK] computeArea(vec2 xy, int backdrop, uint tile_ref) {
// Probably better to store as float, but conversion is no doubt cheap.
Expand Down Expand Up @@ -72,7 +98,9 @@ void main() {
vec3 rgb[CHUNK];
float mask[CHUNK];
uint blend_stack[BLEND_STACK_SIZE][CHUNK];
uint blend_spill = 0;
uint blend_sp = 0;
uint clip_tos = 0;
for (uint i = 0; i < CHUNK; i++) {
rgb[i] = vec3(0.5);
mask[i] = 1.0;
Expand Down Expand Up @@ -142,26 +170,46 @@ void main() {
}
break;
case Cmd_BeginClip:
CmdBeginClip begin_clip = Cmd_BeginClip_read(cmd_ref);
area = computeArea(xy, begin_clip.backdrop, begin_clip.tile_ref);
for (uint k = 0; k < CHUNK; k++) {
blend_stack[blend_sp][k] = packUnorm4x8(vec4(rgb[k], clamp(abs(area[k]), 0.0, 1.0)));
}
blend_sp++;
break;
case Cmd_BeginSolidClip:
CmdBeginSolidClip begin_solid_clip = Cmd_BeginSolidClip_read(cmd_ref);
float solid_alpha = begin_solid_clip.alpha;
for (uint k = 0; k < CHUNK; k++) {
blend_stack[blend_sp][k] = packUnorm4x8(vec4(rgb[k], solid_alpha));
uint blend_slot = blend_sp % BLEND_STACK_SIZE;
if (blend_sp == blend_spill + BLEND_STACK_SIZE) {
// spill to scratch buffer
clip_tos = alloc_clip_buf(clip_tos);
uint base_ix = clip_tos + gl_LocalInvocationID.x + TILE_WIDTH_PX * gl_LocalInvocationID.y;
for (uint k = 0; k < CHUNK; k++) {
clip_scratch[base_ix + k * TILE_WIDTH_PX * CHUNK_DY] = blend_stack[blend_slot][k];
}
blend_spill++;
}
if (tag == Cmd_BeginClip) {
CmdBeginClip begin_clip = Cmd_BeginClip_read(cmd_ref);
area = computeArea(xy, begin_clip.backdrop, begin_clip.tile_ref);
for (uint k = 0; k < CHUNK; k++) {
blend_stack[blend_slot][k] = packUnorm4x8(vec4(rgb[k], clamp(abs(area[k]), 0.0, 1.0)));
}
} else {
CmdBeginSolidClip begin_solid_clip = Cmd_BeginSolidClip_read(cmd_ref);
float solid_alpha = begin_solid_clip.alpha;
for (uint k = 0; k < CHUNK; k++) {
blend_stack[blend_slot][k] = packUnorm4x8(vec4(rgb[k], solid_alpha));
}
}
blend_sp++;
break;
case Cmd_EndClip:
CmdEndClip end_clip = Cmd_EndClip_read(cmd_ref);
blend_slot = (blend_sp - 1) % BLEND_STACK_SIZE;
if (blend_sp == blend_spill) {
uint base_ix = clip_tos + gl_LocalInvocationID.x + TILE_WIDTH_PX * gl_LocalInvocationID.y;
for (uint k = 0; k < CHUNK; k++) {
blend_stack[blend_slot][k] = clip_scratch[base_ix + k * TILE_WIDTH_PX * CHUNK_DY];
}
clip_tos = clip_scratch[clip_tos + CLIP_LINK_OFFSET];
blend_spill--;
}
blend_sp--;
for (uint k = 0; k < CHUNK; k++) {
vec4 rgba = unpackUnorm4x8(blend_stack[blend_sp][k]);
vec4 rgba = unpackUnorm4x8(blend_stack[blend_slot][k]);
rgb[k] = mix(rgba.rgb, rgb[k], end_clip.alpha * rgba.a);
}
break;
Expand Down
Binary file modified piet-gpu/shader/kernel4.spv
Binary file not shown.
46 changes: 42 additions & 4 deletions piet-gpu/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -73,7 +73,8 @@ pub fn render_scene(rc: &mut impl RenderContext) {
5.0,
);
//render_cardioid(rc);
render_tiger(rc);
render_clip_test(rc);
//render_tiger(rc);
}

#[allow(unused)]
Expand All @@ -94,6 +95,33 @@ fn render_cardioid(rc: &mut impl RenderContext) {
rc.stroke(&path, &Color::BLACK, 2.0);
}

/// Draws a test scene made of deeply nested clips.
///
/// Pushes `DEPTH` nested clip paths (each inside its own `save`), fills a black
/// rectangle covering the whole test square, and then pops every saved state
/// again. Each clip path is the test square with its bottom-right corner cut
/// off diagonally; the cut grows with the nesting level.
///
/// NOTE(review): the nesting depth of 16 is presumably chosen to exceed the
/// shader's in-register blend stack so the spill path is exercised — confirm.
#[allow(unused)]
fn render_clip_test(rc: &mut impl RenderContext) {
    const DEPTH: usize = 16;
    const LEFT: f64 = 50.0;
    const TOP: f64 = 50.0;
    const RIGHT: f64 = 100.0;
    const BOTTOM: f64 = 100.0;

    // Fraction of the square's edge that the cut advances per nesting level.
    let step = 1.0 / ((DEPTH + 1) as f64);

    for level in 1..=DEPTH {
        let t = (level as f64) * step;
        rc.save();

        // Square with the bottom-right corner replaced by a diagonal edge.
        let mut clip_path = BezPath::new();
        clip_path.move_to((LEFT, TOP));
        clip_path.line_to((RIGHT, TOP));
        clip_path.line_to((RIGHT, TOP + t * (BOTTOM - TOP)));
        clip_path.line_to((RIGHT + t * (LEFT - RIGHT), BOTTOM));
        clip_path.line_to((LEFT, BOTTOM));
        clip_path.close_path();

        rc.clip(clip_path);
    }

    // Fill the full square; only the intersection of all clips should show.
    rc.fill(piet::kurbo::Rect::new(LEFT, TOP, RIGHT, BOTTOM), &Color::BLACK);

    // Unwind one saved state per clip pushed above.
    let mut remaining = DEPTH;
    while remaining > 0 {
        rc.restore();
        remaining -= 1;
    }
}

fn render_tiger(rc: &mut impl RenderContext) {
let xml_str = std::str::from_utf8(include_bytes!("../Ghostscript_Tiger.svg")).unwrap();
let start = std::time::Instant::now();
Expand Down Expand Up @@ -163,6 +191,8 @@ pub struct Renderer {
coarse_alloc_buf_host: hub::Buffer,
coarse_alloc_buf_dev: hub::Buffer,

clip_scratch_buf: hub::Buffer,

k4_pipeline: hub::Pipeline,
k4_ds: hub::DescriptorSet,

Expand Down Expand Up @@ -278,6 +308,8 @@ impl Renderer {
&[],
)?;

let clip_scratch_buf = session.create_buffer(1024 * 1024, dev)?;

let mut coarse_alloc_buf_host = session.create_buffer(8, host)?;
let coarse_alloc_buf_dev = session.create_buffer(8, dev)?;

Expand All @@ -298,10 +330,14 @@ impl Renderer {
)?;

let k4_code = include_bytes!("../shader/kernel4.spv");
let k4_pipeline = session.create_simple_compute_pipeline(k4_code, 2, 1)?;
let k4_pipeline = session.create_simple_compute_pipeline(k4_code, 3, 1)?;
let k4_ds = session.create_descriptor_set(
&k4_pipeline,
&[ptcl_buf.vk_buffer(), tile_buf.vk_buffer()],
&[
ptcl_buf.vk_buffer(),
tile_buf.vk_buffer(),
clip_scratch_buf.vk_buffer(),
],
&[image_dev.vk_image()],
)?;

Expand Down Expand Up @@ -335,6 +371,7 @@ impl Renderer {
bin_alloc_buf_dev,
coarse_alloc_buf_host,
coarse_alloc_buf_dev,
clip_scratch_buf,
n_elements,
n_paths,
n_pathseg,
Expand All @@ -355,7 +392,8 @@ impl Renderer {
self.coarse_alloc_buf_host.vk_buffer(),
self.coarse_alloc_buf_dev.vk_buffer(),
);
cmd_buf.clear_buffer(self.state_buf.vk_buffer());
cmd_buf.clear_buffer(self.state_buf.vk_buffer(), None);
cmd_buf.clear_buffer(self.clip_scratch_buf.vk_buffer(), Some(4));
cmd_buf.memory_barrier();
cmd_buf.image_barrier(
self.image_dev.vk_image(),
Expand Down

0 comments on commit 4bbc7de

Please sign in to comment.