Skip to content

Commit

Permalink
Merge pull request #138 from linebender/new_element
Browse files — browse the repository at this point in the history
Beginnings of new element pipeline
  • Loading branch information
raphlinus committed Dec 15, 2021
2 parents 22b8607 + 9601989 commit 41c7118
Show file tree
Hide file tree
Showing 106 changed files with 10,488 additions and 1,352 deletions.
2 changes: 2 additions & 0 deletions .gitattributes
Original file line number Diff line number Diff line change
@@ -1 +1,3 @@
**/shader/* linguist-language=glsl
**/shader/gen/* linguist-generated

7 changes: 5 additions & 2 deletions piet-gpu-hal/src/dx12.rs
Original file line number Diff line number Diff line change
Expand Up @@ -554,6 +554,7 @@ impl crate::backend::Device for Dx12Device {
Flags: d3d12::D3D12_PIPELINE_STATE_FLAG_NONE,
};
let pipeline_state = self.device.create_compute_pipeline_state(&desc)?;

Ok(Pipeline {
pipeline_state,
root_signature,
Expand Down Expand Up @@ -725,8 +726,10 @@ impl crate::backend::DescriptorSetBuilder<Dx12Device> for DescriptorSetBuilder {
}
}

fn add_textures(&mut self, _images: &[&Image]) {
todo!()
fn add_textures(&mut self, images: &[&Image]) {
for img in images {
self.handles.push(img.cpu_ref.as_ref().unwrap().handle());
}
}

unsafe fn build(
Expand Down
5 changes: 3 additions & 2 deletions piet-gpu-hal/src/metal.rs
Original file line number Diff line number Diff line change
Expand Up @@ -277,7 +277,8 @@ impl crate::backend::Device for MtlDevice {
}

unsafe fn destroy_image(&self, _image: &Self::Image) -> Result<(), Error> {
todo!()
// TODO figure out what we want to do here
Ok(())
}

unsafe fn create_compute_pipeline(
Expand Down Expand Up @@ -429,7 +430,7 @@ impl crate::backend::CmdBuf<MtlDevice> for CmdBuf {
encoder.set_buffer(buf_ix, Some(&buffer.buffer), 0);
buf_ix += 1;
}
let mut img_ix = 0;
let mut img_ix = buf_ix;
for image in &descriptor_set.images {
encoder.set_texture(img_ix, Some(&image.texture));
img_ix += 1;
Expand Down
32 changes: 17 additions & 15 deletions piet-gpu/bin/cli.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ use std::path::Path;

use clap::{App, Arg};

use piet_gpu_hal::{BufferUsage, Error, Instance, Session};
use piet_gpu_hal::{BufferUsage, Error, Instance, InstanceFlags, Session};

use piet_gpu::{test_scenes, PietGpuRenderContext, Renderer};

Expand Down Expand Up @@ -226,7 +226,7 @@ fn main() -> Result<(), Error> {
.takes_value(true),
)
.get_matches();
let (instance, _) = Instance::new(None, Default::default())?;
let (instance, _) = Instance::new(None, InstanceFlags::default())?;
unsafe {
let device = instance.device(None)?;
let session = Session::new(device);
Expand Down Expand Up @@ -256,29 +256,31 @@ fn main() -> Result<(), Error> {
cmd_buf.begin();
renderer.record(&mut cmd_buf, &query_pool, 0);
cmd_buf.copy_image_to_buffer(&renderer.image_dev, &image_buf);
cmd_buf.finish_timestamps(&query_pool);
cmd_buf.host_barrier();
cmd_buf.finish();
let start = std::time::Instant::now();
let submitted = session.run_cmd_buf(cmd_buf, &[], &[])?;
submitted.wait()?;
println!("elapsed = {:?}", start.elapsed());
let ts = session.fetch_query_pool(&query_pool).unwrap();
println!("Element kernel time: {:.3}ms", ts[0] * 1e3);
println!(
"Tile allocation kernel time: {:.3}ms",
(ts[1] - ts[0]) * 1e3
);
println!("Coarse path kernel time: {:.3}ms", (ts[2] - ts[1]) * 1e3);
println!("Backdrop kernel time: {:.3}ms", (ts[3] - ts[2]) * 1e3);
println!("Binning kernel time: {:.3}ms", (ts[4] - ts[3]) * 1e3);
println!("Coarse raster kernel time: {:.3}ms", (ts[5] - ts[4]) * 1e3);
println!("Render kernel time: {:.3}ms", (ts[6] - ts[5]) * 1e3);
if !ts.is_empty() {
println!("Element kernel time: {:.3}ms", ts[0] * 1e3);
println!(
"Tile allocation kernel time: {:.3}ms",
(ts[1] - ts[0]) * 1e3
);
println!("Coarse path kernel time: {:.3}ms", (ts[2] - ts[1]) * 1e3);
println!("Backdrop kernel time: {:.3}ms", (ts[3] - ts[2]) * 1e3);
println!("Binning kernel time: {:.3}ms", (ts[4] - ts[3]) * 1e3);
println!("Coarse raster kernel time: {:.3}ms", (ts[5] - ts[4]) * 1e3);
println!("Render kernel time: {:.3}ms", (ts[6] - ts[5]) * 1e3);
}

/*
let mut data: Vec<u32> = Default::default();
renderer.tile_buf.read(&mut data).unwrap();
piet_gpu::dump_k1_data(&data);
trace_ptcl(&data);
renderer.memory_buf_dev.read(&mut data).unwrap();
piet_gpu::dump_k1_data(&data[2..]);
*/

let mut img_data: Vec<u8> = Default::default();
Expand Down
24 changes: 13 additions & 11 deletions piet-gpu/bin/winit.rs
Original file line number Diff line number Diff line change
Expand Up @@ -79,17 +79,19 @@ fn main() -> Result<(), Error> {
if let Some(submitted) = submitted[frame_idx].take() {
cmd_bufs[frame_idx] = submitted.wait().unwrap();
let ts = session.fetch_query_pool(&query_pools[frame_idx]).unwrap();
info_string = format!(
"{:.3}ms :: e:{:.3}ms|alloc:{:.3}ms|cp:{:.3}ms|bd:{:.3}ms|bin:{:.3}ms|cr:{:.3}ms|r:{:.3}ms",
ts[6] * 1e3,
ts[0] * 1e3,
(ts[1] - ts[0]) * 1e3,
(ts[2] - ts[1]) * 1e3,
(ts[3] - ts[2]) * 1e3,
(ts[4] - ts[3]) * 1e3,
(ts[5] - ts[4]) * 1e3,
(ts[6] - ts[5]) * 1e3,
);
if !ts.is_empty() {
info_string = format!(
"{:.3}ms :: e:{:.3}ms|alloc:{:.3}ms|cp:{:.3}ms|bd:{:.3}ms|bin:{:.3}ms|cr:{:.3}ms|r:{:.3}ms",
ts[6] * 1e3,
ts[0] * 1e3,
(ts[1] - ts[0]) * 1e3,
(ts[2] - ts[1]) * 1e3,
(ts[3] - ts[2]) * 1e3,
(ts[4] - ts[3]) * 1e3,
(ts[5] - ts[4]) * 1e3,
(ts[6] - ts[5]) * 1e3,
);
}
}

let mut ctx = PietGpuRenderContext::new();
Expand Down
6 changes: 3 additions & 3 deletions piet-gpu/shader/backdrop.comp
Original file line number Diff line number Diff line change
Expand Up @@ -87,8 +87,8 @@ void main() {
// Prefix sum of sh_row_count
for (uint i = 0; i < LG_BACKDROP_WG; i++) {
barrier();
if (gl_LocalInvocationID.y == 0 && th_ix >= (1 << i)) {
row_count += sh_row_count[th_ix - (1 << i)];
if (gl_LocalInvocationID.y == 0 && th_ix >= (1u << i)) {
row_count += sh_row_count[th_ix - (1u << i)];
}
barrier();
if (gl_LocalInvocationID.y == 0) {
Expand All @@ -102,7 +102,7 @@ void main() {
// Binary search to find element
uint el_ix = 0;
for (uint i = 0; i < LG_BACKDROP_WG; i++) {
uint probe = el_ix + ((BACKDROP_WG / 2) >> i);
uint probe = el_ix + (uint(BACKDROP_WG / 2) >> i);
if (row >= sh_row_count[probe - 1]) {
el_ix = probe;
}
Expand Down
4 changes: 2 additions & 2 deletions piet-gpu/shader/bbox_clear.comp
Original file line number Diff line number Diff line change
Expand Up @@ -19,8 +19,8 @@ layout(binding = 1) readonly buffer ConfigBuf {

void main() {
uint ix = gl_GlobalInvocationID.x;
if (ix < conf.n_elements) {
uint out_ix = (conf.bbox_alloc.offset >> 2) + 4 * ix;
if (ix < conf.n_path) {
uint out_ix = (conf.bbox_alloc.offset >> 2) + 6 * ix;
memory[out_ix] = 0xffff;
memory[out_ix + 1] = 0xffff;
memory[out_ix + 2] = 0;
Expand Down
2 changes: 1 addition & 1 deletion piet-gpu/shader/binning.comp
Original file line number Diff line number Diff line change
Expand Up @@ -84,7 +84,7 @@ void main() {
if (x0 == x1) y1 = y0;
int x = x0, y = y0;
uint my_slice = gl_LocalInvocationID.x / 32;
uint my_mask = 1 << (gl_LocalInvocationID.x & 31);
uint my_mask = 1u << (gl_LocalInvocationID.x & 31);
while (y < y1) {
atomicOr(bitmaps[my_slice][y * width_in_bins + x], my_mask);
x++;
Expand Down
48 changes: 33 additions & 15 deletions piet-gpu/shader/build.ninja
Original file line number Diff line number Diff line change
Expand Up @@ -14,31 +14,49 @@ rule glsl
command = $glslang_validator $flags -V -o $out $in

rule hlsl
command = $spirv_cross --hlsl $in --output $out
command = $spirv_cross --hlsl --shader-model 60 $in --output $out

rule dxil
command = $dxc -T cs_6_0 $in -Fo $out

rule msl
command = $spirv_cross --msl $in --output $out $msl_flags

build gen/binning.spv: glsl binning.comp | annotated.h state.h bins.h setup.h mem.h
build gen/binning.hlsl: hlsl gen/binning.spv
build gen/binning.dxil: dxil gen/binning.hlsl
build gen/binning.msl: msl gen/binning.spv

build elements.spv: glsl elements.comp | scene.h state.h annotated.h
build gen/tile_alloc.spv: glsl tile_alloc.comp | annotated.h tile.h setup.h
build gen/tile_alloc.hlsl: hlsl gen/tile_alloc.spv
build gen/tile_alloc.dxil: dxil gen/tile_alloc.hlsl
build gen/tile_alloc.msl: msl gen/tile_alloc.spv

build binning.spv: glsl binning.comp | annotated.h state.h bins.h setup.h
build gen/path_coarse.spv: glsl path_coarse.comp | annotated.h pathseg.h tile.h setup.h
build gen/path_coarse.hlsl: hlsl gen/path_coarse.spv
build gen/path_coarse.dxil: dxil gen/path_coarse.hlsl
build gen/path_coarse.msl: msl gen/path_coarse.spv

build tile_alloc.spv: glsl tile_alloc.comp | annotated.h tile.h setup.h
build gen/backdrop.spv: glsl backdrop.comp | annotated.h tile.h setup.h
build gen/backdrop.hlsl: hlsl gen/backdrop.spv
build gen/backdrop.dxil: dxil gen/backdrop.hlsl
build gen/backdrop.msl: msl gen/backdrop.spv

build path_coarse.spv: glsl path_coarse.comp | annotated.h pathseg.h tile.h setup.h

build backdrop.spv: glsl backdrop.comp | annotated.h tile.h setup.h

build backdrop_lg.spv: glsl backdrop.comp | annotated.h tile.h setup.h
build gen/backdrop_lg.spv: glsl backdrop.comp | annotated.h tile.h setup.h
flags = -DBACKDROP_DIST_FACTOR=4
build gen/backdrop_lg.hlsl: hlsl gen/backdrop_lg.spv
build gen/backdrop_lg.dxil: dxil gen/backdrop_lg.hlsl
build gen/backdrop_lg.msl: msl gen/backdrop_lg.spv

build coarse.spv: glsl coarse.comp | annotated.h bins.h ptcl.h setup.h
build gen/coarse.spv: glsl coarse.comp | annotated.h bins.h ptcl.h setup.h
build gen/coarse.hlsl: hlsl gen/coarse.spv
build gen/coarse.dxil: dxil gen/coarse.hlsl
build gen/coarse.msl: msl gen/coarse.spv

build kernel4.spv: glsl kernel4.comp | ptcl.h setup.h
build gen/kernel4.spv: glsl kernel4.comp | ptcl.h setup.h
build gen/kernel4.hlsl: hlsl gen/kernel4.spv
build gen/kernel4.dxil: dxil gen/kernel4.hlsl
build gen/kernel4.msl: msl gen/kernel4.spv

# New element pipeline follows

Expand All @@ -47,7 +65,7 @@ build gen/transform_reduce.hlsl: hlsl gen/transform_reduce.spv
build gen/transform_reduce.dxil: dxil gen/transform_reduce.hlsl
build gen/transform_reduce.msl: msl gen/transform_reduce.spv

build gen/transform_root.spv: glsl transform_scan.comp
build gen/transform_root.spv: glsl transform_scan.comp | setup.h
flags = -DROOT
build gen/transform_root.hlsl: hlsl gen/transform_root.spv
build gen/transform_root.dxil: dxil gen/transform_root.hlsl
Expand All @@ -63,7 +81,7 @@ build gen/pathtag_reduce.hlsl: hlsl gen/pathtag_reduce.spv
build gen/pathtag_reduce.dxil: dxil gen/pathtag_reduce.hlsl
build gen/pathtag_reduce.msl: msl gen/pathtag_reduce.spv

build gen/pathtag_root.spv: glsl pathtag_scan.comp | pathtag.h
build gen/pathtag_root.spv: glsl pathtag_scan.comp | pathtag.h setup.h
flags = -DROOT
build gen/pathtag_root.hlsl: hlsl gen/pathtag_root.spv
build gen/pathtag_root.dxil: dxil gen/pathtag_root.hlsl
Expand All @@ -84,13 +102,13 @@ build gen/draw_reduce.hlsl: hlsl gen/draw_reduce.spv
build gen/draw_reduce.dxil: dxil gen/draw_reduce.hlsl
build gen/draw_reduce.msl: msl gen/draw_reduce.spv

build gen/draw_root.spv: glsl draw_scan.comp | drawtag.h
build gen/draw_root.spv: glsl draw_scan.comp | drawtag.h setup.h
flags = -DROOT
build gen/draw_root.hlsl: hlsl gen/draw_root.spv
build gen/draw_root.dxil: dxil gen/draw_root.hlsl
build gen/draw_root.msl: msl gen/draw_root.spv

build gen/draw_leaf.spv: glsl draw_leaf.comp | scene.h drawtag.h setup.h mem.h
build gen/draw_leaf.spv: glsl draw_leaf.comp | scene.h drawtag.h annotated.h setup.h mem.h
build gen/draw_leaf.hlsl: hlsl gen/draw_leaf.spv
build gen/draw_leaf.dxil: dxil gen/draw_leaf.hlsl
build gen/draw_leaf.msl: msl gen/draw_leaf.spv
20 changes: 10 additions & 10 deletions piet-gpu/shader/coarse.comp
Original file line number Diff line number Diff line change
Expand Up @@ -172,8 +172,8 @@ void main() {
}
barrier();
if (th_ix < N_PART_READ) {
if (th_ix >= (1 << i)) {
count += sh_part_count[th_ix - (1 << i)];
if (th_ix >= (1u << i)) {
count += sh_part_count[th_ix - (1u << i)];
}
}
barrier();
Expand All @@ -190,7 +190,7 @@ void main() {
if (ix >= wr_ix && ix < ready_ix && mem_ok) {
uint part_ix = 0;
for (uint i = 0; i < LG_N_PART_READ; i++) {
uint probe = part_ix + ((N_PART_READ / 2) >> i);
uint probe = part_ix + (uint(N_PART_READ / 2) >> i);
if (ix >= sh_part_count[probe - 1]) {
part_ix = probe;
}
Expand Down Expand Up @@ -257,8 +257,8 @@ void main() {
sh_tile_count[th_ix] = tile_count;
for (uint i = 0; i < LG_N_TILE; i++) {
barrier();
if (th_ix >= (1 << i)) {
tile_count += sh_tile_count[th_ix - (1 << i)];
if (th_ix >= (1u << i)) {
tile_count += sh_tile_count[th_ix - (1u << i)];
}
barrier();
sh_tile_count[th_ix] = tile_count;
Expand All @@ -269,7 +269,7 @@ void main() {
// Binary search to find element
uint el_ix = 0;
for (uint i = 0; i < LG_N_TILE; i++) {
uint probe = el_ix + ((N_TILE / 2) >> i);
uint probe = el_ix + (uint(N_TILE / 2) >> i);
if (ix >= sh_tile_count[probe - 1]) {
el_ix = probe;
}
Expand All @@ -292,7 +292,7 @@ void main() {
}
if (include_tile) {
uint el_slice = el_ix / 32;
uint el_mask = 1 << (el_ix & 31);
uint el_mask = 1u << (el_ix & 31);
atomicOr(sh_bitmaps[el_slice][y * N_TILE_X + x], el_mask);
}
}
Expand Down Expand Up @@ -372,7 +372,7 @@ void main() {
if (tile.tile.offset == 0 && tile.backdrop == 0) {
clip_zero_depth = clip_depth + 1;
} else if (tile.tile.offset == 0 && clip_depth < 32) {
clip_one_mask |= (1 << clip_depth);
clip_one_mask |= (1u << clip_depth);
} else {
AnnoBeginClip begin_clip = Annotated_BeginClip_read(conf.anno_alloc, ref);
if (!alloc_cmd(cmd_alloc, cmd_ref, cmd_limit)) {
Expand All @@ -382,14 +382,14 @@ void main() {
Cmd_BeginClip_write(cmd_alloc, cmd_ref);
cmd_ref.offset += 4;
if (clip_depth < 32) {
clip_one_mask &= ~(1 << clip_depth);
clip_one_mask &= ~(1u << clip_depth);
}
}
clip_depth++;
break;
case Annotated_EndClip:
clip_depth--;
if (clip_depth >= 32 || (clip_one_mask & (1 << clip_depth)) == 0) {
if (clip_depth >= 32 || (clip_one_mask & (1u << clip_depth)) == 0) {
if (!alloc_cmd(cmd_alloc, cmd_ref, cmd_limit)) {
break;
}
Expand Down

0 comments on commit 41c7118

Please sign in to comment.