Skip to content

Commit

Permalink
unify GPU memory management
Browse files Browse the repository at this point in the history
Merge all static and dynamic buffers into just one, "memory". Add a malloc
function for dynamic allocations.

Unify static allocation offsets into a "config" buffer containing scene setup
(number of paths, number of path segments), as well as the memory offsets of
the static allocations.

Finally, set an overflow flag when an allocation fails, and make sure to exit
shader execution as soon as that triggers. Add checks before beginning
execution in case the client wants to run two or more shaders before checking
the flag.

The "state" buffer is left alone because it needs zeroing and because it is
accessed with the "volatile" keyword.

Fixes #40

Signed-off-by: Elias Naur <mail@eliasnaur.com>
  • Loading branch information
eliasnaur committed Dec 27, 2020
1 parent a2a2d12 commit 4de67d9
Show file tree
Hide file tree
Showing 23 changed files with 463 additions and 448 deletions.
13 changes: 9 additions & 4 deletions piet-gpu-derive/src/glsl.rs
Original file line number Diff line number Diff line change
Expand Up @@ -31,17 +31,22 @@ pub fn gen_glsl(module: &LayoutModule) -> String {

for name in &module.def_names {
let def = module.defs.get(name).unwrap();
let mem = &"memory".to_owned();
let mut buf_name = &module.name;
if !module.name.eq(&"state") && !module.name.eq(&"scene") {
buf_name = mem;
}
match def {
(_size, LayoutTypeDef::Struct(fields)) => {
gen_struct_read(&mut r, &module.name, &name, fields);
gen_struct_read(&mut r, buf_name, &name, fields);
if module.gpu_write {
gen_struct_write(&mut r, &module.name, &name, fields);
gen_struct_write(&mut r, buf_name, &name, fields);
}
}
(_size, LayoutTypeDef::Enum(en)) => {
gen_enum_read(&mut r, &module.name, &name, en);
gen_enum_read(&mut r, buf_name, &name, en);
if module.gpu_write {
gen_enum_write(&mut r, &module.name, &name, en);
gen_enum_write(&mut r, buf_name, &name, en);
}
}
}
Expand Down
72 changes: 36 additions & 36 deletions piet-gpu/shader/annotated.h
Original file line number Diff line number Diff line change
Expand Up @@ -64,11 +64,11 @@ AnnotatedRef Annotated_index(AnnotatedRef ref, uint index) {

AnnoFill AnnoFill_read(AnnoFillRef ref) {
uint ix = ref.offset >> 2;
uint raw0 = annotated[ix + 0];
uint raw1 = annotated[ix + 1];
uint raw2 = annotated[ix + 2];
uint raw3 = annotated[ix + 3];
uint raw4 = annotated[ix + 4];
uint raw0 = memory[ix + 0];
uint raw1 = memory[ix + 1];
uint raw2 = memory[ix + 2];
uint raw3 = memory[ix + 3];
uint raw4 = memory[ix + 4];
AnnoFill s;
s.bbox = vec4(uintBitsToFloat(raw0), uintBitsToFloat(raw1), uintBitsToFloat(raw2), uintBitsToFloat(raw3));
s.rgba_color = raw4;
Expand All @@ -77,21 +77,21 @@ AnnoFill AnnoFill_read(AnnoFillRef ref) {

void AnnoFill_write(AnnoFillRef ref, AnnoFill s) {
uint ix = ref.offset >> 2;
annotated[ix + 0] = floatBitsToUint(s.bbox.x);
annotated[ix + 1] = floatBitsToUint(s.bbox.y);
annotated[ix + 2] = floatBitsToUint(s.bbox.z);
annotated[ix + 3] = floatBitsToUint(s.bbox.w);
annotated[ix + 4] = s.rgba_color;
memory[ix + 0] = floatBitsToUint(s.bbox.x);
memory[ix + 1] = floatBitsToUint(s.bbox.y);
memory[ix + 2] = floatBitsToUint(s.bbox.z);
memory[ix + 3] = floatBitsToUint(s.bbox.w);
memory[ix + 4] = s.rgba_color;
}

AnnoStroke AnnoStroke_read(AnnoStrokeRef ref) {
uint ix = ref.offset >> 2;
uint raw0 = annotated[ix + 0];
uint raw1 = annotated[ix + 1];
uint raw2 = annotated[ix + 2];
uint raw3 = annotated[ix + 3];
uint raw4 = annotated[ix + 4];
uint raw5 = annotated[ix + 5];
uint raw0 = memory[ix + 0];
uint raw1 = memory[ix + 1];
uint raw2 = memory[ix + 2];
uint raw3 = memory[ix + 3];
uint raw4 = memory[ix + 4];
uint raw5 = memory[ix + 5];
AnnoStroke s;
s.bbox = vec4(uintBitsToFloat(raw0), uintBitsToFloat(raw1), uintBitsToFloat(raw2), uintBitsToFloat(raw3));
s.rgba_color = raw4;
Expand All @@ -101,35 +101,35 @@ AnnoStroke AnnoStroke_read(AnnoStrokeRef ref) {

void AnnoStroke_write(AnnoStrokeRef ref, AnnoStroke s) {
uint ix = ref.offset >> 2;
annotated[ix + 0] = floatBitsToUint(s.bbox.x);
annotated[ix + 1] = floatBitsToUint(s.bbox.y);
annotated[ix + 2] = floatBitsToUint(s.bbox.z);
annotated[ix + 3] = floatBitsToUint(s.bbox.w);
annotated[ix + 4] = s.rgba_color;
annotated[ix + 5] = floatBitsToUint(s.linewidth);
memory[ix + 0] = floatBitsToUint(s.bbox.x);
memory[ix + 1] = floatBitsToUint(s.bbox.y);
memory[ix + 2] = floatBitsToUint(s.bbox.z);
memory[ix + 3] = floatBitsToUint(s.bbox.w);
memory[ix + 4] = s.rgba_color;
memory[ix + 5] = floatBitsToUint(s.linewidth);
}

AnnoClip AnnoClip_read(AnnoClipRef ref) {
uint ix = ref.offset >> 2;
uint raw0 = annotated[ix + 0];
uint raw1 = annotated[ix + 1];
uint raw2 = annotated[ix + 2];
uint raw3 = annotated[ix + 3];
uint raw0 = memory[ix + 0];
uint raw1 = memory[ix + 1];
uint raw2 = memory[ix + 2];
uint raw3 = memory[ix + 3];
AnnoClip s;
s.bbox = vec4(uintBitsToFloat(raw0), uintBitsToFloat(raw1), uintBitsToFloat(raw2), uintBitsToFloat(raw3));
return s;
}

void AnnoClip_write(AnnoClipRef ref, AnnoClip s) {
uint ix = ref.offset >> 2;
annotated[ix + 0] = floatBitsToUint(s.bbox.x);
annotated[ix + 1] = floatBitsToUint(s.bbox.y);
annotated[ix + 2] = floatBitsToUint(s.bbox.z);
annotated[ix + 3] = floatBitsToUint(s.bbox.w);
memory[ix + 0] = floatBitsToUint(s.bbox.x);
memory[ix + 1] = floatBitsToUint(s.bbox.y);
memory[ix + 2] = floatBitsToUint(s.bbox.z);
memory[ix + 3] = floatBitsToUint(s.bbox.w);
}

uint Annotated_tag(AnnotatedRef ref) {
return annotated[ref.offset >> 2];
return memory[ref.offset >> 2];
}

AnnoStroke Annotated_Stroke_read(AnnotatedRef ref) {
Expand All @@ -149,26 +149,26 @@ AnnoClip Annotated_EndClip_read(AnnotatedRef ref) {
}

void Annotated_Nop_write(AnnotatedRef ref) {
annotated[ref.offset >> 2] = Annotated_Nop;
memory[ref.offset >> 2] = Annotated_Nop;
}

void Annotated_Stroke_write(AnnotatedRef ref, AnnoStroke s) {
annotated[ref.offset >> 2] = Annotated_Stroke;
memory[ref.offset >> 2] = Annotated_Stroke;
AnnoStroke_write(AnnoStrokeRef(ref.offset + 4), s);
}

void Annotated_Fill_write(AnnotatedRef ref, AnnoFill s) {
annotated[ref.offset >> 2] = Annotated_Fill;
memory[ref.offset >> 2] = Annotated_Fill;
AnnoFill_write(AnnoFillRef(ref.offset + 4), s);
}

void Annotated_BeginClip_write(AnnotatedRef ref, AnnoClip s) {
annotated[ref.offset >> 2] = Annotated_BeginClip;
memory[ref.offset >> 2] = Annotated_BeginClip;
AnnoClip_write(AnnoClipRef(ref.offset + 4), s);
}

void Annotated_EndClip_write(AnnotatedRef ref, AnnoClip s) {
annotated[ref.offset >> 2] = Annotated_EndClip;
memory[ref.offset >> 2] = Annotated_EndClip;
AnnoClip_write(AnnoClipRef(ref.offset + 4), s);
}

34 changes: 13 additions & 21 deletions piet-gpu/shader/backdrop.comp
Original file line number Diff line number Diff line change
Expand Up @@ -16,27 +16,15 @@
#extension GL_GOOGLE_include_directive : enable

#include "setup.h"
#include "mem.h"

#define LG_BACKDROP_WG (7 + LG_WG_FACTOR)
#define BACKDROP_WG (1 << LG_BACKDROP_WG)

layout(local_size_x = BACKDROP_WG, local_size_y = 1) in;

layout(set = 0, binding = 0) buffer AnnotatedBuf {
uint[] annotated;
};

// This is really only used for n_elements; maybe we can handle that
// a different way, but it's convenient to have the same signature as
// tile allocation.
layout(set = 0, binding = 1) readonly buffer AllocBuf {
uint n_elements; // paths
uint n_pathseg;
uint alloc;
};

layout(set = 0, binding = 2) buffer TileBuf {
uint[] tile;
layout(set = 0, binding = 1) readonly buffer ConfigBuf {
Config conf;
};

#include "annotated.h"
Expand All @@ -47,18 +35,22 @@ shared uint sh_row_base[BACKDROP_WG];
shared uint sh_row_width[BACKDROP_WG];

void main() {
if (mem_overflow) {
return;
}

uint th_ix = gl_LocalInvocationID.x;
uint element_ix = gl_GlobalInvocationID.x;
AnnotatedRef ref = AnnotatedRef(element_ix * Annotated_size);
AnnotatedRef ref = AnnotatedRef(conf.anno_base + element_ix * Annotated_size);

// Work assignment: 1 thread : 1 path element
uint row_count = 0;
if (element_ix < n_elements) {
if (element_ix < conf.n_elements) {
uint tag = Annotated_tag(ref);
switch (tag) {
case Annotated_Fill:
case Annotated_BeginClip:
PathRef path_ref = PathRef(element_ix * Path_size);
PathRef path_ref = PathRef(conf.tile_base + element_ix * Path_size);
Path path = Path_read(path_ref);
sh_row_width[th_ix] = path.bbox.z - path.bbox.x;
row_count = path.bbox.w - path.bbox.y;
Expand Down Expand Up @@ -98,11 +90,11 @@ void main() {
// Process one row sequentially
// Read backdrop value per tile and prefix sum it
uint tile_el_ix = sh_row_base[el_ix] + seq_ix * 2 * width;
uint sum = tile[tile_el_ix];
uint sum = memory[tile_el_ix];
for (uint x = 1; x < width; x++) {
tile_el_ix += 2;
sum += tile[tile_el_ix];
tile[tile_el_ix] = sum;
sum += memory[tile_el_ix];
memory[tile_el_ix] = sum;
}
}
}
Binary file modified piet-gpu/shader/backdrop.spv
Binary file not shown.
45 changes: 26 additions & 19 deletions piet-gpu/shader/binning.comp
Original file line number Diff line number Diff line change
Expand Up @@ -10,20 +10,12 @@
#extension GL_GOOGLE_include_directive : enable

#include "setup.h"
#include "mem.h"

layout(local_size_x = N_TILE, local_size_y = 1) in;

layout(set = 0, binding = 0) buffer AnnotatedBuf {
uint[] annotated;
};

layout(set = 0, binding = 1) buffer AllocBuf {
uint n_elements; // paths
uint alloc;
};

layout(set = 0, binding = 2) buffer BinsBuf {
uint[] bins;
layout(set = 0, binding = 1) readonly buffer ConfigBuf {
Config conf;
};

#include "annotated.h"
Expand All @@ -41,19 +33,27 @@ layout(set = 0, binding = 2) buffer BinsBuf {
shared uint bitmaps[N_SLICE][N_TILE];
shared uint count[N_SLICE][N_TILE];
shared uint sh_chunk_start[N_TILE];
shared bool sh_alloc_failed;

void main() {
uint my_n_elements = n_elements;
if (mem_overflow) {
return;
}

uint my_n_elements = conf.n_elements;
uint my_partition = gl_WorkGroupID.x;

for (uint i = 0; i < N_SLICE; i++) {
bitmaps[i][gl_LocalInvocationID.x] = 0;
}
if (gl_LocalInvocationID.x == 0) {
sh_alloc_failed = false;
}
barrier();

// Read inputs and determine coverage of bins
uint element_ix = my_partition * N_TILE + gl_LocalInvocationID.x;
AnnotatedRef ref = AnnotatedRef(element_ix * Annotated_size);
AnnotatedRef ref = AnnotatedRef(conf.anno_base + element_ix * Annotated_size);
uint tag = Annotated_Nop;
if (element_ix < my_n_elements) {
tag = Annotated_tag(ref);
Expand Down Expand Up @@ -103,19 +103,26 @@ void main() {
count[i][gl_LocalInvocationID.x] = element_count;
}
// element_count is number of elements covering bin for this invocation.
uint chunk_start = 0;
Alloc chunk_alloc = Alloc(0, false);
if (element_count != 0) {
// TODO: aggregate atomic adds (subgroup is probably fastest)
chunk_start = atomicAdd(alloc, element_count * BinInstance_size);
sh_chunk_start[gl_LocalInvocationID.x] = chunk_start;
chunk_alloc = malloc(element_count * BinInstance_size);
sh_chunk_start[gl_LocalInvocationID.x] = chunk_alloc.offset;
if (chunk_alloc.failed) {
sh_alloc_failed = true;
}
}
// Note: it might be more efficient for reading to do this in the
// other order (each bin is a contiguous sequence of partitions)
uint out_ix = (my_partition * N_TILE + gl_LocalInvocationID.x) * 2;
bins[out_ix] = element_count;
bins[out_ix + 1] = chunk_start;
uint out_ix = (conf.bin_base >> 2) + (my_partition * N_TILE + gl_LocalInvocationID.x) * 2;
memory[out_ix] = element_count;
memory[out_ix + 1] = chunk_alloc.offset;

barrier();
if (sh_alloc_failed) {
return;
}

// Use similar strategy as Laine & Karras paper; loop over bbox of bins
// touched by this element
x = x0;
Expand Down
Binary file modified piet-gpu/shader/binning.spv
Binary file not shown.
4 changes: 2 additions & 2 deletions piet-gpu/shader/bins.h
Original file line number Diff line number Diff line change
Expand Up @@ -18,14 +18,14 @@ BinInstanceRef BinInstance_index(BinInstanceRef ref, uint index) {

BinInstance BinInstance_read(BinInstanceRef ref) {
uint ix = ref.offset >> 2;
uint raw0 = bins[ix + 0];
uint raw0 = memory[ix + 0];
BinInstance s;
s.element_ix = raw0;
return s;
}

void BinInstance_write(BinInstanceRef ref, BinInstance s) {
uint ix = ref.offset >> 2;
bins[ix + 0] = s.element_ix;
memory[ix + 0] = s.element_ix;
}

Loading

0 comments on commit 4de67d9

Please sign in to comment.