Skip to content

Commit

Permalink
Merge pull request #175 from naivisoftware/gpu_compute_apple_fix
Browse files Browse the repository at this point in the history
GPU workgroup size compute fix macOS
  • Loading branch information
cklosters committed Apr 7, 2022
2 parents f4d9034 + 946559e commit 2c4f4dd
Show file tree
Hide file tree
Showing 4 changed files with 16 additions and 12 deletions.
8 changes: 6 additions & 2 deletions demos/computeflocking/data/shaders/flock.comp
Original file line number Diff line number Diff line change
Expand Up @@ -27,9 +27,13 @@
// Workgroup size as specialization constant as per:
// https://www.khronos.org/registry/OpenGL/specs/gl/GLSLangSpec.4.60.html#specialization-constant-qualifier

// When a workgroup size specialization constant is detected, NAP automatically overwrites
// it with the maximum group size of the device on pipeline creation.
// NAP overwrites the workgroup size specialization constant, when detected and not 0, with the maximum group
// size supported by the device on pipeline creation.
layout(local_size_x_id = 0) in;

// When declaring the workgroup size specialization constant to be higher than 0, NAP will not
// override the workgroup size when creating the compute pipeline. Uncommenting this bit and commenting out
// the above will let the shader define the workgroup size.
//layout(local_size_x = 512) in;

struct Boid
Expand Down
3 changes: 0 additions & 3 deletions modules/naprender/data/shaders/constant.vert
Original file line number Diff line number Diff line change
Expand Up @@ -12,9 +12,6 @@ uniform nap
} mvp;

in vec3 in_Position;
in vec3 in_UV0;

out vec3 passUVs;

void main(void)
{
Expand Down
11 changes: 9 additions & 2 deletions modules/naprender/src/renderservice.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1052,9 +1052,16 @@ namespace nap
entry.constantID = static_cast<uint>(const_ids[i]);
entry.offset = static_cast<uint>(spec_entries.size() * sizeof(uint));
entry.size = sizeof(uint);

spec_entries.emplace_back(std::move(entry));
spec_data.emplace_back(computeShader.getWorkGroupSize()[i]);
uint32 work_group_size = computeShader.getWorkGroupSize()[i];
#ifdef __APPLE__
// Clamp work group size for Apple to 512, based on maxTotalThreadsPerThreadgroup,
// which doesn't necessarily match physical device limits, especially on older devices.
// See: https://developer.apple.com/documentation/metal/compute_passes/calculating_threadgroup_and_grid_sizes
// And: https://github.com/KhronosGroup/SPIRV-Cross/issues/837
work_group_size = math::min<uint32>(work_group_size, 512);
#endif // __APPLE__
spec_data.emplace_back(work_group_size);
}
}

Expand Down
6 changes: 1 addition & 5 deletions modules/naprender/src/shader.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -866,10 +866,6 @@ namespace nap
if (!parseShaderVariables(comp_shader_compiler, VK_SHADER_STAGE_COMPUTE_BIT, mUBODeclarations, mSSBODeclarations, mSamplerDeclarations, errorState))
return false;

// Query useful compute info
std::array<uint, 3> max_workgroup_size;
std::memcpy(max_workgroup_size.data(), &mRenderService->getPhysicalDeviceProperties().limits.maxComputeWorkGroupSize[0], sizeof(max_workgroup_size));

// Cache workgroup size specialization constants
std::array<spirv_cross::SpecializationConstant, 3> spec_constants;
comp_shader_compiler.get_work_group_size_specialization_constants(spec_constants[0], spec_constants[1], spec_constants[2]);
Expand All @@ -884,7 +880,7 @@ namespace nap
{
// Overwrite workgroup size with queried maximum supported workgroup size
mWorkGroupSizeConstantIds[i] = spec_constants[i].constant_id;
mWorkGroupSize[i] = max_workgroup_size[i];
mWorkGroupSize[i] = mRenderService->getPhysicalDeviceProperties().limits.maxComputeWorkGroupSize[i];
}
else
{
Expand Down

0 comments on commit 2c4f4dd

Please sign in to comment.