Skip to content

Commit

Permalink
GPUDevice: Support geometry shaders
Browse files (browse the repository at this point in the history)
  • Loading branch information
stenzek committed Sep 2, 2023
1 parent 4fc984e commit e804b5e
Show file tree
Hide file tree
Showing 22 changed files with 121 additions and 44 deletions.
1 change: 1 addition & 0 deletions src/core/gpu.cpp
Expand Up @@ -1591,6 +1591,7 @@ bool GPU::CompileDisplayPipeline()

plconfig.vertex_shader = vso.get();
plconfig.fragment_shader = fso.get();
plconfig.geometry_shader = nullptr;
if (!(m_display_pipeline = g_gpu_device->CreatePipeline(plconfig)))
return false;
GL_OBJECT_NAME(m_display_pipeline, "Display Pipeline [%s]",
Expand Down
1 change: 1 addition & 0 deletions src/core/gpu_hw.cpp
Expand Up @@ -680,6 +680,7 @@ bool GPU_HW::CompilePipelines()
plconfig.depth_format = VRAM_DS_FORMAT;
plconfig.samples = m_multisamples;
plconfig.per_sample_shading = m_per_sample_shading;
plconfig.geometry_shader = nullptr;

// [depth_test][render_mode][texture_mode][transparency_mode][dithering][interlacing]
for (u8 depth_test = 0; depth_test < 3; depth_test++)
Expand Down
4 changes: 2 additions & 2 deletions src/core/shader_cache_version.h
@@ -1,7 +1,7 @@
// SPDX-FileCopyrightText: 2019-2022 Connor McLaughlin <stenzek@gmail.com>
// SPDX-FileCopyrightText: 2019-2023 Connor McLaughlin <stenzek@gmail.com>
// SPDX-License-Identifier: (GPL-3.0 OR CC-BY-NC-ND-4.0)

#pragma once
#include "common/types.h"

static constexpr u32 SHADER_CACHE_VERSION = 9;
static constexpr u32 SHADER_CACHE_VERSION = 10;
1 change: 1 addition & 0 deletions src/util/d3d11_device.cpp
Expand Up @@ -173,6 +173,7 @@ void D3D11Device::SetFeatures()
m_features.noperspective_interpolation = true;
m_features.supports_texture_buffers = true;
m_features.texture_buffers_emulated_with_ssbo = false;
m_features.geometry_shaders = true;
m_features.partial_msaa_resolve = false;
m_features.gpu_timing = true;
m_features.shader_cache = true;
Expand Down
1 change: 1 addition & 0 deletions src/util/d3d11_device.h
Expand Up @@ -183,6 +183,7 @@ class D3D11Device final : public GPUDevice

ID3D11InputLayout* m_current_input_layout = nullptr;
ID3D11VertexShader* m_current_vertex_shader = nullptr;
ID3D11GeometryShader* m_current_geometry_shader = nullptr;
ID3D11PixelShader* m_current_pixel_shader = nullptr;
ID3D11RasterizerState* m_current_rasterizer_state = nullptr;
ID3D11DepthStencilState* m_current_depth_state = nullptr;
Expand Down
36 changes: 27 additions & 9 deletions src/util/d3d11_pipeline.cpp
Expand Up @@ -34,6 +34,12 @@ ID3D11PixelShader* D3D11Shader::GetPixelShader() const
return static_cast<ID3D11PixelShader*>(m_shader.Get());
}

// Returns the wrapped shader object viewed as an ID3D11GeometryShader.
// DebugAssert checks that this shader's stage is Geometry; the cast itself is unchecked.
ID3D11GeometryShader* D3D11Shader::GetGeometryShader() const
{
  DebugAssert(m_stage == GPUShaderStage::Geometry);

  auto* const shader_object = m_shader.Get();
  return static_cast<ID3D11GeometryShader*>(shader_object);
}

ID3D11ComputeShader* D3D11Shader::GetComputeShader() const
{
DebugAssert(m_stage == GPUShaderStage::Compute);
Expand Down Expand Up @@ -64,6 +70,11 @@ std::unique_ptr<GPUShader> D3D11Device::CreateShaderFromBinary(GPUShaderStage st
reinterpret_cast<ID3D11PixelShader**>(shader.GetAddressOf()));
break;

case GPUShaderStage::Geometry:
hr = m_device->CreateGeometryShader(data.data(), data.size(), nullptr,
reinterpret_cast<ID3D11GeometryShader**>(shader.GetAddressOf()));
break;

case GPUShaderStage::Compute:
hr = m_device->CreateComputeShader(data.data(), data.size(), nullptr,
reinterpret_cast<ID3D11ComputeShader**>(shader.GetAddressOf()));
Expand Down Expand Up @@ -98,11 +109,11 @@ std::unique_ptr<GPUShader> D3D11Device::CreateShaderFromSource(GPUShaderStage st

D3D11Pipeline::D3D11Pipeline(ComPtr<ID3D11RasterizerState> rs, ComPtr<ID3D11DepthStencilState> ds,
ComPtr<ID3D11BlendState> bs, ComPtr<ID3D11InputLayout> il, ComPtr<ID3D11VertexShader> vs,
ComPtr<ID3D11PixelShader> ps, D3D11_PRIMITIVE_TOPOLOGY topology, u32 vertex_stride,
u32 blend_factor)
ComPtr<ID3D11GeometryShader> gs, ComPtr<ID3D11PixelShader> ps,
D3D11_PRIMITIVE_TOPOLOGY topology, u32 vertex_stride, u32 blend_factor)
: m_rs(std::move(rs)), m_ds(std::move(ds)), m_bs(std::move(bs)), m_il(std::move(il)), m_vs(std::move(vs)),
m_ps(std::move(ps)), m_topology(topology), m_vertex_stride(vertex_stride), m_blend_factor(blend_factor),
m_blend_factor_float(GPUDevice::RGBA8ToFloat(blend_factor))
m_gs(std::move(gs)), m_ps(std::move(ps)), m_topology(topology), m_vertex_stride(vertex_stride),
m_blend_factor(blend_factor), m_blend_factor_float(GPUDevice::RGBA8ToFloat(blend_factor))
{
}

Expand Down Expand Up @@ -318,11 +329,12 @@ std::unique_ptr<GPUPipeline> D3D11Device::CreatePipeline(const GPUPipeline::Grap
D3D11_PRIMITIVE_TOPOLOGY_TRIANGLESTRIP, // TriangleStrips
}};

return std::unique_ptr<GPUPipeline>(
new D3D11Pipeline(std::move(rs), std::move(ds), std::move(bs), std::move(il),
static_cast<const D3D11Shader*>(config.vertex_shader)->GetVertexShader(),
static_cast<const D3D11Shader*>(config.fragment_shader)->GetPixelShader(),
primitives[static_cast<u8>(config.primitive)], vertex_stride, config.blend.constant));
return std::unique_ptr<GPUPipeline>(new D3D11Pipeline(
std::move(rs), std::move(ds), std::move(bs), std::move(il),
static_cast<const D3D11Shader*>(config.vertex_shader)->GetVertexShader(),
config.geometry_shader ? static_cast<const D3D11Shader*>(config.geometry_shader)->GetGeometryShader() : nullptr,
static_cast<const D3D11Shader*>(config.fragment_shader)->GetPixelShader(),
primitives[static_cast<u8>(config.primitive)], vertex_stride, config.blend.constant));
}

void D3D11Device::SetPipeline(GPUPipeline* pipeline)
Expand Down Expand Up @@ -358,6 +370,12 @@ void D3D11Device::SetPipeline(GPUPipeline* pipeline)
m_context->VSSetShader(vs, nullptr, 0);
}

if (ID3D11GeometryShader* gs = PL->GetGeometryShader(); m_current_geometry_shader != gs)
{
m_current_geometry_shader = gs;
m_context->GSSetShader(gs, nullptr, 0);
}

if (ID3D11PixelShader* ps = PL->GetPixelShader(); m_current_pixel_shader != ps)
{
m_current_pixel_shader = ps;
Expand Down
7 changes: 5 additions & 2 deletions src/util/d3d11_pipeline.h
Expand Up @@ -25,6 +25,7 @@ class D3D11Shader final : public GPUShader

ID3D11VertexShader* GetVertexShader() const;
ID3D11PixelShader* GetPixelShader() const;
ID3D11GeometryShader* GetGeometryShader() const;
ID3D11ComputeShader* GetComputeShader() const;

ALWAYS_INLINE const std::vector<u8>& GetBytecode() const { return m_bytecode; }
Expand Down Expand Up @@ -55,6 +56,7 @@ class D3D11Pipeline final : public GPUPipeline
ALWAYS_INLINE ID3D11BlendState* GetBlendState() const { return m_bs.Get(); }
ALWAYS_INLINE ID3D11InputLayout* GetInputLayout() const { return m_il.Get(); }
ALWAYS_INLINE ID3D11VertexShader* GetVertexShader() const { return m_vs.Get(); }
ALWAYS_INLINE ID3D11GeometryShader* GetGeometryShader() const { return m_gs.Get(); }
ALWAYS_INLINE ID3D11PixelShader* GetPixelShader() const { return m_ps.Get(); }
ALWAYS_INLINE D3D11_PRIMITIVE_TOPOLOGY GetPrimitiveTopology() const { return m_topology; }
ALWAYS_INLINE u32 GetVertexStride() const { return m_vertex_stride; }
Expand All @@ -63,14 +65,15 @@ class D3D11Pipeline final : public GPUPipeline

private:
D3D11Pipeline(ComPtr<ID3D11RasterizerState> rs, ComPtr<ID3D11DepthStencilState> ds, ComPtr<ID3D11BlendState> bs,
ComPtr<ID3D11InputLayout> il, ComPtr<ID3D11VertexShader> vs, ComPtr<ID3D11PixelShader> ps,
D3D11_PRIMITIVE_TOPOLOGY topology, u32 vertex_stride, u32 blend_factor);
ComPtr<ID3D11InputLayout> il, ComPtr<ID3D11VertexShader> vs, ComPtr<ID3D11GeometryShader> gs,
ComPtr<ID3D11PixelShader> ps, D3D11_PRIMITIVE_TOPOLOGY topology, u32 vertex_stride, u32 blend_factor);

ComPtr<ID3D11RasterizerState> m_rs;
ComPtr<ID3D11DepthStencilState> m_ds;
ComPtr<ID3D11BlendState> m_bs;
ComPtr<ID3D11InputLayout> m_il;
ComPtr<ID3D11VertexShader> m_vs;
ComPtr<ID3D11GeometryShader> m_gs;
ComPtr<ID3D11PixelShader> m_ps;
D3D11_PRIMITIVE_TOPOLOGY m_topology;
u32 m_vertex_stride;
Expand Down
1 change: 1 addition & 0 deletions src/util/d3d12_device.cpp
Expand Up @@ -1203,6 +1203,7 @@ void D3D12Device::SetFeatures()
m_features.per_sample_shading = true;
m_features.supports_texture_buffers = true;
m_features.texture_buffers_emulated_with_ssbo = false;
m_features.geometry_shaders = true;
m_features.partial_msaa_resolve = true;
m_features.gpu_timing = true;
m_features.shader_cache = true;
Expand Down
7 changes: 7 additions & 0 deletions src/util/d3d12_pipeline.cpp
Expand Up @@ -88,6 +88,8 @@ std::string D3D12Pipeline::GetPipelineName(const GraphicsConfig& config)
hash.Update(shader->GetBytecodeData(), shader->GetBytecodeSize());
if (const D3D12Shader* shader = static_cast<const D3D12Shader*>(config.fragment_shader))
hash.Update(shader->GetBytecodeData(), shader->GetBytecodeSize());
if (const D3D12Shader* shader = static_cast<const D3D12Shader*>(config.geometry_shader))
hash.Update(shader->GetBytecodeData(), shader->GetBytecodeSize());
hash.Update(&config.color_format, sizeof(config.color_format));
hash.Update(&config.depth_format, sizeof(config.depth_format));
hash.Update(&config.samples, sizeof(config.samples));
Expand Down Expand Up @@ -178,6 +180,11 @@ std::unique_ptr<GPUPipeline> D3D12Device::CreatePipeline(const GPUPipeline::Grap
static_cast<const D3D12Shader*>(config.vertex_shader)->GetBytecodeSize());
gpb.SetPixelShader(static_cast<const D3D12Shader*>(config.fragment_shader)->GetBytecodeData(),
static_cast<const D3D12Shader*>(config.fragment_shader)->GetBytecodeSize());
if (config.geometry_shader)
{
gpb.SetGeometryShader(static_cast<const D3D12Shader*>(config.geometry_shader)->GetBytecodeData(),
static_cast<const D3D12Shader*>(config.geometry_shader)->GetBytecodeSize());
}
gpb.SetPrimitiveTopologyType(primitive_types[static_cast<u8>(config.primitive)]);

if (!config.input_layout.vertex_attributes.empty())
Expand Down
12 changes: 8 additions & 4 deletions src/util/d3d_common.cpp
Expand Up @@ -358,29 +358,33 @@ std::optional<DynamicHeapArray<u8>> D3DCommon::CompileShader(D3D_FEATURE_LEVEL f
{
case D3D_FEATURE_LEVEL_10_0:
{
static constexpr std::array<const char*, 4> targets = {{"vs_4_0", "ps_4_0", "cs_4_0"}};
static constexpr std::array<const char*, static_cast<u32>(GPUShaderStage::MaxCount)> targets = {
{"vs_4_0", "ps_4_0", "gs_4_0", "cs_4_0"}};
target = targets[static_cast<int>(stage)];
}
break;

case D3D_FEATURE_LEVEL_10_1:
{
static constexpr std::array<const char*, 4> targets = {{"vs_4_1", "ps_4_1", "cs_4_1"}};
static constexpr std::array<const char*, static_cast<u32>(GPUShaderStage::MaxCount)> targets = {
{"vs_4_1", "ps_4_1", "gs_4_0", "cs_4_1"}};
target = targets[static_cast<int>(stage)];
}
break;

case D3D_FEATURE_LEVEL_11_0:
{
static constexpr std::array<const char*, 4> targets = {{"vs_5_0", "ps_5_0", "cs_5_0"}};
static constexpr std::array<const char*, static_cast<u32>(GPUShaderStage::MaxCount)> targets = {
{"vs_5_0", "ps_5_0", "gs_5_0", "cs_5_0"}};
target = targets[static_cast<int>(stage)];
}
break;

case D3D_FEATURE_LEVEL_11_1:
default:
{
static constexpr std::array<const char*, 4> targets = {{"vs_5_1", "ps_5_1", "cs_5_1"}};
static constexpr std::array<const char*, static_cast<u32>(GPUShaderStage::MaxCount)> targets = {
{"vs_5_1", "ps_5_1", "gs_5_1", "cs_5_1"}};
target = targets[static_cast<int>(stage)];
}
break;
Expand Down
19 changes: 6 additions & 13 deletions src/util/gpu_device.cpp
Expand Up @@ -2,7 +2,7 @@
// SPDX-License-Identifier: (GPL-3.0 OR CC-BY-NC-ND-4.0)

#include "gpu_device.h"
#include "core/host.h" // TODO: Remove, needed for getting fullscreen mode.
#include "core/host.h" // TODO: Remove, needed for getting fullscreen mode.
#include "core/settings.h" // TODO: Remove, needed for dump directory.
#include "shadergen.h"

Expand Down Expand Up @@ -78,18 +78,10 @@ GPUShader::~GPUShader() = default;

const char* GPUShader::GetStageName(GPUShaderStage stage)
{
switch (stage)
{
case GPUShaderStage::Vertex:
return "Vertex";
case GPUShaderStage::Fragment:
return "Fragment";
case GPUShaderStage::Compute:
return "Compute";
default:
UnreachableCode();
return "";
}
static constexpr std::array<const char*, static_cast<u32>(GPUShaderStage::MaxCount)> names = {"Vertex", "Fragment",
"Geometry", "Compute"};

return names[static_cast<u32>(stage)];
}

GPUPipeline::GPUPipeline() = default;
Expand Down Expand Up @@ -432,6 +424,7 @@ bool GPUDevice::CreateResources()
plconfig.samples = 1;
plconfig.per_sample_shading = false;
plconfig.vertex_shader = imgui_vs.get();
plconfig.geometry_shader = nullptr;
plconfig.fragment_shader = imgui_fs.get();

m_imgui_pipeline = CreatePipeline(plconfig);
Expand Down
3 changes: 3 additions & 0 deletions src/util/gpu_device.h
Expand Up @@ -114,6 +114,7 @@ enum class GPUShaderStage : u8
{
Vertex,
Fragment,
Geometry,
Compute,

MaxCount
Expand Down Expand Up @@ -374,6 +375,7 @@ class GPUPipeline
BlendState blend;

GPUShader* vertex_shader;
GPUShader* geometry_shader;
GPUShader* fragment_shader;

GPUTexture::Format color_format;
Expand Down Expand Up @@ -433,6 +435,7 @@ class GPUDevice
bool noperspective_interpolation : 1;
bool supports_texture_buffers : 1;
bool texture_buffers_emulated_with_ssbo : 1;
bool geometry_shaders : 1;
bool partial_msaa_resolve : 1;
bool gpu_timing : 1;
bool shader_cache : 1;
Expand Down
1 change: 1 addition & 0 deletions src/util/metal_device.mm
Expand Up @@ -210,6 +210,7 @@ static void RunOnMainThread(F&& f)
m_features.noperspective_interpolation = true;
m_features.supports_texture_buffers = true;
m_features.texture_buffers_emulated_with_ssbo = true;
m_features.geometry_shaders = false;
m_features.partial_msaa_resolve = true;
m_features.shader_cache = true;
m_features.pipeline_cache = false;
Expand Down
2 changes: 2 additions & 0 deletions src/util/opengl_device.cpp
Expand Up @@ -460,6 +460,8 @@ bool OpenGLDevice::CheckFeatures(bool* buggy_pbo)
// noperspective is not supported in GLSL ES.
m_features.noperspective_interpolation = !is_gles;

m_features.geometry_shaders = GLAD_GL_VERSION_3_2 || GLAD_GL_ES_VERSION_3_2;

m_features.gpu_timing = !(m_gl_context->IsGLES() &&
(!GLAD_GL_EXT_disjoint_timer_query || !glGetQueryObjectivEXT || !glGetQueryObjectui64vEXT));
m_features.partial_msaa_resolve = true;
Expand Down
32 changes: 24 additions & 8 deletions src/util/opengl_pipeline.cpp
Expand Up @@ -41,7 +41,7 @@ struct PipelineDiskCacheIndexEntry
u32 uncompressed_size;
u32 compressed_size;
};
static_assert(sizeof(PipelineDiskCacheIndexEntry) == 128); // No padding
static_assert(sizeof(PipelineDiskCacheIndexEntry) == 112); // No padding

static unsigned s_next_bad_shader_id = 1;

Expand All @@ -50,6 +50,7 @@ static GLenum GetGLShaderType(GPUShaderStage stage)
static constexpr std::array<GLenum, static_cast<u32>(GPUShaderStage::MaxCount)> mapping = {{
GL_VERTEX_SHADER, // Vertex
GL_FRAGMENT_SHADER, // Fragment
GL_GEOMETRY_SHADER, // Geometry
GL_COMPUTE_SHADER, // Compute
}};

Expand Down Expand Up @@ -222,23 +223,34 @@ size_t OpenGLPipeline::ProgramCacheKeyHash::operator()(const ProgramCacheKey& k)
{
// TODO: maybe use xxhash here...
std::size_t h = 0;
hash_combine(h, k.vs_key.entry_point_low, k.vs_key.entry_point_high, k.vs_key.source_hash_low,
k.vs_key.source_hash_high, k.vs_key.source_length, k.vs_key.shader_type);
hash_combine(h, k.fs_key.entry_point_low, k.fs_key.entry_point_high, k.fs_key.source_hash_low,
k.fs_key.source_hash_high, k.fs_key.source_length, k.fs_key.shader_type);
hash_combine(h, k.va_key.num_vertex_attributes, k.va_key.vertex_attribute_stride);
for (const VertexAttribute& va : k.va_key.vertex_attributes)
hash_combine(h, va.key);
hash_combine(h, k.vs_hash_low, k.vs_hash_high, k.vs_length);
hash_combine(h, k.fs_hash_low, k.fs_hash_high, k.fs_length);
hash_combine(h, k.gs_hash_low, k.gs_hash_high, k.gs_length);
return h;
}

OpenGLPipeline::ProgramCacheKey OpenGLPipeline::GetProgramCacheKey(const GraphicsConfig& plconfig)
{
Assert(plconfig.input_layout.vertex_attributes.size() <= MAX_VERTEX_ATTRIBUTES);

const GPUShaderCache::CacheIndexKey& vs_key = static_cast<const OpenGLShader*>(plconfig.vertex_shader)->GetKey();
const GPUShaderCache::CacheIndexKey& fs_key = static_cast<const OpenGLShader*>(plconfig.fragment_shader)->GetKey();
const GPUShaderCache::CacheIndexKey* gs_key =
plconfig.geometry_shader ? &static_cast<const OpenGLShader*>(plconfig.geometry_shader)->GetKey() : nullptr;

ProgramCacheKey ret;
ret.vs_key = static_cast<const OpenGLShader*>(plconfig.vertex_shader)->GetKey();
ret.fs_key = static_cast<const OpenGLShader*>(plconfig.fragment_shader)->GetKey();
ret.vs_hash_low = vs_key.source_hash_low;
ret.vs_hash_high = vs_key.source_hash_high;
ret.vs_length = vs_key.source_length;
ret.fs_hash_low = fs_key.source_hash_low;
ret.fs_hash_high = fs_key.source_hash_high;
ret.fs_length = fs_key.source_length;
ret.gs_hash_low = gs_key ? gs_key->source_hash_low : 0;
ret.gs_hash_high = gs_key ? gs_key->source_hash_high : 0;
ret.gs_length = gs_key ? gs_key->source_length : 0;

std::memset(ret.va_key.vertex_attributes, 0, sizeof(ret.va_key.vertex_attributes));
ret.va_key.vertex_attribute_stride = 0;
Expand Down Expand Up @@ -300,7 +312,9 @@ GLuint OpenGLDevice::CompileProgram(const GPUPipeline::GraphicsConfig& plconfig)
{
OpenGLShader* vertex_shader = static_cast<OpenGLShader*>(plconfig.vertex_shader);
OpenGLShader* fragment_shader = static_cast<OpenGLShader*>(plconfig.fragment_shader);
if (!vertex_shader || !fragment_shader || !vertex_shader->Compile() || !fragment_shader->Compile())
OpenGLShader* geometry_shader = static_cast<OpenGLShader*>(plconfig.geometry_shader);
if (!vertex_shader || !fragment_shader || !vertex_shader->Compile() || !fragment_shader->Compile() ||
(geometry_shader && !geometry_shader->Compile()))
{
Log_ErrorPrintf("Failed to compile shaders.");
return 0;
Expand All @@ -320,6 +334,8 @@ GLuint OpenGLDevice::CompileProgram(const GPUPipeline::GraphicsConfig& plconfig)
Assert(plconfig.vertex_shader && plconfig.fragment_shader);
glAttachShader(program_id, vertex_shader->GetGLId());
glAttachShader(program_id, fragment_shader->GetGLId());
if (geometry_shader)
glAttachShader(program_id, geometry_shader->GetGLId());

if (!ShaderGen::UseGLSLBindingLayout())
{
Expand Down

0 comments on commit e804b5e

Please sign in to comment.