Skip to content

Commit

Permalink
GPU: Support emulating a depth buffer from PGXP depth values
Browse files Browse the repository at this point in the history
  • Loading branch information
stenzek committed Dec 22, 2020
1 parent f393ea6 commit aa15432
Show file tree
Hide file tree
Showing 13 changed files with 242 additions and 47 deletions.
85 changes: 78 additions & 7 deletions src/core/gpu_hw.cpp
Expand Up @@ -75,6 +75,7 @@ bool GPU_HW::Initialize(HostDisplay* host_display)
m_texture_filtering = GPUTextureFilter::Nearest;
}

m_pgxp_depth_buffer = g_settings.gpu_pgxp_depth_buffer;
PrintSettingsToLog();
return true;
}
Expand Down Expand Up @@ -123,7 +124,8 @@ void GPU_HW::UpdateHWSettings(bool* framebuffer_changed, bool* shaders_changed)
(m_resolution_scale != resolution_scale || m_multisamples != multisamples ||
m_true_color != g_settings.gpu_true_color || m_per_sample_shading != per_sample_shading ||
m_scaled_dithering != g_settings.gpu_scaled_dithering || m_texture_filtering != g_settings.gpu_texture_filter ||
m_using_uv_limits != use_uv_limits || m_chroma_smoothing != g_settings.gpu_24bit_chroma_smoothing);
m_using_uv_limits != use_uv_limits || m_chroma_smoothing != g_settings.gpu_24bit_chroma_smoothing ||
m_pgxp_depth_buffer != g_settings.UsingPGXPDepthBuffer());

if (m_resolution_scale != resolution_scale)
{
Expand Down Expand Up @@ -161,6 +163,14 @@ void GPU_HW::UpdateHWSettings(bool* framebuffer_changed, bool* shaders_changed)
if (!m_supports_dual_source_blend && TextureFilterRequiresDualSourceBlend(m_texture_filtering))
m_texture_filtering = GPUTextureFilter::Nearest;

if (m_pgxp_depth_buffer != g_settings.UsingPGXPDepthBuffer())
{
m_pgxp_depth_buffer = g_settings.UsingPGXPDepthBuffer();
m_batch.use_depth_buffer = false;
if (m_pgxp_depth_buffer)
ClearDepthBuffer();
}

PrintSettingsToLog();
}

Expand Down Expand Up @@ -202,6 +212,7 @@ void GPU_HW::PrintSettingsToLog()
Log_InfoPrintf("Texture Filtering: %s", Settings::GetTextureFilterDisplayName(m_texture_filtering));
Log_InfoPrintf("Dual-source blending: %s", m_supports_dual_source_blend ? "Supported" : "Not supported");
Log_InfoPrintf("Using UV limits: %s", m_using_uv_limits ? "YES" : "NO");
Log_InfoPrintf("Depth buffer: %s", m_pgxp_depth_buffer ? "YES" : "NO");
}

void GPU_HW::UpdateVRAMReadTexture()
Expand Down Expand Up @@ -320,6 +331,44 @@ void GPU_HW::ComputePolygonUVLimits(BatchVertex* vertices, u32 num_vertices)
vertices[i].SetUVLimits(min_u, max_u, min_v, max_v);
}

/// Switches the per-batch depth-buffer flag used for PGXP depth buffering.
/// Does nothing when the flag already has the requested value. Otherwise any
/// vertices already queued are flushed before the flag is changed, and the
/// last-seen depth tracker is reset to 1.0.
void GPU_HW::SetBatchDepthBuffer(bool enabled)
{
  if (m_batch.use_depth_buffer != enabled)
  {
    // The flush happens while the old flag value is still in place.
    const bool has_queued_vertices = (GetBatchVertexCount() > 0);
    if (has_queued_vertices)
    {
      FlushRender();
      EnsureVertexBufferSpaceForCurrentCommand();
    }

    m_batch.use_depth_buffer = enabled;

    // Restart depth tracking from the same value the header uses as the initial state.
    m_last_depth_z = 1.0f;
  }
}

void GPU_HW::CheckForDepthClear(const BatchVertex* vertices, u32 num_vertices)
{
DebugAssert(num_vertices == 3 || num_vertices == 4);
float average_z;
if (num_vertices == 3)
average_z = std::min((vertices[0].w + vertices[1].w + vertices[2].w) / 3.0f, 1.0f);
else
average_z = std::min((vertices[0].w + vertices[1].w + vertices[2].w + vertices[3].w) / 4.0f, 1.0f);

if ((average_z - m_last_depth_z) >= g_settings.gpu_pgxp_depth_clear_threshold)
{
if (GetBatchVertexCount() > 0)
{
FlushRender();
EnsureVertexBufferSpaceForCurrentCommand();
}

ClearDepthBuffer();
}

m_last_depth_z = average_z;
}

void GPU_HW::DrawLine(float x0, float y0, u32 col0, float x1, float y1, u32 col1, float depth)
{
const float dx = x1 - x0;
Expand Down Expand Up @@ -453,10 +502,19 @@ void GPU_HW::LoadVertices()
m_drawing_offset.y, &vertices[i].x, &vertices[i].y, &vertices[i].w);
}
}
if (!valid_w)
if (pgxp)
{
for (BatchVertex& v : vertices)
v.w = 1.0f;
if (!valid_w)
{
SetBatchDepthBuffer(false);
for (BatchVertex& v : vertices)
v.w = 1.0f;
}
else if (g_settings.gpu_pgxp_depth_buffer)
{
SetBatchDepthBuffer(true);
CheckForDepthClear(vertices.data(), num_vertices);
}
}

if (rc.quad_polygon && m_resolution_scale > 1)
Expand Down Expand Up @@ -580,12 +638,13 @@ void GPU_HW::LoadVertices()
break;
}

// we can split the rectangle up into potentially 8 quads
DebugAssert(GetBatchVertexSpace() >= MAX_VERTICES_FOR_RECTANGLE);

if (!IsDrawingAreaIsValid())
return;

// we can split the rectangle up into potentially 8 quads
SetBatchDepthBuffer(false);
DebugAssert(GetBatchVertexSpace() >= MAX_VERTICES_FOR_RECTANGLE);

// Split the rectangle into multiple quads if it's greater than 256x256, as the texture page should repeat.
u16 tex_top = orig_tex_top;
for (s32 y_offset = 0; y_offset < rectangle_height;)
Expand Down Expand Up @@ -634,6 +693,8 @@ void GPU_HW::LoadVertices()

case GPUPrimitive::Line:
{
SetBatchDepthBuffer(false);

if (!rc.polyline)
{
DebugAssert(GetBatchVertexSpace() >= 2);
Expand Down Expand Up @@ -760,6 +821,10 @@ GPU_HW::VRAMFillUBOData GPU_HW::GetVRAMFillUBOData(u32 x, u32 y, u32 width, u32
VRAMFillUBOData uniforms;
std::tie(uniforms.u_fill_color[0], uniforms.u_fill_color[1], uniforms.u_fill_color[2], uniforms.u_fill_color[3]) =
RGBA8ToFloat(color);

if (m_pgxp_depth_buffer)
uniforms.u_fill_color[3] = 1.0f;

uniforms.u_interlaced_displayed_field = GetActiveLineLSB();
return uniforms;
}
Expand Down Expand Up @@ -879,6 +944,9 @@ void GPU_HW::EnsureVertexBufferSpaceForCurrentCommand()

void GPU_HW::ResetBatchVertexDepth()
{
if (m_pgxp_depth_buffer)
return;

Log_PerfPrint("Resetting batch vertex depth");
FlushRender();
UpdateDepthBufferFromMaskBit();
Expand Down Expand Up @@ -1023,6 +1091,9 @@ void GPU_HW::FlushRender()
{
m_drawing_area_changed = false;
SetScissorFromDrawingArea();

if (m_pgxp_depth_buffer && m_last_depth_z < 1.0f)
ClearDepthBuffer();
}

if (m_batch_ubo_dirty)
Expand Down
9 changes: 8 additions & 1 deletion src/core/gpu_hw.h
Expand Up @@ -100,6 +100,7 @@ class GPU_HW : public GPU
bool interlacing;
bool set_mask_while_drawing;
bool check_mask_before_draw;
bool use_depth_buffer;

// We need two-pass rendering when using BG-FG blending and texturing, as the transparency can be enabled
// on a per-pixel basis, and the opaque pixels shouldn't be blended at all.
Expand Down Expand Up @@ -179,6 +180,7 @@ class GPU_HW : public GPU

virtual void UpdateVRAMReadTexture();
virtual void UpdateDepthBufferFromMaskBit() = 0;
virtual void ClearDepthBuffer() = 0;
virtual void SetScissorFromDrawingArea() = 0;
virtual void MapBatchVertexPointer(u32 required_vertices) = 0;
virtual void UnmapBatchVertexPointer(u32 used_vertices) = 0;
Expand Down Expand Up @@ -280,7 +282,10 @@ class GPU_HW : public GPU

/// Computes polygon U/V boundaries.
static void ComputePolygonUVLimits(BatchVertex* vertices, u32 num_vertices);
static bool AreUVLimitsNeeded();

/// Sets the depth test flag for PGXP depth buffering.
void SetBatchDepthBuffer(bool enabled);
void CheckForDepthClear(const BatchVertex* vertices, u32 num_vertices);

HeapArray<u16, VRAM_WIDTH * VRAM_HEIGHT> m_vram_shadow;

Expand All @@ -289,6 +294,7 @@ class GPU_HW : public GPU
BatchVertex* m_batch_current_vertex_ptr = nullptr;
u32 m_batch_base_vertex = 0;
s32 m_current_depth = 0;
float m_last_depth_z = 1.0f;

u32 m_resolution_scale = 1;
u32 m_multisamples = 1;
Expand All @@ -303,6 +309,7 @@ class GPU_HW : public GPU
bool m_supports_per_sample_shading = false;
bool m_supports_dual_source_blend = false;
bool m_using_uv_limits = false;
bool m_pgxp_depth_buffer = false;

BatchConfig m_batch = {};
BatchUBOData m_batch_ubo_data = {};
Expand Down
42 changes: 34 additions & 8 deletions src/core/gpu_hw_d3d11.cpp
Expand Up @@ -235,9 +235,10 @@ void GPU_HW_D3D11::ClearFramebuffer()
{
static constexpr std::array<float, 4> color = {};
m_context->ClearRenderTargetView(m_vram_texture.GetD3DRTV(), color.data());
m_context->ClearDepthStencilView(m_vram_depth_view.Get(), D3D11_CLEAR_DEPTH, 0.0f, 0);
m_context->ClearDepthStencilView(m_vram_depth_view.Get(), D3D11_CLEAR_DEPTH, m_pgxp_depth_buffer ? 1.0f : 0.0f, 0);
m_context->ClearRenderTargetView(m_display_texture, color.data());
SetFullVRAMDirtyRectangle();
m_last_depth_z = 1.0f;
}

void GPU_HW_D3D11::DestroyFramebuffer()
Expand Down Expand Up @@ -287,6 +288,7 @@ bool GPU_HW_D3D11::CreateStateObjects()
rs_desc.CullMode = D3D11_CULL_NONE;
rs_desc.ScissorEnable = TRUE;
rs_desc.MultisampleEnable = IsUsingMultisampling();
rs_desc.DepthClipEnable = FALSE;
hr = m_device->CreateRasterizerState(&rs_desc, m_cull_none_rasterizer_state.ReleaseAndGetAddressOf());
if (FAILED(hr))
return false;
Expand Down Expand Up @@ -316,11 +318,16 @@ bool GPU_HW_D3D11::CreateStateObjects()
if (FAILED(hr))
return false;

ds_desc.DepthFunc = D3D11_COMPARISON_GREATER_EQUAL;
ds_desc.DepthFunc = D3D11_COMPARISON_LESS_EQUAL;
hr = m_device->CreateDepthStencilState(&ds_desc, m_depth_test_less_state.ReleaseAndGetAddressOf());
if (FAILED(hr))
return false;

ds_desc.DepthFunc = D3D11_COMPARISON_GREATER_EQUAL;
hr = m_device->CreateDepthStencilState(&ds_desc, m_depth_test_greater_state.ReleaseAndGetAddressOf());
if (FAILED(hr))
return false;

CD3D11_BLEND_DESC bl_desc = CD3D11_BLEND_DESC(CD3D11_DEFAULT());
hr = m_device->CreateBlendState(&bl_desc, m_blend_disabled_state.ReleaseAndGetAddressOf());
if (FAILED(hr))
Expand Down Expand Up @@ -377,6 +384,7 @@ void GPU_HW_D3D11::DestroyStateObjects()
m_point_sampler_state.Reset();
m_blend_no_color_writes_state.Reset();
m_blend_disabled_state.Reset();
m_depth_test_greater_state.Reset();
m_depth_test_less_state.Reset();
m_depth_test_always_state.Reset();
m_depth_disabled_state.Reset();
Expand All @@ -392,7 +400,7 @@ bool GPU_HW_D3D11::CompileShaders()

GPU_HW_ShaderGen shadergen(m_host_display->GetRenderAPI(), m_resolution_scale, m_multisamples, m_per_sample_shading,
m_true_color, m_scaled_dithering, m_texture_filtering, m_using_uv_limits,
m_supports_dual_source_blend);
m_pgxp_depth_buffer, m_supports_dual_source_blend);

Common::Timer compile_time;
const int progress_total = 1 + 1 + 2 + (4 * 9 * 2 * 2) + 7 + (2 * 3);
Expand Down Expand Up @@ -622,8 +630,12 @@ void GPU_HW_D3D11::DrawBatchVertices(BatchRenderMode render_mode, u32 base_verte
const GPUTransparencyMode transparency_mode =
(render_mode == BatchRenderMode::OnlyOpaque) ? GPUTransparencyMode::Disabled : m_batch.transparency_mode;
m_context->OMSetBlendState(m_batch_blend_states[static_cast<u8>(transparency_mode)].Get(), nullptr, 0xFFFFFFFFu);

m_context->OMSetDepthStencilState(
m_batch.check_mask_before_draw ? m_depth_test_less_state.Get() : m_depth_test_always_state.Get(), 0);
(m_batch.use_depth_buffer ?
m_depth_test_less_state.Get() :
(m_batch.check_mask_before_draw ? m_depth_test_greater_state.Get() : m_depth_test_always_state.Get())),
0);

m_context->Draw(num_vertices, base_vertex);
}
Expand Down Expand Up @@ -798,7 +810,8 @@ void GPU_HW_D3D11::UpdateVRAM(u32 x, u32 y, u32 width, u32 height, const void* d

const VRAMWriteUBOData uniforms =
GetVRAMWriteUBOData(x, y, width, height, map_result.index_aligned, set_mask, check_mask);
m_context->OMSetDepthStencilState(check_mask ? m_depth_test_less_state.Get() : m_depth_test_always_state.Get(), 0);
m_context->OMSetDepthStencilState(
(check_mask && !m_batch.use_depth_buffer) ? m_depth_test_greater_state.Get() : m_depth_test_always_state.Get(), 0);
m_context->PSSetShaderResources(0, 1, m_texture_stream_buffer_srv_r16ui.GetAddressOf());

// the viewport should already be set to the full vram, so just adjust the scissor
Expand All @@ -825,13 +838,15 @@ void GPU_HW_D3D11::CopyVRAM(u32 src_x, u32 src_y, u32 dst_x, u32 dst_y, u32 widt
const Common::Rectangle<u32> dst_bounds_scaled(dst_bounds * m_resolution_scale);
SetViewportAndScissor(dst_bounds_scaled.left, dst_bounds_scaled.top, dst_bounds_scaled.GetWidth(),
dst_bounds_scaled.GetHeight());
m_context->OMSetDepthStencilState(
m_GPUSTAT.check_mask_before_draw ? m_depth_test_less_state.Get() : m_depth_test_always_state.Get(), 0);
m_context->OMSetDepthStencilState((m_GPUSTAT.check_mask_before_draw && !m_batch.use_depth_buffer) ?
m_depth_test_greater_state.Get() :
m_depth_test_always_state.Get(),
0);
m_context->PSSetShaderResources(0, 1, m_vram_read_texture.GetD3DSRVArray());
DrawUtilityShader(m_vram_copy_pixel_shader.Get(), &uniforms, sizeof(uniforms));
RestoreGraphicsAPIState();

if (m_GPUSTAT.check_mask_before_draw)
if (m_GPUSTAT.check_mask_before_draw && !m_batch.use_depth_buffer)
m_current_depth++;

return;
Expand Down Expand Up @@ -877,6 +892,9 @@ void GPU_HW_D3D11::UpdateVRAMReadTexture()

void GPU_HW_D3D11::UpdateDepthBufferFromMaskBit()
{
if (m_pgxp_depth_buffer)
return;

SetViewportAndScissor(0, 0, m_vram_texture.GetWidth(), m_vram_texture.GetHeight());

m_context->OMSetRenderTargets(0, nullptr, m_vram_depth_view.Get());
Expand All @@ -890,6 +908,14 @@ void GPU_HW_D3D11::UpdateDepthBufferFromMaskBit()
RestoreGraphicsAPIState();
}

/// Clears the VRAM depth view to 1.0 and resets the last-seen depth tracker to match.
/// Asserts (debug builds) that PGXP depth buffering is active.
void GPU_HW_D3D11::ClearDepthBuffer()
{
  DebugAssert(m_pgxp_depth_buffer);

  // Same value is written to the depth view and to the tracker so the two stay in sync.
  static constexpr float cleared_depth = 1.0f;

  m_context->ClearDepthStencilView(m_vram_depth_view.Get(), D3D11_CLEAR_DEPTH, cleared_depth, 0);
  m_last_depth_z = cleared_depth;
}

/// Factory for the D3D11 hardware renderer: constructs a GPU_HW_D3D11 and
/// returns it as an owning pointer to the GPU base type.
std::unique_ptr<GPU> GPU::CreateHardwareD3D11Renderer()
{
return std::make_unique<GPU_HW_D3D11>();
}
Expand Down
2 changes: 2 additions & 0 deletions src/core/gpu_hw_d3d11.h
Expand Up @@ -35,6 +35,7 @@ class GPU_HW_D3D11 : public GPU_HW
void CopyVRAM(u32 src_x, u32 src_y, u32 dst_x, u32 dst_y, u32 width, u32 height) override;
void UpdateVRAMReadTexture() override;
void UpdateDepthBufferFromMaskBit() override;
void ClearDepthBuffer() override;
void SetScissorFromDrawingArea() override;
void MapBatchVertexPointer(u32 required_vertices) override;
void UnmapBatchVertexPointer(u32 used_vertices) override;
Expand Down Expand Up @@ -94,6 +95,7 @@ class GPU_HW_D3D11 : public GPU_HW
ComPtr<ID3D11DepthStencilState> m_depth_disabled_state;
ComPtr<ID3D11DepthStencilState> m_depth_test_always_state;
ComPtr<ID3D11DepthStencilState> m_depth_test_less_state;
ComPtr<ID3D11DepthStencilState> m_depth_test_greater_state;

ComPtr<ID3D11BlendState> m_blend_disabled_state;
ComPtr<ID3D11BlendState> m_blend_no_color_writes_state;
Expand Down

0 comments on commit aa15432

Please sign in to comment.