Permalink
Browse files

i965: Rewrite the HiZ op

The HiZ op was implemented as a meta-op. This patch reimplements it by
emitting a special HiZ batch. This fixes several known bugs, and likely
a lot of undiscovered ones too.

==== Why the HiZ meta-op needed to die ====

The HiZ op was implemented as a meta-op, which caused lots of trouble. All
other meta-ops occur as a result of some GL call (for example, glClear and
glGenerateMipmap), but the HiZ meta-op was special. It was called in
places that Mesa (in particular, the vbo and swrast modules) did not
expect---and were not prepared for---state changes to occur (for example:
glDraw; glCallList; within glBegin/End blocks; and within
swrast_prepare_render as a result of intel_miptree_map).

In an attempt to work around these unexpected state changes, I added two
hooks in i965:
  - A hook for glDraw, located in brw_predraw_resolve_buffers (which is
    called in the glDraw path). This hook detected if a predraw resolve
    meta-op had occurred, and would hackishly repropagate some GL state
    if necessary. This ensured that the meta-op state changes would not
    intefere with the vbo module's subsequent execution of glDraw.
  - A hook for glBegin, implemented by brwPrepareExecBegin. This hook
    resolved all buffers before entering
    a glBegin/End block, thus preventing an infinitely recurring call to
    vbo_exec_FlushVertices. The vbo module calls vbo_exec_FlushVertices to
    flush its vertex queue in response to GL state changes.

Unfortunately, these hooks were not sufficient. The meta-op state changes
still interacted badly with glPopAttrib (as discovered in bug 44927) and
with swrast rendering (as discovered by debugging gen6's swrast fallback
for glBitmap). I expect there are more undiscovered bugs. Rather than play
whack-a-mole in a minefield, the sane approach is to replace the HiZ
meta-op with something safer.

==== How it was killed ====

This patch consists of several logical components:
  1. Rewrite the HiZ op by replacing function gen6_resolve_slice with
     gen6_hiz_exec and gen7_hiz_exec. The new functions do not call
     a meta-op, but instead manually construct and emit a batch to "draw"
     the HiZ op's rectangle primitive. The new functions alter no GL
     state.
  2. Add fields to brw_context::hiz for the new HiZ op.
  3. Emit a workaround flush when toggling 3DSTATE_VS.VsFunctionEnable.
  4. Kill all dead HiZ code:
     - the function gen6_resolve_slice
     - the dirty flag BRW_NEW_HIZ
     - the dead fields in brw_context::hiz
     - the state packet manipulation triggered by the now removed
       brw_context::hiz::op
     - the meta-op workaround in brw_predraw_resolve_buffers (discussed
       above)
     - the meta-op workaround brwPrepareExecBegin (discussed above)

Note: This is a candidate for the 8.0 branch.
Reviewed-by: Eric Anholt <eric@anholt.net>
Reviewed-by: Kenneth Graunke <kenneth@whitecape.org>
Acked-by: Paul Berry <stereotype441@gmail.com>
Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=43327
Reported-by: xunx.fang@intel.com
Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=44927
Reported-by: chao.a.chen@intel.com
Signed-off-by: Chad Versace <chad.versace@linux.intel.com>
  • Loading branch information...
1 parent d594662 commit 7b36c68ba6899c7f30fd56b7ef07a78b027771ac @chadversary chadversary committed Jan 26, 2012
@@ -100,6 +100,7 @@ i965_C_FILES := \
gen7_cc_state.c \
gen7_clip_state.c \
gen7_disable.c \
+ gen7_hiz.c \
gen7_misc_state.c \
gen7_sampler_state.c \
gen7_sf_state.c \
@@ -41,8 +41,6 @@
#include "brw_draw.h"
#include "brw_state.h"
-#include "gen6_hiz.h"
-
#include "intel_fbo.h"
#include "intel_mipmap_tree.h"
#include "intel_regions.h"
@@ -57,58 +55,6 @@
* Mesa's Driver Functions
***************************************/
-/**
- * \brief Prepare for entry into glBegin/glEnd block.
- *
- * Resolve buffers before entering a glBegin/glEnd block. This is
- * necessary to prevent recursive calls to FLUSH_VERTICES.
- *
- * This resolves the depth buffer of each enabled depth texture and the HiZ
- * buffer of the attached depth renderbuffer.
- *
- * Details
- * -------
- * When vertices are queued during a glBegin/glEnd block, those vertices must
- * be drawn before any rendering state changes. To ensure this, Mesa calls
- * FLUSH_VERTICES as a prehook to such state changes. Therefore,
- * FLUSH_VERTICES itself cannot change rendering state without falling into a
- * recursive trap.
- *
- * This precludes meta-ops, namely buffer resolves, from occurring while any
- * vertices are queued. To prevent that situation, we resolve some buffers on
- * entering a glBegin/glEnd
- *
- * \see brwCleanupExecEnd()
- */
-static void brwPrepareExecBegin(struct gl_context *ctx)
-{
- struct brw_context *brw = brw_context(ctx);
- struct intel_context *intel = &brw->intel;
- struct intel_renderbuffer *draw_irb;
- struct intel_texture_object *tex_obj;
-
- if (!intel->has_hiz) {
- /* The context uses no feature that requires buffer resolves. */
- return;
- }
-
- /* Resolve each enabled texture. */
- for (int i = 0; i < ctx->Const.MaxTextureImageUnits; i++) {
- if (!ctx->Texture.Unit[i]._ReallyEnabled)
- continue;
- tex_obj = intel_texture_object(ctx->Texture.Unit[i]._Current);
- if (!tex_obj || !tex_obj->mt)
- continue;
- intel_miptree_all_slices_resolve_depth(intel, tex_obj->mt);
- }
-
- /* Resolve the attached depth buffer. */
- draw_irb = intel_get_renderbuffer(ctx->DrawBuffer, BUFFER_DEPTH);
- if (draw_irb) {
- intel_renderbuffer_resolve_hiz(intel, draw_irb);
- }
-}
-
static void brwInitDriverFunctions(struct intel_screen *screen,
struct dd_function_table *functions)
{
@@ -117,7 +63,6 @@ static void brwInitDriverFunctions(struct intel_screen *screen,
brwInitFragProgFuncs( functions );
brw_init_queryobj_functions(functions);
- functions->PrepareExecBegin = brwPrepareExecBegin;
functions->BeginTransformFeedback = brw_begin_transform_feedback;
if (screen->gen >= 7)
@@ -119,6 +119,10 @@
#define BRW_MAX_CURBE (32*16)
struct brw_context;
+struct brw_instruction;
+struct brw_vs_prog_key;
+struct brw_wm_prog_key;
+struct brw_wm_prog_data;
enum brw_state_id {
BRW_STATE_URB_FENCE,
@@ -144,7 +148,6 @@ enum brw_state_id {
BRW_STATE_VS_CONSTBUF,
BRW_STATE_PROGRAM_CACHE,
BRW_STATE_STATE_BASE_ADDRESS,
- BRW_STATE_HIZ,
BRW_STATE_SOL_INDICES,
};
@@ -174,7 +177,6 @@ enum brw_state_id {
#define BRW_NEW_VS_CONSTBUF (1 << BRW_STATE_VS_CONSTBUF)
#define BRW_NEW_PROGRAM_CACHE (1 << BRW_STATE_PROGRAM_CACHE)
#define BRW_NEW_STATE_BASE_ADDRESS (1 << BRW_STATE_STATE_BASE_ADDRESS)
-#define BRW_NEW_HIZ (1 << BRW_STATE_HIZ)
#define BRW_NEW_SOL_INDICES (1 << BRW_STATE_SOL_INDICES)
struct brw_state_flags {
@@ -950,38 +952,18 @@ struct brw_context
int state_batch_count;
/**
- * \brief State needed to execute HiZ meta-ops
+ * \brief State needed to execute HiZ ops.
*
- * All fields except \c op are initialized by gen6_hiz_init().
+ * \see gen6_hiz_init()
+ * \see gen6_hiz_exec()
*/
struct brw_hiz_state {
- /**
- * \brief Indicates which HiZ operation is in progress.
+ /** \brief VBO for rectangle primitive.
*
- * See the following sections of the Sandy Bridge PRM, Volume 1, Part2:
- * - 7.5.3.1 Depth Buffer Clear
- * - 7.5.3.2 Depth Buffer Resolve
- * - 7.5.3.3 Hierarchical Depth Buffer Resolve
+ * Rather than using glGenBuffers(), we allocate the VBO directly
+ * through drm.
*/
- enum brw_hiz_op {
- BRW_HIZ_OP_NONE = 0,
- BRW_HIZ_OP_DEPTH_CLEAR,
- BRW_HIZ_OP_DEPTH_RESOLVE,
- BRW_HIZ_OP_HIZ_RESOLVE,
- } op;
-
- /** \brief Shader state */
- struct {
- GLuint program;
- GLuint position_vbo;
- GLint position_location;
- } shader;
-
- /** \brief VAO for the rectangle primitive's vertices. */
- GLuint vao;
-
- GLuint fbo;
- struct gl_renderbuffer *depth_rb;
+ drm_intel_bo *vertex_bo;
} hiz;
struct brw_sol_state {
@@ -126,12 +126,7 @@ static void gen6_set_prim(struct brw_context *brw,
DBG("PRIM: %s\n", _mesa_lookup_enum_by_nr(prim->mode));
- if (brw->hiz.op) {
- assert(prim->mode == GL_TRIANGLES);
- hw_prim = _3DPRIM_RECTLIST;
- } else {
- hw_prim = prim_to_hw_prim[prim->mode];
- }
+ hw_prim = prim_to_hw_prim[prim->mode];
if (hw_prim != brw->primitive) {
brw->primitive = hw_prim;
@@ -307,17 +302,11 @@ brw_predraw_resolve_buffers(struct brw_context *brw)
struct intel_context *intel = &brw->intel;
struct intel_renderbuffer *depth_irb;
struct intel_texture_object *tex_obj;
- bool did_resolve = false;
-
- /* Avoid recursive HiZ op. */
- if (brw->hiz.op) {
- return;
- }
/* Resolve the depth buffer's HiZ buffer. */
depth_irb = intel_get_renderbuffer(ctx->DrawBuffer, BUFFER_DEPTH);
if (depth_irb && depth_irb->mt) {
- did_resolve |= intel_renderbuffer_resolve_hiz(intel, depth_irb);
+ intel_renderbuffer_resolve_hiz(intel, depth_irb);
}
/* Resolve depth buffer of each enabled depth texture. */
@@ -327,33 +316,7 @@ brw_predraw_resolve_buffers(struct brw_context *brw)
tex_obj = intel_texture_object(ctx->Texture.Unit[i]._Current);
if (!tex_obj || !tex_obj->mt)
continue;
- did_resolve |= intel_miptree_all_slices_resolve_depth(intel, tex_obj->mt);
- }
-
- if (did_resolve) {
- /* Call vbo_bind_array() to synchronize the vbo module's vertex
- * attributes to the gl_context's.
- *
- * Details
- * -------
- * The vbo module tracks vertex attributes separately from the
- * gl_context. Specifically, the vbo module maintins vertex attributes
- * in vbo_exec_context::array::inputs, which is synchronized with
- * gl_context::Array::ArrayObj::VertexAttrib by vbo_bind_array().
- * vbo_draw_arrays() calls vbo_bind_array() to perform the
- * synchronization before calling the real draw call,
- * vbo_context::draw_arrays.
- *
- * At this point (after performing a resolve meta-op but before calling
- * vbo_bind_array), the gl_context's vertex attributes have been
- * restored to their original state (that is, their state before the
- * meta-op began), but the vbo module's vertex attribute are those used
- * in the last meta-op. Therefore we must manually synchronize the two with
- * vbo_bind_array() before continuing with the original draw command.
- */
- _mesa_update_state(ctx);
- vbo_bind_arrays(ctx);
- _mesa_update_state(ctx);
+ intel_miptree_all_slices_resolve_depth(intel, tex_obj->mt);
}
}
@@ -372,9 +335,7 @@ static void brw_postdraw_set_buffers_need_resolve(struct brw_context *brw)
struct intel_renderbuffer *depth_irb =
intel_get_renderbuffer(fb, BUFFER_DEPTH);
- if (depth_irb &&
- ctx->Depth.Mask &&
- !brw->hiz.op) {
+ if (depth_irb && ctx->Depth.Mask) {
intel_renderbuffer_set_needs_depth_resolve(depth_irb);
}
}
@@ -372,7 +372,6 @@ static struct dirty_bit_map brw_bits[] = {
DEFINE_BIT(BRW_NEW_GS_BINDING_TABLE),
DEFINE_BIT(BRW_NEW_PS_BINDING_TABLE),
DEFINE_BIT(BRW_NEW_STATE_BASE_ADDRESS),
- DEFINE_BIT(BRW_NEW_HIZ),
{0, 0, 0}
};
@@ -50,6 +50,7 @@
#include "brw_wm.h"
#include "gen6_hiz.h"
+#include "gen7_hiz.h"
#include "glsl/ralloc.h"
@@ -70,9 +71,11 @@ static void brw_destroy_context( struct intel_context *intel )
brw_destroy_state(brw);
brw_draw_destroy( brw );
+
ralloc_free(brw->wm.compile_data);
dri_bo_release(&brw->curbe.curbe_bo);
+ dri_bo_release(&brw->hiz.vertex_bo);
dri_bo_release(&brw->vs.const_bo);
dri_bo_release(&brw->wm.const_bo);
@@ -236,8 +239,15 @@ void brwInitVtbl( struct brw_context *brw )
brw->intel.vtbl.is_hiz_depth_format = brw_is_hiz_depth_format;
if (brw->intel.has_hiz) {
- brw->intel.vtbl.resolve_depth_slice = gen6_resolve_depth_slice;
- brw->intel.vtbl.resolve_hiz_slice = gen6_resolve_hiz_slice;
+ if (brw->intel.gen == 7) {
+ brw->intel.vtbl.resolve_depth_slice = gen7_resolve_depth_slice;
+ brw->intel.vtbl.resolve_hiz_slice = gen7_resolve_hiz_slice;
+ } else if (brw->intel.gen == 6) {
+ brw->intel.vtbl.resolve_depth_slice = gen6_resolve_depth_slice;
+ brw->intel.vtbl.resolve_hiz_slice = gen6_resolve_hiz_slice;
+ } else {
+ assert(0);
+ }
}
if (brw->intel.gen >= 7) {
@@ -67,23 +67,6 @@ upload_clip_state(struct brw_context *brw)
GEN6_CLIP_NON_PERSPECTIVE_BARYCENTRIC_ENABLE;
}
- if (brw->hiz.op) {
- /* HiZ operations emit a rectangle primitive, which requires clipping to
- * be disabled. From page 10 of the Sandy Bridge PRM Volume 2 Part 1
- * Section 1.3 3D Primitives Overview:
- * RECTLIST:
- * Either the CLIP unit should be DISABLED, or the CLIP unit's Clip
- * Mode should be set to a value other than CLIPMODE_NORMAL.
- */
- BEGIN_BATCH(4);
- OUT_BATCH(_3DSTATE_CLIP << 16 | (4 - 2));
- OUT_BATCH(0);
- OUT_BATCH(0);
- OUT_BATCH(0);
- ADVANCE_BATCH();
- return;
- }
-
if (!ctx->Transform.DepthClamp)
depth_clamp = GEN6_CLIP_Z_TEST;
@@ -124,8 +107,7 @@ const struct brw_tracked_state gen6_clip_state = {
.dirty = {
.mesa = _NEW_TRANSFORM | _NEW_LIGHT,
.brw = (BRW_NEW_CONTEXT |
- BRW_NEW_FRAGMENT_PROGRAM |
- BRW_NEW_HIZ),
+ BRW_NEW_FRAGMENT_PROGRAM),
.cache = 0
},
.emit = upload_clip_state,
@@ -82,11 +82,7 @@ gen6_upload_depth_stencil_state(struct brw_context *brw)
}
/* _NEW_DEPTH */
- if ((ctx->Depth.Test || brw->hiz.op) && depth_irb) {
- assert(brw->hiz.op != BRW_HIZ_OP_DEPTH_RESOLVE || ctx->Depth.Test);
- assert(brw->hiz.op != BRW_HIZ_OP_HIZ_RESOLVE || !ctx->Depth.Test);
- assert(brw->hiz.op != BRW_HIZ_OP_DEPTH_CLEAR || !ctx->Depth.Test);
-
+ if (ctx->Depth.Test && depth_irb) {
ds->ds2.depth_test_enable = ctx->Depth.Test;
ds->ds2.depth_test_func = intel_translate_compare_func(ctx->Depth.Func);
ds->ds2.depth_write_enable = ctx->Depth.Mask;
@@ -98,8 +94,7 @@ gen6_upload_depth_stencil_state(struct brw_context *brw)
const struct brw_tracked_state gen6_depth_stencil_state = {
.dirty = {
.mesa = _NEW_DEPTH | _NEW_STENCIL | _NEW_BUFFERS,
- .brw = (BRW_NEW_BATCH |
- BRW_NEW_HIZ),
+ .brw = BRW_NEW_BATCH,
.cache = 0,
},
.emit = gen6_upload_depth_stencil_state,
Oops, something went wrong.

0 comments on commit 7b36c68

Please sign in to comment.