Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Compute shaders taking FBOs as input yields error messages. #1184

Open
Clockwork-Muse opened this issue Aug 10, 2021 · 5 comments
Open

Compute shaders taking FBOs as input yields error messages. #1184

Clockwork-Muse opened this issue Aug 10, 2021 · 5 comments

Comments

@Clockwork-Muse
Copy link

Description

If a compute shader is given a buffer that is the output of a framebuffer (for example, a texture handed to add_render_texture()), then a GL error message is generated:

GL_INVALID_OPERATION error generated. No active compute shader.

... despite this message, the compute shader actually runs (and appears to run successfully).

Steps to Reproduce

from typing import Tuple
from direct import showbase

from direct.showbase.ShowBase import ShowBase
from panda3d.core import Camera, CardMaker, ClockObject, ComputeNode, FrameBufferProperties, GraphicsOutput, GraphicsPipe, Lens, LVector2i, NodePath, Shader, Texture, WindowProperties, loadPrcFileData

# Engine configuration for this repro: turns on GL debug output (gl-debug) and
# verbose glgsg/shader notify levels so driver-reported errors show up in the
# log, and requests a fixed-size 1024x1024 window.
loadPrcFileData("",
"""
    want-pstats 1
    gl-debug #t
    audio-library-name null
    show-frame-rate-meter 1
    sync-video 0
    win-fixed-size 1
    win-size 1024 1024
    notify-level-glgsg debug
    notify-level-shader spam
""")

# GLSL vertex shader used by setup_frame_id(): forwards the first texture
# coordinate set and transforms the vertex by the model-view-projection matrix.
simple_vertex_shader = \
"""
#version 460

// Uniform inputs
uniform mat4 p3d_ModelViewProjectionMatrix;

// Vertex inputs
in vec4 p3d_Vertex;
in vec2 p3d_MultiTexCoord0;

out vec2 VertexTexCoord;

void main() {
    VertexTexCoord = p3d_MultiTexCoord0;
    gl_Position = p3d_ModelViewProjectionMatrix * p3d_Vertex;
}
"""

def setup_frame_id(show_base: ShowBase, size: LVector2i, root: NodePath, lens: Lens) -> Tuple[NodePath, Texture]:
    """Create an offscreen "frame id" buffer that renders into a texture.

    Args:
        show_base: The running ShowBase; supplies the graphics engine, the
            pipe, and the host window/GSG the buffer is created against.
        size: Requested size of the offscreen buffer.
        root: Node the newly created camera is parented to.
        lens: Lens handed to the newly created camera.

    Returns:
        A ``(camera, texture)`` pair: the camera NodePath returned by
        ``make_camera`` and the texture attached as the buffer's color output.
    """
    window_props = WindowProperties(size=size)
    # Ask for an offscreen buffer rather than a window.
    flags = GraphicsPipe.BFRefuseWindow

    # Single 32-bit floating-point red channel, plus depth and stencil.
    props = FrameBufferProperties()
    props.set_rgba_bits(32, 0, 0, 0)
    props.set_float_color(True)
    props.set_multisamples(0)
    props.set_force_hardware(True)
    props.set_srgb_color(False)
    props.set_depth_bits(24)
    props.set_stencil_bits(8)
    props.set_float_depth(True)

    # The fragment stage writes the integer uniform SomeInput, converted to
    # float, into every color component.
    shader = Shader.make(Shader.SL_GLSL,
    vertex=simple_vertex_shader,
    fragment=
    """
    #version 460

    uniform int SomeInput;
    out vec3 Color;

    void main() {
        Color = vec3(float(SomeInput));
    }
    """)

    buffer = show_base.graphicsEngine.make_output(
        show_base.pipe, "frame id buffer", -1, props, window_props, flags, show_base.win.get_gsg(), show_base.win)
    texture = Texture()
    buffer.add_render_texture(
        tex=texture, mode=GraphicsOutput.RTMBindOrCopy, bitplane=GraphicsOutput.RTPColor)
    # The format is set after attaching the texture to the buffer.
    texture.set_format(Texture.F_r32)
    texture.set_clear_color((0, 0, 0, 0))
    buffer.set_inverted(True)
    camera = show_base.make_camera(buffer, camName="frame id", lens=lens)
    camera.reparent_to(root)

    # Build the camera's initial render state on a throwaway node. The shader
    # is applied with override priority 1, so it replaces shaders that nodes
    # rendered by this camera set at the default priority. That includes a
    # compute shader on a ComputeNode under `root`, which is then dispatched
    # without a valid compute shader.
    temp_node = NodePath("temp")
    temp_node.set_shader(shader, 1)
    temp_node.set_shader_input("SomeInput", 0)
    camera.node().set_initial_state(temp_node.get_state())

    return camera, texture

def setup_compute(root: NodePath, input_texture: Texture) -> Texture:
    """Attach a compute pass under ``root`` that reads ``input_texture``.

    A new unsigned-short ``F_r16i`` texture with the same x/y size as
    ``input_texture`` is created and cleared. A ComputeNode carrying the
    compute shader is attached below a "compute_chain" node placed in the
    "fixed" bin at sort 41. The shader loads the red component of
    ``InputTexture`` at the invocation's coordinate and stores it, converted
    to ``uint``, into ``OutputTexture``.

    Args:
        root: Node the compute chain is attached to.
        input_texture: Texture bound to the shader's ``InputTexture`` image.

    Returns:
        The newly created texture bound as ``OutputTexture``.
    """
    width = input_texture.x_size
    height = input_texture.y_size

    output_texture = Texture()
    output_texture.setup_2d_texture(width, height, Texture.T_unsigned_short, Texture.F_r16i)
    output_texture.set_clear_color((0, 0, 0, 0))
    output_texture.clear_image()

    compute_source = """
    #version 460

    layout (local_size_x = 1, local_size_y = 1) in;

    uniform restrict readonly layout (r32f) image2D InputTexture;
    uniform restrict writeonly layout (r16ui) uimage2D OutputTexture;

    void main() {
        const ivec2 coordinates = ivec2(gl_GlobalInvocationID.xy);
        const float val = imageLoad(InputTexture, coordinates).r;
        imageStore(OutputTexture, coordinates, uvec4(uint(val)));
    }

    """
    compute_shader = Shader.make_compute(Shader.SL_GLSL, compute_source)

    # Parent node that places the compute pass in the "fixed" bin.
    bin_parent = root.attach_new_node("compute_chain")
    bin_parent.set_bin("fixed", 41)

    # The node that issues the dispatch; a single (1, 1, 1) dispatch is queued.
    dispatcher = ComputeNode("compute_node")
    dispatcher.add_dispatch(1, 1, 1)

    # The shader is set at the default priority (no override argument).
    compute_path = bin_parent.attach_new_node(dispatcher)
    compute_path.set_shader(compute_shader)
    compute_path.set_shader_input("InputTexture", input_texture)
    compute_path.set_shader_input("OutputTexture", output_texture)

    return output_texture

# Create the ShowBase application object.
base = ShowBase()

# Put the global clock in forced mode with a frame rate of 30.
clock = ClockObject.get_global_clock()
clock.mode = ClockObject.M_forced
clock.set_frame_rate(30)

# Set up the offscreen render-to-texture pass, then feed its texture to the
# compute pass. Both are attached under base.render.
cam, original_texture = setup_frame_id(base, LVector2i(1024, 1024), base.render, base.camLens)
converted_texture = setup_compute(base.render, original_texture)

# Add a full-screen quad to the scene with SomeInput = 1 (the offscreen
# camera's initial state sets SomeInput = 0).
cm = CardMaker("screen quad")
cm.set_frame_fullscreen_quad()
card = base.render.attach_new_node(cm.generate())
card.set_shader_input("SomeInput", 1)

# Enter the main loop.
base.run()

Environment

  • Operating system: Windows 10 (not at my Ubuntu machine atm, will check tomorrow)
  • System architecture: x86_64
  • Panda3D version: 1.10.9
  • Installation method: pip
  • Python version (if using Python): 3.9.6
@Clockwork-Muse
Copy link
Author

Debug log:

Known pipe types:
  wglGraphicsPipe
(all display modules loaded.)
:display:gsg:glgsg(debug): HAS EXT WGL_ARB_pixel_format 1
:display:gsg:glgsg(debug): HAS EXT WGL_ARB_multisample 1
:display:gsg:glgsg(debug): HAS EXT WGL_ARB_create_context 1
:display:gsg:glgsg(debug): GL_VENDOR = NVIDIA Corporation
:display:gsg:glgsg(debug): GL_RENDERER = NVIDIA GeForce GTX 1080/PCIe/SSE2
:display:gsg:glgsg(debug): GL_VERSION = 4.6.0 NVIDIA 471.41
:display:gsg:glgsg(debug): GL_VERSION decoded to: 4.6
:display:gsg:glgsg(debug): GL_SHADING_LANGUAGE_VERSION = 4.60 NVIDIA
:display:gsg:glgsg(debug): Detected GLSL version: 4.60
:display:gsg:glgsg(debug): HAS EXT GL_ARB_compatibility 1
:display:gsg:glgsg(debug): Using compatibility profile
:display:gsg:glgsg(debug): GL Extensions:
  GL_AMD_multi_draw_indirect             GL_AMD_seamless_cubemap_per_texture
  GL_AMD_vertex_shader_layer             GL_AMD_vertex_shader_viewport_index
  GL_ARB_ES2_compatibility               GL_ARB_ES3_1_compatibility
  GL_ARB_ES3_2_compatibility             GL_ARB_ES3_compatibility
  GL_ARB_arrays_of_arrays                GL_ARB_base_instance
  GL_ARB_bindless_texture                GL_ARB_blend_func_extended
  GL_ARB_buffer_storage                  GL_ARB_clear_buffer_object
  GL_ARB_clear_texture                   GL_ARB_clip_control
  GL_ARB_color_buffer_float              GL_ARB_compatibility
  GL_ARB_compressed_texture_pixel_storage
  GL_ARB_compute_shader                  GL_ARB_compute_variable_group_size
  GL_ARB_conditional_render_inverted     GL_ARB_conservative_depth
  GL_ARB_copy_buffer                     GL_ARB_copy_image
  GL_ARB_cull_distance                   GL_ARB_debug_output
  GL_ARB_depth_buffer_float              GL_ARB_depth_clamp
  GL_ARB_depth_texture                   GL_ARB_derivative_control
  GL_ARB_direct_state_access             GL_ARB_draw_buffers
  GL_ARB_draw_buffers_blend              GL_ARB_draw_elements_base_vertex
  GL_ARB_draw_indirect                   GL_ARB_draw_instanced
  GL_ARB_enhanced_layouts                GL_ARB_explicit_attrib_location
  GL_ARB_explicit_uniform_location       GL_ARB_fragment_coord_conventions
  GL_ARB_fragment_layer_viewport         GL_ARB_fragment_program
  GL_ARB_fragment_program_shadow         GL_ARB_fragment_shader
  GL_ARB_fragment_shader_interlock       GL_ARB_framebuffer_no_attachments
  GL_ARB_framebuffer_object              GL_ARB_framebuffer_sRGB
  GL_ARB_geometry_shader4                GL_ARB_get_program_binary
  GL_ARB_get_texture_sub_image           GL_ARB_gl_spirv
  GL_ARB_gpu_shader5                     GL_ARB_gpu_shader_fp64
  GL_ARB_gpu_shader_int64                GL_ARB_half_float_pixel
  GL_ARB_half_float_vertex               GL_ARB_imaging
  GL_ARB_indirect_parameters             GL_ARB_instanced_arrays
  GL_ARB_internalformat_query            GL_ARB_internalformat_query2
  GL_ARB_invalidate_subdata              GL_ARB_map_buffer_alignment
  GL_ARB_map_buffer_range                GL_ARB_multi_bind
  GL_ARB_multi_draw_indirect             GL_ARB_multisample
  GL_ARB_multitexture                    GL_ARB_occlusion_query
  GL_ARB_occlusion_query2                GL_ARB_parallel_shader_compile
  GL_ARB_pipeline_statistics_query       GL_ARB_pixel_buffer_object
  GL_ARB_point_parameters                GL_ARB_point_sprite
  GL_ARB_polygon_offset_clamp            GL_ARB_post_depth_coverage
  GL_ARB_program_interface_query         GL_ARB_provoking_vertex
  GL_ARB_query_buffer_object             GL_ARB_robust_buffer_access_behavior
  GL_ARB_robustness                      GL_ARB_sample_locations
  GL_ARB_sample_shading                  GL_ARB_sampler_objects
  GL_ARB_seamless_cube_map               GL_ARB_seamless_cubemap_per_texture
  GL_ARB_separate_shader_objects         GL_ARB_shader_atomic_counter_ops
  GL_ARB_shader_atomic_counters          GL_ARB_shader_ballot
  GL_ARB_shader_bit_encoding             GL_ARB_shader_clock
  GL_ARB_shader_draw_parameters          GL_ARB_shader_group_vote
  GL_ARB_shader_image_load_store         GL_ARB_shader_image_size
  GL_ARB_shader_objects                  GL_ARB_shader_precision
  GL_ARB_shader_storage_buffer_object    GL_ARB_shader_subroutine
  GL_ARB_shader_texture_image_samples    GL_ARB_shader_texture_lod
  GL_ARB_shader_viewport_layer_array     GL_ARB_shading_language_100
  GL_ARB_shading_language_420pack        GL_ARB_shading_language_include
  GL_ARB_shading_language_packing        GL_ARB_shadow
  GL_ARB_sparse_buffer                   GL_ARB_sparse_texture
  GL_ARB_sparse_texture2                 GL_ARB_sparse_texture_clamp
  GL_ARB_spirv_extensions                GL_ARB_stencil_texturing
  GL_ARB_sync                            GL_ARB_tessellation_shader
  GL_ARB_texture_barrier                 GL_ARB_texture_border_clamp
  GL_ARB_texture_buffer_object           GL_ARB_texture_buffer_object_rgb32
  GL_ARB_texture_buffer_range            GL_ARB_texture_compression
  GL_ARB_texture_compression_bptc        GL_ARB_texture_compression_rgtc
  GL_ARB_texture_cube_map                GL_ARB_texture_cube_map_array
  GL_ARB_texture_env_add                 GL_ARB_texture_env_combine
  GL_ARB_texture_env_crossbar            GL_ARB_texture_env_dot3
  GL_ARB_texture_filter_anisotropic      GL_ARB_texture_filter_minmax
  GL_ARB_texture_float                   GL_ARB_texture_gather
  GL_ARB_texture_mirror_clamp_to_edge    GL_ARB_texture_mirrored_repeat
  GL_ARB_texture_multisample             GL_ARB_texture_non_power_of_two
  GL_ARB_texture_query_levels            GL_ARB_texture_query_lod
  GL_ARB_texture_rectangle               GL_ARB_texture_rg
  GL_ARB_texture_rgb10_a2ui              GL_ARB_texture_stencil8
  GL_ARB_texture_storage                 GL_ARB_texture_storage_multisample
  GL_ARB_texture_swizzle                 GL_ARB_texture_view
  GL_ARB_timer_query                     GL_ARB_transform_feedback2
  GL_ARB_transform_feedback3             GL_ARB_transform_feedback_instanced
  GL_ARB_transform_feedback_overflow_query
  GL_ARB_transpose_matrix                GL_ARB_uniform_buffer_object
  GL_ARB_vertex_array_bgra               GL_ARB_vertex_array_object
  GL_ARB_vertex_attrib_64bit             GL_ARB_vertex_attrib_binding
  GL_ARB_vertex_buffer_object            GL_ARB_vertex_program
  GL_ARB_vertex_shader                   GL_ARB_vertex_type_10f_11f_11f_rev
  GL_ARB_vertex_type_2_10_10_10_rev      GL_ARB_viewport_array
  GL_ARB_window_pos                      GL_ATI_draw_buffers
  GL_ATI_texture_float                   GL_ATI_texture_mirror_once
  GL_EXTX_framebuffer_mixed_formats      GL_EXT_Cg_shader
  GL_EXT_abgr                            GL_EXT_bgra
  GL_EXT_bindable_uniform                GL_EXT_blend_color
  GL_EXT_blend_equation_separate         GL_EXT_blend_func_separate
  GL_EXT_blend_minmax                    GL_EXT_blend_subtract
  GL_EXT_compiled_vertex_array           GL_EXT_depth_bounds_test
  GL_EXT_direct_state_access             GL_EXT_draw_buffers2
  GL_EXT_draw_instanced                  GL_EXT_draw_range_elements
  GL_EXT_fog_coord                       GL_EXT_framebuffer_blit
  GL_EXT_framebuffer_multisample         GL_EXT_framebuffer_multisample_blit_scaled
  GL_EXT_framebuffer_object              GL_EXT_framebuffer_sRGB
  GL_EXT_geometry_shader4                GL_EXT_gpu_program_parameters
  GL_EXT_gpu_shader4                     GL_EXT_import_sync_object
  GL_EXT_memory_object                   GL_EXT_memory_object_win32
  GL_EXT_multi_draw_arrays               GL_EXT_multiview_texture_multisample
  GL_EXT_multiview_timer_query           GL_EXT_packed_depth_stencil
  GL_EXT_packed_float                    GL_EXT_packed_pixels
  GL_EXT_pixel_buffer_object             GL_EXT_point_parameters
  GL_EXT_polygon_offset_clamp            GL_EXT_post_depth_coverage
  GL_EXT_provoking_vertex                GL_EXT_raster_multisample
  GL_EXT_rescale_normal                  GL_EXT_secondary_color
  GL_EXT_semaphore                       GL_EXT_semaphore_win32
  GL_EXT_separate_shader_objects         GL_EXT_separate_specular_color
  GL_EXT_shader_image_load_formatted     GL_EXT_shader_image_load_store
  GL_EXT_shader_integer_mix              GL_EXT_shadow_funcs
  GL_EXT_sparse_texture2                 GL_EXT_stencil_two_side
  GL_EXT_stencil_wrap                    GL_EXT_texture3D
  GL_EXT_texture_array                   GL_EXT_texture_buffer_object
  GL_EXT_texture_compression_dxt1        GL_EXT_texture_compression_latc
  GL_EXT_texture_compression_rgtc        GL_EXT_texture_compression_s3tc
  GL_EXT_texture_cube_map                GL_EXT_texture_edge_clamp
  GL_EXT_texture_env_add                 GL_EXT_texture_env_combine
  GL_EXT_texture_env_dot3                GL_EXT_texture_filter_anisotropic
  GL_EXT_texture_filter_minmax           GL_EXT_texture_integer
  GL_EXT_texture_lod                     GL_EXT_texture_lod_bias
  GL_EXT_texture_mirror_clamp            GL_EXT_texture_object
  GL_EXT_texture_sRGB                    GL_EXT_texture_sRGB_R8
  GL_EXT_texture_sRGB_decode             GL_EXT_texture_shadow_lod
  GL_EXT_texture_shared_exponent         GL_EXT_texture_storage
  GL_EXT_texture_swizzle                 GL_EXT_timer_query
  GL_EXT_transform_feedback2             GL_EXT_vertex_array
  GL_EXT_vertex_array_bgra               GL_EXT_vertex_attrib_64bit
  GL_EXT_win32_keyed_mutex               GL_EXT_window_rectangles
  GL_IBM_rasterpos_clip                  GL_IBM_texture_mirrored_repeat
  GL_KHR_blend_equation_advanced         GL_KHR_blend_equation_advanced_coherent
  GL_KHR_context_flush_control           GL_KHR_debug
  GL_KHR_no_error                        GL_KHR_parallel_shader_compile
  GL_KHR_robust_buffer_access_behavior   GL_KHR_robustness
  GL_KHR_shader_subgroup                 GL_KTX_buffer_region
  GL_NVX_blend_equation_advanced_multi_draw_buffers
  GL_NVX_conditional_render              GL_NVX_gpu_memory_info
  GL_NVX_gpu_multicast2                  GL_NVX_linked_gpu_multicast
  GL_NVX_multigpu_info                   GL_NVX_nvenc_interop
  GL_NVX_progress_fence                  GL_NV_ES1_1_compatibility
  GL_NV_ES3_1_compatibility              GL_NV_alpha_to_coverage_dither_control
  GL_NV_bindless_multi_draw_indirect     GL_NV_bindless_multi_draw_indirect_count
  GL_NV_bindless_texture                 GL_NV_blend_equation_advanced
  GL_NV_blend_equation_advanced_coherent GL_NV_blend_minmax_factor
  GL_NV_blend_square                     GL_NV_clip_space_w_scaling
  GL_NV_command_list                     GL_NV_compute_program5
  GL_NV_conditional_render               GL_NV_conservative_raster
  GL_NV_conservative_raster_dilate       GL_NV_conservative_raster_pre_snap_triangles
  GL_NV_copy_depth_to_color              GL_NV_copy_image
  GL_NV_depth_buffer_float               GL_NV_depth_clamp
  GL_NV_draw_texture                     GL_NV_draw_vulkan_image
  GL_NV_explicit_multisample             GL_NV_feature_query
  GL_NV_fence                            GL_NV_fill_rectangle
  GL_NV_float_buffer                     GL_NV_fog_distance
  GL_NV_fragment_coverage_to_color       GL_NV_fragment_program
  GL_NV_fragment_program2                GL_NV_fragment_program_option
  GL_NV_fragment_shader_interlock        GL_NV_framebuffer_mixed_samples
  GL_NV_framebuffer_multisample_coverage GL_NV_geometry_shader4
  GL_NV_geometry_shader_passthrough      GL_NV_gpu_multicast
  GL_NV_gpu_program4                     GL_NV_gpu_program4_1
  GL_NV_gpu_program5                     GL_NV_gpu_program5_mem_extended
  GL_NV_gpu_program_fp64                 GL_NV_gpu_shader5
  GL_NV_half_float                       GL_NV_internalformat_sample_query
  GL_NV_light_max_exponent               GL_NV_memory_attachment
  GL_NV_memory_object_sparse             GL_NV_multisample_coverage
  GL_NV_multisample_filter_hint          GL_NV_occlusion_query
  GL_NV_packed_depth_stencil             GL_NV_parameter_buffer_object
  GL_NV_parameter_buffer_object2         GL_NV_path_rendering
  GL_NV_path_rendering_shared_edge       GL_NV_pixel_data_range
  GL_NV_point_sprite                     GL_NV_primitive_restart
  GL_NV_query_resource                   GL_NV_query_resource_tag
  GL_NV_register_combiners               GL_NV_register_combiners2
  GL_NV_sample_locations                 GL_NV_sample_mask_override_coverage
  GL_NV_shader_atomic_counters           GL_NV_shader_atomic_float
  GL_NV_shader_atomic_float64            GL_NV_shader_atomic_fp16_vector
  GL_NV_shader_atomic_int64              GL_NV_shader_buffer_load
  GL_NV_shader_storage_buffer_object     GL_NV_shader_subgroup_partitioned
  GL_NV_shader_thread_group              GL_NV_shader_thread_shuffle
  GL_NV_stereo_view_rendering            GL_NV_texgen_reflection
  GL_NV_texture_barrier                  GL_NV_texture_compression_vtc
  GL_NV_texture_env_combine4             GL_NV_texture_multisample
  GL_NV_texture_rectangle                GL_NV_texture_rectangle_compressed
  GL_NV_texture_shader                   GL_NV_texture_shader2
  GL_NV_texture_shader3                  GL_NV_timeline_semaphore
  GL_NV_transform_feedback               GL_NV_transform_feedback2
  GL_NV_uniform_buffer_unified_memory    GL_NV_vertex_array_range
  GL_NV_vertex_array_range2              GL_NV_vertex_attrib_integer_64bit
  GL_NV_vertex_buffer_unified_memory     GL_NV_vertex_program
  GL_NV_vertex_program1_1                GL_NV_vertex_program2
  GL_NV_vertex_program2_option           GL_NV_vertex_program3
  GL_NV_viewport_array2                  GL_NV_viewport_swizzle
  GL_OVR_multiview                       GL_OVR_multiview2
  GL_S3_s3tc                             GL_SGIS_generate_mipmap
  GL_SGIS_texture_lod                    GL_SGIX_depth_texture
  GL_SGIX_shadow                         GL_SUN_slice_accum
  GL_WIN_swap_hint                       WGL_ARB_buffer_region
  WGL_ARB_context_flush_control          WGL_ARB_create_context
  WGL_ARB_create_context_no_error        WGL_ARB_create_context_profile
  WGL_ARB_create_context_robustness      WGL_ARB_extensions_string
  WGL_ARB_make_current_read              WGL_ARB_multisample
  WGL_ARB_pbuffer                        WGL_ARB_pixel_format
  WGL_ARB_pixel_format_float             WGL_ARB_render_texture
  WGL_ATI_pixel_format_float             WGL_EXT_colorspace
  WGL_EXT_create_context_es2_profile     WGL_EXT_create_context_es_profile
  WGL_EXT_extensions_string              WGL_EXT_framebuffer_sRGB
  WGL_EXT_pixel_format_packed_float      WGL_EXT_swap_control
  WGL_EXT_swap_control_tear              WGL_NVX_DX_interop
  WGL_NV_DX_interop                      WGL_NV_DX_interop2
  WGL_NV_copy_image                      WGL_NV_delay_before_swap
  WGL_NV_float_buffer                    WGL_NV_multigpu_context
  WGL_NV_multisample_coverage            WGL_NV_render_depth_texture
  WGL_NV_render_texture_rectangle
:display:gsg:glgsg(debug): HAS EXT GL_EXT_debug_marker 0
:display:gsg:glgsg: gl-debug enabled.
:display:gsg:glgsg(debug): HAS EXT GL_ARB_vertex_program 1
:display:gsg:glgsg(debug): HAS EXT GL_ARB_fragment_program 1
:display:gsg:glgsg(debug): HAS EXT GL_NV_gpu_program5 1
:display:gsg:glgsg(debug): HAS EXT GL_NV_framebuffer_multisample_coverage 1
:display:gsg:glgsg(debug): Occlusion query counter provides 32 bits.
:display:gsg:glgsg(debug): HAS EXT GL_EXT_texture_mirror_clamp 1
:display:gsg:glgsg(debug): max texture dimension = 32768, max 3d texture = 16384, max 2d texture array = 2048, max cube map = 32768
:display:gsg:glgsg(debug): max_elements_vertices = 1048576, max_elements_indices = 1048576
:display:gsg:glgsg(debug): vertex buffer objects are supported.
:display:gsg:glgsg(debug): Supported compressed texture formats:
  GL_COMPRESSED_RGB_S3TC_DXT1_EXT
  GL_COMPRESSED_RGBA_S3TC_DXT3_EXT
  GL_COMPRESSED_RGBA_S3TC_DXT5_EXT
  GL_PALETTE4_RGB8_OES
  GL_PALETTE4_RGBA8_OES
  GL_PALETTE4_R5_G6_B5_OES
  GL_PALETTE4_RGBA4_OES
  GL_PALETTE4_RGB5_A1_OES
  GL_PALETTE8_RGB8_OES
  GL_PALETTE8_RGBA8_OES
  GL_PALETTE8_R5_G6_B5_OES
  GL_PALETTE8_RGBA4_OES
  GL_PALETTE8_RGB5_A1_OES
  GL_COMPRESSED_RGB8_ETC2
  GL_COMPRESSED_SRGB8_ETC2
  GL_COMPRESSED_RGB8_PUNCHTHROUGH_ALPHA1_ETC2
  GL_COMPRESSED_SRGB8_PUNCHTHROUGH_ALPHA1_ETC2
  GL_COMPRESSED_RGBA8_ETC2_EAC
  GL_COMPRESSED_SRGB8_ALPHA8_ETC2_EAC
  GL_COMPRESSED_R11_EAC
  GL_COMPRESSED_SIGNED_R11_EAC
  GL_COMPRESSED_RG11_EAC
  GL_COMPRESSED_SIGNED_RG11_EAC
  GL_COMPRESSED_RGBA_ASTC_4x4_KHR
  GL_COMPRESSED_RGBA_ASTC_5x4_KHR
  GL_COMPRESSED_RGBA_ASTC_5x5_KHR
  GL_COMPRESSED_RGBA_ASTC_6x5_KHR
  GL_COMPRESSED_RGBA_ASTC_6x6_KHR
  GL_COMPRESSED_RGBA_ASTC_8x5_KHR
  GL_COMPRESSED_RGBA_ASTC_8x6_KHR
  GL_COMPRESSED_RGBA_ASTC_8x8_KHR
  GL_COMPRESSED_RGBA_ASTC_10x5_KHR
  GL_COMPRESSED_RGBA_ASTC_10x6_KHR
  GL_COMPRESSED_RGBA_ASTC_10x8_KHR
  GL_COMPRESSED_RGBA_ASTC_10x10_KHR
  GL_COMPRESSED_RGBA_ASTC_12x10_KHR
  GL_COMPRESSED_RGBA_ASTC_12x12_KHR
  GL_COMPRESSED_SRGB8_ALPHA8_ASTC_4x4_KHR
  GL_COMPRESSED_SRGB8_ALPHA8_ASTC_5x4_KHR
  GL_COMPRESSED_SRGB8_ALPHA8_ASTC_5x5_KHR
  GL_COMPRESSED_SRGB8_ALPHA8_ASTC_6x5_KHR
  GL_COMPRESSED_SRGB8_ALPHA8_ASTC_6x6_KHR
  GL_COMPRESSED_SRGB8_ALPHA8_ASTC_8x5_KHR
  GL_COMPRESSED_SRGB8_ALPHA8_ASTC_8x6_KHR
  GL_COMPRESSED_SRGB8_ALPHA8_ASTC_8x8_KHR
  GL_COMPRESSED_SRGB8_ALPHA8_ASTC_10x5_KHR
  GL_COMPRESSED_SRGB8_ALPHA8_ASTC_10x6_KHR
  GL_COMPRESSED_SRGB8_ALPHA8_ASTC_10x8_KHR
  GL_COMPRESSED_SRGB8_ALPHA8_ASTC_10x10_KHR
  GL_COMPRESSED_SRGB8_ALPHA8_ASTC_12x10_KHR
  GL_COMPRESSED_SRGB8_ALPHA8_ASTC_12x12_KHR
:display:gsg:glgsg(debug): HAS EXT GL_ARB_bindless_texture 1
:display:gsg:glgsg(debug): HAS EXT GL_EXT_stencil_two_side 1
:display:gsg:glgsg(debug): max lights = 8
:display:gsg:glgsg(debug): max clip planes = 8
:display:gsg:glgsg(debug): max texture stages = 4
:display:gsg:glgsg(debug): Supported program binary formats:
:display:gsg:glgsg(debug):   0x8E21
:display:gsg:glgsg(debug): Supported Cg profiles:
:display:gsg:glgsg(debug):   vp20
:display:gsg:glgsg(debug):   fp20
:display:gsg:glgsg(debug):   vp30
:display:gsg:glgsg(debug):   fp30
:display:gsg:glgsg(debug):   arbvp1
:display:gsg:glgsg(debug):   fp40
:display:gsg:glgsg(debug):   arbfp1
:display:gsg:glgsg(debug):   vp40
:display:gsg:glgsg(debug):   glslv
:display:gsg:glgsg(debug):   glslf
:display:gsg:glgsg(debug):   glslg
:display:gsg:glgsg(debug):   gp4fp
:display:gsg:glgsg(debug):   gp4vp
:display:gsg:glgsg(debug):   gp4gp
:display:gsg:glgsg(debug):   gp5fp
:display:gsg:glgsg(debug):   gp5vp
:display:gsg:glgsg(debug):   gp5gp
:display:gsg:glgsg(debug):   gp5tcp
:display:gsg:glgsg(debug):   gp5tep
:display:gsg:glgsg(debug): Cg GLSL version = CG_GL_GLSL_120
:display:gsg:glgsg(debug): Cg latest vertex profile = gp5vp
:display:gsg:glgsg(debug): Cg latest fragment profile = gp5fp
:display:gsg:glgsg(debug): Cg latest geometry profile = gp5gp
:display:gsg:glgsg(debug): basic-shaders-only #f
:display:gsg:glgsg(debug): Cg active vertex profile = gp5vp
:display:gsg:glgsg(debug): Cg active fragment profile = gp5fp
:display:gsg:glgsg(debug): Cg active geometry profile = gp5gp
:display:gsg:glgsg(debug): shader model = 5.0
:display:gsg:glgsg(debug): HAS EXT WGL_EXT_swap_control 1
:display:gsg:glgsg(debug): HAS EXT WGL_ARB_pbuffer 1
:display:gsg:glgsg(debug): HAS EXT WGL_ARB_pixel_format 1
:display:gsg:glgsg(debug): HAS EXT WGL_ARB_multisample 1
:display:gsg:glgsg(debug): HAS EXT WGL_ARB_render_texture 1
:net(error): Unable to open TCP connection to server [::1]:5185
:pstats(error): Couldn't connect to PStatServer at localhost:5185
:display:gsg:glgsg(debug): Creating depth stencil renderbuffer.
:display:gsg:glgsg: Framebuffer detailed info: The driver allocated storage for renderbuffer 1.
:display:gsg:glgsg(debug): Binding texture  to color attachment.
:display:gsg:glgsg(debug): loading texture with NULL image 
:display:gsg:glgsg(debug): loading new texture object for , 1024 x 1024 x 1, z = 0, mipmaps 0, uses_mipmaps = 0
:display:gsg:glgsg(debug):   (initializing NULL image)
:display:gsg:glgsg(debug): Creating depth stencil renderbuffer.
:display:gsg:glgsg: Framebuffer detailed info: The driver allocated storage for renderbuffer 1.
:display:gsg:glgsg(debug): Binding texture  to color attachment.
:display:gsg:glgsg(debug): Compiling GLSL vertex shader created-shader
:display:gsg:glgsg(debug): Compiling GLSL fragment shader created-shader
:display:gsg:glgsg(debug): Linking GLSL shader created-shader
:display:gsg:glgsg(debug): Active attribute p3d_Vertex with size 1 and type 0x8b52 is bound to location 0
:display:gsg:glgsg(debug): Active uniform SomeInput with size 1 and type 0x1404 is bound to location 0
:display:gsg:glgsg(debug): Active uniform p3d_ModelViewProjectionMatrix with size 1 and type 0x8b5c is bound to location 1
:display:gsg:glgsg(debug): Buffer detailed info: Buffer object 1 (bound to GL_ARRAY_BUFFER_ARB, usage hint is GL_STATIC_DRAW) will use VIDEO memory as the source for buffer object operations.
:display:gsg:glgsg(error): GL_INVALID_OPERATION error generated. No active compute shader.
:display:gsg:glgsg(debug): Compiling GLSL compute shader created-shader
:display:gsg:glgsg(debug): Linking GLSL shader created-shader
:display:gsg:glgsg(debug): Active uniform InputTexture with size 1 and type 0x904d is bound to location 0
:display:gsg:glgsg(debug): Active uniform OutputTexture with size 1 and type 0x9063 is bound to location 1
:display:gsg:glgsg(debug): loading texture with NULL image
:display:gsg:glgsg(debug): loading new texture object for , 1024 x 1024 x 1, z = 0, mipmaps 0, uses_mipmaps = 0
:display:gsg:glgsg(debug):   (initializing NULL image)
:display:gsg:glgsg(debug): Buffer detailed info: Buffer object 2 (bound to GL_ARRAY_BUFFER_ARB, usage hint is GL_STATIC_DRAW) will use VIDEO memory as the source for buffer object operations.
:display:gsg:glgsg(debug): loading uncompressed texture _0
:display:gsg:glgsg(debug): loading new texture object for _0, 256 x 256 x 1, z = 0, mipmaps 1, uses_mipmaps = 0
:display:gsg:glgsg(debug): Buffer detailed info: Buffer object 3 (bound to GL_ARRAY_BUFFER_ARB, usage hint is GL_STATIC_DRAW) will use VIDEO memory as the source for buffer object operations.
:display:gsg:glgsg(debug): Buffer detailed info: Buffer object 4 (bound to GL_ELEMENT_ARRAY_BUFFER_ARB, usage hint is GL_STATIC_DRAW) will use VIDEO memory as the source for buffer object operations.
:display:gsg:glgsg(error): GL_INVALID_OPERATION error generated. No active compute shader.
:display:gsg:glgsg(error): GL_INVALID_OPERATION error generated. No active compute shader.
:display:gsg:glgsg(error): GL_INVALID_OPERATION error generated. No active compute shader.
:display:gsg:glgsg(error): GL_INVALID_OPERATION error generated. No active compute shader.
:display:gsg:glgsg(error): GL_INVALID_OPERATION error generated. No active compute shader.
:display:gsg:glgsg(error): GL_INVALID_OPERATION error generated. No active compute shader.
:display:gsg:glgsg(error): GL_INVALID_OPERATION error generated. No active compute shader.
:display:gsg:glgsg(error): GL_INVALID_OPERATION error generated. No active compute shader.
:display:gsg:glgsg(error): GL_INVALID_OPERATION error generated. No active compute shader.
:display:gsg:glgsg(error): GL_INVALID_OPERATION error generated. No active compute shader.
:display:gsg:glgsg(error): GL_INVALID_OPERATION error generated. No active compute shader.
:display:gsg:glgsg(error): GL_INVALID_OPERATION error generated. No active compute shader.
:display:gsg:glgsg(error): GL_INVALID_OPERATION error generated. No active compute shader.
:display:gsg:glgsg(error): GL_INVALID_OPERATION error generated. No active compute shader.
:display:gsg:glgsg(error): GL_INVALID_OPERATION error generated. No active compute shader.
:display:gsg:glgsg(error): GL_INVALID_OPERATION error generated. No active compute shader.
:display:gsg:glgsg(error): GL_INVALID_OPERATION error generated. No active compute shader.
:display:gsg:glgsg(error): GL_INVALID_OPERATION error generated. No active compute shader.
:display:gsg:glgsg(error): GL_INVALID_OPERATION error generated. No active compute shader.
:display:gsg:glgsg(error): GL_INVALID_OPERATION error generated. No active compute shader.
:display:gsg:glgsg(error): GL_INVALID_OPERATION error generated. No active compute shader.
:display:gsg:glgsg(error): GL_INVALID_OPERATION error generated. No active compute shader.
:display:gsg:glgsg(error): GL_INVALID_OPERATION error generated. No active compute shader.
:display:gsg:glgsg(debug): GLGraphicsStateGuardian 0000023418C75060 destructing

@rdb
Copy link
Member

rdb commented Aug 10, 2021

When you render the offscreen buffer, Panda encounters the ComputeNode in the scene graph too, and in this case the shader on the ComputeNode is being overridden by your initial state, resulting in the ComputeNode being dispatched without a valid compute shader:

    temp_node = NodePath("temp")
    temp_node.set_shader(shader, 1)
    temp_node.set_shader_input("SomeInput", 0)
    camera.node().set_initial_state(temp_node.get_state())

You should hide your ComputeNode from this camera using draw masks.

@Clockwork-Muse
Copy link
Author

:headdesk:

and in this case the shader on the ComputeNode is being overridden by your initial state,

Yes, that was it.

You should hide your ComputeNode from this camera using draw masks.

I don't think I can. In my real code I'm instantiating multiple cameras as different viewpoints, and disabling the main camera. I specifically need the compute shader to run after that camera renders to the buffer, because I need to read from it. Unless something else causes it to run? (Although, given that extracting the data from a non-FBO takes so much time due to stalls, besides being delayed, I may have to go back to pixel shaders anyway.)

Beyond that, I can re-apply the compute shader by giving it a higher override, but I haven't been able to get draw masks to correctly control the compute shader. The following outputs a red screen, but I think it should be yellow (and it is yellow if compute_chain.hide() is removed).

from typing import Tuple
from direct import showbase

from direct.showbase.ShowBase import ShowBase
from panda3d.core import Camera, CardMaker, ClockObject, ComputeNode, DrawMask, FrameBufferProperties, GraphicsOutput, GraphicsPipe, Lens, LVector2i, NodePath, Shader, Texture, WindowProperties, loadPrcFileData

# Engine configuration for this repro: turns on GL debug output (gl-debug) and
# verbose glgsg/shader notify levels so driver-reported errors show up in the
# log, and requests a fixed-size 1024x1024 window.
loadPrcFileData("",
"""
    want-pstats 1
    gl-debug #t
    audio-library-name null
    show-frame-rate-meter 1
    sync-video 0
    win-fixed-size 1
    win-size 1024 1024
    notify-level-glgsg debug
    notify-level-shader spam
""")

# GLSL vertex shader used by setup_frame_id(): forwards the first texture
# coordinate set and transforms the vertex by the model-view-projection matrix.
simple_vertex_shader = \
"""
#version 460

// Uniform inputs
uniform mat4 p3d_ModelViewProjectionMatrix;

// Vertex inputs
in vec4 p3d_Vertex;
in vec2 p3d_MultiTexCoord0;

out vec2 VertexTexCoord;

void main() {
    VertexTexCoord = p3d_MultiTexCoord0;
    gl_Position = p3d_ModelViewProjectionMatrix * p3d_Vertex;
}
"""

# Draw-mask bit (bit 15) intended for the compute node. In the visible part of
# this script it is only referenced from commented-out code in setup_frame_id().
compute_node_mask = DrawMask.bit(15)

def setup_frame_id(show_base: ShowBase, size: LVector2i, root: NodePath, lens: Lens) -> Tuple[NodePath, Texture]:
    """Create the offscreen "frame id" buffer, its camera, and its colour texture.

    Args:
        show_base: Running ShowBase; supplies the graphics engine, pipe and
            the host window whose GSG the new buffer shares.
        size: Size used for the buffer's WindowProperties.
        root: Node the new camera is reparented to.
        lens: Lens handed to ``make_camera`` for the new camera.

    Returns:
        ``(camera, texture)`` where ``camera`` is the value returned by
        ``show_base.make_camera`` (used below as a NodePath: it is reparented
        and ``.node()`` is called on it) and ``texture`` is the texture
        attached to the buffer's colour bitplane.
    """
    window_props = WindowProperties(size=size)
    flags = GraphicsPipe.BFRefuseWindow

    # Request 32 red bits and no green/blue/alpha bits, as a float colour
    # target without multisampling or sRGB, plus 24 depth and 8 stencil bits.
    props = FrameBufferProperties()
    props.set_rgba_bits(32, 0, 0, 0)
    props.set_float_color(True)
    props.set_multisamples(0)
    props.set_force_hardware(True)
    props.set_srgb_color(False)
    props.set_depth_bits(24)
    props.set_stencil_bits(8)
    props.set_float_depth(True)

    # Fragment stage writes the integer uniform SomeInput, converted to float,
    # into all three colour components.
    shader = Shader.make(Shader.SL_GLSL,
    vertex=simple_vertex_shader,
    fragment=
    """
    #version 460

    uniform int SomeInput;
    out vec3 Color;

    void main() {
        Color = vec3(float(SomeInput));
    }
    """)

    # NOTE(review): -88 is presumably the output's sort value - confirm
    # against the make_output signature.
    buffer = show_base.graphicsEngine.make_output(
        show_base.pipe, "frame id buffer", -88, props, window_props, flags, show_base.win.get_gsg(), show_base.win)
    texture = Texture()
    buffer.add_render_texture(
        tex=texture, mode=GraphicsOutput.RTMBindOrCopy, bitplane=GraphicsOutput.RTPColor)
    texture.set_format(Texture.F_r32)
    texture.set_clear_color((0, 0, 0, 0))
    buffer.set_inverted(True)
    camera = show_base.make_camera(buffer, camName="frame id", lens=lens)
    camera.reparent_to(root)

    # Build the camera's initial state on a throwaway node: the frame-id
    # shader with override priority 1, and SomeInput defaulting to 0.
    # As explained in this thread, this initial state overrides the shader on
    # a ComputeNode this camera encounters unless that node's shader carries a
    # higher override, which is what produced the "No active compute shader"
    # GL error.
    temp_node = NodePath("temp")
    temp_node.set_shader(shader, 1)
    temp_node.set_shader_input("SomeInput", 0)
    camera.node().set_initial_state(temp_node.get_state())
    # Disabled draw-mask experiments, kept from the original report.
    # camera.node().camera_mask = DrawMask.all_on() ^ compute_node_mask
    # camera.node().camera_mask.clear_bit(15)
    print(camera.node().camera_mask)

    return camera, texture

def setup_compute(root: NodePath, input_texture: Texture) -> Texture:
    """Attach a compute pass under ``root`` that thresholds ``input_texture``.

    Args:
        root: Node under which the "compute_chain" node and its ComputeNode
            child are attached.
        input_texture: Texture bound to the shader as ``InputTexture``; its
            x/y size also determines the output size and the dispatch counts.

    Returns:
        The newly created texture bound to the shader as ``OutputTexture``.
    """
    # Output texture matches the input's dimensions and starts cleared to 0.
    # NOTE(review): created as F_r16i while the shader declares r16ui -
    # confirm the two are compatible.
    some_texture = Texture()
    some_texture.setup_2d_texture(input_texture.x_size, input_texture.y_size, Texture.T_unsigned_short, Texture.F_r16i)
    some_texture.set_clear_color((0, 0, 0, 0))
    some_texture.clear_image()

    # One invocation per work group (local size 1x1); each reads one texel of
    # InputTexture and stores 1 if its red value is > 0, otherwise 0.
    compute_shader = Shader.make_compute(Shader.SL_GLSL,
    """
    #version 460

    layout (local_size_x = 1, local_size_y = 1) in;

    uniform restrict readonly layout (r32f) image2D InputTexture;
    uniform restrict writeonly layout (r16ui) uimage2D OutputTexture;

    void main() {
        const ivec2 coordinates = ivec2(gl_GlobalInvocationID.xy);
        const float val = imageLoad(InputTexture, coordinates).r;
        imageStore(OutputTexture, coordinates, uvec4(uint(val > 0 ? 1 : 0)));
    }

    """)
    compute_chain = root.attach_new_node("compute_chain")
    cn = ComputeNode("compute_node")
    # Dispatch x_size * y_size * 1 work groups.
    cn.add_dispatch(input_texture.x_size, input_texture.y_size, 1)
    # From here on compute_chain refers to the ComputeNode's NodePath, not the
    # intermediate "compute_chain" parent created above.
    compute_chain = compute_chain.attach_new_node(cn)
    compute_chain.set_bin("fixed", -5)
    # Override 5 is higher than the override 1 used for the frame-id camera's
    # initial-state shader, so the compute shader stays applied on this node.
    compute_chain.set_shader(compute_shader, 5)
    compute_chain.set_shader_input("InputTexture", input_texture)
    compute_chain.set_shader_input("OutputTexture", some_texture)
    # This hide()/show() pair is the behaviour questioned in the thread: the
    # author expected show(DrawMask.all_on()) to undo hide() for every camera,
    # but reports a red result (yellow expected) unless hide() is removed.
    compute_chain.hide()
    compute_chain.show(DrawMask.all_on())

    return some_texture

def attach_card(root: NodePath, ot: Texture, ct: Texture):
    """Attach a fullscreen debug quad under ``root`` that visualises both textures.

    The quad's fragment shader loads one texel from ``ot`` (bound as
    ``OriginalTexture``) and one from ``ct`` (bound as ``ConvertedTexture``).
    It writes 1.0 to the red channel when the original value is greater than
    zero and 1.0 to the green channel when the converted value is greater than
    zero, otherwise 0.0. The blue channel is never written by the shader.
    """
    fragment_source = """
    #version 460

    in vec2 VertexTexCoord;
    uniform restrict readonly layout (r32f) image2D OriginalTexture;
    uniform restrict readonly layout (r16ui) uimage2D ConvertedTexture;
    out vec3 Color;

    void main() {
        ivec2 coordinates = ivec2(VertexTexCoord * imageSize(OriginalTexture).xy);
        Color.r = imageLoad(OriginalTexture, coordinates).r > 0 ? 1.0 : 0.0;
        Color.g = imageLoad(ConvertedTexture, coordinates).r > 0 ? 1.0 : 0.0;
    }
    """
    debug_shader = Shader.make(
        Shader.SL_GLSL,
        vertex=simple_vertex_shader,
        fragment=fragment_source,
    )

    # Fullscreen quad whose UV range is taken from the original texture.
    quad_maker = CardMaker("screen quad")
    quad_maker.set_frame_fullscreen_quad()
    quad_maker.set_uv_range(ot)
    quad = root.attach_new_node(quad_maker.generate())

    # Render state for the quad: fixed bin at sort 10, shader override 50.
    quad.set_bin("fixed", 10)
    quad.set_shader(debug_shader, 50)
    quad.set_shader_inputs(OriginalTexture=ot, ConvertedTexture=ct)


# Driver for the reproduction: build the frame-id pass, the compute pass and
# the debug quad, then enter the main loop.
base = ShowBase()

# Put the global clock in forced mode at a frame rate of 30.
clock = ClockObject.get_global_clock()
clock.mode = ClockObject.M_forced
clock.set_frame_rate(30)

# The frame-id camera is parented to render; the compute node is then
# attached beneath that camera's NodePath, and the debug quad goes on render2d.
cam, original_texture = setup_frame_id(base, LVector2i(1024, 1024), base.render, base.camLens)
converted_texture = setup_compute(cam, original_texture)
attach_card(base.render2d, original_texture, converted_texture)

# A fullscreen card in the 3D scene with SomeInput set to 1.
# NOTE(review): presumably this takes precedence over the SomeInput=0 in the
# frame-id camera's initial state, so the frame-id pass writes a non-zero
# value where the card is drawn - confirm.
cm = CardMaker("screen quad")
cm.set_frame_fullscreen_quad()
card = base.render.attach_new_node(cm.generate())
card.set_shader_input("SomeInput", 1)
# Disabled draw-mask experiments, kept from the original report; the prints
# show the masks actually in effect for the default 3D and 2D cameras.
# base.cam.node().camera_mask = DrawMask.all_on() ^ compute_node_mask
# base.cam.node().camera_mask.clear_bit(15)
print(base.cam.node().camera_mask)
# base.cam2d.node().camera_mask.clear_bit(15)
# base.cam2d.node().camera_mask = DrawMask.all_on() ^ compute_node_mask
print(base.cam2d.node().camera_mask)
# Dump the scene graph under render before starting the main loop.
base.render.ls()
base.run()

@rdb
Copy link
Member

rdb commented Aug 20, 2021

You just want to set the camera mask to eg. 0b1 and then call .hide(0b1) to hide the compute node to that camera.

@Clockwork-Muse
Copy link
Author

> You just want to set the camera mask to eg. 0b1 and then call .hide(0b1) to hide the compute node to that camera.

This wouldn't help in my real code, because I potentially only have one camera (the extra one here is only being used for debug output).
That aside, isn't there still a problem here, because shouldn't

compute_chain.hide()
compute_chain.show(DrawMask.all_on())

unhide for everything?

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Labels
None yet
Projects
None yet
Development

No branches or pull requests

2 participants