New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Rewrite the blur shader to remove the variable-length loop and to use the texture filtering hardware more effectively. #3028
Changes from all commits
b9ae5a3
28f977c
e509c87
4684f99
File filter...
Jump to…
| @@ -5,131 +5,120 @@ | ||
| #include shared,prim_shared | ||
|
|
||
| varying vec3 vUv; | ||
| flat varying vec4 vUvRect; | ||
| flat varying vec2 vOffsetScale; | ||
| flat varying float vSigma; | ||
| flat varying int vBlurRadius; | ||
| flat varying vec2 vSrcSizeInv; | ||
| flat varying vec4 vSrcRect; | ||
| // The coefficient and `exp(2.0 * coefficient)`, in that order. | ||
| flat varying vec2 vCoefficients; | ||
| flat varying int vVertical; | ||
|
|
||
| #ifdef WR_FEATURE_COLOR_TARGET | ||
| #define TEXTURE_SIZE() vec2(textureSize(sCacheRGBA8, 0).xy) | ||
| #else | ||
| #define TEXTURE_SIZE() vec2(textureSize(sCacheA8, 0).xy) | ||
| #endif | ||
|
|
||
| #ifdef WR_VERTEX_SHADER | ||
| // Applies a separable gaussian blur in one direction, as specified | ||
| // by the dir field in the blur command. | ||
|
|
||
| #define DIR_HORIZONTAL 0 | ||
| #define DIR_VERTICAL 1 | ||
|
|
||
| in int aBlurRenderTaskAddress; | ||
| in int aBlurSourceTaskAddress; | ||
| in int aBlurDirection; | ||
|
|
||
| struct BlurTask { | ||
| RenderTaskCommonData common_data; | ||
| float blur_radius; | ||
| vec2 coefficients; | ||
| int direction; | ||
| }; | ||
|
|
||
| BlurTask fetch_blur_task(int address) { | ||
| BlurTask fetchBlurTask(int address) { | ||
pcwalton
Author
Collaborator
|
||
| RenderTaskData task_data = fetch_render_task_data(address); | ||
|
|
||
| BlurTask task = BlurTask( | ||
| task_data.common_data, | ||
| task_data.data1.x | ||
| ); | ||
|
|
||
| BlurTask task = BlurTask(task_data.common_data, | ||
| task_data.data1.xy, | ||
| int(task_data.data1.z)); | ||
| return task; | ||
| } | ||
|
|
||
| void main(void) { | ||
| BlurTask blur_task = fetch_blur_task(aBlurRenderTaskAddress); | ||
| RenderTaskCommonData src_task = fetch_render_task_common_data(aBlurSourceTaskAddress); | ||
| BlurTask blurTask = fetchBlurTask(aBlurRenderTaskAddress); | ||
| RenderTaskCommonData srcTask = fetch_render_task_common_data(aBlurSourceTaskAddress); | ||
|
|
||
| RectWithSize src_rect = src_task.task_rect; | ||
| RectWithSize target_rect = blur_task.common_data.task_rect; | ||
|
|
||
| #if defined WR_FEATURE_COLOR_TARGET | ||
| vec2 texture_size = vec2(textureSize(sCacheRGBA8, 0).xy); | ||
| #else | ||
| vec2 texture_size = vec2(textureSize(sCacheA8, 0).xy); | ||
| #endif | ||
| vUv.z = src_task.texture_layer_index; | ||
| vBlurRadius = int(3.0 * blur_task.blur_radius); | ||
| vSigma = blur_task.blur_radius; | ||
|
|
||
| switch (aBlurDirection) { | ||
| case DIR_HORIZONTAL: | ||
| vOffsetScale = vec2(1.0 / texture_size.x, 0.0); | ||
| break; | ||
| case DIR_VERTICAL: | ||
| vOffsetScale = vec2(0.0, 1.0 / texture_size.y); | ||
| break; | ||
| default: | ||
| vOffsetScale = vec2(0.0); | ||
| } | ||
| RectWithSize srcRect = srcTask.task_rect; | ||
| RectWithSize targetRect = blurTask.common_data.task_rect; | ||
|
|
||
| vUvRect = vec4(src_rect.p0 + vec2(0.5), | ||
| src_rect.p0 + src_rect.size - vec2(0.5)); | ||
| vUvRect /= texture_size.xyxy; | ||
| vec2 position = targetRect.p0 + targetRect.size * aPosition.xy; | ||
|
|
||
| vec2 pos = target_rect.p0 + target_rect.size * aPosition.xy; | ||
| vec4 uvBounds = vec4(srcRect.p0, srcRect.p0 + srcRect.size); | ||
|
|
||
| vec2 uv0 = src_rect.p0 / texture_size; | ||
| vec2 uv1 = (src_rect.p0 + src_rect.size) / texture_size; | ||
| vUv.xy = mix(uv0, uv1, aPosition.xy); | ||
| vUv = vec3(mix(uvBounds.xy, uvBounds.zw, aPosition.xy), srcTask.texture_layer_index); | ||
| vSrcSizeInv = 1.0 / TEXTURE_SIZE(); | ||
| vSrcRect = vec4(srcRect.p0, srcRect.p0 + srcRect.size) + vec4(0.5, 0.5, -0.5, -0.5); | ||
| vCoefficients = blurTask.coefficients; | ||
| vVertical = blurTask.direction; | ||
|
|
||
| gl_Position = uTransform * vec4(pos, 0.0, 1.0); | ||
| gl_Position = uTransform * vec4(position, 0.0, 1.0); | ||
| } | ||
|
|
||
| #endif | ||
|
|
||
| #ifdef WR_FRAGMENT_SHADER | ||
|
|
||
| #if defined WR_FEATURE_COLOR_TARGET | ||
| #define SUPPORT 4 | ||
|
||
|
|
||
| #ifdef WR_FEATURE_COLOR_TARGET | ||
| #define SAMPLE_TYPE vec4 | ||
| #define SAMPLE_TEXTURE(uv) texture(sCacheRGBA8, uv) | ||
| #else | ||
| #define SAMPLE_TYPE float | ||
| #define SAMPLE_TEXTURE(uv) texture(sCacheA8, uv).r | ||
| #endif | ||
|
|
||
| // TODO(gw): Write a fast path blur that handles smaller blur radii | ||
| // with an offset / weight uniform table and a constant | ||
| // loop iteration count! | ||
|
|
||
| // TODO(gw): Make use of the bilinear sampling trick to reduce | ||
| // the number of texture fetches needed for a gaussian blur. | ||
| // Accumulates two neighbouring texels with one texture fetch: steps gaussCoefficient forward twice, then adds the weighted sample to colorSum and the combined weight to factorSum. | ||
| void accumulate(float offset, | ||
| float crossAxisCoord, | ||
| inout vec2 gaussCoefficient, | ||
| inout SAMPLE_TYPE colorSum, | ||
| inout float factorSum) { | ||
| float factorA = gaussCoefficient.x; | ||
| gaussCoefficient *= vec2(gaussCoefficient.y, vCoefficients.y); | ||
| float factorB = gaussCoefficient.x; | ||
| gaussCoefficient *= vec2(gaussCoefficient.y, vCoefficients.y); | ||
|
|
||
| // Shift the sample position from the first texel toward the second by factorB / (factorA + factorB), | ||
| // so a single fetch blends the two texels in that ratio (assumes linear filtering on the sampler - TODO confirm). | ||
| float factors = factorA + factorB; | ||
| float sampleOffset = offset + factorB / factors; | ||
|
|
||
| vec2 texCoord = vec2(sampleOffset, crossAxisCoord); | ||
| texCoord = clamp(vVertical != 0 ? texCoord.yx : texCoord.xy, vSrcRect.xy, vSrcRect.zw); | ||
|
|
||
| colorSum += factors * SAMPLE_TEXTURE(vec3(texCoord * vSrcSizeInv, vUv.z)); | ||
| factorSum += factors; | ||
| } | ||
|
|
||
| void main(void) { | ||
| SAMPLE_TYPE original_color = SAMPLE_TEXTURE(vUv); | ||
|
|
||
| // TODO(gw): The gauss function gets NaNs when blur radius | ||
| // is zero. In the future, detect this earlier | ||
| // and skip the blur passes completely. | ||
| if (vBlurRadius == 0) { | ||
| oFragColor = vec4(original_color); | ||
| // FIXME(pcwalton): We shouldn't end up with zero blur radii in the first place! | ||
| if (vCoefficients.x == 0.0) { | ||
| vec2 texCoord = clamp(vUv.xy, vSrcRect.xy, vSrcRect.zw); | ||
| oFragColor = vec4(SAMPLE_TEXTURE(vec3(texCoord * vSrcSizeInv, vUv.z))); | ||
| return; | ||
| } | ||
|
|
||
| // Incremental Gaussian Coefficient Calculation (See GPU Gems 3 pp. 877 - 889) | ||
| vec3 gauss_coefficient; | ||
| gauss_coefficient.x = 1.0 / (sqrt(2.0 * 3.14159265) * vSigma); | ||
| gauss_coefficient.y = exp(-0.5 / (vSigma * vSigma)); | ||
| gauss_coefficient.z = gauss_coefficient.y * gauss_coefficient.y; | ||
|
|
||
| float gauss_coefficient_sum = 0.0; | ||
| SAMPLE_TYPE avg_color = original_color * gauss_coefficient.x; | ||
| gauss_coefficient_sum += gauss_coefficient.x; | ||
| gauss_coefficient.xy *= gauss_coefficient.yz; | ||
| bool vertical = vVertical != 0; | ||
| vec2 axisCoord = vertical ? vUv.yx : vUv.xy; | ||
| float start = floor(axisCoord.x - float(SUPPORT)) + 0.5; | ||
kvark
Member
|
||
|
|
||
| for (int i=1 ; i <= vBlurRadius ; ++i) { | ||
| vec2 offset = vOffsetScale * float(i); | ||
| float offset = start - axisCoord.x; | ||
|
|
||
| vec2 st0 = clamp(vUv.xy - offset, vUvRect.xy, vUvRect.zw); | ||
| avg_color += SAMPLE_TEXTURE(vec3(st0, vUv.z)) * gauss_coefficient.x; | ||
| // See K. Turkowski, "Incremental Computation of the Gaussian", GPU Gems 3, chapter 40: | ||
| // | ||
| // https://developer.nvidia.com/gpugems/GPUGems3/gpugems3_ch40.html | ||
| vec2 gaussCoefficient = exp(vCoefficients.x * vec2(offset * offset, 2.0 * offset + 1.0)); | ||
|
|
||
| vec2 st1 = clamp(vUv.xy + offset, vUvRect.xy, vUvRect.zw); | ||
| avg_color += SAMPLE_TEXTURE(vec3(st1, vUv.z)) * gauss_coefficient.x; | ||
| SAMPLE_TYPE colorSum = SAMPLE_TYPE(0.0); | ||
| float factorSum = 0.0; | ||
|
|
||
| gauss_coefficient_sum += 2.0 * gauss_coefficient.x; | ||
| gauss_coefficient.xy *= gauss_coefficient.yz; | ||
| } | ||
| for (int i = 0; i < SUPPORT + 1; i++) | ||
| accumulate(start + float(i) * 2.0, axisCoord.y, gaussCoefficient, colorSum, factorSum); | ||
|
|
||
| oFragColor = vec4(avg_color) / gauss_coefficient_sum; | ||
| oFragColor = vec4(colorSum / factorSum); | ||
| } | ||
|
|
||
| #endif | ||
please stick to the existing naming convention in the shaders