Add an implementation of RPDF dither on the final output.

mltframework · Nov 6, 2012 · ff9e68a · ff9e68a
1 parent 3a1e190
commit ff9e68a
Show file tree

Hide file tree

Showing 9 changed files with 273 additions and 3 deletions.
diff --git a/.gitignore b/.gitignore
@@ -21,6 +21,7 @@ diffusion_effect_test
 white_balance_effect_test
 lift_gamma_gain_effect_test
 resample_effect_test
+dither_effect_test
 flat_input_test
 ycbcr_input_test
 chain-*.frag

diff --git a/Makefile b/Makefile
@@ -27,6 +27,7 @@ TESTS += diffusion_effect_test
 TESTS += white_balance_effect_test
 TESTS += lift_gamma_gain_effect_test
 TESTS += resample_effect_test
+TESTS += dither_effect_test
 TESTS += flat_input_test
 TESTS += ycbcr_input_test
 
@@ -53,6 +54,7 @@ LIB_OBJS += unsharp_mask_effect.o
 LIB_OBJS += mix_effect.o
 LIB_OBJS += resize_effect.o
 LIB_OBJS += resample_effect.o
+LIB_OBJS += dither_effect.o
 LIB_OBJS += deconvolution_sharpen_effect.o
 LIB_OBJS += sandbox_effect.o
 
@@ -94,6 +96,8 @@ lift_gamma_gain_effect_test: lift_gamma_gain_effect_test.o $(TEST_OBJS) libmovit
 	$(CXX) -o $@ $^ $(LDFLAGS)
 resample_effect_test: resample_effect_test.o $(TEST_OBJS) libmovit.a
 	$(CXX) -o $@ $^ $(LDFLAGS)
+dither_effect_test: dither_effect_test.o $(TEST_OBJS) libmovit.a
+	$(CXX) -o $@ $^ $(LDFLAGS)
 flat_input_test: flat_input_test.o $(TEST_OBJS) libmovit.a
 	$(CXX) -o $@ $^ $(LDFLAGS)
 ycbcr_input_test: ycbcr_input_test.o $(TEST_OBJS) libmovit.a

diff --git a/demo.cpp b/demo.cpp
@@ -178,6 +178,7 @@ int main(int argc, char **argv)
 	//sandbox_effect->set_float("parm", 42.0f);
 	//chain.add_effect(new MirrorEffect());
 	chain.add_output(inout_format);
+	chain.set_dither_bits(8);
 	chain.finalize();
 
 	// generate a PBO to hold the data we read back with glReadPixels()

diff --git a/dither_effect.cpp b/dither_effect.cpp
@@ -0,0 +1,94 @@
+#include <math.h>
+#include <assert.h>
+
+#include "dither_effect.h"
+#include "util.h"
+#include "opengl.h"
+
+namespace {
+
+// A small linear congruential generator (LCG).
+// We roll our own so that the sequence is deterministic from frame to
+// frame and from run to run; neither speed nor statistical quality is
+// critical here, as long as the period is reasonably long. The result
+// is masked down to 31 bits, i.e. it lies in the range [0, 2^31).
+//
+// The constants come from
+// http://en.wikipedia.org/wiki/Linear_congruential_generator.
+unsigned lcg_rand(unsigned x)
+{
+	const unsigned low_31_bits = (1U << 31) - 1;
+	const unsigned stepped = x * 1103515245U + 12345U;
+	return stepped & low_31_bits;
+}
+
+}  // namespace
+
+// Sets up the defaults (1280x720 output, 8 output bits), exposes them as
+// settable integer parameters, and allocates the GL texture name used for
+// the noise. The last_* fields start out at -1 so that they differ from any
+// sensible setting and the texture gets generated on first use.
+DitherEffect::DitherEffect()
+	: width(1280),
+	  height(720),
+	  num_bits(8),
+	  last_width(-1),
+	  last_height(-1),
+	  last_num_bits(-1)
+{
+	register_int("output_width", &width);
+	register_int("output_height", &height);
+	register_int("num_bits", &num_bits);
+
+	glGenTextures(1, &texnum);
+}
+
+// Releases the noise texture name obtained in the constructor.
+DitherEffect::~DitherEffect()
+{
+	const GLuint doomed_textures[] = { texnum };
+	glDeleteTextures(1, doomed_textures);
+}
+
+// Returns the GLSL source for this effect, as read from dither_effect.frag.
+std::string DitherEffect::output_fragment_shader()
+{
+	std::string shader_source = read_file("dither_effect.frag");
+	return shader_source;
+}
+
+// Regenerates the dither noise for the current width, height and num_bits,
+// and uploads it into texnum on texture unit *sampler_num.
+//
+// Every texel gets one uniformly distributed value whose total span is
+// 1 / 2^num_bits. glsl_program_num and prefix are not used here.
+void DitherEffect::update_texture(GLuint glsl_program_num, const std::string &prefix, unsigned *sampler_num)
+{
+	float *dither_noise = new float[width * height];
+	float dither_double_amplitude = 1.0f / (1 << num_bits);
+
+	// Using the resolution as a seed gives us a consistent dither from frame to frame.
+	// It also gives a different dither for e.g. different aspect ratios, which _feels_
+	// good, but probably shouldn't matter.
+	unsigned seed = (width << 16) ^ height;
+	for (int i = 0; i < width * height; ++i) {
+		seed = lcg_rand(seed);
+		float normalized_rand = seed * (1.0f / (1U << 31)) - 0.5;  // Nominally [-0.5, 0.5).
+		dither_noise[i] = dither_double_amplitude * normalized_rand;
+	}
+
+	glActiveTexture(GL_TEXTURE0 + *sampler_num);
+	check_error();
+	glBindTexture(GL_TEXTURE_2D, texnum);
+	check_error();
+	glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST);
+	check_error();
+	// The magnification filter defaults to GL_LINEAR, and with a texture that
+	// is the same size as the output GL may well pick the magnification path.
+	// Force nearest there too, so neighboring noise values are never blended.
+	glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST);
+	check_error();
+	glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE);
+	check_error();
+	glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE);
+	check_error();
+	glTexImage2D(GL_TEXTURE_2D, 0, GL_LUMINANCE16F_ARB, width, height, 0, GL_LUMINANCE, GL_FLOAT, dither_noise);
+	check_error();
+
+	// glTexImage2D has copied the data, so the CPU-side buffer can go.
+	delete[] dither_noise;
+}
+
+// Binds the dither texture and points the shader's dither_tex sampler at it,
+// regenerating the texture first if the output size or bit depth has changed
+// since the texture was last built. Uses one texture unit: *sampler_num is
+// the unit bound here, and it is incremented before returning.
+void DitherEffect::set_gl_state(GLuint glsl_program_num, const std::string &prefix, unsigned *sampler_num)
+{
+	Effect::set_gl_state(glsl_program_num, prefix, sampler_num);
+
+	if (width != last_width || height != last_height || num_bits != last_num_bits) {
+		update_texture(glsl_program_num, prefix, sampler_num);
+		last_width = width;
+		last_height = height;
+		last_num_bits = num_bits;
+	}
+
+	glActiveTexture(GL_TEXTURE0 + *sampler_num);
+	check_error();
+	glBindTexture(GL_TEXTURE_2D, texnum);
+	check_error();
+
+	set_uniform_int(glsl_program_num, prefix, "dither_tex", *sampler_num);
+	// Advance the caller's counter, not our local copy of the pointer;
+	// incrementing the pointer itself would have no effect visible to the caller.
+	++*sampler_num;
+}
diff --git a/dither_effect.frag b/dither_effect.frag
@@ -0,0 +1,9 @@
+uniform sampler2D PREFIX(dither_tex);
+
+vec4 FUNCNAME(vec2 tc) {
+	// The same noise value is added to all four channels, alpha
+	// included, just in case. Alpha could arguably have its own
+	// separate dither, but it is unlikely to matter much, and alpha
+	// is not really handled at the moment in any case.
+	float noise = texture2D(PREFIX(dither_tex), tc).x;
+	return INPUT(tc) + vec4(noise);
+}
diff --git a/dither_effect.h b/dither_effect.h
@@ -0,0 +1,67 @@
+#ifndef _DITHER_EFFECT_H
+#define _DITHER_EFFECT_H 1
+
+// Implements simple rectangular-PDF dither.
+//
+// Although all of our processing internally is in floating-point (a mix of 16-
+// and 32-bit), eventually most pipelines will end up downconverting to a fixed-point
+// format, typically 8-bit unsigned integer (GL_RGBA8).
+//
+// The hardware will typically do proper rounding for us, so that we minimize
+// quantization noise, but for some applications, if you look closely, you can still
+// see some banding; 8 bits is not really all that much (and if we didn't have the
+// perceptual gamma curve, it would be a lot worse).
+//
+// The standard solution to this is dithering; in short, to add a small random component
+// to each pixel before quantization. This increases the overall noise floor slightly,
+// but allows us to represent frequency components with an amplitude lower than 1/256.
+// 
+// My standard reference on dither is:
+//
+//   Cameron Nicklaus Christou: “Optimal Dither and Noise Shaping in Image Processing”
+//   http://uwspace.uwaterloo.ca/bitstream/10012/3867/1/thesis.pdf
+//
+// However, we need to make two significant deviations from the recommendations it makes.
+// First of all, it recommends using a triangular-PDF (TPDF) dither (which can be synthesized
+// effectively by adding two uniformly distributed random numbers) instead of rectangular-PDF
+// (RPDF; using one uniformly distributed random number), in order to make the second moment
+// of the error signal independent from the original image. However, since the recommended
+// TPDF must be twice as wide as the RPDF, it means it can go to +/- 1, which means that
+// some of the time, it will add enough noise to change a pixel just by itself. Given that
+// a very common use case for us is converting 8-bit -> 8-bit (i.e., no bit reduction at all),
+// it would seem like a more important goal to have no noise in that situation than to
+// improve the dither further.
+//
+// Second, the thesis recommends noise shaping (also known as error diffusion in the image
+// processing world). This is, however, very hard to implement properly on a GPU, since it
+// almost by definition feeds the value of output pixels into the neighboring input pixels.
+// Maybe one could make a version that implemented the noise shapers by way of FIR filters
+// instead of IIR like this, but it would seem a lot of work for very subtle gain.
+//
+// We keep the dither noise fixed as long as the output resolution doesn't change;
+// this ensures we don't upset video codecs too much. (One could also dither in time,
+// like many LCD monitors do, but it starts to get very hairy, again, for limited gains.)
+// The dither is also deterministic across runs.
+
+#include "effect.h"
+
+// Effect that adds fixed, deterministic rectangular-PDF noise to its input.
+// It exposes three integer parameters: "output_width", "output_height" and
+// "num_bits".
+class DitherEffect : public Effect {
+public:
+	DitherEffect();
+	~DitherEffect();
+	virtual std::string effect_type_id() const { return "DitherEffect"; }
+	std::string output_fragment_shader();
+
+	// Binds the noise texture for rendering, rebuilding it first if the
+	// output size or bit depth has changed.
+	void set_gl_state(GLuint glsl_program_num, const std::string &prefix, unsigned *sampler_num);
+
+private:
+	// Regenerates and uploads the noise texture for the current settings.
+	void update_texture(GLuint glsl_program_num, const std::string &prefix, unsigned *sampler_num);
+
+	// Current settings, written through the registered integer parameters.
+	int width, height, num_bits;
+
+	// The settings the noise texture was last generated for; compared
+	// against the current ones to decide whether it must be rebuilt.
+	int last_width, last_height, last_num_bits;
+
+	// GL texture object holding the noise.
+	GLuint texnum;
+};
+
+#endif // !defined(_DITHER_EFFECT_H)
diff --git a/dither_effect_test.cpp b/dither_effect_test.cpp
@@ -0,0 +1,54 @@
+// Unit tests for DitherEffect.
+
+#include <math.h>
+
+#include "test_util.h"
+#include "gtest/gtest.h"
+
+// Inputs that land exactly on an 8-bit level must come out unchanged when
+// dithering to 8 bits: the noise alone should never be enough to push an
+// exact value over to a neighboring level.
+TEST(DitherEffectTest, NoDitherOnExactValues) {
+	const int size = 4;
+
+	// 0.2 * 255 = 51, so every input below sits exactly on an 8-bit level.
+	float data[size * size] = {
+		0.0, 1.0, 0.0, 1.0,
+		0.0, 1.0, 1.0, 0.0,
+		0.0, 0.2, 1.0, 0.2,
+		0.0, 0.0, 0.0, 0.0,
+	};
+	unsigned char quantized[size * size];
+
+	EffectChainTester tester(data, size, size, FORMAT_GRAYSCALE, COLORSPACE_sRGB, GAMMA_LINEAR, GL_RGBA8);
+	tester.get_chain()->set_dither_bits(8);
+	tester.run(quantized, GL_RED, COLORSPACE_sRGB, GAMMA_LINEAR);
+
+	unsigned char wanted[size * size] = {
+		0, 255,   0, 255,
+		0, 255, 255,   0,
+		0,  51, 255,  51,
+		0,   0,   0,   0,
+	};
+	expect_equal(wanted, quantized, size, size);
+}
+
+// A sinusoid with an amplitude of only a quarter of an 8-bit level should
+// still be measurable in the dithered 8-bit output.
+TEST(DitherEffectTest, SinusoidBelowOneLevelComesThrough) {
+	const float frequency = 0.3f * M_PI;
+	const unsigned size = 2048;
+	const float amplitude = 0.25f / 255.0f;  // 6 dB below what can be represented without dithering.
+
+	// The test signal rides on a constant level of 0.2.
+	float data[size];
+	for (unsigned x = 0; x < size; ++x) {
+		data[x] = 0.2 + amplitude * sin(x * frequency);
+	}
+
+	EffectChainTester tester(data, size, 1, FORMAT_GRAYSCALE, COLORSPACE_sRGB, GAMMA_LINEAR, GL_RGBA8);
+	tester.get_chain()->set_dither_bits(8);
+
+	unsigned char out_data[size];
+	tester.run(out_data, GL_RED, COLORSPACE_sRGB, GAMMA_LINEAR);
+
+	// Correlate the output (minus the constant level) with the same sinusoid
+	// to measure how strongly it is present.
+	const double base_level = 0.2*255.0;
+	float sum = 0.0f;
+	for (unsigned x = 0; x < size; ++x) {
+		sum += 2.0 * (int(out_data[x]) - base_level) * sin(x * frequency);
+	}
+
+	EXPECT_NEAR(amplitude, sum / (size * 255.0f), 1e-5);
+}
diff --git a/effect_chain.cpp b/effect_chain.cpp
@@ -15,13 +15,16 @@
 #include "gamma_expansion_effect.h"
 #include "gamma_compression_effect.h"
 #include "colorspace_conversion_effect.h"
+#include "dither_effect.h"
 #include "input.h"
 #include "opengl.h"
 
 EffectChain::EffectChain(float aspect_nom, float aspect_denom)
 	: aspect_nom(aspect_nom),
 	  aspect_denom(aspect_denom),
+	  dither_effect(NULL),
 	  fbo(0),
+	  num_dither_bits(0),
 	  finalized(false) {}
 
 EffectChain::~EffectChain()
@@ -933,6 +936,22 @@ void EffectChain::fix_output_gamma()
 		connect_nodes(output, conversion);
 	}
 }
+
+// Appends a DitherEffect after the current output node when dithering has
+// been requested (num_dither_bits != 0). This must happen after everything
+// else (GammaCompressionEffect etc.), since dither is about the only effect
+// that can _not_ be done in linear space.
+void EffectChain::add_dither_if_needed()
+{
+	const bool dither_requested = (num_dither_bits != 0);
+	if (!dither_requested) {
+		return;
+	}
+
+	// Look up the output before adding the new node, which itself starts
+	// out without any outgoing links.
+	Node *tail = find_output_node();
+	Node *dither_node = add_node(new DitherEffect());
+	CHECK(dither_node->effect->set_int("num_bits", num_dither_bits));
+	connect_nodes(tail, dither_node);
+
+	// Remembered so the output size can be set on it at render time.
+	dither_effect = dither_node->effect;
+}
 
 // Find the output node. This is, simply, one that has no outgoing links.
 // If there are multiple ones, the graph is malformed (we do not support
@@ -984,12 +1003,16 @@ void EffectChain::finalize()
 	fix_internal_gamma_by_asking_inputs(8);
 	fix_internal_gamma_by_inserting_nodes(9);
 
-	output_dot("step10-final.dot");
+	output_dot("step10-before-dither.dot");
+
+	add_dither_if_needed();
+
+	output_dot("step11-final.dot");
 
 	// Construct all needed GLSL programs, starting at the output.
 	construct_glsl_programs(find_output_node());
 
-	output_dot("step11-split-to-phases.dot");
+	output_dot("step12-split-to-phases.dot");
 
 	// If we have more than one phase, we need intermediate render-to-texture.
 	// Construct an FBO, and then as many textures as we need.
@@ -1126,6 +1149,10 @@ void EffectChain::render_to_fbo(GLuint dest_fbo, unsigned width, unsigned height
 			glBindFramebuffer(GL_FRAMEBUFFER, dest_fbo);
 			check_error();
 			glViewport(x, y, width, height);
+			if (dither_effect != NULL) {
+				CHECK(dither_effect->set_int("output_width", width));
+				CHECK(dither_effect->set_int("output_height", height));
+			}
 		} else {
 			Node *output_node = phases[phase]->effects.back();
 			glFramebufferTexture2D(

diff --git a/effect_chain.h b/effect_chain.h
@@ -90,8 +90,18 @@ class EffectChain {
 	Effect *add_effect(Effect *effect, const std::vector<Effect *> &inputs);
 
 	void add_output(const ImageFormat &format);
+
+	// Selects the bit depth of the final output, which scales the dither.
+	// Use 8 for most outputs. Passing 0 (the default) is a special value
+	// that turns dithering off.
+	void set_dither_bits(unsigned num_bits)
+	{
+		num_dither_bits = num_bits;
+	}
+
 	void finalize();
 
+
 	//void render(unsigned char *src, unsigned char *dst);
 	void render_to_screen()
 	{
@@ -168,19 +178,22 @@ class EffectChain {
 	void fix_internal_gamma_by_asking_inputs(unsigned step);
 	void fix_internal_gamma_by_inserting_nodes(unsigned step);
 	void fix_output_gamma();
+	void add_dither_if_needed();
 
 	float aspect_nom, aspect_denom;
 	ImageFormat output_format;
 
 	std::vector<Node *> nodes;
 	std::map<Effect *, Node *> node_map;
+	Effect *dither_effect;
 
 	std::vector<Input *> inputs;  // Also contained in nodes.
 
 	GLuint fbo;
 	std::vector<Phase *> phases;
 
-	GLenum format, bytes_per_pixel;
+	GLenum format;
+	unsigned bytes_per_pixel, num_dither_bits;
 	bool finalized;
 };