Faster blit_with_alpha()

The `blit_with_alpha` function has a noticeable effect on the time it takes to join a game. To reduce the join times, I replace the `blit_with_alpha` function with a new one: * It does not use floating-point numbers. * It directly operates on the raw pixel data instead of using the comparatively slow `setPixel` and `getPixel` functions from Irrlicht. Only ECF_A8R8G8B8 base images are supported now. If the top image does not have the ECF_A8R8G8B8 colour format, it is converted; I assume that this happens rarely. * There are case distinctions for fully opaque, fully transparent and semi-transparent pixels. This may increase the performance if the mixing between two semi-transparent pixels happens rarely. * The new function no longer has the `src_pos` argument since it was always the zero vector. * `dst_pos` and `size` are passed by reference and no longer by value. * The function is only documented once where it is declared. For backwards compatibility, `blit_with_alpha` still mixes colours without gamma correction. `blit_with_alpha` nonetheless behaves slightly differently than before: If a semi-transparent pixel is drawn on top of another semi-transparent pixel, the colour is mixed in a way which we can consider to be more correct now.
minetest · Mar 9, 2024 · d5db142 · d5db142
1 parent ccc15e4
commit d5db142
Showing 1 changed file with 117 additions and 53 deletions.
diff --git a/src/client/texturesource.cpp b/src/client/texturesource.cpp
@@ -404,12 +404,22 @@ u32 TextureSource::getTextureId(const std::string &name)
 	return 0;
 }
 
-// Draw an image on top of another one, using the alpha channel of the
-// source image
-// overlay: only modify destination pixels that are fully opaque.
+
+/** Draw an image on top of another one with gamma-incorrect alpha compositing
+ *
+ * This exists because IImage::copyToWithAlpha() doesn't seem to always work.
+ *
+ * \tparam overlay If enabled, only modify pixels in dst which are fully opaque.
+ *   Defaults to false.
+ * \param src Top image. If it is not ECF_A8R8G8B8, a converted copy is drawn.
+ * \param dst Bottom image. This image must have the ECF_A8R8G8B8 colour format.
+ *   The top image is drawn onto this base image in-place.
+ * \param dst_pos An offset vector to move src before drawing it onto dst
+ * \param size Size limit of the copied area
+*/
 template<bool overlay = false>
 static void blit_with_alpha(video::IImage *src, video::IImage *dst,
-		v2s32 src_pos, v2s32 dst_pos, v2u32 size);
+	const v2s32 &dst_pos, const v2u32 &size);
 
 // Apply a color to an image.  Uses an int (0-255) to calculate the ratio.
 // If the ratio is 255 or -1 and keep_alpha is true, then it multiples the
@@ -912,7 +922,7 @@ video::IImage* TextureSource::generateImage(std::string_view name,
 
 		if (baseimg) {
 			core::dimension2d<u32> dim = tmp->getDimension();
-			blit_with_alpha(tmp, baseimg, v2s32(0, 0), v2s32(0, 0), dim);
+			blit_with_alpha(tmp, baseimg, v2s32(0, 0), dim);
 			tmp->drop();
 		} else {
 			baseimg = tmp;
@@ -999,10 +1009,8 @@ void blitBaseImage(video::IImage* &src, video::IImage* &dst)
 	core::dimension2d<u32> dim_dst = dst->getDimension();
 	// Position to copy the blitted to in the base image
 	core::position2d<s32> pos_to(0,0);
-	// Position to copy the blitted from in the blitted image
-	core::position2d<s32> pos_from(0,0);
 
-	blit_with_alpha(src, dst, pos_from, pos_to, dim_dst);
+	blit_with_alpha(src, dst, pos_to, dim_dst);
 }
 
 #define CHECK_BASEIMG() \
@@ -1190,7 +1198,7 @@ bool TextureSource::generateImagePart(std::string_view part_of_name,
 					continue;
 				}
 
-				blit_with_alpha(img, baseimg, v2s32(0,0), pos_base, dim);
+				blit_with_alpha(img, baseimg, pos_base, dim);
 				img->drop();
 			}
 		}
@@ -1235,7 +1243,7 @@ bool TextureSource::generateImagePart(std::string_view part_of_name,
 			if (baseimg == nullptr) {
 				baseimg = img;
 			} else {
-				blit_with_alpha(img, baseimg, v2s32(0, 0), v2s32(x, y), dim);
+				blit_with_alpha(img, baseimg, v2s32(x, y), dim);
 				img->drop();
 			}
 		}
@@ -1848,58 +1856,114 @@ bool TextureSource::generateImagePart(std::string_view part_of_name,
 
 #undef CHECK_DIM
 
-/*
-	Calculate the color of a single pixel drawn on top of another pixel.
 
-	This is a little more complicated than just video::SColor::getInterpolated
-	because getInterpolated does not handle alpha correctly.  For example, a
-	pixel with alpha=64 drawn atop a pixel with alpha=128 should yield a
-	pixel with alpha=160, while getInterpolated would yield alpha=96.
-*/
-static inline video::SColor blitPixel(const video::SColor src_c, const video::SColor dst_c, u32 ratio)
+namespace {
+
+/// Draw src on top of dst in place (overlay: only fully opaque dst changes)
+template <bool overlay, class Colour>
+void blit_pixel(const Colour &src, Colour &dst)
 {
-	if (dst_c.getAlpha() == 0)
-		return src_c;
-	video::SColor out_c = src_c.getInterpolated(dst_c, (float)ratio / 255.0f);
-	out_c.setAlpha(dst_c.getAlpha() + (255 - dst_c.getAlpha()) *
-		src_c.getAlpha() * ratio / (255 * 255));
-	return out_c;
+	if (src.a == 255 || dst.a == 0) {
+		if constexpr (overlay) {
+			if (dst.a != 255)
+				return;
+		}
+		// The top pixel is fully opaque or the bottom pixel is fully
+		// transparent -> replace the whole pixel, alpha included
+		dst = src;
+	} else if (src.a == 0) {
+		// A fully transparent pixel is on top -> leave dst untouched
+		return;
+	} else if (dst.a == 255) {
+		// A semi-transparent pixel is on top and an opaque one in
+		// the bottom -> lerp r, g, and b; dst.a stays 255
+		dst.r = (dst.r * (255 - src.a) + src.r * src.a) / 255;
+		dst.g = (dst.g * (255 - src.a) + src.g * src.a) / 255;
+		dst.b = (dst.b * (255 - src.a) + src.b * src.a) / 255;
+	} else {
+		if constexpr (overlay) {
+			return;
+		} else {
+			// Semi-transparent on semi-transparent -> general alpha
+			// compositing; a_new_255 is the resulting alpha scaled by 255
+			auto a_new_255{src.a * 255 + (255 - src.a) * dst.a};
+			dst.r = (dst.r * (255 - src.a) * dst.a + src.r * src.a * 255)
+				/ a_new_255;
+			dst.g = (dst.g * (255 - src.a) * dst.a + src.g * src.a * 255)
+				/ a_new_255;
+			dst.b = (dst.b * (255 - src.a) * dst.a + src.b * src.a * 255)
+				/ a_new_255;
+			dst.a = a_new_255 / 255;
+		}
+	}
 }
 
-/*
-	Draw an image on top of another one, using the alpha channel of the
-	source image
-
-	This exists because IImage::copyToWithAlpha() doesn't seem to always
-	work.
-*/
-template<bool overlay>
-static void blit_with_alpha(video::IImage *src, video::IImage *dst,
-		v2s32 src_pos, v2s32 dst_pos, v2u32 size)
+/// Helper for blit_with_alpha; Colour gives the byte layout of one pixel
+template<bool overlay, class Colour>
+void blit_with_alpha_any_endian(video::IImage *src, video::IImage *dst,
+	const v2s32 &dst_pos, const v2u32 &size)
 {
-	auto src_dim = src->getDimension();
-	auto dst_dim = dst->getDimension();
+	if (dst->getColorFormat() != video::ECF_A8R8G8B8)
+		throw BaseException("blit_with_alpha() supports only ECF_A8R8G8B8 "
+			"destination images.");
+
+	auto src_dim{src->getDimension()};
+	auto dst_dim{dst->getDimension()};
+	bool drop_src{false};
+	if (src->getColorFormat() != video::ECF_A8R8G8B8) {
+		video::IVideoDriver *driver{RenderingEngine::get_video_driver()};
+		video::IImage *src_converted{driver->createImage(video::ECF_A8R8G8B8,
+			src_dim)};
+		if (!src_converted)
+			throw BaseException("blit_with_alpha() failed to convert the "
+				"source image to ECF_A8R8G8B8.");
+		src->copyTo(src_converted);
+		src = src_converted;
+		drop_src = true;
+	}
+
+	Colour *pixels_src{reinterpret_cast<Colour *>(src->getData())};
+	Colour *pixels_dst{reinterpret_cast<Colour *>(dst->getData())};
 
 	// Limit y and x to the overlapping ranges
 	// s.t. the positions are all in bounds after offsetting.
-	for (u32 y0 = std::max(0, -dst_pos.Y);
-			y0 < std::min<s64>({size.Y, src_dim.Height, dst_dim.Height - (s64) dst_pos.Y});
-			++y0)
-	for (u32 x0 = std::max(0, -dst_pos.X);
-			x0 < std::min<s64>({size.X, src_dim.Width, dst_dim.Width - (s64) dst_pos.X});
-			++x0)
-	{
-		s32 src_x = src_pos.X + x0;
-		s32 src_y = src_pos.Y + y0;
-		s32 dst_x = dst_pos.X + x0;
-		s32 dst_y = dst_pos.Y + y0;
-		video::SColor src_c = src->getPixel(src_x, src_y);
-		video::SColor dst_c = dst->getPixel(dst_x, dst_y);
-		if (!overlay || (dst_c.getAlpha() == 255 && src_c.getAlpha() != 0)) {
-			dst_c = blitPixel(src_c, dst_c, src_c.getAlpha());
-			dst->setPixel(dst_x, dst_y, dst_c);
+	u32 x_start{static_cast<u32>(std::max(0, -dst_pos.X))};
+	u32 y_start{static_cast<u32>(std::max(0, -dst_pos.Y))};
+	// Clamp the ends to 0: a negative s64 would wrap to a huge u32 bound.
+	u32 x_end{static_cast<u32>(std::max<s64>(0, std::min<s64>({size.X,
+		src_dim.Width, dst_dim.Width - (s64) dst_pos.X})))};
+	u32 y_end{static_cast<u32>(std::max<s64>(0, std::min<s64>({size.Y,
+		src_dim.Height, dst_dim.Height - (s64) dst_pos.Y})))};
+	for (u32 y0{y_start}; y0 < y_end; ++y0) {
+		size_t i_src{y0 * src_dim.Width + x_start};
+		size_t i_dst{(dst_pos.Y + y0) * dst_dim.Width + dst_pos.X + x_start};
+		for (u32 x0{x_start}; x0 < x_end; ++x0) {
+			blit_pixel<overlay, Colour>(pixels_src[i_src++],
+				pixels_dst[i_dst++]);
 		}
 	}
+	if (drop_src)
+		src->drop();
+}
+
+}  // namespace
+
+template<bool overlay>
+static void blit_with_alpha(video::IImage *src, video::IImage *dst,
+	const v2s32 &dst_pos, const v2u32 &size)
+{
+	// Pick the per-byte Colour layout that matches the host byte order.
+	// The probe reads the first byte of a u32 through a u8 pointer, which is
+	// standard C++ (a compound literal of a union defined in a cast is not).
+	const u32 probe{1};
+	const bool is_little_endian{*reinterpret_cast<const u8 *>(&probe) == 1};
+	if (is_little_endian) {
+		struct Colour { u8 b, g, r, a; };
+		blit_with_alpha_any_endian<overlay, Colour>(src, dst, dst_pos, size);
+	} else {
+		struct Colour { u8 a, r, g, b; };
+		blit_with_alpha_any_endian<overlay, Colour>(src, dst, dst_pos, size);
+	}
+}
 
 /*
@@ -2243,7 +2307,7 @@ static void draw_crack(video::IImage *crack, video::IImage *dst,
 	auto blit = use_overlay ? blit_with_alpha<true> : blit_with_alpha<false>;
 	for (s32 i = 0; i < frame_count; ++i) {
 		v2s32 dst_pos(0, frame_size.Height * i);
-		blit(crack_scaled, dst, v2s32(0,0), dst_pos, frame_size);
+		blit(crack_scaled, dst, dst_pos, frame_size);
 	}
 
 	crack_scaled->drop();