Skip to content

Commit

Permalink
Faster blit_with_alpha()
Browse files Browse the repository at this point in the history
The `blit_with_alpha` function has a noticeable effect on the time it takes to join a game.

To reduce the join times, I replace the `blit_with_alpha` function with a new one:
* It does not use floating-point numbers.
* It directly operates on the raw pixel data instead of using the comparatively
  slow `setPixel` and `getPixel` functions from Irrlicht.
  Only ECF_A8R8G8B8 base images are supported now.
  If the top image does not have the ECF_A8R8G8B8 colour format, it is converted;
  I assume that this happens rarely.
* There are case distinctions for fully opaque, fully transparent and semi-transparent pixels.
  This may improve performance if mixing two semi-transparent pixels happens rarely.
* The new function no longer has the `src_pos` argument since it was always the zero vector.
* `dst_pos` and `size` are passed by reference and no longer by value.
* The function is only documented once where it is declared.

For backwards compatibility, `blit_with_alpha` still mixes colours without gamma correction.
`blit_with_alpha` nonetheless behaves slightly differently than before:
If a semi-transparent pixel is drawn on top of another semi-transparent pixel,
the colour is mixed in a way which we can consider to be more correct now.
  • Loading branch information
HybridDog committed Mar 9, 2024
1 parent ccc15e4 commit d5db142
Showing 1 changed file with 117 additions and 53 deletions.
170 changes: 117 additions & 53 deletions src/client/texturesource.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -404,12 +404,22 @@ u32 TextureSource::getTextureId(const std::string &name)
return 0;
}

// Draw an image on top of another one, using the alpha channel of the
// source image
// overlay: only modify destination pixels that are fully opaque.

/** Draw an image on top of another one with gamma-incorrect alpha compositing
*
* This exists because IImage::copyToWithAlpha() doesn't seem to always work.
*
* \tparam overlay If enabled, only modify pixels in dst which are fully opaque.
* Defaults to false.
* \param src Top image. If it does not have the ECF_A8R8G8B8 colour format,
* a converted copy is used for drawing.
* \param dst Bottom image. This image must have the ECF_A8R8G8B8 colour format.
* The top image is drawn onto this base image in-place.
* \param dst_pos An offset vector to move src before drawing it onto dst
* \param size Size limit of the copied area
*/
template<bool overlay = false>
static void blit_with_alpha(video::IImage *src, video::IImage *dst,
v2s32 src_pos, v2s32 dst_pos, v2u32 size);
const v2s32 &dst_pos, const v2u32 &size);

// Apply a color to an image. Uses an int (0-255) to calculate the ratio.
// If the ratio is 255 or -1 and keep_alpha is true, then it multiples the
Expand Down Expand Up @@ -912,7 +922,7 @@ video::IImage* TextureSource::generateImage(std::string_view name,

if (baseimg) {
core::dimension2d<u32> dim = tmp->getDimension();
blit_with_alpha(tmp, baseimg, v2s32(0, 0), v2s32(0, 0), dim);
blit_with_alpha(tmp, baseimg, v2s32(0, 0), dim);
tmp->drop();
} else {
baseimg = tmp;
Expand Down Expand Up @@ -999,10 +1009,8 @@ void blitBaseImage(video::IImage* &src, video::IImage* &dst)
core::dimension2d<u32> dim_dst = dst->getDimension();
// Position to copy the blitted to in the base image
core::position2d<s32> pos_to(0,0);
// Position to copy the blitted from in the blitted image
core::position2d<s32> pos_from(0,0);

blit_with_alpha(src, dst, pos_from, pos_to, dim_dst);
blit_with_alpha(src, dst, pos_to, dim_dst);
}

#define CHECK_BASEIMG() \
Expand Down Expand Up @@ -1190,7 +1198,7 @@ bool TextureSource::generateImagePart(std::string_view part_of_name,
continue;
}

blit_with_alpha(img, baseimg, v2s32(0,0), pos_base, dim);
blit_with_alpha(img, baseimg, pos_base, dim);
img->drop();
}
}
Expand Down Expand Up @@ -1235,7 +1243,7 @@ bool TextureSource::generateImagePart(std::string_view part_of_name,
if (baseimg == nullptr) {
baseimg = img;
} else {
blit_with_alpha(img, baseimg, v2s32(0, 0), v2s32(x, y), dim);
blit_with_alpha(img, baseimg, v2s32(x, y), dim);
img->drop();
}
}
Expand Down Expand Up @@ -1848,58 +1856,114 @@ bool TextureSource::generateImagePart(std::string_view part_of_name,

#undef CHECK_DIM

/*
Calculate the color of a single pixel drawn on top of another pixel.

This is a little more complicated than just video::SColor::getInterpolated
because getInterpolated does not handle alpha correctly. For example, a
pixel with alpha=64 drawn atop a pixel with alpha=128 should yield a
pixel with alpha=160, while getInterpolated would yield alpha=96.
*/
static inline video::SColor blitPixel(const video::SColor src_c, const video::SColor dst_c, u32 ratio)
namespace {

/// Composite a single pixel: draw src on top of dst and store the result in dst.
///
/// Colour must expose the channel members r, g, b and a (see the local
/// structs in blit_with_alpha). The channels are mixed linearly with integer
/// arithmetic only.
///
/// \tparam overlay If true, dst is only modified when it is fully opaque
/// (dst.a == 255).
/// \param src Top pixel
/// \param dst Bottom pixel, updated in-place
template <bool overlay, class Colour>
void blit_pixel(const Colour &src, Colour &dst)
{
// NOTE(review): the next six lines (using dst_c, src_c and ratio) appear to
// be the body of the removed blitPixel() as rendered by the diff view; they
// are not part of blit_pixel.
if (dst_c.getAlpha() == 0)
return src_c;
video::SColor out_c = src_c.getInterpolated(dst_c, (float)ratio / 255.0f);
out_c.setAlpha(dst_c.getAlpha() + (255 - dst_c.getAlpha()) *
src_c.getAlpha() * ratio / (255 * 255));
return out_c;
if (src.a == 255 || dst.a == 0) {
if constexpr (overlay) {
// In overlay mode, pixels that are not fully opaque stay untouched
if (dst.a != 255)
return;
}
// The top pixel is fully opaque or the bottom pixel is
// fully transparent -> replace the colour
dst = src;
} else if (src.a == 0) {
// A fully transparent pixel is on top -> do nothing
return;
} else if (dst.a == 255) {
// A semi-transparent pixel is on top and an opaque one in
// the bottom -> lerp r, g, and b
// (dst.a stays 255, so only the colour channels are written)
dst.r = (dst.r * (255 - src.a) + src.r * src.a) / 255;
dst.g = (dst.g * (255 - src.a) + src.g * src.a) / 255;
dst.b = (dst.b * (255 - src.a) + src.b * src.a) / 255;
} else {
if constexpr (overlay) {
// dst is semi-transparent here, which overlay mode never modifies
return;
} else {
// A semi-transparent pixel is on top of a
// semi-transparent pixel -> general alpha compositing
// a_new_255 is the resulting alpha multiplied by 255, so the
// weighting can be done without leaving integer arithmetic.
auto a_new_255{src.a * 255 + (255 - src.a) * dst.a};
dst.r = (dst.r * (255 - src.a) * dst.a + src.r * src.a * 255)
/ a_new_255;
dst.g = (dst.g * (255 - src.a) * dst.a + src.g * src.a * 255)
/ a_new_255;
dst.b = (dst.b * (255 - src.a) * dst.a + src.b * src.a * 255)
/ a_new_255;
dst.a = a_new_255 / 255;
}
}
}

/*
Draw an image on top of another one, using the alpha channel of the
source image
This exists because IImage::copyToWithAlpha() doesn't seem to always
work.
*/
template<bool overlay>
static void blit_with_alpha(video::IImage *src, video::IImage *dst,
v2s32 src_pos, v2s32 dst_pos, v2u32 size)
/// Helper for blit_with_alpha which works on the raw pixel data; the Colour
/// template argument describes the channel order in memory, so that different
/// endiannesses can be supported.
///
/// Throws BaseException if dst does not have the ECF_A8R8G8B8 colour format
/// or if the temporary image for converting src cannot be created.
/// If src is not ECF_A8R8G8B8, it is copied into a temporary ECF_A8R8G8B8
/// image which is dropped again before returning.
///
/// \tparam overlay Passed on to blit_pixel
/// \tparam Colour Pixel struct with the members r, g, b and a
/// \param src Top image
/// \param dst Bottom image, modified in-place
/// \param dst_pos Offset of src relative to dst
/// \param size Size limit of the copied area
template<bool overlay, class Colour>
void blit_with_alpha_any_endian(video::IImage *src, video::IImage *dst,
const v2s32 &dst_pos, const v2u32 &size)
{
// NOTE(review): the next two lines appear to be removed lines of the old
// implementation as rendered by the diff view (src_dim and dst_dim are
// declared again further down).
auto src_dim = src->getDimension();
auto dst_dim = dst->getDimension();
if (dst->getColorFormat() != video::ECF_A8R8G8B8)
throw BaseException("blit_with_alpha() supports only ECF_A8R8G8B8 "
"destination images.");

auto src_dim{src->getDimension()};
auto dst_dim{dst->getDimension()};
// Remembers whether src was replaced by a temporary image owned by this
// function
bool drop_src{false};
if (src->getColorFormat() != video::ECF_A8R8G8B8) {
video::IVideoDriver *driver{RenderingEngine::get_video_driver()};
video::IImage *src_converted{driver->createImage(video::ECF_A8R8G8B8,
src_dim)};
if (!src_converted)
throw BaseException("blit_with_alpha() failed to convert the "
"source image to ECF_A8R8G8B8.");
src->copyTo(src_converted);
src = src_converted;
drop_src = true;
}

// Both images are ECF_A8R8G8B8 at this point, so their data is accessed as
// arrays of Colour
Colour *pixels_src{reinterpret_cast<Colour *>(src->getData())};
Colour *pixels_dst{reinterpret_cast<Colour *>(dst->getData())};

// Limit y and x to the overlapping ranges
// s.t. the positions are all in bounds after offsetting.
// NOTE(review): the getPixel/setPixel loop below (down to dst->setPixel)
// appears to be the removed implementation as rendered by the diff view.
for (u32 y0 = std::max(0, -dst_pos.Y);
y0 < std::min<s64>({size.Y, src_dim.Height, dst_dim.Height - (s64) dst_pos.Y});
++y0)
for (u32 x0 = std::max(0, -dst_pos.X);
x0 < std::min<s64>({size.X, src_dim.Width, dst_dim.Width - (s64) dst_pos.X});
++x0)
{
s32 src_x = src_pos.X + x0;
s32 src_y = src_pos.Y + y0;
s32 dst_x = dst_pos.X + x0;
s32 dst_y = dst_pos.Y + y0;
video::SColor src_c = src->getPixel(src_x, src_y);
video::SColor dst_c = dst->getPixel(dst_x, dst_y);
if (!overlay || (dst_c.getAlpha() == 255 && src_c.getAlpha() != 0)) {
dst_c = blitPixel(src_c, dst_c, src_c.getAlpha());
dst->setPixel(dst_x, dst_y, dst_c);
// NOTE(review): if dst_pos.X is larger than dst_dim.Width (or dst_pos.Y
// larger than dst_dim.Height), the s64 minimum below is negative and the
// cast to u32 wraps around to a very large value, so the loops would index
// past the pixel buffers. The removed loop compared against the s64 value
// and simply did not iterate in that case. Consider clamping the minimum to
// 0 before casting -- TODO confirm whether callers can pass such offsets.
u32 x_start{static_cast<u32>(std::max(0, -dst_pos.X))};
u32 y_start{static_cast<u32>(std::max(0, -dst_pos.Y))};
u32 x_end{static_cast<u32>(std::min<s64>({size.X, src_dim.Width,
dst_dim.Width - (s64) dst_pos.X}))};
u32 y_end{static_cast<u32>(std::min<s64>({size.Y, src_dim.Height,
dst_dim.Height - (s64) dst_pos.Y}))};
for (u32 y0{y_start}; y0 < y_end; ++y0) {
// Linear indices of the first pixel of this row in src and in dst; both
// are incremented together while walking along the row
size_t i_src{y0 * src_dim.Width + x_start};
size_t i_dst{(dst_pos.Y + y0) * dst_dim.Width
+ dst_pos.X + x_start};
for (u32 x0{x_start}; x0 < x_end; ++x0) {
blit_pixel<overlay, Colour>(pixels_src[i_src++],
pixels_dst[i_dst++]);
}
}
// Release the temporary converted image, if one was created above
if (drop_src)
src->drop();
}

} // namespace

/// Draw src on top of dst, using the alpha channel of src.
///
/// Picks the pixel struct whose member order matches the byte order of the
/// host and forwards the work to blit_with_alpha_any_endian().
///
/// \tparam overlay Forwarded to blit_with_alpha_any_endian
/// \param src Top image
/// \param dst Bottom image, modified in-place
/// \param dst_pos An offset vector to move src before drawing it onto dst
/// \param size Size limit of the copied area
template<bool overlay>
static void blit_with_alpha(video::IImage *src, video::IImage *dst,
		const v2s32 &dst_pos, const v2u32 &size)
{
	// Detect the byte order by inspecting the first byte of a known 32-bit
	// value. Reading an object through an unsigned char pointer is
	// well-defined in C++, whereas the former compound literal with an
	// anonymous union is a C feature that C++ compilers only accept as an
	// extension.
	const u32 probe{1};
	const bool is_little_endian{
			*reinterpret_cast<const unsigned char *>(&probe) == 1};
	if (is_little_endian) {
		// Channel order as the bytes appear in memory on a little-endian host
		struct Colour {
			u8 b, g, r, a;
		};
		blit_with_alpha_any_endian<overlay, Colour>(src, dst, dst_pos, size);
	} else {
		// Channel order as the bytes appear in memory on a big-endian host
		struct Colour {
			u8 a, r, g, b;
		};
		blit_with_alpha_any_endian<overlay, Colour>(src, dst, dst_pos, size);
	}
}

/*
Expand Down Expand Up @@ -2243,7 +2307,7 @@ static void draw_crack(video::IImage *crack, video::IImage *dst,
auto blit = use_overlay ? blit_with_alpha<true> : blit_with_alpha<false>;
for (s32 i = 0; i < frame_count; ++i) {
v2s32 dst_pos(0, frame_size.Height * i);
blit(crack_scaled, dst, v2s32(0,0), dst_pos, frame_size);
blit(crack_scaled, dst, dst_pos, frame_size);
}

crack_scaled->drop();
Expand Down

0 comments on commit d5db142

Please sign in to comment.