Skip to content

SLP vectorizer fails to vectorize unrolled loop #46522

@davidbolvansky

Description

@davidbolvansky
Bugzilla Link 47178
Version trunk
OS Linux
CC @anton-afanasyev,@fhahn,@RKSimon,@rotateright

Extended Description

#include <stdint.h>
#include <cstddef>

/* Number of 16-bit samples in one stripe row. */
#define STRIPE_WIDTH 32
/* Low-bit mask used to round a width up to a multiple of STRIPE_WIDTH
 * (only meaningful while STRIPE_WIDTH is a power of two). */
#define STRIPE_MASK   (STRIPE_WIDTH - 1)

/* Any other width would leave part of the table silently zero-initialized
 * (or overflow it), so refuse to build instead of dithering incorrectly. */
#if STRIPE_WIDTH != 8 && STRIPE_WIDTH != 16 && STRIPE_WIDTH != 32
#error "dither_line only provides initializers for STRIPE_WIDTH 8, 16 or 32"
#endif

/*
 * Dither offsets, stored as two consecutive rows of STRIPE_WIDTH entries:
 *   row 0 (entries [0, STRIPE_WIDTH))                alternates  8, 40
 *   row 1 (entries [STRIPE_WIDTH, 2 * STRIPE_WIDTH)) alternates 56, 24
 * A row is selected with `dither_line + (y & 1) * STRIPE_WIDTH`, so every
 * row must hold exactly STRIPE_WIDTH initializers.
 */
static const int16_t dither_line[2 * STRIPE_WIDTH] = {
#if STRIPE_WIDTH > 16
     8, 40,  8, 40,  8, 40,  8, 40,  8, 40,  8, 40,  8, 40,  8, 40,
     8, 40,  8, 40,  8, 40,  8, 40,  8, 40,  8, 40,  8, 40,  8, 40,
    56, 24, 56, 24, 56, 24, 56, 24, 56, 24, 56, 24, 56, 24, 56, 24,
    56, 24, 56, 24, 56, 24, 56, 24, 56, 24, 56, 24, 56, 24, 56, 24,
#elif STRIPE_WIDTH > 8
     8, 40,  8, 40,  8, 40,  8, 40,  8, 40,  8, 40,  8, 40,  8, 40,
    56, 24, 56, 24, 56, 24, 56, 24, 56, 24, 56, 24, 56, 24, 56, 24,
#else
     8, 40,  8, 40,  8, 40,  8, 40,
    56, 24, 56, 24, 56, 24, 56, 24,
#endif
};


/*
 * Pack 16-bit stripe data into an 8-bit row-major bitmap.
 *
 * src is consumed as consecutive stripes; each stripe holds `height` rows of
 * STRIPE_WIDTH samples. Stripe number s is written to columns
 * [s * STRIPE_WIDTH, (s + 1) * STRIPE_WIDTH) of dst, whose rows are
 * dst_stride bytes apart. Every sample v becomes
 *     (uint16_t) (v - (v >> 8) + dither) >> 6
 * truncated to a byte, where `dither` comes from the dither_line row chosen
 * by the parity of the output row.
 *
 * After all stripes are written, the bytes between `width` rounded up to a
 * multiple of STRIPE_WIDTH and dst_stride are zeroed in each row.
 * dst_stride must not be smaller than that rounded-up width: the padding
 * count is computed in unsigned arithmetic and would wrap around.
 */
void ass_stripe_pack_c(uint8_t *dst, ptrdiff_t dst_stride, const int16_t *src,
                       uintptr_t width, uintptr_t height)
{
    uintptr_t col = 0;
    while (col < width) {
        uint8_t *out_row = dst;
        for (uintptr_t row = 0; row < height; row++) {
            /* Even rows use the first half of the table, odd rows the second. */
            const int16_t *dither = &dither_line[(row & 1) * STRIPE_WIDTH];
            for (int i = 0; i < STRIPE_WIDTH; i++) {
                const int16_t sample = src[i];
                /* Undithered reference form left by the original author:
                 *   (255 * src[k] + 0x1FFF) / 0x4000 */
                out_row[i] = (uint16_t) (sample - (sample >> 8) + dither[i]) >> 6;
            }
            out_row += dst_stride;
            src += STRIPE_WIDTH;
        }
        dst += STRIPE_WIDTH;
        col += STRIPE_WIDTH;
    }

    /* dst now points just past the last stripe of the first row. */
    const uintptr_t padded_width = (width + STRIPE_MASK) & ~STRIPE_MASK;
    const uintptr_t pad = dst_stride - padded_width;
    for (uintptr_t row = 0; row < height; row++) {
        uintptr_t i = 0;
        while (i < pad) {
            dst[i] = 0;
            i++;
        }
        dst += dst_stride;
    }
}

GCC and ICC can vectorize this function; Clang (trunk) does not.

https://godbolt.org/z/nXCxW8

Metadata

Metadata

Assignees

No one assigned

    Type

    No type

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions