Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

8264143 Lanai: RenderPerfTest.BgrSwBlitImage has artefacts on apple M1 #3368

Closed
wants to merge 3 commits into from
Closed
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
@@ -49,9 +49,15 @@
// Consider deleting this field, since it's always MTLPixelFormatBGRA8Unorm
jboolean hasAlpha;
jboolean isPremult;
NSString* swizzleKernel;
const uint8_t* swizzleMap;
} MTLRasterFormatInfo;


const uint8_t rgb_to_rgba[4] = {0, 1, 2, 3};
const uint8_t xrgb_to_rgba[4] = {1, 2, 3, 0};
const uint8_t bgr_to_rgba[4] = {2, 1, 0, 3};
const uint8_t xbgr_to_rgba[4] = {3, 2, 1, 0};

/**
* This table contains the "pixel formats" for all system memory surfaces
* that Metal is capable of handling, indexed by the "PF_" constants defined
@@ -62,10 +68,10 @@
MTLRasterFormatInfo RasterFormatInfos[] = {
{ 1, 0, nil }, /* 0 - IntArgb */ // Argb (in java notation)
{ 1, 1, nil }, /* 1 - IntArgbPre */
{ 0, 1, @"rgb_to_rgba" }, /* 2 - IntRgb */
{ 0, 1, @"xrgb_to_rgba" }, /* 3 - IntRgbx */
{ 0, 1, @"bgr_to_rgba" }, /* 4 - IntBgr */
{ 0, 1, @"xbgr_to_rgba" }, /* 5 - IntBgrx */
{ 0, 1, rgb_to_rgba }, /* 2 - IntRgb */
{ 0, 1, xrgb_to_rgba }, /* 3 - IntRgbx */
{ 0, 1, bgr_to_rgba }, /* 4 - IntBgr */
{ 0, 1, xbgr_to_rgba }, /* 5 - IntBgrx */

// TODO: support 2-byte formats
// { GL_BGRA, GL_UNSIGNED_SHORT_1_5_5_5_REV,
@@ -152,6 +158,12 @@ void drawTex2Tex(MTLContext *mtlc,
[encoder drawPrimitives:MTLPrimitiveTypeTriangle vertexStart:0 vertexCount:6];
}

static void fillSwizzleUniforms(struct SwizzleUniforms *uniforms, const MTLRasterFormatInfo *rfi) {
const size_t SWIZZLE_MAP_SIZE = 4;
memcpy(&uniforms->swizzle, rfi->swizzleMap, SWIZZLE_MAP_SIZE);
uniforms->hasAlpha = rfi->hasAlpha;
}

static void
replaceTextureRegion(MTLContext *mtlc, id<MTLTexture> dest, const SurfaceDataRasInfo *srcInfo,
const MTLRasterFormatInfo *rfi,
@@ -180,24 +192,30 @@ void drawTex2Tex(MTLContext *mtlc,
}
[buff didModifyRange:NSMakeRange(0, buff.length)];

if (rfi->swizzleKernel != nil) {
if (rfi->swizzleMap != nil) {
id <MTLBuffer> swizzled = [[mtlc.device newBufferWithLength:(sw * sh * srcInfo->pixelStride) options:MTLResourceStorageModeManaged] autorelease];

// this should be cheap, since data is already on GPU
id<MTLCommandBuffer> cb = [mtlc createCommandBuffer];
id<MTLComputeCommandEncoder> computeEncoder = [cb computeCommandEncoder];
id<MTLComputePipelineState> computePipelineState = [mtlc.pipelineStateStorage
getComputePipelineState:rfi->swizzleKernel];
getComputePipelineState:@"swizzle_to_rgba"];
[computeEncoder setComputePipelineState:computePipelineState];

[computeEncoder setBuffer:buff offset:0 atIndex:0];
[computeEncoder setBuffer:swizzled offset:0 atIndex:1];

struct SwizzleUniforms uniforms;
fillSwizzleUniforms(&uniforms, rfi);
[computeEncoder setBytes:&uniforms length:sizeof(struct SwizzleUniforms) atIndex:2];

NSUInteger pixelCount = buff.length / srcInfo->pixelStride;
[computeEncoder setBytes:&pixelCount length:sizeof(NSUInteger) atIndex:3];

NSUInteger threadGroupSize = computePipelineState.maxTotalThreadsPerThreadgroup;
if (threadGroupSize == 0) {
threadGroupSize = 1;
}
NSUInteger pixelCount = buff.length / srcInfo->pixelStride;
MTLSize threadsPerGroup = MTLSizeMake(threadGroupSize, 1, 1);
MTLSize threadGroups = MTLSizeMake((pixelCount + threadGroupSize - 1) / threadGroupSize,
1, 1);
@@ -26,6 +26,7 @@
#ifndef COMMON_H
#define COMMON_H

#include <stdint.h>
#include <simd/simd.h>

#define PGRAM_VERTEX_COUNT 6
@@ -156,4 +157,9 @@ struct LCDFrameUniforms {
vector_float3 gamma;
vector_float3 invgamma;
};

struct SwizzleUniforms {
uint8_t swizzle[4];
uint8_t hasAlpha;
};
#endif
@@ -640,45 +640,25 @@ kernel void stencil2tex(const device uchar *imageBuffer [[buffer(0)]],

// work item deals with 4 byte pixel
// assuming that data is aligned
kernel void rgb_to_rgba(const device uchar *imageBuffer [[buffer(0)]],
device uchar *outputBuffer [[buffer(1)]],
uint gid [[thread_position_in_grid]])
kernel void swizzle_to_rgba(const device uchar *imageBuffer [[buffer(0)]],
device uchar *outputBuffer [[buffer(1)]],
constant SwizzleUniforms& uniforms [[buffer(2)]],
constant uint& size [[buffer(3)]],
uint gid [[thread_position_in_grid]])
{
outputBuffer[4 * gid] = imageBuffer[4 * gid]; // r
outputBuffer[4 * gid + 1] = imageBuffer[4 * gid + 1]; // g
outputBuffer[4 * gid + 2] = imageBuffer[4 * gid + 2]; // b
outputBuffer[4 * gid + 3] = 255; // a
}

kernel void bgr_to_rgba(const device uchar *imageBuffer [[buffer(0)]],
device uchar *outputBuffer [[buffer(1)]],
uint gid [[thread_position_in_grid]])
{
outputBuffer[4 * gid] = imageBuffer[4 * gid + 2]; // r
outputBuffer[4 * gid + 1] = imageBuffer[4 * gid + 1]; // g
outputBuffer[4 * gid + 2] = imageBuffer[4 * gid]; // b
outputBuffer[4 * gid + 3] = 255; // a
}

kernel void xrgb_to_rgba(const device uchar *imageBuffer [[buffer(0)]],
device uchar *outputBuffer [[buffer(1)]],
uint gid [[thread_position_in_grid]])
{
outputBuffer[4 * gid] = imageBuffer[4 * gid + 1]; // r
outputBuffer[4 * gid + 1] = imageBuffer[4 * gid + 2]; // g
outputBuffer[4 * gid + 2] = imageBuffer[4 * gid + 3]; // b
outputBuffer[4 * gid + 3] = imageBuffer[4 * gid]; // a
}
if (gid > size) {
return;
}

outputBuffer[4 * gid] = imageBuffer[4 * gid + uniforms.swizzle[0]]; // r
outputBuffer[4 * gid + 1] = imageBuffer[4 * gid + uniforms.swizzle[1]]; // g
outputBuffer[4 * gid + 2] = imageBuffer[4 * gid + uniforms.swizzle[2]]; // b

kernel void xbgr_to_rgba(const device uchar *imageBuffer [[buffer(0)]],
device uchar *outputBuffer [[buffer(1)]],
uint gid [[thread_position_in_grid]])
{
outputBuffer[4 * gid] = imageBuffer[4 * gid + 3]; // r
outputBuffer[4 * gid + 1] = imageBuffer[4 * gid + 2]; // g
outputBuffer[4 * gid + 2] = imageBuffer[4 * gid + 1]; // b
outputBuffer[4 * gid + 3] = imageBuffer[4 * gid]; // a
if (uniforms.hasAlpha) {
outputBuffer[4 * gid + 3] = imageBuffer[4 * gid + uniforms.swizzle[3]];
} else {
outputBuffer[4 * gid + 3] = 255;
}
}

// ----------------------------------------------------------------------------