From 6aef519bbe3bafb13f76cf52994a1ab51b4f6718 Mon Sep 17 00:00:00 2001 From: Bigfoot71 Date: Mon, 27 Oct 2025 18:01:11 +0100 Subject: [PATCH 1/8] consistency tweak --- src/external/rlsw.h | 236 ++++++++++++++++++++++---------------------- 1 file changed, 118 insertions(+), 118 deletions(-) diff --git a/src/external/rlsw.h b/src/external/rlsw.h index e049e707ec01..93ce5081a7ba 100644 --- a/src/external/rlsw.h +++ b/src/external/rlsw.h @@ -683,59 +683,59 @@ SWAPI void swBindTexture(uint32_t id); #define SW_DEPTH_PIXEL_SIZE (SW_DEPTH_BUFFER_BITS/8) #if (SW_COLOR_BUFFER_BITS == 8) - #define COLOR_TYPE uint8_t - #define COLOR_IS_PACKED 1 - #define PACK_COLOR(r,g,b) ((((uint8_t)((r)*7+0.5f))&0x07)<<5 | (((uint8_t)((g)*7+0.5f))&0x07)<<2 | ((uint8_t)((b)*3+0.5f))&0x03) - #define UNPACK_R(p) (((p)>>5)&0x07) - #define UNPACK_G(p) (((p)>>2)&0x07) - #define UNPACK_B(p) ((p)&0x03) - #define SCALE_R(v) ((v)*255+3)/7 - #define SCALE_G(v) ((v)*255+3)/7 - #define SCALE_B(v) ((v)*255+1)/3 - #define TO_FLOAT_R(v) ((v)*(1.0f/7.0f)) - #define TO_FLOAT_G(v) ((v)*(1.0f/7.0f)) - #define TO_FLOAT_B(v) ((v)*(1.0f/3.0f)) + #define SW_COLOR_TYPE uint8_t + #define SW_COLOR_IS_PACKED 1 + #define SW_PACK_COLOR(r,g,b) ((((uint8_t)((r)*7+0.5f))&0x07)<<5 | (((uint8_t)((g)*7+0.5f))&0x07)<<2 | ((uint8_t)((b)*3+0.5f))&0x03) + #define SW_UNPACK_R(p) (((p)>>5)&0x07) + #define SW_UNPACK_G(p) (((p)>>2)&0x07) + #define SW_UNPACK_B(p) ((p)&0x03) + #define SW_SCALE_R(v) ((v)*255+3)/7 + #define SW_SCALE_G(v) ((v)*255+3)/7 + #define SW_SCALE_B(v) ((v)*255+1)/3 + #define SW_TO_FLOAT_R(v) ((v)*(1.0f/7.0f)) + #define SW_TO_FLOAT_G(v) ((v)*(1.0f/7.0f)) + #define SW_TO_FLOAT_B(v) ((v)*(1.0f/3.0f)) #elif (SW_COLOR_BUFFER_BITS == 16) - #define COLOR_TYPE uint16_t - #define COLOR_IS_PACKED 1 - #define PACK_COLOR(r,g,b) ((((uint16_t)((r)*31+0.5f))&0x1F)<<11 | (((uint16_t)((g)*63+0.5f))&0x3F)<<5 | ((uint16_t)((b)*31+0.5f))&0x1F) - #define UNPACK_R(p) (((p)>>11)&0x1F) - #define UNPACK_G(p) (((p)>>5)&0x3F) - #define UNPACK_B(p) ((p)&0x1F) - #define SCALE_R(v) ((v)*255+15)/31 - #define SCALE_G(v) ((v)*255+31)/63 - #define SCALE_B(v) ((v)*255+15)/31 - #define TO_FLOAT_R(v) ((v)*(1.0f/31.0f)) - #define TO_FLOAT_G(v) ((v)*(1.0f/63.0f)) - #define TO_FLOAT_B(v) ((v)*(1.0f/31.0f)) + #define SW_COLOR_TYPE uint16_t + #define SW_COLOR_IS_PACKED 1 + #define SW_PACK_COLOR(r,g,b) ((((uint16_t)((r)*31+0.5f))&0x1F)<<11 | (((uint16_t)((g)*63+0.5f))&0x3F)<<5 | ((uint16_t)((b)*31+0.5f))&0x1F) + #define SW_UNPACK_R(p) (((p)>>11)&0x1F) + #define SW_UNPACK_G(p) (((p)>>5)&0x3F) + #define SW_UNPACK_B(p) ((p)&0x1F) + #define SW_SCALE_R(v) ((v)*255+15)/31 + #define SW_SCALE_G(v) ((v)*255+31)/63 + #define SW_SCALE_B(v) ((v)*255+15)/31 + #define SW_TO_FLOAT_R(v) ((v)*(1.0f/31.0f)) + #define SW_TO_FLOAT_G(v) ((v)*(1.0f/63.0f)) + #define SW_TO_FLOAT_B(v) ((v)*(1.0f/31.0f)) #else // 32 bits - #define COLOR_TYPE uint8_t - #define COLOR_IS_PACKED 0 + #define SW_COLOR_TYPE uint8_t + #define SW_COLOR_IS_PACKED 0 #endif #if (SW_DEPTH_BUFFER_BITS == 8) - #define DEPTH_TYPE uint8_t - #define DEPTH_IS_PACKED 1 - #define DEPTH_MAX UINT8_MAX - #define DEPTH_SCALE (1.0f/UINT8_MAX) - #define PACK_DEPTH(d) ((DEPTH_TYPE)((d)*DEPTH_MAX)) - #define UNPACK_DEPTH(p) (p) + #define SW_DEPTH_TYPE uint8_t + #define SW_DEPTH_IS_PACKED 1 + #define SW_DEPTH_MAX UINT8_MAX + #define SW_DEPTH_SCALE (1.0f/UINT8_MAX) + #define SW_PACK_DEPTH(d) ((SW_DEPTH_TYPE)((d)*SW_DEPTH_MAX)) + #define SW_UNPACK_DEPTH(p) (p) #elif (SW_DEPTH_BUFFER_BITS == 16) - #define DEPTH_TYPE uint16_t - #define DEPTH_IS_PACKED 1 - #define DEPTH_MAX UINT16_MAX - #define DEPTH_SCALE (1.0f/UINT16_MAX) - #define PACK_DEPTH(d) ((DEPTH_TYPE)((d)*DEPTH_MAX)) - #define UNPACK_DEPTH(p) (p) + #define SW_DEPTH_TYPE uint16_t + #define SW_DEPTH_IS_PACKED 1 + #define SW_DEPTH_MAX UINT16_MAX + #define SW_DEPTH_SCALE (1.0f/UINT16_MAX) + #define SW_PACK_DEPTH(d) ((SW_DEPTH_TYPE)((d)*SW_DEPTH_MAX)) + #define SW_UNPACK_DEPTH(p) (p) #else // 24 bits - #define DEPTH_TYPE uint8_t - #define DEPTH_IS_PACKED 0 - #define DEPTH_MAX 0xFFFFFF - #define DEPTH_SCALE (1.0f/0xFFFFFF) - #define PACK_DEPTH_0(d) (((uint32_t)((d)*DEPTH_MAX)>>16)&0xFF) - #define PACK_DEPTH_1(d) (((uint32_t)((d)*DEPTH_MAX)>>8)&0xFF) - #define PACK_DEPTH_2(d) ((uint32_t)((d)*DEPTH_MAX)&0xFF) - #define UNPACK_DEPTH(p) (((p)[0]<<16)|((p)[1]<<8)|(p)[2]) + #define SW_DEPTH_TYPE uint8_t + #define SW_DEPTH_IS_PACKED 0 + #define SW_DEPTH_MAX 0xFFFFFF + #define SW_DEPTH_SCALE (1.0f/0xFFFFFF) + #define SW_PACK_DEPTH_0(d) (((uint32_t)((d)*SW_DEPTH_MAX)>>16)&0xFF) + #define SW_PACK_DEPTH_1(d) (((uint32_t)((d)*SW_DEPTH_MAX)>>8)&0xFF) + #define SW_PACK_DEPTH_2(d) ((uint32_t)((d)*SW_DEPTH_MAX)&0xFF) + #define SW_UNPACK_DEPTH(p) (((p)[0]<<16)|((p)[1]<<8)|(p)[2]) #endif #define GET_COLOR_PTR(ptr, offset) ((void*)((uint8_t*)(ptr) + (offset)*SW_COLOR_PIXEL_SIZE)) @@ -1254,11 +1254,11 @@ static inline bool sw_framebuffer_resize(int w, int h) static inline void sw_framebuffer_read_color(float dst[4], const void *src) { -#if COLOR_IS_PACKED - COLOR_TYPE pixel = ((COLOR_TYPE*)src)[0]; - dst[0] = TO_FLOAT_R(UNPACK_R(pixel)); - dst[1] = TO_FLOAT_G(UNPACK_G(pixel)); - dst[2] = TO_FLOAT_B(UNPACK_B(pixel)); +#if SW_COLOR_IS_PACKED + SW_COLOR_TYPE pixel = ((SW_COLOR_TYPE*)src)[0]; + dst[0] = SW_TO_FLOAT_R(SW_UNPACK_R(pixel)); + dst[1] = SW_TO_FLOAT_G(SW_UNPACK_G(pixel)); + dst[2] = SW_TO_FLOAT_B(SW_UNPACK_B(pixel)); dst[3] = 1.0f; #else sw_float_from_unorm8_simd(dst, src); @@ -1267,14 +1267,14 @@ static inline void sw_framebuffer_read_color(float dst[4], const void *src) static inline void sw_framebuffer_read_color8(uint8_t dst[4], const void *src) { -#if COLOR_IS_PACKED - COLOR_TYPE pixel = ((COLOR_TYPE*)src)[0]; - dst[0] = SCALE_R(UNPACK_R(pixel)); - dst[1] = SCALE_G(UNPACK_G(pixel)); - dst[2] = SCALE_B(UNPACK_B(pixel)); +#if SW_COLOR_IS_PACKED + SW_COLOR_TYPE pixel = ((SW_COLOR_TYPE*)src)[0]; + dst[0] = SW_SCALE_R(SW_UNPACK_R(pixel)); + dst[1] = SW_SCALE_G(SW_UNPACK_G(pixel)); + dst[2] = SW_SCALE_B(SW_UNPACK_B(pixel)); dst[3] = 255; #else - const COLOR_TYPE *p = (const COLOR_TYPE*)src; + const SW_COLOR_TYPE *p = (const SW_COLOR_TYPE*)src; dst[0] = p[0]; dst[1] = p[1]; dst[2] = p[2]; @@ -1284,8 +1284,8 @@ static inline void sw_framebuffer_read_color8(uint8_t dst[4], const void *src) static inline void sw_framebuffer_write_color(void *dst, const float src[4]) { -#if COLOR_IS_PACKED - ((COLOR_TYPE*)dst)[0] = PACK_COLOR(src[0], src[1], src[2]); +#if SW_COLOR_IS_PACKED + ((SW_COLOR_TYPE*)dst)[0] = SW_PACK_COLOR(src[0], src[1], src[2]); #else sw_float_to_unorm8_simd(dst, src); #endif @@ -1293,15 +1293,15 @@ static inline void sw_framebuffer_write_color(void *dst, const float src[4]) static inline void sw_framebuffer_fill_color(void *ptr, int size, const float color[4]) { -#if COLOR_IS_PACKED - COLOR_TYPE packed = PACK_COLOR(color[0], color[1], color[2]); - COLOR_TYPE *p = (COLOR_TYPE*)ptr; +#if SW_COLOR_IS_PACKED + SW_COLOR_TYPE packed = SW_PACK_COLOR(color[0], color[1], color[2]); + SW_COLOR_TYPE *p = (SW_COLOR_TYPE*)ptr; #else - COLOR_TYPE r = sw_clampi(color[0]*255, 0, 255); - COLOR_TYPE g = sw_clampi(color[1]*255, 0, 255); - COLOR_TYPE b = sw_clampi(color[2]*255, 0, 255); - COLOR_TYPE a = sw_clampi(color[3]*255, 0, 255); - COLOR_TYPE *p = (COLOR_TYPE*)ptr; + SW_COLOR_TYPE r = sw_clampi(color[0]*255, 0, 255); + SW_COLOR_TYPE g = sw_clampi(color[1]*255, 0, 255); + SW_COLOR_TYPE b = sw_clampi(color[2]*255, 0, 255); + SW_COLOR_TYPE a = sw_clampi(color[3]*255, 0, 255); + SW_COLOR_TYPE *p = (SW_COLOR_TYPE*)ptr; #endif if (RLSW.stateFlags & SW_STATE_SCISSOR_TEST) @@ -1309,11 +1309,11 @@ static inline void sw_framebuffer_fill_color(void *ptr, int size, const float co int w = RLSW.scMax[0] - RLSW.scMin[0] + 1; for (int y = RLSW.scMin[1]; y <= RLSW.scMax[1]; y++) { -#if COLOR_IS_PACKED - COLOR_TYPE *row = p + y*RLSW.framebuffer.width + RLSW.scMin[0]; +#if SW_COLOR_IS_PACKED + SW_COLOR_TYPE *row = p + y*RLSW.framebuffer.width + RLSW.scMin[0]; for (int x = 0; x < w; x++) *row++ = packed; #else - COLOR_TYPE *row = p + 3*(y*RLSW.framebuffer.width + RLSW.scMin[0]); + SW_COLOR_TYPE *row = p + 3*(y*RLSW.framebuffer.width + RLSW.scMin[0]); for (int x = 0; x < w; x++) { *row++ = r; @@ -1326,7 +1326,7 @@ static inline void sw_framebuffer_fill_color(void *ptr, int size, const float co } else { -#if COLOR_IS_PACKED +#if SW_COLOR_IS_PACKED for (int i = 0; i < size; i++) *p++ = packed; #else for (int i = 0; i < size; i++) @@ -1342,37 +1342,37 @@ static inline void sw_framebuffer_fill_color(void *ptr, int size, const float co static inline float sw_framebuffer_read_depth(const void *src) { -#if DEPTH_IS_PACKED - return ((DEPTH_TYPE*)src)[0]*DEPTH_SCALE; +#if SW_DEPTH_IS_PACKED + return ((SW_DEPTH_TYPE*)src)[0]*SW_DEPTH_SCALE; #else - const DEPTH_TYPE *p = (const DEPTH_TYPE*)src; - uint32_t d = UNPACK_DEPTH(p); - return d*DEPTH_SCALE; + const SW_DEPTH_TYPE *p = (const SW_DEPTH_TYPE*)src; + uint32_t d = SW_UNPACK_DEPTH(p); + return d*SW_DEPTH_SCALE; #endif } static inline void sw_framebuffer_write_depth(void *dst, float depth) { -#if DEPTH_IS_PACKED - ((DEPTH_TYPE*)dst)[0] = PACK_DEPTH(depth); +#if SW_DEPTH_IS_PACKED + ((SW_DEPTH_TYPE*)dst)[0] = SW_PACK_DEPTH(depth); #else - DEPTH_TYPE *p = (DEPTH_TYPE*)dst; - p[0] = PACK_DEPTH_0(depth); - p[1] = PACK_DEPTH_1(depth); - p[2] = PACK_DEPTH_2(depth); + SW_DEPTH_TYPE *p = (SW_DEPTH_TYPE*)dst; + p[0] = SW_PACK_DEPTH_0(depth); + p[1] = SW_PACK_DEPTH_1(depth); + p[2] = SW_PACK_DEPTH_2(depth); #endif } static inline void sw_framebuffer_fill_depth(void *ptr, int size, float value) { -#if DEPTH_IS_PACKED - DEPTH_TYPE d = PACK_DEPTH(value); - DEPTH_TYPE *p = (DEPTH_TYPE*)ptr; +#if SW_DEPTH_IS_PACKED + SW_DEPTH_TYPE d = SW_PACK_DEPTH(value); + SW_DEPTH_TYPE *p = (SW_DEPTH_TYPE*)ptr; #else - DEPTH_TYPE d0 = PACK_DEPTH_0(value); - DEPTH_TYPE d1 = PACK_DEPTH_1(value); - DEPTH_TYPE d2 = PACK_DEPTH_2(value); - DEPTH_TYPE *p = (DEPTH_TYPE*)ptr; + SW_DEPTH_TYPE d0 = SW_PACK_DEPTH_0(value); + SW_DEPTH_TYPE d1 = SW_PACK_DEPTH_1(value); + SW_DEPTH_TYPE d2 = SW_PACK_DEPTH_2(value); + SW_DEPTH_TYPE *p = (SW_DEPTH_TYPE*)ptr; #endif if (RLSW.stateFlags & SW_STATE_SCISSOR_TEST) @@ -1380,11 +1380,11 @@ static inline void sw_framebuffer_fill_depth(void *ptr, int size, float value) int w = RLSW.scMax[0] - RLSW.scMin[0] + 1; for (int y = RLSW.scMin[1]; y <= RLSW.scMax[1]; y++) { -#if DEPTH_IS_PACKED - DEPTH_TYPE *row = p + y*RLSW.framebuffer.width + RLSW.scMin[0]; +#if SW_DEPTH_IS_PACKED + SW_DEPTH_TYPE *row = p + y*RLSW.framebuffer.width + RLSW.scMin[0]; for (int x = 0; x < w; x++) *row++ = d; #else - DEPTH_TYPE *row = p + 3*(y*RLSW.framebuffer.width + RLSW.scMin[0]); + SW_DEPTH_TYPE *row = p + 3*(y*RLSW.framebuffer.width + RLSW.scMin[0]); for (int x = 0; x < w; x++) { *row++ = d0; @@ -1396,7 +1396,7 @@ static inline void sw_framebuffer_fill_depth(void *ptr, int size, float value) } else { -#if DEPTH_IS_PACKED +#if SW_DEPTH_IS_PACKED for (int i = 0; i < size; i++) *p++ = d; #else for (int i = 0; i < size; i++) @@ -1411,25 +1411,25 @@ static inline void sw_framebuffer_fill_depth(void *ptr, int size, float value) static inline void sw_framebuffer_fill(void *colorPtr, void *depthPtr, int size, float color[4], float depth) { -#if COLOR_IS_PACKED - COLOR_TYPE packedColor = PACK_COLOR(color[0], color[1], color[2]); - COLOR_TYPE *pColor = (COLOR_TYPE*)colorPtr; +#if SW_COLOR_IS_PACKED + SW_COLOR_TYPE packedColor = SW_PACK_COLOR(color[0], color[1], color[2]); + SW_COLOR_TYPE *pColor = (SW_COLOR_TYPE*)colorPtr; #else - COLOR_TYPE r = sw_clampi(color[0]*255, 0, 255); - COLOR_TYPE g = sw_clampi(color[1]*255, 0, 255); - COLOR_TYPE b = sw_clampi(color[2]*255, 0, 255); - COLOR_TYPE a = sw_clampi(color[3]*255, 0, 255); - COLOR_TYPE *pColor = (COLOR_TYPE*)colorPtr; + SW_COLOR_TYPE r = sw_clampi(color[0]*255, 0, 255); + SW_COLOR_TYPE g = sw_clampi(color[1]*255, 0, 255); + SW_COLOR_TYPE b = sw_clampi(color[2]*255, 0, 255); + SW_COLOR_TYPE a = sw_clampi(color[3]*255, 0, 255); + SW_COLOR_TYPE *pColor = (SW_COLOR_TYPE*)colorPtr; #endif -#if DEPTH_IS_PACKED - DEPTH_TYPE d = PACK_DEPTH(depth); - DEPTH_TYPE *pDepth = (DEPTH_TYPE*)depthPtr; +#if SW_DEPTH_IS_PACKED + SW_DEPTH_TYPE d = SW_PACK_DEPTH(depth); + SW_DEPTH_TYPE *pDepth = (SW_DEPTH_TYPE*)depthPtr; #else - DEPTH_TYPE d0 = PACK_DEPTH_0(depth); - DEPTH_TYPE d1 = PACK_DEPTH_1(depth); - DEPTH_TYPE d2 = PACK_DEPTH_2(depth); - DEPTH_TYPE *pDepth = (DEPTH_TYPE*)depthPtr; + SW_DEPTH_TYPE d0 = SW_PACK_DEPTH_0(depth); + SW_DEPTH_TYPE d1 = SW_PACK_DEPTH_1(depth); + SW_DEPTH_TYPE d2 = SW_PACK_DEPTH_2(depth); + SW_DEPTH_TYPE *pDepth = (SW_DEPTH_TYPE*)depthPtr; #endif if (RLSW.stateFlags & SW_STATE_SCISSOR_TEST) @@ -1437,21 +1437,21 @@ static inline void sw_framebuffer_fill(void *colorPtr, void *depthPtr, int size, int w = RLSW.scMax[0] - RLSW.scMin[0] + 1; for (int y = RLSW.scMin[1]; y <= RLSW.scMax[1]; y++) { -#if COLOR_IS_PACKED - COLOR_TYPE *rowColor = pColor + y*RLSW.framebuffer.width + RLSW.scMin[0]; +#if SW_COLOR_IS_PACKED + SW_COLOR_TYPE *rowColor = pColor + y*RLSW.framebuffer.width + RLSW.scMin[0]; #else - COLOR_TYPE *rowColor = pColor + 3*(y*RLSW.framebuffer.width + RLSW.scMin[0]); + SW_COLOR_TYPE *rowColor = pColor + 3*(y*RLSW.framebuffer.width + RLSW.scMin[0]); #endif -#if DEPTH_IS_PACKED - DEPTH_TYPE *rowDepth = pDepth + y*RLSW.framebuffer.width + RLSW.scMin[0]; +#if SW_DEPTH_IS_PACKED + SW_DEPTH_TYPE *rowDepth = pDepth + y*RLSW.framebuffer.width + RLSW.scMin[0]; #else - DEPTH_TYPE *rowDepth = pDepth + 3*(y*RLSW.framebuffer.width + RLSW.scMin[0]); + SW_DEPTH_TYPE *rowDepth = pDepth + 3*(y*RLSW.framebuffer.width + RLSW.scMin[0]); #endif for (int x = 0; x < w; x++) { -#if COLOR_IS_PACKED +#if SW_COLOR_IS_PACKED *rowColor++ = packedColor; #else *rowColor++ = r; @@ -1460,7 +1460,7 @@ static inline void sw_framebuffer_fill(void *colorPtr, void *depthPtr, int size, *rowColor++ = a; #endif -#if DEPTH_IS_PACKED +#if SW_DEPTH_IS_PACKED *rowDepth++ = d; #else *rowDepth++ = d0; @@ -1474,7 +1474,7 @@ static inline void sw_framebuffer_fill(void *colorPtr, void *depthPtr, int size, { for (int i = 0; i < size; i++) { -#if COLOR_IS_PACKED +#if SW_COLOR_IS_PACKED *pColor++ = packedColor; #else *pColor++ = r; @@ -1483,7 +1483,7 @@ static inline void sw_framebuffer_fill(void *colorPtr, void *depthPtr, int size, *pColor++ = a; #endif -#if DEPTH_IS_PACKED +#if SW_DEPTH_IS_PACKED *pDepth++ = d; #else *pDepth++ = d0; From cb6ef52f27eeae86330d79d8d8f0019c6aa19fa8 Mon Sep 17 00:00:00 2001 From: Bigfoot71 Date: Mon, 27 Oct 2025 20:57:13 +0100 Subject: [PATCH 2/8] unified color and depth buffer --- src/external/rlsw.h | 363 ++++++++++++++++---------------------------- 1 file changed, 129 insertions(+), 234 deletions(-) diff --git a/src/external/rlsw.h b/src/external/rlsw.h index 93ce5081a7ba..a74e3203e9b7 100644 --- a/src/external/rlsw.h +++ b/src/external/rlsw.h @@ -529,7 +529,6 @@ SWAPI void swClose(void); SWAPI bool swResizeFramebuffer(int w, int h); SWAPI void swCopyFramebuffer(int x, int y, int w, int h, SWformat format, SWtype type, void *pixels); SWAPI void swBlitFramebuffer(int xDst, int yDst, int wDst, int hDst, int xSrc, int ySrc, int wSrc, int hSrc, SWformat format, SWtype type, void *pixels); -SWAPI void *swGetColorBuffer(int *w, int *h); SWAPI void swEnable(SWstate state); SWAPI void swDisable(SWstate state); @@ -685,6 +684,7 @@ SWAPI void swBindTexture(uint32_t id); #if (SW_COLOR_BUFFER_BITS == 8) #define SW_COLOR_TYPE uint8_t #define SW_COLOR_IS_PACKED 1 + #define SW_COLOR_PACK_COMP 1 #define SW_PACK_COLOR(r,g,b) ((((uint8_t)((r)*7+0.5f))&0x07)<<5 | (((uint8_t)((g)*7+0.5f))&0x07)<<2 | ((uint8_t)((b)*3+0.5f))&0x03) #define SW_UNPACK_R(p) (((p)>>5)&0x07) #define SW_UNPACK_G(p) (((p)>>2)&0x07) @@ -698,6 +698,7 @@ SWAPI void swBindTexture(uint32_t id); #elif (SW_COLOR_BUFFER_BITS == 16) #define SW_COLOR_TYPE uint16_t #define SW_COLOR_IS_PACKED 1 + #define SW_COLOR_PACK_COMP 1 #define SW_PACK_COLOR(r,g,b) ((((uint16_t)((r)*31+0.5f))&0x1F)<<11 | (((uint16_t)((g)*63+0.5f))&0x3F)<<5 | ((uint16_t)((b)*31+0.5f))&0x1F) #define SW_UNPACK_R(p) (((p)>>11)&0x1F) #define SW_UNPACK_G(p) (((p)>>5)&0x3F) @@ -711,11 +712,13 @@ SWAPI void swBindTexture(uint32_t id); #else // 32 bits #define SW_COLOR_TYPE uint8_t #define SW_COLOR_IS_PACKED 0 + #define SW_COLOR_PACK_COMP 4 #endif #if (SW_DEPTH_BUFFER_BITS == 8) #define SW_DEPTH_TYPE uint8_t #define SW_DEPTH_IS_PACKED 1 + #define SW_DEPTH_PACK_COMP 1 #define SW_DEPTH_MAX UINT8_MAX #define SW_DEPTH_SCALE (1.0f/UINT8_MAX) #define SW_PACK_DEPTH(d) ((SW_DEPTH_TYPE)((d)*SW_DEPTH_MAX)) @@ -723,6 +726,7 @@ SWAPI void swBindTexture(uint32_t id); #elif (SW_DEPTH_BUFFER_BITS == 16) #define SW_DEPTH_TYPE uint16_t #define SW_DEPTH_IS_PACKED 1 + #define SW_DEPTH_PACK_COMP 1 #define SW_DEPTH_MAX UINT16_MAX #define SW_DEPTH_SCALE (1.0f/UINT16_MAX) #define SW_PACK_DEPTH(d) ((SW_DEPTH_TYPE)((d)*SW_DEPTH_MAX)) @@ -730,6 +734,7 @@ SWAPI void swBindTexture(uint32_t id); #else // 24 bits #define SW_DEPTH_TYPE uint8_t #define SW_DEPTH_IS_PACKED 0 + #define SW_DEPTH_PACK_COMP 3 #define SW_DEPTH_MAX 0xFFFFFF #define SW_DEPTH_SCALE (1.0f/0xFFFFFF) #define SW_PACK_DEPTH_0(d) (((uint32_t)((d)*SW_DEPTH_MAX)>>16)&0xFF) @@ -738,11 +743,6 @@ SWAPI void swBindTexture(uint32_t id); #define SW_UNPACK_DEPTH(p) (((p)[0]<<16)|((p)[1]<<8)|(p)[2]) #endif -#define GET_COLOR_PTR(ptr, offset) ((void*)((uint8_t*)(ptr) + (offset)*SW_COLOR_PIXEL_SIZE)) -#define GET_DEPTH_PTR(ptr, offset) ((void*)((uint8_t*)(ptr) + (offset)*SW_DEPTH_PIXEL_SIZE)) -#define INC_COLOR_PTR(ptr) ((ptr) = (void*)((uint8_t*)(ptr) + SW_COLOR_PIXEL_SIZE)) -#define INC_DEPTH_PTR(ptr) ((ptr) = (void*)((uint8_t*)(ptr) + SW_DEPTH_PIXEL_SIZE)) - #define SW_STATE_CHECK(flags) (SW_STATE_CHECK_EX(RLSW.stateFlags, (flags))) #define SW_STATE_CHECK_EX(state, flags) (((state) & (flags)) == (flags)) @@ -809,8 +809,12 @@ typedef struct { } sw_texture_t; typedef struct { - void *color; - void *depth; + SW_COLOR_TYPE color[SW_COLOR_PACK_COMP]; + SW_DEPTH_TYPE depth[SW_DEPTH_PACK_COMP]; +} sw_pixel_t; + +typedef struct { + sw_pixel_t* pixels; int width; int height; int allocSz; @@ -1204,15 +1208,8 @@ static inline bool sw_framebuffer_load(int w, int h) { int size = w*h; - RLSW.framebuffer.color = SW_MALLOC(SW_COLOR_PIXEL_SIZE*size); - if (RLSW.framebuffer.color == NULL) return false; - - RLSW.framebuffer.depth = SW_MALLOC(SW_DEPTH_PIXEL_SIZE*size); - if (RLSW.framebuffer.depth == NULL) - { - SW_FREE(RLSW.framebuffer.color); - return false; - } + RLSW.framebuffer.pixels = SW_MALLOC(sizeof(sw_pixel_t)*size); + if (RLSW.framebuffer.pixels == NULL) return false; RLSW.framebuffer.width = w; RLSW.framebuffer.height = h; @@ -1232,18 +1229,10 @@ static inline bool sw_framebuffer_resize(int w, int h) return true; } - void *newColor = SW_REALLOC(RLSW.framebuffer.color, SW_COLOR_PIXEL_SIZE*newSize); - if (newColor == NULL) return false; - - void *newDepth = SW_REALLOC(RLSW.framebuffer.depth, SW_DEPTH_PIXEL_SIZE*newSize); - if (newDepth == NULL) - { - SW_FREE(newColor); - return false; - } + void *newPixels = SW_REALLOC(RLSW.framebuffer.pixels, sizeof(sw_pixel_t)*newSize); + if (newPixels == NULL) return false; - RLSW.framebuffer.color = newColor; - RLSW.framebuffer.depth = newDepth; + RLSW.framebuffer.pixels = newPixels; RLSW.framebuffer.width = w; RLSW.framebuffer.height = h; @@ -1252,29 +1241,29 @@ static inline bool sw_framebuffer_resize(int w, int h) return true; } -static inline void sw_framebuffer_read_color(float dst[4], const void *src) +static inline void sw_framebuffer_read_color(float dst[4], const sw_pixel_t *src) { #if SW_COLOR_IS_PACKED - SW_COLOR_TYPE pixel = ((SW_COLOR_TYPE*)src)[0]; + SW_COLOR_TYPE pixel = src->color[0]; dst[0] = SW_TO_FLOAT_R(SW_UNPACK_R(pixel)); dst[1] = SW_TO_FLOAT_G(SW_UNPACK_G(pixel)); dst[2] = SW_TO_FLOAT_B(SW_UNPACK_B(pixel)); dst[3] = 1.0f; #else - sw_float_from_unorm8_simd(dst, src); + sw_float_from_unorm8_simd(dst, src->color); #endif } -static inline void sw_framebuffer_read_color8(uint8_t dst[4], const void *src) +static inline void sw_framebuffer_read_color8(uint8_t dst[4], const sw_pixel_t *src) { #if SW_COLOR_IS_PACKED - SW_COLOR_TYPE pixel = ((SW_COLOR_TYPE*)src)[0]; + SW_COLOR_TYPE pixel = src->color[0]; dst[0] = SW_SCALE_R(SW_UNPACK_R(pixel)); dst[1] = SW_SCALE_G(SW_UNPACK_G(pixel)); dst[2] = SW_SCALE_B(SW_UNPACK_B(pixel)); dst[3] = 255; #else - const SW_COLOR_TYPE *p = (const SW_COLOR_TYPE*)src; + const SW_COLOR_TYPE *p = src->color; dst[0] = p[0]; dst[1] = p[1]; dst[2] = p[2]; @@ -1282,26 +1271,28 @@ static inline void sw_framebuffer_read_color8(uint8_t dst[4], const void *src) #endif } -static inline void sw_framebuffer_write_color(void *dst, const float src[4]) +static inline void sw_framebuffer_write_color(sw_pixel_t *dst, const float src[4]) { #if SW_COLOR_IS_PACKED - ((SW_COLOR_TYPE*)dst)[0] = SW_PACK_COLOR(src[0], src[1], src[2]); + dst->color[0] = SW_PACK_COLOR(src[0], src[1], src[2]); #else - sw_float_to_unorm8_simd(dst, src); + sw_float_to_unorm8_simd(dst->color, src); #endif } -static inline void sw_framebuffer_fill_color(void *ptr, int size, const float color[4]) +static inline void sw_framebuffer_fill_color(sw_pixel_t *ptr, int size, const float color[4]) { #if SW_COLOR_IS_PACKED - SW_COLOR_TYPE packed = SW_PACK_COLOR(color[0], color[1], color[2]); - SW_COLOR_TYPE *p = (SW_COLOR_TYPE*)ptr; + SW_COLOR_TYPE value[1] = { + SW_PACK_COLOR(color[0], color[1], color[2]) + }; #else - SW_COLOR_TYPE r = sw_clampi(color[0]*255, 0, 255); - SW_COLOR_TYPE g = sw_clampi(color[1]*255, 0, 255); - SW_COLOR_TYPE b = sw_clampi(color[2]*255, 0, 255); - SW_COLOR_TYPE a = sw_clampi(color[3]*255, 0, 255); - SW_COLOR_TYPE *p = (SW_COLOR_TYPE*)ptr; + SW_COLOR_TYPE value[4] = { + sw_clampi(color[0]*255, 0, 255), + sw_clampi(color[1]*255, 0, 255), + sw_clampi(color[2]*255, 0, 255), + sw_clampi(color[3]*255, 0, 255) + }; #endif if (RLSW.stateFlags & SW_STATE_SCISSOR_TEST) @@ -1309,70 +1300,54 @@ static inline void sw_framebuffer_fill_color(void *ptr, int size, const float co int w = RLSW.scMax[0] - RLSW.scMin[0] + 1; for (int y = RLSW.scMin[1]; y <= RLSW.scMax[1]; y++) { -#if SW_COLOR_IS_PACKED - SW_COLOR_TYPE *row = p + y*RLSW.framebuffer.width + RLSW.scMin[0]; - for (int x = 0; x < w; x++) *row++ = packed; -#else - SW_COLOR_TYPE *row = p + 3*(y*RLSW.framebuffer.width + RLSW.scMin[0]); - for (int x = 0; x < w; x++) + sw_pixel_t *row = ptr + y*RLSW.framebuffer.width + RLSW.scMin[0]; + for (int x = 0; x < w; x++, row++) { - *row++ = r; - *row++ = g; - *row++ = b; - *row++ = a; + for (int i = 0; i < SW_COLOR_PACK_COMP; i++) row->color[i] = value[i]; } -#endif } } else { -#if SW_COLOR_IS_PACKED - for (int i = 0; i < size; i++) *p++ = packed; -#else - for (int i = 0; i < size; i++) + for (int i = 0; i < size; i++, ptr++) { - *p++ = r; - *p++ = g; - *p++ = b; - *p++ = a; + for (int j = 0; j < SW_COLOR_PACK_COMP; j++) ptr->color[j] = value[j]; } -#endif } } -static inline float sw_framebuffer_read_depth(const void *src) +static inline float sw_framebuffer_read_depth(const sw_pixel_t *src) { #if SW_DEPTH_IS_PACKED - return ((SW_DEPTH_TYPE*)src)[0]*SW_DEPTH_SCALE; + return src->depth[0]*SW_DEPTH_SCALE; #else - const SW_DEPTH_TYPE *p = (const SW_DEPTH_TYPE*)src; - uint32_t d = SW_UNPACK_DEPTH(p); - return d*SW_DEPTH_SCALE; + return SW_UNPACK_DEPTH(src->depth)*SW_DEPTH_SCALE; #endif } -static inline void sw_framebuffer_write_depth(void *dst, float depth) +static inline void sw_framebuffer_write_depth(sw_pixel_t *dst, float depth) { #if SW_DEPTH_IS_PACKED - ((SW_DEPTH_TYPE*)dst)[0] = SW_PACK_DEPTH(depth); + dst->depth[0] = SW_PACK_DEPTH(depth); #else - SW_DEPTH_TYPE *p = (SW_DEPTH_TYPE*)dst; - p[0] = SW_PACK_DEPTH_0(depth); - p[1] = SW_PACK_DEPTH_1(depth); - p[2] = SW_PACK_DEPTH_2(depth); + dst->depth[0] = SW_PACK_DEPTH_0(depth); + dst->depth[1] = SW_PACK_DEPTH_1(depth); + dst->depth[2] = SW_PACK_DEPTH_2(depth); #endif } -static inline void sw_framebuffer_fill_depth(void *ptr, int size, float value) +static inline void sw_framebuffer_fill_depth(sw_pixel_t *ptr, int size, float depth) { #if SW_DEPTH_IS_PACKED - SW_DEPTH_TYPE d = SW_PACK_DEPTH(value); - SW_DEPTH_TYPE *p = (SW_DEPTH_TYPE*)ptr; + SW_DEPTH_TYPE value[1] = { + SW_PACK_DEPTH(depth) + }; #else - SW_DEPTH_TYPE d0 = SW_PACK_DEPTH_0(value); - SW_DEPTH_TYPE d1 = SW_PACK_DEPTH_1(value); - SW_DEPTH_TYPE d2 = SW_PACK_DEPTH_2(value); - SW_DEPTH_TYPE *p = (SW_DEPTH_TYPE*)ptr; + SW_DEPTH_TYPE value[3] = { + SW_PACK_DEPTH_0(depth), + SW_PACK_DEPTH_1(depth), + SW_PACK_DEPTH_2(depth) + }; #endif if (RLSW.stateFlags & SW_STATE_SCISSOR_TEST) @@ -1380,56 +1355,41 @@ static inline void sw_framebuffer_fill_depth(void *ptr, int size, float value) int w = RLSW.scMax[0] - RLSW.scMin[0] + 1; for (int y = RLSW.scMin[1]; y <= RLSW.scMax[1]; y++) { -#if SW_DEPTH_IS_PACKED - SW_DEPTH_TYPE *row = p + y*RLSW.framebuffer.width + RLSW.scMin[0]; - for (int x = 0; x < w; x++) *row++ = d; -#else - SW_DEPTH_TYPE *row = p + 3*(y*RLSW.framebuffer.width + RLSW.scMin[0]); - for (int x = 0; x < w; x++) + sw_pixel_t *row = ptr + y*RLSW.framebuffer.width + RLSW.scMin[0]; + for (int x = 0; x < w; x++, row++) { - *row++ = d0; - *row++ = d1; - *row++ = d2; + for (int i = 0; i < SW_DEPTH_PACK_COMP; i++) row->depth[i] = value[i]; } -#endif } } else { -#if SW_DEPTH_IS_PACKED - for (int i = 0; i < size; i++) *p++ = d; -#else - for (int i = 0; i < size; i++) + for (int i = 0; i < size; i++, ptr++) { - *p++ = d0; - *p++ = d1; - *p++ = d2; + for (int j = 0; j < SW_DEPTH_PACK_COMP; j++) ptr->depth[j] = value[j]; } -#endif } } -static inline void sw_framebuffer_fill(void *colorPtr, void *depthPtr, int size, float color[4], float depth) +static inline void sw_framebuffer_fill(sw_pixel_t *ptr, int size, float color[4], float depth) { + sw_pixel_t value = { 0 }; + #if SW_COLOR_IS_PACKED - SW_COLOR_TYPE packedColor = SW_PACK_COLOR(color[0], color[1], color[2]); - SW_COLOR_TYPE *pColor = (SW_COLOR_TYPE*)colorPtr; + value.color[0] = SW_PACK_COLOR(color[0], color[1], color[2]); #else - SW_COLOR_TYPE r = sw_clampi(color[0]*255, 0, 255); - SW_COLOR_TYPE g = sw_clampi(color[1]*255, 0, 255); - SW_COLOR_TYPE b = sw_clampi(color[2]*255, 0, 255); - SW_COLOR_TYPE a = sw_clampi(color[3]*255, 0, 255); - SW_COLOR_TYPE *pColor = (SW_COLOR_TYPE*)colorPtr; + value.color[0] = sw_clampi(color[0]*255, 0, 255); + value.color[1] = sw_clampi(color[1]*255, 0, 255); + value.color[2] = sw_clampi(color[2]*255, 0, 255); + value.color[3] = sw_clampi(color[3]*255, 0, 255); #endif #if SW_DEPTH_IS_PACKED - SW_DEPTH_TYPE d = SW_PACK_DEPTH(depth); - SW_DEPTH_TYPE *pDepth = (SW_DEPTH_TYPE*)depthPtr; + value.depth[0] = SW_PACK_DEPTH(depth); #else - SW_DEPTH_TYPE d0 = SW_PACK_DEPTH_0(depth); - SW_DEPTH_TYPE d1 = SW_PACK_DEPTH_1(depth); - SW_DEPTH_TYPE d2 = SW_PACK_DEPTH_2(depth); - SW_DEPTH_TYPE *pDepth = (SW_DEPTH_TYPE*)depthPtr; + value.depth[0] = SW_PACK_DEPTH_0(depth); + value.depth[1] = SW_PACK_DEPTH_1(depth); + value.depth[2] = SW_PACK_DEPTH_2(depth); #endif if (RLSW.stateFlags & SW_STATE_SCISSOR_TEST) @@ -1437,76 +1397,32 @@ static inline void sw_framebuffer_fill(void *colorPtr, void *depthPtr, int size, int w = RLSW.scMax[0] - RLSW.scMin[0] + 1; for (int y = RLSW.scMin[1]; y <= RLSW.scMax[1]; y++) { -#if SW_COLOR_IS_PACKED - SW_COLOR_TYPE *rowColor = pColor + y*RLSW.framebuffer.width + RLSW.scMin[0]; -#else - SW_COLOR_TYPE *rowColor = pColor + 3*(y*RLSW.framebuffer.width + RLSW.scMin[0]); -#endif - -#if SW_DEPTH_IS_PACKED - SW_DEPTH_TYPE *rowDepth = pDepth + y*RLSW.framebuffer.width + RLSW.scMin[0]; -#else - SW_DEPTH_TYPE *rowDepth = pDepth + 3*(y*RLSW.framebuffer.width + RLSW.scMin[0]); -#endif - - for (int x = 0; x < w; x++) - { -#if SW_COLOR_IS_PACKED - *rowColor++ = packedColor; -#else - *rowColor++ = r; - *rowColor++ = g; - *rowColor++ = b; - *rowColor++ = a; -#endif - -#if SW_DEPTH_IS_PACKED - *rowDepth++ = d; -#else - *rowDepth++ = d0; - *rowDepth++ = d1; - *rowDepth++ = d2; -#endif - } + sw_pixel_t *row = ptr + y*RLSW.framebuffer.width + RLSW.scMin[0]; + for (int x = 0; x < w; x++, row++) *row = value; } } else { - for (int i = 0; i < size; i++) - { -#if SW_COLOR_IS_PACKED - *pColor++ = packedColor; -#else - *pColor++ = r; - *pColor++ = g; - *pColor++ = b; - *pColor++ = a; -#endif - -#if SW_DEPTH_IS_PACKED - *pDepth++ = d; -#else - *pDepth++ = d0; - *pDepth++ = d1; - *pDepth++ = d2; -#endif - } + for (int i = 0; i < size; i++, ptr++) *ptr = value; } } #define DEFINE_FRAMEBUFFER_COPY_BEGIN(name, DST_PTR_T) \ -static inline void sw_framebuffer_copy_to_##name(int x, int y, int w, int h, DST_PTR_T *dst) \ +static inline void sw_framebuffer_copy_to_##name(int x, int y, int w, int h, DST_PTR_T *dst) \ { \ - const void *src = RLSW.framebuffer.color; \ + const int stride = RLSW.framebuffer.width; \ + const sw_pixel_t *src = RLSW.framebuffer.pixels + (y * stride + x); \ \ - for (int iy = y; iy < h; iy++) { \ - for (int ix = x; ix < w; ix++) { \ + for (int iy = 0; iy < h; iy++) { \ + const sw_pixel_t *line = src; \ + for (int ix = 0; ix < w; ix++) { \ uint8_t color[4]; \ - sw_framebuffer_read_color8(color, src); \ + sw_framebuffer_read_color8(color, line); \ #define DEFINE_FRAMEBUFFER_COPY_END() \ - INC_COLOR_PTR(src); \ + ++line; \ } \ + src += stride; \ } \ } @@ -1620,22 +1536,24 @@ static inline void sw_framebuffer_blit_to_##name( int xSrc, int ySrc, int wSrc, int hSrc, \ DST_PTR_T *dst) \ { \ - const uint8_t *srcBase = (uint8_t*)RLSW.framebuffer.color; \ - int fbWidth = RLSW.framebuffer.width; \ + const sw_pixel_t *srcBase = RLSW.framebuffer.pixels; \ + const int fbWidth = RLSW.framebuffer.width; \ \ - uint32_t xScale = ((uint32_t)wSrc << 16)/(uint32_t)wDst; \ - uint32_t yScale = ((uint32_t)hSrc << 16)/(uint32_t)hDst; \ + const uint32_t xScale = ((uint32_t)wSrc << 16) / (uint32_t)wDst; \ + const uint32_t yScale = ((uint32_t)hSrc << 16) / (uint32_t)hDst; \ \ for (int dy = 0; dy < hDst; dy++) { \ - uint32_t yFix = ((uint32_t)ySrc << 16) + dy*yScale; \ + uint32_t yFix = ((uint32_t)ySrc << 16) + dy * yScale; \ int sy = yFix >> 16; \ + const sw_pixel_t *srcLine = srcBase + sy * fbWidth + xSrc; \ \ + const sw_pixel_t *srcPtr = srcLine; \ for (int dx = 0; dx < wDst; dx++) { \ - uint32_t xFix = dx*xScale; \ + uint32_t xFix = dx * xScale; \ int sx = xFix >> 16; \ - const void *srcPtr = GET_COLOR_PTR(srcBase, sy*fbWidth + sx); \ + const sw_pixel_t *pixel = srcPtr + sx; \ uint8_t color[4]; \ - sw_framebuffer_read_color8(color, srcPtr); \ + sw_framebuffer_read_color8(color, pixel); #define DEFINE_FRAMEBUFFER_BLIT_END() \ } \ @@ -2378,8 +2296,7 @@ static inline void FUNC_NAME(const sw_texture_t *tex, const sw_vertex_t *start, \ /* Pre-calculate the starting pointers for the framebuffer row */ \ int y = (int)start->screen[1]; \ - void *cptr = GET_COLOR_PTR(RLSW.framebuffer.color, y*RLSW.framebuffer.width + xStart); \ - void *dptr = GET_DEPTH_PTR(RLSW.framebuffer.depth, y*RLSW.framebuffer.width + xStart); \ + sw_pixel_t *ptr = RLSW.framebuffer.pixels + y*RLSW.framebuffer.width + xStart; \ \ /* Scanline rasterization */ \ for (int x = xStart; x < xEnd; x++) \ @@ -2395,12 +2312,12 @@ static inline void FUNC_NAME(const sw_texture_t *tex, const sw_vertex_t *start, if (ENABLE_DEPTH_TEST) \ { \ /* TODO: Implement different depth funcs? */ \ - float depth = sw_framebuffer_read_depth(dptr); \ + float depth = sw_framebuffer_read_depth(ptr); \ if (z > depth) goto discard; \ } \ \ /* TODO: Implement depth mask */ \ - sw_framebuffer_write_depth(dptr, z); \ + sw_framebuffer_write_depth(ptr, z); \ \ if (ENABLE_TEXTURE) \ { \ @@ -2417,13 +2334,13 @@ static inline void FUNC_NAME(const sw_texture_t *tex, const sw_vertex_t *start, if (ENABLE_COLOR_BLEND) \ { \ float dstColor[4]; \ - sw_framebuffer_read_color(dstColor, cptr); \ + sw_framebuffer_read_color(dstColor, ptr); \ sw_blend_colors(dstColor, srcColor); \ - sw_framebuffer_write_color(cptr, dstColor); \ + sw_framebuffer_write_color(ptr, dstColor); \ } \ else \ { \ - sw_framebuffer_write_color(cptr, srcColor); \ + sw_framebuffer_write_color(ptr, srcColor); \ } \ \ /* Increment the interpolation parameter, UVs, and pointers */ \ @@ -2439,9 +2356,7 @@ static inline void FUNC_NAME(const sw_texture_t *tex, const sw_vertex_t *start, u += dUdx; \ v += dVdx; \ } \ - \ - INC_COLOR_PTR(cptr); \ - INC_DEPTH_PTR(dptr); \ + ++ptr; \ } \ } @@ -2809,8 +2724,7 @@ static inline void FUNC_NAME(void) const sw_texture_t *tex; \ if (ENABLE_TEXTURE) tex = &RLSW.loadedTextures[RLSW.currentTexture]; \ \ - void *cDstBase = RLSW.framebuffer.color; \ - void *dDstBase = RLSW.framebuffer.depth; \ + sw_pixel_t *pixels = RLSW.framebuffer.pixels; \ int wDst = RLSW.framebuffer.width; \ \ float zScanline = v0->homogeneous[2] + dZdx*xSubstep + dZdy*ySubstep; \ @@ -2826,8 +2740,7 @@ static inline void FUNC_NAME(void) \ for (int y = yMin; y < yMax; y++) \ { \ - void *cptr = GET_COLOR_PTR(cDstBase, y*wDst + xMin); \ - void *dptr = GET_DEPTH_PTR(dDstBase, y*wDst + xMin); \ + sw_pixel_t *ptr = pixels + y*wDst + xMin; \ \ float z = zScanline; \ float u = uScanline; \ @@ -2855,12 +2768,12 @@ static inline void FUNC_NAME(void) if (ENABLE_DEPTH_TEST) \ { \ /* TODO: Implement different depth funcs? */ \ - float depth = sw_framebuffer_read_depth(dptr); \ + float depth = sw_framebuffer_read_depth(ptr); \ if (z > depth) goto discard; \ } \ \ /* TODO: Implement depth mask */ \ - sw_framebuffer_write_depth(dptr, z); \ + sw_framebuffer_write_depth(ptr, z); \ \ if (ENABLE_TEXTURE) \ { \ @@ -2875,11 +2788,11 @@ static inline void FUNC_NAME(void) if (ENABLE_COLOR_BLEND) \ { \ float dstColor[4]; \ - sw_framebuffer_read_color(dstColor, cptr); \ + sw_framebuffer_read_color(dstColor, ptr); \ sw_blend_colors(dstColor, srcColor); \ - sw_framebuffer_write_color(cptr, dstColor); \ + sw_framebuffer_write_color(ptr, dstColor); \ } \ - else sw_framebuffer_write_color(cptr, srcColor); \ + else sw_framebuffer_write_color(ptr, srcColor); \ \ discard: \ z += dZdx; \ @@ -2887,15 +2800,12 @@ static inline void FUNC_NAME(void) color[1] += dCdx[1]; \ color[2] += dCdx[2]; \ color[3] += dCdx[3]; \ - \ if (ENABLE_TEXTURE) \ { \ u += dUdx; \ v += dVdx; \ } \ - \ - INC_COLOR_PTR(cptr); \ - INC_DEPTH_PTR(dptr); \ + ++ptr; \ } \ \ zScanline += dZdy; \ @@ -3119,8 +3029,7 @@ static inline void FUNC_NAME(const sw_vertex_t *v0, const sw_vertex_t *v1) \ float a = v0->color[3] + aInc*substep; \ \ const int fbWidth = RLSW.framebuffer.width; \ - void *cBuffer = RLSW.framebuffer.color; \ - void *dBuffer = RLSW.framebuffer.depth; \ + sw_pixel_t *pixels = RLSW.framebuffer.pixels; \ \ int numPixels = (int)(steps - substep) + 1; \ \ @@ -3130,28 +3039,26 @@ static inline void FUNC_NAME(const sw_vertex_t *v0, const sw_vertex_t *v1) \ int px = (int)(x - 0.5f); \ int py = (int)(y - 0.5f); \ \ - int offset = py*fbWidth + px; \ - void *dptr = GET_DEPTH_PTR(dBuffer, offset); \ + sw_pixel_t *ptr = pixels + py*fbWidth + px; \ \ if (ENABLE_DEPTH_TEST) \ { \ - float depth = sw_framebuffer_read_depth(dptr); \ + float depth = sw_framebuffer_read_depth(ptr); \ if (z > depth) goto discard; \ } \ \ - sw_framebuffer_write_depth(dptr, z); \ + sw_framebuffer_write_depth(ptr, z); \ \ - void *cptr = GET_COLOR_PTR(cBuffer, offset); \ float color[4] = {r, g, b, a}; \ \ if (ENABLE_COLOR_BLEND) \ { \ float dstColor[4]; \ - sw_framebuffer_read_color(dstColor, cptr); \ + sw_framebuffer_read_color(dstColor, ptr); \ sw_blend_colors(dstColor, color); \ - sw_framebuffer_write_color(cptr, dstColor); \ + sw_framebuffer_write_color(ptr, dstColor); \ } \ - else sw_framebuffer_write_color(cptr, color); \ + else sw_framebuffer_write_color(ptr, color); \ \ discard: \ x += xInc; y += yInc; z += zInc; \ @@ -3292,27 +3199,24 @@ static inline void FUNC_NAME(int x, int y, float z, const float color[4]) \ } \ \ int offset = y*RLSW.framebuffer.width + x; \ - \ - void *dptr = GET_DEPTH_PTR(RLSW.framebuffer.depth, offset); \ + sw_pixel_t *ptr = RLSW.framebuffer.pixels + offset; \ \ if (ENABLE_DEPTH_TEST) \ { \ - float depth = sw_framebuffer_read_depth(dptr); \ + float depth = sw_framebuffer_read_depth(ptr); \ if (z > depth) return; \ } \ \ - sw_framebuffer_write_depth(dptr, z); \ - \ - void *cptr = GET_COLOR_PTR(RLSW.framebuffer.color, offset); \ + sw_framebuffer_write_depth(ptr, z); \ \ if (ENABLE_COLOR_BLEND) \ { \ float dstColor[4]; \ - sw_framebuffer_read_color(dstColor, cptr); \ + sw_framebuffer_read_color(dstColor, ptr); \ sw_blend_colors(dstColor, color); \ - sw_framebuffer_write_color(cptr, dstColor); \ + sw_framebuffer_write_color(ptr, dstColor); \ } \ - else sw_framebuffer_write_color(cptr, color); \ + else sw_framebuffer_write_color(ptr, color); \ } #define DEFINE_POINT_THICK_RASTER(FUNC_NAME, RASTER_FUNC) \ @@ -3669,8 +3573,7 @@ void swClose(void) } } - SW_FREE(RLSW.framebuffer.color); - SW_FREE(RLSW.framebuffer.depth); + SW_FREE(RLSW.framebuffer.pixels); SW_FREE(RLSW.loadedTextures); SW_FREE(RLSW.freeTextureIds); @@ -3770,14 +3673,6 @@ void swBlitFramebuffer(int xDst, int yDst, int wDst, int hDst, int xSrc, int ySr } } -void *swGetColorBuffer(int *w, int *h) -{ - if (w) *w = RLSW.framebuffer.width; - if (h) *h = RLSW.framebuffer.height; - - return RLSW.framebuffer.color; -} - void swEnable(SWstate state) { switch (state) @@ -3944,15 +3839,15 @@ void swClear(uint32_t bitmask) if ((bitmask & (SW_COLOR_BUFFER_BIT | SW_DEPTH_BUFFER_BIT)) == (SW_COLOR_BUFFER_BIT | SW_DEPTH_BUFFER_BIT)) { - sw_framebuffer_fill(RLSW.framebuffer.color, RLSW.framebuffer.depth,size, RLSW.clearColor, RLSW.clearDepth); + sw_framebuffer_fill(RLSW.framebuffer.pixels, size, RLSW.clearColor, RLSW.clearDepth); } else if (bitmask & (SW_COLOR_BUFFER_BIT)) { - sw_framebuffer_fill_color(RLSW.framebuffer.color, size, RLSW.clearColor); + sw_framebuffer_fill_color(RLSW.framebuffer.pixels, size, RLSW.clearColor); } else if (bitmask & SW_DEPTH_BUFFER_BIT) { - sw_framebuffer_fill_depth(RLSW.framebuffer.depth, size, RLSW.clearDepth); + sw_framebuffer_fill_depth(RLSW.framebuffer.pixels, size, RLSW.clearDepth); } } From 152cdf4ffff91eab2ccf2fbc4a2bf4c99e876121 Mon Sep 17 00:00:00 2001 From: Bigfoot71 Date: Mon, 27 Oct 2025 22:15:30 +0100 Subject: [PATCH 3/8] tweaks --- src/external/rlsw.h | 72 ++++++++++++++++++++++----------------------- 1 file changed, 36 insertions(+), 36 deletions(-) diff --git a/src/external/rlsw.h b/src/external/rlsw.h index a74e3203e9b7..e666c27ca832 100644 --- a/src/external/rlsw.h +++ b/src/external/rlsw.h @@ -1271,6 +1271,15 @@ static inline void sw_framebuffer_read_color8(uint8_t dst[4], const sw_pixel_t * #endif } +static inline float sw_framebuffer_read_depth(const sw_pixel_t *src) +{ +#if SW_DEPTH_IS_PACKED + return src->depth[0]*SW_DEPTH_SCALE; +#else + return SW_UNPACK_DEPTH(src->depth)*SW_DEPTH_SCALE; +#endif +} + static inline void sw_framebuffer_write_color(sw_pixel_t *dst, const float src[4]) { #if SW_COLOR_IS_PACKED @@ -1280,6 +1289,17 @@ static inline void sw_framebuffer_write_color(sw_pixel_t *dst, const float src[4 #endif } +static inline void sw_framebuffer_write_depth(sw_pixel_t *dst, float depth) +{ +#if SW_DEPTH_IS_PACKED + dst->depth[0] = SW_PACK_DEPTH(depth); +#else + dst->depth[0] = SW_PACK_DEPTH_0(depth); + dst->depth[1] = SW_PACK_DEPTH_1(depth); + dst->depth[2] = SW_PACK_DEPTH_2(depth); +#endif +} + static inline void sw_framebuffer_fill_color(sw_pixel_t *ptr, int size, const float color[4]) { #if SW_COLOR_IS_PACKED @@ -1316,26 +1336,6 @@ static inline void sw_framebuffer_fill_color(sw_pixel_t *ptr, int size, const fl } } -static inline float sw_framebuffer_read_depth(const sw_pixel_t *src) -{ -#if SW_DEPTH_IS_PACKED - return src->depth[0]*SW_DEPTH_SCALE; -#else - return SW_UNPACK_DEPTH(src->depth)*SW_DEPTH_SCALE; -#endif -} - -static inline void sw_framebuffer_write_depth(sw_pixel_t *dst, float depth) -{ -#if SW_DEPTH_IS_PACKED - dst->depth[0] = SW_PACK_DEPTH(depth); -#else - dst->depth[0] = SW_PACK_DEPTH_0(depth); - dst->depth[1] = SW_PACK_DEPTH_1(depth); - dst->depth[2] = SW_PACK_DEPTH_2(depth); -#endif -} - static inline void sw_framebuffer_fill_depth(sw_pixel_t *ptr, int size, float depth) { #if SW_DEPTH_IS_PACKED @@ -1411,7 +1411,7 @@ static inline void sw_framebuffer_fill(sw_pixel_t *ptr, int size, float color[4] static inline void sw_framebuffer_copy_to_##name(int x, int y, int w, int h, DST_PTR_T *dst) \ { \ const int stride = RLSW.framebuffer.width; \ - const sw_pixel_t *src = RLSW.framebuffer.pixels + (y * stride + x); \ + const sw_pixel_t *src = RLSW.framebuffer.pixels + (y*stride + x); \ \ for (int iy = 0; iy < h; iy++) { \ const sw_pixel_t *line = src; \ @@ -1539,17 +1539,17 @@ static inline void sw_framebuffer_blit_to_##name( const sw_pixel_t *srcBase = RLSW.framebuffer.pixels; \ const int fbWidth = RLSW.framebuffer.width; \ \ - const uint32_t xScale = ((uint32_t)wSrc << 16) / (uint32_t)wDst; \ - const uint32_t yScale = ((uint32_t)hSrc << 16) / (uint32_t)hDst; \ + const uint32_t xScale = ((uint32_t)wSrc << 16)/(uint32_t)wDst; \ + const uint32_t yScale = ((uint32_t)hSrc << 16)/(uint32_t)hDst; \ \ for (int dy = 0; dy < hDst; dy++) { \ - uint32_t yFix = ((uint32_t)ySrc << 16) + dy * yScale; \ + uint32_t yFix = ((uint32_t)ySrc << 16) + dy*yScale; \ int sy = yFix >> 16; \ - const sw_pixel_t *srcLine = srcBase + sy * fbWidth + xSrc; \ + const sw_pixel_t *srcLine = srcBase + sy*fbWidth + xSrc; \ \ const sw_pixel_t *srcPtr = srcLine; \ for (int dx = 0; dx < wDst; dx++) { \ - uint32_t xFix = dx * xScale; \ + uint32_t xFix = dx*xScale; \ int sx = xFix >> 16; \ const sw_pixel_t *pixel = srcPtr + sx; \ uint8_t color[4]; \ @@ -4420,7 +4420,7 @@ void swDrawArrays(SWdraw mode, int offset, int count) float u, v; if (texcoords) { - int idx = 2 * i; + int idx = 2*i; u = texcoords[idx]; v = texcoords[idx + 1]; } @@ -4431,8 +4431,8 @@ void swDrawArrays(SWdraw mode, int offset, int count) } float texcoord[2]; - texcoord[0] = texMatrix[0] * u + texMatrix[4] * v + texMatrix[12]; - texcoord[1] = texMatrix[1] * u + texMatrix[5] * v + texMatrix[13]; + texcoord[0] = texMatrix[0]*u + texMatrix[4]*v + texMatrix[12]; + texcoord[1] = texMatrix[1]*u + texMatrix[5]*v + texMatrix[13]; float color[4] = { defaultColor[0], @@ -4443,14 +4443,14 @@ void swDrawArrays(SWdraw mode, int offset, int count) if (colors) { - int idx = 4 * i; + int idx = 4*i; color[0] *= (float)colors[idx]*SW_INV_255; color[1] *= (float)colors[idx + 1]*SW_INV_255; color[2] *= (float)colors[idx + 2]*SW_INV_255; color[3] *= (float)colors[idx + 3]*SW_INV_255; } - int idx = 3 * i; + int idx = 3*i; float position[4] = { positions[idx], positions[idx + 1], @@ -4516,7 +4516,7 @@ void swDrawElements(SWdraw mode, int count, int type, const void *indices) float u, v; if (texcoords) { - int idx = 2 * index; + int idx = 2*index; u = texcoords[idx]; v = texcoords[idx + 1]; } @@ -4527,8 +4527,8 @@ void swDrawElements(SWdraw mode, int count, int type, const void *indices) } float texcoord[2]; - texcoord[0] = texMatrix[0] * u + texMatrix[4] * v + texMatrix[12]; - texcoord[1] = texMatrix[1] * u + texMatrix[5] * v + texMatrix[13]; + texcoord[0] = texMatrix[0]*u + texMatrix[4]*v + texMatrix[12]; + texcoord[1] = texMatrix[1]*u + texMatrix[5]*v + texMatrix[13]; float color[4] = { defaultColor[0], @@ -4539,14 +4539,14 @@ void swDrawElements(SWdraw mode, int count, int type, const void *indices) if (colors) { - int idx = 4 * index; + int idx = 4*index; color[0] *= (float)colors[idx]*SW_INV_255; color[1] *= (float)colors[idx + 1]*SW_INV_255; color[2] *= (float)colors[idx + 2]*SW_INV_255; color[3] *= (float)colors[idx + 3]*SW_INV_255; } - int idx = 3 * index; + int idx = 3*index; float position[4] = { positions[idx], positions[idx + 1], From 3adf3e6146946cc51887f8c4448d9dfd70007e5b Mon Sep 17 00:00:00 2001 From: Bigfoot71 Date: Mon, 27 Oct 2025 22:49:20 +0100 Subject: [PATCH 4/8] review the storage of clear values + complete get/set depth value --- src/external/rlsw.h | 102 ++++++++++++++------------------------------ 1 file changed, 31 insertions(+), 71 deletions(-) diff --git a/src/external/rlsw.h b/src/external/rlsw.h index e666c27ca832..4518c02d4221 100644 --- a/src/external/rlsw.h +++ b/src/external/rlsw.h @@ -198,6 +198,7 @@ typedef double GLclampd; //#define GL_ATTRIB_STACK_DEPTH 0x0BB0 //#define GL_CLIENT_ATTRIB_STACK_DEPTH 0x0BB1 #define GL_COLOR_CLEAR_VALUE 0x0C22 +#define GL_DEPTH_CLEAR_VALUE 0x0B73 //#define GL_COLOR_WRITEMASK 0x0C23 //#define GL_CURRENT_INDEX 0x0B01 #define GL_CURRENT_COLOR 0x0B00 @@ -332,6 +333,7 @@ typedef double GLclampd; #define glViewport(x, y, w, h) swViewport((x), (y), (w), (h)) #define glScissor(x, y, w, h) swScissor((x), (y), (w), (h)) #define glClearColor(r, g, b, a) swClearColor((r), (g), (b), (a)) +#define glClearDepth(d) swClearDepth((d)) #define glClear(bitmask) swClear((bitmask)) #define glBlendFunc(sfactor, dfactor) swBlendFunc((sfactor), (dfactor)) #define glPolygonMode(face, mode) swPolygonMode((mode)) @@ -384,7 +386,6 @@ typedef double GLclampd; #define glBindTexture(tr, id) swBindTexture((id)) // OpenGL functions NOT IMPLEMENTED by rlsw -#define glClearDepth(X) ((void)(X)) #define glDepthMask(X) ((void)(X)) #define glColorMask(X,Y,Z,W) ((void)(X),(void)(Y),(void)(Z),(void)(W)) #define glPixelStorei(X,Y) ((void)(X),(void)(Y)) @@ -415,6 +416,7 @@ typedef enum { SW_VERSION = GL_VERSION, SW_EXTENSIONS = GL_EXTENSIONS, SW_COLOR_CLEAR_VALUE = GL_COLOR_CLEAR_VALUE, + SW_DEPTH_CLEAR_VALUE = GL_DEPTH_CLEAR_VALUE, SW_CURRENT_COLOR = GL_CURRENT_COLOR, SW_CURRENT_TEXTURE_COORDS = GL_CURRENT_TEXTURE_COORDS, SW_POINT_SIZE = GL_POINT_SIZE, @@ -541,6 +543,7 @@ SWAPI void swViewport(int x, int y, int width, int height); SWAPI void swScissor(int x, int y, int width, int height); SWAPI void swClearColor(float r, float g, float b, float a); +SWAPI void swClearDepth(float depth); SWAPI void swClear(uint32_t bitmask); SWAPI void swBlendFunc(SWfactor sfactor, SWfactor dfactor); @@ -822,8 +825,7 @@ typedef struct { typedef struct { sw_framebuffer_t framebuffer; // Main framebuffer - float clearColor[4]; // Color used to clear the screen - float clearDepth; // Depth value used to clear the screen + sw_pixel_t clearValue; // Clear value of the framebuffer float vpCenter[2]; // Viewport center float vpHalf[2]; // Viewport half dimensions @@ -1300,21 +1302,8 @@ static inline void sw_framebuffer_write_depth(sw_pixel_t *dst, float depth) #endif } -static inline void sw_framebuffer_fill_color(sw_pixel_t *ptr, int size, const float color[4]) +static inline void sw_framebuffer_fill_color(sw_pixel_t *ptr, int size, const SW_COLOR_TYPE color[SW_COLOR_PACK_COMP]) { -#if SW_COLOR_IS_PACKED - SW_COLOR_TYPE value[1] = { - SW_PACK_COLOR(color[0], color[1], color[2]) - }; -#else - SW_COLOR_TYPE value[4] = { - sw_clampi(color[0]*255, 0, 255), - sw_clampi(color[1]*255, 0, 255), - sw_clampi(color[2]*255, 0, 255), - sw_clampi(color[3]*255, 0, 255) - }; -#endif - if (RLSW.stateFlags & SW_STATE_SCISSOR_TEST) { int w = RLSW.scMax[0] - RLSW.scMin[0] + 1; @@ -1323,7 +1312,7 @@ static inline void sw_framebuffer_fill_color(sw_pixel_t *ptr, int size, const fl sw_pixel_t *row = ptr + y*RLSW.framebuffer.width + RLSW.scMin[0]; for (int x = 0; x < w; x++, row++) { - for (int i = 0; i < SW_COLOR_PACK_COMP; i++) row->color[i] = value[i]; + for (int i = 0; i < SW_COLOR_PACK_COMP; i++) row->color[i] = color[i]; } } } @@ -1331,25 +1320,13 @@ static inline void sw_framebuffer_fill_color(sw_pixel_t *ptr, int size, const fl { for (int i = 0; i < size; i++, ptr++) { - for (int j = 0; j < SW_COLOR_PACK_COMP; j++) ptr->color[j] = value[j]; + for (int j = 0; j < SW_COLOR_PACK_COMP; j++) ptr->color[j] = color[j]; } } } -static inline void sw_framebuffer_fill_depth(sw_pixel_t *ptr, int size, float depth) +static inline void sw_framebuffer_fill_depth(sw_pixel_t *ptr, int size, const SW_DEPTH_TYPE depth[SW_DEPTH_PACK_COMP]) { -#if SW_DEPTH_IS_PACKED - SW_DEPTH_TYPE value[1] = { - SW_PACK_DEPTH(depth) - }; -#else - SW_DEPTH_TYPE value[3] = { - SW_PACK_DEPTH_0(depth), - SW_PACK_DEPTH_1(depth), - SW_PACK_DEPTH_2(depth) - }; -#endif - if (RLSW.stateFlags & SW_STATE_SCISSOR_TEST) { int w = RLSW.scMax[0] - RLSW.scMin[0] + 1; @@ -1358,7 +1335,7 @@ static inline void sw_framebuffer_fill_depth(sw_pixel_t *ptr, int size, float de sw_pixel_t *row = ptr + y*RLSW.framebuffer.width + RLSW.scMin[0]; for (int x = 0; x < w; x++, row++) { - for (int i = 0; i < SW_DEPTH_PACK_COMP; i++) row->depth[i] = value[i]; + for (int i = 0; i < SW_DEPTH_PACK_COMP; i++) row->depth[i] = depth[i]; } } } @@ -1366,32 +1343,13 @@ static inline void sw_framebuffer_fill_depth(sw_pixel_t *ptr, int size, float de { for (int i = 0; i < size; i++, ptr++) { - for (int j = 0; j < SW_DEPTH_PACK_COMP; j++) ptr->depth[j] = value[j]; + for (int j = 0; j < SW_DEPTH_PACK_COMP; j++) ptr->depth[j] = depth[j]; } } } -static inline void sw_framebuffer_fill(sw_pixel_t *ptr, int size, float color[4], float depth) +static inline void sw_framebuffer_fill(sw_pixel_t *ptr, int size, sw_pixel_t value) { - sw_pixel_t value = { 0 }; - -#if SW_COLOR_IS_PACKED - value.color[0] = SW_PACK_COLOR(color[0], color[1], color[2]); -#else - value.color[0] = sw_clampi(color[0]*255, 0, 255); - value.color[1] = sw_clampi(color[1]*255, 0, 255); - value.color[2] = sw_clampi(color[2]*255, 0, 255); - value.color[3] = sw_clampi(color[3]*255, 0, 255); -#endif - -#if SW_DEPTH_IS_PACKED - value.depth[0] = SW_PACK_DEPTH(depth); -#else - value.depth[0] = SW_PACK_DEPTH_0(depth); - value.depth[1] = SW_PACK_DEPTH_1(depth); - value.depth[2] = SW_PACK_DEPTH_2(depth); -#endif - if (RLSW.stateFlags & SW_STATE_SCISSOR_TEST) { int w = RLSW.scMax[0] - RLSW.scMin[0] + 1; @@ -1411,7 +1369,7 @@ static inline void sw_framebuffer_fill(sw_pixel_t *ptr, int size, float color[4] static inline void sw_framebuffer_copy_to_##name(int x, int y, int w, int h, DST_PTR_T *dst) \ { \ const int stride = RLSW.framebuffer.width; \ - const sw_pixel_t *src = RLSW.framebuffer.pixels + (y*stride + x); \ + const sw_pixel_t *src = RLSW.framebuffer.pixels + (y*stride + x); \ \ for (int iy = 0; iy < h; iy++) { \ const sw_pixel_t *line = src; \ @@ -3502,11 +3460,9 @@ bool swInit(int w, int h) RLSW.freeTextureIds = (uint32_t *)SW_MALLOC(SW_MAX_TEXTURES*sizeof(uint32_t)); if (RLSW.loadedTextures == NULL) { swClose(); return false; } - RLSW.clearColor[0] = 0.0f; - RLSW.clearColor[1] = 0.0f; - RLSW.clearColor[2] = 0.0f; - RLSW.clearColor[3] = 1.0f; - RLSW.clearDepth = 1.0f; + const float clearColor[4] = { 0.0f, 0.0f, 0.0f, 1.0f }; + sw_framebuffer_write_color(&RLSW.clearValue, clearColor); + sw_framebuffer_write_depth(&RLSW.clearValue, 1.0f); RLSW.currentMatrixMode = SW_MODELVIEW; RLSW.currentMatrix = &RLSW.stackModelview[0]; @@ -3716,10 +3672,11 @@ void swGetFloatv(SWget name, float *v) { case SW_COLOR_CLEAR_VALUE: { - v[0] = RLSW.clearColor[0]; - v[1] = RLSW.clearColor[1]; - v[2] = RLSW.clearColor[2]; - v[3] = RLSW.clearColor[3]; + sw_framebuffer_read_color(v, &RLSW.clearValue); + } break; + case SW_DEPTH_CLEAR_VALUE: + { + v[0] = sw_framebuffer_read_depth(&RLSW.clearValue); } break; case SW_CURRENT_COLOR: { @@ -3827,10 +3784,13 @@ void swScissor(int x, int y, int width, int height) void swClearColor(float r, float g, float b, float a) { - RLSW.clearColor[0] = r; - RLSW.clearColor[1] = g; - RLSW.clearColor[2] = b; - RLSW.clearColor[3] = a; + float v[4] = { r, g, b, a }; + sw_framebuffer_write_color(&RLSW.clearValue, v); +} + +void swClearDepth(float depth) +{ + sw_framebuffer_write_depth(&RLSW.clearValue, depth); } void swClear(uint32_t bitmask) @@ -3839,15 +3799,15 @@ void swClear(uint32_t bitmask) if ((bitmask & (SW_COLOR_BUFFER_BIT | SW_DEPTH_BUFFER_BIT)) == (SW_COLOR_BUFFER_BIT | SW_DEPTH_BUFFER_BIT)) { - sw_framebuffer_fill(RLSW.framebuffer.pixels, size, RLSW.clearColor, RLSW.clearDepth); + sw_framebuffer_fill(RLSW.framebuffer.pixels, size, RLSW.clearValue); } else if (bitmask & (SW_COLOR_BUFFER_BIT)) { - sw_framebuffer_fill_color(RLSW.framebuffer.pixels, size, RLSW.clearColor); + sw_framebuffer_fill_color(RLSW.framebuffer.pixels, size, RLSW.clearValue.color); } else if (bitmask & SW_DEPTH_BUFFER_BIT) { - sw_framebuffer_fill_depth(RLSW.framebuffer.pixels, size, RLSW.clearDepth); + sw_framebuffer_fill_depth(RLSW.framebuffer.pixels, size, RLSW.clearValue.depth); } } From 3cddffd5b5afc840b5fc147a9334777d4328051d Mon Sep 17 00:00:00 2001 From: Bigfoot71 Date: Mon, 27 Oct 2025 23:39:38 +0100 Subject: [PATCH 5/8] copy/blit fast path --- src/external/rlsw.h | 69 ++++++++++++++++++++++++++++++++------------- 1 file changed, 50 insertions(+), 19 deletions(-) diff --git a/src/external/rlsw.h b/src/external/rlsw.h index 4518c02d4221..3d35ecfb7f55 100644 --- a/src/external/rlsw.h +++ b/src/external/rlsw.h @@ -1365,6 +1365,31 @@ static inline void sw_framebuffer_fill(sw_pixel_t *ptr, int size, sw_pixel_t val } } +static inline void sw_framebuffer_copy_fast(void* dst) +{ + int size = RLSW.framebuffer.width * RLSW.framebuffer.height; + const sw_pixel_t *pixels = RLSW.framebuffer.pixels; + +#if SW_COLOR_BUFFER_BITS == 8 + uint8_t *dst8 = (uint8_t*)dst; + for (int i = 0; i < size; i++) dst8[i] = pixels[i].color[0]; +#elif SW_COLOR_BUFFER_BITS == 16 + uint16_t *dst16 = (uint16_t*)dst; + for (int i = 0; i < size; i++) dst16[i] = *(uint16_t*)pixels[i].color; +#else // 32 bits + uint32_t *dst32 = (uint32_t*)dst; + #if SW_GL_FRAMEBUFFER_COPY_BGRA + for (int i = 0; i < size; i++) + { + const uint8_t *c = pixels[i].color; + dst32[i] = (uint32_t)c[2] | ((uint32_t)c[1] << 8) | ((uint32_t)c[0] << 16) | ((uint32_t)c[3] << 24); + } + #else // RGBA + for (int i = 0; i < size; i++) dst32[i] = *(uint32_t*)pixels[i].color; + #endif +#endif +} + #define DEFINE_FRAMEBUFFER_COPY_BEGIN(name, DST_PTR_T) \ static inline void sw_framebuffer_copy_to_##name(int x, int y, int w, int h, DST_PTR_T *dst) \ { \ @@ -3545,17 +3570,8 @@ void swCopyFramebuffer(int x, int y, int w, int h, SWformat format, SWtype type, { sw_pixelformat_t pFormat = (sw_pixelformat_t)sw_get_pixel_format(format, type); - if (w <= 0) - { - RLSW.errCode = SW_INVALID_VALUE; - return; - } - - if (h <= 0) - { - RLSW.errCode = SW_INVALID_VALUE; - return; - } + if (w <= 0) { RLSW.errCode = SW_INVALID_VALUE; return; } + if (h <= 0) { RLSW.errCode = SW_INVALID_VALUE; return; } if (w > RLSW.framebuffer.width) w = RLSW.framebuffer.width; if (h > RLSW.framebuffer.height) h = RLSW.framebuffer.height; @@ -3563,6 +3579,25 @@ void swCopyFramebuffer(int x, int y, int w, int h, SWformat format, SWtype type, x = sw_clampi(x, 0, w); y = sw_clampi(y, 0, h); + if (x >= w || y >= h) return; + + if (x == 0 && y == 0 && w == RLSW.framebuffer.width && h == RLSW.framebuffer.height) + { + #if SW_COLOR_BUFFER_BITS == 32 + if (pFormat == SW_PIXELFORMAT_UNCOMPRESSED_R8G8B8A8) + { + sw_framebuffer_copy_fast(pixels); + return; + } + #elif SW_COLOR_BUFFER_BITS == 16 + if (pFormat == SW_PIXELFORMAT_UNCOMPRESSED_R5G6B5) + { + sw_framebuffer_copy_fast(pixels); + return; + } + #endif + } + switch (pFormat) { case SW_PIXELFORMAT_UNCOMPRESSED_GRAYSCALE: sw_framebuffer_copy_to_GRAYALPHA(x, y, w, h, (uint8_t *)pixels); break; @@ -3589,17 +3624,13 @@ void swBlitFramebuffer(int xDst, int yDst, int wDst, int hDst, int xSrc, int ySr { sw_pixelformat_t pFormat = (sw_pixelformat_t)sw_get_pixel_format(format, type); - if (wSrc <= 0) + if (xDst == xSrc && yDst == ySrc && wDst == wSrc && hDst == hSrc) { - RLSW.errCode = SW_INVALID_VALUE; - return; + swCopyFramebuffer(xSrc, ySrc, wSrc, hSrc, format, type, pixels); } - if (hSrc <= 0) - { - RLSW.errCode = SW_INVALID_VALUE; - return; - } + if (wSrc <= 0) { RLSW.errCode = SW_INVALID_VALUE; return; } + if (hSrc <= 0) { RLSW.errCode = SW_INVALID_VALUE; return; } if (wSrc > RLSW.framebuffer.width) wSrc = RLSW.framebuffer.width; if (hSrc > RLSW.framebuffer.height) hSrc = RLSW.framebuffer.height; From 7cf53cc61520cf332bebd7dad464e257f2545f9f Mon Sep 17 00:00:00 2001 From: Bigfoot71 Date: Tue, 28 Oct 2025 01:10:12 +0100 Subject: [PATCH 6/8] better simd read/write --- src/external/rlsw.h | 37 ++++++++++++++----------------------- 1 file changed, 14 insertions(+), 23 deletions(-) diff --git a/src/external/rlsw.h b/src/external/rlsw.h index 3d35ecfb7f55..aafbba8fed12 100644 --- a/src/external/rlsw.h +++ b/src/external/rlsw.h @@ -610,6 +610,7 @@ SWAPI void swBindTexture(uint32_t id); #define RLSW_IMPLEMENTATION #if defined(RLSW_IMPLEMENTATION) +#include #include #include #include // Required for: floorf(), fabsf() @@ -1081,30 +1082,24 @@ static inline void sw_float_to_unorm8_simd(uint8_t dst[4], const float src[4]) #if defined(SW_HAS_NEON) float32x4_t values = vld1q_f32(src); float32x4_t scaled = vmulq_n_f32(values, 255.0f); - scaled = vminq_f32(vmaxq_f32(scaled, vdupq_n_f32(0.0f)), vdupq_n_f32(255.0f)); - uint32x4_t clamped = vcvtq_u32_f32(scaled); - - uint16x4_t narrow16 = vmovn_u32(clamped); - uint8x8_t narrow8 = vmovn_u16(vcombine_u16(narrow16, narrow16)); - - vst1_lane_u32((uint32_t*)dst, vreinterpret_u32_u8(narrow8), 0); + int32x4_t clamped_s32 = vcvtq_s32_f32(scaled); // f32 -> s32 (truncated) + int16x4_t narrow16_s = vqmovn_s32(clamped_s32); + int16x8_t combined16_s = vcombine_s16(narrow16_s, narrow16_s); + uint8x8_t narrow8_u = vqmovun_s16(combined16_s); + vst1_lane_u32((uint32_t*)dst, vreinterpret_u32_u8(narrow8_u), 0); #elif defined(SW_HAS_SSE41) __m128 values = _mm_loadu_ps(src); __m128 scaled = _mm_mul_ps(values, _mm_set1_ps(255.0f)); - scaled = _mm_max_ps(_mm_min_ps(scaled, _mm_set1_ps(255.0f)), _mm_setzero_ps()); - __m128i clamped = _mm_cvtps_epi32(scaled); - - clamped = _mm_packus_epi32(clamped, clamped); - clamped = _mm_packus_epi16(clamped, clamped); + __m128i clamped = _mm_cvtps_epi32(scaled); // f32 -> s32 (truncated) + clamped = _mm_packus_epi32(clamped, clamped); // s32 -> u16 (saturated < 0 à 0) + clamped = _mm_packus_epi16(clamped, clamped); // u16 -> u8 (saturated > 255 à 255) *(uint32_t*)dst = _mm_cvtsi128_si32(clamped); #elif defined(SW_HAS_SSE2) __m128 values = _mm_loadu_ps(src); __m128 scaled = _mm_mul_ps(values, _mm_set1_ps(255.0f)); - scaled = _mm_max_ps(_mm_min_ps(scaled, _mm_set1_ps(255.0f)), _mm_setzero_ps()); - __m128i clamped = _mm_cvtps_epi32(scaled); - - clamped = _mm_packs_epi32(clamped, clamped); - clamped = _mm_packus_epi16(clamped, clamped); + __m128i clamped = _mm_cvtps_epi32(scaled); // f32 -> s32 (truncated) + clamped = _mm_packs_epi32(clamped, clamped); // s32 -> s16 (saturated) + clamped = _mm_packus_epi16(clamped, clamped); // s16 -> u8 (saturated < 0 à 0) *(uint32_t*)dst = _mm_cvtsi128_si32(clamped); #else for (int i = 0; i < 4; i++) @@ -1112,7 +1107,7 @@ static inline void sw_float_to_unorm8_simd(uint8_t dst[4], const float src[4]) float val = src[i]*255.0f; val = (val > 255.0f)? 255.0f : val; val = (val < 0.0f)? 0.0f : val; - dst[i] = (uint8_t)(val + 0.5f); + dst[i] = (uint8_t)val; } #endif } @@ -1120,13 +1115,9 @@ static inline void sw_float_to_unorm8_simd(uint8_t dst[4], const float src[4]) static inline void sw_float_from_unorm8_simd(float dst[4], const uint8_t src[4]) { #if defined(SW_HAS_NEON) - uint32x4_t bytes = vdupq_n_u32(0); - bytes = vld1q_lane_u32((const uint32_t*)src, bytes, 0); - - uint8x8_t bytes8 = vreinterpret_u8_u32(vget_low_u32(bytes)); + uint8x8_t bytes8 = vld1_u8(src); //< Read 8 bytes, faster, but let's hope we're not at the end of the page (unlikely)... uint16x8_t bytes16 = vmovl_u8(bytes8); uint32x4_t ints = vmovl_u16(vget_low_u16(bytes16)); - float32x4_t floats = vcvtq_f32_u32(ints); floats = vmulq_n_f32(floats, SW_INV_255); vst1q_f32(dst, floats); From 50a22daf8697b05445c163f30af873074adcd390 Mon Sep 17 00:00:00 2001 From: Bigfoot71 Date: Tue, 28 Oct 2025 01:33:41 +0100 Subject: [PATCH 7/8] framebuffer alignment --- src/external/rlsw.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/external/rlsw.h b/src/external/rlsw.h index aafbba8fed12..aad587f57d0e 100644 --- a/src/external/rlsw.h +++ b/src/external/rlsw.h @@ -813,6 +813,7 @@ typedef struct { } sw_texture_t; typedef struct { + alignas(SW_COLOR_PIXEL_SIZE) SW_COLOR_TYPE color[SW_COLOR_PACK_COMP]; SW_DEPTH_TYPE depth[SW_DEPTH_PACK_COMP]; } sw_pixel_t; @@ -1358,7 +1359,7 @@ static inline void sw_framebuffer_fill(sw_pixel_t *ptr, int size, sw_pixel_t val static inline void sw_framebuffer_copy_fast(void* dst) { - int size = RLSW.framebuffer.width * RLSW.framebuffer.height; + int size = RLSW.framebuffer.width*RLSW.framebuffer.height; const sw_pixel_t *pixels = RLSW.framebuffer.pixels; #if SW_COLOR_BUFFER_BITS == 8 From b17c5396a6cb7faac97274e23d68d93958077d18 Mon Sep 17 00:00:00 2001 From: Bigfoot71 Date: Tue, 28 Oct 2025 02:29:30 +0100 Subject: [PATCH 8/8] fix 'typo' my french slipped out --- src/external/rlsw.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/external/rlsw.h b/src/external/rlsw.h index aad587f57d0e..ba2716790de0 100644 --- a/src/external/rlsw.h +++ b/src/external/rlsw.h @@ -1092,15 +1092,15 @@ static inline void sw_float_to_unorm8_simd(uint8_t dst[4], const float src[4]) __m128 values = _mm_loadu_ps(src); __m128 scaled = _mm_mul_ps(values, _mm_set1_ps(255.0f)); __m128i clamped = _mm_cvtps_epi32(scaled); // f32 -> s32 (truncated) - clamped = _mm_packus_epi32(clamped, clamped); // s32 -> u16 (saturated < 0 à 0) - clamped = _mm_packus_epi16(clamped, clamped); // u16 -> u8 (saturated > 255 à 255) + clamped = _mm_packus_epi32(clamped, clamped); // s32 -> u16 (saturated < 0 to 0) + clamped = _mm_packus_epi16(clamped, clamped); // u16 -> u8 (saturated > 255 to 255) *(uint32_t*)dst = _mm_cvtsi128_si32(clamped); #elif defined(SW_HAS_SSE2) __m128 values = _mm_loadu_ps(src); __m128 scaled = _mm_mul_ps(values, _mm_set1_ps(255.0f)); __m128i clamped = _mm_cvtps_epi32(scaled); // f32 -> s32 (truncated) clamped = _mm_packs_epi32(clamped, clamped); // s32 -> s16 (saturated) - clamped = _mm_packus_epi16(clamped, clamped); // s16 -> u8 (saturated < 0 à 0) + clamped = _mm_packus_epi16(clamped, clamped); // s16 -> u8 (saturated < 0 to 0) *(uint32_t*)dst = _mm_cvtsi128_si32(clamped); #else for (int i = 0; i < 4; i++)