diff --git a/src_c/collisions.c b/src_c/collisions.c index 771627bd..8cc29ed3 100644 --- a/src_c/collisions.c +++ b/src_c/collisions.c @@ -201,8 +201,10 @@ pgIntersection_LineRect(pgLineBase *line, SDL_Rect *rect, double *X, double *Y, double *T) { #if AVX2_IS_SUPPORTED - return pgIntersection_LineRect_avx2(line, rect, X, Y, T); -#else + if (pg_HasAVX2()) + return pgIntersection_LineRect_avx2(line, rect, X, Y, T); +#endif /* ~__AVX2__ */ + double x = (double)rect->x; double y = (double)rect->y; double w = (double)rect->w; @@ -237,15 +239,16 @@ pgIntersection_LineRect(pgLineBase *line, SDL_Rect *rect, double *X, double *Y, } return ret; -#endif /* ~__AVX2__ */ } static int pgCollision_RectLine(SDL_Rect *rect, pgLineBase *line) { #if AVX2_IS_SUPPORTED - return pgCollision_RectLine_avx2(rect, line); -#else + if (pg_HasAVX2()) + return pgCollision_RectLine_avx2(rect, line); +#endif /* ~__AVX2__ */ + double x = (double)rect->x; double y = (double)rect->y; double w = (double)rect->w; @@ -258,7 +261,6 @@ pgCollision_RectLine(SDL_Rect *rect, pgLineBase *line) return pgCollision_LineLine(line, &a) || pgCollision_LineLine(line, &b) || pgCollision_LineLine(line, &c) || pgCollision_LineLine(line, &d); -#endif /* ~__AVX2__ */ } static int @@ -361,8 +363,10 @@ static int pgRaycast_LineRect(pgLineBase *line, SDL_Rect *rect, double max_t, double *T) { #if AVX2_IS_SUPPORTED - return pgRaycast_LineRect_avx2(line, rect, max_t, T); -#else + if (pg_HasAVX2()) + return pgRaycast_LineRect_avx2(line, rect, max_t, T); +#endif /* ~__AVX2__ */ + double x = (double)rect->x; double y = (double)rect->y; double w = (double)rect->w; @@ -392,7 +396,6 @@ pgRaycast_LineRect(pgLineBase *line, SDL_Rect *rect, double max_t, double *T) } return ret; -#endif /* ~__AVX2__ */ } static int diff --git a/src_c/simd_collisions.h b/src_c/simd_collisions.h index a9226904..f3729459 100644 --- a/src_c/simd_collisions.h +++ b/src_c/simd_collisions.h @@ -20,6 +20,9 @@ #endif /* ~defined(__AVX2__) && 
defined(HAVE_IMMINTRIN_H) && \
           !defined(SDL_DISABLE_IMMINTRIN_H) */
 
+PG_FORCEINLINE static int
+pg_HasAVX2(void);
+
 #ifdef AVX2_IS_SUPPORTED
 PG_FORCEINLINE static int
 pgIntersection_LineRect_avx2(pgLineBase *line, SDL_Rect *rect, double *X,
diff --git a/src_c/simd_collisions_avx2.c b/src_c/simd_collisions_avx2.c
index 035f25ca..f203d064 100644
--- a/src_c/simd_collisions_avx2.c
+++ b/src_c/simd_collisions_avx2.c
@@ -1,10 +1,59 @@
 #include "include/pygame.h"
 #include "simd_collisions.h"
+#include <string.h>
+
+#if defined(_MSC_VER)
+#include <intrin.h>
+#include <malloc.h>
+#endif
 
 #if defined(HAVE_IMMINTRIN_H) && !defined(SDL_DISABLE_IMMINTRIN_H)
 #include <immintrin.h>
 #endif /* defined(HAVE_IMMINTRIN_H) && !defined(SDL_DISABLE_IMMINTRIN_H) */
 
+PG_FORCEINLINE static int
+pg_HasAVX2(void)
+{
+    /* Runtime AVX2 probe. The answer is cached in a function-local static;
+     * -1 means "not probed yet". Returns 1 if AVX2 is reported, else 0. */
+    static int has_avx2 = -1;
+    if (has_avx2 != -1) {
+        return has_avx2;
+    }
+
+#if AVX2_IS_SUPPORTED
+#if defined(__GNUC__)
+    // Reference:
+    // https://gcc.gnu.org/onlinedocs/gcc-4.8.2/gcc/X86-Built-in-Functions.html
+    has_avx2 = __builtin_cpu_supports("avx2") ? 1 : 0;
+#elif defined(_MSC_VER)
+    // Reference:
+    // https://learn.microsoft.com/en-us/cpp/intrinsics/cpuid-cpuidex?view=msvc-170
+    int cpu_info[4];
+
+    // Leaf 0: EAX (cpu_info[0]) holds the highest supported standard leaf.
+    __cpuid(cpu_info, 0);
+    if (cpu_info[0] < 7) {
+        // Leaf 7 does not exist on this CPU, so AVX2 cannot be reported.
+        has_avx2 = 0;
+    }
+    else {
+        // Leaf 7, sub-leaf 0: EBX (cpu_info[1]) bit 5 is the AVX2 flag.
+        // NOTE(review): OS-level YMM state support (OSXSAVE/XGETBV) is not
+        // checked here - confirm whether this path needs it.
+        __cpuidex(cpu_info, 7, 0);
+        has_avx2 = (cpu_info[1] >> 5) & 1;
+    }
+#else
+    has_avx2 = 0;
+#endif
+#else
+    has_avx2 = 0;
+#endif /* ~__AVX2__ */
+
+    return has_avx2;
+}
+
 #if AVX2_IS_SUPPORTED
 PG_FORCEINLINE static int
 pgIntersection_LineRect_avx2(pgLineBase *line, SDL_Rect *rect, double *X,
@@ -97,9 +146,7 @@ pgIntersection_LineRect_avx2(pgLineBase *line, SDL_Rect *rect, double *X,
 
     return 1;
 }
-#endif /* ~AVX2_IS_SUPPORTED */
 
-#if AVX2_IS_SUPPORTED
 PG_FORCEINLINE static int
pgCollision_RectLine_avx2(SDL_Rect *rect, pgLineBase *line) { @@ -168,9 +215,7 @@ pgCollision_RectLine_avx2(SDL_Rect *rect, pgLineBase *line) // if no lines touch the rectangle then this will be false return _mm256_movemask_pd(t_u_256d) != 0x0; } -#endif /* ~AVX2_IS_SUPPORTED */ -#if AVX2_IS_SUPPORTED PG_FORCEINLINE static int pgRaycast_LineRect_avx2(pgLineBase *line, SDL_Rect *rect, double max_t, double *T) @@ -255,4 +300,4 @@ pgRaycast_LineRect_avx2(pgLineBase *line, SDL_Rect *rect, double max_t, return 1; } -#endif /* ~AVX2_IS_SUPPORTED */ \ No newline at end of file +#endif /* ~AVX2_IS_SUPPORTED */