Skip to content
Permalink
Browse files

Fixed bug in AVX detection and added AVX2 detection

  • Loading branch information
slouken committed Jul 12, 2014
1 parent f1ca7bd commit eb1c6044b2d5b657f9fbffff720c218db35f439f
Showing with 89 additions and 16 deletions.
  1. +5 −0 include/SDL_cpuinfo.h
  2. +82 −16 src/cpuinfo/SDL_cpuinfo.c
  3. +1 −0 src/dynapi/SDL_dynapi_overrides.h
  4. +1 −0 src/dynapi/SDL_dynapi_procs.h
@@ -139,6 +139,11 @@ extern DECLSPEC SDL_bool SDLCALL SDL_HasSSE42(void);
*/
extern DECLSPEC SDL_bool SDLCALL SDL_HasAVX(void);

/**
 * This function returns true if the CPU has AVX2 features.
 *
 * \return SDL_TRUE if AVX2 was detected, SDL_FALSE otherwise.
 */
extern DECLSPEC SDL_bool SDLCALL SDL_HasAVX2(void);

/**
* This function returns the amount of RAM configured in the system, in MB.
*/
@@ -60,6 +60,7 @@
#define CPU_HAS_SSE41 0x00000100
#define CPU_HAS_SSE42 0x00000200
#define CPU_HAS_AVX 0x00000400
#define CPU_HAS_AVX2 0x00000800

#if SDL_ALTIVEC_BLITTERS && HAVE_SETJMP && !__MACOSX__ && !__OpenBSD__
/* This is the brute force way of detecting instruction sets...
@@ -73,7 +74,7 @@ illegal_instruction(int sig)
}
#endif /* HAVE_SETJMP */

static SDL_INLINE int
static int
CPU_haveCPUID(void)
{
int has_CPUID = 0;
@@ -172,6 +173,7 @@ CPU_haveCPUID(void)
#define cpuid(func, a, b, c, d) \
__asm__ __volatile__ ( \
" pushl %%ebx \n" \
" xorl %%ecx,%%ecx \n" \
" cpuid \n" \
" movl %%ebx, %%esi \n" \
" popl %%ebx \n" : \
@@ -180,6 +182,7 @@ CPU_haveCPUID(void)
#define cpuid(func, a, b, c, d) \
__asm__ __volatile__ ( \
" pushq %%rbx \n" \
" xorq %%rcx,%%rcx \n" \
" cpuid \n" \
" movq %%rbx, %%rsi \n" \
" popq %%rbx \n" : \
@@ -188,6 +191,7 @@ CPU_haveCPUID(void)
#define cpuid(func, a, b, c, d) \
__asm { \
__asm mov eax, func \
__asm xor ecx, ecx \
__asm cpuid \
__asm mov a, eax \
__asm mov b, ebx \
@@ -209,7 +213,7 @@ CPU_haveCPUID(void)
a = b = c = d = 0
#endif

static SDL_INLINE int
static int
CPU_getCPUIDFeatures(void)
{
int features = 0;
@@ -223,7 +227,41 @@ CPU_getCPUIDFeatures(void)
return features;
}

static SDL_INLINE int
/*
 * Report whether the operating system saves and restores the YMM registers
 * across context switches, which is a precondition for using AVX/AVX2.
 *
 * The check has two stages:
 *   1. CPUID leaf 1 must exist and ECX bit 27 (0x08000000) must be set,
 *      otherwise XGETBV cannot be executed.
 *   2. XGETBV with ECX = 0 is executed and bits 1 and 2 of the low dword of
 *      the result (mask 6) must both be set.
 *
 * Returns SDL_TRUE when both stages pass, SDL_FALSE otherwise. On targets
 * with no XGETBV implementation below, the value stays 0 and the function
 * returns SDL_FALSE instead of failing the build.
 */
static SDL_bool
CPU_OSSavesYMM(void)
{
    int a, b, c, d;

    /* Check to make sure we can call xgetbv */
    cpuid(0, a, b, c, d);
    if (a < 1) {
        return SDL_FALSE;
    }
    cpuid(1, a, b, c, d);
    if (!(c & 0x08000000)) {
        return SDL_FALSE;
    }

    /* Call xgetbv to see if YMM register state is saved */
    a = 0;
#if defined(__GNUC__) && (defined(i386) || defined(__i386__) || defined(__x86_64__))
    /* The opcode is emitted as raw bytes so old assemblers that do not know
       the xgetbv mnemonic still work. __asm__ (not asm) keeps this valid in
       strict ISO C modes, matching the cpuid macro. */
    __asm__(".byte 0x0f, 0x01, 0xd0" : "=a" (a) : "c" (0) : "%edx");
#elif defined(_MSC_VER) && (defined(_M_IX86) || defined(_M_X64)) && (_MSC_FULL_VER >= 160040219) /* VS2010 SP1 */
    a = (int)_xgetbv(0);
#elif (defined(_MSC_VER) && defined(_M_IX86)) || defined(__WATCOMC__)
    __asm
    {
        xor ecx, ecx
        _asm _emit 0x0f _asm _emit 0x01 _asm _emit 0xd0
        mov a, eax      /* xgetbv leaves the low 32 bits of the result in eax */
    }
#endif
    /* No implementation for this target: 'a' is still 0, so report that YMM
       state is not saved rather than breaking the build. */
    return ((a & 6) == 6) ? SDL_TRUE : SDL_FALSE;
}

static int
CPU_haveRDTSC(void)
{
if (CPU_haveCPUID()) {
@@ -232,7 +270,7 @@ CPU_haveRDTSC(void)
return 0;
}

static SDL_INLINE int
static int
CPU_haveAltiVec(void)
{
volatile int altivec = 0;
@@ -259,7 +297,7 @@ CPU_haveAltiVec(void)
return altivec;
}

static SDL_INLINE int
static int
CPU_haveMMX(void)
{
if (CPU_haveCPUID()) {
@@ -268,7 +306,7 @@ CPU_haveMMX(void)
return 0;
}

static SDL_INLINE int
static int
CPU_have3DNow(void)
{
if (CPU_haveCPUID()) {
@@ -283,7 +321,7 @@ CPU_have3DNow(void)
return 0;
}

static SDL_INLINE int
static int
CPU_haveSSE(void)
{
if (CPU_haveCPUID()) {
@@ -292,7 +330,7 @@ CPU_haveSSE(void)
return 0;
}

static SDL_INLINE int
static int
CPU_haveSSE2(void)
{
if (CPU_haveCPUID()) {
@@ -301,7 +339,7 @@ CPU_haveSSE2(void)
return 0;
}

static SDL_INLINE int
static int
CPU_haveSSE3(void)
{
if (CPU_haveCPUID()) {
@@ -316,13 +354,13 @@ CPU_haveSSE3(void)
return 0;
}

static SDL_INLINE int
static int
CPU_haveSSE41(void)
{
if (CPU_haveCPUID()) {
int a, b, c, d;

cpuid(1, a, b, c, d);
cpuid(0, a, b, c, d);
if (a >= 1) {
cpuid(1, a, b, c, d);
return (c & 0x00080000);
@@ -331,13 +369,13 @@ CPU_haveSSE41(void)
return 0;
}

static SDL_INLINE int
static int
CPU_haveSSE42(void)
{
if (CPU_haveCPUID()) {
int a, b, c, d;

cpuid(1, a, b, c, d);
cpuid(0, a, b, c, d);
if (a >= 1) {
cpuid(1, a, b, c, d);
return (c & 0x00100000);
@@ -346,13 +384,13 @@ CPU_haveSSE42(void)
return 0;
}

static SDL_INLINE int
static int
CPU_haveAVX(void)
{
if (CPU_haveCPUID()) {
if (CPU_haveCPUID() && CPU_OSSavesYMM()) {
int a, b, c, d;

cpuid(1, a, b, c, d);
cpuid(0, a, b, c, d);
if (a >= 1) {
cpuid(1, a, b, c, d);
return (c & 0x10000000);
@@ -361,6 +399,21 @@ CPU_haveAVX(void)
return 0;
}

/*
 * Detect AVX2 support.
 *
 * Returns non-zero (the raw masked bit 0x00000020 of EBX from CPUID leaf 7)
 * when CPUID is available, CPU_OSSavesYMM() succeeds, CPUID leaf 7 exists
 * and the bit is set; returns 0 in every other case.
 */
static int
CPU_haveAVX2(void)
{
    /* Keep the names a/b/c/d: the cpuid macro pastes them into inline
       assembly on some compilers, so register-like names are unsafe. */
    int a, b, c, d;

    if (!CPU_haveCPUID()) {
        return 0;
    }
    if (!CPU_OSSavesYMM()) {
        return 0;
    }

    /* Leaf 0 reports the highest supported standard leaf in 'a'. */
    cpuid(0, a, b, c, d);
    if (a < 7) {
        return 0;
    }

    cpuid(7, a, b, c, d);
    return (b & 0x00000020);
}

static int SDL_CPUCount = 0;

int
@@ -560,6 +613,9 @@ SDL_GetCPUFeatures(void)
if (CPU_haveAVX()) {
SDL_CPUFeatures |= CPU_HAS_AVX;
}
if (CPU_haveAVX2()) {
SDL_CPUFeatures |= CPU_HAS_AVX2;
}
}
return SDL_CPUFeatures;
}
@@ -654,6 +710,15 @@ SDL_HasAVX(void)
return SDL_FALSE;
}

/* Public query: SDL_TRUE when the CPU feature mask returned by
   SDL_GetCPUFeatures() has CPU_HAS_AVX2 set, SDL_FALSE otherwise. */
SDL_bool
SDL_HasAVX2(void)
{
    return (SDL_GetCPUFeatures() & CPU_HAS_AVX2) ? SDL_TRUE : SDL_FALSE;
}

static int SDL_SystemRAM = 0;

int
@@ -720,6 +785,7 @@ main()
printf("SSE4.1: %d\n", SDL_HasSSE41());
printf("SSE4.2: %d\n", SDL_HasSSE42());
printf("AVX: %d\n", SDL_HasAVX());
printf("AVX2: %d\n", SDL_HasAVX2());
printf("RAM: %d MB\n", SDL_GetSystemRAM());
return 0;
}
@@ -587,3 +587,4 @@
#define SDL_CaptureMouse SDL_CaptureMouse_REAL
#define SDL_SetWindowHitTest SDL_SetWindowHitTest_REAL
#define SDL_GetGlobalMouseState SDL_GetGlobalMouseState_REAL
#define SDL_HasAVX2 SDL_HasAVX2_REAL
@@ -619,3 +619,4 @@ SDL_DYNAPI_PROC(float,SDL_tanf,(float a),(a),return)
SDL_DYNAPI_PROC(int,SDL_CaptureMouse,(SDL_bool a),(a),return)
SDL_DYNAPI_PROC(int,SDL_SetWindowHitTest,(SDL_Window *a, SDL_HitTest b, void *c),(a,b,c),return)
SDL_DYNAPI_PROC(Uint32,SDL_GetGlobalMouseState,(int *a, int *b),(a,b),return)
SDL_DYNAPI_PROC(SDL_bool,SDL_HasAVX2,(void),(),return)

0 comments on commit eb1c604

Please sign in to comment.