From 4b0c6f6634b62aada5426ea556574717d1095a45 Mon Sep 17 00:00:00 2001 From: Fabian Giesen Date: Tue, 17 Feb 2015 01:21:40 -0800 Subject: [PATCH] stb_image: NEON and SSE2 SIMD detection fixes. This fixes two things. First, the logic to disable SSE2 on GCC unless "-msse2" is specified was not specific enough, and ended up disabling SIMD support on NEON targets entirely. Shuffle the detection logic around to make that bit x86-specific. Second, 32-bit MinGW assumes 16-byte aligned stacks, but this is not in the Windows ABI and hence DLLs and callbacks don't necessarily provide it. This caused a crash. This can be fixed by providing the right command-line option, which we have no control over. As a compromise, disable the SSE2 path on MinGW unless a specific #define explained in the comments is set. That way, we default to safe (never-crashing) behavior unless the user explicitly signals they know what they're doing. --- stb_image.h | 22 ++++++++++++++++++++-- 1 file changed, 20 insertions(+), 2 deletions(-) diff --git a/stb_image.h b/stb_image.h index c3945c2e25..2af8e3a403 100644 --- a/stb_image.h +++ b/stb_image.h @@ -624,7 +624,12 @@ typedef unsigned char validate_uint32[sizeof(stbi__uint32)==4 ? 1 : -1]; #define STBI_FREE(p) free(p) #endif -#if defined(__GNUC__) && !defined(__SSE2__) && !defined(STBI_NO_SIMD) +// x86/x64 detection +#if defined(__x86_64__) || defined(_M_X64) || defined(__i386) || defined(_M_IX86) +#define STBI_X86_TARGET +#endif + +#if defined(__GNUC__) && defined(STBI_X86_TARGET) && !defined(__SSE2__) && !defined(STBI_NO_SIMD) // gcc doesn't support sse2 intrinsics unless you compile with -msse2, // (but compiling with -msse2 allows the compiler to use SSE2 everywhere; // this is just broken and gcc are jerks for not fixing it properly @@ -632,7 +637,20 @@ typedef unsigned char validate_uint32[sizeof(stbi__uint32)==4 ?
1 : -1]; #define STBI_NO_SIMD #endif -#if !defined(STBI_NO_SIMD) && (defined(__x86_64__) || defined(_M_X64) || defined(__i386) || defined(_M_IX86)) +#if defined(__MINGW32__) && defined(STBI_X86_TARGET) && !defined(STBI_MINGW_ENABLE_SSE2) && !defined(STBI_NO_SIMD) +// 32-bit MinGW wants ESP to be 16-byte aligned, but this is not in the +// Windows ABI and VC++ as well as Windows DLLs don't maintain that invariant. +// As a result, enabling SSE2 on 32-bit MinGW is dangerous when not +// simultaneously enabling "-mstackrealign". +// +// See https://github.com/nothings/stb/issues/81 for more information. +// +// So default to no SSE2 on 32-bit MinGW. If you've read this far and added +// -mstackrealign to your build settings, feel free to #define STBI_MINGW_ENABLE_SSE2. +#define STBI_NO_SIMD +#endif + +#if !defined(STBI_NO_SIMD) && defined(STBI_X86_TARGET) #define STBI_SSE2 #include