diff --git a/clang/lib/Headers/ammintrin.h b/clang/lib/Headers/ammintrin.h index 1af2096595cac..f549ab80d9465 100644 --- a/clang/lib/Headers/ammintrin.h +++ b/clang/lib/Headers/ammintrin.h @@ -155,9 +155,9 @@ _mm_insert_si64(__m128i __x, __m128i __y) /// \param __a /// The 64-bit double-precision floating-point register value to be stored. static __inline__ void __DEFAULT_FN_ATTRS -_mm_stream_sd(double *__p, __m128d __a) +_mm_stream_sd(void *__p, __m128d __a) { - __builtin_ia32_movntsd(__p, (__v2df)__a); + __builtin_ia32_movntsd((double *)__p, (__v2df)__a); } /// Stores a 32-bit single-precision floating-point value in a 32-bit @@ -173,9 +173,9 @@ _mm_stream_sd(double *__p, __m128d __a) /// \param __a /// The 32-bit single-precision floating-point register value to be stored. static __inline__ void __DEFAULT_FN_ATTRS -_mm_stream_ss(float *__p, __m128 __a) +_mm_stream_ss(void *__p, __m128 __a) { - __builtin_ia32_movntss(__p, (__v4sf)__a); + __builtin_ia32_movntss((float *)__p, (__v4sf)__a); } #undef __DEFAULT_FN_ATTRS diff --git a/clang/lib/Headers/avx2intrin.h b/clang/lib/Headers/avx2intrin.h index c45006193eddc..9196c8c7d24f7 100644 --- a/clang/lib/Headers/avx2intrin.h +++ b/clang/lib/Headers/avx2intrin.h @@ -2979,7 +2979,7 @@ _mm256_xor_si256(__m256i __a, __m256i __b) /// A pointer to the 32-byte aligned memory containing the vector to load. /// \returns A 256-bit integer vector loaded from memory. static __inline__ __m256i __DEFAULT_FN_ATTRS256 -_mm256_stream_load_si256(__m256i const *__V) +_mm256_stream_load_si256(const void *__V) { typedef __v4di __v4di_aligned __attribute__((aligned(32))); return (__m256i)__builtin_nontemporal_load((const __v4di_aligned *)__V); diff --git a/clang/lib/Headers/avxintrin.h b/clang/lib/Headers/avxintrin.h index 94fac5e6c9da4..b796bb773ec11 100644 --- a/clang/lib/Headers/avxintrin.h +++ b/clang/lib/Headers/avxintrin.h @@ -3563,7 +3563,7 @@ _mm_maskstore_ps(float *__p, __m128i __m, __m128 __a) /// \param __b /// A 256-bit integer vector containing the values to be moved. static __inline void __DEFAULT_FN_ATTRS -_mm256_stream_si256(__m256i *__a, __m256i __b) +_mm256_stream_si256(void *__a, __m256i __b) { typedef __v4di __v4di_aligned __attribute__((aligned(32))); __builtin_nontemporal_store((__v4di_aligned)__b, (__v4di_aligned*)__a); @@ -3583,7 +3583,7 @@ _mm256_stream_si256(__m256i *__a, __m256i __b) /// \param __b /// A 256-bit vector of [4 x double] containing the values to be moved. static __inline void __DEFAULT_FN_ATTRS -_mm256_stream_pd(double *__a, __m256d __b) +_mm256_stream_pd(void *__a, __m256d __b) { typedef __v4df __v4df_aligned __attribute__((aligned(32))); __builtin_nontemporal_store((__v4df_aligned)__b, (__v4df_aligned*)__a); @@ -3604,7 +3604,7 @@ _mm256_stream_pd(double *__a, __m256d __b) /// \param __a /// A 256-bit vector of [8 x float] containing the values to be moved. static __inline void __DEFAULT_FN_ATTRS -_mm256_stream_ps(float *__p, __m256 __a) +_mm256_stream_ps(void *__p, __m256 __a) { typedef __v8sf __v8sf_aligned __attribute__((aligned(32))); __builtin_nontemporal_store((__v8sf_aligned)__a, (__v8sf_aligned*)__p); diff --git a/clang/lib/Headers/emmintrin.h b/clang/lib/Headers/emmintrin.h index 064d974936598..8de2864b11065 100644 --- a/clang/lib/Headers/emmintrin.h +++ b/clang/lib/Headers/emmintrin.h @@ -3945,7 +3945,7 @@ static __inline__ void __DEFAULT_FN_ATTRS _mm_storel_epi64(__m128i_u *__p, /// A pointer to the 128-bit aligned memory location used to store the value. /// \param __a /// A vector of [2 x double] containing the 64-bit values to be stored. -static __inline__ void __DEFAULT_FN_ATTRS _mm_stream_pd(double *__p, +static __inline__ void __DEFAULT_FN_ATTRS _mm_stream_pd(void *__p, __m128d __a) { __builtin_nontemporal_store((__v2df)__a, (__v2df *)__p); } @@ -3963,7 +3963,7 @@ static __inline__ void __DEFAULT_FN_ATTRS _mm_stream_pd(double *__p, /// A pointer to the 128-bit aligned memory location used to store the value. /// \param __a /// A 128-bit integer vector containing the values to be stored. -static __inline__ void __DEFAULT_FN_ATTRS _mm_stream_si128(__m128i *__p, +static __inline__ void __DEFAULT_FN_ATTRS _mm_stream_si128(void *__p, __m128i __a) { __builtin_nontemporal_store((__v2di)__a, (__v2di *)__p); } @@ -3983,8 +3983,8 @@ static __inline__ void __DEFAULT_FN_ATTRS _mm_stream_si128(__m128i *__p, /// A 32-bit integer containing the value to be stored. static __inline__ void __attribute__((__always_inline__, __nodebug__, __target__("sse2"))) - _mm_stream_si32(int *__p, int __a) { - __builtin_ia32_movnti(__p, __a); + _mm_stream_si32(void *__p, int __a) { + __builtin_ia32_movnti((int *)__p, __a); } #ifdef __x86_64__ @@ -4003,8 +4003,8 @@ static __inline__ void /// A 64-bit integer containing the value to be stored. static __inline__ void __attribute__((__always_inline__, __nodebug__, __target__("sse2"))) - _mm_stream_si64(long long *__p, long long __a) { - __builtin_ia32_movnti64(__p, __a); + _mm_stream_si64(void *__p, long long __a) { + __builtin_ia32_movnti64((long long *)__p, __a); } #endif diff --git a/clang/lib/Headers/smmintrin.h b/clang/lib/Headers/smmintrin.h index 16d8855a1c0b5..feb61bec3b487 100644 --- a/clang/lib/Headers/smmintrin.h +++ b/clang/lib/Headers/smmintrin.h @@ -645,7 +645,7 @@ static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_mul_epi32(__m128i __V1, /// \returns A 128-bit integer vector containing the data stored at the /// specified memory location. static __inline__ __m128i __DEFAULT_FN_ATTRS -_mm_stream_load_si128(__m128i const *__V) { +_mm_stream_load_si128(const void *__V) { return (__m128i)__builtin_nontemporal_load((const __v2di *)__V); } diff --git a/clang/lib/Headers/xmmintrin.h b/clang/lib/Headers/xmmintrin.h index 80aa2a817f6af..2d1d33c69f0bb 100644 --- a/clang/lib/Headers/xmmintrin.h +++ b/clang/lib/Headers/xmmintrin.h @@ -2121,9 +2121,9 @@ _mm_storer_ps(float *__p, __m128 __a) /// \param __a /// A 64-bit integer containing the value to be stored. static __inline__ void __DEFAULT_FN_ATTRS_MMX -_mm_stream_pi(__m64 *__p, __m64 __a) +_mm_stream_pi(void *__p, __m64 __a) { - __builtin_ia32_movntq(__p, __a); + __builtin_ia32_movntq((__m64 *)__p, __a); } /// Moves packed float values from a 128-bit vector of [4 x float] to a @@ -2140,7 +2140,7 @@ _mm_stream_pi(__m64 *__p, __m64 __a) /// \param __a /// A 128-bit vector of [4 x float] containing the values to be moved. static __inline__ void __DEFAULT_FN_ATTRS -_mm_stream_ps(float *__p, __m128 __a) +_mm_stream_ps(void *__p, __m128 __a) { __builtin_nontemporal_store((__v4sf)__a, (__v4sf*)__p); } diff --git a/clang/test/CodeGen/X86/avx-builtins.c b/clang/test/CodeGen/X86/avx-builtins.c index b68d192051b9b..9178ecaf3f8fe 100644 --- a/clang/test/CodeGen/X86/avx-builtins.c +++ b/clang/test/CodeGen/X86/avx-builtins.c @@ -1940,18 +1940,36 @@ void test_mm256_stream_pd(double* A, __m256d B) { _mm256_stream_pd(A, B); } +void test_mm256_stream_pd_void(void *A, __m256d B) { + // CHECK-LABEL: test_mm256_stream_pd_void + // CHECK: store <4 x double> %{{.*}}, ptr %{{.*}}, align 32, !nontemporal + _mm256_stream_pd(A, B); +} + void test_mm256_stream_ps(float* A, __m256 B) { // CHECK-LABEL: test_mm256_stream_ps // CHECK: store <8 x float> %{{.*}}, ptr %{{.*}}, align 32, !nontemporal _mm256_stream_ps(A, B); } +void test_mm256_stream_ps_void(void *A, __m256 B) { + // CHECK-LABEL: test_mm256_stream_ps_void + // CHECK: store <8 x float> %{{.*}}, ptr %{{.*}}, align 32, !nontemporal + _mm256_stream_ps(A, B); +} + void test_mm256_stream_si256(__m256i* A, __m256i B) { // CHECK-LABEL: test_mm256_stream_si256 // CHECK: store <4 x i64> %{{.*}}, ptr %{{.*}}, align 32, !nontemporal _mm256_stream_si256(A, B); } +void test_mm256_stream_si256_void(void *A, __m256i B) { + // CHECK-LABEL: test_mm256_stream_si256_void + // CHECK: store <4 x i64> %{{.*}}, ptr %{{.*}}, align 32, !nontemporal + _mm256_stream_si256(A, B); +} + __m256d test_mm256_sub_pd(__m256d A, __m256d B) { // CHECK-LABEL: test_mm256_sub_pd // CHECK: fsub <4 x double> diff --git a/clang/test/CodeGen/X86/avx2-builtins.c b/clang/test/CodeGen/X86/avx2-builtins.c index 2750e1b227483..fd72e25afdb45 100644 --- a/clang/test/CodeGen/X86/avx2-builtins.c +++ b/clang/test/CodeGen/X86/avx2-builtins.c @@ -1223,6 +1223,12 @@ __m256i test_mm256_stream_load_si256(__m256i const *a) { return _mm256_stream_load_si256(a); } +__m256i test_mm256_stream_load_si256_void(const void *a) { + // CHECK-LABEL: test_mm256_stream_load_si256_void + // CHECK: load <4 x i64>, ptr %{{.*}}, align 32, !nontemporal + return _mm256_stream_load_si256(a); +} + __m256i test_mm256_sub_epi8(__m256i a, __m256i b) { // CHECK-LABEL: test_mm256_sub_epi8 // CHECK: sub <32 x i8> diff --git a/clang/test/CodeGen/X86/mmx-builtins.c b/clang/test/CodeGen/X86/mmx-builtins.c index 663dd3f9341d7..5b5bc301bddc0 100644 --- a/clang/test/CodeGen/X86/mmx-builtins.c +++ b/clang/test/CodeGen/X86/mmx-builtins.c @@ -601,6 +601,12 @@ void test_mm_stream_pi(__m64 *p, __m64 a) { _mm_stream_pi(p, a); } +void test_mm_stream_pi_void(void *p, __m64 a) { + // CHECK-LABEL: test_mm_stream_pi_void + // CHECK: call void @llvm.x86.mmx.movnt.dq + _mm_stream_pi(p, a); +} + __m64 test_mm_sub_pi8(__m64 a, __m64 b) { // CHECK-LABEL: test_mm_sub_pi8 // CHECK: call x86_mmx @llvm.x86.mmx.psub.b diff --git a/clang/test/CodeGen/X86/sse-builtins.c b/clang/test/CodeGen/X86/sse-builtins.c index da40380926d2c..885c82856522d 100644 --- a/clang/test/CodeGen/X86/sse-builtins.c +++ b/clang/test/CodeGen/X86/sse-builtins.c @@ -720,6 +720,12 @@ void test_mm_stream_ps(float*A, __m128 B) { _mm_stream_ps(A, B); } +void test_mm_stream_ps_void(void *A, __m128 B) { + // CHECK-LABEL: test_mm_stream_ps_void + // CHECK: store <4 x float> %{{.*}}, ptr %{{.*}}, align 16, !nontemporal + _mm_stream_ps(A, B); +} + __m128 test_mm_sub_ps(__m128 A, __m128 B) { // CHECK-LABEL: test_mm_sub_ps // CHECK: fsub <4 x float> diff --git a/clang/test/CodeGen/X86/sse2-builtins.c b/clang/test/CodeGen/X86/sse2-builtins.c index 7c62a128c331f..7165d2791827c 100644 --- a/clang/test/CodeGen/X86/sse2-builtins.c +++ b/clang/test/CodeGen/X86/sse2-builtins.c @@ -1488,18 +1488,36 @@ void test_mm_stream_pd(double *A, __m128d B) { _mm_stream_pd(A, B); } +void test_mm_stream_pd_void(void *A, __m128d B) { + // CHECK-LABEL: test_mm_stream_pd_void + // CHECK: store <2 x double> %{{.*}}, ptr %{{.*}}, align 16, !nontemporal + _mm_stream_pd(A, B); +} + void test_mm_stream_si32(int *A, int B) { // CHECK-LABEL: test_mm_stream_si32 // CHECK: store i32 %{{.*}}, ptr %{{.*}}, align 1, !nontemporal _mm_stream_si32(A, B); } +void test_mm_stream_si32_void(void *A, int B) { + // CHECK-LABEL: test_mm_stream_si32_void + // CHECK: store i32 %{{.*}}, ptr %{{.*}}, align 1, !nontemporal + _mm_stream_si32(A, B); +} + #ifdef __x86_64__ void test_mm_stream_si64(long long *A, long long B) { // X64-LABEL: test_mm_stream_si64 // X64: store i64 %{{.*}}, ptr %{{.*}}, align 1, !nontemporal _mm_stream_si64(A, B); } + +void test_mm_stream_si64_void(void *A, long long B) { + // X64-LABEL: test_mm_stream_si64_void + // X64: store i64 %{{.*}}, ptr %{{.*}}, align 1, !nontemporal + _mm_stream_si64(A, B); +} #endif void test_mm_stream_si128(__m128i *A, __m128i B) { @@ -1508,6 +1526,12 @@ void test_mm_stream_si128(__m128i *A, __m128i B) { _mm_stream_si128(A, B); } +void test_mm_stream_si128_void(void *A, __m128i B) { + // CHECK-LABEL: test_mm_stream_si128_void + // CHECK: store <2 x i64> %{{.*}}, ptr %{{.*}}, align 16, !nontemporal + _mm_stream_si128(A, B); +} + __m128i test_mm_sub_epi8(__m128i A, __m128i B) { // CHECK-LABEL: test_mm_sub_epi8 // CHECK: sub <16 x i8> diff --git a/clang/test/CodeGen/X86/sse41-builtins.c b/clang/test/CodeGen/X86/sse41-builtins.c index fe59cbcaf1938..bfe7a917a8855 100644 --- a/clang/test/CodeGen/X86/sse41-builtins.c +++ b/clang/test/CodeGen/X86/sse41-builtins.c @@ -358,6 +358,12 @@ __m128i test_mm_stream_load_si128(__m128i const *a) { return _mm_stream_load_si128(a); } +__m128i test_mm_stream_load_si128_void(const void *a) { + // CHECK-LABEL: test_mm_stream_load_si128_void + // CHECK: load <2 x i64>, ptr %{{.*}}, align 16, !nontemporal + return _mm_stream_load_si128(a); +} + int test_mm_test_all_ones(__m128i x) { // CHECK-LABEL: test_mm_test_all_ones // CHECK: call i32 @llvm.x86.sse41.ptestc(<2 x i64> %{{.*}}, <2 x i64> %{{.*}}) diff --git a/clang/test/CodeGen/X86/sse4a-builtins.c b/clang/test/CodeGen/X86/sse4a-builtins.c index 0180f08a3c2e0..5d38a9cde329d 100644 --- a/clang/test/CodeGen/X86/sse4a-builtins.c +++ b/clang/test/CodeGen/X86/sse4a-builtins.c @@ -37,9 +37,23 @@ void test_mm_stream_sd(double *p, __m128d a) { _mm_stream_sd(p, a); } +void test_mm_stream_sd_void(void *p, __m128d a) { + // CHECK-LABEL: test_mm_stream_sd_void + // CHECK: extractelement <2 x double> %{{.*}}, i64 0 + // CHECK: store double %{{.*}}, ptr %{{.*}}, align 1, !nontemporal + _mm_stream_sd(p, a); +} + void test_mm_stream_ss(float *p, __m128 a) { // CHECK-LABEL: test_mm_stream_ss // CHECK: extractelement <4 x float> %{{.*}}, i64 0 // CHECK: store float %{{.*}}, ptr %{{.*}}, align 1, !nontemporal _mm_stream_ss(p, a); } + +void test_mm_stream_s_void(void *p, __m128 a) { + // CHECK-LABEL: test_mm_stream_s_void + // CHECK: extractelement <4 x float> %{{.*}}, i64 0 + // CHECK: store float %{{.*}}, ptr %{{.*}}, align 1, !nontemporal + _mm_stream_ss(p, a); +}