271 changes: 271 additions & 0 deletions clang/lib/Headers/avx512erintrin.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,271 @@
/*===---- avx512erintrin.h - AVX512ER intrinsics ---------------------------===
*
* Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
* See https://llvm.org/LICENSE.txt for license information.
* SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
*
*===-----------------------------------------------------------------------===
*/
#ifndef __IMMINTRIN_H
#error "Never use <avx512erintrin.h> directly; include <immintrin.h> instead."
#endif

#ifndef __AVX512ERINTRIN_H
#define __AVX512ERINTRIN_H

/* exp2a23, packed double (__m512d / __v8df, __mmask8).
 *
 * Every wrapper below ends up in __builtin_ia32_exp2pd_mask, whose four
 * operands are passed as (A, second vector, mask, R):
 *   - the plain form passes a zero vector and an all-ones mask;
 *   - the mask_ form passes S as the second vector and M as the mask;
 *   - the maskz_ form passes a zero vector and M as the mask.
 * The non-"round" names forward _MM_FROUND_CUR_DIRECTION as R.
 * NOTE(review): these are macros rather than inline functions, presumably
 * because R has to be a compile-time constant -- confirm. */
#define _mm512_exp2a23_round_pd(A, R) \
((__m512d)__builtin_ia32_exp2pd_mask((__v8df)(__m512d)(A), \
(__v8df)_mm512_setzero_pd(), \
(__mmask8)-1, (int)(R)))

#define _mm512_mask_exp2a23_round_pd(S, M, A, R) \
((__m512d)__builtin_ia32_exp2pd_mask((__v8df)(__m512d)(A), \
(__v8df)(__m512d)(S), (__mmask8)(M), \
(int)(R)))

#define _mm512_maskz_exp2a23_round_pd(M, A, R) \
((__m512d)__builtin_ia32_exp2pd_mask((__v8df)(__m512d)(A), \
(__v8df)_mm512_setzero_pd(), \
(__mmask8)(M), (int)(R)))

#define _mm512_exp2a23_pd(A) \
_mm512_exp2a23_round_pd((A), _MM_FROUND_CUR_DIRECTION)

#define _mm512_mask_exp2a23_pd(S, M, A) \
_mm512_mask_exp2a23_round_pd((S), (M), (A), _MM_FROUND_CUR_DIRECTION)

#define _mm512_maskz_exp2a23_pd(M, A) \
_mm512_maskz_exp2a23_round_pd((M), (A), _MM_FROUND_CUR_DIRECTION)

/* exp2a23, packed single (__m512 / __v16sf, __mmask16).
 *
 * Same layout as the packed-double wrappers: all six macros reach
 * __builtin_ia32_exp2ps_mask with operands (A, second vector, mask, R).
 * Plain forms use a zero vector and an all-ones mask, mask_ forms use S and M,
 * maskz_ forms use a zero vector and M.  Names without "round" forward
 * _MM_FROUND_CUR_DIRECTION as R. */
#define _mm512_exp2a23_round_ps(A, R) \
((__m512)__builtin_ia32_exp2ps_mask((__v16sf)(__m512)(A), \
(__v16sf)_mm512_setzero_ps(), \
(__mmask16)-1, (int)(R)))

#define _mm512_mask_exp2a23_round_ps(S, M, A, R) \
((__m512)__builtin_ia32_exp2ps_mask((__v16sf)(__m512)(A), \
(__v16sf)(__m512)(S), (__mmask16)(M), \
(int)(R)))

#define _mm512_maskz_exp2a23_round_ps(M, A, R) \
((__m512)__builtin_ia32_exp2ps_mask((__v16sf)(__m512)(A), \
(__v16sf)_mm512_setzero_ps(), \
(__mmask16)(M), (int)(R)))

#define _mm512_exp2a23_ps(A) \
_mm512_exp2a23_round_ps((A), _MM_FROUND_CUR_DIRECTION)

#define _mm512_mask_exp2a23_ps(S, M, A) \
_mm512_mask_exp2a23_round_ps((S), (M), (A), _MM_FROUND_CUR_DIRECTION)

#define _mm512_maskz_exp2a23_ps(M, A) \
_mm512_maskz_exp2a23_round_ps((M), (A), _MM_FROUND_CUR_DIRECTION)

/* rsqrt28, packed double (__m512d / __v8df, __mmask8).
 *
 * All six macros reach __builtin_ia32_rsqrt28pd_mask with operands
 * (A, second vector, mask, R):
 *   - plain form:  zero vector, all-ones mask;
 *   - mask_ form:  S as the second vector, M as the mask;
 *   - maskz_ form: zero vector, M as the mask.
 * Names without "round" forward _MM_FROUND_CUR_DIRECTION as R. */
#define _mm512_rsqrt28_round_pd(A, R) \
((__m512d)__builtin_ia32_rsqrt28pd_mask((__v8df)(__m512d)(A), \
(__v8df)_mm512_setzero_pd(), \
(__mmask8)-1, (int)(R)))

#define _mm512_mask_rsqrt28_round_pd(S, M, A, R) \
((__m512d)__builtin_ia32_rsqrt28pd_mask((__v8df)(__m512d)(A), \
(__v8df)(__m512d)(S), (__mmask8)(M), \
(int)(R)))

#define _mm512_maskz_rsqrt28_round_pd(M, A, R) \
((__m512d)__builtin_ia32_rsqrt28pd_mask((__v8df)(__m512d)(A), \
(__v8df)_mm512_setzero_pd(), \
(__mmask8)(M), (int)(R)))

#define _mm512_rsqrt28_pd(A) \
_mm512_rsqrt28_round_pd((A), _MM_FROUND_CUR_DIRECTION)

#define _mm512_mask_rsqrt28_pd(S, M, A) \
_mm512_mask_rsqrt28_round_pd((S), (M), (A), _MM_FROUND_CUR_DIRECTION)

#define _mm512_maskz_rsqrt28_pd(M, A) \
_mm512_maskz_rsqrt28_round_pd((M), (A), _MM_FROUND_CUR_DIRECTION)

/* rsqrt28, packed single (__m512 / __v16sf, __mmask16).
 *
 * The three *_round_ps macros call __builtin_ia32_rsqrt28ps_mask with operands
 * (A, second vector, mask, R): a zero vector and all-ones mask for the plain
 * form, S and M for the mask_ form, a zero vector and M for the maskz_ form.
 * _mm512_rsqrt28_ps forwards to the plain round form with
 * _MM_FROUND_CUR_DIRECTION. */
#define _mm512_rsqrt28_round_ps(A, R) \
((__m512)__builtin_ia32_rsqrt28ps_mask((__v16sf)(__m512)(A), \
(__v16sf)_mm512_setzero_ps(), \
(__mmask16)-1, (int)(R)))

#define _mm512_mask_rsqrt28_round_ps(S, M, A, R) \
((__m512)__builtin_ia32_rsqrt28ps_mask((__v16sf)(__m512)(A), \
(__v16sf)(__m512)(S), (__mmask16)(M), \
(int)(R)))

#define _mm512_maskz_rsqrt28_round_ps(M, A, R) \
((__m512)__builtin_ia32_rsqrt28ps_mask((__v16sf)(__m512)(A), \
(__v16sf)_mm512_setzero_ps(), \
(__mmask16)(M), (int)(R)))

#define _mm512_rsqrt28_ps(A) \
_mm512_rsqrt28_round_ps((A), _MM_FROUND_CUR_DIRECTION)

/* Masked packed-single rsqrt28 using the current rounding direction.
 * Forwards S, M and A to _mm512_mask_rsqrt28_round_ps with
 * _MM_FROUND_CUR_DIRECTION.  Each argument is parenthesized when forwarded,
 * as in the other non-"round" wrappers of this header. */
#define _mm512_mask_rsqrt28_ps(S, M, A) \
  _mm512_mask_rsqrt28_round_ps((S), (M), (A), _MM_FROUND_CUR_DIRECTION)

/* maskz_ packed-single rsqrt28 without an explicit rounding operand:
 * forwards M and A to _mm512_maskz_rsqrt28_round_ps with
 * _MM_FROUND_CUR_DIRECTION. */
#define _mm512_maskz_rsqrt28_ps(M, A) \
_mm512_maskz_rsqrt28_round_ps((M), (A), _MM_FROUND_CUR_DIRECTION)

/* rsqrt28, scalar single (__m128 / __v4sf, __mmask8).
 *
 * The *_round_ss macros call __builtin_ia32_rsqrt28ss_round_mask with five
 * operands (A, B, third vector, mask, R):
 *   - plain form:  zero vector, all-ones mask;
 *   - mask_ form:  S as the third vector, M as the mask;
 *   - maskz_ form: zero vector, M as the mask.
 * The mask is always cast to __mmask8.  Names without "round" forward
 * _MM_FROUND_CUR_DIRECTION as R. */
#define _mm_rsqrt28_round_ss(A, B, R) \
((__m128)__builtin_ia32_rsqrt28ss_round_mask((__v4sf)(__m128)(A), \
(__v4sf)(__m128)(B), \
(__v4sf)_mm_setzero_ps(), \
(__mmask8)-1, (int)(R)))

#define _mm_mask_rsqrt28_round_ss(S, M, A, B, R) \
((__m128)__builtin_ia32_rsqrt28ss_round_mask((__v4sf)(__m128)(A), \
(__v4sf)(__m128)(B), \
(__v4sf)(__m128)(S), \
(__mmask8)(M), (int)(R)))

#define _mm_maskz_rsqrt28_round_ss(M, A, B, R) \
((__m128)__builtin_ia32_rsqrt28ss_round_mask((__v4sf)(__m128)(A), \
(__v4sf)(__m128)(B), \
(__v4sf)_mm_setzero_ps(), \
(__mmask8)(M), (int)(R)))

#define _mm_rsqrt28_ss(A, B) \
_mm_rsqrt28_round_ss((A), (B), _MM_FROUND_CUR_DIRECTION)

#define _mm_mask_rsqrt28_ss(S, M, A, B) \
_mm_mask_rsqrt28_round_ss((S), (M), (A), (B), _MM_FROUND_CUR_DIRECTION)

#define _mm_maskz_rsqrt28_ss(M, A, B) \
_mm_maskz_rsqrt28_round_ss((M), (A), (B), _MM_FROUND_CUR_DIRECTION)

/* rsqrt28, scalar double (__m128d / __v2df, __mmask8).
 *
 * The *_round_sd macros call __builtin_ia32_rsqrt28sd_round_mask with five
 * operands (A, B, third vector, mask, R): a zero vector and all-ones mask for
 * the plain form, S and M for the mask_ form, a zero vector and M for the
 * maskz_ form.  Names without "round" forward _MM_FROUND_CUR_DIRECTION as R. */
#define _mm_rsqrt28_round_sd(A, B, R) \
((__m128d)__builtin_ia32_rsqrt28sd_round_mask((__v2df)(__m128d)(A), \
(__v2df)(__m128d)(B), \
(__v2df)_mm_setzero_pd(), \
(__mmask8)-1, (int)(R)))

#define _mm_mask_rsqrt28_round_sd(S, M, A, B, R) \
((__m128d)__builtin_ia32_rsqrt28sd_round_mask((__v2df)(__m128d)(A), \
(__v2df)(__m128d)(B), \
(__v2df)(__m128d)(S), \
(__mmask8)(M), (int)(R)))

#define _mm_maskz_rsqrt28_round_sd(M, A, B, R) \
((__m128d)__builtin_ia32_rsqrt28sd_round_mask((__v2df)(__m128d)(A), \
(__v2df)(__m128d)(B), \
(__v2df)_mm_setzero_pd(), \
(__mmask8)(M), (int)(R)))

#define _mm_rsqrt28_sd(A, B) \
_mm_rsqrt28_round_sd((A), (B), _MM_FROUND_CUR_DIRECTION)

#define _mm_mask_rsqrt28_sd(S, M, A, B) \
_mm_mask_rsqrt28_round_sd((S), (M), (A), (B), _MM_FROUND_CUR_DIRECTION)

#define _mm_maskz_rsqrt28_sd(M, A, B) \
_mm_maskz_rsqrt28_round_sd((M), (A), (B), _MM_FROUND_CUR_DIRECTION)

/* rcp28, packed double (__m512d / __v8df, __mmask8).
 *
 * All six macros reach __builtin_ia32_rcp28pd_mask with operands
 * (A, second vector, mask, R):
 *   - plain form:  zero vector, all-ones mask;
 *   - mask_ form:  S as the second vector, M as the mask;
 *   - maskz_ form: zero vector, M as the mask.
 * Names without "round" forward _MM_FROUND_CUR_DIRECTION as R. */
#define _mm512_rcp28_round_pd(A, R) \
((__m512d)__builtin_ia32_rcp28pd_mask((__v8df)(__m512d)(A), \
(__v8df)_mm512_setzero_pd(), \
(__mmask8)-1, (int)(R)))

#define _mm512_mask_rcp28_round_pd(S, M, A, R) \
((__m512d)__builtin_ia32_rcp28pd_mask((__v8df)(__m512d)(A), \
(__v8df)(__m512d)(S), (__mmask8)(M), \
(int)(R)))

#define _mm512_maskz_rcp28_round_pd(M, A, R) \
((__m512d)__builtin_ia32_rcp28pd_mask((__v8df)(__m512d)(A), \
(__v8df)_mm512_setzero_pd(), \
(__mmask8)(M), (int)(R)))

#define _mm512_rcp28_pd(A) \
_mm512_rcp28_round_pd((A), _MM_FROUND_CUR_DIRECTION)

#define _mm512_mask_rcp28_pd(S, M, A) \
_mm512_mask_rcp28_round_pd((S), (M), (A), _MM_FROUND_CUR_DIRECTION)

#define _mm512_maskz_rcp28_pd(M, A) \
_mm512_maskz_rcp28_round_pd((M), (A), _MM_FROUND_CUR_DIRECTION)

/* rcp28, packed single (__m512 / __v16sf, __mmask16).
 *
 * All six macros reach __builtin_ia32_rcp28ps_mask with operands
 * (A, second vector, mask, R): a zero vector and all-ones mask for the plain
 * form, S and M for the mask_ form, a zero vector and M for the maskz_ form.
 * Names without "round" forward _MM_FROUND_CUR_DIRECTION as R. */
#define _mm512_rcp28_round_ps(A, R) \
((__m512)__builtin_ia32_rcp28ps_mask((__v16sf)(__m512)(A), \
(__v16sf)_mm512_setzero_ps(), \
(__mmask16)-1, (int)(R)))

#define _mm512_mask_rcp28_round_ps(S, M, A, R) \
((__m512)__builtin_ia32_rcp28ps_mask((__v16sf)(__m512)(A), \
(__v16sf)(__m512)(S), (__mmask16)(M), \
(int)(R)))

#define _mm512_maskz_rcp28_round_ps(M, A, R) \
((__m512)__builtin_ia32_rcp28ps_mask((__v16sf)(__m512)(A), \
(__v16sf)_mm512_setzero_ps(), \
(__mmask16)(M), (int)(R)))

#define _mm512_rcp28_ps(A) \
_mm512_rcp28_round_ps((A), _MM_FROUND_CUR_DIRECTION)

#define _mm512_mask_rcp28_ps(S, M, A) \
_mm512_mask_rcp28_round_ps((S), (M), (A), _MM_FROUND_CUR_DIRECTION)

#define _mm512_maskz_rcp28_ps(M, A) \
_mm512_maskz_rcp28_round_ps((M), (A), _MM_FROUND_CUR_DIRECTION)

/* rcp28, scalar single (__m128 / __v4sf, __mmask8).
 *
 * The *_round_ss macros call __builtin_ia32_rcp28ss_round_mask with five
 * operands (A, B, third vector, mask, R): a zero vector and all-ones mask for
 * the plain form, S and M for the mask_ form, a zero vector and M for the
 * maskz_ form.  Names without "round" forward _MM_FROUND_CUR_DIRECTION as R. */
#define _mm_rcp28_round_ss(A, B, R) \
((__m128)__builtin_ia32_rcp28ss_round_mask((__v4sf)(__m128)(A), \
(__v4sf)(__m128)(B), \
(__v4sf)_mm_setzero_ps(), \
(__mmask8)-1, (int)(R)))

#define _mm_mask_rcp28_round_ss(S, M, A, B, R) \
((__m128)__builtin_ia32_rcp28ss_round_mask((__v4sf)(__m128)(A), \
(__v4sf)(__m128)(B), \
(__v4sf)(__m128)(S), \
(__mmask8)(M), (int)(R)))

#define _mm_maskz_rcp28_round_ss(M, A, B, R) \
((__m128)__builtin_ia32_rcp28ss_round_mask((__v4sf)(__m128)(A), \
(__v4sf)(__m128)(B), \
(__v4sf)_mm_setzero_ps(), \
(__mmask8)(M), (int)(R)))

#define _mm_rcp28_ss(A, B) \
_mm_rcp28_round_ss((A), (B), _MM_FROUND_CUR_DIRECTION)

#define _mm_mask_rcp28_ss(S, M, A, B) \
_mm_mask_rcp28_round_ss((S), (M), (A), (B), _MM_FROUND_CUR_DIRECTION)

#define _mm_maskz_rcp28_ss(M, A, B) \
_mm_maskz_rcp28_round_ss((M), (A), (B), _MM_FROUND_CUR_DIRECTION)

/* rcp28, scalar double (__m128d / __v2df, __mmask8).
 *
 * The *_round_sd macros call __builtin_ia32_rcp28sd_round_mask with five
 * operands (A, B, third vector, mask, R): a zero vector and all-ones mask for
 * the plain form, S and M for the mask_ form, a zero vector and M for the
 * maskz_ form.  Names without "round" forward _MM_FROUND_CUR_DIRECTION as R. */
#define _mm_rcp28_round_sd(A, B, R) \
((__m128d)__builtin_ia32_rcp28sd_round_mask((__v2df)(__m128d)(A), \
(__v2df)(__m128d)(B), \
(__v2df)_mm_setzero_pd(), \
(__mmask8)-1, (int)(R)))

#define _mm_mask_rcp28_round_sd(S, M, A, B, R) \
((__m128d)__builtin_ia32_rcp28sd_round_mask((__v2df)(__m128d)(A), \
(__v2df)(__m128d)(B), \
(__v2df)(__m128d)(S), \
(__mmask8)(M), (int)(R)))

#define _mm_maskz_rcp28_round_sd(M, A, B, R) \
((__m128d)__builtin_ia32_rcp28sd_round_mask((__v2df)(__m128d)(A), \
(__v2df)(__m128d)(B), \
(__v2df)_mm_setzero_pd(), \
(__mmask8)(M), (int)(R)))

#define _mm_rcp28_sd(A, B) \
_mm_rcp28_round_sd((A), (B), _MM_FROUND_CUR_DIRECTION)

#define _mm_mask_rcp28_sd(S, M, A, B) \
_mm_mask_rcp28_round_sd((S), (M), (A), (B), _MM_FROUND_CUR_DIRECTION)

#define _mm_maskz_rcp28_sd(M, A, B) \
_mm_maskz_rcp28_round_sd((M), (A), (B), _MM_FROUND_CUR_DIRECTION)

#endif /* __AVX512ERINTRIN_H */
92 changes: 92 additions & 0 deletions clang/lib/Headers/avx512pfintrin.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,92 @@
/*===------- avx512pfintrin.h - AVX512PF intrinsics ------------------------===
*
*
* Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
* See https://llvm.org/LICENSE.txt for license information.
* SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
*
*===-----------------------------------------------------------------------===
*/
#ifndef __IMMINTRIN_H
#error "Never use <avx512pfintrin.h> directly; include <immintrin.h> instead."
#endif

#ifndef __AVX512PFINTRIN_H
#define __AVX512PFINTRIN_H

/* Gather prefetch, 32-bit indices.
 *
 * The _pd forms call __builtin_ia32_gatherpfdpd with an __mmask8 and the index
 * cast to __v8si (from __m256i); the _ps forms call __builtin_ia32_gatherpfdps
 * with an __mmask16 and the index cast to __v16si (from __m512i).
 * Builtin operand order is (mask, index, addr, scale, hint); addr is cast to
 * `void const *`, scale and hint to int.  The unmasked forms pass an all-ones
 * mask.  Note that the macro parameter order (index first) differs from the
 * builtin operand order (mask first). */
#define _mm512_mask_prefetch_i32gather_pd(index, mask, addr, scale, hint) \
__builtin_ia32_gatherpfdpd((__mmask8)(mask), (__v8si)(__m256i)(index), \
(void const *)(addr), (int)(scale), \
(int)(hint))

#define _mm512_prefetch_i32gather_pd(index, addr, scale, hint) \
__builtin_ia32_gatherpfdpd((__mmask8) -1, (__v8si)(__m256i)(index), \
(void const *)(addr), (int)(scale), \
(int)(hint))

#define _mm512_mask_prefetch_i32gather_ps(index, mask, addr, scale, hint) \
__builtin_ia32_gatherpfdps((__mmask16)(mask), \
(__v16si)(__m512i)(index), (void const *)(addr), \
(int)(scale), (int)(hint))

#define _mm512_prefetch_i32gather_ps(index, addr, scale, hint) \
__builtin_ia32_gatherpfdps((__mmask16) -1, \
(__v16si)(__m512i)(index), (void const *)(addr), \
(int)(scale), (int)(hint))

/* Gather prefetch, 64-bit indices.
 *
 * The _pd forms call __builtin_ia32_gatherpfqpd and the _ps forms call
 * __builtin_ia32_gatherpfqps.  Both take an __mmask8 and the index cast to
 * __v8di (from __m512i); operand order is (mask, index, addr, scale, hint)
 * with addr cast to `void const *` and scale/hint cast to int.  The unmasked
 * forms pass an all-ones mask. */
#define _mm512_mask_prefetch_i64gather_pd(index, mask, addr, scale, hint) \
__builtin_ia32_gatherpfqpd((__mmask8)(mask), (__v8di)(__m512i)(index), \
(void const *)(addr), (int)(scale), \
(int)(hint))

#define _mm512_prefetch_i64gather_pd(index, addr, scale, hint) \
__builtin_ia32_gatherpfqpd((__mmask8) -1, (__v8di)(__m512i)(index), \
(void const *)(addr), (int)(scale), \
(int)(hint))

#define _mm512_mask_prefetch_i64gather_ps(index, mask, addr, scale, hint) \
__builtin_ia32_gatherpfqps((__mmask8)(mask), (__v8di)(__m512i)(index), \
(void const *)(addr), (int)(scale), (int)(hint))

#define _mm512_prefetch_i64gather_ps(index, addr, scale, hint) \
__builtin_ia32_gatherpfqps((__mmask8) -1, (__v8di)(__m512i)(index), \
(void const *)(addr), (int)(scale), (int)(hint))

/* Scatter prefetch, 32-bit indices.
 *
 * The _pd forms call __builtin_ia32_scatterpfdpd with an __mmask8 and the
 * index cast to __v8si (from __m256i); the _ps forms call
 * __builtin_ia32_scatterpfdps with an __mmask16 and the index cast to __v16si
 * (from __m512i).  Builtin operand order is (mask, index, addr, scale, hint);
 * addr is cast to `void *` (non-const, unlike the gather forms).  The macro
 * parameter order here is addr first, then (mask,) index.  The unmasked forms
 * pass an all-ones mask. */
#define _mm512_prefetch_i32scatter_pd(addr, index, scale, hint) \
__builtin_ia32_scatterpfdpd((__mmask8)-1, (__v8si)(__m256i)(index), \
(void *)(addr), (int)(scale), \
(int)(hint))

#define _mm512_mask_prefetch_i32scatter_pd(addr, mask, index, scale, hint) \
__builtin_ia32_scatterpfdpd((__mmask8)(mask), (__v8si)(__m256i)(index), \
(void *)(addr), (int)(scale), \
(int)(hint))

#define _mm512_prefetch_i32scatter_ps(addr, index, scale, hint) \
__builtin_ia32_scatterpfdps((__mmask16)-1, (__v16si)(__m512i)(index), \
(void *)(addr), (int)(scale), (int)(hint))

#define _mm512_mask_prefetch_i32scatter_ps(addr, mask, index, scale, hint) \
__builtin_ia32_scatterpfdps((__mmask16)(mask), \
(__v16si)(__m512i)(index), (void *)(addr), \
(int)(scale), (int)(hint))

/* Scatter prefetch, 64-bit indices.
 *
 * The _pd forms call __builtin_ia32_scatterpfqpd and the _ps forms call
 * __builtin_ia32_scatterpfqps.  Both take an __mmask8 and the index cast to
 * __v8di (from __m512i); operand order is (mask, index, addr, scale, hint)
 * with addr cast to `void *` and scale/hint cast to int.  The unmasked forms
 * pass an all-ones mask. */
#define _mm512_prefetch_i64scatter_pd(addr, index, scale, hint) \
__builtin_ia32_scatterpfqpd((__mmask8)-1, (__v8di)(__m512i)(index), \
(void *)(addr), (int)(scale), \
(int)(hint))

#define _mm512_mask_prefetch_i64scatter_pd(addr, mask, index, scale, hint) \
__builtin_ia32_scatterpfqpd((__mmask8)(mask), (__v8di)(__m512i)(index), \
(void *)(addr), (int)(scale), \
(int)(hint))

#define _mm512_prefetch_i64scatter_ps(addr, index, scale, hint) \
__builtin_ia32_scatterpfqps((__mmask8)-1, (__v8di)(__m512i)(index), \
(void *)(addr), (int)(scale), (int)(hint))

#define _mm512_mask_prefetch_i64scatter_ps(addr, mask, index, scale, hint) \
__builtin_ia32_scatterpfqps((__mmask8)(mask), (__v8di)(__m512i)(index), \
(void *)(addr), (int)(scale), (int)(hint))

#endif /* __AVX512PFINTRIN_H */
8 changes: 8 additions & 0 deletions clang/lib/Headers/immintrin.h
Original file line number Diff line number Diff line change
Expand Up @@ -151,6 +151,10 @@
#include <avx512vldqintrin.h>
#endif

#if !defined(__SCE__) || __has_feature(modules) || defined(__AVX512ER__)
#include <avx512erintrin.h>
#endif

#if !defined(__SCE__) || __has_feature(modules) || defined(__AVX512IFMA__)
#include <avx512ifmaintrin.h>
#endif
Expand Down Expand Up @@ -182,6 +186,10 @@
#include <avx512vlvbmi2intrin.h>
#endif

#if !defined(__SCE__) || __has_feature(modules) || defined(__AVX512PF__)
#include <avx512pfintrin.h>
#endif

#if !defined(__SCE__) || __has_feature(modules) || defined(__AVX512FP16__)
#include <avx512fp16intrin.h>
#endif
Expand Down
1 change: 1 addition & 0 deletions clang/lib/Headers/module.modulemap
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,7 @@ module _Builtin_intrinsics [system] [extern_c] {
textual header "avxintrin.h"
textual header "avx2intrin.h"
textual header "avx512fintrin.h"
textual header "avx512erintrin.h"
textual header "fmaintrin.h"

header "x86intrin.h"
Expand Down
30 changes: 30 additions & 0 deletions clang/lib/Sema/SemaChecking.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -5911,9 +5911,15 @@ bool Sema::CheckX86BuiltinRoundingOrSAE(unsigned BuiltinID, CallExpr *TheCall) {
case X86::BI__builtin_ia32_vcvttph2udq512_mask:
case X86::BI__builtin_ia32_vcvttph2qq512_mask:
case X86::BI__builtin_ia32_vcvttph2uqq512_mask:
case X86::BI__builtin_ia32_exp2pd_mask:
case X86::BI__builtin_ia32_exp2ps_mask:
case X86::BI__builtin_ia32_getexppd512_mask:
case X86::BI__builtin_ia32_getexpps512_mask:
case X86::BI__builtin_ia32_getexpph512_mask:
case X86::BI__builtin_ia32_rcp28pd_mask:
case X86::BI__builtin_ia32_rcp28ps_mask:
case X86::BI__builtin_ia32_rsqrt28pd_mask:
case X86::BI__builtin_ia32_rsqrt28ps_mask:
case X86::BI__builtin_ia32_vcomisd:
case X86::BI__builtin_ia32_vcomiss:
case X86::BI__builtin_ia32_vcomish:
Expand All @@ -5940,12 +5946,16 @@ bool Sema::CheckX86BuiltinRoundingOrSAE(unsigned BuiltinID, CallExpr *TheCall) {
case X86::BI__builtin_ia32_minsd_round_mask:
case X86::BI__builtin_ia32_minss_round_mask:
case X86::BI__builtin_ia32_minsh_round_mask:
case X86::BI__builtin_ia32_rcp28sd_round_mask:
case X86::BI__builtin_ia32_rcp28ss_round_mask:
case X86::BI__builtin_ia32_reducepd512_mask:
case X86::BI__builtin_ia32_reduceps512_mask:
case X86::BI__builtin_ia32_reduceph512_mask:
case X86::BI__builtin_ia32_rndscalepd_mask:
case X86::BI__builtin_ia32_rndscaleps_mask:
case X86::BI__builtin_ia32_rndscaleph_mask:
case X86::BI__builtin_ia32_rsqrt28sd_round_mask:
case X86::BI__builtin_ia32_rsqrt28ss_round_mask:
ArgNum = 4;
break;
case X86::BI__builtin_ia32_fixupimmpd512_mask:
Expand Down Expand Up @@ -6156,6 +6166,16 @@ bool Sema::CheckX86BuiltinGatherScatterScale(unsigned BuiltinID,
switch (BuiltinID) {
default:
return false;
case X86::BI__builtin_ia32_gatherpfdpd:
case X86::BI__builtin_ia32_gatherpfdps:
case X86::BI__builtin_ia32_gatherpfqpd:
case X86::BI__builtin_ia32_gatherpfqps:
case X86::BI__builtin_ia32_scatterpfdpd:
case X86::BI__builtin_ia32_scatterpfdps:
case X86::BI__builtin_ia32_scatterpfqpd:
case X86::BI__builtin_ia32_scatterpfqps:
ArgNum = 3;
break;
case X86::BI__builtin_ia32_gatherd_pd:
case X86::BI__builtin_ia32_gatherd_pd256:
case X86::BI__builtin_ia32_gatherq_pd:
Expand Down Expand Up @@ -6662,6 +6682,16 @@ bool Sema::CheckX86BuiltinFunctionCall(const TargetInfo &TI, unsigned BuiltinID,
case X86::BI__builtin_ia32_vsm3rnds2:
i = 3; l = 0; u = 255;
break;
case X86::BI__builtin_ia32_gatherpfdpd:
case X86::BI__builtin_ia32_gatherpfdps:
case X86::BI__builtin_ia32_gatherpfqpd:
case X86::BI__builtin_ia32_gatherpfqps:
case X86::BI__builtin_ia32_scatterpfdpd:
case X86::BI__builtin_ia32_scatterpfdps:
case X86::BI__builtin_ia32_scatterpfqpd:
case X86::BI__builtin_ia32_scatterpfqps:
i = 4; l = 2; u = 3;
break;
case X86::BI__builtin_ia32_reducesd_mask:
case X86::BI__builtin_ia32_reducess_mask:
case X86::BI__builtin_ia32_rndscalesd_round_mask:
Expand Down
347 changes: 347 additions & 0 deletions clang/test/CodeGen/X86/avx512er-builtins.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,347 @@
// RUN: %clang_cc1 -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-apple-darwin -target-feature +avx512f -target-feature +avx512er -emit-llvm -o - -Wall | FileCheck %s


#include <immintrin.h>

// rsqrt28, packed double: plain / mask / maskz wrappers, first with an explicit
// _MM_FROUND_NO_EXC operand and then the forms without a rounding operand.
// Each test only verifies that the rsqrt28.pd intrinsic is referenced.
__m512d test_mm512_rsqrt28_round_pd(__m512d a) {
// CHECK-LABEL: @test_mm512_rsqrt28_round_pd
// CHECK: @llvm.x86.avx512.rsqrt28.pd
return _mm512_rsqrt28_round_pd(a, _MM_FROUND_NO_EXC);
}

__m512d test_mm512_mask_rsqrt28_round_pd(__m512d s, __mmask8 m, __m512d a) {
// CHECK-LABEL: @test_mm512_mask_rsqrt28_round_pd
// CHECK: @llvm.x86.avx512.rsqrt28.pd
return _mm512_mask_rsqrt28_round_pd(s, m, a, _MM_FROUND_NO_EXC);
}

__m512d test_mm512_maskz_rsqrt28_round_pd(__mmask8 m, __m512d a) {
// CHECK-LABEL: @test_mm512_maskz_rsqrt28_round_pd
// CHECK: @llvm.x86.avx512.rsqrt28.pd
return _mm512_maskz_rsqrt28_round_pd(m, a, _MM_FROUND_NO_EXC);
}

__m512d test_mm512_rsqrt28_pd(__m512d a) {
// CHECK-LABEL: @test_mm512_rsqrt28_pd
// CHECK: @llvm.x86.avx512.rsqrt28.pd
return _mm512_rsqrt28_pd(a);
}

__m512d test_mm512_mask_rsqrt28_pd(__m512d s, __mmask8 m, __m512d a) {
// CHECK-LABEL: @test_mm512_mask_rsqrt28_pd
// CHECK: @llvm.x86.avx512.rsqrt28.pd
return _mm512_mask_rsqrt28_pd(s, m, a);
}

__m512d test_mm512_maskz_rsqrt28_pd(__mmask8 m, __m512d a) {
// CHECK-LABEL: @test_mm512_maskz_rsqrt28_pd
// CHECK: @llvm.x86.avx512.rsqrt28.pd
return _mm512_maskz_rsqrt28_pd(m, a);
}

// rsqrt28, packed single: plain / mask / maskz wrappers, with and without an
// explicit rounding operand.  Each test only verifies that the rsqrt28.ps
// intrinsic is referenced.
__m512 test_mm512_rsqrt28_round_ps(__m512 a) {
// CHECK-LABEL: @test_mm512_rsqrt28_round_ps
// CHECK: @llvm.x86.avx512.rsqrt28.ps
return _mm512_rsqrt28_round_ps(a, _MM_FROUND_NO_EXC);
}

__m512 test_mm512_mask_rsqrt28_round_ps(__m512 s, __mmask16 m, __m512 a) {
// CHECK-LABEL: @test_mm512_mask_rsqrt28_round_ps
// CHECK: @llvm.x86.avx512.rsqrt28.ps
return _mm512_mask_rsqrt28_round_ps(s, m, a, _MM_FROUND_NO_EXC);
}

__m512 test_mm512_maskz_rsqrt28_round_ps(__mmask16 m, __m512 a) {
// CHECK-LABEL: @test_mm512_maskz_rsqrt28_round_ps
// CHECK: @llvm.x86.avx512.rsqrt28.ps
return _mm512_maskz_rsqrt28_round_ps(m, a, _MM_FROUND_NO_EXC);
}

__m512 test_mm512_rsqrt28_ps(__m512 a) {
// CHECK-LABEL: @test_mm512_rsqrt28_ps
// CHECK: @llvm.x86.avx512.rsqrt28.ps
return _mm512_rsqrt28_ps(a);
}

__m512 test_mm512_mask_rsqrt28_ps(__m512 s, __mmask16 m, __m512 a) {
// CHECK-LABEL: @test_mm512_mask_rsqrt28_ps
// CHECK: @llvm.x86.avx512.rsqrt28.ps
return _mm512_mask_rsqrt28_ps(s, m, a);
}

__m512 test_mm512_maskz_rsqrt28_ps(__mmask16 m, __m512 a) {
// CHECK-LABEL: @test_mm512_maskz_rsqrt28_ps
// CHECK: @llvm.x86.avx512.rsqrt28.ps
return _mm512_maskz_rsqrt28_ps(m, a);
}

// rsqrt28, scalar single: plain / mask / maskz wrappers, with and without an
// explicit rounding operand.  Each test only verifies that the rsqrt28.ss
// intrinsic is referenced.
// NOTE(review): the mask parameters below are declared __mmask16, whereas the
// scalar wrappers in avx512erintrin.h cast their mask argument to __mmask8.
// Confirm whether __mmask8 was intended here.
__m128 test_mm_rsqrt28_round_ss(__m128 a, __m128 b) {
// CHECK-LABEL: @test_mm_rsqrt28_round_ss
// CHECK: @llvm.x86.avx512.rsqrt28.ss
return _mm_rsqrt28_round_ss(a, b, _MM_FROUND_NO_EXC);
}

__m128 test_mm_mask_rsqrt28_round_ss(__m128 s, __mmask16 m, __m128 a, __m128 b) {
// CHECK-LABEL: @test_mm_mask_rsqrt28_round_ss
// CHECK: @llvm.x86.avx512.rsqrt28.ss
return _mm_mask_rsqrt28_round_ss(s, m, a, b, _MM_FROUND_NO_EXC);
}

__m128 test_mm_maskz_rsqrt28_round_ss(__mmask16 m, __m128 a, __m128 b) {
// CHECK-LABEL: @test_mm_maskz_rsqrt28_round_ss
// CHECK: @llvm.x86.avx512.rsqrt28.ss
return _mm_maskz_rsqrt28_round_ss(m, a, b, _MM_FROUND_NO_EXC);
}

__m128 test_mm_rsqrt28_ss(__m128 a, __m128 b) {
// CHECK-LABEL: @test_mm_rsqrt28_ss
// CHECK: @llvm.x86.avx512.rsqrt28.ss
return _mm_rsqrt28_ss(a, b);
}

__m128 test_mm_mask_rsqrt28_ss(__m128 s, __mmask16 m, __m128 a, __m128 b) {
// CHECK-LABEL: @test_mm_mask_rsqrt28_ss
// CHECK: @llvm.x86.avx512.rsqrt28.ss
return _mm_mask_rsqrt28_ss(s, m, a, b);
}

__m128 test_mm_maskz_rsqrt28_ss(__mmask16 m, __m128 a, __m128 b) {
// CHECK-LABEL: @test_mm_maskz_rsqrt28_ss
// CHECK: @llvm.x86.avx512.rsqrt28.ss
return _mm_maskz_rsqrt28_ss(m, a, b);
}

// rsqrt28, scalar double: plain / mask / maskz wrappers with an explicit
// _MM_FROUND_NO_EXC operand.  Each test only verifies that the rsqrt28.sd
// intrinsic is referenced.
// NOTE(review): unlike the other families in this file, only the *_round_sd
// forms are exercised; tests for _mm_rsqrt28_sd, _mm_mask_rsqrt28_sd and
// _mm_maskz_rsqrt28_sd do not appear in the visible part of this file.
__m128d test_mm_rsqrt28_round_sd(__m128d a, __m128d b) {
// CHECK-LABEL: @test_mm_rsqrt28_round_sd
// CHECK: @llvm.x86.avx512.rsqrt28.sd
return _mm_rsqrt28_round_sd(a, b, _MM_FROUND_NO_EXC);
}

__m128d test_mm_mask_rsqrt28_round_sd(__m128d s, __mmask8 m, __m128d a, __m128d b) {
// CHECK-LABEL: @test_mm_mask_rsqrt28_round_sd
// CHECK: @llvm.x86.avx512.rsqrt28.sd
return _mm_mask_rsqrt28_round_sd(s, m, a, b, _MM_FROUND_NO_EXC);
}

__m128d test_mm_maskz_rsqrt28_round_sd(__mmask8 m, __m128d a, __m128d b) {
// CHECK-LABEL: @test_mm_maskz_rsqrt28_round_sd
// CHECK: @llvm.x86.avx512.rsqrt28.sd
return _mm_maskz_rsqrt28_round_sd(m, a, b, _MM_FROUND_NO_EXC);
}

// rcp28, packed double: plain / mask / maskz wrappers, with and without an
// explicit rounding operand.  Each test only verifies that the rcp28.pd
// intrinsic is referenced.
__m512d test_mm512_rcp28_round_pd(__m512d a) {
// CHECK-LABEL: @test_mm512_rcp28_round_pd
// CHECK: @llvm.x86.avx512.rcp28.pd
return _mm512_rcp28_round_pd(a, _MM_FROUND_NO_EXC);
}

__m512d test_mm512_mask_rcp28_round_pd(__m512d s, __mmask8 m, __m512d a) {
// CHECK-LABEL: @test_mm512_mask_rcp28_round_pd
// CHECK: @llvm.x86.avx512.rcp28.pd
return _mm512_mask_rcp28_round_pd(s, m, a, _MM_FROUND_NO_EXC);
}

__m512d test_mm512_maskz_rcp28_round_pd(__mmask8 m, __m512d a) {
// CHECK-LABEL: @test_mm512_maskz_rcp28_round_pd
// CHECK: @llvm.x86.avx512.rcp28.pd
return _mm512_maskz_rcp28_round_pd(m, a, _MM_FROUND_NO_EXC);
}

__m512d test_mm512_rcp28_pd(__m512d a) {
// CHECK-LABEL: @test_mm512_rcp28_pd
// CHECK: @llvm.x86.avx512.rcp28.pd
return _mm512_rcp28_pd(a);
}

__m512d test_mm512_mask_rcp28_pd(__m512d s, __mmask8 m, __m512d a) {
// CHECK-LABEL: @test_mm512_mask_rcp28_pd
// CHECK: @llvm.x86.avx512.rcp28.pd
return _mm512_mask_rcp28_pd(s, m, a);
}

__m512d test_mm512_maskz_rcp28_pd(__mmask8 m, __m512d a) {
// CHECK-LABEL: @test_mm512_maskz_rcp28_pd
// CHECK: @llvm.x86.avx512.rcp28.pd
return _mm512_maskz_rcp28_pd(m, a);
}

// rcp28, packed single: plain / mask / maskz wrappers, with and without an
// explicit rounding operand.  Each test only verifies that the rcp28.ps
// intrinsic is referenced.
__m512 test_mm512_rcp28_round_ps(__m512 a) {
// CHECK-LABEL: @test_mm512_rcp28_round_ps
// CHECK: @llvm.x86.avx512.rcp28.ps
return _mm512_rcp28_round_ps(a, _MM_FROUND_NO_EXC);
}

__m512 test_mm512_mask_rcp28_round_ps(__m512 s, __mmask16 m, __m512 a) {
// CHECK-LABEL: @test_mm512_mask_rcp28_round_ps
// CHECK: @llvm.x86.avx512.rcp28.ps
return _mm512_mask_rcp28_round_ps(s, m, a, _MM_FROUND_NO_EXC);
}

__m512 test_mm512_maskz_rcp28_round_ps(__mmask16 m, __m512 a) {
// CHECK-LABEL: @test_mm512_maskz_rcp28_round_ps
// CHECK: @llvm.x86.avx512.rcp28.ps
return _mm512_maskz_rcp28_round_ps(m, a, _MM_FROUND_NO_EXC);
}

__m512 test_mm512_rcp28_ps(__m512 a) {
// CHECK-LABEL: @test_mm512_rcp28_ps
// CHECK: @llvm.x86.avx512.rcp28.ps
return _mm512_rcp28_ps(a);
}

__m512 test_mm512_mask_rcp28_ps(__m512 s, __mmask16 m, __m512 a) {
// CHECK-LABEL: @test_mm512_mask_rcp28_ps
// CHECK: @llvm.x86.avx512.rcp28.ps
return _mm512_mask_rcp28_ps(s, m, a);
}

__m512 test_mm512_maskz_rcp28_ps(__mmask16 m, __m512 a) {
// CHECK-LABEL: @test_mm512_maskz_rcp28_ps
// CHECK: @llvm.x86.avx512.rcp28.ps
return _mm512_maskz_rcp28_ps(m, a);
}

// rcp28, scalar single: plain / mask / maskz wrappers, with and without an
// explicit rounding operand.  Each test only verifies that the rcp28.ss
// intrinsic is referenced.
// NOTE(review): the mask parameters below are declared __mmask16, whereas the
// scalar wrappers in avx512erintrin.h cast their mask argument to __mmask8.
// Confirm whether __mmask8 was intended here.
__m128 test_mm_rcp28_round_ss(__m128 a, __m128 b) {
// CHECK-LABEL: @test_mm_rcp28_round_ss
// CHECK: @llvm.x86.avx512.rcp28.ss
return _mm_rcp28_round_ss(a, b, _MM_FROUND_NO_EXC);
}

__m128 test_mm_mask_rcp28_round_ss(__m128 s, __mmask16 m, __m128 a, __m128 b) {
// CHECK-LABEL: @test_mm_mask_rcp28_round_ss
// CHECK: @llvm.x86.avx512.rcp28.ss
return _mm_mask_rcp28_round_ss(s, m, a, b, _MM_FROUND_NO_EXC);
}

__m128 test_mm_maskz_rcp28_round_ss(__mmask16 m, __m128 a, __m128 b) {
// CHECK-LABEL: @test_mm_maskz_rcp28_round_ss
// CHECK: @llvm.x86.avx512.rcp28.ss
return _mm_maskz_rcp28_round_ss(m, a, b, _MM_FROUND_NO_EXC);
}

__m128 test_mm_rcp28_ss(__m128 a, __m128 b) {
// CHECK-LABEL: @test_mm_rcp28_ss
// CHECK: @llvm.x86.avx512.rcp28.ss
return _mm_rcp28_ss(a, b);
}

__m128 test_mm_mask_rcp28_ss(__m128 s, __mmask16 m, __m128 a, __m128 b) {
// CHECK-LABEL: @test_mm_mask_rcp28_ss
// CHECK: @llvm.x86.avx512.rcp28.ss
return _mm_mask_rcp28_ss(s, m, a, b);
}

__m128 test_mm_maskz_rcp28_ss(__mmask16 m, __m128 a, __m128 b) {
// CHECK-LABEL: @test_mm_maskz_rcp28_ss
// CHECK: @llvm.x86.avx512.rcp28.ss
return _mm_maskz_rcp28_ss(m, a, b);
}

// rcp28, scalar double: plain / mask / maskz wrappers, with and without an
// explicit rounding operand.  Each test only verifies that the rcp28.sd
// intrinsic is referenced.
__m128d test_mm_rcp28_round_sd(__m128d a, __m128d b) {
// CHECK-LABEL: @test_mm_rcp28_round_sd
// CHECK: @llvm.x86.avx512.rcp28.sd
return _mm_rcp28_round_sd(a, b, _MM_FROUND_NO_EXC);
}

__m128d test_mm_mask_rcp28_round_sd(__m128d s, __mmask8 m, __m128d a, __m128d b) {
// CHECK-LABEL: @test_mm_mask_rcp28_round_sd
// CHECK: @llvm.x86.avx512.rcp28.sd
return _mm_mask_rcp28_round_sd(s, m, a, b, _MM_FROUND_NO_EXC);
}

__m128d test_mm_maskz_rcp28_round_sd(__mmask8 m, __m128d a, __m128d b) {
// CHECK-LABEL: @test_mm_maskz_rcp28_round_sd
// CHECK: @llvm.x86.avx512.rcp28.sd
return _mm_maskz_rcp28_round_sd(m, a, b, _MM_FROUND_NO_EXC);
}

__m128d test_mm_rcp28_sd(__m128d a, __m128d b) {
// CHECK-LABEL: @test_mm_rcp28_sd
// CHECK: @llvm.x86.avx512.rcp28.sd
return _mm_rcp28_sd(a, b);
}

__m128d test_mm_mask_rcp28_sd(__m128d s, __mmask8 m, __m128d a, __m128d b) {
// CHECK-LABEL: @test_mm_mask_rcp28_sd
// CHECK: @llvm.x86.avx512.rcp28.sd
return _mm_mask_rcp28_sd(s, m, a, b);
}

__m128d test_mm_maskz_rcp28_sd(__mmask8 m, __m128d a, __m128d b) {
// CHECK-LABEL: @test_mm_maskz_rcp28_sd
// CHECK: @llvm.x86.avx512.rcp28.sd
return _mm_maskz_rcp28_sd(m, a, b);
}

// exp2a23, packed double: plain / mask / maskz wrappers, with and without an
// explicit rounding operand.  Each test only verifies that the exp2.pd
// intrinsic is referenced.
__m512d test_mm512_exp2a23_round_pd(__m512d a) {
// CHECK-LABEL: @test_mm512_exp2a23_round_pd
// CHECK: @llvm.x86.avx512.exp2.pd
return _mm512_exp2a23_round_pd(a, _MM_FROUND_NO_EXC);
}

__m512d test_mm512_mask_exp2a23_round_pd(__m512d s, __mmask8 m, __m512d a) {
// CHECK-LABEL: @test_mm512_mask_exp2a23_round_pd
// CHECK: @llvm.x86.avx512.exp2.pd
return _mm512_mask_exp2a23_round_pd(s, m, a, _MM_FROUND_NO_EXC);
}

__m512d test_mm512_maskz_exp2a23_round_pd(__mmask8 m, __m512d a) {
// CHECK-LABEL: @test_mm512_maskz_exp2a23_round_pd
// CHECK: @llvm.x86.avx512.exp2.pd
return _mm512_maskz_exp2a23_round_pd(m, a, _MM_FROUND_NO_EXC);
}

__m512d test_mm512_exp2a23_pd(__m512d a) {
// CHECK-LABEL: @test_mm512_exp2a23_pd
// CHECK: @llvm.x86.avx512.exp2.pd
return _mm512_exp2a23_pd(a);
}

__m512d test_mm512_mask_exp2a23_pd(__m512d s, __mmask8 m, __m512d a) {
// CHECK-LABEL: @test_mm512_mask_exp2a23_pd
// CHECK: @llvm.x86.avx512.exp2.pd
return _mm512_mask_exp2a23_pd(s, m, a);
}

__m512d test_mm512_maskz_exp2a23_pd(__mmask8 m, __m512d a) {
// CHECK-LABEL: @test_mm512_maskz_exp2a23_pd
// CHECK: @llvm.x86.avx512.exp2.pd
return _mm512_maskz_exp2a23_pd(m, a);
}

// exp2a23, packed single: plain / mask / maskz wrappers, with and without an
// explicit rounding operand.  Each test only verifies that the exp2.ps
// intrinsic is referenced.
__m512 test_mm512_exp2a23_round_ps(__m512 a) {
// CHECK-LABEL: @test_mm512_exp2a23_round_ps
// CHECK: @llvm.x86.avx512.exp2.ps
return _mm512_exp2a23_round_ps(a, _MM_FROUND_NO_EXC);
}

__m512 test_mm512_mask_exp2a23_round_ps(__m512 s, __mmask16 m, __m512 a) {
// CHECK-LABEL: @test_mm512_mask_exp2a23_round_ps
// CHECK: @llvm.x86.avx512.exp2.ps
return _mm512_mask_exp2a23_round_ps(s, m, a, _MM_FROUND_NO_EXC);
}

__m512 test_mm512_maskz_exp2a23_round_ps(__mmask16 m, __m512 a) {
// CHECK-LABEL: @test_mm512_maskz_exp2a23_round_ps
// CHECK: @llvm.x86.avx512.exp2.ps
return _mm512_maskz_exp2a23_round_ps(m, a, _MM_FROUND_NO_EXC);
}

__m512 test_mm512_exp2a23_ps(__m512 a) {
// CHECK-LABEL: @test_mm512_exp2a23_ps
// CHECK: @llvm.x86.avx512.exp2.ps
return _mm512_exp2a23_ps(a);
}

__m512 test_mm512_mask_exp2a23_ps(__m512 s, __mmask16 m, __m512 a) {
// CHECK-LABEL: @test_mm512_mask_exp2a23_ps
// CHECK: @llvm.x86.avx512.exp2.ps
return _mm512_mask_exp2a23_ps(s, m, a);
}

__m512 test_mm512_maskz_exp2a23_ps(__mmask16 m, __m512 a) {
// CHECK-LABEL: @test_mm512_maskz_exp2a23_ps
// CHECK: @llvm.x86.avx512.exp2.ps
return _mm512_maskz_exp2a23_ps(m, a);
}

100 changes: 100 additions & 0 deletions clang/test/CodeGen/X86/avx512pf-builtins.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,100 @@
// RUN: %clang_cc1 -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-apple-darwin -target-feature +avx512pf -emit-llvm -o - -Wall | FileCheck %s


#include <immintrin.h>

// Masked gather prefetch, 32-bit indices, double: expects a reference to the
// gatherpf.dpd intrinsic.  The enclosing function returns void, so the
// prefetch is a plain statement rather than the operand of `return`.
void test_mm512_mask_prefetch_i32gather_pd(__m256i index, __mmask8 mask, void const *addr) {
  // CHECK-LABEL: @test_mm512_mask_prefetch_i32gather_pd
  // CHECK: @llvm.x86.avx512.gatherpf.dpd
  _mm512_mask_prefetch_i32gather_pd(index, mask, addr, 2, _MM_HINT_T0);
}

// Unmasked gather prefetch, 32-bit indices, double: expects a reference to the
// gatherpf.dpd intrinsic.  The enclosing function returns void, so the
// prefetch is a plain statement rather than the operand of `return`.
void test_mm512_prefetch_i32gather_pd(__m256i index, void const *addr) {
  // CHECK-LABEL: @test_mm512_prefetch_i32gather_pd
  // CHECK: @llvm.x86.avx512.gatherpf.dpd
  _mm512_prefetch_i32gather_pd(index, addr, 2, _MM_HINT_T0);
}

// Masked gather prefetch, 32-bit indices, single: expects a reference to the
// gatherpf.dps intrinsic.  The enclosing function returns void, so the
// prefetch is a plain statement rather than the operand of `return`.
void test_mm512_mask_prefetch_i32gather_ps(__m512i index, __mmask16 mask, void const *addr) {
  // CHECK-LABEL: @test_mm512_mask_prefetch_i32gather_ps
  // CHECK: @llvm.x86.avx512.gatherpf.dps
  _mm512_mask_prefetch_i32gather_ps(index, mask, addr, 2, _MM_HINT_T0);
}

// Unmasked gather prefetch, 32-bit indices, single: expects a reference to the
// gatherpf.dps intrinsic.  The enclosing function returns void, so the
// prefetch is a plain statement rather than the operand of `return`.
void test_mm512_prefetch_i32gather_ps(__m512i index, void const *addr) {
  // CHECK-LABEL: @test_mm512_prefetch_i32gather_ps
  // CHECK: @llvm.x86.avx512.gatherpf.dps
  _mm512_prefetch_i32gather_ps(index, addr, 2, _MM_HINT_T0);
}

// Masked gather prefetch, 64-bit indices, double: expects a reference to the
// gatherpf.qpd intrinsic.  The enclosing function returns void, so the
// prefetch is a plain statement rather than the operand of `return`.
void test_mm512_mask_prefetch_i64gather_pd(__m512i index, __mmask8 mask, void const *addr) {
  // CHECK-LABEL: @test_mm512_mask_prefetch_i64gather_pd
  // CHECK: @llvm.x86.avx512.gatherpf.qpd
  _mm512_mask_prefetch_i64gather_pd(index, mask, addr, 2, _MM_HINT_T0);
}

// Unmasked gather prefetch, 64-bit indices, double: expects a reference to the
// gatherpf.qpd intrinsic.  The enclosing function returns void, so the
// prefetch is a plain statement rather than the operand of `return`.
void test_mm512_prefetch_i64gather_pd(__m512i index, void const *addr) {
  // CHECK-LABEL: @test_mm512_prefetch_i64gather_pd
  // CHECK: @llvm.x86.avx512.gatherpf.qpd
  _mm512_prefetch_i64gather_pd(index, addr, 2, _MM_HINT_T0);
}

// Masked gather prefetch, 64-bit indices, single: expects a reference to the
// gatherpf.qps intrinsic.  The enclosing function returns void, so the
// prefetch is a plain statement rather than the operand of `return`.
void test_mm512_mask_prefetch_i64gather_ps(__m512i index, __mmask8 mask, void const *addr) {
  // CHECK-LABEL: @test_mm512_mask_prefetch_i64gather_ps
  // CHECK: @llvm.x86.avx512.gatherpf.qps
  _mm512_mask_prefetch_i64gather_ps(index, mask, addr, 2, _MM_HINT_T0);
}

// Unmasked gather prefetch, 64-bit indices, single: expects a reference to the
// gatherpf.qps intrinsic.  The enclosing function returns void, so the
// prefetch is a plain statement rather than the operand of `return`.
void test_mm512_prefetch_i64gather_ps(__m512i index, void const *addr) {
  // CHECK-LABEL: @test_mm512_prefetch_i64gather_ps
  // CHECK: @llvm.x86.avx512.gatherpf.qps
  _mm512_prefetch_i64gather_ps(index, addr, 2, _MM_HINT_T0);
}

// Unmasked scatter prefetch, 32-bit indices, double: expects a reference to
// the scatterpf.dpd.512 intrinsic.  The enclosing function returns void, so
// the prefetch is a plain statement rather than the operand of `return`.
void test_mm512_prefetch_i32scatter_pd(void *addr, __m256i index) {
  // CHECK-LABEL: @test_mm512_prefetch_i32scatter_pd
  // CHECK: @llvm.x86.avx512.scatterpf.dpd.512
  _mm512_prefetch_i32scatter_pd(addr, index, 1, _MM_HINT_T1);
}

// Masked scatter prefetch, 32-bit indices, double: expects a reference to the
// scatterpf.dpd.512 intrinsic.  The enclosing function returns void, so the
// prefetch is a plain statement rather than the operand of `return`.
void test_mm512_mask_prefetch_i32scatter_pd(void *addr, __mmask8 mask, __m256i index) {
  // CHECK-LABEL: @test_mm512_mask_prefetch_i32scatter_pd
  // CHECK: @llvm.x86.avx512.scatterpf.dpd.512
  _mm512_mask_prefetch_i32scatter_pd(addr, mask, index, 1, _MM_HINT_T1);
}

// Unmasked scatter prefetch, 32-bit indices, single: expects a reference to
// the scatterpf.dps.512 intrinsic.  The enclosing function returns void, so
// the prefetch is a plain statement rather than the operand of `return`.
void test_mm512_prefetch_i32scatter_ps(void *addr, __m512i index) {
  // CHECK-LABEL: @test_mm512_prefetch_i32scatter_ps
  // CHECK: @llvm.x86.avx512.scatterpf.dps.512
  _mm512_prefetch_i32scatter_ps(addr, index, 1, _MM_HINT_T1);
}

// Masked scatter prefetch, 32-bit indices, single: expects a reference to the
// scatterpf.dps.512 intrinsic.  The enclosing function returns void, so the
// prefetch is a plain statement rather than the operand of `return`.
void test_mm512_mask_prefetch_i32scatter_ps(void *addr, __mmask16 mask, __m512i index) {
  // CHECK-LABEL: @test_mm512_mask_prefetch_i32scatter_ps
  // CHECK: @llvm.x86.avx512.scatterpf.dps.512
  _mm512_mask_prefetch_i32scatter_ps(addr, mask, index, 1, _MM_HINT_T1);
}

// Unmasked scatter prefetch, 64-bit indices, double: expects a reference to
// the scatterpf.qpd.512 intrinsic.  The enclosing function returns void, so
// the prefetch is a plain statement rather than the operand of `return`.
void test_mm512_prefetch_i64scatter_pd(void *addr, __m512i index) {
  // CHECK-LABEL: @test_mm512_prefetch_i64scatter_pd
  // CHECK: @llvm.x86.avx512.scatterpf.qpd.512
  _mm512_prefetch_i64scatter_pd(addr, index, 1, _MM_HINT_T1);
}

// Masked scatter prefetch, 64-bit indices, double: expects a reference to the
// scatterpf.qpd.512 intrinsic.  The enclosing function returns void, so the
// prefetch is a plain statement rather than the operand of `return`.
// NOTE(review): `mask` is declared __mmask16 while the wrapper macro casts it
// to __mmask8 (and the i64 gather tests use __mmask8) -- confirm the intended
// parameter type.
void test_mm512_mask_prefetch_i64scatter_pd(void *addr, __mmask16 mask, __m512i index) {
  // CHECK-LABEL: @test_mm512_mask_prefetch_i64scatter_pd
  // CHECK: @llvm.x86.avx512.scatterpf.qpd.512
  _mm512_mask_prefetch_i64scatter_pd(addr, mask, index, 1, _MM_HINT_T1);
}

// Verifies that the unmasked i64scatter_ps prefetch macro (scale 1,
// _MM_HINT_T1) emits a call to the scatterpf.qps.512 intrinsic.
void test_mm512_prefetch_i64scatter_ps(void *addr, __m512i index) {
// CHECK-LABEL: @test_mm512_prefetch_i64scatter_ps
// CHECK: @llvm.x86.avx512.scatterpf.qps.512
return _mm512_prefetch_i64scatter_ps(addr, index, 1, _MM_HINT_T1);
}

void test_mm512_mask_prefetch_i64scatter_ps(void *addr, __mmask16 mask, __m512i index) {
// CHECK-LABEL: @test_mm512_mask_prefetch_i64scatter_ps
// CHECK: @llvm.x86.avx512.scatterpf.qps.512
return _mm512_mask_prefetch_i64scatter_ps(addr, mask, index, 1, _MM_HINT_T1);
}
10 changes: 5 additions & 5 deletions clang/test/CodeGen/attr-cpuspecific.c
Original file line number Diff line number Diff line change
Expand Up @@ -75,8 +75,8 @@ void TwoVersions(void);
// LINUX: define weak_odr ptr @TwoVersions.resolver()
// LINUX: call void @__cpu_indicator_init
// LINUX: %[[FEAT_INIT:.+]] = load i32, ptr getelementptr inbounds ({ i32, i32, i32, [1 x i32] }, ptr @__cpu_model, i32 0, i32 3, i32 0), align 4
// LINUX: %[[FEAT_JOIN:.+]] = and i32 %[[FEAT_INIT]], 9422847
// LINUX: %[[FEAT_CHECK:.+]] = icmp eq i32 %[[FEAT_JOIN]], 9422847
// LINUX: %[[FEAT_JOIN:.+]] = and i32 %[[FEAT_INIT]], 59754495
// LINUX: %[[FEAT_CHECK:.+]] = icmp eq i32 %[[FEAT_JOIN]], 59754495
// LINUX: ret ptr @TwoVersions.Z
// LINUX: ret ptr @TwoVersions.S
// LINUX: call void @llvm.trap
Expand All @@ -85,8 +85,8 @@ void TwoVersions(void);
// WINDOWS: define weak_odr dso_local void @TwoVersions() comdat
// WINDOWS: call void @__cpu_indicator_init()
// WINDOWS: %[[FEAT_INIT:.+]] = load i32, ptr getelementptr inbounds ({ i32, i32, i32, [1 x i32] }, ptr @__cpu_model, i32 0, i32 3, i32 0), align 4
// WINDOWS: %[[FEAT_JOIN:.+]] = and i32 %[[FEAT_INIT]], 9422847
// WINDOWS: %[[FEAT_CHECK:.+]] = icmp eq i32 %[[FEAT_JOIN]], 9422847
// WINDOWS: %[[FEAT_JOIN:.+]] = and i32 %[[FEAT_INIT]], 59754495
// WINDOWS: %[[FEAT_CHECK:.+]] = icmp eq i32 %[[FEAT_JOIN]], 59754495
// WINDOWS: call void @TwoVersions.Z()
// WINDOWS-NEXT: ret void
// WINDOWS: call void @TwoVersions.S()
Expand Down Expand Up @@ -354,7 +354,7 @@ void OrderDispatchUsageSpecific(void) {}

// CHECK: attributes #[[S]] = {{.*}}"target-features"="+avx,+cmov,+crc32,+cx16,+cx8,+f16c,+fsgsbase,+fxsr,+mmx,+pclmul,+popcnt,+rdrnd,+sahf,+sse,+sse2,+sse3,+sse4.1,+sse4.2,+ssse3,+x87,+xsave,+xsaveopt"
// CHECK-SAME: "tune-cpu"="ivybridge"
// CHECK: attributes #[[K]] = {{.*}}"target-features"="+adx,+aes,+avx,+avx2,+avx512cd,+avx512f,+bmi,+bmi2,+cmov,+crc32,+cx16,+cx8,+evex512,+f16c,+fma,+fsgsbase,+fxsr,+invpcid,+lzcnt,+mmx,+movbe,+pclmul,+popcnt,+prfchw,+rdrnd,+rdseed,+sahf,+sse,+sse2,+sse3,+sse4.1,+sse4.2,+ssse3,+x87,+xsave,+xsaveopt"
// CHECK: attributes #[[K]] = {{.*}}"target-features"="+adx,+aes,+avx,+avx2,+avx512cd,+avx512er,+avx512f,+avx512pf,+bmi,+bmi2,+cmov,+crc32,+cx16,+cx8,+evex512,+f16c,+fma,+fsgsbase,+fxsr,+invpcid,+lzcnt,+mmx,+movbe,+pclmul,+popcnt,+prefetchwt1,+prfchw,+rdrnd,+rdseed,+sahf,+sse,+sse2,+sse3,+sse4.1,+sse4.2,+ssse3,+x87,+xsave,+xsaveopt"
// CHECK-SAME: "tune-cpu"="knl"
// CHECK: attributes #[[O]] = {{.*}}"target-features"="+cmov,+cx16,+cx8,+fxsr,+mmx,+movbe,+sahf,+sse,+sse2,+sse3,+ssse3,+x87"
// CHECK-SAME: "tune-cpu"="atom"
4 changes: 2 additions & 2 deletions clang/test/CodeGen/attr-target-x86.c
Original file line number Diff line number Diff line change
Expand Up @@ -59,9 +59,9 @@ void __attribute__((target("avx10.1-512"))) avx10_1_512(void) {}
// CHECK: #0 = {{.*}}"target-cpu"="i686" "target-features"="+cmov,+cx8,+x87" "tune-cpu"="i686"
// CHECK: #1 = {{.*}}"target-cpu"="ivybridge" "target-features"="+avx,+cmov,+crc32,+cx16,+cx8,+f16c,+fsgsbase,+fxsr,+mmx,+pclmul,+popcnt,+rdrnd,+sahf,+sse,+sse2,+sse3,+sse4.1,+sse4.2,+ssse3,+x87,+xsave,+xsaveopt"
// CHECK-NOT: tune-cpu
// CHECK: #2 = {{.*}}"target-cpu"="i686" "target-features"="+cmov,+cx8,+x87,-aes,-avx,-avx10.1-256,-avx10.1-512,-avx2,-avx512bf16,-avx512bitalg,-avx512bw,-avx512cd,-avx512dq,-avx512f,-avx512fp16,-avx512ifma,-avx512vbmi,-avx512vbmi2,-avx512vl,-avx512vnni,-avx512vp2intersect,-avx512vpopcntdq,-avxifma,-avxneconvert,-avxvnni,-avxvnniint16,-avxvnniint8,-f16c,-fma,-fma4,-gfni,-kl,-pclmul,-sha,-sha512,-sm3,-sm4,-sse2,-sse3,-sse4.1,-sse4.2,-sse4a,-ssse3,-vaes,-vpclmulqdq,-widekl,-xop" "tune-cpu"="i686"
// CHECK: #2 = {{.*}}"target-cpu"="i686" "target-features"="+cmov,+cx8,+x87,-aes,-avx,-avx10.1-256,-avx10.1-512,-avx2,-avx512bf16,-avx512bitalg,-avx512bw,-avx512cd,-avx512dq,-avx512er,-avx512f,-avx512fp16,-avx512ifma,-avx512pf,-avx512vbmi,-avx512vbmi2,-avx512vl,-avx512vnni,-avx512vp2intersect,-avx512vpopcntdq,-avxifma,-avxneconvert,-avxvnni,-avxvnniint16,-avxvnniint8,-f16c,-fma,-fma4,-gfni,-kl,-pclmul,-sha,-sha512,-sm3,-sm4,-sse2,-sse3,-sse4.1,-sse4.2,-sse4a,-ssse3,-vaes,-vpclmulqdq,-widekl,-xop" "tune-cpu"="i686"
// CHECK: #3 = {{.*}}"target-cpu"="i686" "target-features"="+cmov,+crc32,+cx8,+mmx,+popcnt,+sse,+sse2,+sse3,+sse4.1,+sse4.2,+ssse3,+x87" "tune-cpu"="i686"
// CHECK: #4 = {{.*}}"target-cpu"="i686" "target-features"="+cmov,+cx8,+x87,-avx,-avx10.1-256,-avx10.1-512,-avx2,-avx512bf16,-avx512bitalg,-avx512bw,-avx512cd,-avx512dq,-avx512f,-avx512fp16,-avx512ifma,-avx512vbmi,-avx512vbmi2,-avx512vl,-avx512vnni,-avx512vp2intersect,-avx512vpopcntdq,-avxifma,-avxneconvert,-avxvnni,-avxvnniint16,-avxvnniint8,-f16c,-fma,-fma4,-sha512,-sm3,-sm4,-sse4.1,-sse4.2,-vaes,-vpclmulqdq,-xop" "tune-cpu"="i686"
// CHECK: #4 = {{.*}}"target-cpu"="i686" "target-features"="+cmov,+cx8,+x87,-avx,-avx10.1-256,-avx10.1-512,-avx2,-avx512bf16,-avx512bitalg,-avx512bw,-avx512cd,-avx512dq,-avx512er,-avx512f,-avx512fp16,-avx512ifma,-avx512pf,-avx512vbmi,-avx512vbmi2,-avx512vl,-avx512vnni,-avx512vp2intersect,-avx512vpopcntdq,-avxifma,-avxneconvert,-avxvnni,-avxvnniint16,-avxvnniint8,-f16c,-fma,-fma4,-sha512,-sm3,-sm4,-sse4.1,-sse4.2,-vaes,-vpclmulqdq,-xop" "tune-cpu"="i686"
// CHECK: #5 = {{.*}}"target-cpu"="ivybridge" "target-features"="+avx,+cmov,+crc32,+cx16,+cx8,+f16c,+fsgsbase,+fxsr,+mmx,+pclmul,+popcnt,+rdrnd,+sahf,+sse,+sse2,+sse3,+sse4.1,+sse4.2,+ssse3,+x87,+xsave,+xsaveopt,-aes,-avx10.1-256,-avx10.1-512,-vaes"
// CHECK-NOT: tune-cpu
// CHECK: #6 = {{.*}}"target-cpu"="i686" "target-features"="+cmov,+cx8,+x87,-3dnow,-3dnowa,-mmx"
Expand Down
4 changes: 2 additions & 2 deletions clang/test/CodeGen/function-target-features.c
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@

// RUN: %clang_cc1 -triple x86_64-unknown-unknown -emit-llvm -o - %s -target-feature +avx | FileCheck %s -check-prefix=AVX-FEATURE
// RUN: %clang_cc1 -triple x86_64-unknown-unknown -emit-llvm -o - %s -target-feature +avx | FileCheck %s -check-prefix=AVX-NO-CPU
// RUN: %clang_cc1 -triple x86_64-unknown-unknown -emit-llvm -o - %s -target-feature +avx512f -target-feature +avx512bw | FileCheck %s -check-prefix=TWO-AVX
// RUN: %clang_cc1 -triple x86_64-unknown-unknown -emit-llvm -o - %s -target-feature +avx512f -target-feature +avx512er | FileCheck %s -check-prefix=TWO-AVX
// RUN: %clang_cc1 -triple x86_64-unknown-unknown -emit-llvm -o - %s -target-cpu corei7 | FileCheck %s -check-prefix=CORE-CPU
// RUN: %clang_cc1 -triple x86_64-unknown-unknown -emit-llvm -o - %s -target-cpu corei7 -target-feature +avx | FileCheck %s -check-prefix=CORE-CPU-AND-FEATURES
// RUN: %clang_cc1 -triple x86_64-unknown-unknown -emit-llvm -o - %s -target-cpu x86-64 | FileCheck %s -check-prefix=X86-64-CPU
Expand All @@ -17,7 +17,7 @@ void foo(void) {}

// AVX-FEATURE: "target-features"{{.*}}+avx
// AVX-NO-CPU-NOT: target-cpu
// TWO-AVX: "target-features"={{.*}}+avx512bw{{.*}}+avx512f
// TWO-AVX: "target-features"={{.*}}+avx512er{{.*}}+avx512f
// CORE-CPU: "target-cpu"="corei7"
// CORE-CPU-AND-FEATURES: "target-cpu"="corei7" "target-features"={{.*}}+avx
// X86-64-CPU: "target-cpu"="x86-64"
Expand Down
2 changes: 2 additions & 0 deletions clang/test/CodeGen/target-builtin-noerror.c
Original file line number Diff line number Diff line change
Expand Up @@ -68,6 +68,8 @@ void verifyfeaturestrings(void) {
(void)__builtin_cpu_supports("avx512bw");
(void)__builtin_cpu_supports("avx512dq");
(void)__builtin_cpu_supports("avx512cd");
(void)__builtin_cpu_supports("avx512er");
(void)__builtin_cpu_supports("avx512pf");
(void)__builtin_cpu_supports("avx512vbmi");
(void)__builtin_cpu_supports("avx512ifma");
(void)__builtin_cpu_supports("avx5124vnniw");
Expand Down
10 changes: 8 additions & 2 deletions clang/test/Driver/cl-x86-flags.c
Original file line number Diff line number Diff line change
Expand Up @@ -69,7 +69,10 @@
// RUN: %clang_cl -m32 -arch:avx2 --target=i386-pc-windows -### -- 2>&1 %s | FileCheck -check-prefix=avx2 %s
// avx2: invalid /arch: argument

// RUN: %clang_cl -m32 -arch:AVX512F --target=i386-pc-windows /c /Fo%t.obj -Xclang -verify -DTEST_32_ARCH_AVX512F -- %s
// RUN: %clang_cl -m32 -arch:AVX512F --target=i386-pc-windows /c /Fo%t.obj -Xclang -verify=KNL1 -DTEST_32_ARCH_AVX512F -- %s
// KNL1-warning@*:* {{KNL, KNM related Intel Xeon Phi CPU's specific ISA's supports will be removed in LLVM 19.}}
// KNL1-warning@*:* {{KNL, KNM related Intel Xeon Phi CPU's specific ISA's supports will be removed in LLVM 19.}}
// KNL1-warning@*:* {{KNL, KNM related Intel Xeon Phi CPU's specific ISA's supports will be removed in LLVM 19.}}
#if defined(TEST_32_ARCH_AVX512F)
#if _M_IX86_FP != 2 || !__AVX__ || !__AVX2__ || !__AVX512F__ || __AVX512BW__
#error fail
Expand Down Expand Up @@ -109,7 +112,10 @@
// RUN: %clang_cl -m64 -arch:avx2 --target=x86_64-pc-windows -### -- 2>&1 %s | FileCheck -check-prefix=avx264 %s
// avx264: invalid /arch: argument

// RUN: %clang_cl -m64 -arch:AVX512F --target=i386-pc-windows /c /Fo%t.obj -Xclang -verify -DTEST_64_ARCH_AVX512F -- %s
// RUN: %clang_cl -m64 -arch:AVX512F --target=i386-pc-windows /c /Fo%t.obj -Xclang -verify=KNL2 -DTEST_64_ARCH_AVX512F -- %s
// KNL2-warning@*:* {{KNL, KNM related Intel Xeon Phi CPU's specific ISA's supports will be removed in LLVM 19.}}
// KNL2-warning@*:* {{KNL, KNM related Intel Xeon Phi CPU's specific ISA's supports will be removed in LLVM 19.}}
// KNL2-warning@*:* {{KNL, KNM related Intel Xeon Phi CPU's specific ISA's supports will be removed in LLVM 19.}}
#if defined(TEST_64_ARCH_AVX512F)
#if _M_IX86_FP || !__AVX__ || !__AVX2__ || !__AVX512F__ || __AVX512BW__
#error fail
Expand Down
13 changes: 9 additions & 4 deletions clang/test/Driver/x86-target-features.c
Original file line number Diff line number Diff line change
Expand Up @@ -21,10 +21,10 @@
// SSE4-AES: "-target-feature" "+sse4.2" "-target-feature" "+aes"
// NO-SSE4-AES: "-target-feature" "-sse4.1" "-target-feature" "-aes"

// RUN: %clang --target=i386 -march=i386 -mavx -mavx2 -mavx512f -mavx512cd -mavx512dq -mavx512bw -mavx512vl -mavx512vbmi -mavx512vbmi2 -mavx512ifma %s -### 2>&1 | FileCheck -check-prefix=AVX %s
// RUN: %clang --target=i386 -march=i386 -mno-avx -mno-avx2 -mno-avx512f -mno-avx512cd -mno-avx512dq -mno-avx512bw -mno-avx512vl -mno-avx512vbmi -mno-avx512vbmi2 -mno-avx512ifma %s -### 2>&1 | FileCheck -check-prefix=NO-AVX %s
// AVX: "-target-feature" "+avx" "-target-feature" "+avx2" "-target-feature" "+avx512f" "-target-feature" "+avx512cd" "-target-feature" "+avx512dq" "-target-feature" "+avx512bw" "-target-feature" "+avx512vl" "-target-feature" "+avx512vbmi" "-target-feature" "+avx512vbmi2" "-target-feature" "+avx512ifma"
// NO-AVX: "-target-feature" "-avx" "-target-feature" "-avx2" "-target-feature" "-avx512f" "-target-feature" "-avx512cd" "-target-feature" "-avx512dq" "-target-feature" "-avx512bw" "-target-feature" "-avx512vl" "-target-feature" "-avx512vbmi" "-target-feature" "-avx512vbmi2" "-target-feature" "-avx512ifma"
// RUN: %clang --target=i386 -march=i386 -mavx -mavx2 -mavx512f -mavx512cd -mavx512er -mavx512pf -mavx512dq -mavx512bw -mavx512vl -mavx512vbmi -mavx512vbmi2 -mavx512ifma %s -### 2>&1 | FileCheck -check-prefix=AVX %s
// RUN: %clang --target=i386 -march=i386 -mno-avx -mno-avx2 -mno-avx512f -mno-avx512cd -mno-avx512er -mno-avx512pf -mno-avx512dq -mno-avx512bw -mno-avx512vl -mno-avx512vbmi -mno-avx512vbmi2 -mno-avx512ifma %s -### 2>&1 | FileCheck -check-prefix=NO-AVX %s
// AVX: "-target-feature" "+avx" "-target-feature" "+avx2" "-target-feature" "+avx512f" "-target-feature" "+avx512cd" "-target-feature" "+avx512er" "-target-feature" "+avx512pf" "-target-feature" "+avx512dq" "-target-feature" "+avx512bw" "-target-feature" "+avx512vl" "-target-feature" "+avx512vbmi" "-target-feature" "+avx512vbmi2" "-target-feature" "+avx512ifma"
// NO-AVX: "-target-feature" "-avx" "-target-feature" "-avx2" "-target-feature" "-avx512f" "-target-feature" "-avx512cd" "-target-feature" "-avx512er" "-target-feature" "-avx512pf" "-target-feature" "-avx512dq" "-target-feature" "-avx512bw" "-target-feature" "-avx512vl" "-target-feature" "-avx512vbmi" "-target-feature" "-avx512vbmi2" "-target-feature" "-avx512ifma"

// RUN: %clang --target=i386 -march=i386 -mpclmul -mrdrnd -mfsgsbase -mbmi -mbmi2 %s -### 2>&1 | FileCheck -check-prefix=BMI %s
// RUN: %clang --target=i386 -march=i386 -mno-pclmul -mno-rdrnd -mno-fsgsbase -mno-bmi -mno-bmi2 %s -### 2>&1 | FileCheck -check-prefix=NO-BMI %s
Expand Down Expand Up @@ -86,6 +86,11 @@
// SGX: "-target-feature" "+sgx"
// NO-SGX: "-target-feature" "-sgx"

// RUN: %clang --target=i386 -march=i386 -mprefetchwt1 %s -### 2>&1 | FileCheck -check-prefix=PREFETCHWT1 %s
// RUN: %clang --target=i386 -march=i386 -mno-prefetchwt1 %s -### 2>&1 | FileCheck -check-prefix=NO-PREFETCHWT1 %s
// PREFETCHWT1: "-target-feature" "+prefetchwt1"
// NO-PREFETCHWT1: "-target-feature" "-prefetchwt1"

// RUN: %clang --target=i386 -march=i386 -mprefetchi %s -### -o %t.o 2>&1 | FileCheck -check-prefix=PREFETCHI %s
// RUN: %clang --target=i386 -march=i386 -mno-prefetchi %s -### -o %t.o 2>&1 | FileCheck -check-prefix=NO-PREFETCHI %s
// PREFETCHI: "-target-feature" "+prefetchi"
Expand Down
10 changes: 8 additions & 2 deletions clang/test/Frontend/x86-target-cpu.c
Original file line number Diff line number Diff line change
Expand Up @@ -15,8 +15,14 @@
// RUN: %clang_cc1 -triple x86_64-unknown-unknown -target-cpu cannonlake -verify %s
// RUN: %clang_cc1 -triple x86_64-unknown-unknown -target-cpu icelake-client -verify %s
// RUN: %clang_cc1 -triple x86_64-unknown-unknown -target-cpu icelake-server -verify %s
// RUN: %clang_cc1 -triple x86_64-unknown-unknown -target-cpu knl -verify %s
// RUN: %clang_cc1 -triple x86_64-unknown-unknown -target-cpu knm -verify %s
// RUN: %clang_cc1 -triple x86_64-unknown-unknown -target-cpu knl -verify=knl %s
// knl-warning@*:* {{KNL, KNM related Intel Xeon Phi CPU's specific ISA's supports will be removed in LLVM 19.}}
// knl-warning@*:* {{KNL, KNM related Intel Xeon Phi CPU's specific ISA's supports will be removed in LLVM 19.}}
// knl-warning@*:* {{KNL, KNM related Intel Xeon Phi CPU's specific ISA's supports will be removed in LLVM 19.}}
// RUN: %clang_cc1 -triple x86_64-unknown-unknown -target-cpu knm -verify=knm %s
// knm-warning@*:* {{KNL, KNM related Intel Xeon Phi CPU's specific ISA's supports will be removed in LLVM 19.}}
// knm-warning@*:* {{KNL, KNM related Intel Xeon Phi CPU's specific ISA's supports will be removed in LLVM 19.}}
// knm-warning@*:* {{KNL, KNM related Intel Xeon Phi CPU's specific ISA's supports will be removed in LLVM 19.}}
// RUN: %clang_cc1 -triple x86_64-unknown-unknown -target-cpu bonnell -verify %s
// RUN: %clang_cc1 -triple x86_64-unknown-unknown -target-cpu silvermont -verify %s
// RUN: %clang_cc1 -triple x86_64-unknown-unknown -target-cpu k8 -verify %s
Expand Down
12 changes: 12 additions & 0 deletions clang/test/Preprocessor/predefined-arch-macros.c
Original file line number Diff line number Diff line change
Expand Up @@ -793,7 +793,9 @@
// CHECK_KNL_M32: #define __AES__ 1
// CHECK_KNL_M32: #define __AVX2__ 1
// CHECK_KNL_M32: #define __AVX512CD__ 1
// CHECK_KNL_M32: #define __AVX512ER__ 1
// CHECK_KNL_M32: #define __AVX512F__ 1
// CHECK_KNL_M32: #define __AVX512PF__ 1
// CHECK_KNL_M32: #define __AVX__ 1
// CHECK_KNL_M32: #define __BMI2__ 1
// CHECK_KNL_M32: #define __BMI__ 1
Expand All @@ -806,6 +808,7 @@
// CHECK_KNL_M32: #define __MOVBE__ 1
// CHECK_KNL_M32: #define __PCLMUL__ 1
// CHECK_KNL_M32: #define __POPCNT__ 1
// CHECK_KNL_M32: #define __PREFETCHWT1__ 1
// CHECK_KNL_M32: #define __PRFCHW__ 1
// CHECK_KNL_M32: #define __RDRND__ 1
// CHECK_KNL_M32: #define __SSE2__ 1
Expand All @@ -829,7 +832,9 @@
// CHECK_KNL_M64: #define __AES__ 1
// CHECK_KNL_M64: #define __AVX2__ 1
// CHECK_KNL_M64: #define __AVX512CD__ 1
// CHECK_KNL_M64: #define __AVX512ER__ 1
// CHECK_KNL_M64: #define __AVX512F__ 1
// CHECK_KNL_M64: #define __AVX512PF__ 1
// CHECK_KNL_M64: #define __AVX__ 1
// CHECK_KNL_M64: #define __BMI2__ 1
// CHECK_KNL_M64: #define __BMI__ 1
Expand All @@ -842,6 +847,7 @@
// CHECK_KNL_M64: #define __MOVBE__ 1
// CHECK_KNL_M64: #define __PCLMUL__ 1
// CHECK_KNL_M64: #define __POPCNT__ 1
// CHECK_KNL_M64: #define __PREFETCHWT1__ 1
// CHECK_KNL_M64: #define __PRFCHW__ 1
// CHECK_KNL_M64: #define __RDRND__ 1
// CHECK_KNL_M64: #define __SSE2_MATH__ 1
Expand All @@ -868,7 +874,9 @@
// CHECK_KNM_M32: #define __AES__ 1
// CHECK_KNM_M32: #define __AVX2__ 1
// CHECK_KNM_M32: #define __AVX512CD__ 1
// CHECK_KNM_M32: #define __AVX512ER__ 1
// CHECK_KNM_M32: #define __AVX512F__ 1
// CHECK_KNM_M32: #define __AVX512PF__ 1
// CHECK_KNM_M32: #define __AVX512VPOPCNTDQ__ 1
// CHECK_KNM_M32: #define __AVX__ 1
// CHECK_KNM_M32: #define __BMI2__ 1
Expand All @@ -882,6 +890,7 @@
// CHECK_KNM_M32: #define __MOVBE__ 1
// CHECK_KNM_M32: #define __PCLMUL__ 1
// CHECK_KNM_M32: #define __POPCNT__ 1
// CHECK_KNM_M32: #define __PREFETCHWT1__ 1
// CHECK_KNM_M32: #define __PRFCHW__ 1
// CHECK_KNM_M32: #define __RDRND__ 1
// CHECK_KNM_M32: #define __SSE2__ 1
Expand All @@ -902,7 +911,9 @@
// CHECK_KNM_M64: #define __AES__ 1
// CHECK_KNM_M64: #define __AVX2__ 1
// CHECK_KNM_M64: #define __AVX512CD__ 1
// CHECK_KNM_M64: #define __AVX512ER__ 1
// CHECK_KNM_M64: #define __AVX512F__ 1
// CHECK_KNM_M64: #define __AVX512PF__ 1
// CHECK_KNM_M64: #define __AVX512VPOPCNTDQ__ 1
// CHECK_KNM_M64: #define __AVX__ 1
// CHECK_KNM_M64: #define __BMI2__ 1
Expand All @@ -916,6 +927,7 @@
// CHECK_KNM_M64: #define __MOVBE__ 1
// CHECK_KNM_M64: #define __PCLMUL__ 1
// CHECK_KNM_M64: #define __POPCNT__ 1
// CHECK_KNM_M64: #define __PREFETCHWT1__ 1
// CHECK_KNM_M64: #define __PRFCHW__ 1
// CHECK_KNM_M64: #define __RDRND__ 1
// CHECK_KNM_M64: #define __SSE2_MATH__ 1
Expand Down
50 changes: 50 additions & 0 deletions clang/test/Preprocessor/x86_target_features.c
Original file line number Diff line number Diff line change
Expand Up @@ -90,6 +90,38 @@
// AVX512CD: #define __SSE__ 1
// AVX512CD: #define __SSSE3__ 1

// RUN: %clang -target i386-unknown-unknown -march=atom -mavx512er -x c -E -dM -o - %s | FileCheck -match-full-lines --check-prefix=AVX512ER %s

// AVX512ER: #define __AVX2__ 1
// AVX512ER: #define __AVX512ER__ 1
// AVX512ER: #define __AVX512F__ 1
// AVX512ER: #define __AVX__ 1
// AVX512ER: #define __EVEX512__ 1
// AVX512ER: #define __SSE2_MATH__ 1
// AVX512ER: #define __SSE2__ 1
// AVX512ER: #define __SSE3__ 1
// AVX512ER: #define __SSE4_1__ 1
// AVX512ER: #define __SSE4_2__ 1
// AVX512ER: #define __SSE_MATH__ 1
// AVX512ER: #define __SSE__ 1
// AVX512ER: #define __SSSE3__ 1

// RUN: %clang -target i386-unknown-unknown -march=atom -mavx512pf -x c -E -dM -o - %s | FileCheck -match-full-lines --check-prefix=AVX512PF %s

// AVX512PF: #define __AVX2__ 1
// AVX512PF: #define __AVX512F__ 1
// AVX512PF: #define __AVX512PF__ 1
// AVX512PF: #define __AVX__ 1
// AVX512PF: #define __EVEX512__ 1
// AVX512PF: #define __SSE2_MATH__ 1
// AVX512PF: #define __SSE2__ 1
// AVX512PF: #define __SSE3__ 1
// AVX512PF: #define __SSE4_1__ 1
// AVX512PF: #define __SSE4_2__ 1
// AVX512PF: #define __SSE_MATH__ 1
// AVX512PF: #define __SSE__ 1
// AVX512PF: #define __SSSE3__ 1

// RUN: %clang -target i386-unknown-unknown -march=atom -mavx512dq -x c -E -dM -o - %s | FileCheck -match-full-lines --check-prefix=AVX512DQ %s

// AVX512DQ: #define __AVX2__ 1
Expand Down Expand Up @@ -139,6 +171,22 @@
// AVX512VL: #define __SSE__ 1
// AVX512VL: #define __SSSE3__ 1

// RUN: %clang -target i386-unknown-unknown -march=atom -mavx512pf -mno-avx512f -x c -E -dM -o - %s | FileCheck -match-full-lines --check-prefix=AVX512F2 %s

// AVX512F2: #define __AVX2__ 1
// AVX512F2-NOT: #define __AVX512F__ 1
// AVX512F2-NOT: #define __AVX512PF__ 1
// AVX512F2-NOT: #define __EVEX512__ 1
// AVX512F2: #define __AVX__ 1
// AVX512F2: #define __SSE2_MATH__ 1
// AVX512F2: #define __SSE2__ 1
// AVX512F2: #define __SSE3__ 1
// AVX512F2: #define __SSE4_1__ 1
// AVX512F2: #define __SSE4_2__ 1
// AVX512F2: #define __SSE_MATH__ 1
// AVX512F2: #define __SSE__ 1
// AVX512F2: #define __SSSE3__ 1

// RUN: %clang -target i386-unknown-unknown -march=atom -mavx512ifma -x c -E -dM -o - %s | FileCheck -match-full-lines --check-prefix=AVX512IFMA %s

// AVX512IFMA: #define __AVX2__ 1
Expand Down Expand Up @@ -592,12 +640,14 @@

// RUN: %clang -target i386-unknown-unknown -march=atom -mavx512f -mno-avx512f -x c -E -dM -o - %s | FileCheck -match-full-lines --check-prefix=NOEVEX512 %s
// RUN: %clang -target i386-unknown-unknown -march=atom -mavx512cd -mno-avx512f -x c -E -dM -o - %s | FileCheck -match-full-lines --check-prefix=NOEVEX512 %s
// RUN: %clang -target i386-unknown-unknown -march=atom -mavx512er -mno-avx512f -x c -E -dM -o - %s | FileCheck -match-full-lines --check-prefix=NOEVEX512 %s
// NOEVEX512-NOT: #define __AVX512F__ 1
// NOEVEX512-NOT: #define __EVEX256__ 1
// NOEVEX512-NOT: #define __EVEX512__ 1

// RUN: %clang -target i386-unknown-unknown -march=atom -mavx512f -mno-evex512 -x c -E -dM -o - %s | FileCheck -match-full-lines --check-prefix=AVX512NOEVEX512 %s
// RUN: %clang -target i386-unknown-unknown -march=atom -mavx512cd -mno-evex512 -x c -E -dM -o - %s | FileCheck -match-full-lines --check-prefix=AVX512NOEVEX512 %s
// RUN: %clang -target i386-unknown-unknown -march=atom -mavx512er -mno-evex512 -x c -E -dM -o - %s | FileCheck -match-full-lines --check-prefix=AVX512NOEVEX512 %s
// AVX512NOEVEX512: #define __AVX512F__ 1
// AVX512NOEVEX512-NOT: #define __EVEX256__ 1
// AVX512NOEVEX512-NOT: #define __EVEX512__ 1
Expand Down
8 changes: 8 additions & 0 deletions clang/test/Sema/builtins-x86.c
Original file line number Diff line number Diff line change
Expand Up @@ -106,6 +106,14 @@ __m128i test_mm_mask_i32gather_epi32(__m128i a, int const *b, __m128i c, __m128i
return __builtin_ia32_gatherd_d(a, b, c, mask, 5); // expected-error {{scale argument must be 1, 2, 4, or 8}}
}

// Negative test: passes 5 as the fourth (scale) argument of the gather
// prefetch builtin and expects the scale diagnostic on that line.
void _mm512_mask_prefetch_i32gather_ps(__m512i index, __mmask16 mask, int const *addr) {
__builtin_ia32_gatherpfdps(mask, index, addr, 5, 1); // expected-error {{scale argument must be 1, 2, 4, or 8}}
}

// Negative test: the scale (1) is valid here, but the final immediate is 1,
// which lies outside the accepted [2, 3] range, so a range error is expected.
void _mm512_mask_prefetch_i32gather_ps_2(__m512i index, __mmask16 mask, int const *addr) {
__builtin_ia32_gatherpfdps(mask, index, addr, 1, 1); // expected-error {{argument value 1 is outside the valid range [2, 3]}}
}

__m512i test_mm512_shldi_epi64(__m512i __A, __m512i __B) {
return __builtin_ia32_vpshldq512(__A, __B, 1024); // expected-error {{argument value 1024 is outside the valid range [0, 255]}}
}
Expand Down
3 changes: 0 additions & 3 deletions llvm/docs/ReleaseNotes.rst
Original file line number Diff line number Diff line change
Expand Up @@ -140,9 +140,6 @@ Changes to the Windows Target
Changes to the X86 Backend
--------------------------

- Removed KNL/KNM-specific ISA intrinsics (AVX512PF, AVX512ER, PREFETCHWT1);
assembly encoding/decoding support is kept.

Changes to the OCaml bindings
-----------------------------

Expand Down
84 changes: 84 additions & 0 deletions llvm/include/llvm/IR/IntrinsicsX86.td
Original file line number Diff line number Diff line change
Expand Up @@ -3843,6 +3843,58 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
DefaultAttrsIntrinsic<[llvm_v16f32_ty],
[llvm_v16f32_ty, llvm_v16f32_ty, llvm_i16_ty],
[IntrNoMem]>;

// Packed rcp28 / exp2 intrinsics. Each takes two vectors, an integer mask
// (i16 for the 16 x f32 forms, i8 for the 8 x f64 forms) and a trailing i32
// that ImmArg<ArgIndex<3>> requires to be a compile-time constant.
// NOTE(review): the clang header's exp2 macros pass (A, S or a zero vector,
// mask M, rounding argument R) in that order; presumably the rcp28 forms use
// the same operand layout -- confirm against the header.
def int_x86_avx512_rcp28_ps : ClangBuiltin<"__builtin_ia32_rcp28ps_mask">,
DefaultAttrsIntrinsic<[llvm_v16f32_ty],
[llvm_v16f32_ty, llvm_v16f32_ty, llvm_i16_ty,
llvm_i32_ty],
[IntrNoMem, ImmArg<ArgIndex<3>>]>;
def int_x86_avx512_rcp28_pd : ClangBuiltin<"__builtin_ia32_rcp28pd_mask">,
DefaultAttrsIntrinsic<[llvm_v8f64_ty],
[llvm_v8f64_ty, llvm_v8f64_ty, llvm_i8_ty,
llvm_i32_ty],
[IntrNoMem, ImmArg<ArgIndex<3>>]>;
def int_x86_avx512_exp2_ps : ClangBuiltin<"__builtin_ia32_exp2ps_mask">,
DefaultAttrsIntrinsic<[llvm_v16f32_ty],
[llvm_v16f32_ty, llvm_v16f32_ty, llvm_i16_ty,
llvm_i32_ty],
[IntrNoMem, ImmArg<ArgIndex<3>>]>;
def int_x86_avx512_exp2_pd : ClangBuiltin<"__builtin_ia32_exp2pd_mask">,
DefaultAttrsIntrinsic<[llvm_v8f64_ty],
[llvm_v8f64_ty, llvm_v8f64_ty, llvm_i8_ty,
llvm_i32_ty],
[IntrNoMem, ImmArg<ArgIndex<3>>]>;

// Scalar (ss/sd) rcp28 forms: three 128-bit vector operands, an i8 mask and
// a trailing i32 immediate (ImmArg<ArgIndex<4>>).
def int_x86_avx512_rcp28_ss : ClangBuiltin<"__builtin_ia32_rcp28ss_round_mask">,
DefaultAttrsIntrinsic<[llvm_v4f32_ty],
[llvm_v4f32_ty, llvm_v4f32_ty, llvm_v4f32_ty,
llvm_i8_ty, llvm_i32_ty],
[IntrNoMem, ImmArg<ArgIndex<4>>]>;
def int_x86_avx512_rcp28_sd : ClangBuiltin<"__builtin_ia32_rcp28sd_round_mask">,
DefaultAttrsIntrinsic<[llvm_v2f64_ty],
[llvm_v2f64_ty, llvm_v2f64_ty, llvm_v2f64_ty,
llvm_i8_ty, llvm_i32_ty],
[IntrNoMem, ImmArg<ArgIndex<4>>]>;
// Packed rsqrt28 forms: same signature shape as the packed rcp28 defs above
// in this block (two vectors, mask, i32 immediate at index 3).
def int_x86_avx512_rsqrt28_ps : ClangBuiltin<"__builtin_ia32_rsqrt28ps_mask">,
DefaultAttrsIntrinsic<[llvm_v16f32_ty],
[llvm_v16f32_ty, llvm_v16f32_ty, llvm_i16_ty,
llvm_i32_ty],
[IntrNoMem, ImmArg<ArgIndex<3>>]>;
def int_x86_avx512_rsqrt28_pd : ClangBuiltin<"__builtin_ia32_rsqrt28pd_mask">,
DefaultAttrsIntrinsic<[llvm_v8f64_ty],
[llvm_v8f64_ty, llvm_v8f64_ty, llvm_i8_ty,
llvm_i32_ty],
[IntrNoMem, ImmArg<ArgIndex<3>>]>;
// Scalar rsqrt28 forms: same signature shape as the scalar rcp28 defs
// (three vectors, i8 mask, i32 immediate at index 4).
def int_x86_avx512_rsqrt28_ss : ClangBuiltin<"__builtin_ia32_rsqrt28ss_round_mask">,
DefaultAttrsIntrinsic<[llvm_v4f32_ty],
[llvm_v4f32_ty, llvm_v4f32_ty, llvm_v4f32_ty,
llvm_i8_ty, llvm_i32_ty],
[IntrNoMem, ImmArg<ArgIndex<4>>]>;
def int_x86_avx512_rsqrt28_sd : ClangBuiltin<"__builtin_ia32_rsqrt28sd_round_mask">,
DefaultAttrsIntrinsic<[llvm_v2f64_ty],
[llvm_v2f64_ty, llvm_v2f64_ty, llvm_v2f64_ty,
llvm_i8_ty, llvm_i32_ty],
[IntrNoMem, ImmArg<ArgIndex<4>>]>;
def int_x86_avx512_psad_bw_512 : ClangBuiltin<"__builtin_ia32_psadbw512">,
DefaultAttrsIntrinsic<[llvm_v8i64_ty], [llvm_v64i8_ty, llvm_v64i8_ty],
[IntrNoMem, Commutative]>;
Expand Down Expand Up @@ -4125,6 +4177,38 @@ let TargetPrefix = "x86" in {
Intrinsic<[],
[llvm_ptr_ty, llvm_i8_ty, llvm_v8i32_ty, llvm_v8i32_ty, llvm_i32_ty],
[ImmArg<ArgIndex<4>>]>;

// gather prefetch
// NOTE: These can't be ArgMemOnly because you can put the address completely
// in the index register.
// Operands are (mask, index vector, pointer, i32, i32) and return nothing.
// Both trailing i32 operands are ImmArg, i.e. must be compile-time constants.
// The mask is i16 only for the dps form (v16i32 index); the other three forms
// have 8 index lanes and take an i8 mask.
// NOTE(review): the two immediates are presumably scale and prefetch hint --
// confirm against the builtin's Sema checks.
def int_x86_avx512_gatherpf_dpd_512 : ClangBuiltin<"__builtin_ia32_gatherpfdpd">,
Intrinsic<[], [llvm_i8_ty, llvm_v8i32_ty, llvm_ptr_ty,
llvm_i32_ty, llvm_i32_ty], [ImmArg<ArgIndex<3>>, ImmArg<ArgIndex<4>>]>;
def int_x86_avx512_gatherpf_dps_512 : ClangBuiltin<"__builtin_ia32_gatherpfdps">,
Intrinsic<[], [llvm_i16_ty, llvm_v16i32_ty, llvm_ptr_ty,
llvm_i32_ty, llvm_i32_ty], [ImmArg<ArgIndex<3>>, ImmArg<ArgIndex<4>>]>;
def int_x86_avx512_gatherpf_qpd_512 : ClangBuiltin<"__builtin_ia32_gatherpfqpd">,
Intrinsic<[], [llvm_i8_ty, llvm_v8i64_ty, llvm_ptr_ty,
llvm_i32_ty, llvm_i32_ty], [ImmArg<ArgIndex<3>>, ImmArg<ArgIndex<4>>]>;
def int_x86_avx512_gatherpf_qps_512 : ClangBuiltin<"__builtin_ia32_gatherpfqps">,
Intrinsic<[], [llvm_i8_ty, llvm_v8i64_ty, llvm_ptr_ty,
llvm_i32_ty, llvm_i32_ty], [ImmArg<ArgIndex<3>>, ImmArg<ArgIndex<4>>]>;

// scatter prefetch
// NOTE: These can't be ArgMemOnly because you can put the address completely
// in the index register.
// Operands are (mask, index vector, pointer, i32, i32) and return nothing.
// Both trailing i32 operands are ImmArg, i.e. must be compile-time constants.
// The mask is i16 only for the dps form (v16i32 index); the other three forms
// have 8 index lanes and take an i8 mask.
// NOTE(review): the two immediates are presumably scale and prefetch hint --
// confirm against the builtin's Sema checks.
def int_x86_avx512_scatterpf_dpd_512 : ClangBuiltin<"__builtin_ia32_scatterpfdpd">,
Intrinsic<[], [llvm_i8_ty, llvm_v8i32_ty, llvm_ptr_ty,
llvm_i32_ty, llvm_i32_ty], [ImmArg<ArgIndex<3>>, ImmArg<ArgIndex<4>>]>;
def int_x86_avx512_scatterpf_dps_512 : ClangBuiltin<"__builtin_ia32_scatterpfdps">,
Intrinsic<[], [llvm_i16_ty, llvm_v16i32_ty, llvm_ptr_ty,
llvm_i32_ty, llvm_i32_ty], [ImmArg<ArgIndex<3>>, ImmArg<ArgIndex<4>>]>;
def int_x86_avx512_scatterpf_qpd_512 : ClangBuiltin<"__builtin_ia32_scatterpfqpd">,
Intrinsic<[], [llvm_i8_ty, llvm_v8i64_ty, llvm_ptr_ty,
llvm_i32_ty, llvm_i32_ty], [ImmArg<ArgIndex<3>>, ImmArg<ArgIndex<4>>]>;
def int_x86_avx512_scatterpf_qps_512 : ClangBuiltin<"__builtin_ia32_scatterpfqps">,
Intrinsic<[], [llvm_i8_ty, llvm_v8i64_ty, llvm_ptr_ty,
llvm_i32_ty, llvm_i32_ty], [ImmArg<ArgIndex<3>>, ImmArg<ArgIndex<4>>]>;
}

// AVX512 gather/scatter intrinsics that use vXi1 masks.
Expand Down
9 changes: 6 additions & 3 deletions llvm/include/llvm/TargetParser/X86TargetParser.def
Original file line number Diff line number Diff line change
Expand Up @@ -159,8 +159,8 @@ X86_FEATURE_COMPAT(AVX512VL, "avx512vl", 20)
X86_FEATURE_COMPAT(AVX512BW, "avx512bw", 21)
X86_FEATURE_COMPAT(AVX512DQ, "avx512dq", 22)
X86_FEATURE_COMPAT(AVX512CD, "avx512cd", 23)
X86_FEATURE (NF, "nf")
X86_FEATURE (CF, "cf")
X86_FEATURE_COMPAT(AVX512ER, "avx512er", 24)
X86_FEATURE_COMPAT(AVX512PF, "avx512pf", 25)
X86_FEATURE_COMPAT(AVX512VBMI, "avx512vbmi", 26)
X86_FEATURE_COMPAT(AVX512IFMA, "avx512ifma", 27)
X86_FEATURE_COMPAT(AVX5124VNNIW, "avx5124vnniw", 28)
Expand Down Expand Up @@ -202,7 +202,7 @@ X86_FEATURE_COMPAT(MWAITX, "mwaitx", 0)
X86_FEATURE (X87, "x87")
X86_FEATURE_COMPAT(PCONFIG, "pconfig", 0)
X86_FEATURE_COMPAT(PKU, "pku", 0)
X86_FEATURE (EVEX512, "evex512")
X86_FEATURE_COMPAT(PREFETCHWT1, "prefetchwt1", 0)
X86_FEATURE_COMPAT(PRFCHW, "prfchw", 0)
X86_FEATURE_COMPAT(PTWRITE, "ptwrite", 0)
X86_FEATURE_COMPAT(RDPID, "rdpid", 0)
Expand Down Expand Up @@ -252,6 +252,9 @@ X86_FEATURE (EGPR, "egpr")
X86_FEATURE_COMPAT(USERMSR, "usermsr", 0)
X86_FEATURE_COMPAT(AVX10_1, "avx10.1-256", 0)
X86_FEATURE_COMPAT(AVX10_1_512, "avx10.1-512", 0)
X86_FEATURE (EVEX512, "evex512")
X86_FEATURE (NF, "nf")
X86_FEATURE (CF, "cf")
// These features aren't really CPU features, but the frontend can set them.
X86_FEATURE (RETPOLINE_EXTERNAL_THUNK, "retpoline-external-thunk")
X86_FEATURE (RETPOLINE_INDIRECT_BRANCHES, "retpoline-indirect-branches")
Expand Down
12 changes: 12 additions & 0 deletions llvm/lib/Target/X86/X86.td
Original file line number Diff line number Diff line change
Expand Up @@ -124,15 +124,24 @@ def FeatureEVEX512 : SubtargetFeature<"evex512", "HasEVEX512", "true",
// AVX-512 foundation feature and the extensions layered on it. The trailing
// list on each SubtargetFeature names the features it implies: every avx512*
// extension below lists FeatureAVX512, so enabling one also enables avx512f.
def FeatureAVX512 : SubtargetFeature<"avx512f", "X86SSELevel", "AVX512",
"Enable AVX-512 instructions",
[FeatureAVX2, FeatureFMA, FeatureF16C]>;
def FeatureERI : SubtargetFeature<"avx512er", "HasERI", "true",
"Enable AVX-512 Exponential and Reciprocal Instructions",
[FeatureAVX512]>;
def FeatureCDI : SubtargetFeature<"avx512cd", "HasCDI", "true",
"Enable AVX-512 Conflict Detection Instructions",
[FeatureAVX512]>;
def FeatureVPOPCNTDQ : SubtargetFeature<"avx512vpopcntdq", "HasVPOPCNTDQ",
"true", "Enable AVX-512 Population Count Instructions",
[FeatureAVX512]>;
def FeaturePFI : SubtargetFeature<"avx512pf", "HasPFI", "true",
"Enable AVX-512 PreFetch Instructions",
[FeatureAVX512]>;
// The two prefetch features below are declared without an implied-feature
// list, so they do not pull in AVX-512.
def FeaturePREFETCHI : SubtargetFeature<"prefetchi", "HasPREFETCHI",
"true",
"Prefetch instruction with T0 or T1 Hint">;
def FeaturePREFETCHWT1 : SubtargetFeature<"prefetchwt1", "HasPREFETCHWT1",
"true",
"Prefetch with Intent to Write and T1 Hint">;
def FeatureDQI : SubtargetFeature<"avx512dq", "HasDQI", "true",
"Enable AVX-512 Doubleword and Quadword Instructions",
[FeatureAVX512]>;
Expand Down Expand Up @@ -1303,7 +1312,10 @@ def ProcessorFeatures {
FeatureFSGSBase,
FeatureAVX512,
FeatureEVEX512,
FeatureERI,
FeatureCDI,
FeaturePFI,
FeaturePREFETCHWT1,
FeatureADX,
FeatureRDSEED,
FeatureMOVBE,
Expand Down
10 changes: 10 additions & 0 deletions llvm/lib/Target/X86/X86ISelLowering.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -33849,8 +33849,18 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const {
NODE_NAME_CASE(ADDSUB)
NODE_NAME_CASE(RCP14)
NODE_NAME_CASE(RCP14S)
NODE_NAME_CASE(RCP28)
NODE_NAME_CASE(RCP28_SAE)
NODE_NAME_CASE(RCP28S)
NODE_NAME_CASE(RCP28S_SAE)
NODE_NAME_CASE(EXP2)
NODE_NAME_CASE(EXP2_SAE)
NODE_NAME_CASE(RSQRT14)
NODE_NAME_CASE(RSQRT14S)
NODE_NAME_CASE(RSQRT28)
NODE_NAME_CASE(RSQRT28_SAE)
NODE_NAME_CASE(RSQRT28S)
NODE_NAME_CASE(RSQRT28S_SAE)
NODE_NAME_CASE(FADD_RND)
NODE_NAME_CASE(FADDS)
NODE_NAME_CASE(FADDS_RND)
Expand Down
12 changes: 12 additions & 0 deletions llvm/lib/Target/X86/X86ISelLowering.h
Original file line number Diff line number Diff line change
Expand Up @@ -699,6 +699,18 @@ namespace llvm {
// Test if in transactional execution.
XTEST,

// ERI instructions.
RSQRT28,
RSQRT28_SAE,
RSQRT28S,
RSQRT28S_SAE,
RCP28,
RCP28_SAE,
RCP28S,
RCP28S_SAE,
EXP2,
EXP2_SAE,

// Conversions between float and half-float.
CVTPS2PH,
CVTPS2PH_SAE,
Expand Down
3 changes: 2 additions & 1 deletion llvm/lib/Target/X86/X86Instr3DNow.td
Original file line number Diff line number Diff line change
Expand Up @@ -90,7 +90,8 @@ def PREFETCHW : I<0x0D, MRM1m, (outs), (ins i8mem:$addr), "prefetchw\t$addr",
TB, Requires<[HasPrefetchW]>;

// PREFETCHWT1 (opcode 0x0D, /2 memory form). Selected for prefetch nodes whose
// second and fourth operands are 1 and whose locality immediate satisfies
// PrefetchWT1Level; only available when the subtarget has PREFETCHWT1.
def PREFETCHWT1 : I<0x0D, MRM2m, (outs), (ins i8mem:$addr), "prefetchwt1\t$addr",
                    [(prefetch addr:$addr, (i32 1), (i32 PrefetchWT1Level), (i32 1))]>,
                  TB, Requires<[HasPREFETCHWT1]>;
}

// "3DNowA" instructions
Expand Down
91 changes: 16 additions & 75 deletions llvm/lib/Target/X86/X86InstrAVX512.td
Original file line number Diff line number Diff line change
Expand Up @@ -9265,37 +9265,6 @@ multiclass avx512_fp28_s<bits<8> opc, string OpcodeStr,X86VectorVTInfo _,
}
}

// Scalar instruction forms declared with null_frag in place of a selection
// pattern: a register form (r), a register form printed with {sae} and marked
// EVEX_B (rb), and a memory form (m) flagged mayLoad. All three take two
// sources ($src1, $src2) and read MXCSR.
multiclass avx512_fp28_s_ass<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
X86FoldableSchedWrite sched> {
let ExeDomain = _.ExeDomain, Uses = [MXCSR], hasSideEffects = 0 in {
defm r : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
(ins _.RC:$src1, _.RC:$src2), OpcodeStr,
"$src2, $src1", "$src1, $src2",
(null_frag)>, Sched<[sched]>, SIMD_EXC;
defm rb : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
(ins _.RC:$src1, _.RC:$src2), OpcodeStr,
"{sae}, $src2, $src1", "$src1, $src2, {sae}",
(null_frag)>, EVEX_B, Sched<[sched]>;
let mayLoad = 1 in
defm m : AVX512_maskable_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
(ins _.RC:$src1, _.IntScalarMemOp:$src2), OpcodeStr,
"$src2, $src1", "$src1, $src2",
(null_frag)>,
Sched<[sched.Folded, sched.ReadAfterFold]>, SIMD_EXC;
}
}

// Instantiates avx512_fp28_s_ass twice: SSZ for f32 scalars (mnemonic suffix
// "ss") and SDZ for f64 scalars (suffix "sd", additionally marked REX_W).
multiclass avx512_eri_s_ass<bits<8> opc, string OpcodeStr,
X86FoldableSchedWrite sched> {
defm SSZ : avx512_fp28_s_ass<opc, OpcodeStr#"ss", f32x_info, sched>,
EVEX_CD8<32, CD8VT1>, VEX_LIG, T8, PD, EVEX, VVVV;
defm SDZ : avx512_fp28_s_ass<opc, OpcodeStr#"sd", f64x_info, sched>,
EVEX_CD8<64, CD8VT1>, VEX_LIG, REX_W, T8, PD, EVEX, VVVV;
}

// Pattern-less scalar vrcp28 (0xCB) and vrsqrt28 (0xCD) instantiations.
// NOTE(review): VRCP28 and VRSQRT28 are instantiated again further down via
// avx512_eri_s under HasERI -- confirm that only one set of scalar
// definitions is meant to remain, since the record names may clash.
defm VRCP28 : avx512_eri_s_ass<0xCB, "vrcp28", SchedWriteFRcp.Scl>;
defm VRSQRT28 : avx512_eri_s_ass<0xCD, "vrsqrt28", SchedWriteFRsqrt.Scl>;

multiclass avx512_eri_s<bits<8> opc, string OpcodeStr, SDNode OpNode,
SDNode OpNodeSAE, X86FoldableSchedWrite sched> {
defm SSZ : avx512_fp28_s<opc, OpcodeStr#"ss", f32x_info, OpNode, OpNodeSAE,
Expand All @@ -9311,6 +9280,13 @@ multiclass avx512_vgetexpsh<bits<8> opc, string OpcodeStr, SDNode OpNode,
EVEX_CD8<16, CD8VT1>, T_MAP6, PD, EVEX, VVVV;
}

// Scalar vrcp28 (0xCB) and vrsqrt28 (0xCD), guarded by HasERI and built from
// the X86rcp28s / X86rsqrt28s nodes together with their SAE counterparts.
let Predicates = [HasERI] in {
defm VRCP28 : avx512_eri_s<0xCB, "vrcp28", X86rcp28s, X86rcp28SAEs,
SchedWriteFRcp.Scl>;
defm VRSQRT28 : avx512_eri_s<0xCD, "vrsqrt28", X86rsqrt28s, X86rsqrt28SAEs,
SchedWriteFRsqrt.Scl>;
}

defm VGETEXP : avx512_eri_s<0x43, "vgetexp", X86fgetexps, X86fgetexpSAEs,
SchedWriteFRnd.Scl>,
avx512_vgetexpsh<0x43, "vgetexp", X86fgetexps, X86fgetexpSAEs,
Expand Down Expand Up @@ -9349,49 +9325,6 @@ multiclass avx512_fp28_p_sae<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
EVEX_B, Sched<[sched]>;
}

// Packed single-source instruction forms declared with null_frag in place of
// a selection pattern: register (r), full-width memory (m) and broadcast
// memory (mb, marked EVEX_B). All read MXCSR and are flagged as possibly
// raising FP exceptions; the memory forms are marked mayLoad.
multiclass avx512_fp28_p_ass<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
X86FoldableSchedWrite sched> {
let ExeDomain = _.ExeDomain, Uses = [MXCSR], mayRaiseFPException = 1,
hasSideEffects = 0 in {
defm r : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
(ins _.RC:$src), OpcodeStr, "$src", "$src",
(null_frag)>, Sched<[sched]>;
let mayLoad = 1 in
defm m : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
(ins _.MemOp:$src), OpcodeStr, "$src", "$src",
(null_frag)>,
Sched<[sched.Folded, sched.ReadAfterFold]>;
let mayLoad = 1 in
defm mb : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
(ins _.ScalarMemOp:$src), OpcodeStr,
"${src}"#_.BroadcastStr, "${src}"#_.BroadcastStr,
(null_frag)>,
EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>;
}
}
// Packed register form printed with {sae} and marked EVEX_B (rb), declared
// with null_frag in place of a selection pattern.
multiclass avx512_fp28_p_sae_ass<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
X86FoldableSchedWrite sched> {
let ExeDomain = _.ExeDomain, Uses = [MXCSR], hasSideEffects = 0 in
defm rb : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
(ins _.RC:$src), OpcodeStr,
"{sae}, $src", "$src, {sae}",
(null_frag)>, Sched<[sched]>, EVEX_B;
}

// Combines avx512_fp28_p_ass and avx512_fp28_p_sae_ass for the two 512-bit
// vector types: PSZ on v16f32_info (suffix "ps") and PDZ on v8f64_info
// (suffix "pd", additionally marked REX_W). Both use the ZMM scheduling class.
multiclass avx512_eri_ass<bits<8> opc, string OpcodeStr,
X86SchedWriteWidths sched> {
defm PSZ : avx512_fp28_p_ass<opc, OpcodeStr#"ps", v16f32_info, sched.ZMM>,
avx512_fp28_p_sae_ass<opc, OpcodeStr#"ps", v16f32_info, sched.ZMM>,
T8, PD, EVEX_V512, EVEX_CD8<32, CD8VF>;
defm PDZ : avx512_fp28_p_ass<opc, OpcodeStr#"pd", v8f64_info, sched.ZMM>,
avx512_fp28_p_sae_ass<opc, OpcodeStr#"pd", v8f64_info, sched.ZMM>,
T8, PD, EVEX_V512, REX_W, EVEX_CD8<64, CD8VF>;
}

// Pattern-less packed vrsqrt28 (0xCC), vrcp28 (0xCA) and vexp2 (0xC8)
// instantiations.
// NOTE(review): the same three defm names are instantiated again further down
// via avx512_eri under HasERI -- confirm that only one set of packed
// definitions is meant to remain, since the record names may clash.
defm VRSQRT28 : avx512_eri_ass<0xCC, "vrsqrt28", SchedWriteFRsqrt>, EVEX;
defm VRCP28 : avx512_eri_ass<0xCA, "vrcp28", SchedWriteFRcp>, EVEX;
defm VEXP2 : avx512_eri_ass<0xC8, "vexp2", SchedWriteFAdd>, EVEX;

multiclass avx512_eri<bits<8> opc, string OpcodeStr, SDNode OpNode,
SDNode OpNodeSAE, X86SchedWriteWidths sched> {
defm PSZ : avx512_fp28_p<opc, OpcodeStr#"ps", v16f32_info, OpNode, sched.ZMM>,
Expand Down Expand Up @@ -9434,6 +9367,14 @@ multiclass avx512_vgetexp_fp16<bits<8> opc, string OpcodeStr, SDNode OpNode,
EVEX_V256, T_MAP6, PD, EVEX_CD8<16, CD8VF>;
}
}
// Packed vrsqrt28 (0xCC), vrcp28 (0xCA) and vexp2 (0xC8), guarded by HasERI
// and built from the X86rsqrt28 / X86rcp28 / X86exp2 nodes together with
// their SAE counterparts.
let Predicates = [HasERI] in {
defm VRSQRT28 : avx512_eri<0xCC, "vrsqrt28", X86rsqrt28, X86rsqrt28SAE,
SchedWriteFRsqrt>, EVEX;
defm VRCP28 : avx512_eri<0xCA, "vrcp28", X86rcp28, X86rcp28SAE,
SchedWriteFRcp>, EVEX;
defm VEXP2 : avx512_eri<0xC8, "vexp2", X86exp2, X86exp2SAE,
SchedWriteFAdd>, EVEX;
}
defm VGETEXP : avx512_eri<0x42, "vgetexp", X86fgetexp, X86fgetexpSAE,
SchedWriteFRnd>,
avx512_vgetexp_fp16<0x42, "vgetexp", X86fgetexp, X86fgetexpSAE,
Expand Down Expand Up @@ -10367,7 +10308,7 @@ defm VPSCATTER : avx512_scatter_q_pd<0xA0, 0xA1, avx512vl_i64_info, "vpscatter",
// prefetch
multiclass avx512_gather_scatter_prefetch<bits<8> opc, Format F, string OpcodeStr,
RegisterClass KRC, X86MemOperand memop> {
let mayLoad = 1, mayStore = 1 in
let Predicates = [HasPFI], mayLoad = 1, mayStore = 1 in
def m : AVX5128I<opc, F, (outs), (ins KRC:$mask, memop:$src),
!strconcat(OpcodeStr, "\t{$src {${mask}}|{${mask}}, $src}"), []>,
EVEX, EVEX_K, Sched<[WriteLoad]>;
Expand Down
8 changes: 7 additions & 1 deletion llvm/lib/Target/X86/X86InstrFragments.td
Original file line number Diff line number Diff line change
Expand Up @@ -607,8 +607,14 @@ def X86any_fcmp : PatFrags<(ops node:$lhs, node:$rhs),
[(X86strict_fcmp node:$lhs, node:$rhs),
(X86fcmp node:$lhs, node:$rhs)]>;

// If PREFETCHWT1 is supported, we want to use it for everything but T0, so
// this fragment then only accepts locality 3. Without PREFETCHWT1 every
// locality is accepted.
def PrefetchWLevel : PatFrag<(ops), (i32 timm), [{
  return N->getSExtValue() == 3 || !Subtarget->hasPREFETCHWT1();
}]>;

// Localities below 3 (NTA, T2, T1) are the ones handled by PREFETCHWT1.
def PrefetchWT1Level : TImmLeaf<i32, [{
  return Imm <= 2;
}]>;

def X86lock_add_nocf : PatFrag<(ops node:$lhs, node:$rhs),
Expand Down
11 changes: 11 additions & 0 deletions llvm/lib/Target/X86/X86InstrFragmentsSIMD.td
Original file line number Diff line number Diff line change
Expand Up @@ -600,8 +600,19 @@ def X86Vpdpbusds : SDNode<"X86ISD::VPDPBUSDS", SDTVnni>;
def X86Vpdpwssd : SDNode<"X86ISD::VPDPWSSD", SDTVnni>;
def X86Vpdpwssds : SDNode<"X86ISD::VPDPWSSDS", SDTVnni>;

// Packed ERI nodes (rsqrt28, rcp28, exp2) and their _SAE forms. All of them
// use the unary FP type profile SDTFPUnaryOp.
def X86rsqrt28 : SDNode<"X86ISD::RSQRT28", SDTFPUnaryOp>;
def X86rsqrt28SAE: SDNode<"X86ISD::RSQRT28_SAE", SDTFPUnaryOp>;
def X86rcp28 : SDNode<"X86ISD::RCP28", SDTFPUnaryOp>;
def X86rcp28SAE : SDNode<"X86ISD::RCP28_SAE", SDTFPUnaryOp>;
def X86exp2 : SDNode<"X86ISD::EXP2", SDTFPUnaryOp>;
def X86exp2SAE : SDNode<"X86ISD::EXP2_SAE", SDTFPUnaryOp>;

def X86rsqrt14s : SDNode<"X86ISD::RSQRT14S", SDTFPBinOp>;
def X86rcp14s : SDNode<"X86ISD::RCP14S", SDTFPBinOp>;
// Scalar ERI nodes (rsqrt28s, rcp28s) and their _SAE forms. These use the
// binary FP type profile SDTFPBinOp, like the 14-bit scalar nodes above.
def X86rsqrt28s : SDNode<"X86ISD::RSQRT28S", SDTFPBinOp>;
def X86rsqrt28SAEs : SDNode<"X86ISD::RSQRT28S_SAE", SDTFPBinOp>;
def X86rcp28s : SDNode<"X86ISD::RCP28S", SDTFPBinOp>;
def X86rcp28SAEs : SDNode<"X86ISD::RCP28S_SAE", SDTFPBinOp>;
def X86Ranges : SDNode<"X86ISD::VRANGES", SDTFPBinOpImm>;
def X86RndScales : SDNode<"X86ISD::VRNDSCALES", SDTFPBinOpImm>;
def X86Reduces : SDNode<"X86ISD::VREDUCES", SDTFPBinOpImm>;
Expand Down
3 changes: 3 additions & 0 deletions llvm/lib/Target/X86/X86InstrPredicates.td
Original file line number Diff line number Diff line change
Expand Up @@ -79,6 +79,8 @@ def UseAVX2 : Predicate<"Subtarget->hasAVX2() && !Subtarget->hasAVX512()">;
def NoAVX512 : Predicate<"!Subtarget->hasAVX512()">;
def HasCDI : Predicate<"Subtarget->hasCDI()">;
def HasVPOPCNTDQ : Predicate<"Subtarget->hasVPOPCNTDQ()">;
// Satisfied when Subtarget->hasPFI() / Subtarget->hasERI() return true.
def HasPFI : Predicate<"Subtarget->hasPFI()">;
def HasERI : Predicate<"Subtarget->hasERI()">;
def HasDQI : Predicate<"Subtarget->hasDQI()">;
def NoDQI : Predicate<"!Subtarget->hasDQI()">;
def HasBWI : Predicate<"Subtarget->hasBWI()">;
Expand Down Expand Up @@ -145,6 +147,7 @@ def NoSSEPrefetch : Predicate<"!Subtarget->hasSSEPrefetch()">;
def HasPRFCHW : Predicate<"Subtarget->hasPRFCHW()">;
def HasPREFETCHI : Predicate<"Subtarget->hasPREFETCHI()">;
def HasPrefetchW : Predicate<"Subtarget->hasPrefetchW()">;
// Satisfied when Subtarget->hasPREFETCHWT1() returns true.
def HasPREFETCHWT1 : Predicate<"Subtarget->hasPREFETCHWT1()">;
def HasLAHFSAHF : Predicate<"Subtarget->hasLAHFSAHF()">;
def HasLAHFSAHF64 : Predicate<"Subtarget->hasLAHFSAHF64()">;
def HasMWAITX : Predicate<"Subtarget->hasMWAITX()">;
Expand Down
27 changes: 27 additions & 0 deletions llvm/lib/Target/X86/X86IntrinsicsInfo.h
Original file line number Diff line number Diff line change
Expand Up @@ -108,6 +108,15 @@ static const IntrinsicData IntrinsicsWithChain[] = {
X86_INTRINSIC_DATA(avx512_gather3siv8_sf, GATHER, 0, 0),
X86_INTRINSIC_DATA(avx512_gather3siv8_si, GATHER, 0, 0),

X86_INTRINSIC_DATA(avx512_gatherpf_dpd_512, PREFETCH,
X86::VGATHERPF0DPDm, X86::VGATHERPF1DPDm),
X86_INTRINSIC_DATA(avx512_gatherpf_dps_512, PREFETCH,
X86::VGATHERPF0DPSm, X86::VGATHERPF1DPSm),
X86_INTRINSIC_DATA(avx512_gatherpf_qpd_512, PREFETCH,
X86::VGATHERPF0QPDm, X86::VGATHERPF1QPDm),
X86_INTRINSIC_DATA(avx512_gatherpf_qps_512, PREFETCH,
X86::VGATHERPF0QPSm, X86::VGATHERPF1QPSm),

X86_INTRINSIC_DATA(avx512_mask_gather_dpd_512, GATHER, 0, 0),
X86_INTRINSIC_DATA(avx512_mask_gather_dpi_512, GATHER, 0, 0),
X86_INTRINSIC_DATA(avx512_mask_gather_dpq_512, GATHER, 0, 0),
Expand Down Expand Up @@ -283,6 +292,14 @@ static const IntrinsicData IntrinsicsWithChain[] = {
X86_INTRINSIC_DATA(avx512_scatterdiv4_si, SCATTER, 0, 0),
X86_INTRINSIC_DATA(avx512_scatterdiv8_sf, SCATTER, 0, 0),
X86_INTRINSIC_DATA(avx512_scatterdiv8_si, SCATTER, 0, 0),
X86_INTRINSIC_DATA(avx512_scatterpf_dpd_512, PREFETCH, X86::VSCATTERPF0DPDm,
X86::VSCATTERPF1DPDm),
X86_INTRINSIC_DATA(avx512_scatterpf_dps_512, PREFETCH, X86::VSCATTERPF0DPSm,
X86::VSCATTERPF1DPSm),
X86_INTRINSIC_DATA(avx512_scatterpf_qpd_512, PREFETCH, X86::VSCATTERPF0QPDm,
X86::VSCATTERPF1QPDm),
X86_INTRINSIC_DATA(avx512_scatterpf_qps_512, PREFETCH, X86::VSCATTERPF0QPSm,
X86::VSCATTERPF1QPSm),
X86_INTRINSIC_DATA(avx512_scattersiv2_df, SCATTER, 0, 0),
X86_INTRINSIC_DATA(avx512_scattersiv2_di, SCATTER, 0, 0),
X86_INTRINSIC_DATA(avx512_scattersiv4_df, SCATTER, 0, 0),
Expand Down Expand Up @@ -437,6 +454,8 @@ static const IntrinsicData IntrinsicsWithoutChain[] = {
X86_INTRINSIC_DATA(avx512_dbpsadbw_512, INTR_TYPE_3OP_IMM8, X86ISD::DBPSADBW, 0),
X86_INTRINSIC_DATA(avx512_div_pd_512, INTR_TYPE_2OP, ISD::FDIV, X86ISD::FDIV_RND),
X86_INTRINSIC_DATA(avx512_div_ps_512, INTR_TYPE_2OP, ISD::FDIV, X86ISD::FDIV_RND),
X86_INTRINSIC_DATA(avx512_exp2_pd, INTR_TYPE_1OP_MASK_SAE, X86ISD::EXP2, X86ISD::EXP2_SAE),
X86_INTRINSIC_DATA(avx512_exp2_ps, INTR_TYPE_1OP_MASK_SAE, X86ISD::EXP2, X86ISD::EXP2_SAE),
X86_INTRINSIC_DATA(avx512_fpclass_pd_128, INTR_TYPE_2OP, X86ISD::VFPCLASS, 0),
X86_INTRINSIC_DATA(avx512_fpclass_pd_256, INTR_TYPE_2OP, X86ISD::VFPCLASS, 0),
X86_INTRINSIC_DATA(avx512_fpclass_pd_512, INTR_TYPE_2OP, X86ISD::VFPCLASS, 0),
Expand Down Expand Up @@ -889,6 +908,10 @@ static const IntrinsicData IntrinsicsWithoutChain[] = {
X86_INTRINSIC_DATA(avx512_rcp14_ps_512, INTR_TYPE_1OP_MASK, X86ISD::RCP14, 0),
X86_INTRINSIC_DATA(avx512_rcp14_sd, INTR_TYPE_SCALAR_MASK, X86ISD::RCP14S, 0),
X86_INTRINSIC_DATA(avx512_rcp14_ss, INTR_TYPE_SCALAR_MASK, X86ISD::RCP14S, 0),
X86_INTRINSIC_DATA(avx512_rcp28_pd, INTR_TYPE_1OP_MASK_SAE, X86ISD::RCP28, X86ISD::RCP28_SAE),
X86_INTRINSIC_DATA(avx512_rcp28_ps, INTR_TYPE_1OP_MASK_SAE, X86ISD::RCP28, X86ISD::RCP28_SAE),
X86_INTRINSIC_DATA(avx512_rcp28_sd, INTR_TYPE_SCALAR_MASK_SAE, X86ISD::RCP28S, X86ISD::RCP28S_SAE),
X86_INTRINSIC_DATA(avx512_rcp28_ss, INTR_TYPE_SCALAR_MASK_SAE, X86ISD::RCP28S, X86ISD::RCP28S_SAE),
X86_INTRINSIC_DATA(avx512_rsqrt14_pd_128, INTR_TYPE_1OP_MASK, X86ISD::RSQRT14, 0),
X86_INTRINSIC_DATA(avx512_rsqrt14_pd_256, INTR_TYPE_1OP_MASK, X86ISD::RSQRT14, 0),
X86_INTRINSIC_DATA(avx512_rsqrt14_pd_512, INTR_TYPE_1OP_MASK, X86ISD::RSQRT14, 0),
Expand All @@ -897,6 +920,10 @@ static const IntrinsicData IntrinsicsWithoutChain[] = {
X86_INTRINSIC_DATA(avx512_rsqrt14_ps_512, INTR_TYPE_1OP_MASK, X86ISD::RSQRT14, 0),
X86_INTRINSIC_DATA(avx512_rsqrt14_sd, INTR_TYPE_SCALAR_MASK, X86ISD::RSQRT14S, 0),
X86_INTRINSIC_DATA(avx512_rsqrt14_ss, INTR_TYPE_SCALAR_MASK, X86ISD::RSQRT14S, 0),
X86_INTRINSIC_DATA(avx512_rsqrt28_pd, INTR_TYPE_1OP_MASK_SAE,X86ISD::RSQRT28, X86ISD::RSQRT28_SAE),
X86_INTRINSIC_DATA(avx512_rsqrt28_ps, INTR_TYPE_1OP_MASK_SAE,X86ISD::RSQRT28, X86ISD::RSQRT28_SAE),
X86_INTRINSIC_DATA(avx512_rsqrt28_sd, INTR_TYPE_SCALAR_MASK_SAE,X86ISD::RSQRT28S, X86ISD::RSQRT28S_SAE),
X86_INTRINSIC_DATA(avx512_rsqrt28_ss, INTR_TYPE_SCALAR_MASK_SAE,X86ISD::RSQRT28S, X86ISD::RSQRT28S_SAE),
X86_INTRINSIC_DATA(avx512_sitofp_round, INTR_TYPE_1OP, ISD::SINT_TO_FP, X86ISD::SINT_TO_FP_RND),
X86_INTRINSIC_DATA(avx512_sqrt_pd_512, INTR_TYPE_1OP, ISD::FSQRT, X86ISD::FSQRT_RND),
X86_INTRINSIC_DATA(avx512_sqrt_ps_512, INTR_TYPE_1OP, ISD::FSQRT, X86ISD::FSQRT_RND),
Expand Down
8 changes: 5 additions & 3 deletions llvm/lib/Target/X86/X86Subtarget.h
Original file line number Diff line number Diff line change
Expand Up @@ -213,15 +213,17 @@ class X86Subtarget final : public X86GenSubtargetInfo {
bool hasAnyFMA() const { return hasFMA() || hasFMA4(); }
bool hasPrefetchW() const {
  // The PREFETCHW instruction was added with 3DNow but later CPUs gave it
  // its own CPUID bit as part of deprecating 3DNow. Intel eventually added
  // it and KNL has another that prefetches to L2 cache. We assume the
  // L1 version exists if the L2 version does.
  return hasThreeDNow() || hasPRFCHW() || hasPREFETCHWT1();
}
bool hasSSEPrefetch() const {
  // We implicitly enable these when we have a write prefix supporting cache
  // level (PREFETCHWT1) OR if we have prfchw, but don't already have a read
  // prefetch from 3dnow. SSE1 and PREFETCHI also enable them.
  return hasSSE1() || (hasPRFCHW() && !hasThreeDNow()) || hasPREFETCHWT1() ||
         hasPREFETCHI();
}
bool canUseLAHFSAHF() const { return hasLAHFSAHF64() || !is64Bit(); }
// These are generic getters that OR together all of the thunk types
Expand Down
9 changes: 9 additions & 0 deletions llvm/lib/TargetParser/Host.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1005,6 +1005,8 @@ getIntelProcessorTypeAndSubtype(unsigned Family, unsigned Model,
CPU = "cascadelake";
} else if (testFeature(X86::FEATURE_AVX512VL)) {
CPU = "skylake-avx512";
} else if (testFeature(X86::FEATURE_AVX512ER)) {
CPU = "knl";
} else if (testFeature(X86::FEATURE_CLFLUSHOPT)) {
if (testFeature(X86::FEATURE_SHA))
CPU = "goldmont";
Expand Down Expand Up @@ -1298,6 +1300,10 @@ static void getAvailableFeatures(unsigned ECX, unsigned EDX, unsigned MaxLeaf,
setFeature(X86::FEATURE_AVX512IFMA);
if (HasLeaf7 && ((EBX >> 23) & 1))
setFeature(X86::FEATURE_CLFLUSHOPT);
if (HasLeaf7 && ((EBX >> 26) & 1) && HasAVX512Save)
setFeature(X86::FEATURE_AVX512PF);
if (HasLeaf7 && ((EBX >> 27) & 1) && HasAVX512Save)
setFeature(X86::FEATURE_AVX512ER);
if (HasLeaf7 && ((EBX >> 28) & 1) && HasAVX512Save)
setFeature(X86::FEATURE_AVX512CD);
if (HasLeaf7 && ((EBX >> 29) & 1))
Expand Down Expand Up @@ -1804,11 +1810,14 @@ bool sys::getHostCPUFeatures(StringMap<bool> &Features) {
Features["avx512ifma"] = HasLeaf7 && ((EBX >> 21) & 1) && HasAVX512Save;
Features["clflushopt"] = HasLeaf7 && ((EBX >> 23) & 1);
Features["clwb"] = HasLeaf7 && ((EBX >> 24) & 1);
Features["avx512pf"] = HasLeaf7 && ((EBX >> 26) & 1) && HasAVX512Save;
Features["avx512er"] = HasLeaf7 && ((EBX >> 27) & 1) && HasAVX512Save;
Features["avx512cd"] = HasLeaf7 && ((EBX >> 28) & 1) && HasAVX512Save;
Features["sha"] = HasLeaf7 && ((EBX >> 29) & 1);
Features["avx512bw"] = HasLeaf7 && ((EBX >> 30) & 1) && HasAVX512Save;
Features["avx512vl"] = HasLeaf7 && ((EBX >> 31) & 1) && HasAVX512Save;

Features["prefetchwt1"] = HasLeaf7 && ((ECX >> 0) & 1);
Features["avx512vbmi"] = HasLeaf7 && ((ECX >> 1) & 1) && HasAVX512Save;
Features["pku"] = HasLeaf7 && ((ECX >> 4) & 1);
Features["waitpkg"] = HasLeaf7 && ((ECX >> 5) & 1);
Expand Down
6 changes: 3 additions & 3 deletions llvm/lib/TargetParser/X86TargetParser.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -95,9 +95,9 @@ constexpr FeatureBitset FeaturesBroadwell =

// Intel Knights Landing and Knights Mill
// Knights Landing has feature parity with Broadwell.
// On top of the Broadwell set, KNL adds AES, AVX-512 F/CD/ER/PF (with
// EVEX512) and PREFETCHWT1.
constexpr FeatureBitset FeaturesKNL =
    FeaturesBroadwell | FeatureAES | FeatureAVX512F | FeatureEVEX512 |
    FeatureAVX512CD | FeatureAVX512ER | FeatureAVX512PF | FeaturePREFETCHWT1;
constexpr FeatureBitset FeaturesKNM = FeaturesKNL | FeatureAVX512VPOPCNTDQ;

// Intel Skylake processors.
Expand Down
2 changes: 1 addition & 1 deletion llvm/test/CodeGen/X86/avx512-cmp-kor-sequence.ll
Original file line number Diff line number Diff line change
Expand Up @@ -48,5 +48,5 @@ entry:
; Function Attrs: nounwind readnone
declare <16 x i1> @llvm.x86.avx512.mask.cmp.ps.512(<16 x float>, <16 x float>, i32, <16 x i1>, i32) #1

attributes #0 = { nounwind readnone uwtable "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="broadwell" "target-features"="+adx,+aes,+avx,+avx2,+avx512cd,+avx512f,+bmi,+bmi2,+cx16,+f16c,+fma,+fsgsbase,+fxsr,+evex512,+lzcnt,+mmx,+movbe,+pclmul,+popcnt,+rdrnd,+rdseed,+rtm,+sse,+sse2,+sse3,+sse4.1,+sse4.2,+ssse3,+x87,+xsave,+xsaveopt,-vzeroupper" "unsafe-fp-math"="false" "use-soft-float"="false" }
attributes #0 = { nounwind readnone uwtable "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="knl" "target-features"="+adx,+aes,+avx,+avx2,+avx512cd,+avx512er,+avx512f,+avx512pf,+bmi,+bmi2,+cx16,+f16c,+fma,+fsgsbase,+fxsr,+lzcnt,+mmx,+movbe,+pclmul,+popcnt,+prefetchwt1,+rdrnd,+rdseed,+rtm,+sse,+sse2,+sse3,+sse4.1,+sse4.2,+ssse3,+x87,+xsave,+xsaveopt" "unsafe-fp-math"="false" "use-soft-float"="false" }
attributes #1 = { nounwind readnone }
24 changes: 24 additions & 0 deletions llvm/test/CodeGen/X86/avx512-gather-scatter-intrin-deprecated.ll
Original file line number Diff line number Diff line change
Expand Up @@ -268,6 +268,30 @@ define void @gather_qps(<8 x i64> %ind, <8 x float> %src, ptr %base, ptr %stbuf)
ret void
}

; Gather/scatter prefetch intrinsics called with constant masks (-1, 0, 1, 120),
; scales 4 and 2, and a final operand of 3 or 2. The CHECK lines expect the
; pf0 instruction form for final operand 3 and the pf1 form for final operand 2,
; with the mask materialised into %k1 before each instruction.
declare void @llvm.x86.avx512.gatherpf.qps.512(i8, <8 x i64>, ptr , i32, i32);
declare void @llvm.x86.avx512.scatterpf.qps.512(i8, <8 x i64>, ptr , i32, i32);
define void @prefetch(<8 x i64> %ind, ptr %base) {
; CHECK-LABEL: prefetch:
; CHECK: ## %bb.0:
; CHECK-NEXT: kxnorw %k0, %k0, %k1
; CHECK-NEXT: vgatherpf0qps (%rdi,%zmm0,4) {%k1}
; CHECK-NEXT: kxorw %k0, %k0, %k1
; CHECK-NEXT: vgatherpf1qps (%rdi,%zmm0,4) {%k1}
; CHECK-NEXT: movb $1, %al
; CHECK-NEXT: kmovd %eax, %k1
; CHECK-NEXT: vscatterpf0qps (%rdi,%zmm0,2) {%k1}
; CHECK-NEXT: movb $120, %al
; CHECK-NEXT: kmovd %eax, %k1
; CHECK-NEXT: vscatterpf1qps (%rdi,%zmm0,2) {%k1}
; CHECK-NEXT: vzeroupper
; CHECK-NEXT: retq
call void @llvm.x86.avx512.gatherpf.qps.512(i8 -1, <8 x i64> %ind, ptr %base, i32 4, i32 3)
call void @llvm.x86.avx512.gatherpf.qps.512(i8 0, <8 x i64> %ind, ptr %base, i32 4, i32 2)
call void @llvm.x86.avx512.scatterpf.qps.512(i8 1, <8 x i64> %ind, ptr %base, i32 2, i32 3)
call void @llvm.x86.avx512.scatterpf.qps.512(i8 120, <8 x i64> %ind, ptr %base, i32 2, i32 2)
ret void
}

declare <2 x double> @llvm.x86.avx512.gather3div2.df(<2 x double>, ptr, <2 x i64>, i8, i32)

define <2 x double>@test_int_x86_avx512_gather3div2_df(<2 x double> %x0, ptr %x1, <2 x i64> %x2, i8 %x3) {
Expand Down
24 changes: 24 additions & 0 deletions llvm/test/CodeGen/X86/avx512-gather-scatter-intrin.ll
Original file line number Diff line number Diff line change
Expand Up @@ -265,6 +265,30 @@ define dso_local void @gather_qps(<8 x i64> %ind, <8 x float> %src, ptr %base, p
ret void
}

; Gather/scatter prefetch intrinsics called with constant masks (-1, 0, 1, 120),
; scales 4 and 2, and a final operand of 3 or 2. The CHECK lines expect the
; pf0 instruction form for final operand 3 and the pf1 form for final operand 2,
; with the mask materialised into %k1 before each instruction.
declare void @llvm.x86.avx512.gatherpf.qps.512(i8, <8 x i64>, ptr , i32, i32);
declare void @llvm.x86.avx512.scatterpf.qps.512(i8, <8 x i64>, ptr , i32, i32);
define dso_local void @prefetch(<8 x i64> %ind, ptr %base) {
; CHECK-LABEL: prefetch:
; CHECK: # %bb.0:
; CHECK-NEXT: kxnorw %k0, %k0, %k1
; CHECK-NEXT: vgatherpf0qps (%rdi,%zmm0,4) {%k1}
; CHECK-NEXT: kxorw %k0, %k0, %k1
; CHECK-NEXT: vgatherpf1qps (%rdi,%zmm0,4) {%k1}
; CHECK-NEXT: movb $1, %al
; CHECK-NEXT: kmovd %eax, %k1
; CHECK-NEXT: vscatterpf0qps (%rdi,%zmm0,2) {%k1}
; CHECK-NEXT: movb $120, %al
; CHECK-NEXT: kmovd %eax, %k1
; CHECK-NEXT: vscatterpf1qps (%rdi,%zmm0,2) {%k1}
; CHECK-NEXT: vzeroupper
; CHECK-NEXT: retq
call void @llvm.x86.avx512.gatherpf.qps.512(i8 -1, <8 x i64> %ind, ptr %base, i32 4, i32 3)
call void @llvm.x86.avx512.gatherpf.qps.512(i8 0, <8 x i64> %ind, ptr %base, i32 4, i32 2)
call void @llvm.x86.avx512.scatterpf.qps.512(i8 1, <8 x i64> %ind, ptr %base, i32 2, i32 3)
call void @llvm.x86.avx512.scatterpf.qps.512(i8 120, <8 x i64> %ind, ptr %base, i32 2, i32 2)
ret void
}

define <2 x double> @test_int_x86_avx512_mask_gather3div2_df(<2 x double> %x0, ptr %x1, <2 x i64> %x2, i8 %x3) {
; CHECK-LABEL: test_int_x86_avx512_mask_gather3div2_df:
; CHECK: # %bb.0:
Expand Down
Loading