4 changes: 4 additions & 0 deletions clang/lib/CodeGen/CGHLSLRuntime.h
Original file line number Diff line number Diff line change
Expand Up @@ -91,10 +91,14 @@ class CGHLSLRuntime {
GENERATE_HLSL_INTRINSIC_FUNCTION(UDot, udot)
GENERATE_HLSL_INTRINSIC_FUNCTION(Dot4AddI8Packed, dot4add_i8packed)
GENERATE_HLSL_INTRINSIC_FUNCTION(Dot4AddU8Packed, dot4add_u8packed)
GENERATE_HLSL_INTRINSIC_FUNCTION(WaveActiveCountBits, wave_active_countbits)
GENERATE_HLSL_INTRINSIC_FUNCTION(WaveIsFirstLane, wave_is_first_lane)
GENERATE_HLSL_INTRINSIC_FUNCTION(WaveReadLaneAt, wave_readlane)
GENERATE_HLSL_INTRINSIC_FUNCTION(FirstBitUHigh, firstbituhigh)
GENERATE_HLSL_INTRINSIC_FUNCTION(FirstBitSHigh, firstbitshigh)
GENERATE_HLSL_INTRINSIC_FUNCTION(NClamp, nclamp)
GENERATE_HLSL_INTRINSIC_FUNCTION(SClamp, sclamp)
GENERATE_HLSL_INTRINSIC_FUNCTION(UClamp, uclamp)

GENERATE_HLSL_INTRINSIC_FUNCTION(CreateHandleFromBinding, handle_fromBinding)

Expand Down
8 changes: 6 additions & 2 deletions clang/lib/Driver/XRayArgs.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -68,8 +68,12 @@ XRayArgs::XRayArgs(const ToolChain &TC, const ArgList &Args) {
false)) {
XRayShared = true;

// DSO instrumentation is currently limited to x86_64
if (Triple.getArch() != llvm::Triple::x86_64) {
// Certain targets support DSO instrumentation
switch (Triple.getArch()) {
case llvm::Triple::aarch64:
case llvm::Triple::x86_64:
break;
default:
D.Diag(diag::err_drv_unsupported_opt_for_target)
<< "-fxray-shared" << Triple.str();
}
Expand Down
1 change: 1 addition & 0 deletions clang/lib/Headers/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -146,6 +146,7 @@ set(x86_files
adcintrin.h
adxintrin.h
ammintrin.h
amxavx512intrin.h
amxcomplexintrin.h
amxfp16intrin.h
amxfp8intrin.h
Expand Down
382 changes: 382 additions & 0 deletions clang/lib/Headers/amxavx512intrin.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,382 @@
/*===--------------------- amxavx512intrin.h - AMXAVX512 --------------------===
*
* Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
* See https://llvm.org/LICENSE.txt for license information.
* SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
*
*===------------------------------------------------------------------------===
*/
#ifndef __IMMINTRIN_H
#error "Never use <amxavx512intrin.h> directly; include <immintrin.h> instead."
#endif // __IMMINTRIN_H

#ifndef __AMX_AVX512INTRIN_H
#define __AMX_AVX512INTRIN_H
#ifdef __x86_64__

// Attribute set shared by the functions defined in this header: always
// inline them, emit no debug info for them, and compile their bodies with the
// amx-avx512 and avx10.2-512 target features enabled.
#define __DEFAULT_FN_ATTRS_AVX512 \
__attribute__((__always_inline__, __nodebug__, \
__target__("amx-avx512,avx10.2-512")))

/// Moves a row from a tile register to a zmm destination register, converting
/// the int32 source elements to fp32. The row of the tile is selected by a
/// 32b GPR.
///
/// \headerfile <x86intrin.h>
///
/// \code
/// __m512 _tile_cvtrowd2ps(__tile tsrc, unsigned int row);
/// \endcode
///
/// \code{.operation}
/// VL := 512
/// VL_bytes := VL >> 3
/// row_index := row & 0xffff
/// row_chunk := ((row >> 16) & 0xffff) * VL_bytes
/// FOR i := 0 TO (VL_bytes / 4) - 1
///   IF i + row_chunk / 4 >= tsrc.colsb / 4
///     dst.dword[i] := 0
///   ELSE
///     dst.f32[i] := CONVERT_INT32_TO_FP32(tsrc.row[row_index].dword[row_chunk/4+i], RNE)
///   FI
/// ENDFOR
/// dst[MAX_VL-1:VL] := 0
/// zero_tileconfig_start()
/// \endcode
///
/// This intrinsic corresponds to the \c TCVTROWD2PS instruction.
///
/// \param tsrc
///    The source tile. Max size is 1024 Bytes.
/// \param row
///    Selects the data to convert: bits [15:0] hold the row index and bits
///    [31:16] hold the index of the 64-byte chunk within that row.
/// \returns
///    The converted fp32 elements. Elements that fall beyond the tile's row
///    width (colsb) are set to zero.
#define _tile_cvtrowd2ps(tsrc, row) __builtin_ia32_tcvtrowd2ps(tsrc, row)

/// Moves a row from a tile register to a zmm destination register, converting
/// the fp32 source elements to bf16. It places the resulting bf16 elements
/// in the high 16 bits within each dword. The row of the tile is selected
/// by a 32b GPR.
///
/// \headerfile <x86intrin.h>
///
/// \code
/// __m512bh _tile_cvtrowps2pbf16h(__tile tsrc, unsigned int row);
/// \endcode
///
/// \code{.operation}
/// VL := 512
/// VL_bytes := VL >> 3
/// row_index := row & 0xffff
/// row_chunk := ((row >> 16) & 0xffff) * VL_bytes
/// FOR i := 0 TO (VL_bytes / 4) - 1
///   IF i + row_chunk / 4 >= tsrc.colsb / 4
///     dst.dword[i] := 0
///   ELSE
///     dst.word[2*i+0] := 0
///     dst.bf16[2*i+1] := CONVERT_FP32_TO_BF16(tsrc.row[row_index].fp32[row_chunk/4+i], RNE)
///   FI
/// ENDFOR
/// dst[MAX_VL-1:VL] := 0
/// zero_tileconfig_start()
/// \endcode
///
/// This intrinsic corresponds to the \c TCVTROWPS2PBF16H instruction.
///
/// \param tsrc
///    The source tile. Max size is 1024 Bytes.
/// \param row
///    Selects the data to convert: bits [15:0] hold the row index and bits
///    [31:16] hold the index of the 64-byte chunk within that row.
/// \returns
///    The converted bf16 elements, each in the high 16 bits of its dword with
///    the low 16 bits zeroed. Dwords beyond the tile's row width (colsb) are
///    zero.
#define _tile_cvtrowps2pbf16h(tsrc, row) \
__builtin_ia32_tcvtrowps2pbf16h(tsrc, row)

/// Moves a row from a tile register to a zmm destination register, converting
/// the fp32 source elements to bf16. It places the resulting bf16 elements
/// in the low 16 bits within each dword. The row of the tile is selected
/// by a 32b GPR.
///
/// \headerfile <x86intrin.h>
///
/// \code
/// __m512bh _tile_cvtrowps2pbf16l(__tile tsrc, unsigned int row);
/// \endcode
///
/// \code{.operation}
/// VL := 512
/// VL_bytes := VL >> 3
/// row_index := row & 0xffff
/// row_chunk := ((row >> 16) & 0xffff) * VL_bytes
/// FOR i := 0 TO (VL_bytes / 4) - 1
///   IF i + row_chunk / 4 >= tsrc.colsb / 4
///     dst.dword[i] := 0
///   ELSE
///     dst.word[2*i+1] := 0
///     dst.bf16[2*i+0] := CONVERT_FP32_TO_BF16(tsrc.row[row_index].fp32[row_chunk/4+i], RNE)
///   FI
/// ENDFOR
/// dst[MAX_VL-1:VL] := 0
/// zero_tileconfig_start()
/// \endcode
///
/// This intrinsic corresponds to the \c TCVTROWPS2PBF16L instruction.
///
/// \param tsrc
///    The source tile. Max size is 1024 Bytes.
/// \param row
///    Selects the data to convert: bits [15:0] hold the row index and bits
///    [31:16] hold the index of the 64-byte chunk within that row.
/// \returns
///    The converted bf16 elements, each in the low 16 bits of its dword with
///    the high 16 bits zeroed. Dwords beyond the tile's row width (colsb) are
///    zero.
#define _tile_cvtrowps2pbf16l(tsrc, row) \
__builtin_ia32_tcvtrowps2pbf16l(tsrc, row)

/// Moves a row from a tile register to a zmm destination register, converting
/// the fp32 source elements to fp16. It places the resulting fp16 elements
/// in the high 16 bits within each dword. The row of the tile is selected
/// by a 32b GPR.
///
/// \headerfile <x86intrin.h>
///
/// \code
/// __m512h _tile_cvtrowps2phh(__tile tsrc, unsigned int row);
/// \endcode
///
/// \code{.operation}
/// VL := 512
/// VL_bytes := VL >> 3
/// row_index := row & 0xffff
/// row_chunk := ((row >> 16) & 0xffff) * VL_bytes
/// FOR i := 0 TO (VL_bytes / 4) - 1
///   IF i + row_chunk / 4 >= tsrc.colsb / 4
///     dst.dword[i] := 0
///   ELSE
///     dst.word[2*i+0] := 0
///     dst.fp16[2*i+1] := CONVERT_FP32_TO_FP16(tsrc.row[row_index].fp32[row_chunk/4+i], RNE)
///   FI
/// ENDFOR
/// dst[MAX_VL-1:VL] := 0
/// zero_tileconfig_start()
/// \endcode
///
/// This intrinsic corresponds to the \c TCVTROWPS2PHH instruction.
///
/// \param tsrc
///    The source tile. Max size is 1024 Bytes.
/// \param row
///    Selects the data to convert: bits [15:0] hold the row index and bits
///    [31:16] hold the index of the 64-byte chunk within that row.
/// \returns
///    The converted fp16 elements, each in the high 16 bits of its dword with
///    the low 16 bits zeroed. Dwords beyond the tile's row width (colsb) are
///    zero.
#define _tile_cvtrowps2phh(tsrc, row) __builtin_ia32_tcvtrowps2phh(tsrc, row)

/// Moves a row from a tile register to a zmm destination register, converting
/// the fp32 source elements to fp16. It places the resulting fp16 elements
/// in the low 16 bits within each dword. The row of the tile is selected
/// by a 32b GPR.
///
/// \headerfile <x86intrin.h>
///
/// \code
/// __m512h _tile_cvtrowps2phl(__tile tsrc, unsigned int row);
/// \endcode
///
/// \code{.operation}
/// VL := 512
/// VL_bytes := VL >> 3
/// row_index := row & 0xffff
/// row_chunk := ((row >> 16) & 0xffff) * VL_bytes
/// FOR i := 0 TO (VL_bytes / 4) - 1
///   IF i + row_chunk / 4 >= tsrc.colsb / 4
///     dst.dword[i] := 0
///   ELSE
///     dst.word[2*i+1] := 0
///     dst.fp16[2*i+0] := CONVERT_FP32_TO_FP16(tsrc.row[row_index].fp32[row_chunk/4+i], RNE)
///   FI
/// ENDFOR
/// dst[MAX_VL-1:VL] := 0
/// zero_tileconfig_start()
/// \endcode
///
/// This intrinsic corresponds to the \c TCVTROWPS2PHL instruction.
///
/// \param tsrc
///    The source tile. Max size is 1024 Bytes.
/// \param row
///    Selects the data to convert: bits [15:0] hold the row index and bits
///    [31:16] hold the index of the 64-byte chunk within that row.
/// \returns
///    The converted fp16 elements, each in the low 16 bits of its dword with
///    the high 16 bits zeroed. Dwords beyond the tile's row width (colsb) are
///    zero.
#define _tile_cvtrowps2phl(tsrc, row) __builtin_ia32_tcvtrowps2phl(tsrc, row)

/// Move one row of a tile data to a v16i32 data, without converting the
/// elements. The row of the tile is selected by a 32b GPR.
///
/// \headerfile <immintrin.h>
///
/// \code
/// __m512i _tile_movrow(__tile a, unsigned b);
/// \endcode
///
/// This intrinsic corresponds to the <c> TILEMOVROW </c> instruction.
///
/// \param a
///    The 1st source tile. Max size is 1024 Bytes.
/// \param b
///    The 2nd source r32. Size is 4 Bytes. Bits [15:0] hold the row index and
///    bits [31:16] hold the index of the 64-byte chunk within that row.
/// \returns
///    The destination v16i32 data. Size is 64 Bytes. Bytes that fall beyond
///    the tile's row width (colsb) are set to zero.
///
/// \code{.operation}
/// VL := 512
/// VL_bytes := VL>>3
/// row_index := b&0xffff
/// row_chunk := ((b>>16)&0xffff) * VL_bytes
/// FOR i := 0 TO (VL_bytes-1)
///   IF (row_chunk + i >= a.colsb)
///     dst.byte[i] := 0
///   ELSE
///     dst.byte[i] := a.row[row_index].byte[row_chunk+i]
///   FI
/// ENDFOR
/// \endcode
// The explicit cast gives the macro the same __m512i result type as
// _tile_movrow_internal, which applies the same cast to its builtin.
#define _tile_movrow(a, b) ((__m512i)__builtin_ia32_tilemovrow(a, b))

/// This is an internal intrinsic. C/C++ users should avoid calling it directly.
///
/// Forwards its arguments unchanged to __builtin_ia32_tcvtrowd2ps_internal.
///
/// \param m
///    First shape argument; __tile_cvtrowd2ps passes the tile's \c row field.
/// \param n
///    Second shape argument; __tile_cvtrowd2ps passes the tile's \c col field.
/// \param src
///    The tile data.
/// \param u
///    The 32-bit row selector.
/// \returns
///    The builtin's result as an __m512.
static __inline__ __m512 __DEFAULT_FN_ATTRS_AVX512 _tile_cvtrowd2ps_internal(
unsigned short m, unsigned short n, _tile1024i src, unsigned u) {
return __builtin_ia32_tcvtrowd2ps_internal(m, n, src, u);
}

/// This is an internal intrinsic. C/C++ users should avoid calling it directly.
///
/// Forwards its arguments unchanged to
/// __builtin_ia32_tcvtrowps2pbf16h_internal.
///
/// \param m
///    First shape argument; __tile_cvtrowps2pbf16h passes the tile's \c row
///    field.
/// \param n
///    Second shape argument; __tile_cvtrowps2pbf16h passes the tile's \c col
///    field.
/// \param src
///    The tile data.
/// \param u
///    The 32-bit row selector.
/// \returns
///    The builtin's result as an __m512bh.
static __inline__ __m512bh __DEFAULT_FN_ATTRS_AVX512
_tile_cvtrowps2pbf16h_internal(unsigned short m, unsigned short n,
_tile1024i src, unsigned u) {
return __builtin_ia32_tcvtrowps2pbf16h_internal(m, n, src, u);
}

/// This is an internal intrinsic. C/C++ users should avoid calling it directly.
///
/// Forwards its arguments unchanged to
/// __builtin_ia32_tcvtrowps2pbf16l_internal.
///
/// \param m
///    First shape argument; __tile_cvtrowps2pbf16l passes the tile's \c row
///    field.
/// \param n
///    Second shape argument; __tile_cvtrowps2pbf16l passes the tile's \c col
///    field.
/// \param src
///    The tile data.
/// \param u
///    The 32-bit row selector.
/// \returns
///    The builtin's result as an __m512bh.
static __inline__ __m512bh __DEFAULT_FN_ATTRS_AVX512
_tile_cvtrowps2pbf16l_internal(unsigned short m, unsigned short n,
_tile1024i src, unsigned u) {
return __builtin_ia32_tcvtrowps2pbf16l_internal(m, n, src, u);
}

/// This is an internal intrinsic. C/C++ users should avoid calling it directly.
///
/// Forwards its arguments unchanged to __builtin_ia32_tcvtrowps2phh_internal.
///
/// \param m
///    First shape argument; __tile_cvtrowps2phh passes the tile's \c row
///    field.
/// \param n
///    Second shape argument; __tile_cvtrowps2phh passes the tile's \c col
///    field.
/// \param src
///    The tile data.
/// \param u
///    The 32-bit row selector.
/// \returns
///    The builtin's result as an __m512h.
static __inline__ __m512h __DEFAULT_FN_ATTRS_AVX512 _tile_cvtrowps2phh_internal(
unsigned short m, unsigned short n, _tile1024i src, unsigned u) {
return __builtin_ia32_tcvtrowps2phh_internal(m, n, src, u);
}

/// This is an internal intrinsic. C/C++ users should avoid calling it directly.
///
/// Forwards its arguments unchanged to __builtin_ia32_tcvtrowps2phl_internal.
///
/// \param m
///    First shape argument; __tile_cvtrowps2phl passes the tile's \c row
///    field.
/// \param n
///    Second shape argument; __tile_cvtrowps2phl passes the tile's \c col
///    field.
/// \param src
///    The tile data.
/// \param u
///    The 32-bit row selector.
/// \returns
///    The builtin's result as an __m512h.
static __inline__ __m512h __DEFAULT_FN_ATTRS_AVX512 _tile_cvtrowps2phl_internal(
unsigned short m, unsigned short n, _tile1024i src, unsigned u) {
return __builtin_ia32_tcvtrowps2phl_internal(m, n, src, u);
}

/// This is an internal intrinsic. C/C++ users should avoid calling it directly.
///
/// Forwards its arguments unchanged to __builtin_ia32_tilemovrow_internal and
/// casts the builtin's result to __m512i.
///
/// \param m
///    First shape argument; __tile_movrow passes the tile's \c row field.
/// \param n
///    Second shape argument; __tile_movrow passes the tile's \c col field.
/// \param src
///    The tile data.
/// \param u
///    The 32-bit row selector.
/// \returns
///    The selected row data as an __m512i.
static __inline__ __m512i __DEFAULT_FN_ATTRS_AVX512 _tile_movrow_internal(
unsigned short m, unsigned short n, _tile1024i src, unsigned u) {
return (__m512i)__builtin_ia32_tilemovrow_internal(m, n, src, u);
}

/// Move a row from a tile (src0) to a v16f32 dst, converting the int32 source
/// elements to fp32. No SIMD exceptions are generated. Rounding is done as if
/// MXCSR.RC=RNE. Embedded rounding is not supported.
/// The row and chunk indices within the tile are taken from the 32-bit src1.
///
/// \headerfile <immintrin.h>
///
/// This intrinsic corresponds to the <c> TCVTROWD2PS </c> instruction.
///
/// \param src0
///    The 1st source tile. Max size is 1024 Bytes.
/// \param src1
///    The 2nd source r32. Size is 4 Bytes.
/// \returns
///    The destination v16f32 data. Size is 64 Bytes.
__DEFAULT_FN_ATTRS_AVX512
static __m512 __tile_cvtrowd2ps(__tile1024i src0, unsigned src1) {
return _tile_cvtrowd2ps_internal(src0.row, src0.col, src0.tile, src1);
}

/// Move a row from a tile (src0) to a v32bf16 dst, converting the fp32 source
/// elements to bf16 at high 16-bits of each dword.
/// The row and chunk indices within the tile are taken from the 32-bit src1.
///
/// \headerfile <immintrin.h>
///
/// This intrinsic corresponds to the <c> TCVTROWPS2PBF16H </c> instruction.
///
/// \param src0
///    The 1st source tile. Max size is 1024 Bytes.
/// \param src1
///    The 2nd source r32. Size is 4 Bytes.
/// \returns
///    The destination v32bf16 data. Size is 64 Bytes.
__DEFAULT_FN_ATTRS_AVX512
static __m512bh __tile_cvtrowps2pbf16h(__tile1024i src0, unsigned src1) {
return _tile_cvtrowps2pbf16h_internal(src0.row, src0.col, src0.tile, src1);
}

/// Move a row from a tile (src0) to a v32bf16 dst, converting the fp32 source
/// elements to bf16 at low 16-bits of each dword.
/// The row and chunk indices within the tile are taken from the 32-bit src1.
///
/// \headerfile <immintrin.h>
///
/// This intrinsic corresponds to the <c> TCVTROWPS2PBF16L </c> instruction.
///
/// \param src0
///    The 1st source tile. Max size is 1024 Bytes.
/// \param src1
///    The 2nd source r32. Size is 4 Bytes.
/// \returns
///    The destination v32bf16 data. Size is 64 Bytes.
__DEFAULT_FN_ATTRS_AVX512
static __m512bh __tile_cvtrowps2pbf16l(__tile1024i src0, unsigned src1) {
return _tile_cvtrowps2pbf16l_internal(src0.row, src0.col, src0.tile, src1);
}

/// Move a row from a tile (src0) to a v32fp16 dst, converting the fp32 source
/// elements to fp16 at high 16-bits of each dword.
/// The row and chunk indices within the tile are taken from the 32-bit src1.
///
/// \headerfile <immintrin.h>
///
/// This intrinsic corresponds to the <c> TCVTROWPS2PHH </c> instruction.
///
/// \param src0
///    The 1st source tile. Max size is 1024 Bytes.
/// \param src1
///    The 2nd source r32. Size is 4 Bytes.
/// \returns
///    The destination v32fp16 data. Size is 64 Bytes.
__DEFAULT_FN_ATTRS_AVX512
static __m512h __tile_cvtrowps2phh(__tile1024i src0, unsigned src1) {
return _tile_cvtrowps2phh_internal(src0.row, src0.col, src0.tile, src1);
}

/// Move a row from a tile (src0) to a v32fp16 dst, converting the fp32 source
/// elements to fp16 at low 16-bits of each dword.
/// The row and chunk indices within the tile are taken from the 32-bit src1.
///
/// \headerfile <immintrin.h>
///
/// This intrinsic corresponds to the <c> TCVTROWPS2PHL </c> instruction.
///
/// \param src0
///    The 1st source tile. Max size is 1024 Bytes.
/// \param src1
///    The 2nd source r32. Size is 4 Bytes.
/// \returns
///    The destination v32fp16 data. Size is 64 Bytes.
__DEFAULT_FN_ATTRS_AVX512
static __m512h __tile_cvtrowps2phl(__tile1024i src0, unsigned src1) {
return _tile_cvtrowps2phl_internal(src0.row, src0.col, src0.tile, src1);
}

/// Move one row of a tile data to a v16i32 data, without converting the
/// elements. The row of the tile is selected by a 32b GPR.
///
/// \headerfile <immintrin.h>
///
/// This intrinsic corresponds to the <c> TILEMOVROW </c> instruction.
///
/// \param src0
///    The 1st source tile. Max size is 1024 Bytes.
/// \param src1
///    The 2nd source r32. Size is 4 Bytes.
/// \returns
///    The destination v16i32 data. Size is 64 Bytes.
__DEFAULT_FN_ATTRS_AVX512
static __m512i __tile_movrow(__tile1024i src0, unsigned src1) {
  // _tile_movrow_internal already returns __m512i, so no cast is needed.
  return _tile_movrow_internal(src0.row, src0.col, src0.tile, src1);
}

// Last use of the helper macro; do not leak it to code that includes this
// header.
#undef __DEFAULT_FN_ATTRS_AVX512

#endif // __x86_64__
#endif // __AMX_AVX512INTRIN_H
5 changes: 3 additions & 2 deletions clang/lib/Headers/emmintrin.h
Original file line number Diff line number Diff line change
Expand Up @@ -4626,8 +4626,9 @@ _mm_movepi64_pi64(__m128i __a) {
/// A 64-bit value.
/// \returns A 128-bit integer vector. The lower 64 bits contain the value from
/// the operand. The upper 64 bits are assigned zeros.
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_movpi64_epi64(__m64 __a) {
return __extension__(__m128i)(__v2di){(long long)__a, 0};
static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR
_mm_movpi64_epi64(__m64 __a) {
return __builtin_shufflevector((__v1di)__a, _mm_setzero_si64(), 0, 1);
}

/// Moves the lower 64 bits of a 128-bit integer vector to a 128-bit
Expand Down
4 changes: 4 additions & 0 deletions clang/lib/Headers/immintrin.h
Original file line number Diff line number Diff line change
Expand Up @@ -656,6 +656,10 @@ _storebe_i64(void * __P, long long __D) {
#include <amxtransposeintrin.h>
#endif

#if !defined(__SCE__) || __has_feature(modules) || defined(__AMX_AVX512__)
#include <amxavx512intrin.h>
#endif

#if !defined(__SCE__) || __has_feature(modules) || \
defined(__AVX512VP2INTERSECT__)
#include <avx512vp2intersectintrin.h>
Expand Down
78 changes: 37 additions & 41 deletions clang/lib/Headers/mmintrin.h
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,12 @@ typedef char __v16qi __attribute__((__vector_size__(16)));
__min_vector_width__(128)))
#endif

// Same attribute set as __DEFAULT_FN_ATTRS_SSE2, with `constexpr` appended
// when compiling as C++11 or later. In C and in older C++ modes it expands to
// the plain attribute set.
#if defined(__cplusplus) && (__cplusplus >= 201103L)
#define __DEFAULT_FN_ATTRS_SSE2_CONSTEXPR __DEFAULT_FN_ATTRS_SSE2 constexpr
#else
#define __DEFAULT_FN_ATTRS_SSE2_CONSTEXPR __DEFAULT_FN_ATTRS_SSE2
#endif

#define __trunc64(x) \
(__m64) __builtin_shufflevector((__v2di)(x), __extension__(__v2di){}, 0)
#define __anyext128(x) \
Expand Down Expand Up @@ -1332,10 +1338,9 @@ _mm_cmpgt_pi32(__m64 __m1, __m64 __m2)
/// This intrinsic corresponds to the <c> PXOR </c> instruction.
///
/// \returns An initialized 64-bit integer vector with all elements set to zero.
static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2
_mm_setzero_si64(void)
{
return __extension__ (__m64){ 0LL };
static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2_CONSTEXPR
_mm_setzero_si64(void) {
return __extension__(__m64){0LL};
}

/// Constructs a 64-bit integer vector initialized with the specified
Expand All @@ -1353,10 +1358,9 @@ _mm_setzero_si64(void)
/// A 32-bit integer value used to initialize the lower 32 bits of the
/// result.
/// \returns An initialized 64-bit integer vector.
static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2
_mm_set_pi32(int __i1, int __i0)
{
return __extension__ (__m64)(__v2si){__i0, __i1};
static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2_CONSTEXPR
_mm_set_pi32(int __i1, int __i0) {
return __extension__(__m64)(__v2si){__i0, __i1};
}

/// Constructs a 64-bit integer vector initialized with the specified
Expand All @@ -1376,10 +1380,9 @@ _mm_set_pi32(int __i1, int __i0)
/// \param __s0
/// A 16-bit integer value used to initialize bits [15:0] of the result.
/// \returns An initialized 64-bit integer vector.
static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2
_mm_set_pi16(short __s3, short __s2, short __s1, short __s0)
{
return __extension__ (__m64)(__v4hi){__s0, __s1, __s2, __s3};
static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2_CONSTEXPR
_mm_set_pi16(short __s3, short __s2, short __s1, short __s0) {
return __extension__(__m64)(__v4hi){__s0, __s1, __s2, __s3};
}

/// Constructs a 64-bit integer vector initialized with the specified
Expand Down Expand Up @@ -1407,12 +1410,11 @@ _mm_set_pi16(short __s3, short __s2, short __s1, short __s0)
/// \param __b0
/// An 8-bit integer value used to initialize bits [7:0] of the result.
/// \returns An initialized 64-bit integer vector.
static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2
static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2_CONSTEXPR
_mm_set_pi8(char __b7, char __b6, char __b5, char __b4, char __b3, char __b2,
char __b1, char __b0)
{
return __extension__ (__m64)(__v8qi){__b0, __b1, __b2, __b3,
__b4, __b5, __b6, __b7};
char __b1, char __b0) {
return __extension__(__m64)(__v8qi){__b0, __b1, __b2, __b3,
__b4, __b5, __b6, __b7};
}

/// Constructs a 64-bit integer vector of [2 x i32], with each of the
Expand All @@ -1428,10 +1430,9 @@ _mm_set_pi8(char __b7, char __b6, char __b5, char __b4, char __b3, char __b2,
/// A 32-bit integer value used to initialize each vector element of the
/// result.
/// \returns An initialized 64-bit integer vector of [2 x i32].
static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2
_mm_set1_pi32(int __i)
{
return _mm_set_pi32(__i, __i);
static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2_CONSTEXPR
_mm_set1_pi32(int __i) {
return _mm_set_pi32(__i, __i);
}

/// Constructs a 64-bit integer vector of [4 x i16], with each of the
Expand All @@ -1447,10 +1448,9 @@ _mm_set1_pi32(int __i)
/// A 16-bit integer value used to initialize each vector element of the
/// result.
/// \returns An initialized 64-bit integer vector of [4 x i16].
static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2
_mm_set1_pi16(short __w)
{
return _mm_set_pi16(__w, __w, __w, __w);
static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2_CONSTEXPR
_mm_set1_pi16(short __w) {
return _mm_set_pi16(__w, __w, __w, __w);
}

/// Constructs a 64-bit integer vector of [8 x i8], with each of the
Expand All @@ -1465,10 +1465,9 @@ _mm_set1_pi16(short __w)
/// An 8-bit integer value used to initialize each vector element of the
/// result.
/// \returns An initialized 64-bit integer vector of [8 x i8].
static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2
_mm_set1_pi8(char __b)
{
return _mm_set_pi8(__b, __b, __b, __b, __b, __b, __b, __b);
static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2_CONSTEXPR
_mm_set1_pi8(char __b) {
return _mm_set_pi8(__b, __b, __b, __b, __b, __b, __b, __b);
}

/// Constructs a 64-bit integer vector, initialized in reverse order with
Expand All @@ -1486,10 +1485,9 @@ _mm_set1_pi8(char __b)
/// A 32-bit integer value used to initialize the upper 32 bits of the
/// result.
/// \returns An initialized 64-bit integer vector.
static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2
_mm_setr_pi32(int __i0, int __i1)
{
return _mm_set_pi32(__i1, __i0);
static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2_CONSTEXPR
_mm_setr_pi32(int __i0, int __i1) {
return _mm_set_pi32(__i1, __i0);
}

/// Constructs a 64-bit integer vector, initialized in reverse order with
Expand All @@ -1509,10 +1507,9 @@ _mm_setr_pi32(int __i0, int __i1)
/// \param __w3
/// A 16-bit integer value used to initialize bits [63:48] of the result.
/// \returns An initialized 64-bit integer vector.
static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2
_mm_setr_pi16(short __w0, short __w1, short __w2, short __w3)
{
return _mm_set_pi16(__w3, __w2, __w1, __w0);
static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2_CONSTEXPR
_mm_setr_pi16(short __w0, short __w1, short __w2, short __w3) {
return _mm_set_pi16(__w3, __w2, __w1, __w0);
}

/// Constructs a 64-bit integer vector, initialized in reverse order with
Expand Down Expand Up @@ -1540,11 +1537,10 @@ _mm_setr_pi16(short __w0, short __w1, short __w2, short __w3)
/// \param __b7
/// An 8-bit integer value used to initialize bits [63:56] of the result.
/// \returns An initialized 64-bit integer vector.
static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2
static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2_CONSTEXPR
_mm_setr_pi8(char __b0, char __b1, char __b2, char __b3, char __b4, char __b5,
char __b6, char __b7)
{
return _mm_set_pi8(__b7, __b6, __b5, __b4, __b3, __b2, __b1, __b0);
char __b6, char __b7) {
return _mm_set_pi8(__b7, __b6, __b5, __b4, __b3, __b2, __b1, __b0);
}

#undef __anyext128
Expand Down
5 changes: 0 additions & 5 deletions clang/lib/Headers/stdalign.h
Original file line number Diff line number Diff line change
Expand Up @@ -10,10 +10,6 @@
#ifndef __STDALIGN_H
#define __STDALIGN_H

#if defined(__MVS__) && __has_include_next(<stdalign.h>)
#include_next <stdalign.h>
#else

#if defined(__cplusplus) || \
(defined(__STDC_VERSION__) && __STDC_VERSION__ < 202311L)
#ifndef __cplusplus
Expand All @@ -25,5 +21,4 @@
#define __alignof_is_defined 1
#endif /* __STDC_VERSION__ */

#endif /* __MVS__ */
#endif /* __STDALIGN_H */
7 changes: 4 additions & 3 deletions clang/lib/Parse/ParseOpenACC.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1498,14 +1498,15 @@ StmtResult Parser::ParseOpenACCDirectiveStmt() {
return StmtError();

StmtResult AssocStmt;
SemaOpenACC::AssociatedStmtRAII AssocStmtRAII(
getActions().OpenACC(), DirInfo.DirKind, {}, DirInfo.Clauses);
SemaOpenACC::AssociatedStmtRAII AssocStmtRAII(getActions().OpenACC(),
DirInfo.DirKind, DirInfo.DirLoc,
{}, DirInfo.Clauses);
if (doesDirectiveHaveAssociatedStmt(DirInfo.DirKind)) {
ParsingOpenACCDirectiveRAII DirScope(*this, /*Value=*/false);
ParseScope ACCScope(this, getOpenACCScopeFlags(DirInfo.DirKind));

AssocStmt = getActions().OpenACC().ActOnAssociatedStmt(
DirInfo.StartLoc, DirInfo.DirKind, ParseStatement());
DirInfo.StartLoc, DirInfo.DirKind, DirInfo.Clauses, ParseStatement());
}

return getActions().OpenACC().ActOnEndStmtDirective(
Expand Down
6 changes: 5 additions & 1 deletion clang/lib/Parse/ParseStmt.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2360,7 +2360,11 @@ StmtResult Parser::ParseForStatement(SourceLocation *TrailingElseLoc) {
// OpenACC Restricts a for-loop inside of certain construct/clause
// combinations, so diagnose that here in OpenACC mode.
SemaOpenACC::LoopInConstructRAII LCR{getActions().OpenACC()};
getActions().OpenACC().ActOnForStmtBegin(ForLoc);
if (ForRangeInfo.ParsedForRangeDecl())
getActions().OpenACC().ActOnRangeForStmtBegin(ForLoc, ForRangeStmt.get());
else
getActions().OpenACC().ActOnForStmtBegin(
ForLoc, FirstPart.get(), SecondPart.get().second, ThirdPart.get());

// C99 6.8.5p5 - In C99, the body of the for statement is a scope, even if
// there is no compound stmt. C90 does not have this clause. We only do this
Expand Down
3 changes: 2 additions & 1 deletion clang/lib/Sema/SemaDecl.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -15021,7 +15021,8 @@ Decl *Sema::ActOnParamDeclarator(Scope *S, Declarator &D,
: diag::warn_deprecated_register)
<< FixItHint::CreateRemoval(DS.getStorageClassSpecLoc());
} else if (!getLangOpts().CPlusPlus &&
DS.getTypeSpecType() == DeclSpec::TST_void) {
DS.getTypeSpecType() == DeclSpec::TST_void &&
D.getNumTypeObjects() == 0) {
Diag(DS.getStorageClassSpecLoc(),
diag::err_invalid_storage_class_in_func_decl)
<< FixItHint::CreateRemoval(DS.getStorageClassSpecLoc());
Expand Down
458 changes: 447 additions & 11 deletions clang/lib/Sema/SemaOpenACC.cpp

Large diffs are not rendered by default.

49 changes: 24 additions & 25 deletions clang/lib/Sema/SemaTemplateInstantiate.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1749,31 +1749,21 @@ namespace {
return inherited::TransformLambdaBody(E, Body);
}

ExprResult RebuildSizeOfPackExpr(SourceLocation OperatorLoc,
NamedDecl *Pack, SourceLocation PackLoc,
SourceLocation RParenLoc,
std::optional<unsigned> Length,
ArrayRef<TemplateArgument> PartialArgs) {
if (SemaRef.CodeSynthesisContexts.back().Kind !=
Sema::CodeSynthesisContext::ConstraintNormalization)
return inherited::RebuildSizeOfPackExpr(OperatorLoc, Pack, PackLoc,
RParenLoc, Length, PartialArgs);

#ifndef NDEBUG
for (auto *Iter = TemplateArgs.begin(); Iter != TemplateArgs.end();
++Iter)
for (const TemplateArgument &TA : Iter->Args)
assert(TA.getKind() != TemplateArgument::Pack || TA.pack_size() == 1);
#endif
Sema::ArgumentPackSubstitutionIndexRAII SubstIndex(
SemaRef, /*NewSubstitutionIndex=*/0);
Decl *NewPack = TransformDecl(PackLoc, Pack);
if (!NewPack)
return ExprError();

return inherited::RebuildSizeOfPackExpr(OperatorLoc,
cast<NamedDecl>(NewPack), PackLoc,
RParenLoc, Length, PartialArgs);
/// Transforms a SizeOfPackExpr via the inherited implementation, then, only
/// while normalizing constraints, remaps a pack declaration that the
/// inherited transform left unchanged by running it through TransformDecl.
///
/// \returns the transformed expression, the inherited result unchanged if it
/// is not usable, or ExprError() if TransformDecl yields no declaration.
ExprResult TransformSizeOfPackExpr(SizeOfPackExpr *E) {
ExprResult Transformed = inherited::TransformSizeOfPackExpr(E);
// Hand errors and unusable results straight back to the caller.
if (!Transformed.isUsable())
return Transformed;
auto *TransformedExpr = cast<SizeOfPackExpr>(Transformed.get());
// Only act during constraint normalization, and only when the result still
// refers to the same pack declaration as the input expression.
if (SemaRef.CodeSynthesisContexts.back().Kind ==
Sema::CodeSynthesisContext::ConstraintNormalization &&
TransformedExpr->getPack() == E->getPack()) {
Decl *NewPack =
TransformDecl(E->getPackLoc(), TransformedExpr->getPack());
if (!NewPack)
return ExprError();
// Update the already-built expression in place with the remapped pack.
TransformedExpr->setPack(cast<NamedDecl>(NewPack));
}
return TransformedExpr;
}

ExprResult TransformRequiresExpr(RequiresExpr *E) {
Expand Down Expand Up @@ -1899,6 +1889,15 @@ Decl *TemplateInstantiator::TransformDecl(SourceLocation Loc, Decl *D) {
TemplateArgument Arg = TemplateArgs(TTP->getDepth(), TTP->getPosition());

if (TTP->isParameterPack()) {
// We might not have an index for pack expansion when normalizing
// constraint expressions. In that case, resort to instantiation scopes
// for the transformed declarations.
if (SemaRef.ArgumentPackSubstitutionIndex == -1 &&
SemaRef.CodeSynthesisContexts.back().Kind ==
Sema::CodeSynthesisContext::ConstraintNormalization) {
return SemaRef.FindInstantiatedDecl(Loc, cast<NamedDecl>(D),
TemplateArgs);
}
assert(Arg.getKind() == TemplateArgument::Pack &&
"Missing argument pack");
Arg = getPackSubstitutedTemplateArgument(getSema(), Arg);
Expand Down
6 changes: 6 additions & 0 deletions clang/lib/Sema/SemaX86.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -635,6 +635,12 @@ bool SemaX86::CheckBuiltinTileArguments(unsigned BuiltinID, CallExpr *TheCall) {
case X86::BI__builtin_ia32_t2rpntlvwz0t1:
case X86::BI__builtin_ia32_t2rpntlvwz1:
case X86::BI__builtin_ia32_t2rpntlvwz1t1:
case X86::BI__builtin_ia32_tcvtrowps2pbf16h:
case X86::BI__builtin_ia32_tcvtrowps2pbf16l:
case X86::BI__builtin_ia32_tcvtrowps2phh:
case X86::BI__builtin_ia32_tcvtrowps2phl:
case X86::BI__builtin_ia32_tcvtrowd2ps:
case X86::BI__builtin_ia32_tilemovrow:
return CheckBuiltinTileArgumentsRange(TheCall, 0);
case X86::BI__builtin_ia32_tdpbssd:
case X86::BI__builtin_ia32_tdpbsud:
Expand Down
20 changes: 11 additions & 9 deletions clang/lib/Sema/TreeTransform.h
Original file line number Diff line number Diff line change
Expand Up @@ -8298,7 +8298,9 @@ TreeTransform<Derived>::TransformForStmt(ForStmt *S) {
// OpenACC Restricts a for-loop inside of certain construct/clause
// combinations, so diagnose that here in OpenACC mode.
SemaOpenACC::LoopInConstructRAII LCR{SemaRef.OpenACC()};
SemaRef.OpenACC().ActOnForStmtBegin(S->getBeginLoc());
SemaRef.OpenACC().ActOnForStmtBegin(
S->getBeginLoc(), S->getInit(), Init.get(), S->getCond(),
Cond.get().second, S->getInc(), Inc.get());

// Transform the body
StmtResult Body = getDerived().TransformStmt(S->getBody());
Expand Down Expand Up @@ -9048,7 +9050,7 @@ TreeTransform<Derived>::TransformCXXForRangeStmt(CXXForRangeStmt *S) {
// OpenACC Restricts a range-for loop inside of certain construct/clause
// combinations, so diagnose that here in OpenACC mode.
SemaOpenACC::LoopInConstructRAII LCR{SemaRef.OpenACC()};
SemaRef.OpenACC().ActOnForStmtBegin(S->getBeginLoc());
SemaRef.OpenACC().ActOnRangeForStmtBegin(S->getBeginLoc(), S, NewStmt.get());

StmtResult Body = getDerived().TransformStmt(S->getBody());
if (Body.isInvalid())
Expand Down Expand Up @@ -12073,11 +12075,11 @@ StmtResult TreeTransform<Derived>::TransformOpenACCComputeConstruct(

// Transform Structured Block.
SemaOpenACC::AssociatedStmtRAII AssocStmtRAII(
getSema().OpenACC(), C->getDirectiveKind(), C->clauses(),
TransformedClauses);
getSema().OpenACC(), C->getDirectiveKind(), C->getDirectiveLoc(),
C->clauses(), TransformedClauses);
StmtResult StrBlock = getDerived().TransformStmt(C->getStructuredBlock());
StrBlock = getSema().OpenACC().ActOnAssociatedStmt(
C->getBeginLoc(), C->getDirectiveKind(), StrBlock);
C->getBeginLoc(), C->getDirectiveKind(), TransformedClauses, StrBlock);

return getDerived().RebuildOpenACCComputeConstruct(
C->getDirectiveKind(), C->getBeginLoc(), C->getDirectiveLoc(),
Expand All @@ -12100,11 +12102,11 @@ TreeTransform<Derived>::TransformOpenACCLoopConstruct(OpenACCLoopConstruct *C) {

// Transform Loop.
SemaOpenACC::AssociatedStmtRAII AssocStmtRAII(
getSema().OpenACC(), C->getDirectiveKind(), C->clauses(),
TransformedClauses);
getSema().OpenACC(), C->getDirectiveKind(), C->getDirectiveLoc(),
C->clauses(), TransformedClauses);
StmtResult Loop = getDerived().TransformStmt(C->getLoop());
Loop = getSema().OpenACC().ActOnAssociatedStmt(C->getBeginLoc(),
C->getDirectiveKind(), Loop);
Loop = getSema().OpenACC().ActOnAssociatedStmt(
C->getBeginLoc(), C->getDirectiveKind(), TransformedClauses, Loop);

return getDerived().RebuildOpenACCLoopConstruct(
C->getBeginLoc(), C->getDirectiveLoc(), C->getEndLoc(),
Expand Down
6 changes: 6 additions & 0 deletions clang/lib/Serialization/ASTReader.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -5866,6 +5866,12 @@ llvm::Error ASTReader::ReadSubmoduleBlock(ModuleFile &F,
}

CurrentModule->Kind = Kind;
// Note that we may be rewriting an existing location and it is important
// to keep doing that. In particular, we would like to prefer a
// `DefinitionLoc` loaded from the module file instead of the location
// created in the current source manager, because it allows the new
// location to be marked as "unaffecting" when writing and avoid creating
// duplicate locations for the same module map file.
CurrentModule->DefinitionLoc = DefinitionLoc;
CurrentModule->Signature = F.Signature;
CurrentModule->IsFromModuleFile = true;
Expand Down
46 changes: 35 additions & 11 deletions clang/lib/Serialization/ASTWriter.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,7 @@
#include "clang/AST/TypeLocVisitor.h"
#include "clang/Basic/Diagnostic.h"
#include "clang/Basic/DiagnosticOptions.h"
#include "clang/Basic/FileEntry.h"
#include "clang/Basic/FileManager.h"
#include "clang/Basic/FileSystemOptions.h"
#include "clang/Basic/IdentifierTable.h"
Expand Down Expand Up @@ -81,6 +82,7 @@
#include "llvm/ADT/APSInt.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/DenseSet.h"
#include "llvm/ADT/Hashing.h"
#include "llvm/ADT/PointerIntPair.h"
#include "llvm/ADT/STLExtras.h"
Expand Down Expand Up @@ -166,18 +168,25 @@ static TypeCode getTypeCodeForTypeClass(Type::TypeClass id) {

namespace {

std::optional<std::set<const FileEntry *>>
/// Result type of GetAffectingModuleMaps. The same collection of module map
/// files is kept in two forms because the writer queries it both by FileID
/// and by FileEntry.
struct AffectingModuleMaps {
/// FileIDs of the module maps gathered by GetAffectingModuleMaps.
llvm::DenseSet<FileID> DefinitionFileIDs;
/// File entries resolved from those FileIDs through the source manager; a
/// FileID for which no file entry is found has no counterpart in this set.
llvm::DenseSet<const FileEntry *> DefinitionFiles;
};

std::optional<AffectingModuleMaps>
GetAffectingModuleMaps(const Preprocessor &PP, Module *RootModule) {
if (!PP.getHeaderSearchInfo()
.getHeaderSearchOpts()
.ModulesPruneNonAffectingModuleMaps)
return std::nullopt;

const HeaderSearch &HS = PP.getHeaderSearchInfo();
const SourceManager &SM = PP.getSourceManager();
const ModuleMap &MM = HS.getModuleMap();

std::set<const FileEntry *> ModuleMaps;
std::set<const Module *> ProcessedModules;
llvm::DenseSet<FileID> ModuleMaps;

llvm::DenseSet<const Module *> ProcessedModules;
auto CollectModuleMapsForHierarchy = [&](const Module *M) {
M = M->getTopLevelModule();

Expand All @@ -192,13 +201,13 @@ GetAffectingModuleMaps(const Preprocessor &PP, Module *RootModule) {

// The containing module map is affecting, because it's being pointed
// into by Module::DefinitionLoc.
if (auto FE = MM.getContainingModuleMapFile(Mod))
ModuleMaps.insert(*FE);
if (auto F = MM.getContainingModuleMapFileID(Mod); F.isValid())
ModuleMaps.insert(F);
// For inferred modules, the module map that allowed inferring is not
// related to the virtual containing module map file. It did affect the
// compilation, though.
if (auto FE = MM.getModuleMapFileForUniquing(Mod))
ModuleMaps.insert(*FE);
if (auto UniqF = MM.getModuleMapFileIDForUniquing(Mod); UniqF.isValid())
ModuleMaps.insert(UniqF);

for (auto *SubM : Mod->submodules())
Q.push(SubM);
Expand Down Expand Up @@ -268,7 +277,16 @@ GetAffectingModuleMaps(const Preprocessor &PP, Module *RootModule) {
// just ban module map hierarchies where module map defining a (sub)module X
// includes a module map defining a module that's not a submodule of X.

return ModuleMaps;
llvm::DenseSet<const FileEntry *> ModuleFileEntries;
for (FileID MM : ModuleMaps) {
if (auto *FE = SM.getFileEntryForID(MM))
ModuleFileEntries.insert(FE);
}

AffectingModuleMaps R;
R.DefinitionFileIDs = std::move(ModuleMaps);
R.DefinitionFiles = std::move(ModuleFileEntries);
return std::move(R);
}

class ASTTypeWriter {
Expand Down Expand Up @@ -1770,14 +1788,17 @@ void ASTWriter::WriteInputFiles(SourceManager &SourceMgr,
continue;

// Do not emit input files that do not affect current module.
if (!IsSLocAffecting[I])
if (!IsSLocFileEntryAffecting[I])
continue;

InputFileEntry Entry(*Cache->OrigEntry);
Entry.IsSystemFile = isSystem(File.getFileCharacteristic());
Entry.IsTransient = Cache->IsTransient;
Entry.BufferOverridden = Cache->BufferOverridden;
Entry.IsTopLevel = getAffectingIncludeLoc(SourceMgr, File).isInvalid();

FileID IncludeFileID = SourceMgr.getFileID(File.getIncludeLoc());
Entry.IsTopLevel = IncludeFileID.isInvalid() || IncludeFileID.ID < 0 ||
!IsSLocFileEntryAffecting[IncludeFileID.ID];
Entry.IsModuleMap = isModuleMap(File.getFileCharacteristic());

uint64_t ContentHash = 0;
Expand Down Expand Up @@ -4920,6 +4941,7 @@ void ASTWriter::computeNonAffectingInputFiles() {
unsigned N = SrcMgr.local_sloc_entry_size();

IsSLocAffecting.resize(N, true);
IsSLocFileEntryAffecting.resize(N, true);

if (!WritingModule)
return;
Expand Down Expand Up @@ -4956,10 +4978,12 @@ void ASTWriter::computeNonAffectingInputFiles() {
continue;

// Don't prune module maps that are affecting.
if (llvm::is_contained(*AffectingModuleMaps, *Cache->OrigEntry))
if (AffectingModuleMaps->DefinitionFileIDs.contains(FID))
continue;

IsSLocAffecting[I] = false;
IsSLocFileEntryAffecting[I] =
AffectingModuleMaps->DefinitionFiles.contains(*Cache->OrigEntry);

FileIDAdjustment += 1;
// Even empty files take up one element in the offset table.
Expand Down
198 changes: 99 additions & 99 deletions clang/test/AST/ast-print-openacc-loop-construct.cpp

Large diffs are not rendered by default.

5 changes: 5 additions & 0 deletions clang/test/C/C2y/n3344.c
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@
void baz(volatile void); // expected-error {{'void' as parameter must not have type qualifiers}}
void bar(const void); // expected-error {{'void' as parameter must not have type qualifiers}}
void foo(register void); // expected-error {{invalid storage class specifier in function declarator}}
void foop(void register); // expected-error {{invalid storage class specifier in function declarator}}
void quux(static void); // expected-error {{invalid storage class specifier in function declarator}}
void quobble(auto void); // expected-error {{invalid storage class specifier in function declarator}}
void quubble(extern void); // expected-error {{invalid storage class specifier in function declarator}}
Expand All @@ -28,3 +29,7 @@ void quabble(_Thread_local void); // expected-error {{'_Thread_local' is only al
#endif
void bing(void, ...); // expected-error {{'void' must be the first and only parameter if specified}}

// These declarations are fine.
void one(register void *);
void two(void register *);
void three(register void * (*)[4]);
52 changes: 52 additions & 0 deletions clang/test/CodeGen/X86/amx_avx512_api.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,52 @@
// RUN: %clang_cc1 %s -flax-vector-conversions=none -ffreestanding -triple=x86_64-unknown-unknown \
// RUN: -target-feature +amx-avx512 -target-feature +avx10.2-512 \
// RUN: -emit-llvm -o - -Werror -pedantic | FileCheck %s --check-prefixes=CHECK

#include <immintrin.h>

char buf[1024];
#define STRIDE 32

char buf2[1024];

// Exercises the __tile1024i wrapper __tile_cvtrowd2ps. The expected IR casts a
// <256 x i32> vector to x86_amx and calls the ".internal" tcvtrowd2ps
// intrinsic returning <16 x float>; the two DAG-style directives may match in
// either order.
__m512 test_tile_cvtrowd2ps(__tile1024i a, unsigned b) {
//CHECK-LABEL: @test_tile_cvtrowd2ps
//CHECK-DAG: call x86_amx @llvm.x86.cast.vector.to.tile.v256i32(<256 x i32> {{%.*}})
//CHECK-DAG: call <16 x float> @llvm.x86.tcvtrowd2ps.internal
return __tile_cvtrowd2ps(a, b);
}

// Exercises the __tile1024i wrapper __tile_cvtrowps2pbf16h. The expected IR
// casts a <256 x i32> vector to x86_amx and calls the ".internal"
// tcvtrowps2pbf16h intrinsic returning <32 x bfloat>, in either order.
__m512bh test_tile_cvtrowps2pbf16h(__tile1024i a, unsigned b) {
//CHECK-LABEL: @test_tile_cvtrowps2pbf16h
//CHECK-DAG: call x86_amx @llvm.x86.cast.vector.to.tile.v256i32(<256 x i32> {{%.*}})
//CHECK-DAG: call <32 x bfloat> @llvm.x86.tcvtrowps2pbf16h.internal
return __tile_cvtrowps2pbf16h(a, b);
}

// Exercises the __tile1024i wrapper __tile_cvtrowps2pbf16l. The expected IR
// casts a <256 x i32> vector to x86_amx and calls the ".internal"
// tcvtrowps2pbf16l intrinsic returning <32 x bfloat>, in either order.
__m512bh test_tile_cvtrowps2pbf16l(__tile1024i a, unsigned b) {
//CHECK-LABEL: @test_tile_cvtrowps2pbf16l
//CHECK-DAG: call x86_amx @llvm.x86.cast.vector.to.tile.v256i32(<256 x i32> {{%.*}})
//CHECK-DAG: call <32 x bfloat> @llvm.x86.tcvtrowps2pbf16l.internal
return __tile_cvtrowps2pbf16l(a, b);
}

// Exercises the __tile1024i wrapper __tile_cvtrowps2phh. The expected IR casts
// a <256 x i32> vector to x86_amx and calls the ".internal" tcvtrowps2phh
// intrinsic returning <32 x half>, in either order.
__m512h test_tile_cvtrowps2phh(__tile1024i a, unsigned b) {
//CHECK-LABEL: @test_tile_cvtrowps2phh
//CHECK-DAG: call x86_amx @llvm.x86.cast.vector.to.tile.v256i32(<256 x i32> {{%.*}})
//CHECK-DAG: call <32 x half> @llvm.x86.tcvtrowps2phh.internal
return __tile_cvtrowps2phh(a, b);
}

// Exercises the __tile1024i wrapper __tile_cvtrowps2phl. The expected IR casts
// a <256 x i32> vector to x86_amx and calls the ".internal" tcvtrowps2phl
// intrinsic returning <32 x half>, in either order.
__m512h test_tile_cvtrowps2phl(__tile1024i a, unsigned b) {
//CHECK-LABEL: @test_tile_cvtrowps2phl
//CHECK-DAG: call x86_amx @llvm.x86.cast.vector.to.tile.v256i32(<256 x i32> {{%.*}})
//CHECK-DAG: call <32 x half> @llvm.x86.tcvtrowps2phl.internal
return __tile_cvtrowps2phl(a, b);
}

// Exercises the __tile1024i wrapper __tile_movrow. The expected IR casts a
// <256 x i32> vector to x86_amx and calls the ".internal" tilemovrow intrinsic
// returning <16 x i32>, in either order.
__m512i test_tile_movrow(__tile1024i a, unsigned b) {
//CHECK-LABEL: @test_tile_movrow
//CHECK-DAG: call x86_amx @llvm.x86.cast.vector.to.tile.v256i32(<256 x i32> {{%.*}})
//CHECK-DAG: call <16 x i32> @llvm.x86.tilemovrow.internal
return __tile_movrow(a, b);
}
41 changes: 41 additions & 0 deletions clang/test/CodeGen/X86/amxavx512-builtins.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
// RUN: %clang_cc1 %s -ffreestanding -triple=x86_64-unknown-unknown -target-feature +amx-tile -target-feature +amx-avx512 \
// RUN: -target-feature +avx10.2-512 -emit-llvm -o - -Wall -Werror -pedantic -Wno-gnu-statement-expression | FileCheck %s

#include <immintrin.h>
#include <stddef.h>

// Calls _tile_cvtrowd2ps with the constant 1 as its first argument; the
// expected IR is a direct call to llvm.x86.tcvtrowd2ps with that constant as
// `i8 1` and A as an i32 operand, producing <16 x float>.
__m512 test_tile_cvtrowd2ps(unsigned int A) {
// CHECK-LABEL: @test_tile_cvtrowd2ps(
// CHECK: call <16 x float> @llvm.x86.tcvtrowd2ps(i8 1, i32 %{{.*}})
return _tile_cvtrowd2ps(1, A);
}

// Calls _tile_cvtrowps2pbf16h with the constant 1 as its first argument; the
// expected IR is a direct call to llvm.x86.tcvtrowps2pbf16h with `i8 1` and an
// i32 operand, producing <32 x bfloat>.
__m512bh test_tile_cvtrowps2pbf16h(unsigned int A) {
// CHECK-LABEL: @test_tile_cvtrowps2pbf16h(
// CHECK: call <32 x bfloat> @llvm.x86.tcvtrowps2pbf16h(i8 1, i32 %{{.*}})
return _tile_cvtrowps2pbf16h(1, A);
}

// Calls _tile_cvtrowps2pbf16l with the constant 1 as its first argument; the
// expected IR is a direct call to llvm.x86.tcvtrowps2pbf16l with `i8 1` and an
// i32 operand, producing <32 x bfloat>.
__m512bh test_tile_cvtrowps2pbf16l(unsigned int A) {
// CHECK-LABEL: @test_tile_cvtrowps2pbf16l(
// CHECK: call <32 x bfloat> @llvm.x86.tcvtrowps2pbf16l(i8 1, i32 %{{.*}})
return _tile_cvtrowps2pbf16l(1, A);
}

// Calls _tile_cvtrowps2phh with the constant 1 as its first argument; the
// expected IR is a direct call to llvm.x86.tcvtrowps2phh with `i8 1` and an
// i32 operand, producing <32 x half>.
__m512h test_tile_cvtrowps2phh(unsigned int A) {
// CHECK-LABEL: @test_tile_cvtrowps2phh(
// CHECK: call <32 x half> @llvm.x86.tcvtrowps2phh(i8 1, i32 %{{.*}})
return _tile_cvtrowps2phh(1, A);
}

// Calls _tile_cvtrowps2phl with the constant 1 as its first argument; the
// expected IR is a direct call to llvm.x86.tcvtrowps2phl with `i8 1` and an
// i32 operand, producing <32 x half>.
__m512h test_tile_cvtrowps2phl(unsigned int A) {
// CHECK-LABEL: @test_tile_cvtrowps2phl(
// CHECK: call <32 x half> @llvm.x86.tcvtrowps2phl(i8 1, i32 %{{.*}})
return _tile_cvtrowps2phl(1, A);
}

// Calls _tile_movrow with the constant 1 as its first argument; the expected
// IR is a direct call to llvm.x86.tilemovrow with `i8 1` and an i32 operand,
// producing <16 x i32>.
// NOTE(review): unlike the sibling tests, the directive below pins the result
// to the literal value name %1 and the label omits the trailing "(" - confirm
// whether that is intentional, since a fixed value name is sensitive to
// unrelated IR numbering changes.
__m512i test_tile_movrow(unsigned int A) {
// CHECK-LABEL: @test_tile_movrow
// CHECK: %1 = call <16 x i32> @llvm.x86.tilemovrow(i8 1, i32 %{{.*}})
return _tile_movrow(1, A);
}
19 changes: 19 additions & 0 deletions clang/test/CodeGen/X86/builtin_test_helpers.h
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,25 @@ constexpr bool match_m64(__m64 _v, unsigned long long a) {
return v[0] == a;
}

// Reports whether the single lane of `v` equals `a`.
constexpr bool match_v1di(__m64 v, long long a) {
  const auto lane0 = v[0];
  return lane0 == a;
}

// Reinterprets `_v` as two int lanes and reports whether they equal
// (a, b), lane 0 first.
constexpr bool match_v2si(__m64 _v, int a, int b) {
  const __v2si lanes = (__v2si)_v;
  if (lanes[0] != a)
    return false;
  return lanes[1] == b;
}

// Reinterprets `_v` as four short lanes and reports whether they equal
// (a, b, c, d), lane 0 first.
constexpr bool match_v4hi(__m64 _v, short a, short b, short c, short d) {
  const __v4hi lanes = (__v4hi)_v;
  if (lanes[0] != a || lanes[1] != b)
    return false;
  return lanes[2] == c && lanes[3] == d;
}

// Reinterprets `_v` as eight char lanes and reports whether they equal
// (a, b, c, d, e, f, g, h), lane 0 first.
constexpr bool match_v8qi(__m64 _v, char a, char b, char c, char d, char e, char f, char g, char h) {
  const __v8qi lanes = (__v8qi)_v;
  const bool low_half_ok =
      lanes[0] == a && lanes[1] == b && lanes[2] == c && lanes[3] == d;
  const bool high_half_ok =
      lanes[4] == e && lanes[5] == f && lanes[6] == g && lanes[7] == h;
  return low_half_ok && high_half_ok;
}

constexpr bool match_m128(__m128 v, float a, float b, float c, float d) {
return v[0] == a && v[1] == b && v[2] == c && v[3] == d;
}
Expand Down
31 changes: 26 additions & 5 deletions clang/test/CodeGen/X86/mmx-builtins.c
Original file line number Diff line number Diff line change
@@ -1,10 +1,15 @@
// RUN: %clang_cc1 -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-apple-darwin -target-feature +ssse3 -emit-llvm -o - -Wall -Werror | FileCheck %s --implicit-check-not=x86mmx
// RUN: %clang_cc1 -flax-vector-conversions=none -ffreestanding %s -triple=i386-apple-darwin -target-feature +ssse3 -emit-llvm -o - -Wall -Werror | FileCheck %s --implicit-check-not=x86mmx
// RUN: %clang_cc1 -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-apple-darwin -target-feature +ssse3 -fno-signed-char -emit-llvm -o - -Wall -Werror | FileCheck %s --implicit-check-not=x86mmx
// RUN: %clang_cc1 -flax-vector-conversions=none -ffreestanding %s -triple=i386-apple-darwin -target-feature +ssse3 -fno-signed-char -emit-llvm -o - -Wall -Werror | FileCheck %s --implicit-check-not=x86mmx
// RUN: %clang_cc1 -x c -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-apple-darwin -target-feature +ssse3 -emit-llvm -o - -Wall -Werror | FileCheck %s --implicit-check-not=x86mmx
// RUN: %clang_cc1 -x c -flax-vector-conversions=none -ffreestanding %s -triple=i386-apple-darwin -target-feature +ssse3 -emit-llvm -o - -Wall -Werror | FileCheck %s --implicit-check-not=x86mmx
// RUN: %clang_cc1 -x c -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-apple-darwin -target-feature +ssse3 -fno-signed-char -emit-llvm -o - -Wall -Werror | FileCheck %s --implicit-check-not=x86mmx
// RUN: %clang_cc1 -x c -flax-vector-conversions=none -ffreestanding %s -triple=i386-apple-darwin -target-feature +ssse3 -fno-signed-char -emit-llvm -o - -Wall -Werror | FileCheck %s --implicit-check-not=x86mmx
// RUN: %clang_cc1 -x c++ -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-apple-darwin -target-feature +ssse3 -emit-llvm -o - -Wall -Werror | FileCheck %s --implicit-check-not=x86mmx
// RUN: %clang_cc1 -x c++ -flax-vector-conversions=none -ffreestanding %s -triple=i386-apple-darwin -target-feature +ssse3 -emit-llvm -o - -Wall -Werror | FileCheck %s --implicit-check-not=x86mmx
// RUN: %clang_cc1 -x c++ -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-apple-darwin -target-feature +ssse3 -fno-signed-char -emit-llvm -o - -Wall -Werror | FileCheck %s --implicit-check-not=x86mmx
// RUN: %clang_cc1 -x c++ -flax-vector-conversions=none -ffreestanding %s -triple=i386-apple-darwin -target-feature +ssse3 -fno-signed-char -emit-llvm -o - -Wall -Werror | FileCheck %s --implicit-check-not=x86mmx


#include <immintrin.h>
#include "builtin_test_helpers.h"

__m64 test_mm_abs_pi8(__m64 a) {
// CHECK-LABEL: test_mm_abs_pi8
Expand Down Expand Up @@ -327,7 +332,7 @@ __m64 test_mm_min_pu8(__m64 a, __m64 b) {

int test_mm_movemask_pi8(__m64 a) {
// CHECK-LABEL: test_mm_movemask_pi8
// CHECK: call i32 @llvm.x86.sse2.pmovmskb.128(
// CHECK: call {{.*}}i32 @llvm.x86.sse2.pmovmskb.128(
return _mm_movemask_pi8(a);
}

Expand Down Expand Up @@ -405,6 +410,7 @@ __m64 test_mm_set_pi8(char a, char b, char c, char d, char e, char f, char g, ch
// CHECK: insertelement <8 x i8>
return _mm_set_pi8(a, b, c, d, e, f, g, h);
}
TEST_CONSTEXPR(match_v8qi(_mm_set_pi8(0, -1, 2, -3, 4, -5, 6, -7), -7, 6, -5, 4, -3, 2, -1, 0));

__m64 test_mm_set_pi16(short a, short b, short c, short d) {
// CHECK-LABEL: test_mm_set_pi16
Expand All @@ -414,13 +420,15 @@ __m64 test_mm_set_pi16(short a, short b, short c, short d) {
// CHECK: insertelement <4 x i16>
return _mm_set_pi16(a, b, c, d);
}
TEST_CONSTEXPR(match_v4hi(_mm_set_pi16(101, 102, -103, -104), -104, -103, 102, 101));

// _mm_set_pi32 is expected to produce two <2 x i32> insertelement
// instructions.
__m64 test_mm_set_pi32(int a, int b) {
// CHECK-LABEL: test_mm_set_pi32
// CHECK: insertelement <2 x i32>
// CHECK: insertelement <2 x i32>
return _mm_set_pi32(a, b);
}
// Constant-evaluated expectation: the lanes are compared against the
// arguments in reverse order, i.e. lane 0 is -1500 and lane 1 is 5000.
// TEST_CONSTEXPR is defined outside this view (presumably a compile-time
// assertion).
TEST_CONSTEXPR(match_v2si(_mm_set_pi32(5000, -1500), -1500, 5000));

__m64 test_mm_setr_pi8(char a, char b, char c, char d, char e, char f, char g, char h) {
// CHECK-LABEL: test_mm_setr_pi8
Expand All @@ -434,6 +442,7 @@ __m64 test_mm_setr_pi8(char a, char b, char c, char d, char e, char f, char g, c
// CHECK: insertelement <8 x i8>
return _mm_setr_pi8(a, b, c, d, e, f, g, h);
}
TEST_CONSTEXPR(match_v8qi(_mm_setr_pi8(0, -1, 2, -3, 4, -5, 6, -7), 0, -1, 2, -3, 4, -5, 6, -7));

__m64 test_mm_setr_pi16(short a, short b, short c, short d) {
// CHECK-LABEL: test_mm_setr_pi16
Expand All @@ -443,13 +452,22 @@ __m64 test_mm_setr_pi16(short a, short b, short c, short d) {
// CHECK: insertelement <4 x i16>
return _mm_setr_pi16(a, b, c, d);
}
TEST_CONSTEXPR(match_v4hi(_mm_setr_pi16(101, 102, -103, -104), 101, 102, -103, -104));

// _mm_setr_pi32 is expected to produce two <2 x i32> insertelement
// instructions.
__m64 test_mm_setr_pi32(int a, int b) {
// CHECK-LABEL: test_mm_setr_pi32
// CHECK: insertelement <2 x i32>
// CHECK: insertelement <2 x i32>
return _mm_setr_pi32(a, b);
}
// Constant-evaluated expectation: the lanes are compared against the
// arguments in the same order, i.e. lane 0 is 5000 and lane 1 is -1500.
// TEST_CONSTEXPR is defined outside this view (presumably a compile-time
// assertion).
TEST_CONSTEXPR(match_v2si(_mm_setr_pi32(5000, -1500), 5000, -1500));

// _mm_setzero_si64 is expected to appear in the IR as a zeroinitializer.
__m64 test_mm_setzero_si64() {
// CHECK-LABEL: test_mm_setzero_si64
// CHECK: zeroinitializer
return _mm_setzero_si64();
}
// Constant-evaluated expectation: the result matches the 64-bit value 0.
// TEST_CONSTEXPR is defined outside this view (presumably a compile-time
// assertion).
TEST_CONSTEXPR(match_m64(_mm_setzero_si64(), 0ULL));

__m64 test_mm_set1_pi8(char a) {
// CHECK-LABEL: test_mm_set1_pi8
Expand All @@ -463,6 +481,7 @@ __m64 test_mm_set1_pi8(char a) {
// CHECK: insertelement <8 x i8>
return _mm_set1_pi8(a);
}
TEST_CONSTEXPR(match_v8qi(_mm_set1_pi8(99), 99, 99, 99, 99, 99, 99, 99, 99));

__m64 test_mm_set1_pi16(short a) {
// CHECK-LABEL: test_mm_set1_pi16
Expand All @@ -472,13 +491,15 @@ __m64 test_mm_set1_pi16(short a) {
// CHECK: insertelement <4 x i16>
return _mm_set1_pi16(a);
}
TEST_CONSTEXPR(match_v4hi(_mm_set1_pi16(-128), -128, -128, -128, -128));

// _mm_set1_pi32 is expected to produce two <2 x i32> insertelement
// instructions.
__m64 test_mm_set1_pi32(int a) {
// CHECK-LABEL: test_mm_set1_pi32
// CHECK: insertelement <2 x i32>
// CHECK: insertelement <2 x i32>
return _mm_set1_pi32(a);
}
// Constant-evaluated expectation: both lanes hold the single argument, 55.
// TEST_CONSTEXPR is defined outside this view (presumably a compile-time
// assertion).
TEST_CONSTEXPR(match_v2si(_mm_set1_pi32(55), 55, 55));

__m64 test_mm_shuffle_pi8(__m64 a, __m64 b) {
// CHECK-LABEL: test_mm_shuffle_pi8
Expand Down
11 changes: 4 additions & 7 deletions clang/test/CodeGen/X86/sse2-builtins.c
Original file line number Diff line number Diff line change
Expand Up @@ -867,22 +867,19 @@ __m128d test_mm_min_sd(__m128d A, __m128d B) {
return _mm_min_sd(A, B);
}

__m64 test_mm_movepi64_pi64(__m128i A)
{
__m64 test_mm_movepi64_pi64(__m128i A) {
// CHECK-LABEL: test_mm_movepi64_pi64
// CHECK: [[EXT:%.*]] = extractelement <2 x i64> %1, i32 0
return _mm_movepi64_pi64(A);
}
TEST_CONSTEXPR(match_m64(_mm_movepi64_pi64((__m128i){8, -8}), 8ULL));

__m128i test_mm_movpi64_epi64(__m64 A)
{
__m128i test_mm_movpi64_epi64(__m64 A) {
// CHECK-LABEL: test_mm_movpi64_epi64
// CHECK: [[CAST:%.*]] = bitcast <1 x i64> %{{.*}} to i64
// CHECK: [[INS:%.*]] = insertelement <2 x i64> poison, i64 [[CAST]], i32 0
// CHECK: insertelement <2 x i64> [[INS]], i64 0, i32 1
// CHECK: shufflevector <1 x i64> %{{.*}}, <1 x i64> %{{.*}}, <2 x i32> <i32 0, i32 1>
return _mm_movpi64_epi64(A);
}
TEST_CONSTEXPR(match_m128i(_mm_movpi64_epi64((__m64){5LL}), 5ULL, 0ULL));

__m128i test_mm_move_epi64(__m128i A) {
// CHECK-LABEL: test_mm_move_epi64
Expand Down
2 changes: 1 addition & 1 deletion clang/test/CodeGen/aarch64-cpu-supports-target.c
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ int check_all_feature() {
return 2;
else if (__builtin_cpu_supports("aes+pmull+fp16+dit+dpb+dpb2+jscvt"))
return 3;
else if (__builtin_cpu_supports("fcma+rcpc+rcpc2+rcpc3+frintts+dgh"))
else if (__builtin_cpu_supports("fcma+rcpc+rcpc2+rcpc3+frintts"))
return 4;
else if (__builtin_cpu_supports("i8mm+bf16+sve"))
return 5;
Expand Down
7 changes: 2 additions & 5 deletions clang/test/CodeGen/aarch64-fmv-dependencies.c
Original file line number Diff line number Diff line change
Expand Up @@ -15,9 +15,6 @@ __attribute__((target_version("bti"))) int fmv(void) { return 0; }
// CHECK: define dso_local i32 @fmv._Mcrc() #[[crc:[0-9]+]] {
__attribute__((target_version("crc"))) int fmv(void) { return 0; }

// CHECK: define dso_local i32 @fmv._Mdgh() #[[ATTR0:[0-9]+]] {
__attribute__((target_version("dgh"))) int fmv(void) { return 0; }

// CHECK: define dso_local i32 @fmv._Mdit() #[[dit:[0-9]+]] {
__attribute__((target_version("dit"))) int fmv(void) { return 0; }

Expand Down Expand Up @@ -157,7 +154,6 @@ int caller() {
// CHECK: attributes #[[bf16]] = { {{.*}} "target-features"="+bf16,+fp-armv8,+neon,+outline-atomics,+v8a"
// CHECK: attributes #[[bti]] = { {{.*}} "target-features"="+bti,+fp-armv8,+neon,+outline-atomics,+v8a"
// CHECK: attributes #[[crc]] = { {{.*}} "target-features"="+crc,+fp-armv8,+neon,+outline-atomics,+v8a"
// CHECK: attributes #[[ATTR0]] = { {{.*}} "target-features"="+fp-armv8,+neon,+outline-atomics,+v8a"
// CHECK: attributes #[[dit]] = { {{.*}} "target-features"="+dit,+fp-armv8,+neon,+outline-atomics,+v8a"
// CHECK: attributes #[[dotprod]] = { {{.*}} "target-features"="+dotprod,+fp-armv8,+neon,+outline-atomics,+v8a"
// CHECK: attributes #[[dpb]] = { {{.*}} "target-features"="+ccpp,+fp-armv8,+neon,+outline-atomics,+v8a"
Expand All @@ -167,6 +163,7 @@ int caller() {
// CHECK: attributes #[[fcma]] = { {{.*}} "target-features"="+complxnum,+fp-armv8,+neon,+outline-atomics,+v8a"
// CHECK: attributes #[[flagm]] = { {{.*}} "target-features"="+flagm,+fp-armv8,+neon,+outline-atomics,+v8a"
// CHECK: attributes #[[flagm2]] = { {{.*}} "target-features"="+altnzcv,+flagm,+fp-armv8,+neon,+outline-atomics,+v8a"
// CHECK: attributes #[[ATTR0]] = { {{.*}} "target-features"="+fp-armv8,+neon,+outline-atomics,+v8a"
// CHECK: attributes #[[fp16]] = { {{.*}} "target-features"="+fp-armv8,+fullfp16,+neon,+outline-atomics,+v8a"
// CHECK: attributes #[[fp16fml]] = { {{.*}} "target-features"="+fp-armv8,+fp16fml,+fullfp16,+neon,+outline-atomics,+v8a"
// CHECK: attributes #[[frintts]] = { {{.*}} "target-features"="+fp-armv8,+fptoint,+neon,+outline-atomics,+v8a"
Expand All @@ -192,7 +189,7 @@ int caller() {
// CHECK: attributes #[[ssbs]] = { {{.*}} "target-features"="+fp-armv8,+neon,+outline-atomics,+ssbs,+v8a"
// CHECK: attributes #[[sve]] = { {{.*}} "target-features"="+fp-armv8,+fullfp16,+neon,+outline-atomics,+sve,+v8a"
// CHECK: attributes #[[sve2]] = { {{.*}} "target-features"="+fp-armv8,+fullfp16,+neon,+outline-atomics,+sve,+sve2,+v8a"
// CHECK: attributes #[[sve2_aes]] = { {{.*}} "target-features"="+aes,+fp-armv8,+fullfp16,+neon,+outline-atomics,+sve,+sve2,+sve2-aes,+v8a"
// CHECK: attributes #[[sve2_aes]] = { {{.*}} "target-features"="+aes,+fp-armv8,+fullfp16,+neon,+outline-atomics,+sve,+sve-aes,+sve2,+v8a"
// CHECK: attributes #[[sve2_bitperm]] = { {{.*}} "target-features"="+fp-armv8,+fullfp16,+neon,+outline-atomics,+sve,+sve2,+sve2-bitperm,+v8a"
// CHECK: attributes #[[sve2_sha3]] = { {{.*}} "target-features"="+fp-armv8,+fullfp16,+neon,+outline-atomics,+sve,+sve2,+sve2-sha3,+v8a"
// CHECK: attributes #[[sve2_sm4]] = { {{.*}} "target-features"="+fp-armv8,+fullfp16,+neon,+outline-atomics,+sve,+sve2,+sve2-sm4,+v8a"
Expand Down
8 changes: 4 additions & 4 deletions clang/test/CodeGen/aarch64-sve2-intrinsics/acle_sve2_aesd.c
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
// RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64 -target-feature +sve -target-feature +sve2 -target-feature +sve2-aes -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s
// RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64 -target-feature +sve -target-feature +sve2 -target-feature +sve2-aes -O1 -Werror -Wall -emit-llvm -o - -x c++ %s | FileCheck %s -check-prefix=CPP-CHECK
// RUN: %clang_cc1 -fclang-abi-compat=latest -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sve -target-feature +sve2 -target-feature +sve2-aes -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s
// RUN: %clang_cc1 -fclang-abi-compat=latest -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sve -target-feature +sve2 -target-feature +sve2-aes -O1 -Werror -Wall -emit-llvm -o - -x c++ %s | FileCheck %s -check-prefix=CPP-CHECK
// RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64 -target-feature +sve -target-feature +sve2 -target-feature +sve-aes -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s
// RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64 -target-feature +sve -target-feature +sve2 -target-feature +sve-aes -O1 -Werror -Wall -emit-llvm -o - -x c++ %s | FileCheck %s -check-prefix=CPP-CHECK
// RUN: %clang_cc1 -fclang-abi-compat=latest -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sve -target-feature +sve2 -target-feature +sve-aes -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s
// RUN: %clang_cc1 -fclang-abi-compat=latest -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sve -target-feature +sve2 -target-feature +sve-aes -O1 -Werror -Wall -emit-llvm -o - -x c++ %s | FileCheck %s -check-prefix=CPP-CHECK

// REQUIRES: aarch64-registered-target

Expand Down
8 changes: 4 additions & 4 deletions clang/test/CodeGen/aarch64-sve2-intrinsics/acle_sve2_aese.c
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
// RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64 -target-feature +sve -target-feature +sve2 -target-feature +sve2-aes -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s
// RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64 -target-feature +sve -target-feature +sve2 -target-feature +sve2-aes -O1 -Werror -Wall -emit-llvm -o - -x c++ %s | FileCheck %s -check-prefix=CPP-CHECK
// RUN: %clang_cc1 -fclang-abi-compat=latest -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sve -target-feature +sve2 -target-feature +sve2-aes -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s
// RUN: %clang_cc1 -fclang-abi-compat=latest -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sve -target-feature +sve2 -target-feature +sve2-aes -O1 -Werror -Wall -emit-llvm -o - -x c++ %s | FileCheck %s -check-prefix=CPP-CHECK
// RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64 -target-feature +sve -target-feature +sve2 -target-feature +sve-aes -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s
// RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64 -target-feature +sve -target-feature +sve2 -target-feature +sve-aes -O1 -Werror -Wall -emit-llvm -o - -x c++ %s | FileCheck %s -check-prefix=CPP-CHECK
// RUN: %clang_cc1 -fclang-abi-compat=latest -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sve -target-feature +sve2 -target-feature +sve-aes -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s
// RUN: %clang_cc1 -fclang-abi-compat=latest -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sve -target-feature +sve2 -target-feature +sve-aes -O1 -Werror -Wall -emit-llvm -o - -x c++ %s | FileCheck %s -check-prefix=CPP-CHECK

// REQUIRES: aarch64-registered-target

Expand Down
8 changes: 4 additions & 4 deletions clang/test/CodeGen/aarch64-sve2-intrinsics/acle_sve2_aesimc.c
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
// RUN: %clang_cc1 -triple aarch64 -target-feature +sve -target-feature +sve2 -target-feature +sve2-aes -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s
// RUN: %clang_cc1 -triple aarch64 -target-feature +sve -target-feature +sve2 -target-feature +sve2-aes -O1 -Werror -Wall -emit-llvm -o - -x c++ %s | FileCheck %s -check-prefix=CPP-CHECK
// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sve -target-feature +sve2 -target-feature +sve2-aes -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s
// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sve -target-feature +sve2 -target-feature +sve2-aes -O1 -Werror -Wall -emit-llvm -o - -x c++ %s | FileCheck %s -check-prefix=CPP-CHECK
// RUN: %clang_cc1 -triple aarch64 -target-feature +sve -target-feature +sve2 -target-feature +sve-aes -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s
// RUN: %clang_cc1 -triple aarch64 -target-feature +sve -target-feature +sve2 -target-feature +sve-aes -O1 -Werror -Wall -emit-llvm -o - -x c++ %s | FileCheck %s -check-prefix=CPP-CHECK
// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sve -target-feature +sve2 -target-feature +sve-aes -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s
// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sve -target-feature +sve2 -target-feature +sve-aes -O1 -Werror -Wall -emit-llvm -o - -x c++ %s | FileCheck %s -check-prefix=CPP-CHECK

// REQUIRES: aarch64-registered-target

Expand Down
8 changes: 4 additions & 4 deletions clang/test/CodeGen/aarch64-sve2-intrinsics/acle_sve2_aesmc.c
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
// RUN: %clang_cc1 -triple aarch64 -target-feature +sve -target-feature +sve2 -target-feature +sve2-aes -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s
// RUN: %clang_cc1 -triple aarch64 -target-feature +sve -target-feature +sve2 -target-feature +sve2-aes -O1 -Werror -Wall -emit-llvm -o - -x c++ %s | FileCheck %s -check-prefix=CPP-CHECK
// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sve -target-feature +sve2 -target-feature +sve2-aes -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s
// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sve -target-feature +sve2 -target-feature +sve2-aes -O1 -Werror -Wall -emit-llvm -o - -x c++ %s | FileCheck %s -check-prefix=CPP-CHECK
// RUN: %clang_cc1 -triple aarch64 -target-feature +sve -target-feature +sve2 -target-feature +sve-aes -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s
// RUN: %clang_cc1 -triple aarch64 -target-feature +sve -target-feature +sve2 -target-feature +sve-aes -O1 -Werror -Wall -emit-llvm -o - -x c++ %s | FileCheck %s -check-prefix=CPP-CHECK
// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sve -target-feature +sve2 -target-feature +sve-aes -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s
// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sve -target-feature +sve2 -target-feature +sve-aes -O1 -Werror -Wall -emit-llvm -o - -x c++ %s | FileCheck %s -check-prefix=CPP-CHECK

// REQUIRES: aarch64-registered-target

Expand Down
Original file line number Diff line number Diff line change
@@ -1,10 +1,10 @@
// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
// REQUIRES: aarch64-registered-target

// RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64 -target-feature +sve -target-feature +sve2 -target-feature +sve2-aes -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s
// RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64 -target-feature +sve -target-feature +sve2 -target-feature +sve2-aes -O1 -Werror -Wall -emit-llvm -o - -x c++ %s | FileCheck %s -check-prefix=CPP-CHECK
// RUN: %clang_cc1 -fclang-abi-compat=latest -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sve -target-feature +sve2 -target-feature +sve2-aes -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s
// RUN: %clang_cc1 -fclang-abi-compat=latest -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sve -target-feature +sve2 -target-feature +sve2-aes -O1 -Werror -Wall -emit-llvm -o - -x c++ %s | FileCheck %s -check-prefix=CPP-CHECK
// RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64 -target-feature +sve -target-feature +sve2 -target-feature +sve-aes -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s
// RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64 -target-feature +sve -target-feature +sve2 -target-feature +sve-aes -O1 -Werror -Wall -emit-llvm -o - -x c++ %s | FileCheck %s -check-prefix=CPP-CHECK
// RUN: %clang_cc1 -fclang-abi-compat=latest -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sve -target-feature +sve2 -target-feature +sve-aes -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s
// RUN: %clang_cc1 -fclang-abi-compat=latest -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sve -target-feature +sve2 -target-feature +sve-aes -O1 -Werror -Wall -emit-llvm -o - -x c++ %s | FileCheck %s -check-prefix=CPP-CHECK

#include <arm_sve.h>

Expand Down
Original file line number Diff line number Diff line change
@@ -1,10 +1,10 @@
// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
// REQUIRES: aarch64-registered-target

// RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64 -target-feature +sve -target-feature +sve2 -target-feature +sve2-aes -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s
// RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64 -target-feature +sve -target-feature +sve2 -target-feature +sve2-aes -O1 -Werror -Wall -emit-llvm -o - -x c++ %s | FileCheck %s -check-prefix=CPP-CHECK
// RUN: %clang_cc1 -fclang-abi-compat=latest -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sve -target-feature +sve2 -target-feature +sve2-aes -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s
// RUN: %clang_cc1 -fclang-abi-compat=latest -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sve -target-feature +sve2 -target-feature +sve2-aes -O1 -Werror -Wall -emit-llvm -o - -x c++ %s | FileCheck %s -check-prefix=CPP-CHECK
// RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64 -target-feature +sve -target-feature +sve2 -target-feature +sve-aes -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s
// RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64 -target-feature +sve -target-feature +sve2 -target-feature +sve-aes -O1 -Werror -Wall -emit-llvm -o - -x c++ %s | FileCheck %s -check-prefix=CPP-CHECK
// RUN: %clang_cc1 -fclang-abi-compat=latest -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sve -target-feature +sve2 -target-feature +sve-aes -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s
// RUN: %clang_cc1 -fclang-abi-compat=latest -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sve -target-feature +sve2 -target-feature +sve-aes -O1 -Werror -Wall -emit-llvm -o - -x c++ %s | FileCheck %s -check-prefix=CPP-CHECK

#include <arm_sve.h>

Expand Down
28 changes: 6 additions & 22 deletions clang/test/CodeGen/attr-target-version.c
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,6 @@ int __attribute__((target_version("dpb"))) fmv_one(void) { return 2; }
// "default" version of fmv_one; returns 0.
int __attribute__((target_version("default"))) fmv_one(void) { return 0; }
// "fp" version of fmv_two; returns 1 so each version is distinguishable in the IR.
int __attribute__((target_version("fp"))) fmv_two(void) { return 1; }
// "simd" version of fmv_two; returns 2.
int __attribute__((target_version("simd"))) fmv_two(void) { return 2; }
// "dgh" version of fmv_two; returns 3.
int __attribute__((target_version("dgh"))) fmv_two(void) { return 3; }
// "fp16+simd" version of fmv_two (two features combined); returns 4.
int __attribute__((target_version("fp16+simd"))) fmv_two(void) { return 4; }
// "default" version of fmv_two; returns 0.
int __attribute__((target_version("default"))) fmv_two(void) { return 0; }
int foo() {
Expand Down Expand Up @@ -255,13 +254,6 @@ int caller(void) { return used_def_without_default_decl() + used_decl_without_de
//
//
// CHECK: Function Attrs: noinline nounwind optnone
// CHECK-LABEL: define {{[^@]+}}@fmv_two._Mdgh
// CHECK-SAME: () #[[ATTR9]] {
// CHECK-NEXT: entry:
// CHECK-NEXT: ret i32 3
//
//
// CHECK: Function Attrs: noinline nounwind optnone
// CHECK-LABEL: define {{[^@]+}}@fmv_two._Mfp16Msimd
// CHECK-SAME: () #[[ATTR13:[0-9]+]] {
// CHECK-NEXT: entry:
Expand Down Expand Up @@ -576,29 +568,21 @@ int caller(void) { return used_def_without_default_decl() + used_decl_without_de
// CHECK-NEXT: ret ptr @fmv_two._Mfp16Msimd
// CHECK: resolver_else:
// CHECK-NEXT: [[TMP4:%.*]] = load i64, ptr @__aarch64_cpu_features, align 8
// CHECK-NEXT: [[TMP5:%.*]] = and i64 [[TMP4]], 33554432
// CHECK-NEXT: [[TMP6:%.*]] = icmp eq i64 [[TMP5]], 33554432
// CHECK-NEXT: [[TMP5:%.*]] = and i64 [[TMP4]], 512
// CHECK-NEXT: [[TMP6:%.*]] = icmp eq i64 [[TMP5]], 512
// CHECK-NEXT: [[TMP7:%.*]] = and i1 true, [[TMP6]]
// CHECK-NEXT: br i1 [[TMP7]], label [[RESOLVER_RETURN1:%.*]], label [[RESOLVER_ELSE2:%.*]]
// CHECK: resolver_return1:
// CHECK-NEXT: ret ptr @fmv_two._Mdgh
// CHECK-NEXT: ret ptr @fmv_two._Msimd
// CHECK: resolver_else2:
// CHECK-NEXT: [[TMP8:%.*]] = load i64, ptr @__aarch64_cpu_features, align 8
// CHECK-NEXT: [[TMP9:%.*]] = and i64 [[TMP8]], 512
// CHECK-NEXT: [[TMP10:%.*]] = icmp eq i64 [[TMP9]], 512
// CHECK-NEXT: [[TMP9:%.*]] = and i64 [[TMP8]], 256
// CHECK-NEXT: [[TMP10:%.*]] = icmp eq i64 [[TMP9]], 256
// CHECK-NEXT: [[TMP11:%.*]] = and i1 true, [[TMP10]]
// CHECK-NEXT: br i1 [[TMP11]], label [[RESOLVER_RETURN3:%.*]], label [[RESOLVER_ELSE4:%.*]]
// CHECK: resolver_return3:
// CHECK-NEXT: ret ptr @fmv_two._Msimd
// CHECK: resolver_else4:
// CHECK-NEXT: [[TMP12:%.*]] = load i64, ptr @__aarch64_cpu_features, align 8
// CHECK-NEXT: [[TMP13:%.*]] = and i64 [[TMP12]], 256
// CHECK-NEXT: [[TMP14:%.*]] = icmp eq i64 [[TMP13]], 256
// CHECK-NEXT: [[TMP15:%.*]] = and i1 true, [[TMP14]]
// CHECK-NEXT: br i1 [[TMP15]], label [[RESOLVER_RETURN5:%.*]], label [[RESOLVER_ELSE6:%.*]]
// CHECK: resolver_return5:
// CHECK-NEXT: ret ptr @fmv_two._Mfp
// CHECK: resolver_else6:
// CHECK: resolver_else4:
// CHECK-NEXT: ret ptr @fmv_two.default
//
//
Expand Down
8 changes: 4 additions & 4 deletions clang/test/CodeGen/attr-target-x86.c
Original file line number Diff line number Diff line change
Expand Up @@ -59,10 +59,10 @@ void __attribute__((target("avx10.1-512"))) avx10_1_512(void) {}
// CHECK: #0 = {{.*}}"target-cpu"="i686" "target-features"="+cmov,+cx8,+x87" "tune-cpu"="i686"
// CHECK: #1 = {{.*}}"target-cpu"="ivybridge" "target-features"="+avx,+cmov,+crc32,+cx16,+cx8,+f16c,+fsgsbase,+fxsr,+mmx,+pclmul,+popcnt,+rdrnd,+sahf,+sse,+sse2,+sse3,+sse4.1,+sse4.2,+ssse3,+x87,+xsave,+xsaveopt"
// CHECK-NOT: tune-cpu
// CHECK: #2 = {{.*}}"target-cpu"="i686" "target-features"="+cmov,+cx8,+x87,-aes,-avx,-avx10.1-256,-avx10.1-512,-avx10.2-256,-avx10.2-512,-avx2,-avx512bf16,-avx512bitalg,-avx512bw,-avx512cd,-avx512dq,-avx512f,-avx512fp16,-avx512ifma,-avx512vbmi,-avx512vbmi2,-avx512vl,-avx512vnni,-avx512vp2intersect,-avx512vpopcntdq,-avxifma,-avxneconvert,-avxvnni,-avxvnniint16,-avxvnniint8,-f16c,-fma,-fma4,-gfni,-kl,-pclmul,-sha,-sha512,-sm3,-sm4,-sse2,-sse3,-sse4.1,-sse4.2,-sse4a,-ssse3,-vaes,-vpclmulqdq,-widekl,-xop" "tune-cpu"="i686"
// CHECK: #2 = {{.*}}"target-cpu"="i686" "target-features"="+cmov,+cx8,+x87,-aes,-amx-avx512,-avx,-avx10.1-256,-avx10.1-512,-avx10.2-256,-avx10.2-512,-avx2,-avx512bf16,-avx512bitalg,-avx512bw,-avx512cd,-avx512dq,-avx512f,-avx512fp16,-avx512ifma,-avx512vbmi,-avx512vbmi2,-avx512vl,-avx512vnni,-avx512vp2intersect,-avx512vpopcntdq,-avxifma,-avxneconvert,-avxvnni,-avxvnniint16,-avxvnniint8,-f16c,-fma,-fma4,-gfni,-kl,-pclmul,-sha,-sha512,-sm3,-sm4,-sse2,-sse3,-sse4.1,-sse4.2,-sse4a,-ssse3,-vaes,-vpclmulqdq,-widekl,-xop" "tune-cpu"="i686"
// CHECK: #3 = {{.*}}"target-cpu"="i686" "target-features"="+cmov,+crc32,+cx8,+mmx,+popcnt,+sse,+sse2,+sse3,+sse4.1,+sse4.2,+ssse3,+x87" "tune-cpu"="i686"
// CHECK: #4 = {{.*}}"target-cpu"="i686" "target-features"="+cmov,+cx8,+x87,-avx,-avx10.1-256,-avx10.1-512,-avx10.2-256,-avx10.2-512,-avx2,-avx512bf16,-avx512bitalg,-avx512bw,-avx512cd,-avx512dq,-avx512f,-avx512fp16,-avx512ifma,-avx512vbmi,-avx512vbmi2,-avx512vl,-avx512vnni,-avx512vp2intersect,-avx512vpopcntdq,-avxifma,-avxneconvert,-avxvnni,-avxvnniint16,-avxvnniint8,-f16c,-fma,-fma4,-sha512,-sm3,-sm4,-sse4.1,-sse4.2,-vaes,-vpclmulqdq,-xop" "tune-cpu"="i686"
// CHECK: #5 = {{.*}}"target-cpu"="ivybridge" "target-features"="+avx,+cmov,+crc32,+cx16,+cx8,+f16c,+fsgsbase,+fxsr,+mmx,+pclmul,+popcnt,+rdrnd,+sahf,+sse,+sse2,+sse3,+sse4.1,+sse4.2,+ssse3,+x87,+xsave,+xsaveopt,-aes,-avx10.1-256,-avx10.1-512,-avx10.2-256,-avx10.2-512,-vaes"
// CHECK: #4 = {{.*}}"target-cpu"="i686" "target-features"="+cmov,+cx8,+x87,-amx-avx512,-avx,-avx10.1-256,-avx10.1-512,-avx10.2-256,-avx10.2-512,-avx2,-avx512bf16,-avx512bitalg,-avx512bw,-avx512cd,-avx512dq,-avx512f,-avx512fp16,-avx512ifma,-avx512vbmi,-avx512vbmi2,-avx512vl,-avx512vnni,-avx512vp2intersect,-avx512vpopcntdq,-avxifma,-avxneconvert,-avxvnni,-avxvnniint16,-avxvnniint8,-f16c,-fma,-fma4,-sha512,-sm3,-sm4,-sse4.1,-sse4.2,-vaes,-vpclmulqdq,-xop" "tune-cpu"="i686"
// CHECK: #5 = {{.*}}"target-cpu"="ivybridge" "target-features"="+avx,+cmov,+crc32,+cx16,+cx8,+f16c,+fsgsbase,+fxsr,+mmx,+pclmul,+popcnt,+rdrnd,+sahf,+sse,+sse2,+sse3,+sse4.1,+sse4.2,+ssse3,+x87,+xsave,+xsaveopt,-aes,-amx-avx512,-avx10.1-256,-avx10.1-512,-avx10.2-256,-avx10.2-512,-vaes"
// CHECK-NOT: tune-cpu
// CHECK: #6 = {{.*}}"target-cpu"="i686" "target-features"="+cmov,+cx8,+x87,-mmx"
// CHECK: #7 = {{.*}}"target-cpu"="lakemont" "target-features"="+cx8,+mmx"
Expand All @@ -76,5 +76,5 @@ void __attribute__((target("avx10.1-512"))) avx10_1_512(void) {}
// CHECK: "target-cpu"="x86-64-v4"
// CHECK-SAME: "target-features"="+avx,+avx2,+avx512bw,+avx512cd,+avx512dq,+avx512f,+avx512vl,+bmi,+bmi2,+cmov,+crc32,+cx16,+cx8,+evex512,+f16c,+fma,+fxsr,+lzcnt,+mmx,+movbe,+popcnt,+sahf,+sse,+sse2,+sse3,+sse4.1,+sse4.2,+ssse3,+x87,+xsave"

// CHECK: #12 = {{.*}}"target-cpu"="i686" "target-features"="+aes,+avx,+avx10.1-256,+avx2,+avx512bf16,+avx512bitalg,+avx512bw,+avx512cd,+avx512dq,+avx512f,+avx512fp16,+avx512ifma,+avx512vbmi,+avx512vbmi2,+avx512vl,+avx512vnni,+avx512vpopcntdq,+cmov,+crc32,+cx8,+f16c,+fma,+mmx,+pclmul,+popcnt,+sse,+sse2,+sse3,+sse4.1,+sse4.2,+ssse3,+vaes,+vpclmulqdq,+x87,+xsave,-avx10.1-512,-avx10.2-512,-evex512"
// CHECK: #12 = {{.*}}"target-cpu"="i686" "target-features"="+aes,+avx,+avx10.1-256,+avx2,+avx512bf16,+avx512bitalg,+avx512bw,+avx512cd,+avx512dq,+avx512f,+avx512fp16,+avx512ifma,+avx512vbmi,+avx512vbmi2,+avx512vl,+avx512vnni,+avx512vpopcntdq,+cmov,+crc32,+cx8,+f16c,+fma,+mmx,+pclmul,+popcnt,+sse,+sse2,+sse3,+sse4.1,+sse4.2,+ssse3,+vaes,+vpclmulqdq,+x87,+xsave,-amx-avx512,-avx10.1-512,-avx10.2-512,-evex512"
// CHECK: #13 = {{.*}}"target-cpu"="i686" "target-features"="+aes,+avx,+avx10.1-256,+avx10.1-512,+avx2,+avx512bf16,+avx512bitalg,+avx512bw,+avx512cd,+avx512dq,+avx512f,+avx512fp16,+avx512ifma,+avx512vbmi,+avx512vbmi2,+avx512vl,+avx512vnni,+avx512vpopcntdq,+cmov,+crc32,+cx8,+evex512,+f16c,+fma,+mmx,+pclmul,+popcnt,+sse,+sse2,+sse3,+sse4.1,+sse4.2,+ssse3,+vaes,+vpclmulqdq,+x87,+xsave"
18 changes: 18 additions & 0 deletions clang/test/CodeGenHLSL/builtins/WaveActiveCountBits.hlsl
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
// RUN: %clang_cc1 -std=hlsl2021 -finclude-default-header -triple \
// RUN: dxil-pc-shadermodel6.3-compute %s -emit-llvm -disable-llvm-passes -o - | \
// RUN: FileCheck %s -DTARGET=dx
// RUN: %clang_cc1 -std=hlsl2021 -finclude-default-header -triple \
// RUN: spirv-pc-vulkan-compute %s -emit-llvm -disable-llvm-passes -o - | \
// RUN: FileCheck %s -DTARGET=spv

// Test basic lowering to runtime function call.

// CHECK-LABEL: test_bool
// Passes a bool to WaveActiveCountBits and returns the result as int; the
// directive inside the body only requires that a call to the
// target-specific wave.active.countbits intrinsic is emitted.
int test_bool(bool expr) {
  // CHECK:  call {{.*}} @llvm.[[TARGET]].wave.active.countbits
  return WaveActiveCountBits(expr);
}

// CHECK: declare i32 @llvm.[[TARGET]].wave.active.countbits(i1) #[[#attr:]]

// CHECK: attributes #[[#attr]] = {{{.*}} convergent {{.*}}}
4 changes: 2 additions & 2 deletions clang/test/CodeGenHLSL/builtins/clamp-builtin.hlsl
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
// RUN: %clang_cc1 -finclude-default-header -x hlsl -triple dxil-pc-shadermodel6.3-library %s -fnative-half-type -emit-llvm -disable-llvm-passes -o - | FileCheck %s

// CHECK-LABEL: builtin_test_clamp_int4
// CHECK: %dx.clamp = call <4 x i32> @llvm.dx.clamp.v4i32(<4 x i32> %0, <4 x i32> %1, <4 x i32> %2)
// CHECK: ret <4 x i32> %dx.clamp
// CHECK: %hlsl.clamp = call <4 x i32> @llvm.dx.sclamp.v4i32(<4 x i32> %0, <4 x i32> %1, <4 x i32> %2)
// CHECK: ret <4 x i32> %hlsl.clamp
// Calls the __builtin_hlsl_elementwise_clamp builtin directly (rather than
// the clamp() wrapper) on three int4 operands and returns the result.
int4 builtin_test_clamp_int4(int4 p0, int4 p1, int4 p2) {
  return __builtin_hlsl_elementwise_clamp(p0, p1, p2);
}
174 changes: 92 additions & 82 deletions clang/test/CodeGenHLSL/builtins/clamp.hlsl
Original file line number Diff line number Diff line change
@@ -1,133 +1,143 @@
// RUN: %clang_cc1 -finclude-default-header -triple dxil-pc-shadermodel6.3-library %s \
// RUN: -fnative-half-type -emit-llvm -disable-llvm-passes -o - | \
// RUN: FileCheck %s --check-prefixes=CHECK,NATIVE_HALF
// RUN: FileCheck %s --check-prefixes=CHECK,NATIVE_HALF \
// RUN: -DTARGET=dx -DFNATTRS=noundef
// RUN: %clang_cc1 -finclude-default-header -triple dxil-pc-shadermodel6.3-library %s \
// RUN: -emit-llvm -disable-llvm-passes -o - | \
// RUN: FileCheck %s --check-prefixes=CHECK,NO_HALF
// RUN: FileCheck %s --check-prefixes=CHECK,NO_HALF \
// RUN: -DTARGET=dx -DFNATTRS=noundef
// RUN: %clang_cc1 -finclude-default-header -triple spirv-unknown-vulkan-compute %s \
// RUN: -fnative-half-type -emit-llvm -disable-llvm-passes -o - | \
// RUN: FileCheck %s --check-prefixes=CHECK,NATIVE_HALF \
// RUN: -DTARGET=spv -DFNATTRS="spir_func noundef"
// RUN: %clang_cc1 -finclude-default-header -triple spirv-unknown-vulkan-compute %s \
// RUN: -emit-llvm -disable-llvm-passes -o - | \
// RUN: FileCheck %s --check-prefixes=CHECK,NO_HALF \
// RUN: -DTARGET=spv -DFNATTRS="spir_func noundef"

#ifdef __HLSL_ENABLE_16_BIT
// NATIVE_HALF-LABEL: define noundef i16 @_Z16test_clamp_short
// NATIVE_HALF: call i16 @llvm.dx.clamp.i16(
// NATIVE_HALF: define [[FNATTRS]] i16 @_Z16test_clamp_short
// NATIVE_HALF: call i16 @llvm.[[TARGET]].sclamp.i16(
// int16_t scalar case (inside the __HLSL_ENABLE_16_BIT guard); p1 is passed
// as both the second and third clamp argument.
int16_t test_clamp_short(int16_t p0, int16_t p1) { return clamp(p0, p1,p1); }
// NATIVE_HALF-LABEL: define noundef <2 x i16> @_Z17test_clamp_short2
// NATIVE_HALF: call <2 x i16> @llvm.dx.clamp.v2i16(
// NATIVE_HALF: define [[FNATTRS]] <2 x i16> @_Z17test_clamp_short2
// NATIVE_HALF: call <2 x i16> @llvm.[[TARGET]].sclamp.v2i16(
// int16_t2 vector case; same (p0, p1, p1) argument pattern.
int16_t2 test_clamp_short2(int16_t2 p0, int16_t2 p1) { return clamp(p0, p1,p1); }
// NATIVE_HALF-LABEL: define noundef <3 x i16> @_Z17test_clamp_short3
// NATIVE_HALF: call <3 x i16> @llvm.dx.clamp.v3i16
// NATIVE_HALF: define [[FNATTRS]] <3 x i16> @_Z17test_clamp_short3
// NATIVE_HALF: call <3 x i16> @llvm.[[TARGET]].sclamp.v3i16
// int16_t3 vector case; same (p0, p1, p1) argument pattern.
int16_t3 test_clamp_short3(int16_t3 p0, int16_t3 p1) { return clamp(p0, p1,p1); }
// NATIVE_HALF-LABEL: define noundef <4 x i16> @_Z17test_clamp_short4
// NATIVE_HALF: call <4 x i16> @llvm.dx.clamp.v4i16
// NATIVE_HALF: define [[FNATTRS]] <4 x i16> @_Z17test_clamp_short4
// NATIVE_HALF: call <4 x i16> @llvm.[[TARGET]].sclamp.v4i16
// int16_t4 vector case; same (p0, p1, p1) argument pattern.
int16_t4 test_clamp_short4(int16_t4 p0, int16_t4 p1) { return clamp(p0, p1,p1); }

// NATIVE_HALF-LABEL: define noundef i16 @_Z17test_clamp_ushort
// NATIVE_HALF: call i16 @llvm.dx.uclamp.i16(
// NATIVE_HALF: define [[FNATTRS]] i16 @_Z17test_clamp_ushort
// NATIVE_HALF: call i16 @llvm.[[TARGET]].uclamp.i16(
// uint16_t scalar case (inside the __HLSL_ENABLE_16_BIT guard); p1 is passed
// as both the second and third clamp argument.
uint16_t test_clamp_ushort(uint16_t p0, uint16_t p1) { return clamp(p0, p1,p1); }
// NATIVE_HALF-LABEL: define noundef <2 x i16> @_Z18test_clamp_ushort2
// NATIVE_HALF: call <2 x i16> @llvm.dx.uclamp.v2i16
// NATIVE_HALF: define [[FNATTRS]] <2 x i16> @_Z18test_clamp_ushort2
// NATIVE_HALF: call <2 x i16> @llvm.[[TARGET]].uclamp.v2i16
// uint16_t2 vector case; same (p0, p1, p1) argument pattern.
uint16_t2 test_clamp_ushort2(uint16_t2 p0, uint16_t2 p1) { return clamp(p0, p1,p1); }
// NATIVE_HALF-LABEL: define noundef <3 x i16> @_Z18test_clamp_ushort3
// NATIVE_HALF: call <3 x i16> @llvm.dx.uclamp.v3i16
// NATIVE_HALF: define [[FNATTRS]] <3 x i16> @_Z18test_clamp_ushort3
// NATIVE_HALF: call <3 x i16> @llvm.[[TARGET]].uclamp.v3i16
// uint16_t3 vector case; same (p0, p1, p1) argument pattern.
uint16_t3 test_clamp_ushort3(uint16_t3 p0, uint16_t3 p1) { return clamp(p0, p1,p1); }
// NATIVE_HALF-LABEL: define noundef <4 x i16> @_Z18test_clamp_ushort4
// NATIVE_HALF: call <4 x i16> @llvm.dx.uclamp.v4i16
// NATIVE_HALF: define [[FNATTRS]] <4 x i16> @_Z18test_clamp_ushort4
// NATIVE_HALF: call <4 x i16> @llvm.[[TARGET]].uclamp.v4i16
// uint16_t4 vector case; same (p0, p1, p1) argument pattern.
uint16_t4 test_clamp_ushort4(uint16_t4 p0, uint16_t4 p1) { return clamp(p0, p1,p1); }
#endif

// CHECK-LABEL: define noundef i32 @_Z14test_clamp_int
// CHECK: call i32 @llvm.dx.clamp.i32(
// CHECK: define [[FNATTRS]] i32 @_Z14test_clamp_int
// CHECK: call i32 @llvm.[[TARGET]].sclamp.i32(
// int scalar case; p1 is passed as both the second and third clamp argument.
int test_clamp_int(int p0, int p1) { return clamp(p0, p1,p1); }
// CHECK-LABEL: define noundef <2 x i32> @_Z15test_clamp_int2
// CHECK: call <2 x i32> @llvm.dx.clamp.v2i32
// CHECK: define [[FNATTRS]] <2 x i32> @_Z15test_clamp_int2
// CHECK: call <2 x i32> @llvm.[[TARGET]].sclamp.v2i32
// int2 vector case; same (p0, p1, p1) argument pattern.
int2 test_clamp_int2(int2 p0, int2 p1) { return clamp(p0, p1,p1); }
// CHECK-LABEL: define noundef <3 x i32> @_Z15test_clamp_int3
// CHECK: call <3 x i32> @llvm.dx.clamp.v3i32
// CHECK: define [[FNATTRS]] <3 x i32> @_Z15test_clamp_int3
// CHECK: call <3 x i32> @llvm.[[TARGET]].sclamp.v3i32
// int3 vector case; same (p0, p1, p1) argument pattern.
int3 test_clamp_int3(int3 p0, int3 p1) { return clamp(p0, p1,p1); }
// CHECK-LABEL: define noundef <4 x i32> @_Z15test_clamp_int4
// CHECK: call <4 x i32> @llvm.dx.clamp.v4i32
// CHECK: define [[FNATTRS]] <4 x i32> @_Z15test_clamp_int4
// CHECK: call <4 x i32> @llvm.[[TARGET]].sclamp.v4i32
// int4 vector case; same (p0, p1, p1) argument pattern.
int4 test_clamp_int4(int4 p0, int4 p1) { return clamp(p0, p1,p1); }

// CHECK-LABEL: define noundef i32 @_Z15test_clamp_uint
// CHECK: call i32 @llvm.dx.uclamp.i32(
// CHECK: define [[FNATTRS]] i32 @_Z15test_clamp_uint
// CHECK: call i32 @llvm.[[TARGET]].uclamp.i32(
// uint scalar case; p1 is passed as both the second and third clamp argument.
// The return type is uint so it matches the operand type, as in every other
// scalar test in this file; the mangled name and the i32 IR return type that
// the directives above match are unaffected by the return type.
uint test_clamp_uint(uint p0, uint p1) { return clamp(p0, p1,p1); }
// CHECK-LABEL: define noundef <2 x i32> @_Z16test_clamp_uint2
// CHECK: call <2 x i32> @llvm.dx.uclamp.v2i32
// CHECK: define [[FNATTRS]] <2 x i32> @_Z16test_clamp_uint2
// CHECK: call <2 x i32> @llvm.[[TARGET]].uclamp.v2i32
// uint2 vector case; same (p0, p1, p1) argument pattern.
uint2 test_clamp_uint2(uint2 p0, uint2 p1) { return clamp(p0, p1,p1); }
// CHECK-LABEL: define noundef <3 x i32> @_Z16test_clamp_uint3
// CHECK: call <3 x i32> @llvm.dx.uclamp.v3i32
// CHECK: define [[FNATTRS]] <3 x i32> @_Z16test_clamp_uint3
// CHECK: call <3 x i32> @llvm.[[TARGET]].uclamp.v3i32
// uint3 vector case; same (p0, p1, p1) argument pattern.
uint3 test_clamp_uint3(uint3 p0, uint3 p1) { return clamp(p0, p1,p1); }
// CHECK-LABEL: define noundef <4 x i32> @_Z16test_clamp_uint4
// CHECK: call <4 x i32> @llvm.dx.uclamp.v4i32
// CHECK: define [[FNATTRS]] <4 x i32> @_Z16test_clamp_uint4
// CHECK: call <4 x i32> @llvm.[[TARGET]].uclamp.v4i32
// uint4 vector case; same (p0, p1, p1) argument pattern.
uint4 test_clamp_uint4(uint4 p0, uint4 p1) { return clamp(p0, p1,p1); }

// CHECK-LABEL: define noundef i64 @_Z15test_clamp_long
// CHECK: call i64 @llvm.dx.clamp.i64(
// CHECK: define [[FNATTRS]] i64 @_Z15test_clamp_long
// CHECK: call i64 @llvm.[[TARGET]].sclamp.i64(
// int64_t scalar case; p1 is passed as both the second and third clamp argument.
int64_t test_clamp_long(int64_t p0, int64_t p1) { return clamp(p0, p1,p1); }
// CHECK-LABEL: define noundef <2 x i64> @_Z16test_clamp_long2
// CHECK: call <2 x i64> @llvm.dx.clamp.v2i64
// CHECK: define [[FNATTRS]] <2 x i64> @_Z16test_clamp_long2
// CHECK: call <2 x i64> @llvm.[[TARGET]].sclamp.v2i64
// int64_t2 vector case; same (p0, p1, p1) argument pattern.
int64_t2 test_clamp_long2(int64_t2 p0, int64_t2 p1) { return clamp(p0, p1,p1); }
// CHECK-LABEL: define noundef <3 x i64> @_Z16test_clamp_long3
// CHECK: call <3 x i64> @llvm.dx.clamp.v3i64
// CHECK: define [[FNATTRS]] <3 x i64> @_Z16test_clamp_long3
// CHECK: call <3 x i64> @llvm.[[TARGET]].sclamp.v3i64
// int64_t3 vector case; same (p0, p1, p1) argument pattern.
int64_t3 test_clamp_long3(int64_t3 p0, int64_t3 p1) { return clamp(p0, p1,p1); }
// CHECK-LABEL: define noundef <4 x i64> @_Z16test_clamp_long4
// CHECK: call <4 x i64> @llvm.dx.clamp.v4i64
// CHECK: define [[FNATTRS]] <4 x i64> @_Z16test_clamp_long4
// CHECK: call <4 x i64> @llvm.[[TARGET]].sclamp.v4i64
// int64_t4 vector case; same (p0, p1, p1) argument pattern.
int64_t4 test_clamp_long4(int64_t4 p0, int64_t4 p1) { return clamp(p0, p1,p1); }

// CHECK-LABEL: define noundef i64 @_Z16test_clamp_ulong
// CHECK: call i64 @llvm.dx.uclamp.i64(
// CHECK: define [[FNATTRS]] i64 @_Z16test_clamp_ulong
// CHECK: call i64 @llvm.[[TARGET]].uclamp.i64(
// uint64_t scalar case; p1 is passed as both the second and third clamp argument.
uint64_t test_clamp_ulong(uint64_t p0, uint64_t p1) { return clamp(p0, p1,p1); }
// CHECK-LABEL: define noundef <2 x i64> @_Z17test_clamp_ulong2
// CHECK: call <2 x i64> @llvm.dx.uclamp.v2i64
// CHECK: define [[FNATTRS]] <2 x i64> @_Z17test_clamp_ulong2
// CHECK: call <2 x i64> @llvm.[[TARGET]].uclamp.v2i64
// uint64_t2 vector case; same (p0, p1, p1) argument pattern.
uint64_t2 test_clamp_ulong2(uint64_t2 p0, uint64_t2 p1) { return clamp(p0, p1,p1); }
// CHECK-LABEL: define noundef <3 x i64> @_Z17test_clamp_ulong3
// CHECK: call <3 x i64> @llvm.dx.uclamp.v3i64
// CHECK: define [[FNATTRS]] <3 x i64> @_Z17test_clamp_ulong3
// CHECK: call <3 x i64> @llvm.[[TARGET]].uclamp.v3i64
// uint64_t3 vector case; same (p0, p1, p1) argument pattern.
uint64_t3 test_clamp_ulong3(uint64_t3 p0, uint64_t3 p1) { return clamp(p0, p1,p1); }
// CHECK-LABEL: define noundef <4 x i64> @_Z17test_clamp_ulong4
// CHECK: call <4 x i64> @llvm.dx.uclamp.v4i64
// CHECK: define [[FNATTRS]] <4 x i64> @_Z17test_clamp_ulong4
// CHECK: call <4 x i64> @llvm.[[TARGET]].uclamp.v4i64
// uint64_t4 vector case; same (p0, p1, p1) argument pattern.
uint64_t4 test_clamp_ulong4(uint64_t4 p0, uint64_t4 p1) { return clamp(p0, p1,p1); }

// NATIVE_HALF-LABEL: define noundef half @_Z15test_clamp_half
// NATIVE_HALF: call half @llvm.dx.clamp.f16(
// NO_HALF-LABEL: define noundef float @_Z15test_clamp_half
// NO_HALF: call float @llvm.dx.clamp.f32(
// NATIVE_HALF: define [[FNATTRS]] half @_Z15test_clamp_half
// NATIVE_HALF: call half @llvm.[[TARGET]].nclamp.f16(
// NO_HALF: define [[FNATTRS]] float @_Z15test_clamp_half
// NO_HALF: call float @llvm.[[TARGET]].nclamp.f32(
// half scalar case; the NATIVE_HALF and NO_HALF directives above expect the
// half and float IR types respectively. p1 is passed as both clamp bounds.
half test_clamp_half(half p0, half p1) { return clamp(p0, p1,p1); }
// NATIVE_HALF-LABEL: define noundef <2 x half> @_Z16test_clamp_half2
// NATIVE_HALF: call <2 x half> @llvm.dx.clamp.v2f16
// NO_HALF-LABEL: define noundef <2 x float> @_Z16test_clamp_half2
// NO_HALF: call <2 x float> @llvm.dx.clamp.v2f32(
// NATIVE_HALF: define [[FNATTRS]] <2 x half> @_Z16test_clamp_half2
// NATIVE_HALF: call <2 x half> @llvm.[[TARGET]].nclamp.v2f16
// NO_HALF: define [[FNATTRS]] <2 x float> @_Z16test_clamp_half2
// NO_HALF: call <2 x float> @llvm.[[TARGET]].nclamp.v2f32(
// half2 vector case; same (p0, p1, p1) argument pattern.
half2 test_clamp_half2(half2 p0, half2 p1) { return clamp(p0, p1,p1); }
// NATIVE_HALF-LABEL: define noundef <3 x half> @_Z16test_clamp_half3
// NATIVE_HALF: call <3 x half> @llvm.dx.clamp.v3f16
// NO_HALF-LABEL: define noundef <3 x float> @_Z16test_clamp_half3
// NO_HALF: call <3 x float> @llvm.dx.clamp.v3f32(
// NATIVE_HALF: define [[FNATTRS]] <3 x half> @_Z16test_clamp_half3
// NATIVE_HALF: call <3 x half> @llvm.[[TARGET]].nclamp.v3f16
// NO_HALF: define [[FNATTRS]] <3 x float> @_Z16test_clamp_half3
// NO_HALF: call <3 x float> @llvm.[[TARGET]].nclamp.v3f32(
// half3 vector case; same (p0, p1, p1) argument pattern.
half3 test_clamp_half3(half3 p0, half3 p1) { return clamp(p0, p1,p1); }
// NATIVE_HALF-LABEL: define noundef <4 x half> @_Z16test_clamp_half4
// NATIVE_HALF: call <4 x half> @llvm.dx.clamp.v4f16
// NO_HALF-LABEL: define noundef <4 x float> @_Z16test_clamp_half4
// NO_HALF: call <4 x float> @llvm.dx.clamp.v4f32(
// NATIVE_HALF: define [[FNATTRS]] <4 x half> @_Z16test_clamp_half4
// NATIVE_HALF: call <4 x half> @llvm.[[TARGET]].nclamp.v4f16
// NO_HALF: define [[FNATTRS]] <4 x float> @_Z16test_clamp_half4
// NO_HALF: call <4 x float> @llvm.[[TARGET]].nclamp.v4f32(
// half4 vector case; same (p0, p1, p1) argument pattern.
half4 test_clamp_half4(half4 p0, half4 p1) { return clamp(p0, p1,p1); }

// CHECK-LABEL: define noundef float @_Z16test_clamp_float
// CHECK: call float @llvm.dx.clamp.f32(
// CHECK: define [[FNATTRS]] float @_Z16test_clamp_float
// CHECK: call float @llvm.[[TARGET]].nclamp.f32(
// float scalar case; p1 is passed as both the second and third clamp argument.
float test_clamp_float(float p0, float p1) { return clamp(p0, p1,p1); }
// CHECK-LABEL: define noundef <2 x float> @_Z17test_clamp_float2
// CHECK: call <2 x float> @llvm.dx.clamp.v2f32
// CHECK: define [[FNATTRS]] <2 x float> @_Z17test_clamp_float2
// CHECK: call <2 x float> @llvm.[[TARGET]].nclamp.v2f32
// float2 vector case; same (p0, p1, p1) argument pattern.
float2 test_clamp_float2(float2 p0, float2 p1) { return clamp(p0, p1,p1); }
// CHECK-LABEL: define noundef <3 x float> @_Z17test_clamp_float3
// CHECK: call <3 x float> @llvm.dx.clamp.v3f32
// CHECK: define [[FNATTRS]] <3 x float> @_Z17test_clamp_float3
// CHECK: call <3 x float> @llvm.[[TARGET]].nclamp.v3f32
// float3 vector case; same (p0, p1, p1) argument pattern.
float3 test_clamp_float3(float3 p0, float3 p1) { return clamp(p0, p1,p1); }
// CHECK-LABEL: define noundef <4 x float> @_Z17test_clamp_float4
// CHECK: call <4 x float> @llvm.dx.clamp.v4f32
// CHECK: define [[FNATTRS]] <4 x float> @_Z17test_clamp_float4
// CHECK: call <4 x float> @llvm.[[TARGET]].nclamp.v4f32
// float4 vector case; same (p0, p1, p1) argument pattern.
float4 test_clamp_float4(float4 p0, float4 p1) { return clamp(p0, p1,p1); }

// CHECK-LABEL: define noundef double @_Z17test_clamp_double
// CHECK: call double @llvm.dx.clamp.f64(
// CHECK: define [[FNATTRS]] double @_Z17test_clamp_double
// CHECK: call double @llvm.[[TARGET]].nclamp.f64(
// double scalar case; p1 is passed as both the second and third clamp argument.
double test_clamp_double(double p0, double p1) { return clamp(p0, p1,p1); }
// CHECK-LABEL: define noundef <2 x double> @_Z18test_clamp_double2
// CHECK: call <2 x double> @llvm.dx.clamp.v2f64
// CHECK: define [[FNATTRS]] <2 x double> @_Z18test_clamp_double2
// CHECK: call <2 x double> @llvm.[[TARGET]].nclamp.v2f64
// double2 vector case; same (p0, p1, p1) argument pattern.
double2 test_clamp_double2(double2 p0, double2 p1) { return clamp(p0, p1,p1); }
// CHECK-LABEL: define noundef <3 x double> @_Z18test_clamp_double3
// CHECK: call <3 x double> @llvm.dx.clamp.v3f64
// CHECK: define [[FNATTRS]] <3 x double> @_Z18test_clamp_double3
// CHECK: call <3 x double> @llvm.[[TARGET]].nclamp.v3f64
// double3 vector case; same (p0, p1, p1) argument pattern.
double3 test_clamp_double3(double3 p0, double3 p1) { return clamp(p0, p1,p1); }
// CHECK-LABEL: define noundef <4 x double> @_Z18test_clamp_double4
// CHECK: call <4 x double> @llvm.dx.clamp.v4f64
// CHECK: define [[FNATTRS]] <4 x double> @_Z18test_clamp_double4
// CHECK: call <4 x double> @llvm.[[TARGET]].nclamp.v4f64
// double4 vector case; same (p0, p1, p1) argument pattern.
double4 test_clamp_double4(double4 p0, double4 p1) { return clamp(p0, p1,p1); }
16 changes: 11 additions & 5 deletions clang/test/Driver/XRay/xray-shared.cpp
Original file line number Diff line number Diff line change
@@ -1,15 +1,21 @@
// Check supported targets
// RUN: %clang -### --target=x86_64-unknown-linux-gnu -fPIC -fxray-instrument -fxray-shared -c %s -o /dev/null 2>&1 | FileCheck %s
// RUN: %clang -### --target=aarch64-unknown-linux-gnu -fPIC -fxray-instrument -fxray-shared -c %s -o /dev/null 2>&1 | FileCheck %s

// Check unsupported targets
// RUN: not %clang -### --target=arm-unknown-linux-gnu -fPIC -fxray-instrument -fxray-shared -c %s -o /dev/null 2>&1 | FileCheck %s --check-prefix=ERR-TARGET
// RUN: not %clang -### --target=mips-unknown-linux-gnu -fPIC -fxray-instrument -fxray-shared -c %s -o /dev/null 2>&1 | FileCheck %s --check-prefix=ERR-TARGET
// RUN: not %clang -### --target=loongarch64-unknown-linux-gnu -fPIC -fxray-instrument -fxray-shared -c %s -o /dev/null 2>&1 | FileCheck %s --check-prefix=ERR-TARGET
// RUN: not %clang -### --target=hexagon-unknown-linux-gnu -fPIC -fxray-instrument -fxray-shared -c %s -o /dev/null 2>&1 | FileCheck %s --check-prefix=ERR-TARGET
// RUN: not %clang -### --target=powerpc64le-unknown-linux-gnu -fPIC -fxray-instrument -fxray-shared -c %s -o /dev/null 2>&1 | FileCheck %s --check-prefix=ERR-TARGET

// Check PIC requirement
// RUN: %clang -### --target=x86_64-unknown-linux-gnu -fpic -fxray-instrument -fxray-shared -c %s -o /dev/null 2>&1 | FileCheck %s
// RUN: not %clang -### --target=x86_64-unknown-linux-gnu -fno-PIC -fxray-instrument -fxray-shared -c %s -o /dev/null 2>&1 | FileCheck %s --check-prefix=ERR-PIC
// RUN: not %clang -### --target=x86_64-unknown-linux-gnu -fno-pic -fxray-instrument -fxray-shared -c %s -o /dev/null 2>&1 | FileCheck %s --check-prefix=ERR-PIC

// On 64 bit darwin, PIC is always enabled
// RUN: %clang -### --target=x86_64-apple-darwin -fxray-instrument -fxray-shared -c %s -o /dev/null 2>&1 | FileCheck %s

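// Check unsupported targets (NOTE(review): the two invocations below use aarch64 triples, which the "supported targets" section at the top of this file now expects to succeed - confirm whether they are stale)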
// RUN: not %clang -### --target=aarch64-pc-freebsd -fPIC -fxray-instrument -fxray-shared -c %s -o /dev/null 2>&1 | FileCheck %s --check-prefix=ERR-TARGET
// RUN: not %clang -### --target=arm64-apple-macos -fPIC -fxray-instrument -fxray-shared -c %s -o /dev/null 2>&1 | FileCheck %s --check-prefix=ERR-TARGET

// CHECK: "-cc1" {{.*}}"-fxray-instrument" {{.*}}"-fxray-shared"
// ERR-TARGET: error: unsupported option '-fxray-shared' for target
// ERR-PIC: error: option '-fxray-shared' cannot be specified without '-fPIC'
Expand Down
9 changes: 6 additions & 3 deletions clang/test/Driver/aarch64-implied-sve-features.c
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@
// SVE2-BITPERM-REVERT: "-target-feature" "+sve" "-target-feature" "+sve2" "-target-feature" "-sve2-bitperm"

// RUN: %clang --target=aarch64-linux-gnu -march=armv8-a+sve2-aes+nosve2-aes %s -### 2>&1 | FileCheck %s --check-prefix=SVE2-AES-REVERT
// SVE2-AES-REVERT: "-target-feature" "+sve" "-target-feature" "+sve2" "-target-feature" "-sve2-aes"
// SVE2-AES-REVERT: "-target-feature" "+sve" "-target-feature" "+sve-aes" "-target-feature" "+sve2" "-target-feature" "-sve2-aes"

// RUN: %clang --target=aarch64-linux-gnu -march=armv8-a+sve2-sha3+nosve2-sha3 %s -### 2>&1 | FileCheck %s --check-prefix=SVE2-SHA3-REVERT
// SVE2-SHA3-REVERT: "-target-feature" "+sve" "-target-feature" "+sve2" "-target-feature" "-sve2-sha3"
Expand All @@ -47,8 +47,11 @@
// RUN: %clang --target=aarch64-linux-gnu -march=armv8-a+sve2-sha3 %s -### 2>&1 | FileCheck %s --check-prefix=SVE2-SHA3
// SVE2-SHA3: "-target-feature" "+sve" "-target-feature" "+sve2" "-target-feature" "+sve2-sha3"

// RUN: %clang --target=aarch64-linux-gnu -march=armv8-a+sve-aes %s -### 2>&1 | FileCheck %s --check-prefix=SVE-AES
// SVE-AES: "-target-feature" "+aes"{{.*}} "-target-feature" "+sve-aes"

// RUN: %clang --target=aarch64-linux-gnu -march=armv8-a+sve2-aes %s -### 2>&1 | FileCheck %s --check-prefix=SVE2-AES
// SVE2-AES: "-target-feature" "+sve" "-target-feature" "+sve2" "-target-feature" "+sve2-aes"
// SVE2-AES: "-target-feature" "+sve" "-target-feature" "+sve-aes" "-target-feature" "+sve2" "-target-feature" "+sve2-aes"

// RUN: %clang --target=aarch64-linux-gnu -march=armv8-a+sve2-sm4 %s -### 2>&1 | FileCheck %s --check-prefix=SVE2-SM4
// SVE2-SM4: "-target-feature" "+sve" "-target-feature" "+sve2" "-target-feature" "+sve2-sm4"
Expand All @@ -66,7 +69,7 @@
// SVE-SUBFEATURE-CONFLICT-NOT: "-target-feature" "+sve"

// RUN: %clang --target=aarch64-linux-gnu -march=armv8-a+nosve+sve2-aes %s -### 2>&1 | FileCheck %s --check-prefix=SVE-SUBFEATURE-CONFLICT-REV
// SVE-SUBFEATURE-CONFLICT-REV: "-target-feature" "+sve" "-target-feature" "+sve2" "-target-feature" "+sve2-aes"
// SVE-SUBFEATURE-CONFLICT-REV: "-target-feature" "+sve" "-target-feature" "+sve-aes" "-target-feature" "+sve2" "-target-feature" "+sve2-aes"

// RUN: %clang --target=aarch64-linux-gnu -mcpu=neoverse-n2+nosve2 %s -### 2>&1 | FileCheck %s --check-prefix=SVE-MCPU-FEATURES
// SVE-MCPU-FEATURES-NOT: "-target-feature" "+sve2-bitperm"
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,7 @@
// CHECK-NEXT: FEAT_SHA3, FEAT_SHA512 Enable SHA512 and SHA3 support
// CHECK-NEXT: FEAT_SM4, FEAT_SM3 Enable SM3 and SM4 support
// CHECK-NEXT: FEAT_SPECRES Enable Armv8.5-A execution and data prediction invalidation instructions
// CHECK-NEXT: FEAT_SPEv1p2 Enable extra register in the Statistical Profiling Extension
// CHECK-NEXT: FEAT_SSBS, FEAT_SSBS2 Enable Speculative Store Bypass Safe bit
// CHECK-NEXT: FEAT_TLBIOS, FEAT_TLBIRANGE Enable Armv8.4-A TLB Range and Maintenance instructions
// CHECK-NEXT: FEAT_TRF Enable Armv8.4-A Trace extension
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,7 @@
// CHECK-NEXT: FEAT_SME_F64F64 Enable Scalable Matrix Extension (SME) F64F64 instructions
// CHECK-NEXT: FEAT_SME_I16I64 Enable Scalable Matrix Extension (SME) I16I64 instructions
// CHECK-NEXT: FEAT_SPECRES Enable Armv8.5-A execution and data prediction invalidation instructions
// CHECK-NEXT: FEAT_SPEv1p2 Enable extra register in the Statistical Profiling Extension
// CHECK-NEXT: FEAT_TLBIOS, FEAT_TLBIRANGE Enable Armv8.4-A TLB Range and Maintenance instructions
// CHECK-NEXT: FEAT_TRF Enable Armv8.4-A Trace extension
// CHECK-NEXT: FEAT_UAO Enable Armv8.2-A UAO PState
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,7 @@
// CHECK-NEXT: FEAT_SB Enable Armv8.5-A Speculation Barrier
// CHECK-NEXT: FEAT_SEL2 Enable Armv8.4-A Secure Exception Level 2 extension
// CHECK-NEXT: FEAT_SPECRES Enable Armv8.5-A execution and data prediction invalidation instructions
// CHECK-NEXT: FEAT_SPEv1p2 Enable extra register in the Statistical Profiling Extension
// CHECK-NEXT: FEAT_SSBS, FEAT_SSBS2 Enable Speculative Store Bypass Safe bit
// CHECK-NEXT: FEAT_TLBIOS, FEAT_TLBIRANGE Enable Armv8.4-A TLB Range and Maintenance instructions
// CHECK-NEXT: FEAT_TRBE Enable Trace Buffer Extension
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,7 @@
// CHECK-NEXT: FEAT_SB Enable Armv8.5-A Speculation Barrier
// CHECK-NEXT: FEAT_SEL2 Enable Armv8.4-A Secure Exception Level 2 extension
// CHECK-NEXT: FEAT_SPECRES Enable Armv8.5-A execution and data prediction invalidation instructions
// CHECK-NEXT: FEAT_SPEv1p2 Enable extra register in the Statistical Profiling Extension
// CHECK-NEXT: FEAT_SSBS, FEAT_SSBS2 Enable Speculative Store Bypass Safe bit
// CHECK-NEXT: FEAT_TLBIOS, FEAT_TLBIRANGE Enable Armv8.4-A TLB Range and Maintenance instructions
// CHECK-NEXT: FEAT_TRBE Enable Trace Buffer Extension
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,7 @@
// CHECK-NEXT: FEAT_SEL2 Enable Armv8.4-A Secure Exception Level 2 extension
// CHECK-NEXT: FEAT_SPECRES Enable Armv8.5-A execution and data prediction invalidation instructions
// CHECK-NEXT: FEAT_SPECRES2 Enable Speculation Restriction Instruction
// CHECK-NEXT: FEAT_SPEv1p2 Enable extra register in the Statistical Profiling Extension
// CHECK-NEXT: FEAT_SSBS, FEAT_SSBS2 Enable Speculative Store Bypass Safe bit
// CHECK-NEXT: FEAT_TLBIOS, FEAT_TLBIRANGE Enable Armv8.4-A TLB Range and Maintenance instructions
// CHECK-NEXT: FEAT_TRBE Enable Trace Buffer Extension
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,7 @@
// CHECK-NEXT: FEAT_SB Enable Armv8.5-A Speculation Barrier
// CHECK-NEXT: FEAT_SEL2 Enable Armv8.4-A Secure Exception Level 2 extension
// CHECK-NEXT: FEAT_SPECRES Enable Armv8.5-A execution and data prediction invalidation instructions
// CHECK-NEXT: FEAT_SPEv1p2 Enable extra register in the Statistical Profiling Extension
// CHECK-NEXT: FEAT_SSBS, FEAT_SSBS2 Enable Speculative Store Bypass Safe bit
// CHECK-NEXT: FEAT_SVE Enable Scalable Vector Extension (SVE) instructions
// CHECK-NEXT: FEAT_SVE2 Enable Scalable Vector Extension 2 (SVE2) instructions
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,7 @@
// CHECK-NEXT: FEAT_SB Enable Armv8.5-A Speculation Barrier
// CHECK-NEXT: FEAT_SEL2 Enable Armv8.4-A Secure Exception Level 2 extension
// CHECK-NEXT: FEAT_SPECRES Enable Armv8.5-A execution and data prediction invalidation instructions
// CHECK-NEXT: FEAT_SPEv1p2 Enable extra register in the Statistical Profiling Extension
// CHECK-NEXT: FEAT_SSBS, FEAT_SSBS2 Enable Speculative Store Bypass Safe bit
// CHECK-NEXT: FEAT_SVE Enable Scalable Vector Extension (SVE) instructions
// CHECK-NEXT: FEAT_SVE2 Enable Scalable Vector Extension 2 (SVE2) instructions
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,7 @@
// CHECK-NEXT: FEAT_SEL2 Enable Armv8.4-A Secure Exception Level 2 extension
// CHECK-NEXT: FEAT_SPECRES Enable Armv8.5-A execution and data prediction invalidation instructions
// CHECK-NEXT: FEAT_SPECRES2 Enable Speculation Restriction Instruction
// CHECK-NEXT: FEAT_SPEv1p2 Enable extra register in the Statistical Profiling Extension
// CHECK-NEXT: FEAT_SSBS, FEAT_SSBS2 Enable Speculative Store Bypass Safe bit
// CHECK-NEXT: FEAT_SVE Enable Scalable Vector Extension (SVE) instructions
// CHECK-NEXT: FEAT_SVE2 Enable Scalable Vector Extension 2 (SVE2) instructions
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,7 @@
// CHECK-NEXT: FEAT_SEL2 Enable Armv8.4-A Secure Exception Level 2 extension
// CHECK-NEXT: FEAT_SPECRES Enable Armv8.5-A execution and data prediction invalidation instructions
// CHECK-NEXT: FEAT_SPECRES2 Enable Speculation Restriction Instruction
// CHECK-NEXT: FEAT_SPEv1p2 Enable extra register in the Statistical Profiling Extension
// CHECK-NEXT: FEAT_SSBS, FEAT_SSBS2 Enable Speculative Store Bypass Safe bit
// CHECK-NEXT: FEAT_SVE Enable Scalable Vector Extension (SVE) instructions
// CHECK-NEXT: FEAT_SVE2 Enable Scalable Vector Extension 2 (SVE2) instructions
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,7 @@
// CHECK-NEXT: FEAT_SB Enable Armv8.5-A Speculation Barrier
// CHECK-NEXT: FEAT_SEL2 Enable Armv8.4-A Secure Exception Level 2 extension
// CHECK-NEXT: FEAT_SPECRES Enable Armv8.5-A execution and data prediction invalidation instructions
// CHECK-NEXT: FEAT_SPEv1p2 Enable extra register in the Statistical Profiling Extension
// CHECK-NEXT: FEAT_SSBS, FEAT_SSBS2 Enable Speculative Store Bypass Safe bit
// CHECK-NEXT: FEAT_SVE Enable Scalable Vector Extension (SVE) instructions
// CHECK-NEXT: FEAT_SVE2 Enable Scalable Vector Extension 2 (SVE2) instructions
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,7 @@
// CHECK-NEXT: FEAT_SB Enable Armv8.5-A Speculation Barrier
// CHECK-NEXT: FEAT_SEL2 Enable Armv8.4-A Secure Exception Level 2 extension
// CHECK-NEXT: FEAT_SPECRES Enable Armv8.5-A execution and data prediction invalidation instructions
// CHECK-NEXT: FEAT_SPEv1p2 Enable extra register in the Statistical Profiling Extension
// CHECK-NEXT: FEAT_SSBS, FEAT_SSBS2 Enable Speculative Store Bypass Safe bit
// CHECK-NEXT: FEAT_SVE Enable Scalable Vector Extension (SVE) instructions
// CHECK-NEXT: FEAT_SVE2 Enable Scalable Vector Extension 2 (SVE2) instructions
Expand Down
Loading