| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,382 @@ | ||
| /*===--------------------- amxavx512intrin.h - AMXAVX512 --------------------=== | ||
| * | ||
| * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. | ||
| * See https://llvm.org/LICENSE.txt for license information. | ||
| * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception | ||
| * | ||
| *===------------------------------------------------------------------------=== | ||
| */ | ||
| #ifndef __IMMINTRIN_H | ||
| #error "Never use <amxavx512intrin.h> directly; include <immintrin.h> instead." | ||
| #endif // __IMMINTRIN_H | ||
|
|
||
| #ifndef __AMX_AVX512INTRIN_H | ||
| #define __AMX_AVX512INTRIN_H | ||
| #if defined(__x86_64__) && defined(__SSE2__) | ||
|
|
||
| /// Attributes applied to every function defined in this header: always inline, | ||
| /// no debug info, and the amx-avx512 and avx10.2-512 target features enabled. | ||
| #define __DEFAULT_FN_ATTRS_AVX512 \ | ||
| __attribute__((__always_inline__, __nodebug__, \ | ||
| __target__("amx-avx512,avx10.2-512"))) | ||
|
|
||
| /// Moves a row from a tile register to a zmm destination register, converting | ||
| /// the int32 source elements to fp32. The row of the tile is selected by a | ||
| /// 32b GPR. | ||
| /// | ||
| /// \headerfile <immintrin.h> | ||
| /// | ||
| /// \code | ||
| /// __m512 _tile_cvtrowd2ps(__tile tsrc, unsigned int row); | ||
| /// \endcode | ||
| /// | ||
| /// \code{.operation} | ||
| /// VL := 512 | ||
| /// VL_bytes := VL >> 3 | ||
| /// row_index := row & 0xffff | ||
| /// row_chunk := ((row >> 16) & 0xffff) * VL_bytes | ||
| /// FOR i := 0 TO (VL_bytes / 4) - 1 | ||
| /// IF i + row_chunk / 4 >= tsrc.colsb / 4 | ||
| /// dst.dword[i] := 0 | ||
| /// ELSE | ||
| /// dst.f32[i] := CONVERT_INT32_TO_FP32(tsrc.row[row_index].dword[row_chunk/4+i], RNE) | ||
| /// FI | ||
| /// ENDFOR | ||
| /// dst[MAX_VL-1:VL] := 0 | ||
| /// zero_tileconfig_start() | ||
| /// \endcode | ||
| /// | ||
| /// This intrinsic corresponds to the \c TCVTROWD2PS instruction. | ||
| /// | ||
| /// \param tsrc | ||
| /// The source tile. Max size is 1024 Bytes. | ||
| /// \param row | ||
| /// The row selector. Bits [15:0] give the row index of the source tile and | ||
| /// bits [31:16] give the 64-byte chunk within that row. | ||
| /// \returns | ||
| /// The selected chunk converted to 16 fp32 elements. Size is 64 Bytes. | ||
| #define _tile_cvtrowd2ps(tsrc, row) __builtin_ia32_tcvtrowd2ps(tsrc, row) | ||
|
|
||
| /// Moves a row from a tile register to a zmm destination register, converting | ||
| /// the fp32 source elements to bf16. It places the resulting bf16 elements | ||
| /// in the high 16 bits within each dword. The row of the tile is selected | ||
| /// by a 32b GPR. | ||
| /// | ||
| /// \headerfile <immintrin.h> | ||
| /// | ||
| /// \code | ||
| /// __m512bh _tile_cvtrowps2pbf16h(__tile tsrc, unsigned int row); | ||
| /// \endcode | ||
| /// | ||
| /// \code{.operation} | ||
| /// VL := 512 | ||
| /// VL_bytes := VL >> 3 | ||
| /// row_index := row & 0xffff | ||
| /// row_chunk := ((row >> 16) & 0xffff) * VL_bytes | ||
| /// FOR i := 0 TO (VL_bytes / 4) - 1 | ||
| /// IF i + row_chunk / 4 >= tsrc.colsb / 4 | ||
| /// dst.dword[i] := 0 | ||
| /// ELSE | ||
| /// dst.word[2*i+0] := 0 | ||
| /// dst.bf16[2*i+1] := CONVERT_FP32_TO_BF16(tsrc.row[row_index].fp32[row_chunk/4+i], RNE) | ||
| /// FI | ||
| /// ENDFOR | ||
| /// dst[MAX_VL-1:VL] := 0 | ||
| /// zero_tileconfig_start() | ||
| /// \endcode | ||
| /// | ||
| /// This intrinsic corresponds to the \c TCVTROWPS2PBF16H instruction. | ||
| /// | ||
| /// \param tsrc | ||
| /// The source tile. Max size is 1024 Bytes. | ||
| /// \param row | ||
| /// The row selector. Bits [15:0] give the row index of the source tile and | ||
| /// bits [31:16] give the 64-byte chunk within that row. | ||
| /// \returns | ||
| /// 32 bf16 elements; each converted value occupies the high 16 bits of its | ||
| /// dword and the low 16 bits are zero. Size is 64 Bytes. | ||
| #define _tile_cvtrowps2pbf16h(tsrc, row) \ | ||
| __builtin_ia32_tcvtrowps2pbf16h(tsrc, row) | ||
|
|
||
| /// Moves a row from a tile register to a zmm destination register, converting | ||
| /// the fp32 source elements to bf16. It places the resulting bf16 elements | ||
| /// in the low 16 bits within each dword. The row of the tile is selected | ||
| /// by a 32b GPR. | ||
| /// | ||
| /// \headerfile <immintrin.h> | ||
| /// | ||
| /// \code | ||
| /// __m512bh _tile_cvtrowps2pbf16l(__tile tsrc, unsigned int row); | ||
| /// \endcode | ||
| /// | ||
| /// \code{.operation} | ||
| /// VL := 512 | ||
| /// VL_bytes := VL >> 3 | ||
| /// row_index := row & 0xffff | ||
| /// row_chunk := ((row >> 16) & 0xffff) * VL_bytes | ||
| /// FOR i := 0 TO (VL_bytes / 4) - 1 | ||
| /// IF i + row_chunk / 4 >= tsrc.colsb / 4 | ||
| /// dst.dword[i] := 0 | ||
| /// ELSE | ||
| /// dst.word[2*i+1] := 0 | ||
| /// dst.bf16[2*i+0] := CONVERT_FP32_TO_BF16(tsrc.row[row_index].fp32[row_chunk/4+i], RNE) | ||
| /// FI | ||
| /// ENDFOR | ||
| /// dst[MAX_VL-1:VL] := 0 | ||
| /// zero_tileconfig_start() | ||
| /// \endcode | ||
| /// | ||
| /// This intrinsic corresponds to the \c TCVTROWPS2PBF16L instruction. | ||
| /// | ||
| /// \param tsrc | ||
| /// The source tile. Max size is 1024 Bytes. | ||
| /// \param row | ||
| /// The row selector. Bits [15:0] give the row index of the source tile and | ||
| /// bits [31:16] give the 64-byte chunk within that row. | ||
| /// \returns | ||
| /// 32 bf16 elements; each converted value occupies the low 16 bits of its | ||
| /// dword and the high 16 bits are zero. Size is 64 Bytes. | ||
| #define _tile_cvtrowps2pbf16l(tsrc, row) \ | ||
| __builtin_ia32_tcvtrowps2pbf16l(tsrc, row) | ||
|
|
||
| /// Moves a row from a tile register to a zmm destination register, converting | ||
| /// the fp32 source elements to fp16. It places the resulting fp16 elements | ||
| /// in the high 16 bits within each dword. The row of the tile is selected | ||
| /// by a 32b GPR. | ||
| /// | ||
| /// \headerfile <immintrin.h> | ||
| /// | ||
| /// \code | ||
| /// __m512h _tile_cvtrowps2phh(__tile tsrc, unsigned int row); | ||
| /// \endcode | ||
| /// | ||
| /// \code{.operation} | ||
| /// VL := 512 | ||
| /// VL_bytes := VL >> 3 | ||
| /// row_index := row & 0xffff | ||
| /// row_chunk := ((row >> 16) & 0xffff) * VL_bytes | ||
| /// FOR i := 0 TO (VL_bytes / 4) - 1 | ||
| /// IF i + row_chunk / 4 >= tsrc.colsb / 4 | ||
| /// dst.dword[i] := 0 | ||
| /// ELSE | ||
| /// dst.word[2*i+0] := 0 | ||
| /// dst.fp16[2*i+1] := CONVERT_FP32_TO_FP16(tsrc.row[row_index].fp32[row_chunk/4+i], RNE) | ||
| /// FI | ||
| /// ENDFOR | ||
| /// dst[MAX_VL-1:VL] := 0 | ||
| /// zero_tileconfig_start() | ||
| /// \endcode | ||
| /// | ||
| /// This intrinsic corresponds to the \c TCVTROWPS2PHH instruction. | ||
| /// | ||
| /// \param tsrc | ||
| /// The source tile. Max size is 1024 Bytes. | ||
| /// \param row | ||
| /// The row selector. Bits [15:0] give the row index of the source tile and | ||
| /// bits [31:16] give the 64-byte chunk within that row. | ||
| /// \returns | ||
| /// 32 fp16 elements; each converted value occupies the high 16 bits of its | ||
| /// dword and the low 16 bits are zero. Size is 64 Bytes. | ||
| #define _tile_cvtrowps2phh(tsrc, row) __builtin_ia32_tcvtrowps2phh(tsrc, row) | ||
|
|
||
| /// Moves a row from a tile register to a zmm destination register, converting | ||
| /// the fp32 source elements to fp16. It places the resulting fp16 elements | ||
| /// in the low 16 bits within each dword. The row of the tile is selected | ||
| /// by a 32b GPR. | ||
| /// | ||
| /// \headerfile <immintrin.h> | ||
| /// | ||
| /// \code | ||
| /// __m512h _tile_cvtrowps2phl(__tile tsrc, unsigned int row); | ||
| /// \endcode | ||
| /// | ||
| /// \code{.operation} | ||
| /// VL := 512 | ||
| /// VL_bytes := VL >> 3 | ||
| /// row_index := row & 0xffff | ||
| /// row_chunk := ((row >> 16) & 0xffff) * VL_bytes | ||
| /// FOR i := 0 TO (VL_bytes / 4) - 1 | ||
| /// IF i + row_chunk / 4 >= tsrc.colsb / 4 | ||
| /// dst.dword[i] := 0 | ||
| /// ELSE | ||
| /// dst.word[2*i+1] := 0 | ||
| /// dst.fp16[2*i+0] := CONVERT_FP32_TO_FP16(tsrc.row[row_index].fp32[row_chunk/4+i], RNE) | ||
| /// FI | ||
| /// ENDFOR | ||
| /// dst[MAX_VL-1:VL] := 0 | ||
| /// zero_tileconfig_start() | ||
| /// \endcode | ||
| /// | ||
| /// This intrinsic corresponds to the \c TCVTROWPS2PHL instruction. | ||
| /// | ||
| /// \param tsrc | ||
| /// The source tile. Max size is 1024 Bytes. | ||
| /// \param row | ||
| /// The row selector. Bits [15:0] give the row index of the source tile and | ||
| /// bits [31:16] give the 64-byte chunk within that row. | ||
| /// \returns | ||
| /// 32 fp16 elements; each converted value occupies the low 16 bits of its | ||
| /// dword and the high 16 bits are zero. Size is 64 Bytes. | ||
| #define _tile_cvtrowps2phl(tsrc, row) __builtin_ia32_tcvtrowps2phl(tsrc, row) | ||
|
|
||
| /// Move one row of a tile data to a v16i32 data. | ||
| /// The row of the tile is selected by a 32b GPR. | ||
| /// | ||
| /// \headerfile <immintrin.h> | ||
| /// | ||
| /// \code | ||
| /// __m512i _tile_movrow(__tile a, unsigned b); | ||
| /// \endcode | ||
| /// | ||
| /// This intrinsic corresponds to the <c> TILEMOVROW </c> instruction. | ||
| /// | ||
| /// \param a | ||
| /// The 1st source tile. Max size is 1024 Bytes. | ||
| /// \param b | ||
| /// The 2nd source r32. Size is 4 Bytes. | ||
| /// \returns | ||
| /// The destination v16i32 data. Size is 64 Bytes. | ||
| /// | ||
| /// \code{.operation} | ||
| /// VL := 512 | ||
| /// VL_bytes := VL>>3 | ||
| /// row_index := b&0xffff | ||
| /// row_chunk := ((b>>16)&0xffff) * VL_bytes | ||
| /// FOR i := 0 TO (VL_bytes-1) | ||
| /// IF (row_chunk + i >= a.colsb) | ||
| /// dst.byte[i] := 0 | ||
| /// ELSE | ||
| /// dst.byte[i] := a.row[row_index].byte[row_chunk+i] | ||
| /// FI | ||
| /// ENDFOR | ||
| /// \endcode | ||
| #define _tile_movrow(a, b) __builtin_ia32_tilemovrow(a, b) | ||
|
|
||
| /// These are internal intrinsics. C/C++ users should avoid calling them directly. | ||
|
|
||
| /// Internal helper used by __tile_cvtrowd2ps, which passes src0.row, src0.col, | ||
| /// src0.tile and src1 as m, n, src and u. Forwards them unchanged to the | ||
| /// corresponding internal builtin and returns its __m512 result. | ||
| static __inline__ __m512 __DEFAULT_FN_ATTRS_AVX512 _tile_cvtrowd2ps_internal( | ||
| unsigned short m, unsigned short n, _tile1024i src, unsigned u) { | ||
| return __builtin_ia32_tcvtrowd2ps_internal(m, n, src, u); | ||
| } | ||
|
|
||
| /// Internal helper used by __tile_cvtrowps2pbf16h, which passes src0.row, | ||
| /// src0.col, src0.tile and src1 as m, n, src and u. Forwards them unchanged to | ||
| /// the corresponding internal builtin and returns its __m512bh result. | ||
| static __inline__ __m512bh __DEFAULT_FN_ATTRS_AVX512 | ||
| _tile_cvtrowps2pbf16h_internal(unsigned short m, unsigned short n, | ||
| _tile1024i src, unsigned u) { | ||
| return __builtin_ia32_tcvtrowps2pbf16h_internal(m, n, src, u); | ||
| } | ||
|
|
||
| /// Internal helper used by __tile_cvtrowps2pbf16l, which passes src0.row, | ||
| /// src0.col, src0.tile and src1 as m, n, src and u. Forwards them unchanged to | ||
| /// the corresponding internal builtin and returns its __m512bh result. | ||
| static __inline__ __m512bh __DEFAULT_FN_ATTRS_AVX512 | ||
| _tile_cvtrowps2pbf16l_internal(unsigned short m, unsigned short n, | ||
| _tile1024i src, unsigned u) { | ||
| return __builtin_ia32_tcvtrowps2pbf16l_internal(m, n, src, u); | ||
| } | ||
|
|
||
| /// Internal helper used by __tile_cvtrowps2phh, which passes src0.row, src0.col, | ||
| /// src0.tile and src1 as m, n, src and u. Forwards them unchanged to the | ||
| /// corresponding internal builtin and returns its __m512h result. | ||
| static __inline__ __m512h __DEFAULT_FN_ATTRS_AVX512 _tile_cvtrowps2phh_internal( | ||
| unsigned short m, unsigned short n, _tile1024i src, unsigned u) { | ||
| return __builtin_ia32_tcvtrowps2phh_internal(m, n, src, u); | ||
| } | ||
|
|
||
| /// Internal helper used by __tile_cvtrowps2phl, which passes src0.row, src0.col, | ||
| /// src0.tile and src1 as m, n, src and u. Forwards them unchanged to the | ||
| /// corresponding internal builtin and returns its __m512h result. | ||
| static __inline__ __m512h __DEFAULT_FN_ATTRS_AVX512 _tile_cvtrowps2phl_internal( | ||
| unsigned short m, unsigned short n, _tile1024i src, unsigned u) { | ||
| return __builtin_ia32_tcvtrowps2phl_internal(m, n, src, u); | ||
| } | ||
|
|
||
| /// Internal helper used by __tile_movrow, which passes src0.row, src0.col, | ||
| /// src0.tile and src1 as m, n, src and u. Forwards them unchanged to the | ||
| /// corresponding internal builtin and casts its result to __m512i. | ||
| static __inline__ __m512i __DEFAULT_FN_ATTRS_AVX512 _tile_movrow_internal( | ||
| unsigned short m, unsigned short n, _tile1024i src, unsigned u) { | ||
| return (__m512i)__builtin_ia32_tilemovrow_internal(m, n, src, u); | ||
| } | ||
|
|
||
| /// Converts one row of the int32 tile \a src0 to fp32 and returns it as a | ||
| /// v16f32 vector. The conversion raises no SIMD exceptions, rounds as if | ||
| /// MXCSR.RC=RNE, and does not support embedded rounding. | ||
| /// The row and chunk to read are taken from the 32-bit \a src1. | ||
| /// | ||
| /// \headerfile <immintrin.h> | ||
| /// | ||
| /// This intrinsic corresponds to the <c> TCVTROWD2PS </c> instruction. | ||
| /// | ||
| /// \param src0 | ||
| /// The source tile. Max size is 1024 Bytes. | ||
| /// \param src1 | ||
| /// The 32-bit row/chunk selector. Size is 4 Bytes. | ||
| /// \returns | ||
| /// The destination v16f32 data. Size is 64 Bytes. | ||
| __DEFAULT_FN_ATTRS_AVX512 | ||
| static __m512 __tile_cvtrowd2ps(__tile1024i src0, unsigned src1) { | ||
| const unsigned short __rows = src0.row; | ||
| const unsigned short __cols = src0.col; | ||
| return _tile_cvtrowd2ps_internal(__rows, __cols, src0.tile, src1); | ||
| } | ||
|
|
||
| /// Converts one row of the fp32 tile \a src0 to bf16 and returns it as a | ||
| /// v32bf16 vector, with each result in the high 16 bits of its dword. | ||
| /// The row and chunk to read are taken from the 32-bit \a src1. | ||
| /// | ||
| /// \headerfile <immintrin.h> | ||
| /// | ||
| /// This intrinsic corresponds to the <c> TCVTROWPS2PBF16H </c> instruction. | ||
| /// | ||
| /// \param src0 | ||
| /// The source tile. Max size is 1024 Bytes. | ||
| /// \param src1 | ||
| /// The 32-bit row/chunk selector. Size is 4 Bytes. | ||
| /// \returns | ||
| /// The destination v32bf16 data. Size is 64 Bytes. | ||
| __DEFAULT_FN_ATTRS_AVX512 | ||
| static __m512bh __tile_cvtrowps2pbf16h(__tile1024i src0, unsigned src1) { | ||
| const unsigned short __rows = src0.row; | ||
| const unsigned short __cols = src0.col; | ||
| return _tile_cvtrowps2pbf16h_internal(__rows, __cols, src0.tile, src1); | ||
| } | ||
|
|
||
| /// Converts one row of the fp32 tile \a src0 to bf16 and returns it as a | ||
| /// v32bf16 vector, with each result in the low 16 bits of its dword. | ||
| /// The row and chunk to read are taken from the 32-bit \a src1. | ||
| /// | ||
| /// \headerfile <immintrin.h> | ||
| /// | ||
| /// This intrinsic corresponds to the <c> TCVTROWPS2PBF16L </c> instruction. | ||
| /// | ||
| /// \param src0 | ||
| /// The source tile. Max size is 1024 Bytes. | ||
| /// \param src1 | ||
| /// The 32-bit row/chunk selector. Size is 4 Bytes. | ||
| /// \returns | ||
| /// The destination v32bf16 data. Size is 64 Bytes. | ||
| __DEFAULT_FN_ATTRS_AVX512 | ||
| static __m512bh __tile_cvtrowps2pbf16l(__tile1024i src0, unsigned src1) { | ||
| const unsigned short __rows = src0.row; | ||
| const unsigned short __cols = src0.col; | ||
| return _tile_cvtrowps2pbf16l_internal(__rows, __cols, src0.tile, src1); | ||
| } | ||
|
|
||
| /// Converts one row of the fp32 tile \a src0 to fp16 and returns it as a | ||
| /// v32fp16 vector, with each result in the high 16 bits of its dword. | ||
| /// The row and chunk to read are taken from the 32-bit \a src1. | ||
| /// | ||
| /// \headerfile <immintrin.h> | ||
| /// | ||
| /// This intrinsic corresponds to the <c> TCVTROWPS2PHH </c> instruction. | ||
| /// | ||
| /// \param src0 | ||
| /// The source tile. Max size is 1024 Bytes. | ||
| /// \param src1 | ||
| /// The 32-bit row/chunk selector. Size is 4 Bytes. | ||
| /// \returns | ||
| /// The destination v32fp16 data. Size is 64 Bytes. | ||
| __DEFAULT_FN_ATTRS_AVX512 | ||
| static __m512h __tile_cvtrowps2phh(__tile1024i src0, unsigned src1) { | ||
| const unsigned short __rows = src0.row; | ||
| const unsigned short __cols = src0.col; | ||
| return _tile_cvtrowps2phh_internal(__rows, __cols, src0.tile, src1); | ||
| } | ||
|
|
||
| /// Converts one row of the fp32 tile \a src0 to fp16 and returns it as a | ||
| /// v32fp16 vector, with each result in the low 16 bits of its dword. | ||
| /// The row and chunk to read are taken from the 32-bit \a src1. | ||
| /// | ||
| /// \headerfile <immintrin.h> | ||
| /// | ||
| /// This intrinsic corresponds to the <c> TCVTROWPS2PHL </c> instruction. | ||
| /// | ||
| /// \param src0 | ||
| /// The source tile. Max size is 1024 Bytes. | ||
| /// \param src1 | ||
| /// The 32-bit row/chunk selector. Size is 4 Bytes. | ||
| /// \returns | ||
| /// The destination v32fp16 data. Size is 64 Bytes. | ||
| __DEFAULT_FN_ATTRS_AVX512 | ||
| static __m512h __tile_cvtrowps2phl(__tile1024i src0, unsigned src1) { | ||
| const unsigned short __rows = src0.row; | ||
| const unsigned short __cols = src0.col; | ||
| return _tile_cvtrowps2phl_internal(__rows, __cols, src0.tile, src1); | ||
| } | ||
|
|
||
| /// Move one row of a tile data to a v16i32 data. | ||
| /// The row of the tile is selected by a 32b GPR. | ||
| /// | ||
| /// \headerfile <immintrin.h> | ||
| /// | ||
| /// This intrinsic corresponds to the <c> TILEMOVROW </c> instruction. | ||
| /// | ||
| /// \param src0 | ||
| /// The 1st source tile. Max size is 1024 Bytes. | ||
| /// \param src1 | ||
| /// The 2nd source r32. Size is 4 Bytes. | ||
| /// \returns | ||
| /// The destination v16i32 data. Size is 64 Bytes. | ||
| __DEFAULT_FN_ATTRS_AVX512 | ||
| static __m512i __tile_movrow(__tile1024i src0, unsigned src1) { | ||
| return _tile_movrow_internal(src0.row, src0.col, src0.tile, src1); | ||
| } | ||
|
|
||
| #endif // __x86_64__ && __SSE2__ | ||
| #endif // __AMX_AVX512INTRIN_H |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,50 @@ | ||
| // RUN: %clang_cc1 -triple dxil-pc-shadermodel6.0-library -x hlsl -ast-dump -DEMPTY %s | FileCheck -check-prefix=EMPTY %s | ||
| // RUN: %clang_cc1 -triple dxil-pc-shadermodel6.0-library -x hlsl -ast-dump %s | FileCheck %s | ||
|
|
||
|
|
||
| // This test tests two different AST generations. The "EMPTY" test mode verifies | ||
| // the AST generated by forward declaration of the HLSL types which happens on | ||
| // initializing the HLSL external AST with an AST Context. | ||
|
|
||
| // The non-empty mode has a use that requires the AppendStructuredBuffer type be complete, | ||
| // which results in the AST being populated by the external AST source. That | ||
| // case covers the full implementation of the template declaration and the | ||
| // instantiated specialization. | ||
|
|
||
| // EMPTY: ClassTemplateDecl 0x{{[0-9A-Fa-f]+}} <<invalid sloc>> <invalid sloc> implicit AppendStructuredBuffer | ||
| // EMPTY-NEXT: TemplateTypeParmDecl 0x{{[0-9A-Fa-f]+}} <<invalid sloc>> <invalid sloc> class depth 0 index 0 element_type | ||
| // EMPTY-NEXT: CXXRecordDecl 0x{{[0-9A-Fa-f]+}} <<invalid sloc>> <invalid sloc> implicit <undeserialized declarations> class AppendStructuredBuffer | ||
| // EMPTY-NEXT: FinalAttr 0x{{[0-9A-Fa-f]+}} <<invalid sloc>> Implicit final | ||
|
|
||
| // There should be no more occurrences of AppendStructuredBuffer | ||
| // EMPTY-NOT: {{[^[:alnum:]]}}AppendStructuredBuffer | ||
|
|
||
| #ifndef EMPTY | ||
|
|
||
| // This global is the use that requires AppendStructuredBuffer<int> to be a complete type in the non-empty mode. | ||
| AppendStructuredBuffer<int> Buffer; | ||
|
|
||
| #endif | ||
|
|
||
| // CHECK: ClassTemplateDecl 0x{{[0-9A-Fa-f]+}} <<invalid sloc>> <invalid sloc> implicit AppendStructuredBuffer | ||
| // CHECK-NEXT: TemplateTypeParmDecl 0x{{[0-9A-Fa-f]+}} <<invalid sloc>> <invalid sloc> class depth 0 index 0 element_type | ||
| // CHECK-NEXT: CXXRecordDecl 0x{{[0-9A-Fa-f]+}} <<invalid sloc>> <invalid sloc> implicit class AppendStructuredBuffer definition | ||
|
|
||
| // CHECK: FinalAttr 0x{{[0-9A-Fa-f]+}} <<invalid sloc>> Implicit final | ||
| // CHECK-NEXT: FieldDecl 0x{{[0-9A-Fa-f]+}} <<invalid sloc>> <invalid sloc> implicit h '__hlsl_resource_t | ||
| // CHECK-SAME{LITERAL}: [[hlsl::resource_class(UAV)]] | ||
| // CHECK-SAME{LITERAL}: [[hlsl::raw_buffer]] | ||
| // CHECK-SAME{LITERAL}: [[hlsl::contained_type(element_type)]] | ||
| // CHECK-NEXT: HLSLResourceAttr 0x{{[0-9A-Fa-f]+}} <<invalid sloc>> Implicit RawBuffer | ||
|
|
||
| // CHECK-NOT: CXXMethodDecl 0x{{[0-9A-Fa-f]+}} <<invalid sloc>> <invalid sloc> operator[] 'element_type &const (unsigned int) const' | ||
| // CHECK-NOT: CXXMethodDecl 0x{{[0-9A-Fa-f]+}} <<invalid sloc>> <invalid sloc> operator[] 'element_type &(unsigned int)' | ||
|
|
||
| // CHECK: ClassTemplateSpecializationDecl 0x{{[0-9A-Fa-f]+}} <<invalid sloc>> <invalid sloc> class AppendStructuredBuffer definition | ||
| // CHECK: TemplateArgument type 'int' | ||
| // CHECK-NEXT: BuiltinType 0x{{[0-9A-Fa-f]+}} 'int' | ||
| // CHECK-NEXT: FinalAttr 0x{{[0-9A-Fa-f]+}} <<invalid sloc>> Implicit final | ||
| // CHECK-NEXT: FieldDecl 0x{{[0-9A-Fa-f]+}} <<invalid sloc>> <invalid sloc> implicit h '__hlsl_resource_t | ||
| // CHECK-SAME{LITERAL}: [[hlsl::resource_class(UAV)]] | ||
| // CHECK-SAME{LITERAL}: [[hlsl::raw_buffer]] | ||
| // CHECK-SAME{LITERAL}: [[hlsl::contained_type(int)]] | ||
| // CHECK-NEXT: HLSLResourceAttr 0x{{[0-9A-Fa-f]+}} <<invalid sloc>> Implicit RawBuffer |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,51 @@ | ||
| // RUN: %clang_cc1 -triple dxil-pc-shadermodel6.0-library -x hlsl -ast-dump -DEMPTY %s | FileCheck -check-prefix=EMPTY %s | ||
| // RUN: %clang_cc1 -triple dxil-pc-shadermodel6.0-library -x hlsl -ast-dump %s | FileCheck %s | ||
|
|
||
|
|
||
| // This test tests two different AST generations. The "EMPTY" test mode verifies | ||
| // the AST generated by forward declaration of the HLSL types which happens on | ||
| // initializing the HLSL external AST with an AST Context. | ||
|
|
||
| // The non-empty mode has a use that requires the ConsumeStructuredBuffer type be complete, | ||
| // which results in the AST being populated by the external AST source. That | ||
| // case covers the full implementation of the template declaration and the | ||
| // instantiated specialization. | ||
|
|
||
| // EMPTY: ClassTemplateDecl 0x{{[0-9A-Fa-f]+}} <<invalid sloc>> <invalid sloc> implicit ConsumeStructuredBuffer | ||
| // EMPTY-NEXT: TemplateTypeParmDecl 0x{{[0-9A-Fa-f]+}} <<invalid sloc>> <invalid sloc> class depth 0 index 0 element_type | ||
| // EMPTY-NEXT: CXXRecordDecl 0x{{[0-9A-Fa-f]+}} <<invalid sloc>> <invalid sloc> implicit <undeserialized declarations> class ConsumeStructuredBuffer | ||
| // EMPTY-NEXT: FinalAttr 0x{{[0-9A-Fa-f]+}} <<invalid sloc>> Implicit final | ||
|
|
||
| // There should be no more occurrences of ConsumeStructuredBuffer | ||
| // EMPTY-NOT: {{[^[:alnum:]]}}ConsumeStructuredBuffer | ||
|
|
||
| #ifndef EMPTY | ||
|
|
||
| // This global is the use that requires ConsumeStructuredBuffer<int> to be a complete type in the non-empty mode. | ||
| ConsumeStructuredBuffer<int> Buffer; | ||
|
|
||
| #endif | ||
|
|
||
| // CHECK: ClassTemplateDecl 0x{{[0-9A-Fa-f]+}} <<invalid sloc>> <invalid sloc> implicit ConsumeStructuredBuffer | ||
| // CHECK-NEXT: TemplateTypeParmDecl 0x{{[0-9A-Fa-f]+}} <<invalid sloc>> <invalid sloc> class depth 0 index 0 element_type | ||
| // CHECK-NEXT: CXXRecordDecl 0x{{[0-9A-Fa-f]+}} <<invalid sloc>> <invalid sloc> implicit class ConsumeStructuredBuffer definition | ||
|
|
||
| // CHECK: FinalAttr 0x{{[0-9A-Fa-f]+}} <<invalid sloc>> Implicit final | ||
| // CHECK-NEXT: FieldDecl 0x{{[0-9A-Fa-f]+}} <<invalid sloc>> <invalid sloc> implicit h '__hlsl_resource_t | ||
| // CHECK-SAME{LITERAL}: [[hlsl::resource_class(UAV)]] | ||
| // CHECK-SAME{LITERAL}: [[hlsl::raw_buffer]] | ||
| // CHECK-SAME{LITERAL}: [[hlsl::contained_type(element_type)]] | ||
| // CHECK-NEXT: HLSLResourceAttr 0x{{[0-9A-Fa-f]+}} <<invalid sloc>> Implicit RawBuffer | ||
|
|
||
| // CHECK-NOT: CXXMethodDecl 0x{{[0-9A-Fa-f]+}} <<invalid sloc>> <invalid sloc> operator[] 'element_type &const (unsigned int) const' | ||
| // CHECK-NOT: CXXMethodDecl 0x{{[0-9A-Fa-f]+}} <<invalid sloc>> <invalid sloc> operator[] 'element_type &(unsigned int)' | ||
|
|
||
| // CHECK: ClassTemplateSpecializationDecl 0x{{[0-9A-Fa-f]+}} <<invalid sloc>> <invalid sloc> class ConsumeStructuredBuffer definition | ||
|
|
||
| // CHECK: TemplateArgument type 'int' | ||
| // CHECK-NEXT: BuiltinType 0x{{[0-9A-Fa-f]+}} 'int' | ||
| // CHECK-NEXT: FinalAttr 0x{{[0-9A-Fa-f]+}} <<invalid sloc>> Implicit final | ||
| // CHECK-NEXT: FieldDecl 0x{{[0-9A-Fa-f]+}} <<invalid sloc>> <invalid sloc> implicit h '__hlsl_resource_t | ||
| // CHECK-SAME{LITERAL}: [[hlsl::resource_class(UAV)]] | ||
| // CHECK-SAME{LITERAL}: [[hlsl::raw_buffer]] | ||
| // CHECK-SAME{LITERAL}: [[hlsl::contained_type(int)]] | ||
| // CHECK-NEXT: HLSLResourceAttr 0x{{[0-9A-Fa-f]+}} <<invalid sloc>> Implicit RawBuffer |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,18 @@ | ||
| // RUN: %clang_cc1 -verify -std=c2y -Wall -pedantic %s | ||
| // RUN: %clang_cc1 -verify=gnu -Wall -pedantic %s | ||
|
|
||
| /* WG14 N3341: Yes | ||
| * Slay Some Earthly Demons III | ||
| * | ||
| * Empty structure and union objects are now implementation-defined. | ||
| */ | ||
|
|
||
| // expected-no-diagnostics | ||
|
|
||
| // None of these types has a named member. The -std=c2y run accepts them | ||
| // silently; the run without -std reports each one as a GNU extension. | ||
| struct R {}; // gnu-warning {{empty struct is a GNU extension}} | ||
| #if __STDC_VERSION__ >= 201112L | ||
| struct S { struct { }; }; // gnu-warning {{empty struct is a GNU extension}} | ||
| #endif | ||
| struct T { int : 0; }; // gnu-warning {{struct without named members is a GNU extension}} | ||
| union U {}; // gnu-warning {{empty union is a GNU extension}} | ||
|
|
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,31 @@ | ||
| // RUN: %clang_cc1 -verify=expected,both -std=c2y -Wall -pedantic %s | ||
| // RUN: %clang_cc1 -verify=clang,both -Wall -pedantic %s | ||
|
|
||
| /* WG14 N3342: Yes | ||
| * Slay Some Earthly Demons IV | ||
| * | ||
| * Qualified function types are now implementation-defined instead of | ||
| * undefined. Clang strips the qualifiers. | ||
| */ | ||
|
|
||
| // Function type that the declarations below try to qualify. | ||
| typedef int f(void); | ||
|
|
||
| // A single qualifier on the function type: one warning per declaration, with | ||
| // wording that differs between the -std=c2y run and the default run. | ||
| const f one; /* expected-warning {{'const' qualifier on function type 'f' (aka 'int (void)') has no effect}} | ||
| clang-warning {{'const' qualifier on function type 'f' (aka 'int (void)') has no effect and is a Clang extension}} | ||
| */ | ||
| volatile f two; /* expected-warning {{'volatile' qualifier on function type 'f' (aka 'int (void)') has no effect}} | ||
| clang-warning {{'volatile' qualifier on function type 'f' (aka 'int (void)') has no effect and is a Clang extension}} | ||
| */ | ||
|
|
||
| // Two qualifiers on the function type: each qualifier is reported separately. | ||
| const volatile f three; /* expected-warning {{'const' qualifier on function type 'f' (aka 'int (void)') has no effect}} | ||
| clang-warning {{'const' qualifier on function type 'f' (aka 'int (void)') has no effect and is a Clang extension}} | ||
| expected-warning {{'volatile' qualifier on function type 'f' (aka 'int (void)') has no effect}} | ||
| clang-warning {{'volatile' qualifier on function type 'f' (aka 'int (void)') has no effect and is a Clang extension}} | ||
| */ | ||
|
|
||
| #if __STDC_VERSION__ >= 201112L | ||
| // Unlike the qualifiers tested in this file, applying _Atomic to a function | ||
| // type violates an explicit constraint, so it is an error in every run. | ||
| // The guard limits this case to C11 and later. | ||
| _Atomic f four; // both-error {{_Atomic cannot be applied to function type 'f' (aka 'int (void)')}} | ||
| #endif | ||
|
|
||
| // There's no point to testing 'restrict' because that requires a pointer type. |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,74 @@ | ||
| // RUN: %clang_cc1 -verify -std=c2y -Wall -pedantic -ffreestanding %s | ||
| // RUN: %clang_cc1 -std=c99 -verify=expected,ped -Wall -pedantic -ffreestanding %s | ||
|
|
||
| /* WG14 N3346: Yes | ||
| * Slay Some Earthly Demons VIII | ||
| * | ||
| * Updates some undefined behavior during initialization to instead be a | ||
| * constraint violation. | ||
| */ | ||
|
|
||
| // The initializer for a scalar shall be a single expression, optionally | ||
| // enclosed in braces, or it shall be an empty initializer. | ||
| // i, j and k cover those three forms in order; only the empty form is | ||
| // diagnosed, and only by the pedantic -std=c99 run. | ||
| int i = 12, j = {12}, k = {}; // ped-warning {{use of an empty initializer is a C23 extension}} | ||
|
|
||
| // Aggregate initialized in test1. It has three named members plus an unnamed | ||
| // zero-width bit-field between f and c. | ||
| struct S { | ||
| int i; | ||
| float f; | ||
| int : 0; | ||
| char c; | ||
| }; | ||
|
|
||
| // Structure initialization: a brace-enclosed list and an expression of the same | ||
| // struct type are accepted; a structurally identical but distinct type is not. | ||
| void test1(void) { | ||
| // The initializer for an object that has structure or union type shall be | ||
| // either a single expression that has compatible type or a brace-enclosed | ||
| // list of initializers for the elements or named members. | ||
| struct S s1 = { 1, 1.2f, 'a' }; | ||
| struct S s2 = s1; | ||
|
|
||
| // Despite being structurally identical to S, T is not compatible with S. | ||
| struct T { int i; float f; int : 0; char c; } t; | ||
| struct S s3 = t; // expected-error {{initializing 'struct S' with an expression of incompatible type 'struct T'}} | ||
| } | ||
|
|
||
| // Array initialization from string literals: each literal kind is tried with and | ||
| // without enclosing braces against matching and mismatching element types. | ||
| void test2(void) { | ||
| typedef __WCHAR_TYPE__ wchar_t; | ||
|
|
||
| // The initializer for an array shall be either a string literal, optionally | ||
| // enclosed in braces, or a brace-enclosed list of initializers for the | ||
| // elements. An array initialized by character string literal or UTF-8 string | ||
| // literal shall have a character type as element type. An array initialized | ||
| // with a wide string literal shall have element type compatible with a | ||
| // qualified or unqualified wchar_t, char16_t, or char32_t, and the string | ||
| // literal shall have the corresponding encoding prefix (L, u, or U, | ||
| // respectively). | ||
| char str1[] = "string literal"; | ||
| char str2[] = { "string literal" }; | ||
|
|
||
| float str5[] = "this doesn't work"; // expected-error {{array initializer must be an initializer list}} | ||
| float str6[] = { "this also doesn't work" }; // expected-error {{initializing 'float' with an expression of incompatible type 'char[23]'}} | ||
|
|
||
| wchar_t str7[] = L"string literal"; | ||
| wchar_t str8[] = { L"string literal" }; | ||
|
|
||
| #if __STDC_VERSION__ >= 201112L | ||
| typedef __CHAR16_TYPE__ char16_t; | ||
| typedef __CHAR32_TYPE__ char32_t; | ||
|
|
||
| char str3[] = u8"string literal"; | ||
| char str4[] = { u8"string literal" }; | ||
|
|
||
| char16_t str9[] = u"string literal"; | ||
| char16_t str10[] = { u"string literal" }; | ||
| char32_t str11[] = U"string literal"; | ||
| char32_t str12[] = { U"string literal" }; | ||
|
|
||
| char16_t str15[] = "nope"; // expected-error {{initializing wide char array with non-wide string literal}} | ||
| char16_t str16[] = { "nope" }; // expected-error-re {{incompatible pointer to integer conversion initializing 'char16_t' (aka '{{.*}}') with an expression of type 'char[5]'}} | ||
| char32_t str17[] = "nope"; // expected-error {{initializing wide char array with non-wide string literal}} | ||
| char32_t str18[] = { "nope" }; // expected-error-re {{incompatible pointer to integer conversion initializing 'char32_t' (aka '{{.*}}') with an expression of type 'char[5]'}} | ||
| #endif | ||
|
|
||
| wchar_t str13[] = "nope"; // expected-error {{initializing wide char array with non-wide string literal}} | ||
| wchar_t str14[] = { "nope" }; // expected-error-re {{incompatible pointer to integer conversion initializing 'wchar_t' (aka '{{.*}}') with an expression of type 'char[5]'}} | ||
| } |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,52 @@ | ||
| // RUN: %clang_cc1 %s -flax-vector-conversions=none -ffreestanding -triple=x86_64-unknown-unknown \ | ||
| // RUN: -target-feature +amx-avx512 -target-feature +avx10.2-512 \ | ||
| // RUN: -emit-llvm -o - -Werror -pedantic | FileCheck %s --check-prefixes=CHECK | ||
|
|
||
| #include <immintrin.h> | ||
|
|
||
| // NOTE(review): neither buf nor STRIDE is referenced by the tests visible in | ||
| // this file; presumably leftovers from another AMX test. Confirm before removing. | ||
| char buf[1024]; | ||
| #define STRIDE 32 | ||
|
|
||
| // NOTE(review): buf2 is not referenced by the tests visible in this file; confirm whether it is needed. | ||
| char buf2[1024]; | ||
|
|
||
| // __tile_cvtrowd2ps on a __tile1024i should emit a vector-to-tile cast and a | ||
| // call to the tcvtrowd2ps.internal intrinsic returning <16 x float>. | ||
| __m512 test_tile_cvtrowd2ps(__tile1024i a, unsigned b) { | ||
| //CHECK-LABEL: @test_tile_cvtrowd2ps | ||
| //CHECK-DAG: call x86_amx @llvm.x86.cast.vector.to.tile.v256i32(<256 x i32> {{%.*}}) | ||
| //CHECK-DAG: call <16 x float> @llvm.x86.tcvtrowd2ps.internal | ||
| return __tile_cvtrowd2ps(a, b); | ||
| } | ||
|
|
||
| // __tile_cvtrowps2pbf16h on a __tile1024i should emit a vector-to-tile cast and | ||
| // a call to the tcvtrowps2pbf16h.internal intrinsic returning <32 x bfloat>. | ||
| __m512bh test_tile_cvtrowps2pbf16h(__tile1024i a, unsigned b) { | ||
| //CHECK-LABEL: @test_tile_cvtrowps2pbf16h | ||
| //CHECK-DAG: call x86_amx @llvm.x86.cast.vector.to.tile.v256i32(<256 x i32> {{%.*}}) | ||
| //CHECK-DAG: call <32 x bfloat> @llvm.x86.tcvtrowps2pbf16h.internal | ||
| return __tile_cvtrowps2pbf16h(a, b); | ||
| } | ||
|
|
||
| // __tile_cvtrowps2pbf16l on a __tile1024i should emit a vector-to-tile cast and | ||
| // a call to the tcvtrowps2pbf16l.internal intrinsic returning <32 x bfloat>. | ||
| __m512bh test_tile_cvtrowps2pbf16l(__tile1024i a, unsigned b) { | ||
| //CHECK-LABEL: @test_tile_cvtrowps2pbf16l | ||
| //CHECK-DAG: call x86_amx @llvm.x86.cast.vector.to.tile.v256i32(<256 x i32> {{%.*}}) | ||
| //CHECK-DAG: call <32 x bfloat> @llvm.x86.tcvtrowps2pbf16l.internal | ||
| return __tile_cvtrowps2pbf16l(a, b); | ||
| } | ||
|
|
||
| // __tile_cvtrowps2phh on a __tile1024i should emit a vector-to-tile cast and a | ||
| // call to the tcvtrowps2phh.internal intrinsic returning <32 x half>. | ||
| __m512h test_tile_cvtrowps2phh(__tile1024i a, unsigned b) { | ||
| //CHECK-LABEL: @test_tile_cvtrowps2phh | ||
| //CHECK-DAG: call x86_amx @llvm.x86.cast.vector.to.tile.v256i32(<256 x i32> {{%.*}}) | ||
| //CHECK-DAG: call <32 x half> @llvm.x86.tcvtrowps2phh.internal | ||
| return __tile_cvtrowps2phh(a, b); | ||
| } | ||
|
|
||
| // __tile_cvtrowps2phl on a __tile1024i should emit a vector-to-tile cast and a | ||
| // call to the tcvtrowps2phl.internal intrinsic returning <32 x half>. | ||
| __m512h test_tile_cvtrowps2phl(__tile1024i a, unsigned b) { | ||
| //CHECK-LABEL: @test_tile_cvtrowps2phl | ||
| //CHECK-DAG: call x86_amx @llvm.x86.cast.vector.to.tile.v256i32(<256 x i32> {{%.*}}) | ||
| //CHECK-DAG: call <32 x half> @llvm.x86.tcvtrowps2phl.internal | ||
| return __tile_cvtrowps2phl(a, b); | ||
| } | ||
|
|
||
| // __tile_movrow on a __tile1024i should emit a vector-to-tile cast and a call | ||
| // to the tilemovrow.internal intrinsic returning <16 x i32>. | ||
| __m512i test_tile_movrow(__tile1024i a, unsigned b) { | ||
| //CHECK-LABEL: @test_tile_movrow | ||
| //CHECK-DAG: call x86_amx @llvm.x86.cast.vector.to.tile.v256i32(<256 x i32> {{%.*}}) | ||
| //CHECK-DAG: call <16 x i32> @llvm.x86.tilemovrow.internal | ||
| return __tile_movrow(a, b); | ||
| } |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,41 @@ | ||
| // RUN: %clang_cc1 %s -ffreestanding -triple=x86_64-unknown-unknown -target-feature +amx-tile -target-feature +amx-avx512 \ | ||
| // RUN: -target-feature +avx10.2-512 -emit-llvm -o - -Wall -Werror -pedantic -Wno-gnu-statement-expression | FileCheck %s | ||
|
|
||
| #include <immintrin.h> | ||
| #include <stddef.h> | ||
|
|
||
| // _tile_cvtrowd2ps on tile 1 should lower to a direct call to llvm.x86.tcvtrowd2ps. | ||
| __m512 test_tile_cvtrowd2ps(unsigned int A) { | ||
| // CHECK-LABEL: @test_tile_cvtrowd2ps( | ||
| // CHECK: call <16 x float> @llvm.x86.tcvtrowd2ps(i8 1, i32 %{{.*}}) | ||
| return _tile_cvtrowd2ps(1, A); | ||
| } | ||
|
|
||
| // _tile_cvtrowps2pbf16h on tile 1 should lower to a direct call to llvm.x86.tcvtrowps2pbf16h. | ||
| __m512bh test_tile_cvtrowps2pbf16h(unsigned int A) { | ||
| // CHECK-LABEL: @test_tile_cvtrowps2pbf16h( | ||
| // CHECK: call <32 x bfloat> @llvm.x86.tcvtrowps2pbf16h(i8 1, i32 %{{.*}}) | ||
| return _tile_cvtrowps2pbf16h(1, A); | ||
| } | ||
|
|
||
| // _tile_cvtrowps2pbf16l on tile 1 should lower to a direct call to llvm.x86.tcvtrowps2pbf16l. | ||
| __m512bh test_tile_cvtrowps2pbf16l(unsigned int A) { | ||
| // CHECK-LABEL: @test_tile_cvtrowps2pbf16l( | ||
| // CHECK: call <32 x bfloat> @llvm.x86.tcvtrowps2pbf16l(i8 1, i32 %{{.*}}) | ||
| return _tile_cvtrowps2pbf16l(1, A); | ||
| } | ||
|
|
||
| // _tile_cvtrowps2phh on tile 1 should lower to a direct call to llvm.x86.tcvtrowps2phh. | ||
| __m512h test_tile_cvtrowps2phh(unsigned int A) { | ||
| // CHECK-LABEL: @test_tile_cvtrowps2phh( | ||
| // CHECK: call <32 x half> @llvm.x86.tcvtrowps2phh(i8 1, i32 %{{.*}}) | ||
| return _tile_cvtrowps2phh(1, A); | ||
| } | ||
|
|
||
| // _tile_cvtrowps2phl on tile 1 should lower to a direct call to llvm.x86.tcvtrowps2phl. | ||
| __m512h test_tile_cvtrowps2phl(unsigned int A) { | ||
| // CHECK-LABEL: @test_tile_cvtrowps2phl( | ||
| // CHECK: call <32 x half> @llvm.x86.tcvtrowps2phl(i8 1, i32 %{{.*}}) | ||
| return _tile_cvtrowps2phl(1, A); | ||
| } | ||
|
|
||
| // _tile_movrow on tile 1 should lower to a direct call to llvm.x86.tilemovrow. | ||
| // The result's SSA name is deliberately not matched, as in the sibling tests. | ||
| __m512i test_tile_movrow(unsigned int A) { | ||
| // CHECK-LABEL: @test_tile_movrow( | ||
| // CHECK: call <16 x i32> @llvm.x86.tilemovrow(i8 1, i32 %{{.*}}) | ||
| return _tile_movrow(1, A); | ||
| } |