Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- [Clang] Declare AMDGPU target as supporting BF16 for storage-only purposes on amdgcn - Add Sema & CodeGen tests cases. - Also add cases that D138651 would have covered as this patch replaces it. - [AMDGPU] Add BF16 storage-only support - Support legalization/dealing with bf16 operations in DAGIsel. - bf16 as a type remains illegal and is represented as i16 for storage purposes. Reviewed By: arsenm Differential Revision: https://reviews.llvm.org/D139398
- Loading branch information
Showing
11 changed files
with
3,514 additions
and
9 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,129 @@ | ||
// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py | ||
// REQUIRES: amdgpu-registered-target | ||
// REQUIRES: x86-registered-target | ||
|
||
// RUN: %clang_cc1 "-aux-triple" "x86_64-unknown-linux-gnu" "-triple" "amdgcn-amd-amdhsa" \ | ||
// RUN: -fcuda-is-device "-aux-target-cpu" "x86-64" -emit-llvm -o - %s | FileCheck %s | ||
|
||
#include "Inputs/cuda.h" | ||
|
||
// CHECK-LABEL: @_Z8test_argPu6__bf16u6__bf16( | ||
// CHECK-NEXT: entry: | ||
// CHECK-NEXT: [[OUT_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) | ||
// CHECK-NEXT: [[IN_ADDR:%.*]] = alloca bfloat, align 2, addrspace(5) | ||
// CHECK-NEXT: [[BF16:%.*]] = alloca bfloat, align 2, addrspace(5) | ||
// CHECK-NEXT: [[OUT_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[OUT_ADDR]] to ptr | ||
// CHECK-NEXT: [[IN_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[IN_ADDR]] to ptr | ||
// CHECK-NEXT: [[BF16_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[BF16]] to ptr | ||
// CHECK-NEXT: store ptr [[OUT:%.*]], ptr [[OUT_ADDR_ASCAST]], align 8 | ||
// CHECK-NEXT: store bfloat [[IN:%.*]], ptr [[IN_ADDR_ASCAST]], align 2 | ||
// CHECK-NEXT: [[TMP0:%.*]] = load bfloat, ptr [[IN_ADDR_ASCAST]], align 2 | ||
// CHECK-NEXT: store bfloat [[TMP0]], ptr [[BF16_ASCAST]], align 2 | ||
// CHECK-NEXT: [[TMP1:%.*]] = load bfloat, ptr [[BF16_ASCAST]], align 2 | ||
// CHECK-NEXT: [[TMP2:%.*]] = load ptr, ptr [[OUT_ADDR_ASCAST]], align 8 | ||
// CHECK-NEXT: store bfloat [[TMP1]], ptr [[TMP2]], align 2 | ||
// CHECK-NEXT: ret void | ||
// | ||
__device__ void test_arg(__bf16 *out, __bf16 in) { | ||
__bf16 bf16 = in; | ||
*out = bf16; | ||
} | ||
|
||
// CHECK-LABEL: @_Z9test_loadPu6__bf16S_( | ||
// CHECK-NEXT: entry: | ||
// CHECK-NEXT: [[OUT_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) | ||
// CHECK-NEXT: [[IN_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) | ||
// CHECK-NEXT: [[BF16:%.*]] = alloca bfloat, align 2, addrspace(5) | ||
// CHECK-NEXT: [[OUT_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[OUT_ADDR]] to ptr | ||
// CHECK-NEXT: [[IN_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[IN_ADDR]] to ptr | ||
// CHECK-NEXT: [[BF16_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[BF16]] to ptr | ||
// CHECK-NEXT: store ptr [[OUT:%.*]], ptr [[OUT_ADDR_ASCAST]], align 8 | ||
// CHECK-NEXT: store ptr [[IN:%.*]], ptr [[IN_ADDR_ASCAST]], align 8 | ||
// CHECK-NEXT: [[TMP0:%.*]] = load ptr, ptr [[IN_ADDR_ASCAST]], align 8 | ||
// CHECK-NEXT: [[TMP1:%.*]] = load bfloat, ptr [[TMP0]], align 2 | ||
// CHECK-NEXT: store bfloat [[TMP1]], ptr [[BF16_ASCAST]], align 2 | ||
// CHECK-NEXT: [[TMP2:%.*]] = load bfloat, ptr [[BF16_ASCAST]], align 2 | ||
// CHECK-NEXT: [[TMP3:%.*]] = load ptr, ptr [[OUT_ADDR_ASCAST]], align 8 | ||
// CHECK-NEXT: store bfloat [[TMP2]], ptr [[TMP3]], align 2 | ||
// CHECK-NEXT: ret void | ||
// | ||
__device__ void test_load(__bf16 *out, __bf16 *in) { | ||
__bf16 bf16 = *in; | ||
*out = bf16; | ||
} | ||
|
||
// CHECK-LABEL: @_Z8test_retu6__bf16( | ||
// CHECK-NEXT: entry: | ||
// CHECK-NEXT: [[RETVAL:%.*]] = alloca bfloat, align 2, addrspace(5) | ||
// CHECK-NEXT: [[IN_ADDR:%.*]] = alloca bfloat, align 2, addrspace(5) | ||
// CHECK-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr | ||
// CHECK-NEXT: [[IN_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[IN_ADDR]] to ptr | ||
// CHECK-NEXT: store bfloat [[IN:%.*]], ptr [[IN_ADDR_ASCAST]], align 2 | ||
// CHECK-NEXT: [[TMP0:%.*]] = load bfloat, ptr [[IN_ADDR_ASCAST]], align 2 | ||
// CHECK-NEXT: ret bfloat [[TMP0]] | ||
// | ||
__device__ __bf16 test_ret( __bf16 in) { | ||
return in; | ||
} | ||
|
||
// CHECK-LABEL: @_Z9test_callu6__bf16( | ||
// CHECK-NEXT: entry: | ||
// CHECK-NEXT: [[RETVAL:%.*]] = alloca bfloat, align 2, addrspace(5) | ||
// CHECK-NEXT: [[IN_ADDR:%.*]] = alloca bfloat, align 2, addrspace(5) | ||
// CHECK-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr | ||
// CHECK-NEXT: [[IN_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[IN_ADDR]] to ptr | ||
// CHECK-NEXT: store bfloat [[IN:%.*]], ptr [[IN_ADDR_ASCAST]], align 2 | ||
// CHECK-NEXT: [[TMP0:%.*]] = load bfloat, ptr [[IN_ADDR_ASCAST]], align 2 | ||
// CHECK-NEXT: [[CALL:%.*]] = call contract noundef bfloat @_Z8test_retu6__bf16(bfloat noundef [[TMP0]]) #[[ATTR1:[0-9]+]] | ||
// CHECK-NEXT: ret bfloat [[CALL]] | ||
// | ||
__device__ __bf16 test_call( __bf16 in) { | ||
return test_ret(in); | ||
} | ||
|
||
|
||
// CHECK-LABEL: @_Z15test_vec_assignv( | ||
// CHECK-NEXT: entry: | ||
// CHECK-NEXT: [[VEC2_A:%.*]] = alloca <2 x bfloat>, align 4, addrspace(5) | ||
// CHECK-NEXT: [[VEC2_B:%.*]] = alloca <2 x bfloat>, align 4, addrspace(5) | ||
// CHECK-NEXT: [[VEC4_A:%.*]] = alloca <4 x bfloat>, align 8, addrspace(5) | ||
// CHECK-NEXT: [[VEC4_B:%.*]] = alloca <4 x bfloat>, align 8, addrspace(5) | ||
// CHECK-NEXT: [[VEC8_A:%.*]] = alloca <8 x bfloat>, align 16, addrspace(5) | ||
// CHECK-NEXT: [[VEC8_B:%.*]] = alloca <8 x bfloat>, align 16, addrspace(5) | ||
// CHECK-NEXT: [[VEC16_A:%.*]] = alloca <16 x bfloat>, align 32, addrspace(5) | ||
// CHECK-NEXT: [[VEC16_B:%.*]] = alloca <16 x bfloat>, align 32, addrspace(5) | ||
// CHECK-NEXT: [[VEC2_A_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC2_A]] to ptr | ||
// CHECK-NEXT: [[VEC2_B_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC2_B]] to ptr | ||
// CHECK-NEXT: [[VEC4_A_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC4_A]] to ptr | ||
// CHECK-NEXT: [[VEC4_B_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC4_B]] to ptr | ||
// CHECK-NEXT: [[VEC8_A_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC8_A]] to ptr | ||
// CHECK-NEXT: [[VEC8_B_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC8_B]] to ptr | ||
// CHECK-NEXT: [[VEC16_A_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC16_A]] to ptr | ||
// CHECK-NEXT: [[VEC16_B_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC16_B]] to ptr | ||
// CHECK-NEXT: [[TMP0:%.*]] = load <2 x bfloat>, ptr [[VEC2_B_ASCAST]], align 4 | ||
// CHECK-NEXT: store <2 x bfloat> [[TMP0]], ptr [[VEC2_A_ASCAST]], align 4 | ||
// CHECK-NEXT: [[TMP1:%.*]] = load <4 x bfloat>, ptr [[VEC4_B_ASCAST]], align 8 | ||
// CHECK-NEXT: store <4 x bfloat> [[TMP1]], ptr [[VEC4_A_ASCAST]], align 8 | ||
// CHECK-NEXT: [[TMP2:%.*]] = load <8 x bfloat>, ptr [[VEC8_B_ASCAST]], align 16 | ||
// CHECK-NEXT: store <8 x bfloat> [[TMP2]], ptr [[VEC8_A_ASCAST]], align 16 | ||
// CHECK-NEXT: [[TMP3:%.*]] = load <16 x bfloat>, ptr [[VEC16_B_ASCAST]], align 32 | ||
// CHECK-NEXT: store <16 x bfloat> [[TMP3]], ptr [[VEC16_A_ASCAST]], align 32 | ||
// CHECK-NEXT: ret void | ||
// | ||
__device__ void test_vec_assign() { | ||
typedef __attribute__((ext_vector_type(2))) __bf16 bf16_x2; | ||
bf16_x2 vec2_a, vec2_b; | ||
vec2_a = vec2_b; | ||
|
||
typedef __attribute__((ext_vector_type(4))) __bf16 bf16_x4; | ||
bf16_x4 vec4_a, vec4_b; | ||
vec4_a = vec4_b; | ||
|
||
typedef __attribute__((ext_vector_type(8))) __bf16 bf16_x8; | ||
bf16_x8 vec8_a, vec8_b; | ||
vec8_a = vec8_b; | ||
|
||
typedef __attribute__((ext_vector_type(16))) __bf16 bf16_x16; | ||
bf16_x16 vec16_a, vec16_b; | ||
vec16_a = vec16_b; | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,99 @@ | ||
// REQUIRES: amdgpu-registered-target | ||
// REQUIRES: x86-registered-target | ||
|
||
// RUN: %clang_cc1 "-triple" "x86_64-unknown-linux-gnu" "-aux-triple" "amdgcn-amd-amdhsa"\ | ||
// RUN: "-target-cpu" "x86-64" -fsyntax-only -verify=amdgcn %s | ||
// RUN: %clang_cc1 "-aux-triple" "x86_64-unknown-linux-gnu" "-triple" "amdgcn-amd-amdhsa"\ | ||
// RUN: -fcuda-is-device "-aux-target-cpu" "x86-64" -fsyntax-only -verify=amdgcn %s | ||
|
||
// RUN: %clang_cc1 "-aux-triple" "x86_64-unknown-linux-gnu" "-triple" "r600-unknown-unknown"\ | ||
// RUN: -fcuda-is-device "-aux-target-cpu" "x86-64" -fsyntax-only -verify=amdgcn,r600 %s | ||
|
||
// AMDGCN has storage-only support for bf16. R600 does not support it should error out when | ||
// it's the main target. | ||
|
||
#include "Inputs/cuda.h" | ||
|
||
// There should be no errors on using the type itself, or when loading/storing values for amdgcn. | ||
// r600 should error on all uses of the type. | ||
|
||
// r600-error@+1 {{__bf16 is not supported on this target}} | ||
typedef __attribute__((ext_vector_type(2))) __bf16 bf16_x2; | ||
// r600-error@+1 {{__bf16 is not supported on this target}} | ||
typedef __attribute__((ext_vector_type(4))) __bf16 bf16_x4; | ||
// r600-error@+1 {{__bf16 is not supported on this target}} | ||
typedef __attribute__((ext_vector_type(8))) __bf16 bf16_x8; | ||
// r600-error@+1 {{__bf16 is not supported on this target}} | ||
typedef __attribute__((ext_vector_type(16))) __bf16 bf16_x16; | ||
|
||
// r600-error@+1 2 {{__bf16 is not supported on this target}} | ||
__device__ void test(bool b, __bf16 *out, __bf16 in) { | ||
__bf16 bf16 = in; // r600-error {{__bf16 is not supported on this target}} | ||
|
||
bf16 + bf16; // amdgcn-error {{invalid operands to binary expression ('__bf16' and '__bf16')}} | ||
bf16 - bf16; // amdgcn-error {{invalid operands to binary expression ('__bf16' and '__bf16')}} | ||
bf16 * bf16; // amdgcn-error {{invalid operands to binary expression ('__bf16' and '__bf16')}} | ||
bf16 / bf16; // amdgcn-error {{invalid operands to binary expression ('__bf16' and '__bf16')}} | ||
|
||
__fp16 fp16; | ||
|
||
bf16 + fp16; // amdgcn-error {{invalid operands to binary expression ('__bf16' and '__fp16')}} | ||
fp16 + bf16; // amdgcn-error {{invalid operands to binary expression ('__fp16' and '__bf16')}} | ||
bf16 - fp16; // amdgcn-error {{invalid operands to binary expression ('__bf16' and '__fp16')}} | ||
fp16 - bf16; // amdgcn-error {{invalid operands to binary expression ('__fp16' and '__bf16')}} | ||
bf16 * fp16; // amdgcn-error {{invalid operands to binary expression ('__bf16' and '__fp16')}} | ||
fp16 * bf16; // amdgcn-error {{invalid operands to binary expression ('__fp16' and '__bf16')}} | ||
bf16 / fp16; // amdgcn-error {{invalid operands to binary expression ('__bf16' and '__fp16')}} | ||
fp16 / bf16; // amdgcn-error {{invalid operands to binary expression ('__fp16' and '__bf16')}} | ||
bf16 = fp16; // amdgcn-error {{assigning to '__bf16' from incompatible type '__fp16'}} | ||
fp16 = bf16; // amdgcn-error {{assigning to '__fp16' from incompatible type '__bf16'}} | ||
bf16 + (b ? fp16 : bf16); // amdgcn-error {{incompatible operand types ('__fp16' and '__bf16')}} | ||
*out = bf16; | ||
|
||
// amdgcn-error@+1 {{static_cast from '__bf16' to 'unsigned short' is not allowed}} | ||
unsigned short u16bf16 = static_cast<unsigned short>(bf16); | ||
// amdgcn-error@+2 {{C-style cast from 'unsigned short' to '__bf16' is not allowed}} | ||
// r600-error@+1 {{__bf16 is not supported on this target}} | ||
bf16 = (__bf16)u16bf16; | ||
|
||
// amdgcn-error@+1 {{static_cast from '__bf16' to 'float' is not allowed}} | ||
float f32bf16 = static_cast<float>(bf16); | ||
// amdgcn-error@+2 {{C-style cast from 'float' to '__bf16' is not allowed}} | ||
// r600-error@+1 {{__bf16 is not supported on this target}} | ||
bf16 = (__bf16)f32bf16; | ||
|
||
// amdgcn-error@+1 {{static_cast from '__bf16' to 'double' is not allowed}} | ||
double f64bf16 = static_cast<double>(bf16); | ||
// amdgcn-error@+2 {{C-style cast from 'double' to '__bf16' is not allowed}} | ||
// r600-error@+1 {{__bf16 is not supported on this target}} | ||
bf16 = (__bf16)f64bf16; | ||
|
||
// r600-error@+1 {{__bf16 is not supported on this target}} | ||
typedef __attribute__((ext_vector_type(2))) __bf16 bf16_x2; | ||
bf16_x2 vec2_a, vec2_b; | ||
vec2_a = vec2_b; | ||
|
||
// r600-error@+1 {{__bf16 is not supported on this target}} | ||
typedef __attribute__((ext_vector_type(4))) __bf16 bf16_x4; | ||
bf16_x4 vec4_a, vec4_b; | ||
vec4_a = vec4_b; | ||
|
||
// r600-error@+1 {{__bf16 is not supported on this target}} | ||
typedef __attribute__((ext_vector_type(8))) __bf16 bf16_x8; | ||
bf16_x8 vec8_a, vec8_b; | ||
vec8_a = vec8_b; | ||
|
||
// r600-error@+1 {{__bf16 is not supported on this target}} | ||
typedef __attribute__((ext_vector_type(16))) __bf16 bf16_x16; | ||
bf16_x16 vec16_a, vec16_b; | ||
vec16_a = vec16_b; | ||
} | ||
|
||
// r600-error@+1 2 {{__bf16 is not supported on this target}} | ||
__bf16 hostfn(__bf16 a) { | ||
return a; | ||
} | ||
|
||
// r600-error@+2 {{__bf16 is not supported on this target}} | ||
// r600-error@+1 {{vector size not an integral multiple of component size}} | ||
typedef __bf16 foo __attribute__((__vector_size__(16), __aligned__(16))); |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.