Skip to content

Commit 24c75a2

Browse files
authored
[AMDGPU][Clang] Support for type inferring extended image builtins for AMDGPU (#164358)
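Introduces Clang builtins for the amdgcn extended image instructions (image_sample_lz, image_sample_l, image_sample_d, and image_gather4_lz), gated on the "extended-image-insts" target feature.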
1 parent 9cf3e8a commit 24c75a2

File tree

8 files changed

+2150
-3
lines changed

8 files changed

+2150
-3
lines changed

clang/include/clang/Basic/Builtins.def

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -43,6 +43,7 @@
4343
// SJ -> sigjmp_buf
4444
// K -> ucontext_t
4545
// p -> pid_t
46+
// e -> __fp16 for OpenCL, _Float16 for every other language (e.g. HIP/C++)
4647
// . -> "...". This may only occur at the end of the function list.
4748
//
4849
// Types may be prefixed with the following modifiers:

clang/include/clang/Basic/BuiltinsAMDGPU.def

Lines changed: 41 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -967,6 +967,47 @@ TARGET_BUILTIN(__builtin_amdgcn_image_sample_3d_v4f32_f32, "V4fifffQtV4ibii", "n
967967
TARGET_BUILTIN(__builtin_amdgcn_image_sample_3d_v4f16_f32, "V4hifffQtV4ibii", "nc", "image-insts")
968968
TARGET_BUILTIN(__builtin_amdgcn_image_sample_cube_v4f32_f32, "V4fifffQtV4ibii", "nc", "image-insts")
969969
TARGET_BUILTIN(__builtin_amdgcn_image_sample_cube_v4f16_f32, "V4hifffQtV4ibii", "nc", "image-insts")
970+
TARGET_BUILTIN(__builtin_amdgcn_image_sample_lz_1d_v4f32_f32, "V4fifQtV4ibii", "nc", "extended-image-insts")
971+
TARGET_BUILTIN(__builtin_amdgcn_image_sample_lz_1d_v4f16_f32, "V4eifQtV4ibii", "nc", "extended-image-insts")
972+
TARGET_BUILTIN(__builtin_amdgcn_image_sample_lz_1darray_v4f32_f32, "V4fiffQtV4ibii", "nc", "extended-image-insts")
973+
TARGET_BUILTIN(__builtin_amdgcn_image_sample_lz_1darray_v4f16_f32, "V4eiffQtV4ibii", "nc", "extended-image-insts")
974+
TARGET_BUILTIN(__builtin_amdgcn_image_sample_lz_2d_f32_f32, "fiffQtV4ibii", "nc", "extended-image-insts")
975+
TARGET_BUILTIN(__builtin_amdgcn_image_sample_lz_2d_v4f32_f32, "V4fiffQtV4ibii", "nc", "extended-image-insts")
976+
TARGET_BUILTIN(__builtin_amdgcn_image_sample_lz_2d_v4f16_f32, "V4eiffQtV4ibii", "nc", "extended-image-insts")
977+
TARGET_BUILTIN(__builtin_amdgcn_image_sample_lz_2darray_f32_f32, "fifffQtV4ibii", "nc", "extended-image-insts")
978+
TARGET_BUILTIN(__builtin_amdgcn_image_sample_lz_2darray_v4f32_f32, "V4fifffQtV4ibii", "nc", "extended-image-insts")
979+
TARGET_BUILTIN(__builtin_amdgcn_image_sample_lz_2darray_v4f16_f32, "V4eifffQtV4ibii", "nc", "extended-image-insts")
980+
TARGET_BUILTIN(__builtin_amdgcn_image_sample_lz_3d_v4f32_f32, "V4fifffQtV4ibii", "nc", "extended-image-insts")
981+
TARGET_BUILTIN(__builtin_amdgcn_image_sample_lz_3d_v4f16_f32, "V4eifffQtV4ibii", "nc", "extended-image-insts")
982+
TARGET_BUILTIN(__builtin_amdgcn_image_sample_lz_cube_v4f32_f32, "V4fifffQtV4ibii", "nc", "extended-image-insts")
983+
TARGET_BUILTIN(__builtin_amdgcn_image_sample_lz_cube_v4f16_f32, "V4eifffQtV4ibii", "nc", "extended-image-insts")
984+
TARGET_BUILTIN(__builtin_amdgcn_image_sample_l_1d_v4f32_f32, "V4fiffQtV4ibii", "nc", "extended-image-insts")
985+
TARGET_BUILTIN(__builtin_amdgcn_image_sample_l_1d_v4f16_f32, "V4eiffQtV4ibii", "nc", "extended-image-insts")
986+
TARGET_BUILTIN(__builtin_amdgcn_image_sample_l_1darray_v4f32_f32, "V4fifffQtV4ibii", "nc", "extended-image-insts")
987+
TARGET_BUILTIN(__builtin_amdgcn_image_sample_l_1darray_v4f16_f32, "V4eifffQtV4ibii", "nc", "extended-image-insts")
988+
TARGET_BUILTIN(__builtin_amdgcn_image_sample_l_2d_f32_f32, "fifffQtV4ibii", "nc", "extended-image-insts")
989+
TARGET_BUILTIN(__builtin_amdgcn_image_sample_l_2d_v4f32_f32, "V4fifffQtV4ibii", "nc", "extended-image-insts")
990+
TARGET_BUILTIN(__builtin_amdgcn_image_sample_l_2d_v4f16_f32, "V4eifffQtV4ibii", "nc", "extended-image-insts")
991+
TARGET_BUILTIN(__builtin_amdgcn_image_sample_l_2darray_f32_f32, "fiffffQtV4ibii", "nc", "extended-image-insts")
992+
TARGET_BUILTIN(__builtin_amdgcn_image_sample_l_2darray_v4f32_f32, "V4fiffffQtV4ibii", "nc", "extended-image-insts")
993+
TARGET_BUILTIN(__builtin_amdgcn_image_sample_l_2darray_v4f16_f32, "V4eiffffQtV4ibii", "nc", "extended-image-insts")
994+
TARGET_BUILTIN(__builtin_amdgcn_image_sample_l_3d_v4f32_f32, "V4fiffffQtV4ibii", "nc", "extended-image-insts")
995+
TARGET_BUILTIN(__builtin_amdgcn_image_sample_l_3d_v4f16_f32, "V4eiffffQtV4ibii", "nc", "extended-image-insts")
996+
TARGET_BUILTIN(__builtin_amdgcn_image_sample_l_cube_v4f32_f32, "V4fiffffQtV4ibii", "nc", "extended-image-insts")
997+
TARGET_BUILTIN(__builtin_amdgcn_image_sample_l_cube_v4f16_f32, "V4eiffffQtV4ibii", "nc", "extended-image-insts")
998+
TARGET_BUILTIN(__builtin_amdgcn_image_sample_d_1d_v4f32_f32, "V4fifffQtV4ibii", "nc", "extended-image-insts")
999+
TARGET_BUILTIN(__builtin_amdgcn_image_sample_d_1d_v4f16_f32, "V4eifffQtV4ibii", "nc", "extended-image-insts")
1000+
TARGET_BUILTIN(__builtin_amdgcn_image_sample_d_1darray_v4f32_f32, "V4fiffffQtV4ibii", "nc", "extended-image-insts")
1001+
TARGET_BUILTIN(__builtin_amdgcn_image_sample_d_1darray_v4f16_f32, "V4eiffffQtV4ibii", "nc", "extended-image-insts")
1002+
TARGET_BUILTIN(__builtin_amdgcn_image_sample_d_2d_f32_f32, "fiffffffQtV4ibii", "nc", "extended-image-insts")
1003+
TARGET_BUILTIN(__builtin_amdgcn_image_sample_d_2d_v4f32_f32, "V4fiffffffQtV4ibii", "nc", "extended-image-insts")
1004+
TARGET_BUILTIN(__builtin_amdgcn_image_sample_d_2d_v4f16_f32, "V4eiffffffQtV4ibii", "nc", "extended-image-insts")
1005+
TARGET_BUILTIN(__builtin_amdgcn_image_sample_d_2darray_f32_f32, "fifffffffQtV4ibii", "nc", "extended-image-insts")
1006+
TARGET_BUILTIN(__builtin_amdgcn_image_sample_d_2darray_v4f32_f32, "V4fifffffffQtV4ibii", "nc", "extended-image-insts")
1007+
TARGET_BUILTIN(__builtin_amdgcn_image_sample_d_2darray_v4f16_f32, "V4eifffffffQtV4ibii", "nc", "extended-image-insts")
1008+
TARGET_BUILTIN(__builtin_amdgcn_image_sample_d_3d_v4f32_f32, "V4fifffffffffQtV4ibii", "nc", "extended-image-insts")
1009+
TARGET_BUILTIN(__builtin_amdgcn_image_sample_d_3d_v4f16_f32, "V4eifffffffffQtV4ibii", "nc", "extended-image-insts")
1010+
TARGET_BUILTIN(__builtin_amdgcn_image_gather4_lz_2d_v4f32_f32, "V4fiffQtV4ibii", "nc", "extended-image-insts")
9701011

9711012
#undef BUILTIN
9721013
#undef TARGET_BUILTIN

clang/lib/AST/ASTContext.cpp

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12403,6 +12403,11 @@ static QualType DecodeTypeFromStr(const char *&Str, const ASTContext &Context,
1240312403
// Read the base type.
1240412404
switch (*Str++) {
1240512405
default: llvm_unreachable("Unknown builtin type letter!");
12406+
case 'e':
12407+
assert(HowLong == 0 && !Signed && !Unsigned &&
12408+
"Bad modifiers used with 'e'!");
12409+
Type = Context.getLangOpts().OpenCL ? Context.HalfTy : Context.Float16Ty;
12410+
break;
1240612411
case 'x':
1240712412
assert(HowLong == 0 && !Signed && !Unsigned &&
1240812413
"Bad modifiers used with 'x'!");

clang/lib/CodeGen/TargetBuiltins/AMDGPU.cpp

Lines changed: 79 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -647,8 +647,8 @@ Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned BuiltinID,
647647
case AMDGPU::BI__builtin_amdgcn_ballot_w64: {
648648
llvm::Type *ResultType = ConvertType(E->getType());
649649
llvm::Value *Src = EmitScalarExpr(E->getArg(0));
650-
Function *F = CGM.getIntrinsic(Intrinsic::amdgcn_ballot, { ResultType });
651-
return Builder.CreateCall(F, { Src });
650+
Function *F = CGM.getIntrinsic(Intrinsic::amdgcn_ballot, {ResultType});
651+
return Builder.CreateCall(F, {Src});
652652
}
653653
case AMDGPU::BI__builtin_amdgcn_inverse_ballot_w32:
654654
case AMDGPU::BI__builtin_amdgcn_inverse_ballot_w64: {
@@ -1139,6 +1139,83 @@ Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned BuiltinID,
11391139
case AMDGPU::BI__builtin_amdgcn_image_sample_cube_v4f16_f32:
11401140
return emitAMDGCNImageOverloadedReturnType(
11411141
*this, E, Intrinsic::amdgcn_image_sample_cube, false);
1142+
case clang::AMDGPU::BI__builtin_amdgcn_image_sample_lz_1d_v4f32_f32:
1143+
case clang::AMDGPU::BI__builtin_amdgcn_image_sample_lz_1d_v4f16_f32:
1144+
return emitAMDGCNImageOverloadedReturnType(
1145+
*this, E, Intrinsic::amdgcn_image_sample_lz_1d, false);
1146+
case clang::AMDGPU::BI__builtin_amdgcn_image_sample_l_1d_v4f32_f32:
1147+
case clang::AMDGPU::BI__builtin_amdgcn_image_sample_l_1d_v4f16_f32:
1148+
return emitAMDGCNImageOverloadedReturnType(
1149+
*this, E, Intrinsic::amdgcn_image_sample_l_1d, false);
1150+
case clang::AMDGPU::BI__builtin_amdgcn_image_sample_d_1d_v4f32_f32:
1151+
case clang::AMDGPU::BI__builtin_amdgcn_image_sample_d_1d_v4f16_f32:
1152+
return emitAMDGCNImageOverloadedReturnType(
1153+
*this, E, Intrinsic::amdgcn_image_sample_d_1d, false);
1154+
case clang::AMDGPU::BI__builtin_amdgcn_image_sample_lz_2d_v4f32_f32:
1155+
case clang::AMDGPU::BI__builtin_amdgcn_image_sample_lz_2d_v4f16_f32:
1156+
case clang::AMDGPU::BI__builtin_amdgcn_image_sample_lz_2d_f32_f32:
1157+
return emitAMDGCNImageOverloadedReturnType(
1158+
*this, E, Intrinsic::amdgcn_image_sample_lz_2d, false);
1159+
case clang::AMDGPU::BI__builtin_amdgcn_image_sample_l_2d_v4f32_f32:
1160+
case clang::AMDGPU::BI__builtin_amdgcn_image_sample_l_2d_v4f16_f32:
1161+
case clang::AMDGPU::BI__builtin_amdgcn_image_sample_l_2d_f32_f32:
1162+
return emitAMDGCNImageOverloadedReturnType(
1163+
*this, E, Intrinsic::amdgcn_image_sample_l_2d, false);
1164+
case clang::AMDGPU::BI__builtin_amdgcn_image_sample_d_2d_v4f32_f32:
1165+
case clang::AMDGPU::BI__builtin_amdgcn_image_sample_d_2d_v4f16_f32:
1166+
case clang::AMDGPU::BI__builtin_amdgcn_image_sample_d_2d_f32_f32:
1167+
return emitAMDGCNImageOverloadedReturnType(
1168+
*this, E, Intrinsic::amdgcn_image_sample_d_2d, false);
1169+
case clang::AMDGPU::BI__builtin_amdgcn_image_sample_lz_3d_v4f32_f32:
1170+
case clang::AMDGPU::BI__builtin_amdgcn_image_sample_lz_3d_v4f16_f32:
1171+
return emitAMDGCNImageOverloadedReturnType(
1172+
*this, E, Intrinsic::amdgcn_image_sample_lz_3d, false);
1173+
case clang::AMDGPU::BI__builtin_amdgcn_image_sample_l_3d_v4f32_f32:
1174+
case clang::AMDGPU::BI__builtin_amdgcn_image_sample_l_3d_v4f16_f32:
1175+
return emitAMDGCNImageOverloadedReturnType(
1176+
*this, E, Intrinsic::amdgcn_image_sample_l_3d, false);
1177+
case clang::AMDGPU::BI__builtin_amdgcn_image_sample_d_3d_v4f32_f32:
1178+
case clang::AMDGPU::BI__builtin_amdgcn_image_sample_d_3d_v4f16_f32:
1179+
return emitAMDGCNImageOverloadedReturnType(
1180+
*this, E, Intrinsic::amdgcn_image_sample_d_3d, false);
1181+
case clang::AMDGPU::BI__builtin_amdgcn_image_sample_lz_cube_v4f32_f32:
1182+
case clang::AMDGPU::BI__builtin_amdgcn_image_sample_lz_cube_v4f16_f32:
1183+
return emitAMDGCNImageOverloadedReturnType(
1184+
*this, E, Intrinsic::amdgcn_image_sample_lz_cube, false);
1185+
case clang::AMDGPU::BI__builtin_amdgcn_image_sample_l_cube_v4f32_f32:
1186+
case clang::AMDGPU::BI__builtin_amdgcn_image_sample_l_cube_v4f16_f32:
1187+
return emitAMDGCNImageOverloadedReturnType(
1188+
*this, E, Intrinsic::amdgcn_image_sample_l_cube, false);
1189+
case clang::AMDGPU::BI__builtin_amdgcn_image_sample_lz_1darray_v4f32_f32:
1190+
case clang::AMDGPU::BI__builtin_amdgcn_image_sample_lz_1darray_v4f16_f32:
1191+
return emitAMDGCNImageOverloadedReturnType(
1192+
*this, E, Intrinsic::amdgcn_image_sample_lz_1darray, false);
1193+
case clang::AMDGPU::BI__builtin_amdgcn_image_sample_l_1darray_v4f32_f32:
1194+
case clang::AMDGPU::BI__builtin_amdgcn_image_sample_l_1darray_v4f16_f32:
1195+
return emitAMDGCNImageOverloadedReturnType(
1196+
*this, E, Intrinsic::amdgcn_image_sample_l_1darray, false);
1197+
case clang::AMDGPU::BI__builtin_amdgcn_image_sample_d_1darray_v4f32_f32:
1198+
case clang::AMDGPU::BI__builtin_amdgcn_image_sample_d_1darray_v4f16_f32:
1199+
return emitAMDGCNImageOverloadedReturnType(
1200+
*this, E, Intrinsic::amdgcn_image_sample_d_1darray, false);
1201+
case clang::AMDGPU::BI__builtin_amdgcn_image_sample_lz_2darray_v4f32_f32:
1202+
case clang::AMDGPU::BI__builtin_amdgcn_image_sample_lz_2darray_v4f16_f32:
1203+
case clang::AMDGPU::BI__builtin_amdgcn_image_sample_lz_2darray_f32_f32:
1204+
return emitAMDGCNImageOverloadedReturnType(
1205+
*this, E, Intrinsic::amdgcn_image_sample_lz_2darray, false);
1206+
case clang::AMDGPU::BI__builtin_amdgcn_image_sample_l_2darray_v4f32_f32:
1207+
case clang::AMDGPU::BI__builtin_amdgcn_image_sample_l_2darray_v4f16_f32:
1208+
case clang::AMDGPU::BI__builtin_amdgcn_image_sample_l_2darray_f32_f32:
1209+
return emitAMDGCNImageOverloadedReturnType(
1210+
*this, E, Intrinsic::amdgcn_image_sample_l_2darray, false);
1211+
case clang::AMDGPU::BI__builtin_amdgcn_image_sample_d_2darray_v4f32_f32:
1212+
case clang::AMDGPU::BI__builtin_amdgcn_image_sample_d_2darray_v4f16_f32:
1213+
case clang::AMDGPU::BI__builtin_amdgcn_image_sample_d_2darray_f32_f32:
1214+
return emitAMDGCNImageOverloadedReturnType(
1215+
*this, E, Intrinsic::amdgcn_image_sample_d_2darray, false);
1216+
case clang::AMDGPU::BI__builtin_amdgcn_image_gather4_lz_2d_v4f32_f32:
1217+
return emitAMDGCNImageOverloadedReturnType(
1218+
*this, E, Intrinsic::amdgcn_image_gather4_lz_2d, false);
11421219
case AMDGPU::BI__builtin_amdgcn_mfma_scale_f32_16x16x128_f8f6f4:
11431220
case AMDGPU::BI__builtin_amdgcn_mfma_scale_f32_32x32x64_f8f6f4: {
11441221
llvm::FixedVectorType *VT = FixedVectorType::get(Builder.getInt32Ty(), 8);

clang/lib/Sema/SemaAMDGPU.cpp

Lines changed: 42 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -153,7 +153,48 @@ bool SemaAMDGPU::CheckAMDGCNBuiltinFunctionCall(unsigned BuiltinID,
153153
case AMDGPU::BI__builtin_amdgcn_image_sample_3d_v4f32_f32:
154154
case AMDGPU::BI__builtin_amdgcn_image_sample_3d_v4f16_f32:
155155
case AMDGPU::BI__builtin_amdgcn_image_sample_cube_v4f32_f32:
156-
case AMDGPU::BI__builtin_amdgcn_image_sample_cube_v4f16_f32: {
156+
case AMDGPU::BI__builtin_amdgcn_image_sample_cube_v4f16_f32:
157+
case AMDGPU::BI__builtin_amdgcn_image_sample_lz_1d_v4f32_f32:
158+
case AMDGPU::BI__builtin_amdgcn_image_sample_lz_1d_v4f16_f32:
159+
case AMDGPU::BI__builtin_amdgcn_image_sample_lz_1darray_v4f32_f32:
160+
case AMDGPU::BI__builtin_amdgcn_image_sample_lz_1darray_v4f16_f32:
161+
case AMDGPU::BI__builtin_amdgcn_image_sample_lz_2d_f32_f32:
162+
case AMDGPU::BI__builtin_amdgcn_image_sample_lz_2d_v4f32_f32:
163+
case AMDGPU::BI__builtin_amdgcn_image_sample_lz_2d_v4f16_f32:
164+
case AMDGPU::BI__builtin_amdgcn_image_sample_lz_2darray_f32_f32:
165+
case AMDGPU::BI__builtin_amdgcn_image_sample_lz_2darray_v4f32_f32:
166+
case AMDGPU::BI__builtin_amdgcn_image_sample_lz_2darray_v4f16_f32:
167+
case AMDGPU::BI__builtin_amdgcn_image_sample_lz_3d_v4f32_f32:
168+
case AMDGPU::BI__builtin_amdgcn_image_sample_lz_3d_v4f16_f32:
169+
case AMDGPU::BI__builtin_amdgcn_image_sample_lz_cube_v4f32_f32:
170+
case AMDGPU::BI__builtin_amdgcn_image_sample_lz_cube_v4f16_f32:
171+
case AMDGPU::BI__builtin_amdgcn_image_sample_l_1d_v4f32_f32:
172+
case AMDGPU::BI__builtin_amdgcn_image_sample_l_1d_v4f16_f32:
173+
case AMDGPU::BI__builtin_amdgcn_image_sample_l_1darray_v4f32_f32:
174+
case AMDGPU::BI__builtin_amdgcn_image_sample_l_1darray_v4f16_f32:
175+
case AMDGPU::BI__builtin_amdgcn_image_sample_l_2d_f32_f32:
176+
case AMDGPU::BI__builtin_amdgcn_image_sample_l_2d_v4f16_f32:
177+
case AMDGPU::BI__builtin_amdgcn_image_sample_l_2d_v4f32_f32:
178+
case AMDGPU::BI__builtin_amdgcn_image_sample_l_2darray_f32_f32:
179+
case AMDGPU::BI__builtin_amdgcn_image_sample_l_2darray_v4f32_f32:
180+
case AMDGPU::BI__builtin_amdgcn_image_sample_l_2darray_v4f16_f32:
181+
case AMDGPU::BI__builtin_amdgcn_image_sample_l_3d_v4f32_f32:
182+
case AMDGPU::BI__builtin_amdgcn_image_sample_l_3d_v4f16_f32:
183+
case AMDGPU::BI__builtin_amdgcn_image_sample_l_cube_v4f32_f32:
184+
case AMDGPU::BI__builtin_amdgcn_image_sample_l_cube_v4f16_f32:
185+
case AMDGPU::BI__builtin_amdgcn_image_sample_d_1d_v4f32_f32:
186+
case AMDGPU::BI__builtin_amdgcn_image_sample_d_1d_v4f16_f32:
187+
case AMDGPU::BI__builtin_amdgcn_image_sample_d_1darray_v4f32_f32:
188+
case AMDGPU::BI__builtin_amdgcn_image_sample_d_1darray_v4f16_f32:
189+
case AMDGPU::BI__builtin_amdgcn_image_sample_d_2d_f32_f32:
190+
case AMDGPU::BI__builtin_amdgcn_image_sample_d_2d_v4f32_f32:
191+
case AMDGPU::BI__builtin_amdgcn_image_sample_d_2d_v4f16_f32:
192+
case AMDGPU::BI__builtin_amdgcn_image_sample_d_2darray_f32_f32:
193+
case AMDGPU::BI__builtin_amdgcn_image_sample_d_2darray_v4f32_f32:
194+
case AMDGPU::BI__builtin_amdgcn_image_sample_d_2darray_v4f16_f32:
195+
case AMDGPU::BI__builtin_amdgcn_image_sample_d_3d_v4f32_f32:
196+
case AMDGPU::BI__builtin_amdgcn_image_sample_d_3d_v4f16_f32:
197+
case AMDGPU::BI__builtin_amdgcn_image_gather4_lz_2d_v4f32_f32: {
157198
StringRef FeatureList(
158199
getASTContext().BuiltinInfo.getRequiredFeatures(BuiltinID));
159200
if (!Builtin::evaluateRequiredTargetFeatures(FeatureList,

0 commit comments

Comments
 (0)