Revert "Revert "[AMDGPU] const-fold imm operands of amdgcn_update_dpp intrinsic (#71139)"" #71669

Conversation

This reverts commit d1fb930 and fixes the lit test clang/test/CodeGenHIP/dpp-const-fold.hip.
@llvm/pr-subscribers-clang-codegen @llvm/pr-subscribers-clang

Author: Pravin Jagtap (pravinjagtap)

Changes: Revert "Revert "[AMDGPU] const-fold imm operands of amdgcn_update_dpp intrinsic (#71139)"". This reverts commit d1fb930 and fixes the lit test clang/test/CodeGenHIP/dpp-const-fold.hip.

Full diff: https://github.com/llvm/llvm-project/pull/71669.diff (3 files affected)
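For readers skimming the diff below: the patch hoists the repeated "emit as a scalar vs. constant-fold an integer-constant-expression (ICE) argument" logic into a single helper, EmitScalarOrConstFoldImmArg, and reuses it in the ARM, AArch64, X86, AMDGPU, and RISC-V builtin emitters. The following is a minimal standalone C++ sketch of the bit-mask dispatch that helper centralizes; emitScalar and foldToConstant are hypothetical stand-ins for clang's EmitScalarExpr and ICE folding, not real clang APIs.

    #include <cstdint>
    #include <cstdio>

    // Hypothetical stand-ins for clang's EmitScalarExpr and ICE constant folding.
    static int emitScalar(int V) { return V; }
    static int foldToConstant(int V) { return V; }

    // Bit Idx of ICEArguments records whether argument Idx must be an integer
    // constant expression; if so it is folded to a constant instead of being
    // emitted as an ordinary scalar.
    static int emitScalarOrConstFoldImmArg(uint32_t ICEArguments, unsigned Idx, int Arg) {
      if ((ICEArguments & (1u << Idx)) == 0)
        return emitScalar(Arg);      // ordinary argument
      return foldToConstant(Arg);    // ICE argument: becomes a constant
    }

    int main() {
      // Bits 2..5 set: mirrors the operands the new dpp-const-fold.hip test folds.
      uint32_t ICEArguments = 0b111100;
      for (unsigned I = 0; I != 6; ++I)
        std::printf("arg %u -> %d (%s)\n", I,
                    emitScalarOrConstFoldImmArg(ICEArguments, I, int(I)),
                    (ICEArguments & (1u << I)) ? "const-folded" : "scalar");
    }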
diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp
index 5ab81cc605819c3..e7e498e8a933131 100644
--- a/clang/lib/CodeGen/CGBuiltin.cpp
+++ b/clang/lib/CodeGen/CGBuiltin.cpp
@@ -5708,18 +5708,7 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID,
llvm::FunctionType *FTy = F->getFunctionType();
for (unsigned i = 0, e = E->getNumArgs(); i != e; ++i) {
- Value *ArgValue;
- // If this is a normal argument, just emit it as a scalar.
- if ((ICEArguments & (1 << i)) == 0) {
- ArgValue = EmitScalarExpr(E->getArg(i));
- } else {
- // If this is required to be a constant, constant fold it so that we
- // know that the generated intrinsic gets a ConstantInt.
- ArgValue = llvm::ConstantInt::get(
- getLLVMContext(),
- *E->getArg(i)->getIntegerConstantExpr(getContext()));
- }
-
+ Value *ArgValue = EmitScalarOrConstFoldImmArg(ICEArguments, i, E);
// If the intrinsic arg type is different from the builtin arg type
// we need to do a bit cast.
llvm::Type *PTy = FTy->getParamType(i);
@@ -8599,15 +8588,7 @@ Value *CodeGenFunction::EmitARMBuiltinExpr(unsigned BuiltinID,
}
}
- if ((ICEArguments & (1 << i)) == 0) {
- Ops.push_back(EmitScalarExpr(E->getArg(i)));
- } else {
- // If this is required to be a constant, constant fold it so that we know
- // that the generated intrinsic gets a ConstantInt.
- Ops.push_back(llvm::ConstantInt::get(
- getLLVMContext(),
- *E->getArg(i)->getIntegerConstantExpr(getContext())));
- }
+ Ops.push_back(EmitScalarOrConstFoldImmArg(ICEArguments, i, E));
}
switch (BuiltinID) {
@@ -11094,15 +11075,7 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID,
continue;
}
}
- if ((ICEArguments & (1 << i)) == 0) {
- Ops.push_back(EmitScalarExpr(E->getArg(i)));
- } else {
- // If this is required to be a constant, constant fold it so that we know
- // that the generated intrinsic gets a ConstantInt.
- Ops.push_back(llvm::ConstantInt::get(
- getLLVMContext(),
- *E->getArg(i)->getIntegerConstantExpr(getContext())));
- }
+ Ops.push_back(EmitScalarOrConstFoldImmArg(ICEArguments, i, E));
}
auto SISDMap = ArrayRef(AArch64SISDIntrinsicMap);
@@ -13814,16 +13787,7 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID,
assert(Error == ASTContext::GE_None && "Should not codegen an error");
for (unsigned i = 0, e = E->getNumArgs(); i != e; i++) {
- // If this is a normal argument, just emit it as a scalar.
- if ((ICEArguments & (1 << i)) == 0) {
- Ops.push_back(EmitScalarExpr(E->getArg(i)));
- continue;
- }
-
- // If this is required to be a constant, constant fold it so that we know
- // that the generated intrinsic gets a ConstantInt.
- Ops.push_back(llvm::ConstantInt::get(
- getLLVMContext(), *E->getArg(i)->getIntegerConstantExpr(getContext())));
+ Ops.push_back(EmitScalarOrConstFoldImmArg(ICEArguments, i, E));
}
// These exist so that the builtin that takes an immediate can be bounds
@@ -17588,6 +17552,23 @@ void CodeGenFunction::ProcessOrderScopeAMDGCN(Value *Order, Value *Scope,
SSID = getLLVMContext().getOrInsertSyncScopeID(scp);
}
+llvm::Value *CodeGenFunction::EmitScalarOrConstFoldImmArg(unsigned ICEArguments,
+ unsigned Idx,
+ const CallExpr *E) {
+ llvm::Value *Arg = nullptr;
+ if ((ICEArguments & (1 << Idx)) == 0) {
+ Arg = EmitScalarExpr(E->getArg(Idx));
+ } else {
+ // If this is required to be a constant, constant fold it so that we
+ // know that the generated intrinsic gets a ConstantInt.
+ std::optional<llvm::APSInt> Result =
+ E->getArg(Idx)->getIntegerConstantExpr(getContext());
+ assert(Result && "Expected argument to be a constant");
+ Arg = llvm::ConstantInt::get(getLLVMContext(), *Result);
+ }
+ return Arg;
+}
+
Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned BuiltinID,
const CallExpr *E) {
llvm::AtomicOrdering AO = llvm::AtomicOrdering::SequentiallyConsistent;
@@ -17638,8 +17619,15 @@ Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned BuiltinID,
case AMDGPU::BI__builtin_amdgcn_mov_dpp:
case AMDGPU::BI__builtin_amdgcn_update_dpp: {
llvm::SmallVector<llvm::Value *, 6> Args;
- for (unsigned I = 0; I != E->getNumArgs(); ++I)
- Args.push_back(EmitScalarExpr(E->getArg(I)));
+ // Find out if any arguments are required to be integer constant
+ // expressions.
+ unsigned ICEArguments = 0;
+ ASTContext::GetBuiltinTypeError Error;
+ getContext().GetBuiltinType(BuiltinID, Error, &ICEArguments);
+ assert(Error == ASTContext::GE_None && "Should not codegen an error");
+ for (unsigned I = 0; I != E->getNumArgs(); ++I) {
+ Args.push_back(EmitScalarOrConstFoldImmArg(ICEArguments, I, E));
+ }
assert(Args.size() == 5 || Args.size() == 6);
if (Args.size() == 5)
Args.insert(Args.begin(), llvm::PoisonValue::get(Args[0]->getType()));
@@ -20615,17 +20603,7 @@ Value *CodeGenFunction::EmitRISCVBuiltinExpr(unsigned BuiltinID,
Ops.push_back(AggValue);
continue;
}
-
- // If this is a normal argument, just emit it as a scalar.
- if ((ICEArguments & (1 << i)) == 0) {
- Ops.push_back(EmitScalarExpr(E->getArg(i)));
- continue;
- }
-
- // If this is required to be a constant, constant fold it so that we know
- // that the generated intrinsic gets a ConstantInt.
- Ops.push_back(llvm::ConstantInt::get(
- getLLVMContext(), *E->getArg(i)->getIntegerConstantExpr(getContext())));
+ Ops.push_back(EmitScalarOrConstFoldImmArg(ICEArguments, i, E));
}
Intrinsic::ID ID = Intrinsic::not_intrinsic;
diff --git a/clang/lib/CodeGen/CodeGenFunction.h b/clang/lib/CodeGen/CodeGenFunction.h
index 42f94c9b540191e..dc6773eb83f5ece 100644
--- a/clang/lib/CodeGen/CodeGenFunction.h
+++ b/clang/lib/CodeGen/CodeGenFunction.h
@@ -4327,6 +4327,8 @@ class CodeGenFunction : public CodeGenTypeCache {
llvm::Value *EmitX86BuiltinExpr(unsigned BuiltinID, const CallExpr *E);
llvm::Value *EmitPPCBuiltinExpr(unsigned BuiltinID, const CallExpr *E);
llvm::Value *EmitAMDGPUBuiltinExpr(unsigned BuiltinID, const CallExpr *E);
+ llvm::Value *EmitScalarOrConstFoldImmArg(unsigned ICEArguments, unsigned Idx,
+ const CallExpr *E);
llvm::Value *EmitSystemZBuiltinExpr(unsigned BuiltinID, const CallExpr *E);
llvm::Value *EmitNVPTXBuiltinExpr(unsigned BuiltinID, const CallExpr *E);
llvm::Value *EmitWebAssemblyBuiltinExpr(unsigned BuiltinID,
diff --git a/clang/test/CodeGenHIP/dpp-const-fold.hip b/clang/test/CodeGenHIP/dpp-const-fold.hip
new file mode 100644
index 000000000000000..f311ede5f71e6b5
--- /dev/null
+++ b/clang/test/CodeGenHIP/dpp-const-fold.hip
@@ -0,0 +1,48 @@
+// REQUIRES: amdgpu-registered-target
+
+// RUN: %clang --offload-arch=gfx906 -S -o - -emit-llvm --cuda-device-only -nogpuinc -nogpulib\
+// RUN: %s | FileCheck %s
+
+constexpr static int OpCtrl()
+{
+ return 15 + 1;
+}
+
+constexpr static int RowMask()
+{
+ return 3 + 1;
+}
+
+constexpr static int BankMask()
+{
+ return 2 + 1;
+}
+
+constexpr static bool BountCtrl()
+{
+ return true & false;
+}
+
+// CHECK: call i32 @llvm.amdgcn.update.dpp.i32(i32 %1, i32 %2, i32 16, i32 0, i32 0, i1 false)
+__attribute__((global)) void test_update_dpp_const_fold_imm_operand_2(int* out, int a, int b)
+{
+ *out = __builtin_amdgcn_update_dpp(a, b, OpCtrl(), 0, 0, false);
+}
+
+// CHECK: call i32 @llvm.amdgcn.update.dpp.i32(i32 %1, i32 %2, i32 0, i32 4, i32 0, i1 false)
+__attribute__((global)) void test_update_dpp_const_fold_imm_operand_3(int* out, int a, int b)
+{
+ *out = __builtin_amdgcn_update_dpp(a, b, 0, RowMask(), 0, false);
+}
+
+// CHECK: call i32 @llvm.amdgcn.update.dpp.i32(i32 %1, i32 %2, i32 0, i32 0, i32 3, i1 false)
+__attribute__((global)) void test_update_dpp_const_fold_imm_operand_4(int* out, int a, int b)
+{
+ *out = __builtin_amdgcn_update_dpp(a, b, 0, 0, BankMask(), false);
+}
+
+// CHECK: call i32 @llvm.amdgcn.update.dpp.i32(i32 %1, i32 %2, i32 0, i32 0, i32 0, i1 false)
+__attribute__((global)) void test_update_dpp_const_fold_imm_operand_5(int* out, int a, int b)
+{
+ *out = __builtin_amdgcn_update_dpp(a, b, 0, 0, 0, BountCtrl());
+}
Review comment on the RUN line of clang/test/CodeGenHIP/dpp-const-fold.hip:

// RUN: %clang --offload-arch=gfx906 -S -o - -emit-llvm --cuda-device-only -nogpuinc -nogpulib\
This should use %clang_cc1, look at some of the other CodeGen* test run lines for examples
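For reference, a %clang_cc1-based RUN line for this test might look like the sketch below; the triple, -target-cpu, and HIP device flags are assumptions and should be cross-checked against other CodeGenHIP tests rather than taken as the exact replacement:

    // RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -target-cpu gfx906 -x hip -fcuda-is-device \
    // RUN:   -emit-llvm -o - %s | FileCheck %s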