Summary: replace the hand-written intrinsic emission for the div_fmas /
div_fmasf, ds_swizzle, readlane and readfirstlane AMDGPU builtins with a
shared CIRGenFunction::emitBuiltinWithOneOverloadedType<N> helper. The helper
emits the first N call arguments as scalars and creates a
cir::LLVMIntrinsicCallOp whose result type is convertType() of the first
argument's AST type (the replaced code used the emitted value's type,
src0.getType(), instead).

NOTE(review): indentation of context and removed lines was reconstructed from
a whitespace-collapsed copy of this patch; if hunks are rejected, apply with
whitespace-insensitive matching (e.g. `git apply --ignore-whitespace`).

diff --git a/clang/lib/CIR/CodeGen/CIRGenBuiltinAMDGPU.cpp b/clang/lib/CIR/CodeGen/CIRGenBuiltinAMDGPU.cpp
index 7e6e0f1a06046..432e01a696a4e 100644
--- a/clang/lib/CIR/CodeGen/CIRGenBuiltinAMDGPU.cpp
+++ b/clang/lib/CIR/CodeGen/CIRGenBuiltinAMDGPU.cpp
@@ -149,25 +149,12 @@ CIRGenFunction::emitAMDGPUBuiltinExpr(unsigned builtinId,
     return result;
   }
   case AMDGPU::BI__builtin_amdgcn_div_fmas:
-  case AMDGPU::BI__builtin_amdgcn_div_fmasf: {
-    mlir::Value src0 = emitScalarExpr(expr->getArg(0));
-    mlir::Value src1 = emitScalarExpr(expr->getArg(1));
-    mlir::Value src2 = emitScalarExpr(expr->getArg(2));
-    mlir::Value src3 = emitScalarExpr(expr->getArg(3));
-    mlir::Value result = cir::LLVMIntrinsicCallOp::create(
-                             builder, getLoc(expr->getExprLoc()),
-                             builder.getStringAttr("amdgcn.div.fmas"),
-                             src0.getType(), {src0, src1, src2, src3})
-                             .getResult();
-    return result;
-  }
-  case AMDGPU::BI__builtin_amdgcn_ds_swizzle: {
-    mlir::Value src0 = emitScalarExpr(expr->getArg(0));
-    mlir::Value src1 = emitScalarExpr(expr->getArg(1));
-    return builder.emitIntrinsicCallOp(getLoc(expr->getExprLoc()),
-                                       "amdgcn.ds.swizzle", src0.getType(),
-                                       mlir::ValueRange{src0, src1});
-  }
+  case AMDGPU::BI__builtin_amdgcn_div_fmasf:
+    return emitBuiltinWithOneOverloadedType<4>(expr, "amdgcn.div.fmas")
+        .getValue();
+  case AMDGPU::BI__builtin_amdgcn_ds_swizzle:
+    return emitBuiltinWithOneOverloadedType<2>(expr, "amdgcn.ds.swizzle")
+        .getValue();
   case AMDGPU::BI__builtin_amdgcn_mov_dpp8:
   case AMDGPU::BI__builtin_amdgcn_mov_dpp:
   case AMDGPU::BI__builtin_amdgcn_update_dpp: {
@@ -184,19 +171,12 @@ CIRGenFunction::emitAMDGPUBuiltinExpr(unsigned builtinId,
                      getContext().BuiltinInfo.getName(builtinId));
     return mlir::Value{};
   }
-  case AMDGPU::BI__builtin_amdgcn_readlane: {
-    mlir::Value src0 = emitScalarExpr(expr->getArg(0));
-    mlir::Value src1 = emitScalarExpr(expr->getArg(1));
-    return builder.emitIntrinsicCallOp(getLoc(expr->getExprLoc()),
-                                       "amdgcn.readlane", src0.getType(),
-                                       mlir::ValueRange{src0, src1});
-  }
-  case AMDGPU::BI__builtin_amdgcn_readfirstlane: {
-    mlir::Value src0 = emitScalarExpr(expr->getArg(0));
-    return builder.emitIntrinsicCallOp(getLoc(expr->getExprLoc()),
-                                       "amdgcn.readfirstlane", src0.getType(),
-                                       mlir::ValueRange{src0});
-  }
+  case AMDGPU::BI__builtin_amdgcn_readlane:
+    return emitBuiltinWithOneOverloadedType<2>(expr, "amdgcn.readlane")
+        .getValue();
+  case AMDGPU::BI__builtin_amdgcn_readfirstlane:
+    return emitBuiltinWithOneOverloadedType<1>(expr, "amdgcn.readfirstlane")
+        .getValue();
   case AMDGPU::BI__builtin_amdgcn_wave_shuffle: {
     cgm.errorNYI(expr->getSourceRange(),
                  std::string("unimplemented AMDGPU builtin call: ") +
diff --git a/clang/lib/CIR/CodeGen/CIRGenFunction.h b/clang/lib/CIR/CodeGen/CIRGenFunction.h
index 9f2facd12f417..667d2292799fb 100644
--- a/clang/lib/CIR/CodeGen/CIRGenFunction.h
+++ b/clang/lib/CIR/CodeGen/CIRGenFunction.h
@@ -1625,6 +1625,25 @@ class CIRGenFunction : public CIRGenTypeCache {
 
   void instantiateIndirectGotoBlock();
 
+  /// Emit a simple LLVM intrinsic that takes N scalar arguments and whose
+  /// return type matches the type of the first argument. The intrinsic name is
+  /// used verbatim; any overload mangling (e.g. `.f32`, `.p1`) must be baked
+  /// into \p intrinName by the caller.
+  template <unsigned N>
+  [[maybe_unused]] RValue
+  emitBuiltinWithOneOverloadedType(const CallExpr *e,
+                                   llvm::StringRef intrinName) {
+    static_assert(N, "expect non-empty argument");
+    mlir::Type cirTy = convertType(e->getArg(0)->getType());
+    SmallVector<mlir::Value, N> args;
+    for (unsigned i = 0; i < N; ++i)
+      args.push_back(emitScalarExpr(e->getArg(i)));
+    const auto call = cir::LLVMIntrinsicCallOp::create(
+        builder, getLoc(e->getExprLoc()), builder.getStringAttr(intrinName),
+        cirTy, args);
+    return RValue::get(call->getResult(0));
+  }
+
   RValue emitCall(const CIRGenFunctionInfo &funcInfo,
                   const CIRGenCallee &callee, ReturnValueSlot returnValue,
                   const CallArgList &args, cir::CIRCallOpInterface *callOp,