From 0b07a4aaeb2fa0f51c4700ff1047e6cda1b1543f Mon Sep 17 00:00:00 2001 From: kimsh02 Date: Tue, 21 Oct 2025 03:26:03 -0700 Subject: [PATCH 1/2] [CIR] Upstream handling for __builtin_prefetch --- clang/include/clang/CIR/Dialect/IR/CIROps.td | 33 +++++++++++++++++++ clang/lib/CIR/CodeGen/CIRGenBuiltin.cpp | 21 ++++++++++++ .../CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp | 9 +++++ clang/test/CIR/CodeGen/builtin_prefetech.c | 20 +++++++++++ 4 files changed, 83 insertions(+) create mode 100644 clang/test/CIR/CodeGen/builtin_prefetech.c diff --git a/clang/include/clang/CIR/Dialect/IR/CIROps.td b/clang/include/clang/CIR/Dialect/IR/CIROps.td index 919c194ed4453..92ca3c464f68a 100644 --- a/clang/include/clang/CIR/Dialect/IR/CIROps.td +++ b/clang/include/clang/CIR/Dialect/IR/CIROps.td @@ -4052,6 +4052,39 @@ def CIR_ExpectOp : CIR_Op<"expect", [ }]; } +//===----------------------------------------------------------------------===// +// PrefetchOp +//===----------------------------------------------------------------------===// + +def CIR_PrefetchOp : CIR_Op<"prefetch"> { + let summary = "prefetch operation"; + let description = [{ + The `cir.prefetch` op prefetches data from the memmory address. + + ```mlir + cir.prefetch(%0 : !cir.ptr) locality(1) write + ``` + + This opcode has the three attributes: + 1. The $locality is a temporal locality specifier + ranging from (0) - no locality, to (3) - extremely local keep in cache. + 2. The $isWrite is the specifier determining if the prefetch is prepaired + for a 'read' or 'write'. + If $isWrite doesn't specified it means that prefetch is prepared for 'read'. + }]; + + let arguments = (ins CIR_VoidPtrType:$addr, + ConfinedAttr, IntMaxValue<3>]>:$locality, + UnitAttr:$isWrite); + + let assemblyFormat = [{ + `(` $addr `:` qualified(type($addr)) `)` + `locality``(` $locality `)` + (`write` $isWrite^) : (`read`)? 
+ attr-dict + }]; +} + //===----------------------------------------------------------------------===// // PtrDiffOp //===----------------------------------------------------------------------===// diff --git a/clang/lib/CIR/CodeGen/CIRGenBuiltin.cpp b/clang/lib/CIR/CodeGen/CIRGenBuiltin.cpp index ea31871806bd7..2571a402f9676 100644 --- a/clang/lib/CIR/CodeGen/CIRGenBuiltin.cpp +++ b/clang/lib/CIR/CodeGen/CIRGenBuiltin.cpp @@ -454,6 +454,27 @@ RValue CIRGenFunction::emitBuiltinExpr(const GlobalDecl &gd, unsigned builtinID, assert(!cir::MissingFeatures::coroSizeBuiltinCall()); return getUndefRValue(e->getType()); } + case Builtin::BI__builtin_prefetch: { + auto evaluateOperandAsInt = [&](const Expr *arg) { + Expr::EvalResult res; + [[maybe_unused]] bool evalSucceed = + arg->EvaluateAsInt(res, cgm.getASTContext()); + assert(evalSucceed && "expression should be able to evaluate as int"); + return res.Val.getInt().getZExtValue(); + }; + + bool isWrite = false; + if (e->getNumArgs() > 1) + isWrite = evaluateOperandAsInt(e->getArg(1)); + + int locality = 0; + if (e->getNumArgs() > 2) + locality = evaluateOperandAsInt(e->getArg(2)); + + mlir::Value address = emitScalarExpr(e->getArg(0)); + cir::PrefetchOp::create(builder, loc, address, locality, isWrite); + return RValue::get(nullptr); + } } // If this is an alias for a lib function (e.g. 
__builtin_sin), emit diff --git a/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp b/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp index dc26dac3e349b..d999c9d9a6101 100644 --- a/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp +++ b/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp @@ -1507,6 +1507,15 @@ static uint64_t getTypeSize(mlir::Type type, mlir::Operation &op) { return llvm::divideCeil(layout.getTypeSizeInBits(type), 8); } +mlir::LogicalResult CIRToLLVMPrefetchOpLowering::matchAndRewrite( + cir::PrefetchOp op, OpAdaptor adaptor, + mlir::ConversionPatternRewriter &rewriter) const { + rewriter.replaceOpWithNewOp( + op, adaptor.getAddr(), adaptor.getIsWrite(), adaptor.getLocality(), + /*DataCache*/ 1); + return mlir::success(); +} + mlir::LogicalResult CIRToLLVMPtrDiffOpLowering::matchAndRewrite( cir::PtrDiffOp op, OpAdaptor adaptor, mlir::ConversionPatternRewriter &rewriter) const { diff --git a/clang/test/CIR/CodeGen/builtin_prefetech.c b/clang/test/CIR/CodeGen/builtin_prefetech.c new file mode 100644 index 0000000000000..343d9a808ad68 --- /dev/null +++ b/clang/test/CIR/CodeGen/builtin_prefetech.c @@ -0,0 +1,20 @@ +// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -emit-cir %s -o - | FileCheck %s -check-prefix=CIR +// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir -emit-llvm %s -o - | FileCheck %s -check-prefix=LLVM + +void foo(void *a) { + __builtin_prefetch(a, 1, 1); +} + +// CIR: cir.func dso_local @foo(%arg0: !cir.ptr loc({{.*}})) +// CIR: [[PTR_ALLOC:%.*]] = cir.alloca !cir.ptr, !cir.ptr>, ["a", init] {alignment = 8 : i64} +// CIR: cir.store %arg0, [[PTR_ALLOC]] : !cir.ptr, !cir.ptr> +// CIR: [[PTR:%.*]] = cir.load{{.*}} [[PTR_ALLOC]] : !cir.ptr>, !cir.ptr +// CIR: cir.prefetch([[PTR]] : !cir.ptr) locality(1) write +// CIR: cir.return + +// LLVM: define dso_local void @foo(ptr [[ARG0:%.*]]) +// LLVM: [[PTR_ALLOC:%.*]] = alloca ptr, i64 1 +// LLVM: store ptr [[ARG0]], ptr [[PTR_ALLOC]] +// LLVM: [[PTR:%.*]] = load ptr, ptr 
[[PTR_ALLOC]] +// LLVM: call void @llvm.prefetch.p0(ptr [[PTR]], i32 1, i32 1, i32 1) +// LLVM: ret void From 645ffaa6ac0b4b95d3c87b351408452a6b0918fe Mon Sep 17 00:00:00 2001 From: kimsh02 Date: Tue, 21 Oct 2025 18:45:04 -0700 Subject: [PATCH 2/2] Apply feedback --- clang/include/clang/CIR/Dialect/IR/CIROps.td | 30 +++++++------ clang/lib/CIR/CodeGen/CIRGenBuiltin.cpp | 2 +- .../CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp | 2 +- clang/test/CIR/CodeGen/builtin_prefetech.c | 49 ++++++++++++++----- 4 files changed, 55 insertions(+), 28 deletions(-) diff --git a/clang/include/clang/CIR/Dialect/IR/CIROps.td b/clang/include/clang/CIR/Dialect/IR/CIROps.td index 92ca3c464f68a..007f9e44255b1 100644 --- a/clang/include/clang/CIR/Dialect/IR/CIROps.td +++ b/clang/include/clang/CIR/Dialect/IR/CIROps.td @@ -4057,32 +4057,36 @@ def CIR_ExpectOp : CIR_Op<"expect", [ //===----------------------------------------------------------------------===// def CIR_PrefetchOp : CIR_Op<"prefetch"> { - let summary = "prefetch operation"; + let summary = "Prefetch operation"; let description = [{ - The `cir.prefetch` op prefetches data from the memmory address. + The `cir.prefetch` operation is a hint to the code generator to insert a + prefetch instruction if supported; otherwise, it is a noop. Prefetches + have no effect on the behavior of the program but can change its + performance characteristics. ```mlir - cir.prefetch(%0 : !cir.ptr) locality(1) write + cir.prefetch write locality(1) %0 : !cir.ptr ``` - This opcode has the three attributes: - 1. The $locality is a temporal locality specifier - ranging from (0) - no locality, to (3) - extremely local keep in cache. - 2. The $isWrite is the specifier determining if the prefetch is prepaired - for a 'read' or 'write'. - If $isWrite doesn't specified it means that prefetch is prepared for 'read'. + $locality is a temporal locality specifier ranging from (0) - no locality, + to (3) - extremely local, keep in cache. If $locality is not present, the + default value is 3. 
+ + $isWrite specifies whether the prefetch is for a 'read' or 'write'. If + $isWrite is not specified, it means that prefetch is prepared for 'read'. }]; let arguments = (ins CIR_VoidPtrType:$addr, - ConfinedAttr, IntMaxValue<3>]>:$locality, + DefaultValuedAttr, IntMaxValue<3>]>, + "3">:$locality, UnitAttr:$isWrite); let assemblyFormat = [{ - `(` $addr `:` qualified(type($addr)) `)` - `locality``(` $locality `)` (`write` $isWrite^) : (`read`)? + `locality` `(` $locality `)` + $addr `:` qualified(type($addr)) attr-dict - }]; + }]; } //===----------------------------------------------------------------------===// diff --git a/clang/lib/CIR/CodeGen/CIRGenBuiltin.cpp b/clang/lib/CIR/CodeGen/CIRGenBuiltin.cpp index 2571a402f9676..ef5d56a63dcc6 100644 --- a/clang/lib/CIR/CodeGen/CIRGenBuiltin.cpp +++ b/clang/lib/CIR/CodeGen/CIRGenBuiltin.cpp @@ -467,7 +467,7 @@ RValue CIRGenFunction::emitBuiltinExpr(const GlobalDecl &gd, unsigned builtinID, if (e->getNumArgs() > 1) isWrite = evaluateOperandAsInt(e->getArg(1)); - int locality = 0; + int locality = 3; if (e->getNumArgs() > 2) locality = evaluateOperandAsInt(e->getArg(2)); diff --git a/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp b/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp index d999c9d9a6101..eb64570862513 100644 --- a/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp +++ b/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp @@ -1512,7 +1512,7 @@ mlir::LogicalResult CIRToLLVMPrefetchOpLowering::matchAndRewrite( mlir::ConversionPatternRewriter &rewriter) const { rewriter.replaceOpWithNewOp( op, adaptor.getAddr(), adaptor.getIsWrite(), adaptor.getLocality(), - /*DataCache*/ 1); + /*DataCache=*/1); return mlir::success(); } diff --git a/clang/test/CIR/CodeGen/builtin_prefetech.c b/clang/test/CIR/CodeGen/builtin_prefetech.c index 343d9a808ad68..cfe85b9ba8104 100644 --- a/clang/test/CIR/CodeGen/builtin_prefetech.c +++ b/clang/test/CIR/CodeGen/builtin_prefetech.c @@ -1,20 +1,43 @@ // RUN: %clang_cc1 -triple 
x86_64-unknown-linux-gnu -emit-cir %s -o - | FileCheck %s -check-prefix=CIR // RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir -emit-llvm %s -o - | FileCheck %s -check-prefix=LLVM +// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -emit-llvm %s -o - | FileCheck %s -check-prefix=OGCG void foo(void *a) { - __builtin_prefetch(a, 1, 1); + __builtin_prefetch(a); // rw=0, locality=3 + __builtin_prefetch(a, 0); // rw=0, locality=3 + __builtin_prefetch(a, 1); // rw=1, locality=3 + __builtin_prefetch(a, 1, 1); // rw=1, locality=1 } -// CIR: cir.func dso_local @foo(%arg0: !cir.ptr loc({{.*}})) -// CIR: [[PTR_ALLOC:%.*]] = cir.alloca !cir.ptr, !cir.ptr>, ["a", init] {alignment = 8 : i64} -// CIR: cir.store %arg0, [[PTR_ALLOC]] : !cir.ptr, !cir.ptr> -// CIR: [[PTR:%.*]] = cir.load{{.*}} [[PTR_ALLOC]] : !cir.ptr>, !cir.ptr -// CIR: cir.prefetch([[PTR]] : !cir.ptr) locality(1) write -// CIR: cir.return +// CIR-LABEL: cir.func dso_local @foo( +// CIR: %[[ALLOCA:.*]] = cir.alloca !cir.ptr +// CIR: cir.store %arg0, %[[ALLOCA]] : !cir.ptr, !cir.ptr> +// CIR: %[[P1:.*]] = cir.load{{.*}} %[[ALLOCA]] : !cir.ptr>, !cir.ptr +// CIR: cir.prefetch read locality(3) %[[P1]] : !cir.ptr +// CIR: %[[P2:.*]] = cir.load{{.*}} %[[ALLOCA]] : !cir.ptr>, !cir.ptr +// CIR: cir.prefetch read locality(3) %[[P2]] : !cir.ptr +// CIR: %[[P3:.*]] = cir.load{{.*}} %[[ALLOCA]] : !cir.ptr>, !cir.ptr +// CIR: cir.prefetch write locality(3) %[[P3]] : !cir.ptr +// CIR: %[[P4:.*]] = cir.load{{.*}} %[[ALLOCA]] : !cir.ptr>, !cir.ptr +// CIR: cir.prefetch write locality(1) %[[P4]] : !cir.ptr +// CIR: cir.return -// LLVM: define dso_local void @foo(ptr [[ARG0:%.*]]) -// LLVM: [[PTR_ALLOC:%.*]] = alloca ptr, i64 1 -// LLVM: store ptr [[ARG0]], ptr [[PTR_ALLOC]] -// LLVM: [[PTR:%.*]] = load ptr, ptr [[PTR_ALLOC]] -// LLVM: call void @llvm.prefetch.p0(ptr [[PTR]], i32 1, i32 1, i32 1) -// LLVM: ret void +// LLVM-LABEL: define dso_local void @foo( +// LLVM: [[ALLOCA:%.*]] = alloca ptr, i64 1 +// LLVM: store 
ptr {{.*}}, ptr [[ALLOCA]] +// LLVM: [[LP1:%.*]] = load ptr, ptr [[ALLOCA]] +// LLVM: call void @llvm.prefetch.p0(ptr [[LP1]], i32 0, i32 3, i32 1) +// LLVM: [[LP2:%.*]] = load ptr, ptr [[ALLOCA]] +// LLVM: call void @llvm.prefetch.p0(ptr [[LP2]], i32 0, i32 3, i32 1) +// LLVM: [[LP3:%.*]] = load ptr, ptr [[ALLOCA]] +// LLVM: call void @llvm.prefetch.p0(ptr [[LP3]], i32 1, i32 3, i32 1) +// LLVM: [[LP4:%.*]] = load ptr, ptr [[ALLOCA]] +// LLVM: call void @llvm.prefetch.p0(ptr [[LP4]], i32 1, i32 1, i32 1) +// LLVM: ret void + +// OGCG-LABEL: define dso_local void @foo(ptr +// OGCG: call void @llvm.prefetch.p0(ptr {{.*}}, i32 0, i32 3, i32 1) +// OGCG: call void @llvm.prefetch.p0(ptr {{.*}}, i32 0, i32 3, i32 1) +// OGCG: call void @llvm.prefetch.p0(ptr {{.*}}, i32 1, i32 3, i32 1) +// OGCG: call void @llvm.prefetch.p0(ptr {{.*}}, i32 1, i32 1, i32 1) +// OGCG: ret void