diff --git a/clang/include/clang/CIR/Dialect/IR/CIROps.td b/clang/include/clang/CIR/Dialect/IR/CIROps.td
index 86d09d72fe6ca..2b361ed0982c6 100644
--- a/clang/include/clang/CIR/Dialect/IR/CIROps.td
+++ b/clang/include/clang/CIR/Dialect/IR/CIROps.td
@@ -4052,6 +4052,43 @@ def CIR_ExpectOp : CIR_Op<"expect", [
   }];
 }
 
+//===----------------------------------------------------------------------===//
+// PrefetchOp
+//===----------------------------------------------------------------------===//
+
+def CIR_PrefetchOp : CIR_Op<"prefetch"> {
+  let summary = "Prefetch operation";
+  let description = [{
+    The `cir.prefetch` operation is a hint to the code generator to insert a
+    prefetch instruction if supported; otherwise, it is a noop. Prefetches
+    have no effect on the behavior of the program but can change its
+    performance characteristics.
+
+    ```mlir
+    cir.prefetch write locality(1) %0 : !cir.ptr<!void>
+    ```
+
+    $locality is a temporal locality specifier ranging from (0) - no locality,
+    to (3) - extremely local, keep in cache. If $locality is not present, the
+    default value is 3.
+
+    $isWrite specifies whether the prefetch is for a 'read' or 'write'. If
+    $isWrite is not specified, it means that prefetch is prepared for 'read'.
+  }];
+
+  let arguments = (ins CIR_VoidPtrType:$addr,
+    DefaultValuedAttr<ConfinedAttr<I32Attr, [IntMinValue<0>, IntMaxValue<3>]>,
+                      "3">:$locality,
+    UnitAttr:$isWrite);
+
+  let assemblyFormat = [{
+    (`write` $isWrite^) : (`read`)?
+    `locality` `(` $locality `)`
+    $addr `:` qualified(type($addr))
+    attr-dict
+  }];
+}
+
 //===----------------------------------------------------------------------===//
 // PtrDiffOp
 //===----------------------------------------------------------------------===//
diff --git a/clang/lib/CIR/CodeGen/CIRGenBuiltin.cpp b/clang/lib/CIR/CodeGen/CIRGenBuiltin.cpp
index 27c4d11fa233a..62fa04e15c717 100644
--- a/clang/lib/CIR/CodeGen/CIRGenBuiltin.cpp
+++ b/clang/lib/CIR/CodeGen/CIRGenBuiltin.cpp
@@ -454,6 +454,31 @@ RValue CIRGenFunction::emitBuiltinExpr(const GlobalDecl &gd, unsigned builtinID,
     assert(!cir::MissingFeatures::coroSizeBuiltinCall());
     return getUndefRValue(e->getType());
   }
+  case Builtin::BI__builtin_prefetch: {
+    // Folds an operand to its integer value. The assert records the
+    // expectation that the operand can always be evaluated as an integer here.
+    auto evaluateOperandAsInt = [&](const Expr *arg) {
+      Expr::EvalResult res;
+      [[maybe_unused]] bool evalSucceed =
+          arg->EvaluateAsInt(res, cgm.getASTContext());
+      assert(evalSucceed && "expression should be able to evaluate as int");
+      return res.Val.getInt().getZExtValue();
+    };
+
+    // Operands 1 (rw) and 2 (locality) are optional; when they are omitted
+    // the prefetch is emitted as a read with locality 3.
+    bool isWrite = false;
+    if (e->getNumArgs() > 1)
+      isWrite = evaluateOperandAsInt(e->getArg(1));
+
+    int locality = 3;
+    if (e->getNumArgs() > 2)
+      locality = evaluateOperandAsInt(e->getArg(2));
+
+    mlir::Value address = emitScalarExpr(e->getArg(0));
+    cir::PrefetchOp::create(builder, loc, address, locality, isWrite);
+    return RValue::get(nullptr);
+  }
   }
 
   // If this is an alias for a lib function (e.g. __builtin_sin), emit
diff --git a/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp b/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp
index a30ae02c895c2..5a6193fa8d840 100644
--- a/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp
+++ b/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp
@@ -1695,6 +1695,17 @@ static uint64_t getTypeSize(mlir::Type type, mlir::Operation &op) {
   return llvm::divideCeil(layout.getTypeSizeInBits(type), 8);
 }
 
+mlir::LogicalResult CIRToLLVMPrefetchOpLowering::matchAndRewrite(
+    cir::PrefetchOp op, OpAdaptor adaptor,
+    mlir::ConversionPatternRewriter &rewriter) const {
+  // Operands are passed as (address, rw, locality, cache type); the cache
+  // type is always 1 (data cache), as the i32 1 in the test's calls shows.
+  rewriter.replaceOpWithNewOp<mlir::LLVM::Prefetch>(
+      op, adaptor.getAddr(), adaptor.getIsWrite(), adaptor.getLocality(),
+      /*DataCache=*/1);
+  return mlir::success();
+}
+
 mlir::LogicalResult CIRToLLVMPtrDiffOpLowering::matchAndRewrite(
     cir::PtrDiffOp op, OpAdaptor adaptor,
     mlir::ConversionPatternRewriter &rewriter) const {
diff --git a/clang/test/CIR/CodeGen/builtin_prefetch.c b/clang/test/CIR/CodeGen/builtin_prefetch.c
new file mode 100644
index 0000000000000..cfe85b9ba8104
--- /dev/null
+++ b/clang/test/CIR/CodeGen/builtin_prefetch.c
@@ -0,0 +1,43 @@
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -emit-cir %s -o - | FileCheck %s -check-prefix=CIR
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir -emit-llvm %s -o - | FileCheck %s -check-prefix=LLVM
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -emit-llvm %s -o - | FileCheck %s -check-prefix=OGCG
+
+void foo(void *a) {
+  __builtin_prefetch(a);        // rw=0, locality=3
+  __builtin_prefetch(a, 0);     // rw=0, locality=3
+  __builtin_prefetch(a, 1);     // rw=1, locality=3
+  __builtin_prefetch(a, 1, 1);  // rw=1, locality=1
+}
+
+// CIR-LABEL: cir.func dso_local @foo(
+// CIR: %[[ALLOCA:.*]] = cir.alloca !cir.ptr<!void>
+// CIR: cir.store %arg0, %[[ALLOCA]] : !cir.ptr<!void>, !cir.ptr<!cir.ptr<!void>>
+// CIR: %[[P1:.*]] = cir.load{{.*}} %[[ALLOCA]] : !cir.ptr<!cir.ptr<!void>>, !cir.ptr<!void>
+// CIR: cir.prefetch read locality(3) %[[P1]] : !cir.ptr<!void>
+// CIR: %[[P2:.*]] = cir.load{{.*}} %[[ALLOCA]] : !cir.ptr<!cir.ptr<!void>>, !cir.ptr<!void>
+// CIR: cir.prefetch read locality(3) %[[P2]] : !cir.ptr<!void>
+// CIR: %[[P3:.*]] = cir.load{{.*}} %[[ALLOCA]] : !cir.ptr<!cir.ptr<!void>>, !cir.ptr<!void>
+// CIR: cir.prefetch write locality(3) %[[P3]] : !cir.ptr<!void>
+// CIR: %[[P4:.*]] = cir.load{{.*}} %[[ALLOCA]] : !cir.ptr<!cir.ptr<!void>>, !cir.ptr<!void>
+// CIR: cir.prefetch write locality(1) %[[P4]] : !cir.ptr<!void>
+// CIR: cir.return
+
+// LLVM-LABEL: define dso_local void @foo(
+// LLVM: [[ALLOCA:%.*]] = alloca ptr, i64 1
+// LLVM: store ptr {{.*}}, ptr [[ALLOCA]]
+// LLVM: [[LP1:%.*]] = load ptr, ptr [[ALLOCA]]
+// LLVM: call void @llvm.prefetch.p0(ptr [[LP1]], i32 0, i32 3, i32 1)
+// LLVM: [[LP2:%.*]] = load ptr, ptr [[ALLOCA]]
+// LLVM: call void @llvm.prefetch.p0(ptr [[LP2]], i32 0, i32 3, i32 1)
+// LLVM: [[LP3:%.*]] = load ptr, ptr [[ALLOCA]]
+// LLVM: call void @llvm.prefetch.p0(ptr [[LP3]], i32 1, i32 3, i32 1)
+// LLVM: [[LP4:%.*]] = load ptr, ptr [[ALLOCA]]
+// LLVM: call void @llvm.prefetch.p0(ptr [[LP4]], i32 1, i32 1, i32 1)
+// LLVM: ret void
+
+// OGCG-LABEL: define dso_local void @foo(ptr
+// OGCG: call void @llvm.prefetch.p0(ptr {{.*}}, i32 0, i32 3, i32 1)
+// OGCG: call void @llvm.prefetch.p0(ptr {{.*}}, i32 0, i32 3, i32 1)
+// OGCG: call void @llvm.prefetch.p0(ptr {{.*}}, i32 1, i32 3, i32 1)
+// OGCG: call void @llvm.prefetch.p0(ptr {{.*}}, i32 1, i32 1, i32 1)
+// OGCG: ret void