diff --git a/flang/include/flang/Optimizer/Dialect/FIROps.td b/flang/include/flang/Optimizer/Dialect/FIROps.td
index bae52d63fda45..4b6eb98bc3530 100644
--- a/flang/include/flang/Optimizer/Dialect/FIROps.td
+++ b/flang/include/flang/Optimizer/Dialect/FIROps.td
@@ -351,6 +351,36 @@ def fir_StoreOp : fir_Op<"store", [FirAliasTagOpInterface,
   }];
 }
 
+def fir_PrefetchOp : fir_Op<"prefetch", []> {
+  let summary = "prefetch a memory reference";
+
+  let description = [{
+    The prefetch is a hint to the code generator that the memory reference will
+    be used in the near future. The prefetch is not guaranteed to be executed.
+
+    ```
+      %a = ... -> !fir.ref<i32>
+      fir.prefetch %a {cacheType, localityHint = 3 : i32} : !fir.ref<i32>
+      // ...
+      fir.load %a : !fir.ref<i32> // use the prefetched value
+    ```
+  }];
+
+  /// `memref' is the address to be prefetched
+  /// `rw' : unit attribute, read/write specifier
+  ///        absent is a read (0, default), present is a write (1)
+  /// `localityHint': temporal locality specifier
+  ///        value ranging from 0 - no locality to 3 - extremely local
+  /// `cacheType' : unit attribute, cache type specifier
+  ///        absent is instruction cache (0, default), present is data cache (1)
+  /// NOTE: The numerical values used here are in reference to the LLVM LangRef
+  let arguments = (ins AnyReferenceLike:$memref, UnitAttr:$rw,
+      ConfinedAttr<I32Attr, [IntMinValue<0>, IntMaxValue<3>]>:$localityHint,
+      UnitAttr:$cacheType);
+
+  let assemblyFormat = "$memref attr-dict `:` type(operands)";
+}
+
 def fir_CopyOp : fir_Op<"copy", [DeclareOpInterfaceMethods<MemoryEffectsOpInterface>]> {
   let summary = "copy constant size memory";
 
diff --git a/flang/lib/Lower/Bridge.cpp b/flang/lib/Lower/Bridge.cpp
index 5779bcd5d293c..d6062bbabf821 100644
--- a/flang/lib/Lower/Bridge.cpp
+++ b/flang/lib/Lower/Bridge.cpp
@@ -3276,7 +3276,38 @@ class FirConverter : public Fortran::lower::AbstractConverter {
               attachInliningDirectiveToStmt(dir, &eval);
             },
             [&](const Fortran::parser::CompilerDirective::Prefetch &prefetch) {
-              TODO(getCurrentLocation(), "!$dir prefetch");
+              // Emit one fir.prefetch per designator listed in the directive.
+              // The analyzer and the location are the same for all of them.
+              Fortran::evaluate::ExpressionAnalyzer ea{
+                  bridge.getSemanticsContext()};
+              mlir::Location loc = genLocation(dir.source);
+              for (const auto &p : prefetch.v) {
+                Fortran::lower::SomeExpr expr{*ea.Analyze(
+                    std::get<Fortran::parser::DataRef>(p.value().u))};
+                Fortran::lower::StatementContext stmtCtx;
+                mlir::Value memRef{Fortran::lower::convertExprToHLFIR(
+                                       loc, *this, expr, localSymbols, stmtCtx)
+                                       .getBase()};
+                // Prefetch the data a descriptor refers to rather than the
+                // descriptor itself. Only a reference to a descriptor is
+                // loaded first; a base that is already a descriptor value
+                // (presumably e.g. an assumed-shape dummy - TODO confirm)
+                // goes straight to fir.box_addr.
+                mlir::Type baseTy = memRef.getType();
+                if (mlir::isa<fir::BaseBoxType>(fir::unwrapRefType(baseTy))) {
+                  if (!mlir::isa<fir::BaseBoxType>(baseTy))
+                    memRef = fir::LoadOp::create(*builder, loc, memRef);
+                  memRef = fir::BoxAddrOp::create(*builder, loc, memRef);
+                }
+
+                // TODO: Don't use default value, instead get the following
+                // info from the directive
+                bool isWrite{false};
+                uint32_t localityHint{3};
+                bool isData{true};
+                fir::PrefetchOp::create(*builder, loc, memRef, isWrite,
+                                        localityHint, isData);
+              }
             },
             [&](const auto &) {}},
         dir.u);
diff --git a/flang/lib/Optimizer/CodeGen/CodeGen.cpp b/flang/lib/Optimizer/CodeGen/CodeGen.cpp
index ca4aefb653d2a..8a1fe5fc5d988 100644
--- a/flang/lib/Optimizer/CodeGen/CodeGen.cpp
+++ b/flang/lib/Optimizer/CodeGen/CodeGen.cpp
@@ -3346,6 +3346,28 @@ struct GlobalOpConversion : public fir::FIROpConversion<fir::GlobalOp> {
   }
 };
 
+/// `fir.prefetch` --> `llvm.prefetch`
+/// The `rw` and `cacheType` unit attributes become the i32 flags 1 (present)
+/// or 0 (absent) of the LLVM operation; `localityHint` is passed through.
+struct PrefetchOpConversion : public fir::FIROpConversion<fir::PrefetchOp> {
+  using FIROpConversion::FIROpConversion;
+
+  llvm::LogicalResult
+  matchAndRewrite(fir::PrefetchOp prefetch, OpAdaptor adaptor,
+                  mlir::ConversionPatternRewriter &rewriter) const override {
+    mlir::IntegerAttr rw = mlir::IntegerAttr::get(rewriter.getI32Type(),
+                                                  prefetch.getRwAttr() ? 1 : 0);
+    mlir::IntegerAttr localityHint = prefetch.getLocalityHintAttr();
+    mlir::IntegerAttr cacheType = mlir::IntegerAttr::get(
+        rewriter.getI32Type(), prefetch.getCacheTypeAttr() ? 1 : 0);
+    mlir::LLVM::Prefetch::create(rewriter, prefetch.getLoc(),
+                                 adaptor.getMemref(), rw, localityHint,
+                                 cacheType);
+    rewriter.eraseOp(prefetch);
+    return mlir::success();
+  }
+};
+
 /// `fir.load` --> `llvm.load`
 struct LoadOpConversion : public fir::FIROpConversion<fir::LoadOp> {
   using FIROpConversion::FIROpConversion;
@@ -4423,14 +4445,15 @@ void fir::populateFIRToLLVMConversionPatterns(
       FirEndOpConversion, FreeMemOpConversion, GlobalLenOpConversion,
       GlobalOpConversion, InsertOnRangeOpConversion, IsPresentOpConversion,
       LenParamIndexOpConversion, LoadOpConversion, MulcOpConversion,
-      NegcOpConversion, NoReassocOpConversion, SelectCaseOpConversion,
-      SelectOpConversion, SelectRankOpConversion, SelectTypeOpConversion,
-      ShapeOpConversion, ShapeShiftOpConversion, ShiftOpConversion,
-      SliceOpConversion, StoreOpConversion, StringLitOpConversion,
-      SubcOpConversion, TypeDescOpConversion, TypeInfoOpConversion,
-      UnboxCharOpConversion, UnboxProcOpConversion, UndefOpConversion,
-      UnreachableOpConversion, XArrayCoorOpConversion, XEmboxOpConversion,
-      XReboxOpConversion, ZeroOpConversion>(converter, options);
+      NegcOpConversion, NoReassocOpConversion, PrefetchOpConversion,
+      SelectCaseOpConversion, SelectOpConversion, SelectRankOpConversion,
+      SelectTypeOpConversion, ShapeOpConversion, ShapeShiftOpConversion,
+      ShiftOpConversion, SliceOpConversion, StoreOpConversion,
+      StringLitOpConversion, SubcOpConversion, TypeDescOpConversion,
+      TypeInfoOpConversion, UnboxCharOpConversion, UnboxProcOpConversion,
+      UndefOpConversion, UnreachableOpConversion, XArrayCoorOpConversion,
+      XEmboxOpConversion, XReboxOpConversion, ZeroOpConversion>(converter,
+                                                                options);
 
   // Patterns that are populated without a type converter do not trigger
   // target materializations for the operands of the root op.
diff --git a/flang/test/Integration/prefetch.f90 b/flang/test/Integration/prefetch.f90
new file mode 100644
index 0000000000000..f3fb7a950e328
--- /dev/null
+++ b/flang/test/Integration/prefetch.f90
@@ -0,0 +1,39 @@
+!===----------------------------------------------------------------------===!
+! This directory can be used to add Integration tests involving multiple
+! stages of the compiler (for eg. from Fortran to LLVM IR). It should not
+! contain executable tests. We should only add tests here sparingly and only
+! if there is no other way to test. Repeat this message in each test that is
+! added to this directory and sub-directories.
+!===----------------------------------------------------------------------===!
+
+! RUN: %flang_fc1 -emit-llvm -o - %s | FileCheck %s --check-prefixes=LLVM
+
+!===============================================================================
+! Test lowering of prefetch directive
+!===============================================================================
+
+subroutine test_prefetch_01()
+  ! LLVM: {{.*}} = alloca i32, i64 1, align 4
+  ! LLVM: %[[VAR_J:.*]] = alloca i32, i64 1, align 4
+  ! LLVM: %[[VAR_I:.*]] = alloca i32, i64 1, align 4
+  ! LLVM: %[[VAR_A:.*]] = alloca [256 x i32], i64 1, align 4
+
+  integer :: i, j
+  integer :: a(256)
+
+  a = 23
+  ! LLVM: call void @llvm.prefetch.p0(ptr %[[VAR_A]], i32 0, i32 3, i32 1)
+  !dir$ prefetch a
+  i = sum(a)
+
+  ! LLVM: %[[LOAD_I:.*]] = load i32, ptr %[[VAR_I]], align 4
+  ! LLVM: %{{.*}} = add nsw i32 %[[LOAD_I]], 64
+  ! LLVM: %[[GEP_A:.*]] = getelementptr i32, ptr %[[VAR_A]], i64 {{.*}}
+
+  ! LLVM: call void @llvm.prefetch.p0(ptr %[[GEP_A]], i32 0, i32 3, i32 1)
+  ! LLVM: call void @llvm.prefetch.p0(ptr %[[VAR_J]], i32 0, i32 3, i32 1)
+  do i = 1, (256 - 64)
+    !dir$ prefetch a(i+64), j
+    a(i) = a(i-32) + a(i+32) + j
+  end do
+end subroutine test_prefetch_01
diff --git a/flang/test/Lower/HLFIR/prefetch.f90 b/flang/test/Lower/HLFIR/prefetch.f90
new file mode 100644
index 0000000000000..2a30584d94563
--- /dev/null
+++ b/flang/test/Lower/HLFIR/prefetch.f90
@@ -0,0 +1,65 @@
+! Test lowering of prefetch directive
+! RUN: %flang_fc1 -emit-hlfir -o - %s | FileCheck %s --check-prefixes=HLFIR
+
+module test_prefetch_mod
+  implicit none
+  type :: t
+    integer :: a(256, 256)
+  end type t
+end module test_prefetch_mod
+
+subroutine test_prefetch_01()
+  ! HLFIR: %[[H_A:.*]]:2 = hlfir.declare {{.*}} {uniq_name = "_QFtest_prefetch_01Ea"} : (!fir.ref<!fir.array<256xi32>>, !fir.shape<1>) -> (!fir.ref<!fir.array<256xi32>>, !fir.ref<!fir.array<256xi32>>)
+  ! HLFIR: %[[H_I:.*]]:2 = hlfir.declare {{.*}} {uniq_name = "_QFtest_prefetch_01Ei"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+  ! HLFIR: %[[H_J:.*]]:2 = hlfir.declare {{.*}} {uniq_name = "_QFtest_prefetch_01Ej"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+
+  integer :: i, j
+  integer :: a(256)
+
+  a = 23
+
+  ! HLFIR: fir.prefetch %[[H_A]]#0 {cacheType, localityHint = 3 : i32} : !fir.ref<!fir.array<256xi32>>
+  !dir$ prefetch a
+  i = sum(a)
+
+  ! HLFIR: %[[H_LOAD:.*]] = fir.load %[[H_I]]#0 : !fir.ref<i32>
+  ! HLFIR: %[[H_C64:.*]] = arith.constant 64 : i32
+  ! HLFIR: %[[H_ADD:.*]] = arith.addi %[[H_LOAD]], %[[H_C64]] overflow<nsw> : i32
+  ! HLFIR: %[[H_CON:.*]] = fir.convert %[[H_ADD]] : (i32) -> i64
+  ! HLFIR: %[[H_DESIG:.*]] = hlfir.designate %[[H_A]]#0 (%[[H_CON]]) : (!fir.ref<!fir.array<256xi32>>, i64) -> !fir.ref<i32>
+
+  ! HLFIR: fir.prefetch %[[H_DESIG]] {cacheType, localityHint = 3 : i32} : !fir.ref<i32>
+  ! HLFIR: fir.prefetch %[[H_J]]#0 {cacheType, localityHint = 3 : i32} : !fir.ref<i32>
+
+  do i = 1, (256 - 64)
+    !dir$ prefetch a(i+64), j
+    a(i) = a(i-32) + a(i+32) + j
+  end do
+end subroutine test_prefetch_01
+
+subroutine test_prefetch_02(t1)
+  use test_prefetch_mod
+  ! HLFIR: %[[H_A:.*]]:2 = hlfir.declare {{.*}} {fortran_attrs = #fir.var_attrs<allocatable>, uniq_name = "_QFtest_prefetch_02Ea"}
+  ! HLFIR: %[[H_ARG0:.*]]:2 = hlfir.declare {{.*}} dummy_scope {{.*}} {fortran_attrs = #fir.var_attrs<intent_inout>, uniq_name = "_QFtest_prefetch_02Et1"}
+  type(t), intent(inout) :: t1
+  integer, allocatable :: a(:, :)
+
+  ! HLFIR: %[[H_DESIG_01:.*]] = hlfir.designate %[[H_ARG0]]#0{"a"} shape {{.*}}
+  ! HLFIR: fir.prefetch %[[H_DESIG_01]] {cacheType, localityHint = 3 : i32} : !fir.ref<!fir.array<256x256xi32>>
+  !dir$ prefetch t1%a
+  a = t1%a ** 2
+
+  do i = 1, 256
+    ! HLFIR: %[[A_LOAD:.*]] = fir.load %[[H_A]]#0 : !fir.ref<!fir.box<!fir.heap<!fir.array<?x?xi32>>>>
+    ! HLFIR: %[[A_BOX:.*]] = fir.box_addr %[[A_LOAD]] : (!fir.box<!fir.heap<!fir.array<?x?xi32>>>) -> !fir.heap<!fir.array<?x?xi32>>
+    ! HLFIR: fir.prefetch %[[A_BOX]] {cacheType, localityHint = 3 : i32} : !fir.heap<!fir.array<?x?xi32>>
+    !dir$ prefetch a
+    a(i, :) = a(i, :) + i
+    do j = 1, 256
+      ! HLFIR: %[[H_DESIG_02:.*]] = hlfir.designate %[[H_ARG0]]#0{"a"} {{.*}}
+      ! HLFIR: fir.prefetch %[[H_DESIG_02]] {cacheType, localityHint = 3 : i32} : !fir.ref<i32>
+      !dir$ prefetch t1%a(i, j)
+      t1%a(i, j) = (a(i, j) + i*j) / t1%a(i, j)
+    end do
+  end do
+end subroutine test_prefetch_02