Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 0 additions & 9 deletions flang-rt/lib/cuda/descriptor.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -62,15 +62,6 @@ void RTDEF(CUFDescriptorCheckSection)(
}
}

/// Store \p index as the allocator index of the descriptor \p desc.
/// A null \p desc is reported through a Terminator constructed from
/// \p sourceFile and \p sourceLine with the message "descriptor is null".
void RTDEF(CUFSetAllocatorIndex)(
    Descriptor *desc, int index, const char *sourceFile, int sourceLine) {
  if (desc == nullptr) {
    Terminator reporter{sourceFile, sourceLine};
    reporter.Crash("descriptor is null");
  }
  desc->SetAllocIdx(index);
}

RT_EXT_API_GROUP_END
}
} // namespace Fortran::runtime::cuda
11 changes: 0 additions & 11 deletions flang-rt/unittests/Runtime/CUDA/AllocatorCUF.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -72,14 +72,3 @@ TEST(AllocatableCUFTest, DescriptorAllocationTest) {
EXPECT_TRUE(desc != nullptr);
RTNAME(CUFFreeDescriptor)(desc);
}

// A freshly created allocatable descriptor reports the default allocator
// index; after CUFSetAllocatorIndex it must report the requested one.
TEST(AllocatableCUFTest, CUFSetAllocatorIndex) {
  using Fortran::common::TypeCategory;
  RTNAME(CUFRegisterAllocator)();

  // REAL(4), DEVICE, ALLOCATABLE :: a(:)
  auto a{createAllocatable(TypeCategory::Real, 4)};
  EXPECT_EQ(static_cast<int>(kDefaultAllocator), a->GetAllocIdx());

  auto *descriptor{a.get()};
  RTNAME(CUFSetAllocatorIndex)(
      descriptor, kDeviceAllocatorPos, __FILE__, __LINE__);
  EXPECT_EQ(static_cast<int>(kDeviceAllocatorPos), a->GetAllocIdx());
}
4 changes: 0 additions & 4 deletions flang/include/flang/Lower/CUDA.h
Original file line number Diff line number Diff line change
Expand Up @@ -47,10 +47,6 @@ static inline unsigned getAllocatorIdx(const Fortran::semantics::Symbol &sym) {
return kDefaultAllocator;
}

/// Emit cuf.set_allocator_idx operations for the device allocatable
/// components of the derived-type symbol \p sym, addressed through \p box.
/// Nothing is generated when \p sym is not a derived-type object with device
/// allocatable ultimate components, or when \p box is neither an allocatable
/// nor a pointer.
void initializeDeviceComponentAllocator(
Fortran::lower::AbstractConverter &converter,
const Fortran::semantics::Symbol &sym, const fir::MutableBoxValue &box);

mlir::Type gatherDeviceComponentCoordinatesAndType(
fir::FirOpBuilder &builder, mlir::Location loc,
const Fortran::semantics::Symbol &sym, fir::RecordType recTy,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -31,10 +31,6 @@ void genSyncGlobalDescriptor(fir::FirOpBuilder &builder, mlir::Location loc,
void genDescriptorCheckSection(fir::FirOpBuilder &builder, mlir::Location loc,
mlir::Value desc);

/// Generate runtime call to set the allocator index in the descriptor.
/// \p desc is the descriptor to update and \p index is the allocator index to
/// store in it. The source file name and line number passed to the runtime
/// are derived from \p loc.
void genSetAllocatorIndex(fir::FirOpBuilder &builder, mlir::Location loc,
mlir::Value desc, mlir::Value index);

} // namespace fir::runtime::cuda

#endif // FORTRAN_OPTIMIZER_BUILDER_RUNTIME_CUDA_DESCRIPTOR_H_
21 changes: 0 additions & 21 deletions flang/include/flang/Optimizer/Dialect/CUF/CUFOps.td
Original file line number Diff line number Diff line change
Expand Up @@ -388,25 +388,4 @@ def cuf_StreamCastOp : cuf_Op<"stream_cast", [NoMemoryEffect]> {
let hasVerifier = 1;
}

def cuf_SetAllocatorIndexOp : cuf_Op<"set_allocator_idx", []> {
let summary = "Set the allocator index in a descriptor";

let description = [{
The allocator index in the Fortran descriptor is used to retrieve the correct
CUDA allocator to allocate the memory on the device.
In many cases the allocator index is set when the descriptor is created. For
device components, the descriptor is part of the derived-type itself and
needs to be set after the derived-type is allocated in managed memory.
}];

let arguments = (ins Arg<fir_ReferenceType, "", [MemRead, MemWrite]>:$box,
cuf_DataAttributeAttr:$data_attr);

let assemblyFormat = [{
$box `:` qualified(type($box)) attr-dict
}];

let hasVerifier = 1;
}

#endif // FORTRAN_DIALECT_CUF_CUF_OPS
4 changes: 0 additions & 4 deletions flang/include/flang/Runtime/CUDA/descriptor.h
Original file line number Diff line number Diff line change
Expand Up @@ -41,10 +41,6 @@ void RTDECL(CUFSyncGlobalDescriptor)(
void RTDECL(CUFDescriptorCheckSection)(
const Descriptor *, const char *sourceFile = nullptr, int sourceLine = 0);

/// Set the allocator index with the provided value.
/// A null descriptor is reported as a crash ("descriptor is null") through a
/// Terminator built from \p sourceFile and \p sourceLine.
void RTDECL(CUFSetAllocatorIndex)(Descriptor *, int index,
const char *sourceFile = nullptr, int sourceLine = 0);

} // extern "C"

} // namespace Fortran::runtime::cuda
Expand Down
3 changes: 0 additions & 3 deletions flang/lib/Lower/Allocatable.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -450,9 +450,6 @@ class AllocateStmtHelper {
if (alloc.getSymbol().test(Fortran::semantics::Symbol::Flag::AccDeclare))
Fortran::lower::attachDeclarePostAllocAction(converter, builder,
alloc.getSymbol());
if (Fortran::semantics::HasCUDAComponent(alloc.getSymbol()))
Fortran::lower::initializeDeviceComponentAllocator(
converter, alloc.getSymbol(), box);
}

void setPinnedToFalse() {
Expand Down
89 changes: 0 additions & 89 deletions flang/lib/Lower/CUDA.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -17,95 +17,6 @@

#define DEBUG_TYPE "flang-lower-cuda"

/// Set the allocator index of the device allocatable components of an
/// allocatable or pointer derived-type entity after it has been allocated.
///
/// \p sym is the allocated symbol and \p box its mutable box. When the
/// derived type of \p sym has device allocatable ultimate components, one
/// cuf.set_allocator_idx operation is emitted per such component. For array
/// entities the operations are emitted inside a generated fir.do_loop nest
/// that addresses each element with fir.array_coor.
///
/// The builder insertion point is restored on return so that code emitted by
/// the caller afterwards is not placed inside the generated loop nest.
void Fortran::lower::initializeDeviceComponentAllocator(
    Fortran::lower::AbstractConverter &converter,
    const Fortran::semantics::Symbol &sym, const fir::MutableBoxValue &box) {
  if (const auto *details{
          sym.GetUltimate()
              .detailsIf<Fortran::semantics::ObjectEntityDetails>()}) {
    const Fortran::semantics::DeclTypeSpec *type{details->type()};
    const Fortran::semantics::DerivedTypeSpec *derived{type ? type->AsDerived()
                                                            : nullptr};
    if (derived) {
      if (!FindCUDADeviceAllocatableUltimateComponent(*derived))
        return; // No device components.

      fir::FirOpBuilder &builder = converter.getFirOpBuilder();
      mlir::Location loc = converter.getCurrentLocation();

      mlir::Type baseTy = fir::unwrapRefType(box.getAddr().getType());

      // Only pointer and allocatable needs post allocation initialization
      // of components descriptors.
      if (!fir::isAllocatableType(baseTy) && !fir::isPointerType(baseTy))
        return;

      // Extract the derived type.
      mlir::Type ty = fir::getDerivedType(baseTy);
      auto recTy = mlir::dyn_cast<fir::RecordType>(ty);
      assert(recTy && "expected fir::RecordType");

      if (auto boxTy = mlir::dyn_cast<fir::BaseBoxType>(baseTy))
        baseTy = boxTy.getEleTy();
      baseTy = fir::unwrapRefType(baseTy);

      // The array path below moves the insertion point into the body of the
      // innermost generated loop. The guard puts it back where it was when
      // this function returns.
      mlir::OpBuilder::InsertionGuard guard(builder);

      Fortran::semantics::UltimateComponentIterator components{*derived};
      mlir::Value loadedBox = fir::LoadOp::create(builder, loc, box.getAddr());
      mlir::Value addr;
      if (auto seqTy = mlir::dyn_cast<fir::SequenceType>(baseTy)) {
        mlir::Type idxTy = builder.getIndexType();
        mlir::Value one = builder.createIntegerConstant(loc, idxTy, 1);
        mlir::Value zero = builder.createIntegerConstant(loc, idxTy, 0);
        llvm::SmallVector<mlir::Value> indices;
        llvm::SmallVector<mlir::Value> extents;
        for (unsigned i = 0; i < seqTy.getDimension(); ++i) {
          mlir::Value dim = builder.createIntegerConstant(loc, idxTy, i);
          auto dimInfo = fir::BoxDimsOp::create(builder, loc, idxTy, idxTy,
                                                idxTy, loadedBox, dim);
          // Value pushed in the shape: result(0) + result(1) - 1, clamped to
          // zero when not positive.
          mlir::Value lbub = mlir::arith::AddIOp::create(
              builder, loc, dimInfo.getResult(0), dimInfo.getResult(1));
          mlir::Value ext =
              mlir::arith::SubIOp::create(builder, loc, lbub, one);
          mlir::Value cmp = mlir::arith::CmpIOp::create(
              builder, loc, mlir::arith::CmpIPredicate::sgt, ext, zero);
          ext = mlir::arith::SelectOp::create(builder, loc, cmp, ext, zero);
          extents.push_back(ext);

          // NOTE(review): the loop lower bound, upper bound and step are the
          // three fir.box_dims results taken as-is - confirm that this is the
          // intended iteration space for the element addressing below.
          auto loop = fir::DoLoopOp::create(
              builder, loc, dimInfo.getResult(0), dimInfo.getResult(1),
              dimInfo.getResult(2), /*isUnordered=*/true,
              /*finalCount=*/false, mlir::ValueRange{});
          indices.push_back(loop.getInductionVar());
          // Nest the next loop (or the component updates) in this loop body.
          builder.setInsertionPointToStart(loop.getBody());
        }
        mlir::Value boxAddr = fir::BoxAddrOp::create(builder, loc, loadedBox);
        auto shape = fir::ShapeOp::create(builder, loc, extents);
        addr = fir::ArrayCoorOp::create(
            builder, loc, fir::ReferenceType::get(recTy), boxAddr, shape,
            /*slice=*/mlir::Value{}, indices, /*typeparms=*/mlir::ValueRange{});
      } else {
        addr = fir::BoxAddrOp::create(builder, loc, loadedBox);
      }
      for (const auto &compSym : components) {
        if (Fortran::semantics::IsDeviceAllocatable(compSym)) {
          llvm::SmallVector<mlir::Value> coord;
          mlir::Type fieldTy = gatherDeviceComponentCoordinatesAndType(
              builder, loc, compSym, recTy, coord);
          assert(coord.size() == 1 && "expect one coordinate");
          mlir::Value comp = fir::CoordinateOp::create(
              builder, loc, builder.getRefType(fieldTy), addr, coord[0]);
          cuf::DataAttributeAttr dataAttr =
              Fortran::lower::translateSymbolCUFDataAttribute(
                  builder.getContext(), compSym);
          cuf::SetAllocatorIndexOp::create(builder, loc, comp, dataAttr);
        }
      }
    }
  }
}

mlir::Type Fortran::lower::gatherDeviceComponentCoordinatesAndType(
fir::FirOpBuilder &builder, mlir::Location loc,
const Fortran::semantics::Symbol &sym, fir::RecordType recTy,
Expand Down
62 changes: 0 additions & 62 deletions flang/lib/Lower/ConvertVariable.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -786,62 +786,6 @@ static mlir::Value createNewLocal(Fortran::lower::AbstractConverter &converter,
return res;
}

/// The descriptors of device allocatable components embedded in a
/// derived-type object do not carry the correct allocator index when the
/// object is created. Once the object has been initialized, emit one
/// cuf.set_allocator_idx operation per device component to set it.
/// Allocatable and pointer objects are skipped here: their allocator index
/// needs to be set after allocation.
static void
initializeDeviceComponentAllocator(Fortran::lower::AbstractConverter &converter,
                                   const Fortran::semantics::Symbol &symbol,
                                   Fortran::lower::SymMap &symMap) {
  const auto *details{
      symbol.GetUltimate()
          .detailsIf<Fortran::semantics::ObjectEntityDetails>()};
  if (!details)
    return;

  const Fortran::semantics::DeclTypeSpec *declType{details->type()};
  const Fortran::semantics::DerivedTypeSpec *derived{
      declType ? declType->AsDerived() : nullptr};
  if (!derived)
    return;
  if (!FindCUDADeviceAllocatableUltimateComponent(*derived))
    return; // No device components.

  fir::FirOpBuilder &builder = converter.getFirOpBuilder();
  mlir::Location loc = converter.getCurrentLocation();

  fir::ExtendedValue exv =
      converter.getSymbolExtendedValue(symbol.GetUltimate(), &symMap);
  mlir::Value base = fir::getBase(exv);
  mlir::Type baseValueTy = base.getType();
  if (fir::isAllocatableType(baseValueTy) || fir::isPointerType(baseValueTy))
    return; // Allocator index need to be set after allocation.

  // Peel references and boxes to reach the (possibly array of) record type.
  mlir::Type eleTy = fir::unwrapRefType(baseValueTy);
  if (auto boxTy = mlir::dyn_cast<fir::BaseBoxType>(eleTy))
    eleTy = boxTy.getEleTy();
  eleTy = fir::unwrapRefType(eleTy);

  auto recTy = mlir::dyn_cast<fir::RecordType>(fir::unwrapSequenceType(eleTy));
  assert(recTy && "expected fir::RecordType");

  Fortran::semantics::UltimateComponentIterator components{*derived};
  for (const auto &component : components) {
    if (!Fortran::semantics::IsDeviceAllocatable(component))
      continue;
    llvm::SmallVector<mlir::Value> coord;
    mlir::Type fieldTy =
        Fortran::lower::gatherDeviceComponentCoordinatesAndType(
            builder, loc, component, recTy, coord);
    mlir::Value componentRef = fir::CoordinateOp::create(
        builder, loc, builder.getRefType(fieldTy), base, coord);
    cuf::DataAttributeAttr dataAttr =
        Fortran::lower::translateSymbolCUFDataAttribute(builder.getContext(),
                                                        component);
    cuf::SetAllocatorIndexOp::create(builder, loc, componentRef, dataAttr);
  }
}

/// Must \p var be default initialized at runtime when entering its scope.
static bool
mustBeDefaultInitializedAtRuntime(const Fortran::lower::pft::Variable &var) {
Expand Down Expand Up @@ -1164,9 +1108,6 @@ static void instantiateLocal(Fortran::lower::AbstractConverter &converter,
if (mustBeDefaultInitializedAtRuntime(var))
Fortran::lower::defaultInitializeAtRuntime(converter, var.getSymbol(),
symMap);
if (converter.getFoldingContext().languageFeatures().IsEnabled(
Fortran::common::LanguageFeature::CUDA))
initializeDeviceComponentAllocator(converter, var.getSymbol(), symMap);
auto *builder = &converter.getFirOpBuilder();
if (needCUDAAlloc(var.getSymbol()) &&
!cuf::isCUDADeviceContext(builder->getRegion())) {
Expand Down Expand Up @@ -1426,9 +1367,6 @@ static void instantiateAlias(Fortran::lower::AbstractConverter &converter,
if (mustBeDefaultInitializedAtRuntime(var))
Fortran::lower::defaultInitializeAtRuntime(converter, var.getSymbol(),
symMap);
if (converter.getFoldingContext().languageFeatures().IsEnabled(
Fortran::common::LanguageFeature::CUDA))
initializeDeviceComponentAllocator(converter, var.getSymbol(), symMap);
}

//===--------------------------------------------------------------===//
Expand Down
15 changes: 0 additions & 15 deletions flang/lib/Optimizer/Builder/Runtime/CUDA/Descriptor.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -47,18 +47,3 @@ void fir::runtime::cuda::genDescriptorCheckSection(fir::FirOpBuilder &builder,
builder, loc, fTy, desc, sourceFile, sourceLine)};
fir::CallOp::create(builder, loc, func, args);
}

/// Emit a call to the CUFSetAllocatorIndex runtime function with \p desc and
/// \p index, followed by the source file name and line number derived from
/// \p loc.
void fir::runtime::cuda::genSetAllocatorIndex(fir::FirOpBuilder &builder,
                                              mlir::Location loc,
                                              mlir::Value desc,
                                              mlir::Value index) {
  mlir::func::FuncOp callee =
      fir::runtime::getRuntimeFunc<mkRTKey(CUFSetAllocatorIndex)>(loc, builder);
  auto calleeTy = callee.getFunctionType();

  // The line number is created with the type of the callee's fourth
  // parameter.
  mlir::Value fileName = fir::factory::locationToFilename(builder, loc);
  mlir::Value lineNo =
      fir::factory::locationToLineNo(builder, loc, calleeTy.getInput(3));

  llvm::SmallVector<mlir::Value> callArgs{fir::runtime::createArguments(
      builder, loc, calleeTy, desc, index, fileName, lineNo)};
  fir::CallOp::create(builder, loc, callee, callArgs);
}
11 changes: 0 additions & 11 deletions flang/lib/Optimizer/Dialect/CUF/CUFOps.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -345,17 +345,6 @@ llvm::LogicalResult cuf::StreamCastOp::verify() {
return checkStreamType(*this);
}

//===----------------------------------------------------------------------===//
// SetAllocatorIndexOp
//===----------------------------------------------------------------------===//

/// The box operand, once its reference type is unwrapped, must be a
/// fir::BaseBoxType; otherwise an op error is emitted.
llvm::LogicalResult cuf::SetAllocatorIndexOp::verify() {
  mlir::Type unwrappedTy = fir::unwrapRefType(getBox().getType());
  if (mlir::isa<fir::BaseBoxType>(unwrappedTy))
    return mlir::success();
  return emitOpError("expect box to be a reference to class or box type value");
}

// Tablegen operators

#define GET_OP_CLASSES
Expand Down
32 changes: 2 additions & 30 deletions flang/lib/Optimizer/Transforms/CUFOpConversion.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -928,34 +928,6 @@ struct CUFSyncDescriptorOpConversion
}
};

/// Rewrite cuf.set_allocator_idx into a call to the CUFSetAllocatorIndex
/// runtime function. The allocator index passed to the runtime is selected
/// from the operation's data attribute (device, managed, unified or pinned);
/// any other attribute value maps to kDefaultAllocator.
struct CUFSetAllocatorIndexOpConversion
    : public mlir::OpRewritePattern<cuf::SetAllocatorIndexOp> {
  using OpRewritePattern::OpRewritePattern;

  mlir::LogicalResult
  matchAndRewrite(cuf::SetAllocatorIndexOp op,
                  mlir::PatternRewriter &rewriter) const override {
    auto mod = op->getParentOfType<mlir::ModuleOp>();
    fir::FirOpBuilder builder(rewriter, mod);
    mlir::Location loc = op.getLoc();

    // Map the CUDA data attribute to the matching allocator position.
    const auto dataAttr = op.getDataAttr();
    int idx = kDefaultAllocator;
    if (dataAttr == cuf::DataAttribute::Device) {
      idx = kDeviceAllocatorPos;
    } else if (dataAttr == cuf::DataAttribute::Managed) {
      idx = kManagedAllocatorPos;
    } else if (dataAttr == cuf::DataAttribute::Unified) {
      idx = kUnifiedAllocatorPos;
    } else if (dataAttr == cuf::DataAttribute::Pinned) {
      idx = kPinnedAllocatorPos;
    }

    mlir::Value index =
        builder.createIntegerConstant(loc, builder.getI32Type(), idx);
    fir::runtime::cuda::genSetAllocatorIndex(builder, loc, op.getBox(), index);
    // Erase through the rewriter so that the pattern driver is notified of
    // the removal instead of mutating the IR behind its back.
    rewriter.eraseOp(op);
    return mlir::success();
  }
};

class CUFOpConversion : public fir::impl::CUFOpConversionBase<CUFOpConversion> {
public:
void runOnOperation() override {
Expand Down Expand Up @@ -1017,8 +989,8 @@ void cuf::populateCUFToFIRConversionPatterns(
const mlir::SymbolTable &symtab, mlir::RewritePatternSet &patterns) {
patterns.insert<CUFAllocOpConversion>(patterns.getContext(), &dl, &converter);
patterns.insert<CUFAllocateOpConversion, CUFDeallocateOpConversion,
CUFFreeOpConversion, CUFSyncDescriptorOpConversion,
CUFSetAllocatorIndexOpConversion>(patterns.getContext());
CUFFreeOpConversion, CUFSyncDescriptorOpConversion>(
patterns.getContext());
patterns.insert<CUFDataTransferOpConversion>(patterns.getContext(), symtab,
&dl, &converter);
patterns.insert<CUFLaunchOpConversion, CUFDeviceAddressOpConversion>(
Expand Down
15 changes: 0 additions & 15 deletions flang/test/Fir/CUDA/cuda-alloc-free.fir
Original file line number Diff line number Diff line change
Expand Up @@ -94,19 +94,4 @@ func.func @_QQalloc_char() attributes {fir.bindc_name = "alloc_char"} {
// CHECK: %[[BYTES_CONV:.*]] = fir.convert %[[BYTES]] : (index) -> i64
// CHECK: fir.call @_FortranACUFMemAlloc(%[[BYTES_CONV]], %c0{{.*}}, %{{.*}}, %{{.*}}) {cuf.data_attr = #cuf.cuda<device>} : (i64, i32, !fir.ref<i8>, i32) -> !fir.llvm_ptr<i8>


// Test input for the cuf.set_allocator_idx conversion: a derived type with an
// allocatable component `a2` is allocated with the managed attribute, then the
// allocator index of the component descriptor is set with the device
// attribute.
func.func @_QQsetalloc() {
%0 = cuf.alloc !fir.type<_QMm1Tdt1{a2:!fir.box<!fir.heap<!fir.array<?xf32>>>}> {bindc_name = "d1", data_attr = #cuf.cuda<managed>, uniq_name = "_QFEd1"} -> !fir.ref<!fir.type<_QMm1Tdt1{a2:!fir.box<!fir.heap<!fir.array<?xf32>>>}>>
%1 = fir.coordinate_of %0, a2 : (!fir.ref<!fir.type<_QMm1Tdt1{a2:!fir.box<!fir.heap<!fir.array<?xf32>>>}>>) -> !fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>
cuf.set_allocator_idx %1 : !fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>> {data_attr = #cuf.cuda<device>}
return
}

// CHECK-LABEL: func.func @_QQsetalloc() {
// CHECK: %[[DT:.*]] = fir.call @_FortranACUFMemAlloc
// CHECK: %[[CONV:.*]] = fir.convert %[[DT]] : (!fir.llvm_ptr<i8>) -> !fir.ref<!fir.type<_QMm1Tdt1{a2:!fir.box<!fir.heap<!fir.array<?xf32>>>}>>
// CHECK: %[[COMP:.*]] = fir.coordinate_of %[[CONV]], a2 : (!fir.ref<!fir.type<_QMm1Tdt1{a2:!fir.box<!fir.heap<!fir.array<?xf32>>>}>>) -> !fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>
// CHECK: %[[DESC:.*]] = fir.convert %[[COMP]] : (!fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>) -> !fir.ref<!fir.box<none>>
// CHECK: fir.call @_FortranACUFSetAllocatorIndex(%[[DESC]], %c2{{.*}}, %{{.*}}, %{{.*}}) : (!fir.ref<!fir.box<none>>, i32, !fir.ref<i8>, i32) -> ()

} // end module
Loading