diff --git a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
index 6e53d801a0d2f..f234fcee388e3 100644
--- a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
+++ b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
@@ -33,6 +33,7 @@
 #include "llvm/Transforms/Utils/ModuleUtils.h"
 
 #include <any>
+#include <numeric>
 #include <optional>
 #include <utility>
 
@@ -1750,9 +1751,9 @@ void collectMapDataFromMapOperands(MapInfoData &mapData,
       mapData.BaseType.push_back(
           moduleTranslation.convertType(mapOp.getVarType()));
-      mapData.Sizes.push_back(getSizeInBytes(
-          dl, mapOp.getVarType(), mapOp, mapData.BasePointers.back(),
-          mapData.BaseType.back(), builder, moduleTranslation));
+      mapData.Sizes.push_back(
+          getSizeInBytes(dl, mapOp.getVarType(), mapOp, mapData.Pointers.back(),
+                         mapData.BaseType.back(), builder, moduleTranslation));
       mapData.MapClause.push_back(mapOp.getOperation());
       mapData.Types.push_back(
           llvm::omp::OpenMPOffloadMappingFlags(mapOp.getMapType().value()));
@@ -1783,6 +1784,134 @@ void collectMapDataFromMapOperands(MapInfoData &mapData,
   }
 }
 
+static int getMapDataMemberIdx(MapInfoData &mapData,
+                               mlir::omp::MapInfoOp memberOp) {
+  auto *res = llvm::find(mapData.MapClause, memberOp);
+  assert(res != mapData.MapClause.end());
+  return std::distance(mapData.MapClause.begin(), res);
+}
+
+static mlir::omp::MapInfoOp
+getFirstOrLastMappedMemberPtr(mlir::omp::MapInfoOp mapInfo, bool first) {
+  // Only 1 member has been mapped, so we can return it.
+  if (mapInfo.getMembersIndex()->size() == 1)
+    if (auto mapOp = mlir::dyn_cast<mlir::omp::MapInfoOp>(
+            mapInfo.getMembers()[0].getDefiningOp()))
+      return mapOp;
+
+  std::vector<size_t> indices(mapInfo.getMembersIndexAttr().size());
+  std::iota(indices.begin(), indices.end(), 0);
+  llvm::sort(indices.begin(), indices.end(),
+             [&](const size_t a, const size_t b) {
+               return mapInfo.getMembersIndexAttr()[a]
+                          .cast<mlir::IntegerAttr>()
+                          .getInt() < mapInfo.getMembersIndexAttr()[b]
+                                          .cast<mlir::IntegerAttr>()
+                                          .getInt();
+             });
+
+  if (auto mapOp = mlir::dyn_cast<mlir::omp::MapInfoOp>(
+          mapInfo.getMembers()[((first) ? indices.front() : indices.back())]
+              .getDefiningOp()))
+    return mapOp;
+
+  assert(false && "getFirstOrLastMappedMemberPtr could not find appropriate "
+                  "map information");
+}
+
+/// This function calculates the array/pointer offset for map data provided
+/// with bounds operations, e.g. when provided something like the following:
+///
+/// Fortran
+///     map(tofrom: array(2:5, 3:2))
+/// or
+/// C++
+///     map(tofrom: array[1:4][2:3])
+///
+/// We must calculate the initial pointer offset to pass across; this function
+/// performs this using the bounds.
+///
+/// NOTE: while the bounds are specified in row-major order, they currently
+/// need to be flipped for Fortran's column-major array allocation and access
+/// (as opposed to C++'s row-major; hence the backwards processing where
+/// order is important). This is likely important to keep in mind for the
+/// future when we incorporate a C++ frontend: both frontends will need to
+/// agree on the ordering of generated bounds operations (one may have to flip
+/// them) to make the below lowering frontend agnostic. The offload size
+/// calculation may also have to be adjusted for C++.
+std::vector<llvm::Value *>
+calculateBoundsOffset(LLVM::ModuleTranslation &moduleTranslation,
+                      llvm::IRBuilderBase &builder, bool isArrayTy,
+                      mlir::OperandRange bounds) {
+  std::vector<llvm::Value *> idx;
+  // There are no bounds to calculate an offset from, so we can safely
+  // ignore them and return no indices.
+  if (bounds.empty())
+    return idx;
+
+  // If we have an array type, then we have its type so we can treat it as a
+  // normal GEP instruction where the bounds operations are simply indexes
+  // into the array. We currently process the bounds in reverse order, which
+  // I believe leans more towards Fortran's column-major layout in memory.
+  if (isArrayTy) {
+    idx.push_back(builder.getInt64(0));
+    for (int i = bounds.size() - 1; i >= 0; --i) {
+      if (auto boundOp = mlir::dyn_cast_if_present<mlir::omp::DataBoundsOp>(
+              bounds[i].getDefiningOp())) {
+        idx.push_back(moduleTranslation.lookupValue(boundOp.getLowerBound()));
+      }
+    }
+  } else {
+    // If we do not have an array type, but we have bounds, then we're dealing
+    // with a pointer that's being treated like an array and we have the
+    // underlying type, e.g. an i32 or f64 (e.g. a Fortran descriptor base
+    // address, a pointer pointing to the actual data), so we must calculate
+    // the offset using a single index which the following two loops attempt
+    // to compute.
+
+    // Calculate the size offset we need to make per row, e.g. the first row
+    // or column only needs to be offset by one, but the next would have to be
+    // the previous row/column offset multiplied by the extent of current row.
+    //
+    // For example ([1][10][100]):
+    //
+    // - First row/column we move by 1 for each index increment
+    // - Second row/column we move by 1 (first row/column) * 10 (extent of
+    //   current), i.e. 10 for each index increment
+    // - Third row/column we move by 10 (second row/column) * 100 (extent of
+    //   current), i.e. 1000 for each index increment
+    std::vector<llvm::Value *> dimensionIndexSizeOffset{builder.getInt64(1)};
+    for (size_t i = 1; i < bounds.size(); ++i) {
+      if (auto boundOp = mlir::dyn_cast_if_present<mlir::omp::DataBoundsOp>(
+              bounds[i].getDefiningOp())) {
+        dimensionIndexSizeOffset.push_back(builder.CreateMul(
+            moduleTranslation.lookupValue(boundOp.getExtent()),
+            dimensionIndexSizeOffset[i - 1]));
+      }
+    }
+
+    // Now that we have calculated how much we move by per index, we must
+    // multiply each lower bound offset in indexes by the size offset we
+    // calculated in the previous loop and accumulate the results to get
+    // our final resulting offset.
+    for (int i = bounds.size() - 1; i >= 0; --i) {
+      if (auto boundOp = mlir::dyn_cast_if_present<mlir::omp::DataBoundsOp>(
+              bounds[i].getDefiningOp())) {
+        if (idx.empty())
+          idx.emplace_back(builder.CreateMul(
+              moduleTranslation.lookupValue(boundOp.getLowerBound()),
+              dimensionIndexSizeOffset[i]));
+        else
+          idx.back() = builder.CreateAdd(
+              idx.back(), builder.CreateMul(moduleTranslation.lookupValue(
+                                                boundOp.getLowerBound()),
+                                            dimensionIndexSizeOffset[i]));
+      }
+    }
+  }
+
+  return idx;
+}
+
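For intuition, the arithmetic emitted by the non-array branch can be modelled on the host with plain integers. This is a minimal sketch, not part of the patch; `linearOffset`, `lowerBounds` and `extents` are hypothetical stand-ins for the values the `omp.bounds` operations carry:

```cpp
#include <cstdint>
#include <iostream>
#include <vector>

// Mirrors the two loops above: build a per-dimension step size
// (dimensionIndexSizeOffset), then accumulate lowerBound * step for
// every dimension into one linear element offset.
int64_t linearOffset(const std::vector<int64_t> &lowerBounds,
                     const std::vector<int64_t> &extents) {
  std::vector<int64_t> step{1};
  for (size_t i = 1; i < extents.size(); ++i)
    step.push_back(extents[i] * step[i - 1]);

  int64_t offset = 0;
  for (size_t i = 0; i < lowerBounds.size(); ++i)
    offset += lowerBounds[i] * step[i];
  return offset;
}

int main() {
  // The ([1][10][100]) example from the comment: steps are 1, 10, 1000,
  // so lower bounds (0, 2, 3) yield 0*1 + 2*10 + 3*1000 = 3020 elements.
  std::cout << linearOffset({0, 2, 3}, {1, 10, 100}) << "\n";
}
```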
 // This creates two insertions into the MapInfosTy data structure for the
 // "parent" of a set of members, (usually a container e.g.
 // class/structure/derived type) when subsequent members have also been
@@ -1795,6 +1924,9 @@ void collectMapDataFromMapOperands(MapInfoData &mapData,
 // which is utilised in subsequent member mappings (by modifying there map type
 // with it) to indicate that a member is part of this parent and should be
 // treated by the runtime as such. Important to achieve the correct mapping.
+//
+// This function borrows a lot from its Clang parallel function
+// emitCombinedEntry inside of CGOpenMPRuntime.cpp.
 static llvm::omp::OpenMPOffloadMappingFlags mapParentWithMembers(
     LLVM::ModuleTranslation &moduleTranslation, llvm::IRBuilderBase &builder,
     llvm::OpenMPIRBuilder &ompBuilder, DataLayout &dl,
@@ -1810,7 +1942,6 @@ static llvm::omp::OpenMPOffloadMappingFlags mapParentWithMembers(
   combinedInfo.Names.emplace_back(LLVM::createMappingInformation(
       mapData.MapClause[mapDataIndex]->getLoc(), ompBuilder));
   combinedInfo.BasePointers.emplace_back(mapData.BasePointers[mapDataIndex]);
-  combinedInfo.Pointers.emplace_back(mapData.Pointers[mapDataIndex]);
 
   // Calculate size of the parent object being mapped based on the
   // addresses at runtime, highAddr - lowAddr = size. This of course
@@ -1819,42 +1950,68 @@ static llvm::omp::OpenMPOffloadMappingFlags mapParentWithMembers(
   // Fortran pointers and allocatables, the mapping of the pointed to
   // data by the descriptor (which itself, is a structure containing
   // runtime information on the dynamically allocated data).
-  llvm::Value *lowAddr = builder.CreatePointerCast(
-      mapData.Pointers[mapDataIndex], builder.getPtrTy());
-  llvm::Value *highAddr = builder.CreatePointerCast(
-      builder.CreateConstGEP1_32(mapData.BaseType[mapDataIndex],
-                                 mapData.Pointers[mapDataIndex], 1),
-      builder.getPtrTy());
+  auto parentClause =
+      mlir::dyn_cast<mlir::omp::MapInfoOp>(mapData.MapClause[mapDataIndex]);
+
+  llvm::Value *lowAddr, *highAddr;
+  if (!parentClause.getPartialMap()) {
+    lowAddr = builder.CreatePointerCast(mapData.Pointers[mapDataIndex],
+                                        builder.getPtrTy());
+    highAddr = builder.CreatePointerCast(
+        builder.CreateConstGEP1_32(mapData.BaseType[mapDataIndex],
+                                   mapData.Pointers[mapDataIndex], 1),
+        builder.getPtrTy());
+    combinedInfo.Pointers.emplace_back(mapData.Pointers[mapDataIndex]);
+  } else {
+    auto mapOp =
+        mlir::dyn_cast<mlir::omp::MapInfoOp>(mapData.MapClause[mapDataIndex]);
+    int firstMemberIdx = getMapDataMemberIdx(
+        mapData, getFirstOrLastMappedMemberPtr(mapOp, true));
+    lowAddr = builder.CreatePointerCast(mapData.Pointers[firstMemberIdx],
+                                        builder.getPtrTy());
+    int lastMemberIdx = getMapDataMemberIdx(
+        mapData, getFirstOrLastMappedMemberPtr(mapOp, false));
+    highAddr = builder.CreatePointerCast(
+        builder.CreateGEP(mapData.BaseType[lastMemberIdx],
+                          mapData.Pointers[lastMemberIdx], builder.getInt64(1)),
+        builder.getPtrTy());
+    combinedInfo.Pointers.emplace_back(mapData.Pointers[firstMemberIdx]);
+  }
+
   llvm::Value *size = builder.CreateIntCast(
       builder.CreatePtrDiff(builder.getInt8Ty(), highAddr, lowAddr),
       builder.getInt64Ty(),
       /*isSigned=*/false);
   combinedInfo.Sizes.push_back(size);
 
-  // This creates the initial MEMBER_OF mapping that consists of
-  // the parent/top level container (same as above effectively, except
-  // with a fixed initial compile time size and seperate maptype which
-  // indicates the true mape type (tofrom etc.) and that it is a part
-  // of a larger mapping and indicating the link between it and it's
-  // members that are also explicitly mapped).
+  // TODO: This will need to be expanded to include the whole host of logic
+  // for the map flags that Clang currently supports (e.g. it should take the
+  // map flag of the parent map flag, remove the OMP_MAP_TARGET_PARAM and do
+  // some further case specific flag modifications); for the moment, it
+  // handles what we support as expected.
   llvm::omp::OpenMPOffloadMappingFlags mapFlag =
       llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_TO;
-  if (isTargetParams)
-    mapFlag &= ~llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_TARGET_PARAM;
-
   llvm::omp::OpenMPOffloadMappingFlags memberOfFlag =
       ompBuilder.getMemberOfFlag(combinedInfo.BasePointers.size() - 1);
   ompBuilder.setCorrectMemberOfFlag(mapFlag, memberOfFlag);
 
-  combinedInfo.Types.emplace_back(mapFlag);
-  combinedInfo.DevicePointers.emplace_back(
-      llvm::OpenMPIRBuilder::DeviceInfoTy::None);
-  combinedInfo.Names.emplace_back(LLVM::createMappingInformation(
-      mapData.MapClause[mapDataIndex]->getLoc(), ompBuilder));
-  combinedInfo.BasePointers.emplace_back(mapData.BasePointers[mapDataIndex]);
-  combinedInfo.Pointers.emplace_back(mapData.Pointers[mapDataIndex]);
-  combinedInfo.Sizes.emplace_back(mapData.Sizes[mapDataIndex]);
-
+  // This creates the initial MEMBER_OF mapping that consists of
+  // the parent/top level container (same as above effectively, except
+  // with a fixed initial compile time size and separate map type which
+  // indicates the true map type (tofrom etc.). This parent mapping is
+  // only relevant if the structure in its totality is being mapped;
+  // otherwise the above suffices.
+  if (!parentClause.getPartialMap()) {
+    combinedInfo.Types.emplace_back(mapFlag);
+    combinedInfo.DevicePointers.emplace_back(
+        llvm::OpenMPIRBuilder::DeviceInfoTy::None);
+    combinedInfo.Names.emplace_back(LLVM::createMappingInformation(
+        mapData.MapClause[mapDataIndex]->getLoc(), ompBuilder));
+    combinedInfo.BasePointers.emplace_back(mapData.BasePointers[mapDataIndex]);
+    combinedInfo.Pointers.emplace_back(mapData.Pointers[mapDataIndex]);
+    combinedInfo.Sizes.emplace_back(mapData.Sizes[mapDataIndex]);
+  }
   return memberOfFlag;
 }
 
@@ -1871,86 +2028,99 @@ static void processMapMembersWithParent(
   for (auto mappedMembers : parentClause.getMembers()) {
     auto memberClause =
         mlir::dyn_cast<mlir::omp::MapInfoOp>(mappedMembers.getDefiningOp());
-    int memberDataIdx = -1;
-    for (size_t i = 0; i < mapData.MapClause.size(); ++i) {
-      if (mapData.MapClause[i] == memberClause)
-        memberDataIdx = i;
-    }
+    int memberDataIdx = getMapDataMemberIdx(mapData, memberClause);
 
     assert(memberDataIdx >= 0 && "could not find mapped member of structure");
 
     // Same MemberOfFlag to indicate its link with parent and other members
-    // of, and we flag that it's part of a pointer and object coupling.
+    // of the structure.
     auto mapFlag =
         llvm::omp::OpenMPOffloadMappingFlags(memberClause.getMapType().value());
     mapFlag &= ~llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_TARGET_PARAM;
+    mapFlag |= llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_MEMBER_OF;
     ompBuilder.setCorrectMemberOfFlag(mapFlag, memberOfFlag);
-    mapFlag |= llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_PTR_AND_OBJ;
+
     combinedInfo.Types.emplace_back(mapFlag);
     combinedInfo.DevicePointers.emplace_back(
         llvm::OpenMPIRBuilder::DeviceInfoTy::None);
     combinedInfo.Names.emplace_back(
         LLVM::createMappingInformation(memberClause.getLoc(), ompBuilder));
-
-    combinedInfo.BasePointers.emplace_back(mapData.BasePointers[memberDataIdx]);
-
-    std::vector<llvm::Value *> idx{builder.getInt64(0)};
-    llvm::Value *offsetAddress = nullptr;
-    if (!memberClause.getBounds().empty()) {
-      if (mapData.BaseType[memberDataIdx]->isArrayTy()) {
-        for (int i = memberClause.getBounds().size() - 1; i >= 0; --i) {
-          if (auto boundOp = mlir::dyn_cast_if_present<mlir::omp::DataBoundsOp>(
-                  memberClause.getBounds()[i].getDefiningOp())) {
-            idx.push_back(
-                moduleTranslation.lookupValue(boundOp.getLowerBound()));
-          }
-        }
-      } else {
-        std::vector<llvm::Value *> dimensionIndexSizeOffset{
-            builder.getInt64(1)};
-        for (size_t i = 1; i < memberClause.getBounds().size(); ++i) {
-          if (auto boundOp = mlir::dyn_cast_if_present<mlir::omp::DataBoundsOp>(
-                  memberClause.getBounds()[i].getDefiningOp())) {
-            dimensionIndexSizeOffset.push_back(builder.CreateMul(
-                moduleTranslation.lookupValue(boundOp.getExtent()),
-                dimensionIndexSizeOffset[i - 1]));
-          }
-        }
-
-        for (int i = memberClause.getBounds().size() - 1; i >= 0; --i) {
-          if (auto boundOp = mlir::dyn_cast_if_present<mlir::omp::DataBoundsOp>(
-                  memberClause.getBounds()[i].getDefiningOp())) {
-            if (!offsetAddress)
-              offsetAddress = builder.CreateMul(
-                  moduleTranslation.lookupValue(boundOp.getLowerBound()),
-                  dimensionIndexSizeOffset[i]);
-            else
-              offsetAddress = builder.CreateAdd(
-                  offsetAddress,
-                  builder.CreateMul(
-                      moduleTranslation.lookupValue(boundOp.getLowerBound()),
-                      dimensionIndexSizeOffset[i]));
-          }
-        }
-      }
-    }
-
-    llvm::Value *memberIdx =
-        builder.CreateLoad(builder.getPtrTy(), mapData.Pointers[memberDataIdx]);
-    memberIdx = builder.CreateInBoundsGEP(
-        mapData.BaseType[memberDataIdx], memberIdx,
-        offsetAddress ? std::vector<llvm::Value *>{offsetAddress} : idx,
-        "member_idx");
-    combinedInfo.Pointers.emplace_back(memberIdx);
+    combinedInfo.BasePointers.emplace_back(mapData.BasePointers[mapDataIndex]);
+    combinedInfo.Pointers.emplace_back(mapData.Pointers[memberDataIdx]);
     combinedInfo.Sizes.emplace_back(mapData.Sizes[memberDataIdx]);
   }
 }
 
+// This may be a bit of a naive check; the intent is to verify if the
+// mapped data being passed is a pointer -> pointee that requires special
+// handling in certain cases. There may be a better way to verify this, but
+// unfortunately with opaque pointers we lose the ability to easily check if
+// something is a pointer whilst maintaining access to the underlying type.
+static bool checkIfPointerMap(llvm::omp::OpenMPOffloadMappingFlags mapFlag) {
+  return static_cast<
+             std::underlying_type_t<llvm::omp::OpenMPOffloadMappingFlags>>(
+             mapFlag &
+             llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_PTR_AND_OBJ) != 0;
+}
+
+static void
+processIndividualMap(MapInfoData &mapData, size_t mapDataIdx,
+                     llvm::OpenMPIRBuilder::MapInfosTy &combinedInfo,
+                     bool isTargetParams, int mapDataParentIdx = -1) {
+  // Declare Target Mappings are excluded from being marked as
+  // OMP_MAP_TARGET_PARAM as they are not passed as parameters; they're
+  // marked with OMP_MAP_PTR_AND_OBJ instead.
+  auto mapFlag = mapData.Types[mapDataIdx];
+  if (isTargetParams && !mapData.IsDeclareTarget[mapDataIdx])
+    mapFlag |= llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_TARGET_PARAM;
+
+  if (auto mapInfoOp =
+          dyn_cast<mlir::omp::MapInfoOp>(mapData.MapClause[mapDataIdx]))
+    if (mapInfoOp.getMapCaptureType().value() ==
+            mlir::omp::VariableCaptureKind::ByCopy &&
+        !checkIfPointerMap(mapFlag))
+      mapFlag |= llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_LITERAL;
+
+  // If we're provided a mapDataParentIdx, then the data being mapped is
+  // part of a larger object (in a parent <-> member mapping) and in this
+  // case our BasePointer should be the parent.
+  if (mapDataParentIdx >= 0)
+    combinedInfo.BasePointers.emplace_back(
+        mapData.BasePointers[mapDataParentIdx]);
+  else
+    combinedInfo.BasePointers.emplace_back(mapData.BasePointers[mapDataIdx]);
+
+  combinedInfo.Pointers.emplace_back(mapData.Pointers[mapDataIdx]);
+  combinedInfo.DevicePointers.emplace_back(mapData.DevicePointers[mapDataIdx]);
+  combinedInfo.Names.emplace_back(mapData.Names[mapDataIdx]);
+  combinedInfo.Types.emplace_back(mapFlag);
+  combinedInfo.Sizes.emplace_back(mapData.Sizes[mapDataIdx]);
+}
+
 static void processMapWithMembersOf(
     LLVM::ModuleTranslation &moduleTranslation, llvm::IRBuilderBase &builder,
     llvm::OpenMPIRBuilder &ompBuilder, DataLayout &dl,
     llvm::OpenMPIRBuilder::MapInfosTy &combinedInfo, MapInfoData &mapData,
     uint64_t mapDataIndex, bool isTargetParams) {
+  auto parentClause =
+      mlir::dyn_cast<mlir::omp::MapInfoOp>(mapData.MapClause[mapDataIndex]);
+
+  // If we have a partial map (no parent referenced in the map clauses of the
+  // directive, only members) and only a single member, we do not need to bind
+  // the map of the member to the parent; we can pass the member separately.
+  if (parentClause.getMembers().size() == 1 && parentClause.getPartialMap()) {
+    auto memberClause = mlir::dyn_cast<mlir::omp::MapInfoOp>(
+        parentClause.getMembers()[0].getDefiningOp());
+    int memberDataIdx = getMapDataMemberIdx(mapData, memberClause);
+    // Primarily only scalars can be optimised this way, it seems; arrays
+    // need to be mapped as a regular record <-> member map even if only
+    // partially mapped.
+    if (!mapData.BaseType[memberDataIdx]->isArrayTy()) {
+      processIndividualMap(mapData, memberDataIdx, combinedInfo, isTargetParams,
+                           mapDataIndex);
+      return;
+    }
+  }
+
   llvm::omp::OpenMPOffloadMappingFlags memberOfParentFlag =
       mapParentWithMembers(moduleTranslation, builder, ompBuilder, dl,
                            combinedInfo, mapData, mapDataIndex, isTargetParams);
@@ -1959,6 +2129,79 @@ static void processMapWithMembersOf(
                               memberOfParentFlag);
 }
 
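The memberOfParentFlag plumbing above relies on the OpenMP offload ABI packing a MEMBER_OF field into the high 16 bits of the 64-bit map type. A rough, self-contained model of what getMemberOfFlag/setCorrectMemberOfFlag compute follows; treat the exact constants as assumptions (the authoritative values live in llvm/Frontend/OpenMP/OMPConstants.h):

```cpp
#include <cstdint>

// The member-of field occupies the top 16 bits of the 64-bit map type.
constexpr uint64_t kMemberOfMask = 0xffff000000000000ULL;

// A member entry stores (parent position + 1) << 48, so the runtime can
// associate it with the parent's slot in the offload arrays.
uint64_t memberOfFlag(unsigned parentPosition) {
  return static_cast<uint64_t>(parentPosition + 1) << 48;
}

// Clear any existing member-of bits, then stamp in the new ones.
uint64_t setMemberOf(uint64_t mapType, uint64_t memberOf) {
  return (mapType & ~kMemberOfMask) | memberOf;
}
```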
+// This is a variation on Clang's GenerateOpenMPCapturedVars, which
+// generates different operation (e.g. load/store) combinations for
+// arguments to the kernel, based on map capture kinds which are then
+// utilised in the combinedInfo in place of the original Map value.
+static void
+createAlteredByCaptureMap(MapInfoData &mapData,
+                          LLVM::ModuleTranslation &moduleTranslation,
+                          llvm::IRBuilderBase &builder) {
+  for (size_t i = 0; i < mapData.MapClause.size(); ++i) {
+    // If it's declare target, skip it; it's handled separately.
+    if (!mapData.IsDeclareTarget[i]) {
+      mlir::omp::VariableCaptureKind captureKind =
+          mlir::omp::VariableCaptureKind::ByRef;
+
+      auto mapOp =
+          mlir::dyn_cast_if_present<mlir::omp::MapInfoOp>(mapData.MapClause[i]);
+      captureKind = mapOp.getMapCaptureType().value_or(
+          mlir::omp::VariableCaptureKind::ByRef);
+
+      bool isPtrTy = checkIfPointerMap(
+          llvm::omp::OpenMPOffloadMappingFlags(mapOp.getMapType().value()));
+
+      // Currently handles the array sectioning lowerbound case, but more
+      // logic may be required in the future. Clang invokes EmitLValue,
+      // which has specialised logic for special Clang types such as user
+      // defines, so it is possible we will have to extend this for
+      // structures or other complex types, as the general idea is that this
+      // function mimics some of the logic from Clang that we require for
+      // kernel argument passing from host -> device.
+      switch (captureKind) {
+      case mlir::omp::VariableCaptureKind::ByRef: {
+        llvm::Value *newV = mapData.Pointers[i];
+        std::vector<llvm::Value *> offsetIdx = calculateBoundsOffset(
+            moduleTranslation, builder, mapData.BaseType[i]->isArrayTy(),
+            mapOp.getBounds());
+        if (isPtrTy)
+          newV = builder.CreateLoad(builder.getPtrTy(), newV);
+
+        if (!offsetIdx.empty())
+          newV = builder.CreateInBoundsGEP(mapData.BaseType[i], newV, offsetIdx,
+                                           "array_offset");
+        mapData.Pointers[i] = newV;
+      } break;
+      case mlir::omp::VariableCaptureKind::ByCopy: {
+        llvm::Value *newV;
+        if (mapData.Pointers[i]->getType()->isPointerTy())
+          newV = builder.CreateLoad(mapData.BaseType[i], mapData.Pointers[i]);
+        else
+          newV = mapData.Pointers[i];
+
+        if (!isPtrTy) {
+          auto curInsert = builder.saveIP();
+          builder.restoreIP(findAllocaInsertPoint(builder, moduleTranslation));
+          auto *memTempAlloc =
+              builder.CreateAlloca(builder.getPtrTy(), nullptr, ".casted");
+          builder.restoreIP(curInsert);
+
+          builder.CreateStore(newV, memTempAlloc);
+          newV = builder.CreateLoad(builder.getPtrTy(), memTempAlloc);
+        }
+
+        mapData.Pointers[i] = newV;
+        mapData.BasePointers[i] = newV;
+      } break;
+      case mlir::omp::VariableCaptureKind::This:
+      case mlir::omp::VariableCaptureKind::VLAType:
+        mapData.MapClause[i]->emitOpError("Unhandled capture kind");
+        break;
+      }
+    }
+  }
+}
+
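The ByCopy branch above is the subtle one: a non-pointer value is stored to a fresh `.casted` alloca and re-loaded as pointer-shaped data, so scalar payloads travel by value in the pointer-sized kernel-argument slot (OMP_MAP_LITERAL). A host-only sketch of that packing, with a hypothetical helper name and a little-endian assumption:

```cpp
#include <cstdint>
#include <cstring>

// Pack a 4-byte scalar into a pointer-sized slot, as the store/load through
// the ".casted" alloca effectively does for OMP_MAP_LITERAL arguments.
void *packByCopy(int32_t value) {
  void *slot = nullptr;                      // zero the full 8-byte slot
  std::memcpy(&slot, &value, sizeof(value)); // payload in the low bytes
  return slot;                               // passed in place of an address
}
```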
 // Generate all map related information and fill the combinedInfo.
 static void genMapInfos(llvm::IRBuilderBase &builder,
                         LLVM::ModuleTranslation &moduleTranslation,
@@ -1968,6 +2211,20 @@ static void genMapInfos(llvm::IRBuilderBase &builder,
                         const SmallVector<Value> &devPtrOperands = {},
                         const SmallVector<Value> &devAddrOperands = {},
                         bool isTargetParams = false) {
+  // We wish to modify some of the methods in which kernel arguments are
+  // passed based on their capture type by the target region; this can
+  // involve generating new loads and stores, which changes the
+  // MLIR value to LLVM value mapping. However, we only wish to do this
+  // locally for the current function/target and also avoid altering
+  // ModuleTranslation, so we remap the base pointer or pointer stored
+  // in the map info's corresponding MapInfoData, which is later accessed
+  // by genMapInfos and createTarget to help generate the kernel and
+  // kernel argument structure. It primarily becomes relevant in cases like
+  // bycopy, or byref ranged arrays. In the default case, we simply
+  // pass the pointer byref as both basePointer and pointer.
+  if (!moduleTranslation.getOpenMPBuilder()->Config.isTargetDevice())
+    createAlteredByCaptureMap(mapData, moduleTranslation, builder);
+
   llvm::OpenMPIRBuilder *ompBuilder = moduleTranslation.getOpenMPBuilder();
 
   auto fail = [&combinedInfo]() -> void {
@@ -1985,12 +2242,8 @@ static void genMapInfos(llvm::IRBuilderBase &builder,
   // utilise the size from any component of MapInfoData, if we can't
   // something is missing from the initial MapInfoData construction.
   for (size_t i = 0; i < mapData.MapClause.size(); ++i) {
-    // NOTE/TODO: We currently do not handle member mapping seperately from it's
-    // parent or explicit mapping of a parent and member in the same operation,
-    // this will need to change in the near future, for now we primarily handle
-    // descriptor mapping from fortran, generalised as mapping record types
-    // with implicit member maps. This lowering needs further generalisation to
-    // fully support fortran derived types, and C/C++ structures and classes.
+    // NOTE/TODO: We currently do not support arbitrary depth record
+    // type mapping.
     if (mapData.IsAMember[i])
       continue;
 
@@ -2001,27 +2254,7 @@ static void genMapInfos(llvm::IRBuilderBase &builder,
       continue;
     }
 
-    // Declare Target Mappings are excluded from being marked as
-    // OMP_MAP_TARGET_PARAM as they are not passed as parameters, they're
-    // marked with OMP_MAP_PTR_AND_OBJ instead.
-    auto mapFlag = mapData.Types[i];
-    if (mapData.IsDeclareTarget[i])
-      mapFlag |= llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_PTR_AND_OBJ;
-    else if (isTargetParams)
-      mapFlag |= llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_TARGET_PARAM;
-
-    if (auto mapInfoOp = dyn_cast<mlir::omp::MapInfoOp>(mapData.MapClause[i]))
-      if (mapInfoOp.getMapCaptureType().value() ==
-              mlir::omp::VariableCaptureKind::ByCopy &&
-          !mapInfoOp.getVarType().isa<LLVM::LLVMPointerType>())
-        mapFlag |= llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_LITERAL;
-
-    combinedInfo.BasePointers.emplace_back(mapData.BasePointers[i]);
-    combinedInfo.Pointers.emplace_back(mapData.Pointers[i]);
-    combinedInfo.DevicePointers.emplace_back(mapData.DevicePointers[i]);
-    combinedInfo.Names.emplace_back(mapData.Names[i]);
-    combinedInfo.Types.emplace_back(mapFlag);
-    combinedInfo.Sizes.emplace_back(mapData.Sizes[i]);
+    processIndividualMap(mapData, i, combinedInfo, isTargetParams);
   }
 
   auto findMapInfo = [&combinedInfo](llvm::Value *val, unsigned &index) {
@@ -2462,86 +2695,6 @@ createDeviceArgumentAccessor(MapInfoData &mapData, llvm::Argument &arg,
   return builder.saveIP();
 }
 
-// This is a variation on Clang's GenerateOpenMPCapturedVars, which
-// generates different operation (e.g. load/store) combinations for
-// arguments to the kernel, based on map capture kinds which are then
-// utilised in the combinedInfo in place of the original Map value.
-static void
-createAlteredByCaptureMap(MapInfoData &mapData,
-                          LLVM::ModuleTranslation &moduleTranslation,
-                          llvm::IRBuilderBase &builder) {
-  for (size_t i = 0; i < mapData.MapClause.size(); ++i) {
-    // if it's declare target, skip it, it's handled seperately.
-    if (!mapData.IsDeclareTarget[i]) {
-      mlir::omp::VariableCaptureKind captureKind =
-          mlir::omp::VariableCaptureKind::ByRef;
-
-      if (auto mapOp = mlir::dyn_cast_if_present<mlir::omp::MapInfoOp>(
-              mapData.MapClause[i])) {
-        captureKind = mapOp.getMapCaptureType().value_or(
-            mlir::omp::VariableCaptureKind::ByRef);
-      }
-
-      switch (captureKind) {
-      case mlir::omp::VariableCaptureKind::ByRef: {
-        // Currently handles array sectioning lowerbound case, but more
-        // logic may be required in the future. Clang invokes EmitLValue,
-        // which has specialised logic for special Clang types such as user
-        // defines, so it is possible we will have to extend this for
-        // structures or other complex types. As the general idea is that this
-        // function mimics some of the logic from Clang that we require for
-        // kernel argument passing from host -> device.
-        if (auto mapOp = mlir::dyn_cast_if_present<mlir::omp::MapInfoOp>(
-                mapData.MapClause[i])) {
-          if (!mapOp.getBounds().empty() && mapData.BaseType[i]->isArrayTy()) {
-
-            std::vector<llvm::Value *> idx =
-                std::vector<llvm::Value *>{builder.getInt64(0)};
-            for (int i = mapOp.getBounds().size() - 1; i >= 0; --i) {
-              if (auto boundOp =
-                      mlir::dyn_cast_if_present<mlir::omp::DataBoundsOp>(
-                          mapOp.getBounds()[i].getDefiningOp())) {
-                idx.push_back(
-                    moduleTranslation.lookupValue(boundOp.getLowerBound()));
-              }
-            }
-
-            mapData.Pointers[i] = builder.CreateInBoundsGEP(
-                mapData.BaseType[i], mapData.Pointers[i], idx);
-          }
-        }
-      } break;
-      case mlir::omp::VariableCaptureKind::ByCopy: {
-        llvm::Type *type = mapData.BaseType[i];
-        llvm::Value *newV;
-        if (mapData.Pointers[i]->getType()->isPointerTy())
-          newV = builder.CreateLoad(type, mapData.Pointers[i]);
-        else
-          newV = mapData.Pointers[i];
-
-        if (!type->isPointerTy()) {
-          auto curInsert = builder.saveIP();
-          builder.restoreIP(findAllocaInsertPoint(builder, moduleTranslation));
-          auto *memTempAlloc =
-              builder.CreateAlloca(builder.getPtrTy(), nullptr, ".casted");
-          builder.restoreIP(curInsert);
-
-          builder.CreateStore(newV, memTempAlloc);
-          newV = builder.CreateLoad(builder.getPtrTy(), memTempAlloc);
-        }
-
-        mapData.Pointers[i] = newV;
-        mapData.BasePointers[i] = newV;
-      } break;
-      case mlir::omp::VariableCaptureKind::This:
-      case mlir::omp::VariableCaptureKind::VLAType:
-        mapData.MapClause[i]->emitOpError("Unhandled capture kind");
-        break;
-      }
-    }
-  }
-}
-
 static LogicalResult
 convertOmpTarget(Operation &opInst, llvm::IRBuilderBase &builder,
                  LLVM::ModuleTranslation &moduleTranslation) {
@@ -2610,20 +2763,6 @@ convertOmpTarget(Operation &opInst, llvm::IRBuilderBase &builder,
   collectMapDataFromMapOperands(mapData, mapOperands, moduleTranslation, dl,
                                 builder);
 
-  // We wish to modify some of the methods in which kernel arguments are
-  // passed based on their capture type by the target region, this can
-  // involve generating new loads and stores, which changes the
-  // MLIR value to LLVM value mapping, however, we only wish to do this
-  // locally for the current function/target and also avoid altering
-  // ModuleTranslation, so we remap the base pointer or pointer stored
-  // in the map infos corresponding MapInfoData, which is later accessed
-  // by genMapInfos and createTarget to help generate the kernel and
-  // kernel arg structure. It primarily becomes relevant in cases like
-  // bycopy, or byref range'd arrays. In the default case, we simply
-  // pass thee pointer byref as both basePointer and pointer.
-  if (!moduleTranslation.getOpenMPBuilder()->Config.isTargetDevice())
-    createAlteredByCaptureMap(mapData, moduleTranslation, builder);
-
   llvm::OpenMPIRBuilder::MapInfosTy combinedInfos;
   auto genMapInfoCB = [&](llvm::OpenMPIRBuilder::InsertPointTy codeGenIP)
       -> llvm::OpenMPIRBuilder::MapInfosTy & {
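Before the test updates, it may help to see the runtime-size computation from mapParentWithMembers in scalar form. For the record type used by the new omptarget-record-type-mapping-host.mlir test further below, mapping members 1 and 2 makes the transferred parent span run from the first mapped member to one past the last. This sketch ignores the bounds-based offsets the real lowering folds in:

```cpp
#include <cstdio>

struct Record {
  float a;     // not mapped
  int arr[10]; // member index 1: first mapped member -> lowAddr
  int scalar;  // member index 2: last mapped member  -> highAddr - 1
};

int main() {
  Record r;
  auto *low = reinterpret_cast<char *>(&r.arr);
  auto *high = reinterpret_cast<char *>(&r.scalar + 1); // one past the end
  std::printf("parent span = %td bytes\n", high - low); // 44 with no padding
}
```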
diff --git a/mlir/test/Target/LLVMIR/omptarget-fortran-allocatable-types-host.mlir b/mlir/test/Target/LLVMIR/omptarget-fortran-allocatable-types-host.mlir
index 831cd05871c4e..30a76795952f7 100644
--- a/mlir/test/Target/LLVMIR/omptarget-fortran-allocatable-types-host.mlir
+++ b/mlir/test/Target/LLVMIR/omptarget-fortran-allocatable-types-host.mlir
@@ -2,10 +2,9 @@
 
 // This test checks the offload sizes, map types and base pointers and pointers
 // provided to the OpenMP kernel argument structure are correct when lowering
-// to LLVM-IR from MLIR when the fortran allocatables flag is switched on and
-// a fortran allocatable descriptor type is provided alongside the omp.map_info,
-// the test utilises mapping of array sections, full arrays and individual
-// allocated scalars.
+// to LLVM-IR from MLIR when a Fortran allocatable descriptor type is provided
+// alongside the omp.map_info; the test utilises mapping of array sections,
+// full arrays and individual allocated scalars.
 
 module attributes {omp.is_target_device = false} {
   llvm.func @_QQmain() {
@@ -26,8 +25,8 @@ module attributes {omp.is_target_device = false} {
     %14 = llvm.sub %11, %2 : i64
     %15 = omp.bounds lower_bound(%7 : i64) upper_bound(%14 : i64) extent(%11 : i64) stride(%13 : i64) start_idx(%9 : i64) {stride_in_bytes = true}
     %16 = llvm.getelementptr %3[0, 0] : (!llvm.ptr) -> !llvm.ptr, !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8, array<1 x array<3 x i64>>)>
-    %17 = omp.map_info var_ptr(%16 : !llvm.ptr, f32) map_clauses(tofrom) capture(ByRef) bounds(%15) -> !llvm.ptr {name = "full_arr"}
-    %18 = omp.map_info var_ptr(%3 : !llvm.ptr, !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8, array<1 x array<3 x i64>>)>) map_clauses(tofrom) capture(ByRef) members(%17 : !llvm.ptr) -> !llvm.ptr {name = "full_arr"}
+    %17 = omp.map_info var_ptr(%16 : !llvm.ptr, f32) map_clauses(ptr_and_obj, tofrom) capture(ByRef) bounds(%15) -> !llvm.ptr {name = "full_arr"}
+    %18 = omp.map_info var_ptr(%3 : !llvm.ptr, !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8, array<1 x array<3 x i64>>)>) map_clauses(tofrom) capture(ByRef) members(%17 : !llvm.ptr : [0]) -> !llvm.ptr {name = "full_arr"}
     %19 = llvm.getelementptr %6[0, 7, %7, 0] : (!llvm.ptr, i64) -> !llvm.ptr, !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8, array<1 x array<3 x i64>>)>
     %20 = llvm.load %19 : !llvm.ptr -> i64
     %21 = llvm.getelementptr %6[0, 7, %7, 1] : (!llvm.ptr, i64) -> !llvm.ptr, !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8, array<1 x array<3 x i64>>)>
@@ -38,11 +37,11 @@ module attributes {omp.is_target_device = false} {
     %26 = llvm.sub %0, %20 : i64
     %27 = omp.bounds lower_bound(%25 : i64) upper_bound(%26 : i64) extent(%22 : i64) stride(%24 : i64) start_idx(%20 : i64) {stride_in_bytes = true}
     %28 = llvm.getelementptr %6[0, 0] : (!llvm.ptr) -> !llvm.ptr, !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8, array<1 x array<3 x i64>>)>
-    %29 = omp.map_info var_ptr(%6 : !llvm.ptr, i32) var_ptr_ptr(%28 : !llvm.ptr) map_clauses(tofrom) capture(ByRef) bounds(%27) -> !llvm.ptr {name = "sect_arr(2:5)"}
-    %30 = omp.map_info var_ptr(%6 : !llvm.ptr, !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8, array<1 x array<3 x i64>>)>) map_clauses(tofrom) capture(ByRef) members(%29 : !llvm.ptr) -> !llvm.ptr {name = "sect_arr(2:5)"}
+    %29 =
omp.map_info var_ptr(%6 : !llvm.ptr, i32) var_ptr_ptr(%28 : !llvm.ptr) map_clauses(ptr_and_obj, tofrom) capture(ByRef) bounds(%27) -> !llvm.ptr {name = "sect_arr(2:5)"} + %30 = omp.map_info var_ptr(%6 : !llvm.ptr, !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8, array<1 x array<3 x i64>>)>) map_clauses(tofrom) capture(ByRef) members(%29 : !llvm.ptr : [0]) -> !llvm.ptr {name = "sect_arr(2:5)"} %31 = llvm.getelementptr %5[0, 0] : (!llvm.ptr) -> !llvm.ptr, !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8)> - %32 = omp.map_info var_ptr(%5 : !llvm.ptr, f32) var_ptr_ptr(%31 : !llvm.ptr) map_clauses(tofrom) capture(ByRef) -> !llvm.ptr {name = "scalar"} - %33 = omp.map_info var_ptr(%5 : !llvm.ptr, !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8)>) map_clauses(tofrom) capture(ByRef) members(%32 : !llvm.ptr) -> !llvm.ptr {name = "scalar"} + %32 = omp.map_info var_ptr(%5 : !llvm.ptr, f32) var_ptr_ptr(%31 : !llvm.ptr) map_clauses(ptr_and_obj, tofrom) capture(ByRef) -> !llvm.ptr {name = "scalar"} + %33 = omp.map_info var_ptr(%5 : !llvm.ptr, !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8)>) map_clauses(tofrom) capture(ByRef) members(%32 : !llvm.ptr : [0]) -> !llvm.ptr {name = "scalar"} omp.target map_entries(%17 -> %arg0, %18 -> %arg1, %29 -> %arg2, %30 -> %arg3, %32 -> %arg4, %33 -> %arg5 : !llvm.ptr, !llvm.ptr, !llvm.ptr, !llvm.ptr, !llvm.ptr, !llvm.ptr) { ^bb0(%arg0: !llvm.ptr, %arg1: !llvm.ptr, %arg2: !llvm.ptr, %arg3: !llvm.ptr, %arg4: !llvm.ptr, %arg5: !llvm.ptr): omp.terminator @@ -81,20 +80,19 @@ module attributes {omp.is_target_device = false} { // CHECK: %[[ARR_SECT_SIZE2:.*]] = add i64 %[[ARR_SECT_SIZE3]], 1 // CHECK: %[[ARR_SECT_SIZE1:.*]] = mul i64 1, %[[ARR_SECT_SIZE2]] // CHECK: %[[ARR_SECT_SIZE:.*]] = mul i64 %[[ARR_SECT_SIZE1]], 4 -// CHECK: %[[FULL_ARR_DESC_SIZE:.*]] = sdiv exact i64 sub (i64 ptrtoint (ptr getelementptr inbounds ({ ptr, i64, i32, i8, i8, i8, i8, [1 x [3 x i64]] }, ptr @_QFEfull_arr, i32 1) to i64), i64 ptrtoint (ptr @_QFEfull_arr to i64)), ptrtoint (ptr getelementptr (i8, ptr null, i32 1) to i64) // CHECK: %[[LFULL_ARR:.*]] = load ptr, ptr @_QFEfull_arr, align 8 // CHECK: %[[FULL_ARR_PTR:.*]] = getelementptr inbounds float, ptr %[[LFULL_ARR]], i64 0 -// CHECK: %[[ARR_SECT_DESC_SIZE:.*]] = sdiv exact i64 sub (i64 ptrtoint (ptr getelementptr inbounds ({ ptr, i64, i32, i8, i8, i8, i8, [1 x [3 x i64]] }, ptr @_QFEsect_arr, i32 1) to i64), i64 ptrtoint (ptr @_QFEsect_arr to i64)), ptrtoint (ptr getelementptr (i8, ptr null, i32 1) to i64) // CHECK: %[[ARR_SECT_OFFSET1:.*]] = mul i64 %[[ARR_SECT_OFFSET2]], 1 // CHECK: %[[LARR_SECT:.*]] = load ptr, ptr @_QFEsect_arr, align 8 // CHECK: %[[ARR_SECT_PTR:.*]] = getelementptr inbounds i32, ptr %[[LARR_SECT]], i64 %[[ARR_SECT_OFFSET1]] +// CHECK: %[[SCALAR_PTR_LOAD:.*]] = load ptr, ptr %[[SCALAR_BASE]], align 8 +// CHECK: %[[FULL_ARR_DESC_SIZE:.*]] = sdiv exact i64 sub (i64 ptrtoint (ptr getelementptr inbounds ({ ptr, i64, i32, i8, i8, i8, i8, [1 x [3 x i64]] }, ptr @_QFEfull_arr, i32 1) to i64), i64 ptrtoint (ptr @_QFEfull_arr to i64)), ptrtoint (ptr getelementptr (i8, ptr null, i32 1) to i64) +// CHECK: %[[ARR_SECT_DESC_SIZE:.*]] = sdiv exact i64 sub (i64 ptrtoint (ptr getelementptr inbounds ({ ptr, i64, i32, i8, i8, i8, i8, [1 x [3 x i64]] }, ptr @_QFEsect_arr, i32 1) to i64), i64 ptrtoint (ptr @_QFEsect_arr to i64)), ptrtoint (ptr getelementptr (i8, ptr null, i32 1) to i64) // CHECK: %[[SCALAR_DESC_SZ4:.*]] = getelementptr { ptr, i64, i32, i8, i8, i8, i8 }, ptr %[[SCALAR_ALLOCA]], i32 1 // CHECK: %[[SCALAR_DESC_SZ3:.*]] = ptrtoint ptr 
%[[SCALAR_DESC_SZ4]] to i64 // CHECK: %[[SCALAR_DESC_SZ2:.*]] = ptrtoint ptr %[[SCALAR_ALLOCA]] to i64 // CHECK: %[[SCALAR_DESC_SZ1:.*]] = sub i64 %[[SCALAR_DESC_SZ3]], %[[SCALAR_DESC_SZ2]] // CHECK: %[[SCALAR_DESC_SZ:.*]] = sdiv exact i64 %[[SCALAR_DESC_SZ1]], ptrtoint (ptr getelementptr (i8, ptr null, i32 1) to i64) -// CHECK: %[[SCALAR_PTR_LOAD:.*]] = load ptr, ptr %[[SCALAR_BASE]], align 8 -// CHECK: %[[SCALAR_PTR:.*]] = getelementptr inbounds float, ptr %[[SCALAR_PTR_LOAD]], i64 0 // CHECK: %[[OFFLOADBASEPTRS:.*]] = getelementptr inbounds [9 x ptr], ptr %.offload_baseptrs, i32 0, i32 0 // CHECK: store ptr @_QFEfull_arr, ptr %[[OFFLOADBASEPTRS]], align 8 @@ -143,6 +141,6 @@ module attributes {omp.is_target_device = false} { // CHECK: %[[OFFLOADPTRS:.*]] = getelementptr inbounds [9 x ptr], ptr %.offload_ptrs, i32 0, i32 7 // CHECK: store ptr %[[SCALAR_ALLOCA]], ptr %[[OFFLOADPTRS]], align 8 // CHECK: %[[OFFLOADBASEPTRS:.*]] = getelementptr inbounds [9 x ptr], ptr %.offload_baseptrs, i32 0, i32 8 -// CHECK: store ptr %[[SCALAR_BASE]], ptr %[[OFFLOADBASEPTRS]], align 8 +// CHECK: store ptr %[[SCALAR_ALLOCA]], ptr %[[OFFLOADBASEPTRS]], align 8 // CHECK: %[[OFFLOADPTRS:.*]] = getelementptr inbounds [9 x ptr], ptr %.offload_ptrs, i32 0, i32 8 -// CHECK: store ptr %[[SCALAR_PTR]], ptr %[[OFFLOADPTRS]], align 8 +// CHECK: store ptr %[[SCALAR_PTR_LOAD]], ptr %[[OFFLOADPTRS]], align 8 diff --git a/mlir/test/Target/LLVMIR/omptarget-llvm.mlir b/mlir/test/Target/LLVMIR/omptarget-llvm.mlir index b089d47f795df..7f575e9b9be2b 100644 --- a/mlir/test/Target/LLVMIR/omptarget-llvm.mlir +++ b/mlir/test/Target/LLVMIR/omptarget-llvm.mlir @@ -66,20 +66,21 @@ llvm.func @_QPopenmp_target_data_region(%0 : !llvm.ptr) { // CHECK: %[[VAL_2:.*]] = alloca [1 x ptr], align 8 // CHECK: br label %[[VAL_3:.*]] // CHECK: entry: ; preds = %[[VAL_4:.*]] +// CHECK: %[[ARR_OFFSET:.*]] = getelementptr inbounds [1024 x i32], ptr %[[ARR_DATA:.*]], i64 0, i64 0 // CHECK: %[[VAL_5:.*]] = getelementptr inbounds [1 x ptr], ptr %[[VAL_0]], i32 0, i32 0 -// CHECK: store ptr %[[VAL_6:.*]], ptr %[[VAL_5]], align 8 -// CHECK: %[[VAL_7:.*]] = getelementptr inbounds [1 x ptr], ptr %[[VAL_1]], i32 0, i32 0 -// CHECK: store ptr %[[VAL_6]], ptr %[[VAL_7]], align 8 -// CHECK: %[[VAL_8:.*]] = getelementptr inbounds [1 x ptr], ptr %[[VAL_2]], i64 0, i64 0 -// CHECK: store ptr null, ptr %[[VAL_8]], align 8 -// CHECK: %[[VAL_9:.*]] = getelementptr inbounds [1 x ptr], ptr %[[VAL_0]], i32 0, i32 0 -// CHECK: %[[VAL_10:.*]] = getelementptr inbounds [1 x ptr], ptr %[[VAL_1]], i32 0, i32 0 -// CHECK: call void @__tgt_target_data_begin_mapper(ptr @2, i64 -1, i32 1, ptr %[[VAL_9]], ptr %[[VAL_10]], ptr @.offload_sizes, ptr @.offload_maptypes, ptr @.offload_mapnames, ptr null) -// CHECK: %[[VAL_11:.*]] = getelementptr [1024 x i32], ptr %[[VAL_6]], i32 0, i64 0 -// CHECK: store i32 99, ptr %[[VAL_11]], align 4 -// CHECK: %[[VAL_12:.*]] = getelementptr inbounds [1 x ptr], ptr %[[VAL_0]], i32 0, i32 0 -// CHECK: %[[VAL_13:.*]] = getelementptr inbounds [1 x ptr], ptr %[[VAL_1]], i32 0, i32 0 -// CHECK: call void @__tgt_target_data_end_mapper(ptr @2, i64 -1, i32 1, ptr %[[VAL_12]], ptr %[[VAL_13]], ptr @.offload_sizes, ptr @.offload_maptypes, ptr @.offload_mapnames, ptr null) +// CHECK: store ptr %[[ARR_DATA]], ptr %[[VAL_5]], align 8 +// CHECK: %[[VAL_6:.*]] = getelementptr inbounds [1 x ptr], ptr %[[VAL_1]], i32 0, i32 0 +// CHECK: store ptr %[[ARR_OFFSET]], ptr %[[VAL_6]], align 8 +// CHECK: %[[VAL_7:.*]] = getelementptr inbounds [1 x ptr], ptr %[[VAL_2]], 
i64 0, i64 0 +// CHECK: store ptr null, ptr %[[VAL_7]], align 8 +// CHECK: %[[VAL_8:.*]] = getelementptr inbounds [1 x ptr], ptr %[[VAL_0]], i32 0, i32 0 +// CHECK: %[[VAL_9:.*]] = getelementptr inbounds [1 x ptr], ptr %[[VAL_1]], i32 0, i32 0 +// CHECK: call void @__tgt_target_data_begin_mapper(ptr @2, i64 -1, i32 1, ptr %[[VAL_8]], ptr %[[VAL_9]], ptr @.offload_sizes, ptr @.offload_maptypes, ptr @.offload_mapnames, ptr null) +// CHECK: %[[VAL_10:.*]] = getelementptr [1024 x i32], ptr %[[ARR_DATA]], i32 0, i64 0 +// CHECK: store i32 99, ptr %[[VAL_10]], align 4 +// CHECK: %[[VAL_11:.*]] = getelementptr inbounds [1 x ptr], ptr %[[VAL_0]], i32 0, i32 0 +// CHECK: %[[VAL_12:.*]] = getelementptr inbounds [1 x ptr], ptr %[[VAL_1]], i32 0, i32 0 +// CHECK: call void @__tgt_target_data_end_mapper(ptr @2, i64 -1, i32 1, ptr %[[VAL_11]], ptr %[[VAL_12]], ptr @.offload_sizes, ptr @.offload_maptypes, ptr @.offload_mapnames, ptr null) // CHECK: ret void // ----- @@ -153,16 +154,18 @@ llvm.func @_QPomp_target_enter_exit(%1 : !llvm.ptr, %3 : !llvm.ptr) { // CHECK: entry: ; preds = %[[VAL_12:.*]] // CHECK: br i1 %[[VAL_9]], label %[[VAL_13:.*]], label %[[VAL_14:.*]] // CHECK: omp_if.then: ; preds = %[[VAL_11]] +// CHECK: %[[ARR_OFFSET1:.*]] = getelementptr inbounds [1024 x i32], ptr %[[VAL_16:.*]], i64 0, i64 0 +// CHECK: %[[ARR_OFFSET2:.*]] = getelementptr inbounds [512 x i32], ptr %[[VAL_20:.*]], i64 0, i64 0 // CHECK: %[[VAL_15:.*]] = getelementptr inbounds [2 x ptr], ptr %[[VAL_3]], i32 0, i32 0 -// CHECK: store ptr %[[VAL_16:.*]], ptr %[[VAL_15]], align 8 +// CHECK: store ptr %[[VAL_16]], ptr %[[VAL_15]], align 8 // CHECK: %[[VAL_17:.*]] = getelementptr inbounds [2 x ptr], ptr %[[VAL_4]], i32 0, i32 0 -// CHECK: store ptr %[[VAL_16]], ptr %[[VAL_17]], align 8 +// CHECK: store ptr %[[ARR_OFFSET1]], ptr %[[VAL_17]], align 8 // CHECK: %[[VAL_18:.*]] = getelementptr inbounds [2 x ptr], ptr %[[VAL_5]], i64 0, i64 0 // CHECK: store ptr null, ptr %[[VAL_18]], align 8 // CHECK: %[[VAL_19:.*]] = getelementptr inbounds [2 x ptr], ptr %[[VAL_3]], i32 0, i32 1 -// CHECK: store ptr %[[VAL_20:.*]], ptr %[[VAL_19]], align 8 +// CHECK: store ptr %[[VAL_20]], ptr %[[VAL_19]], align 8 // CHECK: %[[VAL_21:.*]] = getelementptr inbounds [2 x ptr], ptr %[[VAL_4]], i32 0, i32 1 -// CHECK: store ptr %[[VAL_20]], ptr %[[VAL_21]], align 8 +// CHECK: store ptr %[[ARR_OFFSET2]], ptr %[[VAL_21]], align 8 // CHECK: %[[VAL_22:.*]] = getelementptr inbounds [2 x ptr], ptr %[[VAL_5]], i64 0, i64 1 // CHECK: store ptr null, ptr %[[VAL_22]], align 8 // CHECK: %[[VAL_23:.*]] = getelementptr inbounds [2 x ptr], ptr %[[VAL_3]], i32 0, i32 0 @@ -176,26 +179,28 @@ llvm.func @_QPomp_target_enter_exit(%1 : !llvm.ptr, %3 : !llvm.ptr) { // CHECK: %[[VAL_27:.*]] = icmp sgt i32 %[[VAL_26]], 10 // CHECK: %[[VAL_28:.*]] = load i32, ptr %[[VAL_6]], align 4 // CHECK: br i1 %[[VAL_27]], label %[[VAL_29:.*]], label %[[VAL_30:.*]] -// CHECK: omp_if.then1: ; preds = %[[VAL_25]] +// CHECK: omp_if.then2: ; preds = %[[VAL_25]] +// CHECK: %[[ARR_OFFSET3:.*]] = getelementptr inbounds [1024 x i32], ptr %[[VAL_16]], i64 0, i64 0 +// CHECK: %[[ARR_OFFSET4:.*]] = getelementptr inbounds [512 x i32], ptr %[[VAL_20]], i64 0, i64 0 // CHECK: %[[VAL_31:.*]] = getelementptr inbounds [2 x ptr], ptr %[[VAL_0]], i32 0, i32 0 // CHECK: store ptr %[[VAL_16]], ptr %[[VAL_31]], align 8 // CHECK: %[[VAL_32:.*]] = getelementptr inbounds [2 x ptr], ptr %[[VAL_1]], i32 0, i32 0 -// CHECK: store ptr %[[VAL_16]], ptr %[[VAL_32]], align 8 +// CHECK: store ptr %[[ARR_OFFSET3]], ptr 
%[[VAL_32]], align 8
 // CHECK: %[[VAL_33:.*]] = getelementptr inbounds [2 x ptr], ptr %[[VAL_2]], i64 0, i64 0
 // CHECK: store ptr null, ptr %[[VAL_33]], align 8
 // CHECK: %[[VAL_34:.*]] = getelementptr inbounds [2 x ptr], ptr %[[VAL_0]], i32 0, i32 1
 // CHECK: store ptr %[[VAL_20]], ptr %[[VAL_34]], align 8
 // CHECK: %[[VAL_35:.*]] = getelementptr inbounds [2 x ptr], ptr %[[VAL_1]], i32 0, i32 1
-// CHECK: store ptr %[[VAL_20]], ptr %[[VAL_35]], align 8
+// CHECK: store ptr %[[ARR_OFFSET4]], ptr %[[VAL_35]], align 8
 // CHECK: %[[VAL_36:.*]] = getelementptr inbounds [2 x ptr], ptr %[[VAL_2]], i64 0, i64 1
 // CHECK: store ptr null, ptr %[[VAL_36]], align 8
 // CHECK: %[[VAL_37:.*]] = getelementptr inbounds [2 x ptr], ptr %[[VAL_0]], i32 0, i32 0
 // CHECK: %[[VAL_38:.*]] = getelementptr inbounds [2 x ptr], ptr %[[VAL_1]], i32 0, i32 0
 // CHECK: call void @__tgt_target_data_end_mapper(ptr @3, i64 -1, i32 2, ptr %[[VAL_37]], ptr %[[VAL_38]], ptr @.offload_sizes.1, ptr @.offload_maptypes.2, ptr @.offload_mapnames.3, ptr null)
 // CHECK: br label %[[VAL_39:.*]]
-// CHECK: omp_if.else5: ; preds = %[[VAL_25]]
+// CHECK: omp_if.else8: ; preds = %[[VAL_25]]
 // CHECK: br label %[[VAL_39]]
-// CHECK: omp_if.end6: ; preds = %[[VAL_30]], %[[VAL_29]]
+// CHECK: omp_if.end9: ; preds = %[[VAL_30]], %[[VAL_29]]
 // CHECK: ret void
 
 // -----
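The new test added below exercises a partial record map, i.e. only members of the record appear in the map clause. In C++ terms, the shape being modelled is roughly the following (a hypothetical function for illustration; the MLIR in the test is hand-written rather than generated from this source):

```cpp
struct S {
  float a;
  int arr[10];
  int scalar;
};

void target_member_map(S &s) {
// Only two members of s are mapped; the parent entry is synthesised by the
// lowering with partial_map semantics rather than mapped in its entirety.
#pragma omp target map(tofrom : s.arr[1:4], s.scalar)
  {
    s.scalar += s.arr[2];
  }
}
```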
diff --git a/mlir/test/Target/LLVMIR/omptarget-record-type-mapping-host.mlir b/mlir/test/Target/LLVMIR/omptarget-record-type-mapping-host.mlir
new file mode 100644
index 0000000000000..12e98acf6d82e
--- /dev/null
+++ b/mlir/test/Target/LLVMIR/omptarget-record-type-mapping-host.mlir
@@ -0,0 +1,63 @@
+// RUN: mlir-translate -mlir-to-llvmir %s | FileCheck %s
+
+// This test checks the offload sizes, map types and base pointers and pointers
+// provided to the OpenMP kernel argument structure are correct when lowering
+// to LLVM-IR from MLIR when performing explicit member mapping of a record type
+// (C++/C class/structure, Fortran derived type) where only members of the record
+// type are mapped.
+
+module attributes {omp.is_target_device = false} {
+llvm.func @_QQmain() {
+  %0 = llvm.mlir.constant(10 : index) : i64
+  %1 = llvm.mlir.constant(4 : index) : i64
+  %2 = llvm.mlir.constant(1 : index) : i64
+  %3 = llvm.mlir.constant(1 : i64) : i64
+  %4 = llvm.alloca %3 x !llvm.struct<(f32, array<10 x i32>, i32)> : (i64) -> !llvm.ptr
+  %5 = llvm.mlir.constant(2 : i32) : i32
+  %6 = llvm.getelementptr %4[0, 2] : (!llvm.ptr) -> !llvm.ptr, !llvm.struct<(f32, array<10 x i32>, i32)>
+  %7 = omp.map_info var_ptr(%6 : !llvm.ptr, i32) map_clauses(tofrom) capture(ByRef) -> !llvm.ptr
+  %8 = llvm.mlir.constant(1 : i32) : i32
+  %9 = llvm.getelementptr %4[0, 1] : (!llvm.ptr) -> !llvm.ptr, !llvm.struct<(f32, array<10 x i32>, i32)>
+  %10 = omp.bounds lower_bound(%2 : i64) upper_bound(%1 : i64) extent(%0 : i64) stride(%2 : i64) start_idx(%2 : i64)
+  %11 = omp.map_info var_ptr(%9 : !llvm.ptr, !llvm.array<10 x i32>) map_clauses(tofrom) capture(ByRef) bounds(%10) -> !llvm.ptr
+  %12 = omp.map_info var_ptr(%4 : !llvm.ptr, !llvm.struct<(f32, array<10 x i32>, i32)>) map_clauses(tofrom) capture(ByRef) members(%7, %11 : !llvm.ptr, !llvm.ptr : [2, 1]) -> !llvm.ptr {partial_map = true}
+  omp.target map_entries(%7 -> %arg0, %11 -> %arg1, %12 -> %arg2 : !llvm.ptr, !llvm.ptr, !llvm.ptr) {
+  ^bb0(%arg0: !llvm.ptr, %arg1: !llvm.ptr, %arg2: !llvm.ptr):
+    omp.terminator
+  }
+  llvm.return
+  }
+}
+
+// CHECK: @.offload_sizes = private unnamed_addr constant [3 x i64] [i64 0, i64 4, i64 16]
+// CHECK: @.offload_maptypes = private unnamed_addr constant [3 x i64] [i64 32, i64 281474976710659, i64 281474976710659]
+
+// CHECK: define void @_QQmain()
+// CHECK: %[[ALLOCA:.*]] = alloca { float, [10 x i32], i32 }, i64 1, align 8
+// CHECK: %[[MEMBER_ACCESS_1:.*]] = getelementptr { float, [10 x i32], i32 }, ptr %[[ALLOCA]], i32 0, i32 2
+// CHECK: %[[MEMBER_ACCESS_2:.*]] = getelementptr { float, [10 x i32], i32 }, ptr %[[ALLOCA]], i32 0, i32 1
+
+// CHECK: %[[LAST_MEMBER:.*]] = getelementptr inbounds [10 x i32], ptr %[[MEMBER_ACCESS_2]], i64 0, i64 1
+// CHECK: %[[FIRST_MEMBER:.*]] = getelementptr i32, ptr %[[MEMBER_ACCESS_1]], i64 1
+// CHECK: %[[FIRST_MEMBER_OFF:.*]] = ptrtoint ptr %[[FIRST_MEMBER]] to i64
+// CHECK: %[[SECOND_MEMBER_OFF:.*]] = ptrtoint ptr %[[LAST_MEMBER]] to i64
+// CHECK: %[[MEMBER_DIFF:.*]] = sub i64 %[[FIRST_MEMBER_OFF]], %[[SECOND_MEMBER_OFF]]
+// CHECK: %[[OFFLOAD_SIZE:.*]] = sdiv exact i64 %[[MEMBER_DIFF]], ptrtoint (ptr getelementptr (i8, ptr null, i32 1) to i64)
+
+// CHECK: %[[BASE_PTR_ARR:.*]] = getelementptr inbounds [3 x ptr], ptr %.offload_baseptrs, i32 0, i32 0
+// CHECK: store ptr %[[ALLOCA]], ptr %[[BASE_PTR_ARR]], align 8
+// CHECK: %[[PTR_ARR:.*]] = getelementptr inbounds [3 x ptr], ptr %.offload_ptrs, i32 0, i32 0
+// CHECK: store ptr %[[LAST_MEMBER]], ptr %[[PTR_ARR]], align 8
+// CHECK: %[[SIZE_ARR:.*]] = getelementptr inbounds [3 x i64], ptr %.offload_sizes, i32 0, i32 0
+// CHECK: store i64 %[[OFFLOAD_SIZE]], ptr %[[SIZE_ARR]], align 8
+
+// CHECK: %[[BASE_PTR_ARR_2:.*]] = getelementptr inbounds [3 x ptr], ptr %.offload_baseptrs, i32 0, i32 1
+// CHECK: store ptr %[[ALLOCA]], ptr %[[BASE_PTR_ARR_2]], align 8
+// CHECK: %[[PTR_ARR_2:.*]] = getelementptr inbounds [3 x ptr], ptr %.offload_ptrs, i32 0, i32 1
+// CHECK: store ptr %[[MEMBER_ACCESS_1]], ptr %[[PTR_ARR_2]], align 8
+
+// CHECK: %[[BASE_PTR_ARR_3:.*]] = getelementptr inbounds [3 x ptr], ptr %.offload_baseptrs, i32 0, i32 2
+// CHECK: store ptr %[[ALLOCA]], ptr %[[BASE_PTR_ARR_3]], align 8
+// CHECK: %[[PTR_ARR_3:.*]] = getelementptr
inbounds [3 x ptr], ptr %.offload_ptrs, i32 0, i32 2 +// CHECK: store ptr %[[LAST_MEMBER]], ptr %[[PTR_ARR_3]], align 8 + \ No newline at end of file
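As a closing sanity check, the `.offload_maptypes` constants in the new test decode to exactly the flags the lowering above sets. A small hedged decoder, with flag values taken from llvm/Frontend/OpenMP/OMPConstants.h (TO=0x1, FROM=0x2, TARGET_PARAM=0x20, MEMBER_OF in the high 16 bits):

```cpp
#include <cstdint>
#include <cstdio>

int main() {
  // 32 -> TARGET_PARAM only (the partial-map parent entry).
  // 281474976710659 = 0x0001000000000003 -> TO | FROM | MEMBER_OF(1),
  // i.e. a tofrom member belonging to the entry at position 0.
  const uint64_t types[] = {32, 281474976710659ULL, 281474976710659ULL};
  for (uint64_t t : types)
    std::printf("to=%d from=%d target_param=%d member_of=%d\n",
                int(t & 0x1), int((t >> 1) & 0x1), int((t >> 5) & 0x1),
                int(t >> 48));
}
```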