diff --git a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp index 875ce11391587..67875f668d4d3 100644 --- a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp +++ b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp @@ -1537,13 +1537,6 @@ convertOmpThreadprivate(Operation &opInst, llvm::IRBuilderBase &builder, return success(); } -int64_t getSizeInBytes(DataLayout &DL, const Type &type, const Type &eleType) { - if (isa(type)) - return DL.getTypeSize(eleType); - - return 0; -} - static llvm::OffloadEntriesInfoManager::OMPTargetDeviceClauseKind convertToDeviceClauseKind(mlir::omp::DeclareTargetDeviceType deviceClause) { switch (deviceClause) { @@ -1638,13 +1631,141 @@ getRefPtrIfDeclareTarget(mlir::Value value, return nullptr; } +// A small helper structure to contain data gathered +// for map lowering and coalese it into one area and +// avoiding extra computations such as searches in the +// llvm module for lowered mapped varibles or checking +// if something is declare target (and retrieving the +// value) more than neccessary. +struct MapInfoData : llvm::OpenMPIRBuilder::MapInfosTy { + llvm::SmallVector IsDeclareTarget; + llvm::SmallVector MapClause; + llvm::SmallVector OriginalValue; + // Stripped off array/pointer to get the underlying + // element type + llvm::SmallVector BaseType; + + /// Append arrays in \a CurInfo. + void append(MapInfoData &CurInfo) { + IsDeclareTarget.append(CurInfo.IsDeclareTarget.begin(), + CurInfo.IsDeclareTarget.end()); + MapClause.append(CurInfo.MapClause.begin(), CurInfo.MapClause.end()); + OriginalValue.append(CurInfo.OriginalValue.begin(), + CurInfo.OriginalValue.end()); + BaseType.append(CurInfo.BaseType.begin(), CurInfo.BaseType.end()); + llvm::OpenMPIRBuilder::MapInfosTy::append(CurInfo); + } +}; + +uint64_t getArrayElementSizeInBits(LLVM::LLVMArrayType arrTy, DataLayout &dl) { + if (auto nestedArrTy = llvm::dyn_cast_if_present( + arrTy.getElementType())) + return getArrayElementSizeInBits(nestedArrTy, dl); + return dl.getTypeSizeInBits(arrTy.getElementType()); +} + +// This function calculates the size to be offloaded for a specified type, given +// its associated map clause (which can contain bounds information which affects +// the total size), this size is calculated based on the underlying element type +// e.g. given a 1-D array of ints, we will calculate the size from the integer +// type * number of elements in the array. This size can be used in other +// calculations but is ultimately used as an argument to the OpenMP runtimes +// kernel argument structure which is generated through the combinedInfo data +// structures. +// This function is somewhat equivalent to Clang's getExprTypeSize inside of +// CGOpenMPRuntime.cpp. +llvm::Value *getSizeInBytes(DataLayout &dl, const mlir::Type &type, + Operation *clauseOp, llvm::IRBuilderBase &builder, + LLVM::ModuleTranslation &moduleTranslation) { + // utilising getTypeSizeInBits instead of getTypeSize as getTypeSize gives + // the size in inconsistent byte or bit format. + uint64_t underlyingTypeSzInBits = dl.getTypeSizeInBits(type); + if (auto arrTy = llvm::dyn_cast_if_present(type)) { + underlyingTypeSzInBits = getArrayElementSizeInBits(arrTy, dl); + } + + if (auto memberClause = + mlir::dyn_cast_if_present(clauseOp)) { + // This calculates the size to transfer based on bounds and the underlying + // element type, provided bounds have been specified (Fortran + // pointers/allocatables/target and arrays that have sections specified fall + // into this as well). + if (!memberClause.getBounds().empty()) { + llvm::Value *elementCount = builder.getInt64(1); + for (auto bounds : memberClause.getBounds()) { + if (auto boundOp = mlir::dyn_cast_if_present( + bounds.getDefiningOp())) { + // The below calculation for the size to be mapped calculated from the + // map_info's bounds is: (elemCount * [UB - LB] + 1), later we + // multiply by the underlying element types byte size to get the full + // size to be offloaded based on the bounds + elementCount = builder.CreateMul( + elementCount, + builder.CreateAdd( + builder.CreateSub( + moduleTranslation.lookupValue(boundOp.getUpperBound()), + moduleTranslation.lookupValue(boundOp.getLowerBound())), + builder.getInt64(1))); + } + } + + // The size in bytes x number of elements, the sizeInBytes stored is + // the underyling types size, e.g. if ptr, it'll be the i32's + // size, so we do some on the fly runtime math to get the size in + // bytes from the extent (ub - lb) * sizeInBytes. NOTE: This may need + // some adjustment for members with more complex types. + return builder.CreateMul(elementCount, + builder.getInt64(underlyingTypeSzInBits / 8)); + } + } + + return builder.getInt64(underlyingTypeSzInBits / 8); +} + +void collectMapDataFromMapOperands(MapInfoData &mapData, + llvm::SmallVectorImpl &mapOperands, + LLVM::ModuleTranslation &moduleTranslation, + DataLayout &dl, + llvm::IRBuilderBase &builder) { + for (mlir::Value mapValue : mapOperands) { + assert(mlir::isa(mapValue.getDefiningOp()) && + "missing map info operation or incorrect map info operation type"); + if (auto mapOp = mlir::dyn_cast_if_present( + mapValue.getDefiningOp())) { + mapData.OriginalValue.push_back( + moduleTranslation.lookupValue(mapOp.getVarPtr())); + mapData.Pointers.push_back(mapData.OriginalValue.back()); + + if (llvm::Value *refPtr = getRefPtrIfDeclareTarget( + mapOp.getVarPtr(), moduleTranslation)) { // declare target + mapData.IsDeclareTarget.push_back(true); + mapData.BasePointers.push_back(refPtr); + } else { // regular mapped variable + mapData.IsDeclareTarget.push_back(false); + mapData.BasePointers.push_back(mapData.OriginalValue.back()); + } + + mapData.Sizes.push_back(getSizeInBytes(dl, mapOp.getVarType(), mapOp, + builder, moduleTranslation)); + mapData.BaseType.push_back( + moduleTranslation.convertType(mapOp.getVarType())); + mapData.MapClause.push_back(mapOp.getOperation()); + mapData.Types.push_back( + llvm::omp::OpenMPOffloadMappingFlags(mapOp.getMapType().value())); + mapData.Names.push_back(LLVM::createMappingInformation( + mapOp.getLoc(), *moduleTranslation.getOpenMPBuilder())); + mapData.DevicePointers.push_back( + llvm::OpenMPIRBuilder::DeviceInfoTy::None); + } + } +} + // Generate all map related information and fill the combinedInfo. static void genMapInfos(llvm::IRBuilderBase &builder, LLVM::ModuleTranslation &moduleTranslation, - DataLayout &DL, + DataLayout &dl, llvm::OpenMPIRBuilder::MapInfosTy &combinedInfo, - const SmallVector &mapOperands, - const ArrayAttr &mapTypes, + MapInfoData &mapData, const SmallVector &devPtrOperands = {}, const SmallVector &devAddrOperands = {}, bool isTargetParams = false) { @@ -1659,58 +1780,39 @@ static void genMapInfos(llvm::IRBuilderBase &builder, combinedInfo.Names.clear(); }; - auto findMapInfo = [&combinedInfo](llvm::Value *val, unsigned &index) { - index = 0; - for (auto basePtr : combinedInfo.BasePointers) { - if (basePtr == val) - return true; - index++; - } - return false; - }; - - unsigned index = 0; - for (const auto &mapOp : mapOperands) { - // Unlike dev_ptr and dev_addr operands these map operands point - // to a map entry operation which contains further information - // on the variable being mapped and how it should be mapped. - auto mapInfoOp = - mlir::dyn_cast(mapOp.getDefiningOp()); - - // TODO: Only LLVMPointerTypes are handled. - if (!mapInfoOp.getType().isa()) - return fail(); - - llvm::Value *mapOpValue = - moduleTranslation.lookupValue(mapInfoOp.getVarPtr()); - - llvm::Value *refPtr = - getRefPtrIfDeclareTarget(mapInfoOp.getVarPtr(), moduleTranslation); - - combinedInfo.BasePointers.emplace_back(refPtr ? refPtr : mapOpValue); - combinedInfo.Pointers.emplace_back(mapOpValue); - combinedInfo.DevicePointers.emplace_back( - llvm::OpenMPIRBuilder::DeviceInfoTy::None); - combinedInfo.Names.emplace_back(LLVM::createMappingInformation( - mapInfoOp.getVarPtr().getLoc(), *ompBuilder)); - - auto mapFlag = llvm::omp::OpenMPOffloadMappingFlags( - mapTypes[index].cast().getUInt()); - + // We operate under the assumption that all vectors that are + // required in MapInfoData are of equal lengths (either filled with + // default constructed data or appropiate information) so we can + // utilise the size from any component of MapInfoData, if we can't + // something is missing from the initial MapInfoData construction. + for (size_t i = 0; i < mapData.MapClause.size(); ++i) { // Declare Target Mappings are excluded from being marked as // OMP_MAP_TARGET_PARAM as they are not passed as parameters, they're marked // with OMP_MAP_PTR_AND_OBJ instead. - if (refPtr) + auto mapFlag = mapData.Types[i]; + if (mapData.IsDeclareTarget[i]) mapFlag |= llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_PTR_AND_OBJ; else if (isTargetParams) mapFlag |= llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_TARGET_PARAM; + combinedInfo.BasePointers.emplace_back(mapData.BasePointers[i]); + combinedInfo.Pointers.emplace_back(mapData.Pointers[i]); + combinedInfo.DevicePointers.emplace_back(mapData.DevicePointers[i]); + combinedInfo.Names.emplace_back(mapData.Names[i]); combinedInfo.Types.emplace_back(mapFlag); - combinedInfo.Sizes.emplace_back(builder.getInt64(getSizeInBytes( - DL, mapInfoOp.getVarPtr().getType(), mapInfoOp.getVarType()))); - index++; + combinedInfo.Sizes.emplace_back(mapData.Sizes[i]); } + auto findMapInfo = [&combinedInfo](llvm::Value *val, unsigned &index) { + index = 0; + for (llvm::Value *basePtr : combinedInfo.BasePointers) { + if (basePtr == val) + return true; + index++; + } + return false; + }; + auto addDevInfos = [&, fail](auto devOperands, auto devOpType) -> void { for (const auto &devOp : devOperands) { // TODO: Only LLVMPointerTypes are handled. @@ -1756,19 +1858,6 @@ convertOmpTargetData(Operation *op, llvm::IRBuilderBase &builder, llvm::OpenMPIRBuilder *ompBuilder = moduleTranslation.getOpenMPBuilder(); - auto getMapTypes = [](mlir::OperandRange mapOperands, - mlir::MLIRContext *ctx) { - SmallVector mapTypes; - for (auto mapValue : mapOperands) { - if (mapValue.getDefiningOp()) { - auto mapOp = - mlir::dyn_cast(mapValue.getDefiningOp()); - mapTypes.push_back(mapOp.getMapTypeAttr()); - } - } - return mlir::ArrayAttr::get(ctx, mapTypes); - }; - LogicalResult result = llvm::TypeSwitch(op) .Case([&](omp::DataOp dataOp) { @@ -1782,8 +1871,6 @@ convertOmpTargetData(Operation *op, llvm::IRBuilderBase &builder, deviceID = intAttr.getInt(); mapOperands = dataOp.getMapOperands(); - mapTypes = - getMapTypes(dataOp.getMapOperands(), dataOp->getContext()); useDevPtrOperands = dataOp.getUseDevicePtr(); useDevAddrOperands = dataOp.getUseDeviceAddr(); return success(); @@ -1802,8 +1889,6 @@ convertOmpTargetData(Operation *op, llvm::IRBuilderBase &builder, deviceID = intAttr.getInt(); RTLFn = llvm::omp::OMPRTL___tgt_target_data_begin_mapper; mapOperands = enterDataOp.getMapOperands(); - mapTypes = getMapTypes(enterDataOp.getMapOperands(), - enterDataOp->getContext()); return success(); }) .Case([&](omp::ExitDataOp exitDataOp) { @@ -1821,8 +1906,6 @@ convertOmpTargetData(Operation *op, llvm::IRBuilderBase &builder, RTLFn = llvm::omp::OMPRTL___tgt_target_data_end_mapper; mapOperands = exitDataOp.getMapOperands(); - mapTypes = getMapTypes(exitDataOp.getMapOperands(), - exitDataOp->getContext()); return success(); }) .Default([&](Operation *op) { @@ -1835,17 +1918,20 @@ convertOmpTargetData(Operation *op, llvm::IRBuilderBase &builder, using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy; + MapInfoData mapData; + collectMapDataFromMapOperands(mapData, mapOperands, moduleTranslation, DL, + builder); + // Fill up the arrays with all the mapped variables. llvm::OpenMPIRBuilder::MapInfosTy combinedInfo; auto genMapInfoCB = [&](InsertPointTy codeGenIP) -> llvm::OpenMPIRBuilder::MapInfosTy & { builder.restoreIP(codeGenIP); - if (auto DataOp = dyn_cast(op)) { - genMapInfos(builder, moduleTranslation, DL, combinedInfo, mapOperands, - mapTypes, useDevPtrOperands, useDevAddrOperands); + if (auto dataOp = dyn_cast(op)) { + genMapInfos(builder, moduleTranslation, DL, combinedInfo, mapData, + useDevPtrOperands, useDevAddrOperands); } else { - genMapInfos(builder, moduleTranslation, DL, combinedInfo, mapOperands, - mapTypes); + genMapInfos(builder, moduleTranslation, DL, combinedInfo, mapData); } return combinedInfo; }; @@ -1997,61 +2083,232 @@ static bool targetOpSupported(Operation &opInst) { } static void -handleDeclareTargetMapVar(llvm::ArrayRef mapOperands, +handleDeclareTargetMapVar(MapInfoData &mapData, LLVM::ModuleTranslation &moduleTranslation, llvm::IRBuilderBase &builder) { - for (const mlir::Value &mapOp : mapOperands) { - auto mapInfoOp = - mlir::dyn_cast(mapOp.getDefiningOp()); - llvm::Value *mapOpValue = - moduleTranslation.lookupValue(mapInfoOp.getVarPtr()); - if (auto *declareTarget = getRefPtrIfDeclareTarget(mapInfoOp.getVarPtr(), - moduleTranslation)) { - // The user's iterator will get invalidated if we modify an element, + for (size_t i = 0; i < mapData.MapClause.size(); ++i) { + // In the case of declare target mapped variables, the basePointer is + // the reference pointer generated by the convertDeclareTargetAttr + // method. Whereas the kernelValue is the original variable, so for + // the device we must replace all uses of this original global variable + // (stored in kernelValue) with the reference pointer (stored in + // basePointer for declare target mapped variables), as for device the + // data is mapped into this reference pointer and should be loaded + // from it, the original variable is discarded. On host both exist and + // metadata is generated (elsewhere in the convertDeclareTargetAttr) + // function to link the two variables in the runtime and then both the + // reference pointer and the pointer are assigned in the kernel argument + // structure for the host. + if (mapData.IsDeclareTarget[i]) { + // The users iterator will get invalidated if we modify an element, // so we populate this vector of uses to alter each user on an individual // basis to emit its own load (rather than one load for all). llvm::SmallVector userVec; - for (llvm::User *user : mapOpValue->users()) + for (llvm::User *user : mapData.OriginalValue[i]->users()) userVec.push_back(user); for (llvm::User *user : userVec) { if (auto *insn = dyn_cast(user)) { - auto *load = builder.CreateLoad( - moduleTranslation.convertType(mapInfoOp.getVarPtr().getType()), - declareTarget); + auto *load = builder.CreateLoad(mapData.BasePointers[i]->getType(), + mapData.BasePointers[i]); load->moveBefore(insn); - user->replaceUsesOfWith(mapOpValue, load); + user->replaceUsesOfWith(mapData.OriginalValue[i], load); } } } } } +// The createDeviceArgumentAccessor function generates +// instructions for retrieving (acessing) kernel +// arguments inside of the device kernel for use by +// the kernel. This enables different semantics such as +// the creation of temporary copies of data allowing +// semantics like read-only/no host write back kernel +// arguments. +// +// This currently implements a very light version of Clang's +// EmitParmDecl's handling of direct argument handling as well +// as a portion of the argument access generation based on +// capture types found at the end of emitOutlinedFunctionPrologue +// in Clang. The indirect path handling of EmitParmDecl's may be +// required for future work, but a direct 1-to-1 copy doesn't seem +// possible as the logic is rather scattered throughout Clang's +// lowering and perhaps we wish to deviate slightly. +// +// \param mapData - A container containing vectors of information +// corresponding to the input argument, which should have a +// corresponding entry in the MapInfoData containers +// OrigialValue's. +// \param arg - This is the generated kernel function argument that +// corresponds to the passed in input argument. We generated different +// accesses of this Argument, based on capture type and other Input +// related information. +// \param input - This is the host side value that will be passed to +// the kernel i.e. the kernel input, we rewrite all uses of this within +// the kernel (as we generate the kernel body based on the target's region +// which maintians references to the original input) to the retVal argument +// apon exit of this function inside of the OMPIRBuilder. This interlinks +// the kernel argument to future uses of it in the function providing +// appropriate "glue" instructions inbetween. +// \param retVal - This is the value that all uses of input inside of the +// kernel will be re-written to, the goal of this function is to generate +// an appropriate location for the kernel argument to be accessed from, +// e.g. ByRef will result in a temporary allocation location and then +// a store of the kernel argument into this allocated memory which +// will then be loaded from, ByCopy will use the allocated memory +// directly. static llvm::IRBuilderBase::InsertPoint -createDeviceArgumentAccessor(llvm::Argument &arg, llvm::Value *input, - llvm::Value *&retVal, llvm::IRBuilderBase &builder, +createDeviceArgumentAccessor(MapInfoData &mapData, llvm::Argument &arg, + llvm::Value *input, llvm::Value *&retVal, + llvm::IRBuilderBase &builder, llvm::OpenMPIRBuilder &ompBuilder, LLVM::ModuleTranslation &moduleTranslation, llvm::IRBuilderBase::InsertPoint allocaIP, llvm::IRBuilderBase::InsertPoint codeGenIP) { builder.restoreIP(allocaIP); - llvm::Value *addr = + mlir::omp::VariableCaptureKind capture = + mlir::omp::VariableCaptureKind::ByRef; + llvm::Type *inputType = input->getType(); + + // Find the associated MapInfoData entry for the current input + for (size_t i = 0; i < mapData.MapClause.size(); ++i) + if (mapData.OriginalValue[i] == input) { + if (auto mapOp = mlir::dyn_cast_if_present( + mapData.MapClause[i])) { + capture = mapOp.getMapCaptureType().value_or( + mlir::omp::VariableCaptureKind::ByRef); + } + + inputType = mapData.BaseType[i]; + break; + } + + unsigned int allocaAS = ompBuilder.M.getDataLayout().getAllocaAddrSpace(); + unsigned int defaultAS = + ompBuilder.M.getDataLayout().getProgramAddressSpace(); + + // Create the alloca for the argument the current point. + llvm::Value *v = builder.CreateAlloca(arg.getType()->isPointerTy() ? arg.getType() : llvm::Type::getInt64Ty(builder.getContext()), ompBuilder.M.getDataLayout().getAllocaAddrSpace()); - llvm::Value *addrAscast = - arg.getType()->isPointerTy() - ? builder.CreatePointerBitCastOrAddrSpaceCast(addr, input->getType()) - : addr; - builder.CreateStore(&arg, addrAscast); + if (allocaAS != defaultAS && arg.getType()->isPointerTy()) { + v = builder.CreatePointerBitCastOrAddrSpaceCast( + v, arg.getType()->getPointerTo(defaultAS)); + } + + builder.CreateStore(&arg, v); + builder.restoreIP(codeGenIP); - retVal = builder.CreateLoad(arg.getType(), addrAscast); + + switch (capture) { + case mlir::omp::VariableCaptureKind::ByCopy: { + // RHS of || aims to ignore conversions like int -> uint, but further + // extension of this path must be implemented for the moment it'll fall + // through to the assert. + if (inputType->isPointerTy() || v->getType() == inputType->getPointerTo()) { + retVal = v; + return builder.saveIP(); + } + + assert(false && "Currently unsupported OMPTargetVarCaptureByCopy Type"); + break; + } + case mlir::omp::VariableCaptureKind::ByRef: { + retVal = builder.CreateAlignedLoad( + v->getType(), v, + ompBuilder.M.getDataLayout().getPrefTypeAlign(v->getType())); + break; + } + case mlir::omp::VariableCaptureKind::This: + case mlir::omp::VariableCaptureKind::VLAType: + assert(false && "Currently unsupported capture kind"); + break; + } + return builder.saveIP(); } +// This is a variation on Clang's GenerateOpenMPCapturedVars, which +// generates different operation (e.g. load/store) combinations for +// arguments to the kernel, based on map capture kinds which are then +// utilised in the combinedInfo in place of the original Map value. +static void +createAlteredByCaptureMap(MapInfoData &mapData, + LLVM::ModuleTranslation &moduleTranslation, + llvm::IRBuilderBase &builder) { + for (size_t i = 0; i < mapData.MapClause.size(); ++i) { + // if it's declare target, skip it, it's handled seperately. + if (!mapData.IsDeclareTarget[i]) { + mlir::omp::VariableCaptureKind captureKind = + mlir::omp::VariableCaptureKind::ByRef; + + if (auto mapOp = mlir::dyn_cast_if_present( + mapData.MapClause[i])) { + captureKind = mapOp.getMapCaptureType().value_or( + mlir::omp::VariableCaptureKind::ByRef); + } + + switch (captureKind) { + case mlir::omp::VariableCaptureKind::ByRef: { + // Currently handles array sectioning lowerbound case, but more + // logic may be required in the future. Clang invokes EmitLValue, + // which has specialised logic for special Clang types such as user + // defines, so it is possible we will have to extend this for + // structures or other complex types. As the general idea is that this + // function mimics some of the logic from Clang that we require for + // kernel argument passing from host -> device. + if (auto mapOp = mlir::dyn_cast_if_present( + mapData.MapClause[i])) { + if (!mapOp.getBounds().empty() && mapData.BaseType[i]->isArrayTy()) { + + std::vector idx = + std::vector{builder.getInt64(0)}; + for (int i = mapOp.getBounds().size() - 1; i >= 0; --i) { + if (auto boundOp = + mlir::dyn_cast_if_present( + mapOp.getBounds()[i].getDefiningOp())) { + idx.push_back( + moduleTranslation.lookupValue(boundOp.getLowerBound())); + } + } + + mapData.Pointers[i] = builder.CreateInBoundsGEP( + mapData.BaseType[i], mapData.Pointers[i], idx); + } + } + } break; + case mlir::omp::VariableCaptureKind::ByCopy: { + llvm::Type *type = mapData.BaseType[i]; + llvm::Value *newV = builder.CreateLoad(type, mapData.Pointers[i]); + + if (!type->isPointerTy()) { + auto curInsert = builder.saveIP(); + builder.restoreIP(findAllocaInsertPoint(builder, moduleTranslation)); + auto *memTempAlloc = + builder.CreateAlloca(builder.getInt8PtrTy(), nullptr, ".casted"); + builder.restoreIP(curInsert); + + builder.CreateStore(newV, memTempAlloc); + newV = builder.CreateLoad(builder.getInt8PtrTy(), memTempAlloc); + } + + mapData.Pointers[i] = newV; + mapData.BasePointers[i] = newV; + } break; + case mlir::omp::VariableCaptureKind::This: + case mlir::omp::VariableCaptureKind::VLAType: + mapData.MapClause[i]->emitOpError("Unhandled capture kind"); + break; + } + } + } +} + static LogicalResult convertOmpTarget(Operation &opInst, llvm::IRBuilderBase &builder, LLVM::ModuleTranslation &moduleTranslation) { @@ -2062,26 +2319,6 @@ convertOmpTarget(Operation &opInst, llvm::IRBuilderBase &builder, auto targetOp = cast(opInst); auto &targetRegion = targetOp.getRegion(); - // This function filters out kernel data that will not show up as kernel - // input arguments to the generated kernel function but will still need - // explicitly mapped through supplying information to the OpenMP runtime - // (declare target). It also prepares some data used for generating the - // kernel and populating the associated OpenMP runtime data structures. - auto getKernelArguments = - [&](const llvm::SetVector &operandSet, - llvm::SmallVectorImpl &llvmInputs) { - for (Value operand : operandSet) { - if (!getRefPtrIfDeclareTarget(operand, moduleTranslation)) - llvmInputs.push_back(moduleTranslation.lookupValue(operand)); - } - }; - - llvm::SetVector operandSet; - getUsedValuesDefinedAbove(targetRegion, operandSet); - - llvm::SmallVector inputs; - getKernelArguments(operandSet, inputs); - LogicalResult bodyGenStatus = success(); using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy; @@ -2115,31 +2352,32 @@ convertOmpTarget(Operation &opInst, llvm::IRBuilderBase &builder, llvm::OpenMPIRBuilder::InsertPointTy allocaIP = findAllocaInsertPoint(builder, moduleTranslation); - DataLayout DL = DataLayout(opInst.getParentOfType()); - SmallVector mapOperands = targetOp.getMapOperands(); - - auto getMapTypes = [](mlir::OperandRange mapOperands, - mlir::MLIRContext *ctx) { - SmallVector mapTypes; - for (auto mapValue : mapOperands) { - if (mapValue.getDefiningOp()) { - auto mapOp = - mlir::dyn_cast(mapValue.getDefiningOp()); - mapTypes.push_back(mapOp.getMapTypeAttr()); - } - } - return mlir::ArrayAttr::get(ctx, mapTypes); - }; - - ArrayAttr mapTypes = - getMapTypes(targetOp.getMapOperands(), targetOp->getContext()); + DataLayout dl = DataLayout(opInst.getParentOfType()); + llvm::SmallVector mapOperands = targetOp.getMapOperands(); + MapInfoData mapData; + collectMapDataFromMapOperands(mapData, mapOperands, moduleTranslation, dl, + builder); + + // We wish to modify some of the methods in which kernel arguments are + // passed based on their capture type by the target region, this can + // involve generating new loads and stores, which changes the + // MLIR value to LLVM value mapping, however, we only wish to do this + // locally for the current function/target and also avoid altering + // ModuleTranslation, so we remap the base pointer or pointer stored + // in the map infos corresponding MapInfoData, which is later accessed + // by genMapInfos and createTarget to help generate the kernel and + // kernel arg structure. It primarily becomes relevant in cases like + // bycopy, or byref range'd arrays. In the default case, we simply + // pass thee pointer byref as both basePointer and pointer. + if (!moduleTranslation.getOpenMPBuilder()->Config.isTargetDevice()) + createAlteredByCaptureMap(mapData, moduleTranslation, builder); llvm::OpenMPIRBuilder::MapInfosTy combinedInfos; auto genMapInfoCB = [&](llvm::OpenMPIRBuilder::InsertPointTy codeGenIP) -> llvm::OpenMPIRBuilder::MapInfosTy & { builder.restoreIP(codeGenIP); - genMapInfos(builder, moduleTranslation, DL, combinedInfos, mapOperands, - mapTypes, {}, {}, true); + genMapInfos(builder, moduleTranslation, dl, combinedInfos, mapData, {}, {}, + true); return combinedInfos; }; @@ -2158,22 +2396,27 @@ convertOmpTarget(Operation &opInst, llvm::IRBuilderBase &builder, return codeGenIP; } - return createDeviceArgumentAccessor(arg, input, retVal, builder, + return createDeviceArgumentAccessor(mapData, arg, input, retVal, builder, *ompBuilder, moduleTranslation, allocaIP, codeGenIP); }; + llvm::SmallVector kernelInput; + for (size_t i = 0; i < mapData.MapClause.size(); ++i) { + // declare target arguments are not passed to kernels as arguments + if (!mapData.IsDeclareTarget[i]) + kernelInput.push_back(mapData.OriginalValue[i]); + } + builder.restoreIP(moduleTranslation.getOpenMPBuilder()->createTarget( ompLoc, allocaIP, builder.saveIP(), entryInfo, defaultValTeams, - defaultValThreads, inputs, genMapInfoCB, bodyCB, argAccessorCB)); + defaultValThreads, kernelInput, genMapInfoCB, bodyCB, argAccessorCB)); // Remap access operations to declare target reference pointers for the // device, essentially generating extra loadop's as necessary - if (moduleTranslation.getOpenMPBuilder()->Config.isTargetDevice()) { - SmallVector mapOperands = targetOp.getMapOperands(); - handleDeclareTargetMapVar(llvm::ArrayRef(mapOperands), moduleTranslation, - builder); - } + if (moduleTranslation.getOpenMPBuilder()->Config.isTargetDevice()) + handleDeclareTargetMapVar(mapData, moduleTranslation, builder); + return bodyGenStatus; } diff --git a/mlir/test/Target/LLVMIR/omptarget-array-sectioning-host.mlir b/mlir/test/Target/LLVMIR/omptarget-array-sectioning-host.mlir new file mode 100644 index 0000000000000..056085123480b --- /dev/null +++ b/mlir/test/Target/LLVMIR/omptarget-array-sectioning-host.mlir @@ -0,0 +1,56 @@ +// RUN: mlir-translate -mlir-to-llvmir %s | FileCheck %s + +// This test checks the offload sizes provided to the OpenMP kernel argument +// structure are correct when lowering to LLVM-IR from MLIR with 3-D bounds +// provided for a 3-D array. One with full default size, and the other with +// a user specified OpenMP array sectioning. We expect the default sized +// array bounds to lower to the full size of the array and the sectioned +// array to be the size of 3*3*1*element-byte-size (36 bytes in this case). + +module attributes {omp.is_target_device = false} { + llvm.func @_3d_target_array_section() { + %0 = llvm.mlir.addressof @_QFEinarray : !llvm.ptr + %1 = llvm.mlir.addressof @_QFEoutarray : !llvm.ptr + %2 = llvm.mlir.constant(1 : index) : i64 + %3 = llvm.mlir.constant(0 : index) : i64 + %4 = llvm.mlir.constant(2 : index) : i64 + %5 = omp.bounds lower_bound(%3 : i64) upper_bound(%4 : i64) stride(%2 : i64) start_idx(%2 : i64) + %6 = omp.bounds lower_bound(%2 : i64) upper_bound(%2 : i64) stride(%2 : i64) start_idx(%2 : i64) + %7 = omp.map_info var_ptr(%0 : !llvm.ptr, !llvm.array<3 x array<3 x array<3 x i32>>>) map_clauses(tofrom) capture(ByRef) bounds(%5, %5, %6) -> !llvm.ptr {name = "inarray(1:3,1:3,2:2)"} + %8 = omp.map_info var_ptr(%1 : !llvm.ptr, !llvm.array<3 x array<3 x array<3 x i32>>>) map_clauses(tofrom) capture(ByRef) bounds(%5, %5, %5) -> !llvm.ptr {name = "outarray(1:3,1:3,1:3)"} + omp.target map_entries(%7, %8 : !llvm.ptr, !llvm.ptr) { + %9 = llvm.mlir.constant(0 : i64) : i64 + %10 = llvm.mlir.constant(1 : i64) : i64 + %11 = llvm.getelementptr %0[0, %10, %9, %9] : (!llvm.ptr, i64, i64, i64) -> !llvm.ptr, !llvm.array<3 x array<3 x array<3 x i32>>> + %12 = llvm.load %11 : !llvm.ptr -> i32 + %13 = llvm.getelementptr %1[0, %10, %9, %9] : (!llvm.ptr, i64, i64, i64) -> !llvm.ptr, !llvm.array<3 x array<3 x array<3 x i32>>> + llvm.store %12, %13 : i32, !llvm.ptr + omp.terminator + } + llvm.return + } + llvm.mlir.global internal @_QFEinarray() {addr_space = 0 : i32} : !llvm.array<3 x array<3 x array<3 x i32>>> { + %0 = llvm.mlir.zero : !llvm.array<3 x array<3 x array<3 x i32>>> + llvm.return %0 : !llvm.array<3 x array<3 x array<3 x i32>>> + } + llvm.mlir.global internal @_QFEoutarray() {addr_space = 0 : i32} : !llvm.array<3 x array<3 x array<3 x i32>>> { + %0 = llvm.mlir.zero : !llvm.array<3 x array<3 x array<3 x i32>>> + llvm.return %0 : !llvm.array<3 x array<3 x array<3 x i32>>> + } +} + +// CHECK: @.offload_sizes = private unnamed_addr constant [2 x i64] [i64 36, i64 108] +// CHECK: @.offload_maptypes = private unnamed_addr constant [2 x i64] [i64 35, i64 35] +// CHECKL: @.offload_mapnames = private constant [2 x ptr] [ptr @0, ptr @1] + +// CHECK: define void @_3d_target_array_section() + +// CHECK: %[[OFFLOADBASEPTRS:.*]] = getelementptr inbounds [2 x ptr], ptr %.offload_baseptrs, i32 0, i32 0 +// CHECK: store ptr @_QFEinarray, ptr %[[OFFLOADBASEPTRS]], align 8 +// CHECK: %[[OFFLOADPTRS:.*]] = getelementptr inbounds [2 x ptr], ptr %.offload_ptrs, i32 0, i32 0 +// CHECK: store ptr getelementptr inbounds ([3 x [3 x [3 x i32]]], ptr @_QFEinarray, i64 0, i64 1, i64 0, i64 0), ptr %[[OFFLOADPTRS]], align 8 + +// CHECK: %[[OFFLOADBASEPTRS2:.*]] = getelementptr inbounds [2 x ptr], ptr %.offload_baseptrs, i32 0, i32 1 +// CHECK: store ptr @_QFEoutarray, ptr %[[OFFLOADBASEPTRS2]], align 8 +// CHECK: %[[OFFLOADPTRS2:.*]] = getelementptr inbounds [2 x ptr], ptr %.offload_ptrs, i32 0, i32 1 +// CHECK: store ptr @_QFEoutarray, ptr %[[OFFLOADPTRS2]], align 8 diff --git a/mlir/test/Target/LLVMIR/omptarget-byref-bycopy-generation-device.mlir b/mlir/test/Target/LLVMIR/omptarget-byref-bycopy-generation-device.mlir new file mode 100644 index 0000000000000..cf6b7257ac606 --- /dev/null +++ b/mlir/test/Target/LLVMIR/omptarget-byref-bycopy-generation-device.mlir @@ -0,0 +1,41 @@ +// RUN: mlir-translate -mlir-to-llvmir %s | FileCheck %s + +module attributes {omp.is_target_device = true} { + llvm.func @_QQmain() attributes {fir.bindc_name = "main"} { + %0 = llvm.mlir.addressof @_QFEi : !llvm.ptr + %1 = llvm.mlir.addressof @_QFEsp : !llvm.ptr + %2 = omp.map_info var_ptr(%1 : !llvm.ptr, i32) map_clauses(tofrom) capture(ByRef) -> !llvm.ptr {name = "sp"} + %3 = omp.map_info var_ptr(%0 : !llvm.ptr, i32) map_clauses(to) capture(ByCopy) -> !llvm.ptr {name = "i"} + omp.target map_entries(%2, %3 : !llvm.ptr, !llvm.ptr) { + %4 = llvm.load %0 : !llvm.ptr -> i32 + llvm.store %4, %1 : i32, !llvm.ptr + omp.terminator + } + llvm.return + } + llvm.mlir.global internal @_QFEi() {addr_space = 0 : i32} : i32 { + %0 = llvm.mlir.constant(1 : i32) : i32 + llvm.return %0 : i32 + } + llvm.mlir.global internal @_QFEsp() {addr_space = 0 : i32} : i32 { + %0 = llvm.mlir.constant(0 : i32) : i32 + llvm.return %0 : i32 + } +} + +// CHECK: define {{.*}} void @__omp_offloading_{{.*}}_{{.*}}__QQmain_l{{.*}}(ptr %[[ARG_BYREF:.*]], ptr %[[ARG_BYCOPY:.*]]) { + +// CHECK: entry: +// CHECK: %[[ALLOCA_BYREF:.*]] = alloca ptr, align 8 +// CHECK: store ptr %[[ARG_BYREF]], ptr %[[ALLOCA_BYREF]], align 8 +// CHECK: %[[ALLOCA_BYCOPY:.*]] = alloca ptr, align 8 +// CHECK: store ptr %[[ARG_BYCOPY]], ptr %[[ALLOCA_BYCOPY]], align 8 + +// CHECK: user_code.entry: ; preds = %entry +// CHECK: %[[LOAD_BYREF:.*]] = load ptr, ptr %[[ALLOCA_BYREF]], align 8 +// CHECK: br label %omp.target + +// CHECK: omp.target: ; preds = %user_code.entry +// CHECK: %[[VAL_LOAD_BYCOPY:.*]] = load i32, ptr %[[ALLOCA_BYCOPY]], align 4 +// CHECK: store i32 %[[VAL_LOAD_BYCOPY]], ptr %[[LOAD_BYREF]], align 4 +// CHECK: br label %omp.region.cont diff --git a/mlir/test/Target/LLVMIR/omptarget-byref-bycopy-generation-host.mlir b/mlir/test/Target/LLVMIR/omptarget-byref-bycopy-generation-host.mlir new file mode 100644 index 0000000000000..ca5dad8b4fc9a --- /dev/null +++ b/mlir/test/Target/LLVMIR/omptarget-byref-bycopy-generation-host.mlir @@ -0,0 +1,42 @@ +// RUN: mlir-translate -mlir-to-llvmir %s | FileCheck %s + +module attributes {omp.is_target_device = false} { + llvm.func @_QQmain() attributes {fir.bindc_name = "main"} { + %0 = llvm.mlir.addressof @_QFEi : !llvm.ptr + %1 = llvm.mlir.addressof @_QFEsp : !llvm.ptr + %2 = omp.map_info var_ptr(%1 : !llvm.ptr, i32) map_clauses(tofrom) capture(ByRef) -> !llvm.ptr {name = "sp"} + %3 = omp.map_info var_ptr(%0 : !llvm.ptr, i32) map_clauses(to) capture(ByCopy) -> !llvm.ptr {name = "i"} + omp.target map_entries(%2, %3 : !llvm.ptr, !llvm.ptr) { + %4 = llvm.load %0 : !llvm.ptr -> i32 + llvm.store %4, %1 : i32, !llvm.ptr + omp.terminator + } + llvm.return + } + llvm.mlir.global internal @_QFEi() {addr_space = 0 : i32} : i32 { + %0 = llvm.mlir.constant(1 : i32) : i32 + llvm.return %0 : i32 + } + llvm.mlir.global internal @_QFEsp() {addr_space = 0 : i32} : i32 { + %0 = llvm.mlir.constant(0 : i32) : i32 + llvm.return %0 : i32 + } +} + +// CHECK: define void @_QQmain() { +// CHECK: %[[BYCOPY_ALLOCA:.*]] = alloca ptr, align 8 + +// CHECK: entry: ; preds = %0 +// CHECK: %[[LOAD_VAL:.*]] = load i32, ptr @_QFEi, align 4 +// CHECK: store i32 %[[LOAD_VAL]], ptr %[[BYCOPY_ALLOCA]], align 4 +// CHECK: %[[BYCOPY_LOAD:.*]] = load ptr, ptr %[[BYCOPY_ALLOCA]], align 8 + +// CHECK: %[[BASEPTR_BYREF:.*]] = getelementptr inbounds [2 x ptr], ptr %.offload_baseptrs, i32 0, i32 0 +// CHECK: store ptr @_QFEsp, ptr %[[BASEPTR_BYREF]], align 8 +// CHECK: %[[OFFLOADPTR_BYREF:.*]] = getelementptr inbounds [2 x ptr], ptr %.offload_ptrs, i32 0, i32 0 +// CHECK: store ptr @_QFEsp, ptr %[[OFFLOADPTR_BYREF]], align 8 + +// CHECK: %[[BASEPTR_BYCOPY:.*]] = getelementptr inbounds [2 x ptr], ptr %.offload_baseptrs, i32 0, i32 1 +// CHECK: store ptr %[[BYCOPY_LOAD]], ptr %[[BASEPTR_BYCOPY]], align 8 +// CHECK: %[[OFFLOADPTR_BYREF:.*]] = getelementptr inbounds [2 x ptr], ptr %.offload_ptrs, i32 0, i32 1 +// CHECK: store ptr %[[BYCOPY_LOAD]], ptr %[[OFFLOADPTR_BYREF]], align 8 diff --git a/mlir/test/Target/LLVMIR/omptarget-llvm.mlir b/mlir/test/Target/LLVMIR/omptarget-llvm.mlir index f2431ec87933f..9221b410d766e 100644 --- a/mlir/test/Target/LLVMIR/omptarget-llvm.mlir +++ b/mlir/test/Target/LLVMIR/omptarget-llvm.mlir @@ -38,15 +38,20 @@ llvm.func @_QPopenmp_target_data() { // ----- -llvm.func @_QPopenmp_target_data_region(%1 : !llvm.ptr) { - %2 = omp.map_info var_ptr(%1 : !llvm.ptr, !llvm.array<1024 x i32>) map_clauses(from) capture(ByRef) -> !llvm.ptr {name = ""} - omp.target_data map_entries(%2 : !llvm.ptr) { - %3 = llvm.mlir.constant(99 : i32) : i32 - %4 = llvm.mlir.constant(1 : i64) : i64 - %5 = llvm.mlir.constant(1 : i64) : i64 - %6 = llvm.mlir.constant(0 : i64) : i64 - %7 = llvm.getelementptr %1[0, %6] : (!llvm.ptr, i64) -> !llvm.ptr, !llvm.array<1024 x i32> - llvm.store %3, %7 : i32, !llvm.ptr +llvm.func @_QPopenmp_target_data_region(%0 : !llvm.ptr) { + %1 = llvm.mlir.constant(1023 : index) : i64 + %2 = llvm.mlir.constant(0 : index) : i64 + %3 = llvm.mlir.constant(1024 : index) : i64 + %4 = llvm.mlir.constant(1 : index) : i64 + %5 = omp.bounds lower_bound(%2 : i64) upper_bound(%1 : i64) extent(%3 : i64) stride(%4 : i64) start_idx(%4 : i64) + %6 = omp.map_info var_ptr(%0 : !llvm.ptr, !llvm.array<1024 x i32>) map_clauses(from) capture(ByRef) bounds(%5) -> !llvm.ptr {name = ""} + omp.target_data map_entries(%6 : !llvm.ptr) { + %7 = llvm.mlir.constant(99 : i32) : i32 + %8 = llvm.mlir.constant(1 : i64) : i64 + %9 = llvm.mlir.constant(1 : i64) : i64 + %10 = llvm.mlir.constant(0 : i64) : i64 + %11 = llvm.getelementptr %0[0, %10] : (!llvm.ptr, i64) -> !llvm.ptr, !llvm.array<1024 x i32> + llvm.store %7, %11 : i32, !llvm.ptr omp.terminator } llvm.return @@ -92,16 +97,36 @@ llvm.func @_QPomp_target_enter_exit(%1 : !llvm.ptr, %3 : !llvm.ptr) { %11 = llvm.mlir.constant(10 : i32) : i32 %12 = llvm.icmp "slt" %10, %11 : i32 %13 = llvm.load %5 : !llvm.ptr -> i32 - %map1 = omp.map_info var_ptr(%1 : !llvm.ptr, !llvm.array<1024 x i32>) map_clauses(to) capture(ByRef) -> !llvm.ptr {name = ""} - %map2 = omp.map_info var_ptr(%3 : !llvm.ptr, !llvm.array<512 x i32>) map_clauses(exit_release_or_enter_alloc) capture(ByRef) -> !llvm.ptr {name = ""} + %14 = llvm.mlir.constant(1023 : index) : i64 + %15 = llvm.mlir.constant(0 : index) : i64 + %16 = llvm.mlir.constant(1024 : index) : i64 + %17 = llvm.mlir.constant(1 : index) : i64 + %18 = omp.bounds lower_bound(%15 : i64) upper_bound(%14 : i64) extent(%16 : i64) stride(%17 : i64) start_idx(%17 : i64) + %map1 = omp.map_info var_ptr(%1 : !llvm.ptr, !llvm.array<1024 x i32>) map_clauses(to) capture(ByRef) bounds(%18) -> !llvm.ptr {name = ""} + %19 = llvm.mlir.constant(511 : index) : i64 + %20 = llvm.mlir.constant(0 : index) : i64 + %21 = llvm.mlir.constant(512 : index) : i64 + %22 = llvm.mlir.constant(1 : index) : i64 + %23 = omp.bounds lower_bound(%20 : i64) upper_bound(%19 : i64) extent(%21 : i64) stride(%22 : i64) start_idx(%22 : i64) + %map2 = omp.map_info var_ptr(%3 : !llvm.ptr, !llvm.array<512 x i32>) map_clauses(exit_release_or_enter_alloc) capture(ByRef) bounds(%23) -> !llvm.ptr {name = ""} omp.target_enter_data if(%12 : i1) device(%13 : i32) map_entries(%map1, %map2 : !llvm.ptr, !llvm.ptr) - %14 = llvm.load %7 : !llvm.ptr -> i32 - %15 = llvm.mlir.constant(10 : i32) : i32 - %16 = llvm.icmp "sgt" %14, %15 : i32 - %17 = llvm.load %5 : !llvm.ptr -> i32 - %map3 = omp.map_info var_ptr(%1 : !llvm.ptr, !llvm.array<1024 x i32>) map_clauses(from) capture(ByRef) -> !llvm.ptr {name = ""} - %map4 = omp.map_info var_ptr(%3 : !llvm.ptr, !llvm.array<512 x i32>) map_clauses(exit_release_or_enter_alloc) capture(ByRef) -> !llvm.ptr {name = ""} - omp.target_exit_data if(%16 : i1) device(%17 : i32) map_entries(%map3, %map4 : !llvm.ptr, !llvm.ptr) + %24 = llvm.load %7 : !llvm.ptr -> i32 + %25 = llvm.mlir.constant(10 : i32) : i32 + %26 = llvm.icmp "sgt" %24, %25 : i32 + %27 = llvm.load %5 : !llvm.ptr -> i32 + %28 = llvm.mlir.constant(1023 : index) : i64 + %29 = llvm.mlir.constant(0 : index) : i64 + %30 = llvm.mlir.constant(1024 : index) : i64 + %31 = llvm.mlir.constant(1 : index) : i64 + %32 = omp.bounds lower_bound(%29 : i64) upper_bound(%28 : i64) extent(%30 : i64) stride(%31 : i64) start_idx(%31 : i64) + %map3 = omp.map_info var_ptr(%1 : !llvm.ptr, !llvm.array<1024 x i32>) map_clauses(from) capture(ByRef) bounds(%32) -> !llvm.ptr {name = ""} + %33 = llvm.mlir.constant(511 : index) : i64 + %34 = llvm.mlir.constant(0 : index) : i64 + %35 = llvm.mlir.constant(512 : index) : i64 + %36 = llvm.mlir.constant(1 : index) : i64 + %37 = omp.bounds lower_bound(%34 : i64) upper_bound(%33 : i64) extent(%35 : i64) stride(%36 : i64) start_idx(%36 : i64) + %map4 = omp.map_info var_ptr(%3 : !llvm.ptr, !llvm.array<512 x i32>) map_clauses(exit_release_or_enter_alloc) capture(ByRef) bounds(%37) -> !llvm.ptr {name = ""} + omp.target_exit_data if(%26 : i1) device(%27 : i32) map_entries(%map3, %map4 : !llvm.ptr, !llvm.ptr) llvm.return } diff --git a/mlir/test/Target/LLVMIR/omptarget-region-parallel-llvm.mlir b/mlir/test/Target/LLVMIR/omptarget-region-parallel-llvm.mlir index 20ad6d30c2f52..1d8799ecd446f 100644 --- a/mlir/test/Target/LLVMIR/omptarget-region-parallel-llvm.mlir +++ b/mlir/test/Target/LLVMIR/omptarget-region-parallel-llvm.mlir @@ -38,7 +38,7 @@ module attributes {omp.is_target_device = false} { // CHECK: store ptr %[[ADDR_B]], ptr %[[GEP2]], align 8 // CHECK: %[[GEP3:.*]] = getelementptr { ptr, ptr, ptr }, ptr %[[STRUCTARG]], i32 0, i32 2 // CHECK: store ptr %[[ADDR_C]], ptr %[[GEP3]], align 8 -// CHECK: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @1, i32 1, ptr @__omp_offloading_[[DEV]]_[[FIL]]_omp_target_region__l[[LINE]]..omp_par, ptr %[[STRUCTARG]]) +// CHECK: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @{{.*}}, i32 1, ptr @__omp_offloading_[[DEV]]_[[FIL]]_omp_target_region__l[[LINE]]..omp_par, ptr %[[STRUCTARG]]) // CHECK: define internal void @__omp_offloading_[[DEV]]_[[FIL]]_omp_target_region__l[[LINE]]..omp_par(ptr noalias %tid.addr, ptr noalias %zero.addr, ptr %[[STRUCTARG2:.*]]) #0 { diff --git a/openmp/libomptarget/test/offloading/fortran/basic-target-region-1D-array-section.f90 b/openmp/libomptarget/test/offloading/fortran/basic-target-region-1D-array-section.f90 new file mode 100644 index 0000000000000..11d3b6936bcea --- /dev/null +++ b/openmp/libomptarget/test/offloading/fortran/basic-target-region-1D-array-section.f90 @@ -0,0 +1,27 @@ +! Basic offloading test of arrays with provided lower +! and upper bounds as specified by OpenMP's sectioning +! REQUIRES: flang, amdgcn-amd-amdhsa +! UNSUPPORTED: nvptx64-nvidia-cuda +! UNSUPPORTED: nvptx64-nvidia-cuda-LTO +! UNSUPPORTED: aarch64-unknown-linux-gnu +! UNSUPPORTED: aarch64-unknown-linux-gnu-LTO +! UNSUPPORTED: x86_64-pc-linux-gnu +! UNSUPPORTED: x86_64-pc-linux-gnu-LTO + +! RUN: %libomptarget-compile-fortran-run-and-check-generic +program main + implicit none + integer :: write_arr(10) = (/0,0,0,0,0,0,0,0,0,0/) + integer :: read_arr(10) = (/1,2,3,4,5,6,7,8,9,10/) + integer :: i = 2 + + !$omp target map(to:read_arr(2:5)) map(from:write_arr(2:5)) map(tofrom:i) + do i = 2, 5 + write_arr(i) = read_arr(i) + end do + !$omp end target + + print *, write_arr(:) +end program + +! CHECK: 0 2 3 4 5 0 0 0 0 0 diff --git a/openmp/libomptarget/test/offloading/fortran/basic-target-region-3D-array-section.f90 b/openmp/libomptarget/test/offloading/fortran/basic-target-region-3D-array-section.f90 new file mode 100644 index 0000000000000..28b2afced4d1b --- /dev/null +++ b/openmp/libomptarget/test/offloading/fortran/basic-target-region-3D-array-section.f90 @@ -0,0 +1,39 @@ +! Basic offloading test of a regular array explicitly +! passed within a target region +! REQUIRES: flang, amdgcn-amd-amdhsa +! UNSUPPORTED: nvptx64-nvidia-cuda +! UNSUPPORTED: nvptx64-nvidia-cuda-LTO +! UNSUPPORTED: aarch64-unknown-linux-gnu +! UNSUPPORTED: aarch64-unknown-linux-gnu-LTO +! UNSUPPORTED: x86_64-pc-linux-gnu +! UNSUPPORTED: x86_64-pc-linux-gnu-LTO + +! RUN: %libomptarget-compile-fortran-run-and-check-generic +program main + implicit none + integer :: inArray(3,3,3) + integer :: outArray(3,3,3) + integer :: i, j, k + + do i = 1, 3 + do j = 1, 3 + do k = 1, 3 + inArray(i, j, k) = 42 + outArray(i, j, k) = 0 + end do + end do + end do + +!$omp target map(tofrom:inArray(1:3, 1:3, 2:2), outArray(1:3, 1:3, 1:3), j, k) + do j = 1, 3 + do k = 1, 3 + outArray(k, j, 2) = inArray(k, j, 2) + end do + end do +!$omp end target + + print *, outArray + +end program + +! CHECK: 0 0 0 0 0 0 0 0 0 42 42 42 42 42 42 42 42 42 0 0 0 0 0 0 0 0 0 diff --git a/openmp/libomptarget/test/offloading/fortran/basic-target-region-3D-array.f90 b/openmp/libomptarget/test/offloading/fortran/basic-target-region-3D-array.f90 new file mode 100644 index 0000000000000..58f42138ad0af --- /dev/null +++ b/openmp/libomptarget/test/offloading/fortran/basic-target-region-3D-array.f90 @@ -0,0 +1,45 @@ +! Basic offloading test of a regular array explicitly +! passed within a target region +! REQUIRES: flang, amdgcn-amd-amdhsa +! UNSUPPORTED: nvptx64-nvidia-cuda +! UNSUPPORTED: nvptx64-nvidia-cuda-LTO +! UNSUPPORTED: aarch64-unknown-linux-gnu +! UNSUPPORTED: aarch64-unknown-linux-gnu-LTO +! UNSUPPORTED: x86_64-pc-linux-gnu +! UNSUPPORTED: x86_64-pc-linux-gnu-LTO + +! RUN: %libomptarget-compile-fortran-run-and-check-generic +program main + implicit none + integer :: x(2,2,2) + integer :: i = 1, j = 1, k = 1 + integer :: counter = 1 + do i = 1, 2 + do j = 1, 2 + do k = 1, 2 + x(i, j, k) = 0 + end do + end do + end do + +!$omp target map(tofrom:x, i, j, k, counter) + do i = 1, 2 + do j = 1, 2 + do k = 1, 2 + x(i, j, k) = counter + counter = counter + 1 + end do + end do + end do +!$omp end target + + do i = 1, 2 + do j = 1, 2 + do k = 1, 2 + print *, x(i, j, k) + end do + end do + end do +end program main + +! CHECK: 1 2 3 4 5 6 7 8 diff --git a/openmp/libomptarget/test/offloading/fortran/basic-target-region-array.f90 b/openmp/libomptarget/test/offloading/fortran/basic-target-region-array.f90 new file mode 100644 index 0000000000000..d3c799ff3334f --- /dev/null +++ b/openmp/libomptarget/test/offloading/fortran/basic-target-region-array.f90 @@ -0,0 +1,27 @@ +! Basic offloading test of a regular array explicitly +! passed within a target region +! REQUIRES: flang, amdgcn-amd-amdhsa +! UNSUPPORTED: nvptx64-nvidia-cuda +! UNSUPPORTED: nvptx64-nvidia-cuda-LTO +! UNSUPPORTED: aarch64-unknown-linux-gnu +! UNSUPPORTED: aarch64-unknown-linux-gnu-LTO +! UNSUPPORTED: x86_64-pc-linux-gnu +! UNSUPPORTED: x86_64-pc-linux-gnu-LTO + +! RUN: %libomptarget-compile-fortran-run-and-check-generic +program main + integer :: x(10) = (/0,0,0,0,0,0,0,0,0,0/) + integer :: i = 1 + integer :: j = 11 + + !$omp target map(tofrom:x, i, j) + do while (i <= j) + x(i) = i; + i = i + 1 + end do + !$omp end target + + PRINT *, x(:) +end program main + +! CHECK: 1 2 3 4 5 6 7 8 9 10