Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
95 changes: 95 additions & 0 deletions flang/include/flang/Optimizer/Builder/CUDAIntrinsicCall.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,95 @@
//==-- Builder/CUDAIntrinsicCall.h - lowering of CUDA intrinsics ---*-C++-*-==//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#ifndef FORTRAN_LOWER_CUDAINTRINSICCALL_H
#define FORTRAN_LOWER_CUDAINTRINSICCALL_H

#include "flang/Optimizer/Builder/IntrinsicCall.h"
#include "mlir/Dialect/LLVMIR/NVVMDialect.h"

namespace fir {

/// Extension of IntrinsicLibrary that declares the handlers for CUDA
/// intrinsics. Only declarations live here; the definitions are provided
/// out of line.
///
/// Handler shapes visible in the declarations below:
///  - handlers returning mlir::Value take the result type and the arguments
///    as mlir::Value;
///  - handlers returning fir::ExtendedValue take the result type and the
///    arguments as fir::ExtendedValue;
///  - handlers returning void take only fir::ExtendedValue arguments.
///
/// Parameter names (`resultType`, `args`) follow the naming already used by
/// other handler declarations in IntrinsicLibrary.
struct CUDAIntrinsicLibrary : IntrinsicLibrary {

  // Construction: a builder and a location are required, both forwarded to
  // the IntrinsicLibrary base. Default and copy construction are disabled.
  CUDAIntrinsicLibrary() = delete;
  CUDAIntrinsicLibrary(const CUDAIntrinsicLibrary &) = delete;
  explicit CUDAIntrinsicLibrary(fir::FirOpBuilder &builder, mlir::Location loc)
      : IntrinsicLibrary(builder, loc) {}

  // CUDA intrinsic handlers, kept in the same order as originally declared.

  // genAtomic* handlers.
  mlir::Value genAtomicAdd(mlir::Type resultType,
                           llvm::ArrayRef<mlir::Value> args);
  fir::ExtendedValue genAtomicAddR2(mlir::Type resultType,
                                    llvm::ArrayRef<fir::ExtendedValue> args);
  template <int extent>
  fir::ExtendedValue
  genAtomicAddVector(mlir::Type resultType,
                     llvm::ArrayRef<fir::ExtendedValue> args);
  mlir::Value genAtomicAnd(mlir::Type resultType,
                           llvm::ArrayRef<mlir::Value> args);
  fir::ExtendedValue genAtomicCas(mlir::Type resultType,
                                  llvm::ArrayRef<fir::ExtendedValue> args);
  mlir::Value genAtomicDec(mlir::Type resultType,
                           llvm::ArrayRef<mlir::Value> args);
  fir::ExtendedValue genAtomicExch(mlir::Type resultType,
                                   llvm::ArrayRef<fir::ExtendedValue> args);
  mlir::Value genAtomicInc(mlir::Type resultType,
                           llvm::ArrayRef<mlir::Value> args);
  mlir::Value genAtomicMax(mlir::Type resultType,
                           llvm::ArrayRef<mlir::Value> args);
  mlir::Value genAtomicMin(mlir::Type resultType,
                           llvm::ArrayRef<mlir::Value> args);
  mlir::Value genAtomicOr(mlir::Type resultType,
                          llvm::ArrayRef<mlir::Value> args);
  mlir::Value genAtomicSub(mlir::Type resultType,
                           llvm::ArrayRef<mlir::Value> args);
  fir::ExtendedValue genAtomicXor(mlir::Type resultType,
                                  llvm::ArrayRef<fir::ExtendedValue> args);

  // genBarrier* handlers.
  mlir::Value genBarrierArrive(mlir::Type resultType,
                               llvm::ArrayRef<mlir::Value> args);
  mlir::Value genBarrierArriveCnt(mlir::Type resultType,
                                  llvm::ArrayRef<mlir::Value> args);
  void genBarrierInit(llvm::ArrayRef<fir::ExtendedValue> args);
  mlir::Value genBarrierTryWait(mlir::Type resultType,
                                llvm::ArrayRef<mlir::Value> args);
  mlir::Value genBarrierTryWaitSleep(mlir::Type resultType,
                                     llvm::ArrayRef<mlir::Value> args);

  void genFenceProxyAsync(llvm::ArrayRef<fir::ExtendedValue> args);

  // Handler template parameterized by a function name and an extent.
  template <const char *fctName, int extent>
  fir::ExtendedValue genLDXXFunc(mlir::Type resultType,
                                 llvm::ArrayRef<fir::ExtendedValue> args);

  // genMatch*Sync handlers.
  mlir::Value genMatchAllSync(mlir::Type resultType,
                              llvm::ArrayRef<mlir::Value> args);
  mlir::Value genMatchAnySync(mlir::Type resultType,
                              llvm::ArrayRef<mlir::Value> args);

  // Handler template parameterized by an operation type.
  template <typename OpTy>
  mlir::Value genNVVMTime(mlir::Type resultType,
                          llvm::ArrayRef<mlir::Value> args);

  // genSync* handlers.
  void genSyncThreads(llvm::ArrayRef<fir::ExtendedValue> args);
  mlir::Value genSyncThreadsAnd(mlir::Type resultType,
                                llvm::ArrayRef<mlir::Value> args);
  mlir::Value genSyncThreadsCount(mlir::Type resultType,
                                  llvm::ArrayRef<mlir::Value> args);
  mlir::Value genSyncThreadsOr(mlir::Type resultType,
                               llvm::ArrayRef<mlir::Value> args);
  void genSyncWarp(llvm::ArrayRef<fir::ExtendedValue> args);

  // genThis* handlers.
  mlir::Value genThisGrid(mlir::Type resultType,
                          llvm::ArrayRef<mlir::Value> args);
  mlir::Value genThisThreadBlock(mlir::Type resultType,
                                 llvm::ArrayRef<mlir::Value> args);
  mlir::Value genThisWarp(mlir::Type resultType,
                          llvm::ArrayRef<mlir::Value> args);

  // genThreadFence* handlers.
  void genThreadFence(llvm::ArrayRef<fir::ExtendedValue> args);
  void genThreadFenceBlock(llvm::ArrayRef<fir::ExtendedValue> args);
  void genThreadFenceSystem(llvm::ArrayRef<fir::ExtendedValue> args);

  // genTMABulk* handlers. The Load/Store variants carry a type/kind suffix
  // (C4, C8, I4, I8, R2, R4, R8) in their names.
  void genTMABulkCommitGroup(llvm::ArrayRef<fir::ExtendedValue> args);
  void genTMABulkG2S(llvm::ArrayRef<fir::ExtendedValue> args);
  void genTMABulkLoadC4(llvm::ArrayRef<fir::ExtendedValue> args);
  void genTMABulkLoadC8(llvm::ArrayRef<fir::ExtendedValue> args);
  void genTMABulkLoadI4(llvm::ArrayRef<fir::ExtendedValue> args);
  void genTMABulkLoadI8(llvm::ArrayRef<fir::ExtendedValue> args);
  void genTMABulkLoadR2(llvm::ArrayRef<fir::ExtendedValue> args);
  void genTMABulkLoadR4(llvm::ArrayRef<fir::ExtendedValue> args);
  void genTMABulkLoadR8(llvm::ArrayRef<fir::ExtendedValue> args);
  void genTMABulkS2G(llvm::ArrayRef<fir::ExtendedValue> args);
  void genTMABulkStoreC4(llvm::ArrayRef<fir::ExtendedValue> args);
  void genTMABulkStoreC8(llvm::ArrayRef<fir::ExtendedValue> args);
  void genTMABulkStoreI4(llvm::ArrayRef<fir::ExtendedValue> args);
  void genTMABulkStoreI8(llvm::ArrayRef<fir::ExtendedValue> args);
  void genTMABulkStoreR2(llvm::ArrayRef<fir::ExtendedValue> args);
  void genTMABulkStoreR4(llvm::ArrayRef<fir::ExtendedValue> args);
  void genTMABulkStoreR8(llvm::ArrayRef<fir::ExtendedValue> args);
  void genTMABulkWaitGroup(llvm::ArrayRef<fir::ExtendedValue> args);

  // Handler template parameterized by the NVVM vote-sync kind.
  template <mlir::NVVM::VoteSyncKind kind>
  mlir::Value genVoteSync(mlir::Type resultType,
                          llvm::ArrayRef<mlir::Value> args);
};

/// Look up the CUDA intrinsic handler associated with \p name.
/// NOTE(review): the result is a pointer, so a missing handler is presumably
/// reported as nullptr — confirm against the definition before relying on it.
const IntrinsicHandler *findCUDAIntrinsicHandler(llvm::StringRef name);

} // namespace fir

#endif // FORTRAN_LOWER_CUDAINTRINSICCALL_H
64 changes: 0 additions & 64 deletions flang/include/flang/Optimizer/Builder/IntrinsicCall.h
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,6 @@
#include "flang/Runtime/iostat-consts.h"
#include "mlir/Dialect/Complex/IR/Complex.h"
#include "mlir/Dialect/LLVMIR/LLVMDialect.h"
#include "mlir/Dialect/LLVMIR/NVVMDialect.h"
#include "mlir/Dialect/Math/IR/Math.h"
#include <optional>

Expand Down Expand Up @@ -187,37 +186,13 @@ struct IntrinsicLibrary {
mlir::Value genAnint(mlir::Type, llvm::ArrayRef<mlir::Value>);
fir::ExtendedValue genAny(mlir::Type, llvm::ArrayRef<fir::ExtendedValue>);
mlir::Value genAtanpi(mlir::Type, llvm::ArrayRef<mlir::Value>);
mlir::Value genAtomicAdd(mlir::Type, llvm::ArrayRef<mlir::Value>);
fir::ExtendedValue genAtomicAddR2(mlir::Type,
llvm::ArrayRef<fir::ExtendedValue>);
template <int extent>
fir::ExtendedValue genAtomicAddVector(mlir::Type,
llvm::ArrayRef<fir::ExtendedValue>);
mlir::Value genAtomicAnd(mlir::Type, llvm::ArrayRef<mlir::Value>);
fir::ExtendedValue genAtomicCas(mlir::Type,
llvm::ArrayRef<fir::ExtendedValue>);
mlir::Value genAtomicDec(mlir::Type, llvm::ArrayRef<mlir::Value>);
fir::ExtendedValue genAtomicExch(mlir::Type,
llvm::ArrayRef<fir::ExtendedValue>);
mlir::Value genAtomicInc(mlir::Type, llvm::ArrayRef<mlir::Value>);
mlir::Value genAtomicMax(mlir::Type, llvm::ArrayRef<mlir::Value>);
mlir::Value genAtomicMin(mlir::Type, llvm::ArrayRef<mlir::Value>);
mlir::Value genAtomicOr(mlir::Type, llvm::ArrayRef<mlir::Value>);
mlir::Value genAtomicSub(mlir::Type, llvm::ArrayRef<mlir::Value>);
fir::ExtendedValue genAtomicXor(mlir::Type,
llvm::ArrayRef<fir::ExtendedValue>);
fir::ExtendedValue
genCommandArgumentCount(mlir::Type, llvm::ArrayRef<fir::ExtendedValue>);
mlir::Value genAsind(mlir::Type, llvm::ArrayRef<mlir::Value>);
mlir::Value genAsinpi(mlir::Type, llvm::ArrayRef<mlir::Value>);
fir::ExtendedValue genAssociated(mlir::Type,
llvm::ArrayRef<fir::ExtendedValue>);
mlir::Value genAtand(mlir::Type, llvm::ArrayRef<mlir::Value>);
mlir::Value genBarrierArrive(mlir::Type, llvm::ArrayRef<mlir::Value>);
mlir::Value genBarrierArriveCnt(mlir::Type, llvm::ArrayRef<mlir::Value>);
void genBarrierInit(llvm::ArrayRef<fir::ExtendedValue>);
mlir::Value genBarrierTryWait(mlir::Type, llvm::ArrayRef<mlir::Value>);
mlir::Value genBarrierTryWaitSleep(mlir::Type, llvm::ArrayRef<mlir::Value>);
fir::ExtendedValue genBesselJn(mlir::Type,
llvm::ArrayRef<fir::ExtendedValue>);
fir::ExtendedValue genBesselYn(mlir::Type,
Expand All @@ -239,9 +214,6 @@ struct IntrinsicLibrary {
fir::ExtendedValue genCount(mlir::Type, llvm::ArrayRef<fir::ExtendedValue>);
void genCpuTime(llvm::ArrayRef<fir::ExtendedValue>);
fir::ExtendedValue genCshift(mlir::Type, llvm::ArrayRef<fir::ExtendedValue>);
template <const char *fctName, int extent>
fir::ExtendedValue genCUDALDXXFunc(mlir::Type,
llvm::ArrayRef<fir::ExtendedValue>);
fir::ExtendedValue genCAssociatedCFunPtr(mlir::Type,
llvm::ArrayRef<fir::ExtendedValue>);
fir::ExtendedValue genCAssociatedCPtr(mlir::Type,
Expand Down Expand Up @@ -281,7 +253,6 @@ struct IntrinsicLibrary {
llvm::ArrayRef<fir::ExtendedValue>);
template <Extremum, ExtremumBehavior>
mlir::Value genExtremum(mlir::Type, llvm::ArrayRef<mlir::Value>);
void genFenceProxyAsync(llvm::ArrayRef<fir::ExtendedValue>);
mlir::Value genFloor(mlir::Type, llvm::ArrayRef<mlir::Value>);
mlir::Value genFraction(mlir::Type resultType,
mlir::ArrayRef<mlir::Value> args);
Expand Down Expand Up @@ -373,8 +344,6 @@ struct IntrinsicLibrary {
mlir::Value genMalloc(mlir::Type, llvm::ArrayRef<mlir::Value>);
template <typename Shift>
mlir::Value genMask(mlir::Type, llvm::ArrayRef<mlir::Value>);
mlir::Value genMatchAllSync(mlir::Type, llvm::ArrayRef<mlir::Value>);
mlir::Value genMatchAnySync(mlir::Type, llvm::ArrayRef<mlir::Value>);
fir::ExtendedValue genMatmul(mlir::Type, llvm::ArrayRef<fir::ExtendedValue>);
fir::ExtendedValue genMatmulTranspose(mlir::Type,
llvm::ArrayRef<fir::ExtendedValue>);
Expand All @@ -397,8 +366,6 @@ struct IntrinsicLibrary {
fir::ExtendedValue genNull(mlir::Type, llvm::ArrayRef<fir::ExtendedValue>);
fir::ExtendedValue genNumImages(mlir::Type,
llvm::ArrayRef<fir::ExtendedValue>);
template <typename OpTy>
mlir::Value genNVVMTime(mlir::Type, llvm::ArrayRef<mlir::Value>);
fir::ExtendedValue genPack(mlir::Type, llvm::ArrayRef<fir::ExtendedValue>);
fir::ExtendedValue genParity(mlir::Type, llvm::ArrayRef<fir::ExtendedValue>);
void genPerror(llvm::ArrayRef<fir::ExtendedValue>);
Expand Down Expand Up @@ -453,56 +420,25 @@ struct IntrinsicLibrary {
fir::ExtendedValue genSum(mlir::Type, llvm::ArrayRef<fir::ExtendedValue>);
void genSignalSubroutine(llvm::ArrayRef<fir::ExtendedValue>);
void genSleep(llvm::ArrayRef<fir::ExtendedValue>);
void genSyncThreads(llvm::ArrayRef<fir::ExtendedValue>);
mlir::Value genSyncThreadsAnd(mlir::Type, llvm::ArrayRef<mlir::Value>);
mlir::Value genSyncThreadsCount(mlir::Type, llvm::ArrayRef<mlir::Value>);
mlir::Value genSyncThreadsOr(mlir::Type, llvm::ArrayRef<mlir::Value>);
void genSyncWarp(llvm::ArrayRef<fir::ExtendedValue>);
fir::ExtendedValue genSystem(std::optional<mlir::Type>,
mlir::ArrayRef<fir::ExtendedValue> args);
void genSystemClock(llvm::ArrayRef<fir::ExtendedValue>);
mlir::Value genTand(mlir::Type, llvm::ArrayRef<mlir::Value>);
mlir::Value genTanpi(mlir::Type, llvm::ArrayRef<mlir::Value>);
mlir::Value genTime(mlir::Type, llvm::ArrayRef<mlir::Value>);
void genTMABulkCommitGroup(llvm::ArrayRef<fir::ExtendedValue>);
void genTMABulkG2S(llvm::ArrayRef<fir::ExtendedValue>);
void genTMABulkLoadC4(llvm::ArrayRef<fir::ExtendedValue>);
void genTMABulkLoadC8(llvm::ArrayRef<fir::ExtendedValue>);
void genTMABulkLoadI4(llvm::ArrayRef<fir::ExtendedValue>);
void genTMABulkLoadI8(llvm::ArrayRef<fir::ExtendedValue>);
void genTMABulkLoadR2(llvm::ArrayRef<fir::ExtendedValue>);
void genTMABulkLoadR4(llvm::ArrayRef<fir::ExtendedValue>);
void genTMABulkLoadR8(llvm::ArrayRef<fir::ExtendedValue>);
void genTMABulkS2G(llvm::ArrayRef<fir::ExtendedValue>);
void genTMABulkStoreI4(llvm::ArrayRef<fir::ExtendedValue>);
void genTMABulkStoreI8(llvm::ArrayRef<fir::ExtendedValue>);
void genTMABulkStoreR2(llvm::ArrayRef<fir::ExtendedValue>);
void genTMABulkStoreR4(llvm::ArrayRef<fir::ExtendedValue>);
void genTMABulkStoreR8(llvm::ArrayRef<fir::ExtendedValue>);
void genTMABulkStoreC4(llvm::ArrayRef<fir::ExtendedValue>);
void genTMABulkStoreC8(llvm::ArrayRef<fir::ExtendedValue>);
void genTMABulkWaitGroup(llvm::ArrayRef<fir::ExtendedValue>);
mlir::Value genTrailz(mlir::Type, llvm::ArrayRef<mlir::Value>);
fir::ExtendedValue genTransfer(mlir::Type,
llvm::ArrayRef<fir::ExtendedValue>);
fir::ExtendedValue genTranspose(mlir::Type,
llvm::ArrayRef<fir::ExtendedValue>);
mlir::Value genThisGrid(mlir::Type, llvm::ArrayRef<mlir::Value>);
fir::ExtendedValue genThisImage(mlir::Type,
llvm::ArrayRef<fir::ExtendedValue>);
mlir::Value genThisThreadBlock(mlir::Type, llvm::ArrayRef<mlir::Value>);
mlir::Value genThisWarp(mlir::Type, llvm::ArrayRef<mlir::Value>);
void genThreadFence(llvm::ArrayRef<fir::ExtendedValue>);
void genThreadFenceBlock(llvm::ArrayRef<fir::ExtendedValue>);
void genThreadFenceSystem(llvm::ArrayRef<fir::ExtendedValue>);
fir::ExtendedValue genTrim(mlir::Type, llvm::ArrayRef<fir::ExtendedValue>);
fir::ExtendedValue genUbound(mlir::Type, llvm::ArrayRef<fir::ExtendedValue>);
fir::ExtendedValue genUnlink(std::optional<mlir::Type> resultType,
llvm::ArrayRef<fir::ExtendedValue> args);
fir::ExtendedValue genUnpack(mlir::Type, llvm::ArrayRef<fir::ExtendedValue>);
fir::ExtendedValue genVerify(mlir::Type, llvm::ArrayRef<fir::ExtendedValue>);
template <mlir::NVVM::VoteSyncKind kind>
mlir::Value genVoteSync(mlir::Type, llvm::ArrayRef<mlir::Value>);

/// Implement all conversion functions like DBLE, the first argument is
/// the value to convert. There may be an additional KIND arguments that
Expand Down
1 change: 1 addition & 0 deletions flang/lib/Optimizer/Builder/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ add_flang_library(FIRBuilder
BoxValue.cpp
Character.cpp
Complex.cpp
CUDAIntrinsicCall.cpp
CUFCommon.cpp
DoLoopHelper.cpp
FIRBuilder.cpp
Expand Down
Loading