Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions flang/include/flang/Optimizer/Builder/IntrinsicCall.h
Original file line number Diff line number Diff line change
Expand Up @@ -188,6 +188,8 @@ struct IntrinsicLibrary {
fir::ExtendedValue genAny(mlir::Type, llvm::ArrayRef<fir::ExtendedValue>);
mlir::Value genAtanpi(mlir::Type, llvm::ArrayRef<mlir::Value>);
mlir::Value genAtomicAdd(mlir::Type, llvm::ArrayRef<mlir::Value>);
/// Lowering entry point for the "atomicaddr2" intrinsic (atomic add on a pair
/// of half-precision values). Unlike genAtomicAdd, it receives and returns
/// fir::ExtendedValue rather than plain mlir::Value.
fir::ExtendedValue genAtomicAddR2(mlir::Type,
llvm::ArrayRef<fir::ExtendedValue>);
mlir::Value genAtomicAnd(mlir::Type, llvm::ArrayRef<mlir::Value>);
fir::ExtendedValue genAtomicCas(mlir::Type,
llvm::ArrayRef<fir::ExtendedValue>);
Expand Down
43 changes: 42 additions & 1 deletion flang/lib/Optimizer/Builder/IntrinsicCall.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -294,6 +294,10 @@ static constexpr IntrinsicHandler handlers[]{
{"atomicaddf", &I::genAtomicAdd, {{{"a", asAddr}, {"v", asValue}}}, false},
{"atomicaddi", &I::genAtomicAdd, {{{"a", asAddr}, {"v", asValue}}}, false},
{"atomicaddl", &I::genAtomicAdd, {{{"a", asAddr}, {"v", asValue}}}, false},
{"atomicaddr2",
&I::genAtomicAddR2,
{{{"a", asAddr}, {"v", asAddr}}},
false},
{"atomicandi", &I::genAtomicAnd, {{{"a", asAddr}, {"v", asValue}}}, false},
{"atomiccasd",
&I::genAtomicCas,
Expand Down Expand Up @@ -3119,14 +3123,51 @@ static mlir::Value genAtomBinOp(fir::FirOpBuilder &builder, mlir::Location &loc,
/// Lower an atomic add on scalar operands.
///
/// \param resultType result type of the intrinsic (not consulted here).
/// \param args exactly two values: args[0] is the target, args[1] the addend.
/// \return the value produced by genAtomBinOp for the selected operation.
mlir::Value IntrinsicLibrary::genAtomicAdd(mlir::Type resultType,
                                           llvm::ArrayRef<mlir::Value> args) {
  assert(args.size() == 2);

  // The addend's type picks the flavour: integer addends use `add`, every
  // other type falls back to the floating-point `fadd`.
  const bool addendIsInteger =
      mlir::isa<mlir::IntegerType>(args[1].getType());
  mlir::LLVM::AtomicBinOp addKind = mlir::LLVM::AtomicBinOp::fadd;
  if (addendIsInteger)
    addKind = mlir::LLVM::AtomicBinOp::add;

  return genAtomBinOp(builder, loc, addKind, args[0], args[1]);
}

/// Lower the "atomicaddr2" intrinsic: an atomic floating-point add of two
/// half-precision values, performed as a single vector<2xf16> operation.
///
/// \param resultType result type of the intrinsic (not consulted here).
/// \param args exactly two extended values. args[0] designates the target; if
///        its base is a box, the base address is extracted first. args[1] is
///        the addend, passed by address; its elements 0 and 1 are loaded as
///        f16.
/// \return element 0 of the genAtomBinOp result after bit-casting it from
///         vector<2xf16> to vector<1xi32> (presumably the previous contents
///         of the target, as returned by the atomic op -- confirm against
///         genAtomBinOp).
fir::ExtendedValue
IntrinsicLibrary::genAtomicAddR2(mlir::Type resultType,
                                 llvm::ArrayRef<fir::ExtendedValue> args) {
  assert(args.size() == 2);

  mlir::Value a = fir::getBase(args[0]);

  // The target may be described by a box; the atomic op needs its address.
  if (mlir::isa<fir::BaseBoxType>(a.getType()))
    a = fir::BoxAddrOp::create(builder, loc, a);

  // Every op below is created with the IntrinsicLibrary `loc` member, the
  // same location used for the BoxAddrOp above and by genAtomicAdd. A local
  // `loc` set to builder.getUnknownLoc() used to shadow it here, which
  // stripped the source location from all of the generated operations.
  auto f16Ty = builder.getF16Type();
  auto i32Ty = builder.getI32Type();
  auto vecF16Ty = mlir::VectorType::get({2}, f16Ty);
  mlir::Type idxTy = builder.getIndexType();
  auto f16RefTy = fir::ReferenceType::get(f16Ty);

  // Load the two f16 addends from positions 0 and 1 of the second argument.
  mlir::Value v = fir::getBase(args[1]);
  auto zero = builder.createIntegerConstant(loc, idxTy, 0);
  auto one = builder.createIntegerConstant(loc, idxTy, 1);
  auto v1Coord = fir::CoordinateOp::create(builder, loc, f16RefTy, v, zero);
  auto v2Coord = fir::CoordinateOp::create(builder, loc, f16RefTy, v, one);
  auto v1 = fir::LoadOp::create(builder, loc, v1Coord);
  auto v2 = fir::LoadOp::create(builder, loc, v2Coord);

  // Pack both halves into one vector<2xf16> so a single atomic fadd covers
  // the pair.
  mlir::Value undef = mlir::LLVM::UndefOp::create(builder, loc, vecF16Ty);
  mlir::Value vec1 = mlir::LLVM::InsertElementOp::create(
      builder, loc, undef, v1, builder.createIntegerConstant(loc, i32Ty, 0));
  mlir::Value vec2 = mlir::LLVM::InsertElementOp::create(
      builder, loc, vec1, v2, builder.createIntegerConstant(loc, i32Ty, 1));
  auto res = genAtomBinOp(builder, loc, mlir::LLVM::AtomicBinOp::fadd, a, vec2);

  // Reinterpret the 2 x 16-bit result as one 32-bit integer and return it.
  auto i32VecTy = mlir::VectorType::get({1}, i32Ty);
  mlir::Value vecI32 =
      mlir::vector::BitCastOp::create(builder, loc, i32VecTy, res);
  return mlir::vector::ExtractOp::create(builder, loc, vecI32,
                                         mlir::ArrayRef<int64_t>{0});
}

mlir::Value IntrinsicLibrary::genAtomicSub(mlir::Type resultType,
llvm::ArrayRef<mlir::Value> args) {
assert(args.size() == 2);
Expand Down
5 changes: 5 additions & 0 deletions flang/module/cudadevice.f90
Original file line number Diff line number Diff line change
Expand Up @@ -1171,6 +1171,11 @@ attributes(device) pure integer(8) function atomicaddl(address, val)
integer(8), intent(inout) :: address
integer(8), value :: val
end function
! Device interface for an atomic add on a pair of real(2) values.
! Both dummies are declared as two-element real(2) arrays: `address` is
! intent(inout) and `val` is intent(in). The result is integer(4).
attributes(device) pure integer(4) function atomicaddr2(address, val)
! The directive below relaxes argument matching for the listed dummies
! (presumably r = rank, d = device attribute -- confirm against the flang
! ignore_tkr documentation).
!dir$ ignore_tkr (rd) address, (d) val
real(2), dimension(2), intent(inout) :: address
real(2), dimension(2), intent(in) :: val
end function
end interface

interface atomicsub
Expand Down
4 changes: 4 additions & 0 deletions flang/test/Lower/CUDA/cuda-device-proc.cuf
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,8 @@ attributes(global) subroutine devsub()
integer :: smalltime
integer(4) :: res, offset
integer(8) :: resl
real(2) :: r2a(2)
real(2) :: tmp2(2)

integer :: tid
tid = threadIdx%x
Expand All @@ -34,6 +36,7 @@ attributes(global) subroutine devsub()
al = atomicadd(al, 1_8)
af = atomicadd(af, 1.0_4)
ad = atomicadd(ad, 1.0_8)
ai = atomicadd(r2a, tmp2)

ai = atomicsub(ai, 1_4)
al = atomicsub(al, 1_8)
Expand Down Expand Up @@ -128,6 +131,7 @@ end
! CHECK: %{{.*}} = llvm.atomicrmw add %{{.*}}, %{{.*}} seq_cst : !llvm.ptr, i64
! CHECK: %{{.*}} = llvm.atomicrmw fadd %{{.*}}, %{{.*}} seq_cst : !llvm.ptr, f32
! CHECK: %{{.*}} = llvm.atomicrmw fadd %{{.*}}, %{{.*}} seq_cst : !llvm.ptr, f64
! CHECK: %{{.*}} = llvm.atomicrmw fadd %{{.*}}, %{{.*}} seq_cst : !llvm.ptr, vector<2xf16>

! CHECK: %{{.*}} = llvm.atomicrmw sub %{{.*}}, %{{.*}} seq_cst : !llvm.ptr, i32
! CHECK: %{{.*}} = llvm.atomicrmw sub %{{.*}}, %{{.*}} seq_cst : !llvm.ptr, i64
Expand Down
Loading