Skip to content

Commit 046d6a3

Browse files
authored
[Flang][OpenMP] Additional global address space modifications for device (#119585)
A prior PR added a portion of the global address space modifications required for `declare target to`; this PR adds a small remaining part left over from that PR. The intent is to produce more correct IR that the backends (in particular AMDGPU) can handle more appropriately for optimisations and code correctness. This is 1/3 of the PRs required to enable `declare target to` mapping; see PR 3/3 to check for fully green test passes (this one will fail a number of tests due to some dependencies). Co-authored-by: Raghu Maddhipatla raghu.maddhipatla@amd.com
1 parent cbd99c5 commit 046d6a3

File tree

4 files changed

+69
-18
lines changed

4 files changed

+69
-18
lines changed

flang/lib/Optimizer/CodeGen/CodeGen.cpp

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -132,6 +132,8 @@ addLLVMOpBundleAttrs(mlir::ConversionPatternRewriter &rewriter,
132132

133133
namespace {
134134

135+
// Replaces an existing operation with an AddressOfOp or an AddrSpaceCastOp
136+
// depending on the existing address spaces of the type.
135137
mlir::Value replaceWithAddrOfOrASCast(mlir::ConversionPatternRewriter &rewriter,
136138
mlir::Location loc,
137139
std::uint64_t globalAS,

flang/lib/Optimizer/CodeGen/FIROpPatterns.cpp

Lines changed: 16 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -349,7 +349,10 @@ unsigned ConvertFIRToLLVMPattern::getAllocaAddressSpace(
349349
mlir::Operation *parentOp = rewriter.getInsertionBlock()->getParentOp();
350350
assert(parentOp != nullptr &&
351351
"expected insertion block to have parent operation");
352-
if (auto module = parentOp->getParentOfType<mlir::ModuleOp>())
352+
auto module = mlir::isa<mlir::ModuleOp>(parentOp)
353+
? mlir::cast<mlir::ModuleOp>(parentOp)
354+
: parentOp->getParentOfType<mlir::ModuleOp>();
355+
if (module)
353356
if (mlir::Attribute addrSpace =
354357
mlir::DataLayout(module).getAllocaMemorySpace())
355358
return llvm::cast<mlir::IntegerAttr>(addrSpace).getUInt();
@@ -361,7 +364,10 @@ unsigned ConvertFIRToLLVMPattern::getProgramAddressSpace(
361364
mlir::Operation *parentOp = rewriter.getInsertionBlock()->getParentOp();
362365
assert(parentOp != nullptr &&
363366
"expected insertion block to have parent operation");
364-
if (auto module = parentOp->getParentOfType<mlir::ModuleOp>())
367+
auto module = mlir::isa<mlir::ModuleOp>(parentOp)
368+
? mlir::cast<mlir::ModuleOp>(parentOp)
369+
: parentOp->getParentOfType<mlir::ModuleOp>();
370+
if (module)
365371
if (mlir::Attribute addrSpace =
366372
mlir::DataLayout(module).getProgramMemorySpace())
367373
return llvm::cast<mlir::IntegerAttr>(addrSpace).getUInt();
@@ -373,8 +379,14 @@ unsigned ConvertFIRToLLVMPattern::getGlobalAddressSpace(
373379
mlir::Operation *parentOp = rewriter.getInsertionBlock()->getParentOp();
374380
assert(parentOp != nullptr &&
375381
"expected insertion block to have parent operation");
376-
auto dataLayout = mlir::DataLayout::closest(parentOp);
377-
return fir::factory::getGlobalAddressSpace(&dataLayout);
382+
auto module = mlir::isa<mlir::ModuleOp>(parentOp)
383+
? mlir::cast<mlir::ModuleOp>(parentOp)
384+
: parentOp->getParentOfType<mlir::ModuleOp>();
385+
if (module)
386+
if (mlir::Attribute addrSpace =
387+
mlir::DataLayout(module).getGlobalMemorySpace())
388+
return llvm::cast<mlir::IntegerAttr>(addrSpace).getUInt();
389+
return defaultAddressSpace;
378390
}
379391

380392
} // namespace fir

flang/test/Fir/convert-to-llvm.fir

Lines changed: 43 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -3,8 +3,8 @@
33
// RUN: fir-opt --split-input-file --fir-to-llvm-ir="target=i386-unknown-linux-gnu" %s | FileCheck %s --check-prefixes=CHECK,CHECK-COMDAT,GENERIC
44
// RUN: fir-opt --split-input-file --fir-to-llvm-ir="target=powerpc64le-unknown-linux-gnu" %s | FileCheck %s --check-prefixes=CHECK,CHECK-COMDAT,GENERIC
55
// RUN: fir-opt --split-input-file --fir-to-llvm-ir="target=x86_64-pc-win32" %s | FileCheck %s --check-prefixes=CHECK,CHECK-COMDAT,GENERIC
6-
// RUN: fir-opt --split-input-file --fir-to-llvm-ir="target=aarch64-apple-darwin" %s | FileCheck %s --check-prefixes=CHECK,CHECK-NO-COMDAT,GENERIC
7-
// RUN: fir-opt --split-input-file --fir-to-llvm-ir="target=amdgcn-amd-amdhsa, datalayout=e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-P0" %s | FileCheck -check-prefixes=CHECK,AMDGPU %s
6+
// RUN: fir-opt --split-input-file --fir-to-llvm-ir="target=aarch64-apple-darwin" %s | FileCheck %s --check-prefixes=CHECK,CHECK-NO-COMDAT,GENERIC
7+
// RUN: fir-opt --split-input-file --fir-to-llvm-ir="target=amdgcn-amd-amdhsa, datalayout=e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-p7:160:256:256:32-p8:128:128-p9:192:256:256:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7:8:9" %s | FileCheck -check-prefixes=CHECK,AMDGPU %s
88

99
//===================================================
1010
// SUMMARY: Tests for FIR --> LLVM MLIR conversion
@@ -17,7 +17,10 @@ fir.global @g_i0 : i32 {
1717
fir.has_value %1 : i32
1818
}
1919

20-
// CHECK: llvm.mlir.global external @g_i0() {addr_space = 0 : i32} : i32 {
20+
// CHECK: llvm.mlir.global external @g_i0()
21+
// GENERIC-SAME: {addr_space = 0 : i32}
22+
// AMDGPU-SAME: {addr_space = 1 : i32}
23+
// CHECK-SAME: i32 {
2124
// CHECK: %[[C0:.*]] = llvm.mlir.constant(0 : i32) : i32
2225
// CHECK: llvm.return %[[C0]] : i32
2326
// CHECK: }
@@ -29,25 +32,37 @@ fir.global @g_ci5 constant : i32 {
2932
fir.has_value %c : i32
3033
}
3134

32-
// CHECK: llvm.mlir.global external constant @g_ci5() {addr_space = 0 : i32} : i32 {
35+
// CHECK: llvm.mlir.global external constant @g_ci5()
36+
// GENERIC-SAME: {addr_space = 0 : i32}
37+
// AMDGPU-SAME: {addr_space = 1 : i32}
38+
// CHECK-SAME: i32 {
3339
// CHECK: %[[C5:.*]] = llvm.mlir.constant(5 : i32) : i32
3440
// CHECK: llvm.return %[[C5]] : i32
3541
// CHECK: }
3642

3743
// -----
3844

3945
fir.global internal @i_i515 (515:i32) : i32
40-
// CHECK: llvm.mlir.global internal @i_i515(515 : i32) {addr_space = 0 : i32} : i32
46+
// CHECK: llvm.mlir.global internal @i_i515(515 : i32)
47+
// GENERIC-SAME: {addr_space = 0 : i32}
48+
// AMDGPU-SAME: {addr_space = 1 : i32}
49+
// CHECK-SAME: : i32
4150

4251
// -----
4352

4453
fir.global common @C_i511 (0:i32) : i32
45-
// CHECK: llvm.mlir.global common @C_i511(0 : i32) {addr_space = 0 : i32} : i32
54+
// CHECK: llvm.mlir.global common @C_i511(0 : i32)
55+
// GENERIC-SAME: {addr_space = 0 : i32}
56+
// AMDGPU-SAME: {addr_space = 1 : i32}
57+
// CHECK-SAME: : i32
4658

4759
// -----
4860

4961
fir.global weak @w_i86 (86:i32) : i32
50-
// CHECK: llvm.mlir.global weak @w_i86(86 : i32) {addr_space = 0 : i32} : i32
62+
// CHECK: llvm.mlir.global weak @w_i86(86 : i32)
63+
// GENERIC-SAME: {addr_space = 0 : i32}
64+
// AMDGPU-SAME: {addr_space = 1 : i32}
65+
// CHECK-SAME: : i32
5166

5267
// -----
5368

@@ -69,9 +84,13 @@ fir.global @symbol : i64 {
6984
fir.has_value %0 : i64
7085
}
7186

72-
// CHECK: %{{.*}} = llvm.mlir.addressof @[[SYMBOL:.*]] : !llvm.ptr
87+
// CHECK: %[[ADDROF:.*]] = llvm.mlir.addressof @[[SYMBOL:.*]] : !llvm.ptr
88+
// AMDGPU: %{{.*}} = llvm.addrspacecast %[[ADDROF]] : !llvm.ptr<1> to !llvm.ptr
7389

74-
// CHECK: llvm.mlir.global external @[[SYMBOL]]() {addr_space = 0 : i32} : i64 {
90+
// CHECK: llvm.mlir.global external @[[SYMBOL]]()
91+
// GENERIC-SAME: {addr_space = 0 : i32}
92+
// AMDGPU-SAME: {addr_space = 1 : i32}
93+
// CHECK-SAME: i64 {
7594
// CHECK: %{{.*}} = llvm.mlir.constant(1 : i64) : i64
7695
// CHECK: llvm.return %{{.*}} : i64
7796
// CHECK: }
@@ -88,7 +107,10 @@ fir.global internal @_QEmultiarray : !fir.array<32x32xi32> {
88107
fir.has_value %2 : !fir.array<32x32xi32>
89108
}
90109

91-
// CHECK: llvm.mlir.global internal @_QEmultiarray() {addr_space = 0 : i32} : !llvm.array<32 x array<32 x i32>> {
110+
// CHECK: llvm.mlir.global internal @_QEmultiarray()
111+
// GENERIC-SAME: {addr_space = 0 : i32}
112+
// AMDGPU-SAME: {addr_space = 1 : i32}
113+
// CHECK-SAME: : !llvm.array<32 x array<32 x i32>> {
92114
// CHECK: %[[CST:.*]] = llvm.mlir.constant(dense<1> : vector<32x32xi32>) : !llvm.array<32 x array<32 x i32>>
93115
// CHECK: llvm.return %[[CST]] : !llvm.array<32 x array<32 x i32>>
94116
// CHECK: }
@@ -105,7 +127,10 @@ fir.global internal @_QEmultiarray : !fir.array<32xi32> {
105127
fir.has_value %2 : !fir.array<32xi32>
106128
}
107129

108-
// CHECK: llvm.mlir.global internal @_QEmultiarray() {addr_space = 0 : i32} : !llvm.array<32 x i32> {
130+
// CHECK: llvm.mlir.global internal @_QEmultiarray()
131+
// GENERIC-SAME: {addr_space = 0 : i32}
132+
// AMDGPU-SAME: {addr_space = 1 : i32}
133+
// CHECK-SAME: : !llvm.array<32 x i32> {
109134
// CHECK: %[[CST:.*]] = llvm.mlir.constant(1 : i32) : i32
110135
// CHECK: %{{.*}} = llvm.mlir.undef : !llvm.array<32 x i32>
111136
// CHECK: %{{.*}} = llvm.insertvalue %[[CST]], %{{.*}}[5] : !llvm.array<32 x i32>
@@ -1801,7 +1826,9 @@ func.func @embox1(%arg0: !fir.ref<!fir.type<_QMtest_dinitTtseq{i:i32}>>) {
18011826
// CHECK: %{{.*}} = llvm.insertvalue %[[VERSION]], %{{.*}}[2] : !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8, ptr, array<1 x i64>)>
18021827
// CHECK: %[[TYPE_CODE_I8:.*]] = llvm.trunc %[[TYPE_CODE]] : i32 to i8
18031828
// CHECK: %{{.*}} = llvm.insertvalue %[[TYPE_CODE_I8]], %{{.*}}[4] : !llvm.struct<(ptr, i{{.*}}, i{{.*}}, i{{.*}}, i{{.*}}, i{{.*}}, i{{.*}}, ptr, array<1 x i{{.*}}>)>
1804-
// CHECK: %[[TDESC:.*]] = llvm.mlir.addressof @_QMtest_dinitE.dt.tseq : !llvm.ptr
1829+
// GENERIC: %[[TDESC:.*]] = llvm.mlir.addressof @_QMtest_dinitE.dt.tseq : !llvm.ptr
1830+
// AMDGPU: %[[ADDROF:.*]] = llvm.mlir.addressof @_QMtest_dinitE.dt.tseq : !llvm.ptr<1>
1831+
// AMDGPU: %[[TDESC:.*]] = llvm.addrspacecast %[[ADDROF]] : !llvm.ptr<1> to !llvm.ptr
18051832
// CHECK: %{{.*}} = llvm.insertvalue %[[TDESC]], %{{.*}}[7] : !llvm.struct<(ptr, i{{.*}}, i{{.*}}, i{{.*}}, i{{.*}}, i{{.*}}, i{{.*}}, ptr, array<1 x i{{.*}}>)>
18061833

18071834
// -----
@@ -2824,7 +2851,10 @@ func.func @coordinate_array_unknown_size_1d(%arg0: !fir.ptr<!fir.array<? x i32>>
28242851

28252852
fir.global common @c_(dense<0> : vector<4294967296xi8>) : !fir.array<4294967296xi8>
28262853

2827-
// CHECK: llvm.mlir.global common @c_(dense<0> : vector<4294967296xi8>) {addr_space = 0 : i32} : !llvm.array<4294967296 x i8>
2854+
// CHECK: llvm.mlir.global common @c_(dense<0> : vector<4294967296xi8>)
2855+
// GENERIC-SAME: {addr_space = 0 : i32}
2856+
// AMDGPU-SAME: {addr_space = 1 : i32}
2857+
// CHECK-SAME: !llvm.array<4294967296 x i8>
28282858

28292859
// -----
28302860

llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7412,6 +7412,12 @@ static void FixupDebugInfoForOutlinedFunction(
74127412
}
74137413
}
74147414

7415+
// Returns the first operand of V when V's opcode is AddrSpaceCast; otherwise
// returns V itself unchanged. Only a single cast is peeled (no loop), so a
// chain of casts yields the next cast in the chain rather than its root.
static Value *removeASCastIfPresent(Value *V) {
7416+
if (Operator::getOpcode(V) == Instruction::AddrSpaceCast)
7417+
return cast<Operator>(V)->getOperand(0);
7418+
return V;
7419+
}
7420+
74157421
static Expected<Function *> createOutlinedFunction(
74167422
OpenMPIRBuilder &OMPBuilder, IRBuilderBase &Builder,
74177423
const OpenMPIRBuilder::TargetKernelDefaultAttrs &DefaultAttrs,
@@ -7575,7 +7581,8 @@ static Expected<Function *> createOutlinedFunction(
75757581
// preceding mapped arguments that refer to the same global that may be
75767582
// separate segments. To prevent this, we defer global processing until all
75777583
// other processing has been performed.
7578-
if (isa<GlobalValue>(Input)) {
7584+
if (llvm::isa<llvm::GlobalValue, llvm::GlobalObject, llvm::GlobalVariable>(
7585+
removeASCastIfPresent(Input))) {
75797586
DeferredReplacement.push_back(std::make_pair(Input, InputCopy));
75807587
continue;
75817588
}

0 commit comments

Comments
 (0)