diff --git a/clang/test/CodeGenCUDA/offloading-entries.cu b/clang/test/CodeGenCUDA/offloading-entries.cu index c4f8d2edad0a9..46235051f1e4f 100644 --- a/clang/test/CodeGenCUDA/offloading-entries.cu +++ b/clang/test/CodeGenCUDA/offloading-entries.cu @@ -5,6 +5,12 @@ // RUN: %clang_cc1 -std=c++11 -triple x86_64-unknown-linux-gnu -fgpu-rdc \ // RUN: --offload-new-driver -emit-llvm -o - -x hip %s | FileCheck \ // RUN: --check-prefix=HIP %s +// RUN: %clang_cc1 -std=c++11 -triple x86_64-unknown-windows-gnu -fgpu-rdc \ +// RUN: --offload-new-driver -emit-llvm -o - -x cuda %s | FileCheck \ +// RUN: --check-prefix=CUDA-COFF %s +// RUN: %clang_cc1 -std=c++11 -triple x86_64-unknown-windows-gnu -fgpu-rdc \ +// RUN: --offload-new-driver -emit-llvm -o - -x hip %s | FileCheck \ +// RUN: --check-prefix=HIP-COFF %s #include "Inputs/cuda.h" @@ -23,6 +29,20 @@ // HIP: @.omp_offloading.entry_name.2 = internal unnamed_addr constant [2 x i8] c"x\00" // HIP: @.omp_offloading.entry.x = weak constant %struct.__tgt_offload_entry { ptr @x, ptr @.omp_offloading.entry_name.2, i64 4, i32 0, i32 0 }, section "hip_offloading_entries", align 1 //. +// CUDA-COFF: @.omp_offloading.entry_name = internal unnamed_addr constant [8 x i8] c"_Z3foov\00" +// CUDA-COFF: @.omp_offloading.entry._Z3foov = weak constant %struct.__tgt_offload_entry { ptr @_Z18__device_stub__foov, ptr @.omp_offloading.entry_name, i64 0, i32 0, i32 0 }, section "cuda_offloading_entries$OE", align 1 +// CUDA-COFF: @.omp_offloading.entry_name.1 = internal unnamed_addr constant [8 x i8] c"_Z3barv\00" +// CUDA-COFF: @.omp_offloading.entry._Z3barv = weak constant %struct.__tgt_offload_entry { ptr @_Z18__device_stub__barv, ptr @.omp_offloading.entry_name.1, i64 0, i32 0, i32 0 }, section "cuda_offloading_entries$OE", align 1 +// CUDA-COFF: @.omp_offloading.entry_name.2 = internal unnamed_addr constant [2 x i8] c"x\00" +// CUDA-COFF: @.omp_offloading.entry.x = weak constant %struct.__tgt_offload_entry { ptr @x, ptr @.omp_offloading.entry_name.2, i64 4, i32 0, i32 0 }, section "cuda_offloading_entries$OE", align 1 +//. +// HIP-COFF: @.omp_offloading.entry_name = internal unnamed_addr constant [8 x i8] c"_Z3foov\00" +// HIP-COFF: @.omp_offloading.entry._Z3foov = weak constant %struct.__tgt_offload_entry { ptr @_Z3foov, ptr @.omp_offloading.entry_name, i64 0, i32 0, i32 0 }, section "hip_offloading_entries$OE", align 1 +// HIP-COFF: @.omp_offloading.entry_name.1 = internal unnamed_addr constant [8 x i8] c"_Z3barv\00" +// HIP-COFF: @.omp_offloading.entry._Z3barv = weak constant %struct.__tgt_offload_entry { ptr @_Z3barv, ptr @.omp_offloading.entry_name.1, i64 0, i32 0, i32 0 }, section "hip_offloading_entries$OE", align 1 +// HIP-COFF: @.omp_offloading.entry_name.2 = internal unnamed_addr constant [2 x i8] c"x\00" +// HIP-COFF: @.omp_offloading.entry.x = weak constant %struct.__tgt_offload_entry { ptr @x, ptr @.omp_offloading.entry_name.2, i64 4, i32 0, i32 0 }, section "hip_offloading_entries$OE", align 1 +//. // CUDA-LABEL: @_Z18__device_stub__foov( // CUDA-NEXT: entry: // CUDA-NEXT: [[TMP0:%.*]] = call i32 @cudaLaunch(ptr @_Z18__device_stub__foov) @@ -37,6 +57,20 @@ // HIP: setup.end: // HIP-NEXT: ret void // +// CUDA-COFF-LABEL: @_Z18__device_stub__foov( +// CUDA-COFF-NEXT: entry: +// CUDA-COFF-NEXT: [[TMP0:%.*]] = call i32 @cudaLaunch(ptr @_Z18__device_stub__foov) +// CUDA-COFF-NEXT: br label [[SETUP_END:%.*]] +// CUDA-COFF: setup.end: +// CUDA-COFF-NEXT: ret void +// +// HIP-COFF-LABEL: @_Z18__device_stub__foov( +// HIP-COFF-NEXT: entry: +// HIP-COFF-NEXT: [[TMP0:%.*]] = call i32 @hipLaunchByPtr(ptr @_Z3foov) +// HIP-COFF-NEXT: br label [[SETUP_END:%.*]] +// HIP-COFF: setup.end: +// HIP-COFF-NEXT: ret void +// __global__ void foo() {} // CUDA-LABEL: @_Z18__device_stub__barv( @@ -53,5 +87,19 @@ __global__ void foo() {} // HIP: setup.end: // HIP-NEXT: ret void // +// CUDA-COFF-LABEL: @_Z18__device_stub__barv( +// CUDA-COFF-NEXT: entry: +// CUDA-COFF-NEXT: [[TMP0:%.*]] = call i32 @cudaLaunch(ptr @_Z18__device_stub__barv) +// CUDA-COFF-NEXT: br label [[SETUP_END:%.*]] +// CUDA-COFF: setup.end: +// CUDA-COFF-NEXT: ret void +// +// HIP-COFF-LABEL: @_Z18__device_stub__barv( +// HIP-COFF-NEXT: entry: +// HIP-COFF-NEXT: [[TMP0:%.*]] = call i32 @hipLaunchByPtr(ptr @_Z3barv) +// HIP-COFF-NEXT: br label [[SETUP_END:%.*]] +// HIP-COFF: setup.end: +// HIP-COFF-NEXT: ret void +// __global__ void bar() {} __device__ int x = 1; diff --git a/clang/test/Driver/linker-wrapper-image.c b/clang/test/Driver/linker-wrapper-image.c index bb641a08bc023..73d3c40810c35 100644 --- a/clang/test/Driver/linker-wrapper-image.c +++ b/clang/test/Driver/linker-wrapper-image.c @@ -8,12 +8,18 @@ // RUN: %clang -cc1 %s -triple x86_64-unknown-linux-gnu -emit-obj -o %t.o \ // RUN: -fembed-offload-object=%t.out // RUN: clang-linker-wrapper --print-wrapped-module --dry-run --host-triple=x86_64-unknown-linux-gnu \ -// RUN: --linker-path=/usr/bin/ld -- %t.o -o a.out 2>&1 | FileCheck %s --check-prefix=OPENMP +// RUN: --linker-path=/usr/bin/ld -- %t.o -o a.out 2>&1 | FileCheck %s --check-prefixes=OPENMP,OPENMP-ELF +// RUN: clang-linker-wrapper --print-wrapped-module --dry-run --host-triple=x86_64-unknown-windows-gnu \ +// RUN: --linker-path=/usr/bin/ld -- %t.o -o a.out 2>&1 | FileCheck %s --check-prefixes=OPENMP,OPENMP-COFF -// OPENMP: @__start_omp_offloading_entries = external hidden constant [0 x %struct.__tgt_offload_entry] -// OPENMP-NEXT: @__stop_omp_offloading_entries = external hidden constant [0 x %struct.__tgt_offload_entry] -// OPENMP-NEXT: @__dummy.omp_offloading_entries = hidden constant [0 x %struct.__tgt_offload_entry] zeroinitializer, section "omp_offloading_entries" -// OPENMP-NEXT: @.omp_offloading.device_image = internal unnamed_addr constant [[[SIZE:[0-9]+]] x i8] c"\10\FF\10\AD{{.*}}" +// OPENMP-ELF: @__start_omp_offloading_entries = external hidden constant [0 x %struct.__tgt_offload_entry] +// OPENMP-ELF-NEXT: @__stop_omp_offloading_entries = external hidden constant [0 x %struct.__tgt_offload_entry] +// OPENMP-ELF-NEXT: @__dummy.omp_offloading_entries = hidden constant [0 x %struct.__tgt_offload_entry] zeroinitializer, section "omp_offloading_entries" + +// OPENMP-COFF: @__start_omp_offloading_entries = hidden constant [0 x %struct.__tgt_offload_entry] zeroinitializer, section "omp_offloading_entries$OA" +// OPENMP-COFF-NEXT: @__stop_omp_offloading_entries = hidden constant [0 x %struct.__tgt_offload_entry] zeroinitializer, section "omp_offloading_entries$OZ" + +// OPENMP: @.omp_offloading.device_image = internal unnamed_addr constant [[[SIZE:[0-9]+]] x i8] c"\10\FF\10\AD{{.*}}" // OPENMP-NEXT: @.omp_offloading.device_images = internal unnamed_addr constant [1 x %__tgt_device_image] [%__tgt_device_image { ptr @.omp_offloading.device_image, ptr getelementptr inbounds ([[[SIZE]] x i8], ptr @.omp_offloading.device_image, i64 1, i64 0), ptr @__start_omp_offloading_entries, ptr @__stop_omp_offloading_entries }] // OPENMP-NEXT: @.omp_offloading.descriptor = internal constant %__tgt_bin_desc { i32 1, ptr @.omp_offloading.device_images, ptr @__start_omp_offloading_entries, ptr @__stop_omp_offloading_entries } // OPENMP-NEXT: @llvm.global_ctors = appending global [1 x { i32, ptr, ptr }] [{ i32, ptr, ptr } { i32 1, ptr @.omp_offloading.descriptor_reg, ptr null }] @@ -35,15 +41,22 @@ // RUN: %clang -cc1 %s -triple x86_64-unknown-linux-gnu -emit-obj -o %t.o \ // RUN: -fembed-offload-object=%t.out // RUN: clang-linker-wrapper --print-wrapped-module --dry-run --host-triple=x86_64-unknown-linux-gnu \ -// RUN: --linker-path=/usr/bin/ld -- %t.o -o a.out 2>&1 | FileCheck %s --check-prefix=CUDA +// RUN: --linker-path=/usr/bin/ld -- %t.o -o a.out 2>&1 | FileCheck %s --check-prefixes=CUDA,CUDA-ELF +// RUN: clang-linker-wrapper --print-wrapped-module --dry-run --host-triple=x86_64-unknown-windows-gnu \ +// RUN: --linker-path=/usr/bin/ld -- %t.o -o a.out 2>&1 | FileCheck %s --check-prefixes=CUDA,CUDA-COFF // CUDA: @.fatbin_image = internal constant [0 x i8] zeroinitializer, section ".nv_fatbin" // CUDA-NEXT: @.fatbin_wrapper = internal constant %fatbin_wrapper { i32 1180844977, i32 1, ptr @.fatbin_image, ptr null }, section ".nvFatBinSegment", align 8 // CUDA-NEXT: @.cuda.binary_handle = internal global ptr null -// CUDA-NEXT: @__start_cuda_offloading_entries = external hidden constant [0 x %struct.__tgt_offload_entry] -// CUDA-NEXT: @__stop_cuda_offloading_entries = external hidden constant [0 x %struct.__tgt_offload_entry] -// CUDA-NEXT: @__dummy.cuda_offloading_entries = hidden constant [0 x %struct.__tgt_offload_entry] zeroinitializer, section "cuda_offloading_entries" -// CUDA-NEXT: @llvm.global_ctors = appending global [1 x { i32, ptr, ptr }] [{ i32, ptr, ptr } { i32 1, ptr @.cuda.fatbin_reg, ptr null }] + +// CUDA-ELF: @__start_cuda_offloading_entries = external hidden constant [0 x %struct.__tgt_offload_entry] +// CUDA-ELF-NEXT: @__stop_cuda_offloading_entries = external hidden constant [0 x %struct.__tgt_offload_entry] +// CUDA-ELF-NEXT: @__dummy.cuda_offloading_entries = hidden constant [0 x %struct.__tgt_offload_entry] zeroinitializer, section "cuda_offloading_entries" + +// CUDA-COFF: @__start_cuda_offloading_entries = hidden constant [0 x %struct.__tgt_offload_entry] zeroinitializer, section "cuda_offloading_entries$OA" +// CUDA-COFF-NEXT: @__stop_cuda_offloading_entries = hidden constant [0 x %struct.__tgt_offload_entry] zeroinitializer, section "cuda_offloading_entries$OZ" + +// CUDA: @llvm.global_ctors = appending global [1 x { i32, ptr, ptr }] [{ i32, ptr, ptr } { i32 1, ptr @.cuda.fatbin_reg, ptr null }] // CUDA: define internal void @.cuda.fatbin_reg() section ".text.startup" { // CUDA-NEXT: entry: @@ -117,15 +130,22 @@ // RUN: %clang -cc1 %s -triple x86_64-unknown-linux-gnu -emit-obj -o %t.o \ // RUN: -fembed-offload-object=%t.out // RUN: clang-linker-wrapper --print-wrapped-module --dry-run --host-triple=x86_64-unknown-linux-gnu \ -// RUN: --linker-path=/usr/bin/ld -- %t.o -o a.out 2>&1 | FileCheck %s --check-prefix=HIP +// RUN: --linker-path=/usr/bin/ld -- %t.o -o a.out 2>&1 | FileCheck %s --check-prefixes=HIP,HIP-ELF +// RUN: clang-linker-wrapper --print-wrapped-module --dry-run --host-triple=x86_64-unknown-windows-gnu \ +// RUN: --linker-path=/usr/bin/ld -- %t.o -o a.out 2>&1 | FileCheck %s --check-prefixes=HIP,HIP-COFF // HIP: @.fatbin_image = internal constant [0 x i8] zeroinitializer, section ".hip_fatbin" // HIP-NEXT: @.fatbin_wrapper = internal constant %fatbin_wrapper { i32 1212764230, i32 1, ptr @.fatbin_image, ptr null }, section ".hipFatBinSegment", align 8 // HIP-NEXT: @.hip.binary_handle = internal global ptr null -// HIP-NEXT: @__start_hip_offloading_entries = external hidden constant [0 x %struct.__tgt_offload_entry] -// HIP-NEXT: @__stop_hip_offloading_entries = external hidden constant [0 x %struct.__tgt_offload_entry] -// HIP-NEXT: @__dummy.hip_offloading_entries = hidden constant [0 x %struct.__tgt_offload_entry] zeroinitializer, section "hip_offloading_entries" -// HIP-NEXT: @llvm.global_ctors = appending global [1 x { i32, ptr, ptr }] [{ i32, ptr, ptr } { i32 1, ptr @.hip.fatbin_reg, ptr null }] + +// HIP-ELF: @__start_hip_offloading_entries = external hidden constant [0 x %struct.__tgt_offload_entry] +// HIP-ELF-NEXT: @__stop_hip_offloading_entries = external hidden constant [0 x %struct.__tgt_offload_entry] +// HIP-ELF-NEXT: @__dummy.hip_offloading_entries = hidden constant [0 x %struct.__tgt_offload_entry] zeroinitializer, section "hip_offloading_entries" + +// HIP-COFF: @__start_hip_offloading_entries = hidden constant [0 x %struct.__tgt_offload_entry] zeroinitializer, section "hip_offloading_entries$OA" +// HIP-COFF-NEXT: @__stop_hip_offloading_entries = hidden constant [0 x %struct.__tgt_offload_entry] zeroinitializer, section "hip_offloading_entries$OZ" + +// HIP: @llvm.global_ctors = appending global [1 x { i32, ptr, ptr }] [{ i32, ptr, ptr } { i32 1, ptr @.hip.fatbin_reg, ptr null }] // HIP: define internal void @.hip.fatbin_reg() section ".text.startup" { // HIP-NEXT: entry: diff --git a/clang/test/OpenMP/declare_target_link_codegen.cpp b/clang/test/OpenMP/declare_target_link_codegen.cpp index 12fc92183ea9a..2372b2738b5be 100644 --- a/clang/test/OpenMP/declare_target_link_codegen.cpp +++ b/clang/test/OpenMP/declare_target_link_codegen.cpp @@ -1,4 +1,5 @@ // RUN: %clang_cc1 -verify -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix HOST --check-prefix CHECK +// RUN: %clang_cc1 -verify -fopenmp -x c++ -triple x86_64-unknown-windows-gnu -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix HOST-COFF --check-prefix CHECK // RUN: %clang_cc1 -verify -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm-bc %s -o %t-ppc-host.bc // RUN: %clang_cc1 -verify -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -fopenmp-is-target-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - | FileCheck %s --check-prefix DEVICE --check-prefix CHECK // RUN: %clang_cc1 -verify -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -fopenmp-is-target-device -fopenmp-host-ir-file-path %t-ppc-host.bc -emit-pch -o %t @@ -27,6 +28,7 @@ // HOST: [[MAPTYPES:@.+]] = private unnamed_addr constant [3 x i64] [i64 35, i64 531, i64 531] // HOST: @.omp_offloading.entry_name{{.*}} = internal unnamed_addr constant [{{[0-9]+}} x i8] c"c_decl_tgt_ref_ptr\00" // HOST: @.omp_offloading.entry.c_decl_tgt_ref_ptr = weak constant %struct.__tgt_offload_entry { ptr @c_decl_tgt_ref_ptr, ptr @.omp_offloading.entry_name, i64 8, i32 1, i32 0 }, section "omp_offloading_entries", align 1 +// HOST-COFF: @.omp_offloading.entry.{{.*}} = weak constant %struct.__tgt_offload_entry { ptr @.{{.*}}, ptr @.{{.*}}, i64 0, i32 0, i32 0 }, section "omp_offloading_entries$OE", align 1 // DEVICE-NOT: internal unnamed_addr constant [{{[0-9]+}} x i8] c"c_{{.*}}_decl_tgt_ref_ptr\00" // HOST: @.omp_offloading.entry_name{{.*}} = internal unnamed_addr constant [{{[0-9]+}} x i8] c"_{{.*}}d_{{.*}}_decl_tgt_ref_ptr\00" // HOST: @.omp_offloading.entry.[[D_PTR]] = weak constant %struct.__tgt_offload_entry { ptr @[[D_PTR]], ptr @.omp_offloading.entry_name{{.*}} @@ -50,7 +52,7 @@ int maini1() { return 0; } -// DEVICE: define weak_odr protected void @__omp_offloading_{{.*}}_{{.*}}maini1{{.*}}_l42(ptr {{[^,]+}}, ptr noundef nonnull align {{[0-9]+}} dereferenceable{{[^,]*}} +// DEVICE: define weak_odr protected void @__omp_offloading_{{.*}}_{{.*}}maini1{{.*}}_l44(ptr {{[^,]+}}, ptr noundef nonnull align {{[0-9]+}} dereferenceable{{[^,]*}} // DEVICE: [[C_REF:%.+]] = load ptr, ptr @c_decl_tgt_ref_ptr, // DEVICE: [[C:%.+]] = load i32, ptr [[C_REF]], // DEVICE: store i32 [[C]], ptr % @@ -74,10 +76,10 @@ int maini1() { // HOST: [[BP0:%.+]] = getelementptr inbounds [3 x ptr], ptr [[BASEPTRS]], i{{[0-9]+}} 0, i{{[0-9]+}} 0 // HOST: [[P0:%.+]] = getelementptr inbounds [3 x ptr], ptr [[PTRS]], i{{[0-9]+}} 0, i{{[0-9]+}} 0 // HOST: call i32 @__tgt_target_kernel(ptr @{{.+}}, i64 -1, i32 -1, i32 0, ptr @.{{.+}}.region_id, ptr %{{.+}}) -// HOST: call void @__omp_offloading_{{.*}}_{{.*}}_{{.*}}maini1{{.*}}_l42(ptr %{{[^,]+}}) +// HOST: call void @__omp_offloading_{{.*}}_{{.*}}_{{.*}}maini1{{.*}}_l44(ptr %{{[^,]+}}) // HOST: call i32 @__tgt_target_kernel(ptr @{{.+}}, i64 -1, i32 0, i32 0, ptr @.{{.+}}.region_id, ptr %{{.+}}) -// HOST: define internal void @__omp_offloading_{{.*}}_{{.*}}maini1{{.*}}_l42(ptr noundef nonnull align {{[0-9]+}} dereferenceable{{.*}}) +// HOST: define internal void @__omp_offloading_{{.*}}_{{.*}}maini1{{.*}}_l44(ptr noundef nonnull align {{[0-9]+}} dereferenceable{{.*}}) // HOST: [[C:%.*]] = load i32, ptr @c, // HOST: store i32 [[C]], ptr % diff --git a/llvm/lib/Frontend/Offloading/CMakeLists.txt b/llvm/lib/Frontend/Offloading/CMakeLists.txt index 25eb24785732e..2d0117c9e1005 100644 --- a/llvm/lib/Frontend/Offloading/CMakeLists.txt +++ b/llvm/lib/Frontend/Offloading/CMakeLists.txt @@ -11,4 +11,5 @@ add_llvm_component_library(LLVMFrontendOffloading Core Support TransformUtils + TargetParser ) diff --git a/llvm/lib/Frontend/Offloading/Utility.cpp b/llvm/lib/Frontend/Offloading/Utility.cpp index 340d1463b352a..1c08f02c17f5f 100644 --- a/llvm/lib/Frontend/Offloading/Utility.cpp +++ b/llvm/lib/Frontend/Offloading/Utility.cpp @@ -15,7 +15,6 @@ using namespace llvm; using namespace llvm::offloading; -// TODO: Export this to the linker wrapper code registration. StructType *offloading::getEntryTy(Module &M) { LLVMContext &C = M.getContext(); StructType *EntryTy = @@ -32,6 +31,8 @@ StructType *offloading::getEntryTy(Module &M) { void offloading::emitOffloadingEntry(Module &M, Constant *Addr, StringRef Name, uint64_t Size, int32_t Flags, StringRef SectionName) { + llvm::Triple Triple(M.getTargetTriple()); + Type *Int8PtrTy = PointerType::getUnqual(M.getContext()); Type *Int32Ty = Type::getInt32Ty(M.getContext()); Type *SizeTy = M.getDataLayout().getIntPtrType(M.getContext()); @@ -39,11 +40,10 @@ void offloading::emitOffloadingEntry(Module &M, Constant *Addr, StringRef Name, Constant *AddrName = ConstantDataArray::getString(M.getContext(), Name); // Create the constant string used to look up the symbol in the device. - auto *Str = - new llvm::GlobalVariable(M, AddrName->getType(), /*isConstant=*/true, - llvm::GlobalValue::InternalLinkage, AddrName, - ".omp_offloading.entry_name"); - Str->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global); + auto *Str = new GlobalVariable(M, AddrName->getType(), /*isConstant=*/true, + GlobalValue::InternalLinkage, AddrName, + ".omp_offloading.entry_name"); + Str->setUnnamedAddr(GlobalValue::UnnamedAddr::Global); // Construct the offloading entry. Constant *EntryData[] = { @@ -62,35 +62,49 @@ void offloading::emitOffloadingEntry(Module &M, Constant *Addr, StringRef Name, M.getDataLayout().getDefaultGlobalsAddressSpace()); // The entry has to be created in the section the linker expects it to be. - Entry->setSection(SectionName); + if (Triple.isOSBinFormatCOFF()) + Entry->setSection((SectionName + "$OE").str()); + else + Entry->setSection(SectionName); Entry->setAlignment(Align(1)); } std::pair offloading::getOffloadEntryArray(Module &M, StringRef SectionName) { - auto *EntriesB = - new GlobalVariable(M, ArrayType::get(getEntryTy(M), 0), - /*isConstant=*/true, GlobalValue::ExternalLinkage, - /*Initializer=*/nullptr, "__start_" + SectionName); + llvm::Triple Triple(M.getTargetTriple()); + + auto *ZeroInitilaizer = + ConstantAggregateZero::get(ArrayType::get(getEntryTy(M), 0u)); + auto *EntryInit = Triple.isOSBinFormatCOFF() ? ZeroInitilaizer : nullptr; + auto *EntryType = ArrayType::get(getEntryTy(M), 0); + + auto *EntriesB = new GlobalVariable(M, EntryType, /*isConstant=*/true, + GlobalValue::ExternalLinkage, EntryInit, + "__start_" + SectionName); EntriesB->setVisibility(GlobalValue::HiddenVisibility); - auto *EntriesE = - new GlobalVariable(M, ArrayType::get(getEntryTy(M), 0), - /*isConstant=*/true, GlobalValue::ExternalLinkage, - /*Initializer=*/nullptr, "__stop_" + SectionName); + auto *EntriesE = new GlobalVariable(M, EntryType, /*isConstant=*/true, + GlobalValue::ExternalLinkage, EntryInit, + "__stop_" + SectionName); EntriesE->setVisibility(GlobalValue::HiddenVisibility); - // We assume that external begin/end symbols that we have created above will - // be defined by the linker. But linker will do that only if linker inputs - // have section with "omp_offloading_entries" name which is not guaranteed. - // So, we just create dummy zero sized object in the offload entries section - // to force linker to define those symbols. - auto *DummyInit = - ConstantAggregateZero::get(ArrayType::get(getEntryTy(M), 0u)); - auto *DummyEntry = new GlobalVariable(M, DummyInit->getType(), true, - GlobalVariable::ExternalLinkage, - DummyInit, "__dummy." + SectionName); - DummyEntry->setSection(SectionName); - DummyEntry->setVisibility(GlobalValue::HiddenVisibility); + if (Triple.isOSBinFormatELF()) { + // We assume that external begin/end symbols that we have created above will + // be defined by the linker. This is done whenever a section name with a + // valid C-identifier is present. We define a dummy variable here to force + // the linker to always provide these symbols. + auto *DummyEntry = new GlobalVariable( + M, ZeroInitilaizer->getType(), true, GlobalVariable::ExternalLinkage, + ZeroInitilaizer, "__dummy." + SectionName); + DummyEntry->setSection(SectionName); + DummyEntry->setVisibility(GlobalValue::HiddenVisibility); + } else { + // The COFF linker will merge sections containing a '$' together into a + // single section. The order of entries in this section will be sorted + // alphabetically by the characters following the '$' in the name. Set the + // sections here to ensure that the beginning and end symbols are sorted. + EntriesB->setSection((SectionName + "$OA").str()); + EntriesE->setSection((SectionName + "$OZ").str()); + } return std::make_pair(EntriesB, EntriesE); }