Revert "[PGO][AMDGPU] Add basic HIP offload PGO support (#177665)"#201416
Conversation
This broke profiling builds on Windows by switching the profile library to link against the dynamic CRT; see discussion on the PR. There were already a number of issues reported and fixed after this PR. Rather than piling on the fixes (and this one may need some work), revert back to green for now to let the project recover. This reverts commit 5db1364. Additionally, this reverts the followup PRs in 635e120, 2766733, 4c33844, and 5eca8b6: "[PGO][HIP] Stop pulling ROCm.o into every PGO host link (llvm#200101)" "[compiler-rt][profile] Add COMPILER_RT_BUILD_PROFILE_ROCM option (llvm#200127)" "[PGO][HIP] Skip ROCm interceptor in profile-only compiler-rt builds (llvm#200111)" "[PGO][HIP] Fix profile-only Windows link by gating ROCm interceptor macro (llvm#200859)"
|
@llvm/pr-subscribers-backend-amdgpu @llvm/pr-subscribers-llvm-transforms Author: Hans Wennborg (zmodem) ChangesThis broke profiling builds on Windows by switching the profile library to link against the dynamic CRT; see discussion on the PR. There were already a number of issues reported and fixed after this PR. Rather than piling on the fixes (and this one may need some work), revert back to green for now to let the project recover. This reverts commit 5db1364. Additionally, this reverts the followup PRs in "[PGO][HIP] Stop pulling ROCm.o into every PGO host link (#200101)" Patch is 69.76 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/201416.diff 14 Files Affected:
diff --git a/clang/lib/CodeGen/CGCUDANV.cpp b/clang/lib/CodeGen/CGCUDANV.cpp
index 65f398af7902b..259b6c040706b 100644
--- a/clang/lib/CodeGen/CGCUDANV.cpp
+++ b/clang/lib/CodeGen/CGCUDANV.cpp
@@ -28,7 +28,6 @@
#include "llvm/IR/ReplaceConstant.h"
#include "llvm/Support/Format.h"
#include "llvm/Support/VirtualFileSystem.h"
-#include "llvm/Transforms/Utils/ModuleUtils.h"
using namespace clang;
using namespace CodeGen;
@@ -73,11 +72,6 @@ class CGNVCUDARuntime : public CGCUDARuntime {
/// ModuleCtorFunction() and used to create corresponding cleanup calls in
/// ModuleDtorFunction()
llvm::GlobalVariable *GpuBinaryHandle = nullptr;
- /// Host-side shadow for the per-TU __llvm_profile_sections_<CUID> global,
- /// emitted only for HIP host compiles when PGO is on. Registered via
- /// __hipRegisterVar (non-RDC) or an offloading entry (RDC) so the runtime
- /// can locate the device-side table by name.
- llvm::GlobalVariable *OffloadProfShadow = nullptr;
/// Whether we generate relocatable device code.
bool RelocatableDeviceCode;
/// Mangle context for device.
@@ -182,13 +176,6 @@ class CGNVCUDARuntime : public CGCUDARuntime {
void transformManagedVars();
/// Create offloading entries to register globals in RDC mode.
void createOffloadingEntries();
- /// For HIP+PGO, emit the per-TU __llvm_profile_sections_<CUID> global.
- /// On the device side it is the populated 7-pointer section-bounds table.
- /// On the host side it is a placeholder void* shadow stored in
- /// OffloadProfShadow, registered later by makeRegisterGlobalsFn (non-RDC)
- /// or createOffloadingEntries (RDC) so the runtime can locate the
- /// device-side table by name.
- void emitOffloadProfilingSections();
public:
CGNVCUDARuntime(CodeGenModule &CGM);
@@ -748,32 +735,6 @@ llvm::Function *CGNVCUDARuntime::makeRegisterGlobalsFn() {
}
}
- // Register the per-TU offload-profiling shadow so the host runtime can
- // locate the matching device-side __llvm_profile_sections_<CUID>. We
- // emit both __hipRegisterVar (so the HIP runtime can map the host
- // shadow to the device symbol) and
- // __llvm_profile_offload_register_shadow_variable (so the profile
- // runtime adds the shadow to its drain list).
- if (OffloadProfShadow) {
- llvm::Constant *Name =
- makeConstantString(std::string(OffloadProfShadow->getName()));
- llvm::Value *RegisterVarArgs[] = {
- &GpuBinaryHandlePtr,
- OffloadProfShadow,
- Name,
- Name,
- llvm::ConstantInt::get(IntTy, /*Extern=*/0),
- llvm::ConstantInt::get(VarSizeTy, CGM.getDataLayout().getPointerSize()),
- llvm::ConstantInt::get(IntTy, /*Constant=*/0),
- llvm::ConstantInt::get(IntTy, 0)};
- Builder.CreateCall(RegisterVar, RegisterVarArgs);
-
- llvm::FunctionCallee RegisterShadow = CGM.CreateRuntimeFunction(
- llvm::FunctionType::get(VoidTy, {PtrTy}, false),
- "__llvm_profile_offload_register_shadow_variable");
- Builder.CreateCall(RegisterShadow, {OffloadProfShadow});
- }
-
Builder.CreateRetVoid();
return RegisterKernelsFunc;
}
@@ -1295,124 +1256,11 @@ void CGNVCUDARuntime::createOffloadingEntries() {
I.Flags.getSurfTexType());
}
}
-
- // Register the per-TU offload-profiling shadow. The offloading entry
- // makes the linker-wrapper emit the host __hipRegisterVar call in the
- // combined ctor. Separately emit a per-TU ctor that registers the
- // shadow with the profile runtime's drain list.
- if (OffloadProfShadow) {
- llvm::offloading::emitOffloadingEntry(
- M, Kind, OffloadProfShadow, OffloadProfShadow->getName(),
- CGM.getDataLayout().getPointerSize(),
- llvm::offloading::OffloadGlobalEntry, /*Data=*/0);
-
- llvm::LLVMContext &Ctx = M.getContext();
- auto *PtrTy = llvm::PointerType::getUnqual(Ctx);
- llvm::FunctionCallee RegisterShadow = CGM.CreateRuntimeFunction(
- llvm::FunctionType::get(VoidTy, {PtrTy}, false),
- "__llvm_profile_offload_register_shadow_variable");
- auto *CtorFn = llvm::Function::Create(
- llvm::FunctionType::get(VoidTy, false),
- llvm::GlobalValue::InternalLinkage,
- "__llvm_profile_register_shadow." + CGM.getContext().getCUIDHash(), &M);
- auto *Entry = llvm::BasicBlock::Create(Ctx, "entry", CtorFn);
- llvm::IRBuilder<> B(Entry);
- B.CreateCall(RegisterShadow, {OffloadProfShadow});
- B.CreateRetVoid();
- llvm::appendToGlobalCtors(M, CtorFn, /*Priority=*/65535);
- }
-}
-
-// For HIP host+device compiles with PGO enabled, emit the per-TU global
-// __llvm_profile_sections_<CUID>. Device side: a 7-pointer struct holding
-// section start/stop bounds for the names/counters/data sections plus the
-// raw-version variable. Host side: an opaque void* shadow whose only
-// purpose is to give the host-runtime a registered symbol name to look up
-// via hipGetSymbolAddress; the actual device-side data lives in the
-// matching device-side global.
-void CGNVCUDARuntime::emitOffloadProfilingSections() {
- if (!CGM.getLangOpts().HIP)
- return;
- if (!CGM.getCodeGenOpts().hasProfileInstr())
- return;
-
- StringRef CUIDHash = CGM.getContext().getCUIDHash();
- if (CUIDHash.empty())
- return;
-
- llvm::Module &M = CGM.getModule();
- llvm::LLVMContext &Ctx = M.getContext();
- std::string Name = ("__llvm_profile_sections_" + CUIDHash).str();
-
- // If the global already exists (e.g. another TU was merged in), don't
- // duplicate it.
- if (M.getNamedValue(Name))
- return;
-
- if (CGM.getLangOpts().CUDAIsDevice) {
- // Device side: emit the populated struct. Section start/stop symbols
- // are linker-defined (ELF auto-generates __start_/__stop_ for any
- // section whose name is a valid C identifier; AMDGPU is ELF).
- unsigned GlobalAS = M.getDataLayout().getDefaultGlobalsAddressSpace();
- auto *PtrTy = llvm::PointerType::get(Ctx, GlobalAS);
- auto getOrDeclare = [&](StringRef SymName) {
- if (auto *GV = M.getNamedGlobal(SymName))
- return GV;
- auto *GV = new llvm::GlobalVariable(
- M, llvm::Type::getInt8Ty(Ctx), /*isConstant=*/false,
- llvm::GlobalValue::ExternalLinkage, /*Initializer=*/nullptr, SymName,
- /*InsertBefore=*/nullptr, llvm::GlobalValue::NotThreadLocal,
- GlobalAS);
- GV->setVisibility(llvm::GlobalValue::HiddenVisibility);
- return GV;
- };
- auto *VersionGV = M.getNamedGlobal("__llvm_profile_raw_version");
- if (!VersionGV) {
- VersionGV = new llvm::GlobalVariable(
- M, llvm::Type::getInt64Ty(Ctx), /*isConstant=*/true,
- llvm::GlobalValue::ExternalLinkage, /*Initializer=*/nullptr,
- "__llvm_profile_raw_version",
- /*InsertBefore=*/nullptr, llvm::GlobalValue::NotThreadLocal,
- GlobalAS);
- }
-
- auto *StructTy = llvm::StructType::get(
- Ctx, {PtrTy, PtrTy, PtrTy, PtrTy, PtrTy, PtrTy, PtrTy});
- llvm::Constant *Fields[] = {
- getOrDeclare("__start___llvm_prf_names"),
- getOrDeclare("__stop___llvm_prf_names"),
- getOrDeclare("__start___llvm_prf_cnts"),
- getOrDeclare("__stop___llvm_prf_cnts"),
- getOrDeclare("__start___llvm_prf_data"),
- getOrDeclare("__stop___llvm_prf_data"),
- VersionGV,
- };
- auto *Init = llvm::ConstantStruct::get(StructTy, Fields);
- auto *GV = new llvm::GlobalVariable(
- M, StructTy, /*isConstant=*/true, llvm::GlobalValue::ExternalLinkage,
- Init, Name, /*InsertBefore=*/nullptr, llvm::GlobalValue::NotThreadLocal,
- GlobalAS);
- GV->setVisibility(llvm::GlobalValue::ProtectedVisibility);
- CGM.addCompilerUsedGlobal(GV);
- return;
- }
-
- // Host side: emit an opaque void* shadow. Layout doesn't matter — the
- // runtime locates it by name via hipGetSymbolAddress and treats it as
- // the address of the device-side struct. Registration with the HIP
- // runtime is added by makeRegisterGlobalsFn (non-RDC) or
- // createOffloadingEntries (RDC).
- auto *PtrTy = llvm::PointerType::getUnqual(Ctx);
- OffloadProfShadow = new llvm::GlobalVariable(
- M, PtrTy, /*isConstant=*/false, llvm::GlobalValue::ExternalLinkage,
- llvm::ConstantPointerNull::get(PtrTy), Name);
- CGM.addCompilerUsedGlobal(OffloadProfShadow);
}
// Returns module constructor to be added.
llvm::Function *CGNVCUDARuntime::finalizeModule() {
transformManagedVars();
- emitOffloadProfilingSections();
if (CGM.getLangOpts().CUDAIsDevice) {
// Mark ODR-used device variables as compiler used to prevent it from being
// eliminated by optimization. This is necessary for device variables
diff --git a/clang/test/CodeGenHIP/offload-pgo-sections.hip b/clang/test/CodeGenHIP/offload-pgo-sections.hip
deleted file mode 100644
index 17c6fe7b9e609..0000000000000
--- a/clang/test/CodeGenHIP/offload-pgo-sections.hip
+++ /dev/null
@@ -1,50 +0,0 @@
-// REQUIRES: amdgpu-registered-target
-// REQUIRES: x86-registered-target
-
-// Verify CGCUDANV emits the per-TU __llvm_profile_sections_<CUID> global
-// for HIP+PGO compilations. Device subcompile: populated 7-pointer struct
-// in addrspace(1). Host compile: void* shadow registered with the HIP
-// runtime and with the profile runtime's drain list.
-
-// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -fcuda-is-device -cuid=abc \
-// RUN: -fprofile-instrument=clang -emit-llvm -o - -x hip %s \
-// RUN: | FileCheck -check-prefix=DEV %s
-
-// RUN: %clang_cc1 -triple x86_64-linux-gnu -cuid=abc \
-// RUN: -fprofile-instrument=clang -emit-llvm -o - -x hip %s \
-// RUN: | FileCheck -check-prefix=HOST %s
-
-// Guard: no PGO -> no emission.
-// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -fcuda-is-device -cuid=abc \
-// RUN: -emit-llvm -o - -x hip %s \
-// RUN: | FileCheck -check-prefix=NONE %s
-
-// Guard: no CUID -> no emission.
-// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -fcuda-is-device \
-// RUN: -fprofile-instrument=clang -emit-llvm -o - -x hip %s \
-// RUN: | FileCheck -check-prefix=NONE %s
-
-#define __device__ __attribute__((device))
-#define __global__ __attribute__((global))
-
-__device__ int helper(int x) { return x + 1; }
-__global__ void kernel(int *p) { *p = helper(*p); }
-
-// DEV-DAG: @__start___llvm_prf_names = external hidden addrspace(1) global i8
-// DEV-DAG: @__stop___llvm_prf_names = external hidden addrspace(1) global i8
-// DEV-DAG: @__start___llvm_prf_cnts = external hidden addrspace(1) global i8
-// DEV-DAG: @__stop___llvm_prf_cnts = external hidden addrspace(1) global i8
-// DEV-DAG: @__start___llvm_prf_data = external hidden addrspace(1) global i8
-// DEV-DAG: @__stop___llvm_prf_data = external hidden addrspace(1) global i8
-// DEV-DAG: @__llvm_profile_raw_version = external addrspace(1) constant i64
-// DEV: @__llvm_profile_sections_[[CUID:[0-9a-f]+]] = protected addrspace(1) constant {{.*}}@__start___llvm_prf_names{{.*}}@__stop___llvm_prf_names{{.*}}@__start___llvm_prf_cnts{{.*}}@__stop___llvm_prf_cnts{{.*}}@__start___llvm_prf_data{{.*}}@__stop___llvm_prf_data{{.*}}@__llvm_profile_raw_version
-// DEV: @llvm.compiler.used = {{.*}}@__llvm_profile_sections_[[CUID]]
-
-// HOST: @__llvm_profile_sections_[[CUID:[0-9a-f]+]] = global ptr null
-// HOST: @llvm.compiler.used = {{.*}}@__llvm_profile_sections_[[CUID]]
-// HOST: define internal void @__hip_register_globals
-// HOST: call void @__hipRegisterVar({{.*}}@__llvm_profile_sections_[[CUID]],
-// HOST: call void @__llvm_profile_offload_register_shadow_variable(ptr @__llvm_profile_sections_[[CUID]])
-
-// NONE-NOT: __llvm_profile_sections_
-// NONE-NOT: __llvm_profile_offload_register_shadow_variable
diff --git a/compiler-rt/CMakeLists.txt b/compiler-rt/CMakeLists.txt
index 39034fd9ba67d..e88321d822f84 100644
--- a/compiler-rt/CMakeLists.txt
+++ b/compiler-rt/CMakeLists.txt
@@ -322,15 +322,6 @@ option(COMPILER_RT_USE_ATOMIC_LIBRARY "Use compiler-rt atomic instead of libatom
option(COMPILER_RT_PROFILE_BAREMETAL "Build minimal baremetal profile library" OFF)
-set(DEFAULT_COMPILER_RT_BUILD_PROFILE_ROCM ON)
-if(APPLE)
- set(DEFAULT_COMPILER_RT_BUILD_PROFILE_ROCM OFF)
-endif()
-option(COMPILER_RT_BUILD_PROFILE_ROCM
- "Build the host-side ROCm/HIP device profile collection runtime"
- ${DEFAULT_COMPILER_RT_BUILD_PROFILE_ROCM})
-mark_as_advanced(COMPILER_RT_BUILD_PROFILE_ROCM)
-
include(config-ix)
#================================
diff --git a/compiler-rt/lib/profile/CMakeLists.txt b/compiler-rt/lib/profile/CMakeLists.txt
index 77db2477bb7c6..8d9a773412a22 100644
--- a/compiler-rt/lib/profile/CMakeLists.txt
+++ b/compiler-rt/lib/profile/CMakeLists.txt
@@ -93,9 +93,6 @@ if (NOT COMPILER_RT_PROFILE_BAREMETAL)
InstrProfilingUtil.c
InstrProfilingValue.c
)
- if(COMPILER_RT_BUILD_PROFILE_ROCM)
- list(APPEND PROFILE_SOURCES InstrProfilingPlatformROCm.cpp)
- endif()
endif()
set(PROFILE_HEADERS
@@ -158,43 +155,6 @@ if(COMPILER_RT_PROFILE_BAREMETAL)
-DCOMPILER_RT_PROFILE_BAREMETAL=1)
endif()
-# The HIP host interceptor in InstrProfilingPlatformROCm.cpp pulls in
-# RTInterception + sanitizer_common object libs. Those targets are only created
-# when COMPILER_RT_BUILD_SANITIZERS / _MEMPROF / _XRAY / _CTX_PROFILE is enabled
-# (see lib/CMakeLists.txt). In a profile-only build the targets do not exist;
-# skip both the object-lib merge and the ROCm source file so the static archive
-# remains self-contained.
-set(PROFILE_OBJECT_LIBS)
-set(PROFILE_HAS_HIP_INTERCEPTOR FALSE)
-if(COMPILER_RT_HAS_INTERCEPTION AND NOT COMPILER_RT_PROFILE_BAREMETAL
- AND TARGET RTInterception.${COMPILER_RT_DEFAULT_TARGET_ARCH}
- AND TARGET RTSanitizerCommon.${COMPILER_RT_DEFAULT_TARGET_ARCH}
- AND TARGET RTSanitizerCommonLibc.${COMPILER_RT_DEFAULT_TARGET_ARCH})
- # RTInterception references __sanitizer_internal_{memcpy,memset,memmove} and other
- # sanitizer_common symbols; merge the same object libs as clang_rt.cfi (without
- # coverage/symbolizer) so -fprofile-instr-generate links stay self-contained.
- list(APPEND PROFILE_OBJECT_LIBS
- RTInterception
- RTSanitizerCommon
- RTSanitizerCommonLibc)
- set(PROFILE_HAS_HIP_INTERCEPTOR TRUE)
-endif()
-
-if(NOT PROFILE_HAS_HIP_INTERCEPTOR)
- list(REMOVE_ITEM PROFILE_SOURCES InstrProfilingPlatformROCm.cpp)
-endif()
-
-# Only advertise the ROCm interceptor to InstrProfilingFile.c when its
-# definition (InstrProfilingPlatformROCm.cpp) is actually compiled into the
-# archive. Otherwise InstrProfilingFile.c references
-# __llvm_profile_hip_collect_device_data with no definition; on COFF/Windows
-# there is no weak-undefined fallback, so the link fails (see PR #200111).
-if(COMPILER_RT_BUILD_PROFILE_ROCM AND PROFILE_HAS_HIP_INTERCEPTOR)
- set(EXTRA_FLAGS
- ${EXTRA_FLAGS}
- -DCOMPILER_RT_BUILD_PROFILE_ROCM=1)
-endif()
-
if("${COMPILER_RT_DEFAULT_TARGET_ARCH}" MATCHES "amdgcn|nvptx")
append_list_if(COMPILER_RT_HAS_FFREESTANDING_FLAG -ffreestanding EXTRA_FLAGS)
append_list_if(COMPILER_RT_HAS_NOGPULIB_FLAG -nogpulib EXTRA_FLAGS)
@@ -208,24 +168,13 @@ if("${COMPILER_RT_DEFAULT_TARGET_ARCH}" MATCHES "amdgcn|nvptx")
endif()
if(MSVC)
- # profile historically used the static CRT (/MT). When we merge RTInterception and
- # RTSanitizerCommon (same object libs as clang_rt.cfi on ELF), those targets are
- # built with MultiThreadedDLL (/MD) — see interception/CMakeLists.txt and
- # sanitizer_common/CMakeLists.txt. Mixing /MD objects into a /MT libclang_rt.profile
- # yields LNK2019 (__imp__stricmp from interception_win.cpp) and LNK4098 in Profile-*.
- if(PROFILE_HAS_HIP_INTERCEPTOR)
- set(CMAKE_MSVC_RUNTIME_LIBRARY MultiThreadedDLL)
- else()
- set(CMAKE_MSVC_RUNTIME_LIBRARY MultiThreaded)
- endif()
+ # profile historically has only been supported with the static runtime
+ # on windows
+ set(CMAKE_MSVC_RUNTIME_LIBRARY MultiThreaded)
endif()
# We don't use the C++ Standard Library here, so avoid including it by mistake.
append_list_if(COMPILER_RT_HAS_NOSTDINCXX_FLAG -nostdinc++ EXTRA_FLAGS)
-# C++ profile sources (e.g. InstrProfilingPlatformROCm.cpp) must not emit exception
-# personality symbols: host libclang_rt.profile.a is linked from C code and from C++
-# tests that do not pull in __gxx_personality_v0 (Profile-* / premerge).
-append_list_if(COMPILER_RT_HAS_FNO_EXCEPTIONS_FLAG -fno-exceptions EXTRA_FLAGS)
# XRay uses C++ standard library headers.
string(REGEX REPLACE "-?-stdlib=[a-zA-Z+]*" "" CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS}")
@@ -251,7 +200,6 @@ if(APPLE)
STATIC
OS ${PROFILE_SUPPORTED_OS}
ARCHS ${PROFILE_SUPPORTED_ARCH}
- OBJECT_LIBS ${PROFILE_OBJECT_LIBS}
CFLAGS ${EXTRA_FLAGS}
SOURCES ${PROFILE_SOURCES}
ADDITIONAL_HEADERS ${PROFILE_HEADERS}
@@ -261,7 +209,6 @@ else()
add_compiler_rt_runtime(clang_rt.profile
STATIC
ARCHS ${PROFILE_SUPPORTED_ARCH}
- OBJECT_LIBS ${PROFILE_OBJECT_LIBS}
CFLAGS ${EXTRA_FLAGS}
SOURCES ${PROFILE_SOURCES}
ADDITIONAL_HEADERS ${PROFILE_HEADERS}
diff --git a/compiler-rt/lib/profile/InstrProfilingFile.c b/compiler-rt/lib/profile/InstrProfilingFile.c
index 9ea5a2638fac9..71127b05aafb8 100644
--- a/compiler-rt/lib/profile/InstrProfilingFile.c
+++ b/compiler-rt/lib/profile/InstrProfilingFile.c
@@ -41,23 +41,6 @@
#include "InstrProfilingPort.h"
#include "InstrProfilingUtil.h"
-/* Weak so non-HIP programs do not force InstrProfilingPlatformROCm.o (and its
- * transitive sanitizer_common / interception dependencies) into the host link
- * out of libclang_rt.profile.a. HIP programs emit strong references to other
- * ROCm-runtime symbols (e.g. __llvm_profile_offload_register_shadow_variable)
- * that pull in the strong definition.
- * No COMPILER_RT_VISIBILITY: a hidden weak-undefined symbol is non-preemptible
- * and the address test at the call site would fold to true.
- * Windows: __declspec(selectany) is data-only, and the ROCm interceptor path
- * is not used there, so keep the original strong extern. */
-#if COMPILER_RT_BUILD_PROFILE_ROCM
-#if defined(_WIN32)
-extern int __llvm_profile_hip_collect_device_data(void);
-#else
-__attribute__((weak)) int __llvm_profile_hip_collect_device_data(void);
-#endif
-#endif
-
/* From where is profile name specified.
* The order the enumerators define their
* precedence. Re-order them may lead to
@@ -1215,19 +1198,6 @@ int __llvm_profile_write_file(void) {
if (rc)
PROF_ERR("Failed to write file \"%s\": %s\n", Filename, strerror(errno));
- /* On non-Windows the declaration is weak: only invoked when
- * InstrProfilingPlatformROCm.o is in the link, which happens when the program
- * references other ROCm-runtime symbols (HIP-with-PGO). Warning on failure is
- * handled inside the callee. */
-#if COMPILER_RT_BUILD_PROFILE_ROCM
-#if defined(_WIN32)
- (void)__llvm_profile_hip_collect_device_data();
-#else
- if (&__llvm_profile_hip_collect_device_data)
- (void)__llvm_profile_hip_collect_device_data();
-#endif
-#endif
-
// Restore SIGKILL.
if (PDeathSig == 1)
lprofRestoreSigKill();
diff --git a/compiler-rt/lib/profile/InstrProfilingPlatformROCm.cpp b/compiler-rt/lib/profile/InstrProfilingPlatformROCm.cpp
deleted file mode 100644
index ee00c572e3a42..0000000000000
--- a/compiler-rt/lib/profile/InstrProfilingPlatformROCm.cpp
+++ /dev/null
@@ -1,897 +0,0 @@
-//===- InstrProfilingPlatformROCm.cpp - Profile data ROCm platform -------===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-
-extern "C" {
-#include "InstrProfiling.h"
-#include "InstrProfilingInternal.h"
-#include "InstrProfilingPort.h"
-}
-
-#include "interception/interception.h"
-// C library headers (not <cstdio> etc.): clang_rt.profile is built with
-// -nostdinc++ and avoids the C++ standard library (see profile/CMakeLists.txt).
-#include <stddef.h>
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-
-#ifdef _WIN32
-#define WIN32_LEAN_AND_MEAN
-#include <windows.h>
-#else
-#include <dlfcn.h>
-#include <pthread.h>
-#endif
-
-/* Serialize one-time HIP loader resolution and DynamicModules mutations.
- * Inline to avoid a sanitizer_common dependency. */
-#ifdef _WIN32
-static INIT_ONCE HipLoadedOnce = INIT_ONCE_STATIC_INIT;
-static CRITICAL_SECTION DynamicModu...
[truncated]
|
🪟 Windows x64 Test Results
Failed Tests(click on a test name to see its output) LLVMLLVM.CodeGen/X86/clmul.llIf these failures are unrelated to your changes (for example tests are broken or flaky at HEAD), please open an issue at https://github.com/llvm/llvm-project/issues and add the |
🐧 Linux x64 Test Results
Failed Tests(click on a test name to see its output) LLVMLLVM.CodeGen/X86/clmul.ll (Likely Already Failing)This test is already failing at the base commit.If these failures are unrelated to your changes (for example tests are broken or flaky at HEAD), please open an issue at https://github.com/llvm/llvm-project/issues and add the |
…#177665)" (llvm#201416)" This reverts commit 6cfa1a0.
This broke profiling builds on Windows by switching the profile library to link against the dynamic CRT; see discussion on the PR.
There were already a number of issues reported and fixed after this PR. Rather than piling on the fixes (and this one may need some work), revert back to green for now to let the project recover.
This reverts commit 5db1364.
Additionally, this reverts the followup PRs in
635e120,
2766733,
4c33844, and
5eca8b6:
"[PGO][HIP] Stop pulling ROCm.o into every PGO host link (#200101)"
"[compiler-rt][profile] Add COMPILER_RT_BUILD_PROFILE_ROCM option (#200127)"
"[PGO][HIP] Skip ROCm interceptor in profile-only compiler-rt builds (#200111)"
"[PGO][HIP] Fix profile-only Windows link by gating ROCm interceptor macro (#200859)"