Skip to content

Commit

Permalink
[HIP][LLVM][Opt] Add LLVM support for hipstdpar
Browse files Browse the repository at this point in the history
This patch adds the LLVM changes needed for enabling HIP parallel algorithm offload on AMDGPU targets. What we do here is add two passes, one mandatory and one optional:

1. HipStdParAcceleratorCodeSelectionPass is mandatory, depends on CallGraphAnalysis, and implements the following transform:

    - Traverse the call-graph, and check for functions that are roots for accelerator execution (at the moment, these are GPU kernels exclusively, and would originate in the accelerator specific algorithm library the toolchain uses as an implementation detail);
    - Starting from a root, do a BFS to find all functions that are reachable (called directly or indirectly via a call-chain) and record them;
    - After having done the above for all roots in the Module, we have computed the set of reachable functions, which is the union of roots and functions reachable from roots;
    - All functions that are not in the reachable set are removed; for the special case where the reachable set is empty we completely clear the module;

2. HipStdParAllocationInterpositionPass is optional, is meant as a fallback with restricted functionality for cases where on-demand paging is unavailable on a platform, and implements the following transform:
    - Iterate all functions in a Module;
    - If a function's name is in a predefined set of allocation / deallocation functions that the runtime implementation is allowed and expected to interpose, replace all its uses with the equivalent accelerator aware function, iff the latter is available;
        - If the accelerator aware equivalent is unavailable we warn, but compilation will go ahead, which means that it is possible to get issues around the accelerator trying to access inaccessible memory at run time;
    - We rely on direct name matching as opposed to using the new alloc-kind family of attributes and / or the LibCall analysis pass because some of the legacy functions that need replacing would not carry the former or be identified by the latter.

Reviewed by: JonChesterfield, yaxunl

Differential Revision: https://reviews.llvm.org/D155856
  • Loading branch information
AlexVlx committed Oct 12, 2023
1 parent ac0015f commit 0ce6255
Show file tree
Hide file tree
Showing 17 changed files with 941 additions and 0 deletions.
46 changes: 46 additions & 0 deletions llvm/include/llvm/Transforms/HipStdPar/HipStdPar.h
@@ -0,0 +1,46 @@
//===--------- HipStdPar.h - Standard Parallelism passes --------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
/// \file
///
/// AcceleratorCodeSelection - Identify all functions reachable from a kernel,
/// removing those that are unreachable.
///
/// AllocationInterposition - Forward calls to allocation / deallocation
/// functions to runtime provided equivalents that allocate memory that is
/// accessible for an accelerator.
//===----------------------------------------------------------------------===//

#ifndef LLVM_TRANSFORMS_HIPSTDPAR_HIPSTDPAR_H
#define LLVM_TRANSFORMS_HIPSTDPAR_HIPSTDPAR_H

#include "llvm/IR/PassManager.h"

namespace llvm {

// Only Module needs a forward declaration. ModuleAnalysisManager is not a
// class that can be forward-declared: it is an alias made available by
// llvm/IR/PassManager.h, which this header already includes.
class Module;

/// Module pass that restricts a module to accelerator code: it identifies all
/// functions reachable from a kernel and removes those that are unreachable.
class HipStdParAcceleratorCodeSelectionPass
: public PassInfoMixin<HipStdParAcceleratorCodeSelectionPass> {
public:
/// Applies the selection to \p M, with \p MAM supplying module analyses.
/// \returns the set of analyses that remain valid after the transform.
PreservedAnalyses run(Module &M, ModuleAnalysisManager &MAM);

/// Always returns true, i.e. this pass reports itself as required.
static bool isRequired() { return true; }
};

/// Module pass that forwards calls to allocation / deallocation functions to
/// runtime provided equivalents that allocate accelerator-accessible memory.
class HipStdParAllocationInterpositionPass
: public PassInfoMixin<HipStdParAllocationInterpositionPass> {
public:
/// Applies the interposition to \p M, with \p MAM supplying module analyses.
/// \returns the set of analyses that remain valid after the transform.
PreservedAnalyses run(Module &M, ModuleAnalysisManager &MAM);

/// Always returns true, i.e. this pass reports itself as required.
/// NOTE(review): the pass is described as an optional fallback; presumably
/// "optional" refers to whether it is scheduled at all, not to whether it
/// may be skipped once scheduled - confirm.
static bool isRequired() { return true; }
};

} // namespace llvm

#endif // LLVM_TRANSFORMS_HIPSTDPAR_HIPSTDPAR_H
1 change: 1 addition & 0 deletions llvm/lib/Passes/CMakeLists.txt
Expand Up @@ -19,6 +19,7 @@ add_llvm_component_library(LLVMPasses
CodeGen
Core
Coroutines
HipStdPar
IPO
InstCombine
IRPrinter
Expand Down
1 change: 1 addition & 0 deletions llvm/lib/Passes/PassBuilder.cpp
Expand Up @@ -94,6 +94,7 @@
#include "llvm/Transforms/Coroutines/CoroEarly.h"
#include "llvm/Transforms/Coroutines/CoroElide.h"
#include "llvm/Transforms/Coroutines/CoroSplit.h"
#include "llvm/Transforms/HipStdPar/HipStdPar.h"
#include "llvm/Transforms/IPO/AlwaysInliner.h"
#include "llvm/Transforms/IPO/Annotation2Metadata.h"
#include "llvm/Transforms/IPO/ArgumentPromotion.h"
Expand Down
1 change: 1 addition & 0 deletions llvm/lib/Passes/PassBuilderPipelines.cpp
Expand Up @@ -37,6 +37,7 @@
#include "llvm/Transforms/Coroutines/CoroEarly.h"
#include "llvm/Transforms/Coroutines/CoroElide.h"
#include "llvm/Transforms/Coroutines/CoroSplit.h"
#include "llvm/Transforms/HipStdPar/HipStdPar.h"
#include "llvm/Transforms/IPO/AlwaysInliner.h"
#include "llvm/Transforms/IPO/Annotation2Metadata.h"
#include "llvm/Transforms/IPO/ArgumentPromotion.h"
Expand Down
3 changes: 3 additions & 0 deletions llvm/lib/Passes/PassRegistry.def
Expand Up @@ -64,6 +64,9 @@ MODULE_PASS("forceattrs", ForceFunctionAttrsPass())
MODULE_PASS("function-import", FunctionImportPass())
MODULE_PASS("globalopt", GlobalOptPass())
MODULE_PASS("globalsplit", GlobalSplitPass())
MODULE_PASS("hipstdpar-select-accelerator-code",
HipStdParAcceleratorCodeSelectionPass())
MODULE_PASS("hipstdpar-interpose-alloc", HipStdParAllocationInterpositionPass())
MODULE_PASS("hotcoldsplit", HotColdSplittingPass())
MODULE_PASS("inferattrs", InferFunctionAttrsPass())
MODULE_PASS("inliner-wrapper", ModuleInlinerWrapperPass())
Expand Down
8 changes: 8 additions & 0 deletions llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
Expand Up @@ -50,6 +50,7 @@
#include "llvm/InitializePasses.h"
#include "llvm/MC/TargetRegistry.h"
#include "llvm/Passes/PassBuilder.h"
#include "llvm/Transforms/HipStdPar/HipStdPar.h"
#include "llvm/Transforms/IPO.h"
#include "llvm/Transforms/IPO/AlwaysInliner.h"
#include "llvm/Transforms/IPO/GlobalDCE.h"
Expand Down Expand Up @@ -348,6 +349,11 @@ static cl::opt<bool> EnableRewritePartialRegUses(
cl::desc("Enable rewrite partial reg uses pass"), cl::init(false),
cl::Hidden);

// Hidden boolean option "amdgpu-enable-hipstdpar", off by default. When set,
// registerPassBuilderCallbacks adds HipStdParAcceleratorCodeSelectionPass to
// the module pass pipeline.
static cl::opt<bool> EnableHipStdPar(
"amdgpu-enable-hipstdpar",
cl::desc("Enable HIP Standard Parallelism Offload support"), cl::init(false),
cl::Hidden);

extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeAMDGPUTarget() {
// Register the target
RegisterTargetMachine<R600TargetMachine> X(getTheR600Target());
Expand Down Expand Up @@ -699,6 +705,8 @@ void AMDGPUTargetMachine::registerPassBuilderCallbacks(PassBuilder &PB) {
if (EnableLibCallSimplify && Level != OptimizationLevel::O0)
FPM.addPass(AMDGPUSimplifyLibCallsPass());
PM.addPass(createModuleToFunctionPassAdaptor(std::move(FPM)));
if (EnableHipStdPar)
PM.addPass(HipStdParAcceleratorCodeSelectionPass());
});

PB.registerPipelineEarlySimplificationEPCallback(
Expand Down
1 change: 1 addition & 0 deletions llvm/lib/Target/AMDGPU/CMakeLists.txt
Expand Up @@ -176,6 +176,7 @@ add_llvm_target(AMDGPUCodeGen
CodeGenTypes
Core
GlobalISel
HipStdPar
IPO
MC
MIRParser
Expand Down
1 change: 1 addition & 0 deletions llvm/lib/Transforms/CMakeLists.txt
Expand Up @@ -9,3 +9,4 @@ add_subdirectory(Hello)
add_subdirectory(ObjCARC)
add_subdirectory(Coroutines)
add_subdirectory(CFGuard)
add_subdirectory(HipStdPar)
18 changes: 18 additions & 0 deletions llvm/lib/Transforms/HipStdPar/CMakeLists.txt
@@ -0,0 +1,18 @@
# Defines the LLVMHipStdPar component library, built from HipStdPar.cpp, with
# its public headers under llvm/Transforms/HipStdPar.
add_llvm_component_library(LLVMHipStdPar
HipStdPar.cpp

ADDITIONAL_HEADER_DIRS
${LLVM_MAIN_INCLUDE_DIR}/llvm/Transforms/HipStdPar

DEPENDS
intrinsics_gen
LLVMAnalysis

# Component name that other libraries list to link against this one.
COMPONENT_NAME
HipStdPar

LINK_COMPONENTS
Analysis
Core
Support
TransformUtils)

0 comments on commit 0ce6255

Please sign in to comment.