/
GPUToROCDLPass.h
47 lines (38 loc) · 1.76 KB
/
GPUToROCDLPass.h
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
//===- GPUToROCDLPass.h - Convert GPU kernel to ROCDL dialect ---*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
#ifndef MLIR_CONVERSION_GPUTOROCDL_GPUTOROCDLPASS_H_
#define MLIR_CONVERSION_GPUTOROCDL_GPUTOROCDLPASS_H_
#include "mlir/Conversion/GPUToROCDL/Runtimes.h"
#include "mlir/Conversion/LLVMCommon/LoweringOptions.h"
#include <memory>
namespace mlir {
// Forward declarations: this header only uses these types by reference or as
// template arguments, so their full definitions are not required here.
class ConversionTarget;
class LLVMTypeConverter;
class RewritePatternSet;
template <typename OpT> class OperationPass;

namespace gpu {
class GPUModuleOp;
} // namespace gpu
/// Gathers the set of patterns that convert GPU dialect operations to ROCDL.
/// When `runtime` is Unknown, gpu.printf is not lowered.
/// The resulting pattern set is meant to be run over a gpu.module op.
void populateGpuToROCDLConversionPatterns(LLVMTypeConverter &converter,
                                          RewritePatternSet &patterns,
                                          gpu::amd::Runtime runtime);
/// Configures `target` for the conversion from the GPU dialect to ROCDL.
/// NOTE(review): judging by the name, this sets up operation legality on the
/// conversion target; confirm the exact legal/illegal sets in the definition.
void configureGpuToROCDLConversionLegality(ConversionTarget &target);
/// Builds a pass, anchored on gpu::GPUModuleOp, that lowers GPU dialect
/// operations to their ROCDL counterparts.
///
/// `indexBitwidth` selects the bitwidth used when lowering device-side index
/// computations; it defaults to kDeriveIndexBitwidthFromDataLayout.
/// `runtime` defaults to gpu::amd::Runtime::Unknown.
std::unique_ptr<OperationPass<gpu::GPUModuleOp>>
createLowerGpuOpsToROCDLOpsPass(
    unsigned indexBitwidth = kDeriveIndexBitwidthFromDataLayout,
    gpu::amd::Runtime runtime = gpu::amd::Runtime::Unknown);
} // namespace mlir
#endif // MLIR_CONVERSION_GPUTOROCDL_GPUTOROCDLPASS_H_