llvm
diff --git a/‎llvm/docs/KernelInfo.rst‎
Lines changed: 63 additions & 0 deletions b/‎llvm/docs/KernelInfo.rst‎
Lines changed: 63 additions & 0 deletions
diff --git a/‎llvm/docs/Passes.rst‎
Lines changed: 11 additions & 0 deletions b/‎llvm/docs/Passes.rst‎
Lines changed: 11 additions & 0 deletions
diff --git a/‎llvm/include/llvm/Analysis/KernelInfo.h‎
Lines changed: 35 additions & 0 deletions b/‎llvm/include/llvm/Analysis/KernelInfo.h‎
Lines changed: 35 additions & 0 deletions
diff --git a/‎llvm/include/llvm/Analysis/TargetTransformInfo.h‎
Lines changed: 14 additions & 0 deletions b/‎llvm/include/llvm/Analysis/TargetTransformInfo.h‎
Lines changed: 14 additions & 0 deletions
diff --git a/‎llvm/include/llvm/Analysis/TargetTransformInfoImpl.h‎
Lines changed: 4 additions & 0 deletions b/‎llvm/include/llvm/Analysis/TargetTransformInfoImpl.h‎
Lines changed: 4 additions & 0 deletions
diff --git a/‎llvm/include/llvm/IR/Function.h‎
Lines changed: 12 additions & 0 deletions b/‎llvm/include/llvm/IR/Function.h‎
Lines changed: 12 additions & 0 deletions
diff --git a/‎llvm/include/llvm/Target/TargetMachine.h‎
Lines changed: 3 additions & 0 deletions b/‎llvm/include/llvm/Target/TargetMachine.h‎
Lines changed: 3 additions & 0 deletions
diff --git a/‎llvm/lib/Analysis/CMakeLists.txt‎
Lines changed: 1 addition & 0 deletions b/‎llvm/lib/Analysis/CMakeLists.txt‎
Lines changed: 1 addition & 0 deletions
@@ -0,0 +1,63 @@
+==========
+KernelInfo
+==========
+
+.. contents::
+   :local:
+
+Introduction
+============
+
+This LLVM IR pass reports various statistics for codes compiled for GPUs.  The
+goal of these statistics is to help identify bad code patterns and ways to
+mitigate them.  The pass operates at the LLVM IR level so that it can, in
+theory, support any LLVM-based compiler for programming languages supporting
+GPUs.
+
+By default, the pass runs at the end of LTO, and options like
+``-Rpass=kernel-info`` enable its remarks.  Example ``opt`` and ``clang``
+command lines appear in the next section.
+
+Remarks include summary statistics (e.g., total size of static allocas) and
+individual occurrences (e.g., source location of each alloca).  Examples of the
+output appear in tests in ``llvm/test/Analysis/KernelInfo``.
+
+Example Command Lines
+=====================
+
+To analyze a C program as it appears to an LLVM GPU backend at the end of LTO:
+
+.. code-block:: shell
+
+  $ clang -O2 -g -fopenmp --offload-arch=native test.c -foffload-lto \
+      -Rpass=kernel-info
+
+To analyze specified LLVM IR, perhaps previously generated by something like
+``clang -save-temps -g -fopenmp --offload-arch=native test.c``:
+
+.. code-block:: shell
+
+  $ opt -disable-output test-openmp-nvptx64-nvidia-cuda-sm_70.bc \
+      -pass-remarks=kernel-info -passes=kernel-info
+
+When specifying an LLVM pass pipeline on the command line, ``kernel-info`` still
+runs at the end of LTO by default.  ``-no-kernel-info-end-lto`` disables that
+behavior so you can position ``kernel-info`` explicitly:
+
+.. code-block:: shell
+
+  $ clang -O2 -g -fopenmp --offload-arch=native test.c -foffload-lto \
+      -Rpass=kernel-info \
+      -Xoffload-linker --lto-newpm-passes='lto<O2>'
+
+  $ clang -O2 -g -fopenmp --offload-arch=native test.c -foffload-lto \
+      -Rpass=kernel-info -mllvm -no-kernel-info-end-lto \
+      -Xoffload-linker --lto-newpm-passes='module(kernel-info),lto<O2>'
+
+  $ opt -disable-output test-openmp-nvptx64-nvidia-cuda-sm_70.bc \
+      -pass-remarks=kernel-info \
+      -passes='lto<O2>'
+
+  $ opt -disable-output test-openmp-nvptx64-nvidia-cuda-sm_70.bc \
+      -pass-remarks=kernel-info -no-kernel-info-end-lto \
+      -passes='module(kernel-info),lto<O2>'
@@ -5,6 +5,11 @@ LLVM's Analysis and Transform Passes
 .. contents::
     :local:
 
+.. toctree::
+   :hidden:
+
+   KernelInfo
+
 Introduction
 ============
 .. warning:: This document is not updated frequently, and the list of passes
@@ -148,6 +153,12 @@ This pass collects the count of all instructions and reports them.
 Bookkeeping for "interesting" users of expressions computed from induction
 variables.
 
+``kernel-info``: GPU Kernel Info
+--------------------------------
+
+Reports various statistics for codes compiled for GPUs.  This pass is
+:doc:`documented separately<KernelInfo>`.
+
 ``lazy-value-info``: Lazy Value Information Analysis
 ----------------------------------------------------
 
 
@@ -0,0 +1,35 @@
+//===- KernelInfo.h - Kernel Analysis ---------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the KernelInfoPrinter class used to emit remarks about
+// function properties from a GPU kernel.
+//
+// See llvm/docs/KernelInfo.rst.
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_ANALYSIS_KERNELINFO_H
+#define LLVM_ANALYSIS_KERNELINFO_H
+
+#include "llvm/IR/PassManager.h"
+
+namespace llvm {
+
+class TargetMachine;
+
+/// Function pass (new pass manager style) used to emit remarks about function
+/// properties from a GPU kernel.  See llvm/docs/KernelInfo.rst.
+class KernelInfoPrinter : public PassInfoMixin<KernelInfoPrinter> {
+public:
+  /// Store \p TM in the pass.  The pointer is kept as-is; this class declares
+  /// no destructor and never frees it.
+  explicit KernelInfoPrinter(TargetMachine *TM) : TM(TM) {}
+
+  /// Run the pass on \p F; \p AM is the function analysis manager.
+  PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM);
+
+  /// Always returns true, identifying this pass as required.
+  static bool isRequired() { return true; }
+
+private:
+  /// Target machine handed to the constructor.
+  TargetMachine *TM;
+};
+} // namespace llvm
+#endif // LLVM_ANALYSIS_KERNELINFO_H
@@ -1891,6 +1891,11 @@ class TargetTransformInfo {
 
   /// @}
 
+  /// Collect kernel launch bounds for \p F into \p LB.
+  ///
+  /// Each entry of \p LB is a (StringRef, int64_t) pair -- presumably a
+  /// target-chosen name for the bound and its value (TODO: confirm).
+  void collectKernelLaunchBounds(
+      const Function &F,
+      SmallVectorImpl<std::pair<StringRef, int64_t>> &LB) const;
+
 private:
   /// The abstract base class used to type erase specific TTI
   /// implementations.
@@ -2329,6 +2334,9 @@ class TargetTransformInfo::Concept {
   virtual unsigned getMaxNumArgs() const = 0;
   virtual unsigned getNumBytesToPadGlobalArray(unsigned Size,
                                                Type *ArrayType) const = 0;
+  // Type-erased counterpart of
+  // TargetTransformInfo::collectKernelLaunchBounds; pure virtual here.
+  virtual void collectKernelLaunchBounds(
+      const Function &F,
+      SmallVectorImpl<std::pair<StringRef, int64_t>> &LB) const = 0;
 };
 
 template <typename T>
@@ -3174,6 +3182,12 @@ class TargetTransformInfo::Model final : public TargetTransformInfo::Concept {
                                        Type *ArrayType) const override {
     return Impl.getNumBytesToPadGlobalArray(Size, ArrayType);
   }
+
+  // Forwards the query, with the same arguments, to the wrapped Impl object.
+  void collectKernelLaunchBounds(
+      const Function &F,
+      SmallVectorImpl<std::pair<StringRef, int64_t>> &LB) const override {
+    Impl.collectKernelLaunchBounds(F, LB);
+  }
 };
 
 template <typename T>
 
@@ -1049,6 +1049,10 @@ class TargetTransformInfoImplBase {
     return 0;
   }
 
+  /// Default implementation: does nothing, so \p LB is left unmodified.
+  void collectKernelLaunchBounds(
+      const Function &F,
+      SmallVectorImpl<std::pair<StringRef, int64_t>> &LB) const {}
+
 protected:
   // Obtain the minimum required size to hold the value (without the sign)
   // In case of a vector it returns the min required size for one element.
 
@@ -284,6 +284,18 @@ class LLVM_ABI Function : public GlobalObject, public ilist_node<Function> {
     setValueSubclassData((getSubclassDataFromValue() & 0xc00f) | (ID << 4));
   }
 
+  /// Returns true if this function's calling convention is one of the kernel
+  /// conventions: PTX_Kernel, AMDGPU_KERNEL, or SPIR_KERNEL.  Every other
+  /// calling convention yields false.
+  bool hasKernelCallingConv() const {
+    const auto CC = getCallingConv();
+    return CC == CallingConv::PTX_Kernel ||
+           CC == CallingConv::AMDGPU_KERNEL ||
+           CC == CallingConv::SPIR_KERNEL;
+  }
+
   enum ProfileCountType { PCT_Real, PCT_Synthetic };
 
   /// Class to represent profile counts.
 
@@ -19,6 +19,7 @@
 #include "llvm/MC/MCStreamer.h"
 #include "llvm/Support/Allocator.h"
 #include "llvm/Support/CodeGen.h"
+#include "llvm/Support/CommandLine.h"
 #include "llvm/Support/Error.h"
 #include "llvm/Support/PGOOptions.h"
 #include "llvm/Target/CGPassBuilderOption.h"
@@ -28,6 +29,8 @@
 #include <string>
 #include <utility>
 
+// NOTE(review): presumably the storage for the -no-kernel-info-end-lto option
+// described in llvm/docs/KernelInfo.rst -- confirm against the definition,
+// which lives in another file.  Declared in the global namespace.
+extern llvm::cl::opt<bool> NoKernelInfoEndLTO;
+
 namespace llvm {
 
 class AAManager;
 
@@ -79,6 +79,7 @@ add_llvm_component_library(LLVMAnalysis
   InstructionPrecedenceTracking.cpp
   InstructionSimplify.cpp
   InteractiveModelRunner.cpp
+  KernelInfo.cpp
   LastRunTrackingAnalysis.cpp
   LazyBranchProbabilityInfo.cpp
   LazyBlockFrequencyInfo.cpp