[StreamExecutor] Add basic Stream operations

Summary: Add the Stream class and a few of the operations it supports. Reviewers: jlebar, tra Subscribers: jprice, parallel_libs-commits Differential Revision: https://reviews.llvm.org/D23333 llvm-svn: 278829
llvm · Aug 16, 2016 · 68b97c7 · 68b97c7
1 parent a3f4f08
commit 68b97c7
Show file tree

Hide file tree

Showing 16 changed files with 732 additions and 49 deletions.
diff --git a/parallel-libs/streamexecutor/CMakeLists.txt b/parallel-libs/streamexecutor/CMakeLists.txt
@@ -24,6 +24,18 @@ if(STREAM_EXECUTOR_STANDALONE)
     include_directories(${LLVM_INCLUDE_DIRS})
     add_definitions(${LLVM_DEFINITIONS})
 
+    # Run llvm-config --cxxflags, capture its output in LLVM_CXXFLAGS, and append
+    # it to CMAKE_CXX_FLAGS. This is necessary to get -fno-rtti if LLVM is
+    # compiled that way. NOTE(review): llvm-config's exit status is not checked.
+    execute_process(
+        COMMAND
+        "${LLVM_BINARY_DIR}/bin/llvm-config"
+        --cxxflags
+        OUTPUT_VARIABLE
+        LLVM_CXXFLAGS
+        OUTPUT_STRIP_TRAILING_WHITESPACE)
+    set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${LLVM_CXXFLAGS}")
+
     # Find the libraries that correspond to the LLVM components
     # that we wish to use
     llvm_map_components_to_libnames(llvm_libs support symbolize)

diff --git a/parallel-libs/streamexecutor/include/streamexecutor/Interfaces.h b/parallel-libs/streamexecutor/include/streamexecutor/Interfaces.h
diff --git a/parallel-libs/streamexecutor/include/streamexecutor/LaunchDimensions.h b/parallel-libs/streamexecutor/include/streamexecutor/LaunchDimensions.h
@@ -0,0 +1,47 @@
+//===-- LaunchDimensions.h - Kernel block and grid sizes --------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// Structures to hold sizes for blocks and grids which are used as parameters
+/// for kernel launches.
+///
+//===----------------------------------------------------------------------===//
+
+#ifndef STREAMEXECUTOR_LAUNCHDIMENSIONS_H
+#define STREAMEXECUTOR_LAUNCHDIMENSIONS_H
+
+namespace streamexecutor {
+
+/// The dimensions of a device block of execution.
+/// A block is made up of an array of X by Y by Z threads. Any dimension that is
+/// not passed to the constructor defaults to 1.
+struct BlockDimensions {
+  BlockDimensions(unsigned X = 1, unsigned Y = 1, unsigned Z = 1)
+      : X(X), Y(Y), Z(Z) {} // Not explicit: a lone unsigned converts implicitly.
+
+  unsigned X; ///< Thread count along the first dimension.
+  unsigned Y; ///< Thread count along the second dimension.
+  unsigned Z; ///< Thread count along the third dimension.
+};
+
+/// The dimensions of a device grid of execution.
+/// A grid is made up of an array of X by Y by Z blocks. Any dimension that is
+/// not passed to the constructor defaults to 1.
+struct GridDimensions {
+  GridDimensions(unsigned X = 1, unsigned Y = 1, unsigned Z = 1)
+      : X(X), Y(Y), Z(Z) {} // Not explicit: a lone unsigned converts implicitly.
+
+  unsigned X; ///< Block count along the first dimension.
+  unsigned Y; ///< Block count along the second dimension.
+  unsigned Z; ///< Block count along the third dimension.
+};
+
+} // namespace streamexecutor
+
+#endif // STREAMEXECUTOR_LAUNCHDIMENSIONS_H
diff --git a/parallel-libs/streamexecutor/include/streamexecutor/PackedKernelArgumentArray.h b/parallel-libs/streamexecutor/include/streamexecutor/PackedKernelArgumentArray.h
@@ -47,6 +47,12 @@
 /// efficiently, although it is probably more information than is needed for any
 /// specific platform.
 ///
+/// The PackedKernelArgumentArrayBase class has no template parameters, so it
+/// does not benefit from compile-time type checking. However, since it has no
+/// template parameters, it can be passed as an argument to virtual functions,
+/// and this allows it to be passed to functions that use virtual function
+/// overriding to handle platform-specific kernel launching.
+///
 //===----------------------------------------------------------------------===//
 
 #ifndef STREAMEXECUTOR_PACKEDKERNELARGUMENTARRAY_H
@@ -64,39 +70,81 @@ enum class KernelArgumentType {
   SHARED_DEVICE_MEMORY  /// Shared device memory argument.
 };
 
-/// An array of packed kernel arguments.
-template <typename... ParameterTs> class PackedKernelArgumentArray {
+/// An array of packed kernel arguments without compile-time type information.
+///
+/// This un-templated base class is useful because packed kernel arguments must
+/// at some point be passed to a virtual function that performs
+/// platform-specific kernel launches. Such a virtual function cannot be
+/// templated to handle all specializations of the
+/// PackedKernelArgumentArray<...> class template, so, instead, references to
+/// PackedKernelArgumentArray<...> are passed as references to this base class.
+class PackedKernelArgumentArrayBase {
 public:
-  /// Constructs an instance by packing the specified arguments.
-  PackedKernelArgumentArray(const ParameterTs &... Arguments)
-      : SharedCount(0u) {
-    PackArguments(0, Arguments...);
-  }
+  virtual ~PackedKernelArgumentArrayBase();
 
   /// Gets the number of packed arguments.
-  size_t getArgumentCount() const { return sizeof...(ParameterTs); }
+  size_t getArgumentCount() const { return ArgumentCount; }
 
   /// Gets the address of the argument at the given index.
-  const void *getAddress(size_t Index) const { return Addresses[Index]; }
+  const void *getAddress(size_t Index) const { return AddressesData[Index]; }
 
   /// Gets the size of the argument at the given index.
-  size_t getSize(size_t Index) const { return Sizes[Index]; }
+  size_t getSize(size_t Index) const { return SizesData[Index]; }
 
   /// Gets the type of the argument at the given index.
-  KernelArgumentType getType(size_t Index) const { return Types[Index]; }
+  KernelArgumentType getType(size_t Index) const { return TypesData[Index]; }
 
   /// Gets a pointer to the address array.
-  const void *const *getAddresses() const { return Addresses.data(); }
+  const void *const *getAddresses() const { return AddressesData; }
 
   /// Gets a pointer to the sizes array.
-  const size_t *getSizes() const { return Sizes.data(); }
+  const size_t *getSizes() const { return SizesData; }
 
   /// Gets a pointer to the types array.
-  const KernelArgumentType *getTypes() const { return Types.data(); }
+  const KernelArgumentType *getTypes() const { return TypesData; }
 
   /// Gets the number of shared device memory arguments.
   size_t getSharedCount() const { return SharedCount; }
 
+protected:
+  PackedKernelArgumentArrayBase(size_t ArgumentCount)
+      : ArgumentCount(ArgumentCount), SharedCount(0u) {}
+
+  size_t ArgumentCount; ///< Number of packed arguments.
+  size_t SharedCount;   ///< Number of shared device memory arguments.
+  const void *const *AddressesData; ///< Not set here; subclass ctor assigns.
+  size_t *SizesData;                ///< Not set here; subclass ctor assigns.
+  KernelArgumentType *TypesData;    ///< Not set here; subclass ctor assigns.
+};
+
+/// An array of packed kernel arguments with compile-time type information.
+///
+/// This is used by the platform-independent StreamExecutor code to pack
+/// arguments in a compile-time type-safe way. In order to actually launch a
+/// kernel on a specific platform, however, a reference to this class will have
+/// to be passed to a virtual, platform-specific kernel launch function. Such a
+/// reference will be passed as a reference to the base class rather than a
+/// reference to this subclass itself because a virtual function cannot be
+/// templated in such a way to maintain the template parameter types of the
+/// subclass.
+template <typename... ParameterTs>
+class PackedKernelArgumentArray : public PackedKernelArgumentArrayBase {
+public:
+  /// Constructs an instance by packing the specified arguments.
+  ///
+  /// Rather than using this constructor directly, consider using the
+  /// make_kernel_argument_pack function instead, to get the compiler to infer
+  /// the parameter types for you.
+  PackedKernelArgumentArray(const ParameterTs &... Arguments)
+      : PackedKernelArgumentArrayBase(sizeof...(ParameterTs)) {
+    AddressesData = Addresses.data();
+    SizesData = Sizes.data();
+    TypesData = Types.data();
+    PackArguments(0, Arguments...);
+  }
+
+  ~PackedKernelArgumentArray() override = default;
+
 private:
   // Base case for PackArguments when there are no arguments to pack.
   void PackArguments(size_t) {}
@@ -215,7 +263,6 @@ template <typename... ParameterTs> class PackedKernelArgumentArray {
   std::array<const void *, sizeof...(ParameterTs)> Addresses;
   std::array<size_t, sizeof...(ParameterTs)> Sizes;
   std::array<KernelArgumentType, sizeof...(ParameterTs)> Types;
-  size_t SharedCount;
 };
 
 // Utility template function to call the PackedKernelArgumentArray constructor

diff --git a/parallel-libs/streamexecutor/include/streamexecutor/PlatformInterfaces.h b/parallel-libs/streamexecutor/include/streamexecutor/PlatformInterfaces.h
@@ -0,0 +1,108 @@
+//===-- PlatformInterfaces.h - Interfaces to platform impls -----*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// Interfaces to platform-specific implementations.
+///
+/// The general pattern is that the functions in these interfaces take raw
+/// handle types as parameters. This means that these types and functions are
+/// not intended for public use. Instead, corresponding methods in public types
+/// like Stream, StreamExecutor, and Kernel use C++ templates to create
+/// type-safe public interfaces. Those public functions do the type-unsafe work
+/// of extracting raw handles from their arguments and forwarding those handles
+/// to the methods defined in this file in the proper format.
+///
+//===----------------------------------------------------------------------===//
+
+#ifndef STREAMEXECUTOR_PLATFORMINTERFACES_H
+#define STREAMEXECUTOR_PLATFORMINTERFACES_H
+
+#include "streamexecutor/DeviceMemory.h"
+#include "streamexecutor/Kernel.h"
+#include "streamexecutor/LaunchDimensions.h"
+#include "streamexecutor/PackedKernelArgumentArray.h"
+#include "streamexecutor/Utils/Error.h"
+
+namespace streamexecutor {
+
+class PlatformStreamExecutor;
+
+/// Methods supported by device kernel function objects on all platforms.
+class KernelInterface {
+  // TODO(jhen): Add methods. The class is currently an empty placeholder.
+};
+
+/// Platform-specific stream handle that records the executor it was given.
+class PlatformStreamHandle {
+public:
+  explicit PlatformStreamHandle(PlatformStreamExecutor *Executor)
+      : Executor(Executor) {} // Stores the pointer as given; no null check.
+
+  virtual ~PlatformStreamHandle(); // Declared only; defined outside the class.
+
+  PlatformStreamExecutor *getExecutor() { return Executor; } // As constructed.
+
+private:
+  PlatformStreamExecutor *Executor; // The pointer passed to the constructor.
+};
+
+/// Raw executor methods that must be implemented by each platform.
+///
+/// This class defines the platform interface that supports executing work on a
+/// device. Only getName and createStream are pure virtual; every other method
+/// has a default implementation that returns a "not implemented" error.
+/// The public StreamExecutor and Stream classes have the type-safe versions of
+/// the functions in this interface.
+class PlatformStreamExecutor {
+public:
+  virtual ~PlatformStreamExecutor();
+
+  virtual std::string getName() const = 0; // Used in the default errors below.
+
+  /// Creates a platform-specific stream.
+  virtual Expected<std::unique_ptr<PlatformStreamHandle>> createStream() = 0;
+
+  /// Launches a kernel on the given stream. The default returns an error.
+  virtual Error launch(PlatformStreamHandle *S, BlockDimensions BlockSize,
+                       GridDimensions GridSize, const KernelBase &Kernel,
+                       const PackedKernelArgumentArrayBase &ArgumentArray) {
+    return make_error("launch not implemented for platform " + getName());
+  }
+
+  /// Copies data from the device to the host. The default returns an error.
+  virtual Error memcpyD2H(PlatformStreamHandle *S,
+                          const GlobalDeviceMemoryBase &DeviceSrc,
+                          void *HostDst, size_t ByteCount) {
+    return make_error("memcpyD2H not implemented for platform " + getName());
+  }
+
+  /// Copies data from the host to the device. The default returns an error.
+  virtual Error memcpyH2D(PlatformStreamHandle *S, const void *HostSrc,
+                          GlobalDeviceMemoryBase *DeviceDst, size_t ByteCount) {
+    return make_error("memcpyH2D not implemented for platform " + getName());
+  }
+
+  /// Copies data between device locations. The default returns an error.
+  virtual Error memcpyD2D(PlatformStreamHandle *S,
+                          const GlobalDeviceMemoryBase &DeviceSrc,
+                          GlobalDeviceMemoryBase *DeviceDst, size_t ByteCount) {
+    return make_error("memcpyD2D not implemented for platform " + getName());
+  }
+
+  /// Blocks the host until the given stream completes all the work enqueued up
+  /// to the point this function is called. The default returns an error.
+  virtual Error blockHostUntilDone(PlatformStreamHandle *S) {
+    return make_error("blockHostUntilDone not implemented for platform " +
+                      getName());
+  }
+};
+
+} // namespace streamexecutor
+
+#endif // STREAMEXECUTOR_PLATFORMINTERFACES_H