Skip to content

Commit

Permalink
[StreamExecutor] Add basic Stream operations
Browse files Browse the repository at this point in the history
Summary: Add the Stream class and a few of the operations it supports.

Reviewers: jlebar, tra

Subscribers: jprice, parallel_libs-commits

Differential Revision: https://reviews.llvm.org/D23333

llvm-svn: 278829
  • Loading branch information
henline committed Aug 16, 2016
1 parent a3f4f08 commit 68b97c7
Show file tree
Hide file tree
Showing 16 changed files with 732 additions and 49 deletions.
12 changes: 12 additions & 0 deletions parallel-libs/streamexecutor/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,18 @@ if(STREAM_EXECUTOR_STANDALONE)
include_directories(${LLVM_INCLUDE_DIRS})
add_definitions(${LLVM_DEFINITIONS})

# Ask llvm-config which C++ flags LLVM was configured with and append them to
# this project's flags.
#
# Without this, a flag such as -fno-rtti would be missed when LLVM itself was
# compiled that way.
execute_process(
  COMMAND "${LLVM_BINARY_DIR}/bin/llvm-config" --cxxflags
  OUTPUT_VARIABLE LLVM_CXXFLAGS
  OUTPUT_STRIP_TRAILING_WHITESPACE)
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${LLVM_CXXFLAGS}")

# Find the libraries that correspond to the LLVM components
# that we wish to use
llvm_map_components_to_libnames(llvm_libs support symbolize)
Expand Down
29 changes: 0 additions & 29 deletions parallel-libs/streamexecutor/include/streamexecutor/Interfaces.h

This file was deleted.

Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
//===-- LaunchDimensions.h - Kernel block and grid sizes --------*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
///
/// \file
/// Structures to hold sizes for blocks and grids which are used as parameters
/// for kernel launches.
///
//===----------------------------------------------------------------------===//

#ifndef STREAMEXECUTOR_LAUNCHDIMENSIONS_H
#define STREAMEXECUTOR_LAUNCHDIMENSIONS_H

namespace streamexecutor {

/// Extent of a single device execution block.
///
/// A block is an X by Y by Z array of threads. Every extent defaults to 1, so
/// trailing dimensions may be omitted when constructing an instance.
struct BlockDimensions {
  unsigned X; ///< Extent along the first dimension.
  unsigned Y; ///< Extent along the second dimension.
  unsigned Z; ///< Extent along the third dimension.

  /// Builds a block size from up to three extents; omitted extents are 1.
  BlockDimensions(unsigned XExtent = 1, unsigned YExtent = 1,
                  unsigned ZExtent = 1)
      : X(XExtent), Y(YExtent), Z(ZExtent) {}
};

/// Extent of a device execution grid.
///
/// A grid is an X by Y by Z array of blocks. Every extent defaults to 1, so
/// trailing dimensions may be omitted when constructing an instance.
struct GridDimensions {
  unsigned X; ///< Extent along the first dimension.
  unsigned Y; ///< Extent along the second dimension.
  unsigned Z; ///< Extent along the third dimension.

  /// Builds a grid size from up to three extents; omitted extents are 1.
  GridDimensions(unsigned XExtent = 1, unsigned YExtent = 1,
                 unsigned ZExtent = 1)
      : X(XExtent), Y(YExtent), Z(ZExtent) {}
};

} // namespace streamexecutor

#endif // STREAMEXECUTOR_LAUNCHDIMENSIONS_H
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,12 @@
/// efficiently, although it is probably more information than is needed for any
/// specific platform.
///
/// The PackedKernelArgumentArrayBase class has no template parameters, so it
/// does not benefit from compile-time type checking. However, since it has no
/// template parameters, it can be passed as an argument to virtual functions,
/// and this allows it to be passed to functions that use virtual function
/// overriding to handle platform-specific kernel launching.
///
//===----------------------------------------------------------------------===//

#ifndef STREAMEXECUTOR_PACKEDKERNELARGUMENTARRAY_H
Expand All @@ -64,39 +70,81 @@ enum class KernelArgumentType {
SHARED_DEVICE_MEMORY /// Shared device memory argument.
};

/// An array of packed kernel arguments.
template <typename... ParameterTs> class PackedKernelArgumentArray {
/// An array of packed kernel arguments without compile-time type information.
///
/// This un-templated base class is useful because packed kernel arguments must
/// at some point be passed to a virtual function that performs
/// platform-specific kernel launches. Such a virtual function cannot be
/// templated to handle all specializations of the
/// PackedKernelArgumentArray<...> class template, so, instead, references to
/// PackedKernelArgumentArray<...> are passed as references to this base class.
class PackedKernelArgumentArrayBase {
public:
/// Constructs an instance by packing the specified arguments.
PackedKernelArgumentArray(const ParameterTs &... Arguments)
: SharedCount(0u) {
PackArguments(0, Arguments...);
}
virtual ~PackedKernelArgumentArrayBase();

/// Gets the number of packed arguments.
size_t getArgumentCount() const { return sizeof...(ParameterTs); }
size_t getArgumentCount() const { return ArgumentCount; }

/// Gets the address of the argument at the given index.
const void *getAddress(size_t Index) const { return Addresses[Index]; }
const void *getAddress(size_t Index) const { return AddressesData[Index]; }

/// Gets the size of the argument at the given index.
size_t getSize(size_t Index) const { return Sizes[Index]; }
size_t getSize(size_t Index) const { return SizesData[Index]; }

/// Gets the type of the argument at the given index.
KernelArgumentType getType(size_t Index) const { return Types[Index]; }
KernelArgumentType getType(size_t Index) const { return TypesData[Index]; }

/// Gets a pointer to the address array.
const void *const *getAddresses() const { return Addresses.data(); }
const void *const *getAddresses() const { return AddressesData; }

/// Gets a pointer to the sizes array.
const size_t *getSizes() const { return Sizes.data(); }
const size_t *getSizes() const { return SizesData; }

/// Gets a pointer to the types array.
const KernelArgumentType *getTypes() const { return Types.data(); }
const KernelArgumentType *getTypes() const { return TypesData; }

/// Gets the number of shared device memory arguments.
size_t getSharedCount() const { return SharedCount; }

protected:
/// Records the number of packed arguments and starts the shared device memory
/// argument count at zero.
///
/// The three *Data pointers below are not set by this constructor; the
/// PackedKernelArgumentArray constructor assigns them after this base
/// constructor has run.
PackedKernelArgumentArrayBase(size_t ArgumentCount)
: ArgumentCount(ArgumentCount), SharedCount(0u) {}

/// Number of packed arguments, as reported by getArgumentCount().
size_t ArgumentCount;
/// Number of shared device memory arguments, as reported by getSharedCount().
size_t SharedCount;
/// Start of the argument address array read by getAddress()/getAddresses().
/// PackedKernelArgumentArray points this at its own Addresses std::array.
const void *const *AddressesData;
/// Start of the argument size array read by getSize()/getSizes().
/// PackedKernelArgumentArray points this at its own Sizes std::array.
size_t *SizesData;
/// Start of the argument type array read by getType()/getTypes().
/// PackedKernelArgumentArray points this at its own Types std::array.
KernelArgumentType *TypesData;
};

/// An array of packed kernel arguments with compile-time type information.
///
/// This is used by the platform-independent StreamExecutor code to pack
/// arguments in a compile-time type-safe way. In order to actually launch a
/// kernel on a specific platform, however, a reference to this class will have
/// to be passed to a virtual, platform-specific kernel launch function. Such a
/// reference will be passed as a reference to the base class rather than a
/// reference to this subclass itself because a virtual function cannot be
/// templated in such a way to maintain the template parameter types of the
/// subclass.
template <typename... ParameterTs>
class PackedKernelArgumentArray : public PackedKernelArgumentArrayBase {
public:
/// Constructs an instance by packing the specified arguments.
///
/// Rather than using this constructor directly, consider using the
/// make_kernel_argument_pack function instead, to get the compiler to infer
/// the parameter types for you.
PackedKernelArgumentArray(const ParameterTs &... Arguments)
: PackedKernelArgumentArrayBase(sizeof...(ParameterTs)) {
AddressesData = Addresses.data();
SizesData = Sizes.data();
TypesData = Types.data();
PackArguments(0, Arguments...);
}

~PackedKernelArgumentArray() override = default;

private:
// Base case for PackArguments when there are no arguments to pack.
void PackArguments(size_t) {}
Expand Down Expand Up @@ -215,7 +263,6 @@ template <typename... ParameterTs> class PackedKernelArgumentArray {
std::array<const void *, sizeof...(ParameterTs)> Addresses;
std::array<size_t, sizeof...(ParameterTs)> Sizes;
std::array<KernelArgumentType, sizeof...(ParameterTs)> Types;
size_t SharedCount;
};

// Utility template function to call the PackedKernelArgumentArray constructor
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,108 @@
//===-- PlatformInterfaces.h - Interfaces to platform impls -----*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
///
/// \file
/// Interfaces to platform-specific implementations.
///
/// The general pattern is that the functions in these interfaces take raw
/// handle types as parameters. This means that these types and functions are
/// not intended for public use. Instead, corresponding methods in public types
/// like Stream, StreamExecutor, and Kernel use C++ templates to create
/// type-safe public interfaces. Those public functions do the type-unsafe work
/// of extracting raw handles from their arguments and forwarding those handles
/// to the methods defined in this file in the proper format.
///
//===----------------------------------------------------------------------===//

#ifndef STREAMEXECUTOR_PLATFORMINTERFACES_H
#define STREAMEXECUTOR_PLATFORMINTERFACES_H

#include "streamexecutor/DeviceMemory.h"
#include "streamexecutor/Kernel.h"
#include "streamexecutor/LaunchDimensions.h"
#include "streamexecutor/PackedKernelArgumentArray.h"
#include "streamexecutor/Utils/Error.h"

namespace streamexecutor {

class PlatformStreamExecutor;

/// Methods supported by device kernel function objects on all platforms.
///
/// This is currently an empty placeholder: no methods have been declared yet.
class KernelInterface {
// TODO(jhen): Add methods.
};

/// Platform-specific stream handle.
///
/// Stores the PlatformStreamExecutor pointer supplied at construction and
/// hands it back through getExecutor(). The destructor is virtual and is only
/// declared here; its definition lives outside this class body.
class PlatformStreamHandle {
public:
  /// Creates a handle that remembers \p Executor.
  ///
  /// The pointer is stored as given; no null check is performed.
  explicit PlatformStreamHandle(PlatformStreamExecutor *Executor)
      : Executor(Executor) {}

  virtual ~PlatformStreamHandle();

  /// Gets the executor pointer that was passed to the constructor.
  ///
  /// This accessor does not modify the handle, so it is const-qualified and
  /// can be called through a const reference or pointer to the handle.
  PlatformStreamExecutor *getExecutor() const { return Executor; }

private:
  /// Executor supplied at construction; never reassigned by this class.
  PlatformStreamExecutor *Executor;
};

/// Raw executor methods that must be implemented by each platform.
///
/// This is the platform-facing interface for executing work on a device. The
/// public StreamExecutor and Stream classes provide the type-safe versions of
/// the functions declared here.
///
/// Only getName() and createStream() are pure virtual. Every other operation
/// has a default implementation that returns an error stating that the
/// operation is not implemented for the platform named by getName().
class PlatformStreamExecutor {
public:
  virtual ~PlatformStreamExecutor();

  /// Gets the platform name. The default implementations below append it to
  /// their "not implemented" error messages.
  virtual std::string getName() const = 0;

  /// Creates a platform-specific stream.
  virtual Expected<std::unique_ptr<PlatformStreamHandle>> createStream() = 0;

  /// Launches a kernel on the given stream.
  ///
  /// The default implementation ignores its arguments and returns an error.
  virtual Error launch(PlatformStreamHandle *S, BlockDimensions BlockSize,
                       GridDimensions GridSize, const KernelBase &Kernel,
                       const PackedKernelArgumentArrayBase &ArgumentArray) {
    const std::string PlatformName = getName();
    return make_error("launch not implemented for platform " + PlatformName);
  }

  /// Copies data from the device to the host.
  ///
  /// The default implementation ignores its arguments and returns an error.
  virtual Error memcpyD2H(PlatformStreamHandle *S,
                          const GlobalDeviceMemoryBase &DeviceSrc,
                          void *HostDst, size_t ByteCount) {
    const std::string PlatformName = getName();
    return make_error("memcpyD2H not implemented for platform " +
                      PlatformName);
  }

  /// Copies data from the host to the device.
  ///
  /// The default implementation ignores its arguments and returns an error.
  virtual Error memcpyH2D(PlatformStreamHandle *S, const void *HostSrc,
                          GlobalDeviceMemoryBase *DeviceDst,
                          size_t ByteCount) {
    const std::string PlatformName = getName();
    return make_error("memcpyH2D not implemented for platform " +
                      PlatformName);
  }

  /// Copies data from one device location to another.
  ///
  /// The default implementation ignores its arguments and returns an error.
  virtual Error memcpyD2D(PlatformStreamHandle *S,
                          const GlobalDeviceMemoryBase &DeviceSrc,
                          GlobalDeviceMemoryBase *DeviceDst,
                          size_t ByteCount) {
    const std::string PlatformName = getName();
    return make_error("memcpyD2D not implemented for platform " +
                      PlatformName);
  }

  /// Blocks the host until the given stream completes all the work enqueued
  /// up to the point this function is called.
  ///
  /// The default implementation ignores its argument and returns an error.
  virtual Error blockHostUntilDone(PlatformStreamHandle *S) {
    const std::string PlatformName = getName();
    return make_error("blockHostUntilDone not implemented for platform " +
                      PlatformName);
  }
};

} // namespace streamexecutor

#endif // STREAMEXECUTOR_PLATFORMINTERFACES_H

0 comments on commit 68b97c7

Please sign in to comment.