Skip to content

Commit

Permalink
[SE] Add CUDA platform
Browse files Browse the repository at this point in the history
Summary:
Basic CUDA platform implementation and cmake infrastructure to control
whether it's used. A few important TODOs will be handled in later
patches:

* Log some error messages that can't easily be returned as Errors.
* Cache modules and kernels to prevent reloading them if someone tries to
  reload a kernel that's already loaded.
* Tolerate shared memory arguments for kernel launches.

Reviewers: jlebar

Subscribers: beanz, mgorny, jprice, jlebar, parallel_libs-commits

Differential Revision: https://reviews.llvm.org/D24538

llvm-svn: 281524
  • Loading branch information
henline committed Sep 14, 2016
1 parent d56a27e commit 6bfc863
Show file tree
Hide file tree
Showing 13 changed files with 596 additions and 17 deletions.
5 changes: 5 additions & 0 deletions parallel-libs/streamexecutor/CMakeLists.txt
Expand Up @@ -3,9 +3,14 @@ cmake_minimum_required(VERSION 3.1)
option(STREAM_EXECUTOR_UNIT_TESTS "enable unit tests" ON)
option(STREAM_EXECUTOR_ENABLE_DOXYGEN "enable StreamExecutor doxygen" ON)
option(STREAM_EXECUTOR_ENABLE_CONFIG_TOOL "enable building streamexecutor-config tool" ON)
option(STREAM_EXECUTOR_ENABLE_CUDA_PLATFORM "enable building the CUDA StreamExecutor platform" OFF)

configure_file("include/streamexecutor/PlatformOptions.h.in" "include/streamexecutor/PlatformOptions.h")

# First find includes relative to the streamexecutor top-level source path.
include_directories(BEFORE ${CMAKE_CURRENT_SOURCE_DIR}/include)
# Also look for configured headers in the top-level binary directory.
include_directories(BEFORE ${CMAKE_CURRENT_BINARY_DIR}/include)

# If we are not building as part of LLVM, build StreamExecutor as a standalone
# project using LLVM as an external library:
Expand Down
Expand Up @@ -37,33 +37,38 @@ class PlatformDevice {

/// Returns a name for this PlatformDevice. Implementations supply the value;
/// this base class only declares the interface.
virtual std::string getName() const = 0;

/// Returns the name of the platform this device belongs to. The default
/// "not implemented for platform ..." error messages in this class are built
/// from this value.
virtual std::string getPlatformName() const = 0;

/// Creates a platform-specific kernel.
///
/// Default implementation for platforms that do not override this method:
/// returns a "not implemented" error whose message names the platform via
/// getPlatformName().
virtual Expected<const void *>
createKernel(const MultiKernelLoaderSpec &Spec) {
  return make_error("createKernel not implemented for platform " +
                    getPlatformName());
}

/// Default implementation of destroyKernel for platforms that do not override
/// it: returns a "not implemented" error whose message names the platform via
/// getPlatformName().
virtual Error destroyKernel(const void *Handle) {
  return make_error("destroyKernel not implemented for platform " +
                    getPlatformName());
}

/// Creates a platform-specific stream.
///
/// Default implementation for platforms that do not override this method:
/// returns a "not implemented" error whose message names the platform via
/// getPlatformName().
virtual Expected<const void *> createStream() {
  return make_error("createStream not implemented for platform " +
                    getPlatformName());
}

/// Default implementation of destroyStream for platforms that do not override
/// it: returns a "not implemented" error whose message names the platform via
/// getPlatformName().
virtual Error destroyStream(const void *Handle) {
  return make_error("destroyStream not implemented for platform " +
                    getPlatformName());
}

/// Launches a kernel on the given stream.
///
/// Default implementation for platforms that do not override this method:
/// ignores its arguments and returns a "not implemented" error whose message
/// names the platform via getPlatformName().
virtual Error launch(const void *PlatformStreamHandle,
                     BlockDimensions BlockSize, GridDimensions GridSize,
                     const void *PKernelHandle,
                     const PackedKernelArgumentArrayBase &ArgumentArray) {
  return make_error("launch not implemented for platform " +
                    getPlatformName());
}

/// Copies data from the device to the host.
Expand All @@ -72,7 +77,8 @@ class PlatformDevice {
virtual Error copyD2H(const void *PlatformStreamHandle,
                      const void *DeviceSrcHandle, size_t SrcByteOffset,
                      void *HostDst, size_t DstByteOffset, size_t ByteCount) {
  // Default for platforms that do not override copyD2H: report it as
  // unimplemented. The message reads "for platform", so it is built from
  // getPlatformName().
  return make_error("copyD2H not implemented for platform " +
                    getPlatformName());
}

/// Copies data from the host to the device.
Expand All @@ -81,47 +87,49 @@ class PlatformDevice {
virtual Error copyH2D(const void *PlatformStreamHandle, const void *HostSrc,
                      size_t SrcByteOffset, const void *DeviceDstHandle,
                      size_t DstByteOffset, size_t ByteCount) {
  // Default for platforms that do not override copyH2D: report it as
  // unimplemented. The message reads "for platform", so it is built from
  // getPlatformName().
  return make_error("copyH2D not implemented for platform " +
                    getPlatformName());
}

/// Copies data from one device location to another.
///
/// Default implementation for platforms that do not override this method:
/// ignores its arguments and returns a "not implemented" error whose message
/// names the platform via getPlatformName().
virtual Error copyD2D(const void *PlatformStreamHandle,
                      const void *DeviceSrcHandle, size_t SrcByteOffset,
                      const void *DeviceDstHandle, size_t DstByteOffset,
                      size_t ByteCount) {
  return make_error("copyD2D not implemented for platform " +
                    getPlatformName());
}

/// Blocks the host until the given stream completes all the work enqueued up
/// to the point this function is called.
///
/// Default implementation for platforms that do not override this method:
/// returns a "not implemented" error whose message names the platform via
/// getPlatformName().
virtual Error blockHostUntilDone(const void *PlatformStreamHandle) {
  return make_error("blockHostUntilDone not implemented for platform " +
                    getPlatformName());
}

/// Allocates untyped device memory of a given size in bytes.
///
/// Default implementation for platforms that do not override this method:
/// returns a "not implemented" error whose message names the platform via
/// getPlatformName().
virtual Expected<void *> allocateDeviceMemory(size_t ByteCount) {
  return make_error("allocateDeviceMemory not implemented for platform " +
                    getPlatformName());
}

/// Frees device memory previously allocated by allocateDeviceMemory.
///
/// Default implementation for platforms that do not override this method:
/// returns a "not implemented" error whose message names the platform via
/// getPlatformName().
virtual Error freeDeviceMemory(const void *Handle) {
  return make_error("freeDeviceMemory not implemented for platform " +
                    getPlatformName());
}

/// Registers previously allocated host memory so it can be used with copyH2D
/// and copyD2H.
///
/// Default implementation for platforms that do not override this method:
/// returns a "not implemented" error whose message names the platform via
/// getPlatformName().
virtual Error registerHostMemory(void *Memory, size_t ByteCount) {
  return make_error("registerHostMemory not implemented for platform " +
                    getPlatformName());
}

/// Unregisters host memory previously registered with registerHostMemory.
///
/// Default implementation for platforms that do not override this method:
/// returns a "not implemented" error whose message names the platform via
/// getPlatformName().
virtual Error unregisterHostMemory(const void *Memory) {
  return make_error("unregisterHostMemory not implemented for platform " +
                    getPlatformName());
}

/// Copies the given number of bytes from device memory to host memory.
Expand All @@ -133,7 +141,7 @@ class PlatformDevice {
size_t SrcByteOffset, void *HostDst,
size_t DstByteOffset, size_t ByteCount) {
return make_error("synchronousCopyD2H not implemented for platform " +
getName());
getPlatformName());
}

/// Similar to synchronousCopyD2H(const void *, size_t, void
Expand All @@ -143,7 +151,7 @@ class PlatformDevice {
const void *DeviceDstHandle,
size_t DstByteOffset, size_t ByteCount) {
return make_error("synchronousCopyH2D not implemented for platform " +
getName());
getPlatformName());
}

/// Similar to synchronousCopyD2H(const void *, size_t, void
Expand All @@ -154,7 +162,7 @@ class PlatformDevice {
const void *DeviceDstHandle,
size_t DstByteOffset, size_t ByteCount) {
return make_error("synchronousCopyD2D not implemented for platform " +
getName());
getPlatformName());
}
};

Expand Down
@@ -0,0 +1,23 @@
//===-- PlatformOptions.h - Platform option macros --------------*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
///
/// \file
/// The contents of this file are filled in at configuration time. This file
/// defines macros that represent the platform configuration state of the build,
/// e.g. which platforms are enabled.
///
//===----------------------------------------------------------------------===//


#ifndef STREAMEXECUTOR_PLATFORMOPTIONS_H
#define STREAMEXECUTOR_PLATFORMOPTIONS_H

#cmakedefine STREAM_EXECUTOR_ENABLE_CUDA_PLATFORM

#endif // STREAMEXECUTOR_PLATFORMOPTIONS_H
@@ -0,0 +1,42 @@
//===-- CUDAPlatform.h - CUDA platform subclass -----------------*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
///
/// \file
/// Declaration of the CUDAPlatform class.
///
//===----------------------------------------------------------------------===//

#ifndef STREAMEXECUTOR_PLATFORMS_CUDA_CUDAPLATFORM_H
#define STREAMEXECUTOR_PLATFORMS_CUDA_CUDAPLATFORM_H

#include "streamexecutor/Platform.h"
#include "streamexecutor/platforms/cuda/CUDAPlatformDevice.h"

#include "llvm/Support/Mutex.h"

#include <map>

namespace streamexecutor {
namespace cuda {

/// Platform subclass for CUDA.
///
/// Only the interface is declared here; the member functions are defined
/// elsewhere.
class CUDAPlatform : public Platform {
public:
/// Returns the device count for this platform.
size_t getDeviceCount() const override;

/// Returns the Device for \p DeviceIndex, or an error.
Expected<Device> getDevice(size_t DeviceIndex) override;

private:
// NOTE(review): presumably guards PlatformDevices against concurrent access --
// confirm against the implementation file.
llvm::sys::Mutex Mutex;
// CUDAPlatformDevice objects keyed by a size_t (presumably the device index
// passed to getDevice -- TODO confirm).
std::map<size_t, CUDAPlatformDevice> PlatformDevices;
};

} // namespace cuda
} // namespace streamexecutor

#endif // STREAMEXECUTOR_PLATFORMS_CUDA_CUDAPLATFORM_H
@@ -0,0 +1,93 @@
//===-- CUDAPlatformDevice.h - CUDAPlatformDevice class ---------*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
///
/// \file
/// Declaration of the CUDAPlatformDevice class.
///
//===----------------------------------------------------------------------===//

#ifndef STREAMEXECUTOR_PLATFORMS_CUDA_CUDAPLATFORMDEVICE_H
#define STREAMEXECUTOR_PLATFORMS_CUDA_CUDAPLATFORMDEVICE_H

#include "streamexecutor/PlatformDevice.h"

namespace streamexecutor {
namespace cuda {

/// Builds an Error from a CUDA result code and a caller-supplied message.
/// NOTE(review): presumably yields a success value when \p CUResult indicates
/// success -- confirm against the definition, which is not in this header.
Error CUresultToError(int CUResult, const llvm::Twine &Message);

/// PlatformDevice subclass for the CUDA platform.
///
/// Instances are obtained through create(); the constructor that takes a
/// device index is private. Copy operations are deleted and move operations
/// are declared, so the type is move-only.
class CUDAPlatformDevice : public PlatformDevice {
public:
/// Creates a CUDAPlatformDevice for \p DeviceIndex, or returns an error.
static Expected<CUDAPlatformDevice> create(size_t DeviceIndex);

// Not copyable.
CUDAPlatformDevice(const CUDAPlatformDevice &) = delete;
CUDAPlatformDevice &operator=(const CUDAPlatformDevice &) = delete;

// Movable; both move operations are declared noexcept.
CUDAPlatformDevice(CUDAPlatformDevice &&) noexcept;
CUDAPlatformDevice &operator=(CUDAPlatformDevice &&) noexcept;

~CUDAPlatformDevice() override;

/// Returns the name of this device (declared here, defined elsewhere).
std::string getName() const override;

/// Returns the platform name, always "CUDA".
std::string getPlatformName() const override { return "CUDA"; }

// The remaining members override the PlatformDevice operations of the same
// names. They are only declared here.

// Kernel creation and destruction.
Expected<const void *>
createKernel(const MultiKernelLoaderSpec &Spec) override;
Error destroyKernel(const void *Handle) override;

// Stream creation and destruction.
Expected<const void *> createStream() override;
Error destroyStream(const void *Handle) override;

// Kernel launch on a stream.
Error launch(const void *PlatformStreamHandle, BlockDimensions BlockSize,
GridDimensions GridSize, const void *PKernelHandle,
const PackedKernelArgumentArrayBase &ArgumentArray) override;

// Copies that take a stream handle.
Error copyD2H(const void *PlatformStreamHandle, const void *DeviceSrcHandle,
size_t SrcByteOffset, void *HostDst, size_t DstByteOffset,
size_t ByteCount) override;

Error copyH2D(const void *PlatformStreamHandle, const void *HostSrc,
size_t SrcByteOffset, const void *DeviceDstHandle,
size_t DstByteOffset, size_t ByteCount) override;

Error copyD2D(const void *PlatformStreamHandle, const void *DeviceSrcHandle,
size_t SrcByteOffset, const void *DeviceDstHandle,
size_t DstByteOffset, size_t ByteCount) override;

Error blockHostUntilDone(const void *PlatformStreamHandle) override;

// Device memory allocation and release.
Expected<void *> allocateDeviceMemory(size_t ByteCount) override;
Error freeDeviceMemory(const void *Handle) override;

// Host memory registration.
Error registerHostMemory(void *Memory, size_t ByteCount) override;
Error unregisterHostMemory(const void *Memory) override;

// Synchronous copies (no stream handle parameter).
Error synchronousCopyD2H(const void *DeviceSrcHandle, size_t SrcByteOffset,
void *HostDst, size_t DstByteOffset,
size_t ByteCount) override;

Error synchronousCopyH2D(const void *HostSrc, size_t SrcByteOffset,
const void *DeviceDstHandle, size_t DstByteOffset,
size_t ByteCount) override;

// NOTE(review): the parameters here are named destination-first, whereas
// copyD2D above is source-first. The types are identical either way, so a
// mismatch would not be caught by the compiler -- confirm the order agrees
// with the base class declaration and with the implementation.
Error synchronousCopyD2D(const void *DeviceDstHandle, size_t DstByteOffset,
const void *DeviceSrcHandle, size_t SrcByteOffset,
size_t ByteCount) override;

private:
// Private so that instances are only built through create().
// NOTE(review): the parameter is size_t but the member is int, so this
// initializer narrows implicitly -- confirm indices always fit in int.
CUDAPlatformDevice(size_t DeviceIndex) : DeviceIndex(DeviceIndex) {}

// Device index this object was constructed with.
int DeviceIndex;
};

} // namespace cuda
} // namespace streamexecutor

#endif // STREAMEXECUTOR_PLATFORMS_CUDA_CUDAPLATFORMDEVICE_H
Expand Up @@ -29,6 +29,8 @@ class HostPlatformDevice : public PlatformDevice {
public:
/// Returns the device name, always "host".
std::string getName() const override { return "host"; }

/// Returns the platform name, always "host".
std::string getPlatformName() const override { return "host"; }

Expected<const void *>
createKernel(const MultiKernelLoaderSpec &Spec) override {
if (!Spec.hasHostFunction()) {
Expand Down
24 changes: 23 additions & 1 deletion parallel-libs/streamexecutor/lib/CMakeLists.txt
Expand Up @@ -3,6 +3,26 @@ macro(add_se_library name)
set_target_properties(${name} PROPERTIES FOLDER "streamexecutor libraries")
endmacro(add_se_library)

# CUDA platform configuration. Everything in this block is skipped unless the
# STREAM_EXECUTOR_ENABLE_CUDA_PLATFORM option is on, in which case the two
# STREAM_EXECUTOR_* variables set here stay undefined and expand to nothing
# where the streamexecutor library is declared below.
if(STREAM_EXECUTOR_ENABLE_CUDA_PLATFORM)
# Add the CUDA platform's module directory to the find_package search path
# (presumably where the Libcuda find module lives -- confirm).
set(
CMAKE_MODULE_PATH
${CMAKE_MODULE_PATH}
"${CMAKE_CURRENT_SOURCE_DIR}/platforms/cuda/cmake/modules/")

# REQUIRED: configuration fails if Libcuda cannot be found.
find_package(Libcuda REQUIRED)
include_directories(${LIBCUDA_INCLUDE_DIRS})

# Object files of the streamexecutor_cuda_platform object library, to be
# listed among the sources of the streamexecutor library.
set(
STREAM_EXECUTOR_CUDA_PLATFORM_TARGET_OBJECT
$<TARGET_OBJECTS:streamexecutor_cuda_platform>)

# Libraries reported by the Libcuda package, to be passed as LINK_LIBS of the
# streamexecutor library.
set(
STREAM_EXECUTOR_LIBCUDA_LIBRARIES
${LIBCUDA_LIBRARIES})
endif(STREAM_EXECUTOR_ENABLE_CUDA_PLATFORM)

add_subdirectory(platforms)

add_se_library(
streamexecutor
Device.cpp
Expand All @@ -16,6 +36,8 @@ add_se_library(
PlatformDevice.cpp
PlatformManager.cpp
Stream.cpp
)
${STREAM_EXECUTOR_CUDA_PLATFORM_TARGET_OBJECT}
LINK_LIBS
${STREAM_EXECUTOR_LIBCUDA_LIBRARIES})

install(TARGETS streamexecutor DESTINATION lib)
10 changes: 10 additions & 0 deletions parallel-libs/streamexecutor/lib/PlatformManager.cpp
Expand Up @@ -13,8 +13,14 @@
//===----------------------------------------------------------------------===//

#include "streamexecutor/PlatformManager.h"

#include "streamexecutor/PlatformOptions.h"
#include "streamexecutor/platforms/host/HostPlatform.h"

#ifdef STREAM_EXECUTOR_ENABLE_CUDA_PLATFORM
#include "streamexecutor/platforms/cuda/CUDAPlatform.h"
#endif

namespace streamexecutor {

PlatformManager::PlatformManager() {
Expand All @@ -26,6 +32,10 @@ PlatformManager::PlatformManager() {
// themselves when they are loaded.

PlatformsByName.emplace("host", llvm::make_unique<host::HostPlatform>());

#ifdef STREAM_EXECUTOR_ENABLE_CUDA_PLATFORM
PlatformsByName.emplace("cuda", llvm::make_unique<cuda::CUDAPlatform>());
#endif
}

Expected<Platform *> PlatformManager::getPlatformByName(llvm::StringRef Name) {
Expand Down
3 changes: 3 additions & 0 deletions parallel-libs/streamexecutor/lib/platforms/CMakeLists.txt
@@ -0,0 +1,3 @@
# Only descend into the cuda subdirectory when the CUDA platform is enabled.
if(STREAM_EXECUTOR_ENABLE_CUDA_PLATFORM)
add_subdirectory(cuda)
endif()
@@ -0,0 +1,5 @@
# Object library (compiled objects only, no archive or shared library). Its
# objects are referenced from lib/CMakeLists.txt through
# $<TARGET_OBJECTS:streamexecutor_cuda_platform>.
add_library(
streamexecutor_cuda_platform
OBJECT
CUDAPlatform.cpp
CUDAPlatformDevice.cpp)

0 comments on commit 6bfc863

Please sign in to comment.