Device Resident Tensors - API & Framework (#3745)
Summary:
This takes over #3671, spinning out just the API- and Glow-core-level changes associated with the device-resident tensor (DRT) plan in #3629. It does not implement DRT support on any device.

Documentation: See #3629.
Pull Request resolved: #3745

Test Plan: Ran the existing tests and added two new sanity checks to DeviceManagerTest. The first, `DeviceResidentTensors`, should run only for backends that support resident tensors (currently none). The second, `CanHandleDeviceResidentTensors`, should run on all devices.
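A minimal sketch of what the device-agnostic check might look like. The DeviceManagerTest changes themselves are not among the hunks shown below; the `createDeviceManager()` helper, the TEST_P fixture details, and the `ERR_TO_BOOL` error handling here are assumptions, not a copy of the real test.

```cpp
// Hypothetical sketch only; the real check lives in DeviceManagerTest.
TEST_P(DeviceManagerTest, CanHandleDeviceResidentTensors) {
  auto device = createDeviceManager(GetParam()); // hypothetical helper

  glow::Tensor input(glow::ElemKind::FloatTy, {4, 4});
  // Freshly created tensors live on the host.
  EXPECT_FALSE(input.isDeviceResident());

  // On a backend without DRT support (currently all of them), the default
  // hooks report DEVICE_FEATURE_NOT_SUPPORTED via the result callback rather
  // than crashing.
  device->transferToDevice(input, /*locationContext=*/nullptr,
                           [](glow::Error err) {
                             EXPECT_TRUE(ERR_TO_BOOL(std::move(err)));
                           });
}
```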

Differential Revision: D18378905

Pulled By: nickgg

fbshipit-source-id: 887c290dae5a6b9b75e9b41a415958d499bc5402
mortzur authored and facebook-github-bot committed Nov 13, 2019
1 parent ec46f24 commit eb48b31
Showing 21 changed files with 464 additions and 66 deletions.
30 changes: 29 additions & 1 deletion include/glow/Backends/DeviceManager.h
@@ -17,6 +17,7 @@
#define GLOW_BACKENDS_DEVICEMANAGER_H

#include "glow/Backend/CompiledFunction.h"
#include "glow/Base/DeviceTensorTransferManager.h"
#include "glow/ExecutionContext/ExecutionContext.h"
#include "glow/Graph/Graph.h"
#include "glow/Runtime/RuntimeTypes.h"
@@ -42,7 +43,7 @@ using ReadyCBTy = std::function<void(const Module *, Error)>;
using FunctionMapTy = std::map<std::string, CompiledFunction *>;

/// Interface managing a specific instance of a device.
class DeviceManager {
class DeviceManager : public DeviceTensorTransferManager {
protected:
/// Configuration object for the device.
DeviceConfig config_;
@@ -162,6 +163,33 @@ class DeviceManager {
/// \returns the DeviceInfo for this device containing peak limits for
/// compute and bandwidths (used in partitioning).
virtual DeviceInfo getDeviceInfo() const { return DeviceInfo(); }

/// Copies the contents of \p tensor from the host to the location described
/// by \p locationContext on this device. Updates the tensor residency info.
virtual void transferToDevice(Tensor &tensor, void *locationContext,
std::function<void(Error)> resultCB =
[](Error) {}) {
DCHECK("Not Implemented");
resultCB(MAKE_ERR(ErrorValue::ErrorCode::DEVICE_FEATURE_NOT_SUPPORTED,
"Direct transfer not supported on this device"));
}

/// Copies the device buffer associated with \p tensor to the host.
/// The tensor must be resident on this device. If \p release is true,
/// frees the device memory. Updates the tensor residency info.
virtual void transferFromDevice(Tensor &tensor, bool release = true,
std::function<void(Error)> resultCB =
[](Error) {}) {
DCHECK("Not Implemented");
resultCB(MAKE_ERR(ErrorValue::ErrorCode::DEVICE_FEATURE_NOT_SUPPORTED,
"Direct transfer not supported on this device"));
}

/// Releases the device buffer described by \p locationContext.
virtual bool releaseDeviceTensor(void *locationContext) {
DCHECK("Not Implemented");
return false;
}
};

} // namespace runtime
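On a DRT-capable backend, the intent (per the plan in #3629) is for that backend's DeviceManager subclass to override these hooks. A rough sketch of the shape of such an override — `FooDeviceManager`, `FooBufferContext`, and the `memcpy` stand-ins for real DMA are illustrative only, and the remaining pure-virtual DeviceManager methods are omitted, so the class stays abstract:

```cpp
// Hypothetical sketch of a DRT-capable backend's DeviceManager. Only the new
// transfer hooks are shown; everything named Foo* is illustrative.
#include <cstring>

#include "glow/Backends/DeviceManager.h"

namespace glow {
namespace runtime {

/// Illustrative device-buffer descriptor; a real backend would wrap whatever
/// handle its driver exposes.
struct FooBufferContext {
  void *deviceAddress;
};

class FooDeviceManager : public DeviceManager {
public:
  using DeviceManager::DeviceManager;

  void transferToDevice(Tensor &tensor, void *locationContext,
                        std::function<void(Error)> resultCB) override {
    auto *ctx = static_cast<FooBufferContext *>(locationContext);
    // Stand-in for a real host-to-device DMA.
    std::memcpy(ctx->deviceAddress, tensor.getUnsafePtr(),
                tensor.getSizeInBytes());
    // Record residency so later host-side accesses assert until the tensor
    // is transferred back.
    tensor.moveToDevice(this, locationContext);
    resultCB(Error::success());
  }

  void transferFromDevice(Tensor &tensor, bool release,
                          std::function<void(Error)> resultCB) override {
    auto *ctx = static_cast<FooBufferContext *>(tensor.getLocationContext());
    // Stand-in for a real device-to-host DMA.
    std::memcpy(tensor.getUnsafePtr(), ctx->deviceAddress,
                tensor.getSizeInBytes());
    tensor.clearDeviceResidency();
    if (release) {
      releaseDeviceTensor(ctx);
    }
    resultCB(Error::success());
  }

  bool releaseDeviceTensor(void *locationContext) override {
    // Free whatever the location context describes.
    delete static_cast<FooBufferContext *>(locationContext);
    return true;
  }
};

} // namespace runtime
} // namespace glow
```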
50 changes: 50 additions & 0 deletions include/glow/Base/DeviceTensorTransferManager.h
@@ -0,0 +1,50 @@
/**
* Copyright (c) Glow Contributors. See CONTRIBUTORS file.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef GLOW_BASE_DEVICETENSORTRANSFERMANAGER_H
#define GLOW_BASE_DEVICETENSORTRANSFERMANAGER_H

#include "glow/Base/Tensor.h"
#include "glow/Support/Error.h"

#include <functional>

namespace glow {

class Tensor;

class DeviceTensorTransferManager {
public:
virtual ~DeviceTensorTransferManager() {}
/// Copies the contents of \p tensor from the host to the location described
/// by \p locationContext on this device. Updates the tensor residency info.
virtual void transferToDevice(Tensor &tensor, void *locationContext = nullptr,
std::function<void(Error)> resultCB =
[](Error) {}) = 0;

/// Copies the device buffer associated with \p tensor to the host.
/// The tensor must be resident on this device. If \p release is true, frees
/// the device memory. Updates the tensor residency info.
virtual void transferFromDevice(Tensor &tensor, bool release = true,
std::function<void(Error)> resultCB =
[](Error) {}) = 0;

/// Releases the device buffer described by \p locationContext.
virtual bool releaseDeviceTensor(void *locationContext) = 0;
};

} // namespace glow

#endif // GLOW_BASE_DEVICETENSORTRANSFERMANAGER_H
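The interface depends only on `Tensor` and `Error`, so callers can move payloads without knowing anything about the backend behind the pointer. A hypothetical round trip through the interface — the `dttm` pointer and `devCtx` argument are assumptions of this sketch, and it presumes an implementation that actually supports DRT:

```cpp
// Hypothetical caller-side round trip; not part of this change.
#include <cassert>

#include "glow/Base/DeviceTensorTransferManager.h"
#include "glow/Base/Tensor.h"

void roundTrip(glow::DeviceTensorTransferManager *dttm, glow::Tensor &t,
               void *devCtx) {
  // Push the payload to the device; the default callback ignores errors.
  dttm->transferToDevice(t, devCtx);
  assert(t.isDeviceResident() && "Implementations update residency info.");

  // ... device-side work would happen here ...

  // Copy the payload back and free the device buffer.
  dttm->transferFromDevice(t, /*release=*/true);
  assert(!t.isDeviceResident());
}
```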
132 changes: 131 additions & 1 deletion include/glow/Base/Tensor.h
@@ -20,6 +20,7 @@
#include <cassert>
#include <vector>

#include "glow/Base/DeviceTensorTransferManager.h"
#include "glow/Base/Type.h"
#include "glow/Support/Compiler.h"
#include "glow/Support/Memory.h"
@@ -48,6 +49,71 @@ void genericTranspose(const Tensor *src, Tensor *dest,
/// returned dims. For example, input {2,1,4} would result in {2,1,4,1,1,1}.
ShapeVector expandDimsToMax(llvm::ArrayRef<size_t> currDims);

namespace runtime {
class DeviceManager;
}

/// Holds information on whether this Tensor exists in a device-specific form
/// (resident on, or specific to, a device), and which device holds it.
class DeviceResidencyInfo final {
enum class TensorResidency {
Host,
Device,
};

// A pointer to the device manager of the device on which the tensor
// resides.
DeviceTensorTransferManager *deviceManager_{nullptr};
/// The residency status of the tensor.
TensorResidency tensorResidency_{TensorResidency::Host};
// A pointer to a context structure, containing the required info to access
// tensor data and perform transfers.
void *locationContext_{nullptr};

public:
DeviceResidencyInfo()
: deviceManager_(nullptr), tensorResidency_(TensorResidency::Host),
locationContext_(nullptr) {}

/// Move ctor.
DeviceResidencyInfo(DeviceResidencyInfo &&other) = delete;

/// Move assignment operator.
DeviceResidencyInfo &operator=(DeviceResidencyInfo &&other) = delete;

~DeviceResidencyInfo() {
// If a tensor is device resident, let its device manager free the device
// buffer.
if (isDeviceResident()) {
deviceManager_->releaseDeviceTensor(locationContext_);
}
}

/// Removes all device specific state.
void clear() {
deviceManager_ = nullptr;
locationContext_ = nullptr;
tensorResidency_ = TensorResidency::Host;
}

/// \returns true if this Tensor is resident on, or specific to, a device.
bool isDeviceResident() const {
assert((tensorResidency_ == TensorResidency::Host || deviceManager_) &&
"Device resident tensor must have an assigned device manager.");
return tensorResidency_ == TensorResidency::Device;
}

/// \returns the DeviceManager this tensor is resident on, if any.
DeviceTensorTransferManager *getDeviceManager() const {
return deviceManager_;
}

/// \returns the device specific location context for a resident Tensor.
void *getLocationContext() const { return locationContext_; }

friend class Tensor;
};

/// A class that represents a contiguous n-dimensional array (a tensor).
class Tensor final {
public:
@@ -71,6 +137,10 @@ class Tensor final {
/// The TensorPool that is managing this Tensor (if any).
TensorPool *tensorPool_{nullptr};

/// The device residency info associated with the tensor.
std::shared_ptr<DeviceResidencyInfo> residencyInfoP_{
new DeviceResidencyInfo()};

/// Size in bytes of the unpadded memory region. This is useful for
/// communicating the actual size of the data, which allows copying only the
/// inputs (and not the padding) to the device.
@@ -119,6 +189,7 @@
/// Set the content of the tensor to zero. If \p resetFusedScalesOffsets, then
/// fused scales/offsets will be set to 1.0/0.0 as well.
void zero(bool resetFusedScalesOffsets = false) {
assert(!isDeviceResident() && "Tensor must reside on host to access data.");
size_t size = actualSize();
// Quantized tensors should go to their offset.
switch (type_.getElementType()) {
@@ -298,7 +369,7 @@ class Tensor final {
unownedTensor.isUnowned_ = true;
unownedTensor.type_ = Type::newShape(getType(), dims);
unownedTensor.unpaddedSize_ = unpaddedSize_;

unownedTensor.residencyInfoP_ = residencyInfoP_;
if (offsets.size() == 0) {
assert(actualSize() == unownedTensor.actualSize() &&
"The size of the unowned tensor "
@@ -321,6 +392,7 @@
/// element to start a subview from.
Tensor getOwnedSlice(llvm::ArrayRef<size_t> dims,
llvm::ArrayRef<size_t> offsets = {}) const {
assert(!isDeviceResident() && "Tensor must reside on host to access data.");
return getUnowned(dims, offsets).clone();
}

@@ -341,6 +413,7 @@

/// Assigns a new shape to the tensor and allocates a new buffer.
void reset(const Type &T) {
assert(!isDeviceResident() && "Tensor must reside on host to access data.");
// If the new size is identical to the allocated size then there is no need
// to re-allocate the buffer.
if (type_ == T && getData()) {
@@ -390,6 +463,7 @@
std::swap(isUnowned_, other.isUnowned_);
std::swap(tensorPool_, other.tensorPool_);
std::swap(unpaddedSize_, other.unpaddedSize_);
std::swap(residencyInfoP_, other.residencyInfoP_);
}

/// Move assignment operator.
@@ -399,6 +473,7 @@
std::swap(isUnowned_, other.isUnowned_);
std::swap(tensorPool_, other.tensorPool_);
std::swap(unpaddedSize_, other.unpaddedSize_);
std::swap(residencyInfoP_, other.residencyInfoP_);
return *this;
}

@@ -429,6 +504,14 @@
/// elements exceeding allowed error; maximum error and location found; etc.).
bool isEqual(const Tensor &other, float allowedError = 0.0001,
bool verbose = true) const {
if (isDeviceResident()) {
if (!other.isDeviceResident()) {
return false;
}

return getDeviceManager() == other.getDeviceManager() &&
getLocationContext() == other.getLocationContext();
}
return isEqualImpl(other, /*isBitwise=*/false, allowedError, verbose);
}

@@ -513,6 +596,7 @@

/// Update the content and type of the tensor from the tensor \p t.
void assign(const Tensor *t) {
assert(!isDeviceResident() && "Tensor must reside on host to access data.");
assert(this != t && "Copying to self");
reset(t);
size_t bufferSize = type_.getSizeInBytes();
@@ -521,6 +605,7 @@

/// Update the raw data of the tensor from the tensor \p t.
void copyRawFrom(const Tensor *t) {
assert(!isDeviceResident() && "Tensor must reside on host to access data.");
assert(this != t && "Copying to self");
assert(actualSize() == t->actualSize());
assert(getElementType() == t->getElementType() && "Invalid element type");
@@ -531,6 +616,7 @@
/// Update the content of the tensor with a slice from tensor \p t. A slice
/// is one index from the first dimension of the tensor.
void copySlice(const Tensor *t, size_t slice) {
assert(!isDeviceResident() && "Tensor must reside on host to access data.");
auto dim = t->dims().slice(1);
(void)dim;
assert(dim == dims() && "Invalid slice size");
@@ -546,6 +632,7 @@
/// The copying operation may overlap the end of the tensor \p t one or more
/// times. This means that the data in the input tensor may be duplicated.
void copyConsecutiveSlices(const Tensor *t, size_t startSliceIdx) {
assert(!isDeviceResident() && "Tensor must reside on host to access data.");
auto onceSliceDim = t->dims().slice(1);
(void)onceSliceDim;
assert(onceSliceDim == dims().slice(1) && "Invalid slice size");
@@ -571,6 +658,7 @@
/// and cast them to DestElemType in this.
template <typename DestElemType, typename SrcElemType>
void copyWithCast(const Tensor *t) {
assert(!isDeviceResident() && "Tensor must reside on host to access data.");
static_assert(!std::is_same<DestElemType, SrcElemType>::value,
"Use copyRawFrom instead");
assert(this != t && "Copying to self");
@@ -599,11 +687,13 @@
/// Transpose the tensor \p src into the empty tensor \p dest. Shuffle the
/// axis based on the list \p shuffle, where each element is the src index.
void transpose(Tensor *dest, llvm::ArrayRef<unsigned_t> shuffle) const {
assert(!isDeviceResident() && "Tensor must reside on host to access data.");
genericTranspose(this, dest, shuffle);
}

/// Create a new copy of the current tensor.
Tensor clone() const {
assert(!isDeviceResident() && "Tensor must reside on host to access data.");
Tensor slice;
slice.assign(this);
return slice;
@@ -612,6 +702,40 @@
/// Return the raw unsafe pointer to the tensor payload.
char *getUnsafePtr() const { return getData(); }

/// \returns true if tensor data is stored on a device
bool isDeviceResident() const { return residencyInfoP_->isDeviceResident(); }

/// Update device residency info with new device manager and context
void moveToDevice(DeviceTensorTransferManager *deviceManager,
void *locationContext);

/// If device resident, copy Tensor contents back to host memory and release
/// associated device memory.
void ensureOnHost();

/// \returns the pointer to the device manager where the tensor resides.
DeviceTensorTransferManager *getDeviceManager() const {
assert(residencyInfoP_->isDeviceResident() &&
"Tensor must be device resident");
return residencyInfoP_->getDeviceManager();
}

/// \returns the pointer to the location context of where the tensor resides.
void *getLocationContext() const {
assert(residencyInfoP_->isDeviceResident() &&
"Tensor must be device resident");
return residencyInfoP_->getLocationContext();
}

/// Clears DeviceResidencyInfo.
/// Note that this does not affect the associated DeviceManager or device
/// memory.
void clearDeviceResidency() {
assert(residencyInfoP_->isDeviceResident() &&
"Tensor must be device resident");
residencyInfoP_->clear();
}

/// \return a new handle that points and manages this tensor.
template <class ElemTy = float> Handle<ElemTy> getHandle() &;

@@ -623,19 +747,22 @@
private:
/// \returns a pointer to the raw data, of type \p ElemTy.
template <class ElemTy> ElemTy *getRawDataPointer() {
assert(!isDeviceResident() && "Tensor must reside on host to access data.");
assert(type_.isType<ElemTy>() && "Asking for the wrong ptr type.");
return reinterpret_cast<ElemTy *>(data_);
}

/// \returns a const pointer to the raw data, of type \p ElemTy.
template <class ElemTy> const ElemTy *getRawDataPointer() const {
assert(!isDeviceResident() && "Tensor must reside on host to access data.");
assert(type_.isType<ElemTy>() && "Asking for the wrong ptr type.");
return reinterpret_cast<const ElemTy *>(data_);
}

template <class ElemTy>
bool isEqualImpl(const Tensor &other, float allowedError,
bool verbose) const {
assert(!isDeviceResident() && "Tensor must reside on host to access data.");
auto const *myData = getRawDataPointer<ElemTy>();
auto const *otherData = other.getRawDataPointer<ElemTy>();
double maxFoundError = 0.0;
@@ -668,6 +795,7 @@
}

bool isBitwiseEqualImpl(const Tensor &other) const {
assert(!isDeviceResident() && "Tensor must reside on host to access data.");
auto const *myData = getUnsafePtr();
auto const *otherData = other.getUnsafePtr();
for (size_t i = 0, e = getSizeInBytes(); i < e; i++) {
@@ -1283,11 +1411,13 @@ template <class ElemTy> class Handle final {
};

template <class ElemTy> Handle<ElemTy> Tensor::getHandle() & {
assert(!isDeviceResident() && "Tensor must reside on host to access data.");
assert(type_.isType<ElemTy>() && "Getting a handle to the wrong type.");
return Handle<ElemTy>(this);
}

template <class ElemTy> const Handle<ElemTy> Tensor::getHandle() const & {
assert(!isDeviceResident() && "Tensor must reside on host to access data.");
assert(type_.isType<ElemTy>() && "Getting a handle to the wrong type.");
return Handle<ElemTy>(const_cast<Tensor *>(this));
}
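`moveToDevice` and `ensureOnHost` are only declared in this header; their definitions live in Tensor.cpp, which is not among the hunks shown above. A plausible sketch, under the assumption that they simply update the shared `DeviceResidencyInfo` and delegate the copy-back to the owning transfer manager:

```cpp
// Hypothetical sketch of the out-of-line definitions; the real bodies in
// Tensor.cpp are not shown in this diff.
#include <cassert>

#include "glow/Base/Tensor.h"

namespace glow {

void Tensor::moveToDevice(DeviceTensorTransferManager *deviceManager,
                          void *locationContext) {
  // Tensor is a friend of DeviceResidencyInfo, so it can update the fields
  // directly. Views created via getUnowned() share residencyInfoP_, so they
  // observe the move as well.
  residencyInfoP_->deviceManager_ = deviceManager;
  residencyInfoP_->locationContext_ = locationContext;
  residencyInfoP_->tensorResidency_ =
      DeviceResidencyInfo::TensorResidency::Device;
}

void Tensor::ensureOnHost() {
  if (!isDeviceResident()) {
    return; // Already on the host; nothing to do.
  }
  // transferFromDevice() copies the device buffer back into this tensor's
  // host storage, releases the device memory, and clears the residency info.
  getDeviceManager()->transferFromDevice(*this, /*release=*/true);
  assert(!isDeviceResident());
}

} // namespace glow
```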
