From b8df0fa8a8df7dff33aeecaa8b49fe8457701751 Mon Sep 17 00:00:00 2001 From: Lucas Hosseini Date: Wed, 9 Sep 2020 15:12:25 +0200 Subject: [PATCH 01/12] Make Channel API accept tensor structs rather than raw pointers. + Introduce tensor.h defining the tensor struct(s). The `CpuTensor` struct is always defined, while the `CudaTensor` struct is defined only when `TENSORPIPE_HAS_CUDA` is true. + Update all channels to take a `CpuTensor` or `CudaTensor` for `send`/`recv` rather than a raw pointer and a length. + Make the base `Channel`/`Context` classes templated on `TTensor`, effectively creating two channel hierarchies (one for CPU channels, one for CUDA channels). + Update the Pipe and the generic channel tests to use the new API. So far, generic channel tests are CPU only, and tests for the CUDA IPC channel are (temporarily) disabled. A subsequent PR will take care of refactoring tests so that generic tests work for CUDA channels. Another PR will add support for CUDA tensors in the Pipe. 
[ghstack-poisoned] --- tensorpipe/channel/basic/channel.cc | 56 ++-- tensorpipe/channel/basic/channel.h | 13 +- tensorpipe/channel/basic/context.cc | 6 +- tensorpipe/channel/basic/context.h | 5 +- tensorpipe/channel/channel.h | 14 +- tensorpipe/channel/cma/channel.cc | 54 ++-- tensorpipe/channel/cma/channel.h | 12 +- tensorpipe/channel/cma/context.cc | 6 +- tensorpipe/channel/cma/context.h | 5 +- tensorpipe/channel/context.h | 9 +- tensorpipe/channel/cuda_ipc/channel.cc | 114 ++------ tensorpipe/channel/cuda_ipc/channel.h | 26 +- tensorpipe/channel/cuda_ipc/context.cc | 11 +- tensorpipe/channel/cuda_ipc/context.h | 4 +- tensorpipe/channel/mpt/channel.cc | 56 ++-- tensorpipe/channel/mpt/channel.h | 12 +- tensorpipe/channel/mpt/context.cc | 6 +- tensorpipe/channel/mpt/context.h | 5 +- tensorpipe/channel/registry.cc | 2 +- tensorpipe/channel/registry.h | 3 +- tensorpipe/channel/xth/channel.cc | 54 ++-- tensorpipe/channel/xth/channel.h | 12 +- tensorpipe/channel/xth/context.cc | 6 +- tensorpipe/channel/xth/context.h | 5 +- tensorpipe/common/tensor.h | 55 ++++ tensorpipe/core/context.cc | 16 +- tensorpipe/core/context.h | 10 +- tensorpipe/core/message.h | 12 +- tensorpipe/core/nop_types.h | 4 +- tensorpipe/core/pipe.cc | 59 ++-- tensorpipe/python/tensorpipe.cc | 24 +- tensorpipe/test/channel/basic/basic_test.cc | 4 +- tensorpipe/test/channel/channel_test.cc | 102 ++++--- tensorpipe/test/channel/channel_test.h | 29 +- tensorpipe/test/channel/cma/cma_test.cc | 4 +- .../test/channel/cuda_ipc/cuda_ipc_test.cc | 264 +++++++++--------- tensorpipe/test/channel/mpt/mpt_test.cc | 4 +- tensorpipe/test/channel/xth/xth_test.cc | 4 +- tensorpipe/test/core/context_test.cc | 37 +-- 39 files changed, 539 insertions(+), 585 deletions(-) create mode 100644 tensorpipe/common/tensor.h diff --git a/tensorpipe/channel/basic/channel.cc b/tensorpipe/channel/basic/channel.cc index fd7eef897..2a3899cc2 100644 --- a/tensorpipe/channel/basic/channel.cc +++ b/tensorpipe/channel/basic/channel.cc @@ 
-33,16 +33,11 @@ class Channel::Impl : public std::enable_shared_from_this { void init(); void send( - const void* ptr, - size_t length, + const CpuTensor tensor, TDescriptorCallback descriptorCallback, TSendCallback callback); - void recv( - TDescriptor descriptor, - void* ptr, - size_t length, - TRecvCallback callback); + void recv(TDescriptor descriptor, CpuTensor tensor, TRecvCallback callback); // Tell the channel what its identifier is. void setId(std::string id); @@ -53,16 +48,14 @@ class Channel::Impl : public std::enable_shared_from_this { OnDemandLoop loop_; void sendFromLoop_( - const void* ptr, - size_t length, + const CpuTensor tensor, TDescriptorCallback descriptorCallback, TSendCallback callback); // Receive memory region from peer. void recvFromLoop_( TDescriptor descriptor, - void* ptr, - size_t length, + CpuTensor tensor, TRecvCallback callback); void setIdFromLoop_(std::string id); @@ -126,32 +119,27 @@ Channel::Impl::Impl( id_(std::move(id)) {} void Channel::send( - const void* ptr, - size_t length, + const CpuTensor tensor, TDescriptorCallback descriptorCallback, TSendCallback callback) { - impl_->send(ptr, length, std::move(descriptorCallback), std::move(callback)); + impl_->send(tensor, std::move(descriptorCallback), std::move(callback)); } void Channel::Impl::send( - const void* ptr, - size_t length, + const CpuTensor tensor, TDescriptorCallback descriptorCallback, TSendCallback callback) { loop_.deferToLoop([this, - ptr, - length, + tensor, descriptorCallback{std::move(descriptorCallback)}, callback{std::move(callback)}]() mutable { - sendFromLoop_( - ptr, length, std::move(descriptorCallback), std::move(callback)); + sendFromLoop_(tensor, std::move(descriptorCallback), std::move(callback)); }); } // Send memory region to peer. 
void Channel::Impl::sendFromLoop_( - const void* ptr, - size_t length, + const CpuTensor tensor, TDescriptorCallback descriptorCallback, TSendCallback callback) { TP_DCHECK(loop_.inLoop()); @@ -191,8 +179,8 @@ void Channel::Impl::sendFromLoop_( TP_VLOG(6) << "Channel " << id_ << " is writing payload (#" << sequenceNumber << ")"; connection_->write( - ptr, - length, + tensor.ptr, + tensor.length, eagerCallbackWrapper_( [sequenceNumber, callback{std::move(callback)}](Impl& impl) { TP_VLOG(6) << "Channel " << impl.id_ << " done writing payload (#" @@ -206,30 +194,26 @@ void Channel::Impl::sendFromLoop_( // Receive memory region from peer. void Channel::recv( TDescriptor descriptor, - void* ptr, - size_t length, + CpuTensor tensor, TRecvCallback callback) { - impl_->recv(std::move(descriptor), ptr, length, std::move(callback)); + impl_->recv(std::move(descriptor), tensor, std::move(callback)); } void Channel::Impl::recv( TDescriptor descriptor, - void* ptr, - size_t length, + CpuTensor tensor, TRecvCallback callback) { loop_.deferToLoop([this, descriptor{std::move(descriptor)}, - ptr, - length, + tensor, callback{std::move(callback)}]() mutable { - recvFromLoop_(std::move(descriptor), ptr, length, std::move(callback)); + recvFromLoop_(std::move(descriptor), tensor, std::move(callback)); }); } void Channel::Impl::recvFromLoop_( TDescriptor descriptor, - void* ptr, - size_t length, + CpuTensor tensor, TRecvCallback callback) { TP_DCHECK(loop_.inLoop()); @@ -257,8 +241,8 @@ void Channel::Impl::recvFromLoop_( TP_VLOG(6) << "Channel " << id_ << " is reading payload (#" << sequenceNumber << ")"; connection_->read( - ptr, - length, + tensor.ptr, + tensor.length, eagerCallbackWrapper_( [sequenceNumber, callback{std::move(callback)}]( Impl& impl, const void* /* unused */, size_t /* unused */) { diff --git a/tensorpipe/channel/basic/channel.h b/tensorpipe/channel/basic/channel.h index 6a65b4697..08a4772f0 100644 --- a/tensorpipe/channel/basic/channel.h +++ 
b/tensorpipe/channel/basic/channel.h @@ -12,12 +12,13 @@ #include #include +#include namespace tensorpipe { namespace channel { namespace basic { -class Channel : public channel::Channel { +class Channel : public channel::CpuChannel { // Use the passkey idiom to allow make_shared to call what should be a private // constructor. See https://abseil.io/tips/134 for more information. struct ConstructorToken {}; @@ -31,17 +32,13 @@ class Channel : public channel::Channel { // Send memory region to peer. void send( - const void* ptr, - size_t length, + const CpuTensor tensor, TDescriptorCallback descriptorCallback, TSendCallback callback) override; // Receive memory region from peer. - void recv( - TDescriptor descriptor, - void* ptr, - size_t length, - TRecvCallback callback) override; + void recv(TDescriptor descriptor, CpuTensor tensor, TRecvCallback callback) + override; // Tell the channel what its identifier is. void setId(std::string id) override; diff --git a/tensorpipe/channel/basic/context.cc b/tensorpipe/channel/basic/context.cc index 2ddb31eb6..6aaa5842e 100644 --- a/tensorpipe/channel/basic/context.cc +++ b/tensorpipe/channel/basic/context.cc @@ -41,7 +41,7 @@ class Context::Impl : public Context::PrivateIface, const std::string& domainDescriptor() const; - std::shared_ptr createChannel( + std::shared_ptr createChannel( std::shared_ptr, Endpoint); @@ -88,13 +88,13 @@ const std::string& Context::Impl::domainDescriptor() const { return domainDescriptor_; } -std::shared_ptr Context::createChannel( +std::shared_ptr Context::createChannel( std::shared_ptr connection, Endpoint endpoint) { return impl_->createChannel(std::move(connection), endpoint); } -std::shared_ptr Context::Impl::createChannel( +std::shared_ptr Context::Impl::createChannel( std::shared_ptr connection, Endpoint /* unused */) { std::string channelId = id_ + ".c" + std::to_string(channelCounter_++); diff --git a/tensorpipe/channel/basic/context.h b/tensorpipe/channel/basic/context.h index 
fb19744bf..0433b4e30 100644 --- a/tensorpipe/channel/basic/context.h +++ b/tensorpipe/channel/basic/context.h @@ -13,18 +13,19 @@ #include #include +#include namespace tensorpipe { namespace channel { namespace basic { -class Context : public channel::Context { +class Context : public channel::CpuContext { public: Context(); const std::string& domainDescriptor() const override; - std::shared_ptr createChannel( + std::shared_ptr createChannel( std::shared_ptr, Endpoint) override; diff --git a/tensorpipe/channel/channel.h b/tensorpipe/channel/channel.h index 9569bd36a..08626de63 100644 --- a/tensorpipe/channel/channel.h +++ b/tensorpipe/channel/channel.h @@ -13,6 +13,7 @@ #include #include +#include #include // Channels are an out of band mechanism to transfer data between @@ -53,20 +54,19 @@ using TSendCallback = std::function; using TRecvCallback = std::function; // Abstract base class for channel classes. +template class Channel { public: // Send memory region to peer. virtual void send( - const void* ptr, - size_t length, + const TTensor tensor, TDescriptorCallback descriptorCallback, TSendCallback callback) = 0; // Receive memory region from peer. virtual void recv( TDescriptor descriptor, - void* ptr, - size_t length, + TTensor tensor, TRecvCallback callback) = 0; // Tell the channel what its identifier is. 
@@ -83,5 +83,11 @@ class Channel { virtual ~Channel() = default; }; +using CpuChannel = Channel; + +#if TENSORPIPE_HAS_CUDA +using CudaChannel = Channel; +#endif // TENSORPIPE_HAS_CUDA + } // namespace channel } // namespace tensorpipe diff --git a/tensorpipe/channel/cma/channel.cc b/tensorpipe/channel/cma/channel.cc index e2df08359..fb1fda805 100644 --- a/tensorpipe/channel/cma/channel.cc +++ b/tensorpipe/channel/cma/channel.cc @@ -55,16 +55,11 @@ class Channel::Impl : public std::enable_shared_from_this { void init(); void send( - const void* ptr, - size_t length, + const CpuTensor tensor, TDescriptorCallback descriptorCallback, TSendCallback callback); - void recv( - TDescriptor descriptor, - void* ptr, - size_t length, - TRecvCallback callback); + void recv(TDescriptor descriptor, CpuTensor tensor, TRecvCallback callback); // Tell the channel what its identifier is. void setId(std::string id); @@ -78,16 +73,14 @@ class Channel::Impl : public std::enable_shared_from_this { // Send memory region to peer. void sendFromLoop_( - const void* ptr, - size_t length, + const CpuTensor tensor, TDescriptorCallback descriptorCallback, TSendCallback callback); // Receive memory region from peer. 
void recvFromLoop_( TDescriptor descriptor, - void* ptr, - size_t length, + CpuTensor tensor, TRecvCallback callback); void setIdFromLoop_(std::string id); @@ -158,31 +151,26 @@ void Channel::Impl::initFromLoop_() { } void Channel::send( - const void* ptr, - size_t length, + const CpuTensor tensor, TDescriptorCallback descriptorCallback, TSendCallback callback) { - impl_->send(ptr, length, std::move(descriptorCallback), std::move(callback)); + impl_->send(tensor, std::move(descriptorCallback), std::move(callback)); } void Channel::Impl::send( - const void* ptr, - size_t length, + const CpuTensor tensor, TDescriptorCallback descriptorCallback, TSendCallback callback) { loop_.deferToLoop([this, - ptr, - length, + tensor, descriptorCallback{std::move(descriptorCallback)}, callback{std::move(callback)}]() mutable { - sendFromLoop_( - ptr, length, std::move(descriptorCallback), std::move(callback)); + sendFromLoop_(tensor, std::move(descriptorCallback), std::move(callback)); }); } void Channel::Impl::sendFromLoop_( - const void* ptr, - size_t length, + const CpuTensor tensor, TDescriptorCallback descriptorCallback, TSendCallback callback) { TP_DCHECK(loop_.inLoop()); @@ -236,7 +224,7 @@ void Channel::Impl::sendFromLoop_( NopHolder nopHolder; Descriptor& nopDescriptor = nopHolder.getObject(); nopDescriptor.pid = getpid(); - nopDescriptor.ptr = reinterpret_cast(ptr); + nopDescriptor.ptr = reinterpret_cast(tensor.ptr); descriptorCallback(Error::kSuccess, saveDescriptor(nopHolder)); } @@ -244,30 +232,26 @@ void Channel::Impl::sendFromLoop_( // Receive memory region from peer. 
void Channel::recv( TDescriptor descriptor, - void* ptr, - size_t length, + CpuTensor tensor, TRecvCallback callback) { - impl_->recv(std::move(descriptor), ptr, length, std::move(callback)); + impl_->recv(std::move(descriptor), tensor, std::move(callback)); } void Channel::Impl::recv( TDescriptor descriptor, - void* ptr, - size_t length, + CpuTensor tensor, TRecvCallback callback) { loop_.deferToLoop([this, descriptor{std::move(descriptor)}, - ptr, - length, + tensor, callback{std::move(callback)}]() mutable { - recvFromLoop_(std::move(descriptor), ptr, length, std::move(callback)); + recvFromLoop_(std::move(descriptor), tensor, std::move(callback)); }); } void Channel::Impl::recvFromLoop_( TDescriptor descriptor, - void* ptr, - size_t length, + CpuTensor tensor, TRecvCallback callback) { TP_DCHECK(loop_.inLoop()); @@ -301,8 +285,8 @@ void Channel::Impl::recvFromLoop_( context_->requestCopy( remotePid, remotePtr, - ptr, - length, + tensor.ptr, + tensor.length, eagerCallbackWrapper_([sequenceNumber, callback{std::move(callback)}](Impl& impl) { TP_VLOG(6) << "Channel " << impl.id_ << " done copying payload (#" diff --git a/tensorpipe/channel/cma/channel.h b/tensorpipe/channel/cma/channel.h index 5f8859ae4..8e29e8f86 100644 --- a/tensorpipe/channel/cma/channel.h +++ b/tensorpipe/channel/cma/channel.h @@ -17,7 +17,7 @@ namespace tensorpipe { namespace channel { namespace cma { -class Channel : public channel::Channel { +class Channel : public channel::CpuChannel { // Use the passkey idiom to allow make_shared to call what should be a private // constructor. See https://abseil.io/tips/134 for more information. struct ConstructorToken {}; @@ -31,17 +31,13 @@ class Channel : public channel::Channel { // Send memory region to peer. void send( - const void* ptr, - size_t length, + const CpuTensor tensor, TDescriptorCallback descriptorCallback, TSendCallback callback) override; // Receive memory region from peer. 
- void recv( - TDescriptor descriptor, - void* ptr, - size_t length, - TRecvCallback callback) override; + void recv(TDescriptor descriptor, CpuTensor tensor, TRecvCallback callback) + override; // Tell the channel what its identifier is. void setId(std::string id) override; diff --git a/tensorpipe/channel/cma/context.cc b/tensorpipe/channel/cma/context.cc index 8c859213f..7126d86dc 100644 --- a/tensorpipe/channel/cma/context.cc +++ b/tensorpipe/channel/cma/context.cc @@ -73,7 +73,7 @@ class Context::Impl : public Context::PrivateIface, const std::string& domainDescriptor() const; - std::shared_ptr createChannel( + std::shared_ptr createChannel( std::shared_ptr, Endpoint); @@ -193,13 +193,13 @@ const std::string& Context::Impl::domainDescriptor() const { return domainDescriptor_; } -std::shared_ptr Context::createChannel( +std::shared_ptr Context::createChannel( std::shared_ptr connection, Endpoint endpoint) { return impl_->createChannel(std::move(connection), endpoint); } -std::shared_ptr Context::Impl::createChannel( +std::shared_ptr Context::Impl::createChannel( std::shared_ptr connection, Endpoint /* unused */) { TP_THROW_ASSERT_IF(joined_); diff --git a/tensorpipe/channel/cma/context.h b/tensorpipe/channel/cma/context.h index 277ae6f86..db24998a9 100644 --- a/tensorpipe/channel/cma/context.h +++ b/tensorpipe/channel/cma/context.h @@ -15,18 +15,19 @@ #include #include #include +#include namespace tensorpipe { namespace channel { namespace cma { -class Context : public channel::Context { +class Context : public channel::CpuContext { public: Context(); const std::string& domainDescriptor() const override; - std::shared_ptr createChannel( + std::shared_ptr> createChannel( std::shared_ptr, Endpoint) override; diff --git a/tensorpipe/channel/context.h b/tensorpipe/channel/context.h index 25463d99b..f5540d02b 100644 --- a/tensorpipe/channel/context.h +++ b/tensorpipe/channel/context.h @@ -25,6 +25,7 @@ namespace channel { // context. 
All registered instances are assumed to be eligible // channels for all pairs. // +template class Context { public: // Return string to describe the domain for this channel. @@ -42,7 +43,7 @@ class Context { // initialized yet, take care to queue these operations to execute // as soon as initialization has completed. // - virtual std::shared_ptr createChannel( + virtual std::shared_ptr> createChannel( std::shared_ptr, Endpoint) = 0; @@ -66,5 +67,11 @@ class Context { std::string name_; }; +using CpuContext = Context; + +#if TENSORPIPE_HAS_CUDA +using CudaContext = Context; +#endif // TENSORPIPE_HAS_CUDA + } // namespace channel } // namespace tensorpipe diff --git a/tensorpipe/channel/cuda_ipc/channel.cc b/tensorpipe/channel/cuda_ipc/channel.cc index 71de7c45c..c63c63158 100644 --- a/tensorpipe/channel/cuda_ipc/channel.cc +++ b/tensorpipe/channel/cuda_ipc/channel.cc @@ -197,18 +197,11 @@ class Channel::Impl : public std::enable_shared_from_this { void init(); void send( - const void* ptr, - size_t length, + const CudaTensor tensor, TDescriptorCallback descriptorCallback, - TSendCallback callback, - cudaStream_t stream); + TSendCallback callback); - void recv( - TDescriptor descriptor, - void* ptr, - size_t length, - TRecvCallback callback, - cudaStream_t stream); + void recv(TDescriptor descriptor, CudaTensor tensor, TRecvCallback callback); // Tell the channel what its identifier is. void setId(std::string id); @@ -222,19 +215,15 @@ class Channel::Impl : public std::enable_shared_from_this { // Send memory region to peer. void sendFromLoop_( - const void* ptr, - size_t length, + const CudaTensor tensor, TDescriptorCallback descriptorCallback, - TSendCallback callback, - cudaStream_t stream); + TSendCallback callback); // Receive memory region from peer. 
void recvFromLoop_( TDescriptor descriptor, - void* ptr, - size_t length, - TRecvCallback callback, - cudaStream_t stream); + CudaTensor tensor, + TRecvCallback callback); void readPackets_(); void onReply_(const Reply& nopReply); @@ -312,55 +301,28 @@ void Channel::Impl::initFromLoop_() { } void Channel::send( - const void* ptr, - size_t length, + const CudaTensor tensor, TDescriptorCallback descriptorCallback, TSendCallback callback) { - send( - ptr, - length, - std::move(descriptorCallback), - std::move(callback), - cudaStreamDefault); -} - -void Channel::send( - const void* ptr, - size_t length, - TDescriptorCallback descriptorCallback, - TSendCallback callback, - cudaStream_t stream) { - impl_->send( - ptr, length, std::move(descriptorCallback), std::move(callback), stream); + impl_->send(tensor, std::move(descriptorCallback), std::move(callback)); } void Channel::Impl::send( - const void* ptr, - size_t length, + const CudaTensor tensor, TDescriptorCallback descriptorCallback, - TSendCallback callback, - cudaStream_t stream) { + TSendCallback callback) { loop_.deferToLoop([this, - ptr, - length, - stream, + tensor, descriptorCallback{std::move(descriptorCallback)}, callback{std::move(callback)}]() mutable { - sendFromLoop_( - ptr, - length, - std::move(descriptorCallback), - std::move(callback), - stream); + sendFromLoop_(tensor, std::move(descriptorCallback), std::move(callback)); }); } void Channel::Impl::sendFromLoop_( - const void* ptr, - size_t length, + const CudaTensor tensor, TDescriptorCallback descriptorCallback, - TSendCallback callback, - cudaStream_t stream) { + TSendCallback callback) { TP_DCHECK(loop_.inLoop()); const uint64_t sequenceNumber = nextTensorBeingSent_++; @@ -388,14 +350,14 @@ void Channel::Impl::sendFromLoop_( << sequenceNumber << ")"; }; - if (error_ || length == 0) { + if (error_ || tensor.length == 0) { descriptorCallback(error_, std::string()); callback(error_); return; } sendOperations_.emplace_back( - sequenceNumber, 
std::move(callback), ptr, stream); + sequenceNumber, std::move(callback), tensor.ptr, tensor.stream); auto& op = sendOperations_.back(); NopHolder nopHolder; @@ -406,49 +368,28 @@ void Channel::Impl::sendFromLoop_( // Receive memory region from peer. void Channel::recv( TDescriptor descriptor, - void* ptr, - size_t length, + CudaTensor tensor, TRecvCallback callback) { - recv( - std::move(descriptor), - ptr, - length, - std::move(callback), - cudaStreamDefault); -} - -void Channel::recv( - TDescriptor descriptor, - void* ptr, - size_t length, - TRecvCallback callback, - cudaStream_t stream) { - impl_->recv(std::move(descriptor), ptr, length, std::move(callback), stream); + impl_->recv(std::move(descriptor), std::move(tensor), std::move(callback)); } void Channel::Impl::recv( TDescriptor descriptor, - void* ptr, - size_t length, - TRecvCallback callback, - cudaStream_t stream) { + CudaTensor tensor, + TRecvCallback callback) { loop_.deferToLoop([this, descriptor{std::move(descriptor)}, - ptr, - length, - stream, + tensor{std::move(tensor)}, callback{std::move(callback)}]() mutable { recvFromLoop_( - std::move(descriptor), ptr, length, std::move(callback), stream); + std::move(descriptor), std::move(tensor), std::move(callback)); }); } void Channel::Impl::recvFromLoop_( TDescriptor descriptor, - void* ptr, - size_t length, - TRecvCallback callback, - cudaStream_t stream) { + CudaTensor tensor, + TRecvCallback callback) { TP_DCHECK(loop_.inLoop()); const uint64_t sequenceNumber = nextTensorBeingReceived_++; @@ -463,12 +404,13 @@ void Channel::Impl::recvFromLoop_( << sequenceNumber << ")"; }; - if (error_ || length == 0) { + if (error_ || tensor.length == 0) { callback(error_); return; } - recvOperations_.emplace_back(sequenceNumber, ptr, stream, length); + recvOperations_.emplace_back( + sequenceNumber, tensor.ptr, tensor.stream, tensor.length); auto& op = recvOperations_.back(); NopHolder nopHolder; diff --git a/tensorpipe/channel/cuda_ipc/channel.h 
b/tensorpipe/channel/cuda_ipc/channel.h index f69d3ee7c..e396e88ae 100644 --- a/tensorpipe/channel/cuda_ipc/channel.h +++ b/tensorpipe/channel/cuda_ipc/channel.h @@ -19,7 +19,7 @@ namespace tensorpipe { namespace channel { namespace cuda_ipc { -class Channel : public channel::Channel { +class Channel : public channel::CudaChannel { // Use the passkey idiom to allow make_shared to call what should be a private // constructor. See https://abseil.io/tips/134 for more information. struct ConstructorToken {}; @@ -33,31 +33,13 @@ class Channel : public channel::Channel { // Send memory region to peer. void send( - const void* ptr, - size_t length, + const CudaTensor tensor, TDescriptorCallback descriptorCallback, TSendCallback callback) override; - void send( - const void* ptr, - size_t length, - TDescriptorCallback descriptorCallback, - TSendCallback callback, - cudaStream_t stream); - // Receive memory region from peer. - void recv( - TDescriptor descriptor, - void* ptr, - size_t length, - TRecvCallback callback) override; - - void recv( - TDescriptor descriptor, - void* ptr, - size_t length, - TRecvCallback callback, - cudaStream_t stream); + void recv(TDescriptor descriptor, CudaTensor tensor, TRecvCallback callback) + override; // Tell the channel what its identifier is. void setId(std::string id) override; diff --git a/tensorpipe/channel/cuda_ipc/context.cc b/tensorpipe/channel/cuda_ipc/context.cc index 1152f931d..e2f7e0714 100644 --- a/tensorpipe/channel/cuda_ipc/context.cc +++ b/tensorpipe/channel/cuda_ipc/context.cc @@ -41,11 +41,12 @@ std::string generateDomainDescriptor() { return oss.str(); } -std::shared_ptr makeCudaIpcChannel() { +std::shared_ptr makeCudaIpcChannel() { return std::make_shared(); } -TP_REGISTER_CREATOR(TensorpipeChannelRegistry, cuda_ipc, makeCudaIpcChannel); +// TODO: Make separate CUDA channel registry. 
+// TP_REGISTER_CREATOR(TensorpipeChannelRegistry, cuda_ipc, makeCudaIpcChannel); } // namespace @@ -56,7 +57,7 @@ class Context::Impl : public Context::PrivateIface, const std::string& domainDescriptor() const; - std::shared_ptr createChannel( + std::shared_ptr createChannel( std::shared_ptr, Endpoint); @@ -137,13 +138,13 @@ const std::string& Context::Impl::domainDescriptor() const { return domainDescriptor_; } -std::shared_ptr Context::createChannel( +std::shared_ptr Context::createChannel( std::shared_ptr connection, Endpoint endpoint) { return impl_->createChannel(std::move(connection), endpoint); } -std::shared_ptr Context::Impl::createChannel( +std::shared_ptr Context::Impl::createChannel( std::shared_ptr connection, Endpoint /* unused */) { TP_THROW_ASSERT_IF(joined_); diff --git a/tensorpipe/channel/cuda_ipc/context.h b/tensorpipe/channel/cuda_ipc/context.h index f27361318..04d06be10 100644 --- a/tensorpipe/channel/cuda_ipc/context.h +++ b/tensorpipe/channel/cuda_ipc/context.h @@ -20,13 +20,13 @@ namespace tensorpipe { namespace channel { namespace cuda_ipc { -class Context : public channel::Context { +class Context : public channel::CudaContext { public: Context(); const std::string& domainDescriptor() const override; - std::shared_ptr createChannel( + std::shared_ptr createChannel( std::shared_ptr, Endpoint) override; diff --git a/tensorpipe/channel/mpt/channel.cc b/tensorpipe/channel/mpt/channel.cc index 922c58b03..9a8f533dc 100644 --- a/tensorpipe/channel/mpt/channel.cc +++ b/tensorpipe/channel/mpt/channel.cc @@ -60,16 +60,11 @@ class Channel::Impl : public std::enable_shared_from_this { void init(); void send( - const void* ptr, - size_t length, + const CpuTensor tensor, TDescriptorCallback descriptorCallback, TSendCallback callback); - void recv( - TDescriptor descriptor, - void* ptr, - size_t length, - TRecvCallback callback); + void recv(TDescriptor descriptor, CpuTensor tensor, TRecvCallback callback); // Tell the channel what its identifier is. 
void setId(std::string id); @@ -87,15 +82,13 @@ class Channel::Impl : public std::enable_shared_from_this { void initFromLoop_(); void sendFromLoop_( - const void* ptr, - size_t length, + const CpuTensor tensor, TDescriptorCallback descriptorCallback, TSendCallback callback); void recvFromLoop_( TDescriptor descriptor, - void* ptr, - size_t length, + CpuTensor tensor, TRecvCallback callback); void setIdFromLoop_(std::string id); @@ -259,31 +252,26 @@ void Channel::Impl::initFromLoop_() { } void Channel::send( - const void* ptr, - size_t length, + const CpuTensor tensor, TDescriptorCallback descriptorCallback, TSendCallback callback) { - impl_->send(ptr, length, std::move(descriptorCallback), std::move(callback)); + impl_->send(tensor, std::move(descriptorCallback), std::move(callback)); } void Channel::Impl::send( - const void* ptr, - size_t length, + const CpuTensor tensor, TDescriptorCallback descriptorCallback, TSendCallback callback) { loop_.deferToLoop([this, - ptr, - length, + tensor, descriptorCallback{std::move(descriptorCallback)}, callback{std::move(callback)}]() mutable { - sendFromLoop_( - ptr, length, std::move(descriptorCallback), std::move(callback)); + sendFromLoop_(tensor, std::move(descriptorCallback), std::move(callback)); }); } void Channel::Impl::sendFromLoop_( - const void* ptr, - size_t length, + const CpuTensor tensor, TDescriptorCallback descriptorCallback, TSendCallback callback) { TP_DCHECK(loop_.inLoop()); @@ -323,8 +311,8 @@ void Channel::Impl::sendFromLoop_( sendOperations_.emplace_back(); SendOperation& op = sendOperations_.back(); op.sequenceNumber = sequenceNumber; - op.ptr = ptr; - op.length = length; + op.ptr = tensor.ptr; + op.length = tensor.length; op.callback = std::move(callback); if (state_ == ESTABLISHED) { @@ -336,30 +324,26 @@ void Channel::Impl::sendFromLoop_( void Channel::recv( TDescriptor descriptor, - void* ptr, - size_t length, + CpuTensor tensor, TRecvCallback callback) { - impl_->recv(std::move(descriptor), ptr, 
length, std::move(callback)); + impl_->recv(std::move(descriptor), tensor, std::move(callback)); } void Channel::Impl::recv( TDescriptor descriptor, - void* ptr, - size_t length, + CpuTensor tensor, TRecvCallback callback) { loop_.deferToLoop([this, descriptor{std::move(descriptor)}, - ptr, - length, + tensor, callback{std::move(callback)}]() mutable { - recvFromLoop_(std::move(descriptor), ptr, length, std::move(callback)); + recvFromLoop_(std::move(descriptor), tensor, std::move(callback)); }); } void Channel::Impl::recvFromLoop_( TDescriptor descriptor, - void* ptr, - size_t length, + CpuTensor tensor, TRecvCallback callback) { TP_DCHECK(loop_.inLoop()); @@ -387,8 +371,8 @@ void Channel::Impl::recvFromLoop_( recvOperations_.emplace_back(); RecvOperation& op = recvOperations_.back(); op.sequenceNumber = sequenceNumber; - op.ptr = ptr; - op.length = length; + op.ptr = tensor.ptr; + op.length = tensor.length; op.callback = std::move(callback); if (state_ == ESTABLISHED) { diff --git a/tensorpipe/channel/mpt/channel.h b/tensorpipe/channel/mpt/channel.h index 1aaacbb1c..2b90e9042 100644 --- a/tensorpipe/channel/mpt/channel.h +++ b/tensorpipe/channel/mpt/channel.h @@ -18,7 +18,7 @@ namespace tensorpipe { namespace channel { namespace mpt { -class Channel : public channel::Channel { +class Channel : public channel::CpuChannel { // Use the passkey idiom to allow make_shared to call what should be a private // constructor. See https://abseil.io/tips/134 for more information. struct ConstructorToken {}; @@ -34,17 +34,13 @@ class Channel : public channel::Channel { // Send memory region to peer. void send( - const void* ptr, - size_t length, + const CpuTensor tensor, TDescriptorCallback descriptorCallback, TSendCallback callback) override; // Receive memory region from peer. 
- void recv( - TDescriptor descriptor, - void* ptr, - size_t length, - TRecvCallback callback) override; + void recv(TDescriptor descriptor, CpuTensor tensor, TRecvCallback callback) + override; // Tell the channel what its identifier is. void setId(std::string id) override; diff --git a/tensorpipe/channel/mpt/context.cc b/tensorpipe/channel/mpt/context.cc index 8ce2dc25b..4c5366468 100644 --- a/tensorpipe/channel/mpt/context.cc +++ b/tensorpipe/channel/mpt/context.cc @@ -52,7 +52,7 @@ class Context::Impl : public Context::PrivateIface, const std::string& domainDescriptor() const; - std::shared_ptr createChannel( + std::shared_ptr createChannel( std::shared_ptr, Endpoint); @@ -196,13 +196,13 @@ const std::string& Context::Impl::domainDescriptor() const { return domainDescriptor_; } -std::shared_ptr Context::createChannel( +std::shared_ptr Context::createChannel( std::shared_ptr connection, Endpoint endpoint) { return impl_->createChannel(std::move(connection), endpoint); } -std::shared_ptr Context::Impl::createChannel( +std::shared_ptr Context::Impl::createChannel( std::shared_ptr connection, Endpoint endpoint) { std::string channelId = id_ + ".c" + std::to_string(channelCounter_++); diff --git a/tensorpipe/channel/mpt/context.h b/tensorpipe/channel/mpt/context.h index 0ef944bd8..95419446d 100644 --- a/tensorpipe/channel/mpt/context.h +++ b/tensorpipe/channel/mpt/context.h @@ -14,13 +14,14 @@ #include #include +#include #include namespace tensorpipe { namespace channel { namespace mpt { -class Context : public channel::Context { +class Context : public channel::CpuContext { public: Context( std::vector>, @@ -28,7 +29,7 @@ class Context : public channel::Context { const std::string& domainDescriptor() const override; - std::shared_ptr createChannel( + std::shared_ptr createChannel( std::shared_ptr, Endpoint) override; diff --git a/tensorpipe/channel/registry.cc b/tensorpipe/channel/registry.cc index 9c54bbad3..fbbe6704c 100644 --- a/tensorpipe/channel/registry.cc 
+++ b/tensorpipe/channel/registry.cc @@ -10,4 +10,4 @@ TP_DEFINE_SHARED_REGISTRY( TensorpipeChannelRegistry, - tensorpipe::channel::Context); + tensorpipe::channel::Context); diff --git a/tensorpipe/channel/registry.h b/tensorpipe/channel/registry.h index 6ab1b4087..26e479fd2 100644 --- a/tensorpipe/channel/registry.h +++ b/tensorpipe/channel/registry.h @@ -9,8 +9,9 @@ #pragma once #include +#include #include TP_DECLARE_SHARED_REGISTRY( TensorpipeChannelRegistry, - tensorpipe::channel::Context); + tensorpipe::channel::Context); diff --git a/tensorpipe/channel/xth/channel.cc b/tensorpipe/channel/xth/channel.cc index d2147965b..2eb731add 100644 --- a/tensorpipe/channel/xth/channel.cc +++ b/tensorpipe/channel/xth/channel.cc @@ -42,16 +42,11 @@ class Channel::Impl : public std::enable_shared_from_this { void init(); void send( - const void* ptr, - size_t length, + const CpuTensor tensor, TDescriptorCallback descriptorCallback, TSendCallback callback); - void recv( - TDescriptor descriptor, - void* ptr, - size_t length, - TRecvCallback callback); + void recv(TDescriptor descriptor, CpuTensor tensor, TRecvCallback callback); // Tell the channel what its identifier is. void setId(std::string id); @@ -65,16 +60,14 @@ class Channel::Impl : public std::enable_shared_from_this { // Send memory region to peer. void sendFromLoop_( - const void* ptr, - size_t length, + const CpuTensor tensor, TDescriptorCallback descriptorCallback, TSendCallback callback); // Receive memory region from peer. 
void recvFromLoop_( TDescriptor descriptor, - void* ptr, - size_t length, + CpuTensor tensor, TRecvCallback callback); void setIdFromLoop_(std::string id); @@ -145,31 +138,26 @@ void Channel::Impl::initFromLoop_() { } void Channel::send( - const void* ptr, - size_t length, + const CpuTensor tensor, TDescriptorCallback descriptorCallback, TSendCallback callback) { - impl_->send(ptr, length, std::move(descriptorCallback), std::move(callback)); + impl_->send(tensor, std::move(descriptorCallback), std::move(callback)); } void Channel::Impl::send( - const void* ptr, - size_t length, + const CpuTensor tensor, TDescriptorCallback descriptorCallback, TSendCallback callback) { loop_.deferToLoop([this, - ptr, - length, + tensor, descriptorCallback{std::move(descriptorCallback)}, callback{std::move(callback)}]() mutable { - sendFromLoop_( - ptr, length, std::move(descriptorCallback), std::move(callback)); + sendFromLoop_(tensor, std::move(descriptorCallback), std::move(callback)); }); } void Channel::Impl::sendFromLoop_( - const void* ptr, - size_t length, + const CpuTensor tensor, TDescriptorCallback descriptorCallback, TSendCallback callback) { TP_DCHECK(loop_.inLoop()); @@ -221,7 +209,7 @@ void Channel::Impl::sendFromLoop_( NopHolder nopHolder; Descriptor& nopDescriptor = nopHolder.getObject(); - nopDescriptor.ptr = reinterpret_cast(ptr); + nopDescriptor.ptr = reinterpret_cast(tensor.ptr); descriptorCallback(Error::kSuccess, saveDescriptor(nopHolder)); } @@ -229,30 +217,26 @@ void Channel::Impl::sendFromLoop_( // Receive memory region from peer. 
void Channel::recv( TDescriptor descriptor, - void* ptr, - size_t length, + CpuTensor tensor, TRecvCallback callback) { - impl_->recv(std::move(descriptor), ptr, length, std::move(callback)); + impl_->recv(std::move(descriptor), tensor, std::move(callback)); } void Channel::Impl::recv( TDescriptor descriptor, - void* ptr, - size_t length, + CpuTensor tensor, TRecvCallback callback) { loop_.deferToLoop([this, descriptor{std::move(descriptor)}, - ptr, - length, + tensor, callback{std::move(callback)}]() mutable { - recvFromLoop_(std::move(descriptor), ptr, length, std::move(callback)); + recvFromLoop_(std::move(descriptor), tensor, std::move(callback)); }); } void Channel::Impl::recvFromLoop_( TDescriptor descriptor, - void* ptr, - size_t length, + CpuTensor tensor, TRecvCallback callback) { TP_DCHECK(loop_.inLoop()); @@ -282,8 +266,8 @@ void Channel::Impl::recvFromLoop_( << ")"; context_->requestCopy( remotePtr, - ptr, - length, + tensor.ptr, + tensor.length, eagerCallbackWrapper_([sequenceNumber, callback{std::move(callback)}](Impl& impl) { TP_VLOG(6) << "Channel " << impl.id_ << " done copying payload (#" diff --git a/tensorpipe/channel/xth/channel.h b/tensorpipe/channel/xth/channel.h index 08571ef8d..72ff5dfeb 100644 --- a/tensorpipe/channel/xth/channel.h +++ b/tensorpipe/channel/xth/channel.h @@ -17,7 +17,7 @@ namespace tensorpipe { namespace channel { namespace xth { -class Channel : public channel::Channel { +class Channel : public channel::CpuChannel { // Use the passkey idiom to allow make_shared to call what should be a private // constructor. See https://abseil.io/tips/134 for more information. struct ConstructorToken {}; @@ -31,17 +31,13 @@ class Channel : public channel::Channel { // Send memory region to peer. void send( - const void* ptr, - size_t length, + const CpuTensor tensor, TDescriptorCallback descriptorCallback, TSendCallback callback) override; // Receive memory region from peer. 
- void recv( - TDescriptor descriptor, - void* ptr, - size_t length, - TRecvCallback callback) override; + void recv(TDescriptor descriptor, CpuTensor tensor, TRecvCallback callback) + override; // Tell the channel what its identifier is. void setId(std::string id) override; diff --git a/tensorpipe/channel/xth/context.cc b/tensorpipe/channel/xth/context.cc index 58e5bc4e0..9758d91c6 100644 --- a/tensorpipe/channel/xth/context.cc +++ b/tensorpipe/channel/xth/context.cc @@ -58,7 +58,7 @@ class Context::Impl : public Context::PrivateIface, const std::string& domainDescriptor() const; - std::shared_ptr createChannel( + std::shared_ptr createChannel( std::shared_ptr, Endpoint); @@ -176,13 +176,13 @@ const std::string& Context::Impl::domainDescriptor() const { return domainDescriptor_; } -std::shared_ptr Context::createChannel( +std::shared_ptr Context::createChannel( std::shared_ptr connection, Endpoint endpoint) { return impl_->createChannel(std::move(connection), endpoint); } -std::shared_ptr Context::Impl::createChannel( +std::shared_ptr Context::Impl::createChannel( std::shared_ptr connection, Endpoint /* unused */) { TP_THROW_ASSERT_IF(joined_); diff --git a/tensorpipe/channel/xth/context.h b/tensorpipe/channel/xth/context.h index 77c73fab4..9d62c21a0 100644 --- a/tensorpipe/channel/xth/context.h +++ b/tensorpipe/channel/xth/context.h @@ -15,18 +15,19 @@ #include #include #include +#include namespace tensorpipe { namespace channel { namespace xth { -class Context : public channel::Context { +class Context : public channel::CpuContext { public: Context(); const std::string& domainDescriptor() const override; - std::shared_ptr createChannel( + std::shared_ptr createChannel( std::shared_ptr, Endpoint) override; diff --git a/tensorpipe/common/tensor.h b/tensorpipe/common/tensor.h new file mode 100644 index 000000000..5173afe79 --- /dev/null +++ b/tensorpipe/common/tensor.h @@ -0,0 +1,55 @@ +/* + * Copyright (c) Facebook, Inc. and its affiliates. 
+ * All rights reserved.
+ *
+ * This source code is licensed under the BSD-style license found in the
+ * LICENSE file in the root directory of this source tree.
+ */
+
+#pragma once
+
+#include <cstddef>
+
+#if TENSORPIPE_HAS_CUDA
+#include <cuda_runtime.h>
+#endif // TENSORPIPE_HAS_CUDA
+
+namespace tensorpipe {
+
+enum class DeviceType {
+  kCpu,
+#if TENSORPIPE_HAS_CUDA
+  kCuda,
+#endif // TENSORPIPE_HAS_CUDA
+};
+
+struct CpuTensor {
+  void* ptr{nullptr};
+  size_t length{0};
+};
+
+#if TENSORPIPE_HAS_CUDA
+struct CudaTensor {
+  void* ptr{nullptr};
+  size_t length{0};
+  cudaStream_t stream{cudaStreamDefault};
+};
+#endif // TENSORPIPE_HAS_CUDA
+
+struct Tensor {
+  Tensor(CpuTensor t) : type(DeviceType::kCpu), cpu(t) {}
+
+#if TENSORPIPE_HAS_CUDA
+  Tensor(CudaTensor t) : type(DeviceType::kCuda), cuda(t) {}
+#endif // TENSORPIPE_HAS_CUDA
+
+  DeviceType type;
+  union {
+    CpuTensor cpu;
+#if TENSORPIPE_HAS_CUDA
+    CudaTensor cuda;
+#endif // TENSORPIPE_HAS_CUDA
+  };
+};
+
+} // namespace tensorpipe
diff --git a/tensorpipe/core/context.cc b/tensorpipe/core/context.cc
index b0cc732c8..51d4f8f68 100644
--- a/tensorpipe/core/context.cc
+++ b/tensorpipe/core/context.cc
@@ -51,7 +51,10 @@ class Context::Impl : public Context::PrivateIface,
       std::string,
       std::shared_ptr<transport::Context>);
 
-  void registerChannel(int64_t, std::string, std::shared_ptr<channel::Context>);
+  void registerChannel(
+      int64_t,
+      std::string,
+      std::shared_ptr<channel::CpuContext>);
 
   std::shared_ptr<Listener> listen(const std::vector<std::string>&);
 
@@ -60,7 +63,7 @@ class Context::Impl : public Context::PrivateIface,
   ClosingEmitter& getClosingEmitter() override;
 
   std::shared_ptr<transport::Context> getTransport(const std::string&) override;
-  std::shared_ptr<channel::Context> getChannel(const std::string&) override;
+  std::shared_ptr<channel::CpuContext> getChannel(const std::string&) override;
 
   using PrivateIface::TOrderedTransports;
 
@@ -102,7 +105,8 @@ class Context::Impl : public Context::PrivateIface,
   std::unordered_map<std::string, std::shared_ptr<transport::Context>>
       transports_;
 
-  std::unordered_map<std::string, std::shared_ptr<channel::Context>> channels_;
+  std::unordered_map<std::string, std::shared_ptr<channel::CpuContext>>
+      channels_;
 
   TOrderedTransports transportsByPriority_;
   TOrderedChannels
channelsByPriority_; @@ -150,14 +154,14 @@ void Context::Impl::registerTransport( void Context::registerChannel( int64_t priority, std::string channel, - std::shared_ptr context) { + std::shared_ptr context) { impl_->registerChannel(priority, std::move(channel), std::move(context)); } void Context::Impl::registerChannel( int64_t priority, std::string channel, - std::shared_ptr context) { + std::shared_ptr context) { TP_THROW_ASSERT_IF(channel.empty()); TP_THROW_ASSERT_IF(channels_.find(channel) != channels_.end()) << "channel " << channel << " already registered"; @@ -227,7 +231,7 @@ std::shared_ptr Context::Impl::getTransport( return iter->second; } -std::shared_ptr Context::Impl::getChannel( +std::shared_ptr Context::Impl::getChannel( const std::string& channel) { auto iter = channels_.find(channel); if (iter == channels_.end()) { diff --git a/tensorpipe/core/context.h b/tensorpipe/core/context.h index 1fca36d59..732d33770 100644 --- a/tensorpipe/core/context.h +++ b/tensorpipe/core/context.h @@ -16,6 +16,7 @@ #include #include +#include #include namespace tensorpipe { @@ -58,7 +59,10 @@ class Context final { std::string, std::shared_ptr); - void registerChannel(int64_t, std::string, std::shared_ptr); + void registerChannel( + int64_t, + std::string, + std::shared_ptr); std::shared_ptr listen(const std::vector&); @@ -84,7 +88,7 @@ class Context final { virtual std::shared_ptr getTransport( const std::string&) = 0; - virtual std::shared_ptr getChannel( + virtual std::shared_ptr getChannel( const std::string&) = 0; using TOrderedTransports = std::map< @@ -95,7 +99,7 @@ class Context final { using TOrderedChannels = std::map< int64_t, - std::tuple>>; + std::tuple>>; virtual const TOrderedChannels& getOrderedChannels() = 0; diff --git a/tensorpipe/core/message.h b/tensorpipe/core/message.h index 7234e5164..cc9075fac 100644 --- a/tensorpipe/core/message.h +++ b/tensorpipe/core/message.h @@ -13,6 +13,8 @@ #include #include +#include + namespace tensorpipe { // Messages 
consist of a primary buffer and zero or more separate @@ -49,12 +51,14 @@ class Message final { std::vector payloads; struct Tensor { - void* data{nullptr}; - size_t length{0}; - - // Users may include arbitrary metadata in the following fields. + tensorpipe::Tensor tensor; + // Users may include arbitrary metadata in the following field. // This may contain allocation hints for the receiver, for example. std::string metadata; + + // The following fields are marked for deprecation. Use `tensor` instead. + void* data{nullptr}; + size_t length{0}; }; // Holds the tensors that are offered to the side channels. diff --git a/tensorpipe/core/nop_types.h b/tensorpipe/core/nop_types.h index b7c9d7b23..4a0aebc3e 100644 --- a/tensorpipe/core/nop_types.h +++ b/tensorpipe/core/nop_types.h @@ -16,6 +16,8 @@ #include #include +#include + namespace tensorpipe { struct SpontaneousConnection { @@ -63,8 +65,6 @@ struct BrochureAnswer { channelSelection); }; -enum class DeviceType { DEVICE_TYPE_UNSPECIFIED, DEVICE_TYPE_CPU }; - struct MessageDescriptor { struct PayloadDescriptor { // This pointless constructor is needed to work around a bug in GCC 5.5 (and diff --git a/tensorpipe/core/pipe.cc b/tensorpipe/core/pipe.cc index ef65b42e6..8b2cd1b88 100644 --- a/tensorpipe/core/pipe.cc +++ b/tensorpipe/core/pipe.cc @@ -85,11 +85,14 @@ void parseDescriptorOfMessage(ReadOperation& op, const Packet& nopPacketIn) { } for (const auto& nopTensorDescriptor : nopMessageDescriptor.tensorDescriptors) { - Message::Tensor tensor; + Message::Tensor tensor{ + .tensor = + CpuTensor{ + nullptr, static_cast(nopTensorDescriptor.sizeInBytes)}, + .metadata = nopTensorDescriptor.metadata, + }; ReadOperation::Tensor tensorBeingAllocated; - tensor.length = nopTensorDescriptor.sizeInBytes; - tensorBeingAllocated.length = tensor.length; - tensor.metadata = nopTensorDescriptor.metadata; + tensorBeingAllocated.length = tensor.tensor.cpu.length; tensorBeingAllocated.channelName = nopTensorDescriptor.channelName; // 
FIXME If the nop object wasn't const we could move the string out... tensorBeingAllocated.descriptor = nopTensorDescriptor.channelDescriptor; @@ -98,8 +101,8 @@ void parseDescriptorOfMessage(ReadOperation& op, const Packet& nopPacketIn) { } } -// Raise an error if the number or sizes of the payloads and the tensors in the -// message do not match the ones that are expected by the ReadOperation. +// Raise an error if the number or sizes of the payloads and the tensors in +// the message do not match the ones that are expected by the ReadOperation. void checkAllocationCompatibility( const ReadOperation& op, const Message& message) { @@ -118,7 +121,7 @@ void checkAllocationCompatibility( const Message::Tensor& tensor = message.tensors[tensorIdx]; const ReadOperation::Tensor& tensorBeingAllocated = op.tensors[tensorIdx]; TP_DCHECK_GE(tensorBeingAllocated.length, 0); - TP_THROW_ASSERT_IF(tensor.length != tensorBeingAllocated.length); + TP_THROW_ASSERT_IF(tensor.tensor.cpu.length != tensorBeingAllocated.length); } } @@ -181,8 +184,8 @@ std::shared_ptr> makeDescriptorForMessage( nopMessageDescriptor.tensorDescriptors.emplace_back(); MessageDescriptor::TensorDescriptor& nopTensorDescriptor = nopMessageDescriptor.tensorDescriptors.back(); - nopTensorDescriptor.deviceType = DeviceType::DEVICE_TYPE_CPU; - nopTensorDescriptor.sizeInBytes = tensor.length; + nopTensorDescriptor.deviceType = DeviceType::kCpu; + nopTensorDescriptor.sizeInBytes = tensor.tensor.cpu.length; nopTensorDescriptor.metadata = tensor.metadata; nopTensorDescriptor.channelName = otherTensor.channelName; // FIXME In principle we could move here. @@ -260,7 +263,8 @@ class Pipe::Impl : public std::enable_shared_from_this { std::string transport_; std::shared_ptr connection_; - std::unordered_map> channels_; + std::unordered_map> + channels_; // The server will set this up when it tell the client to switch to a // different connection or to open some channels. 
@@ -476,7 +480,7 @@ void Pipe::Impl::initFromLoop_() { } for (const auto& channelContextIter : context_->getOrderedChannels()) { const std::string& channelName = std::get<0>(channelContextIter.second); - const channel::Context& channelContext = + const channel::CpuContext& channelContext = *(std::get<1>(channelContextIter.second)); ChannelAdvertisement& nopChannelAdvertisement = nopBrochure.channelAdvertisement[channelName]; @@ -669,14 +673,18 @@ void Pipe::Impl::readPayloadsAndReceiveTensorsOfMessage(ReadOperation& op) { tensorIdx++) { Message::Tensor& tensor = op.message.tensors[tensorIdx]; ReadOperation::Tensor& tensorBeingAllocated = op.tensors[tensorIdx]; - std::shared_ptr channel = + std::shared_ptr channel = channels_.at(tensorBeingAllocated.channelName); TP_VLOG(3) << "Pipe " << id_ << " is receiving tensor #" << op.sequenceNumber << "." << tensorIdx; + + // Temporary workaround until tensor.data/tensor.length are removed. + auto cpu_tensor = (tensor.data == nullptr) + ? tensor.tensor.cpu + : CpuTensor{.ptr = tensor.data, .length = tensor.length}; channel->recv( std::move(tensorBeingAllocated.descriptor), - tensor.data, - tensor.length, + cpu_tensor, eagerCallbackWrapper_([&op, tensorIdx](Impl& impl) { TP_VLOG(3) << "Pipe " << impl.id_ << " done receiving tensor #" << op.sequenceNumber << "." << tensorIdx; @@ -1076,13 +1084,17 @@ void Pipe::Impl::sendTensorsOfMessage_(WriteOperation& op) { if (channelIter == channels_.cend()) { continue; } - channel::Channel& channel = *(channelIter->second); + channel::CpuChannel& channel = *(channelIter->second); TP_VLOG(3) << "Pipe " << id_ << " is sending tensor #" << op.sequenceNumber << "." << tensorIdx; + + // Temporary workaround until tensor.data/tensor.length are removed. + auto cpu_tensor = (tensor.data == nullptr) + ? 
tensor.tensor.cpu + : CpuTensor{.ptr = tensor.data, .length = tensor.length}; channel.send( - tensor.data, - tensor.length, + cpu_tensor, eagerCallbackWrapper_( [&op, tensorIdx](Impl& impl, channel::TDescriptor descriptor) { TP_VLOG(3) << "Pipe " << impl.id_ << " got tensor descriptor #" @@ -1220,7 +1232,7 @@ void Pipe::Impl::onReadWhileServerWaitingForBrochure_( for (const auto& channelContextIter : context_->getOrderedChannels()) { const std::string& channelName = std::get<0>(channelContextIter.second); - const channel::Context& channelContext = + const channel::CpuContext& channelContext = *(std::get<1>(channelContextIter.second)); const auto nopChannelAdvertisementIter = @@ -1315,7 +1327,7 @@ void Pipe::Impl::onReadWhileClientWaitingForBrochureAnswer_( const ChannelSelection& nopChannelSelection = nopChannelSelectionIter.second; - std::shared_ptr channelContext = + std::shared_ptr channelContext = context_->getChannel(channelName); TP_VLOG(3) << "Pipe " << id_ << " is opening connection (for channel " @@ -1339,8 +1351,9 @@ void Pipe::Impl::onReadWhileClientWaitingForBrochureAnswer_( << " done writing nop object (requested connection)"; })); - std::shared_ptr channel = channelContext->createChannel( - std::move(connection), channel::Endpoint::kConnect); + std::shared_ptr channel = + channelContext->createChannel( + std::move(connection), channel::Endpoint::kConnect); channel->setId(id_ + ".ch_" + channelName); channels_.emplace(channelName, std::move(channel)); } @@ -1386,10 +1399,10 @@ void Pipe::Impl::onAcceptWhileServerWaitingForChannel_( auto channelIter = channels_.find(channelName); TP_DCHECK(channelIter == channels_.end()); - std::shared_ptr channelContext = + std::shared_ptr channelContext = context_->getChannel(channelName); - std::shared_ptr channel = channelContext->createChannel( + std::shared_ptr channel = channelContext->createChannel( std::move(receivedConnection), channel::Endpoint::kListen); channel->setId(id_ + ".ch_" + channelName); 
channels_.emplace(channelName, std::move(channel)); diff --git a/tensorpipe/python/tensorpipe.cc b/tensorpipe/python/tensorpipe.cc index cd0a7d3a0..6244d0b3c 100644 --- a/tensorpipe/python/tensorpipe.cc +++ b/tensorpipe/python/tensorpipe.cc @@ -115,8 +115,8 @@ tensorpipe::Message prepareToWrite(std::shared_ptr pyMessage) { tpMessage.tensors.reserve(pyMessage->tensors.size()); for (const auto& pyTensor : pyMessage->tensors) { tensorpipe::Message::Tensor tpTensor{ - pyTensor->buffer.ptr(), - pyTensor->buffer.length(), + tensorpipe::CpuTensor{pyTensor->buffer.ptr(), + pyTensor->buffer.length()}, {reinterpret_cast(pyTensor->metadata.ptr()), pyTensor->metadata.length()}}; tpMessage.tensors.push_back(std::move(tpTensor)); @@ -187,9 +187,9 @@ std::shared_ptr prepareToAllocate( std::vector> pyTensors; pyTensors.reserve(tpMessage.tensors.size()); for (const auto& tpTensor : tpMessage.tensors) { - TP_DCHECK(tpTensor.data == nullptr); - pyTensors.push_back( - std::make_shared(tpTensor.length, tpTensor.metadata)); + TP_DCHECK(tpTensor.tensor.cpu.ptr == nullptr); + pyTensors.push_back(std::make_shared( + tpTensor.tensor.cpu.length, tpTensor.metadata)); } auto pyMessage = std::make_shared( tpMessage.metadata, std::move(pyPayloads), std::move(pyTensors)); @@ -208,8 +208,8 @@ tensorpipe::Message prepareToRead(std::shared_ptr pyMessage) { tpMessage.tensors.reserve(pyMessage->tensors.size()); for (const auto& pyTensor : pyMessage->tensors) { TP_THROW_ASSERT_IF(!pyTensor->buffer.has_value()) << "No buffer"; - tensorpipe::Message::Tensor tpTensor{pyTensor->buffer.value().ptr(), - pyTensor->buffer.value().length()}; + tensorpipe::Message::Tensor tpTensor{tensorpipe::CpuTensor{ + pyTensor->buffer.value().ptr(), pyTensor->buffer.value().length()}}; tpMessage.tensors.push_back(std::move(tpTensor)); } return tpMessage; @@ -223,8 +223,10 @@ using transport_class_ = py::class_>; template -using channel_class_ = - py::class_>; +using channel_class_ = py::class_< + T, + 
tensorpipe::channel::Context, + std::shared_ptr>; } // namespace @@ -438,8 +440,8 @@ PYBIND11_MODULE(pytensorpipe, module) { py::arg("name"), py::arg("transport")); - shared_ptr_class_ abstractChannel( - module, "AbstractChannel"); + shared_ptr_class_> + abstractChannel(module, "AbstractChannel"); channel_class_ basicChannel( module, "BasicChannel"); diff --git a/tensorpipe/test/channel/basic/basic_test.cc b/tensorpipe/test/channel/basic/basic_test.cc index 12fb6c0e8..8d26ed49a 100644 --- a/tensorpipe/test/channel/basic/basic_test.cc +++ b/tensorpipe/test/channel/basic/basic_test.cc @@ -17,8 +17,8 @@ class BasicChannelTestHelper : public ChannelTestHelper { return "basic"; } - std::shared_ptr makeContext( - std::string id) override { + std::shared_ptr> + makeContext(std::string id) override { auto context = std::make_shared(); context->setId(std::move(id)); return context; diff --git a/tensorpipe/test/channel/channel_test.cc b/tensorpipe/test/channel/channel_test.cc index 7b2dae07e..6bb3fa8ee 100644 --- a/tensorpipe/test/channel/channel_test.cc +++ b/tensorpipe/test/channel/channel_test.cc @@ -16,8 +16,8 @@ using namespace tensorpipe; using namespace tensorpipe::channel; TEST_P(ChannelTest, DomainDescriptor) { - std::shared_ptr context1 = GetParam()->makeContext("ctx1"); - std::shared_ptr context2 = GetParam()->makeContext("ctx2"); + std::shared_ptr context1 = GetParam()->makeContext("ctx1"); + std::shared_ptr context2 = GetParam()->makeContext("ctx2"); EXPECT_FALSE(context1->domainDescriptor().empty()); EXPECT_FALSE(context2->domainDescriptor().empty()); EXPECT_EQ(context1->domainDescriptor(), context2->domainDescriptor()); @@ -28,7 +28,7 @@ TEST_P(ChannelTest, ClientToServer) { testConnection( [&](std::shared_ptr conn) { - std::shared_ptr ctx = GetParam()->makeContext("server"); + std::shared_ptr ctx = GetParam()->makeContext("server"); auto channel = ctx->createChannel(std::move(conn), Endpoint::kListen); // Initialize with sequential values. 
@@ -39,7 +39,7 @@ TEST_P(ChannelTest, ClientToServer) { std::future> descriptorFuture; std::future sendFuture; std::tie(descriptorFuture, sendFuture) = - sendWithFuture(channel, data.data(), data.size()); + sendWithFuture(channel, CpuTensor{data.data(), data.size()}); Error descriptorError; TDescriptor descriptor; std::tie(descriptorError, descriptor) = descriptorFuture.get(); @@ -54,15 +54,15 @@ TEST_P(ChannelTest, ClientToServer) { ctx->join(); }, [&](std::shared_ptr conn) { - std::shared_ptr ctx = GetParam()->makeContext("client"); + std::shared_ptr ctx = GetParam()->makeContext("client"); auto channel = ctx->createChannel(std::move(conn), Endpoint::kConnect); std::vector data(dataSize); // Perform recv and wait for completion. auto descriptor = peers_->recv(PeerGroup::kClient); - std::future recvFuture = - recvWithFuture(channel, descriptor, data.data(), data.size()); + std::future recvFuture = recvWithFuture( + channel, descriptor, CpuTensor{data.data(), dataSize}); Error recvError = recvFuture.get(); EXPECT_FALSE(recvError) << recvError.what(); @@ -83,15 +83,15 @@ TEST_P(ChannelTest, ServerToClient) { testConnection( [&](std::shared_ptr conn) { - std::shared_ptr ctx = GetParam()->makeContext("server"); + std::shared_ptr ctx = GetParam()->makeContext("server"); auto channel = ctx->createChannel(std::move(conn), Endpoint::kListen); std::vector data(dataSize); // Perform recv and wait for completion. 
auto descriptor = peers_->recv(PeerGroup::kServer); - std::future recvFuture = - recvWithFuture(channel, descriptor, data.data(), data.size()); + std::future recvFuture = recvWithFuture( + channel, descriptor, CpuTensor{data.data(), data.size()}); Error recvError = recvFuture.get(); EXPECT_FALSE(recvError) << recvError.what(); @@ -106,7 +106,7 @@ TEST_P(ChannelTest, ServerToClient) { ctx->join(); }, [&](std::shared_ptr conn) { - std::shared_ptr ctx = GetParam()->makeContext("client"); + std::shared_ptr ctx = GetParam()->makeContext("client"); auto channel = ctx->createChannel(std::move(conn), Endpoint::kConnect); // Initialize with sequential values. @@ -117,7 +117,7 @@ TEST_P(ChannelTest, ServerToClient) { std::future> descriptorFuture; std::future sendFuture; std::tie(descriptorFuture, sendFuture) = - sendWithFuture(channel, data.data(), data.size()); + sendWithFuture(channel, CpuTensor{data.data(), data.size()}); Error descriptorError; TDescriptor descriptor; std::tie(descriptorError, descriptor) = descriptorFuture.get(); @@ -139,7 +139,7 @@ TEST_P(ChannelTest, SendMultipleTensors) { testConnection( [&](std::shared_ptr conn) { - std::shared_ptr ctx = GetParam()->makeContext("server"); + std::shared_ptr ctx = GetParam()->makeContext("server"); auto channel = ctx->createChannel(std::move(conn), Endpoint::kListen); // Initialize with sequential values. 
@@ -154,7 +154,7 @@ TEST_P(ChannelTest, SendMultipleTensors) { std::future> descriptorFuture; std::future sendFuture; std::tie(descriptorFuture, sendFuture) = - sendWithFuture(channel, data.data(), data.size()); + sendWithFuture(channel, CpuTensor{data.data(), data.size()}); Error descriptorError; TDescriptor descriptor; std::tie(descriptorError, descriptor) = descriptorFuture.get(); @@ -173,7 +173,7 @@ TEST_P(ChannelTest, SendMultipleTensors) { ctx->join(); }, [&](std::shared_ptr conn) { - std::shared_ptr ctx = GetParam()->makeContext("client"); + std::shared_ptr ctx = GetParam()->makeContext("client"); auto channel = ctx->createChannel(std::move(conn), Endpoint::kConnect); std::vector> dataVec( @@ -186,7 +186,7 @@ TEST_P(ChannelTest, SendMultipleTensors) { for (int i = 0; i < numTensors; i++) { auto descriptor = peers_->recv(PeerGroup::kClient); std::future recvFuture = recvWithFuture( - channel, descriptor, dataVec[i].data(), dataVec[i].size()); + channel, descriptor, CpuTensor{dataVec[i].data(), dataSize}); recvFutures.push_back(std::move(recvFuture)); } for (auto& recvFuture : recvFutures) { @@ -213,7 +213,7 @@ TEST_P(ChannelTest, SendTensorsBothWays) { testConnection( [&](std::shared_ptr conn) { - std::shared_ptr ctx = GetParam()->makeContext("server"); + std::shared_ptr ctx = GetParam()->makeContext("server"); auto channel = ctx->createChannel(std::move(conn), Endpoint::kListen); // Initialize sendBuffer with sequential values. @@ -229,8 +229,12 @@ TEST_P(ChannelTest, SendTensorsBothWays) { // Perform send. 
{ std::future> descriptorFuture; - std::tie(descriptorFuture, sendFuture) = - sendWithFuture(channel, sendData.data(), sendData.size()); + std::tie(descriptorFuture, sendFuture) = sendWithFuture( + channel, + CpuTensor{ + .ptr = sendData.data(), + .length = sendData.size(), + }); Error descriptorError; TDescriptor descriptor; std::tie(descriptorError, descriptor) = descriptorFuture.get(); @@ -242,7 +246,12 @@ TEST_P(ChannelTest, SendTensorsBothWays) { { auto descriptor = peers_->recv(PeerGroup::kServer); recvFuture = recvWithFuture( - channel, descriptor, recvData.data(), recvData.size()); + channel, + descriptor, + CpuTensor{ + .ptr = recvData.data(), + .length = recvData.size(), + }); } // Wait for completion of both. @@ -262,7 +271,7 @@ TEST_P(ChannelTest, SendTensorsBothWays) { ctx->join(); }, [&](std::shared_ptr conn) { - std::shared_ptr ctx = GetParam()->makeContext("client"); + std::shared_ptr ctx = GetParam()->makeContext("client"); auto channel = ctx->createChannel(std::move(conn), Endpoint::kConnect); // Initialize sendBuffer with sequential values. @@ -278,8 +287,12 @@ TEST_P(ChannelTest, SendTensorsBothWays) { // Perform send. { std::future> descriptorFuture; - std::tie(descriptorFuture, sendFuture) = - sendWithFuture(channel, sendData.data(), sendData.size()); + std::tie(descriptorFuture, sendFuture) = sendWithFuture( + channel, + CpuTensor{ + .ptr = sendData.data(), + .length = sendData.size(), + }); Error descriptorError; TDescriptor descriptor; std::tie(descriptorError, descriptor) = descriptorFuture.get(); @@ -291,7 +304,12 @@ TEST_P(ChannelTest, SendTensorsBothWays) { { auto descriptor = peers_->recv(PeerGroup::kClient); recvFuture = recvWithFuture( - channel, descriptor, recvData.data(), recvData.size()); + channel, + descriptor, + CpuTensor{ + .ptr = recvData.data(), + .length = recvData.size(), + }); } // Wait for completion of both. 
@@ -317,14 +335,14 @@ TEST_P(ChannelTest, NullPointer) { testConnection( [&](std::shared_ptr conn) { - std::shared_ptr ctx = GetParam()->makeContext("server"); + std::shared_ptr ctx = GetParam()->makeContext("server"); auto channel = ctx->createChannel(std::move(conn), Endpoint::kListen); // Perform send and wait for completion. std::future> descriptorFuture; std::future sendFuture; std::tie(descriptorFuture, sendFuture) = - sendWithFuture(channel, nullptr, 0); + sendWithFuture(channel, CpuTensor{nullptr, 0}); Error descriptorError; TDescriptor descriptor; std::tie(descriptorError, descriptor) = descriptorFuture.get(); @@ -339,13 +357,13 @@ TEST_P(ChannelTest, NullPointer) { ctx->join(); }, [&](std::shared_ptr conn) { - std::shared_ptr ctx = GetParam()->makeContext("client"); + std::shared_ptr ctx = GetParam()->makeContext("client"); auto channel = ctx->createChannel(std::move(conn), Endpoint::kConnect); // Perform recv and wait for completion. auto descriptor = peers_->recv(PeerGroup::kClient); std::future recvFuture = - recvWithFuture(channel, descriptor, nullptr, 0); + recvWithFuture(channel, descriptor, CpuTensor{nullptr, 0}); Error recvError = recvFuture.get(); EXPECT_FALSE(recvError) << recvError.what(); @@ -361,7 +379,7 @@ TEST_P(ChannelTest, EmptyTensor) { testConnection( [&](std::shared_ptr conn) { - std::shared_ptr ctx = GetParam()->makeContext("server"); + std::shared_ptr ctx = GetParam()->makeContext("server"); auto channel = ctx->createChannel(std::move(conn), Endpoint::kListen); // Allocate a non-empty vector so that its .data() pointer is non-null. 
@@ -371,7 +389,7 @@ TEST_P(ChannelTest, EmptyTensor) { std::future> descriptorFuture; std::future sendFuture; std::tie(descriptorFuture, sendFuture) = - sendWithFuture(channel, data.data(), 0); + sendWithFuture(channel, CpuTensor{data.data(), 0}); Error descriptorError; TDescriptor descriptor; std::tie(descriptorError, descriptor) = descriptorFuture.get(); @@ -386,7 +404,7 @@ TEST_P(ChannelTest, EmptyTensor) { ctx->join(); }, [&](std::shared_ptr conn) { - std::shared_ptr ctx = GetParam()->makeContext("client"); + std::shared_ptr ctx = GetParam()->makeContext("client"); auto channel = ctx->createChannel(std::move(conn), Endpoint::kConnect); // Allocate a non-empty vector so that its .data() pointer is non-null. @@ -395,7 +413,7 @@ TEST_P(ChannelTest, EmptyTensor) { // Perform recv and wait for completion. auto descriptor = peers_->recv(PeerGroup::kClient); std::future recvFuture = - recvWithFuture(channel, descriptor, data.data(), 0); + recvWithFuture(channel, descriptor, CpuTensor{data.data(), 0}); Error recvError = recvFuture.get(); EXPECT_FALSE(recvError) << recvError.what(); @@ -411,12 +429,12 @@ TEST_P(ChannelTest, contextIsNotJoined) { testConnection( [&](std::shared_ptr conn) { - std::shared_ptr context = GetParam()->makeContext("server"); + std::shared_ptr context = GetParam()->makeContext("server"); peers_->send(PeerGroup::kClient, kReady); context->createChannel(std::move(conn), Endpoint::kListen); }, [&](std::shared_ptr conn) { - std::shared_ptr context = GetParam()->makeContext("client"); + std::shared_ptr context = GetParam()->makeContext("client"); EXPECT_EQ(kReady, peers_->recv(PeerGroup::kClient)); context->createChannel(std::move(conn), Endpoint::kConnect); }); @@ -441,7 +459,7 @@ TEST_P(ChannelTest, CallbacksAreDeferred) { testConnection( [&](std::shared_ptr conn) { - std::shared_ptr ctx = GetParam()->makeContext("server"); + std::shared_ptr ctx = GetParam()->makeContext("server"); auto channel = ctx->createChannel(std::move(conn), 
Endpoint::kListen); // Initialize with sequential values. @@ -456,8 +474,7 @@ TEST_P(ChannelTest, CallbacksAreDeferred) { std::mutex mutex; std::unique_lock callerLock(mutex); channel->send( - buffer->data(), - buffer->size(), + CpuTensor{data.data(), data.size()}, [&descriptorPromise](const Error& error, TDescriptor descriptor) { descriptorPromise.set_value( std::make_tuple(error, std::move(descriptor))); @@ -482,14 +499,12 @@ TEST_P(ChannelTest, CallbacksAreDeferred) { ctx->join(); }, [&](std::shared_ptr conn) { - std::shared_ptr ctx = GetParam()->makeContext("client"); + std::shared_ptr ctx = GetParam()->makeContext("client"); auto channel = ctx->createChannel(std::move(conn), Endpoint::kConnect); // Initialize with zeroes. std::vector data(dataSize); std::fill(data.begin(), data.end(), 0); - auto buffer = helper_->makeBuffer(dataSize); - buffer->wrap(data.data()); // Perform recv and wait for completion. std::promise recvPromise; @@ -498,12 +513,9 @@ TEST_P(ChannelTest, CallbacksAreDeferred) { auto descriptor = peers_->recv(PeerGroup::kClient); channel->recv( descriptor, - buffer->data(), - buffer->size(), - [&recvPromise, &mutex, &buffer, ptr{data.data()}]( - const Error& error) { + CpuTensor{data.data(), data.size()}, + [&recvPromise, &mutex](const Error& error) { std::unique_lock calleeLock(mutex); - buffer->unwrap(ptr); recvPromise.set_value(error); }); callerLock.unlock(); diff --git a/tensorpipe/test/channel/channel_test.h b/tensorpipe/test/channel/channel_test.h index 740c21bbe..56a8325f5 100644 --- a/tensorpipe/test/channel/channel_test.h +++ b/tensorpipe/test/channel/channel_test.h @@ -16,6 +16,7 @@ #include #include +#include #include #include @@ -62,7 +63,7 @@ class ChannelTestHelper { // hierarchies are separated. 
virtual std::string channelName() = 0; - virtual std::shared_ptr makeContext( + virtual std::shared_ptr makeContext( std::string id) = 0; virtual std::shared_ptr makePeerGroup() { @@ -127,48 +128,38 @@ class ChannelTest : public ::testing::TestWithParam { std::tuple>, std::future> sendWithFuture( - std::shared_ptr channel, - const void* ptr, - size_t length) { + std::shared_ptr channel, + const tensorpipe::CpuTensor& tensor) { auto descriptorPromise = std::make_shared< std::promise>>(); auto promise = std::make_shared>(); auto descriptorFuture = descriptorPromise->get_future(); auto future = promise->get_future(); - auto buffer = helper_->makeBuffer(length); - buffer->wrap(ptr); channel->send( - buffer->data(), - buffer->size(), + tensor, [descriptorPromise{std::move(descriptorPromise)}]( const tensorpipe::Error& error, std::string descriptor) { descriptorPromise->set_value( std::make_tuple(error, std::move(descriptor))); }, - [promise{std::move(promise)}, buffer](const tensorpipe::Error& error) { + [promise{std::move(promise)}](const tensorpipe::Error& error) { promise->set_value(error); }); return {std::move(descriptorFuture), std::move(future)}; } [[nodiscard]] std::future recvWithFuture( - std::shared_ptr channel, + std::shared_ptr channel, tensorpipe::channel::TDescriptor descriptor, - void* ptr, - size_t length) { + const tensorpipe::CpuTensor& tensor) { auto promise = std::make_shared>(); auto future = promise->get_future(); - auto buffer = helper_->makeBuffer(length); - buffer->wrap(ptr); channel->recv( std::move(descriptor), - buffer->data(), - buffer->size(), - [promise{std::move(promise)}, buffer, ptr]( - const tensorpipe::Error& error) { - buffer->unwrap(ptr); + tensor, + [promise{std::move(promise)}](const tensorpipe::Error& error) { promise->set_value(error); }); return future; diff --git a/tensorpipe/test/channel/cma/cma_test.cc b/tensorpipe/test/channel/cma/cma_test.cc index 295691c27..10346e810 100644 --- a/tensorpipe/test/channel/cma/cma_test.cc 
+++ b/tensorpipe/test/channel/cma/cma_test.cc @@ -17,8 +17,8 @@ class CmaChannelTestHelper : public ChannelTestHelper { return "cma"; } - std::shared_ptr makeContext( - std::string id) override { + std::shared_ptr> + makeContext(std::string id) override { auto context = std::make_shared(); context->setId(std::move(id)); return context; diff --git a/tensorpipe/test/channel/cuda_ipc/cuda_ipc_test.cc b/tensorpipe/test/channel/cuda_ipc/cuda_ipc_test.cc index c51c4a55f..e93b40f33 100644 --- a/tensorpipe/test/channel/cuda_ipc/cuda_ipc_test.cc +++ b/tensorpipe/test/channel/cuda_ipc/cuda_ipc_test.cc @@ -59,29 +59,30 @@ class CudaWrapper : public DataWrapper { size_t size_; }; -class CudaChannelTestHelper : public ChannelTestHelper { - public: - std::string channelName() override { - return "cuda_ipc"; - } - - std::shared_ptr makeContext( - std::string id) override { - auto context = std::make_shared(); - context->setId(std::move(id)); - return context; - } - - std::shared_ptr makePeerGroup() override { - return std::make_shared(); - } - - std::shared_ptr makeBuffer(size_t len) override { - return std::make_shared(len); - } -}; - -CudaChannelTestHelper helper; +// class CudaChannelTestHelper : public ChannelTestHelper { +// public: +// std::string channelName() override { +// return "cuda_ipc"; +// } + +// std::shared_ptr makeContext( +// std::string id) override { +// auto context = +// std::make_shared(); +// context->setId(std::move(id)); +// return context; +// } + +// std::shared_ptr makePeerGroup() override { +// return std::make_shared(); +// } + +// std::shared_ptr makeBuffer(size_t len) override { +// return std::make_shared(len); +// } +// }; + +// CudaChannelTestHelper helper; class CudaIpcChannelTest : public ChannelTest {}; @@ -95,110 +96,113 @@ using namespace tensorpipe::channel; << __TP_EXPAND_OPD(a) << " " << cudaGetErrorName(cudaPeekAtLastError()) \ << " (" << cudaGetErrorString(cudaPeekAtLastError()) << ")" -TEST_P(CudaIpcChannelTest, 
ReceiverWaitsForStartEvent) { - constexpr int kSize = 1024; - - testConnection( - [&](std::shared_ptr conn) { - std::shared_ptr ctx = GetParam()->makeContext("server"); - auto channel = std::static_pointer_cast( - ctx->createChannel(std::move(conn), Endpoint::kListen)); - - TP_CUDA_CHECK(cudaSetDevice(0)); - cudaStream_t sendStream; - TP_CUDA_CHECK(cudaStreamCreate(&sendStream)); - void* ptr; - TP_CUDA_CHECK(cudaMalloc(&ptr, kSize)); - - // Delay sendStream with computations on buffer. - slowKernel(ptr, kSize, sendStream); - - // Set buffer to target value. - TP_CUDA_CHECK(cudaMemsetAsync(ptr, 0x42, kSize, sendStream)); - - // Perform send and wait for completion. - auto descriptorPromise = std::make_shared< - std::promise>>(); - auto sendPromise = std::make_shared>(); - auto descriptorFuture = descriptorPromise->get_future(); - auto sendFuture = sendPromise->get_future(); - - channel->send( - ptr, - kSize, - [descriptorPromise{std::move(descriptorPromise)}]( - const tensorpipe::Error& error, std::string descriptor) { - descriptorPromise->set_value( - std::make_tuple(error, std::move(descriptor))); - }, - [sendPromise{std::move(sendPromise)}]( - const tensorpipe::Error& error) { - sendPromise->set_value(error); - }, - sendStream); - - Error descriptorError; - TDescriptor descriptor; - std::tie(descriptorError, descriptor) = descriptorFuture.get(); - - EXPECT_FALSE(descriptorError) << descriptorError.what(); - peers_->send(PeerGroup::kClient, descriptor); - Error sendError = sendFuture.get(); - EXPECT_FALSE(sendError) << sendError.what(); - TP_CUDA_CHECK(cudaFree(ptr)); - - peers_->done(PeerGroup::kServer); - peers_->join(PeerGroup::kServer); - - ctx->join(); - }, - [&](std::shared_ptr conn) { - std::shared_ptr ctx = GetParam()->makeContext("client"); - auto channel = std::static_pointer_cast( - ctx->createChannel(std::move(conn), Endpoint::kConnect)); - - TP_CUDA_CHECK(cudaSetDevice(0)); - cudaStream_t recvStream; - TP_CUDA_CHECK(cudaStreamCreate(&recvStream)); - 
void* ptr; - TP_CUDA_CHECK(cudaMalloc(&ptr, kSize)); - - auto descriptor = peers_->recv(PeerGroup::kClient); - - // Perform recv and wait for completion. - auto recvPromise = std::make_shared>(); - auto recvFuture = recvPromise->get_future(); - - channel->recv( - std::move(descriptor), - ptr, - kSize, - [recvPromise{std::move(recvPromise)}]( - const tensorpipe::Error& error) { - recvPromise->set_value(error); - }, - recvStream); - - Error recvError = recvFuture.get(); - EXPECT_FALSE(recvError) << recvError.what(); - - std::array data; - TP_CUDA_CHECK(cudaMemcpy(data.data(), ptr, kSize, cudaMemcpyDefault)); - // Validate contents of vector. - for (auto i = 0; i < kSize; i++) { - EXPECT_EQ(data[i], 0x42); - } - TP_CUDA_CHECK(cudaFree(ptr)); - - peers_->done(PeerGroup::kClient); - peers_->join(PeerGroup::kClient); - - ctx->join(); - }); -} - -INSTANTIATE_TEST_CASE_P(CudaIpc, ChannelTest, ::testing::Values(&helper)); -INSTANTIATE_TEST_CASE_P( - CudaIpc, - CudaIpcChannelTest, - ::testing::Values(&helper)); +// TEST_P(CudaIpcChannelTest, ReceiverWaitsForStartEvent) { +// constexpr int kSize = 1024; + +// testConnection( +// [&](std::shared_ptr conn) { +// std::shared_ptr ctx = GetParam()->makeContext("server"); +// auto channel = std::static_pointer_cast( +// ctx->createChannel(std::move(conn), Endpoint::kListen)); + +// TP_CUDA_CHECK(cudaSetDevice(0)); +// cudaStream_t sendStream; +// TP_CUDA_CHECK(cudaStreamCreate(&sendStream)); +// void* ptr; +// TP_CUDA_CHECK(cudaMalloc(&ptr, kSize)); + +// // Delay sendStream with computations on buffer. +// slowKernel(ptr, kSize, sendStream); + +// // Set buffer to target value. +// TP_CUDA_CHECK(cudaMemsetAsync(ptr, 0x42, kSize, sendStream)); + +// // Perform send and wait for completion. 
+// auto descriptorPromise = std::make_shared< +// std::promise>>(); +// auto sendPromise = +// std::make_shared>(); auto +// descriptorFuture = descriptorPromise->get_future(); auto sendFuture = +// sendPromise->get_future(); + +// channel->send( +// ptr, +// kSize, +// [descriptorPromise{std::move(descriptorPromise)}]( +// const tensorpipe::Error& error, std::string descriptor) { +// descriptorPromise->set_value( +// std::make_tuple(error, std::move(descriptor))); +// }, +// [sendPromise{std::move(sendPromise)}]( +// const tensorpipe::Error& error) { +// sendPromise->set_value(error); +// }, +// sendStream); + +// Error descriptorError; +// TDescriptor descriptor; +// std::tie(descriptorError, descriptor) = descriptorFuture.get(); + +// EXPECT_FALSE(descriptorError) << descriptorError.what(); +// peers_->send(PeerGroup::kClient, descriptor); +// Error sendError = sendFuture.get(); +// EXPECT_FALSE(sendError) << sendError.what(); +// TP_CUDA_CHECK(cudaFree(ptr)); + +// peers_->done(PeerGroup::kServer); +// peers_->join(PeerGroup::kServer); + +// ctx->join(); +// }, +// [&](std::shared_ptr conn) { +// std::shared_ptr ctx = GetParam()->makeContext("client"); +// auto channel = std::static_pointer_cast( +// ctx->createChannel(std::move(conn), Endpoint::kConnect)); + +// TP_CUDA_CHECK(cudaSetDevice(0)); +// cudaStream_t recvStream; +// TP_CUDA_CHECK(cudaStreamCreate(&recvStream)); +// void* ptr; +// TP_CUDA_CHECK(cudaMalloc(&ptr, kSize)); + +// auto descriptor = peers_->recv(PeerGroup::kClient); + +// // Perform recv and wait for completion. 
+// auto recvPromise = +// std::make_shared>(); auto recvFuture +// = recvPromise->get_future(); + +// channel->recv( +// std::move(descriptor), +// ptr, +// kSize, +// [recvPromise{std::move(recvPromise)}]( +// const tensorpipe::Error& error) { +// recvPromise->set_value(error); +// }, +// recvStream); + +// Error recvError = recvFuture.get(); +// EXPECT_FALSE(recvError) << recvError.what(); + +// std::array data; +// TP_CUDA_CHECK(cudaMemcpy(data.data(), ptr, kSize, +// cudaMemcpyDefault)); +// // Validate contents of vector. +// for (auto i = 0; i < kSize; i++) { +// EXPECT_EQ(data[i], 0x42); +// } +// TP_CUDA_CHECK(cudaFree(ptr)); + +// peers_->done(PeerGroup::kClient); +// peers_->join(PeerGroup::kClient); + +// ctx->join(); +// }); +// } + +// INSTANTIATE_TEST_CASE_P(CudaIpc, ChannelTest, ::testing::Values(&helper)); +// INSTANTIATE_TEST_CASE_P( +// CudaIpc, +// CudaIpcChannelTest, +// ::testing::Values(&helper)); diff --git a/tensorpipe/test/channel/mpt/mpt_test.cc b/tensorpipe/test/channel/mpt/mpt_test.cc index 3a3c21315..f6e0cf034 100644 --- a/tensorpipe/test/channel/mpt/mpt_test.cc +++ b/tensorpipe/test/channel/mpt/mpt_test.cc @@ -17,8 +17,8 @@ class MptChannelTestHelper : public ChannelTestHelper { return "mpt"; } - std::shared_ptr makeContext( - std::string id) override { + std::shared_ptr> + makeContext(std::string id) override { std::vector> contexts = { std::make_shared(), std::make_shared(), diff --git a/tensorpipe/test/channel/xth/xth_test.cc b/tensorpipe/test/channel/xth/xth_test.cc index 971750ab4..4bbadf830 100644 --- a/tensorpipe/test/channel/xth/xth_test.cc +++ b/tensorpipe/test/channel/xth/xth_test.cc @@ -17,8 +17,8 @@ class XthChannelTestHelper : public ChannelTestHelper { return "xth"; } - std::shared_ptr makeContext( - std::string id) override { + std::shared_ptr> + makeContext(std::string id) override { auto context = std::make_shared(); context->setId(std::move(id)); return context; diff --git a/tensorpipe/test/core/context_test.cc 
b/tensorpipe/test/core/context_test.cc index df045514c..ef979caab 100644 --- a/tensorpipe/test/core/context_test.cc +++ b/tensorpipe/test/core/context_test.cc @@ -78,10 +78,10 @@ ::testing::AssertionResult messagesAreEqual( } for (size_t idx = 0; idx < m1.tensors.size(); idx++) { EXPECT_TRUE(buffersAreEqual( - m1.tensors[idx].data, - m1.tensors[idx].length, - m2.tensors[idx].data, - m2.tensors[idx].length)); + m1.tensors[idx].tensor.cpu.ptr, + m1.tensors[idx].tensor.cpu.length, + m2.tensors[idx].tensor.cpu.ptr, + m2.tensors[idx].tensor.cpu.length)); } return ::testing::AssertionSuccess(); } @@ -99,10 +99,10 @@ Message makeMessage(int numPayloads, int numTensors) { message.payloads.push_back(std::move(payload)); } for (int i = 0; i < numTensors; i++) { - Message::Tensor tensor; - tensor.data = - reinterpret_cast(const_cast(kTensorData.data())); - tensor.length = kTensorData.length(); + Message::Tensor tensor{ + .tensor = CpuTensor{ + reinterpret_cast(const_cast(kTensorData.data())), + kTensorData.length()}}; message.tensors.push_back(std::move(tensor)); } return message; @@ -191,8 +191,8 @@ TEST(Context, ClientPingSerial) { buffers.push_back(std::move(payloadData)); } for (auto& tensor : message.tensors) { - auto tensorData = std::make_unique(tensor.length); - tensor.data = tensorData.get(); + auto tensorData = std::make_unique(tensor.tensor.cpu.length); + tensor.tensor.cpu.ptr = tensorData.get(); buffers.push_back(std::move(tensorData)); } @@ -260,8 +260,8 @@ TEST(Context, ClientPingInline) { buffers.push_back(std::move(payloadData)); } for (auto& tensor : message.tensors) { - auto tensorData = std::make_unique(tensor.length); - tensor.data = tensorData.get(); + auto tensorData = std::make_unique(tensor.tensor.cpu.length); + tensor.tensor.cpu.ptr = tensorData.get(); buffers.push_back(std::move(tensorData)); } serverPipe->read( @@ -360,8 +360,9 @@ TEST(Context, ServerPingPongTwice) { buffers.push_back(std::move(payloadData)); } for (auto& tensor : message.tensors) { 
- auto tensorData = std::make_unique(tensor.length); - tensor.data = tensorData.get(); + auto tensorData = + std::make_unique(tensor.tensor.cpu.length); + tensor.tensor.cpu.ptr = tensorData.get(); buffers.push_back(std::move(tensorData)); } serverPipe->read( @@ -404,8 +405,8 @@ TEST(Context, ServerPingPongTwice) { buffers.push_back(std::move(payloadData)); } for (auto& tensor : message.tensors) { - auto tensorData = std::make_unique(tensor.length); - tensor.data = tensorData.get(); + auto tensorData = std::make_unique(tensor.tensor.cpu.length); + tensor.tensor.cpu.ptr = tensorData.get(); buffers.push_back(std::move(tensorData)); } clientPipe->read( @@ -458,8 +459,8 @@ static void pipeRead( buffers.push_back(std::move(payloadData)); } for (auto& tensor : message.tensors) { - auto tensorData = std::make_unique(tensor.length); - tensor.data = tensorData.get(); + auto tensorData = std::make_unique(tensor.tensor.cpu.length); + tensor.tensor.cpu.ptr = tensorData.get(); buffers.push_back(std::move(tensorData)); } pipe->read( From 724469cb6aff854df2f7b418720ba922a3e9bf39 Mon Sep 17 00:00:00 2001 From: Lucas Hosseini Date: Wed, 9 Sep 2020 15:16:54 +0200 Subject: [PATCH 02/12] Update on "Make Channel API accept tensor structs rather than raw pointers." + Introduce tensor.h defining the tensor struct(s). The `CpuTensor` struct is always defined, while the `CudaTensor` struct is defined only when `TENSORPIPE_HAS_CUDA` is true. + Update all channels to take a `CpuTensor` or `CudaTensor` for `send`/`recv` rather than a raw pointer and a length. + Make the base `Channel`/`Context` classes templated on `TTensor`, effectively creating two channel hierarchies (one for CPU channels, one for CUDA channels). + Update the Pipe and the generic channel tests to use the new API. So far, generic channel tests are CPU only, and tests for the CUDA IPC channel are (temporarily) disabled. A subsequent PR will take care of refactoring tests so that generic tests work for CUDA channels. 
An other PR will add support for CUDA tensors in the Pipe. [ghstack-poisoned] --- tensorpipe/core/pipe.cc | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/tensorpipe/core/pipe.cc b/tensorpipe/core/pipe.cc index 8b2cd1b88..73d8bbe43 100644 --- a/tensorpipe/core/pipe.cc +++ b/tensorpipe/core/pipe.cc @@ -88,7 +88,9 @@ void parseDescriptorOfMessage(ReadOperation& op, const Packet& nopPacketIn) { Message::Tensor tensor{ .tensor = CpuTensor{ - nullptr, static_cast(nopTensorDescriptor.sizeInBytes)}, + .ptr = nullptr, + .length = static_cast(nopTensorDescriptor.sizeInBytes), + }, .metadata = nopTensorDescriptor.metadata, }; ReadOperation::Tensor tensorBeingAllocated; From a83ec5224dd7b05edfa166dcb8789a96119d8563 Mon Sep 17 00:00:00 2001 From: Lucas Hosseini Date: Thu, 10 Sep 2020 14:53:22 +0200 Subject: [PATCH 03/12] Update on "Make Channel API accept tensor structs rather than raw pointers." + Introduce tensor.h defining the tensor struct(s). The `CpuTensor` struct is always defined, while the `CudaTensor` struct is defined only when `TENSORPIPE_HAS_CUDA` is true. + Update all channels to take a `CpuTensor` or `CudaTensor` for `send`/`recv` rather than a raw pointer and a length. + Make the base `Channel`/`Context` classes templated on `TTensor`, effectively creating two channel hierarchies (one for CPU channels, one for CUDA channels). + Update the Pipe and the generic channel tests to use the new API. So far, generic channel tests are CPU only, and tests for the CUDA IPC channel are (temporarily) disabled. A subsequent PR will take care of refactoring tests so that generic tests work for CUDA channels. An other PR will add support for CUDA tensors in the Pipe. 
Differential Revision: [D23598033](https://our.internmc.facebook.com/intern/diff/D23598033) [ghstack-poisoned] --- tensorpipe/channel/cma/context.h | 2 +- tensorpipe/channel/registry.cc | 2 +- tensorpipe/channel/registry.h | 2 +- tensorpipe/python/tensorpipe.cc | 8 +++----- tensorpipe/test/channel/basic/basic_test.cc | 2 +- tensorpipe/test/channel/cma/cma_test.cc | 2 +- tensorpipe/test/channel/mpt/mpt_test.cc | 2 +- tensorpipe/test/channel/xth/xth_test.cc | 2 +- 8 files changed, 10 insertions(+), 12 deletions(-) diff --git a/tensorpipe/channel/cma/context.h b/tensorpipe/channel/cma/context.h index db24998a9..1e7bee161 100644 --- a/tensorpipe/channel/cma/context.h +++ b/tensorpipe/channel/cma/context.h @@ -27,7 +27,7 @@ class Context : public channel::CpuContext { const std::string& domainDescriptor() const override; - std::shared_ptr> createChannel( + std::shared_ptr createChannel( std::shared_ptr, Endpoint) override; diff --git a/tensorpipe/channel/registry.cc b/tensorpipe/channel/registry.cc index fbbe6704c..429232dc7 100644 --- a/tensorpipe/channel/registry.cc +++ b/tensorpipe/channel/registry.cc @@ -10,4 +10,4 @@ TP_DEFINE_SHARED_REGISTRY( TensorpipeChannelRegistry, - tensorpipe::channel::Context); + tensorpipe::channel::CpuContext); diff --git a/tensorpipe/channel/registry.h b/tensorpipe/channel/registry.h index 26e479fd2..e67e54069 100644 --- a/tensorpipe/channel/registry.h +++ b/tensorpipe/channel/registry.h @@ -14,4 +14,4 @@ TP_DECLARE_SHARED_REGISTRY( TensorpipeChannelRegistry, - tensorpipe::channel::Context); + tensorpipe::channel::CpuContext); diff --git a/tensorpipe/python/tensorpipe.cc b/tensorpipe/python/tensorpipe.cc index 6244d0b3c..42d39dfe5 100644 --- a/tensorpipe/python/tensorpipe.cc +++ b/tensorpipe/python/tensorpipe.cc @@ -223,10 +223,8 @@ using transport_class_ = py::class_>; template -using channel_class_ = py::class_< - T, - tensorpipe::channel::Context, - std::shared_ptr>; +using channel_class_ = + py::class_>; } // namespace @@ -440,7 
+438,7 @@ PYBIND11_MODULE(pytensorpipe, module) { py::arg("name"), py::arg("transport")); - shared_ptr_class_> + shared_ptr_class_ abstractChannel(module, "AbstractChannel"); channel_class_ basicChannel( diff --git a/tensorpipe/test/channel/basic/basic_test.cc b/tensorpipe/test/channel/basic/basic_test.cc index 8d26ed49a..896562412 100644 --- a/tensorpipe/test/channel/basic/basic_test.cc +++ b/tensorpipe/test/channel/basic/basic_test.cc @@ -17,7 +17,7 @@ class BasicChannelTestHelper : public ChannelTestHelper { return "basic"; } - std::shared_ptr> + std::shared_ptr makeContext(std::string id) override { auto context = std::make_shared(); context->setId(std::move(id)); diff --git a/tensorpipe/test/channel/cma/cma_test.cc b/tensorpipe/test/channel/cma/cma_test.cc index 10346e810..fba60c3b0 100644 --- a/tensorpipe/test/channel/cma/cma_test.cc +++ b/tensorpipe/test/channel/cma/cma_test.cc @@ -17,7 +17,7 @@ class CmaChannelTestHelper : public ChannelTestHelper { return "cma"; } - std::shared_ptr> + std::shared_ptr makeContext(std::string id) override { auto context = std::make_shared(); context->setId(std::move(id)); diff --git a/tensorpipe/test/channel/mpt/mpt_test.cc b/tensorpipe/test/channel/mpt/mpt_test.cc index f6e0cf034..bfa335bc7 100644 --- a/tensorpipe/test/channel/mpt/mpt_test.cc +++ b/tensorpipe/test/channel/mpt/mpt_test.cc @@ -17,7 +17,7 @@ class MptChannelTestHelper : public ChannelTestHelper { return "mpt"; } - std::shared_ptr> + std::shared_ptr makeContext(std::string id) override { std::vector> contexts = { std::make_shared(), diff --git a/tensorpipe/test/channel/xth/xth_test.cc b/tensorpipe/test/channel/xth/xth_test.cc index 4bbadf830..8a502dc12 100644 --- a/tensorpipe/test/channel/xth/xth_test.cc +++ b/tensorpipe/test/channel/xth/xth_test.cc @@ -17,7 +17,7 @@ class XthChannelTestHelper : public ChannelTestHelper { return "xth"; } - std::shared_ptr> + std::shared_ptr makeContext(std::string id) override { auto context = std::make_shared(); 
context->setId(std::move(id)); From bd96dc724cc898fe95a9de81bb174223d5976ee8 Mon Sep 17 00:00:00 2001 From: Lucas Hosseini Date: Thu, 10 Sep 2020 14:59:24 +0200 Subject: [PATCH 04/12] Update on "Make Channel API accept tensor structs rather than raw pointers." + Introduce tensor.h defining the tensor struct(s). The `CpuTensor` struct is always defined, while the `CudaTensor` struct is defined only when `TENSORPIPE_HAS_CUDA` is true. + Update all channels to take a `CpuTensor` or `CudaTensor` for `send`/`recv` rather than a raw pointer and a length. + Make the base `Channel`/`Context` classes templated on `TTensor`, effectively creating two channel hierarchies (one for CPU channels, one for CUDA channels). + Update the Pipe and the generic channel tests to use the new API. So far, generic channel tests are CPU only, and tests for the CUDA IPC channel are (temporarily) disabled. A subsequent PR will take care of refactoring tests so that generic tests work for CUDA channels. An other PR will add support for CUDA tensors in the Pipe. 
Differential Revision: [D23598033](https://our.internmc.facebook.com/intern/diff/D23598033) [ghstack-poisoned] --- tensorpipe/test/channel/basic/basic_test.cc | 4 ++-- tensorpipe/test/channel/cma/cma_test.cc | 4 ++-- tensorpipe/test/channel/mpt/mpt_test.cc | 4 ++-- tensorpipe/test/channel/xth/xth_test.cc | 4 ++-- 4 files changed, 8 insertions(+), 8 deletions(-) diff --git a/tensorpipe/test/channel/basic/basic_test.cc b/tensorpipe/test/channel/basic/basic_test.cc index 896562412..3f224a532 100644 --- a/tensorpipe/test/channel/basic/basic_test.cc +++ b/tensorpipe/test/channel/basic/basic_test.cc @@ -17,8 +17,8 @@ class BasicChannelTestHelper : public ChannelTestHelper { return "basic"; } - std::shared_ptr - makeContext(std::string id) override { + std::shared_ptr makeContext( + std::string id) override { auto context = std::make_shared(); context->setId(std::move(id)); return context; diff --git a/tensorpipe/test/channel/cma/cma_test.cc b/tensorpipe/test/channel/cma/cma_test.cc index fba60c3b0..83ae72ac6 100644 --- a/tensorpipe/test/channel/cma/cma_test.cc +++ b/tensorpipe/test/channel/cma/cma_test.cc @@ -17,8 +17,8 @@ class CmaChannelTestHelper : public ChannelTestHelper { return "cma"; } - std::shared_ptr - makeContext(std::string id) override { + std::shared_ptr makeContext( + std::string id) override { auto context = std::make_shared(); context->setId(std::move(id)); return context; diff --git a/tensorpipe/test/channel/mpt/mpt_test.cc b/tensorpipe/test/channel/mpt/mpt_test.cc index bfa335bc7..3102ecaa0 100644 --- a/tensorpipe/test/channel/mpt/mpt_test.cc +++ b/tensorpipe/test/channel/mpt/mpt_test.cc @@ -17,8 +17,8 @@ class MptChannelTestHelper : public ChannelTestHelper { return "mpt"; } - std::shared_ptr - makeContext(std::string id) override { + std::shared_ptr makeContext( + std::string id) override { std::vector> contexts = { std::make_shared(), std::make_shared(), diff --git a/tensorpipe/test/channel/xth/xth_test.cc b/tensorpipe/test/channel/xth/xth_test.cc 
index 8a502dc12..4df3f363c 100644 --- a/tensorpipe/test/channel/xth/xth_test.cc +++ b/tensorpipe/test/channel/xth/xth_test.cc @@ -17,8 +17,8 @@ class XthChannelTestHelper : public ChannelTestHelper { return "xth"; } - std::shared_ptr - makeContext(std::string id) override { + std::shared_ptr makeContext( + std::string id) override { auto context = std::make_shared(); context->setId(std::move(id)); return context; From 9fbd18de5603252e20afdbfd29e0a329f519716e Mon Sep 17 00:00:00 2001 From: Lucas Hosseini Date: Thu, 10 Sep 2020 15:06:08 +0200 Subject: [PATCH 05/12] Update on "Make Channel API accept tensor structs rather than raw pointers." + Introduce tensor.h defining the tensor struct(s). The `CpuTensor` struct is always defined, while the `CudaTensor` struct is defined only when `TENSORPIPE_HAS_CUDA` is true. + Update all channels to take a `CpuTensor` or `CudaTensor` for `send`/`recv` rather than a raw pointer and a length. + Make the base `Channel`/`Context` classes templated on `TTensor`, effectively creating two channel hierarchies (one for CPU channels, one for CUDA channels). + Update the Pipe and the generic channel tests to use the new API. So far, generic channel tests are CPU only, and tests for the CUDA IPC channel are (temporarily) disabled. A subsequent PR will take care of refactoring tests so that generic tests work for CUDA channels. An other PR will add support for CUDA tensors in the Pipe. 
Differential Revision: [D23598033](https://our.internmc.facebook.com/intern/diff/D23598033) [ghstack-poisoned] --- tensorpipe/python/tensorpipe.cc | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tensorpipe/python/tensorpipe.cc b/tensorpipe/python/tensorpipe.cc index 42d39dfe5..d63181332 100644 --- a/tensorpipe/python/tensorpipe.cc +++ b/tensorpipe/python/tensorpipe.cc @@ -438,8 +438,8 @@ PYBIND11_MODULE(pytensorpipe, module) { py::arg("name"), py::arg("transport")); - shared_ptr_class_ - abstractChannel(module, "AbstractChannel"); + shared_ptr_class_ abstractChannel( + module, "AbstractChannel"); channel_class_ basicChannel( module, "BasicChannel"); From 2aa9b060001d30b2ac93e92963417833733101d3 Mon Sep 17 00:00:00 2001 From: Lucas Hosseini Date: Thu, 10 Sep 2020 20:02:43 +0200 Subject: [PATCH 06/12] Update on "Make Channel API accept tensor structs rather than raw pointers." + Introduce tensor.h defining the tensor struct(s). The `CpuTensor` struct is always defined, while the `CudaTensor` struct is defined only when `TENSORPIPE_HAS_CUDA` is true. + Update all channels to take a `CpuTensor` or `CudaTensor` for `send`/`recv` rather than a raw pointer and a length. + Make the base `Channel`/`Context` classes templated on `TTensor`, effectively creating two channel hierarchies (one for CPU channels, one for CUDA channels). + Update the Pipe and the generic channel tests to use the new API. So far, generic channel tests are CPU only, and tests for the CUDA IPC channel are (temporarily) disabled. A subsequent PR will take care of refactoring tests so that generic tests work for CUDA channels. An other PR will add support for CUDA tensors in the Pipe. 
Differential Revision: [D23598033](https://our.internmc.facebook.com/intern/diff/D23598033) [ghstack-poisoned] --- tensorpipe/channel/basic/channel.cc | 35 +++++++++++++----------- tensorpipe/channel/basic/channel.h | 6 ++-- tensorpipe/channel/basic/context.h | 2 +- tensorpipe/channel/channel.h | 8 +++--- tensorpipe/channel/cma/channel.cc | 35 +++++++++++++----------- tensorpipe/channel/cma/channel.h | 4 +-- tensorpipe/channel/cma/context.h | 2 +- tensorpipe/channel/context.h | 4 +-- tensorpipe/channel/cuda_ipc/channel.cc | 28 ++++++++++--------- tensorpipe/channel/cuda_ipc/channel.h | 4 +-- tensorpipe/channel/mpt/channel.cc | 35 +++++++++++++----------- tensorpipe/channel/mpt/channel.h | 4 +-- tensorpipe/channel/mpt/context.h | 2 +- tensorpipe/channel/registry.h | 2 +- tensorpipe/channel/xth/channel.cc | 30 ++++++++++---------- tensorpipe/channel/xth/channel.h | 4 +-- tensorpipe/channel/xth/context.h | 2 +- tensorpipe/common/{tensor.h => buffer.h} | 14 +++++----- tensorpipe/core/context.h | 2 +- tensorpipe/core/message.h | 6 ++-- tensorpipe/core/nop_types.h | 2 +- tensorpipe/core/pipe.cc | 18 ++++++------ tensorpipe/python/tensorpipe.cc | 4 +-- tensorpipe/test/channel/channel_test.cc | 34 +++++++++++------------ tensorpipe/test/channel/channel_test.h | 6 ++-- tensorpipe/test/core/context_test.cc | 30 ++++++++++---------- 26 files changed, 168 insertions(+), 155 deletions(-) rename tensorpipe/common/{tensor.h => buffer.h} (79%) diff --git a/tensorpipe/channel/basic/channel.cc b/tensorpipe/channel/basic/channel.cc index 2a3899cc2..a1c511ba3 100644 --- a/tensorpipe/channel/basic/channel.cc +++ b/tensorpipe/channel/basic/channel.cc @@ -33,11 +33,11 @@ class Channel::Impl : public std::enable_shared_from_this { void init(); void send( - const CpuTensor tensor, + CpuBuffer tensor, TDescriptorCallback descriptorCallback, TSendCallback callback); - void recv(TDescriptor descriptor, CpuTensor tensor, TRecvCallback callback); + void recv(TDescriptor descriptor, CpuBuffer 
tensor, TRecvCallback callback); // Tell the channel what its identifier is. void setId(std::string id); @@ -48,14 +48,14 @@ class Channel::Impl : public std::enable_shared_from_this { OnDemandLoop loop_; void sendFromLoop_( - const CpuTensor tensor, + CpuBuffer tensor, TDescriptorCallback descriptorCallback, TSendCallback callback); // Receive memory region from peer. void recvFromLoop_( TDescriptor descriptor, - CpuTensor tensor, + CpuBuffer tensor, TRecvCallback callback); void setIdFromLoop_(std::string id); @@ -119,27 +119,29 @@ Channel::Impl::Impl( id_(std::move(id)) {} void Channel::send( - const CpuTensor tensor, + CpuBuffer tensor, TDescriptorCallback descriptorCallback, TSendCallback callback) { - impl_->send(tensor, std::move(descriptorCallback), std::move(callback)); + impl_->send( + std::move(tensor), std::move(descriptorCallback), std::move(callback)); } void Channel::Impl::send( - const CpuTensor tensor, + CpuBuffer tensor, TDescriptorCallback descriptorCallback, TSendCallback callback) { loop_.deferToLoop([this, - tensor, + tensor{std::move(tensor)}, descriptorCallback{std::move(descriptorCallback)}, callback{std::move(callback)}]() mutable { - sendFromLoop_(tensor, std::move(descriptorCallback), std::move(callback)); + sendFromLoop_( + std::move(tensor), std::move(descriptorCallback), std::move(callback)); }); } // Send memory region to peer. void Channel::Impl::sendFromLoop_( - const CpuTensor tensor, + CpuBuffer tensor, TDescriptorCallback descriptorCallback, TSendCallback callback) { TP_DCHECK(loop_.inLoop()); @@ -194,26 +196,27 @@ void Channel::Impl::sendFromLoop_( // Receive memory region from peer. 
void Channel::recv( TDescriptor descriptor, - CpuTensor tensor, + CpuBuffer tensor, TRecvCallback callback) { - impl_->recv(std::move(descriptor), tensor, std::move(callback)); + impl_->recv(std::move(descriptor), std::move(tensor), std::move(callback)); } void Channel::Impl::recv( TDescriptor descriptor, - CpuTensor tensor, + CpuBuffer tensor, TRecvCallback callback) { loop_.deferToLoop([this, descriptor{std::move(descriptor)}, - tensor, + tensor{std::move(tensor)}, callback{std::move(callback)}]() mutable { - recvFromLoop_(std::move(descriptor), tensor, std::move(callback)); + recvFromLoop_( + std::move(descriptor), std::move(tensor), std::move(callback)); }); } void Channel::Impl::recvFromLoop_( TDescriptor descriptor, - CpuTensor tensor, + CpuBuffer tensor, TRecvCallback callback) { TP_DCHECK(loop_.inLoop()); diff --git a/tensorpipe/channel/basic/channel.h b/tensorpipe/channel/basic/channel.h index 08a4772f0..3dad03df6 100644 --- a/tensorpipe/channel/basic/channel.h +++ b/tensorpipe/channel/basic/channel.h @@ -12,7 +12,7 @@ #include #include -#include +#include namespace tensorpipe { namespace channel { @@ -32,12 +32,12 @@ class Channel : public channel::CpuChannel { // Send memory region to peer. void send( - const CpuTensor tensor, + CpuBuffer tensor, TDescriptorCallback descriptorCallback, TSendCallback callback) override; // Receive memory region from peer. - void recv(TDescriptor descriptor, CpuTensor tensor, TRecvCallback callback) + void recv(TDescriptor descriptor, CpuBuffer tensor, TRecvCallback callback) override; // Tell the channel what its identifier is. 
diff --git a/tensorpipe/channel/basic/context.h b/tensorpipe/channel/basic/context.h index 0433b4e30..ed8a346f4 100644 --- a/tensorpipe/channel/basic/context.h +++ b/tensorpipe/channel/basic/context.h @@ -13,7 +13,7 @@ #include #include -#include +#include namespace tensorpipe { namespace channel { diff --git a/tensorpipe/channel/channel.h b/tensorpipe/channel/channel.h index 08626de63..3a64d8206 100644 --- a/tensorpipe/channel/channel.h +++ b/tensorpipe/channel/channel.h @@ -13,7 +13,7 @@ #include #include -#include +#include #include // Channels are an out of band mechanism to transfer data between @@ -59,7 +59,7 @@ class Channel { public: // Send memory region to peer. virtual void send( - const TTensor tensor, + TTensor tensor, TDescriptorCallback descriptorCallback, TSendCallback callback) = 0; @@ -83,10 +83,10 @@ class Channel { virtual ~Channel() = default; }; -using CpuChannel = Channel; +using CpuChannel = Channel; #if TENSORPIPE_HAS_CUDA -using CudaChannel = Channel; +using CudaChannel = Channel; #endif // TENSORPIPE_HAS_CUDA } // namespace channel diff --git a/tensorpipe/channel/cma/channel.cc b/tensorpipe/channel/cma/channel.cc index fb1fda805..e4ebbf5f3 100644 --- a/tensorpipe/channel/cma/channel.cc +++ b/tensorpipe/channel/cma/channel.cc @@ -55,11 +55,11 @@ class Channel::Impl : public std::enable_shared_from_this { void init(); void send( - const CpuTensor tensor, + CpuBuffer tensor, TDescriptorCallback descriptorCallback, TSendCallback callback); - void recv(TDescriptor descriptor, CpuTensor tensor, TRecvCallback callback); + void recv(TDescriptor descriptor, CpuBuffer tensor, TRecvCallback callback); // Tell the channel what its identifier is. void setId(std::string id); @@ -73,14 +73,14 @@ class Channel::Impl : public std::enable_shared_from_this { // Send memory region to peer. void sendFromLoop_( - const CpuTensor tensor, + CpuBuffer tensor, TDescriptorCallback descriptorCallback, TSendCallback callback); // Receive memory region from peer. 
void recvFromLoop_( TDescriptor descriptor, - CpuTensor tensor, + CpuBuffer tensor, TRecvCallback callback); void setIdFromLoop_(std::string id); @@ -151,26 +151,28 @@ void Channel::Impl::initFromLoop_() { } void Channel::send( - const CpuTensor tensor, + CpuBuffer tensor, TDescriptorCallback descriptorCallback, TSendCallback callback) { - impl_->send(tensor, std::move(descriptorCallback), std::move(callback)); + impl_->send( + std::move(tensor), std::move(descriptorCallback), std::move(callback)); } void Channel::Impl::send( - const CpuTensor tensor, + CpuBuffer tensor, TDescriptorCallback descriptorCallback, TSendCallback callback) { loop_.deferToLoop([this, - tensor, + tensor{std::move(tensor)}, descriptorCallback{std::move(descriptorCallback)}, callback{std::move(callback)}]() mutable { - sendFromLoop_(tensor, std::move(descriptorCallback), std::move(callback)); + sendFromLoop_( + std::move(tensor), std::move(descriptorCallback), std::move(callback)); }); } void Channel::Impl::sendFromLoop_( - const CpuTensor tensor, + CpuBuffer tensor, TDescriptorCallback descriptorCallback, TSendCallback callback) { TP_DCHECK(loop_.inLoop()); @@ -232,26 +234,27 @@ void Channel::Impl::sendFromLoop_( // Receive memory region from peer. 
void Channel::recv( TDescriptor descriptor, - CpuTensor tensor, + CpuBuffer tensor, TRecvCallback callback) { - impl_->recv(std::move(descriptor), tensor, std::move(callback)); + impl_->recv(std::move(descriptor), std::move(tensor), std::move(callback)); } void Channel::Impl::recv( TDescriptor descriptor, - CpuTensor tensor, + CpuBuffer tensor, TRecvCallback callback) { loop_.deferToLoop([this, descriptor{std::move(descriptor)}, - tensor, + tensor{std::move(tensor)}, callback{std::move(callback)}]() mutable { - recvFromLoop_(std::move(descriptor), tensor, std::move(callback)); + recvFromLoop_( + std::move(descriptor), std::move(tensor), std::move(callback)); }); } void Channel::Impl::recvFromLoop_( TDescriptor descriptor, - CpuTensor tensor, + CpuBuffer tensor, TRecvCallback callback) { TP_DCHECK(loop_.inLoop()); diff --git a/tensorpipe/channel/cma/channel.h b/tensorpipe/channel/cma/channel.h index 8e29e8f86..c179e091c 100644 --- a/tensorpipe/channel/cma/channel.h +++ b/tensorpipe/channel/cma/channel.h @@ -31,12 +31,12 @@ class Channel : public channel::CpuChannel { // Send memory region to peer. void send( - const CpuTensor tensor, + CpuBuffer tensor, TDescriptorCallback descriptorCallback, TSendCallback callback) override; // Receive memory region from peer. - void recv(TDescriptor descriptor, CpuTensor tensor, TRecvCallback callback) + void recv(TDescriptor descriptor, CpuBuffer tensor, TRecvCallback callback) override; // Tell the channel what its identifier is. 
diff --git a/tensorpipe/channel/cma/context.h b/tensorpipe/channel/cma/context.h index 1e7bee161..f3b79de6a 100644 --- a/tensorpipe/channel/cma/context.h +++ b/tensorpipe/channel/cma/context.h @@ -15,7 +15,7 @@ #include #include #include -#include +#include namespace tensorpipe { namespace channel { diff --git a/tensorpipe/channel/context.h b/tensorpipe/channel/context.h index f5540d02b..c5e820971 100644 --- a/tensorpipe/channel/context.h +++ b/tensorpipe/channel/context.h @@ -67,10 +67,10 @@ class Context { std::string name_; }; -using CpuContext = Context; +using CpuContext = Context; #if TENSORPIPE_HAS_CUDA -using CudaContext = Context; +using CudaContext = Context; #endif // TENSORPIPE_HAS_CUDA } // namespace channel diff --git a/tensorpipe/channel/cuda_ipc/channel.cc b/tensorpipe/channel/cuda_ipc/channel.cc index c63c63158..82725e394 100644 --- a/tensorpipe/channel/cuda_ipc/channel.cc +++ b/tensorpipe/channel/cuda_ipc/channel.cc @@ -197,11 +197,11 @@ class Channel::Impl : public std::enable_shared_from_this { void init(); void send( - const CudaTensor tensor, + CudaBuffer tensor, TDescriptorCallback descriptorCallback, TSendCallback callback); - void recv(TDescriptor descriptor, CudaTensor tensor, TRecvCallback callback); + void recv(TDescriptor descriptor, CudaBuffer tensor, TRecvCallback callback); // Tell the channel what its identifier is. void setId(std::string id); @@ -215,14 +215,14 @@ class Channel::Impl : public std::enable_shared_from_this { // Send memory region to peer. void sendFromLoop_( - const CudaTensor tensor, + CudaBuffer tensor, TDescriptorCallback descriptorCallback, TSendCallback callback); // Receive memory region from peer. 
void recvFromLoop_( TDescriptor descriptor, - CudaTensor tensor, + CudaBuffer tensor, TRecvCallback callback); void readPackets_(); @@ -301,26 +301,28 @@ void Channel::Impl::initFromLoop_() { } void Channel::send( - const CudaTensor tensor, + CudaBuffer tensor, TDescriptorCallback descriptorCallback, TSendCallback callback) { - impl_->send(tensor, std::move(descriptorCallback), std::move(callback)); + impl_->send( + std::move(tensor), std::move(descriptorCallback), std::move(callback)); } void Channel::Impl::send( - const CudaTensor tensor, + CudaBuffer tensor, TDescriptorCallback descriptorCallback, TSendCallback callback) { loop_.deferToLoop([this, - tensor, + tensor{std::move(tensor)}, descriptorCallback{std::move(descriptorCallback)}, callback{std::move(callback)}]() mutable { - sendFromLoop_(tensor, std::move(descriptorCallback), std::move(callback)); + sendFromLoop_( + std::move(tensor), std::move(descriptorCallback), std::move(callback)); }); } void Channel::Impl::sendFromLoop_( - const CudaTensor tensor, + CudaBuffer tensor, TDescriptorCallback descriptorCallback, TSendCallback callback) { TP_DCHECK(loop_.inLoop()); @@ -368,14 +370,14 @@ void Channel::Impl::sendFromLoop_( // Receive memory region from peer. 
void Channel::recv( TDescriptor descriptor, - CudaTensor tensor, + CudaBuffer tensor, TRecvCallback callback) { impl_->recv(std::move(descriptor), std::move(tensor), std::move(callback)); } void Channel::Impl::recv( TDescriptor descriptor, - CudaTensor tensor, + CudaBuffer tensor, TRecvCallback callback) { loop_.deferToLoop([this, descriptor{std::move(descriptor)}, @@ -388,7 +390,7 @@ void Channel::Impl::recv( void Channel::Impl::recvFromLoop_( TDescriptor descriptor, - CudaTensor tensor, + CudaBuffer tensor, TRecvCallback callback) { TP_DCHECK(loop_.inLoop()); diff --git a/tensorpipe/channel/cuda_ipc/channel.h b/tensorpipe/channel/cuda_ipc/channel.h index e396e88ae..7f2df5b59 100644 --- a/tensorpipe/channel/cuda_ipc/channel.h +++ b/tensorpipe/channel/cuda_ipc/channel.h @@ -33,12 +33,12 @@ class Channel : public channel::CudaChannel { // Send memory region to peer. void send( - const CudaTensor tensor, + CudaBuffer tensor, TDescriptorCallback descriptorCallback, TSendCallback callback) override; // Receive memory region from peer. - void recv(TDescriptor descriptor, CudaTensor tensor, TRecvCallback callback) + void recv(TDescriptor descriptor, CudaBuffer tensor, TRecvCallback callback) override; // Tell the channel what its identifier is. diff --git a/tensorpipe/channel/mpt/channel.cc b/tensorpipe/channel/mpt/channel.cc index 9a8f533dc..20abcf32b 100644 --- a/tensorpipe/channel/mpt/channel.cc +++ b/tensorpipe/channel/mpt/channel.cc @@ -60,11 +60,11 @@ class Channel::Impl : public std::enable_shared_from_this { void init(); void send( - const CpuTensor tensor, + CpuBuffer tensor, TDescriptorCallback descriptorCallback, TSendCallback callback); - void recv(TDescriptor descriptor, CpuTensor tenssor, TRecvCallback callback); + void recv(TDescriptor descriptor, CpuBuffer tenssor, TRecvCallback callback); // Tell the channel what its identifier is. 
void setId(std::string id); @@ -82,13 +82,13 @@ class Channel::Impl : public std::enable_shared_from_this { void initFromLoop_(); void sendFromLoop_( - const CpuTensor tensor, + CpuBuffer tensor, TDescriptorCallback descriptorCallback, TSendCallback callback); void recvFromLoop_( TDescriptor descriptor, - CpuTensor tensor, + CpuBuffer tensor, TRecvCallback callback); void setIdFromLoop_(std::string id); @@ -252,26 +252,28 @@ void Channel::Impl::initFromLoop_() { } void Channel::send( - const CpuTensor tensor, + CpuBuffer tensor, TDescriptorCallback descriptorCallback, TSendCallback callback) { - impl_->send(tensor, std::move(descriptorCallback), std::move(callback)); + impl_->send( + std::move(tensor), std::move(descriptorCallback), std::move(callback)); } void Channel::Impl::send( - const CpuTensor tensor, + CpuBuffer tensor, TDescriptorCallback descriptorCallback, TSendCallback callback) { loop_.deferToLoop([this, - tensor, + tensor{std::move(tensor)}, descriptorCallback{std::move(descriptorCallback)}, callback{std::move(callback)}]() mutable { - sendFromLoop_(tensor, std::move(descriptorCallback), std::move(callback)); + sendFromLoop_( + std::move(tensor), std::move(descriptorCallback), std::move(callback)); }); } void Channel::Impl::sendFromLoop_( - const CpuTensor tensor, + CpuBuffer tensor, TDescriptorCallback descriptorCallback, TSendCallback callback) { TP_DCHECK(loop_.inLoop()); @@ -324,26 +326,27 @@ void Channel::Impl::sendFromLoop_( void Channel::recv( TDescriptor descriptor, - CpuTensor tensor, + CpuBuffer tensor, TRecvCallback callback) { - impl_->recv(std::move(descriptor), tensor, std::move(callback)); + impl_->recv(std::move(descriptor), std::move(tensor), std::move(callback)); } void Channel::Impl::recv( TDescriptor descriptor, - CpuTensor tensor, + CpuBuffer tensor, TRecvCallback callback) { loop_.deferToLoop([this, descriptor{std::move(descriptor)}, - tensor, + tensor{std::move(tensor)}, callback{std::move(callback)}]() mutable { - 
recvFromLoop_(std::move(descriptor), tensor, std::move(callback)); + recvFromLoop_( + std::move(descriptor), std::move(tensor), std::move(callback)); }); } void Channel::Impl::recvFromLoop_( TDescriptor descriptor, - CpuTensor tensor, + CpuBuffer tensor, TRecvCallback callback) { TP_DCHECK(loop_.inLoop()); diff --git a/tensorpipe/channel/mpt/channel.h b/tensorpipe/channel/mpt/channel.h index 2b90e9042..a5bd57337 100644 --- a/tensorpipe/channel/mpt/channel.h +++ b/tensorpipe/channel/mpt/channel.h @@ -34,12 +34,12 @@ class Channel : public channel::CpuChannel { // Send memory region to peer. void send( - const CpuTensor tensor, + CpuBuffer tensor, TDescriptorCallback descriptorCallback, TSendCallback callback) override; // Receive memory region from peer. - void recv(TDescriptor descriptor, CpuTensor tensor, TRecvCallback callback) + void recv(TDescriptor descriptor, CpuBuffer tensor, TRecvCallback callback) override; // Tell the channel what its identifier is. diff --git a/tensorpipe/channel/mpt/context.h b/tensorpipe/channel/mpt/context.h index 95419446d..a15fb2bfa 100644 --- a/tensorpipe/channel/mpt/context.h +++ b/tensorpipe/channel/mpt/context.h @@ -14,7 +14,7 @@ #include #include -#include +#include #include namespace tensorpipe { diff --git a/tensorpipe/channel/registry.h b/tensorpipe/channel/registry.h index e67e54069..07291bd4d 100644 --- a/tensorpipe/channel/registry.h +++ b/tensorpipe/channel/registry.h @@ -9,7 +9,7 @@ #pragma once #include -#include +#include #include TP_DECLARE_SHARED_REGISTRY( diff --git a/tensorpipe/channel/xth/channel.cc b/tensorpipe/channel/xth/channel.cc index 2eb731add..6ab9d09d8 100644 --- a/tensorpipe/channel/xth/channel.cc +++ b/tensorpipe/channel/xth/channel.cc @@ -42,11 +42,11 @@ class Channel::Impl : public std::enable_shared_from_this { void init(); void send( - const CpuTensor tensor, + CpuBuffer tensor, TDescriptorCallback descriptorCallback, TSendCallback callback); - void recv(TDescriptor descriptor, CpuTensor tensor, 
TRecvCallback callback); + void recv(TDescriptor descriptor, CpuBuffer tensor, TRecvCallback callback); // Tell the channel what its identifier is. void setId(std::string id); @@ -60,14 +60,14 @@ class Channel::Impl : public std::enable_shared_from_this { // Send memory region to peer. void sendFromLoop_( - const CpuTensor tensor, + CpuBuffer tensor, TDescriptorCallback descriptorCallback, TSendCallback callback); // Receive memory region from peer. void recvFromLoop_( TDescriptor descriptor, - CpuTensor tensor, + CpuBuffer tensor, TRecvCallback callback); void setIdFromLoop_(std::string id); @@ -138,26 +138,28 @@ void Channel::Impl::initFromLoop_() { } void Channel::send( - const CpuTensor tensor, + CpuBuffer tensor, TDescriptorCallback descriptorCallback, TSendCallback callback) { - impl_->send(tensor, std::move(descriptorCallback), std::move(callback)); + impl_->send( + std::move(tensor), std::move(descriptorCallback), std::move(callback)); } void Channel::Impl::send( - const CpuTensor tensor, + CpuBuffer tensor, TDescriptorCallback descriptorCallback, TSendCallback callback) { loop_.deferToLoop([this, - tensor, + tensor{std::move(tensor)}, descriptorCallback{std::move(descriptorCallback)}, callback{std::move(callback)}]() mutable { - sendFromLoop_(tensor, std::move(descriptorCallback), std::move(callback)); + sendFromLoop_( + std::move(tensor), std::move(descriptorCallback), std::move(callback)); }); } void Channel::Impl::sendFromLoop_( - const CpuTensor tensor, + CpuBuffer tensor, TDescriptorCallback descriptorCallback, TSendCallback callback) { TP_DCHECK(loop_.inLoop()); @@ -217,18 +219,18 @@ void Channel::Impl::sendFromLoop_( // Receive memory region from peer. 
void Channel::recv( TDescriptor descriptor, - CpuTensor tensor, + CpuBuffer tensor, TRecvCallback callback) { impl_->recv(std::move(descriptor), tensor, std::move(callback)); } void Channel::Impl::recv( TDescriptor descriptor, - CpuTensor tensor, + CpuBuffer tensor, TRecvCallback callback) { loop_.deferToLoop([this, descriptor{std::move(descriptor)}, - tensor, + tensor{std::move(tensor)}, callback{std::move(callback)}]() mutable { recvFromLoop_(std::move(descriptor), tensor, std::move(callback)); }); @@ -236,7 +238,7 @@ void Channel::Impl::recv( void Channel::Impl::recvFromLoop_( TDescriptor descriptor, - CpuTensor tensor, + CpuBuffer tensor, TRecvCallback callback) { TP_DCHECK(loop_.inLoop()); diff --git a/tensorpipe/channel/xth/channel.h b/tensorpipe/channel/xth/channel.h index 72ff5dfeb..8ecc165f1 100644 --- a/tensorpipe/channel/xth/channel.h +++ b/tensorpipe/channel/xth/channel.h @@ -31,12 +31,12 @@ class Channel : public channel::CpuChannel { // Send memory region to peer. void send( - const CpuTensor tensor, + CpuBuffer tensor, TDescriptorCallback descriptorCallback, TSendCallback callback) override; // Receive memory region from peer. - void recv(TDescriptor descriptor, CpuTensor tensor, TRecvCallback callback) + void recv(TDescriptor descriptor, CpuBuffer tensor, TRecvCallback callback) override; // Tell the channel what its identifier is. 
diff --git a/tensorpipe/channel/xth/context.h b/tensorpipe/channel/xth/context.h index 9d62c21a0..e5249f5cd 100644 --- a/tensorpipe/channel/xth/context.h +++ b/tensorpipe/channel/xth/context.h @@ -15,7 +15,7 @@ #include #include #include -#include +#include namespace tensorpipe { namespace channel { diff --git a/tensorpipe/common/tensor.h b/tensorpipe/common/buffer.h similarity index 79% rename from tensorpipe/common/tensor.h rename to tensorpipe/common/buffer.h index 5173afe79..cf2563709 100644 --- a/tensorpipe/common/tensor.h +++ b/tensorpipe/common/buffer.h @@ -23,31 +23,31 @@ enum class DeviceType { #endif // TENSORPIPE_HAS_CUDA }; -struct CpuTensor { +struct CpuBuffer { void* ptr{nullptr}; size_t length{0}; }; #if TENSORPIPE_HAS_CUDA -struct CudaTensor { +struct CudaBuffer { void* ptr{nullptr}; size_t length{0}; cudaStream_t stream{cudaStreamDefault}; }; #endif // TENSORPIPE_HAS_CUDA -struct Tensor { - Tensor(CpuTensor t) : type(DeviceType::kCpu), cpu(t) {} +struct Buffer { + Buffer(CpuBuffer t) : type(DeviceType::kCpu), cpu(t) {} #if TENSORPIPE_HAS_CUDA - Tensor(CudaTensor t) : type(DeviceType::kCuda), cuda(t) {} + Buffer(CudaBuffer t) : type(DeviceType::kCuda), cuda(t) {} #endif // TENSORPIPE_HAS_CUDA DeviceType type; union { - CpuTensor cpu; + CpuBuffer cpu; #if TENSORPIPE_HAS_CUDA - CudaTensor cuda; + CudaBuffer cuda; #endif // TENSORPIPE_HAS_CUDA }; }; diff --git a/tensorpipe/core/context.h b/tensorpipe/core/context.h index 732d33770..4d6c76830 100644 --- a/tensorpipe/core/context.h +++ b/tensorpipe/core/context.h @@ -16,7 +16,7 @@ #include #include -#include +#include #include namespace tensorpipe { diff --git a/tensorpipe/core/message.h b/tensorpipe/core/message.h index cc9075fac..d07837df4 100644 --- a/tensorpipe/core/message.h +++ b/tensorpipe/core/message.h @@ -13,7 +13,7 @@ #include #include -#include +#include namespace tensorpipe { @@ -51,12 +51,12 @@ class Message final { std::vector payloads; struct Tensor { - tensorpipe::Tensor tensor; + 
tensorpipe::Buffer buffer; // Users may include arbitrary metadata in the following field. // This may contain allocation hints for the receiver, for example. std::string metadata; - // The following fields are marked for deprecation. Use `tensor` instead. + // The following fields are marked for deprecation. Use `buffer` instead. void* data{nullptr}; size_t length{0}; }; diff --git a/tensorpipe/core/nop_types.h b/tensorpipe/core/nop_types.h index 4a0aebc3e..7158d15fc 100644 --- a/tensorpipe/core/nop_types.h +++ b/tensorpipe/core/nop_types.h @@ -16,7 +16,7 @@ #include #include -#include +#include namespace tensorpipe { diff --git a/tensorpipe/core/pipe.cc b/tensorpipe/core/pipe.cc index 73d8bbe43..ea043bbbe 100644 --- a/tensorpipe/core/pipe.cc +++ b/tensorpipe/core/pipe.cc @@ -86,15 +86,15 @@ void parseDescriptorOfMessage(ReadOperation& op, const Packet& nopPacketIn) { for (const auto& nopTensorDescriptor : nopMessageDescriptor.tensorDescriptors) { Message::Tensor tensor{ - .tensor = - CpuTensor{ + .buffer = + CpuBuffer{ .ptr = nullptr, .length = static_cast(nopTensorDescriptor.sizeInBytes), }, .metadata = nopTensorDescriptor.metadata, }; ReadOperation::Tensor tensorBeingAllocated; - tensorBeingAllocated.length = tensor.tensor.cpu.length; + tensorBeingAllocated.length = tensor.buffer.cpu.length; tensorBeingAllocated.channelName = nopTensorDescriptor.channelName; // FIXME If the nop object wasn't const we could move the string out... 
tensorBeingAllocated.descriptor = nopTensorDescriptor.channelDescriptor; @@ -123,7 +123,7 @@ void checkAllocationCompatibility( const Message::Tensor& tensor = message.tensors[tensorIdx]; const ReadOperation::Tensor& tensorBeingAllocated = op.tensors[tensorIdx]; TP_DCHECK_GE(tensorBeingAllocated.length, 0); - TP_THROW_ASSERT_IF(tensor.tensor.cpu.length != tensorBeingAllocated.length); + TP_THROW_ASSERT_IF(tensor.buffer.cpu.length != tensorBeingAllocated.length); } } @@ -187,7 +187,7 @@ std::shared_ptr> makeDescriptorForMessage( MessageDescriptor::TensorDescriptor& nopTensorDescriptor = nopMessageDescriptor.tensorDescriptors.back(); nopTensorDescriptor.deviceType = DeviceType::kCpu; - nopTensorDescriptor.sizeInBytes = tensor.tensor.cpu.length; + nopTensorDescriptor.sizeInBytes = tensor.buffer.cpu.length; nopTensorDescriptor.metadata = tensor.metadata; nopTensorDescriptor.channelName = otherTensor.channelName; // FIXME In principle we could move here. @@ -682,8 +682,8 @@ void Pipe::Impl::readPayloadsAndReceiveTensorsOfMessage(ReadOperation& op) { // Temporary workaround until tensor.data/tensor.length are removed. auto cpu_tensor = (tensor.data == nullptr) - ? tensor.tensor.cpu - : CpuTensor{.ptr = tensor.data, .length = tensor.length}; + ? tensor.buffer.cpu + : CpuBuffer{.ptr = tensor.data, .length = tensor.length}; channel->recv( std::move(tensorBeingAllocated.descriptor), cpu_tensor, @@ -1093,8 +1093,8 @@ void Pipe::Impl::sendTensorsOfMessage_(WriteOperation& op) { // Temporary workaround until tensor.data/tensor.length are removed. auto cpu_tensor = (tensor.data == nullptr) - ? tensor.tensor.cpu - : CpuTensor{.ptr = tensor.data, .length = tensor.length}; + ? 
tensor.buffer.cpu + : CpuBuffer{.ptr = tensor.data, .length = tensor.length}; channel.send( cpu_tensor, eagerCallbackWrapper_( diff --git a/tensorpipe/python/tensorpipe.cc b/tensorpipe/python/tensorpipe.cc index d63181332..38b2b958b 100644 --- a/tensorpipe/python/tensorpipe.cc +++ b/tensorpipe/python/tensorpipe.cc @@ -115,7 +115,7 @@ tensorpipe::Message prepareToWrite(std::shared_ptr pyMessage) { tpMessage.tensors.reserve(pyMessage->tensors.size()); for (const auto& pyTensor : pyMessage->tensors) { tensorpipe::Message::Tensor tpTensor{ - tensorpipe::CpuTensor{pyTensor->buffer.ptr(), + tensorpipe::CpuBuffer{pyTensor->buffer.ptr(), pyTensor->buffer.length()}, {reinterpret_cast(pyTensor->metadata.ptr()), pyTensor->metadata.length()}}; @@ -208,7 +208,7 @@ tensorpipe::Message prepareToRead(std::shared_ptr pyMessage) { tpMessage.tensors.reserve(pyMessage->tensors.size()); for (const auto& pyTensor : pyMessage->tensors) { TP_THROW_ASSERT_IF(!pyTensor->buffer.has_value()) << "No buffer"; - tensorpipe::Message::Tensor tpTensor{tensorpipe::CpuTensor{ + tensorpipe::Message::Tensor tpTensor{tensorpipe::CpuBuffer{ pyTensor->buffer.value().ptr(), pyTensor->buffer.value().length()}}; tpMessage.tensors.push_back(std::move(tpTensor)); } diff --git a/tensorpipe/test/channel/channel_test.cc b/tensorpipe/test/channel/channel_test.cc index 6bb3fa8ee..b49b890f7 100644 --- a/tensorpipe/test/channel/channel_test.cc +++ b/tensorpipe/test/channel/channel_test.cc @@ -39,7 +39,7 @@ TEST_P(ChannelTest, ClientToServer) { std::future> descriptorFuture; std::future sendFuture; std::tie(descriptorFuture, sendFuture) = - sendWithFuture(channel, CpuTensor{data.data(), data.size()}); + sendWithFuture(channel, CpuBuffer{data.data(), data.size()}); Error descriptorError; TDescriptor descriptor; std::tie(descriptorError, descriptor) = descriptorFuture.get(); @@ -62,7 +62,7 @@ TEST_P(ChannelTest, ClientToServer) { // Perform recv and wait for completion. 
auto descriptor = peers_->recv(PeerGroup::kClient); std::future recvFuture = recvWithFuture( - channel, descriptor, CpuTensor{data.data(), dataSize}); + channel, descriptor, CpuBuffer{data.data(), data.size()}); Error recvError = recvFuture.get(); EXPECT_FALSE(recvError) << recvError.what(); @@ -91,7 +91,7 @@ TEST_P(ChannelTest, ServerToClient) { // Perform recv and wait for completion. auto descriptor = peers_->recv(PeerGroup::kServer); std::future recvFuture = recvWithFuture( - channel, descriptor, CpuTensor{data.data(), data.size()}); + channel, descriptor, CpuBuffer{data.data(), data.size()}); Error recvError = recvFuture.get(); EXPECT_FALSE(recvError) << recvError.what(); @@ -117,7 +117,7 @@ TEST_P(ChannelTest, ServerToClient) { std::future> descriptorFuture; std::future sendFuture; std::tie(descriptorFuture, sendFuture) = - sendWithFuture(channel, CpuTensor{data.data(), data.size()}); + sendWithFuture(channel, CpuBuffer{data.data(), data.size()}); Error descriptorError; TDescriptor descriptor; std::tie(descriptorError, descriptor) = descriptorFuture.get(); @@ -154,7 +154,7 @@ TEST_P(ChannelTest, SendMultipleTensors) { std::future> descriptorFuture; std::future sendFuture; std::tie(descriptorFuture, sendFuture) = - sendWithFuture(channel, CpuTensor{data.data(), data.size()}); + sendWithFuture(channel, CpuBuffer{data.data(), data.size()}); Error descriptorError; TDescriptor descriptor; std::tie(descriptorError, descriptor) = descriptorFuture.get(); @@ -186,7 +186,7 @@ TEST_P(ChannelTest, SendMultipleTensors) { for (int i = 0; i < numTensors; i++) { auto descriptor = peers_->recv(PeerGroup::kClient); std::future recvFuture = recvWithFuture( - channel, descriptor, CpuTensor{dataVec[i].data(), dataSize}); + channel, descriptor, CpuBuffer{dataVec[i].data(), dataSize}); recvFutures.push_back(std::move(recvFuture)); } for (auto& recvFuture : recvFutures) { @@ -231,7 +231,7 @@ TEST_P(ChannelTest, SendTensorsBothWays) { std::future> descriptorFuture; 
std::tie(descriptorFuture, sendFuture) = sendWithFuture( channel, - CpuTensor{ + CpuBuffer{ .ptr = sendData.data(), .length = sendData.size(), }); @@ -248,7 +248,7 @@ TEST_P(ChannelTest, SendTensorsBothWays) { recvFuture = recvWithFuture( channel, descriptor, - CpuTensor{ + CpuBuffer{ .ptr = recvData.data(), .length = recvData.size(), }); @@ -289,7 +289,7 @@ TEST_P(ChannelTest, SendTensorsBothWays) { std::future> descriptorFuture; std::tie(descriptorFuture, sendFuture) = sendWithFuture( channel, - CpuTensor{ + CpuBuffer{ .ptr = sendData.data(), .length = sendData.size(), }); @@ -306,7 +306,7 @@ TEST_P(ChannelTest, SendTensorsBothWays) { recvFuture = recvWithFuture( channel, descriptor, - CpuTensor{ + CpuBuffer{ .ptr = recvData.data(), .length = recvData.size(), }); @@ -342,7 +342,7 @@ TEST_P(ChannelTest, NullPointer) { std::future> descriptorFuture; std::future sendFuture; std::tie(descriptorFuture, sendFuture) = - sendWithFuture(channel, CpuTensor{nullptr, 0}); + sendWithFuture(channel, CpuBuffer{nullptr, 0}); Error descriptorError; TDescriptor descriptor; std::tie(descriptorError, descriptor) = descriptorFuture.get(); @@ -363,7 +363,7 @@ TEST_P(ChannelTest, NullPointer) { // Perform recv and wait for completion. auto descriptor = peers_->recv(PeerGroup::kClient); std::future recvFuture = - recvWithFuture(channel, descriptor, CpuTensor{nullptr, 0}); + recvWithFuture(channel, descriptor, CpuBuffer{nullptr, 0}); Error recvError = recvFuture.get(); EXPECT_FALSE(recvError) << recvError.what(); @@ -389,7 +389,7 @@ TEST_P(ChannelTest, EmptyTensor) { std::future> descriptorFuture; std::future sendFuture; std::tie(descriptorFuture, sendFuture) = - sendWithFuture(channel, CpuTensor{data.data(), 0}); + sendWithFuture(channel, CpuBuffer{data.data(), 0}); Error descriptorError; TDescriptor descriptor; std::tie(descriptorError, descriptor) = descriptorFuture.get(); @@ -413,7 +413,7 @@ TEST_P(ChannelTest, EmptyTensor) { // Perform recv and wait for completion. 
auto descriptor = peers_->recv(PeerGroup::kClient); std::future recvFuture = - recvWithFuture(channel, descriptor, CpuTensor{data.data(), 0}); + recvWithFuture(channel, descriptor, CpuBuffer{data.data(), 0}); Error recvError = recvFuture.get(); EXPECT_FALSE(recvError) << recvError.what(); @@ -465,7 +465,7 @@ TEST_P(ChannelTest, CallbacksAreDeferred) { // Initialize with sequential values. std::vector data(dataSize); std::iota(data.begin(), data.end(), 0); - auto buffer = helper_->makeBuffer(dataSize); + auto buffer = helper_->makeBuffer(data.size()); buffer->wrap(data.data()); // Perform send and wait for completion. @@ -474,7 +474,7 @@ TEST_P(ChannelTest, CallbacksAreDeferred) { std::mutex mutex; std::unique_lock callerLock(mutex); channel->send( - CpuTensor{data.data(), data.size()}, + CpuBuffer{data.data(), data.size()}, [&descriptorPromise](const Error& error, TDescriptor descriptor) { descriptorPromise.set_value( std::make_tuple(error, std::move(descriptor))); @@ -513,7 +513,7 @@ TEST_P(ChannelTest, CallbacksAreDeferred) { auto descriptor = peers_->recv(PeerGroup::kClient); channel->recv( descriptor, - CpuTensor{data.data(), data.size()}, + CpuBuffer{data.data(), data.size()}, [&recvPromise, &mutex](const Error& error) { std::unique_lock calleeLock(mutex); recvPromise.set_value(error); diff --git a/tensorpipe/test/channel/channel_test.h b/tensorpipe/test/channel/channel_test.h index 56a8325f5..202b2d11e 100644 --- a/tensorpipe/test/channel/channel_test.h +++ b/tensorpipe/test/channel/channel_test.h @@ -16,7 +16,7 @@ #include #include -#include +#include #include #include @@ -129,7 +129,7 @@ class ChannelTest : public ::testing::TestWithParam { std::future> sendWithFuture( std::shared_ptr channel, - const tensorpipe::CpuTensor& tensor) { + const tensorpipe::CpuBuffer& tensor) { auto descriptorPromise = std::make_shared< std::promise>>(); auto promise = std::make_shared>(); @@ -152,7 +152,7 @@ class ChannelTest : public ::testing::TestWithParam { [[nodiscard]] 
std::future recvWithFuture( std::shared_ptr channel, tensorpipe::channel::TDescriptor descriptor, - const tensorpipe::CpuTensor& tensor) { + const tensorpipe::CpuBuffer& tensor) { auto promise = std::make_shared>(); auto future = promise->get_future(); diff --git a/tensorpipe/test/core/context_test.cc b/tensorpipe/test/core/context_test.cc index ef979caab..10a9789bc 100644 --- a/tensorpipe/test/core/context_test.cc +++ b/tensorpipe/test/core/context_test.cc @@ -78,10 +78,10 @@ ::testing::AssertionResult messagesAreEqual( } for (size_t idx = 0; idx < m1.tensors.size(); idx++) { EXPECT_TRUE(buffersAreEqual( - m1.tensors[idx].tensor.cpu.ptr, - m1.tensors[idx].tensor.cpu.length, - m2.tensors[idx].tensor.cpu.ptr, - m2.tensors[idx].tensor.cpu.length)); + m1.tensors[idx].buffer.cpu.ptr, + m1.tensors[idx].buffer.cpu.length, + m2.tensors[idx].buffer.cpu.ptr, + m2.tensors[idx].buffer.cpu.length)); } return ::testing::AssertionSuccess(); } @@ -100,7 +100,7 @@ Message makeMessage(int numPayloads, int numTensors) { } for (int i = 0; i < numTensors; i++) { Message::Tensor tensor{ - .tensor = CpuTensor{ + .buffer = CpuBuffer{ reinterpret_cast(const_cast(kTensorData.data())), kTensorData.length()}}; message.tensors.push_back(std::move(tensor)); @@ -191,8 +191,8 @@ TEST(Context, ClientPingSerial) { buffers.push_back(std::move(payloadData)); } for (auto& tensor : message.tensors) { - auto tensorData = std::make_unique(tensor.tensor.cpu.length); - tensor.tensor.cpu.ptr = tensorData.get(); + auto tensorData = std::make_unique(tensor.buffer.cpu.length); + tensor.buffer.cpu.ptr = tensorData.get(); buffers.push_back(std::move(tensorData)); } @@ -260,8 +260,8 @@ TEST(Context, ClientPingInline) { buffers.push_back(std::move(payloadData)); } for (auto& tensor : message.tensors) { - auto tensorData = std::make_unique(tensor.tensor.cpu.length); - tensor.tensor.cpu.ptr = tensorData.get(); + auto tensorData = std::make_unique(tensor.buffer.cpu.length); + tensor.buffer.cpu.ptr = 
tensorData.get(); buffers.push_back(std::move(tensorData)); } serverPipe->read( @@ -361,8 +361,8 @@ TEST(Context, ServerPingPongTwice) { } for (auto& tensor : message.tensors) { auto tensorData = - std::make_unique(tensor.tensor.cpu.length); - tensor.tensor.cpu.ptr = tensorData.get(); + std::make_unique(tensor.buffer.cpu.length); + tensor.buffer.cpu.ptr = tensorData.get(); buffers.push_back(std::move(tensorData)); } serverPipe->read( @@ -405,8 +405,8 @@ TEST(Context, ServerPingPongTwice) { buffers.push_back(std::move(payloadData)); } for (auto& tensor : message.tensors) { - auto tensorData = std::make_unique(tensor.tensor.cpu.length); - tensor.tensor.cpu.ptr = tensorData.get(); + auto tensorData = std::make_unique(tensor.buffer.cpu.length); + tensor.buffer.cpu.ptr = tensorData.get(); buffers.push_back(std::move(tensorData)); } clientPipe->read( @@ -459,8 +459,8 @@ static void pipeRead( buffers.push_back(std::move(payloadData)); } for (auto& tensor : message.tensors) { - auto tensorData = std::make_unique(tensor.tensor.cpu.length); - tensor.tensor.cpu.ptr = tensorData.get(); + auto tensorData = std::make_unique(tensor.buffer.cpu.length); + tensor.buffer.cpu.ptr = tensorData.get(); buffers.push_back(std::move(tensorData)); } pipe->read( From 535db88b1e881a404ba43703a961aa83a76b2704 Mon Sep 17 00:00:00 2001 From: Lucas Hosseini Date: Thu, 10 Sep 2020 20:07:44 +0200 Subject: [PATCH 07/12] Update on "Make Channel API accept tensor structs rather than raw pointers." + Introduce tensor.h defining the tensor struct(s). The `CpuTensor` struct is always defined, while the `CudaTensor` struct is defined only when `TENSORPIPE_HAS_CUDA` is true. + Update all channels to take a `CpuTensor` or `CudaTensor` for `send`/`recv` rather than a raw pointer and a length. + Make the base `Channel`/`Context` classes templated on `TTensor`, effectively creating two channel hierarchies (one for CPU channels, one for CUDA channels). 
+ Update the Pipe and the generic channel tests to use the new API. So far, generic channel tests are CPU only, and tests for the CUDA IPC channel are (temporarily) disabled. A subsequent PR will take care of refactoring tests so that generic tests work for CUDA channels. An other PR will add support for CUDA tensors in the Pipe. Differential Revision: [D23598033](https://our.internmc.facebook.com/intern/diff/D23598033) [ghstack-poisoned] --- tensorpipe/channel/basic/context.h | 2 +- tensorpipe/channel/channel.h | 2 +- tensorpipe/channel/cma/context.h | 2 +- tensorpipe/channel/mpt/context.h | 2 +- tensorpipe/channel/xth/context.h | 2 +- tensorpipe/core/context.h | 2 +- 6 files changed, 6 insertions(+), 6 deletions(-) diff --git a/tensorpipe/channel/basic/context.h b/tensorpipe/channel/basic/context.h index ed8a346f4..a8cca20b3 100644 --- a/tensorpipe/channel/basic/context.h +++ b/tensorpipe/channel/basic/context.h @@ -12,8 +12,8 @@ #include #include -#include #include +#include namespace tensorpipe { namespace channel { diff --git a/tensorpipe/channel/channel.h b/tensorpipe/channel/channel.h index 3a64d8206..5529d67f4 100644 --- a/tensorpipe/channel/channel.h +++ b/tensorpipe/channel/channel.h @@ -12,8 +12,8 @@ #include #include -#include #include +#include #include // Channels are an out of band mechanism to transfer data between diff --git a/tensorpipe/channel/cma/context.h b/tensorpipe/channel/cma/context.h index f3b79de6a..07124a0d5 100644 --- a/tensorpipe/channel/cma/context.h +++ b/tensorpipe/channel/cma/context.h @@ -13,9 +13,9 @@ #include #include +#include #include #include -#include namespace tensorpipe { namespace channel { diff --git a/tensorpipe/channel/mpt/context.h b/tensorpipe/channel/mpt/context.h index a15fb2bfa..8872dea6c 100644 --- a/tensorpipe/channel/mpt/context.h +++ b/tensorpipe/channel/mpt/context.h @@ -13,8 +13,8 @@ #include #include -#include #include +#include #include namespace tensorpipe { diff --git a/tensorpipe/channel/xth/context.h 
b/tensorpipe/channel/xth/context.h index e5249f5cd..1769154fd 100644 --- a/tensorpipe/channel/xth/context.h +++ b/tensorpipe/channel/xth/context.h @@ -13,9 +13,9 @@ #include #include +#include #include #include -#include namespace tensorpipe { namespace channel { diff --git a/tensorpipe/core/context.h b/tensorpipe/core/context.h index 4d6c76830..b18d3ac6a 100644 --- a/tensorpipe/core/context.h +++ b/tensorpipe/core/context.h @@ -15,8 +15,8 @@ #include #include -#include #include +#include #include namespace tensorpipe { From b17935d05eeba35477b722cf80be0de4a2a7bfa0 Mon Sep 17 00:00:00 2001 From: Lucas Hosseini Date: Thu, 10 Sep 2020 20:09:18 +0200 Subject: [PATCH 08/12] Update on "Make Channel API accept tensor structs rather than raw pointers." + Introduce tensor.h defining the tensor struct(s). The `CpuTensor` struct is always defined, while the `CudaTensor` struct is defined only when `TENSORPIPE_HAS_CUDA` is true. + Update all channels to take a `CpuTensor` or `CudaTensor` for `send`/`recv` rather than a raw pointer and a length. + Make the base `Channel`/`Context` classes templated on `TTensor`, effectively creating two channel hierarchies (one for CPU channels, one for CUDA channels). + Update the Pipe and the generic channel tests to use the new API. So far, generic channel tests are CPU only, and tests for the CUDA IPC channel are (temporarily) disabled. A subsequent PR will take care of refactoring tests so that generic tests work for CUDA channels. An other PR will add support for CUDA tensors in the Pipe. 
Differential Revision: [D23598033](https://our.internmc.facebook.com/intern/diff/D23598033) [ghstack-poisoned] --- tensorpipe/python/tensorpipe.cc | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tensorpipe/python/tensorpipe.cc b/tensorpipe/python/tensorpipe.cc index 38b2b958b..4e66bbe95 100644 --- a/tensorpipe/python/tensorpipe.cc +++ b/tensorpipe/python/tensorpipe.cc @@ -187,9 +187,9 @@ std::shared_ptr prepareToAllocate( std::vector> pyTensors; pyTensors.reserve(tpMessage.tensors.size()); for (const auto& tpTensor : tpMessage.tensors) { - TP_DCHECK(tpTensor.tensor.cpu.ptr == nullptr); + TP_DCHECK(tpTensor.buffer.cpu.ptr == nullptr); pyTensors.push_back(std::make_shared( - tpTensor.tensor.cpu.length, tpTensor.metadata)); + tpTensor.buffer.cpu.length, tpTensor.metadata)); } auto pyMessage = std::make_shared( tpMessage.metadata, std::move(pyPayloads), std::move(pyTensors)); From 275063ba0cace7da0a6b5a1b0579d4e2d429eec8 Mon Sep 17 00:00:00 2001 From: Lucas Hosseini Date: Thu, 10 Sep 2020 20:13:08 +0200 Subject: [PATCH 09/12] Update on "Make Channel API accept tensor structs rather than raw pointers." + Introduce tensor.h defining the tensor struct(s). The `CpuTensor` struct is always defined, while the `CudaTensor` struct is defined only when `TENSORPIPE_HAS_CUDA` is true. + Update all channels to take a `CpuTensor` or `CudaTensor` for `send`/`recv` rather than a raw pointer and a length. + Make the base `Channel`/`Context` classes templated on `TTensor`, effectively creating two channel hierarchies (one for CPU channels, one for CUDA channels). + Update the Pipe and the generic channel tests to use the new API. So far, generic channel tests are CPU only, and tests for the CUDA IPC channel are (temporarily) disabled. A subsequent PR will take care of refactoring tests so that generic tests work for CUDA channels. An other PR will add support for CUDA tensors in the Pipe. 
Differential Revision: [D23598033](https://our.internmc.facebook.com/intern/diff/D23598033) [ghstack-poisoned] --- tensorpipe/test/channel/channel_test.cc | 26 ++++--------------------- 1 file changed, 4 insertions(+), 22 deletions(-) diff --git a/tensorpipe/test/channel/channel_test.cc b/tensorpipe/test/channel/channel_test.cc index b49b890f7..d9170036a 100644 --- a/tensorpipe/test/channel/channel_test.cc +++ b/tensorpipe/test/channel/channel_test.cc @@ -230,11 +230,7 @@ TEST_P(ChannelTest, SendTensorsBothWays) { { std::future> descriptorFuture; std::tie(descriptorFuture, sendFuture) = sendWithFuture( - channel, - CpuBuffer{ - .ptr = sendData.data(), - .length = sendData.size(), - }); + channel, CpuBuffer{sendData.data(), sendData.size()}); Error descriptorError; TDescriptor descriptor; std::tie(descriptorError, descriptor) = descriptorFuture.get(); @@ -246,12 +242,7 @@ TEST_P(ChannelTest, SendTensorsBothWays) { { auto descriptor = peers_->recv(PeerGroup::kServer); recvFuture = recvWithFuture( - channel, - descriptor, - CpuBuffer{ - .ptr = recvData.data(), - .length = recvData.size(), - }); + channel, descriptor, CpuBuffer{recvData.data(), recvData.size()}); } // Wait for completion of both. @@ -288,11 +279,7 @@ TEST_P(ChannelTest, SendTensorsBothWays) { { std::future> descriptorFuture; std::tie(descriptorFuture, sendFuture) = sendWithFuture( - channel, - CpuBuffer{ - .ptr = sendData.data(), - .length = sendData.size(), - }); + channel, CpuBuffer{sendData.data(), sendData.size()}); Error descriptorError; TDescriptor descriptor; std::tie(descriptorError, descriptor) = descriptorFuture.get(); @@ -304,12 +291,7 @@ TEST_P(ChannelTest, SendTensorsBothWays) { { auto descriptor = peers_->recv(PeerGroup::kClient); recvFuture = recvWithFuture( - channel, - descriptor, - CpuBuffer{ - .ptr = recvData.data(), - .length = recvData.size(), - }); + channel, descriptor, CpuBuffer{recvData.data(), recvData.size()}); } // Wait for completion of both. 
From d17c87196d991e6dde373c1c817e7197ea590a40 Mon Sep 17 00:00:00 2001 From: Lucas Hosseini Date: Mon, 14 Sep 2020 18:22:19 +0200 Subject: [PATCH 10/12] Update on "Make Channel API accept buffer structs rather than raw pointers." + Introduce buffer.h defining the buffer struct(s). The `CpuBuffer` struct is always defined, while the `CudaBuffer` struct is defined only when `TENSORPIPE_SUPPORTS_CUDA` is true. + Update all channels to take a `CpuBuffer` or `CudaBuffer` for `send`/`recv` rather than a raw pointer and a length. + Make the base `Channel`/`Context` classes templated on `TBuffer`, effectively creating two channel hierarchies (one for CPU channels, one for CUDA channels). + Update the Pipe and the generic channel tests to use the new API. So far, generic channel tests are CPU only, and tests for the CUDA IPC channel are (temporarily) disabled. A subsequent PR will take care of refactoring tests so that generic tests work for CUDA channels. An other PR will add support for CUDA tensors in the Pipe. Differential Revision: [D23598033](https://our.internmc.facebook.com/intern/diff/D23598033) [ghstack-poisoned] --- tensorpipe/core/message.h | 1 + tensorpipe/test/channel/channel_test.h | 8 ++++---- 2 files changed, 5 insertions(+), 4 deletions(-) diff --git a/tensorpipe/core/message.h b/tensorpipe/core/message.h index d07837df4..939e895b8 100644 --- a/tensorpipe/core/message.h +++ b/tensorpipe/core/message.h @@ -52,6 +52,7 @@ class Message final { struct Tensor { tensorpipe::Buffer buffer; + // Users may include arbitrary metadata in the following field. // This may contain allocation hints for the receiver, for example. 
std::string metadata; diff --git a/tensorpipe/test/channel/channel_test.h b/tensorpipe/test/channel/channel_test.h index 202b2d11e..705f26e80 100644 --- a/tensorpipe/test/channel/channel_test.h +++ b/tensorpipe/test/channel/channel_test.h @@ -129,7 +129,7 @@ class ChannelTest : public ::testing::TestWithParam { std::future> sendWithFuture( std::shared_ptr channel, - const tensorpipe::CpuBuffer& tensor) { + const tensorpipe::CpuBuffer& buffer) { auto descriptorPromise = std::make_shared< std::promise>>(); auto promise = std::make_shared>(); @@ -137,7 +137,7 @@ class ChannelTest : public ::testing::TestWithParam { auto future = promise->get_future(); channel->send( - tensor, + buffer, [descriptorPromise{std::move(descriptorPromise)}]( const tensorpipe::Error& error, std::string descriptor) { descriptorPromise->set_value( @@ -152,13 +152,13 @@ class ChannelTest : public ::testing::TestWithParam { [[nodiscard]] std::future recvWithFuture( std::shared_ptr channel, tensorpipe::channel::TDescriptor descriptor, - const tensorpipe::CpuBuffer& tensor) { + const tensorpipe::CpuBuffer& buffer) { auto promise = std::make_shared>(); auto future = promise->get_future(); channel->recv( std::move(descriptor), - tensor, + buffer, [promise{std::move(promise)}](const tensorpipe::Error& error) { promise->set_value(error); }); From 02bff0520f93a63b035522eaf6a55c9145e98142 Mon Sep 17 00:00:00 2001 From: Lucas Hosseini Date: Tue, 15 Sep 2020 16:24:42 +0200 Subject: [PATCH 11/12] Update on "Make Channel API accept buffer structs rather than raw pointers." + Introduce buffer.h defining the buffer struct(s). The `CpuBuffer` struct is always defined, while the `CudaBuffer` struct is defined only when `TENSORPIPE_SUPPORTS_CUDA` is true. + Update all channels to take a `CpuBuffer` or `CudaBuffer` for `send`/`recv` rather than a raw pointer and a length. 
+ Make the base `Channel`/`Context` classes templated on `TBuffer`, effectively creating two channel hierarchies (one for CPU channels, one for CUDA channels). + Update the Pipe and the generic channel tests to use the new API. So far, generic channel tests are CPU only, and tests for the CUDA IPC channel are (temporarily) disabled. A subsequent PR will take care of refactoring tests so that generic tests work for CUDA channels. An other PR will add support for CUDA tensors in the Pipe. Differential Revision: [D23598033](https://our.internmc.facebook.com/intern/diff/D23598033) [ghstack-poisoned] --- tensorpipe/channel/basic/channel.h | 3 +-- tensorpipe/channel/basic/context.h | 3 +-- tensorpipe/channel/channel.h | 7 ------- tensorpipe/channel/cma/channel.h | 2 +- tensorpipe/channel/cma/context.h | 3 +-- tensorpipe/channel/context.h | 6 ------ tensorpipe/channel/cpu_context.h | 22 ++++++++++++++++++++++ tensorpipe/channel/cuda_buffer.h | 22 ++++++++++++++++++++++ tensorpipe/channel/cuda_ipc/channel.h | 2 +- tensorpipe/channel/cuda_ipc/context.h | 2 +- tensorpipe/channel/mpt/channel.h | 2 +- tensorpipe/channel/mpt/context.cc | 1 - tensorpipe/channel/mpt/context.h | 3 +-- tensorpipe/channel/registry.h | 3 +-- tensorpipe/channel/xth/channel.h | 2 +- tensorpipe/channel/xth/context.h | 3 +-- tensorpipe/common/cpu_buffer.h | 20 ++++++++++++++++++++ tensorpipe/common/cuda_buffer.h | 23 +++++++++++++++++++++++ tensorpipe/{common => core}/buffer.h | 16 ++-------------- tensorpipe/core/context.h | 4 ++-- tensorpipe/core/message.h | 2 +- tensorpipe/core/nop_types.h | 2 +- tensorpipe/tensorpipe.h | 7 ++++++- tensorpipe/test/channel/channel_test.h | 3 +-- 24 files changed, 111 insertions(+), 52 deletions(-) create mode 100644 tensorpipe/channel/cpu_context.h create mode 100644 tensorpipe/channel/cuda_buffer.h create mode 100644 tensorpipe/common/cpu_buffer.h create mode 100644 tensorpipe/common/cuda_buffer.h rename tensorpipe/{common => core}/buffer.h (76%) diff --git 
a/tensorpipe/channel/basic/channel.h b/tensorpipe/channel/basic/channel.h index b64a342c3..d6723a47c 100644 --- a/tensorpipe/channel/basic/channel.h +++ b/tensorpipe/channel/basic/channel.h @@ -11,8 +11,7 @@ #include #include -#include -#include +#include namespace tensorpipe { namespace channel { diff --git a/tensorpipe/channel/basic/context.h b/tensorpipe/channel/basic/context.h index a8cca20b3..df11bde16 100644 --- a/tensorpipe/channel/basic/context.h +++ b/tensorpipe/channel/basic/context.h @@ -11,8 +11,7 @@ #include #include -#include -#include +#include #include namespace tensorpipe { diff --git a/tensorpipe/channel/channel.h b/tensorpipe/channel/channel.h index 769f2e1d2..95aa06f86 100644 --- a/tensorpipe/channel/channel.h +++ b/tensorpipe/channel/channel.h @@ -12,7 +12,6 @@ #include #include -#include #include #include @@ -83,11 +82,5 @@ class Channel { virtual ~Channel() = default; }; -using CpuChannel = Channel; - -#if TENSORPIPE_SUPPORTS_CUDA -using CudaChannel = Channel; -#endif // TENSORPIPE_SUPPORTS_CUDA - } // namespace channel } // namespace tensorpipe diff --git a/tensorpipe/channel/cma/channel.h b/tensorpipe/channel/cma/channel.h index 61b8dbeec..c2f0d5b0e 100644 --- a/tensorpipe/channel/cma/channel.h +++ b/tensorpipe/channel/cma/channel.h @@ -10,8 +10,8 @@ #include -#include #include +#include namespace tensorpipe { namespace channel { diff --git a/tensorpipe/channel/cma/context.h b/tensorpipe/channel/cma/context.h index 07124a0d5..b469d7037 100644 --- a/tensorpipe/channel/cma/context.h +++ b/tensorpipe/channel/cma/context.h @@ -12,8 +12,7 @@ #include #include -#include -#include +#include #include #include diff --git a/tensorpipe/channel/context.h b/tensorpipe/channel/context.h index cd0bcd40a..60736f5fe 100644 --- a/tensorpipe/channel/context.h +++ b/tensorpipe/channel/context.h @@ -67,11 +67,5 @@ class Context { std::string name_; }; -using CpuContext = Context; - -#if TENSORPIPE_SUPPORTS_CUDA -using CudaContext = Context; -#endif // 
TENSORPIPE_SUPPORTS_CUDA - } // namespace channel } // namespace tensorpipe diff --git a/tensorpipe/channel/cpu_context.h b/tensorpipe/channel/cpu_context.h new file mode 100644 index 000000000..9171e3e66 --- /dev/null +++ b/tensorpipe/channel/cpu_context.h @@ -0,0 +1,22 @@ +/* + * Copyright (c) Facebook, Inc. and its affiliates. + * All rights reserved. + * + * This source code is licensed under the BSD-style license found in the + * LICENSE file in the root directory of this source tree. + */ + +#pragma once + +#include +#include +#include + +namespace tensorpipe { +namespace channel { + +using CpuChannel = Channel; +using CpuContext = Context; + +} // namespace channel +} // namespace tensorpipe diff --git a/tensorpipe/channel/cuda_buffer.h b/tensorpipe/channel/cuda_buffer.h new file mode 100644 index 000000000..f6fc1d989 --- /dev/null +++ b/tensorpipe/channel/cuda_buffer.h @@ -0,0 +1,22 @@ +/* + * Copyright (c) Facebook, Inc. and its affiliates. + * All rights reserved. + * + * This source code is licensed under the BSD-style license found in the + * LICENSE file in the root directory of this source tree. 
+ */ + +#pragma once + +#include +#include +#include + +namespace tensorpipe { +namespace channel { + +using CudaChannel = Channel; +using CudaContext = Context; + +} // namespace channel +} // namespace tensorpipe diff --git a/tensorpipe/channel/cuda_ipc/channel.h b/tensorpipe/channel/cuda_ipc/channel.h index 6675b9914..29cb57cbc 100644 --- a/tensorpipe/channel/cuda_ipc/channel.h +++ b/tensorpipe/channel/cuda_ipc/channel.h @@ -12,7 +12,7 @@ #include -#include +#include #include namespace tensorpipe { diff --git a/tensorpipe/channel/cuda_ipc/context.h b/tensorpipe/channel/cuda_ipc/context.h index 04d06be10..ea21ac916 100644 --- a/tensorpipe/channel/cuda_ipc/context.h +++ b/tensorpipe/channel/cuda_ipc/context.h @@ -12,7 +12,7 @@ #include #include -#include +#include #include #include diff --git a/tensorpipe/channel/mpt/channel.h b/tensorpipe/channel/mpt/channel.h index 99843844c..5437f095c 100644 --- a/tensorpipe/channel/mpt/channel.h +++ b/tensorpipe/channel/mpt/channel.h @@ -11,7 +11,7 @@ #include #include -#include +#include #include namespace tensorpipe { diff --git a/tensorpipe/channel/mpt/context.cc b/tensorpipe/channel/mpt/context.cc index 4c5366468..51d5e1822 100644 --- a/tensorpipe/channel/mpt/context.cc +++ b/tensorpipe/channel/mpt/context.cc @@ -13,7 +13,6 @@ #include #include -#include #include #include #include diff --git a/tensorpipe/channel/mpt/context.h b/tensorpipe/channel/mpt/context.h index 8872dea6c..49caf668d 100644 --- a/tensorpipe/channel/mpt/context.h +++ b/tensorpipe/channel/mpt/context.h @@ -12,8 +12,7 @@ #include #include -#include -#include +#include #include #include diff --git a/tensorpipe/channel/registry.h b/tensorpipe/channel/registry.h index 07291bd4d..72dc5e091 100644 --- a/tensorpipe/channel/registry.h +++ b/tensorpipe/channel/registry.h @@ -8,8 +8,7 @@ #pragma once -#include -#include +#include #include TP_DECLARE_SHARED_REGISTRY( diff --git a/tensorpipe/channel/xth/channel.h b/tensorpipe/channel/xth/channel.h index 
22dad4c54..3f31cb883 100644 --- a/tensorpipe/channel/xth/channel.h +++ b/tensorpipe/channel/xth/channel.h @@ -10,7 +10,7 @@ #include -#include +#include #include namespace tensorpipe { diff --git a/tensorpipe/channel/xth/context.h b/tensorpipe/channel/xth/context.h index 1769154fd..78ecd6bf1 100644 --- a/tensorpipe/channel/xth/context.h +++ b/tensorpipe/channel/xth/context.h @@ -12,8 +12,7 @@ #include #include -#include -#include +#include #include #include diff --git a/tensorpipe/common/cpu_buffer.h b/tensorpipe/common/cpu_buffer.h new file mode 100644 index 000000000..96748191a --- /dev/null +++ b/tensorpipe/common/cpu_buffer.h @@ -0,0 +1,20 @@ +/* + * Copyright (c) Facebook, Inc. and its affiliates. + * All rights reserved. + * + * This source code is licensed under the BSD-style license found in the + * LICENSE file in the root directory of this source tree. + */ + +#pragma once + +#include + +namespace tensorpipe { + +struct CpuBuffer { + void* ptr{nullptr}; + size_t length{0}; +}; + +} // namespace tensorpipe diff --git a/tensorpipe/common/cuda_buffer.h b/tensorpipe/common/cuda_buffer.h new file mode 100644 index 000000000..412a9ad43 --- /dev/null +++ b/tensorpipe/common/cuda_buffer.h @@ -0,0 +1,23 @@ +/* + * Copyright (c) Facebook, Inc. and its affiliates. + * All rights reserved. + * + * This source code is licensed under the BSD-style license found in the + * LICENSE file in the root directory of this source tree. 
+ */ + +#pragma once + +#include + +#include + +namespace tensorpipe { + +struct CudaBuffer { + void* ptr{nullptr}; + size_t length{0}; + cudaStream_t stream{cudaStreamDefault}; +}; + +} // namespace tensorpipe diff --git a/tensorpipe/common/buffer.h b/tensorpipe/core/buffer.h similarity index 76% rename from tensorpipe/common/buffer.h rename to tensorpipe/core/buffer.h index 507c75582..7418b53aa 100644 --- a/tensorpipe/common/buffer.h +++ b/tensorpipe/core/buffer.h @@ -10,8 +10,9 @@ #include +#include #if TENSORPIPE_SUPPORTS_CUDA -#include +#include #endif // TENSORPIPE_SUPPORTS_CUDA namespace tensorpipe { @@ -23,19 +24,6 @@ enum class DeviceType { #endif // TENSORPIPE_SUPPORTS_CUDA }; -struct CpuBuffer { - void* ptr{nullptr}; - size_t length{0}; -}; - -#if TENSORPIPE_SUPPORTS_CUDA -struct CudaBuffer { - void* ptr{nullptr}; - size_t length{0}; - cudaStream_t stream{cudaStreamDefault}; -}; -#endif // TENSORPIPE_SUPPORTS_CUDA - struct Buffer { /* implicit */ Buffer(CpuBuffer t) : type(DeviceType::kCpu), cpu(t) {} diff --git a/tensorpipe/core/context.h b/tensorpipe/core/context.h index b18d3ac6a..54bc437e9 100644 --- a/tensorpipe/core/context.h +++ b/tensorpipe/core/context.h @@ -14,9 +14,9 @@ #include #include -#include -#include +#include #include +#include #include namespace tensorpipe { diff --git a/tensorpipe/core/message.h b/tensorpipe/core/message.h index 939e895b8..a4c08b065 100644 --- a/tensorpipe/core/message.h +++ b/tensorpipe/core/message.h @@ -13,7 +13,7 @@ #include #include -#include +#include namespace tensorpipe { diff --git a/tensorpipe/core/nop_types.h b/tensorpipe/core/nop_types.h index 7158d15fc..f5c2bf4b6 100644 --- a/tensorpipe/core/nop_types.h +++ b/tensorpipe/core/nop_types.h @@ -16,7 +16,7 @@ #include #include -#include +#include namespace tensorpipe { diff --git a/tensorpipe/tensorpipe.h b/tensorpipe/tensorpipe.h index 061037cae..2ed5b3862 100644 --- a/tensorpipe/tensorpipe.h +++ b/tensorpipe/tensorpipe.h @@ -12,6 +12,7 @@ // High-level API 
+#include #include #include #include @@ -32,7 +33,11 @@ // Channels -#include +#include +#if TENSORPIPE_SUPPORTS_CUDA +#include +#endif // TENSORPIPE_SUPPORTS_CUDA + #include #include diff --git a/tensorpipe/test/channel/channel_test.h b/tensorpipe/test/channel/channel_test.h index 705f26e80..2ba5bc3f2 100644 --- a/tensorpipe/test/channel/channel_test.h +++ b/tensorpipe/test/channel/channel_test.h @@ -15,8 +15,7 @@ #include -#include -#include +#include #include #include From 58d45cb86f0a0952cc435df5fce11c365c99519f Mon Sep 17 00:00:00 2001 From: Lucas Hosseini Date: Tue, 15 Sep 2020 17:21:33 +0200 Subject: [PATCH 12/12] Update on "Make Channel API accept buffer structs rather than raw pointers." + Introduce buffer.h defining the buffer struct(s). The `CpuBuffer` struct is always defined, while the `CudaBuffer` struct is defined only when `TENSORPIPE_SUPPORTS_CUDA` is true. + Update all channels to take a `CpuBuffer` or `CudaBuffer` for `send`/`recv` rather than a raw pointer and a length. + Make the base `Channel`/`Context` classes templated on `TBuffer`, effectively creating two channel hierarchies (one for CPU channels, one for CUDA channels). + Update the Pipe and the generic channel tests to use the new API. So far, generic channel tests are CPU only, and tests for the CUDA IPC channel are (temporarily) disabled. A subsequent PR will take care of refactoring tests so that generic tests work for CUDA channels. An other PR will add support for CUDA tensors in the Pipe. Differential Revision: [D23598033](https://our.internmc.facebook.com/intern/diff/D23598033) [ghstack-poisoned] --- tensorpipe/channel/{cuda_buffer.h => cuda_context.h} | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename tensorpipe/channel/{cuda_buffer.h => cuda_context.h} (100%) diff --git a/tensorpipe/channel/cuda_buffer.h b/tensorpipe/channel/cuda_context.h similarity index 100% rename from tensorpipe/channel/cuda_buffer.h rename to tensorpipe/channel/cuda_context.h