Skip to content

Commit

Permalink
Revert "Add dynamic buffer support to OCL Backend (#3765)" (#3784)
Browse files Browse the repository at this point in the history
Summary:
This reverts commit bd69664.

I had thought that I had gotten the last POCL issue in #3765, but I had not. Reverting to fix the OCL build.

Honestly, this last issue (AMD/POCL requires sub-buffers to be aligned) seems to torpedo the whole idea. I can't think of any way to handle Glow TensorViews on the host, which means we would have to pass buffer + offset everywhere we pass a buffer below. Essentially this would mean rewriting the whole thing.

Very frustrating since that alignment restriction on subBuffers makes no sense, and no other OCL implementation has it.
Pull Request resolved: #3784

Differential Revision: D18480248

Pulled By: nickgg

fbshipit-source-id: 9b05009ea901a0f477805e6c946faac34d9bc303
  • Loading branch information
nickgg authored and facebook-github-bot committed Nov 13, 2019
1 parent eb48b31 commit 5dac6b2
Show file tree
Hide file tree
Showing 8 changed files with 490 additions and 232 deletions.
2 changes: 1 addition & 1 deletion .circleci/build.sh
Expand Up @@ -140,7 +140,7 @@ elif [[ "$CIRCLE_JOB" == "PYTORCH" ]]; then
cd build
elif [[ "$CIRCLE_JOB" == "OPENCL" ]]; then
install_pocl
CMAKE_ARGS+=("-DGLOW_WITH_OPENCL=ON" "-DGLOW_OPENCL_ALIGN=128")
CMAKE_ARGS+=("-DGLOW_WITH_OPENCL=ON")
else
CMAKE_ARGS+=("-DCMAKE_BUILD_TYPE=Debug")
if [[ "${CIRCLE_JOB}" == "SHARED" ]]; then
Expand Down
220 changes: 93 additions & 127 deletions lib/Backends/OpenCL/OpenCL.cpp

Large diffs are not rendered by default.

18 changes: 5 additions & 13 deletions lib/Backends/OpenCL/OpenCL.h
Expand Up @@ -145,7 +145,8 @@ class OpenCLFunction final : public CompiledFunction {
/// Fill the device \p buffer with a given \p value.
/// \param len number of buffer elements to be filled by the \p value.
/// Elements are considered to be of the type described by \p elemKind.
void fillBuffer(cl_mem buffer, uint64_t len, float value, ElemKind elemKind,
void fillBuffer(cl_mem buffer, uint64_t start, uint64_t len, float value,
ElemKind elemKind,
runtime::OpenCLDeviceBindings *devBindings);

/// Execute a convolution instruction which uses NCHW format.
Expand Down Expand Up @@ -243,13 +244,10 @@ namespace runtime {
/// device specific information used to run a compiled function on a specific
/// device.
struct OpenCLDeviceBindings : DeviceBindings {
OpenCLDeviceBindings(
cl_mem buffer, cl_command_queue commands, cl_device_id device,
cl_context ctx, cl_program prog,
const std::unordered_map<std::string, cl_mem> &subBuffers)
OpenCLDeviceBindings(cl_mem buffer, cl_command_queue commands,
cl_device_id device, cl_context ctx, cl_program prog)
: DeviceBindings(OCLBackend::getName()), deviceBuffer{buffer},
commandQueue{commands}, deviceId{device}, context{ctx}, program{prog},
weightBuffers(subBuffers) {}
commandQueue{commands}, deviceId{device}, context{ctx}, program{prog} {}

/// CL memory buffer. Currently this contains both mutable and immutable
/// weights, the buffer is allocated once when the network is added.
Expand All @@ -273,12 +271,6 @@ struct OpenCLDeviceBindings : DeviceBindings {

/// A list of kernels and their associated events.
std::vector<KernelLaunch> kernelLaunches;

/// Buffers or subBuffers associated with symbols.
std::unordered_map<std::string, cl_mem> weightBuffers;

/// \returns the subBuffer associated with a Value.
cl_mem getBuffer(glow::Value *v);
};
} // namespace runtime
} // namespace glow
Expand Down
36 changes: 2 additions & 34 deletions lib/Backends/OpenCL/OpenCLDeviceManager.cpp
Expand Up @@ -55,29 +55,7 @@ DeviceManager *createOCLDeviceManager(const DeviceConfig &config) {
return new OpenCLDeviceManager(config);
}

OpenCLBuffer::~OpenCLBuffer() {
for (auto buf : subBuffers_) {
clReleaseMemObject(buf.second);
}
subBuffers_.clear();

clReleaseMemObject(buffer_);
}

/// Add a mapping from a Symbol name to an offset into buffer_;
bool OpenCLBuffer::addSubBuffer(std::string name, size_t offset, size_t size) {
cl_buffer_region region({offset, size});
cl_int err;
auto buf = clCreateSubBuffer(buffer_, CL_MEM_READ_WRITE,
CL_BUFFER_CREATE_TYPE_REGION, &region, &err);
auto res = subBuffers_.emplace(name, buf);
if (!res.second) {
llvm::dbgs() << "OpenCLBuffer: failed to add subBuffer for symbol " << name
<< "\n";
return false;
}
return true;
}
OpenCLBuffer::~OpenCLBuffer() { clReleaseMemObject(buffer_); }
} // namespace runtime
} // namespace glow

Expand Down Expand Up @@ -378,15 +356,6 @@ void OpenCLDeviceManager::addNetworkImpl(const Module *module,
clFinish(commands);
}
usedMemoryBytes_ += sizeInBytes;

// Add a sub-buffer for each symbol in the symbol table. OpenCL sub-buffers
// are essentially TensorViews in Glow.
for (auto &pair : bundle.getSymbolTable()) {
bool success = buffer->addSubBuffer(pair.first, pair.second.offset,
pair.second.size);
DCHECK(success);
}

// Compile the CL program.
// Add to the function name lookup map.
// Add shared pointer to the buffer to buffers. This way the buffer will
Expand All @@ -407,7 +376,6 @@ void OpenCLDeviceManager::addNetworkImpl(const Module *module,
programs_.emplace(func.first, program);
functions_.emplace(func.first, func.second);
buffers_.emplace(func.first, buffer);

buffer->incrementUsers();

DCHECK_LE(usedMemoryBytes_, maxMemoryBytes_);
Expand Down Expand Up @@ -705,7 +673,7 @@ void OpenCLDeviceManager::runFunctionImpl(
auto program = programs_[function];
auto clBindings = glow::make_unique<runtime::OpenCLDeviceBindings>(
buffers_[function]->getBuffer(), queue.backingQueue, deviceId_, context_,
program, buffers_[function]->getSubBuffers());
program);

// Copy inputs to the device.
copyInputsToDevice(func->getRuntimeBundle(), context.get(), clBindings.get());
Expand Down
11 changes: 0 additions & 11 deletions lib/Backends/OpenCL/OpenCLDeviceManager.h
Expand Up @@ -98,9 +98,6 @@ class OpenCLBuffer {
/// The OpenCL buffer being stored.
cl_mem buffer_;

/// Subbuffers for symbols.
std::unordered_map<std::string, cl_mem> subBuffers_;

/// Count of functions using this buffer.
unsigned int users_{0};

Expand All @@ -123,14 +120,6 @@ class OpenCLBuffer {

/// Get size of buffer in bytes.
size_t getSize() { return size_; }

/// Return the mapping from Symbol name to subBuffer for this Buffer.
const std::unordered_map<std::string, cl_mem> &getSubBuffers() {
return subBuffers_;
}

/// Add a mapping from a Symbol name to an offset into buffer_;
bool addSubBuffer(std::string name, size_t offset, size_t size);
};

/// A class controlling a single OpenCL device. Many OpenCLFunctions may be
Expand Down

0 comments on commit 5dac6b2

Please sign in to comment.