Skip to content
Permalink
Browse files

Revert "Add dynamic buffer support to OCL Backend (#3765)" (#3784)

Summary:
This reverts commit bd69664.

I had thought that I had gotten the last POCL issue in #3765, but I had not. Reverting to fix the OCL build.

Honestly, this last issue (AMD/POCL requires sub-buffers to be aligned) seems to torpedo the whole idea. I can't think of any way to handle Glow TensorViews on the host - which means passing buffer + offset everywhere we pass a buffer below. Essentially this would mean rewriting the whole thing.

Very frustrating since that alignment restriction on subBuffers makes no sense, and no other OCL implementation has it.
Pull Request resolved: #3784

Differential Revision: D18480248

Pulled By: nickgg

fbshipit-source-id: 9b05009ea901a0f477805e6c946faac34d9bc303
  • Loading branch information
nickgg authored and facebook-github-bot committed Nov 13, 2019
1 parent eb48b31 commit 5dac6b29fe0e440824e0ff35ba5d7e00c23d0cba
@@ -140,7 +140,7 @@ elif [[ "$CIRCLE_JOB" == "PYTORCH" ]]; then
cd build
elif [[ "$CIRCLE_JOB" == "OPENCL" ]]; then
install_pocl
CMAKE_ARGS+=("-DGLOW_WITH_OPENCL=ON" "-DGLOW_OPENCL_ALIGN=128")
CMAKE_ARGS+=("-DGLOW_WITH_OPENCL=ON")
else
CMAKE_ARGS+=("-DCMAKE_BUILD_TYPE=Debug")
if [[ "${CIRCLE_JOB}" == "SHARED" ]]; then

Large diffs are not rendered by default.

@@ -145,7 +145,8 @@ class OpenCLFunction final : public CompiledFunction {
/// Fill the device \p buffer with a given \p value.
/// \param len number of buffer elements to be filled by the \p value.
/// Elements are considered to be of the type described by \p elemKind.
void fillBuffer(cl_mem buffer, uint64_t len, float value, ElemKind elemKind,
void fillBuffer(cl_mem buffer, uint64_t start, uint64_t len, float value,
ElemKind elemKind,
runtime::OpenCLDeviceBindings *devBindings);

/// Execute a convolution instruction which uses NCHW format.
@@ -243,13 +244,10 @@ namespace runtime {
/// device specific information used to run a compiled function on a specific
/// device.
struct OpenCLDeviceBindings : DeviceBindings {
OpenCLDeviceBindings(
cl_mem buffer, cl_command_queue commands, cl_device_id device,
cl_context ctx, cl_program prog,
const std::unordered_map<std::string, cl_mem> &subBuffers)
OpenCLDeviceBindings(cl_mem buffer, cl_command_queue commands,
cl_device_id device, cl_context ctx, cl_program prog)
: DeviceBindings(OCLBackend::getName()), deviceBuffer{buffer},
commandQueue{commands}, deviceId{device}, context{ctx}, program{prog},
weightBuffers(subBuffers) {}
commandQueue{commands}, deviceId{device}, context{ctx}, program{prog} {}

/// CL memory buffer. Currently this contains both mutable and immutable
/// weights, the buffer is allocated once when the network is added.
@@ -273,12 +271,6 @@ struct OpenCLDeviceBindings : DeviceBindings {

/// A list of kernels and their associated events.
std::vector<KernelLaunch> kernelLaunches;

/// Buffers or subBuffers associated with symbols.
std::unordered_map<std::string, cl_mem> weightBuffers;

/// \returns the subBuffer associated with a Value.
cl_mem getBuffer(glow::Value *v);
};
} // namespace runtime
} // namespace glow
@@ -55,29 +55,7 @@ DeviceManager *createOCLDeviceManager(const DeviceConfig &config) {
return new OpenCLDeviceManager(config);
}

OpenCLBuffer::~OpenCLBuffer() {
for (auto buf : subBuffers_) {
clReleaseMemObject(buf.second);
}
subBuffers_.clear();

clReleaseMemObject(buffer_);
}

/// Add a mapping from a Symbol name to an offset into buffer_.
bool OpenCLBuffer::addSubBuffer(std::string name, size_t offset, size_t size) {
cl_buffer_region region({offset, size});
cl_int err;
auto buf = clCreateSubBuffer(buffer_, CL_MEM_READ_WRITE,
CL_BUFFER_CREATE_TYPE_REGION, &region, &err);
auto res = subBuffers_.emplace(name, buf);
if (!res.second) {
llvm::dbgs() << "OpenCLBuffer: failed to add subBuffer for symbol " << name
<< "\n";
return false;
}
return true;
}
OpenCLBuffer::~OpenCLBuffer() { clReleaseMemObject(buffer_); }
} // namespace runtime
} // namespace glow

@@ -378,15 +356,6 @@ void OpenCLDeviceManager::addNetworkImpl(const Module *module,
clFinish(commands);
}
usedMemoryBytes_ += sizeInBytes;

// Add a sub-buffer for each symbol in the symbol table. OpenCL sub-buffers
// are essentially TensorViews in Glow.
for (auto &pair : bundle.getSymbolTable()) {
bool success = buffer->addSubBuffer(pair.first, pair.second.offset,
pair.second.size);
DCHECK(success);
}

// Compile the CL program.
// Add to the function name lookup map.
// Add shared pointer to the buffer to buffers. This way the buffer will
@@ -407,7 +376,6 @@ void OpenCLDeviceManager::addNetworkImpl(const Module *module,
programs_.emplace(func.first, program);
functions_.emplace(func.first, func.second);
buffers_.emplace(func.first, buffer);

buffer->incrementUsers();

DCHECK_LE(usedMemoryBytes_, maxMemoryBytes_);
@@ -705,7 +673,7 @@ void OpenCLDeviceManager::runFunctionImpl(
auto program = programs_[function];
auto clBindings = glow::make_unique<runtime::OpenCLDeviceBindings>(
buffers_[function]->getBuffer(), queue.backingQueue, deviceId_, context_,
program, buffers_[function]->getSubBuffers());
program);

// Copy inputs to the device.
copyInputsToDevice(func->getRuntimeBundle(), context.get(), clBindings.get());
@@ -98,9 +98,6 @@ class OpenCLBuffer {
/// The OpenCL buffer being stored.
cl_mem buffer_;

/// Subbuffers for symbols.
std::unordered_map<std::string, cl_mem> subBuffers_;

/// Count of functions using this buffer.
unsigned int users_{0};

@@ -123,14 +120,6 @@ class OpenCLBuffer {

/// Get size of buffer in bytes.
size_t getSize() { return size_; }

/// Return the mapping from Symbol name to subBuffer for this Buffer.
const std::unordered_map<std::string, cl_mem> &getSubBuffers() {
return subBuffers_;
}

/// Add a mapping from a Symbol name to an offset into buffer_.
bool addSubBuffer(std::string name, size_t offset, size_t size);
};

/// A class controlling a single OpenCL device. Many OpenCLFunctions may be

0 comments on commit 5dac6b2

Please sign in to comment.
You can’t perform that action at this time.