Skip to content
Permalink
Browse files

Intel Neural-Network Processor for Inference (NNPI) backend

Summary:
Adding the Intel Neural-Network Processor for Inference (NNPI) backend.
Pull Request resolved: #3605
GitHub Author: Jacob Subag <jacob.subag@intel.com>

fbshipit-source-id: 52179d481f9128c3afbb32ab19abf12ebf168bff
  • Loading branch information...
jsubag authored and arunm-git committed Oct 14, 2019
1 parent 705804c commit 8439a0c0dc6f475f7ca4284fc36c559f2502e7ba
Showing with 4,761 additions and 24 deletions.
  1. +2 −0 .gitignore
  2. +5 −0 CMakeLists.txt
  3. +24 −0 lib/Backends/DeviceManagers.cpp
  4. +114 −0 lib/Backends/NNPI/BlockStream.cpp
  5. +67 −0 lib/Backends/NNPI/BlockStream.h
  6. +58 −0 lib/Backends/NNPI/CMakeLists.txt
  7. +199 −0 lib/Backends/NNPI/DebugMacros.h
  8. +1,648 −0 lib/Backends/NNPI/Importer.cpp
  9. +155 −0 lib/Backends/NNPI/Importer.h
  10. +483 −0 lib/Backends/NNPI/InferencePool.cpp
  11. +89 −0 lib/Backends/NNPI/InferencePool.h
  12. +332 −0 lib/Backends/NNPI/NNPI.cpp
  13. +54 −0 lib/Backends/NNPI/NNPI.h
  14. +130 −0 lib/Backends/NNPI/NNPICompiledFunction.cpp
  15. +72 −0 lib/Backends/NNPI/NNPICompiledFunction.h
  16. +228 −0 lib/Backends/NNPI/NNPIDeviceManager.cpp
  17. +91 −0 lib/Backends/NNPI/NNPIDeviceManager.h
  18. +22 −0 lib/Backends/NNPI/NNPIFactory.cpp
  19. +52 −0 lib/Backends/NNPI/tests/NNPIBackendCorrectnessTest.cpp
  20. +20 −0 lib/Backends/NNPI/tests/NNPIBackendTest.cpp
  21. +22 −0 lib/Backends/NNPI/tests/NNPIDeviceManagerTest.cpp
  22. +37 −0 lib/Backends/NNPI/tests/NNPIGradCheckTest.cpp
  23. +20 −0 lib/Backends/NNPI/tests/NNPIGraphOptzTest.cpp
  24. +36 −0 lib/Backends/NNPI/tests/NNPIMLTest.cpp
  25. +22 −0 lib/Backends/NNPI/tests/NNPIOperatorGradTest.cpp
  26. +160 −0 lib/Backends/NNPI/tests/NNPIOperatorTest.cpp
  27. +20 −0 lib/Backends/NNPI/tests/NNPIParameterSweepTest.cpp
  28. +20 −0 lib/Backends/NNPI/tests/NNPIQuantizationTest.cpp
  29. +22 −0 lib/Backends/NNPI/tests/NNPIRecommendationSystemTest.cpp
  30. +20 −0 lib/Backends/NNPI/tests/NNPISparseLengthsSumTest.cpp
  31. +26 −0 lib/Backends/NNPI/tests/NNPITraceEventsTest.cpp
  32. +20 −0 lib/Backends/NNPI/tests/NNPITypeAToTypeBFunctionConverterTest.cpp
  33. +9 −0 lib/Exporter/ONNXModelWriter.cpp
  34. +4 −1 lib/Onnxifi/onnxifiGlow.cpp
  35. +20 −20 tests/stress/ParameterSweepTest.cpp
  36. +14 −3 tests/stress/SparseLengthsSumTest.cpp
  37. +4 −0 tests/unittests/BackendTestUtils.h
  38. +366 −0 tests/unittests/NNPIBackendTestUtils.h
  39. +16 −0 tools/ClassGen/Backends/NNPI/CMakeLists.txt
  40. +31 −0 tools/ClassGen/Backends/NNPI/NNPISpecificNodes.h
  41. +22 −0 tools/ClassGen/Backends/NNPI/NNPISpecificNodesVerification.h
  42. +4 −0 tools/ClassGen/CMakeLists.txt
  43. +1 −0 tools/ClassGen/NodeGen.cpp
@@ -26,4 +26,6 @@ externalbackends/*/
.vim/
.idea/
.ccls-cache/
.vs*
build_*/*
.clangd/
@@ -8,6 +8,7 @@ enable_testing()
option(GLOW_WITH_CPU "Build the LLVM-based JIT CPU backend" ON)
option(GLOW_WITH_LLVMIRCODEGEN "Build the LLVM-based code generation library" ON)
option(GLOW_WITH_OPENCL "Build the OpenCL backend" OFF)
option(GLOW_WITH_NNPI "Build the NNPI backend" OFF)
option(GLOW_WITH_HABANA "Build the Habana backend" OFF)
option(GLOW_BUILD_EXAMPLES "Build the examples" ON)
option(GLOW_BUILD_PYTORCH_INTEGRATION "Build integration for PyTorch" OFF)
@@ -115,6 +116,10 @@ if (GLOW_WITH_OPENCL)
find_package(OpenCL REQUIRED)
endif ()

# Expose the NNPI option as a compile definition so sources can register the
# backend conditionally (e.g. the #if defined(GLOW_WITH_NNPI) guards in
# lib/Backends/DeviceManagers.cpp).
if(GLOW_WITH_NNPI)
add_definitions(-DGLOW_WITH_NNPI=1)
endif()

if (GLOW_WITH_HABANA)
add_definitions(-DGLOW_WITH_HABANA=1)

@@ -63,6 +63,27 @@ unsigned numCPUDevices() { return std::thread::hardware_concurrency(); }
unsigned numCPUDevices() { return 0; }
#endif

#if defined(GLOW_WITH_NNPI)
/// Counts NNPI devices by scanning the kernel's PCI device table for entries
/// bound to the SPH PCIe driver.
/// TODO: unify with numHabanaDevices - same scan with a different driver name.
unsigned numNNPIDevices() {
  unsigned deviceCount = 0;
  std::ifstream pciTable("/proc/bus/pci/devices");
  for (std::string row; std::getline(pciTable, row);) {
    if (row.find("sph_pcie") != std::string::npos) {
      ++deviceCount;
    }
  }
  if (deviceCount == 0) {
    // TODO: No physical devices were found; fall back to a single emulator
    // device since GLOW_WITH_NNPI is enabled. This feels hacky.
    deviceCount = 1;
  }
  return deviceCount;
}
#else
unsigned numNNPIDevices() { return 0; }
#endif

#if defined(GLOW_WITH_HABANA)
unsigned numHabanaDevices() {
std::ifstream devices("/proc/bus/pci/devices");
@@ -98,6 +119,9 @@ unsigned DeviceManager::numDevices(llvm::StringRef backendName) {
if (backendName == "OpenCL") {
return numOpenCLDevices();
}
if (backendName == "NNPI") {
return numNNPIDevices();
}
return 0;
}

@@ -0,0 +1,114 @@
/*
* Copyright (c) Glow Contributors. See CONTRIBUTORS file.
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#include "BlockStream.h"

#include "DebugMacros.h"

#include <algorithm>
#include <cstring>
#include <utility>

namespace glow {

/// Constructs a stream of \p pageBlockSize-byte blocks and pre-allocates
/// enough blocks to hold \p preallocSize bytes; both offsets start at 0.
BlockStream::BlockStream(size_t preallocSize, uint32_t pageBlockSize)
    : readOffest_(0), writeOffest_(0), blockSize_(pageBlockSize) {
  allocateBlocks(preallocSize);
}

/// Releases all block storage on destruction.
BlockStream::~BlockStream() { releaseMemory(); }

/// Appends \p size bytes from \p buffer at the current write offset.
/// \returns the number of bytes written: \p size on success, 0 if enough
/// memory could not be allocated up front.
size_t BlockStream::write(const char *buffer, size_t size) {
  // Make sure we have space to copy.
  if (allocateBlocks(size) < size) {
    return 0;
  }

  const char *bStart = &buffer[0];
  size_t copied = 0;
  // Invariant: writes only ever resume where the previous write ended, so the
  // first target block's current size equals offsetInBlock and back_inserter
  // appends at exactly the right position.
  size_t offsetInBlock = writeOffest_ % blockSize_;
  size_t blockIndex = writeOffest_ / blockSize_;
  while (size - copied > 0) {
    std::vector<char> &currentBlock = blocks_[blockIndex++];
    // Copy at most up to the end of the current block; any remainder
    // continues in the next block from offset 0.
    size_t blockCopySize =
        std::min(blockSize_ - offsetInBlock, (size - copied));
    const char *bEnd = &bStart[blockCopySize];
    auto dstBlockIt = std::back_inserter(currentBlock);
    std::copy(bStart, bEnd, dstBlockIt);
    bStart = bEnd;
    copied += blockCopySize;
    offsetInBlock = 0;
  }
  writeOffest_ += copied;
  return copied;
}

size_t BlockStream::read(char *buffer, size_t size) {
char *bStart = &buffer[0];
size_t readBytes = 0;
size_t offsetInBlock = readOffest_ % blockSize_;
size_t blockIndex = readOffest_ / blockSize_;
while (size - readBytes > 0) {
std::vector<char> &currentBlock = blocks_[blockIndex++];
size_t blockCopySize =
std::min(blockSize_ - offsetInBlock, (size - readBytes));
auto srcStartIt = currentBlock.begin() + offsetInBlock;
auto srcEndIt = srcStartIt + blockCopySize;
std::copy(srcStartIt, srcEndIt, bStart);
bStart = &bStart[blockCopySize];
readBytes += blockCopySize;
offsetInBlock = 0;
}
readOffest_ += readBytes;
return readBytes;
}

/// \returns the number of allocated-but-unwritten bytes, assuming each block
/// contributes exactly blockSize_ usable bytes.
size_t BlockStream::getFreeAllocatedSpace() {
  return blockSize_ * blocks_.size() - writeOffest_;
}

/// Ensures at least \p size bytes of free allocated space exist past the
/// current write offset, allocating new blocks as needed.
/// \returns the free allocated space after allocation.
size_t BlockStream::allocateBlocks(size_t size) {
  size_t available = getFreeAllocatedSpace();
  if (available >= size) {
    // No need to allocate new blocks.
    return available;
  }
  size_t missingSize = size - available;

  while (missingSize > 0) {
    std::vector<char> block;
    // reserve() either succeeds (capacity >= blockSize_) or throws
    // std::bad_alloc; it never returns with a short allocation.
    block.reserve(blockSize_);
    // Account exactly blockSize_ usable bytes per block. write() and
    // getFreeAllocatedSpace() assume blockSize_ bytes per block, so counting
    // the (possibly larger) actual capacity here could overstate the free
    // space and let write() index past blocks_.
    available += blockSize_;
    missingSize = (missingSize > blockSize_) ? (missingSize - blockSize_) : 0;
    // Move, don't copy: copying an empty reserved vector need not preserve
    // its capacity, which would silently discard the pre-allocation.
    blocks_.push_back(std::move(block));
  }
  return available;
}

void BlockStream::resetWrite() {
for (auto &block : blocks_) {
block.clear();
}
writeOffest_ = 0;
}

/// Frees all block storage and resets both offsets to 0.
void BlockStream::releaseMemory() {
  blocks_.clear();
  // shrink_to_fit is a non-binding request, but after clear() it typically
  // releases the outer vector's storage as well.
  blocks_.shrink_to_fit();
  reset();
}

} // namespace glow
@@ -0,0 +1,67 @@
/*
* Copyright (c) Glow Contributors. See CONTRIBUTORS file.
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#ifndef GLOW_NNPI_BLOCK_STREAM_H
#define GLOW_NNPI_BLOCK_STREAM_H
#include <memory>
#include <vector>
namespace glow {

#define DEFAULT_BLOCK_STREAM_BLOCK_SIZE (1024 * 1024 * 10)
/// A growable in-memory byte stream backed by a list of fixed-size blocks,
/// with independent sequential read and write positions.
class BlockStream {
public:
  /// Default constructor with 0 pre-allocated blocks and 10M page size.
  BlockStream() : BlockStream(0, DEFAULT_BLOCK_STREAM_BLOCK_SIZE) {}
  /// Constructor with preallocSize pre-allocated memory and 10M page size.
  explicit BlockStream(size_t preallocSize)
      : BlockStream(preallocSize, DEFAULT_BLOCK_STREAM_BLOCK_SIZE) {}
  /// Constructor with preallocSize pre-allocated memory and pageBlockSize page
  /// size.
  BlockStream(size_t preallocSize, uint32_t pageBlockSize);

  /// Destructor: cleans up the allocated blocks.
  virtual ~BlockStream();

  /// Read from stream into buffer (updates read offset); returns bytes read.
  size_t read(char *buffer, size_t size);
  /// Write buffer to stream (updates write offset); returns bytes written.
  size_t write(const char *buffer, size_t size);
  /// Get size of written data.
  size_t getSize() { return writeOffest_; }
  /// Reset read offset (not changing stream data - can re-read).
  void resetRead() { readOffest_ = 0; }
  /// Reset write offset (not deleting allocated memory).
  void resetWrite();
  /// Reset both read and write offsets (not deleting allocated memory).
  void reset() {
    resetRead();
    resetWrite();
  }
  /// Release allocated memory and reset.
  void releaseMemory();

private:
  // NOTE: "Offest" is a historical misspelling of "offset"; kept as-is here
  // because renaming would touch every use in BlockStream.cpp.
  /// Next position read() consumes from.
  uint64_t readOffest_;
  /// Total bytes written so far; next position write() appends at.
  uint64_t writeOffest_;
  /// Usable bytes per block.
  size_t blockSize_;
  /// Backing storage; each inner vector is one block of up to blockSize_.
  std::vector<std::vector<char>> blocks_;

  /// Number of allocated-but-unwritten bytes.
  size_t getFreeAllocatedSpace();
  /// Ensure at least size bytes of free allocated space; returns the free
  /// space after allocation.
  size_t allocateBlocks(size_t size);
};

} // namespace glow

#endif // GLOW_NNPI_BLOCK_STREAM_H
@@ -0,0 +1,58 @@
# Copyright (c) Glow Contributors. See CONTRIBUTORS file.
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

cmake_minimum_required (VERSION 3.3)

# The NNPI transformer (model compilation) SDK location must be supplied by
# the including build via NNPI_API_DIR / NNPI_LIB_DIR.
if (NOT DEFINED NNPI_API_DIR OR NOT DEFINED NNPI_LIB_DIR)
message(FATAL_ERROR "Missing NNPI Transformer Dirs")
endif()

message(STATUS "[NNPI] NNPI_API_DIR = ${NNPI_API_DIR}")
message(STATUS "[NNPI] NNPI_LIB_DIR = ${NNPI_LIB_DIR}")

# Likewise for the NNPI inference (run-time) SDK location.
if (NOT DEFINED NNPI_INF_API_DIR OR NOT DEFINED NNPI_INF_LIB_DIR)
message(FATAL_ERROR "Missing NNPI Inference Dirs")
endif()

message(STATUS "[NNPI] NNPI_INF_API_DIR = ${NNPI_INF_API_DIR}")
message(STATUS "[NNPI] NNPI_INF_LIB_DIR = ${NNPI_INF_LIB_DIR}")
message(STATUS "[NNPI] GLOW_BINARY_DIR = ${GLOW_BINARY_DIR}")

# Headers for both SDKs.
include_directories(
${NNPI_API_DIR}
${NNPI_INF_API_DIR}
)

# The NNPI backend library itself.
add_library(NNPI
NNPI.cpp
NNPICompiledFunction.cpp
Importer.cpp
InferencePool.cpp
NNPIDeviceManager.cpp
NNPIFactory.cpp
BlockStream.cpp
)

# Link against Glow core libraries and the prebuilt NNPI shared objects.
target_link_libraries(NNPI
PRIVATE
Backend
Base
Graph
CodeGen
IR
Support
${NNPI_LIB_DIR}/libnnpi_transformer.so
${NNPI_INF_LIB_DIR}/libnnpi_inference.so
)

# Register this backend with the parent build's list of linked backends.
set(linked_backends ${linked_backends} NNPI PARENT_SCOPE)

0 comments on commit 8439a0c

Please sign in to comment.
You can’t perform that action at this time.