diff --git a/lib/Backends/NNPI/Importer.cpp b/lib/Backends/NNPI/Importer.cpp index b8e339ffa9..874bc88b37 100644 --- a/lib/Backends/NNPI/Importer.cpp +++ b/lib/Backends/NNPI/Importer.cpp @@ -1430,74 +1430,12 @@ class RQFCNodeImporter : public INNPINodeImporter { auto *glowRowwiseFC = llvm::dyn_cast(n); LOG_AND_RETURN_IF_NOT(ERROR, glowRowwiseFC, "Bad node type", NNPI_INVALID_PARAM); - LOG_AND_RETURN_IF_NOT( - ERROR, glowRowwiseFC->getInput().getType()->getOffset() == 0.f, - (std::string("Bad input offset value") + - std::to_string(glowRowwiseFC->getInput().getType()->getOffset())), - NNPI_INVALID_PARAM); - LOG_AND_RETURN_IF_NOT( - ERROR, glowRowwiseFC->getResult().getType()->getOffset() == 0.f, - (std::string("Bad result offset value") + - std::to_string(glowRowwiseFC->getResult().getType()->getOffset())), - NNPI_INVALID_PARAM); LOG_AND_RETURN_IF_NOT( ERROR, !(glowRowwiseFC->getOffsets()) || importer.zeroes(nodeValueName(glowRowwiseFC->getOffsets()).c_str()), "Bad offset value", NNPI_INVALID_PARAM); - // Add internal tensor for Symlowp input. - std::string symlowpInputName = - NNPIImporter::internalName_ + - nodeValueName(glowRowwiseFC->getInput()).c_str() + "_symlowp"; - auto *inType = glowRowwiseFC->getInput().getType(); - LOG_NNPI_IF_ERROR_RETURN_VALUE( - importer.addValue(symlowpInputName, inType, - /* alternativeLayout */ inType->dims().size() == 4, - /* input */ false, /* output */ false, {}, {}, - /* forceSymlowp */ true), - "Failed to add value"); - - // Add internal tensor for Symlowp output. 
- std::string symlowpOutputName = - NNPIImporter::internalName_ + - nodeValueName(glowRowwiseFC->getResult()).c_str() + "_symlowp"; - auto *outType = glowRowwiseFC->getResult().getType(); - LOG_NNPI_IF_ERROR_RETURN_VALUE( - importer.addValue(symlowpOutputName, outType, - /* alternativeLayout */ outType->dims().size() == 4, - /* input */ false, /* output */ false, {}, {}, - /* forceSymlowp */ true), - "Failed to add value"); - - // Add convert op from Gemmlowp input to Symlowp. - std::string convertInputName = NNPIImporter::internalName_ + - glowRowwiseFC->getName().begin() + - "_convert_input"; - std::string convertInputInputName = - nodeValueName(glowRowwiseFC->getInput()); - if (!importer.hasChannelWiseConverter(convertInputInputName)) { - LOG_NNPI_IF_ERROR_RETURN_VALUE( - nnpiNetworkAddConvertOp( - importer.getNetwork(), convertInputName.c_str(), - convertInputInputName.c_str(), symlowpInputName.c_str()), - "Failed to add layer"); - importer.addChannelWiseConverter(convertInputInputName); - } - - // Add convert op from Symlowp output to Gemmlowp. - std::string convertOutputName = NNPIImporter::internalName_ + - glowRowwiseFC->getName().begin() + - "_convert_output"; - std::string convertOutputOutputName = - nodeValueName(glowRowwiseFC->getResult()); - LOG_NNPI_IF_ERROR_RETURN_VALUE( - nnpiNetworkAddConvertOp( - importer.getNetwork(), convertOutputName.c_str(), - symlowpOutputName.c_str(), convertOutputOutputName.c_str()), - "Failed to add layer"); - importer.addChannelWiseConverter(convertOutputOutputName); - // Create the weights with no offset tensor. // Assert weights & biases have no offset or all zeroes. 
@@ -1534,17 +1472,14 @@ class RQFCNodeImporter : public INNPINodeImporter { nodeValueName(glowRowwiseFC->getInput()), nodeValueName(glowRowwiseFC->getWeights()), nodeValueName(glowRowwiseFC->getBias()), - symlowpInputName, - symlowpOutputName, }, { nodeValueName(glowRowwiseFC->getResult()), - symlowpInputName, - symlowpOutputName, }); return nnpiNetworkAddFullyConnectedOp( importer.getNetwork(), glowRowwiseFC->getName().begin(), - symlowpInputName.c_str(), symlowpOutputName.c_str(), + nodeValueName(glowRowwiseFC->getInput()).c_str(), + nodeValueName(glowRowwiseFC->getResult()).c_str(), nodeValueName(glowRowwiseFC->getWeights()).c_str(), glowRowwiseFC->getBias() ? nodeValueName(glowRowwiseFC->getBias()).c_str() @@ -1560,7 +1495,6 @@ class ChannelwiseQuantizedConvolutionNodeImporter : public INNPINodeImporter { llvm::dyn_cast(n); LOG_AND_RETURN_IF_NOT(ERROR, glowChannelwiseQuantizedConv, "Bad node type", NNPI_INVALID_PARAM); - LOG_AND_RETURN_IF_NOT( ERROR, !(glowChannelwiseQuantizedConv->getOffsets()) || @@ -1597,60 +1531,6 @@ class ChannelwiseQuantizedConvolutionNodeImporter : public INNPINodeImporter { glowChannelwiseQuantizedConv->getStrides()[1]}; uint32_t dilation[SPATIAL_DIMS2] = {1, 1}; // No dilation, default values - // Add internal tensor for Symlowp input. - std::string symlowpInputName = - NNPIImporter::internalName_ + - nodeValueName(glowChannelwiseQuantizedConv->getInput()).c_str() + - "_symlowp"; - auto *inType = glowChannelwiseQuantizedConv->getInput().getType(); - LOG_NNPI_IF_ERROR_RETURN_VALUE( - importer.addValue(symlowpInputName, inType, - /* alternativeLayout */ inType->dims().size() == 4, - /* input */ false, /* output */ false, {}, {}, - /* forceSymlowp */ true), - "Failed to add value"); - - // Add internal tensor for Symlowp output. 
- std::string symlowpOutputName = - NNPIImporter::internalName_ + - nodeValueName(glowChannelwiseQuantizedConv->getResult()).c_str() + - "_symlowp"; - auto *outType = glowChannelwiseQuantizedConv->getResult().getType(); - LOG_NNPI_IF_ERROR_RETURN_VALUE( - importer.addValue(symlowpOutputName, outType, - /* alternativeLayout */ outType->dims().size() == 4, - /* input */ false, /* output */ false, {}, {}, - /* forceSymlowp */ true), - "Failed to add value"); - - // Add convert op from Gemmlowp input to Symlowp. - std::string convertInputName = - NNPIImporter::internalName_ + - glowChannelwiseQuantizedConv->getName().begin() + "_convert_input"; - std::string convertInputInputName = - nodeValueName(glowChannelwiseQuantizedConv->getInput()); - if (!importer.hasChannelWiseConverter(convertInputInputName)) { - LOG_NNPI_IF_ERROR_RETURN_VALUE( - nnpiNetworkAddConvertOp( - importer.getNetwork(), convertInputName.c_str(), - convertInputInputName.c_str(), symlowpInputName.c_str()), - "Failed to add layer"); - importer.addChannelWiseConverter(convertInputInputName); - } - - // Add convert op from Symlowp output to Gemmlowp. - std::string convertOutputName = - NNPIImporter::internalName_ + - glowChannelwiseQuantizedConv->getName().begin() + "_convert_output"; - std::string convertOutputOutputName = - nodeValueName(glowChannelwiseQuantizedConv->getResult()); - LOG_NNPI_IF_ERROR_RETURN_VALUE( - nnpiNetworkAddConvertOp( - importer.getNetwork(), convertOutputName.c_str(), - symlowpOutputName.c_str(), convertOutputOutputName.c_str()), - "Failed to add layer"); - importer.addChannelWiseConverter(convertOutputOutputName); - // Create the weights with no offset tensor. // Assert weights & biases have no offset or all zeroes. 
@@ -1694,18 +1574,15 @@ class ChannelwiseQuantizedConvolutionNodeImporter : public INNPINodeImporter { nodeValueName(glowChannelwiseQuantizedConv->getInput()), nodeValueName(glowChannelwiseQuantizedConv->getFilter()), nodeValueName(glowChannelwiseQuantizedConv->getBias()), - symlowpInputName, - symlowpOutputName, }, { nodeValueName(glowChannelwiseQuantizedConv->getResult()), - symlowpInputName, - symlowpOutputName, }); return nnpiNetworkAddConvolutionOp( importer.getNetwork(), glowChannelwiseQuantizedConv->getName().begin(), - symlowpInputName.c_str(), symlowpOutputName.c_str(), + nodeValueName(glowChannelwiseQuantizedConv->getInput()).c_str(), + nodeValueName(glowChannelwiseQuantizedConv->getResult()).c_str(), nodeValueName(glowChannelwiseQuantizedConv->getFilter()).c_str(), glowChannelwiseQuantizedConv->getBias() ? nodeValueName(glowChannelwiseQuantizedConv->getBias()).c_str() diff --git a/lib/Backends/NNPI/InferenceContext.cpp b/lib/Backends/NNPI/InferenceContext.cpp old mode 100755 new mode 100644 diff --git a/lib/Backends/NNPI/InferencePool.cpp b/lib/Backends/NNPI/InferencePool.cpp index bb3463db1d..4311dc6057 100644 --- a/lib/Backends/NNPI/InferencePool.cpp +++ b/lib/Backends/NNPI/InferencePool.cpp @@ -28,7 +28,7 @@ namespace glow { namespace runtime { InferencePoolEnv::InferencePoolEnv() - : numWorkers_(0), deviceOptions_(nullptr), nnpiCompiledFunction_(nullptr), + : deviceOptions_(nullptr), nnpiCompiledFunction_(nullptr), staticPlaceholderMap_(nullptr) {} InferencePoolEnv::~InferencePoolEnv() { @@ -41,8 +41,7 @@ InferencePoolEnv::~InferencePoolEnv() { } } -Error InferencePoolEnv::init(unsigned numWorkers, NNPIAdapter adapter, - NNPIDeviceContext device, +Error InferencePoolEnv::init(NNPIAdapter adapter, NNPIDeviceContext device, std::shared_ptr deviceTracing, CompiledFunction *compiledFunction, StaticPlaceholderMap *staticPlaceholderMap, @@ -57,20 +56,24 @@ Error InferencePoolEnv::init(unsigned numWorkers, NNPIAdapter adapter, if (workersPool_) { return 
MAKE_ERR("InferencePool already initialized!"); } - numWorkers_ = numWorkers; + + nnpiCompiledFunction_ = static_cast(compiledFunction); + size_t optionsNumWorkers = + nnpiCompiledFunction_->getCompilationOptions().numWorkers; + // Ice-ref not re-entrant for the same nnpiNetwork. + size_t numWorkers = deviceOptions_->inferOnDevice ? optionsNumWorkers : 1; workersPool_ = glow::make_unique( - numWorkers_, std::make_shared("NNPI-worker")); + numWorkers, std::make_shared("NNPI-worker")); deviceTracing_ = deviceTracing; staticPlaceholderMap_ = staticPlaceholderMap; - inferenceContexts_.resize(numWorkers_); - freeContexts_.resize(numWorkers_); - if (inferenceContexts_.size() != numWorkers_) { + inferenceContexts_.resize(numWorkers); + freeContexts_.resize(numWorkers); + if (inferenceContexts_.size() != numWorkers) { return MAKE_ERR("InferencePool failed to create inference contexts"); } // Create host network. - nnpiCompiledFunction_ = static_cast(compiledFunction); NNPIHostNetwork hostNetwork(NNPI_INVALID_NNPIHANDLE); if (deviceOptions_->inferOnDevice) { // Create NNPI host network (load compiled binary). 
diff --git a/lib/Backends/NNPI/InferencePool.h b/lib/Backends/NNPI/InferencePool.h index c9a49671e7..2510f96f85 100644 --- a/lib/Backends/NNPI/InferencePool.h +++ b/lib/Backends/NNPI/InferencePool.h @@ -33,7 +33,6 @@ namespace glow { namespace runtime { class NNPIDeviceBindings; class InferencePoolEnv { - unsigned numWorkers_; std::unique_ptr workersPool_; std::vector inferenceContexts_; std::vector freeContexts_; @@ -53,7 +52,7 @@ class InferencePoolEnv { public: InferencePoolEnv(); ~InferencePoolEnv(); - Error init(unsigned numWorkers, NNPIAdapter adapter, NNPIDeviceContext device, + Error init(NNPIAdapter adapter, NNPIDeviceContext device, std::shared_ptr deviceTracing, CompiledFunction *compiledFunction, StaticPlaceholderMap *staticPlaceholderMap, diff --git a/lib/Backends/NNPI/NNPI.cpp b/lib/Backends/NNPI/NNPI.cpp index ca1590b6ad..30b51a6cac 100644 --- a/lib/Backends/NNPI/NNPI.cpp +++ b/lib/Backends/NNPI/NNPI.cpp @@ -198,7 +198,8 @@ bool NNPIBackend::isOpSupported(const NodeInfo &NI) const { } return NI.allInputsAndOutputsHaveSameElemKind({ElemKind::Int8QTy}, {ConvolutionNode::BiasIdx}) && - (NI.getInElemTy(ConvolutionNode::BiasIdx) == ElemKind::Int32QTy); + ((NI.getInElemTy(ConvolutionNode::BiasIdx) == ElemKind::Int32QTy) || + (NI.getInElemTy(ConvolutionNode::BiasIdx) == ElemKind::FloatTy)); case Kinded::Kind::Convolution3DNodeKind: if (!NI.getInTy(Convolution3DNode::InputIdx)->isQuantizedType()) { @@ -207,7 +208,9 @@ bool NNPIBackend::isOpSupported(const NodeInfo &NI) const { } return NI.allInputsAndOutputsHaveSameElemKind( {ElemKind::Int8QTy}, {Convolution3DNode::BiasIdx}) && - (NI.getInElemTy(Convolution3DNode::BiasIdx) == ElemKind::Int32QTy); + ((NI.getInElemTy(Convolution3DNode::BiasIdx) == + ElemKind::Int32QTy) || + (NI.getInElemTy(Convolution3DNode::BiasIdx) == ElemKind::FloatTy)); case Kinded::Kind::QuantizeNodeKind: return (NI.getInElemTy(QuantizeNode::InputIdx) == ElemKind::FloatTy || NI.getInElemTy(QuantizeNode::InputIdx) == ElemKind::Float16Ty) && 
@@ -238,13 +241,15 @@ bool NNPIBackend::isOpSupported(const NodeInfo &NI) const { } case Kinded::Kind::FullyConnectedNodeKind: - if (!NI.getInTy(ConvolutionNode::InputIdx)->isQuantizedType()) { + if (!NI.getInTy(FullyConnectedNode::InputIdx)->isQuantizedType()) { return NI.allInputsAndOutputsHaveSameElemKind( {ElemKind::FloatTy, ElemKind::Float16Ty}); } return NI.allInputsAndOutputsHaveSameElemKind( {ElemKind::Int8QTy}, {FullyConnectedNode::BiasIdx}) && - (NI.getInElemTy(FullyConnectedNode::BiasIdx) == ElemKind::Int32QTy); + ((NI.getInElemTy(FullyConnectedNode::BiasIdx) == + ElemKind::Int32QTy) || + (NI.getInElemTy(FullyConnectedNode::BiasIdx) == ElemKind::FloatTy)); case Kinded::Kind::MaxPoolNodeKind: return NI.allInputsAndOutputsHaveSameElemKind( @@ -309,8 +314,10 @@ bool NNPIBackend::isOpSupported(const NodeInfo &NI) const { ElemKind::FloatTy) && (NI.getInElemTy(RowwiseQuantizedFullyConnectedNode::OffsetsIdx) == ElemKind::Int32ITy) && - (NI.getInElemTy(RowwiseQuantizedFullyConnectedNode::BiasIdx) == - ElemKind::Int32QTy) && + ((NI.getInElemTy(RowwiseQuantizedFullyConnectedNode::BiasIdx) == + ElemKind::Int32QTy) || + (NI.getInElemTy(RowwiseQuantizedFullyConnectedNode::BiasIdx) == + ElemKind::FloatTy)) && (NI.getOutElemTy(RowwiseQuantizedFullyConnectedNode::ResultIdx) == ElemKind::Int8QTy); @@ -370,7 +377,8 @@ bool NNPIBackend::isOpSupported(const NodeInfo &NI) const { auto resultK = NI.getOutElemTy(EmbeddingBagByteRowwiseOffsetsNode::ResultIdx); return (dataK == ElemKind::UInt8FusedQTy || - dataK == ElemKind::UInt8FusedFP16QTy) && + dataK == ElemKind::UInt8FusedFP16QTy || + dataK == ElemKind::UInt4FusedFP16QTy) && (resultK == ElemKind::FloatTy || resultK == ElemKind::Float16Ty) && (indicesK == ElemKind::Int64ITy) && (offsetsK == ElemKind::Int64ITy); } @@ -500,6 +508,7 @@ bool NNPIBackend::shouldLower(const Node *N) const { case Kinded::Kind::AdaptiveAvgPoolNodeKind: case Kinded::Kind::EmbeddingBagNodeKind: case 
Kinded::Kind::EmbeddingBagByteRowwiseOffsetsNodeKind: + case Kinded::Kind::LayerNormalizationNodeKind: return false; case Kinded::Kind::FusedRowwiseQuantizedSparseLengthsSumNodeKind: { const FusedRowwiseQuantizedSparseLengthsSumNode *SLSN = @@ -510,7 +519,6 @@ bool NNPIBackend::shouldLower(const Node *N) const { return true; } } - case Kinded::Kind::LayerNormalizationNodeKind: case Kinded::Kind::SparseLengthsSumNodeKind: // WA - lower until ICE-T implements it. if (NNPIBackend::backendOptions_.useIceT || @@ -1133,8 +1141,6 @@ traversePostOrder(const runtime::DAGNode *root, Error NNPIBackend::bindContexts( llvm::ArrayRef bindings, const runtime::DAGNode *root, bool enableP2P, bool enableDRT) { - LOG(INFO) << "enableP2P/DRT not yet implemented. enableDRT = " << enableDRT - << ", enableP2P = " << enableP2P << ".\n"; if (backendOptions_.dumpRuntime) { DotWriter::clear(); DotWriter::addSubGraph("Host", "Host"); @@ -1154,10 +1160,12 @@ Error NNPIBackend::bindContexts( nnpiDM->addPlaceholderUsageCount(cb.networkName, phUsage); } - for (const auto &usage : phUsage) { + for (auto &usage : phUsage) { LOG_IF_NOT_RETURN_LLVMERROR( usage.second.numWriters < 2, "Multiple writes to the same placeholder not suported"); + usage.second.disableP2P = !enableP2P; + usage.second.disableDRT = !enableDRT; } for (auto *dagNode : postOrder) { diff --git a/lib/Backends/NNPI/NNPICompiledFunction.cpp b/lib/Backends/NNPI/NNPICompiledFunction.cpp index 386e26d106..29ac57ea88 100644 --- a/lib/Backends/NNPI/NNPICompiledFunction.cpp +++ b/lib/Backends/NNPI/NNPICompiledFunction.cpp @@ -362,16 +362,16 @@ Error NNPICompiledFunction::compile(Function *F, const BackendOptions &opts) { compilationFileName_.c_str(), NULL), "Failed NNPI Compile"); } - } - if (compilationOptions_.inferOnDevice) { - DBG_MEM_USAGE("NNPICompiledFunction destroy network"); - // NNPINetwork is not needed anymore on the inferfence api path. 
- // Once the complied stream is loaded, query on the network can be done - // using the host network instead. - LOG_NNPI_IF_ERROR(nnpiNetworkDestroy(network_), - "Failed NNPI Network Destroy"); - network_ = NNPI_INVALID_NNPIHANDLE; - DBG_MEM_USAGE("NNPICompiledFunction destroy network done"); + if (compilationOptions_.inferOnDevice) { + DBG_MEM_USAGE("NNPICompiledFunction destroy network"); + // NNPINetwork is not needed anymore on the inference api path. + // Once the compiled stream is loaded, query on the network can be done + // using the host network instead. + LOG_NNPI_IF_ERROR(nnpiNetworkDestroy(network_), + "Failed NNPI Network Destroy"); + network_ = NNPI_INVALID_NNPIHANDLE; + DBG_MEM_USAGE("NNPICompiledFunction destroy network done"); + } } // Determine and save what inputs can be treated as partial. Need to do this @@ -390,6 +390,12 @@ Error NNPICompiledFunction::compile(Function *F, const BackendOptions &opts) { return Error::success(); } +NNPICompiledFunction::NNPICompiledFunction(Function *F) + : CompiledFunction(runtime::RuntimeBundle::create(*F)), + compilationOptions_({}) { + std::memset(&config_, 0, sizeof(config_)); +} + NNPICompiledFunction::~NNPICompiledFunction() { if (network_ != NNPI_INVALID_NNPIHANDLE) { LOG_NNPI_IF_ERROR(nnpiNetworkDestroy(network_), diff --git a/lib/Backends/NNPI/NNPICompiledFunction.h b/lib/Backends/NNPI/NNPICompiledFunction.h index ee298672be..d4838981c2 100644 --- a/lib/Backends/NNPI/NNPICompiledFunction.h +++ b/lib/Backends/NNPI/NNPICompiledFunction.h @@ -31,12 +31,10 @@ namespace glow { /// Function "compiled" for execution by the NNPI backend. class NNPICompiledFunction final : public CompiledFunction { public: - NNPICompiledFunction(Function *F) - : CompiledFunction(runtime::RuntimeBundle::create(*F)), - compilationOptions_({}){}; - /// \name CompiledFunction interface. 
///@{ + NNPICompiledFunction(Function *F); + ~NNPICompiledFunction() override; /// Execute the network and allocate Placeholder memory with given diff --git a/lib/Backends/NNPI/NNPIDeviceManager.cpp b/lib/Backends/NNPI/NNPIDeviceManager.cpp old mode 100755 new mode 100644 index 0802d09682..a3915184f9 --- a/lib/Backends/NNPI/NNPIDeviceManager.cpp +++ b/lib/Backends/NNPI/NNPIDeviceManager.cpp @@ -61,30 +61,15 @@ std::atomic NNPIDeviceManager::runIdentifier_; NNPIDeviceManager::NNPIDeviceManager( const DeviceConfig &config, - std::shared_ptr deviceOptions, NNPIAdapter adapter, - unsigned numInferenceWorkers) - : DeviceManager(config), numWorkersPerFunction_(numInferenceWorkers), - deviceId_(config_.deviceID), adapter_(adapter), + std::shared_ptr deviceOptions, NNPIAdapter adapter) + : DeviceManager(config), deviceId_(config_.deviceID), adapter_(adapter), device_(NNPI_INVALID_NNPIHANDLE), deviceOptions_(deviceOptions) { - if (deviceOptions_->showVars) { LOG(INFO) << deviceOptions_->dumpStatus(); } if (deviceOptions_->deviceId >= 0) { deviceId_ = static_cast(deviceOptions_->deviceId); } - - if (!numWorkersPerFunction_) { - numWorkersPerFunction_ = 2; - } - - if (deviceOptions_->numWorkers > 0) { - numWorkersPerFunction_ = deviceOptions_->numWorkers; - } - - // Ice-ref not re-entrant for the same nnpiNetwork. - numWorkersPerFunction_ = - deviceOptions_->inferOnDevice ? numWorkersPerFunction_ : 1; } NNPIDeviceManager::~NNPIDeviceManager() { @@ -202,8 +187,8 @@ void NNPIDeviceManager::addNetwork(const Module *module, functions_.emplace(func.first, func.second); usedMemoryBytes_ += functionCost_; // TODO:: static moduleSize. 
auto err = inferenceEnvs_[func.first].init( - numWorkersPerFunction_, adapter_, device_, deviceTracing_, func.second, - &staticPlaceholders_, deviceOptions_, func.first, deviceId_); + adapter_, device_, deviceTracing_, func.second, &staticPlaceholders_, + deviceOptions_, func.first, deviceId_); if (err) { functions_.erase(func.first); lock.unlock(); diff --git a/lib/Backends/NNPI/NNPIDeviceManager.h b/lib/Backends/NNPI/NNPIDeviceManager.h index 7aee8c6d6b..2ad2a717eb 100644 --- a/lib/Backends/NNPI/NNPIDeviceManager.h +++ b/lib/Backends/NNPI/NNPIDeviceManager.h @@ -53,8 +53,6 @@ class NNPIDeviceManager : public DeviceManager { uint64_t usedMemoryBytes_{0}; /// Static memory cost of the InterpreterFunction. const uint64_t functionCost_{1}; - /// Number of worker threads allocated per loaded function. - unsigned numWorkersPerFunction_; /// Inference id counter. static std::atomic runIdentifier_; @@ -82,8 +80,7 @@ class NNPIDeviceManager : public DeviceManager { public: explicit NNPIDeviceManager(const DeviceConfig &config, std::shared_ptr deviceOptions, - NNPIAdapter adapter, - unsigned numInferenceWorkers = 0); + NNPIAdapter adapter); virtual ~NNPIDeviceManager(); Error init() override; diff --git a/lib/Backends/NNPI/NNPIMLTraceWrapper.cpp b/lib/Backends/NNPI/NNPIMLTraceWrapper.cpp old mode 100755 new mode 100644 diff --git a/lib/Backends/NNPI/NNPIMLTraceWrapper.h b/lib/Backends/NNPI/NNPIMLTraceWrapper.h old mode 100755 new mode 100644 diff --git a/lib/Backends/NNPI/NNPIOptions.h b/lib/Backends/NNPI/NNPIOptions.h index a9c72c6d7f..d9c30ddaba 100644 --- a/lib/Backends/NNPI/NNPIOptions.h +++ b/lib/Backends/NNPI/NNPIOptions.h @@ -251,6 +251,12 @@ class NNPICompilationOptions : public NNPIOptions { DECLARE_NNPI_OPTION(disableConstFolding, bool, "DisableConstFolding", "Disable constant folding during compilation.", "NNPI_DISABLE_CONSTFOLD", "0"); + /// Setting this variable will override the amount of worker threads allocated + /// for the network on the device 
(default:2). + DECLARE_NNPI_OPTION(numWorkers, int, "NumOfWorkers", + "Override the amount of worker threads allocated for the " + "network on the device.", + "NNPI_NUM_WORKERS", "2"); NNPICompilationOptions(const BackendSpecificOptions &parameters) { INIT_NNPI_OPTIONS(useIceT, parameters); @@ -266,6 +272,7 @@ class NNPICompilationOptions : public NNPIOptions { INIT_NNPI_OPTIONS(debugCompileConfigFile, parameters); INIT_NNPI_OPTIONS(reserveResources, parameters); INIT_NNPI_OPTIONS(disableConstFolding, parameters); + INIT_NNPI_OPTIONS(numWorkers, parameters); setLogLevel(this->compilationLogLevel); } @@ -308,12 +315,6 @@ class NNPIDeviceOptions : public NNPIOptions { DECLARE_NNPI_OPTION(deviceId, int, "DeviceID", "Override the target device ID used to run (0,1,...).", "NNPI_DEVICE_ID", "-1"); - /// Setting this variable will override the amount of worker threads allocated - /// per network on the device (default:2). - DECLARE_NNPI_OPTION(numWorkers, int, "NumOfWorkers", - "Override the amount of worker threads allocated per " - "network on the device.", - "NNPI_NUM_WORKERS", "-1"); /// Setting this variable will enabled device tracing (host2device, /// device2host copy infer etc.). 
DECLARE_NNPI_OPTION( @@ -361,7 +362,6 @@ class NNPIDeviceOptions : public NNPIOptions { INIT_NNPI_OPTIONS(inferOnDevice, parameters); INIT_NNPI_OPTIONS(showVars, parameters); INIT_NNPI_OPTIONS(deviceId, parameters); - INIT_NNPI_OPTIONS(numWorkers, parameters); INIT_NNPI_OPTIONS(enabledDeviceTracing, parameters); INIT_NNPI_OPTIONS(deviceMemory, parameters); INIT_NNPI_OPTIONS(enabledCommandLists, parameters); diff --git a/lib/Backends/NNPI/NNPIResource.cpp b/lib/Backends/NNPI/NNPIResource.cpp index 85f31a91c0..1080eb8f32 100644 --- a/lib/Backends/NNPI/NNPIResource.cpp +++ b/lib/Backends/NNPI/NNPIResource.cpp @@ -156,6 +156,13 @@ bool NNPIResource::init(const NNPIObjectName name, deviceOptions_ = deviceOptions; usage_ = usage; desc_ = *desc; + + ResourceUsers *users = nullptr; + if (phUsage) { + users = &(phUsage->at(name_)); + LOG_AND_RETURN_IF_NOT(ERROR, users, "Invalid resource users", false); + } + if (!deviceOptions_->inferOnDevice) { // Handle stuff for ice ref (or compile only path). size_t resourceSize = CalcDescSize(&desc_); @@ -170,7 +177,7 @@ bool NNPIResource::init(const NNPIObjectName name, return true; } - if (deviceOptions_->disableDRT) { + if (deviceOptions_->disableDRT || (users && users->disableDRT)) { switch (usage_) { case ResourceUsage::DRTInput: usage_ = ResourceUsage::InputResource; @@ -181,7 +188,7 @@ bool NNPIResource::init(const NNPIObjectName name, default:; // Do nothing. } } - if (deviceOptions_->disableP2P) { + if (deviceOptions_->disableP2P || (users && users->disableP2P)) { switch (usage_) { case ResourceUsage::P2PInput: usage_ = ResourceUsage::InputResource; @@ -230,11 +237,8 @@ bool NNPIResource::init(const NNPIObjectName name, // Create device resource. 
bool allocateDeviceResource = false; - ResourceUsers *users = nullptr; shared_ptr sharedResource = nullptr; if (phUsage) { - users = &(phUsage->at(name_)); - LOG_AND_RETURN_IF_NOT(ERROR, users, "Invalid resource users", false); sharedResource = findResourceForDevice(*users, device_); } diff --git a/lib/Backends/NNPI/NNPIResource.h b/lib/Backends/NNPI/NNPIResource.h index 2f1d207721..6f46dd93a0 100644 --- a/lib/Backends/NNPI/NNPIResource.h +++ b/lib/Backends/NNPI/NNPIResource.h @@ -33,6 +33,8 @@ struct ResourceUsers { std::vector> writers; std::vector> readers; std::unordered_set devices; + bool disableP2P = false; + bool disableDRT = false; }; using PlaceholderUsageMap = std::unordered_map; diff --git a/lib/Backends/NNPI/NNPITracing.cpp b/lib/Backends/NNPI/NNPITracing.cpp old mode 100755 new mode 100644 diff --git a/lib/Backends/NNPI/NNPITracing.h b/lib/Backends/NNPI/NNPITracing.h old mode 100755 new mode 100644 diff --git a/lib/Backends/NNPI/tests/NNPIHostManagerTest.cpp b/lib/Backends/NNPI/tests/NNPIHostManagerTest.cpp index 4f7a161e71..1a0ad09cee 100644 --- a/lib/Backends/NNPI/tests/NNPIHostManagerTest.cpp +++ b/lib/Backends/NNPI/tests/NNPIHostManagerTest.cpp @@ -23,20 +23,7 @@ std::set glow::backendTestBlacklist = {}; struct BlacklistInitializer { BlacklistInitializer() { const std::vector> testBlacklistedSetups = - {{"testStaticAssignmentP2PandDRTConcurrent/0", - TestBlacklist::AnyDeviceAnyEngine}, - {"testStaticAssignmentP2PandDRT/0", TestBlacklist::AnyDeviceAnyEngine}, - {"testSaturateHost/0", TestBlacklist::AnyDeviceAnyEngine}, - {"testStaticAssignmentP2POnly/0", TestBlacklist::AnyDeviceAnyEngine}, - {"testStaticAssignmentDeviceResidentTensorOnly/0", - TestBlacklist::AnyDeviceAnyEngine}, - {"testStaticAssignment/0", TestBlacklist::AnyDeviceHWEngine}, - {"ConcurrentAddRemoveUnique/0", TestBlacklist::AnyDeviceAnyEngine}, - {"ConcurrentAddRemoveUnique/0", TestBlacklist::AnyDeviceAnyEngine}, - {"testPartitionConfigReplication/0", - 
TestBlacklist::AnyDeviceAnyEngine}, - {"testSinglePartitionReplication/0", - TestBlacklist::AnyDeviceAnyEngine}}; + {{"ConcurrentAddRemoveUnique/0", TestBlacklist::AnyDeviceAnyEngine}}; TestBlacklist::prepareBlacklist(testBlacklistedSetups, backendTestBlacklist); } diff --git a/lib/Backends/NNPI/tests/NNPIOperatorTest.cpp b/lib/Backends/NNPI/tests/NNPIOperatorTest.cpp index 4aab117b03..94d6a81fbd 100644 --- a/lib/Backends/NNPI/tests/NNPIOperatorTest.cpp +++ b/lib/Backends/NNPI/tests/NNPIOperatorTest.cpp @@ -231,9 +231,6 @@ struct BlacklistInitializer { {"FusedRowwiseQuantizedSparseLengthsWeightedSum_ConvertedFloat16_" "NoFusedConvert_FP32Accum/0", TestBlacklist::AnyDeviceHWEngine}, - {"FusedRowwiseQuantizedSparseLengthsSum_Fused4Bit_Float16_" - "AccumFloat16/0", - TestBlacklist::AnyDeviceHWEngine}, {"to_back2/0", TestBlacklist::AnyDeviceHWEngine}, {"GroupDilatedConvolution/0", TestBlacklist::AnyDeviceHWEngine}, {"less_int32Cases/0", TestBlacklist::AnyDeviceHWEngine}, diff --git a/lib/Backends/NNPI/tests/NNPIParameterSweepTest.cpp b/lib/Backends/NNPI/tests/NNPIParameterSweepTest.cpp index c22fb28491..b45a547924 100644 --- a/lib/Backends/NNPI/tests/NNPIParameterSweepTest.cpp +++ b/lib/Backends/NNPI/tests/NNPIParameterSweepTest.cpp @@ -40,156 +40,6 @@ struct BlacklistInitializer { {"BatchMatMulTest_Int8/91", TestBlacklist::AnyDeviceAnyEngine}, {"BatchMatMulTest_Int8/95", TestBlacklist::AnyDeviceAnyEngine}, {"FCTest_Int8/139", TestBlacklist::AnyDeviceAnyEngine}, - {"BatchMatMulTest_Float16/2", TestBlacklist::AnyDeviceHWEngine}, - {"BatchMatMulTest_Float16/3", TestBlacklist::AnyDeviceHWEngine}, - {"BatchMatMulTest_Float16/6", TestBlacklist::AnyDeviceHWEngine}, - {"BatchMatMulTest_Float16/7", TestBlacklist::AnyDeviceHWEngine}, - {"BatchMatMulTest_Float16/10", TestBlacklist::AnyDeviceHWEngine}, - {"BatchMatMulTest_Float16/11", TestBlacklist::AnyDeviceHWEngine}, - {"BatchMatMulTest_Float16/14", TestBlacklist::AnyDeviceHWEngine}, - {"BatchMatMulTest_Float16/15", 
TestBlacklist::AnyDeviceHWEngine}, - {"BatchMatMulTest_Float16/18", TestBlacklist::AnyDeviceHWEngine}, - {"BatchMatMulTest_Float16/19", TestBlacklist::AnyDeviceHWEngine}, - {"BatchMatMulTest_Float16/21", TestBlacklist::AnyDeviceHWEngine}, - {"BatchMatMulTest_Float16/22", TestBlacklist::AnyDeviceHWEngine}, - {"BatchMatMulTest_Float16/23", TestBlacklist::AnyDeviceHWEngine}, - {"BatchMatMulTest_Float16/25", TestBlacklist::AnyDeviceHWEngine}, - {"BatchMatMulTest_Float16/26", TestBlacklist::AnyDeviceHWEngine}, - {"BatchMatMulTest_Float16/27", TestBlacklist::AnyDeviceHWEngine}, - {"BatchMatMulTest_Float16/30", TestBlacklist::AnyDeviceHWEngine}, - {"BatchMatMulTest_Float16/31", TestBlacklist::AnyDeviceHWEngine}, - {"BatchMatMulTest_Float16/33", TestBlacklist::AnyDeviceHWEngine}, - {"BatchMatMulTest_Float16/34", TestBlacklist::AnyDeviceHWEngine}, - {"BatchMatMulTest_Float16/35", TestBlacklist::AnyDeviceHWEngine}, - {"BatchMatMulTest_Float16/37", TestBlacklist::AnyDeviceHWEngine}, - {"BatchMatMulTest_Float16/38", TestBlacklist::AnyDeviceHWEngine}, - {"BatchMatMulTest_Float16/39", TestBlacklist::AnyDeviceHWEngine}, - {"BatchMatMulTest_Float16/41", TestBlacklist::AnyDeviceHWEngine}, - {"BatchMatMulTest_Float16/42", TestBlacklist::AnyDeviceHWEngine}, - {"BatchMatMulTest_Float16/43", TestBlacklist::AnyDeviceHWEngine}, - {"BatchMatMulTest_Float16/45", TestBlacklist::AnyDeviceHWEngine}, - {"BatchMatMulTest_Float16/46", TestBlacklist::AnyDeviceHWEngine}, - {"BatchMatMulTest_Float16/47", TestBlacklist::AnyDeviceHWEngine}, - {"BatchMatMulTest_Float16/49", TestBlacklist::AnyDeviceHWEngine}, - {"BatchMatMulTest_Float16/50", TestBlacklist::AnyDeviceHWEngine}, - {"BatchMatMulTest_Float16/51", TestBlacklist::AnyDeviceHWEngine}, - {"BatchMatMulTest_Float16/54", TestBlacklist::AnyDeviceHWEngine}, - {"BatchMatMulTest_Float16/55", TestBlacklist::AnyDeviceHWEngine}, - {"BatchMatMulTest_Float16/58", TestBlacklist::AnyDeviceHWEngine}, - {"BatchMatMulTest_Float16/59", 
TestBlacklist::AnyDeviceHWEngine}, - {"BatchMatMulTest_Float16/61", TestBlacklist::AnyDeviceHWEngine}, - {"BatchMatMulTest_Float16/62", TestBlacklist::AnyDeviceHWEngine}, - {"BatchMatMulTest_Float16/63", TestBlacklist::AnyDeviceHWEngine}, - {"BatchMatMulTest_Float16/65", TestBlacklist::AnyDeviceHWEngine}, - {"BatchMatMulTest_Float16/66", TestBlacklist::AnyDeviceHWEngine}, - {"BatchMatMulTest_Float16/67", TestBlacklist::AnyDeviceHWEngine}, - {"BatchMatMulTest_Float16/69", TestBlacklist::AnyDeviceHWEngine}, - {"BatchMatMulTest_Float16/70", TestBlacklist::AnyDeviceHWEngine}, - {"BatchMatMulTest_Float16/71", TestBlacklist::AnyDeviceHWEngine}, - {"BatchMatMulTest_Float16/73", TestBlacklist::AnyDeviceHWEngine}, - {"BatchMatMulTest_Float16/74", TestBlacklist::AnyDeviceHWEngine}, - {"BatchMatMulTest_Float16/75", TestBlacklist::AnyDeviceHWEngine}, - {"BatchMatMulTest_Float16/78", TestBlacklist::AnyDeviceHWEngine}, - {"BatchMatMulTest_Float16/79", TestBlacklist::AnyDeviceHWEngine}, - {"BatchMatMulTest_Float16/81", TestBlacklist::AnyDeviceHWEngine}, - {"BatchMatMulTest_Float16/82", TestBlacklist::AnyDeviceHWEngine}, - {"BatchMatMulTest_Float16/83", TestBlacklist::AnyDeviceHWEngine}, - {"BatchMatMulTest_Float16/85", TestBlacklist::AnyDeviceHWEngine}, - {"BatchMatMulTest_Float16/86", TestBlacklist::AnyDeviceHWEngine}, - {"BatchMatMulTest_Float16/87", TestBlacklist::AnyDeviceHWEngine}, - {"BatchMatMulTest_Float16/89", TestBlacklist::AnyDeviceHWEngine}, - {"BatchMatMulTest_Float16/90", TestBlacklist::AnyDeviceHWEngine}, - {"BatchMatMulTest_Float16/91", TestBlacklist::AnyDeviceHWEngine}, - {"BatchMatMulTest_Float16/93", TestBlacklist::AnyDeviceHWEngine}, - {"BatchMatMulTest_Float16/94", TestBlacklist::AnyDeviceHWEngine}, - {"BatchMatMulTest_Float16/95", TestBlacklist::AnyDeviceHWEngine}, - {"ConvTest_Float16/3", TestBlacklist::AnyDeviceHWEngine}, - {"ConvTest_Float16/7", TestBlacklist::AnyDeviceHWEngine}, - {"ConvTest_Float16/9", TestBlacklist::AnyDeviceHWEngine}, - 
{"ConvTest_Float16/11", TestBlacklist::AnyDeviceHWEngine}, - {"FCTest_Float16/14", TestBlacklist::AnyDeviceHWEngine}, - {"FCTest_Float16/17", TestBlacklist::AnyDeviceHWEngine}, - {"FCTest_Float16/18", TestBlacklist::AnyDeviceHWEngine}, - {"FCTest_Float16/19", TestBlacklist::AnyDeviceHWEngine}, - {"FCTest_Float16/21", TestBlacklist::AnyDeviceHWEngine}, - {"FCTest_Float16/22", TestBlacklist::AnyDeviceHWEngine}, - {"FCTest_Float16/23", TestBlacklist::AnyDeviceHWEngine}, - {"FCTest_Float16/24", TestBlacklist::AnyDeviceHWEngine}, - {"FCTest_Float16/26", TestBlacklist::AnyDeviceHWEngine}, - {"FCTest_Float16/27", TestBlacklist::AnyDeviceHWEngine}, - {"FCTest_Float16/28", TestBlacklist::AnyDeviceHWEngine}, - {"FCTest_Float16/29", TestBlacklist::AnyDeviceHWEngine}, - {"FCTest_Float16/30", TestBlacklist::AnyDeviceHWEngine}, - {"FCTest_Float16/31", TestBlacklist::AnyDeviceHWEngine}, - {"FCTest_Float16/32", TestBlacklist::AnyDeviceHWEngine}, - {"FCTest_Float16/33", TestBlacklist::AnyDeviceHWEngine}, - {"FCTest_Float16/34", TestBlacklist::AnyDeviceHWEngine}, - {"FCTest_Float16/47", TestBlacklist::AnyDeviceHWEngine}, - {"FCTest_Float16/48", TestBlacklist::AnyDeviceHWEngine}, - {"FCTest_Float16/49", TestBlacklist::AnyDeviceHWEngine}, - {"FCTest_Float16/51", TestBlacklist::AnyDeviceHWEngine}, - {"FCTest_Float16/52", TestBlacklist::AnyDeviceHWEngine}, - {"FCTest_Float16/53", TestBlacklist::AnyDeviceHWEngine}, - {"FCTest_Float16/54", TestBlacklist::AnyDeviceHWEngine}, - {"FCTest_Float16/55", TestBlacklist::AnyDeviceHWEngine}, - {"FCTest_Float16/56", TestBlacklist::AnyDeviceHWEngine}, - {"FCTest_Float16/57", TestBlacklist::AnyDeviceHWEngine}, - {"FCTest_Float16/58", TestBlacklist::AnyDeviceHWEngine}, - {"FCTest_Float16/59", TestBlacklist::AnyDeviceHWEngine}, - {"FCTest_Float16/60", TestBlacklist::AnyDeviceHWEngine}, - {"FCTest_Float16/61", TestBlacklist::AnyDeviceHWEngine}, - {"FCTest_Float16/62", TestBlacklist::AnyDeviceHWEngine}, - {"FCTest_Float16/63", 
TestBlacklist::AnyDeviceHWEngine}, - {"FCTest_Float16/64", TestBlacklist::AnyDeviceHWEngine}, - {"FCTest_Float16/65", TestBlacklist::AnyDeviceHWEngine}, - {"FCTest_Float16/66", TestBlacklist::AnyDeviceHWEngine}, - {"FCTest_Float16/67", TestBlacklist::AnyDeviceHWEngine}, - {"FCTest_Float16/68", TestBlacklist::AnyDeviceHWEngine}, - {"FCTest_Float16/69", TestBlacklist::AnyDeviceHWEngine}, - {"FCTest_Float16/83", TestBlacklist::AnyDeviceHWEngine}, - {"FCTest_Float16/84", TestBlacklist::AnyDeviceHWEngine}, - {"FCTest_Float16/86", TestBlacklist::AnyDeviceHWEngine}, - {"FCTest_Float16/87", TestBlacklist::AnyDeviceHWEngine}, - {"FCTest_Float16/88", TestBlacklist::AnyDeviceHWEngine}, - {"FCTest_Float16/89", TestBlacklist::AnyDeviceHWEngine}, - {"FCTest_Float16/90", TestBlacklist::AnyDeviceHWEngine}, - {"FCTest_Float16/91", TestBlacklist::AnyDeviceHWEngine}, - {"FCTest_Float16/92", TestBlacklist::AnyDeviceHWEngine}, - {"FCTest_Float16/93", TestBlacklist::AnyDeviceHWEngine}, - {"FCTest_Float16/94", TestBlacklist::AnyDeviceHWEngine}, - {"FCTest_Float16/95", TestBlacklist::AnyDeviceHWEngine}, - {"FCTest_Float16/96", TestBlacklist::AnyDeviceHWEngine}, - {"FCTest_Float16/97", TestBlacklist::AnyDeviceHWEngine}, - {"FCTest_Float16/98", TestBlacklist::AnyDeviceHWEngine}, - {"FCTest_Float16/99", TestBlacklist::AnyDeviceHWEngine}, - {"FCTest_Float16/100", TestBlacklist::AnyDeviceHWEngine}, - {"FCTest_Float16/101", TestBlacklist::AnyDeviceHWEngine}, - {"FCTest_Float16/102", TestBlacklist::AnyDeviceHWEngine}, - {"FCTest_Float16/103", TestBlacklist::AnyDeviceHWEngine}, - {"FCTest_Float16/104", TestBlacklist::AnyDeviceHWEngine}, - {"FCTest_Float16/116", TestBlacklist::AnyDeviceHWEngine}, - {"FCTest_Float16/117", TestBlacklist::AnyDeviceHWEngine}, - {"FCTest_Float16/118", TestBlacklist::AnyDeviceHWEngine}, - {"FCTest_Float16/119", TestBlacklist::AnyDeviceHWEngine}, - {"FCTest_Float16/121", TestBlacklist::AnyDeviceHWEngine}, - {"FCTest_Float16/122", TestBlacklist::AnyDeviceHWEngine}, - 
{"FCTest_Float16/123", TestBlacklist::AnyDeviceHWEngine}, - {"FCTest_Float16/124", TestBlacklist::AnyDeviceHWEngine}, - {"FCTest_Float16/125", TestBlacklist::AnyDeviceHWEngine}, - {"FCTest_Float16/126", TestBlacklist::AnyDeviceHWEngine}, - {"FCTest_Float16/127", TestBlacklist::AnyDeviceHWEngine}, - {"FCTest_Float16/128", TestBlacklist::AnyDeviceHWEngine}, - {"FCTest_Float16/129", TestBlacklist::AnyDeviceHWEngine}, - {"FCTest_Float16/130", TestBlacklist::AnyDeviceHWEngine}, - {"FCTest_Float16/131", TestBlacklist::AnyDeviceHWEngine}, - {"FCTest_Float16/132", TestBlacklist::AnyDeviceHWEngine}, - {"FCTest_Float16/133", TestBlacklist::AnyDeviceHWEngine}, - {"FCTest_Float16/134", TestBlacklist::AnyDeviceHWEngine}, - {"FCTest_Float16/135", TestBlacklist::AnyDeviceHWEngine}, - {"FCTest_Float16/136", TestBlacklist::AnyDeviceHWEngine}, - {"FCTest_Float16/137", TestBlacklist::AnyDeviceHWEngine}, - {"FCTest_Float16/138", TestBlacklist::AnyDeviceHWEngine}, - {"FCTest_Float16/139", TestBlacklist::AnyDeviceHWEngine}, }; for (int i = 0; i < 80; i++) { testBlacklistedSetups.push_back(