131 changes: 4 additions & 127 deletions lib/Backends/NNPI/Importer.cpp
@@ -1430,74 +1430,12 @@ class RQFCNodeImporter : public INNPINodeImporter {
auto *glowRowwiseFC = llvm::dyn_cast<RowwiseQuantizedFullyConnectedNode>(n);
LOG_AND_RETURN_IF_NOT(ERROR, glowRowwiseFC, "Bad node type",
NNPI_INVALID_PARAM);
LOG_AND_RETURN_IF_NOT(
ERROR, glowRowwiseFC->getInput().getType()->getOffset() == 0.f,
(std::string("Bad input offset value") +
std::to_string(glowRowwiseFC->getInput().getType()->getOffset())),
NNPI_INVALID_PARAM);
LOG_AND_RETURN_IF_NOT(
ERROR, glowRowwiseFC->getResult().getType()->getOffset() == 0.f,
(std::string("Bad result offset value") +
std::to_string(glowRowwiseFC->getResult().getType()->getOffset())),
NNPI_INVALID_PARAM);
LOG_AND_RETURN_IF_NOT(
ERROR,
!(glowRowwiseFC->getOffsets()) ||
importer.zeroes(nodeValueName(glowRowwiseFC->getOffsets()).c_str()),
"Bad offset value", NNPI_INVALID_PARAM);

// Add internal tensor for Symlowp input.
std::string symlowpInputName =
NNPIImporter::internalName_ +
nodeValueName(glowRowwiseFC->getInput()).c_str() + "_symlowp";
auto *inType = glowRowwiseFC->getInput().getType();
LOG_NNPI_IF_ERROR_RETURN_VALUE(
importer.addValue(symlowpInputName, inType,
/* alternativeLayout */ inType->dims().size() == 4,
/* input */ false, /* output */ false, {}, {},
/* forceSymlowp */ true),
"Failed to add value");

// Add internal tensor for Symlowp output.
std::string symlowpOutputName =
NNPIImporter::internalName_ +
nodeValueName(glowRowwiseFC->getResult()).c_str() + "_symlowp";
auto *outType = glowRowwiseFC->getResult().getType();
LOG_NNPI_IF_ERROR_RETURN_VALUE(
importer.addValue(symlowpOutputName, outType,
/* alternativeLayout */ outType->dims().size() == 4,
/* input */ false, /* output */ false, {}, {},
/* forceSymlowp */ true),
"Failed to add value");

// Add convert op from Gemmlowp input to Symlowp.
std::string convertInputName = NNPIImporter::internalName_ +
glowRowwiseFC->getName().begin() +
"_convert_input";
std::string convertInputInputName =
nodeValueName(glowRowwiseFC->getInput());
if (!importer.hasChannelWiseConverter(convertInputInputName)) {
LOG_NNPI_IF_ERROR_RETURN_VALUE(
nnpiNetworkAddConvertOp(
importer.getNetwork(), convertInputName.c_str(),
convertInputInputName.c_str(), symlowpInputName.c_str()),
"Failed to add layer");
importer.addChannelWiseConverter(convertInputInputName);
}

// Add convert op from Symlowp output to Gemmlowp.
std::string convertOutputName = NNPIImporter::internalName_ +
glowRowwiseFC->getName().begin() +
"_convert_output";
std::string convertOutputOutputName =
nodeValueName(glowRowwiseFC->getResult());
LOG_NNPI_IF_ERROR_RETURN_VALUE(
nnpiNetworkAddConvertOp(
importer.getNetwork(), convertOutputName.c_str(),
symlowpOutputName.c_str(), convertOutputOutputName.c_str()),
"Failed to add layer");
importer.addChannelWiseConverter(convertOutputOutputName);

// Create the weights without an offsets tensor.
// Assert weights & biases either have no offsets or all offsets are zero.

@@ -1534,17 +1472,14 @@ class RQFCNodeImporter : public INNPINodeImporter {
nodeValueName(glowRowwiseFC->getInput()),
nodeValueName(glowRowwiseFC->getWeights()),
nodeValueName(glowRowwiseFC->getBias()),
symlowpInputName,
symlowpOutputName,
},
{
nodeValueName(glowRowwiseFC->getResult()),
symlowpInputName,
symlowpOutputName,
});
return nnpiNetworkAddFullyConnectedOp(
importer.getNetwork(), glowRowwiseFC->getName().begin(),
symlowpInputName.c_str(), symlowpOutputName.c_str(),
nodeValueName(glowRowwiseFC->getInput()).c_str(),
nodeValueName(glowRowwiseFC->getResult()).c_str(),
nodeValueName(glowRowwiseFC->getWeights()).c_str(),
glowRowwiseFC->getBias()
? nodeValueName(glowRowwiseFC->getBias()).c_str()
@@ -1560,7 +1495,6 @@ class ChannelwiseQuantizedConvolutionNodeImporter : public INNPINodeImporter {
llvm::dyn_cast<ChannelwiseQuantizedConvolutionNode>(n);
LOG_AND_RETURN_IF_NOT(ERROR, glowChannelwiseQuantizedConv, "Bad node type",
NNPI_INVALID_PARAM);

LOG_AND_RETURN_IF_NOT(
ERROR,
!(glowChannelwiseQuantizedConv->getOffsets()) ||
@@ -1597,60 +1531,6 @@ class ChannelwiseQuantizedConvolutionNodeImporter : public INNPINodeImporter {
glowChannelwiseQuantizedConv->getStrides()[1]};
uint32_t dilation[SPATIAL_DIMS2] = {1, 1}; // No dilation, default values

// Add internal tensor for Symlowp input.
std::string symlowpInputName =
NNPIImporter::internalName_ +
nodeValueName(glowChannelwiseQuantizedConv->getInput()).c_str() +
"_symlowp";
auto *inType = glowChannelwiseQuantizedConv->getInput().getType();
LOG_NNPI_IF_ERROR_RETURN_VALUE(
importer.addValue(symlowpInputName, inType,
/* alternativeLayout */ inType->dims().size() == 4,
/* input */ false, /* output */ false, {}, {},
/* forceSymlowp */ true),
"Failed to add value");

// Add internal tensor for Symlowp output.
std::string symlowpOutputName =
NNPIImporter::internalName_ +
nodeValueName(glowChannelwiseQuantizedConv->getResult()).c_str() +
"_symlowp";
auto *outType = glowChannelwiseQuantizedConv->getResult().getType();
LOG_NNPI_IF_ERROR_RETURN_VALUE(
importer.addValue(symlowpOutputName, outType,
/* alternativeLayout */ outType->dims().size() == 4,
/* input */ false, /* output */ false, {}, {},
/* forceSymlowp */ true),
"Failed to add value");

// Add convert op from Gemmlowp input to Symlowp.
std::string convertInputName =
NNPIImporter::internalName_ +
glowChannelwiseQuantizedConv->getName().begin() + "_convert_input";
std::string convertInputInputName =
nodeValueName(glowChannelwiseQuantizedConv->getInput());
if (!importer.hasChannelWiseConverter(convertInputInputName)) {
LOG_NNPI_IF_ERROR_RETURN_VALUE(
nnpiNetworkAddConvertOp(
importer.getNetwork(), convertInputName.c_str(),
convertInputInputName.c_str(), symlowpInputName.c_str()),
"Failed to add layer");
importer.addChannelWiseConverter(convertInputInputName);
}

// Add convert op from Symlowp output to Gemmlowp.
std::string convertOutputName =
NNPIImporter::internalName_ +
glowChannelwiseQuantizedConv->getName().begin() + "_convert_output";
std::string convertOutputOutputName =
nodeValueName(glowChannelwiseQuantizedConv->getResult());
LOG_NNPI_IF_ERROR_RETURN_VALUE(
nnpiNetworkAddConvertOp(
importer.getNetwork(), convertOutputName.c_str(),
symlowpOutputName.c_str(), convertOutputOutputName.c_str()),
"Failed to add layer");
importer.addChannelWiseConverter(convertOutputOutputName);

// Create the weights without an offsets tensor.
// Assert weights & biases either have no offsets or all offsets are zero.

@@ -1694,18 +1574,15 @@ class ChannelwiseQuantizedConvolutionNodeImporter : public INNPINodeImporter {
nodeValueName(glowChannelwiseQuantizedConv->getInput()),
nodeValueName(glowChannelwiseQuantizedConv->getFilter()),
nodeValueName(glowChannelwiseQuantizedConv->getBias()),
symlowpInputName,
symlowpOutputName,
},
{
nodeValueName(glowChannelwiseQuantizedConv->getResult()),
symlowpInputName,
symlowpOutputName,
});

return nnpiNetworkAddConvolutionOp(
importer.getNetwork(), glowChannelwiseQuantizedConv->getName().begin(),
symlowpInputName.c_str(), symlowpOutputName.c_str(),
nodeValueName(glowChannelwiseQuantizedConv->getInput()).c_str(),
nodeValueName(glowChannelwiseQuantizedConv->getResult()).c_str(),
nodeValueName(glowChannelwiseQuantizedConv->getFilter()).c_str(),
glowChannelwiseQuantizedConv->getBias()
? nodeValueName(glowChannelwiseQuantizedConv->getBias()).c_str()
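The net effect of the Importer.cpp changes above: the internal Symlowp staging tensors and the Gemmlowp-to-Symlowp convert ops are gone, and the quantized FC/conv ops now read and write the original tensors directly. A minimal sketch of the wiring difference, using a hypothetical Op record rather than the real NNPI importer API:

```cpp
#include <iostream>
#include <string>
#include <vector>

// A recorded graph edge: op name plus its input and output tensor names.
struct Op {
  std::string name, input, output;
};

static void print(const std::vector<Op> &graph, const std::string &title) {
  std::cout << title << "\n";
  for (const Op &op : graph)
    std::cout << "  " << op.input << " --[" << op.name << "]--> " << op.output
              << "\n";
}

int main() {
  // Old wiring: every imported quantized FC/conv paid for two convert ops
  // and two internal staging tensors.
  std::vector<Op> before = {
      {"convert_input", "fc_in(gemmlowp)", "fc_in(symlowp)"},
      {"fully_connected", "fc_in(symlowp)", "fc_out(symlowp)"},
      {"convert_output", "fc_out(symlowp)", "fc_out(gemmlowp)"},
  };
  // New wiring: the kernel consumes and produces the original tensors.
  std::vector<Op> after = {
      {"fully_connected", "fc_in(gemmlowp)", "fc_out(gemmlowp)"},
  };
  print(before, "before this change:");
  print(after, "after this change:");
  return 0;
}
```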
lib/Backends/NNPI/InferenceContext.cpp: file mode changed 100755 → 100644 (contents unchanged)
21 changes: 12 additions & 9 deletions lib/Backends/NNPI/InferencePool.cpp
@@ -28,7 +28,7 @@ namespace glow {
namespace runtime {

InferencePoolEnv::InferencePoolEnv()
: numWorkers_(0), deviceOptions_(nullptr), nnpiCompiledFunction_(nullptr),
: deviceOptions_(nullptr), nnpiCompiledFunction_(nullptr),
staticPlaceholderMap_(nullptr) {}

InferencePoolEnv::~InferencePoolEnv() {
@@ -41,8 +41,7 @@ InferencePoolEnv::~InferencePoolEnv() {
}
}

Error InferencePoolEnv::init(unsigned numWorkers, NNPIAdapter adapter,
NNPIDeviceContext device,
Error InferencePoolEnv::init(NNPIAdapter adapter, NNPIDeviceContext device,
std::shared_ptr<NNPIDeviceTracing> deviceTracing,
CompiledFunction *compiledFunction,
StaticPlaceholderMap *staticPlaceholderMap,
@@ -57,20 +56,24 @@ Error InferencePoolEnv::init(unsigned numWorkers, NNPIAdapter adapter,
if (workersPool_) {
return MAKE_ERR("InferencePool already initialized!");
}
numWorkers_ = numWorkers;

nnpiCompiledFunction_ = static_cast<NNPICompiledFunction *>(compiledFunction);
size_t optionsNumWorkers =
nnpiCompiledFunction_->getCompilationOptions().numWorkers;
// ICE-Ref is not re-entrant for the same nnpiNetwork, so use one worker.
size_t numWorkers = deviceOptions_->inferOnDevice ? optionsNumWorkers : 1;
workersPool_ = glow::make_unique<folly::CPUThreadPoolExecutor>(
numWorkers_, std::make_shared<folly::NamedThreadFactory>("NNPI-worker"));
numWorkers, std::make_shared<folly::NamedThreadFactory>("NNPI-worker"));
deviceTracing_ = deviceTracing;
staticPlaceholderMap_ = staticPlaceholderMap;

inferenceContexts_.resize(numWorkers_);
freeContexts_.resize(numWorkers_);
if (inferenceContexts_.size() != numWorkers_) {
inferenceContexts_.resize(numWorkers);
freeContexts_.resize(numWorkers);
if (inferenceContexts_.size() != numWorkers) {
return MAKE_ERR("InferencePool failed to create inference contexts");
}

// Create host network.
nnpiCompiledFunction_ = static_cast<NNPICompiledFunction *>(compiledFunction);
NNPIHostNetwork hostNetwork(NNPI_INVALID_NNPIHANDLE);
if (deviceOptions_->inferOnDevice) {
// Create NNPI host network (load compiled binary).
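The worker-count logic above replaces the old numWorkers_ member and init() parameter: the count now comes from the compiled function's own options on the device path, and collapses to one on ICE-Ref. A minimal sketch of that selection, with pickNumWorkers as an illustrative name rather than a real method:

```cpp
#include <cassert>
#include <cstddef>

// Mirrors the ternary in InferencePoolEnv::init(); the function name is
// illustrative, not part of the real class.
std::size_t pickNumWorkers(bool inferOnDevice, std::size_t optionsNumWorkers) {
  // ICE-Ref (software reference) is not re-entrant for the same nnpiNetwork,
  // so the pool must be serialized to a single worker there.
  return inferOnDevice ? optionsNumWorkers : 1;
}

int main() {
  assert(pickNumWorkers(/*inferOnDevice=*/true, 4) == 4);  // device path
  assert(pickNumWorkers(/*inferOnDevice=*/false, 4) == 1); // ICE-Ref path
  return 0;
}
```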
3 changes: 1 addition & 2 deletions lib/Backends/NNPI/InferencePool.h
@@ -33,7 +33,6 @@ namespace glow {
namespace runtime {
class NNPIDeviceBindings;
class InferencePoolEnv {
unsigned numWorkers_;
std::unique_ptr<folly::CPUThreadPoolExecutor> workersPool_;
std::vector<InferenceContext> inferenceContexts_;
std::vector<InferenceContext *> freeContexts_;
@@ -53,7 +52,7 @@ class InferencePoolEnv {
public:
InferencePoolEnv();
~InferencePoolEnv();
Error init(unsigned numWorkers, NNPIAdapter adapter, NNPIDeviceContext device,
Error init(NNPIAdapter adapter, NNPIDeviceContext device,
std::shared_ptr<NNPIDeviceTracing> deviceTracing,
CompiledFunction *compiledFunction,
StaticPlaceholderMap *staticPlaceholderMap,
30 changes: 19 additions & 11 deletions lib/Backends/NNPI/NNPI.cpp
@@ -198,7 +198,8 @@ bool NNPIBackend::isOpSupported(const NodeInfo &NI) const {
}
return NI.allInputsAndOutputsHaveSameElemKind({ElemKind::Int8QTy},
{ConvolutionNode::BiasIdx}) &&
(NI.getInElemTy(ConvolutionNode::BiasIdx) == ElemKind::Int32QTy);
((NI.getInElemTy(ConvolutionNode::BiasIdx) == ElemKind::Int32QTy) ||
(NI.getInElemTy(ConvolutionNode::BiasIdx) == ElemKind::FloatTy));

case Kinded::Kind::Convolution3DNodeKind:
if (!NI.getInTy(Convolution3DNode::InputIdx)->isQuantizedType()) {
@@ -207,7 +208,9 @@ bool NNPIBackend::isOpSupported(const NodeInfo &NI) const {
}
return NI.allInputsAndOutputsHaveSameElemKind(
{ElemKind::Int8QTy}, {Convolution3DNode::BiasIdx}) &&
(NI.getInElemTy(Convolution3DNode::BiasIdx) == ElemKind::Int32QTy);
((NI.getInElemTy(Convolution3DNode::BiasIdx) ==
ElemKind::Int32QTy) ||
(NI.getInElemTy(Convolution3DNode::BiasIdx) == ElemKind::FloatTy));
case Kinded::Kind::QuantizeNodeKind:
return (NI.getInElemTy(QuantizeNode::InputIdx) == ElemKind::FloatTy ||
NI.getInElemTy(QuantizeNode::InputIdx) == ElemKind::Float16Ty) &&
@@ -238,13 +241,15 @@ bool NNPIBackend::isOpSupported(const NodeInfo &NI) const {
}

case Kinded::Kind::FullyConnectedNodeKind:
if (!NI.getInTy(ConvolutionNode::InputIdx)->isQuantizedType()) {
if (!NI.getInTy(FullyConnectedNode::InputIdx)->isQuantizedType()) {
return NI.allInputsAndOutputsHaveSameElemKind(
{ElemKind::FloatTy, ElemKind::Float16Ty});
}
return NI.allInputsAndOutputsHaveSameElemKind(
{ElemKind::Int8QTy}, {FullyConnectedNode::BiasIdx}) &&
(NI.getInElemTy(FullyConnectedNode::BiasIdx) == ElemKind::Int32QTy);
((NI.getInElemTy(FullyConnectedNode::BiasIdx) ==
ElemKind::Int32QTy) ||
(NI.getInElemTy(FullyConnectedNode::BiasIdx) == ElemKind::FloatTy));

case Kinded::Kind::MaxPoolNodeKind:
return NI.allInputsAndOutputsHaveSameElemKind(
@@ -309,8 +314,10 @@ bool NNPIBackend::isOpSupported(const NodeInfo &NI) const {
ElemKind::FloatTy) &&
(NI.getInElemTy(RowwiseQuantizedFullyConnectedNode::OffsetsIdx) ==
ElemKind::Int32ITy) &&
(NI.getInElemTy(RowwiseQuantizedFullyConnectedNode::BiasIdx) ==
ElemKind::Int32QTy) &&
((NI.getInElemTy(RowwiseQuantizedFullyConnectedNode::BiasIdx) ==
ElemKind::Int32QTy) ||
(NI.getInElemTy(RowwiseQuantizedFullyConnectedNode::BiasIdx) ==
ElemKind::FloatTy)) &&
(NI.getOutElemTy(RowwiseQuantizedFullyConnectedNode::ResultIdx) ==
ElemKind::Int8QTy);

@@ -370,7 +377,8 @@ bool NNPIBackend::isOpSupported(const NodeInfo &NI) const {
auto resultK =
NI.getOutElemTy(EmbeddingBagByteRowwiseOffsetsNode::ResultIdx);
return (dataK == ElemKind::UInt8FusedQTy ||
dataK == ElemKind::UInt8FusedFP16QTy) &&
dataK == ElemKind::UInt8FusedFP16QTy ||
dataK == ElemKind::UInt4FusedFP16QTy) &&
(resultK == ElemKind::FloatTy || resultK == ElemKind::Float16Ty) &&
(indicesK == ElemKind::Int64ITy) && (offsetsK == ElemKind::Int64ITy);
}
@@ -500,6 +508,7 @@ bool NNPIBackend::shouldLower(const Node *N) const {
case Kinded::Kind::AdaptiveAvgPoolNodeKind:
case Kinded::Kind::EmbeddingBagNodeKind:
case Kinded::Kind::EmbeddingBagByteRowwiseOffsetsNodeKind:
case Kinded::Kind::LayerNormalizationNodeKind:
return false;
case Kinded::Kind::FusedRowwiseQuantizedSparseLengthsSumNodeKind: {
const FusedRowwiseQuantizedSparseLengthsSumNode *SLSN =
@@ -510,7 +519,6 @@ bool NNPIBackend::shouldLower(const Node *N) const {
return true;
}
}
case Kinded::Kind::LayerNormalizationNodeKind:
case Kinded::Kind::SparseLengthsSumNodeKind:
// Workaround: lower until ICE-T implements it.
if (NNPIBackend::backendOptions_.useIceT ||
@@ -1133,8 +1141,6 @@ traversePostOrder(const runtime::DAGNode *root,
Error NNPIBackend::bindContexts(
llvm::ArrayRef<runtime::ContextBinding> bindings,
const runtime::DAGNode *root, bool enableP2P, bool enableDRT) {
LOG(INFO) << "enableP2P/DRT not yet implemented. enableDRT = " << enableDRT
<< ", enableP2P = " << enableP2P << ".\n";
if (backendOptions_.dumpRuntime) {
DotWriter::clear();
DotWriter::addSubGraph("Host", "Host");
@@ -1154,10 +1160,12 @@ Error NNPIBackend::bindContexts(
nnpiDM->addPlaceholderUsageCount(cb.networkName, phUsage);
}

for (const auto &usage : phUsage) {
for (auto &usage : phUsage) {
LOG_IF_NOT_RETURN_LLVMERROR(
usage.second.numWriters < 2,
"Multiple writes to the same placeholder not suported");
usage.second.disableP2P = !enableP2P;
usage.second.disableDRT = !enableDRT;
}

for (auto *dagNode : postOrder) {
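Two behavioral changes in NNPI.cpp are easy to miss in the diff: quantized Convolution/Convolution3D/FullyConnected (including the rowwise-quantized variant) now accept a float bias in addition to an int32-quantized one, and bindContexts() now records the enableP2P/enableDRT toggles on each placeholder usage instead of logging them as unimplemented. A reduced sketch under those assumptions; ElemKind and PlaceholderUsage here are trimmed stand-ins for the real Glow types:

```cpp
#include <cassert>

enum class ElemKind { Int8QTy, Int32QTy, FloatTy };

// The relaxed bias predicate applied in isOpSupported() above.
bool isSupportedBiasKind(ElemKind bias) {
  return bias == ElemKind::Int32QTy || bias == ElemKind::FloatTy;
}

struct PlaceholderUsage {
  int numWriters = 0;
  bool disableP2P = false;
  bool disableDRT = false;
};

// bindContexts() now propagates the feature toggles onto each usage entry.
void applyTransferToggles(PlaceholderUsage &usage, bool enableP2P,
                          bool enableDRT) {
  usage.disableP2P = !enableP2P; // P2P: device-to-device transfers
  usage.disableDRT = !enableDRT; // DRT: device-resident tensors
}

int main() {
  assert(isSupportedBiasKind(ElemKind::Int32QTy)); // previously the only kind
  assert(isSupportedBiasKind(ElemKind::FloatTy));  // newly accepted
  assert(!isSupportedBiasKind(ElemKind::Int8QTy));

  PlaceholderUsage usage;
  applyTransferToggles(usage, /*enableP2P=*/false, /*enableDRT=*/true);
  assert(usage.disableP2P && !usage.disableDRT);
  return 0;
}
```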
26 changes: 16 additions & 10 deletions lib/Backends/NNPI/NNPICompiledFunction.cpp
@@ -362,16 +362,16 @@ Error NNPICompiledFunction::compile(Function *F, const BackendOptions &opts) {
compilationFileName_.c_str(), NULL),
"Failed NNPI Compile");
}
}
if (compilationOptions_.inferOnDevice) {
DBG_MEM_USAGE("NNPICompiledFunction destroy network");
// NNPINetwork is not needed anymore on the inference API path.
// Once the compiled stream is loaded, queries on the network can be done
// using the host network instead.
LOG_NNPI_IF_ERROR(nnpiNetworkDestroy(network_),
"Failed NNPI Network Destroy");
network_ = NNPI_INVALID_NNPIHANDLE;
DBG_MEM_USAGE("NNPICompiledFunction destroy network done");
if (compilationOptions_.inferOnDevice) {
DBG_MEM_USAGE("NNPICompiledFunction destroy network");
// NNPINetwork is not needed anymore on the inference API path.
// Once the compiled stream is loaded, queries on the network can be done
// using the host network instead.
LOG_NNPI_IF_ERROR(nnpiNetworkDestroy(network_),
"Failed NNPI Network Destroy");
network_ = NNPI_INVALID_NNPIHANDLE;
DBG_MEM_USAGE("NNPICompiledFunction destroy network done");
}
}

// Determine and save what inputs can be treated as partial. Need to do this
@@ -390,6 +390,12 @@ Error NNPICompiledFunction::compile(Function *F, const BackendOptions &opts) {
return Error::success();
}

NNPICompiledFunction::NNPICompiledFunction(Function *F)
: CompiledFunction(runtime::RuntimeBundle::create(*F)),
compilationOptions_({}) {
std::memset(&config_, 0, sizeof(config_));
}

NNPICompiledFunction::~NNPICompiledFunction() {
if (network_ != NNPI_INVALID_NNPIHANDLE) {
LOG_NNPI_IF_ERROR(nnpiNetworkDestroy(network_),
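The new out-of-line constructor zeroes the NNPI config struct before compilation options are applied; since the config is a plain C struct filled in by the NNPI API, memset gives unset fields well-defined defaults. A minimal sketch, with NNPICompilationConfig as a hypothetical stand-in for the real NNPI type:

```cpp
#include <cstring>

// Hypothetical reduced config; the real type is a plain C struct from the
// NNPI headers.
struct NNPICompilationConfig {
  int numIceCores;
  char compiledFile[256];
};

class CompiledFunctionSketch {
  NNPICompilationConfig config_;

public:
  CompiledFunctionSketch() {
    // Zero-initialize so fields not set by options read as defaults.
    std::memset(&config_, 0, sizeof(config_));
  }
};

int main() {
  CompiledFunctionSketch f; // config_ starts fully zeroed
  (void)f;
  return 0;
}
```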