25 changes: 9 additions & 16 deletions include/onnxruntime/core/graph/graph.h
@@ -1454,12 +1454,16 @@ class Graph { // NOLINT(clang-analyzer-optin.performance.Padding): preserve exi
return Resolve(default_options);
}

/// <summary>
/// This function converts all the graph TensorProto initializers into OrtValues
/// and creates an in-memory external data reference for each OrtValue.
/// </summary>
/// <returns>Status indicating success or failure.</returns>
Status ConvertInitializersIntoOrtValues();
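A minimal usage sketch, assuming a caller inside ORT core; the call site and ordering are illustrative, and only the method name and the Status return come from this change:

// Hypothetical caller: resolve the graph first, then move large inline
// initializers into OrtValues backed by in-memory external data references.
Status PrepareGraph(Graph& graph) {
  ORT_RETURN_IF_ERROR(graph.Resolve());
  ORT_RETURN_IF_ERROR(graph.ConvertInitializersIntoOrtValues());
  return Status::OK();
}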

/**
 * @brief Converts a subset of graph TensorProto initializers and updates the graph proto.
 *
 * This function examines the specified initializers in the graph and converts them inline
 * if any of them carry external data in memory, then updates the provided GraphProto with
 * the modified initializers.
 *
 * @param iterators Span of iterators pointing to the initializers, in the order they should be processed
 * @param output_graph_proto The GraphProto to be updated with the modified initializers
@@ -1633,17 +1637,6 @@ class Graph { // NOLINT(clang-analyzer-optin.performance.Padding): preserve exi
/// <returns>Status indicating success or failure</returns>
Status ProcessSubgraphsInMemoryData(ONNX_NAMESPACE::GraphProto& output_graph_proto) const;

/// <summary>
/// This function replaces all of the initializers within output_graph_proto
/// from this Graph instance. All in-memory initializers are regenerated and inlined.
/// This is necessary even if graph_proto_ is already up to date, because initializers() may
/// contain obsolete entries that are no longer in use after optimizations, holding stale
/// references to OrtValues that may no longer exist (since we append rather than replace).
/// </summary>
/// <param name="output_graph_proto">Destination GraphProto to receive the updated initializers.</param>
/// <returns>Status indicating success or failure.</returns>
Status RegenerateInitializersAndReplaceInMemory(ONNX_NAMESPACE::GraphProto& output_graph_proto) const;

/// <summary>
/// This function traverses the graph bottom up and externalizes
/// constant initializers along with their pre-packed blobs from different
76 changes: 33 additions & 43 deletions onnxruntime/core/graph/graph.cc
@@ -1231,28 +1231,6 @@ Graph::Graph(const Model& owning_model,
ArgNameToTypeMap name_to_type_map;
const auto& model_path = ModelPath();

// If the tensor proto data is large enough, move the data from the TensorProto to an OrtValue
// and add an external data reference to the TensorProto that points to that OrtValue.
// This lambda should not be used on initializers that already have an external data reference.
// For tensors below the size threshold, this function does nothing.
auto put_large_tensor_in_ort_value = [this, &model_path](ONNX_NAMESPACE::TensorProto& tensor_proto) {
size_t size_in_bytes = 0;
ORT_THROW_IF_ERROR(utils::GetSizeInBytesFromTensorProto<0>(tensor_proto, &size_in_bytes));
if (size_in_bytes > utils::kSmallTensorExternalDataThreshold) {
OrtValue ort_value;
ORT_THROW_IF_ERROR(utils::TensorProtoToOrtValue(Env::Default(), model_path, tensor_proto,
CPUAllocator::DefaultInstance(), ort_value));
constexpr const bool use_tensor_buffer_true = true;
auto tensor_proto_to_add = utils::TensorToTensorProto(ort_value.Get<Tensor>(), tensor_proto.name(),
use_tensor_buffer_true);
assert(ort_value.IsAllocated());
auto ins_result = ortvalue_initializers_.insert_or_assign(tensor_proto_to_add.name(), std::move(ort_value));
ORT_ENFORCE(ins_result.second, "Unexpected duplicate insert or assign OrtValue for tensor: ", tensor_proto_to_add.name(),
" in the initializer list.");
tensor_proto = std::move(tensor_proto_to_add);
}
};

// Process 'Constant' nodes.
// Put the 'TensorProto' stored in each 'Constant' node's attribute into the graph's initializer list.
for (auto& node : graph_proto_->node()) {
@@ -1272,8 +1250,6 @@ }
}
}

put_large_tensor_in_ort_value(*tensor);

// Ensure initializers are also graph inputs.
if (ir_version_ < 4) {
TypeProto t{utils::TypeProtoFromTensorProto(*tensor)};
@@ -1350,25 +1326,7 @@ }
}

// Copy initial tensors to a map.
for (int i = 0, lim = graph_proto_->initializer_size(); i < lim; ++i) {
auto& tensor = *graph_proto_->mutable_initializer(i);
// If data is on disk, it will be loaded either by optimizers
// or during session state finalization.
// If data is already in memory, do nothing.
if (!utils::HasExternalData(tensor)) {
// sparse_tensor_names_ contains references to strings to save memory.
// In case we replace the tensor_proto, we want to make sure we remove
// the old reference first, and then add a new one.
const bool is_sparse = sparse_tensor_names_.count(tensor.name());
if (is_sparse) {
sparse_tensor_names_.erase(tensor.name());
}
put_large_tensor_in_ort_value(tensor);
if (is_sparse) {
sparse_tensor_names_.emplace(tensor.name());
}
}

for (auto& tensor : graph_proto_->initializer()) {
auto p = name_to_initial_tensor_.emplace(tensor.name(), &tensor);
if (!p.second) {
LOGS(logger_, WARNING) << "Duplicate initializer (dense, sparse or ConstantNode): '" << tensor.name()
@@ -3457,6 +3415,38 @@ Status Graph::Resolve(const ResolveOptions& options) {
return ForThisAndAllSubgraphs(all_subgraphs, finalize_func);
}

Status Graph::ConvertInitializersIntoOrtValues() {
std::vector<Graph*> all_subgraphs;
FindAllSubgraphs(all_subgraphs);

auto put_weights_maybe_in_memory_func = [&](Graph& graph) -> Status {
// If any initializers still hold their data inline in the TensorProto, move the large ones into OrtValues.
const auto& model_path = graph.ModelPath();
auto& graph_proto = *graph.graph_proto_;
for (int i = 0, lim = graph_proto.initializer_size(); i < lim; ++i) {
auto& tensor_proto = *graph_proto.mutable_initializer(i);
if (utils::HasExternalData(tensor_proto)) {
continue; // ignore data on disk; it will be loaded either by the EP or at session state finalization
}

size_t size_in_bytes = 0;
ORT_RETURN_IF_ERROR(utils::GetSizeInBytesFromTensorProto<0>(tensor_proto, &size_in_bytes));
if (size_in_bytes > utils::kSmallTensorExternalDataThreshold) {
OrtValue ort_value;
ORT_RETURN_IF_ERROR(utils::TensorProtoToOrtValue(Env::Default(), model_path, tensor_proto,
CPUAllocator::DefaultInstance(), ort_value));
constexpr const bool use_tensor_buffer_true = true;
auto tensor_proto_to_add = utils::TensorToTensorProto(ort_value.Get<Tensor>(), tensor_proto.name(),
use_tensor_buffer_true);
ORT_RETURN_IF_ERROR(graph.ReplaceInitializedTensor(tensor_proto_to_add, ort_value));
}
}
return Status::OK();
};

return ForThisAndAllSubgraphs(all_subgraphs, put_weights_maybe_in_memory_func);
}
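For orientation, a hedged sketch of what the conversion above produces: a TensorProto that keeps its metadata but whose external_data entries point at the OrtValue's buffer instead of a file. The exact key/value layout and the tag constant are assumptions based on ORT's in-memory external-data convention, not spelled out in this diff:

// Illustrative only; the initializer's bytes stay in the OrtValue.
ONNX_NAMESPACE::TensorProto tp;
tp.set_name("weight");
tp.set_data_location(ONNX_NAMESPACE::TensorProto_DataLocation_EXTERNAL);
auto* entry = tp.add_external_data();
entry->set_key("location");
entry->set_value(utils::kTensorProtoMemoryAddressTag);  // assumed in-memory tag name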

void Graph::SetName(const std::string& name) {
graph_proto_->set_name(name);
}
81 changes: 36 additions & 45 deletions onnxruntime/core/providers/nv_tensorrt_rtx/nv_execution_provider.cc
@@ -1654,11 +1654,8 @@ SubGraphCollection_t NvExecutionProvider::GetSupportedList(SubGraphCollection_t
SetAllGraphInputs(graph_build);
}

auto status = graph_build.Resolve();
if (!status.IsOK()) {
LOGS_DEFAULT(ERROR) << status.ErrorMessage();
ORT_THROW_IF_ERROR(ORT_MAKE_STATUS(ONNXRUNTIME, FAIL, "ONNX graph resolve failed: " + status.ErrorMessage()));
}
ORT_THROW_IF_ERROR(graph_build.Resolve());

// Add parent graph output to the subgraph
int i = 0;
std::vector<const NodeArg*> subgraph_outputs;
@@ -1705,41 +1702,38 @@
auto model = graph_viewer->CreateModel(*GetLogger());
auto model_proto = model->ToProto();

// ORT's default topological sort uses reversed DFS.
// When creating a model proto from a graph viewer, let ORT use priority-based topological sort based on node index.
// The reason is that in some cases, for example ResNet50, the default topological sort
// generates a model proto whose node ordering differs from the original ONNX model.

// Save user-provided external data in memory instead of writing it to the ModelProto;
// needed for models > 2GB.
std::vector<TensorrtUserWeights> userWeights;
if (use_external_data_initializer_) {
auto c_api = Ort::GetApi();
const InitializedTensorSet& allInitializers = graph_viewer->GetAllInitializedTensors();
const auto& allInitializers = graph_viewer->GetAllInitializedTensors();
userWeights.reserve(allInitializers.size());
for (auto& entry : allInitializers) {
OrtValue initializer_value;
auto* tp = entry.second;
for (const auto& [name, tp] : allInitializers) {
if (utils::HasRawData(*tp)) {
userWeights.emplace_back(TensorrtUserWeights(tp->name(), tp->raw_data().data(), tp->raw_data().size()));
} else if (graph_viewer->GetOrtValueInitializer(tp->name(), initializer_value)) {
// the initializer was marked as external data by the ORT graph at load time since it was provided in memory
size_t size = 0;
const void* ptr = nullptr;
Ort::ThrowOnError(c_api.GetTensorSizeInBytes(&initializer_value, &size));
Ort::ThrowOnError(c_api.GetTensorData(&initializer_value, &ptr));
userWeights.emplace_back(tp->name(), ptr, size);
// Keep initializers in memory instead of writing them to the ModelProto.
userWeights.emplace_back(name, tp->raw_data().data(), tp->raw_data().size());
} else if (utils::HasExternalDataInMemory(*tp)) {
// only copy and take ownership of the data if none of the above conditions are met
std::unique_ptr<ONNX_NAMESPACE::TensorProto> full_init;
ORT_THROW_IF_ERROR(utils::GetTensorProtoWithDataIfInMemory(*tp, full_init));
userWeights.emplace_back(std::move(full_init->name()), std::move(full_init->raw_data()));
// the initializer was marked as external data by the ORT graph at load time since it was provided in memory
if (OrtValue v; graph_viewer->GetOrtValueInitializer(name, v)) {
Ort::ConstValue initializer_value{&v};
const size_t size = initializer_value.GetTensorSizeInBytes();
const void* ptr = initializer_value.GetTensorRawData();
userWeights.emplace_back(name, ptr, size);
} else {
// only copy and take ownership of the data if none of the above conditions are met
std::unique_ptr<ONNX_NAMESPACE::TensorProto> full_init;
ORT_THROW_IF_ERROR(utils::GetTensorProtoWithDataIfInMemory(*tp, full_init));
userWeights.emplace_back(name, full_init->raw_data());
}
}
}
}
Contributor @chilo-ms commented on Dec 4, 2025:

Missing "}" for the above code block if (use_external_data_initializer_) {.
It makes the NV TRT RTX EP fail to build.
// ORT's default topological sort uses reversed DFS.
// When creating a model proto from a graph viewer, let ORT use priority-based topological sort based on node index.
// The reason is that in some cases, for example ResNet50, the default topological sort
// generates a model proto whose node ordering differs from the original ONNX model.
graph_viewer->ToProto(*model_proto->mutable_graph(), true, true, 1 /*priority-based topological sort*/, !use_external_data_initializer_ /*include raw initializers*/);

model_proto->set_ir_version(ONNX_NAMESPACE::Version::IR_VERSION);

std::string string_buf;
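The loops above rely on TensorrtUserWeights accepting both a non-owning (name, pointer, size) view and an owning (name, bytes) form. A minimal sketch of a struct that satisfies both call sites; the PR's actual definition may differ:

struct TensorrtUserWeights {
  // Non-owning view over bytes that the graph keeps alive (raw_data / OrtValue buffer).
  TensorrtUserWeights(std::string name, const void* data, size_t size)
      : name_(std::move(name)), data_(data), size_(size) {}
  // Owning form: copies the bytes out of a temporary TensorProto.
  TensorrtUserWeights(std::string name, std::string bytes)
      : name_(std::move(name)), owned_(std::move(bytes)),
        data_(owned_.data()), size_(owned_.size()) {}
  std::string name_;
  std::string owned_;      // set only when the weights are copied and owned
  const void* data_ = nullptr;
  size_t size_ = 0;
};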
@@ -2567,30 +2561,27 @@ Status NvExecutionProvider::CreateNodeComputeInfoFromGraph(const GraphViewer& gr
// Exclude weights if they are stored externally.
std::vector<TensorrtUserWeights> userWeights;
if (use_external_data_initializer_) {
auto c_api = Ort::GetApi();
const InitializedTensorSet& allInitializers = graph_body_viewer.GetAllInitializedTensors();
userWeights.reserve(allInitializers.size());
for (auto& entry : allInitializers) {
OrtValue initializer_value;
auto* tp = entry.second;
for (const auto& [name, tp] : allInitializers) {
if (utils::HasRawData(*tp)) {
userWeights.emplace_back(TensorrtUserWeights(tp->name(), tp->raw_data().data(), tp->raw_data().size()));
} else if (graph_body_viewer.GetOrtValueInitializer(tp->name(), initializer_value)) {
// the initializer was marked as external data by the ORT graph at load time since it was provided in memory
size_t size = 0;
const void* ptr = nullptr;
Ort::ThrowOnError(c_api.GetTensorSizeInBytes(&initializer_value, &size));
Ort::ThrowOnError(c_api.GetTensorData(&initializer_value, &ptr));
userWeights.emplace_back(tp->name(), ptr, size);
userWeights.emplace_back(name, tp->raw_data().data(), tp->raw_data().size());
} else if (utils::HasExternalDataInMemory(*tp)) {
// only copy and take ownership of the data if none of the above conditions are met
std::unique_ptr<ONNX_NAMESPACE::TensorProto> full_init;
ORT_THROW_IF_ERROR(utils::GetTensorProtoWithDataIfInMemory(*tp, full_init));
userWeights.emplace_back(TensorrtUserWeights(std::move(full_init->name()), std::move(full_init->raw_data())));
// the initializer was marked as external data by the ORT graph at load time since it was provided in memory
if (OrtValue v; graph_body_viewer.GetOrtValueInitializer(name, v)) {
Ort::ConstValue initializer_value{&v};
const size_t size = initializer_value.GetTensorSizeInBytes();
const void* ptr = initializer_value.GetTensorRawData();
userWeights.emplace_back(name, ptr, size);
} else {
// only copy and take ownership of the data if none of the above conditions are met
std::unique_ptr<ONNX_NAMESPACE::TensorProto> full_init;
ORT_THROW_IF_ERROR(utils::GetTensorProtoWithDataIfInMemory(*tp, full_init));
userWeights.emplace_back(name, full_init->raw_data());
}
}
}
}

// ORT's default topological sort uses reversed DFS.
// When creating a model proto from a graph viewer, let ORT use priority-based topological sort based on node index.
// The reason is that in some cases, for example ResNet50, the default topological sort
28 changes: 24 additions & 4 deletions onnxruntime/core/providers/shared_library/provider_interfaces.h
@@ -70,13 +70,27 @@ struct IteratorHolder {
bool operator!=(const IteratorHolder& p) const { return p_->operator!=(*p.p_); }

void operator++() { p_->operator++(); }
const TResult& operator*() { return p_->operator*(); }
TResult& operator*() { return p_->operator*(); }
T* operator->() { return p_.get(); }

private:
std::unique_ptr<T> p_;
};

struct TensorProto_ConstIterator {
virtual ~TensorProto_ConstIterator() = default;
virtual bool operator!=(const TensorProto_ConstIterator& p) const = 0;
virtual void operator++() = 0;
virtual const ONNX_NAMESPACE::TensorProto& operator*() const = 0;
};

struct TensorProto_Iterator {
virtual ~TensorProto_Iterator() = default;
virtual bool operator!=(const TensorProto_Iterator& p) const = 0;
virtual void operator++() = 0;
virtual ONNX_NAMESPACE::TensorProto& operator*() const = 0;
};
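A hedged sketch of how these interfaces compose with IteratorHolder (above) for range-style iteration across the provider-bridge boundary; it assumes IteratorHolder is constructible from the unique_ptr returned by the TensorProtos__begin/__end accessors declared further down in ProviderHost:

using TensorProtoIter = IteratorHolder<TensorProto_Iterator, ONNX_NAMESPACE::TensorProto>;

void TouchAllInitializers(ProviderHost& host, ONNX_NAMESPACE::TensorProtos* protos) {
  for (TensorProtoIter it{host.TensorProtos__begin(protos)},
                       end{host.TensorProtos__end(protos)};
       it != end; ++it) {
    ONNX_NAMESPACE::TensorProto& tp = *it;  // dispatches to TensorProto_Iterator::operator*
    (void)tp;
  }
}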

struct NodeAttributes_Iterator {
virtual ~NodeAttributes_Iterator() {}

@@ -439,7 +453,8 @@ struct ProviderHost {
// GraphProto
virtual std::unique_ptr<ONNX_NAMESPACE::GraphProto> GraphProto__construct() = 0;
virtual void GraphProto__operator_delete(ONNX_NAMESPACE::GraphProto* p) = 0;
virtual void GraphProto__operator_assign(ONNX_NAMESPACE::GraphProto* p, const ONNX_NAMESPACE::GraphProto& v) = 0;
virtual ONNX_NAMESPACE::GraphProto& GraphProto__operator_assign(ONNX_NAMESPACE::GraphProto* p, const ONNX_NAMESPACE::GraphProto& v) = 0;
virtual ONNX_NAMESPACE::GraphProto& GraphProto__operator_move_assign(ONNX_NAMESPACE::GraphProto* p, ONNX_NAMESPACE::GraphProto&& v) = 0;
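Returning a reference (rather than void) lets the provider-side wrapper's operator= chain; a sketch of the wrapper pattern under assumptions (g_host is the conventional host pointer in the provider bridge; the real wrapper code may differ):

// Provider-side wrapper forwarding assignment through the host interface.
GraphProto& GraphProto::operator=(const GraphProto& v) {
  return g_host->GraphProto__operator_assign(this, v);
}
GraphProto& GraphProto::operator=(GraphProto&& v) {
  return g_host->GraphProto__operator_move_assign(this, std::move(v));
}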

virtual const ONNX_NAMESPACE::ValueInfoProto& GraphProto__input(const ONNX_NAMESPACE::GraphProto* p, int index) = 0;
virtual ONNX_NAMESPACE::ValueInfoProtos* GraphProto__mutable_input(ONNX_NAMESPACE::GraphProto* p) = 0;
@@ -492,7 +507,8 @@
// TensorProto
virtual std::unique_ptr<ONNX_NAMESPACE::TensorProto> TensorProto__construct() = 0;
virtual void TensorProto__operator_delete(ONNX_NAMESPACE::TensorProto* p) = 0;
virtual void TensorProto__operator_assign(ONNX_NAMESPACE::TensorProto* p, const ONNX_NAMESPACE::TensorProto& v) = 0;
virtual ONNX_NAMESPACE::TensorProto& TensorProto__operator_assign(ONNX_NAMESPACE::TensorProto* p, const ONNX_NAMESPACE::TensorProto& v) = 0;
virtual ONNX_NAMESPACE::TensorProto& TensorProto__operator_move_assign(ONNX_NAMESPACE::TensorProto* p, ONNX_NAMESPACE::TensorProto&& v) = 0;
virtual bool TensorProto__has_name(const ONNX_NAMESPACE::TensorProto* p) = 0;
virtual void TensorProto__set_name(ONNX_NAMESPACE::TensorProto* p, const ::std::string& name) = 0;
virtual const ::std::string& TensorProto__name(const ONNX_NAMESPACE::TensorProto* p) = 0;
@@ -521,8 +537,12 @@

// TensorProtos
virtual ONNX_NAMESPACE::TensorProto* TensorProtos__Add(ONNX_NAMESPACE::TensorProtos* p) = 0;
virtual int TensorProtos__size(ONNX_NAMESPACE::TensorProtos* p) = 0;
virtual int TensorProtos__size(const ONNX_NAMESPACE::TensorProtos* p) = 0;
virtual ONNX_NAMESPACE::TensorProto& TensorProtos__at(ONNX_NAMESPACE::TensorProtos* p, int index) = 0;
virtual std::unique_ptr<TensorProto_ConstIterator> TensorProtos__begin(const ONNX_NAMESPACE::TensorProtos* p) = 0;
virtual std::unique_ptr<TensorProto_ConstIterator> TensorProtos__end(const ONNX_NAMESPACE::TensorProtos* p) = 0;
virtual std::unique_ptr<TensorProto_Iterator> TensorProtos__begin(ONNX_NAMESPACE::TensorProtos* p) = 0;
virtual std::unique_ptr<TensorProto_Iterator> TensorProtos__end(ONNX_NAMESPACE::TensorProtos* p) = 0;

// TensorShapeProto_Dimension
virtual int TensorShapeProto_Dimension__value_case(const ONNX_NAMESPACE::TensorShapeProto_Dimension* p) = 0;