[AUTO] Load cached model to target device W/O CPU accelerating (#24211)
### Details:
- Enable AUTO to retrieve the hash ID of the cached model.
- Load the cached model directly to the GPU under AUTO when a cached blob exists and `ov::intel_auto::enable_startup_fallback` has not been set explicitly.

### Tickets:
- CVS-140059
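
For reference, a minimal sketch of the user-facing flow this change speeds up, assuming a model file `model.xml` and a writable `model_cache` directory (illustrative, not part of this commit):

```cpp
#include <openvino/openvino.hpp>

int main() {
    ov::Core core;
    // Enable model caching; the first compilation serializes a blob named after the model's hash.
    core.set_property(ov::cache_dir("model_cache"));

    // First run: AUTO begins inference on CPU while the GPU compiles in the background.
    // Later runs: with this change, AUTO finds the cached GPU blob via its hash ID and
    // loads it straight to the GPU, skipping the CPU-assisted startup.
    ov::CompiledModel compiled = core.compile_model("model.xml", "AUTO:GPU,CPU");

    ov::InferRequest request = compiled.create_infer_request();
    request.infer();
    return 0;
}
```

Setting `ov::intel_auto::enable_startup_fallback` explicitly keeps the configured behavior regardless of the cache state.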
yangwang201911 committed May 21, 2024
1 parent 415ba28 commit 01de846
Showing 11 changed files with 168 additions and 77 deletions.
@@ -11,13 +11,13 @@
 #include <string>

 #include "openvino/core/any.hpp"
 #include "openvino/runtime/icompiled_model.hpp"
 #include "openvino/runtime/tensor.hpp"

 namespace ov {

 class Tensor;
 class Model;

-struct ModelCache final {
+class OPENVINO_RUNTIME_API ModelCache {
 public:
     static std::string calculate_file_info(const std::string& filePath);

     static std::string compute_hash(const std::shared_ptr<const ov::Model>& model, const ov::AnyMap& compileOptions);
@@ -37,15 +37,15 @@ class CompiledBlobHeader final {
     CompiledBlobHeader();
     CompiledBlobHeader(const std::string& ieVersion, const std::string& fileInfo, const std::string& runtimeInfo);

-    const std::string& getIeVersion() const {
+    const std::string& get_openvino_version() const {
         return m_ieVersion;
     }

-    const std::string& getFileInfo() const {
+    const std::string& get_file_info() const {
         return m_fileInfo;
     }

-    const std::string& getRuntimeInfo() const {
+    const std::string& get_runtime_info() const {
         return m_runtimeInfo;
     }

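Exporting `ModelCache` with `OPENVINO_RUNTIME_API` is what allows the AUTO plugin to compute the same hash ID the core uses for cache lookups. A hypothetical caller might look like this (the `core` handle and the empty config are assumptions for illustration):

```cpp
#include <memory>
#include <string>

#include "openvino/runtime/compilation_context.hpp"
#include "openvino/runtime/core.hpp"

std::string blob_hash_for(ov::Core& core) {
    std::shared_ptr<const ov::Model> model = core.read_model("model.xml", "");
    ov::AnyMap compile_options{};  // the compile-time properties that participate in the hash
    // <hash>.blob is the cache entry the core would create for this model + config.
    return ov::ModelCache::compute_hash(model, compile_options);
}
```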
src/inference/dev_api/openvino/runtime/icore.hpp (2 additions, 0 deletions)
@@ -54,6 +54,8 @@ class OPENVINO_RUNTIME_API ICore {
      */
     virtual std::shared_ptr<ov::Model> read_model(const std::string& model_path, const std::string& bin_path) const = 0;

+    virtual ov::AnyMap create_compile_config(const std::string& device_name, const ov::AnyMap& origConfig) const = 0;
+
     /**
      * @brief Creates a compiled model from a model object.
      *
@@ -2,8 +2,6 @@
 // SPDX-License-Identifier: Apache-2.0
 //

-#include "compilation_context.hpp"
-
 #include <sys/stat.h>
 #include <sys/types.h>

@@ -14,6 +12,7 @@
#include "itt.hpp"
#include "openvino/core/parallel.hpp"
#include "openvino/pass/manager.hpp"
#include "openvino/runtime/compilation_context.hpp"
#include "openvino/util/file_util.hpp"
#include "openvino/util/xml_parse_utils.hpp"
#include "transformations/hash.hpp"
@@ -25,22 +24,12 @@
 #endif

 namespace ov {

 template <typename T>
 static uint64_t hash_combine(uint64_t seed, const T& a) {
     // Hash combine formula from boost
     return seed ^ (std::hash<T>()(a) + 0x9e3779b9 + (seed << 6) + (seed >> 2));
 }
-
-template <typename T>
-static int32_t as_int32_t(T v) {
-    return static_cast<int32_t>(v);
-}
-
-}  // namespace ov
-
-namespace ov {
-
 std::string ModelCache::calculate_file_info(const std::string& filePath) {
     uint64_t seed = 0;
     auto absPath = filePath;
@@ -76,25 +65,25 @@ std::string ModelCache::compute_hash(const std::shared_ptr<const ov::Model>& model

     // 2. Compute hash on serialized data and options
     for (const auto& kvp : compileOptions) {
-        seed = ov::hash_combine(seed, kvp.first + kvp.second.as<std::string>());
+        seed = hash_combine(seed, kvp.first + kvp.second.as<std::string>());
     }

     // 3. Add runtime information which may not be serialized
     for (const auto& op : model->get_ordered_ops()) {
         const auto& rt = op->get_rt_info();
         for (const auto& rtMapData : rt) {
-            seed = ov::hash_combine(seed, rtMapData.first);
+            seed = hash_combine(seed, rtMapData.first);
             std::stringstream strm;
             rtMapData.second.print(strm);
-            seed = ov::hash_combine(seed, strm.str());
+            seed = hash_combine(seed, strm.str());
         }
     }

     return std::to_string(seed);
 }

 std::string ModelCache::compute_hash(const std::string& modelName, const ov::AnyMap& compileOptions) {
-    OV_ITT_SCOPE(FIRST_INFERENCE, ov::itt::domains::ReadTime, "ModelCache::compute_hash - ModelName");
+    OV_ITT_SCOPE(FIRST_INFERENCE, ov::itt::domains::ReadTime, "ModelCache::compute_hash - Model");
     uint64_t seed = 0;
     try {
         seed = hash_combine(seed, ov::util::get_absolute_file_path(modelName));
@@ -111,7 +100,7 @@
 std::string ModelCache::compute_hash(const std::string& modelStr,
                                      const ov::Tensor& tensor,
                                      const ov::AnyMap& compileOptions) {
-    OV_ITT_SCOPE(FIRST_INFERENCE, ov::itt::domains::ReadTime, "ModelCache::compute_hash - Model Memory");
+    OV_ITT_SCOPE(FIRST_INFERENCE, ov::itt::domains::ReadTime, "ModelCache::compute_hash - Model");
     uint64_t seed = 0;
     // model string
     seed = hash_combine(seed, modelStr);
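The boost-style `hash_combine` retained above folds each value into a running seed; the decimal string of the final seed becomes the cache file name. A self-contained sketch of the same formula (sample inputs are illustrative):

```cpp
#include <cstdint>
#include <functional>
#include <iostream>
#include <string>

// Same formula as in compilation_context.cpp: boost's hash_combine constant plus shifts.
template <typename T>
static uint64_t hash_combine(uint64_t seed, const T& a) {
    return seed ^ (std::hash<T>()(a) + 0x9e3779b9 + (seed << 6) + (seed >> 2));
}

int main() {
    uint64_t seed = 0;
    seed = hash_combine(seed, std::string("serialized-model-bytes"));
    seed = hash_combine(seed, std::string("PERFORMANCE_HINT=LATENCY"));
    std::cout << std::to_string(seed) << "\n";  // hash ID as written to the cache dir
    return 0;
}
```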
src/inference/src/dev/core_impl.cpp (5 additions, 4 deletions)
@@ -7,7 +7,6 @@
 #include <memory>

 #include "check_network_batchable.hpp"
-#include "compilation_context.hpp"
 #include "itt.hpp"
 #include "model_reader.hpp"
 #include "openvino/core/any.hpp"
@@ -18,6 +17,7 @@
#include "openvino/core/version.hpp"
#include "openvino/opsets/opset.hpp"
#include "openvino/pass/manager.hpp"
#include "openvino/runtime/compilation_context.hpp"
#include "openvino/runtime/device_id_parser.hpp"
#include "openvino/runtime/icompiled_model.hpp"
#include "openvino/runtime/internal_properties.hpp"
@@ -1403,21 +1403,22 @@ ov::SoPtr<ov::ICompiledModel> ov::CoreImpl::load_model_from_cache(
         try {
             ov::CompiledBlobHeader header;
             networkStream >> header;
-            if (header.getFileInfo() != ov::ModelCache::calculate_file_info(cacheContent.modelPath)) {
+            if (header.get_file_info() != ov::ModelCache::calculate_file_info(cacheContent.modelPath)) {
                 // Original file is changed, don't use cache
                 OPENVINO_THROW("Original model file is changed");
             }
             if (util::contains(plugin.get_property(ov::internal::supported_properties),
                                ov::internal::compiled_model_runtime_properties_supported.name())) {
                 ov::AnyMap compiled_model_runtime_properties = {
-                    {ov::internal::compiled_model_runtime_properties.name(), std::string(header.getRuntimeInfo())}};
+                    {ov::internal::compiled_model_runtime_properties.name(),
+                     std::string(header.get_runtime_info())}};
                 auto res = plugin.get_property(ov::internal::compiled_model_runtime_properties_supported.name(),
                                                compiled_model_runtime_properties);
                 if (!res.as<bool>()) {
                     OPENVINO_THROW("Original model runtime properties have been changed, not supported anymore!");
                 }
             } else {
-                if (header.getIeVersion() != ov::get_openvino_version().buildNumber) {
+                if (header.get_openvino_version() != ov::get_openvino_version().buildNumber) {
                     // Build number mismatch, don't use this cache
                     OPENVINO_THROW("Version does not match");
                 }
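The header checks above are what AUTO now relies on when probing the cache before deciding to skip CPU-assisted startup. A hedged sketch of the same validation against a cached blob (the blob path is hypothetical, and this reproduces only the version fallback branch):

```cpp
#include <fstream>
#include <string>

#include "openvino/core/version.hpp"
#include "openvino/runtime/compilation_context.hpp"

bool cached_blob_is_usable(const std::string& blob_path) {
    std::ifstream blob(blob_path, std::ios::binary);
    if (!blob)
        return false;
    ov::CompiledBlobHeader header;
    blob >> header;  // parses the header, as load_model_from_cache does
    // Fallback check for plugins that cannot validate runtime properties themselves.
    return header.get_openvino_version() == ov::get_openvino_version().buildNumber;
}
```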
src/inference/src/dev/core_impl.hpp (3 additions, 0 deletions)
@@ -165,6 +165,9 @@ class CoreImpl : public ov::ICore, public std::enable_shared_from_this<ov::ICore> {
     bool device_supports_cache_dir(const ov::Plugin& plugin) const;

     ov::AnyMap create_compile_config(const ov::Plugin& plugin, const ov::AnyMap& origConfig) const;
+    ov::AnyMap create_compile_config(const std::string& device_name, const ov::AnyMap& origConfig) const override {
+        return create_compile_config(get_plugin(device_name), origConfig);
+    }

     bool is_hidden_device(const std::string& device_name) const;
     void register_plugin_in_registry_unsafe(const std::string& device_name, PluginDescriptor& desc);
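Exposing `create_compile_config` on `ov::ICore` lets a plugin that only holds an `ICore` pointer, such as AUTO, flatten a user config into the exact per-device config the core hashes. A hypothetical helper (the function name and the `"GPU"` target are assumptions):

```cpp
#include <memory>
#include <string>

#include "openvino/runtime/compilation_context.hpp"
#include "openvino/runtime/icore.hpp"

std::string cached_blob_hash(const std::shared_ptr<ov::ICore>& core,
                             const std::shared_ptr<const ov::Model>& model,
                             const ov::AnyMap& user_config) {
    // Flatten the user config into the per-device compile config used for hashing.
    ov::AnyMap device_config = core->create_compile_config("GPU", user_config);
    return ov::ModelCache::compute_hash(model, device_config);
}
```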