diff --git a/.cspell-wordlist.txt b/.cspell-wordlist.txt
index f07211de5..9a2ffd573 100644
--- a/.cspell-wordlist.txt
+++ b/.cspell-wordlist.txt
@@ -73,4 +73,10 @@ timesteps
 Timesteps
 denoises
 denoise
-denoising
\ No newline at end of file
+denoising
+threadpool
+chrono
+setpriority
+errno
+ifdef
+elif
diff --git a/apps/llm/app.json b/apps/llm/app.json
index 5eef49366..db61ee89d 100644
--- a/apps/llm/app.json
+++ b/apps/llm/app.json
@@ -58,7 +58,11 @@
         "foregroundImage": "./assets/icons/adaptive-icon.png",
         "backgroundColor": "#ffffff"
       },
-      "package": "com.anonymous.llm"
+      "package": "com.anonymous.llm",
+      "permissions": [
+        "android.permission.READ_CALENDAR",
+        "android.permission.WRITE_CALENDAR"
+      ]
     },
     "web": {
       "favicon": "./assets/icons/favicon.png"
diff --git a/packages/react-native-executorch/common/rnexecutorch/RnExecutorchInstaller.cpp b/packages/react-native-executorch/common/rnexecutorch/RnExecutorchInstaller.cpp
index ed0d37f92..6d5e48902 100644
--- a/packages/react-native-executorch/common/rnexecutorch/RnExecutorchInstaller.cpp
+++ b/packages/react-native-executorch/common/rnexecutorch/RnExecutorchInstaller.cpp
@@ -13,12 +13,8 @@
 #include <rnexecutorch/models/speech_to_text/SpeechToText.h>
 #include <rnexecutorch/models/style_transfer/StyleTransfer.h>
 #include <rnexecutorch/models/vertical_ocr/VerticalOCR.h>
-
-#if defined(__ANDROID__) && defined(__aarch64__)
-#include <executorch/extension/threadpool/cpuinfo_utils.h>
-#include <executorch/extension/threadpool/threadpool.h>
-#include <rnexecutorch/Log.h>
-#endif
+#include <rnexecutorch/threads/GlobalThreadPool.h>
+#include <rnexecutorch/threads/utils/ThreadUtils.h>
 
 namespace rnexecutorch {
 
@@ -97,21 +93,8 @@ void RnExecutorchInstaller::injectJSIBindings(
       RnExecutorchInstaller::loadModel<models::speech_to_text::SpeechToText>(
           jsiRuntime, jsCallInvoker, "loadSpeechToText"));
 
-#if defined(__ANDROID__) && defined(__aarch64__)
-  auto num_of_perf_cores =
-      ::executorch::extension::cpuinfo::get_num_performant_cores();
-  log(LOG_LEVEL::Info, "Detected ", num_of_perf_cores, " performant cores");
-  // setting num_of_cores to floor(num_of_perf_cores / 2) + 1) because depending
-  // on cpu arch as when possible we want to leave at least 2 performant cores
-  // for other tasks (setting more actually results in drop of performance). For
-  // older devices (i.e. samsung s22) resolves to 3 cores, and for newer ones
-  // (like OnePlus 12) resolves to 4, which when benchamrked gives highest
-  // throughput.
-  auto num_of_cores = static_cast<uint32_t>(num_of_perf_cores / 2) + 1;
-  ::executorch::extension::threadpool::get_threadpool()
-      ->_unsafe_reset_threadpool(num_of_cores);
-  log(LOG_LEVEL::Info, "Configuring xnnpack for ", num_of_cores, " threads");
-#endif
+  threads::utils::unsafeSetupThreadPool();
+  threads::GlobalThreadPool::initialize();
 }
 
 } // namespace rnexecutorch
diff --git a/packages/react-native-executorch/common/rnexecutorch/host_objects/ModelHostObject.h b/packages/react-native-executorch/common/rnexecutorch/host_objects/ModelHostObject.h
index f512dce9d..97f769e3a 100644
--- a/packages/react-native-executorch/common/rnexecutorch/host_objects/ModelHostObject.h
+++ b/packages/react-native-executorch/common/rnexecutorch/host_objects/ModelHostObject.h
@@ -21,6 +21,7 @@
 #include <rnexecutorch/models/speech_to_text/SpeechToText.h>
 #include <rnexecutorch/models/text_to_image/TextToImage.h>
 #include <rnexecutorch/models/vertical_ocr/VerticalOCR.h>
+#include <rnexecutorch/threads/GlobalThreadPool.h>
 
 namespace rnexecutorch {
 
@@ -201,58 +202,60 @@ template <typename Model> class ModelHostObject : public JsiHostObject {
             // We need to dispatch a thread if we want the function to be
             // asynchronous. In this thread all accesses to jsi::Runtime need to
             // be done via the callInvoker.
-            std::thread([this, promise,
-                         argsConverted = std::move(argsConverted)]() {
-              try {
-                if constexpr (std::is_void_v<decltype(std::apply(
-                                  std::bind_front(FnPtr, model),
-                                  argsConverted))>) {
-                  // For void functions, just call the function and resolve with
-                  // undefined
-                  std::apply(std::bind_front(FnPtr, model),
-                             std::move(argsConverted));
-                  callInvoker->invokeAsync([promise](jsi::Runtime &runtime) {
-                    promise->resolve(jsi::Value::undefined());
-                  });
-                } else {
-                  // For non-void functions, capture the result and convert it
-                  auto result = std::apply(std::bind_front(FnPtr, model),
-                                           std::move(argsConverted));
-                  // The result is copied. It should either be quickly copiable,
-                  // or passed with a shared_ptr.
-                  callInvoker->invokeAsync(
-                      [promise, result](jsi::Runtime &runtime) {
-                        promise->resolve(jsi_conversion::getJsiValue(
-                            std::move(result), runtime));
-                      });
-                }
-              } catch (const std::runtime_error &e) {
-                // This catch should be merged with the next two
-                // (std::runtime_error and jsi::JSError inherits from
-                // std::exception) HOWEVER react native has broken RTTI which
-                // breaks proper exception type checking. Remove when the
-                // following change is present in our version:
-                // https://github.com/facebook/react-native/commit/3132cc88dd46f95898a756456bebeeb6c248f20e
-                callInvoker->invokeAsync([e = std::move(e), promise]() {
-                  promise->reject(e.what());
+            threads::GlobalThreadPool::detach(
+                [this, promise, argsConverted = std::move(argsConverted)]() {
+                  try {
+                    if constexpr (std::is_void_v<decltype(std::apply(
+                                      std::bind_front(FnPtr, model),
+                                      argsConverted))>) {
+                      // For void functions, just call the function and resolve
+                      // with undefined
+                      std::apply(std::bind_front(FnPtr, model),
+                                 std::move(argsConverted));
+                      callInvoker->invokeAsync(
+                          [promise](jsi::Runtime &runtime) {
+                            promise->resolve(jsi::Value::undefined());
+                          });
+                    } else {
+                      // For non-void functions, capture the result and convert
+                      // it
+                      auto result = std::apply(std::bind_front(FnPtr, model),
+                                               std::move(argsConverted));
+                      // The result is copied. It should either be quickly
+                      // copiable, or passed with a shared_ptr.
+                      callInvoker->invokeAsync(
+                          [promise, result](jsi::Runtime &runtime) {
+                            promise->resolve(jsi_conversion::getJsiValue(
+                                std::move(result), runtime));
+                          });
+                    }
+                  } catch (const std::runtime_error &e) {
+                    // This catch should be merged with the next two
+                    // (std::runtime_error and jsi::JSError inherits from
+                    // std::exception) HOWEVER react native has broken RTTI
+                    // which breaks proper exception type checking. Remove when
+                    // the following change is present in our version:
+                    // https://github.com/facebook/react-native/commit/3132cc88dd46f95898a756456bebeeb6c248f20e
+                    callInvoker->invokeAsync([e = std::move(e), promise]() {
+                      promise->reject(e.what());
+                    });
+                    return;
+                  } catch (const jsi::JSError &e) {
+                    callInvoker->invokeAsync([e = std::move(e), promise]() {
+                      promise->reject(e.what());
+                    });
+                    return;
+                  } catch (const std::exception &e) {
+                    callInvoker->invokeAsync([e = std::move(e), promise]() {
+                      promise->reject(e.what());
+                    });
+                    return;
+                  } catch (...) {
+                    callInvoker->invokeAsync(
+                        [promise]() { promise->reject("Unknown error"); });
+                    return;
+                  }
                 });
-                return;
-              } catch (const jsi::JSError &e) {
-                callInvoker->invokeAsync([e = std::move(e), promise]() {
-                  promise->reject(e.what());
-                });
-                return;
-              } catch (const std::exception &e) {
-                callInvoker->invokeAsync([e = std::move(e), promise]() {
-                  promise->reject(e.what());
-                });
-                return;
-              } catch (...) {
-                callInvoker->invokeAsync(
-                    [promise]() { promise->reject("Unknown error"); });
-                return;
-              }
-            }).detach();
           } catch (...) {
             promise->reject("Couldn't parse JS arguments in a native function");
           }
diff --git a/packages/react-native-executorch/common/rnexecutorch/threads/GlobalThreadPool.h b/packages/react-native-executorch/common/rnexecutorch/threads/GlobalThreadPool.h
new file mode 100644
index 000000000..ec5ddeaa0
--- /dev/null
+++ b/packages/react-native-executorch/common/rnexecutorch/threads/GlobalThreadPool.h
@@ -0,0 +1,79 @@
+// GlobalThreadPool.h
+#pragma once
+
+#include <executorch/extension/threadpool/cpuinfo_utils.h>
+#include <memory>
+#include <mutex>
+#include <optional>
+#include <rnexecutorch/Log.h>
+#include <rnexecutorch/threads/HighPerformanceThreadPool.h>
+
+namespace rnexecutorch::threads {
+
+class GlobalThreadPool {
+public:
+  GlobalThreadPool() = delete;
+  GlobalThreadPool(const GlobalThreadPool &) = delete;
+  GlobalThreadPool &operator=(const GlobalThreadPool &) = delete;
+  GlobalThreadPool(GlobalThreadPool &&) = delete;
+  GlobalThreadPool &operator=(GlobalThreadPool &&) = delete;
+
+  static HighPerformanceThreadPool &get() {
+    if (!instance) {
+      initialize();
+    }
+    return *instance;
+  }
+
+  static void initialize(std::optional<uint32_t> numThreads = std::nullopt,
+                         ThreadConfig config = {}) {
+    std::call_once(initFlag, [&numThreads, config]() {
+      if (!numThreads) {
+        numThreads =
+            ::executorch::extension::cpuinfo::get_num_performant_cores();
+      }
+
+      log(rnexecutorch::LOG_LEVEL::Info, "Initializing global thread pool with",
+          numThreads, "threads");
+      instance = std::make_unique<HighPerformanceThreadPool>(numThreads.value(),
+                                                             config);
+    });
+  }
+
+  // Convenience methods that mirror std::thread interface
+  template <typename Func, typename... Args>
+  static auto async(Func &&func, Args &&...args) {
+    return get().submit(std::forward<Func>(func), std::forward<Args>(args)...);
+  }
+
+  template <typename Func, typename... Args>
+  static auto async_high_priority(Func &&func, Args &&...args) {
+    return get().submitWithPriority(Priority::HIGH, std::forward<Func>(func),
+                                    std::forward<Args>(args)...);
+  }
+
+  // Fire and forget (like std::thread{}.detach())
+  template <typename Func, typename... Args>
+  static void detach(Func &&func, Args &&...args) {
+    get().submitDetached(std::forward<Func>(func), std::forward<Args>(args)...);
+  }
+
+  // Execute and wait (like std::thread{}.join())
+  template <typename Func, typename... Args>
+  static auto execute(Func &&func, Args &&...args) {
+    return get().execute(std::forward<Func>(func), std::forward<Args>(args)...);
+  }
+
+  static void shutdown() {
+    if (instance) {
+      instance->shutdown();
+      instance.reset();
+    }
+  }
+
+private:
+  inline static std::unique_ptr<HighPerformanceThreadPool> instance;
+  inline static std::once_flag initFlag;
+};
+
+} // namespace rnexecutorch::threads
diff --git a/packages/react-native-executorch/common/rnexecutorch/threads/HighPerformanceThreadPool.h b/packages/react-native-executorch/common/rnexecutorch/threads/HighPerformanceThreadPool.h
new file mode 100644
index 000000000..fd856f990
--- /dev/null
+++ b/packages/react-native-executorch/common/rnexecutorch/threads/HighPerformanceThreadPool.h
@@ -0,0 +1,364 @@
+// HighPerformanceThreadPool.h
+#pragma once
+
+#include <algorithm>
+#include <atomic>
+#include <chrono>
+#include <condition_variable>
+#include <fstream>
+#include <functional>
+#include <future>
+#include <memory>
+#include <mutex>
+#include <pthread.h>
+#include <queue>
+#include <ranges>
+#include <sched.h>
+#include <sys/resource.h>
+#include <thread>
+#include <unistd.h>
+#include <vector>
+
+#include <executorch/extension/threadpool/cpuinfo_utils.h>
+#include <rnexecutorch/Log.h>
+
+#ifdef __APPLE__
+#include <sys/syscall.h>
+#endif
+
+#ifdef __ANDROID__
+#include <sys/types.h>
+#endif
+
+namespace rnexecutorch::threads {
+
+enum class Priority { LOW, NORMAL, HIGH, REALTIME };
+
+struct ThreadConfig {
+  bool pinToPerformanceCores{true};
+  std::string namePrefix{"RN_ET_Worker"};
+};
+
+class HighPerformanceThreadPool {
+public:
+  explicit HighPerformanceThreadPool(size_t numThreads = 1,
+                                     ThreadConfig cfg = ThreadConfig())
+      : config(std::move(cfg)) {
+
+#ifdef __ANDROID__
+    detectCPUTopology();
+    numThreads = std::min(numThreads, performanceCores.size());
+#endif
+
+    for (size_t i = 0; i < numThreads; i++) {
+      workers.emplace_back(&HighPerformanceThreadPool::workerThread, this, i);
+    }
+
+    log(LOG_LEVEL::Debug, "Thread pool initialized with", numThreads,
+        "workers.");
+  }
+
+  ~HighPerformanceThreadPool() { shutdown(); }
+
+  // Submit a task and get a future for the result
+  template <typename Func, typename... Args>
+  auto submit(Func &&func, Args &&...args)
+      -> std::future<decltype(func(args...))> {
+    return submitWithPriority(Priority::NORMAL, std::forward<Func>(func),
+                              std::forward<Args>(args)...);
+  }
+
+  // Submit a task with specific priority
+  template <typename Func, typename... Args>
+  auto submitWithPriority(Priority priority, Func &&func, Args &&...args)
+      -> std::future<decltype(func(args...))> {
+
+    using ReturnType = decltype(func(args...));
+
+    // Create a packaged task
+    auto boundFunc =
+        std::bind(std::forward<Func>(func), std::forward<Args>(args)...);
+    auto task = std::make_unique<Task<decltype(boundFunc), ReturnType>>(
+        std::move(boundFunc));
+    auto future = task->getFuture();
+
+    // Add to queue
+    {
+      std::scoped_lock lock(queueMutex);
+
+      if (!running) {
+        throw std::runtime_error("Thread pool is shutting down");
+      }
+
+      WorkItem item(std::move(task), priority,
+                    std::chrono::steady_clock::now());
+
+      taskQueue.push(std::move(item));
+    }
+
+    condition.notify_one();
+    return future;
+  }
+
+  // Execute a task and wait for result
+  template <typename Func, typename... Args>
+  auto execute(Func &&func, Args &&...args) -> decltype(func(args...)) {
+    auto future = submit(std::forward<Func>(func), std::forward<Args>(args)...);
+    return future.get();
+  }
+
+  // Fire and forget task
+  template <typename Func, typename... Args>
+  void submitDetached(Func &&func, Args &&...args) {
+    submit(std::forward<Func>(func), std::forward<Args>(args)...);
+    // Future is destroyed, task still runs
+  }
+
+  void shutdown() {
+    if (!running.exchange(false)) {
+      return;
+    }
+
+    condition.notify_all();
+
+    for (auto &worker : workers) {
+      if (worker.joinable()) {
+        worker.join();
+      }
+    }
+  }
+
+private:
+  // Task wrapper that can hold any callable
+  class ITask {
+  public:
+    virtual ~ITask() = default;
+    virtual void execute() = 0;
+  };
+
+  template <typename Func, typename Result> class Task : public ITask {
+  public:
+    Task(Func &&f) : func(std::forward<Func>(f)) {}
+
+    void execute() override {
+      try {
+        if constexpr (std::is_void_v<Result>) {
+          func();
+          promise.set_value();
+        } else {
+          promise.set_value(func());
+        }
+      } catch (...) {
+        promise.set_exception(std::current_exception());
+      }
+    }
+
+    std::future<Result> getFuture() { return promise.get_future(); }
+
+  private:
+    Func func;
+    std::promise<Result> promise;
+  };
+
+  class WorkItem {
+  public:
+    WorkItem() = default;
+    WorkItem(std::unique_ptr<ITask> task, Priority priority,
+             std::chrono::steady_clock::time_point enqueueTime)
+        : task(std::move(task)), priority(priority), enqueueTime(enqueueTime) {}
+
+    std::unique_ptr<ITask> task;
+
+    bool operator<(const WorkItem &other) const {
+      return priority != other.priority ? priority < other.priority
+                                        : enqueueTime > other.enqueueTime;
+    }
+
+  private:
+    Priority priority;
+    std::chrono::steady_clock::time_point enqueueTime;
+  };
+
+  // Thread pool state
+  std::vector<std::thread> workers;
+  std::priority_queue<WorkItem> taskQueue;
+  std::mutex queueMutex;
+  std::condition_variable condition;
+  std::atomic<bool> running{true};
+  std::atomic<size_t> activeWorkers{0};
+  std::atomic<size_t> totalTasksProcessed{0};
+
+#ifdef __ANDROID__
+  // Performance cores
+  std::vector<int32_t> performanceCores;
+  std::vector<int32_t> efficiencyCores;
+#endif
+
+  // Configuration
+  ThreadConfig config;
+
+  void detectCPUTopology() {
+#ifdef __ANDROID__
+    struct CoreInfo {
+      int32_t id;
+      int64_t maxFreq;
+    };
+
+    std::vector<CoreInfo> cores;
+    const auto numOfCores = std::thread::hardware_concurrency();
+
+    for (int32_t i = 0; std::cmp_less(i, numOfCores); ++i) {
+      std::string path = "/sys/devices/system/cpu/cpu" + std::to_string(i) +
+                         "/cpufreq/cpuinfo_max_freq";
+      std::ifstream file(path);
+      if (!file.good()) {
+        break;
+      }
+
+      CoreInfo info;
+      info.id = i;
+      file >> info.maxFreq;
+      cores.push_back(info);
+    }
+
+    if (cores.empty()) {
+      log(LOG_LEVEL::Debug, "Could not detect CPU topology");
+      return;
+    }
+
+    // Sort by frequency
+    std::ranges::sort(cores, [](const CoreInfo &a, const CoreInfo &b) {
+      return a.maxFreq > b.maxFreq;
+    });
+
+    // Classify cores
+    const auto numOfPerfCores =
+        ::executorch::extension::cpuinfo::get_num_performant_cores();
+
+    constexpr float kKiloToGigaRatio = 1e6;
+    for (int32_t i = 0; i < cores.size(); ++i) {
+      if (i < numOfPerfCores) {
+        performanceCores.push_back(cores[i].id);
+        log(LOG_LEVEL::Debug, "Performance core:", cores[i].id, "(",
+            cores[i].maxFreq / kKiloToGigaRatio, "GHz)");
+      } else {
+        efficiencyCores.push_back(cores[i].id);
+        log(LOG_LEVEL::Debug, "Efficiency core:", cores[i].id, "(",
+            cores[i].maxFreq / kKiloToGigaRatio, "GHz)");
+      }
+    }
+#endif
+  }
+
+#ifdef __ANDROID__
+  inline uint64_t getCurrentThreadId() { return gettid(); }
+#endif
+
+  inline void setCurrentThreadName(const std::string &name) {
+#ifdef __ANDROID__
+    pthread_setname_np(pthread_self(), name.c_str());
+#elif defined(__APPLE__)
+    pthread_setname_np(name.c_str());
+#endif
+  }
+
+  void configureThread(uint32_t workerIndex) {
+    std::string threadName = config.namePrefix + std::to_string(workerIndex);
+    setCurrentThreadName(threadName.c_str());
+
+#ifdef __ANDROID__
+    if (config.pinToPerformanceCores && !performanceCores.empty()) {
+      setCPUAffinity();
+    }
+#endif
+
+    setThreadPriority();
+
+    log(LOG_LEVEL::Debug, "Worker", workerIndex,
+        "configured:", threadName.c_str());
+  }
+
+  void setCPUAffinity() {
+    // AFAIK it is not possible on iOS
+#ifdef __ANDROID__
+    if (performanceCores.empty()) {
+      log(LOG_LEVEL::Error, "No cores specified for affinity setting");
+      return;
+    }
+
+    cpu_set_t cpuset;
+    CPU_ZERO(&cpuset);
+
+    for (int32_t core : performanceCores) {
+      CPU_SET(core, &cpuset);
+    }
+
+    pid_t tid = getCurrentThreadId();
+    log(LOG_LEVEL::Debug, "Thread id", tid);
+    if (sched_setaffinity(tid, sizeof(cpuset), &cpuset) == 0) {
+      log(LOG_LEVEL::Debug, "Thread pinned to cores:", performanceCores);
+    } else {
+      log(LOG_LEVEL::Debug, "Failed to set CPU affinity (error:", errno,
+          "). Continuing without affinity.");
+    }
+#endif
+  }
+
+  void setThreadPriority() {
+    // pthread_setschedparam doesn't work on android because permissions reasons
+    // and in general does not provide visible improvements on iOS
+
+    // Set nice value as fallback or additional priority boost
+    constexpr int nice_value = 0;
+    if (setpriority(PRIO_PROCESS, 0, nice_value) != 0) {
+      log(LOG_LEVEL::Debug, "Failed to set nice value");
+    } else {
+      log(LOG_LEVEL::Debug, "Set nice value", nice_value);
+    }
+  }
+
+  void processTask(const WorkItem &item) {
+    activeWorkers++;
+
+    try {
+      item.task->execute();
+    } catch (const std::exception &e) {
+      log(LOG_LEVEL::Error, "Task failed:", e.what());
+      activeWorkers--;
+      throw;
+    }
+
+    activeWorkers--;
+    totalTasksProcessed++;
+  }
+
+  void workerThread(int workerIndex) {
+    configureThread(workerIndex);
+
+    while (running) {
+      WorkItem item;
+
+      {
+        std::unique_lock<std::mutex> lock(queueMutex);
+        condition.wait(lock, [this] { return !taskQueue.empty() || !running; });
+
+        if (!running && taskQueue.empty()) {
+          break;
+        }
+
+        if (!taskQueue.empty()) {
+          item = std::move(const_cast<WorkItem &>(taskQueue.top()));
+          taskQueue.pop();
+        } else {
+          continue;
+        }
+      }
+
+      processTask(item);
+    }
+
+    log(LOG_LEVEL::Debug, "Worker", workerIndex, "shutting down");
+  }
+};
+
+} // namespace rnexecutorch::threads
diff --git a/packages/react-native-executorch/common/rnexecutorch/threads/utils/ThreadUtils.h b/packages/react-native-executorch/common/rnexecutorch/threads/utils/ThreadUtils.h
new file mode 100644
index 000000000..664480d38
--- /dev/null
+++ b/packages/react-native-executorch/common/rnexecutorch/threads/utils/ThreadUtils.h
@@ -0,0 +1,29 @@
+#pragma once
+
+#include <executorch/extension/threadpool/cpuinfo_utils.h>
+#include <executorch/extension/threadpool/threadpool.h>
+#include <rnexecutorch/Log.h>
+
+namespace rnexecutorch::threads::utils {
+
+void unsafeSetupThreadPool(uint32_t num_of_cores = 0) {
+  auto num_of_perf_cores =
+      ::executorch::extension::cpuinfo::get_num_performant_cores();
+  log(LOG_LEVEL::Info, "Detected ", num_of_perf_cores, " performant cores");
+  // setting num_of_cores to floor(num_of_perf_cores / 2) + 1) because
+  // depending on cpu arch as when possible we want to leave at least 2
+  // performant cores for other tasks (setting more actually results in drop
+  // of performance). For older devices (i.e. samsung s22) resolves to 3
+  // cores, and for newer ones (like OnePlus 12) resolves to 4, which when
+  // benchmarked gives highest throughput. For iPhones they usually have 2
+  // performance cores
+  auto _num_of_cores = num_of_cores
+                           ? num_of_cores
+                           : static_cast<uint32_t>(num_of_perf_cores / 2) + 1;
+  const auto threadpool = ::executorch::extension::threadpool::get_threadpool();
+  threadpool->_unsafe_reset_threadpool(_num_of_cores);
+  log(LOG_LEVEL::Info, "Configuring xnnpack for",
+      threadpool->get_thread_count(), "threads");
+}
+
+} // namespace rnexecutorch::threads::utils
diff --git a/packages/react-native-executorch/ios/libs/cpuinfo/libcpuinfo.a b/packages/react-native-executorch/ios/libs/cpuinfo/libcpuinfo.a
new file mode 100644
index 000000000..b5d389569
Binary files /dev/null and b/packages/react-native-executorch/ios/libs/cpuinfo/libcpuinfo.a differ
diff --git a/packages/react-native-executorch/ios/libs/pthreadpool/physical-arm64-release/libpthreadpool.a b/packages/react-native-executorch/ios/libs/pthreadpool/physical-arm64-release/libpthreadpool.a
new file mode 100644
index 000000000..652afff7b
Binary files /dev/null and b/packages/react-native-executorch/ios/libs/pthreadpool/physical-arm64-release/libpthreadpool.a differ
diff --git a/packages/react-native-executorch/ios/libs/pthreadpool/simulator-arm64-debug/libpthreadpool.a b/packages/react-native-executorch/ios/libs/pthreadpool/simulator-arm64-debug/libpthreadpool.a
new file mode 100644
index 000000000..e3c3053a1
Binary files /dev/null and b/packages/react-native-executorch/ios/libs/pthreadpool/simulator-arm64-debug/libpthreadpool.a differ
diff --git a/packages/react-native-executorch/react-native-executorch.podspec b/packages/react-native-executorch/react-native-executorch.podspec
index 381d3a99f..2c63c0923 100644
--- a/packages/react-native-executorch/react-native-executorch.podspec
+++ b/packages/react-native-executorch/react-native-executorch.podspec
@@ -24,6 +24,9 @@ Pod::Spec.new do |s|
     'MetalPerformanceShadersGraph'
   ]
 
+  pthreadpool_binaries_path = File.expand_path('$(PODS_TARGET_SRCROOT)/ios/libs/pthreadpool', __dir__)
+  cpuinfo_binaries_path = File.expand_path('$(PODS_TARGET_SRCROOT)/ios/libs/cpuinfo', __dir__)
+
   s.user_target_xcconfig = {
     "HEADER_SEARCH_PATHS" => "$(PODS_TARGET_SRCROOT)/third-party/include",
 
@@ -39,7 +42,9 @@ Pod::Spec.new do |s|
       "-force_load \"#{et_binaries_path}\"/libthreadpool_ios.a",
       "\"#{tokenizers_binaries_path}/physical-arm64-release/libtokenizers_cpp.a\"",
       "\"#{tokenizers_binaries_path}/physical-arm64-release/libsentencepiece.a\"",
-      "\"#{tokenizers_binaries_path}/physical-arm64-release/libtokenizers_c.a\""
+      "\"#{tokenizers_binaries_path}/physical-arm64-release/libtokenizers_c.a\"",
+      "\"#{pthreadpool_binaries_path}/physical-arm64-release/libpthreadpool.a\"",
+      "\"#{cpuinfo_binaries_path}/libcpuinfo.a\""
     ].join(' '),
 
     "OTHER_LDFLAGS[sdk=iphonesimulator*]" => [
@@ -54,7 +59,9 @@ Pod::Spec.new do |s|
       "-force_load \"#{et_binaries_path}\"/libthreadpool_simulator.a",
       "\"#{tokenizers_binaries_path}/simulator-arm64-debug/libtokenizers_cpp.a\"",
       "\"#{tokenizers_binaries_path}/simulator-arm64-debug/libsentencepiece.a\"",
-      "\"#{tokenizers_binaries_path}/simulator-arm64-debug/libtokenizers_c.a\""
+      "\"#{tokenizers_binaries_path}/simulator-arm64-debug/libtokenizers_c.a\"",
+      "\"#{pthreadpool_binaries_path}/simulator-arm64-debug/libpthreadpool.a\"",
+      "\"#{cpuinfo_binaries_path}/libcpuinfo.a\""
     ].join(' '),
 
     'EXCLUDED_ARCHS[sdk=iphonesimulator*]' => 'x86_64',
diff --git a/packages/react-native-executorch/third-party/include/executorch/extension/threadpool/cpuinfo_utils.h b/packages/react-native-executorch/third-party/include/executorch/extension/threadpool/cpuinfo_utils.h
index d559738b7..8f54889f7 100644
--- a/packages/react-native-executorch/third-party/include/executorch/extension/threadpool/cpuinfo_utils.h
+++ b/packages/react-native-executorch/third-party/include/executorch/extension/threadpool/cpuinfo_utils.h
@@ -8,7 +8,7 @@
 
 #pragma once
 
-#include <cpuinfo.h>
+#include <cpuinfo/cpuinfo.h>
 
 namespace executorch::extension::cpuinfo {
 
diff --git a/packages/react-native-executorch/third-party/include/executorch/extension/threadpool/threadpool.h b/packages/react-native-executorch/third-party/include/executorch/extension/threadpool/threadpool.h
index 95678d2c6..edd3be02a 100644
--- a/packages/react-native-executorch/third-party/include/executorch/extension/threadpool/threadpool.h
+++ b/packages/react-native-executorch/third-party/include/executorch/extension/threadpool/threadpool.h
@@ -12,7 +12,7 @@
 #include <memory>
 #include <mutex>
 
-#include <pthreadpool.h>
+#include <pthreadpool/pthreadpool.h>
 
 namespace executorch::extension::threadpool {