From 4aca3b1183c8360465b994dd380e4b9eb04ad122 Mon Sep 17 00:00:00 2001
From: Jack Zhang <32371937+jackzhxng@users.noreply.github.com>
Date: Wed, 9 Apr 2025 14:18:55 -0700
Subject: [PATCH 01/16] Bump external tokenizer submodule version

---
 extension/llm/tokenizers | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/extension/llm/tokenizers b/extension/llm/tokenizers
index d70f5a76055..4167468eff0 160000
--- a/extension/llm/tokenizers
+++ b/extension/llm/tokenizers
@@ -1 +1 @@
-Subproject commit d70f5a760552d8d3bb288cdd93eebde477bb6eb0
+Subproject commit 4167468eff098f93a431bfbc9ae23e76c8d26ed3

From 13408fb89d30f2d4b6b5953a6c87314b13202b05 Mon Sep 17 00:00:00 2001
From: Jack Zhang <32371937+jackzhxng@users.noreply.github.com>
Date: Mon, 10 Mar 2025 16:32:58 -0700
Subject: [PATCH 02/16] Add stub impl of json tokenizer to llama runner

---
 examples/models/llama/runner/CMakeLists.txt |  4 ++
 examples/models/llama/runner/runner.cpp     | 42 +++++++++++++--------
 examples/models/llama/runner/targets.bzl    |  1 +
 extension/llm/tokenizer/hf_tokenizer.cpp    | 41 ++++++++++++++++++++
 extension/llm/tokenizer/hf_tokenizer.h      | 34 +++++++++++++++++
 extension/llm/tokenizer/targets.bzl         | 17 +++++++++
 6 files changed, 123 insertions(+), 16 deletions(-)
 create mode 100644 extension/llm/tokenizer/hf_tokenizer.cpp
 create mode 100644 extension/llm/tokenizer/hf_tokenizer.h

diff --git a/examples/models/llama/runner/CMakeLists.txt b/examples/models/llama/runner/CMakeLists.txt
index 04fe23e4d82..9c39b35199f 100644
--- a/examples/models/llama/runner/CMakeLists.txt
+++ b/examples/models/llama/runner/CMakeLists.txt
@@ -47,6 +47,10 @@ list(
 )
 list(APPEND _llama_runner__srcs
      ${CMAKE_CURRENT_SOURCE_DIR}/../tokenizer/llama_tiktoken.cpp
+   )
+list(
+  APPEND _llama_runner__srcs
+  ${CMAKE_CURRENT_SOURCE_DIR}/../../../../extension/llm/tokenizer/hf_tokenizer.cpp
 )
 
 if(CMAKE_TOOLCHAIN_IOS
diff --git a/examples/models/llama/runner/runner.cpp b/examples/models/llama/runner/runner.cpp
index 0ecc611ef6c..0014a476864 100644
--- a/examples/models/llama/runner/runner.cpp
+++ b/examples/models/llama/runner/runner.cpp
@@ -17,6 +17,7 @@
 
 #include <executorch/examples/models/llama/tokenizer/llama_tiktoken.h>
 #include <pytorch/tokenizers/llama2c_tokenizer.h>
+#include <pytorch/tokenizers/hf_tokenizer.h>
 
 namespace example {
 
@@ -77,24 +78,33 @@ Error Runner::load() {
     return Error::Ok;
   }
   ET_CHECK_OK_OR_RETURN_ERROR(module_->load_method("forward"));
-  // load tokenizer. Assuming tiktoken is the default tokenizer
+  // Load tokenizer.
   tokenizer_ = nullptr;
-  tokenizer_ = get_tiktoken_for_llama();
-  ::tokenizers::Error err = tokenizer_->load(tokenizer_path_);
-  // Rely on tiktoken to throw error if the artifact is incompatible. Then we
-  // fallback to BPE tokenizer.
-  if (err != ::tokenizers::Error::Ok) {
+  // Check if tokenizer_path_ ends with ".json".
+  if (tokenizer_path_.size() >= 5 &&
+      tokenizer_path_.compare(tokenizer_path_.size() - 5, 5, ".json") == 0) {
+    tokenizer_ = std::make_unique<tokenizers::HFTokenizer>();
+    tokenizer_->load(tokenizer_path_);
     ET_LOG(
-        Info,
-        "Failed to load %s as a Tiktoken artifact, trying BPE tokenizer",
-        tokenizer_path_.c_str());
-    tokenizer_.reset();
-    tokenizer_ = std::make_unique<::tokenizers::Llama2cTokenizer>();
-    err = tokenizer_->load(tokenizer_path_);
-    ET_CHECK_TK_OK_OR_RETURN_ERROR(
-        err,
-        "Failed to load %s as a llama2.c tokenizer artifact",
-        tokenizer_path_.c_str());
+        Info, "Loaded tokenizer %s as HF tokenizer", tokenizer_path_.c_str());
+  } else {
+    ::tokenizers::Error err = tokenizer_->load(tokenizer_path_);
+    tokenizer_ = get_tiktoken_for_llama();
+    // Rely on tiktoken to throw error if the artifact is incompatible. Then we
+    // fallback to BPE tokenizer.
+    if (err != ::tokenizers::Error::Ok) {
+      ET_LOG(
+	  Info,
+	  "Failed to load %s as a Tiktoken artifact, trying BPE tokenizer",
+	  tokenizer_path_.c_str());
+      tokenizer_.reset();
+      tokenizer_ = std::make_unique<::tokenizers::Llama2cTokenizer>();
+      err = tokenizer_->load(tokenizer_path_);
+      ET_CHECK_TK_OK_OR_RETURN_ERROR(
+	  err,
+	  "Failed to load %s as a llama2.c tokenizer artifact",
+	  tokenizer_path_.c_str());
+    }
   }
 
   ET_LOG(Info, "Reading metadata from model");
diff --git a/examples/models/llama/runner/targets.bzl b/examples/models/llama/runner/targets.bzl
index 60fc1f2b74d..5797c25e6d3 100644
--- a/examples/models/llama/runner/targets.bzl
+++ b/examples/models/llama/runner/targets.bzl
@@ -49,6 +49,7 @@ def define_common_targets():
                 "//executorch/runtime/core/exec_aten/util:tensor_util" + aten_suffix,
                 "//executorch/examples/models/llama/tokenizer:tiktoken",
                 "//pytorch/tokenizers:llama2c_tokenizer",
+		"//pytorch/tokenizers:hf_tokenizer",
             ] + (_get_operator_lib(aten)) + ([
                 # Vulkan API currently cannot build on some platforms (e.g. Apple, FBCODE)
                 # Therefore enable it explicitly for now to avoid failing tests
diff --git a/extension/llm/tokenizer/hf_tokenizer.cpp b/extension/llm/tokenizer/hf_tokenizer.cpp
new file mode 100644
index 00000000000..c7a32127335
--- /dev/null
+++ b/extension/llm/tokenizer/hf_tokenizer.cpp
@@ -0,0 +1,41 @@
+#include <executorch/extension/llm/tokenizer/hf_tokenizer.h>
+
+#include <executorch/runtime/core/error.h>
+#include <executorch/runtime/core/result.h>
+#include <string>
+#include <vector>
+
+using ::executorch::runtime::Error;
+using ::executorch::runtime::Result;
+
+namespace executorch {
+namespace extension {
+namespace llm {
+
+HfTokenizer::~HfTokenizer() {}
+
+Error HfTokenizer::load(const std::string& tokenizer_path) {
+  // Stub implementation for loading the tokenizer.
+  // TODO: Implement actual loading logic.
+  return ::executorch::runtime::Error::Ok;
+}
+
+Result<std::vector<uint64_t>>
+HfTokenizer::encode(const std::string& input, int8_t bos, int8_t eos) const {
+  // Stub implementation for encoding.
+  // TODO: Implement actual encoding logic.
+  std::vector<uint64_t> tokens;
+  return ::executorch::runtime::Result<std::vector<uint64_t>>(tokens);
+}
+
+Result<std::string> HfTokenizer::decode(uint64_t prev_token, uint64_t token)
+    const {
+  // Stub implementation for decoding.
+  // TODO: Implement actual decoding logic.
+  std::string decoded_string;
+  return ::executorch::runtime::Result<std::string>(decoded_string);
+}
+
+} // namespace llm
+} // namespace extension
+} // namespace executorch
diff --git a/extension/llm/tokenizer/hf_tokenizer.h b/extension/llm/tokenizer/hf_tokenizer.h
new file mode 100644
index 00000000000..eee2d2426b5
--- /dev/null
+++ b/extension/llm/tokenizer/hf_tokenizer.h
@@ -0,0 +1,34 @@
+/*
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
+ * All rights reserved.
+ *
+ * This source code is licensed under the BSD-style license found in the
+ * LICENSE file in the root directory of this source tree.
+ */
+
+#pragma once
+
+#include <executorch/extension/llm/tokenizer/tokenizer.h>
+
+namespace executorch {
+namespace extension {
+namespace llm {
+
+class ET_EXPERIMENTAL HfTokenizer : public Tokenizer {
+ public:
+  explicit HfTokenizer(){};
+  ~HfTokenizer() override;
+
+  ::executorch::runtime::Error load(const std::string& tokenizer_path) override;
+
+  ::executorch::runtime::Result<std::vector<uint64_t>>
+  encode(const std::string& input, int8_t bos, int8_t eos) const override;
+
+  ::executorch::runtime::Result<std::string> decode(
+      uint64_t prev_token,
+      uint64_t token) const override;
+};
+
+} // namespace llm
+} // namespace extension
+} // namespace executorch
diff --git a/extension/llm/tokenizer/targets.bzl b/extension/llm/tokenizer/targets.bzl
index 7b545054390..1bdf305094b 100644
--- a/extension/llm/tokenizer/targets.bzl
+++ b/extension/llm/tokenizer/targets.bzl
@@ -96,3 +96,20 @@ def define_common_targets():
             "re2",
         ],
     )
+
+    runtime.cxx_library(
+        name = "hf_tokenizer",
+        srcs = [
+            "hf_tokenizer.cpp",
+        ],
+        exported_headers = [
+            "hf_tokenizer.h",
+        ],
+        exported_deps = [
+            ":tokenizer_header",
+            "//executorch/runtime/core:core",
+        ],
+        visibility = [
+            "@EXECUTORCH_CLIENTS",
+        ],
+    )

From d790908f565a8512a092745365e66ebdab20949e Mon Sep 17 00:00:00 2001
From: Jack Zhang <32371937+jackzhxng@users.noreply.github.com>
Date: Mon, 24 Mar 2025 04:28:27 -0700
Subject: [PATCH 03/16] Add json and llama.cpp-unicode deps to llama runner CMake

---
 examples/models/llama/runner/CMakeLists.txt | 15 ++++++++++-----
 examples/models/llama/runner/runner.cpp     |  2 ++
 examples/models/llama/runner/targets.bzl    |  2 +-
 extension/llm/runner/CMakeLists.txt         |  7 +++++++
 4 files changed, 20 insertions(+), 6 deletions(-)

diff --git a/examples/models/llama/runner/CMakeLists.txt b/examples/models/llama/runner/CMakeLists.txt
index 9c39b35199f..4ee2e6214c2 100644
--- a/examples/models/llama/runner/CMakeLists.txt
+++ b/examples/models/llama/runner/CMakeLists.txt
@@ -47,10 +47,6 @@ list(
 )
 list(APPEND _llama_runner__srcs
      ${CMAKE_CURRENT_SOURCE_DIR}/../tokenizer/llama_tiktoken.cpp
-   )
-list(
-  APPEND _llama_runner__srcs
-  ${CMAKE_CURRENT_SOURCE_DIR}/../../../../extension/llm/tokenizer/hf_tokenizer.cpp
 )
 
 if(CMAKE_TOOLCHAIN_IOS
@@ -77,10 +73,19 @@ add_subdirectory(
   ${EXECUTORCH_ROOT}/extension/llm/tokenizers/third-party/re2
   ${CMAKE_CURRENT_BINARY_DIR}/re2
 )
+add_subdirectory(
+  ${EXECUTORCH_ROOT}/extension/llm/tokenizers/third-party/json
+  ${CMAKE_CURRENT_BINARY_DIR}/json
+)
+target_include_directories(llama_runner
+  PRIVATE ${CMAKE_INSTALL_PREFIX}/include
+  ${EXECUTORCH_ROOT}/extension/llm/tokenizers/third-party/llama.cpp-unicode/include
+  ${EXECUTORCH_ROOT}/extension/llm/tokenizers/third-party/llama.cpp-unicode/src
+)
 set(CMAKE_POSITION_INDEPENDENT_CODE ${_pic_flag})
 
 set(llama_runner_deps executorch extension_data_loader extension_module
-                      extension_tensor re2::re2
+                      extension_tensor re2::re2 nlohmann_json::nlohmann_json
 )
 
 target_link_libraries(llama_runner PUBLIC ${llama_runner_deps})
diff --git a/examples/models/llama/runner/runner.cpp b/examples/models/llama/runner/runner.cpp
index 0014a476864..d81dd40252a 100644
--- a/examples/models/llama/runner/runner.cpp
+++ b/examples/models/llama/runner/runner.cpp
@@ -82,8 +82,10 @@ Error Runner::load() {
   tokenizer_ = nullptr;
   // Check if tokenizer_path_ ends with ".json".
   if (tokenizer_path_.size() >= 5 &&
+      
       tokenizer_path_.compare(tokenizer_path_.size() - 5, 5, ".json") == 0) {
     tokenizer_ = std::make_unique<tokenizers::HFTokenizer>();
+    ET_LOG(Info, "Loading json tokenizer");
     tokenizer_->load(tokenizer_path_);
     ET_LOG(
         Info, "Loaded tokenizer %s as HF tokenizer", tokenizer_path_.c_str());
diff --git a/examples/models/llama/runner/targets.bzl b/examples/models/llama/runner/targets.bzl
index 5797c25e6d3..158202cf55a 100644
--- a/examples/models/llama/runner/targets.bzl
+++ b/examples/models/llama/runner/targets.bzl
@@ -49,7 +49,7 @@ def define_common_targets():
                 "//executorch/runtime/core/exec_aten/util:tensor_util" + aten_suffix,
                 "//executorch/examples/models/llama/tokenizer:tiktoken",
                 "//pytorch/tokenizers:llama2c_tokenizer",
-		"//pytorch/tokenizers:hf_tokenizer",
+                "//pytorch/tokenizers:hf_tokenizer",
             ] + (_get_operator_lib(aten)) + ([
                 # Vulkan API currently cannot build on some platforms (e.g. Apple, FBCODE)
                 # Therefore enable it explicitly for now to avoid failing tests
diff --git a/extension/llm/runner/CMakeLists.txt b/extension/llm/runner/CMakeLists.txt
index 993314ccd63..55d599ff998 100644
--- a/extension/llm/runner/CMakeLists.txt
+++ b/extension/llm/runner/CMakeLists.txt
@@ -49,6 +49,13 @@ set(runner_deps executorch extension_data_loader extension_module
 
 target_link_libraries(extension_llm_runner PUBLIC ${runner_deps})
 
+target_include_directories(
+  extension_llm_runner
+  PUBLIC
+    ${EXECUTORCH_ROOT}/extension/llm/tokenizers/third-party/llama.cpp-unicode/include
+    ${EXECUTORCH_ROOT}/extension/llm/tokenizers/third-party/llama.cpp-unicode/src
+)
+
 target_include_directories(
   extension_llm_runner INTERFACE ${_common_include_directories}
                                  ${EXECUTORCH_ROOT}/extension/llm/tokenizers/include

From 97110acff06785e98e707a279481755a165d0299 Mon Sep 17 00:00:00 2001
From: Jack Zhang <32371937+jackzhxng@users.noreply.github.com>
Date: Mon, 24 Mar 2025 05:03:30 -0700
Subject: [PATCH 04/16] Remove stub

---
 extension/llm/tokenizer/hf_tokenizer.cpp | 41 ------------------------
 extension/llm/tokenizer/hf_tokenizer.h   | 34 --------------------
 extension/llm/tokenizer/targets.bzl      | 17 ----------
 3 files changed, 92 deletions(-)
 delete mode 100644 extension/llm/tokenizer/hf_tokenizer.cpp
 delete mode 100644 extension/llm/tokenizer/hf_tokenizer.h

diff --git a/extension/llm/tokenizer/hf_tokenizer.cpp b/extension/llm/tokenizer/hf_tokenizer.cpp
deleted file mode 100644
index c7a32127335..00000000000
--- a/extension/llm/tokenizer/hf_tokenizer.cpp
+++ /dev/null
@@ -1,41 +0,0 @@
-#include <executorch/extension/llm/tokenizer/hf_tokenizer.h>
-
-#include <executorch/runtime/core/error.h>
-#include <executorch/runtime/core/result.h>
-#include <string>
-#include <vector>
-
-using ::executorch::runtime::Error;
-using ::executorch::runtime::Result;
-
-namespace executorch {
-namespace extension {
-namespace llm {
-
-HfTokenizer::~HfTokenizer() {}
-
-Error HfTokenizer::load(const std::string& tokenizer_path) {
-  // Stub implementation for loading the tokenizer.
-  // TODO: Implement actual loading logic.
-  return ::executorch::runtime::Error::Ok;
-}
-
-Result<std::vector<uint64_t>>
-HfTokenizer::encode(const std::string& input, int8_t bos, int8_t eos) const {
-  // Stub implementation for encoding.
-  // TODO: Implement actual encoding logic.
-  std::vector<uint64_t> tokens;
-  return ::executorch::runtime::Result<std::vector<uint64_t>>(tokens);
-}
-
-Result<std::string> HfTokenizer::decode(uint64_t prev_token, uint64_t token)
-    const {
-  // Stub implementation for decoding.
-  // TODO: Implement actual decoding logic.
-  std::string decoded_string;
-  return ::executorch::runtime::Result<std::string>(decoded_string);
-}
-
-} // namespace llm
-} // namespace extension
-} // namespace executorch
diff --git a/extension/llm/tokenizer/hf_tokenizer.h b/extension/llm/tokenizer/hf_tokenizer.h
deleted file mode 100644
index eee2d2426b5..00000000000
--- a/extension/llm/tokenizer/hf_tokenizer.h
+++ /dev/null
@@ -1,34 +0,0 @@
-/*
- * Copyright (c) Meta Platforms, Inc. and affiliates.
- * All rights reserved.
- *
- * This source code is licensed under the BSD-style license found in the
- * LICENSE file in the root directory of this source tree.
- */
-
-#pragma once
-
-#include <executorch/extension/llm/tokenizer/tokenizer.h>
-
-namespace executorch {
-namespace extension {
-namespace llm {
-
-class ET_EXPERIMENTAL HfTokenizer : public Tokenizer {
- public:
-  explicit HfTokenizer(){};
-  ~HfTokenizer() override;
-
-  ::executorch::runtime::Error load(const std::string& tokenizer_path) override;
-
-  ::executorch::runtime::Result<std::vector<uint64_t>>
-  encode(const std::string& input, int8_t bos, int8_t eos) const override;
-
-  ::executorch::runtime::Result<std::string> decode(
-      uint64_t prev_token,
-      uint64_t token) const override;
-};
-
-} // namespace llm
-} // namespace extension
-} // namespace executorch
diff --git a/extension/llm/tokenizer/targets.bzl b/extension/llm/tokenizer/targets.bzl
index 1bdf305094b..7b545054390 100644
--- a/extension/llm/tokenizer/targets.bzl
+++ b/extension/llm/tokenizer/targets.bzl
@@ -96,20 +96,3 @@ def define_common_targets():
             "re2",
         ],
     )
-
-    runtime.cxx_library(
-        name = "hf_tokenizer",
-        srcs = [
-            "hf_tokenizer.cpp",
-        ],
-        exported_headers = [
-            "hf_tokenizer.h",
-        ],
-        exported_deps = [
-            ":tokenizer_header",
-            "//executorch/runtime/core:core",
-        ],
-        visibility = [
-            "@EXECUTORCH_CLIENTS",
-        ],
-    )

From 173308ef09318bb0905d682c2230323146102eca Mon Sep 17 00:00:00 2001
From: Jack Zhang <32371937+jackzhxng@users.noreply.github.com>
Date: Wed, 26 Mar 2025 05:38:51 -0700
Subject: [PATCH 05/16] Address PR review: extract load_tokenizer() fallback helper

---
 examples/models/llama/runner/runner.cpp | 71 +++++++++++++++----------
 1 file changed, 43 insertions(+), 28 deletions(-)

diff --git a/examples/models/llama/runner/runner.cpp b/examples/models/llama/runner/runner.cpp
index d81dd40252a..9ad82864efc 100644
--- a/examples/models/llama/runner/runner.cpp
+++ b/examples/models/llama/runner/runner.cpp
@@ -16,8 +16,8 @@
 #include <executorch/extension/llm/runner/util.h>
 
 #include <executorch/examples/models/llama/tokenizer/llama_tiktoken.h>
-#include <pytorch/tokenizers/llama2c_tokenizer.h>
 #include <pytorch/tokenizers/hf_tokenizer.h>
+#include <pytorch/tokenizers/llama2c_tokenizer.h>
 
 namespace example {
 
@@ -36,6 +36,41 @@ static constexpr auto kMaxContextLen = "get_max_context_len";
 static constexpr auto kVocabSize = "get_vocab_size";
 static constexpr auto kUseKVCache = "use_kv_cache";
 static constexpr auto kUseSDPAWithKVCache = "use_sdpa_with_kv_cache";
+
+std::unique_ptr<::tokenizers::Tokenizer> load_tokenizer(
+    const std::string& tokenizer_path) {
+  std::unique_ptr<::tokenizers::Tokenizer> tokenizer = nullptr;
+  ::tokenizers::Error err;
+
+  // First try to load as a json tokenizer.
+  {
+    auto tokenizer = std::make_unique<tokenizers::HFTokenizer>();
+    if (tokenizer->load(tokenizer_path) == ::tokenizers::Error::Ok) {
+      ET_LOG(Info, "Loaded json tokenizer");
+      return tokenizer;
+    }
+  }
+
+  // Try to load as tiktoken tokenizer.
+  {
+    auto tokenizer = get_tiktoken_for_llama();
+    if (tokenizer->load(tokenizer_path) == ::tokenizers::Error::Ok) {
+      ET_LOG(Info, "Loaded TikToken tokenizer");
+      return tokenizer;
+    }
+  }
+
+  // Try to load as BPE tokenizer.
+  {
+    auto tokenizer = std::make_unique<::tokenizers::Llama2cTokenizer>();
+    if (tokenizer->load(tokenizer_path) == ::tokenizers::Error::Ok) {
+      ET_LOG(Info, "Loaded BPE tokenizer");
+      return tokenizer;
+    }
+  }
+
+  return nullptr;
+}
 } // namespace
 
 Runner::Runner(
@@ -78,35 +113,15 @@ Error Runner::load() {
     return Error::Ok;
   }
   ET_CHECK_OK_OR_RETURN_ERROR(module_->load_method("forward"));
+
   // Load tokenizer.
-  tokenizer_ = nullptr;
-  // Check if tokenizer_path_ ends with ".json".
-  if (tokenizer_path_.size() >= 5 &&
-      
-      tokenizer_path_.compare(tokenizer_path_.size() - 5, 5, ".json") == 0) {
-    tokenizer_ = std::make_unique<tokenizers::HFTokenizer>();
-    ET_LOG(Info, "Loading json tokenizer");
-    tokenizer_->load(tokenizer_path_);
+  tokenizer_ = load_tokenizer(tokenizer_path_);
+  if (tokenizer_ == nullptr) {
     ET_LOG(
-        Info, "Loaded tokenizer %s as HF tokenizer", tokenizer_path_.c_str());
-  } else {
-    ::tokenizers::Error err = tokenizer_->load(tokenizer_path_);
-    tokenizer_ = get_tiktoken_for_llama();
-    // Rely on tiktoken to throw error if the artifact is incompatible. Then we
-    // fallback to BPE tokenizer.
-    if (err != ::tokenizers::Error::Ok) {
-      ET_LOG(
-	  Info,
-	  "Failed to load %s as a Tiktoken artifact, trying BPE tokenizer",
-	  tokenizer_path_.c_str());
-      tokenizer_.reset();
-      tokenizer_ = std::make_unique<::tokenizers::Llama2cTokenizer>();
-      err = tokenizer_->load(tokenizer_path_);
-      ET_CHECK_TK_OK_OR_RETURN_ERROR(
-	  err,
-	  "Failed to load %s as a llama2.c tokenizer artifact",
-	  tokenizer_path_.c_str());
-    }
+        Error,
+        "Failed to load %s as a llama2.c tokenizer artifact",
+        tokenizer_path_.c_str());
+    return ::executorch::runtime::Error::InvalidArgument;
   }
 
   ET_LOG(Info, "Reading metadata from model");

From dc8a31aa88e207f371c9adb5a4cc92974e80e4eb Mon Sep 17 00:00:00 2001
From: Jack Zhang <32371937+jackzhxng@users.noreply.github.com>
Date: Tue, 15 Apr 2025 14:14:24 -0700
Subject: [PATCH 06/16] Target link tokenizers library

---
 examples/models/llama/runner/CMakeLists.txt | 45 ++++++---------------
 extension/llm/runner/CMakeLists.txt         |  2 +-
 2 files changed, 13 insertions(+), 34 deletions(-)

diff --git a/examples/models/llama/runner/CMakeLists.txt b/examples/models/llama/runner/CMakeLists.txt
index 4ee2e6214c2..0807e6fa422 100644
--- a/examples/models/llama/runner/CMakeLists.txt
+++ b/examples/models/llama/runner/CMakeLists.txt
@@ -41,14 +41,6 @@ target_include_directories(
   extension_module INTERFACE ${_common_include_directories}
 )
 
-list(
-  APPEND _llama_runner__srcs
-  ${EXECUTORCH_ROOT}/extension/llm/tokenizers/src/tiktoken.cpp
-)
-list(APPEND _llama_runner__srcs
-     ${CMAKE_CURRENT_SOURCE_DIR}/../tokenizer/llama_tiktoken.cpp
-)
-
 if(CMAKE_TOOLCHAIN_IOS
    OR ANDROID
    OR APPLE
@@ -60,32 +52,8 @@ else()
   add_library(llama_runner SHARED ${_llama_runner__srcs})
 endif()
 
-# find RE2 for tokenizer, build tiktoken
-set(ABSL_ENABLE_INSTALL ON)
-set(ABSL_PROPAGATE_CXX_STD ON)
-set(_pic_flag ${CMAKE_POSITION_INDEPENDENT_CODE})
-set(CMAKE_POSITION_INDEPENDENT_CODE ON)
-add_subdirectory(
-  ${EXECUTORCH_ROOT}/extension/llm/tokenizers/third-party/abseil-cpp
-  ${CMAKE_CURRENT_BINARY_DIR}/abseil-cpp
-)
-add_subdirectory(
-  ${EXECUTORCH_ROOT}/extension/llm/tokenizers/third-party/re2
-  ${CMAKE_CURRENT_BINARY_DIR}/re2
-)
-add_subdirectory(
-  ${EXECUTORCH_ROOT}/extension/llm/tokenizers/third-party/json
-  ${CMAKE_CURRENT_BINARY_DIR}/json
-)
-target_include_directories(llama_runner
-  PRIVATE ${CMAKE_INSTALL_PREFIX}/include
-  ${EXECUTORCH_ROOT}/extension/llm/tokenizers/third-party/llama.cpp-unicode/include
-  ${EXECUTORCH_ROOT}/extension/llm/tokenizers/third-party/llama.cpp-unicode/src
-)
-set(CMAKE_POSITION_INDEPENDENT_CODE ${_pic_flag})
-
 set(llama_runner_deps executorch extension_data_loader extension_module
-                      extension_tensor re2::re2 nlohmann_json::nlohmann_json
+                      extension_tensor
 )
 
 target_link_libraries(llama_runner PUBLIC ${llama_runner_deps})
@@ -94,6 +62,17 @@ target_include_directories(
   llama_runner
   INTERFACE ${_common_include_directories}
 )
+
+# Include tokenizers dependency
+set(CMAKE_POSITION_INDEPENDENT_CODE ON)
+add_subdirectory(
+  ${EXECUTORCH_ROOT}/extension/llm/tokenizers
+  ${CMAKE_CURRENT_BINARY_DIR}/tokenizers
+)
+target_link_libraries(
+  llama_runner PUBLIC tokenizers
+)
+
 target_include_directories(
   llama_runner
   PUBLIC ${EXECUTORCH_ROOT}/extension/llm/tokenizers/include
diff --git a/extension/llm/runner/CMakeLists.txt b/extension/llm/runner/CMakeLists.txt
index 55d599ff998..c71d8f319ec 100644
--- a/extension/llm/runner/CMakeLists.txt
+++ b/extension/llm/runner/CMakeLists.txt
@@ -59,4 +59,4 @@ target_include_directories(
 target_include_directories(
   extension_llm_runner INTERFACE ${_common_include_directories}
                                  ${EXECUTORCH_ROOT}/extension/llm/tokenizers/include
-)
+			       )

From fda4ea52aa673672693987c16b7fc0e1edbf7d97 Mon Sep 17 00:00:00 2001
From: Jack Zhang <32371937+jackzhxng@users.noreply.github.com>
Date: Thu, 17 Apr 2025 17:08:49 -0700
Subject: [PATCH 07/16] Remove unused err

---
 examples/models/llama/runner/runner.cpp | 1 -
 1 file changed, 1 deletion(-)

diff --git a/examples/models/llama/runner/runner.cpp b/examples/models/llama/runner/runner.cpp
index f6ae6bb9dcd..654855d9ab4 100644
--- a/examples/models/llama/runner/runner.cpp
+++ b/examples/models/llama/runner/runner.cpp
@@ -41,7 +41,6 @@ static constexpr auto kUseSDPAWithKVCache = "use_sdpa_with_kv_cache";
 std::unique_ptr<::tokenizers::Tokenizer> load_tokenizer(
     const std::string& tokenizer_path) {
   std::unique_ptr<::tokenizers::Tokenizer> tokenizer = nullptr;
-  ::tokenizers::Error err;
 
   // First try to load as a json tokenizer.
   {

From aac48326494a5d3a979f81139619a79275cd216f Mon Sep 17 00:00:00 2001
From: Jack Zhang <32371937+jackzhxng@users.noreply.github.com>
Date: Fri, 25 Apr 2025 13:16:58 -0700
Subject: [PATCH 08/16] Fix merge error

---
 examples/models/llama/runner/runner.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/examples/models/llama/runner/runner.cpp b/examples/models/llama/runner/runner.cpp
index 462dde2975f..f58225b5f8b 100644
--- a/examples/models/llama/runner/runner.cpp
+++ b/examples/models/llama/runner/runner.cpp
@@ -133,7 +133,7 @@ Error Runner::load() {
     tokenizer_.reset();
     // @lint-ignore CLANGTIDY facebook-hte-Deprecated
     tokenizer_ = std::make_unique<::tokenizers::Llama2cTokenizer>();
-    err = tokenizer_->load(tokenizer_path_);
+    auto err = tokenizer_->load(tokenizer_path_);
     ET_CHECK_TK_OK_OR_RETURN_ERROR(
         err,
         "Failed to load %s as a llama2.c tokenizer artifact",

From f0406e20631dfdb8533b79ae3af62075c5cde894 Mon Sep 17 00:00:00 2001
From: Jack Zhang <32371937+jackzhxng@users.noreply.github.com>
Date: Sun, 27 Apr 2025 21:37:55 -0700
Subject: [PATCH 09/16] Pin bump tokenizers

---
 extension/llm/tokenizers | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/extension/llm/tokenizers b/extension/llm/tokenizers
index 3f9c458586e..0ed8e2e3448 160000
--- a/extension/llm/tokenizers
+++ b/extension/llm/tokenizers
@@ -1 +1 @@
-Subproject commit 3f9c458586ee576a7ddafb48eb491f117187e178
+Subproject commit 0ed8e2e34486f119a87c15d000080c5e3eea7aea

From 7fdb8ae159822f85f5eeef0b9e9e9df9bad1922a Mon Sep 17 00:00:00 2001
From: Jack Zhang <32371937+jackzhxng@users.noreply.github.com>
Date: Sun, 27 Apr 2025 21:38:05 -0700
Subject: [PATCH 10/16] Fix qnn build

---
 examples/qualcomm/CMakeLists.txt | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/examples/qualcomm/CMakeLists.txt b/examples/qualcomm/CMakeLists.txt
index 7b2c43b3f46..994657a2013 100644
--- a/examples/qualcomm/CMakeLists.txt
+++ b/examples/qualcomm/CMakeLists.txt
@@ -82,6 +82,14 @@ add_subdirectory(
   ${CMAKE_CURRENT_SOURCE_DIR}/../../extension/llm/tokenizers/third-party/re2
   ${CMAKE_CURRENT_BINARY_DIR}/re2
 )
+add_subdirectory(
+  ${CMAKE_CURRENT_SOURCE_DIR}/../../extension/llm/tokenizers/third-party/pcre2
+  ${CMAKE_CURRENT_BINARY_DIR}/pcre2
+)
+add_subdirectory(
+  ${CMAKE_CURRENT_SOURCE_DIR}/../../extension/llm/tokenizers/third-party/json/single_include
+  ${CMAKE_CURRENT_BINARY_DIR}/json
+)
 set(CMAKE_POSITION_INDEPENDENT_CODE ${_pic_flag})
 
 # build qnn_executor_runner

From cb12e06fd49d3a402cb11b5a0f2eb31f473f0919 Mon Sep 17 00:00:00 2001
From: Jack Zhang <32371937+jackzhxng@users.noreply.github.com>
Date: Sun, 27 Apr 2025 22:00:36 -0700
Subject: [PATCH 11/16] Revert "Fix qnn build"

---
 examples/qualcomm/CMakeLists.txt | 8 --------
 1 file changed, 8 deletions(-)

diff --git a/examples/qualcomm/CMakeLists.txt b/examples/qualcomm/CMakeLists.txt
index 994657a2013..7b2c43b3f46 100644
--- a/examples/qualcomm/CMakeLists.txt
+++ b/examples/qualcomm/CMakeLists.txt
@@ -82,14 +82,6 @@ add_subdirectory(
   ${CMAKE_CURRENT_SOURCE_DIR}/../../extension/llm/tokenizers/third-party/re2
   ${CMAKE_CURRENT_BINARY_DIR}/re2
 )
-add_subdirectory(
-  ${CMAKE_CURRENT_SOURCE_DIR}/../../extension/llm/tokenizers/third-party/pcre2
-  ${CMAKE_CURRENT_BINARY_DIR}/pcre2
-)
-add_subdirectory(
-  ${CMAKE_CURRENT_SOURCE_DIR}/../../extension/llm/tokenizers/third-party/json/single_include
-  ${CMAKE_CURRENT_BINARY_DIR}/json
-)
 set(CMAKE_POSITION_INDEPENDENT_CODE ${_pic_flag})
 
 # build qnn_executor_runner

From 8f0c5320c6cad3d8bc56686ce0e83762779af539 Mon Sep 17 00:00:00 2001
From: Jack Zhang <32371937+jackzhxng@users.noreply.github.com>
Date: Sun, 27 Apr 2025 22:27:52 -0700
Subject: [PATCH 12/16] Add json single_include dir to qnn include paths

---
 examples/qualcomm/CMakeLists.txt | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/examples/qualcomm/CMakeLists.txt b/examples/qualcomm/CMakeLists.txt
index 7b2c43b3f46..d140fb357e6 100644
--- a/examples/qualcomm/CMakeLists.txt
+++ b/examples/qualcomm/CMakeLists.txt
@@ -35,7 +35,7 @@ find_package(gflags REQUIRED)
 set(_common_compile_options -Wno-deprecated-declarations -fPIC)
 
 # Let files say "include <executorch/path/to/header.h>".
-set(_common_include_directories ${EXECUTORCH_ROOT}/..)
+set(_common_include_directories ${EXECUTORCH_ROOT}/.. ${EXECUTORCH_ROOT}/extension/llm/tokenizers/third-party/json/single_include)
 
 #
 # The `_<target>_srcs` lists are defined by including ${EXECUTORCH_SRCS_FILE}.
@@ -67,6 +67,7 @@ target_include_directories(
   PUBLIC
     ${_common_include_directories}
     ${CMAKE_CURRENT_SOURCE_DIR}/../../extension/llm/tokenizers/include
+    ${CMAKE_CURRENT_SOURCE_DIR}/../../extension/llm/tokenizers/third-party/json/single_include
 )
 
 # find RE2 for tokenizer

From 846951ed54b7c6d99fd387b7e0ddc788d0e73ed5 Mon Sep 17 00:00:00 2001
From: Jack Zhang <32371937+jackzhxng@users.noreply.github.com>
Date: Sun, 27 Apr 2025 22:46:39 -0700
Subject: [PATCH 13/16] Fix unicode in qnn

---
 examples/qualcomm/CMakeLists.txt | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/examples/qualcomm/CMakeLists.txt b/examples/qualcomm/CMakeLists.txt
index d140fb357e6..4f338a23044 100644
--- a/examples/qualcomm/CMakeLists.txt
+++ b/examples/qualcomm/CMakeLists.txt
@@ -68,6 +68,8 @@ target_include_directories(
     ${_common_include_directories}
     ${CMAKE_CURRENT_SOURCE_DIR}/../../extension/llm/tokenizers/include
     ${CMAKE_CURRENT_SOURCE_DIR}/../../extension/llm/tokenizers/third-party/json/single_include
+    ${CMAKE_CURRENT_SOURCE_DIR}/../../extension/llm/tokenizers/third-party/llama.cpp-unicode/include
+    ${CMAKE_CURRENT_SOURCE_DIR}/../../extension/llm/tokenizers/third-party/llama.cpp-unicode/src
 )
 
 # find RE2 for tokenizer

From ac83e12cebde146d317addd4ed8383107e2da1e2 Mon Sep 17 00:00:00 2001
From: Jack Zhang <32371937+jackzhxng@users.noreply.github.com>
Date: Mon, 28 Apr 2025 08:18:13 -0700
Subject: [PATCH 14/16] Add json and unicode include dirs to qaihub llama runners

---
 .../qualcomm/qaihub_scripts/llama/CMakeLists.txt   | 14 ++++++++++++--
 1 file changed, 12 insertions(+), 2 deletions(-)

diff --git a/examples/qualcomm/qaihub_scripts/llama/CMakeLists.txt b/examples/qualcomm/qaihub_scripts/llama/CMakeLists.txt
index f96d0169809..16d91013349 100644
--- a/examples/qualcomm/qaihub_scripts/llama/CMakeLists.txt
+++ b/examples/qualcomm/qaihub_scripts/llama/CMakeLists.txt
@@ -27,7 +27,12 @@ list(PREPEND _qaihub_llama2_7b_runner__srcs
 # build qaihub llama2 7b runner
 add_executable(qaihub_llama2_7b_runner ${_qaihub_llama2_7b_runner__srcs})
 target_include_directories(
-  qaihub_llama2_7b_runner PUBLIC ${_common_include_directories} ${CMAKE_CURRENT_SOURCE_DIR}/../../../../extension/llm/tokenizers/include
+  qaihub_llama2_7b_runner PUBLIC 
+    ${_common_include_directories} 
+    ${CMAKE_CURRENT_SOURCE_DIR}/../../../../extension/llm/tokenizers/include
+    ${CMAKE_CURRENT_SOURCE_DIR}/../../../../extension/llm/tokenizers/third-party/json/single_include
+    ${CMAKE_CURRENT_SOURCE_DIR}/../../../../extension/llm/tokenizers/third-party/llama.cpp-unicode/include
+    ${CMAKE_CURRENT_SOURCE_DIR}/../../../../extension/llm/tokenizers/third-party/llama.cpp-unicode/src
 )
 target_link_libraries(
   qaihub_llama2_7b_runner
@@ -69,7 +74,12 @@ list(
 # build qaihub llama3 8b runner
 add_executable(qaihub_llama3_8b_runner ${_qaihub_llama3_8b_runner__srcs})
 target_include_directories(
-  qaihub_llama3_8b_runner PUBLIC ${_common_include_directories} ${CMAKE_CURRENT_SOURCE_DIR}/../../../../extension/llm/tokenizers/include
+  qaihub_llama3_8b_runner PUBLIC 
+    ${_common_include_directories} 
+    ${CMAKE_CURRENT_SOURCE_DIR}/../../../../extension/llm/tokenizers/include
+    ${CMAKE_CURRENT_SOURCE_DIR}/../../../../extension/llm/tokenizers/third-party/json/single_include
+    ${CMAKE_CURRENT_SOURCE_DIR}/../../../../extension/llm/tokenizers/third-party/llama.cpp-unicode/include
+    ${CMAKE_CURRENT_SOURCE_DIR}/../../../../extension/llm/tokenizers/third-party/llama.cpp-unicode/src
 )
 
 target_link_libraries(

From c90896845825c416aedfcf1825fc3446c9425802 Mon Sep 17 00:00:00 2001
From: Jack Zhang <32371937+jackzhxng@users.noreply.github.com>
Date: Mon, 28 Apr 2025 11:10:25 -0700
Subject: [PATCH 15/16] Fix local shadowing

---
 examples/models/llama/runner/runner.cpp | 36 +++++++++----------------
 1 file changed, 13 insertions(+), 23 deletions(-)

diff --git a/examples/models/llama/runner/runner.cpp b/examples/models/llama/runner/runner.cpp
index f58225b5f8b..534738521a5 100644
--- a/examples/models/llama/runner/runner.cpp
+++ b/examples/models/llama/runner/runner.cpp
@@ -40,33 +40,23 @@ static constexpr auto kUseSDPAWithKVCache = "use_sdpa_with_kv_cache";
 
 std::unique_ptr<::tokenizers::Tokenizer> load_tokenizer(
     const std::string& tokenizer_path) {
-  std::unique_ptr<::tokenizers::Tokenizer> tokenizer = nullptr;
-
-  // First try to load as a json tokenizer.
-  {
-    auto tokenizer = std::make_unique<tokenizers::HFTokenizer>();
-    if (tokenizer->load(tokenizer_path) == ::tokenizers::Error::Ok) {
-      ET_LOG(Info, "Loaded json tokenizer");
-      return tokenizer;
-    }
+
+  auto json_tokenizer = std::make_unique<tokenizers::HFTokenizer>();
+  if (json_tokenizer->load(tokenizer_path) == ::tokenizers::Error::Ok) {
+    ET_LOG(Info, "Loaded json tokenizer");
+    return json_tokenizer;
   }
 
-  // Try to load as tiktoken tokenizer.
-  {
-    auto tokenizer = get_tiktoken_for_llama();
-    if (tokenizer->load(tokenizer_path) == ::tokenizers::Error::Ok) {
-      ET_LOG(Info, "Loaded TikToken tokenizer");
-      return tokenizer;
-    }
+  auto tiktoken_tokenizer = get_tiktoken_for_llama();
+  if (tiktoken_tokenizer->load(tokenizer_path) == ::tokenizers::Error::Ok) {
+    ET_LOG(Info, "Loaded TikToken tokenizer");
+    return tiktoken_tokenizer;
   }
 
-  // Try to load as BPE tokenizer.
-  {
-    auto tokenizer = std::make_unique<::tokenizers::Llama2cTokenizer>();
-    if (tokenizer->load(tokenizer_path) == ::tokenizers::Error::Ok) {
-      ET_LOG(Info, "Loaded BPE tokenizer");
-      return tokenizer;
-    }
+  auto bpe_tokenizer = std::make_unique<::tokenizers::Llama2cTokenizer>();
+  if (bpe_tokenizer->load(tokenizer_path) == ::tokenizers::Error::Ok) {
+    ET_LOG(Info, "Loaded BPE tokenizer");
+    return bpe_tokenizer;
   }
 
   return nullptr;

From 8fcd9f7368f9f84b5d1e89d62a57831aa50c9e41 Mon Sep 17 00:00:00 2001
From: Jack Zhang <32371937+jackzhxng@users.noreply.github.com>
Date: Mon, 28 Apr 2025 21:10:14 -0700
Subject: [PATCH 16/16] Fix benchmark app: add HF tokenizer sources and deps

---
 .../Benchmark.xcodeproj/project.pbxproj       | 48 ++++++++++++++++++-
 1 file changed, 46 insertions(+), 2 deletions(-)

diff --git a/extension/benchmark/apple/Benchmark/Benchmark.xcodeproj/project.pbxproj b/extension/benchmark/apple/Benchmark/Benchmark.xcodeproj/project.pbxproj
index 9fc1d47cb22..dda5f2bc6fa 100644
--- a/extension/benchmark/apple/Benchmark/Benchmark.xcodeproj/project.pbxproj
+++ b/extension/benchmark/apple/Benchmark/Benchmark.xcodeproj/project.pbxproj
@@ -27,6 +27,13 @@
 		03F181582D7262FC0058BDF9 /* kernels_optimized in Frameworks */ = {isa = PBXBuildFile; productRef = 03F181572D7262FC0058BDF9 /* kernels_optimized */; };
 		03F1815A2D7262FC0058BDF9 /* kernels_portable in Frameworks */ = {isa = PBXBuildFile; productRef = 03F181592D7262FC0058BDF9 /* kernels_portable */; };
 		03F1815C2D7262FC0058BDF9 /* kernels_quantized in Frameworks */ = {isa = PBXBuildFile; productRef = 03F1815B2D7262FC0058BDF9 /* kernels_quantized */; };
+		30AA4B602DC0766800B1BE50 /* pcre2_regex.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 30AA4B5A2DC0766800B1BE50 /* pcre2_regex.cpp */; };
+		30AA4B612DC0766800B1BE50 /* regex.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 30AA4B5D2DC0766800B1BE50 /* regex.cpp */; };
+		30AA4B622DC0766800B1BE50 /* hf_tokenizer.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 30AA4B592DC0766800B1BE50 /* hf_tokenizer.cpp */; };
+		30AA4B632DC0766800B1BE50 /* token_decoder.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 30AA4B5F2DC0766800B1BE50 /* token_decoder.cpp */; };
+		30AA4B642DC0766800B1BE50 /* std_regex.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 30AA4B5E2DC0766800B1BE50 /* std_regex.cpp */; };
+		30AA4B652DC0766800B1BE50 /* pre_tokenizer.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 30AA4B5B2DC0766800B1BE50 /* pre_tokenizer.cpp */; };
+		30AA4B662DC0766800B1BE50 /* re2_regex.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 30AA4B5C2DC0766800B1BE50 /* re2_regex.cpp */; };
 		F292B01D2D88AF3500BE6839 /* bpe_tokenizer_base.cpp in Sources */ = {isa = PBXBuildFile; fileRef = F292B0162D88AF3500BE6839 /* bpe_tokenizer_base.cpp */; };
 		F292B0202D88AF3500BE6839 /* llama2c_tokenizer.cpp in Sources */ = {isa = PBXBuildFile; fileRef = F292B0172D88AF3500BE6839 /* llama2c_tokenizer.cpp */; };
 		F292B0212D88AF3500BE6839 /* tiktoken.cpp in Sources */ = {isa = PBXBuildFile; fileRef = F292B01A2D88AF3500BE6839 /* tiktoken.cpp */; };
@@ -73,6 +80,21 @@
 		03B2D3792C8A515C0046936E /* GenericTests.mm */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.cpp.objcpp; path = GenericTests.mm; sourceTree = "<group>"; };
 		03C7FA322C8AA24200E6E9AE /* Resources */ = {isa = PBXFileReference; lastKnownFileType = folder; path = Resources; sourceTree = SOURCE_ROOT; };
 		03E7E6782CBDC1C900205E71 /* CoreMLTests.mm */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.cpp.objcpp; path = CoreMLTests.mm; sourceTree = "<group>"; };
+		30593C332DC02ED100AB308C /* regex.h */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.h; path = regex.h; sourceTree = "<group>"; };
+		30593C342DC02EDD00AB308C /* re2_regex.h */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.h; path = re2_regex.h; sourceTree = "<group>"; };
+		30593C3D2DC02FD400AB308C /* pcre2_regex.h */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.h; path = pcre2_regex.h; sourceTree = "<group>"; };
+		30593C3E2DC02FD400AB308C /* std_regex.h */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.h; path = std_regex.h; sourceTree = "<group>"; };
+		30AA4B552DC0756E00B1BE50 /* hf_tokenizer.h */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.h; path = hf_tokenizer.h; sourceTree = "<group>"; };
+		30AA4B562DC075CE00B1BE50 /* pre_tokenizer.h */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.h; path = pre_tokenizer.h; sourceTree = "<group>"; };
+		30AA4B572DC0760200B1BE50 /* token_decoder.h */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.h; path = token_decoder.h; sourceTree = "<group>"; };
+		30AA4B582DC0760C00B1BE50 /* string_integer_map.h */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.h; path = string_integer_map.h; sourceTree = "<group>"; };
+		30AA4B592DC0766800B1BE50 /* hf_tokenizer.cpp */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.cpp.cpp; name = hf_tokenizer.cpp; path = src/hf_tokenizer.cpp; sourceTree = "<group>"; };
+		30AA4B5A2DC0766800B1BE50 /* pcre2_regex.cpp */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.cpp.cpp; name = pcre2_regex.cpp; path = src/pcre2_regex.cpp; sourceTree = "<group>"; };
+		30AA4B5B2DC0766800B1BE50 /* pre_tokenizer.cpp */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.cpp.cpp; name = pre_tokenizer.cpp; path = src/pre_tokenizer.cpp; sourceTree = "<group>"; };
+		30AA4B5C2DC0766800B1BE50 /* re2_regex.cpp */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.cpp.cpp; name = re2_regex.cpp; path = src/re2_regex.cpp; sourceTree = "<group>"; };
+		30AA4B5D2DC0766800B1BE50 /* regex.cpp */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.cpp.cpp; name = regex.cpp; path = src/regex.cpp; sourceTree = "<group>"; };
+		30AA4B5E2DC0766800B1BE50 /* std_regex.cpp */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.cpp.cpp; name = std_regex.cpp; path = src/std_regex.cpp; sourceTree = "<group>"; };
+		30AA4B5F2DC0766800B1BE50 /* token_decoder.cpp */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.cpp.cpp; name = token_decoder.cpp; path = src/token_decoder.cpp; sourceTree = "<group>"; };
 		F292B0162D88AF3500BE6839 /* bpe_tokenizer_base.cpp */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.cpp.cpp; name = bpe_tokenizer_base.cpp; path = src/bpe_tokenizer_base.cpp; sourceTree = "<group>"; };
 		F292B0172D88AF3500BE6839 /* llama2c_tokenizer.cpp */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.cpp.cpp; name = llama2c_tokenizer.cpp; path = src/llama2c_tokenizer.cpp; sourceTree = "<group>"; };
 		F292B01A2D88AF3500BE6839 /* tiktoken.cpp */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.cpp.cpp; name = tiktoken.cpp; path = src/tiktoken.cpp; sourceTree = "<group>"; };
@@ -156,12 +178,19 @@
 		032A74022CAFBB7800932D36 /* tokenizers */ = {
 			isa = PBXGroup;
 			children = (
+				30AA4B592DC0766800B1BE50 /* hf_tokenizer.cpp */,
+				30AA4B5A2DC0766800B1BE50 /* pcre2_regex.cpp */,
+				30AA4B5B2DC0766800B1BE50 /* pre_tokenizer.cpp */,
+				30AA4B5C2DC0766800B1BE50 /* re2_regex.cpp */,
+				30AA4B5D2DC0766800B1BE50 /* regex.cpp */,
+				30AA4B5E2DC0766800B1BE50 /* std_regex.cpp */,
+				30AA4B5F2DC0766800B1BE50 /* token_decoder.cpp */,
 				F292B0302D88AF4800BE6839 /* include */,
 				F292B0162D88AF3500BE6839 /* bpe_tokenizer_base.cpp */,
+				032A74252CAFC34800932D36 /* llama_tiktoken.cpp */,
 				F292B0172D88AF3500BE6839 /* llama2c_tokenizer.cpp */,
 				F292B01A2D88AF3500BE6839 /* tiktoken.cpp */,
 				032A74242CAFC34800932D36 /* llama_tiktoken.h */,
-				032A74252CAFC34800932D36 /* llama_tiktoken.cpp */,
 			);
 			name = tokenizers;
 			path = ../../../llm/tokenizers;
@@ -222,12 +251,20 @@
 		F292B02E2D88AF4800BE6839 /* tokenizers */ = {
 			isa = PBXGroup;
 			children = (
+				30AA4B582DC0760C00B1BE50 /* string_integer_map.h */,
+				30AA4B572DC0760200B1BE50 /* token_decoder.h */,
+				30AA4B562DC075CE00B1BE50 /* pre_tokenizer.h */,
+				30AA4B552DC0756E00B1BE50 /* hf_tokenizer.h */,
 				F292B0222D88AF4800BE6839 /* base64.h */,
 				F292B0232D88AF4800BE6839 /* bpe_tokenizer_base.h */,
 				F292B0242D88AF4800BE6839 /* error.h */,
 				F292B0262D88AF4800BE6839 /* llama2c_tokenizer.h */,
 				F292B0272D88AF4800BE6839 /* log.h */,
+				30593C3D2DC02FD400AB308C /* pcre2_regex.h */,
+				30593C342DC02EDD00AB308C /* re2_regex.h */,
+				30593C332DC02ED100AB308C /* regex.h */,
 				F292B0292D88AF4800BE6839 /* result.h */,
+				30593C3E2DC02FD400AB308C /* std_regex.h */,
 				F292B02B2D88AF4800BE6839 /* tiktoken.h */,
 				F292B02D2D88AF4800BE6839 /* tokenizer.h */,
 			);
@@ -357,7 +394,7 @@
 			);
 			runOnlyForDeploymentPostprocessing = 0;
 			shellPath = /bin/sh;
-			shellScript = "set -e\n\nif ! command -v cmake &> /dev/null\nthen\n    echo \"Cmake not found, please install Cmake. \\n1. Download Cmake.app from https://cmake.org/download with version > 3.19. \\n2. Install it to Applications/ folder and run sudo /Applications/CMake.app/Contents/bin/cmake-gui --install to install CMake commandline tools.\"\n    exit 1\nfi\n\nCMAKE_DIR=\"$TEMP_DIR/cmake\"\nrm -rf \"$CMAKE_DIR\"\n\nPLATFORM=\"SIMULATORARM64\"\nDEPLOYMENT_TARGET=\"17.0\"\n\nif [[ \"$PLATFORM_NAME\" == *\"iphoneos\"* ]]; then\n  PLATFORM=\"OS64\"\nelif [[ \"$PLATFORM_NAME\" == *\"macos\"* ]]; then\n  PLATFORM=\"MAC_ARM64\"\n  DEPLOYMENT_TARGET=\"10.15\"\nfi\n\ncmake_build() {\n    local src_dir=$1\n    local target=$2\n    shift 2\n    local extra_args=(\"$@\")\n    local build_dir=\"$CMAKE_DIR/build/$(basename \"$src_dir\")\"\n\n    mkdir -p \"$build_dir\" && cd \"$build_dir\"\n\n    if [[ \"$PLATFORM\" == \"MAC_ARM64\" ]]; then\n        extra_args+=(-DCMAKE_INSTALL_BUNDLEDIR=\"${CMAKE_DIR}/bin\")\n        extra_args+=(-DCMAKE_MACOSX_BUNDLE=OFF)\n    fi\n    cmake -G Xcode \\\n          -DCMAKE_BUILD_TYPE=\"Release\" \\\n          -DCMAKE_CXX_STANDARD=17 \\\n          -DCMAKE_TOOLCHAIN_FILE=\"$SRCROOT/../../../../third-party/ios-cmake/ios.toolchain.cmake\" \\\n          -DCMAKE_XCODE_ATTRIBUTE_CLANG_CXX_LANGUAGE_STANDARD=\"c++17\" \\\n          -DCMAKE_XCODE_ATTRIBUTE_CLANG_CXX_LIBRARY=\"libc++\" \\\n          -DPLATFORM=\"$PLATFORM\" \\\n          -DDEPLOYMENT_TARGET=\"$DEPLOYMENT_TARGET\" \\\n          -DCMAKE_INSTALL_PREFIX=\"$CMAKE_DIR\" \\\n          \"${extra_args[@]}\" \\\n          \"$src_dir\"\n    cmake --build . --config \"Release\" --target \"$target\"\n    if [[ \"$target\" == \"install\" ]]; then\n        cmake --install . 
--prefix \"$CMAKE_DIR\"\n    fi\n}\n\ncmake_build \"$SRCROOT/../../../llm/tokenizers/third-party/abseil-cpp\" \"install\" \\\n    -DABSL_PROPAGATE_CXX_STD=ON\n\ncmake_build \"$SRCROOT/../../../llm/tokenizers/third-party/re2\" \"install\"\n\ncmake_build \"$SRCROOT/../../../llm/tokenizers/third-party/sentencepiece\" \"sentencepiece-static\" \\\n    -DSPM_ENABLE_SHARED=OFF\n\necho \"$(find $CMAKE_DIR/lib -name \"*.a\" | sed -E 's|^.*/lib([^/]+)\\.a|-l\\1|g' | tr '\\n' ' ')\" > \"$CMAKE_DIR/linker_flags\"\n";
+			shellScript = "set -e\n\nif ! command -v cmake &> /dev/null\nthen\n    echo \"Cmake not found, please install Cmake. \\n1. Download Cmake.app from https://cmake.org/download with version > 3.19. \\n2. Install it to Applications/ folder and run sudo /Applications/CMake.app/Contents/bin/cmake-gui --install to install CMake commandline tools.\"\n    exit 1\nfi\n\nCMAKE_DIR=\"$TEMP_DIR/cmake\"\nrm -rf \"$CMAKE_DIR\"\n\nPLATFORM=\"SIMULATORARM64\"\nDEPLOYMENT_TARGET=\"17.0\"\n\nif [[ \"$PLATFORM_NAME\" == *\"iphoneos\"* ]]; then\n  PLATFORM=\"OS64\"\nelif [[ \"$PLATFORM_NAME\" == *\"macos\"* ]]; then\n  PLATFORM=\"MAC_ARM64\"\n  DEPLOYMENT_TARGET=\"10.15\"\nfi\n\ncmake_build() {\n    local src_dir=$1\n    local target=$2\n    shift 2\n    local extra_args=(\"$@\")\n    local build_dir=\"$CMAKE_DIR/build/$(basename \"$src_dir\")\"\n\n    mkdir -p \"$build_dir\" && cd \"$build_dir\"\n\n    if [[ \"$PLATFORM\" == \"MAC_ARM64\" ]]; then\n        extra_args+=(-DCMAKE_INSTALL_BUNDLEDIR=\"${CMAKE_DIR}/bin\")\n        extra_args+=(-DCMAKE_MACOSX_BUNDLE=OFF)\n    fi\n    cmake -G Xcode \\\n          -DCMAKE_BUILD_TYPE=\"Release\" \\\n          -DCMAKE_CXX_STANDARD=17 \\\n          -DCMAKE_TOOLCHAIN_FILE=\"$SRCROOT/../../../../third-party/ios-cmake/ios.toolchain.cmake\" \\\n          -DCMAKE_XCODE_ATTRIBUTE_CLANG_CXX_LANGUAGE_STANDARD=\"c++17\" \\\n          -DCMAKE_XCODE_ATTRIBUTE_CLANG_CXX_LIBRARY=\"libc++\" \\\n          -DPLATFORM=\"$PLATFORM\" \\\n          -DDEPLOYMENT_TARGET=\"$DEPLOYMENT_TARGET\" \\\n          -DCMAKE_INSTALL_PREFIX=\"$CMAKE_DIR\" \\\n          \"${extra_args[@]}\" \\\n          \"$src_dir\"\n    cmake --build . --config \"Release\" --target \"$target\"\n    if [[ \"$target\" == \"install\" ]]; then\n        cmake --install . 
--prefix \"$CMAKE_DIR\"\n    fi\n}\n\ncmake_build \"$SRCROOT/../../../llm/tokenizers/third-party/abseil-cpp\" \"install\" \\\n    -DABSL_PROPAGATE_CXX_STD=ON\n\ncmake_build \"$SRCROOT/../../../llm/tokenizers/third-party/re2\" \"install\"\n\ncmake_build \"$SRCROOT/../../../llm/tokenizers/third-party/pcre2\" \"install\" \\\n    -DPCRE2_BUILD_PCRE2_8=ON \\\n    -DPCRE2_BUILD_PCRE2_16=OFF \\\n    -DPCRE2_BUILD_PCRE2_32=OFF \\\n    -DPCRE2_BUILD_TESTS=OFF \\\n    -DPCRE2_BUILD_PCRE2GREP=OFF \\\n    -DPCRE2_BUILD_PCRE2TEST=OFF \\\n    -DPCRE2_BUILD_PCRE2GPERF=OFF \\\n    -DPCRE2_BUILD_DOCS=OFF \\\n    -DPCRE2_BUILD_LIBPCRE2_PDB=OFF\n    \ncmake_build \"$SRCROOT/../../../llm/tokenizers/third-party/sentencepiece\" \"sentencepiece-static\" \\\n    -DSPM_ENABLE_SHARED=OFF\n    \ncmake_build \"$SRCROOT/../../../llm/tokenizers/third-party/llama.cpp-unicode\" \"install\"\n    \n# Include the single header for json.\nmkdir -p \"$CMAKE_DIR/include/nlohmann\"\ncp \"$SRCROOT/../../../llm/tokenizers/third-party/json/single_include/nlohmann/json.hpp\" \"$CMAKE_DIR/include/nlohmann/json.hpp\"\n\necho \"$(find $CMAKE_DIR/lib -name \"*.a\" | sed -E 's|^.*/lib([^/]+)\\.a|-l\\1|g' | tr '\\n' ' ')\" > \"$CMAKE_DIR/linker_flags\"\n";
 		};
 /* End PBXShellScriptBuildPhase section */
 
@@ -385,6 +422,13 @@
 				03E7E6792CBDCAE900205E71 /* CoreMLTests.mm in Sources */,
 				032A74232CAFC1B300932D36 /* runner.cpp in Sources */,
 				03B2D37A2C8A515C0046936E /* GenericTests.mm in Sources */,
+				30AA4B602DC0766800B1BE50 /* pcre2_regex.cpp in Sources */,
+				30AA4B612DC0766800B1BE50 /* regex.cpp in Sources */,
+				30AA4B622DC0766800B1BE50 /* hf_tokenizer.cpp in Sources */,
+				30AA4B632DC0766800B1BE50 /* token_decoder.cpp in Sources */,
+				30AA4B642DC0766800B1BE50 /* std_regex.cpp in Sources */,
+				30AA4B652DC0766800B1BE50 /* pre_tokenizer.cpp in Sources */,
+				30AA4B662DC0766800B1BE50 /* re2_regex.cpp in Sources */,
 				032A73CA2CAFBA8600932D36 /* LLaMATests.mm in Sources */,
 				032A74262CAFC34800932D36 /* llama_tiktoken.cpp in Sources */,
 			);