diff --git a/.gitignore b/.gitignore
index 2377dfe4f..0905bbd98 100644
--- a/.gitignore
+++ b/.gitignore
@@ -38,7 +38,7 @@ out/
 .scb/
 onnxruntime_extensions/_version.py
 onnxruntime-*-*-*/
-temp_*.onnx
+temp_*onnx*
 # Java specific ignores
 */.gradle
 java/hs_*.log
diff --git a/.pipelines/ci.yml b/.pipelines/ci.yml
index a14653aa3..5a50296a6 100644
--- a/.pipelines/ci.yml
+++ b/.pipelines/ci.yml
@@ -478,3 +478,19 @@ jobs:
        echo "Exception propogation was not enabled correctly."
        exit 1
      fi
+
+
+  ##############################
+  # Linux for selected_ops build
+  ##############################
+  - job: Linux_SelectedOpsBuild
+    pool:
+      vmImage: 'ubuntu-latest'
+
+    steps:
+    # compiled with only one operator selected.
+    - bash: |
+        set -e -x -u
+        echo 'set (OCOS_ENABLE_BERT_TOKENIZER ON CACHE BOOL "" FORCE)' > cmake/_selectedoplist.cmake
+        ./build.sh -DOCOS_ENABLE_CPP_EXCEPTIONS=OFF -DOCOS_ENABLE_SELECTED_OPLIST=ON
+      displayName: Build ort-extensions with only one operator selected
diff --git a/CMakeLists.txt b/CMakeLists.txt
index 38be4d988..db5d5846f 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -208,6 +208,9 @@ endif()
 
 if(NOT OCOS_ENABLE_CPP_EXCEPTIONS)
   add_compile_definitions(OCOS_NO_EXCEPTIONS ORT_NO_EXCEPTIONS)
+  if (NOT _ONNXRUNTIME_EMBEDDED)
+    add_compile_definitions(_HAS_EXCEPTIONS=0)
+  endif()
 endif()
 
 include(FetchContent)
@@ -254,7 +257,7 @@ if(OCOS_ENABLE_RE2_REGEX)
 endif()
 
 # ### scan all source files
-set(TARGET_SRC_NOEXCEPTION)
+file(GLOB TARGET_SRC_NOEXCEPTION "base/*.h" "base/*.cc")
 file(GLOB TARGET_SRC "operators/*.cc" "operators/*.h" "includes/*.h*")
 
 if(OCOS_ENABLE_TF_STRING)
@@ -402,11 +405,13 @@ standardize_output_folder(ocos_operators)
 target_include_directories(noexcep_operators PUBLIC
   ${ONNXRUNTIME_INCLUDE_DIR}
   ${PROJECT_SOURCE_DIR}/includes
+  ${PROJECT_SOURCE_DIR}/base
   ${PROJECT_SOURCE_DIR}/operators)
 
 target_include_directories(ocos_operators PUBLIC
   ${ONNXRUNTIME_INCLUDE_DIR}
   ${PROJECT_SOURCE_DIR}/includes
+  ${PROJECT_SOURCE_DIR}/base
   ${PROJECT_SOURCE_DIR}/operators
   ${PROJECT_SOURCE_DIR}/operators/tokenizer)
diff --git a/operators/base64.cc b/base/base64.cc
similarity index 100%
rename from operators/base64.cc
rename to base/base64.cc
diff --git a/operators/base64.h b/base/base64.h
similarity index 100%
rename from operators/base64.h
rename to base/base64.h
diff --git a/operators/narrow.h b/base/narrow.h
similarity index 100%
rename from operators/narrow.h
rename to base/narrow.h
diff --git a/operators/ocos.cc b/base/ocos.cc
similarity index 100%
rename from operators/ocos.cc
rename to base/ocos.cc
diff --git a/operators/string_tensor.cc b/base/string_tensor.cc
similarity index 100%
rename from operators/string_tensor.cc
rename to base/string_tensor.cc
index 9e832c1c5..569a39e7b 100644
--- a/operators/string_tensor.cc
+++ b/base/string_tensor.cc
@@ -1,8 +1,8 @@
 // Copyright (c) Microsoft Corporation. All rights reserved.
 // Licensed under the MIT License.
 
+#include "string_tensor.h"
 #include "string_utils.h"
 #include "ustring.h"
-#include "string_tensor.h"
 
 void GetTensorMutableDataString(const OrtApi& api, OrtW::CustomOpApi& ort, OrtKernelContext* context,
                                 const OrtValue* value, std::vector& output) {
diff --git a/operators/string_tensor.h b/base/string_tensor.h
similarity index 100%
rename from operators/string_tensor.h
rename to base/string_tensor.h
index 6b99fe1c0..469db8da8 100644
--- a/operators/string_tensor.h
+++ b/base/string_tensor.h
@@ -3,8 +3,8 @@
 
 #pragma once
 
-#include 
 #include "ocos.h"
+#include 
 
 // Retrieves a vector of strings if the input type is std::string.
diff --git a/operators/string_utils.cc b/base/string_utils.cc
similarity index 100%
rename from operators/string_utils.cc
rename to base/string_utils.cc
diff --git a/operators/string_utils.h b/base/string_utils.h
similarity index 100%
rename from operators/string_utils.h
rename to base/string_utils.h
diff --git a/operators/ustring.cc b/base/ustring.cc
similarity index 99%
rename from operators/ustring.cc
rename to base/ustring.cc
index 1a06392bf..9ac9a8eb0 100644
--- a/operators/ustring.cc
+++ b/base/ustring.cc
@@ -1,7 +1,8 @@
 // Copyright (c) Microsoft Corporation. All rights reserved.
 // Licensed under the MIT License.
 
-#include 
 #include "ustring.h"
+#include 
+
 ustring::ustring() : std::u32string() {
 }
diff --git a/operators/ustring.h b/base/ustring.h
similarity index 94%
rename from operators/ustring.h
rename to base/ustring.h
index 9e960ee70..e20f90210 100644
--- a/operators/ustring.h
+++ b/base/ustring.h
@@ -1,15 +1,12 @@
 // Copyright (c) Microsoft Corporation. All rights reserved.
 // Licensed under the MIT License.
 #pragma once
-#include 
-#include 
-#include 
-
-#define _SILENCE_CXX17_CODECVT_HEADER_DEPRECATION_WARNING 1
-#include 
 #include "ocos.h"
+#include 
+#include 
 
+// ustring needs a new implementation, due to the std::codecvt deprecation.
 // Wrap u32string with ustring, in case we will use other implementation in the future
 class ustring : public std::u32string {
  public:
diff --git a/includes/ocos.h b/includes/ocos.h
index b61d1c585..32be5a69a 100644
--- a/includes/ocos.h
+++ b/includes/ocos.h
@@ -3,6 +3,8 @@
 
 #pragma once
 
+#define _SILENCE_CXX17_CODECVT_HEADER_DEPRECATION_WARNING
+#include 
 #include 
 #include 
 #include 
diff --git a/operators/tokenizer/basic_tokenizer.cc b/operators/tokenizer/basic_tokenizer.cc
index d57ae1d4b..ce87c67e7 100644
--- a/operators/tokenizer/basic_tokenizer.cc
+++ b/operators/tokenizer/basic_tokenizer.cc
@@ -1,12 +1,11 @@
 // Copyright (c) Microsoft Corporation. All rights reserved.
 // Licensed under the MIT License.
 
-#include "string_utils.h"
 #include "basic_tokenizer.hpp"
+#include "string_utils.h"
 #include "string_tensor.h"
 #include 
 #include 
-#include 
 #include 
 
 BasicTokenizer::BasicTokenizer(bool do_lower_case, bool tokenize_chinese_chars, bool strip_accents,
diff --git a/operators/tokenizer/bert_tokenizer.hpp b/operators/tokenizer/bert_tokenizer.hpp
index 7b3f320b7..4faddd067 100644
--- a/operators/tokenizer/bert_tokenizer.hpp
+++ b/operators/tokenizer/bert_tokenizer.hpp
@@ -3,14 +3,15 @@
 
 #pragma once
 
-#include 
-#include 
 #include "ocos.h"
 #include "ustring.h"
 #include "string_utils.h"
 #include "string_tensor.h"
 #include "basic_tokenizer.hpp"
+#include 
+
+
 class BertTokenizerVocab final {
  public:
  explicit BertTokenizerVocab(std::string_view vocab);
diff --git a/operators/tokenizer/bert_tokenizer_decoder.hpp b/operators/tokenizer/bert_tokenizer_decoder.hpp
index a0863d434..c1e20b961 100644
--- a/operators/tokenizer/bert_tokenizer_decoder.hpp
+++ b/operators/tokenizer/bert_tokenizer_decoder.hpp
@@ -3,8 +3,6 @@
 
 #pragma once
 
-#include 
-#include 
 #include "ocos.h"
 #include "ustring.h"
 #include "string_utils.h"
diff --git a/operators/tokenizer/bpetokenizer.hpp b/operators/tokenizer/bpetokenizer.hpp
index 269ad8119..fcc8e1f63 100644
--- a/operators/tokenizer/bpetokenizer.hpp
+++ b/operators/tokenizer/bpetokenizer.hpp
@@ -1,28 +1,58 @@
 // Licensed under the MIT License.
 // Partial code comes from other Microsoft employee.
 #pragma once
-#include 
-#include 
-#include 
-#include 
-#include 
-#include 
-#include 
-#include 
+#include "ocos.h"
+#include "ustring.h"
+
 #include 
-#include 
-#include 
 #include 
-#include 
-#include 
-#include 
+#include "unicode.h"
 #include "nlohmann/json.hpp"
-#include "clip_tokenizer.hpp"
-#include "gpt2_tokenizer.hpp"
-#include "roberta_tokenizer.hpp"
+#include "string_utils.h"
 #include "string_tensor.h"
-#include "unicode.h"
+
+// Note: the following logic comes from CPython: unicodetype_db.h (_PyUnicode_IsWhitespace)
+inline bool IsUnicodeSpace(char32_t ch) {
+  switch (ch) {
+    case 0x0009:
+    case 0x000A:
+    case 0x000B:
+    case 0x000C:
+    case 0x000D:
+    case 0x001C:
+    case 0x001D:
+    case 0x001E:
+    case 0x001F:
+    case 0x0020:
+    case 0x0085:
+    case 0x00A0:
+    case 0x1680:
+    case 0x2000:
+    case 0x2001:
+    case 0x2002:
+    case 0x2003:
+    case 0x2004:
+    case 0x2005:
+    case 0x2006:
+    case 0x2007:
+    case 0x2008:
+    case 0x2009:
+    case 0x200A:
+    case 0x2028:
+    case 0x2029:
+    case 0x202F:
+    case 0x205F:
+    case 0x3000:
+      return true;
+  }
+  return false;
+}
+
+inline bool IsEmptyUString(const ustring& str) {
+  return std::all_of(str.begin(), str.end(), [](char32_t ch) { return IsUnicodeSpace(ch); });
+}
 
 class SpecialTokenMap {
  public:
@@ -117,7 +147,6 @@
     } else {
       int id = static_cast(vocab_map_.size());
       vocab_map_[unk_token] = id;
-      std::cerr << "Special token (" << unk_token << ") have been added in the vocabulary." << std::endl;
     }
 
     std::wstring_convert, char32_t> str_convert;
diff --git a/operators/tokenizer/clip_tokenizer.cc b/operators/tokenizer/clip_tokenizer.cc
index e123a62f2..1f565c820 100644
--- a/operators/tokenizer/clip_tokenizer.cc
+++ b/operators/tokenizer/clip_tokenizer.cc
@@ -1,68 +1,8 @@
 // Copyright (c) Microsoft Corporation. All rights reserved.
 // Licensed under the MIT License.
 // Partial code comes from other Microsoft employee.
-
-#include 
-#include 
-#include 
-#include 
-#include 
-#include 
-#include 
-#include 
-#include 
-#include 
-#include 
-#include 
-#include 
-#include 
-#include 
-
-#include "nlohmann/json.hpp"
-#include "bpetokenizer.hpp"
-#include "string_tensor.h"
-#include "unicode.h"
-
-// Note: the following logic comes from CPython: unicodetype_db.h (_PyUnicode_IsWhitespace)
-bool IsInUnicodeSpace(char32_t ch) {
-  switch (ch) {
-    case 0x0009:
-    case 0x000A:
-    case 0x000B:
-    case 0x000C:
-    case 0x000D:
-    case 0x001C:
-    case 0x001D:
-    case 0x001E:
-    case 0x001F:
-    case 0x0020:
-    case 0x0085:
-    case 0x00A0:
-    case 0x1680:
-    case 0x2000:
-    case 0x2001:
-    case 0x2002:
-    case 0x2003:
-    case 0x2004:
-    case 0x2005:
-    case 0x2006:
-    case 0x2007:
-    case 0x2008:
-    case 0x2009:
-    case 0x200A:
-    case 0x2028:
-    case 0x2029:
-    case 0x202F:
-    case 0x205F:
-    case 0x3000:
-      return true;
-  }
-  return false;
-}
-
-bool IsEmptyUstring(const ustring& str) {
-  return std::all_of(str.begin(), str.end(), [](char32_t ch) { return IsInUnicodeSpace(ch); });
-}
+#include "clip_tokenizer.hpp"
+#include "string_utils.h"
 
 KernelClipBpeTokenizer::KernelClipBpeTokenizer(const OrtApi& api, const OrtKernelInfo& info)
     : BaseKernel(api, info) {
@@ -93,7 +33,7 @@ KernelClipBpeTokenizer::KernelClipBpeTokenizer(const OrtApi& api, const OrtKerne
 std::vector KernelClipBpeTokenizer::Tokenize(ustring& input, int64_t max_length) {
   std::vector res;
 
-  if (IsEmptyUstring(input)) {
+  if (IsEmptyUString(input)) {
     return res;
   }
   // Add <|startoftext|> token to result
diff --git a/operators/tokenizer/clip_tokenizer.hpp b/operators/tokenizer/clip_tokenizer.hpp
index 8489e83a6..d5387919c 100644
--- a/operators/tokenizer/clip_tokenizer.hpp
+++ b/operators/tokenizer/clip_tokenizer.hpp
@@ -1,9 +1,8 @@
-#include 
-#include "ocos.h"
-#include "ustring.h"
-#include "string_utils.h"
+// Copyright (c) Microsoft Corporation. All rights reserved.
+// Licensed under the MIT License.
 
-class VocabData;
+#pragma once
+#include "bpetokenizer.hpp"
 
 struct KernelClipBpeTokenizer : BaseKernel {
   KernelClipBpeTokenizer(const OrtApi& api, const OrtKernelInfo& info);
diff --git a/operators/tokenizer/gpt2_tokenizer.cc b/operators/tokenizer/gpt2_tokenizer.cc
index 5aba4a477..2a3234c57 100644
--- a/operators/tokenizer/gpt2_tokenizer.cc
+++ b/operators/tokenizer/gpt2_tokenizer.cc
@@ -2,66 +2,8 @@
 // Licensed under the MIT License.
 // Partial code comes from other Microsoft employee.
 
-#include 
-#include 
-#include 
-#include 
-#include 
-#include 
-#include 
-#include 
-#include 
-#include 
-#include 
-#include 
-#include 
-#include 
-
-#include "nlohmann/json.hpp"
-#include "bpetokenizer.hpp"
-#include "string_tensor.h"
-#include "unicode.h"
-
-// Note: the following logic comes from CPython: unicodetype_db.h (_PyUnicode_IsWhitespace)
-bool IsUnicodeSpace(char32_t ch) {
-  switch (ch) {
-    case 0x0009:
-    case 0x000A:
-    case 0x000B:
-    case 0x000C:
-    case 0x000D:
-    case 0x001C:
-    case 0x001D:
-    case 0x001E:
-    case 0x001F:
-    case 0x0020:
-    case 0x0085:
-    case 0x00A0:
-    case 0x1680:
-    case 0x2000:
-    case 0x2001:
-    case 0x2002:
-    case 0x2003:
-    case 0x2004:
-    case 0x2005:
-    case 0x2006:
-    case 0x2007:
-    case 0x2008:
-    case 0x2009:
-    case 0x200A:
-    case 0x2028:
-    case 0x2029:
-    case 0x202F:
-    case 0x205F:
-    case 0x3000:
-      return true;
-  }
-  return false;
-}
+#include "gpt2_tokenizer.hpp"
 
-bool IsEmptyUString(const ustring& str) {
-  return std::all_of(str.begin(), str.end(), [](char32_t ch) { return IsUnicodeSpace(ch); });
-}
 
 KernelBpeTokenizer::KernelBpeTokenizer(const OrtApi& api, const OrtKernelInfo& info)
     : BaseKernel(api, info) {
@@ -200,17 +142,3 @@ size_t CustomOpBpeTokenizer::GetOutputTypeCount() const {
 ONNXTensorElementDataType CustomOpBpeTokenizer::GetOutputType(size_t /*index*/) const {
   return ONNX_TENSOR_ELEMENT_DATA_TYPE_INT64;
 }
-
-const OrtCustomOp** LoadTokenizerSchemaList() {
-  // create the global objects here to let the ORT catch the expection if any
-  static std::unique_ptr p_CoBpeTokenizer;
-  static const OrtCustomOp* c_CustomOpList[2] = {nullptr};  // {&c_CoBpeTokenizer, nullptr};
-  static std::mutex mtx_loaded;
-  std::lock_guard lck(mtx_loaded);
-  if (p_CoBpeTokenizer.get() == nullptr) {
-    p_CoBpeTokenizer = std::make_unique();
-    c_CustomOpList[0] = p_CoBpeTokenizer.get();
-  }
-
-  return c_CustomOpList;
-}
diff --git a/operators/tokenizer/gpt2_tokenizer.hpp b/operators/tokenizer/gpt2_tokenizer.hpp
index ed1625d98..31b2bd2d2 100644
--- a/operators/tokenizer/gpt2_tokenizer.hpp
+++ b/operators/tokenizer/gpt2_tokenizer.hpp
@@ -1,8 +1,8 @@
-#include 
-#include "ocos.h"
-#include "ustring.h"
+// Copyright (c) Microsoft Corporation. All rights reserved.
+// Licensed under the MIT License.
 
-class VocabData;
+#pragma once
+#include "bpetokenizer.hpp"
 
 struct KernelBpeTokenizer : BaseKernel {
   KernelBpeTokenizer(const OrtApi& api, const OrtKernelInfo& info);
diff --git a/operators/tokenizer/roberta_tokenizer.cc b/operators/tokenizer/roberta_tokenizer.cc
index 4737c054e..c886ce7e7 100644
--- a/operators/tokenizer/roberta_tokenizer.cc
+++ b/operators/tokenizer/roberta_tokenizer.cc
@@ -2,67 +2,9 @@
 // Licensed under the MIT License.
 // Partial code comes from other Microsoft employee.
 
-#include 
-#include 
-#include 
-#include 
-#include 
-#include 
-#include 
-#include 
-#include 
-#include 
-#include 
-#include 
-#include 
-#include 
-#include 
-
-#include "nlohmann/json.hpp"
-#include "bpetokenizer.hpp"
-#include "string_tensor.h"
-#include "unicode.h"
-
-// Note: the following logic comes from CPython: unicodetype_db.h (_PyUnicode_IsWhitespace)
-bool IsWithinUnicodeSpace(char32_t ch) {
-  switch (ch) {
-    case 0x0009:
-    case 0x000A:
-    case 0x000B:
-    case 0x000C:
-    case 0x000D:
-    case 0x001C:
-    case 0x001D:
-    case 0x001E:
-    case 0x001F:
-    case 0x0020:
-    case 0x0085:
-    case 0x00A0:
-    case 0x1680:
-    case 0x2000:
-    case 0x2001:
-    case 0x2002:
-    case 0x2003:
-    case 0x2004:
-    case 0x2005:
-    case 0x2006:
-    case 0x2007:
-    case 0x2008:
-    case 0x2009:
-    case 0x200A:
-    case 0x2028:
-    case 0x2029:
-    case 0x202F:
-    case 0x205F:
-    case 0x3000:
-      return true;
-  }
-  return false;
-}
+#include "roberta_tokenizer.hpp"
+#include "narrow.h"
 
-bool IsEmptyuString(const ustring& str) {
-  return std::all_of(str.begin(), str.end(), [](char32_t ch) { return IsWithinUnicodeSpace(ch); });
-}
 
 KernelRobertaBpeTokenizer::KernelRobertaBpeTokenizer(const OrtApi& api, const OrtKernelInfo& info)
     : BaseKernel(api, info) {
@@ -90,10 +32,10 @@ KernelRobertaBpeTokenizer::KernelRobertaBpeTokenizer(const OrtApi& api, const Or
   bbpe_tokenizer_->Load(vocabu_stream, merges_stream, "<|endoftext|>", "<|endoftext|>");
 }
 
-std::vector KernelRobertaBpeTokenizer::Tokenize(ustring& input, int64_t max_length, std::list>>& offset_map) {
+std::vector KernelRobertaBpeTokenizer::Tokenize(ustring& input, int64_t max_length, std::list& offset_map) {
   std::vector res;
 
-  if (IsEmptyuString(input)) {
+  if (IsEmptyUString(input)) {
    return res;
   }
   // Add BOS token to result
@@ -116,8 +58,8 @@ std::vector KernelRobertaBpeTokenizer::Tokenize(ustring& input, int64_t
     const char32_t* ptr = cur_input.c_str();
     regcmp.Set(ptr);
 
-    int offset = 0;
-    std::list> offset_mapping;
+    size_t offset = 0;
+    OffsetMappingType offset_mapping;
 
     // Add offset mapping for BOS token
     offset_mapping.push_back(std::make_pair(0, 0));
@@ -130,16 +72,16 @@ std::vector KernelRobertaBpeTokenizer::Tokenize(ustring& input, int64_t
 
      // Handle offset mapping and special cases
      if (utf8_token.at(0) == ' ') {
-        offset_mapping.push_back(std::make_pair(offset + 1, offset + utf8_token.size()));
+        offset_mapping.emplace_back(std::make_pair(offset + 1, ort_extensions::narrow(offset + utf8_token.size())));
      } else {
-        offset_mapping.push_back(std::make_pair(offset, offset + utf8_token.size()));
+        offset_mapping.emplace_back(std::make_pair(offset, ort_extensions::narrow(offset + utf8_token.size())));
      }
      offset += utf8_token.size();
 
      // Get byte encodings prior to performing BPE
      byte_list_.clear();
      for (char& cp : utf8_token) {
-        byte_list_.push_back(bbpe_tokenizer_->ByteEncoder()[static_cast(cp)]);
+        byte_list_.emplace_back(bbpe_tokenizer_->ByteEncoder()[static_cast(cp)]);
      }
 
      // Perform BPE
@@ -155,13 +97,13 @@ std::vector KernelRobertaBpeTokenizer::Tokenize(ustring& input, int64_t
      }
    }
    // Add offset mapping for EOS token
-    offset_mapping.push_back(std::make_pair(0, 0));
+    offset_mapping.emplace_back(std::make_pair(0, 0));
 
    // Add offset mappings for input in this instance to list of offset mappings for all inputs
-    offset_map.push_back(offset_mapping);
+    offset_map.emplace_back(offset_mapping);
  }
 
  // Add EOS token to result
-  res.push_back(bbpe_tokenizer_->GetEncoding(""));
+  res.emplace_back(bbpe_tokenizer_->GetEncoding(""));
  return res;
 }
@@ -169,7 +111,7 @@ void KernelRobertaBpeTokenizer::Compute(OrtKernelContext* context) {
   // Setup inputs
   const OrtValue* input = ort_.KernelContext_GetInput(context, 0);
   std::vector str_input;
-  std::list>> offset_map;
+  std::list offset_map;
   GetTensorMutableDataString(api_, ort_, context, input, str_input);
 
   OrtTensorDimensions input_dim(ort_, input);
diff --git a/operators/tokenizer/roberta_tokenizer.hpp b/operators/tokenizer/roberta_tokenizer.hpp
index e0252a304..b499b6866 100644
--- a/operators/tokenizer/roberta_tokenizer.hpp
+++ b/operators/tokenizer/roberta_tokenizer.hpp
@@ -1,16 +1,16 @@
-#include 
-#include "ocos.h"
-#include "ustring.h"
-#include "string_utils.h"
+// Copyright (c) Microsoft Corporation. All rights reserved.
+// Licensed under the MIT License.
 
-class VocabData;
+#pragma once
+#include "bpetokenizer.hpp"
 
 struct KernelRobertaBpeTokenizer : BaseKernel {
   KernelRobertaBpeTokenizer(const OrtApi& api, const OrtKernelInfo& info);
   void Compute(OrtKernelContext* context);
 
  private:
-  std::vector Tokenize(ustring& input, int64_t max_length, std::list>>& offset_map);
+  using OffsetMappingType = std::list>;
+  std::vector Tokenize(ustring& input, int64_t max_length, std::list& offset_map);
 
   int64_t padding_length_;
   std::list byte_list_;
diff --git a/operators/tokenizer/wordpiece_tokenizer.hpp b/operators/tokenizer/wordpiece_tokenizer.hpp
index 1bdb8d893..a34b82cdb 100644
--- a/operators/tokenizer/wordpiece_tokenizer.hpp
+++ b/operators/tokenizer/wordpiece_tokenizer.hpp
@@ -3,13 +3,14 @@
 
 #pragma once
 
-#include 
-#include 
 #include "ocos.h"
 #include "ustring.h"
 #include "string_utils.h"
 #include "string_tensor.h"
+#include 
+
+
 struct KernelWordpieceTokenizer : BaseKernel {
   KernelWordpieceTokenizer(const OrtApi& api, const OrtKernelInfo& info);
   void Compute(OrtKernelContext* context);
diff --git a/test/test_cliptok.py b/test/test_cliptok.py
index ce8fc49c2..028eecd56 100644
--- a/test/test_cliptok.py
+++ b/test/test_cliptok.py
@@ -39,7 +39,9 @@ class TestCLIPTokenizer(unittest.TestCase):
 
     @classmethod
     def setUpClass(cls):
         cls.tokenizer = CLIPTokenizer.from_pretrained("openai/clip-vit-base-patch32")
-        files = cls.tokenizer.save_vocabulary(".")
+        temp_dir = Path('./temp_onnxclip')
+        temp_dir.mkdir(parents=True, exist_ok=True)
+        files = cls.tokenizer.save_vocabulary(str(temp_dir))
         cls.tokjson = files[0]
         cls.merges = files[1]
diff --git a/test/test_gpt2tok.py b/test/test_gpt2tok.py
index 2fae5988c..6b1988c68 100644
--- a/test/test_gpt2tok.py
+++ b/test/test_gpt2tok.py
@@ -2,7 +2,6 @@
 
 import numpy as np
 import onnxruntime as _ort
-from pathlib import Path
 from onnx import helper, onnx_pb as onnx_proto
 from transformers import GPT2Tokenizer
 from onnxruntime_extensions import (
@@ -17,11 +16,6 @@ def _get_file_content(path):
         return file.read()
 
 
-def _get_test_data_file(*sub_dirs):
-    test_dir = Path(__file__).parent
-    return str(test_dir.joinpath(*sub_dirs))
-
-
 def _create_test_model(**kwargs):
     vocab_file = kwargs["vocab_file"]
     merges_file = kwargs["merges_file"]
diff --git a/test/test_robertatok.py b/test/test_robertatok.py
index 053a23ac6..4eb2681e1 100644
--- a/test/test_robertatok.py
+++ b/test/test_robertatok.py
@@ -42,7 +42,9 @@ class TestRobertaTokenizer(unittest.TestCase):
 
     @classmethod
     def setUpClass(cls):
         cls.tokenizer = RobertaTokenizerFast.from_pretrained("roberta-base")
-        files = cls.tokenizer.save_vocabulary(".")
+        temp_dir = Path('./temp_onnxroberta')
+        temp_dir.mkdir(parents=True, exist_ok=True)
+        files = cls.tokenizer.save_vocabulary(str(temp_dir))
         cls.tokjson = files[0]
         cls.merges = files[1]