From baa70670a7acfc778196932cd61e43822d2fa724 Mon Sep 17 00:00:00 2001 From: Xiang Li Date: Sat, 31 Aug 2024 11:29:30 -0400 Subject: [PATCH] [Build] Use C++17 Constructor for tiktoken.cpp when C++20 is unavailable The basic_string_view constructor: ``` template< class It, class End > constexpr basic_string_view( It first, End last ); ``` requires C++20. To allow the code to compile with C++17, use the basic_string_view constructor: ``` constexpr basic_string_view( const CharT* s, size_type count ); ``` For #4661 --- extension/llm/tokenizer/tiktoken.cpp | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/extension/llm/tokenizer/tiktoken.cpp b/extension/llm/tokenizer/tiktoken.cpp index 7b15d25f0da..8281b4a4fe3 100644 --- a/extension/llm/tokenizer/tiktoken.cpp +++ b/extension/llm/tokenizer/tiktoken.cpp @@ -252,7 +252,11 @@ Tiktoken::_split_with_allowed_special_token( return std::make_pair(std::nullopt, input); } +#if __cplusplus >= 202002L auto start = input.begin(); +#else + const char* start = input.data(); +#endif std::string special; while (true) { if (!re2::RE2::FindAndConsume(&input, *_special_token_regex, &special)) { @@ -262,9 +266,15 @@ Tiktoken::_split_with_allowed_special_token( if (allowed_special.count(special) == 1) { // Found an allowed special token, split the text with it. +#if __cplusplus >= 202002L return std::make_pair( special, re2::StringPiece(start, input.begin() - start - special.size())); +#else + return std::make_pair( + special, + re2::StringPiece(start, (input.data() - start) - special.size())); +#endif } // else try to find the next special token }