Skip to content

Commit

Permalink
Limit WordDB words to fewer than 50 characters
Browse files Browse the repository at this point in the history
This should improve both the performance and the relevance of word completions.
  • Loading branch information
mawww committed Mar 22, 2019
1 parent b9c1fa6 commit ad882c3
Showing 1 changed file with 18 additions and 6 deletions.
24 changes: 18 additions & 6 deletions src/word_db.cc
Expand Up @@ -21,6 +21,8 @@ WordDB& get_word_db(const Buffer& buffer)

struct WordSplitter
{
static constexpr CharCount max_word_len = 50;

struct Iterator
{
Iterator(const char* begin, const WordSplitter& splitter)
Expand All @@ -34,12 +36,22 @@ struct WordSplitter
const auto* end = m_splitter->m_content.end();
auto extra_chars = m_splitter->m_extra_word_chars;

m_word_begin = m_word_end;
while (m_word_begin != end and not is_word(utf8::codepoint(m_word_begin, end), extra_chars))
utf8::to_next(m_word_begin, end);
m_word_end = m_word_begin;
while (m_word_end != end and is_word(utf8::codepoint(m_word_end, end), extra_chars))
utf8::to_next(m_word_end, end);
while (true)
{
m_word_begin = m_word_end;
while (m_word_begin != end and not is_word(utf8::codepoint(m_word_begin, end), extra_chars))
utf8::to_next(m_word_begin, end);
m_word_end = m_word_begin;
CharCount word_len = 0;
while (m_word_end != end and is_word(utf8::codepoint(m_word_end, end), extra_chars))
{
utf8::to_next(m_word_end, end);
++word_len;
}
if (m_word_begin == end or word_len < max_word_len)
break;
}

return *this;
}

Expand Down

0 comments on commit ad882c3

Please sign in to comment.