Skip to content

Commit

Permalink
src: minor refactor to string_search.h
Browse files Browse the repository at this point in the history
- Use `std::max` instead of a custom variant
- Use member method pointers to avoid an extra layer of indirection
- Stop transferring `Vector` into the `node` namespace

PR-URL: #20546
Reviewed-By: James M Snell <jasnell@gmail.com>
Reviewed-By: Daniel Bevenius <daniel.bevenius@gmail.com>
Reviewed-By: Tiancheng "Timothy" Gu <timothygu99@gmail.com>
  • Loading branch information
addaleax committed May 9, 2018
1 parent 5db018d commit 9873875
Showing 1 changed file with 54 additions and 91 deletions.
145 changes: 54 additions & 91 deletions src/string_search.h
Original file line number Diff line number Diff line change
Expand Up @@ -9,18 +9,11 @@

#include "node_internals.h"
#include <string.h>
#include <algorithm>

namespace node {
namespace stringsearch {


// Returns the maximum of the two parameters.
template <typename T>
T Max(T a, T b) {
return a < b ? b : a;
}


static const uint32_t kMaxOneByteCharCodeU = 0xff;

template <typename T>
Expand Down Expand Up @@ -98,7 +91,9 @@ class StringSearchBase {
template <typename Char>
class StringSearch : private StringSearchBase {
public:
explicit StringSearch(Vector<const Char> pattern)
typedef stringsearch::Vector<const Char> Vector;

explicit StringSearch(Vector pattern)
: pattern_(pattern), start_(0) {
if (pattern.length() >= kBMMaxShift) {
start_ = pattern.length() - kBMMaxShift;
Expand All @@ -108,17 +103,17 @@ class StringSearch : private StringSearchBase {
CHECK_GT(pattern_length, 0);
if (pattern_length < kBMMinPatternLength) {
if (pattern_length == 1) {
strategy_ = &SingleCharSearch;
strategy_ = &StringSearch::SingleCharSearch;
return;
}
strategy_ = &LinearSearch;
strategy_ = &StringSearch::LinearSearch;
return;
}
strategy_ = &InitialSearch;
strategy_ = &StringSearch::InitialSearch;
}

size_t Search(Vector<const Char> subject, size_t index) {
return strategy_(this, subject, index);
size_t Search(Vector subject, size_t index) {
return (this->*strategy_)(subject, index);
}

static inline int AlphabetSize() {
Expand All @@ -136,31 +131,12 @@ class StringSearch : private StringSearchBase {
}

private:
typedef size_t (*SearchFunction)(
StringSearch<Char>*,
Vector<const Char>,
size_t);

static size_t SingleCharSearch(StringSearch<Char>* search,
Vector<const Char> subject,
size_t start_index);

static size_t LinearSearch(StringSearch<Char>* search,
Vector<const Char> subject,
size_t start_index);

static size_t InitialSearch(StringSearch<Char>* search,
Vector<const Char> subject,
size_t start_index);

static size_t BoyerMooreHorspoolSearch(
StringSearch<Char>* search,
Vector<const Char> subject,
size_t start_index);

static size_t BoyerMooreSearch(StringSearch<Char>* search,
Vector<const Char> subject,
size_t start_index);
typedef size_t (StringSearch::*SearchFunction)(Vector, size_t);
size_t SingleCharSearch(Vector subject, size_t start_index);
size_t LinearSearch(Vector subject, size_t start_index);
size_t InitialSearch(Vector subject, size_t start_index);
size_t BoyerMooreHorspoolSearch(Vector subject, size_t start_index);
size_t BoyerMooreSearch(Vector subject, size_t start_index);

void PopulateBoyerMooreHorspoolTable();

Expand Down Expand Up @@ -197,7 +173,7 @@ class StringSearch : private StringSearchBase {
}

// The pattern to search for.
Vector<const Char> pattern_;
Vector pattern_;
// Pointer to implementation of the search.
SearchFunction strategy_;
// Cache value of Max(0, pattern_length() - kBMMaxShift)
Expand All @@ -213,8 +189,8 @@ inline T AlignDown(T value, U alignment) {


inline uint8_t GetHighestValueByte(uint16_t character) {
return Max(static_cast<uint8_t>(character & 0xFF),
static_cast<uint8_t>(character >> 8));
return std::max(static_cast<uint8_t>(character & 0xFF),
static_cast<uint8_t>(character >> 8));
}


Expand Down Expand Up @@ -319,11 +295,10 @@ inline size_t FindFirstCharacter(Vector<const uint8_t> pattern,

template <typename Char>
size_t StringSearch<Char>::SingleCharSearch(
StringSearch<Char>* search,
Vector<const Char> subject,
Vector subject,
size_t index) {
CHECK_EQ(1, search->pattern_.length());
return FindFirstCharacter(search->pattern_, subject, index);
CHECK_EQ(1, pattern_.length());
return FindFirstCharacter(pattern_, subject, index);
}

//---------------------------------------------------------------------
Expand All @@ -333,22 +308,19 @@ size_t StringSearch<Char>::SingleCharSearch(
// Simple linear search for short patterns. Never bails out.
template <typename Char>
size_t StringSearch<Char>::LinearSearch(
StringSearch<Char>* search,
Vector<const Char> subject,
Vector subject,
size_t index) {
Vector<const Char> pattern = search->pattern_;
CHECK_GT(pattern.length(), 1);
const size_t pattern_length = pattern.length();
const size_t n = subject.length() - pattern_length;
CHECK_GT(pattern_.length(), 1);
const size_t n = subject.length() - pattern_.length();
for (size_t i = index; i <= n; i++) {
i = FindFirstCharacter(pattern, subject, i);
i = FindFirstCharacter(pattern_, subject, i);
if (i == subject.length())
return subject.length();
CHECK_LE(i, n);

bool matches = true;
for (size_t j = 1; j < pattern_length; j++) {
if (pattern[j] != subject[i + j]) {
for (size_t j = 1; j < pattern_.length(); j++) {
if (pattern_[j] != subject[i + j]) {
matches = false;
break;
}
Expand All @@ -366,19 +338,17 @@ size_t StringSearch<Char>::LinearSearch(

template <typename Char>
size_t StringSearch<Char>::BoyerMooreSearch(
StringSearch<Char>* search,
Vector<const Char> subject,
Vector subject,
size_t start_index) {
Vector<const Char> pattern = search->pattern_;
const size_t subject_length = subject.length();
const size_t pattern_length = pattern.length();
const size_t pattern_length = pattern_.length();
// Only preprocess at most kBMMaxShift last characters of pattern.
size_t start = search->start_;
size_t start = start_;

int* bad_char_occurrence = search->bad_char_table();
int* good_suffix_shift = search->good_suffix_shift_table();
int* bad_char_occurrence = bad_char_table();
int* good_suffix_shift = good_suffix_shift_table();

Char last_char = pattern[pattern_length - 1];
Char last_char = pattern_[pattern_length - 1];
size_t index = start_index;
// Continue search from i.
while (index <= subject_length - pattern_length) {
Expand All @@ -391,7 +361,7 @@ size_t StringSearch<Char>::BoyerMooreSearch(
return subject.length();
}
}
while (pattern[j] == (c = subject[index + j])) {
while (pattern_[j] == (c = subject[index + j])) {
if (j == 0) {
return index;
}
Expand Down Expand Up @@ -420,7 +390,6 @@ size_t StringSearch<Char>::BoyerMooreSearch(
template <typename Char>
void StringSearch<Char>::PopulateBoyerMooreTable() {
const size_t pattern_length = pattern_.length();
Vector<const Char> pattern = pattern_;
// Only look at the last kBMMaxShift characters of pattern (from start_
// to pattern_length).
const size_t start = start_;
Expand Down Expand Up @@ -448,8 +417,8 @@ void StringSearch<Char>::PopulateBoyerMooreTable() {
{
size_t i = pattern_length;
while (i > start) {
Char c = pattern[i - 1];
while (suffix <= pattern_length && c != pattern[suffix - 1]) {
Char c = pattern_[i - 1];
while (suffix <= pattern_length && c != pattern_[suffix - 1]) {
if (static_cast<size_t>(shift_table[suffix]) == length) {
shift_table[suffix] = suffix - i;
}
Expand All @@ -458,7 +427,7 @@ void StringSearch<Char>::PopulateBoyerMooreTable() {
suffix_table[--i] = --suffix;
if (suffix == pattern_length) {
// No suffix to extend, so we check against last_char only.
while ((i > start) && (pattern[i - 1] != last_char)) {
while ((i > start) && (pattern_[i - 1] != last_char)) {
if (static_cast<size_t>(shift_table[pattern_length]) == length) {
shift_table[pattern_length] = pattern_length - i;
}
Expand Down Expand Up @@ -489,17 +458,15 @@ void StringSearch<Char>::PopulateBoyerMooreTable() {

template <typename Char>
size_t StringSearch<Char>::BoyerMooreHorspoolSearch(
StringSearch<Char>* search,
Vector<const Char> subject,
Vector subject,
size_t start_index) {
Vector<const Char> pattern = search->pattern_;
const size_t subject_length = subject.length();
const size_t pattern_length = pattern.length();
int* char_occurrences = search->bad_char_table();
const size_t pattern_length = pattern_.length();
int* char_occurrences = bad_char_table();
int64_t badness = -pattern_length;

// How bad we are doing without a good-suffix table.
Char last_char = pattern[pattern_length - 1];
Char last_char = pattern_[pattern_length - 1];
int last_char_shift =
pattern_length - 1 -
CharOccurrence(char_occurrences, static_cast<Char>(last_char));
Expand All @@ -519,7 +486,7 @@ size_t StringSearch<Char>::BoyerMooreHorspoolSearch(
}
}
j--;
while (pattern[j] == (subject[index + j])) {
while (pattern_[j] == (subject[index + j])) {
if (j == 0) {
return index;
}
Expand All @@ -532,9 +499,9 @@ size_t StringSearch<Char>::BoyerMooreHorspoolSearch(
// compared to reading each character exactly once.
badness += (pattern_length - j) - last_char_shift;
if (badness > 0) {
search->PopulateBoyerMooreTable();
search->strategy_ = &BoyerMooreSearch;
return BoyerMooreSearch(search, subject, index);
PopulateBoyerMooreTable();
strategy_ = &StringSearch::BoyerMooreSearch;
return BoyerMooreSearch(subject, index);
}
}
return subject.length();
Expand Down Expand Up @@ -575,11 +542,9 @@ void StringSearch<Char>::PopulateBoyerMooreHorspoolTable() {
// isn't found very early in the subject. Upgrades to BoyerMooreHorspool.
template <typename Char>
size_t StringSearch<Char>::InitialSearch(
StringSearch<Char>* search,
Vector<const Char> subject,
Vector subject,
size_t index) {
Vector<const Char> pattern = search->pattern_;
const size_t pattern_length = pattern.length();
const size_t pattern_length = pattern_.length();
// Badness is a count of how much work we have done. When we have
// done enough work we decide it's probably worth switching to a better
// algorithm.
Expand All @@ -590,13 +555,13 @@ size_t StringSearch<Char>::InitialSearch(
for (size_t i = index, n = subject.length() - pattern_length; i <= n; i++) {
badness++;
if (badness <= 0) {
i = FindFirstCharacter(pattern, subject, i);
i = FindFirstCharacter(pattern_, subject, i);
if (i == subject.length())
return subject.length();
CHECK_LE(i, n);
size_t j = 1;
do {
if (pattern[j] != subject[i + j]) {
if (pattern_[j] != subject[i + j]) {
break;
}
j++;
Expand All @@ -606,9 +571,9 @@ size_t StringSearch<Char>::InitialSearch(
}
badness += j;
} else {
search->PopulateBoyerMooreHorspoolTable();
search->strategy_ = &BoyerMooreHorspoolSearch;
return BoyerMooreHorspoolSearch(search, subject, i);
PopulateBoyerMooreHorspoolTable();
strategy_ = &StringSearch::BoyerMooreHorspoolSearch;
return BoyerMooreHorspoolSearch(subject, i);
}
}
return subject.length();
Expand All @@ -629,7 +594,6 @@ size_t SearchString(Vector<const Char> subject,
} // namespace node

namespace node {
using node::stringsearch::Vector;

template <typename Char>
size_t SearchString(const Char* haystack,
Expand All @@ -643,9 +607,8 @@ size_t SearchString(const Char* haystack,
// code, create two vectors that are reversed views into the input strings.
// For example, v_needle[0] would return the *last* character of the needle.
// So we're searching for the first instance of rev(needle) in rev(haystack)
Vector<const Char> v_needle = Vector<const Char>(
needle, needle_length, is_forward);
Vector<const Char> v_haystack = Vector<const Char>(
stringsearch::Vector<const Char> v_needle(needle, needle_length, is_forward);
stringsearch::Vector<const Char> v_haystack(
haystack, haystack_length, is_forward);
size_t diff = haystack_length - needle_length;
size_t relative_start_index;
Expand Down

0 comments on commit 9873875

Please sign in to comment.