-
Notifications
You must be signed in to change notification settings - Fork 0
/
text.h
73 lines (60 loc) · 1.8 KB
/
text.h
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
#ifndef __ALGO__TEXT__
#define __ALGO__TEXT__
#include <string>
namespace algocpp {
namespace text {
namespace kmp {
/********************************/
/* Knuth-Morris-Pratt algorithm */
/********************************/
/**
* Given a word, computes its P array. Resulting P[i+1] is the length of the longest proper prefix
* of the prefix word[0..i], which is also its (proper) suffix. The resulting P array is 1 longer
* than the input word.
*
* Thus:
*
* P[0] == 0; corresponds to the empty prefix
* P[1] == 0; corresponds to the 1-element prefix word[0..0]
* P[N] is the length of the longest proper prefix and suffix of the input word.
*/
std::vector<size_t> makeP(const std::string& word) {
const size_t N(word.size());
std::vector<size_t> P(N+1, 0);
if (2 <= N) {
size_t k = 0;
for (size_t i = 1; i < N; ++i) {
// word[0..i]
// k is the computed P for word[0..i-1]
const char c = word[i];
while (k && (c != word[k]))
k = P[k];
if (c == word[k])
++k;
P[i+1] = k;
}
}
return P;
}
/** Returns the list of occurences of a non-empty pattern in the given text. */
std::vector<size_t> search(const std::string& pattern, const std::string& text) {
const size_t M(pattern.size());
const size_t N(text.size());
std::vector<size_t> result; // list of occurences
if (M && (M <= N)) {
const std::vector<size_t> P(makeP(pattern));
size_t j = 0; // length of the matched pattern prefix
for (size_t i=0; i+M <= N; ) {
j = P[j];
for (; (j < M) && (pattern[j] == text[i+j]); ++j);
if (M == j)
result.push_back(i);
i += ((P[j] < j) ? (j - P[j]) : 1);
}
}
return result;
}
};
};
};
#endif