-
Notifications
You must be signed in to change notification settings - Fork 0
/
doc_search.py
executable file
·71 lines (48 loc) · 1.46 KB
/
doc_search.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
"""
Knuth-Morris-Pratt (KMP) Algorithm Implementation
Reference: https://en.wikipedia.org/wiki/Knuth%E2%80%93Morris%E2%80%93Pratt_algorithm
"""
def partial_match_table(W):
"""Partial match table/failure function"""
# I: W (pattern/word to be analyzed)
# O: T (array of ints)
pos = 1 # current position we are computing in T
cnd = 0 # zero-based index in W of the next character of the current candidate substring
T = [0] * (len(W) + 1) # table
T[0] = -1
while pos < len(W):
if W[pos] == W[cnd]:
T[pos] = T[cnd]
else:
T[pos] = cnd
cnd = T[cnd]
while cnd >= 0 and W[pos] != W[cnd]:
cnd = T[cnd]
pos += 1
cnd += 1
# T[pos] = cnd
return T
def kmp_search(S, T, W):
#I: string, S (text to be searched)
# string, W (word sought)
# partial_match_table, T (constructed from partial_match_table)
#O: array of indices (P) where W is found in S; nP (number of positions)
j = 0 # pos of current char in S
k = 0 # pos of current char in W
nP = 0
P = []
while j < len(S):
if W[k] == S[j]:
j += 1
k += 1
if k == len(W):
# occurrence found
P.append(j - k)
nP += 1
k = T[k]
else:
k = T[k]
if k < 0:
j += 1
k += 1
return nP, P