Skip to content
Browse files

Merge pull request #24 from jabagawee/kmp_search

KMP search
  • Loading branch information...
2 parents 0c33bca + 2e51f8c commit d473ec2b7a23993dd5ba5db037852615729f6e9c @nryoung committed Oct 2, 2012
Showing with 30 additions and 23 deletions.
  1. +29 −22 algorithms/searching/kmp_search.py
  2. +1 −1 algorithms/tests/test_searching.py
View
51 algorithms/searching/kmp_search.py
@@ -7,43 +7,50 @@
------------------------
Uses a prefix function to reduce the searching time.
- Pre: a sorted list[0,...n,] integers and the key to search for.
+ Pre: a string and a substring to find in it; the string should be at least as long as the substring.
- Post: returns the index of where the first element that matches the key.
+ Post: returns a list of indices where the substring was found.
Time Complexity: O(n + k), where n is the length of the string and k is the length of the substring to be found
Pseudo Code: CLRS. Introduction to Algorithms. 3rd ed.
- kmp_search.search(sorted_list) -> integer
- kmp_search.search(sorted_list) -> False
+
+ kmp_search.search(string, word) -> list[integers]
+ kmp_search.search(string, word) -> list[empty]
"""
def search(string, word):
- n = len(string)
- m = len(word)
- pi = compute_prefix(word)
- q = 0
- for i in range(n):
- while q > 0 and word[q] != string[i]:
- q = pi[q - 1]
- if word[q] == string[i]:
- q = q + 1
- if q == m:
- return i - m + 1
- return False
+ word_length = len(word)
+ string_length = len(string)
+ offsets = []
+
+ if word_length > string_length:
+ return offsets
+
+ prefix = compute_prefix(word)
+ q = 0 # q is the number of characters matched
+ for index, letter in enumerate(string):
+ while q > 0 and word[q] != letter:
+ q = prefix[q - 1] # next character does not match
+ if word[q] == letter:
+ q += 1
+ if q == word_length:
+ offsets.append(index - word_length + 1)
+ q = prefix[q - 1] # look for next match
+ return offsets
def compute_prefix(word):
- m = len(word)
- pi = [0] * m
+ word_length = len(word)
+ prefix = [0] * word_length
k = 0
- for q in range(1, m):
+ for q in xrange(1, word_length):
while k > 0 and word[k] != word[q]:
- k = pi[k - 1]
+ k = prefix[k - 1]
if word[k + 1] == word[q]:
k = k + 1
- pi[q] = k
- return pi
+ prefix[q] = k
+ return prefix
View
2 algorithms/tests/test_searching.py
@@ -41,7 +41,7 @@ def test_kmpsearch(self):
self.string = "ABCDE FG ABCDEABCDEF"
rv1 = kmp_search.search(self.string, "ABCDEA")
rv2 = kmp_search.search(self.string, "ABCDER")
- self.assertIs(rv1, 9)
+ self.assertIs(rv1[0], 9)
self.assertFalse(rv2)

0 comments on commit d473ec2

Please sign in to comment.
Something went wrong with that request. Please try again.