In [None]:
# -*- coding: utf-8 -*-

"""
Created on Mon June 22 17:45:47 2020

@author: Onkar Kulkarni

"""

In [35]:
class PatternMatching:
    """Collection of exact substring-search algorithms.

    Every search method returns the list of start indices (ascending,
    overlapping matches included) at which ``pattern`` occurs in ``string``.
    An empty pattern is defined to have no matches, so ``[]`` is returned.
    The methods keep no state on the instance.
    """

    def robinKarp(self, string, pattern):
        """Find ``pattern`` in ``string`` by comparing window hashes.

        Slides a window of ``len(pattern)`` over ``string`` and compares the
        hash of each window with the hash of the pattern. A hash match is
        confirmed with a direct comparison, so a hash collision can never
        produce a false positive.

        Note: each window is sliced and hashed from scratch (this is not a
        rolling hash), so the running time is O(m * n) for a string of
        length m and a pattern of length n.

        The method name (historical spelling of Rabin-Karp) is kept so
        existing callers continue to work.

        Args:
            string: Text to search in (any sliceable, hashable sequence).
            pattern: Text to search for.

        Returns:
            List of start indices of every occurrence of ``pattern``.
        """
        window = len(pattern)
        # No window fits (or nothing to look for): avoid looping at all.
        if window == 0 or window > len(string):
            return []

        target_hash = hash(pattern)  # loop-invariant, computed once
        positions = []
        # The last valid window starts at len(string) - window (inclusive).
        for start in range(len(string) - window + 1):
            candidate = string[start:start + window]
            if hash(candidate) == target_hash and candidate == pattern:
                positions.append(start)
        return positions

    def computePrefixSuffix(self, pattern):
        """Build the KMP failure table for ``pattern``.

        Entry ``i`` of the result is the length of the longest proper prefix
        of ``pattern[:i + 1]`` that is also a suffix of it. Entry 0 is
        always 0.

        Args:
            pattern: Pattern to preprocess.

        Returns:
            List of ``len(pattern)`` integers; ``[]`` for an empty pattern.
        """
        longest_prefix_suffix = [0] * len(pattern)
        matched = 0  # length of the current longest prefix that is a suffix

        for i in range(1, len(pattern)):
            # On mismatch fall back to the next shorter candidate border
            # until the characters agree or no border is left.
            while matched and pattern[i] != pattern[matched]:
                matched = longest_prefix_suffix[matched - 1]
            if pattern[i] == pattern[matched]:
                matched += 1
            longest_prefix_suffix[i] = matched
        return longest_prefix_suffix

    def kMP(self, string, pattern):
        """Find ``pattern`` in ``string`` with Knuth-Morris-Pratt.

        The pattern is preprocessed with ``computePrefixSuffix`` so that the
        scan never moves backwards in ``string``: after a mismatch the
        pattern index falls back to the longest border already matched.

        Args:
            string: Text to search in.
            pattern: Text to search for.

        Returns:
            List of start indices of every occurrence of ``pattern``.
        """
        if not pattern:
            return []

        longest_prefix_suffix = self.computePrefixSuffix(pattern)
        locations = []
        j = 0  # number of pattern characters currently matched

        for i, char in enumerate(string):
            # Mismatch after j matches: reuse the already-matched border.
            while j and pattern[j] != char:
                j = longest_prefix_suffix[j - 1]
            if pattern[j] == char:
                j += 1
            if j == len(pattern):
                locations.append(i - j + 1)
                # Continue from the border so overlapping matches are found.
                j = longest_prefix_suffix[j - 1]
        return locations



In [34]:

# Demo: run both search algorithms on the same input and print the
# start indices each one reports.
pattern_matching = PatternMatching()

for search in (pattern_matching.robinKarp, pattern_matching.kMP):
    print(search("mississippi", "iss"))

[1, 4]
[1, 4]
