In [1]:
# brute force 
def stringmatch(t,p):
    """Return every index of `t` at which `p` occurs, using a brute-force scan.

    Each candidate alignment is compared left to right and abandoned at the
    first mismatching character. Overlapping occurrences are all reported.
    An empty `p` matches at every index from 0 to len(t) inclusive; a `p`
    longer than `t` produces an empty list.
    """
    m = len(p)
    hits = []
    for start in range(len(t) - m + 1):
        # all() short-circuits, so a failed alignment is dropped at the
        # first differing character.
        if all(t[start + off] == p[off] for off in range(m)):
            hits.append(start)
    return hits

# Worst case O(n*m) character comparisons for a text of length n and a
# pattern of length m: up to m comparisons at each of the n-m+1 alignments.
print(stringmatch('abababbababbbbababab','abab'))

[0, 2, 7, 14, 16]


In [2]:
# reverse 
def stringmatchrev(t,p):
    """Return every index of `t` at which `p` occurs, comparing right to left.

    Same brute-force scan over all alignments as a forward matcher, except
    that each alignment is verified starting from the last character of `p`
    and moving towards the first. Overlapping occurrences are reported; an
    empty `p` matches at every index from 0 to len(t) inclusive.
    """
    m = len(p)
    hits = []
    for start in range(len(t) - m + 1):
        off = m - 1
        # Walk backwards while characters agree; off drops below 0 only
        # when the whole pattern matched.
        while off >= 0 and t[start + off] == p[off]:
            off -= 1
        if off < 0:
            hits.append(start)
    return hits

# Scanning each alignment right to left does not change the bound:
# still O(n*m) comparisons in the worst case.
print(stringmatchrev('abababbababbbbababab','abab'))

[0, 2, 7, 14, 16]


In [3]:
# sublinear 
def boyermoore(t,p):
    """Return every index of `t` at which `p` occurs (bad-character skipping).

    The pattern is preprocessed into a map from each character to the index
    of its last occurrence in `p`. Each alignment is checked right to left;
    on a mismatch the alignment is advanced using that map instead of always
    moving by one. Overlapping occurrences are reported, and an empty `p`
    matches at every index from 0 to len(t) inclusive.
    """
    m = len(p)
    # Later occurrences overwrite earlier ones, leaving the rightmost index.
    rightmost = {ch: idx for idx, ch in enumerate(p)}

    hits = []
    shift = 0
    limit = len(t) - m
    while shift <= limit:
        k = m - 1
        while k >= 0 and t[shift + k] == p[k]:
            k -= 1

        if k < 0:
            # Full match: record it and try the very next alignment.
            hits.append(shift)
            shift += 1
            continue

        # k is the pattern index of the mismatch; bad is the text character there.
        bad = t[shift + k]
        if bad in rightmost:
            # Line up the last occurrence of bad in p with the text, but
            # always move forward by at least one position.
            shift += max(k - rightmost[bad], 1)
        else:
            # bad does not occur in p at all: jump completely past it.
            shift += k + 1
    return hits
      
# Worst case is still O(n*m). Keeping `last` in a dict costs O(m) to build;
# without a dict, a table with one slot per alphabet symbol would cost
# O(alphabet size) to initialise.
# NOTE(review): this skipping strategy is reportedly what grep uses - confirm.
print(boyermoore('abcaaacabc','abc'))

[0, 7]


In [9]:
def rabinkarp(t,p):
    """Find every occurrence of the digit string `p` in the digit string `t`.

    Each length-len(p) window of `t` is read as a base-10 integer and
    compared with the integer value of `p`. The window value is updated in
    O(1) when sliding: drop the leading digit, shift left, append the new
    digit. No modulus is applied, so equal values mean equal windows and no
    separate verification is needed.

    Args:
        t: text consisting of decimal digits only.
        p: pattern consisting of decimal digits only.

    Returns:
        List of start indices, in increasing order, including overlapping
        occurrences. An empty `p` matches at every index 0..len(t); a `p`
        longer than `t` gives an empty list.

    Raises:
        ValueError: if int() cannot parse a character that is examined.
    """
    n, m = len(t), len(p)
    if m > n:
        # No window of length m exists; reading t[i] for i < m would fail.
        return []
    if m == 0:
        # 10**(m-1) would be the float 0.1 here, so answer directly.
        return list(range(n + 1))

    numt, nump = 0, 0
    for i in range(m):
        numt = 10*numt + int(t[i])
        nump = 10*nump + int(p[i])

    poslist = []
    if numt == nump:
        poslist.append(0)

    # Place value of the leading digit of a window; loop-invariant.
    high = 10**(m-1)
    for i in range(1, n-m+1):
        numt = 10*(numt - int(t[i-1])*high) + int(t[i+m-1])
        if numt == nump:
            poslist.append(i)

    return poslist

# Demo: locate every occurrence of the digit pattern '23' in a digit string.
print(rabinkarp('233323233454323','23'))

[0, 4, 6, 13]


In [10]:
def rabin_karp(text, pattern, prime=101):
    """Find every occurrence of `pattern` in `text` with a rolling hash.

    The hash of a window is the sum of its character code points modulo
    `prime`. Sliding the window by one subtracts the outgoing character and
    adds the incoming one. Because different windows can share a hash
    (any permutation of the same characters does), a hash hit is always
    confirmed by comparing the actual substring.

    Args:
        text: string to search in.
        pattern: string to search for.
        prime: modulus for the hash; must be a non-zero integer.

    Returns:
        List of start indices in increasing order, including overlapping
        occurrences. An empty pattern matches at every index 0..len(text);
        a pattern longer than the text gives an empty list.
    """
    match_found = []
    n, m = len(text), len(pattern)
    if m > n:
        # There is no window of length m to hash.
        return match_found

    pattern_hash = sum(ord(c) for c in pattern) % prime
    text_hash = sum(ord(c) for c in text[:m]) % prime

    for i in range(n - m + 1):
        # The substring comparison only runs when the cheap hash test passes.
        if text_hash == pattern_hash and text[i:i+m] == pattern:
            match_found.append(i)
        # Roll the hash forward unless this was the last window.
        if i < n - m:
            text_hash = (text_hash - ord(text[i]) + ord(text[i+m])) % prime
    return match_found
# Demo: the text is the pattern repeated twice, so two matches are expected.
text = 'abcdbabcdb'
pattern = 'abcdb'
print(rabin_karp(text, pattern))

[0, 5]


In [11]:
def kmp_fail(p):
    """Compute the KMP failure table of `p`.

    Entry i of the result is the length of the longest proper prefix of
    p[:i+1] that is also a suffix of it. An empty `p` gives an empty list.
    """
    size = len(p)
    table = [0] * size
    pos, border = 1, 0
    while pos < size:
        if p[pos] != p[border]:
            if border == 0:
                # No shorter border left to try: table[pos] stays 0.
                pos += 1
            else:
                # Fall back to the next shorter border and retry this pos.
                border = table[border-1]
            continue
        # The current border extends by one character.
        border += 1
        table[pos] = border
        pos += 1
    return table
# Demo: failure table for a pattern whose prefix 'abca' reappears as a suffix.
print(kmp_fail('abcaabca'))

[0, 0, 0, 1, 1, 2, 3, 4]


In [12]:
def find_kmp(t, p):
    """Return every index of `t` at which `p` occurs, using Knuth-Morris-Pratt.

    The text index never moves backwards: after a mismatch or a completed
    match, the failure table from kmp_fail(p) says how much of the pattern
    is already known to match, so the scan takes O(len(t) + len(p)) steps.

    Args:
        t: text to search in.
        p: pattern to search for.

    Returns:
        List of start indices in increasing order, including overlapping
        occurrences. An empty `p` matches at every index 0..len(t); a `p`
        longer than `t` gives an empty list.
    """
    n, m = len(t), len(p)
    if m == 0:
        # Indexing p[k] below would fail for an empty pattern.
        return list(range(n + 1))
    if m > n:
        return []

    fail = kmp_fail(p)
    match = []
    j = k = 0  # j indexes the text, k the pattern
    while j < n:
        if t[j] == p[k]:
            if k == m - 1:
                match.append(j - m + 1)
                # Keep the longest border of the whole pattern as already
                # matched instead of rewinding j; this is what finds
                # overlapping occurrences without rescanning text.
                k = fail[k]
            else:
                k += 1
            j += 1
        elif k > 0:
            k = fail[k-1]
        else:
            j += 1
    return match
# Demo: the occurrences of 'aba' starting at 0 and 2 overlap; both must be reported.
print(find_kmp('ababaabbaba','aba'))


[0, 2, 8]


* A trie is a special kind of tree 
    * From “information retrieval” 
    * Pronounced “try”, to distinguish it from “tree” 
* Rooted tree 

    * Other than root, each node labelled by a letter from Σ 
    * Children of a node have distinct labels 
* Each maximal path is a word 

    * No word may be a proper prefix of another, otherwise its path would not be maximal 
    * To guarantee this, append a special end-of-word symbol $ to every word

In [1]:
class Trie:
    """Set of words stored as a tree of nested dicts, one level per character.

    Every stored word is terminated with the marker "$", so a word is present
    exactly when the node reached by its characters has a "$" child.
    Because "$" is the marker, words that themselves contain "$" are not
    represented faithfully.
    """
    def __init__(self,S=()):
        """Create a trie containing every word of the iterable `S`."""
        # The default is an immutable tuple rather than a shared list object.
        self.root = {}
        for s in S:
            self.add(s)
    def add(self,s):
        """Insert the word `s`, creating nodes as needed."""
        curr = self.root
        for c in s + "$":
            # setdefault returns the existing child or installs a new one.
            curr = curr.setdefault(c, {})
    def query(self,s):
        """Return True if `s` was added as a whole word, False otherwise."""
        curr = self.root
        for c in s:
            if c not in curr:
                return False
            curr = curr[c]
        # A prefix of a stored word reaches a node but has no end marker.
        return "$" in curr
        
# Demo: 'car' is a prefix of 'card' and 'care'; the end marker keeps them distinct.
T = Trie()
T.add('car')
T.add('card')
T.add('care')
T.add('dog')
T.add('done')
# 'dog' was added; 'cat' shares the prefix 'ca' with stored words but was not.
print(T.query('dog'))
print(T.query('cat'))


True
False


In [15]:
class SuffixTrie:
    """Trie of nested dicts holding every suffix of one string.

    The string is terminated with "$" before its suffixes are inserted, so
    the node at the end of a genuine suffix always has a "$" child.
    """
    def __init__(self,s):
        """Insert all suffixes of s + "$", longest first."""
        self.root = {}
        terminated = s + "$"
        for start in range(len(terminated)):
            node = self.root
            for ch in terminated[start:]:
                node = node.setdefault(ch, {})
    def followPath(self,s):
        """Return the node reached by spelling `s` from the root, or None."""
        node = self.root
        for ch in s:
            # Children are always dicts, so None can only mean "no such edge".
            node = node.get(ch)
            if node is None:
                return None
        return node
    def hasSuffix(self,s):
        """Return True if `s` is a suffix of the indexed string."""
        node = self.followPath(s)
        if node is None:
            return False
        return "$" in node
        
# Demo: index the suffixes of 'card', then inspect the raw structure,
# a substring lookup, and a failed suffix test.
ST = SuffixTrie('card')
print(ST.root)
print(ST.followPath('a'))
print(ST.hasSuffix('aa'))

{'c': {'a': {'r': {'d': {'$': {}}}}}, 'a': {'r': {'d': {'$': {}}}}, 'r': {'d': {'$': {}}}, 'd': {'$': {}}, '$': {}}
{'r': {'d': {'$': {}}}}
False
