# String Matching with Rabin Karp algorithm

In [30]:
from typing import List

# Intuition

Let's assume there are only numbers from 1 to 10.

In [36]:
#                |.      |
#                31,415
#                 14,15  2
# 10(31,415-3*10,000) + T[s+m+1]
T = [2,3,5,9,0,2,3,1,4,1,5,2,6,7,3,9,9,2,1]
P = [3,1,4,1,5]

In [39]:
(31415-30000)*10+2

14152

## Horner's rule

In [33]:
def calc_value(arr: List[int], d=10) -> int:
    res = arr[0]
    
    for i in range(1, len(arr)):
        res = (res * d) + arr[i]
        
    return res

In [35]:
calc_value([3,1,4,1,5])

31415

In [22]:
n = len(T)
m = len(P)

## Calculate new hash from previous one

In [40]:
#                |.      |
#                31,415
#                 14,15  2
#  10(31,415-3*10,000) + T[s+m+1]
#T = [2,3,5,9,0,2,3,1,4,1,5,2,6,7,3,9,9,2,1]
#P = [3,1,4,1,5]

In [78]:
def calc_hash_arr(T: str, m: int):
    '''
    d = 10
    m = 5
    Ts+1 = 
        d(Ts - d^(m-1)*Ts+1) + # 10 * (31,415 - 10^4*3) # old high order num
        T[s+m+1]               # new low order num
    '''
    n = len(T)
    d = 10
    h = d ** (m-1)
    res = [None] * (n-m+1)
    res[0] = calc_value(T[:m])
    
    for s in range(1, n-m+1):
        # s=1
        res[s] = d * (res[s-1] - h*T[s-1]) + T[s+m-1]
        print(res)
    
    return res

calc_hash_arr([1,2,3,4,5,6], 3)

# 10 * (12 - 10*2)  + 3

[123, 234, None, None]
[123, 234, 345, None]
[123, 234, 345, 456]


[123, 234, 345, 456]

In [59]:
10 * (12 - 10*1)  + 3

23

## Let's use modulo

In [83]:
def calc_hash_arr(T: str, m: int, d:int=10, q: int = 13):
    '''
    d = 10
    m = 5
    Ts+1 = 
        d(Ts - d^(m-1)*Ts+1) + # 10 * (31,415 - 10^4*3) # old high order num
        T[s+m+1]               # new low order num
    '''
    n = len(T)
    h = d ** (m-1)
    res = [None] * (n-m+1)
    res[0] = (calc_value(T[:m], d=d)) % q
    
    for s in range(1, n-m+1):
        # s=1
        res[s] = (d * (res[s-1] - h*T[s-1]) + T[s+m-1]) % q
        print(res)
    
    return res

#calc_hash_arr([1,2,3,4,5,6], 3)
calc_hash_arr(T, m=5)

[8, 9, None, None, None, None, None, None, None, None, None, None, None, None, None]
[8, 9, 3, None, None, None, None, None, None, None, None, None, None, None, None]
[8, 9, 3, 11, None, None, None, None, None, None, None, None, None, None, None]
[8, 9, 3, 11, 0, None, None, None, None, None, None, None, None, None, None]
[8, 9, 3, 11, 0, 1, None, None, None, None, None, None, None, None, None]
[8, 9, 3, 11, 0, 1, 7, None, None, None, None, None, None, None, None]
[8, 9, 3, 11, 0, 1, 7, 8, None, None, None, None, None, None, None]
[8, 9, 3, 11, 0, 1, 7, 8, 4, None, None, None, None, None, None]
[8, 9, 3, 11, 0, 1, 7, 8, 4, 5, None, None, None, None, None]
[8, 9, 3, 11, 0, 1, 7, 8, 4, 5, 10, None, None, None, None]
[8, 9, 3, 11, 0, 1, 7, 8, 4, 5, 10, 11, None, None, None]
[8, 9, 3, 11, 0, 1, 7, 8, 4, 5, 10, 11, 7, None, None]
[8, 9, 3, 11, 0, 1, 7, 8, 4, 5, 10, 11, 7, 9, None]
[8, 9, 3, 11, 0, 1, 7, 8, 4, 5, 10, 11, 7, 9, 11]


[8, 9, 3, 11, 0, 1, 7, 8, 4, 5, 10, 11, 7, 9, 11]

In [84]:
# dq fits in a computer word

# d=26
int(2147483647/26)

82595524

In [88]:
ord('a')

97

In [None]:
# T=[1,2,3,4]
# P=[1,2]
# hash = [12,23,34]
# n = 4
# m = 2
# 4-2+1=3

In [41]:
14152%13

8

In [9]:
def hash(val: str) -> int:
    res = 0
    d = 10
    q = 13
    for v in val:
        res += (d*res + ord(v)) % q
    return res

hash('abc')

18

In [14]:
-8 % 5

2