In [87]:
# Load the Cython IPython extension so the %%cython cell magic is available.
%load_ext cython

The cython extension is already loaded. To reload it, use:
  %reload_ext cython


In [226]:
import pandas as pd
from IPython.display import display

In [227]:
def print_full(x):
    """Display ``x`` without pandas truncating rows, columns or cell text.

    The display options are overridden only for the duration of the call:
    ``pd.option_context`` restores whatever values were active before, even
    if ``display`` raises, instead of resetting them to pandas' defaults.

    Parameters
    ----------
    x : object
        The object handed to ``display`` (typically a DataFrame or Series).

    Notes
    -----
    ``display.max_colwidth=None`` means "no limit" and needs pandas >= 1.0;
    the older ``-1`` spelling is deprecated and rejected by pandas 2.x.
    """
    with pd.option_context(
        'display.max_rows', None,        # None = show every row
        'display.max_columns', None,     # None = show every column
        'display.width', 2000,
        'display.float_format', '{:20,.2f}'.format,
        'display.max_colwidth', None,    # None = never shorten cell text
    ):
        display(x)


In [144]:
# Read the tab-separated source file (it has no header row) and name its
# two columns: the verse reference and the verse text.
df = pd.read_csv('data/bible.txt', sep='\t', header=None)
df.columns = ['book', 'text']
# Uncomment to iterate quickly on a small sample:
#df = df.head(100)

In [145]:
# Reduce each verse to a bare run of lowercase letters (no spaces, digits or
# punctuation). regex=True is passed explicitly because '[^a-z]' is a regular
# expression: pandas >= 2.0 treats the pattern as a literal string by default,
# which would silently strip nothing.
df['ptext'] = df['text'].str.lower().str.replace('[^a-z]', '', regex=True)
df.head()

Unnamed: 0,book,text,ptext
0,Genesis 1:1,In the beginning God created the heaven and th...,inthebeginninggodcreatedtheheavenandtheearth
1,Genesis 1:2,"And the earth was without form, and void; and ...",andtheearthwaswithoutformandvoidanddarknesswas...
2,Genesis 1:3,"And God said, Let there be light: and there wa...",andgodsaidlettherebelightandtherewaslight
3,Genesis 1:4,"And God saw the light, that it was good: and G...",andgodsawthelightthatitwasgoodandgoddividedthe...
4,Genesis 1:5,"And God called the light Day, and the darkness...",andgodcalledthelightdayandthedarknesshecalledn...


In [146]:
# Concatenate every normalised verse into one long string. str.join builds the
# result in a single pass, whereas Series.sum() adds the strings one by one.
all_text = ''.join(df['ptext'])

In [156]:
%%cython -a
cdef do_find(str phrase, str all_text, int n):
    """Look for ``phrase`` spelled out in ``all_text`` at a fixed spacing ``n``.

    A match at ``start`` means ``all_text[start + n*k] == phrase[k]`` for every
    index ``k`` of ``phrase``.

    Returns ``(start, n)`` for the first (lowest ``start``) match, or ``None``
    when there is no match or ``phrase`` is empty.
    """
    cdef Py_ssize_t start, idx, n_starts
    cdef Py_ssize_t phrase_len = len(phrase)

    if phrase_len == 0:
        return None

    # The last letter sits at start + n*(phrase_len - 1), which must stay
    # inside all_text. A non-positive count simply yields an empty range.
    n_starts = len(all_text) - n * (phrase_len - 1)
    for start in range(n_starts):
        # Cheap rejection on the first letter before walking the whole phrase.
        if all_text[start] != phrase[0]:
            continue
        for idx in range(1, phrase_len):
            if all_text[start + n * idx] != phrase[idx]:
                break
        else:
            # Inner loop finished without a mismatch: every letter lined up.
            return start, n
    return None


cpdef find(str phrase, str all_text, int max_n=30):
    """Find ``phrase`` hidden in ``all_text`` at a constant letter spacing.

    Spacings are tried in increasing order from 1 up to ``max_n - 1``; the
    first spacing that produces a match wins.

    Declared ``cpdef`` so that it can be called from ordinary Python cells
    (a plain ``cdef`` function is only visible to other Cython code).

    Parameters
    ----------
    phrase : str
        Letters to look for.
    all_text : str
        Text to search.
    max_n : int, default 30
        Exclusive upper bound on the spacing.

    Returns
    -------
    tuple or None
        ``(start, n)`` - index of the first letter and the spacing - or
        ``None`` if nothing is found or ``phrase`` is empty.
    """
    cdef int n

    if not phrase:
        return None

    # Start at 1: a spacing of 0 would compare every letter of the phrase
    # against the same single character of the text.
    for n in range(1, max_n):
        found = do_find(phrase, all_text, n)
        if found is not None:
            return found
    return None

In [189]:
%%timeit -n3 -r3
# Quick benchmark: search only the first 10,000 characters (3 loops x 3 runs).
find('ceiagay', all_text[:10000])

1.9 ms ± 614 µs per loop (mean ± std. dev. of 3 runs, 3 loops each)


In [244]:
# Search the whole text for the phrase, passing 100 as the spacing limit (max_n).
phrase = 'ceiagay'
aa = find(phrase, all_text, 100)
aa  # (start index, spacing) of the match

(2485285, 49)

In [245]:
# Print the text as rows that are one spacing wide, one row per letter of the
# phrase, so the matched letters line up in a single column. Each row is
# shifted back by half a spacing (rounded up) to put that column near the middle.
start, skip = aa
offset = -skip // 2
for i in range(len(phrase)):
    lo = start + skip * i + offset
    hi = lo + skip
    # Near the very start of the text the first row would begin before index 0;
    # a negative slice start wraps around, so clamp it and left-pad with spaces
    # to keep the column aligned.
    pad = ' ' * max(0, -lo)
    print(pad + all_text[max(lo, 0):hi])

ndwhenhehadgatheredallthechiefpriestsandscribesof
thepeopletogetherhedemandedofthemwherechristshoul
dbebornandtheysaiduntohiminbethlehemofjudaeaforth
usitiswrittenbytheprophetandthoubethleheminthelan
dofjudaartnottheleastamongtheprincesofjudaforouto
ftheeshallcomeagovernorthatshallrulemypeopleisrae
lthenherodwhenhehadprivilycalledthewisemenenquire


In [246]:
# Which verses contain the start of the first printed row?
df.loc[df['ptext'].str.contains('adgatheredall')]


Unnamed: 0,book,text,ptext
8079,2 Samuel 2:30,And Joab returned from following Abner: and wh...,andjoabreturnedfromfollowingabnerandwhenhehadg...
23173,Matthew 2:4,And when he had gathered all the chief priests...,andwhenhehadgatheredallthechiefpriestsandscrib...


In [247]:
# Which verse contains the tail of the last printed row?
df.loc[df['ptext'].str.contains('hehadprivi')]


Unnamed: 0,book,text,ptext
23176,Matthew 2:7,"Then Herod, when he had privily called the wis...",thenherodwhenhehadprivilycalledthewisemenenqui...


In [248]:
# Show the untruncated reference and text for row positions 23173 through 23176.
print_full(df.iloc[23173:23177][['book', 'text']])

Unnamed: 0,book,text
23173,Matthew 2:4,"And when he had gathered all the chief priests and scribes of the people together, he demanded of them where Christ should be born."
23174,Matthew 2:5,"And they said unto him, In Bethlehem of Judaea: for thus it is written by the prophet,"
23175,Matthew 2:6,"And thou Bethlehem, in the land of Juda, art not the least among the princes of Juda: for out of thee shall come a Governor, that shall rule my people Israel."
23176,Matthew 2:7,"Then Herod, when he had privily called the wise men, enquired of them diligently what time the star appeared."
