# Hebrew "Torah" Pattern Analysis

Torah = (תּוֹרָה) = tav (ת), vav (ו), resh (ר), and hey (ה)
<br />
Yahweh = (יהוה) = yod (י), hey (ה), vav (ו), hey (ה) or YHWH/YHVH

Notes:
1. In Genesis, 50-step counting begins at the first occurrence of tav (ת), which is the 6th letter (0-based index 5) and occurs at the end of the first word, "resit" or "In the beginning".
2. In Exodus, 50-step counting begins at the first occurrence of tav (ת), which is the 8th letter and occurs at the end of the second word, "sem" or "the names".
3. In Leviticus, 8-step counting begins at the first occurrence of yod (י), which is the 2nd letter and occurs at the start of the first word, "qara" or "called".
4. In Numbers, 50-step counting starts with the third occurrence of hey (ה), which is the 14th letter and occurs at the end of the 4th word, "mose" or "Moses". Here Torah is spelled in reverse (hey, resh, vav, tav).
5. In Deuteronomy, 49-step counting starts in the fifth verse, using the fourth occurrence of hey (ה) (starting from the fifth verse), which is the 30th letter and occurs at the start of the 8th word, "et-tora" or "Torah". Here Torah is again spelled in reverse (hey, resh, vav, tav).

In [1]:
# Import statements
import pandas as pd
import numpy as np
import re

In [2]:
# Import the data file.
# The relative path uses forward slashes so it resolves on Windows, macOS and
# Linux alike; a backslash-separated path only resolves on Windows.
# header=0 together with `names` replaces the file's own header row with the
# column labels listed here.
heb_mod_df = pd.read_csv(
    '../translations/hebrew_modern.csv',
    header=0,
    names=['verse_id', 'book_name', 'book_number', 'chapter', 'verse', 'content'],
)

In [3]:
# Preview the first five rows to check that the columns were read as expected
heb_mod_df.head(n=5)

Unnamed: 0,verse_id,book_name,book_number,chapter,verse,content
0,1,בְּרֵאשִׁית,1,1,1,¶ בראשית ברא אלהים את השמים ואת הארץ׃
1,2,בְּרֵאשִׁית,1,1,2,והארץ היתה תהו ובהו וחשך על פני תהום ורוח אלהי...
2,3,בְּרֵאשִׁית,1,1,3,ויאמר אלהים יהי אור ויהי אור׃
3,4,בְּרֵאשִׁית,1,1,4,וירא אלהים את האור כי טוב ויבדל אלהים בין האור...
4,5,בְּרֵאשִׁית,1,1,5,ויקרא אלהים לאור יום ולחשך קרא לילה ויהי ערב ו...


In [4]:
# Words to search for, spelled as ordered lists of single letters.
torah_letters = [
    'ת',  # tav
    'ו',  # vav
    'ר',  # resh
    'ה',  # hey
]
yahweh_letters = [
    'י',  # yod
    'ה',  # hey
    'ו',  # vav
    'ה',  # hey
]
# Torah spelled last letter first (hey, resh, vav, tav)
torah_rev_letters = torah_letters[::-1]

In [5]:
def get_indices(text, letter):
    """Return the 0-based offsets in `text` whose element equals `letter`.

    Parameters
    ----------
    text : iterable
        Sequence that is scanned element by element (a string is scanned
        character by character).
    letter : object
        Value each element is compared against with ``==``.

    Returns
    -------
    list of int
        Matching offsets in ascending order; empty when nothing matches.
    """
    positions = []
    for offset, char in enumerate(text):
        if char == letter:
            positions.append(offset)
    return positions

In [6]:
def compare_pattern(compare_df, matches):
    """Tell whether the first rows of `compare_df.letter` spell out `matches`.

    Parameters
    ----------
    compare_df : pandas.DataFrame
        Frame with a ``letter`` column holding the sampled letters in order.
    matches : sequence
        Expected letters, in order.

    Returns
    -------
    bool
        True when the frame has at least ``len(matches)`` rows and its first
        ``len(matches)`` letters equal `matches` element by element; False
        otherwise. An empty `matches` is always considered present.
    """
    expected = list(matches)
    if len(compare_df) < len(expected):
        return False
    # Compare by row position (.iloc), not by index label, so the result does
    # not depend on the frame carrying a fresh 0..n-1 index.
    observed = compare_df['letter'].iloc[:len(expected)].tolist()
    return observed == expected

In [7]:
def forward_parse(text, start_pos, step, letters):
    """Sample `text` at a fixed letter interval, beginning at `start_pos`.

    Only the offsets that are actually needed are read, so the cost of one
    call depends on ``len(letters)`` and not on the length of `text`.

    Parameters
    ----------
    text : str or other indexable sequence
        The letters to sample from.
    start_pos : int
        0-based offset of the first sampled letter. An offset outside the
        text (negative, or >= ``len(text)``) yields an empty frame.
    step : int
        Distance between consecutive sampled letters. A value below 1 is
        treated as 1, i.e. consecutive letters are returned.
    letters : sequence
        Only its length is used: at most ``len(letters)`` samples are returned.

    Returns
    -------
    pandas.DataFrame
        One row per sampled letter with columns ``index`` (offset within
        `text`), ``position`` (always 0; kept so the output layout stays the
        same for existing callers) and ``letter`` (the sampled element).
        Fewer than ``len(letters)`` rows are returned when the text ends first.
    """
    stride = max(step, 1)
    if start_pos < 0:
        # A negative start is not a valid offset: report "nothing sampled".
        offsets = []
    else:
        offsets = list(range(start_pos, len(text), stride)[:len(letters)])

    sampled = pd.DataFrame({
        'index': offsets,
        'position': [0] * len(offsets),
        'letter': [text[offset] for offset in offsets],
    })
    # Keep integer dtypes even when the frame is empty.
    return sampled.astype({'index': 'int64', 'position': 'int64'})

In [8]:
# One frame per book, selected by book_number (1 = Genesis ... 5 = Deuteronomy)
genesis_heb_df = heb_mod_df[heb_mod_df['book_number'] == 1]
exodus_heb_df = heb_mod_df[heb_mod_df['book_number'] == 2]
lev_heb_df = heb_mod_df[heb_mod_df['book_number'] == 3]
numbers_heb_df = heb_mod_df[heb_mod_df['book_number'] == 4]
deut_heb_df = heb_mod_df[heb_mod_df['book_number'] == 5]

In [9]:
# Join each book's verses into one continuous string, then remove spaces and
# the ¶ and ׃ marks so they are not counted as letter positions.
gen_text = re.sub('[ ¶׃]', '', ''.join(genesis_heb_df['content']))
exo_text = re.sub('[ ¶׃]', '', ''.join(exodus_heb_df['content']))
lev_text = re.sub('[ ¶׃]', '', ''.join(lev_heb_df['content']))
num_text = re.sub('[ ¶׃]', '', ''.join(numbers_heb_df['content']))
deu_text = re.sub('[ ¶׃]', '', ''.join(deut_heb_df['content']))

In [10]:
# Genesis: every 50th letter, counted from the first tav in the book
gen_result = forward_parse(
    text=gen_text,
    start_pos=gen_text.index(torah_letters[0]),
    step=50,
    letters=torah_letters,
)
gen_result.loc[:, ['letter']]

Unnamed: 0,letter
0,ת
1,ו
2,ר
3,ה


In [11]:
# Exodus: every 50th letter, counted from the first tav in the book
exo_result = forward_parse(
    text=exo_text,
    start_pos=exo_text.index(torah_letters[0]),
    step=50,
    letters=torah_letters,
)
exo_result.loc[:, ['letter']]

Unnamed: 0,letter
0,ת
1,ו
2,ר
3,ה


In [12]:
# Leviticus: every 8th letter, counted from the first yod in the book
lev_result = forward_parse(
    text=lev_text,
    start_pos=lev_text.index(yahweh_letters[0]),
    step=8,
    letters=yahweh_letters,
)
lev_result.loc[:, ['letter']]

Unnamed: 0,letter
0,י
1,ה
2,ו
3,ה


In [13]:
# Numbers: every 50th letter, counted from the third hey in the book
# (list index 2), looking for Torah spelled in reverse
num_result = forward_parse(
    text=num_text,
    start_pos=get_indices(num_text, torah_rev_letters[0])[2],
    step=50,
    letters=torah_rev_letters,
)
num_result.loc[:, ['letter']]

Unnamed: 0,letter
0,ה
1,ר
2,ו
3,ת


In [14]:
# Deuteronomy: every 49th letter, looking for Torah spelled in reverse.
# List index 20 selects the 21st hey of the whole book.
# NOTE(review): the notes describe the start as the fourth hey counted from
# verse 5 -- confirm that this is the same letter as index 20.
deu_result = forward_parse(
    text=deu_text,
    start_pos=get_indices(deu_text, torah_rev_letters[0])[20],
    step=49,
    letters=torah_rev_letters,
)
deu_result.loc[:, ['letter']]

Unnamed: 0,letter
0,ה
1,ר
2,ו
3,ת


In [15]:
# Iterate through the whole book, locating any other occurrences of this pattern.
# Experiment with different step values.
# Save the number of occurrences of the pattern for each step value, to estimate how likely it is that a match is a coincidence.

In [16]:
# Genesis, step 50: try every tav as a starting point and report each start
# from which the sampled letters spell Torah
indices = get_indices(gen_text, torah_letters[0])
num_matches = 0

for i in indices:
    df = forward_parse(gen_text, i, 50, torah_letters)
    if not compare_pattern(df, torah_letters):
        continue
    num_matches = num_matches + 1
    print(df)
print('num_matches =', num_matches)

   index  position letter
0      5         0      ת
1     55         0      ו
2    105         0      ר
3    155         0      ה
   index  position letter
0  18716         0      ת
1  18766         0      ו
2  18816         0      ר
3  18866         0      ה
   index  position letter
0  76322         0      ת
1  76372         0      ו
2  76422         0      ר
3  76472         0      ה
num_matches = 3


In [17]:
# Genesis, step 16: same search with a different step value, for comparison
indices = get_indices(gen_text, torah_letters[0])
num_matches = 0

for i in indices:
    df = forward_parse(gen_text, i, 16, torah_letters)
    if not compare_pattern(df, torah_letters):
        continue
    num_matches = num_matches + 1
    print(df)
print('num_matches =', num_matches)

   index  position letter
0    659         0      ת
1    675         0      ו
2    691         0      ר
3    707         0      ה
   index  position letter
0  14719         0      ת
1  14735         0      ו
2  14751         0      ר
3  14767         0      ה
   index  position letter
0  17779         0      ת
1  17795         0      ו
2  17811         0      ר
3  17827         0      ה
   index  position letter
0  28317         0      ת
1  28333         0      ו
2  28349         0      ר
3  28365         0      ה
num_matches = 4


# Exodus, step 50: try every tav as a starting point and report each start
# from which the sampled letters spell Torah
indices = get_indices(exo_text, torah_letters[0])
num_matches = 0

for i in indices:
    df = forward_parse(exo_text, i, 50, torah_letters)
    if not compare_pattern(df, torah_letters):
        continue
    num_matches = num_matches + 1
    print(df)
print('num_matches =', num_matches)

In [18]:
# Leviticus, step 8: try every yod as a starting point and report each start
# from which the sampled letters spell YHWH
indices = get_indices(lev_text, yahweh_letters[0])
num_matches = 0

for i in indices:
    df = forward_parse(lev_text, i, 8, yahweh_letters)
    if not compare_pattern(df, yahweh_letters):
        continue
    num_matches = num_matches + 1
    print(df)
print('num_matches =', num_matches)

   index  position letter
0      1         0      י
1      9         0      ה
2     17         0      ו
3     25         0      ה
   index  position letter
0   8846         0      י
1   8854         0      ה
2   8862         0      ו
3   8870         0      ה
   index  position letter
0  15880         0      י
1  15888         0      ה
2  15896         0      ו
3  15904         0      ה
   index  position letter
0  19267         0      י
1  19275         0      ה
2  19283         0      ו
3  19291         0      ה
   index  position letter
0  32550         0      י
1  32558         0      ה
2  32566         0      ו
3  32574         0      ה
   index  position letter
0  37978         0      י
1  37986         0      ה
2  37994         0      ו
3  38002         0      ה
   index  position letter
0  42257         0      י
1  42265         0      ה
2  42273         0      ו
3  42281         0      ה
num_matches = 7


In [19]:
# Leviticus, step 15: same search with a different step value, for comparison
indices = get_indices(lev_text, yahweh_letters[0])
num_matches = 0

for i in indices:
    df = forward_parse(lev_text, i, 15, yahweh_letters)
    if not compare_pattern(df, yahweh_letters):
        continue
    num_matches = num_matches + 1
    print(df)
print('num_matches =', num_matches)

   index  position letter
0    394         0      י
1    409         0      ה
2    424         0      ו
3    439         0      ה
   index  position letter
0   1326         0      י
1   1341         0      ה
2   1356         0      ו
3   1371         0      ה
   index  position letter
0  25903         0      י
1  25918         0      ה
2  25933         0      ו
3  25948         0      ה
   index  position letter
0  25965         0      י
1  25980         0      ה
2  25995         0      ו
3  26010         0      ה
   index  position letter
0  30483         0      י
1  30498         0      ה
2  30513         0      ו
3  30528         0      ה
   index  position letter
0  42243         0      י
1  42258         0      ה
2  42273         0      ו
3  42288         0      ה
   index  position letter
0  43630         0      י
1  43645         0      ה
2  43660         0      ו
3  43675         0      ה
num_matches = 7


In [20]:
# Leviticus, step 22: same search with a different step value, for comparison
indices = get_indices(lev_text, yahweh_letters[0])
num_matches = 0

for i in indices:
    df = forward_parse(lev_text, i, 22, yahweh_letters)
    if not compare_pattern(df, yahweh_letters):
        continue
    num_matches = num_matches + 1
    print(df)
print('num_matches =', num_matches)

num_matches = 0


In [21]:
# Numbers, step 50: try every hey as a starting point and report each start
# from which the sampled letters spell Torah in reverse
indices = get_indices(num_text, torah_rev_letters[0])
num_matches = 0

for i in indices:
    df = forward_parse(num_text, i, 50, torah_rev_letters)
    if not compare_pattern(df, torah_rev_letters):
        continue
    num_matches = num_matches + 1
    print(df)
print('num_matches =', num_matches)

   index  position letter
0     13         0      ה
1     63         0      ר
2    113         0      ו
3    163         0      ת
   index  position letter
0  29908         0      ה
1  29958         0      ר
2  30008         0      ו
3  30058         0      ת
num_matches = 2


In [22]:
# Deuteronomy, step 50: try every hey as a starting point and report each
# start from which the sampled letters spell Torah in reverse
indices = get_indices(deu_text, torah_rev_letters[0])
num_matches = 0

for i in indices:
    df = forward_parse(deu_text, i, 50, torah_rev_letters)
    if not compare_pattern(df, torah_rev_letters):
        continue
    num_matches = num_matches + 1
    print(df)
print('num_matches =', num_matches)

   index  position letter
0  10387         0      ה
1  10437         0      ר
2  10487         0      ו
3  10537         0      ת
   index  position letter
0  14774         0      ה
1  14824         0      ר
2  14874         0      ו
3  14924         0      ת
   index  position letter
0  20389         0      ה
1  20439         0      ר
2  20489         0      ו
3  20539         0      ת
   index  position letter
0  50689         0      ה
1  50739         0      ר
2  50789         0      ו
3  50839         0      ת
num_matches = 4


In [23]:
# Deuteronomy, step 49: try every hey as a starting point and report each
# start from which the sampled letters spell Torah in reverse
indices = get_indices(deu_text, torah_rev_letters[0])
num_matches = 0

for i in indices:
    df = forward_parse(deu_text, i, 49, torah_rev_letters)
    if not compare_pattern(df, torah_rev_letters):
        continue
    num_matches = num_matches + 1
    print(df)
print('num_matches =', num_matches)

   index  position letter
0    278         0      ה
1    327         0      ר
2    376         0      ו
3    425         0      ת
   index  position letter
0  29256         0      ה
1  29305         0      ר
2  29354         0      ו
3  29403         0      ת
num_matches = 2
