In [24]:
from tf.app import use
A = use('bhsa', hoist=globals())
import re
import pandas as pd
from itertools import groupby

In [190]:
# Search template: within a single clause, pair a Hebrew verb in a predicate
# phrase (Pred|PreC|PreS|PreO) with a Hebrew substantive or adjective in an
# object or adjunct phrase (Objc|Adju).
# The lexeme KL/ ('all') is excluded because it is so common and creates problems.
query = '''
book 
    clause
        phrase function=Pred|PreC|PreS|PreO
            word sp=verb language=Hebrew     
        phrase function=Objc|Adju
            word sp=subs|adjv language=Hebrew lex#KL/
                
'''

In [191]:
# Run the template; each result is later unpacked as a 6-tuple of nodes:
# (book, clause, verb phrase, verb word, object/adjunct phrase, object/adjunct word).
results = A.search(query)

  3.09s 35741 results


In [204]:
# Functions that match a verbal root against possible nominal patterns.
# This version uses consonants only; including vowels would allow finer matching.
# The root class is determined first, and then a regex string is built to test candidate nominals.

def get_root_class(root):
    """Classify a root by the weak-consonant pattern of its first three characters.

    The tests run in a fixed order and the first one that succeeds decides the
    class, so a root that satisfies several tests gets the earliest label.

    Args:
        root: indexable sequence (in practice a transliterated lexeme string)
            whose positions 0, 1 and 2 hold the three radicals.

    Returns:
        One of "I-Y/W", "II-Y/W", "III-H", "gem", "I-N" or "strong".

    Raises:
        IndexError: if a position that has to be inspected does not exist.
    """
    # Positions are indexed lazily, one test at a time, so a short input only
    # fails when a later radical is actually needed.
    if root[0] in ('J', 'W'):
        return "I-Y/W"
    if root[1] in ('J', 'W'):
        return "II-Y/W"
    if root[2] == 'H':
        return "III-H"
    if root[1] == root[2]:
        return "gem"
    if root[0] == 'N':
        return "I-N"
    return "strong"

# Regex tail appended by the pattern-builder functions: at most one of the
# listed consonant strings, followed by the end-of-string anchor.
# NOTE(review): presumably these are transliterated nominal endings — confirm.
suffix = '(H|T|J|JH|JT|JM|WH|WT|WN|N|WNJ|NJ)?$'

def strong(root):
    """Build the regex for nominals derived from a strong (regular) root.

    Two alternatives are combined in one group:
      * the plain pattern: optional N/M/T prefix, the three radicals with an
        optional J or W after the first and after the second, then `suffix`;
      * the reduplicated pattern: the three radicals followed by either the
        third radical again (optionally preceded by J or W) or the second and
        third radicals again, then an optional T at the end of the string.

    Args:
        root: string whose first three characters are the radicals.

    Returns:
        The regex source string, wrapped in parentheses.
    """
    c1, c2, c3 = root[0], root[1], root[2]
    glide = '[JW]?'

    plain = f'[NMT]?{c1}{glide}{c2}{glide}{c3}{suffix}'
    reduplicated = f'{c1}{c2}{c3}({glide}{c3}|{c2}{c3})T?$'

    return f'({plain}|{reduplicated})'

def pe_nun(root):
    """Build the regex for nominals derived from a first-N root.

    The N is optional in the pattern, so it may be missing from the nominal
    (assimilated). The pattern is: optional M or T prefix, optional N, second
    radical, optional J or W, third radical, then `suffix`.

    Args:
        root: string whose characters at index 1 and 2 are the second and
            third radicals (index 0 is not consulted; N is hard-coded).

    Returns:
        The regex source string.
    """
    # Open question from the original author: does NTN need a special case?
    second, third = root[1], root[2]
    return f'[MT]?N?{second}[JW]?{third}{suffix}'

def pe_yod(root):
    """Build the regex for nominals derived from a first-Y/W root.

    The pattern is: optional M or T prefix, optional J or W in first position,
    second radical, optional J or W, third radical, then `suffix`.

    Args:
        root: string whose characters at index 1 and 2 are the second and
            third radicals (index 0 is not consulted).

    Returns:
        The regex source string.
    """
    # Author's note: in some cases the Y may drop, but this is rare; those
    # cases probably need to be hard-coded.
    pieces = ('[MT]?[JW]?', root[1], '[JW]?', root[2], suffix)
    return ''.join(pieces)

def ayin_yod(root):
    """Build the regex for nominals derived from a second-Y/W (middle-weak) root.

    The middle radical is replaced by an optional J or W, so it may be absent
    from the nominal. The pattern is: optional M or T prefix, first radical,
    optional J or W, third radical, then `suffix`.

    Args:
        root: string whose characters at index 0 and 2 are the first and
            third radicals.

    Returns:
        The regex source string.
    """
    first = root[0]
    last = root[2]
    return f'[MT]?{first}[JW]?{last}{suffix}'

def lamed_he(root):
    """Build the regex for nominals derived from a third-H (third-weak) root.

    The final H is not required: the pattern is optional M or T prefix, first
    radical, optional J or W, second radical, optional J or W, then `suffix`.

    Args:
        root: string whose characters at index 0 and 1 are the first and
            second radicals (index 2 is not consulted).

    Returns:
        The regex source string.
    """
    glide = '[JW]?'
    return ''.join(['[MT]?', root[0], glide, root[1], glide, suffix])

def geminate(root):
    """Build the regex for nominals derived from a geminate root.

    The final consonant is optional, so the nominal may show it once or twice.
    The pattern is: optional M or T prefix, first radical, optional J or W,
    second radical, optional J or W, optional third radical, then `suffix`.

    Args:
        root: string whose first three characters are the radicals.

    Returns:
        The regex source string.
    """
    c1, c2, c3 = root[0], root[1], root[2]
    return f'[MT]?{c1}[JW]?{c2}[JW]?{c3}?{suffix}'

def match_root(verb, objc):
    """Report whether a nominal lexeme fits the pattern derived from a verb lexeme.

    Args:
        verb: verb lexeme string; '[' and '=' characters are removed to obtain
            the root consonants.
        objc: nominal lexeme string; '/' and '=' characters are removed before
            matching.

    Returns:
        True when the regex built for the root's class matches the cleaned
        nominal from its first character, otherwise False.
    """
    # The weak-root class is read from the lexeme exactly as passed in,
    # before any marker characters are removed.
    root_class = get_root_class(verb)

    # For now the extra marker characters are simply cut.
    root = verb.replace('[', "").replace('=', "")
    nom = objc.replace('/', "").replace('=', "")

    # One pattern builder per weak class; any other class is treated as strong.
    builders = {
        'I-N': pe_nun,
        'I-Y/W': pe_yod,
        'II-Y/W': ayin_yod,
        'III-H': lamed_he,
        'gem': geminate,
    }
    build_pattern = builders.get(root_class, strong)

    return re.match(build_pattern(root), nom) is not None

In [64]:
# Function to get citation in book chapter:verse format
def reference(node):
    """Format the location of a node as '<book> <chapter>:<verse>'.

    The book and chapter come from `L.u` and the verse from `L.i`; in each
    case the first node of the returned sequence is used. The book name is
    read from the "book@en" feature.
    NOTE(review): `L.i` presumably returns the verse nodes that intersect
    `node` — confirm against the Text-Fabric Locality documentation.

    Args:
        node: corpus node to cite (the notebook passes clause nodes).

    Returns:
        The citation string.
    """
    lookups = (L.u(node, 'book'), L.u(node, 'chapter'), L.i(node, 'verse'))
    book_node, chapter_node, verse_node = (found[0] for found in lookups)
    return (
        f'{Fs("book@en").v(book_node)} '
        f'{F.chapter.v(chapter_node)}:{F.verse.v(verse_node)}'
    )

In [206]:
# Walk the search results, test whether the object/adjunct lexeme fits the
# pattern derived from the verb's root, and collect every hit as a tuple:
# (vocalized verb lexeme + verbal stem, vocalized object lexeme, reference).
matches = []
for (book, clause, Vphrase, Vword, OPhrase, Oword) in results:
    if not match_root(F.lex.v(Vword), F.lex.v(Oword)):
        continue
    # The citation is only needed for hits, so it is looked up here rather
    # than once per search result.
    matches.append((
        f'{F.voc_lex_utf8.v(Vword)} {F.vs.v(Vword)}',
        F.voc_lex_utf8.v(Oword),
        reference(clause),
    ))

In [210]:
# Let's see some examples: the first ten collected tuples.
preview = matches[:10]
for example in preview:
    print(example)

('אהב qal', 'אֲהָבָה', '1_Samuel 20:17')
('אהב qal', 'אֲהָבָה', 'Jeremiah 31:3')
('אוה hit', 'תַּאֲוָה', 'Numbers 11:4')
('אוה hit', 'תַּאֲוָה', 'Psalms 106:14')
('אוה hit', 'תַּאֲוָה', 'Proverbs 21:26')
('אור hif', 'אֹור', 'Ezekiel 32:7')
('אור nif', 'אֹור', 'Job 33:30')
('אלם piel', 'אֲלֻמָּה', 'Genesis 37:7')
('אמר qal', 'אֵמֶר', 'Proverbs 1:21')
('אסף pual', 'אֲסֵפָה', 'Isaiah 24:22')


In [209]:
# Group the matches by verb (lexeme + stem) and, inside each verb, by
# object/adjunct lexeme; then print every pair with its count and references.

# groupby only merges adjacent items, so order by (verb, object) first.
matches.sort(key=lambda m: (m[0], m[1]))

verb_groups = [(verb, list(hits)) for verb, hits in groupby(matches, lambda m: m[0])]
rootKeys = [verb for verb, _ in verb_groups]
rootData = [hits for _, hits in verb_groups]

print(f'There were {len(rootData)} verbal roots with possible cognate objects')
print()

for verb_label, verb_hits in zip(rootKeys, rootData):

    print(f'{verb_label}')
    print('--------------')

    objc_groups = [(noun, list(hits)) for noun, hits in groupby(verb_hits, lambda m: m[1])]
    objcKeys = [noun for noun, _ in objc_groups]
    objcData = [hits for _, hits in objc_groups]

    for noun_label, noun_hits in objc_groups:
        # Third tuple field is the citation string.
        refs = [hit[2] for hit in noun_hits]

        print(f'\t {noun_label} \t [{len(refs)}X]')
        for citation in refs:
            print(f'\t\t {citation}')
        print()

There were 272 verbal roots with possible cognate objects

אהב qal
--------------
	 אֲהָבָה 	 [2X]
		 1_Samuel 20:17
		 Jeremiah 31:3

אוה hit
--------------
	 תַּאֲוָה 	 [3X]
		 Numbers 11:4
		 Psalms 106:14
		 Proverbs 21:26

אור hif
--------------
	 אֹור 	 [1X]
		 Ezekiel 32:7

אור nif
--------------
	 אֹור 	 [1X]
		 Job 33:30

אלם piel
--------------
	 אֲלֻמָּה 	 [1X]
		 Genesis 37:7

אמר qal
--------------
	 אֵמֶר 	 [1X]
		 Proverbs 1:21

אסף pual
--------------
	 אֲסֵפָה 	 [1X]
		 Isaiah 24:22

	 אֹסֶף 	 [1X]
		 Isaiah 33:4

אסר qal
--------------
	 אִסָּר 	 [3X]
		 Numbers 30:3
		 Numbers 30:4
		 Numbers 30:11

ארר nif
--------------
	 מְאֵרָה 	 [1X]
		 Malachi 3:9

בגד qal
--------------
	 בֶּגֶד 	 [1X]
		 Isaiah 24:16

בוא hif
--------------
	 מָבֹוא 	 [1X]
		 Ezekiel 46:19

בוא qal
--------------
	 מָבֹוא 	 [3X]
		 2_Kings 11:16
		 Ezekiel 26:10
		 Ezekiel 33:31

בושׁ qal
--------------
	 בֹּשֶׁת 	 [1X]
		 Isaiah 42:17

בזז qal
--------------
	 בִּזָּה 	 [1X]
		 2_Chronicles 