In [1]:
from tfob import TFOb, BHSA, DSS

**Locating corpus resources ...**

Name,# of nodes,# slots/node,% coverage
scroll,1001,1428.81,100
lex,10450,129.14,94
fragment,11182,127.91,100
line,52895,27.04,100
clause,125,12.85,0
cluster,101099,6.68,47
phrase,315,5.1,0
word,500995,2.81,99
sign,1430241,1.0,100


**Locating corpus resources ...**

Name,# of nodes,# slots/node,% coverage
book,39,10938.21,100
chapter,929,459.19,100
lex,9230,46.22,100
verse,23213,18.38,100
half_verse,45179,9.44,100
sentence,63717,6.7,100
sentence_atom,64514,6.61,100
clause,88131,4.84,100
clause_atom,90704,4.7,100
phrase,253203,1.68,100


In [2]:
import pandas as pd
from itertools import chain

### 1.Create two lists with the verses from MT Isaiah and 1QIsaa

#### 1.1 List of wanted motion verbs

In [3]:
motion_verbs = ["BW>[", "HLK[", "CWB[", "<LH[", "CLX[", "JY>[", 
                "JRD[", "<BR[", "NGC[", "QRB[", "NWS[", "CLK[", 
                ">SP[", "NPL[", "QBY[", "LQX[", "SWR[", "GLH[",
                "QHL["]

#### 1.2 List of occurrences of the motion verbs (MT Isaiah)

In [4]:
verbs_bhsa = TFOb.all("word", BHSA).filter(book="Isaiah").filter_in(lex=motion_verbs)

In [5]:
#TESTING ZONE 
#verbs_bhsa
#TFOb(215297, BHSA).to_verses.text
#TFOb(215297, BHSA).to_verses.pretty()

#### 1.3 List of occurrences of the motion verbs (1QIsaa)

In [6]:
verbs_dss = TFOb.all("scroll", DSS).filter(scroll="1Qisaa").to_words.filter_in(lex=motion_verbs)

In [7]:
#verbs_dss

### 2. Get the verses from both databases

#### 2.1 Clean function to harmonised the DSS verses with the BHSA verses

In [8]:
def clean(g_cons):
    return g_cons.replace("_", " ").replace("׳", "").replace("'", "") 

#### 2.2 Creating two lists of verses (BHSA and DSS) containing the motion verbs

In [9]:
# From the list of motion verbs, create two lists (BHSA and DSS) with the verses where the verbs occur

verses_bhsa = []
verses_dss = []

for verb in chain(verbs_bhsa, verbs_dss):
    if verb.source.name == "BHSA":
        scroll = "MT"
        verse = verb.to_verses.to_words
        verses_bhsa.append(verse)
    else:
        scroll = verb.to_scrolls.scroll[0]
        verse = TFOb.section([verb.book[0], verb.chapter[0], verb.verse[0]], DSS, scroll)
        #verse = clean(str(verse))
        verses_dss.append(verse)

In [10]:
print(len(verses_bhsa))
print(len(verses_dss))

571
577


#### 2.3 Finding the clauses in the BHSA verses

In [11]:
clauses_bhsa = []

for verb in verbs_bhsa:
    mv_clause = verb.to_clauses
    clauses_bhsa.append(mv_clause)

# Find the complement in the mv_clause

cmpl_bhsa = []

for clause in clauses_bhsa:
    cmpl = clause.to_phrases.filter(function="Cmpl")
    cmpl_bhsa.append(cmpl)

In [12]:
#cmpl_bhsa

#### 2.4 Find the complements in the BHSA verses

In [13]:
# Filter the clauses without a complement
cmpl_bhsa = [cmpl for cmpl in cmpl_bhsa if cmpl]
cmpl_bhsa      

[<phrase_1 "M NGD <JNJ">,
 <phrase_1 ">LJHM">,
 <phrase_1 "<LJK">,
 <phrase_1 ">L HR JHWH >L BJT >LHJ J<QB">,
 <phrase_1 "B >RXTJW">,
 <phrase_1 "M YJWN">,
 <phrase_1 "B >WR JHWH">,
 <phrase_1 "B YWR">,
 <phrase_1 "B M<RWT YRJM W B MXLWT <PR">,
 <phrase_1 "B NQRWT H YRJM W B S<PJ H SL<JM">,
 <phrase_1 "M JRWCLM W M JHWDH">,
 <phrase_2 "B MCPV <M ZQNJ <MW W FRJW">,
 <phrase_1 "B FDH">,
 <phrase_1 "M <L H MZBX">,
 <phrase_1 "JRWCLM">,
 <phrase_1 "B JHWDH">,
 <phrase_1 "<LJK W <L <MK W <L BJT >BJK">,
 <phrase_1 "M <L JHWDH">,
 <phrase_1 "CMH">,
 <phrase_1 "CMH">,
 <phrase_1 ">L H NBJ>H">,
 <phrase_1 "<LJHM">,
 <phrase_1 "<L KL >PJQJW">,
 <phrase_1 "<L KL GDWTJW">,
 <phrase_1 "B DRK H <M H ZH">,
 <phrase_1 "BH">,
 <phrase_1 "B J<QB">,
 <phrase_1 "B JFR>L">,
 <phrase_1 "M MRXQ">,
 <phrase_1 "<L MJ">,
 <phrase_1 "TXT HRWGJM">,
 <phrase_1 "B GWJ XNP">,
 <phrase_1 ">L >L GBWR">,
 <phrase_1 "BW">,
 <phrase_1 "M <L CKMK">,
 <phrase_1 "<L <JT">,
 <phrase_1 "B MGRWN">,
 <phrase_1 "M<BRH">,
 <phras

In [14]:
cmpl_bhsa[1].to_words.lex

['>L']

### 3. Create the dataset

### 3.1 Check correspondances between DSS and BHSA

In [15]:
verbs_dss[0] # this is a TFOb object containing a verb

<word_1 "TB>W">

In [16]:
def find_bhsa_verb(verb_dss):
    """
    Checks if a verb occurring in DSS also occurs in BHSA (same book, chapter, verse, lexeme).
    Else, returns None.
    """
    
    # Get book chapter verse info from a DSS verb
    v_book = verb_dss.book[0]    
    v_chapter = verb_dss.chapter[0]
    v_verse = verb_dss.verse[0]

    # Get the corresponding BHSA verse
    verse_bhsa = TFOb.section((v_book, v_chapter, v_verse), BHSA).to_words
    verb_bhsa = verse_bhsa.filter(lex=verb_dss.lex[0])
    
    # If repetition of verb in same verse: TODO
    if len(verb_bhsa) > 1:
        return # TODO
        scroll = verb_dss.to_scrolls.scroll[0]
        verse_dss = TFOb.section([v_book, v_chapter, v_verse], DSS, scroll)
        print("Verse BHSA:", verse_bhsa)
        print("Verse DSS:", verse_dss)
        
    if verb_bhsa:
        return verb_bhsa

    

#print(v_book, v_chapter, v_verse)
#print(verse_bhsa)

In [17]:
def find_dss_verse(verb_dss):
    """
    Returns the verse from the verb for a DSS occurrence.
    """
    # Extract the book, chapter, verse info form the verb
    book = verb_dss.book[0]    
    chapter = verb_dss.chapter[0]
    verse = verb_dss.verse[0]
    
    # Returns the verse
    scroll = verb_dss.to_scrolls.scroll[0]
    verse_dss = TFOb.section([book, chapter, verse], DSS, scroll)
    return verse_dss

In [18]:
# Test find_dss_verse function
print("BHSA verse: " + str(verbs_bhsa[1].to_verses)) # for comparison
print("DSS verse: " + str(find_dss_verse(verbs_dss[1]))) # function test

BHSA verse: L> TWSJPW HBJ> MNXT CW> QVRT TW<BH HJ> LJ XDC W CBT QR> MQR> L> >WKL >WN W <YRH
DSS verse: LW> TWSJPW L HBJ> MNXT CW> QVRT TW<BH HJ> LJ XWDC W CBT QR> MQR> LW> >WKL >WN W <YRTH


In [19]:
# Testing the find_dss_verse function

#for i in range(0, len(verbs_bhsa)):
    #print(f"\ni = {i}")
    #print(is_lex_identical(verbs_bhsa[i].to_verses, find_dss_verse(verbs_dss[i]))) 

### 3.2 Create a function to test if a DSS verse has the same lexemes as a BHSA verse

In [20]:
def is_lex_identical(verse_bhsa, verse_dss):
    """
    Checks if the verses (i.e. BHSA versus DSS) are identical on the lexeme level.
    """
    return [clean(lex) for lex in verse_bhsa.to_words.lex if lex] == [clean(lex) for lex in verse_dss.to_words.lex if lex]
    #verse_with_spaces = [clean(lex) for lex in verse_bhsa.to_words.lex] == [clean(lex) for lex in verse_dss.to_words.lex]
    #if verse_without_spaces != verse_with_spaces:
        #print(verse_bhsa.to_words.lex)
        #print(verse_dss.to_words.lex)
        #print(verse_bhsa.text)
        #print(verse_dss.text)
        #print(verse_bhsa.to_words[-1].text)
        #print(verse_dss.to_words[-1].text)
        

In [21]:
# Print the results from is_lex_identical

n = 0

for i in range(0, len(verbs_bhsa)):
    if is_lex_identical(verbs_bhsa[i].to_verses, find_dss_verse(verbs_dss[i])):
        n += 1
print(n)


    #print(f"\ni = {i}")
    #print(is_lex_identical(verbs_bhsa[i].to_verses, find_dss_verse(verbs_dss[i])))  
    
# To see the lexemes, add the following lines inside the function is_lex_identical above:
#    print([clean(lex) for lex in verse_bhsa.to_words.lex if lex])
#    print([clean(lex) for lex in verse_dss.to_words.lex if lex])

47


### 3.3 Find complements

In [22]:
# Create a dataset with the occurrences

def find_complements(verb):
    """Find the complement of a verb. If no match, returns None"""
    if verb.source.name == "BHSA":
        complements = verb.to_clauses.to_phrases.filter(function="Cmpl")
        return complements
    
    # if the verb is not BHSA, it's DSS
    verb_bhsa = find_bhsa_verb(verb)
        
    # Check if verses are identical
    if verb_bhsa and is_lex_identical(verb_bhsa.to_verses, verse):
        return find_complements(verb_bhsa)
        


items = [] # create an empty list to store all the information for each occ.

       
for verb in chain(verbs_bhsa, verbs_dss):
    
    # Add MT as "scroll" for the BHSA
    if verb.source.name == "BHSA":
        scroll = "MT"
        verse = verb.to_verses

    else:
        scroll = verb.to_scrolls.scroll[0]
        verse = TFOb.section([verb.book[0], verb.chapter[0], verb.verse[0]], DSS, scroll)
    
    complements = find_complements(verb)
    
    # If complements is None ==> there was no match between DSS and BHSA verses ==> find complement manually
    if complements is None:
        complements = [""]

    for complement in complements:
        if complement == "":
            cmpl_heb = ""
        else:
            cmpl_heb = complement.text
            
        # Collect information about the following variables:    
        item = {
            "verb_id": verb.ids[0], 
            "lex": verb.lex[0], 
            "scroll": scroll,
            "book": verb.book[0], 
            "chapter": verb.chapter[0], 
            "verse": verb.verse[0],
            "g_cons": clean(verb.g_cons[0]),
            "hebrew": verb.text,
            "g_cons_verse": clean(str(verse)),
            "stem": verb.vs[0],
            "tense": verb.vt[0],
            "subject": clean(str(verb.to_clauses.to_phrases.filter(function="Subj"))),
            "subj_heb": verb.to_clauses.to_phrases.filter(function="Subj").text,
            "complement": clean(str(complement)),
            "cmpl_heb" : cmpl_heb,
        }
        items.append(item)

### 3.4 Create the dataset with Pandas

In [28]:
df = pd.DataFrame(items).fillna("")
df.sort_values(["book", "chapter", "verse"], ascending=[True, True, True], ignore_index=True, inplace=True)

In [29]:
# Show the occurrences where DSS verse = BHSA verse
# df[(df.scroll != "MT") & (df.complement != "")]
# df[(df.complement != "")]
df[(df.scroll == "1Qisaa") & (df.complement != "")]

Unnamed: 0,verb_id,lex,scroll,book,chapter,verse,g_cons,hebrew,g_cons_verse,stem,tense,subject,subj_heb,complement,cmpl_heb
17,1898827,CWB[,1Qisaa,Isaiah,10,21,JCWB,ישוב,C>R JCWB C>R J<QWB >L >L GBWR,qal,impf,,,>L >L GBWR,אֶל־אֵ֖ל גִּבֹּֽור׃
19,1898844,CWB[,1Qisaa,Isaiah,10,22,JCWB,ישוב,KJ> >M JHJH <MK JFR>L K XWL H JM C>R JCWB BW K...,qal,impf,,,BW,בֹּ֑ו
24,1898948,BW>[,1Qisaa,Isaiah,10,28,B>,בא,B> <L <JTH <BR B MGRWN L MKMC JPQWD KLJW,qal,perf,,,<L <JT,עַל־עַיַּ֖ת
25,1898951,<BR[,1Qisaa,Isaiah,10,28,<BR,עבר,B> <L <JTH <BR B MGRWN L MKMC JPQWD KLJW,qal,perf,,,B MGRWN,בְּמִגְרֹ֑ון
31,1898482,BW>[,1Qisaa,Isaiah,10,3,TBW>,תבוא,W MH T<FW L JWM PQWDH W L C>H M MRXQ TBW> <L M...,qal,impf,,,M MRXQ,מִמֶּרְחָ֣ק
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
681,1917541,<LH[,1Qisaa,Isaiah,63,11,M<LH,מעלה,W JZKWR JMJ <WLM MWCH <MW> >JH H M<LH M JM >T ...,hifil,ptca,,,M JM,מִיָּ֗ם
693,1918207,<LH[,1Qisaa,Isaiah,65,17,T<LJN>,תעלינא,KJ> HNNJ BWR> CMJM XDCJM W >RY XDCH W LW> TZKR...,qal,impf,,,<L LB,עַל־לֵֽב׃
695,1917892,HLK[,1Qisaa,Isaiah,65,2,HWLKJM,הולכים,PRFTJ JDJ KWL H JWM >L <M SWRH H HWLKJM H DRK ...,qal,ptca,,,>XR MXCBTJHM,אַחַ֖ר מַחְשְׁבֹתֵיהֶֽם׃
700,1918021,JY>[,1Qisaa,Isaiah,65,9,HWYJTJ,הוציתי,W HWYJTJ M J<QWB ZR< W M JHWDH JRC HRJ W JRCWH...,hifil,perf,,,M J<QB,מִֽיַּעֲקֹב֙


In [30]:
# Show dataset
df

Unnamed: 0,verb_id,lex,scroll,book,chapter,verse,g_cons,hebrew,g_cons_verse,stem,tense,subject,subj_heb,complement,cmpl_heb
0,1895059,BW>[,1Qisaa,Isaiah,1,12,TB>W,תבאו,KJ> TB>W L R>WT PNJ MJ BQC ZW>T M JDKM L RMWS ...,qal,impf,,,,
1,1895075,BW>[,1Qisaa,Isaiah,1,13,HBJ>,הביא,LW> TWSJPW L HBJ> MNXT CW> QVRT TW<BH HJ> LJ X...,hifil,infc,,,,
2,212315,SWR[,MT,Isaiah,1,16,HSJRW,הָסִ֛ירוּ,RXYW HZKW HSJRW R< M<LLJKM M NGD <JNJ XDLW HR<,hif,impv,,,M NGD <JNJ,מִנֶּ֣גֶד עֵינָ֑י
3,1895129,SWR[,1Qisaa,Isaiah,1,16,HSJRW,הסירו,RXYW W HZKW W HSJRW RW< M<LLJKM M NGD <JNJ XDL...,hifil,impv,,,,
4,1895149,HLK[,1Qisaa,Isaiah,1,18,LKW,לכו,LKW N> W NWKXH JW>MR JHWH >M JHJW XV>JKM K CNJ...,qal,impv,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
753,1898435,CWB[,1Qisaa,Isaiah,9,20,CB,שב,W J>KL MNCH >T >PRJM W >PRJM >T MNCH JXDW HMH ...,qal,perf,,,,
754,215292,CLX[,MT,Isaiah,9,7,CLX,שָׁלַ֥ח,DBR CLX >DNJ B J<QB W NPL B JFR>L,qal,perf,>DNJ,אֲדֹנָ֖י,B J<QB,בְּיַעֲקֹ֑ב
755,215297,NPL[,MT,Isaiah,9,7,NPL,נָפַ֖ל,DBR CLX >DNJ B J<QB W NPL B JFR>L,qal,perf,,,B JFR>L,בְּיִשְׂרָאֵֽל׃
756,1898190,CLX[,1Qisaa,Isaiah,9,7,CLX,שלח,DBR CLX JHWH B J<QWB W NPL B JFR>L,qal,perf,,,,


In [60]:
# Show only MT occurrences
#df[df.scroll == "MT"]

# Show only non-MT occurrences
#df[df.scroll != "MT"]

In [61]:
# Compare the lexemes and their order
# clean the DSS verses by Id to be able to retrieve the good complements (and not copy them from BHSA)

### 3.5 Save the dataset in a csv file

**Nota Bene**: this dataset contains all the occurrences of the selected motion verbs in the DSS. For the BHSA, it contains all the occurrrences of these verbs WITH A COMPLEMENT. 
The finding of complements in the DSS is a work-in-progress.
Furthermore, for now, the complements have been tak

In [31]:
df.to_csv("data/isaiah_preliminary_dataset.csv")