In [3]:
import pandas as pd

from itertools import chain

from tfob import TFOb, BHSA, DSS

In [7]:
# List of motion verbs 
motion_verbs = ["BW>[", "HLK[", "CWB[", "<LH[", "CLX[", "JY>[", 
                "JRD[", "<BR[", "NGC[", "QRB[", "NWS[", "CLK[", 
                ">SP[", "NPL[", "QBY[", "LQX[", "SWR[", "GLH[",
                "QHL["]

In [8]:
# Filter all the occurrences of the motion verbs in Isaiah (BHSA)

verbs_bhsa = TFOb.all("word", BHSA).filter(book="Isaiah").filter_in(lex=motion_verbs)

In [5]:
# Show the list of occurrences from BHSA
verbs_bhsa

<word_571 "TB>W HBJ> HSJRW LKW JBW> >CJBH >SJRH >CJBH CBJH HLKW LKW N<LH NLKH TY> LKW NLKH BW> B>W JCLJK BW> [...] JRDT HLKJM QRB TGC HWY>TJ T<LJNH M<LH >BJ> JBW> JBW> HCJB B>H QBY B>W CLXTJ HBJ>W JBJ>W >QX JBW> JY>W">

In [9]:
# Filter all the occurrences of the motion verbs in Isaiah (DSS)

verbs_dss = TFOb.all("scroll", DSS).filter(scroll="1Qisaa").to_words.filter(book="Isaiah").filter_in(lex=motion_verbs)

In [7]:
# Show the list of occurrences from DSS
verbs_dss

<word_577 "TB>W HBJ> HSJRW LKW JBW> HCJB >SJR >CJBH CBJH HLKW LKW N<LH N>LKH TY> LKW NLKH B>W JCLJK BW> MHSJR [...] JRDTH HWLKJM QRB HWYJTJ T<LJN> M<LH >BJ> JBW> JBW> HCJB JBW> B>W QBY B>W CLXTJ HBJ>W JBJ>W >QX JBW> JY>W">

In [11]:
# Create a clean function to harmonise the data from the DSS with BHSA format

def clean(g_cons):
    return g_cons.replace("_", " ").replace("'", "")  

In [12]:
# Find the type of complement

items = []

for verb in chain(verbs_bhsa, verbs_dss):
    if verb.source.name == "BHSA":
        scroll = "MT"
        verse = verb.to_verses
        dir_he_dss_verse = ""
    else:
        scroll = verb.to_scrolls.scroll[0]
        verse = TFOb.section([verb.book[0], verb.chapter[0], verb.verse[0]], DSS, scroll) 
        dir_he_dss_verse = int("H" in verse.uvf_etcbc)
             
    complements = verb.to_clauses.to_phrases.filter(function="Cmpl")
    if len(complements) == 0:
        complements = [""]
        
    for complement in complements:
        if complement == "":
            dir_he = ""
        else: 
            dir_he = int("H" in complement.to_words.uvf)

        item = {
            "verb_id": verb.ids[0], 
            "lex": verb.lex[0], 
            "scroll": scroll,
            "book": verb.book[0], 
            "chapter": verb.chapter[0], 
            "verse": verb.verse[0],
            "g_cons": clean(verb.g_cons[0]),
            "hebrew": verb.text,
            "g_cons_verse": clean(str(verse)),
            "stem": verb.vs[0],
            "tense": verb.vt[0],
            "subject": clean(str(verb.to_clauses.to_phrases.filter(function="Subj"))),
            "subj_heb": verb.to_clauses.to_phrases.filter(function="Subj").text,
            "complement": clean(str(complement)),
            "cmpl_heb" : verb.to_clauses.to_phrases.filter(function="Cmpl").text,
            "dir_he": dir_he,
            "dir_he_dss_verse": dir_he_dss_verse, 
        }
        if complement != "":
            prepositions = complement.to_words.filter(sp="prep")
            n = 0
            for preposition in prepositions:
                n += 1
                item[f"preposition_{n}"] = str(preposition)
        items.append(item)
           
df = pd.DataFrame(items).fillna("")
df.sort_values(["book", "chapter", "verse", "lex", "scroll"], ascending=[True, True, True, True, False], ignore_index=True, inplace=True) 


cmpl_isaiah = df

In [13]:
cmpl_isaiah

Unnamed: 0,verb_id,lex,scroll,book,chapter,verse,g_cons,hebrew,g_cons_verse,stem,tense,subject,subj_heb,complement,cmpl_heb,dir_he,dir_he_dss_verse,preposition_1,preposition_2,preposition_3
0,212256,BW>[,MT,Isaiah,1,12,TB>W,תָבֹ֔אוּ,KJ TB>W L R>WT PNJ MJ BQC Z>T M JDKM RMS XYRJ,qal,impf,,,,,,,,,
1,1895059,BW>[,1Qisaa,Isaiah,1,12,TB>W,תבאו,KJ> TB>W L R>WT PNJ MJ BQC ZW>T M JDKM L RMWS ...,qal,impf,,,,,,0,,,
2,212269,BW>[,MT,Isaiah,1,13,HBJ>,הָבִיא֙,L> TWSJPW HBJ> MNXT CW> QVRT TW<BH HJ> LJ XDC ...,hif,infc,,,,,,,,,
3,1895075,BW>[,1Qisaa,Isaiah,1,13,HBJ>,הביא,LW> TWSJPW L HBJ> MNXT CW> QVRT TW<BH HJ> LJ X...,hifil,infc,,,,,,0,,,
4,212315,SWR[,MT,Isaiah,1,16,HSJRW,הָסִ֛ירוּ,RXYW HZKW HSJRW R< M<LLJKM M NGD <JNJ XDLW HR<,hif,impv,,,M NGD <JNJ,מִנֶּ֣גֶד עֵינָ֑י,0,,M,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1150,1898190,CLX[,1Qisaa,Isaiah,9,7,CLX,שלח,DBR CLX JHWH B J<QWB W NPL B JFR>L,qal,perf,,,,,,0,,,
1151,215297,NPL[,MT,Isaiah,9,7,NPL,נָפַ֖ל,DBR CLX >DNJ B J<QB W NPL B JFR>L,qal,perf,,,B JFR>L,בְּיִשְׂרָאֵֽל׃,0,,B,,
1152,1898195,NPL[,1Qisaa,Isaiah,9,7,NPL,נפל,DBR CLX JHWH B J<QWB W NPL B JFR>L,qal,perf,,,,,,0,,,
1153,215318,NPL[,MT,Isaiah,9,9,NPLW,נָפָ֖לוּ,LBNJM NPLW W GZJT NBNH CQMJM GD<W W >RZJM NXLJP,qal,perf,LBNJM,לְבֵנִ֥ים,,,,,,,


In [9]:
# Create a dataset with the occurrences

items = [] # create an empty list to store all the information for each occ.

for verb in chain(verbs_bhsa, verbs_dss):
    
    # Add MT as "scroll" for the BHSA
    if verb.source.name == "BHSA":
        scroll = "MT"
        verse = verb.to_verses
    else:
        scroll = verb.to_scrolls.scroll[0]
        verse = TFOb.section([verb.book[0], verb.chapter[0], verb.verse[0]], DSS, scroll) 
        
    # Find the complement (works only for the BHSA)
    complement = verb.to_clauses.to_phrases.filter(function="Cmpl")
    if len(complement) == 0:
        complement = [""]
        
    # Collect information about the following variables:    
    item = {
        "verb_id": verb.ids[0], 
        "lex": verb.lex[0], 
        "scroll": scroll,
        "book": verb.book[0], 
        "chapter": verb.chapter[0], 
        "verse": verb.verse[0],
        "g_cons": clean(verb.g_cons[0]),
        "hebrew": verb.text,
        "g_cons_verse": clean(str(verse)),
        "stem": verb.vs[0],
        "tense": verb.vt[0],
        "subject": clean(str(verb.to_clauses.to_phrases.filter(function="Subj"))),
        "subj_heb": verb.to_clauses.to_phrases.filter(function="Subj").text,
        "complement": clean(str(complement)),
        "cmpl_heb" : verb.to_clauses.to_phrases.filter(function="Cmpl").text,
    }
    items.append(item)

In [10]:
df = pd.DataFrame(items).fillna("")
df.sort_values(["book", "chapter", "verse", "lex", "scroll"], ascending=[True, True, True, True, False], ignore_index=True, inplace=True) 

In [11]:
cmpl_isaiah = df

In [12]:
cmpl_isaiah

Unnamed: 0,verb_id,lex,scroll,book,chapter,verse,g_cons,hebrew,g_cons_verse,stem,tense,subject,subj_heb,complement,cmpl_heb
0,212256,BW>[,MT,Isaiah,1,12,TB>W,תָבֹ֔אוּ,KJ TB>W L R>WT PNJ MJ BQC Z>T M JDKM RMS XYRJ,qal,impf,,,[],
1,1895059,BW>[,1Qisaa,Isaiah,1,12,TB>W,תבאו,KJ> TB>W L R>WT PNJ MJ BQC ZW>T M JDKM L RMWS ...,qal,impf,,,[],
2,212269,BW>[,MT,Isaiah,1,13,HBJ>,הָבִיא֙,L> TWSJPW HBJ> MNXT CW> QVRT TW<BH HJ> LJ XDC ...,hif,infc,,,[],
3,1895075,BW>[,1Qisaa,Isaiah,1,13,HBJ>,הביא,LW> TWSJPW L HBJ> MNXT CW> QVRT TW<BH HJ> LJ X...,hifil,infc,,,[],
4,212315,SWR[,MT,Isaiah,1,16,HSJRW,הָסִ֛ירוּ,RXYW HZKW HSJRW R< M<LLJKM M NGD <JNJ XDLW HR<,hif,impv,,,M NGD <JNJ,מִנֶּ֣גֶד עֵינָ֑י
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1143,1898190,CLX[,1Qisaa,Isaiah,9,7,CLX,שלח,DBR CLX JHWH B J<QWB W NPL B JFR>L,qal,perf,,,[],
1144,215297,NPL[,MT,Isaiah,9,7,NPL,נָפַ֖ל,DBR CLX >DNJ B J<QB W NPL B JFR>L,qal,perf,,,B JFR>L,בְּיִשְׂרָאֵֽל׃
1145,1898195,NPL[,1Qisaa,Isaiah,9,7,NPL,נפל,DBR CLX JHWH B J<QWB W NPL B JFR>L,qal,perf,,,[],
1146,215318,NPL[,MT,Isaiah,9,9,NPLW,נָפָ֖לוּ,LBNJM NPLW W GZJT NBNH CQMJM GD<W W >RZJM NXLJP,qal,perf,LBNJM,לְבֵנִ֥ים,[],


In [None]:
# Other variables to add:
# 