In [1]:
from tfob import TFOb, BHSA, DSS

**Locating corpus resources ...**

Name,# of nodes,# slots/node,% coverage
scroll,1001,1428.81,100
lex,10450,129.14,94
fragment,11182,127.91,100
line,52895,27.04,100
clause,125,12.85,0
cluster,101099,6.68,47
phrase,315,5.1,0
word,500995,2.81,99
sign,1430241,1.0,100


**Locating corpus resources ...**

Name,# of nodes,# slots/node,% coverage
book,39,10938.21,100
chapter,929,459.19,100
lex,9230,46.22,100
verse,23213,18.38,100
half_verse,45179,9.44,100
sentence,63717,6.7,100
sentence_atom,64514,6.61,100
clause,88131,4.84,100
clause_atom,90704,4.7,100
phrase,253203,1.68,100


In [2]:
import pandas as pd
from itertools import chain

In [3]:
motion_verbs = ["BW>[", "HLK[", "CWB[", "<LH[", "CLX[", "JY>[", 
                "JRD[", "<BR[", "NGC[", "QRB[", "NWS[", "CLK[", 
                ">SP[", "NPL[", "QBY[", "LQX[", "SWR[", "GLH[",
                "QHL["]

In [4]:
verbs_bhsa = TFOb.all("word", BHSA).filter(book="Isaiah").filter_in(lex=motion_verbs)
verbs_bhsa

<word_571 "TB>W HBJ> HSJRW LKW JBW> >CJBH >SJRH >CJBH CBJH HLKW LKW N<LH NLKH TY> LKW NLKH BW> B>W JCLJK BW> [...] JRDT HLKJM QRB TGC HWY>TJ T<LJNH M<LH >BJ> JBW> JBW> HCJB B>H QBY B>W CLXTJ HBJ>W JBJ>W >QX JBW> JY>W">

In [5]:
verbs_dss = TFOb.all("scroll", DSS).filter(scroll="1Qisaa").to_words.filter(book="Isaiah").filter_in(lex=motion_verbs)
verbs_dss

<word_577 "TB>W HBJ> HSJRW LKW JBW> HCJB >SJR >CJBH CBJH HLKW LKW N<LH N>LKH TY> LKW NLKH B>W JCLJK BW> MHSJR [...] JRDTH HWLKJM QRB HWYJTJ T<LJN> M<LH >BJ> JBW> JBW> HCJB JBW> B>W QBY B>W CLXTJ HBJ>W JBJ>W >QX JBW> JY>W">

In [6]:
# Notes
# one line per complement
# manage the verses where motion verbs occur several times
# remove underscore in proper name in BHSA and ‘ for pronominal suffixes in DSS

In [7]:
def clean(g_cons):
    return g_cons.replace("_", " ").replace("'", "")  

In [19]:
items = []

for verb in chain(verbs_bhsa, verbs_dss):
    if verb.source.name == "BHSA":
        scroll = "MT"
        dir_he_dss_verse = ""
    else:
        scroll = verb.to_scrolls.scroll[0]
        verse = TFOb.section([verb.book[0], verb.chapter[0], verb.verse[0]], DSS)
        dir_he_dss_verse = int("H" in verse.uvf_etcbc)
             
    complements = verb.to_clauses.to_phrases.filter(function="Cmpl")
    if len(complements) == 0:
        complements = [""]
        
    for complement in complements:
        if complement == "":
            dir_he = ""
        else: 
            dir_he = int("H" in complement.to_words.uvf)

        item = {
            "verb_id": verb.ids[0], 
            "lex": verb.lex[0], 
            "scroll": scroll,
            "book": verb.book[0], 
            "chapter": verb.chapter[0], 
            "verse": verb.verse[0],
            "g_cons": clean(verb.g_cons[0]),
            "hebrew": verb.text,
            "g_cons_verse": str(verb.to_verse),
            "stem": verb.vs[0],
            "tense": verb.vt[0],
            "subject": clean(str(verb.to_clauses.to_phrases.filter(function="Subj"))),
            "subj_heb": verb.to_clauses.to_phrases.filter(function="Subj").text,
            "complement": clean(str(complement)),
            "cmpl_heb" : verb.to_clauses.to_phrases.filter(function="Cmpl").text,
            "dir_he": dir_he,
            "dir_he_dss_verse": dir_he_dss_verse, 
        }
        if complement != "":
            prepositions = complement.to_words.filter(sp="prep")
            n = 0
            for preposition in prepositions:
                n += 1
                item[f"preposition_{n}"] = str(preposition)
        items.append(item)
    
df = pd.DataFrame(items).fillna("")

df

AttributeError: 'NodeFeatures' object has no attribute 'to_verse'

In [9]:
df.to_csv("motion_verbs_isaiah_1.csv", index=False, encoding="utf8")

In [10]:
df[df.dir_he == 1][['lex', "book", "chapter", "verse", "scroll", "hebrew", "complement", 'dir_he']]

Unnamed: 0,lex,book,chapter,verse,scroll,hebrew,complement,dir_he
56,BW>[,Isaiah,7,24,MT,יָ֣בֹוא,CMH,1
57,BW>[,Isaiah,7,25,MT,תָבֹ֣וא,CMH,1
130,CLX[,Isaiah,16,1,MT,שִׁלְחוּ־,MDBRH >L HR BT YJWN,1
149,BW>[,Isaiah,20,1,MT,בֹּ֤א,>CDWDH,1
172,<BR[,Isaiah,23,6,MT,עִבְר֖וּ,TRCJCH,1
267,CLX[,Isaiah,36,2,MT,יִּשְׁלַ֣ח,JRWCLMH,1
371,CLX[,Isaiah,43,14,MT,שִׁלַּ֣חְתִּי,BBLH,1
481,CWB[,Isaiah,55,10,MT,יָשׁ֔וּב,CMH,1


In [11]:
df[df.dir_he_dss_verse == 1][['lex', "book", "chapter", "verse", "scroll", "hebrew", "dir_he_dss_verse"]]

Unnamed: 0,lex,book,chapter,verse,scroll,hebrew,dir_he_dss_verse
634,BW>[,Isaiah,7,24,1Qisaa,יבוא,1
635,BW>[,Isaiah,7,25,1Qisaa,תבוא,1
645,<BR[,Isaiah,8,21,1Qisaa,עבר,1
667,BW>[,Isaiah,10,28,1Qisaa,בא,1
668,<BR[,Isaiah,10,28,1Qisaa,עבר,1
694,<LH[,Isaiah,14,13,1Qisaa,אעלה,1
707,CLX[,Isaiah,16,1,1Qisaa,שלחו,1
726,BW>[,Isaiah,19,23,1Qisaa,בא,1
727,BW>[,Isaiah,20,1,1Qisaa,בא,1
728,CLX[,Isaiah,20,1,1Qisaa,שלח,1


In [12]:
# for name in dir(DSS.F):
#     if "__" in name:
#         continue
#     print(name, getattr(BHSA.F, name).v(verbs.ids[0]))

In [13]:
verbs_dss._levels

['to_scrolls',
 'to_lexs',
 'to_fragments',
 'to_lines',
 'to_clauses',
 'to_clusters',
 'to_phrases',
 'to_words',
 'to_signs']

In [16]:
TFOb.all("scroll", DSS).filter(scroll="1Qisaa").to_words.filter(book="Isaiah", chapter="47", verse="8").text

'ועתה שמעי זואת עוד׳נה היושבת לבטח האומרה בלבב׳ה אני ואפס׳י עוד לוא אשב עלמנה ולוא אראה שכול ׃ '

In [17]:
TFOb.all("word", BHSA).filter(book="Isaiah", chapter="38", verse="17").text

'הִנֵּ֥ה לְשָׁלֹ֖ום מַר־לִ֣י מָ֑ר וְאַתָּ֞ה חָשַׁ֤קְתָּ נַפְשִׁי֙ מִשַּׁ֣חַת בְּלִ֔י כִּ֥י הִשְׁלַ֛כְתָּ אַחֲרֵ֥י גֵוְךָ֖ כָּל־חֲטָאָֽי׃ '