In [1]:
from tfob import TFOb, BHSA, DSS
import pandas as pd

**Locating corpus resources ...**

Name,# of nodes,# slots/node,% coverage
scroll,1001,1428.81,100
lex,10450,129.14,94
fragment,11182,127.91,100
line,52895,27.04,100
clause,125,12.85,0
cluster,101099,6.68,47
phrase,315,5.1,0
word,500995,2.81,99
sign,1430241,1.0,100


**Locating corpus resources ...**

Name,# of nodes,# slots/node,% coverage
book,39,10938.21,100
chapter,929,459.19,100
lex,9230,46.22,100
verse,23213,18.38,100
half_verse,45179,9.44,100
sentence,63717,6.7,100
sentence_atom,64514,6.61,100
clause,88131,4.84,100
clause_atom,90704,4.7,100
phrase,253203,1.68,100


In [2]:
from itertools import chain

In [3]:
# List of motion verbs 
motion_verbs = ["BW>[", "HLK[", "CWB[", "<LH[", "CLX[", "JY>[", 
                "JRD[", "<BR[", "NGC[", "QRB[", "NWS[", "CLK[", 
                ">SP[", "NPL[", "QBY[", "LQX[", "SWR[", "GLH[",
                "QHL["]

In [4]:
# Filter all the occurrences of the motion verbs in Isaiah (BHSA)

verbs_bhsa = TFOb.all("word", BHSA).filter(book="Isaiah").filter_in(lex=motion_verbs)

In [5]:
# Show the list of occurrences from BHSA
verbs_bhsa

<word_571 "TB>W HBJ> HSJRW LKW JBW> >CJBH >SJRH >CJBH CBJH HLKW LKW N<LH NLKH TY> LKW NLKH BW> B>W JCLJK BW> [...] JRDT HLKJM QRB TGC HWY>TJ T<LJNH M<LH >BJ> JBW> JBW> HCJB B>H QBY B>W CLXTJ HBJ>W JBJ>W >QX JBW> JY>W">

In [6]:
# Filter all the occurrences of the motion verbs in Isaiah (DSS)

verbs_dss = TFOb.all("scroll", DSS).filter(scroll="1Qisaa").to_words.filter(book="Isaiah").filter_in(lex=motion_verbs)

In [7]:
# Show the list of occurrences from DSS
verbs_dss

<word_577 "TB>W HBJ> HSJRW LKW JBW> HCJB >SJR >CJBH CBJH HLKW LKW N<LH N>LKH TY> LKW NLKH B>W JCLJK BW> MHSJR [...] JRDTH HWLKJM QRB HWYJTJ T<LJN> M<LH >BJ> JBW> JBW> HCJB JBW> B>W QBY B>W CLXTJ HBJ>W JBJ>W >QX JBW> JY>W">

In [8]:
# Create a clean function to harmonise the data from the DSS with BHSA format

def clean(g_cons):
    return g_cons.replace("_", " ").replace("'", "")  

In [9]:
# Create a dataset with the occurrences

items = [] # create an empty list to store all the information for each occ.

for verb in chain(verbs_bhsa, verbs_dss):
    
    # Add MT as "scroll" for the BHSA
    if verb.source.name == "BHSA":
        scroll = "MT"
        verse = verb.to_verses
    else:
        scroll = verb.to_scrolls.scroll[0]
        verse = TFOb.section([verb.book[0], verb.chapter[0], verb.verse[0]], DSS, scroll) 
        
    # Find the complement (works only for the BHSA)
    complement = verb.to_clauses.to_phrases.filter(function="Cmpl")
    if len(complement) == 0:
        complement = [""]
        
    # Collect information about the following variables:    
    item = {
        "verb_id": verb.ids[0], 
        "lex": verb.lex[0], 
        "scroll": scroll,
        "book": verb.book[0], 
        "chapter": verb.chapter[0], 
        "verse": verb.verse[0],
        "g_cons": clean(verb.g_cons[0]),
        "hebrew": verb.text,
        "g_cons_verse": clean(str(verse)),
        "stem": verb.vs[0],
        "tense": verb.vt[0],
        "subject": clean(str(verb.to_clauses.to_phrases.filter(function="Subj"))),
        "subj_heb": verb.to_clauses.to_phrases.filter(function="Subj").text,
        "complement": clean(str(complement)),
        "cmpl_heb" : verb.to_clauses.to_phrases.filter(function="Cmpl").text,
    }
    items.append(item)

In [10]:
df = pd.DataFrame(items).fillna("")
df.sort_values(["book", "chapter", "verse", "lex", "scroll"], ascending=[True, True, True, True, False], ignore_index=True, inplace=True) 

In [11]:
cmpl_isaiah = df

In [12]:
cmpl_isaiah

Unnamed: 0,verb_id,lex,scroll,book,chapter,verse,g_cons,hebrew,g_cons_verse,stem,tense,subject,subj_heb,complement,cmpl_heb
0,212256,BW>[,MT,Isaiah,1,12,TB>W,תָבֹ֔אוּ,KJ TB>W L R>WT PNJ MJ BQC Z>T M JDKM RMS XYRJ,qal,impf,,,[],
1,1895059,BW>[,1Qisaa,Isaiah,1,12,TB>W,תבאו,KJ> TB>W L R>WT PNJ MJ BQC ZW>T M JDKM L RMWS ...,qal,impf,,,[],
2,212269,BW>[,MT,Isaiah,1,13,HBJ>,הָבִיא֙,L> TWSJPW HBJ> MNXT CW> QVRT TW<BH HJ> LJ XDC ...,hif,infc,,,[],
3,1895075,BW>[,1Qisaa,Isaiah,1,13,HBJ>,הביא,LW> TWSJPW L HBJ> MNXT CW> QVRT TW<BH HJ> LJ X...,hifil,infc,,,[],
4,212315,SWR[,MT,Isaiah,1,16,HSJRW,הָסִ֛ירוּ,RXYW HZKW HSJRW R< M<LLJKM M NGD <JNJ XDLW HR<,hif,impv,,,M NGD <JNJ,מִנֶּ֣גֶד עֵינָ֑י
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1143,1898190,CLX[,1Qisaa,Isaiah,9,7,CLX,שלח,DBR CLX JHWH B J<QWB W NPL B JFR>L,qal,perf,,,[],
1144,215297,NPL[,MT,Isaiah,9,7,NPL,נָפַ֖ל,DBR CLX >DNJ B J<QB W NPL B JFR>L,qal,perf,,,B JFR>L,בְּיִשְׂרָאֵֽל׃
1145,1898195,NPL[,1Qisaa,Isaiah,9,7,NPL,נפל,DBR CLX JHWH B J<QWB W NPL B JFR>L,qal,perf,,,[],
1146,215318,NPL[,MT,Isaiah,9,9,NPLW,נָפָ֖לוּ,LBNJM NPLW W GZJT NBNH CQMJM GD<W W >RZJM NXLJP,qal,perf,LBNJM,לְבֵנִ֥ים,[],


In [None]:
# Other variables to add:
# 