In [1]:
import pandas as pd

from itertools import chain

from tfob import TFOb, BHSA, DSS

**Locating corpus resources ...**

Name,# of nodes,# slots/node,% coverage
scroll,1001,1428.81,100
lex,10450,129.14,94
fragment,11182,127.91,100
line,52895,27.04,100
clause,125,12.85,0
cluster,101099,6.68,47
phrase,315,5.1,0
word,500995,2.81,99
sign,1430241,1.0,100


**Locating corpus resources ...**

Name,# of nodes,# slots/node,% coverage
book,39,10938.21,100
chapter,929,459.19,100
lex,9230,46.22,100
verse,23213,18.38,100
half_verse,45179,9.44,100
sentence,63717,6.7,100
sentence_atom,64514,6.61,100
clause,88131,4.84,100
clause_atom,90704,4.7,100
phrase,253203,1.68,100


### 1.Create two lists with the verses from MT Isaiah and 1QIsaa

#### 1.1 List of wanted motion verbs

In [2]:
motion_verbs = ['BW>[','HLK[','JY>[','JRD[','<BR[',
                '<LH[','CWB[','>TH[','BRX[','GJX[',
                'GLH[','GLL[','DXP[','DLG[','HWH[',
                'XWC[','XLP[','XSH[','VB<[','VWF[',
                'MHR[','MWC[','NGC[','NHR[','NWX[',
                'NWS[','NXT[','NVP[','NS<[','NPL[',
                'NTK[','SBB[','SWR[','SLQ[','<WZ[',
                '<WP[','PNH[','PC<[','YWP[','Y<D[',
                'QHL[','QPY[','QRB[','RWY[','FVH[',
                'CWX[','CWR[','CVP[','CQQ[','T>R[',
                'T<H[']

SCROLL = "1Q8"

#### 1.2 List of occurrences of the motion verbs (1QI8)

In [3]:
verbs_dss = TFOb.all("scroll", DSS).filter(scroll=SCROLL).to_words.filter_in(lex=motion_verbs)

In [4]:
verbs_dss

<word_218 "JBW> TBW JB>W B>JM JBW> B> JPWL JRD J<LH HBJ>W TGLJ T<W <BRW NPL B> HT<W HT<W HT<WT CBW B> [...] TRD TNJXNW TT<NW CWB JRDT JRDT T<LJNH M<LH >BJ> JBW> CWVP JBW> HCJB B>H B>W HBJ>W JBJ>W JBW> JY>W PC<JM">

### 2. Get the verses from both databases and compare lexemes

#### 2.1 Clean function to harmonised the DSS verses with the BHSA verses

In [5]:
def clean(g_cons):
    return g_cons.replace("_", " ").replace("׳", "").replace("'", "")

#### 2.2 Find references (book, chapter, verse) for each verb in 1Q8 

In [6]:
verbs_dss[0] # this is a TFOb object containing a verb

<word_1 "JBW>">

In [7]:
def find_verb_ref(verb):
    if verb.source == "BHSA":
        book = verb.book[0]
        chapter = verb.chapter[0]
        verse_num = verb.verse[0]
    else:
        book = verb.book[0]    
        chapter = verb.chapter[0]
        verse_num = verb.verse[0]
    return [book, chapter, verse_num]

In [8]:
ref_dss = find_verb_ref(verbs_dss[34])

print(ref_dss)

print(TFOb.section(ref_dss, DSS, scroll=SCROLL).to_words.lex)
print(TFOb.section(ref_dss, BHSA).to_words.lex)

['Isaiah', '26', '2']
['PTX[', 'C<R/', 'W', 'BW>[', 'GWJ/', 'YDJQ/', 'CMR[', '>MWN/', '']
['PTX[', 'C<R/', 'W', 'BW>[', 'GWJ/', 'YDJQ/', 'CMR[', '>MWN/']


#### 2.3 Compare the lexemes from 1Q8 and BHSA for each verse containing a motion verb

In [9]:
def is_lex_identical(verb_dss):
    """
    Checks if the verses (i.e. BHSA versus DSS) are identical on the lexeme level.
    """
    ref_verb = find_verb_ref(verb_dss)
    
    dss_lex = TFOb.section(ref_verb, DSS, scroll=SCROLL).to_words.lex
    bhsa_lex = TFOb.section(ref_verb, BHSA).to_words.lex
    
    if dss_lex[-1] == "":
        dss_lex.pop()
    
    return [clean(lex) for lex in bhsa_lex] == [clean(lex) for lex in dss_lex]

#### 2.4 Functions testing (test functions lower in the notebook)

In [74]:
verb_test = verbs_dss[1]
ref = find_verb_ref(verb_test)
verse_control = TFOb.section(ref, BHSA)

In [75]:
print(TFOb.section(ref, DSS, scroll=SCROLL).lex)
print(verse_control.to_words.lex)

['W', 'KL/', 'H', 'HR/', '>CR', 'B', 'M<DR/', '<DR=[', 'L>', 'BW>[', 'CM', 'JR>H/', 'CMJR/', 'W', 'CJT/', 'W', 'HJH[', 'L', 'MCLX/', 'CWR/', 'W', 'L', 'MRMS/', 'FH/', '']
['W', 'KL/', 'H', 'HR/', '>CR', 'B', 'H', 'M<DR/', '<DR=[', 'L>', 'BW>[', 'CM', 'JR>H/', 'CMJR/', 'W', 'CJT/', 'W', 'HJH[', 'L', 'MCLX/', 'CWR/', 'W', 'L', 'MRMS/', 'FH/']


In [81]:
print(find_verb_ref(verb_test))

print(f"BHSA verse: {verse_control}\n")

print("BHSA clauses:")
for clause in verse_control.to_clauses:
    print(clause)
    
print("\n")

print("BHSA phrases:")
for phrase in verse_control.to_phrases:
    if phrase.function[0] == "Cmpl" or phrase.function[0] == "Subj":
        print(phrase, phrase.function)
        
for word in verse_control.to_words:
    if word.uvf[0] == "H":
        print(f"Words with directive he in BHSA verse: {word}")

print(f"\nDSS verse: {TFOb.section(ref, DSS, scroll=SCROLL)}")
print(f"DSS verb : {verb_test}")
print(f"DSS Clause: {find_clause(verb_test)}")
print(f"DSS Complement: {find_complements(verb_test)}")
print(f"DSS Subject: {find_subject(verb_test)}")

for word in TFOb.section(ref, DSS, scroll=SCROLL):
    if word.uvf_etcbc[0] == "H":
        print(f"Words with directive he in DSS verse: {word}")


['Isaiah', '7', '25']
BHSA verse: W KL H HRJM >CR B M<DR J<DRWN L> TBW> CMH JR>T CMJR W CJT W HJH L MCLX CWR W L MRMS FH

BHSA clauses:
W KL H HRJM
>CR B M<DR J<DRWN
L> TBW> CMH JR>T CMJR W CJT
W HJH L MCLX CWR W L MRMS FH


BHSA phrases:
CMH ['Cmpl']
Words with directive he in BHSA verse: CMH

DSS verse: W KL H HRJM >CR B M<DR J<DRWN L> TBW CMH JR>T CMJR W CJT W HJH L MCLX CWR W L MRMS FH
DSS verb : TBW
DSS Clause: None
DSS Complement: None
DSS Subject: None
Words with directive he in DSS verse: CMH


In [12]:
n = 0

for i in range(len(verbs_dss)):
    if is_lex_identical(verbs_dss[i]):
        n += 1

n

113

### 3.2 Create a function to test if a DSS verse has the same lexemes as a BHSA verse

### 3.3 Find complements

In [13]:
def find_bhsa_verb(verb_dss):
    """
    Checks if a verb occurring in DSS also occurs in BHSA (same book, chapter, verse, lexeme).
    Else, returns None.
    """
    
    # Get book chapter verse info from a DSS verb
    ref_dss = find_verb_ref(verb_dss)

    # Get the corresponding BHSA verse
    verse_bhsa = TFOb.section(ref_dss, BHSA).to_words
    verb_bhsa = verse_bhsa.filter(lex=verb_dss.lex[0])
    
    # If repetition of verb in same verse: TODO
    if len(verb_bhsa) > 1:
        return # TODO
        scroll = verb_dss.to_scrolls.scroll[0]
        verse_dss = TFOb.section(ref_dss, DSS, scroll)
        print("Verse BHSA:", verse_bhsa)
        print("Verse DSS:", verse_dss)
        
    if verb_bhsa:
        return verb_bhsa

    

#print(v_book, v_chapter, v_verse)
#print(verse_bhsa)

In [14]:
def find_clause(verb):
    """Find the complement of a verb. If no match, returns None"""
    if verb.source.name == "BHSA":
        clause = verb.to_clauses.to_clauses
        return clause
    
    # if the verb is not BHSA, it's DSS
    verb_bhsa = find_bhsa_verb(verb)

    # Check if verses are identical  
    if verb_bhsa and is_lex_identical(verb): # TODO
        verse_dss = TFOb.section(find_verb_ref(verb), DSS, scroll=SCROLL).to_words
        clause_bhsa = find_clause(verb_bhsa)
        
    
        first_word_id = clause_bhsa.to_words.ids[0]
        last_word_id = clause_bhsa.to_words.ids[-1]

        verse_ids = clause_bhsa.to_verses.to_words.ids

        first_word_index = verse_ids.index(first_word_id)
        last_word_index = verse_ids.index(last_word_id)
            
        
        return verse_dss[first_word_index:last_word_index + 1]

In [15]:
def find_complements(verb):
    """Find the complement of a verb. If no match, returns None"""
    if verb.source.name == "BHSA":
        complements = verb.to_clauses.to_phrases.filter(function="Cmpl")
        return complements
    
    # if the verb is not BHSA, it's DSS
    verb_bhsa = find_bhsa_verb(verb)

    # Check if verses are identical  
    if verb_bhsa and is_lex_identical(verb): # TODO
        verse_dss = TFOb.section(find_verb_ref(verb), DSS, scroll=SCROLL).to_words
        complements_bhsa = find_complements(verb_bhsa)
        
        complements_dss = []
    
        for complement_bhsa in complements_bhsa:
            first_word_id = complement_bhsa.to_words.ids[0]
            last_word_id = complement_bhsa.to_words.ids[-1]
            
            verse_ids = complement_bhsa.to_verses.to_words.ids
            
            first_word_index = verse_ids.index(first_word_id)
            last_word_index = verse_ids.index(last_word_id)
            
            complements_dss.append(verse_dss[first_word_index:last_word_index + 1])
        
        return complements_dss
    
def find_subject(verb):
    """Find the subject of a verb. If no match, returns None"""
    if verb.source.name == "BHSA":
        subjects = verb.to_clauses.to_phrases.filter(function="Subj")
        assert len(subjects) <= 1
        return subjects
    
    # if the verb is not BHSA, it's DSS
    verb_bhsa = find_bhsa_verb(verb)

    # Check if verses are identical  
    if verb_bhsa and is_lex_identical(verb): # TODO
        verse_dss = TFOb.section(find_verb_ref(verb), DSS, scroll=SCROLL).to_words
        subject_bhsa = find_subject(verb_bhsa)
        
        if not subject_bhsa:
            return ""

        first_word_id = subject_bhsa.to_words.ids[0]
        last_word_id = subject_bhsa.to_words.ids[-1]

        verse_ids = subject_bhsa.to_verses.to_words.ids

        first_word_index = verse_ids.index(first_word_id)
        last_word_index = verse_ids.index(last_word_id)
            
        return verse_dss[first_word_index:last_word_index + 1]

In [16]:
def is_sign_unc(verse):
    """If a verse contains a missing or uncertain sign, returns True. Else, returns False."""
    
    unc_types = ['missing', 'unc']
    verse_sign_types = []
    
    for sign in verse.to_signs:
        if sign.type[0] in unc_types:
            sign_type = 1
        else:
            sign_type = 0
            
        verse_sign_types.append(sign_type)
    
    return verse_sign_types

In [17]:
for verb in verbs_dss:
    ref = find_verb_ref(verb)
    verse = TFOb.section(ref, DSS, scroll=SCROLL)
    verse_types = is_sign_unc(verse)
    if 1 in verse_types:
        print(verb)

TBW
NPL
<BRW
J<BR
<WBR
><LH
NGLH
>TJWT
TLK
HCJB
HCJB
JMWC


In [18]:
# Create a dataset with the occurrences


items = [] # create an empty list to store all the information for each occ.

       
for verb in verbs_dss:
    
    # Add MT as "scroll" for the BHSA
    if verb.source.name == "BHSA":
        scroll = "MT"
        verse = verb.to_verses
        dir_he_dss_verse = ""
        sign_info = ""

    else:
        scroll = verb.to_scrolls.scroll[0]
        verse = TFOb.section([verb.book[0], verb.chapter[0], verb.verse[0]], DSS, scroll)
        dir_he_dss_verse = int("H" in verse.uvf_etcbc)
        sign_info = "".join([str(sign) for sign in is_sign_unc(verse)])
        
        
    subject = find_subject(verb)
    complements = find_complements(verb)
    
    # If complements is None ==> there was no match between DSS and BHSA verses ==> find complement manually
    if complements is None:
        complements = [""]
        dir_he = ""

    for complement in complements:        
        if complement == "":
            dir_he = ""
        else: 
            dir_he = int("H" in complement.to_words.uvf_etcbc)
               
    
        # Collect information about the following variables:    
        item = {
            "verb_id": verb.ids[0], 
            "lex": verb.lex[0], 
            "scroll": scroll,
            "book": verb.book[0], 
            "chapter": verb.chapter[0], 
            "verse_num": verb.verse[0],
            "gcons_verb": clean(verb.g_cons[0]),
            "gcons_verse": clean(str(verse)),
            "gcons_clause": clean(str(find_clause(verb))),
            "subject": clean(str(subject)),
            "complement": clean(str(complement)),
            "dir_he": dir_he,
            "dir_he_dss": dir_he_dss_verse,
            "sign_info": sign_info,
            "stem": verb.vs[0],
            "tense": verb.vt[0],
        }
        
        if complement != "": #TODO: get the prepositions when lex_bhsa = lex_dss, using the index of the prep in BHSA
            prepositions = complement.to_words.filter(sp="prep")
            n = 0
            for preposition in prepositions:
                n += 1
                item[f"preposition_{n}"] = str(preposition)

        items.append(item)

In [19]:
#for verb_dss in verbs_dss:
    #print(find_complements(verb_dss))

### 3.4 Create the dataset with Pandas

In [20]:
df1 = pd.DataFrame(items).fillna("")
df1.sort_values(["book", "chapter", "verse_num"], ascending=[True, True, True], ignore_index=True, inplace=True)

In [21]:
df1

Unnamed: 0,verb_id,lex,scroll,book,chapter,verse_num,gcons_verb,gcons_verse,gcons_clause,subject,complement,dir_he,dir_he_dss,sign_info,stem,tense
0,1919358,NPL[,1Q8,Isaiah,13,15,JPWL,H NMY> JDQR W KL H NSPH JPWL B XRB,,,,,0,0000000000000000000000000,qal,impf
1,1919255,BW>[,1Q8,Isaiah,13,2,JB>W,<L HR NCPH F>W NS HRJMW QWL LHM HNJPW JD W JB>...,W JB>W PTXJ NDJBJM,,PTXJ NDJBJM,0,0,0000000000000000000000000000000000000000000000000,qal,impf
2,1919289,BW>[,1Q8,Isaiah,13,5,B>JM,B>JM M >RY MRXQ M QYH H CMJM JHWH W KLJ Z<MW L...,B>JM M >RY MRXQ M QYH H CMJM JHWH W KLJ Z<MW,JHWH W KLJ Z<MW,M >RY MRXQ M QYH H CMJM,0,0,0000000000000000000000000000000000000000000000,qal,ptca
3,1919349,BW>[,1Q8,Isaiah,13,9,B>,HNH JWM JHWH B>,,,,,0,00000000000000,qal,ptca
4,1919431,JRD[,1Q8,Isaiah,15,3,JRD,B XWYTJH XGRW FQ <L GGWTJH W B RXBTJH KLH JJLJ...,,,,,0,0000000000000000000000000000000000000000000000...,qal,ptca
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
157,1927697,PC<[,1Q8,Isaiah,66,24,PC<JM,W JY>W W R>W B PGRJ H >NCJM H PC<JM BJ KJ TWL<...,H PC<JM BJ,,BJ,0,0,0000000000000000000000000000000000000000000000...,qal,ptca
158,1927246,BW>[,1Q8,Isaiah,66,4,>BJ>,GM >NJ >BXR B T<LLJHM W B MGRTM >BJ> LHM J<N Q...,,,,,0,0000000000000000000000000000000000000000000000...,hifil,impf
159,1927313,BW>[,1Q8,Isaiah,66,7,JBW>,B VRM TXJL JLDH B VRM JBW> XBL LH W HMLJVH ZKR,B VRM JBW> XBL LH,XBL,LH,0,0,00000000000000000000000000000000000000,qal,impf
160,1919017,BW>[,1Q8,Isaiah,7,24,JBW>,B XYJM W B QCT JBW> CMH KJ CMJR W CJT THJH KL ...,,,,,1,0000000000000000000000000000000000000000,qal,impf


In [22]:
df2 = pd.read_csv("data/dataset_isaiah_2_csv.csv", sep=";")

In [23]:
df2

Unnamed: 0,verb_id,lex,scroll,book,chapter,verse_num,gcons_verb,gcons_verse,gcons_clause,subject,complement,dir_he,dir_he_dss,sign_info,stem,tense,preposition_1,preposition_2,preposition_3,preposition_4
0,212315,SWR[,MT,Isaiah,1,16,HSJRW,RXYW HZKW HSJRW R< M<LLJKM M NGD <JNJ XDLW HR<,HSJRW R< M<LLJKM M NGD <JNJ,,M NGD <JNJ,0.0,,,hif,impv,M,,,
1,1895129,SWR[,1Qisaa,Isaiah,1,16,HSJRW,RXYW W HZKW W HSJRW RW< M<LLJKM M NGD <JNJ XDL...,W HSJRW RW< M<LLJKM M NGD <JNJ,,M NGD <JNJ,0.0,0.0,,hifil,impv,M,,,
2,212105,PC<[,MT,Isaiah,1,2,PC<W,CM<W CMJM W H>ZJNJ >RY KJ JHWH DBR BNJM GDLTJ ...,W HM PC<W BJ,HM,BJ,0.0,,,qal,perf,BJ,,,
3,1894895,PC<[,1Qisaa,Isaiah,1,2,PC<W,CM<W CMJM W H>ZJNJ H >RY KJ> JHWH DBR BNJM GDL...,W HMH PC<W BJ,HMH,BJ,0.0,0.0,,qal,perf,BJ,,,
4,212418,BW>[,MT,Isaiah,1,23,JBW>,FRJK SWRRJM W XBRJ GNBJM KLW >HB CXD W RDP CLM...,W RJB >LMNH L> JBW> >LJHM,RJB >LMNH,>LJHM,0.0,,,qal,impf,>LJHM,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
478,1897742,HLK[,1Qisaa,Isaiah,8,7,HLK,W L KN HNH >DWNJ JHWH M<LH <LJHM >T MJ H NHR H...,W HLK <L KWL GDWWTJW,,<L KWL GDWWTJW,0.0,0.0,,qal,perf,<L,,,
479,214869,XLP[,MT,Isaiah,8,8,XLP,W XLP B JHWDH CVP W <BR <D YW>R JGJ< W HJH MVW...,W XLP B JHWDH,,B JHWDH,0.0,,,qal,perf,B,,,
480,1897748,XLP[,1Qisaa,Isaiah,8,8,XLP,W XLP B JHWDH CVP W <BR <D YW>R JGJ< W HJH MVW...,W XLP B JHWDH,,B JHWDH,0.0,0.0,,qal,perf,B,,,
481,215297,NPL[,MT,Isaiah,9,7,NPL,DBR CLX >DNJ B J<QB W NPL B JFR>L,W NPL B JFR>L,,B JFR>L,0.0,,,qal,perf,B,,,


In [31]:
set(result["dir_he"])


{'', 0.0, 1.0}

In [54]:
frames = [df2, df1]
result = pd.concat(frames, ignore_index=True)

In [55]:
# convert column "a" of a DataFrame
result["chapter"] = pd.to_numeric(result["chapter"])
result["verse_num"] = pd.to_numeric(result["verse_num"])
# converting 'dir_he' and 'dir_he_dss' from float to int 

result["dir_he"] = result["dir_he"].astype(str)
result["dir_he"] = result["dir_he"].replace("1.0", "1").replace("0.0", "0")

result["dir_he_dss"] = result["dir_he_dss"].astype(str)
result["dir_he_dss"] = result["dir_he_dss"].replace("1.0", "1").replace("0.0", "0")

result

Unnamed: 0,verb_id,lex,scroll,book,chapter,verse_num,gcons_verb,gcons_verse,gcons_clause,subject,complement,dir_he,dir_he_dss,sign_info,stem,tense,preposition_1,preposition_2,preposition_3,preposition_4
0,212315,SWR[,MT,Isaiah,1,16,HSJRW,RXYW HZKW HSJRW R< M<LLJKM M NGD <JNJ XDLW HR<,HSJRW R< M<LLJKM M NGD <JNJ,,M NGD <JNJ,0,,,hif,impv,M,,,
1,1895129,SWR[,1Qisaa,Isaiah,1,16,HSJRW,RXYW W HZKW W HSJRW RW< M<LLJKM M NGD <JNJ XDL...,W HSJRW RW< M<LLJKM M NGD <JNJ,,M NGD <JNJ,0,0,,hifil,impv,M,,,
2,212105,PC<[,MT,Isaiah,1,2,PC<W,CM<W CMJM W H>ZJNJ >RY KJ JHWH DBR BNJM GDLTJ ...,W HM PC<W BJ,HM,BJ,0,,,qal,perf,BJ,,,
3,1894895,PC<[,1Qisaa,Isaiah,1,2,PC<W,CM<W CMJM W H>ZJNJ H >RY KJ> JHWH DBR BNJM GDL...,W HMH PC<W BJ,HMH,BJ,0,0,,qal,perf,BJ,,,
4,212418,BW>[,MT,Isaiah,1,23,JBW>,FRJK SWRRJM W XBRJ GNBJM KLW >HB CXD W RDP CLM...,W RJB >LMNH L> JBW> >LJHM,RJB >LMNH,>LJHM,0,,,qal,impf,>LJHM,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
640,1927697,PC<[,1Q8,Isaiah,66,24,PC<JM,W JY>W W R>W B PGRJ H >NCJM H PC<JM BJ KJ TWL<...,H PC<JM BJ,,BJ,0,0,0000000000000000000000000000000000000000000000...,qal,ptca,,,,
641,1927246,BW>[,1Q8,Isaiah,66,4,>BJ>,GM >NJ >BXR B T<LLJHM W B MGRTM >BJ> LHM J<N Q...,,,,,0,0000000000000000000000000000000000000000000000...,hifil,impf,,,,
642,1927313,BW>[,1Q8,Isaiah,66,7,JBW>,B VRM TXJL JLDH B VRM JBW> XBL LH W HMLJVH ZKR,B VRM JBW> XBL LH,XBL,LH,0,0,00000000000000000000000000000000000000,qal,impf,,,,
643,1919017,BW>[,1Q8,Isaiah,7,24,JBW>,B XYJM W B QCT JBW> CMH KJ CMJR W CJT THJH KL ...,,,,,1,0000000000000000000000000000000000000000,qal,impf,,,,


In [56]:
result.sort_values(["book", "chapter", "verse_num"], ascending=[True, True, True], ignore_index=True, inplace=True)
result.tail(20)

Unnamed: 0,verb_id,lex,scroll,book,chapter,verse_num,gcons_verb,gcons_verse,gcons_clause,subject,complement,dir_he,dir_he_dss,sign_info,stem,tense,preposition_1,preposition_2,preposition_3,preposition_4
625,234301,<LH[,MT,Isaiah,65,17,T<LJNH,KJ HNNJ BWR> CMJM XDCJM W >RY XDCH W L> TZKRNH...,W L> T<LJNH <L LB,,<L LB,0.0,,,qal,impf,<L,,,
626,1918207,<LH[,1Qisaa,Isaiah,65,17,T<LJN>,KJ> HNNJ BWR> CMJM XDCJM W >RY XDCH W LW> TZKR...,W LW> T<LJN> <L LB,,<L LB,0.0,0.0,,qal,impf,<L,,,
627,1926994,<LH[,1Q8,Isaiah,65,17,T<LJNH,BWR> CMJM XDCJM W >RY XDCH W L> TZKRNH H R>CNW...,,,,,0.0,0000000000000000000000000000000000000000000000...,qal,impf,,,,
628,1927246,BW>[,1Q8,Isaiah,66,4,>BJ>,GM >NJ >BXR B T<LLJHM W B MGRTM >BJ> LHM J<N Q...,,,,,0.0,0000000000000000000000000000000000000000000000...,hifil,impf,,,,
629,234608,BW>[,MT,Isaiah,66,7,JBW>,B VRM TXJL JLDH B VRM JBW> XBL LH W HMLJVH ZKR,B VRM JBW> XBL LH,XBL,LH,0.0,,,qal,impf,LH,,,
630,1918530,BW>[,1Qisaa,Isaiah,66,7,JBW>,B VRM TXJL JLDH B VRM JBW> XBL LH HMLJVH ZKR,B VRM JBW> XBL LH,XBL,LH,0.0,0.0,,qal,impf,LH,,,
631,1927313,BW>[,1Q8,Isaiah,66,7,JBW>,B VRM TXJL JLDH B VRM JBW> XBL LH W HMLJVH ZKR,B VRM JBW> XBL LH,XBL,LH,0.0,0.0,00000000000000000000000000000000000000,qal,impf,,,,
632,1927460,BW>[,1Q8,Isaiah,66,15,JBW>,KJ HNH JHWH B >C JBW> W K SWPH MRKBTJW L HCJB ...,,,,,0.0,0000000000000000000000000000000000000000000000...,qal,impf,,,,
633,1927466,CWB[,1Q8,Isaiah,66,15,HCJB,KJ HNH JHWH B >C JBW> W K SWPH MRKBTJW L HCJB ...,,,,,0.0,0000000000000000000000000000000000000000000000...,hifil,infc,,,,
634,1927524,BW>[,1Q8,Isaiah,66,18,B>H,W >NKJ M<FJHM W MXCBTJHM B>H L QBY >T KL H GWJ...,,,,,0.0,0000000000000000000000000000000000000000000000...,qal,ptca,,,,


In [57]:
result.fillna("", inplace=True)

In [58]:
result

Unnamed: 0,verb_id,lex,scroll,book,chapter,verse_num,gcons_verb,gcons_verse,gcons_clause,subject,complement,dir_he,dir_he_dss,sign_info,stem,tense,preposition_1,preposition_2,preposition_3,preposition_4
0,212105,PC<[,MT,Isaiah,1,2,PC<W,CM<W CMJM W H>ZJNJ >RY KJ JHWH DBR BNJM GDLTJ ...,W HM PC<W BJ,HM,BJ,0,,,qal,perf,BJ,,,
1,1894895,PC<[,1Qisaa,Isaiah,1,2,PC<W,CM<W CMJM W H>ZJNJ H >RY KJ> JHWH DBR BNJM GDL...,W HMH PC<W BJ,HMH,BJ,0,0,,qal,perf,BJ,,,
2,212315,SWR[,MT,Isaiah,1,16,HSJRW,RXYW HZKW HSJRW R< M<LLJKM M NGD <JNJ XDLW HR<,HSJRW R< M<LLJKM M NGD <JNJ,,M NGD <JNJ,0,,,hif,impv,M,,,
3,1895129,SWR[,1Qisaa,Isaiah,1,16,HSJRW,RXYW W HZKW W HSJRW RW< M<LLJKM M NGD <JNJ XDL...,W HSJRW RW< M<LLJKM M NGD <JNJ,,M NGD <JNJ,0,0,,hifil,impv,M,,,
4,212418,BW>[,MT,Isaiah,1,23,JBW>,FRJK SWRRJM W XBRJ GNBJM KLW >HB CXD W RDP CLM...,W RJB >LMNH L> JBW> >LJHM,RJB >LMNH,>LJHM,0,,,qal,impf,>LJHM,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
640,1927583,BW>[,1Q8,Isaiah,66,20,HBJ>W,W HBJ>W >T KL >XJKM M KL H GWJM MNXH L JHWH B ...,,,,,0,0000000000000000000000000000000000000000000000...,hifil,perf,,,,
641,1927616,BW>[,1Q8,Isaiah,66,20,JBJ>W,W HBJ>W >T KL >XJKM M KL H GWJM MNXH L JHWH B ...,,,,,0,0000000000000000000000000000000000000000000000...,hifil,impf,,,,
642,234988,PC<[,MT,Isaiah,66,24,PC<JM,W JY>W W R>W B PGRJ H >NCJM H PC<JM BJ KJ TWL<...,H PC<JM BJ,,BJ,0,,,qal,ptca,BJ,,,
643,1918921,PC<[,1Qisaa,Isaiah,66,24,PWC<JM,W JY>W W R>W B PGRJ H >NCJM H PWC<JM BJ> KJ> T...,H PWC<JM BJ>,,BJ>,0,0,,qal,ptca,BJ>,,,


### 3.5 Save the dataset in a csv file

In [59]:
result.to_csv("data/isaiah_1q8_1qisaa_ds.csv", index=False, sep=";")