In [1]:
import pandas as pd 

from Bio import pairwise2
from Bio.Seq import Seq

from tfob import TFOb, get_bhsa, get_dss

In [2]:
# Load the two corpora through the project-local `tfob` helpers.
# BHSA and DSS are later passed as the `source` argument of TFOb.section.
BHSA = get_bhsa() 
DSS = get_dss()

**Locating corpus resources ...**

Name,# of nodes,# slots/node,% coverage
book,39,10938.21,100
chapter,929,459.19,100
lex,9230,46.22,100
verse,23213,18.38,100
half_verse,45179,9.44,100
sentence,63717,6.7,100
sentence_atom,64514,6.61,100
clause,88131,4.84,100
clause_atom,90704,4.7,100
phrase,253203,1.68,100


**Locating corpus resources ...**

Name,# of nodes,# slots/node,% coverage
scroll,1001,1428.81,100
lex,10450,129.14,94
fragment,11182,127.91,100
line,52895,27.04,100
clause,125,12.85,0
cluster,101099,6.68,47
phrase,315,5.1,0
word,500995,2.81,99
sign,1430241,1.0,100


In [3]:
def align_verse(verse_1, verse_2):
    """Align the consonantal text of two verses character by character.

    Parameters
    ----------
    verse_1, verse_2 : TFOb
        Verse objects, e.g. ``TFOb.section([book, chapter, verse], source)``.
        Only ``to_words`` and the ``g_cons`` feature of each word are read.

    Returns
    -------
    tuple
        ``(aligned_1, aligned_2)``: both sides of the first alignment returned
        by ``pairwise2.align.globalxx``, with leading and trailing spaces
        stripped. If either verse has no consonantal text there is nothing to
        align; the empty side is then rendered as a run of ``-`` (pairwise2's
        default gap character) as long as the other verse.
    """
    # Words without a consonantal form (g_cons is None) are skipped; the
    # remaining forms are joined with single spaces.
    verseA = " ".join(
        form
        for form in (word.g_cons[0] for word in verse_1.to_words)
        if form is not None
    )
    verseB = " ".join(
        form
        for form in (word.g_cons[0] for word in verse_2.to_words)
        if form is not None
    )

    # pairwise2 yields no alignment at all when one sequence is empty, so
    # indexing the result with [0] would raise IndexError. Handle it here.
    if not verseA or not verseB:
        width = max(len(verseA), len(verseB))
        return (verseA or "-" * width).strip(' '), (verseB or "-" * width).strip(' ')

    # Global alignment with the simplest scoring: match = 1, no penalties.
    alignments = pairwise2.align.globalxx(Seq(verseA), Seq(verseB))
    best = alignments[0]

    return best[0].strip(' '), best[1].strip(' ')

def is_sign_unc(sign):
    """Tell whether a sign is missing or uncertain.

    Looks at the first value of ``sign.type`` and returns True when it is
    'missing' or 'unc', False for anything else.
    """
    first_type = sign.type[0]
    return first_type in ('missing', 'unc')

In [32]:
# Scroll compared against BHSA: passed as `scroll=` to TFOb.section and used
# as the label in printed messages.
# NOTE(review): several saved outputs in this notebook are labelled 4Q22, so
# they were presumably produced with another value - restart and run all cells.
SCROLL1 = "4Q17"

In [35]:
# Reference of the verse under study; the same [book, chapter, verse] triple
# is looked up in both corpora.
book = "Exodus"
chapter = "40"
verse_num = "17"
verse_bhsa = TFOb.section([book, chapter, verse_num], BHSA)
# For the DSS corpus the lookup is additionally restricted to one scroll.
verse_scroll_1 = TFOb.section([book, chapter, verse_num], DSS, scroll=SCROLL1)

In [36]:
# Space-separated lexeme strings for the scroll verse and the BHSA verse.
scroll_1_lex = " ".join(verse_scroll_1.lex)
bhsa_lex = " ".join(verse_bhsa.to_words.lex)

# Trim trailing spaces from the scroll string (presumably left by empty
# lexeme entries at the end of the list). The previous test
# `scroll_1_lex[-1] == ""` could never be true, because a single character is
# never the empty string, and it raised IndexError on an empty string.
scroll_1_lex = scroll_1_lex.rstrip(" ")

print(bhsa_lex)
print(scroll_1_lex)

W HJH[ B H XDC=/ H R>CWN/ B H CNH/ H CNJ/ B >XD/ L H XDC=/ QWM[ H MCKN/
W HJH[ B XDC=/ H R>CWN/ B CNH/ H CNJ/ L JY>[ MN MYRJM/ B >XD/ L XDC=/ QWM[ >T H MCKN/ 


In [23]:
# bhsa_lex and scroll_1_lex are expected to be space-separated lexeme strings.

# Unique lexemes on each side (word order and repetitions are discarded)
set_bhsa_lex = set(bhsa_lex.split())
set_scroll_1 = set(scroll_1_lex.split())

# Is one lexeme inventory entirely contained in the other?
print(set_bhsa_lex <= set_scroll_1)
print(set_scroll_1 <= set_bhsa_lex)

# Lexemes attested on one side only
diff_bhsa_scroll_1 = set_bhsa_lex.difference(set_scroll_1)
diff_scroll_1_bhsa = set_scroll_1.difference(set_bhsa_lex)

print(f"In BHSA you have {diff_bhsa_scroll_1} that is not in {SCROLL1}.")
print(f"In {SCROLL1} you have {diff_scroll_1_bhsa} that is not in BHSA. \n")

False
False
In BHSA you have {'<WD/', 'YR<[', 'K', 'BW>[', 'XJQ/', 'H', 'HNH', 'N>', 'JD/', 'JY>[', 'CLG/'} that is not in 4Q22.
In 4Q22 you have {'>T', '<BR[', 'HRG[', 'R<=/', '>JC/', 'JFR>L/', '>LHJM/', 'KH', 'C<R/', 'CWB[', 'XRB/', 'QRWB/', '>X/', 'FJM[', 'MXNH/', '<L', 'JRK/', 'MN'} that is not in BHSA. 



In [24]:
# Walk through the BHSA verse clause by clause and print its verbs and phrases.
for clause in verse_bhsa.to_clauses:
    print(clause)

    # Each verb with its first lexeme value and its vs / vt features
    for verb in clause.to_words.filter(sp="verb"):
        print(verb, "from ", verb.lex[0], verb.vs, verb.vt)

    for phrase in clause.to_phrases:
        # Phrases other than complements only get a one-line summary
        if phrase.function != ["Cmpl"]:
            print(phrase, phrase.function, phrase.to_words.lex)
            continue

        # Complement phrases are detailed word by word
        print("Complement", phrase, phrase.function, phrase.to_words.lex, phrase.det)
        for word in phrase.to_words:
            print(f"number of {word}", word.nu[0])

            # The features below are printed only when they carry a value
            # (neither None nor "absent")
            word_det = word.det[0]
            if word_det is not None and word_det != "absent":
                print(f"det of {word}", word_det)

            name_type = word.nametype[0]
            if name_type is not None and name_type != "absent":
                print(name_type)

            suffix = word.prs[0]
            if suffix is not None and suffix != "absent":
                print(f"prs of {word}", suffix, word.prs_nu[0], word.prs_gn[0], word.prs_ps[0])

    print("\n")
    
# Print the words in the BHSA verse that have a directive-he (uvf == "H")

bhsa_dir_he = verse_bhsa.to_words.uvf

# enumerate() gives every match its own position. The previous
# `bhsa_dir_he.index(item)` always returned the position of the FIRST "H",
# so a second directive-he in the verse reported the wrong word.
for ind, item in enumerate(bhsa_dir_he):
    if item == "H":
        print(f"In BHSA, the word {verse_bhsa.to_words[ind]} has a directive-he")
        
# Print the words in the SCROLL1 verse that have a directive-he (uvf_etcbc == "H")

# NOTE(review): the feature is read from the verse object but the word is
# looked up in `to_words`; this assumes both sequences are parallel - confirm.
dss_dir_he1 = verse_scroll_1.uvf_etcbc

# enumerate() gives every match its own position. The previous
# `dss_dir_he1.index(item)` always returned the position of the FIRST "H",
# so a second directive-he in the verse reported the wrong word.
for ind, item in enumerate(dss_dir_he1):
    if item == "H":
        print(f"In {SCROLL1}, the word {verse_scroll_1.to_words[ind]} has a directive-he.")

W J>MR JHWH LW <WD
J>MR from  >MR[ ['qal'] ['wayq']
W ['Conj'] ['W']
J>MR ['Pred'] ['>MR[']
JHWH ['Subj'] ['JHWH/']
Complement LW ['Cmpl'] ['L'] ['det']
number of LW NA
prs of LW W sg m p3
<WD ['Modi'] ['<WD/']


HB> N> JDK B XJQK
HB> from  BW>[ ['hif'] ['impv']
HB> ['Pred'] ['BW>[']
N> ['Intj'] ['N>']
JDK ['Objc'] ['JD/']
B XJQK ['Adju'] ['B', 'XJQ/']


W JB> JDW B XJQW
JB> from  BW>[ ['hif'] ['wayq']
W ['Conj'] ['W']
JB> ['Pred'] ['BW>[']
JDW ['Objc'] ['JD/']
B XJQW ['Adju'] ['B', 'XJQ/']


W JWY>H
JWY>H from  JY>[ ['hif'] ['wayq']
W ['Conj'] ['W']
JWY>H ['PreO'] ['JY>[']


W HNH
W ['Conj'] ['W']
HNH ['Intj'] ['HNH']


JDW MYR<T K CLG
MYR<T from  YR<[ ['pual'] ['ptcp']
JDW ['Subj'] ['JD/']
MYR<T ['PreC'] ['YR<[']
K CLG ['Adju'] ['K', 'H', 'CLG/']




In [25]:
# Show the running text of the verse in both witnesses, label first and the
# text on the following line.
print("BHSA", verse_bhsa.text, sep=" \n ")
print(SCROLL1, verse_scroll_1.to_words.text, sep=" \n ")

BHSA 
 וַיֹּאמֶר֩ יְהוָ֨ה לֹ֜ו עֹ֗וד הָֽבֵא־נָ֤א יָֽדְךָ֙ בְּחֵיקֶ֔ךָ וַיָּבֵ֥א יָדֹ֖ו בְּחֵיקֹ֑ו וַיֹּ֣וצִאָ֔הּ וְהִנֵּ֥ה יָדֹ֖ו מְצֹרַ֥עַת כַּשָּֽׁלֶג׃ 
4Q22 
 ויאומר ל׳הם כה אמר יהוה אלהי ישראל שימו איש חרב׳ו על ירכ׳ו ועברו ושובו משער לשער במחנה והרוגו איש את אחי׳ו ואיש את רע׳הו ε  #  ε קרב׳ו ׃ 


In [14]:
# Print the BHSA verse and the studied scroll verse one above the other.
# NOTE(review): despite the heading, no alignment is computed here - the
# align_verse call below is commented out and refers to `verse_1qisaa`,
# which is not defined in the visible cells.

print(f"Alignement BHSA and {SCROLL1}\n")

print(verse_bhsa)
print(verse_scroll_1)
#print(align_verse(verse_bhsa, verse_1qisaa))
print("\n")

Alignement BHSA and 4Q22

W J>MR LHM KH >MR JHWH >LHJ JFR>L FJMW >JC XRBW <L JRKW <BRW W CWBW M C<R L C<R B MXNH W HRGW >JC >T >XJW W >JC >T R<HW W >JC >T QRBW
W J>WMR LHM KH >MR JHWH >LHJ JFR>L FJMW >JC XRBW <L JRKW W <BRW W CWBW M C<R L C<R B MXNH W HRWGW >JC >T >XJW W >JC >T R<HW QRBW




In [15]:
# Alias: the cells below refer to the scroll verse as `verse_dss`.
verse_dss = verse_scroll_1

In [16]:
# For each word of the scroll verse, flag its signs:
# 1 = missing or uncertain (per is_sign_unc), 0 = anything else.
for word in verse_dss.to_words:
    sign_types = [1 if is_sign_unc(sign) else 0 for sign in word.to_signs]
    print(word, sign_types)

W [0]
J>WMR [0, 0, 0, 0, 0]
LHM [0, 0, 0, 0]
KH [0, 0]
>MR [0, 0, 0]
JHWH [0, 0, 0, 0]
>LHJ [0, 0, 0, 0]
JFR>L [0, 0, 0, 0, 0]
FJMW [0, 0, 0, 0]
>JC [0, 0, 0]
XRBW [0, 0, 0, 0, 0]
<L [0, 0]
JRKW [0, 0, 0, 0, 0]
W [0]
<BRW [0, 0, 0, 0]
W [0]
CWBW [0, 0, 0, 0]
M [0]
C<R [0, 0, 0]
L [0]
C<R [0, 0, 0]
B [0]
MXNH [0, 0, 0, 0]
W [0]
HRWGW [0, 0, 0, 0, 0]
>JC [0, 0, 0]
>T [0, 0]
>XJW [0, 0, 0, 0, 0]
W [0]
>JC [0, 0, 0]
>T [0, 0]
R<HW [0, 0, 0, 0, 0]
 [0, 1]
 [0, 1]
 [0, 1]
QRBW [0, 0, 0, 0, 0]
 [0, 0]


In [17]:
# List the verbs of the scroll verse with their lex, vs, vt and ids values.
for verb in verse_dss.to_words.filter(sp="verb"):
    print(verb, verb.lex, verb.vs, verb.vt, verb.ids)

J>WMR ['>MR['] ['qal'] ['wayy'] [1960754]
>MR ['>MR['] ['qal'] ['perf'] [1960757]
FJMW ['FJM['] ['qal'] ['impv'] [1960761]
<BRW ['<BR['] ['qal'] ['impv'] [1960767]
CWBW ['CWB['] ['qal'] ['impv'] [1960769]
HRWGW ['HRG['] ['qal'] ['impv'] [1960777]


In [18]:
# List the verbs of the BHSA verse with their lex, vs and vt values.
for verb in verse_bhsa.to_words.filter(sp="verb"):
    print(verb, verb.lex, verb.vs, verb.vt)

J>MR ['>MR['] ['qal'] ['wayq']
>MR ['>MR['] ['qal'] ['perf']
FJMW ['FJM['] ['qal'] ['impv']
<BRW ['<BR['] ['qal'] ['impv']
CWBW ['CWB['] ['qal'] ['impv']
HRGW ['HRG['] ['qal'] ['impv']


In [19]:
# List every word of the scroll verse with its lex and sp (part of speech) values.
for word in verse_dss.to_words:
    print(word, word.lex, word.sp)

W ['W'] ['ptcl']
J>WMR ['>MR['] ['verb']
LHM ['L'] ['suff']
KH ['KH'] ['ptcl']
>MR ['>MR['] ['verb']
JHWH ['JHWH/'] ['subs']
>LHJ ['>LHJM/'] ['subs']
JFR>L ['JFR>L/'] ['subs']
FJMW ['FJM['] ['verb']
>JC ['>JC/'] ['subs']
XRBW ['XRB/'] ['suff']
<L ['<L'] ['ptcl']
JRKW ['JRK/'] ['suff']
W ['W'] ['ptcl']
<BRW ['<BR['] ['verb']
W ['W'] ['ptcl']
CWBW ['CWB['] ['verb']
M ['MN'] ['ptcl']
C<R ['C<R/'] ['subs']
L ['L'] ['ptcl']
C<R ['C<R/'] ['subs']
B ['B'] ['ptcl']
MXNH ['MXNH/'] ['subs']
W ['W'] ['ptcl']
HRWGW ['HRG['] ['verb']
>JC ['>JC/'] ['subs']
>T ['>T'] ['ptcl']
>XJW ['>X/'] ['suff']
W ['W'] ['ptcl']
>JC ['>JC/'] ['subs']
>T ['>T'] ['ptcl']
R<HW ['R<=/'] ['suff']
 [''] ['unknown']
 [''] ['unknown']
 [''] ['unknown']
QRBW ['QRWB/'] ['suff']
 [''] [None]


In [20]:
# Print the words in the DSS verse that have a directive-he (uvf_etcbc == "H")

# NOTE(review): the feature is read from the verse object but the word is
# looked up in `to_words`; this assumes both sequences are parallel - confirm.
dss_dir_he = verse_scroll_1.uvf_etcbc

# enumerate() gives every match its own position. The previous
# `dss_dir_he.index(item)` always returned the position of the FIRST "H",
# so a second directive-he in the verse printed the wrong word.
for ind, item in enumerate(dss_dir_he):
    if item == "H":
        print(verse_dss.to_words[ind])

In [20]:
# Print the words in the BHSA verse that have a directive-he (uvf == "H")

bhsa_dir_he = verse_bhsa.to_words.uvf

# enumerate() gives every match its own position. The previous
# `bhsa_dir_he.index(item)` always returned the position of the FIRST "H",
# so a second directive-he in the verse printed the wrong word.
for ind, item in enumerate(bhsa_dir_he):
    if item == "H":
        print(verse_bhsa.to_words[ind])

In [20]:
# Show the lexemes of the two verses, word by word: scroll first, then BHSA.
print(verse_dss.to_words.lex)
print(verse_bhsa.to_words.lex)

['W', 'HJH[', '', 'W', 'K', 'MY/', '<BR[', 'HMWN/', '<RJY/', 'W', 'HJH[', 'L', 'PT</', 'PT>M', '']
['W', 'HJH[', 'K', '>BQ/', 'DQ/', 'HMWN/', 'ZR/', 'W', 'K', 'MY/', '<BR[', 'HMWN/', '<RJY/', 'W', 'HJH[', 'L', 'PT</', 'PT>M']


In [21]:
# Show g_cons (consonantal form) and lex for each word of the DSS verse.
print(verse_dss.to_words.g_cons)
print(verse_dss.to_words.lex)

['W', 'HJH', None, 'W', 'K', 'MY', '<WBR', 'HMWN', '<RJYJM', 'W', 'HJH', 'L', 'PT<', 'PT>M', None]
['W', 'HJH[', '', 'W', 'K', 'MY/', '<BR[', 'HMWN/', '<RJY/', 'W', 'HJH[', 'L', 'PT</', 'PT>M', '']


In [22]:
# Show g_cons (consonantal form) and lex for each word of the BHSA verse.
print(verse_bhsa.to_words.g_cons)
print(verse_bhsa.to_words.lex)

['W', 'HJH', 'K', '>BQ', 'DQ', 'HMWN', 'ZRJK', 'W', 'K', 'MY', '<BR', 'HMWN', '<RJYJM', 'W', 'HJH', 'L', 'PT<', 'PT>M']
['W', 'HJH[', 'K', '>BQ/', 'DQ/', 'HMWN/', 'ZR/', 'W', 'K', 'MY/', '<BR[', 'HMWN/', '<RJY/', 'W', 'HJH[', 'L', 'PT</', 'PT>M']


In [29]:
# One flag per sign of the scroll verse, in a single flat list:
# 1 = missing or uncertain (per is_sign_unc), 0 = anything else.
verse_types = [1 if is_sign_unc(sign) else 0 for sign in verse_dss.to_signs]

In [30]:
# Show the 0/1 sign flags collected in `verse_types`.
print(verse_types)

[0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]


In [31]:
# Report every sign of the scroll verse that is missing or uncertain.
# The check goes through is_sign_unc so the definition of "uncertain" lives in
# one place; the message no longer ends with a stray apostrophe.
for sign in verse_dss.to_signs:
    if is_sign_unc(sign):
        print(f"_{sign}_'s type is {sign.type}.")

__'s type is ['unc'].'


In [32]:
# Display the repr of the scroll verse object.
verse_dss

<word_15 "W HJH W K MY <WBR HMWN <RJYJM W HJH L PT< PT>M">

In [33]:
# Words of the scroll verse that carry a vt value; a vt of [None] is skipped.
for word in verse_dss.to_words:
    if word.vt == [None]:
        continue
    print(word, word.lex, word.vt)

HJH ['HJH['] ['perf']
<WBR ['<BR['] ['ptca']
HJH ['HJH['] ['perf']


In [34]:
# Words of the BHSA verse that carry a vt value; a vt of ["NA"] is skipped.
for word in verse_bhsa.to_words:
    if word.vt == ["NA"]:
        continue
    print(word, word.lex, word.vt)

HJH ['HJH['] ['perf']
<BR ['<BR['] ['ptca']
HJH ['HJH['] ['perf']
