In [206]:
import pandas as pd 

from Bio import pairwise2
from Bio.Seq import Seq

from tfob import TFOb, BHSA, DSS

In [207]:
def align_verse(verse_1, verse_2):
    """Takes two verses (TFOb object) from section (format verse = TFOb.section(["book", "ch", "ver"], source)
    and returns aligned version of the verses as strings."""
    
    # Access the word level of the TFOb object and get the g_cons for each word
    verse1 = [word.g_cons[0] for word in verse_1.to_words]
    verse2 = [word.g_cons[0] for word in verse_2.to_words]
    
    # Removes the None and join the word in a string
    verseA = " ".join([i for i in verse1 if i is not None])
    verseB = " ".join([i for i in verse2 if i is not None])
    
    # Align the two strings
    seq1 = Seq(verseA)
    seq2 = Seq(verseB)
    
    alignements = pairwise2.align.globalxx(seq1, seq2)
    
    v1_al = (alignements[0][0].strip(' '))
    v2_al = (alignements[0][1].strip(' '))
    
    # Returns the aligned strings          
    return v1_al,v2_al

def is_sign_unc(sign):
    """If a sign is a missing or uncertain sign, returns True. Else, returns False."""
    unc_types = ['missing', 'unc']
    if sign.type[0] in unc_types:
        return True
    return False

In [376]:
SCROLL1 = "1Qisaa"
SCROLL2 = "1Q8"

In [601]:
book = "Isaiah"
chapter = "59"
verse_num = "7"
verse_bhsa = TFOb.section([book, chapter, verse_num], BHSA)
verse_1qisaa = TFOb.section([book, chapter, verse_num], DSS, scroll=SCROLL1)
verse_1q8 = TFOb.section([book, chapter, verse_num], DSS, scroll=SCROLL2)

In [602]:
qisaa_lex = verse_1qisaa.lex
q8_lex = verse_1q8.lex
bhsa_lex = verse_bhsa.to_words.lex

if qisaa_lex[-1] == "":
    qisaa_lex = qisaa_lex[:-1]
    
if q8_lex[-1] == "":
    q8_lex = q8_lex[:-1]

print("BHSA", bhsa_lex, "\n")
print("1QIsaa", qisaa_lex, "\n\n")
print("1Q8", q8_lex)

BHSA ['RGL/', 'L', 'H', 'R</', 'RWY[', 'W', 'MHR[', 'L', 'CPK[', 'DM/', 'NQJ/', 'MXCBH/', 'MXCBH/', '>WN=/', 'CD/', 'W', 'CBR/', 'B', 'MSLH/'] 

1QIsaa ['RGL/', 'L', 'R</', 'RWY[', 'W', 'MHR[', 'L', 'CPK[', 'DM/', 'NQJ/', 'MXCBH/', 'MXCBH/', '>WN=/', 'CD/', 'W', 'CBR/', 'W', 'XMS/', 'B', 'MSLH/'] 


1Q8 ['RGL/', 'L', 'R</', 'RWY[', 'W', 'MHR[', 'L', 'CPK[', 'DM/', 'NQJ/', 'MXCBH/', 'MXCBH/', '>WN=/', 'CD/', 'W', 'CBR/', 'B', 'MSLH/']


In [603]:
# Check the differnces between the verses

# Checking difference 1QIsaa vs BHSA

print(set(bhsa_lex).issubset(set(qisaa_lex)))
print(set(qisaa_lex).issubset(set(bhsa_lex)))

diff_bhsa_1qisaa = set(bhsa_lex)-set(qisaa_lex)
diff_1qisaa_bhsa = set(qisaa_lex)-set(bhsa_lex)

print(f"In BHSA you have {diff_bhsa_1qisaa} that is not in DSS.")
print(f"In {SCROLL1} you have {diff_1qisaa_bhsa} that is not in BHSA. \n")

# Checking difference 1Q8 vs BHSA

print(set(bhsa_lex).issubset(set(q8_lex)))
print(set(q8_lex).issubset(set(bhsa_lex)))

diff_bhsa_1q8 = set(bhsa_lex)-set(q8_lex)
diff_1q8_bhsa = set(q8_lex)-set(bhsa_lex)

print(f"In BHSA you have {diff_bhsa_1q8} that is not in DSS.")
print(f"In {SCROLL2} you have {diff_1q8_bhsa} that is not in BHSA.")

False
False
In BHSA you have {'H'} that is not in DSS.
In 1Qisaa you have {'XMS/'} that is not in BHSA. 

False
True
In BHSA you have {'H'} that is not in DSS.
In 1Q8 you have set() that is not in BHSA.


In [604]:
for clause in verse_bhsa.to_clauses:
    print(clause)
    
    for verb in clause.to_words.filter(sp="verb"):
        print(verb, "from ", verb.lex[0], verb.vs, verb.vt)
    
    for phrase in clause.to_phrases:
        print(phrase, phrase.function, phrase.to_words.lex)
    print("\n")
    
# Print the words in a BHSA verse that has a directive-he

bhsa_dir_he = verse_bhsa.to_words.uvf

for item in bhsa_dir_he:
    if item == "H":
        ind = bhsa_dir_he.index(item)
        print(f"In BHSA, the word {verse_bhsa.to_words[ind]} has a directive-he")
        
# Print the words in a 1QIsaa verse that has a directive-he

dss_dir_he1 = verse_1qisaa.uvf_etcbc

for item in dss_dir_he1:
    if item == "H":
        ind = dss_dir_he1.index(item)
        print(f"In 1QIsaa, the word {verse_1qisaa.to_words[ind]} has a directive-He.")
        
# Print the words in a DSS verse that has a directive-he

dss_dir_he2 = verse_1q8.uvf_etcbc

for item in dss_dir_he2:
    if item == "H":
        ind = dss_dir_he2.index(item)
        print(f"In 1Q8, the word {verse_1q8.to_words[ind]} has a directive-He.")

RGLJHM L R< JRYW
JRYW from  RWY[ ['qal'] ['impf']
RGLJHM ['Subj'] ['RGL/']
L R< ['Cmpl'] ['L', 'H', 'R</']
JRYW ['Pred'] ['RWY[']


W JMHRW
JMHRW from  MHR[ ['piel'] ['impf']
W ['Conj'] ['W']
JMHRW ['Pred'] ['MHR[']


L CPK DM NQJ
CPK from  CPK[ ['qal'] ['infc']
L CPK ['Pred'] ['L', 'CPK[']
DM NQJ ['Objc'] ['DM/', 'NQJ/']


MXCBWTJHM MXCBWT >WN
MXCBWTJHM ['Subj'] ['MXCBH/']
MXCBWT >WN ['PreC'] ['MXCBH/', '>WN=/']


CD W CBR B MSLWTM
CD W CBR ['Subj'] ['CD/', 'W', 'CBR/']
B MSLWTM ['PreC'] ['B', 'MSLH/']




In [605]:
print("BHSA", "\n", verse_bhsa.text)
print("1QIsaa", "\n", verse_1qisaa.to_words.text)
print("1Q8", "\n", verse_1q8.to_words.text)

BHSA 
 רַגְלֵיהֶם֙ לָרַ֣ע יָרֻ֔צוּ וִֽימַהֲר֔וּ לִשְׁפֹּ֖ךְ דָּ֣ם נָקִ֑י מַחְשְׁבֹֽותֵיהֶם֙ מַחְשְׁבֹ֣ות אָ֔וֶן שֹׁ֥ד וָשֶׁ֖בֶר בִּמְסִלֹּותָֽם׃ 
1QIsaa 
 רגלי׳המה לרע ירוצו וימהרו לשפוך דם נקיא מחשבותי׳המה מחשבות און שד ושבר וחמס במסלותי׳המה ׃ 
1Q8 
 רגלי׳הם לרע ירצו וימהרו לשפך דם נק  י מחשבתי׳הם מחשבות און שד ושבר במסלות׳ם ׃ 


In [606]:
# Align 1QIsaa and BHSA

print("Alignement BHSA and 1Qisaa")

print(verse_bhsa)
print(verse_1qisaa)
align_verse(verse_bhsa, verse_1qisaa)
print("\n")

# Align 1QIsaa and BHSA

print("Alignement BHSA and 1Q8")

print(verse_bhsa)
print(verse_1q8)
align_verse(verse_bhsa, verse_1q8)
print("\n")

Alignement BHSA and 1Qisaa
RGLJHM L R< JRYW W JMHRW L CPK DM NQJ MXCBWTJHM MXCBWT >WN CD W CBR B MSLWTM
RGLJHMH L R< JRWYW W JMHRW L CPWK DM NQJ> MXCBWTJHMH MXCBWT >WN CD W CBR W XMS B MSLWTJHMH


Alignement BHSA and 1Q8
RGLJHM L R< JRYW W JMHRW L CPK DM NQJ MXCBWTJHM MXCBWT >WN CD W CBR B MSLWTM
RGLJHM L R< JRYW W JMHRW L CPK DM NQJ MXCBTJHM MXCBWT >WN CD W CBR B MSLWTM




In [607]:
verse_dss = verse_1qisaa

In [470]:
for verb in verse_dss.to_words.filter(sp="verb"):
    print(verb, verb.lex, verb.vs, verb.vt)

JMWCW ['MWC['] ['qal'] ['impf']
TTMWVJNH ['MWV['] ['hitpael'] ['impf']
JMWC ['MWC['] ['qal'] ['impf']
TMWV ['MWV['] ['qal'] ['impf']
>MR ['>MR['] ['qal'] ['perf']


In [471]:
for verb in verse_bhsa.to_words.filter(sp="verb"):
    print(verb, verb.lex, verb.vs, verb.vt)

JMWCW ['MWC['] ['qal'] ['impf']
TMWVNH ['MWV['] ['qal'] ['impf']
JMWC ['MWC['] ['qal'] ['impf']
TMWV ['MWV['] ['qal'] ['impf']
>MR ['>MR['] ['qal'] ['perf']
MRXMK ['RXM['] ['piel'] ['ptca']


In [319]:
for word in verse_dss.to_words:
    print(word, word.sp)

CM<W ['verb']
ZW>T ['adjv']
BJT ['subs']
J<QWB ['subs']
H ['ptcl']
NQR>JM ['verb']
B ['ptcl']
CM ['subs']
JFR>L ['subs']
W ['ptcl']
M ['ptcl']
MJ ['subs']
JHWDH ['subs']
JY>W ['verb']
H ['ptcl']
NCB<JM ['verb']
B ['ptcl']
CM ['subs']
JHWH ['subs']
W ['ptcl']
B ['ptcl']
>LWHJ ['subs']
JFR>L ['subs']
JZKJRW ['verb']
LW> ['ptcl']
B ['ptcl']
>MT ['subs']
W ['ptcl']
LW> ['ptcl']
B ['ptcl']
YDQH ['subs']
 [None]


In [320]:
# Print the words in a DSS verse that has a directive-he

dss_dir_he = verse_dss.uvf_etcbc

for item in dss_dir_he:
    if item == "H":
        ind = dss_dir_he.index(item)
        print(verse_dss.to_words[ind])

In [437]:
# Print the words in a BHSA verse that has a directive-he

bhsa_dir_he = verse_bhsa.to_words.uvf

for item in bhsa_dir_he:
    if item == "H":
        ind = bhsa_dir_he.index(item)
        print(verse_bhsa.to_words[ind])

In [14]:
# Show the lexemes of the two verses 
print(verse_dss.to_words.lex)
print(verse_bhsa.to_words.lex)

['KL/', 'K', 'Y>N/', 'T<H[', '>JC/', 'L', 'DRK/', 'PNH[', 'W', 'JHWH/', 'PG<[', 'B', '>T', '<WN/', 'KL/', '']
['KL/', 'K', 'H', 'Y>N/', 'T<H[', '>JC/', 'L', 'DRK/', 'PNH[', 'W', 'JHWH/', 'PG<[', 'B', '>T', '<WN/', 'KL/']


In [15]:
# Show g_cons and lex for dss verse
print(verse_dss.to_words.g_cons)
print(verse_dss.to_words.lex)

['KLNW', 'K', 'Y>N', 'T<JNW', '>JC', 'L', 'DRKW', 'PNJNW', 'W', 'JHWH', 'HPGJ<', 'BW', '>T', '<WN', 'KLNW', None]
['KL/', 'K', 'Y>N/', 'T<H[', '>JC/', 'L', 'DRK/', 'PNH[', 'W', 'JHWH/', 'PG<[', 'B', '>T', '<WN/', 'KL/', '']


In [16]:
# Show g_cons and lex for bhsa verse
print(verse_bhsa.to_words.g_cons)
print(verse_bhsa.to_words.lex)

['KLNW', 'K', '', 'Y>N', 'T<JNW', '>JC', 'L', 'DRKW', 'PNJNW', 'W', 'JHWH', 'HPGJ<', 'BW', '>T', '<WN', 'KLNW']
['KL/', 'K', 'H', 'Y>N/', 'T<H[', '>JC/', 'L', 'DRK/', 'PNH[', 'W', 'JHWH/', 'PG<[', 'B', '>T', '<WN/', 'KL/']


In [17]:
verse_types = []

for sign in verse_dss.to_signs:
    if is_sign_unc(sign):
        sign_type = 1
    else:
        sign_type = 0
    verse_types.append(sign_type)

In [18]:
print(verse_types)

[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]


In [152]:
for sign in verse_dss.to_signs:
    if sign.type[0] == "missing" or sign.type[0] == "unc":
        print(f"_{sign}_'s type is {sign.type}.'")

In [21]:
verse_dss

<word_32 "KJ LW M<WPP L >CR MWYQ LH K <T H RJCWN HQL >RY ZBWLWN W H >RY NPTLJ W H >XRWN HKBJD DRK H JM <BR H JRDN GLJL H GW>JM">

In [18]:
for word in verse_dss.to_words:
    if word.vt != [None]:
        print(word, word.lex, word.vt)

M<WPP ['<WP['] ['ptca']
HQL ['QLL['] ['perf']
HKBJD ['KBD['] ['perf']


In [19]:
for word in verse_bhsa.to_words:
    if word.vt != ["NA"]:
        print(word, word.lex, word.vt)

HQL ['QLL['] ['perf']
HKBJD ['KBD['] ['perf']
