In [1]:
import pandas as pd 

from Bio import pairwise2
from Bio.Seq import Seq

from tfob import TFOb, get_bhsa, get_dss

In [2]:
BHSA = get_bhsa() 
DSS = get_dss()

**Locating corpus resources ...**

Name,# of nodes,# slots / node,% coverage
book,39,10938.21,100
chapter,929,459.19,100
lex,9230,46.22,100
verse,23213,18.38,100
half_verse,45179,9.44,100
sentence,63717,6.7,100
sentence_atom,64514,6.61,100
clause,88131,4.84,100
clause_atom,90704,4.7,100
phrase,253203,1.68,100


**Locating corpus resources ...**

Name,# of nodes,# slots / node,% coverage
scroll,1001,1428.81,100
lex,10450,129.14,94
fragment,11182,127.91,100
line,52895,27.04,100
clause,125,12.85,0
cluster,101099,6.68,47
phrase,315,5.1,0
word,500995,2.81,99
sign,1430241,1.0,100


In [3]:
def align_verse(verse_1, verse_2):
    """Takes two verses (TFOb object) from section (format verse = TFOb.section(["book", "ch", "ver"], source)
    and returns aligned version of the verses as strings."""
    
    # Access the word level of the TFOb object and get the g_cons for each word
    verse1 = [word.g_cons[0] for word in verse_1.to_words]
    verse2 = [word.g_cons[0] for word in verse_2.to_words]
    
    # Removes the None and join the word in a string
    verseA = " ".join([i for i in verse1 if i is not None])
    verseB = " ".join([i for i in verse2 if i is not None])
    
    # Align the two strings
    seq1 = Seq(verseA)
    seq2 = Seq(verseB)
    
    alignements = pairwise2.align.globalxx(seq1, seq2)
    
    v1_al = (alignements[0][0].strip(' '))
    v2_al = (alignements[0][1].strip(' '))
    
    # Returns the aligned strings          
    return v1_al,v2_al

def is_sign_unc(sign):
    """If a sign is a missing or uncertain sign, returns True. Else, returns False."""
    unc_types = ['missing', 'unc']
    return sign.type[0] in unc_types

In [6]:
SCROLL1 = "4Q52"

In [13]:
book = "1_Samuel"
chapter = "23"
verse_num = "20"
verse_bhsa = TFOb.section([book, chapter, verse_num], BHSA)
verse_scroll_1 = TFOb.section([book, chapter, verse_num], DSS, scroll=SCROLL1)

In [14]:
scroll_1_lex = " ".join(verse_scroll_1.lex_etcbc)
bhsa_lex = " ".join(verse_bhsa.to_words.lex)

if scroll_1_lex[-1] == "":
    scroll_1_lex = scroll_1_lex[:-1]

print(bhsa_lex)
print(scroll_1_lex)

W <TH L KL/ >WH/ NPC/ H MLK/ L JRD[ JRD[ W L SGR[ B JD/ H MLK/
W <TH L KL/ NPC/ H MLK/ L JRD[ JRD[ <L SGR[ B JD/ H MLK/ 


In [15]:
# Align BHSA and studied scroll 

print(f"Alignement BHSA and {SCROLL1}\n")

print(verse_bhsa)
print(verse_scroll_1)
#print(align_verse(verse_bhsa, verse_1qisaa))
print("\n")

Alignement BHSA and 4Q52

W <TH L KL >WT NPCK H MLK L RDT RD W LNW HSGJRW B JD H MLK
W <TH L KL NPC H MLK L RDH JRD <LJNW HSGJRW B JD H MLK




In [16]:
# Assuming bhsa_lex and the scroll_lex are strings of words separated by spaces

# Splitting the strings into sets of words
set_bhsa_lex = set(bhsa_lex.split())
set_scroll_1 = set(scroll_1_lex.split())

# Checking difference 1QIsaa vs BHSA
print(set_bhsa_lex.issubset(set_scroll_1))
print(set_scroll_1.issubset(set_bhsa_lex))

diff_bhsa_scroll_1 = set_bhsa_lex - set_scroll_1
diff_scroll_1_bhsa = set_scroll_1 - set_bhsa_lex

print(f"In BHSA you have {diff_bhsa_scroll_1} that is not in {SCROLL1}.")
print(f"In {SCROLL1} you have {diff_scroll_1_bhsa} that is not in BHSA. \n")

False
False
In BHSA you have {'>WH/'} that is not in 4Q52.
In 4Q52 you have {'<L'} that is not in BHSA. 



In [17]:
for clause in verse_bhsa.to_clauses:
    print(clause)
    
    for verb in clause.to_words.filter(sp="verb"):
        print(verb, "from ", verb.lex[0], verb.vs, verb.vt)
    
    for phrase in clause.to_phrases:
        if phrase.function == ["Cmpl"]:
            print("Complement", phrase, phrase.function, phrase.to_words.lex, phrase.det)
            for word in phrase.to_words:
                print(f"number of {word}", word.nu[0])
                if word.det[0] != None and word.det[0] != "absent":
                    print(f"det of {word}", word.det[0])
                if word.nametype[0] != None and word.nametype[0] != "absent":
                    print(word.nametype[0])
                if word.prs[0] != None and word.prs[0] != "absent":
                    print(f"prs of {word}", word.prs[0], word.prs_nu[0], word.prs_gn[0], word.prs_ps[0])
        else:
            print(phrase, phrase.function, phrase.to_words.lex)
        
    print("\n")
    
# Print the words in a BHSA verse that has a directive-he

bhsa_dir_he = verse_bhsa.to_words.uvf

for item in bhsa_dir_he:
    if item == "H":
        ind = bhsa_dir_he.index(item)
        print(f"In BHSA, the word {verse_bhsa.to_words[ind]} has a directive-he")
        
# Print the words in a 1QIsaa verse that has a directive-he

dss_dir_he1 = verse_scroll_1.uvf_etcbc

for item in dss_dir_he1:
    if item == "H":
        ind = dss_dir_he1.index(item)
        print(f"In {SCROLL1}, the word {verse_scroll_1.to_words[ind]} has a directive-he.")

W <TH
W ['Conj'] ['W']
<TH ['Time'] ['<TH']


L KL >WT NPCK RD
RD from  JRD[ ['qal'] ['impv']
L KL >WT NPCK ['Adju'] ['L', 'KL/', '>WH/', 'NPC/']
RD ['Pred'] ['JRD[']


H MLK
H MLK ['Voct'] ['H', 'MLK/']


L RDT
RDT from  JRD[ ['qal'] ['infc']
L RDT ['Pred'] ['L', 'JRD[']


W LNW
W ['Conj'] ['W']
LNW ['Adju'] ['L']


HSGJRW B JD H MLK
HSGJRW from  SGR[ ['hif'] ['infc']
HSGJRW ['PreO'] ['SGR[']
Complement B JD H MLK ['Cmpl'] ['B', 'JD/', 'H', 'MLK/'] ['det']
number of B NA
number of JD sg
number of H NA
prs of H n/a NA NA NA
number of MLK sg




In [18]:
print("BHSA", "\n", verse_bhsa.text)
print(f"{SCROLL1}", "\n", verse_scroll_1.to_words.text)
#print("1Q8", "\n", verse_1q8.to_words.text)

BHSA 
 וְ֠עַתָּה לְכָל־אַוַּ֨ת נַפְשְׁךָ֥ הַמֶּ֛לֶךְ לָרֶ֖דֶת רֵ֑ד וְלָ֥נוּ הַסְגִּירֹ֖ו בְּיַ֥ד הַמֶּֽלֶךְ׃ 
4Q52 
 ועתה לכל נפש המלך לרד׳ה ירד עלי׳נו הסגיר׳ו ביד המלך ׃ 


In [19]:
for word in verse_bhsa.to_words:
    if word.vt != [None]:
        print(word, word.lex, word.vt, word.vs, word.ids)

W ['W'] ['NA'] ['NA'] [155754]
<TH ['<TH'] ['NA'] ['NA'] [155755]
L ['L'] ['NA'] ['NA'] [155756]
KL ['KL/'] ['NA'] ['NA'] [155757]
>WT ['>WH/'] ['NA'] ['NA'] [155758]
NPCK ['NPC/'] ['NA'] ['NA'] [155759]
H ['H'] ['NA'] ['NA'] [155760]
MLK ['MLK/'] ['NA'] ['NA'] [155761]
L ['L'] ['NA'] ['NA'] [155762]
RDT ['JRD['] ['infc'] ['qal'] [155763]
RD ['JRD['] ['impv'] ['qal'] [155764]
W ['W'] ['NA'] ['NA'] [155765]
LNW ['L'] ['NA'] ['NA'] [155766]
HSGJRW ['SGR['] ['infc'] ['hif'] [155767]
B ['B'] ['NA'] ['NA'] [155768]
JD ['JD/'] ['NA'] ['NA'] [155769]
H ['H'] ['NA'] ['NA'] [155770]
MLK ['MLK/'] ['NA'] ['NA'] [155771]


In [21]:
verse_dss = verse_scroll_1
verbs_dss = [verse_dss.to_words.filter(sp="verb")]

for verb in verse_dss.to_words.filter(sp="verb"):
    print(verb, verb.lex, verb.vs, verb.vt, verb.nu, verb.ps, verb.ids)

JRD ['JRD['] ['qal'] ['impf'] ['s'] ['3'] [2012431]


In [22]:
for word in verse_dss.to_words:
    print(word, word.lex, word.vt, word.vs, word.nu, word.ids)

W ['W'] [None] [None] [None] [2012422]
<TH ['<TH'] [None] [None] [None] [2012423]
L ['L'] [None] [None] [None] [2012424]
KL ['KL/'] [None] [None] ['s'] [2012425]
NPC ['NPC/'] [None] [None] ['s'] [2012426]
H ['H'] [None] [None] [None] [2012427]
MLK ['MLK/'] [None] [None] ['s'] [2012428]
L ['L'] [None] [None] [None] [2012429]
RDH ['JRD['] ['infc'] ['qal'] [None] [2012430]
JRD ['JRD['] ['impf'] ['qal'] ['s'] [2012431]
<LJNW ['<L'] [None] [None] [None] [2012432]
HSGJRW ['SGR['] ['infc'] ['hifil'] [None] [2012433]
B ['B'] [None] [None] [None] [2012434]
JD ['JD/'] [None] [None] ['s'] [2012435]
H ['H'] [None] [None] [None] [2012436]
MLK ['MLK/'] [None] [None] ['s'] [2012437]
 [''] [None] [None] [None] [2012438]


In [23]:
for verb in verse_bhsa.to_words.filter(sp="verb"):
    print(verb, verb.lex, verb.vs, verb.vt, verb.nu, verb.ps, verb.ids)

RDT ['JRD['] ['qal'] ['infc'] ['unknown'] ['unknown'] [155763]
RD ['JRD['] ['qal'] ['impv'] ['sg'] ['p2'] [155764]
HSGJRW ['SGR['] ['hif'] ['infc'] ['unknown'] ['unknown'] [155767]


In [15]:
def sign_info(verb):
    sign_rec_words = []
    
    scroll = verb.to_scrolls.scroll[0]
    verse = TFOb.section([verb.book[0], verb.chapter[0], verb.verse[0]], DSS, scroll)
    
    for word in verse.to_words:
        signs = word.to_signs
        sign_rec = []
        for sign in signs:
            if sign.type[0] == "cons": # only keeps consonants
                if sign.rec[0]:
                    sign_rec.append("1")
                else:
                    sign_rec.append("0")
        if sign_rec:
            sign_rec_words.append("".join(sign_rec))
        
    return " ".join(sign_rec_words).strip()

In [16]:
for verb_dss in verbs_dss:
    print(sign_info(verb_dss))
    
print(verse_dss)

1 11111 1 1111 1 111 11111 1 11 1 1111 0 0000 11 1 11 1 1111 11111 111 11 111 111 110 11100
W JGJDW L C>WL L >MR XV>JM H <M L JHWH L >KWL <L H DM W J>MR BGDTM GLW N> >LJ HLM >BN GDWLH


In [145]:
for word in verse_dss.to_words:
    print(word, word.lex, word.sp)

H ['H'] ['ptcl']
DRK ['DRK/'] ['subs']
>CR ['>CR'] ['ptcl']
YWH ['YWH['] ['verb']
JHWH ['JHWH/'] ['subs']
>LWHJKMH ['>LHJM/'] ['suff']
>TKMH ['>T'] ['suff']
TLKW ['HLK['] ['verb']
W ['W'] ['ptcl']
LM<N ['LM<N'] ['ptcl']
TXJW ['XJH['] ['verb']
W ['W'] ['ptcl']
VWB ['VWB['] ['verb']
LKMH ['L'] ['suff']
W ['W'] ['ptcl']
H>RKTMH ['>RK['] ['verb']
JMJN ['JWM/'] ['subs']
B ['B'] ['ptcl']
>RY ['>RY/'] ['subs']
>C [''] [None]
R ['>CR'] ['ptcl']
TWRJCWN ['JRC['] ['suff']
 [''] [None]


In [146]:
# Print the words in a DSS verse that has a directive-he

dss_dir_he = verse_scroll_1.uvf_etcbc

for item in dss_dir_he:
    if item == "H":
        ind = dss_dir_he.index(item)
        print(verse_dss.to_words[ind])

In [147]:
# Print the words in a BHSA verse that has a directive-he

bhsa_dir_he = verse_bhsa.to_words.uvf

for item in bhsa_dir_he:
    if item == "H":
        ind = bhsa_dir_he.index(item)
        print(verse_bhsa.to_words[ind])

In [148]:
# Show the lexemes of the two verses 
print(verse_dss.to_words.lex)
print(verse_bhsa.to_words.lex)

['H', 'DRK/', '>CR', 'YWH[', 'JHWH/', '>LHJM/', '>T', 'HLK[', 'W', 'LM<N', 'XJH[', 'W', 'VWB[', 'L', 'W', '>RK[', 'JWM/', 'B', '>RY/', '', '>CR', 'JRC[', '']
['B', 'KL/', 'H', 'DRK/', '>CR', 'YWH[', 'JHWH/', '>LHJM/', '>T', 'HLK[', 'LM<N', 'XJH[', 'W', 'VWB[', 'L', 'W', '>RK[', 'JWM/', 'B', 'H', '>RY/', '>CR', 'JRC[']


In [149]:
# Show g_cons and lex for dss verse
print(verse_dss.to_words.g_cons)
print(verse_dss.to_words.lex)

['H', 'DRK', '>CR', 'YWH', 'JHWH', '>LWHJKMH', '>TKMH', 'TLKW', 'W', 'LM<N', 'TXJW', 'W', 'VWB', 'LKMH', 'W', 'H>RKTMH', 'JMJN', 'B', '>RY', '>C', 'R', 'TWRJCWN', None]
['H', 'DRK/', '>CR', 'YWH[', 'JHWH/', '>LHJM/', '>T', 'HLK[', 'W', 'LM<N', 'XJH[', 'W', 'VWB[', 'L', 'W', '>RK[', 'JWM/', 'B', '>RY/', '', '>CR', 'JRC[', '']


In [150]:
# Show g_cons and lex for bhsa verse
print(verse_bhsa.to_words.g_cons)
print(verse_bhsa.to_words.lex)

['B', 'KL', 'H', 'DRK', '>CR', 'YWH', 'JHWH', '>LHJKM', '>TKM', 'TLKW', 'LM<N', 'TXJWN', 'W', 'VWB', 'LKM', 'W', 'H>RKTM', 'JMJM', 'B', '', '>RY', '>CR', 'TJRCWN']
['B', 'KL/', 'H', 'DRK/', '>CR', 'YWH[', 'JHWH/', '>LHJM/', '>T', 'HLK[', 'LM<N', 'XJH[', 'W', 'VWB[', 'L', 'W', '>RK[', 'JWM/', 'B', 'H', '>RY/', '>CR', 'JRC[']


In [151]:
verse_types = []

for sign in verse_dss.to_signs:
    if is_sign_unc(sign):
        sign_type = 1
    else:
        sign_type = 0
    verse_types.append(sign_type)

In [152]:
print(verse_types)

[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]


In [153]:
for sign in verse_dss.to_signs:
    if sign.type[0] == "missing" or sign.type[0] == "unc":
        print(f"_{sign}_'s type is {sign.type}.'")

In [154]:
verse_dss

<word_23 "H DRK >CR YWH JHWH >LWHJKMH >TKMH TLKW W LM<N TXJW W VWB LKMH W H>RKTMH JMJN B >RY >C R TWRJCWN">

In [155]:
for word in verse_dss.to_words:
    if word.vt != [None]:
        print(word, word.lex, word.vt, word.vs, word.ids)

YWH ['YWH['] ['perf'] ['piel'] [2069448]
TLKW ['HLK['] ['impf'] ['qal'] [2069452]
TXJW ['XJH['] ['impf'] ['qal'] [2069455]
VWB ['VWB['] ['perf'] ['qal'] [2069457]
H>RKTMH ['>RK['] ['perf'] ['hifil'] [2069460]
TWRJCWN ['JRC['] ['impf'] ['hifil'] [2069466]


In [156]:
for word in verse_bhsa.to_words:
    if word.vt != ["NA"]:
        print(word, word.lex, word.vt)

YWH ['YWH['] ['perf']
TLKW ['HLK['] ['impf']
TXJWN ['XJH['] ['impf']
VWB ['VWB['] ['perf']
H>RKTM ['>RK['] ['perf']
TJRCWN ['JRC['] ['impf']
