In [1]:
import pandas as pd
from matplotlib import pyplot as plt
%matplotlib inline
import seaborn as sns
from munch import Munch

In [2]:
from tf.app import use

# Load the Dead Sea Scrolls corpus first. The code relies on hoist=globals() to put
# F, L and T into the notebook namespace, so they are captured under DSS-specific
# names here, before the second use() call rebinds them.
B = use("etcbc/dss", checkout="clone", version="1.8", hoist=globals())
Fdss = F
Ldss = L
Tdss = T
# Bundle of the DSS handles; `name` is what the helper functions below branch on.
DSS = Munch({"F": F, "L": L, "T": T, "name": "DSS"})

# Load the BHSA; from here on the global F, L, T refer to the BHSA corpus.
A = use("etcbc/bhsa", hoist=globals())
Fmt = F
Lmt = L
Tmt = T
# Bundle of the BHSA handles, parallel to DSS above.
BHSA = Munch({"F": F, "L": L, "T": T, "name": "BHSA"})

**Locating corpus resources ...**

Name,# of nodes,# slots/node,% coverage
scroll,1001,1428.81,100
lex,10450,129.14,94
fragment,11182,127.91,100
line,52895,27.04,100
clause,125,12.85,0
cluster,101099,6.68,47
phrase,315,5.1,0
word,500995,2.81,99
sign,1430241,1.0,100


**Locating corpus resources ...**

Name,# of nodes,# slots/node,% coverage
book,39,10938.21,100
chapter,929,459.19,100
lex,9230,46.22,100
verse,23213,18.38,100
half_verse,45179,9.44,100
sentence,63717,6.7,100
sentence_atom,64514,6.61,100
clause,88131,4.84,100
clause_atom,90704,4.7,100
phrase,253203,1.68,100


In [3]:
# Remove the hoisted F, L, T names; presumably so that later cells have to go through
# the explicit DSS.* / BHSA.* bundles instead of an ambiguous global.
del F, L, T

In [4]:
# Semicolon-separated starter table; the cells below read its book, chapter, verse
# and bhsa_lex columns.
df = pd.read_csv("mt_isa_df_starter.csv", sep=";")

In [7]:
# NOTE(review): leftover exploration. This raises AttributeError because BHSA.F has no
# attribute `all` (see the error output of this cell); consider removing the cell.
BHSA.F.all.v(1030)

AttributeError: 'NodeFeatures' object has no attribute 'all'

In [5]:
# Index the words of the DSS by biblical section.
# dss_sections maps a section tuple (book, chapter, verse) to a second dictionary,
# which maps the name of a scroll (key) to the list of word nodes of that verse
# in that scroll (value).
# NB: all the items in a section tuple are strings

# Example
# {('Genesis', '24', '24'): {'1Q1': [1890183, 1890184, ...]}, ...}

dss_sections = {}

for word_node in DSS.F.otype.s("word"):
    scroll_name = DSS.T.scrollName(DSS.L.u(word_node, "scroll")[0])
    section_key = (
        DSS.F.book_etcbc.v(word_node),
        DSS.F.chapter.v(word_node),
        DSS.F.verse.v(word_node),
    )
    # Skip words that lack a scroll name or any part of the biblical reference.
    if None in (scroll_name, *section_key):
        continue
    words_by_scroll = dss_sections.setdefault(section_key, {})
    words_by_scroll.setdefault(scroll_name, []).append(word_node)

In [6]:
def section_words(section, source, scroll=None):
    """Return the word nodes of the verse identified by section.

    Parameters
    ----------
    section : sequence
        (book, chapter, verse). For the DSS, chapter and verse are converted to
        strings before the lookup in dss_sections.
    source : object
        Corpus bundle with a ``name`` attribute ("BHSA" or "DSS"); for the BHSA its
        ``T`` and ``L`` attributes are used.
    scroll : str, optional
        Name of the scroll; only used when the source is the DSS.

    Returns
    -------
    The word nodes of the verse: the result of ``L.d(verse, "word")`` for the BHSA,
    or the list stored in dss_sections for the DSS.

    Raises
    ------
    KeyError
        For the DSS, when the section or the scroll is not present in dss_sections.
    AssertionError
        When ``source.name`` is neither "BHSA" nor "DSS".
    """
    if source.name == "BHSA":
        verse_id = source.T.nodeFromSection(section)
        return source.L.d(verse_id, "word")
    if source.name == "DSS":
        section = (section[0], str(section[1]), str(section[2]))
        return dss_sections[section][scroll]
    # Raised explicitly instead of `assert False`, so the check is not stripped when
    # Python runs with -O; the exception type stays the same for callers.
    raise AssertionError(
        f"Invalid source {source.name}. The source should be BHSA or DSS."
    )

In [7]:
#Test area

#  dss_sections
# section_words(["Isaiah", "1", "1"], DSS, "1Qisaa")
# section_words(["Isaiah", 20, 6], BHSA)

In [8]:
# Retrieve the Hebrew text of a verse from a section (book, chapter, verse)

def get_verse_heb(section, source, scroll=None):
    """Return the text of the verse at section, as rendered by the source's T.text.

    The verse words are looked up with section_words, so scroll is only relevant
    for the DSS.
    """
    return source.T.text(section_words(section, source, scroll))

In [9]:
# Test area
# get_verse_heb(["Isaiah", 20, 6], DSS, "1Qisaa")
# get_verse_heb(["Isaiah", 20, 6], BHSA)

In [10]:
# How to retrieve information from a dataframe

# verse_heb = []

### method 1

# for book, chapter, verse in sections:
#     verse_heb.append(get_verse_heb(book, chapter, verse))
    
# method 2
# for section in sections:
#     verse_heb.append(get_verse_heb(*section)) # *list is like calling the function with each element of the list one by one

### method 3: running through the columns to get book chapter verse
# for section in zip(df.book, df.chapter, df.verse):
#     verse_heb.append(get_verse_heb(*section))

### method 4: line by line with index (_) 
# in order to call the function from either BHSA or DSS, use **origin (BHSA or DSS) at the end

# for _, row in df.iterrows():
#     verse_heb.append(get_verse_heb(row.book, row.chapter, row.verse, **BHSA))
    
# # add the list as a new column
# df["verse_heb"] = verse_heb

In [11]:
# Get the consonantal representation (g_cons) of any thing

def get_g_cons(etcbc_id, source):
    """Return the space-separated g_cons values of the words of a node.

    A word node stands for itself; any other node is expanded to the words it
    contains via ``source.L.d``. ``None`` yields the empty string.
    """
    if etcbc_id is None:
        return ""
    is_word = source.F.otype.v(etcbc_id) == "word"
    word_nodes = [etcbc_id] if is_word else source.L.d(etcbc_id, "word")
    consonants = []
    for word_node in word_nodes:
        consonants.append(source.F.g_cons.v(word_node))
    return " ".join(consonants)

In [12]:
# Retrieve verb info

def get_verb_id(section, lexeme, source, scroll=None):
    """Return the node of the first word in the verse whose lexeme equals lexeme.

    The BHSA is matched on its ``lex`` feature, any other source on ``lex_etcbc``.
    When no word matches, a message is printed and ``None`` is returned.
    """
    for candidate in section_words(section, source, scroll):
        lex_feature = source.F.lex if source.name == "BHSA" else source.F.lex_etcbc
        if lex_feature.v(candidate) == lexeme:
            return candidate
    print(f"verb {lexeme} not found in {section, scroll}")
    return None


def get_verb_heb(section, lexeme, source, scroll=None):
    """Return the text of the verb found by get_verb_id, or "" when it is not found."""
    node = get_verb_id(section, lexeme, source, scroll)
    return "" if node is None else source.T.text(node)
     
def get_verb_stem(section, lexeme, source, scroll=None):
    """Return the verbal stem of the verb, or "" when the verb is not found.

    The BHSA value is read from ``vs``, that of any other source from ``vs_etcbc``.
    """
    node = get_verb_id(section, lexeme, source, scroll)
    if node is None:
        return ""
    stem_feature = source.F.vs if source.name == "BHSA" else source.F.vs_etcbc
    return stem_feature.v(node)

def get_verb_tense(section, lexeme, source, scroll=None):
    """Return the verbal tense of the verb, or "" when the verb is not found.

    The BHSA value is read from ``vt``, that of any other source from ``vt_etcbc``.
    """
    node = get_verb_id(section, lexeme, source, scroll)
    if node is None:
        return ""
    tense_feature = source.F.vt if source.name == "BHSA" else source.F.vt_etcbc
    return tense_feature.v(node)

In [13]:
# Retrieve the phrase and the clause

def get_phrase_heb(section, lexeme, source, scroll=None):
    """Return the text of the phrase node(s) above the verb, or "" if the verb is not found."""
    node = get_verb_id(section, lexeme, source, scroll)
    if node is not None:
        return source.T.text(source.L.u(node, "phrase"))
    return ""
        

def get_phrase(section, lexeme, source, scroll=None):
    """Return the g_cons values of the words of the verb's phrase, joined by spaces.

    Returns "" when the verb is not found or when no phrase node lies above it.
    """
    node = get_verb_id(section, lexeme, source, scroll)
    if node is None:
        return ""
    enclosing = source.L.u(node, "phrase")
    if len(enclosing) > 0:
        # Only the first phrase returned by L.u is used.
        return " ".join(
            source.F.g_cons.v(member) for member in source.L.d(enclosing[0], "word")
        )
    return ""

def get_clause_heb(section, lexeme, source, scroll=None):
    """Return the text of the clause node(s) above the verb, or "" if the verb is not found."""
    node = get_verb_id(section, lexeme, source, scroll)
    if node is not None:
        return source.T.text(source.L.u(node, "clause"))
    return ""

def get_clause(section, lexeme, source, scroll=None):
    """Return the g_cons values of the words of the verb's clause, joined by spaces.

    Returns "" when the verb is not found or when no clause node lies above it.
    """
    node = get_verb_id(section, lexeme, source, scroll)
    if node is None:
        return ""
    enclosing = source.L.u(node, "clause")
    if len(enclosing) > 0:
        # Only the first clause returned by L.u is used.
        return " ".join(
            source.F.g_cons.v(member) for member in source.L.d(enclosing[0], "word")
        )
    return ""

In [14]:
# Testing area

# get_verb_heb(["Isaiah", 6, 6], "<WP[", DSS, "1Qisaa") 

# get_phrase_heb(["Isaiah", 7, 1], "<LH[", DSS, "1Qisaa")

# get_verb_id(["Isaiah", 20, 6], "NWS[", DSS, "1Qisaa")

# get_verb_stem(["Isaiah", 6, 6], "<WP[", DSS, "1Qisaa")

# get_verb_tense(["Isaiah", 6, 6], "<WP[", BHSA)

# get_clause_heb(["Isaiah", 7, 1], "<LH[", DSS, "1Qisaa")

# get_phrase(["Isaiah", 7, 1], "<LH[", DSS, "1Qisaa")

# get_clause(["Isaiah", 7, 1], "<LH[", DSS, "1Qisaa")

In [15]:
# Retrieve subject and complement(s) of the verb

def get_subject(section, lexeme, source, scroll=None):
    """Return the g_cons text of the subject phrase in the verb's clause.

    The subject is the first phrase of the clause whose ``function`` is "Subj".
    Returns "" when the verb, its clause or such a phrase is missing.
    """
    node = get_verb_id(section, lexeme, source, scroll)
    if node is None:
        return ""
    enclosing = source.L.u(node, "clause")
    if len(enclosing) == 0:
        return ""
    first_subject = next(
        (
            phrase
            for phrase in source.L.d(enclosing[0], "phrase")
            if source.F.function.v(phrase) == "Subj"
        ),
        None,
    )
    return get_g_cons(first_subject, source)
    
def get_complement(section, lexeme, source, scroll=None):
    """Return the g_cons text of the complement phrase in the verb's clause.

    The complement is the first phrase of the clause whose ``function`` is "Cmpl".
    Returns "" when the verb, its clause or such a phrase is missing.
    """
    node = get_verb_id(section, lexeme, source, scroll)
    if node is None:
        return ""
    enclosing = source.L.u(node, "clause")
    if len(enclosing) == 0:
        return ""
    first_complement = next(
        (
            phrase
            for phrase in source.L.d(enclosing[0], "phrase")
            if source.F.function.v(phrase) == "Cmpl"
        ),
        None,
    )
    return get_g_cons(first_complement, source)

In [16]:
# Add columns for verse in Hebrew, verb in Hebrew, verbal stem, verbal tense, phrase, clause, subject and complement

# Build one enriched copy of the starter dataframe per source (the MT from the BHSA,
# the scroll 1Qisaa from the DSS) and stack the copies into final_df.

# (column, getter) pairs that are evaluated against the source of the current pass,
# in the order in which the columns are added. get_verse_heb takes no lexeme, hence
# the adapter.
PER_SOURCE_COLUMNS = (
    ("verse_heb", lambda section, lexeme, source, scroll: get_verse_heb(section, source, scroll)),
    ("verb_heb", get_verb_heb),
    ("verb_stem", get_verb_stem),
    ("verb_tense", get_verb_tense),
    ("verb_phrase_heb", get_phrase_heb),
    ("verb_phrase", get_phrase),
    ("verb_clause_heb", get_clause_heb),
    ("verb_clause", get_clause),
)

# NOTE(review): subject and complement are always looked up in the BHSA, also for the
# DSS rows, exactly as the original cell did. The load summary reports 0% clause and
# phrase coverage for the DSS, which may be the reason; confirm that this is intended.
BHSA_ONLY_COLUMNS = (
    ("subject", get_subject),
    ("complement", get_complement),
)


def _column_values(frame, getter, source, scroll=None):
    """Apply getter to every row of frame and return the results as a list."""
    return [
        getter([row.book, row.chapter, row.verse], row.bhsa_lex, source, scroll)
        for _, row in frame.iterrows()
    ]


dfs = []
source_df = df

for source, scroll in ((BHSA, None), (DSS, "1Qisaa")):
    # Work on a separately named copy so that the starter frame `df` is left untouched
    # and the cell can be re-run from the same input.
    enriched = source_df.copy()
    enriched["scroll"] = "MT" if scroll is None else scroll

    for column, getter in PER_SOURCE_COLUMNS:
        enriched[column] = _column_values(enriched, getter, source, scroll)

    for column, getter in BHSA_ONLY_COLUMNS:
        enriched[column] = _column_values(enriched, getter, BHSA)

    dfs.append(enriched)

final_df = pd.concat(dfs, ignore_index=True)


verb NWS[ not found in (['Isaiah', 20, 6], '1Qisaa')
verb NWS[ not found in (['Isaiah', 20, 6], '1Qisaa')
verb NWS[ not found in (['Isaiah', 20, 6], '1Qisaa')
verb NWS[ not found in (['Isaiah', 20, 6], '1Qisaa')
verb NWS[ not found in (['Isaiah', 20, 6], '1Qisaa')
verb NWS[ not found in (['Isaiah', 20, 6], '1Qisaa')
verb NWS[ not found in (['Isaiah', 20, 6], '1Qisaa')


In [None]:
# Printing the final_df

# final_df

In [17]:
# Reorder the columns: lexeme and reference first, then the scroll, followed by the
# Hebrew text and the grammatical annotations.
column_order = [
    "bhsa_lex",
    "book",
    "chapter",
    "verse",
    "scroll",
    "verb_heb",
    "verse_heb",
    "verb_stem",
    "verb_tense",
    "verb_phrase_heb",
    "verb_phrase",
    "verb_clause_heb",
    "verb_clause",
    "subject",
    "complement",
]
final_df = final_df[column_order]
final_df

Unnamed: 0,bhsa_lex,book,chapter,verse,scroll,verb_heb,verse_heb,verb_stem,verb_tense,verb_phrase_heb,verb_phrase,verb_clause_heb,verb_clause,subject,complement
0,<WP[,Isaiah,6,6,MT,יָּ֣עָף,וַיָּ֣עָף אֵלַ֗י אֶחָד֙ מִן־הַשְּׂרָפִ֔ים וּבְ...,qal,wayq,יָּ֣עָף,J<P,וַיָּ֣עָף אֵלַ֗י אֶחָד֙ מִן־הַשְּׂרָפִ֔ים,W J<P >LJ >XD MN H FRPJM,>XD MN H FRPJM,>LJ
1,<LH[,Isaiah,7,1,MT,עָלָ֣ה,וַיְהִ֡י בִּימֵ֣י אָ֠חָז בֶּן־יֹותָ֨ם בֶּן־עֻז...,qal,perf,עָלָ֣ה,<LH,עָלָ֣ה רְצִ֣ין מֶֽלֶךְ־אֲ֠רָם וּפֶ֨קַח בֶּן־רְ...,<LH RYJN MLK >RM W PQX BN RMLJHW MLK JFR>L JRW...,RYJN MLK >RM W PQX BN RMLJHW MLK JFR>L,JRWCLM
2,JY>[,Isaiah,7,3,MT,צֵא־,וַיֹּ֣אמֶר יְהוָה֮ אֶֽל־יְשַׁעְיָהוּ֒ צֵא־נָא֙...,qal,impv,צֵא־,Y>,צֵא־נָא֙ אַתָּ֕ה וּשְׁאָ֖ר יָשׁ֣וּב בְּנֶ֑ךָ א...,Y> N> >TH W C>R_JCWB BNK >L QYH T<LT H BRKH H ...,>TH W C>R_JCWB BNK,
3,JY>[,Isaiah,7,3,MT,צֵא־,וַיֹּ֣אמֶר יְהוָה֮ אֶֽל־יְשַׁעְיָהוּ֒ צֵא־נָא֙...,qal,impv,צֵא־,Y>,צֵא־נָא֙ אַתָּ֕ה וּשְׁאָ֖ר יָשׁ֣וּב בְּנֶ֑ךָ א...,Y> N> >TH W C>R_JCWB BNK >L QYH T<LT H BRKH H ...,>TH W C>R_JCWB BNK,
4,<LH[,Isaiah,7,6,MT,נַעֲלֶ֤ה,נַעֲלֶ֤ה בִֽיהוּדָה֙ וּנְקִיצֶ֔נָּה וְנַבְקִעֶ...,qal,impf,נַעֲלֶ֤ה,N<LH,נַעֲלֶ֤ה בִֽיהוּדָה֙,N<LH B JHWDH,,B JHWDH
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
89,NF>[,Isaiah,39,6,1Qisaa,נשאו,הנה ימים באים ונשאו כול אשר בבית׳כה ואשר אצרו ...,qal,perf,,,,,KL,BBL
90,JRD[,Isaiah,52,4,1Qisaa,ירד,כיא כוה אמר יהוה מצרים ירד עמ׳י ברישונה לגור ש...,qal,perf,,,,,<MJ,MYRJM
91,CLX[,Isaiah,66,19,1Qisaa,שלחתי,ושמתי ב׳המה אותות ושלחתי מ׳המה פליטים אל הגואי...,piel,perf,,,,,,>L H GWJM TRCJC PWL W LWD MCKJ QCT TBL W JWN H...
92,BW>[,Isaiah,66,20,1Qisaa,הביאו,והביאו את כול כול אחי׳כמה מכל הגואים מנחה ליהו...,hif,perf,,,,,,<L HR QDCJ JRWCLM


In [None]:
# Testing area
# get_verb_id(["Isaiah", 7, 1], "<LH[", DSS, "1Qisaa")
# get_subject("Genesis", 8, 3, "HLK[", **BHSA)
# get_g_cons(3592)
# get_complement(["Genesis", 8, 3], "HLK[", BHSA)
# get_subject(["Isaiah", 7, 1], "<LH[", DSS, "1Qisaa")

In [18]:
# Save the combined MT + DSS table to a CSV file, using the same ";" separator as the
# starter file and leaving out the dataframe index.

final_df.to_csv("mt_and_dss_isaiah_df.csv", sep=";", index=False)

In [98]:
# TODO: Retrieve information about the complement(s)

# create a function to distinguish complements with/without prepositions

# Module-level accumulator filled by get_cmpl_type: consonantal form of the verb ->
# (complement phrase, first preposition in that phrase), both as g_cons text.
verb_complements = {}

def get_cmpl_type(section, lexeme, source, scroll=None):
    """Record the complement(s) of a verb that contain a preposition.

    For every phrase with function "Cmpl" in the verb's clause, the first word whose
    part of speech is "prep" is looked up. When there is one, the module-level
    verb_complements dictionary gets the entry
    ``g_cons(verb) -> (g_cons(complement), g_cons(preposition))``; a later complement
    of the same verb replaces an earlier one. Complements without a preposition are
    not recorded.

    Returns "" when the verb or its clause is not found, otherwise the (shared)
    verb_complements dictionary.
    """
    verb_id = get_verb_id(section, lexeme, source, scroll)
    if verb_id is None:
        return ""
    clauses = source.L.u(verb_id, "clause")
    if len(clauses) == 0:
        return ""
    for phrase in source.L.d(clauses[0], "phrase"):
        if source.F.function.v(phrase) != "Cmpl":
            continue
        first_prep = next(
            (
                member
                for member in source.L.d(phrase, "word")
                if source.F.sp.v(member) == "prep"
            ),
            None,
        )
        if first_prep is not None:
            verb_complements[get_g_cons(verb_id, source)] = (
                get_g_cons(phrase, source),
                get_g_cons(first_prep, source),
            )
    return verb_complements

In [99]:
# Try get_cmpl_type on Isaiah 7:6 in the BHSA; the result maps the verb's consonantal
# form to (complement, preposition).
get_cmpl_type(["Isaiah", 7, 6], "<LH[", BHSA)

{'N<LH': ('B JHWDH', 'B')}

In [87]:
# The `lex` feature has no value on node 775526, so this prints None.
print(BHSA.F.lex.v(775526))

None


In [88]:
# Consonantal text of the first "Cmpl" phrase in the clause of the verb <LH[ in Isaiah 7:6.
get_complement(["Isaiah", 7, 6], "<LH[", BHSA)

'B JHWDH'

In [89]:
# NOTE(review): the result of this otype lookup is discarded; it is not the last
# expression of the cell, so nothing is displayed.
BHSA.F.otype.v(775526)
# Word nodes contained in node 775526.
words = BHSA.L.d(775526, "word")
print(words)

(214369, 214370)


In [93]:
# Part of speech of word node 214369, the first of the two words found above.
BHSA.F.sp.v(214369)

'prep'

In [45]:
# Text of node 775526 as rendered by T.text.
BHSA.T.text(775526)

'בִֽיהוּדָה֙ '

In [49]:
# Consonantal transcription of node 775526 through the get_g_cons helper.
get_g_cons(775526, BHSA)

'B JHWDH'

In [69]:
# NOTE(review): leftover IPython help lookup; `NodeFeature` is not defined in this
# namespace (see the output), so the cell can be removed.
NodeFeature?

Object `NodeFeature` not found.


In [92]:
# Print the value of every feature name returned by Fall() for word node 214369.
# `Fall` is not defined in this notebook; presumably it was hoisted into globals by
# the last use(..., hoist=globals()) call, i.e. the BHSA one (TODO confirm).
for feature in Fall():
    print(feature, getattr(BHSA.F, feature).v(214369))

book None
book@am None
book@ar None
book@bn None
book@da None
book@de None
book@el None
book@en None
book@es None
book@fa None
book@fr None
book@he None
book@hi None
book@id None
book@ja None
book@ko None
book@la None
book@nl None
book@pa None
book@pt None
book@ru None
book@sw None
book@syc None
book@tr None
book@ur None
book@yo None
book@zh None
chapter None
code None
det None
domain None
freq_lex 15542
function None
g_cons B
g_cons_utf8 ב
g_lex BI-
g_lex_utf8 בִ
g_word BI45-
g_word_utf8 בִֽ
gloss in
gn NA
label None
language Hebrew
lex B
lex_utf8 ב
ls none
nametype None
nme n/a
nu NA
number 2297
otype word
pargr None
pdp prep
pfm n/a
phono vˈi
phono_trailer 
prs absent
prs_gn NA
prs_nu NA
prs_ps NA
ps NA
qere None
qere_trailer None
qere_trailer_utf8 None
qere_utf8 None
rank_lex 3
rela None
sp prep
st NA
tab None
trailer 
trailer_utf8 
txt None
typ None
uvf absent
vbe n/a
vbs n/a
verse None
voc_lex B.:
voc_lex_utf8 בְּ
vs NA
vt NA
