In [1]:
import pandas as pd
from matplotlib import pyplot as plt
%matplotlib inline
import seaborn as sns

In [2]:
from tf.app import use


# Load the etcbc/dss corpus (local clone, version 1.8). hoist=globals() is what
# makes the bare names F, L and T available to the assignments just below.
B = use("etcbc/dss", checkout="clone", version="1.8", hoist=globals())
# Keep DSS-specific aliases: the second use(...) call below hoists again, so
# the bare F / L / T names will no longer point at this corpus afterwards.
Fdss = F
Ldss = L
Tdss = T
# Bundle the DSS handles so helpers can be called as helper(..., **DSS).
DSS = {"F": F, "L": L, "T": T}

# Load the etcbc/bhsa corpus. NOTE: from this point on the bare globals
# F, L and T are the ones hoisted by this call (the BHSA handles).
A = use("etcbc/bhsa", hoist=globals())
Fmt = F
Lmt = L
Tmt = T
# Bundle the BHSA handles so helpers can be called as helper(..., **BHSA).
BHSA = {"F": F, "L": L, "T": T}


**Locating corpus resources ...**

Name,# of nodes,# slots/node,% coverage
scroll,1001,1428.81,100
lex,10450,129.14,94
fragment,11182,127.91,100
line,52895,27.04,100
clause,125,12.85,0
cluster,101099,6.68,47
phrase,315,5.1,0
word,500995,2.81,99
sign,1430241,1.0,100


**Locating corpus resources ...**

Name,# of nodes,# slots/node,% coverage
book,39,10938.21,100
chapter,929,459.19,100
lex,9230,46.22,100
verse,23213,18.38,100
half_verse,45179,9.44,100
sentence,63717,6.7,100
sentence_atom,64514,6.61,100
clause,88131,4.84,100
clause_atom,90704,4.7,100
phrase,253203,1.68,100


In [74]:
# Load the semicolon-separated starter table. Later cells read its
# bhsa_lex, book, chapter and verse columns.
df = pd.read_csv("mt_isa_df_starter.csv", sep=";")

In [75]:
# Retrieve the text of a verse, from either BHSA or DSS

# Define a function that returns the verse text for a section (book, chapter, verse)

def get_verse_heb(book, chapter, verse, T, L, F):
    """Return the text of one verse, as rendered by ``T.text``.

    ``book``, ``chapter`` and ``verse`` identify the section to look up.
    ``T``, ``L`` and ``F`` are the corpus handles, normally supplied by
    spreading ``**BHSA`` or ``**DSS``; only ``T`` is used here, ``L`` and ``F``
    are accepted so the spread works.
    """
    section = [book, chapter, verse]
    verse_node = T.nodeFromSection(section)
    return T.text(verse_node)

In [76]:
# add a new column with the verse in Hebrew script

# verse_heb = []

### method 1

# for book, chapter, verse in sections:
#     verse_heb.append(get_verse_heb(book, chapter, verse))
    
# method 2
# for section in sections:
#     verse_heb.append(get_verse_heb(*section)) # *list is like calling the function with each element of the list one by one

### method 3: running through the columns to get book chapter verse
# for section in zip(df.book, df.chapter, df.verse):
#     verse_heb.append(get_verse_heb(*section))

### method 4: line by line with index (_) 
# in order to call the function from either BHSA or DSS, use **origin (BHSA or DSS) at the end

# for _, row in df.iterrows():
#     verse_heb.append(get_verse_heb(row.book, row.chapter, row.verse, **BHSA))
    
# # add the list as a new column
# df["verse_heb"] = verse_heb

In [77]:
# df

In [78]:
# Get the consonantal representation (g_cons) of any node (word, phrase, clause, ...)

def get_g_cons(etcbc_id, T=None, F=None, L=None):
    """Return the consonantal transcription (g_cons) of a node of any type.

    Parameters
    ----------
    etcbc_id : int or None
        Node id. ``None`` yields an empty string.
    T, F, L : optional
        Corpus handles of the corpus the node belongs to, so the function can
        be called as ``get_g_cons(node, **BHSA)`` or ``get_g_cons(node, **DSS)``.
        ``T`` is accepted only to make that spread possible and is not used.
        When ``F`` or ``L`` is omitted the notebook-global ``F`` / ``L`` is
        used, which is the behaviour of a plain ``get_g_cons(node)`` call.

    Returns
    -------
    str
        The g_cons values of the node's words, joined by single spaces.
    """
    if etcbc_id is None:
        return ""
    # The global F / L are whichever corpus was hoisted last; prefer the
    # explicitly passed handles so a node is never read through the wrong corpus.
    feature_api = F if F is not None else globals()["F"]
    if feature_api.otype.v(etcbc_id) == "word":
        words = [etcbc_id]
    else:
        # L is only needed (and only resolved) for non-word nodes.
        locality_api = L if L is not None else globals()["L"]
        words = locality_api.d(etcbc_id, "word")
    return " ".join(feature_api.g_cons.v(word) for word in words)

In [79]:
# Retrieve verb info

def get_verb_id(book, chapter, verse, lexeme, T, F, L):
    """Return the node id of the first word in a verse that has the given lexeme.

    Parameters
    ----------
    book, chapter, verse
        Section to look up with ``T.nodeFromSection``.
    lexeme
        Value compared against ``F.lex`` of every word in the verse.
    T, F, L
        Corpus handles, normally supplied by spreading ``**BHSA`` or ``**DSS``.

    Returns
    -------
    The id of the first matching word. Later words with the same lexeme in the
    same verse are never returned.

    Raises
    ------
    AssertionError
        If the section does not resolve to a node, or no word of the verse
        carries ``lexeme``. The message is ``(book, chapter, verse, lexeme)``.
    """
    # Use a separate name for the node: the `verse` argument must stay intact so
    # the failure message reports the verse number rather than a node id.
    verse_node = T.nodeFromSection([book, chapter, verse])
    assert verse_node is not None, (book, chapter, verse, lexeme)

    verb = next(
        (word for word in L.d(verse_node, "word") if F.lex.v(word) == lexeme),
        None,
    )

    # Tell the caller which row could not be resolved.
    assert verb is not None, (book, chapter, verse, lexeme)
    return verb

def get_verb_heb(book, chapter, verse, lexeme, T, F, L):
    """Return the text of the verb word, as rendered by ``T.text``.

    The word is located with ``get_verb_id`` using the same arguments.
    """
    verb_node = get_verb_id(book, chapter, verse, lexeme, T, F, L)
    return T.text(verb_node)
     
def get_verb_stem(book, chapter, verse, lexeme, T, F, L):
    """Return the verbal stem (feature ``vs``) of the verb found by ``get_verb_id``."""
    return F.vs.v(get_verb_id(book, chapter, verse, lexeme, T, F, L))

def get_verb_tense(book, chapter, verse, lexeme, T, F, L):
    """Return the verbal tense (feature ``vt``) of the verb found by ``get_verb_id``."""
    return F.vt.v(get_verb_id(book, chapter, verse, lexeme, T, F, L))

In [80]:
# Retrieve the phrase and the clause

def get_phrase_heb(book, chapter, verse, lexeme, T, F, L):
    """Return the text (``T.text``) of the phrase that contains the verb.

    The verb is located with ``get_verb_id``; the phrase is the first
    ``phrase`` node returned by ``L.u`` for that word.
    """
    verb_node = get_verb_id(book, chapter, verse, lexeme, T, F, L)
    enclosing_phrases = L.u(verb_node, "phrase")
    return T.text(enclosing_phrases[0])

def get_phrase(book, chapter, verse, lexeme, T, F, L):
    """Return the g_cons transcription of the phrase that contains the verb.

    The verb is located with ``get_verb_id``; the g_cons values of the words
    of its first enclosing ``phrase`` node are joined by single spaces.
    """
    verb_node = get_verb_id(book, chapter, verse, lexeme, T, F, L)
    phrase_node = L.u(verb_node, "phrase")[0]
    return " ".join(map(F.g_cons.v, L.d(phrase_node, "word")))

def get_clause_heb(book, chapter, verse, lexeme, T, F, L):
    """Return the text (``T.text``) of the clause that contains the verb.

    The verb is located with ``get_verb_id``; the clause is the first
    ``clause`` node returned by ``L.u`` for that word.
    """
    verb_node = get_verb_id(book, chapter, verse, lexeme, T, F, L)
    enclosing_clauses = L.u(verb_node, "clause")
    return T.text(enclosing_clauses[0])

def get_clause(book, chapter, verse, lexeme, T, F, L):
    """Return the g_cons transcription of the clause that contains the verb.

    The verb is located with ``get_verb_id``; the g_cons values of the words
    of its first enclosing ``clause`` node are joined by single spaces.
    """
    verb_node = get_verb_id(book, chapter, verse, lexeme, T, F, L)
    clause_node = L.u(verb_node, "clause")[0]
    return " ".join(map(F.g_cons.v, L.d(clause_node, "word")))

In [81]:
# Testing area

# NB: I test with ("Genesis", 8, 3, "HLK[", **BHSA) because the verbal phrase contains more than the verb

# get_verb_heb("Genesis", 8, 3, "HLK[", **BHSA) 

# get_phrase_heb("Genesis", 8, 3, "HLK[", **BHSA) 

# get_verb_stem("Genesis", 8, 3, "HLK[", **BHSA)

# get_verb_tense("Genesis", 8, 3, "HLK[", **BHSA)

# get_clause_heb("Genesis", 8, 3, "HLK[", **BHSA)

# get_phrase("Genesis", 8, 3, "HLK[", **BHSA)

# get_clause("Genesis", 8, 3, "HLK[", **BHSA)

In [82]:
# Add columns for the verse in Hebrew, the verb in Hebrew, the verbal stem,
# the verbal tense, the phrase and the clause (all looked up in BHSA).

# Column name -> helper that computes it for one (book, chapter, verse, lexeme).
# The dict order is the order in which the columns are appended to the frame.
verb_column_getters = {
    "verb_heb": get_verb_heb,
    "verb_stem": get_verb_stem,
    "verb_tense": get_verb_tense,
    "verb_phrase_heb": get_phrase_heb,
    "verb_phrase": get_phrase,
    "verb_clause_heb": get_clause_heb,
    "verb_clause": get_clause,
}

# Build the (book, chapter, verse) keys once instead of re-iterating the frame
# row by row for every new column.
sections = list(zip(df.book, df.chapter, df.verse))

# The verse text depends on the section only, not on the lexeme.
df["verse_heb"] = [get_verse_heb(*section, **BHSA) for section in sections]

# Every other column is derived from the verb occurrence in that verse.
for column, getter in verb_column_getters.items():
    df[column] = [
        getter(*section, lexeme, **BHSA)
        for section, lexeme in zip(sections, df.bhsa_lex)
    ]

In [83]:
# Reorder the columns (any column not listed here is dropped).
column_order = [
    "bhsa_lex", "book", "chapter", "verse",
    "verb_heb", "verse_heb", "verb_stem", "verb_tense",
    "verb_phrase_heb", "verb_phrase", "verb_clause_heb", "verb_clause",
]
# .copy() makes the reordered frame independent of the frame it was selected
# from, so the column assignments in later cells act on a frame of their own.
df = df[column_order].copy()
df

Unnamed: 0,bhsa_lex,book,chapter,verse,verb_heb,verse_heb,verb_stem,verb_tense,verb_phrase_heb,verb_phrase,verb_clause_heb,verb_clause
0,<WP[,Isaiah,6,6,יָּ֣עָף,וַיָּ֣עָף אֵלַ֗י אֶחָד֙ מִן־הַשְּׂרָפִ֔ים וּבְ...,qal,wayq,יָּ֣עָף,J<P,וַיָּ֣עָף אֵלַ֗י אֶחָד֙ מִן־הַשְּׂרָפִ֔ים,W J<P >LJ >XD MN H FRPJM
1,<LH[,Isaiah,7,1,עָלָ֣ה,וַיְהִ֡י בִּימֵ֣י אָ֠חָז בֶּן־יֹותָ֨ם בֶּן־עֻז...,qal,perf,עָלָ֣ה,<LH,עָלָ֣ה רְצִ֣ין מֶֽלֶךְ־אֲ֠רָם וּפֶ֨קַח בֶּן־רְ...,<LH RYJN MLK >RM W PQX BN RMLJHW MLK JFR>L JRW...
2,JY>[,Isaiah,7,3,צֵא־,וַיֹּ֣אמֶר יְהוָה֮ אֶֽל־יְשַׁעְיָהוּ֒ צֵא־נָא֙...,qal,impv,צֵא־,Y>,צֵא־נָא֙ אַתָּ֕ה וּשְׁאָ֖ר יָשׁ֣וּב בְּנֶ֑ךָ א...,Y> N> >TH W C>R_JCWB BNK >L QYH T<LT H BRKH H ...
3,JY>[,Isaiah,7,3,צֵא־,וַיֹּ֣אמֶר יְהוָה֮ אֶֽל־יְשַׁעְיָהוּ֒ צֵא־נָא֙...,qal,impv,צֵא־,Y>,צֵא־נָא֙ אַתָּ֕ה וּשְׁאָ֖ר יָשׁ֣וּב בְּנֶ֑ךָ א...,Y> N> >TH W C>R_JCWB BNK >L QYH T<LT H BRKH H ...
4,<LH[,Isaiah,7,6,נַעֲלֶ֤ה,נַעֲלֶ֤ה בִֽיהוּדָה֙ וּנְקִיצֶ֔נָּה וְנַבְקִעֶ...,qal,impf,נַעֲלֶ֤ה,N<LH,נַעֲלֶ֤ה בִֽיהוּדָה֙,N<LH B JHWDH
5,BW>[,Isaiah,7,24,יָ֣בֹוא,בַּחִצִּ֥ים וּבַקֶּ֖שֶׁת יָ֣בֹוא שָׁ֑מָּה כִּי...,qal,impf,יָ֣בֹוא,JBW>,בַּחִצִּ֥ים וּבַקֶּ֖שֶׁת יָ֣בֹוא שָׁ֑מָּה,B XYJM W B QCT JBW> CMH
6,BW>[,Isaiah,7,25,תָבֹ֣וא,וְכֹ֣ל הֶהָרִ֗ים אֲשֶׁ֤ר בַּמַּעְדֵּר֙ יֵעָ֣דֵ...,qal,impf,תָבֹ֣וא,TBW>,לֹֽא־תָבֹ֣וא שָׁ֔מָּה יִרְאַ֖ת שָׁמִ֣יר וָשָׁ֑...,L> TBW> CMH JR>T CMJR W CJT
7,<LH[,Isaiah,8,7,מַעֲלֶ֨ה,וְלָכֵ֡ן הִנֵּ֣ה אֲדֹנָי֩ מַעֲלֶ֨ה עֲלֵיהֶ֜ם א...,hif,ptca,מַעֲלֶ֨ה,M<LH,הִנֵּ֣ה אֲדֹנָי֩ מַעֲלֶ֨ה עֲלֵיהֶ֜ם אֶת־מֵ֣י ה...,HNH >DNJ M<LH <LJHM >T MJ H NHR H <YWMJM W H R...
8,BW>[,Isaiah,14,2,הֱבִיא֣וּם,וּלְקָח֣וּם עַמִּים֮ וֶהֱבִיא֣וּם אֶל־מְקֹומָם...,hif,perf,הֱבִיא֣וּם,HBJ>WM,וֶהֱבִיא֣וּם אֶל־מְקֹומָם֒,W HBJ>WM >L MQWMM
9,BW>[,Isaiah,16,12,בָ֧א,וְהָיָ֧ה כִֽי־נִרְאָ֛ה כִּֽי־נִלְאָ֥ה מֹואָ֖ב ...,qal,perf,בָ֧א,B>,וּבָ֧א אֶל־מִקְדָּשֹׁ֛ו,W B> >L MQDCW


In [84]:
# Retrieve subject and complement(s) of the verb

def get_subject(book, chapter, verse, lexeme, T, F, L):
    """Return the g_cons transcription of the subject phrase of the verb's clause.

    The verb is located with ``get_verb_id``. Within its first enclosing
    clause, the first phrase whose ``function`` feature is ``"Subj"`` is taken
    as the subject.

    Returns
    -------
    str
        The g_cons values of the subject's words joined by single spaces, or
        ``""`` when the clause has no ``"Subj"`` phrase.
    """
    verb = get_verb_id(book, chapter, verse, lexeme, T, F, L)
    clause = L.u(verb, "clause")[0]
    subject = next(
        (phrase for phrase in L.d(clause, "phrase") if F.function.v(phrase) == "Subj"),
        None,
    )
    if subject is None:
        return ""
    # Render with the F / L handles that were passed in, so the phrase is read
    # from the same corpus it was found in (not from whichever corpus the
    # notebook-global F / L currently point at).
    return " ".join(F.g_cons.v(word) for word in L.d(subject, "word"))
                
    
def get_complement(book, chapter, verse, lexeme, T, F, L):
    """Return the g_cons transcription of the first complement phrase of the verb's clause.

    The verb is located with ``get_verb_id``. Within its first enclosing
    clause, the first phrase whose ``function`` feature is ``"Cmpl"`` is taken
    as the complement; any further ``"Cmpl"`` phrases are ignored.

    Returns
    -------
    str
        The g_cons values of the complement's words joined by single spaces,
        or ``""`` when the clause has no ``"Cmpl"`` phrase.
    """
    verb = get_verb_id(book, chapter, verse, lexeme, T, F, L)
    clause = L.u(verb, "clause")[0]
    complement = next(
        (phrase for phrase in L.d(clause, "phrase") if F.function.v(phrase) == "Cmpl"),
        None,
    )
    if complement is None:
        return ""
    # Render with the F / L handles that were passed in, so the phrase is read
    # from the same corpus it was found in (not from whichever corpus the
    # notebook-global F / L currently point at).
    return " ".join(F.g_cons.v(word) for word in L.d(complement, "word"))

#     print("subject", subject, T.text(subject))
        
#     cmpls = [phrase for phrase in clause_phrases if F.function.v(phrase) == "Cmpl"]
        
#     print("Complement", cmpls, [T.text(cmpl) for cmpl in cmpls])

In [85]:
# TODO: Retrieve information about the complement(s)

In [86]:
# Testing area

# Alternative spot checks, kept for manual use:
# get_subject("Genesis", 8, 3, "HLK[", **BHSA)
# get_g_cons(3592)
# Spot check: the first "Cmpl" phrase of the HLK[ clause in Genesis 8:3, in g_cons.
get_complement("Genesis", 8, 3, "HLK[", **BHSA)

'M <L H >RY'

In [87]:
# Add subject and complement to the dataframe

# Subject of each verb, in transcription (empty string when none is found)
verb_subjects = [
    get_subject(row.book, row.chapter, row.verse, row.bhsa_lex, **BHSA)
    for _, row in df.iterrows()
]
df["subject"] = verb_subjects

# Complement of each verb, in transcription (empty string when none is found)
verb_complements = [
    get_complement(row.book, row.chapter, row.verse, row.bhsa_lex, **BHSA)
    for _, row in df.iterrows()
]
df["complement"] = verb_complements

In [88]:
# Display the frame, now including the subject and complement columns.
df

Unnamed: 0,bhsa_lex,book,chapter,verse,verb_heb,verse_heb,verb_stem,verb_tense,verb_phrase_heb,verb_phrase,verb_clause_heb,verb_clause,subject,complement
0,<WP[,Isaiah,6,6,יָּ֣עָף,וַיָּ֣עָף אֵלַ֗י אֶחָד֙ מִן־הַשְּׂרָפִ֔ים וּבְ...,qal,wayq,יָּ֣עָף,J<P,וַיָּ֣עָף אֵלַ֗י אֶחָד֙ מִן־הַשְּׂרָפִ֔ים,W J<P >LJ >XD MN H FRPJM,>XD MN H FRPJM,>LJ
1,<LH[,Isaiah,7,1,עָלָ֣ה,וַיְהִ֡י בִּימֵ֣י אָ֠חָז בֶּן־יֹותָ֨ם בֶּן־עֻז...,qal,perf,עָלָ֣ה,<LH,עָלָ֣ה רְצִ֣ין מֶֽלֶךְ־אֲ֠רָם וּפֶ֨קַח בֶּן־רְ...,<LH RYJN MLK >RM W PQX BN RMLJHW MLK JFR>L JRW...,RYJN MLK >RM W PQX BN RMLJHW MLK JFR>L,JRWCLM
2,JY>[,Isaiah,7,3,צֵא־,וַיֹּ֣אמֶר יְהוָה֮ אֶֽל־יְשַׁעְיָהוּ֒ צֵא־נָא֙...,qal,impv,צֵא־,Y>,צֵא־נָא֙ אַתָּ֕ה וּשְׁאָ֖ר יָשׁ֣וּב בְּנֶ֑ךָ א...,Y> N> >TH W C>R_JCWB BNK >L QYH T<LT H BRKH H ...,>TH W C>R_JCWB BNK,
3,JY>[,Isaiah,7,3,צֵא־,וַיֹּ֣אמֶר יְהוָה֮ אֶֽל־יְשַׁעְיָהוּ֒ צֵא־נָא֙...,qal,impv,צֵא־,Y>,צֵא־נָא֙ אַתָּ֕ה וּשְׁאָ֖ר יָשׁ֣וּב בְּנֶ֑ךָ א...,Y> N> >TH W C>R_JCWB BNK >L QYH T<LT H BRKH H ...,>TH W C>R_JCWB BNK,
4,<LH[,Isaiah,7,6,נַעֲלֶ֤ה,נַעֲלֶ֤ה בִֽיהוּדָה֙ וּנְקִיצֶ֔נָּה וְנַבְקִעֶ...,qal,impf,נַעֲלֶ֤ה,N<LH,נַעֲלֶ֤ה בִֽיהוּדָה֙,N<LH B JHWDH,,B JHWDH
5,BW>[,Isaiah,7,24,יָ֣בֹוא,בַּחִצִּ֥ים וּבַקֶּ֖שֶׁת יָ֣בֹוא שָׁ֑מָּה כִּי...,qal,impf,יָ֣בֹוא,JBW>,בַּחִצִּ֥ים וּבַקֶּ֖שֶׁת יָ֣בֹוא שָׁ֑מָּה,B XYJM W B QCT JBW> CMH,,CMH
6,BW>[,Isaiah,7,25,תָבֹ֣וא,וְכֹ֣ל הֶהָרִ֗ים אֲשֶׁ֤ר בַּמַּעְדֵּר֙ יֵעָ֣דֵ...,qal,impf,תָבֹ֣וא,TBW>,לֹֽא־תָבֹ֣וא שָׁ֔מָּה יִרְאַ֖ת שָׁמִ֣יר וָשָׁ֑...,L> TBW> CMH JR>T CMJR W CJT,,CMH
7,<LH[,Isaiah,8,7,מַעֲלֶ֨ה,וְלָכֵ֡ן הִנֵּ֣ה אֲדֹנָי֩ מַעֲלֶ֨ה עֲלֵיהֶ֜ם א...,hif,ptca,מַעֲלֶ֨ה,M<LH,הִנֵּ֣ה אֲדֹנָי֩ מַעֲלֶ֨ה עֲלֵיהֶ֜ם אֶת־מֵ֣י ה...,HNH >DNJ M<LH <LJHM >T MJ H NHR H <YWMJM W H R...,>DNJ,<LJHM
8,BW>[,Isaiah,14,2,הֱבִיא֣וּם,וּלְקָח֣וּם עַמִּים֮ וֶהֱבִיא֣וּם אֶל־מְקֹומָם...,hif,perf,הֱבִיא֣וּם,HBJ>WM,וֶהֱבִיא֣וּם אֶל־מְקֹומָם֒,W HBJ>WM >L MQWMM,,>L MQWMM
9,BW>[,Isaiah,16,12,בָ֧א,וְהָיָ֧ה כִֽי־נִרְאָ֛ה כִּֽי־נִלְאָ֥ה מֹואָ֖ב ...,qal,perf,בָ֧א,B>,וּבָ֧א אֶל־מִקְדָּשֹׁ֛ו,W B> >L MQDCW,,>L MQDCW


In [90]:
# Save the enriched table as a semicolon-separated CSV. No index argument is
# given, so pandas also writes the row index as an unnamed first column.
df.to_csv("mt_isaiah_df.csv", sep=";")