In [None]:
import pandas as pd
from matplotlib import pyplot as plt
%matplotlib inline
import seaborn as sns
from munch import Munch

In [None]:
from tf.app import use

# Load the Dead Sea Scrolls corpus. `hoist=globals()` is relied on to place
# the F, L and T API handles in the notebook namespace (they are read right below).
B = use("etcbc/dss", checkout="clone", version="1.8", hoist=globals())
Fdss, Ldss, Tdss = F, L, T
DSS = Munch(
    {
        "F": F,
        "L": L,
        "T": T,
        "name": "DSS",
    }
)

# Load the BHSA (Masoretic Text) corpus. This second hoist is expected to
# rebind the global F, L and T, which is why the DSS handles were captured first.
A = use("etcbc/bhsa", hoist=globals())
Fmt, Lmt, Tmt = F, L, T
BHSA = Munch(
    {
        "F": F,
        "L": L,
        "T": T,
        "name": "BHSA",
    }
)

In [None]:
# Remove the bare F, L, T names so they cannot be used by accident;
# the per-corpus handles stay reachable through DSS / BHSA and the
# Fdss/Ldss/Tdss and Fmt/Lmt/Tmt aliases.
del F, L, T

In [None]:
# Load the starter table (semicolon-separated). Later cells read its
# `book`, `chapter`, `verse` and `bhsa_lex` columns.
df = pd.read_csv("mt_isa_df_starter.csv", sep=";")

In [None]:
# Index the DSS word nodes by verse and by scroll.
# Outer key : the section (book, chapter, verse) as stored in the DSS features.
# Inner key : the name of the scroll; inner value: the list of word nodes of
#             that verse in that scroll, in the order they are visited.
# NB: the section items are expected to be strings (section_words converts
#     chapter and verse with str() before looking them up here).
#
# Example
# {('Genesis', '24', '24'): {'1Q1': [1890183, 1890184, ...]}, ...}

dss_sections = {}

for word in DSS.F.otype.s("word"):
    scroll_node = DSS.L.u(word, "scroll")[0]
    scroll = DSS.T.scrollName(scroll_node)
    section = (
        DSS.F.book_etcbc.v(word),
        DSS.F.chapter.v(word),
        DSS.F.verse.v(word),
    )
    # Words without a complete (scroll, book, chapter, verse) address are skipped.
    if scroll is None or None in section:
        continue
    words_by_scroll = dss_sections.setdefault(section, {})
    words_by_scroll.setdefault(scroll, []).append(word)

In [None]:
def section_words(section, source, scroll=None):
    """Return the word nodes of one verse in the given corpus.

    Parameters
    ----------
    section : sequence
        (book, chapter, verse). For BHSA it is passed unchanged to
        ``source.T.nodeFromSection``; for DSS chapter and verse are converted
        with ``str()`` before the lookup in ``dss_sections``.
    source : object with ``name``, ``T`` and ``L`` attributes
        The ``BHSA`` or ``DSS`` bundle; ``source.name`` selects the lookup.
    scroll : str, optional
        Scroll name. Only used for DSS, where it is the inner key of
        ``dss_sections``.

    Returns
    -------
    The words of the verse: the result of ``source.L.d(verse, "word")`` for
    BHSA, or the list stored in ``dss_sections`` for DSS.

    Raises
    ------
    ValueError
        If ``source.name`` is neither ``"BHSA"`` nor ``"DSS"``.
    KeyError
        If the verse or the scroll is not present in ``dss_sections`` (DSS only).
    """
    if source.name == "BHSA":
        verse_id = source.T.nodeFromSection(section)
        return source.L.d(verse_id, "word")
    if source.name == "DSS":
        dss_key = (section[0], str(section[1]), str(section[2]))
        return dss_sections[dss_key][scroll]
    # Raise a real exception rather than `assert False`: assertions are removed
    # under `python -O`, which would make this function silently return None.
    raise ValueError(
        f"Invalid source {source.name}. The source should be BHSA or DSS."
    )

In [None]:
#Test area

#  dss_sections
# section_words(["Isaiah", "1", "1"], DSS, "1Qisaa")
# section_words(["Isaiah", 20, 6], BHSA)

In [None]:
# Retrieve the text of a whole verse from a section (book, chapter, verse)

def get_verse_heb(section, source, scroll=None):
    """Return the text of a verse as rendered by ``source.T.text``.

    The verse words are looked up with ``section_words(section, source, scroll)``;
    ``scroll`` is only relevant for the DSS source.
    """
    return source.T.text(section_words(section, source, scroll))

In [None]:
# Test area
# get_verse_heb(["Isaiah", 20, 6], DSS, "1Qisaa")
# get_verse_heb(["Isaiah", 20, 6], BHSA)

In [None]:
# How to retrieve information from a dataframe

# verse_heb = []

### method 1

# for book, chapter, verse in sections:
#     verse_heb.append(get_verse_heb(book, chapter, verse))
    
# method 2
# for section in sections:
#     verse_heb.append(get_verse_heb(*section)) # *list is like calling the function with each element of the list one by one

### method 3: running through the columns to get book chapter verse
# for section in zip(df.book, df.chapter, df.verse):
#     verse_heb.append(get_verse_heb(*section))

### method 4: line by line with index (_) 
# in order to call the function from either BHSA or DSS, use **origin (BHSA or DSS) at the end

# for _, row in df.iterrows():
#     verse_heb.append(get_verse_heb(row.book, row.chapter, row.verse, **BHSA))
    
# # add the list as a new column
# df["verse_heb"] = verse_heb

In [None]:
# Get the consonantal representation (g_cons) of any thing

def get_g_cons(etcbc_id, source):
    """Return the space-joined ``g_cons`` values for a node of any type.

    ``None`` yields an empty string. A word node is used as is; any other
    node is expanded to the words below it with ``source.L.d``.
    """
    if etcbc_id is None:
        return ""
    is_word = source.F.otype.v(etcbc_id) == "word"
    word_nodes = [etcbc_id] if is_word else source.L.d(etcbc_id, "word")
    return " ".join(source.F.g_cons.v(node) for node in word_nodes)

In [None]:
# Retrieve verb info

def get_verb_id(section, lexeme, source, scroll=None):
    """Return the node of the first word in the verse whose lexeme matches.

    The lexeme is read from ``F.lex`` for the BHSA source and from
    ``F.lex_etcbc`` for any other source. When no word matches, a message is
    printed and ``None`` is returned.
    """
    verse_words = section_words(section, source, scroll)
    lex_feature = "lex" if source.name == "BHSA" else "lex_etcbc"
    verb = next(
        (
            node
            for node in verse_words
            if getattr(source.F, lex_feature).v(node) == lexeme
        ),
        None,
    )
    if verb is None:
        print(f"verb {lexeme} not found in {section, scroll}")
    return verb


def get_verb_heb(section, lexeme, source, scroll=None):
    """Return the text of the verb as rendered by ``source.T.text``.

    The verb is located with ``get_verb_id``; an empty string is returned
    when it is not found.
    """
    verb_node = get_verb_id(section, lexeme, source, scroll)
    return "" if verb_node is None else source.T.text(verb_node)
     
def get_verb_stem(section, lexeme, source, scroll=None):
    """Return the verbal stem of the verb, or "" when the verb is not found.

    The stem is read from ``F.vs`` for the BHSA source and from
    ``F.vs_etcbc`` for any other source.
    """
    verb_node = get_verb_id(section, lexeme, source, scroll)
    if verb_node is None:
        return ""
    stem_feature = source.F.vs if source.name == "BHSA" else source.F.vs_etcbc
    return stem_feature.v(verb_node)

def get_verb_tense(section, lexeme, source, scroll=None):
    """Return the verbal tense of the verb, or "" when the verb is not found.

    The tense is read from ``F.vt`` for the BHSA source and from
    ``F.vt_etcbc`` for any other source.
    """
    verb_node = get_verb_id(section, lexeme, source, scroll)
    if verb_node is None:
        return ""
    tense_feature = source.F.vt if source.name == "BHSA" else source.F.vt_etcbc
    return tense_feature.v(verb_node)

In [None]:
# Retrieve the phrase and the clause

def get_phrase_heb(section, lexeme, source, scroll=None):
    """Return the text of the phrase(s) containing the verb.

    The verb is located with ``get_verb_id``; the phrase nodes found above it
    with ``source.L.u`` are rendered by ``source.T.text``. Returns "" when the
    verb is not found.
    """
    verb_node = get_verb_id(section, lexeme, source, scroll)
    if verb_node is None:
        return ""
    enclosing_phrases = source.L.u(verb_node, "phrase")
    return source.T.text(enclosing_phrases)
        

def get_phrase(section, lexeme, source, scroll=None):
    """Return the space-joined ``g_cons`` values of the phrase containing the verb.

    The verb is located with ``get_verb_id``. Only the first phrase returned by
    ``source.L.u`` is used. Returns "" when the verb is not found or when it
    has no enclosing phrase.
    """
    verb_node = get_verb_id(section, lexeme, source, scroll)
    if verb_node is None:
        return ""
    enclosing = source.L.u(verb_node, "phrase")
    if len(enclosing) == 0:
        return ""
    return " ".join(
        source.F.g_cons.v(node) for node in source.L.d(enclosing[0], "word")
    )

def get_clause_heb(section, lexeme, source, scroll=None):
    """Return the text of the clause(s) containing the verb.

    The verb is located with ``get_verb_id``; the clause nodes found above it
    with ``source.L.u`` are rendered by ``source.T.text``. Returns "" when the
    verb is not found.
    """
    verb_node = get_verb_id(section, lexeme, source, scroll)
    if verb_node is None:
        return ""
    enclosing_clauses = source.L.u(verb_node, "clause")
    return source.T.text(enclosing_clauses)

def get_clause(section, lexeme, source, scroll=None):
    """Return the space-joined ``g_cons`` values of the clause containing the verb.

    The verb is located with ``get_verb_id``. Only the first clause returned by
    ``source.L.u`` is used. Returns "" when the verb is not found or when it
    has no enclosing clause.
    """
    verb_node = get_verb_id(section, lexeme, source, scroll)
    if verb_node is None:
        return ""
    enclosing = source.L.u(verb_node, "clause")
    if len(enclosing) == 0:
        return ""
    return " ".join(
        source.F.g_cons.v(node) for node in source.L.d(enclosing[0], "word")
    )

In [None]:
# Testing area

# get_verb_heb(["Isaiah", 6, 6], "<WP[", DSS, "1Qisaa") 

# get_phrase_heb(["Isaiah", 7, 1], "<LH[", DSS, "1Qisaa")

# get_verb_id(["Isaiah", 20, 6], "NWS[", DSS, "1Qisaa")

# get_verb_stem(["Isaiah", 6, 6], "<WP[", DSS, "1Qisaa")

# get_verb_tense(["Isaiah", 6, 6], "<WP[", BHSA)

# get_clause_heb(["Isaiah", 7, 1], "<LH[", DSS, "1Qisaa")

# get_phrase(["Isaiah", 7, 1], "<LH[", DSS, "1Qisaa")

# get_clause(["Isaiah", 7, 1], "<LH[", DSS, "1Qisaa")

In [None]:
# Retrieve subject and complement(s) of the verb

def get_subject(section, lexeme, source, scroll=None):
    """Return the ``g_cons`` form of the subject phrase of the verb's clause.

    The verb is located with ``get_verb_id``; the first phrase of its first
    enclosing clause whose ``function`` feature is "Subj" is rendered with
    ``get_g_cons``. Returns "" when the verb is not found, when it has no
    enclosing clause, or when the clause has no subject phrase.
    """
    verb_node = get_verb_id(section, lexeme, source, scroll)
    if verb_node is None:
        return ""
    enclosing = source.L.u(verb_node, "clause")
    if len(enclosing) == 0:
        return ""
    subject_phrase = next(
        (
            node
            for node in source.L.d(enclosing[0], "phrase")
            if source.F.function.v(node) == "Subj"
        ),
        None,
    )
    # get_g_cons maps None (no subject phrase) to an empty string.
    return get_g_cons(subject_phrase, source)
    
def get_complement(section, lexeme, source, scroll=None):
    """Return the ``g_cons`` form of the first complement phrase of the verb's clause.

    The verb is located with ``get_verb_id``; the first phrase of its first
    enclosing clause whose ``function`` feature is "Cmpl" is rendered with
    ``get_g_cons``. Returns "" when the verb is not found, when it has no
    enclosing clause, or when the clause has no complement phrase.
    """
    verb_node = get_verb_id(section, lexeme, source, scroll)
    if verb_node is None:
        return ""
    enclosing = source.L.u(verb_node, "clause")
    if len(enclosing) == 0:
        return ""
    complement_phrase = next(
        (
            node
            for node in source.L.d(enclosing[0], "phrase")
            if source.F.function.v(node) == "Cmpl"
        ),
        None,
    )
    # get_g_cons maps None (no complement phrase) to an empty string.
    return get_g_cons(complement_phrase, source)

In [None]:
# Build one enriched copy of the starter table per text witness (MT from BHSA,
# 1Qisaa from DSS) and stack them into `final_df`.
# Columns added per copy: scroll, verse_heb, verb_heb, verb_stem, verb_tense,
# verb_phrase_heb, verb_phrase, verb_clause_heb, verb_clause, subject, complement.

dfs = []
source_df = df

# (column name, getter) pairs that are evaluated against the witness being
# processed. The order is the order in which the columns are added.
witness_columns = (
    ("verb_heb", get_verb_heb),                # verb in Hebrew script
    ("verb_stem", get_verb_stem),              # verbal stem
    ("verb_tense", get_verb_tense),            # verbal tense
    ("verb_phrase_heb", get_phrase_heb),       # phrase in Hebrew script
    ("verb_phrase", get_phrase),               # phrase (g_cons)
    ("verb_clause_heb", get_clause_heb),       # clause in Hebrew script
    ("verb_clause", get_clause),               # clause (g_cons)
)

# NOTE(review): subject and complement are looked up in BHSA for BOTH passes,
# exactly as before, so the 1Qisaa rows repeat the MT values. Confirm whether
# this is intended before switching these to `source, scroll`.
bhsa_columns = (
    ("subject", get_subject),                  # subject in transcription
    ("complement", get_complement),            # complement in transcription
)

for source, scroll in ((BHSA, None), (DSS, "1Qisaa")):
    # `df` is rebound on purpose so the name ends up bound to the last
    # enriched copy, as it did before; the pristine table stays in `source_df`.
    df = source_df.copy()
    df["scroll"] = "MT" if scroll is None else scroll

    # Read the lookup keys once per witness instead of once per column.
    lookups = [
        ([row.book, row.chapter, row.verse], row.bhsa_lex)
        for _, row in df.iterrows()
    ]

    # Verse in Hebrew script (needs no lexeme).
    df["verse_heb"] = [
        get_verse_heb(section, source, scroll) for section, _lexeme in lookups
    ]

    for column, getter in witness_columns:
        df[column] = [
            getter(section, lexeme, source, scroll) for section, lexeme in lookups
        ]

    for column, getter in bhsa_columns:
        df[column] = [getter(section, lexeme, BHSA) for section, lexeme in lookups]

    dfs.append(df)

final_df = pd.concat(dfs, ignore_index=True)


In [None]:
# Printing the final_df

# final_df

In [None]:
# Reorder the columns: identifying columns first, then the extracted features.
column_order = [
    "bhsa_lex",
    "book",
    "chapter",
    "verse",
    "scroll",
    "verb_heb",
    "verse_heb",
    "verb_stem",
    "verb_tense",
    "verb_phrase_heb",
    "verb_phrase",
    "verb_clause_heb",
    "verb_clause",
    "subject",
    "complement",
]
final_df = final_df[column_order]
final_df

In [None]:
# Testing area
# get_verb_id(["Isaiah", 7, 1], "<LH[", DSS, "1Qisaa")
# get_subject(["Genesis", 8, 3], "HLK[", BHSA)
# get_g_cons(3592, BHSA)
# get_complement(["Genesis", 8, 3], "HLK[", BHSA)
# get_subject(["Isaiah", 7, 1], "<LH[", DSS, "1Qisaa")

In [None]:
# Save the combined MT + 1Qisaa table to a semicolon-separated CSV file,
# without writing the DataFrame index as a column.

final_df.to_csv("mt_and_dss_isaiah_df.csv", sep=";", index=False)

In [None]:
# TODO: Retrieve information about the complement(s)

# create a function to distinguish complements with/without prepositions

# Shared accumulator filled by get_cmpl_type:
# {verb g_cons: (complement phrase g_cons, preposition g_cons)}
verb_complements = {}

def get_cmpl_type(section, lexeme, source, scroll=None):
    """Record the prepositional complement(s) of a verb in ``verb_complements``.

    The verb is located with ``get_verb_id``. For every phrase of its first
    enclosing clause whose ``function`` feature is "Cmpl", the first word with
    ``sp == "prep"`` is looked up; when there is one, the module-level
    ``verb_complements`` dictionary gets the entry
    ``verb g_cons -> (complement g_cons, preposition g_cons)``.

    Notes
    -----
    * Complements without a preposition are not recorded.
    * The key is the verb's ``g_cons``, so a later complement (or a later call
      for a verb with the same form) overwrites an earlier entry.
    * The dictionary is shared between calls and keeps growing.

    Returns
    -------
    "" when the verb is not found or has no enclosing clause; otherwise the
    shared ``verb_complements`` dictionary itself (not a copy).
    """
    verb_node = get_verb_id(section, lexeme, source, scroll)
    if verb_node is None:
        return ""
    enclosing = source.L.u(verb_node, "clause")
    if len(enclosing) == 0:
        return ""
    for phrase_node in source.L.d(enclosing[0], "phrase"):
        if source.F.function.v(phrase_node) != "Cmpl":
            continue
        for word_node in source.L.d(phrase_node, "word"):
            if source.F.sp.v(word_node) != "prep":
                continue
            verb_complements[get_g_cons(verb_node, source)] = (get_g_cons(phrase_node, source), get_g_cons(word_node, source))
            # Only the first preposition of a complement phrase is recorded.
            break
    return verb_complements

In [None]:
# Try get_cmpl_type on Isaiah 7:6 in BHSA; the cell displays the returned value
# (the shared verb_complements dictionary, or "" when nothing is found).
get_cmpl_type(["Isaiah", 7, 6], "<LH[", BHSA)

In [None]:
# For comparison: the complement of the same verb as returned by get_complement.
get_complement(["Isaiah", 7, 6], "<LH[", BHSA)

In [None]:
# Print the value of every feature listed by Fall() for one BHSA node.
# NOTE(review): Fall is not defined in this notebook; presumably it is one of
# the names hoisted by use(..., hoist=globals()) - confirm. 214369 is a
# hard-coded node number used for exploration.
for feature in Fall():
    print(feature, getattr(BHSA.F, feature).v(214369))