In [None]:
from munch import Munch
from tf.app import use
import pandas as pd

In [None]:
# Load the Dead Sea Scrolls corpus and bundle its hoisted API handles right away:
# the next `use(..., hoist=globals())` call rebinds the global F / L / T.
B = use("etcbc/dss", hoist=globals())
DSS = Munch(F=F, L=L, T=T, name="DSS", A=B)

# Load the BHSA corpus; F / L / T now refer to BHSA and are bundled the same way.
A = use("etcbc/bhsa", hoist=globals())
BHSA = Munch(F=F, L=L, T=T, name="BHSA", A=A)

In [None]:
# Remove the hoisted F / L / T globals (bound to BHSA by the last `use` call);
# the DSS and BHSA bundles keep their own references to them.
# Presumably this is meant to force later cells to name the corpus explicitly.
del F, L, T

In [None]:
# Enable IPython's autoreload extension in mode 2, so edits to imported
# modules are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [None]:
import os
import collections
from itertools import chain
from collections import defaultdict

In [None]:
# Index of DSS word nodes: {(book, chapter, verse): {scroll name: [word nodes]}}.
# Words lacking any of the four coordinates are left out of the index.
dss_sections = {}
for word in DSS.F.otype.s("word"):
    scroll_node = DSS.L.u(word, "scroll")[0]
    coordinates = (
        DSS.T.scrollName(scroll_node),
        DSS.F.book_etcbc.v(word),
        DSS.F.chapter.v(word),
        DSS.F.verse.v(word),
    )
    if None in coordinates:
        continue
    scroll, book, chapter, verse = coordinates
    section = (book, chapter, verse)
    words_by_scroll = dss_sections.setdefault(section, {})
    words_by_scroll.setdefault(scroll, []).append(word)

In [None]:
def remove_duplicates(iterable):
    """Return the elements of `iterable` as a list, keeping only the first
    occurrence of each element and preserving the original order.

    Elements must be hashable.
    """
    first_seen = {}
    for element in iterable:
        # setdefault keeps the key inserted first, exactly like dict.fromkeys.
        first_seen.setdefault(element, None)
    return list(first_seen)


class TFOb:
    """Ordered, duplicate-free collection of Text-Fabric node ids tied to one corpus.

    `source` is a bundle exposing `F`, `L`, `T`, `A` and `name` (like the `DSS`
    and `BHSA` bundles built earlier in this notebook). Besides the explicit
    methods, attribute access is dynamic (see `__getattr__`):

    * `ob.to_<otype>s` (e.g. `ob.to_words`) moves to the nodes of another type;
    * `ob.<feature>` returns that feature's values as a list, one per node.
      For a source named "DSS" the feature name `lex` is an alias of `lex_etcbc`.
    """

    def __init__(self, ids, source):
        """Wrap `ids` (one int node, an iterable of nodes, or None) for `source`.

        None is treated as "no nodes" so that a lookup that found nothing
        yields an empty object instead of a TypeError.
        """
        if ids is None:
            ids = []
        elif isinstance(ids, int):
            ids = [ids]
        self.ids = remove_duplicates(ids)
        self.source = source
        # One navigation attribute per node type, in the order of F.otype.all.
        self._levels = ["to_" + otype + "s" for otype in source.F.otype.all]

    @classmethod
    def all(cls, level, source):
        """Return every node of type `level` in `source`."""
        return cls(list(source.F.otype.s(level)), source)

    @classmethod
    def section(cls, section, source, scroll=None):
        """Return the nodes for `section`, a (book, chapter, verse) sequence.

        For the source named "BHSA" this is the node given by
        `T.nodeFromSection`. For any other source the words come from the
        module-level `dss_sections` index, whose chapter and verse keys are
        strings, and the scroll is chosen in this order:

        1. the explicit `scroll` argument (KeyError if the section lacks it);
        2. the `scroll` attribute of `source`, when set (empty result if the
           section is not attested in that scroll);
        3. the first scroll recorded for the section.

        Raises KeyError when the section is not in `dss_sections` at all.
        """
        if source.name == "BHSA":
            return cls(source.T.nodeFromSection(section), source)

        section = (section[0], str(section[1]), str(section[2]))
        dss_section = dss_sections[section]
        if scroll is not None:
            return cls(dss_section[scroll], source)
        # Honour a scroll configured on the source bundle; without this, data
        # labelled with that scroll would silently come from another scroll.
        configured_scroll = getattr(source, "scroll", None)
        if configured_scroll is not None:
            return cls(dss_section.get(configured_scroll, []), source)
        return cls(dss_section[list(dss_section)[0]], source)

    def _feature_name(self, name):
        """Map a requested feature name to the name used by this source."""
        if name == "lex" and self.source.name == "DSS":
            return "lex_etcbc"
        return name

    def _matches(self, id_, criteria):
        """Tell whether node `id_` has the given value for every feature in `criteria`."""
        features = self.source.F
        return not any(
            getattr(features, self._feature_name(key)).v(id_) != value
            for key, value in criteria.items()
        )

    def __getattr__(self, attr):
        """Resolve `to_<otype>s` navigation and feature-value attributes.

        Only called when normal attribute lookup fails.
        """
        # Private/dunder probes (copy, pickle, IPython display hooks) and the
        # instance's own fields must fail fast: reading self._levels below
        # before __init__ has set it would otherwise recurse without end.
        if attr.startswith("_") or attr in ("ids", "source"):
            raise AttributeError(attr)

        if attr in self._levels:
            current_level = self.level
            if current_level == "none":
                return self
            level_index = self._levels.index("to_" + current_level + "s")
            new_level_index = self._levels.index(attr)
            target_otype = attr[3:-1]  # strip the "to_" prefix and the "s" suffix
            # Types later in F.otype.all are reached going down, earlier ones going up.
            if new_level_index > level_index:
                return self.down(target_otype)
            if new_level_index < level_index:
                return self.up(target_otype)
            return self

        feature = getattr(self.source.F, self._feature_name(attr))
        return [feature.v(id_) for id_ in self.ids]

    def copy(self):
        """Return a new object with a copy of the node list and the same source."""
        return TFOb(self.ids.copy(), self.source)

    def up(self, otype=None):
        """Return the `L.u(node, otype)` results of all nodes, merged in order.

        Returns `self` unchanged when empty or already at `otype`.
        """
        if self.level in (otype, "none"):
            return self
        return TFOb(
            chain.from_iterable(self.source.L.u(id_, otype) for id_ in self.ids),
            source=self.source,
        )

    def down(self, otype=None):
        """Return the `L.d(node, otype)` results of all nodes, merged in order.

        Returns `self` unchanged when empty or already at `otype`.
        """
        if self.level in (otype, "none"):
            return self
        return TFOb(
            chain.from_iterable(self.source.L.d(id_, otype) for id_ in self.ids),
            source=self.source,
        )

    def filter(self, **kwargs):
        """Return the nodes whose features equal all given `feature=value` pairs."""
        return TFOb(
            [id_ for id_ in self.ids if self._matches(id_, kwargs)],
            source=self.source,
        )

    def first(self, **kwargs):
        """Return the first node matching all `feature=value` pairs, or an empty object."""
        for id_ in self.ids:
            if self._matches(id_, kwargs):
                return TFOb(id_, source=self.source)
        return TFOb([], source=self.source)

    def __getitem__(self, i):
        """Index or slice the node list; the result is again a TFOb.

        Together with `__len__` this also makes the object iterable, yielding
        one single-node TFOb per node.
        """
        return TFOb(self.ids[i], source=self.source)

    def __len__(self):
        return len(self.ids)

    @property
    def text(self):
        """Result of `T.text` for the node list."""
        return self.source.T.text(self.ids)

    def str(self, word_limit=None):
        """Return the non-empty `g_cons` values of the words, space-separated.

        Objects above word level are rendered through their words. When
        `word_limit` is given and there are more words than that, only the
        first `word_limit // 2` and the remaining last words (up to
        `word_limit` in total) are shown, joined by " [...] ".
        """
        if self.level not in ("word", "none"):
            return self.down("word").str(word_limit)
        if word_limit is not None and len(self) > word_limit:
            head = word_limit // 2
            tail = word_limit - head
            # Positive start index: a plain `-tail` would select everything when tail == 0.
            return self[:head].str() + " [...] " + self[len(self) - tail:].str()
        return " ".join(g_cons for g_cons in self.g_cons if g_cons)

    def __str__(self):
        # Must delegate to self.str(): the builtin str() called with no
        # argument is always the empty string.
        return self.str()

    def __dir__(self):
        """Advertise instance fields, feature names and navigation attributes."""
        return list(self.__dict__.keys()) + dir(self.source.F) + self._levels

    def __add__(self, ob):
        """Concatenate two node lists (duplicates removed), keeping this object's source."""
        return TFOb(self.ids + ob.ids, source=self.source)

    @property
    def level(self):
        """Node type of the first node, or "none" for an empty object."""
        if len(self.ids) > 0:
            return self.otype[0]
        return "none"

    def pretty(self, extraFeatures=("sp", "function")):
        """Display, via `A.pretty`, a single node that contains this object.

        Starting from the object's own type, moves through `F.otype.all`
        towards index 0 until `up()` yields exactly one node. Does nothing for
        an empty object.
        """
        if len(self) == 0:
            return
        levels = self.source.F.otype.all
        ob = self
        level_index = levels.index(ob.level)
        while len(ob) != 1:
            level_index -= 1
            ob = self.up(levels[level_index])
        self.source.A.pretty(ob.ids[0], extraFeatures=extraFeatures)

    def __repr__(self):
        """Return `<{level}s_{count} "{text limited to 40 words}">` (`none` stays singular)."""
        level = self.level
        if level != "none":
            level += "s"

        return f'<{level}_{len(self)} "{self.str(40)}">'

In [None]:
# Load the semicolon-separated starter table; the table-building loop further
# down reads its `book`, `chapter`, `verse` and `bhsa_lex` columns.
df = pd.read_csv("mt_isa_df_starter.csv", sep=";")

In [None]:
# Worked example on Isaiah 7:6 in the BHSA:
# the first word of the verse whose lexeme is "<LH[" ...
verb = TFOb.section(["Isaiah", 7, 6], BHSA).to_words.first(lex="<LH[")
# ... the phrases of its clause whose `function` is "Cmpl" ...
complement = verb.to_clauses.to_phrases.filter(function="Cmpl")
# ... and the words of those phrases whose `sp` is "prep".
preposition = complement.to_words.filter(sp="prep")

In [None]:
def get_verb(section, lexeme, source):
    """Return the first word of verse `section` in `source` whose `lex` is `lexeme`.

    The result is a TFOb; it is empty when no word of the verse matches.
    """
    verse_words = TFOb.section(section, source).to_words
    return verse_words.first(lex=lexeme)

def get_verb_stem(section, lexeme, source):
    """Return the `vs` feature value of the verb found by `get_verb`, as one string."""
    stems = get_verb(section, lexeme, source).vs
    return "".join(stems)

def get_verb_tense(section, lexeme, source):
    """Return the `vt` feature value of the verb found by `get_verb`, as one string."""
    tenses = get_verb(section, lexeme, source).vt
    return "".join(tenses)

def get_verse(section, lexeme, source):
    """Return the TFOb for verse `section` in `source`.

    `lexeme` is not used; it is accepted so that every column extractor can be
    called with the same (section, lexeme, source) arguments.
    """
    verse = TFOb.section(section, source)
    return verse
    
def get_clause(section, lexeme, source):
    """Return the clause node(s) reached from the verb found by `get_verb`."""
    return get_verb(section, lexeme, source).to_clauses

def get_phrase(section, lexeme, source):
    """Return the phrase node(s) reached from the verb found by `get_verb`."""
    return get_verb(section, lexeme, source).to_phrases

def get_complements(section, lexeme, source):
    """Return the phrases of the verb's clause whose `function` is "Cmpl" or "Loca".

    All "Cmpl" phrases come first, followed by the "Loca" phrases.
    """
    clause_phrases = get_verb(section, lexeme, source).to_clauses.to_phrases
    cmpl_phrases = clause_phrases.filter(function="Cmpl")
    loca_phrases = clause_phrases.filter(function="Loca")
    return cmpl_phrases + loca_phrases

def get_prepositions(section, lexeme, source):
    """Return a list with one TFOb per phrase from `get_complements`.

    Each entry holds the words of that phrase whose `sp` is "prep" and may be
    empty, so positions line up with the complements.
    """
    return [
        phrase.to_words.filter(sp="prep")
        for phrase in get_complements(section, lexeme, source)
    ]

def get_subject(section, lexeme, source):
    """Return the phrases of the verb's clause whose `function` is "Subj"."""
    clause_phrases = get_verb(section, lexeme, source).to_clauses.to_phrases
    return clause_phrases.filter(function="Subj")

def get_verse_lex(section, lexeme, source):
    """Return the `lex` values of all words of the verse, joined by single spaces."""
    verse_words = get_verse(section, lexeme, source).to_words
    return " ".join(verse_words.lex)
    

In [None]:
def _as_text(getter):
    """Wrap `getter` so the column holds the `.text` of the object it returns."""
    return lambda *args: getter(*args).text


# Output column name -> extractor, each called as extractor(section, lexeme, source).
# "complement" and "preposition" yield several values and are spread over
# numbered columns by the loop below.
columns_functions = {
    "verb_heb": _as_text(get_verb),
    "verse_g_cons": get_verse,
    "verse_heb": _as_text(get_verse),
    "verb_stem": get_verb_stem,
    "verb_tense": get_verb_tense,
    "verb_phrase": get_phrase,
    "verb_phrase_heb": _as_text(get_phrase),
    "verb_clause": get_clause,
    "verb_clause_heb": _as_text(get_clause),
    "subject": get_subject,
    "subj_heb": _as_text(get_subject),
    "complement": get_complements,
    "preposition": get_prepositions,
}

# Build one output row per (starter-table row, source). Rows are collected as
# dicts and turned into a DataFrame once at the end.
items = []

# NOTE(review): `bhsa_df` is not read anywhere in the visible notebook; kept in
# case another cell relies on it.
bhsa_df = df.copy()

# Second source: a copy of the DSS bundle tagged with the scroll it stands for.
DSS_1Qisaa = DSS.copy()
DSS_1Qisaa.scroll = "1Qisaa"

for _, row in df.iterrows():
    for source in (BHSA, DSS_1Qisaa):
        item = row.to_dict()
        args = ([row.book, row.chapter, row.verse], row.bhsa_lex, source)

        for name, function in columns_functions.items():
            ob = function(*args)
            if name in ("complement", "preposition"):
                # Multi-valued columns: one numbered column per element,
                # e.g. complement_1, complement_2, ...
                for i, element in enumerate(ob, start=1):
                    item[f"{name}_{i}"] = str(element)
            else:
                item[name] = str(ob)

        # Sources without a `scroll` entry (the BHSA bundle) are labelled "MT".
        item["scroll"] = source.get("scroll", "MT")
        items.append(item)

# Rows with fewer numbered columns than others get "" instead of NaN.
new_dataframe = pd.DataFrame(items).fillna("")
new_dataframe

In [None]:
# Final column order of the exported table.
OUTPUT_COLUMNS = [
    "bhsa_lex", "book", "chapter", "verse", "scroll",
    "verb_heb", "verse_g_cons", "verse_heb",
    "verb_stem", "verb_tense",
    "verb_phrase", "verb_phrase_heb",
    "verb_clause", "verb_clause_heb",
    "subject", "subj_heb",
    "complement_1", "preposition_1",
    "complement_2", "preposition_2",
    "complement_3", "preposition_3",
]

# The numbered complement/preposition columns only exist when some row produced
# that many elements. `reindex` creates any missing column filled with ""
# instead of raising KeyError, and otherwise selects and orders like `[[...]]`.
new_dataframe = new_dataframe.reindex(columns=OUTPUT_COLUMNS, fill_value="")
new_dataframe

In [None]:
# Write the result as a semicolon-separated UTF-8 file without the index column
# (the same separator as the starter table).
new_dataframe.to_csv("new_dataframe.csv", sep=";", index=False, encoding="utf8")

In [None]:
# See all features of a node (here node 1030) together with their values.
# `Fall()` only lists the names of the loaded node features and does not take a
# node, so each value is looked up with `Fs(name).v(node)`. Both names are
# hoisted by `use(..., hoist=globals())`; the last hoist was for BHSA.
# Features that have no value for this node are left out.
node_features = {name: Fs(name).v(1030) for name in Fall()}
{name: value for name, value in node_features.items() if value is not None}