In [6]:
from munch import Munch
from tf.app import use

In [8]:
# Load "etcbc/dss" first. `hoist=globals()` places its F/L/T handles in the
# notebook namespace, so they are bundled immediately: the next `use` call
# hoists over the very same global names.
B = use("etcbc/dss", hoist=globals())
DSS = Munch(F=F, L=L, T=T, name="DSS", A=B)

# Same steps for "etcbc/bhsa"; from here on the global F/L/T are the BHSA ones.
A = use("etcbc/bhsa", hoist=globals())
BHSA = Munch(F=F, L=L, T=T, name="BHSA", A=A)

**Locating corpus resources ...**

Name,# of nodes,# slots/node,% coverage
scroll,1001,1428.81,100
lex,10450,129.14,94
fragment,11182,127.91,100
line,52895,27.04,100
clause,125,12.85,0
cluster,101099,6.68,47
phrase,315,5.1,0
word,500995,2.81,99
sign,1430241,1.0,100


**Locating corpus resources ...**

Name,# of nodes,# slots/node,% coverage
book,39,10938.21,100
chapter,929,459.19,100
lex,9230,46.22,100
verse,23213,18.38,100
half_verse,45179,9.44,100
sentence,63717,6.7,100
sentence_atom,64514,6.61,100
clause,88131,4.84,100
clause_atom,90704,4.7,100
phrase,253203,1.68,100


In [9]:
# Remove the hoisted F/L/T globals; the DSS and BHSA bundles hold their own
# references, so every lookup has to name its corpus explicitly.
# NOTE(review): several later cells still use bare F, L and T and will raise
# NameError after this statement unless those names are re-created.
del F, L, T

In [10]:
%load_ext autoreload
%autoreload 2

In [11]:
import os
import collections
from itertools import chain
from collections import defaultdict

In [12]:
# Index the DSS word nodes by reference:
#   dss_sections[(book, chapter, verse)][scroll name] -> list of word nodes
dss_sections = {}
for word_node in DSS.F.otype.s("word"):
    scroll_name = DSS.T.scrollName(DSS.L.u(word_node, "scroll")[0])
    reference = (
        DSS.F.book_etcbc.v(word_node),
        DSS.F.chapter.v(word_node),
        DSS.F.verse.v(word_node),
    )
    # Words missing a scroll name or any part of the reference are left out.
    if scroll_name is None or any(part is None for part in reference):
        continue
    words_by_scroll = dss_sections.setdefault(reference, {})
    words_by_scroll.setdefault(scroll_name, []).append(word_node)

In [15]:
def remove_duplicates(iterable):
    """Return the items of `iterable` as a list, keeping only the first occurrence of each.

    Items must be hashable; the order of first appearance is preserved.
    """
    first_seen = {}
    for item in iterable:
        first_seen.setdefault(item, None)
    return list(first_seen)


class TFOb:
    """An ordered, duplicate-free set of Text-Fabric nodes from one corpus.

    `ids` is the list of node ids and `source` the corpus bundle they belong
    to: an object exposing `F`, `L`, `T`, `A` and `name`, like the `DSS` and
    `BHSA` bundles built at the top of this notebook.

    Attribute access is overloaded (see `__getattr__`):

    * `ob.to_<otype>s` (e.g. `ob.to_words`, `ob.to_clauses`) navigates to the
      nodes of another type, going up or down depending on where that type
      sits in `source.F.otype.all`.
    * any other public attribute is read as a feature name and returns that
      feature's value for every node, e.g. `ob.lex` or `ob.function`. For the
      bundle named "DSS", `lex` is looked up as `lex_etcbc`.

    The *level* of an object is the node type of its first node, or "none"
    when the object is empty.
    """

    def __init__(self, ids, source):
        # A single node id is accepted as shorthand for a one-element list.
        if isinstance(ids, int):
            ids = [ids]
        self.ids = remove_duplicates(ids)
        self.source = source
        # Navigation attribute names, in the order of `source.F.otype.all`.
        self._levels = ["to_" + otype + "s" for otype in source.F.otype.all]

    @classmethod
    def all(cls, level, source):
        """Return an object holding every node of type `level` in `source`."""
        return cls(list(source.F.otype.s(level)), source)

    @classmethod
    def section(cls, section, source, scroll=None):
        """Return the nodes for one `(book, chapter, verse)` reference.

        For the bundle named "BHSA" the section node is resolved with
        `source.T.nodeFromSection`. For any other bundle the word nodes are
        taken from the notebook-level `dss_sections` index, whose keys store
        chapter and verse as strings; `scroll` selects the scroll and defaults
        to the first one recorded for that reference.

        Raises
        ------
        KeyError
            If the reference (or the requested scroll) is unknown.
        """
        if source.name == "BHSA":
            node = source.T.nodeFromSection(section)
            if node is None:
                # Fail with the same exception type as the index lookup below
                # instead of an opaque TypeError from the constructor.
                raise KeyError(tuple(section))
            return cls(node, source)

        section = (section[0], str(section[1]), str(section[2]))
        words_by_scroll = dss_sections[section]
        if scroll is None:
            scroll = next(iter(words_by_scroll))
        return cls(words_by_scroll[scroll], source)

    def _feature_name(self, name):
        """Map a requested feature name to the name used by this corpus."""
        if name == "lex" and self.source.name == "DSS":
            return "lex_etcbc"
        return name

    def _matches(self, id_, conditions):
        """Tell whether node `id_` has the given value for every feature in `conditions`."""
        return all(
            getattr(self.source.F, self._feature_name(key)).v(id_) == value
            for key, value in conditions.items()
        )

    def __getattr__(self, attr):
        """Resolve `to_<otype>s` navigation and feature access (see class docstring)."""
        # Only reached when normal lookup fails. Private names and the instance
        # attributes themselves are never features; refusing them here prevents
        # infinite recursion when `_levels` or `source` is not set yet (for
        # example on an instance created without `__init__`).
        if attr.startswith("_") or attr in ("ids", "source"):
            raise AttributeError(attr)

        if attr in self._levels:
            if self.level == "none":
                return self
            current_index = self._levels.index("to_" + self.level + "s")
            target_index = self._levels.index(attr)
            otype = attr[3:-1]  # strip the "to_" prefix and the plural "s"
            if target_index > current_index:
                return self.down(otype)
            if target_index < current_index:
                return self.up(otype)
            return self

        feature = getattr(self.source.F, self._feature_name(attr))
        return [feature.v(id_) for id_ in self.ids]

    def copy(self):
        """Return a new object with a copy of the id list and the same source."""
        return TFOb(self.ids.copy(), self.source)

    def up(self, otype=None):
        """Return the nodes of type `otype` found with `L.u` from each node.

        Returns `self` unchanged when already at that level or when empty.
        """
        if self.level in (otype, "none"):
            return self
        return TFOb(
            chain.from_iterable(self.source.L.u(id_, otype) for id_ in self.ids),
            source=self.source,
        )

    def down(self, otype=None):
        """Return the nodes of type `otype` found with `L.d` from each node.

        Returns `self` unchanged when already at that level or when empty.
        """
        if self.level in (otype, "none"):
            return self
        return TFOb(
            chain.from_iterable(self.source.L.d(id_, otype) for id_ in self.ids),
            source=self.source,
        )

    def filter(self, **kwargs):
        """Keep the nodes whose features equal all given `feature=value` pairs."""
        return TFOb(
            [id_ for id_ in self.ids if self._matches(id_, kwargs)],
            source=self.source,
        )

    def first(self, **kwargs):
        """Return the first node matching all `feature=value` pairs.

        The result is an empty object (level "none") when nothing matches.
        """
        for id_ in self.ids:
            if self._matches(id_, kwargs):
                return TFOb(id_, source=self.source)
        return TFOb([], source=self.source)

    def __getitem__(self, i):
        """Index or slice the id list; the result is always a TFOb."""
        return TFOb(self.ids[i], source=self.source)

    def __len__(self):
        return len(self.ids)

    @property
    def text(self):
        """Text of the nodes as produced by `source.T.text`."""
        return self.source.T.text(self.ids)

    def str(self, word_limit=None):
        """Return the non-empty `g_cons` values of the words, joined by spaces.

        Objects above word level are first taken down to their words. When
        `word_limit` is given and there are more words than that, only the
        first and last halves are shown, separated by " [...] ".
        """
        if self.level not in ("word", "none"):
            return self.down("word").str(word_limit)
        if word_limit is not None and len(self) > word_limit:
            head = word_limit // 2
            tail = word_limit - head
            # Slice from an explicit start so that tail == 0 yields nothing
            # (a `-0` slice start would select every word).
            return self[:head].str() + " [...] " + self[len(self) - tail:].str()
        return " ".join(g_cons for g_cons in self.g_cons if g_cons)

    def __str__(self):
        return self.str()

    def __dir__(self):
        # Offer regular attributes and methods as well as feature names and
        # the `to_<otype>s` navigation attributes.
        names = set(super().__dir__())
        names.update(dir(self.source.F))
        names.update(self._levels)
        return sorted(names)

    def __add__(self, ob):
        """Concatenate the ids of both objects (duplicates collapse); keeps this source."""
        return TFOb(self.ids + ob.ids, source=self.source)

    @property
    def level(self):
        """Node type of the first node, or "none" for an empty object."""
        if self.ids:
            # Only the first node is inspected; no need to look up them all.
            return self.source.F.otype.v(self.ids[0])
        return "none"

    def pretty(self, extraFeatures=("sp", "function")):
        """Display the nodes with the corpus app's `pretty`.

        When the object holds several nodes, successively earlier types of
        `F.otype.all` are tried until the nodes map onto exactly one node,
        and that node is displayed. Does nothing for an empty object.

        Raises
        ------
        ValueError
            If no level yields exactly one node.
        """
        if len(self) == 0:
            return
        levels = self.source.F.otype.all
        ob = self
        for otype in reversed(levels[: levels.index(self.level)]):
            if len(ob) == 1:
                break
            ob = self.up(otype)
        if len(ob) != 1:
            raise ValueError("no single node contains all of these nodes")
        self.source.A.pretty(ob.ids[0], extraFeatures=extraFeatures)

    def __repr__(self):
        return f'<{self.level}_{len(self)} "{self.str(40)}">'

In [18]:
verbs = TFOb.all("word", BHSA).filter(lex="BW>[")

In [21]:
verbs[0].ids

[1030]

In [46]:
ob = TFOb.section(["Isaiah", 6, 6], BHSA).to_words
ob.pretty()

In [310]:
ob + ob

<word_18 "W J<P >LJ >XD MN H FRPJM W B JDW RYPH B MLQXJM LQX M <L H MZBX">

In [47]:
TFOb.section(["Isaiah", 6, 6], BHSA).pretty()

In [1]:
TFOb.section(["Isaiah", 6, 6], BHSA).to_words

NameError: name 'TFOb' is not defined

In [50]:
TFOb.section(["Genesis", 1, 1], BHSA).to_words.first(lex="BR>[").to_clauses.to_phrases.first(function="Subj")

<phrase_1 ">LHJM">

In [52]:
verb = TFOb.section(["Genesis", 1, 1], BHSA).to_words.first(lex="BR>[")
verb.to_clauses.to_phrases.first(function="Subj")

<phrase_1 ">LHJM">

In [54]:
verb.pretty()

In [None]:
# Same query against the DSS bundle, written with explicit up/down navigation.
# No scroll is passed, so TFOb.section falls back to the first scroll recorded
# for this reference in `dss_sections`.
verb = TFOb.section(["Genesis", 1, 1], DSS).down("word").first(lex="BR>[")
verb.up("clause").down("phrase").first(function="Subj")

In [None]:
TFOb.section(["Genesis", 1, 1], BHSA).down("word").first(lex="BR>[")

In [None]:
TFOb.section(["Genesis", 1, 1], BHSA).down("word").lex

In [None]:
TFOb.section(["Isaiah", 20, 6], DSS)

In [None]:
TFOb.section(["Isaiah", 6, 6], BHSA)

In [None]:
TFOb.section(["Isaiah", 6, 6], BHSA).down("word").lex

In [None]:
word = TFOb(1033, DSS)
word

In [None]:
TFOb(1, BHSA).up("clause").down("phrase").first(function="Subj")

In [None]:
word.up("clause").down("phrase")

In [None]:
word.up("clause").down("phrase")[0]

In [None]:
# NOTE(review): no earlier cell visible here binds `clause` to a TFOb, so on a
# fresh kernel this raises NameError. "toto" is presumably a placeholder value
# meant to exercise the empty-result path of filter() — confirm.
clause.down("phrase").filter(function="toto")

In [None]:
# NOTE(review): `phrases` is not defined by any earlier cell visible here; on a
# fresh kernel this raises NameError.
phrases.function

In [None]:
def verse_lex(section, source=None):
    """Return the `lex` value of every word in one verse.

    Parameters
    ----------
    section : sequence
        Section reference understood by `T.nodeFromSection`,
        e.g. ("Genesis", 12, 6).
    source : object, optional
        Corpus bundle exposing `T`, `F` and `L`. Defaults to the notebook's
        `BHSA` bundle: the bare `T`/`F`/`L` globals cannot be used here
        because an earlier cell deletes them (`del F, L, T`).

    Raises
    ------
    KeyError
        If `T.nodeFromSection` finds no node for `section`.
    """
    if source is None:
        source = BHSA
    verse = source.T.nodeFromSection(section)
    if verse is None:
        raise KeyError(section)
    return [source.F.lex.v(w) for w in source.L.d(verse, "word")]

In [None]:
verse_lex(("Genesis", 12, 6))

In [None]:
# Count all nodes by consuming N.walk() once; progress goes through the app's logger.
A.indent(reset=True)
A.info("Counting nodes ...")

i = sum(1 for _ in N.walk())

A.info("{} nodes".format(i))

In [None]:
# Bare `F` was deleted by an earlier cell (`del F, L, T`); read it from the BHSA bundle.
BHSA.F.otype.slotType

In [None]:
# Report the number of nodes of each type, one log line per type.
# Bare `F` was deleted by an earlier cell (`del F, L, T`), so the BHSA bundle
# is used explicitly.
A.indent(reset=True)
A.info("counting objects ...")

for otype in BHSA.F.otype.all:
    A.indent(level=1, reset=True)

    i = sum(1 for _ in BHSA.F.otype.s(otype))

    A.info("{:>7} {}s".format(i, otype))

A.indent(level=0)
A.info("Done")

In [None]:
# Collect occurrences of the verb BW>[ and print the context of one of them.
# All lookups go through the BHSA bundle: the bare F/L/T globals were deleted
# by an earlier cell (`del F, L, T`).

# NOTE(review): this rebinds `verbs`, which an earlier cell set to a TFOb, and
# the counter is not used in this cell — kept in case later code relies on it.
verbs = collections.Counter()
A.indent(reset=True)
A.info("Collecting data")

lines = []
n = 0
# Only word nodes can match, so iterate over the words instead of every node.
for w in BHSA.F.otype.s("word"):
    if not (BHSA.F.lex.v(w) == "BW>[" and BHSA.F.sp.v(w) == "verb"):
        continue
    n += 1
    # The first three matches are skipped.
    if n <= 3:
        continue

    section = BHSA.T.sectionFromNode(w)
    lines.append(
        {
            "node": w,
            "tense": BHSA.F.vt.v(w),
            "stem": BHSA.F.st.v(w),
            "book": section[0],
            "chapter": section[1],
            "verse": section[2],
        }
    )
    verse = BHSA.L.u(w, "verse")
    print("verse", verse, BHSA.T.text(verse))

    clause = BHSA.L.u(w, "clause")[0]
    print("clause", clause, BHSA.T.text(clause))
    sentence = BHSA.L.u(w, "sentence")
    print("sentence", sentence, BHSA.T.text(sentence))

    clause_phrases = BHSA.L.d(clause, "phrase")
    print("clause_phrases", clause_phrases)

    # First phrase with function "Subj"; None when the clause has no such phrase.
    subject = next(
        (phrase for phrase in clause_phrases if BHSA.F.function.v(phrase) == "Subj"),
        None,
    )
    subject_text = BHSA.T.text(subject) if subject is not None else None
    print("subject", subject, subject_text)

    cmpls = [
        phrase for phrase in clause_phrases if BHSA.F.function.v(phrase) == "Cmpl"
    ]
    print("Complement", cmpls, [BHSA.T.text(cmpl) for cmpl in cmpls])

    # `clause_words` was never defined in the original cell; the words of the
    # clause are what the loop below evidently expects.
    clause_words = BHSA.L.d(clause, "word")
    for word in clause_words:
        print(BHSA.F.sp.v(word))

    # NOTE(review): the loop stops after the first processed occurrence, so
    # `lines` ends up with at most one record — remove this `break` to collect
    # every occurrence.
    break

A.info("Done")

In [None]:
# Bare `F` was deleted by an earlier cell (`del F, L, T`); read it from the BHSA bundle.
BHSA.F.lex

In [None]:
import pandas as pd

In [None]:
df = pd.DataFrame(lines)
df

In [None]:
df.to_csv("BO.csv", index=False)

In [None]:
Fall()

In [None]:
# Print every node feature's value for node 1033.
# Bare `F` was deleted by an earlier cell (`del F, L, T`), so features are
# read from the BHSA bundle.
for name in Fall():
    print(name, getattr(BHSA.F, name).v(1033))

In [None]:
# Bare `F` was deleted by an earlier cell (`del F, L, T`); read it from the BHSA bundle.
BHSA.F.st.v(935)

In [None]:
# Same lookup by feature name; bare `F` was deleted by an earlier cell
# (`del F, L, T`), so the BHSA bundle is used.
getattr(BHSA.F, "st").v(935)

In [None]:
# Print every node feature's value for nodes 1437760 and 935.
# Bare `F` was deleted by an earlier cell (`del F, L, T`), so features are
# read from the BHSA bundle; each feature object is fetched once per name.
for name in Fall():
    feature = getattr(BHSA.F, name)
    print(name, feature.v(1437760))
    print(name, feature.v(935))