In [None]:
import os.path
import pickle
from lxml import etree
import markdown as md
import anoi
from anoi import loaders, wordnet as wn
from anoi.facade import get_facade
import spacy

In [None]:
# Load the cached space from 'space.pkl' when it exists; otherwise build a
# fresh facade and cache its namespace's space for the next run.
# NOTE(security): pickle.load can execute arbitrary code -- only load a
# 'space.pkl' file that comes from a trusted source.
if os.path.exists('space.pkl'):
    with open('space.pkl', 'rb') as pkl_fp:
        space = pickle.load(pkl_fp)
    facade = get_facade(space)
    wordnet_namespace = facade.namespace
else:
    facade = get_facade()
    wordnet_namespace = facade.namespace
    # Bind `space` on this path too: later cells read it, and without this
    # assignment the name exists only on the cache-hit path.
    space = wordnet_namespace.space
    with open('space.pkl', 'wb') as pkl_fp:
        pickle.dump(space, pkl_fp)

In [None]:
# Build a namespace on `space` (the second argument, 'articles', is presumably
# the namespace name -- confirm against ANOINamespace) and a Markdown loader
# constructed with that namespace.
article_namespace = anoi.basis.ANOINamespace(space, 'articles')
mdl = loaders.ANOIMarkdownLoader(article_namespace)

In [None]:
# Read the project README as text, then parse it with the Markdown loader.
# The encoding is given explicitly so the read does not depend on the
# platform's default locale encoding (assumes the README is UTF-8).
with open('../README.md', encoding='utf-8') as fp:
    readme_md = fp.read()

# Parsing needs only the text, so it runs after the file has been closed.
# mdl.html is read only after mdl.load() has run.
readme_struct = mdl.load(readme_md)
readme_elem = mdl.html

In [None]:
# Load the spaCy pipeline named 'en_core_web_trf'; that model package must be
# installed in the kernel's environment for this call to succeed.
nlp = spacy.load('en_core_web_trf')

In [None]:
# Display the parsed README structure as a tuple. dataclasses.astuple()
# requires readme_struct to be a dataclass instance and recurses into nested
# dataclasses, lists, tuples and dicts.
import dataclasses
dataclasses.astuple(readme_struct)

In [None]:
# Per-article lookup tables (indexed by article element below) plus a short
# alias for the traversal helper, which later cells reuse.
walk_article = loaders.article.walk_article
parent_property = loaders.article.make_parent_property(readme_struct)
timestamp_property = loaders.article.make_timestamp_property(readme_struct)

# Report every element that has children: its own level and text, then its
# parent's level and text (or None when there is no parent), then its
# timestamp entry.
for elem in walk_article(readme_struct):
    if not len(elem.children) > 0:
        continue  # elements without children are not reported
    print('_' * 70)
    print(elem.level, elem.elem.text)
    parent = parent_property[elem]
    if parent is None:
        print(parent)
    else:
        print(parent.level, parent.elem.text)
    print(timestamp_property[elem])


In [None]:
from spacy import displacy

In [None]:
# Flatten the article to plain text: one line per element that has text, with
# newlines inside an element's text replaced by spaces. Then run the spaCy
# pipeline over the result.
readme_text = '\n'.join(
    node.elem.text.replace('\n', ' ')
    for node in walk_article(readme_struct)
    if node.elem.text is not None
)
readme_doc = nlp(readme_text)

In [None]:
# Length of the parsed document (for a spaCy Doc this is its token count).
len(readme_doc)

In [None]:
# Peek at the first four tokens of the document: surface text next to lemma.
tok0, tok1, tok2, tok3 = (readme_doc[position] for position in range(4))
toks = (tok0, tok1, tok2, tok3)
[(tok.text, tok.lemma_) for tok in toks]

In [None]:
# Materialise the document's sentences into a list so they can be counted and
# indexed, then show the sentence count together with the first sentence.
readme_sents = list(readme_doc.sents)
len(readme_sents), readme_sents[0]

In [None]:
# Visualise the first sentence with displaCy's 'dep' (dependency) style.
sent = readme_sents[0]
displacy.render(sent, style='dep')

In [None]:
# Print one record per token of the first sentence, each followed by a blank
# line: position, text, lemma, coarse and fine POS tags, dependency label,
# shape, and the alphabetic / stop-word flags.
for index, token in enumerate(readme_sents[0]):
    print(
        index,
        token.text,
        token.lemma_,
        token.pos_,
        token.tag_,
        token.dep_,
        token.shape_,
        token.is_alpha,
        token.is_stop,
        end='\n\n',
    )

In [None]:
# Courtesy https://stackoverflow.com/questions/25698448/how-to-embed-html-into-ipython-output
# Imported from IPython.display, the public location of these names; the
# IPython.core.display path is deprecated for this use.
from IPython.display import display, HTML


def display_uid(uid=anoi.basis.ANOIReserved.ROOT.value):
    """Render ``uid`` through the facade and show the result as HTML output.

    Parameters:
        uid: value passed to ``facade.render_uid()``. Defaults to the value of
            ``anoi.basis.ANOIReserved.ROOT``, evaluated once when this
            function is defined.

    Returns:
        Whatever ``display()`` returns for the rendered HTML object.
    """
    # `facade` is looked up at call time, so a rebound facade is picked up.
    return display(HTML(data=facade.render_uid(uid)))

In [None]:
# Render the default UID (the value of anoi.basis.ANOIReserved.ROOT).
display_uid()

In [None]:
# Obtain the types namespace for the facade's space; later cells index it by
# string key and test key membership on it.
type_ns = anoi.basis.anoi_types(facade.space)

In [None]:
# Show the entry stored under the 'STRING' key of the types namespace.
type_ns['STRING']

In [None]:
# Render the entry stored under 'STRING' in the types namespace. It is looked
# up by key rather than through an Out[N] history variable, which only exists
# when the cells have been executed in one particular order.
display_uid(type_ns['STRING'])

In [None]:
# Render one specific, hard-coded UID.
# NOTE(review): 0x2d2b4f is presumably only meaningful for the space this
# notebook was authored against -- confirm it still resolves after the space
# is rebuilt, or look the UID up by name instead.
display_uid(0x2d2b4f)

In [None]:
# Membership check: does the types namespace contain the key 'STRING'?
'STRING' in type_ns