In [22]:
import json
import urllib.request
from toolset import (get_examples, get_frequency,
                    get_text, get_tokens)
from cltk import NLP
from cltk.lemmatize.grc import GreekBackoffLemmatizer
# Shared lemmatizer instance; later cells call `lemmatizer.lemmatize(...)` on
# the note words and on the filtered creed word list.
lemmatizer = GreekBackoffLemmatizer()

from cltk.alphabet.text_normalization import cltk_normalize

In [8]:
# Read the creed text and derive its tokens and their frequencies using the
# helpers imported from the local `toolset` module.
creed, wf = get_text("creed.txt")
tokens, token_set, phrases = get_tokens(creed)
frequency = get_frequency(tokens)

# Run the CLTK pipeline for language code "grc" over the full text.
cltk_nlp = NLP(language="grc")
cltk_doc = cltk_nlp.analyze(creed)

# Every entry of `frequency` is indexed as a pair whose first element is the
# token; keep just the tokens, in the order `frequency` yields them.
wordlist = [entry[0] for entry in frequency]

‎𐤀 CLTK version '1.0.24'.
Pipeline for language 'Ancient Greek' (ISO: 'grc'): `GreekNormalizeProcess`, `GreekStanzaProcess`, `GreekEmbeddingsProcess`, `StopsProcess`, `GreekNERProcess`.


In [10]:
def request(action, **params):
    """Build the payload dictionary for a single call to the JSON service.

    Args:
        action: Name of the action to invoke.
        **params: Keyword arguments collected into the ``'params'`` entry.

    Returns:
        A dict with the keys ``'action'``, ``'params'`` and ``'version'``
        (in that order); ``'version'`` is always 6.
    """
    payload = dict(action=action, params=params, version=6)
    return payload

def invoke(action, **params):
    """Send one action to the JSON service at http://localhost:8765 and return its result.

    The payload comes from ``request(action, **params)`` and is sent as
    UTF-8 encoded JSON. The reply is parsed as JSON and validated before the
    result is handed back.
    NOTE(review): the port and payload shape look like AnkiConnect - confirm.

    Args:
        action: Name of the remote action.
        **params: Keyword arguments forwarded as the action's parameters.

    Returns:
        The value stored under the ``'result'`` key of the reply.

    Raises:
        Exception: If the reply does not contain exactly two fields, lacks an
            ``'error'`` or a ``'result'`` field, or carries a non-null
            ``'error'`` (the error value becomes the exception argument).
    """
    payload = json.dumps(request(action, **params)).encode('utf-8')
    http_request = urllib.request.Request('http://localhost:8765', payload)

    # Use the response as a context manager so the connection is closed as
    # soon as the body has been parsed, rather than whenever it is collected.
    with urllib.request.urlopen(http_request) as http_response:
        response = json.load(http_response)

    if len(response) != 2:
        raise Exception('response has an unexpected number of fields')

    if 'error' not in response:
        raise Exception('response is missing required error field')

    if 'result' not in response:
        raise Exception('response is missing required result field')

    if response['error'] is not None:
        raise Exception(response['error'])

    return response['result']

# Look up the IDs of all notes matching the deck query, then fetch the full
# record for each of those IDs.
deck_query = 'deck:Liturgia'
noteIDs = invoke('findNotes', query=deck_query)
notesInfo = invoke('notesInfo', notes=noteIDs)

In [37]:
# Normalized 'Word' field of every note record returned by the service.
notes = [cltk_normalize(card['fields']['Word']['value']) for card in notesInfo]

# Normalize each creed token BEFORE comparing it with the note words. The
# note words are already normalized, so testing the raw token against them
# would miss any token whose raw form differs from its normalized form.
# A set keeps the membership test constant-time.
known_words = set(notes)
fwl = [word for word in map(cltk_normalize, wordlist) if word not in known_words]

# Lemmatize both lists; later cells read index 1 of each returned pair as the lemma.
notes_group = lemmatizer.lemmatize(notes)
fwl_group = lemmatizer.lemmatize(fwl)

In [51]:
# Distinct lemmas on each side: the second element of every pair returned by
# the lemmatizer.
notes_lemmas = {pair[1] for pair in notes_group}
fwl_lemmas = {pair[1] for pair in fwl_group}

In [45]:
# Lemmas from the creed word list that no existing note covers.
filtered_fwl_lemmas = list(filter(lambda lemma: lemma not in notes_lemmas, fwl_lemmas))
# Keep each lemmatizer pair from the word list whose lemma is still uncovered.
reworked_fwl = [pair for pair in fwl_group if pair[1] in filtered_fwl_lemmas]

In [55]:
# NOTE(review): pandas is imported here but not used in the visible cells;
# if it is needed further down, consider moving it to the import cell at the top.
import pandas as pd
# Bare last expression: the notebook renders the list of pairs as this cell's output.
reworked_fwl

[('Πιστεύω', 'Πιστεύω'),
 ('Θεόν', 'Θεόν'),
 ('Πατέρα', 'Πατέρα'),
 ('παντοκράτορα', 'παντοκράτωρ'),
 ('ποιητὴν', 'ποιητής'),
 ('οὐρανοῦ', 'οὐρανός'),
 ('ὁρατῶν', 'ὁρατός'),
 ('ἀοράτων', 'ἀόρατος'),
 ('Κύριον', 'Κύριον'),
 ('Ἰησοῦν', 'Ἰησοῦν'),
 ('Χριστόν', 'Χριστόν'),
 ('Υἱὸν', 'Υἱὸν'),
 ('μονογενῆ', 'μονογενής'),
 ('πρὸ', 'πρό'),
 ('Φῶς', 'Φῶς'),
 ('Θεὸν', 'Θεὸν'),
 ('ἀληθινὸν', 'ἀληθινός'),
 ('ἀληθινοῦ', 'ἀληθινός'),
 ('οὐ', 'οὐ'),
 ('ὁμοούσιον', 'ὁμοούσιος'),
 ('Πατρί', 'Πατρί'),
 ('πάντα', 'πᾶς'),
 ('Τὸν', 'Τὸν'),
 ('ἡμετέραν', 'ἡμέτερος'),
 ('σωτηρίαν', 'σωτηρία'),
 ('κατελθόντα', 'κατέρχομαι'),
 ('οὐρανῶν', 'οὐρανός'),
 ('σαρκωθέντα', 'σαρκόω'),
 ('Πνεύματος', 'Πνεύματος'),
 ('Ἁγίου', 'Ἁγίου'),
 ('Μαρίας', 'Μαρίας'),
 ('Παρθένου', 'Παρθένου'),
 ('ἐνανθρωπήσαντα', 'ἐνανθρωπέω'),
 ('Σταυρωθέντα', 'Σταυρωθέντα'),
 ('ὑπὲρ', 'ὑπέρ'),
 ('Ποντίου', 'Ποντίου'),
 ('Πιλάτου', 'Πιλάτου'),
 ('παθόντα', 'πάσχω'),
 ('ταφέντα', 'θάπτω'),
 ('ἀναστάντα', 'ἀνίστημι'),
 ('τρίτῃ', 'τρίτος'),
 ('Γρ