In [83]:
# Playing around with CLTK (the Classical Language Toolkit).
from cltk import NLP
from cltk.dependency.tree import DependencyTree

In [72]:
# Load the full Latin text of Livy. The encoding is passed explicitly so the
# decoded text does not depend on the platform's locale default.
# NOTE(review): assumes the corpus file is UTF-8 encoded — confirm.
with open("dataset/lat-livy.txt", encoding="utf-8") as fo:
    livy_full = fo.read()
print("Text snippet:", livy_full[:200])
print("Character count:", len(livy_full))
print("Approximate token count:", len(livy_full.split()))

# Keep only the first 1/12 of the characters for the rest of the notebook.
# NOTE(review): presumably to keep the CLTK analysis time manageable — confirm.
SAMPLE_DIVISOR = 12
livy = livy_full[:len(livy_full) // SAMPLE_DIVISOR]
# This second count is for the truncated sample `livy`, not the full text.
print("Approximate token count:", len(livy.split()))

Text snippet: Iam primum omnium satis constat Troia capta in ceteros saevitum esse Troianos, duobus, Aeneae Antenorique, et vetusti iure hospitii et quia pacis reddendaeque Helenae semper auctores fuerant, omne ius
Character count: 921462
Approximate token count: 129799
Approximate token count: 10905


In [73]:
# Build the Latin pipeline, drop its final process, then analyse the sample.
cltk_nlp = NLP(language="lat")
pipeline_processes = cltk_nlp.pipeline.processes
# Remove the last process in place (LatinLexiconProcess in the recorded run).
del pipeline_processes[-1]
print(pipeline_processes)
cltk_doc = cltk_nlp.analyze(text=livy)

‎𐤀 CLTK version '1.1.6'.
Pipeline for language 'Latin' (ISO: 'lat'): `LatinNormalizeProcess`, `LatinStanzaProcess`, `LatinEmbeddingsProcess`, `StopsProcess`, `LatinLexiconProcess`.
[<class 'cltk.alphabet.processes.LatinNormalizeProcess'>, <class 'cltk.dependency.processes.LatinStanzaProcess'>, <class 'cltk.embeddings.processes.LatinEmbeddingsProcess'>, <class 'cltk.stops.processes.StopsProcess'>]


In [74]:
# Punctuation characters whose tokens should be dropped from the token list.
remove = ',:.;?!'
# Test membership against a set of single characters. `token not in remove`
# on the string itself is a substring test, which would also discard the empty
# string and multi-character runs such as ',:' or '?!'.
punctuation_marks = frozenset(remove)
cleaned = [token for token in cltk_doc.tokens if token not in punctuation_marks]

In [75]:
# List every attribute of the analysed document whose name is not a dunder.
doc_attribute_names = [name for name in dir(cltk_doc) if not name.startswith("__")]
print(doc_attribute_names)

['_get_words_attribute', 'embeddings', 'embeddings_model', 'language', 'lemmata', 'morphosyntactic_features', 'normalized_text', 'pipeline', 'pos', 'raw', 'sentence_embeddings', 'sentences', 'sentences_strings', 'sentences_tokens', 'stanza_doc', 'stems', 'tokens', 'tokens_stops_filtered', 'words']


In [77]:
# Sample the annotations: first five lemmata and POS tags, the features of the
# third token, and the tokens of the first two sentences. `sep="\n"` places
# each value on its own line.
print(
    cltk_doc.lemmata[:5],
    cltk_doc.pos[:5],
    cltk_doc.morphosyntactic_features[2],
    cltk_doc.sentences_tokens[:2],
    sep="\n",
)


['Iam', 'primus', 'omnis', 'satis', 'consto']
['ADV', 'ADJ', 'PRON', 'ADV', 'VERB']
{Case: [genitive], Gender: [neuter], Number: [plural], PrononimalType: [indefinite]}
[['Iam', 'primum', 'omnium', 'satis', 'constat', 'Troia', 'capta', 'in', 'ceteros', 'saevitum', 'esse', 'Troianos', ',', 'duobus', ',', 'Aeneae', 'Antenorique', ',', 'et', 'vetusti', 'iure', 'hospitii', 'et', 'quia', 'pacis', 'reddendaeque', 'Helenae', 'semper', 'auctores', 'fuerant', ',', 'omne', 'ius', 'belli', 'Achiuos', 'abstinuisse', ';'], ['casibus', 'deinde', 'variis', 'Antenorem', 'cum', 'multitudine', 'Enetum', ',', 'qui', 'seditione', 'ex', 'Paphlagonia', 'pulsi', 'et', 'sedes', 'et', 'ducem', 'rege', 'Pylaemene', 'ad', 'Troiam', 'amisso', 'quaerebant', ',', 'venisse', 'in', 'intimum', 'maris', 'Hadriatici', 'sinum', ',', 'Euganeisque', 'qui', 'inter', 'mare', 'Alpesque', 'incolebant', 'pulsis', 'Enetos', 'Troianosque', 'eas', 'tenuisse', 'terras', '.']]


In [78]:
# Keep the sixth sentence (index 5) for the cells below and show its text
# alongside an English translation, separated by a blank line.
sentence_6 = cltk_doc.sentences[5]
translation = "Landing there, the Trojans, as men who, after their all but immeasurable wanderings, had nothing left but their swords and ships, were driving booty from the fields, when King Latinus and the Aborigines, who then occupied that country, rushed down from their city and their fields to repel with arms the violence of the invaders."
print("Original:", cltk_doc.sentences_strings[5], end="\n\n")
print("Translation:", translation)

Original: Ibi egressi Troiani , ut quibus ab immenso prope errore nihil praeter arma et naues superesset , cum praedam ex agris agerent , Latinus rex Aboriginesque qui tum ea tenebant loca ad arcendam vim advenarum armati ex urbe atque agris concurrunt .

Translation: Landing there, the Trojans, as men who, after their all but immeasurable wanderings, had nothing left but their swords and ships, were driving booty from the fields, when King Latinus and the Aborigines, who then occupied that country, rushed down from their city and their fields to repel with arms the violence of the invaders.


In [79]:
# Token position of the verb "concurrunt" within the sixth sentence.
concurrunt_position = 40
a_word_concurrunt = sentence_6[concurrunt_position]
print(a_word_concurrunt)

Word(index_char_start=None, index_char_stop=None, index_token=40, index_sentence=5, string='concurrunt', pos=verb, lemma='concurro', stem=None, scansion=None, xpos='L3|modA|tem1|gen9', upos='VERB', dependency_relation='advcl', governor=29, features={Aspect: [imperfective], Mood: [indicative], Number: [plural], Person: [third], Tense: [present], VerbForm: [finite], Voice: [active]}, category={F: [neg], N: [neg], V: [pos]}, stop=False, named_entity=None, syllables=None, phonetic_transcription=None, definition=None)


In [80]:
# Surface form and part of speech of the selected word; `end="\n\n"` leaves a
# blank line between the two entries.
print("`Word.string`:", a_word_concurrunt.string, end="\n\n")
print("`Word.pos`:", a_word_concurrunt.pos)

`Word.string`: concurrunt

`Word.pos`: verb


In [81]:
# Show the type of the word's feature bundle, a blank line, then its contents.
word_features = a_word_concurrunt.features
print("type(`Word.features`):", type(word_features), end="\n\n")
print("`Word.features`:", word_features)

type(`Word.features`): <class 'cltk.morphology.morphosyntax.MorphosyntacticFeatureBundle'>

`Word.features`: {Aspect: [imperfective], Mood: [indicative], Number: [plural], Person: [third], Tense: [present], VerbForm: [finite], Voice: [active]}


In [82]:
# Look up each morphosyntactic feature of the word by name and print it as
# "<Feature>: <value>", one per line.
for feature_name in ("Mood", "Number", "Person", "Tense", "VerbForm", "Voice"):
    print(f"{feature_name}:", a_word_concurrunt.features[feature_name])

Mood: [indicative]
Number: [plural]
Person: [third]
Tense: [present]
VerbForm: [finite]
Voice: [active]


In [84]:
# Show the text of the sixth sentence (index 5) again.
sixth_sentence_text = cltk_doc.sentences_strings[5]
print(sixth_sentence_text)

Ibi egressi Troiani , ut quibus ab immenso prope errore nihil praeter arma et naues superesset , cum praedam ex agris agerent , Latinus rex Aboriginesque qui tum ea tenebant loca ad arcendam vim advenarum armati ex urbe atque agris concurrunt .


In [85]:
# Build a dependency tree from `sentence_6` (the sixth sentence of the document).
a_tree = DependencyTree.to_tree(sentence_6)

In [87]:
# NOTE(review): `pprint` is not used in this cell — confirm whether a later
# cell needs it; if so, the import belongs with the imports at the top.
from pprint import pprint
# Print the tree one node per line, indented by depth, with each node shown as
# "relation | token_index/pos".
a_tree.print_tree()

root | egressi_1/verb
    └─ advmod | Ibi_0/adverb
    └─ nsubj:pass | Troiani_2/noun
        └─ acl:relcl | superesset_15/verb
            └─ punct | ,_3/punctuation
            └─ mark | ut_4/subordinating_conjunction
            └─ obl | quibus_5/pronoun
            └─ obl:arg | immenso_7/adjective
                └─ case | ab_6/adposition
            └─ obl | errore_9/noun
                └─ case | prope_8/adposition
            └─ nsubj | nihil_10/pronoun
            └─ obl | arma_12/noun
                └─ case | praeter_11/adposition
                └─ conj | naues_14/noun
                    └─ cc | et_13/coordinating_conjunction
            └─ advcl | agerent_21/verb
                └─ punct | ,_16/punctuation
                └─ mark | cum_17/subordinating_conjunction
                └─ obj | praedam_18/noun
                └─ obl | agris_20/noun
                    └─ case | ex_19/adposition
                └─ nsubj | rex_24/noun
                    └─ punct | ,_22/punctuatio