# Configure

* Load cltk

In [2]:
from cltk import NLP
from cltk.languages.utils import get_lang

#from unidecode import unidecode

from pathlib import Path

import pandas as pd

  from .autonotebook import tqdm as notebook_tqdm


In [3]:
# Initialize the Latin pipeline
nlp = NLP(language="lat")


‎𐤀 CLTK version '1.4.0'. When using the CLTK in research, please cite: https://aclanthology.org/2021.acl-demo.3/

Pipeline for language 'Latin' (ISO: 'lat'): `LatinNormalizeProcess`, `LatinStanzaProcess`, `LatinEmbeddingsProcess`, `StopsProcess`, `LatinLexiconProcess`.

⸖ ``LatinStanzaProcess`` using Stanza model from the Stanford NLP Group: https://stanfordnlp.github.io/stanza/ . Please cite: https://arxiv.org/abs/2003.07082
⸖ ``LatinEmbeddingsProcess`` using word2vec model by University of Oslo from http://vectors.nlpl.eu/ . Please cite: https://aclanthology.org/W17-0237/
⸖ ``LatinLexiconProcess`` using Lewis's *An Elementary Latin Dictionary* (1890).

⸎ To suppress these messages, instantiate ``NLP()`` with ``suppress_banner=True``.


# CLTK Test

In [4]:
# Your Latin text
latin_text = "Arma virumque cano, Troiae qui primus ab oris"

# Process the text
doc = nlp.analyze(text=latin_text)

#print(doc.words)

# Get lemmas
lemmas = [word.lemma for word in doc.words]
list_pos = [word.pos for word in doc.words]

list_words = [word.string for word in doc.words]

print(list_pos)
print(list_words)
print(lemmas)

# Print original words and their lemmas
for word, lemma in zip([word.string for word in doc.words], lemmas):
   print(f"Word: {word:<20} Lemma: {lemma}")

[noun, verb, verb, punctuation, noun, pronoun, adjective, adposition, noun]
['Arma', 'virumque', 'cano', ',', 'Troiae', 'qui', 'primus', 'ab', 'oris']
['arma', 'viruoquor', 'cano', ',', 'troias', 'qui', 'primus', 'ab', 'oris']
Word: Arma                 Lemma: arma
Word: virumque             Lemma: viruoquor
Word: cano                 Lemma: cano
Word: ,                    Lemma: ,
Word: Troiae               Lemma: troias
Word: qui                  Lemma: qui
Word: primus               Lemma: primus
Word: ab                   Lemma: ab
Word: oris                 Lemma: oris


# Load text files

## Load Text

In [5]:
fn = Path('..') / 'Files' / 'pliny6_16_paragraph_1.txt'
print(fn)

../Files/pliny6_16_paragraph_1.txt


In [6]:
with open(fn, 'r', encoding='utf-8') as file:
   text = file.read()
print(text)

Petis ut tibi avunculi mei exitum scribam, quo verius tradere posteris possis. Gratias ago; nam video morti eius si celebretur a te immortalem gloriam esse propositam. 2 Quamvis enim pulcherrimarum clade terrarum, ut populi ut urbes memorabili casu, quasi semper victurus occiderit, quamvis ipse plurima opera et mansura condiderit, multum tamen perpetuitati eius scriptorum tuorum aeternitas addet. 3 Equidem beatos puto, quibus deorum munere datum est aut facere scribenda aut scribere legenda, beatissimos vero quibus utrumque. Horum in numero avunculus meus et suis libris et tuis erit. Quo libentius suscipio, deposco etiam quod iniungis.


## Load AP word list

In [7]:
import pandas as pd

# Old version!
#fn_ap = Path('..') / 'Files' / 'ap-latin-draft-course-framework-vocab-list.csv'

# New version!
fn_ap = Path('..') / 'Files' / 'ap-latin-draft-course-framework-vocab-list-revised.csv'

df_ap_vocab = pd.read_csv(fn_ap)

In [8]:
print(df_ap_vocab)

                                  Vocabulary Part of Speech  \
0                                 a, ab, abs    preposition   
1                     abeo, -ire, -ii, -itum           verb   
2                        absum, abesse, afui           verb   
3     accedo (adc-), -ere, -cessi, \n-cessum           verb   
4    accendo (adc-), -ere, -cendi, \n-censum           verb   
..                                       ...            ...   
995                           votum, -i (n.)           noun   
996                          vox, vocis (f.)           noun   
997   vulnero (volnero), -are, -avi, \n-atum           verb   
998              vulnus (volnus), -eris (n.)           noun   
999                vultus (voltus), -us (m.)           noun   

                                            Definition  Suggested Reading  \
0              (with abl.) from, away from, out of, by                1.1   
1    to go from, go away, go off, go forth, go, \nd...                1.1   
2           

In [9]:
list_ap_vocab = list(df_ap_vocab['Base Word'])
print(list_ap_vocab)

['a', 'abeo', 'absum', 'accedo', 'accendo', 'accido', 'accipio', 'accuso', 'acer', 'acies', 'ad', 'addo', 'adeo', 'adhuc', 'adsum', 'adsurgo', 'adversus', 'adversus', 'aedificium', 'aedifico', 'aeger', 'Aeneas', 'aequo', 'aequor', 'aequus', 'aestas', 'aetas', 'aether', 'affirmo', 'ager', 'agmen', 'ago', 'agricola', 'aio', 'albus', 'Alexandrinus', 'alienus', 'aliqui', 'aliquis', 'alius', 'alter', 'altum', 'altus', 'ambulo', 'amicitia', 'amicus', 'amicus', 'amitto', 'amnis', 'amo', 'amor', 'amplector', 'an', 'ancilla', 'angustus', 'anima', 'animal', 'animus', 'annus', 'ante', 'antea', 'antiquus', 'anxius', 'aperio', 'appareo', 'appello', 'Appia', 'appropinquo', 'apud', 'aqua', 'ara', 'arbor', 'architectus', 'arcus', 'ardens', 'ardeo', 'arena', 'arma', 'Arpocras', 'ars', 'arvum', 'arx', 'ascendo', 'aspicio', 'at', 'ater', 'Athenae', 'athleta', 'atque', 'atrium', 'attonitus', 'audacia', 'audax', 'audeo', 'audio', 'aura', 'Aurelius', 'aureus', 'auris', 'aurum', 'aut', 'autem', 'auxilium', '

In [10]:
for w in list_ap_vocab:
    if w in ['omnis']:
        print(w)

omnis


In [11]:
# Process the text
doc = nlp.analyze(text=text)

#print(doc.words)

# Get lemmas
lemmas = [word.lemma for word in doc.words]
list_pos = [word.pos for word in doc.words]

list_words = [word.string for word in doc.words]

print(list_pos)
print(list_words)
print(lemmas)

# Print original words and their lemmas
for word, lemma in zip([word.string for word in doc.words], lemmas):
   print(f"Word: {word:<20} Lemma: {lemma}")

[verb, subordinating_conjunction, pronoun, noun, determiner, noun, verb, punctuation, pronoun, adverb, verb, adjective, verb, punctuation, noun, verb, punctuation, particle, verb, noun, pronoun, subordinating_conjunction, verb, adposition, pronoun, adjective, noun, auxiliary, verb, punctuation, numeral, adverb, particle, adjective, noun, noun, punctuation, subordinating_conjunction, noun, subordinating_conjunction, noun, adjective, noun, punctuation, subordinating_conjunction, adverb, verb, verb, punctuation, subordinating_conjunction, determiner, determiner, noun, coordinating_conjunction, adjective, verb, punctuation, adverb, adverb, noun, pronoun, verb, determiner, noun, verb, punctuation, numeral, verb, adjective, verb, punctuation, pronoun, noun, noun, verb, auxiliary, coordinating_conjunction, verb, verb, coordinating_conjunction, verb, verb, punctuation, adjective, particle, pronoun, determiner, punctuation, determiner, adposition, noun, noun, determiner, coordinating_conjunctio

In [12]:
text_input = input("Enter the Text: ")

In [13]:
# Process the text
doc = nlp.analyze(text=text_input)

#print(doc.words)

# Get lemmas
list_lemmas = [word.lemma for word in doc.words]
list_pos = [word.pos for word in doc.words]

list_words = [word.string for word in doc.words]
print(list_words)
print(list_lemmas)
print(list_pos)

['Madeleine', 'Pooler', '2', ':', '20', 'PM', '(', '4', 'hours', 'ago', ')', 'to', 'Brian', ',', 'me', 'Vertit', 'ille', 'consilium', 'et', 'quod', 'studioso', 'animo', 'incohaverat', 'obit', 'maximo', '.', 'Deducit', 'quadriremes', ',', 'ascendit', 'ipse', 'non', 'Rectinae', 'modo', 'sed', 'multis', '—', 'erat', 'enim', 'frequens', 'amoenitas', 'orae', '—', 'laturus', 'auxilium', '.', '10', 'Properat', 'illuc', 'unde', 'alii', 'fugiunt', ',', 'rectumque', 'cursum', 'recta', 'gubernacula', 'in', 'periculum', 'tenet', 'adeo', 'solutus', 'metu', ',', 'ut', 'omnes', 'illius', 'mali', 'motus', 'omnes', 'figuras', 'ut', 'deprenderat', 'oculis', 'dictaret', 'enotaretque', '.', '11', 'Iam', 'navibus', 'cinis', 'incidebat', ',', 'quo', 'propius', 'accederent', ',', 'calidior', 'et', 'densior', ';', 'iam', 'pumices', 'etiam', 'nigrique', 'et', 'ambusti', 'et', 'fracti', 'igne', 'lapides', ';', 'iam', 'vadum', 'subitum', 'ruinaque', 'montis', 'litora', 'obstantia', '.', 'Cunctatus', 'paulum', 'a

In [14]:
words_list = text.split()
vocab_list = list_ap_vocab
overlap_lemmas = []
overlap_words = []
for word, lemma in zip(list_words, list_lemmas):
    if lemma in vocab_list:
        overlap_lemmas.append(lemma)
        overlap_words.append(word)


for word, lemma in zip(overlap_words, overlap_lemmas):
    print(word, lemma)



ago ago
me ego
ille ille
consilium consilium
et et
quod qui
animo animus
maximo magnus
ascendit ascendo
ipse ipse
non non
modo modus
sed sed
multis multus
— —
erat sum
enim enim
orae ora
— —
auxilium auxilium
Properat propero
unde unde
alii alius
fugiunt fugio
cursum cursus
in in
periculum periculum
tenet teneo
adeo adeo
metu metus
ut ut
omnes omnis
illius ille
mali malum
omnes omnis
figuras figura
ut ut
oculis oculus
Iam iam
navibus navis
cinis cinis
incidebat incido
quo qui
propius propius
accederent accedo
et et
densior densus
iam iam
etiam etiam
et et
et et
fracti frango
igne ignis
lapides lapis
iam iam
montis mons
an an
mox mox
ut ut
ita ita
faceret facio
monenti moneo
inquit inquam
iuvat iuvo
erat sum
sinu sinus
medio medius
— —
nam nam
mare mare
ibi ibi
quamquam quamquam
nondum nondum
periculo periculum
appropinquante appropinquo
tamen tamen
et et
cum cum
cresceret cresco
proximo proximus
in in
certus certus
si si
ventus ventus
Quo qui
tunc tunc
avunculus avunculus
meus meus
tim

In [15]:
print(text_input)

 Madeleine Pooler 2:20 PM (4 hours ago) to Brian, me  Vertit ille consilium et quod studioso animo incohaverat obit maximo. Deducit quadriremes, ascendit ipse non Rectinae modo sed multis — erat enim frequens amoenitas orae — laturus auxilium. 10 Properat illuc unde alii fugiunt, rectumque cursum recta gubernacula in periculum tenet adeo solutus metu, ut omnes illius mali motus omnes figuras ut deprenderat oculis dictaret enotaretque.  11 Iam navibus cinis incidebat, quo propius accederent, calidior et densior; iam pumices etiam nigrique et ambusti et fracti igne lapides; iam vadum subitum ruinaque montis litora obstantia. Cunctatus paulum an retro flecteret, mox gubernatori ut ita faceret monenti 'Fortes' inquit 'fortuna iuvat: Pomponianum pete.' 12 Stabiis erat diremptus sinu medio — nam sensim circumactis curvatisque litoribus mare infunditur -; ibi quamquam nondum periculo appropinquante, conspicuo tamen et cum cresceret proximo, sarcinas contulerat in naves, certus fugae si contra