# Configure

* Load cltk

In [1]:
from cltk import NLP
from cltk.languages.utils import get_lang

#from unidecode import unidecode

from pathlib import Path

import pandas as pd

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# Initialize the Latin pipeline
nlp = NLP(language="lat")


‎𐤀 CLTK version '1.4.0'. When using the CLTK in research, please cite: https://aclanthology.org/2021.acl-demo.3/

Pipeline for language 'Latin' (ISO: 'lat'): `LatinNormalizeProcess`, `LatinStanzaProcess`, `LatinEmbeddingsProcess`, `StopsProcess`, `LatinLexiconProcess`.

⸖ ``LatinStanzaProcess`` using Stanza model from the Stanford NLP Group: https://stanfordnlp.github.io/stanza/ . Please cite: https://arxiv.org/abs/2003.07082
⸖ ``LatinEmbeddingsProcess`` using word2vec model by University of Oslo from http://vectors.nlpl.eu/ . Please cite: https://aclanthology.org/W17-0237/
⸖ ``LatinLexiconProcess`` using Lewis's *An Elementary Latin Dictionary* (1890).

⸎ To suppress these messages, instantiate ``NLP()`` with ``suppress_banner=True``.


# CLTK Test

In [3]:
# Your Latin text
latin_text = "Arma virumque cano, Troiae qui primus ab oris"

# Process the text
doc = nlp.analyze(text=latin_text)

#print(doc.words)

# Get lemmas
lemmas = [word.lemma for word in doc.words]
list_pos = [word.pos for word in doc.words]

list_words = [word.string for word in doc.words]

print(list_pos)
print(list_words)
print(lemmas)

# Print original words and their lemmas
for word, lemma in zip([word.string for word in doc.words], lemmas):
   print(f"Word: {word:<20} Lemma: {lemma}")

[noun, verb, verb, punctuation, noun, pronoun, adjective, adposition, noun]
['Arma', 'virumque', 'cano', ',', 'Troiae', 'qui', 'primus', 'ab', 'oris']
['arma', 'viruoquor', 'cano', ',', 'troias', 'qui', 'primus', 'ab', 'oris']
Word: Arma                 Lemma: arma
Word: virumque             Lemma: viruoquor
Word: cano                 Lemma: cano
Word: ,                    Lemma: ,
Word: Troiae               Lemma: troias
Word: qui                  Lemma: qui
Word: primus               Lemma: primus
Word: ab                   Lemma: ab
Word: oris                 Lemma: oris


# Load text files

## Load Text

In [12]:
fn = Path('..') / 'Files' / 'pliny6_16_paragraph_1.txt'
print(fn)

../Files/pliny6_16_paragraph_1.txt


In [13]:
with open(fn, 'r', encoding='utf-8') as file:
   text = file.read()
print(text)

Petis ut tibi avunculi mei exitum scribam, quo verius tradere posteris possis. Gratias ago; nam video morti eius si celebretur a te immortalem gloriam esse propositam. 2 Quamvis enim pulcherrimarum clade terrarum, ut populi ut urbes memorabili casu, quasi semper victurus occiderit, quamvis ipse plurima opera et mansura condiderit, multum tamen perpetuitati eius scriptorum tuorum aeternitas addet. 3 Equidem beatos puto, quibus deorum munere datum est aut facere scribenda aut scribere legenda, beatissimos vero quibus utrumque. Horum in numero avunculus meus et suis libris et tuis erit. Quo libentius suscipio, deposco etiam quod iniungis.


## Load AP word list

In [None]:
import pandas as pd

fn_ap = Path('..') / 'Files' / 'ap-latin-draft-course-framework-vocab-list.csv'

df_ap_vocab = pd.read_csv(fn_ap)

In [18]:
print(df_ap_vocab)

                        Required Vocabulary  \
0                      —, suī, sibi, sē, sē   
1                                ā, ab, abs   
2                      abeō, -īre, iī, itum   
3                       absum, abesse, āfuī   
4      accēdō (adc-), -ere, -cessī, -cessum   
..                                      ...   
707                         voluptās, -ātis   
708             volvō, -ere, volvī, volūtum   
709  vōs, vestrum/vestrī, vōbīs, vōs, vōbīs   
710                               vōtum, -ī   
711                              vōx, vōcis   

                                            Definition  Suggested Reading  \
0                 himself, herself, itself, themselves                1.1   
1              from, away from, out of, by (with abl.)                1.1   
2    to go from, go away, go off, go forth, go, depart                2.2   
3                           to be away from, be absent                5.3   
4    to go to, come to, come near, draw near, appr

In [25]:
list_ap_vocab = list(df_ap_vocab['Base Word'])
print(list_ap_vocab)

['—', 'a', 'abeo', 'absum', 'accedo', 'accendo', 'accido', 'accipio', 'acies', 'ad', 'addo', 'adeo', 'adhuc', 'adsum', 'adsurgo', 'adversus', 'adversus', 'Aeneas', 'aequo', 'aequor', 'aetas', 'aether', 'affirmo', 'ager', 'agmen', 'ago', 'aio', 'Alexandrinus', 'alienus', 'aliqui', 'aliquis', 'alius', 'alter', 'altum', 'altus', 'amicus', 'amicus', 'amitto', 'amnis', 'amo', 'amor', 'amplector', 'an', 'anima', 'animus', 'annus', 'ante', 'antiquus', 'appareo', 'apud', 'aqua', 'ara', 'arbor', 'ardens', 'ardeo', 'arma', 'Arpocras', 'ars', 'arvum', 'arx', 'aspicio', 'at', 'ater', 'atque', 'audeo', 'audio', 'aura', 'auris', 'aurum', 'aut', 'autem', 'avunculus', 'beatus', 'bellum', 'bene', 'beneficium', 'bonus', 'cado', 'caedes', 'caelum', 'caligo', 'Camilla', 'campus', 'cano', 'capillus', 'capio', 'caput', 'carmen', 'Carthago', 'carus', 'castra', 'castrum', 'casus', 'catena', 'caterva', 'causa', 'cedo', 'celer', 'certus', 'ceterus', 'cingo', 'cinis', 'circum', 'civis', 'civitas', 'clarus', 'cla

In [48]:
for w in list_ap_vocab:
    if w in ['omnis']:
        print(w)

omnis


In [14]:
# Process the text
doc = nlp.analyze(text=text)

#print(doc.words)

# Get lemmas
lemmas = [word.lemma for word in doc.words]
list_pos = [word.pos for word in doc.words]

list_words = [word.string for word in doc.words]

print(list_pos)
print(list_words)
print(lemmas)

# Print original words and their lemmas
for word, lemma in zip([word.string for word in doc.words], lemmas):
   print(f"Word: {word:<20} Lemma: {lemma}")

[verb, subordinating_conjunction, pronoun, noun, determiner, noun, verb, punctuation, pronoun, adverb, verb, adjective, verb, punctuation, noun, verb, punctuation, particle, verb, noun, pronoun, subordinating_conjunction, verb, adposition, pronoun, adjective, noun, auxiliary, verb, punctuation, numeral, adverb, particle, adjective, noun, noun, punctuation, subordinating_conjunction, noun, subordinating_conjunction, noun, adjective, noun, punctuation, subordinating_conjunction, adverb, verb, verb, punctuation, subordinating_conjunction, determiner, determiner, noun, coordinating_conjunction, adjective, verb, punctuation, adverb, adverb, noun, pronoun, verb, determiner, noun, verb, punctuation, numeral, verb, adjective, verb, punctuation, pronoun, noun, noun, verb, auxiliary, coordinating_conjunction, verb, verb, coordinating_conjunction, verb, verb, punctuation, adjective, particle, pronoun, determiner, punctuation, determiner, adposition, noun, noun, determiner, coordinating_conjunctio

In [50]:
text_input = input("Enter the Text: ")

In [51]:
# Process the text
doc = nlp.analyze(text=text_input)

#print(doc.words)

# Get lemmas
list_lemmas = [word.lemma for word in doc.words]
list_pos = [word.pos for word in doc.words]

list_words = [word.string for word in doc.words]
print(list_words)
print(list_lemmas)
print(list_pos)

['Vertit', 'ille', 'consilium', 'et', 'quod', 'studioso', 'animo', 'incohaverat', 'obit', 'maximo', '.', 'Deducit', 'quadriremes', ',', 'ascendit', 'ipse', 'non', 'Rectinae', 'modo', 'sed', 'multis', '—', 'erat', 'enim', 'frequens', 'amoenitas', 'orae', '—', 'laturus', 'auxilium', '.', '10', 'Properat', 'illuc', 'unde', 'alii', 'fugiunt', ',', 'rectumque', 'cursum', 'recta', 'gubernacula', 'in', 'periculum', 'tenet', 'adeo', 'solutus', 'metu', ',', 'ut', 'omnes', 'illius', 'mali', 'motus', 'omnes', 'figuras', 'ut', 'deprenderat', 'oculis', 'dictaret', 'enotaretque', '.']
['vertior', 'ille', 'consilium', 'et', 'qui', 'studiosus', 'animus', 'incoho', 'obeo', 'magnus', '.', 'deduco', 'quadrirem', ',', 'ascendo', 'ipse', 'non', 'rectinus', 'modus', 'sed', 'multus', '—', 'sum', 'enim', 'frequens', 'amoenitas', 'ora', '—', 'latuo', 'auxilium', '.', '10', 'propero', 'illuc', 'unde', 'alius', 'fugio', ',', 'rectumquus', 'cursus', 'rectus', 'gubernaculum', 'in', 'periculum', 'teneo', 'adeo', 's

In [54]:
words_list = text.split()
vocab_list = list_ap_vocab
overlap_lemmas = []
overlap_words = []
for word, lemma in zip(list_words, list_lemmas):
    if lemma in vocab_list:
        overlap_lemmas.append(lemma)
        overlap_words.append(word)


for word, lemma in zip(overlap_words, overlap_lemmas):
    print(word, lemma)



ille ille
consilium consilium
et et
quod qui
animo animus
maximo magnus
ipse ipse
non non
modo modus
sed sed
multis multus
— —
erat sum
enim enim
orae ora
— —
Properat propero
unde unde
alii alius
fugiunt fugio
cursum cursus
in in
periculum periculum
tenet teneo
adeo adeo
metu metus
ut ut
omnes omnis
illius ille
mali malum
omnes omnis
figuras figura
ut ut
oculis oculus


In [47]:
print(text_input)

Vertit ille consilium et quod studioso animo incohaverat obit maximo. Deducit quadriremes, ascendit ipse non Rectinae modo sed multis — erat enim frequens amoenitas orae — laturus auxilium. 10 Properat illuc unde alii fugiunt, rectumque cursum recta gubernacula in periculum tenet adeo solutus metu, ut omnes illius mali motus omnes figuras ut deprenderat oculis dictaret enotaretque.
