In [3]:
import json
import spacy
from collections import defaultdict

# Spin up the LatinCy pipeline used to tokenize and lemmatize each play
nlp = spacy.load("la_core_web_lg")

# Read the full corpus from its JSON file; each entry is indexed below
# by its "title" and "text" keys
with open("latin_tragedies_corpus.json", encoding="utf-8") as corpus_file:
    raw_corpus = json.load(corpus_file)

# Lemmas whose surface forms we want to trace back to individual plays
target_lemmas = {"uomo", "numquid", "inquio"}

# lemma -> set of (play title, surface form) pairs; using a set collapses
# repeated occurrences of the same form within the same play
lemma_to_words = defaultdict(set)

for record in raw_corpus:
    play_title = record["title"]
    for tok in nlp(record["text"]):
        found = tok.lemma_
        if found not in target_lemmas:
            continue
        lemma_to_words[found].add((play_title, tok.text))

# Report lemmas in the order they were first encountered in the corpus,
# with each lemma's (play, form) pairs listed in sorted order
for lemma in lemma_to_words:
    print(f"\n{lemma}:")
    for play, word in sorted(lemma_to_words[lemma]):
        print(f"  {play}: {word}")



numquid:
  Hercules Furens: numquid
  Hercules Oetaeus: numquid
  Phoenissae: numquid
  Thyestes: numquid
  Troades: numquid

uomo:
  Ecerinis: vomunt
  Oedipus: vomit
  Phaedra: vomit
  Phaedra: vomunt
  Phoenissae: vomit

inquio:
  Hercules Oetaeus: inquit
  Medea: inquit
  Troades: inquit
