**Make sure that you are connected to the kernel associated with our virtual environment . Go to `Kernel` -> `Change kernel` and choose `pia_venv`.**

In [1]:
### REQUIREMENTS

import pandas as pd
import re

In [2]:
### load our data
c_hippocraticum = pd.read_json("../data/c_hippocraticum.json")
c_platonicum = pd.read_json("../data/c_platonicum.json")
c_aristotelicum = pd.read_json("../data/c_aristotelicum.json")

# Key term replacement

In [3]:
### produce a list of all words from the authors
### (useful for a preliminary exploration)
aristotle_list = []
for list_element in c_aristotelicum["lemmata"].tolist():
  aristotle_list.extend(list_element)

plato_list = []
for list_element in c_platonicum["lemmata"].tolist():
  plato_list.extend(list_element)

hippocrates_list = []
for list_element in c_hippocraticum["lemmata"].tolist():
  hippocrates_list.extend(list_element)
one_merged_list = aristotle_list + plato_list + hippocrates_list

In [4]:
# manually define key terms we are interested in:
keyterm_patterns = [("^λ[υ|ύ]π.+", "λύπ*"), ("[α|ά|ἀ|ἄ]λγ.+", "ἄλγ*"), ("^[ὀ|ὠ]δ[ύ|υ]ν.", "ὀδύν*"), ("^π[ό|ο]ν[ο|ό|έ|ε|η|ή|ῆ](?!ρ).+", "πόνο*")]

In [5]:
# unique word forms
matches = []
for pattern_tuple in keyterm_patterns:
  r = re.compile(pattern_tuple[0])
  matches.extend([(pattern_tuple[0], pattern_tuple[1], match, aristotle_list.count(match), plato_list.count(match), hippocrates_list.count(match)) for match in list(filter(r.search, list(set(aristotle_list + hippocrates_list))))])
matches_df = pd.DataFrame(matches, columns=["pattern", "replacement", "match", "c_aristotelicum", "c_platonicum", "c_hippocraticum"]) #, "translation"])
matches_df

Unnamed: 0,pattern,replacement,match,c_aristotelicum,c_platonicum,c_hippocraticum
0,^λ[υ|ύ]π.+,λύπ*,λυπέουσιν,0,0,1
1,^λ[υ|ύ]π.+,λύπ*,λυπηρός,83,21,3
2,^λ[υ|ύ]π.+,λύπ*,λυπέω,113,50,16
3,^λ[υ|ύ]π.+,λύπ*,λυπέοντα,0,0,3
4,^λ[υ|ύ]π.+,λύπ*,λυπεῖταἰ,1,0,0
...,...,...,...,...,...,...
157,^π[ό|ο]ν[ο|ό|έ|ε|η|ή|ῆ](?!ρ).+,πόνο*,πονέουσαι,0,0,2
158,^π[ό|ο]ν[ο|ό|έ|ε|η|ή|ῆ](?!ρ).+,πόνο*,πονεούσης,0,0,1
159,^π[ό|ο]ν[ο|ό|έ|ε|η|ή|ῆ](?!ρ).+,πόνο*,πονητικὰ,1,0,0
160,^π[ό|ο]ν[ο|ό|έ|ε|η|ή|ῆ](?!ρ).+,πόνο*,πονέεται,0,0,7


In [6]:
matches_df.to_csv("../data/matches_pain_words.csv")

In [6]:
# use these regular expressions to make replacements in the list of lemmata
def replacer_word_list(pattern, product, word_list):
  return [re.sub(pattern, product, word) for word in word_list]

def replace_keywords(list_of_words, list_of_tuples):
  for pattern in list_of_tuples:
    list_of_words = replacer_word_list(pattern[0], pattern[1], list_of_words)
  return list_of_words

In [7]:
### test (includes artificial words):
word_list_test = ['βοοκ', 'πᾶς', 'μέλυπρᾷ', "ἄλγτέχνη",'τέχνη' ,'πᾶς', 'μέθοδος', 'ὅμοιος', "λύπη",'πρᾶξίς', 'προαίρεσις', 'ἀγαθός', 'ἐφίημι']
replace_keywords(word_list_test, keyterm_patterns)

['βοοκ',
 'πᾶς',
 'μέλυπρᾷ',
 'ἄλγ*',
 'τέχνη',
 'πᾶς',
 'μέθοδος',
 'ὅμοιος',
 'λύπ*',
 'πρᾶξίς',
 'προαίρεσις',
 'ἀγαθός',
 'ἐφίημι']

In [8]:
# apply the replacement on the level of individual words
c_aristotelicum["lemmata_repl"] = c_aristotelicum["lemmata"].apply(lambda x: replace_keywords(x, keyterm_patterns))
c_platonicum["lemmata_repl"] = c_platonicum["lemmata"].apply(lambda x: replace_keywords(x, keyterm_patterns))
c_hippocraticum["lemmata_repl"] = c_hippocraticum["lemmata"].apply(lambda x: replace_keywords(x, keyterm_patterns))

In [9]:
# apply the replacement on the sentences
def replace_in_sentences(list_of_sentences):
    return [replace_keywords(sentence, keyterm_patterns) for sentence in list_of_sentences]

c_aristotelicum["lemmatized_sentences_repl"] = c_aristotelicum["lemmatized_sentences"].apply(replace_in_sentences)
c_platonicum["lemmatized_sentences_repl"] = c_platonicum["lemmatized_sentences"].apply(replace_in_sentences)
c_hippocraticum["lemmatized_sentences_repl"] = c_hippocraticum["lemmatized_sentences"].apply(replace_in_sentences)

In [10]:
c_platonicum.head(5)

Unnamed: 0,filename,author,title,wordcount,author_id,doc_id,raw_date,date_avr,date_probs,date_manual,provenience,tlg_epithet,clean_string,n_sentences,lemmatized_sentences,lemmata,lemmata_wordcount,lemmata_repl,lemmatized_sentences_repl
535,tlg0059.tlg001.perseus-grc1.xml,Plato,Euthyphro,5393,tlg0059,tlg0059.tlg001,5-4 B.C.,-4,"{'-4.5': 0.5, '-3.5': 0.5}",-3.5,pagan,Philosophici/-ae,"ΕΥΘ. τί νεώτερον, ὦ Σώκρατες, γέγονεν, ὅτι σὺ ...",659,"[[ευθ], [νέος, Σωκράτης, γίγνομαι, λύκειον, κα...","[ευθ, νέος, Σωκράτης, γίγνομαι, λύκειον, καταλ...",2420,"[ευθ, νέος, Σωκράτης, γίγνομαι, λύκειον, καταλ...","[[ευθ], [νέος, Σωκράτης, γίγνομαι, λύκειον, κα..."
536,tlg0059.tlg002.perseus-grc2.xml,Plato,Apology,8659,tlg0059,tlg0059.tlg002,5-4 B.C.,-4,"{'-4.5': 0.5, '-3.5': 0.5}",-3.5,pagan,Philosophici/-ae,"ὅτι μὲν ὑμεῖς, ὦ ἄνδρες Ἀθηναῖοι, πεπόνθατε ὑ...",483,"[[ἀνήρ, Ἀθηναῖος, πάσχω, ἐμός, κατήγορος, οἶδα...","[ἀνήρ, Ἀθηναῖος, πάσχω, ἐμός, κατήγορος, οἶδα,...",3990,"[ἀνήρ, Ἀθηναῖος, πάσχω, ἐμός, κατήγορος, οἶδα,...","[[ἀνήρ, Ἀθηναῖος, πάσχω, ἐμός, κατήγορος, οἶδα..."
537,tlg0059.tlg003.perseus-grc2.xml,Plato,Crito,4268,tlg0059,tlg0059.tlg003,5-4 B.C.,-4,"{'-4.5': 0.5, '-3.5': 0.5}",-3.5,pagan,Philosophici/-ae,"ΣΩ. τί τηνικάδε ἀφῖξαι, ὦ Κρίτων; ἢ οὐ πρῲ ἔτ...",376,"[[σός], [ἀφικνέομαι, κριτής], [εἰμί], [κρ], []...","[σός, ἀφικνέομαι, κριτής, εἰμί, κρ, σός, κρ, ὄ...",1887,"[σός, ἀφικνέομαι, κριτής, εἰμί, κρ, σός, κρ, ὄ...","[[σός], [ἀφικνέομαι, κριτής], [εἰμί], [κρ], []..."
538,tlg0059.tlg004.perseus-grc2.xml,Plato,Phaedo,22519,tlg0059,tlg0059.tlg004,5-4 B.C.,-4,"{'-4.5': 0.5, '-3.5': 0.5}",-3.5,pagan,Philosophici/-ae,"ΕΧ. αὐτός, ὦ Φαίδων, παρεγένου Σωκράτει ἐκείν...",1441,"[[εχ], [αὐτός, φαίδων, παραγίγνομαι, Σωκράτης,...","[εχ, αὐτός, φαίδων, παραγίγνομαι, Σωκράτης, ἐκ...",9900,"[εχ, αὐτός, φαίδων, παραγίγνομαι, Σωκράτης, ἐκ...","[[εχ], [αὐτός, φαίδων, παραγίγνομαι, Σωκράτης,..."
539,tlg0059.tlg005.perseus-grc2.xml,Plato,Cratylus,18985,tlg0059,tlg0059.tlg005,5-4 B.C.,-4,"{'-4.5': 0.5, '-3.5': 0.5}",-3.5,pagan,Philosophici/-ae,ΕΡΜ. βούλει οὖν καὶ Σωκράτει τῷδε ἀνακοινωσώμε...,2260,"[[ερμ], [βούλομαι, Σωκράτης, ἀνακοινόω, λόγος]...","[ερμ, βούλομαι, Σωκράτης, ἀνακοινόω, λόγος, κρ...",8950,"[ερμ, βούλομαι, Σωκράτης, ἀνακοινόω, λόγος, κρ...","[[ερμ], [βούλομαι, Σωκράτης, ἀνακοινόω, λόγος]..."


In [11]:
# explore how the dialogue usually looks like in a form of lemmatized sentences...
c_platonicum["lemmatized_sentences_repl"].tolist()[0]

[['ευθ'],
 ['νέος',
  'Σωκράτης',
  'γίγνομαι',
  'λύκειον',
  'καταλείπω',
  'διατριβή',
  'διατρίβω',
  'βασιλεύς',
  'στοά'],
 ['δίκη', 'τυγχάνω', 'βασιλεύς'],
 ['σός'],
 ['Ἀθηναῖος', 'εὐθύφρων', 'δίκη', 'αὐτός', 'καλέω', 'γραφή'],
 ['ευθ'],
 ['φημί'],
 ['γραφή', 'ἔοικα', 'γράφω'],
 ['ἐκεῖνος', 'καταγιγνώσκω', 'ἕτερος'],
 ['σός'],
 [],
 ['ευθ'],
 ['ἄλλος'],
 ['σός'],
 [],
 ['ευθ'],
 ['οὗτος'],
 ['σός'],
 ['αὐτός', 'γιγνώσκω', 'εὐθύφρων', 'ἀνήρ', 'νέος', 'φαίνω', 'ἀγνώς'],
 ['ὀνομάζω', 'οἴομαι', 'μέλω'],
 ['δῆμος',
  'πιτθεύς',
  'νόος',
  'ἔχω',
  'πιτθέα',
  'μέλω',
  'τετανόθριξ',
  'εὐγένειος',
  'ἐπίγρυπος'],
 ['ευθ'],
 ['ἐννοέω', 'Σωκράτης'],
 ['γραφή', 'γράφω'],
 ['σός'],
 ['ὅστις'],
 ['ἀγεννής', 'δοκέω'],
 ['νέος', 'εἰμί', 'τοσοῦτος', 'πρᾶγμα', 'γιγνώσκω', 'φαῦλος', 'εἰμί'],
 ['ἐκεῖνος', 'φημί', 'οἶδα', 'τρόπος', 'νέος', 'διαφθείρω', 'διαφθείροντες'],
 ['κινδυνεύω',
  'σοφός',
  'εἰμί',
  'ἐμός',
  'ἀμαθία',
  'κατιδὼν',
  'διαφθείροντος',
  'ἡλικιώτης',
  'ἔρχομαι',
  'κατηγ

# Export the data for future usage

In [12]:
c_hippocraticum.to_json("../data/c_hippocraticum_repl.json")
c_platonicum.to_json("../data/c_platonicum_repl.json")
c_aristotelicum.to_json("../data/c_aristotelicum_repl.json")