**Make sure that you are connected to the kernel associated with our virtual environment . Go to `Kernel` -> `Change kernel` and choose `pia_venv`.**

In [6]:
### REQUIREMENTS

import pandas as pd
import re

In [2]:
### load our data
c_aristotelicum = pd.read_json("../data/c_aristotelicum.json")
c_hippocraticum = pd.read_json("../data/c_hippocraticum.json")

# Key term replacement

In [3]:
### produce a list of all words from the authors
### (useful for a preliminary exploration)
aristotle_list = []
for list_element in c_aristotelicum["lemmata"].tolist():
  aristotle_list.extend(list_element)
hippocrates_list = []
for list_element in c_hippocraticum["lemmata"].tolist():
  hippocrates_list.extend(list_element)
one_merged_list = aristotle_list + hippocrates_list

In [4]:
# manually define key terms we are interested in:
keyterm_patterns = [("^λ[υ|ύ]π.+", "λύπ*"), ("[α|ά|ἀ|ἄ]λγ.+", "ἄλγ*"), ("^[ὀ|ὠ]δ[ύ|υ]ν.", "ὀδύν*"), ("^π[ό|ο]ν[ο|ό|έ|ε|η|ή|ῆ](?!ρ).+", "πόνο*")]

In [7]:
# unique word forms
matches = []
for pattern_tuple in keyterm_patterns:
  r = re.compile(pattern_tuple[0])
  matches.extend([(pattern_tuple[0], pattern_tuple[1], match, aristotle_list.count(match), hippocrates_list.count(match)) for match in list(filter(r.search, list(set(aristotle_list + hippocrates_list))))])
matches_df = pd.DataFrame(matches, columns=["pattern", "replacement", "match", "c_aristotelicum", "c_hippocraticum"]) #, "translation"])
matches_df

Unnamed: 0,pattern,replacement,match,c_aristotelicum,c_hippocraticum
0,^λ[υ|ύ]π.+,λύπ*,λυπέον,0,8
1,^λ[υ|ύ]π.+,λύπ*,λυπέοιτο,0,1
2,^λ[υ|ύ]π.+,λύπ*,λύπαςμεσότης,1,0
3,^λ[υ|ύ]π.+,λύπ*,λυπρᾷ,1,0
4,^λ[υ|ύ]π.+,λύπ*,λύπη,197,11
...,...,...,...,...,...
157,^π[ό|ο]ν[ο|ό|έ|ε|η|ή|ῆ](?!ρ).+,πόνο*,πονέσωσιν,0,1
158,^π[ό|ο]ν[ο|ό|έ|ε|η|ή|ῆ](?!ρ).+,πόνο*,πονέοντι,0,2
159,^π[ό|ο]ν[ο|ό|έ|ε|η|ή|ῆ](?!ρ).+,πόνο*,πονέσῃ,0,8
160,^π[ό|ο]ν[ο|ό|έ|ε|η|ή|ῆ](?!ρ).+,πόνο*,πονητέος,0,1


In [8]:
matches_ch = matches_df[matches_df["c_hippocraticum"]>0]
matches_ch

Unnamed: 0,pattern,replacement,match,c_aristotelicum,c_hippocraticum
0,^λ[υ|ύ]π.+,λύπ*,λυπέον,0,8
1,^λ[υ|ύ]π.+,λύπ*,λυπέοιτο,0,1
4,^λ[υ|ύ]π.+,λύπ*,λύπη,197,11
6,^λ[υ|ύ]π.+,λύπ*,λυπέουσα,0,1
7,^λ[υ|ύ]π.+,λύπ*,λυπέοντα,0,3
...,...,...,...,...,...
157,^π[ό|ο]ν[ο|ό|έ|ε|η|ή|ῆ](?!ρ).+,πόνο*,πονέσωσιν,0,1
158,^π[ό|ο]ν[ο|ό|έ|ε|η|ή|ῆ](?!ρ).+,πόνο*,πονέοντι,0,2
159,^π[ό|ο]ν[ο|ό|έ|ε|η|ή|ῆ](?!ρ).+,πόνο*,πονέσῃ,0,8
160,^π[ό|ο]ν[ο|ό|έ|ε|η|ή|ῆ](?!ρ).+,πόνο*,πονητέος,0,1


In [19]:
matches_ch.to_csv("../data/matches_pain_words.csv")

In [12]:
# use these regular expressions to make replacements in the list of lemmata
def replacer_word_list(pattern, product, word_list):
  return [re.sub(pattern, product, word) for word in word_list]

def replace_keywords(list_of_words, list_of_tuples):
  for pattern in list_of_tuples:
    list_of_words = replacer_word_list(pattern[0], pattern[1], list_of_words)
  return list_of_words

In [15]:
### test (includes artificial words):
word_list_test = ['βοοκ', 'πᾶς', 'μέλυπρᾷ', "ἄλγτέχνη",'τέχνη' ,'πᾶς', 'μέθοδος', 'ὅμοιος', "λύπη",'πρᾶξίς', 'προαίρεσις', 'ἀγαθός', 'ἐφίημι']
replace_keywords(word_list_test, keyterm_patterns)

['βοοκ',
 'πᾶς',
 'μέλυπρᾷ',
 'ἄλγ*',
 'τέχνη',
 'πᾶς',
 'μέθοδος',
 'ὅμοιος',
 'λύπ*',
 'πρᾶξίς',
 'προαίρεσις',
 'ἀγαθός',
 'ἐφίημι']

In [16]:
# apply the replacement on the level of individual words
c_aristotelicum["lemmata_repl"] = c_aristotelicum["lemmata"].apply(lambda x: replace_keywords(x, keyterm_patterns))
c_hippocraticum["lemmata_repl"] = c_hippocraticum["lemmata"].apply(lambda x: replace_keywords(x, keyterm_patterns))

In [17]:
# apply the replacement on the sentences
def replace_in_sentences(list_of_sentences):
    return [replace_keywords(sentence, keyterm_patterns) for sentence in list_of_sentences]

c_aristotelicum["lemmatized_sentences_repl"] = c_aristotelicum["lemmatized_sentences"].apply(replace_in_sentences)
c_hippocraticum["lemmatized_sentences_repl"] = c_hippocraticum["lemmatized_sentences"].apply(replace_in_sentences)

In [20]:
c_hippocraticum.head(5)

Unnamed: 0,filename,author,title,wordcount,author_id,doc_id,raw_date,date_avr,date_probs,date_manual,provenience,tlg_epithet,clean_string,n_sentences,lemmatized_sentences,lemmata,lemmata_wordcount,lemmata_repl,lemmatized_sentences_repl
1039,tlg0627.tlg001.perseus-grc1.xml,Hippocrates,On Ancient Medicine,5534,tlg0627,tlg0627.tlg001,5-4 B.C.,-4,"{'-4.5': 0.5, '-3.5': 0.5}",-3.5,pagan,Medici,ὁπόσοι μὲν ἐπεχείρησαν περὶ ἰητρικῆς λέγειν ἢ ...,258,"[[ὁπόσος, ἐπιχειρέω, ἰητρικῆς, λέγω, γράφω, ὑπ...","[ὁπόσος, ἐπιχειρέω, ἰητρικῆς, λέγω, γράφω, ὑπό...",2613,"[ὁπόσος, ἐπιχειρέω, ἰητρικῆς, λέγω, γράφω, ὑπό...","[[ὁπόσος, ἐπιχειρέω, ἰητρικῆς, λέγω, γράφω, ὑπ..."
1040,tlg0627.tlg002.perseus-grc1.xml,Hippocrates,De aere aquis et locis,7502,tlg0627,tlg0627.tlg002,5-4 B.C.,-4,"{'-4.5': 0.5, '-3.5': 0.5}",-3.5,pagan,Medici,"ἰητρικὴν ὅστις βούλεται ὀρθῶς ζητεῖν, τάδε χρὴ...",461,"[[ἰατρικός, βούλομαι, ζητέω, χρή, ποιέω], [πρῶ...","[ἰατρικός, βούλομαι, ζητέω, χρή, ποιέω, πρῶτος...",3470,"[ἰατρικός, βούλομαι, ζητέω, χρή, ποιέω, πρῶτος...","[[ἰατρικός, βούλομαι, ζητέω, χρή, ποιέω], [πρῶ..."
1041,tlg0627.tlg003.perseus-grc1.xml,Hippocrates,The Book of Prognostics,5217,tlg0627,tlg0627.tlg003,5-4 B.C.,-4,"{'-4.5': 0.5, '-3.5': 0.5}",-3.5,pagan,Medici,τὸν ἰητρὸν δοκέει μοι ἄριστον εἶναι πρόνοιαν ἐ...,302,"[[ἰατρός, δοκέω, ἀγαθός, εἰμί, πρόνοια, ἐπιτηδ...","[ἰατρός, δοκέω, ἀγαθός, εἰμί, πρόνοια, ἐπιτηδε...",2503,"[ἰατρός, δοκέω, ἀγαθός, εἰμί, πρόνοια, ἐπιτηδε...","[[ἰατρός, δοκέω, ἀγαθός, εἰμί, πρόνοια, ἐπιτηδ..."
1042,tlg0627.tlg004.perseus-grc1.xml,Hippocrates,On Regimen in Acute Diseases,6128,tlg0627,tlg0627.tlg004,5-4 B.C.,-4,"{'-4.5': 0.5, '-3.5': 0.5}",-3.5,pagan,Medici,"οἱ ξυγγράψαντες τὰς Κνιδίας καλεομένας γνώμας,...",373,"[[ξυγγράψαντες, Κνίδιος, καλέω, γνώμη, ὁποῖος,...","[ξυγγράψαντες, Κνίδιος, καλέω, γνώμη, ὁποῖος, ...",2926,"[ξυγγράψαντες, Κνίδιος, καλέω, γνώμη, ὁποῖος, ...","[[ξυγγράψαντες, Κνίδιος, καλέω, γνώμη, ὁποῖος,..."
1043,tlg0627.tlg005.perseus-grc1.xml,Hippocrates,Acut. sp.,5218,tlg0627,tlg0627.tlg005,5-4 B.C.,-4,"{'-4.5': 0.5, '-3.5': 0.5}",-3.5,pagan,Medici,"καῦσος γίγνεται, ὁκόταν ἀναξηρανθέντα τὰ φλέβι...",352,"[[καῦσος, γίγνομαι, ἀναξηρανθέντα, φλέβια, θερ...","[καῦσος, γίγνομαι, ἀναξηρανθέντα, φλέβια, θερι...",2855,"[καῦσος, γίγνομαι, ἀναξηρανθέντα, φλέβια, θερι...","[[καῦσος, γίγνομαι, ἀναξηρανθέντα, φλέβια, θερ..."


# Export the data for future usage

In [22]:
c_hippocraticum.to_json("../data/c_hippocraticum_repl.json")
c_aristotelicum.to_json("../data/c_aristotelicum_repl.json")