In [519]:
import numpy as np
import pandas as pd
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)

from gensim.models import KeyedVectors
from sklearn.metrics.pairwise import cosine_similarity

# plotting
import matplotlib.pyplot as plt
import matplotlib.patches as patches
import matplotlib.colors as mcolors

import seaborn as sns
import os
import json
from translator import translator
import pickle

In [520]:
# to communicate with google spreadsheet...
import gspread
from gspread_dataframe import get_as_dataframe
from gspread_dataframe import set_with_dataframe
from google.oauth2 import service_account # based on google-auth library

# establish connection with gogglesheets...
file_data = json.load(open("../../ServiceAccountsKey.json", "r"))
credentials = service_account.Credentials.from_service_account_info(file_data)
gc = gspread.Client(auth=credentials.with_scopes(['https://spreadsheets.google.com/feeds', 'https://www.googleapis.com/auth/drive']))
PIPA_data = gc.open_by_url("https://docs.google.com/spreadsheets/d/1rV4t0_UV_wcx--UAHVwkqB8Wa_5n9mnpV05yGG1OHqk/edit?usp=sharing")

In [521]:
keywords = ["ἀλγέω", "ἄλγημα", "ἄλγος", "λυπέω", "λύπη", "λυπηρός", "ὀδυνάω", "ὀδύνη", "πονέω", "πόνος"]

In [522]:
keyed_vectors_full = KeyedVectors.load("../data/keyed_vectors_full.wv")
keyed_vectors_excl_arist = KeyedVectors.load("../data/keyed_vectors_excl_arist.wv")
keyed_vectors_excl_plato = KeyedVectors.load("../data/keyed_vectors_excl_plato.wv")
keyed_vectors_excl_hipp = KeyedVectors.load("../data/keyed_vectors_excl_hipp.wv")
vectors_list = [keyed_vectors_full, keyed_vectors_excl_arist, keyed_vectors_excl_plato, keyed_vectors_excl_hipp]

In [523]:
n_words = len(keyed_vectors_full)
complete_sim_matrices = []
for vecs in vectors_list:
    complete_sim_matrix = cosine_similarity(vecs.vectors)
    complete_sim_matrices.append(complete_sim_matrix[:n_words, :n_words])

# Analyzing categories

In [524]:
terms_translation_categories = get_as_dataframe(PIPA_data.worksheet("translation"))[["greek", "english", "category_clean"]]
terms_translation_categories = terms_translation_categories[terms_translation_categories["greek"].notnull()]
terms_translation_categories.rename(columns={"category_clean" : "category"}, inplace=True)
terms_translation_categories.head(5)

Unnamed: 0,greek,english,category
0,χαίρω (0.52),rejoice,opossite
1,ἀγανακτέω (0.5),to be displeased,emotion
2,ἥδομαι (0.49),feel pleasure,opossite
3,ἀπολαύω (0.48),enjoy,opossite
4,διάκειμαι (0.46),to be affected,suffering


In [525]:
terms_translation_categories["greek"] = terms_translation_categories["greek"].apply(lambda x: x.rpartition(" (")[0].replace(" ", ""))

In [526]:
terms_translation_categories = terms_translation_categories.drop_duplicates(subset="greek")

In [527]:
terms_translation_categories.groupby("category").size()

category
bodily parts    20
dietetics       27
emotion         15
moral           16
opossite        17
other           13
pain            15
pathology       19
suffering        9
dtype: int64

In [528]:
categories = list(set(terms_translation_categories["category"]))

In [529]:
term_category_dict = dict(zip(terms_translation_categories["greek"], terms_translation_categories["category"]))
term_category_dict

{'χαίρω': 'opossite',
 'ἀγανακτέω': 'emotion',
 'ἥδομαι': 'opossite',
 'ἀπολαύω': 'opossite',
 'διάκειμαι': 'suffering',
 'ἀκόλαστος': 'moral',
 'κακός': 'moral',
 'ἄχθομαι': 'emotion',
 'εὐφραίνω': 'opossite',
 'λυπηρός': 'pain',
 'ἐξαμαρτάνω': 'other',
 'πλησιάζω': 'other',
 'μισέω': 'moral',
 'φοβερός': 'emotion',
 'δυσχεραίνω': 'emotion',
 'λοιδορέω': 'suffering',
 'ὀργίζω': 'suffering',
 'σύνοιδα': 'other',
 'φθονέω': 'moral',
 'ἀνάξιος': 'moral',
 'ἀλγέω': 'pain',
 'ἀνόητος': 'moral',
 'δακρύω': 'emotion',
 'ψαύω': 'other',
 'ὀδυνάω': 'pain',
 'βαρύνω': 'suffering',
 'ἀλγεινός': 'pain',
 'ἄλγημα': 'pain',
 'νείαιρα': 'bodily parts',
 'ἰξύα': 'bodily parts',
 'ὑποχόνδριος': 'bodily parts',
 'λυπέω': 'pain',
 'βάρος': 'pathology',
 'ἧπαρ': 'bodily parts',
 'σπάω': 'pathology',
 'πλευρόν': 'bodily parts',
 'παραφρονέω': 'emotion',
 'πυρεταίνω': 'pathology',
 'ἀλγηδών': 'pain',
 'πόθος': 'opossite',
 'ὀδύρομαι': 'emotion',
 'θυμόω': 'suffering',
 'ἄλγος': 'pain',
 'οἰκτρός': 'emotion

In [530]:
term_translation_dict = dict(zip(terms_translation_categories["greek"], terms_translation_categories["english"]))
term_translation_dict

{'χαίρω': 'rejoice',
 'ἀγανακτέω': 'to be displeased',
 'ἥδομαι': 'feel pleasure',
 'ἀπολαύω': 'enjoy',
 'διάκειμαι': 'to be affected',
 'ἀκόλαστος': 'incontinent',
 'κακός': 'bad',
 'ἄχθομαι': 'to be grieved',
 'εὐφραίνω': 'enjoy oneself',
 'λυπηρός': 'painful',
 'ἐξαμαρτάνω': 'fail',
 'πλησιάζω': 'approach',
 'μισέω': 'hate',
 'φοβερός': 'fearful',
 'δυσχεραίνω': 'to be displeased',
 'λοιδορέω': 'abuse',
 'ὀργίζω': 'irritate',
 'σύνοιδα': 'know',
 'φθονέω': 'be jealous',
 'ἀνάξιος': 'worthless',
 'ἀλγέω': 'be in pain',
 'ἀνόητος': 'silly',
 'δακρύω': 'weep',
 'ψαύω': 'touch',
 'ὀδυνάω': 'be in pain',
 'βαρύνω': 'oppress',
 'ἀλγεινός': 'painful',
 'ἄλγημα': 'pain',
 'νείαιρα': 'abdomen',
 'ἰξύα': 'waist',
 'ὑποχόνδριος': 'abdomen',
 'λυπέω': 'be in pain',
 'βάρος': 'heaviness',
 'ἧπαρ': 'liver',
 'σπάω': 'spasm',
 'πλευρόν': 'rib',
 'παραφρονέω': 'to be deranged',
 'πυρεταίνω': 'have fever',
 'ἀλγηδών': 'painful',
 'πόθος': 'desire',
 'ὀδύρομαι': 'lament',
 'θυμόω': 'provoke',
 'ἄλγος

In [531]:
cat_terms_dict = {}
for cat in categories:
    terms = terms_translation_categories[terms_translation_categories["category"]==cat]["greek"].tolist()
    cat_terms_dict[cat] = terms

In [532]:
cat_terms_dict

{'bodily parts': ['νείαιρα',
  'ἰξύα',
  'ὑποχόνδριος',
  'ἧπαρ',
  'πλευρόν',
  'κλείς',
  'κενεών',
  'βουβών',
  'μετάφρενον',
  'τράχηλος',
  'ὀσφῦς',
  'βλέφαρον',
  'σῶμα',
  'σωματικός',
  'ῥάχις',
  'κνήμη',
  'στῆθος',
  'σφυρόν',
  'φλέβιον',
  'χόνδρος'],
 'emotion': ['ἀγανακτέω',
  'ἄχθομαι',
  'φοβερός',
  'δυσχεραίνω',
  'δακρύω',
  'παραφρονέω',
  'ὀδύρομαι',
  'οἰκτρός',
  'φόβος',
  'δύστηνος',
  'δεῖμα',
  'στένω',
  'πῆμα',
  'μέλεος',
  'γόος'],
 'opossite': ['χαίρω',
  'ἥδομαι',
  'ἀπολαύω',
  'εὐφραίνω',
  'πόθος',
  'ῥᾳστώνη',
  'εὐεξία',
  'πλησμονή',
  'ἡδύς',
  'πεινάω',
  'ἀγαπητός',
  'ἡδονή',
  'ἐπιθυμία',
  'Ἀφροδίσιος',
  'ἔρως',
  'θυμός',
  'δέρκομαι'],
 'pathology': ['βάρος',
  'σπάω',
  'πυρεταίνω',
  'ἕλκος',
  'ἑλκόω',
  'φόνιος',
  'θανάσιμος',
  'βήξ',
  'θέρμη',
  'οἴδημα',
  'στραγγουρία',
  'διάρροια',
  'ὕφαιμος',
  'κατάψυξις',
  'διαθερμαίνω',
  'πυός',
  'στάζω',
  'φρικώδης',
  'ἀσώδης'],
 'suffering': ['διάκειμαι',
  'λοιδορέω',
  'ὀργίζω

In [533]:
keyed_vectors_full.n_similarity(["πόνος"], cat_terms_dict["dietetics"])

0.641673

In [534]:
similarities = []
for term in cat_terms_dict["dietetics"]:
    similarities.append(keyed_vectors_full.similarity("πόνος", term))

In [535]:
np.mean(similarities)

0.29598635

In [536]:
cat_terms_dict["pathology"]

['βάρος',
 'σπάω',
 'πυρεταίνω',
 'ἕλκος',
 'ἑλκόω',
 'φόνιος',
 'θανάσιμος',
 'βήξ',
 'θέρμη',
 'οἴδημα',
 'στραγγουρία',
 'διάρροια',
 'ὕφαιμος',
 'κατάψυξις',
 'διαθερμαίνω',
 'πυός',
 'στάζω',
 'φρικώδης',
 'ἀσώδης']

In [537]:
sim_tups = []
for keyword in keywords:
    for cat in cat_terms_dict.keys():
        sim_tups.append((keyword, cat, keyed_vectors_full.n_similarity([keyword], cat_terms_dict[cat])))

In [538]:
sim_tups

[('ἀλγέω', 'bodily parts', 0.3907564),
 ('ἀλγέω', 'emotion', 0.3932681),
 ('ἀλγέω', 'opossite', 0.27486372),
 ('ἀλγέω', 'pathology', 0.39712083),
 ('ἀλγέω', 'suffering', 0.4330012),
 ('ἀλγέω', 'moral', 0.31691295),
 ('ἀλγέω', 'pain', 0.61059135),
 ('ἀλγέω', 'dietetics', 0.19012412),
 ('ἀλγέω', 'other', 0.38458845),
 ('ἄλγημα', 'bodily parts', 0.7029688),
 ('ἄλγημα', 'emotion', 0.11782563),
 ('ἄλγημα', 'opossite', 0.047449432),
 ('ἄλγημα', 'pathology', 0.70607466),
 ('ἄλγημα', 'suffering', 0.11568883),
 ('ἄλγημα', 'moral', 0.08219812),
 ('ἄλγημα', 'pain', 0.66649115),
 ('ἄλγημα', 'dietetics', 0.3659613),
 ('ἄλγημα', 'other', 0.25914767),
 ('ἄλγος', 'bodily parts', 0.36313546),
 ('ἄλγος', 'emotion', 0.63292605),
 ('ἄλγος', 'opossite', 0.34056836),
 ('ἄλγος', 'pathology', 0.4151088),
 ('ἄλγος', 'suffering', 0.5664587),
 ('ἄλγος', 'moral', 0.28199112),
 ('ἄλγος', 'pain', 0.642763),
 ('ἄλγος', 'dietetics', 0.36896706),
 ('ἄλγος', 'other', 0.3345761),
 ('λυπέω', 'bodily parts', -0.094281085)

# Extending categories

In [539]:
other_words = []
for term in term_category_dict.keys():
    other_words.extend([tup[0] for tup in keyed_vectors_full.most_similar(term, topn=5)])

In [540]:
len(other_words)

755

In [541]:
other_words_unique =  set(other_words)
len(other_words_unique)

374

In [542]:
other_words_unique_new = list(other_words_unique.difference(term_category_dict.keys()))
other_words_unique_new[:10]

['ὠφέλιμος',
 'δύσκολος',
 'πονηρός',
 'ἀγαπάω',
 'ὑγραίνω',
 'μέλαθρον',
 'αὐχήν',
 'ῥοφέω',
 'παρακελεύομαι',
 'τριηραρχέω']

In [543]:
word_cat_extension = pd.DataFrame({"greek" : other_words_unique_new, "english" : [translator(word) for word in other_words_unique_new], "category" : ["" for el in other_words_unique_new]})

In [544]:
#set_with_dataframe(PIPA_data.add_worksheet("word_cat_extension", 1,1), word_cat_extension)

# Load back the manually coded extension

In [545]:
word_freqs = pickle.load(open("../data/word_freqs_all.pickle", "rb"))
word_freqs_dict = dict(word_freqs)

In [546]:
word_freqs_dict["ἀγαθός"]

8832

In [547]:
word_cat_extension_coded = get_as_dataframe(PIPA_data.worksheet("word_cat_extension"))[["greek", "english", "category"]]
word_cat_extension_coded = word_cat_extension_coded[word_cat_extension_coded["category"].notnull()]
word_cat_extension_coded.head(5)

Unnamed: 0,greek,english,category
0,ὠφέλιμος,"helping, useful, serviceable, profitable, adva...",opossite
1,δύσκολος,hard to satisfy with food;,dietetics
2,πονηρός,"toilsome, painful, grievous",pain
3,ἀγαπάω,"to treat with affection, to caress, love, be f...",opossite
4,ὑγραίνω,"to wet, moisten",dietetics


In [548]:
len(word_cat_extension_coded)

259

In [549]:
word_cat_extension_coded

Unnamed: 0,greek,english,category
0,ὠφέλιμος,"helping, useful, serviceable, profitable, adva...",opossite
1,δύσκολος,hard to satisfy with food;,dietetics
2,πονηρός,"toilsome, painful, grievous",pain
3,ἀγαπάω,"to treat with affection, to caress, love, be f...",opossite
4,ὑγραίνω,"to wet, moisten",dietetics
5,μέλαθρον,the ceiling of a room,other
6,αὐχήν,"the neck, throat",bodily parts
7,ῥοφέω,"to sup greedily up, gulp down",dietetics
8,παρακελεύομαι,to order,other
9,τριηραρχέω,to be a,other


In [550]:
terms_translation_categories = pd.concat([terms_translation_categories, word_cat_extension_coded])

In [551]:
terms_translation_categories = terms_translation_categories[~terms_translation_categories["category"].str.contains("other")]

In [552]:
terms_translation_categories["count"] = terms_translation_categories["greek"].apply(lambda x: word_freqs_dict[x])
terms_translation_categories.sort_values("count", ascending=False, inplace=True)
terms_translation_categories.head(5)

Unnamed: 0,greek,english,category,count
249,ἀγαθός,good,moral,8832
6,κακός,bad,moral,4780
209,σῶμα,body,bodily parts,4651
32,φίλος,"loved, beloved, dear",moral,3534
248,ἀδικέω,to do wrong,moral,2134


In [553]:
len(terms_translation_categories)

305

In [554]:
terms_translation_categories.drop_duplicates(subset="greek", inplace=True)
len(terms_translation_categories)

305

In [556]:
pickle.dump(terms_translation_categories, open("../data/terms_translation_categories.pickle", "wb"))