In [1]:
import numpy as np
import pandas as pd
from gensim.models import KeyedVectors
from sklearn.metrics.pairwise import cosine_similarity

# plotting
import matplotlib.pyplot as plt
import seaborn as sns
import os
import json

In [2]:
# to communicate with google spreadsheet...
import gspread
from gspread_dataframe import get_as_dataframe
from gspread_dataframe import set_with_dataframe
from google.oauth2 import service_account # based on google-auth library

# Establish the connection with Google Sheets.
# The service-account key is read inside a context manager so the file handle
# is closed as soon as the JSON has been parsed.
with open("../../ServiceAccountsKey.json", "r") as key_file:
    file_data = json.load(key_file)
credentials = service_account.Credentials.from_service_account_info(file_data)
# The client is authorised with the spreadsheet-feed and Drive scopes.
scoped_credentials = credentials.with_scopes(['https://spreadsheets.google.com/feeds', 'https://www.googleapis.com/auth/drive'])
gc = gspread.Client(auth=scoped_credentials)
# Spreadsheet handle used by later cells to read individual worksheets.
PIPA_data = gc.open_by_url("https://docs.google.com/spreadsheets/d/1rV4t0_UV_wcx--UAHVwkqB8Wa_5n9mnpV05yGG1OHqk/edit?usp=sharing")

In [3]:
# Greek keyword lemmata, laid out one shared stem per row.
keywords = [
    'λυπέω', 'λυπηρός', 'λύπη',
    'ἄλγος', 'ἄλγημα', 'ἀλγέω',
    'ὀδύνη', 'ὀδυνάω',
    'πονέω', 'πόνος',
]

In [1]:
# Saved embedding files, in the order they are kept in vectors_list.
# Judging by the file names: the full model first, then three "excl_*" variants.
vector_paths = (
    "../data/keyed_vectors_full.wv",
    "../data/keyed_vectors_excl_arist.wv",
    "../data/keyed_vectors_excl_plato.wv",
    "../data/keyed_vectors_excl_hipp.wv",
)
vectors_list = [KeyedVectors.load(vector_path) for vector_path in vector_paths]
# Also expose each model under its own name for direct access in later cells.
(
    keyed_vectors_full,
    keyed_vectors_excl_arist,
    keyed_vectors_excl_plato,
    keyed_vectors_excl_hipp,
) = vectors_list

NameError: name 'KeyedVectors' is not defined

In [24]:
# Size of the full model; every similarity matrix is cut to at most this
# many rows and columns.
n_words = len(keyed_vectors_full)
# One pairwise cosine-similarity matrix per model, in vectors_list order.
complete_sim_matrices = [
    cosine_similarity(model.vectors)[:n_words, :n_words]
    for model in vectors_list
]

# Analyzing categories

In [40]:
# Read the "translation" worksheet and keep the three columns used below:
# the Greek term, its English gloss and the cleaned category label.
terms_translation_categories = get_as_dataframe(PIPA_data.worksheet("translation"))[["greek", "english", "category_clean"]]
# Drop rows without a Greek term. The explicit .copy() makes the result an
# independent frame, so assigning to its columns later does not raise
# SettingWithCopyWarning or depend on view-vs-copy semantics.
terms_translation_categories = terms_translation_categories[terms_translation_categories["greek"].notnull()].copy()
terms_translation_categories.head(5)

Unnamed: 0,greek,english,category_clean
0,χαίρω (0.52),rejoice,opossite
1,ἀγανακτέω (0.5),to be displeased,emotion
2,ἥδομαι (0.49),feel pleasure,opossite
3,ἀπολαύω (0.48),enjoy,opossite
4,διάκειμαι (0.46),to be affected,suffering


In [43]:
def _strip_similarity_score(term):
    """Return the bare Greek term from a value such as 'χαίρω (0.52)'.

    Everything from the last " (" onwards is dropped and any remaining spaces
    are removed. A value with no " (" is kept (only its spaces are removed)
    instead of collapsing to an empty string, so this cell can be re-run
    without wiping the column.
    """
    head, separator, _ = term.rpartition(" (")
    # rpartition puts the whole string in the LAST slot when the separator is
    # missing, so fall back to the original term in that case.
    bare_term = head if separator else term
    return bare_term.replace(" ", "")


terms_translation_categories["greek"] = terms_translation_categories["greek"].apply(_strip_similarity_score)

In [45]:
# Keep only the first row for each Greek term; later repeats are discarded.
is_repeated_term = terms_translation_categories.duplicated(subset=["greek"], keep="first")
terms_translation_categories = terms_translation_categories[~is_repeated_term]

In [47]:
# Number of rows (terms) under each cleaned category label.
terms_by_category = terms_translation_categories.groupby("category_clean")
terms_by_category.size()

category_clean
bodily organs    20
dietetics        27
emotion          15
moral            16
opossite         17
other            13
pain             15
pathology        19
suffering         9
dtype: int64

In [48]:
# Distinct category labels in order of first appearance.
# dict.fromkeys de-duplicates exactly like set() but keeps insertion order, so
# the list (and everything iterating over it) is identical on every kernel
# restart instead of depending on string hash randomisation.
categories = list(dict.fromkeys(terms_translation_categories["category_clean"]))

In [49]:
# Lookup table: Greek term -> cleaned category label.
term_category_dict = {
    greek_term: category_label
    for greek_term, category_label in zip(
        terms_translation_categories["greek"],
        terms_translation_categories["category_clean"],
    )
}
term_category_dict

{'χαίρω': 'opossite',
 'ἀγανακτέω': 'emotion',
 'ἥδομαι': 'opossite',
 'ἀπολαύω': 'opossite',
 'διάκειμαι': 'suffering',
 'ἀκόλαστος': 'moral',
 'κακός': 'moral',
 'ἄχθομαι': 'emotion',
 'εὐφραίνω': 'opossite',
 'λυπηρός': 'pain',
 'ἐξαμαρτάνω': 'other',
 'πλησιάζω': 'other',
 'μισέω': 'moral',
 'φοβερός': 'emotion',
 'δυσχεραίνω': 'emotion',
 'λοιδορέω': 'suffering',
 'ὀργίζω': 'suffering',
 'σύνοιδα': 'other',
 'φθονέω': 'moral',
 'ἀνάξιος': 'moral',
 'ἀλγέω': 'pain',
 'ἀνόητος': 'moral',
 'δακρύω': 'emotion',
 'ψαύω': 'other',
 'ὀδυνάω': 'pain',
 'βαρύνω': 'suffering',
 'ἀλγεινός': 'pain',
 'ἄλγημα': 'pain',
 'νείαιρα': 'bodily organs',
 'ἰξύα': 'bodily organs',
 'ὑποχόνδριος': 'bodily organs',
 'λυπέω': 'pain',
 'βάρος': 'pathology',
 'ἧπαρ': 'bodily organs',
 'σπάω': 'pathology',
 'πλευρόν': 'bodily organs',
 'παραφρονέω': 'emotion',
 'πυρεταίνω': 'pathology',
 'ἀλγηδών': 'pain',
 'πόθος': 'opossite',
 'ὀδύρομαι': 'emotion',
 'θυμόω': 'suffering',
 'ἄλγος': 'pain',
 'οἰκτρός': 'em

In [52]:
# Lookup table: Greek term -> English gloss.
term_translation_dict = {
    greek_term: english_gloss
    for greek_term, english_gloss in zip(
        terms_translation_categories["greek"],
        terms_translation_categories["english"],
    )
}
term_translation_dict

KeyError: 'translation'

In [50]:
# Lookup table: category label -> list of Greek terms carrying that label,
# with one entry per element of `categories`.
cat_terms_dict = {
    category: terms_translation_categories.loc[
        terms_translation_categories["category_clean"] == category, "greek"
    ].tolist()
    for category in categories
}

In [51]:
# Display the category -> Greek terms mapping for visual inspection.
cat_terms_dict

{'emotion': ['ἀγανακτέω',
  'ἄχθομαι',
  'φοβερός',
  'δυσχεραίνω',
  'δακρύω',
  'παραφρονέω',
  'ὀδύρομαι',
  'οἰκτρός',
  'φόβος',
  'δύστηνος',
  'δεῖμα',
  'στένω',
  'πῆμα',
  'μέλεος',
  'γόος'],
 'other': ['ἐξαμαρτάνω',
  'πλησιάζω',
  'σύνοιδα',
  'ψαύω',
  'βληχρός',
  'ἤρ',
  'κινδυνεύω',
  'ἐμποδίζω',
  'βίοτος',
  'συζάω',
  'παρουσία',
  'σύμφυτος',
  'ὁμιλία'],
 'pain': ['λυπηρός',
  'ἀλγέω',
  'ὀδυνάω',
  'ἀλγεινός',
  'ἄλγημα',
  'λυπέω',
  'ἀλγηδών',
  'ἄλγος',
  'ὠδίς',
  'πόνος',
  'ἐπίπονος',
  'λύπη',
  'ἄχος',
  'ὀδύνη',
  'ὀδυνώδης'],
 'bodily organs': ['νείαιρα',
  'ἰξύα',
  'ὑποχόνδριος',
  'ἧπαρ',
  'πλευρόν',
  'κλείς',
  'κενεών',
  'βουβών',
  'μετάφρενον',
  'τράχηλος',
  'ὀσφῦς',
  'βλέφαρον',
  'σῶμα',
  'σωματικός',
  'ῥάχις',
  'κνήμη',
  'στῆθος',
  'σφυρόν',
  'φλέβιον',
  'χόνδρος'],
 'pathology': ['βάρος',
  'σπάω',
  'πυρεταίνω',
  'ἕλκος',
  'ἑλκόω',
  'φόνιος',
  'θανάσιμος',
  'βήξ',
  'θέρμη',
  'οἴδημα',
  'στραγγουρία',
  'διάρροια',
  'ὕφα