In [1]:
import pickle
import pandas as pd
import requests
import sddk

import matplotlib.pyplot as plt
import matplotlib.colors as mcolors
import matplotlib.patches as patches
import seaborn as sns

from sklearn.manifold import TSNE

import gspread
from gspread_dataframe import get_as_dataframe, set_with_dataframe
from google.oauth2 import service_account # based on google-auth library

In [2]:
# for exporting data to googlesheets/sciencedata
# (feel free to skip)

s = sddk.cloudSession("sciencedata.dk")
# establish connection with gogglesheets...
file_data = s.read_file("https://sciencedata.dk/files/ServiceAccountsKey.json", "dict") # or load it from a local storage: json.load(open("../../ServiceAccountsKey.json", "r"))
credentials = service_account.Credentials.from_service_account_info(file_data)
gc = gspread.Client(auth=credentials.with_scopes(['https://spreadsheets.google.com/feeds', 'https://www.googleapis.com/auth/drive']))
paul_results = gc.open_by_url("https://docs.google.com/spreadsheets/d/1h4M-gK9TPIfeTV528tUuPBfZF1wtcNCA10yIlJYqGTE/edit?usp=sharing")

endpoint variable has been configured to: https://sciencedata.dk/files/


In [3]:
data_1to3 = pickle.load(open("../data/large_files/embeddings_sents+bgs+tgs_ppmi2_1to3.pkl", "rb"))
data_4to5 = pickle.load(open("../data/large_files/embeddings_sents+bgs+tgs_ppmi2_4to5.pkl", "rb"))

In [16]:
shared_vocab = list(set(data_1to3[1]) & set(data_4to5[1]))
len(shared_vocab)

1465

In [17]:
data_1to3[2][shared_vocab]

Unnamed: 0,ἐλπίς,ἐσθίω,προτρέπω,φυτόν,προερέω,ἀγαπητός,ἀνατίθημι,κρατέω,βουλεύω,τέχνη,...,φλόξ,ἀκρίβεια,μιμνήσκω,καταφρονέω,ἀναβαίνω,δεσμός,ἐπιδημία,θερμός,ἐπιγράφω,ἔπαινος
εἰμί,0.724096,0.722578,0.642129,0.679684,0.705818,0.711251,0.627316,0.699888,0.664706,0.740966,...,0.625503,0.661255,0.678387,0.686799,0.727880,0.711029,0.715374,0.708014,0.648341,0.692845
λέγω,0.683319,0.756927,0.656932,0.630005,0.718522,0.667719,0.662246,0.679838,0.684603,0.685099,...,0.682429,0.625866,0.714699,0.691557,0.730466,0.696227,0.709576,0.656043,0.674540,0.689697
θεός,0.732868,0.716271,0.692056,0.655909,0.716045,0.715590,0.744533,0.711443,0.638489,0.719664,...,0.671303,0.658548,0.676072,0.729940,0.743959,0.694263,0.718369,0.577688,0.674676,0.687963
οὗτος,0.706066,0.713396,0.686895,0.650747,0.716244,0.702182,0.667955,0.700580,0.670929,0.733474,...,0.607472,0.680082,0.707328,0.722704,0.719990,0.686756,0.709347,0.651590,0.691422,0.710812
γίγνομαι,0.711481,0.674150,0.614175,0.667996,0.722014,0.697723,0.619489,0.672948,0.632626,0.730903,...,0.617676,0.594015,0.690836,0.679832,0.703877,0.710307,0.751184,0.661793,0.622562,0.657846
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
ἀπέραντος,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,...,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000
γήινος,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,...,0.000000,0.000000,0.000000,0.000000,0.643294,0.000000,0.000000,0.000000,0.000000,0.000000
φορά,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.705795,...,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.711621,0.000000,0.000000
ἔρομαι,0.653862,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.681271,...,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000


In [19]:
data_4to5[2][shared_vocab]

Unnamed: 0,ἐλπίς,ἐσθίω,προτρέπω,φυτόν,προερέω,ἀγαπητός,ἀνατίθημι,κρατέω,βουλεύω,τέχνη,...,φλόξ,ἀκρίβεια,μιμνήσκω,καταφρονέω,ἀναβαίνω,δεσμός,ἐπιδημία,θερμός,ἐπιγράφω,ἔπαινος
εἰμί,0.733720,0.680737,0.670634,0.676192,0.723300,0.672812,0.683606,0.722218,0.709864,0.725541,...,0.666923,0.685100,0.679439,0.700415,0.698978,0.694679,0.656091,0.694413,0.676865,0.656616
οὗτος,0.727761,0.683607,0.675776,0.661083,0.711128,0.728196,0.715339,0.736422,0.706748,0.717521,...,0.667248,0.687442,0.703458,0.692126,0.706093,0.698076,0.702949,0.671268,0.715687,0.680425
λέγω,0.701195,0.734472,0.643983,0.644304,0.685676,0.687660,0.658059,0.701303,0.679500,0.695765,...,0.676752,0.675763,0.689268,0.670887,0.740743,0.691079,0.681710,0.667343,0.669380,0.676499
θεός,0.755811,0.683810,0.671280,0.668665,0.625387,0.743244,0.748740,0.709014,0.676053,0.669955,...,0.642390,0.666161,0.695249,0.705444,0.717437,0.707403,0.684604,0.648106,0.723269,0.669339
αὐτός,0.721367,0.690543,0.670925,0.669057,0.692216,0.670427,0.715140,0.735333,0.713530,0.676689,...,0.664248,0.650763,0.680516,0.689127,0.684251,0.727909,0.644803,0.666916,0.692438,0.663996
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
ὦμος,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.740477,...,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000
Ἀράβιος,0.000000,0.000000,0.000000,0.000000,0.670838,0.000000,0.000000,0.000000,0.000000,0.000000,...,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000
προπάτωρ,0.674919,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.656832,...,0.000000,0.000000,0.000000,0.680243,0.667834,0.000000,0.000000,0.000000,0.725271,0.000000
κορυφή,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,...,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000
