# Load and query pre-trained model

To query a pre-trained model, you need at least one trained model file in the `./models` folder.

## Import dependencies

In [1]:
import os
try:
    from collections.abc import Mapping
    from gensim.models.word2vec import Word2Vec
except:
#     print("Depencies not found. Make sure you have installed GenSim.")
    !pip install -Iv gensim==3.2.0
    from collections.abc import Mapping
    from gensim.models.word2vec import Word2Vec

## List available models

If you want to train a new model, use the `build_greek_w2v_model.ipynb` notebook.

In [16]:
# List the contents of ./models, numbering each visible entry.
print('Available models:\n')
models = os.listdir('./models')
# The number printed is the entry's index in the unfiltered `models` list, so
# it can be used directly as `selected_model_number`. Hidden dotfiles are
# skipped in the listing but still occupy an index.
# A plain loop is used instead of a comprehension: the comprehension existed
# only for its print side effect and made the notebook echo a list of None.
for count, filename in enumerate(models):
    if filename.startswith('.'):
        continue
    print(f'{count}. "{filename}"\n')

Available models:

0. "papyri&corpus_skipgram_size300_window5_mincount2.model"

1. "dec2017.model"

2. "nov2022.model"

3. "nov2022_includes_papyri.model"

4. "sept2018.model"

5. "papyri&corpus_cbow_size300_window5_mincount2.model"



[None, None, None, None, None, None]

In [50]:
# Index of the model to load, as printed in the "Available models" listing.
# os.listdir returns entries in arbitrary order, so re-check the listing before
# reusing a number on another machine or after the folder contents change.
selected_model_number = 5

# File name (not the full path) of the chosen entry in `models`.
model_name = models[selected_model_number]

In [51]:
# Load the selected model file from ./models and keep only its `.wv`
# attribute; the query cells call most_similar / n_similarity on it.
model = Word2Vec.load('./models/' + model_name).wv

## Get most-similar hits for input lemma

In [53]:
# Candidate lemmas to try; the last entry is the one queried below.
test_lemmas = [
    'ἀρχιερεύς',
    'ἄγγελος',
    'πιλᾶτος',
    'πέτρος',
    'σῶμα',
    'ψυχή',
    'νέκρωσις',
    'λύγξ',
    'λόγος',
]
query_lemma = test_lemmas[-1]
print(query_lemma)
# Kept as the final expression so the notebook displays the ten hits.
model.most_similar(query_lemma, topn=10)

λόγος


[('ἑρμηνεία', 0.5288925766944885),
 ('ἐξήγησις', 0.5065293312072754),
 ('θεολογία', 0.4956413805484772),
 ('διδασκαλία', 0.48970848321914673),
 ('ὑπόληψις', 0.47546130418777466),
 ('διήγησις', 0.47397667169570923),
 ('σκέψις', 0.47282832860946655),
 ('θεωρία', 0.47240814566612244),
 ('διαλέγω', 0.4657094478607178),
 ('φυσιολογία', 0.4602492153644562)]

## Compare tokenized sentences

In [54]:
# Two sets of lemmas to compare against each other.
word_set_1 = ['ἀρχιερεύς', 'ἄγγελος']
word_set_2 = ['πιλᾶτος', 'πέτρος']

# Similarity between the two sets as wholes; per the gensim documentation,
# n_similarity is the cosine similarity between the sets' mean vectors.
# Kept as the final expression so the notebook displays the score.
model.n_similarity(word_set_1, word_set_2)

0.5299723