In [1]:
import random

# Read the candidate words from the pool file, one entry per line.
with open('words/game_wordpool.txt') as pool_file:
    word_pool = pool_file.read().splitlines()

# Draw 25 entries from the pool (sampling without replacement) for the board.
# NOTE(review): no seed is set in this cell, so every run yields a different board.
board_words = random.sample(word_pool, k=25)

In [13]:
# Display the 25 words sampled for this board.
board_words

['BAR',
 'CHURCH',
 'BUG',
 'DOCTOR',
 'SPRING',
 'POST',
 'TORCH',
 'COMPOUND',
 'GLASS',
 'PALM',
 'CHAIR',
 'TIE',
 'SHARK',
 'BRIDGE',
 'DISEASE',
 'CLIFF',
 'CZECH',
 'SHADOW',
 'OCTOPUS',
 'BAND',
 'TRAIN',
 'DICE',
 'STREAM',
 'STRAW',
 'WORM']

In [14]:
# Normalise every board word to lower case.
# NOTE(review): presumably to match the casing of the word2vec vocabulary — confirm.
board_words = [word.lower() for word in board_words]

In [15]:
# Print the board as four consecutive groups of 9, 8, 7 and 1 words.
# NOTE(review): presumably the Codenames split (two teams, bystanders, assassin) — confirm.
for start, stop in ((0, 9), (9, 17), (17, 24), (24, None)):
    print(board_words[start:stop])

['bar', 'church', 'bug', 'doctor', 'spring', 'post', 'torch', 'compound', 'glass']
['palm', 'chair', 'tie', 'shark', 'bridge', 'disease', 'cliff', 'czech']
['shadow', 'octopus', 'band', 'train', 'dice', 'stream', 'straw']
['worm']


In [7]:
# NOTE: the alias `gensim` is bound to the `gensim.downloader` submodule here,
# not to the top-level gensim package.
import gensim.downloader as gensim
# Load the pretrained "word2vec-google-news-300" vectors by name.
# NOTE(review): presumably downloads the model on first use — confirm size/cache location.
word2vec = gensim.load("word2vec-google-news-300")


In [16]:
# Clue candidates: vocabulary entries pulled towards the first nine board words
# (positive) and pushed away from the next eight (negative).
word2vec.most_similar(positive=board_words[0:9], negative=board_words[9:17])

[('Jayhawker_Towers', 0.32939642667770386),
 ('Camp_Tontozona', 0.3205326497554779),
 ('Ubben_Basketball_Complex', 0.3157985508441925),
 ('firecrackers_BBs', 0.30775976181030273),
 ('6News_Julie_Pursley', 0.3060329854488373),
 ('pizza_parlor', 0.3048863410949707),
 ('malfunctioning_air_conditioner', 0.30258217453956604),
 ('rectory', 0.30048373341560364),
 ('termite_extermination', 0.29974260926246643),
 ('empty_milk_cartons', 0.2994721233844757)]

In [17]:
# Clue chosen by hand from the most_similar candidates listed above.
clue = "rectory"

In [18]:
# Simulated guess: the board word most similar to the clue.
word2vec.most_similar_to_given(clue, board_words)

'church'

In [21]:
# Take the guessed word off the board (mutates board_words in place).
# The membership test keeps the cell safe to re-run: a bare remove() raises
# ValueError once the word is gone or if the sampled board never contained it.
if "church" in board_words:
    board_words.remove("church")

In [23]:
# Simulated guess: the closest word to the clue among those still on the board.
word2vec.most_similar_to_given(clue, board_words)

'compound'

In [25]:
# Take the guessed word off the board (mutates board_words in place).
# The membership test keeps the cell safe to re-run: a bare remove() raises
# ValueError once the word is gone or if the sampled board never contained it.
if "compound" in board_words:
    board_words.remove("compound")

In [26]:
# Simulated guess: the closest word to the clue among those still on the board.
word2vec.most_similar_to_given(clue, board_words)

'bar'

In [27]:
# Take the guessed word off the board (mutates board_words in place).
# The membership test keeps the cell safe to re-run: a bare remove() raises
# ValueError once the word is gone or if the sampled board never contained it.
if "bar" in board_words:
    board_words.remove("bar")

In [28]:
# Simulated guess: the closest word to the clue among those still on the board.
word2vec.most_similar_to_given(clue, board_words)

'glass'

In [29]:
# Take the guessed word off the board (mutates board_words in place).
# The membership test keeps the cell safe to re-run: a bare remove() raises
# ValueError once the word is gone or if the sampled board never contained it.
if "glass" in board_words:
    board_words.remove("glass")

In [30]:
# Simulated guess: the closest word to the clue among those still on the board.
word2vec.most_similar_to_given(clue, board_words)

'doctor'

In [31]:
# Take the guessed word off the board (mutates board_words in place), then ask
# for the next-closest remaining word. The membership test keeps the cell safe
# to re-run: a bare remove() raises ValueError once the word is gone or if the
# sampled board never contained it.
if "doctor" in board_words:
    board_words.remove("doctor")
word2vec.most_similar_to_given(clue, board_words)

'straw'

In [17]:
# Fetch the embedding of every board word from `data`.
# NOTE(review): `data` is not defined in the visible cells — presumably a
# word -> vector mapping loaded elsewhere; confirm before a fresh-kernel run.
board_words_embeddings = [data.get(word) for word in board_words]

# Assuming dict-like `.get` semantics, unknown words (e.g. a casing mismatch
# between the board words and the keys of `data`) come back as None. Fail here
# and name the offending words instead of passing None entries downstream.
missing_words = [
    word
    for word, embedding in zip(board_words, board_words_embeddings)
    if embedding is None
]
if missing_words:
    raise KeyError(f"No embedding found for board words: {missing_words}")

In [19]:
# Sanity check: number of embeddings collected (one list entry per board word).
len(board_words_embeddings)

25

In [30]:
from pymilvus import (
    connections,
    utility,
    FieldSchema,
    CollectionSchema,
    DataType,
    Collection,
)
# Register the connection under the alias "default", pointing at localhost:19530.
# NOTE(review): host and port are hard-coded; a Milvus server must be reachable there.
connections.connect("default", host="localhost", port="19530")

In [69]:
# Start from a clean slate: drop the collection left over from a previous run.
# Guarded with has_collection so the cell also runs against a Milvus instance
# where the collection has not been created yet.
if utility.has_collection("fasttext_board_embeddings"):
    utility.drop_collection("fasttext_board_embeddings")

In [70]:
# Primary key: caller-supplied 64-bit integer id (auto_id is disabled).
id_field = FieldSchema(name="id", dtype=DataType.INT64, is_primary=True, auto_id=False)
# The word itself, stored as VARCHAR with max_length 32.
word_field = FieldSchema(name="word", dtype=DataType.VARCHAR, max_length=32)
# The word's embedding as a 300-dimensional float vector.
embedding_field = FieldSchema(name="embeddings", dtype=DataType.FLOAT_VECTOR, dim=300)

fields = [id_field, word_field, embedding_field]
schema = CollectionSchema(fields, "Embeddings of the Codenames word pool")

# Create (or open) the collection that stores the board embeddings.
board_db = Collection("fasttext_board_embeddings", schema)

In [71]:
# Column-oriented payload: ids, words and embeddings as three parallel lists.
# Ids are simply the positions 0..len(board_words)-1.
row_ids = list(range(len(board_words)))
entries = [row_ids, board_words, board_words_embeddings]

In [72]:
# Write the three columns into the collection...
board_db.insert(entries)
# ...and flush so the inserted rows are sealed before they are counted/indexed.
board_db.flush()

In [73]:
# Sanity check: number of rows now stored in the collection.
board_db.num_entities

25

In [74]:
# IVF_FLAT index over the embedding field, compared by cosine similarity.
index = dict(
    index_type="IVF_FLAT",
    metric_type="COSINE",
    params=dict(nlist=128),
)
board_db.create_index("embeddings", index)

# Load the collection so it can be searched.
board_db.load()

In [78]:
clue = "COPY"

# Look up the clue's vector in `data`.
# NOTE(review): `data` is not defined in the visible cells — presumably a
# word -> vector mapping loaded elsewhere; confirm before a fresh-kernel run.
clue_embedding = data.get(clue)

# Assuming dict-like `.get` semantics, an unknown clue comes back as None;
# fail here rather than handing None to the vector search.
if clue_embedding is None:
    raise KeyError(f"No embedding found for clue: {clue!r}")

In [79]:
# Single-query search: the clue embedding is the only query vector.
vectors_to_search = [clue_embedding]
search_params = dict(metric_type="COSINE", params=dict(nprobe=10))

# Retrieve the 5 nearest stored embeddings and return each match's "word" field.
result = board_db.search(
    data=vectors_to_search,
    anns_field="embeddings",
    param=search_params,
    limit=5,
    output_fields=["word"],
)

In [80]:
# Print the matches group by group, with a separator line before each group
# (a single group here, since only one query vector was searched).
for query_hits in result:
    print("====")
    for match in query_hits:
        print(match.entity)

====
id: 1, distance: 0.2861783504486084, entity: {'word': 'TIME'}
id: 4, distance: 0.26706433296203613, entity: {'word': 'STRING'}
id: 10, distance: 0.26237812638282776, entity: {'word': 'GOLD'}
id: 9, distance: 0.24220968782901764, entity: {'word': 'GAS'}
id: 5, distance: 0.23788677155971527, entity: {'word': 'MISSILE'}
