In [18]:
with open('words/game_wordpool.txt') as f:
    word_pool = f.read().splitlines()

import random

board_words = random.sample(word_pool, k=25)

In [19]:
board_words

['GLOVE',
 'CHOCOLATE',
 'STICK',
 'POINT',
 'PRESS',
 'THUMB',
 'LAB',
 'SWING',
 'LIMOUSINE',
 'FAIR',
 'DINOSAUR',
 'SHOT',
 'LEAD',
 'CANADA',
 'WALL',
 'MUG',
 'BILL',
 'FISH',
 'CRANE',
 'HOOK',
 'HORSESHOE',
 'PILOT',
 'GIANT',
 'HOOD',
 'BACK']

In [20]:
board_words = list(map(lambda x : x.lower(), board_words))

In [21]:
print(board_words[0:9])
print(board_words[9:17])
print(board_words[17:24])
print(board_words[24:])

['glove', 'chocolate', 'stick', 'point', 'press', 'thumb', 'lab', 'swing', 'limousine']
['fair', 'dinosaur', 'shot', 'lead', 'canada', 'wall', 'mug', 'bill']
['fish', 'crane', 'hook', 'horseshoe', 'pilot', 'giant', 'hood']
['back']


In [2]:
import gensim.downloader as gensim
fasttext = gensim.load("fasttext-wiki-news-subwords-300")

In [22]:
fasttext.most_similar(positive=board_words[0:9], negative=board_words[9:17])

[('forefinger', 0.4251425266265869),
 ('finger', 0.40358465909957886),
 ('fingers', 0.3948974013328552),
 ('joystick', 0.3865697979927063),
 ('thumbwheel', 0.37440046668052673),
 ('two-finger', 0.3653956353664398),
 ('thumbstick', 0.36416009068489075),
 ('fingerstick', 0.3620564341545105),
 ('finger-stick', 0.36104273796081543),
 ('knuckle', 0.3593359887599945)]

In [23]:
clue = "finger"

In [24]:
fasttext.most_similar_to_given(clue, board_words)

'thumb'

In [27]:
fasttext.most_similar_to_given(clue, board_words)

'glove'

In [29]:
board_words.remove("glove")
fasttext.most_similar_to_given(clue, board_words)

'stick'

In [31]:
board_words.remove("stick")
fasttext.most_similar_to_given(clue, board_words)

'back'

In [32]:
board_words.remove("point")
fasttext.most_similar_to_given(clue, board_words)

ValueError: list.remove(x): x not in list

In [None]:
board_words.remove("back")
fasttext.most_similar_to_given(clue, board_words)

In [10]:
import io

file = io.open('models/fasttext-wiki-news-300d-1M.vec', 'r', encoding='utf-8', newline='\n', errors='ignore')
n, d = map(int, file.readline().split())
data = {}
for line in file:
    tokens = line.rstrip().split(' ')
    data[tokens[0]] = list(map(float, tokens[1:]))

In [17]:
board_words_embeddings = list(map(lambda x : data.get(x), board_words))

In [19]:
len(board_words_embeddings)

25

In [30]:
from pymilvus import (
    connections,
    utility,
    FieldSchema,
    CollectionSchema,
    DataType,
    Collection,
)
connections.connect("default", host="localhost", port="19530")

In [69]:
utility.drop_collection("fasttext_board_embeddings")

In [70]:
fields = [
    FieldSchema(
        name="id",
        dtype=DataType.INT64,
        is_primary=True,
        auto_id=False),
    FieldSchema(
        name="word",
        dtype=DataType.VARCHAR,
        max_length=32,
    ),
    FieldSchema(
        name="embeddings",
        dtype=DataType.FLOAT_VECTOR,
        dim=300,
    )
]
schema = CollectionSchema(fields, "Embeddings of the Codenames word pool")
board_db = Collection("fasttext_board_embeddings", schema)    

In [71]:
entries = [
    [i for i in range(len(board_words))],
    board_words,
    board_words_embeddings,
]

In [72]:
board_db.insert(entries)
board_db.flush()  

In [73]:
board_db.num_entities

25

In [74]:
index = {
    "index_type": "IVF_FLAT",
    "metric_type": "COSINE",
    "params": {"nlist": 128},
}
board_db.create_index("embeddings", index)
board_db.load()

In [78]:
clue = "COPY"

clue_embedding = data.get(clue)

In [79]:
vectors_to_search = [clue_embedding]
search_params = {
    "metric_type": "COSINE",
    "params": {"nprobe": 10},
}
result = board_db.search(vectors_to_search, "embeddings", search_params, limit=5, output_fields=["word"])

In [80]:
for hits in result:
    print("====")
    for hit in hits:
        print (hit.entity)

====
id: 1, distance: 0.2861783504486084, entity: {'word': 'TIME'}
id: 4, distance: 0.26706433296203613, entity: {'word': 'STRING'}
id: 10, distance: 0.26237812638282776, entity: {'word': 'GOLD'}
id: 9, distance: 0.24220968782901764, entity: {'word': 'GAS'}
id: 5, distance: 0.23788677155971527, entity: {'word': 'MISSILE'}
