In [1]:
#load packages
import gensim
from gensim.models import KeyedVectors
from gensim.models import Word2Vec
from gensim.models import FastText
from gensim.models import Doc2Vec
from gensim.models.doc2vec import TaggedDocument
import pandas as pd
import numpy as np
from sklearn.decomposition import PCA
import random
import matplotlib.pyplot as plt
from sklearn.cluster import KMeans

#load the word2vec model
model = Word2Vec.load('codenames.model')

Pyarrow will become a required dependency of pandas in the next major release of pandas (pandas 3.0),
(to allow more performant data types, such as the Arrow string type, and better interoperability with other libraries)
but was not found to be installed on your system.
If this would cause problems for you,
please provide us feedback at https://github.com/pandas-dev/pandas/issues/54466
        
  import pandas as pd


### Get the wordlist from the model's vocab

In [2]:
wordlist = list(model.wv.key_to_index.keys())

### Generate a board, along with blue team words, red team words, and a bomb

In [3]:
# Generate a random list of 25 unique words
words = random.sample(wordlist, 25)

# Split the words into bluelist, redlist, and assassin
bluelist = words[:9]
redlist = words[9:17]
assassin = words[24:]
board = words
#print the board and the like
print("Board:", words)
print("Blue Team:", bluelist)
print("Red Team:", redlist)
print("Assassin:", assassin)

Board: ['therein', 'pallid', 'marbled', 'successful', 'descriptive', 'proportion', 'capstan', 'knowledge', 'overshoot', 'punitive', 'barter', 'nice', 'pestiferous', 'cove', 'illegal', 'simulate', 'unpleasant', 'membranaceous', 'husbandry', 'socialist', 'lorikeet', 'scarab', 'asperse', 'mankind', 'llama']
Blue Team: ['therein', 'pallid', 'marbled', 'successful', 'descriptive', 'proportion', 'capstan', 'knowledge', 'overshoot']
Red Team: ['punitive', 'barter', 'nice', 'pestiferous', 'cove', 'illegal', 'simulate', 'unpleasant']
Assassin: ['llama']


### Make a 'Mother Board' where each word is in a list with its word vector and a penalty term based on it's role in the game

In [4]:
#create a new list, called board_max which will be a list of a list of every word, it's word vector, and a penalty term. If the word is in the bluelist, the penalty term will be -1, if it's in the redlist, the penalty term will be -5, and if it's the assassin, the penalty term will be -100. If it is not in any of those lists then the penalty term will be -2.5
board_max = []
for word in board:
    if word in bluelist:
        penalty = -1
    elif word in redlist:
        penalty = -5
    elif word in assassin:
        penalty = -100
    else:
        penalty = -2.5
    board_max.append([word, model.wv[word], penalty])


### Creating a Method

Here we create a simple method to get a group of words, find a clue for those words, and generate a score using the penalty terms in our board list. This will be the basis of our main function later on

In [5]:
#start with every word in bluelist and find the average word vector for those words

blue_avg = np.zeros(100)
for word in bluelist:
    blue_avg += model.wv[word]
blue_avg = blue_avg/3

#find the most similar word to the blue_avg vector
clue = model.wv.similar_by_vector(blue_avg, topn=1)[0][0]

#now iterate through every word in the board_max list, find the similarity between each word and the blue_sim word and multiply that by the penalty term. Hold the sum in a value 
score = 0
for word in board_max:
    score += model.wv.similarity(clue, word[0]) * word[2]
#add score to a list of scores that hold the clue, the score, and the words that generated the average vector to get the clue
scores = []
scores.append([clue, score, bluelist])

### Put it all together in a function

Here we add everything together to make a cohesive function. We also add a piece that gets every combination of i words so that we can test EVERY possibility. This is essentially performing a grid search, and since it isn't a very large space to search over, we can simply find the best possible clue to give for the best combination. 

In [72]:
from itertools import combinations

def QLClueMaster(board, bluelist, redlist, assassin, model):
    #create the board master list with wordvectors and penalties
    board_max = []
    for word in board:
        if word in bluelist:
            penalty = -0.1
        elif word in redlist:
            penalty = -50
        elif word in assassin:
            penalty = -100
        else:
            penalty = -5
    board_max.append([word, model.wv[word], penalty])
    #set combo list and scores list to empty
    combos = []
    scores = []
    
    #if else: if the red team only has one word left, then try to generate a clue to cover all our remaining words as last effort
    if(len(redlist) == 1):
         #loop from combos of 1 to the length of your wordlist
        for i in range(len(bluelist)-2,len(bluelist)-1):
            #get all possible combos for i words in the bluelist
            combos = list(combinations(bluelist, i))
            #for each combo, get the average vector and find the most similar word to that vector
            for combo in combos:
                blue_avg = np.zeros(100)
                #get the average vector for the combo
                for word in combo:
                    blue_avg += model.wv[word]
                blue_avg = blue_avg/len(combo)
                #get the top i + 1 most similar words to the average vector (clues)
                clues = model.wv.similar_by_vector(blue_avg, topn=(i+1))
                #try the first clue, if it's in the combo, try the next one, and so on until you find a clue that isn't in the combo
                clue = clues[0][0]
                j = 1
                while clue in combo:
                    clue = clues[j][0]
                    j += 1
                clue = clues[j-1][0]
                #get the score for the clue
                score = 0
                for word in board_max:
                    score += model.wv.similarity(clue, word[0]) * word[2]
                #adjust the score by the length of the combo
                score = score / len(combo)
                #add the clue, score, and combo to the scores list
                scores.append([clue, score, combo])
        
    
    else:
        #loop from combos of 1 to the length of your wordlist
        for i in range(3,len(bluelist)-2):
            #get all possible combos for i words in the bluelist
            combos = list(combinations(bluelist, i))
            #for each combo, get the average vector and find the most similar word to that vector
            for combo in combos:
                blue_avg = np.zeros(100)
                #get the average vector for the combo
                for word in combo:
                    blue_avg += model.wv[word]
                blue_avg = blue_avg/len(combo)
                #get the top i + 1 most similar words to the average vector (clues)
                clues = model.wv.similar_by_vector(blue_avg, topn=(i+1))
                #try the first clue, if it's in the combo, try the next one, and so on until you find a clue that isn't in the combo
                clue = clues[0][0]
                j = 1
                while clue in combo:
                    clue = clues[j][0]
                    j += 1
                clue = clues[j-1][0]
                #get the score for the clue
                score = 0
                for word in board_max:
                    score += model.wv.similarity(clue, word[0]) * word[2]
                #adjust the score by the length of the combo
                score = score / len(combo)
                #add the clue, score, and combo to the scores list
                scores.append([clue, score, combo])
   
    #sort the scores list by the score
    scores = sorted(scores, key=lambda x: x[1], reverse=True)
  
    #return the top score, word list, and clue
    return scores[0]

### Test out the function! 

In [73]:
# Take words only from model vocab
wordlist_new = []
with open('wordlist-eng.txt') as f:
    for line in f:
        # all lowercase word
        line = line.lower().strip()
        # only add word if word is in the model vocab
        if line in model.wv.key_to_index:
            wordlist_new.append(line)

# Generate a random list of 25 unique words
board = random.sample(wordlist_new, 25)

# Split the words into bluelist, redlist, and assassin
bluelist = board[:9]
redlist = board[9:10]
assassin = board[24:]
#print the board and the like
print("Board:", board)
print("Blue Team:", bluelist)
print("Red Team:", redlist)
print("Assassin:", assassin)
print(' ')

#run the QLClueMaster function
clue = QLClueMaster(board, bluelist, redlist, assassin, model)
#print the clue, score, and word list
print("Clue:", clue[0])
print("Score:", clue[1])
print("Word List:", clue[2])

Board: ['wind', 'oil', 'mint', 'string', 'snow', 'stream', 'club', 'boot', 'vacuum', 'orange', 'torch', 'rose', 'wave', 'giant', 'paper', 'button', 'spy', 'scorpion', 'rock', 'octopus', 'bow', 'stock', 'cell', 'fair', 'doctor']
Blue Team: ['wind', 'oil', 'mint', 'string', 'snow', 'stream', 'club', 'boot', 'vacuum']
Red Team: ['orange']
Assassin: ['doctor']
 
Clue: sand
Score: -3.303245348589761
Word List: ('oil', 'mint', 'string', 'snow', 'stream', 'club', 'vacuum')
