This file has code for help getting started creating a function which generate clues considering the entire board (opponents' words + assassin words + "bystander" words)

In [3]:
# Import packages
import pandas as pd
import numpy as np
from nltk.tokenize import word_tokenize
from nltk.corpus import stopwords
import string
import gensim
import re
from gensim.models import Word2Vec
from gensim.models import KeyedVectors
from gensim.models import Word2Vec
from gensim.models import FastText
from gensim.models import Doc2Vec
from gensim.models.doc2vec import TaggedDocument
import nltk
import random
#nltk.download('punkt')

from sklearn.preprocessing import normalize
from sklearn.cluster import KMeans


In [8]:
#reading in the cleaned dictionary
df = pd.read_csv('cleaned_dict.csv')

#replace commas with spaces
df['definition'] = df['definition'].str.replace(',', ' ')
#remove any non-alphabetic characters
df['definition'] = df['definition'].str.replace('[^a-zA-Z]', '')

#tokenize the definitions
df['definition'] = df['definition'].apply(word_tokenize)
#remove quotes from the words
df['definition'] = df['definition'].apply(lambda x: [word.replace("'", "") for word in x])

#train the word2vec model
model = Word2Vec(df['definition'], min_count=5, window=5, sg=0)

#save the model
model.save('codenames.model')
wordlist = list(model.wv.key_to_index.keys())

Here's a function that generates clues ONLY for our words

In [9]:
# Generate a random list of 25 unique words
words = random.sample(wordlist, 25)

# Split the words into bluelist, redlist, and assassin 
#(might need to randomize which team has the extra word)

bluelist = words[:9]
redlist = words[9:17]
assassin = words[24:]
#print the board and the like
print("Board:", words)
print("Blue Team:", bluelist)
print("Red Team:", redlist)
print("Assassin::", assassin)

Board: ['analytical', 'opacity', 'carbon', 'mocker', 'displacement', 'feebly', 'engraved', 'dissolving', 'representative', 'bend', 'supplant', 'scurvy', 'genitals', 'flatiron', 'wherry', 'merrymaking', 'sphenethmoid', 'calcimine', 'lament', 'callosity', 'decant', 'nectar', 'ounce', 'dashing', 'hamlet']
Blue Team: ['analytical', 'opacity', 'carbon', 'mocker', 'displacement', 'feebly', 'engraved', 'dissolving', 'representative']
Red Team: ['bend', 'supplant', 'scurvy', 'genitals', 'flatiron', 'wherry', 'merrymaking', 'sphenethmoid']
Assassin:: ['hamlet']


In [11]:
#create a new list, called board_max which will be a list of a list of every word, it's word vector, 
#and a penalty term. If the word is in the bluelist, the penalty term will be -1, if it's in the redlist, 
#the penalty term will be -5, and if it's the assassin, the penalty term will be -100. If it is not in any of 
#those lists then the penalty term will be -2.5
board_max = []
for word in words:
    if word in bluelist:
        penalty = -1
    elif word in redlist:
        penalty = -5
    elif word in assassin:
        penalty = -100
    else:
        penalty = -2.5
    board_max.append([word, model.wv[word], penalty])

In [12]:
#start with every word in bluelist and find the average word vector for those words

blue_avg = np.zeros(100)
for word in bluelist:
    blue_avg += model.wv[word]
blue_avg = blue_avg/3

#find the most similar word to the blue_avg vector
clue = model.wv.similar_by_vector(blue_avg, topn=1)[0][0]

#now iterate through every word in the board_max list, 
#find the similarity between each word and the blue_sim word and multiply that by the penalty term. Hold the sum in a value 
score = 0
for word in board_max:
    score += model.wv.similarity(clue, word[0]) * word[2]
#add score to a list of scores that hold the clue, the score, and the words that generated the average vector to get the clue
scores = []
scores.append([clue, score, bluelist])

In [65]:
from itertools import combinations

def QLClueMaster(board, bluelist, redlist, assassin, model):
    #create the board master list with wordvectors and penalties
    board_max = []
    for word in board:
        if word in bluelist:
            penalty = 0.1
        elif word in redlist:
            penalty = -10
        elif word in assassin:
            penalty = -100
        else:
            penalty = -5
    board_max.append([word, model.wv[word], penalty])
    #set combo list and scores list to empty
    combos = []
    scores = []
    #loop from combos of 1 to the length of your wordlist
    startNum = 3
    if(len(bluelist) < 3):
        StartNum = 0
    elif(len(bluelist) > 5):
        StartNum = 5
    else:
        StartNum = 2
    
    for i in range(startNum,len(bluelist)):
        #get all possible combos for i words in the bluelist
        combos = list(combinations(bluelist, i))
        #for each combo, get the average vector and find the most similar word to that vector
        for combo in combos:
            blue_avg = np.zeros(100)
            #get the average vector for the combo
            for word in combo:
                blue_avg += model.wv[word]
            blue_avg = blue_avg/len(combo)
            #get the top i + 1 most similar words to the average vector (clues)
            clues = model.wv.similar_by_vector(blue_avg, topn=(i+1))
            #try the first clue, if it's in the combo, try the next one, and so on until you find a clue that isn't in the combo
            clue = clues[0][0]
            j = 1
            while clue in combo:
                clue = clues[j][0]
                j += 1
            clue = clues[j-1][0]
            #get the score for the clue
            score = 0
            for word in board_max:
                score += model.wv.similarity(clue, word[0]) * word[2]
            #adjust the score by the length of the combo
            score = score / len(combo)
            #add the clue, score, and combo to the scores list
            scores.append([clue, score, combo])

    #sort the scores list by the score
    scores = sorted(scores, key=lambda x: x[1], reverse=True)
    #return the top score, word list, and clue
    return scores[0]

In [66]:
# Generate a random list of 25 unique words
board = random.sample(wordlist, 25)

# Split the words into bluelist, redlist, and assassin
bluelist = board[:9]
redlist = board[9:17]
assassin = board[24:]
#print the board and the like
print("Board:", board)
print("Blue Team:", bluelist)
print("Red Team:", redlist)
print("Assassin:", assassin)
print(' ')

#run the QLClueMaster function
clue = QLClueMaster(board, bluelist, redlist, assassin, model)
#print the clue, score, and word list
print("Clue for Blue Team:", clue[0] + " for", len(clue[2]), "words")
print("Score:", clue[1])
print("Word List:", clue[2])

Board: ['trustee', 'requital', 'late', 'malvaceous', 'doubleness', 'pudgy', 'divan', 'conspire', 'beheld', 'dispersion', 'balm', 'imprint', 'hunger', 'utterly', 'scribe', 'zero', 'freighter', 'priority', 'medicinal', 'columbite', 'sailor', 'voyage', 'muddle', 'bequest', 'hammock']
Blue Team: ['trustee', 'requital', 'late', 'malvaceous', 'doubleness', 'pudgy', 'divan', 'conspire', 'beheld']
Red Team: ['dispersion', 'balm', 'imprint', 'hunger', 'utterly', 'scribe', 'zero', 'freighter']
Assassin: ['hammock']
 
Clue for Blue Team: absent for 8 words
Score: -9.168390184640884
Word List: ('trustee', 'requital', 'late', 'malvaceous', 'doubleness', 'pudgy', 'divan', 'conspire')
