In [44]:
import numpy as np
import pandas as pd
# from itertools import compress
from scipy import spatial
from sklearn.cluster import DBSCAN
from sklearn.cluster import OPTICS
from sklearn.cluster import KMeans
from sklearn.manifold import TSNE
import random
import pickle

In [8]:
# Load the pre-computed GloVe word -> vector mapping.
# NOTE(security): unpickling can execute arbitrary code; only load a file you trust.
# The context manager closes the file handle once loading finishes.
with open("glove_6B_300d_lite.p", "rb") as glove_file:
    emb_glove = pickle.load(glove_file)

In [9]:
# Load the pre-computed BERT word -> vector mapping.
# NOTE(security): unpickling can execute arbitrary code; only load a file you trust.
# The context manager closes the file handle once loading finishes.
with open("bert_uncased_L-12_H-768_A-12_lite.p", "rb") as bert_file:
    emb_bert = pickle.load(bert_file)

In [10]:
# Read the Codenames cards and stack both card sides into one long column
codenames_df = pd.read_csv("word_list/codenames_word_list.csv")
codenames = codenames_df.melt(
    id_vars=['ID', 'Version'],
    value_vars=['SideA', 'SideB'],
    var_name='Side',
    value_name='Codename',
)['Codename'].tolist()
codenames = [word.lower() for word in codenames]  # normalise to lowercase

# Keep only single-word entries (anything containing a space is dropped)
one_word_idx = [' ' not in word for word in codenames]
codenames = [word for word, keep in zip(codenames, one_word_idx) if keep]

In [11]:
class Game:
    """A 5 x 5 Codenames board with a seeded, reproducible layout.

    Attributes set by the constructor:
        generator      -- ``np.random.RandomState`` seeded with ``seed``
        words          -- the full word list as a numpy array
        board          -- the 25 words drawn for this game
        owner          -- int array of length 25; 0 = assassin, 1 = team 1,
                          2 = team 2, 3 = bystander
        assassin_word, team1_word, team2_word, bystander_word
                       -- board words grouped by owner, as plain lists
    """

    def __init__(self, word_list, seed):
        """Draw 25 words from ``word_list`` and assign each an owner.

        Parameters:
            word_list -- sequence of candidate words
            seed      -- seed for the random number generator
        """
        n_cards = 5 * 5  # the 5 x 5 setup
        self.generator = np.random.RandomState(seed=seed)
        self.words = np.array(word_list)

        # Pick 25 distinct positions in the word list; these form the board
        picked = self.generator.choice(len(self.words), n_cards, replace=False)
        self.board = self.words[picked]

        # A random ordering of the board cells decides who owns what:
        # 1 assassin, 9 words for the first player, 8 for the second,
        # and the remaining 7 are bystanders.
        order = self.generator.permutation(n_cards)
        self.owner = np.empty(n_cards, int)
        layout = (
            (0, order[:1]),     # assassin
            (1, order[1:10]),   # first player: 9 words
            (2, order[10:18]),  # second player: 8 words
            (3, order[18:]),    # bystander: 7 words
        )
        for role, cells in layout:
            self.owner[cells] = role

        (self.assassin_word,
         self.team1_word,
         self.team2_word,
         self.bystander_word) = (
            self.board[self.owner == role].tolist() for role in range(4)
        )

    def print_words(self):
        """Print each group of board words under its heading."""
        sections = (
            ("Team 1: ", self.team1_word),
            ("Team 2: ", self.team2_word),
            ("Assassin: ", self.assassin_word),
            ("Bystanders: ", self.bystander_word),
        )
        for heading, group in sections:
            print(heading)
            print(group)

In [12]:
# Deal a board from the word list; the second argument is the RNG seed,
# fixed here so the same board is produced on every run.
game1 = Game(codenames, 53321)

In [13]:
# Show the board words grouped by team 1, team 2, assassin and bystanders.
game1.print_words()

Team 1: 
['keg', 'hercules', 'shampoo', 'sloth', 'mole', 'hawaii', 'comb', 'leather', 'egypt']
Team 2: 
['tokyo', 'rug', 'post', 'novel', 'chick', 'safe', 'spider', 'champagne']
Assassin: 
['bulb']
Bystanders: 
['fan', 'oil', 'cook', 'manboobs', 'cowgirl', 'strip', 'queer']


In [102]:
class SpyMaster:
    """Suggests one-word clues for team 1 from word embeddings.

    Parameters:
        embeddings -- dict-like mapping of word -> vector
        game       -- object exposing ``team1_word``, ``team2_word`` and
                      ``assassin_word`` lists (e.g. a ``Game`` instance)

    Attributes:
        answers -- the words the clue should point at (team 1's words)
        bad     -- the words the clue must avoid (team 2's words + assassin)
    """

    def __init__(self, embeddings, game):
        self.embeddings = embeddings
        self.game = game
        self.answers = game.team1_word
        self.bad = game.team2_word + game.assassin_word

    def _is_excluded(self, word, answers, bad):
        """Return True if ``word`` is one of the answer/bad words.

        Both the instance-wide lists and the lists passed to the scoring call
        are checked, so ``goodness`` and ``minimax`` reject the same words.
        """
        return any(word in group for group in (self.answers, self.bad, answers, bad))

    def distance(self, word, reference):
        """Cosine distance between the embeddings of two words.

        Raises ``KeyError`` if either word has no embedding.
        """
        return spatial.distance.cosine(self.embeddings[word], self.embeddings[reference])

    def closest_words(self, reference):
        """All embedded words, ordered from nearest to farthest from ``reference``."""
        return sorted(self.embeddings.keys(), key=lambda w: self.distance(w, reference))

    def goodness(self, word, answers, bad):
        """Score ``word`` as a clue: total distance to ``bad`` minus 4x total distance to ``answers``.

        Returns -999 for any word that is itself an answer or bad word.
        """
        if self._is_excluded(word, answers, bad):
            return -999
        far_from_bad = sum(self.distance(word, b) for b in bad)
        close_to_answers = sum(self.distance(word, a) for a in answers)
        return far_from_bad - 4.0 * close_to_answers

    def minimax(self, word, answers, bad):
        """Worst-case score: distance to the nearest bad word minus distance to the farthest answer.

        Returns -999 for any word that is itself an answer or bad word.
        """
        if self._is_excluded(word, answers, bad):
            return -999
        nearest_bad = min(self.distance(word, b) for b in bad)
        farthest_answer = max(self.distance(word, a) for a in answers)
        return nearest_bad - farthest_answer

    def get_clusters(self, words, n_cluster=3, n_components=2, eps=0.2, max_eps=0.1, method="kmeans", seed=1):
        """Cluster ``words`` by their embedding vectors and return the label array.

        Parameters:
            words        -- words to cluster; each must have an embedding
            n_cluster    -- number of clusters for the k-means based methods
            n_components -- output dimensionality of the t-SNE projection
            eps          -- neighbourhood radius for DBSCAN
            max_eps      -- maximum neighbourhood radius for OPTICS
            method       -- "kmeans", "tsne-kmeans", "dbscan" or "optics"
            seed         -- random state for k-means and t-SNE

        Raises:
            KeyError   -- if any word has no embedding
            ValueError -- if ``method`` is not one of the supported names
        """
        # Fail early with the offending words instead of feeding None to sklearn.
        missing = [w for w in words if w not in self.embeddings]
        if missing:
            raise KeyError("No embedding for: " + ", ".join(missing))
        word_vectors = np.array([self.embeddings[w] for w in words])

        if method == "kmeans":
            clustering = KMeans(n_clusters=n_cluster, random_state=seed).fit(word_vectors)
        elif method == "tsne-kmeans":
            # Seed both stages so the result is reproducible for a given seed.
            word_vectors_embedded = TSNE(
                n_components=n_components, random_state=seed).fit_transform(word_vectors)
            clustering = KMeans(n_clusters=n_cluster, random_state=seed).fit(word_vectors_embedded)
        elif method == "dbscan":
            # Honour the caller's eps (it used to be hard-coded to 0.2).
            clustering = DBSCAN(eps=eps, min_samples=2, metric="cosine").fit(word_vectors)
        elif method == "optics":
            clustering = OPTICS(min_samples=2, metric="cosine", max_eps=max_eps).fit(word_vectors)
        else:
            raise ValueError("Clustering method is unknown.")

        return clustering.labels_

    def candidates(self, size=5, shortlist=250):
        """Print the top clue candidates for each cluster of answer words.

        For every cluster, the vocabulary is ranked by ``goodness``, the best
        ``shortlist`` words are re-ranked by ``minimax``, and the top ``size``
        are printed as "rank. word (minimax score)".

        Parameters:
            size      -- number of candidates printed per cluster
            shortlist -- number of top-``goodness`` words re-ranked by ``minimax``
        """
        clusters = self.get_clusters(self.answers)
        answers = np.array(self.answers)
        vocabulary = list(self.embeddings.keys())

        for label in set(clusters):
            this_cluster = answers[clusters == label].tolist()
            print(this_cluster)
            best = sorted(vocabulary, key=lambda w: -1 * self.goodness(w, this_cluster, self.bad))
            # Compute each minimax score once, then sort (stable) from best to worst.
            scored = [(self.minimax(w, this_cluster, self.bad), w) for w in best[:shortlist]]
            scored.sort(key=lambda pair: -1 * pair[0])
            print(["{0}. {1} ({2:.2f})".format(rank, w, score)
                   for rank, (score, w) in enumerate(scored[:size], start=1)])
    

In [103]:
# Spymaster for game1 that scores clues with the emb_glove embeddings.
spymaster1 = SpyMaster(embeddings = emb_glove, game = game1)

In [104]:
# Print each cluster of team 1's words followed by its top clue candidates.
spymaster1.candidates()

['keg', 'shampoo', 'sloth', 'mole', 'comb']
['1. dat (0.02)', '2. sys (0.02)', '3. snort (0.02)', '4. turd (0.02)', '5. freckles (0.01)']
['hercules', 'hawaii', 'egypt']
['1. mauritius (0.08)', '2. malta (0.07)', '3. dispatched (0.02)', '4. oman (0.01)', '5. fleet (0.01)']
['leather']
['1. boots (0.40)', '2. jackets (0.38)', '3. shoes (0.38)', '4. footwear (0.35)', '5. accessories (0.34)']


In [105]:
# Spymaster for the same game, this time using the emb_bert embeddings.
spymaster2 = SpyMaster(embeddings = emb_bert, game = game1)

In [106]:
# Print each cluster of team 1's words followed by its top clue candidates.
spymaster2.candidates()

['shampoo', 'sloth']
['1. malpractice (0.05)', '2. shakira (0.05)', '3. eminem (0.03)', '4. vacancies (0.02)', '5. cougar (0.02)']
['hercules', 'mole', 'hawaii', 'comb', 'leather', 'egypt']
['1. fbi (-0.04)', '2. mlb (-0.04)', '3. hull (-0.05)', '4. mali (-0.05)', '5. papua (-0.05)']
['keg']
['1. ketchup (0.09)', '2. blowjob (0.07)', '3. poop (0.07)', '4. doggy (0.07)', '5. bong (0.07)']
