In [158]:
import numpy as np
from scipy import spatial
import random
import itertools
from enum import Enum

In [191]:
# Read the Codenames vocabulary: one lower-cased entry per line of the file.
vocabulary = []

with open('codenamesvocab.txt', 'r') as f:
    for entry in f.read().splitlines():
        vocabulary.append(entry.lower())

# Drop the four two-word entries; list.remove raises ValueError if one is absent.
for phrase in ('loch ness', 'ice cream', 'new york', 'scuba diver'):
    vocabulary.remove(phrase)

In [159]:
class Color(Enum):
    """Colour assigned to a card: RED = 1, BLUE = 2, GREY = 3, BLACK = 4."""

    # Members are numbered 1..4 in declaration order.
    RED, BLUE, GREY, BLACK = range(1, 5)

In [None]:
class Game:
    """Placeholder for the game object; no behaviour has been implemented yet."""

    def __init__(self):
        # The original stub stopped after the parameter list (no colon, no
        # body), which is a SyntaxError. Keep a valid no-op constructor so the
        # notebook can run top to bottom until the class is filled in.
        pass

In [49]:
class Card:
    """One card on the board: its word, its colour and whether it has been flipped."""

    def __init__(self, word, color, flipped=False):
        """Store the three fields as given; ``flipped`` defaults to False."""
        self.word, self.color, self.flipped = word, color, flipped

In [310]:
class Board:
    """A set of cards plus cached word distances used to search for clues.

    Construction computes, for every card, the distance from the card's word to
    every key of the module-level ``glove`` dict (via ``distBtwnWords``) and
    caches it in ``self.distances[card_word][other_word]``. ``self.clueList``
    is the set of words whose distance to at least one card is strictly below
    ``threshold``, with the card words themselves excluded.
    """

    def __init__(self, cards, threshold=.4):
        """Build the distance cache and the candidate clue set.

        Args:
            cards: sequence of objects exposing ``.word`` and ``.color``.
            threshold: a word becomes a candidate clue when its distance to
                some card is strictly less than this value.

        Raises:
            KeyError: propagated from ``distBtwnWords`` when a card word is
                not a key of ``glove``.
        """
        self.length = len(cards)
        self.threshold = threshold
        self.clueList = set()
        self.words = cards
        self.distances = {}
        for card in self.words:
            # One distBtwnWords call per (card, glove key) pair.
            distToWords = {word: distBtwnWords(card.word, word) for word in glove}
            # Honour the configured threshold instead of a hard-coded .4.
            self.clueList.update(word for word, dist in distToWords.items()
                                 if dist < self.threshold)
            self.distances[card.word] = distToWords
        for card in self.words:
            # discard, not remove: a card word may be absent from the set
            # (threshold <= its self-distance, or the same word on two cards).
            self.clueList.discard(card.word)

    def __str__(self):
        """Render the card words as a roughly square grid, 12 columns per word.

        The row width is the smallest integer whose square is at least the
        number of cards, so 25 cards print as 5x5 and 9 cards as 3x3; a final
        partial row is printed as-is. Every row ends with a newline.
        """
        total = len(self.words)
        side = 1
        while side * side < total:
            side += 1
        rows = []
        for start in range(0, total, side):
            row = self.words[start:start + side]
            rows.append("".join(card.word.ljust(12) for card in row) + "\n")
        return "".join(rows)

    def getColor(self, color):
        """Return the words of all cards whose ``color`` equals ``color``."""
        return [card.word for card in self.words if card.color == color]

    def _meanDistance(self, wordList, clue):
        """Average cached distance from ``clue`` to each word in ``wordList``."""
        dists = [self.distances[word][clue] for word in wordList]
        return sum(dists) / len(dists)

    def findClosest(self, wordList):
        """Return ``(score, clue)`` for the clue with the lowest mean distance to ``wordList``.

        Raises:
            ValueError: if ``self.clueList`` is empty.
            ZeroDivisionError: if ``wordList`` is empty.
            KeyError: if a word in ``wordList`` is not one of the board's cards.
        """
        best = min(self.clueList, key=lambda clue: self._meanDistance(wordList, clue))
        return self._meanDistance(wordList, best), best

    def findNClosest(self, wordList, n):
        """Return the ``n`` lowest ``(score, clue)`` pairs for ``wordList``, ascending."""
        scored = sorted((self._meanDistance(wordList, clue), clue) for clue in self.clueList)
        return scored[:n]

    def findBest(self, subsetSize):
        """Search every ``subsetSize``-card subset and return the best ``(score, clue)``.

        Each time a new overall best is found and its score is below
        ``self.threshold``, the subset, clue and score are printed. When no
        subset exists the initial ``(100, 'N/A')`` is returned.
        """
        subsets = getSubsets([card.word for card in self.words], subsetSize)
        curBestWord = 'N/A'
        curBestScore = 100
        for subset in subsets:
            score, word = self.findClosest(subset)
            if score < curBestScore:
                curBestWord = word
                curBestScore = score
                if curBestScore < self.threshold:
                    print(subset, curBestWord, curBestScore)
        # The original returned the undefined name `best` (NameError).
        return curBestScore, curBestWord

    def findNBest(self, subsetSize, n):
        """Return the ``n`` lowest ``(score, clue, subset)`` triples over all subsets.

        Only the top ``n`` clues of each subset are considered, which is
        sufficient because the overall result is limited to ``n`` entries.
        """
        subsets = getSubsets([card.word for card in self.words], subsetSize)
        bestWordsList = []
        for subset in subsets:
            bestWordsList.extend((score, word, subset)
                                 for score, word in self.findNClosest(subset, n))
        return sorted(bestWordsList)[:n]

In [313]:
# Load the GloVe embeddings into a dict mapping token -> numpy vector.
glove = {}
with open('glove_embeddings/glove.6B.50d.txt', 'r', encoding='utf-8') as f:
    # Stream the file line by line rather than reading it whole and splitting,
    # which held two full copies of the text in memory at once.
    for line in f:
        # Each line is "<token> <v1> <v2> ...", separated by single spaces.
        items = line.rstrip('\n').split(' ')
        glove[items[0]] = np.array([float(x) for x in items[1:]])

# Function words removed from the embedding table so they are never considered
# when every key of `glove` is scanned.
badwords = ['put', 'instead','when','then','while','from','on','the','along','where','into','of','be','to','and','a','in','that','have','i','it','for','not','with','he','as','you','do','at','this','but']
for wd in badwords:
    # A default avoids a KeyError if a word is absent from the loaded file.
    glove.pop(wd, None)

In [12]:
def distBtwnWords(word1, word2):
    """Return the cosine distance between the ``glove`` vectors of two words.

    Both words are looked up in the module-level ``glove`` dict, so a word
    that is not a key raises KeyError.
    """
    first_vec = glove[word1]
    second_vec = glove[word2]
    return spatial.distance.cosine(first_vec, second_vec)
def wordDiff(word1, word2):
    """Return the element-wise difference ``glove[word1] - glove[word2]``."""
    minuend = glove[word1]
    subtrahend = glove[word2]
    return minuend - subtrahend

In [32]:
# Inspect the raw embedding vector stored for 'male'.
print(glove['male'])

[-0.23046    0.65937   -0.28411   -0.44366    1.5922     1.8564
 -0.0054708 -0.58679   -0.1506    -0.021166   1.1029    -0.79502
  1.1899     0.53535    0.25256   -0.15882   -0.31825    0.53609
 -0.5944    -0.21288   -0.94989    0.91619    0.4879     0.77063
 -0.16215   -1.0515    -0.70571   -0.79814   -0.79354   -0.086372
  2.2497     0.68785   -0.085613  -0.68004    0.62212   -0.02536
  0.10967   -0.38748   -0.62791   -1.0871    -0.37412   -0.061965
  0.19225    0.89263    0.51763   -1.4791    -0.23219   -1.1589
  0.066075  -0.038772 ]


In [31]:
# Pairwise cosine distances between 'dog', 'cat' and 'bird'.
print(distBtwnWords('dog','cat'), distBtwnWords('dog','bird'), distBtwnWords('cat','bird'))

0.07819947262307503 0.3308377727652998 0.30423994470115245


In [34]:
# Print the raw vectors for 'dog', 'cat' and 'bird', separated by newlines.
print(glove['dog'],"\n",glove['cat'],"\n",glove['bird'])

[ 0.11008   -0.38781   -0.57615   -0.27714    0.70521    0.53994
 -1.0786    -0.40146    1.1504    -0.5678     0.0038977  0.52878
  0.64561    0.47262    0.48549   -0.18407    0.1801     0.91397
 -1.1979    -0.5778    -0.37985    0.33606    0.772      0.75555
  0.45506   -1.7671    -1.0503     0.42566    0.41893   -0.68327
  1.5673     0.27685   -0.61708    0.64638   -0.076996   0.37118
  0.1308    -0.45137    0.25398   -0.74392   -0.086199   0.24068
 -0.64819    0.83549    1.2502    -0.51379    0.04224   -0.88118
  0.7158     0.38519  ] 
 [ 0.45281  -0.50108  -0.53714  -0.015697  0.22191   0.54602  -0.67301
 -0.6891    0.63493  -0.19726   0.33685   0.7735    0.90094   0.38488
  0.38367   0.2657   -0.08057   0.61089  -1.2894   -0.22313  -0.61578
  0.21697   0.35614   0.44499   0.60885  -1.1633   -1.1579    0.36118
  0.10466  -0.78325   1.4352    0.18629  -0.26112   0.83275  -0.23123
  0.32481   0.14485  -0.44552   0.33497  -0.95946  -0.097479  0.48138
 -0.43352   0.69455   0.91043  -0.

In [43]:
# Cosine distance between 'male' and 'king'.
print(distBtwnWords('male','king'))

0.7283124287728622


In [28]:
# Cosine distance between the two offset vectors (male - king) and (male - actor).
print(spatial.distance.cosine(wordDiff('male', 'king'), wordDiff('male','actor')))

0.4519707881531245


In [25]:
# For the first 20 keys of `glove`, print the cosine distance between the
# offsets (key - king) and (key - prince).
for key in list(glove.keys())[:20]:
    print(key, spatial.distance.cosine(wordDiff(key, 'king'), wordDiff(key,'prince')))

the 0.18575387776885832
, 0.2074276278753181
. 0.2085963564891905
of 0.20482632906730824
to 0.19189685258843636
and 0.20081407836221077
in 0.18892307203990266
a 0.15465014174261316
" 0.1410967561280949
's 0.21773113418434242
for 0.1703003554588941
- 0.11110574776606674
that 0.1593307854630318
on 0.16953377889238586
is 0.1701913687060963
was 0.22289563853031347
said 0.11730708504128629
with 0.18009814353505393
he 0.18909176633982105
as 0.23620560566113724


In [47]:
# Key of `glove` with the smallest distance to 'strawberry'. The word itself is
# scored 1 instead of its real distance, presumably so it does not win trivially.
min(glove.keys(), key = lambda word: 1 if word == 'strawberry' else distBtwnWords(word, 'strawberry'))

'shortcake'

In [137]:
# Longest entry in the vocabulary (the first one found, if several tie).
max(vocabulary, key=len)

'MILLIONAIRE'

In [193]:
# Build a board from the vocabulary.
# NOTE(review): `vocabulary` is a list of plain strings, while Board.__init__
# reads `card.word` on every element -- this call looks stale; confirm whether
# Card objects should be passed instead.
brd = Board(vocabulary)

In [194]:
# Render the board through Board.__str__.
print(brd)

back        pass        seal        soldier     temple      
mount       horn        jupiter     suit        washington  
cold        chest       spell       watch       duck        
casino      pistol      marble      cell        ivory       
pole        pin         opera       telescope   copper      



In [163]:
# Words of all cards on `brd` whose colour is Color.BLACK.
brd.getColor(Color.BLACK)

['UNICORN']

In [96]:
# List and count the vocabulary entries whose lower-cased form is not a key of `glove`.
counter = 0
for word in vocabulary:
    if word.lower() in glove:
        continue
    counter += 1
    print(word)
print(counter)

ICE CREAM
LOCH NESS
NEW YORK
SCUBA DIVER
4


In [106]:
# Is the single-token spelling 'newyork' a key of `glove`?
'newyork' in glove

True

In [314]:
# Nine-card test board on which every card is blue.
testBrd = Board([
    Card(word, Color.BLUE)
    for word in ('pit', 'skyscraper', 'theater', 'organ', 'litter',
                 'day', 'palm', 'plate', 'ambulance')
])

In [315]:
# For subset sizes 1 through 5, print the size followed by the ten
# lowest-scoring (score, clue, subset) entries, one per line.
for size in range(1, 6):
    print(size)
    print("\n".join(str(entry) for entry in testBrd.findNBest(size, 10)))

1
(0.0627233538509605, 'days', ('day',))
(0.07471216001000736, 'theatre', ('theater',))
(0.11197830730993752, 'week', ('day',))
(0.11712252690198821, 'next', ('day',))
(0.12232884011841294, 'weekend', ('day',))
(0.12532693762749159, 'night', ('day',))
(0.12533595022103805, 'coming', ('day',))
(0.12551679467876853, 'morning', ('day',))
(0.12627023687287675, 'came', ('day',))
(0.13016882271948826, 'here', ('day',))
2
(0.2947794461285403, 'bottom', ('pit', 'plate'))
(0.2966389379687788, 'night', ('theater', 'day'))
(0.2987758511679191, 'dirt', ('pit', 'litter'))
(0.3114046274131093, 'garbage', ('pit', 'litter'))
(0.3126123032423235, 'dirt', ('pit', 'plate'))
(0.31504086320738584, 'days', ('pit', 'day'))
(0.3188765804318552, 'starts', ('pit', 'day'))
(0.320102246718943, 'place', ('pit', 'day'))
(0.3205288448258398, 'coming', ('pit', 'day'))
(0.32171904945371416, 'place', ('day', 'plate'))
3
(0.3400826061460636, 'dirt', ('pit', 'litter', 'plate'))
(0.36001987622631476, 'bottom', ('pit', 'da

In [293]:
# Ten clues with the lowest mean distance to these five words.
# NOTE(review): findNClosest looks each word up in `testBrd.distances`, which is
# keyed by card words; none of these words is a card on the `testBrd` built in
# this notebook, so this call appears to rely on an earlier board -- confirm.
testBrd.findNClosest(['china', 'staff', 'state', 'agent', 'train'], 10)

[(0.3928719186772989, 'for'),
 (0.3939521205891623, 'also'),
 (0.40378657927184936, 'officials'),
 (0.40398519704183117, 'working'),
 (0.4076677698117268, 'to'),
 (0.4097968244093371, 'as'),
 (0.41297207790100154, '.'),
 (0.41809930259583317, 'service'),
 (0.4189091632643077, 'now'),
 (0.42062552996920194, 'well')]

In [298]:
# Cosine distance from 'parasite' to 'disease' and to 'worm'.
for word in ['disease', 'worm']:
    print(distBtwnWords(word, 'parasite'))

0.3777119249059333
0.3096891779126616


In [272]:
# Cosine distance between 'train' and 'agent'.
print(distBtwnWords('train', 'agent'))

0.768875241002086


In [217]:
# Print every 3-word combination of the nine words, as returned by getSubsets.
for subset in getSubsets(['cliff', 'diamond', 'china', 'bottle', 'staff', 'cloak', 'state', 'agent', 'train'],3):
    print(subset)

('cliff', 'diamond', 'china')
('cliff', 'diamond', 'bottle')
('cliff', 'diamond', 'staff')
('cliff', 'diamond', 'cloak')
('cliff', 'diamond', 'state')
('cliff', 'diamond', 'agent')
('cliff', 'diamond', 'train')
('cliff', 'china', 'bottle')
('cliff', 'china', 'staff')
('cliff', 'china', 'cloak')
('cliff', 'china', 'state')
('cliff', 'china', 'agent')
('cliff', 'china', 'train')
('cliff', 'bottle', 'staff')
('cliff', 'bottle', 'cloak')
('cliff', 'bottle', 'state')
('cliff', 'bottle', 'agent')
('cliff', 'bottle', 'train')
('cliff', 'staff', 'cloak')
('cliff', 'staff', 'state')
('cliff', 'staff', 'agent')
('cliff', 'staff', 'train')
('cliff', 'cloak', 'state')
('cliff', 'cloak', 'agent')
('cliff', 'cloak', 'train')
('cliff', 'state', 'agent')
('cliff', 'state', 'train')
('cliff', 'agent', 'train')
('diamond', 'china', 'bottle')
('diamond', 'china', 'staff')
('diamond', 'china', 'cloak')
('diamond', 'china', 'state')
('diamond', 'china', 'agent')
('diamond', 'china', 'train')
('diamond', 'b

In [179]:
def findClosest(wordList, clueList):
    """Pick the clue whose largest distance to any word in ``wordList`` is smallest.

    Returns:
        ``(best_clue, worst_distance)`` where ``worst_distance`` is the largest
        ``distBtwnWords`` value between ``best_clue`` and a word in ``wordList``.
    """
    def worstDistance(clue):
        return max(distBtwnWords(clue, word) for word in wordList)

    best = min(clueList, key=worstDistance)
    return best, worstDistance(best)

In [198]:
# Find the best clue for three words.
# NOTE(review): Board.findClosest as defined in this notebook takes only
# `wordList`; this two-argument call matches the module-level
# findClosest(wordList, clueList) signature instead, so it is probably left over
# from an earlier Board version -- confirm.
brd.findClosest(['soldier','pass','temple'],glove.keys())

('another', 0.6028596157985795)

In [154]:
def bestWord(board, color):
    """Return the best ``(clue, score)`` over all four-word subsets of a team's cards.

    For each four-word subset of ``board.getColor(color)``, the clue in
    ``board.clueList`` with the smallest summed distance to the subset is
    chosen and scored by its largest distance to any word in the subset. The
    pair with the lowest score across all subsets is returned.

    Args:
        board: object exposing ``getColor(color)``, ``clueList`` and
            ``distances[word][clue]`` as built by ``Board``.
        color: colour whose cards the clue should target.

    Returns:
        ``(clue, score)``, or ``None`` when the colour has fewer than four cards.

    Raises:
        ValueError: if ``board.clueList`` is empty and at least one subset exists.

    NOTE(review): the original draft ended in an unfinished expression (a
    SyntaxError) and read an undefined ``distances`` name; this completion is a
    best guess at the intent. The draft also fetched the opposing team's words
    and the black card without using them -- penalising clues close to those
    is still TODO.
    """
    goodWords = board.getColor(color)

    def findClosest(wordList):
        # Choose by summed distance, report the worst single distance.
        best = min(board.clueList,
                   key=lambda clue: sum(board.distances[word][clue] for word in wordList))
        return best, max(board.distances[word][best] for word in wordList)

    fourWordSubsets = getSubsets(goodWords, 4)
    return min((findClosest(subset) for subset in fourWordSubsets),
               key=lambda pair: pair[1], default=None)
        

In [213]:
def getSubsets(lst, size):
    """Return every ``size``-element combination of ``lst`` as a list of tuples.

    Combinations come in the order produced by ``itertools.combinations``.
    """
    return [combo for combo in itertools.combinations(lst, size)]