In [12]:
import sys
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import tensorflow as tf
from tensorflow import keras


In [2]:
# NLTK supplies the Brown corpus, which is the source of the hangman word list.
import nltk
# Make sure the corpus files are available locally (a no-op when already downloaded).
nltk.download('brown')
from nltk.corpus import brown

[nltk_data] Downloading package brown to
[nltk_data]     /Users/michaelspillane/nltk_data...
[nltk_data]   Package brown is already up-to-date!


In [7]:
# Pool the tokens of four Brown-corpus genres, then collapse them to the set of
# distinct tokens.  Matching is case-sensitive at this stage ("The" != "the").
corpus_categories = ['news', 'fiction', 'romance', 'reviews']
all_tokens = brown.words(categories=corpus_categories[0])
for category in corpus_categories[1:]:
    all_tokens = all_tokens + brown.words(categories=category)
corpus = list(set(all_tokens))

In [8]:
# Build the hangman vocabulary: lower-cased words of 6 to 12 letters.
#
# * De-duplicate AFTER lower-casing.  `corpus` is de-duplicated case-sensitively,
#   so "The" and "the" would otherwise both survive as "the" and that word would
#   be over-represented when sampling games.
# * Keep only words made of a-z: HangmanPlayer encodes a letter as ord(ch) - 97,
#   which is only a valid index (0-25) for those characters.
# * Sort, then shuffle with a fixed seed.  `corpus` comes from a set, whose
#   iteration order for strings can change between kernel sessions; without this
#   `corpus2[:50]` would be a different sample on every run.  The seeded shuffle
#   keeps any prefix of the array a representative (not alphabetical) sample.
_vocabulary = {word.lower() for word in corpus if word.isalpha() and 5 < len(word) < 13}
_vocabulary = sorted(w for w in _vocabulary if all('a' <= ch <= 'z' for ch in w))
corpus2 = np.random.RandomState(0).permutation(_vocabulary)


In [9]:
# Length of every word in corpus2, position-aligned with it, so that
# `corpus2[word_len == k]` selects the words that are exactly k letters long.
word_len = np.array(list(map(len, corpus2)))

In [10]:
class HangmanPlayer:
    """Play one game of hangman with a letter-scoring model and record every turn.

    Letters are handled as integer indices 0-25 (``ord(ch) - 97``); index 26 is
    the "still hidden" symbol used in the obscured-word encoding.  For every
    turn the player records what the model was shown (obscured word, letters
    already guessed) together with a multi-hot target marking the letters that
    were still hidden at that turn, so a finished game can be used as training
    data.
    """

    def __init__(self, word, model, lives=10):
        """Set up a new game.

        Args:
            word: The hidden word.  Only the letters a-z are supported; case is
                ignored (the word is lower-cased before use).
            model: Object exposing ``predict([word_batch, guessed_batch])`` and
                returning 26 letter scores for the single sample in the batch.
            lives: Number of wrong guesses allowed before the game is lost.

        Raises:
            ValueError: If ``word`` contains a character outside a-z.
        """
        # Lower-case first: ord('R') - 97 is negative, and numpy would accept it
        # as a from-the-end index and silently encode the wrong letter.
        word = word.lower()
        if any(not ('a' <= ch <= 'z') for ch in word):
            raise ValueError(
                'HangmanPlayer only supports words made of the letters a-z, got {!r}'.format(word))

        self.original_word = word
        self.full_word = [ord(ch) - 97 for ch in word]
        self.letters_guessed = set()
        self.letters_remaining = set(self.full_word)
        self.lives_left = lives
        # Per-turn history, one entry per guess, all three lists stay aligned.
        self.obscured_words_seen = []
        self.letters_previously_guessed = []
        self.guesses = []
        self.correct_responses = []
        self.z = model

    @staticmethod
    def _decode_obscured(encoded_word):
        """Turn a (len, 27) one-hot obscured word back into text (' ' = hidden)."""
        return ''.join(chr(idx + 97) if idx != 26 else ' '
                       for idx in encoded_word.argmax(axis=1))

    def encode_obscured_word(self):
        """Return the word as a (len(word), 27) one-hot float32 array.

        Positions whose letter has been guessed carry that letter's index;
        every other position carries index 26 (hidden).
        """
        symbols = [idx if idx in self.letters_guessed else 26 for idx in self.full_word]
        obscured_word = np.zeros((len(symbols), 27), dtype=np.float32)
        for position, symbol in enumerate(symbols):
            obscured_word[position, symbol] = 1
        return obscured_word

    def encode_guess(self, guess):
        """Return a length-26 one-hot float32 vector for the letter index ``guess``."""
        encoded_guess = np.zeros(26, dtype=np.float32)
        encoded_guess[guess] = 1
        return encoded_guess

    def encode_previous_guesses(self):
        """Return a length-26 float32 vector with 1s at every letter guessed so far."""
        guess = np.zeros(26, dtype=np.float32)
        for idx in self.letters_guessed:
            guess[idx] = 1
        return guess

    def encode_correct_responses(self):
        """Return a length-26 float32 multi-hot vector of the letters still hidden.

        The vector is intentionally NOT normalised to sum to 1: each letter is an
        independent 0/1 label (several letters can be correct at once).
        """
        response = np.zeros(26, dtype=np.float32)
        for idx in self.letters_remaining:
            response[idx] = 1.0
        return response

    def store_guess_and_result(self, guess):
        """Record one turn and update the game state for the letter index ``guess``."""
        # Record what the model saw as input, before this guess is applied.
        self.obscured_words_seen.append(self.encode_obscured_word())
        self.letters_previously_guessed.append(self.encode_previous_guesses())

        # Record the guess itself and mark the letter as used.
        self.guesses.append(guess)
        self.letters_guessed.add(guess)

        # The target is the set of letters that were still hidden when the guess
        # was made, so it has to be captured before the guess is removed below.
        was_correct = guess in self.letters_remaining
        self.correct_responses.append(self.encode_correct_responses())

        if was_correct:
            self.letters_remaining.remove(guess)
        else:
            self.lives_left -= 1

    def _choose_guess(self):
        """Ask the model for letter scores and return the best not-yet-guessed index."""
        obscured_word = self.encode_obscured_word()
        previous_guesses = self.encode_previous_guesses()
        scores = np.squeeze(self.z.predict([
            obscured_word.reshape((1, obscured_word.shape[0], 27)),
            previous_guesses.reshape((1, 26)),
        ]))
        # Hard-mask letters that were already guessed.  Merely subtracting 1 from
        # their score is not enough: a saturated score of 1.0 becomes 0.0 and can
        # tie with (and, by index order, beat) unguessed letters scored 0.0,
        # making the player repeat a letter and burn a life for nothing.
        scores = np.where(previous_guesses > 0, -np.inf, scores)
        return int(np.argmax(scores))

    def run(self):
        """Play until the word is complete or no lives are left.

        Returns:
            Tuple ``(obscured_words_seen, letters_previously_guessed,
            correct_responses)`` as numpy arrays with one row per turn, suitable
            as model inputs and targets.
        """
        while (self.lives_left > 0) and (len(self.letters_remaining) > 0):
            self.store_guess_and_result(self._choose_guess())

        return (np.array(self.obscured_words_seen),
                np.array(self.letters_previously_guessed),
                np.array(self.correct_responses))

    def show_words_seen(self):
        """Print the obscured word as it looked before each guess."""
        for encoded_word in self.obscured_words_seen:
            print(self._decode_obscured(encoded_word))

    def show_guesses(self):
        """Print the guessed letters in the order they were made."""
        for guess in self.guesses:
            print(chr(guess + 97))

    def play_by_play(self):
        """Print the hidden word followed by a turn-by-turn transcript."""
        print('Hidden word was "{}"'.format(self.original_word))
        for guess, encoded_word in zip(self.guesses, self.obscured_words_seen):
            print('Guessed {} after seeing "{}"'.format(chr(guess + 97),
                                                        self._decode_obscured(encoded_word)))

    def evaluate_performance(self):
        """Summarise a finished game (assumes ``run()`` has already been called).

        Returns:
            Tuple ``(ended_in_success, correct_guesses, incorrect_guesses,
            letters_in_word)`` where ``letters_in_word`` is the set of distinct
            characters of the hidden word.
        """
        ended_in_success = self.lives_left > 0
        letters_in_word = set(self.original_word)
        correct_guesses = len(letters_in_word) - len(self.letters_remaining)
        incorrect_guesses = len(self.guesses) - correct_guesses
        return (ended_in_success, correct_guesses, incorrect_guesses, letters_in_word)

In [15]:
# Number of filters used by every convolutional layer below.
N = 128

# Input 1: the obscured word, a variable-length sequence of 27-way vectors.
word_input = keras.Input(shape=(None, 27))

# Input 2: a length-26 vector (fed with the previously guessed letters).
letter_input = keras.Input(shape=(26,))

# Options shared by every Conv1D layer; 'same' padding keeps the sequence
# length unchanged so the branch outputs can be added element-wise.
conv_options = dict(strides=1, padding='same', activation='relu',
                    kernel_initializer='he_normal')

# Four parallel convolutions over the word with kernel widths 5, 4, 3 and 2.
a1 = keras.layers.Conv1D(N, kernel_size=5, **conv_options)(word_input)
a2 = keras.layers.Conv1D(N, kernel_size=4, **conv_options)(word_input)
a3 = keras.layers.Conv1D(N, kernel_size=3, **conv_options)(word_input)
a4 = keras.layers.Conv1D(N, kernel_size=2, **conv_options)(word_input)

d0 = keras.layers.Add()([a1, a2, a3, a4])

# Two conv + batch-norm stages whose output is added back onto their input
# (a skip connection).
d1 = keras.layers.Conv1D(N, kernel_size=3, **conv_options)(d0)
d1 = keras.layers.BatchNormalization()(d1)
d1 = keras.layers.Conv1D(N, kernel_size=3, **conv_options)(d1)
d1 = keras.layers.BatchNormalization()(d1)

d0 = keras.layers.Add()([d0, d1])

# Collapse the (variable) sequence axis, regularise, then append the
# second input before the output layer.
d0 = keras.layers.GlobalMaxPooling1D()(d0)
d0 = keras.layers.Dropout(.4)(d0)
d1 = keras.layers.Concatenate()([d0, letter_input])

# One independent sigmoid score per letter, trained with binary cross-entropy.
word_output = keras.layers.Dense(26, activation='sigmoid')(d1)

z = keras.Model(inputs=[word_input, letter_input], outputs=word_output)

bce = keras.losses.BinaryCrossentropy()

z.compile(loss=bce, optimizer=keras.optimizers.Adam())

Instructions for updating:
Use tf.cast instead.


In [33]:
# Self-play training.  For each word length from 6 to 12, run NUM_EPOCHS rounds;
# each round lets the current model play EPOCH_SIZE + 1 games on consecutive
# words of that length and then fits the model on every turn of those games.
NUM_EPOCHS = 3   # training rounds per word length
EPOCH_SIZE = 20  # games played per round in addition to the first one

for pp in np.arange(6, 13):
    # One word length at a time, so all games in a round share the same
    # sequence length and their turns can be stacked into one array.
    n = pp
    words = corpus2[word_len == n]
    if len(words) == 0:
        # Nothing to train on for this length.
        continue

    for epoch in range(NUM_EPOCHS):
        print(epoch)

        # Random starting position.  `high` is exclusive in np.random.randint,
        # so it must be len(words) (not len(words) - 1) for every word to be
        # eligible and for a single-word bucket to work at all.
        start = np.random.randint(low=0, high=len(words))

        # Collect per-game arrays in lists and concatenate once, instead of
        # re-copying the growing arrays after every game.
        seen_batches, previous_batches, response_batches = [], [], []
        for offset in range(EPOCH_SIZE + 1):
            i = (start + offset) % len(words)
            word = words[i]

            other_player = HangmanPlayer(word, z)
            words_seen0, previous_letters0, correct_responses0 = other_player.run()

            seen_batches.append(words_seen0)
            previous_batches.append(previous_letters0)
            response_batches.append(correct_responses0)

        words_seen = np.concatenate(seen_batches, axis=0)
        previous_letters = np.concatenate(previous_batches, axis=0)
        correct_responses = np.concatenate(response_batches, axis=0)

        # NOTE(review): an np.unique(words_seen, ...) call used to sit here, but
        # its result was never used, so it was dropped.  If de-duplicating
        # training rows was intended, it must key on both inputs, not only on
        # words_seen.
        z.fit([words_seen, previous_letters], correct_responses, batch_size=32, epochs=2)
          


0
Epoch 1/2
Epoch 2/2
1
Epoch 1/2
Epoch 2/2
2
Epoch 1/2
Epoch 2/2
0
Epoch 1/2
Epoch 2/2
1
Epoch 1/2
Epoch 2/2
2
Epoch 1/2
Epoch 2/2
0
Epoch 1/2
Epoch 2/2
1
Epoch 1/2
Epoch 2/2
2
Epoch 1/2
Epoch 2/2
0
Epoch 1/2
Epoch 2/2
1
Epoch 1/2
Epoch 2/2
2
Epoch 1/2
Epoch 2/2
0
Epoch 1/2
Epoch 2/2
1
Epoch 1/2
Epoch 2/2
2
Epoch 1/2
Epoch 2/2
0
Epoch 1/2
Epoch 2/2
1
Epoch 1/2
Epoch 2/2
2
Epoch 1/2
Epoch 2/2
0
Epoch 1/2
Epoch 2/2
1
Epoch 1/2
Epoch 2/2
2
Epoch 1/2
Epoch 2/2


In [34]:
# Play one game on a fixed word and print a turn-by-turn transcript.
my_player = HangmanPlayer('responses', z)
a, b, c = my_player.run()
my_player.play_by_play()

# results = (won?, number of correct guesses, number of incorrect guesses, letters in word)
results = my_player.evaluate_performance()
outcome = 'won' if results[0] else 'did not win'
num_correct, num_incorrect = results[1], results[2]
print('The model {} this game'.format(outcome))
print('The model made {} correct guesses and {} incorrect guesses'.format(num_correct, num_incorrect))

Hidden word was "responses"
Guessed n after seeing "         "
Guessed e after seeing "     n   "
Guessed r after seeing " e   n e "
Guessed l after seeing "re   n e "
Guessed o after seeing "re   n e "
Guessed t after seeing "re  on e "
Guessed s after seeing "re  on e "
Guessed b after seeing "res onses"
Guessed u after seeing "res onses"
Guessed g after seeing "res onses"
Guessed y after seeing "res onses"
Guessed m after seeing "res onses"
Guessed p after seeing "res onses"
The model won this game
The model made 6 correct guesses and 7 incorrect guesses


In [42]:
def evaluate_model(my_words, my_model):
    """Play one hangman game per word and tabulate the outcomes.

    Args:
        my_words: Iterable of hidden words to play.
        my_model: Model handed to each HangmanPlayer.

    Returns:
        DataFrame with one row per word and the columns 'won', 'num_correct',
        'num_incorrect' and 'letters', as reported by
        HangmanPlayer.evaluate_performance().
    """
    def _play(word):
        # The arrays returned by run() are training data; only the final
        # game state matters for evaluation.
        player = HangmanPlayer(word, my_model)
        player.run()
        return player.evaluate_performance()

    outcomes = [_play(word) for word in my_words]
    return pd.DataFrame(outcomes, columns=['won', 'num_correct', 'num_incorrect', 'letters'])

# Win rate over the first 50 vocabulary words.
# NOTE(review): these words come from the same corpus2 array the training loop
# samples from, so this is not a held-out estimate.
result_df = evaluate_model(corpus2[:50], z)
win_percentage = 100 * result_df['won'].mean()
print('The model won {}% of the games'.format(win_percentage.round()))

The model won 16.0% of the games


In [43]:
# Load a previously saved model from disk.
# NOTE(review): the recorded output of this cell is
#   ValueError: ('Unrecognized keyword arguments:', dict_keys(['ragged']))
# presumably because the file was saved by a newer Keras/TensorFlow than the one
# running this notebook; confirm the versions.  The absolute, user-specific
# path also makes this cell non-portable.
SAVED_MODEL_PATH = '/Users/michaelspillane/Downloads/z.h5'
z0 = keras.models.load_model(SAVED_MODEL_PATH)

ValueError: ('Unrecognized keyword arguments:', dict_keys(['ragged']))