In [9]:
import numpy as np 
import pickle 
import os 
import re 

In [322]:
def save_obj(obj, filename):
    """Pickle `obj` to the file `<filename>.pkl`.

    Parameters
    ----------
    obj : any picklable object
        The object to serialize.
    filename : str
        Path of the target file WITHOUT the '.pkl' extension (it is appended here).
        An existing file at that path is overwritten.
    """
    target_path = filename + '.pkl'
    with open(target_path, 'wb+') as handle:
        # Use the newest protocol supported by the running interpreter.
        pickle.dump(obj, handle, pickle.HIGHEST_PROTOCOL)
def load_obj(filename):
    """Load and return the object pickled in `<filename>.pkl`.

    Parameters
    ----------
    filename : str
        Path of the pickle file WITHOUT the '.pkl' extension (it is appended here).

    Returns
    -------
    The unpickled object.

    Raises
    ------
    FileNotFoundError
        If `<filename>.pkl` does not exist.
    """
    source_path = filename + '.pkl'
    with open(source_path, 'rb') as handle:
        # NOTE: unpickling can execute arbitrary code -- only load files you trust.
        loaded = pickle.load(handle)
    return loaded

In [None]:
'''
Implements a greedy heuristic for hangman given a dictionary of words. 
Does not generalize to new words. 

=======================
Objects
=======================
Two dictionaries are used to store the words

word_length: {<word> : <length of word>}
length_word: {<length>: array([word1, word2, word3, ...])}


=======================
Methods
=======================

insert(String word): updates the two dictionaries with a word 

charFrequency(String[] list): calculates frequency of characters in a list

remainingCharFrequency(String[] list, char[] list): removes characters that are already successful 

remove(char c): remove all words that contain a specified character 

match(String[] list, regex): returns a list with all regex matches 

greedy_guess(String[] list): returns the most frequent character from a list of words 


'''

In [None]:
'''
=================================================
HOW TO PLAY HANGMAN 
=================================================

SECRET_WORD = 'hangman'

>>>>>>>>>>  SECRET_WORD ONLY HAS LOWER CASE LETTERS <<<<<<<<<

INPUT: N = LENGTH OF SECRET_WORD

Step 1. list_of_words = length_word[N]

Step 2. chars_used = []

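Step 3. x = greedy_guess(list_of_words)
        chars_used.append(x)
        if x NOT in SECRET_WORD:
            list_of_words = remove(list_of_words, x)
        if x in SECRET_WORD:
            create pattern from the revealed letters 
            list_of_words = match(list_of_words, pattern)
Step 4. repeat Step 3 until every letter of SECRET_WORD is revealed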
'''

In [321]:
class hangman:
    """State of one hangman game plus helpers for a greedy letter-guessing solver.

    Attributes
    ----------
    word : str
        The secret word.
    length : int
        Number of characters in the secret word.
    pattern : str
        Regex describing what is currently known about the word: revealed
        letters appear literally, unknown positions as '.', all wrapped in
        '^(' ... ')$'. Before any guess it is '^(.{<length>})$'.
    charDict : dict
        Maps every character attempted so far to True.
    charList : list of str
        One entry per position of the word: the revealed letter, or '.' if
        the position is still unknown.
    wrong_guesses : int
        Number of attempted characters that do not occur in the word.

    Notes
    -----
    Revealed letters are inserted into `pattern` without escaping, so the
    secret word is assumed to contain no regex metacharacters (the notebook's
    instructions restrict it to lowercase letters).
    """

    def __init__(self, word):
        """Start a new game for the secret word `word`."""
        self.word = word  # secret word
        self.length = len(word)  # length of word
        # Initially nothing is known except the length.
        self.pattern = r'^(.{' + re.escape(str(self.length)) + '})$'
        self.charDict = {}  # every character attempted so far
        self.charList = ['.'] * self.length  # '.' marks a still-unknown position
        self.wrong_guesses = 0  # number of wrong guesses

    def attempt(self, char):
        """Guess `char` and update the game state.

        A character that was already attempted is rejected with a printed
        message and changes nothing. Otherwise the character is recorded in
        `charDict`; if it does not occur in the word, `wrong_guesses` is
        incremented and 'bad guess' is printed; every position where it does
        occur is revealed in `charList`, and `pattern` is rebuilt.
        """
        if self.charDict.get(char, 0) != 0:
            print("you've already tried this letter")
            return

        self.charDict[char] = True
        indexList = self.find(char)
        if not indexList:
            self.wrong_guesses += 1
            print('bad guess')
        for index in indexList:
            self.charList[index] = self.word[index]
        self.pattern = self.arrToRegex(self.charList)

    def find(self, ch):
        """Return the list of indices at which character `ch` occurs in the secret word."""
        return [i for i, ltr in enumerate(self.word) if ltr == ch]

    def arrToRegex(self, list_of_characters):
        """Join `list_of_characters` into an anchored regex '^(<chars>)$'.

        Items are concatenated as-is (no escaping), so a '.' entry acts as a
        single-character wildcard.
        """
        return r'^(' + ''.join(list_of_characters) + ')$'

    def regex(self):
        """Return the current regex pattern of the game."""
        return self.pattern

    def remove(self, list_of_words, char):
        """Return the words of `list_of_words` that do NOT contain `char`.

        Used after a wrong guess to discard every candidate containing the
        rejected character. `char` is treated as literal text: a plain
        substring test is used instead of a regex so that characters such as
        '.' or '(' (which may occur in dictionary words) neither match
        everything nor raise `re.error`.
        """
        return [item for item in list_of_words if char not in item]

    def match(self, list_of_words, pattern):
        """Return the words of `list_of_words` for which `re.search(pattern, word)` succeeds."""
        return [item for item in list_of_words if re.search(pattern, item) is not None]

    def charFrequency(self, list_of_words):
        """Count character occurrences over all words.

        Returns a dict {character: total number of occurrences}; a character
        appearing twice in one word is counted twice.
        """
        dict_of_characters = {}
        for item in list_of_words:
            for x in item:
                dict_of_characters[x] = dict_of_characters.get(x, 0) + 1
        return dict_of_characters

    def greedy_guess(self, list_of_words):
        """Return the most frequent not-yet-attempted character in `list_of_words`.

        Ties are broken uniformly at random via `np.random.choice`.

        Raises
        ------
        ValueError
            If no unattempted character is left (e.g. `list_of_words` is empty).
        """
        # Count once, then drop every character that has already been attempted.
        freq_char = {
            char: count
            for char, count in self.charFrequency(list_of_words).items()
            if self.charDict.get(char, 0) == 0
        }
        if not freq_char:
            raise ValueError('no unattempted characters remain in the candidate words')

        freq_char_keys = list(freq_char.keys())
        freq_char_values = np.array(list(freq_char.values()))
        # Indices of all characters sharing the maximum count.
        maxIndex = np.where(freq_char_values == np.amax(freq_char_values))[0]
        return freq_char_keys[np.random.choice(maxIndex)]
        

In [337]:
# Load the pre-built corpus objects; load_obj appends '.pkl' to each name and
# resolves it relative to the current working directory.
# english_words: the word collection that randomWord samples from
# (presumably a flat list/array of strings -- TODO confirm).
english_words = load_obj('english_words')
# word_length: described in the notes above as {<word>: <length of word>}.
word_length = load_obj('word_length_english')
# length_word: described in the notes above as {<length>: array([word1, word2, ...])}.
length_word = load_obj('length_word_english')

In [328]:
# check if the word only has lowercase english letters 
# warning: returns true for empty string 
def validateWord(word):
    """Return True iff `word` consists solely of lowercase letters a-z.

    The check is done with a regex requiring exactly len(word) characters
    from [a-z] between the start and end anchors.

    Warning: the empty string is accepted (returns True).
    """
    letter_count = re.escape(str(len(word)))
    lowercase_only = r'^([a-z]{' + letter_count + '})$'
    return re.search(lowercase_only, word) is not None
    

In [358]:
# returns N random words from dictionary 
def randomWord(N):
    """Return a list of N distinct random words drawn from `english_words`.

    Only words accepted by `validateWord` (lowercase a-z only) are returned.
    Words are drawn uniformly at random with `np.random.choice`; invalid or
    already-picked draws are rejected and redrawn.

    Parameters
    ----------
    N : int
        Number of words to return; must be non-negative.

    Returns
    -------
    list
        N distinct words, in the order they were drawn.

    Raises
    ------
    ValueError
        If N is negative, or larger than the number of distinct valid words
        available. Without this check the rejection loop would never end.
    """
    if N < 0:
        raise ValueError('N must be non-negative, got ' + repr(N))
    if N == 0:
        return []

    # Convert once, the same way np.random.choice would on every call,
    # instead of re-converting the whole word list for each draw.
    word_pool = np.asarray(english_words)

    # One pass to learn which words are eligible, so impossible requests can be
    # refused up front rather than looping forever.
    valid_words = {word for word in word_pool if validateWord(word)}
    if N > len(valid_words):
        raise ValueError(
            'requested ' + repr(N) + ' words but only '
            + str(len(valid_words)) + ' distinct valid words are available'
        )

    li1 = []
    already_picked = set()
    while len(li1) < N:
        word = np.random.choice(word_pool)
        if word in valid_words and word not in already_picked:
            li1.append(word)
            already_picked.add(word)
    return li1
        