In [9]:
import os

# Initial packages
import numpy as np
import random
import string
from tqdm import tqdm

# ML packages for neural-network
import torch as T
import gymnasium as gym

from stable_baselines3 import DQN, A2C, PPO
from stable_baselines3.common.policies import obs_as_tensor


In [10]:
def load_dictionary(file_path, seed=None):
    """Read a word list from a text file, keep the longer words, and shuffle them.

    Args:
        file_path: Path to a text file with one word per line.
        seed: Optional seed for the shuffle. When given, a private
            ``random.Random(seed)`` does the shuffling, so the returned order
            is reproducible and the global ``random`` state is not touched.
            When ``None`` (the default) the global ``random`` module shuffles
            the list.

    Returns:
        A list of whitespace-stripped words that have more than 3 characters,
        in shuffled order.
    """

    # Read the .txt file line by line, stripping newlines and surrounding
    # whitespace. One length test is enough: anything longer than 3 characters
    # is necessarily non-empty and longer than 1 character.
    with open(file_path, 'r') as file:
        stripped_words = (line.strip() for line in file)
        trimmed_words_list = [word for word in stripped_words if len(word) > 3]

    # Randomly shuffle the list (reproducibly when a seed is supplied)
    if seed is None:
        random.shuffle(trimmed_words_list)
    else:
        random.Random(seed).shuffle(trimmed_words_list)

    return trimmed_words_list

In [11]:
# Training word list, resolved relative to the notebook's working directory
dictionary_path = 'words_250000_train.txt'

# Words longer than 3 characters, in shuffled order (see load_dictionary)
words_list = load_dictionary(file_path=dictionary_path)

In [5]:
class HangmanEnv(gym.Env):
    """Hangman as a Gymnasium environment.

    Every episode draws one word from ``dictionary``. An action is an integer
    in ``[0, 25]`` naming the letter ``'a'``..``'z'`` to guess.

    The observation is a flattened ``(31, 27)`` int8 grid (837 values):

    * rows ``0..29`` -- one row per letter position of the word. The last
      column is ``-1`` while that position is still hidden and ``0`` once it
      has been revealed; a revealed position additionally holds ``1`` in the
      column of its letter. Rows beyond the end of the word stay all zero.
    * row ``30`` -- columns ``0..25`` hold ``-1`` for every letter guessed so
      far; the last column is always ``-1``.

    Rewards: ``-0.1`` for repeating an already-guessed letter (no life is
    lost), ``lives_remaining * 10`` for the guess that completes the word, and
    ``0`` for every other guess.

    After an episode ends, ``status`` is ``1`` if the word was completed and
    ``0`` otherwise.
    """

    # Geometry of the 2-D state grid that is flattened into the observation
    MAX_WORD_LENGTH = 30
    NUM_LETTERS = 26
    STATE_SHAPE = (MAX_WORD_LENGTH + 1, NUM_LETTERS + 1)

    def __init__(self, dictionary, total_lives, max_steps=None):
        """Create the environment.

        Args:
            dictionary: Sequence of words to draw from.
            total_lives: Number of wrong guesses allowed per game.
            max_steps: Optional cap on the number of ``step`` calls per
                episode. Because repeated guesses cost no life, an episode has
                no natural upper bound on its length; when this cap is reached
                on an unfinished episode, ``step`` reports ``truncated=True``.
                ``None`` (the default) disables the cap.

        Raises:
            ValueError: If a word has more than ``MAX_WORD_LENGTH`` letters and
                therefore cannot be encoded in the fixed-size observation.
        """
        super().__init__()

        # Fail fast: a longer word would spill into (or past) the final row,
        # which is reserved for the guessed-letter mask.
        longest_word = max((len(word) for word in dictionary), default=0)
        if longest_word > self.MAX_WORD_LENGTH:
            raise ValueError(
                f'dictionary contains a word of length {longest_word}; '
                f'the observation can encode at most {self.MAX_WORD_LENGTH} letters'
            )

        # Number of lives per game
        self.total_lives = total_lives

        # Optional per-episode step limit (None = unlimited)
        self.max_steps = max_steps

        # The dictionary will not change, so save it here
        self.dictionary = dictionary
        self.incorrect_words = []

        # Action space involves choosing ['a','b',...,'y','z'] --> [0,1,...,24,25]
        self.action_space = gym.spaces.Discrete(self.NUM_LETTERS)

        # Observation (i.e. state space): the one-hot encoding of the current
        # word along with the information about currently guessed letters
        self.observation_space = gym.spaces.Box(
            low=-1,
            high=1,
            shape=(self.STATE_SHAPE[0] * self.STATE_SHAPE[1],),
            dtype=np.int8,
        )

    def _is_truncated(self):
        """Return True when the optional step limit is hit on an unfinished episode."""
        return (
            self.max_steps is not None
            and not self.done
            and self.steps_taken >= self.max_steps
        )

    def step(self, action):
        """Apply one letter guess.

        Args:
            action: Integer (or 0-d integer array) in ``[0, 25]``.

        Returns:
            ``(observation, reward, done, truncated, info)``.
        """
        # Accept 0-d NumPy integer arrays as well as plain ints
        action = int(action)

        # Turn the action into a letter
        current_guess = chr(action + ord('a'))
        self.steps_taken += 1
        info = {}

        # A repeated guess changes nothing in the game: small penalty, no life lost
        if current_guess in self.guessed_letters:
            self.observation = self.one_hot_state.flatten()
            self.reward = -0.1
            self.done = False
            return self.observation, self.reward, self.done, self._is_truncated(), info

        self.guessed_letters.append(current_guess)
        self.available_letters.remove(current_guess)

        # Positions of the guessed letter in the word (empty when it is absent)
        correct_indices = np.flatnonzero(self.guess_word == current_guess)
        if correct_indices.size:
            self.current_word_state[correct_indices] = current_guess
        else:
            # Not in the word, then remove a life
            self.lives_remaining -= 1
        self.reward = 0

        # Check for the end of the game
        if np.array_equal(self.current_word_state, self.guess_word):
            self.status = 1
            self.reward = self.lives_remaining * 10
            self.done = True
        elif self.lives_remaining <= 0:
            self.status = 0
            self.done = True

        # Mark the letter as guessed and reveal every position it occupies
        self.one_hot_state[-1, action] = -1
        self.one_hot_state[correct_indices, action] = 1
        self.one_hot_state[correct_indices, -1] = 0
        self.observation = self.one_hot_state.flatten()

        return self.observation, self.reward, self.done, self._is_truncated(), info

    def reset(self, seed=None, options=None):
        """Start a new game with a word drawn from the dictionary.

        Args:
            seed: Optional seed for the environment's random generator; passing
                the same seed makes the sequence of drawn words reproducible.
            options: Unused.

        Returns:
            ``(observation, info)`` for the initial, fully hidden word.
        """
        # Seeds self.np_random when a seed is supplied
        super().reset(seed=seed)

        # Initialize our environment here
        self.lives_remaining = self.total_lives
        self.available_letters = list(string.ascii_lowercase)
        self.guessed_letters = []
        self.done = False
        self.status = 0
        self.steps_taken = 0

        # Now draw a word from the dictionary
        guess_index = int(self.np_random.integers(len(self.dictionary)))
        self.guess_word = np.array(list(self.dictionary[guess_index]))
        self.current_word_state = np.array(['_'] * len(self.guess_word))

        # Initialize the current state vector: every position of the word
        # starts hidden (-1 in the last column)
        self.one_hot_state = np.zeros(self.STATE_SHAPE, dtype=np.int8)
        self.one_hot_state[:len(self.guess_word), -1] = -1
        self.one_hot_state[-1, -1] = -1
        self.observation = self.one_hot_state.flatten()

        info = {}
        return self.observation, info

    def render(self):
        """Print the partially revealed word and the number of lives left."""

        result = ' '.join([str(elem) for elem in self.current_word_state])

        print(f'The current word state is {result} and with {self.lives_remaining} lives remaining.')

In [6]:
# Output locations: model checkpoints and TensorBoard logs
models_dir = "models/PPO"
logdir = "log"

# exist_ok=True makes the cell safe to re-run and avoids the race between
# checking for the directory and creating it
os.makedirs(models_dir, exist_ok=True)
os.makedirs(logdir, exist_ok=True)

In [7]:
# Training environment (6 lives per game) and a PPO agent with an MLP policy;
# TensorBoard logs go to `logdir`
env = HangmanEnv(total_lives=6, dictionary=words_list)
model = PPO(policy='MlpPolicy', env=env, tensorboard_log=logdir, verbose=1)

# Train in 100 rounds of `timesteps` steps each and save a checkpoint after
# every round, named after the cumulative number of timesteps.
# reset_num_timesteps=False keeps the step counter running across rounds.
timesteps = 100000
for i in range(1, 100 + 1):
    model.learn(
        total_timesteps=timesteps,
        tb_log_name="PPO",
        reset_num_timesteps=False,
    )
    model.save(f'{models_dir}/{i * timesteps}')

In [8]:
# Load the trained model
# NOTE(review): the checkpoint name is hard-coded. The training loop in this
# notebook saves checkpoints up to 10000000 timesteps, so confirm that this
# file exists before running.
model = PPO.load("models/PPO/25000000.zip")

env = HangmanEnv(dictionary=words_list, total_lives=6)

total_games = 10

# Number of final games whose guesses are printed
games_to_render = 5

# Repeating an already-guessed letter costs no life and never ends the game,
# so a policy that keeps repeating itself would stall the loop. Games that
# reach this many steps are abandoned and counted as not won.
max_steps_per_game = 1000

wins_list = []
progress_bar = tqdm(range(total_games), desc='Playing Hangman Games')
for game in progress_bar:
    # `game` is 0-based, so `>=` is needed to cover the last `games_to_render` games
    render_game = game >= (total_games - games_to_render)

    obs, _ = env.reset()
    done = False
    truncated = False
    steps = 0
    while not (done or truncated) and steps < max_steps_per_game:
        action, _ = model.predict(obs)
        obs, reward, done, truncated, info = env.step(action)
        steps += 1
        if render_game:
            env.render()

    # env.status is 1 only when the word was completed
    wins_list.append(env.status)
    progress_bar.set_postfix_str(s=f'Win Rate/1000: {np.round(np.mean(wins_list), 3)}')

    if render_game:
        print('=================================')

Playing Hangman Games:  30%|███       | 3/10 [00:23<00:54,  7.74s/it, Win Rate/1000: 0.0]


KeyboardInterrupt: 