To develop a custom Hangman environment using OpenAI's Gymnasium, we designed an action space where each action corresponds to an index number representing a specific letter. The observation space in this environment includes the current state of the word being guessed, the number of guesses left, the letters that have already been guessed, and a list of letters in ascending order of their appearance in training data for words that match the current state. This setup allows the agent to make informed guesses based on both past actions and the distribution of letters in the training data. The environment was then trained using a Deep Q-Network (DQN) model from the Stable Baselines library, aimed at maximizing rewards. The reward system was structured to give +1 for a correct letter guess, -1 for an incorrect guess, and -10 for guessing a previously guessed letter, thereby encouraging the model to avoid redundant guesses. During testing with 1,000 words, the trained model successfully guessed the correct word in 317 games.

In [1]:
!pip install gymnasium

Collecting gymnasium
  Downloading gymnasium-0.29.1-py3-none-any.whl (953 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m953.9/953.9 kB[0m [31m2.4 MB/s[0m eta [36m0:00:00[0m
Collecting farama-notifications>=0.0.1 (from gymnasium)
  Downloading Farama_Notifications-0.0.4-py3-none-any.whl (2.5 kB)
Installing collected packages: farama-notifications, gymnasium
Successfully installed farama-notifications-0.0.4 gymnasium-0.29.1


In [2]:
from gymnasium import Env, spaces, register, make
import numpy as np
from time import sleep
import pygame
import random
import matplotlib.pyplot as plt
import gymnasium as gym
import re
import collections


# uploading the txt file

In [3]:

file_path = 'words_250000_train.txt'

# Open and read the contents of the file
with open(file_path, 'r') as file:
    words = file.readlines()

word_list = [word.strip() for word in words]

# Display the cleaned list of words
print(word_list[:5])



['aaa', 'aaaaaa', 'aaas', 'aachen', 'aaee']


# Gymnasium Hangman Custom Enviroment

In [4]:
class HangmanEnv(gym.Env):
    def __init__(self, word_list, max_attempts=6):
        super(HangmanEnv, self).__init__()
        self.word_list = word_list
        self.word = None
        self.len_word = None
        self.max_attempts = max_attempts
        self.batch_size=20;
        self.num_batches = len(self.word_list) // self.batch_size
        self.current_batch_index = 0
        self.current_batch_words = []
        #adding common letter dict
        self.current_dictionary = []
        self.sorted_letter_count=np.full(26, 26)
        self.clean_word = None

        # Define action and observation space
        self.action_space = spaces.Discrete(26)  # 26 letters in the alphabet
        self.observation_space = spaces.Dict({
            "guessed_letters": spaces.MultiBinary(26),
            "remaining_attempts": spaces.Discrete(self.max_attempts + 1),
            "current_word_state": spaces.MultiDiscrete([28] * 30),  # 27 includes placeholders
            "sorted_letter_count": spaces.MultiDiscrete([27] * 26)
        })

        self.reset()

    def _prepare_next_batch(self):
        start_index = self.current_batch_index * self.batch_size
        end_index = start_index + self.batch_size
        self.current_batch_words = self.word_list[start_index:end_index]
        random.shuffle(self.current_batch_words)
        self.current_batch_index = (self.current_batch_index + 1) % self.num_batches


    def reset(self,seed=None, options=None):
        super().reset(seed=seed)
        self._prepare_next_batch()
        self.word = random.choice(self.current_batch_words)
        self.len_word = len(self.word)
        self.guessed_letters = np.zeros(26, dtype=np.int8)
        self.sorted_letter_count=np.full(26, 26, dtype=np.int64)
        self.remaining_attempts = self.max_attempts
        self.current_word_state = np.full(30, 27)
        self.current_word_state[:self.len_word] = 26
        done, won = self._check_done()

        ##adding common letter dict
        self.current_dictionary =self.word_list
        self.clean_word=self.current_word_state[:self.len_word]

        letter_count=collections.Counter("".join(self.word_list)).most_common()
        for i, el in enumerate(letter_count):
            idx=ord(el[0])-ord('a')
            self.sorted_letter_count[i]=idx

        info = {"target_word": self.word, "batch_number":self.current_batch_index} if done else {}

        return self._get_observation(), info

    def step(self, action):
        letter = chr(action + ord('a'))

        if self.guessed_letters[action] == 1:
            reward = -10
            self.remaining_attempts -= 1
        else:
            self.guessed_letters[action] = 1
            if letter in self.word:
                reward = 1
                self._update_current_word_state(action)
                self._update_letter_dict()
            else:
                reward = -1
                self.remaining_attempts -= 1

        done, won = self._check_done()
        if done:
            reward += 5 if won else -10  # Additional reward or penalty

        info = {"target_word": self.word, "batch_number":self.current_batch_index} if done else {}
        truncated = False
        return self._get_observation(), reward, done, truncated, info

    def render(self, mode='human'):
        guessed_letters_render = [chr(i + ord('a')) for i, val in enumerate(self.guessed_letters) if val == 1]
        rendered_word = ''.join([chr(i + ord('a')) if i < 26 else '_' if i==26 else '.' for i in self.current_word_state])
        print(f"Word: {rendered_word}")
        print(f"Guessed Letters: {', '.join(guessed_letters_render)}")
        print(f"Remaining Attempts: {self.remaining_attempts}")
        print(f"count: {self.sorted_letter_count}")


    def _get_observation(self):
        return {
            "guessed_letters": self.guessed_letters,
            "remaining_attempts": self.remaining_attempts,
            "current_word_state": self.current_word_state,
            "sorted_letter_count": self.sorted_letter_count
        }

    def _update_current_word_state(self, action):
        for i, char in enumerate(self.word):
            char_idx = ord(char) - ord('a')
            if char_idx == action:
                self.current_word_state[i] = action
        self.clean_word=self.current_word_state[:self.len_word]

    def _update_letter_dict(self):

        self.sorted_letter_count=np.full(26, 26)
        current_dict = self.current_dictionary
        new_dictionary = []

        for dict_word in current_dict:
            # continue if the word is not of the appropriate length
            if len(dict_word) != self.len_word:
                continue

            # if dictionary word is a possible match then add it to the current dictionary
            clean_word_string = ''.join([chr(i + ord('a')) if i < 26 else '.' for i in self.clean_word])

            if re.match(clean_word_string, dict_word):
                new_dictionary.append(dict_word)


        # overwrite old possible words dictionary with updated version
        self.current_dictionary = new_dictionary
        # count occurrence of all characters in possible word matches
        full_dict_string = "".join(new_dictionary)

        c = collections.Counter(full_dict_string)
        sorted_letter_count = c.most_common()
        for i, el in enumerate(sorted_letter_count):
          idx=ord(el[0])-ord('a')
          self.sorted_letter_count[i]=idx
         # return most frequently occurring letter in all possible words that hasn't been guessed yet

    def _check_done(self):
        reconstructed_word = ''.join(chr(i + ord('a')) if i < 26 else '_' if i == 26 else '.' for i in self.current_word_state[:self.len_word])
        if reconstructed_word == self.word:
            return True, True  # Won
        elif self.remaining_attempts <= 0:
            return True, False  # Lost
        return False, False



register(id='hangman', entry_point=HangmanEnv)


In [5]:
env = make('hangman',word_list=word_list, max_attempts=6)

In [6]:
#running the enviroment on random actions(guessed letters) to get observations
obs = env.reset()
env.render()
print("-----------------------")

done = False
while not done:
    action = random.choice([i for i in range(26)]);
    obs, reward, done, trunc, info = env.step(action)
    env.render()
    print(f"Reward: {reward}")
    print("-----------------------")

print(f"Game Over. The word was: {info}")


Word: _______.......................
Guessed Letters: 
Remaining Attempts: 6
count: [ 4  8  0 13 14 17 18 19 11  2 20  3 15 12  7  6 24  1  5 21 10 22 25 23
 16  9]
-----------------------
Word: _______.......................
Guessed Letters: n
Remaining Attempts: 5
count: [ 4  8  0 13 14 17 18 19 11  2 20  3 15 12  7  6 24  1  5 21 10 22 25 23
 16  9]
Reward: -1
-----------------------
Word: _______.......................
Guessed Letters: n, v
Remaining Attempts: 4
count: [ 4  8  0 13 14 17 18 19 11  2 20  3 15 12  7  6 24  1  5 21 10 22 25 23
 16  9]
Reward: -1
-----------------------
Word: _______.......................
Guessed Letters: n, p, v
Remaining Attempts: 3
count: [ 4  8  0 13 14 17 18 19 11  2 20  3 15 12  7  6 24  1  5 21 10 22 25 23
 16  9]
Reward: -1
-----------------------
Word: _______.......................
Guessed Letters: j, n, p, v
Remaining Attempts: 2
count: [ 4  8  0 13 14 17 18 19 11  2 20  3 15 12  7  6 24  1  5 21 10 22 25 23
 16  9]
Reward: -1
-------------

In [None]:
!pip install  stable-baselines3



In [None]:
from stable_baselines3 import DQN
from stable_baselines3.common.env_checker import check_env
from stable_baselines3.common.evaluation import evaluate_policy

In [None]:
from stable_baselines3.common.monitor import Monitor
from stable_baselines3.common.vec_env import DummyVecEnv

vec_env = DummyVecEnv([lambda: Monitor(env)])

  and should_run_async(code)


In [None]:
import torch as th

In [None]:
#checking the enviroment for compatiblity with the stable_baselines DQN model
check_env(env)

# Training the Model

In [None]:
model = DQN(
    'MultiInputPolicy',  # Policy type suitable for your observation space
    env,
    gamma=0.95,
    learning_rate=5e-5,
    buffer_size=20000,
    learning_starts=1000,
    batch_size=256,
    tau=0.005,
    target_update_interval=1000,
    exploration_fraction=0.2,
    exploration_final_eps=0.005,
    train_freq=4,
    gradient_steps=4,
    max_grad_norm=5,
    verbose=1
)


model.learn(total_timesteps=150000)


[1;30;43mStreaming output truncated to the last 5000 lines.[0m
| train/              |          |
|    learning_rate    | 5e-05    |
|    loss             | 0.374    |
|    n_updates        | 134932   |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 10.3     |
|    ep_rew_mean      | -1.1     |
|    exploration_rate | 0.005    |
| time/               |          |
|    episodes         | 12984    |
|    fps              | 21       |
|    time_elapsed     | 6310     |
|    total_timesteps  | 135985   |
| train/              |          |
|    learning_rate    | 5e-05    |
|    loss             | 0.412    |
|    n_updates        | 134984   |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 10.5     |
|    ep_rew_mean      | -0.54    |
|    exploration_rate | 0.005    |
| time/               |          |
|    episodes         | 1

<stable_baselines3.dqn.dqn.DQN at 0x7aaef330f370>

# Model Testing

In [None]:
mean_reward, std_reward = evaluate_policy(model, vec_env, n_eval_episodes=10)
print(f"Mean reward: {mean_reward} ± {std_reward}")

Mean reward: -1.8 ± 12.278436382536663


In [None]:
model.save("hangman_model_dqn")

In [None]:
obs, info = env.reset(seed=45)
env.render()
terminated = False
print("-----------------------")
while not terminated:
    action, _states = model.predict(obs, deterministic=True)
    obs, reward, terminated, truncated, info = env.step(action)
    env.render()
    print(f"Reward: {reward}")
    print("-----------------------")
print(f"Game Over. The word was: {info}")

Word: __________....................
Guessed Letters: 
Remaining Attempts: 6
count: [ 4  8  0 13 14 17 18 19 11  2 20  3 15 12  7  6 24  1  5 21 10 22 25 23
 16  9]
-----------------------
Word: e_________....................
Guessed Letters: e
Remaining Attempts: 6
count: [ 4  8 19 13  0 14 17 11 18  2 12 15 20  3  7  6 23 24  1 21  5 25 22 16
 10  9]
Reward: 1
-----------------------
Word: e_______a_....................
Guessed Letters: a, e
Remaining Attempts: 6
count: [ 4  0  8 11 13 17 14  2 19  3 15 12 20 18  7  6 23 24  1 16 21  5 25 22
 10 26]
Reward: 1
-----------------------
Word: e__l____al....................
Guessed Letters: a, e, l
Remaining Attempts: 6
count: [11 14  8  4  2  0  6 12 20 18  3 13 15 26 26 26 26 26 26 26 26 26 26 26
 26 26]
Reward: 1
-----------------------
Word: e__l__i_al....................
Guessed Letters: a, e, i, l
Remaining Attempts: 6
count: [11 14  8  4  2  0  6 12 20 18  3 13 15 26 26 26 26 26 26 26 26 26 26 26
 26 26]
Reward: 1
-----------------

In [None]:
won=0;
for i in range(1000):
    obs, info = env.reset(seed=42)
    terminated = False
    Reward=0;
    while not terminated:
        action, _states = model.predict(obs, deterministic=True)
        obs, reward, terminated, truncated, info = env.step(action)
        Reward+=reward
    if Reward>0:
        won+=1
    print(f"Reward: {Reward}")
    print(f"word: {info}")
    print("-----------------------")


Reward: 11
word: {'target_word': 'episcopising', 'batch_number': 3068}
-----------------------
Reward: 10
word: {'target_word': 'epistasis', 'batch_number': 3069}
-----------------------
Reward: 13
word: {'target_word': 'epistoma', 'batch_number': 3070}
-----------------------
Reward: -8
word: {'target_word': 'epistrophy', 'batch_number': 3071}
-----------------------
Reward: 13
word: {'target_word': 'epithalamic', 'batch_number': 3072}
-----------------------
Reward: 12
word: {'target_word': 'epitheliums', 'batch_number': 3073}
-----------------------
Reward: 11
word: {'target_word': 'epitrachelia', 'batch_number': 3074}
-----------------------
Reward: -6
word: {'target_word': 'epochmaking', 'batch_number': 3075}
-----------------------
Reward: -11
word: {'target_word': 'epoxide', 'batch_number': 3076}
-----------------------
Reward: -15
word: {'target_word': 'epub', 'batch_number': 3077}
-----------------------
Reward: -9
word: {'target_word': 'equalhanded', 'batch_number': 3078}
---

In [None]:
#number of wins , out of 1000 games
won

317