In [4]:
import numpy as np
import random
import string
from collections import deque
import torch
import torch.nn as nn
import torch.optim as optim

ModuleNotFoundError: No module named 'torch'

In [ ]:
class HangmanEnvironment:
    """Hangman game exposed through a small gym-like ``reset``/``step`` API.

    Observation layout (1-D integer ``numpy`` array of fixed length
    ``26 + max_word_length + 1``):

    * 26 flags, one per letter a-z: 1 if the letter was already guessed.
    * ``max_word_length`` pattern slots: ``0..25`` for a revealed letter,
      ``UNKNOWN_CODE`` for a hidden letter, ``PADDING_CODE`` for slots past
      the end of the current secret word.
    * 1 trailing value: the number of wrong guesses still allowed.

    The pattern is padded to the longest dictionary word so that every
    episode yields an observation of the same size; without the padding the
    observation length would change with each secret word.
    """

    UNKNOWN_CODE = 26  # letter at this position has not been revealed yet
    PADDING_CODE = 27  # position lies beyond the end of the secret word

    def __init__(self, max_attempts=6, word_list=None,
                 dictionary_path='words_250000_train.txt'):
        """Create the environment and start the first game.

        Args:
            max_attempts: number of wrong guesses allowed per game.
            word_list: optional iterable of candidate words. When omitted,
                the words are read from ``dictionary_path``.
            dictionary_path: text file with one word per line; only used
                when ``word_list`` is None.

        Raises:
            ValueError: if no non-blank word is available.
        """
        if word_list is None:
            word_list = self.build_dictionary(dictionary_path)
        # Normalise once; blank lines would yield an unwinnable empty word.
        self.word_list = [w.strip().lower() for w in word_list if w.strip()]
        if not self.word_list:
            raise ValueError("word list is empty")
        self.max_word_length = max(len(w) for w in self.word_list)
        self.max_attempts = max_attempts
        self.reset()

    def reset(self):
        """Pick a new random secret word and return the initial observation."""
        self.secret_word = random.choice(self.word_list).lower()
        self.attempts_remaining = self.max_attempts
        self.current_state = "_" * len(self.secret_word)
        self.letters_guessed = set()
        return self.get_observation()

    def get_observation(self):
        """Encode the current game state as a fixed-length integer array."""
        guessed_flags = [
            1 if letter in self.letters_guessed else 0
            for letter in string.ascii_lowercase
        ]
        pattern = [
            self.UNKNOWN_CODE if c == '_' else ord(c) - ord('a')
            for c in self.current_state
        ]
        # Pad so the observation size does not depend on the secret word.
        padding = [self.PADDING_CODE] * (self.max_word_length - len(pattern))
        return np.array(
            guessed_flags + pattern + padding + [self.attempts_remaining]
        )

    def build_dictionary(self, dictionary_file_location):
        """Return the lines of the given text file as a list of strings."""
        # The context manager closes the file even if reading fails.
        with open(dictionary_file_location, "r") as text_file:
            return text_file.read().splitlines()

    def step(self, action):
        """Guess the letter with alphabet index ``action`` (0 = 'a').

        Returns:
            ``(observation, reward, done, info)`` where reward is
            10 for completing the word, 1 for a correct letter, and
            -1 for a wrong or repeated letter. A repeated letter does not
            consume an attempt and never ends the game.
        """
        letter = string.ascii_lowercase[action]
        if letter in self.letters_guessed:
            return self.get_observation(), -1, False, {}
        self.letters_guessed.add(letter)

        if letter in self.secret_word:
            new_state = "".join(
                c if c in self.letters_guessed else '_'
                for c in self.secret_word
            )
            self.current_state = new_state
            done = new_state == self.secret_word
            reward = 10 if done else 1
        else:
            self.attempts_remaining -= 1
            reward = -1
            done = self.attempts_remaining == 0

        return self.get_observation(), reward, done, {}

    def render(self):
        """Print the revealed pattern and the remaining wrong guesses."""
        print(f"Word: {self.current_state} Attempts Left: {self.attempts_remaining}")

In [ ]:
class DQNNetwork(nn.Module):
    """Fully connected Q-value network.

    Maps ``input_dim`` features through hidden layers of 128 and 64 units
    (ReLU after each hidden layer) to ``output_dim`` outputs.
    """

    def __init__(self, input_dim, output_dim):
        super().__init__()
        widths = (input_dim, 128, 64, output_dim)
        layers = []
        for fan_in, fan_out in zip(widths[:-1], widths[1:]):
            layers.append(nn.Linear(fan_in, fan_out))
            layers.append(nn.ReLU())
        layers.pop()  # the output layer is not followed by an activation
        self.fc = nn.Sequential(*layers)

    def forward(self, x):
        """Run ``x`` through the stacked layers and return the result."""
        q_values = self.fc(x)
        return q_values

class DQNAgent:
    """Deep Q-learning agent with a replay memory and epsilon-greedy policy.

    Attributes are created in ``__init__``: ``model`` (a ``DQNNetwork``),
    ``optimizer`` (Adam), ``criterion`` (MSE loss), ``memory`` (bounded
    deque of transitions) and ``gamma`` (discount factor).
    """

    def __init__(self, state_dim, action_dim, learning_rate=1e-3):
        """Build the Q-network and its optimizer.

        Args:
            state_dim: length of a flattened observation.
            action_dim: number of discrete actions.
            learning_rate: Adam learning rate.
        """
        self.state_dim = state_dim
        self.action_dim = action_dim
        self.model = DQNNetwork(state_dim, action_dim)
        self.optimizer = optim.Adam(self.model.parameters(), lr=learning_rate)
        self.criterion = nn.MSELoss()
        self.memory = deque(maxlen=10000)
        self.gamma = 0.99  # Discount factor

    def _to_batch(self, state):
        """Return ``state`` as a float32 tensor of shape ``(1, features)``.

        Accepts both flat observations and ones already reshaped to
        ``[1, state_dim]`` so that no extra batch axis is ever stacked on.
        """
        return torch.as_tensor(state, dtype=torch.float32).reshape(1, -1)

    def remember(self, state, action, reward, next_state, done):
        """Append one transition to the replay memory."""
        self.memory.append((state, action, reward, next_state, done))

    def act(self, state, epsilon=0.1):
        """Choose an action index epsilon-greedily.

        With probability ``epsilon`` a uniformly random action is returned;
        otherwise the action with the highest predicted value.
        """
        if random.random() < epsilon:
            return random.randrange(self.action_dim)
        with torch.no_grad():
            action_values = self.model(self._to_batch(state))
        return int(torch.argmax(action_values, dim=1).item())

    def replay(self, batch_size):
        """Fit the network on ``batch_size`` randomly sampled transitions.

        Does nothing until the memory holds at least ``batch_size`` items.
        One optimizer step is taken per sampled transition.
        """
        if len(self.memory) < batch_size:
            return
        minibatch = random.sample(self.memory, batch_size)
        for state, action, reward, next_state, done in minibatch:
            target = float(reward)
            if not done:
                # Bootstrap value is a constant: no gradient flows through it.
                with torch.no_grad():
                    next_q = self.model(self._to_batch(next_state))
                target = reward + self.gamma * next_q.max().item()

            prediction = self.model(self._to_batch(state))
            # The target equals the current prediction everywhere except at
            # the taken action, so only that output contributes to the loss.
            target_f = prediction.detach().clone()
            target_f[0, action] = target

            self.optimizer.zero_grad()
            loss = self.criterion(prediction, target_f)
            loss.backward()
            self.optimizer.step()

    def train(self, env, episodes, batch_size):
        """Play ``episodes`` games on ``env`` and learn from replayed steps.

        ``env`` must provide ``reset()`` returning an observation and
        ``step(action)`` returning ``(observation, reward, done, info)``.
        ``replay`` is called once after each finished episode. The value
        printed as "score" is the reward of the episode's final step.
        """
        for e in range(episodes):
            state = env.reset()
            # Use the agent's own dimension rather than a module-level name.
            state = np.reshape(state, [1, self.state_dim])
            done = False
            while not done:
                action = self.act(state)
                next_state, reward, done, _ = env.step(action)
                next_state = np.reshape(next_state, [1, self.state_dim])
                self.remember(state, action, reward, next_state, done)
                state = next_state
                if done:
                    print(f"Episode: {e+1}/{episodes}, score: {reward}")
            self.replay(batch_size)

In [ ]:
# Build the game and size the agent from what the environment reports.
env = HangmanEnvironment()
action_dim = len(string.ascii_lowercase)  # one action per letter a-z
state_dim = len(env.get_observation())

# Training hyper-parameters.
episodes = 1000
batch_size = 32

agent = DQNAgent(state_dim, action_dim)
agent.train(env, episodes=episodes, batch_size=batch_size)