Deep Q-Learning (DQL) is a variant of Q-Learning that uses deep neural networks to approximate the Q-function. Instead of maintaining a table (Q-table) to store Q-values for each state-action pair, DQL uses a neural network to approximate the Q-values.

### Deep Q-Learning (DQL) for Hangman

#### Components:

##### State Representation:
- Current word pattern
- Guessed letters
- Remaining incorrect guesses

##### Action Space:
All possible letters to guess

##### Reward Function:
- +1 for each correct guess
- -1 for each incorrect guess
- +5 for completing the word

##### Deep Q-Learning Algorithm:
- Initialize a neural network to approximate the Q-function
- Update the neural network using the Q-learning update rule
- Use an epsilon-greedy policy for action selection

In [5]:
import json
import requests
import random
import numpy as np
import gc
import concurrent.futures

import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.optimizers import Adam
from sklearn.model_selection import train_test_split

  from pandas.core.computation.check import NUMEXPR_INSTALLED
  from pandas.core import (


In [6]:
class HangmanAPI:
    """Skeleton client for a remote Hangman service.

    Only the access token and the base URL are stored; every request
    method below is an unimplemented placeholder that returns ``None``.
    """

    def __init__(self, access_token):
        """Remember the caller's access token and the service base URL."""
        self.base_url = "https://api.example.com/hangman"  # Replace with actual API endpoint
        self.access_token = access_token

    def start_game(self, practice=True, verbose=True):
        """Placeholder for starting a new game via the API (not implemented)."""
        return None

    def guess_letter(self, game_id, letter):
        """Placeholder for guessing a letter via the API (not implemented)."""
        return None

    def my_status(self):
        """Placeholder for fetching the player's game status (not implemented)."""
        return None

# Define DQLAgent class for Deep Q-Learning
class DQLAgent:
    """Small feed-forward Q-network with one output per action.

    The network maps a state vector of length ``state_size`` to
    ``action_size`` Q-values and is fitted with mean-squared error against
    target Q-values computed by the caller.
    """

    def __init__(self, state_size, action_size):
        """Store the input/output sizes and build the Q-network."""
        self.state_size = state_size
        self.action_size = action_size
        self.model = self.build_dql_model()

    def build_dql_model(self):
        """Build and compile the Q-network.

        Returns:
            A compiled Keras ``Sequential`` model with two hidden ReLU
            layers of 12 units and a linear output layer.
        """
        model = Sequential()
        model.add(Dense(12, input_dim=self.state_size, activation='relu'))  # Reduced neurons
        model.add(Dense(12, activation='relu'))  # Reduced neurons
        # Linear output: Q-values are regression targets, not probabilities.
        model.add(Dense(self.action_size, activation='linear'))
        model.compile(loss='mse', optimizer=Adam(learning_rate=0.001))
        return model

    def act(self, state):
        """Return the index of the best action for ``state``.

        Only actions whose predicted Q-value is non-negative are considered.

        Args:
            state: Batch of one state, as accepted by ``model.predict``.

        Returns:
            The action index as a plain ``int``, or ``None`` when every
            Q-value is negative.
        """
        # verbose=0 matches fit(): no progress bar for every single prediction.
        q_values = np.asarray(self.model.predict(state, verbose=0))
        candidates = np.flatnonzero(q_values[0] >= 0)

        if candidates.size == 0:
            return None

        # argmax is computed over the filtered subset, so map the position
        # back to the real action index before returning it.
        best = candidates[np.argmax(q_values[0, candidates])]
        return int(best)

    def predict_letter(self, state):
        """Return the letter for the best action, or ``None`` if there is none."""
        action = self.act(state)
        if action is None:
            return None
        return chr(ord('a') + action)

    def train(self, states, target_q_values):
        """Fit the network for one epoch on ``states`` -> ``target_q_values``."""
        self.model.fit(states, target_q_values, epochs=1, verbose=0)
        gc.collect()  # Garbage collect to free up memory

    def train_batch(self, state_batch, target_q_values_batch):
        """Fit the network for one epoch on a batch; same behaviour as ``train``."""
        self.train(state_batch, target_q_values_batch)

# Define HangmanGame class
class HangmanGame:
    """Trains and evaluates a DQL agent on Hangman words and plays games via an API client.

    A state is a ``(1, 52)`` array: 26 flags for the letters visible in the
    word pattern followed by 26 slots for guessed letters (left at zero by
    ``word_to_state``).
    """

    def __init__(self, api, file_path):
        """Load the dictionary at ``file_path`` and create the agent.

        Args:
            api: Client object providing ``start_game``, ``guess_letter``
                and ``my_status``.
            file_path: Path to a text file with one word per line.
        """
        self.api = api
        self.dictionary = self.load_and_validate_dictionary(file_path)
        self.state_size = 52
        self.action_size = 26
        self.agent = DQLAgent(self.state_size, self.action_size)
        self.train_memory = {}
        # Tallies updated by predict_letters_for_games and read by get_accuracy.
        self.wins = 0
        self.losses = 0

    @staticmethod
    def _letter_flags(word):
        """Return 26 0/1 flags marking which of a-z occur in ``word``."""
        flags = [0] * 26
        for letter in word.lower():
            # Restrict to ASCII a-z: str.isalpha() also accepts letters such
            # as 'é', whose offset would fall outside the 26 slots.
            if 'a' <= letter <= 'z':
                flags[ord(letter) - ord('a')] = 1
        return flags

    @staticmethod
    def _to_jsonable(value):
        """Convert NumPy arrays and scalars to plain Python values for JSON."""
        if isinstance(value, np.ndarray):
            return value.tolist()
        if isinstance(value, np.generic):
            return value.item()
        return value

    def load_and_validate_dictionary(self, file_path):
        """Read ``file_path`` and return its purely alphabetic words, lower-cased."""
        with open(file_path, 'r') as file:
            candidates = (line.strip().lower() for line in file)
            return [word for word in candidates if word.isalpha()]

    def split_data(self, test_size=0.2):
        """Split the dictionary into ``(train_words, test_words)`` with a fixed seed."""
        train_words, test_words = train_test_split(self.dictionary, test_size=test_size, random_state=42)
        return train_words, test_words

    def word_to_state(self, word):
        """Encode ``word`` as a ``(1, 52)`` state array.

        Characters other than a-z (for example ``_`` or spaces in a masked
        pattern) are ignored; upper-case letters count as their lower-case
        form.
        """
        guessed_flags = [0] * 26  # guessed-letter half of the state, unused here
        return np.array([self._letter_flags(word) + guessed_flags])

    def word_to_target(self, word):
        """Return a 26-element 0/1 list marking the letters present in ``word``."""
        return self._letter_flags(word)

    def train_agent(self, train_words, batch_size=32, gamma=1.0):
        """Fit the agent on one recorded transition per training word.

        Args:
            train_words: Sliceable sequence of words to train on.
            batch_size: Number of words per fitting step.
            gamma: Discount applied to the best next-state Q-value. The
                default of 1.0 gives the undiscounted update
                ``reward + max_next_q``.
        """
        for word in train_words:
            if word not in self.train_memory:
                state, action, reward, next_state = self.process_word(word)
                self.train_memory[word] = (state.flatten(), action, reward, next_state)

        for start in range(0, len(train_words), batch_size):
            batch = train_words[start:start + batch_size]
            # Transitions without an action carry no learning signal; indexing
            # the targets with None would overwrite the whole row instead.
            samples = [
                entry
                for entry in (self.train_memory[word] for word in batch)
                if entry[1] is not None
            ]
            if not samples:
                continue

            states_batch = np.array([np.asarray(entry[0]).reshape(-1) for entry in samples])
            # One batched prediction; copied so the targets are safely writable.
            target_q_values = np.array(self.agent.model.predict(states_batch, verbose=0), dtype=float)

            # Best next-state Q-value per sample; stays 0 for terminal samples.
            future = np.zeros(len(samples))
            with_next = [row for row, entry in enumerate(samples) if entry[3] is not None]
            if with_next:
                next_states = np.array([np.asarray(samples[row][3]).reshape(-1) for row in with_next])
                next_q_values = np.asarray(self.agent.model.predict(next_states, verbose=0))
                future[with_next] = next_q_values.max(axis=1)

            for row, (_, action, reward, _) in enumerate(samples):
                target_q_values[row, action] = reward + gamma * future[row]

            self.agent.train_batch(states_batch, target_q_values)
            del states_batch, target_q_values
            gc.collect()  # Garbage collect to free up memory

    def process_word(self, word):
        """Build one ``(state, action, reward, next_state)`` transition for ``word``.

        The next state is the encoding of a randomly drawn dictionary word,
        or ``None`` when the draw returns ``word`` itself.
        """
        state = self.word_to_state(word)
        action = self.agent.act(state)
        if action is not None:
            action = int(action)  # plain int keeps the memory JSON-serialisable
        reward = self.calculate_reward(word, action)

        next_word = random.choice(self.dictionary)
        next_state = self.word_to_state(next_word) if next_word != word else None

        return state, action, reward, next_state

    def calculate_reward(self, word, action):
        """Return 1 if the letter for ``action`` is in ``word``, otherwise -1.

        An ``action`` of ``None`` (no guess) is also rewarded with -1.
        """
        if action is None:
            return -1
        return 1 if chr(action + ord('a')) in word else -1

    def evaluate_agent(self, test_words):
        """Print and return the percentage of test words the agent scores on.

        Returns:
            The accuracy in percent, or ``None`` when ``test_words`` is empty.
        """
        test_words = list(test_words)
        if not test_words:
            # Avoid dividing by zero; there is nothing to measure.
            print("No test words supplied; accuracy is undefined.")
            return None

        with concurrent.futures.ThreadPoolExecutor() as executor:
            correct_predictions = sum(executor.map(self.evaluate_word, test_words))

        accuracy = (correct_predictions / len(test_words)) * 100
        print(f"Accuracy on test data: {accuracy:.2f}%")
        return accuracy

    def evaluate_word(self, word, max_wrong_guesses=6):
        """Return 1 if the agent names a letter of ``word`` in time, else 0.

        Args:
            word: The hidden word.
            max_wrong_guesses: Number of wrong letters allowed before the
                attempt counts as failed.
        """
        state = self.word_to_state('_' * len(word))
        wrong_actions = set()
        while len(wrong_actions) < max_wrong_guesses:
            action = self.agent.act(state)
            if action is None:
                return 0
            action = int(action)
            if action in wrong_actions:
                # The agent repeated a known wrong letter. The state would no
                # longer change, so continuing would loop forever.
                return 0

            letter = chr(action + ord('a'))
            if letter in word:
                return 1

            wrong_actions.add(action)
            state[0, action] = -1  # mark the wrong letter in the state
        return 0

    def save_train_memory(self, file_path):
        """Write the training memory to ``file_path`` as JSON."""
        serialisable = {
            word: [self._to_jsonable(part) for part in entry]
            for word, entry in self.train_memory.items()
        }
        with open(file_path, 'w') as file:
            json.dump(serialisable, file)

    def load_train_memory(self, file_path):
        """Load training memory previously written by ``save_train_memory``."""
        with open(file_path, 'r') as file:
            raw_memory = json.load(file)

        # Keys are the words themselves. Only the states need converting
        # back from JSON lists to arrays.
        self.train_memory = {
            word: (
                np.array(state).flatten(),
                action,
                reward,
                None if next_state is None else np.array(next_state),
            )
            for word, (state, action, reward, next_state) in raw_memory.items()
        }

    def predict_letters_for_games(self, num_games=1000):
        """Play ``num_games`` games through the API and tally wins and losses.

        Reads ``'word'`` from ``api.my_status()`` and ``'status'`` /
        ``'gameStatus'`` from ``api.guess_letter()``. A game is abandoned
        when no letter can be predicted or the API reports a failed guess.
        """
        for _ in range(num_games):
            game_id = self.api.start_game()  # Start a new game
            game_over = False
            while not game_over:
                current_word = self.api.my_status()['word']
                state = self.word_to_state(current_word)
                letter = self.predict_letter(state)
                if not letter:
                    print("No letter predicted for the current state.")
                    break

                response = self.api.guess_letter(game_id, letter)
                if response['status'] != 'success':
                    print("Failed to guess letter:", letter)
                    break

                game_status = response['gameStatus']
                if game_status == 'lost':
                    game_over = True
                    self.losses += 1
                elif game_status == 'won':
                    game_over = True
                    self.wins += 1
                # Any other status means the game is ongoing: keep guessing.

    def predict_letter(self, state):
        """Return the agent's letter for ``state``, or ``None`` if it has none."""
        action = self.agent.act(state)
        if action is None:
            return None
        return chr(int(action) + ord('a'))

    def get_accuracy(self):
        """Return the share of finished games that were won, in percent.

        Returns 0.0 when no game has been completed yet.
        """
        finished = self.wins + self.losses
        if finished == 0:
            return 0.0
        return round(self.wins / finished * 100, 2)


In [None]:
if __name__ == "__main__":
    import os

    # SECURITY: a credential should not be committed with the source. The
    # HANGMAN_ACCESS_TOKEN environment variable takes precedence; the literal
    # is only the fallback used when the variable is unset.
    access_token = os.environ.get(
        "HANGMAN_ACCESS_TOKEN",
        "0e9732ec39694400482388c882b6d1",  # Replace with actual access token
    )
    api = HangmanAPI(access_token)

    # The default path is machine-specific; set HANGMAN_WORDS_FILE to use
    # another dictionary file without editing the code.
    file_path = os.environ.get(
        "HANGMAN_WORDS_FILE",
        "C:\\Users\\SONY\\Downloads\\words_250000_train.txt",  # Path to your dictionary file
    )
    hangman_game = HangmanGame(api, file_path)
    train_words, test_words = hangman_game.split_data(test_size=0.2)

    # Train the agent
    hangman_game.train_agent(train_words)

    # Evaluate the agent
    hangman_game.evaluate_agent(test_words)

    # Run 1000 games and evaluate accuracy
    hangman_game.predict_letters_for_games(num_games=1000)
    print(f"Accuracy on 1000 games: {hangman_game.get_accuracy()}%")
