In [5]:
import numpy as np
import tensorflow as tf
from tensorflow.keras import layers
import gym
from gym import spaces
import pygame
import sys
import time
import Wumpus_Env
class QLearningAgent:
    """Epsilon-greedy Q-learning agent backed by a small dense Keras network.

    The network maps a ``(1, state_size)`` input to one Q-value per action.
    ``train`` performs a single one-sample gradient step per call (there is
    no replay buffer) and decays ``epsilon`` after every step.
    """

    def __init__(self, state_size, action_size):
        """Store the hyper-parameters and build the Q-network.

        Args:
            state_size: Number of features in one state vector.
            action_size: Number of network outputs (one Q-value per action).
        """
        self.state_size = state_size
        self.action_size = action_size
        self.gamma = 0.95  # Discount factor
        # Exploration probability. NOTE(review): it starts above 1.0, so every
        # action is random until the decay brings it below 1 (roughly the
        # first 80 calls to ``train``) -- confirm this warm-up is intended.
        self.epsilon = 1.5
        self.epsilon_decay = 0.995
        self.epsilon_min = 0.01
        self.learning_rate = 0.001
        self.model = self.build_model()

    def build_model(self):
        """Build and compile the two-hidden-layer Q-network (MSE loss, Adam)."""
        model = tf.keras.Sequential([
            layers.Dense(24, input_shape=(self.state_size,), activation='relu'),
            layers.Dense(24, activation='relu'),
            layers.Dense(self.action_size, activation='linear')
        ])
        # ``learning_rate`` is the supported keyword; the old ``lr`` alias is
        # deprecated and rejected by newer Keras releases.
        model.compile(
            optimizer=tf.keras.optimizers.Adam(learning_rate=self.learning_rate),
            loss='mse',
        )
        return model

    def act(self, state, action_size=None):
        """Choose an action epsilon-greedily.

        Args:
            state: Batched state passed straight to ``model.predict``.
            action_size: Optional number of currently selectable actions.
                When given it replaces ``self.action_size``; when omitted the
                stored value is used.

        Returns:
            An action index in ``range(self.action_size)``.
        """
        if action_size is not None:
            self.action_size = action_size
        print(self.action_size)
        if np.random.rand() <= self.epsilon:
            return np.random.choice(self.action_size)
        q_values = self.model.predict(state, verbose=0)
        # Only consider the actions that are currently selectable so the
        # greedy choice stays in the same range as the random choice.
        return np.argmax(q_values[0][:self.action_size])

    def train(self, state, action, reward, next_state, done):
        """Run one Q-learning update for a single transition.

        The target for ``action`` is ``reward`` on terminal transitions and
        ``reward + gamma * max_a Q(next_state, a)`` otherwise; the Q-values of
        the other actions are left at the network's current predictions.
        Epsilon is decayed afterwards while it is above ``epsilon_min``.
        """
        target = reward
        if not done:
            next_q = self.model.predict(next_state, verbose=0)[0]
            target = reward + self.gamma * np.max(next_q)
        target_f = self.model.predict(state, verbose=0)
        target_f[0][action] = target
        self.model.fit(state, target_f, epochs=1, verbose=0)
        if self.epsilon > self.epsilon_min:
            self.epsilon *= self.epsilon_decay


def preprocess_state(state):
    """Return ``state`` reshaped into a single-row batch of shape (1, len(state))."""
    batch_shape = [1, len(state)]
    return np.reshape(state, batch_shape)


def main():
    """Train a QLearningAgent on the Wumpus world, then run one evaluation episode.

    Each training step renders the environment and prints the chosen action;
    a summary line is printed at the end of every episode. pygame is shut
    down on exit, including when an error interrupts the run.
    """
    env = Wumpus_Env.WumpusWorldEnv(size=4, number_of_pits=3)
    state_size = len(env.reset())
    action_size = env.action_space.n

    agent = QLearningAgent(state_size, action_size)

    episodes = 1000
    try:
        for episode in range(episodes):
            state = preprocess_state(env.reset())

            total_reward = 0
            done = False

            while not done:
                # The action count is re-read from the environment on every
                # step rather than cached.
                action = agent.act(state, env.action_space.n)
                next_state, reward, done, _ = env.step(action)
                next_state = preprocess_state(next_state)
                agent.train(state, action, reward, next_state, done)
                state = next_state
                total_reward += reward

                env.render()
                print(action)

            print(f"Episode: {episode + 1}, Total Reward: {total_reward}, Epsilon: {agent.epsilon}")

        # Evaluate the trained agent
        state = preprocess_state(env.reset())
        done = False
        while not done:
            # ``act`` requires the action count; pass it exactly as the
            # training loop does.
            action = agent.act(state, env.action_space.n)
            next_state, _, done, _ = env.step(action)
            state = preprocess_state(next_state)
            env.render()
    finally:
        # Always release the pygame window, even if training fails midway.
        pygame.quit()


# Run training and evaluation only when executed as a script, not on import.
if __name__ == "__main__":
    main()




(0, 1)
(0, 1)
8
(0, 1)
6
4
(0, 1)
3
4
(0, 1)
(0, 1)
1
Episode: 1, Total Reward: -26, Epsilon: 1.4776123125
(0, 1)
8
(0, 1)
3
8
(0, 1)
0
8
(0, 1)
3
8
(0, 1)
(0, 1)
5
4
(0, 1)
(0, 1)
2
4
(0, 1)
(0, 1)
2
4
(0, 1)
(0, 1)
1
4
(0, 1)
(0, 1)
2
4
(0, 1)
2
4
(0, 1)
2
4
(0, 1)
(0, 1)
3
4
(0, 1)
(0, 1)
1
Episode: 2, Total Reward: -41, Epsilon: 1.3913534532274918
(0, 1)
8
(0, 1)
3
8
(0, 1)
(0, 1)
4
4
(0, 1)
(0, 1)
1
Episode: 3, Total Reward: -24, Epsilon: 1.3705873290188897
(0, 1)
8
(0, 1)
(0, 1)
4
4
(0, 1)
3
4
(0, 1)
(0, 1)
1
Episode: 4, Total Reward: -24, Epsilon: 1.3501311418098665
(0, 1)
8
(0, 1)
(0, 1)
5
4
(0, 1)
3
4
(0, 1)
3
4
(0, 1)
(0, 1)
1
Episode: 5, Total Reward: -27, Epsilon: 1.3233303644232017
(0, 1)
8
(0, 1)
(0, 1)
4
4
(0, 1)
(0, 1)
1
Episode: 6, Total Reward: -21, Epsilon: 1.3101301440380804
(0, 1)
8
(0, 1)
(0, 1)
1
Episode: 7, Total Reward: -20, Epsilon: 1.30357949331789
(0, 1)
8
(0, 1)
(0, 1)
7
4
(0, 1)
3
4
(0, 1)
(0, 1)
2
4
(0, 1)
(0, 1)
1
4
(0, 1)
(0, 1)
3
Episode: 8, Total Rewa