In [2]:
import numpy as np


class QTable:
    def __init__(self, num_states, num_actions, learning_rate, discount_factor, exploration_rate):
        self.q_table = np.zeros((num_states, num_actions))
        self.learning_rate = learning_rate
        self.discount_factor = discount_factor
        self.exploration_rate = exploration_rate

    def update(self, state, action, reward, next_state):
        q_old = self.q_table[state, action]
        q_target = reward + self.discount_factor * \
            np.max(self.q_table[next_state])
        self.q_table[state, action] += self.learning_rate * (q_target - q_old)

    def get_best_action(self, state):
        return np.argmax(self.q_table[state])

    def get_action(self, state):
        if np.random.rand() < self.exploration_rate:
            return np.random.randint(0, self.q_table.shape[1])
        else:
            return self.get_best_action(state)

In [4]:
class Agent:
    def __init__(self, state_space, action_space, discount_rate, learning_rate, exploration_rate):
        self.state_space = state_space
        self.action_space = action_space
        self.discount_rate = discount_rate
        self.learning_rate = learning_rate
        self.exploration_rate = exploration_rate

        # Multiply by 2 to account for the opponent's state
        num_states = self.state_space * 2
        num_actions = self.action_space
        self.q_table = QTable(num_states, num_actions, self.learning_rate,
                              self.discount_rate, self.exploration_rate)

    def get_state_index(self, state):
        state_index = 0
        for i in range(len(state)):
            state_index += state[i] * (self.state_space ** i)
        return state_index

    def train_network(self, state, action, reward, next_state, done):
        state = self.get_state_index(state)
        next_state = self.get_state_index(next_state)
        self.q_table.update(state, action, reward, next_state)

    def get_best_action(self, state):
        state = self.get_state_index(state)
        return self.q_table.get_best_action(state)

    def get_action(self, state):
        state = self.get_state_index(state)
        return self.q_table.get_action(state)

In [5]:
import poke_env
from poke_env.environment.battle import Battle
from poke_env.player.random_player import RandomPlayer
from poke_env.player_configuration import PlayerConfiguration
from poke_env.server_configuration import ShowdownServerConfiguration


class Environment(RandomPlayer):
    def __init__(self, username, password, battle_format="gen8randombattle"):
        self.battle_format = battle_format

        player_configuration = PlayerConfiguration(username, password)
        server_configuration = ShowdownServerConfiguration

        super().__init__(player_configuration)

    def reset(self):
        # Start a new battle and return the initial state
        self.start_random_battle(self.battle_format)
        return self.get_state()

    def step(self, action):
        # Perform the action chosen by the agent and observe the outcome
        self.choose_random_move(self.battle)
        next_state = self.get_state()
        reward = self.get_reward()
        done = self.battle.finished
        return next_state, reward, done

    def get_state(self):
        # Extract relevant state information from the battle object
        # This is just an example; you can customize the state representation as needed
        state = [
            self.battle.user.active_pokemon.hp,
            self.battle.opponent.active_pokemon.hp,
        ]
        return state

    def get_reward(self):
        # Compute the reward based on the battle outcome
        # This is just an example; you can customize the reward function as needed
        if self.battle.won:
            reward = 1
        elif self.battle.lost:
            reward = -1
        else:
            reward = 0
        return reward

In [6]:
def main():
    username = "Q-learner2023"
    password = "AIisPlaying2023"

    state_space = 10
    action_space = 18
    discount_rate = 0.99
    learning_rate = 0.001
    exploration_rate = 0.1
    agent = Agent(state_space, action_space, discount_rate,
                  learning_rate, exploration_rate)
    env = Environment(username, password)

    num_episodes = 1000
    for episode in range(num_episodes):
        state = env.reset()
        done = False

        while not done:
            action = agent.get_action(state)
            next_state, reward, done = env.step(action)
            agent.train_network(state, action, reward, next_state, done)
            state = next_state


if __name__ == "__main__":
    main()


AttributeError: 'Environment' object has no attribute 'start_random_battle'

2023-04-27 17:52:30,310 - Q-learner2023 - ERROR - Multiple exceptions: [Errno 10061] Connect call failed ('::1', 8000, 0, 0), [Errno 10061] Connect call failed ('127.0.0.1', 8000)
Traceback (most recent call last):
  File "C:\Users\natha\Anaconda3\lib\site-packages\poke_env\player\player_network_interface.py", line 304, in listen
    async with websockets.connect(
  File "C:\Users\natha\Anaconda3\lib\site-packages\websockets\legacy\client.py", line 642, in __aenter__
    return await self
  File "C:\Users\natha\Anaconda3\lib\site-packages\websockets\legacy\client.py", line 659, in __await_impl_timeout__
    return await asyncio.wait_for(self.__await_impl__(), self.open_timeout)
  File "C:\Users\natha\Anaconda3\lib\asyncio\tasks.py", line 483, in wait_for
    return fut.result()
  File "C:\Users\natha\Anaconda3\lib\site-packages\websockets\legacy\client.py", line 663, in __await_impl__
    _transport, _protocol = await self._create_connection()
  File "C:\Users\natha\Anaconda3\lib\async