In [2]:
import numpy as np


class QTable:
    """Tabular action-value store with a Q-learning update and epsilon-greedy policy.

    The table is a ``(num_states, num_actions)`` float array initialised to
    zero; states and actions are addressed by integer index.
    """

    def __init__(self, num_states, num_actions, learning_rate, discount_factor, exploration_rate):
        """Allocate a zero-filled table and store the hyperparameters.

        Args:
            num_states: Number of rows (distinct state indices).
            num_actions: Number of columns (distinct action indices).
            learning_rate: Step size applied to the TD error in ``update``.
            discount_factor: Weight of the bootstrapped next-state value.
            exploration_rate: Probability that ``get_action`` picks a
                uniformly random action instead of the greedy one.
        """
        self.q_table = np.zeros((num_states, num_actions))
        self.learning_rate = learning_rate
        self.discount_factor = discount_factor
        self.exploration_rate = exploration_rate

    def update(self, state, action, reward, next_state, done=False):
        """Apply one Q-learning step for the transition (state, action) -> next_state.

        Args:
            state: Row index of the state the action was taken in.
            action: Column index of the action taken.
            reward: Scalar reward observed for the transition.
            next_state: Row index of the resulting state. Ignored when
                ``done`` is true.
            done: True when the transition ended the episode. A terminal
                state has no future return, so the target is the reward
                alone rather than reward plus the discounted best
                next-state value. Defaults to False, which reproduces the
                plain bootstrapped update.
        """
        q_old = self.q_table[state, action]
        if done:
            # No bootstrap on terminal transitions: nothing follows them.
            q_target = reward
        else:
            q_target = reward + self.discount_factor * \
                np.max(self.q_table[next_state])
        self.q_table[state, action] += self.learning_rate * (q_target - q_old)

    def get_best_action(self, state):
        """Return the index of the highest-valued action for ``state``.

        Ties resolve to the lowest action index (``np.argmax`` semantics).
        """
        return np.argmax(self.q_table[state])

    def get_action(self, state):
        """Return an epsilon-greedy action for ``state``.

        With probability ``exploration_rate`` a uniformly random action
        index is returned; otherwise the greedy action is returned.
        """
        if np.random.rand() < self.exploration_rate:
            return np.random.randint(0, self.q_table.shape[1])
        return self.get_best_action(state)

In [3]:
class Agent:
    """Q-learning agent that maps multi-component states onto a flat Q-table."""

    def __init__(self, state_space, action_space, discount_rate, learning_rate, exploration_rate,
                 state_dims=2):
        """Store hyperparameters and build the backing Q-table.

        Args:
            state_space: Number of distinct values each state component can
                take; also the base used by ``get_state_index``.
            action_space: Number of discrete actions.
            discount_rate: Discount factor forwarded to the Q-table.
            learning_rate: Step size forwarded to the Q-table.
            exploration_rate: Epsilon forwarded to the Q-table.
            state_dims: Number of components in a state sequence. Defaults
                to 2 (presumably the agent's own state plus the opponent's
                -- confirm against the environment).
        """
        self.state_space = state_space
        self.action_space = action_space
        self.discount_rate = discount_rate
        self.learning_rate = learning_rate
        self.exploration_rate = exploration_rate
        self.state_dims = state_dims

        # get_state_index encodes a state as a base-`state_space` number, so
        # `state_dims` components need state_space ** state_dims rows.
        # Sizing the table as state_space * 2 left most indices out of range.
        num_states = self.state_space ** self.state_dims
        num_actions = self.action_space
        self.q_table = QTable(num_states, num_actions, self.learning_rate,
                              self.discount_rate, self.exploration_rate)

    def get_state_index(self, state):
        """Flatten a state sequence into a single row index.

        Component ``i`` is weighted by ``state_space ** i`` (little-endian
        positional encoding). An empty sequence maps to 0.
        """
        return sum(component * (self.state_space ** position)
                   for position, component in enumerate(state))

    def train_network(self, state, action, reward, next_state, done):
        """Apply one Q-learning update for an observed transition.

        Args:
            state: State sequence the action was taken in.
            action: Index of the action taken.
            reward: Scalar reward observed.
            next_state: Resulting state sequence; not read when ``done``.
            done: True when the transition ended the episode.
        """
        state = self.get_state_index(state)
        if done:
            # Terminal transition: no future return exists, so the target is
            # the reward alone. Written directly to the table so the
            # bootstrap term is skipped.
            table = self.q_table.q_table
            q_old = table[state, action]
            table[state, action] += self.q_table.learning_rate * (reward - q_old)
            return
        next_state = self.get_state_index(next_state)
        self.q_table.update(state, action, reward, next_state)

    def get_best_action(self, state):
        """Return the greedy action index for a state sequence."""
        state = self.get_state_index(state)
        return self.q_table.get_best_action(state)

    def get_action(self, state):
        """Return an epsilon-greedy action index for a state sequence."""
        state = self.get_state_index(state)
        return self.q_table.get_action(state)

ModuleNotFoundError: No module named 'network'

In [1]:
from agent import Agent
from environment import Environment
import numpy as np


def main():
    """Train a tabular Q-learning agent against ``Environment``.

    Credentials are read from the ``QLEARNER_USERNAME`` and
    ``QLEARNER_PASSWORD`` environment variables; when either is unset or
    empty the user is prompted instead. They are deliberately not stored in
    the source, so they cannot leak through version control or a shared
    notebook.

    Returns:
        A list with the total reward collected in each episode.
    """
    # Local imports: only this entry point needs them.
    import os
    from getpass import getpass

    username = os.environ.get("QLEARNER_USERNAME") or input("Username: ")
    password = os.environ.get("QLEARNER_PASSWORD") or getpass("Password: ")

    state_space = 10
    action_space = 18
    discount_rate = 0.99
    learning_rate = 0.001
    exploration_rate = 0.1
    agent = Agent(state_space, action_space, discount_rate,
                  learning_rate, exploration_rate)
    # NOTE(review): Environment presumably authenticates with these
    # credentials -- confirm against its implementation.
    env = Environment(username, password)

    num_episodes = 1000
    episode_returns = []
    for episode in range(num_episodes):
        state = env.reset()
        done = False
        episode_return = 0

        while not done:
            action = agent.get_action(state)
            next_state, reward, done = env.step(action)
            agent.train_network(state, action, reward, next_state, done)
            state = next_state
            episode_return += reward

        episode_returns.append(episode_return)

    return episode_returns


# Start training only when this code runs as the top-level program, not when
# it is imported as a module.
if __name__ == "__main__":
    main()


ModuleNotFoundError: No module named 'agent'