In [100]:
# Definition of the Agent that plays Connect4
import json
import numpy as np
import pickle
import nbimporter
from Board import Connect4Board

In [258]:
class Agent:
    '''
    Connect4 agent that summarizes the board column by column and keeps a Q-table.

    The board is held as a 2D numpy array of one-character cells in which "-"
    marks an empty cell. Row 0 is the top of the board: pieces settle at the
    highest row index that is still empty.
    '''

    # Cell value meaning "empty".
    _EMPTY = '-'
    # Marker written on a scratch copy of the board to flag the candidate cell.
    _MARKER = 'A'

    def __init__(self, env,chip, reward_scheme = (0.0, -1.0, 0.5, 1.0), filename = None):
        '''
        env <2D sequence of str> : board rows, top row first; "-" marks an empty cell
        chip <string> : chip to be played by the agent. Must be either "X" or "O"
        reward_scheme <(float, float, float, float)> : (reward for a move that doesn't end the game,
            reward for losing, reward for a tied game, reward for winning)
        filename <string or None> : stored on the instance; it is NOT loaded automatically,
            call import_Qtable explicitly

        Raises ValueError when reward_scheme is not a tuple of 4 floats or chip is not "X"/"O".
        '''
        self.env = np.array(env)

        # reward_scheme must be a tuple of exactly 4 floats
        if not (isinstance(reward_scheme, tuple) and len(reward_scheme) == 4 and all(isinstance(x, float) for x in reward_scheme)):
            raise ValueError("reward_scheme must be a tupla with 4 floats")
        self.rewards = reward_scheme

        if chip not in ["X", "O"]:
            raise ValueError("chip must be 'X' or 'O'")
        self.chip = chip
        self.values = None #TODO
        self.filename = filename
        # Single storage for the Q-table. import_Qtable/export_Qtable use this
        # attribute, so it must exist from construction on.
        self.q_table = {}

    @property
    def Q_table(self):
        '''Backward-compatible alias of q_table (the name __init__ used to set).'''
        return self.q_table

    @Q_table.setter
    def Q_table(self, table):
        self.q_table = table

    def import_Qtable(self, filename):
        '''
        Replace the Q-table with the object unpickled from filename.

        A missing file is reported on stdout and the current table is kept.
        '''
        try:
            with open(filename, 'rb') as file:
                # NOTE(security): pickle.load can execute arbitrary code; only
                # load Q-table files that come from a trusted source.
                self.q_table = pickle.load(file)
        except FileNotFoundError:
            print("File not found. Starting with an empty Q-Table.")

    def export_Qtable(self,filename):
        '''Pickle the current Q-table into filename (overwrites the file).'''
        with open(filename, 'wb') as file:
            pickle.dump(self.q_table, file)

    def get_vectors_of_column(self,column):
        '''
        Return the four lines that cross the next free cell of column.

        The free cell is flagged with "A" on a COPY of the board, so the real
        board is never modified. The result is a 4-tuple
        (horizontal, vertical, diagonal_1, diagonal_2) whose items are
        (index of the "A" marker inside the vector, vector) pairs.
        When the column is full, four empty lists are returned instead.
        '''
        # TODO: should read env.board once the Board class is wired in
        available_row = self.get_next_available(column)
        if available_row == -1:
            return [], [], [], []

        # Work on a copy: marking self.env directly would leave the marker on
        # the real board and corrupt every later query.
        reference_matrix = self.env.copy()
        reference_matrix[available_row, column] = self._MARKER
        last_row = reference_matrix.shape[0] - 1

        horizontal_vector = reference_matrix[available_row, :]
        vertical_vector = reference_matrix[:, column]

        # Top-left -> bottom-right diagonal through the marked cell.
        diagonal_1_vector = reference_matrix.diagonal(column - available_row)
        # Bottom-left -> top-right diagonal: flip the rows so it becomes a regular
        # diagonal; the marked cell sits at row (last_row - available_row) there.
        diagonal_2_vector = reference_matrix[::-1].diagonal(column + (available_row - last_row))

        return (
            (column, horizontal_vector),
            (available_row, vertical_vector),
            (np.where(diagonal_1_vector == self._MARKER)[0][0], diagonal_1_vector),
            (np.where(diagonal_2_vector == self._MARKER)[0][0], diagonal_2_vector),
        )

    # TODO: this function belongs in the board class
    def get_next_available(self,column):
        '''Return the largest row index whose cell in column is "-", or -1 if there is none.'''
        for row in reversed(range(self.env.shape[0])):
            if self.env[row, column] == self._EMPTY:
                return row
        return -1

    @staticmethod
    def _count_run(vector, indices, chip_type):
        '''
        Walk vector over indices while the cells equal the first visited cell.

        Returns (own, other): cells of that run equal to chip_type, and cells
        that are neither chip_type nor "-". A run of "-" cells counts as nothing.
        '''
        own = 0
        other = 0
        first_cell = None
        for index in indices:
            cell = vector[index]
            if first_cell is None:
                first_cell = cell
            if cell != first_cell:
                break
            if cell == chip_type:
                own += 1
            elif cell != '-':
                other += 1
        return own, other

    def verify_vector(self, actual_position, vector, chip_type):
        '''
        Count the chips adjacent to actual_position along vector.

        On each side of actual_position only the unbroken run of cells equal to
        the immediate neighbour is considered. Returns
        (counter_chip_type, counter_other) added over both sides.
        '''
        forward = range(actual_position + 1, len(vector))
        # Stop value -1 so that index 0 is inspected too.
        backward = range(actual_position - 1, -1, -1)

        own_forward, other_forward = self._count_run(vector, forward, chip_type)
        own_backward, other_backward = self._count_run(vector, backward, chip_type)
        return own_forward + own_backward, other_forward + other_backward

    def get_actual_state(self):
        '''
        Describe the board as one (max_own, max_other) pair per column.

        For each column the four lines through its next free cell are checked
        with verify_vector using the agent's chip; the pair keeps the largest
        own-chip count and the largest other-chip count found among them.
        A full column has no lines to check and yields (0, 0).
        '''
        state = []
        for column in range(self.env.shape[1]):
            max_own = 0
            max_other = 0
            for vector in self.get_vectors_of_column(column):
                if not vector:
                    # Full column: get_vectors_of_column returned empty lists.
                    continue
                own, other = self.verify_vector(vector[0], vector[1], self.chip)
                max_own = max(max_own, own)
                max_other = max(max_other, other)
            state.append((max_own, max_other))
        return state
        

# TODO: decide which strategy the agent will follow so that its structure and methods can be finalized


In [260]:


# Manual check of Agent.get_vectors_of_column for column 4.
# NOTE: `agente` is created in the cell labelled In [32] further down; that cell must be run first.
# Despite their names, each unpacked value is a (marker position, vector) pair, not a bare vector.
horizontal_vector, vertical_vector, diagonal_1_vector, diagonal_2_vector = agente.get_vectors_of_column(4)
print(diagonal_2_vector)
# Disabled check kept for reference (its expected value does not match the 4-tuple the method returns):
#assert agente.get_vectors_of_column(3) == (3,0), "La funcion no esta retornando los valores correctos"

(2, array(['O', 'X', 'A', '-', '-'], dtype='<U1'))


In [76]:
# Build a 6x7 example grid holding 1..42 in row-major order
matriz = np.arange(1, 43).reshape(6, 7)

# Not the main diagonal: the rows are flipped vertically first and the diagonal
# with offset -1 is read, i.e. a bottom-left -> top-right line of the original grid
diagonal_principal = np.flipud(matriz).diagonal(-1)
print("Diagonal principal:", diagonal_principal)

Diagonal principal: [29 23 17 11  5]


In [32]:
# Sample 6x7 board (top row first); "-" marks an empty cell
env = [
    list('-------'),
    list('-------'),
    list('--O----'),
    list('--O----'),
    list('O-XXXXO'),
    list('--OOXOX'),
]
agente = Agent(env,'X')

# Table of (position, vector, expected (own, other) counts) for verify_vector with chip 'X'
casos = [
    (3, ['-','X','X','-','X','O'], (3,0)),
    (3, ['-','O','O','-','X','O'], (1,2)),
    (0, ['-','O','O','-','X','O'], (0,2)),
    (5, ['-','O','O','X','X','-'], (2,0)),
    (2, ['-','-','-','-'], (0,0)),
]
for posicion, vector, esperado in casos:
    assert agente.verify_vector(posicion,vector,'X') == esperado, "La funcion no esta retornando los valores correctos"

In [26]:
# import numpy as np
# import random
# import pickle

# class Connect4Agent:
#     def __init__(self, chip, reward_scheme=(0.0, -1.0, 0.5, 1.0), filename=None):
#         '''
#         chip <string>: Chip to be played by the agent. Must be either "X" or "O".
#         reward_scheme <tuple>: (reward for a non-terminal move, reward for losing, reward for a tied game, reward for winning).
#         filename <string>: Filename to load or save the Q-Table.
#         '''
#         if chip not in ["X", "O"]:
#             raise ValueError("chip must be 'X' or 'O'")
        
#         self.chip = chip
#         self.rewards = reward_scheme
#         self.filename = filename
#         self.q_table = {}  # Q-Table as a dictionary to store state-action values

#         # Load Q-Table if a filename is provided
#         if filename:
#             self.import_Qtable(filename)

#     def get_state_representation(self, board):
#         """
#         Converts the game board into a tuple using booleans and empty spaces.
#         Returns:
#             A tuple representing the board state where:
#             - None: The cell is empty.
#             - True: The cell contains the agent's chip.
#             - False: The cell contains the opponent's chip.
#         """
#         agent_chip = True
#         opponent_chip = False
#         state = []

#         for row in range(6):
#             for col in range(7):
#                 if board[row][col] == self.chip:
#                     state.append(agent_chip)
#                 elif board[row][col] != "-":  # "-" indicates an empty cell
#                     state.append(opponent_chip)
#                 else:
#                     state.append(None)
#         return tuple(state)  # Convert the state to a tuple to use as a key in the Q-Table

#     def get_available_actions(self, board):
#         """
#         Returns a list of available columns where a chip can be placed.
#         """
#         return [col for col in range(7) if board[0][col] == "-"]  # Check the top cell of each column

#     def choose_action(self, board, epsilon=0.1):
#         """
#         Chooses an action using the epsilon-greedy strategy.
#         """
#         state = self.get_state_representation(board)
#         available_actions = self.get_available_actions(board)

#         if random.random() < epsilon:
#             # Explore: choose a random action
#             return random.choice(available_actions)
#         else:
#             # Exploit: choose the action with the highest Q-value
#             q_values = [self.q_table.get((state, action), 0.0) for action in available_actions]
#             max_q_value = max(q_values)
#             best_actions = [action for action, q in zip(available_actions, q_values) if q == max_q_value]
#             return random.choice(best_actions)  # Choose randomly among the best actions

#     def update_q_table(self, board, action, reward, next_board, alpha=0.1, gamma=0.9):
#         """
#         Updates the Q-Table using the Q-Learning formula.
#         """
#         state = self.get_state_representation(board)
#         next_state = self.get_state_representation(next_board)
#         next_available_actions = self.get_available_actions(next_board)

#         # Current Q-value
#         current_q_value = self.q_table.get((state, action), 0.0)

#         # Max Q-value for the next state
#         if next_available_actions:
#             next_q_values = [self.q_table.get((next_state, next_action), 0.0) for next_action in next_available_actions]
#             max_next_q_value = max(next_q_values)
#         else:
#             max_next_q_value = 0.0  # No future actions if the game is over

#         # Q-Learning update
#         new_q_value = current_q_value + alpha * (reward + gamma * max_next_q_value - current_q_value)
#         self.q_table[(state, action)] = new_q_value

#     def import_Qtable(self, filename):
#         """
#         Imports the Q-Table from a file.
#         """
#         try:
#             with open(filename, 'rb') as file:
#                 self.q_table = pickle.load(file)
#         except FileNotFoundError:
#             print("File not found. Starting with an empty Q-Table.")

#     def export_Qtable(self, filename):
#         """
#         Exports the Q-Table to a file.
#         """
#         with open(filename, 'wb') as file:
#             pickle.dump(self.q_table, file)

In [34]:
# # Crear una instancia del agente
# agent = Connect4Agent(chip="X", reward_scheme=(0.0, -1.0, 0.5, 1.0))

# # Ejemplo de un tablero de juego
# board = [
#     ["-", "-", "-", "-", "-", "-", "-"],
#     ["-", "-", "-", "-", "-", "-", "-"],
#     ["-", "-", "-", "-", "X", "-", "-"],
#     ["-", "-", "-", "X", "O", "-", "-"],
#     ["X", "X", "X", "O", "X", "-", "-"],
#     ["O", "X", "O", "X", "O", "-", "-"]
# ]

# # Escoger una acción con un 10% de exploración y 90% de explotación
# epsilon = 0.1  # Tasa de exploración
# action = agent.choose_action(board, epsilon)
# print("Chosen action (column):", action)

Chosen action (column): 5
