In [None]:
import tkinter as tk
from tkinter import messagebox
import random
import time

class Connect4:
    """Tkinter Connect 4 board on which two Q-learning agents play each other.

    The board is a ``rows`` x ``columns`` grid of integers: 0 for an empty
    cell, 1 for agent 1 and -1 for agent 2. Row 0 is the top of the board.
    Constructing the object builds the window, creates both agents and
    immediately runs the self-play training loop.
    """

    def __init__(self):
        self.window = tk.Tk()
        self.window.title("Connect 4")

        # Board configuration
        self.rows = 6
        self.columns = 7
        # 0 = empty, 1 = agent 1, -1 = agent 2
        self.board = [[0 for _ in range(self.columns)] for _ in range(self.rows)]
        self.agent_turn = 1  # 1 = first agent to move, -1 = second agent
        # Becomes True once Tk reports that the window no longer exists
        self._window_closed = False

        # Drawing area for the board
        self.canvas = tk.Canvas(self.window, width=self.columns * 100, height=self.rows * 100, bg="#1E90FF", highlightthickness=0)
        self.canvas.grid(row=0, column=0, columnspan=self.columns, padx=5, pady=5)

        # One canvas oval per cell, indexed as circles[row][column]
        self.circles = [[self.canvas.create_oval(j * 100 + 10, i * 100 + 10, j * 100 + 90, i * 100 + 90, fill="white", outline="black", width=3) for j in range(self.columns)] for i in range(self.rows)]

        # Let the grid cells stretch with the window
        for i in range(self.columns):
            self.window.grid_columnconfigure(i, weight=1)
        self.window.grid_rowconfigure(0, weight=1)

        # Create and train the agents
        self.agent1 = Connect4RL()
        self.agent2 = Connect4RL()
        self.train_agents(1000)

    def train_agents(self, episodes, delay=0.5):
        """Let the two agents play ``episodes`` games against each other.

        Args:
            episodes: Number of complete games to play.
            delay: Seconds to pause after every move so the game can be
                watched; pass 0 to train at full speed.

        Closing the window while training is running makes Tk raise
        ``TclError`` on the next widget call. That is treated as a request
        to stop: training ends quietly and ``run`` will not enter the event
        loop afterwards.
        """
        try:
            for _ in range(episodes):
                self._play_episode(delay)
        except tk.TclError:
            # The window (and therefore the canvas) was destroyed mid-training.
            self._window_closed = True

    def _play_episode(self, delay):
        """Play one game from an empty board, updating Q-tables after each move."""
        self.reset_game()
        done = False

        while not done:
            player = self.agent_turn
            agent = self.agent1 if player == 1 else self.agent2

            # Remember the position the action is chosen from; the Q-update
            # needs it together with the position that results from the move.
            state = self.get_state()
            action = agent.choose_action(state)
            if action is None:
                # The agent reports no playable column; nothing more to do.
                break

            row = self.make_move(action, player)
            if row is not None:  # The move was legal
                if self.check_winner(row, action):
                    # Reward is from the mover's point of view: winning is
                    # good for whichever agent just played.
                    reward = 1
                    done = True
                elif self.is_draw():
                    reward = 0
                    done = True
                else:
                    reward = 0
                    self.agent_turn = -player  # Hand the turn over

                # Credit the agent that actually made the move.
                agent.update_q_value(state, action, reward, self.get_state())

            # Redraw and pace the game
            self.window.update()
            time.sleep(delay)

    def make_move(self, column, player):
        """Drop ``player``'s piece into ``column``.

        Returns:
            The row index the piece landed on, or None if the column is full.
        """
        # Scan from the bottom row upwards for the first empty cell
        for row in range(self.rows - 1, -1, -1):
            if self.board[row][column] == 0:
                self.board[row][column] = player
                self.canvas.itemconfig(self.circles[row][column], fill="red" if player == 1 else "yellow")
                return row
        return None

    def get_state(self):
        """Return the board as a hashable tuple of row tuples."""
        return tuple(tuple(row) for row in self.board)

    def check_winner(self, row, column):
        """Return True if the cell at (row, column) is part of a line of four or more."""
        # Vertical, horizontal and the two diagonals; each is scanned both ways
        directions = [(1, 0), (0, 1), (1, 1), (1, -1)]
        for dr, dc in directions:
            count = 1
            count += self.count_direction(row, column, dr, dc)
            count += self.count_direction(row, column, -dr, -dc)
            if count >= 4:
                return True
        return False

    def count_direction(self, row, column, dr, dc):
        """Count consecutive same-player cells stepping (dr, dc) away from (row, column).

        The player is the owner of the cell at (row, column), so the result
        does not depend on whose turn it currently is. If that cell is empty
        the count is made for ``self.agent_turn`` instead.
        """
        player = self.board[row][column] or self.agent_turn
        count = 0
        current_row, current_column = row + dr, column + dc
        while 0 <= current_row < self.rows and 0 <= current_column < self.columns and self.board[current_row][current_column] == player:
            count += 1
            current_row += dr
            current_column += dc
        return count

    def is_draw(self):
        """Return True when every cell of the top row is occupied."""
        return all(self.board[0][col] != 0 for col in range(self.columns))

    def reset_game(self):
        """Clear the board, give the turn back to agent 1 and repaint every cell white."""
        self.board = [[0 for _ in range(self.columns)] for _ in range(self.rows)]
        self.agent_turn = 1
        for i in range(self.rows):
            for j in range(self.columns):
                self.canvas.itemconfig(self.circles[i][j], fill="white")

    def run(self):
        """Enter the Tk event loop unless the window was closed during training."""
        if self._window_closed:
            return
        self.window.mainloop()


class Connect4RL:
    """Tabular Q-learning agent with an epsilon-greedy policy.

    A state is a tuple of row tuples whose first row is the top of the
    board; an action is a column index. Q-values are stored in a dict keyed
    by ``(state, action)`` and default to 0 for unseen pairs.
    """

    def __init__(self):
        self.q_table = {}  # Maps (state, action) -> Q-value
        self.learning_rate = 0.1
        self.discount_factor = 0.95
        self.exploration_rate = 1.0  # Initial epsilon
        self.exploration_decay = 0.99  # Multiplier applied to epsilon on each update
        self.min_exploration_rate = 0.01  # Floor for epsilon

    def choose_action(self, state):
        """Pick a column epsilon-greedily.

        Returns:
            A playable column index, or None when no column is playable
            (the same convention ``get_best_action`` uses).
        """
        valid_actions = self.get_valid_actions(state)
        if not valid_actions:
            return None
        if random.random() < self.exploration_rate:
            return random.choice(valid_actions)  # Explore
        return self.get_best_action(state)  # Exploit

    def get_valid_actions(self, state):
        """Return the columns whose top cell is still empty.

        The number of columns is taken from the width of the top row.
        """
        return [column for column, cell in enumerate(state[0]) if cell == 0]

    def get_best_action(self, state):
        """Return the playable column with the highest Q-value, or None if there is none.

        Ties are resolved in favour of the lowest column index.
        """
        valid_actions = self.get_valid_actions(state)
        if not valid_actions:
            return None
        return max(valid_actions, key=lambda action: self.q_table.get((state, action), 0))

    def update_q_value(self, state, action, reward, next_state):
        """Apply one Q-learning update for ``(state, action)`` and decay epsilon.

        Args:
            state: Position the action was taken from.
            action: Column that was played.
            reward: Immediate reward for the move.
            next_state: Position reached after the move.
        """
        current_q = self.q_table.get((state, action), 0)
        valid_actions = self.get_valid_actions(next_state)
        # A position with no playable column contributes no future value
        max_future_q = max((self.q_table.get((next_state, a), 0) for a in valid_actions), default=0)
        new_q = (1 - self.learning_rate) * current_q + self.learning_rate * (reward + self.discount_factor * max_future_q)
        self.q_table[(state, action)] = new_q

        # Decay epsilon, never stepping below the configured floor
        if self.exploration_rate > self.min_exploration_rate:
            self.exploration_rate = max(
                self.min_exploration_rate,
                self.exploration_rate * self.exploration_decay,
            )


if __name__ == "__main__":
    # Constructing Connect4 builds the window and trains both agents before returning.
    game = Connect4()  # Initialise the game
    game.run()  # Start the game window's event loop


TclError: invalid command name ".!canvas"

: 