**Juego de Cuatro en Raya**
Nombre: Samantha Mikaela Hinojosa Yucra   CU: 35-5346

**Reglas Básicas del Juego:**
- Dos jugadores tiran fichas en una cuadrícula de 6 filas × 7 columnas.

- Las fichas caen al fondo de la columna.

- El primero en conectar cuatro fichas en línea (horizontal, vertical o diagonal) gana.

Estrategia de aprendizaje:
Usamos Q-learning, donde el agente guarda una tabla Q(s, a) que le dice qué tan buena es cada acción a en un estado s.

In [1]:
import numpy as np
import random

# Board configuration
ROWS = 6
COLUMNS = 7
WIN_COUNT = 4


# CONNECT FOUR ENVIRONMENT

class ConnectFour:
    """Connect Four game state on a ROWS x COLUMNS grid.

    Cells hold 0 (empty) or the number of the player (1 or 2) who owns the
    piece. Row 0 is the top of the board, so a dropped piece settles at the
    largest free row index of its column.
    """

    def __init__(self):
        self.reset()

    def reset(self):
        """Empty the board, hand the turn to player 1 and return the state."""
        self.current_player = 1
        self.board = np.zeros((ROWS, COLUMNS), dtype=int)
        return self.get_state()

    def get_state(self):
        """Return a copy of the board so callers cannot mutate the game."""
        return self.board.copy()

    def available_actions(self):
        """Return the columns whose top cell is still empty."""
        top_row = self.board[0]
        return [col for col in range(COLUMNS) if top_row[col] == 0]

    def step(self, action):
        """Drop the current player's piece into column ``action``.

        Returns a ``(state, reward, done)`` tuple. The reward is 1 for a
        winning move, 0.5 for a move that fills the board without a winner,
        0 otherwise, and -10 (with ``done`` set) for an illegal column, in
        which case the board and the turn are left untouched.
        """
        if action not in self.available_actions():
            return self.get_state(), -10, True  # penalty for an invalid action

        mover = self.current_player

        # The legality check above guarantees at least one empty cell here;
        # scan from the bottom row upwards and take the first one.
        landing_row = next(
            row for row in range(ROWS - 1, -1, -1)
            if self.board[row, action] == 0
        )
        self.board[landing_row, action] = mover

        if self.check_winner(mover):
            reward, done = 1, True
        elif not self.available_actions():
            reward, done = 0.5, True  # draw: board is full
        else:
            reward, done = 0, False

        self.current_player = 3 - mover  # toggle between 1 and 2
        return self.get_state(), reward, done

    def check_winner(self, player):
        """Return True if ``player`` has WIN_COUNT pieces in a straight line."""
        # (row step, column step): horizontal, vertical, both diagonals.
        directions = ((0, 1), (1, 0), (1, 1), (-1, 1))
        reach = WIN_COUNT - 1

        for d_row, d_col in directions:
            for row in range(ROWS):
                for col in range(COLUMNS):
                    last_row = row + reach * d_row
                    last_col = col + reach * d_col
                    # Skip start cells whose line would leave the board.
                    if not (0 <= last_row < ROWS and 0 <= last_col < COLUMNS):
                        continue
                    if all(
                        self.board[row + k * d_row, col + k * d_col] == player
                        for k in range(WIN_COUNT)
                    ):
                        return True

        return False

In [2]:
# AGENTE Q-LEARNING
class QAgent:
    """Tabular epsilon-greedy Q-learning agent.

    The Q-table maps a hashed board state to an array with one value per
    action. Unseen states are created lazily with all-zero values.

    Args:
        alpha: Learning rate of the Q update.
        gamma: Discount applied to the bootstrapped future value.
        epsilon: Initial probability of choosing a random legal action.
        epsilon_decay: Factor applied to ``epsilon`` after every ``learn``
            call (i.e. once per update, not once per episode).
        epsilon_min: Floor below which ``epsilon`` is never lowered.
        n_actions: Size of each Q-table row. Defaults to the module-level
            ``COLUMNS`` so existing callers are unaffected.
    """

    def __init__(self, alpha=0.1, gamma=0.95, epsilon=1.0, epsilon_decay=0.9995,
                 epsilon_min=0.01, n_actions=None):
        self.q_table = {}
        self.alpha = alpha
        self.gamma = gamma
        self.epsilon = epsilon
        self.epsilon_decay = epsilon_decay
        self.epsilon_min = epsilon_min
        self.n_actions = COLUMNS if n_actions is None else n_actions

    def get_qs(self, state):
        """Return the (mutable) Q-value row for ``state``, creating it if new."""
        key = self._hash_state(state)
        if key not in self.q_table:
            self.q_table[key] = np.zeros(self.n_actions)
        return self.q_table[key]

    def choose_action(self, state, available_actions):
        """Pick an action from ``available_actions`` epsilon-greedily.

        With probability ``epsilon`` a uniformly random legal action is
        returned; otherwise one of the legal actions with the highest
        Q-value is returned.
        """
        if np.random.rand() < self.epsilon:
            return random.choice(available_actions)

        qs = self.get_qs(state)
        best_value = max(qs[a] for a in available_actions)
        # Break ties at random: with a zero-initialised table a plain max()
        # would always return the first (lowest) column in unseen states.
        best_actions = [a for a in available_actions if qs[a] == best_value]
        return random.choice(best_actions)

    def learn(self, old_state, action, reward, new_state, done, available_actions):
        """Apply one Q-learning update for the given transition.

        ``available_actions`` are the legal actions in ``new_state``; they are
        ignored when ``done`` is true. Also decays ``epsilon`` by one step.
        """
        qs = self.get_qs(old_state)

        if done:
            future_q = 0.0
        else:
            next_qs = self.get_qs(new_state)
            # default=0.0 keeps a non-terminal state without legal actions
            # from raising ValueError on an empty sequence.
            future_q = max((next_qs[a] for a in available_actions), default=0.0)

        # qs is the stored row itself, so updating it in place updates the table.
        qs[action] += self.alpha * (reward + self.gamma * future_q - qs[action])

        if self.epsilon > self.epsilon_min:
            # Clamp so the last decay step cannot undershoot the floor.
            self.epsilon = max(self.epsilon_min, self.epsilon * self.epsilon_decay)

    def _hash_state(self, state):
        """Return a hashable key (flat tuple of ints) for a board state."""
        return tuple(np.asarray(state).ravel().tolist())



In [3]:
# TRAINING (self-play)
# A single agent plays both colours. Each player's move is only updated once
# its real outcome is known: either the game ended, or the opponent has replied
# and it is that player's turn again. Bootstrapping directly from the state
# right after a move would use the *opponent's* best value as if it were the
# mover's own, and the losing side would never see a negative reward.
env = ConnectFour()
agent = QAgent()

episodes = 10000

for episode in range(episodes):
    state = env.reset()
    done = False
    # Last (state, action) of each player that is still waiting for its update.
    pending = {1: None, 2: None}

    while not done:
        player = env.current_player
        available = env.available_actions()

        # The opponent has answered, so this player's previous move can now
        # bootstrap from a state in which the same player is to move.
        if pending[player] is not None:
            prev_state, prev_action = pending[player]
            agent.learn(prev_state, prev_action, 0, state, False, available)

        action = agent.choose_action(state, available)
        next_state, reward, done = env.step(action)

        if done:
            # Terminal update for the player who just moved (win or draw).
            agent.learn(state, action, reward, next_state, True, [])

            # The other player's last move led to this ending: a win for the
            # mover is a loss for them, a draw is shared.
            rival = 3 - player
            if pending[rival] is not None:
                rival_state, rival_action = pending[rival]
                rival_reward = -reward if reward == 1 else reward
                agent.learn(rival_state, rival_action, rival_reward, next_state, True, [])
        else:
            pending[player] = (state, action)

        state = next_state

    if episode % 1000 == 0:
        print(f"📘 Episodio {episode} - Epsilon: {agent.epsilon:.4f}")

print("Entrenamiento completo!")

📘 Episodio 0 - Epsilon: 0.9905
📘 Episodio 1000 - Epsilon: 0.0100
📘 Episodio 2000 - Epsilon: 0.0100
📘 Episodio 3000 - Epsilon: 0.0100
📘 Episodio 4000 - Epsilon: 0.0100
📘 Episodio 5000 - Epsilon: 0.0100
📘 Episodio 6000 - Epsilon: 0.0100
📘 Episodio 7000 - Epsilon: 0.0100
📘 Episodio 8000 - Epsilon: 0.0100
📘 Episodio 9000 - Epsilon: 0.0100
🎉 ¡Entrenamiento completo!


In [None]:
# 5. JUGAR CONTRA EL AGENTE

def play_against_agent(agent):
    """Play one interactive console game against ``agent``.

    The human is player 1 and types a column number; the agent is player 2
    and moves through ``agent.choose_action`` (so its current exploration
    rate still applies). The board is printed before every move and the
    result is announced when the game ends.

    Args:
        agent: Object exposing ``choose_action(state, available_actions)``.
    """
    env = ConnectFour()
    state = env.reset()
    done = False
    reward = 0
    mover = None  # player who made the most recent accepted move

    while not done:
        print(env.board)
        legal_moves = env.available_actions()

        if env.current_player == 1:
            try:
                action = int(input("Tu jugada (0-6): "))
            except ValueError:
                # Only non-numeric text is retried. A bare except here would
                # also swallow KeyboardInterrupt and make the loop unstoppable.
                print("Entrada inválida.")
                continue
        else:
            action = agent.choose_action(state, legal_moves)
            print(f"🤖 Agente juega en columna: {action}")

        if action not in legal_moves:
            print("⚠️ Acción inválida. Intenta de nuevo.")
            continue

        # Remember who moves: env.step flips current_player before returning.
        mover = env.current_player
        state, reward, done = env.step(action)

    print("🎮 Juego terminado")
    print(env.board)
    if reward == 1 and mover == 1:
        print("🏆 ¡Ganaste!")
    elif reward == 1:
        print(" Perdiste. Ganó el agente.")
    else:
        print(" Empate.")

# Starts an interactive game against the trained agent as soon as this cell
# runs (it blocks on console input); comment the call out to skip playing.
play_against_agent(agent)

[[0 0 0 0 0 0 0]
 [0 0 0 0 0 0 0]
 [0 0 0 0 0 0 0]
 [0 0 0 0 0 0 0]
 [0 0 0 0 0 0 0]
 [0 0 0 0 0 0 0]]
Tu jugada (0-6): 1
[[0 0 0 0 0 0 0]
 [0 0 0 0 0 0 0]
 [0 0 0 0 0 0 0]
 [0 0 0 0 0 0 0]
 [0 0 0 0 0 0 0]
 [0 1 0 0 0 0 0]]
🤖 Agente juega en columna: 0
[[0 0 0 0 0 0 0]
 [0 0 0 0 0 0 0]
 [0 0 0 0 0 0 0]
 [0 0 0 0 0 0 0]
 [0 0 0 0 0 0 0]
 [2 1 0 0 0 0 0]]
Tu jugada (0-6): 3
[[0 0 0 0 0 0 0]
 [0 0 0 0 0 0 0]
 [0 0 0 0 0 0 0]
 [0 0 0 0 0 0 0]
 [0 0 0 0 0 0 0]
 [2 1 0 1 0 0 0]]
🤖 Agente juega en columna: 0
[[0 0 0 0 0 0 0]
 [0 0 0 0 0 0 0]
 [0 0 0 0 0 0 0]
 [0 0 0 0 0 0 0]
 [2 0 0 0 0 0 0]
 [2 1 0 1 0 0 0]]
Tu jugada (0-6): 4
[[0 0 0 0 0 0 0]
 [0 0 0 0 0 0 0]
 [0 0 0 0 0 0 0]
 [0 0 0 0 0 0 0]
 [2 0 0 0 0 0 0]
 [2 1 0 1 1 0 0]]
🤖 Agente juega en columna: 0
[[0 0 0 0 0 0 0]
 [0 0 0 0 0 0 0]
 [0 0 0 0 0 0 0]
 [2 0 0 0 0 0 0]
 [2 0 0 0 0 0 0]
 [2 1 0 1 1 0 0]]
