<a href="https://colab.research.google.com/github/rafaelsaraviagrass/SIS420/blob/main/2doParcial_AprendizajePorRefuerzo.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [14]:
# ==========================
# 1. LIBRERÍAS
# ==========================
import numpy as np
import random
import matplotlib.pyplot as plt

In [15]:
# ==========================
# 2. CLASE DEL JUEGO
# ==========================
class FourInARow:
    """Connect-four style game played on a `rows` x `cols` integer grid.

    Cells hold 0 when empty and 1 or 2 for the respective player's pieces.
    Row 0 is the top of the board; pieces settle in the highest-index empty
    row of the chosen column. `player_turn` holds the player about to move.
    """

    # Scan directions as (row step, column step): horizontal, vertical,
    # descending diagonal and ascending diagonal.
    _DIRECTIONS = ((0, 1), (1, 0), (1, 1), (-1, 1))

    def __init__(self, rows=6, cols=7):
        self.rows = rows
        self.cols = cols
        self.player_turn = 1
        self.board = np.zeros((rows, cols), dtype=int)

    def reset_board(self):
        """Clear the grid, hand the turn to player 1 and return a board copy."""
        self.player_turn = 1
        self.board = np.zeros((self.rows, self.cols), dtype=int)
        return self.board.copy()

    def legal_moves(self):
        """Return the column indices whose top cell is still empty."""
        top_row = self.board[0]
        return [col for col in range(self.cols) if top_row[col] == 0]

    def place_piece(self, col):
        """Drop the current player's piece into `col`.

        Returns a `(board_copy, reward, finished)` tuple. An illegal column
        yields reward -5 and `finished=True` without touching the board or
        the turn. Otherwise reward is 1 when the move wins and 0 if not, and
        `finished` is True on a win or when no legal move remains; the turn
        then passes to the other player.
        """
        if col not in self.legal_moves():
            return self.board.copy(), -5, True  # penalty for an illegal move

        mover = self.player_turn
        # A legal column has at least one empty cell; the piece lands in the
        # lowest one, i.e. the empty cell with the largest row index.
        landing_row = max(
            row for row in range(self.rows) if self.board[row, col] == 0
        )
        self.board[landing_row, col] = mover

        won = self.has_winner(mover)
        reward = 1 if won else 0
        finished = won or not self.legal_moves()
        self.player_turn = 1 if mover != 1 else 2
        return self.board.copy(), reward, finished

    def has_winner(self, player):
        """Return True if `player` owns four aligned cells in any direction."""
        grid = self.board
        for d_row, d_col in self._DIRECTIONS:
            for row in range(self.rows):
                # Skip starts whose fourth cell would leave the grid.
                if not 0 <= row + 3 * d_row < self.rows:
                    continue
                for col in range(self.cols):
                    if not 0 <= col + 3 * d_col < self.cols:
                        continue
                    if all(
                        grid[row + k * d_row, col + k * d_col] == player
                        for k in range(4)
                    ):
                        return True
        return False

In [16]:
# ==========================
# 3. AGENTE Q-Learning
# ==========================
class LearnerAgent:
    """Tabular epsilon-greedy Q-learning agent keyed by flattened boards.

    Parameters:
        alpha: learning rate applied to each temporal-difference error.
        gamma: discount factor for the bootstrapped next-state value.
        epsilon: initial probability of picking a random action.
        decay: multiplicative factor applied to `epsilon` on every
            `update_q` call (per update, not per episode).
        eps_min: lower bound for `epsilon`.
        n_actions: number of actions (board columns) per state; sizes every
            Q-value row. Defaults to 7, the standard board width.
    """

    def __init__(self, alpha=0.1, gamma=0.99, epsilon=1.0, decay=0.9993,
                 eps_min=0.02, n_actions=7):
        self.q = dict()  # state key -> array of `n_actions` Q-values
        self.alpha = alpha
        self.gamma = gamma
        self.epsilon = epsilon
        self.decay = decay
        self.eps_min = eps_min
        self.n_actions = n_actions

    def state_key(self, board):
        """Return a hashable key for `board` (its cells as a flat tuple)."""
        return tuple(board.flatten())

    def _q_row(self, key):
        """Return the stored Q-value row for `key`, creating a zero row if absent."""
        row = self.q.get(key)
        if row is None:
            row = np.zeros(self.n_actions)
            self.q[key] = row
        return row

    def policy(self, board, options):
        """Pick an action from `options` with an epsilon-greedy rule.

        With probability `epsilon` a uniformly random option is returned;
        otherwise the option with the highest stored Q-value. Unseen states
        are scored as all zeros (without being stored), so the first option
        wins the tie.
        """
        if np.random.rand() < self.epsilon:
            return random.choice(options)
        scores = self.q.get(self.state_key(board))
        if scores is None:
            scores = np.zeros(self.n_actions)
        return max(options, key=lambda a: scores[a])

    def update_q(self, state, act, reward, next_state, done, options):
        """Apply one Q-learning update for the transition and decay epsilon.

        The target is `reward + gamma * max_a Q(next_state, a)` over the
        actions in `options`, or just `reward` when `done` is true. The
        bootstrapped term is added with a positive sign, so `next_state` is
        treated as a state valued from the same point of view as `state`.
        """
        current_row = self._q_row(self.state_key(state))
        next_row = self._q_row(self.state_key(next_state))

        q_now = current_row[act]
        if done:
            q_next = 0.0
        else:
            # `default` keeps a non-terminal update with no listed options
            # from raising; such a state contributes no future value.
            q_next = max((next_row[a] for a in options), default=0.0)
        current_row[act] += self.alpha * (reward + self.gamma * q_next - q_now)

        self.epsilon = max(self.eps_min, self.epsilon * self.decay)

In [17]:
# ==========================
# 4. ENTRENAMIENTO
# ==========================
# Self-play training: one agent (and one Q-table) plays both sides.
game = FourInARow()
agent = LearnerAgent()
n_games = 12000

for ep in range(n_games):
    obs = game.reset_board()
    done = False
    # (board, move) of the player who moved last. Its update is delayed until
    # the opponent has replied, so that the "next state" handed to the agent
    # is a board where that same player is to move again. Bootstrapping from
    # the board the opponent faces would credit a move with the opponent's
    # best outcome.
    pending = None

    while not done:
        available = game.legal_moves()
        move = agent.policy(obs, available)
        next_obs, reward, done = game.place_piece(move)

        if pending is not None:
            prev_obs, prev_move = pending
            # The previous mover now sees the result of the reply: a loss
            # (-1) if the reply won the game, otherwise nothing yet.
            prev_reward = -1 if reward == 1 else 0
            agent.update_q(prev_obs, prev_move, prev_reward, next_obs, done,
                           game.legal_moves())

        if done:
            # A game-ending move has no future value to wait for.
            agent.update_q(obs, move, reward, next_obs, done, game.legal_moves())
        else:
            pending = (obs, move)
        obs = next_obs

    if ep % 1500 == 0:
        print(f"🔁 Juego {ep} - Epsilon: {agent.epsilon:.3f}")

print("✅ Entrenamiento finalizado.")

🔁 Juego 0 - Epsilon: 0.982
🔁 Juego 1500 - Epsilon: 0.020
🔁 Juego 3000 - Epsilon: 0.020
🔁 Juego 4500 - Epsilon: 0.020
🔁 Juego 6000 - Epsilon: 0.020
🔁 Juego 7500 - Epsilon: 0.020
🔁 Juego 9000 - Epsilon: 0.020
🔁 Juego 10500 - Epsilon: 0.020
✅ Entrenamiento finalizado.


In [18]:
# ==========================
# 5. JUGAR MANUALMENTE
# ==========================
def jugar_vs_agente(agent):
    """Play one interactive console game: the human is player 1, `agent` player 2.

    The board is printed before every turn. The human types a column index;
    non-integer input and illegal columns are reported and asked again. The
    agent chooses through `agent.policy` with exploration switched off, so it
    always plays its best-known move. The final board and the outcome are
    printed when the game ends.
    """
    juego = FourInARow()
    estado = juego.reset_board()
    terminado = False
    r = 0

    while not terminado:
        print(estado)
        if juego.player_turn == 1:
            try:
                eleccion = int(input("🧍 Tu turno (0-6): "))
            except ValueError:
                # Only a malformed number is retried; Ctrl-C and EOF must
                # still be able to end the session instead of looping.
                print("❌ Entrada inválida")
                continue
        else:
            # Play greedily: silence exploration for this decision and put
            # the agent's epsilon back afterwards.
            epsilon_guardado = agent.epsilon
            agent.epsilon = 0.0
            try:
                eleccion = agent.policy(estado, juego.legal_moves())
            finally:
                agent.epsilon = epsilon_guardado
            print(f"🤖 Agente elige: {eleccion}")

        if eleccion not in juego.legal_moves():
            print("⚠️ Movimiento no válido")
            continue

        estado, r, terminado = juego.place_piece(eleccion)

    print("🎯 Resultado Final:")
    print(estado)
    # place_piece already passed the turn on, so after a winning move by the
    # human (player 1) the turn indicator reads 2.
    if r == 1 and juego.player_turn == 2:
        print("🥳 ¡Ganaste!")
    elif r == 1:
        print("🤖 El agente ganó.")
    else:
        print("🤝 Empate.")

# Descomenta para jugar
# jugar_vs_agente(agent)