# Laboratorio 7

## Integrantes

### Sergio Orellana - 221122

### Andre Marroquin - 22266

### Rodrigo Mansilla - 22611

# Link del repositorio

https://github.com/mar22266/LABORATORIOS-IA.git

# Link del video


# TASK 1

# Task 2

In [1]:
import math
import random
import copy
import numpy as np
import tensorflow as tf
from tensorflow.keras import models, layers, optimizers

# Parámetros del juego y del algoritmo Q-learning
ROW_COUNT = 6
COLUMN_COUNT = 7
EMPTY = 0

# Parámetros del Q-learning
ALPHA = 0.001       # Tasa de aprendizaje para la red (usado en el optimizador)
GAMMA = 0.95        # Factor de descuento
EPSILON = 1.0       # Tasa de exploración inicial
EPSILON_MIN = 0.1   # Valor mínimo de epsilon
EPSILON_DECAY = 0.995  # Factor de decaimiento de epsilon por episodio

# Parámetros del modelo
INPUT_SIZE = ROW_COUNT * COLUMN_COUNT * 3  # 126, por la representación one-hot
OUTPUT_SIZE = COLUMN_COUNT  # 7 acciones (columnas)

# ---------------- Modelo de Red Neuronal ---------------- #
def build_model():
    model = models.Sequential()
    model.add(layers.Dense(128, input_dim=INPUT_SIZE, activation='relu'))
    model.add(layers.Dense(64, activation='relu'))
    model.add(layers.Dense(OUTPUT_SIZE, activation='linear'))  # Q-values para cada acción
    model.compile(loss='mse', optimizer=optimizers.Adam(learning_rate=ALPHA))
    return model

# Inicializamos el modelo globalmente
q_model = build_model()

# ---------------- Funciones del juego ---------------- #
def create_board():
    board = [[EMPTY for _ in range(COLUMN_COUNT)] for _ in range(ROW_COUNT)]
    return board

def drop_piece(board, row, col, piece):
    board[row][col] = piece

def is_valid_location(board, col):
    return board[0][col] == EMPTY

def get_next_open_row(board, col):
    for r in range(ROW_COUNT - 1, -1, -1):
        if board[r][col] == EMPTY:
            return r
    return None

def winning_move(board, piece):
    # Horizontal
    for r in range(ROW_COUNT):
        for c in range(COLUMN_COUNT - 3):
            if (board[r][c] == piece and board[r][c+1] == piece and 
                board[r][c+2] == piece and board[r][c+3] == piece):
                return True
    # Vertical
    for c in range(COLUMN_COUNT):
        for r in range(ROW_COUNT - 3):
            if (board[r][c] == piece and board[r+1][c] == piece and 
                board[r+2][c] == piece and board[r+3][c] == piece):
                return True
    # Diagonal positiva
    for r in range(3, ROW_COUNT):
        for c in range(COLUMN_COUNT - 3):
            if (board[r][c] == piece and board[r-1][c+1] == piece and 
                board[r-2][c+2] == piece and board[r-3][c+3] == piece):
                return True
    # Diagonal negativa
    for r in range(ROW_COUNT - 3):
        for c in range(COLUMN_COUNT - 3):
            if (board[r][c] == piece and board[r+1][c+1] == piece and 
                board[r+2][c+2] == piece and board[r+3][c+3] == piece):
                return True
    return False

def get_winning_positions(board, piece):
    for r in range(ROW_COUNT):
        for c in range(COLUMN_COUNT - 3):
            if (board[r][c] == piece and board[r][c+1] == piece and 
                board[r][c+2] == piece and board[r][c+3] == piece):
                return [(r, c + i) for i in range(4)]
    for c in range(COLUMN_COUNT):
        for r in range(ROW_COUNT - 3):
            if (board[r][c] == piece and board[r+1][c] == piece and 
                board[r+2][c] == piece and board[r+3][c] == piece):
                return [(r + i, c) for i in range(4)]
    for r in range(3, ROW_COUNT):
        for c in range(COLUMN_COUNT - 3):
            if (board[r][c] == piece and board[r-1][c+1] == piece and 
                board[r-2][c+2] == piece and board[r-3][c+3] == piece):
                return [(r - i, c + i) for i in range(4)]
    for r in range(ROW_COUNT - 3):
        for c in range(COLUMN_COUNT - 3):
            if (board[r][c] == piece and board[r+1][c+1] == piece and 
                board[r+2][c+2] == piece and board[r+3][c+3] == piece):
                return [(r + i, c + i) for i in range(4)]
    return []

def get_valid_locations(board):
    valid_locations = []
    for col in range(COLUMN_COUNT):
        if is_valid_location(board, col):
            valid_locations.append(col)
    return valid_locations

def is_terminal_node(board):
    return winning_move(board, 1) or winning_move(board, 2) or len(get_valid_locations(board)) == 0

# Representación del estado: one-hot
def get_state_one_hot(board):
    mapping = {0: [1, 0, 0], 1: [0, 1, 0], 2: [0, 0, 1]}
    state = []
    for row in board:
        for cell in row:
            state.extend(mapping[cell])
    return np.array(state)

def print_board(board, winning_positions=[]):
    for r in range(ROW_COUNT):
        row_str = ""
        for c in range(COLUMN_COUNT):
            cell = board[r][c]
            if (r, c) in winning_positions:
                row_str += "\033[91m" + str(cell) + "\033[0m" + " "
            else:
                row_str += str(cell) + " "
        print(row_str)
    print("")  # Salto de línea

# ---------------- Agente TD Learning con Modelo ---------------- #

def choose_action(state, board, epsilon):
    """
    Selecciona una acción usando política ε-greedy.
    Si random < epsilon, elige acción aleatoria de entre las válidas.
    De lo contrario, elige la acción con mayor Q-value entre las válidas.
    """
    valid_actions = get_valid_locations(board)
    if np.random.rand() < epsilon:
        return random.choice(valid_actions)
    # Predecir Q-values con el modelo
    state_input = state.reshape(1, INPUT_SIZE)
    q_values = q_model.predict(state_input, verbose=0)[0]
    # Filtrar Q-values solo para acciones válidas
    q_valid = {action: q_values[action] for action in valid_actions}
    return max(q_valid, key=q_valid.get)

def update_Q(state, action, reward, next_state, done):
    """
    Actualiza el modelo (red neuronal) usando un paso de Q-learning.
    Calcula el target para la acción tomada y realiza una actualización.
    """
    state_input = state.reshape(1, INPUT_SIZE)
    next_state_input = next_state.reshape(1, INPUT_SIZE)
    # Predicción actual de Q(s, :)
    q_values = q_model.predict(state_input, verbose=0)
    # Predicción de Q(s', :)
    q_next = q_model.predict(next_state_input, verbose=0)
    target = q_values.copy()
    if done:
        target[0][action] = reward
    else:
        target[0][action] = reward + GAMMA * np.max(q_next)
    # Entrenamiento de la red para ajustar el Q-value de la acción tomada
    q_model.fit(state_input, target, epochs=1, verbose=0)

# ---------------- Flujo de Juego ---------------- #

# Variable global para acceder al tablero actual en choose_action
current_board = None

def iniciar_juego(mode="human_vs_ai", depth=4, episodes=1):
    global current_board, EPSILON
    board = create_board()
    current_board = board
    game_over = False
    turn = 0
    print_board(board)
    while not game_over:
        if mode == "human_vs_ai":
            if turn == 0:
                # Turno del humano
                valid_cols = get_valid_locations(board)
                col = -1
                while col not in valid_cols:
                    try:
                        col = int(input(f"Col (0-{COLUMN_COUNT-1}): "))
                        if col not in valid_cols:
                            print("No válido, intente otra vez.")
                    except ValueError:
                        print("Entrada inválida.")
                row = get_next_open_row(board, col)
                drop_piece(board, row, col, 1)
                if winning_move(board, 1):
                    winning_pos = get_winning_positions(board, 1)
                    print_board(board, winning_pos)
                    print("¡Ganaste!")
                    game_over = True
                else:
                    print_board(board)
            else:
                # Turno de la IA con TD Learning
                print("IA pensando (TD Learning)...")
                state = get_state_one_hot(board)
                col = choose_action(state, board, EPSILON)
                if col not in get_valid_locations(board):
                    col = random.choice(get_valid_locations(board))
                row = get_next_open_row(board, col)
                drop_piece(board, row, col, 2)
                
                if winning_move(board, 2):
                    reward = 100
                    game_over = True
                elif is_terminal_node(board):
                    reward = 0
                    game_over = True
                else:
                    reward = -1
                
                next_state = get_state_one_hot(board)
                update_Q(state, col, reward, next_state, game_over)
                
                if winning_move(board, 2):
                    winning_pos = get_winning_positions(board, 2)
                    print_board(board, winning_pos)
                    print("IA gana.")
                else:
                    print_board(board)
                
                # Decaimiento de epsilon
                if EPSILON > EPSILON_MIN:
                    EPSILON *= EPSILON_DECAY
                
            turn = (turn + 1) % 2
            current_board = board
        elif mode == "ai_vs_ai":
            # Conservamos el modo IA vs IA con minimax (no modificado aquí)
            if turn == 0:
                print("IA sin poda...")
                use_pruning = False
                col, score = minimax(board, depth, -math.inf, math.inf, True, 1, use_pruning)
                piece = 1
            else:
                print("IA con poda...")
                use_pruning = True
                col, score = minimax(board, depth, -math.inf, math.inf, True, 2, use_pruning)
                piece = 2
            if col is None:
                print("Empate!")
                game_over = True
            else:
                row = get_next_open_row(board, col)
                drop_piece(board, row, col, piece)
                if winning_move(board, piece):
                    winning_pos = get_winning_positions(board, piece)
                    print_board(board, winning_pos)
                    if turn == 0:
                        print("IA sin poda gana!")
                    else:
                        print("IA con poda gana!")
                    game_over = True
                else:
                    print_board(board)
            turn = (turn + 1) % 2
        else:
            print("Modo no válido.")
            break

def ejecutar():
    print("1. Humano vs IA")
    print("2. IA vs IA")
    mode_input = input("Elija 1 o 2: ")
    if mode_input == "1":
        print("Modo: Humano vs IA")
        iniciar_juego(mode="human_vs_ai")
    elif mode_input == "2":
        print("Modo: IA vs IA")
        iniciar_juego(mode="ai_vs_ai")
    else:
        print("Opción no válida.")

ejecutar()


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


1. Humano vs IA
2. IA vs IA
Opción no válida.
