# Imports y variables

In [1]:
import os
import sys
# Add the parent of the current working directory to the import path;
# presumably the `games` and `agents` packages imported below live there.
current_dir = os.getcwd()
parent_dir = os.path.dirname(current_dir)
sys.path.append(parent_dir)

In [2]:
from games.nocca_nocca.nocca_nocca import NoccaNocca
from agents.agent_random import RandomAgent
from agents.minimax import MiniMax
from agents.mcts_t import MonteCarloTreeSearch
import numpy as np
from collections import defaultdict
import matplotlib.pyplot as plt
import seaborn as sns
import time
import pandas as pd
from datetime import datetime
import csv
import re


# HyperParameters

## Setup


In [10]:
# --- Hyper-parameters to explore (adjust the ranges as needed) ---
mcts_simulations_options = [30, 50, 70] 
mcts_rollouts_options = [2, 3] 
mcts_depth_options = [2, 3] 

minimax_depth_options = [1, 2] # NOTE(review): the original note said "depth 9 is already the whole game [2, 4, 6, 8]"; it looks inherited from another game — confirm it applies here

# Number of games played for every combination
# of hyper-parameters and match-up
N_GAMES_PER_CONFIG = 10

# --- The two time-penalty weights ---
# Used by the final analysis as: score = avg_reward - WEIGHT_TIME * avg_time_ms
WEIGHT_TIME_WIN_PRIORITY = 0.00001
WEIGHT_TIME_SPEED_PRIORITY = 0.01

# Label -> weight; the final analysis is repeated once per entry.
weight_time_configs = {
    "Priorizando Victoria": WEIGHT_TIME_WIN_PRIORITY,
    "Priorizando Velocidad": WEIGHT_TIME_SPEED_PRIORITY
}

# --- Structures that hold the detailed results ---
# all_results is created here and filled in by the execution cells
# (config name -> agent role -> metric name -> list with one value per game).
all_results = defaultdict(lambda: defaultdict(lambda: defaultdict(list)))
# detailed_csv_rows is also created here; one dict per (configuration, agent role).
detailed_csv_rows = []

# --- Where the results are saved ---
# A timestamped directory is created as soon as this cell runs.
timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
main_results_dir = f"grid_search_results_nocca_{timestamp}"
os.makedirs(main_results_dir, exist_ok=True)
print(f"Creando directorio principal de resultados: {main_results_dir}")

# Overall summary log and detailed CSV (both are written at the end)
summary_log_file = os.path.join(main_results_dir, "summary_log.txt")
summary_csv_file = os.path.join(main_results_dir, "detailed_results.csv")

# Single shared game instance; run_agent_combination resets it before every game.
game = NoccaNocca(max_steps=30, seed=42)

print("--- Configuración inicial completada ---")

# Helper that builds a filesystem-friendly folder name for a match-up.
def get_folder_name(agent1_type, agent2_type):
    """Return "<A>_vs_<B>" for two agent labels.

    Everything from the first "(" onward (the hyper-parameter suffix) is
    dropped from each label before joining.
    """
    base_names = [label.partition('(')[0] for label in (agent1_type, agent2_type)]
    return "_vs_".join(base_names)

# Parameter-extraction helpers (kept together here).
def extract_mcts_params(config_str):
    """Parse the MCTS hyper-parameters out of a configuration label.

    Looks for the first "MCTS(S=<int>,R=<int>,D=<int>)" fragment anywhere in
    *config_str* and returns ``(simulations, rollouts, depth)`` as ints.
    When no such fragment exists, a tuple of three NaNs is returned.
    """
    found = re.search(r'MCTS\(S=(\d+),R=(\d+),D=(\d+)\)', config_str)
    if found is None:
        return np.nan, np.nan, np.nan
    simulations, rollouts, depth = (int(group) for group in found.groups())
    return simulations, rollouts, depth

def extract_minimax_params(config_str):
    """Return the MiniMax depth found in a configuration label.

    Looks for the first "MiniMax(D=<int>)" fragment in *config_str* and
    returns the depth as an int, or NaN when the fragment is absent.
    """
    found = re.search(r'MiniMax\(D=(\d+)\)', config_str)
    return int(found.group(1)) if found else np.nan

# Reusable runner for a single match-up configuration.
def run_agent_combination(config_name, agent1_factory, agent2_factory, subfolder_name):
    """Play ``N_GAMES_PER_CONFIG`` games for one match-up and record the results.

    Args:
        config_name: Label of the match-up. Used as the key in ``all_results``
            and as the 'Configuration' value of the CSV rows.
        agent1_factory: Zero-argument callable returning the agent that plays
            as ``game.agents[0]``. Called once per game.
        agent2_factory: Zero-argument callable returning the agent that plays
            as ``game.agents[1]``. Called once per game.
        subfolder_name: Unused. No per-configuration folder or log file is
            created any more; the parameter is kept so existing callers work.

    Side effects:
        * Replaces ``all_results[config_name]`` entries for both agent roles.
        * Replaces the rows of ``detailed_csv_rows`` belonging to
          ``config_name`` (one row per agent role).
        * Prints progress and an aggregated summary.
    """
    print(f"\n--- Probando: {config_name} ({N_GAMES_PER_CONFIG} juegos) ---")

    # Fresh per-role accumulators for this configuration.
    metric_names = ('rewards', 'times', 'wins', 'draws', 'losses')
    all_results[config_name][game.agents[0]] = {name: [] for name in metric_names}
    all_results[config_name][game.agents[1]] = {name: [] for name in metric_names}

    for i in range(N_GAMES_PER_CONFIG):
        if (i + 1) % 10 == 0:
            print(f"   Juego {i+1}/{N_GAMES_PER_CONFIG}")

        players = {
            game.agents[0]: agent1_factory(),
            game.agents[1]: agent2_factory(),
        }

        game.reset()
        game_times = {agent: [] for agent in game.agents}

        while not game.terminated():
            current_agent = game.agent_selection

            # perf_counter is monotonic and high-resolution; time.time() can
            # be too coarse to measure a single fast move.
            start_time = time.perf_counter()
            action = players[current_agent].action()
            game_times[current_agent].append(time.perf_counter() - start_time)

            game.step(action)

        for agent_name in game.agents:
            reward = game.reward(agent_name)
            stats = all_results[config_name][agent_name]
            stats['rewards'].append(reward)

            # Exactly one of the three outcome flags is set per game; anything
            # that is neither a win nor a draw counts as a loss.
            won = reward > 0
            drew = (not won) and reward == 0
            stats['wins'].append(int(won))
            stats['draws'].append(int(drew))
            stats['losses'].append(int(not won and not drew))

            # Mean decision time of this agent within the game (0 if it never moved).
            move_times = game_times[agent_name]
            stats['times'].append(np.mean(move_times) if move_times else 0)

    # Print the aggregated summary of this configuration.
    summary_line = f"\nResultados Agregados para {config_name}:\n"
    print(summary_line)

    # all_results was reset above for this configuration, so drop any rows left
    # over from a previous run; otherwise re-running a cell duplicates them in
    # the CSV. Slice assignment keeps the same list object.
    detailed_csv_rows[:] = [
        row for row in detailed_csv_rows if row['Configuration'] != config_name
    ]

    for agent_role, agent_data in all_results[config_name].items():
        avg_reward = np.mean(agent_data['rewards'])
        total_wins = sum(agent_data['wins'])
        total_draws = sum(agent_data['draws'])
        total_losses = sum(agent_data['losses'])
        avg_time_ms = np.mean(agent_data['times']) * 1000

        agent_summary = f"   Agente {agent_role}: Recompensa Promedio={avg_reward:.3f}, Victorias={total_wins}, Empates={total_draws}, Derrotas={total_losses}, Tiempo Promedio/Acción={avg_time_ms:.2f}ms\n"
        print(agent_summary)

        detailed_csv_rows.append({
            'Configuration': config_name,
            'Agent Role': agent_role,
            'Average Reward': avg_reward,
            'Total Wins': total_wins,
            'Total Draws': total_draws,
            'Total Losses': total_losses,
            'Average Time (ms)': avg_time_ms
        })

Creando directorio principal de resultados: grid_search_results_nocca_20250625_170550
--- Configuración inicial completada ---


In [11]:
from itertools import product

def run_grid_search_vs(
    name_a: str,
    factory_a,
    params_a: dict,
    name_b: str,
    factory_b,
    params_b: dict,
    avoid_duplicates: bool = False
):
    """Run every hyper-parameter combination of agent A against agent B.

    Args:
        name_a / name_b: Display names of the two agent types.
        factory_a / factory_b: Callables ``(game, kwargs) -> agent``.
        params_a / params_b: Mapping of parameter name to the list of values
            to try; an empty dict yields a single parameter-less combination.
        avoid_duplicates: When true, skip pairs whose A value tuple compares
            greater than the B value tuple (meant for same-type match-ups).

    Each combination is handed to ``run_agent_combination`` under a label such
    as ``"MCTS(S=30,R=2,D=2) vs Random"``; parameters are abbreviated to the
    upper-cased first letter of their name.
    """
    def describe(agent_name, kwargs):
        # Label for one side: bare name, or name plus abbreviated parameters.
        if not kwargs:
            return agent_name
        abbreviated = ",".join(
            f"{key[0].upper()}={value}" for key, value in kwargs.items()
        )
        return f"{agent_name}({abbreviated})"

    param_names_a = list(params_a)
    param_names_b = list(params_b)
    grid_a = list(product(*params_a.values()))
    grid_b = list(product(*params_b.values()))

    # Same visiting order as two nested loops: A outermost, B innermost.
    for combo_a, combo_b in product(grid_a, grid_b):
        # Avoid duplicates (only meaningful for cases such as MCTS vs MCTS).
        if avoid_duplicates and combo_a > combo_b:
            continue

        args_a = dict(zip(param_names_a, combo_a))
        args_b = dict(zip(param_names_b, combo_b))
        config_name = f"{describe(name_a, args_a)} vs {describe(name_b, args_b)}"

        # Defaults bind the current game and kwargs at definition time.
        run_agent_combination(
            config_name,
            lambda g=game, args=args_a: factory_a(g, args),
            lambda g=game, args=args_b: factory_b(g, args),
            get_folder_name(name_a, name_b)
        )


## MCTS vs Random (y viceversa)

In [12]:
# Every MCTS (simulations, rollouts, depth) combination against a random
# opponent, with MCTS built for game.agents[0] and Random for game.agents[1].
run_grid_search_vs(
    "MCTS",
    lambda g, args: MonteCarloTreeSearch(game=g, agent=g.agents[0], **args),
    {"simulations": mcts_simulations_options, "rollouts": mcts_rollouts_options, "depth": mcts_depth_options},
    "Random",
    lambda g, args: RandomAgent(game=g, agent=g.agents[1]),
    {}
)
# Same grid with the seats swapped: Random is game.agents[0], MCTS is game.agents[1].
run_grid_search_vs(
    "Random",
    lambda g, args: RandomAgent(game=g, agent=g.agents[0]),
    {},
    "MCTS",
    lambda g, args: MonteCarloTreeSearch(game=g, agent=g.agents[1], **args),
    {"simulations": mcts_simulations_options, "rollouts": mcts_rollouts_options, "depth": mcts_depth_options}
)



--- Probando: MCTS(S=30,R=2,D=2) vs Random (10 juegos) ---
   Juego 10/10

Resultados Agregados para MCTS(S=30,R=2,D=2) vs Random:

   Agente Black: Recompensa Promedio=0.000, Victorias=0, Empates=10, Derrotas=0, Tiempo Promedio/Acción=142.08ms

   Agente White: Recompensa Promedio=0.000, Victorias=0, Empates=10, Derrotas=0, Tiempo Promedio/Acción=0.72ms


--- Probando: MCTS(S=30,R=2,D=3) vs Random (10 juegos) ---
   Juego 10/10

Resultados Agregados para MCTS(S=30,R=2,D=3) vs Random:

   Agente Black: Recompensa Promedio=0.000, Victorias=0, Empates=10, Derrotas=0, Tiempo Promedio/Acción=162.45ms

   Agente White: Recompensa Promedio=0.000, Victorias=0, Empates=10, Derrotas=0, Tiempo Promedio/Acción=0.57ms


--- Probando: MCTS(S=30,R=3,D=2) vs Random (10 juegos) ---
   Juego 10/10

Resultados Agregados para MCTS(S=30,R=3,D=2) vs Random:

   Agente Black: Recompensa Promedio=0.000, Victorias=0, Empates=10, Derrotas=0, Tiempo Promedio/Acción=171.63ms

   Agente White: Recompensa Promedi

## Minimax vs Random (y viceversa)

In [13]:
# Every MiniMax depth against a random opponent, with MiniMax built for
# game.agents[0]. MiniMax is constructed with a fixed seed=1 in this cell.
run_grid_search_vs(
    "MiniMax",
    lambda g, args: MiniMax(game=g, agent=g.agents[0], depth=args["depth"], seed=1),
    {"depth": minimax_depth_options},
    "Random",
    lambda g, args: RandomAgent(game=g, agent=g.agents[1]),
    {}
)
# Same depths with the seats swapped: Random is game.agents[0], MiniMax is game.agents[1].
run_grid_search_vs(
    "Random",
    lambda g, args: RandomAgent(game=g, agent=g.agents[0]),
    {},
    "MiniMax",
    lambda g, args: MiniMax(game=g, agent=g.agents[1], depth=args["depth"], seed=1),
    {"depth": minimax_depth_options}
)



--- Probando: MiniMax(D=1) vs Random (10 juegos) ---
   Juego 10/10

Resultados Agregados para MiniMax(D=1) vs Random:

   Agente Black: Recompensa Promedio=1.000, Victorias=10, Empates=0, Derrotas=0, Tiempo Promedio/Acción=11.56ms

   Agente White: Recompensa Promedio=-1.000, Victorias=0, Empates=0, Derrotas=10, Tiempo Promedio/Acción=0.63ms


--- Probando: MiniMax(D=2) vs Random (10 juegos) ---
   Juego 10/10

Resultados Agregados para MiniMax(D=2) vs Random:

   Agente Black: Recompensa Promedio=0.000, Victorias=0, Empates=10, Derrotas=0, Tiempo Promedio/Acción=242.99ms

   Agente White: Recompensa Promedio=0.000, Victorias=0, Empates=10, Derrotas=0, Tiempo Promedio/Acción=0.66ms


--- Probando: Random vs MiniMax(D=1) (10 juegos) ---
   Juego 10/10

Resultados Agregados para Random vs MiniMax(D=1):

   Agente Black: Recompensa Promedio=0.000, Victorias=0, Empates=10, Derrotas=0, Tiempo Promedio/Acción=0.66ms

   Agente White: Recompensa Promedio=0.000, Victorias=0, Empates=10, Derr

## MCTS vs Minimax (y viceversa)

In [15]:
# Full cross product of the MCTS grid and the MiniMax depths, with MCTS built
# for game.agents[0] and MiniMax for game.agents[1].
# NOTE(review): unlike the MiniMax-vs-Random cell, no `seed` is passed to
# MiniMax here — confirm that relying on its default is intended.
run_grid_search_vs(
    "MCTS",
    lambda g, args: MonteCarloTreeSearch(game=g, agent=g.agents[0], **args),
    {"simulations": mcts_simulations_options, "rollouts": mcts_rollouts_options, "depth": mcts_depth_options},
    "MiniMax",
    lambda g, args: MiniMax(game=g, agent=g.agents[1], depth=args["depth"]),
    {"depth": minimax_depth_options}
)

# Same cross product with the seats swapped: MiniMax is game.agents[0], MCTS is game.agents[1].
run_grid_search_vs(
    "MiniMax",
    lambda g, args: MiniMax(game=g, agent=g.agents[0], depth=args["depth"]),
    {"depth": minimax_depth_options},
    "MCTS",
    lambda g, args: MonteCarloTreeSearch(game=g, agent=g.agents[1], **args),
    {"simulations": mcts_simulations_options, "rollouts": mcts_rollouts_options, "depth": mcts_depth_options}
)



--- Probando: MCTS(S=30,R=2,D=2) vs MiniMax(D=1) (10 juegos) ---
   Juego 10/10

Resultados Agregados para MCTS(S=30,R=2,D=2) vs MiniMax(D=1):

   Agente Black: Recompensa Promedio=0.000, Victorias=0, Empates=10, Derrotas=0, Tiempo Promedio/Acción=150.44ms

   Agente White: Recompensa Promedio=0.000, Victorias=0, Empates=10, Derrotas=0, Tiempo Promedio/Acción=10.74ms


--- Probando: MCTS(S=30,R=2,D=2) vs MiniMax(D=2) (10 juegos) ---
   Juego 10/10

Resultados Agregados para MCTS(S=30,R=2,D=2) vs MiniMax(D=2):

   Agente Black: Recompensa Promedio=0.000, Victorias=0, Empates=10, Derrotas=0, Tiempo Promedio/Acción=137.33ms

   Agente White: Recompensa Promedio=0.000, Victorias=0, Empates=10, Derrotas=0, Tiempo Promedio/Acción=239.40ms


--- Probando: MCTS(S=30,R=2,D=3) vs MiniMax(D=1) (10 juegos) ---
   Juego 10/10

Resultados Agregados para MCTS(S=30,R=2,D=3) vs MiniMax(D=1):

   Agente Black: Recompensa Promedio=-0.100, Victorias=0, Empates=9, Derrotas=1, Tiempo Promedio/Acción=174.46

## MCTS vs MCTS

In [16]:
# MCTS against MCTS over the same grid on both sides.
# avoid_duplicates=True skips every pair whose first value tuple compares
# greater than the second, so each unordered pair of settings is played once,
# with the smaller tuple built for game.agents[0].
run_grid_search_vs(
    "MCTS",
    lambda g, args: MonteCarloTreeSearch(game=g, agent=g.agents[0], **args),
    {"simulations": mcts_simulations_options, "rollouts": mcts_rollouts_options, "depth": mcts_depth_options},
    "MCTS",
    lambda g, args: MonteCarloTreeSearch(game=g, agent=g.agents[1], **args),
    {"simulations": mcts_simulations_options, "rollouts": mcts_rollouts_options, "depth": mcts_depth_options},
    avoid_duplicates=True
)



--- Probando: MCTS(S=30,R=2,D=2) vs MCTS(S=30,R=2,D=2) (10 juegos) ---
   Juego 10/10

Resultados Agregados para MCTS(S=30,R=2,D=2) vs MCTS(S=30,R=2,D=2):

   Agente Black: Recompensa Promedio=-1.000, Victorias=0, Empates=0, Derrotas=10, Tiempo Promedio/Acción=146.12ms

   Agente White: Recompensa Promedio=1.000, Victorias=10, Empates=0, Derrotas=0, Tiempo Promedio/Acción=145.48ms


--- Probando: MCTS(S=30,R=2,D=2) vs MCTS(S=30,R=2,D=3) (10 juegos) ---
   Juego 10/10

Resultados Agregados para MCTS(S=30,R=2,D=2) vs MCTS(S=30,R=2,D=3):

   Agente Black: Recompensa Promedio=-1.000, Victorias=0, Empates=0, Derrotas=10, Tiempo Promedio/Acción=153.34ms

   Agente White: Recompensa Promedio=1.000, Victorias=10, Empates=0, Derrotas=0, Tiempo Promedio/Acción=184.94ms


--- Probando: MCTS(S=30,R=2,D=2) vs MCTS(S=30,R=3,D=2) (10 juegos) ---
   Juego 10/10

Resultados Agregados para MCTS(S=30,R=2,D=2) vs MCTS(S=30,R=3,D=2):

   Agente Black: Recompensa Promedio=-1.000, Victorias=0, Empates=0, De

## Guardar Resultados

In [17]:
import os
import csv
import numpy as np

def ensure_dir_for_file(filepath: str):
    """Create the parent directory of *filepath* if it does not exist yet.

    Args:
        filepath: Path of a file that is about to be written. A bare file
            name (no directory component) is a no-op.

    Raises:
        OSError: If the directory cannot be created (for example because a
            regular file already occupies that path).
    """
    folder = os.path.dirname(filepath)
    if folder:
        # exist_ok=True replaces the exists()-then-makedirs() sequence, which
        # could fail if the directory appeared between the check and the call.
        os.makedirs(folder, exist_ok=True)

# Make sure the target folders exist before writing the files.
ensure_dir_for_file(summary_csv_file)
ensure_dir_for_file(summary_log_file)

# Write the detailed CSV (one row per configuration and agent role).
print("\n--- Guardando resultados detallados en CSV ---")
csv_headers = ['Configuration', 'Agent Role', 'Average Reward', 'Total Wins', 'Total Draws', 'Total Losses', 'Average Time (ms)']

with open(summary_csv_file, 'w', newline='', encoding='utf-8') as csvfile:
    writer = csv.DictWriter(csvfile, fieldnames=csv_headers)
    writer.writeheader()
    writer.writerows(detailed_csv_rows)
print(f"Archivo CSV guardado en: {summary_csv_file}")

# Match-up categories reported in the summary, in report order, with the
# title printed for each one. Keys follow "<first type>_vs_<second type>".
matchup_titles = {
    "MCTS_vs_Random": "MCTS (Agente X) vs Random (Agente O)",
    "Random_vs_MCTS": "Random (Agente X) vs MCTS (Agente O)",
    "MiniMax_vs_Random": "MiniMax (Agente X) vs Random (Agente O)",
    "Random_vs_MiniMax": "Random (Agente X) vs MiniMax (Agente O)",
    "MCTS_vs_MiniMax": "MCTS (Agente X) vs MiniMax (Agente O)",
    "MiniMax_vs_MCTS": "MiniMax (Agente X) vs MCTS (Agente O)",
    "MCTS_vs_MCTS": "MCTS (Agente X) vs MCTS (Agente O)",
}

# Append the summary to the log, once per time-penalty weight.
print("\n--- Guardando resumen en log ---")
with open(summary_log_file, 'a', encoding='utf-8') as log_file:
    for priority_name, current_weight_time in weight_time_configs.items():
        log_lines = []
        header_line = f"\n\n--- Análisis Final: {priority_name} (WEIGHT_TIME = {current_weight_time}) ---\n"
        print(header_line)
        log_lines.append(header_line)

        # Best configuration found so far for every match-up category.
        best_configs = {
            key: {"score": -np.inf, "config": None} for key in matchup_titles
        }

        for config_name, agent_results in all_results.items():
            # Agent types are the labels without their "(...)" parameter suffix.
            agent1_raw, agent2_raw = config_name.split(" vs ")
            agent1_type = agent1_raw.split('(')[0]
            agent2_type = agent2_raw.split('(')[0]

            key = f"{agent1_type}_vs_{agent2_type}"
            if key not in best_configs:
                continue

            # The evaluated ("main") agent is the first side when it is a
            # search agent, otherwise the second side.
            if agent1_type in ("MCTS", "MiniMax"):
                main_agent_role = game.agents[0]
            else:
                main_agent_role = game.agents[1]

            if not main_agent_role or main_agent_role not in agent_results:
                continue

            stats = agent_results[main_agent_role]
            games_played = len(stats['rewards'])
            if games_played == 0:
                # Nothing recorded for this configuration; no meaningful score.
                continue

            # Metrics of the main agent.
            avg_reward = np.mean(stats['rewards'])
            avg_time_ms = np.mean(stats['times']) * 1000
            total_wins = sum(stats['wins'])
            total_draws = sum(stats['draws'])
            total_losses = sum(stats['losses'])

            # Reward penalised by the average decision time.
            combined_score = avg_reward - (current_weight_time * avg_time_ms)

            if combined_score > best_configs[key]["score"]:
                best_configs[key] = {
                    "score": combined_score,
                    "config": config_name,
                    "avg_time_ms": avg_time_ms,
                    "wins": total_wins,
                    "draws": total_draws,
                    "losses": total_losses,
                    # Stored so the rates below use the number of games really
                    # played, even if N_GAMES_PER_CONFIG was changed afterwards.
                    "games": games_played,
                    "agent_role": main_agent_role
                }

        # Print the best configuration of a category and queue it for the log.
        def print_and_log_best(key, title):
            bc = best_configs[key]
            if bc["config"] is None:
                line = f"\n**{title}:** No se encontraron configuraciones para esta categoría.\n"
                print(line)
                log_lines.append(line)
                return

            win_rate = bc["wins"] / bc["games"]
            draw_rate = bc["draws"] / bc["games"]
            loss_rate = bc["losses"] / bc["games"]

            line = (
                f"\n**{title}:**\n"
                f"  Mejor Configuración: {bc['config']}\n"
                f"  Score Combinado: {bc['score']:.3f}\n"
                f"  Victorias: {bc['wins']} ({win_rate:.2%})\n"
                f"  Empates: {bc['draws']} ({draw_rate:.2%})\n"
                f"  Derrotas: {bc['losses']} ({loss_rate:.2%})\n"
                f"  Tiempo Promedio/Acción: {bc['avg_time_ms']:.2f} ms\n"
                f"  Agente Principal (rol): {bc['agent_role']}\n"
            )
            print(line)
            log_lines.append(line)

        for matchup_key, matchup_title in matchup_titles.items():
            print_and_log_best(matchup_key, matchup_title)

        log_file.writelines(log_lines)

print("\n--- Guardado completo ---")



--- Guardando resultados detallados en CSV ---
Archivo CSV guardado en: grid_search_results_nocca_20250625_170550\detailed_results.csv

--- Guardando resumen en log ---


--- Análisis Final: Priorizando Victoria (WEIGHT_TIME = 1e-05) ---


**MCTS (Agente X) vs Random (Agente O):**
  Mejor Configuración: MCTS(S=70,R=2,D=3) vs Random
  Score Combinado: 0.096
  Victorias: 1 (10.00%)
  Empates: 9 (90.00%)
  Derrotas: 0 (0.00%)
  Tiempo Promedio/Acción: 398.70 ms
  Agente Principal (rol): Black


**Random (Agente X) vs MCTS (Agente O):**
  Mejor Configuración: Random vs MCTS(S=30,R=2,D=2)
  Score Combinado: 0.999
  Victorias: 10 (100.00%)
  Empates: 0 (0.00%)
  Derrotas: 0 (0.00%)
  Tiempo Promedio/Acción: 143.57 ms
  Agente Principal (rol): White


**MiniMax (Agente X) vs Random (Agente O):**
  Mejor Configuración: MiniMax(D=1) vs Random
  Score Combinado: 1.000
  Victorias: 10 (100.00%)
  Empates: 0 (0.00%)
  Derrotas: 0 (0.00%)
  Tiempo Promedio/Acción: 11.56 ms
  Agente Principal (rol)

In [12]:
# Display the results directory currently held in the notebook session.
main_results_dir

'grid_search_results_nocca_20250624_143103'

## Guardar Visualizaciones

In [9]:
import matplotlib.pyplot as plt
import seaborn as sns
import pandas as pd
import os 
import re 

print("\n--- Generando Visualizaciones y Guardando Gráficos para TODAS las combinaciones ---")


def save_plot(fig, folder_name, plot_filename):
    """Save *fig* as main_results_dir/folder_name/plot_filename and close it."""
    plot_folder_path = os.path.join(main_results_dir, folder_name)
    os.makedirs(plot_folder_path, exist_ok=True)
    full_path = os.path.join(plot_folder_path, plot_filename)
    fig.savefig(full_path, bbox_inches='tight')
    print(f"Guardado: {full_path}")
    plt.close(fig)  # Close the figure to free memory.


def _select_matchup_rows(df, first_type, second_type, role):
    """Return a copy of the rows of *role* for "<first_type> vs <second_type>".

    The configuration label is matched as a whole and in seat order, so
    "Random vs MCTS(...)" is never mixed into "MCTS(...) vs Random". A plain
    substring test on both names would match both seat orders.
    """
    pattern = (
        rf"{re.escape(first_type)}(?:\([^)]*\))?"
        rf" vs {re.escape(second_type)}(?:\([^)]*\))?$"
    )
    in_matchup = df['Configuration'].str.match(pattern, na=False)
    return df[in_matchup & (df['Agent Role'] == role)].copy()


# (CSV column, text used in title and y-label, colour map / palette, file tag)
_GROUPED_METRICS = (
    ('Average Reward', 'Recompensa Promedio', 'viridis', 'reward'),
    ('Average Time (ms)', 'Tiempo Promedio (ms)', 'coolwarm', 'time'),
)


def _plot_mcts_heatmaps(df, folder, file_prefix, label, title_tail):
    """Draw reward and time heatmaps (simulations x rollouts/depth) for MCTS rows."""
    if df.empty:
        print(f"No hay datos para {label} para generar heatmaps agrupados.")
        return

    df[['Simulations', 'Rollouts', 'Depth']] = df['Configuration'].apply(
        lambda cfg: pd.Series(extract_mcts_params(cfg))
    )
    df.dropna(subset=['Simulations', 'Rollouts', 'Depth'], inplace=True)

    # A heatmap needs more than one value on each axis.
    enough_variation = df['Simulations'].nunique() > 1 and (
        df['Rollouts'].nunique() > 1 or df['Depth'].nunique() > 1
    )
    if not enough_variation:
        print(f"No hay suficientes datos variados para heatmaps de {label}.")
        return

    df['Rollouts_Depth'] = (
        df['Rollouts'].astype(int).astype(str) + '_D' + df['Depth'].astype(int).astype(str)
    )

    for column, metric_title, cmap, file_tag in _GROUPED_METRICS:
        fig = plt.figure(figsize=(10, 6))
        pivot = df.pivot_table(index='Simulations', columns='Rollouts_Depth', values=column)
        sns.heatmap(pivot, annot=True, fmt=".2f", cmap=cmap, linewidths=.5)
        plt.title(f'{label}: {metric_title}{title_tail}')
        plt.xlabel('Rollouts y Profundidad MCTS')
        plt.ylabel('Simulaciones MCTS')
        save_plot(fig, folder, f'{file_prefix}_{file_tag}_heatmap.png')


def _plot_minimax_barplots(df, folder, file_prefix, label, title_tail):
    """Draw reward and time bar plots by search depth for MiniMax rows."""
    if df.empty:
        print(f"No hay datos para {label} para generar barplots agrupados.")
        return

    df['Depth'] = df['Configuration'].apply(extract_minimax_params)
    df.dropna(subset=['Depth'], inplace=True)

    # A grouped bar plot needs more than one distinct depth.
    if df['Depth'].nunique() <= 1:
        print(f"No hay suficientes datos variados para barplots agrupados de {label}.")
        return

    for column, metric_title, palette, file_tag in _GROUPED_METRICS:
        fig = plt.figure(figsize=(8, 5))
        sns.barplot(x='Depth', y=column, data=df, palette=palette)
        plt.title(f'{label}: {metric_title}{title_tail}')
        plt.xlabel('Profundidad MiniMax')
        plt.ylabel(metric_title)
        save_plot(fig, folder, f'{file_prefix}_{file_tag}_barplot_grouped.png')


# Load the CSV written by the save-results cell (summary_csv_file comes from the setup cell).
try:
    df_all_results = pd.read_csv(summary_csv_file)
except FileNotFoundError:
    print(f"Error: No se encontró el archivo CSV de resultados en '{summary_csv_file}'. Asegúrate de ejecutar la Celda 6 primero.")
    df_all_results = pd.DataFrame()  # Empty frame so the rest of the cell is skipped cleanly.

if df_all_results.empty:
    print("El DataFrame de resultados está vacío. No se generarán visualizaciones.")
else:
    # --- MAIN LOOP: comparative bar plots for EVERY unique configuration ---
    print("\n--- Generando gráficos de barras comparativos por cada configuración ---")
    unique_combinations = df_all_results['Configuration'].unique()

    for config_label in unique_combinations:
        df_current_config_data = df_all_results[df_all_results['Configuration'] == config_label].copy()

        if df_current_config_data.empty:
            continue

        # Sub-folder named after the two agent types (e.g. "MCTS_vs_Random").
        agent_types_folder_name_parts = [s.split('(')[0] for s in config_label.split(" vs ")]
        agent_types_folder = f"{agent_types_folder_name_parts[0]}_vs_{agent_types_folder_name_parts[1]}"

        # Strip characters that are awkward in file names.
        filename_base = config_label.replace('(', '_').replace(')', '').replace(',', '_').replace('=', '').replace(' ', '_').replace('vs', '_vs_').replace('__', '_')

        # --- Bar plot: average reward per agent role ---
        fig_reward_bar = plt.figure(figsize=(8, 5))
        sns.barplot(x='Agent Role', y='Average Reward', data=df_current_config_data, palette='viridis')
        plt.title(f'Recompensa Promedio en: {config_label}')
        plt.xlabel('Rol del Agente')
        plt.ylabel('Recompensa Promedio')
        plt.ylim(-1, 1)  # Fixed y-axis so plots are comparable.
        save_plot(fig_reward_bar, agent_types_folder, f'{filename_base}_reward_barplot.png')

        # --- Bar plot: average time per action per agent role ---
        fig_time_bar = plt.figure(figsize=(8, 5))
        sns.barplot(x='Agent Role', y='Average Time (ms)', data=df_current_config_data, palette='coolwarm')
        plt.title(f'Tiempo Promedio por Acción en: {config_label}')
        plt.xlabel('Rol del Agente')
        plt.ylabel('Tiempo Promedio (ms)')
        save_plot(fig_time_bar, agent_types_folder, f'{filename_base}_time_barplot.png')

    # --- ADDITIONAL SECTIONS: heatmaps / bar plots GROUPED by match-up type ---
    print("\n--- Generando heatmaps/barplots agrupados (si hay suficientes datos) ---")

    # 1. MCTS as game.agents[0] vs Random - heatmaps of the MCTS rows.
    _plot_mcts_heatmaps(
        _select_matchup_rows(df_all_results, "MCTS", "Random", game.agents[0]),
        folder=get_folder_name("MCTS", "Random"),
        file_prefix='mcts_x_vs_random_o',
        label='MCTS (Agente X) vs Random',
        title_tail=' (Agrupado)',
    )

    # 2. Random vs MCTS as game.agents[1] - heatmaps of the MCTS rows.
    _plot_mcts_heatmaps(
        _select_matchup_rows(df_all_results, "Random", "MCTS", game.agents[1]),
        folder=get_folder_name("Random", "MCTS"),
        file_prefix='random_x_vs_mcts_o',
        label='Random (Agente X) vs MCTS (Agente O)',
        title_tail=' (Agente O, Agrupado)',
    )

    # 3. MiniMax as game.agents[0] vs Random - bar plots of the MiniMax rows.
    _plot_minimax_barplots(
        _select_matchup_rows(df_all_results, "MiniMax", "Random", game.agents[0]),
        folder=get_folder_name("MiniMax", "Random"),
        file_prefix='minimax_x_vs_random_o',
        label='MiniMax (Agente X) vs Random',
        title_tail=' por Profundidad (Agrupado)',
    )

    # 4. Random vs MiniMax as game.agents[1] - bar plots of the MiniMax rows.
    _plot_minimax_barplots(
        _select_matchup_rows(df_all_results, "Random", "MiniMax", game.agents[1]),
        folder=get_folder_name("Random", "MiniMax"),
        file_prefix='random_x_vs_minimax_o',
        label='Random (Agente X) vs MiniMax (Agente O)',
        title_tail=' (Agente O, Agrupado)',
    )

print("--- Generación de Visualizaciones Completada ---")


--- Generando Visualizaciones y Guardando Gráficos para TODAS las combinaciones ---

--- Generando gráficos de barras comparativos por cada configuración ---
Guardado: grid_search_results_20250617_095847\MCTS_vs_Random\MCTS_S10_R1_D1_vs_Random_reward_barplot.png
Guardado: grid_search_results_20250617_095847\MCTS_vs_Random\MCTS_S10_R1_D1_vs_Random_time_barplot.png
Guardado: grid_search_results_20250617_095847\MCTS_vs_Random\MCTS_S10_R1_D2_vs_Random_reward_barplot.png
Guardado: grid_search_results_20250617_095847\MCTS_vs_Random\MCTS_S10_R1_D2_vs_Random_time_barplot.png
Guardado: grid_search_results_20250617_095847\MCTS_vs_Random\MCTS_S10_R1_D3_vs_Random_reward_barplot.png
Guardado: grid_search_results_20250617_095847\MCTS_vs_Random\MCTS_S10_R1_D3_vs_Random_time_barplot.png
Guardado: grid_search_results_20250617_095847\MCTS_vs_Random\MCTS_S10_R2_D1_vs_Random_reward_barplot.png
Guardado: grid_search_results_20250617_095847\MCTS_vs_Random\MCTS_S10_R2_D1_vs_Random_time_barplot.png
Guardado: