## Nueva filosofía

In [1]:
from pyspark.sql import SparkSession
from pyspark.sql.functions import col, udf, input_file_name
from pyspark.sql.types import StringType
import re

In [2]:
# Build (or reuse) the notebook-wide SparkSession. The master is the local machine
# using all cores; the original note suggests "yarn" as the alternative master.
spark = SparkSession.builder.appName("Bot_Nakamura").master("local[*]").getOrCreate()


Setting default log level to "WARN".
To adjust logging level use sc.setLogLevel(newLevel). For SparkR, use setLogLevel(newLevel).
25/05/10 20:17:46 WARN NativeCodeLoader: Unable to load native-hadoop library for your platform... using builtin-java classes where applicable


In [3]:
# Por si se quiere eliminar alguna carpeta
#!hdfs dfs -rm -r /user/ajedrez/jugador
# En principio, solo con la carpeta raw y las partidas dentro nos vale
!hdfs dfs -ls /user/ajedrez/

Found 1 items
drwxr-xr-x   - root supergroup          0 2025-05-09 11:46 /user/ajedrez/raw


In [4]:
# Read every PGN file under the HDFS raw folder as (path, content) pairs
rdd = spark.sparkContext.wholeTextFiles("hdfs:///user/ajedrez/raw/*.pgn")

# Break one whole PGN file into its individual games
def dividir_partidas_por_archivo(nombre_y_contenido):
    """Split a (path, file_content) pair into one (path, game_text) tuple per game.

    The content is cut at every newline that is immediately followed by an
    ``[Event `` header line; chunks that do not contain ``[Event`` at all
    (e.g. leading junk) are dropped.

    Args:
        nombre_y_contenido: 2-tuple ``(ruta, contenido)`` as produced by
            ``SparkContext.wholeTextFiles``.

    Returns:
        list[tuple[str, str]]: ``(ruta, pgn)`` pairs, in file order.
    """
    ruta, contenido = nombre_y_contenido
    juegos = []
    for trozo in re.split(r'\n(?=\[Event )', contenido):
        if "[Event" in trozo:
            juegos.append((ruta, trozo))
    return juegos

# Split every file into its individual games: one (path, pgn) record per game
rdd_partidas = rdd.flatMap(dividir_partidas_por_archivo)

# Convert to a DataFrame with columns "archivo" (source path) and "pgn" (game text)
df = rdd_partidas.toDF(["archivo", "pgn"])

# Helper used by the UDFs below to pull a PGN header tag out of a game
def extraer_tag(tag, texto):
    """Return the value of the PGN header ``tag`` found in ``texto``.

    Looks for a line fragment of the form ``[<tag> "<value>"]`` and returns
    ``<value>``.

    Args:
        tag: Header name, e.g. ``"White"``. It is matched literally
            (regex metacharacters in it are escaped).
        texto: PGN text of one game. May be ``None`` or empty, which happens
            when the UDF is applied to a null column value.

    Returns:
        str | None: The first non-empty quoted value for the tag, or ``None``
        when the text is missing, the tag is absent, or its value is empty.
    """
    if not texto:
        # A null/empty column value would make re.search raise inside the UDF.
        return None
    match = re.search(rf'\[{re.escape(tag)} "([^"]+)"\]', texto)
    return match.group(1) if match else None

# Wrap extraer_tag in one string-returning UDF per header of interest
extraer_white = udf(lambda x: extraer_tag("White", x), StringType())
extraer_black = udf(lambda x: extraer_tag("Black", x), StringType())
extraer_date = udf(lambda x: extraer_tag("Date", x), StringType())

# Add the metadata columns. After this, df holds every game plus white/black/date
df = df.withColumn("white", extraer_white(col("pgn"))) \
       .withColumn("black", extraer_black(col("pgn"))) \
       .withColumn("date", extraer_date(col("pgn")))

# Keep only the games where "Nakamura,Hi" plays either colour
df_nakamura = df.filter((col("white") == "Nakamura,Hi") | (col("black") == "Nakamura,Hi"))

                                                                                

In [5]:
# Preview the players and date: first the Nakamura-only DataFrame, then the DataFrame with every game
df_nakamura.select("white", "black", "date").show(truncate=False)
df.select("white", "black", "date").show(truncate=False)

                                                                                

+-------------------------------+----------------+----------+
|white                          |black           |date      |
+-------------------------------+----------------+----------+
|Caruana,F                      |Nakamura,Hi     |2024.04.04|
|Nakamura,Hi                    |Vidit,S         |2024.04.05|
|Abasov,N                       |Nakamura,Hi     |2024.04.06|
|Nakamura,Hi                    |Praggnanandhaa,R|2024.04.07|
|Perossa,Nicolas                |Nakamura,Hi     |2024.04.02|
|Nakamura,Hi                    |Nevednichy,V    |2024.04.02|
|Wagner,De                      |Nakamura,Hi     |2024.04.02|
|Nakamura,Hi                    |Eljanov,P       |2024.04.02|
|Bjerre,Jonas Buhl              |Nakamura,Hi     |2024.04.02|
|Nakamura,Hi                    |Kuzubov,Y       |2024.04.02|
|Nakamura,Hi                    |Kovalev,Vl      |2024.04.02|
|Zemlyanskii,Ivan               |Nakamura,Hi     |2024.04.02|
|Nakamura,Hi                    |Duda,J          |2024.04.02|
|Kamsky,

25/05/10 20:18:02 WARN BlockReaderFactory: I/O error constructing remote block reader.
java.nio.channels.ClosedByInterruptException
	at java.nio.channels.spi.AbstractInterruptibleChannel.end(AbstractInterruptibleChannel.java:202)
	at sun.nio.ch.SocketChannelImpl.connect(SocketChannelImpl.java:658)
	at org.apache.hadoop.net.SocketIOWithTimeout.connect(SocketIOWithTimeout.java:191)
	at org.apache.hadoop.net.NetUtils.connect(NetUtils.java:586)
	at org.apache.hadoop.hdfs.DFSClient.newConnectedPeer(DFSClient.java:3033)
	at org.apache.hadoop.hdfs.client.impl.BlockReaderFactory.nextTcpPeer(BlockReaderFactory.java:829)
	at org.apache.hadoop.hdfs.client.impl.BlockReaderFactory.getRemoteBlockReaderFromTcp(BlockReaderFactory.java:754)
	at org.apache.hadoop.hdfs.client.impl.BlockReaderFactory.build(BlockReaderFactory.java:381)
	at org.apache.hadoop.hdfs.DFSInputStream.getBlockReader(DFSInputStream.java:755)
	at org.apache.hadoop.hdfs.DFSInputStream.blockSeekTo(DFSInputStream.java:685)
	at org.apac

In [6]:
# Row/column counts ("shape") and column names of the Nakamura-only DataFrame
filas = df_nakamura.count()
columnas = len(df_nakamura.columns)
print(f"Shape: ({filas}, {columnas})")
print(df_nakamura.columns)

# Same report for the DataFrame that holds every game
filas = df.count()
columnas = len(df.columns)
print(f"Shape: ({filas}, {columnas})")
print(df.columns)

# Columns: file that contains the game (twicXXXX.pgn), the game in PGN, the White / Black player names, and the date

                                                                                

Shape: (70, 5)
['archivo', 'pgn', 'white', 'black', 'date']
Shape: (30216, 5)
['archivo', 'pgn', 'white', 'black', 'date']


In [7]:
# Print the PGN text of the first game in the global DataFrame
print(df.select("pgn").first()["pgn"])

[Event "FIDE Candidates 2024"]
[Site "Toronto CAN"]
[Date "2024.04.04"]
[Round "1.1"]
[White "Caruana,F"]
[Black "Nakamura,Hi"]
[Result "1/2-1/2"]
[WhiteTitle "GM"]
[BlackTitle "GM"]
[WhiteElo "2803"]
[BlackElo "2789"]
[ECO "B56"]
[Opening "Sicilian"]
[Variation "Venice attack"]
[WhiteFideId "2020009"]
[BlackFideId "2016192"]
[EventDate "2024.04.04"]

1. e4 c5 2. Nf3 d6 3. d4 cxd4 4. Nxd4 Nf6 5. Nc3 e5 6. Bb5+ Nbd7 7. Nf5 a6 8.
Ba4 b5 9. Bb3 Nc5 10. Bg5 Bxf5 11. exf5 Be7 12. Bxf6 Bxf6 13. O-O e4 14. Nxe4
Nxe4 15. Re1 O-O 16. Rxe4 Bxb2 17. Rb1 Bf6 18. Qd5 Rc8 19. Qb7 Rc5 20. Qxa6 Rxf5
21. Rd1 d5 22. Rb4 Bc3 23. Rxb5 Rxf2 24. Rbxd5 Qh4 25. Qd3 Rf6 26. g3 Qb4 27.
Kg2 Bb2 28. Rf5 g6 29. Rxf6 Bxf6 30. Qf3 Qe7 31. a4 Kg7 32. a5 Ra8 33. Rd5 Ra7
34. Rb5 Qd8 35. Rd5 Qc7 36. h4 Rxa5 37. Rxa5 Qxa5 38. Qb7 Qd8 39. Qxf7+ Kh6 40.
Kh3 Qe7 41. Qc4 Qe3 1/2-1/2



### Notación FEN previa + movimiento + color

In [8]:
from pyspark.sql.functions import udf, explode, col
from pyspark.sql.types import ArrayType, StructType, StructField, StringType
import chess.pgn
import io

# Extract the moves the tracked player made in one game (whichever colour they had)
def pgn_a_jugadas_nakamura(pgn_str, jugador="Nakamura,Hi"):
    """Return the (FEN-before-move, UCI move, colour) triples played by ``jugador``.

    The game is replayed along its main line; every time it is the tracked
    player's turn, the position before the move and the move itself are recorded.

    Args:
        pgn_str: PGN text of a single game.
        jugador: Exact value of the ``White``/``Black`` header identifying the
            player whose moves are collected. Defaults to ``"Nakamura,Hi"``,
            so the function can still be registered as a one-argument UDF.

    Returns:
        list[tuple[str, str, str]]: ``(fen, uci_move, "white" | "black")``
        tuples in game order. Empty when the game cannot be read or the player
        is on neither side. If replaying fails part-way through, the moves
        collected up to that point are returned (best effort).
    """
    resultado = []
    try:
        game = chess.pgn.read_game(io.StringIO(pgn_str))
        if not game:
            return []

        headers = game.headers
        if headers.get("White", "") == jugador:
            color_jugador, color = chess.WHITE, "white"
        elif headers.get("Black", "") == jugador:
            color_jugador, color = chess.BLACK, "black"
        else:
            return []

        board = game.board()
        for move in game.mainline_moves():
            if board.turn == color_jugador:
                # Record the position *before* the move is pushed.
                resultado.append((board.fen(), move.uci(), color))
            board.push(move)
    except Exception:
        # Deliberate best effort: one malformed game must not fail the Spark job.
        # Only Exception is caught so KeyboardInterrupt/SystemExit still propagate.
        pass
    return resultado

# Output schema of the UDF: an array of (FEN, Move, Color) string structs
esquema_jugada = ArrayType(StructType([
    StructField("FEN", StringType(), True),
    StructField("Move", StringType(), True),
    StructField("Color", StringType(), True)
]))

# UDF that extracts the tracked player's moves from a PGN string
pgn_udf_nakamura = udf(pgn_a_jugadas_nakamura, esquema_jugada)

# Apply the UDF and explode the array so each move becomes its own row
df_jugadas = df_nakamura.withColumn("jugadas", explode(pgn_udf_nakamura(col("pgn")))) \
                        .select(col("jugadas.FEN"), col("jugadas.Move"), col("jugadas.Color"))

# Cache (the DataFrame is reused by later cells) and show a preview
df_jugadas.cache()
df_jugadas.show(truncate=False)




+-------------------------------------------------------------------+----+-----+
|FEN                                                                |Move|Color|
+-------------------------------------------------------------------+----+-----+
|rnbqkbnr/pppppppp/8/8/4P3/8/PPPP1PPP/RNBQKBNR b KQkq - 0 1         |c7c5|black|
|rnbqkbnr/pp1ppppp/8/2p5/4P3/5N2/PPPP1PPP/RNBQKB1R b KQkq - 1 2     |d7d6|black|
|rnbqkbnr/pp2pppp/3p4/2p5/3PP3/5N2/PPP2PPP/RNBQKB1R b KQkq - 0 3    |c5d4|black|
|rnbqkbnr/pp2pppp/3p4/8/3NP3/8/PPP2PPP/RNBQKB1R b KQkq - 0 4        |g8f6|black|
|rnbqkb1r/pp2pppp/3p1n2/8/3NP3/2N5/PPP2PPP/R1BQKB1R b KQkq - 2 5    |e7e5|black|
|rnbqkb1r/pp3ppp/3p1n2/1B2p3/3NP3/2N5/PPP2PPP/R1BQK2R b KQkq - 1 6  |b8d7|black|
|r1bqkb1r/pp1n1ppp/3p1n2/1B2pN2/4P3/2N5/PPP2PPP/R1BQK2R b KQkq - 3 7|a7a6|black|
|r1bqkb1r/1p1n1ppp/p2p1n2/4pN2/B3P3/2N5/PPP2PPP/R1BQK2R b KQkq - 1 8|b7b5|black|
|r1bqkb1r/3n1ppp/p2p1n2/1p2pN2/4P3/1BN5/PPP2PPP/R1BQK2R b KQkq - 1 9|d7c5|black|
|r1bqkb1r/5ppp/p2p1n2/1pn1pN

                                                                                

In [9]:
# Moves played with both colours are present: count rows per colour
df_jugadas.groupBy("Color").count().show()

+-----+-----+
|Color|count|
+-----+-----+
|white| 1420|
|black| 1779|
+-----+-----+



⚠️ Consideraciones clave para adaptar tu flujo a Spark MLlib:
MLlib no soporta tensores 3D (8x8x12) directamente.

MLlib trabaja con Vector (denso o disperso), por lo tanto debes aplanar tu tensor a un vector de tamaño 8×8×12 = 768.

No hay soporte nativo para LabelEncoder de sklearn, pero puedes hacer lo mismo con StringIndexer de Spark.

UDFs con NumPy son posibles, pero deben devolver estructuras planas (List[Float]), no arrays 3D.

In [11]:
from pyspark.sql.functions import udf, col
from pyspark.sql.types import ArrayType, FloatType
import numpy as np
from pyspark.ml.feature import StringIndexer

# FEN -> flat feature vector (one-hot piece planes)
# Plane index per piece letter: white P,N,B,R,Q,K -> 0..5, black p,n,b,r,q,k -> 6..11
piece_to_plane = {letra: plano for plano, letra in enumerate("PNBRQKpnbrqk")}

def fen_to_vector(fen):
    """Encode the piece-placement field of a FEN as a flat list of 768 floats.

    An 8x8x12 float32 array is filled with a 1 at ``[row, column, plane]`` for
    every piece, where ``row`` is the index of the '/'-separated FEN row,
    ``column`` advances by digits (empty squares) and by one per piece, and
    ``plane`` comes from ``piece_to_plane``. The array is then flattened.

    Args:
        fen: FEN string; only the part before the first space is used.

    Returns:
        list[float]: 8 * 8 * 12 = 768 values, each 0.0 or 1.0.
    """
    tensor = np.zeros((8, 8, 12), dtype=np.float32)
    colocacion = fen.split(' ')[0]
    for fila_idx, fila_txt in enumerate(colocacion.split('/')):
        columna = 0
        for simbolo in fila_txt:
            if simbolo.isdigit():
                # A digit is a run of empty squares.
                columna += int(simbolo)
                continue
            if simbolo in piece_to_plane:
                tensor[fila_idx, columna, piece_to_plane[simbolo]] = 1
                columna += 1
    return tensor.flatten().tolist()

# Register the UDF; it returns an array of floats
fen_udf = udf(fen_to_vector, ArrayType(FloatType()))

# Apply the UDF: add a "features" column computed from the FEN
df_feat = df_jugadas.withColumn("features", fen_udf(col("FEN")))

# Split by colour so each colour gets its own dataset and label space
df_white = df_feat.filter(col("Color") == "white")
df_black = df_feat.filter(col("Color") == "black")

# Fit the indexers: each distinct UCI move string becomes a numeric label
indexer_white = StringIndexer(inputCol="Move", outputCol="label")
model_white = indexer_white.fit(df_white)
df_white_indexed = model_white.transform(df_white).select("features", "label")

indexer_black = StringIndexer(inputCol="Move", outputCol="label")
model_black = indexer_black.fit(df_black)
df_black_indexed = model_black.transform(df_black).select("features", "label")

# Save the classes (the indexers' label lists) so label indices can be mapped back to moves later
np.save("encoder_blancas.npy", model_white.labels)
np.save("encoder_negras.npy", model_black.labels)

In [12]:
# Inspect the schema and one sample row of each indexed (features, label) dataset
df_white_indexed.printSchema()
df_white_indexed.show(1)
df_black_indexed.printSchema()
df_black_indexed.show(1)

root
 |-- features: array (nullable = true)
 |    |-- element: float (containsNull = true)
 |-- label: double (nullable = false)

+--------------------+-----+
|            features|label|
+--------------------+-----+
|[0.0, 0.0, 0.0, 0...| 19.0|
+--------------------+-----+
only showing top 1 row

root
 |-- features: array (nullable = true)
 |    |-- element: float (containsNull = true)
 |-- label: double (nullable = false)

+--------------------+-----+
|            features|label|
+--------------------+-----+
|[0.0, 0.0, 0.0, 0...|  7.0|
+--------------------+-----+
only showing top 1 row



1. Lo que puedes hacer en un entorno distribuido con Spark MLlib
MLlib no soporta CNNs ni tensores 4D. Su diseño está enfocado a:

Modelos clásicos: árboles, regresión, SVM, redes densas básicas.

Operación sobre vectores 1D (features) y etiquetas (label).

Entrenamiento distribuido y escalable, pero no deep learning como en Keras.

2. Alternativas si necesitas CNN en un entorno distribuido
Aunque MLlib no soporta CNN, puedes usar frameworks especializados en deep learning que sí funcionan en entornos distribuidos:

a) TensorFlow o PyTorch con Horovod o Spark
Puedes integrar TensorFlow o PyTorch con Horovod, que permite entrenamiento distribuido sobre múltiples nodos.

También puedes usar TensorFlowOnSpark o BigDL (una biblioteca de deep learning optimizada para Spark) para ejecutar redes neuronales sobre clústeres.

b) BigDL
BigDL es una alternativa poderosa si ya estás usando Spark y quieres hacer deep learning directamente sobre ese ecosistema.

Soporta CNN, RNN y otros modelos complejos, y permite entrenamiento distribuido.

Conclusión
MLlib no es adecuado para CNN, pero puedes usar herramientas externas como TensorFlow + Horovod, BigDL o TensorFlowOnSpark si necesitas entrenamiento distribuido de redes neuronales profundas.

⚠️ Limitación clave: Spark ≠ TensorFlow
Spark y TensorFlow usan entornos de ejecución diferentes:

Spark distribuye y mantiene los DataFrames (p. ej. df_white_indexed y df_black_indexed) en su propio contexto de ejecución (con sus RDDs y DAGs).

TensorFlow o Keras necesita los datos en forma de NumPy o Tensor en memoria local.

In [13]:
# Save the DataFrames as Parquet on the shared volume
# df_final.write.mode("overwrite").parquet("/shared/fen_jugadas.parquet")   DOES NOT WORK
import os

# Create the output folders if they do not exist
os.makedirs("/notebooks/datos_cnn/blancas", exist_ok=True)
os.makedirs("/notebooks/datos_cnn/negras", exist_ok=True)

# Collect each dataset to the driver as pandas and write it as a Parquet file
df_white_pandas = df_white_indexed.toPandas()
df_black_pandas = df_black_indexed.toPandas()

df_white_pandas.to_parquet("/notebooks/datos_cnn/blancas/fen_jugadas.parquet", index=False)
df_black_pandas.to_parquet("/notebooks/datos_cnn/negras/fen_jugadas.parquet", index=False)


In [14]:
!ls -l /notebooks/datos_cnn/blancas
!ls -l /notebooks/datos_cnn/negras

total 40
-rw-r--r-- 1 root root 40189 May 10 20:22 fen_jugadas.parquet
total 48
-rw-r--r-- 1 root root 46712 May 10 20:22 fen_jugadas.parquet


In [12]:
# Creamos una función de entrenamiento para reutilizarla tanto en blancas como en negras
import warnings
warnings.filterwarnings("ignore", category=UserWarning, message="To exit: use 'exit', 'quit', or Ctrl-D.")

def entrenar_cnn_distribuida(data_path, model_path, epochs=5, batch_size=32,
                             test_size=0.1, random_state=None,
                             num_executors=2, num_ps=1, log_dir="/tmp/tf_logs"):
    """Launch CNN training for one colour on a TensorFlowOnSpark cluster.

    Each TensorFlow node runs ``main_fun``: it loads the Parquet file, reshapes
    the flat feature vectors to (8, 8, 12), trains a small Conv2D network and,
    on the node whose job name is ``"chief"``, saves the model.

    Args:
        data_path: Parquet file with a ``features`` column (768 values per row)
            and a numeric ``label`` column.
        model_path: Destination passed to ``model.save`` on the chief node.
        epochs: Training epochs (default 5, the previous hard-coded value).
        batch_size: Mini-batch size (default 32).
        test_size: Fraction held out for validation (default 0.1).
        random_state: Seed for the train/validation split; ``None`` keeps the
            previous unseeded behaviour.
        num_executors: Number of Spark executors handed to ``TFCluster.run``.
        num_ps: Number of parameter-server nodes handed to ``TFCluster.run``.
        log_dir: TensorFlow log directory handed to ``TFCluster.run``.

    Returns:
        None.

    Note:
        Nothing in this function waits for training to finish or shuts the
        cluster down (``cluster.shutdown()`` is left commented out, as in the
        original). NOTE(review): confirm whether ``TFCluster.run`` blocks in
        ``InputMode.TENSORFLOW``; the notebook asks for a kernel restart
        between the two trainings.
    """
    from tensorflowonspark import TFCluster
    import tensorflow as tf
    import pandas as pd
    import numpy as np
    from sklearn.model_selection import train_test_split

    def model_fn(num_classes):
        """Build and compile the CNN: two Conv2D/MaxPooling stages, then dense layers."""
        model = tf.keras.Sequential([
            tf.keras.Input(shape=(8, 8, 12)),
            tf.keras.layers.Conv2D(64, (3, 3), activation='relu', padding='same'),
            tf.keras.layers.MaxPooling2D((2, 2)),
            tf.keras.layers.Conv2D(128, (3, 3), activation='relu', padding='same'),
            tf.keras.layers.MaxPooling2D((2, 2)),
            tf.keras.layers.Flatten(),
            tf.keras.layers.Dense(256, activation='relu'),
            tf.keras.layers.Dropout(0.3),
            tf.keras.layers.Dense(num_classes, activation='softmax')
        ])
        model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])
        return model

    def main_fun(args, ctx):
        """Per-node entry point: load data, train, and save on the chief node."""
        df = pd.read_parquet(args["data_path"])
        X = np.array(df["features"].tolist()).reshape(-1, 8, 8, 12)
        y = df["label"].astype(np.int32).values
        if y.size == 0:
            raise ValueError(f"No training rows found in {args['data_path']}")

        # Size the softmax from the largest label id rather than the number of
        # distinct labels: sparse_categorical_crossentropy needs every label to be
        # < num_classes. The two agree when labels are contiguous 0..K-1.
        num_classes = int(y.max()) + 1

        X_train, X_test, y_train, y_test = train_test_split(
            X, y, test_size=args["test_size"], random_state=args["random_state"])

        model = model_fn(num_classes)
        model.fit(X_train, y_train, epochs=args["epochs"],
                  validation_data=(X_test, y_test), batch_size=args["batch_size"])

        if ctx.job_name == "chief":
            model.save(args["model_path"])

    # Get (or reuse) the Spark session and launch the TensorFlow cluster on it
    from pyspark.sql import SparkSession
    spark = SparkSession.builder.appName("TFoS-CNN").getOrCreate()
    sc = spark.sparkContext

    args = {
        "data_path": data_path,
        "model_path": model_path,
        "epochs": epochs,
        "batch_size": batch_size,
        "test_size": test_size,
        "random_state": random_state,
    }
    cluster = TFCluster.run(sc, main_fun, args, num_executors=num_executors, num_ps=num_ps,
                            input_mode=TFCluster.InputMode.TENSORFLOW,
                            master_node="chief", log_dir=log_dir)
    # cluster.shutdown()  # only when running outside a notebook




In [16]:
# Train the model for the moves played with White
entrenar_cnn_distribuida(
    data_path="/notebooks/datos_cnn/blancas/fen_jugadas.parquet",
    model_path="/notebooks/datos_cnn/modelo_blancas"
)

2025-05-10 20:22:10.230176: I tensorflow/tsl/cuda/cudart_stub.cc:28] Could not find cuda drivers on your machine, GPU will not be used.
2025-05-10 20:22:10.275533: I tensorflow/tsl/cuda/cudart_stub.cc:28] Could not find cuda drivers on your machine, GPU will not be used.
2025-05-10 20:22:10.276119: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.
25/05/10 20:22:12 WARN SparkSession: Using an existing Spark session; only runtime SQL configurations will take effect.
2025-05-10 20:22:12.598739: I tensorflow/tsl/cuda/cudart_stub.cc:28] Could not find cuda drivers on your machine, GPU will not be used.
2025-05-10 20:22:12.598739: I tensorflow/tsl/cuda/cudart_stub.cc:28] Could not find cuda drivers on your machine, GPU will not be used.
2025-05-10 20:22:12.6492

In [None]:
## IMPORTANTE: REINICIAR EL KERNEL
## Ejecutar la celda de la función "entrenar_cnn_distribuida" y luego la siguiente, el modelo para negras

In [2]:
# Train the model for the moves played with Black
entrenar_cnn_distribuida(
    data_path="/notebooks/datos_cnn/negras/fen_jugadas.parquet",
    model_path="/notebooks/datos_cnn/modelo_negras"
)


2025-05-10 20:22:54.604818: I tensorflow/tsl/cuda/cudart_stub.cc:28] Could not find cuda drivers on your machine, GPU will not be used.
2025-05-10 20:22:54.660421: I tensorflow/tsl/cuda/cudart_stub.cc:28] Could not find cuda drivers on your machine, GPU will not be used.
2025-05-10 20:22:54.661592: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.
Setting default log level to "WARN".
To adjust logging level use sc.setLogLevel(newLevel). For SparkR, use setLogLevel(newLevel).
25/05/10 20:22:59 WARN NativeCodeLoader: Unable to load native-hadoop library for your platform... using builtin-java classes where applicable
2025-05-10 20:23:00,707 INFO (MainThread-3313) Reserving TFSparkNodes 
2025-05-10 20:23:00,708 INFO (MainThread-3313) cluster_template: {'ps': 

In [5]:
!ls -l /notebooks/datos_cnn/modelo_blancas/
!ls -l /notebooks/datos_cnn/modelo_negras/



IOStream.flush timed out
total 172
drwxr-xr-x 2 root root   4096 May 10 19:55 assets
-rw-r--r-- 1 root root     55 May 10 20:22 fingerprint.pb
-rw-r--r-- 1 root root  16623 May 10 20:22 keras_metadata.pb
-rw-r--r-- 1 root root 140745 May 10 20:22 saved_model.pb
drwxr-xr-x 2 root root   4096 May 10 20:22 variables
IOStream.flush timed out
total 172
drwxr-xr-x 2 root root   4096 May 10 19:57 assets
-rw-r--r-- 1 root root     57 May 10 20:23 fingerprint.pb
-rw-r--r-- 1 root root  16623 May 10 20:23 keras_metadata.pb
-rw-r--r-- 1 root root 140745 May 10 20:23 saved_model.pb
drwxr-xr-x 2 root root   4096 May 10 20:23 variables


## Evaluación del entrenamiento
Pendiente (TODO): todavía falta evaluar los modelos entrenados.

## Juego contra el Bot estilo Nakamura

### 6.1 Nakamura juega con blancas

In [6]:
# !pip install pygame

IOStream.flush timed out


### Instalar el wrapper de Python para Stockfish (también se necesita el binario ejecutable en el sistema)

In [None]:
#!pip install stockfish

In [None]:
# INSTALACION DEL EJECUTABLE
#cd /notebooks
#rm -rf Stockfish
#git clone --branch sf_15 https://github.com/official-stockfish/Stockfish.git
#cd Stockfish/src
#make build ARCH=x86-64
#mkdir -p /notebooks/motor_ejecutable
#cp stockfish /notebooks/motor_ejecutable/stockfish
#chmod +x /notebooks/motor_ejecutable/stockfish




In [15]:
import pygame
import chess
import numpy as np
from tensorflow.keras.models import load_model
from stockfish import Stockfish
import os

# --- CONFIGURATION ---
MODEL_PATH = "/notebooks/datos_cnn/modelo_blancas"
STOCKFISH_PATH = "motor_ejecutable/stockfish"
ENCODER_PATH = "encoder_blancas.npy"  # label list written with np.save earlier in the notebook

# Initialise pygame and open a square window; each board square is WIDTH // 8 pixels
pygame.init()
WIDTH, HEIGHT = 512, 512
SQ_SIZE = WIDTH // 8
screen = pygame.display.set_mode((WIDTH, HEIGHT))
pygame.display.set_caption("Juega contra Nakamura")

# Load the piece images using absolute paths built from the current working directory
BASE_DIR = os.getcwd()
PIECE_DIR = os.path.join(BASE_DIR, "pieces")

PIECE_IMAGES = {}
# Piece symbol -> image file stem: lowercase (black) pieces use the "<LETTER>1" files
PIECE_MAPPING = {
    "r": "R1", "n": "N1", "b": "B1", "q": "Q1", "k": "K1", "p": "P1",
    "R": "R", "N": "N", "B": "B", "Q": "Q", "K": "K", "P": "P"
}
for symbol, name in PIECE_MAPPING.items():
    # PIECE_DIR was previously computed but never used; the load now goes through it.
    PIECE_IMAGES[symbol] = pygame.transform.scale(
        pygame.image.load(os.path.join(PIECE_DIR, f"{name}.png")), (SQ_SIZE, SQ_SIZE))

# Load the trained model and the label list that maps output indices to UCI moves
model = load_model(MODEL_PATH)
encoder_classes = np.load(ENCODER_PATH, allow_pickle=True)

# Start the engine
# NOTE(review): both an Elo rating and a skill level are set; confirm which one the
# stockfish wrapper actually honours when both are given.
stockfish = Stockfish(path=STOCKFISH_PATH, parameters={"Threads": 2, "Minimum Thinking Time": 100})
stockfish.set_elo_rating(2800)
stockfish.set_skill_level(20)

def fen_to_tensor(fen):
    """Encode the piece-placement field of a FEN as an (8, 8, 12) float32 array.

    Axis 0 is the index of the '/'-separated FEN row, axis 1 the column within
    that row, and axis 2 the piece plane (white P,N,B,R,Q,K -> 0..5, black
    p,n,b,r,q,k -> 6..11). A square holding a piece gets a 1 in its plane.

    Args:
        fen: FEN string; only the part before the first space is used.

    Returns:
        numpy.ndarray: Array of shape (8, 8, 12), dtype float32.
    """
    planos = dict(zip("PNBRQKpnbrqk", range(12)))
    tablero = np.zeros((8, 8, 12), dtype=np.float32)
    colocacion = fen.split(' ')[0]
    for fila_idx, fila_txt in enumerate(colocacion.split('/')):
        columna = 0
        for simbolo in fila_txt:
            if simbolo.isdigit():
                # A digit is a run of empty squares.
                columna += int(simbolo)
                continue
            if simbolo in planos:
                tablero[fila_idx, columna, planos[simbolo]] = 1
                columna += 1
    return tablero

def flip_square_name(uci_move):
    """Return the UCI move with both squares mirrored (a<->h, ..., 1<->8).

    Only the first four characters (origin and destination squares) are used,
    so a promotion suffix such as the ``q`` in ``e7e8q`` is dropped.

    Args:
        uci_move: Move in UCI notation, e.g. ``"e2e4"``.

    Returns:
        str: Four-character mirrored move, e.g. ``"d7d5"`` for ``"e2e4"``.
    """
    espejo = dict(zip("abcdefgh", "hgfedcba"))

    def flip(sq):
        letra, numero = sq[0], sq[1]
        return espejo[letra] + str(9 - int(numero))

    origen, destino = uci_move[:2], uci_move[2:4]
    return flip(origen) + flip(destino)

def predict_move(fen, board, top_n=10, umbral_cp=20):
    """Choose the bot's move: Stockfish shortlists, the CNN picks among near-equal moves.

    Steps:
      1. If the engine evaluation is a mate with value <= 3, play the engine's best move.
      2. Ask the engine for its ``top_n`` moves. If none come back, return a random legal move.
      3. If the best move has no centipawn score, play it directly.
      4. Keep the moves whose centipawn score is within ``umbral_cp`` of the best one.
         Moves without a centipawn score (``None``) are skipped.
      5. With a single candidate, play it. Otherwise score each candidate with the
         network's output for that move (0.0 if the move is not in ``encoder_classes``)
         and play the highest-scoring one.

    Args:
        fen: FEN of the position to move in.
        board: ``chess.Board`` for the same position (used only for the random fallback).
        top_n: How many engine moves to request.
        umbral_cp: Maximum centipawn gap to the best move for a move to stay a candidate.

    Returns:
        chess.Move: The selected move.
    """
    stockfish.set_fen_position(fen)
    eval_info = stockfish.get_evaluation()

    if eval_info["type"] == "mate" and eval_info["value"] is not None and eval_info["value"] <= 3:
        best_uci = stockfish.get_best_move()
        print("¡Mate detectado! Stockfish lo ejecuta:", best_uci)
        return chess.Move.from_uci(best_uci)

    top_moves_info = stockfish.get_top_moves(top_n)
    if not top_moves_info:
        print("No hay jugadas válidas.")
        return np.random.choice(list(board.legal_moves))

    best_eval = top_moves_info[0].get("Centipawn")
    if best_eval is None:
        print("Evaluación no disponible. Stockfish mueve.")
        return chess.Move.from_uci(top_moves_info[0]["Move"])

    # A move can carry "Centipawn": None (the key is present, so a .get default does
    # not apply); subtracting None would raise TypeError, so such moves are skipped.
    candidatas = []
    for move_info in top_moves_info:
        cp = move_info.get("Centipawn")
        if cp is not None and abs(best_eval - cp) <= umbral_cp:
            candidatas.append(move_info)

    if len(candidatas) == 1:
        best_move = chess.Move.from_uci(candidatas[0]["Move"])
        print("Jugada clara. Stockfish la ejecuta:", best_move.uci())
        return best_move

    tensor = fen_to_tensor(fen).reshape(1, 8, 8, 12)
    prediction = model.predict(tensor, verbose=0)[0]

    scored = []
    for move_info in candidatas:
        move_uci = move_info["Move"]
        try:
            # Position of the move in the saved label list == index of its network output
            idx = np.where(encoder_classes == move_uci)[0][0]
            ia_score = prediction[idx]
        except IndexError:
            # Move never seen in training: no output unit for it
            ia_score = 0.0
        scored.append((move_uci, ia_score))

    best_uci, best_score = max(scored, key=lambda x: x[1])
    # The printed move name is mirrored with flip_square_name; the returned move is not.
    print(f"\nJugada estilo Nakamura: {flip_square_name(best_uci)} (IA score: {best_score:.4f})")
    return chess.Move.from_uci(best_uci)

def draw_board(board):
    """Paint the chessboard and the pieces of ``board`` onto the global ``screen``.

    First pass: fill the 8x8 grid, alternating white and gray squares.
    Second pass: for screen cell (row, col), look up the piece on
    ``chess.square(7 - col, row)`` and blit its image there, i.e. the files
    are mirrored horizontally relative to the screen columns.

    Args:
        board: ``chess.Board`` to render.
    """
    claro, oscuro = pygame.Color("white"), pygame.Color("gray")

    # Pass 1: background squares
    for fila in range(8):
        for columna in range(8):
            relleno = oscuro if (fila + columna) % 2 else claro
            casilla = pygame.Rect(columna * SQ_SIZE, fila * SQ_SIZE, SQ_SIZE, SQ_SIZE)
            pygame.draw.rect(screen, relleno, casilla)

    # Pass 2: pieces on top of the squares
    for fila in range(8):
        for columna in range(8):
            pieza = board.piece_at(chess.square(7 - columna, fila))
            if not pieza:
                continue
            imagen = PIECE_IMAGES.get(pieza.symbol())
            if imagen:
                screen.blit(imagen, (columna * SQ_SIZE, fila * SQ_SIZE))

# Main loop: the bot plays White, the human plays Black by clicking origin then destination
board = chess.Board()
running = True
selected_square = None
game_over = False

draw_board(board)
pygame.display.flip()

# Opening move by the bot
ai_move = predict_move(board.fen(), board)
board.push(ai_move)

while running:
    draw_board(board)
    pygame.display.flip()

    # Announce the result once, the first time the game is detected as over
    if not game_over and board.is_game_over():
        game_over = True
        result = board.result()
        if board.is_checkmate():
            # The side to move is the one that has been mated
            winner = "Blancas" if board.turn == chess.BLACK else "Negras"
            print(f"Jaque mate. Ganan {winner}")
        else:
            print("Tablas")

    for event in pygame.event.get():
        if event.type == pygame.QUIT:
            running = False

        elif event.type == pygame.MOUSEBUTTONDOWN and not game_over:
            # Map the pixel to a board square using the same mirrored layout as draw_board
            x, y = pygame.mouse.get_pos()
            col = 7 - (x // SQ_SIZE)
            row = y // SQ_SIZE
            square = chess.square(col, row)

            if selected_square is None:
                # First click: only a black piece can be picked up
                piece = board.piece_at(square)
                if piece and piece.color == chess.BLACK:
                    selected_square = square
            else:
                # Second click: try to play origin -> destination
                move = chess.Move(selected_square, square)
                moving_piece = board.piece_at(selected_square)
                if (moving_piece is not None
                        and moving_piece.piece_type == chess.PAWN
                        and chess.square_rank(square) in (0, 7)):
                    # A pawn reaching the last rank must name a promotion piece, otherwise
                    # the move is never in legal_moves; promote to a queen by default.
                    move = chess.Move(selected_square, square, promotion=chess.QUEEN)
                if move in board.legal_moves:
                    board.push(move)
                    if not board.is_game_over():
                        ai_move = predict_move(board.fen(), board)
                        board.push(ai_move)
                selected_square = None

pygame.quit()



Jugada estilo Nakamura: d7d5 (IA score: 0.1016)




KeyboardInterrupt: 



In [13]:
import os
print(os.getcwd())                      # show the current working directory
print(os.listdir("pieces"))             # list the files in the pieces folder


/notebooks
['K.png', 'Q1.png', 'K1.png', 'N1.png', 'R.png', 'P.png', 'Q.png', 'N.png', 'R1.png', 'B1.png', 'P1.png', 'B.png']




Este tipo de interfaz con pygame debe ejecutarse en un entorno local con GUI, como:

Un Jupyter Notebook instalado directamente en tu PC (no dentro de contenedor Docker sin GUI)

Un script .py ejecutado desde tu sistema operativo con Python local

In [16]:
# Write the pygame game as a standalone script so it can run outside the notebook.
# The file is written explicitly as UTF-8: the script text contains non-ASCII
# characters, and Python reads source files as UTF-8 by default.
with open("nakamura_bot_blancas.py", "w", encoding="utf-8") as f:
    f.write("""import pygame
import chess
import numpy as np
from tensorflow.keras.models import load_model
from stockfish import Stockfish
import os

# Configuración Pygame
pygame.init()
WIDTH, HEIGHT = 512, 512
SQ_SIZE = WIDTH // 8
screen = pygame.display.set_mode((WIDTH, HEIGHT))
pygame.display.set_caption("Juega contra Nakamura")

# Cargar imágenes de piezas
PIECE_IMAGES = {}
PIECE_MAPPING = {
    "r": "R1", "n": "N1", "b": "B1", "q": "Q1", "k": "K1", "p": "P1",
    "R": "R", "N": "N", "B": "B", "Q": "Q", "K": "K", "P": "P"
}
for symbol, name in PIECE_MAPPING.items():
    PIECE_IMAGES[symbol] = pygame.transform.scale(
        pygame.image.load(f"pieces/{name}.png"), (SQ_SIZE, SQ_SIZE))

# Cargar modelo y encoder
MODEL_PATH = "datos_cnn/modelo_blancas"
ENCODER_PATH = "encoder_blancas.npy"
STOCKFISH_PATH = "motor_ejecutable/stockfish"

model = load_model(MODEL_PATH, compile=False)
encoder_classes = np.load(ENCODER_PATH, allow_pickle=True)

# Iniciar Stockfish
stockfish = Stockfish(path=STOCKFISH_PATH, parameters={
    "Threads": 2,
    "Minimum Thinking Time": 100
})
stockfish.set_elo_rating(2800)
stockfish.set_skill_level(20)

# FEN a tensor
def fen_to_tensor(fen):
    piece_to_plane = {
        'P': 0, 'N': 1, 'B': 2, 'R': 3, 'Q': 4, 'K': 5,
        'p': 6, 'n': 7, 'b': 8, 'r': 9, 'q': 10, 'k': 11
    }
    tensor = np.zeros((8, 8, 12), dtype=np.float32)
    fen_board = fen.split(' ')[0]
    rows = fen_board.split('/')
    for i, row in enumerate(rows):
        col = 0
        for char in row:
            if char.isdigit():
                col += int(char)
            elif char in piece_to_plane:
                tensor[i, col, piece_to_plane[char]] = 1
                col += 1
    return tensor

# Inversión visual
def flip_square_name(uci_move):
    col_map = {'a': 'h', 'b': 'g', 'c': 'f', 'd': 'e', 'e': 'd', 'f': 'c', 'g': 'b', 'h': 'a'}
    def flip(sq):
        col, row = sq[0], sq[1]
        return col_map[col] + str(9 - int(row))
    return flip(uci_move[:2]) + flip(uci_move[2:4])

# Jugada IA
def predict_move(fen, board, top_n=10, umbral_cp=20):
    stockfish.set_fen_position(fen)
    eval_info = stockfish.get_evaluation()

    if eval_info["type"] == "mate" and eval_info["value"] is not None and eval_info["value"] <= 3:
        best_uci = stockfish.get_best_move()
        print("¡Mate detectado! Stockfish lo ejecuta:", best_uci)
        return chess.Move.from_uci(best_uci)

    top_moves_info = stockfish.get_top_moves(top_n)
    if not top_moves_info:
        print("No hay jugadas válidas.")
        return np.random.choice(list(board.legal_moves))

    best_eval = top_moves_info[0].get("Centipawn")
    if best_eval is None:
        print("Evaluación no disponible. Stockfish mueve.")
        return chess.Move.from_uci(top_moves_info[0]["Move"])

    candidatas = []
    for move_info in top_moves_info:
        cp = move_info.get("Centipawn")
        if cp is not None and abs(best_eval - cp) <= umbral_cp:
            candidatas.append(move_info)

    if len(candidatas) == 1:
        best_move = chess.Move.from_uci(candidatas[0]["Move"])
        print("Jugada clara. Stockfish la ejecuta:", best_move.uci())
        return best_move

    tensor = fen_to_tensor(fen).reshape(1, 8, 8, 12)
    prediction = model.predict(tensor, verbose=0)[0]

    scored = []
    for move_info in candidatas:
        move_uci = move_info["Move"]
        try:
            idx = np.where(encoder_classes == move_uci)[0][0]
            ia_score = prediction[idx]
        except IndexError:
            ia_score = 0.0
        scored.append((move_uci, ia_score))

    best_uci, best_score = max(scored, key=lambda x: x[1])
    print(f"\\nJugada estilo Nakamura: {flip_square_name(best_uci)} (IA score: {best_score:.4f})")
    return chess.Move.from_uci(best_uci)

# Dibujar tablero
def draw_board(board):
    colors = [pygame.Color("white"), pygame.Color("gray")]
    for row in range(8):
        for col in range(8):
            color = colors[(row + col) % 2]
            pygame.draw.rect(screen, color, pygame.Rect(col * SQ_SIZE, row * SQ_SIZE, SQ_SIZE, SQ_SIZE))

    for row in range(8):
        for col in range(8):
            visual_row = row
            visual_col = col
            square = chess.square(7 - visual_col, visual_row)
            piece = board.piece_at(square)
            if piece:
                img = PIECE_IMAGES.get(piece.symbol())
                if img:
                    screen.blit(img, (col * SQ_SIZE, row * SQ_SIZE))

# Juego principal
board = chess.Board()
running = True
selected_square = None
game_over = False

draw_board(board)
pygame.display.flip()

# Primer movimiento del bot (blancas)
ai_move = predict_move(board.fen(), board)
board.push(ai_move)

while running:
    draw_board(board)
    pygame.display.flip()

    if not game_over and board.is_game_over():
        game_over = True
        result = board.result()
        if board.is_checkmate():
            winner = "Blancas" if board.turn == chess.BLACK else "Negras"
            print(f"Jaque mate. Ganan {winner}")
        else:
            print("Tablas")

    for event in pygame.event.get():
        if event.type == pygame.QUIT:
            running = False

        elif event.type == pygame.MOUSEBUTTONDOWN and not game_over:
            x, y = pygame.mouse.get_pos()
            col = 7 - (x // SQ_SIZE)
            row = y // SQ_SIZE
            square = chess.square(col, row)

            if selected_square is None:
                piece = board.piece_at(square)
                if piece and piece.color == chess.BLACK:
                    selected_square = square
            else:
                move = chess.Move(selected_square, square)
                moving_piece = board.piece_at(selected_square)
                if (moving_piece is not None
                        and moving_piece.piece_type == chess.PAWN
                        and chess.square_rank(square) in (0, 7)):
                    # Un peón que llega a la última fila debe indicar la pieza de promoción
                    move = chess.Move(selected_square, square, promotion=chess.QUEEN)
                if move in board.legal_moves:
                    board.push(move)
                    if not board.is_game_over():
                        ai_move = predict_move(board.fen(), board)
                        board.push(ai_move)
                selected_square = None

pygame.quit()
""")





In [None]:
#instalar comunicación X11

apt update && apt install -y x11-utils x11-xserver-utils

apt update && apt install -y x11-utils x11-apps

Local (fuera del contenedor): xhost +local:root





En el fichero de docker-compose, dentro del servicio spark-client, añadir:
services:
  spark-client:
    # ... tus otras configuraciones
    environment:
      - DISPLAY=${DISPLAY}
    volumes:
      - /tmp/.X11-unix:/tmp/.X11-unix

En el host (fuera de docker): xhost +local:root

docker compose down
docker compose up -d

Dentro del contenedor spark-client:


echo $DISPLAY
Debería decir algo como :0

apt update && apt install -y x11-apps

xclock
Debería aparecer una ventana de reloj

cd notebooks

python3 nakamura_bot_blancas.py



In [40]:
## ENTRENAMIENTO LOCAL EN SPARK-CLIENT (USÉ EL ANTERIOR)

import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Input, Conv2D, MaxPooling2D, Flatten, Dense, Dropout
from tensorflow.keras.optimizers import Adam
import tensorflow as tf

# Load the (features, label) table exported from Spark
df = pd.read_parquet("/shared/fen_jugadas.parquet")

# Seed Python, NumPy and TensorFlow so the split and the weight
# initialisation are repeatable across kernel restarts.
SEED = 42
tf.keras.utils.set_random_seed(SEED)

# Convert to NumPy tensors: every flat feature vector becomes an 8x8x12 board
X = np.array(df["features"].tolist()).reshape(-1, 8, 8, 12)
labels = df["label"].values.astype(int)

# Re-index the labels to contiguous ids 0..K-1. The softmax layer has exactly
# K outputs and sparse categorical cross-entropy requires targets in [0, K),
# so raw label values with gaps (e.g. a filtered export) would break training.
# `classes[i]` is the original label for model output i; when the labels are
# already 0..K-1 this is a no-op.
classes, y = np.unique(labels, return_inverse=True)

# Hold out 10% of the samples for validation
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.1, random_state=SEED
)

# Define the CNN: two conv/pool stages followed by a dense classifier
model = Sequential([
    Input(shape=(8, 8, 12)),
    Conv2D(64, (3, 3), activation='relu', padding='same'),
    MaxPooling2D((2, 2)),
    Conv2D(128, (3, 3), activation='relu', padding='same'),
    MaxPooling2D((2, 2)),
    Flatten(),
    Dense(256, activation='relu'),
    Dropout(0.3),
    Dense(len(classes), activation='softmax')  # one output per distinct label
])

# Compile and train
model.compile(optimizer=Adam(), loss='sparse_categorical_crossentropy', metrics=['accuracy'])
model.fit(X_train, y_train, epochs=10, batch_size=32, validation_data=(X_test, y_test))

# Save the model together with the output-index -> original-label lookup
model.save("/shared/modelo_nakamura.keras")
np.save("/shared/encoder.npy", classes)  # numeric labels, ordered by model output index


Epoch 1/10


2025-05-10 11:37:23.932746: E external/local_xla/xla/stream_executor/cuda/cuda_platform.cc:51] failed call to cuInit: INTERNAL: CUDA error: Failed call to cuInit: UNKNOWN ERROR (303)


[1m90/90[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 17ms/step - accuracy: 0.0071 - loss: 6.8118 - val_accuracy: 0.0031 - val_loss: 6.4299
Epoch 2/10
[1m90/90[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 12ms/step - accuracy: 0.0148 - loss: 6.3681 - val_accuracy: 0.0125 - val_loss: 6.4215
Epoch 3/10
[1m90/90[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 11ms/step - accuracy: 0.0171 - loss: 6.2490 - val_accuracy: 0.0094 - val_loss: 6.4129
Epoch 4/10
[1m90/90[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 12ms/step - accuracy: 0.0202 - loss: 6.0973 - val_accuracy: 0.0250 - val_loss: 6.4572
Epoch 5/10
[1m90/90[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 15ms/step - accuracy: 0.0246 - loss: 5.9505 - val_accuracy: 0.0219 - val_loss: 6.5551
Epoch 6/10
[1m90/90[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 11ms/step - accuracy: 0.0329 - loss: 5.8137 - val_accuracy: 0.0312 - val_loss: 6.5765
Epoch 7/10
[1m90/90[0m [32m━━━━━━━━━━━━━━━

In [14]:
import numpy as np
from pyspark.sql.functions import udf
from pyspark.sql.types import ArrayType, FloatType, IntegerType
from pyspark.ml.feature import StringIndexer

# Encode a FEN string as a flat 768-element vector (8 x 8 x 12)
def fen_to_flat_vector(fen):
    """Encode the piece placement of a FEN string as a flat one-hot vector.

    Only the first space-separated field of the FEN (the board layout) is
    used; side to move, castling rights and the remaining fields are ignored.

    Args:
        fen: FEN string, or None (e.g. a null value coming from a Spark column).

    Returns:
        A list of 768 floats laid out as (row, column, plane) in row-major
        order, i.e. index = (row * 8 + column) * 12 + plane, where row 0 is
        the first '/'-separated row of the FEN and the planes are
        P, N, B, R, Q, K, p, n, b, r, q, k. Returns None when `fen` is None so
        that a null input yields a null output instead of failing the UDF.
    """
    if fen is None:
        return None

    piece_to_plane = {
        'P': 0, 'N': 1, 'B': 2, 'R': 3, 'Q': 4, 'K': 5,
        'p': 6, 'n': 7, 'b': 8, 'r': 9, 'q': 10, 'k': 11
    }

    tensor = np.zeros((8, 8, 12), dtype=np.float32)
    fen_board = fen.split(' ')[0]
    rows = fen_board.split('/')

    for i, row in enumerate(rows):
        col = 0
        for char in row:
            if char.isdigit():
                # A digit stands for that many consecutive empty squares.
                col += int(char)
            elif char in piece_to_plane:
                plane = piece_to_plane[char]
                tensor[i, col, plane] = 1
                col += 1
    return tensor.flatten().tolist()

# Expose the FEN encoder to Spark as a UDF that yields an array of floats
fen_udf = udf(fen_to_flat_vector, ArrayType(FloatType()))

# Attach the encoded board to every row as the "features" column
df_ml = df_jugadas.withColumn(
    "features",
    fen_udf(col("FEN")),
)

# Turn the move strings into numeric class labels
indexer = StringIndexer(outputCol="label", inputCol="Move")
indexer_model = indexer.fit(df_ml)
df_ml = indexer_model.transform(df_ml)

# Keep only the two columns used for training
df_final = df_ml.select(["features", "label"])

# To preview a few rows: df_final.show(3, truncate=False)

print(df_final.columns)


['features', 'label']
