# Require tensorflow version 2.15.0

In [1]:
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, BatchNormalization, Flatten, Dense, Dropout
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint
import numpy as np
import pandas as pd
import gzip
import ast
import chess
import os
import time
import random
# If you're using the ChessEnv class
from stockfish import Stockfish

# Configure TensorFlow to use Metal GPU acceleration
physical_devices = tf.config.list_physical_devices('GPU')
if len(physical_devices) > 0:
    print(f"Found {len(physical_devices)} GPU(s):")
    for device in physical_devices:
        print(f"  {device.name}")
    # Configure TensorFlow to use the first GPU
    try:
        tf.config.experimental.set_memory_growth(physical_devices[0], True)
        print(f"Memory growth enabled for {physical_devices[0].name}")
    except Exception as e:
        print(f"Error configuring GPU: {e}")
else:
    print("No GPU devices found. Using CPU for computation.")

# Print TensorFlow version and compute device
print(f"TensorFlow version: {tf.__version__}")
print(f"Compute device: {tf.config.list_physical_devices()}")

gpu_devices = tf.config.list_physical_devices('GPU')
if gpu_devices:
    print("GPU devices available:")
    for gpu in gpu_devices:
        print(gpu)
else:
    print("No GPU devices found. Using CPU for computation.")


Found 1 GPU(s):
  /physical_device:GPU:0
Memory growth enabled for /physical_device:GPU:0
TensorFlow version: 2.15.0
Compute device: [PhysicalDevice(name='/physical_device:CPU:0', device_type='CPU'), PhysicalDevice(name='/physical_device:GPU:0', device_type='GPU')]
GPU devices available:
PhysicalDevice(name='/physical_device:GPU:0', device_type='GPU')


In [None]:
# Keep  existing split_dims function here
squares_index = {
    'a': 0,
    'b': 1,
    'c': 2,
    'd': 3,
    'e': 4,
    'f': 5,
    'g': 6,
    'h': 7
}

def square_to_index(square):
    letter = chess.square_name(square)
    return 8 - int(letter[1]), squares_index[letter[0]]

def split_dims(board):
    # this is the 3d matrix
    board3d = np.zeros((14, 8, 8), dtype=np.int8)

    # here we add the pieces's view on the matrix
    for piece in chess.PIECE_TYPES:
        for square in board.pieces(piece, chess.WHITE):
            idx = np.unravel_index(square, (8, 8))
            board3d[piece - 1][7 - idx[0]][idx[1]] = 1
        for square in board.pieces(piece, chess.BLACK):
            idx = np.unravel_index(square, (8, 8))
            board3d[piece + 5][7 - idx[0]][idx[1]] = 1

    # add attacks and valid moves too
    # so the network knows what is being attacked
    aux = board.turn
    board.turn = chess.WHITE

    for move in board.legal_moves:
        i, j = square_to_index(move.to_square)
        board3d[12][i][j] = 1

    board.turn = chess.BLACK

    for move in board.legal_moves:
        i, j = square_to_index(move.to_square)
        board3d[13][i][j] = 1

    board.turn = aux

    return board3d

In [None]:
def load_chess_data(filepath, batch_size=1024):
    """
    Generator to load chess data in batches to avoid memory issues.
    
    Args:
        filepath: Path to the gzipped CSV file
        batch_size: Number of samples to load at once
        
    Yields:
        X_batch, y_batch: Features and targets for training
    """
    # Open the gzipped file
    with gzip.open(filepath, 'rt') as f:
        # Read file in chunks to avoid memory issues
        for chunk in pd.read_csv(f, chunksize=batch_size):
            X_batch = []
            y_batch = []
            
            for _, row in chunk.iterrows():
                # Convert game_state string to numpy array
                try:
                    # This assumes game_state is stored as a string representation of a numpy array
                    # You'll need to adjust this based on how the data is actually stored
                    game_state_str = row['game_state']
                    
                    # Method 1: Using ast.literal_eval (safer but slower)
                    # game_state = np.array(ast.literal_eval(game_state_str))
                    
                    # Method 2: Using numpy's string parsing (adjust as needed)
                    # Remove unwanted characters and split into numbers
                    # This is a simplified example and may need adjustment based on  data format
                    clean_str = game_state_str.replace('[', '').replace(']', '').replace('\n', ' ')
                    values = np.fromstring(clean_str, sep=' ', dtype=np.int8)
                    game_state = values.reshape(14, 8, 8)
                    
                    # Get the evaluation (target)
                    eval_score = row['stateEval']
                    
                    # Normalize evaluation to [-1, 1] range (assuming original range is [-10, 10])
                    normalized_eval = eval_score / 10.0
                    
                    X_batch.append(game_state)
                    y_batch.append(normalized_eval)
                except Exception as e:
                    print(f"Error processing row: {e}")
                    continue
            
            if X_batch and y_batch:
                yield np.array(X_batch), np.array(y_batch)

In [4]:
def create_chess_cnn(input_shape=(14, 8, 8)):
    """
    Create a CNN for chess position evaluation.
    Optimized for Metal GPU acceleration on macOS.
    
    Args:
        input_shape: Shape of the input tensor (channels, height, width)
        
    Returns:
        A compiled Keras model
    """
    # Enable mixed precision training for better performance on Metal
    # This uses both float16 and float32 where appropriate
    tf.keras.mixed_precision.set_global_policy('mixed_float16')
    
    model = Sequential([
        # First convolutional block
        Conv2D(64, kernel_size=(3, 3), padding='same', activation='relu', 
               input_shape=input_shape),
        BatchNormalization(),
        Conv2D(64, kernel_size=(3, 3), padding='same', activation='relu'),
        BatchNormalization(),
        
        # Second convolutional block
        Conv2D(128, kernel_size=(3, 3), padding='same', activation='relu'),
        BatchNormalization(),
        Conv2D(128, kernel_size=(3, 3), padding='same', activation='relu'),
        BatchNormalization(),
        
        # Third convolutional block
        Conv2D(256, kernel_size=(3, 3), padding='same', activation='relu'),
        BatchNormalization(),
        
        # Fully connected layers
        Flatten(),
        Dense(512, activation='relu'),
        Dropout(0.3),
        Dense(256, activation='relu'),
        Dropout(0.3),
        
        # Output layer (regression for evaluation score)
        # Note: We use float32 for the output layer to maintain precision
        Dense(1, activation='tanh', dtype='float32')  # Force float32 output
    ])
    
    # Compile model
    model.compile(
        optimizer=tf.keras.optimizers.Adam(learning_rate=0.001),
        loss='mse',  # Mean squared error for regression
        metrics=['mae']  # Mean absolute error to track accuracy
    )
    
    return model

In [5]:
def train_chess_cnn(data_path, model_save_path='chess_cnn_model.h5', epochs=20):
    """
    Train the chess CNN on the processed data with Metal optimizations.
    
    Args:
        data_path: Path to the training data
        model_save_path: Where to save the trained model
        epochs: Number of training epochs
    """
    # Create the model
    model = create_chess_cnn()
    model.summary()  # Show model architecture
    
    # Start overall training timer
    overall_start_time = time.time()
    
    # Set up callbacks
    callbacks = [
        EarlyStopping(monitor='val_loss', patience=5, restore_best_weights=True),
        ModelCheckpoint(filepath=model_save_path, save_best_only=True, monitor='val_loss')
    ]
    
    # Create validation data generator
    print("Loading validation data...")
    validation_data = None
    for X_batch, y_batch in load_chess_data(data_path, batch_size=1024):
        validation_data = (X_batch, y_batch)
        print(f"Validation data shape: {X_batch.shape}")
        break  # Just use the first batch as validation data
    
    # Train on batches
    for epoch in range(epochs):
        epoch_start_time = time.time()
        print(f"Epoch {epoch+1}/{epochs}")
        batch_count = 0
        total_loss = 0
        total_mae = 0
        
        # Process each batch
        for X_batch, y_batch in load_chess_data(data_path, batch_size=512):  # Adjusted batch size for Metal
            batch_start_time = time.time()
            
            # Train on this batch
            history = model.fit(
                X_batch, y_batch,
                batch_size=64,  # Optimal batch size for Metal may differ from NVIDIA GPUs
                epochs=1,
                verbose=0,
                validation_data=validation_data
            )
            
            batch_count += 1
            total_loss += history.history['loss'][0]
            total_mae += history.history['mae'][0]
            
            batch_time = time.time() - batch_start_time
            
            # Print progress every 5 batches
            if batch_count % 5 == 0:
                print(f"  Batch {batch_count}: Loss = {total_loss/batch_count:.4f}, MAE = {total_mae/batch_count:.4f} (Time: {batch_time:.2f}s)")
                # Force garbage collection to free GPU memory (helpful for Metal)
                import gc
                gc.collect()
        
        # Evaluate on validation data at the end of each epoch
        eval_start_time = time.time()
        val_loss, val_mae = model.evaluate(validation_data[0], validation_data[1], verbose=0)
        eval_time = time.time() - eval_start_time
        
        epoch_time = time.time() - epoch_start_time
        print(f"  Epoch {epoch+1} completed in {epoch_time:.2f}s")
        print(f"  Validation: Loss = {val_loss:.4f}, MAE = {val_mae:.4f} (Eval time: {eval_time:.2f}s)")
    
    # Save the final model
    model.save(model_save_path)
    
    # Calculate and print total training time
    overall_training_time = time.time() - overall_start_time
    hours, remainder = divmod(overall_training_time, 3600)
    minutes, seconds = divmod(remainder, 60)
    
    print(f"\nModel saved to {model_save_path}")
    print(f"Total training time: {int(hours)}h {int(minutes)}m {seconds:.2f}s")
    
    return model

In [6]:
class EloAdaptiveChessAI:
    def __init__(self, model_path, min_elo=800, max_elo=2800):
        """
        Initialize the adaptive chess AI with a trained CNN model.
        
        Args:
            model_path: Path to the saved model file
            min_elo: Minimum Elo rating the AI can emulate
            max_elo: Maximum Elo rating the AI can emulate
        """
        from tensorflow_addons.optimizers import AdamW
        self.model = tf.keras.models.load_model(model_path, custom_objects={'AdamW': AdamW}, compile=False)
        self.min_elo = min_elo
        self.max_elo = max_elo
        self.current_elo = 1500  # Default Elo
        
    def set_elo(self, elo):
        """Set the Elo rating for the AI to emulate."""
        self.current_elo = max(min(elo, self.max_elo), self.min_elo)
        
    def _calculate_move_quality(self, elo_diff):
        """
        Calculate move quality factor based on Elo difference.
        Higher values (closer to 1) mean better moves.
        """
        # This is a simplified model - you'll want to tune this
        # We use a sigmoid-like function to map Elo difference to move quality
        return 1.0 / (1.0 + np.exp(-elo_diff / 400.0))
        
    def evaluate_position(self, board):
        """
        Evaluate the board position and adjust based on current Elo.
        
        Args:
            board: Chess board position
            
        Returns:
            Evaluation score adjusted for current Elo
        """
        # Convert board to CNN input format
        board_tensor = split_dims(board)
        board_tensor = np.expand_dims(board_tensor, axis=0)  # Add batch dimension
        
        # Get the raw evaluation (normalized between -1 and 1)
        raw_eval = self.model.predict(board_tensor, verbose=0)[0][0]
        
        # Scale back to original range (-10 to 10)
        raw_eval = raw_eval * 10.0
        
        # Calculate move quality based on Elo
        # Top GMs/engines are around 3000+ Elo
        move_quality = self._calculate_move_quality(self.current_elo - 1500)
        
        # Mix perfect evaluation with some randomness based on Elo
        # Lower Elo means more randomness
        noise_factor = 1.0 - move_quality
        noise = np.random.normal(0, noise_factor * 2.0)  # Standard deviation scales with Elo
        
        # Apply noise to the evaluation
        adjusted_eval = raw_eval * move_quality + noise
        
        return adjusted_eval
    
    def select_move(self, board, temperature=1.0):
        """
        Select a move based on position evaluation and current Elo.
        
        Args:
            board: Chess board position
            temperature: Controls randomness of move selection
            
        Returns:
            Selected chess move
        """
        legal_moves = list(board.legal_moves)
        if not legal_moves:
            return None
        
        # Evaluate all legal moves
        move_scores = []
        for move in legal_moves:
            # Make the move
            board.push(move)
            
            # Evaluate the new position
            score = -self.evaluate_position(board)  # Negate because we're evaluating after our move
            
            # Undo the move
            board.pop()
            
            move_scores.append((move, score))
        
        # Adjust temperature based on Elo
        # Lower Elo -> higher temperature (more randomness)
        elo_factor = (self.max_elo - self.current_elo) / (self.max_elo - self.min_elo)
        adjusted_temp = temperature * (1.0 + elo_factor * 2.0)
        
        # Convert scores to probabilities using softmax with temperature
        scores = np.array([score for _, score in move_scores])
        probabilities = np.exp(scores / adjusted_temp)
        probabilities = probabilities / np.sum(probabilities)
        
        # Sample a move based on probabilities
        move_idx = np.random.choice(len(legal_moves), p=probabilities)
        
        return move_scores[move_idx][0]

In [None]:
# Note: This assumes you have  ChessEnv class defined elsewhere in the notebook
class CNNChessEnv:
    """
    Chess environment that uses a CNN for position evaluation.
    """
    def __init__(self, model_path, elo_rating=1500, stockfish_path=None):
        """
        Initialize the CNN-based chess environment.
        
        Args:
            model_path: Path to the trained CNN model
            elo_rating: Initial Elo rating for the AI
            stockfish_path: Path to Stockfish executable (optional)
        """
        self.board = chess.Board()
        self.chess_ai = EloAdaptiveChessAI(model_path)
        self.chess_ai.set_elo(elo_rating)
        self.stockfish_path = stockfish_path
        self.stockfish = None
        
        # Initialize Stockfish if path is provided
        if stockfish_path:
            self.stockfish = Stockfish(path=stockfish_path)
            self.stockfish.set_depth(15)
    
    def reset(self):
        """Reset the board to the starting position."""
        self.board = chess.Board()
        return self.board
    
    def evaluate_position_cnn(self):
        """Evaluate position using the CNN."""
        return self.chess_ai.evaluate_position(self.board)
    
    def evaluate_position_stockfish(self):
        """Evaluate position using Stockfish."""
        if not self.stockfish:
            raise ValueError("Stockfish not initialized")
        
        self.stockfish.set_fen_position(self.board.fen())
        evaluation = self.stockfish.get_evaluation()
        
        # Parse the evaluation
        if evaluation["type"] == "cp":
            # Centipawn evaluation (convert to pawns)
            return evaluation["value"] / 100.0
        else:
            # Mate evaluation
            mate_in = evaluation["value"]
            if mate_in > 0:
                return 9.0 + (1.0 / mate_in)  # Positive for white winning
            else:
                return -9.0 - (1.0 / mate_in)  # Negative for black winning
    
    def get_ai_move(self):
        """Get a move from the AI with the current Elo setting."""
        return self.chess_ai.select_move(self.board)
    
    def make_move(self, move):
        """Make a move on the board."""
        if move not in self.board.legal_moves:
            raise ValueError(f"Illegal move: {move}")
        
        self.board.push(move)
        return self.board
    
    def set_elo(self, elo):
        """Set the Elo rating for the AI."""
        self.chess_ai.set_elo(elo)
        
    def render(self):
        """Render the board as a string."""
        return str(self.board)

In [8]:
def test_cnn_model(model_path, stockfish_path=None, num_positions=10):
    """
    Test the CNN model against Stockfish for a few positions with timing information.
    
    Args:
        model_path: Path to the trained CNN model
        stockfish_path: Path to Stockfish executable (optional)
        num_positions: Number of positions to evaluate
    """
    # Start overall testing timer
    overall_start_time = time.time()
    
    # Initialize environment
    print("Loading model and initializing environment...")
    env = CNNChessEnv(model_path=model_path, stockfish_path=stockfish_path)
    
    # Test at different Elo ratings
    for elo in [1200, 1600, 2000, 2400]:
        print(f"\nTesting at Elo {elo}:")
        env.set_elo(elo)
        
        elo_start_time = time.time()
        
        # Reset board
        env.reset()
        
        # Make random moves to get to different positions
        for i in range(num_positions):
            # Make some random moves to get to a position
            for _ in range(random.randint(1, 10)):  # 1-10 random moves
                legal_moves = list(env.board.legal_moves)
                if not legal_moves:
                    break
                move = random.choice(legal_moves)
                env.make_move(move)
                
                # Stop if game is over
                if env.board.is_game_over():
                    break
            
            # Skip if game is over
            if env.board.is_game_over():
                env.reset()
                continue
            
            position_start_time = time.time()
            
            # Get CNN evaluation
            cnn_start_time = time.time()
            cnn_eval = env.evaluate_position_cnn()
            cnn_time = time.time() - cnn_start_time
            
            # Get Stockfish evaluation if available
            stockfish_eval = None
            stockfish_time = 0
            if stockfish_path:
                stockfish_start_time = time.time()
                stockfish_eval = env.evaluate_position_stockfish()
                stockfish_time = time.time() - stockfish_start_time
                
            # Print comparison
            print(f"\nPosition {i+1} evaluation (Elo {elo}):")
            print(f"  CNN: {cnn_eval:.4f} (Time: {cnn_time:.4f}s)")
            if stockfish_eval is not None:
                print(f"  Stockfish: {stockfish_eval:.4f} (Time: {stockfish_time:.4f}s)")
                print(f"  Difference: {abs(cnn_eval - stockfish_eval):.4f}")
                print(f"  Speed improvement: {stockfish_time/cnn_time:.2f}x")
            
            # Get and print best move
            move_start_time = time.time()
            best_move = env.get_ai_move()
            move_time = time.time() - move_start_time
            
            print(f"  AI's chosen move: {best_move} (Time: {move_time:.4f}s)")
            position_time = time.time() - position_start_time
            print(f"  Total position analysis time: {position_time:.4f}s")
            print(f"  Board:\n{env.render()}")
            
            # Reset for next position
            env.reset()
        
        elo_time = time.time() - elo_start_time
        print(f"\nCompleted Elo {elo} testing in {elo_time:.2f}s")
    
    # Calculate and print total testing time
    overall_testing_time = time.time() - overall_start_time
    minutes, seconds = divmod(overall_testing_time, 60)
    
    print(f"\nTotal testing time: {int(minutes)}m {seconds:.2f}s")

In [9]:
def monitor_performance(seconds=10):
    """
    Monitor system performance metrics including GPU usage.
    
    Args:
        seconds: How long to monitor in seconds
    """
    import psutil
    import time
    
    # Start monitoring
    print("Starting performance monitoring...")
    start_time = time.time()
    end_time = start_time + seconds
    
    # Initialize counters
    count = 0
    cpu_percent_total = 0
    memory_percent_total = 0
    
    # Monitor loop
    while time.time() < end_time:
        # Get CPU usage
        cpu_percent = psutil.cpu_percent(interval=1)
        
        # Get memory usage
        memory_info = psutil.virtual_memory()
        memory_percent = memory_info.percent
        
        # Update totals
        cpu_percent_total += cpu_percent
        memory_percent_total += memory_percent
        count += 1
        
        # Print current values
        print(f"CPU: {cpu_percent}%, Memory: {memory_percent}%")
        
        # Brief pause
        time.sleep(0.5)
    
    # Calculate averages
    avg_cpu = cpu_percent_total / count
    avg_memory = memory_percent_total / count
    
    print(f"\nAverage over {seconds} seconds:")
    print(f"CPU Usage: {avg_cpu:.1f}%")
    print(f"Memory Usage: {avg_memory:.1f}%")
    
    # Note: Metal GPU monitoring requires additional tools
    print("\nNote: For detailed Metal GPU monitoring, use Activity Monitor or Instruments app")

# Example usage:
# Run this when needed to check performance
# monitor_performance(30)  # Monitor for 30 seconds

In [10]:
!which stockfish

/opt/homebrew/bin/stockfish


In [None]:
def main():
    # Define file paths
    data_path = 'cleaned_training_data.csv.gz'  # Update with  actual path
    model_save_path = 'chess_cnn_model.h5'
    stockfish_path = "/opt/homebrew/bin/stockfish"  # Update with  path
    
    # Start overall timer
    start_time = time.time()
    
    # Check if the training data exists
    if not os.path.exists(data_path):
        print(f"Error: Training data file not found at {data_path}")
        return
    
    # Choose whether to train or test
    mode = input("Enter 'train' to train a new model or 'test' to test an existing one: ").strip().lower()
    
    if mode == 'train':
        # Train the model
        print("\n=== CNN TRAINING ===")
        print("Starting CNN training...")
        
        training_start_time = time.time()
        model = train_chess_cnn(data_path, model_save_path)
        training_time = time.time() - training_start_time
        
        hours, remainder = divmod(training_time, 3600)
        minutes, seconds = divmod(remainder, 60)
        print(f"\nTraining completed in {int(hours)}h {int(minutes)}m {seconds:.2f}s")
        
    elif mode == 'test':
        # Check if model exists
        if not os.path.exists(model_save_path):
            print(f"Error: Model file not found at {model_save_path}")
            return
        
        # Test the model
        print("\n=== CNN TESTING ===")
        print("Testing the CNN model...")
        testing_start_time = time.time()
        test_cnn_model(model_save_path, stockfish_path)
        testing_time = time.time() - testing_start_time
        
        minutes, seconds = divmod(testing_time, 60)
        print(f"\nTesting completed in {int(minutes)}m {seconds:.2f}s")
        
    else:
        print("Invalid mode. Please enter 'train' or 'test'.")
    
    # Calculate total execution time
    total_time = time.time() - start_time
    hours, remainder = divmod(total_time, 3600)
    minutes, seconds = divmod(remainder, 60)
    
    print(f"\n=== EXECUTION COMPLETE ===")
    print(f"Total execution time: {int(hours)}h {int(minutes)}m {seconds:.2f}s")

# Run the main function if executed directly
if __name__ == "__main__":
    main()


=== CNN TESTING ===
Testing the CNN model...
Loading model and initializing environment...
Your GPU may run slowly with dtype policy mixed_float16 because it does not have compute capability of at least 7.0. Your GPU:
  METAL, no compute capability (probably not an Nvidia GPU)
See https://developer.nvidia.com/cuda-gpus for a list of GPUs and their compute capabilities.



TensorFlow Addons (TFA) has ended development and introduction of new features.
TFA has entered a minimal maintenance and release mode until a planned end of life in May 2024.
Please modify downstream libraries to take dependencies from other repositories in our TensorFlow community (e.g. Keras, Keras-CV, and Keras-NLP). 

For more information see: https://github.com/tensorflow/addons/issues/2807 

2025-03-17 20:29:43.935340: I metal_plugin/src/device/metal_device.cc:1154] Metal device set to: Apple M1 Pro
2025-03-17 20:29:43.935364: I metal_plugin/src/device/metal_device.cc:296] systemMemory: 16.00 GB
2025-03-17 20:29:43.935380: I metal_plugin/src/device/metal_device.cc:313] maxCacheSize: 5.33 GB
2025-03-17 20:29:43.935413: I tensorflow/core/common_runtime/pluggable_device/pluggable_device_factory.cc:306] Could not identify NUMA node of platform GPU ID 0, defaulting to 0. Your kernel may not have been built with NUMA support.
2025-03-17 20:29:43.935426: I tensorflow/core/common_runti


Testing at Elo 1200:


2025-03-17 20:29:44.960078: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:117] Plugin optimizer for device_type GPU is enabled.



Position 1 evaluation (Elo 1200):
  CNN: 3.0344 (Time: 1.3558s)
  Stockfish: -0.0500 (Time: 0.1151s)
  Difference: 3.0844
  Speed improvement: 0.08x
  AI's chosen move: h7h6 (Time: 0.8239s)
  Total position analysis time: 2.2951s
  Board:
r . b q k b n r
p p p n p p p p
. . . p . . . .
. . . . . . . .
. . . . P . . P
. . . . . . P .
P P P P . P . .
R N B Q K B N R

Position 2 evaluation (Elo 1200):
  CNN: -1.9625 (Time: 0.0352s)
  Stockfish: 0.0800 (Time: 0.1101s)
  Difference: 2.0425
  Speed improvement: 3.13x
  AI's chosen move: e8d7 (Time: 0.9270s)
  Total position analysis time: 1.0724s
  Board:
r n b q k b n r
p p p . p p p p
. . . p . . . .
. . . . . . . .
. . . . . . . .
. . P . . . P .
P P . P P P . P
R N B Q K B N R

Position 3 evaluation (Elo 1200):
  CNN: 0.2392 (Time: 0.0323s)
  Stockfish: -1.0300 (Time: 0.1150s)
  Difference: 1.2692
  Speed improvement: 3.57x
  AI's chosen move: g8f6 (Time: 0.7235s)
  Total position analysis time: 0.8708s
  Board:
r . b q k b n r
p p p p 

# Try below code see which one you don't have a WARNING

In [12]:
model_path = 'chess_cnn_model.h5'
try:
    cnn_model = tf.keras.models.load_model(model_path, compile=False)
    print("CNN model loaded successfully (without compilation).")
    
    # Use legacy Adam optimizer for M1/M2 Macs as recommended or AdamW
    from tensorflow.keras.optimizers.legacy import Adam
    cnn_model.compile(
        optimizer=Adam(learning_rate=0.001),  
        loss="mse",
        metrics=["mae"]
    )
    print("CNN model recompiled successfully with legacy Adam optimizer.")
except Exception as e:
    print(f"Error loading or compiling CNN model: {e}")
    cnn_model = None

CNN model loaded successfully (without compilation).
CNN model recompiled successfully with legacy Adam optimizer.


In [13]:
try:
    cnn_model = tf.keras.models.load_model(model_path, compile=False)
    print("CNN model loaded successfully (without compilation).")
    
    # Recompile with a supported optimizer
    from tensorflow.keras.optimizers import Adam
    cnn_model.compile(
        optimizer=Adam(learning_rate=0.001),  
        loss="mse",
        metrics=["mae"]
    )
except Exception as e:
    print(f"Error loading or compiling CNN model: {e}")
    print("Will use SimpleChessEvaluator as fallback.")



CNN model loaded successfully (without compilation).


In [14]:
try:
    cnn_model = tf.keras.models.load_model(model_path, compile=False)
    print("CNN model loaded successfully (without compilation).")
    
    # Recompile with AdamW optimizer
    from tensorflow.keras.optimizers import AdamW
    cnn_model.compile(
        optimizer=AdamW(learning_rate=0.001),  
        loss="mse",
        metrics=["mae"]
    )
except Exception as e:
    print(f"Error loading or compiling CNN model: {e}")
    print("Will use SimpleChessEvaluator as fallback.")




CNN model loaded successfully (without compilation).


In [15]:
cnn_model.summary()

Model: "ChessCNN"
__________________________________________________________________________________________________
 Layer (type)                Output Shape                 Param #   Connected to                  
 board_input (InputLayer)    [(None, 14, 8, 8)]           0         []                            
                                                                                                  
 conv1_1 (Conv2D)            (None, 64, 8, 8)             8128      ['board_input[0][0]']         
                                                                                                  
 bn1_1 (BatchNormalization)  (None, 64, 8, 8)             256       ['conv1_1[0][0]']             
                                                                                                  
 conv1_2 (Conv2D)            (None, 64, 8, 8)             36928     ['bn1_1[0][0]']               
                                                                                           

In [16]:
output_layer = cnn_model.layers[-1]
print(f"Output layer: {output_layer.name}, activation: {output_layer.activation.__name__ if hasattr(output_layer.activation, '__name__') else output_layer.activation}")

Output layer: value_head, activation: tanh
