<a href="https://colab.research.google.com/github/dinesh88kumar/TicTacToe-Neural-Network/blob/main/tictactoe_model.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [4]:
import numpy as np
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout, BatchNormalization
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import EarlyStopping
from sklearn.model_selection import train_test_split

# Generate more realistic synthetic data
def generate_realistic_data(n_samples=20000, max_moves=4, seed=None):
    """Generate random tic-tac-toe boards labelled with a heuristic move.

    Each sample is a flat 9-cell board (1 = X, -1 = O, 0 = empty) holding
    between 0 and ``max_moves`` randomly placed marks, alternating X, O, X, ...
    in placement order. The label is the first empty cell in the fixed
    priority order center -> corners -> sides. The heuristic is purely
    positional: it does not look for winning or blocking moves and does not
    consider whose turn it is.

    Args:
        n_samples: Number of boards to generate.
        max_moves: Largest number of marks placed on a board (0-8). It is
            capped at 8 so every board keeps at least one empty cell to label.
        seed: Optional integer seed. When given, a private
            ``np.random.RandomState(seed)`` is used so the result is
            reproducible and the global NumPy random state is left untouched.
            When ``None`` the global ``np.random`` state is used.

    Returns:
        Tuple ``(X, y)`` where ``X`` has shape ``(n_samples, 9)`` and ``y`` has
        shape ``(n_samples,)`` holding the target cell index (0-8) as floats.

    Raises:
        ValueError: If ``max_moves`` is outside 0-8.
    """
    if not 0 <= max_moves <= 8:
        raise ValueError(f"max_moves must be between 0 and 8, got {max_moves}")

    # np.random and RandomState expose the same randint/choice API, so the
    # default path keeps drawing from the global state in the same call order.
    rng = np.random if seed is None else np.random.RandomState(seed)

    # Simple heuristic: prefer center, then corners, then sides
    priority_positions = (4, 0, 2, 6, 8, 1, 3, 5, 7)

    X = np.zeros((n_samples, 9))
    y = np.zeros(n_samples)

    for i in range(n_samples):
        # Number of marks already on the board: 0..max_moves inclusive
        # (randint's upper bound is exclusive).
        moves = rng.randint(0, max_moves + 1)
        board = np.zeros(9)

        # Place alternating X and O marks on distinct random cells
        positions = rng.choice(9, moves, replace=False)
        for j, pos in enumerate(positions):
            board[pos] = 1 if j % 2 == 0 else -1

        # max_moves <= 8 guarantees at least one empty cell, so next() always
        # finds a label.
        y[i] = next(pos for pos in priority_positions if board[pos] == 0)
        X[i] = board

    return X, y


In [5]:

# Generate and prepare data
# Seed NumPy's global generator before generating boards so the synthetic
# dataset (and with it the train/val/test contents) is the same after every
# Restart & Run All. The same constant drives both splits below.
SEED = 42
np.random.seed(SEED)

X, y = generate_realistic_data()
# One-hot encode the target cell index (0-8) to match the 9-way softmax output
y_encoded = keras.utils.to_categorical(y, num_classes=9)

# Split data with a validation set: 70% train, then the held-out 30% is
# halved into validation and test
X_train, X_temp, y_train, y_temp = train_test_split(X, y_encoded, test_size=0.3, random_state=SEED)
X_val, X_test, y_val, y_test = train_test_split(X_temp, y_temp, test_size=0.5, random_state=SEED)

# Data normalization
# Statistics come from the training split only and are reused for the
# validation and test splits.
X_mean = X_train.mean(axis=0)
X_std = X_train.std(axis=0) + 1e-8  # Add small epsilon to avoid division by zero
X_train = (X_train - X_mean) / X_std
X_val = (X_val - X_mean) / X_std
X_test = (X_test - X_mean) / X_std


In [6]:

# Build an improved model: three Dense -> BatchNorm -> LeakyReLU -> Dropout
# stages, each Dense layer L2-regularized, followed by a 9-way softmax with
# one output per board cell.
model = Sequential()

# Stage 1 (declares the 9-feature input)
model.add(Dense(128, input_shape=(9,), kernel_regularizer=keras.regularizers.l2(0.001)))
model.add(BatchNormalization())
model.add(keras.layers.LeakyReLU(alpha=0.1))
model.add(Dropout(0.3))

# Stage 2
model.add(Dense(128, kernel_regularizer=keras.regularizers.l2(0.001)))
model.add(BatchNormalization())
model.add(keras.layers.LeakyReLU(alpha=0.1))
model.add(Dropout(0.3))

# Stage 3
model.add(Dense(64, kernel_regularizer=keras.regularizers.l2(0.001)))
model.add(BatchNormalization())
model.add(keras.layers.LeakyReLU(alpha=0.1))
model.add(Dropout(0.2))

# Output: probability for each of the 9 cells
model.add(Dense(9, activation='softmax'))

# A constant learning rate is used rather than a schedule; this avoids the
# conflict a schedule would have with ReduceLROnPlateau.
optimizer = Adam(learning_rate=0.001)
model.compile(optimizer=optimizer, loss='categorical_crossentropy', metrics=['accuracy'])

# Training callbacks. ReduceLROnPlateau was dropped because it conflicts with
# learning rate schedules; early stopping alone ends training and restores the
# weights from the best validation-loss epoch.
callbacks = [EarlyStopping(monitor='val_loss', patience=10, restore_best_weights=True)]

# Train with validation data; 100 epochs is an upper bound, the callback
# normally stops earlier.
history = model.fit(
    x=X_train,
    y=y_train,
    validation_data=(X_val, y_val),
    batch_size=64,
    epochs=100,
    callbacks=callbacks,
    verbose=1,
)

# Evaluate the model on the held-out test split
test_loss, test_acc = model.evaluate(X_test, y_test)
print(f"Test accuracy: {test_acc:.4f}")

# Save the model.
# NOTE: the normalization statistics X_mean / X_std are plain notebook
# variables and are not part of this file; inference must reuse them.
model.save("tictactoe_best_move_optimized.h5")


Epoch 1/100
[1m219/219[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 7ms/step - accuracy: 0.7249 - loss: 1.2174 - val_accuracy: 0.9877 - val_loss: 0.3160
Epoch 2/100
[1m219/219[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 6ms/step - accuracy: 0.9695 - loss: 0.3288 - val_accuracy: 0.9927 - val_loss: 0.2034
Epoch 3/100
[1m219/219[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 5ms/step - accuracy: 0.9840 - loss: 0.2307 - val_accuracy: 0.9927 - val_loss: 0.1643
Epoch 4/100
[1m219/219[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 6ms/step - accuracy: 0.9848 - loss: 0.1837 - val_accuracy: 0.9927 - val_loss: 0.1265
Epoch 5/100
[1m219/219[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 5ms/step - accuracy: 0.9883 - loss: 0.1457 - val_accuracy: 0.9977 - val_loss: 0.0957
Epoch 6/100
[1m219/219[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 5ms/step - accuracy: 0.9937 - loss: 0.1063 - val_accuracy: 0.9997 - val_loss: 0.0715
Epoch 7/100
[1m219/21



Test accuracy: 0.9990


In [None]:

# Improved prediction function
def predict_move(board_state, model, valid_moves_only=True, x_mean=None, x_std=None):
    """
    Predict the best move given a board state

    Args:
        board_state: List or array of 9 elements (1=X, -1=O, 0=empty)
        model: Trained TicTacToe model; must expose ``predict(x, verbose=0)``
            returning one row of 9 scores per input row
        valid_moves_only: If True, only considers empty spaces as valid moves
        x_mean: Per-cell mean used to normalize the input. Defaults to the
            notebook-level ``X_mean`` computed from the training split.
        x_std: Per-cell standard deviation used to normalize the input.
            Defaults to the notebook-level ``X_std``.

    Returns:
        The index of the best move (0-8) as a plain ``int``

    Raises:
        ValueError: If ``board_state`` does not contain exactly 9 cells, or if
            ``valid_moves_only`` is True and the board has no empty cell
            (previously this case silently returned index 0, an occupied cell).
    """
    # Flatten once; the same array serves for validation, masking and input
    board = np.asarray(board_state).reshape(-1)
    if board.size != 9:
        raise ValueError(f"board_state must contain exactly 9 cells, got {board.size}")

    occupied = board != 0
    if valid_moves_only and occupied.all():
        raise ValueError("board_state has no empty positions; no valid move exists")

    # Fall back to the statistics computed when the training data was prepared
    if x_mean is None:
        x_mean = X_mean
    if x_std is None:
        x_std = X_std

    # Normalize the input exactly like the training data, as a batch of one
    board_state_normalized = (board.reshape(1, -1) - x_mean) / x_std

    # Get move probabilities
    move_probs = model.predict(board_state_normalized, verbose=0)[0]

    # Only consider empty positions if requested: occupied cells get -inf so
    # argmax can never select them
    if valid_moves_only:
        move_probs = np.where(occupied, -np.inf, move_probs)

    return int(np.argmax(move_probs))

# Example usage: ask the trained model for an opening move on an empty board
board_state = [0] * 9
best_move = predict_move(board_state, model)
print(f"Best Move: {best_move}")

# Visualize a board state and prediction
def print_board(board_state):
    """Print a 9-cell board as a 3x3 ASCII grid.

    Cells are read by index from ``board_state``; 1 is drawn as 'X', -1 as 'O'
    and 0 as a blank. A 13-dash rule is printed above the first row and after
    every row.
    """
    glyphs = {0: ' ', 1: 'X', -1: 'O'}
    rule = '-' * 13

    print(rule)
    for row_start in (0, 3, 6):
        # Look up the three cells of this row, left to right
        left, mid, right = (glyphs[board_state[row_start + offset]] for offset in range(3))
        print(f"| {left} | {mid} | {right} |")
        print(rule)

# Example with a more realistic board: a mid-game position, written row by row
# so the literal matches the grid printed below
example_board = [
    1, 0, -1,
    0, 1, 0,
    0, -1, 0,
]
print("Current board:")
print_board(example_board)
best_move = predict_move(example_board, model)
print(f"Recommended move: {best_move}")