In [1]:
from google.colab import files
import pickle

# Upload file manually
uploaded = files.upload()

# Load the pickle file
file_name = list(uploaded.keys())[0]  # Get the uploaded file name
with open(file_name, "rb") as f:
    data = pickle.load(f)

print("Loaded data keys:", data.keys())  # Check contents

Saving mcts7500_pool.pickle to mcts7500_pool.pickle
Loaded data keys: dict_keys(['board_x', 'play_y', 'README'])


In [2]:
import numpy as np
import pickle
import tensorflow as tf
from tensorflow.keras import layers, regularizers
from sklearn.model_selection import train_test_split
from google.colab import files

# ✅ Register Custom Layers with Explicit Serialization
@tf.keras.utils.register_keras_serializable(package="Custom")
class PatchEmbedding(layers.Layer):
    def __init__(self, num_patches, hidden_dim, **kwargs):
        super().__init__(**kwargs)
        self.num_patches = num_patches
        self.hidden_dim = hidden_dim
        self.proj = layers.Dense(hidden_dim, kernel_regularizer=regularizers.l2(0.0001))
        self.pos_embedding = None

    def build(self, input_shape):
        self.pos_embedding = self.add_weight(
            shape=(1, self.num_patches, self.hidden_dim),
            initializer="random_normal",
            trainable=True,
            name="pos_embedding"
        )

    def call(self, x):
        x = self.proj(x)
        return x + self.pos_embedding

    def get_config(self):
        config = super().get_config()
        config.update({"num_patches": self.num_patches, "hidden_dim": self.hidden_dim})
        return config

@tf.keras.utils.register_keras_serializable(package="Custom")
class ClassTokenIndex(layers.Layer):
    def call(self, x):
        bs = tf.shape(x)[0]
        indices = tf.range(1)
        indices = tf.expand_dims(indices, 0)
        return tf.tile(indices, [bs, 1])

@tf.keras.utils.register_keras_serializable(package="Custom")
class PositionalIndex(layers.Layer):
    def call(self, x):
        bs = tf.shape(x)[0]
        num_vectors = tf.shape(x)[1]
        indices = tf.range(num_vectors)
        indices = tf.expand_dims(indices, 0)
        return tf.tile(indices, [bs, 1])

# ✅ Build Vision Transformer Model
def build_ViT(n, m, hidden_dim, num_layers, num_heads, key_dim,
              value_dim, mlp_dim, dropout_rate, num_classes):
    inp = layers.Input(shape=(n*m, 2))  # ✅ FIX: Accepts (42 patches, 2 channels)

    # ✅ Patch Embedding
    patches = PatchEmbedding(n*m, hidden_dim)(inp)

    # ✅ Class Token Embedding
    token_index = ClassTokenIndex()(patches)
    token_embedding = layers.Embedding(input_dim=1, output_dim=hidden_dim)(token_index)
    patches = layers.Concatenate(axis=1)([token_embedding, patches])

    # ✅ Transformer Layers
    x = patches
    for _ in range(num_layers):
        ln1 = layers.LayerNormalization()(x)
        attn = layers.MultiHeadAttention(num_heads=num_heads, key_dim=key_dim, value_dim=value_dim)(ln1, ln1)
        x = layers.Add()([x, attn])

        ln2 = layers.LayerNormalization()(x)
        dense = layers.Dense(mlp_dim, activation='gelu')(ln2)
        dense = layers.Dropout(dropout_rate)(dense)
        dense = layers.Dense(hidden_dim)(dense)
        dense = layers.Dropout(dropout_rate)(dense)
        x = layers.Add()([x, dense])

    # ✅ Classification Head
    fl = x[:, 0, :]
    ln_final = layers.LayerNormalization()(fl)
    output = layers.Dense(num_classes, activation='softmax')(ln_final)

    # ✅ Compile Model
    model = tf.keras.models.Model(inp, output)
    model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])
    return model

# ✅ Configure Model Parameters
n = 6
m = 7
hidden_dim = 64
num_layers = 4
num_heads = 4
key_dim = hidden_dim // num_heads
value_dim = key_dim * 2
mlp_dim = hidden_dim
dropout_rate = 0.1
num_classes = 7

# ✅ Build Model
trans = build_ViT(n, m, hidden_dim, num_layers, num_heads,
                  key_dim, value_dim, mlp_dim, dropout_rate, num_classes)
trans.summary()

# ✅ Load and Preprocess Data
board_x = np.array(data["board_x"])
play_y = np.array(data["play_y"])

X_player = (board_x == 1).astype(np.float32)
X_ai = (board_x == -1).astype(np.float32)
X_two_channel = np.stack([X_player, X_ai], axis=-1)

X_reshaped = X_two_channel.reshape(X_two_channel.shape[0], 6 * 7, 2)


# import numpy as np
# from sklearn.model_selection import train_test_split

# ✅ Correct Board Flipping Function
def flip_board_horizontally_correctly(X, y):
    """
    Flip the board horizontally and mirror the move labels.

    Args:
    - X: NumPy array of shape (N, 42, 2)  # Board with patches
    - y: NumPy array of shape (N,)  # Column indices of the played move (0 to 6)

    Returns:
    - X_flipped: Flipped board states
    - y_flipped: Adjusted column indices
    """
    # ✅ Reshape X back to (N, 6, 7, 2) to apply flipping properly
    X_unflattened = X.reshape(-1, 6, 7, 2)

    # ✅ Flip board across columns (mirror left-right)
    X_flipped = np.flip(X_unflattened, axis=2)

    # ✅ Mirror column move
    y_flipped = 6 - y  # Since columns are flipped

    # ✅ Flatten X back to (N, 42, 2)
    X_flipped = X_flipped.reshape(-1, 42, 2)

    return X_flipped, y_flipped

# ✅ Create Flipped Dataset
X_flipped, y_flipped = flip_board_horizontally_correctly(X_reshaped, play_y)

# ✅ Augment Dataset with Flipped Boards
X_augmented = np.concatenate([X_reshaped, X_flipped], axis=0)
y_augmented = np.concatenate([play_y, y_flipped], axis=0)

# ✅ Re-split the Augmented Dataset
X_train, X_test, y_train, y_test = train_test_split(
    X_augmented, y_augmented, test_size=0.2, random_state=42, stratify=y_augmented
)
X_train, X_val, y_train, y_val = train_test_split(
    X_train, y_train, test_size=0.2, random_state=42, stratify=y_train
)

# # ✅ Train and Evaluate Model
early_stop = tf.keras.callbacks.EarlyStopping(monitor="val_loss", patience=5, restore_best_weights=True)

# ✅ Print New Dataset Sizes
print(f"Training set: {X_train.shape}, {y_train.shape}")
print(f"Validation set: {X_val.shape}, {y_val.shape}")
print(f"Test set: {X_test.shape}, {y_test.shape}")

# ✅ Retrain Model with Augmented Data
trans.fit(X_train, y_train, epochs=100, batch_size=64, validation_data=(X_val, y_val), callbacks=[early_stop])

# ✅ Save Model
model_path = "connect4_transformer_augmented.keras"
tf.keras.models.save_model(trans, model_path, include_optimizer=True, save_format="keras")
print(f"✅ Model saved successfully at {model_path}")

# ✅ Evaluate Model on Test Set
test_loss, test_accuracy = trans.evaluate(X_test, y_test)
print(f"\n✅ Test Accuracy after Horizontal Flip Augmentation: {test_accuracy:.4f}")

# X_train, X_test, y_train, y_test = train_test_split(X_reshaped, play_y, test_size=0.2, random_state=42, stratify=play_y)
# X_train, X_val, y_train, y_val = train_test_split(X_train, y_train, test_size=0.2, random_state=42, stratify=y_train)



# trans.fit(X_train, y_train, epochs=100, batch_size=64, validation_data=(X_val, y_val), callbacks=[early_stop])

# # ✅ Save Model Correctly
# model_path = "connect4_transformer_fixed.keras"
# tf.keras.models.save_model(trans, model_path, include_optimizer=True, save_format="keras")
# print(f"✅ Model saved successfully at {model_path}")

Training set: (339993, 42, 2), (339993,)
Validation set: (84999, 42, 2), (84999,)
Test set: (106248, 42, 2), (106248,)
Epoch 1/100
[1m5313/5313[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m71s[0m 9ms/step - accuracy: 0.2840 - loss: 1.7165 - val_accuracy: 0.3765 - val_loss: 1.5078
Epoch 2/100
[1m5313/5313[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m29s[0m 6ms/step - accuracy: 0.3794 - loss: 1.5075 - val_accuracy: 0.3995 - val_loss: 1.4653
Epoch 3/100
[1m5313/5313[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m30s[0m 6ms/step - accuracy: 0.4013 - loss: 1.4629 - val_accuracy: 0.4155 - val_loss: 1.4380
Epoch 4/100
[1m5313/5313[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m31s[0m 6ms/step - accuracy: 0.4216 - loss: 1.4255 - val_accuracy: 0.4387 - val_loss: 1.3886
Epoch 5/100
[1m5313/5313[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m30s[0m 6ms/step - accuracy: 0.4383 - loss: 1.3914 - val_accuracy: 0.4542 - val_loss: 1.3600
Epoch 6/100
[1m5313/5313[0m [32m━━━━━━━━━━━━━━━━



✅ Model saved successfully at connect4_transformer_augmented.keras
[1m3321/3321[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m11s[0m 3ms/step - accuracy: 0.5515 - loss: 1.1251

✅ Test Accuracy after Horizontal Flip Augmentation: 0.5521
