In [1]:
import os
from tensorflow.keras.preprocessing.image import ImageDataGenerator # type: ignore
from tensorflow.keras.applications import MobileNetV2 # type: ignore
from tensorflow.keras.layers import Dense, GlobalAveragePooling2D # type: ignore
from tensorflow.keras.models import Model # type: ignore
from tensorflow.keras.optimizers import Adam # type: ignore
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint # type: ignore
from config import img_width, img_height, num_classes

2024-05-31 03:50:10.960879: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [2]:
# Paths
train_data_dir = "dataset/training"
test_data_dir = "dataset/test"
wip_models_dir = "models_wip"
models_dir = "models"
model_name = "chess_classifier_nightly"
current_best = os.path.join(wip_models_dir, f"{model_name}_best.keras")
final = os.path.join(models_dir, f"{model_name}.keras")

# Parameters
# img_width, img_height = 224, 224
batch_size = 32
epochs = 10
# num_classes = 13  # 6 pieces x 2 colors + 1 empty

In [3]:
os.makedirs(wip_models_dir, exist_ok=True)
os.makedirs(models_dir, exist_ok=True)

In [4]:
# Data normalization
train_datagen = ImageDataGenerator(
    rescale=1.0 / 255,
)

test_datagen = ImageDataGenerator(rescale=1.0 / 255)

In [5]:
# Train and validation generators
train_generator = train_datagen.flow_from_directory(
    train_data_dir,
    target_size=(img_width, img_height),
    batch_size=batch_size,
    class_mode="categorical",
)

validation_generator = test_datagen.flow_from_directory(
    test_data_dir,
    target_size=(img_width, img_height),
    batch_size=batch_size,
    class_mode="categorical",
)

Found 200537 images belonging to 13 classes.
Found 50135 images belonging to 13 classes.


In [6]:
# Calculate steps_per_epoch and validation_steps
# steps_per_epoch = max(1, train_generator.samples // batch_size)
# validation_steps = max(1, validation_generator.samples // batch_size)
steps_per_epoch = max(1, len(train_generator) // batch_size)
validation_steps = max(1, len(validation_generator) // batch_size)

## Creating the model

Only using a single dense layer on top of MobileNet's, could probably improve the model by adding a few more.

In [7]:
# Load pre-trained MobileNetV2 model + higher level layers
base_model = MobileNetV2(
    weights="imagenet", include_top=False, input_shape=(img_width, img_height, 3)
)

# Add custom layers on top of the base model
x = base_model.output
x = GlobalAveragePooling2D()(x)
x = Dense(1024, activation="relu")(x)
predictions = Dense(num_classes, activation="softmax")(x)

# Final model
model = Model(inputs=base_model.input, outputs=predictions)

# Freeze the base layers
for layer in base_model.layers:
    layer.trainable = False

# Compile the model
model.compile(
    optimizer=Adam(learning_rate=0.001),
    loss="categorical_crossentropy",
    metrics=["accuracy"],
)

In [8]:
# Callbacks
early_stopping = EarlyStopping(
    monitor="val_loss", patience=10, restore_best_weights=True
)
model_checkpoint = ModelCheckpoint(
    current_best, monitor="val_loss", save_best_only=True
)

## Initial training

In [9]:
# Train the model
model.fit(
    train_generator,
    steps_per_epoch=steps_per_epoch,
    validation_data=validation_generator,
    validation_steps=validation_steps,
    epochs=epochs,
    callbacks=[early_stopping, model_checkpoint],
)

Epoch 1/10


  self._warn_if_super_not_called()


[1m195/195[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m177s[0m 884ms/step - accuracy: 0.8550 - loss: 0.4481 - val_accuracy: 0.9896 - val_loss: 0.0280
Epoch 2/10
[1m195/195[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m175s[0m 900ms/step - accuracy: 0.9854 - loss: 0.0394 - val_accuracy: 0.9811 - val_loss: 0.0511
Epoch 3/10
[1m195/195[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m184s[0m 945ms/step - accuracy: 0.9879 - loss: 0.0361 - val_accuracy: 0.9889 - val_loss: 0.0248
Epoch 4/10
[1m195/195[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m194s[0m 994ms/step - accuracy: 0.9907 - loss: 0.0264 - val_accuracy: 0.9954 - val_loss: 0.0146
Epoch 5/10
[1m195/195[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m167s[0m 857ms/step - accuracy: 0.9870 - loss: 0.0390 - val_accuracy: 0.9935 - val_loss: 0.0205
Epoch 6/10
[1m195/195[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m145s[0m 745ms/step - accuracy: 0.9925 - loss: 0.0220 - val_accuracy: 0.9954 - val_loss: 0.0168
Epoch 7/10
[1m

<keras.src.callbacks.history.History at 0x147531090>

## Fine tuning

Only the top layers were trainable in the initial training. In order to improve predictions, we unfreeze a few ImageNet layers and retrain the model with a much lower learning rate.

This might be counter-productive, not sure, needs more testing.

Either way, the best weights are used.

In [10]:
# Unfreeze some layers for fine-tuning
for layer in base_model.layers[-30:]:  # Unfreeze last 30 layers
    layer.trainable = True

# Recompile the model with a lower learning rate
model.compile(
    optimizer=Adam(learning_rate=1e-5),
    loss="categorical_crossentropy",
    metrics=["accuracy"],
)

# Fine-tune the model
model.fit(
    train_generator,
    steps_per_epoch=steps_per_epoch,
    validation_data=validation_generator,
    validation_steps=validation_steps,
    epochs=epochs,
    callbacks=[early_stopping, model_checkpoint],
)

Epoch 1/10
[1m195/195[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m199s[0m 979ms/step - accuracy: 0.8641 - loss: 0.6391 - val_accuracy: 1.0000 - val_loss: 0.0025
Epoch 2/10
[1m195/195[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m186s[0m 954ms/step - accuracy: 0.9798 - loss: 0.0619 - val_accuracy: 0.9967 - val_loss: 0.0105
Epoch 3/10
[1m195/195[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m188s[0m 963ms/step - accuracy: 0.9891 - loss: 0.0386 - val_accuracy: 0.9993 - val_loss: 0.0033
Epoch 4/10
[1m195/195[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m186s[0m 956ms/step - accuracy: 0.9889 - loss: 0.0319 - val_accuracy: 0.9980 - val_loss: 0.0057
Epoch 5/10
[1m195/195[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m192s[0m 983ms/step - accuracy: 0.9937 - loss: 0.0188 - val_accuracy: 0.9987 - val_loss: 0.0053
Epoch 6/10
[1m195/195[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m190s[0m 975ms/step - accuracy: 0.9947 - loss: 0.0162 - val_accuracy: 0.9967 - val_loss: 0.0080
Epoc

<keras.src.callbacks.history.History at 0x1489c0cd0>

In [11]:
# Save the final model
model.save(final)

In [12]:
# Evaluate the final model
loss, accuracy = model.evaluate(validation_generator, steps=validation_steps)
print(f"Test accuracy: {accuracy:.4f}")

[1m48/48[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m28s[0m 580ms/step - accuracy: 0.9989 - loss: 0.0028
Test accuracy: 0.9987


In [13]:
model.summary()