The following code uses a 2D Convolutional Neural Network (CNN) to detect whether a page is being flipped or not based on a single image. The model consists of two convolutional layers, each using three 3×3 filters, followed by max pooling layers to reduce spatial dimensions and retain the most important features.

During training, the input images are processed in batches of 32 images, resulting in 75 batches per epoch. For each batch, the model performs forward propagation, computes the loss, and updates the filter and dense layer weights using gradient descent. This process is repeated across 10 epochs, with the learned weights from each epoch carried forward to the next.
After each epoch, the model is evaluated on a separate testing dataset to compute validation accuracy and validation loss. These testing images are not used for learning and do not affect the model’s weights. After training is complete, the final learned weights are used to generate predictions on the full test dataset.

The model achieves a Test F1 score of 98.21%, indicating strong performance. The F1 score is an appropriate evaluation metric for this problem because it balances precision and recall, accounting for both false positives (incorrectly detecting a page flip) and false negatives (missing an actual page flip), which are both important in the context of page flip detection.

In [1]:
# All imports required for the model
import os
import tensorflow as tf
from tensorflow.keras import layers, models
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from sklearn.metrics import f1_score


def clean_path(path_input):
    """
    Remove unwanted quotes from copied macOS folder paths.
    Example input:  '/Users/...'
    Example output: /Users/...
    """
    return path_input.replace("'", "").replace('"', "").strip()


def create_generators(train_dir, test_dir, img_size, batch_size):
    """
    Create training and testing image generators for loading data.
    """

    # Normalize pixel values from [0,255] to [0,1]
    train_datagen = ImageDataGenerator(rescale=1.0 / 255.0)
    test_datagen = ImageDataGenerator(rescale=1.0 / 255.0)

    # Generator for training images
    train_gen = train_datagen.flow_from_directory(
        directory=train_dir,      # Folder containing flip and notflip
        target_size=img_size,     # Resize images to desired size
        batch_size=batch_size,    # Number of images per batch
        class_mode="binary",      # 0 or 1 output labels
        shuffle=True              # Shuffle training images
    )

    # Generator for testing images
    test_gen = test_datagen.flow_from_directory(
        directory=test_dir,
        target_size=img_size,
        batch_size=batch_size,
        class_mode="binary",
        shuffle=False             # Keep order for F1 scoring
    )

    return train_gen, test_gen


def build_cnn_model(img_height, img_width):
    """
    Build a simple 2 layer CNN with 3 filters each.
    """

    # Sequential model builds layers in order
    model = models.Sequential([

        # First convolution layer
        layers.Conv2D(
            filters=3,                  # Number of filters
            kernel_size=(3, 3),         # Filter size
            activation="relu",          # Activation function
            input_shape=(img_height,    # Input image height
                         img_width,     # Input image width
                         3)             # RGB channels
        ),

        # First max pooling layer to reduce spatial size
        layers.MaxPooling2D(pool_size=(2, 2)),

        # Second convolution layer
        layers.Conv2D(
            filters=3,
            kernel_size=(3, 3),
            activation="relu"
        ),

        # Second pooling layer
        layers.MaxPooling2D(pool_size=(2, 2)),

        # Flatten the output for dense layers
        layers.Flatten(),

        # Dense hidden layer
        layers.Dense(32, activation="relu"),

        # Output layer with sigmoid for binary classification
        layers.Dense(1, activation="sigmoid")
    ])

    # Compile the model with optimizer, loss, metrics
    model.compile(
        optimizer="adam",
        loss="binary_crossentropy",
        metrics=["accuracy"]
    )

    return model


def evaluate_f1(model, test_gen):
    """
    Compute F1 score on the test set predictions.
    """

    # Reset generator to start from first image
    test_gen.reset()

    # Predict probabilities for each test image
    y_prob = model.predict(test_gen)

    # Convert probabilities to 0 or 1 predictions
    y_pred = (y_prob > 0.5).astype("int32").ravel()

    # True labels from the generator
    y_true = test_gen.classes

    # Calculate F1 score
    return f1_score(y_true, y_pred)


# ===============================
# MAIN EXECUTION CELL
# ===============================

# Image size for resizing images
img_height = 128
img_width = 128
img_size = (img_height, img_width)

# Number of images per training batch
batch_size = 32

# Number of training epochs
epochs = 10

# Paste your folders here (with or without quotes)
raw_train_dir = input("Paste training folder path: ")
raw_test_dir = input("Paste testing folder path: ")

# Clean the folder paths to remove quotes
train_dir = clean_path(raw_train_dir)
test_dir = clean_path(raw_test_dir)

# Create data generators for loading images
train_gen, test_gen = create_generators(
    train_dir, test_dir, img_size, batch_size
)

# Display class mapping (flip or notflip)
print("Class indices:", train_gen.class_indices)

# Build the CNN model
model = build_cnn_model(img_height, img_width)

# Print the model architecture
model.summary()

# Train the model
model.fit(
    train_gen,
    epochs=epochs,
    validation_data=test_gen
)

# Compute F1 score
f1 = evaluate_f1(model, test_gen)
print(f"Test F1 score: {f1:.4f}")


Found 2392 images belonging to 2 classes.
Found 597 images belonging to 2 classes.
Class indices: {'flip': 0, 'notflip': 1}


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)
2025-12-09 13:20:04.527322: I metal_plugin/src/device/metal_device.cc:1154] Metal device set to: Apple M1 Pro
2025-12-09 13:20:04.527492: I metal_plugin/src/device/metal_device.cc:296] systemMemory: 16.00 GB
2025-12-09 13:20:04.527503: I metal_plugin/src/device/metal_device.cc:313] maxCacheSize: 5.33 GB
2025-12-09 13:20:04.527700: I tensorflow/core/common_runtime/pluggable_device/pluggable_device_factory.cc:305] Could not identify NUMA node of platform GPU ID 0, defaulting to 0. Your kernel may not have been built with NUMA support.
2025-12-09 13:20:04.527712: I tensorflow/core/common_runtime/pluggable_device/pluggable_device_factory.cc:271] Created TensorFlow device (/job:localhost/replica:0/task:0/device:GPU:0 with 0 MB memory) -> physical PluggableDevice (device: 0, name: METAL, pci bus id: <undefined>)


Epoch 1/10


2025-12-09 13:20:05.597728: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:117] Plugin optimizer for device_type GPU is enabled.


[1m75/75[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m22s[0m 248ms/step - accuracy: 0.5895 - loss: 0.6586 - val_accuracy: 0.6466 - val_loss: 0.6148
Epoch 2/10
[1m75/75[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m18s[0m 239ms/step - accuracy: 0.7768 - loss: 0.4828 - val_accuracy: 0.8727 - val_loss: 0.3748
Epoch 3/10
[1m75/75[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m18s[0m 239ms/step - accuracy: 0.9130 - loss: 0.2568 - val_accuracy: 0.9363 - val_loss: 0.2060
Epoch 4/10
[1m75/75[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m18s[0m 238ms/step - accuracy: 0.9490 - loss: 0.1712 - val_accuracy: 0.8911 - val_loss: 0.2590
Epoch 5/10
[1m75/75[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m18s[0m 238ms/step - accuracy: 0.9678 - loss: 0.1090 - val_accuracy: 0.9598 - val_loss: 0.1308
Epoch 6/10
[1m75/75[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m18s[0m 238ms/step - accuracy: 0.9833 - loss: 0.0635 - val_accuracy: 0.9782 - val_loss: 0.0852
Epoch 7/10
[1m75/75[0m [32m━━━