In [None]:
from __future__ import print_function
import tensorflow as tf
import random
import numpy as np
import matplotlib.pyplot as plt
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.utils import to_categorical
from sklearn.model_selection import train_test_split
from tensorflow.keras.layers import Dropout
from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau

# Seed every random number generator the notebook draws from, so that a
# "Restart Kernel -> Run All" reproduces the same data, split and training run.
SEED = 101
random.seed(SEED)         # digit choice and placement in build_synth_data
np.random.seed(SEED)      # NumPy's global generator
tf.random.set_seed(SEED)  # TensorFlow ops, including Keras weight initialisation

# Function to build synthetic data
def build_synth_data(data, labels, dataset_size):
    """Build synthetic images of digit strips for bounding-box regression.

    Every sample is made by picking 1 to 5 random images from ``data``,
    placing them side by side, and pasting the resulting strip at a random
    position on an all-zero 224x224 canvas.

    Args:
        data: Indexable collection of equally sized 2-D images (MNIST digits
            are 28x28). The digit size is read from ``data[0]``.
        labels: Unused. Kept so existing callers keep working.
        dataset_size: Number of synthetic samples to generate.

    Returns:
        A ``(synth_data, synth_labels)`` tuple of two lists:
        ``synth_data`` holds float64 arrays of shape (224, 224) and
        ``synth_labels`` holds ``[left, top, right, bottom]`` pixel boxes,
        where ``right`` and ``bottom`` are exclusive, i.e. the strip occupies
        ``image[top:bottom, left:right]``.

    Raises:
        ValueError: If ``data`` is empty or the widest possible strip does
            not fit on the canvas.
    """
    synth_img_height = 224
    synth_img_width = 224
    max_digits = 5

    if len(data) == 0:
        raise ValueError("data must contain at least one image")

    digit_height, digit_width = np.shape(data[0])[:2]
    if (digit_height > synth_img_height - 1
            or max_digits * digit_width > synth_img_width - 1):
        raise ValueError("digit images are too large for the synthetic canvas")

    synth_data = []
    synth_labels = []

    for _ in range(dataset_size):
        # The order of the random draws below is kept stable so that a given
        # seed always produces the same boxes.
        num_digits = random.randint(1, max_digits)
        synth_indices = [random.randint(0, len(data) - 1) for _ in range(num_digits)]
        digit_strip = np.hstack([data[index] for index in synth_indices])
        strip_width = num_digits * digit_width

        starting_left = random.randint(1, synth_img_width - strip_width)
        starting_bottom = random.randint(digit_height, synth_img_height - 1)
        starting_right = starting_left + strip_width
        starting_top = starting_bottom - digit_height

        # np.zeros (not np.empty) so the background is really blank:
        # np.empty returns uninitialised memory with arbitrary values.
        canvas = np.zeros((synth_img_height, synth_img_width))
        canvas[starting_top:starting_bottom, starting_left:starting_right] = digit_strip

        synth_data.append(canvas)
        synth_labels.append([starting_left, starting_top, starting_right, starting_bottom])

    return synth_data, synth_labels

# Function to prepare data for Keras
def prep_data_keras(img_data):
    """Turn a batch of greyscale images into normalised 3-channel input.

    Non-finite values are replaced (NaN and -inf become 0, +inf becomes 255),
    pixel values are clipped to [0, 255] and scaled to [0, 1], and the single
    grey channel is replicated into three identical channels.

    The previous implementation concatenated three copies along the width
    axis and then reshaped to (N, H, W, 3); that interleaves neighbouring
    pixels into the channel axis and scrambles the picture. Replicating along
    a new trailing axis keeps every pixel where it was.

    Args:
        img_data: Array-like of shape (N, H, W), e.g. a list of 224x224 arrays.

    Returns:
        float32 array of shape (N, H, W, 3) with values in [0, 1].

    Raises:
        ValueError: If ``img_data`` is not 3-dimensional.
    """
    # Handle NaNs/Infs and ensure data is clipped between 0 and 255
    pixels = np.nan_to_num(img_data, nan=0.0, posinf=255.0, neginf=0.0)
    pixels = np.clip(pixels, 0, 255)

    if pixels.ndim != 3:
        raise ValueError(
            "img_data must have shape (N, H, W); got %r" % (pixels.shape,)
        )

    # Normalize the images
    pixels = pixels.astype("float32") / 255.0

    # Grey -> three identical channels: out[n, h, w, c] == pixels[n, h, w]
    return np.repeat(pixels[..., np.newaxis], 3, axis=-1)

def convert_labels(labels):
    """Return ``labels`` copied into a new float32 NumPy array."""
    return np.array(labels, dtype=np.float32)

# Load MNIST data
# Imported from tensorflow.keras so that every Keras object in this notebook
# comes from the same distribution (mixing `keras` and `tensorflow.keras`
# objects can fail depending on the installed versions).
from tensorflow.keras.datasets import mnist
(X_train, y_train), (X_test, y_test) = mnist.load_data()

IMG_SIZE = 224    # side length of the square canvases made by build_synth_data
BATCH_SIZE = 16

X_synth_train, y_synth_train = build_synth_data(X_train, y_train, 60)
X_synth_test, y_synth_test = build_synth_data(X_test, y_test, 10)

# The regression head below ends in a sigmoid, so it can only output values in
# (0, 1). The boxes from build_synth_data are in pixels (0..224), so they are
# scaled to [0, 1] here; multiply predictions by IMG_SIZE to get pixels back.
train_labels = convert_labels(y_synth_train) / IMG_SIZE
test_labels = convert_labels(y_synth_test) / IMG_SIZE

train_images = prep_data_keras(X_synth_train)
test_images = prep_data_keras(X_synth_test)

# Manually split data into training and validation sets
# (fixed random_state so the split is the same on every run)
train_images_split, val_images_split, train_labels_split, val_labels_split = train_test_split(
    train_images, train_labels, test_size=0.1, random_state=101
)

# Batch generators.
# No geometric augmentation (rotation/shift/shear/zoom) is applied:
# ImageDataGenerator transforms only the image, so the digits would move while
# the bounding-box targets stayed put, i.e. the labels would become wrong.
# Position variety already comes from build_synth_data; raise its dataset_size
# to get more training data.
datagen = ImageDataGenerator()

# Create separate generators for training and validation
train_generator = datagen.flow(train_images_split, train_labels_split, batch_size=BATCH_SIZE)
val_generator = ImageDataGenerator().flow(
    val_images_split, val_labels_split, batch_size=BATCH_SIZE, shuffle=False
)

# Load VGG16 for bounding box prediction
vgg = tf.keras.applications.VGG16(
    weights="imagenet",
    include_top=False,
    input_tensor=tf.keras.Input(shape=(IMG_SIZE, IMG_SIZE, 3)),
)
vgg.trainable = False  # use VGG16 as a frozen feature extractor

# Bounding box regression model
flatten = vgg.output
flatten = tf.keras.layers.Flatten()(flatten)
bboxHead = tf.keras.layers.Dense(128, activation="relu")(flatten)
bboxHead = Dropout(0.3)(bboxHead)  # Add dropout
bboxHead = tf.keras.layers.Dense(32, activation="relu")(bboxHead)
bboxHead = Dropout(0.3)(bboxHead)  # Add dropout
# Four sigmoid outputs: normalised [left, top, right, bottom]
bboxHead = tf.keras.layers.Dense(4, activation="sigmoid")(bboxHead)
model = tf.keras.models.Model(inputs=vgg.input, outputs=bboxHead)

# Adjust steps_per_epoch and validation_steps
# Ceiling division: floor division gave 0 validation steps whenever the
# validation set was smaller than one batch (6 samples vs. batch size 16).
steps_per_epoch = int(np.ceil(len(train_images_split) / BATCH_SIZE))
validation_steps = int(np.ceil(len(val_images_split) / BATCH_SIZE))

# Compile and train the model
INIT_LR = 5e-5  # Lowered from 1e-4
opt = tf.keras.optimizers.Adam(learning_rate=INIT_LR)
model.compile(loss="mse", optimizer=opt)

early_stopping = EarlyStopping(monitor='val_loss', patience=3, restore_best_weights=True)
lr_reduction = ReduceLROnPlateau(monitor='val_loss', patience=2, factor=0.5, min_lr=1e-6)

model.fit(train_generator,
          steps_per_epoch=steps_per_epoch,
          validation_data=val_generator,
          validation_steps=validation_steps,
          epochs=5,
          verbose=1,
          callbacks=[early_stopping, lr_reduction])

# Extract digits from bounding boxes
def extract_digits_from_bbox(images, bboxes):
    """Crop each image to its bounding box and resize the crop to 28x28.

    Args:
        images: Iterable of images shaped (H, W, C).
        bboxes: Iterable of ``(x1, y1, x2, y2)`` boxes in PIXEL coordinates,
            one per image. Values are truncated to int and clamped to the
            image bounds.

    Returns:
        float32 NumPy array of shape (N, 28, 28, C). An image whose box has
        no area after clamping contributes an all-zero placeholder, so the
        output always has one entry per input image.
    """
    crop_size = (28, 28)
    digits = []
    for img, bbox in zip(images, bboxes):
        x1, y1, x2, y2 = bbox

        # Ensure the coordinates are within the image bounds
        x1 = max(0, int(x1))
        y1 = max(0, int(y1))
        x2 = min(img.shape[1], int(x2))
        y2 = min(img.shape[0], int(y2))

        # Ensure the bounding box is valid (non-zero area)
        if x2 > x1 and y2 > y1:
            resized = tf.image.resize(img[y1:y2, x1:x2], crop_size)
            # Store plain float32 arrays so that valid crops and placeholders
            # have the same type and the stacked result has a stable dtype.
            digits.append(resized.numpy().astype("float32", copy=False))
        else:
            # Invalid box: keep the position in the output with a blank crop
            print(f"Invalid bounding box: {bbox}, skipping...")
            # Match the image's channel count instead of assuming RGB
            channels = img.shape[-1] if len(img.shape) == 3 else 3
            digits.append(np.zeros(crop_size + (channels,), dtype="float32"))

    return np.array(digits)


# Create CNN+RNN model for digit classification
# Imported from tensorflow.keras (not the standalone `keras` package) because
# the model below also uses `Dropout` from tensorflow.keras.layers; layers from
# two different Keras distributions cannot always be combined in one model.
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, LSTM, Dense, TimeDistributed

# Sequence length the model is built for: 5 crops of 28x28x3 per sample
expected_digits_per_image = 5

cnn_rnn_model = Sequential()
cnn_rnn_model.add(TimeDistributed(Conv2D(32, (3, 3), activation='relu', padding='same'),
                                  input_shape=(expected_digits_per_image, 28, 28, 3)))
cnn_rnn_model.add(TimeDistributed(MaxPooling2D((2, 2), strides=(2, 2), padding='same')))
cnn_rnn_model.add(Dropout(0.3))  # Add dropout
cnn_rnn_model.add(TimeDistributed(Flatten()))
cnn_rnn_model.add(LSTM(50, return_sequences=True))
cnn_rnn_model.add(Dropout(0.3))  # Add dropout
cnn_rnn_model.add(TimeDistributed(Dense(10, activation='softmax')))

cnn_rnn_model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])
cnn_rnn_model.summary()

# After training bounding box model, predict and extract digits
predicted_bboxes = model.predict(test_images)

# The bounding-box head ends in a sigmoid, so predictions are fractions in
# (0, 1), while extract_digits_from_bbox truncates its boxes to integer pixel
# indices. Scale x by the image width and y by the image height first;
# otherwise every box truncates to zero area.
img_height, img_width = test_images.shape[1:3]
pixel_bboxes = predicted_bboxes * np.array(
    [img_width, img_height, img_width, img_height], dtype="float32"
)
digit_images = extract_digits_from_bbox(test_images, pixel_bboxes)

# Count extracted crops. extract_digits_from_bbox returns an array with a
# zero placeholder for every invalid box (never None), so this is its length.
num_valid_digits = len(digit_images)

# Ensure we have enough digits and corresponding labels
if num_valid_digits < expected_digits_per_image:
    print(f"Not enough valid digits extracted: {num_valid_digits}")
else:
    # Calculate the number of full sets of digits
    num_full_sets = num_valid_digits // expected_digits_per_image
    total_digits_needed = num_full_sets * expected_digits_per_image

    # Keep only as many crops as fill complete sequences ...
    digit_images = digit_images[:total_digits_needed]
    # ... and group them as (sets, 5, 28, 28, 3), the input shape the model
    # was declared with. Feeding the flat 4-D array does not match it.
    digit_sequences = digit_images.reshape(
        (num_full_sets, expected_digits_per_image) + digit_images.shape[1:]
    )

    # FIXME(review): these are the labels of the FIRST MNIST test digits. They
    # do not correspond to the digits inside the crops: build_synth_data picks
    # digits at random and does not return which ones it used, and each crop
    # covers a whole strip of 1-5 digits. The shapes line up, but the targets
    # are unrelated to the inputs until per-digit labels are produced upstream.
    digit_labels = to_categorical(y_test[:total_digits_needed], 10).reshape((num_full_sets, expected_digits_per_image, 10))

    # Train CNN+RNN model on the extracted digits
    H = cnn_rnn_model.fit(digit_sequences, digit_labels, validation_split=0.1, epochs=5, verbose=1)

    # Save the trained model
    cnn_rnn_model.save("cnn_rnn_model.h5", save_format="h5")

    # Evaluate the model
    # NOTE(review): this scores the same data the model was just trained on,
    # so it is not a held-out test result.
    test_score = cnn_rnn_model.evaluate(digit_sequences, digit_labels, verbose=0)
    print(f"Test Loss: {test_score[0]}, Test Accuracy: {test_score[1]}")


In [None]:
import tensorflow as tf
from keras.models import load_model
# Draw the layer graph of `model` (the VGG16-based bounding-box regressor built
# in the first cell), with each layer's input/output shapes shown.
# NOTE(review): plot_model normally needs pydot and Graphviz installed -
# confirm they are available in the target environment.
tf.keras.utils.plot_model(model, show_shapes=True)

In [None]:
import matplotlib.pyplot as plt
import numpy as np

# Number of epochs that were actually recorded in the training history
N = len(H.history['loss'])
epoch_axis = np.arange(N)

# Training vs. validation loss of the CNN+RNN model, one point per epoch
plt.style.use("ggplot")
fig, ax = plt.subplots()
for history_key, curve_label in (("loss", "train_loss"), ("val_loss", "val_loss")):
    ax.plot(epoch_axis, H.history[history_key], label=curve_label)
ax.set_title("CNN+RNN Model Loss on Training Set")
ax.set_xlabel("Epoch #")
ax.set_ylabel("Loss")
ax.legend(loc="lower left")
plt.show()