Make sure that we are running the correct version of TensorFlow first

In [1]:
from google.colab import drive
# Mount Google Drive into the Colab filesystem at /content/drive.
drive.mount("/content/drive")

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [2]:
import tensorflow as tf
# Display the version string of the imported TensorFlow package.
tf.__version__

'2.8.2'

In [3]:
import sys

# Python ≥3.6 required
assert sys.version_info >= (3, 6), "Python ≥3.6 required"

# TensorFlow ≥2.0 required.
# Compare (major, minor) as integers: comparing the raw version strings is
# lexicographic, so e.g. "10.0" would sort before "2.0".
tf_major_minor = tuple(int(part) for part in tf.__version__.split(".")[:2])
assert tf_major_minor >= (2, 0), f"TensorFlow ≥2.0 required, found {tf.__version__}"

In [4]:
# Put the project directory on Drive at the front of the module search path;
# presumably this is what lets `import recognizer` below resolve — confirm the layout.
sys.path.insert(0, "/content/drive/MyDrive/line-reader")

In [5]:
import recognizer

# Train and use lenet or simple networks for character prediction

Steps are roughly:
- Create emnist dataset object to batch, shuffle, prefetch, etc...
- Create a network to experiment with.
- Normalize the input before passing it to the network.
- Use exponential decay learning rate.
- We could also augment the training images based on some random rules.
- Save model checkpoints every few epochs.
- Save the trained model weights at the end of the training.

In [6]:
%load_ext autoreload
%autoreload 2

import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline

import sys
# To be able to write code as if this notebook was one level up in files tree structure.
sys.path.append('..')

from pathlib import Path

import tensorflow as tf
from tensorflow.data import Dataset
from tensorflow import keras

In [7]:
!pip install toml

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/


In [8]:
# Import traps: http://python-notes.curiousefficiency.org/en/latest/python_concepts/import_traps.html
from recognizer.datasets import EmnistDataset
from recognizer.networks import lenet
from recognizer.networks import simple
from recognizer.networks import NetworkInput

Hyperparameters

In [9]:
# Training hyperparameters: examples per batch, and number of passes over the training data.
batch_size, epochs = 128, 16

Setup dataset

In [10]:
# Build the EMNIST dataset wrapper and derive the two input pipelines from it.
emnist = EmnistDataset()

# Training pipeline: shuffle through a 1024-element buffer, then group into batches.
train_dataset = (
    emnist.train_dataset
    .shuffle(buffer_size=1024)
    .batch(batch_size=batch_size)
)

# Evaluation pipeline: batched only, no shuffling.
test_dataset = emnist.test_dataset.batch(batch_size=batch_size)

Download path: /content/drive/MyDrive/line-reader/data/cache/datasets/matlab.zip
Processing data...
Balancing train dataset...
Target max number of images per class: 21635892
Dataset ready, with 1395864 training entries and 116323 test entries


In [11]:
# Inspect the concrete class of the dataset wrapper.
type(emnist)

recognizer.datasets.emnist_dataset.EmnistDataset

In [12]:
# Inspect the pipeline type after shuffle + batch.
type(train_dataset)

tensorflow.python.data.ops.dataset_ops.BatchDataset

> The tf.data API provides a software pipelining mechanism through the tf.data.Dataset.prefetch transformation, which can be used to decouple the time when data is produced from the time when data is consumed. In particular, the transformation uses a background thread and an internal buffer to prefetch elements from the input dataset ahead of the time they are requested. The number of elements to prefetch should be equal to (or possibly greater than) the number of batches consumed by a single training step. You could either manually tune this value, or set it to tf.data.experimental.AUTOTUNE which will prompt the tf.data runtime to tune the value dynamically at runtime.
~[https://www.tensorflow.org/alpha/guide/data_performance](https://www.tensorflow.org/alpha/guide/data_performance)

In [13]:
# Let tf.data prepare upcoming batches in the background while the current one
# is being consumed; AUTOTUNE delegates the buffer size to the tf.data runtime.
autotune = tf.data.experimental.AUTOTUNE
train_dataset = train_dataset.prefetch(autotune)

In [14]:
# Display the pipeline's element spec (shapes and dtypes of images and labels).
train_dataset

<PrefetchDataset element_spec=(TensorSpec(shape=(None, 28, 28, 1), dtype=tf.float32, name=None), TensorSpec(shape=(None, 62), dtype=tf.int64, name=None))>

In [15]:
# Inspect the pipeline type again now that prefetching has been added.
type(train_dataset)

tensorflow.python.data.ops.dataset_ops.PrefetchDataset

In [16]:
# Pull exactly one batch out of the training pipeline; the single-element
# unpacking fails loudly if `take(1)` yields anything other than one batch.
[(x_train, y_train)] = train_dataset.take(1)

# The model consumes single examples, so drop the leading batch dimension.
input_shape = tuple(x_train.shape[1:])
print(f"x shape: {x_train.shape}, model input shape: {input_shape}")

x shape: (128, 28, 28, 1), model input shape: (28, 28, 1)


## Quickly fit one batch and check that everything is working as expected

In [22]:
#from recognizer.networks import lenet, simple

#(x_test, y_test), = test_dataset.take(1)

#model = lenet(input_shape=input_shape, number_of_classes=emnist.number_of_classes)
#model = simple(input_shape=input_shape, number_of_classes=emnist.number_of_classes)

#model.compile(optimizer='adam',
#              loss='categorical_crossentropy',
#              metrics=['accuracy'])

#model.fit(x=x_train, y=y_train, epochs=5)
#model.evaluate(x_test, y_test)

# Model training

[Get started with TensorFlow 2.0 for experts](https://www.tensorflow.org/alpha/tutorials/quickstart/advanced)

[Training and Evaluation with TensorFlow Keras](https://www.tensorflow.org/alpha/guide/keras/training_and_evaluation)

In [None]:
# Output locations, relative to the notebook's working directory:
# a directory for training checkpoints and a file for the final saved model.
recognizer_dir = Path("..") / "recognizer"
model_checkpoints_path = recognizer_dir / "ckpts" / "character_model"
model_save_path = recognizer_dir / "weights" / "character_model.h5"

# Create the checkpoint directory (and any missing parents) up front.
model_checkpoints_path.mkdir(parents=True, exist_ok=True)

In [None]:
# Loss used by both the training and the evaluation step.
loss_object = keras.losses.CategoricalCrossentropy()

# Learning rate starts at 0.01 and is multiplied by 0.96 once every 100000
# optimizer steps (staircase=True makes the decay happen in discrete jumps).
initial_learning_rate = 0.01
learning_rate_schedule = keras.optimizers.schedules.ExponentialDecay(
    initial_learning_rate=initial_learning_rate,
    decay_steps=100000,
    decay_rate=0.96,
    staircase=True,
)

# RMSprop driven by the decaying schedule above.
optimizer = keras.optimizers.RMSprop(learning_rate=learning_rate_schedule)

In [None]:
# Running aggregates updated by `train_step`: mean loss and categorical accuracy.
train_loss = keras.metrics.Mean(name='train_loss')
train_accuracy = keras.metrics.CategoricalAccuracy(name='train_accuracy')

# The same pair of aggregates, updated by `test_step`.
test_loss = keras.metrics.Mean(name='test_loss')
test_accuracy = keras.metrics.CategoricalAccuracy(name='test_accuracy')

In [None]:
@tf.function
def train_step(images, labels):
    """Run one optimization step on a single batch and update the training metrics.

    Uses the notebook-level `model`, `loss_object`, `optimizer`, `train_loss`
    and `train_accuracy` objects.

    Args:
        images: batch of inputs passed to `model`.
        labels: targets for `images`, passed to `loss_object` and `train_accuracy`.
    """
    with tf.GradientTape() as tape:
        # training=True puts any layers that behave differently during training
        # (e.g. dropout or batch normalization, if the network has them) into
        # training mode; without it they would run in inference mode.
        predictions = model(images, training=True)
        loss = loss_object(labels, predictions)
    gradients = tape.gradient(loss, model.trainable_variables)
    optimizer.apply_gradients(zip(gradients, model.trainable_variables))

    # Fold this batch into the running training metrics.
    train_loss(loss)
    train_accuracy(labels, predictions)

In [None]:
@tf.function
def test_step(images, labels):
    """Evaluate a single batch and update the test metrics (no weight updates).

    Uses the notebook-level `model`, `loss_object`, `test_loss` and
    `test_accuracy` objects.

    Args:
        images: batch of inputs passed to `model`.
        labels: targets for `images`, passed to `loss_object` and `test_accuracy`.
    """
    # Be explicit that this is inference, mirroring `training=True` in the
    # training step, so mode-dependent layers never run in training mode here.
    predictions = model(images, training=False)
    t_loss = loss_object(labels, predictions)

    # Fold this batch into the running test metrics.
    test_loss(t_loss)
    test_accuracy(labels, predictions)

In [None]:
# Describe the network's input: example shape, dataset mean/std, and the number
# of output classes, all taken from the values computed above.
network_input = NetworkInput(
    input_shape=input_shape,
    mean=emnist.mean,
    std=emnist.std,
    number_of_classes=emnist.number_of_classes,
)

# LeNet is the network under experiment here. To try the other one instead:
# model = simple(NetworkInput(input_shape=input_shape, number_of_classes=emnist.number_of_classes))
model = lenet(network_input)

model.summary()

In [None]:
# Track a step counter, the optimizer and the model in one checkpoint object;
# the manager keeps only the 3 most recent checkpoints in the directory.
ckpt = tf.train.Checkpoint(step=tf.Variable(1), optimizer=optimizer, net=model)
manager = tf.train.CheckpointManager(
    checkpoint=ckpt,
    directory=model_checkpoints_path,
    max_to_keep=3,
)

# Resume from the newest checkpoint when one exists; otherwise start fresh.
latest_checkpoint = manager.latest_checkpoint
ckpt.restore(latest_checkpoint)
print(f"Restored from {latest_checkpoint}" if latest_checkpoint else "Initializing from scratch.")

In [None]:
for epoch in range(1, epochs + 1):
    # Keras metrics keep accumulating until they are reset. Clear them at the
    # start of every epoch so the numbers printed below describe this epoch
    # only, rather than a running average over all epochs so far.
    # (`reset_state` exists since TF 2.5; older releases name it `reset_states`.)
    for metric in (train_loss, train_accuracy, test_loss, test_accuracy):
        metric.reset_state()

    # One full pass over the training data.
    for images, labels in train_dataset:
        train_step(images, labels)

    # One full pass over the test data.
    for test_images, test_labels in test_dataset:
        test_step(test_images, test_labels)

    # `ckpt.step` advances once per epoch; a checkpoint is written whenever it
    # reaches a multiple of 10.
    ckpt.step.assign_add(1)
    if int(ckpt.step) % 10 == 0:
        save_path = manager.save()
        print(f"💾 Saved checkpoint for step {int(ckpt.step)}: {save_path}")

    print(f"Epoch {epoch}, "
          f"Loss: {train_loss.result()}, Accuracy: {train_accuracy.result()*100}, "
          f"Test Loss: {test_loss.result()}, Test Accuracy: {test_accuracy.result()*100}")

In [None]:
# Next time, start from a clean slate:
# recursively delete the checkpoint directory so that a later run of the
# checkpoint-restore cell finds nothing to restore from.
!rm -r {model_checkpoints_path}

# Save the model

In [None]:
# Write the model to `model_save_path` (the `.h5` file path defined earlier in the notebook).
model.save(model_save_path)

# Predict

In [None]:
# Reload the saved model from disk. compile=False skips restoring the
# optimizer/loss configuration, which is not needed just to run predictions.
model = tf.keras.models.load_model(
    filepath=model_save_path,
    compile=False,
)

model.summary()

In [None]:
# Grab a single batch from the test pipeline.
[(x_test, y_test)] = test_dataset.take(1)

# Score only the first two images of that batch.
sample_images = x_test[:2]
predictions = model.predict(sample_images)
print('predictions shape:', predictions.shape)

In [None]:
# Display the raw model outputs for the sampled images.
predictions

In [None]:
# Index of the highest-scoring class for each sampled image.
np.argmax(predictions, axis=1)