# Training an MNIST

In [1]:
import tensorflow as tf
print(tf.__version__)

2.2.0


In [0]:
import numpy as np

In [0]:
import os

In [0]:
import tensorflow_datasets as tfds

In [4]:
(x_train, y_train), (x_test, y_test) = tf.keras.datasets.mnist.load_data()

Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/mnist.npz


In [0]:
from tensorflow.keras import Sequential
from tensorflow.keras import layers

In [0]:
from tensorflow.keras.callbacks import ModelCheckpoint

In [7]:
print(x_train.shape)
print(x_test.shape)
print(y_train.shape)

(60000, 28, 28)
(10000, 28, 28)
(60000,)


In [8]:
print(type(x_train))

<class 'numpy.ndarray'>


In [9]:
print(type(x_test))

<class 'numpy.ndarray'>


In [0]:
x_train = np.expand_dims(x_train, axis=-1)
x_test = np.expand_dims(x_test, axis=-1)

In [11]:
print(x_train.shape)
print(x_train.dtype)
print(x_test.dtype)
print(x_test.shape)
print(y_train.shape)

(60000, 28, 28, 1)
uint8
uint8
(10000, 28, 28, 1)
(60000,)


In [0]:
num_classes = 10

In [13]:
x_test = x_test.astype(np.float32)
x_test = x_test / 255.
print(type(x_test))
print(type(x_train))

<class 'numpy.ndarray'>
<class 'numpy.ndarray'>


In [0]:
# convert class vectors to binary class matrices
y_train = tf.keras.utils.to_categorical(y_train, num_classes)
y_test = tf.keras.utils.to_categorical(y_test, num_classes)

In [15]:
print(type(x_train))

<class 'numpy.ndarray'>


# Do some data Augmentation

In [0]:
train_datagen = tf.keras.preprocessing.image.ImageDataGenerator(featurewise_center=False, samplewise_center=False, 
                                                          featurewise_std_normalization=False, samplewise_std_normalization=False, 
                                                          zca_whitening=False, zca_epsilon=1e-06, 
                                                          rotation_range=20, width_shift_range=0.2, height_shift_range=0.2, 
                                                          brightness_range=None, shear_range=0.2, zoom_range=0.3, channel_shift_range=0.0, 
                                                          fill_mode='nearest', cval=0.0, horizontal_flip=False, 
                                                          vertical_flip=False, rescale=1/255., preprocessing_function=None, data_format=None, 
                                                          validation_split=0.2, dtype=None)

In [0]:
train_datagen.fit(x_train)

# Build a CNN

In [0]:
def make_cnn():
    model = Sequential()
    model.add(layers.Conv2D(32, (3, 3), activation='relu', input_shape=(28, 28, 1)))
    model.add(layers.MaxPooling2D((2, 2)))
    model.add(layers.Dropout(0.3))
    model.add(layers.Conv2D(64, (3, 3), activation='relu'))
    model.add(layers.MaxPooling2D((2, 2)))
    model.add(layers.Conv2D(64, (3, 3), activation='relu'))
    model.add(layers.Conv2D(64, (3, 3), activation='relu'))

    model.add(layers.Dropout(0.3))
    model.add(layers.Flatten())
    model.add(layers.Dense(256, activation='relu'))
    model.add(layers.Dropout(0.3))
    model.add(layers.Dense(10))

    return model

In [0]:
cnn = make_cnn()

In [0]:
cnn.summary()

In [0]:
optim = tf.keras.optimizers.Adam()

In [0]:
loss_f = tf.keras.losses.BinaryCrossentropy(from_logits=True)

In [0]:
cnn.compile(optimizer=optim, loss=loss_f, metrics=["acc"])

In [0]:
epochs = 25

In [0]:
batch_size = 32

In [0]:
checkpoint_path = "training_2/cp-{epoch:04d}.ckpt"
checkpoint_dir = os.path.dirname(checkpoint_path)


In [0]:
ckpt = ModelCheckpoint(checkpoint_path, monitor="val_loss", verbose=1, save_best_only=True)
callbacksl = [ckpt]

In [30]:
history = cnn.fit(
    train_datagen.flow(x_train, y_train, batch_size=32),
    steps_per_epoch= x_train.shape[0] // batch_size,
    epochs=epochs, batch_size = batch_size, callbacks=callbacksl, validation_data = (x_test, y_test)
    )

Epoch 1/25
Epoch 00001: val_loss improved from inf to 0.02270, saving model to training_2/cp-0001.ckpt
Instructions for updating:
If using Keras pass *_constraint arguments to layers.
INFO:tensorflow:Assets written to: training_2/cp-0001.ckpt/assets
Epoch 2/25
Epoch 00002: val_loss improved from 0.02270 to 0.01305, saving model to training_2/cp-0002.ckpt
INFO:tensorflow:Assets written to: training_2/cp-0002.ckpt/assets
Epoch 3/25
Epoch 00003: val_loss improved from 0.01305 to 0.01239, saving model to training_2/cp-0003.ckpt
INFO:tensorflow:Assets written to: training_2/cp-0003.ckpt/assets
Epoch 4/25
Epoch 00004: val_loss improved from 0.01239 to 0.01150, saving model to training_2/cp-0004.ckpt
INFO:tensorflow:Assets written to: training_2/cp-0004.ckpt/assets
Epoch 5/25
Epoch 00005: val_loss improved from 0.01150 to 0.00941, saving model to training_2/cp-0005.ckpt
INFO:tensorflow:Assets written to: training_2/cp-0005.ckpt/assets
Epoch 6/25
Epoch 00006: val_loss did not improve from 0.00

In [0]:
# # not required thoough
# tf.keras.models.save_model(
#     cnn, "/content/final_model.h5", overwrite=True, include_optimizer=True, save_format=None,
#     signatures=None, options=None
# )

In [34]:
print(checkpoint_dir)

training_2


In [0]:
# best_model =  tf.train.latest_checkpoint("content//training_2//cp-0025.ckpt//")

In [51]:
# print(best_model)

None


In [45]:
# Just to have an extra end model
cnn.save('saved_model/my_model') 

INFO:tensorflow:Assets written to: saved_model/my_model/assets


In [53]:
# Use the last checkpoint
!zip -r best_model.zip /content/training_2/cp-0025.ckpt

  adding: content/training_2/cp-0025.ckpt/ (stored 0%)
  adding: content/training_2/cp-0025.ckpt/saved_model.pb (deflated 89%)
  adding: content/training_2/cp-0025.ckpt/variables/ (stored 0%)
  adding: content/training_2/cp-0025.ckpt/variables/variables.index (deflated 69%)
  adding: content/training_2/cp-0025.ckpt/variables/variables.data-00000-of-00002 (deflated 81%)
  adding: content/training_2/cp-0025.ckpt/variables/variables.data-00001-of-00002 (deflated 14%)
  adding: content/training_2/cp-0025.ckpt/assets/ (stored 0%)
