In [1]:
import tensorflow as tf
import matplotlib.pyplot as plt
import numpy as np
from datetime import datetime
import os

In [2]:
import tensorflow_datasets as tfds

In [3]:
# Carve the MNIST training split into 80% / 10% / 10% slices and load each
# slice as its own (image, label) dataset, together with the dataset metadata.
# NOTE(review): `subsplit` / `tfds.percent` is the older TFDS split API --
# confirm it is available in the installed tensorflow_datasets version.
dataset, info = tfds.load(
    'mnist',
    split=[
        tfds.Split.TRAIN.subsplit(tfds.percent[start:stop])
        for start, stop in ((None, 80), (80, 90), (90, None))
    ],
    as_supervised=True,
    with_info=True,
)



In [4]:
# Display the list of three split datasets returned by tfds.load.
dataset

[<_OptionsDataset shapes: ((28, 28, 1), ()), types: (tf.uint8, tf.int64)>,
 <_OptionsDataset shapes: ((28, 28, 1), ()), types: (tf.uint8, tf.int64)>,
 <_OptionsDataset shapes: ((28, 28, 1), ()), types: (tf.uint8, tf.int64)>]

In [5]:
# Name the three slices in load order: index 0 -> train, 1 -> test, 2 -> valid.
train_dataset, test_dataset, valid_dataset = dataset

In [7]:
# Sanity check: pull four batches of 16 examples and print the Python types
# of the image and label objects that iteration yields.
for image,label in train_dataset.batch(16).take(4):
    print(type(image),type(label))

<class 'tensorflow.python.framework.ops.EagerTensor'> <class 'tensorflow.python.framework.ops.EagerTensor'>
<class 'tensorflow.python.framework.ops.EagerTensor'> <class 'tensorflow.python.framework.ops.EagerTensor'>
<class 'tensorflow.python.framework.ops.EagerTensor'> <class 'tensorflow.python.framework.ops.EagerTensor'>
<class 'tensorflow.python.framework.ops.EagerTensor'> <class 'tensorflow.python.framework.ops.EagerTensor'>


In [8]:
# Display the dataset metadata object returned by tfds.load(with_info=True).
info

tfds.core.DatasetInfo(
    name='mnist',
    version=1.0.0,
    description='The MNIST database of handwritten digits.',
    homepage='http://yann.lecun.com/exdb/mnist/',
    features=FeaturesDict({
        'image': Image(shape=(28, 28, 1), dtype=tf.uint8),
        'label': ClassLabel(shape=(), dtype=tf.int64, num_classes=10),
    }),
    total_num_examples=70000,
    splits={
        'test': 10000,
        'train': 60000,
    },
    supervised_keys=('image', 'label'),
    citation="""@article{lecun2010mnist,
      title={MNIST handwritten digit database},
      author={LeCun, Yann and Cortes, Corinna and Burges, CJ},
      journal={ATT Labs [Online]. Available: http://yann. lecun. com/exdb/mnist},
      volume={2},
      year={2010}
    }""",
    redistribution_info=,
)

In [9]:
def preprocess(image,label):
    """Cast one (image, label) example to float32 and standardize the image.

    Args:
        image: Image tensor. It is cast to ``tf.float32`` and then passed
            through ``tf.image.per_image_standardization``.
        label: Label tensor. It is cast to ``tf.float32``.

    Returns:
        A tuple ``(image, label)`` holding the standardized float image and
        the float label.
    """
    image_f32 = tf.cast(image, tf.float32)
    standardized = tf.image.per_image_standardization(image_f32)
    return standardized, tf.cast(label, tf.float32)
    

In [10]:
# Run every split through the same preprocessing function.
train_dataset, test_dataset, valid_dataset = [
    split.map(preprocess)
    for split in (train_dataset, test_dataset, valid_dataset)
]


Instructions for updating:
Deprecated in favor of operator or tf.math.divide.


Instructions for updating:
Deprecated in favor of operator or tf.math.divide.


In [11]:
# Sanity check after preprocessing: print the image batch shape, the label
# batch shape and the label values for four batches of 16 examples.
for image, label in train_dataset.batch(16).take(4):
    print(image.shape, label.shape, label)

(16, 28, 28, 1) (16,) tf.Tensor([8. 8. 3. 0. 0. 4. 0. 6. 1. 1. 7. 1. 9. 5. 0. 6.], shape=(16,), dtype=float32)
(16, 28, 28, 1) (16,) tf.Tensor([1. 2. 0. 1. 9. 4. 3. 0. 4. 6. 6. 8. 3. 4. 0. 3.], shape=(16,), dtype=float32)
(16, 28, 28, 1) (16,) tf.Tensor([3. 0. 7. 5. 6. 4. 4. 2. 8. 2. 9. 2. 2. 1. 3. 1.], shape=(16,), dtype=float32)
(16, 28, 28, 1) (16,) tf.Tensor([2. 2. 7. 5. 1. 9. 5. 6. 3. 3. 2. 1. 0. 0. 5. 1.], shape=(16,), dtype=float32)


In [12]:
class LeNet(tf.keras.Model):
    """LeNet-style CNN written with raw ``tf.Variable`` weights and ``tf.nn`` ops.

    Architecture (input in NHWC layout):
        conv 3x3 (1 -> 6, VALID) + ReLU + 2x2 max-pool
        conv 3x3 (6 -> 16, VALID) + ReLU + 2x2 max-pool
        flatten
        dense 400 -> 128 + ReLU
        dense 128 -> 64 + ReLU
        dense 64 -> 10 (raw logits, no activation)

    The first dense layer takes 400 features, which is what a 28x28x1 input
    produces: 28 -> 26 (conv) -> 13 (pool) -> 11 (conv) -> 5 (pool), and
    5 * 5 * 16 = 400. Other spatial sizes will fail at the first matmul.

    Every variable carries an explicit, unique name matching its attribute.
    """

    def __init__(self):
        super(LeNet, self).__init__()
        initializer = tf.initializers.GlorotUniform(seed=123)

        # Convolution kernels: [height, width, in_channels, out_channels].
        self.wc1 = tf.Variable(initializer([3, 3, 1, 6]), trainable=True, name='wc1')
        self.wc2 = tf.Variable(initializer([3, 3, 6, 16]), trainable=True, name='wc2')

        # Dense weights: [in_features, out_features].
        self.wd3 = tf.Variable(initializer([400, 128]), trainable=True, name='wd3')
        self.wd4 = tf.Variable(initializer([128, 64]), trainable=True, name='wd4')
        self.wd5 = tf.Variable(initializer([64, 10]), trainable=True, name='wd5')

        # Biases, one per output channel / unit, initialised to zero.
        self.bc1 = tf.Variable(tf.zeros([6]), dtype=tf.float32, trainable=True, name='bc1')
        self.bc2 = tf.Variable(tf.zeros([16]), dtype=tf.float32, trainable=True, name='bc2')
        self.bd3 = tf.Variable(tf.zeros([128]), dtype=tf.float32, trainable=True, name='bd3')
        self.bd4 = tf.Variable(tf.zeros([64]), dtype=tf.float32, trainable=True, name='bd4')
        self.bd5 = tf.Variable(tf.zeros([10]), dtype=tf.float32, trainable=True, name='bd5')

    @staticmethod
    def _conv_relu_pool(x, kernel, bias):
        """Stride-1 VALID convolution + bias + ReLU, then a 2x2 stride-2 max-pool."""
        x = tf.nn.conv2d(x, kernel, strides=[1, 1, 1, 1], padding="VALID")
        x = tf.nn.bias_add(x, bias)
        x = tf.nn.relu(x)
        return tf.nn.max_pool(x, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding="VALID")

    @staticmethod
    def _dense_layer(x, weights, bias):
        """Affine transform ``x @ weights + bias`` with no activation."""
        return tf.nn.bias_add(tf.matmul(x, weights), bias)

    def call(self, x, training=None):
        """Compute class logits for a batch of images.

        Args:
            x: Float image batch in NHWC layout with a single channel.
            training: Accepted so callers can pass ``training=True/False``
                explicitly; the network has no mode-dependent layers, so the
                value does not change the result.

        Returns:
            Tensor of shape ``(batch, 10)`` holding unnormalised logits.
        """
        x = self._conv_relu_pool(x, self.wc1, self.bc1)
        x = self._conv_relu_pool(x, self.wc2, self.bc2)

        # Flatten to (batch, features), keeping the batch dimension dynamic.
        x = tf.reshape(x, (tf.shape(x)[0], -1))

        x = tf.nn.relu(self._dense_layer(x, self.wd3, self.bd3))
        x = tf.nn.relu(self._dense_layer(x, self.wd4, self.bd4))

        # Final layer returns raw logits; softmax is left to the loss / caller.
        return self._dense_layer(x, self.wd5, self.bd5)

In [13]:
# Instantiate the network; its variables are created in LeNet.__init__.
model = LeNet()

In [14]:
# from_logits=True: LeNet.call returns the output of the last dense layer
# without applying a softmax or sigmoid.
ce_loss = tf.losses.SparseCategoricalCrossentropy(from_logits=True)

In [15]:
# Training hyperparameters.
LEARNING_RATE = 0.0001  # learning rate handed to the Adam optimizer
EPOCHS = 5  # number of iterations of the training loop
BATCH_SIZE = 32  # batch size applied to the training and validation datasets

In [16]:
# Adam optimizer configured with the LEARNING_RATE constant.
optimizer = tf.optimizers.Adam(learning_rate= LEARNING_RATE)

In [17]:
def train_step(model, inputs, labels, loss_fn, optimzer):
    """Run one optimization step on a single batch and return its loss.

    Args:
        model: Callable invoked as ``model(inputs, training=True)``; must
            expose ``trainable_variables``.
        inputs: Batch of model inputs.
        labels: Batch of targets, passed as the first argument to ``loss_fn``.
        loss_fn: Callable ``loss_fn(labels, predictions)`` returning the loss.
        optimzer: Optimizer whose ``apply_gradients`` applies the update. The
            parameter keeps its original (misspelled) name so that existing
            keyword callers continue to work.

    Returns:
        The loss computed on this batch (evaluated before the update).
    """
    # Only the forward pass and the loss need to be recorded on the tape.
    with tf.GradientTape() as tape:
        y_predicted = model(inputs, training=True)
        current_loss = loss_fn(labels, y_predicted)

    # Differentiate and update after leaving the recording context, and use
    # the optimizer that was passed in rather than a module-level global.
    variables = model.trainable_variables
    gradients = tape.gradient(current_loss, variables)
    optimzer.apply_gradients(zip(gradients, variables))

    return current_loss

In [18]:
def valid_step(model, inputs, labels, loss_fn):
    """Evaluate the loss on one batch without updating any weights.

    Args:
        model: Callable invoked as ``model(inputs, training=False)``.
        inputs: Batch of model inputs.
        labels: Batch of targets, passed as the first argument to ``loss_fn``.
        loss_fn: Callable ``loss_fn(labels, predictions)`` returning the loss.

    Returns:
        The value returned by ``loss_fn`` for this batch.
    """
    return loss_fn(labels, model(inputs, training=False))

In [19]:
# Group the training and validation examples into batches of BATCH_SIZE.
train_dataset, valid_dataset = [
    split.batch(BATCH_SIZE) for split in (train_dataset, valid_dataset)
]

In [20]:
# Running means used to average the per-batch losses over an epoch.
losses = tf.keras.metrics.Mean(name='loss')
val_losses = tf.keras.metrics.Mean(name='val_loss')

In [21]:
# Timestamped log directory so each execution of this cell writes its
# summaries to a separate folder.
current_time = datetime.now().strftime("%Y%m%d-%H%M%S")
train_log_dir = f'./temp/train/{current_time}/logs'
file_writer = tf.summary.create_file_writer(train_log_dir)

In [22]:
# Directory that holds the training checkpoints.
checkpoint_dir = './temp/train/checkpoints/'

In [39]:
# Bundle a step counter (starting at 1), the optimizer and the model into a
# single checkpoint object.
ckpt = tf.train.Checkpoint(step = tf.Variable(1), optimizer=optimizer, net=model)
# Manage the checkpoint files under checkpoint_dir with max_to_keep=3.
manager = tf.train.CheckpointManager(ckpt, checkpoint_dir, max_to_keep=3)

In [40]:
def check_for_checkpoint(manager):
    """Restore the module-level ``ckpt`` from the manager's latest checkpoint.

    ``ckpt.restore`` is always called with ``manager.latest_checkpoint``; a
    message is then printed saying whether a checkpoint path was available.

    Args:
        manager: Object exposing a ``latest_checkpoint`` attribute.
    """
    latest = manager.latest_checkpoint
    ckpt.restore(latest)
    message = f"restored from {latest}" if latest else "Initializing from scratch"
    print(message)

In [45]:
# Restore from the latest checkpoint if there is one, otherwise start fresh.
check_for_checkpoint(manager)

restored from ./temp/train/checkpoints/ckpt-3


In [47]:
# Training loop: for each epoch, train on every batch, save a checkpoint,
# log the mean training loss, then evaluate and log the mean validation loss.
for epoch in range(EPOCHS):
    # The checkpointed step counter is bumped once per epoch, not per batch.
    ckpt.step.assign_add(1)

    print(f'epoch: {epoch}')
    # Start each epoch with empty running means.
    losses.reset_states()
    val_losses.reset_states()
    for x_batch, y_batch in train_dataset:
        loss = train_step(model, x_batch, y_batch, ce_loss, optimizer)
        losses(loss)
    
    save_path = manager.save()
    print("Saved checkpoint for step {}: {}".format(int(ckpt.step), save_path))

    # x_batch is the last training batch left over from the loop above.
    # NOTE(review): summaries are indexed by the loop-local `epoch`, which
    # restarts at 0 every time this cell runs, while ckpt.step carries over
    # from a restored checkpoint -- confirm this is intended.
    with file_writer.as_default():
        tf.summary.scalar('loss', losses.result(), step=epoch)
        tf.summary.image('Input images', x_batch, step=epoch)

    print(losses.result())
        
    # Validation pass; this loop rebinds x_batch / y_batch.
    for x_batch, y_batch in valid_dataset:
        val_loss = valid_step(model, x_batch, y_batch, ce_loss)
        val_losses(val_loss)
    
    # The validation loss is only written to the summary log, never printed.
    with file_writer.as_default():
        tf.summary.scalar('val_loss', val_losses.result(), step=epoch)

epoch: 0
Saved checkpoint for step 6: ./temp/train/checkpoints/ckpt-4
tf.Tensor(0.08000128, shape=(), dtype=float32)
epoch: 1
Saved checkpoint for step 7: ./temp/train/checkpoints/ckpt-5
tf.Tensor(0.06830396, shape=(), dtype=float32)
epoch: 2
Saved checkpoint for step 8: ./temp/train/checkpoints/ckpt-6
tf.Tensor(0.059911564, shape=(), dtype=float32)
epoch: 3
Saved checkpoint for step 9: ./temp/train/checkpoints/ckpt-7
tf.Tensor(0.053467274, shape=(), dtype=float32)
epoch: 4
Saved checkpoint for step 10: ./temp/train/checkpoints/ckpt-8
tf.Tensor(0.0481898, shape=(), dtype=float32)


In [48]:
def predict(inputs):
    """Return class probabilities for ``inputs`` using the module-level ``model``.

    Args:
        inputs: Batch of inputs forwarded to ``model``.

    Returns:
        The result of applying ``tf.nn.softmax`` to the model's output.
    """
    return tf.nn.softmax(model(inputs))

In [51]:
# Spot check on four batches of 16 held-out examples: print the predicted
# class index (argmax over axis 1) next to the true labels.
for sample, label in test_dataset.batch(16).take(4):
    predictions = predict(sample)
    print(tf.argmax(predictions, axis=1), label)

tf.Tensor([9 5 8 0 4 1 4 9 8 4 8 7 9 3 8 0], shape=(16,), dtype=int64) tf.Tensor([9. 5. 6. 0. 4. 1. 4. 9. 8. 4. 8. 7. 9. 3. 8. 0.], shape=(16,), dtype=float32)
tf.Tensor([0 4 3 2 0 5 7 3 9 4 7 4 7 6 9 8], shape=(16,), dtype=int64) tf.Tensor([0. 4. 3. 2. 0. 5. 7. 3. 9. 4. 7. 4. 7. 6. 9. 8.], shape=(16,), dtype=float32)
tf.Tensor([3 9 3 6 5 0 3 8 9 8 0 2 9 0 9 9], shape=(16,), dtype=int64) tf.Tensor([3. 9. 3. 6. 5. 0. 3. 8. 9. 8. 0. 2. 9. 0. 9. 9.], shape=(16,), dtype=float32)
tf.Tensor([1 5 9 5 9 3 3 2 9 7 0 8 0 3 8 5], shape=(16,), dtype=int64) tf.Tensor([1. 5. 9. 5. 9. 3. 3. 2. 9. 7. 0. 8. 0. 3. 8. 5.], shape=(16,), dtype=float32)


In [55]:
# Location of the exported model weights.
model_dir = './temp/train/models/'
# Create the target directory up front so that writing weights_path cannot
# fail because the folder is missing; exist_ok keeps the cell re-runnable.
os.makedirs(model_dir, exist_ok=True)
weights_path = os.path.join(model_dir, 'weights.h5')

In [56]:
# Write the model's weights to weights_path.
model.save_weights(weights_path)