In [1]:
# Set up deterministic flag
# The flag is exported before TensorFlow is imported so that it is already
# present in the process environment when the library loads.
import os
os.environ["TF_DETERMINISTIC_OPS"] = "1"

import tensorflow as tf

In [2]:
# Seed both NumPy and TensorFlow from one shared constant
import numpy as np

SEED = 666

np.random.seed(SEED)
tf.random.set_seed(SEED)

In [3]:
# Import wandb
import wandb
from wandb.keras import WandbCallback

In [4]:
# Other imports
import time

In [5]:
# Download/load Fashion-MNIST through Keras, then divide the pixel
# arrays by 255.0 (labels are left untouched)
fashion_mnist = tf.keras.datasets.fashion_mnist
train_split, test_split = fashion_mnist.load_data()
train_images, train_labels = train_split
test_images, test_labels = test_split

train_images, test_images = train_images / 255.0, test_images / 255.0

In [6]:
# Display the shapes of the train/test images and labels, in that order
tuple(arr.shape for arr in (train_images, train_labels, test_images, test_labels))

((60000, 28, 28), (60000,), (10000, 28, 28), (10000,))

In [7]:
# Human-readable class names; the list position is the integer class id
# passed as `labels` to the wandb callback
LABELS = [
    "T-shirt/top",
    "Trouser",
    "Pullover",
    "Dress",
    "Coat",
    "Sandal",
    "Shirt",
    "Sneaker",
    "Bag",
    "Ankle boot",
]

In [8]:
# Define model configurations in a dictionary
# These defaults are handed to wandb.init so they are logged with the run.
config_defaults = {
        "epochs": 10,
        "batch_size": 128,
        "prefinal_activation": "relu",
        "final_activation": "softmax",
        "optimizer": "adam",
        # Log the seed that was actually applied to NumPy/TensorFlow (SEED)
        # rather than an unrelated literal, so the run record is accurate.
        "seed": SEED,
}

In [9]:
# Start a wandb run; config_defaults becomes the run's tracked configuration
wandb.init(
    project="reproducible-ml",
    id="save-restore-exp",
    config=config_defaults,
)

# Handle to the run's hyperparameters, read later via attribute access
config = wandb.config

Error generating diff: Reference at 'refs/remotes/origin/master' does not exist


In [10]:
# Sub-classed Keras model: Flatten -> Dense(256) -> activation -> Dense -> activation
class MLPModel(tf.keras.models.Model):
    """Single-hidden-layer perceptron over 28x28 inputs.

    The two activation functions are looked up by name from the
    module-level wandb ``config`` (``prefinal_activation`` and
    ``final_activation``) when the model is instantiated.
    """

    def __init__(self, classes):
        """Create the layers.

        Args:
            classes: number of units in the final Dense layer.
        """
        super().__init__()
        self.classes = classes

        # Layers that transform shape or hold weights
        self.flatten = tf.keras.layers.Flatten(input_shape=(28, 28))
        self.dense_1 = tf.keras.layers.Dense(256)
        self.final = tf.keras.layers.Dense(classes)

        # Activations, configured through wandb
        self.relu = tf.keras.layers.Activation(config.prefinal_activation)
        self.softmax = tf.keras.layers.Activation(config.final_activation)

    def call(self, inputs):
        """Run the forward pass and return the final activation's output."""
        hidden = self.relu(self.dense_1(self.flatten(inputs)))
        return self.softmax(self.final(hidden))

From the [TensorFlow documentation](https://www.tensorflow.org/guide/keras/save_and_serialize#part_ii_saving_and_loading_of_subclassed_models):

"First of all, a subclassed model that has never been used cannot be saved.

That's because a subclassed model needs to be called on some data in order to create its weights.

Until the model has been called, it does not know the shape and dtype of the input data it should be expecting, and thus cannot create its weight variables. You may remember that in the Functional model from the first section, the shape and dtype of the inputs was specified in advance (via keras.Input(...)) -- that's why Functional models have a state as soon as they're instantiated."

In [14]:
# Create a specific validation set from the test set
# Draw 32 random row indices into the test set (np.random.choice is called
# with its default arguments, exactly as before) ...
idx = np.random.choice(test_images.shape[0], 32)

# ... and gather the matching images/labels in one shot with integer-array
# indexing instead of appending row by row in a Python loop.
X_val = test_images[idx]
y_val = test_labels[idx]

In [15]:
# Display the shapes of the sampled validation images and labels
tuple(arr.shape for arr in (X_val, y_val))

((32, 28, 28), (32,))

In [16]:
# Checkpoint callback: files go into the wandb run directory and are named
# after the epoch number and the validation accuracy
filepath = wandb.run.dir + "/{epoch:02d}-{val_accuracy:.2f}.ckpt"

# Only write a checkpoint when val_accuracy reaches a new maximum
checkpoint = tf.keras.callbacks.ModelCheckpoint(
    filepath,
    monitor="val_accuracy",
    mode="max",
    save_best_only=True,
    verbose=1,
)

In [17]:
# Build, compile and train
# Hyperparameters are read from the wandb config (instead of being repeated
# as literals) so the values logged with the run are the ones actually used.
# One output unit per entry in LABELS.
mlp_model = MLPModel(len(LABELS))

mlp_model.compile(optimizer=config.optimizer,
                  loss="sparse_categorical_crossentropy",
                  metrics=["accuracy"])

# The full test split is used for the validation metrics; the small
# (X_val, y_val) sample is what the wandb callback receives.
mlp_model.fit(train_images, train_labels,
    validation_data=(test_images, test_labels),
    batch_size=config.batch_size,
    epochs=config.epochs,
    callbacks=[WandbCallback(data_type="image", validation_data=(X_val, y_val),
                    labels=LABELS),
               checkpoint]
)

Error generating diff: Reference at 'refs/remotes/origin/master' does not exist


Train on 60000 samples, validate on 10000 samples
Epoch 1/10

[34m[1mwandb[0m: [32m[41mERROR[0m Can't save model, h5py returned error: Saving the model to HDF5 format requires the model to be a Functional model or a Sequential model. It does not work for subclassed models, because such models are defined via the body of a Python method, which isn't safely serializable. Consider saving to the Tensorflow SavedModel format (by setting save_format="tf") or using `save_weights`.



Epoch 00001: val_accuracy improved from -inf to 0.84600, saving model to /home/jupyter/Rerproducibility-in-tf.keras-with-wandb/wandb/run-20200410_133121-save-restore-exp/01-0.85.ckpt
Epoch 2/10
Epoch 00002: val_accuracy improved from 0.84600 to 0.86110, saving model to /home/jupyter/Rerproducibility-in-tf.keras-with-wandb/wandb/run-20200410_133121-save-restore-exp/02-0.86.ckpt
Epoch 3/10
Epoch 00003: val_accuracy improved from 0.86110 to 0.86120, saving model to /home/jupyter/Rerproducibility-in-tf.keras-with-wandb/wandb/run-20200410_133121-save-restore-exp/03-0.86.ckpt
Epoch 4/10
Epoch 00004: val_accuracy improved from 0.86120 to 0.87160, saving model to /home/jupyter/Rerproducibility-in-tf.keras-with-wandb/wandb/run-20200410_133121-save-restore-exp/04-0.87.ckpt
Epoch 5/10
Epoch 00005: val_accuracy improved from 0.87160 to 0.87470, saving model to /home/jupyter/Rerproducibility-in-tf.keras-with-wandb/wandb/run-20200410_133121-save-restore-exp/05-0.87.ckpt
Epoch 6/10
Epoch 00006: val_

<tensorflow.python.keras.callbacks.History at 0x7faf50cdcc10>

In [18]:
# Try saving the entire model with .h5
# Saving a subclassed model to HDF5 raises NotImplementedError. The failure is
# the point of this cell, so it is caught and displayed rather than left to
# abort a Restart & Run All.
try:
    mlp_model.save("first_trained_model.h5")
except NotImplementedError as err:
    print("NotImplementedError:", err)

NotImplementedError: Saving the model to HDF5 format requires the model to be a Functional model or a Sequential model. It does not work for subclassed models, because such models are defined via the body of a Python method, which isn't safely serializable. Consider saving to the Tensorflow SavedModel format (by setting save_format="tf") or using `save_weights`.

In [19]:
# Export the whole model in SavedModel format inside the wandb run directory
saved_model_path = wandb.run.dir + "/first_trained_model"
mlp_model.save(saved_model_path)

Instructions for updating:
If using Keras pass *_constraint arguments to layers.
INFO:tensorflow:Assets written to: /home/jupyter/Rerproducibility-in-tf.keras-with-wandb/wandb/run-20200410_133121-save-restore-exp/first_trained_model/assets


In [20]:
# List the contents of the wandb run directory with human-readable sizes
!ls -lh {wandb.run.dir}

total 17M
-rw-r--r-- 1 jupyter jupyter 2.0K Apr 10 13:44 01-0.85.ckpt.data-00000-of-00002
-rw-r--r-- 1 jupyter jupyter 2.4M Apr 10 13:44 01-0.85.ckpt.data-00001-of-00002
-rw-r--r-- 1 jupyter jupyter 1.3K Apr 10 13:44 01-0.85.ckpt.index
-rw-r--r-- 1 jupyter jupyter 2.0K Apr 10 13:44 02-0.86.ckpt.data-00000-of-00002
-rw-r--r-- 1 jupyter jupyter 2.4M Apr 10 13:44 02-0.86.ckpt.data-00001-of-00002
-rw-r--r-- 1 jupyter jupyter 1.3K Apr 10 13:44 02-0.86.ckpt.index
-rw-r--r-- 1 jupyter jupyter 2.0K Apr 10 13:44 03-0.86.ckpt.data-00000-of-00002
-rw-r--r-- 1 jupyter jupyter 2.4M Apr 10 13:44 03-0.86.ckpt.data-00001-of-00002
-rw-r--r-- 1 jupyter jupyter 1.3K Apr 10 13:44 03-0.86.ckpt.index
-rw-r--r-- 1 jupyter jupyter 2.0K Apr 10 13:44 04-0.87.ckpt.data-00000-of-00002
-rw-r--r-- 1 jupyter jupyter 2.4M Apr 10 13:44 04-0.87.ckpt.data-00001-of-00002
-rw-r--r-- 1 jupyter jupyter 1.3K Apr 10 13:44 04-0.87.ckpt.index
-rw-r--r-- 1 jupyter jupyter 2.0K Apr 10 13:44 05-0.87.ckpt.data-00000-of-00002
-rw-r-

## Important links:
- https://app.wandb.ai/lavanyashukla/save_and_restore/reports/Saving-and-Restoring-Models-with-W%26B--Vmlldzo3MDQ3Mw
- https://www.tensorflow.org/guide/keras/save_and_serialize#part_ii_saving_and_loading_of_subclassed_models
- https://www.tensorflow.org/tutorials/keras/save_and_load#define_a_model