# Using tf.similarity on fashion mnist dataset

This tutorial shows how to use the tf.similarity package on the fashion mnist dataset.

A GPU is not required, but using one is highly recommended.

In [None]:
# Optional cell: run it only if you want to suppress warnings.
import warnings
# The 'ignore' action with no other filter arguments applies to every warning.
warnings.filterwarnings('ignore')

In [None]:
# imports
import numpy as np
import six
import tensorflow as tf
from absl import app, flags
from tensorflow.keras.layers import (Conv2D, Dense, Dropout, Flatten, Input,
                                     MaxPooling2D)
from tensorflow.keras.models import Model
from tensorflow.keras.optimizers import Adam
import tempfile
import tabulate
from tensorflow_similarity.api.engine.preprocessing import Preprocessing
from tensorflow_similarity.api.engine.simhash import SimHash

## Read in fashion mnist dataset

In [None]:
def read_fashion_mnist_data():
    """Load fashion mnist and carve a per-class target set out of the test split.

    The data is loaded through tf.keras.datasets and the integer labels are
    replaced by their class names. The raw test split is then walked in
    order: the first example encountered for each class becomes a target,
    and every other example stays in the test set.
    For more information on fashion mnist, please visit:
    https://keras.io/datasets/#fashion-mnist-database-of-fashion-articles

    Returns:
        A tuple (train, test, targets). Each of the three elements is
        itself a pair: a dictionary holding the images as an np array
        under the "example" key, and an np array of class-name labels.
    """

    (train_images, train_labels), (raw_images, raw_labels) = (
        tf.keras.datasets.fashion_mnist.load_data())

    # Position in this list is the integer class id,
    # i.e., 0 -> T-shirt/top, 1 -> Trouser, etc.
    class_names = ['T-shirt/top', 'Trouser', 'Pullover', 'Dress', 'Coat',
                   'Sandal', 'Shirt', 'Sneaker', 'Bag', 'Ankle boot']

    # Swap integer ids for readable names in both splits.
    train_names = [class_names[idx] for idx in train_labels]
    raw_names = [class_names[idx] for idx in raw_labels]

    test_images, test_names = [], []
    target_images, target_names = [], []

    # Classes that already have a target example.
    claimed = set()
    for image, name in zip(raw_images, raw_names):
        if name in claimed:
            test_images.append(image)
            test_names.append(name)
            continue
        claimed.add(name)
        target_images.append(image)
        target_names.append(name)

    train = ({"example": np.array(train_images)}, np.array(train_names))
    test = ({"example": np.array(test_images)}, np.array(test_names))
    targets = ({"example": np.array(target_images)}, np.array(target_names))
    return (train, test, targets)

### Define tower models, decoder, and preprocessor

In [None]:
def simple_fashion_mnist_tower_model():
    """A simple tower model for fashion mnist dataset.

    The network is two 5x5 convolutions (32 filters), max-pooling and
    dropout, followed by two 3x3 convolutions (64 filters), max-pooling
    and dropout, then a 256-unit dense layer with dropout and a final
    100-unit dense layer without activation.

    Returns:
        model: A tensorflow model that takes a (28, 28, 1) input named
            "example" and returns a 100-dimensional embedding.
    """

    # The Input layer fixes the input shape, so the convolutions below do
    # not need their own input_shape argument.
    i = Input(shape=(28, 28, 1), name="example")
    o = Conv2D(
        32,
        kernel_size=(5, 5),
        padding='same',
        activation='relu')(i)
    # Chain the second convolution onto the first one's output. Feeding it
    # the raw input again would discard the first convolution entirely.
    o = Conv2D(
        32,
        kernel_size=(5, 5),
        padding='same',
        activation='relu')(o)
    o = MaxPooling2D(pool_size=(2, 2))(o)
    o = Dropout(.25)(o)

    o = Conv2D(64, (3, 3), padding='same', activation='relu')(o)
    o = Conv2D(64, (3, 3), padding='same', activation='relu')(o)
    o = MaxPooling2D(pool_size=(2, 2))(o)
    o = Dropout(.25)(o)

    o = Flatten()(o)
    o = Dense(256, activation="relu")(o)
    o = Dropout(.25)(o)
    # No activation on the last layer: its raw output is the embedding.
    o = Dense(100)(o)
    model = Model(inputs=i, outputs=o)
    return model

In [None]:
class Normalize(Preprocessing):
    """Preprocessing step that rescales and reshapes fashion MNIST examples."""

    def preprocess(self, img):
        """Divide the "example" entry by 255 and reshape it to (28, 28, 1).

        Args:
            img: Mapping whose "example" entry holds the image to process.

        Returns:
            A new dictionary with the processed image under "example".
        """

        scaled = img["example"] / 255.0
        return {"example": scaled.reshape((28, 28, 1))}

### Helper methods

In [None]:
def display_metrics(test_metrics):
    """Print the test metrics as a two-column table sorted by metric name.

    Args:
        test_metrics: Dictionary mapping each metric name to its value.
    """
    rows = [(name, value) for name, value in six.iteritems(test_metrics)]
    rows.sort()
    headers = ["Metric", "Value"]

    print("")
    print("TEST")
    print(tabulate.tabulate(rows, headers))

## Example usage 1: basic usage

In [None]:
def run_fashion_mnist_example(data, model, strategy, epochs):
    """An example usage of tf.similarity on fashion MNIST example.

    This basic similarity run first unpacks the training, testing, and
    target data from the arguments, then constructs a simple moirai
    (SimHash) model around the tower model, fits it on the training data,
    and finally evaluates it on the testing data against the targets.

    Args:
        data: Tuple of three (examples, labels) pairs, in the order
            training, testing, targets.
        model: tf.Model, the tower model to fit into moirai.
        strategy: String, specify the strategy to use for learning similarity.
        epochs: Integer, number of epochs to fit our moirai model.

    Returns:
        metrics: The result of moirai.evaluate on the testing dataset;
            presumably a dictionary keyed by metric name (confirm against
            the SimHash API).
    """

    # Unpack the three dataset splits.
    (x_train, y_train), (x_test, y_test), (x_targets, y_targets) = data

    moirai = SimHash(
        model,
        preprocessing=Normalize(),
        strategy=strategy,
        # `learning_rate` is the supported argument name; the `lr` alias is
        # deprecated and rejected by newer Keras releases.
        optimizer=Adam(learning_rate=.001),
        # A fresh temporary directory per run. It is not removed afterwards.
        hard_mining_directory=tempfile.mkdtemp())

    moirai.fit(
        x_train,
        y_train,
        epochs=epochs)

    metrics = moirai.evaluate(x_test, y_test, x_targets, y_targets)
    return metrics

In [None]:
# Load the dataset splits and build a fresh tower model.
data = read_fashion_mnist_data()
model = simple_fashion_mnist_tower_model()

# Similarity-learning strategy and number of training epochs.
strategy = "stable_hard_quadruplet_loss"
epochs = 5

test_metrics = run_fashion_mnist_example(
    data=data, model=model, strategy=strategy, epochs=epochs)

In [None]:
# Print the metrics returned by the basic run as a table.
display_metrics(test_metrics)

## Example usage 2: With Visualization Callback


In [None]:
# additional imports
import datetime
from tensorflow_similarity.api.callbacks.metrics_callbacks import MetricsCallback
from tensorflow_similarity.api.callbacks.plugins import ConfusionMatrixCallbackPlugin
from tensorflow_similarity.api.callbacks.plugins import ClosestItemsCallbackPlugin

In [None]:
# Load the TensorBoard notebook extension
%load_ext tensorboard

In [None]:
# Run this cell to clear any logs from previous runs (it deletes ./logs/)
!rm -rf ./logs/

In [None]:
def similarity_run_with_visualization(data, model, strategy, epochs):
    """An example usage of tf.similarity with visualization callbacks.

    This run first unpacks the training, testing, and target data from
    the arguments, then sets up a timestamped log directory under "logs/"
    with one sub-directory per callback plugin (confusion matrix and
    closest items). It constructs a simple moirai (SimHash) model around
    the tower model, fits it on the training data with the metrics
    callback attached, and finally evaluates it on the testing data
    against the targets.

    Args:
        data: Tuple of three (examples, labels) pairs, in the order
            training, testing, targets.
        model: tf.Model, the tower model to fit into moirai.
        strategy: String, specify the strategy to use for learning similarity.
        epochs: Integer, number of epochs to fit our moirai model.

    Returns:
        metrics: The result of moirai.evaluate on the testing dataset;
            presumably a dictionary keyed by metric name (confirm against
            the SimHash API).
    """

    # Unpack the three dataset splits.
    (x_train, y_train), (x_test, y_test), (x_targets, y_targets) = data

    # One timestamped directory per run keeps runs separate under "logs/".
    log_dir = "logs/" + datetime.datetime.now().strftime("%Y%m%d-%H%M%S")
    confusion_matrix_log_dir = log_dir + "/confusion_matrix"
    # Uses the local `log_dir`; the name `LOG_DIR` is not defined anywhere
    # in this notebook and raised a NameError.
    closest_items_log_dir = log_dir + "/closest_items"

    confusion_matrix_plugin = ConfusionMatrixCallbackPlugin(confusion_matrix_log_dir)
    closest_items_plugin = ClosestItemsCallbackPlugin(closest_items_log_dir)

    # The callback receives both plugins plus the test and target data.
    metrics_callbacks = MetricsCallback(
        [confusion_matrix_plugin, closest_items_plugin],
        x_test,
        y_test,
        x_targets,
        y_targets)

    callbacks = [metrics_callbacks]

    moirai = SimHash(
        model,
        preprocessing=Normalize(),
        strategy=strategy,
        # `learning_rate` is the supported argument name; the `lr` alias is
        # deprecated and rejected by newer Keras releases.
        optimizer=Adam(learning_rate=.001),
        # A fresh temporary directory per run. It is not removed afterwards.
        hard_mining_directory=tempfile.mkdtemp())

    moirai.fit(
        x_train,
        y_train,
        epochs=epochs,
        callbacks=callbacks)

    metrics = moirai.evaluate(x_test, y_test, x_targets, y_targets)
    return metrics

In [None]:
# Reload the dataset splits and build a new, untrained tower model.
data = read_fashion_mnist_data()
model = simple_fashion_mnist_tower_model()

# Similarity-learning strategy and number of training epochs.
strategy = "stable_hard_quadruplet_loss"
epochs = 5

test_metrics = similarity_run_with_visualization(
    data=data, model=model, strategy=strategy, epochs=epochs)

In [None]:
%tensorboard --logdir logs

In [None]:
# Print the metrics returned by the visualization run as a table.
display_metrics(test_metrics)