# **Training**

In [1]:
import os
os.environ["TF_CPP_MIN_LOG_LEVEL"] = "3"
import tensorflow as tf
import tensorflow_datasets as tfds
import shutil

In [2]:
# Load the MNIST "train" and "test" splits as (image, label) pairs,
# together with the dataset metadata object (`ds_info`).
mnist_splits, ds_info = tfds.load(
  name = "mnist",
  split = ["train", "test"],
  as_supervised = True,
  shuffle_files = True,
  with_info = True,
)
ds_train, ds_test = mnist_splits

In [3]:
def normalize(image, label):
  """Prepare one (image, label) pair for training.

  The image is cast to float32 and divided by 255 so each value lies between
  0 and 1; the label is one-hot encoded over 10 classes.

  Returns:
    A `(scaled_image, one_hot_label)` tuple.
  """
  scaled_image = tf.cast(image, tf.float32) / 255.
  one_hot_label = tf.one_hot(label, depth = 10)
  return scaled_image, one_hot_label

# Input pipelines: normalize every sample, then group into batches of 128.
#
# Only the training data is shuffled, and it is shuffled here rather than in
# `Model.fit`: Keras ignores the `shuffle` argument of `fit` when the input is
# a `tf.data.Dataset`, so without an explicit `Dataset.shuffle` the data would
# not be reshuffled between epochs. The buffer covers the whole training split
# and the shuffle sits before `batch`, so individual samples (not whole
# batches) are reshuffled.
ds_train = ds_train.map(normalize)
ds_train = ds_train.shuffle(ds_info.splits["train"].num_examples)
ds_train = ds_train.batch(128)

# evaluation data keeps its order
ds_test = ds_test.map(normalize)
ds_test = ds_test.batch(128)

In [4]:
# Fully connected classifier: flattened image -> 32 -> 32 -> 16 ReLU units -> 10-way softmax.
# NOTE(review): the input is declared as (28, 28); TFDS MNIST images presumably
# carry a trailing channel axis, (28, 28, 1) — confirm the declared shape matches the data.
hidden_units = (32, 32, 16)
model = tf.keras.models.Sequential(
  [tf.keras.layers.Input(shape = (28, 28)), tf.keras.layers.Flatten()]
  + [tf.keras.layers.Dense(units, activation = "relu") for units in hidden_units]
  + [tf.keras.layers.Dense(10, activation = "softmax")]
)

# labels are one-hot encoded, hence the categorical loss and accuracy
optimizer = tf.keras.optimizers.Adam(learning_rate = .00005)
model.compile(
  optimizer = optimizer,
  loss = tf.keras.losses.CategoricalCrossentropy(),
  metrics = [tf.keras.metrics.CategoricalAccuracy()],
)

# Checkpointing and training.
# The weights are written to `train/` once before training (stored as epoch 0)
# and again after every epoch; the data-processing cells reload these files.
epochs = 25
# single path template shared by the callback and the manual epoch-0 save
weights_path = "train/epoch-{epoch:02d}.weights.h5"

# start from an empty checkpoint directory so files from an earlier run
# cannot be mixed with the new ones
if "train" in os.listdir():
  shutil.rmtree("train")
os.mkdir("train")

# save model weights every epoch
cp_callback = tf.keras.callbacks.ModelCheckpoint(
  weights_path,
  verbose = 1,
  save_weights_only = True,
  save_freq = "epoch",
)

# snapshot of the freshly initialized network
model.save_weights(weights_path.format(epoch = 0))

# `shuffle` is deliberately not passed: Keras ignores it when the input is a
# `tf.data.Dataset`, so any shuffling has to be part of the dataset pipeline.
# The History object is kept in a variable so its repr is not echoed as cell output.
history = model.fit(
  ds_train,
  epochs = epochs,
  validation_data = ds_test,
  callbacks = [cp_callback],
)

Epoch 1/25
[1m468/469[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 2ms/step - categorical_accuracy: 0.1918 - loss: 2.2250
Epoch 1: saving model to train/epoch-01.weights.h5
[1m469/469[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 3ms/step - categorical_accuracy: 0.1921 - loss: 2.2244 - val_categorical_accuracy: 0.3772 - val_loss: 1.7989
Epoch 2/25
[1m457/469[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 2ms/step - categorical_accuracy: 0.4462 - loss: 1.6434
Epoch 2: saving model to train/epoch-02.weights.h5
[1m469/469[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - categorical_accuracy: 0.4486 - loss: 1.6391 - val_categorical_accuracy: 0.7072 - val_loss: 1.1686
Epoch 3/25
[1m444/469[0m [32m━━━━━━━━━━━━━━━━━━[0m[37m━━[0m [1m0s[0m 2ms/step - categorical_accuracy: 0.7266 - loss: 1.0763
Epoch 3: saving model to train/epoch-03.weights.h5
[1m469/469[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - categorical_accuracy: 0.

<keras.src.callbacks.history.History at 0x272195ff090>

In [5]:
# show a summary of the model defined above
model.summary()

# **Data Processing**

In [140]:
import pandas as pd
import numpy as np

In [147]:
# Long-format table with one row per (epoch, connection): the two layers and
# the two neurons the connection joins, plus its weight at that epoch.
weights = {"epoch": [], "layer1": [], "layer2": [], "neuron1": [], "neuron2": [], "weight": []}

for epoch in range(epochs + 1):
  model.load_weights(f"train/epoch-{epoch:02d}.weights.h5")

  # model.layers[0] is skipped; only the first weight tensor of each remaining
  # layer is read (biases are not included)
  for layer in range(1, len(model.layers)):
    weight = model.layers[layer].weights[0]
    n_in, n_out = weight.shape[0], weight.shape[1]
    n_connections = n_in * n_out

    weights["epoch"] += [epoch] * n_connections
    weights["layer1"] += [layer - 2] * n_connections
    weights["layer2"] += [layer - 1] * n_connections

    # neuron ids are centered on 0 so that layers of different widths line up
    # around the same midline in the visualization
    source_ids = [i - n_in // 2 for i in range(n_in)]
    target_ids = list(range(-(n_out // 2), n_out - n_out // 2))
    # each source id repeated n_out times / the target ids cycled n_in times,
    # matching the row-major order of the flattened weight matrix
    weights["neuron1"] += list(np.array([[source] * n_out for source in source_ids]).flatten())
    weights["neuron2"] += list(np.array([target_ids] * n_in).flatten())
    weights["weight"] += list(weight.numpy().flatten())

weights = pd.DataFrame(weights)

  trackable.load_own_variables(weights_store.get(inner_path))


In [148]:
# Two long-format tables:
#   samples: the pixels of one randomly chosen test image per label (0 - 9)
#   outputs: the activation of every neuron for that image at every saved epoch
samples = {"x": [], "y": [], "z": [], "label": []}
outputs = {"epoch": [], "label": [], "layer": [], "neuron": [], "output": []}

def pick_sample(label):
  """Return one random test image with the given label, flattened to one row.

  A random test batch is drawn and one image carrying `label` is chosen from
  it. If the batch contains no image with that label, another batch is drawn.

  Args:
    label: class index, used as the column of the one-hot label matrix.

  Returns:
    A tensor of shape (1, number_of_pixels).
  """
  while True:
    # ds_test is already batched, so this shuffles batches and takes one of them
    for batch in ds_test.shuffle(buffer_size = ds_info.splits["test"].num_examples).take(1):
      x, y = batch[0], batch[1]

    # rows of the batch whose one-hot label has a 1 in column `label`; taking
    # column 0 of tf.where keeps the result 1-D for any number of matches
    # (squeezing instead collapses a single match to a scalar that can't be indexed)
    choices = tf.where(y[:, label] > 0)[:, 0].numpy()
    if choices.size == 0:
      # no image of this class in the batch: drawing a random index would fail
      continue

    choice = tf.random.uniform(shape = (1,), maxval = choices.size, dtype = tf.int32)
    return tf.keras.layers.Flatten()(tf.expand_dims(x[choices[choice.numpy()[0]]], 0))

output_layer = len(model.layers) - 1

for label in range(10):
  sample = pick_sample(label)

  # pixel grid coordinates in the same row-major order as the flattened image
  samples["x"] += list(np.array([list(range(28)) for i in range(28)]).flatten())
  samples["y"] += list(np.array([[i] * 28 for i in range(28)]).flatten())
  samples["z"] += list(tf.squeeze(sample).numpy().flatten())
  samples["label"] += [label] * 28 * 28

  for epoch in range(epochs + 1):
    model.load_weights(f"train/epoch-{epoch:02d}.weights.h5")

    # replay the forward pass by hand so every layer's output can be recorded;
    # `intermediate` always has shape (1, units)
    intermediate = sample
    for layer in range(1, len(model.layers)):
      weight = model.layers[layer].weights[0]
      bias = model.layers[layer].weights[1]
      pre_activation = tf.math.add(tf.linalg.matmul(intermediate, weight), bias)
      # softmax on the last layer, ReLU everywhere else (mirrors the model definition)
      activation = tf.nn.softmax if layer == output_layer else tf.nn.relu
      intermediate = activation(pre_activation)
      output = tf.squeeze(intermediate, axis = 0)

      outputs["epoch"] += [epoch] * output.shape[0]
      outputs["label"] += [label] * output.shape[0]
      outputs["layer"] += [layer - 1] * output.shape[0]
      # number neurons with 0 at the middle to make visualization easier later (layers centered with each other)
      half_len = output.shape[0] // 2
      outputs["neuron"] += list(range(-half_len, output.shape[0] - half_len))
      outputs["output"] += list(output.numpy())

samples = pd.DataFrame(samples)
outputs = pd.DataFrame(outputs)

# **Visualization**

In [149]:
import altair as alt
# switch Altair to the "vegafusion" data transformer
# NOTE(review): presumably required because the frames charted below are far
# larger than Altair's default row limit — confirm
alt.data_transformers.enable("vegafusion")

DataTransformerRegistry.enable('vegafusion')

In [150]:
print(samples.shape, weights.shape, outputs.shape)

# min-max scale every weight (all layers, all epochs) into [0, 1]
weight_min = weights["weight"].min()
weight_max = weights["weight"].max()
weights["weight"] = (weights["weight"] - weight_min) / (weight_max - weight_min)

# min-max scale the outputs of every layer except layer 3, whose values are left as they are
is_intermediary = outputs["layer"] != 3
intermediary_layers = outputs.loc[is_intermediary, "output"]
output_min = intermediary_layers.min()
output_max = intermediary_layers.max()
outputs.loc[intermediary_layers.index, "output"] = ((intermediary_layers - output_min) / (output_max - output_min)).values

# rows with layer1 == -1 are removed after the scaling, so the scaling above still includes them
weights = weights.loc[weights["layer1"] != -1]

(7840, 4) (696384, 6) (23400, 5)


In [151]:
# radio buttons: which label's sample is shown (starts at label 0)
label_options = outputs["label"].unique()
label_input = alt.binding_radio(options = label_options, name = "Label")
label_selection = alt.selection_point(fields = ["label"], bind = label_input, value = 0)

# slider: which saved epoch is shown, from 0 up to `epochs` (starts at the last one)
epoch_input = alt.binding_range(min = 0, max = epochs, step = 1, name = "Epoch")
epoch_selection = alt.selection_point(fields = ["epoch"], bind = epoch_input, value = epochs)

# the sample image of the selected label, one rect per pixel, shaded by pixel value
pixel_color = alt.Color("z:Q", legend = None).scale(scheme = "greys")
image = (
  alt.Chart(samples)
  .mark_rect()
  .encode(
    x = alt.X("x:O", axis = None),
    y = alt.Y("y:O", axis = None),
    color = pixel_color,
  )
  .properties(width = 200, height = 200)
  .add_params(label_selection)
  .transform_filter(label_selection)
)

# one circle per neuron, positioned by (layer, neuron) and colored by its output
# for the selected label and epoch
output_legend = alt.Legend(
  title = "Output",
  gradientLength = 400,
)
output_color = alt.Color("output", legend = output_legend).scale(
  domain = (0, 1),
  scheme = "lightgreyteal"
)
neurons = (
  alt.Chart(outputs)
  .mark_circle(size = 300)
  .encode(
    x = alt.X("layer", axis = None),
    y = alt.Y("neuron", axis = None),
    color = output_color,
  )
  .add_params(label_selection, epoch_selection)
  .transform_filter(label_selection)
  .transform_filter(epoch_selection)
)

# One segment per connection, running from (layer1, neuron1) to (layer2, neuron2)
# and colored by the connection's weight at the selected epoch.
# A rule mark is used because every row describes a segment through its
# x/x2 and y/y2 channels; Vega-Lite does not apply unbinned x2/y2 to line
# marks, so a line mark would not draw these segments.
connections = alt.Chart(weights).mark_rule(
).encode(
  x = alt.X("layer1", axis = None),
  x2 = alt.X2("layer2"),
  y = alt.Y("neuron1", axis = None),
  y2 = alt.Y2("neuron2"),
  color = alt.Color("weight", legend = alt.Legend(
      title = "Weight",
      gradientLength = 400,
  )).scale(
    domain = (0, 1),
    scheme = "lightgreyred",
  ),
).add_params(epoch_selection).transform_filter(epoch_selection)\
.properties(
  width = 1000,
  height = 800,
  title={
    "text": "MNIST Neural Network Training",
    "subtitle": "Layer weights and outputs per epoch and label",
    "color": "gray",
    "subtitleColor": "gray",
    "fontSize": 30,
    "subtitleFontSize": 15,
    "dx": -100,
    "dy": 75,
  },
)

In [152]:
# network view: connections underneath, neurons on top, each layer with its own color scale
network = (connections + neurons).resolve_scale(color = "independent")
# sample image on the left, network on the right, without view borders
alt.hconcat(image, network, center = True).configure_view(strokeWidth = 0)