In [1]:
!pip install git+https://github.com/shuiruge/neural-ode.git@master

Collecting git+https://github.com/shuiruge/neural-ode.git@master
  Cloning https://github.com/shuiruge/neural-ode.git (to revision master) to /tmp/pip-req-build-n3sezri7
  Running command git clone -q https://github.com/shuiruge/neural-ode.git /tmp/pip-req-build-n3sezri7
Building wheels for collected packages: node
  Building wheel for node (setup.py) ... [?25l[?25hdone
  Created wheel for node: filename=node-0.1.0-cp36-none-any.whl size=36464 sha256=f2a2088ecbde6123a1fc7ce3b362053f4bacc56f705b3a9ed3f346fdb0c71cb4
  Stored in directory: /tmp/pip-ephem-wheel-cache-qnm0diya/wheels/36/41/e1/1cf7fd120543ff07c299bee3a2ce3fe659795c54f7e03fe9b6
Successfully built node


In [2]:
import numpy as np
import tensorflow as tf
from node.hopfield import ContinuousTimeHopfieldLayer
                           
# for reproducibility
# One shared seed drives both NumPy's legacy global RNG (used below via
# `np.random.random`) and TensorFlow's global seed.
SEED = 42
np.random.seed(SEED)
tf.random.set_seed(SEED)

# Reset the global state Keras has accumulated so far, so that re-running the
# notebook in the same kernel starts from a clean slate.
tf.keras.backend.clear_session()


def pooling(x, size):
  """Resizes a batch of single-channel images to `size`.

  Args:
    x: Batch of images, shape `[None, width, height]` (no channel axis).
    size: Target spatial size, forwarded unchanged to `tf.image.resize`.

  Returns:
    The resized batch with a trailing channel axis, shape
    `[None, size[0], size[1], 1]`.
  """
  # `tf.image.resize` needs an explicit channel axis, so append one first.
  with_channel = tf.expand_dims(x, axis=-1)
  return tf.image.resize(with_channel, size)


def process_data(X, y, image_size, num_classes=10):
  """Turns raw images and integer labels into binarized vectors and one-hot targets.

  Args:
    X: Batch of raw images, shape `[None, width, height]`, with pixel values
      on the 0..255 scale (they are divided by 255 below).
    y: Integer class labels, one per image.
    image_size: `(rows, cols)` the images are resized to before flattening.
    num_classes: Depth of the one-hot encoding. Defaults to 10, the value
      that used to be hard-coded here.

  Returns:
    A pair `(X, y)` of float32 tensors: `X` has shape
    `[None, image_size[0] * image_size[1]]` with entries in {-1, 1}, and `y`
    has shape `[None, num_classes]`.
  """
  X = pooling(X, image_size)
  X = X / 255.
  # Binarize: pixels darker than mid-gray become -1, the rest +1.
  X = tf.where(X < 0.5, -1., 1.)
  # Flatten each image into a single vector.
  X = tf.reshape(X, [-1, image_size[0] * image_size[1]])
  y = tf.one_hot(y, num_classes)
  return tf.cast(X, tf.float32), tf.cast(y, tf.float32)


def create_dataset(X, y, epochs=50):
  """Builds the training input pipeline.

  The samples are shuffled with a 1000-element buffer, repeated `epochs`
  times and grouped into batches of 128. Because the repetition happens
  inside the dataset, iterating it once already covers all `epochs` passes.

  Args:
    X: Input samples, sliced along the first axis.
    y: Targets, aligned with `X` along the first axis.
    epochs: Number of times the shuffled data is repeated.

  Returns:
    A `tf.data.Dataset` yielding `(X_batch, y_batch)` pairs.
  """
  samples = tf.data.Dataset.from_tensor_slices((X, y))
  shuffled = samples.shuffle(1000)
  repeated = shuffled.repeat(epochs)
  return repeated.batch(128)


def create_valid_dataset(X, y, n_samples=3000):
  """Builds an unshuffled evaluation pipeline from the leading samples.

  Args:
    X: Input samples, sliced along the first axis.
    y: Targets, aligned with `X` along the first axis.
    n_samples: How many leading `(X, y)` pairs to keep.

  Returns:
    A `tf.data.Dataset` yielding batches of 128 `(X_batch, y_batch)` pairs
    drawn from the first `n_samples` entries, in their original order.
  """
  head = (X[:n_samples], y[:n_samples])
  return tf.data.Dataset.from_tensor_slices(head).batch(128)


# load and preprocess MNIST dataset

# Target (rows, cols) handed to `process_data`; it also fixes the flattened
# input width (IMAGE_SIZE[0] * IMAGE_SIZE[1]) of the models defined below.
IMAGE_SIZE = (28, 28)

# NOTE(review): the test split (`x_test`, `y_test`) is loaded but never
# preprocessed or used in the cells visible here; every evaluation below runs
# on slices of the training split.
(x_train, y_train), (x_test, y_test) = tf.keras.datasets.mnist.load_data()
# Overwrites the raw arrays with binarized, flattened inputs and one-hot labels.
x_train, y_train = process_data(x_train, y_train, IMAGE_SIZE)

In [3]:
# Baseline classifier: three LayerNormalization + Dense(1024) stages followed
# by a 10-way softmax. It has the same architecture as the Hopfield model
# defined later, minus the Hopfield layers.
benchmark_model = tf.keras.Sequential([
  tf.keras.Input([IMAGE_SIZE[0] * IMAGE_SIZE[1]]),
  tf.keras.layers.LayerNormalization(),
  tf.keras.layers.Dense(1024, activation='relu'),
  tf.keras.layers.LayerNormalization(),
  tf.keras.layers.Dense(1024, activation='relu'),
  tf.keras.layers.LayerNormalization(),
  # The last hidden layer uses tanh, so the latent code lies in (-1, 1).
  tf.keras.layers.Dense(1024, activation='tanh'),
  # tf.keras.layers.Dense(128, activation='relu'),
  tf.keras.layers.Dense(10, activation='softmax'),
])
benchmark_model.compile(
    loss='categorical_crossentropy', optimizer='adam', metrics=['acc'])
# `create_dataset` already repeats the data for its default 50 epochs, so this
# single `fit` pass covers the whole training run.
benchmark_model.fit(create_dataset(x_train, y_train))
# NOTE(review): this evaluates on the first 3000 *training* samples (the
# default `n_samples` of `create_valid_dataset`), not on held-out data.
benchmark_model.evaluate(create_valid_dataset(x_train, y_train))



[0.003667906392365694, 0.9993333220481873]

Examine the noise effect:

In [4]:
# Probability with which each individual input entry has its sign flipped.
FLIP_RATIO = 0.2

# Probe set: the first 1000 processed training images and their integer labels.
X = x_train[:1000]
targets = np.argmax(y_train[:1000], axis=1)

# Inputs are +/-1 valued, so negating an entry flips that pixel.
flip_mask = np.random.random(size=X.shape) < FLIP_RATIO
noised_X = np.where(flip_mask, -X, X)

# Predicted class indices on the clean and on the perturbed inputs.
unoised_y = benchmark_model.predict(X).argmax(axis=1)
noised_y = benchmark_model.predict(noised_X).argmax(axis=1)

n_targets = targets.shape[0]
clean_accuracy = np.sum(unoised_y == targets) / n_targets
noised_accuracy = np.sum(noised_y == targets) / n_targets
print('Noise effect (accuracy):', clean_accuracy, '=>', noised_accuracy)

Noise effect (accuracy): 0.999 => 0.698


Extract the truncated model from the benchmark:

In [5]:
# A latent unit counts as "flipped" when noise moves it by more than this.
THRESHOLD = 0.1

# The first six layers of the benchmark, i.e. everything except the final
# softmax classification layer.
truncated_benchmark_model = tf.keras.Sequential(benchmark_model.layers[:6])
unoised_z = truncated_benchmark_model.predict(X)
noised_z = truncated_benchmark_model.predict(noised_X)

# Fraction of latent entries (over all samples and units) that moved by more
# than THRESHOLD.
z_changed = tf.abs(unoised_z - noised_z) > THRESHOLD
z_flip_ratio = tf.reduce_mean(tf.cast(z_changed, tf.float32))
print('Latent flip ratio:', z_flip_ratio.numpy())

Latent flip ratio: 0.28144237


Follow the same process, but for the model with Hopfield layers instead:

In [6]:
# Same stack as the benchmark model, with two continuous-time Hopfield layers
# inserted between the tanh latent layer and the softmax classifier.
model = tf.keras.Sequential([
  tf.keras.Input([IMAGE_SIZE[0] * IMAGE_SIZE[1]]),
  tf.keras.layers.LayerNormalization(),
  tf.keras.layers.Dense(1024, activation='relu'),
  tf.keras.layers.LayerNormalization(),
  tf.keras.layers.Dense(1024, activation='relu'),
  tf.keras.layers.LayerNormalization(),
  tf.keras.layers.Dense(1024, activation='tanh'),

  # insert Hopfield layers herein
  # NOTE(review): the meaning of `reg_factor` and `relax_tol` is defined in
  # `node.hopfield`, which is not visible here -- confirm before tuning.
  ContinuousTimeHopfieldLayer(reg_factor=10, relax_tol=1e-3),
  ContinuousTimeHopfieldLayer(reg_factor=10, relax_tol=1e-3),

  tf.keras.layers.Dense(10, activation='softmax'),
])
model.compile(
    loss='categorical_crossentropy', optimizer='adam', metrics=['acc'])
# The 50 epochs are unrolled inside the dataset, so `fit` makes a single pass.
model.fit(create_dataset(x_train, y_train, epochs=50))
# NOTE(review): this evaluates on the first 1000 training samples, whereas the
# benchmark model is evaluated on the first 3000; the two reported metrics are
# therefore not computed on the same set.
model.evaluate(create_valid_dataset(x_train[:1000], y_train[:1000]))



[0.051423460245132446, 0.9959999918937683]

In [7]:
# Predicted class indices of the Hopfield model on clean and perturbed inputs.
unoised_y = model.predict(X).argmax(axis=1)
noised_y = model.predict(noised_X).argmax(axis=1)

n_targets = targets.shape[0]
clean_accuracy = np.sum(unoised_y == targets) / n_targets
noised_accuracy = np.sum(noised_y == targets) / n_targets
print('Noise effect (accuracy):', clean_accuracy, '=>', noised_accuracy)

print('Relaxing period:')
hopfield_layers = [
    layer for layer in model.layers
    if isinstance(layer, ContinuousTimeHopfieldLayer)
]
for hopfield_layer in hopfield_layers:
  # Reads a private attribute of the layer; presumably the relaxation time
  # recorded by its most recent call -- confirm against `node.hopfield`.
  print(hopfield_layer._stop_condition.relax_time.numpy())

Noise effect (accuracy): 0.996 => 0.584
Relaxing period:
14.192004
18.50345


In [8]:
def _latent_flip_ratio(clean_z, noisy_z):
  """Fraction of latent entries that differ by more than THRESHOLD."""
  moved = tf.abs(clean_z - noisy_z) > THRESHOLD
  return tf.reduce_mean(tf.where(moved, 1., 0.))


# Sub-model made of the six layers that precede the Hopfield layers.
truncated_model = tf.keras.Sequential(model.layers[:6])
unoised_z = truncated_model.predict(X)
noised_z = truncated_model.predict(noised_X)

z_flip_ratio = _latent_flip_ratio(unoised_z, noised_z)
print('Latent flip ratio (without Hopfield):', z_flip_ratio.numpy())


# Sub-model that additionally contains both Hopfield layers (eight layers in
# total), i.e. everything except the softmax classifier.
higher_truncated_model = tf.keras.Sequential(model.layers[:8])
unoised_z = higher_truncated_model.predict(X)
noised_z = higher_truncated_model.predict(noised_X)

z_flip_ratio = _latent_flip_ratio(unoised_z, noised_z)
print('Latent flip ratio (with Hopfield):', z_flip_ratio.numpy())

Latent flip ratio (without Hopfield): 0.05506543
Latent flip ratio (with Hopfield): 0.043288086


### Tentative Conclusion

1. The benchmark model is more robust to random-flip perturbation in terms of
final accuracy (0.999 => 0.698, versus 0.996 => 0.584 for the model with Hopfield layers).
1. However, the model with Hopfield layers is significantly more
stable in its latent outputs: the latent flip ratio is 0.043 after the last Hopfield
layer and 0.055 just before the first Hopfield layer, versus 0.281 for the benchmark.

TODO: finish the `NodeLayer` class sketched below.

In [9]:
from node.solvers.runge_kutta import RungeKuttaFehlbergSolver
from node.core import get_node_function


class NodeLayer(tf.keras.layers.Layer):
  """Keras layer whose output is obtained by integrating an ODE.

  The vector field of the ODE is the sequential composition of `sub_layers`;
  it is integrated from t=0 to t=1 starting at the layer input, using the
  node function that `get_node_function` builds from a
  `RungeKuttaFehlbergSolver`.

  NOTE(review): for the integration to be well defined the stacked
  `sub_layers` presumably have to map a tensor to one of the same shape --
  confirm against `node.core`.

  Args:
    sub_layers: Iterable of Keras layers that together define the vector field.
    **kwargs: Forwarded to `tf.keras.layers.Layer`.
  """

  # TODO: finish writing this class.

  def __init__(self, sub_layers, **kwargs):
    super().__init__(**kwargs)
    self.sub_layers = sub_layers

  def build(self, input_shape):
    # `input_shape` includes the batch dimension, whereas `tf.keras.Input`
    # expects the per-sample shape, so drop the leading axis.
    sample_shape = tuple(tf.TensorShape(input_shape).as_list()[1:])
    # The original `{Input} + sub_layers` was a set + list and raised
    # TypeError; build a proper list of layers instead.
    sub_model = tf.keras.Sequential(
        [tf.keras.Input(sample_shape), *self.sub_layers])

    def pvf(t, x):
      # The vector field ignores `t` and depends on the state `x` only.
      return sub_model(x)

    solver = RungeKuttaFehlbergSolver()
    self._node_fn = get_node_function(solver, pvf)
    super().build(input_shape)

  def call(self, x):
    # Integrate over the fixed time interval [0, 1], starting from `x`.
    t0 = tf.constant(0.)
    t1 = tf.constant(1.)
    return self._node_fn(t0, t1, x)
