# Data

In [0]:
import time
import numpy as np
from sklearn.datasets import fetch_california_housing
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler

# Import
# Fixed seed so the train/validation/test partition (and therefore every loss
# reported further down) is the same after a kernel restart.
SEED = 42
housing = fetch_california_housing()
x_train_full, x_test, y_train_full, y_test = train_test_split(
    housing.data, housing.target, test_size=0.3, random_state=SEED)
x_train, x_valid, y_train, y_valid = train_test_split(
    x_train_full, y_train_full, test_size=0.3, random_state=SEED)

# PreProcessing
# The scaler is fitted on the training split only and then re-used, so the
# validation and test splits do not influence the scaling parameters.
scaler = MinMaxScaler((-1,1))
x_train = scaler.fit_transform(x_train).astype(np.float32)
x_valid = scaler.transform(x_valid).astype(np.float32)
x_test = scaler.transform(x_test).astype(np.float32)

# Add a trailing axis to each target array so targets are column-shaped.
y_train = np.expand_dims(y_train, axis=1)
y_valid = np.expand_dims(y_valid, axis=1)
y_test = np.expand_dims(y_test, axis=1)

# Summary
print(f"Training Set: {len(x_train)} \
      \nValidation Set: {len(x_valid)} \
      \nTest Set: {len(x_test)}")

Downloading Cal. housing from https://ndownloader.figshare.com/files/5976036 to /root/scikit_learn_data


Training Set: 10113       
Validation Set: 4335       
Test Set: 6192


In [0]:
# Display the dtype of the scaled training features (cast to float32 during preprocessing).
x_train.dtype

dtype('float32')

# Keras in TensorFlow 2.0

In [0]:
# Line magic that selects the TensorFlow 2.x runtime (looks Colab-specific — TODO confirm);
# it must run before TensorFlow is imported.
%tensorflow_version 2.x
import tensorflow as tf
# Report the installed version and whether eager execution is active.
print("TensorFlow version: {}".format(tf.__version__))
print("Eager execution: {}".format(tf.executing_eagerly()))

TensorFlow 2.x selected.
TensorFlow version: 2.1.0
Eager execution: True


**Multi-Layer Perceptron using Keras Subclassing API**

In [0]:
class MLP(tf.keras.Model):
  """Two-layer perceptron trained with a hand-written mini-batch gradient loop.

  Architecture: Dense(10, tanh) -> Dense(1, linear). The loss is mean squared
  error and the parameters are updated by a Keras SGD optimizer.
  """

  def __init__(self):
    super().__init__()
    # Both layers use a seeded Glorot-uniform initializer so the starting
    # weights are the same on every run.
    self.dense1 = tf.keras.layers.Dense(10, activation="tanh", kernel_initializer=tf.keras.initializers.GlorotUniform(seed=9))
    self.dense2 = tf.keras.layers.Dense(1, kernel_initializer=tf.keras.initializers.GlorotUniform(seed=9))

    self.get_loss = tf.keras.losses.MeanSquaredError()
    self.optimizer = tf.keras.optimizers.SGD()
    # Total number of trainable scalars. Stays None until the first forward
    # pass, because the layer variables do not exist before then.
    self.n_params = None

  def call(self, x):
    """Run the forward pass and return the output of the final Dense(1) layer.

    On the first invocation this also records the number of trainable
    parameters in `self.n_params`.
    """
    x = self.dense1(x)
    x = self.dense2(x)

    if self.n_params is None: # Set n_params
      # Static shapes are enough to count parameters; this avoids calling
      # `.numpy()` on a tensor, which is unavailable when `call` is traced
      # in graph mode.
      self.n_params = sum(v.shape.num_elements() for v in self.trainable_variables)

    return x

  def fit(self, x, y, batch_size=32, epochs=1):
    """Train with mini-batch gradient descent on the MSE loss.

    Note that this shadows the `fit` method inherited from `tf.keras.Model`.

    Args:
      x: Input samples, sliceable along the first axis.
      y: Targets aligned with `x` along the first axis.
      batch_size: Number of samples per update; the last batch may be smaller.
      epochs: Number of passes over `x`.

    At the start of every epoch the loss over the full `x`/`y` is printed,
    i.e. the value shown reflects the parameters *before* that epoch's updates.
    """
    for epoch in range(epochs):
      print(f"Epoch {epoch+1}/{epochs}")
      print(f"Loss: {self.get_loss(y, self(x))}")

      # Batches are taken in order, without shuffling.
      for i_batch in range(0, len(x), batch_size):
        x_batch = x[i_batch:i_batch+batch_size]
        y_batch = y[i_batch:i_batch+batch_size]

        with tf.GradientTape() as tape:
          y_batch_pred = self(x_batch, training=True)
          loss = self.get_loss(y_batch, y_batch_pred)

        gradients = tape.gradient(loss, self.trainable_variables)
        self.optimizer.apply_gradients(zip(gradients, self.trainable_variables))

In [0]:
# Quick check of the custom training loop: one 128-sample batch, 5 epochs.
mlp = MLP()
mlp.fit(x_train[:128], y_train[:128], batch_size=128, epochs=5)

Epoch 1/5
Loss: 6.318022727966309
Epoch 2/5
Loss: 4.799226760864258
Epoch 3/5
Loss: 3.7185442447662354
Epoch 4/5
Loss: 2.9463648796081543
Epoch 5/5
Loss: 2.3933355808258057


**Levenberg-Marquardt Neural Network**

In [0]:
class LMNN(tf.keras.Model):
  """Two-layer perceptron trained with a damped Gauss-Newton (Levenberg-Marquardt style) step.

  Architecture: Dense(10, tanh) -> Dense(1, linear). For every mini-batch the
  Jacobian J of the residuals with respect to all trainable parameters is
  formed and the linear system (J^T J + mu * I) d = J^T r is solved for d.
  The step d is then handed to the SGD optimizer as if it were a gradient, so
  the applied update is d scaled by the optimizer's learning rate.
  """

  def __init__(self):
    super().__init__()
    # Both layers use a seeded Glorot-uniform initializer so the starting
    # weights are the same on every run.
    self.dense1 = tf.keras.layers.Dense(10, activation="tanh", kernel_initializer=tf.keras.initializers.GlorotUniform(seed=9))
    self.dense2 = tf.keras.layers.Dense(1, kernel_initializer=tf.keras.initializers.GlorotUniform(seed=9))

    self.get_loss = tf.keras.losses.MeanSquaredError()
    self.optimizer = tf.keras.optimizers.SGD()
    # Damping factor added to the diagonal of J^T J. It is kept constant;
    # nothing in this class adapts it during training.
    self.mu = 0.3

  def call(self, x):
    """Run the forward pass and return the output of the final Dense(1) layer."""
    x = self.dense1(x)
    return self.dense2(x)

  def fit(self, x, y, batch_size=32, epochs=1):
    """Train with one damped Gauss-Newton step per mini-batch.

    Note that this shadows the `fit` method inherited from `tf.keras.Model`.

    Args:
      x: Input samples, sliceable along the first axis.
      y: Targets aligned with `x` along the first axis.
      batch_size: Number of samples per update; the last batch may be smaller.
      epochs: Number of passes over `x`.

    At the start of every epoch the MSE over the full `x`/`y` is printed,
    i.e. the value shown reflects the parameters *before* that epoch's updates.
    """
    for epoch in range(epochs):
      print(f"Epoch {epoch+1}/{epochs}")
      print(f"Loss: {self.get_loss(y, self(x))}")

      # Batches are taken in order, without shuffling.
      for i_batch in range(0, len(x), batch_size):
        x_batch = x[i_batch:i_batch+batch_size]
        y_batch = y[i_batch:i_batch+batch_size]

        with tf.GradientTape(persistent=True) as tape:
          y_batch_pred = self(x_batch, training=True)
          # Cast the targets explicitly so the subtraction does not depend on
          # implicit dtype conversion between the targets and the predictions.
          residuals = tf.cast(y_batch, y_batch_pred.dtype) - y_batch_pred

        variables = self.trainable_variables
        per_variable_jacobians = tape.jacobian(residuals, variables)
        # A persistent tape holds on to its resources until it is dropped.
        del tape

        # Flatten every per-variable Jacobian to (batch, n_variable_params) and
        # join them column-wise into one (batch, n_params) matrix.
        jacobian = tf.concat(
            [tf.reshape(j, [j.shape[0], -1]) for j in per_variable_jacobians], axis=1)

        damping = self.mu * tf.eye(jacobian.shape[1], dtype=jacobian.dtype)
        hessian_approx = tf.matmul(jacobian, jacobian, transpose_a=True) + damping
        gradient_approx = tf.matmul(jacobian, residuals, transpose_a=True)

        # Solve the linear system directly instead of forming an explicit
        # inverse: same solution, better numerical behaviour, less work.
        updates = tf.linalg.solve(hessian_approx, gradient_approx)

        # Cut the flat step back into one piece per variable, in the same
        # order the Jacobian columns were concatenated.
        sizes = [v.shape.num_elements() for v in variables]
        pieces = tf.split(tf.reshape(updates, [-1]), sizes)
        new_params = [tf.reshape(piece, v.shape) for piece, v in zip(pieces, variables)]

        self.optimizer.apply_gradients(zip(new_params, variables))

In [0]:
lmnn = LMNN()

# perf_counter() is a monotonic, high-resolution clock, so the measured
# interval cannot be distorted by system clock adjustments during training.
tic = time.perf_counter()
lmnn.fit(x_train, y_train, batch_size=32, epochs=100)
print(f"Elapsed time: {time.perf_counter() - tic}")

Epoch 1/100
Loss: 6.792662620544434
Epoch 2/100
Loss: 0.5329774618148804
Epoch 3/100
Loss: 0.4930444657802582
Epoch 4/100
Loss: 0.4817509055137634
Epoch 5/100
Loss: 0.4748733937740326
Epoch 6/100
Loss: 0.4662263095378876
Epoch 7/100
Loss: 0.4540725350379944
Epoch 8/100
Loss: 0.44516196846961975
Epoch 9/100
Loss: 0.43970826268196106
Epoch 10/100
Loss: 0.43563562631607056
Epoch 11/100
Loss: 0.4328329265117645
Epoch 12/100
Loss: 0.43151527643203735
Epoch 13/100
Loss: 0.43131712079048157
Epoch 14/100
Loss: 0.43172767758369446
Epoch 15/100
Loss: 0.4323820471763611
Epoch 16/100
Loss: 0.4330860674381256
Epoch 17/100
Loss: 0.4337526559829712
Epoch 18/100
Loss: 0.43434855341911316
Epoch 19/100
Loss: 0.43486928939819336
Epoch 20/100
Loss: 0.43532681465148926
Epoch 21/100
Loss: 0.43574029207229614
Epoch 22/100
Loss: 0.4361341893672943
Epoch 23/100
Loss: 0.4365255534648895
Epoch 24/100
Loss: 0.43693315982818604
Epoch 25/100
Loss: 0.437366247177124
Epoch 26/100
Loss: 0.4378325641155243
Epoch 27/100

In [0]:
# Mean squared error of the LM-trained network on the held-out splits.
pred = lmnn(x_valid)
print(f"Validation Loss (MSE): {np.mean((y_valid - pred)**2)}")
pred = lmnn(x_test)
# This figure is computed on the test split, so it is labelled as such.
print(f"Test Loss (MSE): {np.mean((y_test - pred)**2)}")

Validation Loss (MSE): 0.45368221402168274
Validation Loss (MSE): 0.6965617537498474


In [0]:
# Fresh model: discards the weights of any earlier `mlp` instance.
mlp = MLP()

# perf_counter() is a monotonic, high-resolution clock, so the measured
# interval cannot be distorted by system clock adjustments during training.
tic = time.perf_counter()
mlp.fit(x_train, y_train, batch_size=2048, epochs=3000)
print(f"Elapsed time: {time.perf_counter() - tic}")

Epoch 1/3000
Loss: 6.792662620544434
Epoch 2/3000
Loss: 2.2201812267303467
Epoch 3/3000
Loss: 1.337558627128601
Epoch 4/3000
Loss: 1.1636897325515747
Epoch 5/3000
Loss: 1.1253114938735962
Epoch 6/3000
Loss: 1.1123919486999512
Epoch 7/3000
Loss: 1.1041923761367798
Epoch 8/3000
Loss: 1.0968390703201294
Epoch 9/3000
Loss: 1.089603304862976
Epoch 10/3000
Loss: 1.082341194152832
Epoch 11/3000
Loss: 1.075020432472229
Epoch 12/3000
Loss: 1.0676311254501343
Epoch 13/3000
Loss: 1.0601688623428345
Epoch 14/3000
Loss: 1.0526313781738281
Epoch 15/3000
Loss: 1.0450177192687988
Epoch 16/3000
Loss: 1.0373274087905884
Epoch 17/3000
Loss: 1.0295606851577759
Epoch 18/3000
Loss: 1.0217188596725464
Epoch 19/3000
Loss: 1.0138033628463745
Epoch 20/3000
Loss: 1.0058164596557617
Epoch 21/3000
Loss: 0.997761070728302
Epoch 22/3000
Loss: 0.989640474319458
Epoch 23/3000
Loss: 0.9814583659172058
Epoch 24/3000
Loss: 0.9732192158699036
Epoch 25/3000
Loss: 0.9649279713630676
Epoch 26/3000
Loss: 0.9565894603729248
Ep

In [0]:
# Mean squared error of the SGD-trained network on the held-out splits.
pred = mlp(x_valid)
print(f"Validation Loss (MSE): {np.mean((y_valid - pred)**2)}")
pred = mlp(x_test)
# This figure is computed on the test split, so it is labelled as such.
print(f"Test Loss (MSE): {np.mean((y_test - pred)**2)}")

Validation Loss (MSE): 0.46331125497817993
Validation Loss (MSE): 0.483044296503067
