# Training a BNN with 20 lines of code in 20 seconds

### In this notebook we demonstrate how simple it is to perform approximate inference using DeepBayes

(Timings reported on an M1 Pro MacBook.)

In [1]:
import deepbayes
import deepbayes.optimizers as optimizers
import tensorflow as tf
from tensorflow.keras.models import *
from tensorflow.keras.layers import *

### First, we load the MNIST dataset, normalize it, and flatten each image into a vector

In [2]:
# Fetch MNIST as (inputs, labels) pairs for the train and test splits.
(X_train, y_train), (X_test, y_test) = tf.keras.datasets.mnist.load_data()

# Scale pixel values by 1/255, cast to float32, then flatten every image
# into a single row of 28*28 features.
X_train = (X_train / 255.).astype("float32").reshape(-1, 28 * 28)
X_test = (X_test / 255.).astype("float32").reshape(-1, 28 * 28)

### We define a model using the flexible Keras interface
(Most valid Keras models are also valid DeepBayes models)

In [3]:
# Fully connected classifier: 28*28 input features -> 128 ReLU units -> 10 softmax outputs.
model = Sequential()
# Each sample is a flat vector of 28*28 features (the inputs are reshaped to
# (-1, 28*28) when loaded), so the per-sample input shape is (28*28,).
# Declaring (1, 28*28) would describe a rank-2 sample that the data never has.
model.add(Dense(128, activation="relu", input_shape=(28 * 28,)))
model.add(Dense(10, activation="softmax"))
# Sparse categorical cross-entropy: labels are passed as loaded, without one-hot encoding.
loss = tf.keras.losses.SparseCategoricalCrossentropy()

#### We then select the inference method and key parameters for a run
Calling compile will set up the DeepBayes model

Calling train will then perform inference over the parameters

In [4]:
# Hyperparameters for this run.
# NOTE(review): `decay` is assigned here but is not passed to compile() below.
learning_rate = 0.35
decay = 0.0

# Choose the inference method, then bind the Keras model and loss to it.
opt = optimizers.VariationalOnlineGuassNewton()
bayes_model = opt.compile(
    model,
    loss_fn=loss,
    epochs=5,
    learning_rate=learning_rate,
    batch_size=128,
)

# Run inference on the training split; the test split is passed as the last
# two arguments (the recorded output reports val_loss / val_acc per epoch).
bayes_model.train(X_train, y_train, X_test, y_test)

This optimizer does not have a default compilation method. Please make sure to call the correct .compile method before use.
deepbayes: Using implicit prior
(784, 128) 0.03571428571428571
(128, 10) 0.08838834764831845
deepbayes: Using implicit prior
(784, 128) 0.03571428571428571
(128, 10) 0.08838834764831845


  self.model.set_weights(np.asarray(init_weights))
  g = np.asarray(weight_gradient)
100%|██████████| 469/469 [00:05<00:00, 92.28it/s]


Epoch 1, loss: 0.833, acc: 0.782, val_loss: 0.670, val_acc: 0.882


100%|██████████| 469/469 [00:05<00:00, 88.26it/s]


Epoch 2, loss: 0.692, acc: 0.867, val_loss: 0.528, val_acc: 0.894


100%|██████████| 469/469 [00:05<00:00, 93.02it/s]


Epoch 3, loss: 0.445, acc: 0.901, val_loss: 0.310, val_acc: 0.908


100%|██████████| 469/469 [00:04<00:00, 94.65it/s]


Epoch 4, loss: 0.283, acc: 0.922, val_loss: 0.268, val_acc: 0.933


100%|██████████| 469/469 [00:04<00:00, 94.81it/s]


Epoch 5, loss: 0.225, acc: 0.942, val_loss: 0.205, val_acc: 0.944


100%|██████████| 469/469 [00:04<00:00, 94.43it/s]


Epoch 6, loss: 0.166, acc: 0.952, val_loss: 0.186, val_acc: 0.952


#### Finally, we can save the resulting posterior. This will create a new directory and store all the posterior information for later use.

In [5]:
# Directory the posterior is written to.
posterior_dir = "PosteriorModels/VOGN_MNIST_Posterior"
bayes_model.save(posterior_dir)

('classes', 10)
('batch_size', 128)
('learning_rate', 0.35)
('decay', 0.0)
('epochs', 6)
('inflate_prior', 1)
('input_noise', 0.0)
('robust_train', 0)
('epsilon', 0.09999999999999999)
('robust_lambda', 0.5)
('loss_monte_carlo', 2)
('input_upper', inf)
('input_lower', -inf)
('beta_1', 0.999)
('beta_2', 0.9999)
('lam', 1.0)
('N', 60000)
('max_eps', 0.1)
('max_robust_lambda', 0.5)


  np.save(path+"/mean", np.asarray(self.posterior_mean))
  np.save(path+"/var", np.asarray(self.posterior_var))
