In [6]:
!pip install tensorflow
!pip install tensorflow_probability

Collecting tensorflow==1.15
  Using cached https://files.pythonhosted.org/packages/fe/01/997a5d608162cf5371dae49e8cb68baaa5773b091a3e74576ea30dae51e6/tensorflow-1.15.0-cp36-cp36m-macosx_10_11_x86_64.whl
Collecting tensorflow-estimator==1.15.1
  Using cached https://files.pythonhosted.org/packages/de/62/2ee9cd74c9fa2fa450877847ba560b260f5d0fb70ee0595203082dafcc9d/tensorflow_estimator-1.15.1-py2.py3-none-any.whl
Collecting tensorboard<1.16.0,>=1.15.0
  Using cached https://files.pythonhosted.org/packages/1e/e9/d3d747a97f7188f48aa5eda486907f3b345cd409f0a0850468ba867db246/tensorboard-1.15.0-py3-none-any.whl
Installing collected packages: tensorflow-estimator, tensorboard, tensorflow
  Found existing installation: tensorflow-estimator 1.14.0
    Uninstalling tensorflow-estimator-1.14.0:
      Successfully uninstalled tensorflow-estimator-1.14.0
  Found existing installation: tensorboard 1.14.0
    Uninstalling tensorboard-1.14.0:
      Successfully uninstalled tensorboard-1.14.0
  Found exi

In [13]:
# In this tutorial we implement a simple linear regression model
# trained with the PAC-Bayesian SGD method.
import numpy as np
np.random.seed(41)  # seed NumPy's global RNG so the synthetic data below is reproducible

In [14]:
# configs
n_sample = 100  # number of samples in the training set
dim = 5  # dimension of the feature vector of each sample

In [16]:
# Section 1: Data preparation
# X: n samples of dimension d, each row ~ N(0, diag(sigma_1^2, ..., sigma_d^2))
# y: n scalar labels, y = X . w* + epsilon (the noise epsilon is drawn further down)

mean_x = np.zeros(dim, dtype=np.float32)
# Draw d standard deviations uniformly from [0, 1), sort them in ascending
# order, and put their squares (the variances) on the covariance diagonal.
diag_x = np.diag(np.sort(np.random.rand(dim)) ** 2).astype(np.float32)
print(diag_x)

[[0.0018896  0.         0.         0.         0.        ]
 [0.         0.00212482 0.         0.         0.        ]
 [0.         0.         0.01355448 0.         0.        ]
 [0.         0.         0.         0.06296267 0.        ]
 [0.         0.         0.         0.         0.45808023]]


In [65]:
# generate input features: n_sample draws from N(mean_x, diag_x), stored as float32
X = np.random.multivariate_normal(mean=mean_x, cov=diag_x, size=n_sample).astype(np.float32)
print(X.shape)
print(X[:2])

(100, 5)
[[-0.03278099 -0.00192327 -0.02560094  0.16972491 -0.9367767 ]
 [-0.11926062  0.01693853 -0.15874355  0.01103664 -0.49389613]]


In [66]:
# ground truth weight
diag_lambda = 3  # value placed on every diagonal entry of the covariance w* is drawn from
# diag_lambda * I as a float32 (dim, dim) matrix
diag = diag_lambda * np.eye(dim, dtype=np.float32)
print(diag)
# a single draw from N(0, diag); [0] unwraps the leading sample axis
w_star = np.random.multivariate_normal(mean=[0] * dim, cov=diag, size=1)[0]
print(w_star)

[[3. 0. 0. 0. 0.]
 [0. 3. 0. 0. 0.]
 [0. 0. 3. 0. 0.]
 [0. 0. 0. 3. 0.]
 [0. 0. 0. 0. 3.]]
[ 1.55186556 -1.33495412 -0.22992489  0.64493134  2.24390501]


In [67]:
# noise added to y: one independent N(0, 1) draw per training sample
epsilon = np.random.normal(0, 1, n_sample)

In [68]:
# noisy labels: linear response X . w* plus the noise term, stored as float32
y = (X @ w_star + epsilon).astype(np.float32)
print(y[:10])

[ 0.15597352 -1.6148058  -2.7178566  -0.9975755   3.716675   -0.24592389
  1.9880351  -0.9270629   2.4805005   0.8656945 ]


100


In [5]:
# Section 2:
# define the model
import math
import tensorflow as tf
import tensorflow_probability as tfp
tfd = tfp.distributions  # short alias for the distributions module used below

In [None]:
# define learnable distribution Q: a diagonal Gaussian with mean v and
# per-coordinate standard deviation s
v = tf.Variable(name='learnable_mean', shape=dim, initial_value=tf.zeros(shape=dim), trainable=True)
# The scale must start strictly positive: a zero scale_diag makes Q a
# degenerate (zero-variance) Gaussian. Start from ones, which is also what
# Linear._build_model uses for its 'learnable_diag' variable.
s = tf.Variable(name='learnable_diag', shape=dim, initial_value=tf.ones(shape=dim), trainable=True)
Q = tfd.MultivariateNormalDiag(loc=v, scale_diag=s)
print(Q.mean())
print(Q.stddev())

In [39]:
# define data generation prior P: a zero-mean diagonal Gaussian whose
# per-coordinate standard deviation is `lamda`
# `lamda` is not assigned in any other cell of this notebook, so it is defined
# here to keep the cell runnable on a fresh kernel. 4.0 reproduces the stddev
# recorded in this cell's saved output.
# NOTE(review): confirm 4.0 is the intended prior scale.
lamda = 4.0
P = tfd.MultivariateNormalDiag(loc=[.0]*dim, scale_diag=[lamda]*dim)
print(P.mean())
print(P.stddev())

tf.Tensor([0. 0. 0. 0. 0.], shape=(5,), dtype=float32)
tf.Tensor([4. 4. 4. 4. 4.], shape=(5,), dtype=float32)


In [44]:
from abc import abstractmethod
class Layer(object):
    """Minimal base class for callable layers.

    Subclasses implement ``_call``; calling a layer instance forwards the
    inputs to ``_call`` and returns its result.

    Args:
        name: Optional layer name. When omitted or falsy, a name of the form
            ``<lowercased class name>_<n>`` is generated, where ``n`` counts
            the auto-named instances of that class name, starting at 1.
    """

    # Counters for auto-generated names, keyed by lowercased class name and
    # shared by every Layer subclass.
    _layer_uids = {}

    def __init__(self, name=None):
        if not name:
            layer = self.__class__.__name__.lower()
            name = layer + '_' + str(self._next_layer_id(layer))
        self.name = name
        self.vars = []

    @staticmethod
    def _next_layer_id(layer_name):
        """Return the next 1-based id for ``layer_name`` and record it."""
        uid = Layer._layer_uids.get(layer_name, 0) + 1
        Layer._layer_uids[layer_name] = uid
        return uid

    def __call__(self, inputs):
        """Apply the layer to ``inputs`` by delegating to ``_call``."""
        outputs = self._call(inputs)
        return outputs

    # The class does not use ABCMeta, so this decorator is not enforced at
    # instantiation time; the base implementation simply returns None.
    @abstractmethod
    def _call(self, inputs):
        """Compute the layer output for ``inputs``; overridden by subclasses."""
        pass

In [109]:
class Linear:
    """Linear regression model trained with a PAC-Bayes style objective.

    The loss is the empirical L2 loss plus ``sqrt(RE_loss / 2)``, where
    ``RE_loss`` combines KL(Q || P) with two logarithmic terms built from
    ``b``, ``c``, ``delta`` and the number of samples (see ``compute_loss``).

    Args:
        n_samples: Number of training samples (``m`` in the loss).
        dimension: Length of the weight vector / of each feature vector.
        b: Constant multiplying ``log(c / lamda)`` in the loss.
        c: Constant in the numerator of ``log(c / lamda)``.
        delta: Constant in the ``log(pi^2 * m / (6 * delta))`` term.
        learning_rate: Learning rate of the SGD optimizer.
        momentum: Momentum of the SGD optimizer.
    """

    def __init__(self, n_samples, dimension, b, c, delta, learning_rate=0.001, momentum=0.9):
        self.d = dimension
        self.m = n_samples
        self.b = b
        self.c = c
        self.delta = delta  # delta represents belief
        self.optimizer = tf.optimizers.SGD(learning_rate=learning_rate, momentum=momentum)
        self._build_model()

    def _build_model(self):
        """Create the trainable variables and the distributions Q and P."""
        self.weight = tf.Variable(dtype=tf.float32, name="weights", shape=self.d, initial_value=tf.zeros(shape=self.d), trainable=True)
        self.bias = tf.Variable(dtype=tf.float32, name="bias", shape=1, initial_value=tf.zeros(shape=1), trainable=True)
        # define learnable distribution Q: mean is the weight vector, scale is s
        self.s = tf.Variable(dtype=tf.float32, name='learnable_diag', shape=self.d, initial_value=tf.ones(shape=self.d), trainable=True)
        self.Q = tfd.MultivariateNormalDiag(loc=self.weight, scale_diag=self.s)
        # define prior distribution P: zero mean, the single learnable value
        # lamda repeated on every diagonal entry of the scale
        self.lamda = tf.Variable(dtype=tf.float32, name="p_lambda", shape=1, initial_value=[0.01], trainable=True)
        self.P = tfd.MultivariateNormalDiag(loc=tf.zeros(self.d), scale_diag=tf.tile(self.lamda, [self.d]))
        self.trainable_variables = [self.weight, self.bias, self.lamda, self.s]

    def compute_loss(self, predictions, labels):
        """Return the empirical L2 loss plus the KL-based regularisation term.

        Prints the empirical loss and the KL divergence as a side effect.

        Args:
            predictions: Model outputs, as returned by ``predict``.
            labels: Targets with a shape compatible with ``predictions``.

        Returns:
            Tensor holding ``empirical_loss + sqrt(RE_loss / 2)``.
        """
        empirical_loss = tf.nn.l2_loss(predictions - labels)
        print(empirical_loss)
        KL_divergence = tfd.kl_divergence(distribution_a=self.Q, distribution_b=self.P)  # compute KL(Q||P)
        print(KL_divergence)
        # The nested logarithm is finite only while 0 < lamda < c (for b > 0);
        # outside that range it is NaN or -inf and the loss is unusable.
        RE_loss = (
            KL_divergence
            + 2 * tf.math.log(self.b * tf.math.log(self.c / self.lamda))
            + tf.math.log(math.pi ** 2 * self.m / (6 * self.delta))
        ) / (self.m - 1)
        loss = empirical_loss + tf.math.sqrt(RE_loss / 2)
        return loss

    def predict(self, inputs):
        """Return ``inputs . weight + bias`` and cache it in ``self.scores``.

        The product is summed over the last axis only, so a single feature
        vector yields one score and a batch of vectors yields one score per row.
        """
        self.scores = tf.reduce_sum(tf.multiply(inputs, self.weight), axis=-1) + self.bias
        return self.scores

    def train(self, dataset):
        """Run one optimizer step per ``(x, y)`` pair yielded by ``dataset``.

        Prints the loss of every step as a side effect.
        """
        for x, y in dataset:
            with tf.GradientTape() as tape:
                scores = self.predict(x)
                loss = self.compute_loss(scores, y)
                print(loss)
            gradients = tape.gradient(loss, self.trainable_variables)
            self.optimizer.apply_gradients(zip(gradients, self.trainable_variables))

In [110]:
# Build the model; b, c and delta are the constants that enter the
# regularisation term of Linear.compute_loss.
model = Linear(n_samples=n_sample, dimension=dim, b=100, c=0.1, delta=0.025)
# zip(X, y) yields one (feature vector, label) pair at a time, so this is a
# single pass over the training set with one optimizer step per sample.
model.train(zip(X, y))

tf.Tensor(0.01216387, shape=(), dtype=float32)
tf.Tensor(24974.475, shape=(), dtype=float32)
tf.Tensor([11.247513], shape=(1,), dtype=float32)
[<tf.Tensor: id=3675, shape=(5,), dtype=float32, numpy=
array([ 0.00511297,  0.00029998,  0.00399307, -0.02647259,  0.14611237],
      dtype=float32)>, <tf.Tensor: id=3557, shape=(1,), dtype=float32, numpy=array([-0.15597352], dtype=float32)>, <tf.Tensor: id=3649, shape=(1,), dtype=float32, numpy=array([-0.01952237], dtype=float32)>, <tf.Tensor: id=3676, shape=(5,), dtype=float32, numpy=
array([2.2473712, 2.2473712, 2.2473712, 2.2473712, 2.2473712],
      dtype=float32)>]
tf.Tensor(1.3041699, shape=(), dtype=float32)
tf.Tensor(24862.242, shape=(), dtype=float32)
tf.Tensor([12.514264], shape=(1,), dtype=float32)
[<tf.Tensor: id=3966, shape=(5,), dtype=float32, numpy=
array([-0.19262165,  0.02735564, -0.25638548,  0.01788419, -0.79798895],
      dtype=float32)>, <tf.Tensor: id=3849, shape=(1,), dtype=float32, numpy=array([1.6150355], dtype=float32

tf.Tensor(14065.846, shape=(), dtype=float32)
tf.Tensor([16.638824], shape=(1,), dtype=float32)
[<tf.Tensor: id=8052, shape=(5,), dtype=float32, numpy=
array([ 0.06184553, -0.14137274,  0.39826518, -0.7295828 , -4.624788  ],
      dtype=float32)>, <tf.Tensor: id=7935, shape=(1,), dtype=float32, numpy=array([4.0507994], dtype=float32)>, <tf.Tensor: id=8026, shape=(1,), dtype=float32, numpy=array([-0.0232553], dtype=float32)>, <tf.Tensor: id=8053, shape=(5,), dtype=float32, numpy=
array([2.2391155, 2.2391155, 2.2391155, 2.2391155, 2.2391155],
      dtype=float32)>]
tf.Tensor(0.0066336007, shape=(), dtype=float32)
tf.Tensor(13359.351, shape=(), dtype=float32)
tf.Tensor([8.226719], shape=(1,), dtype=float32)
[<tf.Tensor: id=8279, shape=(5,), dtype=float32, numpy=
array([ 0.01904429, -0.00559716, -0.05576354, -0.05345936,  0.4713308 ],
      dtype=float32)>, <tf.Tensor: id=8162, shape=(1,), dtype=float32, numpy=array([0.11518334], dtype=float32)>, <tf.Tensor: id=8253, shape=(1,), dtype=floa

tf.Tensor(2617.9875, shape=(), dtype=float32)
tf.Tensor([4.9115996], shape=(1,), dtype=float32)
[<tf.Tensor: id=12819, shape=(5,), dtype=float32, numpy=
array([ 0.11524785, -0.07263982, -0.58484197, -0.20232949, -0.3236668 ],
      dtype=float32)>, <tf.Tensor: id=12702, shape=(1,), dtype=float32, numpy=array([1.5887848], dtype=float32)>, <tf.Tensor: id=12793, shape=(1,), dtype=float32, numpy=array([-0.04543997], dtype=float32)>, <tf.Tensor: id=12820, shape=(5,), dtype=float32, numpy=
array([2.1095111, 2.1095111, 2.1095111, 2.1095111, 2.1095111],
      dtype=float32)>]
tf.Tensor(5.9630227, shape=(), dtype=float32)
tf.Tensor(2296.6777, shape=(), dtype=float32)
tf.Tensor([9.382938], shape=(1,), dtype=float32)
[<tf.Tensor: id=13046, shape=(5,), dtype=float32, numpy=
array([-0.01718851, -0.2240777 ,  1.1210394 , -1.5022378 , -1.9645206 ],
      dtype=float32)>, <tf.Tensor: id=12929, shape=(1,), dtype=float32, numpy=array([-3.4534106], dtype=float32)>, <tf.Tensor: id=13020, shape=(1,), dtype

tf.Tensor(55.293854, shape=(), dtype=float32)
tf.Tensor([4.745335], shape=(1,), dtype=float32)
[<tf.Tensor: id=17813, shape=(5,), dtype=float32, numpy=
array([ 0.11774162,  0.01743319, -0.42025957, -0.06874916, -2.2222996 ],
      dtype=float32)>, <tf.Tensor: id=17696, shape=(1,), dtype=float32, numpy=array([2.8762896], dtype=float32)>, <tf.Tensor: id=17787, shape=(1,), dtype=float32, numpy=array([-0.22575037], dtype=float32)>, <tf.Tensor: id=17814, shape=(5,), dtype=float32, numpy=
array([-2.013945, -2.013945, -2.013945, -2.013945, -2.013945],
      dtype=float32)>]
tf.Tensor(0.2819427, shape=(), dtype=float32)
tf.Tensor(55.701725, shape=(), dtype=float32)
tf.Tensor([0.89205325], shape=(1,), dtype=float32)
[<tf.Tensor: id=18040, shape=(5,), dtype=float32, numpy=
array([-0.02179857,  0.03903722,  0.0231983 , -0.09022675, -1.6772839 ],
      dtype=float32)>, <tf.Tensor: id=17923, shape=(1,), dtype=float32, numpy=array([-0.75092304], dtype=float32)>, <tf.Tensor: id=18014, shape=(1,), dty

tf.Tensor(298.5479, shape=(), dtype=float32)
tf.Tensor([1.871979], shape=(1,), dtype=float32)
[<tf.Tensor: id=22580, shape=(5,), dtype=float32, numpy=
array([-0.09355063, -0.12872073, -0.09204894,  0.31589034,  1.0810312 ],
      dtype=float32)>, <tf.Tensor: id=22463, shape=(1,), dtype=float32, numpy=array([-1.1084443], dtype=float32)>, <tf.Tensor: id=22554, shape=(1,), dtype=float32, numpy=array([-0.26113358], dtype=float32)>, <tf.Tensor: id=22581, shape=(5,), dtype=float32, numpy=
array([-2.0176346, -2.0176346, -2.0176346, -2.0176346, -2.0176346],
      dtype=float32)>]
tf.Tensor(0.30304167, shape=(), dtype=float32)
tf.Tensor(267.2526, shape=(), dtype=float32)
tf.Tensor([1.495543], shape=(1,), dtype=float32)
[<tf.Tensor: id=22807, shape=(5,), dtype=float32, numpy=
array([-0.01132578, -0.01475578, -0.19360563,  0.5402219 ,  2.7811341 ],
      dtype=float32)>, <tf.Tensor: id=22690, shape=(1,), dtype=float32, numpy=array([-0.77851355], dtype=float32)>, <tf.Tensor: id=22781, shape=(1,), 

In [81]:
# Scratch check: tf.multiply is an element-wise product (see the output below).
tf.multiply([1, 2, 3, 4, 5], [2,2,2,2,3])

<tf.Tensor: id=1485, shape=(5,), dtype=int32, numpy=array([ 2,  4,  6,  8, 15], dtype=int32)>