## Univariate Distributions

In [2]:
import tensorflow as tf
import tensorflow_probability as tfp



In [3]:
tfd = tfp.distributions

In [10]:
tfd

<module 'tensorflow_probability.python.distributions' from 'C:\\Users\\omkar.karve\\.conda\\envs\\motion\\lib\\site-packages\\tensorflow_probability\\python\\distributions\\__init__.py'>

In [4]:
normal = tfd.Normal(loc=0., scale=1.)


In [5]:
normal.sample(3)

<tf.Tensor: shape=(3,), dtype=float32, numpy=array([ 0.08000929, -1.5197878 , -0.6406692 ], dtype=float32)>

In [6]:
normal.prob(0.5)

<tf.Tensor: shape=(), dtype=float32, numpy=0.35206532>

In [7]:
normal.log_prob(0.5)

<tf.Tensor: shape=(), dtype=float32, numpy=-1.0439385>

In [10]:
import math
math.log(0.35203532)

-1.0440237675087614

In [11]:
bern = tfd.Bernoulli(logits=0.847)
print(bern)

tfp.distributions.Bernoulli("Bernoulli", batch_shape=[], event_shape=[], dtype=int32)


In [12]:
bern.sample(3)

<tf.Tensor: shape=(3,), dtype=int32, numpy=array([1, 1, 1])>

In [13]:
bern.prob(1)

<tf.Tensor: shape=(), dtype=float32, numpy=0.69993746>

In [14]:
batched_bern = tfd.Bernoulli(probs=[0.4,0.5])
print(batched_bern)

tfp.distributions.Bernoulli("Bernoulli", batch_shape=[2], event_shape=[], dtype=int32)


In [15]:
batched_bern.sample(3)

<tf.Tensor: shape=(3, 2), dtype=int32, numpy=
array([[0, 0],
       [0, 0],
       [0, 1]])>

In [17]:
batched_bern.prob([1,1])

<tf.Tensor: shape=(2,), dtype=float32, numpy=array([0.4, 0.5], dtype=float32)>

## Multivariate Distributions

In [11]:
mv_normal = tfd.MultivariateNormalDiag(loc=[-1,0.5], scale_diag=[1.,1.5])
print(mv_normal)

tfp.distributions.MultivariateNormalDiag("MultivariateNormalDiag", batch_shape=[], event_shape=[2], dtype=float32)


In [12]:
mv_normal.event_shape

TensorShape([2])

In [14]:

mv_normal.sample(3)

<tf.Tensor: shape=(3, 2), dtype=float32, numpy=
array([[-1.615947  , -1.3021022 ],
       [ 0.32211852, -0.11452258],
       [-1.4408972 ,  1.2506003 ]], dtype=float32)>

In [17]:
batched_normal = tfd.Normal(loc=[-1,0.5], scale=[1.,1.5])
print(batched_normal)

tfp.distributions.Normal("Normal", batch_shape=[2], event_shape=[], dtype=float32)


In [16]:
batched_normal.sample(3)

<tf.Tensor: shape=(3, 2), dtype=float32, numpy=
array([[-0.2397803 ,  2.5428483 ],
       [ 0.85000706,  2.5582943 ],
       [ 0.33503187,  1.3704139 ]], dtype=float32)>

Batched Multivariate diag distribution is a normal distribution of 2 independent variables (event shape is 2)
Hence 3 samples are showing probabilities of 2 independent events per sample - 3X2
Whereas the normal batched sample is just two different probabilities of the same event with different values
Event shape of a normal distribution is empty.

In [19]:
mv_normal.log_prob([-0.2, 1.8])

<tf.Tensor: shape=(), dtype=float32, numpy=-2.9388978>

In [20]:
batched_normal.log_prob([-.2, 1.8])


<tf.Tensor: shape=(2,), dtype=float32, numpy=array([-1.2389386, -1.699959 ], dtype=float32)>

When log prob is calculated for a multivariate distribution that is passed a 1x2 array, it calculates 
log probability of one instance of getting the 1X2 random variable hence log prob is a single value 
Whereas log prob of a univariate normal distribution that is passed a 1x2 array outputs
two different log probabilits of two different instances within that single distribution hence the 
output is a 1x2 array

In [22]:
batched_mv_normal = tfd.MultivariateNormalDiag(
                    loc=[[-1.,0.5],[2.,0.],[-0.5,1.5]],
                    scale_diag=[[1.,1.5],[2.,0.5],[1.,1.]])

In [24]:
print(batched_mv_normal)
# Here loc is a 3x2 array, there are 3 batches and each batch contains one instance of a 2D random variable


tfp.distributions.MultivariateNormalDiag("MultivariateNormalDiag", batch_shape=[3], event_shape=[2], dtype=float32)


In [26]:
batched_mv_normal.sample(4)

<tf.Tensor: shape=(4, 3, 2), dtype=float32, numpy=
array([[[-2.7421708 , -0.4984879 ],
        [ 1.1081645 , -0.12068057],
        [-2.2804341 ,  1.2456772 ]],

       [[-1.2511964 ,  0.73182046],
        [ 1.1839368 , -0.9681898 ],
        [-1.0132573 , -0.8188286 ]],

       [[-0.27510667,  0.21074677],
        [ 2.383969  , -0.47781435],
        [-1.5911664 ,  1.9778253 ]],

       [[-1.0960193 ,  1.4839597 ],
        [ 3.4861307 , -0.33728957],
        [-2.7676692 ,  1.9263558 ]]], dtype=float32)>

In [28]:
batched_mv_normal = tfd.MultivariateNormalDiag(
    loc=[[0.3, 0.8, 1.1], [2.3, -0.3, -1.]], 
    scale_diag=[[1.5, 1., 0.4], [2.5, 1.5, 0.5]])

In [29]:
print(batched_mv_normal)

tfp.distributions.MultivariateNormalDiag("MultivariateNormalDiag", batch_shape=[2], event_shape=[3], dtype=float32)


In [30]:
batched_mv_normal.log_prob([0., -1., 1.])

<tf.Tensor: shape=(2,), dtype=float32, numpy=array([ -3.9172401, -11.917513 ], dtype=float32)>

In [34]:
batched_mv_normal.log_prob(batched_mv_normal.sample(2))

<tf.Tensor: shape=(2, 2), dtype=float32, numpy=
array([[-5.9249983, -5.6765656],
       [-3.9769206, -5.1262293]], dtype=float32)>

batched_normal_multivariate sample size => samplesize x batchsize x eventsize

## Independent distributions

In [5]:
batched_norm = tfd.Normal(loc=[-1., 0.5], scale=[1., 1.5])

independent_norm = tfd.Independent(batched_norm, reinterpreted_batch_ndims=1)
print(independent_norm)

tfp.distributions.Independent("IndependentNormal", batch_shape=[], event_shape=[2], dtype=float32)


In [6]:
batched_norm.log_prob([-0.2,1.8])

<tf.Tensor: shape=(2,), dtype=float32, numpy=array([-1.2389386, -1.699959 ], dtype=float32)>

In [7]:
independent_norm.log_prob([-0.2,1.8])

<tf.Tensor: shape=(), dtype=float32, numpy=-2.9388976>

In [10]:
batched_norm = tfd.Normal(
                           loc=[[-1,0.5],[0.,1.],[0.3,-0.1]],
                           scale=[[1.,1.5],[0.2,0.8],[2.,1.]] )
print(batched_norm)

tfp.distributions.Normal("Normal", batch_shape=[3, 2], event_shape=[], dtype=float32)


In [11]:
independent_norm = tfd.Independent(batched_norm, reinterpreted_batch_ndims=1)
print(independent_norm)

tfp.distributions.Independent("IndependentNormal", batch_shape=[3], event_shape=[2], dtype=float32)


In [12]:
independent_norm = tfd.Independent(batched_norm, reinterpreted_batch_ndims=2)
print(independent_norm)

tfp.distributions.Independent("IndependentNormal", batch_shape=[], event_shape=[3, 2], dtype=float32)


### Trainable distributions

In [5]:
normal = tfd.Normal(loc=0.,scale=1.)
normal.trainable_variables

()

In [6]:
normal = tfd.Normal(loc=tf.Variable(0.,name='loc'),scale=1.)
normal.trainable_variables

(<tf.Variable 'loc:0' shape=() dtype=float32, numpy=0.0>,)

In [9]:
# Learning principle is maximum likelihood. Finding parameters that maximize the likelihood is the same as finding params
# that minimize the negative log likelihood.
# reason we are using reduce_mean instead of sum is to prevent the loss scaling with the size of the data
# this doesnt change the optimium solution
def nll(x_train):
    return -tf.reduce_mean(normal.log_prob(x_train))

In [11]:
@tf.function
def get_loss_and_grads(x_train):
    with tf.GradientTape() as tape:
        tape.watch(normal.trainable_variables)
        loss = nll(x_train)
    grads = tape.gradient(loss,normal.trainable_variables) #this is where the heav computation happens
    return loss, grads

In [13]:
optimizer = tf.keras.optimizers.SGD(learning_rate=0.05)
num_steps=100 #some random value
for _ in range(num_steps):
    loss,grads = get_loss_and_grads(x_samples)
    optimizer.apply_gradients(zip(grads,normal.trainable_variables))

NameError: name 'x_samples' is not defined