# Distributions
This notebook serves two purposes:
* Explain distributions in pytorch
* Explain why we need distributions

In [1]:
import torch
import torch.nn as nn
from torch.distributions import Normal, MultivariateNormal

In [2]:
mean = torch.tensor(1.0)
norm_dist = Normal(mean, 1.0)

In [3]:
print(norm_dist.sample())

tensor(1.2979)


In [None]:
packing many means is actualy interpreted as packing the mean and scale together

In [19]:
means = torch.tensor([1.0, 2.0])
norm_dist = Normal(*means,1.0)
print(norm_dist.sample())


tensor(1.1837)


We can pass a tensor with multiple means into a normal distribution, and a sample would provide a tensor with sampled values for each mean

In [30]:
means = torch.tensor([0.5,0.2,0.7,0.3,0.4])
norm_dist5 = Normal(means, 1.0)
print (norm_dist5.sample())

tensor([1.5203, 0.3677, 1.0054, 0.0198, 0.3305])


In [31]:
(norm_dist5.batch_shape, norm_dist5.event_shape)

(torch.Size([5]), torch.Size([]))

In [32]:
multi_var = MultivariateNormal(means,torch.eye(len(means)))
(multi_var.batch_shape,multi_var.event_shape)

(torch.Size([]), torch.Size([5]))

In [33]:
sample = multi_var.sample((3,1))
print(sample.numpy())

[[[-0.44347823 -1.4636948   0.2527554   0.45634598  0.65050125]]

 [[-0.0171302  -2.365741    0.62106913  0.8216393   0.6089853 ]]

 [[ 0.89379555 -0.630736    3.0626993   1.6567123   0.41341266]]]


In [34]:
multi_var.log_prob(sample)

tensor([[-6.5673],
        [-8.1809],
        [-8.7289]])

In [45]:
multi_var = MultivariateNormal(means,torch.eye(len(means)))
(multi_var.batch_shape,multi_var.event_shape)
sample = multi_var.sample((3,1))
print(sample.numpy())
multi_var = MultivariateNormal(means,0.000001*torch.eye(len(means)))
(multi_var.batch_shape,multi_var.event_shape)
sample = multi_var.sample((3,1))
print(sample.numpy())
print(means.numpy())
print(multi_var.log_prob(means.unsqueeze(0)))


[[[-0.4003948  -0.19401081  0.69256896  0.03811589  0.59478474]]

 [[-0.0298236   0.46084344  0.47241724 -0.7908543   1.7463951 ]]

 [[ 1.3908324   1.7529848   0.35246193  1.328706    0.8006342 ]]]
[[[0.49947804 0.19992974 0.7007838  0.2987778  0.400642  ]]

 [[0.50044644 0.20053618 0.70028925 0.3001508  0.39983553]]

 [[0.5004401  0.19892773 0.69843525 0.2995331  0.40091994]]]
[0.5 0.2 0.7 0.3 0.4]
tensor([29.9441])


# Creating multiple distributions

In [89]:
type = torch.float

cov_var = torch.full(size=(5,),fill_value=0.5, dtype=type)
cov_var = torch.tensor([3,3,4,5,6], dtype=type)
print("variance per dimension :", cov_var)
cov_mat = torch.stack((torch.diag(cov_var),torch.diag(cov_var-1.0)))
print("covariance matrix :\n", cov_mat)

variance per dimension : tensor([3., 3., 4., 5., 6.])
covariance matrix :
 tensor([[[3., 0., 0., 0., 0.],
         [0., 3., 0., 0., 0.],
         [0., 0., 4., 0., 0.],
         [0., 0., 0., 5., 0.],
         [0., 0., 0., 0., 6.]],

        [[2., 0., 0., 0., 0.],
         [0., 2., 0., 0., 0.],
         [0., 0., 3., 0., 0.],
         [0., 0., 0., 4., 0.],
         [0., 0., 0., 0., 5.]]])


In [90]:

means = torch.tensor([[0.5, 0.2, 0.7, 0.3, 0.4],
                     [3, 3, 4, 5, 6]],dtype=type)

multi_var = MultivariateNormal(means,cov_mat)
(multi_var.batch_shape, multi_var.event_shape)
x = multi_var.sample()
print(x)
test = torch.tensor([[0.5, 0.2, 0.7, 0.3, 0.4],
                     [3, 3, 4, 5, 6]],dtype=type)
test_2 = test-1e-1
a = multi_var.log_prob(test)
b = multi_var.log_prob(test_2)
c = torch.exp(a-b)
print (c)

tensor([[ 1.0921,  1.3728, -0.3358,  1.4643, -1.0745],
        [ 3.2573,  2.8846,  2.6515,  6.5360,  5.8733]])
tensor([1.0064, 1.0090])


In [35]:
multi_var.log_prob(means)

tensor(-4.5947)

We can also compute the log probability that a specific tensor has in comparison to the distribution

In [32]:
x = norm_dist5.sample()
print("x ={}, log_prob = {}".format(x,norm_dist5.log_prob(x)))
print("means={},\n\t log_prob = {},\n\t log_log = {}".format(means, norm_dist5.log_prob(means), torch.log(-norm_dist5.log_prob(means))))
m_log_prob = norm_dist5.log_prob(means)
for i in range(5):
    x = norm_dist5.sample()
    x_log_prb = norm_dist5.log_prob(x)
    x_log_log = x_log_prb/m_log_prob
    print("x={}\n\tlog_prob = {}\n\t prob/mean prob = {}".format(x, x_log_prb, x_log_log))

x =tensor([0.2889, 3.4804, 4.2946, 3.0338, 5.3648]), log_prob = tensor([-1.1718, -2.0148, -1.7569, -1.3857, -0.9855])
means=tensor([1., 2., 3., 4., 5.]),
	 log_prob = tensor([-0.9189, -0.9189, -0.9189, -0.9189, -0.9189]),
	 log_log = tensor([-0.0845, -0.0845, -0.0845, -0.0845, -0.0845])
x=tensor([-0.0431,  1.3103,  2.6622,  4.3412,  5.4286])
	log_prob = tensor([-1.4630, -1.1568, -0.9760, -0.9771, -1.0108])
	 prob/mean prob = tensor([1.5920, 1.2588, 1.0621, 1.0633, 1.0999])
x=tensor([1.3763, 2.5513, 4.6796, 2.7699, 5.2146])
	log_prob = tensor([-0.9897, -1.0709, -2.3295, -1.6755, -0.9420])
	 prob/mean prob = tensor([1.0770, 1.1654, 2.5350, 1.8233, 1.0251])
x=tensor([1.6428, 1.7658, 3.4495, 4.2267, 5.3016])
	log_prob = tensor([-1.1256, -0.9464, -1.0200, -0.9446, -0.9644])
	 prob/mean prob = tensor([1.2248, 1.0299, 1.1100, 1.0280, 1.0495])
x=tensor([0.6541, 2.1856, 2.1194, 3.7145, 5.9594])
	log_prob = tensor([-0.9788, -0.9362, -1.3067, -0.9597, -1.3791])
	 prob/mean prob = tensor([1.0651, 

Let's try to understand log_prob a bit better

The numpy. meshgrid function is used to create a rectangular grid out of two given one-dimensional arrays representing the Cartesian indexing or Matrix indexing. Meshgrid function is somewhat inspired from MATLAB. ... meshgrid function returns two 2-Dimensional arrays representing the X and Y coordinates of all the points.

To sample across more than one action dimension therefore, we need the multi-variate Normal

In [135]:
means = torch.tensor([0.0,0.0,0.0,-0.1,0.1])


nums = 10000
for cov_mat_mul in torch.arange(0.1,1.5,step=0.2):
    mv_normal = MultivariateNormal(means,torch.eye(5)*cov_mat_mul)
    samples= torch.zeros((nums,5))
    for i in range(nums):
        samples[i] = mv_normal.sample()
        samples = torch.tensor(samples)
    samples_mean = samples.mean(0)
    samples_max, _ = samples.max(0)
    samples_min, _ = samples.min(0)

    print("multiplier: ",cov_mat_mul,"\nmean :\t", samples_mean,"\nmax\t:",samples_max,"\nmin\t:",samples_min)


KeyboardInterrupt: 

converting states to probabilities. What does the function from PPO utils do ??

```python
# convert states to probability, passing through the policy
def states_to_prob(policy, states):
    states = torch.stack(states)
    policy_input = states.view(-1,*states.shape[-3:])
    return policy(policy_input).view(states.shape[:-3])
```


In [77]:
states = tuple((torch.tensor([1,1,1,1,1,1]),
                torch.tensor([2,2,2,2,2,2]),
                torch.tensor([3,3,3,3,3,3])))
                      
               
print("states: \n", states)
states = torch.stack(states)
print("after stack: \n", states)
print(states.shape)

states: 
 (tensor([1, 1, 1, 1, 1, 1]), tensor([2, 2, 2, 2, 2, 2]), tensor([3, 3, 3, 3, 3, 3]))
after stack: 
 tensor([[1, 1, 1, 1, 1, 1],
        [2, 2, 2, 2, 2, 2],
        [3, 3, 3, 3, 3, 3]])
torch.Size([3, 6])


the ```states.view(-1,*states.shape[-3:])``` operation just adds another dimension

In [76]:
s_view = states.view(-1,*states.shape[-3:])
print(s_view)
print(s_view.shape)

tensor([[[1, 1, 1, 1, 1, 1],
         [2, 2, 2, 2, 2, 2],
         [3, 3, 3, 3, 3, 3]]])
torch.Size([1, 3, 6])


In [78]:
s_view2 = states.unsqueeze(0)
print(s_view2)
print(s_view2.shape)

tensor([[[1, 1, 1, 1, 1, 1],
         [2, 2, 2, 2, 2, 2],
         [3, 3, 3, 3, 3, 3]]])
torch.Size([1, 3, 6])


In [79]:
print(s_view == s_view2)

tensor([[[True, True, True, True, True, True],
         [True, True, True, True, True, True],
         [True, True, True, True, True, True]]])


In [99]:
import torch
import torch.nn as nn

a = torch.tensor([2,2,2,2])
b = a.unsqueeze(0)
print(a,b)
c = a.unsqueeze(0).repeat(3,1)
print(c)

tensor([2, 2, 2, 2]) tensor([[2, 2, 2, 2]])
tensor([[2, 2, 2, 2],
        [2, 2, 2, 2],
        [2, 2, 2, 2]])


In [145]:
import torch
import torch.nn
import numpy as np 
from torch.distributions import Normal

mean = torch.tensor([3.0])

dist = Normal(mean,0.5)
print (dist.sample())
print (dist.log_prob(mean))
print (np.exp(dist.log_prob(mean)))

for x in torch.arange(0.0,6.0,step=0.5):
    print("num: ", x, " log prob: ", dist.log_prob(x), " prob: ", np.exp(dist.log_prob(x)))

tensor([2.8088])
tensor([-0.2258])
tensor([0.7979])
num:  tensor(0.)  log prob:  tensor([-18.2258])  prob:  tensor([1.2152e-08])
num:  tensor(0.5000)  log prob:  tensor([-12.7258])  prob:  tensor([2.9734e-06])
num:  tensor(1.)  log prob:  tensor([-8.2258])  prob:  tensor([0.0003])
num:  tensor(1.5000)  log prob:  tensor([-4.7258])  prob:  tensor([0.0089])
num:  tensor(2.)  log prob:  tensor([-2.2258])  prob:  tensor([0.1080])
num:  tensor(2.5000)  log prob:  tensor([-0.7258])  prob:  tensor([0.4839])
num:  tensor(3.)  log prob:  tensor([-0.2258])  prob:  tensor([0.7979])
num:  tensor(3.5000)  log prob:  tensor([-0.7258])  prob:  tensor([0.4839])
num:  tensor(4.)  log prob:  tensor([-2.2258])  prob:  tensor([0.1080])
num:  tensor(4.5000)  log prob:  tensor([-4.7258])  prob:  tensor([0.0089])
num:  tensor(5.)  log prob:  tensor([-8.2258])  prob:  tensor([0.0003])
num:  tensor(5.5000)  log prob:  tensor([-12.7258])  prob:  tensor([2.9734e-06])
