In [1]:
import numpy as np
import math
import torch
from torch.distributions.multinomial import Multinomial

### Multinomial Distribution

In the aforementioned example, we considered a dataset with only two classes - celebrities and non-celebrities. Now let us say we have a more granular dataset as follows:
<table>
    <tr>
        <th>Index</th> <th>Class</th> <th>Percentage</th> <th>Probability</th>
    </tr>
    <tr>
        <td>1</td> <td>Albert Einstein</td> <td>5%</td> <td>0.05</td>
    </tr>
    <tr>
        <td>2</td> <td>Marie Curie</td> <td>7%</td> <td>0.07</td>
    </tr>
    <tr>
        <td>3</td> <td>Gauss</td> <td>3%</td> <td>0.03</td>
    </tr>
    <tr>
        <td>4</td> <td>Others</td> <td>85%</td> <td>0.85</td>
    </tr>
</table>

    
Let us perform an experiment where we select 20 photos from the dataset and want to find the probability that class 1 occurs 4 times, class 2 occurs 2 times, class 3 occurs 1 time, and class 4 occurs the remaining 13 times. We can do so using the multinomial distribution, which extends the binomial distribution from 2 classes to $m$ classes.
    
Formally, let $C_{1}$, $C_{2}$, ..., $C_{m}$ be $m$ classes with probabilities $p_{1}$, $p_{2}$, ..., $p_{m}$. Let $X_{1}$, $X_{2}$, ..., $X_{m}$ be the corresponding random variables, where $X_{i}$ counts how many times class $C_{i}$ is selected in a set of $n$ trials.
    
Then, the multinomial probability function, giving the probability of $C_{1}$ being selected $k_{1}$ times, $C_{2}$ being selected $k_{2}$ times, ..., and $C_{m}$ being selected $k_{m}$ times, is
   $$ P(X_{1}=k_{1}, X_{2}=k_{2}, \ldots, X_{m}=k_{m}) = \frac{n!}{k_{1}!\,k_{2}! \cdots k_{m}!} \, p_{1}^{k_{1}} p_{2}^{k_{2}} \cdots p_{m}^{k_{m}} $$
    
where $\sum_{i=1}^m k_i = n$ and $\sum_{i=1}^m p_i = 1$.
    
Note that for $m=2$, this reduces to the binomial distribution. Now, let us implement this in PyTorch.

In [2]:
# Distribution parameters: number of trials per experiment and the
# per-class selection probabilities (Einstein, Curie, Gauss, Others)
num_trials = 20
p = torch.tensor([0.05, 0.07, 0.03, 0.85], dtype=torch.float32)

# Build the multinomial distribution over the four classes
multinomial_dist = Multinomial(total_count=num_trials, probs=p)

In [3]:
# Single test observation: per-class counts for one experiment
# (4 + 2 + 1 + 13 = 20 trials)
X = torch.tensor([4.0, 2.0, 1.0, 13.0], dtype=torch.float32)

# Function to evaluate log prob using math formula
def raw_eval(X, n, p):
    """Evaluate the multinomial log-probability directly from its formula.

    Computes log( n! / (k_1! ... k_m!) * p_1^k_1 ... p_m^k_m ) entirely in
    log space, using lgamma(k + 1) == log(k!). Working with logs keeps the
    factorials from overflowing and the probability product from
    underflowing when the counts are large.

    Args:
        X: Per-class counts k_1..k_m (tensor or sequence); expected to sum to n.
        n: Total number of trials.
        p: Per-class probabilities p_1..p_m (tensor or sequence).

    Returns:
        Scalar tensor holding the log-probability of observing X.
    """
    p = torch.as_tensor(p)
    if not p.is_floating_point():
        p = p.to(torch.get_default_dtype())
    X = torch.as_tensor(X, dtype=p.dtype, device=p.device)
    n = torch.as_tensor(n, dtype=p.dtype, device=p.device)

    # log of the multinomial coefficient n! / (k_1! ... k_m!)
    log_coeff = torch.lgamma(n + 1) - torch.lgamma(X + 1).sum()
    # xlogy evaluates k * log(p) with 0 * log(0) defined as 0, matching p**0 == 1
    log_powers = torch.xlogy(X, p).sum()
    return log_coeff + log_powers

# Log-probability of X as reported by the PyTorch distribution
log_prob = multinomial_dist.log_prob(X)
# The same quantity evaluated by hand from the closed-form formula
raw_eval_log_prob = raw_eval(X, num_trials, p)

print("Log Prob: {}".format(log_prob))
print("Raw eval Log Prob: {}".format(raw_eval_log_prob))

# The two evaluations must agree up to floating-point error
assert torch.isclose(log_prob, raw_eval_log_prob, atol=1e-4)

Log Prob: -7.0084991455078125
Raw eval Log Prob: -7.008501052856445


In [4]:
# Number of experiment runs
num_samples = 100000

# Seed PyTorch's global RNG so the drawn samples, and the statistics
# computed from them, are reproducible across Restart & Run All
torch.manual_seed(0)

# Draw samples. Each row is one experiment and holds the per-class counts
# obtained in that experiment.
samples = multinomial_dist.sample(torch.Size([num_samples]))

In [5]:
# Empirical mean: average per-class count across all drawn experiments
sample_mean = samples.mean(dim=0)
# Analytical mean as reported by the PyTorch distribution
dist_mean = multinomial_dist.mean

print("Sample mean: {}".format(sample_mean))
print("Dist Mean: {}".format(dist_mean))

# The two means should approximately match; both are close to
# num_trials * p_i for every class i
assert torch.allclose(sample_mean, dist_mean, atol=1e-1)

# The variance obtained from the samples. Reuse the batch already drawn in
# `samples` rather than sampling again, so the mean and variance describe the
# same data and no second round of sampling is needed.
sample_var = samples.var(dim=0)
print("Sample variance: {}".format(sample_var))

# The variance of the distribution from PyTorch
dist_var = multinomial_dist.variance
print("Dist Variance: {}".format(dist_var))

# As expected, the two variances approximately match.
assert torch.allclose(sample_var, dist_var, atol=1e-1)

Sample mean: tensor([ 1.0003,  1.3934,  0.6036, 17.0026])
Dist Mean: tensor([ 1.0000,  1.4000,  0.6000, 17.0000])
Sample variance: tensor([0.9525, 1.3029, 0.5839, 2.5657])
Dist Variance: tensor([0.9500, 1.3020, 0.5820, 2.5500])
