In [1]:
import numpy as np
import math
import torch
from torch.distributions.multinomial import Multinomial

### Multinomial Distribution

In the aforementioned example, we considered a dataset with only two classes - celebrities and non-celebrities. Now let us say we have a more granular dataset as follows:
<table>
    <tr>
        <td> Index </td> <td> Class </td> <td> Percentage </td> <td> Probability </td>
    </tr>
    <tr>
        <td> 1 </td> <td> Albert Einstein </td> <td> 5% </td> <td> 0.05 </td>
    </tr>
    <tr>
        <td> 2 </td> <td> Marie Curie </td> <td> 7% </td> <td> 0.07 </td>
    </tr>
    <tr>
        <td> 3 </td> <td> Gauss </td> <td> 3% </td> <td> 0.03 </td>
    </tr>
    <tr>
        <td> 4 </td> <td> Others </td> <td> 85% </td> <td> 0.85 </td>
    </tr>
</table>

    
Let us perform an experiment where we select 20 photos from the dataset and want to find out the probability that class 1 occurs 4 times, class 2 occurs 2 times, class 3 occurs 1 time and class 4 occurs the remaining 13 times. We can do so using the multinomial distribution, which is an extension of the binomial distribution from 2 classes to $m$ classes.
    
Formally, let $C_{1}$, $C_{2}$, ..., $C_{m}$ be $m$ classes with probabilities $p_{1}$, $p_{2}$, ..., $p_{m}$. Let $X_{1}$, $X_{2}$, ..., $X_{m}$ be the corresponding random variables in a set of $n$ trials.
    
Then, the multinomial probability function, giving the probability of $C_{1}$ being selected $k_{1}$ times, $C_{2}$ being selected $k_{2}$ times, ..., and $C_{m}$ being selected $k_{m}$ times, is
   $$ P(X_{1}=k_{1}, X_{2}=k_{2}, \ldots, X_{m}=k_{m}) = \frac{n!}{k_{1}!\,k_{2}! \cdots k_{m}!}\, p_{1}^{k_{1}} p_{2}^{k_{2}} \cdots p_{m}^{k_{m}} $$
    
where $ \sum_{i=1}^m k_i = n$ and $ \sum_{i=1}^m p_i = 1$
    
Note that for $m=2$, this becomes the binomial distribution. Now, let us implement this in PyTorch

In [2]:
# Seed PyTorch's global RNG so that the samples drawn from the distribution
# (and therefore every sample statistic printed below) are reproducible when
# the notebook is re-run from a fresh kernel.
SEED = 0
torch.manual_seed(SEED)

num_trials = 20  # Number of photos selected in a single experiment
# Probability of each of the four classes; the entries sum to 1
p = torch.tensor([0.05, 0.07, 0.03, 0.85], dtype=torch.float)
# Number of occurrences of each class whose joint probability we want;
# the entries sum to num_trials
k = torch.tensor([4, 2, 1, 13], dtype=torch.float)

num_samples = 100000 # Number of experiment runs

# Create the multinomial distribution
multinomial_dist = Multinomial(num_trials, probs=p)

In [3]:
def formula(k, n, p):
    """
    Log of the multinomial probability mass function.

    Evaluates log( n! / (k_1! ... k_m!) * p_1^k_1 ... p_m^k_m ) directly in
    log space: factorials are computed as lgamma(x + 1), so nothing overflows
    for large n, and the counts may be given as a float tensor.

    param: k = per-class occurrence counts (list of ints or tensor)
    param: n = total number of trials
    param: p = per-class probabilities (tensor or list of floats)
    return: 0-dim tensor holding the log-probability
    """
    p = torch.as_tensor(p)
    if not p.is_floating_point():
        p = p.to(torch.get_default_dtype())
    k = torch.as_tensor(k, dtype=p.dtype, device=p.device)
    n = torch.as_tensor(n, dtype=p.dtype, device=p.device)

    # log(n!) - sum_i log(k_i!)
    log_coefficient = torch.lgamma(n + 1) - torch.lgamma(k + 1).sum()
    # sum_i k_i * log(p_i); xlogy returns 0 when k_i == 0, so a class with
    # zero probability and zero occurrences contributes nothing (0 ** 0 == 1)
    log_powers = torch.xlogy(k, p).sum()
    return log_coefficient + log_powers

# Log-probability of the count vector k, evaluated with our own formula
formula_log_prob = formula(k, num_trials, p)

# Log-probability of the same count vector as reported by PyTorch
log_prob = multinomial_dist.log_prob(k)

# Both computations must agree up to floating-point error
assert torch.isclose(log_prob, formula_log_prob, atol=1e-4)

In [4]:
# Run the experiment <num_samples> times.
# Each row of `samples` holds the number of occurrences of every class in one experiment.
samples = multinomial_dist.sample((num_samples,))

print("Number of samples: {}".format(samples.shape[0]))

Number of samples: 100000


In [5]:
# The mean of the distribution from Pytorch
formula_mean = multinomial_dist.mean
print("Mean: {}".format(formula_mean))

# Our sample mean: the average number of occurrences of each class over all experiments
sample_mean = samples.mean(dim=0)
print("Sample mean: {}".format(sample_mean))

# As expected, the two means approximately match and are equal to
# [num_trials * p1, num_trials * p2, ..., num_trials * pm]
assert torch.allclose(formula_mean, sample_mean, atol=1e-1)


# The variance of the distribution from Pytorch
formula_var = multinomial_dist.variance
print("Variance: {}".format(formula_var))

# Our sample variance, computed from the same draws as the sample mean
# (no need to draw a second, independent set of samples)
sample_var = samples.var(dim=0)
print("Sample variance: {}".format(sample_var))

# As expected, the two variances approximately match.
assert torch.allclose(formula_var, sample_var, atol=1e-1)

Mean: tensor([ 1.0000,  1.4000,  0.6000, 17.0000])
Sample mean: tensor([ 1.0020,  1.4002,  0.6014, 16.9964])
Variance: tensor([0.9500, 1.3020, 0.5820, 2.5500])
Sample variance: tensor([0.9444, 1.3104, 0.5847, 2.5478])


In [6]:
def find_probability(X):
    """
    Estimate the probability of observing the count vector X as the fraction
    of rows of the global `samples` tensor that equal X in every class.

    param: X = [x1, x2, x3, x4] where xi denotes number of occurrences of class i
    return: 0-dim tensor holding the fraction of matching experiments
    raises: AssertionError if the counts in X do not add up to num_trials
    """
    assert sum(X) == num_trials
    target = torch.as_tensor(X, dtype=samples.dtype)
    # An experiment matches only when the count of every class agrees with X
    row_matches = (samples == target).all(dim=1)
    return row_matches.sum() / float(len(samples))

In [7]:
# Now let us find the probability of finding 4 class1, 2 class2, 1 class3 and 13 class4
X = [4, 2, 1, 13]
find_probability(X)

tensor(0.0010)

In [8]:
# Exponentiate the log-probability given by the formula to get the probability itself.
# Observe that the value is approximately equal to the value derived from the sample set
torch.exp(formula(X, num_trials, p))

tensor(0.0009)