In [None]:
'''
 * Copyright (c) 2008 Radhamadhab Dalai
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 * THE SOFTWARE.
'''

## Probability Distributions

In this notebook, we describe some widely used probability distributions.

---

### A.1. Normal Distribution, $N_p(\theta, \Sigma)$

$$
\theta \in \mathbb{R}^p \quad \text{and} \quad \Sigma \text{ is a } (p \times p) \text{ symmetric positive definite matrix.}
$$

$$
f(x \mid \theta, \Sigma) = (\det \Sigma)^{-1/2}\,(2\pi)^{-p/2}\, \exp\left(-\frac{(x-\theta)^T\Sigma^{-1}(x-\theta)}{2}\right)
$$

$$
E_{\theta, \Sigma}[X] = \theta \quad \text{and} \quad E_{\theta, \Sigma}\left[(X-\theta)(X-\theta)^T\right] = \Sigma.
$$

> **Note:** When $\Sigma$ is not positive definite, the $N_p(\theta,\Sigma)$ distribution does not have a density with respect to the Lebesgue measure on $\mathbb{R}^p$. For $p=1$, the log-normal distribution is defined as the distribution of $e^X$ when $X \sim N(\theta, \sigma^2)$.

---

## A.2. Gamma Distribution, $G_{\alpha}(\alpha, \beta)$

$$
f(x \mid \alpha, \beta) = \frac{\beta^\alpha}{\Gamma(\alpha)}\, x^{\alpha-1}\,e^{-\beta x}\, I_{[0,\infty)}(x)
$$

$$
E_{\alpha,\beta}[X] = \frac{\alpha}{\beta} \quad \text{and} \quad \operatorname{Var}_{\alpha,\beta}(X) = \frac{\alpha}{\beta^2}.
$$

> **Note:** Particular cases of the Gamma distribution include:
>
> - **Erlang Distribution:** $G_{\alpha}(\alpha, 1)$
> - **Exponential Distribution:** $G_1(\beta)$ (denoted as $\text{Exp}(\beta)$)
> - **Chi-squared Distribution:** $G_{\nu/2}(1/2)$ (denoted as $\chi^2_{\nu}$)
>
> *Also, note that sometimes the parameterization may vary (for instance, one might encounter $G_{\alpha}(1/\beta)$).*

---

## A.3. Beta Distribution, $B_{\alpha}(\alpha, \beta)$

$$
f(x \mid \alpha, \beta) = \frac{x^{\alpha-1}(1-x)^{\beta-1}}{B(\alpha,\beta)}\, I_{(0,1)}(x)
$$

Here, $B(\alpha,\beta)$ is the Beta function.


In [1]:
import math

def normal_pdf(x, theta, sigma):
    """
    Compute the probability density function (PDF) of a univariate normal distribution.
    
    Parameters:
      x     : Point at which PDF is evaluated.
      theta : Mean of the distribution.
      sigma : Standard deviation of the distribution.
      
    Returns:
      PDF value at x.
    """
    denominator = math.sqrt(2 * math.pi) * sigma
    exponent = -0.5 * ((x - theta) / sigma) ** 2
    return (1 / denominator) * math.exp(exponent)

def gamma_pdf(x, alpha, beta):
    """
    Compute the probability density function (PDF) of the Gamma distribution.
    
    Parameters:
      x     : Point at which PDF is evaluated. x must be non-negative.
      alpha : Shape parameter.
      beta  : Rate parameter.
      
    Returns:
      PDF value at x. Returns 0 if x is negative.
    """
    if x < 0:
        return 0
    numerator = beta ** alpha * x ** (alpha - 1) * math.exp(-beta * x)
    denominator = math.gamma(alpha)  # gamma(alpha) computes the Gamma function at alpha.
    return numerator / denominator

def beta_pdf(x, alpha, beta):
    """
    Compute the probability density function (PDF) of the Beta distribution.
    
    Parameters:
      x     : Point at which PDF is evaluated. x must be between 0 and 1.
      alpha : First shape parameter.
      beta  : Second shape parameter.
      
    Returns:
      PDF value at x. Returns 0 if x is not in (0, 1).
    """
    # The Beta function B(alpha, beta) can be computed via Gamma functions:
    # B(alpha, beta) = gamma(alpha) * gamma(beta) / gamma(alpha + beta)
    if x <= 0 or x >= 1:
        return 0
    B = math.gamma(alpha) * math.gamma(beta) / math.gamma(alpha + beta)
    return (x ** (alpha - 1) * (1 - x) ** (beta - 1)) / B

if __name__ == "__main__":
    # Test values
    x_normal = 0.5
    theta = 0
    sigma = 1
    print("Normal PDF at x =", x_normal, "with theta =", theta, "and sigma =", sigma, "is:", 
          normal_pdf(x_normal, theta, sigma))
    
    x_gamma = 0.5
    alpha_gamma = 2
    beta_gamma = 1
    print("Gamma PDF at x =", x_gamma, "with alpha =", alpha_gamma, "and beta =", beta_gamma, "is:",
          gamma_pdf(x_gamma, alpha_gamma, beta_gamma))
    
    x_beta = 0.5
    alpha_beta = 2
    beta_beta = 2
    print("Beta PDF at x =", x_beta, "with alpha =", alpha_beta, "and beta =", beta_beta, "is:",
          beta_pdf(x_beta, alpha_beta, beta_beta))


Normal PDF at x = 0.5 with theta = 0 and sigma = 1 is: 0.3520653267642995
Gamma PDF at x = 0.5 with alpha = 2 and beta = 1 is: 0.3032653298563167
Beta PDF at x = 0.5 with alpha = 2 and beta = 2 is: 1.5


## Probability Distributions

We recall here the density and the two first moments of most of the distributions used in this book. An exhaustive review of probability distributions is provided by Johnson and Kotz (1972) or the more recent Johnson and Hoeting (2003), Johnson et al. (1994, 1995). The densities are given with respect to Lebesgue or counting measure depending on the context.

### A.1. Normal Distribution, $N_p(\theta, \Sigma)$

($\theta \in \mathbb{R}^p$ and $\Sigma$ is a $p \times p$ symmetric positive definite matrix.)

$$
f(x|\theta, \Sigma) = (\det \Sigma)^{-1/2}(2\pi)^{-p/2} e^{-(x-\theta)^T\Sigma^{-1}(x-\theta)/2}.
$$

$$
E_{\theta, \Sigma}[X] = \theta \quad \text{and} \quad E_{\theta, \Sigma}[(X-\theta)(X-\theta)^T] = \Sigma.
$$

When $\Sigma$ is not positive definite, the $N_p(\theta, \Sigma)$ distribution has no density with respect to Lebesgue measure on $\mathbb{R}^p$. For $p=1$, the log-normal distribution is defined as the distribution of $e^X$ when $X \sim N(\theta, \sigma^2)$.

### A.2. Gamma Distribution, $G_{\alpha}(\alpha, \beta)$

($\alpha, \beta > 0$)

$$
f(x|\alpha, \beta) = \frac{\beta^\alpha}{\Gamma(\alpha)} x^{\alpha-1} e^{-\beta x} I_{[0,\infty)}(x).
$$

$$
E_{\alpha, \beta}[X] = \frac{\alpha}{\beta} \quad \text{and} \quad \operatorname{var}_{\alpha, \beta}(X) = \frac{\alpha}{\beta^2}.
$$

Particular cases of the Gamma distribution are the Erlang distribution, $G_{\alpha}(n,1)$, the exponential distribution, $G_{\alpha}(1, \beta)$ (denoted by $\operatorname{Exp}(\beta)$), and the chi-squared distribution, $G_{\alpha}(1/2,1/2)$ (denoted by $\chi^2$). (Note also that the opposite convention is sometimes adopted for the parameter, namely that $G_{\alpha}(\beta)$ may also be noted as $G_{\alpha}(1/\beta)$. See, e.g., Berger 1985.)

### A.3. Beta Distribution, $B_{\alpha}(\alpha, \beta)$

($\alpha, \beta > 0$)

$$
f(x|\alpha, \beta) = \frac{x^{\alpha-1}(1-x)^{\beta-1}}{B(\alpha,\beta)} I_{(0,1)}(x),
$$

where

$$
B(\alpha,\beta) = \frac{\Gamma(\alpha)\Gamma(\beta)}{\Gamma(\alpha+\beta)}.
$$


## Statistical Distributions and Properties

### A.8. Dirichlet Distribution, $D_k(\alpha_1, \ldots, \alpha_k)$
($\alpha_1, \ldots, \alpha_k > 0$ and $\alpha_0 = \alpha_1 + \cdots + \alpha_k$)

$$
f(x_1, \ldots, x_k) = \frac{\Gamma(\alpha_0)}{\Gamma(\alpha_1) \cdots \Gamma(\alpha_k)} x_1^{\alpha_1 - 1} \cdots x_k^{\alpha_k - 1} I_{\{0,1\}}(x_1, \ldots, x_k)
$$

$$
\mathrm{E}_{\alpha}[X_i] = \alpha_i (\alpha_0 + \text{var}(X_i)) = (\alpha_0 - \alpha_i) \alpha_i / [\alpha_0 (\alpha_0 + 1)] \quad \text{and} \quad \text{cov}(X_i, X_j) = -\alpha_i \alpha_j / [\alpha_0^2 (\alpha_0 + 1)] \quad (i \neq j).
$$

As a particular case, note that $(X_1, \ldots, X_k) \sim D_k(\alpha_1, \alpha_0)$ is equivalent to $X \sim \text{Be}(\alpha_1, \alpha_0)$.

### A.9. Pareto Distribution, $Pa(\alpha, \omega)$
($\alpha > 0$ and $\omega > 0$)

$$
f(x \mid \alpha, \omega) = \omega \alpha^\omega [1 \omega + \omega (x)]^{-(\alpha + 1)} I_{(\omega, \infty)}(x)
$$

$$
\mathrm{E}_{\omega, \alpha}[X] = \alpha \omega / (\alpha - 1) \quad (\alpha > 1) \quad \text{and} \quad \text{var}_{\omega, \alpha}(X) = \alpha \omega^2 / [(\alpha - 1)^2 (\alpha - 2)] \quad (\alpha > 2).
$$

### A.10. Binomial Distribution, $B(n, p)$
($0 \leq p \leq 1$)

$$
f(x \mid p) = \binom{n}{x} p^x (1 - p)^{n - x} I_{\{0, \ldots, n\}}(x).
$$

$$
\mathrm{E}_p[X] = np \quad \text{and} \quad \text{var}(X) = np(1 - p).
$$

### A.11. Multinomial Distribution, $M_k(n; p_1, \ldots, p_k)$
($n \geq 1$ ($1 \leq i \leq k$) and $\sum_{i=1}^k p_i = 1$)

$$
f(x_1, \ldots, x_k \mid p_1, \ldots, p_k) = \binom{n}{x_1 \cdots x_k} \prod_{i=1}^k p_i^{x_i} I_{\{x_1 + \cdots + x_k = n\}}.
$$

$$
\mathrm{E}_p[X_i] = np_i, \quad \text{var}(X_i) = np_i(1 - p_i), \quad \text{and} \quad \text{cov}(X_i, X_j) = -np_i p_j \quad (i \neq j).
$$

Note that, if $X \sim M_k(n; p_1, \ldots, p_k)$, $X_i \sim B(n, p_i)$, and that the binomial distribution $X \sim B(n, p)$ corresponds to $X \sim M_2(n, p, 1 - p)$.

### A.12. Poisson Distribution, $\mathcal{P}(\lambda)$
($\lambda > 0$)

$$
f(x \mid \lambda) = e^{-\lambda} \frac{\lambda^x}{x!} I_{\mathbb{N}}(x).
$$

$$
\mathrm{E}_\lambda[X] = \lambda \quad \text{and} \quad \text{var}_\lambda(X) = \lambda.
$$

### A.13. Negative Binomial Distribution, $\mathcal{Ne}(r, p)$
($0 \leq p \leq 1$)

$$
f(x \mid p) = \binom{x + r - 1}{x} p^r (1 - p)^x I_{\mathbb{N}}(x).
$$

$$
\mathrm{E}_p[X] = r(1 - p)/p \quad \text{and} \quad \text{var}_p(X) = r(1 - p)/p^2.
$$

## Statistical Distributions and Properties (Continued)

### A.14. Hypergeometric Distribution, $\mathcal{H}yp(N; n, p)$
($0 \leq p \leq 1$, $n < N$, and $pN \in \mathbb{N}$)

$$
f(x \mid p) = \frac{\binom{pN}{x} \binom{(1-p)N}{n-x}}{\binom{N}{n}} I_{\{0,1,\ldots,n\}}(x).
$$

$$
\mathrm{E}_{N,n,p}[X] = np \quad \text{and} \quad \text{var}_{N,n,p}(X) = (N-n)np(1-p)/(N-1).
$$

In [4]:
import math
import random
from functools import reduce
import operator

# Helper function to compute binomial coefficient (n choose k)
def comb(n, k):
    if k < 0 or k > n:
        return 0
    # Use symmetry to minimize computation: (n choose k) = (n choose n-k)
    k = min(k, n - k)
    return math.factorial(n) // (math.factorial(k) * math.factorial(n - k))

# A.8 Dirichlet Distribution
class Dirichlet:
    def __init__(self, alphas):
        self.alphas = alphas  # List of alpha parameters
        self.alpha_0 = sum(alphas)  # Sum of alphas

    def pdf(self, x):
        # x is a list of values (x_1, ..., x_k), sum(x) = 1, 0 <= x_i <= 1
        if len(x) != len(self.alphas) or abs(sum(x) - 1.0) > 1e-9 or any(xi < 0 or xi > 1 for xi in x):
            return 0.0
        # Compute PDF: Γ(α_0)/(Γ(α_1)...Γ(α_k)) * x_1^(α_1-1) * ... * x_k^(α_k-1)
        numerator = math.gamma(self.alpha_0)
        denominator = reduce(operator.mul, (math.gamma(alpha) for alpha in self.alphas), 1)
        prod = reduce(operator.mul, (xi ** (alpha - 1) for xi, alpha in zip(x, self.alphas)), 1)
        return (numerator / denominator) * prod

    def expected_value(self, i):
        # E[X_i] = α_i / α_0
        return self.alphas[i] / self.alpha_0

    def variance(self, i):
        # var(X_i) = (α_0 - α_i)α_i / [α_0^2 (α_0 + 1)]
        alpha_i = self.alphas[i]
        return (self.alpha_0 - alpha_i) * alpha_i / (self.alpha_0 ** 2 * (self.alpha_0 + 1))

# A.9 Pareto Distribution
class Pareto:
    def __init__(self, alpha, omega):
        self.alpha = alpha  # Shape parameter
        self.omega = omega  # Scale parameter

    def pdf(self, x):
        # f(x) = ωα^ω / (ω + x)^(α+1) for x > ω
        if x <= self.omega:
            return 0.0
        return self.omega * (self.alpha ** self.omega) / ((self.omega + x) ** (self.alpha + 1))

    def expected_value(self):
        # E[X] = αω / (α-1) for α > 1
        if self.alpha <= 1:
            raise ValueError("Expected value undefined for α <= 1")
        return (self.alpha * self.omega) / (self.alpha - 1)

    def variance(self):
        # var(X) = αω^2 / [(α-1)^2 (α-2)] for α > 2
        if self.alpha <= 2:
            raise ValueError("Variance undefined for α <= 2")
        return (self.alpha * self.omega ** 2) / ((self.alpha - 1) ** 2 * (self.alpha - 2))

    def sample(self):
        # Inverse CDF method: X = ω * (U^(-1/α) - 1), U ~ Uniform(0,1)
        u = random.random()
        return self.omega * (u ** (-1 / self.alpha) - 1)

# A.10 Binomial Distribution
class Binomial:
    def __init__(self, n, p):
        self.n = n  # Number of trials
        self.p = p  # Probability of success

    def pmf(self, x):
        # f(x) = (n choose x) * p^x * (1-p)^(n-x)
        if not isinstance(x, int) or x < 0 or x > self.n:
            return 0.0
        coef = comb(self.n, x)  # Use custom comb function
        return coef * (self.p ** x) * ((1 - self.p) ** (self.n - x))

    def expected_value(self):
        # E[X] = np
        return self.n * self.p

    def variance(self):
        # var(X) = np(1-p)
        return self.n * self.p * (1 - self.p)

    def sample(self):
        # Generate via sum of Bernoulli trials
        return sum(random.random() < self.p for _ in range(self.n))

# A.11 Multinomial Distribution
class Multinomial:
    def __init__(self, n, ps):
        self.n = n  # Number of trials
        self.ps = ps  # List of probabilities [p_1, ..., p_k], sum(ps) = 1

    def pmf(self, xs):
        # f(x_1, ..., x_k) = (n choose x_1,...,x_k) * p_1^x_1 * ... * p_k^x_k
        if len(xs) != len(self.ps) or sum(xs) != self.n or any(x < 0 for x in xs):
            return 0.0
        coef = math.factorial(self.n) // reduce(operator.mul, (math.factorial(x) for x in xs), 1)
        prod = reduce(operator.mul, (p ** x for p, x in zip(self.ps, xs)), 1)
        return coef * prod

    def expected_value(self, i):
        # E[X_i] = np_i
        return self.n * self.ps[i]

    def variance(self, i):
        # var(X_i) = np_i(1-p_i)
        return self.n * self.ps[i] * (1 - self.ps[i])

    def sample(self):
        # Generate by sampling from categorical distribution n times
        result = [0] * len(self.ps)
        for _ in range(self.n):
            r = random.random()
            cumsum = 0
            for i, p in enumerate(self.ps):
                cumsum += p
                if r < cumsum:
                    result[i] += 1
                    break
        return result

# A.12 Poisson Distribution
class Poisson:
    def __init__(self, lambda_):
        self.lambda_ = lambda_  # Rate parameter

    def pmf(self, x):
        # f(x) = e^(-λ) * λ^x / x!
        if not isinstance(x, int) or x < 0:
            return 0.0
        return (math.exp(-self.lambda_) * (self.lambda_ ** x)) / math.factorial(x)

    def expected_value(self):
        # E[X] = λ
        return self.lambda_

    def variance(self):
        # var(X) = λ
        return self.lambda_

    def sample(self):
        # Knuth's algorithm for Poisson sampling
        L = math.exp(-self.lambda_)
        k = 0
        p = 1.0
        while p > L:
            k += 1
            p *= random.random()
        return k - 1

# A.13 Negative Binomial Distribution
class NegativeBinomial:
    def __init__(self, r, p):
        self.r = r  # Number of successes
        self.p = p  # Probability of success

    def pmf(self, x):
        # f(x) = (x+r-1 choose x) * p^r * (1-p)^x
        if not isinstance(x, int) or x < 0:
            return 0.0
        coef = comb(x + self.r - 1, x)  # Use custom comb function
        return coef * (self.p ** self.r) * ((1 - self.p) ** x)

    def expected_value(self):
        # E[X] = r(1-p)/p
        return self.r * (1 - self.p) / self.p

    def variance(self):
        # var(X) = r(1-p)/p^2
        return self.r * (1 - self.p) / (self.p ** 2)

    def sample(self):
        # Generate as sum of r Geometric distributions
        return sum(self._geometric_sample() for _ in range(self.r))

    def _geometric_sample(self):
        # Sample from Geometric(p) (number of failures until first success)
        u = random.random()
        return int(math.log(u) / math.log(1 - self.p))

# A.14 Hypergeometric Distribution
class Hypergeometric:
    def __init__(self, N, n, p):
        self.N = N  # Population size
        self.n = n  # Number of draws
        self.p = p  # Proportion of successes
        self.K = int(self.N * self.p)  # Number of successes in population

    def pmf(self, x):
        # f(x) = (K choose x) * and ((N-K) choose (n-x)) / (N choose n)
        if not isinstance(x, int) or x < 0 or x > min(self.n, self.K):
            return 0.0
        numerator = comb(self.K, x) * comb(self.N - self.K, self.n - x)  # Use custom comb function
        denominator = comb(self.N, self.n)  # Use custom comb function
        return numerator / denominator

    def expected_value(self):
        # E[X] = np
        return self.n * self.p

    def variance(self):
        # var(X) = (N-n)np(1-p)/(N-1)
        return (self.N - self.n) * self.n * self.p * (1 - self.p) / (self.N - 1)

    def sample(self):
        # Simulate drawing without replacement
        population = [1] * self.K + [0] * (self.N - self.K)
        random.shuffle(population)
        return sum(population[:self.n])

# Example usage:
if __name__ == "__main__":
    # Dirichlet
    dirichlet = Dirichlet([2, 3, 4])
    print("Dirichlet PDF:", dirichlet.pdf([0.3, 0.3, 0.4]))
    print("Dirichlet E[X_0]:", dirichlet.expected_value(0))

    # Pareto
    pareto = Pareto(3, 1)
    print("Pareto PDF at x=2:", pareto.pdf(2))
    print("Pareto E[X]:", pareto.expected_value())

    # Binomial
    binomial = Binomial(10, 0.5)
    print("Binomial PMF at x=5:", binomial.pmf(5))
    print("Binomial E[X]:", binomial.expected_value())

    # Multinomial
    multinomial = Multinomial(10, [0.3, 0.3, 0.4])
    print("Multinomial PMF:", multinomial.pmf([3, 3, 4]))
    print("Multinomial E[X_0]:", multinomial.expected_value(0))

    # Poisson
    poisson = Poisson(4)
    print("Poisson PMF at x=3:", poisson.pmf(3))
    print("Poisson E[X]:", poisson.expected_value())

    # Negative Binomial
    neg_binomial = NegativeBinomial(3, 0.5)
    print("Negative Binomial PMF at x=2:", neg_binomial.pmf(2))
    print("Negative Binomial E[X]:", neg_binomial.expected_value())

    # Hypergeometric
    hypergeom = Hypergeometric(20, 5, 0.4)
    print("Hypergeometric PMF at x=2:", hypergeom.pmf(2))
    print("Hypergeometric E[X]:", hypergeom.expected_value())

Dirichlet PDF: 5.8060800000000015
Dirichlet E[X_0]: 0.2222222222222222
Pareto PDF at x=2: 0.037037037037037035
Pareto E[X]: 1.5
Binomial PMF at x=5: 0.24609375
Binomial E[X]: 5.0
Multinomial PMF: 0.07838207999999999
Multinomial E[X_0]: 3.0
Poisson PMF at x=3: 0.19536681481316456
Poisson E[X]: 4
Negative Binomial PMF at x=2: 0.1875
Negative Binomial E[X]: 3.0
Hypergeometric PMF at x=2: 0.3973168214654283
Hypergeometric E[X]: 2.0


### Bas aaj ke liye itnaa hee.

This was one of a good kind of bumpy road......

This was a great journey...... And wish for more

## Om Namoh Sachidananda
## Vande
