# 03 — Common discrete distributions (simulate + compare theory)

Distributions from the formula sheet:
- Discrete Uniform U(a,b)
- Bernoulli(p)
- Binomial(n,p)
- Geometric(p)
- Hypergeometric(N,D,n)
- Poisson(λ)


In [None]:
import numpy as np
import pandas as pd
import math
import matplotlib.pyplot as plt

# Shared, seeded random generator used by the simulation cells; edit the seed
# value to obtain a different (but still repeatable) run.
rng = np.random.default_rng(seed=42)


## Helper: plot histogram of integer-valued samples


In [None]:
def hist_int(samples, title):
    """Show a bar chart of the relative frequency of each distinct sample value.

    Parameters
    ----------
    samples : array-like
        Values to tabulate; each distinct value gets one bar.
    title : str
        Text used as the figure title.

    The bar heights are the counts divided by the total number of values
    passed in, so they always sum to 1 for the data given to this function.
    """
    # One entry per distinct value, together with how often it occurs.
    support, tallies = np.unique(samples, return_counts=True)
    rel_freq = tallies / tallies.sum()

    plt.figure()
    plt.bar(support, rel_freq)
    plt.title(title)
    plt.xlabel("k")
    plt.ylabel("relative frequency")
    plt.show()


## Discrete Uniform U(a,b)


In [None]:
# Support endpoints of U(a,b) and the number of simulated draws.
a = 2
b = 9
n = 200000

# Closed-form moments: U(a,b) has b - a + 1 equally likely integer values.
theory_mean = (a + b) / 2
theory_var = ((b - a + 1) ** 2 - 1) / 12

# `high` is exclusive by default, so b + 1 is passed to make b itself reachable.
X = rng.integers(low=a, high=b + 1, size=n)

print("Empirical mean/var:", np.mean(X), np.var(X))
print("Theoretical mean/var:", theory_mean, theory_var)
hist_int(X, f"Discrete Uniform U({a},{b})")


## Bernoulli(p)


In [None]:
# Success probability and number of simulated draws.
p = 0.3
n = 200000

# A Bernoulli(p) draw is a Binomial draw with a single trial.
X = rng.binomial(1, p, size=n)

print("Empirical mean/var:", X.mean(), X.var())
# Bernoulli(p) has mean p and variance p(1-p).
print("Theoretical mean/var:", p, p*(1-p))
# Show the actual parameter value in the title, like the other distribution plots.
hist_int(X, f"Bernoulli(p={p})")


## Binomial(n,p)


In [None]:
# Binomial parameters: per-trial success probability and trials per experiment.
p = 0.3
n_trials = 20
n = 200000  # number of simulated experiments

# Each entry of X is the success count out of n_trials independent trials.
X = rng.binomial(n=n_trials, p=p, size=n)

print("Empirical mean/var:", np.mean(X), np.var(X))
# Theory: mean n*p, variance n*p*(1-p).
print("Theoretical mean/var:", n_trials * p, n_trials * p * (1 - p))
hist_int(X, f"Binomial(n={n_trials}, p={p})")


## Geometric(p): number of tries until first success (support 1,2,3,...)


In [None]:
p = 0.2
n = 200000

# NumPy's geometric sampler already returns the number of trials up to and
# including the first success (support 1, 2, 3, ...), so no +1 shift is applied.
X = rng.geometric(p, size=n)

# Moments for the "number of tries" parameterization: mean 1/p, variance (1-p)/p^2.
theory_mean = 1/p
theory_var = (1-p)/(p**2)

print("Empirical mean/var:", X.mean(), X.var())
print("Theoretical mean/var:", theory_mean, theory_var)

# For plotting, truncate the long tail for visibility. hist_int normalizes by
# the number of samples it receives, so the bars are relative frequencies
# among the kept samples (X <= 20), not among all n draws.
hist_int(X[X<=20], f"Geometric(p={p}) (truncated at 20)")


## Hypergeometric(N, D, n): successes in sample without replacement


In [None]:
# Population description and sample size for each draw without replacement.
N = 100      # total number of items in the population
D = 30       # items in the population that count as a success
n_draw = 10  # items drawn per experiment

n = 200000   # number of simulated experiments

# Closed-form moments; the variance is kept in the same factor order as the
# formula sheet so the printed value is unchanged.
theory_mean = n_draw * (D/N)
theory_var = (D/(N-1)) * n_draw * ((N-n_draw)/N) * (1 - D/N)

# Arguments are (number of good items, number of bad items, sample size).
X = rng.hypergeometric(D, N - D, n_draw, size=n)

print("Empirical mean/var:", np.mean(X), np.var(X))
print("Theoretical mean/var:", theory_mean, theory_var)
hist_int(X, f"Hypergeometric(N={N}, D={D}, n={n_draw})")


## Poisson(λ)


In [None]:
# Poisson rate and number of simulated draws.
lam = 4.0
n = 200000

X = rng.poisson(lam=lam, size=n)

print("Empirical mean/var:", np.mean(X), np.var(X))
# For a Poisson distribution the mean and the variance both equal the rate.
print("Theoretical mean/var:", lam, lam)
# Only values up to 15 are plotted so the tail does not stretch the axis.
hist_int(X[X <= 15], f"Poisson(λ={lam}) (truncated at 15)")
