In [None]:
#Binomial distribution vs. Normal distribution

import numpy as np
import scipy.stats as stats
import matplotlib.pyplot as plt

# Parameters of the binomial distribution, read interactively:
# n is the number of trials, p the success probability of one trial.
n = int(input("n = "))
p = float(input("The probability is "))

# Exact binomial PMF on its whole support k = 0, ..., n.
binom_rv = stats.binom(n, p)
x = np.arange(0, n + 1)
binom_pmf = binom_rv.pmf(x)

# Normal approximation N(mu, sigma^2) using the binomial mean and standard
# deviation. The density is evaluated at the integers k so that it can be
# compared with the PMF bar by bar.
mu = n * p
sigma = np.sqrt(n * p * (1 - p))
norm_rv = stats.norm(mu, sigma)
norm_pmf = norm_rv.pdf(x)

# The same normal density drawn as a smooth curve.
# The grid covers mu +/- 4 sigma with 400 points. A fixed 100-point grid on
# [-n, n] has a step of about n/50, which is wider than sigma (<= sqrt(n)/2)
# as soon as n exceeds roughly 625, so the bell shape would be undersampled.
continuous_norm_rv = stats.norm(mu, sigma)
continuous_x = np.linspace(mu - 4 * sigma, mu + 4 * sigma, 400)
continuous_norm_pmf = continuous_norm_rv.pdf(continuous_x)

# Graphs: exact PMF as bars, normal density at the integers (dashed red)
# and on the fine grid (solid green).
plt.bar(x, binom_pmf, alpha=0.5, label='Binomial')
plt.plot(x, norm_pmf, 'r--', label='Normal approximation')
plt.plot(continuous_x, continuous_norm_pmf, 'g-', label='Continuous Normal')
plt.xlabel('k')  # x-axis
plt.ylabel('Probability')  # y-axis
plt.legend()
plt.show()

In [None]:
#Binomial distribution vs. Poisson distribution

import numpy as np
import scipy.stats as stats
import matplotlib.pyplot as plt

# Read the binomial parameters interactively: number of trials, then the
# success probability of a single trial.
n = int(input("n = "))
p = float(input("The probability is "))

# Support of the binomial distribution, k = 0, ..., n.
x = np.arange(n + 1)

# Exact binomial PMF on that support.
binom_rv = stats.binom(n, p)
binom_pmf = binom_rv.pmf(x)

# Poisson approximation with the same mean as the binomial (lambda = n * p),
# evaluated on the same support.
lambda_ = n * p
poisson_rv = stats.poisson(lambda_)
poisson_pmf = poisson_rv.pmf(x)

# Exact PMF as bars with the Poisson PMF overlaid as a solid green line.
plt.bar(x, binom_pmf, label='Binomial', alpha=0.5)
plt.plot(x, poisson_pmf, 'g-', label='Poisson approximation')
plt.ylabel('Probability')
plt.xlabel('k')
plt.legend()
plt.show()

In [None]:
# Binomial distribution vs. its approximation by an Edgeworth expansion

import numpy as np
import scipy.stats as stats
import matplotlib.pyplot as plt
from scipy.special import erf

# Read the binomial parameters interactively: number of trials, then the
# success probability of a single trial.
n = int(input("n = "))
p = float(input("The probability is "))

# Exact binomial PMF evaluated at every point of the support k = 0, ..., n.
x = np.arange(n + 1)
binom_rv = stats.binom(n, p)
binom_pmf = binom_rv.pmf(x)

# The function of Edgeworth expansion
def edgeworth_expansion(x, n, p):
    """Approximate the CDF of Binomial(n, p) at ``x`` by an Edgeworth expansion.

    The standard normal CDF is corrected with the skewness and excess
    kurtosis of the binomial distribution:

        F(x) ~ Phi(z) - phi(z) * [ g1/6 * He2(z) + g2/24 * He3(z)
                                   + g1**2/72 * He5(z) ]

    where z = (x - n*p) / sqrt(n*p*q), g1 = (q - p) / sqrt(n*p*q),
    g2 = (1 - 6*p*q) / (n*p*q) and He_k are the probabilists' Hermite
    polynomials. Every correction term enters with a minus sign and the
    squared-skewness term carries the factor 1/72.

    Parameters
    ----------
    x : scalar or array-like
        Point(s) at which the CDF is approximated.
    n : int
        Number of trials.
    p : float
        Success probability; p = 0 or p = 1 makes sigma zero, and the
        result is then NaN/inf because no guard is applied.

    Returns
    -------
    float or numpy.ndarray
        Approximate CDF value(s), same shape as ``x``. The expansion is not
        a true CDF: values may fall slightly outside [0, 1] in the tails.

    Notes
    -----
    The binomial is a lattice distribution; to approximate P(X <= k) for an
    integer k it is customary to evaluate at k + 0.5 (continuity correction).
    """
    q = 1 - p
    mu = n * p
    variance = n * p * q
    sigma = np.sqrt(variance)
    z = (np.asarray(x, dtype=float) - mu) / sigma

    skewness = (q - p) / sigma
    kurtosis = (1 - 6 * p * q) / variance  # excess kurtosis

    phi_z = stats.norm.pdf(z)
    Phi_z = stats.norm.cdf(z)

    # Probabilists' Hermite polynomials He2, He3 and He5.
    he2 = z**2 - 1
    he3 = z**3 - 3 * z
    he5 = z**5 - 10 * z**3 + 15 * z

    correction = -phi_z * (
        skewness / 6 * he2
        + kurtosis / 24 * he3
        + skewness**2 / 72 * he5
    )

    return Phi_z + correction

# The approximation by using Edgeworth expansion.
# The expansion is a continuous CDF, so P(X = k) is taken as
# F(k + 1/2) - F(k - 1/2) (continuity correction). Differencing F at the
# integers themselves would describe the interval (k - 1, k] and shift the
# curve by half a unit relative to the binomial bars.
# prepend=0 assigns all approximate mass below 1/2 to k = 0.
edgeworth_cdf = edgeworth_expansion(x + 0.5, n, p)
edgeworth_pmf = np.diff(edgeworth_cdf, prepend=0)

# Graphs: exact PMF as bars with the Edgeworth-based PMF as a blue line.
plt.bar(x, binom_pmf, alpha=0.5, label='Binomial')
plt.plot(x, edgeworth_pmf, 'b-', label='Edgeworth approximation')
plt.xlabel('k')
plt.ylabel('Probability')
plt.legend()
plt.show()

In [None]:
#Binomial distribution vs. others

import numpy as np
import scipy.stats as stats
import matplotlib.pyplot as plt
from scipy.special import erf

# Parameters of the binomial distribution, read interactively:
# n is the number of trials, p the success probability of one trial.
n = int(input("n = "))
p = float(input("The probability is "))

# Exact binomial PMF on its whole support k = 0, ..., n.
binom_rv = stats.binom(n, p)
x = np.arange(0, n + 1)
binom_pmf = binom_rv.pmf(x)

# Normal approximation N(mu, sigma^2) using the binomial mean and standard
# deviation; the density is evaluated at the integers k.
mu = n * p
sigma = np.sqrt(n * p * (1 - p))
norm_rv = stats.norm(mu, sigma)
norm_pmf = norm_rv.pdf(x)

# The same normal density drawn as a smooth curve.
# The grid covers mu +/- 4 sigma with 400 points. A fixed 100-point grid on
# [-n, n] has a step of about n/50, which is wider than sigma (<= sqrt(n)/2)
# as soon as n exceeds roughly 625, so the bell shape would be undersampled.
continuous_norm_rv = stats.norm(mu, sigma)
continuous_x = np.linspace(mu - 4 * sigma, mu + 4 * sigma, 400)
continuous_norm_pmf = continuous_norm_rv.pdf(continuous_x)

# Poisson approximation with the same mean as the binomial (lambda = n * p).
lambda_ = n * p
poisson_rv = stats.poisson(lambda_)
poisson_pmf = poisson_rv.pmf(x)

# The function of Edgeworth expansion
def edgeworth_expansion(x, n, p):
    """Approximate the CDF of Binomial(n, p) at ``x`` by an Edgeworth expansion.

    The standard normal CDF is corrected with the skewness and excess
    kurtosis of the binomial distribution:

        F(x) ~ Phi(z) - phi(z) * [ g1/6 * He2(z) + g2/24 * He3(z)
                                   + g1**2/72 * He5(z) ]

    where z = (x - n*p) / sqrt(n*p*q), g1 = (q - p) / sqrt(n*p*q),
    g2 = (1 - 6*p*q) / (n*p*q) and He_k are the probabilists' Hermite
    polynomials. Every correction term enters with a minus sign and the
    squared-skewness term carries the factor 1/72.

    Parameters
    ----------
    x : scalar or array-like
        Point(s) at which the CDF is approximated.
    n : int
        Number of trials.
    p : float
        Success probability; p = 0 or p = 1 makes sigma zero, and the
        result is then NaN/inf because no guard is applied.

    Returns
    -------
    float or numpy.ndarray
        Approximate CDF value(s), same shape as ``x``. The expansion is not
        a true CDF: values may fall slightly outside [0, 1] in the tails.

    Notes
    -----
    The binomial is a lattice distribution; to approximate P(X <= k) for an
    integer k it is customary to evaluate at k + 0.5 (continuity correction).
    """
    q = 1 - p
    mu = n * p
    variance = n * p * q
    sigma = np.sqrt(variance)
    z = (np.asarray(x, dtype=float) - mu) / sigma

    skewness = (q - p) / sigma
    kurtosis = (1 - 6 * p * q) / variance  # excess kurtosis

    phi_z = stats.norm.pdf(z)
    Phi_z = stats.norm.cdf(z)

    # Probabilists' Hermite polynomials He2, He3 and He5.
    he2 = z**2 - 1
    he3 = z**3 - 3 * z
    he5 = z**5 - 10 * z**3 + 15 * z

    correction = -phi_z * (
        skewness / 6 * he2
        + kurtosis / 24 * he3
        + skewness**2 / 72 * he5
    )

    return Phi_z + correction

# The approximation by using Edgeworth expansion.
# The expansion is a continuous CDF, so P(X = k) is taken as
# F(k + 1/2) - F(k - 1/2) (continuity correction). Differencing F at the
# integers themselves would describe the interval (k - 1, k] and shift the
# curve by half a unit relative to the binomial bars.
# prepend=0 assigns all approximate mass below 1/2 to k = 0.
edgeworth_cdf = edgeworth_expansion(x + 0.5, n, p)
edgeworth_pmf = np.diff(edgeworth_cdf, prepend=0)

# Graphs: exact PMF as bars with every approximation overlaid as a line.
plt.bar(x, binom_pmf, alpha=0.5, label='Binomial')
plt.plot(x, norm_pmf, label='Normal approximation')
plt.plot(continuous_x, continuous_norm_pmf, 'r--', label='Continuous Normal')
plt.plot(x, poisson_pmf, label='Poisson approximation')
plt.plot(x, edgeworth_pmf, label='Edgeworth approximation')
plt.xlabel('k')
plt.ylabel('Probability')
plt.title('Comparison between distributions (n = '+str(n)+', probability: '+str(p)+')')
plt.legend()
plt.show()

In [None]:
#Multinomial distribution vs. others (without line graphs)
import numpy as np
import matplotlib.pyplot as plt
from scipy.stats import dirichlet, multinomial

# Parameter input
categories = int(input("Number of categories = "))
trials = int(input("Number of trials = "))

# Concentration parameters of the Dirichlet distribution, drawn at random
# from [1, 2). Dirichlet samples sum to one for any positive alpha, so the
# offset of 1 only keeps every parameter away from zero.
# Manual alternative:
#alpha = np.array([float(input(f"Dirichlet parameters of the category{i+1} = ")) for i in range(categories)])
alpha = np.random.rand(categories) + 1

# Rate parameters of the Poisson distribution, drawn at random from [0, 10).
# Manual alternative:
#lambdas = np.array([float(input(f"Poisson parameter λ of the category{i+1} = ")) for i in range(categories)])
lambdas = np.random.rand(categories) * 10

# Transition matrix of a Markov chain: random entries, each row normalised
# so that it sums to one.
transition_matrix = np.random.rand(categories, categories)
transition_matrix = transition_matrix / transition_matrix.sum(axis=1, keepdims=True)

# Setting the initial state distribution evenly
initial_state_distribution = np.full(categories, 1.0 / categories)

# Randomly generated and normalised parameters (probabilities) of a multinomial distribution
true_probs = np.random.rand(categories)
true_probs /= true_probs.sum()

# NOTE(review): alpha, lambdas and transition_matrix are drawn independently
# of true_probs, so the three quantities computed below are not tied to the
# multinomial's parameters -- confirm that this is intended.

# One multinomial draw of `trials` trials, converted to relative frequencies.
samples_multinomial = multinomial.rvs(n=trials, p=true_probs, size=1)
approx_multinomial = samples_multinomial[0] / trials

# Dirichlet: average of `trials` sampled probability vectors.
samples_dirichlet = dirichlet.rvs(alpha, size=trials)
approx_multinomial_dirichlet = np.mean(samples_dirichlet, axis=0)

# Poisson: independent counts per category, normalised row by row and averaged.
# A row whose counts are all zero has no defined proportions (0/0 gives NaN,
# and a single NaN would turn the whole average into NaN), so such rows are
# left out of the average.
samples_poisson = np.random.poisson(lambdas, size=(trials, categories))
poisson_row_totals = samples_poisson.sum(axis=1, keepdims=True)
poisson_valid_rows = poisson_row_totals[:, 0] > 0
approx_multinomial_poisson = np.mean(
    samples_poisson[poisson_valid_rows] / poisson_row_totals[poisson_valid_rows],
    axis=0,
)

# Markov chain: one-hot record of the visited state at each step; the column
# means are the fractions of time spent in each state.
states = np.zeros((trials, categories))
current_state = np.random.choice(categories, p=initial_state_distribution)
states[0, current_state] = 1

for t in range(1, trials):
    current_state = np.random.choice(categories, p=transition_matrix[current_state])
    states[t, current_state] = 1

approx_multinomial_markov = np.mean(states, axis=0)

# Plots: four bars per category, placed side by side around the tick.
labels = [f"{i+1}" for i in range(categories)] #Each category
x = np.arange(categories)
width = 0.15

fig, ax = plt.subplots()
ax.bar(x - 1.5*width, approx_multinomial, width, label='True Multinomial')
ax.bar(x - 0.5*width, approx_multinomial_dirichlet, width, label='Dirichlet')
ax.bar(x + 0.5*width, approx_multinomial_poisson, width, label='Poisson')
ax.bar(x + 1.5*width, approx_multinomial_markov, width, label='Markov')

ax.set_xlabel('Categories')
ax.set_ylabel('Probability')
ax.set_title('Approximations to multinomial ('+str(trials)+' trials)')
ax.set_xticks(x)
ax.set_xticklabels(labels)
ax.legend()

plt.show()

In [None]:
#Multinomial distribution vs. others (with line graphs)
import numpy as np
import matplotlib.pyplot as plt
from scipy.stats import dirichlet, multinomial

# Parameter input
categories = int(input("Number of categories = "))
trials = int(input("Number of trials = "))

# Concentration parameters of the Dirichlet distribution, drawn at random
# from [1, 2). Dirichlet samples sum to one for any positive alpha, so the
# offset of 1 only keeps every parameter away from zero.
# Manual alternative:
#alpha = np.array([float(input(f"Dirichlet parameters of the category{i+1} = ")) for i in range(categories)])
alpha = np.random.rand(categories) + 1

# Rate parameters of the Poisson distribution, drawn at random from [0, 10).
# Manual alternative:
#lambdas = np.array([float(input(f"Poisson parameter λ of the category{i+1} = ")) for i in range(categories)])
lambdas = np.random.rand(categories) * 10

# Transition matrix of a Markov chain: random entries, each row normalised
# so that it sums to one.
transition_matrix = np.random.rand(categories, categories)
transition_matrix = transition_matrix / transition_matrix.sum(axis=1, keepdims=True)

# Setting the initial state distribution evenly
initial_state_distribution = np.full(categories, 1.0 / categories)

# Randomly generated and normalised parameters (probabilities) of a multinomial distribution
true_probs = np.random.rand(categories)
true_probs /= true_probs.sum()

# NOTE(review): alpha, lambdas and transition_matrix are drawn independently
# of true_probs, so the three quantities computed below are not tied to the
# multinomial's parameters -- confirm that this is intended.

# One multinomial draw of `trials` trials, converted to relative frequencies.
samples_multinomial = multinomial.rvs(n=trials, p=true_probs, size=1)
approx_multinomial = samples_multinomial[0] / trials

# Dirichlet: average of `trials` sampled probability vectors.
samples_dirichlet = dirichlet.rvs(alpha, size=trials)
approx_multinomial_dirichlet = np.mean(samples_dirichlet, axis=0)

# Poisson: independent counts per category, normalised row by row and averaged.
# A row whose counts are all zero has no defined proportions (0/0 gives NaN,
# and a single NaN would turn the whole average into NaN), so such rows are
# left out of the average.
samples_poisson = np.random.poisson(lambdas, size=(trials, categories))
poisson_row_totals = samples_poisson.sum(axis=1, keepdims=True)
poisson_valid_rows = poisson_row_totals[:, 0] > 0
approx_multinomial_poisson = np.mean(
    samples_poisson[poisson_valid_rows] / poisson_row_totals[poisson_valid_rows],
    axis=0,
)

# Markov chain: one-hot record of the visited state at each step; the column
# means are the fractions of time spent in each state.
states = np.zeros((trials, categories))
current_state = np.random.choice(categories, p=initial_state_distribution)
states[0, current_state] = 1

for t in range(1, trials):
    current_state = np.random.choice(categories, p=transition_matrix[current_state])
    states[t, current_state] = 1

approx_multinomial_markov = np.mean(states, axis=0)

# Plots
labels = [f"{i+1}" for i in range(categories)] #Each category
x = np.arange(categories)
width = 0.15

fig, ax = plt.subplots(figsize=(12, 8))

# Each method is drawn twice -- as a bar group and as a line through the same
# values -- in one shared colour. Only the bars carry a label, so the legend
# lists every method once instead of showing four duplicated entries.
plot_series = [
    ('True Multinomial', approx_multinomial),
    ('Dirichlet', approx_multinomial_dirichlet),
    ('Poisson', approx_multinomial_poisson),
    ('Markov', approx_multinomial_markov),
]
for series_index, (series_label, series_values) in enumerate(plot_series):
    series_colour = f"C{series_index}"  # entry of matplotlib's default colour cycle
    ax.bar(x + (series_index - 1.5) * width, series_values, width,
           color=series_colour, label=series_label)
    ax.plot(x, series_values, marker='o', linestyle='-', color=series_colour)

ax.set_xlabel('Categories')
ax.set_ylabel('Probability')
ax.set_title('Comparison between multinomial and others ('+str(trials)+' trials)')
ax.set_xticks(x)
ax.set_xticklabels(labels)
ax.legend()

plt.show()

In [None]:
#Comparison of Binomial Distribution and Its Approximations
import numpy as np
import scipy.stats as stats
import matplotlib.pyplot as plt
import time

# Parameters of the binomial distribution, read interactively:
# n is the number of trials, p the success probability of one trial.
n = int(input("n = "))
p = float(input("The probability is "))

# Exact binomial PMF on its whole support k = 0, ..., n.
binom_rv = stats.binom(n, p)
x = np.arange(0, n + 1)
binom_pmf = binom_rv.pmf(x)

# Normal approximation N(mu, sigma^2) using the binomial mean and standard
# deviation; the density is evaluated at the integers k.
mu = n * p
sigma = np.sqrt(n * p * (1 - p))
norm_rv = stats.norm(mu, sigma)
norm_pmf = norm_rv.pdf(x)

# The same normal density drawn as a smooth curve.
# The grid covers mu +/- 4 sigma with 400 points. A fixed 100-point grid on
# [-n, n] has a step of about n/50, which is wider than sigma (<= sqrt(n)/2)
# as soon as n exceeds roughly 625, so the bell shape would be undersampled.
continuous_norm_rv = stats.norm(mu, sigma)
continuous_x = np.linspace(mu - 4 * sigma, mu + 4 * sigma, 400)
continuous_norm_pmf = continuous_norm_rv.pdf(continuous_x)

# Poisson approximation with the same mean as the binomial (lambda = n * p).
lambda_ = n * p
poisson_rv = stats.poisson(lambda_)
poisson_pmf = poisson_rv.pmf(x)

# Edgeworth expansion
def edgeworth_expansion(x, n, p):
    """Approximate the CDF of Binomial(n, p) at ``x`` by an Edgeworth expansion.

    The standard normal CDF is corrected with the skewness and excess
    kurtosis of the binomial distribution:

        F(x) ~ Phi(z) - phi(z) * [ g1/6 * He2(z) + g2/24 * He3(z)
                                   + g1**2/72 * He5(z) ]

    where z = (x - n*p) / sqrt(n*p*q), g1 = (q - p) / sqrt(n*p*q),
    g2 = (1 - 6*p*q) / (n*p*q) and He_k are the probabilists' Hermite
    polynomials. Every correction term enters with a minus sign and the
    squared-skewness term carries the factor 1/72.

    Parameters
    ----------
    x : scalar or array-like
        Point(s) at which the CDF is approximated.
    n : int
        Number of trials.
    p : float
        Success probability; p = 0 or p = 1 makes sigma zero, and the
        result is then NaN/inf because no guard is applied.

    Returns
    -------
    float or numpy.ndarray
        Approximate CDF value(s), same shape as ``x``. The expansion is not
        a true CDF: values may fall slightly outside [0, 1] in the tails.

    Notes
    -----
    The binomial is a lattice distribution; to approximate P(X <= k) for an
    integer k it is customary to evaluate at k + 0.5 (continuity correction).
    """
    q = 1 - p
    mu = n * p
    variance = n * p * q
    sigma = np.sqrt(variance)
    z = (np.asarray(x, dtype=float) - mu) / sigma

    skewness = (q - p) / sigma
    kurtosis = (1 - 6 * p * q) / variance  # excess kurtosis

    phi_z = stats.norm.pdf(z)
    Phi_z = stats.norm.cdf(z)

    # Probabilists' Hermite polynomials He2, He3 and He5.
    he2 = z**2 - 1
    he3 = z**3 - 3 * z
    he5 = z**5 - 10 * z**3 + 15 * z

    correction = -phi_z * (
        skewness / 6 * he2
        + kurtosis / 24 * he3
        + skewness**2 / 72 * he5
    )

    return Phi_z + correction

# Measuring execution time for each approximation
def measure_time_and_accuracy():
    """Evaluate the binomial PMF and its three approximations, timing each one.

    Reads the notebook-level variables ``n``, ``p``, ``x``, ``mu``, ``sigma``
    and ``lambda_`` and calls ``edgeworth_expansion``. Despite its name, the
    function computes no accuracy metric; it only returns the PMFs from which
    errors can be derived.

    Intervals are taken with ``time.perf_counter()``, the clock intended for
    measuring short durations. ``time.time()`` is wall-clock time, which can
    be coarse or be adjusted while a measurement is running.

    Returns
    -------
    tuple
        ``(binom_pmf, binom_time, norm_pmf, norm_time, poisson_pmf,
        poisson_time, edgeworth_pmf, edgeworth_time)``; every ``*_time`` is
        the elapsed time in seconds of a single evaluation.
    """
    start_time = time.perf_counter()
    binom_rv = stats.binom(n, p)
    binom_pmf = binom_rv.pmf(x)
    binom_time = time.perf_counter() - start_time

    start_time = time.perf_counter()
    norm_rv = stats.norm(mu, sigma)
    norm_pmf = norm_rv.pdf(x)
    norm_time = time.perf_counter() - start_time

    start_time = time.perf_counter()
    poisson_rv = stats.poisson(lambda_)
    poisson_pmf = poisson_rv.pmf(x)
    poisson_time = time.perf_counter() - start_time

    start_time = time.perf_counter()
    # Continuity correction: P(X = k) is taken as F(k + 1/2) - F(k - 1/2).
    # Differencing F at the integers would shift the result by half a unit.
    # prepend=0 assigns all approximate mass below 1/2 to k = 0.
    edgeworth_cdf = edgeworth_expansion(x + 0.5, n, p)
    edgeworth_pmf = np.diff(edgeworth_cdf, prepend=0)
    edgeworth_time = time.perf_counter() - start_time

    return binom_pmf, binom_time, norm_pmf, norm_time, poisson_pmf, poisson_time, edgeworth_pmf, edgeworth_time

# Run every method once and unpack each PMF together with its elapsed time.
(
    binom_pmf, binom_time,
    norm_pmf, norm_time,
    poisson_pmf, poisson_time,
    edgeworth_pmf, edgeworth_time,
) = measure_time_and_accuracy()

# Accuracy measures
def calculate_accuracy(true_pmf, approx_pmf):
    """Return the mean absolute error between two PMFs.

    Both arguments are subtracted element-wise, so they must support ``-``
    with each other (e.g. NumPy arrays of the same shape). Lower is better;
    identical inputs give 0.
    """
    deviation = true_pmf - approx_pmf
    return np.mean(np.absolute(deviation))
def calculate_mse(true_pmf, approx_pmf):
    """Return the mean squared error between two PMFs.

    Both arguments are subtracted element-wise, so they must support ``-``
    with each other (e.g. NumPy arrays of the same shape). Lower is better;
    identical inputs give 0.
    """
    deviation = true_pmf - approx_pmf
    squared_deviation = deviation ** 2
    return np.mean(squared_deviation)

# Error metrics of every method against the exact binomial PMF, grouped by
# method. The binomial is also compared with itself (a zero baseline).
binom_accuracy = calculate_accuracy(binom_pmf, binom_pmf)
binom_mse = calculate_mse(binom_pmf, binom_pmf)
norm_accuracy = calculate_accuracy(binom_pmf, norm_pmf)
norm_mse = calculate_mse(binom_pmf, norm_pmf)
poisson_accuracy = calculate_accuracy(binom_pmf, poisson_pmf)
poisson_mse = calculate_mse(binom_pmf, poisson_pmf)
edgeworth_accuracy = calculate_accuracy(binom_pmf, edgeworth_pmf)
edgeworth_mse = calculate_mse(binom_pmf, edgeworth_pmf)

# One figure holding a 2 x 2 grid of panels, addressed through explicit axes.
summary_fig, panels = plt.subplots(2, 2, figsize=(18, 6))
pmf_ax, time_ax, mae_ax, mse_ax = panels.ravel()

# Top-left: the exact PMF as bars with each approximation overlaid.
pmf_ax.bar(x, binom_pmf, alpha=0.5, label='Binomial distribution', color='black')
pmf_ax.plot(x, norm_pmf, label='Normal approximation')
pmf_ax.plot(continuous_x, continuous_norm_pmf, 'r--', label='Continuous Normal')
pmf_ax.plot(x, poisson_pmf, label='Poisson approximation')
pmf_ax.plot(x, edgeworth_pmf, label='Edgeworth approximation')
pmf_ax.set_xlabel('k')
pmf_ax.set_ylabel('Probability')
pmf_ax.set_title('Probability Mass Functions Comparison')
pmf_ax.legend()

# Top-right: elapsed time of each method, the exact binomial included.
methods = ['Binomial', 'Normal', 'Poisson', 'Edgeworth']
times = [binom_time, norm_time, poisson_time, edgeworth_time]
time_ax.bar(methods, times, color=['black', 'blue', 'orange', 'green'])
time_ax.set_xlabel('Method')
time_ax.set_ylabel('Execution Time (s)')
time_ax.set_title('Execution Time Comparison')

# Bottom-left: mean absolute error of the three approximations.
methods = ['Normal', 'Poisson', 'Edgeworth']
accuracies = [norm_accuracy, poisson_accuracy, edgeworth_accuracy]
mae_ax.bar(methods, accuracies, color=['blue', 'orange', 'green'])
mae_ax.set_xlabel('Method')
mae_ax.set_ylabel('Mean Absolute Error')
mae_ax.set_title('Accuracy Comparison 1')

# Bottom-right: mean squared error of the three approximations.
methods = ['Normal', 'Poisson', 'Edgeworth']
mses = [norm_mse, poisson_mse, edgeworth_mse]
mse_ax.bar(methods, mses, color=['blue', 'orange', 'green'])
mse_ax.set_xlabel('Method')
mse_ax.set_ylabel('Mean Squared Error')
mse_ax.set_title('Accuracy Comparison 2')

summary_fig.suptitle(f'Comparison of Binomial Distribution and Its Approximations (n = {n}, probability: {p})', fontsize=16)

# Leave the top 4% of the figure free for the overall title.
summary_fig.tight_layout(rect=[0, 0, 1, 0.96])
plt.show()