In [None]:
#Binomial distribution vs. Normal distribution

import numpy as np
import scipy.stats as stats
import matplotlib.pyplot as plt

# Parameters of the binomial distribution
n = int(input("n = "))
p = float(input("The probability is "))

# The binomial distribution: exact PMF on its full support k = 0, ..., n
binom_rv = stats.binom(n, p)
x = np.arange(0, n + 1)
binom_pmf = binom_rv.pmf(x)

# The approximation by the normal distribution N(np, np(1-p)),
# with its density evaluated at the integer points k
mu = n * p
sigma = np.sqrt(n * p * (1 - p))
norm_rv = stats.norm(mu, sigma)
norm_pmf = norm_rv.pdf(x)

# The normal distribution drawn as a continuous curve over [-n, n].
# The number of grid points grows with n (step <= 0.5): a fixed 100 points
# has a step of about n/50, which for large n exceeds sigma <= sqrt(n)/2 and
# makes the plotted curve miss the peak of the density.
continuous_norm_rv = stats.norm(mu, sigma)
continuous_x = np.linspace(-n, n, max(100, 4 * n + 1))
continuous_norm_pmf = continuous_norm_rv.pdf(continuous_x)

# Graphs
plt.bar(x, binom_pmf, alpha=0.5, label='Binomial')
plt.plot(x, norm_pmf, 'r--', label='Normal approximation')
plt.plot(continuous_x, continuous_norm_pmf, 'g-', label='Continuous Normal')
plt.xlabel('k')  # x-axis
plt.ylabel('Probability')  # y-axis
plt.legend()
plt.show()

In [None]:
#Binomial distribution vs. Poisson distribution

import numpy as np
import scipy.stats as stats
import matplotlib.pyplot as plt

# Binomial parameters, read interactively
n = int(input("n = "))
p = float(input("The probability is "))

# Exact binomial PMF on the support k = 0, ..., n
x = np.arange(n + 1)
binom_rv = stats.binom(n, p)
binom_pmf = binom_rv.pmf(x)

# Poisson approximation with the same mean, lambda = n * p
lambda_ = n * p
poisson_rv = stats.poisson(lambda_)
poisson_pmf = poisson_rv.pmf(x)

# Binomial bars with the Poisson curve drawn on top
fig, ax = plt.subplots()
ax.bar(x, binom_pmf, alpha=0.5, label='Binomial')
ax.plot(x, poisson_pmf, 'g-', label='Poisson approximation')
ax.set_xlabel('k')
ax.set_ylabel('Probability')
ax.legend()
plt.show()

In [None]:
#Binomial distribution vs. the method by using Edgeworth expansion

import numpy as np
import scipy.stats as stats
import matplotlib.pyplot as plt
from scipy.special import erf

# Read the binomial parameters from the user
n = int(input("n = "))
p = float(input("The probability is "))

# Exact binomial PMF over the whole support k = 0, ..., n
x = np.arange(n + 1)
binom_rv = stats.binom(n, p)
binom_pmf = binom_rv.pmf(x)

# The function of Edgeworth expansion
def edgeworth_expansion(x, n, p):
    """Edgeworth approximation of the Binomial(n, p) CDF at the point(s) x.

    With z = (x - np) / sqrt(npq), skewness g1 and excess kurtosis g2 of the
    binomial, the expansion is

        F(x) ~ Phi(z) - phi(z) * [ g1/6 * He2(z) + g2/24 * He3(z)
                                   + g1**2/72 * He5(z) ]

    where He_k are the probabilists' Hermite polynomials.

    Args:
        x: scalar or array of evaluation points.
        n: number of trials.
        p: success probability.

    Returns:
        The approximated CDF value(s), with the same shape as x.
    """
    q = 1 - p
    mu = n * p
    sigma = np.sqrt(n * p * q)
    z = (x - mu) / sigma
    skewness = (q - p) / np.sqrt(n * p * q)
    kurtosis = (1 - 6*p*q) / (n * p * q)  # excess kurtosis of the binomial

    phi_z = stats.norm.pdf(z)
    Phi_z = stats.norm.cdf(z)

    # Probabilists' Hermite polynomials appearing in the CDF expansion
    he2 = z**2 - 1
    he3 = z**3 - 3*z
    he5 = z**5 - 10*z**3 + 15*z

    # All three terms enter with a minus sign, and the squared-skewness term
    # carries 1/72 (not 1/36). With the signs flipped, a right-skewed binomial
    # (p < 0.5) would get F(mean) < 0.5 instead of > 0.5.
    correction = -phi_z * (skewness / 6 * he2 + kurtosis / 24 * he3 + skewness**2 / 72 * he5)

    return Phi_z + correction

# The approximation by using Edgeworth expansion.
# P(X <= k) is approximated by the continuous CDF at k + 0.5 (continuity
# correction), so each difference F(k + 0.5) - F(k - 0.5) is centred on k.
# Differencing F(k) - F(k - 1) instead would shift the curve half a unit to
# the right of the binomial bars.
edgeworth_cdf = edgeworth_expansion(x + 0.5, n, p)
edgeworth_pmf = np.diff(edgeworth_cdf, prepend=0)

# Graphs
plt.bar(x, binom_pmf, alpha=0.5, label='Binomial')
plt.plot(x, edgeworth_pmf, 'b-', label='Edgeworth approximation')
plt.xlabel('k')
plt.ylabel('Probability')
plt.legend()
plt.show()

In [None]:
#Binomial distribution vs. others

import numpy as np
import scipy.stats as stats
import matplotlib.pyplot as plt
from scipy.special import erf

# Parameters of the binomial distribution
n = int(input("n = "))
p = float(input("The probability is "))

# The binomial distribution: exact PMF on k = 0, ..., n
binom_rv = stats.binom(n, p)
x = np.arange(0, n + 1)
binom_pmf = binom_rv.pmf(x)

# The approximation by the normal distribution (density at the integer points)
mu = n * p
sigma = np.sqrt(n * p * (1 - p))
norm_rv = stats.norm(mu, sigma)
norm_pmf = norm_rv.pdf(x)

# The normal distribution as a continuous curve over [-n, n].
# The grid is refined with n (step <= 0.5); a fixed 100 points gives a step
# of about n/50, which for large n is wider than the density's peak
# (sigma <= sqrt(n)/2), so the curve would be drawn without its maximum.
continuous_norm_rv = stats.norm(mu, sigma)
continuous_x = np.linspace(-n, n, max(100, 4 * n + 1))
continuous_norm_pmf = continuous_norm_rv.pdf(continuous_x)

# The approximation by Poisson distribution with lambda = n * p
lambda_ = n * p
poisson_rv = stats.poisson(lambda_)
poisson_pmf = poisson_rv.pmf(x)

# The function of Edgeworth expansion
def edgeworth_expansion(x, n, p):
    """Approximate the Binomial(n, p) CDF at x with a second-order Edgeworth series.

    The series is Phi(z) minus phi(z) times
    skewness/6 * He2(z) + kurtosis/24 * He3(z) + skewness**2/72 * He5(z),
    with z the standardised point and He_k the probabilists' Hermite
    polynomials.

    Args:
        x: scalar or array of evaluation points.
        n: number of trials.
        p: success probability.

    Returns:
        Approximated CDF value(s), same shape as x.
    """
    q = 1 - p
    mu = n * p
    sigma = np.sqrt(n * p * q)
    z = (x - mu) / sigma
    skewness = (q - p) / np.sqrt(n * p * q)
    kurtosis = (1 - 6*p*q) / (n * p * q)  # excess kurtosis

    phi_z = stats.norm.pdf(z)
    Phi_z = stats.norm.cdf(z)

    he2 = z**2 - 1
    he3 = z**3 - 3*z
    he5 = z**5 - 10*z**3 + 15*z

    # Every correction term is subtracted, and the skewness**2 term uses 1/72.
    # Adding the first two terms (and using 1/36) mirrors the skewness
    # correction, which pushes the approximation away from the binomial CDF.
    correction = -phi_z * (skewness / 6 * he2 + kurtosis / 24 * he3 + skewness**2 / 72 * he5)

    return Phi_z + correction

# The approximation by using Edgeworth expansion.
# The CDF is evaluated at k + 0.5 (continuity correction) so that the
# differenced values F(k + 0.5) - F(k - 0.5) line up with P(X = k); without
# the offset the Edgeworth curve is displaced by half a unit.
edgeworth_cdf = edgeworth_expansion(x + 0.5, n, p)
edgeworth_pmf = np.diff(edgeworth_cdf, prepend=0)

# Graphs
plt.bar(x, binom_pmf, alpha=0.5, label='Binomial')
plt.plot(x, norm_pmf, label='Normal approximation')
plt.plot(continuous_x, continuous_norm_pmf, 'r--', label='Continuous Normal')
plt.plot(x, poisson_pmf, label='Poisson approximation')
plt.plot(x, edgeworth_pmf, label='Edgeworth approximation')
plt.xlabel('k')
plt.ylabel('Probability')
plt.title('Comparison between distributions (n = '+str(n)+', probability: '+str(p)+')')
plt.legend()
plt.show()

In [None]:
#Multinomial distribution vs. others (without line graphs)
import numpy as np
import matplotlib.pyplot as plt
from scipy.stats import dirichlet, multinomial

# Parameter input
categories = int(input("Number of categories = "))
trials = int(input("Number of trials = "))

# Parameters of the Dirichlet distribution (generated at random), alpha_i in [1, 2).
# Dirichlet samples sum to one for any positive alpha; keeping alpha_i >= 1
# only keeps the density bounded on the boundary of the simplex.
#alpha = np.array([float(input(f"Dirichlet parameters of the category{i+1} = ")) for i in range(categories)])
alpha = np.random.rand(categories) + 1

# Parameters of the Poisson distribution (generated at random), lambda_i in [0, 10)
#lambdas = np.array([float(input(f"Poisson parameter λ of the category{i+1} = ")) for i in range(categories)])
lambdas = np.random.rand(categories) * 10

# Transition matrix of a Markov chain: random entries, each row normalised to sum to one
transition_matrix = np.random.rand(categories, categories)
transition_matrix = transition_matrix / transition_matrix.sum(axis=1, keepdims=True)

# Setting the initial state distribution evenly
initial_state_distribution = np.full(categories, 1.0 / categories)

# Randomly generated and normalised parameters (probabilities) of a multinomial distribution
true_probs = np.random.rand(categories)
true_probs /= true_probs.sum()

# Generating one vector of counts from the multinomial distribution and
# converting it to relative frequencies
samples_multinomial = multinomial.rvs(n=trials, p=true_probs, size=1)
approx_multinomial = samples_multinomial[0] / trials

# Approximation by using Dirichlet distribution: mean of `trials` Dirichlet draws
samples_dirichlet = dirichlet.rvs(alpha, size=trials)
approx_multinomial_dirichlet = np.mean(samples_dirichlet, axis=0)

# Approximation by using Poisson distribution: independent Poisson counts
# normalised by their row total. Rows whose total is zero are dropped:
# dividing them gives 0/0 = NaN, and a single NaN row would turn every
# averaged proportion into NaN.
samples_poisson = np.random.poisson(lambdas, size=(trials, categories))
poisson_totals = samples_poisson.sum(axis=1, keepdims=True)
nonzero_rows = poisson_totals[:, 0] > 0
approx_multinomial_poisson = np.mean(samples_poisson[nonzero_rows] / poisson_totals[nonzero_rows], axis=0)

# Approximation by using a Markov chain: one-hot record of the visited state
# at every step, averaged into visit frequencies
states = np.zeros((trials, categories))
current_state = np.random.choice(categories, p=initial_state_distribution)
states[0, current_state] = 1

for t in range(1, trials):
    current_state = np.random.choice(categories, p=transition_matrix[current_state])
    states[t, current_state] = 1

approx_multinomial_markov = np.mean(states, axis=0)

# Plots: four bars per category, offset around the category's tick
labels = [f"{i+1}" for i in range(categories)] #Each category
x = np.arange(categories)
width = 0.15

fig, ax = plt.subplots()
ax.bar(x - 1.5*width, approx_multinomial, width, label='True Multinomial')
ax.bar(x - 0.5*width, approx_multinomial_dirichlet, width, label='Dirichlet')
ax.bar(x + 0.5*width, approx_multinomial_poisson, width, label='Poisson')
ax.bar(x + 1.5*width, approx_multinomial_markov, width, label='Markov')

ax.set_xlabel('Categories')
ax.set_ylabel('Probability')
ax.set_title('Approximations to multinomial ('+str(trials)+' trials)')
ax.set_xticks(x)
ax.set_xticklabels(labels)
ax.legend()

plt.show()

In [None]:
#Multinomial distribution vs. others (with line graphs)
import numpy as np
import matplotlib.pyplot as plt
from scipy.stats import dirichlet, multinomial

# Parameter input
categories = int(input("Number of categories = "))
trials = int(input("Number of trials = "))

# Parameters of the Dirichlet distribution (generated at random), alpha_i in [1, 2).
# The samples sum to one whatever the (positive) alpha is; alpha_i >= 1 only
# keeps the Dirichlet density bounded on the boundary of the simplex.
#alpha = np.array([float(input(f"Dirichlet parameters of the category{i+1} = ")) for i in range(categories)])
alpha = np.random.rand(categories) + 1

# Parameters of the Poisson distribution (generated at random), lambda_i in [0, 10)
#lambdas = np.array([float(input(f"Poisson parameter λ of the category{i+1} = ")) for i in range(categories)])
lambdas = np.random.rand(categories) * 10

# Transition matrix of a Markov chain: random entries, rows normalised to sum to one
transition_matrix = np.random.rand(categories, categories)
transition_matrix = transition_matrix / transition_matrix.sum(axis=1, keepdims=True)

# Setting the initial state distribution evenly
initial_state_distribution = np.full(categories, 1.0 / categories)

# Randomly generated and normalised parameters (probabilities) of a multinomial distribution
true_probs = np.random.rand(categories)
true_probs /= true_probs.sum()

# Generating one vector of counts from the multinomial distribution,
# expressed as relative frequencies
samples_multinomial = multinomial.rvs(n=trials, p=true_probs, size=1)
approx_multinomial = samples_multinomial[0] / trials

# Approximation by using Dirichlet distribution: mean of `trials` Dirichlet draws
samples_dirichlet = dirichlet.rvs(alpha, size=trials)
approx_multinomial_dirichlet = np.mean(samples_dirichlet, axis=0)

# Approximation by using Poisson distribution: Poisson counts divided by
# their row total. A row with total zero yields 0/0 = NaN and would make the
# whole column mean NaN, so such rows are excluded before averaging.
samples_poisson = np.random.poisson(lambdas, size=(trials, categories))
poisson_totals = samples_poisson.sum(axis=1, keepdims=True)
nonzero_rows = poisson_totals[:, 0] > 0
approx_multinomial_poisson = np.mean(samples_poisson[nonzero_rows] / poisson_totals[nonzero_rows], axis=0)

# Approximation by using a Markov chain: one-hot record of the visited state
# per step, averaged into visit frequencies
states = np.zeros((trials, categories))
current_state = np.random.choice(categories, p=initial_state_distribution)
states[0, current_state] = 1

for t in range(1, trials):
    current_state = np.random.choice(categories, p=transition_matrix[current_state])
    states[t, current_state] = 1

approx_multinomial_markov = np.mean(states, axis=0)

# Plots: grouped bars plus a line through the same values for each method
labels = [f"{i+1}" for i in range(categories)] #Each category
x = np.arange(categories)
width = 0.15

fig, ax = plt.subplots(figsize=(12, 8))

ax.bar(x - 1.5*width, approx_multinomial, width, label='True Multinomial')
ax.bar(x - 0.5*width, approx_multinomial_dirichlet, width, label='Dirichlet')
ax.bar(x + 0.5*width, approx_multinomial_poisson, width, label='Poisson')
ax.bar(x + 1.5*width, approx_multinomial_markov, width, label='Markov')

ax.plot(x, approx_multinomial, marker='o', linestyle='-', label='True Multinomial')
ax.plot(x, approx_multinomial_dirichlet, marker='o', linestyle='-', label='Dirichlet')
ax.plot(x, approx_multinomial_poisson, marker='o', linestyle='-', label='Poisson')
ax.plot(x, approx_multinomial_markov, marker='o', linestyle='-', label='Markov')

ax.set_xlabel('Categories')
ax.set_ylabel('Probability')
ax.set_title('Comparison between multinomial and others ('+str(trials)+' trials)')
ax.set_xticks(x)
ax.set_xticklabels(labels)
ax.legend()

plt.show()

In [None]:
#Comparison of Binomial Distribution and Its Approximations
import numpy as np
import scipy.stats as stats
import matplotlib.pyplot as plt
import time

# Parameters
n = int(input("n = "))
p = float(input("The probability is "))

# The binomial distribution: exact PMF on k = 0, ..., n
binom_rv = stats.binom(n, p)
x = np.arange(0, n + 1)
binom_pmf = binom_rv.pmf(x)

# Normal approximation (density at the integer points)
mu = n * p
sigma = np.sqrt(n * p * (1 - p))
norm_rv = stats.norm(mu, sigma)
norm_pmf = norm_rv.pdf(x)

# Continuous normal distribution over [-n, n]. The grid has a step of at most
# 0.5; with a fixed 100 points the step is about n/50, wider than the peak of
# the density (sigma <= sqrt(n)/2) once n is large.
continuous_norm_rv = stats.norm(mu, sigma)
continuous_x = np.linspace(-n, n, max(100, 4 * n + 1))
continuous_norm_pmf = continuous_norm_rv.pdf(continuous_x)

# Poisson approximation with lambda = n * p
lambda_ = n * p
poisson_rv = stats.poisson(lambda_)
poisson_pmf = poisson_rv.pmf(x)

# Edgeworth expansion
def edgeworth_expansion(x, n, p):
    """Second-order Edgeworth approximation of the Binomial(n, p) CDF.

    Computes Phi(z) - phi(z) * (g1/6 * He2(z) + g2/24 * He3(z) + g1**2/72 * He5(z))
    where z = (x - np) / sqrt(npq), g1 is the binomial skewness, g2 its excess
    kurtosis and He_k are the probabilists' Hermite polynomials.

    Args:
        x: scalar or array of evaluation points.
        n: number of trials.
        p: success probability.

    Returns:
        Approximated CDF value(s), same shape as x.
    """
    q = 1 - p
    mu = n * p
    sigma = np.sqrt(n * p * q)
    z = (x - mu) / sigma
    skewness = (q - p) / np.sqrt(n * p * q)
    kurtosis = (1 - 6*p*q) / (n * p * q)  # excess kurtosis

    phi_z = stats.norm.pdf(z)
    Phi_z = stats.norm.cdf(z)

    he2 = z**2 - 1
    he3 = z**3 - 3*z
    he5 = z**5 - 10*z**3 + 15*z

    # The bracket is subtracted as a whole and the last coefficient is 1/72;
    # flipped signs give F(mean) on the wrong side of 0.5 for skewed cases.
    correction = -phi_z * (skewness / 6 * he2 + kurtosis / 24 * he3 + skewness**2 / 72 * he5)

    return Phi_z + correction

# Measuring execution time for each approximation
def measure_time_and_accuracy():
    """Compute each PMF once and record how long each computation takes.

    Reads the module-level n, p, x, mu, sigma and lambda_ and calls the
    module-level edgeworth_expansion.

    Returns:
        Tuple (binom_pmf, binom_time, norm_pmf, norm_time, poisson_pmf,
        poisson_time, edgeworth_pmf, edgeworth_time); times are in seconds.
    """
    # time.perf_counter() is the high-resolution clock intended for measuring
    # short durations; time.time() can be too coarse for these sub-millisecond
    # computations and may jump when the system clock is adjusted.
    start_time = time.perf_counter()
    binom_rv = stats.binom(n, p)
    binom_pmf = binom_rv.pmf(x)
    binom_time = time.perf_counter() - start_time

    start_time = time.perf_counter()
    norm_rv = stats.norm(mu, sigma)
    norm_pmf = norm_rv.pdf(x)
    norm_time = time.perf_counter() - start_time

    start_time = time.perf_counter()
    poisson_rv = stats.poisson(lambda_)
    poisson_pmf = poisson_rv.pmf(x)
    poisson_time = time.perf_counter() - start_time

    start_time = time.perf_counter()
    # Continuity correction: evaluate the CDF at k + 0.5 so the differences
    # F(k + 0.5) - F(k - 0.5) are centred on k instead of on k - 0.5.
    edgeworth_cdf = edgeworth_expansion(x + 0.5, n, p)
    edgeworth_pmf = np.diff(edgeworth_cdf, prepend=0)
    edgeworth_time = time.perf_counter() - start_time

    return binom_pmf, binom_time, norm_pmf, norm_time, poisson_pmf, poisson_time, edgeworth_pmf, edgeworth_time

# Run every method once; this rebinds the module-level PMF arrays and stores
# the elapsed time of each computation.
binom_pmf, binom_time, norm_pmf, norm_time, poisson_pmf, poisson_time, edgeworth_pmf, edgeworth_time = measure_time_and_accuracy()

# Accuracy measures
def calculate_accuracy(true_pmf, approx_pmf):
    """Return the mean absolute difference between the two PMF arrays."""
    abs_errors = np.abs(true_pmf - approx_pmf)
    return np.mean(abs_errors)
def calculate_mse(true_pmf, approx_pmf):
    """Return the mean squared difference between the two PMF arrays."""
    residuals = true_pmf - approx_pmf
    return np.mean(np.square(residuals))

# Error of every approximation against the exact binomial PMF.
# The binomial-vs-itself values are zero by construction and are not plotted.
binom_accuracy = calculate_accuracy(binom_pmf, binom_pmf)
norm_accuracy = calculate_accuracy(binom_pmf, norm_pmf)
poisson_accuracy = calculate_accuracy(binom_pmf, poisson_pmf)
edgeworth_accuracy = calculate_accuracy(binom_pmf, edgeworth_pmf)
binom_mse = calculate_mse(binom_pmf, binom_pmf)
norm_mse = calculate_mse(binom_pmf, norm_pmf)
poisson_mse = calculate_mse(binom_pmf, poisson_pmf)
edgeworth_mse = calculate_mse(binom_pmf, edgeworth_pmf)

# Plotting: one figure with a 2 x 2 grid of panels
plt.figure(figsize=(18, 6))

# Probability mass functions comparison (top-left panel)
plt.subplot(2, 2, 1)
plt.bar(x, binom_pmf, alpha=0.5, label='Binomial distribution', color='black')
plt.plot(x, norm_pmf, label='Normal approximation')
plt.plot(continuous_x, continuous_norm_pmf, 'r--', label='Continuous Normal')
plt.plot(x, poisson_pmf, label='Poisson approximation')
plt.plot(x, edgeworth_pmf, label='Edgeworth approximation')
plt.xlabel('k')
plt.ylabel('Probability')
plt.title('Probability Mass Functions Comparison')
plt.legend()

# Execution time comparison (top-right panel)
plt.subplot(2, 2, 2)
methods = ['Binomial', 'Normal', 'Poisson', 'Edgeworth']
times = [binom_time, norm_time, poisson_time, edgeworth_time]
plt.bar(methods, times, color=['black', 'blue', 'orange', 'green'])
plt.xlabel('Method')
plt.ylabel('Execution Time (s)')
plt.title('Execution Time Comparison')

# Accuracy comparison: mean absolute error (bottom-left panel)
plt.subplot(2, 2, 3)
methods = ['Normal', 'Poisson', 'Edgeworth']
accuracies = [norm_accuracy, poisson_accuracy, edgeworth_accuracy]
plt.bar(methods, accuracies, color=['blue', 'orange', 'green'])
plt.xlabel('Method')
plt.ylabel('Mean Absolute Error')
plt.title('Accuracy Comparison 1')

# Accuracy comparison: mean squared error (bottom-right panel)
plt.subplot(2, 2, 4)
methods = ['Normal', 'Poisson', 'Edgeworth']
mses = [norm_mse, poisson_mse, edgeworth_mse]
plt.bar(methods, mses, color=['blue', 'orange', 'green'])
plt.xlabel('Method')
plt.ylabel('Mean Squared Error')
plt.title('Accuracy Comparison 2')

#plt.suptitle('Comparison of Binomial Distribution and Its Approximations (Number of trials: '+str(n)+', probability: '+str(p)+')', fontsize=16)

# rect leaves the top 4% of the figure free (room for the disabled suptitle above)
plt.tight_layout(rect=[0, 0, 1, 0.96])
plt.show()

In [None]:
#Suggestion 1 by THE TEACHER
import numpy as np
import scipy.stats as stats
import matplotlib.pyplot as plt
import time
from scipy.special import factorial

def multinomial_pmf(x, n, p):
    """Probability of the count vector x under Multinomial(n, p).

    The value n! / prod(x_i!) * prod(p_i ** x_i) is evaluated in log space.
    Forming factorial(n) directly overflows to inf for n > 170, and the
    quotient inf / inf then yields NaN.

    Args:
        x: array-like of non-negative counts, one per category.
        n: number of trials.
        p: array-like of category probabilities, same length as x.

    Returns:
        The probability as a NumPy float.
    """
    # Local import so the function does not depend on names the cell's import
    # block may not provide.
    from scipy.special import gammaln, xlogy

    x = np.asarray(x)
    # log(n!) - sum(log(x_i!)), using gammaln(k + 1) = log(k!)
    log_coeff = gammaln(n + 1) - np.sum(gammaln(x + 1))
    # xlogy(x, p) = x * log(p), with the convention 0 * log(0) = 0
    log_prob = log_coeff + np.sum(xlogy(x, p))
    return np.exp(log_prob)

def edgeworth_expansion_multinomial(x, n, p):
    """Edgeworth-style corrected normal density for a multinomial count vector.

    A multivariate normal density with mean n*p and diagonal covariance
    diag(n*p*(1-p)) is evaluated at x and multiplied by one plus three
    correction sums built from per-category skewness and kurtosis.

    NOTE(review): the covariance ignores the off-diagonal multinomial terms,
    and the polynomials used (degrees 2, 3, 5) are those of the CDF expansion
    while the base term is a density. Confirm the intended formula.

    Args:
        x: array of counts, one per category.
        n: number of trials.
        p: sequence of category probabilities.

    Returns:
        The corrected density value at x.
    """
    probs = np.array(p)
    comp = 1 - probs
    mean = n * probs
    variances = n * probs * comp
    std = np.sqrt(variances)

    # Per-category skewness and excess kurtosis of a Binomial(n, p_i) marginal
    skew = (comp - probs) / np.sqrt(variances)
    kurt = (1 - 6*probs*comp) / variances

    standardized = (x - mean) / std

    # Base term: multivariate normal density with independent components
    density = stats.multivariate_normal(mean, np.diag(variances)).pdf(x)

    # Correction sums over categories
    skew_sum = np.sum(skew * (standardized**2 - 1))
    kurt_sum = np.sum(kurt * (standardized**3 - 3*standardized))
    skew_sq_sum = np.sum(skew**2 * (standardized**5 - 10*standardized**3 + 15*standardized))

    correction = (1/6) * skew_sum * density + (1/24) * kurt_sum * density - (1/36) * skew_sq_sum * density
    return density + correction

def measure_time_and_accuracy_multinomial(n, p, num_samples):
    """Draw multinomial outcomes and time the exact and Edgeworth evaluations.

    Args:
        n: number of trials per outcome.
        p: sequence of category probabilities.
        num_samples: number of outcome vectors to draw and evaluate.

    Returns:
        Tuple (outcomes, binom_pmf, binom_time, edgeworth_pmf, edgeworth_time)
        where the *_pmf arrays hold one value per outcome and the times are
        elapsed seconds.
    """
    outcomes = np.random.multinomial(n, p, num_samples)

    # time.perf_counter() is the high-resolution clock intended for measuring
    # short durations; time.time() can be too coarse for these loops.
    start_time = time.perf_counter()
    binom_pmf = np.array([multinomial_pmf(x, n, p) for x in outcomes])
    binom_time = time.perf_counter() - start_time

    start_time = time.perf_counter()
    edgeworth_pmf = np.array([edgeworth_expansion_multinomial(x, n, p) for x in outcomes])
    edgeworth_time = time.perf_counter() - start_time

    return outcomes, binom_pmf, binom_time, edgeworth_pmf, edgeworth_time

def calculate_accuracy(true_pmf, approx_pmf):
    """Mean absolute error of approx_pmf with respect to true_pmf."""
    deviation = true_pmf - approx_pmf
    return np.mean(np.abs(deviation))

def calculate_mse(true_pmf, approx_pmf):
    """Mean squared error of approx_pmf with respect to true_pmf."""
    deviation = true_pmf - approx_pmf
    return np.mean(np.square(deviation))

# Parameters: number of trials and a comma-separated list of category probabilities
n = int(input("n = "))
p = list(map(float, input("Probabilities (comma-separated) = ").split(',')))
num_samples = 100  # Number of samples to evaluate

# Draw the outcomes and evaluate/time both methods on them
outcomes, binom_pmf, binom_time, edgeworth_pmf, edgeworth_time = measure_time_and_accuracy_multinomial(n, p, num_samples)

# Errors of the Edgeworth values against the exact multinomial values.
# The multinomial-vs-itself values are zero by construction and are not plotted.
binom_accuracy = calculate_accuracy(binom_pmf, binom_pmf)
edgeworth_accuracy = calculate_accuracy(binom_pmf, edgeworth_pmf)
binom_mse = calculate_mse(binom_pmf, binom_pmf)
edgeworth_mse = calculate_mse(binom_pmf, edgeworth_pmf)

# Plotting: one figure with a 2 x 2 grid of panels
plt.figure(figsize=(18, 6))

# Probability mass functions comparison; the x-axis is the index of the drawn outcome
plt.subplot(2, 2, 1)
indices = np.arange(len(outcomes))
plt.bar(indices, binom_pmf, alpha=0.5, label='Multinomial distribution', color='black')
plt.plot(indices, edgeworth_pmf, 'g-', label='Edgeworth approximation')
plt.xlabel('Sample index')
plt.ylabel('Probability')
plt.title('Probability Mass Functions Comparison')
plt.legend()

# Execution time comparison
plt.subplot(2, 2, 2)
methods = ['Multinomial', 'Edgeworth']
times = [binom_time, edgeworth_time]
plt.bar(methods, times, color=['black', 'green'])
plt.xlabel('Method')
plt.ylabel('Execution Time (s)')
plt.title('Execution Time Comparison')

# Accuracy comparison: mean absolute error
plt.subplot(2, 2, 3)
methods = ['Edgeworth']
accuracies = [edgeworth_accuracy]
plt.bar(methods, accuracies, color=['green'])
plt.xlabel('Method')
plt.ylabel('Mean Absolute Error')
plt.title('Accuracy Comparison (MAE)')

# Accuracy comparison: mean squared error
plt.subplot(2, 2, 4)
methods = ['Edgeworth']
mses = [edgeworth_mse]
plt.bar(methods, mses, color=['green'])
plt.xlabel('Method')
plt.ylabel('Mean Squared Error')
plt.title('Accuracy Comparison (MSE)')

plt.suptitle('Comparison of Multinomial Distribution and Edgeworth Approximation (n = '+str(n)+', p = '+str(p)+')', fontsize=16)

# rect keeps the top 4% of the figure free for the suptitle
plt.tight_layout(rect=[0, 0, 1, 0.96])
plt.show()

In [None]:
#Suggestion 2 by THE TEACHER
import numpy as np
import scipy.stats as stats
import matplotlib.pyplot as plt
import time

# Read the number of trials and the space-separated category probabilities
n = int(input("n = "))
p = [float(token) for token in input("The probabilities are (space-separated): ").split()]

# Draw the sample outcomes that every method is evaluated on
num_samples = 1000
samples = np.random.multinomial(n, p, size=num_samples)

# Moments used by the normal approximation: mean n*p and a diagonal
# covariance built from the per-category variances n*p*(1-p)
probs = np.array(p)
mu = n * probs
sigma = np.sqrt(n * probs * (1 - probs))
cov_matrix = np.diag(sigma**2)

# Edgeworth expansion for multinomial distribution
def edgeworth_expansion_multinomial(x, n, p):
    """Normal density at x times (1 + skewness/kurtosis correction sums).

    The base term is a multivariate normal density with mean n*p and
    covariance diag(n*p*(1-p)); three sums over categories, weighted 1/6,
    1/24 and -1/36, scale that density and are added to it.

    NOTE(review): off-diagonal multinomial covariances are not modelled and
    the polynomial degrees (2, 3, 5) match the CDF form of the Edgeworth
    series rather than the density form. Verify against the intended
    derivation.

    Args:
        x: array of counts, one per category.
        n: number of trials.
        p: sequence of category probabilities.

    Returns:
        The corrected density value at x.
    """
    prob = np.array(p)
    co_prob = 1 - prob
    var = n * prob * co_prob
    center = n * prob

    # Standardised coordinates and per-category shape coefficients
    u = (x - center) / np.sqrt(var)
    gamma1 = (co_prob - prob) / np.sqrt(var)
    gamma2 = (1 - 6*prob*co_prob) / var

    # Multivariate normal density with independent components
    base = stats.multivariate_normal(center, np.diag(var)).pdf(x)

    term_skew = (1/6) * np.sum(gamma1 * (u**2 - 1)) * base
    term_kurt = (1/24) * np.sum(gamma2 * (u**3 - 3*u)) * base
    term_skew_sq = (1/36) * np.sum(gamma1**2 * (u**5 - 10*u**3 + 15*u)) * base

    return base + (term_skew + term_kurt - term_skew_sq)

# Measuring execution time for each approximation
def measure_time_and_accuracy_multinomial(samples):
    """Evaluate and time the exact, normal and Edgeworth values on each sample.

    Reads the module-level n, p, mu and cov_matrix and calls the module-level
    edgeworth_expansion_multinomial.

    Args:
        samples: iterable of count vectors, one per drawn outcome.

    Returns:
        Tuple (multinom_pmf, multinom_time, norm_pmf, norm_time,
        edgeworth_pmf, edgeworth_time); the arrays hold one value per sample
        and the times are elapsed seconds.
    """
    # time.perf_counter() is the high-resolution clock intended for measuring
    # short durations; time.time() has coarser resolution and may jump when
    # the system clock is adjusted.
    start_time = time.perf_counter()
    multinom_pmf = np.array([stats.multinomial.pmf(sample, n, p) for sample in samples])
    multinom_time = time.perf_counter() - start_time

    start_time = time.perf_counter()
    norm_pmf = np.array([stats.multivariate_normal.pdf(sample, mu, cov_matrix) for sample in samples])
    norm_time = time.perf_counter() - start_time

    start_time = time.perf_counter()
    edgeworth_pmf = np.array([edgeworth_expansion_multinomial(sample, n, p) for sample in samples])
    edgeworth_time = time.perf_counter() - start_time

    return multinom_pmf, multinom_time, norm_pmf, norm_time, edgeworth_pmf, edgeworth_time

# Evaluate all three methods on the drawn samples and keep values and timings
multinom_pmf, multinom_time, norm_pmf, norm_time, edgeworth_pmf, edgeworth_time = measure_time_and_accuracy_multinomial(samples)

# Accuracy measures
def calculate_accuracy(true_pmf, approx_pmf):
    """Average of |true_pmf - approx_pmf| over all entries."""
    gap = np.abs(true_pmf - approx_pmf)
    return np.mean(gap)
def calculate_mse(true_pmf, approx_pmf):
    """Average of (true_pmf - approx_pmf) squared over all entries."""
    gap = true_pmf - approx_pmf
    return np.mean(np.square(gap))

# Errors of both approximations against the exact multinomial PMF values
norm_accuracy = calculate_accuracy(multinom_pmf, norm_pmf)
edgeworth_accuracy = calculate_accuracy(multinom_pmf, edgeworth_pmf)
norm_mse = calculate_mse(multinom_pmf, norm_pmf)
edgeworth_mse = calculate_mse(multinom_pmf, edgeworth_pmf)

# Plotting: one figure with a 2 x 2 grid of panels
plt.figure(figsize=(18, 6))

# Probability mass functions comparison; the x-axis is the index of the drawn sample
plt.subplot(2, 2, 1)
indices = np.arange(len(samples))
plt.bar(indices, multinom_pmf, alpha=0.5, label='Multinomial distribution', color='black')
plt.plot(indices, norm_pmf, label='Normal approximation', color='blue')
plt.plot(indices, edgeworth_pmf, label='Edgeworth approximation', color='green')
plt.xlabel('Sample index')
plt.ylabel('Probability')
plt.title('Probability Mass Functions Comparison')
plt.legend()

# Execution time comparison
plt.subplot(2, 2, 2)
methods = ['Multinomial', 'Normal', 'Edgeworth']
times = [multinom_time, norm_time, edgeworth_time]
plt.bar(methods, times, color=['black', 'blue', 'green'])
plt.xlabel('Method')
plt.ylabel('Execution Time (s)')
plt.title('Execution Time Comparison')

# Accuracy comparison: mean absolute error
plt.subplot(2, 2, 3)
methods = ['Normal', 'Edgeworth']
accuracies = [norm_accuracy, edgeworth_accuracy]
plt.bar(methods, accuracies, color=['blue', 'green'])
plt.xlabel('Method')
plt.ylabel('Mean Absolute Error')
plt.title('Accuracy Comparison 1')

# Accuracy comparison: mean squared error
plt.subplot(2, 2, 4)
methods = ['Normal', 'Edgeworth']
mses = [norm_mse, edgeworth_mse]
plt.bar(methods, mses, color=['blue', 'green'])
plt.xlabel('Method')
plt.ylabel('Mean Squared Error')
plt.title('Accuracy Comparison 2')

plt.suptitle('Comparison of Multinomial Distribution and Its Approximations (n = '+str(n)+', p = '+str(p)+')', fontsize=16)

# rect keeps the top 4% of the figure free for the suptitle
plt.tight_layout(rect=[0, 0, 1, 0.96])
plt.show()

In [None]:
import numpy as np
import scipy.stats as stats
import matplotlib.pyplot as plt
import time

# Parameters
categories = int(input("Number of categories: "))
trials = int(input("Number of trials: "))

# Generating the parameter of Dirichlet distribution at random, alpha_i in [1, 2).
# Dirichlet samples sum to one for any positive alpha; alpha_i >= 1 only keeps
# the density bounded on the boundary of the simplex.
alpha = np.random.rand(categories) + 1

# Generating the parameter of Poisson distribution at random, lambda_i in [0, 10)
lambdas = np.random.rand(categories) * 10

# Generating a transition matrix for a Markov chain using random numbers and normalizing each row to 1.
transition_matrix = np.random.rand(categories, categories)
transition_matrix = transition_matrix / transition_matrix.sum(axis=1, keepdims=True)

# Setting the initial state distribution evenly
initial_state_distribution = np.full(categories, 1.0 / categories)

# Generating and normalizing randomly the parameters (probabilities) of the multinomial distribution
true_probs = np.random.rand(categories)
true_probs /= true_probs.sum()

# Generating one vector of counts from a multinomial distribution, as relative frequencies
samples_multinomial = stats.multinomial.rvs(n=trials, p=true_probs, size=1)
approx_multinomial = samples_multinomial[0] / trials

# Approximation by Dirichlet distribution: mean of `trials` Dirichlet draws
samples_dirichlet = stats.dirichlet.rvs(alpha, size=trials)
approx_multinomial_dirichlet = np.mean(samples_dirichlet, axis=0)

# Approximation by Poisson distribution: Poisson counts divided by their row
# total. Rows whose total is zero are excluded, because 0/0 = NaN in a single
# row would make every averaged proportion NaN.
samples_poisson = np.random.poisson(lambdas, size=(trials, categories))
poisson_totals = samples_poisson.sum(axis=1, keepdims=True)
nonzero_rows = poisson_totals[:, 0] > 0
approx_multinomial_poisson = np.mean(samples_poisson[nonzero_rows] / poisson_totals[nonzero_rows], axis=0)

# Approximation by a Markov chain: one-hot record of the visited state per
# step, averaged into visit frequencies
states = np.zeros((trials, categories))
current_state = np.random.choice(categories, p=initial_state_distribution)
states[0, current_state] = 1

for t in range(1, trials):
    current_state = np.random.choice(categories, p=transition_matrix[current_state])
    states[t, current_state] = 1

approx_multinomial_markov = np.mean(states, axis=0)

#  Approximation by Edgeworth expansion
def edgeworth_expansion(true_probs, trials, categories):
    """Per-category value Phi(z) + skewness/6 * (z**2 - 1) * phi(z).

    The standardised point z is formed from true_probs[i] - mu[i] with
    mu = true_probs, so z is 0 for every category and each entry reduces to
    0.5 - skewness[i] * phi(0) / 6.

    NOTE(review): because z is identically zero the result does not depend on
    any observed data and is not a probability vector. Confirm which
    evaluation point was intended.

    Args:
        true_probs: array of category probabilities.
        trials: number of trials.
        categories: number of leading categories to evaluate.

    Returns:
        Float array of length `categories`.
    """
    mu = true_probs
    sigma2 = (true_probs * (1 - true_probs)) / trials
    skewness = (1 - 2 * true_probs) / np.sqrt(trials * sigma2)

    def _category_value(i):
        # Standardised distance of the evaluation point from the mean
        z = (true_probs[i] - mu[i]) / np.sqrt(sigma2[i])
        density = stats.norm.pdf(z)
        cumulative = stats.norm.cdf(z)
        return cumulative + (1/6) * skewness[i] * (z**2 - 1) * density

    return np.array([_category_value(i) for i in range(categories)], dtype=float)

# Edgeworth-based values, one per category, compared with the sampled frequencies below
approx_multinomial_edgeworth = edgeworth_expansion(true_probs, trials, categories)

# Measurement of runtime
def measure_time():
    """Re-run each approximation once and return its elapsed time in seconds.

    Reads the module-level trials, categories, true_probs, alpha, lambdas,
    transition_matrix and initial_state_distribution and calls the
    module-level edgeworth_expansion. The computed values are discarded; only
    the timings are returned.

    Returns:
        Tuple (binom_time, dirichlet_time, poisson_time, markov_time,
        edgeworth_time).
    """
    # time.perf_counter() is the high-resolution clock intended for measuring
    # short durations; time.time() can be too coarse for the faster methods.
    start_time = time.perf_counter()
    samples_multinomial = stats.multinomial.rvs(n=trials, p=true_probs, size=1)
    approx_multinomial = samples_multinomial[0] / trials
    binom_time = time.perf_counter() - start_time

    start_time = time.perf_counter()
    samples_dirichlet = stats.dirichlet.rvs(alpha, size=trials)
    approx_multinomial_dirichlet = np.mean(samples_dirichlet, axis=0)
    dirichlet_time = time.perf_counter() - start_time

    start_time = time.perf_counter()
    samples_poisson = np.random.poisson(lambdas, size=(trials, categories))
    # Rows with a zero total are excluded to avoid 0/0 = NaN (and the
    # RuntimeWarning that goes with it)
    poisson_totals = samples_poisson.sum(axis=1, keepdims=True)
    nonzero_rows = poisson_totals[:, 0] > 0
    approx_multinomial_poisson = np.mean(samples_poisson[nonzero_rows] / poisson_totals[nonzero_rows], axis=0)
    poisson_time = time.perf_counter() - start_time

    start_time = time.perf_counter()
    states = np.zeros((trials, categories))
    current_state = np.random.choice(categories, p=initial_state_distribution)
    states[0, current_state] = 1

    for t in range(1, trials):
        current_state = np.random.choice(categories, p=transition_matrix[current_state])
        states[t, current_state] = 1

    approx_multinomial_markov = np.mean(states, axis=0)
    markov_time = time.perf_counter() - start_time

    start_time = time.perf_counter()
    edgeworth_exp = edgeworth_expansion(true_probs, trials, categories)
    edgeworth_time = time.perf_counter() - start_time

    return binom_time, dirichlet_time, poisson_time, markov_time, edgeworth_time

# Time one run of every method (elapsed seconds)
binom_time, dirichlet_time, poisson_time, markov_time, edgeworth_time = measure_time()

# Calculation of accuracy
def calculate_accuracy(true_probs, approx_probs):
    """Mean absolute difference between two probability vectors."""
    abs_diff = np.abs(true_probs - approx_probs)
    return np.mean(abs_diff)

def calculate_mse(true_probs, approx_probs):
    """Mean squared difference between two probability vectors."""
    diff = true_probs - approx_probs
    return np.mean(np.square(diff))

# Errors of every method against the sampled multinomial frequencies
# (approx_multinomial), not against true_probs
dirichlet_accuracy = calculate_accuracy(approx_multinomial, approx_multinomial_dirichlet)
poisson_accuracy = calculate_accuracy(approx_multinomial, approx_multinomial_poisson)
markov_accuracy = calculate_accuracy(approx_multinomial, approx_multinomial_markov)
edgeworth_accuracy = calculate_accuracy(approx_multinomial, approx_multinomial_edgeworth)
dirichlet_mse = calculate_mse(approx_multinomial, approx_multinomial_dirichlet)
poisson_mse = calculate_mse(approx_multinomial, approx_multinomial_poisson)
markov_mse = calculate_mse(approx_multinomial, approx_multinomial_markov)
edgeworth_mse = calculate_mse(approx_multinomial, approx_multinomial_edgeworth)

# Plotting
labels = [f"the category {i+1}" for i in range(categories)]
x = np.arange(categories)

# One figure with a 2 x 2 grid of panels
plt.figure(figsize=(18, 6))

# Line graph of results (top-left panel)
plt.subplot(2, 2, 1)
plt.plot(x, approx_multinomial, marker='o', linestyle='-', label='True Multinomial')
plt.plot(x, approx_multinomial_dirichlet, marker='o', linestyle='-', label='Dirichlet')
plt.plot(x, approx_multinomial_poisson, marker='o', linestyle='-', label='Poisson')
plt.plot(x, approx_multinomial_markov, marker='o', linestyle='-', label='Markov')
plt.plot(x, approx_multinomial_edgeworth, marker='o', linestyle='-', label='Edgeworth')
plt.xlabel('Categories')
plt.ylabel('Probability')
plt.title('Randomized samples')
plt.xticks(x, labels)
plt.legend()

# Comparison of runtime (top-right panel)
plt.subplot(2, 2, 2)
methods = ['Multinomial', 'Dirichlet', 'Poisson', 'Markov', 'Edgeworth']
times = [binom_time, dirichlet_time, poisson_time, markov_time, edgeworth_time]
plt.bar(methods, times, color=['black', 'blue', 'orange', 'green', 'purple'])
plt.xlabel('Method')
plt.ylabel('Execution Time (s)')
plt.title('Execution Time Comparison')

# Comparison of accuracy about MAE (bottom-left panel)
plt.subplot(2, 2, 3)
methods = ['Dirichlet', 'Poisson', 'Markov', 'Edgeworth']
accuracies = [dirichlet_accuracy, poisson_accuracy, markov_accuracy, edgeworth_accuracy]
plt.bar(methods, accuracies, color=['blue', 'orange', 'green', 'purple'])
plt.xlabel('Method')
plt.ylabel('Mean Absolute Error')
plt.title('Accuracy Comparison (Mean Absolute Error)')

# Comparison of accuracy about MSE (bottom-right panel)
plt.subplot(2, 2, 4)
methods = ['Dirichlet', 'Poisson', 'Markov', 'Edgeworth']
mses = [dirichlet_mse, poisson_mse, markov_mse, edgeworth_mse]
plt.bar(methods, mses, color=['blue', 'orange', 'green', 'purple'])
plt.xlabel('Method')
plt.ylabel('Mean Squared Error')
plt.title('Accuracy Comparison (Mean Squared Error)')

plt.suptitle('Comparison of Multinomial Distribution and Its Approximations (Categories = '+str(categories)+', Trials = '+str(trials)+')', fontsize=16)
# rect keeps the top 4% of the figure free for the suptitle
plt.tight_layout(rect=[0, 0, 1, 0.96])
plt.show()

In [None]:
import numpy as np
import matplotlib.pyplot as plt
from scipy.stats import multinomial
from scipy.special import gammaln#gamma

def continuous_deformation_of_multinomial_pdf(x, n, p):
    """Evaluate the "continuous deformation" value of a multinomial at x.

    The value is computed in log space as

        exp( [gammaln(n) - gammaln(n-1)]
             - sum_i gammaln((n-1) * p_i)
             - sum_i ((n-1) * p_i - 1) * log(x_i) )

    where entries with x_i == 0 are left out of the last sum.

    Args:
        x: sequence of coordinates, one per category; must sum to 1
            (within np.isclose tolerance) for a non-zero result.
        n: number of trials.
        p: sequence of category probabilities, same length as x.

    Returns:
        The computed value, or 0 when sum(x) is not close to 1, when some
        x_i == 0 has a negative exponent (n-1)*p_i - 1, or when any exception
        is raised during the computation (the error is printed first).

    Raises:
        ValueError: if x and p have different lengths.
    """
    if len(x) != len(p):
        raise ValueError("Length of x and p must be the same!")
    # Points off the simplex get value 0
    if not np.isclose(sum(x), 1):
        return 0
    try:
        #numerator = gamma(n-1)
        # NOTE(review): this equals log(n-1) for n > 1, whereas the disabled
        # line above used gamma(n-1) - confirm which numerator is intended.
        log_numerator = gammaln(n) - gammaln(n-1)#gammaln(n-1)
        #denominator = 1
        log_denominator = sum(gammaln((n-1)*pi) for pi in p)
        '''
        for pi in p:
            denominator *= gamma((n-1)*pi)
        variables_product = 1
        for xi, pi in zip(x, p):
            if xi == 0 and (n-1)*pi-1 < 0:
                return 0
            variables_product *= xi**((n-1)*pi-1)
        pmf = numerator/(denominator*variables_product)
        '''
        log_variables_product = 0
        for xi, pi in zip(x, p):
            if xi == 0:
                # A zero coordinate with a negative exponent makes the whole value 0
                if (n-1)*pi-1 < 0:
                    return 0
                # Otherwise the zero coordinate is skipped (log(0) is never taken)
                continue
            log_variables_product += ((n-1)*pi-1)*np.log(xi)
        # NOTE(review): the power product is subtracted here (i.e. divided in
        # linear space); a Dirichlet density would multiply it - confirm.
        log_pmf = log_numerator - log_denominator - log_variables_product
        pmf = np.exp(log_pmf)
        return pmf
    except Exception as e:
        # Best-effort: report the failure and fall back to 0
        print(f"Error: {e}")
        return 0

# Input: the comparison below is run for every trial count n = 1, ..., max_n.
#n = int(input("Number of trials: "))
max_n = int(input("Enter the maximum number of trials (n): "))
k = 4#int(input("Number of categories: "))  -- the category count is fixed at 4; the prompt is disabled

# Prepare for plotting: the two lists receive one value per n, in the order of n_values.
n_values = range(1, max_n + 1)
continuous_values = []
multinomial_values = []

'''
# Randomly generating p and x based on the p
np.random.seed(42)  # For reproducibility
p = np.random.dirichlet(np.ones(k))  # Probability vector of k-th categories
x = np.random.multinomial(n, p)  # Generation of x

# Calculation of continuous_deformation_of_multinomial_pdf
cdf_continuous = continuous_deformation_of_multinomial_pdf(x / n, n, p)

# Calculation of multinomial.pmf
cdf_multinomial = multinomial.pmf(x, n, p)

# Plotting
plt.figure(figsize=(10, 6))
plt.bar(['Continuous Deformation', 'Multinomial'], [cdf_continuous, cdf_multinomial], color=['blue', 'green'])
plt.ylabel('Probability')
plt.title('Comparison between Multinomial and Its Continuous Deformation')
plt.show()
'''

# For each n, draw one outcome and evaluate both the continuous formula and the exact PMF at it
for n in n_values:
    # Re-seeding inside the loop restarts the generator on every iteration, so the
    # Dirichlet draw p is identical for all n; only the multinomial draw x depends on n.
    np.random.seed(42)  # For reproducibility
    p = np.random.dirichlet(np.ones(k))  # Probability vector over the k categories
    x = np.random.multinomial(n, p)  # One random count vector for n trials
    
    # Calculate probabilities.
    # Despite the "cdf_" prefix, both values are point evaluations, not cumulative probabilities.
    # x / n turns the counts into proportions, which is the form the continuous function checks for (sum == 1).
    cdf_continuous = continuous_deformation_of_multinomial_pdf(x / n, n, p)
    cdf_multinomial = multinomial.pmf(x, n, p)
    
    continuous_values.append(cdf_continuous)
    multinomial_values.append(cdf_multinomial)

# Plotting: both series against n on one set of axes
plt.figure(figsize=(12, 6))
plt.plot(n_values, continuous_values, label='Continuous Deformation', marker='o')
plt.plot(n_values, multinomial_values, label='Multinomial', marker='x')
plt.xlabel('Number of Trials (n)')
plt.ylabel('Probability')
plt.title('Comparison between Multinomial and Its Continuous Deformation')
plt.legend()
plt.grid(True)
# Pin the y-axis to start at 0 and end at the largest value in either series
plt.ylim(0, max(max(continuous_values), max(multinomial_values)))
plt.show()

In [None]:
import numpy as np
import matplotlib.pyplot as plt
from scipy.stats import multinomial
from scipy.special import gammaln

def continuous_deformation_of_multinomial_pdf(x, n, p):
    """Evaluate the log-space "continuous deformation" formula at a point.

    The value returned is ``exp(A - B - C)`` with

    * ``A = gammaln(n) - gammaln(n - 1)``,
    * ``B = sum_i gammaln((n - 1) * p_i)``,
    * ``C = sum_i ((n - 1) * p_i - 1) * log(x_i)`` over the strictly
      positive ``x_i``.

    NOTE(review): the Gamma terms match a Dirichlet density with parameters
    ``(n - 1) * p_i``, but the leading term and the sign of the power term
    differ from the textbook Dirichlet pdf -- confirm this is intended.

    Args:
        x: Proportions; must sum to 1 within ``np.isclose`` tolerance.
        n: Number of trials.
        p: Category probabilities, same length as ``x``.

    Returns:
        The value above. ``0`` is returned when ``x`` does not sum to 1, when
        some ``x_i <= 0`` while ``(n - 1) * p_i - 1`` is negative, or when any
        exception occurs during the computation (it is printed).

    Raises:
        ValueError: If ``x`` and ``p`` differ in length.
    """
    if len(x) != len(p):
        raise ValueError("Length of x and p must be the same!")
    if not np.isclose(sum(x), 1):
        return 0

    try:
        gamma_ratio_log = gammaln(n) - gammaln(n-1)
        gamma_sum_log = sum(gammaln((n-1)*prob) for prob in p)

        power_sum_log = 0
        for value, prob in zip(x, p):
            exponent = (n-1)*prob - 1
            if value <= 0:
                # Non-positive coordinate: 0 if its exponent is negative,
                # otherwise it contributes nothing to the sum.
                if exponent < 0:
                    return 0
            else:
                power_sum_log += exponent * np.log(value)

        return np.exp(gamma_ratio_log - gamma_sum_log - power_sum_log)
    except Exception as e:
        print(f"Error: {e}")
        return 0

# Input: the comparison below is run for every trial count n = 1, ..., max_n.
max_n = int(input("Enter the maximum number of trials (n): "))
k = 4  # Number of categories

# Prepare for plotting: the two lists receive one value per n, in the order of n_values.
n_values = range(1, max_n + 1)
continuous_values = []
multinomial_values = []

# For each n, draw one outcome and evaluate both the continuous formula and the exact PMF at it
for n in n_values:
    # Re-seeding inside the loop restarts the generator on every iteration, so the
    # Dirichlet draw p is identical for all n; only the multinomial draw x depends on n.
    np.random.seed(42)  # For reproducibility
    p = np.random.dirichlet(np.ones(k))  # Probability vector over the k categories
    x = np.random.multinomial(n, p)  # One random count vector for n trials
    
    # Calculate probabilities.
    # Despite the "cdf_" prefix, both values are point evaluations, not cumulative probabilities.
    # x / n turns the counts into proportions, which is the form the continuous function checks for (sum == 1).
    cdf_continuous = continuous_deformation_of_multinomial_pdf(x / n, n, p)
    cdf_multinomial = multinomial.pmf(x, n, p)
    
    continuous_values.append(cdf_continuous)
    multinomial_values.append(cdf_multinomial)

# Plotting: both series against n on one set of axes
plt.figure(figsize=(12, 6))
plt.plot(n_values, continuous_values, label='Continuous Deformation', marker='o')
plt.plot(n_values, multinomial_values, label='Multinomial', marker='x')
plt.xlabel('Number of Trials (n)')
plt.ylabel('Probability')
plt.title('Comparison between Multinomial and Its Continuous Deformation')
plt.legend()
plt.grid(True)
# Pin the y-axis to start at 0 and end at the largest value in either series
plt.ylim(0, max(max(continuous_values), max(multinomial_values)))
plt.show()

In [None]:
import numpy as np
import matplotlib.pyplot as plt
from scipy.stats import multinomial
from scipy.special import gamma

def continuous_deformation_of_multinomial_pdf(x, n, p):
    """Evaluate the "continuous deformation" formula directly with Gamma functions.

    The value returned is::

        gamma(n - 1) / (prod_i gamma((n - 1) * p_i) * prod_i x_i ** ((n - 1) * p_i - 1))

    NOTE(review): the Gamma terms match a Dirichlet density with parameters
    ``(n - 1) * p_i``, but the power term divides instead of multiplying --
    confirm this is intended.
    NOTE(review): ``gamma`` is evaluated directly rather than in log space, so
    it presumably overflows to ``inf`` for large ``n`` (roughly beyond 170 in
    double precision) -- confirm the intended range of ``n``.

    Args:
        x: Proportions; must sum to 1 within ``np.isclose`` tolerance.
        n: Number of trials.
        p: Category probabilities, same length as ``x``.

    Returns:
        The value above. ``0`` is returned when ``x`` does not sum to 1, when
        some ``x_i`` is exactly 0 while ``(n - 1) * p_i - 1`` is negative, or
        when any exception occurs during the computation (it is printed).

    Raises:
        ValueError: If ``x`` and ``p`` differ in length.
    """
    if len(x) != len(p):
        raise ValueError("Length of x and p must be the same!")
    if not np.isclose(sum(x), 1):
        return 0

    try:
        scale = n - 1
        top = gamma(scale)

        # Running product of gamma(scale * p_i)
        gamma_product = 1
        for prob in p:
            gamma_product *= gamma(scale * prob)

        # Running product of x_i ** (scale * p_i - 1)
        power_product = 1
        for value, prob in zip(x, p):
            exponent = scale * prob - 1
            if value == 0 and exponent < 0:
                return 0
            power_product *= value**exponent

        return top / (gamma_product * power_product)
    except Exception as e:
        print(f"Error: {e}")
        return 0

# Input: the comparison below is run for every trial count n = 1, ..., max_n.
max_n = int(input("Enter the maximum number of trials (n): "))
k = 4  # Number of categories

# Prepare for plotting: the two lists receive one value per n, in the order of n_values.
n_values = range(1, max_n + 1)
continuous_values = []
multinomial_values = []

# For each n, draw one outcome and evaluate both the continuous formula and the exact PMF at it
for n in n_values:
    # Re-seeding inside the loop restarts the generator on every iteration, so the
    # Dirichlet draw p is identical for all n; only the multinomial draw x depends on n.
    np.random.seed(42)  # For reproducibility
    p = np.random.dirichlet(np.ones(k))  # Probability vector over the k categories
    x = np.random.multinomial(n, p)  # One random count vector for n trials
    
    # Calculate probabilities.
    # Despite the "cdf_" prefix, both values are point evaluations, not cumulative probabilities.
    # x / n turns the counts into proportions, which is the form the continuous function checks for (sum == 1).
    cdf_continuous = continuous_deformation_of_multinomial_pdf(x / n, n, p)
    cdf_multinomial = multinomial.pmf(x, n, p)
    
    continuous_values.append(cdf_continuous)
    multinomial_values.append(cdf_multinomial)

# Plotting: both series against n on one set of axes
plt.figure(figsize=(12, 6))
plt.plot(n_values, continuous_values, label='Continuous Deformation', marker='o')
plt.plot(n_values, multinomial_values, label='Multinomial', marker='x')
plt.xlabel('Number of Trials (n)')
plt.ylabel('Probability')
plt.title('Comparison between Multinomial and Its Continuous Deformation')
plt.legend()
plt.grid(True)
# Pin the y-axis to start at 0 and end at the largest value in either series
plt.ylim(0, max(max(continuous_values), max(multinomial_values)))
plt.show()

In [None]:
import numpy as np
import matplotlib.pyplot as plt
from scipy.stats import multinomial
from scipy.special import gammaln

def continuous_deformation_of_multinomial_pdf(x, n, p):
    """Evaluate the log-space "continuous deformation" formula at a point.

    The value returned is ``exp(A - B - C)`` with

    * ``A = gammaln(n) - gammaln(n - 1)``,
    * ``B = sum_i gammaln((n - 1) * p_i)``,
    * ``C = sum_i ((n - 1) * p_i - 1) * log(x_i)`` over the strictly
      positive ``x_i``.

    NOTE(review): the Gamma terms match a Dirichlet density with parameters
    ``(n - 1) * p_i``, but the leading term and the sign of the power term
    differ from the textbook Dirichlet pdf -- confirm this is intended.

    Args:
        x: Proportions; must sum to 1 within ``np.isclose`` tolerance.
        n: Number of trials.
        p: Category probabilities, same length as ``x``.

    Returns:
        The value above. ``0`` is returned when ``x`` does not sum to 1, when
        some ``x_i`` is exactly 0 while ``(n - 1) * p_i - 1`` is negative, or
        when any exception occurs during the computation (it is printed).
        Negative coordinates are ignored.

    Raises:
        ValueError: If ``x`` and ``p`` differ in length.
    """
    if len(x) != len(p):
        raise ValueError("Length of x and p must be the same!")
    if not np.isclose(sum(x), 1):
        return 0

    try:
        gamma_ratio_log = gammaln(n) - gammaln(n - 1)
        gamma_sum_log = sum(gammaln((n - 1) * prob) for prob in p)

        power_sum_log = 0
        for value, prob in zip(x, p):
            if value > 0:
                power_sum_log += ((n - 1) * prob - 1) * np.log(value)
                continue
            # Only an exact zero paired with a negative exponent ends the evaluation.
            if value == 0 and (n - 1) * prob - 1 < 0:
                return 0

        return np.exp(gamma_ratio_log - gamma_sum_log - power_sum_log)
    except Exception as e:
        print(f"Error: {e}")
        return 0

# Input: the comparison below is run for every trial count n = 1, ..., max_n.
max_n = int(input("Enter the maximum number of trials (n): "))
k = 4  # Number of categories

# Prepare for plotting: the two lists receive one value per n, in the order of n_values.
n_values = range(1, max_n + 1)
continuous_values = []
multinomial_values = []

# For each n, draw one outcome and evaluate both the continuous formula and the exact PMF at it
for n in n_values:
    # Re-seeding inside the loop restarts the generator on every iteration, so the
    # Dirichlet draw p is identical for all n; only the multinomial draw x depends on n.
    np.random.seed(42)  # For reproducibility
    p = np.random.dirichlet(np.ones(k))  # Probability vector over the k categories
    x = np.random.multinomial(n, p)  # One random count vector for n trials
    
    # Ensure x.sum() == n and p.sum() == 1
    assert np.isclose(np.sum(x), n), "x does not sum to n"
    assert np.isclose(np.sum(p), 1), "p does not sum to 1"
    
    # Calculate probabilities.
    # Despite the "cdf_" prefix, both values are point evaluations, not cumulative probabilities.
    # x / n turns the counts into proportions, which is the form the continuous function checks for (sum == 1).
    cdf_continuous = continuous_deformation_of_multinomial_pdf(x / n, n, p)
    cdf_multinomial = multinomial.pmf(x, n, p)
    
    continuous_values.append(cdf_continuous)
    multinomial_values.append(cdf_multinomial)

# Plotting: both series against n on one set of axes
plt.figure(figsize=(12, 6))
plt.plot(n_values, continuous_values, label='Continuous Deformation', marker='o')
plt.plot(n_values, multinomial_values, label='Multinomial', marker='x')
plt.xlabel('Number of Trials (n)')
plt.ylabel('Probability')
plt.title('Comparison between Multinomial and Its Continuous Deformation')
plt.legend()
plt.grid(True)
# Pin the y-axis to start at 0 and end at the largest value in either series
plt.ylim(0, max(max(continuous_values), max(multinomial_values)))
plt.show()

In [None]:
#Comparison of Multinomial Distribution and Its Approximations
import numpy as np
import matplotlib.pyplot as plt
from scipy.stats import multinomial, norm, dirichlet, poisson
from math import factorial
import itertools

def edgeworth_multinomial_pmf(counts, probs, n):
    """Score an outcome with a per-category, moment-corrected normal product.

    Each category is standardised with the binomial mean ``n * p_i`` and
    variance ``n * p_i * (1 - p_i)``. The result is::

        multinomial_coefficient * prod_i phi(z_i) * prod_i correction_i

    where ``correction_i`` is ``Phi(z_i)`` plus three Hermite-polynomial terms
    weighted by the skewness and kurtosis expressions computed below.

    NOTE(review): the correction starts from the normal CDF and is multiplied
    by a density product and the multinomial coefficient, which is not the
    textbook Edgeworth density expansion -- confirm the intended formula.

    Parameters
    ----------
    counts : sequence of int
        Observed count per category.
    probs : sequence of float
        Category probabilities. A value of exactly 0 or 1 makes the variance
        zero, so the standardisation divides by zero.
    n : int
        Total number of trials.

    Returns
    -------
    float
        The value of the formula above.
    """
    counts = np.array(counts)
    probs = np.array(probs)

    # Mean and variance of each category's count
    mu = n * probs
    sigma2 = n * probs * (1 - probs)

    # Standardised counts
    z = (counts - mu) / np.sqrt(sigma2)

    # Moment expressions used as weights of the correction terms
    skewness = (1 - 2 * probs) / np.sqrt(sigma2 / n)
    kurtosis = (1 - 6 * probs * (1 - probs)) / (sigma2 / n)

    phi_z = norm.pdf(z)
    Phi_z = norm.cdf(z)

    term1 = skewness / 6 * (z**3 - 3*z) * phi_z
    term2 = kurtosis / 24 * (z**4 - 6*z**2 + 3) * phi_z
    term3 = (skewness**2) / 72 * (z**6 - 15*z**4 + 45*z**2 - 15) * phi_z

    correction = Phi_z + term1 + term2 + term3

    # Multinomial coefficient. The factorials are multiplied as Python integers:
    # np.prod would cast them to a fixed-width integer array and wrap around
    # silently once the product exceeds 64 bits (already the case for 10!**4).
    denominator = 1
    for c in counts:
        denominator *= factorial(c)
    multinomial_coeff = factorial(n) / denominator

    # Product of the standard-normal densities at the standardised counts
    normal_approx = np.prod(phi_z)

    return multinomial_coeff * normal_approx * np.prod(correction)

# Approximation using normal distribution
def normal_multinomial_pmf(counts, probs, n):
    """Product of standard-normal densities at the standardised category counts.

    Every count is standardised with mean ``n * p_i`` and variance
    ``n * p_i * (1 - p_i)``, and the standard-normal pdf values are multiplied.

    NOTE(review): the densities are not divided by the standard deviations and
    the categories are handled independently, so the result is not a
    normalised approximation of the multinomial PMF -- confirm intent.

    Parameters
    ----------
    counts : sequence of int
        Observed count per category.
    probs : sequence of float
        Category probabilities.
    n : int
        Total number of trials.

    Returns
    -------
    float
        The product of the per-category density values.
    """
    observed = np.array(counts)
    category_probs = np.array(probs)
    expected = n * category_probs
    variances = expected * (1 - category_probs)
    z_scores = (observed - expected) / np.sqrt(variances)
    return np.prod(norm.pdf(z_scores))

# Approximation using Poisson distribution
def poisson_multinomial_pmf(counts, probs, n):
    """Approximate a multinomial probability by a product of Poisson PMFs.

    Category ``i`` is modelled as Poisson with rate ``n * probs[i]`` and the
    per-category probabilities of the observed counts are multiplied.

    Parameters
    ----------
    counts : sequence of int
        Observed count per category.
    probs : sequence of float
        Category probabilities; lists and tuples are accepted as well as arrays.
    n : int
        Total number of trials.

    Returns
    -------
    float
        Product of ``poisson.pmf(counts[i], n * probs[i])`` over the categories.
    """
    counts = np.array(counts)
    # Convert first: with a plain list, ``n * probs`` would repeat the list
    # n times instead of scaling each probability.
    probs = np.asarray(probs, dtype=float)
    lambdas = n * probs
    poisson_approx = np.prod([poisson.pmf(counts[i], lambdas[i]) for i in range(len(probs))])
    return poisson_approx

# MCMC Approximation Function
def mcmc_multinomial_pmf(counts, probs, n, num_samples=10000):
    """Estimate P(X = counts) by simple Monte Carlo sampling.

    Despite the name, no Markov chain is involved: ``num_samples`` independent
    outcomes are drawn with ``np.random.multinomial`` and the estimate is the
    fraction of draws that equal ``counts`` in every category.

    Parameters
    ----------
    counts : sequence of int
        Outcome whose probability is estimated.
    probs : sequence of float
        Category probabilities.
    n : int
        Number of trials per draw.
    num_samples : int, optional
        Number of independent draws (default 10000).

    Returns
    -------
    float
        The observed frequency of ``counts``, between 0 and 1. The value
        depends on the state of NumPy's global random generator.
    """
    counts = np.array(counts)

    # One vectorised call draws every sample at once: shape (num_samples, k).
    samples = np.random.multinomial(n, probs, size=num_samples)

    # A draw matches only if all of its categories equal the target counts.
    return np.mean(np.all(samples == counts, axis=1))

# Laplace Approximation Function
def laplace_multinomial_pmf(counts, probs, n):
    """Multinomial coefficient times a product of standard-normal densities.

    Every count is standardised with mean ``n * p_i`` and variance
    ``n * p_i * (1 - p_i)``; the product of the standard-normal pdf values is
    multiplied by ``n! / prod_i counts_i!``.

    NOTE(review): no separate Laplace correction term is computed here; the
    function is the normal product scaled by the multinomial coefficient --
    confirm that this is the intended approximation.

    Parameters
    ----------
    counts : sequence of int
        Observed count per category.
    probs : sequence of float
        Category probabilities.
    n : int
        Total number of trials.

    Returns
    -------
    float
        The value of the formula above.
    """
    counts = np.array(counts)
    probs = np.array(probs)

    # Mean and variance of each category's count
    mu = n * probs
    sigma2 = n * probs * (1 - probs)

    # Multinomial coefficient. The factorials are multiplied as Python integers:
    # np.prod would cast them to a fixed-width integer array and wrap around
    # silently once the product exceeds 64 bits (already the case for 10!**4).
    denominator = 1
    for c in counts:
        denominator *= factorial(c)
    multinomial_coeff = factorial(n) / denominator

    # Product of the standard-normal densities at the standardised counts
    normal_approx = np.prod(norm.pdf((counts - mu) / np.sqrt(sigma2)))

    return multinomial_coeff * normal_approx

'''
# Approximation using Dirichlet distribution
def dirichlet_multinomial_pmf(counts, alpha):
    counts = np.array(counts)
    alpha = np.array(alpha)
    dirichlet_approx = dirichlet.pdf(counts, alpha)
    return dirichlet_approx

alpha = np.random.rand(categories) + 1
dirichlet_pmfs = [dirichlet_multinomial_pmf(counts, alpha) for counts in counts_list]
'''
'''
# Approximation using Bayesian estimation
def bayesian_multinomial_pmf(counts, probs, n):
    counts = np.array(counts)
    alpha = np.ones_like(probs)
    posterior = dirichlet.pdf(probs, alpha + counts)
    bayesian_approx = multinomial.pmf(counts, n, probs) * posterior
    return bayesian_approx

bayesian_pmfs = [bayesian_multinomial_pmf(counts, probs, trials) for counts in counts_list]
'''

def measure_time_and_accuracy(trials, probs, counts_list):
    """Evaluate the exact multinomial PMF and five approximations on every outcome.

    For each method the PMF is computed for every count vector in
    ``counts_list`` and the elapsed time of that pass is recorded. Each
    approximation is then compared with the exact values.

    Parameters
    ----------
    trials : int
        Number of trials passed to every PMF function.
    probs : array-like of float
        Category probabilities passed to every PMF function.
    counts_list : sequence of sequence of int
        Outcomes at which the PMFs are evaluated.

    Returns
    -------
    tuple
        22 items in this order: a ``(pmfs, seconds)`` pair for each of
        multinomial, edgeworth, normal, poisson, mcmc and laplace (``pmfs`` is
        a list aligned with ``counts_list``); then the mean absolute errors of
        edgeworth, normal, poisson, mcmc and laplace; then their mean squared
        errors in the same order.
    """
    # Imported locally because this cell's import block does not import ``time``.
    import time

    def timed_pmfs(pmf_at):
        # Run one method over all outcomes and return (values, elapsed seconds).
        # perf_counter is the high-resolution clock meant for short durations.
        start = time.perf_counter()
        values = [pmf_at(counts) for counts in counts_list]
        return values, time.perf_counter() - start

    # Exact PMF
    multinomial_pmfs, multinomial_time = timed_pmfs(
        lambda counts: multinomial.pmf(counts, n=trials, p=probs))

    # Approximations
    edgeworth_pmfs, edgeworth_time = timed_pmfs(
        lambda counts: edgeworth_multinomial_pmf(counts, probs, trials))
    normal_pmfs, normal_time = timed_pmfs(
        lambda counts: normal_multinomial_pmf(counts, probs, trials))
    poisson_pmfs, poisson_time = timed_pmfs(
        lambda counts: poisson_multinomial_pmf(counts, probs, trials))
    mcmc_pmfs, mcmc_time = timed_pmfs(
        lambda counts: mcmc_multinomial_pmf(counts, probs, trials))
    laplace_pmfs, laplace_time = timed_pmfs(
        lambda counts: laplace_multinomial_pmf(counts, probs, trials))

    # Accuracy measures
    def calculate_accuracy(true_pmf, approx_pmf):
        # Mean absolute error
        return np.mean(np.abs(true_pmf - np.asarray(approx_pmf)))

    def calculate_mse(true_pmf, approx_pmf):
        # Mean squared error
        return np.mean((true_pmf - np.asarray(approx_pmf)) ** 2)

    multinomial_pmf = np.array(multinomial_pmfs)
    approximations = (edgeworth_pmfs, normal_pmfs, poisson_pmfs, mcmc_pmfs, laplace_pmfs)

    (edgeworth_accuracy, normal_accuracy, poisson_accuracy,
     mcmc_accuracy, laplace_accuracy) = [
        calculate_accuracy(multinomial_pmf, approx) for approx in approximations]

    (edgeworth_mse, normal_mse, poisson_mse,
     mcmc_mse, laplace_mse) = [
        calculate_mse(multinomial_pmf, approx) for approx in approximations]

    return (multinomial_pmfs, multinomial_time,
            edgeworth_pmfs, edgeworth_time,
            normal_pmfs, normal_time,
            poisson_pmfs, poisson_time,
            mcmc_pmfs, mcmc_time,
            laplace_pmfs, laplace_time,
            edgeworth_accuracy, normal_accuracy, poisson_accuracy, mcmc_accuracy, laplace_accuracy,
            edgeworth_mse, normal_mse, poisson_mse, mcmc_mse, laplace_mse)

# Parameters and user input
categories = 4
trials = int(input("Number of trials: "))

# Generate random probabilities that sum to 1
# (no seed is set here, so the probabilities differ from run to run)
probs = np.random.rand(categories)
probs /= probs.sum()

# Generate all possible counts:
# enumerate every tuple in {0, ..., trials}^categories, then keep those whose
# entries sum to `trials`. The enumeration has (trials + 1) ** categories
# candidates, so it grows quickly with `trials`.
counts_list = list(itertools.product(range(trials + 1), repeat=categories))
counts_list = [counts for counts in counts_list if sum(counts) == trials]

'''
# Calculate PMFs
multinomial_pmfs = [multinomial.pmf(counts, n=trials, p=probs) for counts in counts_list]
edgeworth_pmfs = [edgeworth_multinomial_pmf(counts, probs, trials) for counts in counts_list]
normal_pmfs = [normal_multinomial_pmf(counts, probs, trials) for counts in counts_list]
poisson_pmfs = [poisson_multinomial_pmf(counts, probs, trials) for counts in counts_list]
mcmc_pmfs = [mcmc_multinomial_pmf(counts, probs, trials) for counts in counts_list]
laplace_pmfs = [laplace_multinomial_pmf(counts, probs, trials) for counts in counts_list]
'''

# Measure time and accuracy
# The function returns one flat tuple; it is unpacked in the order it is returned:
# (pmfs, time) per method, then the mean absolute errors, then the mean squared errors.
results = measure_time_and_accuracy(trials, probs, counts_list)
(multinomial_pmfs, multinomial_time, 
 edgeworth_pmfs, edgeworth_time,
 normal_pmfs, normal_time,
 poisson_pmfs, poisson_time,
 mcmc_pmfs, mcmc_time,
 laplace_pmfs, laplace_time,
 edgeworth_accuracy, normal_accuracy, poisson_accuracy, mcmc_accuracy, laplace_accuracy,
 edgeworth_mse, normal_mse, poisson_mse, mcmc_mse, laplace_mse) = results

# Plotting: a 2x2 grid (PMF curves, execution times, mean absolute errors, mean squared errors)
#labels = [str(counts) for counts in counts_list]
#x = np.arange(len(counts_list))

plt.figure(figsize=(18, 12))

# Probability mass functions comparison
# The x-axis is the position of each outcome in counts_list, not the outcome itself.
plt.subplot(2, 2, 1)
x = np.arange(len(counts_list))
plt.plot(x, multinomial_pmfs, marker='o', linestyle='-', label='True Multinomial')
plt.plot(x, edgeworth_pmfs, marker='x', linestyle='-', label='Edgeworth Approximation')
plt.plot(x, normal_pmfs, marker='v', linestyle='-', label='Normal Approximation')
plt.plot(x, poisson_pmfs, marker='s', linestyle='-', label='Poisson Approximation')
plt.plot(x, mcmc_pmfs, marker='D', linestyle='-', label='MCMC Approximation')
plt.plot(x, laplace_pmfs, marker='^', linestyle='-', label='Laplace Approximation')
plt.xlabel('Counts')
plt.ylabel('PMF')
plt.title('Probability Mass Functions Comparison')
#plt.xticks(x, [str(counts) for counts in counts_list], rotation=90)
plt.legend()

# Execution time comparison (one bar per method, including the exact PMF)
plt.subplot(2, 2, 2)
methods = ['Multinomial', 'Edgeworth', 'Normal', 'Poisson', 'MCMC', 'Laplace']
times = [multinomial_time, edgeworth_time, normal_time, poisson_time, mcmc_time, laplace_time]
plt.bar(methods, times, color=['black', 'red', 'blue', 'orange', 'green', 'purple'])
plt.xlabel('Method')
plt.ylabel('Execution Time (s)')
plt.title('Execution Time Comparison')

# Accuracy comparison: mean absolute error of each approximation against the exact PMF
plt.subplot(2, 2, 3)
methods = ['Edgeworth', 'Normal', 'Poisson', 'MCMC', 'Laplace']
accuracies = [edgeworth_accuracy, normal_accuracy, poisson_accuracy, mcmc_accuracy, laplace_accuracy]
plt.bar(methods, accuracies, color=['red', 'blue', 'orange', 'green', 'purple'])
plt.xlabel('Method')
plt.ylabel('Mean Absolute Error')
plt.title('Mean Absolute Error Comparison')

# Accuracy comparison: mean squared error of each approximation against the exact PMF
plt.subplot(2, 2, 4)
methods = ['Edgeworth', 'Normal', 'Poisson', 'MCMC', 'Laplace']
mses = [edgeworth_mse, normal_mse, poisson_mse, mcmc_mse, laplace_mse]
plt.bar(methods, mses, color=['red', 'blue', 'orange', 'green', 'purple'])
plt.xlabel('Method')
plt.ylabel('Mean Squared Error')
plt.title('Mean Squared Error Comparison')

#plt.suptitle(f'Comparison of Multinomial Distribution and Its Approximations (Number of trials: {trials})', fontsize=16)

# rect keeps the subplots below the top 4% of the figure
# (presumably reserved for the suptitle that is commented out above).
plt.tight_layout(rect=[0, 0, 1, 0.96])
plt.show()

In [None]:
#Comparison of Multinomial Distribution and Its Approximations with logarithmic vertical axis
import numpy as np
import matplotlib.pyplot as plt
from scipy.stats import multinomial, norm, dirichlet, poisson
from math import factorial
import itertools
import time

def edgeworth_multinomial_pmf(counts, probs, n):
    """Score an outcome with a per-category, moment-corrected normal product.

    Each category is standardised with the binomial mean ``n * p_i`` and
    variance ``n * p_i * (1 - p_i)``. The result is::

        multinomial_coefficient * prod_i phi(z_i) * prod_i correction_i

    where ``correction_i`` is ``Phi(z_i)`` plus three Hermite-polynomial terms
    weighted by the skewness and kurtosis expressions computed below.

    NOTE(review): the correction starts from the normal CDF and is multiplied
    by a density product and the multinomial coefficient, which is not the
    textbook Edgeworth density expansion -- confirm the intended formula.

    Parameters
    ----------
    counts : sequence of int
        Observed count per category.
    probs : sequence of float
        Category probabilities. A value of exactly 0 or 1 makes the variance
        zero, so the standardisation divides by zero.
    n : int
        Total number of trials.

    Returns
    -------
    float
        The value of the formula above.
    """
    counts = np.array(counts)
    probs = np.array(probs)

    # Mean and variance of each category's count
    mu = n * probs
    sigma2 = n * probs * (1 - probs)

    # Standardised counts
    z = (counts - mu) / np.sqrt(sigma2)

    # Moment expressions used as weights of the correction terms
    skewness = (1 - 2 * probs) / np.sqrt(sigma2 / n)
    kurtosis = (1 - 6 * probs * (1 - probs)) / (sigma2 / n)

    phi_z = norm.pdf(z)
    Phi_z = norm.cdf(z)

    term1 = skewness / 6 * (z**3 - 3*z) * phi_z
    term2 = kurtosis / 24 * (z**4 - 6*z**2 + 3) * phi_z
    term3 = (skewness**2) / 72 * (z**6 - 15*z**4 + 45*z**2 - 15) * phi_z

    correction = Phi_z + term1 + term2 + term3

    # Multinomial coefficient. The factorials are multiplied as Python integers:
    # np.prod would cast them to a fixed-width integer array and wrap around
    # silently once the product exceeds 64 bits (already the case for 10!**4).
    denominator = 1
    for c in counts:
        denominator *= factorial(c)
    multinomial_coeff = factorial(n) / denominator

    # Product of the standard-normal densities at the standardised counts
    normal_approx = np.prod(phi_z)

    return multinomial_coeff * normal_approx * np.prod(correction)

# Approximation using normal distribution
def normal_multinomial_pmf(counts, probs, n):
    """Product of standard-normal densities at the standardised category counts.

    Every count is standardised with mean ``n * p_i`` and variance
    ``n * p_i * (1 - p_i)``, and the standard-normal pdf values are multiplied.

    NOTE(review): the densities are not divided by the standard deviations and
    the categories are handled independently, so the result is not a
    normalised approximation of the multinomial PMF -- confirm intent.

    Parameters
    ----------
    counts : sequence of int
        Observed count per category.
    probs : sequence of float
        Category probabilities.
    n : int
        Total number of trials.

    Returns
    -------
    float
        The product of the per-category density values.
    """
    observed = np.array(counts)
    category_probs = np.array(probs)
    expected = n * category_probs
    variances = expected * (1 - category_probs)
    z_scores = (observed - expected) / np.sqrt(variances)
    return np.prod(norm.pdf(z_scores))

# Approximation using Poisson distribution
def poisson_multinomial_pmf(counts, probs, n):
    """Approximate a multinomial probability by a product of Poisson PMFs.

    Category ``i`` is modelled as Poisson with rate ``n * probs[i]`` and the
    per-category probabilities of the observed counts are multiplied.

    Parameters
    ----------
    counts : sequence of int
        Observed count per category.
    probs : sequence of float
        Category probabilities; lists and tuples are accepted as well as arrays.
    n : int
        Total number of trials.

    Returns
    -------
    float
        Product of ``poisson.pmf(counts[i], n * probs[i])`` over the categories.
    """
    counts = np.array(counts)
    # Convert first: with a plain list, ``n * probs`` would repeat the list
    # n times instead of scaling each probability.
    probs = np.asarray(probs, dtype=float)
    lambdas = n * probs
    poisson_approx = np.prod([poisson.pmf(counts[i], lambdas[i]) for i in range(len(probs))])
    return poisson_approx

# MCMC Approximation Function
def mcmc_multinomial_pmf(counts, probs, n, num_samples=10000):
    """Estimate P(X = counts) by simple Monte Carlo sampling.

    Despite the name, no Markov chain is involved: ``num_samples`` independent
    outcomes are drawn with ``np.random.multinomial`` and the estimate is the
    fraction of draws that equal ``counts`` in every category.

    Parameters
    ----------
    counts : sequence of int
        Outcome whose probability is estimated.
    probs : sequence of float
        Category probabilities.
    n : int
        Number of trials per draw.
    num_samples : int, optional
        Number of independent draws (default 10000).

    Returns
    -------
    float
        The observed frequency of ``counts``, between 0 and 1. The value
        depends on the state of NumPy's global random generator.
    """
    counts = np.array(counts)

    # One vectorised call draws every sample at once: shape (num_samples, k).
    samples = np.random.multinomial(n, probs, size=num_samples)

    # A draw matches only if all of its categories equal the target counts.
    return np.mean(np.all(samples == counts, axis=1))

# Laplace Approximation Function
def laplace_multinomial_pmf(counts, probs, n):
    """Multinomial coefficient times a product of standard-normal densities.

    Every count is standardised with mean ``n * p_i`` and variance
    ``n * p_i * (1 - p_i)``; the product of the standard-normal pdf values is
    multiplied by ``n! / prod_i counts_i!``.

    NOTE(review): no separate Laplace correction term is computed here; the
    function is the normal product scaled by the multinomial coefficient --
    confirm that this is the intended approximation.

    Parameters
    ----------
    counts : sequence of int
        Observed count per category.
    probs : sequence of float
        Category probabilities.
    n : int
        Total number of trials.

    Returns
    -------
    float
        The value of the formula above.
    """
    counts = np.array(counts)
    probs = np.array(probs)

    # Mean and variance of each category's count
    mu = n * probs
    sigma2 = n * probs * (1 - probs)

    # Multinomial coefficient. The factorials are multiplied as Python integers:
    # np.prod would cast them to a fixed-width integer array and wrap around
    # silently once the product exceeds 64 bits (already the case for 10!**4).
    denominator = 1
    for c in counts:
        denominator *= factorial(c)
    multinomial_coeff = factorial(n) / denominator

    # Product of the standard-normal densities at the standardised counts
    normal_approx = np.prod(norm.pdf((counts - mu) / np.sqrt(sigma2)))

    return multinomial_coeff * normal_approx

def measure_time_and_accuracy(trials, probs, counts_list):
    """Evaluate the exact multinomial PMF and five approximations on every outcome.

    For each method the PMF is computed for every count vector in
    ``counts_list`` and the elapsed time of that pass is recorded. Each
    approximation is then compared with the exact values.

    Parameters
    ----------
    trials : int
        Number of trials passed to every PMF function.
    probs : array-like of float
        Category probabilities passed to every PMF function.
    counts_list : sequence of sequence of int
        Outcomes at which the PMFs are evaluated.

    Returns
    -------
    tuple
        22 items in this order: a ``(pmfs, seconds)`` pair for each of
        multinomial, edgeworth, normal, poisson, mcmc and laplace (``pmfs`` is
        a list aligned with ``counts_list``); then the mean absolute errors of
        edgeworth, normal, poisson, mcmc and laplace; then their mean squared
        errors in the same order.
    """
    def timed_pmfs(pmf_at):
        # Run one method over all outcomes and return (values, elapsed seconds).
        # perf_counter is the high-resolution clock meant for short durations.
        start = time.perf_counter()
        values = [pmf_at(counts) for counts in counts_list]
        return values, time.perf_counter() - start

    # Exact PMF
    multinomial_pmfs, multinomial_time = timed_pmfs(
        lambda counts: multinomial.pmf(counts, n=trials, p=probs))

    # Approximations
    edgeworth_pmfs, edgeworth_time = timed_pmfs(
        lambda counts: edgeworth_multinomial_pmf(counts, probs, trials))
    normal_pmfs, normal_time = timed_pmfs(
        lambda counts: normal_multinomial_pmf(counts, probs, trials))
    poisson_pmfs, poisson_time = timed_pmfs(
        lambda counts: poisson_multinomial_pmf(counts, probs, trials))
    mcmc_pmfs, mcmc_time = timed_pmfs(
        lambda counts: mcmc_multinomial_pmf(counts, probs, trials))
    laplace_pmfs, laplace_time = timed_pmfs(
        lambda counts: laplace_multinomial_pmf(counts, probs, trials))

    # Accuracy measures
    def calculate_accuracy(true_pmf, approx_pmf):
        # Mean absolute error
        return np.mean(np.abs(true_pmf - np.asarray(approx_pmf)))

    def calculate_mse(true_pmf, approx_pmf):
        # Mean squared error
        return np.mean((true_pmf - np.asarray(approx_pmf)) ** 2)

    multinomial_pmf = np.array(multinomial_pmfs)
    approximations = (edgeworth_pmfs, normal_pmfs, poisson_pmfs, mcmc_pmfs, laplace_pmfs)

    (edgeworth_accuracy, normal_accuracy, poisson_accuracy,
     mcmc_accuracy, laplace_accuracy) = [
        calculate_accuracy(multinomial_pmf, approx) for approx in approximations]

    (edgeworth_mse, normal_mse, poisson_mse,
     mcmc_mse, laplace_mse) = [
        calculate_mse(multinomial_pmf, approx) for approx in approximations]

    return (multinomial_pmfs, multinomial_time,
            edgeworth_pmfs, edgeworth_time,
            normal_pmfs, normal_time,
            poisson_pmfs, poisson_time,
            mcmc_pmfs, mcmc_time,
            laplace_pmfs, laplace_time,
            edgeworth_accuracy, normal_accuracy, poisson_accuracy, mcmc_accuracy, laplace_accuracy,
            edgeworth_mse, normal_mse, poisson_mse, mcmc_mse, laplace_mse)

# Parameters and user input
categories = 4
trials = int(input("Number of trials: "))

# Generate random probabilities that sum to 1
# (no seed is set here, so the probabilities differ from run to run)
probs = np.random.rand(categories)
probs /= probs.sum()

# Generate all possible counts:
# enumerate every tuple in {0, ..., trials}^categories, then keep those whose
# entries sum to `trials`. The enumeration has (trials + 1) ** categories
# candidates, so it grows quickly with `trials`.
counts_list = list(itertools.product(range(trials + 1), repeat=categories))
counts_list = [counts for counts in counts_list if sum(counts) == trials]

# Measure time and accuracy
# The function returns one flat tuple; it is unpacked in the order it is returned:
# (pmfs, time) per method, then the mean absolute errors, then the mean squared errors.
results = measure_time_and_accuracy(trials, probs, counts_list)
(multinomial_pmfs, multinomial_time, 
 edgeworth_pmfs, edgeworth_time,
 normal_pmfs, normal_time,
 poisson_pmfs, poisson_time,
 mcmc_pmfs, mcmc_time,
 laplace_pmfs, laplace_time,
 edgeworth_accuracy, normal_accuracy, poisson_accuracy, mcmc_accuracy, laplace_accuracy,
 edgeworth_mse, normal_mse, poisson_mse, mcmc_mse, laplace_mse) = results

# Plotting: a 2x2 grid (PMF curves, execution times, mean absolute errors, mean squared errors)
plt.figure(figsize=(18, 12))

# Probability mass functions comparison
# The x-axis is the position of each outcome in counts_list, not the outcome itself.
# The raw PMF values are plotted and the axis itself is made logarithmic, so the
# label names the quantity and the scale; the values are not log-transformed.
# Non-positive values (e.g. a Monte Carlo estimate of exactly 0) have no
# position on a logarithmic axis.
plt.subplot(2, 2, 1)
x = np.arange(len(counts_list))
plt.plot(x, multinomial_pmfs, marker='o', linestyle='-', label='True Multinomial')
plt.plot(x, edgeworth_pmfs, marker='x', linestyle='-', label='Edgeworth Approximation')
plt.plot(x, normal_pmfs, marker='v', linestyle='-', label='Normal Approximation')
plt.plot(x, poisson_pmfs, marker='s', linestyle='-', label='Poisson Approximation')
plt.plot(x, mcmc_pmfs, marker='D', linestyle='-', label='MCMC Approximation')
plt.plot(x, laplace_pmfs, marker='^', linestyle='-', label='Laplace Approximation')
plt.xlabel('Counts')
plt.ylabel('PMF (log scale)')
plt.title('Probability Mass Functions Comparison with logarithm')
plt.yscale('log')  # Setting the y-axis to logarithmic scale
plt.legend()

# Execution time comparison (one bar per method, including the exact PMF)
plt.subplot(2, 2, 2)
methods = ['Multinomial', 'Edgeworth', 'Normal', 'Poisson', 'MCMC', 'Laplace']
times = [multinomial_time, edgeworth_time, normal_time, poisson_time, mcmc_time, laplace_time]
plt.bar(methods, times, color=['black', 'red', 'blue', 'orange', 'green', 'purple'])
plt.xlabel('Method')
plt.ylabel('Execution Time (s)')
plt.title('Execution Time Comparison')

# Accuracy comparison: mean absolute error of each approximation against the exact PMF
plt.subplot(2, 2, 3)
methods = ['Edgeworth', 'Normal', 'Poisson', 'MCMC', 'Laplace']
accuracies = [edgeworth_accuracy, normal_accuracy, poisson_accuracy, mcmc_accuracy, laplace_accuracy]
plt.bar(methods, accuracies, color=['red', 'blue', 'orange', 'green', 'purple'])
plt.xlabel('Method')
plt.ylabel('Mean Absolute Error')
plt.title('Mean Absolute Error Comparison')

# Accuracy comparison: mean squared error of each approximation against the exact PMF
plt.subplot(2, 2, 4)
methods = ['Edgeworth', 'Normal', 'Poisson', 'MCMC', 'Laplace']
mses = [edgeworth_mse, normal_mse, poisson_mse, mcmc_mse, laplace_mse]
plt.bar(methods, mses, color=['red', 'blue', 'orange', 'green', 'purple'])
plt.xlabel('Method')
plt.ylabel('Mean Squared Error')
plt.title('Mean Squared Error Comparison')

# rect keeps the subplots below the top 4% of the figure
plt.tight_layout(rect=[0, 0, 1, 0.96])
plt.show()

In [None]:
#Comparison of Multinomial Distribution and Its Approximations (with logarithmic vertical axis)
import numpy as np
import matplotlib.pyplot as plt
from scipy.stats import multinomial, norm, dirichlet, poisson
from math import factorial
import itertools
import time

def edgeworth_multinomial_pmf(counts, probs, n):
    """Score an outcome with a per-category, moment-corrected normal product.

    Each category is standardised with the binomial mean ``n * p_i`` and
    variance ``n * p_i * (1 - p_i)``. The result is::

        multinomial_coefficient * prod_i phi(z_i) * prod_i correction_i

    where ``correction_i`` is ``Phi(z_i)`` plus three Hermite-polynomial terms
    weighted by the skewness and kurtosis expressions computed below.

    NOTE(review): the correction starts from the normal CDF and is multiplied
    by a density product and the multinomial coefficient, which is not the
    textbook Edgeworth density expansion -- confirm the intended formula.

    Parameters
    ----------
    counts : sequence of int
        Observed count per category.
    probs : sequence of float
        Category probabilities. A value of exactly 0 or 1 makes the variance
        zero, so the standardisation divides by zero.
    n : int
        Total number of trials.

    Returns
    -------
    float
        The value of the formula above.
    """
    counts = np.array(counts)
    probs = np.array(probs)

    # Mean and variance of each category's count
    mu = n * probs
    sigma2 = n * probs * (1 - probs)

    # Standardised counts
    z = (counts - mu) / np.sqrt(sigma2)

    # Moment expressions used as weights of the correction terms
    skewness = (1 - 2 * probs) / np.sqrt(sigma2 / n)
    kurtosis = (1 - 6 * probs * (1 - probs)) / (sigma2 / n)

    phi_z = norm.pdf(z)
    Phi_z = norm.cdf(z)

    term1 = skewness / 6 * (z**3 - 3*z) * phi_z
    term2 = kurtosis / 24 * (z**4 - 6*z**2 + 3) * phi_z
    term3 = (skewness**2) / 72 * (z**6 - 15*z**4 + 45*z**2 - 15) * phi_z

    correction = Phi_z + term1 + term2 + term3

    # Multinomial coefficient. The factorials are multiplied as Python integers:
    # np.prod would cast them to a fixed-width integer array and wrap around
    # silently once the product exceeds 64 bits (already the case for 10!**4).
    denominator = 1
    for c in counts:
        denominator *= factorial(c)
    multinomial_coeff = factorial(n) / denominator

    # Product of the standard-normal densities at the standardised counts
    normal_approx = np.prod(phi_z)

    return multinomial_coeff * normal_approx * np.prod(correction)

# Approximation using normal distribution
def normal_multinomial_pmf(counts, probs, n):
    """Product of standard-normal densities at the standardised category counts.

    Every count is standardised with mean ``n * p_i`` and variance
    ``n * p_i * (1 - p_i)``, and the standard-normal pdf values are multiplied.

    NOTE(review): the densities are not divided by the standard deviations and
    the categories are handled independently, so the result is not a
    normalised approximation of the multinomial PMF -- confirm intent.

    Parameters
    ----------
    counts : sequence of int
        Observed count per category.
    probs : sequence of float
        Category probabilities.
    n : int
        Total number of trials.

    Returns
    -------
    float
        The product of the per-category density values.
    """
    observed = np.array(counts)
    category_probs = np.array(probs)
    expected = n * category_probs
    variances = expected * (1 - category_probs)
    z_scores = (observed - expected) / np.sqrt(variances)
    return np.prod(norm.pdf(z_scores))

# Approximation using Poisson distribution
def poisson_multinomial_pmf(counts, probs, n):
    """Approximate a multinomial probability by a product of Poisson PMFs.

    Category ``i`` is modelled as Poisson with rate ``n * probs[i]`` and the
    per-category probabilities of the observed counts are multiplied.

    Parameters
    ----------
    counts : sequence of int
        Observed count per category.
    probs : sequence of float
        Category probabilities; lists and tuples are accepted as well as arrays.
    n : int
        Total number of trials.

    Returns
    -------
    float
        Product of ``poisson.pmf(counts[i], n * probs[i])`` over the categories.
    """
    counts = np.array(counts)
    # Convert first: with a plain list, ``n * probs`` would repeat the list
    # n times instead of scaling each probability.
    probs = np.asarray(probs, dtype=float)
    lambdas = n * probs
    poisson_approx = np.prod([poisson.pmf(counts[i], lambdas[i]) for i in range(len(probs))])
    return poisson_approx

# MCMC Approximation Function
def mcmc_multinomial_pmf(counts, probs, n, num_samples=10000):
    """Estimate P(X = counts) by simple Monte Carlo sampling.

    Despite the name, no Markov chain is involved: ``num_samples`` independent
    outcomes are drawn with ``np.random.multinomial`` and the estimate is the
    fraction of draws that equal ``counts`` in every category.

    Parameters
    ----------
    counts : sequence of int
        Outcome whose probability is estimated.
    probs : sequence of float
        Category probabilities.
    n : int
        Number of trials per draw.
    num_samples : int, optional
        Number of independent draws (default 10000).

    Returns
    -------
    float
        The observed frequency of ``counts``, between 0 and 1. The value
        depends on the state of NumPy's global random generator.
    """
    counts = np.array(counts)

    # One vectorised call draws every sample at once: shape (num_samples, k).
    samples = np.random.multinomial(n, probs, size=num_samples)

    # A draw matches only if all of its categories equal the target counts.
    return np.mean(np.all(samples == counts, axis=1))

# Laplace Approximation Function
def laplace_multinomial_pmf(counts, probs, n):
    """Multinomial coefficient times a product of standard-normal densities.

    Every count is standardised with mean ``n * p_i`` and variance
    ``n * p_i * (1 - p_i)``; the product of the standard-normal pdf values is
    multiplied by ``n! / prod_i counts_i!``.

    NOTE(review): no separate Laplace correction term is computed here; the
    function is the normal product scaled by the multinomial coefficient --
    confirm that this is the intended approximation.

    Parameters
    ----------
    counts : sequence of int
        Observed count per category.
    probs : sequence of float
        Category probabilities.
    n : int
        Total number of trials.

    Returns
    -------
    float
        The value of the formula above.
    """
    counts = np.array(counts)
    probs = np.array(probs)

    # Mean and variance of each category's count
    mu = n * probs
    sigma2 = n * probs * (1 - probs)

    # Multinomial coefficient. The factorials are multiplied as Python integers:
    # np.prod would cast them to a fixed-width integer array and wrap around
    # silently once the product exceeds 64 bits (already the case for 10!**4).
    denominator = 1
    for c in counts:
        denominator *= factorial(c)
    multinomial_coeff = factorial(n) / denominator

    # Product of the standard-normal densities at the standardised counts
    normal_approx = np.prod(norm.pdf((counts - mu) / np.sqrt(sigma2)))

    return multinomial_coeff * normal_approx

def measure_time_and_accuracy(trials, probs, counts_list):
    """Evaluate the exact multinomial PMF and five approximations on every outcome.

    For each method the PMF is computed for every count vector in
    ``counts_list`` and the elapsed time of that pass is recorded. Each
    approximation is then compared with the exact values.

    Parameters
    ----------
    trials : int
        Number of trials passed to every PMF function.
    probs : array-like of float
        Category probabilities passed to every PMF function.
    counts_list : sequence of sequence of int
        Outcomes at which the PMFs are evaluated.

    Returns
    -------
    tuple
        22 items in this order: a ``(pmfs, seconds)`` pair for each of
        multinomial, edgeworth, normal, poisson, mcmc and laplace (``pmfs`` is
        a list aligned with ``counts_list``); then the mean absolute errors of
        edgeworth, normal, poisson, mcmc and laplace; then their mean squared
        errors in the same order.
    """
    def timed_pmfs(pmf_at):
        # Run one method over all outcomes and return (values, elapsed seconds).
        # perf_counter is the high-resolution clock meant for short durations.
        start = time.perf_counter()
        values = [pmf_at(counts) for counts in counts_list]
        return values, time.perf_counter() - start

    # Exact PMF
    multinomial_pmfs, multinomial_time = timed_pmfs(
        lambda counts: multinomial.pmf(counts, n=trials, p=probs))

    # Approximations
    edgeworth_pmfs, edgeworth_time = timed_pmfs(
        lambda counts: edgeworth_multinomial_pmf(counts, probs, trials))
    normal_pmfs, normal_time = timed_pmfs(
        lambda counts: normal_multinomial_pmf(counts, probs, trials))
    poisson_pmfs, poisson_time = timed_pmfs(
        lambda counts: poisson_multinomial_pmf(counts, probs, trials))
    mcmc_pmfs, mcmc_time = timed_pmfs(
        lambda counts: mcmc_multinomial_pmf(counts, probs, trials))
    laplace_pmfs, laplace_time = timed_pmfs(
        lambda counts: laplace_multinomial_pmf(counts, probs, trials))

    # Accuracy measures
    def calculate_accuracy(true_pmf, approx_pmf):
        # Mean absolute error
        return np.mean(np.abs(true_pmf - np.asarray(approx_pmf)))

    def calculate_mse(true_pmf, approx_pmf):
        # Mean squared error
        return np.mean((true_pmf - np.asarray(approx_pmf)) ** 2)

    multinomial_pmf = np.array(multinomial_pmfs)
    approximations = (edgeworth_pmfs, normal_pmfs, poisson_pmfs, mcmc_pmfs, laplace_pmfs)

    (edgeworth_accuracy, normal_accuracy, poisson_accuracy,
     mcmc_accuracy, laplace_accuracy) = [
        calculate_accuracy(multinomial_pmf, approx) for approx in approximations]

    (edgeworth_mse, normal_mse, poisson_mse,
     mcmc_mse, laplace_mse) = [
        calculate_mse(multinomial_pmf, approx) for approx in approximations]

    return (multinomial_pmfs, multinomial_time,
            edgeworth_pmfs, edgeworth_time,
            normal_pmfs, normal_time,
            poisson_pmfs, poisson_time,
            mcmc_pmfs, mcmc_time,
            laplace_pmfs, laplace_time,
            edgeworth_accuracy, normal_accuracy, poisson_accuracy, mcmc_accuracy, laplace_accuracy,
            edgeworth_mse, normal_mse, poisson_mse, mcmc_mse, laplace_mse)

# Parameters and user input
categories = 4
trials = int(input("Number of trials: "))

# Generate random probabilities that sum to 1
# (no seed is set here, so the probabilities differ from run to run)
probs = np.random.rand(categories)
probs /= probs.sum()

# Generate all possible counts:
# enumerate every tuple in {0, ..., trials}^categories, then keep those whose
# entries sum to `trials`. The enumeration has (trials + 1) ** categories
# candidates, so it grows quickly with `trials`.
counts_list = list(itertools.product(range(trials + 1), repeat=categories))
counts_list = [counts for counts in counts_list if sum(counts) == trials]

# Measure time and accuracy
# The function returns one flat tuple; it is unpacked in the order it is returned:
# (pmfs, time) per method, then the mean absolute errors, then the mean squared errors.
results = measure_time_and_accuracy(trials, probs, counts_list)
(multinomial_pmfs, multinomial_time, 
 edgeworth_pmfs, edgeworth_time,
 normal_pmfs, normal_time,
 poisson_pmfs, poisson_time,
 mcmc_pmfs, mcmc_time,
 laplace_pmfs, laplace_time,
 edgeworth_accuracy, normal_accuracy, poisson_accuracy, mcmc_accuracy, laplace_accuracy,
 edgeworth_mse, normal_mse, poisson_mse, mcmc_mse, laplace_mse) = results

# Plotting: a 2x2 grid (log10 PMF curves, execution times, mean absolute errors, mean squared errors)
plt.figure(figsize=(18, 12))

# Colors and markers for consistency
# Index 0 is the exact multinomial; indices 1-5 are the approximations in the
# order Edgeworth, Normal, Poisson, MCMC, Laplace.
colors = ['black', 'red', 'blue', 'orange', 'green', 'purple']
markers = ['o', 'x', 'v', 's', 'D', '^']
labels = ['True Multinomial', 'Edgeworth Approximation', 'Normal Approximation', 'Poisson Approximation', 'MCMC Approximation', 'Laplace Approximation']

# Probability mass functions comparison
# Here the values themselves are log10-transformed and drawn on a linear axis.
# The x-axis is the position of each outcome in counts_list, not the outcome itself.
# NOTE(review): log10 of a zero value is -inf and of a negative value is nan;
# such points presumably leave gaps in the curves -- confirm this is acceptable.
plt.subplot(2, 2, 1)
x = np.arange(len(counts_list))
plt.plot(x, np.log10(multinomial_pmfs), marker=markers[0], linestyle='-', color=colors[0], label=labels[0])
plt.plot(x, np.log10(edgeworth_pmfs), marker=markers[1], linestyle='-', color=colors[1], label=labels[1])
plt.plot(x, np.log10(normal_pmfs), marker=markers[2], linestyle='-', color=colors[2], label=labels[2])
plt.plot(x, np.log10(poisson_pmfs), marker=markers[3], linestyle='-', color=colors[3], label=labels[3])
plt.plot(x, np.log10(mcmc_pmfs), marker=markers[4], linestyle='-', color=colors[4], label=labels[4])
plt.plot(x, np.log10(laplace_pmfs), marker=markers[5], linestyle='-', color=colors[5], label=labels[5])
plt.xlabel('Counts')
plt.ylabel('log10(PMF)')
plt.title('Probability Mass Functions Comparison with logarithm')
#plt.xticks(x, [str(counts) for counts in counts_list], rotation=90)
plt.legend()

# Execution time comparison (one bar per method, including the exact PMF)
plt.subplot(2, 2, 2)
methods = ['Multinomial', 'Edgeworth', 'Normal', 'Poisson', 'MCMC', 'Laplace']
times = [multinomial_time, edgeworth_time, normal_time, poisson_time, mcmc_time, laplace_time]
plt.bar(methods, times, color=colors)
plt.xlabel('Method')
plt.ylabel('Execution Time (s)')
plt.title('Execution Time Comparison')

# Accuracy comparison: mean absolute error of each approximation against the exact PMF
plt.subplot(2, 2, 3)
methods = ['Edgeworth', 'Normal', 'Poisson', 'MCMC', 'Laplace']
accuracies = [edgeworth_accuracy, normal_accuracy, poisson_accuracy, mcmc_accuracy, laplace_accuracy]
plt.bar(methods, accuracies, color=colors[1:])  # Skip the first color (exact multinomial) so colors match the methods
plt.xlabel('Method')
plt.ylabel('Mean Absolute Error (common logarithm)')
plt.title('Mean Absolute Error Comparison')
plt.yscale('log', base=10)  # Setting the y-axis to logarithmic scale with base 10

# Accuracy comparison: mean squared error of each approximation against the exact PMF
plt.subplot(2, 2, 4)
methods = ['Edgeworth', 'Normal', 'Poisson', 'MCMC', 'Laplace']
mses = [edgeworth_mse, normal_mse, poisson_mse, mcmc_mse, laplace_mse]
plt.bar(methods, mses, color=colors[1:])  # Skip the first color (exact multinomial) so colors match the methods
plt.xlabel('Method')
plt.ylabel('Mean Squared Error (common logarithm)')
plt.title('Mean Squared Error Comparison')
plt.yscale('log', base=10)  # Setting the y-axis to logarithmic scale with base 10

# rect keeps the subplots below the top 4% of the figure
plt.tight_layout(rect=[0, 0, 1, 0.96])
plt.show()

In [None]:
#Comparison of Binomial Distribution and Its Approximations (with logarithmic vertical axis)
import numpy as np
import scipy.stats as stats
import matplotlib.pyplot as plt
import time

# Parameters
n = int(input("n = "))
p = float(input("The probability is "))

# The normal and Edgeworth approximations standardize by
# sigma = sqrt(n * p * (1 - p)); that is zero when n == 0 or p is 0 or 1 and
# undefined (NaN) when p lies outside [0, 1], so reject those inputs up front
# instead of plotting NaNs. `not 0 < p < 1` also rejects a NaN probability.
if n < 1:
    raise ValueError("n must be a positive integer")
if not 0 < p < 1:
    raise ValueError("The probability must satisfy 0 < p < 1")

# The binomial distribution: exact PMF on the full support k = 0, ..., n
binom_rv = stats.binom(n, p)
x = np.arange(0, n + 1)
binom_pmf = binom_rv.pmf(x)

# Normal approximation: the N(mu, sigma^2) density evaluated at the integer
# support points (no continuity correction is applied)
mu = n * p
sigma = np.sqrt(n * p * (1 - p))
norm_rv = stats.norm(mu, sigma)
norm_pmf = norm_rv.pdf(x)

# Continuous normal distribution: the same density on 100 evenly spaced points
# of [-n, n], which includes negative values outside the binomial support
continuous_norm_rv = stats.norm(mu, sigma)
continuous_x = np.linspace(-n, n, 100)
continuous_norm_pmf = continuous_norm_rv.pdf(continuous_x)

# Poisson approximation: Poisson PMF with rate lambda = n * p on the same support
lambda_ = n * p
poisson_rv = stats.poisson(lambda_)
poisson_pmf = poisson_rv.pmf(x)

# Edgeworth expansion
def edgeworth_expansion(x, n, p):
    """Edgeworth approximation to the CDF of a Binomial(n, p) variable.

    With z = (x - n*p) / sqrt(n*p*q) and q = 1 - p, the expansion is

        F(x) ~ Phi(z) - phi(z) * [ g1/6  * He2(z)
                                 + g2/24 * He3(z)
                                 + g1^2/72 * He5(z) ]

    where g1 = (q - p) / sqrt(n*p*q) is the skewness, g2 = (1 - 6*p*q) / (n*p*q)
    is the excess kurtosis, and He2, He3, He5 are the probabilists' Hermite
    polynomials z^2 - 1, z^3 - 3z and z^5 - 10z^3 + 15z.

    Parameters
    ----------
    x : float or array-like of float
        Point(s) at which to evaluate the approximate CDF.
    n : int
        Number of trials.
    p : float
        Success probability; must give n*p*(1-p) > 0, otherwise the
        standardization divides by zero.

    Returns
    -------
    float or numpy.ndarray
        Approximate CDF value(s), same shape as ``x``. Because this is a
        truncated series, values are not guaranteed to lie in [0, 1] or to be
        monotone in ``x``.
    """
    q = 1 - p
    mu = n * p
    variance = n * p * q
    sigma = np.sqrt(variance)
    z = (x - mu) / sigma
    skewness = (q - p) / sigma
    kurtosis = (1 - 6*p*q) / variance  # excess kurtosis

    phi_z = stats.norm.pdf(z)
    Phi_z = stats.norm.cdf(z)

    # Probabilists' Hermite polynomials used by the CDF expansion.
    he2 = z**2 - 1
    he3 = z**3 - 3*z
    he5 = z**5 - 10*z**3 + 15*z

    # All three correction terms enter with a minus sign: a positively skewed
    # distribution has F(mean) > 1/2, and He2(0) = -1, so the skewness term
    # must be subtracted to push the CDF up at z = 0.
    correction = -phi_z * (
        (skewness / 6) * he2
        + (kurtosis / 24) * he3
        + (skewness**2 / 72) * he5
    )

    return Phi_z + correction

# Measuring execution time for each approximation
def measure_time_and_accuracy():
    """Evaluate every method on the support ``x`` and time each evaluation.

    Takes no arguments: it reads the module-level ``n``, ``p``, ``x``, ``mu``,
    ``sigma`` and ``lambda_`` defined earlier in this cell. Despite the name,
    no accuracy figure is computed here; only PMFs and timings are returned.

    Timing uses ``time.perf_counter()``, a monotonic high-resolution clock,
    because each evaluation is very short and ``time.time()`` can be too
    coarse to resolve it.

    Returns
    -------
    tuple
        ``(binom_pmf, binom_time, norm_pmf, norm_time, poisson_pmf,
        poisson_time, edgeworth_pmf, edgeworth_time)``; every ``*_time`` is the
        elapsed time in seconds, including construction of the frozen
        distribution where one is built.
    """
    # Exact binomial PMF
    start_time = time.perf_counter()
    binom_rv = stats.binom(n, p)
    binom_pmf = binom_rv.pmf(x)
    binom_time = time.perf_counter() - start_time

    # Normal density evaluated at the support points
    start_time = time.perf_counter()
    norm_rv = stats.norm(mu, sigma)
    norm_pmf = norm_rv.pdf(x)
    norm_time = time.perf_counter() - start_time

    # Poisson PMF with rate lambda_
    start_time = time.perf_counter()
    poisson_rv = stats.poisson(lambda_)
    poisson_pmf = poisson_rv.pmf(x)
    poisson_time = time.perf_counter() - start_time

    # Edgeworth: difference the approximate CDF to obtain a PMF. prepend=0
    # makes the first entry the CDF value at x[0] itself.
    # NOTE(review): the CDF is evaluated at the integers, i.e. without a
    # continuity correction (x + 0.5) -- confirm whether that is intended.
    start_time = time.perf_counter()
    edgeworth_cdf = edgeworth_expansion(x, n, p)
    edgeworth_pmf = np.diff(edgeworth_cdf, prepend=0)
    edgeworth_time = time.perf_counter() - start_time

    return binom_pmf, binom_time, norm_pmf, norm_time, poisson_pmf, poisson_time, edgeworth_pmf, edgeworth_time

# Run every method once; the result is unpacked as (pmf, elapsed seconds) pairs.
(binom_pmf, binom_time,
 norm_pmf, norm_time,
 poisson_pmf, poisson_time,
 edgeworth_pmf, edgeworth_time) = measure_time_and_accuracy()

# Accuracy measures
def calculate_accuracy(true_pmf, approx_pmf):
    """Return the mean absolute error between two PMF arrays.

    Parameters
    ----------
    true_pmf, approx_pmf : numpy.ndarray
        Reference and approximate probabilities; they are subtracted
        element-wise, so their shapes must be broadcast-compatible.

    Returns
    -------
    numpy.floating
        Mean of ``|true_pmf - approx_pmf|`` over all elements.
    """
    abs_error = np.abs(true_pmf - approx_pmf)
    return np.mean(abs_error)
def calculate_mse(true_pmf, approx_pmf):
    """Return the mean squared error between two PMF arrays.

    Parameters
    ----------
    true_pmf, approx_pmf : numpy.ndarray
        Reference and approximate probabilities; they are subtracted
        element-wise, so their shapes must be broadcast-compatible.

    Returns
    -------
    numpy.floating
        Mean of ``(true_pmf - approx_pmf) ** 2`` over all elements.
    """
    residual = true_pmf - approx_pmf
    return np.mean(np.square(residual))

# Score every PMF against the exact binomial PMF. The binomial entry compares
# the reference with itself, so its error is zero by construction.
(binom_accuracy, norm_accuracy,
 poisson_accuracy, edgeworth_accuracy) = (
    calculate_accuracy(binom_pmf, candidate_pmf)
    for candidate_pmf in (binom_pmf, norm_pmf, poisson_pmf, edgeworth_pmf)
)
(binom_mse, norm_mse,
 poisson_mse, edgeworth_mse) = (
    calculate_mse(binom_pmf, candidate_pmf)
    for candidate_pmf in (binom_pmf, norm_pmf, poisson_pmf, edgeworth_pmf)
)

# Plotting: one 2x2 figure (PMFs, execution time, MAE, MSE)
plt.figure(figsize=(18, 6))

# Probability mass functions comparison: exact PMF as bars, approximations as lines
ax_pmf = plt.subplot(2, 2, 1)
ax_pmf.bar(x, binom_pmf, alpha=0.5, label='Binomial distribution', color='black')
ax_pmf.plot(x, norm_pmf, label='Normal approximation')
ax_pmf.plot(continuous_x, continuous_norm_pmf, 'r--', label='Continuous Normal')
ax_pmf.plot(x, poisson_pmf, label='Poisson approximation')
ax_pmf.plot(x, edgeworth_pmf, label='Edgeworth approximation')
ax_pmf.set_xlabel('k')
ax_pmf.set_ylabel('Probability')
ax_pmf.set_title('Probability Mass Functions Comparison')
ax_pmf.legend()

# Execution time comparison (exact binomial included)
ax_time = plt.subplot(2, 2, 2)
methods = ['Binomial', 'Normal', 'Poisson', 'Edgeworth']
times = [binom_time, norm_time, poisson_time, edgeworth_time]
ax_time.bar(methods, times, color=['black', 'blue', 'orange', 'green'])
ax_time.set_xlabel('Method')
ax_time.set_ylabel('Execution Time (s)')
ax_time.set_title('Execution Time Comparison')

# Accuracy comparison: mean absolute error of each approximation, log-scaled axis
ax_mae = plt.subplot(2, 2, 3)
methods = ['Normal', 'Poisson', 'Edgeworth']
accuracies = [norm_accuracy, poisson_accuracy, edgeworth_accuracy]
ax_mae.bar(methods, accuracies, color=['blue', 'orange', 'green'])
ax_mae.set_xlabel('Method')
ax_mae.set_ylabel('log10(MAE)') #Mean Absolute Error
ax_mae.set_yscale('log', base=10)
ax_mae.set_title('Accuracy Comparison 1')

# Mean squared error of each approximation (same methods and colors as above)
ax_mse = plt.subplot(2, 2, 4)
mses = [norm_mse, poisson_mse, edgeworth_mse]
ax_mse.bar(methods, mses, color=['blue', 'orange', 'green'])
ax_mse.set_xlabel('Method')
ax_mse.set_ylabel('log10(MSE)') #Mean Squared Error
ax_mse.set_yscale('log', base=10)
ax_mse.set_title('Accuracy Comparison 2')

#plt.suptitle('Comparison of Binomial Distribution and Its Approximations (Number of trials: '+str(n)+', probability: '+str(p)+')', fontsize=16)

plt.tight_layout(rect=[0, 0, 1, 0.96])
plt.show()