<a href="https://colab.research.google.com/github/mjoudy/statistical-experiments/blob/master/clt.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import numpy as np
import matplotlib.pyplot as plt
from ipywidgets import interact, widgets

#%matplotlib widgetsnbextension

Law of large numbers

TODO: make another example with a different type of experiment.

In [2]:
def simulate_coin_flips(sample_size):
    """Simulate coin flips and return the running proportion of heads.

    Parameters
    ----------
    sample_size : int
        Number of flips to simulate.

    Returns
    -------
    numpy.ndarray
        Array of length ``sample_size``; entry ``k`` is the fraction of
        flips that came up 1 (counted as heads) among the first ``k + 1``.
    """
    flips = np.random.randint(0, 2, sample_size)  # each flip is 0 or 1
    heads_so_far = flips.cumsum()
    flips_so_far = np.arange(1, sample_size + 1)
    return heads_so_far / flips_so_far

# Interactive plot: the slider chooses how many flips are simulated
@interact(sample_size=widgets.IntSlider(value=1000, min=10, max=10000, step=10))
def plot_law_of_large_numbers(sample_size):
    """Plot the running proportion of heads against the number of flips.

    A dashed horizontal line at 0.5 marks the reference probability so the
    running proportion can be compared against it as the sample grows.

    Parameters
    ----------
    sample_size : int
        Number of coin flips to simulate (supplied by the slider).
    """
    flip_counts = range(1, sample_size + 1)
    running_proportion = simulate_coin_flips(sample_size)

    plt.figure(figsize=(10, 6))
    plt.plot(flip_counts, running_proportion, label="Proportion of Heads")
    plt.axhline(y=0.5, color='r', linestyle='--', label="True Probability (0.5)")

    plt.title("Law of Large Numbers")
    plt.xlabel("Sample Size")
    plt.ylabel("Proportion of Heads")
    plt.legend()
    plt.show()

interactive(children=(IntSlider(value=1000, description='sample_size', max=10000, min=10, step=10), Output()),…

CLT: Central Limit Theorem

In [3]:

def generate_sample(sample_size, num_samples, num_trials):
    """Collect sample means of exponentially distributed data.

    For each trial, ``num_samples`` independent samples of ``sample_size``
    values are drawn from an exponential distribution with scale 1.0, and
    the mean of every sample is recorded.

    Parameters
    ----------
    sample_size : int
        Number of observations averaged into each sample mean.
    num_samples : int
        Number of samples drawn per trial.
    num_trials : int
        Number of trials to run.

    Returns
    -------
    list
        ``num_samples * num_trials`` sample means, in the order generated.
    """
    means = []
    for _ in range(num_trials):
        # One row per sample. Only the exponential draw is used; an earlier
        # uniform draw that was immediately overwritten has been removed so
        # it no longer wastes work or advances the random generator.
        samples = np.random.exponential(scale=1.0, size=(num_samples, sample_size))
        means.extend(np.mean(samples, axis=1))
    return means

def plot_histogram(sample_size, num_samples, num_trials):
    """Draw a density histogram of the sample means from ``generate_sample``.

    Parameters
    ----------
    sample_size : int
        Forwarded to ``generate_sample``.
    num_samples : int
        Forwarded to ``generate_sample``.
    num_trials : int
        Forwarded to ``generate_sample``.
    """
    pooled_means = generate_sample(sample_size, num_samples, num_trials)

    hist_style = dict(bins=30, density=True, alpha=0.75, color='blue', edgecolor='black')

    plt.figure(figsize=(10, 6))
    plt.hist(pooled_means, **hist_style)
    plt.title('Central Limit Theorem')
    plt.xlabel('Sample Mean')
    plt.ylabel('Probability Density')
    plt.show()

# The upper bound of the num_trials slider must be at least its default
# value (1000); with max=100 the default was silently clamped down to 100.
# The trailing semicolon keeps the function repr out of the cell output.
interact(
    plot_histogram,
    sample_size=widgets.IntSlider(min=1, max=100, step=1, value=10, description='Sample Size'),
    num_samples=widgets.IntSlider(min=1, max=100, step=1, value=100, description='Number of Samples'),
    num_trials=widgets.IntSlider(min=1, max=1000, step=1, value=1000, description='Number of Trials')
);


interactive(children=(IntSlider(value=10, description='Sample Size', min=1), IntSlider(value=100, description=…

<function __main__.plot_histogram(sample_size, num_samples, num_trials)>

In [4]:
import matplotlib.pyplot as plt
import numpy as np
from ipywidgets import interact, widgets

def generate_samples(sample_size, num_samples):
    """Draw samples whose observations each come from a different normal.

    Every observation gets its own mean, uniform on [5, 15), and its own
    variance, uniform on [1, 5), and is then drawn from the normal
    distribution with those parameters.

    Parameters
    ----------
    sample_size : int
        Number of observations in each sample.
    num_samples : int
        Number of samples to generate.

    Returns
    -------
    tuple of (list, list, list)
        The mean of each sample, every per-observation mean that was
        generated, and every per-observation variance that was generated.
    """
    sample_means, generated_means, generated_variances = [], [], []

    for _ in range(num_samples):
        mus = np.random.uniform(5, 15, sample_size)       # one mean per observation
        sigma_sq = np.random.uniform(1, 5, sample_size)   # one variance per observation
        draws = np.random.normal(mus, np.sqrt(sigma_sq))  # normal() expects a std dev

        sample_means.append(draws.mean())
        generated_means += list(mus)
        generated_variances += list(sigma_sq)

    return sample_means, generated_means, generated_variances

def plot_distribution(sample_size):
    """Show three side-by-side density histograms for 1000 generated samples.

    The panels show, left to right, the generated means, the generated
    variances and the resulting sample means returned by
    ``generate_samples``.

    Parameters
    ----------
    sample_size : int
        Number of observations per sample, forwarded to ``generate_samples``.
    """
    num_samples = 1000
    sample_means, generated_means, generated_variances = generate_samples(sample_size, num_samples)

    # (data, bar colour, title, x-axis label) for each panel, left to right
    panels = [
        (generated_means, 'green', 'Generated Means', 'Mean Value'),
        (generated_variances, 'purple', 'Generated Variances', 'Variance Value'),
        (sample_means, 'blue', f'Sample Means (Sample Size = {sample_size})', 'Sample Mean'),
    ]

    plt.figure(figsize=(15, 5))
    for position, (values, colour, heading, x_label) in enumerate(panels, start=1):
        plt.subplot(1, 3, position)
        plt.hist(values, bins=30, density=True, alpha=0.75, color=colour, edgecolor='black')
        plt.title(heading)
        plt.xlabel(x_label)
        plt.ylabel('Probability Density')

    plt.tight_layout()
    plt.show()

# Slider controls how many observations go into each sample; the trailing
# semicolon keeps the returned function's repr out of the cell output.
interact(
    plot_distribution,
    sample_size=widgets.IntSlider(min=1, max=50, step=1, value=10, description='Sample Size')
);


interactive(children=(IntSlider(value=10, description='Sample Size', max=50, min=1), Output()), _dom_classes=(…

<function __main__.plot_distribution(sample_size)>

In [5]:
import matplotlib.pyplot as plt
import numpy as np
from ipywidgets import interact, widgets

def generate_samples(sample_size, num_samples, distribution):
    """Draw repeated samples from a named distribution and record their means.

    Parameters
    ----------
    sample_size : int
        Number of observations in each sample.
    num_samples : int
        Number of samples to draw.
    distribution : str
        One of ``'Normal'``, ``'Uniform'``, ``'Exponential'``, ``'Poisson'``,
        ``'Gamma'`` or ``'Beta'``. For ``'Normal'`` every observation is drawn
        from its own normal distribution, with mean uniform on [5, 15) and
        variance uniform on [1, 5).

    Returns
    -------
    tuple of (list, list)
        The mean of each sample, and all individual observations pooled
        across samples.

    Raises
    ------
    ValueError
        If ``distribution`` is not one of the supported names.
    """
    def _draw_normal(size):
        # Each observation has its own randomly chosen mean and variance.
        means = np.random.uniform(5, 15, size)
        variances = np.random.uniform(1, 5, size)
        return np.random.normal(means, np.sqrt(variances))

    samplers = {
        'Normal': _draw_normal,
        'Uniform': lambda size: np.random.uniform(5, 15, size),
        'Exponential': lambda size: np.random.exponential(5, size),
        'Poisson': lambda size: np.random.poisson(10, size),
        'Gamma': lambda size: np.random.gamma(2, 2, size),
        'Beta': lambda size: np.random.beta(2, 5, size),
    }

    # Fail early with a clear message instead of hitting an unbound local
    # variable inside the loop when the name is not recognised.
    if distribution not in samplers:
        raise ValueError(
            f"Unknown distribution {distribution!r}; expected one of {sorted(samplers)}"
        )
    draw = samplers[distribution]

    sample_means = []
    generated_values = []
    for _ in range(num_samples):
        samples = draw(sample_size)
        generated_values.extend(samples)
        sample_means.append(np.mean(samples))

    return sample_means, generated_values

def plot_distribution(sample_size, distribution):
    """Compare raw observations with their sample means for one distribution.

    Generates 1000 samples via ``generate_samples`` and draws two density
    histograms: the pooled observations on the left and the sample means
    on the right.

    Parameters
    ----------
    sample_size : int
        Number of observations per sample.
    distribution : str
        Distribution name, forwarded to ``generate_samples``.
    """
    num_samples = 1000
    sample_means, generated_values = generate_samples(sample_size, num_samples, distribution)

    # (data, bar colour, title, x-axis label) for each panel, left to right
    panels = [
        (generated_values, 'green', f'Generated Values ({distribution})', 'Value'),
        (sample_means, 'blue', f'Sample Means (Sample Size = {sample_size})', 'Sample Mean'),
    ]

    plt.figure(figsize=(15, 5))
    for position, (values, colour, heading, x_label) in enumerate(panels, start=1):
        plt.subplot(1, 2, position)
        plt.hist(values, bins=30, density=True, alpha=0.75, color=colour, edgecolor='black')
        plt.title(heading)
        plt.xlabel(x_label)
        plt.ylabel('Probability Density')

    plt.tight_layout()
    plt.show()

# Slider sets the sample size and the dropdown picks the source
# distribution; the trailing semicolon keeps the returned function's repr
# out of the cell output.
interact(
    plot_distribution,
    sample_size=widgets.IntSlider(min=1, max=50, step=1, value=10, description='Sample Size'),
    distribution=widgets.Dropdown(
        options=['Normal', 'Uniform', 'Exponential', 'Poisson', 'Gamma', 'Beta'],
        value='Normal',
        description='Distribution'
    )
);


interactive(children=(IntSlider(value=10, description='Sample Size', max=50, min=1), Dropdown(description='Dis…

<function __main__.plot_distribution(sample_size, distribution)>

The sum of independent, normally distributed random variables is itself normally distributed.

In [6]:
import numpy as np
import matplotlib.pyplot as plt
from ipywidgets import interact, widgets

def sum_of_normals_with_random_params(n, num_samples):
    """Simulate sums of ``n`` normal variables with random parameters.

    Each of the ``n`` variables gets a mean uniform on [0, 10) and a
    variance uniform on [1, 5); these parameters are drawn once and shared
    by every simulated sum.

    Parameters
    ----------
    n : int
        Number of normal variables added together in each sum.
    num_samples : int
        Number of sums to simulate.

    Returns
    -------
    tuple
        ``(sums, theoretical_mean, theoretical_std)`` where ``sums`` is an
        array of ``num_samples`` simulated sums, ``theoretical_mean`` is the
        sum of the drawn means and ``theoretical_std`` is the square root of
        the sum of the drawn variances.
    """
    # Per-variable parameters
    mus = np.random.uniform(0, 10, n)
    sigma_sq = np.random.uniform(1, 5, n)

    # One row per simulated sum, one column per variable; the parameter
    # vectors broadcast across the rows.
    draws = np.random.normal(mus, np.sqrt(sigma_sq), (num_samples, n))
    totals = draws.sum(axis=1)

    # Means add, and variances add, across the summed variables.
    expected_mean = mus.sum()
    expected_std = np.sqrt(sigma_sq.sum())

    return totals, expected_mean, expected_std

def plot_sum_of_normals(n, num_samples):
    """Overlay the simulated sums with their theoretical normal density.

    Parameters
    ----------
    n : int
        Number of normal variables in each sum.
    num_samples : int
        Number of sums to simulate.
    """
    sum_results, mu, sigma = sum_of_normals_with_random_params(n, num_samples)

    # Normal density evaluated over mu +/- 3 sigma
    x = np.linspace(mu - 3*sigma, mu + 3*sigma, 100)
    normalisation = 1/(sigma * np.sqrt(2 * np.pi))
    density = normalisation * np.exp( - (x - mu)**2 / (2 * sigma**2) )

    plt.figure(figsize=(10, 6))
    plt.hist(sum_results, bins=30, density=True, color='blue', alpha=0.7, label='Simulated Distribution')
    plt.plot(x, density, linewidth=2, color='red', label='Theoretical Distribution')

    plt.title('Sum of Randomly Generated Normally Distributed Random Variables')
    plt.xlabel('Sum')
    plt.ylabel('Density')
    plt.legend()
    plt.show()

# Define sliders: n is the number of variables per sum, Num Samples is the
# number of simulated sums shown in the histogram
n_slider = widgets.IntSlider(value=10, min=1, max=100, step=1, description='n')
num_samples_slider = widgets.IntSlider(value=10, min=1, max=10000, step=5, description='Num Samples')

# Create interactive widget; the trailing semicolon keeps the returned
# function's repr out of the cell output
interact(plot_sum_of_normals, n=n_slider, num_samples=num_samples_slider);


interactive(children=(IntSlider(value=10, description='n', min=1), IntSlider(value=10, description='Num Sample…

<function __main__.plot_sum_of_normals(n, num_samples)>