In [2]:
!pip install seaborn

Collecting seaborn
  Downloading seaborn-0.13.2-py3-none-any.whl (294 kB)
     -------------------------------------- 294.9/294.9 kB 1.0 MB/s eta 0:00:00
Installing collected packages: seaborn
Successfully installed seaborn-0.13.2




In [9]:
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from ipywidgets import interact, IntSlider, Dropdown

# Global plot styling. `set_theme` is the preferred seaborn entry point;
# `sns.set` is only a legacy alias for it.
sns.set_theme(style="whitegrid")
# Seed NumPy's global RNG so that running this cell from a fresh kernel
# starts from the same random stream.
np.random.seed(42)

# --- Define fixed population distributions ---
def generate_population(option, size=100_000):
    """Draw a synthetic population for the selected variable.

    Values come from NumPy's global random state (``np.random``), so the
    result depends on the current seed/stream position.

    Parameters
    ----------
    option : str
        One of ``'Income'``, ``'Body size'``, ``'Coin toss'`` or
        ``'Dice roll'``. Any other value yields a standard normal population.
    size : int, default 100_000
        Number of values to draw.

    Returns
    -------
    numpy.ndarray
        Array with ``size`` draws from the distribution tied to ``option``.
    """
    # (label, sampler) pairs; the lambdas keep the draw lazy so that only the
    # selected distribution consumes random numbers.
    samplers = (
        # Lognormal: right-skewed, used to mimic an income distribution
        # (euros/month).
        ('Income', lambda: np.random.lognormal(mean=10, sigma=0.5, size=size)),
        # Normal distribution, e.g. human height in cm.
        ('Body size', lambda: np.random.normal(loc=170, scale=10, size=size)),
        # Bernoulli(0.5): heads = 1, tails = 0.
        ('Coin toss', lambda: np.random.binomial(n=1, p=0.5, size=size)),
        # Uniform integers 1-6 (upper bound of randint is exclusive).
        ('Dice roll', lambda: np.random.randint(1, 7, size=size)),
    )
    for label, draw in samplers:
        if option == label:
            return draw()
    # Unknown option: fall back to a standard normal population.
    return np.random.normal(size=size)

# --- Sampling function ---
def get_sample_means(population, n, num_samples):
    """Return the means of ``num_samples`` random samples of size ``n``.

    Each sample is drawn with replacement from ``population`` using NumPy's
    global random state (``np.random``).

    Parameters
    ----------
    population : array-like
        1-D collection of values to sample from.
    n : int
        Number of observations in each sample.
    num_samples : int
        Number of samples (and therefore of means) to produce.

    Returns
    -------
    numpy.ndarray
        1-D array of length ``num_samples`` holding one mean per sample.
    """
    # A single (num_samples, n) draw replaces num_samples separate
    # np.random.choice calls in a Python loop; each row is one sample.
    draws = np.random.choice(population, size=(num_samples, n), replace=True)
    return draws.mean(axis=1)

# --- Interactive function ---
# Populations already generated, keyed by variable name. Without this cache
# every widget change would draw a brand-new population from the advancing
# global RNG, so the "fixed" population and its true mean would shift each
# time a slider is moved.
_population_cache = {}


def _get_population(variable):
    """Return the population for ``variable``, generating it on first use only."""
    if variable not in _population_cache:
        _population_cache[variable] = generate_population(variable)
    return _population_cache[variable]


@interact(
    variable=Dropdown(options=['Income', 'Body size', 'Coin toss', 'Dice roll'], value='Income', description='Variable:'),
    sample_size=IntSlider(min=2, max=500, step=1, value=30, description='Sample size'),
    num_samples=IntSlider(min=100, max=5000, step=100, value=1000, description='No. of samples')
)
def clt_demo(variable, sample_size, num_samples):
    """Plot a population next to the sampling distribution of its mean.

    Left panel: histogram of the population for ``variable`` with its mean
    marked. Right panel: histogram of ``num_samples`` sample means, each from a
    sample of size ``sample_size``, drawn on an x-range covering the central
    99% of the population so the narrowing of the sampling distribution is
    visible.

    Parameters
    ----------
    variable : str
        Name of the population, passed to ``generate_population``.
    sample_size : int
        Number of observations per sample.
    num_samples : int
        Number of samples whose means are plotted.
    """
    population = _get_population(variable)
    xmin, xmax = np.percentile(population, [0.5, 99.5])
    true_mean = np.mean(population)
    sample_means = get_sample_means(population, sample_size, num_samples)

    fig, (ax_pop, ax_means) = plt.subplots(1, 2, figsize=(12, 5))

    # Population distribution
    sns.histplot(population, bins=40, kde=True, color='gray', ax=ax_pop)
    ax_pop.axvline(true_mean, color='red', linestyle='--', label='True mean')
    ax_pop.set_title(f'Population: {variable}')
    ax_pop.set_xlabel('Value')
    ax_pop.set_ylabel('Frequency')
    ax_pop.legend()

    # Sampling distribution of the mean
    sns.histplot(sample_means, bins=40, kde=True, color='royalblue', ax=ax_means)
    # Use the population's 0.5-99.5 percentile range as x-limits instead of
    # autoscaling to the (much narrower) spread of the sample means.
    ax_means.set_xlim(xmin, xmax)
    ax_means.axvline(true_mean, color='red', linestyle='--', label='True mean')
    ax_means.set_title(f'Sampling Distribution (n = {sample_size}, samples = {num_samples})')
    ax_means.set_xlabel('Sample mean')
    ax_means.set_ylabel('Frequency')
    ax_means.legend()

    fig.tight_layout()
    plt.show()


interactive(children=(Dropdown(description='Variable:', options=('Income', 'Body size', 'Coin toss', 'Dice rol…

interactive(children=(FloatSlider(value=0.2, description='P(1)', max=1.0, step=0.05), FloatSlider(value=0.2, d…